1 /*******************************************************************************
2     Copyright (c) 2013-2022 NVIDIA Corporation
3 
4     Permission is hereby granted, free of charge, to any person obtaining a copy
5     of this software and associated documentation files (the "Software"), to
6     deal in the Software without restriction, including without limitation the
7     rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
8     sell copies of the Software, and to permit persons to whom the Software is
9     furnished to do so, subject to the following conditions:
10 
11         The above copyright notice and this permission notice shall be
12         included in all copies or substantial portions of the Software.
13 
14     THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15     IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16     FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17     THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
18     LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
19     FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
20     DEALINGS IN THE SOFTWARE.
21 *******************************************************************************/
22 
23 //
24 // uvm.h
25 //
26 // This file contains the UVM API declarations, for the userspace-to-kernel
27 // calls. For legacy API definitions that are in use on Windows, see
28 // uvm_legacy.h.
29 //
30 
31 // UVM API signature modification steps
32 // In order to change API signature for any of the APIs defined in this file, a
33 // particular sequence of steps has to be followed since the consumer of this
34 // API (i.e. CUDA) belongs to a different module branch than the one for this
35 // file. Here are the steps to change the signature for a hypothetical API named
36 // UvmExampleApi. The assumption being made here is that this file is being
37 // modified in chips_a.
38 // 1) Increment the value of UVM_API_LATEST_REVISION defined in this file.
39 // 2) Use the macro UVM_API_REV_IS_AT_MOST to define the two revisions of the
40 //    API as follows:
41 //     #if UVM_API_REV_IS_AT_MOST(<previous_value_of_UVM_API_LATEST_REVISION>)
42 //     // Old UvmExampleApi declaration
43 //     #else
44 //     // New UvmExampleApi declaration
45 //     #endif
46 // 3) Do the same thing for the function definition, and for any structs that
47 //    are taken as arguments to these functions.
48 // 4) Let this change propagate over to cuda_a, so that the CUDA driver can
49 //    start using the new API by bumping up the API version number it uses.
50 //    This can be found in gpgpu/cuda/cuda.nvmk.
51 // 5) Once the cuda_a changes have made it back into chips_a, remove the old API
52 //    declaration, definition, and any old structs that were in use.
53 
54 #ifndef _UVM_H_
55 #define _UVM_H_
56 
// Newest API revision declared by this header. It must be incremented whenever
// the signature of an API declared in this file changes (see the modification
// steps at the top of this file).
57 #define UVM_API_LATEST_REVISION 7
58
// There is no default revision: every includer must define UVM_API_REVISION
// before including this header, either to a specific revision number or to
// UVM_API_LATEST_REVISION.
59 #if !defined(UVM_API_REVISION)
60 #error "please define UVM_API_REVISION macro to a desired version number or UVM_API_LATEST_REVISION macro"
61 #endif
62
// Evaluates to non-zero when the revision selected by the includer
// (UVM_API_REVISION) is less than or equal to rev. It is used to choose between
// the old and the new declaration of an API whose signature has changed.
//
// The argument is parenthesized so that an expression argument is compared as
// a whole instead of being regrouped around the <= operator.
63 #define UVM_API_REV_IS_AT_MOST(rev) (UVM_API_REVISION <= (rev))
64 
65 #include "uvm_types.h"
66 #include "uvm_user_types.h"
67 #include "uvm_legacy.h"
68 
69 #ifdef __cplusplus
70 extern "C" {
71 #endif
72 
73 //------------------------------------------------------------------------------
74 // UvmSetDriverVersion
75 //
76 // Informs the user-mode layer which kernel driver version is running. The user-
77 // mode layer uses this information to know what flavor to use when calling
78 // kernel APIs.
79 //
80 // If this API is not called, the user-mode layer assumes that the kernel
81 // version is the same as the user-mode layer version.
82 //
83 // The last UvmDeinitialize will reset this state.
84 //
85 // If this API is called, it must be called before UvmInitialize. It is an error
86 // to call this API after UvmInitialize and before the last UvmDeinitialize, or
87 // to call this API more than once before the last UvmDeinitialize.
88 //
89 // Arguments:
90 //     major: (INPUT)
91 //         The kernel driver's major version number, such as 384.
92 //
93 //     changelist: (INPUT)
94 //         The changelist at which the kernel driver was built.
95 //
96 // Error codes:
97 //     NV_ERR_INVALID_STATE:
98 //         UvmInitialize or UvmSetDriverVersion has already been called.
99 //
100 //------------------------------------------------------------------------------
101 NV_STATUS UvmSetDriverVersion(NvU32 major, NvU32 changelist);
102 
103 //------------------------------------------------------------------------------
104 // UvmInitialize
105 //
106 // This must be called before any other UVM functions except for
107 // UvmSetDriverVersion. Repeated calls to UvmInitialize increment a refcount,
108 // which is decremented by calls to UvmDeinitialize. UVM deinitialization
109 // occurs when the refcount reaches zero.
110 //
111 // The UVM file descriptor passed in can either be UVM_AUTO_FD or a valid file
112 // descriptor created during a prior call to UvmInitialize. If UVM_AUTO_FD is
113 // passed and the refcount is zero, a new file descriptor is created. Subsequent
114 // calls must either also specify UVM_AUTO_FD or use the current file
115 // descriptor. If the first call to UvmInitialize did not specify UVM_AUTO_FD,
116 // all subsequent calls must use the same file descriptor used in the initial
117 // call. The file descriptor that is currently in use can be retrieved using
118 // UvmGetFileDescriptor.
119 //
120 // If flags does not contain UVM_INIT_FLAGS_MULTI_PROCESS_SHARING_MODE, the
121 // UvmInitialize call which creates the file descriptor will associate the
122 // calling process with that file descriptor when the Operating System can
123 // support such an association. In that case UvmInitialize may be called using
124 // the same file in other processes, but internally the file remains associated
125 // with the original process.
126 //
127 // Arguments:
128 //     fd: (INPUT)
129 //         The UVM file descriptor to initialize UVM with. Passing in
130 //         UVM_AUTO_FD creates a new file descriptor on the first call to
131 //         UvmInitialize.
132 //
133 //     flags: (INPUT)
134 //         Must be a combination of 0 or more of the following flags:
135 //
136 //         - UVM_INIT_FLAGS_DISABLE_HMM
137 //             Specifying this flag will only have an effect if the system
138 //             allows GPUs to read/write system (CPU) pageable memory and the
139 //             GPUs do not have hardware support to do it transparently, and the
140 //             UVM_INIT_FLAGS_MULTI_PROCESS_SHARING_MODE flag is not specified.
141 //             In such cases pageable access from the GPU will be disabled.
142 //
143 //             Pageable memory here refers to memory allocated by the Operating
144 //             System for the process's globals, stack variables, heap
145 //             allocations, etc. that has not been registered for CUDA access
146 //             using cudaHostRegister.
147 //
148 //         - UVM_INIT_FLAGS_MULTI_PROCESS_SHARING_MODE
149 //             Specifying this flag will prevent UVM from creating any
150 //             association between this process and the UVM file descriptor.
151 //             Pageable memory access of any kind will be disabled (regardless
152 //             of whether UVM_INIT_FLAGS_DISABLE_HMM was specified) and the GPU
153 //             resources used by the UVM file descriptor will be freed when the
154 //             last reference to the file is dropped rather than when this
155 //             process exits.
156 //
157 //             If this flag is not specified, calling UvmMemMap or
158 //             UvmAllocSemaphorePool on the same file from a different process
159 //             may return an error.
160 //
161 //         If UvmInitialize is called multiple times on the same file, even from
162 //         different processes, the flags to each call must match.
163 //
164 // Error codes:
165 //     NV_ERR_NOT_SUPPORTED:
166 //         The Linux kernel is not able to support UVM. This could be because
167 //         the kernel is too old, or because it lacks a feature that UVM
168 //         requires. The kernel log will have details.
169 //
170 //     NV_ERR_INVALID_ARGUMENT:
171 //         The file descriptor passed in is neither UVM_AUTO_FD nor a valid file
172 //         descriptor created during a prior call to UvmInitialize, or the flags
173 //         do not match a prior call to UvmInitialize.
174 //
175 //     NV_ERR_NO_MEMORY:
176 //         Internal memory allocation failed.
177 //
178 //     NV_ERR_GENERIC:
179 //         Unexpected error. We try hard to avoid returning this error code,
180 //         because it is not very informative.
181 //
182 //------------------------------------------------------------------------------
// Callers that select API revision 4 or older get the older declaration, which
// has no flags parameter.
183 #if UVM_API_REV_IS_AT_MOST(4)
184 NV_STATUS UvmInitialize(UvmFileDescriptor fd);
// Callers that select a newer revision get the declaration that takes the
// flags parameter described in the comment block above.
185 #else
186 NV_STATUS UvmInitialize(UvmFileDescriptor fd,
187                         NvU64             flags);
188 #endif
189 
190 //------------------------------------------------------------------------------
191 // UvmDeinitialize
192 //
193 // Releases the reference implicitly obtained by UvmInitialize. If the refcount
194 // reaches zero, cleans up all UVM resources associated with the calling
195 // process. Any channels that are still registered will be unregistered prior to
196 // unmapping any managed allocations. Any resources that have been shared with
197 // other processes and are still being used will continue to remain valid.
198 //
199 // Error codes:
200 //     NV_ERR_INVALID_STATE:
201 //         Refcount is zero.
202 //
203 //     NV_ERR_GENERIC:
204 //         Unexpected error. We try hard to avoid returning this error code,
205 //         because it is not very informative.
206 //
207 //------------------------------------------------------------------------------
208 NV_STATUS UvmDeinitialize(void);
209 
210 //------------------------------------------------------------------------------
211 // UvmReopen
212 //
213 // Reinitializes the UVM driver after checking for minimal user-mode state.
214 // Before calling this function, all GPUs must be unregistered with
215 // UvmUnregisterGpu() and all allocated VA ranges must be freed with UvmFree().
216 // Note that it is not required to release VA ranges that were reserved with
217 // UvmReserveVa().
218 //
219 // UvmReopen() closes the open file returned by UvmGetFileDescriptor() and
220 // replaces it with a new open file with the same name.
221 //
222 // Arguments:
223 //     flags: (INPUT)
224 //         Must be zero. UVM will be reinitialized with the same flags that
225 //         were passed to UvmInitialize() originally.
226 //
227 // Error codes:
228 //     NV_ERR_INVALID_STATE:
229 //         UVM was not initialized before calling this function.
230 //
231 //     NV_ERR_UVM_ADDRESS_IN_USE:
232 //         Not all allocated VA ranges were freed before calling this function.
233 //
234 //     NV_ERR_IN_USE:
235 //         Not all GPUs were unregistered before calling this function.
236 //
237 //     NV_ERR_INVALID_FLAGS:
238 //         Flags is not zero.
239 //
240 //     NV_ERR_OPERATING_SYSTEM:
241 //         Replacing the original UVM file descriptor failed.
242 //
243 //     NV_ERR_GENERIC:
244 //         Unexpected error. We try hard to avoid returning this error code,
245 //         because it is not very informative.
246 //
247 //------------------------------------------------------------------------------
248 NV_STATUS UvmReopen(NvU64 flags);
249 
250 //------------------------------------------------------------------------------
251 // UvmIsPageableMemoryAccessSupported
252 //
253 // Returns true only if pageable memory access from GPUs is supported by the
254 // system and that support was not explicitly disabled via UvmInitialize.
255 //
256 // Pageable memory here refers to memory allocated by the Operating System for
257 // the process's globals, stack variables, heap allocations, etc. that has not
258 // been registered for CUDA access using cudaHostRegister.
259 //
260 // Note that this does not check whether GPUs are present which can make use of
261 // this feature, just whether system support exists. If
262 // UvmIsPageableMemoryAccessSupported reports that pageable memory access is
263 // supported, UvmIsPageableMemoryAccessSupportedOnGpu can be used for querying
264 // per-GPU support.
265 //
266 // Arguments:
267 //     pageableMemAccess: (OUTPUT)
268 //         Returns true (non-zero) if the system supports pageable memory access
269 //         from GPUs and that support was not explicitly disabled via
270 //         UvmInitialize, and false (zero) otherwise.
271 //
272 // Error codes:
273 //     NV_ERR_INVALID_STATE:
274 //         UVM was not initialized.
275 //
276 //     NV_ERR_GENERIC:
277 //         Unexpected error. We try hard to avoid returning this error code,
278 //         because it is not very informative.
279 //
280 //------------------------------------------------------------------------------
281 NV_STATUS UvmIsPageableMemoryAccessSupported(NvBool *pageableMemAccess);
282 
283 //------------------------------------------------------------------------------
284 // UvmIsPageableMemoryAccessSupportedOnGpu
285 //
286 // Returns whether pageable memory access is supported from the given GPU on
287 // this system and that support was not explicitly disabled via UvmInitialize.
288 // The GPU must have been previously registered with UvmRegisterGpu.
289 //
290 // Pageable memory here refers to memory allocated by the Operating System for
291 // the process's globals, stack variables, heap allocations, etc. that has not
292 // been registered for CUDA access using cudaHostRegister.
293 //
294 // Arguments:
295 //     gpuUuid: (INPUT)
296 //         UUID of the GPU for which pageable memory access support is queried.
297 //
298 //     pageableMemAccess: (OUTPUT)
299 //         Returns true (non-zero) if the GPU represented by gpuUuid supports
300 //         pageable memory access and that support was not explicitly disabled
301 //         via UvmInitialize, and false (zero) otherwise.
302 //
303 // Error codes:
304 //     NV_ERR_INVALID_STATE:
305 //         UVM was not initialized.
306 //
307 //     NV_ERR_INVALID_DEVICE:
308 //         The given GPU has not been registered.
309 //
310 //     NV_ERR_GENERIC:
311 //         Unexpected error. We try hard to avoid returning this error code,
312 //         because it is not very informative.
313 //
314 //------------------------------------------------------------------------------
315 NV_STATUS UvmIsPageableMemoryAccessSupportedOnGpu(const NvProcessorUuid *gpuUuid,
316                                                   NvBool *pageableMemAccess);
317 
318 //------------------------------------------------------------------------------
319 // UvmRegisterGpu
320 //
321 // Registers a GPU with UVM. If this is the first process to register this GPU,
322 // the UVM driver initializes resources on the GPU and prepares it for CUDA
323 // usage. Calling UvmRegisterGpu multiple times on the same GPU from the same
324 // process results in an error.
325 //
326 // Arguments:
327 //     gpuUuid: (INPUT)
328 //         UUID of the GPU to register.
329 //
330 // Error codes:
331 //     NV_ERR_NO_MEMORY:
332 //         Internal memory allocation failed.
333 //
334 //     NV_ERR_INSUFFICIENT_RESOURCES:
335 //         Internal client or object allocation failed.
336 //
337 //     NV_ERR_INVALID_DEVICE:
338 //         The GPU referred to by gpuUuid has already been registered by this
339 //         process.
340 //
341 //         The GPU referred to by gpuUuid doesn't have an NVLINK2 link to the
342 //         CPU but a GPU with such a link has already been registered by this
343 //         process, or vice-versa.
344 //
345 //     NV_ERR_NOT_SUPPORTED:
346 //         The GPU referred to by gpuUuid is not supported by UVM or the GPU
347 //         is configured to run in virtualization mode without SRIOV support.
348 //
349 //     NV_ERR_GPU_UUID_NOT_FOUND:
350 //         The GPU referred to by gpuUuid was not found.
351 //
352 //     NV_ERR_PAGE_TABLE_NOT_AVAIL:
353 //         The system requires that the UVM file descriptor be associated with a
354 //         single process, and that process has exited.
355 //
356 //     NV_ERR_INVALID_ARGUMENT:
357 //         OS state required to register the GPU is not present.
358 //
359 //     NV_ERR_OBJECT_NOT_FOUND:
360 //         OS state required to register the GPU is not present.
361 //
362 //     NV_ERR_INVALID_STATE:
363 //         OS state required to register the GPU is malformed.
364 //
365 //     NV_ERR_GENERIC:
366 //         Unexpected error. We try hard to avoid returning this error code,
367 //         because it is not very informative.
368 //
369 //------------------------------------------------------------------------------
370 NV_STATUS UvmRegisterGpu(const NvProcessorUuid *gpuUuid);
371 
372 //------------------------------------------------------------------------------
373 // UvmRegisterGpuSmc
374 //
375 // The same as UvmRegisterGpu, but takes additional parameters to specify the
376 // GPU partition being registered if SMC is enabled.
377 //
378 // TODO: Bug 2844714: Merge UvmRegisterGpuSmc() with UvmRegisterGpu() once
379 //       the initial SMC support is in place.
380 //
381 // Arguments:
382 //     gpuUuid: (INPUT)
383 //         UUID of the parent GPU of the SMC partition to register.
384 //
385 //     platformParams: (INPUT)
386 //         User handles identifying the partition to register.
387 //
388 // Error codes (see UvmRegisterGpu also):
389 //
390 //     NV_ERR_INVALID_STATE:
391 //         SMC was not enabled, or the partition identified by the user
392 //         handles or its configuration changed.
393 //
//------------------------------------------------------------------------------
394 NV_STATUS UvmRegisterGpuSmc(const NvProcessorUuid *gpuUuid,
395                             const UvmGpuPlatformParams *platformParams);
396 
397 //------------------------------------------------------------------------------
398 // UvmUnregisterGpu
399 //
400 // Unregisters a GPU from UVM. If this is the last process to unregister this
401 // GPU, the UVM driver frees all resources allocated on the GPU when the GPU
402 // was first registered. Any pages on the GPU allocated by the UVM driver will
403 // be migrated to CPU memory before the GPU resources are freed.
404 //
405 // Any GPU VA spaces or channels that were registered on this GPU using
406 // UvmRegisterGpuVaSpace or UvmRegisterChannel respectively, will be
407 // unregistered. Any state that was set by calling UvmSetPreferredLocation or
408 // UvmSetAccessedBy for this GPU will be cleared. Any pages that were associated
409 // with a non-migratable range group and had this GPU as their preferred
410 // location will have their range group association changed to
411 // UVM_RANGE_GROUP_ID_NONE.
412 //
413 // Arguments:
414 //     gpuUuid: (INPUT)
415 //         UUID of the GPU to unregister.
416 //
417 // Error codes:
418 //     NV_ERR_INVALID_DEVICE:
419 //         The GPU referred to by gpuUuid was not registered by this process.
420 //
421 //     NV_ERR_GPU_UUID_NOT_FOUND:
422 //         The GPU referred to by gpuUuid was not found.
423 //
424 //     NV_ERR_GENERIC:
425 //         Unexpected error. We try hard to avoid returning this error code,
426 //         because it is not very informative.
427 //
428 //------------------------------------------------------------------------------
429 NV_STATUS UvmUnregisterGpu(const NvProcessorUuid *gpuUuid);
430 
431 //------------------------------------------------------------------------------
432 // UvmRegisterGpuVaSpace
433 //
434 // Registers a GPU's VA (virtual address) space for use with UVM. Only one GPU
435 // VA space can be registered for a given GPU at a time. Once a VA space has
436 // been registered for a GPU, all page table updates for that VA space on that
437 // GPU will be managed by the UVM driver.
438 //
439 // The GPU must have been registered using UvmRegisterGpu prior to making this
440 // call.
441 //
442 // On systems with GPUs that support transparent access to pageable memory, this
443 // feature is enabled per GPU VA space. This setting must match for all
444 // registered GPU VA spaces.
445 //
446 // Any VA ranges that were allocated using UvmAllocSemaphorePool will be mapped
447 // on this GPU with the mapping and caching attributes as specified during that
448 // call, or with default attributes if none were specified.
449 //
450 // Any VA ranges that had a preferred location set to this GPU will be mapped on
451 // this GPU only if this GPU is not fault-capable and the VA range belongs to a
452 // non-migratable range group. If such a mapping cannot be established, an error
453 // is returned.
454 //
455 // Any VA ranges which have accessed-by set for this GPU will be mapped on this
456 // GPU. If that VA range resides in a PCIe peer GPU's memory and P2P support
457 // between the two GPUs has not been enabled via UvmEnablePeerAccess, then a
458 // mapping won't be established. Also, if read duplication is enabled for this
459 // VA range, or its preferred location is set to this GPU, and this GPU is a
460 // fault-capable GPU, then a mapping will not be established.  If this is a
461 // non-fault-capable GPU and a mapping cannot be established, then an error is
462 // returned.
463 //
464 // If P2P support has been enabled between this GPU and another GPU that also
465 // has a GPU VA space registered, then the two GPU VA spaces must support the
466 // same set of page sizes for GPU mappings. Otherwise, an error is returned.
467 //
468 // Note that all the aforementioned VA ranges must lie within the largest
469 // possible virtual address supported by this GPU.
470 //
471 // Arguments:
472 //     gpuUuid: (INPUT)
473 //         UUID of the GPU to register.
474 //
475 //     platformParams: (INPUT)
476 //         On Linux: RM ctrl fd, hClient and hVaSpace.
477 //
478 // Error codes:
479 //     NV_ERR_NO_MEMORY:
480 //         Internal memory allocation failed.
481 //
482 //     NV_ERR_OUT_OF_RANGE:
483 //         A VA range that needs to be mapped on this GPU exceeds the largest
484 //         virtual address supported by the GPU.
485 //
486 //     NV_ERR_INVALID_DEVICE:
487 //         The GPU referred to by gpuUuid was not registered or a VA space has
488 //         already been registered for this GPU. Or this is a non-fault-capable
489 //         GPU that is present in the accessed-by list of a VA range that
490 //         resides on another non-fault-capable GPU, and P2P support between
491 //         both GPUs is not enabled.
492 //
493 //     NV_ERR_OTHER_DEVICE_FOUND:
494 //         The UUID does not match the UUID of the device that is associated
495 //         with the VA space handles in the platformParams argument.
496 //
497 //     NV_ERR_INVALID_FLAGS:
498 //         The VA space was originally allocated with UVM-incompatible flags.
499 //         This includes the case in which the value for the setting to enable
500 //         transparent access to pageable memory for the given GPU VA space does
501 //         not match the value in previously-registered GPU VA spaces, or that
502 //         value is set but pageable memory access has been disabled via
503 //         UvmInitialize.
504 //
505 //     NV_ERR_NOT_COMPATIBLE:
506 //         The GPU referred to by gpuUuid has P2P support enabled with another
507 //         GPU and the set of page sizes supported by the specified VA space
508 //         doesn't match that of the VA space registered on the peer GPU.
509 //
510 //     NV_ERR_INVALID_ARGUMENT:
511 //         Some problem with the platform specific arguments was detected.
512 //
513 //     NV_ERR_NOT_SUPPORTED:
514 //         A GPU VA space has already been registered using a different UVM file
515 //         descriptor in this process and this platform does not support that
516 //         operation, or a GPU VA space has already been registered on this UVM
517 //         file descriptor by a different process and this platform does not
518 //         support that operation.
519 //
520 //     NV_ERR_PAGE_TABLE_NOT_AVAIL:
521 //         The system requires that the UVM file descriptor be associated with a
522 //         single process, and that process has exited.
523 //
524 //     NV_ERR_GENERIC:
525 //         Unexpected error. We try hard to avoid returning this error code,
526 //         because it is not very informative.
527 //
528 //------------------------------------------------------------------------------
529 NV_STATUS UvmRegisterGpuVaSpace(const NvProcessorUuid             *gpuUuid,
530                                 const UvmGpuVaSpacePlatformParams *platformParams);
531 
532 //------------------------------------------------------------------------------
533 // UvmUnregisterGpuVaSpace
534 //
535 // Unregisters the GPU VA space that was previously registered via a call to
536 // UvmRegisterGpuVaSpace.
537 //
538 // Any page table mappings created by UVM on that GPU for that VA space will be
539 // unmapped. Any channels that were registered on this GPU using
540 // UvmRegisterChannel will be unregistered.
541 //
542 // Arguments:
543 //     gpuUuid: (INPUT)
544 //         UUID of the GPU whose VA space should be unregistered.
545 //
546 // Error codes:
547 //     NV_ERR_INVALID_DEVICE:
548 //         The GPU referred to by gpuUuid was not registered or no VA space has
549 //         been registered for this GPU.
550 //
551 //     NV_ERR_GENERIC:
552 //         Unexpected error. We try hard to avoid returning this error code,
553 //         because it is not very informative.
554 //
555 //------------------------------------------------------------------------------
556 NV_STATUS UvmUnregisterGpuVaSpace(const NvProcessorUuid *gpuUuid);
557 
558 //------------------------------------------------------------------------------
559 // UvmEnablePeerAccess
560 //
561 // Enables P2P (peer to peer) support in the UVM driver between two GPUs
562 // connected via PCIe. NVLink peers are automatically discovered/enabled in the
563 // driver at UvmRegisterGpu time. Enabling P2P support between two GPUs allows
564 // peer mappings to be created as part of fault servicing, memory allocation,
565 // etc. The P2P support is bidirectional i.e. enabling P2P between GPU A and
566 // GPU B also enables P2P support between GPU B and GPU A.
567 //
568 // The two GPUs must have been registered via UvmRegisterGpu prior to making
569 // this call. An error is returned if P2P support has already been enabled
570 // between these two GPUs in this process.
571 //
572 // The two GPUs must be connected via PCIe. An error is returned if the GPUs are
573 // not connected or are connected over an interconnect different than PCIe
574 // (NVLink, for example).
575 //
576 // If both GPUs have GPU VA spaces registered for them, the two GPU VA spaces
577 // must support the same set of page sizes for GPU mappings.
578 //
579 // If any VA range resides in one GPU's memory, and the peer GPU is in the
580 // accessed-by list of that VA range, then a peer mapping will be established
581 // unless the VA space for the peer GPU has not been registered, or read
582 // duplication is enabled for the VA range, or the preferred location of the VA
583 // range is the peer GPU.
584 //
585 // Arguments:
586 //     gpuUuidA: (INPUT)
587 //         UUID of GPU A.
588 //
589 //     gpuUuidB: (INPUT)
590 //         UUID of GPU B.
591 //
592 // Error codes:
593 //     NV_ERR_NO_MEMORY:
594 //         Internal memory allocation failed.
595 //
596 //     NV_ERR_INVALID_DEVICE:
597 //         At least one GPU has not been registered, P2P support has already
598 //         been enabled between the two GPUs, or the GPUs are connected via an
599 //         interconnect other than PCIe.
600 //
601 //     NV_ERR_NOT_SUPPORTED:
602 //         The two GPUs are not peer capable.
603 //
604 //     NV_ERR_NOT_COMPATIBLE:
605 //         Both GPUs have a GPU VA space registered for them and the two VA
606 //         spaces don't support the same set of page sizes for GPU mappings.
607 //
608 //     NV_ERR_GENERIC:
609 //         Unexpected error. We try hard to avoid returning this error code,
610 //         because it is not very informative.
611 //
612 //------------------------------------------------------------------------------
613 NV_STATUS UvmEnablePeerAccess(const NvProcessorUuid *gpuUuidA,
614                               const NvProcessorUuid *gpuUuidB);
615 
616 //------------------------------------------------------------------------------
617 // UvmDisablePeerAccess
618 //
619 // Disables P2P (peer to peer) support in the UVM driver between two GPUs
620 // connected via PCIe. NVLink peers are automatically disabled in the driver
621 // at UvmUnregisterGpu time. Disabling P2P support between two GPUs removes all
622 // existing peer mappings from either GPU to the other, and also prevents new
623 // peer mappings from being established between the two GPUs.
624 //
625 // The two GPUs must be connected via PCIe. An error is returned if the GPUs are
626 // not connected or are connected over an interconnect different than PCIe
627 // (NVLink, for example).
628 //
629 // If one of the two GPUs is present in the accessed-by list of a non-migratable
630 // VA range that has a preferred location set to the other GPU, and the two GPUs
631 // are not fault-capable, then the GPU is removed from the accessed-by list of
632 // the range.
633 //
634 // Arguments:
635 //     gpuUuidA: (INPUT)
636 //         UUID of GPU A.
637 //
638 //     gpuUuidB: (INPUT)
639 //         UUID of GPU B.
640 //
641 // Error codes:
642 //     NV_ERR_INVALID_DEVICE:
643 //         At least one GPU has not been registered, or P2P support has not been
644 //         enabled between the two GPUs, or the GPUs are connected via an
645 //         interconnect other than PCIe.
646 //
647 //     NV_ERR_GENERIC:
648 //         Unexpected error. We try hard to avoid returning this error code,
649 //         because it is not very informative.
650 //
651 //------------------------------------------------------------------------------
652 NV_STATUS UvmDisablePeerAccess(const NvProcessorUuid *gpuUuidA,
653                                const NvProcessorUuid *gpuUuidB);
654 
655 //------------------------------------------------------------------------------
656 // UvmRegisterChannel
657 //
658 // Register a channel for use with UVM. Any faults that occur on this channel
659 // will be handled by the UVM driver.
660 //
661 // A GPU VA space must have been registered on this GPU via
662 // UvmRegisterGpuVaSpace prior to making this call.
663 //
664 // For channels that require associated mappings, the base and length of a
665 // virtual address region that was reserved via UvmReserveVa must be supplied to
666 // this call in order to map those allocations. The size and alignment of this
667 // region can be obtained by calling the appropriate platform specific API. For
668 // example, on RM, an RM control call has to be made with the control type as
669 // NV2080_CTRL_CMD_GR_GET_CTX_BUFFER_SIZE. If no region needs to be reserved for
670 // this channel, the base and length arguments are ignored.
671 //
672 // Using the same VA region for multiple UvmRegisterChannel calls is allowed,
673 // provided all allocations required by all of those calls fit within the
674 // region.
675 //
676 // Registering the same channel on multiple subdevices of an SLI group is
677 // disallowed.
678 //
679 // On any errors, the channel may be reset, thereby terminating any pending
680 // work on that channel.
681 //
682 // Arguments:
683 //     gpuUuid: (INPUT)
684 //        UUID of the GPU that the channel is associated with.
685 //
686 //     platformParams: (INPUT)
687 //         On Linux: RM ctrl fd, hClient and hChannel.
688 //
689 //     base: (INPUT)
690 //         Base address (starting point) of the VA (virtual address) range
691 //         reserved for mapping the allocations associated with this channel.
692 //         If this channel does not have associated allocations, this argument
693 //         is ignored.
694 //
695 //     length: (INPUT)
696 //         Length, in bytes, of the range. If this channel does not have
697 //         associated allocations, this argument is ignored.
698 //
699 // Error codes:
700 //     NV_ERR_NO_MEMORY:
701 //         Internal memory allocation failed.
702 //
703 //     NV_ERR_OTHER_DEVICE_FOUND:
704 //         The UUID does not match the UUID of the device that is associated
705 //         with the channel identifier in the platformParams argument.
706 //
707 //     NV_ERR_GPU_INVALID_DEVICE:
708 //         The GPU referred to by pGpuUuid was not registered or no VA space
709 //         has been registered for this GPU.
710 //
711 //     NV_ERR_INVALID_CHANNEL:
712 //         The given channel identifier is invalid or has already been
713 //         registered.
714 //
715 //     NV_ERR_INVALID_ADDRESS:
716 //         The channel has allocations which need to be mapped but the base
717 //         address is invalid, or the VA range specified by base and length
718 //         is too small.
719 //
720 //     NV_ERR_INVALID_ARGUMENT:
721 //         Either some problem with the platform-specific arguments was detected
722 //         or the channel has allocations which need to be mapped but length is
723 //         invalid.
724 //
725 //     NV_ERR_GENERIC:
726 //         Unexpected error. We try hard to avoid returning this error code,
727 //         because it is not very informative.
728 //
729 //------------------------------------------------------------------------------
730 NV_STATUS UvmRegisterChannel(const NvProcessorUuid          *gpuUuid,
731                              const UvmChannelPlatformParams *platformParams,
732                              void                           *base,
733                              NvLength                        length);
734 
735 //------------------------------------------------------------------------------
736 // UvmUnregisterChannel
737 //
738 // Unregisters a channel from UVM. The channel must have been previously
739 // registered via a call to UvmRegisterChannel. The channel will be reset,
740 // thereby terminating any pending work on that channel.
741 //
742 // Since channels may share virtual mappings, a call to UvmUnregisterChannel is
743 // not guaranteed to unmap the VA range passed into the corresponding
744 // UvmRegisterChannel call because other still-registered channels may be using
745 // allocations in that VA range. Only channels which share the same TSG can
746 // share allocations, so a channel's VA range can only be considered released
747 // after UvmUnregisterChannel has been called on all channels under that TSG.
748 //
749 // Arguments:
750 //     platformParams: (INPUT)
751 //         On Linux: RM ctrl fd, hClient and hChannel.
752 //
753 // Error codes:
754 //     NV_ERR_INVALID_CHANNEL:
755 //         The given channel identifier was not registered.
756 //
757 //     NV_ERR_INVALID_ARGUMENT:
758 //         Some problem with the platform specific arguments was detected.
759 //
760 //     NV_ERR_GENERIC:
761 //         Unexpected error. We try hard to avoid returning this error code,
762 //         because it is not very informative.
763 //
764 //------------------------------------------------------------------------------
765 NV_STATUS UvmUnregisterChannel(const UvmChannelPlatformParams *platformParams);
766 
767 //------------------------------------------------------------------------------
768 // UvmReserveVa
769 //
770 // Reserves VA space on the CPU for future use. Multiple, non-contiguous VA
771 // ranges can be reserved via this API.
772 //
773 // The starting address for the VA reservation can be either explicitly
774 // specified or left NULL to let the API implementation select one. When the
775 // starting address is specified, it must be aligned to the smallest CPU page
776 // size. When the starting address is not specified, the bounds of the search
777 // space within which the VA range should be reserved must be specified. The
778 // specified lower bound of the search space is rounded up to the nearest
779 // non-zero multiple of the requested alignment. The total size of the search
780 // space taking into consideration the rounded up lower bound cannot be less
781 // than the requested length for the VA reservation. The starting address chosen
782 // by the API implementation is guaranteed to be aligned to the requested
783 // alignment.
784 //
785 // The requested alignment must be either a power of two that is at least the
786 // smallest CPU page size or left zero to indicate default alignment which is
787 // the smallest CPU page size.
788 //
789 // The length of the VA reservation must be a multiple of the smallest CPU page
790 // size.
791 //
792 // Arguments:
793 //     base: (INPUT/OUTPUT)
794 //         Contains the starting address of the VA reservation when the call
795 //         returns successfully. If *base is NULL when this API is invoked, a VA
796 //         range that falls within the requested bounds is reserved. Note that
797 //         the lower bound will be rounded up to the nearest non-zero multiple
798 //         of the requested alignment. If *base is non-NULL when this API
799 //         is invoked, then that address is chosen as the starting address of
800 //         the VA reservation.
801 //
802 //     length: (INPUT)
803 //         Length in bytes of the region. Must be a multiple of the smallest CPU
804 //         page size.
805 //
806 //     minVa: (INPUT)
807 //         Lower limit for the search space within which the VA range must be
808 //         reserved. Will be rounded up to the nearest non-zero multiple of the
809 //         requested alignment. Ignored if *base is non-NULL when the API is
810 //         invoked.
811 //
812 //     maxVa: (INPUT)
813 //         Upper limit for the search space within which the VA range must be
814 //         reserved. Ignored if *base is non-NULL when the API is invoked.
815 //
816 //     alignment: (INPUT)
817 //         Alignment required for the starting address of the reservation. Must
818 //         either be zero to indicate default alignment which is smallest CPU
819 //         page size or a power of two that is at least the smallest CPU page
820 //         size. Ignored if *base is non-NULL when the API is invoked.
821 //
822 // Error codes:
823 //     NV_ERR_NO_MEMORY:
824 //         Either *base is NULL and no suitable VA reservation could be made or
825 //         some other internal memory allocation failed.
826 //
827 //     NV_ERR_UVM_ADDRESS_IN_USE:
828 //         *base is non-NULL and reserving the VA range at that address failed.
829 //
830 //     NV_ERR_INVALID_ADDRESS:
831 //         One of the following occurred:
832 //         - base is NULL.
833 //         - *base is non-NULL and is not aligned to the smallest CPU page size.
834 //         - *base is NULL and one of the following occurred:
835 //             - the rounded up minVa is not less than maxVa.
836 //             - the region covered by the rounded up minVa and maxVa is not big
837 //               enough to contain a VA reservation of the requested length.
838 //         - alignment is non-zero and is either not a power of two or is less
839 //           than the smallest CPU size.
840 //         - length is zero or is not a multiple of the smallest CPU page size.
841 //
842 //     NV_ERR_GENERIC:
843 //         Unexpected error. We try hard to avoid returning this error code,
844 //         because it is not very informative.
845 //
846 //------------------------------------------------------------------------------
847 NV_STATUS UvmReserveVa(void     **base,
848                        NvLength   length,
849                        void      *minVa,
850                        void      *maxVa,
851                        NvLength   alignment);
852 
853 //------------------------------------------------------------------------------
854 // UvmReleaseVa
855 //
856 // Releases all pages within the VA range. If any of the pages were committed,
857 // they are automatically decomitted as well.
858 //
859 // The release may encompass more than a single reserve VA or commit call, but
860 // must not partially release any regions that were either reserved or
861 // committed previously.
862 //
863 // Arguments:
864 //     base: (INPUT)
865 //         Base address (starting point) of the VA (virtual address) range.
866 //
867 //     length: (INPUT)
868 //         Length, in bytes, of the range.
869 //
870 // Error codes:
871 //     NV_ERR_NO_MEMORY:
872 //         Internal memory allocation failed. There is likely more than one
873 //         possible cause of this error.
874 //
875 //     NV_ERR_INVALID_ADDRESS:
876 //         base and length are not properly aligned or the range was not
877 //         previously reserved via UvmReserveVa.
878 //
879 //     NV_ERR_GENERIC:
880 //         Unexpected error. We try hard to avoid returning this error code,
881 //         because it is not very informative.
882 //
883 //------------------------------------------------------------------------------
884 NV_STATUS UvmReleaseVa(void     *base,
885                        NvLength  length);
886 
887 //------------------------------------------------------------------------------
888 // UvmCreateRangeGroup
889 //
890 // Creates a new range group. Virtual address ranges can be associated with
891 // this range group as outlined in UvmSetRangeGroup.
892 //
893 // Arguments:
894 //     rangeGroupId: (OUTPUT)
895 //         Id of the newly created range group.
896 //
897 // Error codes:
898 //     NV_ERR_NO_MEMORY:
899 //         Internal memory allocation failed.
900 //
901 //     NV_ERR_INVALID_ARGUMENT:
902 //         A NULL pointer was passed in the rangeGroupId argument.
903 //
904 //     NV_ERR_GENERIC:
905 //         Unexpected error. We try hard to avoid returning this error code,
906 //         because it is not very informative.
907 //
908 //------------------------------------------------------------------------------
909 NV_STATUS UvmCreateRangeGroup(NvU64 *rangeGroupId);
910 
911 //------------------------------------------------------------------------------
912 // UvmDestroyRangeGroup
913 //
914 // Destroys a previously created range group. If there are any pages associated
915 // with this range group, that association is cleared. i.e. the behavior is the
916 // same as associating those pages with UVM_RANGE_GROUP_ID_NONE via a call to
917 // UvmSetRangeGroup.
918 //
919 // Arguments:
920 //     rangeGroupId: (INPUT)
921 //         Id of the range group to be destroyed.
922 //
923 // Error codes:
924 //     NV_ERR_OBJECT_NOT_FOUND:
925 //         rangeGroupId was not created by a previous call to
926 //         UvmCreateRangeGroup.
927 //
928 //     NV_ERR_GENERIC:
929 //         Unexpected error. We try hard to avoid returning this error code,
930 //         because it is not very informative.
931 //
932 //------------------------------------------------------------------------------
933 NV_STATUS UvmDestroyRangeGroup(NvU64 rangeGroupId);
934 
935 //------------------------------------------------------------------------------
936 // UvmSetRangeGroup
937 //
938 // Associates the pages in a virtual address (VA) range with the specified
939 // range group. The base address and length of the VA range must be aligned to
940 // the smallest page size supported by the CPU. If any pages in that VA range
941 // were associated with another range group, that association is changed to
942 // this range group. The VA range must have been allocated via either UvmAlloc
943 // or UvmMemMap.
944 //
945 // If the range group was made non-migratable by a previous call to
946 // UvmPreventMigrationRangeGroups, then all pages in the VA range are migrated
947 // to their preferred location if they are not already located there. If any
948 // page does not have a preferred location or if the preferred location is a
949 // fault-capable GPU, an error is returned.
950 //
951 // If rangeGroupId is UVM_RANGE_GROUP_ID_NONE, then all pages in the VA range
952 // will have their range group association removed.
953 //
954 // Arguments:
955 //     base: (INPUT)
956 //         Base address of the virtual address range.
957 //
958 //     length: (INPUT)
959 //         Length, in bytes, of the range.
960 //
961 //     rangeGroupId: (INPUT)
962 //         Id of the range group to associate the VA range with.
963 //
964 // Errors:
965 //     NV_ERR_NO_MEMORY:
966 //         Internal memory allocation failed.
967 //
968 //     NV_ERR_INVALID_ADDRESS:
969 //         base and length are not properly aligned or don't represent a valid
970 //         address range.
971 //
972 //     NV_ERR_INVALID_DEVICE:
973 //         The range group is non-migratable and at least one page in the VA
974 //         range either does not have a preferred location or its preferred
975 //         location is a fault-capable GPU.
976 //
977 //     NV_ERR_OBJECT_NOT_FOUND:
978 //         rangeGroupId was not created by a previous call to
979 //         UvmCreateRangeGroup.
980 //
981 //     NV_ERR_GENERIC:
982 //         Unexpected error. We try hard to avoid returning this error code,
983 //         because it is not very informative.
984 //
985 //------------------------------------------------------------------------------
986 NV_STATUS UvmSetRangeGroup(void     *base,
987                            NvLength  length,
988                            NvU64     rangeGroupId);
989 
990 //------------------------------------------------------------------------------
991 // UvmPreventMigrationRangeGroups
992 //
993 // Migrates all pages associated with the specified range groups to their
994 // preferred location and prevents them from being migrated on faults from
995 // either the CPU or the GPU. Any unpopulated pages are populated at the
996 // preferred location. If any page does not have a preferred location or if the
997 // preferred location is a fault-capable GPU, an error is returned. All the
998 // specified range groups must be valid range groups allocated using
999 // UvmCreateRangeGroup.
1000 //
1001 // All pages associated with the specified range groups are mapped at the
1002 // preferred location and from all the GPUs present in the accessed-by list of
1003 // those pages, provided establishing a mapping is possible. If any page
1004 // associated with any of the specified range groups has a preferred location
1005 // set to a non-fault-capable GPU, and another non-fault-capable GPU is in the
1006 // accessed-by list of the page but P2P support between both GPUs is not
1007 // enabled, an error is returned.
1008 //
1009 // GPUs are allowed to map any pages belonging to these range groups on faults.
1010 // If establishing such a mapping is not possible, the fault is fatal.
1011 //
1012 // Existing CPU mappings to any pages belonging to these range groups are
1013 // revoked, even if the pages are in system memory and even if the CPU is in
1014 // the accessed-by list of those pages. The CPU is not allowed to map these
1015 // pages on faults even if they are located in system memory and so, CPU faults
1016 // to these pages are always fatal.
1017 //
1018 // Multiple calls to UvmPreventMigrationRangeGroups are not refcounted. i.e.
1019 // calling UvmPreventMigrationRangeGroups on a range group on which
1020 // UvmPreventMigrationRangeGroups has already been called results in a no-op.
1021 //
1022 // Arguments:
1023 //     rangeGroupIds: (INPUT)
1024 //         An array of range group IDs.
1025 //
1026 //     numGroupIds: (INPUT)
1027 //         Number of items in the rangeGroupIds array.
1028 //
1029 // Errors:
1030 //     NV_ERR_NO_MEMORY:
1031 //         Internal memory allocation failed.
1032 //
1033 //     NV_ERR_OBJECT_NOT_FOUND:
1034 //         One or more rangeGroupIds was not found.
1035 //
1036 //     NV_ERR_INVALID_ARGUMENT:
1037 //         A NULL pointer was passed in for rangeGroupIds or numGroupIds was
1038 //         zero.
1039 //
1040 //     NV_ERR_INVALID_DEVICE:
1041 //         At least one page in one of the VA ranges associated with these range
1042 //         groups does not have a preferred location or its preferred location
1043 //         is a fault-capable GPU. Or the preferred location has been set to a
1044 //         non-fault-capable GPU, and another non-fault-capable GPU is present
1045 //         in the accessed-by list of a page but P2P support between both GPUs
1046 //         has not been enabled.
1047 //
1048 //     NV_ERR_GENERIC:
1049 //         Unexpected error. We try hard to avoid returning this error code,
1050 //         because it is not very informative.
1051 //
1052 //------------------------------------------------------------------------------
1053 NV_STATUS UvmPreventMigrationRangeGroups(const NvU64 *rangeGroupIds,
1054                                          NvLength     numGroupIds);
1055 
1056 //------------------------------------------------------------------------------
1057 // UvmAllowMigrationRangeGroups
1058 //
1059 // Undoes the effect of UvmPreventMigrationRangeGroups. Pages associated with
1060 // these range groups are now allowed to migrate at any time, and CPU or GPU
1061 // faults to these pages are no longer fatal. All the specified range groups
1062 // must be valid range groups allocated using UvmCreateRangeGroup.
1063 //
1064 // Multiple calls to UvmAllowMigrationRangeGroups are not refcounted. i.e.
1065 // calling UvmAllowMigrationRangeGroups on a range group on which
1066 // UvmAllowMigrationRangeGroups has already been called results in a no-op.
1067 //
1068 // Arguments:
1069 //     rangeGroupIds: (INPUT)
1070 //         An array of range group IDs.
1071 //
1072 //     numGroupIds: (INPUT)
1073 //         Number of items in the rangeGroupIds array.
1074 //
1075 // Errors:
1076 //     NV_ERR_OBJECT_NOT_FOUND:
1077 //         One or more rangeGroupIds was not found.
1078 //
1079 //     NV_ERR_INVALID_ARGUMENT:
1080 //         A NULL pointer was passed in for rangeGroupIds or numGroupIds was
1081 //         zero.
1082 //
1083 //     NV_ERR_GENERIC:
1084 //         Unexpected error. We try hard to avoid returning this error code,
1085 //         because it is not very informative.
1086 //
1087 //------------------------------------------------------------------------------
1088 NV_STATUS UvmAllowMigrationRangeGroups(const NvU64 *rangeGroupIds,
1089                                        NvLength     numGroupIds);
1090 
1091 //------------------------------------------------------------------------------
1092 // UvmAlloc
1093 //
1094 // Creates a new mapping in the virtual address space of the process, populates
1095 // it at the specified preferred location, maps it on the provided list of
1096 // processors if feasible and associates the range with the given range group.
1097 //
1098 // This API is equivalent to the following code sequence:
1099 //     UvmMemMap(base, length);
1100 //     UvmSetPreferredLocation(base, length, preferredLocationUuid);
1101 //     for (i = 0; i < accessedByCount; i++) {
1102 //         UvmSetAccessedBy(base, length, &accessedByUuids[i]);
1103 //     }
1104 //     UvmSetRangeGroup(base, length, rangeGroupId);
1105 //     UvmMigrate(base, length, preferredLocationUuid, 0);
1106 //
1107 // Please see those APIs for further details on their behavior. If an error is
1108 // encountered during any part of the sequence, the completed portion will be
1109 // undone.
1110 //
1111 // The VA range can be unmapped and freed via a call to UvmFree.
1112 //
1113 // Arguments:
1114 //     base: (INPUT)
1115 //         Base address of the virtual address range.
1116 //
1117 //     length: (INPUT)
1118 //         Length, in bytes, of the range.
1119 //
1120 //     preferredLocationUuid: (INPUT)
1121 //         UUID of the preferred location for this VA range.
1122 //
1123 //     accessedByUuids: (INPUT)
1124 //         UUIDs of all processors that should have persistent mappings to this
1125 //         VA range.
1126 //
1127 //     accessedByCount: (INPUT)
1128 //         Number of elements in the accessedByUuids array.
1129 //
1130 //     rangeGroupId: (INPUT)
1131 //         ID of the range group to associate this VA range with.
1132 //
1133 // Errors:
1134 //     NV_ERR_UVM_ADDRESS_IN_USE:
1135 //         The requested address range overlaps with an existing allocation.
1136 //
1137 //     NV_ERR_INVALID_ADDRESS:
1138 //         base and length are not properly aligned or the range was not
1139 //         previously reserved via UvmReserveVa.
1140 //
1141 //     NV_ERR_INVALID_DEVICE:
1142 //         Either preferredLocationUuid or one of the UUIDs in the
1143 //         accessedByUuids array was not registered or the UUID represents a GPU
1144 //         that has no VA space registered for it.
1145 //
1146 //     NV_ERR_OBJECT_NOT_FOUND:
1147 //         rangeGroupId was not found.
1148 //
1149 //     NV_ERR_NO_MEMORY:
1150 //         Internal memory allocation failed.
1151 //
1152 //     NV_ERR_GENERIC:
1153 //         Unexpected error. We try hard to avoid returning this error code,
1154 //         because it is not very informative.
1155 //
1156 //------------------------------------------------------------------------------
1157 NV_STATUS UvmAlloc(void                  *base,
1158                    NvLength               length,
1159                    const NvProcessorUuid *preferredLocationUuid,
1160                    const NvProcessorUuid *accessedByUuids,
1161                    NvLength               accessedByCount,
1162                    NvU64                  rangeGroupId);
1163 
1164 //------------------------------------------------------------------------------
1165 // UvmFree
1166 //
1167 // Frees a VA range previously allocated via one of the UVM allocator APIs,
1168 // namely either UvmAlloc, UvmMemMap, UvmCreateExternalRange,
1169 // UvmMapDynamicParallelismRegion or UvmAllocSemaphorePool.
1170 //
1171 // For VA ranges allocated via UvmAlloc, UvmMemMap or UvmAllocSemaphorePool, all
1172 // CPU and GPU page table mappings are cleared and all allocated pages are
1173 // freed.
1174 //
1175 // For VA ranges allocated via UvmCreateExternalRange, all GPU page table
1176 // mappings are cleared. No CPU page table mappings for this range are affected,
1177 // and no physical pages for this range are freed.
1178 //
1179 // For VA ranges allocated via UvmMapDynamicParallelismRegion, all GPU page
1180 // table mappings are cleared. No CPU page table mappings for this range are
1181 // affected.
1182 //
1183 // The base address of the VA range to be freed must match the base address used
1184 // when allocating the range. If the VA range came from a region previously
1185 // reserved via UvmReserveVa, then this VA range is put back in the reserved
1186 // state.
1187 //
1188 // Note that the reason this API does not take a length argument is because this
1189 // API is modeled after the C library free() API. Partial frees are not allowed
1190 // and the UVM usermode layer tracks the base and length of each allocated
1191 // range, so having a length argument would be redundant. This also eliminates
1192 // the need for the caller to track the length of each allocation.
1193 //
1194 // Arguments:
1195 //     base: (INPUT)
1196 //         Starting address of the range to be freed. This must be match an
1197 //         address that was obtained via a UVM allocator API.
1198 //
1199 // Errors:
1200 //     NV_ERR_INVALID_ADDRESS:
1201 //         base does not match an address that was passed into a UVM allocator
1202 //         API.
1203 //
1204 //     NV_ERR_GENERIC:
1205 //         Unexpected error. We try hard to avoid returning this error code,
1206 //         because it is not very informative.
1207 //
1208 //------------------------------------------------------------------------------
1209 NV_STATUS UvmFree(void *base);
1210 
1211 //------------------------------------------------------------------------------
1212 // UvmCleanUpZombieResources
1213 //
1214 // Clean up resources left by processes that specify
1215 // UVM_INIT_FLAGS_MULTI_PROCESS_SHARING_MODE. Resources not freed before
1216 // termination by such processes are not immediately freed by UVM if another
1217 // processes is using the same UVM file.
1218 //
1219 // Errors:
1220 //     NV_ERR_GENERIC:
1221 //         Unexpected error. We try hard to avoid returning this error code,
1222 //         because it is not very informative.
1223 //
1224 //------------------------------------------------------------------------------
1225 NV_STATUS UvmCleanUpZombieResources(void);
1226 
1227 //------------------------------------------------------------------------------
1228 // UvmAllocSemaphorePool
1229 //
1230 // Allocates memory from which semaphores can be suballocated and used to order
1231 // work between UVM and CUDA as described in UvmMigrateAsync.
1232 //
1233 // The virtual address range specified by (base, length) must have been
1234 // previously reserved via a call to UvmReserveVa. Both base and length must be
1235 // aligned to the smallest page size supported by the CPU.
1236 //
1237 // The pages are populated in CPU memory and zero initialized. They are mapped
1238 // on the CPU and in all registered GPU VA spaces. They will also be mapped in
1239 // any GPU VA spaces registered after this call. The pages are non-migratable
1240 // and the GPU mappings are persistent, which makes them safe to access from
1241 // non-fault-capable HW engines.
1242 //
1243 // By default, all mappings to this VA range have read, write and atomic access
1244 // and are uncached. This behavior can be overridden for GPUs by explicitly
1245 // specifying the mapping and caching attributes through this API. At most one
1246 // GPU may cache the allocation, in which case no other processor should write
1247 // to it. These GPUs must have been registered via UvmRegisterGpu. These GPUs
1248 // do not need to have a GPU VA space registered at the time of this API call.
1249 // Overriding default mapping and caching attributes for the CPU is disallowed.
1250 // If a new GPU is registered or a currently registered GPU is unregistered via
1251 // UvmUnregisterGpu and then re-registered, default mapping and caching
1252 // attributes will be applied for that GPU.
1253 //
1254 // The VA range must lie within the largest possible virtual address supported
1255 // by all GPUs that currently have a GPU VA space registered for them. Also, if
1256 // a GPU VA space is registered in the future for a GPU which is unable to map
1257 // this allocation, that GPU VA space registration will fail.
1258 //
1259 // The pages in this VA range cannot be associated with range groups, cannot be
1260 // the target for read duplication, cannot have a preferred location set, and
1261 // cannot have any accessed-by processors.
1262 //
1263 // The VA range can be unmapped and freed via a call to UvmFree.
1264 //
1265 // Arguments:
1266 //     base: (INPUT)
1267 //         Base address of the virtual address range.
1268 //
1269 //     length: (INPUT)
1270 //         Length, in bytes, of the range.
1271 //
1272 //     perGpuAttribs: (INPUT)
1273 //         List of per GPU mapping and caching attributes. GPUs not in the list
1274 //         are mapped with default attributes.
1275 //
1276 //     gpuAttribsCount: (INPUT)
1277 //         Number of entries in the perGpuAttribs array.
1278 //
1279 // Errors:
1280 //     NV_ERR_UVM_ADDRESS_IN_USE:
1281 //         The requested address range overlaps with an existing allocation.
1282 //
1283 //     NV_ERR_INVALID_ADDRESS:
1284 //         base and length are not properly aligned or the range was not
1285 //         previously reserved via UvmReserveVa.
1286 //
1287 //     NV_ERR_OUT_OF_RANGE:
1288 //         The VA range exceeds the largest virtual address supported by one or
1289 //         more registered GPUs.
1290 //
1291 //     NV_ERR_INVALID_DEVICE:
1292 //         At least one of the UUIDs in the perGpuAttribs list was either not
1293 //         registered or is the UUID of the CPU.
1294 //
1295 //     NV_ERR_NO_MEMORY:
1296 //         Internal memory allocation failed.
1297 //
1298 //     NV_ERR_INVALID_ARGUMENT:
1299 //         perGpuAttribs is NULL but gpuAttribsCount is non-zero or vice-versa,
1300 //         or caching is requested on more than one GPU.
1301 //
1302 //     NV_ERR_NOT_SUPPORTED:
1303 //         The current process is not the one which called UvmInitialize, and
1304 //         UVM_INIT_FLAGS_MULTI_PROCESS_SHARING_MODE was not specified to
1305 //         UvmInitialize.
1306 //
1307 //     NV_ERR_GENERIC:
1308 //         Unexpected error. We try hard to avoid returning this error code,
1309 //         because it is not very informative.
1310 //
1311 //------------------------------------------------------------------------------
1312 NV_STATUS UvmAllocSemaphorePool(void                          *base,
1313                                 NvLength                       length,
1314                                 const UvmGpuMappingAttributes *perGpuAttribs,
1315                                 NvLength                       gpuAttribsCount);
1316 
1317 //------------------------------------------------------------------------------
1318 // UvmMigrate
1319 //
1320 // Migrates the backing of a given virtual address range to the specified
1321 // destination processor. If any page in the VA range is unpopulated, it is
1322 // populated at the destination processor. The migrated pages in the VA range
1323 // are also mapped on the destination processor.
1324 //
1325 // Both base and length must be aligned to the smallest page size supported by
1326 // the CPU. The VA range must lie within the largest possible virtual address
1327 // supported by the specified processor.
1328 //
1329 // The virtual address range specified by (base, length) must have been
1330 // allocated via a call to either UvmAlloc or UvmMemMap, or be supported
1331 // system-allocated pageable memory.
1332 //
1333 // If the input virtual range corresponds to system-allocated pageable memory,
1334 // and there is at least one GPU in the system that supports transparent access
1335 // to pageable memory, the behavior described in the next paragraphs does not
1336 // take effect. Instead, the driver will first populate any unpopulated pages
1337 // according to the memory policy defined by the calling process and address
1338 // range. Then, pages will be migrated to the requested processor. If the
1339 // destination processor is the CPU, and the memory policy has not defined
1340 // preferred CPU memory nodes or the given preferredCpuMemoryNode is in the
1341 // mask of preferred memory nodes, the driver will try to migrate memory to
// preferredCpuMemoryNode first, and will fall back to the rest of the CPU nodes
1343 // if it doesn't succeed. If pages were already resident on any CPU memory node,
1344 // they will not be migrated.
1345 //
1346 // If the input virtual range corresponds to system-allocated pageable memory,
1347 // and UvmIsPageableMemoryAccessSupported reports that pageable memory access
1348 // is supported, then the driver will populate any unpopulated pages at the
1349 // destination processor and migrate the data from any source location to the
1350 // destination. Pages in the VA range are migrated even if their preferred
1351 // location is set to a processor other than the destination processor.
1352 // If the accessed-by list of any of the pages in the VA range is not empty,
1353 // then mappings to those pages from all the appropriate processors are updated
1354 // to refer to the new location if establishing such a mapping is possible.
1355 // Otherwise, those mappings are cleared.
1356 // Note that in this case, software managed pageable memory does not support
1357 // migration of MAP_SHARED, file-backed, or PROT_NONE mappings.
1358 //
1359 // If any pages in the given VA range are associated with a range group which
1360 // has been made non-migratable via UvmPreventMigrationRangeGroups, then those
1361 // pages are not migrated and the mappings on the destination processor for
1362 // those pages are left unmodified. If the VA range is associated with a
1363 // migratable range group and the destination processor is a non-fault-capable
1364 // GPU, then an error is returned if that GPU is in the accessed-by list of the
1365 // VA range but that GPU is not the preferred location.
1366 //
1367 // If read duplication is enabled on any pages in the VA range, then those pages
1368 // are read duplicated at the destination processor, leaving the source copy, if
1369 // present, intact with only its mapping changed to read-only if it wasn't
1370 // already mapped that way.
1371 //
1372 // Pages in the VA range are migrated even if their preferred location is set to
1373 // a processor other than the destination processor.
1374 //
1375 // If the accessed-by list of any of the pages in the VA range is not empty,
1376 // then mappings to those pages from all the appropriate processors are updated
1377 // to refer to the new location if establishing such a mapping is possible.
1378 // Otherwise, those mappings are cleared.
1379 //
1380 // If fewer than the number of requested pages were migrated,
1381 // NV_WARN_MORE_PROCESSING_REQUIRED is returned. An example scenario where this
1382 // could happen is when UvmPreventMigrationRangeGroups has been called on a
1383 // range group associated with some pages in this range. If fewer than the
1384 // number of requested pages were migrated due to insufficient memory to
1385 // allocate physical pages or page tables, then NV_ERR_NO_MEMORY is returned.
1386 //
1387 // Arguments:
1388 //     base: (INPUT)
1389 //         Base address of the virtual address range.
1390 //
1391 //     length: (INPUT)
1392 //         Length, in bytes, of the range.
1393 //
1394 //     destinationUuid: (INPUT)
1395 //         UUID of the destination processor to migrate pages to.
1396 //
1397 //     preferredCpuMemoryNode: (INPUT)
1398 //         Preferred CPU NUMA memory node used if the destination processor is
1399 //         the CPU. This argument is ignored if the given virtual address range
1400 //         corresponds to managed memory.
1401 //
1402 // Error codes:
1403 //     NV_ERR_INVALID_ADDRESS:
1404 //         base and length are not properly aligned, or the range does not
1405 //         represent a migratable allocation created via UvmMemMap, or the
1406 //         range is pageable memory and the system does not support accessing
1407 //         pageable memory, or the range does not represent a supported
1408 //         Operating System allocation.
1409 //
1410 //     NV_ERR_OUT_OF_RANGE:
1411 //         The VA range exceeds the largest virtual address supported by the
1412 //         destination processor.
1413 //
1414 //     NV_ERR_INVALID_DEVICE:
1415 //         destinationUuid does not represent a valid processor such as a CPU or
1416 //         a GPU with a GPU VA space registered for it. Or destinationUuid is a
1417 //         non-fault-capable GPU, and that GPU is present in the accessed-by
1418 //         list of the VA range but that GPU is not the preferred location.
1419 //
1420 //     NV_ERR_NO_MEMORY:
1421 //         There was insufficient memory to allocate physical pages or page
1422 //         tables to complete the migration. Or internal memory allocation
1423 //         failed.
1424 //
1425 //     NV_ERR_NOT_SUPPORTED:
1426 //         The UVM file descriptor is associated with another process and the
1427 //         input virtual range corresponds to system-allocated pageable memory
1428 //         that cannot be migrated from this process.
1429 //
1430 //     NV_ERR_GENERIC:
1431 //         Unexpected error. We try hard to avoid returning this error code,
1432 //         because it is not very informative.
1433 //
1434 //     NV_WARN_MORE_PROCESSING_REQUIRED:
1435 //         Fewer than the number of requested pages were migrated because some
1436 //         pages were associated with a non-migratable range group.
1437 //
1438 //------------------------------------------------------------------------------
1439 #if UVM_API_REV_IS_AT_MOST(5) // Signature without preferredCpuMemoryNode
1440 NV_STATUS UvmMigrate(void                  *base,
1441                      NvLength               length,
1442                      const NvProcessorUuid *destinationUuid);
1443 #else // Signature with the additional preferredCpuMemoryNode argument
1444 NV_STATUS UvmMigrate(void                  *base,
1445                      NvLength               length,
1446                      const NvProcessorUuid *destinationUuid,
1447                      NvU32                  preferredCpuMemoryNode);
1448 #endif // UVM_API_REV_IS_AT_MOST(5)
1449 
1450 //------------------------------------------------------------------------------
1451 // UvmMigrateAsync
1452 //
1453 // Migrates the backing of a given virtual address range to the specified
1454 // destination processor. The behavior of this API is exactly the same as that
1455 // of UvmMigrate except for the differences outlined below.
1456 //
1457 // When this call returns NV_OK, the migration operation is considered to be
1458 // in-flight and can be synchronized upon by waiting for the specified payload
1459 // to be written at the given semaphore address. The semaphore address must be
1460 // 4-byte aligned and must fall within a VA range allocated using
1461 // UvmAllocSemaphorePool. It is up to the caller to ensure that the payload has
1462 // been written before reusing the address in a subsequent UvmMigrateAsync call.
1463 // Specifying a semaphore address is optional. If the semaphore address is NULL
1464 // the payload must be zero.
1465 //
1466 // The API makes no guarantees about how many pages will be migrated, and there
1467 // is no provision to detect errors that occur during the in-flight operations.
1468 // However, the API does guarantee that the semaphore will eventually be
1469 // released regardless of errors during in-flight operations, as long as the API
1470 // call itself returned NV_OK.
1471 //
1472 // Arguments:
1473 //     base: (INPUT)
1474 //         Base address of the virtual address range.
1475 //
1476 //     length: (INPUT)
1477 //         Length, in bytes, of the range.
1478 //
1479 //     destinationUuid: (INPUT)
1480 //         UUID of the destination processor to migrate pages to.
1481 //
1482 //     preferredCpuMemoryNode: (INPUT)
1483 //         Preferred CPU NUMA memory node used if the destination processor is
1484 //         the CPU. This argument is ignored if the given virtual address range
1485 //         corresponds to managed memory.
1486 //
1487 //     semaphoreAddress: (INPUT)
1488 //         Base address of the semaphore.
1489 //
1490 //     semaphorePayload: (INPUT)
1491 //         Payload to be written at semaphoreAddress when the operation
1492 //         completes. Must be zero if semaphoreAddress is NULL.
1493 //
1494 // Error codes:
1495 //     NV_ERR_INVALID_ADDRESS:
1496 //         base and length are not properly aligned, or the range does not
1497 //         represent a migratable allocation created via UvmMemMap, or the
1498 //         range is pageable memory and the system does not support accessing
1499 //         pageable memory, or the range does not represent a supported
1500 //         Operating System allocation, or the semaphoreAddress isn't properly
1501 //         aligned, or isn't suballocated from a semaphore pool.
1502 //
1503 //     NV_ERR_OUT_OF_RANGE:
1504 //         The VA range exceeds the largest virtual address supported by the
1505 //         destination processor.
1506 //
1507 //     NV_ERR_INVALID_DEVICE:
1508 //         destinationUuid does not represent a valid processor such as a CPU or
1509 //         a GPU with a GPU VA space registered for it. Or destinationUuid is a
1510 //         non-fault-capable GPU, and that GPU is present in the accessed-by
1511 //         list of the VA range but that GPU is not the preferred location.
1512 //
1513 //     NV_ERR_INVALID_ARGUMENT:
1514 //         semaphoreAddress is NULL and semaphorePayload is not zero.
1515 //
1516 //     NV_ERR_NO_MEMORY:
1517 //         There was insufficient memory to allocate physical pages or page
1518 //         tables to complete the migration. Or internal memory allocation
1519 //         failed.
1520 //
1521 //     NV_ERR_GENERIC:
1522 //         Unexpected error. We try hard to avoid returning this error code,
1523 //         because it is not very informative.
1524 //
1525 //     NV_WARN_MORE_PROCESSING_REQUIRED:
1526 //         Fewer than the number of requested pages were migrated because some
1527 //         pages were associated with a non-migratable range group.
1528 //
1529 //------------------------------------------------------------------------------
1530 #if UVM_API_REV_IS_AT_MOST(5) // Signature without preferredCpuMemoryNode
1531 NV_STATUS UvmMigrateAsync(void                  *base,
1532                           NvLength               length,
1533                           const NvProcessorUuid *destinationUuid,
1534                           void                  *semaphoreAddress,
1535                           NvU32                  semaphorePayload);
1536 #else // Signature with the additional preferredCpuMemoryNode argument
1537 NV_STATUS UvmMigrateAsync(void                  *base,
1538                           NvLength               length,
1539                           const NvProcessorUuid *destinationUuid,
1540                           NvU32                  preferredCpuMemoryNode,
1541                           void                  *semaphoreAddress, // May be NULL
1542                           NvU32                  semaphorePayload); // Zero if no semaphore
1543 #endif // UVM_API_REV_IS_AT_MOST(5)
1544 
1545 //------------------------------------------------------------------------------
1546 // UvmMigrateRangeGroup
1547 //
1548 // Migrates the backing of all virtual address ranges associated with the given
1549 // range group to the specified destination processor. The behavior of this API
1550 // is equivalent to calling UvmMigrate on each VA range associated with this
1551 // range group. The value for the preferredCpuMemoryNode is irrelevant in this
1552 // case as it only applies to migrations of pageable memory, which cannot be
1553 // used to create range groups.
1554 //
1555 // Any errors encountered during migration are returned immediately. No attempt
1556 // is made to migrate the remaining unmigrated ranges and the ranges that are
1557 // already migrated are not rolled back to their previous location.
1558 //
1559 // The range group id specified must have been allocated via
1560 // UvmCreateRangeGroup.
1561 //
1562 // Arguments:
1563 //     rangeGroupId: (INPUT)
1564 //         Id of the range group whose associated VA ranges have to be migrated.
1565 //
1566 //     destinationUuid: (INPUT)
1567 //         UUID of the destination processor to migrate pages to.
1568 //
1569 // Error codes:
1570 //     NV_ERR_OBJECT_NOT_FOUND:
1571 //         Either UVM_RANGE_GROUP_ID_NONE was specified or the rangeGroupId was
1572 //         not found.
1573 //
1574 //     NV_ERR_INVALID_DEVICE:
1575 //         destinationUuid does not represent a valid processor such as a CPU or
1576 //         a GPU with a GPU VA space registered for it.
1577 //
1578 //     NV_ERR_NO_MEMORY:
1579 //         Internal memory allocation failed.
1580 //
1581 //     NV_ERR_OUT_OF_RANGE:
1582 //         One or more of the VA ranges exceeds the largest virtual address
1583 //         supported by the destination processor.
1584 //
1585 //     NV_ERR_GENERIC:
1586 //         Unexpected error. We try hard to avoid returning this error code,
1587 //         because it is not very informative.
1588 //
1589 //     NV_WARN_MORE_PROCESSING_REQUIRED:
1590 //         Fewer than the number of requested pages were migrated because, for
1591 //         example, the range group was non-migratable.
1592 //
1593 //------------------------------------------------------------------------------
1594 NV_STATUS UvmMigrateRangeGroup(NvU64                  rangeGroupId, // From UvmCreateRangeGroup
1595                                const NvProcessorUuid *destinationUuid); // Migration target
1596 
1597 //------------------------------------------------------------------------------
1598 // UvmPopulatePageable
1599 //
1600 // Forces the population of the given virtual address range. Memory will be
1601 // populated by the system according to the memory policy defined by the calling
1602 // process and address range.
1603 //
1604 // This function only supports pageable memory. None of the pages within the
1605 // virtual address range specified by (base, length) may belong to a virtual
1606 // address range allocated or registered using any of the UVM
1607 // allocation/mapping APIs. Also, all pages must be mapped with at least read
1608 // permissions.
1609 //
1610 // If fewer than the number of requested pages were populated, NV_ERR_NO_MEMORY
1611 // is returned.
1612 //
1613 // Arguments:
1614 //     base: (INPUT)
1615 //         Base address of the virtual address range.
1616 //
1617 //     length: (INPUT)
1618 //         Length, in bytes, of the range.
1619 //
1620 // Errors:
1621 //     NV_ERR_INVALID_ADDRESS:
1622 //         base and length are not properly aligned, the range does not
1623 //         represent a supported Operating System allocation, or the range
1624 //         contains pages not mapped with at least read permissions.
1625 //
1626 //     NV_ERR_NO_MEMORY:
1627 //         Fewer than the number of requested pages were populated, likely
1628 //         because the system ran out of memory.
1629 //
1630 //     NV_ERR_GENERIC:
1631 //         Unexpected error. We try hard to avoid returning this error code,
1632 //         because it is not very informative.
1633 //
1634 //------------------------------------------------------------------------------
1635 NV_STATUS UvmPopulatePageable(void     *base, // Start of the pageable VA range
1636                               NvLength  length); // Size of the range, in bytes
1637 
1638 //------------------------------------------------------------------------------
1639 // UvmMemMap
1640 //
1641 // Creates a new mapping in the virtual address space of the process that is
1642 // valid for access from any fault-capable CPU or GPU.
1643 //
1644 // The virtual address range specified by (base, length) must have been
1645 // previously reserved via a call to UvmReserveVa. Both base and length must be
1646 // aligned to the smallest page size supported by the CPU. Note that using a
1647 // larger alignment for base and length, such as the largest GPU page size, may
1648 // result in higher performance.
1649 //
1650 // The pages in the VA range are zero initialized. They are typically populated
1651 // on demand, for example, through CPU or GPU faults.
1652 //
1653 // The VA range can be unmapped and freed via a call to UvmFree.
1654 //
1655 // Arguments:
1656 //     base: (INPUT)
1657 //         Base address of the virtual address range.
1658 //
1659 //     length: (INPUT)
1660 //         Length, in bytes, of the range.
1661 //
1662 // Errors:
1663 //     NV_ERR_UVM_ADDRESS_IN_USE:
1664 //         The requested address range overlaps with an existing allocation.
1665 //
1666 //     NV_ERR_INVALID_ADDRESS:
1667 //         base and length are not properly aligned or the range was not
1668 //         previously reserved via UvmReserveVa.
1669 //
1670 //     NV_ERR_NOT_SUPPORTED:
1671 //         The current process is not the one which called UvmInitialize, and
1672 //         UVM_INIT_FLAGS_MULTI_PROCESS_SHARING_MODE was not specified to
1673 //         UvmInitialize.
1674 //
1675 //     NV_ERR_GENERIC:
1676 //         Unexpected error. We try hard to avoid returning this error code,
1677 //         because it is not very informative.
1678 //
1679 //------------------------------------------------------------------------------
1680 NV_STATUS UvmMemMap(void     *base, // Within a range reserved via UvmReserveVa
1681                     NvLength  length); // In bytes; CPU-page aligned, like base
1682 
1683 //------------------------------------------------------------------------------
1684 // UvmCreateExternalRange
1685 //
1686 // Creates a VA range within the process's address space reserved for external
1687 // allocations. The VA range is not mapped to any physical allocation at the
1688 // time of creation. Once an external VA range has been created using this API,
1689 // the user is free to map any number of physical allocations within the VA
1690 // range (see UvmMapExternalAllocation and UvmMapExternalSparse for more
1691 // details).
1692 //
1693 // The virtual address range, itself, does not impose any restrictions on the
1694 // alignment of the physical allocations mapped within it. However, both base
1695 // and length must be aligned to 4K.
1696 //
1697 // The VA range must not overlap with an existing VA range, irrespective of
1698 // whether the existing range corresponds to a UVM allocation or an external
1699 // allocation.
1700 //
1701 // It is allowed (but not required) for the VA range to come from a region
1702 // previously reserved via UvmReserveVa.
1703 //
1704 // Any mappings created within this VA range are considered non-migratable.
1705 // Consequently, pages cannot be associated with range groups, cannot be
1706 // the target for read duplication, cannot have a preferred location set,
1707 // cannot have any accessed-by processors, and any GPU faults within this range
1708 // are fatal.
1709 //
1710 // Mappings within this range neither create nor modify any CPU mappings, even
1711 // if the mappings came from a region previously reserved via UvmReserveVa.
1712 // This implies that a CPU access to any address within this range will cause
1713 // a fatal fault if that address is not mapped on the CPU.
1714 //
1715 // The VA range is not reclaimed until UvmFree is called on it even if it is
1716 // fully unmapped from all GPUs either explicitly via UvmUnmapExternal or
1717 // implicitly via APIs such as UvmUnregisterGpu, UvmUnregisterGpuVaSpace,
1718 // UvmDisablePeerAccess, etc.
1719 //
1720 // Arguments:
1721 //     base: (INPUT)
1722 //         Base address of the virtual address range.
1723 //
1724 //     length: (INPUT)
1725 //         Length, in bytes, of the range.
1726 //
1727 // Errors:
1728 //     NV_ERR_INVALID_ADDRESS:
1729 //         base is NULL or length is zero or at least one of base and length is
1730 //         not aligned to 4K.
1731 //
1732 //     NV_ERR_UVM_ADDRESS_IN_USE:
1733 //         The requested address range overlaps with an existing allocation.
1734 //
1735 //     NV_ERR_NO_MEMORY:
1736 //         Internal memory allocation failed.
1737 //
1738 //------------------------------------------------------------------------------
1739 NV_STATUS UvmCreateExternalRange(void     *base, // Non-NULL, 4K aligned
1740                                  NvLength  length); // In bytes; non-zero, 4K aligned
1741 
1742 //------------------------------------------------------------------------------
1743 // UvmMapExternalAllocation
1744 //
1745 // Maps an allocation that was allocated outside of UVM on the specified list of
1746 // GPUs. The external allocation can be unmapped from a specific GPU using
1747 // UvmUnmapExternal or from all GPUs using UvmFree.
1748 //
1749 // The virtual address range specified by (base, length) must fall within a VA
1750 // range previously created with UvmCreateExternalRange. A GPU VA space must
1751 // have been registered for each GPU in the list. The (base, length) range must
1752 // lie within the largest possible virtual address supported by the specified
1753 // GPUs.
1754 //
1755 // The page size used for the mapping is the largest supported page size less
1756 // than or equal to the alignments of base, length, offset, and the allocation
1757 // page size.
1758 //
1759 // If the range specified by (base, length) falls within any existing mappings,
1760 // the behavior is the same as if UvmUnmapExternal with the range specified by
1761 // (base, length) had been called first, provided that base and length are
1762 // aligned to the page size used for the existing one.
1763 //
1764 // If the allocation resides in GPU memory, that GPU must have been registered
1765 // via UvmRegisterGpu. If the allocation resides in GPU memory and a mapping is
1766 // requested for a different GPU, then P2P support should have been enabled via
1767 // UvmEnablePeerAccess between the two GPUs if connected by PCIe.
1768 //
1769 // The allocation can be mapped with different access permissions and
1770 // cacheability settings on different GPUs. The settings to use for each GPU are
1771 // specified in the perGpuAttribs array. It is also legal to map the allocation
1772 // multiple times on the same GPU with different access permissions and
1773 // cacheability settings as long as all of the mappings are fully contained
1774 // within the VA range. Calling this API with the same GPU appearing multiple
1775 // times in the list is equivalent to calling the API multiple times on the same
1776 // GPU.
1777 //
1778 // Access permissions control which of 3 types of accesses (reads, writes and
1779 // atomics) are allowed for this VA range. Any GPU accesses of a disallowed kind
1780 // result in a fatal fault. If UvmGpuMappingTypeDefault is specified, the UVM
1781 // driver chooses the appropriate access permissions. On non-fault-capable GPUs,
1782 // specifying either UvmGpuMappingTypeReadOnly or UvmGpuMappingTypeReadWrite is
1783 // disallowed.
1784 //
1785 // Caching can be forced on or off, or can be left to the UVM driver to manage
1786 // by specifying UvmGpuCachingTypeDefault. Specifying UvmGpuCachingTypeDefault
1787 // will result in a cached mapping only if the allocation is physically located
1788 // in that GPU's memory. Note that caching here only refers to GPU L2 caching
1789 // and not GPU L1 caching as the latter is controlled via instruction opcode
1790 // modifiers and not through page table attributes.
1791 //
1792 // Format and element bits can be forced, or can be left to the UVM driver to
1793 // manage by specifying UvmGpuFormatTypeDefault and
1794 // UvmGpuFormatElementBitsDefault respectively. UvmGpuFormatTypeDefault and
1795 // UvmGpuFormatElementBitsDefault are mutually inclusive, meaning that if one
1796 // of them is specified then the other one must be specified too.
1797 //
1798 // Compression type of the specified virtual address range can be specified with
1799 // UvmGpuCompressionType mapping attribute.
1800 //
1801 // The UVM driver retains a reference on the external allocation as long as at
1802 // least one GPU has any portion of that allocation mapped.
1803 //
1804 // The pages in this mapping are not zero initialized or modified in any way.
1805 //
1806 // Note that calling UvmUnregisterGpuVaSpace will also unmap all mappings
1807 // created via this API on the GPU that the GPU VA space is associated with.
1808 // Also, if a mapping has to be created on a GPU for a physical allocation that
1809 // resides on a PCIe peer GPU, then peer-to-peer support must have been enabled
1810 // between those two GPUs via UvmEnablePeerAccess. Disabling peer-to-peer
1811 // support via UvmDisablePeerAccess will tear down all peer mappings between the
1812 // two GPUs.
1813 //
1814 // Arguments:
1815 //     base: (INPUT)
1816 //         Base address of the virtual address range.
1817 //
1818 //     length: (INPUT)
1819 //         Length, in bytes, of the range.
1820 //
1821 //     offset: (INPUT)
1822 //         Offset, in bytes, in the physical allocation at which the VA range
1823 //         must be mapped.
1824 //
1825 //     perGpuAttribs: (INPUT)
1826 //         List of per GPU mapping and caching attributes. GPUs not in the list
1827 //         are not affected.
1828 //
1829 //     gpuAttribsCount: (INPUT)
1830 //         Number of entries in the perGpuAttribs array.
1831 //
1832 //     platformParams: (INPUT)
1833 //         Platform specific parameters that identify the allocation.
1834 //         On Linux: RM ctrl fd, hClient and hMemory.
1835 //
1836 // Errors:
1837 //     NV_ERR_INVALID_ADDRESS:
1838 //         One of the following occurred:
1839 //         - base is NULL.
1840 //         - length is zero.
1841 //         - The requested address range does not fall entirely within an
1842 //           existing external VA range created with a single call to
1843 //           UvmCreateExternalRange.
1844 //         - The mapping page size allowed by the alignments of base, length,
1845 //           and offset is smaller than the minimum supported page size on the
1846 //           GPU.
1847 //         - base or base + length fall within an existing mapping but are not
1848 //           aligned to that mapping's page size.
1849 //
1850 //     NV_ERR_OUT_OF_RANGE:
1851 //         The range specified by (base, length) exceeds the largest virtual
1852 //         address supported by one or more of the specified GPUs.
1853 //
1854 //     NV_ERR_INVALID_OFFSET:
1855 //         - offset+length exceeds the allocation size.
1856 //
1857 //     NV_ERR_INVALID_DEVICE:
1858 //         One of the following occurred:
1859 //         - The allocation resides in GPU memory whose UUID was not registered.
1860 //         - One or more of the UUIDs in the perGpuAttribs list was either not
1861 //           registered or has no GPU VA space registered for it.
1862 //         - The allocation resides in GPU memory and a mapping was requested
1863 //           for a different GPU and P2P support was not enabled between them.
1864 //         - The UUID of the CPU was specified in the perGpuAttribs list.
1865 //         - UvmGpuCompressionTypeEnabledNoPlc compression type was used on one
1866 //           or more GPUs that don't support PLC.
1867 //
1868 //     NV_ERR_NO_MEMORY:
1869 //         Internal memory allocation failed.
1870 //
1871 //     NV_ERR_INVALID_ARGUMENT:
1872 //         One of the following occurred:
1873 //         - perGpuAttribs is NULL.
1874 //         - gpuAttribsCount is zero.
1875 //         - an invalid mapping type was specified.
1876 //         - an invalid caching type was specified.
1877 //         - an invalid format/element bits combination was specified.
1878 //         - an invalid compression type was specified.
1879 //         - UvmGpuCompressionTypeEnabledNoPlc compression type was used with a
1880 //           non-compressible physical allocation.
1881 //
1882 //     NV_ERR_GENERIC:
1883 //         Unexpected error. We try hard to avoid returning this error code,
1884 //         because it is not very informative.
1885 //
1886 //------------------------------------------------------------------------------
1887 NV_STATUS UvmMapExternalAllocation(void                              *base, // Non-NULL
1888                                    NvLength                           length, // In bytes; non-zero
1889                                    NvLength                           offset, // Byte offset into the allocation
1890                                    const UvmGpuMappingAttributes     *perGpuAttribs, // Non-NULL array
1891                                    NvLength                           gpuAttribsCount, // Entries in perGpuAttribs; non-zero
1892                                    const UvmAllocationPlatformParams *platformParams); // Identifies the allocation
1893 
1894 //------------------------------------------------------------------------------
1895 // UvmMapExternalSparse
1896 //
1897 // Creates a Sparse mapping for the virtual address range specified by (base,
1898 // length). The mapping does not have any physical backing, rather the PTEs use
1899 // a special pattern. The virtual address range specified by (base, length) must
1900 // be fully contained within a virtual address range previously created with
1901 // UvmCreateExternalRange.
1902 //
1903 // Virtual address ranges with Sparse mappings will not generate any faults when
1904 // accessed. Instead, writes will be discarded and reads will return 0.
1905 //
1906 // Sparse mappings are supported only on fault-capable GPUs and only for 64K
1907 // pages, so the virtual address range specified by (base, length) must be
1908 // aligned to 64K.
1909 //
1910 // If the range specified by (base, length) falls within any existing mappings,
1911 // the behavior is the same as if UvmUnmapExternal with the range specified by
1912 // (base, length) had been called first.
1913 //
1914 // Note that calling UvmUnregisterGpuVaSpace will also unmap all mappings
1915 // created via this API on the GPU that the GPU VA space is associated with.
1916 // Notably the mappings won't be recreated when the GPU VA space is
1917 // re-registered.
1918 //
1919 // Arguments:
1920 //     base: (INPUT)
1921 //         Base address of the virtual address range. The address must be
1922 //         aligned on a 64K boundary.
1923 //
1924 //     length: (INPUT)
1925 //         Length, in bytes, of the range. The length must be 64K aligned.
1926 //
1927 //
1928 //     gpuUuid: (INPUT)
1929 //         UUID of the GPU to map the sparse region on.
1930 //
1931 // Errors:
1932 //     NV_ERR_INVALID_ADDRESS:
1933 //         One of the following occurred:
1934 //         - base is NULL.
1935 //         - length is zero.
1936 //         - The requested address range does not fall entirely within an
1937 //           existing external VA range created with a single call to
1938 //           UvmCreateExternalRange.
1939 //         - At least one of base and length is not aligned to a 64K
1940 //           boundary.
1941 //
1942 //     NV_ERR_OUT_OF_RANGE:
1943 //         The range specified by (base, length) exceeds the largest virtual
1944 //         address supported by the specified GPU.
1945 //
1946 //     NV_ERR_INVALID_DEVICE:
1947 //         One of the following occurred:
1948 //         - The specified GPU was not registered.
1949 //         - The GPU specified has no VA space registered for it.
1950 //         - The UUID of the CPU was specified.
1951 //         - Sparse mappings are not supported on the specified GPU.
1952 //
1953 //     NV_ERR_NO_MEMORY:
1954 //         Internal memory allocation failed.
1955 //------------------------------------------------------------------------------
1956 NV_STATUS UvmMapExternalSparse(void                  *base, // Non-NULL, 64K aligned
1957                                NvLength               length, // In bytes; non-zero, 64K aligned
1958                                const NvProcessorUuid *gpuUuid); // GPU to map the sparse region on
1959 
1960 //------------------------------------------------------------------------------
1961 // UvmUnmapExternal
1962 //
1963 // Unmaps a virtual address range that was mapped using UvmMapExternalAllocation
1964 // or UvmMapExternalSparse from the specified GPU. The range specified by (base,
1965 // length) must be fully contained within a single External VA range created
1966 // with UvmCreateExternalRange.
1967 //
1968 // If the range specified by (base, length) partially overlaps existing
1969 // mappings, the overlapping portion of the existing mappings will be unmapped
1970 // provided that the split points are aligned to the mappings' respective page
1971 // sizes. Otherwise, the overlapping portions of the existing mappings will be
1972 // left in an undefined state.
1973 //
1974 // Note that the VA range is not reclaimed until UvmFree is called on it even if
1975 // all mappings in the created range have been unmapped from all GPUs via this
1976 // API.
1977 //
1978 // Arguments:
1979 //     base: (INPUT)
1980 //         Base address of the virtual address range.
1981 //
1982 //     length: (INPUT)
1983 //         Length, in bytes, of the virtual address range.
1984 //
1985 //     gpuUuid: (INPUT)
1986 //         UUID of the GPU to unmap the VA range from.
1987 //
1988 // Errors:
1989 //     NV_ERR_INVALID_ADDRESS:
1990 //         One of the following has occurred:
1991 //         - base is NULL.
1992 //         - The requested address range does not fall entirely within an
1993 //           existing external VA range created with a single call to
1994 //           UvmCreateExternalRange.
1995 //         - base or base + length fall within an existing mapping but are not
1996 //           aligned to that mapping's page size.
1997 //
1998 //     NV_ERR_INVALID_DEVICE:
1999 //         Either gpuUuid does not represent a valid registered GPU or the VA
2000 //         range corresponding to the given base address is not mapped on the
2001 //         specified GPU.
2002 //
2003 //     NV_ERR_GENERIC:
2004 //         Unexpected error. We try hard to avoid returning this error code,
2005 //         because it is not very informative.
2006 //
2007 //------------------------------------------------------------------------------
2008 NV_STATUS UvmUnmapExternal(void                  *base,
2009                            NvLength               length,
2010                            const NvProcessorUuid *gpuUuid);
2011 
2012 // TODO: Bug 2732305: Remove this base-only variant (presumably superseded by
2013 //       UvmUnmapExternal) once the new external APIs have been implemented.
2014 NV_STATUS UvmUnmapExternalAllocation(void                  *base,
2015                                      const NvProcessorUuid *gpuUuid);
2016 
2017 //------------------------------------------------------------------------------
2018 // UvmMapDynamicParallelismRegion
2019 //
2020 // Creates a special mapping required for dynamic parallelism. The mapping
2021 // doesn't have any physical backing, it's just a PTE with a special kind.
2022 //
2023 // The virtual address range specified by (base, length) must cover exactly one
2024 // GPU page, so length must be a page size supported by the GPU and base must be
2025 // aligned to that page size. The VA range must not overlap with an existing
2026 // mapping for the GPU. A GPU VA space must have been registered for the GPU and
2027 // the GPU must support dynamic parallelism.
2028 //
2029 // The mapping is created immediately and not modified until a call to UvmFree.
2030 // Calling UvmFree frees the GPU page table mapping. The range cannot be
2031 // associated with range groups and any GPU faults within this range are fatal.
2032 // Also, the pages cannot be the target for read duplication, cannot have a
2033 // preferred location set, and cannot have any accessed-by processors.
2034 //
2035 // Note that calling UvmUnregisterGpuVaSpace will also unmap all mappings
2036 // created via this API on the GPU that the GPU VA space is associated with.
2037 // Notably the mappings won't be recreated when the GPU VA space is
2038 // re-registered, but the range should still be destroyed with UvmFree.
2039 //
2040 // This call neither creates nor modifies any CPU mappings, even if the VA range
2041 // came from a region previously reserved via UvmReserveVa. This implies that
2042 // CPU accesses to this range will cause a fatal fault if it's not mapped.
2043 //
2044 // Arguments:
2045 //     base: (INPUT)
2046 //         Base address of the virtual address range.
2047 //
2048 //     length: (INPUT)
2049 //         Length, in bytes, of the range. Must be equal to a page size
2050 //         supported by the GPU.
2051 //
2052 //     gpuUuid: (INPUT)
2053 //         UUID of the GPU to map the dynamic parallelism region on.
2054 //
2055 // Errors:
2056 //     NV_ERR_UVM_ADDRESS_IN_USE:
2057 //         The requested address range overlaps with an existing allocation.
2058 //
2059 //     NV_ERR_INVALID_ADDRESS:
2060 //         base is NULL or not aligned to length or length is not a page size
2061 //         supported by the GPU.
2062 //
2063 //     NV_ERR_OUT_OF_RANGE:
2064 //         The VA range exceeds the largest virtual address supported by the
2065 //         specified GPU.
2066 //
2067 //     NV_ERR_INVALID_DEVICE:
2068 //         The gpuUuid was either not registered, has no GPU VA space
2069 //         registered for it, or the GPU doesn't support dynamic parallelism.
2070 //
2071 //     NV_ERR_NO_MEMORY:
2072 //         Internal memory allocation failed.
2073 //
2074 //     NV_ERR_GENERIC:
2075 //         Unexpected error. We try hard to avoid returning this error code,
2076 //         because it is not very informative.
2077 //
2078 //------------------------------------------------------------------------------
2079 NV_STATUS UvmMapDynamicParallelismRegion(void                  *base,
2080                                          NvLength               length,
2081                                          const NvProcessorUuid *gpuUuid);
2082 
2083 //------------------------------------------------------------------------------
2084 // UvmEnableReadDuplication
2085 //
2086 // Enables read duplication on the specified virtual address range, overriding
2087 // the UVM driver's default migration and mapping policy on read faults.
2088 //
2089 // The virtual address range specified by (base, length) must have been
2090 // allocated via a call to either UvmAlloc or UvmMemMap, or be supported
2091 // system-allocated pageable memory. If the input virtual range corresponds to
2092 // system-allocated pageable memory and UvmIsPageableMemoryAccessSupported
2093 // reports that pageable memory access is supported, the behavior described
2094 // below does not take effect, and read duplication will not be enabled for
2095 // the input range.
2096 //
2097 // Both base and length must be aligned to the smallest page size supported by
2098 // the CPU.
2099 //
2100 // On a read fault from a processor on a page in this range, any existing
2101 // mapping to that page from all other processors will be made read-only. If the
2102 // page does not reside in the faulting processor's memory, a duplicate copy of
2103 // the page will be created there. The copy of the page in the faulting
2104 // processor's memory will then be mapped as read-only on that processor. Note
2105 // that a write to this page from any processor will collapse the duplicated
2106 // copies.
2107 //
2108 // If UvmMigrate, UvmMigrateAsync or UvmMigrateRangeGroup is called on any pages
2109 // in this VA range, then those pages will also be read duplicated on the
2110 // destination processor for the migration.
2111 //
2112 // Enabling read duplication on a VA range requires the CPU and all GPUs with
2113 // registered VA spaces to be fault-capable. Otherwise, the migration and
2114 // mapping policies outlined above are not applied until all the
2115 // non-fault-capable GPUs are unregistered via UvmUnregisterGpu. If a
2116 // non-fault-capable GPU is registered after a page has already been
2117 // read-duplicated, then the copies of that page will be collapsed into a single
2118 // page.
2119 //
2120 // If UvmPreventMigrationRangeGroups has been called on the range group that
2121 // this VA range is associated with, then the migration and mapping policies
2122 // outlined above don't take effect until UvmAllowMigrationRangeGroups is called
2123 // for that range group.
2124 //
2125 // If any page in the VA range has a preferred location, then the migration and
2126 // mapping policies associated with this API take precedence over those related
2127 // to the preferred location.
2128 //
2129 // If any pages in this VA range have any processors present in their
2130 // accessed-by list, the migration and mapping policies associated with this
2131 // API override those associated with the accessed-by list.
2132 //
2133 // Multiple calls to this API for the same VA range are not refcounted, i.e.
2134 // calling this API on a VA range after it has already been called for that
2135 // same VA range results in a no-op.
2136 //
2137 // Arguments:
2138 //     base: (INPUT)
2139 //         Base address of the virtual address range.
2140 //
2141 //     length: (INPUT)
2142 //         Length, in bytes, of the range.
2143 //
2144 // Errors:
2145 //     NV_ERR_INVALID_ADDRESS:
2146 //         base and length are not properly aligned, or the range does not
2147 //         represent a valid UVM allocation, or the range is pageable memory and
2148 //         the system does not support accessing pageable memory, or the range
2149 //         does not represent a supported Operating System allocation.
2150 //
2151 //     NV_ERR_GENERIC:
2152 //         Unexpected error. We try hard to avoid returning this error code,
2153 //         because it is not very informative.
2154 //
2155 //------------------------------------------------------------------------------
2156 NV_STATUS UvmEnableReadDuplication(void     *base,
2157                                    NvLength  length);
2158 
2159 //------------------------------------------------------------------------------
2160 // UvmDisableReadDuplication
2161 //
2162 // Disables read duplication on the specified virtual address range, and reverts
2163 // the associated policies. This also disables any default read duplication
2164 // heuristics employed by the kernel driver.
2165 //
2166 // The virtual address range specified by (base, length) must have been
2167 // allocated via a call to either UvmAlloc or UvmMemMap, or be supported
2168 // system-allocated pageable memory. If the input virtual range corresponds to
2169 // system-allocated pageable memory and UvmIsPageableMemoryAccessSupported
2170 // reports that pageable memory access is supported, the behavior described
2171 // below does not take effect, and read duplication will not be enabled for
2172 // the input range.
2173 //
2174 // Both base and length must be aligned to the smallest page size supported by
2175 // the CPU.
2176 //
2177 // Any pages in the VA range that are currently read duplicated will be
2178 // collapsed into a single copy. The location for the collapsed copy will be the
2179 // preferred location if the page has a preferred location and was resident at
2180 // that location when this API was called. Otherwise, the location will be
2181 // chosen arbitrarily.
2182 //
2183 // It is ok to call this API only on a subset of the VA range on which
2184 // UvmEnableReadDuplication was called or for a VA range on which
2185 // UvmEnableReadDuplication was never called.
2186 //
2187 // Arguments:
2188 //     base: (INPUT)
2189 //         Base address of the virtual address range.
2190 //
2191 //     length: (INPUT)
2192 //         Length, in bytes, of the range.
2193 //
2194 // Errors:
2195 //     NV_ERR_INVALID_ADDRESS:
2196 //         base and length are not properly aligned, or the range does not
2197 //         represent a valid UVM allocation, or the range is pageable memory and
2198 //         the system does not support accessing pageable memory, or the range
2199 //         does not represent a supported Operating System allocation.
2200 //
2201 //     NV_ERR_GENERIC:
2202 //         Unexpected error. We try hard to avoid returning this error code,
2203 //         because it is not very informative.
2204 //
2205 //------------------------------------------------------------------------------
2206 NV_STATUS UvmDisableReadDuplication(void     *base,
2207                                     NvLength  length);
2208 
2209 //------------------------------------------------------------------------------
2210 // UvmSetPreferredLocation
2211 //
2212 // Sets the preferred location for the given virtual address range to be the
2213 // specified processor's memory.
2214 //
2215 // Both base and length must be aligned to the smallest page size supported by
2216 // the CPU. The VA range must lie within the largest possible virtual address
2217 // supported by the specified processor.
2218 //
2219 // The virtual address range specified by (base, length) must have been
2220 // allocated via a call to either UvmAlloc or UvmMemMap, or be supported
2221 // system-allocated pageable memory. If the input range is pageable memory and
2222 // at least one GPU in the system supports transparent access to pageable
2223 // memory, the behavior described below does not take effect and the preferred
2224 // location of the pages in the given range does not change.
2225 //
2226 // If any pages in the VA range are associated with a range group that was made
2227 // non-migratable via UvmPreventMigrationRangeGroups, then those pages are
2228 // migrated immediately to the specified preferred location and mapped according
2229 // to policies specified in UvmPreventMigrationRangeGroups. Otherwise, this API
2230 // neither migrates pages nor does it populate unpopulated pages. Note that if
2231 // the specified preferred location is a fault-capable GPU and at least one page
2232 // in the VA range is associated with a non-migratable range group, then an
2233 // error is returned. Additionally, if the specified preferred location is a
2234 // non-fault capable GPU and at least one page in the VA range is associated
2235 // with a non-migratable range group, an error is returned if another
2236 // non-fault-capable GPU is present in the accessed-by list of that page but P2P
2237 // support has not been enabled between both GPUs.
2238 //
2239 // When a page is in its preferred location, a fault from another processor will
2240 // not cause a migration if a mapping for that page from that processor can be
2241 // established without migrating the page.
2242 //
2243 // When a page migrates away from its preferred location, the mapping on the
2244 // preferred location's processor is cleared so that the next access from that
2245 // processor will cause a fault and migrate the page back to its preferred
2246 // location. In other words, a page is mapped on the preferred location's
2247 // processor only if the page is in its preferred location. Thus, when the
2248 // preferred location changes, mappings to pages in the given range are removed
2249 // from the new preferred location if the pages are resident in a different
2250 // processor. Note that if the preferred location's processor is a GPU, then a
2251 // mapping from that GPU to a page in the VA range is only created if a GPU VA
2252 // space has been registered for that GPU and the page is in its preferred
2253 // location.
2254 //
2255 // If read duplication has been enabled for any pages in this VA range and
2256 // UvmPreventMigrationRangeGroups has not been called on the range group that
2257 // those pages are associated with, then the migration and mapping policies
2258 // associated with UvmEnableReadDuplication override the policies outlined
2259 // above. Note that enabling read duplication on any pages in this VA range
2260 // does not clear the state set by this API for those pages. It merely overrides
2261 // the policies associated with this state until read duplication is disabled
2262 // for those pages.
2263 //
2264 // If the preferred location processor is present in the accessed-by list of any
2265 // of the pages in this VA range, then the migration and mapping policies
2266 // associated with this API override those associated with the accessed-by list.
2267 //
2268 // The state set by this API can be cleared either by calling
2269 // UvmUnsetPreferredLocation for the same VA range or by calling
2270 // UvmUnregisterGpu on this processor if the processor is a GPU. Note that
2271 // calling UvmUnregisterGpuVaSpace will not clear the state set by this API.
2272 // Multiple calls to this API for the same VA range and the same processor are
2273 // not refcounted, i.e. calling this API on a VA range and processor after it
2274 // has already been called for that same VA range and processor results in a
2275 // no-op.
2276 //
2277 // Arguments:
2278 //     base: (INPUT)
2279 //         Base address of the virtual address range.
2280 //
2281 //     length: (INPUT)
2282 //         Length, in bytes, of the range.
2283 //
2284 //     preferredLocationUuid: (INPUT)
2285 //         UUID of the preferred location.
2286 //
2287 // Errors:
2288 //     NV_ERR_INVALID_ADDRESS:
2289 //         base and length are not properly aligned, or the range does not
2290 //         represent a valid UVM allocation, or the range is pageable memory and
2291 //         the system does not support accessing pageable memory, or the range
2292 //         does not represent a supported Operating System allocation.
2293 //
2294 //     NV_ERR_OUT_OF_RANGE:
2295 //         The VA range exceeds the largest virtual address supported by the
2296 //         specified processor.
2297 //
2298 //     NV_ERR_INVALID_DEVICE:
2299 //         preferredLocationUuid is neither the UUID of the CPU nor the UUID of
2300 //         a GPU that was registered by this process. Or at least one page in
2301 //         the VA range belongs to a non-migratable range group and the
2302 //         specified UUID represents a fault-capable GPU. Or
2303 //         preferredLocationUuid is the UUID of a non-fault-capable GPU and at
2304 //         least one page in the VA range belongs to a non-migratable range
2305 //         group and another non-fault-capable GPU is in the accessed-by list of
2306 //         the same page but P2P support between both GPUs has not been enabled.
2307 //
2308 //     NV_ERR_GENERIC:
2309 //         Unexpected error. We try hard to avoid returning this error code,
2310 //         because it is not very informative.
2311 //
2312 //------------------------------------------------------------------------------
2313 NV_STATUS UvmSetPreferredLocation(void                  *base,
2314                                   NvLength               length,
2315                                   const NvProcessorUuid *preferredLocationUuid);
2316 
2317 //------------------------------------------------------------------------------
2318 // UvmUnsetPreferredLocation
2319 //
2320 // Unsets the preferred location associated with all pages in the specified
2321 // virtual address range, reverting the migration and mapping policies outlined
2322 // in UvmSetPreferredLocation.
2323 //
2324 // Both base and length must be aligned to the smallest page size supported by
2325 // the CPU.
2326 //
2327 // The virtual address range specified by (base, length) must have been
2328 // allocated via a call to either UvmAlloc or UvmMemMap, or be supported
2329 // system-allocated pageable memory. If the input range is pageable memory and
2330 // at least one GPU in the system supports transparent access to pageable
2331 // memory, the behavior described below does not take effect and the preferred
2332 // location of the pages in the given range does not change.
2333 //
2334 // If the VA range is associated with a non-migratable range group, then that
2335 // association is cleared, i.e. the pages in this VA range have their range
2336 // group association changed to UVM_RANGE_GROUP_ID_NONE.
2337 //
2338 // It is ok to call this API only on a subset of the VA range on which
2339 // UvmSetPreferredLocation was called or for a VA range on which
2340 // UvmSetPreferredLocation was never called.
2341 //
2342 // Arguments:
2343 //     base: (INPUT)
2344 //         Base address of the virtual address range.
2345 //
2346 //     length: (INPUT)
2347 //         Length, in bytes, of the range.
2348 //
2349 // Errors:
2350 //     NV_ERR_INVALID_ADDRESS:
2351 //         base and length are not properly aligned or the range does not
2352 //         represent a valid UVM allocation, or the range is pageable memory and
2353 //         the system does not support accessing pageable memory, or the range
2354 //         does not represent a supported Operating System allocation.
2355 //
2356 //     NV_ERR_GENERIC:
2357 //         Unexpected error. We try hard to avoid returning this error code,
2358 //         because it is not very informative.
2359 //
2360 //------------------------------------------------------------------------------
2361 NV_STATUS UvmUnsetPreferredLocation(void     *base,
2362                                     NvLength  length);
2363 
2364 //------------------------------------------------------------------------------
2365 // UvmSetAccessedBy
2366 //
2367 // Indicates to the UVM driver that the pages in the given virtual address range
2368 // should be mapped on the specified processor whenever establishing such a
2369 // mapping is possible. The purpose of this API is to prevent faults from the
2370 // specified processor to the given VA range as much as possible.
2371 //
2372 // Both base and length must be aligned to the smallest page size supported by
2373 // the CPU. The VA range must lie within the largest possible virtual address
2374 // supported by the specified processor.
2375 //
2376 // The virtual address range specified by (base, length) must have been
2377 // allocated via a call to either UvmAlloc or UvmMemMap, or be supported
2378 // system-allocated pageable memory. If the input range is pageable memory and
2379 // at least one GPU in the system supports transparent access to pageable
2380 // memory, the behavior described below does not take effect and the accessed-by
2381 // processor list of the VA range does not change.
2382 //
2383 // If a page in the VA range is not populated or its current location doesn't
2384 // permit a mapping to be established, then no mapping is created for that page.
2385 // If a page in the VA range migrates to a new location, then the mapping is
2386 // updated to point to the new location if establishing such a mapping is
2387 // possible. If a page in the VA range is associated with a non-migratable range
2388 // group and the specified processor is a non-fault-capable GPU, then an error
2389 // is returned if the mapping cannot be established.
2390 //
2391 // If the specified processor is a GPU and no GPU VA space has been registered
2392 // for it or if the registered GPU VA space gets unregistered, then the policies
2393 // outlined above will take effect the next time a GPU VA space gets registered
2394 // for this GPU.
2395 //
2396 // If read duplication is enabled in any pages in this VA range, then the page
2397 // mapping policy associated with read duplication overrides the mapping policy
2398 // associated with this API.
2399 //
2400 // Similarly, if any page in this VA range has a preferred location, and the
2401 // UUID of the preferred location is the same as the UUID passed in to this API,
2402 // then the mapping policy associated with having a preferred location overrides
2403 // the mapping policy associated with this API.
2404 //
2405 // Note that enabling read duplication or setting a preferred location on any
2406 // pages in this VA range does not clear the state set by this API for those
2407 // pages. It merely overrides the policies associated with this state until read
2408 // duplication is disabled on those pages or their preferred location is
2409 // cleared.
2410 //
2411 // The state set by this API can be cleared either by calling UvmUnsetAccessedBy
2412 // for the same VA range and processor or by calling UvmUnregisterGpu on this
2413 // processor if the processor is a GPU. It is also cleared if the processor is a
2414 // non-fault-capable GPU and the VA range has a preferred location set to a peer
2415 // GPU and peer access is disabled via UvmDisablePeerAccess. Note however that
2416 // calling UvmUnregisterGpuVaSpace will not clear the state set by this API.
2417 //
2418 // Multiple calls to this API for the same VA range and the same processor are
2419 // not refcounted, i.e. calling this API on a VA range and processor after it
2420 // has already been called for that same VA range and processor results in a
2421 // no-op.
2422 //
2423 // Arguments:
2424 //     base: (INPUT)
2425 //         Base address of the virtual address range.
2426 //
2427 //     length: (INPUT)
2428 //         Length, in bytes, of the range.
2429 //
2430 //     accessedByUuid: (INPUT)
2431 //         UUID of the processor that should have pages in the VA range
2432 //         mapped when possible.
2433 //
2434 // Errors:
2435 //     NV_ERR_INVALID_ADDRESS:
2436 //         base and length are not properly aligned or the range does not
2437 //         represent a valid UVM allocation, or the range is pageable memory and
2438 //         the system does not support accessing pageable memory, or the range
2439 //         does not represent a supported Operating System allocation.
2440 //
2441 //     NV_ERR_OUT_OF_RANGE:
2442 //         The VA range exceeds the largest virtual address supported by the
2443 //         specified processor.
2444 //
2445 //     NV_ERR_INVALID_DEVICE:
2446 //         accessedByUuid is neither the UUID of the CPU nor the UUID of a GPU
2447 //         that was registered by this process. Or accessedByUuid is the UUID of
2448 //         a non-fault-capable GPU and the VA range is associated with a
2449 //         non-migratable range group with a preferred location set to another
2450 //         non-fault-capable GPU that doesn't have P2P support enabled with this
2451 //         GPU.
2452 //
2453 //     NV_ERR_NO_MEMORY:
2454 //         accessedByUuid is a non-fault-capable GPU and there was insufficient
2455 //         memory to create the mapping.
2456 //
2457 //     NV_ERR_GENERIC:
2458 //         Unexpected error. We try hard to avoid returning this error code,
2459 //         because it is not very informative.
2460 //
2461 //------------------------------------------------------------------------------
2462 NV_STATUS UvmSetAccessedBy(void                  *base,
2463                            NvLength               length,
2464                            const NvProcessorUuid *accessedByUuid);
2465 
2466 //------------------------------------------------------------------------------
2467 // UvmUnsetAccessedBy
2468 //
2469 // Undoes the effect of UvmSetAccessedBy for the given virtual address range on
2470 // the specified processor, thereby reverting the mapping policies imposed by
2471 // UvmSetAccessedBy.
2472 //
2473 // Both base and length must be aligned to the smallest page size supported by
2474 // the CPU.
2475 //
2476 // The virtual address range specified by (base, length) must have been
2477 // allocated via a call to either UvmAlloc or UvmMemMap, or be supported
2478 // system-allocated pageable memory. If the input range is pageable memory and
2479 // at least one GPU in the system supports transparent access to pageable
2480 // memory, the behavior described below does not take effect and the accessed-by
2481 // processor list of the VA range does not change.
2482 
2483 //
2484 // Existing mappings to this VA range from the given processor are not affected.
2485 // If any page in the VA range migrates to a different location however, the
2486 // mapping may be cleared or updated based on other mapping policies that are in
2487 // effect.
2488 //
2489 // It is ok to call this API for a subset of a VA range with an accessed-by list
2490 // containing this processor, or for a VA range with an empty accessed-by list.
2491 //
2492 // Arguments:
2493 //     base: (INPUT)
2494 //         Base address of the virtual address range.
2495 //
2496 //     length: (INPUT)
2497 //         Length, in bytes, of the range.
2498 //
2499 //     accessedByUuid: (INPUT)
2500 //         UUID of the processor from which any policies set by
2501 //         UvmSetAccessedBy should be revoked for the given VA range.
2502 //
2503 // Errors:
2504 //     NV_ERR_INVALID_ADDRESS:
2505 //         base and length are not properly aligned or the range does not
2506 //         represent a valid UVM allocation, or the range is pageable memory and
2507 //         the system does not support accessing pageable memory, or the range
2508 //         does not represent a supported Operating System allocation.
2509 //
2510 //     NV_ERR_INVALID_DEVICE:
2511 //         accessedByUuid is neither the UUID of the CPU nor the UUID of a GPU
2512 //         that was registered by this process.
2513 //
2514 //     NV_ERR_GENERIC:
2515 //         Unexpected error. We try hard to avoid returning this error code,
2516 //         because it is not very informative.
2517 //
2518 //------------------------------------------------------------------------------
2519 NV_STATUS UvmUnsetAccessedBy(void                  *base,
2520                              NvLength               length,
2521                              const NvProcessorUuid *accessedByUuid);
2522 
2523 //------------------------------------------------------------------------------
2524 // UvmEnableSystemWideAtomics
2525 //
2526 // Enables software-assisted system-wide atomics support on the specified GPU.
2527 // Any system-wide atomic operation issued from this GPU is now guaranteed to be
2528 // atomic with respect to all accesses from other processors that also support
2529 // system-wide atomics regardless of whether that support is enabled on those
2530 // other processors or not.
2531 //
2532 // The class of atomic operations from the GPU that are considered system-wide
2533 // is GPU architecture dependent. All atomic operations from the CPU are always
2534 // considered to be system-wide and support for system-wide atomics on the CPU
2535 // is always considered to be enabled.
2536 //
2537 // System-wide atomics which cannot be natively supported in hardware are
2538 // emulated using virtual mappings and page faults. For example, assume a
2539 // virtual address which is resident in CPU memory and has CPU memory as its
2540 // preferred location. A GPU with system-wide atomics enabled but without native
2541 // atomics support to CPU memory will not have atomics enabled in its virtual
2542 // mapping of the page that contains that address. If that GPU performs an
2543 // atomic operation, the access will fault, all other processors' mappings to
2544 // that page will have their write permissions revoked, the faulting GPU will be
2545 // granted atomic permissions in its virtual mapping, and the faulting GPU will
2546 // retry its access. Further atomic accesses from that GPU will not cause page
2547 // faults until another processor attempts a write access to the same page.
2548 //
2549 // Multiple calls to this API for the same GPU are not refcounted, i.e. calling
2550 // this API for a GPU for which software-assisted system-wide atomics support
2551 // has already been enabled results in a no-op.
2552 //
2553 // The GPU must have been registered using UvmRegisterGpu prior to making this
2554 // call. By default, software-assisted system-wide atomics support is enabled
2555 // when a GPU is registered.
2556 //
2557 // Arguments:
2558 //     gpuUuid: (INPUT)
2559 //         UUID of the GPU to enable software-assisted system-wide atomics on.
2560 //
2561 // Errors:
2562 //     NV_ERR_NO_MEMORY:
2563 //         Internal memory allocation failed.
2564 //
2565 //     NV_ERR_INVALID_DEVICE:
2566 //         The GPU referred to by gpuUuid was not registered.
2567 //
2568 //     NV_ERR_NOT_SUPPORTED:
2569 //         The GPU does not support system-wide atomic operations, or the GPU
2570 //         has hardware support for scoped atomic operations.
2571 //
2572 //     NV_ERR_GENERIC:
2573 //         Unexpected error. We try hard to avoid returning this error code,
2574 //         because it is not very informative.
2575 //
2576 //------------------------------------------------------------------------------
2577 NV_STATUS UvmEnableSystemWideAtomics(const NvProcessorUuid *gpuUuid);
2578 
2579 //------------------------------------------------------------------------------
2580 // UvmDisableSystemWideAtomics
2581 //
2582 // Disables software-assisted system-wide atomics support on the specified GPU.
2583 // Any atomic operation from this GPU is no longer guaranteed to be atomic with
2584 // respect to accesses from other processors in the system, even if the
2585 // operation has system-wide scope at the instruction level.
2586 //
2587 // The GPU must have been registered using UvmRegisterGpu prior to making this
2588 // call. It is however ok to call this API for GPUs that do not have support for
2589 // system-wide atomic operations enabled. If the GPU is unregistered via
2590 // UvmUnregisterGpu and then registered again via UvmRegisterGpu, support for
2591 // software-assisted system-wide atomics will be enabled.
2592 //
2593 // Arguments:
2594 //     gpuUuid: (INPUT)
2595 //         UUID of the GPU to disable software-assisted system-wide atomics on.
2596 //
2597 // Errors:
2598 //     NV_ERR_INVALID_DEVICE:
2599 //         The GPU referred to by gpuUuid was not registered.
2600 //
2601 //     NV_ERR_NOT_SUPPORTED:
2602 //         The GPU does not support system-wide atomic operations, or the GPU
2603 //         has hardware support for scoped atomic operations.
2604 //
2605 //     NV_ERR_GENERIC:
2606 //         Unexpected error. We try hard to avoid returning this error code,
2607 //         because it is not very informative.
2608 //
2609 //------------------------------------------------------------------------------
2610 NV_STATUS UvmDisableSystemWideAtomics(const NvProcessorUuid *gpuUuid);
2611 
2612 //------------------------------------------------------------------------------
2613 // UvmGetFileDescriptor
2614 //
2615 // Returns the UVM file descriptor currently being used to call into the UVM
2616 // kernel mode driver. The data type of the returned file descriptor is
2617 // platform-specific.
2618 //
2619 // If UvmInitialize has not yet been called, an error is returned. If
2620 // UvmInitialize was called with UVM_AUTO_FD, then the file descriptor created
2621 // during UvmInitialize is returned. If UvmInitialize was called with an
2622 // existing UVM file descriptor, then that file descriptor is returned.
2623 //
2624 // Arguments:
2625 //     returnedFd: (OUTPUT)
2626 //         A platform-specific file descriptor.
2627 //
2628 // Error codes:
2629 //     NV_ERR_INVALID_ARGUMENT:
2630 //         returnedFd is NULL.
2631 //
2632 //     NV_ERR_INVALID_STATE:
2633 //         UVM was not initialized before calling this function.
2634 //
2635 //     NV_ERR_GENERIC:
2636 //         Unexpected error. We try hard to avoid returning this error code,
2637 //         because it is not very informative.
2638 //
2639 //------------------------------------------------------------------------------
2640 NV_STATUS UvmGetFileDescriptor(UvmFileDescriptor *returnedFd);
2641 
2642 //------------------------------------------------------------------------------
2643 // UvmIs8Supported
2644 //
2645 // Returns whether the kernel driver has been loaded in UVM 8 mode or not.
2646 //
2647 // Arguments:
2648 //     is8Supported: (OUTPUT)
2649 //         Will be set to true (nonzero) if the driver was loaded as UVM 8, or
2650 //         false (zero) if it was loaded as UVM Lite.
2651 //
2652 // Error codes:
2653 //     NV_ERR_INVALID_ARGUMENT:
2654 //         is8Supported is NULL.
2655 //
2656 //     NV_ERR_GENERIC:
2657 //         Unexpected error. We try hard to avoid returning this error code,
2658 //         because it is not very informative.
2659 //
2660 //------------------------------------------------------------------------------
2661 NV_STATUS UvmIs8Supported(NvU32 *is8Supported);
2662 
2663 //------------------------------------------------------------------------------
2664 //    Tools API
2665 //------------------------------------------------------------------------------
2666 
2667 //------------------------------------------------------------------------------
2668 // UvmDebugVersion
2669 //
2670 // Returns the version number of the UVM debug library.
2671 // See uvm_types.h for valid version numbers, e.g. UVM_DEBUG_V1.
2672 //
2673 //------------------------------------------------------------------------------
2674 unsigned UvmDebugVersion(void);
2675 
2676 //------------------------------------------------------------------------------
2677 // UvmDebugCreateSession
2678 //
2679 // Creates a handle for a debugging session.
2680 //
2681 // When the client initializes, it will pass in a process ID and get a
2682 // session handle for itself. Subsequent calls to the UVM API will take in that
2683 // session handle.
2684 //
2685 // There are security requirements for this call.
2686 // One of the following must be true:
2687 // 1.  The session owner must be running as an elevated user
2688 // 2.  The session owner and target must belong to the same user and the
2689 //     session owner is at least as privileged as the target.
2690 //
2691 // For CUDA 6.0 we can create at most 64 sessions per debugger process.
2692 //
2693 // Arguments:
2694 //     pid: (INPUT)
2695 //         Process id for which the debugging session will be created.
2696 //
2697 //     session: (OUTPUT)
2698 //         Handle to the debugging session associated to that pid.
2699 //
2700 // Error codes:
2701 //     NV_ERR_PID_NOT_FOUND:
2702 //         pid is invalid or not associated with UVM.
2703 //
2704 //     NV_ERR_INSUFFICIENT_PERMISSIONS:
2705 //         Function fails the security check.
2706 //
2707 //     NV_ERR_INSUFFICIENT_RESOURCES:
2708 //         Attempt is made to allocate more than 64 sessions per process.
2709 //
2710 //     NV_ERR_BUSY_RETRY:
2711 //         Internal resources are blocked by other threads.
2712 //
2713 //------------------------------------------------------------------------------
2714 NV_STATUS UvmDebugCreateSession(unsigned         pid,
2715                                 UvmDebugSession *session);
2716 
2717 //------------------------------------------------------------------------------
2718 // UvmDebugDestroySession
2719 //
2720 // Destroys a debugging session.
2721 //
2722 // Arguments:
2723 //     session: (INPUT)
2724 //         Handle to the debugging session to destroy.
2725 //
2726 // Error codes:
2727 //     NV_ERR_INVALID_ARGUMENT:
2728 //         session is invalid.
2729 //
2730 //     NV_ERR_BUSY_RETRY:
2731 //         Debug session is in use by some other thread.
2732 //
2733 //------------------------------------------------------------------------------
2734 NV_STATUS UvmDebugDestroySession(UvmDebugSession session);
2735 
2736 //------------------------------------------------------------------------------
2737 // UvmDebugCountersEnable
2738 //
2739 // Enables or disables counters following the user-specified configuration.
2740 //
2741 // The user must fill a list with the configuration of the counters it needs to
2742 // either enable or disable. It can only enable one counter per line.
2743 //
2744 // The structure (UvmCounterConfig) has several fields:
2745 //  - scope: Please see the UvmCounterScope enum (above), for details.
2746 //  - name: Name of the counter. Please check UvmCounterName for list.
2747 //  - gpuid: Identifies the GPU for which the counter will be enabled/disabled.
2748 //           This parameter is ignored in AllGpu scopes.
2749 //  - state: A value of 0 will disable the counter, a value of 1 will enable
2750 //           the counter.
2751 //
2752 // Note: All counters are refcounted, which means that a counter will only be
2753 // disabled when its refcount reaches zero.
2754 //
2755 // Arguments:
2756 //     session: (INPUT)
2757 //         Handle to the debugging session.
2758 //
2759 //     config: (INPUT)
2760 //         Pointer to the configuration list as per above.
2761 //
2762 //     count: (INPUT)
2763 //         Number of entries in the config list.
2764 //
2765 // Error codes:
2766 //     NV_ERR_INSUFFICIENT_PERMISSIONS:
2767 //         Function fails the security check.
2768 //
2769 //     NV_ERR_INVALID_ARGUMENT:
2770 //         Debugging session is invalid or one of the counter lines is invalid.
2771 //         If the call returns this value, no action specified by the config
2772 //         list will have taken effect.
2773 //
2774 //     NV_ERR_NOT_SUPPORTED:
2775 //         UvmCounterScopeGlobalSingleGpu is not supported for CUDA 6.0.
2776 //
2777 //     NV_ERR_BUSY_RETRY:
2778 //         The debug session is in use by some other thread.
2779 //
2780 //------------------------------------------------------------------------------
2781 NV_STATUS UvmDebugCountersEnable(UvmDebugSession   session,
2782                                  UvmCounterConfig *config,
2783                                  unsigned          count);
2784 
2785 //------------------------------------------------------------------------------
2786 // UvmDebugGetCounterHandle
2787 //
2788 // Returns a handle to a particular counter. This is an opaque handle that the
2789 // implementation uses in order to find your counter later. This handle can be
2790 // used in subsequent calls to UvmDebugGetCounterVal().
2791 //
2792 // Arguments:
2793 //     session: (INPUT)
2794 //         Handle to the debugging session.
2795 //
2796 //     scope: (INPUT)
2797 //         Scope that will be mapped.
2798 //
2799 //     counterName: (INPUT)
2800 //         Name of the counter in that scope.
2801 //
2802 //     gpu: (INPUT)
2803 //         UUID of the scoped GPU. This parameter is ignored in AllGpu scopes.
2804 //
2805 //     pCounterHandle: (OUTPUT)
2806 //         Handle to the counter address.
2807 //
2808 // Error codes:
2809 //     NV_ERR_INVALID_ARGUMENT:
2810 //         Specified scope/gpu pair or session id is invalid.
2811 //
2812 //     NV_ERR_NOT_SUPPORTED:
2813 //         UvmCounterScopeGlobalSingleGpu is not supported for CUDA 6.0.
2814 //
2815 //     NV_ERR_BUSY_RETRY:
2816 //         Debug session is in use by some other thread.
2817 //
2818 //------------------------------------------------------------------------------
2819 NV_STATUS UvmDebugGetCounterHandle(UvmDebugSession  session,
2820                                    UvmCounterScope  scope,
2821                                    UvmCounterName   counterName,
2822                                    NvProcessorUuid  gpu,
2823                                    NvUPtr          *pCounterHandle);
2824 
2825 //------------------------------------------------------------------------------
2826 // UvmDebugGetCounterVal
2827 //
2828 // Returns the values of the counters identified by the given counter handles.
2829 //
2830 // Arguments:
2831 //     session: (INPUT)
2832 //         Handle to the debugging session.
2833 //
2834 //     counterHandleArray: (INPUT)
2835 //         Array of counter handles.
2836 //
2837 //     handleCount: (INPUT)
2838 //         Number of handles in the counterHandleArray array.
2839 //
2840 //     counterValArray: (OUTPUT)
2841 //         Array of counter values corresponding to the handles.
2842 //
2843 // Error codes:
2844 //     NV_ERR_INVALID_ARGUMENT:
2845 //         One of the specified handles is invalid.
2846 //
2847 //------------------------------------------------------------------------------
2848 NV_STATUS UvmDebugGetCounterVal(UvmDebugSession     session,
2849                                 NvUPtr             *counterHandleArray,
2850                                 unsigned            handleCount,
2851                                 unsigned long long *counterValArray);
2852 
2853 //------------------------------------------------------------------------------
2854 // UvmEventQueueCreate
2855 //
2856 // This call creates an event queue of the given size.
2857 // No events are added to the queue until they are enabled by the user.
2858 // Event queue data is visible to the user even after the target process dies
2859 // if the session is active and the queue is not freed.
2860 //
2861 // Multiple UvmEventQueueCreate calls need not be serialized by the user: each
2862 // call creates a new queue state associated with the returned queue handle.
2863 //
2864 // Arguments:
2865 //     sessionHandle: (INPUT)
2866 //         Handle to the debugging session.
2867 //
2868 //     queueHandle: (OUTPUT)
2869 //         Handle to the created queue.
2870 //
2871 //     queueSize: (INPUT)
2872 //         Size of the event queue buffer in UvmEventEntry units. Must be > 1.
2873 //
2874 //     notificationCount: (INPUT)
2875 //         Number of entries after which the user is notified that there are
2876 //         events to fetch, i.e. when queueEntries >= notificationCount.
2877 //
2878 //     timeStampType: (INPUT)
2879 //         Type of time stamp used in the event entries of this queue.
2880 //
2881 // Error codes:
2882 //     NV_ERR_INSUFFICIENT_PERMISSIONS:
2883 //         Function fails the security check.
2884 //
2885 //     NV_ERR_INVALID_ARGUMENT:
2886 //         One of the arguments is invalid.
2887 //
2888 //     NV_ERR_INSUFFICIENT_RESOURCES:
2889 //         It is not possible to allocate a queue of the requested size.
2890 //
2891 //     NV_ERR_BUSY_RETRY:
2892 //         Internal resources are blocked by other threads.
2893 //
2894 //     NV_ERR_PID_NOT_FOUND:
2895 //         Queue create call is made on a session after the target dies.
2896 //
2897 //------------------------------------------------------------------------------
2898 NV_STATUS UvmEventQueueCreate(UvmDebugSession        sessionHandle,
2899                               UvmEventQueueHandle   *queueHandle,
2900                               NvS64                  queueSize,
2901                               NvU64                  notificationCount,
2902                               UvmEventTimeStampType  timeStampType);
2903 
2904 //------------------------------------------------------------------------------
2905 // UvmEventQueueDestroy
2906 //
2907 // This call frees all internal resources associated with the queue, including
2908 // unpinning of the memory associated with that queue. Freeing the user buffer
2909 // is the responsibility of the caller. The event queue might also be destroyed
2910 // as a side effect of destroying a session associated with this queue.
2911 //
2912 // User needs to ensure that a queue handle is not deleted while some other
2913 // thread is using the same queue handle.
2914 //
2915 // Arguments:
2916 //     sessionHandle: (INPUT)
2917 //         Handle to the debugging session.
2918 //
2919 //     queueHandle: (INPUT)
2920 //         Handle to the queue which is to be freed.
2921 //
2922 // Error codes:
2923 //     RM_ERR_NOT_PERMITTED:
2924 //         Function fails the security check.
2925 //
2926 //     NV_ERR_INVALID_ARGUMENT:
2927 //         One of the arguments is invalid.
2928 //
2929 //     NV_ERR_BUSY_RETRY:
2930 //         Internal resources are blocked by other threads.
2931 //
2932 //------------------------------------------------------------------------------
2933 NV_STATUS UvmEventQueueDestroy(UvmDebugSession     sessionHandle,
2934                                UvmEventQueueHandle queueHandle);
2935 
2936 //------------------------------------------------------------------------------
2937 // UvmEventEnable
2938 //
2939 // This call enables a particular event type in the event queue.
2940 // All events are disabled by default when a queue is created.
2941 //
2942 // This API does not access the queue state maintained in the user
2943 // library so the user doesn't need to acquire a lock to protect the queue
2944 // state.
2945 //
2946 // Arguments:
2947 //     sessionHandle: (INPUT)
2948 //         Handle to the debugging session.
2949 //
2950 //     queueHandle: (INPUT)
2951 //         Handle to the queue where events are to be enabled.
2952 //
2953 //     eventTypeFlags: (INPUT)
2954 //         This field specifies the event types to be enabled. For example,
2955 //         to enable migration events and memory violations, pass flags
2956 //         "UVM_EVENT_ENABLE_MEMORY_VIOLATION | UVM_EVENT_ENABLE_MIGRATION".
2957 //
2958 // Error codes:
2959 //     RM_ERR_NOT_PERMITTED:
2960 //         Function fails the security check.
2961 //
2962 //     NV_ERR_INVALID_ARGUMENT:
2963 //         One of the arguments is invalid.
2964 //
2965 //     NV_ERR_PID_NOT_FOUND:
2966 //         This call is made after the target process dies.
2967 //
2968 //     NV_ERR_BUSY_RETRY:
2969 //         Internal resources are blocked by other threads.
2970 //
2971 //------------------------------------------------------------------------------
2972 NV_STATUS UvmEventEnable(UvmDebugSession     sessionHandle,
2973                          UvmEventQueueHandle queueHandle,
2974                          unsigned            eventTypeFlags);
2975 
2976 //------------------------------------------------------------------------------
2977 // UvmEventDisable
2978 //
2979 // This call disables a particular event type in the queue.
2980 //
2981 // This API does not access the queue state maintained in the user
2982 // library so the user doesn't need to acquire a lock to protect the queue
2983 // state.
2984 //
2985 // Arguments:
2986 //     sessionHandle: (INPUT)
2987 //         Handle to the debugging session.
2988 //
2989 //     queueHandle: (INPUT)
2990 //         Handle to the queue where events are to be disabled.
2991 //
2992 //     eventTypeFlags: (INPUT)
2993 //         This field specifies the event types to be disabled.
2994 //         For example: To disable migration events and memory violations:
2995 //         pass "UVM_EVENT_ENABLE_MEMORY_VIOLATION | UVM_EVENT_ENABLE_MIGRATION"
2996 //         as flags.
2997 //
2998 // Error codes:
2999 //     RM_ERR_NOT_PERMITTED:
3000 //         Function fails the security check.
3001 //
3002 //     NV_ERR_INVALID_ARGUMENT:
3003 //         One of the arguments is invalid.
3004 //
3005 //     NV_ERR_PID_NOT_FOUND:
3006 //         This call is made after the target process dies.
3007 //
3008 //     NV_ERR_BUSY_RETRY:
3009 //         Internal resources are blocked by other threads.
3010 //
3011 //------------------------------------------------------------------------------
3012 NV_STATUS UvmEventDisable(UvmDebugSession     sessionHandle,
3013                           UvmEventQueueHandle queueHandle,
3014                           unsigned            eventTypeFlags);
3015 
3016 //------------------------------------------------------------------------------
3017 // UvmEventWaitOnQueueHandles
3018 //
3019 // The user is notified when queueEntries >= notificationCount.
3020 // This call does a blocking wait for this notification. It returns when
3021 // at least one of the queue handles has events to be fetched or it times out.
3022 //
3023 // This API accesses constant data maintained in the queue state. Hence,
3024 // the user doesn't need to acquire a lock to protect the queue state.
3025 //
3026 // Arguments:
3027 //     queueHandleArray: (INPUT)
3028 //         Array of queue handles.
3029 //
3030 //     arraySize: (INPUT)
3031 //         Number of handles in the array.
3032 //
3033 //     timeout: (INPUT)
3034 //         Timeout in msec.
3035 //
3036 //     pNotificationFlags: (OUTPUT)
3037 //         If a particular queue handle in the input array is notified then
3038 //         the respective bit flag is set in pNotificationFlags.
3039 //
3040 // Error codes:
3041 //     NV_ERR_INVALID_ARGUMENT:
3042 //         One of the queue handles is invalid.
3043 //
3044 //------------------------------------------------------------------------------
3045 NV_STATUS UvmEventWaitOnQueueHandles(UvmEventQueueHandle *queueHandleArray,
3046                                      unsigned             arraySize,
3047                                      NvU64                timeout,
3048                                      unsigned            *pNotificationFlags);
3049 
3050 //------------------------------------------------------------------------------
3051 // UvmEventGetNotificationHandles
3052 //
3053 // The user is notified when queueEntries >= notificationCount.
3054 // The user can directly get the queue notification handles rather than using
3055 // a UVM API to wait on queue handles. This helps the user to wait on other
3056 // objects (apart from queue notification) along with queue notification
3057 // handles in the same thread. The user can safely use this call along with the
3058 // library supported wait call UvmEventWaitOnQueueHandles.
3059 //
3060 // This API reads constant data maintained in the queue state. Hence,
3061 // the user doesn't need to acquire a lock to protect the queue state.
3062 //
3063 // Arguments:
3064 //     queueHandleArray: (INPUT)
3065 //         Array of queue handles.
3066 //
3067 //     arraySize: (INPUT)
3068 //         Number of handles in the array.
3069 //
3070 //     notificationHandleArray: (OUTPUT)
3071 //         Windows: Output of this call contains an array of 'windows event
3072 //             handles' corresponding to the queue handles passed as input.
3073 //         Linux: All queues belonging to the same process share the same
3074 //             file descriptor (fd) for notification. If the user chooses to use
3075 //             UvmEventGetNotificationHandles then they should check all queues
3076 //             for new events (by calling UvmEventFetch) when notified on
3077 //             the fd.
3078 //
3079 // Error codes:
3080 //     NV_ERR_INVALID_ARGUMENT:
3081 //         One of the arguments is invalid.
3082 //
3083 //------------------------------------------------------------------------------
3084 NV_STATUS UvmEventGetNotificationHandles(UvmEventQueueHandle  *queueHandleArray,
3085                                          unsigned              arraySize,
3086                                          void                **notificationHandleArray);
3087 
3088 //------------------------------------------------------------------------------
3089 // UvmEventGetGpuUuidTable
3090 //
3091 // Each migration event entry contains the GPU index to/from where data is
3092 // migrated. This index maps to a corresponding GPU UUID in the gpuUuidTable.
3093 // Using indices saves on the size of each event entry. This API provides the
3094 // gpuIndex to gpuUuid relation to the user.
3095 //
3096 // This API does not access the queue state maintained in the user
3097 // library and so the user doesn't need to acquire a lock to protect the
3098 // queue state.
3099 //
3100 // Arguments:
3101 //     gpuUuidTable: (OUTPUT)
3102 //         The return value is an array of UUIDs. The array index is the
3103 //         corresponding gpuIndex. There can be at most 32 GPUs associated with
3104 //         UVM, so array size is 32.
3105 //
3106 //     validCount: (OUTPUT)
3107 //         The system doesn't normally contain 32 GPUs. This field gives the
3108 //         count of entries that are valid in the returned gpuUuidTable.
3109 //
3110 // Error codes:
3111 //     NV_ERR_BUSY_RETRY:
3112 //         Internal resources are blocked by other threads.
3113 //
3114 //------------------------------------------------------------------------------
3115 NV_STATUS UvmEventGetGpuUuidTable(NvProcessorUuid *gpuUuidTable,
3116                                   unsigned        *validCount);
3117 
3118 //------------------------------------------------------------------------------
3119 // UvmEventFetch
3120 //
3121 // This call is used to fetch the queue entries into a user buffer.
3122 //
3123 // This API updates the queue state. Hence simultaneous calls to fetch/skip
3124 // events should be avoided as that might corrupt the queue state.
3125 //
3126 // Arguments:
3127 //     sessionHandle: (INPUT)
3128 //         Handle to the debugging session.
3129 //
3130 //     queueHandle: (INPUT)
3131 //         Queue from which to fetch the events.
3132 //
3133 //     pBuffer: (OUTPUT)
3134 //         Pointer to the buffer where the API will copy the events. The user
3135 //         shall ensure that the buffer is large enough.
3136 //
3137 //     nEntries: (INPUT/OUTPUT)
3138 //         It provides the maximum number of entries that will be fetched
3139 //         from the queue. If this number is larger than the size of the
3140 //         queue it will be internally capped to that value.
3141 //         As output it returns the actual number of entries copied to the
3142 //         buffer.
3143 //
3144 // Error codes:
3145 //     RM_ERR_NOT_PERMITTED:
3146 //         Function fails the security check.
3147 //
3148 //     NV_ERR_INVALID_ARGUMENT:
3149 //         One of the arguments is invalid.
3150 //
3151 //     NV_ERR_INVALID_INDEX:
3152 //         The indices of the queue have been corrupted.
3153 //
3154 //     NV_ERR_BUFFER_TOO_SMALL:
3155 //         The event queue buffer provided by the caller was too small to
3156 //         contain all of the events that occurred during this run.
3157 //         Events were therefore dropped (not recorded).
3158 //         Please re-run with a larger buffer.
3159 //
3160 //------------------------------------------------------------------------------
3161 NV_STATUS UvmEventFetch(UvmDebugSession      sessionHandle,
3162                         UvmEventQueueHandle  queueHandle,
3163                         UvmEventEntry       *pBuffer,
3164                         NvU64               *nEntries);
3165 
3166 //------------------------------------------------------------------------------
3167 // UvmEventSkipAll
3168 //
3169 // This API drops all event entries from the queue.
3170 //
3171 // This API updates the queue state. Hence simultaneous calls to fetch/skip
3172 // events should be avoided as that might corrupt the queue state.
3173 //
3174 // Arguments:
3175 //     sessionHandle: (INPUT)
3176 //         Handle to the debugging session.
3177 //
3178 //     queueHandle: (INPUT)
3179 //         Target queue.
3180 //
3181 // Error codes:
3182 //     RM_ERR_NOT_PERMITTED:
3183 //         Function fails the security check.
3184 //
3185 //     NV_ERR_INVALID_ARGUMENT:
3186 //         One of the arguments is invalid.
3187 //
3188 //------------------------------------------------------------------------------
3189 NV_STATUS UvmEventSkipAll(UvmDebugSession     sessionHandle,
3190                           UvmEventQueueHandle queueHandle);
3191 
3192 //------------------------------------------------------------------------------
3193 // UvmEventQueryTimeStampType
3194 //
3195 // This API returns the type of time stamp used in an event entry for a given
3196 // queue.
3197 //
3198 // This API reads constant data maintained in the queue state. Hence,
3199 // the user doesn't need to acquire a lock to protect the queue state.
3200 //
3201 // Arguments:
3202 //     sessionHandle: (INPUT)
3203 //         Handle to the debugging session.
3204 //
3205 //     queueHandle: (INPUT)
3206 //         Target queue.
3207 //
3208 //     timeStampType: (OUTPUT)
3209 //         Type of time stamp used in event entry. See UvmEventTimeStampType
3210 //         for supported types of time stamps.
3211 //
3212 // Error codes:
3213 //     RM_ERR_NOT_PERMITTED:
3214 //         Function fails the security check.
3215 //
3216 //     NV_ERR_INVALID_ARGUMENT:
3217 //         One of the arguments is invalid.
3218 //
3219 //------------------------------------------------------------------------------
3220 NV_STATUS UvmEventQueryTimeStampType(UvmDebugSession        sessionHandle,
3221                                      UvmEventQueueHandle    queueHandle,
3222                                      UvmEventTimeStampType *timeStampType);
3223 
3224 //------------------------------------------------------------------------------
3225 // UvmDebugAccessMemory
3226 //
3227 // This call can be used by the debugger to read/write a memory range. The UVM
3228 // driver may not be aware of all the pages in this range. A bit per page is set
3229 // by the driver if it is read/written by UVM.
3230 //
3231 // Arguments:
3232 //     session: (INPUT)
3233 //         Handle to the debugging session.
3234 //
3235 //     baseAddress: (INPUT)
3236 //         Base address from where memory is to be accessed.
3237 //
3238 //     sizeInBytes: (INPUT)
3239 //         Number of bytes to be accessed.
3240 //
3241 //     accessType: (INPUT)
3242 //         Read or write access request.
3243 //
3244 //     buffer: (INPUT/OUTPUT)
3245 //         This buffer would be read or written to by the driver.
3246 //         User needs to allocate a big enough buffer to fit sizeInBytes.
3247 //
3248 //     isBitmaskSet: (INPUT/OUTPUT)
3249 //         Set to 1 if any bit in bitmask is set.
3250 //         Pass NULL if unused.
3251 //
3252 //     bitmask: (INPUT/OUTPUT)
3253 //         One bit per page is set if UVM reads or writes to it.
3254 //         User should allocate a bitmask big enough to fit one bit per page
3255 //         covered by baseAddress + sizeInBytes:
3256 //         (baseAlignmentBytes + sizeInBytes + pageSize - 1)/pageSize number
3257 //         of bits.
3258 //         Pass NULL if unused.
3259 //
3260 // Error codes:
3261 //     NV_ERR_INVALID_ARGUMENT:
3262 //         One of the arguments is invalid.
3263 //
3264 //------------------------------------------------------------------------------
3265 NV_STATUS UvmDebugAccessMemory(UvmDebugSession     session,
3266                                void               *baseAddress,
3267                                NvU64               sizeInBytes,
3268                                UvmDebugAccessType  accessType,
3269                                void               *buffer,
3270                                NvBool             *isBitmaskSet,
3271                                NvU64              *bitmask);
3272 
3273 //
3274 // UVM Tools API
3275 //
3276 
3277 
3278 //------------------------------------------------------------------------------
3279 // UvmToolsCreateSession
3280 //
3281 // Creates a handle for a tools session.
3282 //
3283 // When the client initializes, it will pass a duplicated UVM file handle from
3284 // the target process's UvmGetFileDescriptor API, e.g. by calling
3285 // DuplicateHandle or dup2, or by sharing the file descriptor over a Unix domain
3286 // socket. The returned session handle is required to create other Tools
3287 // objects, e.g. events, counters.
3288 //
3289 // To guarantee that the session persists for the lifetime of the target
3290 // process, the caller is responsible for passing a duplicate file descriptor.
3291 // This is also required for correctness in case of an out-of-process session.
3292 //
3293 // Passing a non-duplicated UVM file handle results in undefined behaviour. The
3294 // least that you should expect is that all your session-related objects will
3295 // become useless once the target process closes its UVM file handle.
3296 //
3297 // There are security requirements for this call to be successful. After
3298 // validating the file descriptor, one of the following conditions must
3299 // hold:
3300 // 1.  The session owner is running as an elevated user
3301 // 2.  The session owner and target belong to the same user and the
3302 //     session owner is at least as privileged as the target.
3303 //
3304 // Arguments:
3305 //     fd: (INPUT)
3306 //         Duplicated file handle from target process.
3307 //
3308 //     session: (OUTPUT)
3309 //         Handle to the tools session associated to fd above.
3310 //
3311 // Error codes:
3312 //     NV_ERR_INVALID_ARGUMENT:
3313 //         fd is either closed or does not refer to a UVM device.
3314 //
3315 //     NV_ERR_NO_MEMORY:
3316 //         Internal memory allocation failed.
3317 //
3318 //------------------------------------------------------------------------------
3319 NV_STATUS UvmToolsCreateSession(UvmFileDescriptor      fd,
3320                                 UvmToolsSessionHandle *session);
3321 
3322 //------------------------------------------------------------------------------
3323 // UvmToolsDestroySession
3324 //
3325 // Destroys a tools session. This also has the side effect of closing the fd
3326 // that was associated with this session by UvmToolsCreateSession.
3327 //
3328 // All resources associated with this session (counters, event queues) are also
3329 // destroyed.
3330 //
3331 // Arguments:
3332 //     session: (INPUT)
3333 //         Handle associated with a Tools session.
3334 //
3335 // Error codes:
3336 //     NV_ERR_INVALID_ARGUMENT:
3337 //         Session handle does not refer to a valid session.
3338 //
3339 //------------------------------------------------------------------------------
3340 NV_STATUS UvmToolsDestroySession(UvmToolsSessionHandle session);
3341 
3342 //
3343 // Events subsystem
3344 //
3345 // Events subsystem is useful for a tools process to track target process
3346 // behaviour. Every event refers to a single process using Unified memory.
3347 //
3348 // The most typical use case is as follows:
3349 // 1. Create event Queue using UvmToolsCreateEventQueue
3350 // 2. Start capture of interesting event types using
3351 //    UvmToolsEventQueueEnableEvents
3352 // 3. Poll / loop using Get/Put pointer
3353 //    - Consume existing events from user's buffer
3354 //    - Exit loop based on some condition (e.g. timeout, target process exit)
3355 //    - Pause (stop) capture of some of the events
3356 // 4. Destroy event Queue using UvmToolsDestroyEventQueue
3357 //
3358 
3359 // Returns the current size of UvmToolsEventControlData.
3360 NvLength UvmToolsGetEventControlSize(void);
3361 // NOTE(review): presumably the size of a single UvmEventEntry - confirm.
3362 NvLength UvmToolsGetEventEntrySize(void);
3363 // NOTE(review): presumably the number of counter types - confirm.
3364 NvLength UvmToolsGetNumberOfCounters(void);
3365 
3366 //------------------------------------------------------------------------------
3367 // UvmToolsCreateEventQueue
3368 //
3369 // This call creates an event queue that can hold the given number of events.
3370 // All events are disabled by default. Event queue data persists lifetime of the
3371 // target process.
3372 //
3373 // Arguments:
3374 //     session: (INPUT)
3375 //         Handle to the tools session.
3376 //
3377 //     event_buffer: (INPUT)
3378 //         User allocated buffer. Must be page-aligned. Must be large enough to
3379 //         hold at least event_buffer_size events. Gets pinned until queue is
3380 //         destroyed.
3381 //
3382 //     event_buffer_size: (INPUT)
3383 //         Size of the event queue buffer in units of UvmEventEntry's. Must be
3384 //         a power of two, and greater than 1.
3385 //
3386 //     event_control: (INPUT)
3387 //         User allocated buffer. Must be page-aligned. Must be large enough to
3388 //         hold UvmToolsEventControlData (although single page-size allocation
3389 //         should be more than enough). Call the
3390 //         UvmToolsGetEventControlSize() function to find out the current size
3391 //         of UvmToolsEventControlData. Gets pinned until queue is destroyed.
3392 //
3393 //     queue: (OUTPUT)
3394 //         Handle to the created queue.
3395 //
3396 // Error codes:
3397 //     NV_ERR_INSUFFICIENT_PERMISSIONS:
3398 //         Session handle does not refer to a valid session.
3399 //
3400 //     NV_ERR_INVALID_ARGUMENT:
3401 //         One of the parameters: event_buffer, event_buffer_size, event_control
3402 //         is not valid.
3403 //
3404 //     NV_ERR_INSUFFICIENT_RESOURCES:
3405 //         There could be multiple reasons for this error. One would be that it's
3406 //         not possible to allocate a queue of requested size. Another would be
3407 //         that either event_buffer or event_control memory couldn't be pinned
3408 //         (e.g. because of OS limitation of pinnable memory). It is also
3409 //         possible that a UvmToolsEventQueueDescriptor could not be created.
3410 //
3411 //------------------------------------------------------------------------------
3412 NV_STATUS UvmToolsCreateEventQueue(UvmToolsSessionHandle     session,
3413                                    void                     *event_buffer,
3414                                    NvLength                  event_buffer_size,
3415                                    void                     *event_control,
3416                                    UvmToolsEventQueueHandle *queue);
3417 // Returns the UvmToolsEventQueueDescriptor for the given event queue handle.
3418 UvmToolsEventQueueDescriptor UvmToolsGetEventQueueDescriptor(UvmToolsEventQueueHandle queue);
3419 
3420 
3421 //------------------------------------------------------------------------------
3422 // UvmToolsSetNotificationThreshold
3423 //
3424 // Sets a custom notification threshold in number of events for a given queue.
3425 // Polling subsystem will notify user about this queue if and only if number
3426 // of unconsumed events is greater than or equal to notification_threshold.
3427 // Default threshold upon creating an event queue is floor(N / 2), where N
3428 // represents maximum number of events that this queue can fit.
3429 //
3430 // Consequently, if notification_threshold is greater than queue size, there
3431 // will be no notification.
3432 //
3433 // Arguments:
3434 //     queue: (INPUT)
3435 //         Handle to the queue for which the notification threshold is set.
3436 //
3437 //     notification_threshold: (INPUT)
3438 //         A new threshold, in number of events, to be set for this queue.
3439 //
3440 // Error codes:
3441 //     NV_ERR_INSUFFICIENT_PERMISSIONS:
3442 //         Event Queue might be corrupted (associated session is not valid).
3443 //
3444 //     NV_ERR_INVALID_ARGUMENT:
3445 //         Queue handle does not refer to a valid queue.
3446 //------------------------------------------------------------------------------
3447 NV_STATUS UvmToolsSetNotificationThreshold(UvmToolsEventQueueHandle queue,
3448                                            NvLength notification_threshold);
3449 
3450 //------------------------------------------------------------------------------
3451 // UvmToolsDestroyEventQueue
3452 //
3453 // Destroys all internal resources associated with the queue. It unpins the
3454 // buffers provided in UvmToolsCreateEventQueue. Event Queue is also
3455 // automatically destroyed when corresponding session gets destroyed.
3456 //
3457 // Arguments:
3458 //     queue: (INPUT)
3459 //         Handle to the queue to be destroyed.
3460 //
3461 // Error codes:
3462 //     NV_ERR_INSUFFICIENT_PERMISSIONS:
3463 //         Event Queue might be corrupted (associated session is not valid).
3464 //
3465 //     NV_ERR_INVALID_ARGUMENT:
3466 //         Queue handle does not refer to a valid queue.
3467 //
3468 //------------------------------------------------------------------------------
3469 NV_STATUS UvmToolsDestroyEventQueue(UvmToolsEventQueueHandle queue);
3470 
3471 //------------------------------------------------------------------------------
3472 // UvmToolsEventQueueEnableEvents
3473 //
3474 // This call enables a particular event type in the event queue. All events are
3475 // disabled by default. Any event type is considered listed if and only if its
3476 // corresponding value is equal to 1 (in other words, bit is set). Disabled
3477 // events listed in eventTypeFlags are going to be enabled. Enabled events and
3478 // events not listed in eventTypeFlags are not affected by this call.
3479 //
3480 // It is not an error to call this function multiple times with the same
3481 // arguments.
3482 //
3483 // Arguments:
3484 //     queue: (INPUT)
3485 //         Handle to the queue, for which events are supposed to be enabled.
3486 //
3487 //     eventTypeFlags: (INPUT)
3488 //         This bit field specifies the event types to be enabled. Events not
3489 //         specified in this field do not change their state. For example to
3490 //         enable migration and memory violation events pass flags
3491 //         "UVM_EVENT_ENABLE_MEMORY_VIOLATION | UVM_EVENT_ENABLE_MIGRATION"
3492 //
3493 // Error codes:
3494 //     NV_ERR_INSUFFICIENT_PERMISSIONS:
3495 //         Event Queue might be corrupted (associated session is not valid).
3496 //
3497 //     NV_ERR_INVALID_ARGUMENT:
3498 //         Queue handle does not refer to a valid queue.
3499 //
3500 //------------------------------------------------------------------------------
3501 NV_STATUS UvmToolsEventQueueEnableEvents(UvmToolsEventQueueHandle queue,
3502                                          NvU64                    eventTypeFlags);
3503 
3504 //------------------------------------------------------------------------------
3505 // UvmToolsEventQueueDisableEvents
3506 //
3507 // This call disables a particular event type in the event queue. Any event type
3508 // is considered listed if and only if its corresponding value is equal to 1
3509 // (in other words, bit is set). Enabled events listed in eventTypeFlags are
3510 // going to be disabled. Disabled events and events not listed in eventTypeFlags
3511 // are not affected by this call.
3512 //
3513 // It is not an error to call this function multiple times with the same
3514 // arguments.
3515 //
3516 // Arguments:
3517 //     queue: (INPUT)
3518 //         Handle to the queue, for which events are supposed to be disabled.
3519 //
3520 //     eventTypeFlags: (INPUT)
3521 //         This bit field specifies the event types to be disabled. Events not
3522 //         specified in this field do not change their state. For example to
3523 //         disable migration and memory violation events pass flags
3524 //         "UVM_EVENT_ENABLE_MEMORY_VIOLATION | UVM_EVENT_ENABLE_MIGRATION"
3525 //
3526 // Error codes:
3527 //     NV_ERR_INSUFFICIENT_PERMISSIONS:
3528 //         Event Queue might be corrupted (associated session is not valid).
3529 //
3530 //     NV_ERR_INVALID_ARGUMENT:
3531 //         Queue handle does not refer to a valid event queue.
3532 //
3533 //------------------------------------------------------------------------------
3534 NV_STATUS UvmToolsEventQueueDisableEvents(UvmToolsEventQueueHandle queue,
3535                                           NvU64                    eventTypeFlags);
3536 
3537 
3538 //------------------------------------------------------------------------------
3539 // UvmToolsCreateProcessAggregateCounters
3540 //
3541 // Creates the counters structure for tracking aggregate process counters.
3542 // These counters are enabled by default.
3543 //
3544 // Counters position follows the layout of the memory that UVM driver decides to
3545 // use. To obtain particular counter value, user should perform consecutive
3546 // atomic reads at a given buffer + offset address.
3547 //
3548 // It is not defined what is the initial value of a counter. User should rely on
3549 // a difference between each snapshot.
3550 //
3551 // Arguments:
3552 //     session: (INPUT)
3553 //         Handle to the tools session.
3554 //
3555 //     counters_buffer: (INPUT)
3556 //         User allocated buffer. Must be aligned to the OS's page size. Must
3557 //         be large enough to hold all possible counter types. In practice, 4kB
3558 //         system page (minimal granularity) should be sufficient. This memory
3559 //         gets pinned until counters are destroyed.
3560 //
3561 //     counters: (OUTPUT)
3562 //         Handle to the created counters.
3563 //
3564 // Error codes:
3565 //     NV_ERR_INSUFFICIENT_PERMISSIONS:
3566 //         Provided session is not valid.
3567 //
3568 //     NV_ERR_INSUFFICIENT_RESOURCES:
3569 //         There could be multiple reasons for this error. One would be that it's
3570 //         not possible to allocate counters structure. Another would be that
3571 //         counters_buffer memory couldn't be pinned (e.g. because of OS
3572 //         limitation of pinnable memory).
3573 //
3574 //------------------------------------------------------------------------------
3575 NV_STATUS UvmToolsCreateProcessAggregateCounters(UvmToolsSessionHandle   session,
3576                                                  void                   *counters_buffer,
3577                                                  UvmToolsCountersHandle *counters);
3578 
3579 //------------------------------------------------------------------------------
3580 // UvmToolsCreateProcessorCounters
3581 //
3582 // Creates the counters structure for tracking per-processor counters.
3583 // These counters are disabled by default.
3584 //
3585 // Counters position follows the layout of the memory that UVM driver decides to
3586 // use. To obtain particular counter value, user should perform consecutive
3587 // atomic reads at a given buffer + offset address.
3588 //
3589 // It is not defined what is the initial value of a counter. User should rely on
3590 // a difference between each snapshot.
3591 //
3592 // Arguments:
3593 //     session: (INPUT)
3594 //         Handle to the tools session.
3595 //
3596 //     counters_buffer: (INPUT)
3597 //         User allocated buffer. Must be aligned to the OS's page size. Must
3598 //         be large enough to hold all possible counter types. In practice, 4kB
3599 //         system page should be sufficient. This memory gets pinned until
3600 //         counters are destroyed.
3601 //
3602 //     processorUuid: (INPUT)
3603 //         UUID of the resource, for which counters will provide statistic data.
3604 //
3605 //     counters: (OUTPUT)
3606 //         Handle to the created counters.
3607 //
3608 // Error codes:
3609 //     NV_ERR_INSUFFICIENT_PERMISSIONS:
3610 //         session handle does not refer to a valid tools session.
3611 //
3612 //     NV_ERR_INSUFFICIENT_RESOURCES:
3613 //         There could be multiple reasons for this error. One would be that it's
3614 //         not possible to allocate counters structure. Another would be that
3615 //         counters_buffer memory couldn't be pinned (e.g. because of OS
3616 //         limitation of pinnable memory).
3617 //
3618 //     NV_ERR_INVALID_ARGUMENT:
3619 //         processorUuid does not refer to any known resource in UVM driver.
3620 //
3621 //------------------------------------------------------------------------------
3622 NV_STATUS UvmToolsCreateProcessorCounters(UvmToolsSessionHandle   session,
3623                                           void                   *counters_buffer,
3624                                           const NvProcessorUuid  *processorUuid,
3625                                           UvmToolsCountersHandle *counters);
3626 
3627 //------------------------------------------------------------------------------
3628 // UvmToolsDestroyCounters
3629 //
3630 // Destroys all internal resources associated with this counters structure.
3631 // It unpins the buffer provided in UvmToolsCreate*Counters. Counters structure
3632 // also gets destroyed when corresponding session is destroyed.
3633 //
3634 // Arguments:
3635 //     counters: (INPUT)
3636 //         Handle to the counters structure.
3637 //
3638 // Error codes:
3639 //     NV_ERR_INSUFFICIENT_PERMISSIONS:
3640 //         State of the counters has been corrupted.
3641 //
3642 //     NV_ERR_INVALID_ARGUMENT:
3643 //         Counters handle does not refer to a valid Counters structure.
3644 //
3645 //------------------------------------------------------------------------------
3646 NV_STATUS UvmToolsDestroyCounters(UvmToolsCountersHandle counters);
3647 
3648 //------------------------------------------------------------------------------
3649 // UvmToolsEnableCounters
3650 //
3651 // This call enables certain counter types in the counters structure. Any
3652 // counter type is considered listed if and only if its corresponding value is
3653 // equal to 1 (in other words, bit is set). Disabled counter types listed in
3654 // counterTypeFlags are going to be enabled. Already enabled counter types and
3655 // counter types not listed in counterTypeFlags are not affected by this call.
3656 //
3657 // It is not an error to call this function multiple times with the same
3658 // arguments.
3659 //
3660 // Arguments:
3661 //     counters: (INPUT)
3662 //         Handle to the counters structure.
3663 //
3664 //     counterTypeFlags: (INPUT)
3665 //         This bit field specifies the counter types to be enabled.
3666 //         For example, to enable faults number accounting and number of bytes
3667 //         transferred into a given resource (or aggregate) pass flags
3668 //         "UVM_COUNTER_ENABLE_FAULTS_NUMBER |
3669 //          UVM_COUNTER_ENABLE_BYTES_TRANSFERRED_IN"
3670 //
3671 // Error codes:
3672 //     NV_ERR_INSUFFICIENT_PERMISSIONS:
3673 //         Counters structure might be corrupted (associated session is not
3674 //         valid).
3675 //
3676 //     NV_ERR_INVALID_ARGUMENT:
3677 //         Counters handle does not refer to a valid counters structure.
3678 //------------------------------------------------------------------------------
3679 NV_STATUS UvmToolsEnableCounters(UvmToolsCountersHandle counters,
3680                                  NvU64                  counterTypeFlags);
3681 
3682 //------------------------------------------------------------------------------
3683 // UvmToolsDisableCounters
3684 //
3685 // This call disables certain counter types in the counters structure. Any
3686 // counter type is considered listed if and only if its corresponding value is
3687 // equal to 1 (in other words, bit is set). Enabled counter types listed in
3688 // counterTypeFlags are going to be disabled. Already disabled counter types and
3689 // counter types not listed in counterTypeFlags are not affected by this call.
3690 //
3691 // It is not an error to call this function multiple times with the same
3692 // arguments.
3693 //
3694 // Arguments:
3695 //     counters: (INPUT)
3696 //         Handle to the counters structure.
3697 //
3698 //     counterTypeFlags: (INPUT)
3699 //         This bit field specifies the counter types to be disabled.
3700 //         For example, to disable faults number accounting and number of bytes
3701 //         transferred into a given resource (or aggregate) pass flags
3702 //         "UVM_COUNTER_ENABLE_FAULTS_NUMBER |
3703 //          UVM_COUNTER_ENABLE_BYTES_TRANSFERRED_IN"
3704 //
3705 // Error codes:
3706 //     NV_ERR_INSUFFICIENT_PERMISSIONS:
3707 //         Counters structure might be corrupted (associated session is not
3708 //         valid).
3709 //
3710 //     NV_ERR_INVALID_ARGUMENT:
3711 //         Counters handle does not refer to a valid counters structure.
3712 //------------------------------------------------------------------------------
3713 NV_STATUS UvmToolsDisableCounters(UvmToolsCountersHandle counters,
3714                                   NvU64                  counterTypeFlags);
3715 
3716 //------------------------------------------------------------------------------
3717 // UvmToolsReadProcessMemory
3718 //
3719 // Reads up to size bytes from a given target process's virtual address.
3720 // If size is 0, function should successfully return the size of the largest
3721 // chunk that can be read starting at a given target process's virtual
3722 // address. This might be used to discover size of user's allocation.
3723 //
3724 // Upon successful execution and size greater than 0, user should have a copy of
3725 // target's process memory in a given buffer. Result is unspecified in case of
3726 // in-process scenario when targetVa address + size overlaps with buffer + size.
3727 //
3728 // This is essentially a UVM version of RM ctrl call
3729 // NV83DE_CTRL_CMD_DEBUG_READ_MEMORY. For implementation constraints (and more
3730 // information), please refer to the documentation:
3731 // //sw/docs/resman/components/compute/UVM/subsystems/UVM_8_Tools_API_Design.docx
3732 //
3733 // Arguments:
3734 //     session: (INPUT)
3735 //         Handle to the tools session.
3736 //
3737 //     buffer: (INPUT)
3738 //         User buffer (destination) address, where requested memory shall be
3739 //         copied.
3740 //
3741 //     size: (INPUT)
3742 //         Number of bytes requested to be copied. If user's buffer is not large
3743 //         enough to fit size bytes, result is unspecified. If this is 0,
3744 //         function should return largest chunk of memory available to read.
3745 //
3746 //     targetVa: (INPUT)
3747 //         Target process's (source) address, from which memory should be
3748 //         copied.
3749 //
3750 //     bytes_read: (OUTPUT)
3751 //         Either number of bytes successfully read or the largest chunk of
3752 //         memory available to read, depending on size parameter.
3753 //
3754 // Error codes:
3755 //     NV_ERR_INSUFFICIENT_PERMISSIONS:
3756 //         session handle does not refer to a valid tools session.
3757 //
3758 //     NV_ERR_INVALID_ADDRESS:
3759 //         UVM driver has no knowledge of targetVa address.
3760 //
3761 //     NV_ERR_INVALID_ARGUMENT:
3762 //         Read spans more than a single target process allocation.
3763 //
3764 //
3765 //------------------------------------------------------------------------------
3766 NV_STATUS UvmToolsReadProcessMemory(UvmToolsSessionHandle  session,
3767                                     void                  *buffer,
3768                                     NvLength               size,
3769                                     void                  *targetVa,
3770                                     NvLength              *bytes_read);
3771 
3772 //------------------------------------------------------------------------------
3773 // UvmToolsWriteProcessMemory
3774 //
3775 // Writes up to size bytes to a given target process's virtual address.
3776 // If size is 0, function should successfully return the size of the largest
3777 // chunk that can be written starting at a given target process's virtual
3778 // address. This might be used to discover size of user's allocation.
3779 //
3780 // Upon successful execution and size greater than 0, target process should have
3781 // a copy of buffer starting at targetVa address. Result is unspecified in case
3782 // of in-process scenario when targetVa address + size overlaps with
3783 // buffer + size.
3784 //
3785 // This is essentially a UVM version of RM ctrl call
3786 // NV83DE_CTRL_CMD_DEBUG_WRITE_MEMORY. For implementation constraints (and more
3787 // information), please refer to the documentation:
3788 // //sw/docs/resman/components/compute/UVM/subsystems/UVM_8_Tools_API_Design.docx
3789 //
3790 // Arguments:
3791 //     session: (INPUT)
3792 //         Handle to the tools session.
3793 //
3794 //     buffer: (INPUT)
3795 //         User buffer (source) address, from which requested memory shall be
3796 //         copied.
3797 //
3798 //     size: (INPUT)
3799 //         Number of bytes requested to be copied. If user's buffer is not large
3800 //         enough to fit size bytes, result is unspecified. If this is 0,
3801 //         function should return largest chunk of memory available to write.
3802 //
3803 //     targetVa: (INPUT)
3804 //         Target process's (destination) address, where memory should be
3805 //         copied.
3806 //
3807 //     bytes_read: (OUTPUT)
3808 //         Either number of bytes successfully written or the largest chunk of
3809 //         memory available to write, depending on size parameter.
3810 //
3811 // Error codes:
3812 //     NV_ERR_INSUFFICIENT_PERMISSIONS:
3813 //         session handle does not refer to a valid tools session.
3814 //
3815 //     NV_ERR_INVALID_ADDRESS:
3816 //         UVM driver has no knowledge of targetVa address.
3817 //
3818 //     NV_ERR_INVALID_ARGUMENT:
3819 //         Write spans more than a single target process allocation.
3820 //
3821 //------------------------------------------------------------------------------
3822 NV_STATUS UvmToolsWriteProcessMemory(UvmToolsSessionHandle  session,
3823                                      void                  *buffer,
3824                                      NvLength               size,
3825                                      void                  *targetVa,
3826                                      NvLength              *bytes_read);
3827 
3828 //------------------------------------------------------------------------------
3829 // UvmToolsGetProcessorUuidTable
3830 //
3831 // Populate a table with the UUIDs of all the currently registered processors
3832 // in the target process.  When a GPU is registered, it is added to the table.
3833 // When a GPU is unregistered, it is removed.  As long as a GPU remains
3834 // registered, its index in the table does not change.  New registrations
3835 // obtain the first unused index.
3836 //
3837 // Arguments:
3838 //     session: (INPUT)
3839 //         Handle to the tools session.
3840 //
3841 //     table: (OUTPUT)
3842 //         Array of processor UUIDs, including the CPU's UUID which is always
3843 //         at index zero.  The srcIndex and dstIndex fields of the
3844 //         UvmEventMigrationInfo struct index this array.  Unused indices will
3845 //         have a UUID of zero.
3846 //
3847 //     count: (OUTPUT)
3848 //         Set by UVM to the number of UUIDs written, including any gaps in
3849 //         the table due to unregistered GPUs.
3850 //
3851 // Error codes:
3852 //     NV_ERR_INVALID_ADDRESS:
3853 //         Writing to table failed.
3854 //------------------------------------------------------------------------------
3855 NV_STATUS UvmToolsGetProcessorUuidTable(UvmToolsSessionHandle  session,
3856                                         NvProcessorUuid       *table,
3857                                         NvLength              *count);
3858 
3859 //------------------------------------------------------------------------------
3860 // UvmToolsFlushEvents
3861 //
3862 // Some events, like migrations, which have end timestamps are not immediately
3863 // submitted to queues when they are completed.  This call enqueues any
3864 // completed but not yet enqueued events associated with the session.
3865 //
3866 // Arguments:
3867 //     session: (INPUT)
3868 //         Handle to the tools session.
3869 //
3870 // Error codes:
3871 //     NV_ERR_INSUFFICIENT_PERMISSIONS:
3872 //         Session handle does not refer to a valid session.
3873 //------------------------------------------------------------------------------
3874 NV_STATUS UvmToolsFlushEvents(UvmToolsSessionHandle session);
3875 
3876 #ifdef __cplusplus
3877 }
3878 #endif
3879 
3880 #endif // _UVM_H_
3881