1 /******************************************************************************* 2 Copyright (c) 2013-2022 NVIDIA Corporation 3 4 Permission is hereby granted, free of charge, to any person obtaining a copy 5 of this software and associated documentation files (the "Software"), to 6 deal in the Software without restriction, including without limitation the 7 rights to use, copy, modify, merge, publish, distribute, sublicense, and/or 8 sell copies of the Software, and to permit persons to whom the Software is 9 furnished to do so, subject to the following conditions: 10 11 The above copyright notice and this permission notice shall be 12 included in all copies or substantial portions of the Software. 13 14 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 15 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 16 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 17 THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 18 LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 19 FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 20 DEALINGS IN THE SOFTWARE. 21 *******************************************************************************/ 22 23 // 24 // uvm.h 25 // 26 // This file contains the UVM API declarations, for the userspace-to-kernel 27 // calls. For legacy API definitions that are in use on Windows, see 28 // uvm_legacy.h. 29 // 30 31 // UVM API signature modification steps 32 // In order to change API signature for any of the APIs defined in this file, a 33 // particular sequence of steps has to be followed since the consumer of this 34 // API (i.e. CUDA) belongs to a different module branch than the one for this 35 // file. Here are the steps to change the signature for a hypothetical API named 36 // UvmExampleApi. The assumption being made here is that this file is being 37 // modified in chips_a. 
38 // 1) Increment the value of UVM_API_LATEST_REVISION defined in this file. 39 // 2) Use the macro UVM_API_REV_IS_AT_MOST to define the two revisions of the 40 // API as follows: 41 // #if UVM_API_REV_IS_AT_MOST(<previous_value_of_UVM_API_LATEST_REVISION>) 42 // // Old UvmExampleApi declaration 43 // #else 44 // // New UvmExampleApi declaration 45 // #endif 46 // 3) Do the same thing for the function definition, and for any structs that 47 // are taken as arguments to these functions. 48 // 4) Let this change propagate over to cuda_a, so that the CUDA driver can 49 // start using the new API by bumping up the API version number its using. 50 // This can be found in gpgpu/cuda/cuda.nvmk. 51 // 5) Once the cuda_a changes have made it back into chips_a, remove the old API 52 // declaration, definition, and any old structs that were in use. 53 54 #ifndef _UVM_H_ 55 #define _UVM_H_ 56 57 #define UVM_API_LATEST_REVISION 7 58 59 #if !defined(UVM_API_REVISION) 60 #error "please define UVM_API_REVISION macro to a desired version number or UVM_API_LATEST_REVISION macro" 61 #endif 62 63 #define UVM_API_REV_IS_AT_MOST(rev) (UVM_API_REVISION <= rev) 64 65 #include "uvm_types.h" 66 #include "uvm_user_types.h" 67 #include "uvm_legacy.h" 68 69 #ifdef __cplusplus 70 extern "C" { 71 #endif 72 73 //------------------------------------------------------------------------------ 74 // UvmSetDriverVersion 75 // 76 // Informs the user-mode layer which kernel driver version is running. The user- 77 // mode layer uses this information to know what flavor to use when calling 78 // kernel APIs. 79 // 80 // If this API is not called, the user-mode layer assumes that the kernel 81 // version is the same as the user-mode layer version. 82 // 83 // The last UvmDeinitialize will reset this state. 84 // 85 // If this API is called, it must be called before UvmInitialize. 
It is an error 86 // to call this API after UvmInitialize and before the last UvmDeinitialize, or 87 // to call this API more than once before the last UvmDeinitialize. 88 // 89 // Arguments: 90 // major: (INPUT) 91 // The kernel driver's major version number, such as 384. 92 // 93 // changelist: (INPUT) 94 // The changelist at which the kernel driver was built. 95 // 96 // Error codes: 97 // NV_ERR_INVALID_STATE: 98 // UvmInitialize or UvmSetDriverVersion has already been called. 99 // 100 //------------------------------------------------------------------------------ 101 NV_STATUS UvmSetDriverVersion(NvU32 major, NvU32 changelist); 102 103 //------------------------------------------------------------------------------ 104 // UvmInitialize 105 // 106 // This must be called before any other UVM functions except for 107 // UvmSetDriverVersion. Repeated calls to UvmInitialize increment a refcount, 108 // which is decremented by calls to UvmDeinitialize. UVM deinitialization occurs 109 // when the refcount reaches zero. 110 // 111 // The UVM file descriptor passed in can either be UVM_AUTO_FD or a valid file 112 // descriptor created during a prior call to UvmInitialize. If UVM_AUTO_FD is 113 // passed and the refcount is zero, a new file descriptor is created. Subsequent 114 // calls must either also specify UVM_AUTO_FD or use the current file 115 // descriptor. If the first call to UvmInitialize did not specify UVM_AUTO_FD, 116 // all subsequent calls must use the same file descriptor used in the initial 117 // call. The file descriptor that is currently in use can be retrieved using 118 // UvmGetFileDescriptor. 119 // 120 // If flags does not contain UVM_INIT_FLAGS_MULTI_PROCESS_SHARING_MODE, the 121 // UvmInitialize call which creates the file descriptor will associate the 122 // calling process with that file descriptor when the Operating System can 123 // support such an association. 
In that case UvmInitialize may be called using 124 // the same file in other processes, but internally the file remains associated 125 // with the original process. 126 // 127 // Arguments: 128 // fd: (INPUT) 129 // The UVM file descriptor to initialize UVM with. Passing in 130 // UVM_AUTO_FD creates a new file descriptor on the first call to 131 // UvmInitialize. 132 // 133 // flags: (INPUT) 134 // Must be a combination of 0 or more of the following flags: 135 // 136 // - UVM_INIT_FLAGS_DISABLE_HMM 137 // Specifying this flag will only have an effect if the system 138 // allows GPUs to read/write system (CPU) pageable memory and the 139 // GPUs do not have hardware support to do it transparently, and the 140 // UVM_INIT_FLAGS_MULTI_PROCESS_SHARING_MODE flag is not specified. 141 // In such cases pageable access from the GPU will be disabled. 142 // 143 // Pageable memory here refers to memory allocated by the Operating 144 // System for the process's globals, stack variables, heap 145 // allocations, etc. that has not been registered for CUDA access 146 // using cudaHostRegister. 147 // 148 // - UVM_INIT_FLAGS_MULTI_PROCESS_SHARING_MODE 149 // Specifying this flag will prevent UVM from creating any 150 // association between this process and the UVM file descriptor. 151 // Pageable memory access of any kind will be disabled (regardless 152 // of whether UVM_INIT_FLAGS_DISABLE_HMM was specified) and the GPU 153 // resources used by the UVM file descriptor will be freed when the 154 // last reference to the file is dropped rather than when this 155 // process exits. 156 // 157 // If this flag is not specified, calling UvmMemMap or 158 // UvmAllocSemaphorePool on the same file from a different process 159 // may return an error. 160 // 161 // If UvmInitialize is called multiple times on the same file, even from 162 // different processes, the flags to each call must match. 
163 // 164 // Error codes: 165 // NV_ERR_NOT_SUPPORTED: 166 // The Linux kernel is not able to support UVM. This could be because 167 // the kernel is too old, or because it lacks a feature that UVM 168 // requires. The kernel log will have details. 169 // 170 // NV_ERR_INVALID_ARGUMENT: 171 // The file descriptor passed in is neither UVM_AUTO_FD nor a valid file 172 // descriptor created during a prior call to UvmInitialize, or the flags 173 // do not match a prior call to UvmInitialize. 174 // 175 // NV_ERR_NO_MEMORY: 176 // Internal memory allocation failed. 177 // 178 // NV_ERR_GENERIC: 179 // Unexpected error. We try hard to avoid returning this error code, 180 // because it is not very informative. 181 // 182 //------------------------------------------------------------------------------ 183 #if UVM_API_REV_IS_AT_MOST(4) // API revisions up to 4: older form without flags 184 NV_STATUS UvmInitialize(UvmFileDescriptor fd); 185 #else // API revision 5 and later: takes the flags documented above 186 NV_STATUS UvmInitialize(UvmFileDescriptor fd, 187 NvU64 flags); 188 #endif // UVM_API_REV_IS_AT_MOST(4) 189 190 //------------------------------------------------------------------------------ 191 // UvmDeinitialize 192 // 193 // Releases the reference implicitly obtained by UvmInitialize. If the refcount 194 // reaches zero, cleans up all UVM resources associated with the calling 195 // process. Any channels that are still registered will be unregistered prior to 196 // unmapping any managed allocations. Any resources that have been shared with 197 // other processes and are still being used will continue to remain valid. 198 // 199 // Error codes: 200 // NV_ERR_INVALID_STATE: 201 // Refcount is zero. 202 // 203 // NV_ERR_GENERIC: 204 // Unexpected error. We try hard to avoid returning this error code, 205 // because it is not very informative. 
206 // 207 //------------------------------------------------------------------------------ 208 NV_STATUS UvmDeinitialize(void); 209 210 //------------------------------------------------------------------------------ 211 // UvmReopen 212 // 213 // Reinitializes the UVM driver after checking for minimal user-mode state. 214 // Before calling this function, all GPUs must be unregistered with 215 // UvmUnregisterGpu() and all allocated VA ranges must be freed with UvmFree(). 216 // Note that it is not required to release VA ranges that were reserved with 217 // UvmReserveVa(). 218 // 219 // UvmReopen() closes the open file returned by UvmGetFileDescriptor() and 220 // replaces it with a new open file with the same name. 221 // 222 // Arguments: 223 // flags: (INPUT) 224 // Must be zero. UVM will be reinitialized with the 225 // same flags that were passed to UvmInitialize() originally. 226 // 227 // Error codes: 228 // NV_ERR_INVALID_STATE: 229 // UVM was not initialized before calling this function. 230 // 231 // NV_ERR_UVM_ADDRESS_IN_USE: 232 // Not all allocated VA ranges were freed before calling this function. 233 // 234 // NV_ERR_IN_USE: 235 // Not all GPUs were unregistered before calling this function. 236 // 237 // NV_ERR_INVALID_FLAGS: 238 // Flags is not zero. 239 // 240 // NV_ERR_OPERATING_SYSTEM: 241 // Replacing the original UVM file descriptor failed. 242 // 243 // NV_ERR_GENERIC: 244 // Unexpected error. We try hard to avoid returning this error code, 245 // because it is not very informative. 246 // 247 //------------------------------------------------------------------------------ 248 NV_STATUS UvmReopen(NvU64 flags); 249 250 //------------------------------------------------------------------------------ 251 // UvmIsPageableMemoryAccessSupported 252 // 253 // Returns true only if pageable memory access from GPUs is supported by the 254 // system and that support was not explicitly disabled via UvmInitialize. 
255 // 256 // Pageable memory here refers to memory allocated by the Operating System for 257 // the process's globals, stack variables, heap allocations, etc. that has not 258 // been registered for CUDA access using cudaHostRegister. 259 // 260 // Note that this does not check whether GPUs are present which can make use of 261 // this feature, just whether system support exists. If 262 // UvmIsPageableMemoryAccessSupported reports that pageable memory access is 263 // supported, UvmIsPageableMemoryAccessSupportedOnGpu can be used for querying 264 // per-GPU support. 265 // 266 // Arguments: 267 // pageableMemAccess: (OUTPUT) 268 // Returns true (non-zero) if the system supports pageable memory access 269 // from GPUs and that support was not explicitly disabled via 270 // UvmInitialize, and false (zero) otherwise. 271 // 272 // Error codes: 273 // NV_ERR_INVALID_STATE: 274 // UVM was not initialized. 275 // 276 // NV_ERR_GENERIC: 277 // Unexpected error. We try hard to avoid returning this error code, 278 // because it is not very informative. 279 // 280 //------------------------------------------------------------------------------ 281 NV_STATUS UvmIsPageableMemoryAccessSupported(NvBool *pageableMemAccess); 282 283 //------------------------------------------------------------------------------ 284 // UvmIsPageableMemoryAccessSupportedOnGpu 285 // 286 // Returns whether pageable memory access is supported from the given GPU on 287 // this system and that support was not explicitly disabled via UvmInitialize. 288 // The GPU must have been previously registered with UvmRegisterGpu. 289 // 290 // Pageable memory here refers to memory allocated by the Operating System for 291 // the process's globals, stack variables, heap allocations, etc. that has not 292 // been registered for CUDA access using cudaHostRegister. 293 // 294 // Arguments: 295 // gpuUuid: (INPUT) 296 // UUID of the GPU for which pageable memory access support is queried. 
297 // 298 // pageableMemAccess: (OUTPUT) 299 // Returns true (non-zero) if the GPU represented by gpuUuid supports 300 // pageable memory access and that support was not explicitly disabled 301 // via UvmInitialize, and false (zero) otherwise. 302 // 303 // Error codes: 304 // NV_ERR_INVALID_STATE: 305 // UVM was not initialized. 306 // 307 // NV_ERR_INVALID_DEVICE: 308 // The given GPU has not been registered. 309 // 310 // NV_ERR_GENERIC: 311 // Unexpected error. We try hard to avoid returning this error code, 312 // because it is not very informative. 313 // 314 //------------------------------------------------------------------------------ 315 NV_STATUS UvmIsPageableMemoryAccessSupportedOnGpu(const NvProcessorUuid *gpuUuid, 316 NvBool *pageableMemAccess); 317 318 //------------------------------------------------------------------------------ 319 // UvmRegisterGpu 320 // 321 // Registers a GPU with UVM. If this is the first process to register this GPU, 322 // the UVM driver initializes resources on the GPU and prepares it for CUDA 323 // usage. Calling UvmRegisterGpu multiple times on the same GPU from the same 324 // process results in an error. 325 // 326 // Arguments: 327 // gpuUuid: (INPUT) 328 // UUID of the GPU to register. 329 // 330 // Error codes: 331 // NV_ERR_NO_MEMORY: 332 // Internal memory allocation failed. 333 // 334 // NV_ERR_INSUFFICIENT_RESOURCES: 335 // Internal client or object allocation failed. 336 // 337 // NV_ERR_INVALID_DEVICE: 338 // The GPU referred to by gpuUuid has already been registered by this 339 // process. 340 // 341 // The GPU referred to by gpuUuid doesn't have an NVLINK2 link to the 342 // CPU but a GPU with such a link has already been registered by this 343 // process, or vice-versa. 344 // 345 // NV_ERR_NOT_SUPPORTED: 346 // The GPU referred to by gpuUuid is not supported by UVM or the GPU 347 // is configured to run in virtualization mode without SRIOV support. 
348 // 349 // NV_ERR_GPU_UUID_NOT_FOUND: 350 // The GPU referred to by gpuUuid was not found. 351 // 352 // NV_ERR_PAGE_TABLE_NOT_AVAIL: 353 // The system requires that the UVM file descriptor be associated with a 354 // single process, and that process has exited. 355 // 356 // NV_ERR_INVALID_ARGUMENT: 357 // OS state required to register the GPU is not present. 358 // 359 // NV_ERR_OBJECT_NOT_FOUND: 360 // OS state required to register the GPU is not present. 361 // 362 // NV_ERR_INVALID_STATE: 363 // OS state required to register the GPU is malformed. 364 // 365 // NV_ERR_GENERIC: 366 // Unexpected error. We try hard to avoid returning this error code, 367 // because it is not very informative. 368 // 369 //------------------------------------------------------------------------------ 370 NV_STATUS UvmRegisterGpu(const NvProcessorUuid *gpuUuid); 371 372 //------------------------------------------------------------------------------ 373 // UvmRegisterGpuSmc 374 // 375 // The same as UvmRegisterGpu, but takes additional parameters to specify the 376 // GPU partition being registered if SMC is enabled. 377 // 378 // TODO: Bug 2844714: Merge UvmRegisterGpuSmc() with UvmRegisterGpu() once 379 // the initial SMC support is in place. 380 // 381 // Arguments: 382 // gpuUuid: (INPUT) 383 // UUID of the parent GPU of the SMC partition to register. 384 // 385 // platformParams: (INPUT) 386 // User handles identifying the partition to register. 387 // 388 // Error codes (see UvmRegisterGpu also): 389 // 390 // NV_ERR_INVALID_STATE: 391 // SMC was not enabled, or the partition identified by the user 392 // handles or its configuration changed. 393 // 394 NV_STATUS UvmRegisterGpuSmc(const NvProcessorUuid *gpuUuid, 395 const UvmGpuPlatformParams *platformParams); 396 397 //------------------------------------------------------------------------------ 398 // UvmUnregisterGpu 399 // 400 // Unregisters a GPU from UVM. 
If this is the last process to unregister this 401 // GPU, the UVM driver frees all resources allocated on the GPU when the GPU 402 // was first registered. Any pages on the GPU allocated by the UVM driver will 403 // be migrated to CPU memory before the GPU resources are freed. 404 // 405 // Any GPU VA spaces or channels that were registered on this GPU using 406 // UvmRegisterGpuVaSpace or UvmRegisterChannel respectively, will be 407 // unregistered. Any state that was set by calling UvmSetPreferredLocation or 408 // UvmSetAccessedBy for this GPU will be cleared. Any pages that were associated 409 // with a non-migratable range group and had this GPU as their preferred 410 // location will have their range group association changed to 411 // UVM_RANGE_GROUP_ID_NONE. 412 // 413 // Arguments: 414 // gpuUuid: (INPUT) 415 // UUID of the GPU to unregister. 416 // 417 // Error codes: 418 // NV_ERR_INVALID_DEVICE: 419 // The GPU referred to by gpuUuid was not registered by this process. 420 // 421 // NV_ERR_GPU_UUID_NOT_FOUND: 422 // The GPU referred to by gpuUuid was not found. 423 // 424 // NV_ERR_GENERIC: 425 // Unexpected error. We try hard to avoid returning this error code, 426 // because it is not very informative. 427 // 428 //------------------------------------------------------------------------------ 429 NV_STATUS UvmUnregisterGpu(const NvProcessorUuid *gpuUuid); 430 431 //------------------------------------------------------------------------------ 432 // UvmRegisterGpuVaSpace 433 // 434 // Registers a GPU's VA (virtual address) space for use with UVM. Only one GPU 435 // VA space can be registered for a given GPU at a time. Once a VA space has 436 // been registered for a GPU, all page table updates for that VA space on that 437 // GPU will be managed by the UVM driver. 438 // 439 // The GPU must have been registered using UvmRegisterGpu prior to making this 440 // call. 
441 // 442 // On systems with GPUs that support transparent access to pageable memory, this 443 // feature is enabled per GPU VA space. This setting must match for all 444 // registered GPU VA spaces. 445 // 446 // Any VA ranges that were allocated using UvmAllocSemaphorePool will be mapped 447 // on this GPU with the mapping and caching attributes as specified during that 448 // call, or with default attributes if none were specified. 449 // 450 // Any VA ranges that had a preferred location set to this GPU will be mapped on 451 // this GPU only if this GPU is not fault-capable and the VA range belongs to a 452 // non-migratable range group. If such a mapping cannot be established, an error 453 // is returned. 454 // 455 // Any VA ranges which have accessed-by set for this GPU will be mapped on this 456 // GPU. If that VA range resides in a PCIe peer GPU's memory and P2P support 457 // between the two GPUs has not been enabled via UvmEnablePeerAccess, then a 458 // mapping won't be established. Also, if read duplication is enabled for this 459 // VA range, or its preferred location is set to this GPU, and this GPU is a 460 // fault-capable GPU, then a mapping will not be established. If this is a 461 // non-fault-capable GPU and a mapping cannot be established, then an error is 462 // returned. 463 // 464 // If P2P support has been enabled between this GPU and another GPU that also 465 // has a GPU VA space registered, then the two GPU VA spaces must support the 466 // same set of page sizes for GPU mappings. Otherwise, an error is returned. 467 // 468 // Note that all the aforementioned VA ranges must lie within the largest 469 // possible virtual address supported by this GPU. 470 // 471 // Arguments: 472 // gpuUuid: (INPUT) 473 // UUID of the GPU to register. 474 // 475 // platformParams: (INPUT) 476 // On Linux: RM ctrl fd, hClient and hVaSpace. 477 // 478 // Error codes: 479 // NV_ERR_NO_MEMORY: 480 // Internal memory allocation failed. 
481 // 482 // NV_ERR_OUT_OF_RANGE: 483 // A VA range that needs to be mapped on this GPU exceeds the largest 484 // virtual address supported by the GPU. 485 // 486 // NV_ERR_INVALID_DEVICE: 487 // The GPU referred to by gpuUuid was not registered or a VA space has 488 // already been registered for this GPU. Or this is a non-fault-capable 489 // GPU that is present in the accessed-by list of a VA range that 490 // resides on another non-fault-capable GPU, and P2P support between 491 // both GPUs is not enabled. 492 // 493 // NV_ERR_OTHER_DEVICE_FOUND: 494 // The UUID does not match the UUID of the device that is associated 495 // with the VA space handles in the platformParams argument. 496 // 497 // NV_ERR_INVALID_FLAGS: 498 // The VA space was originally allocated with UVM-incompatible flags. 499 // This includes the case in which the value for the setting to enable 500 // transparent access to pageable memory for the given GPU VA space does 501 // not match the value in previously-registered GPU VA spaces, or that 502 // value is set but pageable memory access has been disabled via 503 // UvmInitialize. 504 // 505 // NV_ERR_NOT_COMPATIBLE: 506 // The GPU referred to by gpuUuid has P2P support enabled with another 507 // GPU and the set of page sizes supported by the specified VA space 508 // doesn't match that of the VA space registered on the peer GPU. 509 // 510 // NV_ERR_INVALID_ARGUMENT: 511 // Some problem with the platform-specific arguments was detected. 512 // 513 // NV_ERR_NOT_SUPPORTED: 514 // A GPU VA space has already been registered using a different UVM file 515 // descriptor in this process and this platform does not support that 516 // operation, or a GPU VA space has already been registered on this UVM 517 // file descriptor by a different process and this platform does not 518 // support that operation. 
519 // 520 // NV_ERR_PAGE_TABLE_NOT_AVAIL: 521 // The system requires that the UVM file descriptor be associated with a 522 // single process, and that process has exited. 523 // 524 // NV_ERR_GENERIC: 525 // Unexpected error. We try hard to avoid returning this error code, 526 // because it is not very informative. 527 // 528 //------------------------------------------------------------------------------ 529 NV_STATUS UvmRegisterGpuVaSpace(const NvProcessorUuid *gpuUuid, 530 const UvmGpuVaSpacePlatformParams *platformParams); 531 532 //------------------------------------------------------------------------------ 533 // UvmUnregisterGpuVaSpace 534 // 535 // Unregisters the GPU VA space that was previously registered via a call to 536 // UvmRegisterGpuVaSpace. 537 // 538 // Any page table mappings created by UVM on that GPU for that VA space will be 539 // unmapped. Any channels that were registered on this GPU using 540 // UvmRegisterChannel will be unregistered. 541 // 542 // Arguments: 543 // gpuUuid: (INPUT) 544 // UUID of the GPU whose VA space should be unregistered. 545 // 546 // Error codes: 547 // NV_ERR_INVALID_DEVICE: 548 // The GPU referred to by gpuUuid was not registered or no VA space has 549 // been registered for this GPU. 550 // 551 // NV_ERR_GENERIC: 552 // Unexpected error. We try hard to avoid returning this error code, 553 // because it is not very informative. 554 // 555 //------------------------------------------------------------------------------ 556 NV_STATUS UvmUnregisterGpuVaSpace(const NvProcessorUuid *gpuUuid); 557 558 //------------------------------------------------------------------------------ 559 // UvmEnablePeerAccess 560 // 561 // Enables P2P (peer-to-peer) support in the UVM driver between two GPUs 562 // connected via PCIe. NVLink peers are automatically discovered/enabled in the 563 // driver at UvmRegisterGpu time. 
Enabling P2P support between two GPUs allows 564 // peer mappings to be created as part of fault servicing, memory allocation, 565 // etc. The P2P support is bidirectional, i.e. enabling P2P between GPU A and 566 // GPU B also enables P2P support between GPU B and GPU A. 567 // 568 // The two GPUs must have been registered via UvmRegisterGpu prior to making 569 // this call. An error is returned if P2P support has already been enabled 570 // between these two GPUs in this process. 571 // 572 // The two GPUs must be connected via PCIe. An error is returned if the GPUs are 573 // not connected or are connected over an interconnect different than PCIe 574 // (NVLink, for example). 575 // 576 // If both GPUs have GPU VA spaces registered for them, the two GPU VA spaces 577 // must support the same set of page sizes for GPU mappings. 578 // 579 // If any VA range resides in one GPU's memory, and the peer GPU is in the 580 // accessed-by list of that VA range, then a peer mapping will be established 581 // unless the VA space for the peer GPU has not been registered, or read 582 // duplication is enabled for the VA range, or the preferred location of the VA 583 // range is the peer GPU. 584 // 585 // Arguments: 586 // gpuUuidA: (INPUT) 587 // UUID of GPU A. 588 // 589 // gpuUuidB: (INPUT) 590 // UUID of GPU B. 591 // 592 // Error codes: 593 // NV_ERR_NO_MEMORY: 594 // Internal memory allocation failed. 595 // 596 // NV_ERR_INVALID_DEVICE: 597 // At least one GPU has not been registered, P2P support has already 598 // been enabled between the two GPUs, or the GPUs are connected via an 599 // interconnect other than PCIe. 600 // 601 // NV_ERR_NOT_SUPPORTED: 602 // The two GPUs are not peer capable. 603 // 604 // NV_ERR_NOT_COMPATIBLE: 605 // Both GPUs have a GPU VA space registered for them and the two VA 606 // spaces don't support the same set of page sizes for GPU mappings. 607 // 608 // NV_ERR_GENERIC: 609 // Unexpected error. 
We try hard to avoid returning this error code, 610 // because it is not very informative. 611 // 612 //------------------------------------------------------------------------------ 613 NV_STATUS UvmEnablePeerAccess(const NvProcessorUuid *gpuUuidA, 614 const NvProcessorUuid *gpuUuidB); 615 616 //------------------------------------------------------------------------------ 617 // UvmDisablePeerAccess 618 // 619 // Disables P2P (peer-to-peer) support in the UVM driver between two GPUs 620 // connected via PCIe. NVLink peers are automatically disabled in the driver 621 // at UvmUnregisterGpu time. Disabling P2P support between two GPUs removes all 622 // existing peer mappings from either GPU to the other, and also prevents new 623 // peer mappings from being established between the two GPUs. 624 // 625 // The two GPUs must be connected via PCIe. An error is returned if the GPUs are 626 // not connected or are connected over an interconnect different than PCIe 627 // (NVLink, for example). 628 // 629 // If one of the two GPUs is present in the accessed-by list of a non-migratable 630 // VA range that has a preferred location set to the other GPU, and the two GPUs 631 // are not fault-capable, then the GPU is removed from the accessed-by list of 632 // the range. 633 // 634 // Arguments: 635 // gpuUuidA: (INPUT) 636 // UUID of GPU A. 637 // 638 // gpuUuidB: (INPUT) 639 // UUID of GPU B. 640 // 641 // Error codes: 642 // NV_ERR_INVALID_DEVICE: 643 // At least one GPU has not been registered, or P2P support has not been 644 // enabled between the two GPUs, or the GPUs are connected via an 645 // interconnect other than PCIe. 646 // 647 // NV_ERR_GENERIC: 648 // Unexpected error. We try hard to avoid returning this error code, 649 // because it is not very informative. 
650 // 651 //------------------------------------------------------------------------------ 652 NV_STATUS UvmDisablePeerAccess(const NvProcessorUuid *gpuUuidA, 653 const NvProcessorUuid *gpuUuidB); 654 655 //------------------------------------------------------------------------------ 656 // UvmRegisterChannel 657 // 658 // Registers a channel for use with UVM. Any faults that occur on this channel 659 // will be handled by the UVM driver. 660 // 661 // A GPU VA space must have been registered on this GPU via 662 // UvmRegisterGpuVaSpace prior to making this call. 663 // 664 // For channels that require associated mappings, the base and length of a 665 // virtual address region that was reserved via UvmReserveVa must be supplied to 666 // this call in order to map those allocations. The size and alignment of this 667 // region can be obtained by calling the appropriate platform-specific API. For 668 // example, on RM, an RM control call has to be made with the control type as 669 // NV2080_CTRL_CMD_GR_GET_CTX_BUFFER_SIZE. If no region needs to be reserved for 670 // this channel, the base and length arguments are ignored. 671 // 672 // Using the same VA region for multiple UvmRegisterChannel calls is allowed, 673 // provided all allocations required by all of those calls fit within the 674 // region. 675 // 676 // Registering the same channel on multiple subdevices of an SLI group is 677 // disallowed. 678 // 679 // On any errors, the channel may be reset, thereby terminating any pending 680 // work on that channel. 681 // 682 // Arguments: 683 // gpuUuid: (INPUT) 684 // UUID of the GPU that the channel is associated with. 685 // 686 // platformParams: (INPUT) 687 // On Linux: RM ctrl fd, hClient and hChannel. 688 // 689 // base: (INPUT) 690 // Base address (starting point) of the VA (virtual address) range 691 // reserved for mapping the allocations associated with this channel. 
692 // If this channel does not have associated allocations, this argument 693 // is ignored. 694 // 695 // length: (INPUT) 696 // Length, in bytes, of the range. If this channel does not have 697 // associated allocations, this argument is ignored. 698 // 699 // Error codes: 700 // NV_ERR_NO_MEMORY: 701 // Internal memory allocation failed. 702 // 703 // NV_ERR_OTHER_DEVICE_FOUND: 704 // The UUID does not match the UUID of the device that is associated 705 // with the channel identifier in the platformParams argument. 706 // 707 // NV_ERR_GPU_INVALID_DEVICE: 708 // The GPU referred to by gpuUuid was not registered or no VA space 709 // has been registered for this GPU. (NOTE(review): UvmUnregisterGpuVaSpace documents this condition as NV_ERR_INVALID_DEVICE; confirm this code name.) 710 // 711 // NV_ERR_INVALID_CHANNEL: 712 // The given channel identifier is invalid or has already been 713 // registered. 714 // 715 // NV_ERR_INVALID_ADDRESS: 716 // The channel has allocations which need to be mapped but the base 717 // address is invalid, or the VA range specified by base and length 718 // is too small. 719 // 720 // NV_ERR_INVALID_ARGUMENT: 721 // Either some problem with the platform-specific arguments was detected 722 // or the channel has allocations which need to be mapped but length is 723 // invalid. 724 // 725 // NV_ERR_GENERIC: 726 // Unexpected error. We try hard to avoid returning this error code, 727 // because it is not very informative. 728 // 729 //------------------------------------------------------------------------------ 730 NV_STATUS UvmRegisterChannel(const NvProcessorUuid *gpuUuid, 731 const UvmChannelPlatformParams *platformParams, 732 void *base, 733 NvLength length); 734 735 //------------------------------------------------------------------------------ 736 // UvmUnregisterChannel 737 // 738 // Unregisters a channel from UVM. The channel must have been previously 739 // registered via a call to UvmRegisterChannel. The channel will be reset, 740 // thereby terminating any pending work on that channel. 
741 // 742 // Since channels may share virtual mappings, a call to UvmUnregisterChannel is 743 // not guaranteed to unmap the VA range passed into the corresponding 744 // UvmRegisterChannel call because other still-registered channels may be using 745 // allocations in that VA range. Only channels which share the same TSG can 746 // share allocations, so a channel's VA range can only be considered released 747 // after UvmUnregisterChannel has been called on all channels under that TSG. 748 // 749 // Arguments: 750 // platformParams: (INPUT) 751 // On Linux: RM ctrl fd, hClient and hChannel. 752 // 753 // Error codes: 754 // NV_ERR_INVALID_CHANNEL: 755 // The given channel identifier was not registered. 756 // 757 // NV_ERR_INVALID_ARGUMENT: 758 // Some problem with the platform specific arguments was detected. 759 // 760 // NV_ERR_GENERIC: 761 // Unexpected error. We try hard to avoid returning this error code, 762 // because it is not very informative. 763 // 764 //------------------------------------------------------------------------------ 765 NV_STATUS UvmUnregisterChannel(const UvmChannelPlatformParams *platformParams); 766 767 //------------------------------------------------------------------------------ 768 // UvmReserveVa 769 // 770 // Reserves VA space on the CPU for future use. Multiple, non-contiguous VA 771 // ranges can be reserved via this API. 772 // 773 // The starting address for the VA reservation can be either explicitly 774 // specified or left NULL to let the API implementation select one. When the 775 // starting address is specified, it must be aligned to the smallest CPU page 776 // size. When the starting address is not specified, the bounds of the search 777 // space within which the VA range should be reserved must be specified. The 778 // specified lower bound of the search space is rounded up to the nearest 779 // non-zero multiple of the requested alignment. 
The total size of the search 780 // space taking into consideration the rounded up lower bound cannot be less 781 // than the requested length for the VA reservation. The starting address chosen 782 // by the API implementation is guaranteed to be aligned to the requested 783 // alignment. 784 // 785 // The requested alignment must be either a power of two that is at least the 786 // smallest CPU page size or left zero to indicate default alignment which is 787 // the smallest CPU page size. 788 // 789 // The length of the VA reservation must be a multiple of the smallest CPU page 790 // size. 791 // 792 // Arguments: 793 // base: (INPUT/OUTPUT) 794 // Contains the starting address of the VA reservation when the call 795 // returns successfully. If *base is NULL when this API is invoked, a VA 796 // range that falls within the requested bounds is reserved. Note that 797 // the lower bound will be rounded up to the nearest non-zero multiple 798 // of the requested alignment. If *base is non-NULL when this API 799 // is invoked, then that address is chosen as the starting address of 800 // the VA reservation. 801 // 802 // length: (INPUT) 803 // Length in bytes of the region. Must be a multiple of the smallest CPU 804 // page size. 805 // 806 // minVa: (INPUT) 807 // Lower limit for the search space within which the VA range must be 808 // reserved. Will be rounded up to the nearest non-zero multiple of the 809 // requested alignment. Ignored if *base is non-NULL when the API is 810 // invoked. 811 // 812 // maxVa: (INPUT) 813 // Upper limit for the search space within which the VA range must be 814 // reserved. Ignored if *base is non-NULL when the API is invoked. 815 // 816 // alignment: (INPUT) 817 // Alignment required for the starting address of the reservation. Must 818 // either be zero to indicate default alignment which is the smallest CPU 819 // page size or a power of two that is at least the smallest CPU page 820 // size.
Ignored if *base is non-NULL when the API is invoked. 821 // 822 // Error codes: 823 // NV_ERR_NO_MEMORY: 824 // Either *base is NULL and no suitable VA reservation could be made or 825 // some other internal memory allocation failed. 826 // 827 // NV_ERR_UVM_ADDRESS_IN_USE: 828 // *base is non-NULL and reserving the VA range at that address failed. 829 // 830 // NV_ERR_INVALID_ADDRESS: 831 // One of the following occurred: 832 // - base is NULL. 833 // - *base is non-NULL and is not aligned to the smallest CPU page size. 834 // - *base is NULL and one of the following occurred: 835 // - the rounded up minVa is not less than maxVa. 836 // - the region covered by the rounded up minVa and maxVa is not big 837 // enough to contain a VA reservation of the requested length. 838 // - alignment is non-zero and is either not a power of two or is less 839 // than the smallest CPU page size. 840 // - length is zero or is not a multiple of the smallest CPU page size. 841 // 842 // NV_ERR_GENERIC: 843 // Unexpected error. We try hard to avoid returning this error code, 844 // because it is not very informative. 845 // 846 //------------------------------------------------------------------------------ 847 NV_STATUS UvmReserveVa(void **base, 848 NvLength length, 849 void *minVa, 850 void *maxVa, 851 NvLength alignment); 852 853 //------------------------------------------------------------------------------ 854 // UvmReleaseVa 855 // 856 // Releases all pages within the VA range. If any of the pages were committed, 857 // they are automatically decommitted as well. 858 // 859 // The release may encompass more than a single reserve VA or commit call, but 860 // must not partially release any regions that were either reserved or 861 // committed previously. 862 // 863 // Arguments: 864 // base: (INPUT) 865 // Base address (starting point) of the VA (virtual address) range. 866 // 867 // length: (INPUT) 868 // Length, in bytes, of the range.
869 // 870 // Error codes: 871 // NV_ERR_NO_MEMORY: 872 // Internal memory allocation failed. There is likely more than one 873 // possible cause of this error. 874 // 875 // NV_ERR_INVALID_ADDRESS: 876 // base and length are not properly aligned or the range was not 877 // previously reserved via UvmReserveVa. 878 // 879 // NV_ERR_GENERIC: 880 // Unexpected error. We try hard to avoid returning this error code, 881 // because it is not very informative. 882 // 883 //------------------------------------------------------------------------------ 884 NV_STATUS UvmReleaseVa(void *base, 885 NvLength length); 886 887 //------------------------------------------------------------------------------ 888 // UvmCreateRangeGroup 889 // 890 // Creates a new range group. Virtual address ranges can be associated with 891 // this range group as outlined in UvmSetRangeGroup. 892 // 893 // Arguments: 894 // rangeGroupId: (OUTPUT) 895 // Id of the newly created range group. 896 // 897 // Error codes: 898 // NV_ERR_NO_MEMORY: 899 // Internal memory allocation failed. 900 // 901 // NV_ERR_INVALID_ARGUMENT: 902 // A NULL pointer was passed in the rangeGroupId argument. 903 // 904 // NV_ERR_GENERIC: 905 // Unexpected error. We try hard to avoid returning this error code, 906 // because it is not very informative. 907 // 908 //------------------------------------------------------------------------------ 909 NV_STATUS UvmCreateRangeGroup(NvU64 *rangeGroupId); 910 911 //------------------------------------------------------------------------------ 912 // UvmDestroyRangeGroup 913 // 914 // Destroys a previously created range group. If there are any pages associated 915 // with this range group, that association is cleared. i.e. the behavior is the 916 // same as associating those pages with UVM_RANGE_GROUP_ID_NONE via a call to 917 // UvmSetRangeGroup. 918 // 919 // Arguments: 920 // rangeGroupId: (INPUT) 921 // Id of the range group to be destroyed. 
922 // 923 // Error codes: 924 // NV_ERR_OBJECT_NOT_FOUND: 925 // rangeGroupId was not created by a previous call to 926 // UvmCreateRangeGroup. 927 // 928 // NV_ERR_GENERIC: 929 // Unexpected error. We try hard to avoid returning this error code, 930 // because it is not very informative. 931 // 932 //------------------------------------------------------------------------------ 933 NV_STATUS UvmDestroyRangeGroup(NvU64 rangeGroupId); 934 935 //------------------------------------------------------------------------------ 936 // UvmSetRangeGroup 937 // 938 // Associates the pages in a virtual address (VA) range with the specified 939 // range group. The base address and length of the VA range must be aligned to 940 // the smallest page size supported by the CPU. If any pages in that VA range 941 // were associated with another range group, that association is changed to 942 // this range group. The VA range must have been allocated via either UvmAlloc 943 // or UvmMemMap. 944 // 945 // If the range group was made non-migratable by a previous call to 946 // UvmPreventMigrationRangeGroups, then all pages in the VA range are migrated 947 // to their preferred location if they are not already located there. If any 948 // page does not have a preferred location or if the preferred location is a 949 // fault-capable GPU, an error is returned. 950 // 951 // If rangeGroupId is UVM_RANGE_GROUP_ID_NONE, then all pages in the VA range 952 // will have their range group association removed. 953 // 954 // Arguments: 955 // base: (INPUT) 956 // Base address of the virtual address range. 957 // 958 // length: (INPUT) 959 // Length, in bytes, of the range. 960 // 961 // rangeGroupId: (INPUT) 962 // Id of the range group to associate the VA range with. 963 // 964 // Errors: 965 // NV_ERR_NO_MEMORY: 966 // Internal memory allocation failed. 967 // 968 // NV_ERR_INVALID_ADDRESS: 969 // base and length are not properly aligned or don't represent a valid 970 // address range. 
971 // 972 // NV_ERR_INVALID_DEVICE: 973 // The range group is non-migratable and at least one page in the VA 974 // range either does not have a preferred location or its preferred 975 // location is a fault-capable GPU. 976 // 977 // NV_ERR_OBJECT_NOT_FOUND: 978 // rangeGroupId was not created by a previous call to 979 // UvmCreateRangeGroup. 980 // 981 // NV_ERR_GENERIC: 982 // Unexpected error. We try hard to avoid returning this error code, 983 // because it is not very informative. 984 // 985 //------------------------------------------------------------------------------ 986 NV_STATUS UvmSetRangeGroup(void *base, 987 NvLength length, 988 NvU64 rangeGroupId); 989 990 //------------------------------------------------------------------------------ 991 // UvmPreventMigrationRangeGroups 992 // 993 // Migrates all pages associated with the specified range groups to their 994 // preferred location and prevents them from being migrated on faults from 995 // either the CPU or the GPU. Any unpopulated pages are populated at the 996 // preferred location. If any page does not have a preferred location or if the 997 // preferred location is a fault-capable GPU, an error is returned. All the 998 // specified range groups must be valid range groups allocated using 999 // UvmCreateRangeGroup. 1000 // 1001 // All pages associated with the specified range groups are mapped at the 1002 // preferred location and from all the GPUs present in the accessed-by list of 1003 // those pages, provided establishing a mapping is possible. If any page 1004 // associated with any of the specified range groups has a preferred location 1005 // set to a non-fault-capable GPU, and another non-fault-capable GPU is in the 1006 // accessed-by list of the page but P2P support between both GPUs is not 1007 // enabled, an error is returned. 1008 // 1009 // GPUs are allowed to map any pages belonging to these range groups on faults. 
1010 // If establishing such a mapping is not possible, the fault is fatal. 1011 // 1012 // Existing CPU mappings to any pages belonging to these range groups are 1013 // revoked, even if the pages are in system memory and even if the CPU is in 1014 // the accessed-by list of those pages. The CPU is not allowed to map these 1015 // pages on faults even if they are located in system memory and so, CPU faults 1016 // to these pages are always fatal. 1017 // 1018 // Multiple calls to UvmPreventMigrationRangeGroups are not refcounted. i.e. 1019 // calling UvmPreventMigrationRangeGroups on a range group on which 1020 // UvmPreventMigrationRangeGroups has already been called results in a no-op. 1021 // 1022 // Arguments: 1023 // rangeGroupIds: (INPUT) 1024 // An array of range group IDs. 1025 // 1026 // numGroupIds: (INPUT) 1027 // Number of items in the rangeGroupIds array. 1028 // 1029 // Errors: 1030 // NV_ERR_NO_MEMORY: 1031 // Internal memory allocation failed. 1032 // 1033 // NV_ERR_OBJECT_NOT_FOUND: 1034 // One or more rangeGroupIds was not found. 1035 // 1036 // NV_ERR_INVALID_ARGUMENT: 1037 // A NULL pointer was passed in for rangeGroupIds or numGroupIds was 1038 // zero. 1039 // 1040 // NV_ERR_INVALID_DEVICE: 1041 // At least one page in one of the VA ranges associated with these range 1042 // groups does not have a preferred location or its preferred location 1043 // is a fault-capable GPU. Or the preferred location has been set to a 1044 // non-fault-capable GPU, and another non-fault-capable GPU is present 1045 // in the accessed-by list of a page but P2P support between both GPUs 1046 // has not been enabled. 1047 // 1048 // NV_ERR_GENERIC: 1049 // Unexpected error. We try hard to avoid returning this error code, 1050 // because it is not very informative. 
1051 // 1052 //------------------------------------------------------------------------------ 1053 NV_STATUS UvmPreventMigrationRangeGroups(const NvU64 *rangeGroupIds, 1054 NvLength numGroupIds); 1055 1056 //------------------------------------------------------------------------------ 1057 // UvmAllowMigrationRangeGroups 1058 // 1059 // Undoes the effect of UvmPreventMigrationRangeGroups. Pages associated with 1060 // these range groups are now allowed to migrate at any time, and CPU or GPU 1061 // faults to these pages are no longer fatal. All the specified range groups 1062 // must be valid range groups allocated using UvmCreateRangeGroup. 1063 // 1064 // Multiple calls to UvmAllowMigrationRangeGroups are not refcounted. i.e. 1065 // calling UvmAllowMigrationRangeGroups on a range group on which 1066 // UvmAllowMigrationRangeGroups has already been called results in a no-op. 1067 // 1068 // Arguments: 1069 // rangeGroupIds: (INPUT) 1070 // An array of range group IDs. 1071 // 1072 // numGroupIds: (INPUT) 1073 // Number of items in the rangeGroupIds array. 1074 // 1075 // Errors: 1076 // NV_ERR_OBJECT_NOT_FOUND: 1077 // One or more rangeGroupIds was not found. 1078 // 1079 // NV_ERR_INVALID_ARGUMENT: 1080 // A NULL pointer was passed in for rangeGroupIds or numGroupIds was 1081 // zero. 1082 // 1083 // NV_ERR_GENERIC: 1084 // Unexpected error. We try hard to avoid returning this error code, 1085 // because it is not very informative. 
1086 // 1087 //------------------------------------------------------------------------------ 1088 NV_STATUS UvmAllowMigrationRangeGroups(const NvU64 *rangeGroupIds, 1089 NvLength numGroupIds); 1090 1091 //------------------------------------------------------------------------------ 1092 // UvmAlloc 1093 // 1094 // Creates a new mapping in the virtual address space of the process, populates 1095 // it at the specified preferred location, maps it on the provided list of 1096 // processors if feasible and associates the range with the given range group. 1097 // 1098 // This API is equivalent to the following code sequence: 1099 // UvmMemMap(base, length); 1100 // UvmSetPreferredLocation(base, length, preferredLocationUuid); 1101 // for (i = 0; i < accessedByCount; i++) { 1102 // UvmSetAccessedBy(base, length, &accessedByUuids[i]); 1103 // } 1104 // UvmSetRangeGroup(base, length, rangeGroupId); 1105 // UvmMigrate(base, length, preferredLocationUuid, 0); 1106 // 1107 // Please see those APIs for further details on their behavior. If an error is 1108 // encountered during any part of the sequence, the completed portion will be 1109 // undone. 1110 // 1111 // The VA range can be unmapped and freed via a call to UvmFree. 1112 // 1113 // Arguments: 1114 // base: (INPUT) 1115 // Base address of the virtual address range. 1116 // 1117 // length: (INPUT) 1118 // Length, in bytes, of the range. 1119 // 1120 // preferredLocationUuid: (INPUT) 1121 // UUID of the preferred location for this VA range. 1122 // 1123 // accessedByUuids: (INPUT) 1124 // UUIDs of all processors that should have persistent mappings to this 1125 // VA range. 1126 // 1127 // accessedByCount: (INPUT) 1128 // Number of elements in the accessedByUuids array. 1129 // 1130 // rangeGroupId: (INPUT) 1131 // ID of the range group to associate this VA range with. 1132 // 1133 // Errors: 1134 // NV_ERR_UVM_ADDRESS_IN_USE: 1135 // The requested address range overlaps with an existing allocation. 
1136 // 1137 // NV_ERR_INVALID_ADDRESS: 1138 // base and length are not properly aligned or the range was not 1139 // previously reserved via UvmReserveVa. 1140 // 1141 // NV_ERR_INVALID_DEVICE: 1142 // Either preferredLocationUuid or one of the UUIDs in the 1143 // accessedByUuids array was not registered or the UUID represents a GPU 1144 // that has no VA space registered for it. 1145 // 1146 // NV_ERR_OBJECT_NOT_FOUND: 1147 // rangeGroupId was not found. 1148 // 1149 // NV_ERR_NO_MEMORY: 1150 // Internal memory allocation failed. 1151 // 1152 // NV_ERR_GENERIC: 1153 // Unexpected error. We try hard to avoid returning this error code, 1154 // because it is not very informative. 1155 // 1156 //------------------------------------------------------------------------------ 1157 NV_STATUS UvmAlloc(void *base, 1158 NvLength length, 1159 const NvProcessorUuid *preferredLocationUuid, 1160 const NvProcessorUuid *accessedByUuids, 1161 NvLength accessedByCount, 1162 NvU64 rangeGroupId); 1163 1164 //------------------------------------------------------------------------------ 1165 // UvmFree 1166 // 1167 // Frees a VA range previously allocated via one of the UVM allocator APIs, 1168 // namely either UvmAlloc, UvmMemMap, UvmCreateExternalRange, 1169 // UvmMapDynamicParallelismRegion or UvmAllocSemaphorePool. 1170 // 1171 // For VA ranges allocated via UvmAlloc, UvmMemMap or UvmAllocSemaphorePool, all 1172 // CPU and GPU page table mappings are cleared and all allocated pages are 1173 // freed. 1174 // 1175 // For VA ranges allocated via UvmCreateExternalRange, all GPU page table 1176 // mappings are cleared. No CPU page table mappings for this range are affected, 1177 // and no physical pages for this range are freed. 1178 // 1179 // For VA ranges allocated via UvmMapDynamicParallelismRegion, all GPU page 1180 // table mappings are cleared. No CPU page table mappings for this range are 1181 // affected. 
1182 // 1183 // The base address of the VA range to be freed must match the base address used 1184 // when allocating the range. If the VA range came from a region previously 1185 // reserved via UvmReserveVa, then this VA range is put back in the reserved 1186 // state. 1187 // 1188 // Note that the reason this API does not take a length argument is because this 1189 // API is modeled after the C library free() API. Partial frees are not allowed 1190 // and the UVM usermode layer tracks the base and length of each allocated 1191 // range, so having a length argument would be redundant. This also eliminates 1192 // the need for the caller to track the length of each allocation. 1193 // 1194 // Arguments: 1195 // base: (INPUT) 1196 // Starting address of the range to be freed. This must match an 1197 // address that was obtained via a UVM allocator API. 1198 // 1199 // Errors: 1200 // NV_ERR_INVALID_ADDRESS: 1201 // base does not match an address that was passed into a UVM allocator 1202 // API. 1203 // 1204 // NV_ERR_GENERIC: 1205 // Unexpected error. We try hard to avoid returning this error code, 1206 // because it is not very informative. 1207 // 1208 //------------------------------------------------------------------------------ 1209 NV_STATUS UvmFree(void *base); 1210 1211 //------------------------------------------------------------------------------ 1212 // UvmCleanUpZombieResources 1213 // 1214 // Clean up resources left by processes that specify 1215 // UVM_INIT_FLAGS_MULTI_PROCESS_SHARING_MODE. Resources not freed before 1216 // termination by such processes are not immediately freed by UVM if another 1217 // process is using the same UVM file. 1218 // 1219 // Errors: 1220 // NV_ERR_GENERIC: 1221 // Unexpected error. We try hard to avoid returning this error code, 1222 // because it is not very informative.
1223 // 1224 //------------------------------------------------------------------------------ 1225 NV_STATUS UvmCleanUpZombieResources(void); 1226 1227 //------------------------------------------------------------------------------ 1228 // UvmAllocSemaphorePool 1229 // 1230 // Allocates memory from which semaphores can be suballocated and used to order 1231 // work between UVM and CUDA as described in UvmMigrateAsync. 1232 // 1233 // The virtual address range specified by (base, length) must have been 1234 // previously reserved via a call to UvmReserveVa. Both base and length must be 1235 // aligned to the smallest page size supported by the CPU. 1236 // 1237 // The pages are populated in CPU memory and zero initialized. They are mapped 1238 // on the CPU and in all registered GPU VA spaces. They will also be mapped in 1239 // any GPU VA spaces registered after this call. The pages are non-migratable 1240 // and the GPU mappings are persistent, which makes them safe to access from 1241 // non-fault-capable HW engines. 1242 // 1243 // By default, all mappings to this VA range have read, write and atomic access 1244 // and are uncached. This behavior can be overridden for GPUs by explicitly 1245 // specifying the mapping and caching attributes through this API. At most one 1246 // GPU may cache the allocation, in which case no other processor should write 1247 // to it. These GPUs must have been registered via UvmRegisterGpu. These GPUs 1248 // do not need to have a GPU VA space registered at the time of this API call. 1249 // Overriding default mapping and caching attributes for the CPU is disallowed. 1250 // If a new GPU is registered or a currently registered GPU is unregistered via 1251 // UvmUnregisterGpu and then re-registered, default mapping and caching 1252 // attributes will be applied for that GPU. 
1253 // 1254 // The VA range must lie within the largest possible virtual address supported 1255 // by all GPUs that currently have a GPU VA space registered for them. Also, if 1256 // a GPU VA space is registered in the future for a GPU which is unable to map 1257 // this allocation, that GPU VA space registration will fail. 1258 // 1259 // The pages in this VA range cannot be associated with range groups, cannot be 1260 // the target for read duplication, cannot have a preferred location set, and 1261 // cannot have any accessed-by processors. 1262 // 1263 // The VA range can be unmapped and freed via a call to UvmFree. 1264 // 1265 // Arguments: 1266 // base: (INPUT) 1267 // Base address of the virtual address range. 1268 // 1269 // length: (INPUT) 1270 // Length, in bytes, of the range. 1271 // 1272 // perGpuAttribs: (INPUT) 1273 // List of per GPU mapping and caching attributes. GPUs not in the list 1274 // are mapped with default attributes. 1275 // 1276 // gpuAttribsCount: (INPUT) 1277 // Number of entries in the perGpuAttribs array. 1278 // 1279 // Errors: 1280 // NV_ERR_UVM_ADDRESS_IN_USE: 1281 // The requested address range overlaps with an existing allocation. 1282 // 1283 // NV_ERR_INVALID_ADDRESS: 1284 // base and length are not properly aligned or the range was not 1285 // previously reserved via UvmReserveVa. 1286 // 1287 // NV_ERR_OUT_OF_RANGE: 1288 // The VA range exceeds the largest virtual address supported by one or 1289 // more registered GPUs. 1290 // 1291 // NV_ERR_INVALID_DEVICE: 1292 // At least one of the UUIDs in the perGpuAttribs list was either not 1293 // registered or is the UUID of the CPU. 1294 // 1295 // NV_ERR_NO_MEMORY: 1296 // Internal memory allocation failed. 1297 // 1298 // NV_ERR_INVALID_ARGUMENT: 1299 // perGpuAttribs is NULL but gpuAttribsCount is non-zero or vice-versa, 1300 // or caching is requested on more than one GPU. 
1301 // 1302 // NV_ERR_NOT_SUPPORTED: 1303 // The current process is not the one which called UvmInitialize, and 1304 // UVM_INIT_FLAGS_MULTI_PROCESS_SHARING_MODE was not specified to 1305 // UvmInitialize. 1306 // 1307 // NV_ERR_GENERIC: 1308 // Unexpected error. We try hard to avoid returning this error code, 1309 // because it is not very informative. 1310 // 1311 //------------------------------------------------------------------------------ 1312 NV_STATUS UvmAllocSemaphorePool(void *base, 1313 NvLength length, 1314 const UvmGpuMappingAttributes *perGpuAttribs, 1315 NvLength gpuAttribsCount); 1316 1317 //------------------------------------------------------------------------------ 1318 // UvmMigrate 1319 // 1320 // Migrates the backing of a given virtual address range to the specified 1321 // destination processor. If any page in the VA range is unpopulated, it is 1322 // populated at the destination processor. The migrated pages in the VA range 1323 // are also mapped on the destination processor. 1324 // 1325 // Both base and length must be aligned to the smallest page size supported by 1326 // the CPU. The VA range must lie within the largest possible virtual address 1327 // supported by the specified processor. 1328 // 1329 // The virtual address range specified by (base, length) must have been 1330 // allocated via a call to either UvmAlloc or UvmMemMap, or be supported 1331 // system-allocated pageable memory. 1332 // 1333 // If the input virtual range corresponds to system-allocated pageable memory, 1334 // and there is at least one GPU in the system that supports transparent access 1335 // to pageable memory, the behavior described in the next paragraphs does not 1336 // take effect. Instead, the driver will first populate any unpopulated pages 1337 // according to the memory policy defined by the calling process and address 1338 // range. Then, pages will be migrated to the requested processor. 
If the 1339 // destination processor is the CPU, and the memory policy has not defined 1340 // preferred CPU memory nodes or the given preferredCpuMemoryNode is in the 1341 // mask of preferred memory nodes, the driver will try to migrate memory to 1342 // preferredCpuMemoryNode first, and will fall back to the rest of the CPU nodes 1343 // if it doesn't succeed. If pages were already resident on any CPU memory node, 1344 // they will not be migrated. 1345 // 1346 // If the input virtual range corresponds to system-allocated pageable memory, 1347 // and UvmIsPageableMemoryAccessSupported reports that pageable memory access 1348 // is supported, then the driver will populate any unpopulated pages at the 1349 // destination processor and migrate the data from any source location to the 1350 // destination. Pages in the VA range are migrated even if their preferred 1351 // location is set to a processor other than the destination processor. 1352 // If the accessed-by list of any of the pages in the VA range is not empty, 1353 // then mappings to those pages from all the appropriate processors are updated 1354 // to refer to the new location if establishing such a mapping is possible. 1355 // Otherwise, those mappings are cleared. 1356 // Note that in this case, software managed pageable memory does not support 1357 // migration of MAP_SHARED, file-backed, or PROT_NONE mappings. 1358 // 1359 // If any pages in the given VA range are associated with a range group which 1360 // has been made non-migratable via UvmPreventMigrationRangeGroups, then those 1361 // pages are not migrated and the mappings on the destination processor for 1362 // those pages are left unmodified. If the VA range is associated with a 1363 // migratable range group and the destination processor is a non-fault-capable 1364 // GPU, then an error is returned if that GPU is in the accessed-by list of the 1365 // VA range but that GPU is not the preferred location.
1366 // 1367 // If read duplication is enabled on any pages in the VA range, then those pages 1368 // are read duplicated at the destination processor, leaving the source copy, if 1369 // present, intact with only its mapping changed to read-only if it wasn't 1370 // already mapped that way. 1371 // 1372 // Pages in the VA range are migrated even if their preferred location is set to 1373 // a processor other than the destination processor. 1374 // 1375 // If the accessed-by list of any of the pages in the VA range is not empty, 1376 // then mappings to those pages from all the appropriate processors are updated 1377 // to refer to the new location if establishing such a mapping is possible. 1378 // Otherwise, those mappings are cleared. 1379 // 1380 // If fewer than the number of requested pages were migrated, 1381 // NV_WARN_MORE_PROCESSING_REQUIRED is returned. An example scenario where this 1382 // could happen is when UvmPreventMigrationRangeGroups has been called on a 1383 // range group associated with some pages in this range. If fewer than the 1384 // number of requested pages were migrated due to insufficient memory to 1385 // allocate physical pages or page tables, then NV_ERR_NO_MEMORY is returned. 1386 // 1387 // Arguments: 1388 // base: (INPUT) 1389 // Base address of the virtual address range. 1390 // 1391 // length: (INPUT) 1392 // Length, in bytes, of the range. 1393 // 1394 // destinationUuid: (INPUT) 1395 // UUID of the destination processor to migrate pages to. 1396 // 1397 // preferredCpuMemoryNode: (INPUT) 1398 // Preferred CPU NUMA memory node used if the destination processor is 1399 // the CPU. This argument is ignored if the given virtual address range 1400 // corresponds to managed memory. 
1401 // 1402 // Error codes: 1403 // NV_ERR_INVALID_ADDRESS: 1404 // base and length are not properly aligned, or the range does not 1405 // represent a migratable allocation created via UvmMemMap, or the 1406 // range is pageable memory and the system does not support accessing 1407 // pageable memory, or the range does not represent a supported 1408 // Operating System allocation. 1409 // 1410 // NV_ERR_OUT_OF_RANGE: 1411 // The VA range exceeds the largest virtual address supported by the 1412 // destination processor. 1413 // 1414 // NV_ERR_INVALID_DEVICE: 1415 // destinationUuid does not represent a valid processor such as a CPU or 1416 // a GPU with a GPU VA space registered for it. Or destinationUuid is a 1417 // non-fault-capable GPU, and that GPU is present in the accessed-by 1418 // list of the VA range but that GPU is not the preferred location. 1419 // 1420 // NV_ERR_NO_MEMORY: 1421 // There was insufficient memory to allocate physical pages or page 1422 // tables to complete the migration. Or internal memory allocation 1423 // failed. 1424 // 1425 // NV_ERR_NOT_SUPPORTED: 1426 // The UVM file descriptor is associated with another process and the 1427 // input virtual range corresponds to system-allocated pageable memory 1428 // that cannot be migrated from this process. 1429 // 1430 // NV_ERR_GENERIC: 1431 // Unexpected error. We try hard to avoid returning this error code, 1432 // because it is not very informative. 1433 // 1434 // NV_WARN_MORE_PROCESSING_REQUIRED: 1435 // Fewer than the number of requested pages were migrated because some 1436 // pages were associated with a non-migratable range group. 
1437 // 1438 //------------------------------------------------------------------------------ 1439 #if UVM_API_REV_IS_AT_MOST(5) 1440 NV_STATUS UvmMigrate(void *base, 1441 NvLength length, 1442 const NvProcessorUuid *destinationUuid); 1443 #else 1444 NV_STATUS UvmMigrate(void *base, 1445 NvLength length, 1446 const NvProcessorUuid *destinationUuid, 1447 NvU32 preferredCpuMemoryNode); 1448 #endif 1449 1450 //------------------------------------------------------------------------------ 1451 // UvmMigrateAsync 1452 // 1453 // Migrates the backing of a given virtual address range to the specified 1454 // destination processor. The behavior of this API is exactly the same as that 1455 // of UvmMigrate except for the differences outlined below. 1456 // 1457 // When this call returns NV_OK, the migration operation is considered to be 1458 // in-flight and can be synchronized upon by waiting for the specified payload 1459 // to be written at the given semaphore address. The semaphore address must be 1460 // 4-byte aligned and must fall within a VA range allocated using 1461 // UvmAllocSemaphorePool. It is up to the caller to ensure that the payload has 1462 // been written before reusing the address in a subsequent UvmMigrateAsync call. 1463 // Specifying a semaphore address is optional. If the semaphore address is NULL 1464 // the payload must be zero. 1465 // 1466 // The API makes no guarantees about how many pages will be migrated, and there 1467 // is no provision to detect errors that occur during the in-flight operations. 1468 // However, the API does guarantee that the semaphore will eventually be 1469 // released regardless of errors during in-flight operations, as long as the API 1470 // call itself returned NV_OK. 1471 // 1472 // Arguments: 1473 // base: (INPUT) 1474 // Base address of the virtual address range. 1475 // 1476 // length: (INPUT) 1477 // Length, in bytes, of the range. 
//
//     destinationUuid: (INPUT)
//         UUID of the destination processor to migrate pages to.
//
//     preferredCpuMemoryNode: (INPUT)
//         Preferred CPU NUMA memory node used if the destination processor is
//         the CPU. This argument is ignored if the given virtual address range
//         corresponds to managed memory. This argument is only part of the
//         signature when UVM_API_REVISION is greater than 5.
//
//     semaphoreAddress: (INPUT)
//         Base address of the semaphore.
//
//     semaphorePayload: (INPUT)
//         Payload to be written at semaphoreAddress when the operation
//         completes. Must be zero if semaphoreAddress is NULL.
//
// Error codes:
//     NV_ERR_INVALID_ADDRESS:
//         base and length are not properly aligned, or the range does not
//         represent a migratable allocation created via UvmMemMap, or the
//         range is pageable memory and the system does not support accessing
//         pageable memory, or the range does not represent a supported
//         Operating System allocation, or the semaphoreAddress isn't properly
//         aligned, or isn't suballocated from a semaphore pool.
//
//     NV_ERR_OUT_OF_RANGE:
//         The VA range exceeds the largest virtual address supported by the
//         destination processor.
//
//     NV_ERR_INVALID_DEVICE:
//         destinationUuid does not represent a valid processor such as a CPU or
//         a GPU with a GPU VA space registered for it. Or destinationUuid is a
//         non-fault-capable GPU, and that GPU is present in the accessed-by
//         list of the VA range but that GPU is not the preferred location.
//
//     NV_ERR_INVALID_ARGUMENT:
//         semaphoreAddress is NULL and semaphorePayload is not zero.
//
//     NV_ERR_NO_MEMORY:
//         There was insufficient memory to allocate physical pages or page
//         tables to complete the migration. Or internal memory allocation
//         failed.
//
//     NV_ERR_GENERIC:
//         Unexpected error. We try hard to avoid returning this error code,
//         because it is not very informative.
//
//     NV_WARN_MORE_PROCESSING_REQUIRED:
//         Fewer than the number of requested pages were migrated because some
//         pages were associated with a non-migratable range group.
//
//------------------------------------------------------------------------------
#if UVM_API_REV_IS_AT_MOST(5)
// UVM_API_REVISION 5 and earlier: no preferredCpuMemoryNode argument.
NV_STATUS UvmMigrateAsync(void *base,
                          NvLength length,
                          const NvProcessorUuid *destinationUuid,
                          void *semaphoreAddress,
                          NvU32 semaphorePayload);
#else
// UVM_API_REVISION 6 and later: adds the preferredCpuMemoryNode argument.
NV_STATUS UvmMigrateAsync(void *base,
                          NvLength length,
                          const NvProcessorUuid *destinationUuid,
                          NvU32 preferredCpuMemoryNode,
                          void *semaphoreAddress,
                          NvU32 semaphorePayload);
#endif

//------------------------------------------------------------------------------
// UvmMigrateRangeGroup
//
// Migrates the backing of all virtual address ranges associated with the given
// range group to the specified destination processor. The behavior of this API
// is equivalent to calling UvmMigrate on each VA range associated with this
// range group. The value for the preferredCpuMemoryNode is irrelevant in this
// case as it only applies to migrations of pageable memory, which cannot be
// used to create range groups.
//
// Any errors encountered during migration are returned immediately. No attempt
// is made to migrate the remaining unmigrated ranges and the ranges that are
// already migrated are not rolled back to their previous location.
//
// The range group id specified must have been allocated via
// UvmCreateRangeGroup.
//
// Arguments:
//     rangeGroupId: (INPUT)
//         Id of the range group whose associated VA ranges have to be migrated.
//
//     destinationUuid: (INPUT)
//         UUID of the destination processor to migrate pages to.
//
// Error codes:
//     NV_ERR_OBJECT_NOT_FOUND:
//         Either UVM_RANGE_GROUP_ID_NONE was specified or the rangeGroupId was
//         not found.
//
//     NV_ERR_INVALID_DEVICE:
//         destinationUuid does not represent a valid processor such as a CPU or
//         a GPU with a GPU VA space registered for it.
//
//     NV_ERR_NO_MEMORY:
//         Internal memory allocation failed.
//
//     NV_ERR_OUT_OF_RANGE:
//         One or more of the VA ranges exceeds the largest virtual address
//         supported by the destination processor.
//
//     NV_ERR_GENERIC:
//         Unexpected error. We try hard to avoid returning this error code,
//         because it is not very informative.
//
//     NV_WARN_MORE_PROCESSING_REQUIRED:
//         Fewer pages than requested were migrated because, for example, the
//         range group was non-migratable.
//
//------------------------------------------------------------------------------
NV_STATUS UvmMigrateRangeGroup(NvU64 rangeGroupId,
                               const NvProcessorUuid *destinationUuid);

//------------------------------------------------------------------------------
// UvmPopulatePageable
//
// Forces the population of the given virtual address range. Memory will be
// populated by the system according to the memory policy defined by the calling
// process and address range.
//
// This function only supports pageable memory. None of the pages within the
// virtual address range specified by (base, length) may belong to a virtual
// address range allocated or registered using any of the UVM
// allocation/mapping APIs. Also, all pages must be mapped with at least read
// permissions.
//
// If fewer than the number of requested pages were populated, NV_ERR_NO_MEMORY
// is returned.
//
// Arguments:
//     base: (INPUT)
//         Base address of the virtual address range.
//
//     length: (INPUT)
//         Length, in bytes, of the range.
//
// Errors:
//     NV_ERR_INVALID_ADDRESS:
//         base and length are not properly aligned, the range does not
//         represent a supported Operating System allocation, or the range
//         contains pages not mapped with at least read permissions.
//
//     NV_ERR_NO_MEMORY:
//         Fewer than the number of requested pages were populated, likely
//         because the system ran out of memory.
//
//     NV_ERR_GENERIC:
//         Unexpected error. We try hard to avoid returning this error code,
//         because it is not very informative.
//
//------------------------------------------------------------------------------
NV_STATUS UvmPopulatePageable(void *base,
                              NvLength length);

//------------------------------------------------------------------------------
// UvmMemMap
//
// Creates a new mapping in the virtual address space of the process that is
// valid for access from any fault-capable CPU or GPU.
//
// The virtual address range specified by (base, length) must have been
// previously reserved via a call to UvmReserveVa. Both base and length must be
// aligned to the smallest page size supported by the CPU. Note that using a
// larger alignment for base and length, such as the largest GPU page size, may
// result in higher performance.
//
// The pages in the VA range are zero-initialized. They are typically populated
// on demand, for example, through CPU or GPU faults.
//
// The VA range can be unmapped and freed via a call to UvmFree.
//
// Arguments:
//     base: (INPUT)
//         Base address of the virtual address range.
//
//     length: (INPUT)
//         Length, in bytes, of the range.
//
// Errors:
//     NV_ERR_UVM_ADDRESS_IN_USE:
//         The requested address range overlaps with an existing allocation.
//
//     NV_ERR_INVALID_ADDRESS:
//         base and length are not properly aligned or the range was not
//         previously reserved via UvmReserveVa.
//
//     NV_ERR_NOT_SUPPORTED:
//         The current process is not the one which called UvmInitialize, and
//         UVM_INIT_FLAGS_MULTI_PROCESS_SHARING_MODE was not specified to
//         UvmInitialize.
//
//     NV_ERR_GENERIC:
//         Unexpected error. We try hard to avoid returning this error code,
//         because it is not very informative.
//
//------------------------------------------------------------------------------
NV_STATUS UvmMemMap(void *base,
                    NvLength length);

//------------------------------------------------------------------------------
// UvmCreateExternalRange
//
// Creates a VA range within the process's address space reserved for external
// allocations. The VA range is not mapped to any physical allocation at the
// time of creation. Once an external VA range has been created using this API,
// the user is free to map any number of physical allocations within the VA
// range (see UvmMapExternalAllocation and UvmMapExternalSparse for more
// details).
//
// The virtual address range, itself, does not impose any restrictions on the
// alignment of the physical allocations mapped within it. However, both base
// and length must be aligned to 4K.
//
// The VA range must not overlap with an existing VA range, irrespective of
// whether the existing range corresponds to a UVM allocation or an external
// allocation.
//
// It is allowed (but not required) for the VA range to come from a region
// previously reserved via UvmReserveVa.
//
// Any mappings created within this VA range are considered non-migratable.
// Consequently, pages cannot be associated with range groups, cannot be
// the target for read duplication, cannot have a preferred location set,
// cannot have any accessed-by processors, and any GPU faults within this range
// are fatal.
//
// Mappings within this range neither create nor modify any CPU mappings, even
// if the mappings came from a region previously reserved via UvmReserveVa.
// This implies that CPU accesses to any mappings within this range will cause
// a fatal fault if it's not mapped.
//
// The VA range is not reclaimed until UvmFree is called on it even if it is
// fully unmapped from all GPUs either explicitly via UvmUnmapExternal or
// implicitly via APIs such as UvmUnregisterGpu, UvmUnregisterGpuVaSpace,
// UvmDisablePeerAccess, etc.
//
// Arguments:
//     base: (INPUT)
//         Base address of the virtual address range.
//
//     length: (INPUT)
//         Length, in bytes, of the range.
//
// Errors:
//     NV_ERR_INVALID_ADDRESS:
//         base is NULL or length is zero or at least one of base and length is
//         not aligned to 4K.
//
//     NV_ERR_UVM_ADDRESS_IN_USE:
//         The requested address range overlaps with an existing allocation.
//
//     NV_ERR_NO_MEMORY:
//         Internal memory allocation failed.
//
//------------------------------------------------------------------------------
NV_STATUS UvmCreateExternalRange(void *base,
                                 NvLength length);

//------------------------------------------------------------------------------
// UvmMapExternalAllocation
//
// Maps an allocation that was allocated outside of UVM on the specified list of
// GPUs. The external allocation can be unmapped from a specific GPU using
// UvmUnmapExternal or from all GPUs using UvmFree.
1748 // 1749 // The virtual address range specified by (base, length) must fall within a VA 1750 // range previously created with UvmCreateExternalRange. A GPU VA space must 1751 // have been registered for each GPU in the list. The (base, length) range must 1752 // lie within the largest possible virtual address supported by the specified 1753 // GPUs. 1754 // 1755 // The page size used for the mapping is the largest supported page size less 1756 // than or equal to the alignments of base, length, offset, and the allocation 1757 // page size. 1758 // 1759 // If the range specified by (base, length) falls within any existing mappings, 1760 // the behavior is the same as if UvmUnmapExternal with the range specified by 1761 // (base, length) had been called first, provided that base and length are 1762 // aligned to the page size used for the existing one. 1763 // 1764 // If the allocation resides in GPU memory, that GPU must have been registered 1765 // via UvmRegisterGpu. If the allocation resides in GPU memory and a mapping is 1766 // requested for a different GPU, then P2P support should have been enabled via 1767 // UvmEnablePeerAccess between the two GPUs if connected by PCIe. 1768 // 1769 // The allocation can be mapped with different access permissions and 1770 // cacheability settings on different GPUs. The settings to use for each GPU are 1771 // specified in the perGpuAttribs array. It is also legal to map the allocation 1772 // multiple times on the same GPU with different access permissions and 1773 // cacheability settings as long as all of the mappings are fully contained 1774 // within the VA range. Calling this API with the same GPU appearing multiple 1775 // times in the list is equivalent to calling the API multiple times on the same 1776 // GPU. 1777 // 1778 // Access permissions control which of 3 types of accesses (reads, writes and 1779 // atomics) are allowed for this VA range. 
Any GPU accesses of a disallowed kind 1780 // result in a fatal fault. If UvmGpuMappingTypeDefault is specified, the UVM 1781 // driver chooses the appropriate access permissions. On non-fault-capable GPUs, 1782 // specifying either UvmGpuMappingTypeReadOnly or UvmGpuMappingTypeReadWrite is 1783 // disallowed. 1784 // 1785 // Caching can be forced on or off, or can be left to the UVM driver to manage 1786 // by specifying UvmGpuCachingTypeDefault. Specifying UvmGpuCachingTypeDefault 1787 // will result in a cached mapping only if the allocation is physically located 1788 // in that GPU's memory. Note that caching here only refers to GPU L2 caching 1789 // and not GPU L1 caching as the latter is controlled via instruction opcode 1790 // modifiers and not through page table attributes. 1791 // 1792 // Format and element bits can be forced, or can be left to the UVM driver to 1793 // manage by specifying UvmGpuFormatTypeDefault and 1794 // UvmGpuFormatElementBitsDefault respectively. UvmGpuFormatTypeDefault and 1795 // UvmGpuFormatElementBitsDefault are mutually inclusive, meaning that if one 1796 // of them is specified then the other one must be specified too. 1797 // 1798 // Compression type of the specified virtual address range can be specified with 1799 // UvmGpuCompressionType mapping attribute. 1800 // 1801 // The UVM driver retains a reference on the external allocation as long as at 1802 // least one GPU has any portion of that allocation mapped. 1803 // 1804 // The pages in this mapping are not zero initialized or modified in any way. 1805 // 1806 // Note that calling UvmUnregisterGpuVaSpace will also unmap all mappings 1807 // created via this API on the GPU that the GPU VA space is associated with. 1808 // Also, if a mapping has to be created on a GPU for a physical allocation that 1809 // resides on a PCIe peer GPU, then peer-to-peer support must have been enabled 1810 // between those two GPUs via UvmEnablePeerAccess. 
Disabling peer-to-peer 1811 // support via UvmDisablePeerAccess will tear down all peer mappings between the 1812 // two GPUs. 1813 // 1814 // Arguments: 1815 // base: (INPUT) 1816 // Base address of the virtual address range. 1817 // 1818 // length: (INPUT) 1819 // Length, in bytes, of the range. 1820 // 1821 // offset: (INPUT) 1822 // Offset, in bytes, in the physical allocation at which the VA range 1823 // must be mapped. 1824 // 1825 // perGpuAttribs: (INPUT) 1826 // List of per GPU mapping and caching attributes. GPUs not in the list 1827 // are not affected. 1828 // 1829 // gpuAttribsCount: (INPUT) 1830 // Number of entries in the perGpuAttribs array. 1831 // 1832 // platformParams: (INPUT) 1833 // Platform specific parameters that identify the allocation. 1834 // On Linux: RM ctrl fd, hClient and hMemory. 1835 // 1836 // Errors: 1837 // NV_ERR_INVALID_ADDRESS: 1838 // One of the following occurred: 1839 // - base is NULL. 1840 // - length is zero. 1841 // - The requested address range does not fall entirely within an 1842 // existing external VA range created with a single call to 1843 // UvmCreateExternalRange. 1844 // - The mapping page size allowed by the alignments of base, length, 1845 // and offset is smaller than the minimum supported page size on the 1846 // GPU. 1847 // - base or base + length fall within an existing mapping but are not 1848 // aligned to that mapping's page size. 1849 // 1850 // NV_ERR_OUT_OF_RANGE: 1851 // The range specified by (base, length) exceeds the largest virtual 1852 // address supported by one or more of the specified GPUs. 1853 // 1854 // NV_ERR_INVALID_OFFSET: 1855 // - offset+length exceeds the allocation size. 1856 // 1857 // NV_ERR_INVALID_DEVICE: 1858 // One of the following occurred: 1859 // - The allocation resides in GPU memory whose UUID was not registered. 1860 // - One or more of the UUIDs in the perGpuAttribs list was either not 1861 // registered or has no GPU VA space registered for it. 
//         - The allocation resides in GPU memory and a mapping was requested
//           for a different GPU and P2P support was not enabled between them.
//         - The UUID of the CPU was specified in the perGpuAttribs list.
//         - UvmGpuCompressionTypeEnabledNoPlc compression type was used on one
//           or more GPUs that don't support PLC.
//
//     NV_ERR_NO_MEMORY:
//         Internal memory allocation failed.
//
//     NV_ERR_INVALID_ARGUMENT:
//         One of the following occurred:
//         - perGpuAttribs is NULL.
//         - gpuAttribsCount is zero.
//         - an invalid mapping type was specified.
//         - an invalid caching type was specified.
//         - an invalid format/element bits combination was specified.
//         - an invalid compression type was specified.
//         - UvmGpuCompressionTypeEnabledNoPlc compression type was used with a
//           non-compressible physical allocation.
//
//     NV_ERR_GENERIC:
//         Unexpected error. We try hard to avoid returning this error code,
//         because it is not very informative.
//
//------------------------------------------------------------------------------
NV_STATUS UvmMapExternalAllocation(void *base,
                                   NvLength length,
                                   NvLength offset,
                                   const UvmGpuMappingAttributes *perGpuAttribs,
                                   NvLength gpuAttribsCount,
                                   const UvmAllocationPlatformParams *platformParams);

//------------------------------------------------------------------------------
// UvmMapExternalSparse
//
// Creates a Sparse mapping for the virtual address range specified by (base,
// length). The mapping does not have any physical backing; rather, the PTEs use
// a special pattern. The virtual address range specified by (base, length) must
// be fully contained within a virtual address range previously created with
// UvmCreateExternalRange.
//
// Virtual address ranges with Sparse mappings will not generate any faults when
// accessed. Instead, writes will be discarded and reads will return 0.
//
// Sparse mappings are supported only on fault-capable GPUs and only for 64K
// pages, so the virtual address range specified by (base, length) must be
// aligned to 64K.
//
// If the range specified by (base, length) falls within any existing mappings,
// the behavior is the same as if UvmUnmapExternal with the range specified by
// (base, length) had been called first.
//
// Note that calling UvmUnregisterGpuVaSpace will also unmap all mappings
// created via this API on the GPU that the GPU VA space is associated with.
// Notably the mappings won't be recreated when the GPU VA space is
// re-registered.
//
// Arguments:
//     base: (INPUT)
//         Base address of the virtual address range. The address must be
//         aligned on a 64K boundary.
//
//     length: (INPUT)
//         Length, in bytes, of the range. The length must be 64K aligned.
//
//     gpuUuid: (INPUT)
//         UUID of the GPU to map the sparse region on.
//
// Errors:
//     NV_ERR_INVALID_ADDRESS:
//         One of the following occurred:
//         - base is NULL.
//         - length is zero.
//         - The requested address range does not fall entirely within an
//           existing external VA range created with a single call to
//           UvmCreateExternalRange.
//         - At least one of base and length is not aligned to a 64K
//           boundary.
//
//     NV_ERR_OUT_OF_RANGE:
//         The range specified by (base, length) exceeds the largest virtual
//         address supported by the specified GPU.
//
//     NV_ERR_INVALID_DEVICE:
//         One of the following occurred:
//         - The specified GPU was not registered.
//         - The GPU specified has no VA space registered for it.
//         - The UUID of the CPU was specified.
//         - Sparse mappings are not supported on the specified GPU.
//
//     NV_ERR_NO_MEMORY:
//         Internal memory allocation failed.
//
//------------------------------------------------------------------------------
NV_STATUS UvmMapExternalSparse(void *base,
                               NvLength length,
                               const NvProcessorUuid *gpuUuid);

//------------------------------------------------------------------------------
// UvmUnmapExternal
//
// Unmaps a virtual address range that was mapped using UvmMapExternalAllocation
// or UvmMapExternalSparse from the specified GPU. The range specified by (base,
// length) must be fully contained within a single External VA range created
// with UvmCreateExternalRange.
//
// If the range specified by (base, length) partially overlaps existing
// mappings, the overlapping portion of the existing mappings will be unmapped
// provided that the split points are aligned to the mappings' respective page
// sizes. Otherwise, the overlapping portions of the existing mappings will be
// left in an undefined state.
//
// Note that the VA range is not reclaimed until UvmFree is called on it even if
// all mappings in the created range have been unmapped from all GPUs via this
// API.
//
// Arguments:
//     base: (INPUT)
//         Base address of the virtual address range.
//
//     length: (INPUT)
//         The length of the virtual address range.
//
//     gpuUuid: (INPUT)
//         UUID of the GPU to unmap the VA range from.
//
// Errors:
//     NV_ERR_INVALID_ADDRESS:
//         One of the following has occurred:
//         - base is NULL.
//         - The requested address range does not fall entirely within an
//           existing external VA range created with a single call to
//           UvmCreateExternalRange.
//         - base or base + length fall within an existing mapping but are not
//           aligned to that mapping's page size.
//
//     NV_ERR_INVALID_DEVICE:
//         Either gpuUuid does not represent a valid registered GPU or the VA
//         range corresponding to the given base address is not mapped on the
//         specified GPU.
//
//     NV_ERR_GENERIC:
//         Unexpected error. We try hard to avoid returning this error code,
//         because it is not very informative.
//
//------------------------------------------------------------------------------
NV_STATUS UvmUnmapExternal(void *base,
                           NvLength length,
                           const NvProcessorUuid *gpuUuid);

// UvmUnmapExternalAllocation
//
// Transitional declaration that takes no length argument.
// NOTE(review): presumably the older form of UvmUnmapExternal, unmapping the
// mapping at base from the given GPU - confirm against the implementation
// before relying on it.
//
// TODO: Bug 2732305: Remove this declaration when the new external APIs have
//       been implemented.
NV_STATUS UvmUnmapExternalAllocation(void *base,
                                     const NvProcessorUuid *gpuUuid);

//------------------------------------------------------------------------------
// UvmMapDynamicParallelismRegion
//
// Creates a special mapping required for dynamic parallelism. The mapping
// doesn't have any physical backing, it's just a PTE with a special kind.
//
// The virtual address range specified by (base, length) must cover exactly one
// GPU page, so length must be a page size supported by the GPU and base must be
// aligned to that page size. The VA range must not overlap with an existing
// mapping for the GPU. A GPU VA space must have been registered for the GPU and
// the GPU must support dynamic parallelism.
//
// The mapping is created immediately and not modified until a call to UvmFree.
// Calling UvmFree frees the GPU page table mapping. The range cannot be
// associated with range groups and any GPU faults within this range are fatal.
// Also, the pages cannot be the target for read duplication, cannot have a
// preferred location set, and cannot have any accessed-by processors.
//
// Note that calling UvmUnregisterGpuVaSpace will also unmap all mappings
// created via this API on the GPU that the GPU VA space is associated with.
// Notably the mappings won't be recreated when the GPU VA space is
// re-registered, but the range should still be destroyed with UvmFree.
//
// This call neither creates nor modifies any CPU mappings, even if the VA range
// came from a region previously reserved via UvmReserveVa. This implies that
// CPU accesses to this range will cause a fatal fault if it's not mapped.
//
// Arguments:
//     base: (INPUT)
//         Base address of the virtual address range.
//
//     length: (INPUT)
//         Length, in bytes, of the range. Must be equal to a page size
//         supported by the GPU.
//
//     gpuUuid: (INPUT)
//         UUID of the GPU to map the dynamic parallelism region on.
//
// Errors:
//     NV_ERR_UVM_ADDRESS_IN_USE:
//         The requested address range overlaps with an existing allocation.
//
//     NV_ERR_INVALID_ADDRESS:
//         base is NULL or not aligned to length or length is not a page size
//         supported by the GPU.
//
//     NV_ERR_OUT_OF_RANGE:
//         The VA range exceeds the largest virtual address supported by the
//         specified GPU.
//
//     NV_ERR_INVALID_DEVICE:
//         The gpuUuid was either not registered, has no GPU VA space
//         registered for it, or the GPU doesn't support dynamic parallelism.
//
//     NV_ERR_NO_MEMORY:
//         Internal memory allocation failed.
//
//     NV_ERR_GENERIC:
//         Unexpected error. We try hard to avoid returning this error code,
//         because it is not very informative.
//
//------------------------------------------------------------------------------
NV_STATUS UvmMapDynamicParallelismRegion(void *base,
                                         NvLength length,
                                         const NvProcessorUuid *gpuUuid);

//------------------------------------------------------------------------------
// UvmEnableReadDuplication
//
// Enables read duplication on the specified virtual address range, overriding
// the UVM driver's default migration and mapping policy on read faults.
//
// The virtual address range specified by (base, length) must have been
// allocated via a call to either UvmAlloc or UvmMemMap, or be supported
// system-allocated pageable memory. If the input virtual range corresponds to
// system-allocated pageable memory and UvmIsPageableMemoryAccessSupported
// reports that pageable memory access is supported, the behavior described
// below does not take effect, and read duplication will not be enabled for
// the input range.
//
// Both base and length must be aligned to the smallest page size supported by
// the CPU.
//
// On a read fault from a processor on a page in this range, any existing
// mapping to that page from all other processors will be made read-only. If the
// page does not reside in the faulting processor's memory, a duplicate copy of
// the page will be created there. The copy of the page in the faulting
// processor's memory will then be mapped as read-only on that processor. Note
// that a write to this page from any processor will collapse the duplicated
// copies.
//
// If UvmMigrate, UvmMigrateAsync or UvmMigrateRangeGroup is called on any pages
// in this VA range, then those pages will also be read duplicated on the
// destination processor for the migration.
//
// Enabling read duplication on a VA range requires the CPU and all GPUs with
// registered VA spaces to be fault-capable. Otherwise, the migration and
// mapping policies outlined above are not applied until all the
// non-fault-capable GPUs are unregistered via UvmUnregisterGpu. If a
// non-fault-capable GPU is registered after a page has already been
// read-duplicated, then the copies of that page will be collapsed into a single
// page.
//
// If UvmPreventMigrationRangeGroups has been called on the range group that
// this VA range is associated with, then the migration and mapping policies
// outlined above don't take effect until UvmAllowMigrationRangeGroups is called
// for that range group.
//
// If any page in the VA range has a preferred location, then the migration and
// mapping policies associated with this API take precedence over those related
// to the preferred location.
//
// If any pages in this VA range have any processors present in their
// accessed-by list, the migration and mapping policies associated with this
// API override those associated with the accessed-by list.
//
// Multiple calls to this API for the same VA range are not refcounted, i.e.
// calling this API on a VA range after it has already been called for that
// same VA range results in a no-op.
//
// Arguments:
//     base: (INPUT)
//         Base address of the virtual address range.
//
//     length: (INPUT)
//         Length, in bytes, of the range.
//
// Errors:
//     NV_ERR_INVALID_ADDRESS:
//         base and length are not properly aligned, or the range does not
//         represent a valid UVM allocation, or the range is pageable memory and
//         the system does not support accessing pageable memory, or the range
//         does not represent a supported Operating System allocation.
//
//     NV_ERR_GENERIC:
//         Unexpected error. We try hard to avoid returning this error code,
//         because it is not very informative.
//
//------------------------------------------------------------------------------
NV_STATUS UvmEnableReadDuplication(void *base,
                                   NvLength length);

//------------------------------------------------------------------------------
// UvmDisableReadDuplication
//
// Disables read duplication on the specified virtual address range, and reverts
// the associated policies. This also disables any default read duplication
// heuristics employed by the kernel driver.
//
// The virtual address range specified by (base, length) must have been
// allocated via a call to either UvmAlloc or UvmMemMap, or be supported
// system-allocated pageable memory. If the input virtual range corresponds to
// system-allocated pageable memory and UvmIsPageableMemoryAccessSupported
// reports that pageable memory access is supported, the behavior described
// below does not take effect, and read duplication will not be disabled for
// the input range.
//
// Both base and length must be aligned to the smallest page size supported by
// the CPU.
//
// Any pages in the VA range that are currently read duplicated will be
// collapsed into a single copy. The location for the collapsed copy will be the
// preferred location if the page has a preferred location and was resident at
// that location when this API was called. Otherwise, the location will be
// chosen arbitrarily.
//
// It is ok to call this API only on a subset of the VA range on which
// UvmEnableReadDuplication was called or for a VA range on which
// UvmEnableReadDuplication was never called.
//
// Arguments:
//     base: (INPUT)
//         Base address of the virtual address range.
2190 // 2191 // length: (INPUT) 2192 // Length, in bytes, of the range. 2193 // 2194 // Errors: 2195 // NV_ERR_INVALID_ADDRESS: 2196 // base and length are not properly aligned, or the range does not 2197 // represent a valid UVM allocation, or the range is pageable memory and 2198 // the system does not support accessing pageable memory, or the range 2199 // does not represent a supported Operating System allocation. 2200 // 2201 // NV_ERR_GENERIC: 2202 // Unexpected error. We try hard to avoid returning this error code, 2203 // because it is not very informative. 2204 // 2205 //----------------------------------------------------------------------------- 2206 NV_STATUS UvmDisableReadDuplication(void *base, 2207 NvLength length); 2208 2209 //------------------------------------------------------------------------------ 2210 // UvmSetPreferredLocation 2211 // 2212 // Sets the preferred location for the given virtual address range to be the 2213 // specified processor's memory. 2214 // 2215 // Both base and length must be aligned to the smallest page size supported by 2216 // the CPU. The VA range must lie within the largest possible virtual address 2217 // supported by the specified processor. 2218 // 2219 // The virtual address range specified by (base, length) must have been 2220 // allocated via a call to either UvmAlloc or UvmMemMap, or be supported 2221 // system-allocated pageable memory. If the input range is pageable memory and 2222 // at least one GPU in the system supports transparent access to pageable 2223 // memory, the behavior described below does not take effect and the preferred 2224 // location of the pages in the given range does not change. 
2225 // 2226 // If any pages in the VA range are associated with a range group that was made 2227 // non-migratable via UvmPreventMigrationRangeGroups, then those pages are 2228 // migrated immediately to the specified preferred location and mapped according 2229 // to policies specified in UvmPreventMigrationRangeGroups. Otherwise, this API 2230 // neither migrates pages nor does it populate unpopulated pages. Note that if 2231 // the specified preferred location is a fault-capable GPU and at least one page 2232 // in the VA range is associated with a non-migratable range group, then an 2233 // error is returned. Additionally, if the specified preferred location is a 2234 // non-fault capable GPU and at least one page in the VA range is associated 2235 // with a non-migratable range group, an error is returned if another 2236 // non-fault-capable GPU is present in the accessed-by list of that page but P2P 2237 // support has not been enabled between both GPUs. 2238 // 2239 // When a page is in its preferred location, a fault from another processor will 2240 // not cause a migration if a mapping for that page from that processor can be 2241 // established without migrating the page. 2242 // 2243 // When a page migrates away from its preferred location, the mapping on the 2244 // preferred location's processor is cleared so that the next access from that 2245 // processor will cause a fault and migrate the page back to its preferred 2246 // location. In other words, a page is mapped on the preferred location's 2247 // processor only if the page is in its preferred location. Thus, when the 2248 // preferred location changes, mappings to pages in the given range are removed 2249 // from the new preferred location if the pages are resident in a different 2250 // processor. 
Note that if the preferred location's processor is a GPU, then a 2251 // mapping from that GPU to a page in the VA range is only created if a GPU VA 2252 // space has been registered for that GPU and the page is in its preferred 2253 // location. 2254 // 2255 // If read duplication has been enabled for any pages in this VA range and 2256 // UvmPreventMigrationRangeGroups has not been called on the range group that 2257 // those pages are associated with, then the migration and mapping policies 2258 // associated with UvmEnableReadDuplication override the policies outlined 2259 // above. Note that enabling read duplication on any pages in this VA range 2260 // does not clear the state set by this API for those pages. It merely overrides 2261 // the policies associated with this state until read duplication is disabled 2262 // for those pages. 2263 // 2264 // If the preferred location processor is present in the accessed-by list of any 2265 // of the pages in this VA range, then the migration and mapping policies 2266 // associated with this API override those associated with the accessed-by list. 2267 // 2268 // The state set by this API can be cleared either by calling 2269 // UvmUnsetPreferredLocation for the same VA range or by calling 2270 // UvmUnregisterGpu on this processor if the processor is a GPU. Note that 2271 // calling UvmUnregisterGpuVaSpace will not clear the state set by this API. 2272 // Multiple calls to this API for the same VA range and the same processor are 2273 // not refcounted, i.e. calling this API on a VA range and processor after it 2274 // has already been called for that same VA range and processor results in a 2275 // no-op. 2276 // 2277 // Arguments: 2278 // base: (INPUT) 2279 // Base address of the virtual address range. 2280 // 2281 // length: (INPUT) 2282 // Length, in bytes, of the range. 2283 // 2284 // preferredLocationUuid: (INPUT) 2285 // UUID of the preferred location.
2286 // 2287 // Errors: 2288 // NV_ERR_INVALID_ADDRESS: 2289 // base and length are not properly aligned, or the range does not 2290 // represent a valid UVM allocation, or the range is pageable memory and 2291 // the system does not support accessing pageable memory, or the range 2292 // does not represent a supported Operating System allocation. 2293 // 2294 // NV_ERR_OUT_OF_RANGE: 2295 // The VA range exceeds the largest virtual address supported by the 2296 // specified processor. 2297 // 2298 // NV_ERR_INVALID_DEVICE: 2299 // preferredLocationUuid is neither the UUID of the CPU nor the UUID of 2300 // a GPU that was registered by this process. Or at least one page in the 2301 // VA range belongs to a non-migratable range group and the specified 2302 // UUID represents a fault-capable GPU. Or preferredLocationUuid is the 2303 // UUID of a non-fault-capable GPU and at least one page in the VA range 2304 // belongs to a non-migratable range group and another non-fault-capable 2305 // GPU is in the accessed-by list of the same page but P2P support 2306 // between both GPUs has not been enabled. 2307 // 2308 // NV_ERR_GENERIC: 2309 // Unexpected error. We try hard to avoid returning this error code, 2310 // because it is not very informative. 2311 // 2312 //------------------------------------------------------------------------------ 2313 NV_STATUS UvmSetPreferredLocation(void *base, 2314 NvLength length, 2315 const NvProcessorUuid *preferredLocationUuid); 2316 2317 //------------------------------------------------------------------------------ 2318 // UvmUnsetPreferredLocation 2319 // 2320 // Unsets the preferred location associated with all pages in the specified 2321 // virtual address range, reverting the migration and mapping policies outlined 2322 // in UvmSetPreferredLocation. 2323 // 2324 // Both base and length must be aligned to the smallest page size supported by 2325 // the CPU.
2326 // 2327 // The virtual address range specified by (base, length) must have been 2328 // allocated via a call to either UvmAlloc or UvmMemMap, or be supported 2329 // system-allocated pageable memory. If the input range is pageable memory and 2330 // at least one GPU in the system supports transparent access to pageable 2331 // memory, the behavior described below does not take effect and the preferred 2332 // location of the pages in the given range does not change. 2333 // 2334 // If the VA range is associated with a non-migratable range group, then that 2335 // association is cleared. i.e. the pages in this VA range have their range 2336 // group association changed to UVM_RANGE_GROUP_ID_NONE. 2337 // 2338 // It is ok to call this API only on a subset of the VA range on which 2339 // UvmSetPreferredLocation was called or for a VA range on which 2340 // UvmSetPreferredLocation was never called. 2341 // 2342 // Arguments: 2343 // base: (INPUT) 2344 // Base address of the virtual address range. 2345 // 2346 // length: (INPUT) 2347 // Length, in bytes, of the range. 2348 // 2349 // Errors: 2350 // NV_ERR_INVALID_ADDRESS: 2351 // base and length are not properly aligned or the range does not 2352 // represent a valid UVM allocation, or the range is pageable memory and 2353 // the system does not support accessing pageable memory, or the range 2354 // does not represent a supported Operating System allocation. 2355 // 2356 // NV_ERR_GENERIC: 2357 // Unexpected error. We try hard to avoid returning this error code, 2358 // because it is not very informative. 
2359 // 2360 //------------------------------------------------------------------------------ 2361 NV_STATUS UvmUnsetPreferredLocation(void *base, 2362 NvLength length); 2363 2364 //------------------------------------------------------------------------------ 2365 // UvmSetAccessedBy 2366 // 2367 // Indicates to the UVM driver that the pages in the given virtual address range 2368 // should be mapped on the specified processor whenever establishing such a 2369 // mapping is possible. The purpose of this API is to prevent faults from the 2370 // specified processor to the given VA range as much as possible. 2371 // 2372 // Both base and length must be aligned to the smallest page size supported by 2373 // the CPU. The VA range must lie within the largest possible virtual address 2374 // supported by the specified processor. 2375 // 2376 // The virtual address range specified by (base, length) must have been 2377 // allocated via a call to either UvmAlloc or UvmMemMap, or be supported 2378 // system-allocated pageable memory. If the input range is pageable memory and 2379 // at least one GPU in the system supports transparent access to pageable 2380 // memory, the behavior described below does not take effect and the accessed-by 2381 // processor list of the VA range does not change. 2382 // 2383 // If a page in the VA range is not populated or its current location doesn't 2384 // permit a mapping to be established, then no mapping is created for that page. 2385 // If a page in the VA range migrates to a new location, then the mapping is 2386 // updated to point to the new location if establishing such a mapping is 2387 // possible. If a page in the VA range is associated with a non-migratable range 2388 // group and the specified processor is a non-fault-capable GPU, then an error 2389 // is returned if the mapping cannot be established. 
2390 // 2391 // If the specified processor is a GPU and no GPU VA space has been registered 2392 // for it or if the registered GPU VA space gets unregistered, then the policies 2393 // outlined above will take effect the next time a GPU VA space gets registered 2394 // for this GPU. 2395 // 2396 // If read duplication is enabled in any pages in this VA range, then the page 2397 // mapping policy associated with read duplication overrides the mapping policy 2398 // associated with this API. 2399 // 2400 // Similarly, if any page in this VA range has a preferred location, and the 2401 // UUID of the preferred location is the same as the UUID passed in to this API, 2402 // then the mapping policy associated with having a preferred location overrides 2403 // the mapping policy associated with this API. 2404 // 2405 // Note that enabling read duplication or setting a preferred location on any 2406 // pages in this VA range does not clear the state set by this API for those 2407 // pages. It merely overrides the policies associated with this state until read 2408 // duplication is disabled on those pages or their preferred location is 2409 // cleared. 2410 // 2411 // The state set by this API can be cleared either by calling UvmUnsetAccessedBy 2412 // for the same VA range and processor or by calling UvmUnregisterGpu on this 2413 // processor if the processor is a GPU. It is also cleared if the processor is a 2414 // non-fault-capable GPU and the VA range has a preferred location set to a peer 2415 // GPU and peer access is disabled via UvmDisablePeerAccess. Note however that 2416 // calling UvmUnregisterGpuVaSpace will not clear the state set by this API. 2417 // 2418 // Multiple calls to this API for the same VA range and the same processor are 2419 // not refcounted. i.e. calling this API on a VA range and processor after it 2420 // has already been called for that same VA range and processor results in a 2421 // no-op. 
2422 // 2423 // Arguments: 2424 // base: (INPUT) 2425 // Base address of the virtual address range. 2426 // 2427 // length: (INPUT) 2428 // Length, in bytes, of the range. 2429 // 2430 // accessedByUuid: (INPUT) 2431 // UUID of the processor that should have pages in the VA range 2432 // mapped when possible. 2433 // 2434 // Errors: 2435 // NV_ERR_INVALID_ADDRESS: 2436 // base and length are not properly aligned, or the range does not 2437 // represent a valid UVM allocation, or the range is pageable memory and 2438 // the system does not support accessing pageable memory, or the range 2439 // does not represent a supported Operating System allocation. 2440 // 2441 // NV_ERR_OUT_OF_RANGE: 2442 // The VA range exceeds the largest virtual address supported by the 2443 // specified processor. 2444 // 2445 // NV_ERR_INVALID_DEVICE: 2446 // accessedByUuid is neither the UUID of the CPU nor the UUID of a GPU 2447 // that was registered by this process. Or accessedByUuid is the UUID of 2448 // a non-fault-capable GPU and the VA range is associated with a 2449 // non-migratable range group with a preferred location set to another 2450 // non-fault-capable GPU that doesn't have P2P support enabled with this 2451 // GPU. 2452 // 2453 // NV_ERR_NO_MEMORY: 2454 // accessedByUuid is a non-fault-capable GPU and there was insufficient 2455 // memory to create the mapping. 2456 // 2457 // NV_ERR_GENERIC: 2458 // Unexpected error. We try hard to avoid returning this error code, 2459 // because it is not very informative.
2460 // 2461 //------------------------------------------------------------------------------ 2462 NV_STATUS UvmSetAccessedBy(void *base, 2463 NvLength length, 2464 const NvProcessorUuid *accessedByUuid); 2465 2466 //------------------------------------------------------------------------------ 2467 // UvmUnsetAccessedBy 2468 // 2469 // Undoes the effect of UvmSetAccessedBy for the given virtual address range on 2470 // the specified processor, thereby reverting the mapping policies imposed by 2471 // UvmSetAccessedBy. 2472 // 2473 // Both base and length must be aligned to the smallest page size supported by 2474 // the CPU. 2475 // 2476 // The virtual address range specified by (base, length) must have been 2477 // allocated via a call to either UvmAlloc or UvmMemMap, or be supported 2478 // system-allocated pageable memory. If the input range is pageable memory and 2479 // at least one GPU in the system supports transparent access to pageable 2480 // memory, the behavior described below does not take effect and the accessed-by 2481 // processor list of the VA range does not change. 2482 2483 // 2484 // Existing mappings to this VA range from the given processor are not affected. 2485 // If any page in the VA range migrates to a different location however, the 2486 // mapping may be cleared or updated based on other mapping policies that are in 2487 // effect. 2488 // 2489 // It is ok to call this API for a subset of a VA range with an accessed-by list 2490 // containing this processor, or for a VA range with an empty accessed-by list. 2491 // 2492 // Arguments: 2493 // base: (INPUT) 2494 // Base address of the virtual address range. 2495 // 2496 // length: (INPUT) 2497 // Length, in bytes, of the range. 2498 // 2499 // accessedByUuid: (INPUT) 2500 // UUID of the processor from which any policies set by 2501 // UvmSetAccessedBy should be revoked for the given VA range.
2502 // 2503 // Errors: 2504 // NV_ERR_INVALID_ADDRESS: 2505 // base and length are not properly aligned or the range does not 2506 // represent a valid UVM allocation, or the range is pageable memory and 2507 // the system does not support accessing pageable memory, or the range 2508 // does not represent a supported Operating System allocation. 2509 // 2510 // NV_ERR_INVALID_DEVICE: 2511 // accessedByUuid is neither the UUID of the CPU nor the UUID of a GPU 2512 // that was registered by this process. 2513 // 2514 // NV_ERR_GENERIC: 2515 // Unexpected error. We try hard to avoid returning this error code, 2516 // because it is not very informative. 2517 // 2518 //------------------------------------------------------------------------------ 2519 NV_STATUS UvmUnsetAccessedBy(void *base, 2520 NvLength length, 2521 const NvProcessorUuid *accessedByUuid); 2522 2523 //------------------------------------------------------------------------------ 2524 // UvmEnableSystemWideAtomics 2525 // 2526 // Enables software-assisted system-wide atomics support on the specified GPU. 2527 // Any system-wide atomic operation issued from this GPU is now guaranteed to be 2528 // atomic with respect to all accesses from other processors that also support 2529 // system-wide atomics regardless of whether that support is enabled on those 2530 // other processors or not. 2531 // 2532 // The class of atomic operations from the GPU that are considered system-wide 2533 // is GPU architecture dependent. All atomic operations from the CPU are always 2534 // considered to be system-wide and support for system-wide atomics on the CPU 2535 // is always considered to be enabled. 2536 // 2537 // System-wide atomics which cannot be natively supported in hardware are 2538 // emulated using virtual mappings and page faults. For example, assume a 2539 // virtual address which is resident in CPU memory and has CPU memory as its 2540 // preferred location. 
A GPU with system-wide atomics enabled but without native 2541 // atomics support to CPU memory will not have atomics enabled in its virtual 2542 // mapping of the page that contains that address. If that GPU performs an 2543 // atomic operation, the access will fault, all other processors' mappings to 2544 // that page will have their write permissions revoked, the faulting GPU will be 2545 // granted atomic permissions in its virtual mapping, and the faulting GPU will 2546 // retry its access. Further atomic accesses from that GPU will not cause page 2547 // faults until another processor attempts a write access to the same page. 2548 // 2549 // Multiple calls to this API for the same GPU are not refcounted, i.e. calling 2550 // this API for a GPU for which software-assisted system-wide atomics support 2551 // has already been enabled results in a no-op. 2552 // 2553 // The GPU must have been registered using UvmRegisterGpu prior to making this 2554 // call. By default, software-assisted system-wide atomics support is enabled 2555 // when a GPU is registered. 2556 // 2557 // Arguments: 2558 // gpuUuid: (INPUT) 2559 // UUID of the GPU to enable software-assisted system-wide atomics on. 2560 // 2561 // Error codes: 2562 // NV_ERR_NO_MEMORY: 2563 // Internal memory allocation failed. 2564 // 2565 // NV_ERR_INVALID_DEVICE: 2566 // The GPU referred to by gpuUuid was not registered. 2567 // 2568 // NV_ERR_NOT_SUPPORTED: 2569 // The GPU does not support system-wide atomic operations, or the GPU 2570 // has hardware support for scoped atomic operations. 2571 // 2572 // NV_ERR_GENERIC: 2573 // Unexpected error. We try hard to avoid returning this error code, 2574 // because it is not very informative. 
2575 // 2576 //------------------------------------------------------------------------------ 2577 NV_STATUS UvmEnableSystemWideAtomics(const NvProcessorUuid *gpuUuid); 2578 2579 //------------------------------------------------------------------------------ 2580 // UvmDisableSystemWideAtomics 2581 // 2582 // Disables software-assisted system-wide atomics support on the specified GPU. 2583 // Any atomic operation from this GPU is no longer guaranteed to be atomic with 2584 // respect to accesses from other processors in the system, even if the 2585 // operation has system-wide scope at the instruction level. 2586 // 2587 // The GPU must have been registered using UvmRegisterGpu prior to making this 2588 // call. It is however ok to call this API for GPUs that do not have support for 2589 // system-wide atomic operations enabled. If the GPU is unregistered via 2590 // UvmUnregisterGpu and then registered again via UvmRegisterGpu, support for 2591 // software-assisted system-wide atomics will be enabled. 2592 // 2593 // Arguments: 2594 // gpuUuid: (INPUT) 2595 // UUID of the GPU to disable software-assisted system-wide atomics on. 2596 // 2597 // Error codes: 2598 // NV_ERR_INVALID_DEVICE: 2599 // The GPU referred to by gpuUuid was not registered. 2600 // 2601 // NV_ERR_NOT_SUPPORTED: 2602 // The GPU does not support system-wide atomic operations, or the GPU 2603 // has hardware support for scoped atomic operations. 2604 // 2605 // NV_ERR_GENERIC: 2606 // Unexpected error. We try hard to avoid returning this error code, 2607 // because it is not very informative. 2608 // 2609 //------------------------------------------------------------------------------ 2610 NV_STATUS UvmDisableSystemWideAtomics(const NvProcessorUuid *gpuUuid); 2611 2612 //------------------------------------------------------------------------------ 2613 // UvmGetFileDescriptor 2614 // 2615 // Returns the UVM file descriptor currently being used to call into the UVM 2616 // kernel mode driver. 
The data type of the returned file descriptor is platform 2617 // specific. 2618 // 2619 // If UvmInitialize has not yet been called, an error is returned. If 2620 // UvmInitialize was called with UVM_AUTO_FD, then the file created during 2621 // UvmInitialize is returned. If UvmInitialize was called with an existing UVM 2622 // file descriptor, then that file descriptor is returned. 2623 // 2624 // Arguments: 2625 // returnedFd: (OUTPUT) 2626 // A platform specific file descriptor. 2627 // 2628 // Error codes: 2629 // NV_ERR_INVALID_ARGUMENT: 2630 // returnedFd is NULL. 2631 // 2632 // NV_ERR_INVALID_STATE: 2633 // UVM was not initialized before calling this function. 2634 // 2635 // NV_ERR_GENERIC: 2636 // Unexpected error. We try hard to avoid returning this error code, 2637 // because it is not very informative. 2638 // 2639 //------------------------------------------------------------------------------ 2640 NV_STATUS UvmGetFileDescriptor(UvmFileDescriptor *returnedFd); 2641 2642 //------------------------------------------------------------------------------ 2643 // UvmIs8Supported 2644 // 2645 // Returns whether the kernel driver has been loaded in UVM 8 mode or not. 2646 // 2647 // Argument: 2648 // is8Supported: (OUTPUT) 2649 // Will be set to true (nonzero) if the driver was loaded as UVM 8, or 2650 // false (zero) if it was loaded as UVM Lite. 2651 // 2652 // Error codes: 2653 // NV_ERR_INVALID_ARGUMENT: 2654 // is8Supported is NULL. 2655 // 2656 // NV_ERR_GENERIC: 2657 // Unexpected error. We try hard to avoid returning this error code, 2658 // because it is not very informative. 
2659 // 2660 //------------------------------------------------------------------------------ 2661 NV_STATUS UvmIs8Supported(NvU32 *is8Supported); 2662 2663 //------------------------------------------------------------------------------ 2664 // Tools API 2665 //------------------------------------------------------------------------------ 2666 2667 //------------------------------------------------------------------------------ 2668 // UvmDebugVersion 2669 // 2670 // Returns the version number of the UVM debug library. 2671 // See uvm_types.h for valid version numbers, e.g. UVM_DEBUG_V1. 2672 // 2673 //------------------------------------------------------------------------------ 2674 unsigned UvmDebugVersion(void); 2675 2676 //------------------------------------------------------------------------------ 2677 // UvmDebugCreateSession 2678 // 2679 // Creates a handle for a debugging session. 2680 // 2681 // When the client initializes, it will pass in a process handle and get a 2682 // session ID for itself. Subsequent calls to the UVM API will take in that 2683 // session ID. 2684 // 2685 // There are security requirements to this call. 2686 // One of the following must be true: 2687 // 1. The session owner must be running as an elevated user 2688 // 2. The session owner and target must belong to the same user and the 2689 // session owner is at least as privileged as the target. 2690 // 2691 // For CUDA 6.0 we can create at most 64 sessions per debugger process. 2692 // 2693 // Arguments: 2694 // pid: (INPUT) 2695 // Process id for which the debugging session will be created 2696 // 2697 // session: (OUTPUT) 2698 // Handle to the debugging session associated to that pid. 2699 // 2700 // Error codes: 2701 // NV_ERR_PID_NOT_FOUND: 2702 // pid is invalid/ not associated with UVM. 2703 // 2704 // NV_ERR_INSUFFICIENT_PERMISSIONS: 2705 // Function fails the security check.
2706 // 2707 // NV_ERR_INSUFFICIENT_RESOURCES: 2708 // Attempt is made to allocate more than 64 sessions per process. 2709 // 2710 // NV_ERR_BUSY_RETRY: 2711 // internal resources are blocked by other threads. 2712 // 2713 //------------------------------------------------------------------------------ 2714 NV_STATUS UvmDebugCreateSession(unsigned pid, 2715 UvmDebugSession *session); 2716 2717 //------------------------------------------------------------------------------ 2718 // UvmDebugDestroySession 2719 // 2720 // Destroys a debugging session. 2721 // 2722 // Arguments: 2723 // session: (INPUT) 2724 // Handle to the debugging session to destroy. 2725 // 2726 // Error codes: 2727 // NV_ERR_INVALID_ARGUMENT: 2728 // session is invalid. 2729 // 2730 // NV_ERR_BUSY_RETRY: 2731 // debug session is in use by some other thread. 2732 // 2733 //------------------------------------------------------------------------------ 2734 NV_STATUS UvmDebugDestroySession(UvmDebugSession session); 2735 2736 //------------------------------------------------------------------------------ 2737 // UvmDebugCountersEnable 2738 // 2739 // Enables the counters following the user specified configuration. 2740 // 2741 // The user must fill a list with the configuration of the counters it needs to 2742 // either enable or disable. It can only enable one counter per line. 2743 // 2744 // The structure (UvmCounterConfig) has several fields: 2745 // - scope: Please see the UvmCounterScope enum (above), for details. 2746 // - name: Name of the counter. Please check UvmCounterName for list. 2747 // - gpuid: Identifies the GPU for which the counter will be enabled/disabled 2748 // This parameter is ignored in AllGpu scopes. 2749 // - state: A value of 0 will disable the counter, a value of 1 will enable 2750 // the counter. 2751 // 2752 // Note: All counters are refcounted, that means that a counter will only be 2753 // disabled when its refcount reaches zero.
2754 // 2755 // Arguments: 2756 // session: (INPUT) 2757 // Handle to the debugging session. 2758 // 2759 // config: (INPUT) 2760 // pointer to configuration list as per above. 2761 // 2762 // count: (INPUT) 2763 // number of entries in the config list. 2764 // 2765 // Error codes: 2766 // NV_ERR_INSUFFICIENT_PERMISSIONS: 2767 // Function fails the security check. 2768 // 2769 // NV_ERR_INVALID_ARGUMENT: 2770 // debugging session is invalid or one of the counter lines is invalid. 2771 // If call returns this value, no action specified by the config list 2772 // will have taken effect. 2773 // 2774 // NV_ERR_NOT_SUPPORTED: 2775 // UvmCounterScopeGlobalSingleGpu is not supported for CUDA 6.0 2776 // 2777 // NV_ERR_BUSY_RETRY: 2778 // the debug session is in use by some other thread. 2779 // 2780 //------------------------------------------------------------------------------ 2781 NV_STATUS UvmDebugCountersEnable(UvmDebugSession session, 2782 UvmCounterConfig *config, 2783 unsigned count); 2784 2785 //------------------------------------------------------------------------------ 2786 // UvmDebugGetCounterHandle 2787 // 2788 // Returns handle to a particular counter. This is an opaque handle that the 2789 // implementation uses in order to find your counter, later. This handle can be 2790 // used in subsequent calls to UvmDebugGetCounterVal(). 2791 // 2792 // Arguments: 2793 // session: (INPUT) 2794 // Handle to the debugging session. 2795 // 2796 // scope: (INPUT) 2797 // Scope that will be mapped. 2798 // 2799 // counterName: (INPUT) 2800 // Name of the counter in that scope. 2801 // 2802 // gpu: (INPUT) 2803 // Gpuid of the scoped GPU. This parameter is ignored in AllGpu scopes. 2804 // 2805 // pCounterHandle: (OUTPUT) 2806 // Handle to the counter address.
2807 // 2808 // Error codes: 2809 // NV_ERR_INVALID_ARGUMENT: 2810 // Specified scope/gpu pair or session id is invalid 2811 // 2812 // NV_ERR_NOT_SUPPORTED: 2813 // UvmCounterScopeGlobalSingleGpu is not supported for CUDA 6.0 2814 // 2815 // NV_ERR_BUSY_RETRY: 2816 // debug session is in use by some other thread. 2817 // 2818 //------------------------------------------------------------------------------ 2819 NV_STATUS UvmDebugGetCounterHandle(UvmDebugSession session, 2820 UvmCounterScope scope, 2821 UvmCounterName counterName, 2822 NvProcessorUuid gpu, 2823 NvUPtr *pCounterHandle); 2824 2825 //------------------------------------------------------------------------------ 2826 // UvmDebugGetCounterVal 2827 // 2828 // Returns the values of the counters identified by the given counter handles. 2829 // 2830 // Arguments: 2831 // session: (INPUT) 2832 // Handle to the debugging session. 2833 // 2834 // counterHandleArray: (INPUT) 2835 // Array of counter handles, as returned by UvmDebugGetCounterHandle. 2836 // 2837 // handleCount: (INPUT) 2838 // Number of handles in the counterHandleArray array. 2839 // 2840 // counterValArray: (OUTPUT) 2841 // Array of counter values corresponding to the handles. 2842 // 2843 // Error codes: 2844 // NV_ERR_INVALID_ARGUMENT: 2845 // one of the specified handles is invalid. 2846 // 2847 //------------------------------------------------------------------------------ 2848 NV_STATUS UvmDebugGetCounterVal(UvmDebugSession session, 2849 NvUPtr *counterHandleArray, 2850 unsigned handleCount, 2851 unsigned long long *counterValArray); 2852 2853 //------------------------------------------------------------------------------ 2854 // UvmEventQueueCreate 2855 // 2856 // This call creates an event queue of the given size. 2857 // No events are added in the queue till they are enabled by the user. 2858 // Event queue data is visible to the user even after the target process dies 2859 // if the session is active and queue is not freed.
2860 // 2861 // User doesn't need to serialize multiple UvmEventQueueCreate calls as 2862 // each call creates a new queue state associated with the returned queue 2863 // handle. 2864 // 2865 // Arguments: 2866 // sessionHandle: (INPUT) 2867 // Handle to the debugging session. 2868 // 2869 // queueHandle: (OUTPUT) 2870 // Handle to created queue. 2871 // 2872 // queueSize: (INPUT) 2873 // Size of the event queue buffer in units of UvmEventEntry's. 2874 // This quantity must be > 1. 2875 // 2876 // notificationCount: (INPUT) 2877 // Number of entries after which the user should be notified that 2878 // there are events to fetch. 2879 // User is notified when queueEntries >= notification count. // // timeStampType: (INPUT) // NOTE(review): this parameter was previously undocumented; presumably it selects the kind of timestamp recorded for events in this queue (see the UvmEventTimeStampType definition) - confirm. 2880 // 2881 // Error codes: 2882 // NV_ERR_INSUFFICIENT_PERMISSIONS: 2883 // Function fails the security check. 2884 // 2885 // NV_ERR_INVALID_ARGUMENT: 2886 // One of the arguments is invalid. 2887 // 2888 // NV_ERR_INSUFFICIENT_RESOURCES: 2889 // it's not possible to allocate a queue of requested size. 2890 // 2891 // NV_ERR_BUSY_RETRY: 2892 // internal resources are blocked by other threads. 2893 // 2894 // NV_ERR_PID_NOT_FOUND: 2895 // queue create call is made on a session after the target dies. 2896 // 2897 //------------------------------------------------------------------------------ 2898 NV_STATUS UvmEventQueueCreate(UvmDebugSession sessionHandle, 2899 UvmEventQueueHandle *queueHandle, 2900 NvS64 queueSize, 2901 NvU64 notificationCount, 2902 UvmEventTimeStampType timeStampType); 2903 2904 //------------------------------------------------------------------------------ 2905 // UvmEventQueueDestroy 2906 // 2907 // This call frees all internal resources associated with the queue, including 2908 // unpinning of the memory associated with that queue. Freeing the user buffer is 2909 // the responsibility of the caller. Event queue might be also destroyed as a side 2910 // effect of destroying a session associated with this queue.
//
// User needs to ensure that a queue handle is not deleted while some other
// thread is using the same queue handle.
//
// Arguments:
//     sessionHandle: (INPUT)
//         Handle to the debugging session.
//
//     queueHandle: (INPUT)
//         Handle to the queue which is to be freed.
//
// Error codes:
//     RM_ERR_NOT_PERMITTED:
//         Function fails the security check.
//
//     NV_ERR_INVALID_ARGUMENT:
//         One of the arguments is invalid.
//
//     NV_ERR_BUSY_RETRY:
//         internal resources are blocked by other threads.
//
//------------------------------------------------------------------------------
NV_STATUS UvmEventQueueDestroy(UvmDebugSession sessionHandle,
                               UvmEventQueueHandle queueHandle);

//------------------------------------------------------------------------------
// UvmEventEnable
//
// This call enables a particular event type in the event queue.
// All events are disabled by default when a queue is created.
//
// This API does not access the queue state maintained in the user
// library so the user doesn't need to acquire a lock to protect the queue
// state.
//
// Arguments:
//     sessionHandle: (INPUT)
//         Handle to the debugging session.
//
//     queueHandle: (INPUT)
//         Handle to the queue where events are to be enabled.
//
//     eventTypeFlags: (INPUT)
//         This field specifies the event types to be enabled. For example:
//         To enable migration events and memory violations: pass flags
//         "UVM_EVENT_ENABLE_MEMORY_VIOLATION | UVM_EVENT_ENABLE_MIGRATION"
//
// Error codes:
//     RM_ERR_NOT_PERMITTED:
//         Function fails the security check.
//
//     NV_ERR_INVALID_ARGUMENT:
//         One of the arguments is invalid.
//
//     NV_ERR_PID_NOT_FOUND:
//         this call is made after the target process dies
//
//     NV_ERR_BUSY_RETRY:
//         internal resources are blocked by other threads.
//
//------------------------------------------------------------------------------
NV_STATUS UvmEventEnable(UvmDebugSession sessionHandle,
                         UvmEventQueueHandle queueHandle,
                         unsigned eventTypeFlags);

//------------------------------------------------------------------------------
// UvmEventDisable
//
// This call disables a particular event type in the queue.
//
// This API does not access the queue state maintained in the user
// library so the user doesn't need to acquire a lock to protect the queue
// state.
//
// Arguments:
//     sessionHandle: (INPUT)
//         Handle to the debugging session.
//
//     queueHandle: (INPUT)
//         Handle to the queue where events are to be disabled.
//
//     eventTypeFlags: (INPUT)
//         This field specifies the event types to be disabled.
//         For example: To disable migration events and memory violations:
//         pass "UVM_EVENT_ENABLE_MEMORY_VIOLATION | UVM_EVENT_ENABLE_MIGRATION"
//         as flags
//
// Error codes:
//     RM_ERR_NOT_PERMITTED:
//         Function fails the security check.
//
//     NV_ERR_INVALID_ARGUMENT:
//         One of the arguments is invalid.
//
//     NV_ERR_PID_NOT_FOUND:
//         this call is made after the target process dies
//
//     NV_ERR_BUSY_RETRY:
//         internal resources are blocked by other threads.
//
//------------------------------------------------------------------------------
NV_STATUS UvmEventDisable(UvmDebugSession sessionHandle,
                          UvmEventQueueHandle queueHandle,
                          unsigned eventTypeFlags);

//------------------------------------------------------------------------------
// UvmEventWaitOnQueueHandles
//
// User is notified when queueEntries >= notification count.
// This call does a blocking wait for this notification. It returns when
// at least one of the queue handles has events to be fetched or when the
// timeout expires.
//
// This API accesses constant data maintained in the queue state. Hence,
// the user doesn't need to acquire a lock to protect the queue state.
//
// Arguments:
//     queueHandleArray: (INPUT)
//         array of queue handles.
//
//     arraySize: (INPUT)
//         number of handles in array.
//
//     timeout: (INPUT)
//         timeout in msec
//
//     pNotificationFlags: (OUTPUT)
//         If a particular queue handle in the input array is notified then
//         the respective bit flag is set in pNotificationFlags.
//
// Error codes:
//     NV_ERR_INVALID_ARGUMENT:
//         one of the queue handles is invalid.
//
//------------------------------------------------------------------------------
NV_STATUS UvmEventWaitOnQueueHandles(UvmEventQueueHandle *queueHandleArray,
                                     unsigned arraySize,
                                     NvU64 timeout,
                                     unsigned *pNotificationFlags);

//------------------------------------------------------------------------------
// UvmEventGetNotificationHandles
//
// User is notified when queueEntries >= notification count.
// The user can directly get the queue notification handles rather than using
// a UVM API to wait on queue handles. This helps the user to wait on other
// objects (apart from queue notification) along with queue notification
// handles in the same thread.
// The user can safely use this call along with the
// library supported wait call UvmEventWaitOnQueueHandles.
//
// This API reads constant data maintained in the queue state. Hence,
// the user doesn't need to acquire a lock to protect the queue state.
//
// Arguments:
//     queueHandleArray: (INPUT)
//         array of queue handles.
//
//     arraySize: (INPUT)
//         number of handles in array.
//
//     notificationHandleArray: (OUTPUT)
//         Windows: Output of this call contains an array of 'windows event
//         handles' corresponding to the queue handles passed as input.
//         Linux: All queues belonging to the same process share the same
//         file descriptor (fd) for notification. If the user chooses to use
//         UvmEventGetNotificationHandles then they should check all queues
//         for new events (by calling UvmEventFetch) when notified on
//         the fd.
//
// Error codes:
//     NV_ERR_INVALID_ARGUMENT:
//         One of the arguments is invalid.
//
//------------------------------------------------------------------------------
NV_STATUS UvmEventGetNotificationHandles(UvmEventQueueHandle *queueHandleArray,
                                         unsigned arraySize,
                                         void **notificationHandleArray);

//------------------------------------------------------------------------------
// UvmEventGetGpuUuidTable
//
// Each migration event entry contains the gpu index to/from where data is
// migrated. This index maps to a corresponding gpu UUID in the gpuUuidTable.
// Using indices saves on the size of each event entry. This API provides the
// gpuIndex to gpuUuid relation to the user.
//
// This API does not access the queue state maintained in the user
// library and so the user doesn't need to acquire a lock to protect the
// queue state.
//
// Arguments:
//     gpuUuidTable: (OUTPUT)
//         The return value is an array of UUIDs.
//         The array index is the
//         corresponding gpuIndex. There can be at max 32 gpus associated with
//         UVM, so array size is 32.
//
//     validCount: (OUTPUT)
//         The system doesn't normally contain 32 GPUs. This field gives the
//         count of entries that are valid in the returned gpuUuidTable.
//
// Error codes:
//     NV_ERR_BUSY_RETRY:
//         internal resources are blocked by other threads.
//
//------------------------------------------------------------------------------
NV_STATUS UvmEventGetGpuUuidTable(NvProcessorUuid *gpuUuidTable,
                                  unsigned *validCount);

//------------------------------------------------------------------------------
// UvmEventFetch
//
// This call is used to fetch the queue entries in a user buffer.
//
// This API updates the queue state. Hence simultaneous calls to fetch/skip
// events should be avoided as that might corrupt the queue state.
//
// Arguments:
//     sessionHandle: (INPUT)
//         Handle to the debugging session.
//
//     queueHandle: (INPUT)
//         queue from where to fetch the events.
//
//     pBuffer: (OUTPUT)
//         Pointer to the buffer where the API will copy the events. User
//         shall ensure the size is enough.
//
//     nEntries: (INPUT/OUTPUT)
//         It provides the maximum number of entries that will be fetched
//         from the queue. If this number is larger than the size of the
//         queue it will be internally capped to that value.
//         As output it returns the actual number of entries copied to the
//         buffer.
//
// Error codes:
//     RM_ERR_NOT_PERMITTED:
//         Function fails the security check.
//
//     NV_ERR_INVALID_ARGUMENT:
//         One of the arguments is invalid.
//
//     NV_ERR_INVALID_INDEX:
//         The indices of the queue have been corrupted.
//
//     NV_ERR_BUFFER_TOO_SMALL:
//         The event queue buffer provided by the caller was too small to
//         contain all of the events that occurred during this run.
//         Events were therefore dropped (not recorded).
//         Please re-run with a larger buffer.
//
//------------------------------------------------------------------------------
NV_STATUS UvmEventFetch(UvmDebugSession sessionHandle,
                        UvmEventQueueHandle queueHandle,
                        UvmEventEntry *pBuffer,
                        NvU64 *nEntries);

//------------------------------------------------------------------------------
// UvmEventSkipAll
//
// This API drops all event entries from the queue.
//
// This API updates the queue state. Hence simultaneous calls to fetch/
// skip events should be avoided as that might corrupt the queue state.
//
// Arguments:
//     sessionHandle: (INPUT)
//         Handle to the debugging session.
//
//     queueHandle: (INPUT)
//         target queue.
//
// Error codes:
//     RM_ERR_NOT_PERMITTED:
//         Function fails the security check.
//
//     NV_ERR_INVALID_ARGUMENT:
//         One of the arguments is invalid.
//
//------------------------------------------------------------------------------
NV_STATUS UvmEventSkipAll(UvmDebugSession sessionHandle,
                          UvmEventQueueHandle queueHandle);

//------------------------------------------------------------------------------
// UvmEventQueryTimeStampType
//
// This API returns the type of time stamp used in an event entry for a given
// queue.
//
// This API reads constant data maintained in the queue state. Hence,
// the user doesn't need to acquire a lock to protect the queue state.
//
// Arguments:
//     sessionHandle: (INPUT)
//         Handle to the debugging session.
//
//     queueHandle: (INPUT)
//         target queue.
//
//     timeStampType: (OUTPUT)
//         type of time stamp used in event entry. See UvmEventTimeStampType
//         for supported types of time stamps.
//
// Error codes:
//     RM_ERR_NOT_PERMITTED:
//         Function fails the security check.
//
//     NV_ERR_INVALID_ARGUMENT:
//         One of the arguments is invalid.
//
//------------------------------------------------------------------------------
NV_STATUS UvmEventQueryTimeStampType(UvmDebugSession sessionHandle,
                                     UvmEventQueueHandle queueHandle,
                                     UvmEventTimeStampType *timeStampType);

//------------------------------------------------------------------------------
// UvmDebugAccessMemory
//
// This call can be used by the debugger to read/write memory range. UVM driver
// may not be aware of all the pages in this range. A bit per page is set by the
// driver if it is read/written by UVM.
//
// Arguments:
//     session: (INPUT)
//         Handle to the debugging session.
//
//     baseAddress: (INPUT)
//         base address from where memory is to be accessed
//
//     sizeInBytes: (INPUT)
//         Number of bytes to be accessed
//
//     accessType: (INPUT)
//         Read or write access request
//
//     buffer: (INPUT/OUTPUT)
//         This buffer would be read or written to by the driver.
//         User needs to allocate a big enough buffer to fit sizeInBytes.
//
//     isBitmaskSet: (INPUT/OUTPUT)
//         Set to 1, if any field in bitmask is set
//         NULL(INPUT) if unused
//
//     bitmask: (INPUT/OUTPUT)
//         One bit per page is set if UVM reads or writes to it.
//         User should allocate a bitmask big enough to fit one bit per page
//         covered by baseAddress + sizeInBytes:
//         (baseAlignmentBytes + sizeInBytes + pageSize - 1)/pageSize number
//         of bits.
//         NULL(INPUT) if unused.
//
// Error codes:
//     NV_ERR_INVALID_ARGUMENT:
//         One of the arguments is invalid.
//
//------------------------------------------------------------------------------
NV_STATUS UvmDebugAccessMemory(UvmDebugSession session,
                               void *baseAddress,
                               NvU64 sizeInBytes,
                               UvmDebugAccessType accessType,
                               void *buffer,
                               NvBool *isBitmaskSet,
                               NvU64 *bitmask);

//
// Uvm Tools uvm API
//


//------------------------------------------------------------------------------
// UvmToolsCreateSession
//
// Creates a handle for a tools session.
//
// When the client initializes, it passes a duplicate of the Uvm file handle
// obtained from the target process's UvmGetFileDescriptor API, e.g. created by
// calling DuplicateHandle or dup2, or by sharing the file descriptor over a
// Unix domain socket. The returned session handle is required to create other
// tools objects, e.g. events, counters.
//
// In order to guarantee that the session persists for the lifetime of the
// target process, the caller is responsible for passing a duplicate file
// descriptor. This is also required for correctness in case of an
// out-of-process session.
//
// Passing a non-duplicated Uvm file handle results in undefined behaviour. The
// least that you should expect is that all your session related objects will
// become useless once the target process closes its Uvm file handle.
//
//
// There are security requirements for this call to be successful.
// Specifically, after validating the file descriptor, one of the following
// conditions must hold:
//     1. The session owner is running as an elevated user
//     2. The session owner and target belong to the same user and the
//        session owner is at least as privileged as the target.
//
// Arguments:
//     fd: (INPUT)
//         Duplicated file handle from target process.
//
//     session: (OUTPUT)
//         Handle to the tools session associated to fd above.
//
// Error codes:
//     NV_ERR_INVALID_ARGUMENT:
//         fd is either closed or points to non uvm device.
//
//     NV_ERR_NO_MEMORY:
//         Internal memory allocation failed.
//
//------------------------------------------------------------------------------
NV_STATUS UvmToolsCreateSession(UvmFileDescriptor fd,
                                UvmToolsSessionHandle *session);

//------------------------------------------------------------------------------
// UvmToolsDestroySession
//
// Destroys a tools session. This also has the side effect of closing the fd
// that was associated with this session during UvmToolsCreateSession.
//
// All resources associated with this session (counters, event queues) are also
// destroyed.
//
// Arguments:
//     session: (INPUT)
//         Handle associated with a tools session.
//
// Error codes:
//     NV_ERR_INVALID_ARGUMENT:
//         session handle does not refer to a valid session.
//
//------------------------------------------------------------------------------
NV_STATUS UvmToolsDestroySession(UvmToolsSessionHandle session);

//
// Events subsystem
//
// Events subsystem is useful for a tools process to track target process
// behaviour. Every event refers to a single process using Unified memory.
//
// The most typical use case is as follows:
// 1. Create event Queue using UvmToolsCreateEventQueue
// 2. Start capture of interesting event types using
//    UvmToolsEventQueueEnableEvents
// 3. poll / Loop using Get/Put pointer
//    - Consume existing events from user's buffer
//    - exit loop based on some condition (e.g. timeout, target process exit)
//    - pause (Stop) capture of some of the events
// 4.
//    Destroy event Queue using UvmToolsDestroyEventQueue
//


// Returns the current size of UvmToolsEventControlData (the structure that the
// event_control buffer passed to UvmToolsCreateEventQueue must be able to
// hold).
NvLength UvmToolsGetEventControlSize(void);

// NOTE(review): undocumented. Presumably returns the size of a single event
// entry stored in an event queue buffer - confirm against the implementation.
NvLength UvmToolsGetEventEntrySize(void);

// NOTE(review): undocumented. Presumably returns the number of counter types
// that a counters buffer can hold - confirm against the implementation.
NvLength UvmToolsGetNumberOfCounters(void);

//------------------------------------------------------------------------------
// UvmToolsCreateEventQueue
//
// This call creates an event queue that can hold the given number of events.
// All events are disabled by default. Event queue data persists for the
// lifetime of the target process.
//
// Arguments:
//     session: (INPUT)
//         Handle to the tools session.
//
//     event_buffer: (INPUT)
//         User allocated buffer. Must be page-aligned. Must be large enough to
//         hold at least event_buffer_size events. Gets pinned until queue is
//         destroyed.
//
//     event_buffer_size: (INPUT)
//         Size of the event queue buffer in units of UvmEventEntry's. Must be
//         a power of two, and greater than 1.
//
//     event_control: (INPUT)
//         User allocated buffer. Must be page-aligned. Must be large enough to
//         hold UvmToolsEventControlData (although single page-size allocation
//         should be more than enough). One could call
//         UvmToolsGetEventControlSize() function to find out current size of
//         UvmToolsEventControlData. Gets pinned until queue is destroyed.
//
//     queue: (OUTPUT)
//         Handle to the created queue.
//
// Error codes:
//     NV_ERR_INSUFFICIENT_PERMISSIONS:
//         Session handle does not refer to a valid session
//
//     NV_ERR_INVALID_ARGUMENT:
//         One of the parameters: event_buffer, event_buffer_size, event_control
//         is not valid
//
//     NV_ERR_INSUFFICIENT_RESOURCES:
//         There could be multiple reasons for this error. One would be that it's
//         not possible to allocate a queue of requested size.
//         Another would be
//         that either event_buffer or event_control memory couldn't be pinned
//         (e.g. because of OS limitation of pinnable memory). It is also
//         possible that UvmToolsEventQueueDescriptor could not be created.
//
//------------------------------------------------------------------------------
NV_STATUS UvmToolsCreateEventQueue(UvmToolsSessionHandle session,
                                   void *event_buffer,
                                   NvLength event_buffer_size,
                                   void *event_control,
                                   UvmToolsEventQueueHandle *queue);

// NOTE(review): undocumented. Presumably returns the descriptor that the
// polling subsystem uses to notify the user about this queue - confirm against
// the implementation.
UvmToolsEventQueueDescriptor UvmToolsGetEventQueueDescriptor(UvmToolsEventQueueHandle queue);


//------------------------------------------------------------------------------
// UvmToolsSetNotificationThreshold
//
// Sets a custom notification threshold in number of events for a given queue.
// Polling subsystem will notify user about this queue if and only if number
// of unconsumed events is greater than or equal to notification_threshold.
// Default threshold upon creating an event queue is floor(N / 2), where N
// represents maximum number of events that this queue can fit.
//
// Consequently, if notification_threshold is greater than queue size, there
// will be no notification.
//
// Arguments:
//     queue: (INPUT)
//         Handle to the queue whose notification threshold is to be set.
//
//     notification_threshold: (INPUT)
//         A new threshold, in number of events, to be set for this queue.
//
// Error codes:
//     NV_ERR_INSUFFICIENT_PERMISSIONS:
//         Event Queue might be corrupted (associated session is not valid).
//
//     NV_ERR_INVALID_ARGUMENT:
//         Queue handle does not refer to a valid queue.
//------------------------------------------------------------------------------
NV_STATUS UvmToolsSetNotificationThreshold(UvmToolsEventQueueHandle queue,
                                           NvLength notification_threshold);

//------------------------------------------------------------------------------
// UvmToolsDestroyEventQueue
//
// Destroys all internal resources associated with the queue. It unpins the
// buffers provided in UvmToolsCreateEventQueue. Event Queue is also auto
// destroyed when corresponding session gets destroyed.
//
// Arguments:
//     queue: (INPUT)
//         Handle to the queue to be destroyed
//
// Error codes:
//     NV_ERR_INSUFFICIENT_PERMISSIONS:
//         Event Queue might be corrupted (associated session is not valid).
//
//     NV_ERR_INVALID_ARGUMENT:
//         Queue handle does not refer to a valid queue.
//
//------------------------------------------------------------------------------
NV_STATUS UvmToolsDestroyEventQueue(UvmToolsEventQueueHandle queue);

//------------------------------------------------------------------------------
// UvmToolsEventQueueEnableEvents
//
// This call enables a particular event type in the event queue. All events are
// disabled by default. Any event type is considered listed if and only if its
// corresponding value is equal to 1 (in other words, bit is set). Disabled
// events listed in eventTypeFlags are going to be enabled. Enabled events and
// events not listed in eventTypeFlags are not affected by this call.
//
// It is not an error to call this function multiple times with the same
// arguments.
//
// Arguments:
//     queue: (INPUT)
//         Handle to the queue, for which events are supposed to be enabled
//
//     eventTypeFlags: (INPUT)
//         This bit field specifies the event types to be enabled.
//         Events not
//         specified in this field do not change their state. For example to
//         enable migration and memory violations events pass flags
//         "UVM_EVENT_ENABLE_MEMORY_VIOLATION | UVM_EVENT_ENABLE_MIGRATION"
//
// Error codes:
//     NV_ERR_INSUFFICIENT_PERMISSIONS:
//         Event Queue might be corrupted (associated session is not valid).
//
//     NV_ERR_INVALID_ARGUMENT:
//         Queue handle does not refer to a valid queue.
//
//------------------------------------------------------------------------------
NV_STATUS UvmToolsEventQueueEnableEvents(UvmToolsEventQueueHandle queue,
                                         NvU64 eventTypeFlags);

//------------------------------------------------------------------------------
// UvmToolsEventQueueDisableEvents
//
// This call disables a particular event type in the event queue. Any event type
// is considered listed if and only if its corresponding value is equal to 1
// (in other words, bit is set). Enabled events listed in eventTypeFlags are
// going to be disabled. Disabled events and events not listed in eventTypeFlags
// are not affected by this call.
//
// It is not an error to call this function multiple times with the same
// arguments.
//
// Arguments:
//     queue: (INPUT)
//         Handle to the queue, for which events are supposed to be disabled
//
//     eventTypeFlags: (INPUT)
//         This bit field specifies the event types to be disabled. Events not
//         specified in this field do not change their state. For example to
//         disable migration and memory violations events pass flags
//         "UVM_EVENT_ENABLE_MEMORY_VIOLATION | UVM_EVENT_ENABLE_MIGRATION"
//
// Error codes:
//     NV_ERR_INSUFFICIENT_PERMISSIONS:
//         Event Queue might be corrupted (associated session is not valid).
//
//     NV_ERR_INVALID_ARGUMENT:
//         Queue handle does not refer to a valid event queue.
//
//------------------------------------------------------------------------------
NV_STATUS UvmToolsEventQueueDisableEvents(UvmToolsEventQueueHandle queue,
                                          NvU64 eventTypeFlags);


//------------------------------------------------------------------------------
// UvmToolsCreateProcessAggregateCounters
//
// Creates the counters structure for tracking aggregate process counters.
// These counters are enabled by default.
//
// Counters position follows the layout of the memory that UVM driver decides to
// use. To obtain particular counter value, user should perform consecutive
// atomic reads at a given buffer + offset address.
//
// It is not defined what is the initial value of a counter. User should rely on
// a difference between each snapshot.
//
// Arguments:
//     session: (INPUT)
//         Handle to the tools session.
//
//     counters_buffer: (INPUT)
//         User allocated buffer. Must be aligned to the OS's page size. Must
//         be large enough to hold all possible counter types. In practice, 4kB
//         system page (minimal granularity) should be sufficient. This memory
//         gets pinned until counters are destroyed.
//
//     counters: (OUTPUT)
//         Handle to the created counters.
//
// Error codes:
//     NV_ERR_INSUFFICIENT_PERMISSIONS:
//         Provided session is not valid
//
//     NV_ERR_INSUFFICIENT_RESOURCES:
//         There could be multiple reasons for this error. One would be that it's
//         not possible to allocate counters structure. Another would be that
//         the counters_buffer memory couldn't be pinned
//         (e.g.
//         because of OS limitation of pinnable memory)
//
//------------------------------------------------------------------------------
NV_STATUS UvmToolsCreateProcessAggregateCounters(UvmToolsSessionHandle session,
                                                 void *counters_buffer,
                                                 UvmToolsCountersHandle *counters);

//------------------------------------------------------------------------------
// UvmToolsCreateProcessorCounters
//
// Creates the counters structure for tracking per-processor counters.
// These counters are disabled by default.
//
// Counters position follows the layout of the memory that UVM driver decides to
// use. To obtain particular counter value, user should perform consecutive
// atomic reads at a given buffer + offset address.
//
// It is not defined what is the initial value of a counter. User should rely on
// a difference between each snapshot.
//
// Arguments:
//     session: (INPUT)
//         Handle to the tools session.
//
//     counters_buffer: (INPUT)
//         User allocated buffer. Must be aligned to the OS's page size. Must
//         be large enough to hold all possible counter types. In practice, 4kB
//         system page should be sufficient. This memory gets pinned until
//         counters are destroyed.
//
//     processorUuid: (INPUT)
//         UUID of the resource, for which counters will provide statistic data.
//
//     counters: (OUTPUT)
//         Handle to the created counters.
//
// Error codes:
//     NV_ERR_INSUFFICIENT_PERMISSIONS:
//         session handle does not refer to a valid tools session
//
//     NV_ERR_INSUFFICIENT_RESOURCES:
//         There could be multiple reasons for this error. One would be that it's
//         not possible to allocate counters structure. Another would be that
//         the counters_buffer memory couldn't be pinned
//         (e.g.
//         because of OS limitation of pinnable memory)
//
//     NV_ERR_INVALID_ARGUMENT:
//         processorUuid does not refer to any known resource in UVM driver
//
//------------------------------------------------------------------------------
NV_STATUS UvmToolsCreateProcessorCounters(UvmToolsSessionHandle session,
                                          void *counters_buffer,
                                          const NvProcessorUuid *processorUuid,
                                          UvmToolsCountersHandle *counters);

//------------------------------------------------------------------------------
// UvmToolsDestroyCounters
//
// Destroys all internal resources associated with this counters structure.
// It unpins the buffer provided in UvmToolsCreate*Counters. Counters structure
// also gets destroyed when corresponding session is destroyed.
//
// Arguments:
//     counters: (INPUT)
//         Handle to the counters structure.
//
// Error codes:
//     NV_ERR_INSUFFICIENT_PERMISSIONS:
//         State of the counters has been corrupted.
//
//     NV_ERR_INVALID_ARGUMENT:
//         Counters handle does not refer to a valid Counters structure.
//
//------------------------------------------------------------------------------
NV_STATUS UvmToolsDestroyCounters(UvmToolsCountersHandle counters);

//------------------------------------------------------------------------------
// UvmToolsEnableCounters
//
// This call enables certain counter types in the counters structure. Any
// counter type is considered listed if and only if its corresponding value is
// equal to 1 (in other words, bit is set). Disabled counter types listed in
// counterTypeFlags are going to be enabled. Already enabled counter types and
// counter types not listed in counterTypeFlags are not affected by this call.
//
// It is not an error to call this function multiple times with the same
// arguments.
//
// Arguments:
//     counters: (INPUT)
//         Handle to the counters structure.
//
//     counterTypeFlags: (INPUT)
//         This bit field specifies the counter types to be enabled.
//         For example, to enable faults number accounting and number of bytes
//         transferred into a given resource (or aggregate) pass flags
//         "UVM_COUNTER_ENABLE_FAULTS_NUMBER |
//          UVM_COUNTER_ENABLE_BYTES_TRANSFERRED_IN"
//
// Error codes:
//     NV_ERR_INSUFFICIENT_PERMISSIONS:
//         Counters structure might be corrupted (associated session is not
//         valid).
//
//     NV_ERR_INVALID_ARGUMENT:
//         Counters handle does not refer to a valid counters structure.
//------------------------------------------------------------------------------
NV_STATUS UvmToolsEnableCounters(UvmToolsCountersHandle counters,
                                 NvU64 counterTypeFlags);

//------------------------------------------------------------------------------
// UvmToolsDisableCounters
//
// This call disables certain counter types in the counters structure. Any
// counter type is considered listed if and only if its corresponding value is
// equal to 1 (in other words, bit is set). Enabled counter types listed in
// counterTypeFlags are going to be disabled. Already disabled counter types and
// counter types not listed in counterTypeFlags are not affected by this call.
//
// It is not an error to call this function multiple times with the same
// arguments.
//
// Arguments:
//     counters: (INPUT)
//         Handle to the counters structure.
//
//     counterTypeFlags: (INPUT)
//         This bit field specifies the counter types to be disabled.
//         For example, to disable faults number accounting and number of bytes
//         transferred into a given resource (or aggregate) pass flags
//         "UVM_COUNTER_ENABLE_FAULTS_NUMBER |
//          UVM_COUNTER_ENABLE_BYTES_TRANSFERRED_IN"
//
// Error codes:
//     NV_ERR_INSUFFICIENT_PERMISSIONS:
//         Counters structure might be corrupted (associated session is not
//         valid).
//
//     NV_ERR_INVALID_ARGUMENT:
//         Counters handle does not refer to a valid counters structure.
//
//------------------------------------------------------------------------------
NV_STATUS UvmToolsDisableCounters(UvmToolsCountersHandle counters,
                                  NvU64 counterTypeFlags);

//------------------------------------------------------------------------------
// UvmToolsReadProcessMemory
//
// Reads up to size bytes from a given target process's virtual address.
// If size is 0, function should successfully return the size of the largest
// region that can be read starting at a given target process's virtual
// address. This might be used to discover size of user's allocation.
//
// Upon successful execution and size greater than 0, user should have a copy of
// target's process memory in a given buffer. Result is unspecified in case of
// in-process scenario when targetVa address + size overlaps with buffer + size.
//
// This is essentially a UVM version of RM ctrl call
// NV83DE_CTRL_CMD_DEBUG_READ_MEMORY. For implementation constraints (and more
// information), please refer to the documentation:
// //sw/docs/resman/components/compute/UVM/subsystems/UVM_8_Tools_API_Design.docx
//
// Arguments:
//     session: (INPUT)
//         Handle to the tools session.
//
//     buffer: (INPUT)
//         User buffer (destination) address, where requested memory shall be
//         copied.
//
//     size: (INPUT)
//         Number of bytes requested to be copied.
//         If user's buffer is not large enough to fit size bytes, result is
//         unspecified. If this is 0, function should return largest chunk of
//         memory available to read.
//
//     targetVa: (INPUT)
//         Target process's (source) address, from which memory should be
//         copied.
//
//     bytes_read: (OUTPUT)
//         Either number of bytes successfully read or the largest chunk of
//         memory available to read, depending on size parameter.
//
// Error codes:
//     NV_ERR_INSUFFICIENT_PERMISSIONS:
//         Session handle does not refer to a valid tools session.
//
//     NV_ERR_INVALID_ADDRESS:
//         UVM driver has no knowledge of targetVa address.
//
//     NV_ERR_INVALID_ARGUMENT:
//         Read spans more than a single target process allocation.
//
//------------------------------------------------------------------------------
NV_STATUS UvmToolsReadProcessMemory(UvmToolsSessionHandle session,
                                    void *buffer,
                                    NvLength size,
                                    void *targetVa,
                                    NvLength *bytes_read);

//------------------------------------------------------------------------------
// UvmToolsWriteProcessMemory
//
// Writes up to size bytes to a given target process's virtual address.
// If size is 0, function should successfully return the size of the largest
// region that can be written starting at a given target process's virtual
// address. This might be used to discover size of user's allocation.
//
// Upon successful execution and size greater than 0, target process should have
// a copy of buffer starting at targetVa address. Result is unspecified in case
// of in-process scenario when targetVa address + size overlaps with
// buffer + size.
//
// This is essentially a UVM version of RM ctrl call
// NV83DE_CTRL_CMD_DEBUG_WRITE_MEMORY.
// For implementation constraints (and more information), please refer to the
// documentation:
// //sw/docs/resman/components/compute/UVM/subsystems/UVM_8_Tools_API_Design.docx
//
// Arguments:
//     session: (INPUT)
//         Handle to the tools session.
//
//     buffer: (INPUT)
//         User buffer (source) address, from which requested memory shall be
//         copied.
//
//     size: (INPUT)
//         Number of bytes requested to be copied. If user's buffer does not
//         hold at least size bytes, result is unspecified. If this is 0,
//         function should return largest chunk of memory available to write.
//
//     targetVa: (INPUT)
//         Target process's (destination) address, where memory should be
//         copied.
//
//     bytes_read: (OUTPUT)
//         Either number of bytes successfully written or the largest chunk of
//         memory available to write, depending on size parameter. Note that,
//         despite the parameter's name, this reports bytes written.
//
// Error codes:
//     NV_ERR_INSUFFICIENT_PERMISSIONS:
//         Session handle does not refer to a valid tools session.
//
//     NV_ERR_INVALID_ADDRESS:
//         UVM driver has no knowledge of targetVa address.
//
//     NV_ERR_INVALID_ARGUMENT:
//         Write spans more than a single target process allocation.
//
//------------------------------------------------------------------------------
NV_STATUS UvmToolsWriteProcessMemory(UvmToolsSessionHandle session,
                                     void *buffer,
                                     NvLength size,
                                     void *targetVa,
                                     NvLength *bytes_read);

//------------------------------------------------------------------------------
// UvmToolsGetProcessorUuidTable
//
// Populate a table with the UUIDs of all the currently registered processors
// in the target process. When a GPU is registered, it is added to the table.
// When a GPU is unregistered, it is removed. As long as a GPU remains
// registered, its index in the table does not change.
// New registrations obtain the first unused index.
//
// Arguments:
//     session: (INPUT)
//         Handle to the tools session.
//
//     table: (OUTPUT)
//         Array of processor UUIDs, including the CPU's UUID which is always
//         at index zero. The srcIndex and dstIndex fields of the
//         UvmEventMigrationInfo struct index this array. Unused indices will
//         have a UUID of zero.
//
//     count: (OUTPUT)
//         Set by UVM to the number of UUIDs written, including any gaps in
//         the table due to unregistered GPUs.
//
// Error codes:
//     NV_ERR_INVALID_ADDRESS:
//         Writing to table failed.
//
//------------------------------------------------------------------------------
NV_STATUS UvmToolsGetProcessorUuidTable(UvmToolsSessionHandle session,
                                        NvProcessorUuid *table,
                                        NvLength *count);

//------------------------------------------------------------------------------
// UvmToolsFlushEvents
//
// Some events, like migrations, which have end timestamps, are not immediately
// submitted to queues when they are completed. This call enqueues any
// completed but not yet enqueued events associated with the session.
//
// Arguments:
//     session: (INPUT)
//         Handle to the tools session.
//
// Error codes:
//     NV_ERR_INSUFFICIENT_PERMISSIONS:
//         Session handle does not refer to a valid session.
//
//------------------------------------------------------------------------------
NV_STATUS UvmToolsFlushEvents(UvmToolsSessionHandle session);

#ifdef __cplusplus
}
#endif

#endif // _UVM_H_