1 /* 2 * SPDX-FileCopyrightText: Copyright (c) 2014-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 3 * SPDX-License-Identifier: MIT 4 * 5 * Permission is hereby granted, free of charge, to any person obtaining a 6 * copy of this software and associated documentation files (the "Software"), 7 * to deal in the Software without restriction, including without limitation 8 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 9 * and/or sell copies of the Software, and to permit persons to whom the 10 * Software is furnished to do so, subject to the following conditions: 11 * 12 * The above copyright notice and this permission notice shall be included in 13 * all copies or substantial portions of the Software. 14 * 15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 21 * DEALINGS IN THE SOFTWARE. 22 */ 23 24 // 25 // This file provides common types for both UVM driver and RM's UVM interface. 26 // 27 28 #ifndef _NV_UVM_TYPES_H_ 29 #define _NV_UVM_TYPES_H_ 30 31 #include "nvtypes.h" 32 #include "nvstatus.h" 33 #include "nvgputypes.h" 34 #include "nvCpuUuid.h" 35 36 37 // 38 // Default Page Size if left "0" because in RM BIG page size is default & there 39 // are multiple BIG page sizes in RM. These defines are used as flags to "0" 40 // should be OK when user is not sure which pagesize allocation it wants 41 // 42 #define UVM_PAGE_SIZE_DEFAULT 0x0 43 #define UVM_PAGE_SIZE_4K 0x1000 44 #define UVM_PAGE_SIZE_64K 0x10000 45 #define UVM_PAGE_SIZE_128K 0x20000 46 #define UVM_PAGE_SIZE_2M 0x200000 47 #define UVM_PAGE_SIZE_512M 0x20000000 48 49 // 50 // When modifying flags, make sure they are compatible with the mirrored 51 // PMA_* flags in phys_mem_allocator.h. 52 // 53 // Input flags 54 #define UVM_PMA_ALLOCATE_DONT_EVICT NVBIT(0) 55 #define UVM_PMA_ALLOCATE_PINNED NVBIT(1) 56 #define UVM_PMA_ALLOCATE_SPECIFY_MINIMUM_SPEED NVBIT(2) 57 #define UVM_PMA_ALLOCATE_SPECIFY_ADDRESS_RANGE NVBIT(3) 58 #define UVM_PMA_ALLOCATE_SPECIFY_REGION_ID NVBIT(4) 59 #define UVM_PMA_ALLOCATE_PREFER_SLOWEST NVBIT(5) 60 #define UVM_PMA_ALLOCATE_CONTIGUOUS NVBIT(6) 61 #define UVM_PMA_ALLOCATE_PERSISTENT NVBIT(7) 62 #define UVM_PMA_ALLOCATE_PROTECTED_REGION NVBIT(8) 63 #define UVM_PMA_ALLOCATE_FORCE_ALIGNMENT NVBIT(9) 64 #define UVM_PMA_ALLOCATE_NO_ZERO NVBIT(10) 65 #define UVM_PMA_ALLOCATE_TURN_BLACKLIST_OFF NVBIT(11) 66 #define UVM_PMA_ALLOCATE_ALLOW_PARTIAL NVBIT(12) 67 68 // Output flags 69 #define UVM_PMA_ALLOCATE_RESULT_IS_ZERO NVBIT(0) 70 71 // Input flags to pmaFree 72 #define UVM_PMA_FREE_IS_ZERO NVBIT(0) 73 74 // 75 // Indicate that the PMA operation is being done from one of the PMA eviction 76 // callbacks. 77 // 78 // Notably this flag is currently used only by the UVM/RM interface and not 79 // mirrored in PMA. 80 // 81 #define UVM_PMA_CALLED_FROM_PMA_EVICTION 16384 82 83 #define UVM_UUID_LEN 16 84 #define UVM_SW_OBJ_SUBCHANNEL 5 85 86 typedef unsigned long long UvmGpuPointer; 87 88 // 89 // The following typedefs serve to explain the resources they point to. 90 // The actual resources remain RM internal and not exposed. 91 // 92 typedef struct uvmGpuSession_tag *uvmGpuSessionHandle; // gpuSessionHandle 93 typedef struct uvmGpuDevice_tag *uvmGpuDeviceHandle; // gpuDeviceHandle 94 typedef struct uvmGpuAddressSpace_tag *uvmGpuAddressSpaceHandle; // gpuAddressSpaceHandle 95 typedef struct uvmGpuTsg_tag *uvmGpuTsgHandle; // gpuTsgHandle 96 typedef struct uvmGpuChannel_tag *uvmGpuChannelHandle; // gpuChannelHandle 97 typedef struct uvmGpuCopyEngine_tag *uvmGpuCopyEngineHandle; // gpuObjectHandle 98 99 typedef struct UvmGpuMemoryInfo_tag 100 { 101 // Out: Memory layout. 102 NvU32 kind; 103 104 // Out: Set to TRUE, if the allocation is in sysmem. 105 NvBool sysmem; 106 107 // Out: Set to TRUE, if the allocation is a constructed 108 // under a Device or Subdevice. 109 // All permutations of sysmem and deviceDescendant are valid. 110 // !sysmem && !deviceDescendant implies a fabric allocation. 111 NvBool deviceDescendant; 112 113 // Out: Page size associated with the phys alloc. 114 NvU64 pageSize; 115 116 // Out: Set to TRUE, if the allocation is contiguous. 117 NvBool contig; 118 119 // Out: Starting Addr if the allocation is contiguous. 120 // This is only valid if contig is NV_TRUE. 121 NvU64 physAddr; 122 123 // Out: Total size of the allocation. 124 NvU64 size; 125 126 // Out: Uuid of the GPU to which the allocation belongs. 127 // This is only valid if deviceDescendant is NV_TRUE. 128 // Note: If the allocation is owned by a device in 129 // an SLI group and the allocation is broadcast 130 // across the SLI group, this UUID will be any one 131 // of the subdevices in the SLI group. 132 NvProcessorUuid uuid; 133 } UvmGpuMemoryInfo; 134 135 // Some resources must share the same virtual mappings across channels. A mapped 136 // resource must be shared by a channel iff: 137 // 138 // 1) The channel belongs to a TSG (UvmGpuChannelInstanceInfo::bTsgChannel is 139 // NV_TRUE). 140 // 141 // 2) The channel is in the same TSG as all other channels sharing that mapping 142 // (UvmGpuChannelInstanceInfo::tsgId matches among channels). 143 // 144 // 3) The channel is in the same GPU address space as the other channels 145 // sharing that mapping. 146 // 147 // 4) The resource handle(s) match those of the shared mapping 148 // (UvmGpuChannelResourceInfo::resourceDescriptor and 149 // UvmGpuChannelResourceInfo::resourceId). 150 typedef struct UvmGpuChannelResourceInfo_tag 151 { 152 // Out: Ptr to the RM memDesc of the channel resource. 153 NvP64 resourceDescriptor; 154 155 // Out: RM ID of the channel resource. 156 NvU32 resourceId; 157 158 // Out: Alignment needed for the resource allocation. 159 NvU64 alignment; 160 161 // Out: Info about the resource allocation. 162 UvmGpuMemoryInfo resourceInfo; 163 } UvmGpuChannelResourceInfo; 164 165 typedef struct UvmGpuPagingChannelInfo_tag 166 { 167 // Pointer to a shadown buffer mirroring the contents of the error notifier 168 // for the paging channel 169 NvNotification *shadowErrorNotifier; 170 } UvmGpuPagingChannelInfo; 171 172 typedef enum 173 { 174 UVM_GPU_CHANNEL_ENGINE_TYPE_GR = 1, 175 UVM_GPU_CHANNEL_ENGINE_TYPE_CE = 2, 176 UVM_GPU_CHANNEL_ENGINE_TYPE_SEC2 = 3, 177 } UVM_GPU_CHANNEL_ENGINE_TYPE; 178 179 #define UVM_GPU_CHANNEL_MAX_RESOURCES 13 180 181 typedef struct UvmGpuChannelInstanceInfo_tag 182 { 183 // Out: Starting address of the channel instance. 184 NvU64 base; 185 186 // Out: Set to NV_TRUE, if the instance is in sysmem. 187 // Set to NV_FALSE, if the instance is in vidmem. 188 NvBool sysmem; 189 190 // Out: Hardware runlist ID. 191 NvU32 runlistId; 192 193 // Out: Hardware channel ID. 194 NvU32 chId; 195 196 // Out: NV_TRUE if the channel belongs to a subcontext or NV_FALSE if it 197 // belongs to a regular context. 198 NvBool bInSubctx; 199 200 // Out: ID of the subcontext to which the channel belongs. 201 NvU32 subctxId; 202 203 // Out: Whether the channel belongs to a TSG or not 204 NvBool bTsgChannel; 205 206 // Out: ID of the TSG to which the channel belongs 207 NvU32 tsgId; 208 209 // Out: Maximum number of subcontexts in the TSG to which the channel belongs 210 NvU32 tsgMaxSubctxCount; 211 212 // Out: Info of channel resources associated with the channel. 213 UvmGpuChannelResourceInfo resourceInfo[UVM_GPU_CHANNEL_MAX_RESOURCES]; 214 215 // Out: Number of valid entries in resourceInfo array. 216 NvU32 resourceCount; 217 218 // Out: Type of the engine the channel is bound to 219 NvU32 channelEngineType; 220 221 // Out: Channel handle to be used in the CLEAR_FAULTED method 222 NvU32 clearFaultedToken; 223 224 // Out: Address of the NV_CHRAM_CHANNEL register required to clear the 225 // ENG_FAULTED/PBDMA_FAULTED bits after servicing non-replayable faults on 226 // Ampere+ GPUs 227 volatile NvU32 *pChramChannelRegister; 228 229 // Out: Address of the Runlist PRI Base Register required to ring the 230 // doorbell after clearing the faulted bit. 231 volatile NvU32 *pRunlistPRIBaseRegister; 232 233 // Out: SMC engine id to which the GR channel is bound, or zero if the GPU 234 // does not support SMC or it is a CE channel 235 NvU32 smcEngineId; 236 237 // Out: Start of the VEID range assigned to the SMC engine the GR channel 238 // is bound to, or zero if the GPU does not support SMC or it is a CE 239 // channel 240 NvU32 smcEngineVeIdOffset; 241 } UvmGpuChannelInstanceInfo; 242 243 typedef struct UvmGpuChannelResourceBindParams_tag 244 { 245 // In: RM ID of the channel resource. 246 NvU32 resourceId; 247 248 // In: Starting VA at which the channel resource is mapped. 249 NvU64 resourceVa; 250 } UvmGpuChannelResourceBindParams; 251 252 typedef struct UvmGpuChannelInfo_tag 253 { 254 volatile unsigned *gpGet; 255 volatile unsigned *gpPut; 256 UvmGpuPointer *gpFifoEntries; 257 unsigned numGpFifoEntries; 258 unsigned channelClassNum; 259 260 // The errorNotifier is filled out when the channel hits an RC error. 261 NvNotification *errorNotifier; 262 263 NvU32 hwRunlistId; 264 NvU32 hwChannelId; 265 266 volatile unsigned *dummyBar1Mapping; 267 268 // These values are filled by nvUvmInterfaceCopyEngineAlloc. The work 269 // submission token requires the channel to be bound to a runlist and that 270 // happens after CE allocation. 271 volatile NvU32 *workSubmissionOffset; 272 273 // To be deprecated. See pWorkSubmissionToken below. 274 NvU32 workSubmissionToken; 275 276 // 277 // This is the memory location where the most recently updated work 278 // submission token for this channel will be written to. After submitting 279 // new work and updating GP_PUT with the appropriate fence, the token must 280 // be read from this location before writing it to the workSubmissionOffset 281 // to kick off the new work. 282 // 283 volatile NvU32 *pWorkSubmissionToken; 284 285 // GPU VAs of both GPFIFO and GPPUT are needed in Confidential Computing 286 // so a channel can be controlled via another channel (SEC2 or WLC/LCIC) 287 NvU64 gpFifoGpuVa; 288 NvU64 gpPutGpuVa; 289 NvU64 gpGetGpuVa; 290 // GPU VA of work submission offset is needed in Confidential Computing 291 // so CE channels can ring doorbell of other channels as required for 292 // WLC/LCIC work submission 293 NvU64 workSubmissionOffsetGpuVa; 294 } UvmGpuChannelInfo; 295 296 typedef enum 297 { 298 // This value must be passed by Pascal and pre-Pascal GPUs for those 299 // allocations for which a specific location cannot be enforced. 300 UVM_BUFFER_LOCATION_DEFAULT = 0, 301 302 UVM_BUFFER_LOCATION_SYS = 1, 303 UVM_BUFFER_LOCATION_VID = 2, 304 } UVM_BUFFER_LOCATION; 305 306 typedef struct UvmGpuTsgAllocParams_tag 307 { 308 // Interpreted as UVM_GPU_CHANNEL_ENGINE_TYPE 309 NvU32 engineType; 310 311 // Index of the engine the TSG is bound to. 312 // Ignored if engineType is anything other than 313 // UVM_GPU_CHANNEL_ENGINE_TYPE_CE. 314 NvU32 engineIndex; 315 } UvmGpuTsgAllocParams; 316 317 typedef struct UvmGpuChannelAllocParams_tag 318 { 319 NvU32 numGpFifoEntries; 320 321 // The next two fields store UVM_BUFFER_LOCATION values 322 NvU32 gpFifoLoc; 323 NvU32 gpPutLoc; 324 } UvmGpuChannelAllocParams; 325 326 typedef struct UvmGpuPagingChannelAllocParams_tag 327 { 328 // Index of the LCE engine the channel will be bound to, a zero-based offset 329 // from NV2080_ENGINE_TYPE_COPY0. 330 NvU32 engineIndex; 331 } UvmGpuPagingChannelAllocParams; 332 333 // The max number of Copy Engines supported by a GPU. 334 // The gpu ops build has a static assert that this is the correct number. 335 #define UVM_COPY_ENGINE_COUNT_MAX 10 336 337 typedef struct 338 { 339 // True if the CE is supported at all 340 NvBool supported:1; 341 342 // True if the CE is synchronous with GR 343 NvBool grce:1; 344 345 // True if the CE shares physical CEs with any other CE 346 // 347 // The value returned by RM for this field may change when a GPU is 348 // registered with RM for the first time, so UVM needs to query it 349 // again each time a GPU is registered. 350 NvBool shared:1; 351 352 // True if the CE can give enhanced performance for SYSMEM reads over other CEs 353 NvBool sysmemRead:1; 354 355 // True if the CE can give enhanced performance for SYSMEM writes over other CEs 356 NvBool sysmemWrite:1; 357 358 // True if the CE can be used for SYSMEM transactions 359 NvBool sysmem:1; 360 361 // True if the CE can be used for P2P transactions using NVLINK 362 NvBool nvlinkP2p:1; 363 364 // True if the CE can be used for P2P transactions 365 NvBool p2p:1; 366 367 // Mask of physical CEs assigned to this LCE 368 // 369 // The value returned by RM for this field may change when a GPU is 370 // registered with RM for the first time, so UVM needs to query it 371 // again each time a GPU is registered. 372 NvU32 cePceMask; 373 } UvmGpuCopyEngineCaps; 374 375 typedef struct UvmGpuCopyEnginesCaps_tag 376 { 377 // Supported CEs may not be contiguous 378 UvmGpuCopyEngineCaps copyEngineCaps[UVM_COPY_ENGINE_COUNT_MAX]; 379 } UvmGpuCopyEnginesCaps; 380 381 typedef enum 382 { 383 UVM_LINK_TYPE_NONE, 384 UVM_LINK_TYPE_PCIE, 385 UVM_LINK_TYPE_NVLINK_1, 386 UVM_LINK_TYPE_NVLINK_2, 387 UVM_LINK_TYPE_NVLINK_3, 388 UVM_LINK_TYPE_NVLINK_4, 389 UVM_LINK_TYPE_C2C, 390 } UVM_LINK_TYPE; 391 392 typedef struct UvmGpuCaps_tag 393 { 394 // If numaEnabled is NV_TRUE, then the system address of allocated GPU 395 // memory can be converted to struct pages. See 396 // UvmGpuInfo::systemMemoryWindowStart. 397 NvBool numaEnabled; 398 NvU32 numaNodeId; 399 } UvmGpuCaps; 400 401 typedef struct UvmGpuAddressSpaceInfo_tag 402 { 403 NvU64 bigPageSize; 404 405 NvBool atsEnabled; 406 407 // Mapped registers that contain the current GPU time 408 volatile NvU32 *time0Offset; 409 volatile NvU32 *time1Offset; 410 411 // Maximum number of subcontexts supported under this GPU address space 412 NvU32 maxSubctxCount; 413 414 NvBool smcEnabled; 415 416 NvU32 smcSwizzId; 417 418 NvU32 smcGpcCount; 419 } UvmGpuAddressSpaceInfo; 420 421 typedef struct UvmGpuAllocInfo_tag 422 { 423 NvU64 gpuPhysOffset; // Returns gpuPhysOffset if contiguous requested 424 NvU64 pageSize; // default is RM big page size - 64K or 128 K" else use 4K or 2M 425 NvU64 alignment; // Virtual alignment 426 NvBool bContiguousPhysAlloc; // Flag to request contiguous physical allocation 427 NvBool bMemGrowsDown; // Causes RM to reserve physical heap from top of FB 428 NvBool bPersistentVidmem; // Causes RM to allocate persistent video memory 429 NvHandle hPhysHandle; // Handle for phys allocation either provided or retrieved 430 NvBool bUnprotected; // Allocation to be made in unprotected memory whenever 431 // SEV or GPU CC modes are enabled. Ignored otherwise 432 } UvmGpuAllocInfo; 433 434 typedef enum 435 { 436 UVM_VIRT_MODE_NONE = 0, // Baremetal or passthrough virtualization 437 UVM_VIRT_MODE_LEGACY = 1, // Virtualization without SRIOV support 438 UVM_VIRT_MODE_SRIOV_HEAVY = 2, // Virtualization with SRIOV Heavy configured 439 UVM_VIRT_MODE_SRIOV_STANDARD = 3, // Virtualization with SRIOV Standard configured 440 UVM_VIRT_MODE_COUNT = 4, 441 } UVM_VIRT_MODE; 442 443 // !!! The following enums (with UvmRm prefix) are defined and documented in 444 // mm/uvm/interface/uvm_types.h and must be mirrored. Please refer to that file 445 // for more details. 446 447 // UVM GPU mapping types 448 typedef enum 449 { 450 UvmRmGpuMappingTypeDefault = 0, 451 UvmRmGpuMappingTypeReadWriteAtomic = 1, 452 UvmRmGpuMappingTypeReadWrite = 2, 453 UvmRmGpuMappingTypeReadOnly = 3, 454 UvmRmGpuMappingTypeCount = 4 455 } UvmRmGpuMappingType; 456 457 // UVM GPU caching types 458 typedef enum 459 { 460 UvmRmGpuCachingTypeDefault = 0, 461 UvmRmGpuCachingTypeForceUncached = 1, 462 UvmRmGpuCachingTypeForceCached = 2, 463 UvmRmGpuCachingTypeCount = 3 464 } UvmRmGpuCachingType; 465 466 // UVM GPU format types 467 typedef enum { 468 UvmRmGpuFormatTypeDefault = 0, 469 UvmRmGpuFormatTypeBlockLinear = 1, 470 UvmRmGpuFormatTypeCount = 2 471 } UvmRmGpuFormatType; 472 473 // UVM GPU Element bits types 474 typedef enum { 475 UvmRmGpuFormatElementBitsDefault = 0, 476 UvmRmGpuFormatElementBits8 = 1, 477 UvmRmGpuFormatElementBits16 = 2, 478 // Cuda does not support 24-bit width 479 UvmRmGpuFormatElementBits32 = 4, 480 UvmRmGpuFormatElementBits64 = 5, 481 UvmRmGpuFormatElementBits128 = 6, 482 UvmRmGpuFormatElementBitsCount = 7 483 } UvmRmGpuFormatElementBits; 484 485 // UVM GPU Compression types 486 typedef enum { 487 UvmRmGpuCompressionTypeDefault = 0, 488 UvmRmGpuCompressionTypeEnabledNoPlc = 1, 489 UvmRmGpuCompressionTypeCount = 2 490 } UvmRmGpuCompressionType; 491 492 typedef struct UvmGpuExternalMappingInfo_tag 493 { 494 // In: GPU caching ability. 495 UvmRmGpuCachingType cachingType; 496 497 // In: Virtual permissions. 498 UvmRmGpuMappingType mappingType; 499 500 // In: RM virtual mapping memory format 501 UvmRmGpuFormatType formatType; 502 503 // In: RM virtual mapping element bits 504 UvmRmGpuFormatElementBits elementBits; 505 506 // In: RM virtual compression type 507 UvmRmGpuCompressionType compressionType; 508 509 // In: Size of the buffer to store PTEs (in bytes). 510 NvU64 pteBufferSize; 511 512 // In: Page size for mapping 513 // If this field is passed as 0, the page size 514 // of the allocation is used for mapping. 515 // nvUvmInterfaceGetExternalAllocPtes must pass 516 // this field as zero. 517 NvU64 mappingPageSize; 518 519 // In: Pointer to a buffer to store PTEs. 520 // Out: The interface will fill the buffer with PTEs 521 NvU64 *pteBuffer; 522 523 // Out: Number of PTEs filled in to the buffer. 524 NvU64 numWrittenPtes; 525 526 // Out: Number of PTEs remaining to be filled 527 // if the buffer is not sufficient to accommodate 528 // requested PTEs. 529 NvU64 numRemainingPtes; 530 531 // Out: PTE size (in bytes) 532 NvU32 pteSize; 533 } UvmGpuExternalMappingInfo; 534 535 typedef struct UvmGpuP2PCapsParams_tag 536 { 537 // Out: peerId[i] contains gpu[i]'s peer id of gpu[1 - i]. Only defined if 538 // the GPUs are direct peers. 539 NvU32 peerIds[2]; 540 541 // Out: UVM_LINK_TYPE 542 NvU32 p2pLink; 543 544 // Out: optimalNvlinkWriteCEs[i] contains gpu[i]'s optimal CE for writing to 545 // gpu[1 - i]. The CE indexes are valid only if the GPUs are NVLink peers. 546 // 547 // The value returned by RM for this field may change when a GPU is 548 // registered with RM for the first time, so UVM needs to query it again 549 // each time a GPU is registered. 550 NvU32 optimalNvlinkWriteCEs[2]; 551 552 // Out: Maximum unidirectional bandwidth between the peers in megabytes per 553 // second, not taking into account the protocols overhead. The reported 554 // bandwidth for indirect peers is zero. 555 NvU32 totalLinkLineRateMBps; 556 557 // Out: True if the peers have a indirect link to communicate. On P9 558 // systems, this is true if peers are connected to different NPUs that 559 // forward the requests between them. 560 NvU32 indirectAccess : 1; 561 } UvmGpuP2PCapsParams; 562 563 // Platform-wide information 564 typedef struct UvmPlatformInfo_tag 565 { 566 // Out: ATS (Address Translation Services) is supported 567 NvBool atsSupported; 568 569 // Out: AMD SEV (Secure Encrypted Virtualization) is enabled 570 NvBool sevEnabled; 571 } UvmPlatformInfo; 572 573 typedef struct UvmGpuClientInfo_tag 574 { 575 NvHandle hClient; 576 577 NvHandle hSmcPartRef; 578 } UvmGpuClientInfo; 579 580 typedef enum 581 { 582 UVM_GPU_CONF_COMPUTE_MODE_NONE, 583 UVM_GPU_CONF_COMPUTE_MODE_APM, 584 UVM_GPU_CONF_COMPUTE_MODE_HCC, 585 UVM_GPU_CONF_COMPUTE_MODE_COUNT 586 } UvmGpuConfComputeMode; 587 588 typedef struct UvmGpuConfComputeCaps_tag 589 { 590 // Out: GPU's confidential compute mode 591 UvmGpuConfComputeMode mode; 592 } UvmGpuConfComputeCaps; 593 594 #define UVM_GPU_NAME_LENGTH 0x40 595 596 typedef struct UvmGpuInfo_tag 597 { 598 // Printable gpu name 599 char name[UVM_GPU_NAME_LENGTH]; 600 601 // Uuid of this gpu 602 NvProcessorUuid uuid; 603 604 // Gpu architecture; NV2080_CTRL_MC_ARCH_INFO_ARCHITECTURE_* 605 NvU32 gpuArch; 606 607 // Gpu implementation; NV2080_CTRL_MC_ARCH_INFO_IMPLEMENTATION_* 608 NvU32 gpuImplementation; 609 610 // Host (gpfifo) class; *_CHANNEL_GPFIFO_*, e.g. KEPLER_CHANNEL_GPFIFO_A 611 NvU32 hostClass; 612 613 // Copy engine (dma) class; *_DMA_COPY_*, e.g. KEPLER_DMA_COPY_A 614 NvU32 ceClass; 615 616 // Compute class; *_COMPUTE_*, e.g. KEPLER_COMPUTE_A 617 NvU32 computeClass; 618 619 // Set if GPU supports TCC Mode & is in TCC mode. 620 NvBool gpuInTcc; 621 622 // Number of subdevices in SLI group. 623 NvU32 subdeviceCount; 624 625 // Virtualization mode of this gpu. 626 NvU32 virtMode; // UVM_VIRT_MODE 627 628 // NV_TRUE if this is a simulated/emulated GPU. NV_FALSE, otherwise. 629 NvBool isSimulated; 630 631 // Number of GPCs 632 // If SMC is enabled, this is the currently configured number of GPCs for 633 // the given partition (also see the smcSwizzId field below). 634 NvU32 gpcCount; 635 636 // Maximum number of GPCs; NV_SCAL_LITTER_NUM_GPCS 637 // This number is independent of the partition configuration, and can be 638 // used to conservatively size GPU-global constructs. 639 NvU32 maxGpcCount; 640 641 // Number of TPCs 642 NvU32 tpcCount; 643 644 // Maximum number of TPCs per GPC 645 NvU32 maxTpcPerGpcCount; 646 647 // NV_TRUE if SMC is enabled on this GPU. 648 NvBool smcEnabled; 649 650 // SMC partition ID (unique per GPU); note: valid when first looked up in 651 // nvUvmInterfaceGetGpuInfo(), but not guaranteed to remain valid. 652 // nvUvmInterfaceDeviceCreate() re-verifies the swizzId and fails if it is 653 // no longer valid. 654 NvU32 smcSwizzId; 655 656 UvmGpuClientInfo smcUserClientInfo; 657 658 // Confidential Compute capabilities of this GPU 659 UvmGpuConfComputeCaps gpuConfComputeCaps; 660 661 // UVM_LINK_TYPE 662 NvU32 sysmemLink; 663 664 // See UvmGpuP2PCapsParams::totalLinkLineRateMBps 665 NvU32 sysmemLinkRateMBps; 666 667 // On coherent systems each GPU maps its memory to a window in the System 668 // Physical Address (SPA) space. The following fields describe that window. 669 // 670 // systemMemoryWindowSize > 0 indicates that the window is valid. meaning 671 // that GPU memory can be mapped by the CPU as cache-coherent by adding the 672 // GPU address to the window start. 673 NvU64 systemMemoryWindowStart; 674 NvU64 systemMemoryWindowSize; 675 676 // This tells if the GPU is connected to NVSwitch. On systems with NVSwitch 677 // all GPUs are connected to it. If connectedToSwitch is NV_TRUE, 678 // nvswitchMemoryWindowStart tells the base address for the GPU in the 679 // NVSwitch address space. It is used when creating PTEs of memory mappings 680 // to NVSwitch peers. 681 NvBool connectedToSwitch; 682 NvU64 nvswitchMemoryWindowStart; 683 } UvmGpuInfo; 684 685 typedef struct UvmGpuFbInfo_tag 686 { 687 // Max physical address that can be allocated by UVM. This excludes internal 688 // RM regions that are not registered with PMA either. 689 NvU64 maxAllocatableAddress; 690 691 NvU32 heapSize; // RAM in KB available for user allocations 692 NvU32 reservedHeapSize; // RAM in KB reserved for internal RM allocation 693 NvBool bZeroFb; // Zero FB mode enabled. 694 } UvmGpuFbInfo; 695 696 typedef struct UvmGpuEccInfo_tag 697 { 698 unsigned eccMask; 699 unsigned eccOffset; 700 void *eccReadLocation; 701 NvBool *eccErrorNotifier; 702 NvBool bEccEnabled; 703 } UvmGpuEccInfo; 704 705 typedef struct UvmPmaAllocationOptions_tag 706 { 707 NvU32 flags; 708 NvU32 minimumSpeed; // valid if flags & UVM_PMA_ALLOCATE_SPECIFY_MININUM_SPEED 709 NvU64 physBegin, physEnd; // valid if flags & UVM_PMA_ALLOCATE_SPECIFY_ADDRESS_RANGE 710 NvU32 regionId; // valid if flags & UVM_PMA_ALLOCATE_SPECIFY_REGION_ID 711 NvU64 alignment; // valid if flags & UVM_PMA_ALLOCATE_FORCE_ALIGNMENT 712 NvLength numPagesAllocated; // valid if flags & UVM_PMA_ALLOCATE_ALLOW_PARTIAL 713 714 NvU32 resultFlags; // valid if the allocation function returns NV_OK 715 } UvmPmaAllocationOptions; 716 717 // 718 // Mirrored in PMA (PMA_STATS) 719 // 720 typedef struct UvmPmaStatistics_tag 721 { 722 volatile NvU64 numPages2m; // PMA-wide 2MB pages count across all regions 723 volatile NvU64 numFreePages64k; // PMA-wide free 64KB page count across all regions 724 volatile NvU64 numFreePages2m; // PMA-wide free 2MB pages count across all regions 725 volatile NvU64 numPages2mProtected; // PMA-wide 2MB pages count in protected memory 726 volatile NvU64 numFreePages64kProtected; // PMA-wide free 64KB page count in protected memory 727 volatile NvU64 numFreePages2mProtected; // PMA-wide free 2MB pages count in protected memory 728 } UvmPmaStatistics; 729 730 /******************************************************************************* 731 uvmEventSuspend 732 This function will be called by the GPU driver to signal to UVM that the 733 system is about to enter a sleep state. When it is called, the 734 following assumptions/guarantees are valid/made: 735 736 * User channels have been preempted and disabled 737 * UVM channels are still running normally and will continue to do 738 so until after this function returns control 739 * User threads are still running, but can no longer issue system 740 system calls to the GPU driver 741 * Until exit from this function, UVM is allowed to make full use of 742 the GPUs under its control, as well as of the GPU driver 743 744 Upon return from this function, UVM may not access GPUs under its control 745 until the GPU driver calls uvmEventResume(). It may still receive 746 calls to uvmEventIsrTopHalf() during this time, which it should return 747 NV_ERR_NO_INTR_PENDING from. It will not receive any other calls. 748 */ 749 typedef NV_STATUS (*uvmEventSuspend_t) (void); 750 751 /******************************************************************************* 752 uvmEventResume 753 This function will be called by the GPU driver to signal to UVM that the 754 system has exited a previously entered sleep state. When it is called, 755 the following assumptions/guarantees are valid/made: 756 757 * UVM is again allowed to make full use of the GPUs under its 758 control, as well as of the GPU driver 759 * UVM channels are running normally 760 * User channels are still preempted and disabled 761 * User threads are again running, but still cannot issue system 762 calls to the GPU driver, nor submit new work 763 764 Upon return from this function, UVM is expected to be fully functional. 765 */ 766 typedef NV_STATUS (*uvmEventResume_t) (void); 767 768 /******************************************************************************* 769 uvmEventStartDevice 770 This function will be called by the GPU driver once it has finished its 771 initialization to tell the UVM driver that this GPU has come up. 772 */ 773 typedef NV_STATUS (*uvmEventStartDevice_t) (const NvProcessorUuid *pGpuUuidStruct); 774 775 /******************************************************************************* 776 uvmEventStopDevice 777 This function will be called by the GPU driver to let UVM know that a GPU 778 is going down. 779 */ 780 typedef NV_STATUS (*uvmEventStopDevice_t) (const NvProcessorUuid *pGpuUuidStruct); 781 782 /******************************************************************************* 783 uvmEventIsrTopHalf_t 784 This function will be called by the GPU driver to let UVM know 785 that an interrupt has occurred. 786 787 Returns: 788 NV_OK if the UVM driver handled the interrupt 789 NV_ERR_NO_INTR_PENDING if the interrupt is not for the UVM driver 790 */ 791 #if defined (__linux__) 792 typedef NV_STATUS (*uvmEventIsrTopHalf_t) (const NvProcessorUuid *pGpuUuidStruct); 793 #else 794 typedef void (*uvmEventIsrTopHalf_t) (void); 795 #endif 796 797 struct UvmOpsUvmEvents 798 { 799 uvmEventSuspend_t suspend; 800 uvmEventResume_t resume; 801 uvmEventStartDevice_t startDevice; 802 uvmEventStopDevice_t stopDevice; 803 uvmEventIsrTopHalf_t isrTopHalf; 804 }; 805 806 #define UVM_CSL_SIGN_AUTH_TAG_SIZE_BYTES 32 807 #define UVM_CSL_CRYPT_AUTH_TAG_SIZE_BYTES 16 808 809 typedef union UvmFaultMetadataPacket_tag 810 { 811 struct { 812 NvU8 authTag[UVM_CSL_CRYPT_AUTH_TAG_SIZE_BYTES]; 813 NvBool valid; 814 }; 815 // padding to 32Bytes 816 NvU8 _padding[32]; 817 } UvmFaultMetadataPacket; 818 819 // This struct shall not be accessed nor modified directly by UVM as it is 820 // entirely managed by the RM layer 821 typedef struct UvmCslContext_tag 822 { 823 struct ccslContext_t *ctx; 824 void *nvidia_stack; 825 } UvmCslContext; 826 827 typedef struct UvmGpuFaultInfo_tag 828 { 829 struct 830 { 831 // Fault buffer GET register mapping. 832 // 833 // When Confidential Computing is enabled, GET refers to the shadow 834 // buffer (see bufferAddress below), and not to the actual HW buffer. 835 // In this setup, writes of GET (by UVM) do not result on re-evaluation 836 // of any interrupt condition. 837 volatile NvU32* pFaultBufferGet; 838 839 // Fault buffer PUT register mapping. 840 // 841 // When Confidential Computing is enabled, PUT refers to the shadow 842 // buffer (see bufferAddress below), and not to the actual HW buffer. 843 // In this setup, writes of PUT (by GSP-RM) do not result on 844 // re-evaluation of any interrupt condition. 845 volatile NvU32* pFaultBufferPut; 846 847 // Note: this variable is deprecated since buffer overflow is not a 848 // separate register from future chips. 849 volatile NvU32* pFaultBufferInfo; 850 851 // Register mapping used to clear a replayable fault interrupt in 852 // Turing+ GPUs. 853 volatile NvU32* pPmcIntr; 854 855 // Register mapping used to enable replayable fault interrupts. 856 volatile NvU32* pPmcIntrEnSet; 857 858 // Register mapping used to disable replayable fault interrupts. 859 volatile NvU32* pPmcIntrEnClear; 860 861 // Register used to enable, or disable, faults on prefetches. 862 volatile NvU32* pPrefetchCtrl; 863 864 // Replayable fault interrupt mask identifier. 865 NvU32 replayableFaultMask; 866 867 // Fault buffer CPU mapping 868 void* bufferAddress; 869 // 870 // When Confidential Computing is disabled, the mapping points to the 871 // actual HW fault buffer. 872 // 873 // When Confidential Computing is enabled, the mapping points to a 874 // copy of the HW fault buffer. This "shadow buffer" is maintained 875 // by GSP-RM. 876 877 // Size, in bytes, of the fault buffer pointed by bufferAddress. 878 NvU32 bufferSize; 879 // Mapping pointing to the start of the fault buffer metadata containing 880 // a 16Byte authentication tag and a valid byte. Always NULL when 881 // Confidential Computing is disabled. 882 UvmFaultMetadataPacket *bufferMetadata; 883 884 // CSL context used for performing decryption of replayable faults when 885 // Confidential Computing is enabled. 886 UvmCslContext cslCtx; 887 888 // Indicates whether UVM owns the replayable fault buffer. 889 // The value of this field is always NV_TRUE When Confidential Computing 890 // is disabled. 891 NvBool bUvmOwnsHwFaultBuffer; 892 } replayable; 893 struct 894 { 895 // Shadow buffer for non-replayable faults on cpu memory. Resman copies 896 // here the non-replayable faults that need to be handled by UVM 897 void* shadowBufferAddress; 898 899 // Execution context for the queue associated with the fault buffer 900 void* shadowBufferContext; 901 902 // Fault buffer size 903 NvU32 bufferSize; 904 905 // Preallocated stack for functions called from the UVM isr top half 906 void *isr_sp; 907 908 // Preallocated stack for functions called from the UVM isr bottom half 909 void *isr_bh_sp; 910 911 // Used only when Hopper Confidential Compute is enabled 912 // Register mappings obtained from RM 913 volatile NvU32* pFaultBufferPut; 914 915 // Used only when Hopper Confidential Compute is enabled 916 // Cached get index of the non-replayable shadow buffer 917 NvU32 shadowBufferGet; 918 919 // See replayable.bufferMetadata 920 UvmFaultMetadataPacket *shadowBufferMetadata; 921 } nonReplayable; 922 NvHandle faultBufferHandle; 923 struct Device *pDevice; 924 } UvmGpuFaultInfo; 925 926 struct Device; 927 928 typedef struct UvmGpuPagingChannel_tag 929 { 930 struct gpuDevice *device; 931 NvNotification *errorNotifier; 932 NvHandle channelHandle; 933 NvHandle errorNotifierHandle; 934 void *pushStreamSp; 935 struct Device *pDevice; 936 } UvmGpuPagingChannel, *UvmGpuPagingChannelHandle; 937 938 typedef struct UvmGpuAccessCntrInfo_tag 939 { 940 // Register mappings obtained from RM 941 // pointer to the Get register for the access counter buffer 942 volatile NvU32* pAccessCntrBufferGet; 943 // pointer to the Put register for the access counter buffer 944 volatile NvU32* pAccessCntrBufferPut; 945 // pointer to the Full register for the access counter buffer 946 volatile NvU32* pAccessCntrBufferFull; 947 // pointer to the hub interrupt 948 volatile NvU32* pHubIntr; 949 // pointer to interrupt enable register 950 volatile NvU32* pHubIntrEnSet; 951 // pointer to interrupt disable register 952 volatile NvU32* pHubIntrEnClear; 953 // mask for the access counter buffer 954 NvU32 accessCounterMask; 955 // access counter buffer cpu mapping and size 956 void* bufferAddress; 957 NvU32 bufferSize; 958 NvHandle accessCntrBufferHandle; 959 } UvmGpuAccessCntrInfo; 960 961 typedef enum 962 { 963 UVM_ACCESS_COUNTER_GRANULARITY_64K = 1, 964 UVM_ACCESS_COUNTER_GRANULARITY_2M = 2, 965 UVM_ACCESS_COUNTER_GRANULARITY_16M = 3, 966 UVM_ACCESS_COUNTER_GRANULARITY_16G = 4, 967 } UVM_ACCESS_COUNTER_GRANULARITY; 968 969 typedef enum 970 { 971 UVM_ACCESS_COUNTER_USE_LIMIT_NONE = 1, 972 UVM_ACCESS_COUNTER_USE_LIMIT_QTR = 2, 973 UVM_ACCESS_COUNTER_USE_LIMIT_HALF = 3, 974 UVM_ACCESS_COUNTER_USE_LIMIT_FULL = 4, 975 } UVM_ACCESS_COUNTER_USE_LIMIT; 976 977 typedef struct UvmGpuAccessCntrConfig_tag 978 { 979 NvU32 mimcGranularity; 980 981 NvU32 momcGranularity; 982 983 NvU32 mimcUseLimit; 984 985 NvU32 momcUseLimit; 986 987 NvU32 threshold; 988 } UvmGpuAccessCntrConfig; 989 990 // 991 // When modifying this enum, make sure they are compatible with the mirrored 992 // MEMORY_PROTECTION enum in phys_mem_allocator.h. 993 // 994 typedef enum UvmPmaGpuMemoryType_tag 995 { 996 UVM_PMA_GPU_MEMORY_TYPE_UNPROTECTED = 0, 997 UVM_PMA_GPU_MEMORY_TYPE_PROTECTED = 1 998 } UVM_PMA_GPU_MEMORY_TYPE; 999 1000 typedef UvmGpuChannelInfo gpuChannelInfo; 1001 typedef UvmGpuTsgAllocParams gpuTsgAllocParams; 1002 typedef UvmGpuChannelAllocParams gpuChannelAllocParams; 1003 typedef UvmGpuCaps gpuCaps; 1004 typedef UvmGpuCopyEngineCaps gpuCeCaps; 1005 typedef UvmGpuCopyEnginesCaps gpuCesCaps; 1006 typedef UvmGpuP2PCapsParams getP2PCapsParams; 1007 typedef UvmGpuAddressSpaceInfo gpuAddressSpaceInfo; 1008 typedef UvmGpuAllocInfo gpuAllocInfo; 1009 typedef UvmGpuInfo gpuInfo; 1010 typedef UvmGpuClientInfo gpuClientInfo; 1011 typedef UvmGpuAccessCntrInfo gpuAccessCntrInfo; 1012 typedef UvmGpuAccessCntrConfig gpuAccessCntrConfig; 1013 typedef UvmGpuFaultInfo gpuFaultInfo; 1014 typedef UvmGpuMemoryInfo gpuMemoryInfo; 1015 typedef UvmGpuExternalMappingInfo gpuExternalMappingInfo; 1016 typedef UvmGpuChannelResourceInfo gpuChannelResourceInfo; 1017 typedef UvmGpuChannelInstanceInfo gpuChannelInstanceInfo; 1018 typedef UvmGpuChannelResourceBindParams gpuChannelResourceBindParams; 1019 typedef UvmGpuFbInfo gpuFbInfo; 1020 typedef UvmGpuEccInfo gpuEccInfo; 1021 typedef UvmGpuPagingChannel *gpuPagingChannelHandle; 1022 typedef UvmGpuPagingChannelInfo gpuPagingChannelInfo; 1023 typedef UvmGpuPagingChannelAllocParams gpuPagingChannelAllocParams; 1024 typedef UvmPmaAllocationOptions gpuPmaAllocationOptions; 1025 1026 typedef struct UvmCslIv 1027 { 1028 NvU8 iv[12]; 1029 NvU8 fresh; 1030 } UvmCslIv; 1031 1032 typedef enum UvmCslOperation 1033 { 1034 UVM_CSL_OPERATION_ENCRYPT, 1035 UVM_CSL_OPERATION_DECRYPT 1036 } UvmCslOperation; 1037 1038 #endif // _NV_UVM_TYPES_H_ 1039