1 /******************************************************************************* 2 Copyright (c) 2013-2023 NVidia Corporation 3 4 Permission is hereby granted, free of charge, to any person obtaining a copy 5 of this software and associated documentation files (the "Software"), to 6 deal in the Software without restriction, including without limitation the 7 rights to use, copy, modify, merge, publish, distribute, sublicense, and/or 8 sell copies of the Software, and to permit persons to whom the Software is 9 furnished to do so, subject to the following conditions: 10 11 The above copyright notice and this permission notice shall be 12 included in all copies or substantial portions of the Software. 13 14 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 15 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 16 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 17 THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 18 LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 19 FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 20 DEALINGS IN THE SOFTWARE. 21 *******************************************************************************/ 22 23 // 24 // uvm_types.h 25 // 26 // This file contains basic datatypes that UVM requires. 
//

#ifndef _UVM_TYPES_H_
#define _UVM_TYPES_H_

#include "nvlimits.h"
#include "nvtypes.h"
#include "nvstatus.h"
#include "nvCpuUuid.h"


/*******************************************************************************
    UVM stream types
*******************************************************************************/

typedef enum
{
    UvmStreamTypeRegular = 0,
    UvmStreamTypeAll     = 1,
    UvmStreamTypeNone    = 2
} UvmStreamType;

// Reserved UvmStream handle values. Note that these numeric values are not the
// same as the UvmStreamType enumerators above (e.g. UVM_STREAM_ALL is 2 while
// UvmStreamTypeAll is 1). The macros reference UvmStream before its typedef;
// that is fine because they are only expanded at their point of use.
#define UVM_STREAM_INVALID  ((UvmStream)0ULL)
#define UVM_STREAM_ALL      ((UvmStream)2ULL)
#define UVM_STREAM_NONE     ((UvmStream)3ULL)

typedef unsigned long long UvmStream;

// The maximum number of GPUs changed when multiple MIG instances per
// uvm_parent_gpu_t were added. See UvmEventQueueCreate().
// The "+ 1" in the processor counts leaves room for one processor in addition
// to the GPUs.
#define UVM_MAX_GPUS_V1       NV_MAX_DEVICES
#define UVM_MAX_PROCESSORS_V1 (UVM_MAX_GPUS_V1 + 1)
#define UVM_MAX_GPUS_V2       (NV_MAX_DEVICES * NV_MAX_SUBDEVICES)
#define UVM_MAX_PROCESSORS_V2 (UVM_MAX_GPUS_V2 + 1)

// For backward compatibility:
// TODO: Bug 4465348: remove these after replacing old references.
#define UVM_MAX_GPUS       UVM_MAX_GPUS_V1
#define UVM_MAX_PROCESSORS UVM_MAX_PROCESSORS_V1

// Number of NvU64 words needed to hold one bit per V2 processor, rounded up
// (sizeof(NvU64) * 8 is the number of bits in one word).
#define UVM_PROCESSOR_MASK_SIZE ((UVM_MAX_PROCESSORS_V2 + (sizeof(NvU64) * 8) - 1) / (sizeof(NvU64) * 8))

// Initialization flag bits. UVM_INIT_FLAGS_MASK is the OR of all flag bits
// defined here.
#define UVM_INIT_FLAGS_DISABLE_HMM                       ((NvU64)0x1)
#define UVM_INIT_FLAGS_MULTI_PROCESS_SHARING_MODE        ((NvU64)0x2)
#define UVM_INIT_FLAGS_MASK                              ((NvU64)0x3)

#define UVM_RANGE_GROUP_ID_NONE        ((NvU64)0)

//------------------------------------------------------------------------------
// UVM GPU mapping types
//
// These types indicate the kinds of accesses allowed from a given GPU at the
// specified virtual address range. There are 3 basic kinds of accesses: read,
// write and atomics. Each type indicates what kinds of accesses are allowed.
// Accesses of any disallowed kind are fatal.
// The "Default" type specifies that
// the UVM driver should decide on the types of accesses allowed.
//------------------------------------------------------------------------------
typedef enum {
    UvmGpuMappingTypeDefault         = 0,   // driver picks the allowed accesses
    UvmGpuMappingTypeReadWriteAtomic = 1,
    UvmGpuMappingTypeReadWrite       = 2,
    UvmGpuMappingTypeReadOnly        = 3,
    UvmGpuMappingTypeCount                  // number of mapping types (4)
} UvmGpuMappingType;

//------------------------------------------------------------------------------
// UVM GPU caching types
//
// Cacheability, from a given GPU, of the specified virtual address range.
//
//  - Default:       the UVM driver turns caching on or off as required to
//                   follow the UVM coherence model.
//  - ForceUncached: caching is always off, overriding the coherence model.
//  - ForceCached:   caching is always on, overriding the coherence model.
//------------------------------------------------------------------------------
typedef enum {
    UvmGpuCachingTypeDefault       = 0,
    UvmGpuCachingTypeForceUncached = 1,
    UvmGpuCachingTypeForceCached   = 2,
    UvmGpuCachingTypeCount                  // number of caching types (3)
} UvmGpuCachingType;

//------------------------------------------------------------------------------
// UVM GPU format types
//
// These types indicate the memory format of the specified virtual address
// range for a given GPU. The "Default" type specifies that the UVM driver will
// detect the format based on the allocation and is mutually inclusive with
// UvmGpuFormatElementBitsDefault.
//------------------------------------------------------------------------------
typedef enum
{
    UvmGpuFormatTypeDefault     = 0,    // driver detects the format
    UvmGpuFormatTypeBlockLinear = 1,
    UvmGpuFormatTypeCount               // number of format types (2)
} UvmGpuFormatType;

//------------------------------------------------------------------------------
// UVM GPU Element bits types
//
// Element size, in bits, of the specified virtual address range for a given
// GPU; for example UvmGpuFormatElementBits8 selects the 8-bit format. With
// "Default" the UVM driver detects the element size from the allocation;
// "Default" is mutually inclusive with UvmGpuFormatTypeDefault.
//------------------------------------------------------------------------------
typedef enum
{
    UvmGpuFormatElementBitsDefault = 0,
    UvmGpuFormatElementBits8       = 1,
    UvmGpuFormatElementBits16      = 2,
    // Value 3 is intentionally unused: Cuda does not support 24-bit width
    UvmGpuFormatElementBits32      = 4,
    UvmGpuFormatElementBits64      = 5,
    UvmGpuFormatElementBits128     = 6,
    UvmGpuFormatElementBitsCount        // one past the last valid value (7)
} UvmGpuFormatElementBits;

//------------------------------------------------------------------------------
// UVM GPU Compression types
//
// These types indicate the compression type of the specified virtual address
// range for a given GPU. The "Default" type specifies that the UVM driver will
// detect the compression attributes based on the allocation. Any type other
// than the default will override the compression behavior of the physical
// allocation. UvmGpuCompressionTypeEnabledNoPlc will disable PLC but enables
// generic compression. UvmGpuCompressionTypeEnabledNoPlc type is only supported
// on Turing plus GPUs.
// Since UvmGpuCompressionTypeEnabledNoPlc type enables
// generic compression, it can only be used when the compression attribute of
// the underlying physical allocation is enabled.
//------------------------------------------------------------------------------
typedef enum {
    UvmGpuCompressionTypeDefault = 0,
    UvmGpuCompressionTypeEnabledNoPlc = 1,
    UvmGpuCompressionTypeCount = 2
} UvmGpuCompressionType;

// Per-GPU mapping attributes. Each NvU32 attribute field carries a value of
// the enum named in its trailing comment.
typedef struct
{
    // UUID of the physical GPU if the GPU is not SMC capable or SMC enabled,
    // or the GPU instance UUID of the partition.
    NvProcessorUuid gpuUuid;
    NvU32           gpuMappingType;     // UvmGpuMappingType
    NvU32           gpuCachingType;     // UvmGpuCachingType
    NvU32           gpuFormatType;      // UvmGpuFormatType
    NvU32           gpuElementBits;     // UvmGpuFormatElementBits
    NvU32           gpuCompressionType; // UvmGpuCompressionType
} UvmGpuMappingAttributes;

// forward declaration of OS-dependent structure
struct UvmGlobalState_tag;

// Platform specific parameters for UvmRegisterGpu*
// Only a Linux (rm_linux) member is defined.
typedef union
{
    struct {
        // File descriptor for RM's control file
        int ctrlFd;
        // RM client handle
        NvHandle hClient;
        // RM SMC partition reference
        NvHandle hSmcPartRef;
    } rm_linux;
} UvmGpuPlatformParams;

// Platform specific parameters for UvmRegisterGpuVaSpace
// The Windows member carries the same RM handles as the Linux one, without
// the control file descriptor.
typedef union
{
    struct {
        // File descriptor for RM's control file
        int ctrlFd;
        // RM client handle
        NvHandle hClient;
        // RM GPU VA space handle
        NvHandle hVaSpace;
    } rm_linux;
    struct {
        // RM client handle
        NvHandle hClient;
        // RM GPU VA space handle
        NvHandle hVaSpace;
    } rm_windows;
} UvmGpuVaSpacePlatformParams;

// Platform specific parameters for UvmRegisterChannel and UvmUnregisterChannel
// Only a Linux (rm_linux) member is defined.
typedef union
{
    struct {
        // File descriptor for RM's control file
        int ctrlFd;
        // RM client handle
        NvHandle hClient;
        // RM channel handle
        NvHandle hChannel;
    } rm_linux;
} UvmChannelPlatformParams;

// Platform specific parameters for UvmMapExternalAllocation
// Only a Linux (rm_linux) member is defined.
typedef union
{
    struct {
        // File descriptor for RM's control file
        int ctrlFd;
        // RM client handle
        NvHandle hClient;
        // RM allocation handle
        NvHandle hMemory;
    } rm_linux;
} UvmAllocationPlatformParams;

//------------------------------------------------------------------------------
//    Tools API types
//------------------------------------------------------------------------------

#define UVM_DEBUG_V1    0x00000001

// Debug session handle; a pointer-sized unsigned integer (NvUPtr).
typedef NvUPtr UvmDebugSession;

//------------------------------------------------------------------------------
// Counter scope: It can be one of the following:
// - Single GPU for a process (UvmCounterScopeProcessSingleGpu)
// - Aggregate of all GPUs for a process (UvmCounterScopeProcessAllGpu)
// - Single GPU system-wide (UvmCounterScopeGlobalSingleGpu)
// (UvmCounterScopeGlobalSingleGpu is not supported for CUDA 6.0)
//
// Note: The user must not assume that the counter values are equal to zero
// at the time of enabling counters.
// Difference between end state counter value and start state counter value
// should be used to find out the correct value over a given period of time.
//------------------------------------------------------------------------------
typedef enum
{
    UvmCounterScopeProcessSingleGpu = 0,
    UvmCounterScopeProcessAllGpu = 1,
    UvmCounterScopeGlobalSingleGpu = 2,
    // Number of scopes (implicitly 3); not itself a valid scope.
    UvmCounterScopeSize
} UvmCounterScope;

//------------------------------------------------------------------------------
// Following numbers assigned to the counter name are used to index their value
// in the counter array.
//------------------------------------------------------------------------------
typedef enum {
    UvmCounterNameBytesXferHtD         = 0,  // host to device
    UvmCounterNameBytesXferDtH         = 1,  // device to host
    UvmCounterNameCpuPageFaultCount    = 2,
#ifdef __windows__
    UvmCounterNameWddmBytesXferBtH     = 3,  // backing store to host
    UvmCounterNameWddmBytesXferHtB     = 4,  // host to backing store
    UvmCounterNameWddmBytesXferDtB     = 5,  // eviction (device to backing store)
    UvmCounterNameWddmBytesXferBtD     = 6,  // restoration (backing store to device)
#endif
    // Bytes prefetched host to device. These bytes are also counted in
    // UvmCounterNameBytesXferHtD.
    UvmCounterNamePrefetchBytesXferHtD = 7,
    // Bytes prefetched device to host. These bytes are also counted in
    // UvmCounterNameBytesXferDtH.
    UvmCounterNamePrefetchBytesXferDtH = 8,
    // Number of faults reported on the GPU.
    UvmCounterNameGpuPageFaultCount    = 9,
    // Size of the counter array (10). Indices 3..6 are reserved for the WDDM
    // counters even when __windows__ is not defined.
    UVM_TOTAL_COUNTERS
} UvmCounterName;

// One flag bit per counter: bit n corresponds to the UvmCounterName whose
// value is n.
#define UVM_COUNTER_NAME_FLAG_BYTES_XFER_HTD          (1 << 0)
#define UVM_COUNTER_NAME_FLAG_BYTES_XFER_DTH          (1 << 1)
#define UVM_COUNTER_NAME_FLAG_CPU_PAGE_FAULT_COUNT    (1 << 2)
#define UVM_COUNTER_NAME_FLAG_WDDM_BYTES_XFER_BTH     (1 << 3)
#define UVM_COUNTER_NAME_FLAG_WDDM_BYTES_XFER_HTB     (1 << 4)
#define UVM_COUNTER_NAME_FLAG_BYTES_XFER_DTB          (1 << 5)
#define UVM_COUNTER_NAME_FLAG_BYTES_XFER_BTD          (1 << 6)
#define UVM_COUNTER_NAME_FLAG_PREFETCH_BYTES_XFER_HTD (1 << 7)
#define UVM_COUNTER_NAME_FLAG_PREFETCH_BYTES_XFER_DTH (1 << 8)
#define UVM_COUNTER_NAME_FLAG_GPU_PAGE_FAULT_COUNT    (1 << 9)

//------------------------------------------------------------------------------
// UVM counter config structure
//
// - scope: Please see the UvmCounterScope enum (above), for details.
// - name: Name of the counter. Please check UvmCounterName for list.
// - gpuid: Identifies the GPU for which the counter will be enabled/disabled
//          This parameter is ignored in AllGpu scopes.
// - state: A value of 0 will disable the counter, a value of 1 will enable
//          the counter.
//------------------------------------------------------------------------------
typedef struct
{
    NvU32           scope; //UVM_DEBUG_V1 (UvmCounterScope)
    NvU32           name;  //UVM_DEBUG_V1 (UvmCounterName)
    NvProcessorUuid gpuid; //UVM_DEBUG_V1
    NvU32           state; //UVM_DEBUG_V1
} UvmCounterConfig;

// Values for UvmCounterConfig::state.
#define UVM_COUNTER_CONFIG_STATE_DISABLE_REQUESTED 0
#define UVM_COUNTER_CONFIG_STATE_ENABLE_REQUESTED  1

// Kind of memory access; stored in the accessType fields of the event
// structures below.
typedef enum
{
    UvmEventMemoryAccessTypeInvalid  = 0,
    UvmEventMemoryAccessTypeRead     = 1,
    UvmEventMemoryAccessTypeWrite    = 2,
    UvmEventMemoryAccessTypeAtomic   = 3,
    UvmEventMemoryAccessTypePrefetch = 4,
    // ---- Add new values above this line
    UvmEventNumMemoryAccessTypes
} UvmEventMemoryAccessType;

// Event identifiers; stored in the eventType field that starts every event
// structure below. UvmEventTypeCpuFault is an alias of
// UvmEventTypeMemoryViolation. The private test types occupy values 62 and 63,
// so UvmEventNumTypesAll is 64 and every type fits as a bit index in an NvU64
// (see the UVM_EVENT_ENABLE_* macros).
typedef enum
{
    UvmEventTypeInvalid                 = 0,

    UvmEventTypeMemoryViolation         = 1,
    UvmEventTypeCpuFault                = UvmEventTypeMemoryViolation,
    UvmEventTypeMigration               = 2,
    UvmEventTypeGpuFault                = 3,
    UvmEventTypeGpuFaultReplay          = 4,
    UvmEventTypeFaultBufferOverflow     = 5,
    UvmEventTypeFatalFault              = 6,
    UvmEventTypeReadDuplicate           = 7,
    UvmEventTypeReadDuplicateInvalidate = 8,
    UvmEventTypePageSizeChange          = 9,
    UvmEventTypeThrashingDetected       = 10,
    UvmEventTypeThrottlingStart         = 11,
    UvmEventTypeThrottlingEnd           = 12,
    UvmEventTypeMapRemote               = 13,
    UvmEventTypeEviction                = 14,

    // ---- Add new values above this line
    UvmEventNumTypes,

    // ---- Private event types for uvm tests
    UvmEventTestTypesFirst              = 62,

    UvmEventTypeTestHmmSplitInvalidate  = UvmEventTestTypesFirst,
    UvmEventTypeTestAccessCounter       = UvmEventTestTypesFirst + 1,

    UvmEventTestTypesLast               = UvmEventTypeTestAccessCounter,

    UvmEventNumTypesAll
} UvmEventType;

//------------------------------------------------------------------------------
// Bit flags used to enable/ disable events:
// Each flag is bit number <UvmEventType value> of an NvU64 mask.
//------------------------------------------------------------------------------
#define UVM_EVENT_ENABLE_MEMORY_VIOLATION          ((NvU64)1 << UvmEventTypeMemoryViolation)
#define UVM_EVENT_ENABLE_CPU_FAULT                 ((NvU64)1 << UvmEventTypeCpuFault)
#define UVM_EVENT_ENABLE_MIGRATION                 ((NvU64)1 << UvmEventTypeMigration)
#define UVM_EVENT_ENABLE_GPU_FAULT                 ((NvU64)1 << UvmEventTypeGpuFault)
#define UVM_EVENT_ENABLE_GPU_FAULT_REPLAY          ((NvU64)1 << UvmEventTypeGpuFaultReplay)
#define UVM_EVENT_ENABLE_FAULT_BUFFER_OVERFLOW     ((NvU64)1 << UvmEventTypeFaultBufferOverflow)
#define UVM_EVENT_ENABLE_FATAL_FAULT               ((NvU64)1 << UvmEventTypeFatalFault)
#define UVM_EVENT_ENABLE_READ_DUPLICATE            ((NvU64)1 << UvmEventTypeReadDuplicate)
#define UVM_EVENT_ENABLE_READ_DUPLICATE_INVALIDATE ((NvU64)1 << UvmEventTypeReadDuplicateInvalidate)
#define UVM_EVENT_ENABLE_PAGE_SIZE_CHANGE          ((NvU64)1 << UvmEventTypePageSizeChange)
#define UVM_EVENT_ENABLE_THRASHING_DETECTED        ((NvU64)1 << UvmEventTypeThrashingDetected)
#define UVM_EVENT_ENABLE_THROTTLING_START          ((NvU64)1 << UvmEventTypeThrottlingStart)
#define UVM_EVENT_ENABLE_THROTTLING_END            ((NvU64)1 << UvmEventTypeThrottlingEnd)
#define UVM_EVENT_ENABLE_MAP_REMOTE                ((NvU64)1 << UvmEventTypeMapRemote)
#define UVM_EVENT_ENABLE_EVICTION                  ((NvU64)1 << UvmEventTypeEviction)
#define UVM_EVENT_ENABLE_TEST_ACCESS_COUNTER       ((NvU64)1 << UvmEventTypeTestAccessCounter)
#define UVM_EVENT_ENABLE_TEST_HMM_SPLIT_INVALIDATE ((NvU64)1 << UvmEventTypeTestHmmSplitInvalidate)

//------------------------------------------------------------------------------
// Information associated with a memory violation event
//------------------------------------------------------------------------------
typedef struct
{
    //
    // eventType has to be 1st argument of this structure. Setting eventType to
    // UvmEventTypeMemoryViolation helps to identify event data in a queue.
    //
    NvU8 eventType;
    NvU8 accessType;          // read/write violation (UvmEventMemoryAccessType)
    //
    // This structure is shared between UVM kernel and tools.
    // Manually padding the structure so that compiler options like pragma pack
    // or malign-double will have no effect on the field offsets.
    //
    NvU16 padding16Bits;
    NvU32 padding32Bits;
    NvU64 address;            // faulting address
    NvU64 timeStamp;          // cpu time when the fault occurred
    NvU32 pid;                // process id causing the fault
    NvU32 threadId;           // thread id causing the fault
    NvU64 pc;                 // address of the instruction causing the fault
} UvmEventCpuFaultInfo_V1;

// Same as UvmEventCpuFaultInfo_V1, except that the 32-bit padding field is
// replaced by the NUMA node ID of the faulting CPU (nid).
typedef struct
{
    //
    // eventType has to be 1st argument of this structure. Setting eventType to
    // UvmEventTypeMemoryViolation helps to identify event data in a queue.
    //
    NvU8 eventType;
    NvU8 accessType;          // read/write violation (UvmEventMemoryAccessType)
    //
    // This structure is shared between UVM kernel and tools.
    // Manually padding the structure so that compiler options like pragma pack
    // or malign-double will have no effect on the field offsets.
    //
    NvU16 padding16Bits;
    NvS32 nid;                // NUMA node ID of faulting CPU
    NvU64 address;            // faulting address
    NvU64 timeStamp;          // cpu time when the fault occurred
    NvU32 pid;                // process id causing the fault
    NvU32 threadId;           // thread id causing the fault
    NvU64 pc;                 // address of the instruction causing the fault
} UvmEventCpuFaultInfo_V2;

// Direction of a migration; stored in UvmEventMigrationInfo_Lite::direction.
typedef enum
{
    UvmEventMigrationDirectionInvalid = 0,
    UvmEventMigrationDirectionCpuToGpu = 1,
    UvmEventMigrationDirectionGpuToCpu = 2,
    // ---- Add new values above this line
    UvmEventNumMigrationDirections
} UvmEventMigrationDirection;

//------------------------------------------------------------------------------
// Information associated with a migration event
//------------------------------------------------------------------------------
typedef struct
{
    //
    // eventType has to be the 1st argument of this structure.
    // Setting eventType = UvmEventTypeMigration helps to identify event data in
    // a queue.
    //
    NvU8 eventType;
    // direction of migration (UvmEventMigrationDirection )
    // this field is deprecated, in favor of (src|dst)Index
    NvU8 direction;
    //
    // Indices are used for the source and destination of migration instead of
    // using gpu uuid/cpu id. This reduces the size of each event. gpuIndex to
    // gpuUuid relation can be obtained from UvmEventGetGpuUuidTable.
    // Currently we do not distinguish between CPUs so they all use index 0xFF.
    //
    NvU8 srcIndex;                 // source CPU/GPU index
    NvU8 dstIndex;                 // destination CPU/GPU index
    //
    // This structure is shared between UVM kernel and tools.
    // Manually padding the structure so that compiler options like pragma pack
    // or malign-double will have no effect on the field offsets
    //
    NvU32 padding32Bits;
    NvU64 address;                 // base virtual addr used for migration
    NvU64 migratedBytes;           // number of bytes migrated
    NvU64 beginTimeStamp;          // cpu time stamp when the migration was
                                   // queued on the gpu
    NvU64 endTimeStamp;            // cpu time stamp when the migration
                                   // finalization was communicated to the cpu
    NvU64 streamId;                // stream causing the migration
} UvmEventMigrationInfo_Lite;

// GPU fault types; stored in the faultType fields of the GPU fault and fatal
// fault event structures. Values from UvmFaultTypeFatal upwards are the fatal
// ones; UvmFaultTypeInvalidPdeSize shares the value of UvmFaultTypeFatal.
typedef enum
{
    // These fault types are handled and may be "fixed" by the UVM driver
    UvmFaultTypeInvalid                                                        = 0,
    UvmFaultTypeInvalidPde                                                     = 1,
    UvmFaultTypeInvalidPte                                                     = 2,
    UvmFaultTypeWrite                                                          = 3,
    UvmFaultTypeAtomic                                                         = 4,
    // The next fault types are fatal and cannot be serviced by the UVM driver
    UvmFaultTypeFatal                                                          = 5,
    UvmFaultTypeInvalidPdeSize                                                 = UvmFaultTypeFatal,
    UvmFaultTypeLimitViolation                                                 = 6,
    UvmFaultTypeUnboundInstBlock                                               = 7,
    UvmFaultTypePrivViolation                                                  = 8,
    UvmFaultTypePitchMaskViolation                                             = 9,
    UvmFaultTypeWorkCreation                                                   = 10,
    UvmFaultTypeUnsupportedAperture                                            = 11,
    UvmFaultTypeCompressionFailure                                             = 12,
    UvmFaultTypeUnsupportedKind                                                = 13,
    UvmFaultTypeRegionViolation                                                = 14,
    UvmFaultTypePoison                                                         = 15,
    // ---- Add new values above this line
    UvmEventNumFaultTypes
} UvmEventFaultType;

// Reason why a fault is fatal; stored in the reason field of the fatal fault
// event structures.
typedef enum
{
    UvmEventFatalReasonInvalid            = 0,
    UvmEventFatalReasonInvalidAddress     = 1,
    UvmEventFatalReasonInvalidPermissions = 2,
    UvmEventFatalReasonInvalidFaultType   = 3,
    UvmEventFatalReasonOutOfMemory        = 4,
    UvmEventFatalReasonInternalError      = 5,

    // This value is reported when a fault is triggered in an invalid context
    // Example: CPU fault on a managed allocation while a kernel is running on a
    // pre-Pascal GPU
    UvmEventFatalReasonInvalidOperation   = 6,
    // ---- Add new values above this line
    UvmEventNumFatalReasons
} UvmEventFatalReason;

// Cause that triggered a migration; stored in the migrationCause field of
// UvmEventMigrationInfo_V1 / _V2.
typedef enum
{
    UvmEventMigrationCauseInvalid       = 0,

    // The migration was initiated by the user via UvmMigrate/UvmMigrateAsync
    UvmEventMigrationCauseUser          = 1,

    // The UVM runtime initiated the migration to ensure that processors can
    // access data coherently
    UvmEventMigrationCauseCoherence     = 2,

    // Speculative migration of pages that are likely to be accessed in the
    // near future. Initiated by the UVM driver performance heuristics.
    UvmEventMigrationCausePrefetch      = 3,

    // Migration performed to evict memory from the GPU.
    UvmEventMigrationCauseEviction      = 4,

    // Migration of pages that are being accessed remotely by the GPU and
    // detected via access counter notifications.
    UvmEventMigrationCauseAccessCounters = 5,

    // ---- Add new values above this line
    UvmEventNumMigrationCauses
} UvmEventMigrationCause;

//------------------------------------------------------------------------------
// Information associated with a migration event UVM onwards
//------------------------------------------------------------------------------
typedef struct
{
    //
    // eventType has to be the 1st argument of this structure. Setting eventType
    // to UvmEventTypeMigration helps to identify event data in a queue.
    //
    NvU8 eventType;
    //
    // Cause that triggered the migration
    //
    NvU8 migrationCause;
    //
    // Indices are used for the source and destination of migration instead of
    // using gpu uuid/cpu id. This reduces the size of each event. The index to
    // gpuUuid relation can be obtained from UvmToolsGetProcessorUuidTable.
    // Currently we do not distinguish between CPUs so they all use index 0.
    //
    NvU8 srcIndex;                 // source CPU/GPU index
    NvU8 dstIndex;                 // destination CPU/GPU index
    //
    // This structure is shared between UVM kernel and tools.
    // Manually padding the structure so that compiler options like pragma pack
    // or malign-double will have no effect on the field offsets
    //
    NvU32 padding32Bits;
    NvU64 address;                 // base virtual addr used for migration
    NvU64 migratedBytes;           // number of bytes migrated
    NvU64 beginTimeStamp;          // cpu time stamp when the memory transfer
                                   // was queued on the gpu
    NvU64 endTimeStamp;            // cpu time stamp when the memory transfer
                                   // finalization was communicated to the cpu
                                   // For asynchronous operations this field
                                   // will be zero
    NvU64 rangeGroupId;            // range group tied with this migration
    NvU64 beginTimeStampGpu;       // time stamp when the migration started
                                   // on the gpu
    NvU64 endTimeStampGpu;         // time stamp when the migration finished
                                   // on the gpu
} UvmEventMigrationInfo_V1;

// Compared to UvmEventMigrationInfo_V1: srcIndex/dstIndex are widened from
// NvU8 to NvU16, and the source/destination CPU NUMA node IDs are added in
// place of the 32-bit padding.
typedef struct
{
    //
    // eventType has to be the 1st argument of this structure. Setting eventType
    // to UvmEventTypeMigration helps to identify event data in a queue.
    //
    NvU8 eventType;
    //
    // Cause that triggered the migration
    //
    NvU8 migrationCause;
    //
    // This structure is shared between UVM kernel and tools.
    // Manually padding the structure so that compiler options like pragma pack
    // or malign-double will have no effect on the field offsets
    //
    NvU16 padding16Bits;
    //
    // Indices are used for the source and destination of migration instead of
    // using gpu uuid/cpu id. This reduces the size of each event. The index to
    // gpuUuid relation can be obtained from UvmToolsGetProcessorUuidTable.
    // Currently we do not distinguish between CPUs so they all use index 0.
    //
    NvU16 srcIndex;                // source CPU/GPU index
    NvU16 dstIndex;                // destination CPU/GPU index
    NvS32 srcNid;                  // source CPU NUMA node ID
    NvS32 dstNid;                  // destination CPU NUMA node ID
    NvU64 address;                 // base virtual addr used for migration
    NvU64 migratedBytes;           // number of bytes migrated
    NvU64 beginTimeStamp;          // cpu time stamp when the memory transfer
                                   // was queued on the gpu
    NvU64 endTimeStamp;            // cpu time stamp when the memory transfer
                                   // finalization was communicated to the cpu
                                   // For asynchronous operations this field
                                   // will be zero
    NvU64 rangeGroupId;            // range group tied with this migration
    NvU64 beginTimeStampGpu;       // time stamp when the migration started
                                   // on the gpu
    NvU64 endTimeStampGpu;         // time stamp when the migration finished
                                   // on the gpu
} UvmEventMigrationInfo_V2;

// Type of client that faulted; stored in the clientType field of the GPU
// fault and fault replay event structures.
typedef enum
{
    UvmEventFaultClientTypeInvalid            = 0,
    UvmEventFaultClientTypeGpc                = 1,
    UvmEventFaultClientTypeHub                = 2,

    // ---- Add new values above this line
    UvmEventNumFaultClientTypes
} UvmEventFaultClientType;

//------------------------------------------------------------------------------
// This info is provided per gpu fault
// This event can be treated as a start event for gpu fault handling
//------------------------------------------------------------------------------
typedef struct
{
    //
    // eventType has to be the 1st argument of this structure.
    // Setting eventType = UvmEventTypeGpuFault helps to identify event data in
    // a queue.
    //
    NvU8 eventType;
    NvU8 faultType;       // type of gpu fault, refer UvmEventFaultType
    NvU8 accessType;      // memory access type, refer UvmEventMemoryAccessType
    NvU8 gpuIndex;        // GPU that experienced the fault
    union
    {
        NvU16 gpcId;      // If this is a replayable fault, this field contains
                          // the physical GPC index where the fault was
                          // triggered

        NvU16 channelId;  // If this is a non-replayable fault, this field
                          // contains the id of the channel that launched the
                          // operation that caused the fault.
                          //
                          // TODO: Bug 3283289: this field is ambiguous for
                          // Ampere+ GPUs, but it is never consumed by clients.
    };
    NvU16 clientId;       // Id of the MMU client that triggered the fault. This
                          // is the value provided by HW and is architecture-
                          // specific. There are separate client ids for
                          // different client types (See dev_fault.h).
    NvU64 address;        // virtual address at which gpu faulted
    NvU64 timeStamp;      // time stamp when the cpu started processing the
                          // fault
    NvU64 timeStampGpu;   // gpu time stamp when the fault entry was written
                          // in the fault buffer
    NvU32 batchId;        // Per-GPU unique id to identify the faults serviced
                          // in batch before:
                          // - Issuing a replay for replayable faults
                          // - Re-scheduling the channel for non-replayable
                          // faults.
    NvU8 clientType;      // Volta+ GPUs can fault on clients other than GR.
                          // UvmEventFaultClientTypeGpc indicates replayable
                          // fault, while UvmEventFaultClientTypeHub indicates
                          // non-replayable fault.

    //
    // This structure is shared between UVM kernel and tools.
    // Manually padding the structure so that compiler options like pragma pack
    // or malign-double will have no effect on the field offsets
    //
    NvU8 padding8Bits;
    NvU16 padding16Bits;
} UvmEventGpuFaultInfo_V1;

// Compared to UvmEventGpuFaultInfo_V1: gpuIndex is widened from NvU8 to NvU16
// and moved to the end of the structure (where V1 had 16 bits of padding); its
// former slot is now padding8Bits_1.
typedef struct
{
    //
    // eventType has to be the 1st argument of this structure.
    // Setting eventType = UvmEventTypeGpuFault helps to identify event data in
    // a queue.
    //
    NvU8 eventType;
    NvU8 faultType;       // type of gpu fault, refer UvmEventFaultType
    NvU8 accessType;      // memory access type, refer UvmEventMemoryAccessType
    //
    // This structure is shared between UVM kernel and tools.
    // Manually padding the structure so that compiler options like pragma pack
    // or malign-double will have no effect on the field offsets
    //
    NvU8 padding8Bits_1;
    union
    {
        NvU16 gpcId;      // If this is a replayable fault, this field contains
                          // the physical GPC index where the fault was
                          // triggered

        NvU16 channelId;  // If this is a non-replayable fault, this field
                          // contains the id of the channel that launched the
                          // operation that caused the fault.
                          //
                          // TODO: Bug 3283289: this field is ambiguous for
                          // Ampere+ GPUs, but it is never consumed by clients.
    };
    NvU16 clientId;       // Id of the MMU client that triggered the fault. This
                          // is the value provided by HW and is architecture-
                          // specific. There are separate client ids for
                          // different client types (See dev_fault.h).
    NvU64 address;        // virtual address at which gpu faulted
    NvU64 timeStamp;      // time stamp when the cpu started processing the
                          // fault
    NvU64 timeStampGpu;   // gpu time stamp when the fault entry was written
                          // in the fault buffer
    NvU32 batchId;        // Per-GPU unique id to identify the faults serviced
                          // in batch before:
                          // - Issuing a replay for replayable faults
                          // - Re-scheduling the channel for non-replayable
                          // faults.
    NvU8 clientType;      // Volta+ GPUs can fault on clients other than GR.
                          // UvmEventFaultClientTypeGpc indicates replayable
                          // fault, while UvmEventFaultClientTypeHub indicates
                          // non-replayable fault.

    //
    // This structure is shared between UVM kernel and tools.
    // Manually padding the structure so that compiler options like pragma pack
    // or malign-double will have no effect on the field offsets
    //
    NvU8 padding8Bits_2;
    NvU16 gpuIndex;       // GPU that experienced the fault
} UvmEventGpuFaultInfo_V2;

//------------------------------------------------------------------------------
// This info is provided when a gpu fault is replayed (for replayable faults)
// or when the channel that launched the operation that triggered the fault is
// rescheduled for execution (for non-replayable faults).
//
// This event can be treated as an end event for gpu fault handling.
// Any other events eg migration events caused as a side-effect of the gpu fault
// would lie between the start and end event.
//------------------------------------------------------------------------------
typedef struct
{
    //
    // eventType has to be the 1st argument of this structure.
    // Setting eventType = UvmEventTypeGpuFaultReplay helps to identify event
    // data in a queue.
    //
    NvU8 eventType;
    NvU8 gpuIndex;        // GPU that experienced the fault
    NvU8 clientType;      // See clientType in UvmEventGpuFaultInfo
    //
    // This structure is shared between UVM kernel and tools.
    // Manually padding the structure so that compiler options like pragma pack
    // or malign-double will have no effect on the field offsets
    //
    NvU8 padding8bits;
    NvU32 batchId;        // Per-GPU unique id to identify the faults that
                          // have been serviced in batch
    NvU64 timeStamp;      // cpu time when the replay of the faulting memory
                          // accesses is queued on the gpu
    NvU64 timeStampGpu;   // gpu time stamp when the replay operation finished
                          // executing on the gpu
} UvmEventGpuFaultReplayInfo_V1;

// Compared to UvmEventGpuFaultReplayInfo_V1: gpuIndex is widened from NvU8 to
// NvU16 (taking over the padding byte) and now follows clientType.
typedef struct
{
    //
    // eventType has to be the 1st argument of this structure.
    // Setting eventType = UvmEventTypeGpuFaultReplay helps to identify event
    // data in a queue.
    //
    NvU8 eventType;
    NvU8 clientType;      // See clientType in UvmEventGpuFaultInfo
    NvU16 gpuIndex;       // GPU that experienced the fault
    NvU32 batchId;        // Per-GPU unique id to identify the faults that
                          // have been serviced in batch
    NvU64 timeStamp;      // cpu time when the replay of the faulting memory
                          // accesses is queued on the gpu
    NvU64 timeStampGpu;   // gpu time stamp when the replay operation finished
                          // executing on the gpu
} UvmEventGpuFaultReplayInfo_V2;

//------------------------------------------------------------------------------
// This info is provided per fatal fault
//------------------------------------------------------------------------------
typedef struct
{
    //
    // eventType has to be the 1st argument of this structure.
    // Setting eventType = UvmEventTypeFatalFault helps to identify event data
    // in a queue.
    //
    NvU8 eventType;
    NvU8 faultType;       // type of gpu fault, refer UvmEventFaultType. Only
                          // valid if processorIndex is a GPU
    NvU8 accessType;      // memory access type, refer UvmEventMemoryAccessType
    NvU8 processorIndex;  // processor that experienced the fault
    NvU8 reason;          // reason why the fault is fatal, refer
                          // UvmEventFatalReason
    NvU8 padding8bits;
    NvU16 padding16bits;
    NvU64 address;        // virtual address at which the processor faulted
    NvU64 timeStamp;      // CPU time when the fault is detected to be fatal
} UvmEventFatalFaultInfo_V1;

// Compared to UvmEventFatalFaultInfo_V1: processorIndex is widened from NvU8
// to NvU16 and now follows reason; the 8-bit padding field is gone.
typedef struct
{
    //
    // eventType has to be the 1st argument of this structure.
    // Setting eventType = UvmEventTypeFatalFault helps to identify event data
    // in a queue.
    //
    NvU8 eventType;
    NvU8 faultType;       // type of gpu fault, refer UvmEventFaultType. Only
                          // valid if processorIndex is a GPU
    NvU8 accessType;      // memory access type, refer UvmEventMemoryAccessType
    NvU8 reason;          // reason why the fault is fatal, refer
                          // UvmEventFatalReason
    NvU16 processorIndex; // processor that experienced the fault
    NvU16 padding16bits;
    NvU64 address;        // virtual address at which the processor faulted
    NvU64 timeStamp;      // CPU time when the fault is detected to be fatal
} UvmEventFatalFaultInfo_V2;

// Information associated with a read-duplicate event. The set of processors
// is reported as a single 64-bit mask.
typedef struct
{
    //
    // eventType has to be the 1st argument of this structure.
    // Setting eventType = UvmEventTypeReadDuplicate helps to identify event
    // data in a queue.
    //
    NvU8 eventType;
    //
    // This structure is shared between UVM kernel and tools.
    // Manually padding the structure so that compiler options like pragma pack
    // or malign-double will have no effect on the field offsets
    //
    NvU8 padding8bits;
    NvU16 padding16bits;
    NvU32 padding32bits;
    NvU64 processors;     // mask that specifies in which processors this
                          // memory region is read-duplicated
    NvU64 address;        // virtual address of the memory region that is
                          // read-duplicated
    NvU64 size;           // size in bytes of the memory region that is
                          // read-duplicated
    NvU64 timeStamp;      // cpu time stamp when the memory region becomes
                          // read-duplicate. Since many processors can
                          // participate in read-duplicate this is time stamp
                          // when all the operations have been pushed to all
                          // the processors.
} UvmEventReadDuplicateInfo_V1;

typedef struct
{
    //
    // eventType has to be the 1st argument of this structure.
    // Setting eventType = UvmEventTypeReadDuplicate helps to identify event
    // data in a queue.
    //
    NvU8 eventType;
    //
    // This structure is shared between UVM kernel and tools.
904 // Manually padding the structure so that compiler options like pragma pack 905 // or malign-double will have no effect on the field offsets 906 // 907 NvU8 padding8bits; 908 NvU16 padding16bits; 909 NvU32 padding32bits; 910 NvU64 address; // virtual address of the memory region that is 911 // read-duplicated 912 NvU64 size; // size in bytes of the memory region that is 913 // read-duplicated 914 NvU64 timeStamp; // cpu time stamp when the memory region becomes 915 // read-duplicate. Since many processors can 916 // participate in read-duplicate this is time stamp 917 // when all the operations have been pushed to all 918 // the processors. 919 NvU64 processors[UVM_PROCESSOR_MASK_SIZE]; 920 // mask that specifies in which processors this 921 // memory region is read-duplicated. This is last 922 // so UVM_PROCESSOR_MASK_SIZE can grow. 923 } UvmEventReadDuplicateInfo_V2; 924 925 typedef struct 926 { 927 // 928 // eventType has to be the 1st argument of this structure. 929 // Setting eventType = UvmEventTypeReadDuplicateInvalidate helps to 930 // identify event data in a queue. 931 // 932 NvU8 eventType; 933 NvU8 residentIndex; // index of the cpu/gpu that now contains the only 934 // valid copy of the memory region 935 // 936 // This structure is shared between UVM kernel and tools. 937 // Manually padding the structure so that compiler options like pragma pack 938 // or malign-double will have no effect on the field offsets 939 // 940 NvU16 padding16bits; 941 NvU32 padding32bits; 942 NvU64 address; // virtual address of the memory region that is 943 // read-duplicated 944 NvU64 size; // size of the memory region that is 945 // read-duplicated 946 NvU64 timeStamp; // cpu time stamp when the memory region is no 947 // longer read-duplicate. Since many processors can 948 // participate in read-duplicate this is time stamp 949 // when all the operations have been pushed to all 950 // the processors. 
951 } UvmEventReadDuplicateInvalidateInfo_V1; 952 953 typedef struct 954 { 955 // 956 // eventType has to be the 1st argument of this structure. 957 // Setting eventType = UvmEventTypeReadDuplicateInvalidate helps to 958 // identify event data in a queue. 959 // 960 NvU8 eventType; 961 NvU8 padding8bits; 962 NvU16 residentIndex; 963 // 964 // This structure is shared between UVM kernel and tools. 965 // Manually padding the structure so that compiler options like pragma pack 966 // or malign-double will have no effect on the field offsets 967 // 968 NvU32 padding32bits; 969 NvU64 address; // virtual address of the memory region that is 970 // read-duplicated 971 NvU64 size; // size of the memory region that is 972 // read-duplicated 973 NvU64 timeStamp; // cpu time stamp when the memory region is no 974 // longer read-duplicate. Since many processors can 975 // participate in read-duplicate this is time stamp 976 // when all the operations have been pushed to all 977 // the processors. 978 } UvmEventReadDuplicateInvalidateInfo_V2; 979 980 typedef struct 981 { 982 // 983 // eventType has to be the 1st argument of this structure. 984 // Setting eventType = UvmEventTypePageSizeChange helps to identify event 985 // data in a queue. 986 // 987 NvU8 eventType; 988 // 989 // This structure is shared between UVM kernel and tools. 990 // Manually padding the structure so that compiler options like pragma pack 991 // or malign-double will have no effect on the field offsets 992 // 993 NvU8 processorIndex; // cpu/gpu processor index for which the page size 994 // changed 995 NvU16 padding16bits; 996 NvU32 size; // new page size 997 NvU64 address; // virtual address of the page whose size has 998 // changed 999 NvU64 timeStamp; // cpu time stamp when the new page size is 1000 // queued on the gpu 1001 } UvmEventPageSizeChangeInfo_V1; 1002 1003 typedef struct 1004 { 1005 // 1006 // eventType has to be the 1st argument of this structure. 
1007 // Setting eventType = UvmEventTypePageSizeChange helps to identify event 1008 // data in a queue. 1009 // 1010 NvU8 eventType; 1011 // 1012 // This structure is shared between UVM kernel and tools. 1013 // Manually padding the structure so that compiler options like pragma pack 1014 // or malign-double will have no effect on the field offsets 1015 // 1016 NvU8 padding8bits; 1017 NvU16 processorIndex; // cpu/gpu processor index for which the page size 1018 // changed 1019 NvU32 size; // new page size 1020 NvU64 address; // virtual address of the page whose size has 1021 // changed 1022 NvU64 timeStamp; // cpu time stamp when the new page size is 1023 // queued on the gpu 1024 } UvmEventPageSizeChangeInfo_V2; 1025 1026 typedef struct 1027 { 1028 // 1029 // eventType has to be the 1st argument of this structure. 1030 // Setting eventType = UvmEventTypeThrashingDetected helps to identify event 1031 // data in a queue. 1032 // 1033 NvU8 eventType; 1034 // 1035 // This structure is shared between UVM kernel and tools. 1036 // Manually padding the structure so that compiler options like pragma pack 1037 // or malign-double will have no effect on the field offsets 1038 // 1039 NvU8 padding8bits; 1040 NvU16 padding16bits; 1041 NvU32 padding32bits; 1042 NvU64 processors; // mask that specifies which processors are 1043 // fighting for this memory region 1044 NvU64 address; // virtual address of the memory region that is 1045 // thrashing 1046 NvU64 size; // size of the memory region that is thrashing 1047 NvU64 timeStamp; // cpu time stamp when thrashing is detected 1048 } UvmEventThrashingDetectedInfo_V1; 1049 1050 typedef struct 1051 { 1052 // 1053 // eventType has to be the 1st argument of this structure. 1054 // Setting eventType = UvmEventTypeThrashingDetected helps to identify event 1055 // data in a queue. 1056 // 1057 NvU8 eventType; 1058 // 1059 // This structure is shared between UVM kernel and tools. 
1060 // Manually padding the structure so that compiler options like pragma pack 1061 // or malign-double will have no effect on the field offsets 1062 // 1063 NvU8 padding8bits; 1064 NvU16 padding16bits; 1065 NvU32 padding32bits; 1066 NvU64 address; // virtual address of the memory region that is 1067 // thrashing 1068 NvU64 size; // size of the memory region that is thrashing 1069 NvU64 timeStamp; // cpu time stamp when thrashing is detected 1070 NvU64 processors[UVM_PROCESSOR_MASK_SIZE]; 1071 // mask that specifies which processors are 1072 // fighting for this memory region. This is last 1073 // so UVM_PROCESSOR_MASK_SIZE can grow. 1074 } UvmEventThrashingDetectedInfo_V2; 1075 1076 typedef struct 1077 { 1078 // 1079 // eventType has to be the 1st argument of this structure. 1080 // Setting eventType = UvmEventTypeThrottlingStart helps to identify event 1081 // data in a queue. 1082 // 1083 NvU8 eventType; 1084 NvU8 processorIndex; // index of the cpu/gpu that was throttled 1085 // 1086 // This structure is shared between UVM kernel and tools. 1087 // Manually padding the structure so that compiler options like pragma pack 1088 // or malign-double will have no effect on the field offsets 1089 // 1090 NvU16 padding16bits; 1091 NvU32 padding32bits; 1092 NvU64 address; // address of the page whose servicing is being 1093 // throttled 1094 NvU64 timeStamp; // cpu start time stamp for the throttling operation 1095 } UvmEventThrottlingStartInfo_V1; 1096 1097 typedef struct 1098 { 1099 // 1100 // eventType has to be the 1st argument of this structure. 1101 // Setting eventType = UvmEventTypeThrottlingStart helps to identify event 1102 // data in a queue. 1103 // 1104 NvU8 eventType; 1105 // 1106 // This structure is shared between UVM kernel and tools. 
1107 // Manually padding the structure so that compiler options like pragma pack 1108 // or malign-double will have no effect on the field offsets 1109 // 1110 NvU8 padding8bits; 1111 NvU16 padding16bits[2]; 1112 NvU16 processorIndex; // index of the cpu/gpu that was throttled 1113 NvU64 address; // address of the page whose servicing is being 1114 // throttled 1115 NvU64 timeStamp; // cpu start time stamp for the throttling operation 1116 } UvmEventThrottlingStartInfo_V2; 1117 1118 typedef struct 1119 { 1120 // 1121 // eventType has to be the 1st argument of this structure. 1122 // Setting eventType = UvmEventTypeThrottlingEnd helps to identify event 1123 // data in a queue. 1124 // 1125 NvU8 eventType; 1126 NvU8 processorIndex; // index of the cpu/gpu that was throttled 1127 // 1128 // This structure is shared between UVM kernel and tools. 1129 // Manually padding the structure so that compiler options like pragma pack 1130 // or malign-double will have no effect on the field offsets 1131 // 1132 NvU16 padding16bits; 1133 NvU32 padding32bits; 1134 NvU64 address; // address of the page whose servicing is being 1135 // throttled 1136 NvU64 timeStamp; // cpu end time stamp for the throttling operation 1137 } UvmEventThrottlingEndInfo_V1; 1138 1139 typedef struct 1140 { 1141 // 1142 // eventType has to be the 1st argument of this structure. 1143 // Setting eventType = UvmEventTypeThrottlingEnd helps to identify event 1144 // data in a queue. 1145 // 1146 NvU8 eventType; 1147 // 1148 // This structure is shared between UVM kernel and tools. 
1149 // Manually padding the structure so that compiler options like pragma pack 1150 // or malign-double will have no effect on the field offsets 1151 // 1152 NvU8 padding8bits; 1153 NvU16 padding16bits[2]; 1154 NvU16 processorIndex; // index of the cpu/gpu that was throttled 1155 NvU64 address; // address of the page whose servicing is being 1156 // throttled 1157 NvU64 timeStamp; // cpu end time stamp for the throttling operation 1158 } UvmEventThrottlingEndInfo_V2; 1159 1160 typedef enum 1161 { 1162 UvmEventMapRemoteCauseInvalid = 0, 1163 1164 // The remote mapping is created to ensure coherence on systems with no 1165 // GPU fault support (UVM-Lite) 1166 UvmEventMapRemoteCauseCoherence = 1, 1167 1168 // The thrashing mitigation policy pinned a memory region on a specific 1169 // processor memory. This cause is used for the remote mappings created 1170 // on the rest of processors to the pinned location. 1171 UvmEventMapRemoteCauseThrashing = 2, 1172 1173 // The remote mapping was created to enforce the PreferredLocation or 1174 // AccessedBy hints provided by the user. 1175 UvmEventMapRemoteCausePolicy = 3, 1176 1177 // There is no available memory on the system so a remote mapping was 1178 // created to the current location. 1179 UvmEventMapRemoteCauseOutOfMemory = 4, 1180 1181 // On GPUs with access counters, memory evicted to sysmem is always mapped 1182 // from the GPU. The UVM driver will invalidate the mapping if the region 1183 // is heavily accessed by the GPU later on. 1184 UvmEventMapRemoteCauseEviction = 5, 1185 } UvmEventMapRemoteCause; 1186 1187 typedef struct 1188 { 1189 // 1190 // eventType has to be the 1st argument of this structure. 1191 // Setting eventType = UvmEventTypeMapRemote helps to identify event data 1192 // in a queue. 
1193 // 1194 NvU8 eventType; 1195 NvU8 srcIndex; // index of the cpu/gpu being remapped 1196 NvU8 dstIndex; // index of the cpu/gpu memory that contains the 1197 // memory region data 1198 NvU8 mapRemoteCause; // field to type UvmEventMapRemoteCause that tells 1199 // the cause for the page to be mapped remotely 1200 // 1201 // This structure is shared between UVM kernel and tools. 1202 // Manually padding the structure so that compiler options like pragma pack 1203 // or malign-double will have no effect on the field offsets 1204 // 1205 NvU32 padding32bits; 1206 NvU64 address; // virtual address of the memory region that is 1207 // thrashing 1208 NvU64 size; // size of the memory region that is thrashing 1209 NvU64 timeStamp; // cpu time stamp when all the required operations 1210 // have been pushed to the processor 1211 NvU64 timeStampGpu; // time stamp when the new mapping is effective in 1212 // the processor specified by srcIndex. If srcIndex 1213 // is a cpu, this field will be zero. 1214 } UvmEventMapRemoteInfo_V1; 1215 1216 typedef struct 1217 { 1218 // 1219 // eventType has to be the 1st argument of this structure. 1220 // Setting eventType = UvmEventTypeMapRemote helps to identify event data 1221 // in a queue. 1222 // 1223 NvU8 eventType; 1224 NvU8 mapRemoteCause; // field to type UvmEventMapRemoteCause that tells 1225 // the cause for the page to be mapped remotely 1226 // 1227 // This structure is shared between UVM kernel and tools. 
1228 // Manually padding the structure so that compiler options like pragma pack 1229 // or malign-double will have no effect on the field offsets 1230 // 1231 NvU16 padding16bits; 1232 NvU16 srcIndex; // index of the cpu/gpu being remapped 1233 NvU16 dstIndex; // index of the cpu/gpu memory that contains the 1234 // memory region data 1235 NvU64 address; // virtual address of the memory region that is 1236 // thrashing 1237 NvU64 size; // size of the memory region that is thrashing 1238 NvU64 timeStamp; // cpu time stamp when all the required operations 1239 // have been pushed to the processor 1240 NvU64 timeStampGpu; // time stamp when the new mapping is effective in 1241 // the processor specified by srcIndex. If srcIndex 1242 // is a cpu, this field will be zero. 1243 } UvmEventMapRemoteInfo_V2; 1244 1245 typedef struct 1246 { 1247 // 1248 // eventType has to be the 1st argument of this structure. 1249 // Setting eventType = UvmEventTypeEviction helps to identify event data 1250 // in a queue. 1251 // 1252 NvU8 eventType; 1253 NvU8 srcIndex; // index of the cpu/gpu from which data is being 1254 // evicted 1255 NvU8 dstIndex; // index of the cpu/gpu memory to which data is 1256 // going to be stored 1257 // 1258 // This structure is shared between UVM kernel and tools. 1259 // Manually padding the structure so that compiler options like pragma pack 1260 // or malign-double will have no effect on the field offsets 1261 // 1262 NvU8 padding8bits; 1263 NvU32 padding32bits; 1264 NvU64 addressOut; // virtual address of the memory region that is 1265 // being evicted 1266 NvU64 addressIn; // virtual address that caused the eviction 1267 NvU64 size; // size of the memory region that being evicted 1268 NvU64 timeStamp; // cpu time stamp when eviction starts on the cpu 1269 } UvmEventEvictionInfo_V1; 1270 1271 typedef struct 1272 { 1273 // 1274 // eventType has to be the 1st argument of this structure. 
1275 // Setting eventType = UvmEventTypeEviction helps to identify event data 1276 // in a queue. 1277 // 1278 NvU8 eventType; 1279 // 1280 // This structure is shared between UVM kernel and tools. 1281 // Manually padding the structure so that compiler options like pragma pack 1282 // or malign-double will have no effect on the field offsets 1283 // 1284 NvU8 padding8bits; 1285 NvU16 padding16bits; 1286 NvU16 srcIndex; // index of the cpu/gpu from which data is being 1287 // evicted 1288 NvU16 dstIndex; // index of the cpu/gpu memory to which data is 1289 // going to be stored 1290 NvU64 addressOut; // virtual address of the memory region that is 1291 // being evicted 1292 NvU64 addressIn; // virtual address that caused the eviction 1293 NvU64 size; // size of the memory region that being evicted 1294 NvU64 timeStamp; // cpu time stamp when eviction starts on the cpu 1295 } UvmEventEvictionInfo_V2; 1296 1297 // TODO: Bug 1870362: [uvm] Provide virtual address and processor index in 1298 // AccessCounter events 1299 // 1300 // Currently we are just passing raw information from the notification buffer 1301 // entries, which includes physical address + aperture. Instead, translate the 1302 // information to something more useful such as virtual address and then index 1303 // of the processor where the accessed data is resident. Most of the 1304 // implementation is required to service access counter notifications 1305 // themselves. 1306 typedef enum 1307 { 1308 UvmEventAperturePeer0 = 1, 1309 UvmEventAperturePeer1 = 2, 1310 UvmEventAperturePeer2 = 3, 1311 UvmEventAperturePeer3 = 4, 1312 UvmEventAperturePeer4 = 5, 1313 UvmEventAperturePeer5 = 6, 1314 UvmEventAperturePeer6 = 7, 1315 UvmEventAperturePeer7 = 8, 1316 UvmEventAperturePeerMax = UvmEventAperturePeer7, 1317 UvmEventApertureSys = 9, 1318 UvmEventApertureVid = 10, 1319 } UvmEventApertureType; 1320 1321 typedef struct 1322 { 1323 // 1324 // eventType has to be the 1st argument of this structure. 
1325 // Setting eventType = UvmEventTypeAccessCounter helps to identify event 1326 // data in a queue. 1327 // 1328 NvU8 eventType; 1329 NvU8 srcIndex; // index of the gpu that received the access counter 1330 // notification 1331 // 1332 // This structure is shared between UVM kernel and tools. 1333 // Manually padding the structure so that compiler options like pragma pack 1334 // or malign-double will have no effect on the field offsets 1335 // 1336 // See uvm_access_counter_buffer_entry_t for details 1337 NvU8 aperture; 1338 NvU8 instancePtrAperture; 1339 1340 NvU8 isVirtual; 1341 NvU8 isFromCpu; 1342 1343 NvU8 veId; 1344 1345 // The physical access counter notification was triggered on a managed 1346 // memory region. This is not set for virtual access counter notifications. 1347 NvU8 physOnManaged; 1348 1349 NvU32 value; 1350 NvU32 subGranularity; 1351 NvU32 tag; 1352 NvU32 bank; 1353 NvU64 address; 1354 NvU64 instancePtr; 1355 } UvmEventTestAccessCounterInfo_V1; 1356 1357 typedef struct 1358 { 1359 // 1360 // eventType has to be the 1st argument of this structure. 1361 // Setting eventType = UvmEventTypeAccessCounter helps to identify event 1362 // data in a queue. 1363 // 1364 NvU8 eventType; 1365 // See uvm_access_counter_buffer_entry_t for details 1366 NvU8 aperture; 1367 NvU8 instancePtrAperture; 1368 NvU8 isVirtual; 1369 NvU8 isFromCpu; 1370 NvU8 veId; 1371 1372 // The physical access counter notification was triggered on a managed 1373 // memory region. This is not set for virtual access counter notifications. 1374 NvU8 physOnManaged; 1375 1376 // 1377 // This structure is shared between UVM kernel and tools. 
1378 // Manually padding the structure so that compiler options like pragma pack 1379 // or malign-double will have no effect on the field offsets 1380 // 1381 NvU8 padding8bits; 1382 NvU16 srcIndex; // index of the gpu that received the access counter 1383 // notification 1384 NvU16 padding16bits; 1385 NvU32 value; 1386 NvU32 subGranularity; 1387 NvU32 tag; 1388 NvU32 bank; 1389 NvU32 padding32bits; 1390 NvU64 address; 1391 NvU64 instancePtr; 1392 } UvmEventTestAccessCounterInfo_V2; 1393 1394 typedef struct 1395 { 1396 NvU8 eventType; 1397 } UvmEventTestSplitInvalidateInfo; 1398 1399 //------------------------------------------------------------------------------ 1400 // Entry added in the event queue buffer when an enabled event occurs. For 1401 // compatibility with all tools ensure that this structure is 64 bit aligned. 1402 //------------------------------------------------------------------------------ 1403 typedef struct 1404 { 1405 union 1406 { 1407 union 1408 { 1409 NvU8 eventType; 1410 UvmEventMigrationInfo_Lite migration_Lite; 1411 1412 UvmEventCpuFaultInfo_V1 cpuFault; 1413 UvmEventMigrationInfo_V1 migration; 1414 UvmEventGpuFaultInfo_V1 gpuFault; 1415 UvmEventGpuFaultReplayInfo_V1 gpuFaultReplay; 1416 UvmEventFatalFaultInfo_V1 fatalFault; 1417 UvmEventReadDuplicateInfo_V1 readDuplicate; 1418 UvmEventReadDuplicateInvalidateInfo_V1 readDuplicateInvalidate; 1419 UvmEventPageSizeChangeInfo_V1 pageSizeChange; 1420 UvmEventThrashingDetectedInfo_V1 thrashing; 1421 UvmEventThrottlingStartInfo_V1 throttlingStart; 1422 UvmEventThrottlingEndInfo_V1 throttlingEnd; 1423 UvmEventMapRemoteInfo_V1 mapRemote; 1424 UvmEventEvictionInfo_V1 eviction; 1425 } eventData; 1426 1427 union 1428 { 1429 NvU8 eventType; 1430 1431 UvmEventTestAccessCounterInfo_V1 accessCounter; 1432 UvmEventTestSplitInvalidateInfo splitInvalidate; 1433 } testEventData; 1434 }; 1435 } UvmEventEntry_V1; 1436 1437 typedef struct 1438 { 1439 union 1440 { 1441 union 1442 { 1443 NvU8 eventType; 1444 
UvmEventMigrationInfo_Lite migration_Lite; 1445 1446 UvmEventCpuFaultInfo_V2 cpuFault; 1447 UvmEventMigrationInfo_V2 migration; 1448 UvmEventGpuFaultInfo_V2 gpuFault; 1449 UvmEventGpuFaultReplayInfo_V2 gpuFaultReplay; 1450 UvmEventFatalFaultInfo_V2 fatalFault; 1451 UvmEventReadDuplicateInfo_V2 readDuplicate; 1452 UvmEventReadDuplicateInvalidateInfo_V2 readDuplicateInvalidate; 1453 UvmEventPageSizeChangeInfo_V2 pageSizeChange; 1454 UvmEventThrashingDetectedInfo_V2 thrashing; 1455 UvmEventThrottlingStartInfo_V2 throttlingStart; 1456 UvmEventThrottlingEndInfo_V2 throttlingEnd; 1457 UvmEventMapRemoteInfo_V2 mapRemote; 1458 UvmEventEvictionInfo_V2 eviction; 1459 } eventData; 1460 1461 union 1462 { 1463 NvU8 eventType; 1464 1465 UvmEventTestAccessCounterInfo_V2 accessCounter; 1466 UvmEventTestSplitInvalidateInfo splitInvalidate; 1467 } testEventData; 1468 }; 1469 } UvmEventEntry_V2; 1470 1471 //------------------------------------------------------------------------------ 1472 // Type of time stamp used in the event entry: 1473 // 1474 // On windows we support QPC type which uses RDTSC if possible else fallbacks to 1475 // HPET. 1476 // 1477 // On Linux ClockGetTime provides similar functionality. 1478 // In UvmEventTimeStampTypeAuto the system decides which time stamp best suites 1479 // current environment. 1480 //------------------------------------------------------------------------------ 1481 typedef enum 1482 { 1483 UvmEventTimeStampTypeInvalid = 0, 1484 UvmEventTimeStampTypeWin32QPC = 1, 1485 UvmEventTimeStampTypePosixClockGetTime = 2, 1486 UvmEventTimeStampTypeAuto = 3, 1487 // ---- Add new values above this line 1488 UvmEventNumTimeStampTypes 1489 } UvmEventTimeStampType; 1490 1491 //------------------------------------------------------------------------------ 1492 // An opaque queue handle is returned to the user when a queue is created. 
1493 //------------------------------------------------------------------------------ 1494 typedef NvUPtr UvmEventQueueHandle; 1495 1496 //------------------------------------------------------------------------------ 1497 // Setting default page size to 4k, 1498 // this can be updated to 64k in case of power PC 1499 //------------------------------------------------------------------------------ 1500 #define UVM_DEBUG_ACCESS_PAGE_SIZE (1 << 12) // 4k page 1501 1502 typedef enum 1503 { 1504 UvmDebugAccessTypeRead = 0, 1505 UvmDebugAccessTypeWrite = 1, 1506 } UvmDebugAccessType; 1507 1508 typedef enum { 1509 UvmToolsEventQueueVersion_V1 = 1, 1510 UvmToolsEventQueueVersion_V2 = 2, 1511 } UvmToolsEventQueueVersion; 1512 1513 typedef struct UvmEventControlData_V1_tag { 1514 // entries between get_ahead and get_behind are currently being read 1515 volatile NvU32 get_ahead; 1516 volatile NvU32 get_behind; 1517 // entries between put_ahead and put_behind are currently being written 1518 volatile NvU32 put_ahead; 1519 volatile NvU32 put_behind; 1520 1521 // counter of dropped events 1522 NvU64 dropped[UvmEventNumTypesAll]; 1523 } UvmToolsEventControlData_V1; 1524 1525 typedef struct UvmEventControlData_V2_tag { 1526 // entries between get_ahead and get_behind are currently being read 1527 volatile NvU32 get_ahead; 1528 volatile NvU32 get_behind; 1529 1530 // entries between put_ahead and put_behind are currently being written 1531 volatile NvU32 put_ahead; 1532 volatile NvU32 put_behind; 1533 1534 // The version values are limited to UvmToolsEventQueueVersion and 1535 // initialized by UvmToolsCreateEventQueue(). 1536 NvU32 version; 1537 NvU32 padding32Bits; 1538 1539 // counter of dropped events 1540 NvU64 dropped[UvmEventNumTypesAll]; 1541 } UvmToolsEventControlData_V2; 1542 1543 // For backward compatibility: 1544 // TODO: Bug 4465348: remove these after replacing old references. 
1545 typedef UvmToolsEventControlData_V1 UvmToolsEventControlData; 1546 typedef UvmEventEntry_V1 UvmEventEntry; 1547 1548 //------------------------------------------------------------------------------ 1549 // UVM Tools forward types (handles) definitions 1550 //------------------------------------------------------------------------------ 1551 struct UvmToolsSession_tag; 1552 struct UvmToolsEventQueue_tag; 1553 struct UvmToolsCounters_tag; 1554 1555 typedef struct UvmToolsSession_tag UvmToolsSession; 1556 typedef struct UvmToolsEventQueue_tag UvmToolsEventQueue; 1557 typedef struct UvmToolsCounters_tag UvmToolsCounters; 1558 1559 typedef UvmToolsSession *UvmToolsSessionHandle; 1560 typedef UvmToolsEventQueue *UvmToolsEventQueueHandle; 1561 typedef UvmToolsCounters *UvmToolsCountersHandle; 1562 1563 #endif // _UVM_TYPES_H_ 1564