/*******************************************************************************
    Copyright (c) 2013-2023 NVIDIA Corporation

    Permission is hereby granted, free of charge, to any person obtaining a copy
    of this software and associated documentation files (the "Software"), to
    deal in the Software without restriction, including without limitation the
    rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
    sell copies of the Software, and to permit persons to whom the Software is
    furnished to do so, subject to the following conditions:

        The above copyright notice and this permission notice shall be
        included in all copies or substantial portions of the Software.

    THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
    IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
    FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
    THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
    LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
    FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
    DEALINGS IN THE SOFTWARE.
*******************************************************************************/

#ifndef _UVM_IOCTL_H
#define _UVM_IOCTL_H

#include "uvm_types.h"

#ifdef __cplusplus
extern "C" {
#endif

//
// Please see the header file (uvm.h) for detailed documentation on each of the
// associated API calls.
//

#if defined(WIN32) || defined(WIN64)
#   define UVM_IOCTL_BASE(i) CTL_CODE(FILE_DEVICE_UNKNOWN, 0x800+i, METHOD_BUFFERED, FILE_READ_DATA | FILE_WRITE_DATA)
#else
#   define UVM_IOCTL_BASE(i) i
#endif

//
// UvmReserveVa
//
#define UVM_RESERVE_VA UVM_IOCTL_BASE(1)

typedef struct
{
    NvU64 requestedBase NV_ALIGN_BYTES(8); // IN
    NvU64 length        NV_ALIGN_BYTES(8); // IN
    NV_STATUS rmStatus;                    // OUT
} UVM_RESERVE_VA_PARAMS;

//
// UvmReleaseVa
//
#define UVM_RELEASE_VA UVM_IOCTL_BASE(2)

typedef struct
{
    NvU64 requestedBase NV_ALIGN_BYTES(8); // IN
    NvU64 length        NV_ALIGN_BYTES(8); // IN
    NV_STATUS rmStatus;                    // OUT
} UVM_RELEASE_VA_PARAMS;

//
// UvmRegionCommit
//
#define UVM_REGION_COMMIT UVM_IOCTL_BASE(3)

typedef struct
{
    NvU64 requestedBase NV_ALIGN_BYTES(8); // IN
    NvU64 length        NV_ALIGN_BYTES(8); // IN
    UvmStream streamId  NV_ALIGN_BYTES(8); // IN
    NvProcessorUuid gpuUuid;               // IN
    NV_STATUS rmStatus;                    // OUT
} UVM_REGION_COMMIT_PARAMS;

//
// UvmRegionDecommit
//
#define UVM_REGION_DECOMMIT UVM_IOCTL_BASE(4)

typedef struct
{
    NvU64 requestedBase NV_ALIGN_BYTES(8); // IN
    NvU64 length        NV_ALIGN_BYTES(8); // IN
    NV_STATUS rmStatus;                    // OUT
} UVM_REGION_DECOMMIT_PARAMS;

//
// UvmRegionSetStream
//
#define UVM_REGION_SET_STREAM UVM_IOCTL_BASE(5)

typedef struct
{
    NvU64 requestedBase   NV_ALIGN_BYTES(8); // IN
    NvU64 length          NV_ALIGN_BYTES(8); // IN
    UvmStream newStreamId NV_ALIGN_BYTES(8); // IN
    NvProcessorUuid gpuUuid;                 // IN
    NV_STATUS rmStatus;                      // OUT
} UVM_REGION_SET_STREAM_PARAMS;

//
// UvmSetStreamRunning
//
#define UVM_SET_STREAM_RUNNING UVM_IOCTL_BASE(6)

typedef struct
{
    UvmStream streamId NV_ALIGN_BYTES(8); // IN
    NV_STATUS rmStatus;                   // OUT
} UVM_SET_STREAM_RUNNING_PARAMS;


//
// Due to limitations in how much we want to send per ioctl call, the nStreams
// member must be less than or equal to about 250. That's an upper limit.
//
// However, from a typical user-space driver's point of view (for example, the
// CUDA driver), the vast majority of the time we expect only one stream to be
// passed in. The second most common case is something like at most 32 streams
// being passed in. Cases with more than 32 streams are the rarest. So we might
// want to optimize the ioctls accordingly, so that we don't always copy a
// 250 * sizeof(streamID) sized array when there's only one or a few streams.
//
// For that reason, UVM_MAX_STREAMS_PER_IOCTL_CALL is set to 32.
//
// If the higher-level (uvm.h) call requires more streams to be stopped than
// this value, then multiple ioctl calls should be made (see the illustrative
// sketch below).
//
#define UVM_MAX_STREAMS_PER_IOCTL_CALL 32

//
// UvmSetStreamStopped
//
#define UVM_SET_STREAM_STOPPED UVM_IOCTL_BASE(7)

typedef struct
{
    UvmStream streamIdArray[UVM_MAX_STREAMS_PER_IOCTL_CALL] NV_ALIGN_BYTES(8); // IN
    NvU64 nStreams NV_ALIGN_BYTES(8); // IN
    NV_STATUS rmStatus;               // OUT
} UVM_SET_STREAM_STOPPED_PARAMS;
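//
// Illustrative sketch only (not part of this header's ABI): how a user-space
// component might stop an arbitrary number of streams by chunking them into
// at most UVM_MAX_STREAMS_PER_IOCTL_CALL per UVM_SET_STREAM_STOPPED ioctl.
// It assumes a Linux UVM file descriptor (uvm_fd) is already open and uses
// simplified error handling.
//
#if 0
#include <string.h>
#include <sys/ioctl.h>

static NV_STATUS uvm_example_stop_streams(int uvm_fd, const UvmStream *streams, NvU64 count)
{
    NvU64 done = 0;

    while (done < count) {
        UVM_SET_STREAM_STOPPED_PARAMS params;
        NvU64 chunk = count - done;

        if (chunk > UVM_MAX_STREAMS_PER_IOCTL_CALL)
            chunk = UVM_MAX_STREAMS_PER_IOCTL_CALL;

        memset(&params, 0, sizeof(params));
        memcpy(params.streamIdArray, streams + done, chunk * sizeof(UvmStream));
        params.nStreams = chunk;

        // On Linux, UVM_SET_STREAM_STOPPED expands to a plain command number.
        if (ioctl(uvm_fd, UVM_SET_STREAM_STOPPED, &params) != 0)
            return NV_ERR_GENERIC;
        if (params.rmStatus != NV_OK)
            return params.rmStatus;

        done += chunk;
    }

    return NV_OK;
}
#endif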
//
// UvmCallTestFunction
//
#define UVM_RUN_TEST UVM_IOCTL_BASE(9)

typedef struct
{
    NvProcessorUuid gpuUuid; // IN
    NvU32 test;              // IN
    struct
    {
        NvProcessorUuid peerGpuUuid; // IN
        NvU32 peerId;                // IN
    } multiGpu;
    NV_STATUS rmStatus;      // OUT
} UVM_RUN_TEST_PARAMS;

//
// This is a magic offset for mmap. Any mapping of an offset above this
// threshold will be treated as a counters mapping, not as an allocation
// mapping. Since allocation offsets must be identical to the virtual address
// of the mapping, this threshold has to be an offset that cannot be a valid
// virtual address.
//
#if defined(__linux__)
#if defined(NV_64_BITS)
#define UVM_EVENTS_OFFSET_BASE   (1UL << 63)
#define UVM_COUNTERS_OFFSET_BASE (1UL << 62)
#else
#define UVM_EVENTS_OFFSET_BASE   (1UL << 31)
#define UVM_COUNTERS_OFFSET_BASE (1UL << 30)
#endif
#endif // defined(__linux__)
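//
// Illustrative sketch only (not part of this header's ABI): on Linux the mmap
// offset is what selects the mapping type. An offset at or above the bases
// above requests an events/counters mapping; anything lower is treated as an
// allocation mapping whose offset must equal the mapping's virtual address.
// Exact sizes and per-object offsets are driver-specific and are assumptions
// here; the sketch only shows the offset convention, given an open UVM file
// descriptor (uvm_fd).
//
#if 0
#include <sys/mman.h>

static void *uvm_example_map_counters(int uvm_fd, size_t size)
{
    // Any offset at or above UVM_COUNTERS_OFFSET_BASE is interpreted as a
    // counters mapping rather than an allocation mapping.
    void *addr = mmap(NULL, size, PROT_READ, MAP_SHARED, uvm_fd,
                      (off_t)UVM_COUNTERS_OFFSET_BASE);

    return (addr == MAP_FAILED) ? NULL : addr;
}
#endif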
//
// UvmAddSession
//
#define UVM_ADD_SESSION UVM_IOCTL_BASE(10)

typedef struct
{
    NvU32 pidTarget; // IN
#ifdef __linux__
    NvP64 countersBaseAddress NV_ALIGN_BYTES(8); // IN
    NvS32 sessionIndex; // OUT (session index that got added)
#endif
    NV_STATUS rmStatus; // OUT
} UVM_ADD_SESSION_PARAMS;

//
// UvmRemoveSession
//
#define UVM_REMOVE_SESSION UVM_IOCTL_BASE(11)

typedef struct
{
#ifdef __linux__
    NvS32 sessionIndex; // IN (session index to be removed)
#endif
    NV_STATUS rmStatus; // OUT
} UVM_REMOVE_SESSION_PARAMS;


#define UVM_MAX_COUNTERS_PER_IOCTL_CALL 32

//
// UvmEnableCounters
//
#define UVM_ENABLE_COUNTERS UVM_IOCTL_BASE(12)

typedef struct
{
#ifdef __linux__
    NvS32 sessionIndex; // IN
#endif
    UvmCounterConfig config[UVM_MAX_COUNTERS_PER_IOCTL_CALL]; // IN
    NvU32 count;        // IN
    NV_STATUS rmStatus; // OUT
} UVM_ENABLE_COUNTERS_PARAMS;

//
// UvmMapCounter
//
#define UVM_MAP_COUNTER UVM_IOCTL_BASE(13)

typedef struct
{
#ifdef __linux__
    NvS32 sessionIndex; // IN
#endif
    NvU32 scope;             // IN (UvmCounterScope)
    NvU32 counterName;       // IN (UvmCounterName)
    NvProcessorUuid gpuUuid; // IN
    NvP64 addr NV_ALIGN_BYTES(8); // OUT
    NV_STATUS rmStatus;      // OUT
} UVM_MAP_COUNTER_PARAMS;

//
// UvmCreateEventQueue
//
#define UVM_CREATE_EVENT_QUEUE UVM_IOCTL_BASE(14)

typedef struct
{
#ifdef __linux__
    NvS32 sessionIndex; // IN
#endif
    NvU32 eventQueueIndex;  // OUT
    NvU64 queueSize         NV_ALIGN_BYTES(8); // IN
    NvU64 notificationCount NV_ALIGN_BYTES(8); // IN
#if defined(WIN32) || defined(WIN64)
    NvU64 notificationHandle NV_ALIGN_BYTES(8); // IN
#endif
    NvU32 timeStampType; // IN (UvmEventTimeStampType)
    NV_STATUS rmStatus;  // OUT
} UVM_CREATE_EVENT_QUEUE_PARAMS;

//
// UvmRemoveEventQueue
//
#define UVM_REMOVE_EVENT_QUEUE UVM_IOCTL_BASE(15)

typedef struct
{
#ifdef __linux__
    NvS32 sessionIndex; // IN
#endif
    NvU32 eventQueueIndex; // IN
    NV_STATUS rmStatus;    // OUT
} UVM_REMOVE_EVENT_QUEUE_PARAMS;

//
// UvmMapEventQueue
//
#define UVM_MAP_EVENT_QUEUE UVM_IOCTL_BASE(16)

typedef struct
{
#ifdef __linux__
    NvS32 sessionIndex; // IN
#endif
    NvU32 eventQueueIndex; // IN
    NvP64 userRODataAddr  NV_ALIGN_BYTES(8); // IN
    NvP64 userRWDataAddr  NV_ALIGN_BYTES(8); // IN
    NvP64 readIndexAddr   NV_ALIGN_BYTES(8); // OUT
    NvP64 writeIndexAddr  NV_ALIGN_BYTES(8); // OUT
    NvP64 queueBufferAddr NV_ALIGN_BYTES(8); // OUT
    NV_STATUS rmStatus;   // OUT
} UVM_MAP_EVENT_QUEUE_PARAMS;

//
// UvmEnableEvent
//
#define UVM_EVENT_CTRL UVM_IOCTL_BASE(17)

typedef struct
{
#ifdef __linux__
    NvS32 sessionIndex; // IN
#endif
    NvU32 eventQueueIndex; // IN
    NvS32 eventType;       // IN
    NvU32 enable;          // IN
    NV_STATUS rmStatus;    // OUT
} UVM_EVENT_CTRL_PARAMS;
//
// UvmRegisterMpsServer
//
#define UVM_REGISTER_MPS_SERVER UVM_IOCTL_BASE(18)

typedef struct
{
    NvProcessorUuid gpuUuidArray[UVM_MAX_GPUS_V1]; // IN
    NvU32 numGpus;                    // IN
    NvU64 serverId NV_ALIGN_BYTES(8); // OUT
    NV_STATUS rmStatus;               // OUT
} UVM_REGISTER_MPS_SERVER_PARAMS;

//
// UvmRegisterMpsClient
//
#define UVM_REGISTER_MPS_CLIENT UVM_IOCTL_BASE(19)

typedef struct
{
    NvU64 serverId NV_ALIGN_BYTES(8); // IN
    NV_STATUS rmStatus;               // OUT
} UVM_REGISTER_MPS_CLIENT_PARAMS;

//
// UvmEventGetGpuUuidTable
//
#define UVM_GET_GPU_UUID_TABLE UVM_IOCTL_BASE(20)

typedef struct
{
    NvProcessorUuid gpuUuidArray[UVM_MAX_GPUS_V1]; // OUT
    NvU32 validCount;   // OUT
    NV_STATUS rmStatus; // OUT
} UVM_GET_GPU_UUID_TABLE_PARAMS;

#if defined(WIN32) || defined(WIN64)
//
// UvmRegionSetBacking
//
#define UVM_REGION_SET_BACKING UVM_IOCTL_BASE(21)

typedef struct
{
    NvProcessorUuid gpuUuid; // IN
    NvU32 hAllocation;       // IN
    NvP64 vaAddr       NV_ALIGN_BYTES(8); // IN
    NvU64 regionLength NV_ALIGN_BYTES(8); // IN
    NV_STATUS rmStatus;      // OUT
} UVM_REGION_SET_BACKING_PARAMS;

//
// UvmRegionUnsetBacking
//
#define UVM_REGION_UNSET_BACKING UVM_IOCTL_BASE(22)

typedef struct
{
    NvP64 vaAddr       NV_ALIGN_BYTES(8); // IN
    NvU64 regionLength NV_ALIGN_BYTES(8); // IN
    NV_STATUS rmStatus;                   // OUT
} UVM_REGION_UNSET_BACKING_PARAMS;

#endif

#define UVM_CREATE_RANGE_GROUP UVM_IOCTL_BASE(23)

typedef struct
{
    NvU64 rangeGroupId NV_ALIGN_BYTES(8); // OUT
    NV_STATUS rmStatus;                   // OUT
} UVM_CREATE_RANGE_GROUP_PARAMS;

#define UVM_DESTROY_RANGE_GROUP UVM_IOCTL_BASE(24)

typedef struct
{
    NvU64 rangeGroupId NV_ALIGN_BYTES(8); // IN
    NV_STATUS rmStatus;                   // OUT
} UVM_DESTROY_RANGE_GROUP_PARAMS;

//
// UvmRegisterGpuVaSpace
//
#define UVM_REGISTER_GPU_VASPACE UVM_IOCTL_BASE(25)

typedef struct
{
    NvProcessorUuid gpuUuid; // IN
    NvS32 rmCtrlFd;          // IN
    NvHandle hClient;        // IN
    NvHandle hVaSpace;       // IN
    NV_STATUS rmStatus;      // OUT
} UVM_REGISTER_GPU_VASPACE_PARAMS;

//
// UvmUnregisterGpuVaSpace
//
#define UVM_UNREGISTER_GPU_VASPACE UVM_IOCTL_BASE(26)

typedef struct
{
    NvProcessorUuid gpuUuid; // IN
    NV_STATUS rmStatus;      // OUT
} UVM_UNREGISTER_GPU_VASPACE_PARAMS;

//
// UvmRegisterChannel
//
#define UVM_REGISTER_CHANNEL UVM_IOCTL_BASE(27)

typedef struct
{
    NvProcessorUuid gpuUuid; // IN
    NvS32 rmCtrlFd;          // IN
    NvHandle hClient;        // IN
    NvHandle hChannel;       // IN
    NvU64 base   NV_ALIGN_BYTES(8); // IN
    NvU64 length NV_ALIGN_BYTES(8); // IN
    NV_STATUS rmStatus;      // OUT
} UVM_REGISTER_CHANNEL_PARAMS;

//
// UvmUnregisterChannel
//
#define UVM_UNREGISTER_CHANNEL UVM_IOCTL_BASE(28)

typedef struct
{
    NvProcessorUuid gpuUuid; // IN
    NvHandle hClient;        // IN
    NvHandle hChannel;       // IN
    NV_STATUS rmStatus;      // OUT
} UVM_UNREGISTER_CHANNEL_PARAMS;

//
// UvmEnablePeerAccess
//
#define UVM_ENABLE_PEER_ACCESS UVM_IOCTL_BASE(29)

typedef struct
{
    NvProcessorUuid gpuUuidA; // IN
    NvProcessorUuid gpuUuidB; // IN
    NV_STATUS rmStatus;       // OUT
} UVM_ENABLE_PEER_ACCESS_PARAMS;
//
// UvmDisablePeerAccess
//
#define UVM_DISABLE_PEER_ACCESS UVM_IOCTL_BASE(30)

typedef struct
{
    NvProcessorUuid gpuUuidA; // IN
    NvProcessorUuid gpuUuidB; // IN
    NV_STATUS rmStatus;       // OUT
} UVM_DISABLE_PEER_ACCESS_PARAMS;

//
// UvmSetRangeGroup
//
#define UVM_SET_RANGE_GROUP UVM_IOCTL_BASE(31)

typedef struct
{
    NvU64 rangeGroupId  NV_ALIGN_BYTES(8); // IN
    NvU64 requestedBase NV_ALIGN_BYTES(8); // IN
    NvU64 length        NV_ALIGN_BYTES(8); // IN
    NV_STATUS rmStatus;                    // OUT
} UVM_SET_RANGE_GROUP_PARAMS;

//
// UvmMapExternalAllocation
//
#define UVM_MAP_EXTERNAL_ALLOCATION UVM_IOCTL_BASE(33)
typedef struct
{
    NvU64 base   NV_ALIGN_BYTES(8); // IN
    NvU64 length NV_ALIGN_BYTES(8); // IN
    NvU64 offset NV_ALIGN_BYTES(8); // IN
    UvmGpuMappingAttributes perGpuAttributes[UVM_MAX_GPUS_V2]; // IN
    NvU64 gpuAttributesCount NV_ALIGN_BYTES(8); // IN
    NvS32 rmCtrlFd; // IN
    NvU32 hClient;  // IN
    NvU32 hMemory;  // IN

    NV_STATUS rmStatus; // OUT
} UVM_MAP_EXTERNAL_ALLOCATION_PARAMS;

//
// UvmFree
//
#define UVM_FREE UVM_IOCTL_BASE(34)
typedef struct
{
    NvU64 base   NV_ALIGN_BYTES(8); // IN
    NvU64 length NV_ALIGN_BYTES(8); // IN
    NV_STATUS rmStatus;             // OUT
} UVM_FREE_PARAMS;

//
// UvmMemMap
//
#define UVM_MEM_MAP UVM_IOCTL_BASE(35)

typedef struct
{
    NvP64 regionBase   NV_ALIGN_BYTES(8); // IN
    NvU64 regionLength NV_ALIGN_BYTES(8); // IN
    NV_STATUS rmStatus;                   // OUT
} UVM_MEM_MAP_PARAMS;

//
// UvmDebugAccessMemory
//
#define UVM_DEBUG_ACCESS_MEMORY UVM_IOCTL_BASE(36)

typedef struct
{
#ifdef __linux__
    NvS32 sessionIndex; // IN
#endif
    NvU64 baseAddress NV_ALIGN_BYTES(8); // IN
    NvU64 sizeInBytes NV_ALIGN_BYTES(8); // IN
    NvU32 accessType;                    // IN (UvmDebugAccessType)
    NvU64 buffer  NV_ALIGN_BYTES(8);     // IN/OUT
    NvBool isBitmaskSet;                 // OUT
    NvU64 bitmask NV_ALIGN_BYTES(8);     // IN/OUT
    NV_STATUS rmStatus;                  // OUT
} UVM_DEBUG_ACCESS_MEMORY_PARAMS;

//
// UvmRegisterGpu
//
#define UVM_REGISTER_GPU UVM_IOCTL_BASE(37)

typedef struct
{
    NvProcessorUuid gpu_uuid; // IN/OUT
    NvBool numaEnabled;       // OUT
    NvS32 numaNodeId;         // OUT
    NvS32 rmCtrlFd;           // IN
    NvHandle hClient;         // IN
    NvHandle hSmcPartRef;     // IN
    NV_STATUS rmStatus;       // OUT
} UVM_REGISTER_GPU_PARAMS;

//
// UvmUnregisterGpu
//
#define UVM_UNREGISTER_GPU UVM_IOCTL_BASE(38)

typedef struct
{
    NvProcessorUuid gpu_uuid; // IN
    NV_STATUS rmStatus;       // OUT
} UVM_UNREGISTER_GPU_PARAMS;

#define UVM_PAGEABLE_MEM_ACCESS UVM_IOCTL_BASE(39)

typedef struct
{
    NvBool pageableMemAccess; // OUT
    NV_STATUS rmStatus;       // OUT
} UVM_PAGEABLE_MEM_ACCESS_PARAMS;
//
// Due to limitations in how much we want to send per ioctl call, the
// numGroupIds member must be less than or equal to about 250. That's an upper
// limit.
//
// However, from a typical user-space driver's point of view (for example, the
// CUDA driver), the vast majority of the time we expect only one range group
// to be passed in. The second most common case is something like at most 32
// range groups being passed in. Cases with more than 32 range groups are the
// rarest. So we might want to optimize the ioctls accordingly, so that we
// don't always copy a 250 * sizeof(NvU64) sized array when there's only one or
// a few range groups.
//
// For that reason, UVM_MAX_RANGE_GROUPS_PER_IOCTL_CALL is set to 32.
//
// If the higher-level (uvm.h) call requires more range groups than this value,
// then multiple ioctl calls should be made.
//
#define UVM_MAX_RANGE_GROUPS_PER_IOCTL_CALL 32

//
// UvmPreventMigrationRangeGroups
//
#define UVM_PREVENT_MIGRATION_RANGE_GROUPS UVM_IOCTL_BASE(40)

typedef struct
{
    NvU64 rangeGroupIds[UVM_MAX_RANGE_GROUPS_PER_IOCTL_CALL] NV_ALIGN_BYTES(8); // IN
    NvU64 numGroupIds NV_ALIGN_BYTES(8); // IN
    NV_STATUS rmStatus;                  // OUT
} UVM_PREVENT_MIGRATION_RANGE_GROUPS_PARAMS;

//
// UvmAllowMigrationRangeGroups
//
#define UVM_ALLOW_MIGRATION_RANGE_GROUPS UVM_IOCTL_BASE(41)

typedef struct
{
    NvU64 rangeGroupIds[UVM_MAX_RANGE_GROUPS_PER_IOCTL_CALL] NV_ALIGN_BYTES(8); // IN
    NvU64 numGroupIds NV_ALIGN_BYTES(8); // IN
    NV_STATUS rmStatus;                  // OUT
} UVM_ALLOW_MIGRATION_RANGE_GROUPS_PARAMS;

//
// UvmSetPreferredLocation
//
#define UVM_SET_PREFERRED_LOCATION UVM_IOCTL_BASE(42)

typedef struct
{
    NvU64 requestedBase NV_ALIGN_BYTES(8); // IN
    NvU64 length        NV_ALIGN_BYTES(8); // IN
    NvProcessorUuid preferredLocation;     // IN
    NvS32 preferredCpuNumaNode;            // IN
    NV_STATUS rmStatus;                    // OUT
} UVM_SET_PREFERRED_LOCATION_PARAMS;

//
// UvmUnsetPreferredLocation
//
#define UVM_UNSET_PREFERRED_LOCATION UVM_IOCTL_BASE(43)

typedef struct
{
    NvU64 requestedBase NV_ALIGN_BYTES(8); // IN
    NvU64 length        NV_ALIGN_BYTES(8); // IN
    NV_STATUS rmStatus;                    // OUT
} UVM_UNSET_PREFERRED_LOCATION_PARAMS;

//
// UvmEnableReadDuplication
//
#define UVM_ENABLE_READ_DUPLICATION UVM_IOCTL_BASE(44)

typedef struct
{
    NvU64 requestedBase NV_ALIGN_BYTES(8); // IN
    NvU64 length        NV_ALIGN_BYTES(8); // IN
    NV_STATUS rmStatus;                    // OUT
} UVM_ENABLE_READ_DUPLICATION_PARAMS;

//
// UvmDisableReadDuplication
//
#define UVM_DISABLE_READ_DUPLICATION UVM_IOCTL_BASE(45)

typedef struct
{
    NvU64 requestedBase NV_ALIGN_BYTES(8); // IN
    NvU64 length        NV_ALIGN_BYTES(8); // IN
    NV_STATUS rmStatus;                    // OUT
} UVM_DISABLE_READ_DUPLICATION_PARAMS;

//
// UvmSetAccessedBy
//
#define UVM_SET_ACCESSED_BY UVM_IOCTL_BASE(46)

typedef struct
{
    NvU64 requestedBase NV_ALIGN_BYTES(8); // IN
    NvU64 length        NV_ALIGN_BYTES(8); // IN
    NvProcessorUuid accessedByUuid;        // IN
    NV_STATUS rmStatus;                    // OUT
} UVM_SET_ACCESSED_BY_PARAMS;

//
// UvmUnsetAccessedBy
//
#define UVM_UNSET_ACCESSED_BY UVM_IOCTL_BASE(47)

typedef struct
{
    NvU64 requestedBase NV_ALIGN_BYTES(8); // IN
    NvU64 length        NV_ALIGN_BYTES(8); // IN
    NvProcessorUuid accessedByUuid;        // IN
    NV_STATUS rmStatus;                    // OUT
} UVM_UNSET_ACCESSED_BY_PARAMS;
// For managed allocations, UVM_MIGRATE implements the behavior described in
// UvmMigrate. If the input virtual range corresponds to system-allocated
// pageable memory, and the GPUs in the system support transparent access to
// pageable memory, the scheme is a bit more elaborate, potentially with
// several transitions between user and kernel space:
//
// 1) UVM_MIGRATE is called with the range base address and size. This will
//    migrate anonymous vmas until:
//    a) It finds a file-backed vma, or no GPUs are registered in the VA space
//       so no GPU can drive the copy. It will try to populate the vma using
//       get_user_pages and return NV_WARN_NOTHING_TO_DO.
//    b) It fails to allocate memory on the destination CPU node. It will
//       return NV_ERR_MORE_PROCESSING_REQUIRED.
//    c) It fails to populate pages directly on the destination GPU. It will
//       try to populate the vma using get_user_pages and return.
//    d) The full input range is migrated (or is empty). This call will release
//       the semaphore before returning.
// 2) User mode needs to handle the following error codes:
//    a) NV_WARN_NOTHING_TO_DO: use move_pages to migrate pages for the VA
//       range corresponding to the vma that couldn't be migrated in kernel
//       mode. Then process the remainder of the range, starting after that
//       vma.
//    b) NV_ERR_MORE_PROCESSING_REQUIRED: choose a different CPU NUMA node,
//       trying to enforce the NUMA policies of the thread, and retry the
//       ioctl. If there are no more CPU NUMA nodes to try, try to populate
//       the remainder of the range anywhere using the UVM_POPULATE_PAGEABLE
//       ioctl.
//    c) NV_OK: success. This only guarantees that pages were populated, not
//       that they moved to the requested destination.
// 3) For cases 2.a) and 2.b), go back to step 1.
//
// A sketch of this user-mode loop follows the UVM_MIGRATE_PARAMS definition
// below.
//
// If UVM_MIGRATE_FLAG_ASYNC is 0, the ioctl won't return until the migration
// is done and all mappings are updated, subject to the special rules for
// pageable memory described above. semaphoreAddress must be 0.
// semaphorePayload is ignored.
//
// If UVM_MIGRATE_FLAG_ASYNC is 1, the ioctl may return before the migration is
// complete. If semaphoreAddress is 0, semaphorePayload is ignored and no
// notification will be given on completion. If semaphoreAddress is non-zero
// and the returned error code is NV_OK, semaphorePayload will be written to
// semaphoreAddress once the migration is complete.
#define UVM_MIGRATE_FLAG_ASYNC           0x00000001

// When the migration destination is the CPU, skip the step which creates new
// virtual mappings on the CPU. Creating CPU mappings must wait for the
// migration to complete, so skipping this step allows the migration to be
// fully asynchronous. This flag is ignored for pageable migrations if the GPUs
// in the system support transparent access to pageable memory.
//
// The UVM driver must have builtin tests enabled for the API to use this flag.
#define UVM_MIGRATE_FLAG_SKIP_CPU_MAP    0x00000002

// By default UVM_MIGRATE returns an error if the destination UUID is a GPU
// without a registered GPU VA space. Setting this flag skips that check, so
// the destination GPU only needs to have been registered.
//
// This can be used in tests to trigger migrations of physical memory without
// the overhead of GPU PTE mappings.
//
// The UVM driver must have builtin tests enabled for the API to use this flag.
#define UVM_MIGRATE_FLAG_NO_GPU_VA_SPACE 0x00000004

#define UVM_MIGRATE_FLAGS_TEST_ALL (UVM_MIGRATE_FLAG_SKIP_CPU_MAP | \
                                    UVM_MIGRATE_FLAG_NO_GPU_VA_SPACE)

#define UVM_MIGRATE_FLAGS_ALL      (UVM_MIGRATE_FLAG_ASYNC | \
                                    UVM_MIGRATE_FLAGS_TEST_ALL)

// If NV_ERR_INVALID_ARGUMENT is returned, it is because cpuMemoryNode is not
// valid and the destination processor is the CPU. cpuMemoryNode is considered
// invalid if:
//  * it is less than -1,
//  * it is equal to or larger than the maximum number of nodes,
//  * it corresponds to a registered GPU,
//  * it is not in the node_possible_map set of nodes, or
//  * it does not have onlined memory.
//
// For pageable migrations:
//
// In addition to the above, cpuMemoryNode is also considered invalid if it
// is -1.
//
// If NV_WARN_NOTHING_TO_DO is returned, user-space is responsible for
// completing the migration of the VA range described by userSpaceStart and
// userSpaceLength using move_pages.
//
// If NV_ERR_MORE_PROCESSING_REQUIRED is returned, user-space is responsible
// for retrying with a different cpuNumaNode, starting at userSpaceStart.
//
#define UVM_MIGRATE UVM_IOCTL_BASE(51)
typedef struct
{
    NvU64 base   NV_ALIGN_BYTES(8); // IN
    NvU64 length NV_ALIGN_BYTES(8); // IN
    NvProcessorUuid destinationUuid; // IN
    NvU32 flags;                     // IN
    NvU64 semaphoreAddress NV_ALIGN_BYTES(8); // IN
    NvU32 semaphorePayload;          // IN
    NvS32 cpuNumaNode;               // IN
    NvU64 userSpaceStart  NV_ALIGN_BYTES(8); // OUT
    NvU64 userSpaceLength NV_ALIGN_BYTES(8); // OUT
    NV_STATUS rmStatus;              // OUT
} UVM_MIGRATE_PARAMS;
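//
// Illustrative sketch only (not part of this header's ABI): the user-mode
// retry loop described above for pageable memory, issued synchronously
// (flags = 0). The move_pages()/NUMA-policy details are elided, and
// pick_next_cpu_node() is a hypothetical helper standing in for the caller's
// NUMA-node selection. Assumes an open UVM file descriptor (uvm_fd) and
// simplified error handling.
//
#if 0
#include <string.h>
#include <sys/ioctl.h>

static NV_STATUS uvm_example_migrate_pageable(int uvm_fd,
                                              NvU64 base,
                                              NvU64 length,
                                              const NvProcessorUuid *destUuid,
                                              NvS32 cpuNumaNode)
{
    NvU64 cur = base;
    NvU64 end = base + length;

    while (cur < end) {
        UVM_MIGRATE_PARAMS params;

        memset(&params, 0, sizeof(params));
        params.base = cur;
        params.length = end - cur;
        params.destinationUuid = *destUuid;
        params.flags = 0;               // synchronous: no semaphore notification
        params.cpuNumaNode = cpuNumaNode;

        if (ioctl(uvm_fd, UVM_MIGRATE, &params) != 0)
            return NV_ERR_GENERIC;

        if (params.rmStatus == NV_WARN_NOTHING_TO_DO) {
            // 2.a) Migrate [userSpaceStart, userSpaceStart + userSpaceLength)
            //      with move_pages() (call elided), then resume after that vma.
            cur = params.userSpaceStart + params.userSpaceLength;
        } else if (params.rmStatus == NV_ERR_MORE_PROCESSING_REQUIRED) {
            // 2.b) Retry from userSpaceStart with a different CPU NUMA node,
            //      or fall back to UVM_POPULATE_PAGEABLE when none are left.
            cpuNumaNode = pick_next_cpu_node(cpuNumaNode); // hypothetical helper
            cur = params.userSpaceStart;
        } else {
            // NV_OK (pages populated) or a hard error: stop the loop.
            return params.rmStatus;
        }
    }

    return NV_OK;
}
#endif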
#define UVM_MIGRATE_RANGE_GROUP UVM_IOCTL_BASE(53)
typedef struct
{
    NvU64 rangeGroupId NV_ALIGN_BYTES(8); // IN
    NvProcessorUuid destinationUuid;      // IN
    NV_STATUS rmStatus;                   // OUT
} UVM_MIGRATE_RANGE_GROUP_PARAMS;

//
// UvmEnableSystemWideAtomics
//
#define UVM_ENABLE_SYSTEM_WIDE_ATOMICS UVM_IOCTL_BASE(54)

typedef struct
{
    NvProcessorUuid gpu_uuid; // IN
    NV_STATUS rmStatus;       // OUT
} UVM_ENABLE_SYSTEM_WIDE_ATOMICS_PARAMS;

//
// UvmDisableSystemWideAtomics
//
#define UVM_DISABLE_SYSTEM_WIDE_ATOMICS UVM_IOCTL_BASE(55)

typedef struct
{
    NvProcessorUuid gpu_uuid; // IN
    NV_STATUS rmStatus;       // OUT
} UVM_DISABLE_SYSTEM_WIDE_ATOMICS_PARAMS;

//
// Initialize any tracker object such as a queue or counter:
// UvmToolsCreateEventQueue, UvmToolsCreateProcessAggregateCounters,
// UvmToolsCreateProcessorCounters.
// Note that the structure is laid out so that the version is the last field.
// This is used to tell whether the kernel supports V2 events or not, because
// the V1 UVM_TOOLS_INIT_EVENT_TRACKER ioctl would not read or update that
// field but V2 will. This is needed because it is possible to create an event
// queue before CUDA is initialized, which means UvmSetDriverVersion() hasn't
// been called yet and the kernel version is unknown.
//
#define UVM_TOOLS_INIT_EVENT_TRACKER UVM_IOCTL_BASE(56)
typedef struct
{
    NvU64 queueBuffer     NV_ALIGN_BYTES(8); // IN
    NvU64 queueBufferSize NV_ALIGN_BYTES(8); // IN
    NvU64 controlBuffer   NV_ALIGN_BYTES(8); // IN
    NvProcessorUuid processor; // IN
    NvU32 allProcessors;       // IN
    NvU32 uvmFd;               // IN
    NV_STATUS rmStatus;        // OUT
    NvU32 requestedVersion;    // IN
    NvU32 grantedVersion;      // OUT
} UVM_TOOLS_INIT_EVENT_TRACKER_PARAMS;
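//
// Illustrative sketch only (not part of this header's ABI): one plausible way
// for a tool to probe V2 support using the trailing version fields described
// above. Since an older (V1) kernel never touches grantedVersion, presetting
// it to a sentinel and checking it afterwards shows whether the request was
// honored. The buffer/processor setup and the choice of version constants are
// assumed to come from the caller and uvm_types.h.
//
#if 0
#include <sys/ioctl.h>

static NvU32 uvm_example_init_event_tracker(int tools_fd,
                                            const UVM_TOOLS_INIT_EVENT_TRACKER_PARAMS *setup,
                                            NvU32 requestedVersion)
{
    UVM_TOOLS_INIT_EVENT_TRACKER_PARAMS params = *setup; // buffers, processor, uvmFd

    params.requestedVersion = requestedVersion;
    params.grantedVersion = 0;          // sentinel: a V1 kernel leaves this as-is

    if (ioctl(tools_fd, UVM_TOOLS_INIT_EVENT_TRACKER, &params) != 0 ||
        params.rmStatus != NV_OK)
        return 0;

    // Non-zero means the kernel understood the versioned layout; zero means
    // only the original V1 event format is available.
    return params.grantedVersion;
}
#endif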
//
// UvmToolsSetNotificationThreshold
//
#define UVM_TOOLS_SET_NOTIFICATION_THRESHOLD UVM_IOCTL_BASE(57)
typedef struct
{
    NvU32 notificationThreshold; // IN
    NV_STATUS rmStatus;          // OUT
} UVM_TOOLS_SET_NOTIFICATION_THRESHOLD_PARAMS;

//
// UvmToolsEventQueueEnableEvents
//
#define UVM_TOOLS_EVENT_QUEUE_ENABLE_EVENTS UVM_IOCTL_BASE(58)
typedef struct
{
    NvU64 eventTypeFlags NV_ALIGN_BYTES(8); // IN
    NV_STATUS rmStatus;                     // OUT
} UVM_TOOLS_EVENT_QUEUE_ENABLE_EVENTS_PARAMS;

//
// UvmToolsEventQueueDisableEvents
//
#define UVM_TOOLS_EVENT_QUEUE_DISABLE_EVENTS UVM_IOCTL_BASE(59)
typedef struct
{
    NvU64 eventTypeFlags NV_ALIGN_BYTES(8); // IN
    NV_STATUS rmStatus;                     // OUT
} UVM_TOOLS_EVENT_QUEUE_DISABLE_EVENTS_PARAMS;

//
// UvmToolsEnableCounters
//
#define UVM_TOOLS_ENABLE_COUNTERS UVM_IOCTL_BASE(60)
typedef struct
{
    NvU64 counterTypeFlags NV_ALIGN_BYTES(8); // IN
    NV_STATUS rmStatus;                       // OUT
} UVM_TOOLS_ENABLE_COUNTERS_PARAMS;

//
// UvmToolsDisableCounters
//
#define UVM_TOOLS_DISABLE_COUNTERS UVM_IOCTL_BASE(61)
typedef struct
{
    NvU64 counterTypeFlags NV_ALIGN_BYTES(8); // IN
    NV_STATUS rmStatus;                       // OUT
} UVM_TOOLS_DISABLE_COUNTERS_PARAMS;

//
// UvmToolsReadProcessMemory
//
#define UVM_TOOLS_READ_PROCESS_MEMORY UVM_IOCTL_BASE(62)
typedef struct
{
    NvU64 buffer    NV_ALIGN_BYTES(8); // IN
    NvU64 size      NV_ALIGN_BYTES(8); // IN
    NvU64 targetVa  NV_ALIGN_BYTES(8); // IN
    NvU64 bytesRead NV_ALIGN_BYTES(8); // OUT
    NV_STATUS rmStatus;                // OUT
} UVM_TOOLS_READ_PROCESS_MEMORY_PARAMS;

//
// UvmToolsWriteProcessMemory
//
#define UVM_TOOLS_WRITE_PROCESS_MEMORY UVM_IOCTL_BASE(63)
typedef struct
{
    NvU64 buffer       NV_ALIGN_BYTES(8); // IN
    NvU64 size         NV_ALIGN_BYTES(8); // IN
    NvU64 targetVa     NV_ALIGN_BYTES(8); // IN
    NvU64 bytesWritten NV_ALIGN_BYTES(8); // OUT
    NV_STATUS rmStatus;                   // OUT
} UVM_TOOLS_WRITE_PROCESS_MEMORY_PARAMS;

//
// UvmToolsGetProcessorUuidTable
// Note that tablePtr != 0 and count == 0 means that tablePtr is assumed to be
// an array of size UVM_MAX_PROCESSORS_V1 and that only UvmEventEntry_V1
// processor IDs (physical GPU UUIDs) will be reported.
// tablePtr == 0 and count == 0 can be used to query how many processors are
// present, in order to dynamically allocate an array of the correct size,
// since the total number of processors is returned in 'count'.
//
#define UVM_TOOLS_GET_PROCESSOR_UUID_TABLE UVM_IOCTL_BASE(64)
typedef struct
{
    NvU64 tablePtr NV_ALIGN_BYTES(8); // IN
    NvU32 count;        // IN/OUT
    NV_STATUS rmStatus; // OUT
    NvU32 version;      // OUT
} UVM_TOOLS_GET_PROCESSOR_UUID_TABLE_PARAMS;
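//
// Illustrative sketch only (not part of this header's ABI): the two-call
// pattern described above. The first call passes tablePtr == 0 and count == 0
// to learn how many processors there are; the second call passes a buffer
// sized from that answer. Assumes an open UVM tools file descriptor
// (tools_fd); error handling is simplified.
//
#if 0
#include <stdint.h>
#include <stdlib.h>
#include <string.h>
#include <sys/ioctl.h>

static NvProcessorUuid *uvm_example_get_uuid_table(int tools_fd, NvU32 *out_count)
{
    UVM_TOOLS_GET_PROCESSOR_UUID_TABLE_PARAMS params;
    NvProcessorUuid *table;

    // First call: tablePtr == 0, count == 0 queries the number of processors.
    memset(&params, 0, sizeof(params));
    if (ioctl(tools_fd, UVM_TOOLS_GET_PROCESSOR_UUID_TABLE, &params) != 0 ||
        params.rmStatus != NV_OK)
        return NULL;

    table = calloc(params.count, sizeof(*table));
    if (table == NULL)
        return NULL;

    // Second call: fill the caller-sized table ('count' still holds the size).
    params.tablePtr = (NvU64)(uintptr_t)table;
    if (ioctl(tools_fd, UVM_TOOLS_GET_PROCESSOR_UUID_TABLE, &params) != 0 ||
        params.rmStatus != NV_OK) {
        free(table);
        return NULL;
    }

    *out_count = params.count;
    return table;
}
#endif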

//
// UvmMapDynamicParallelismRegion
//
#define UVM_MAP_DYNAMIC_PARALLELISM_REGION UVM_IOCTL_BASE(65)
typedef struct
{
    NvU64 base   NV_ALIGN_BYTES(8); // IN
    NvU64 length NV_ALIGN_BYTES(8); // IN
    NvProcessorUuid gpuUuid;        // IN
    NV_STATUS rmStatus;             // OUT
} UVM_MAP_DYNAMIC_PARALLELISM_REGION_PARAMS;

//
// UvmUnmapExternal
//
#define UVM_UNMAP_EXTERNAL UVM_IOCTL_BASE(66)
typedef struct
{
    NvU64 base   NV_ALIGN_BYTES(8); // IN
    NvU64 length NV_ALIGN_BYTES(8); // IN
    NvProcessorUuid gpuUuid;        // IN
    NV_STATUS rmStatus;             // OUT
} UVM_UNMAP_EXTERNAL_PARAMS;


//
// UvmToolsFlushEvents
//
#define UVM_TOOLS_FLUSH_EVENTS UVM_IOCTL_BASE(67)
typedef struct
{
    NV_STATUS rmStatus; // OUT
} UVM_TOOLS_FLUSH_EVENTS_PARAMS;

//
// UvmAllocSemaphorePool
//
#define UVM_ALLOC_SEMAPHORE_POOL UVM_IOCTL_BASE(68)
typedef struct
{
    NvU64 base   NV_ALIGN_BYTES(8); // IN
    NvU64 length NV_ALIGN_BYTES(8); // IN
    UvmGpuMappingAttributes perGpuAttributes[UVM_MAX_GPUS_V2]; // IN
    NvU64 gpuAttributesCount NV_ALIGN_BYTES(8); // IN
    NV_STATUS rmStatus; // OUT
} UVM_ALLOC_SEMAPHORE_POOL_PARAMS;

//
// UvmCleanUpZombieResources
//
#define UVM_CLEAN_UP_ZOMBIE_RESOURCES UVM_IOCTL_BASE(69)
typedef struct
{
    NV_STATUS rmStatus; // OUT
} UVM_CLEAN_UP_ZOMBIE_RESOURCES_PARAMS;

//
// UvmIsPageableMemoryAccessSupportedOnGpu
//
#define UVM_PAGEABLE_MEM_ACCESS_ON_GPU UVM_IOCTL_BASE(70)

typedef struct
{
    NvProcessorUuid gpu_uuid; // IN
    NvBool pageableMemAccess; // OUT
    NV_STATUS rmStatus;       // OUT
} UVM_PAGEABLE_MEM_ACCESS_ON_GPU_PARAMS;

//
// UvmPopulatePageable
//
#define UVM_POPULATE_PAGEABLE UVM_IOCTL_BASE(71)

// Allow population of managed ranges.
//
// The UVM driver must have builtin tests enabled for the API to use the
// following two flags.
#define UVM_POPULATE_PAGEABLE_FLAG_ALLOW_MANAGED   0x00000001

// By default UVM_POPULATE_PAGEABLE returns an error if the destination vma
// does not have read permission. This flag skips that check.
#define UVM_POPULATE_PAGEABLE_FLAG_SKIP_PROT_CHECK 0x00000002

#define UVM_POPULATE_PAGEABLE_FLAGS_TEST_ALL (UVM_POPULATE_PAGEABLE_FLAG_ALLOW_MANAGED | \
                                              UVM_POPULATE_PAGEABLE_FLAG_SKIP_PROT_CHECK)

#define UVM_POPULATE_PAGEABLE_FLAGS_ALL UVM_POPULATE_PAGEABLE_FLAGS_TEST_ALL

typedef struct
{
    NvU64 base   NV_ALIGN_BYTES(8); // IN
    NvU64 length NV_ALIGN_BYTES(8); // IN
    NvU32 flags;                    // IN
    NV_STATUS rmStatus;             // OUT
} UVM_POPULATE_PAGEABLE_PARAMS;

//
// UvmValidateVaRange
//
#define UVM_VALIDATE_VA_RANGE UVM_IOCTL_BASE(72)
typedef struct
{
    NvU64 base   NV_ALIGN_BYTES(8); // IN
    NvU64 length NV_ALIGN_BYTES(8); // IN
    NV_STATUS rmStatus;             // OUT
} UVM_VALIDATE_VA_RANGE_PARAMS;

#define UVM_CREATE_EXTERNAL_RANGE UVM_IOCTL_BASE(73)
typedef struct
{
    NvU64 base   NV_ALIGN_BYTES(8); // IN
    NvU64 length NV_ALIGN_BYTES(8); // IN
    NV_STATUS rmStatus;             // OUT
} UVM_CREATE_EXTERNAL_RANGE_PARAMS;

#define UVM_MAP_EXTERNAL_SPARSE UVM_IOCTL_BASE(74)
typedef struct
{
    NvU64 base   NV_ALIGN_BYTES(8); // IN
    NvU64 length NV_ALIGN_BYTES(8); // IN
    NvProcessorUuid gpuUuid;        // IN
    NV_STATUS rmStatus;             // OUT
} UVM_MAP_EXTERNAL_SPARSE_PARAMS;

//
// Used to initialise a secondary UVM file descriptor which holds a reference
// on the memory map to prevent it from being torn down without first notifying
// UVM. This is achieved by preventing mmap() calls on the secondary file
// descriptor, so that on process exit uvm_mm_release() will be called while
// the memory map is still present, such that UVM can cleanly shut down the GPU
// by handling faults instead of cancelling them.
//
// This ioctl must be called after the primary file descriptor has been
// initialised with the UVM_INITIALIZE ioctl. The primary FD should be passed
// in the uvmFd field, and the UVM_MM_INITIALIZE ioctl will hold a reference on
// the primary FD. Therefore uvm_release() is guaranteed to be called after
// uvm_mm_release().
//
// Once this file descriptor has been closed, the UVM context is effectively
// dead and subsequent operations requiring a memory map will fail. Calling
// UVM_MM_INITIALIZE on a context that has already been initialized via any FD
// will return NV_ERR_INVALID_STATE.
//
// Calling this with a non-UVM file descriptor in uvmFd will return
// NV_ERR_INVALID_ARGUMENT. Calling this on the same file descriptor as
// UVM_INITIALIZE, or more than once on the same FD, will return NV_ERR_IN_USE.
//
// Not all platforms require this secondary file descriptor. On those platforms
// NV_WARN_NOTHING_TO_DO will be returned and users may close the file
// descriptor at any time.
#define UVM_MM_INITIALIZE UVM_IOCTL_BASE(75)
typedef struct
{
    NvS32 uvmFd;        // IN
    NV_STATUS rmStatus; // OUT
} UVM_MM_INITIALIZE_PARAMS;
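//
// Illustrative sketch only (not part of this header's ABI): creating the
// secondary file descriptor described above. Both descriptors are opened on
// the UVM device; the primary is assumed to have been initialised with
// UVM_INITIALIZE already and is passed to UVM_MM_INITIALIZE on the secondary.
// The device path and the prior UVM_INITIALIZE step are assumptions of this
// sketch.
//
#if 0
#include <fcntl.h>
#include <string.h>
#include <unistd.h>
#include <sys/ioctl.h>

static int uvm_example_open_mm_fd(int primary_uvm_fd)
{
    UVM_MM_INITIALIZE_PARAMS params;
    int mm_fd = open("/dev/nvidia-uvm", O_RDWR | O_CLOEXEC); // assumed device path

    if (mm_fd < 0)
        return -1;

    memset(&params, 0, sizeof(params));
    params.uvmFd = primary_uvm_fd;

    if (ioctl(mm_fd, UVM_MM_INITIALIZE, &params) != 0 ||
        (params.rmStatus != NV_OK && params.rmStatus != NV_WARN_NOTHING_TO_DO)) {
        close(mm_fd);
        return -1;
    }

    // On platforms that do not need the secondary FD, NV_WARN_NOTHING_TO_DO is
    // returned and the descriptor may simply be closed again at any time.
    return mm_fd;
}
#endif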

//
// Temporary ioctls which should be removed before the UVM 8 release.
// Numbered backwards from 2047, the highest custom ioctl function number
// Windows can handle.
//

//
// UvmIs8Supported
//
#define UVM_IS_8_SUPPORTED UVM_IOCTL_BASE(2047)

typedef struct
{
    NvU32 is8Supported; // OUT
    NV_STATUS rmStatus; // OUT
} UVM_IS_8_SUPPORTED_PARAMS;


#ifdef __cplusplus
}
#endif

#endif // _UVM_IOCTL_H