/*******************************************************************************
    Copyright (c) 2015-2020 NVIDIA Corporation

    Permission is hereby granted, free of charge, to any person obtaining a copy
    of this software and associated documentation files (the "Software"), to
    deal in the Software without restriction, including without limitation the
    rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
    sell copies of the Software, and to permit persons to whom the Software is
    furnished to do so, subject to the following conditions:

        The above copyright notice and this permission notice shall be
        included in all copies or substantial portions of the Software.

    THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
    IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
    FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
    THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
    LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
    FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
    DEALINGS IN THE SOFTWARE.

*******************************************************************************/

// For Pascal, UVM page tree 'depth' maps to hardware as follows:
//
// UVM depth   HW level                             VA bits
// 0           PDE3                                 48:47
// 1           PDE2                                 46:38
// 2           PDE1                                 37:29
// 3           PDE0 (dual 64k/4k PDE, or 2M PTE)    28:21
// 4           PTE_64K / PTE_4K                     20:16 / 20:12

#include "uvm_types.h"
#include "uvm_forward_decl.h"
#include "uvm_global.h"
#include "uvm_gpu.h"
#include "uvm_mmu.h"
#include "uvm_push_macros.h"
#include "uvm_pascal_fault_buffer.h"
#include "hwref/pascal/gp100/dev_fault.h"
#include "hwref/pascal/gp100/dev_fb.h"
#include "hwref/pascal/gp100/dev_mmu.h"

#define MMU_BIG 0
#define MMU_SMALL 1

static NvU32 entries_per_index_pascal(NvU32 depth)
{
    UVM_ASSERT(depth < 5);
    if (depth == 3)
        return 2;
    return 1;
}

static NvLength entry_offset_pascal(NvU32 depth, NvU32 page_size)
{
    UVM_ASSERT(depth < 5);
    if (page_size == UVM_PAGE_SIZE_4K && depth == 3)
        return MMU_SMALL;
    return MMU_BIG;
}

static NvU64 single_pde_pascal(uvm_mmu_page_table_alloc_t *phys_alloc)
{
    NvU64 pde_bits = 0;

    if (phys_alloc != NULL) {
        NvU64 address = phys_alloc->addr.address >> NV_MMU_VER2_PDE_ADDRESS_SHIFT;
        pde_bits |= HWCONST64(_MMU_VER2, PDE, IS_PDE, TRUE) |
                    HWCONST64(_MMU_VER2, PDE, VOL, TRUE);

        switch (phys_alloc->addr.aperture) {
            case UVM_APERTURE_SYS:
                pde_bits |= HWCONST64(_MMU_VER2, PDE, APERTURE, SYSTEM_COHERENT_MEMORY) |
                            HWVALUE64(_MMU_VER2, PDE, ADDRESS_SYS, address);
                break;
            case UVM_APERTURE_VID:
                pde_bits |= HWCONST64(_MMU_VER2, PDE, APERTURE, VIDEO_MEMORY) |
                            HWVALUE64(_MMU_VER2, PDE, ADDRESS_VID, address);
                break;
            default:
                UVM_ASSERT_MSG(0, "Invalid aperture: %d\n", phys_alloc->addr.aperture);
                break;
        }
    }

    return pde_bits;
}
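// Illustrative sketch (not part of the HAL): the depth table at the top of
// this file fully determines how a virtual address decomposes into per-level
// table indices. example_pde_index_pascal() is a hypothetical helper added
// here only to make the bit ranges concrete; the driver derives the same
// information from index_bits_pascal() further down.
static inline NvU32 example_pde_index_pascal(NvU64 va, NvU32 depth)
{
    // Low bit and width of each PDE level's index, restating the "VA bits"
    // column above: PDE3 at 47, PDE2 at 38, PDE1 at 29, PDE0 at 21.
    static const NvU32 shifts[] = {47, 38, 29, 21};
    static const NvU32 widths[] = {2, 9, 9, 8};

    UVM_ASSERT(depth < 4);
    return (NvU32)((va >> shifts[depth]) & ((1u << widths[depth]) - 1));
}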
static NvU64 big_half_pde_pascal(uvm_mmu_page_table_alloc_t *phys_alloc)
{
    NvU64 pde_bits = 0;

    if (phys_alloc != NULL) {
        NvU64 address = phys_alloc->addr.address >> NV_MMU_VER2_DUAL_PDE_ADDRESS_BIG_SHIFT;
        pde_bits |= HWCONST64(_MMU_VER2, DUAL_PDE, VOL_BIG, TRUE);

        switch (phys_alloc->addr.aperture) {
            case UVM_APERTURE_SYS:
                pde_bits |= HWCONST64(_MMU_VER2, DUAL_PDE, APERTURE_BIG, SYSTEM_COHERENT_MEMORY) |
                            HWVALUE64(_MMU_VER2, DUAL_PDE, ADDRESS_BIG_SYS, address);
                break;
            case UVM_APERTURE_VID:
                pde_bits |= HWCONST64(_MMU_VER2, DUAL_PDE, APERTURE_BIG, VIDEO_MEMORY) |
                            HWVALUE64(_MMU_VER2, DUAL_PDE, ADDRESS_BIG_VID, address);
                break;
            default:
                UVM_ASSERT_MSG(0, "Invalid big aperture %d\n", phys_alloc->addr.aperture);
                break;
        }
    }

    return pde_bits;
}

static NvU64 small_half_pde_pascal(uvm_mmu_page_table_alloc_t *phys_alloc)
{
    NvU64 pde_bits = 0;

    if (phys_alloc != NULL) {
        NvU64 address = phys_alloc->addr.address >> NV_MMU_VER2_DUAL_PDE_ADDRESS_SHIFT;
        pde_bits |= HWCONST64(_MMU_VER2, DUAL_PDE, VOL_SMALL, TRUE);

        switch (phys_alloc->addr.aperture) {
            case UVM_APERTURE_SYS:
                pde_bits |= HWCONST64(_MMU_VER2, DUAL_PDE, APERTURE_SMALL, SYSTEM_COHERENT_MEMORY);
                pde_bits |= HWVALUE64(_MMU_VER2, DUAL_PDE, ADDRESS_SMALL_SYS, address);
                break;
            case UVM_APERTURE_VID:
                pde_bits |= HWCONST64(_MMU_VER2, DUAL_PDE, APERTURE_SMALL, VIDEO_MEMORY);
                pde_bits |= HWVALUE64(_MMU_VER2, DUAL_PDE, ADDRESS_SMALL_VID, address);
                break;
            default:
                UVM_ASSERT_MSG(0, "Invalid small aperture %d\n", phys_alloc->addr.aperture);
                break;
        }
    }

    return pde_bits;
}

static void make_pde_pascal(void *entry, uvm_mmu_page_table_alloc_t **phys_allocs, NvU32 depth)
{
    NvU32 entry_count = entries_per_index_pascal(depth);
    NvU64 *entry_bits = (NvU64 *)entry;

    if (entry_count == 1) {
        *entry_bits = single_pde_pascal(*phys_allocs);
    }
    else if (entry_count == 2) {
        entry_bits[MMU_BIG] = big_half_pde_pascal(phys_allocs[MMU_BIG]);
        entry_bits[MMU_SMALL] = small_half_pde_pascal(phys_allocs[MMU_SMALL]);

        // The IS_PDE bit applies to the whole dual PDE, but it is stored in
        // the lower (big) half
        entry_bits[MMU_BIG] |= HWCONST64(_MMU_VER2, DUAL_PDE, IS_PDE, TRUE);
    }
    else {
        UVM_ASSERT_MSG(0, "Invalid number of entries per index: %d\n", entry_count);
    }
}

static NvLength entry_size_pascal(NvU32 depth)
{
    UVM_ASSERT(depth < 5);

    // PDE0 (depth 3) is a 16-byte dual entry; all other levels use 8-byte
    // entries
    if (depth == 3)
        return 16;
    else
        return 8;
}

static NvU32 index_bits_pascal(NvU32 depth, NvU32 page_size)
{
    static const NvU32 bit_widths[] = {2, 9, 9, 8};

    // Some code paths query this until it returns 0, meaning only the page
    // offset remains.
    UVM_ASSERT(depth < 5);
    if (depth < 4) {
        return bit_widths[depth];
    }
    else if (depth == 4) {
        switch (page_size) {
            case UVM_PAGE_SIZE_4K:
                return 9;
            case UVM_PAGE_SIZE_64K:
                return 5;
            default:
                break;
        }
    }
    return 0;
}

static NvU32 num_va_bits_pascal(void)
{
    return 49;
}

static NvLength allocation_size_pascal(NvU32 depth, NvU32 page_size)
{
    UVM_ASSERT(depth < 5);
    if (depth == 4 && page_size == UVM_PAGE_SIZE_64K)
        return 256;

    // Depth 0 requires only a 32-byte allocation, but it must be 4k-aligned
    return 4096;
}

static NvU32 page_table_depth_pascal(NvU32 page_size)
{
    // A 2M PTE sits at the same depth as a PDE0 entry (see the depth table
    // at the top of this file)
    if (page_size == UVM_PAGE_SIZE_2M)
        return 3;
    else
        return 4;
}

static NvU32 page_sizes_pascal(void)
{
    return UVM_PAGE_SIZE_2M | UVM_PAGE_SIZE_64K | UVM_PAGE_SIZE_4K;
}
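// A minimal self-check sketch (hypothetical, added for illustration): at
// every depth, the table allocation must hold all of its entries, i.e.
// entry_size * 2^index_bits <= allocation_size. For example, at depth 4 with
// 64K pages that is 8 * 2^5 = 256 bytes, matching allocation_size_pascal();
// at depth 0 only 8 * 2^2 = 32 bytes are used, but the allocation is still
// 4K for alignment.
static inline void example_check_table_sizes_pascal(NvU32 page_size)
{
    NvU32 depth;

    for (depth = 0; depth < 5; depth++) {
        NvLength used = entry_size_pascal(depth) *
                        ((NvLength)1 << index_bits_pascal(depth, page_size));
        UVM_ASSERT(used <= allocation_size_pascal(depth, page_size));
    }
}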
static NvU64 unmapped_pte_pascal(NvU32 page_size)
{
    // Setting the privilege bit on an otherwise-zeroed big PTE causes the
    // corresponding 4k PTEs to be ignored. This allows the invalidation of a
    // mixed PDE range to be much faster.
    if (page_size != UVM_PAGE_SIZE_64K)
        return 0;

    // When VALID == 0, MMU still reads the VOL and PRIV fields. VOL == 1
    // indicates that the PTE is sparse, so make sure we don't use it.
    return HWCONST64(_MMU_VER2, PTE, VALID, FALSE) |
           HWCONST64(_MMU_VER2, PTE, VOL, FALSE) |
           HWCONST64(_MMU_VER2, PTE, PRIVILEGE, TRUE);
}

static NvU64 make_pte_pascal(uvm_aperture_t aperture, NvU64 address, uvm_prot_t prot, NvU64 flags)
{
    NvU8 aperture_bits = 0;
    NvU64 pte_bits = 0;

    UVM_ASSERT(prot != UVM_PROT_NONE);
    UVM_ASSERT((flags & ~UVM_MMU_PTE_FLAGS_MASK) == 0);

    // valid 0:0
    pte_bits |= HWCONST64(_MMU_VER2, PTE, VALID, TRUE);

    // aperture 2:1
    if (aperture == UVM_APERTURE_SYS)
        aperture_bits = NV_MMU_VER2_PTE_APERTURE_SYSTEM_COHERENT_MEMORY;
    else if (aperture == UVM_APERTURE_VID)
        aperture_bits = NV_MMU_VER2_PTE_APERTURE_VIDEO_MEMORY;
    else if (aperture >= UVM_APERTURE_PEER_0 && aperture <= UVM_APERTURE_PEER_7)
        aperture_bits = NV_MMU_VER2_PTE_APERTURE_PEER_MEMORY;
    else
        UVM_ASSERT_MSG(0, "Invalid aperture: %d\n", aperture);

    pte_bits |= HWVALUE64(_MMU_VER2, PTE, APERTURE, aperture_bits);

    // volatile 3:3
    if (flags & UVM_MMU_PTE_FLAGS_CACHED)
        pte_bits |= HWCONST64(_MMU_VER2, PTE, VOL, FALSE);
    else
        pte_bits |= HWCONST64(_MMU_VER2, PTE, VOL, TRUE);

    // encrypted 4:4
    pte_bits |= HWCONST64(_MMU_VER2, PTE, ENCRYPTED, FALSE);

    // privilege 5:5
    pte_bits |= HWCONST64(_MMU_VER2, PTE, PRIVILEGE, FALSE);

    // read only 6:6
    if (prot == UVM_PROT_READ_ONLY)
        pte_bits |= HWCONST64(_MMU_VER2, PTE, READ_ONLY, TRUE);
    else
        pte_bits |= HWCONST64(_MMU_VER2, PTE, READ_ONLY, FALSE);

    // atomic disable 7:7
    if (prot == UVM_PROT_READ_WRITE_ATOMIC)
        pte_bits |= HWCONST64(_MMU_VER2, PTE, ATOMIC_DISABLE, FALSE);
    else
        pte_bits |= HWCONST64(_MMU_VER2, PTE, ATOMIC_DISABLE, TRUE);

    address >>= NV_MMU_VER2_PTE_ADDRESS_SHIFT;
    if (aperture == UVM_APERTURE_SYS) {
        // sys address 53:8
        pte_bits |= HWVALUE64(_MMU_VER2, PTE, ADDRESS_SYS, address);
    }
    else {
        // vid address 32:8
        pte_bits |= HWVALUE64(_MMU_VER2, PTE, ADDRESS_VID, address);

        // peer id 35:33
        if (aperture != UVM_APERTURE_VID)
            pte_bits |= HWVALUE64(_MMU_VER2, PTE, ADDRESS_VID_PEER, UVM_APERTURE_PEER_ID(aperture));

        // comptagline 53:36
        pte_bits |= HWVALUE64(_MMU_VER2, PTE, COMPTAGLINE, 0);
    }

    pte_bits |= HWVALUE64(_MMU_VER2, PTE, KIND, NV_MMU_PTE_KIND_PITCH);

    return pte_bits;
}

static NvU64 make_sked_reflected_pte_pascal(void)
{
    NvU64 pte_bits = 0;

    pte_bits |= HWCONST64(_MMU_VER2, PTE, VALID, TRUE);
    pte_bits |= HWVALUE64(_MMU_VER2, PTE, KIND, NV_MMU_PTE_KIND_SMSKED_MESSAGE);

    return pte_bits;
}

static NvU64 make_sparse_pte_pascal(void)
{
    return HWCONST64(_MMU_VER2, PTE, VALID, FALSE) |
           HWCONST64(_MMU_VER2, PTE, VOL, TRUE);
}
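// Summary comment (added for clarity) of the three "not present" PTE
// encodings built above, drawn from the comments in each helper:
//   - unmapped big PTE:  VALID=0, VOL=0, PRIVILEGE=1 (the matching 4k PTEs
//     are ignored)
//   - unmapped 4k PTE:   all zeros
//   - sparse PTE:        VALID=0, VOL=1 (accesses are absorbed rather than
//     faulting)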
static NvU64 poisoned_pte_pascal(void)
{
    // An invalid PTE won't be fatal from faultable units like SM, which is
    // the most likely source of bad PTE accesses.
    //
    // Engines with priv accesses won't fault on the priv PTE, so add a backup
    // mechanism using an impossible memory address. MMU will trigger an
    // interrupt when it detects a bad physical address.
    //
    // This address has to fit within 37 bits (max address width of vidmem)
    // and be aligned to page_size.
    NvU64 phys_addr = 0x1bad000000ULL;

    NvU64 pte_bits = make_pte_pascal(UVM_APERTURE_VID, phys_addr, UVM_PROT_READ_ONLY, UVM_MMU_PTE_FLAGS_NONE);
    return WRITE_HWCONST64(pte_bits, _MMU_VER2, PTE, PRIVILEGE, TRUE);
}

static uvm_mmu_mode_hal_t pascal_mmu_mode_hal =
{
    .make_pte = make_pte_pascal,
    .make_sked_reflected_pte = make_sked_reflected_pte_pascal,
    .make_sparse_pte = make_sparse_pte_pascal,
    .unmapped_pte = unmapped_pte_pascal,
    .poisoned_pte = poisoned_pte_pascal,
    .make_pde = make_pde_pascal,
    .entry_size = entry_size_pascal,
    .index_bits = index_bits_pascal,
    .entries_per_index = entries_per_index_pascal,
    .entry_offset = entry_offset_pascal,
    .num_va_bits = num_va_bits_pascal,
    .allocation_size = allocation_size_pascal,
    .page_table_depth = page_table_depth_pascal,
    .page_sizes = page_sizes_pascal
};

uvm_mmu_mode_hal_t *uvm_hal_mmu_mode_pascal(NvU32 big_page_size)
{
    UVM_ASSERT(big_page_size == UVM_PAGE_SIZE_64K || big_page_size == UVM_PAGE_SIZE_128K);

    // TODO: Bug 1789555: RM should reject the creation of GPU VA spaces with
    //       128K big page size for Pascal+ GPUs
    if (big_page_size == UVM_PAGE_SIZE_128K)
        return NULL;

    return &pascal_mmu_mode_hal;
}

void uvm_hal_pascal_mmu_enable_prefetch_faults(uvm_parent_gpu_t *parent_gpu)
{
    volatile NvU32 *prefetch_control;
    NvU32 prefetch_control_value;

    prefetch_control = parent_gpu->fault_buffer_info.rm_info.replayable.pPrefetchCtrl;

    prefetch_control_value = UVM_GPU_READ_ONCE(*prefetch_control);
    prefetch_control_value = WRITE_HWCONST(prefetch_control_value, _PFB_PRI_MMU_PAGE, FAULT_CTRL, PRF_FILTER, SEND_ALL);
    UVM_GPU_WRITE_ONCE(*prefetch_control, prefetch_control_value);
}

void uvm_hal_pascal_mmu_disable_prefetch_faults(uvm_parent_gpu_t *parent_gpu)
{
    volatile NvU32 *prefetch_control;
    NvU32 prefetch_control_value;

    prefetch_control = parent_gpu->fault_buffer_info.rm_info.replayable.pPrefetchCtrl;

    prefetch_control_value = UVM_GPU_READ_ONCE(*prefetch_control);
    prefetch_control_value = WRITE_HWCONST(prefetch_control_value, _PFB_PRI_MMU_PAGE, FAULT_CTRL, PRF_FILTER, SEND_NONE);
    UVM_GPU_WRITE_ONCE(*prefetch_control, prefetch_control_value);
}
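// Summary of the mapping below (comment added for clarity): GPC clients not
// tied to a TPC (RAST, GCC, GPCCS) fault through the RGG uTLB, while each
// LTPn uTLB serves one TPC pair: PE_n, TPCCS_n, and the L1/T1 clients 2n and
// 2n+1.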
NvU16 uvm_hal_pascal_mmu_client_id_to_utlb_id(NvU16 client_id)
{
    switch (client_id) {
        case NV_PFAULT_CLIENT_GPC_RAST:
        case NV_PFAULT_CLIENT_GPC_GCC:
        case NV_PFAULT_CLIENT_GPC_GPCCS:
            return UVM_PASCAL_GPC_UTLB_ID_RGG;
        case NV_PFAULT_CLIENT_GPC_PE_0:
        case NV_PFAULT_CLIENT_GPC_TPCCS_0:
        case NV_PFAULT_CLIENT_GPC_L1_0:
        case NV_PFAULT_CLIENT_GPC_T1_0:
        case NV_PFAULT_CLIENT_GPC_L1_1:
        case NV_PFAULT_CLIENT_GPC_T1_1:
            return UVM_PASCAL_GPC_UTLB_ID_LTP0;
        case NV_PFAULT_CLIENT_GPC_PE_1:
        case NV_PFAULT_CLIENT_GPC_TPCCS_1:
        case NV_PFAULT_CLIENT_GPC_L1_2:
        case NV_PFAULT_CLIENT_GPC_T1_2:
        case NV_PFAULT_CLIENT_GPC_L1_3:
        case NV_PFAULT_CLIENT_GPC_T1_3:
            return UVM_PASCAL_GPC_UTLB_ID_LTP1;
        case NV_PFAULT_CLIENT_GPC_PE_2:
        case NV_PFAULT_CLIENT_GPC_TPCCS_2:
        case NV_PFAULT_CLIENT_GPC_L1_4:
        case NV_PFAULT_CLIENT_GPC_T1_4:
        case NV_PFAULT_CLIENT_GPC_L1_5:
        case NV_PFAULT_CLIENT_GPC_T1_5:
            return UVM_PASCAL_GPC_UTLB_ID_LTP2;
        case NV_PFAULT_CLIENT_GPC_PE_3:
        case NV_PFAULT_CLIENT_GPC_TPCCS_3:
        case NV_PFAULT_CLIENT_GPC_L1_6:
        case NV_PFAULT_CLIENT_GPC_T1_6:
        case NV_PFAULT_CLIENT_GPC_L1_7:
        case NV_PFAULT_CLIENT_GPC_T1_7:
            return UVM_PASCAL_GPC_UTLB_ID_LTP3;
        case NV_PFAULT_CLIENT_GPC_PE_4:
        case NV_PFAULT_CLIENT_GPC_TPCCS_4:
        case NV_PFAULT_CLIENT_GPC_L1_8:
        case NV_PFAULT_CLIENT_GPC_T1_8:
        case NV_PFAULT_CLIENT_GPC_L1_9:
        case NV_PFAULT_CLIENT_GPC_T1_9:
            return UVM_PASCAL_GPC_UTLB_ID_LTP4;
        default:
            UVM_ASSERT_MSG(false, "Invalid client value: 0x%x\n", client_id);
    }

    return 0;
}
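// Usage sketch (hypothetical caller, for illustration only): the MMU HAL is
// selected per GPU VA space from the big page size and then used to build
// entries, e.g.:
//
//     uvm_mmu_mode_hal_t *hal = uvm_hal_mmu_mode_pascal(UVM_PAGE_SIZE_64K);
//     if (hal != NULL) {
//         NvU64 pte = hal->make_pte(UVM_APERTURE_SYS,
//                                   phys_addr,
//                                   UVM_PROT_READ_WRITE_ATOMIC,
//                                   UVM_MMU_PTE_FLAGS_CACHED);
//     }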