/*******************************************************************************
    Copyright (c) 2016-2023 NVIDIA Corporation

    Permission is hereby granted, free of charge, to any person obtaining a copy
    of this software and associated documentation files (the "Software"), to
    deal in the Software without restriction, including without limitation the
    rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
    sell copies of the Software, and to permit persons to whom the Software is
    furnished to do so, subject to the following conditions:

        The above copyright notice and this permission notice shall be
        included in all copies or substantial portions of the Software.

    THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
    IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
    FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
    THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
    LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
    FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
    DEALINGS IN THE SOFTWARE.

*******************************************************************************/

#include "uvm_linux.h"
#include "uvm_global.h"
#include "uvm_gpu.h"
#include "uvm_hal.h"
#include "uvm_conf_computing.h"
#include "nv_uvm_types.h"
#include "hwref/volta/gv100/dev_fault.h"
#include "hwref/volta/gv100/dev_fb.h"
#include "clc369.h"
#include "uvm_volta_fault_buffer.h"

typedef struct {
    NvU8 bufferEntry[NVC369_BUF_SIZE];
} fault_buffer_entry_c369_t;

NvU32 uvm_hal_volta_fault_buffer_read_put(uvm_parent_gpu_t *parent_gpu)
{
    NvU32 put = UVM_GPU_READ_ONCE(*parent_gpu->fault_buffer_info.rm_info.replayable.pFaultBufferPut);
    NvU32 index = READ_HWVALUE(put, _PFB_PRI_MMU, FAULT_BUFFER_PUT, PTR);
    UVM_ASSERT(READ_HWVALUE(put, _PFB_PRI_MMU, FAULT_BUFFER_PUT, GETPTR_CORRUPTED) ==
               NV_PFB_PRI_MMU_FAULT_BUFFER_PUT_GETPTR_CORRUPTED_NO);

    return index;
}

NvU32 uvm_hal_volta_fault_buffer_read_get(uvm_parent_gpu_t *parent_gpu)
{
    NvU32 get = UVM_GPU_READ_ONCE(*parent_gpu->fault_buffer_info.rm_info.replayable.pFaultBufferGet);
    UVM_ASSERT(get < parent_gpu->fault_buffer_info.replayable.max_faults);

    return READ_HWVALUE(get, _PFB_PRI_MMU, FAULT_BUFFER_GET, PTR);
}

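// The replayable fault buffer is a ring buffer: the GET and PUT values above
// are entry indices into it, and the registers also carry status bits
// (GETPTR_CORRUPTED, OVERFLOW), which is why only the PTR field is returned.
// As an illustrative sketch only (the actual servicing code also batches,
// re-reads PUT, and handles errors and replays), a consumer drains the buffer
// with these HAL hooks roughly as follows:
//
//     uvm_fault_buffer_entry_t entry;
//     NvU32 get = parent_gpu->fault_buffer_hal->read_get(parent_gpu);
//     NvU32 put = parent_gpu->fault_buffer_hal->read_put(parent_gpu);
//
//     while (get != put) {
//         // PUT can be updated before the packet contents land, so wait for
//         // the entry to become valid before parsing it
//         if (!parent_gpu->fault_buffer_hal->entry_is_valid(parent_gpu, get))
//             continue;
//
//         // Error handling omitted in this sketch
//         parent_gpu->fault_buffer_hal->parse_replayable_entry(parent_gpu, get, &entry);
//         if (++get == parent_gpu->fault_buffer_info.replayable.max_faults)
//             get = 0;
//     }
//
//     // Publish the new GET index so HW sees the freed space
//     parent_gpu->fault_buffer_hal->write_get(parent_gpu, get);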
void uvm_hal_volta_fault_buffer_write_get(uvm_parent_gpu_t *parent_gpu, NvU32 index)
{
    NvU32 get = HWVALUE(_PFB_PRI_MMU, FAULT_BUFFER_GET, PTR, index);

    UVM_ASSERT(index < parent_gpu->fault_buffer_info.replayable.max_faults);

    // If HW has detected an overflow condition (PUT == GET - 1 and a newly
    // arrived fault was dropped because there was no space left in the fault
    // buffer), it will not deliver any more faults into the buffer until the
    // overflow condition has been cleared. The condition is cleared by
    // updating the GET index to indicate free space in the buffer, and by
    // writing 1 to the OVERFLOW bit in GET. Unfortunately, this cannot be done
    // in a single write, because that write can collide with a fault arriving
    // on the same cycle, instantly reasserting the overflow condition.
    // However, if the index is updated first and the OVERFLOW bit is cleared
    // afterwards, such a collision will not reassert the overflow condition.
    UVM_GPU_WRITE_ONCE(*parent_gpu->fault_buffer_info.rm_info.replayable.pFaultBufferGet, get);

    // Clearing GETPTR_CORRUPTED and OVERFLOW is not needed when GSP-RM owns
    // the HW replayable fault buffer, because UVM does not write to the actual
    // GET register; GSP-RM is responsible for clearing the bits in the real
    // GET register.
    if (!uvm_parent_gpu_replayable_fault_buffer_is_uvm_owned(parent_gpu))
        return;

    // Clear the GETPTR_CORRUPTED and OVERFLOW bits.
    get |= HWCONST(_PFB_PRI_MMU, FAULT_BUFFER_GET, GETPTR_CORRUPTED, CLEAR) |
           HWCONST(_PFB_PRI_MMU, FAULT_BUFFER_GET, OVERFLOW, CLEAR);
    UVM_GPU_WRITE_ONCE(*parent_gpu->fault_buffer_info.rm_info.replayable.pFaultBufferGet, get);
}

// TODO: Bug 1835884: [uvm] Query the maximum number of subcontexts from RM
// ... to validate the ve_id
#define MAX_SUBCONTEXTS 64
NvU8 uvm_hal_volta_fault_buffer_get_ve_id(NvU16 mmu_engine_id, uvm_mmu_engine_type_t mmu_engine_type)
{
    // Only graphics engines can generate MMU faults from different subcontexts
    if (mmu_engine_type == UVM_MMU_ENGINE_TYPE_GRAPHICS) {
        NvU16 ve_id = mmu_engine_id - NV_PFAULT_MMU_ENG_ID_GRAPHICS;
        UVM_ASSERT(ve_id < MAX_SUBCONTEXTS);

        return (NvU8)ve_id;
    }
    else {
        return 0;
    }
}

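// HW encodes every access type twice, once for virtually addressed and once
// for physically addressed accesses. Both variants collapse to the same UVM
// access type here; the virtual/physical distinction is recovered separately
// by is_fault_address_virtual() below.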
static uvm_fault_access_type_t get_fault_access_type(const NvU32 *fault_entry)
{
    NvU32 hw_access_type_value = READ_HWVALUE_MW(fault_entry, C369, BUF_ENTRY, ACCESS_TYPE);

    switch (hw_access_type_value)
    {
        case NV_PFAULT_ACCESS_TYPE_PHYS_READ:
        case NV_PFAULT_ACCESS_TYPE_VIRT_READ:
            return UVM_FAULT_ACCESS_TYPE_READ;
        case NV_PFAULT_ACCESS_TYPE_PHYS_WRITE:
        case NV_PFAULT_ACCESS_TYPE_VIRT_WRITE:
            return UVM_FAULT_ACCESS_TYPE_WRITE;
        case NV_PFAULT_ACCESS_TYPE_PHYS_ATOMIC:
        case NV_PFAULT_ACCESS_TYPE_VIRT_ATOMIC_STRONG:
            return UVM_FAULT_ACCESS_TYPE_ATOMIC_STRONG;
        case NV_PFAULT_ACCESS_TYPE_VIRT_ATOMIC_WEAK:
            return UVM_FAULT_ACCESS_TYPE_ATOMIC_WEAK;
        case NV_PFAULT_ACCESS_TYPE_PHYS_PREFETCH:
        case NV_PFAULT_ACCESS_TYPE_VIRT_PREFETCH:
            return UVM_FAULT_ACCESS_TYPE_PREFETCH;
    }

    UVM_ASSERT_MSG(false, "Invalid fault access type value: %u\n", hw_access_type_value);

    return UVM_FAULT_ACCESS_TYPE_COUNT;
}

static bool is_fault_address_virtual(const NvU32 *fault_entry)
{
    NvU32 hw_access_type_value = READ_HWVALUE_MW(fault_entry, C369, BUF_ENTRY, ACCESS_TYPE);

    switch (hw_access_type_value)
    {
        case NV_PFAULT_ACCESS_TYPE_PHYS_READ:
        case NV_PFAULT_ACCESS_TYPE_PHYS_WRITE:
        case NV_PFAULT_ACCESS_TYPE_PHYS_ATOMIC:
        case NV_PFAULT_ACCESS_TYPE_PHYS_PREFETCH:
            return false;
        case NV_PFAULT_ACCESS_TYPE_VIRT_READ:
        case NV_PFAULT_ACCESS_TYPE_VIRT_WRITE:
        case NV_PFAULT_ACCESS_TYPE_VIRT_ATOMIC_STRONG:
        case NV_PFAULT_ACCESS_TYPE_VIRT_ATOMIC_WEAK:
        case NV_PFAULT_ACCESS_TYPE_VIRT_PREFETCH:
            return true;
    }

    UVM_ASSERT_MSG(false, "Invalid fault access type value: %u\n", hw_access_type_value);

    // This function returns bool, so do not return the
    // UVM_FAULT_ACCESS_TYPE_COUNT sentinel here
    return false;
}

uvm_fault_type_t uvm_hal_volta_fault_buffer_get_fault_type(const NvU32 *fault_entry)
{
    NvU32 hw_fault_type_value = READ_HWVALUE_MW(fault_entry, C369, BUF_ENTRY, FAULT_TYPE);

    switch (hw_fault_type_value)
    {
        case NV_PFAULT_FAULT_TYPE_PDE:
            return UVM_FAULT_TYPE_INVALID_PDE;
        case NV_PFAULT_FAULT_TYPE_PTE:
            return UVM_FAULT_TYPE_INVALID_PTE;
        case NV_PFAULT_FAULT_TYPE_RO_VIOLATION:
            return UVM_FAULT_TYPE_WRITE;
        case NV_PFAULT_FAULT_TYPE_ATOMIC_VIOLATION:
            return UVM_FAULT_TYPE_ATOMIC;
        case NV_PFAULT_FAULT_TYPE_WO_VIOLATION:
            return UVM_FAULT_TYPE_READ;

        case NV_PFAULT_FAULT_TYPE_PDE_SIZE:
            return UVM_FAULT_TYPE_PDE_SIZE;
        case NV_PFAULT_FAULT_TYPE_VA_LIMIT_VIOLATION:
            return UVM_FAULT_TYPE_VA_LIMIT_VIOLATION;
        case NV_PFAULT_FAULT_TYPE_UNBOUND_INST_BLOCK:
            return UVM_FAULT_TYPE_UNBOUND_INST_BLOCK;
        case NV_PFAULT_FAULT_TYPE_PRIV_VIOLATION:
            return UVM_FAULT_TYPE_PRIV_VIOLATION;
        case NV_PFAULT_FAULT_TYPE_PITCH_MASK_VIOLATION:
            return UVM_FAULT_TYPE_PITCH_MASK_VIOLATION;
        case NV_PFAULT_FAULT_TYPE_WORK_CREATION:
            return UVM_FAULT_TYPE_WORK_CREATION;
        case NV_PFAULT_FAULT_TYPE_UNSUPPORTED_APERTURE:
            return UVM_FAULT_TYPE_UNSUPPORTED_APERTURE;
        case NV_PFAULT_FAULT_TYPE_COMPRESSION_FAILURE:
            return UVM_FAULT_TYPE_COMPRESSION_FAILURE;
        case NV_PFAULT_FAULT_TYPE_UNSUPPORTED_KIND:
            return UVM_FAULT_TYPE_UNSUPPORTED_KIND;
        case NV_PFAULT_FAULT_TYPE_REGION_VIOLATION:
            return UVM_FAULT_TYPE_REGION_VIOLATION;
        case NV_PFAULT_FAULT_TYPE_POISONED:
            return UVM_FAULT_TYPE_POISONED;
    }

    UVM_ASSERT_MSG(false, "Invalid fault type value: %u\n", hw_fault_type_value);

    return UVM_FAULT_TYPE_COUNT;
}

static uvm_fault_client_type_t get_fault_client_type(const NvU32 *fault_entry)
{
    NvU32 hw_client_type_value = READ_HWVALUE_MW(fault_entry, C369, BUF_ENTRY, MMU_CLIENT_TYPE);

    switch (hw_client_type_value)
    {
        case NV_PFAULT_MMU_CLIENT_TYPE_GPC:
            return UVM_FAULT_CLIENT_TYPE_GPC;
        case NV_PFAULT_MMU_CLIENT_TYPE_HUB:
            return UVM_FAULT_CLIENT_TYPE_HUB;
    }

    UVM_ASSERT_MSG(false, "Invalid MMU client type value: %u\n", hw_client_type_value);

    return UVM_FAULT_CLIENT_TYPE_COUNT;
}

static uvm_aperture_t get_fault_inst_aperture(const NvU32 *fault_entry)
{
    NvU32 hw_aperture_value = READ_HWVALUE_MW(fault_entry, C369, BUF_ENTRY, INST_APERTURE);

    switch (hw_aperture_value)
    {
        case NVC369_BUF_ENTRY_INST_APERTURE_VID_MEM:
            return UVM_APERTURE_VID;
        case NVC369_BUF_ENTRY_INST_APERTURE_SYS_MEM_COHERENT:
        case NVC369_BUF_ENTRY_INST_APERTURE_SYS_MEM_NONCOHERENT:
            return UVM_APERTURE_SYS;
    }

    UVM_ASSERT_MSG(false, "Invalid instance aperture value: %u\n", hw_aperture_value);

    return UVM_APERTURE_MAX;
}

static NvU32 *get_fault_buffer_entry(uvm_parent_gpu_t *parent_gpu, NvU32 index)
{
    fault_buffer_entry_c369_t *buffer_start;
    NvU32 *fault_entry;

    UVM_ASSERT(index < parent_gpu->fault_buffer_info.replayable.max_faults);

    buffer_start = (fault_buffer_entry_c369_t *)parent_gpu->fault_buffer_info.rm_info.replayable.bufferAddress;
    fault_entry = (NvU32 *)&buffer_start[index];

    return fault_entry;
}

// See uvm_pascal_fault_buffer.c::get_fault_buffer_entry_metadata
static UvmFaultMetadataPacket *get_fault_buffer_entry_metadata(uvm_parent_gpu_t *parent_gpu, NvU32 index)
{
    UvmFaultMetadataPacket *fault_entry_metadata;

    UVM_ASSERT(index < parent_gpu->fault_buffer_info.replayable.max_faults);
    UVM_ASSERT(!uvm_parent_gpu_replayable_fault_buffer_is_uvm_owned(parent_gpu));

    fault_entry_metadata = parent_gpu->fault_buffer_info.rm_info.replayable.bufferMetadata;
    UVM_ASSERT(fault_entry_metadata != NULL);

    return fault_entry_metadata + index;
}

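// Decode one HW fault packet into the HW-independent uvm_fault_buffer_entry_t
// representation. Fault packet fields can straddle 32-bit word boundaries, so
// they are all extracted with the multi-word (_MW) HW value accessors.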
static void parse_fault_entry_common(uvm_parent_gpu_t *parent_gpu,
                                     NvU32 *fault_entry,
                                     uvm_fault_buffer_entry_t *buffer_entry)
{
    NvU64 addr_hi, addr_lo;
    NvU64 timestamp_hi, timestamp_lo;
    bool replayable_fault_enabled;

    addr_hi = READ_HWVALUE_MW(fault_entry, C369, BUF_ENTRY, INST_HI);
    addr_lo = READ_HWVALUE_MW(fault_entry, C369, BUF_ENTRY, INST_LO);
    buffer_entry->instance_ptr.address = addr_lo + (addr_hi << HWSIZE_MW(C369, BUF_ENTRY, INST_LO));

    // HW value contains the 4K page number. Shift to build the full address
    buffer_entry->instance_ptr.address <<= 12;

    buffer_entry->instance_ptr.aperture = get_fault_inst_aperture(fault_entry);

    addr_hi = READ_HWVALUE_MW(fault_entry, C369, BUF_ENTRY, ADDR_HI);
    addr_lo = READ_HWVALUE_MW(fault_entry, C369, BUF_ENTRY, ADDR_LO);

    // HW value contains the 4K page number. Shift to build the full address
    buffer_entry->fault_address = (addr_lo + (addr_hi << HWSIZE_MW(C369, BUF_ENTRY, ADDR_LO))) << 12;
    buffer_entry->fault_address = uvm_parent_gpu_canonical_address(parent_gpu, buffer_entry->fault_address);

    timestamp_hi = READ_HWVALUE_MW(fault_entry, C369, BUF_ENTRY, TIMESTAMP_HI);
    timestamp_lo = READ_HWVALUE_MW(fault_entry, C369, BUF_ENTRY, TIMESTAMP_LO);
    buffer_entry->timestamp = timestamp_lo + (timestamp_hi << HWSIZE_MW(C369, BUF_ENTRY, TIMESTAMP_LO));

    buffer_entry->fault_type = parent_gpu->fault_buffer_hal->get_fault_type(fault_entry);

    buffer_entry->fault_access_type = get_fault_access_type(fault_entry);

    buffer_entry->fault_source.client_type = get_fault_client_type(fault_entry);

    buffer_entry->fault_source.client_id = READ_HWVALUE_MW(fault_entry, C369, BUF_ENTRY, CLIENT);
    BUILD_BUG_ON(sizeof(buffer_entry->fault_source.client_id) * 8 < DRF_SIZE_MW(NVC369_BUF_ENTRY_CLIENT));

    buffer_entry->fault_source.gpc_id = READ_HWVALUE_MW(fault_entry, C369, BUF_ENTRY, GPC_ID);
    BUILD_BUG_ON(sizeof(buffer_entry->fault_source.gpc_id) * 8 < DRF_SIZE_MW(NVC369_BUF_ENTRY_GPC_ID));

    buffer_entry->is_replayable = (READ_HWVALUE_MW(fault_entry, C369, BUF_ENTRY, REPLAYABLE_FAULT) ==
                                   NVC369_BUF_ENTRY_REPLAYABLE_FAULT_TRUE);

    // Compute global uTLB id
    if (buffer_entry->fault_source.client_type == UVM_FAULT_CLIENT_TYPE_GPC) {
        NvU16 gpc_utlb_id = parent_gpu->arch_hal->mmu_client_id_to_utlb_id(buffer_entry->fault_source.client_id);
        NvU32 utlb_id;

        UVM_ASSERT(gpc_utlb_id < parent_gpu->utlb_per_gpc_count);

        utlb_id = buffer_entry->fault_source.gpc_id * parent_gpu->utlb_per_gpc_count + gpc_utlb_id;
        UVM_ASSERT(utlb_id < parent_gpu->fault_buffer_info.replayable.utlb_count);

        buffer_entry->fault_source.utlb_id = utlb_id;
    }
    else if (buffer_entry->fault_source.client_type == UVM_FAULT_CLIENT_TYPE_HUB) {
        buffer_entry->fault_source.utlb_id = 0;
    }

    buffer_entry->fault_source.mmu_engine_id = READ_HWVALUE_MW(fault_entry, C369, BUF_ENTRY, ENGINE_ID);
    BUILD_BUG_ON(sizeof(buffer_entry->fault_source.mmu_engine_id) * 8 < DRF_SIZE_MW(NVC369_BUF_ENTRY_ENGINE_ID));

    buffer_entry->fault_source.mmu_engine_type =
        parent_gpu->arch_hal->mmu_engine_id_to_type(buffer_entry->fault_source.mmu_engine_id);

    buffer_entry->fault_source.ve_id =
        parent_gpu->fault_buffer_hal->get_ve_id(buffer_entry->fault_source.mmu_engine_id,
                                                buffer_entry->fault_source.mmu_engine_type);
    BUILD_BUG_ON(1 << (sizeof(buffer_entry->fault_source.ve_id) * 8) < MAX_SUBCONTEXTS);

    buffer_entry->is_virtual = is_fault_address_virtual(fault_entry);

    buffer_entry->in_protected_mode = (READ_HWVALUE_MW(fault_entry, C369, BUF_ENTRY, PROTECTED_MODE) ==
                                       NVC369_BUF_ENTRY_PROTECTED_MODE_TRUE);

    replayable_fault_enabled = (READ_HWVALUE_MW(fault_entry, C369, BUF_ENTRY, REPLAYABLE_FAULT_EN) ==
                                NVC369_BUF_ENTRY_REPLAYABLE_FAULT_EN_TRUE);
    UVM_ASSERT_MSG(replayable_fault_enabled, "Fault with REPLAYABLE_FAULT_EN bit unset\n");
}

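// Parse the replayable fault entry at the given index into buffer_entry. The
// caller must have checked that the entry's valid bit is set; the valid bit
// is cleared before returning.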
NV_STATUS uvm_hal_volta_fault_buffer_parse_replayable_entry(uvm_parent_gpu_t *parent_gpu,
                                                            NvU32 index,
                                                            uvm_fault_buffer_entry_t *buffer_entry)
{
    fault_buffer_entry_c369_t entry;
    NvU32 *fault_entry;

    BUILD_BUG_ON(sizeof(entry) > UVM_GPU_MMU_MAX_FAULT_PACKET_SIZE);

    // Valid bit must be set before this function is called
    UVM_ASSERT(parent_gpu->fault_buffer_hal->entry_is_valid(parent_gpu, index));

    fault_entry = get_fault_buffer_entry(parent_gpu, index);

    // When Confidential Computing is enabled, faults are encrypted by RM, so
    // they need to be decrypted before they can be parsed
    if (!uvm_parent_gpu_replayable_fault_buffer_is_uvm_owned(parent_gpu)) {
        NV_STATUS status;
        UvmFaultMetadataPacket *fault_entry_metadata = get_fault_buffer_entry_metadata(parent_gpu, index);

        status = uvm_conf_computing_fault_decrypt(parent_gpu,
                                                  &entry,
                                                  fault_entry,
                                                  fault_entry_metadata->authTag,
                                                  fault_entry_metadata->valid);
        if (status != NV_OK) {
            uvm_global_set_fatal_error(status);
            return status;
        }

        fault_entry = (NvU32 *)&entry;
    }

    parse_fault_entry_common(parent_gpu, fault_entry, buffer_entry);

    UVM_ASSERT(buffer_entry->is_replayable);

    // Automatically clear valid bit for the entry in the fault buffer
    parent_gpu->fault_buffer_hal->entry_clear_valid(parent_gpu, index);

    return NV_OK;
}

void uvm_hal_volta_fault_buffer_parse_non_replayable_entry(uvm_parent_gpu_t *parent_gpu,
                                                           void *fault_packet,
                                                           uvm_fault_buffer_entry_t *buffer_entry)
{
    parse_fault_entry_common(parent_gpu, fault_packet, buffer_entry);

    // No need to clear the valid bit, since the fault buffer for
    // non-replayable faults is owned by RM and we are just parsing a copy of
    // the packet
    UVM_ASSERT(!buffer_entry->is_replayable);
}