1 /* 2 * SPDX-FileCopyrightText: Copyright (c) 2016-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 3 * SPDX-License-Identifier: MIT 4 * 5 * Permission is hereby granted, free of charge, to any person obtaining a 6 * copy of this software and associated documentation files (the "Software"), 7 * to deal in the Software without restriction, including without limitation 8 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 9 * and/or sell copies of the Software, and to permit persons to whom the 10 * Software is furnished to do so, subject to the following conditions: 11 * 12 * The above copyright notice and this permission notice shall be included in 13 * all copies or substantial portions of the Software. 14 * 15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 21 * DEALINGS IN THE SOFTWARE. 22 */ 23 24 #include "core/core.h" 25 #include "gpu/gpu.h" 26 #include "gpu/bus/kern_bus.h" 27 #include "gpu/mem_mgr/mem_mgr.h" 28 #include "gpu/mem_sys/kern_mem_sys.h" 29 #include "os/os.h" 30 31 // @ref busMigrateBarMapping_GV100 to see how FB region is organized 32 #define COHERENT_CPU_MAPPING_WPR COHERENT_CPU_MAPPING_REGION_0 33 34 /*! 35 * @brief Sets up a memdesc and a CPU pointer to the bottom 36 * of FB that will be used for issuing reads in order 37 * to flush pending writes to FB. 38 * 39 * @param[in] pGpu 40 * @param[in] pKernelBus 41 * 42 * @returns NV_OK on success 43 */ 44 NV_STATUS 45 kbusSetupCpuPointerForBusFlush_GV100 46 ( 47 OBJGPU *pGpu, 48 KernelBus *pKernelBus 49 ) 50 { 51 NV_STATUS status = NV_OK; 52 53 // Nothing to be done in paravirtualized guest (or CC) or if we don't want to do CPU reads for flushing. 54 if (IS_VIRTUAL_WITHOUT_SRIOV(pGpu) || 55 !kbusIsReadCpuPointerToFlushEnabled(pKernelBus)) 56 { 57 return NV_OK; 58 } 59 60 NV_ASSERT_OR_RETURN(!kbusIsBarAccessBlocked(pKernelBus), NV_ERR_INVALID_STATE); 61 62 status = memdescCreate(&pKernelBus->pFlushMemDesc, pGpu, 63 RM_PAGE_SIZE, 64 RM_PAGE_SIZE, 65 NV_TRUE, 66 ADDR_FBMEM, 67 NV_MEMORY_UNCACHED, 68 MEMDESC_FLAGS_LOST_ON_SUSPEND); 69 NV_ASSERT_OR_GOTO(status == NV_OK, cleanup); 70 71 // Allocate memory from reserved heap for flush 72 memdescTagAlloc(status, NV_FB_ALLOC_RM_INTERNAL_OWNER_UNNAMED_TAG_54, 73 pKernelBus->pFlushMemDesc); 74 NV_ASSERT_OR_GOTO(status == NV_OK, cleanup); 75 76 // 77 // Please note this is a long-lived BAR2 mapping by design. 78 // The mapping is used for flushing all future vidmem writes on BAR2. 79 // 80 pKernelBus->pReadToFlush = memdescMapInternal(pGpu, 81 pKernelBus->pFlushMemDesc, 82 TRANSFER_FLAGS_PERSISTENT_CPU_MAPPING); 83 if (pKernelBus->pReadToFlush == NULL) 84 { 85 status = NV_ERR_INSUFFICIENT_RESOURCES; 86 NV_ASSERT_OR_GOTO(pKernelBus->pReadToFlush != NULL, cleanup); 87 } 88 89 return NV_OK; 90 cleanup: 91 kbusDestroyCpuPointerForBusFlush_HAL(pGpu, pKernelBus); 92 return status; 93 } 94 95 /*! 96 * @brief Destroys the memdesc and frees the CPU pointer to the bottom of 97 * FB that was used for issuing reads in order to trigger bus flushes. 98 * 99 * @param[in] pGpu 100 * @param[in] pKernelBus 101 * 102 * @returns 103 */ 104 void 105 kbusDestroyCpuPointerForBusFlush_GV100 106 ( 107 OBJGPU *pGpu, 108 KernelBus *pKernelBus 109 ) 110 { 111 if (pKernelBus->pReadToFlush != NULL) 112 { 113 memdescUnmapInternal(pGpu, 114 pKernelBus->pFlushMemDesc, 115 TRANSFER_FLAGS_DEFER_FLUSH); 116 pKernelBus->pReadToFlush = NULL; 117 } 118 119 memdescFree(pKernelBus->pFlushMemDesc); 120 memdescDestroy(pKernelBus->pFlushMemDesc); 121 pKernelBus->pFlushMemDesc = NULL; 122 } 123 124 /** 125 * Helper function to map coherent cpu mapping. 126 * 127 * @param[in] pGpu Pointer to GPU 128 * @param[in] pKernelBus Kernel bus pointer 129 * @param[in] pMemDesc Pointer to memdesc that is to be mapped. 130 * 131 * @return cpu pointer if success 132 * NULL on other errors 133 */ 134 NvU8* 135 kbusMapCoherentCpuMapping_GV100 136 ( 137 OBJGPU *pGpu, 138 KernelBus *pKernelBus, 139 PMEMORY_DESCRIPTOR pMemDesc 140 ) 141 { 142 RmPhysAddr startAddr = memdescGetPhysAddr(pMemDesc, FORCE_VMMU_TRANSLATION(pMemDesc, AT_GPU), 0); 143 NvU64 size = memdescGetSize(pMemDesc); 144 RmPhysAddr endAddr = startAddr + size - 1; 145 RmPhysAddr rangeStart = 0; 146 RmPhysAddr rangeEnd = 0; 147 RmPhysAddr offset = 0; 148 NvU32 i = 0; 149 150 for (i = COHERENT_CPU_MAPPING_REGION_0; i < pKernelBus->coherentCpuMapping.nrMapping; ++i) 151 { 152 // Check if requested mem in the mappings. 153 rangeStart = pKernelBus->coherentCpuMapping.physAddr[i]; 154 rangeEnd = pKernelBus->coherentCpuMapping.physAddr[i] + pKernelBus->coherentCpuMapping.size[i] - 1; 155 offset = 0; 156 157 if (rangeStart <= startAddr && endAddr <= rangeEnd) 158 { 159 NV_ASSERT_OR_RETURN( 160 pKernelBus->coherentCpuMapping.pCpuMapping[i] != NvP64_NULL, NvP64_NULL); 161 162 // Get the offset of the region 163 offset = startAddr - pKernelBus->coherentCpuMapping.physAddr[i]; 164 pKernelBus->coherentCpuMapping.refcnt[i]++; 165 return (NvU8 *)NvP64_VALUE( 166 ((NvUPtr)pKernelBus->coherentCpuMapping.pCpuMapping[i] + 167 (NvUPtr)offset)); 168 } 169 } 170 171 NV_ASSERT_FAILED("No mappings found"); 172 return NvP64_NULL; 173 } 174 175 /** 176 * Helper function to unmap coherent cpu mapping 177 * 178 * @param[in] pGpu Pointer to GPU 179 * @param[in] pKernelBus Kernel bus pointer 180 * @param[in] pMemDesc Pointer to memdesc 181 * 182 * @return void 183 */ 184 void 185 kbusUnmapCoherentCpuMapping_GV100 186 ( 187 OBJGPU *pGpu, 188 KernelBus *pKernelBus, 189 PMEMORY_DESCRIPTOR pMemDesc 190 ) 191 { 192 RmPhysAddr startAddr = memdescGetPhysAddr(pMemDesc, FORCE_VMMU_TRANSLATION(pMemDesc, AT_GPU), 0); 193 NvU64 size = memdescGetSize(pMemDesc); 194 RmPhysAddr endAddr = startAddr + size - 1; 195 NvU32 i = 0; 196 197 NV_ASSERT(pMemDesc->_flags & MEMDESC_FLAGS_PHYSICALLY_CONTIGUOUS); 198 199 for (i = COHERENT_CPU_MAPPING_REGION_0; i < pKernelBus->coherentCpuMapping.nrMapping; ++i) 200 { 201 RmPhysAddr rangeStart = pKernelBus->coherentCpuMapping.physAddr[i]; 202 RmPhysAddr rangeEnd = pKernelBus->coherentCpuMapping.physAddr[i] + 203 pKernelBus->coherentCpuMapping.size[i] - 1; 204 if (rangeStart <= startAddr && endAddr <= rangeEnd) 205 { 206 NV_ASSERT_OR_RETURN_VOID(pKernelBus->coherentCpuMapping.refcnt[i] != 0); 207 pKernelBus->coherentCpuMapping.refcnt[i]--; 208 break; 209 } 210 } 211 212 if (i == pKernelBus->coherentCpuMapping.nrMapping) 213 { 214 NV_ASSERT_FAILED("No mappings found"); 215 } 216 217 // Flush the memory since caller writes to the FB 218 kbusFlush_HAL(pGpu, GPU_GET_KERNEL_BUS(pGpu), BUS_FLUSH_VIDEO_MEMORY); 219 220 return; 221 } 222 /** 223 * Destroy coherent cpu mapping to ACR region. 224 * 225 * This needs to be done only for P9 and not SHH. In SHH, CPU prefetches 226 * to WPR region because of the CPU mapping doesn't result in SW visible error 227 * unlike P9. 228 * 229 * @param[in] pGpu Pointer to Gpu 230 * @param[in] pKernelBus Kernel bus pointer 231 * 232 * @return void 233 */ 234 void kbusTeardownCoherentCpuMappingAcr_GV100 235 ( 236 OBJGPU *pGpu, 237 KernelBus *pKernelBus 238 ) 239 { 240 if (pKernelBus->coherentCpuMapping.bCoherentCpuMapping) 241 { 242 NV_ASSERT_OR_RETURN_VOID(pGpu->getProperty(pGpu, PDB_PROP_GPU_COHERENT_CPU_MAPPING)); 243 NV_ASSERT_OR_RETURN_VOID( pKernelBus->coherentCpuMapping.refcnt[COHERENT_CPU_MAPPING_WPR] == 0); 244 245 osFlushGpuCoherentCpuCacheRange(pGpu->pOsGpuInfo, 246 (NvUPtr)pKernelBus->coherentCpuMapping.pCpuMapping[COHERENT_CPU_MAPPING_WPR], 247 pKernelBus->coherentCpuMapping.size[COHERENT_CPU_MAPPING_WPR]); 248 249 osUnmapPciMemoryKernel64(pGpu, 250 pKernelBus->coherentCpuMapping.pCpuMapping[COHERENT_CPU_MAPPING_WPR]); 251 pKernelBus->coherentCpuMapping.pCpuMapping[COHERENT_CPU_MAPPING_WPR] = NvP64_NULL; 252 } 253 } 254 255 /*! 256 * @brief Destroy coherent cpu mapping. 257 * 258 * @param[in] pGpu OBJGPU pointer 259 * @param[in] pKernelBus Kernel bus pointer 260 * @param[in] bFlush Flush CPU cache or not 261 * 262 * @return 'NV_OK' if successful, an RM error code otherwise. 263 */ 264 void 265 kbusTeardownCoherentCpuMapping_GV100 266 ( 267 OBJGPU *pGpu, 268 KernelBus *pKernelBus, 269 NvBool bFlush 270 ) 271 { 272 NvU32 i = 0; 273 274 if (!pKernelBus->coherentCpuMapping.bCoherentCpuMapping) 275 return; 276 277 for (i = COHERENT_CPU_MAPPING_REGION_0; i < pKernelBus->coherentCpuMapping.nrMapping; ++i) 278 { 279 NV_ASSERT_OR_RETURN_VOID(pKernelBus->coherentCpuMapping.refcnt[i] == 0); 280 281 if (pKernelBus->coherentCpuMapping.pCpuMapping[i] != NvP64_NULL) 282 { 283 if (bFlush) 284 { 285 osFlushGpuCoherentCpuCacheRange(pGpu->pOsGpuInfo, 286 (NvUPtr)pKernelBus->coherentCpuMapping.pCpuMapping[i], 287 pKernelBus->coherentCpuMapping.size[i]); 288 } 289 290 osUnmapPciMemoryKernel64(pGpu, pKernelBus->coherentCpuMapping.pCpuMapping[i]); 291 pKernelBus->coherentCpuMapping.pCpuMapping[i] = NvP64_NULL; 292 } 293 } 294 295 pKernelBus->coherentCpuMapping.bCoherentCpuMapping = NV_FALSE; 296 } 297 298 /*! 299 * @brief Lower level FB flush to push pending writes to FB/sysmem 300 * 301 * NOTE: Must be called inside a SLI loop 302 * 303 * @param[in] pGpu 304 * @param[in] KernelBus 305 * @param[in] flags Flags to indicate aperture and other behaviors 306 * @return NV_OK on success 307 * 308 */ 309 NV_STATUS 310 kbusFlushSingle_GV100 311 ( 312 OBJGPU *pGpu, 313 KernelBus *pKernelBus, 314 NvU32 flags 315 ) 316 { 317 NvBool bCoherentCpuMapping = pGpu->getProperty(pGpu, PDB_PROP_GPU_COHERENT_CPU_MAPPING); 318 319 // 320 // Nothing to be done in the guest in the paravirtualization case or 321 // if guest is running in SRIOV heavy mode. 322 // 323 if (IS_VIRTUAL_WITHOUT_SRIOV(pGpu) || 324 (IS_VIRTUAL(pGpu) && gpuIsWarBug200577889SriovHeavyEnabled(pGpu))) 325 { 326 return NV_OK; 327 } 328 329 if (bCoherentCpuMapping) 330 { 331 // 332 // This function issues an HWSYNC. This is needed for synchronizing read/writes 333 // with NVLINK mappings. 334 // 335 portAtomicMemoryFenceFull(); 336 return NV_OK; 337 } 338 339 if (flags & BUS_FLUSH_SYSTEM_MEMORY) 340 { 341 portAtomicMemoryFenceFull(); 342 } 343 344 if (API_GPU_IN_RESET_SANITY_CHECK(pGpu) || API_GPU_IN_RECOVERY_SANITY_CHECK(pGpu) || 345 !API_GPU_ATTACHED_SANITY_CHECK(pGpu)) 346 { 347 // 348 // When the GPU is in full chip reset or lost 349 // We cannot expect to flush successfully so early return here 350 // 351 return NV_OK; 352 } 353 354 if (kbusIsBarAccessBlocked(pKernelBus)) 355 { 356 // If BAR has been blocked, there's nothing to flush for vidmem 357 return NV_OK; 358 } 359 360 if ((flags & BUS_FLUSH_VIDEO_MEMORY) && kbusIsReadCpuPointerToFlushEnabled(pKernelBus)) 361 { 362 volatile NvU32 data; 363 364 // 365 // Read the FB address 0 in order to trigger a flush. 366 // This will not work with reflected mappings so only enable on VOLTA+ 367 // Note SRIOV guest does not have access to uflush register. 368 // 369 NV_ASSERT(pKernelBus->pReadToFlush != NULL || pKernelBus->virtualBar2[GPU_GFID_PF].pCpuMapping != NULL); 370 371 if (pKernelBus->pReadToFlush != NULL) 372 { 373 data = MEM_RD32(pKernelBus->pReadToFlush); 374 } 375 else if (pKernelBus->virtualBar2[GPU_GFID_PF].pCpuMapping != NULL) 376 { 377 // 378 // pReadToFlush is still not ready for use. So, use pCpuMapping 379 // instead which should already be mapped to FB addr 0 as 380 // BAR2 is in physical mode right now. 381 // 382 data = MEM_RD32(pKernelBus->virtualBar2[GPU_GFID_PF].pCpuMapping); 383 } 384 (void) data; 385 } 386 387 return NV_OK; 388 } 389