1 /*
2 * SPDX-FileCopyrightText: Copyright (c) 2006-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
3 * SPDX-License-Identifier: MIT
4 *
5 * Permission is hereby granted, free of charge, to any person obtaining a
6 * copy of this software and associated documentation files (the "Software"),
7 * to deal in the Software without restriction, including without limitation
8 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
9 * and/or sell copies of the Software, and to permit persons to whom the
10 * Software is furnished to do so, subject to the following conditions:
11 *
12 * The above copyright notice and this permission notice shall be included in
13 * all copies or substantial portions of the Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
21 * DEALINGS IN THE SOFTWARE.
22 */
23
24 #include "core/core.h"
25 #include "gpu/gpu.h"
26 #include "os/os.h"
27 #include "gpu/mem_sys/kern_mem_sys.h"
28 #include "gpu/mem_mgr/mem_desc.h"
29 #include "ctrl/ctrl2080/ctrl2080fb.h"
30
31 #include "published/maxwell/gm107/dev_fb.h"
32 #include "published/maxwell/gm107/dev_bus.h"
33 #include "published/maxwell/gm107/dev_flush.h"
34 #include "published/maxwell/gm107/dev_fifo.h"
35
36 // Based on busFlushSingle_GM107
37
38 NV_STATUS
kmemsysDoCacheOp_GM107(OBJGPU * pGpu,KernelMemorySystem * pKernelMemorySystem,NvU32 reg,NvU32 regValue,NvU32 pollMask,PRMTIMEOUT pTimeout)39 kmemsysDoCacheOp_GM107
40 (
41 OBJGPU *pGpu,
42 KernelMemorySystem *pKernelMemorySystem,
43 NvU32 reg,
44 NvU32 regValue,
45 NvU32 pollMask,
46 PRMTIMEOUT pTimeout
47 )
48 {
49 NV_STATUS rmStatus = NV_OK;
50 NvU32 cnt = 0;
51 NV_STATUS timeoutStatus = NV_OK;
52 NvU32 regValueRead = 0;
53
54 if (IS_VIRTUAL(pGpu))
55 {
56 switch (reg)
57 {
58 case NV_UFLUSH_L2_PEERMEM_INVALIDATE:
59 case NV_UFLUSH_L2_SYSMEM_INVALIDATE:
60 break;
61 case NV_UFLUSH_L2_FLUSH_DIRTY:
62 return NV_OK;
63 default:
64 return NV_ERR_NOT_SUPPORTED;
65 }
66 }
67
68 if (!API_GPU_ATTACHED_SANITY_CHECK(pGpu))
69 {
70 //
71 // When the GPU is lost we cannot expect to successfully do cache
72 // maintenance (see Bug 1557278).
73 //
74 return rmStatus;
75 }
76
77 // We don't want this breakpoint when a debug build is being used by special test
78 // equipment (e.g. ATE) that expects to hit this situation. Bug 672073
79 #ifdef DEBUG
80 if (!(API_GPU_IN_RESET_SANITY_CHECK(pGpu)) && !IS_VIRTUAL(pGpu))
81 {
82 NV_ASSERT(GPU_REG_RD32(pGpu, NV_UFLUSH_FB_FLUSH) == 0);
83 NV_ASSERT(kmemsysReadL2SysmemInvalidateReg_HAL(pGpu, pKernelMemorySystem) == 0);
84 NV_ASSERT(kmemsysReadL2PeermemInvalidateReg_HAL(pGpu, pKernelMemorySystem) == 0);
85 NV_ASSERT(GPU_REG_RD32(pGpu, NV_UFLUSH_L2_CLEAN_COMPTAGS) == 0);
86 NV_ASSERT(GPU_REG_RD32(pGpu, NV_UFLUSH_L2_FLUSH_DIRTY) == 0);
87 }
88 #endif // DEBUG
89
90 switch (reg)
91 {
92 case NV_UFLUSH_L2_PEERMEM_INVALIDATE:
93 kmemsysWriteL2PeermemInvalidateReg_HAL(pGpu, pKernelMemorySystem, regValue);
94 break;
95 case NV_UFLUSH_L2_SYSMEM_INVALIDATE:
96 kmemsysWriteL2SysmemInvalidateReg_HAL(pGpu, pKernelMemorySystem, regValue);
97 break;
98 default:
99 GPU_REG_WR32(pGpu, reg, regValue);
100 }
101
102 while(1)
103 {
104 switch (reg)
105 {
106 case NV_UFLUSH_L2_PEERMEM_INVALIDATE:
107 regValueRead = kmemsysReadL2PeermemInvalidateReg_HAL(pGpu, pKernelMemorySystem);
108 break;
109 case NV_UFLUSH_L2_SYSMEM_INVALIDATE:
110 regValueRead = kmemsysReadL2SysmemInvalidateReg_HAL(pGpu, pKernelMemorySystem);
111 break;
112 default:
113 regValueRead = GPU_REG_RD32(pGpu, reg);
114 }
115
116 if (regValueRead & pollMask)
117 {
118 if (timeoutStatus == NV_ERR_TIMEOUT)
119 {
120 //
121 // This should not timeout, except for a HW bug. Famous last words.
122 // On !DEBUG we just keep trucking, it's the best we can do.
123 //
124 NV_PRINTF(LEVEL_ERROR,
125 "- timeout error waiting for reg 0x%x update cnt=%d\n",
126 reg, cnt);
127 rmStatus = NV_ERR_TIMEOUT;
128 DBG_BREAKPOINT();
129 break;
130 }
131 else if ( API_GPU_IN_RESET_SANITY_CHECK(pGpu) ||
132 !API_GPU_ATTACHED_SANITY_CHECK(pGpu))
133 {
134 //
135 // The GPU is in full chip reset, or has fallen off the bus
136 // Just return
137 //
138 return NV_OK;
139 }
140 timeoutStatus = gpuCheckTimeout(pGpu, pTimeout);
141 osSpinLoop();
142 cnt++;
143 }
144 else
145 break;
146 }
147
148 #ifdef DEBUG
149 if (cnt > 1 && !IS_VIRTUAL(pGpu))
150 {
151 NvU32 intr0 = 0;
152 intr0 = GPU_REG_RD32(pGpu, NV_PBUS_INTR_0);
153 NV_ASSERT(DRF_VAL(_PBUS, _INTR_0, _FB_ACK_TIMEOUT, intr0) != NV_PBUS_INTR_0_FB_ACK_TIMEOUT_PENDING);
154 }
155 #endif // DEBUG
156
157 return rmStatus;
158 }
159
/*!
 * @brief Write the NV_UFLUSH_L2_SYSMEM_INVALIDATE register.
 *
 * HAL accessor; pKernelMemorySystem is unused here but part of the
 * common HAL signature.
 *
 * @param[in] pGpu                OBJGPU pointer
 * @param[in] pKernelMemorySystem KernelMemorySystem pointer (unused)
 * @param[in] value               Raw value to write
 */
void
kmemsysWriteL2SysmemInvalidateReg_GM107
(
    OBJGPU             *pGpu,
    KernelMemorySystem *pKernelMemorySystem,
    NvU32               value
)
{
    GPU_REG_WR32(pGpu, NV_UFLUSH_L2_SYSMEM_INVALIDATE, value);
}
170
/*!
 * @brief Read the NV_UFLUSH_L2_SYSMEM_INVALIDATE register.
 *
 * HAL accessor; pKernelMemorySystem is unused here but part of the
 * common HAL signature.
 *
 * @param[in] pGpu                OBJGPU pointer
 * @param[in] pKernelMemorySystem KernelMemorySystem pointer (unused)
 *
 * @returns Current raw register value
 */
NvU32
kmemsysReadL2SysmemInvalidateReg_GM107
(
    OBJGPU             *pGpu,
    KernelMemorySystem *pKernelMemorySystem
)
{
    return GPU_REG_RD32(pGpu, NV_UFLUSH_L2_SYSMEM_INVALIDATE);
}
180
/*!
 * @brief Write the NV_UFLUSH_L2_PEERMEM_INVALIDATE register.
 *
 * HAL accessor; pKernelMemorySystem is unused here but part of the
 * common HAL signature.
 *
 * @param[in] pGpu                OBJGPU pointer
 * @param[in] pKernelMemorySystem KernelMemorySystem pointer (unused)
 * @param[in] value               Raw value to write
 */
void
kmemsysWriteL2PeermemInvalidateReg_GM107
(
    OBJGPU             *pGpu,
    KernelMemorySystem *pKernelMemorySystem,
    NvU32               value
)
{
    GPU_REG_WR32(pGpu, NV_UFLUSH_L2_PEERMEM_INVALIDATE, value);
}
191
/*!
 * @brief Read the NV_UFLUSH_L2_PEERMEM_INVALIDATE register.
 *
 * HAL accessor; pKernelMemorySystem is unused here but part of the
 * common HAL signature.
 *
 * @param[in] pGpu                OBJGPU pointer
 * @param[in] pKernelMemorySystem KernelMemorySystem pointer (unused)
 *
 * @returns Current raw register value
 */
NvU32
kmemsysReadL2PeermemInvalidateReg_GM107
(
    OBJGPU             *pGpu,
    KernelMemorySystem *pKernelMemorySystem
)
{
    return GPU_REG_RD32(pGpu, NV_UFLUSH_L2_PEERMEM_INVALIDATE);
}
201
202 /*!
203 * @brief Initialize the sysmem flush buffer
204 *
205 * Setting up the sysmem flush buffer needs to be done very early in some cases
206 * as it's required for the GPU to perform a system flush. One such case is
207 * resetting GPU FALCONs and in particular resetting the PMU as part of VBIOS
208 * init.
209 *
210 * @returns NV_OK if all is okay. Otherwise an error-specific value.
211 */
NV_STATUS
kmemsysInitFlushSysmemBuffer_GM107
(
    OBJGPU             *pGpu,
    KernelMemorySystem *pKernelMemorySystem
)
{
    NV_STATUS status;
    NvBool    bTryAgain = NV_FALSE;

    //
    // In case of suspend/resume, the buffer might be already allocated, but
    // the HW still needs to be programmed below.
    //
    if (pKernelMemorySystem->pSysmemFlushBufferMemDesc == NULL)
    {
        // The flush buffer must land within a 40-bit DMA window from the
        // GPU's DMA start address for the HW to reach it.
        const NvU32 flushBufferDmaAddressSize = 40;
        RmPhysAddr dmaWindowStartAddr = gpuGetDmaStartAddress(pGpu);
        RmPhysAddr dmaWindowEndAddr = dmaWindowStartAddr +
            (1ULL << flushBufferDmaAddressSize) - 1;

        //
        // Sysmem flush buffer
        // The sysmembar flush does a zero byte read of sysmem if there was a
        // sysmem write since the last flush. The actual memory does have
        // to be valid and allocated at all times because an actual read may
        // be issued (observed on e.g. GF108).
        //

        //
        // First, try to allocate a 32-bit addressable DMA memory without
        // lowering the DMA address size.
        // This is currently implemented for Linux where the MEMDESC_FLAGS_ALLOC_32BIT_ADDRESSABLE
        // will allocate 32-bit memory by using GFP_DMA32 flag.
        //
        status = memdescCreate(&pKernelMemorySystem->pSysmemFlushBufferMemDesc,
                               pGpu, RM_PAGE_SIZE,
                               (1 << kmemsysGetFlushSysmemBufferAddrShift_HAL(pGpu, pKernelMemorySystem)),
                               NV_TRUE,
                               ADDR_SYSMEM,
                               NV_MEMORY_UNCACHED,
                               MEMDESC_FLAGS_ALLOC_32BIT_ADDRESSABLE);
        if (status != NV_OK)
            return status;

        // memdescTagAlloc writes its result into 'status'.
        memdescTagAlloc(status, NV_FB_ALLOC_RM_INTERNAL_OWNER_UNNAMED_TAG_140,
                        pKernelMemorySystem->pSysmemFlushBufferMemDesc);

        //
        // Check if the memory allocation failed (probably due to no available
        // memory under 4GB).
        //
        if (status != NV_OK)
            bTryAgain = NV_TRUE;
        else
        {
            //
            // Check if the DMA address returned is not within 40-bit boundary
            // (possible on non-Linux platforms).
            //
            pKernelMemorySystem->sysmemFlushBuffer = memdescGetPhysAddr(pKernelMemorySystem->pSysmemFlushBufferMemDesc, AT_GPU, 0);
            if (pKernelMemorySystem->sysmemFlushBuffer < dmaWindowStartAddr ||
                pKernelMemorySystem->sysmemFlushBuffer + RM_PAGE_SIZE - 1 > dmaWindowEndAddr)
                bTryAgain = NV_TRUE;
        }

        //
        // If above checks are satisfied, do the actual sysmem flush buffer setup.
        // If not, try again with the WAR to temporarily lower the DMA address size.
        //
        if (!bTryAgain)
        {
            // Program the flush buffer address (shifted per the chip's
            // address-shift HAL) and finish early.
            GPU_FLD_WR_DRF_NUM(pGpu, _PFB, _NISO_FLUSH_SYSMEM_ADDR, _ADR_39_08,
                NvU64_LO32(pKernelMemorySystem->sysmemFlushBuffer >> kmemsysGetFlushSysmemBufferAddrShift_HAL(pGpu, pKernelMemorySystem)));

            return NV_OK;
        }

        // Discard the failed/unusable first attempt before retrying.
        memdescFree(pKernelMemorySystem->pSysmemFlushBufferMemDesc);
        memdescDestroy(pKernelMemorySystem->pSysmemFlushBufferMemDesc);

        status = memdescCreate(&pKernelMemorySystem->pSysmemFlushBufferMemDesc,
                               pGpu, RM_PAGE_SIZE,
                               (1 << kmemsysGetFlushSysmemBufferAddrShift_HAL(pGpu, pKernelMemorySystem)),
                               NV_TRUE,
                               ADDR_SYSMEM,
                               NV_MEMORY_UNCACHED,
                               MEMDESC_FLAGS_NONE);
        if (status != NV_OK)
            return status;
        //
        // Temporarily lower the DMA address size
        //
        // This is admittedly hacky and only safe during GPU initialization,
        // before other drivers like UVM (at least on Linux), can start
        // requesting its own DMA mappings for the same device.
        //
        // If DMA address size modification ever becomes needed in more places,
        // making it a part of the memdesc APIs would be cleaner.
        //
        if (gpuGetPhysAddrWidth_HAL(pGpu, ADDR_SYSMEM) > flushBufferDmaAddressSize)
        {
            memdescOverridePhysicalAddressWidthWindowsWAR(pGpu, pKernelMemorySystem->pSysmemFlushBufferMemDesc, flushBufferDmaAddressSize);
            osDmaSetAddressSize(pGpu->pOsGpuInfo, flushBufferDmaAddressSize);
        }

        // Allocate while the narrowed DMA mask is in effect.
        memdescTagAlloc(status, NV_FB_ALLOC_RM_INTERNAL_OWNER_UNNAMED_TAG_141,
                        pKernelMemorySystem->pSysmemFlushBufferMemDesc);

        // Restore it back to what HW supports
        if (gpuGetPhysAddrWidth_HAL(pGpu, ADDR_SYSMEM) > flushBufferDmaAddressSize)
        {
            osDmaSetAddressSize(pGpu->pOsGpuInfo, gpuGetPhysAddrWidth_HAL(pGpu, ADDR_SYSMEM));
        }

        if (status == NV_OK)
        {
            pKernelMemorySystem->sysmemFlushBuffer = memdescGetPhysAddr(pKernelMemorySystem->pSysmemFlushBufferMemDesc, AT_GPU, 0);
        }
        else if (status == NV_ERR_INVALID_ADDRESS)
        {
            //
            // NOTE(review): on PPC64LE with a non-zero DMA window the window
            // start is used as a fallback flush address; other platforms fall
            // through with sysmemFlushBuffer unset and rely on the range check
            // below — confirm this is the intended handling.
            //
            if (NVCPU_IS_PPC64LE && dmaWindowStartAddr != 0)
            {
                pKernelMemorySystem->sysmemFlushBuffer = dmaWindowStartAddr;
            }
        }
        else
        {
            NV_PRINTF(LEVEL_ERROR,
                      "Could not allocate sysmem flush buffer: %x\n", status);
            DBG_BREAKPOINT();
            return status;
        }

        // Manually redo the memdesc addressability check for the reduced address size
        if (pKernelMemorySystem->sysmemFlushBuffer < dmaWindowStartAddr ||
            pKernelMemorySystem->sysmemFlushBuffer + RM_PAGE_SIZE - 1 > dmaWindowEndAddr)
        {
            NvBool bMakeItFatal = NV_TRUE;
            NV_PRINTF(LEVEL_ERROR,
                      "GPU 0x%x: Allocated sysmem flush buffer not addressable 0x%llx\n",
                      pGpu->gpuId, pKernelMemorySystem->sysmemFlushBuffer);

            // Simulation/emulation platforms tolerate an unaddressable buffer.
            if (IS_FMODEL(pGpu) || IS_RTLSIM(pGpu) || IS_EMULATION(pGpu))
            {
                bMakeItFatal = NV_FALSE;
            }

            //
            // MODS on DGX-2 is hitting this. Make it non-fatal for now with
            // the proper WAR implementation tracked in bug 2403630.
            //

            //
            // Windows on greater than 2 TB systems is hitting this. Making it
            // non-fatal till a proper WAR is implemented. Bug 2423129 had
            // this issue.
            //
            if (RMCFG_FEATURE_PLATFORM_WINDOWS)
            {
                bMakeItFatal = NV_FALSE;
            }

            if (bMakeItFatal)
            {
                return NV_ERR_NO_MEMORY;
            }
        }
    }

    // Program (or re-program, on resume) the flush buffer address into HW.
    NV_ASSERT(pKernelMemorySystem->sysmemFlushBuffer != 0);
    GPU_FLD_WR_DRF_NUM(pGpu, _PFB, _NISO_FLUSH_SYSMEM_ADDR, _ADR_39_08,
            NvU64_LO32(pKernelMemorySystem->sysmemFlushBuffer >> kmemsysGetFlushSysmemBufferAddrShift_HAL(pGpu, pKernelMemorySystem)));

    return NV_OK;
}
388
389 /*!
390 * @brief Write the sysmemFlushBuffer val into the NV_PFB_NISO_FLUSH_SYSMEM_ADDR register
391 *
392 * @param[in] pGpu OBJGPU pointer
393 * @param[in[ pKernelMemorySystem KernelMemorySystem pointer
394 *
395 * @returns void
396 */
void
kmemsysProgramSysmemFlushBuffer_GM107
(
    OBJGPU             *pGpu,
    KernelMemorySystem *pKernelMemorySystem
)
{
    //
    // Q: Why are we writing this twice, both in fbInit and fbLoad?
    // A: fbLoad is preceded by busLoad which can do sysmem writes.
    //    Writing in fbInit solves the load order guessing problem.
    //
    // Q: Why not just in fbInit?
    // A: Because on power management resume, this value should be restored too.
    //
    // The register field holds the buffer address shifted by the per-chip
    // address shift (see kmemsysGetFlushSysmemBufferAddrShift_HAL).
    //
    GPU_FLD_WR_DRF_NUM(pGpu, _PFB, _NISO_FLUSH_SYSMEM_ADDR, _ADR_39_08,
            NvU64_LO32(pKernelMemorySystem->sysmemFlushBuffer >> kmemsysGetFlushSysmemBufferAddrShift_HAL(pGpu, pKernelMemorySystem)));
}
415
416 /*!
417 * @brief Validate the sysmemFlushBuffer val and assert
418 *
419 * @param[in] pGpu OBJGPU pointer
420 * @param[in[ pKernelMemorySystem KernelMemorySystem pointer
421 *
422 * @returns void
423 */
void
kmemsysAssertSysmemFlushBufferValid_GM107
(
    OBJGPU             *pGpu,
    KernelMemorySystem *pKernelMemorySystem
)
{
    // A zero address field means the sysmem flush buffer was never programmed.
    NV_ASSERT(GPU_REG_RD_DRF(pGpu, _PFB, _NISO_FLUSH_SYSMEM_ADDR, _ADR_39_08) != 0);
}
433
/*!
 * @brief Return the address shift applied to the sysmem flush buffer address
 *        before it is written into NV_PFB_NISO_FLUSH_SYSMEM_ADDR.
 *
 * @param[in] pGpu                OBJGPU pointer (unused)
 * @param[in] pKernelMemorySystem KernelMemorySystem pointer (unused)
 *
 * @returns NV_PFB_NISO_FLUSH_SYSMEM_ADDR_SHIFT for this chip family
 */
NvU32
kmemsysGetFlushSysmemBufferAddrShift_GM107
(
    OBJGPU             *pGpu,
    KernelMemorySystem *pKernelMemorySystem
)
{
    return NV_PFB_NISO_FLUSH_SYSMEM_ADDR_SHIFT;
}
443
/*!
 * @brief Return the maximum number of blacklisted (offlined) FB pages
 *        supported on this chip family.
 *
 * @param[in] pGpu                OBJGPU pointer (unused)
 * @param[in] pKernelMemorySystem KernelMemorySystem pointer (unused)
 *
 * @returns NV2080_CTRL_FB_OFFLINED_PAGES_MAX_PAGES
 */
NvU16
kmemsysGetMaximumBlacklistPages_GM107
(
    OBJGPU             *pGpu,
    KernelMemorySystem *pKernelMemorySystem
)
{
    return NV2080_CTRL_FB_OFFLINED_PAGES_MAX_PAGES;
}
453
454 /*!
455 * @brief Do any operations to get ready for a XVE sw reset.
456 *
457 * Set the PFIFO_FB_IFACE to DISABLE
458 *
459 * @return NV_OK
460 */
NV_STATUS
kmemsysPrepareForXVEReset_GM107
(
    POBJGPU             pGpu,
    KernelMemorySystem *pKernelMemorySystem
)
{
    // Disable the FIFO-to-FB interface so no FB traffic is in flight
    // when the XVE sw reset happens.
    GPU_REG_WR32(pGpu, NV_PFIFO_FB_IFACE,
                 DRF_DEF(_PFIFO, _FB_IFACE, _CONTROL, _DISABLE) |
                 DRF_DEF(_PFIFO, _FB_IFACE, _STATUS, _DISABLED));

    return NV_OK;
}
474