1 /*
2 * SPDX-FileCopyrightText: Copyright (c) 2021-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
3 * SPDX-License-Identifier: MIT
4 *
5 * Permission is hereby granted, free of charge, to any person obtaining a
6 * copy of this software and associated documentation files (the "Software"),
7 * to deal in the Software without restriction, including without limitation
8 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
9 * and/or sell copies of the Software, and to permit persons to whom the
10 * Software is furnished to do so, subject to the following conditions:
11 *
12 * The above copyright notice and this permission notice shall be included in
13 * all copies or substantial portions of the Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
21 * DEALINGS IN THE SOFTWARE.
22 */
23
24 #include "kernel/gpu/fifo/kernel_channel_group.h"
25 #include "kernel/gpu/mem_mgr/mem_mgr.h"
26 #include "kernel/virtualization/hypervisor/hypervisor.h"
27 #include "gpu/mem_mgr/mem_desc.h"
28
29 #include "nvRmReg.h"
30
31 #include "gpu/bus/kern_bus.h"
32
33 /*!
34 * @brief Allocate buffer to save/restore faulting engine methods
35 */
36 NV_STATUS
kchangrpAllocFaultMethodBuffers_GV100(OBJGPU * pGpu,KernelChannelGroup * pKernelChannelGroup)37 kchangrpAllocFaultMethodBuffers_GV100
38 (
39 OBJGPU *pGpu,
40 KernelChannelGroup *pKernelChannelGroup
41 )
42 {
43 NV_STATUS status = NV_OK;
44 NvU32 bufSizeInBytes = 0;
45 KernelFifo *pKernelFifo = GPU_GET_KERNEL_FIFO(pGpu);
46 MemoryManager *pMemoryManager = GPU_GET_MEMORY_MANAGER(pGpu);
47 NvU32 runQueues = kfifoGetNumRunqueues_HAL(pGpu, pKernelFifo);
48 NvU32 index = 0;
49 NvU32 faultBufApert = ADDR_SYSMEM;
50 NvU32 faultBufAttr = NV_MEMORY_CACHED;
51 NvU64 memDescFlags = MEMDESC_FLAGS_LOST_ON_SUSPEND;
52 HW_ENG_FAULT_METHOD_BUFFER *pFaultMthdBuf = NULL;
53 NvU32 gfid = pKernelChannelGroup->gfid;
54 TRANSFER_SURFACE surf = {0};
55
56 //
57 // Allocate method buffer if applicable
58 // For SR-IOV, Guest RM allocates the mthd buffers, no work done by host-RM
59 // For SR-IOV HEAVY and legacy vGpu, mthd buffers allocated by host RM,
60 // For GSP config, method buffer allocation is done by CPU-RM
61 //
62 // Skip method buffer allocation for the rest
63 //
64 if ((IS_GFID_VF(gfid) && !IS_SRIOV_HEAVY(pGpu)) || // SRIOV guest on Host
65 RMCFG_FEATURE_PLATFORM_GSP || // GSP-RM
66 IS_VIRTUAL_WITHOUT_SRIOV(pGpu) || // legacy vgpu
67 IS_SRIOV_HEAVY_GUEST(pGpu)) // SRIOV-heavy guest
68 {
69 return NV_OK;
70 }
71
72 // Pre-reqs
73 NV_ASSERT_OR_RETURN((pKernelChannelGroup->pMthdBuffers != NULL), NV_ERR_INVALID_STATE);
74
75 // Calculate size of buffer
76 NV_ASSERT_OK_OR_RETURN(gpuGetCeFaultMethodBufferSize(pGpu, &bufSizeInBytes));
77 NV_ASSERT((bufSizeInBytes > 0));
78
79 if (IS_SRIOV_HEAVY_HOST(pGpu))
80 {
81 //
82 // In case of SRIOV heavy mode host RM is allocating fault method buffers
83 // on behalf of guest. As VF channels cannot use sysmem allocated in the
84 // host, force fault buffer aperture to vid mem.
85 //
86 faultBufApert = ADDR_FBMEM;
87 faultBufAttr = NV_MEMORY_CACHED;
88 memDescFlags |= MEMDESC_FLAGS_OWNED_BY_CURRENT_DEVICE;
89 }
90 else
91 {
92 // Get the right aperture/attribute
93 faultBufApert = ADDR_SYSMEM;
94 faultBufAttr = NV_MEMORY_CACHED;
95 memdescOverrideInstLoc(DRF_VAL(_REG_STR_RM, _INST_LOC_3, _FAULT_METHOD_BUFFER, pGpu->instLocOverrides3),
96 "fault method buffer", &faultBufApert, &faultBufAttr);
97 if (faultBufApert == ADDR_FBMEM)
98 memDescFlags |= MEMDESC_FLAGS_OWNED_BY_CURRENT_DEVICE;
99 }
100
101 // Allocate buffer for each runqueue
102 for (index = 0; index < runQueues; index++)
103 {
104 pFaultMthdBuf = &(pKernelChannelGroup->pMthdBuffers[index]);
105
106 // Allocate and initialize MEMDESC
107 status = memdescCreate(&(pFaultMthdBuf->pMemDesc), pGpu, bufSizeInBytes, 0,
108 NV_TRUE, faultBufApert, faultBufAttr, memDescFlags);
109 if (status != NV_OK)
110 {
111 DBG_BREAKPOINT();
112 goto fail;
113 }
114
115 memdescTagAlloc(status, NV_FB_ALLOC_RM_INTERNAL_OWNER_UNNAMED_TAG_34,
116 pFaultMthdBuf->pMemDesc);
117 if (status != NV_OK)
118 {
119 DBG_BREAKPOINT();
120 memdescDestroy(pFaultMthdBuf->pMemDesc);
121 pFaultMthdBuf->pMemDesc = NULL;
122 goto fail;
123 }
124
125 memdescSetName(pGpu, pFaultMthdBuf->pMemDesc, NV_RM_SURF_NAME_CE_FAULT_METHOD_BUFFER, NULL);
126
127 surf.pMemDesc = pFaultMthdBuf->pMemDesc;
128 surf.offset = 0;
129
130 NV_ASSERT_OK_OR_RETURN(
131 memmgrMemSet(pMemoryManager, &surf, 0, bufSizeInBytes,
132 TRANSFER_FLAGS_NONE));
133
134 pFaultMthdBuf->bar2Addr = 0;
135 }
136
137 return status;
138
139 fail:
140 kchangrpFreeFaultMethodBuffers_HAL(pGpu, pKernelChannelGroup);
141 return status;
142 }
143
144 /*!
145 * @brief Free method buffers
146 */
147 NV_STATUS
kchangrpFreeFaultMethodBuffers_GV100(OBJGPU * pGpu,KernelChannelGroup * pKernelChannelGroup)148 kchangrpFreeFaultMethodBuffers_GV100
149 (
150 OBJGPU *pGpu,
151 KernelChannelGroup *pKernelChannelGroup
152 )
153 {
154 NV_STATUS status = NV_OK;
155 HW_ENG_FAULT_METHOD_BUFFER *pFaultMthdBuf = NULL;
156 KernelFifo *pKernelFifo = GPU_GET_KERNEL_FIFO(pGpu);
157 NvU32 runQueues = kfifoGetNumRunqueues_HAL(pGpu, pKernelFifo);
158 NvU32 index = 0;
159 NvU32 gfid = pKernelChannelGroup->gfid;
160
161
162 //
163 // Free the method buffer if applicable
164 // For SR-IOV, Guest RM allocates the mthd buffers but later RPCs into the
165 // host, and populates the data structure, but it should be free-d only by
166 // guest RM.
167 // For SR-IOV HEAVY and legacy vGpu, mthd buffers should be free-d by host RM,
168 // For GSP config, we need to free the method buffer in GSP-RM
169 //
170 // Skip free for the rest
171 //
172 if ((IS_GFID_VF(gfid) && !IS_SRIOV_HEAVY(pGpu)) || // SRIOV guest on Host
173 IS_VIRTUAL_WITHOUT_SRIOV(pGpu) || // legacy vgpu
174 IS_SRIOV_HEAVY_GUEST(pGpu)) // SRIOV-heavy guest
175 {
176 return NV_OK;
177 }
178
179 NV_ASSERT_OR_RETURN((pKernelChannelGroup->pMthdBuffers != NULL), NV_ERR_INVALID_STATE);
180
181 // Free method buffer memdesc if allocated
182 for (index = 0; index < runQueues; index++)
183 {
184 pFaultMthdBuf = &((pKernelChannelGroup->pMthdBuffers)[index]);
185 if ((pFaultMthdBuf != NULL) && (pFaultMthdBuf->pMemDesc != NULL))
186 {
187 // Free the memory
188 memdescFree(pFaultMthdBuf->pMemDesc);
189 memdescDestroy(pFaultMthdBuf->pMemDesc);
190 pFaultMthdBuf->pMemDesc = NULL;
191 }
192 }
193
194 return status;
195 }
196
197 /*!
198 * @brief Map method buffer to invisible BAR2 region
199 */
200 NV_STATUS
kchangrpMapFaultMethodBuffers_GV100(OBJGPU * pGpu,KernelChannelGroup * pKernelChannelGroup,NvU32 runqueue)201 kchangrpMapFaultMethodBuffers_GV100
202 (
203 OBJGPU *pGpu,
204 KernelChannelGroup *pKernelChannelGroup,
205 NvU32 runqueue
206 )
207 {
208 NvU32 gfid = pKernelChannelGroup->gfid;
209 MemoryManager *pMemoryManager = GPU_GET_MEMORY_MANAGER(pGpu);
210 KernelFifo *pKernelFifo = GPU_GET_KERNEL_FIFO(pGpu);
211 NvU32 runQueues = kfifoGetNumRunqueues_HAL(pGpu, pKernelFifo);
212 KernelBus *pKernelBus = GPU_GET_KERNEL_BUS(pGpu);
213 HW_ENG_FAULT_METHOD_BUFFER *pFaultMthdBuf = NULL;
214
215 //
216 // Map method buffer to invisible BAR2 if applicable
217 // For SR-IOV, Guest RM maps the mthd buffers, no work done by host-RM
218 // For SR-IOV HEAVY and legacy vGpu, mthd buffers mapped to BAR2 by host RM,
219 // For GSP config, method buffer BAR2 mapping is done by GSP-RM
220 //
221 // Skip method buffer allocation for the rest
222 //
223 if ((IS_GFID_VF(gfid) && !IS_SRIOV_HEAVY(pGpu)) || // SRIOV guest on Host
224 IS_GSP_CLIENT(pGpu) || // CPU-RM
225 IS_VIRTUAL_WITHOUT_SRIOV(pGpu) || // legacy vgpu
226 IS_SRIOV_HEAVY_GUEST(pGpu)) // SRIOV-heavy guest
227 {
228 return NV_OK;
229 }
230
231 // Pre-reqs
232 NV_ASSERT_OR_RETURN((pKernelChannelGroup->pMthdBuffers != NULL), NV_ERR_INVALID_STATE);
233 NV_ASSERT_OR_RETURN((runqueue < runQueues), NV_ERR_INVALID_STATE);
234
235 // Get method buffer handle
236 pFaultMthdBuf = &(pKernelChannelGroup->pMthdBuffers[runqueue]);
237
238 // Skip mapping if memdesc not allocated or if bar2 mapping has already been done
239 if ((pFaultMthdBuf->pMemDesc == NULL) || (pFaultMthdBuf->bar2Addr != 0))
240 {
241 return NV_OK;
242 }
243
244 memmgrSetMemDescPageSize_HAL(pGpu, pMemoryManager, pFaultMthdBuf->pMemDesc,
245 AT_GPU, RM_ATTR_PAGE_SIZE_4KB);
246
247 // Map the buffer to BAR2 invisible region
248 NV_ASSERT_OK_OR_RETURN(kbusMapCpuInvisibleBar2Aperture_HAL(pGpu, pKernelBus,
249 pFaultMthdBuf->pMemDesc,
250 &(pFaultMthdBuf->bar2Addr),
251 pFaultMthdBuf->pMemDesc->Size,
252 0,
253 gfid));
254
255 NV_PRINTF(LEVEL_INFO,
256 "Allocating Method buffer with Bar2Addr LO 0x%08x Bar2Addr "
257 "HI 0x%08x runqueue 0x%0x\n",
258 NvU64_LO32(pFaultMthdBuf->bar2Addr),
259 NvU64_HI32(pFaultMthdBuf->bar2Addr), runqueue);
260
261 return NV_OK;
262 }
263
264 /*!
265 * @brief Unmap method buffers from CPU invisible BAR2
266 */
267 NV_STATUS
kchangrpUnmapFaultMethodBuffers_GV100(OBJGPU * pGpu,KernelChannelGroup * pKernelChannelGroup,NvU32 runqueue)268 kchangrpUnmapFaultMethodBuffers_GV100
269 (
270 OBJGPU *pGpu,
271 KernelChannelGroup *pKernelChannelGroup,
272 NvU32 runqueue
273 )
274 {
275 NvU32 gfid = pKernelChannelGroup->gfid;
276 KernelBus *pKernelBus = GPU_GET_KERNEL_BUS(pGpu);
277 KernelFifo *pKernelFifo = GPU_GET_KERNEL_FIFO(pGpu);
278 NvU32 runQueues = kfifoGetNumRunqueues_HAL(pGpu, pKernelFifo);
279 HW_ENG_FAULT_METHOD_BUFFER *pFaultMthdBuf = NULL;
280
281 //
282 // Unmap the method buffer if applicable
283 // For SR-IOV, Guest RM maps and is unmapped only by guest-RM
284 // For SR-IOV HEAVY and legacy vGpu, mthd buffers should be unmapped by host RM,
285 // For GSP config, method buffer BAR2 unmapping is done on GSP-RM
286 //
287 // Skip unmap for the rest
288 //
289 if ((IS_GFID_VF(gfid) && !IS_SRIOV_HEAVY(pGpu)) || // SRIOV guest on Host
290 IS_GSP_CLIENT(pGpu) || // CPU-RM
291 IS_VIRTUAL_WITHOUT_SRIOV(pGpu) || // legacy vgpu
292 IS_SRIOV_HEAVY_GUEST(pGpu)) // SRIOV-heavy guest
293 {
294 return NV_OK;
295 }
296
297 // Pre-reqs
298 NV_ASSERT_OR_RETURN((pKernelChannelGroup->pMthdBuffers != NULL), NV_ERR_INVALID_ARGUMENT);
299 NV_ASSERT_OR_RETURN((runqueue < runQueues), NV_ERR_INVALID_STATE);
300
301 // Get method buffer handle
302 pFaultMthdBuf = &(pKernelChannelGroup->pMthdBuffers[runqueue]);
303
304 // TODO: Check to be removed as part of fix for Bug 200691429
305 if (!RMCFG_FEATURE_PLATFORM_GSP)
306 NV_ASSERT_OR_RETURN((pFaultMthdBuf->pMemDesc != NULL), NV_ERR_INVALID_STATE);
307
308 // Unmap method buffer from bar2 invisible region and reset bar2addr
309 if (pFaultMthdBuf->bar2Addr != 0)
310 {
311 kbusUnmapCpuInvisibleBar2Aperture_HAL(pGpu, pKernelBus, pFaultMthdBuf->pMemDesc,
312 pFaultMthdBuf->bar2Addr, gfid);
313 pFaultMthdBuf->bar2Addr = 0;
314 }
315
316 return NV_OK;
317 }
318