/*
 * SPDX-FileCopyrightText: Copyright (c) 2021-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 * SPDX-License-Identifier: MIT
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 */

#include "kernel/gpu/fifo/kernel_channel_group.h"
#include "kernel/gpu/mem_mgr/mem_mgr.h"
#include "kernel/virtualization/hypervisor/hypervisor.h"
#include "gpu/mem_mgr/mem_desc.h"

#include "nvRmReg.h"

#include "gpu/bus/kern_bus.h"

/*!
 * @brief Allocate buffer to save/restore faulting engine methods
 */
NV_STATUS
kchangrpAllocFaultMethodBuffers_GV100
(
    OBJGPU                      *pGpu,
    KernelChannelGroup          *pKernelChannelGroup
)
{
    NV_STATUS                    status         = NV_OK;
    NvU32                        bufSizeInBytes = 0;
    KernelFifo                  *pKernelFifo    = GPU_GET_KERNEL_FIFO(pGpu);
    MemoryManager               *pMemoryManager = GPU_GET_MEMORY_MANAGER(pGpu);
    NvU32                        runQueues      = kfifoGetNumRunqueues_HAL(pGpu, pKernelFifo);
    NvU32                        index          = 0;
    NvU32                        faultBufApert  = ADDR_SYSMEM;
    NvU32                        faultBufAttr   = NV_MEMORY_CACHED;
    NvU64                        memDescFlags   = MEMDESC_FLAGS_LOST_ON_SUSPEND;
    HW_ENG_FAULT_METHOD_BUFFER  *pFaultMthdBuf  = NULL;
    NvU32                        gfid           = pKernelChannelGroup->gfid;
    TRANSFER_SURFACE             surf           = {0};

    //
    // Allocate method buffer if applicable
    // For SR-IOV, Guest RM allocates the mthd buffers, no work done by host-RM
    // For SR-IOV HEAVY and legacy vGpu, mthd buffers allocated by host RM,
    // For GSP config, method buffer allocation is done by CPU-RM
    //
    // Skip method buffer allocation for the rest
    //
    if ((IS_GFID_VF(gfid) && !IS_SRIOV_HEAVY(pGpu)) ||  // SRIOV guest on Host
        RMCFG_FEATURE_PLATFORM_GSP ||                   // GSP-RM
        IS_VIRTUAL_WITHOUT_SRIOV(pGpu) ||               // legacy vgpu
        IS_SRIOV_HEAVY_GUEST(pGpu))                     // SRIOV-heavy guest
    {
        return NV_OK;
    }

    // Pre-reqs
    NV_ASSERT_OR_RETURN((pKernelChannelGroup->pMthdBuffers != NULL), NV_ERR_INVALID_STATE);

    // Calculate size of buffer
    NV_ASSERT_OK_OR_RETURN(gpuGetCeFaultMethodBufferSize(pGpu, &bufSizeInBytes));
    NV_ASSERT((bufSizeInBytes > 0));
78 
    if (IS_SRIOV_HEAVY_HOST(pGpu))
    {
        //
        // In SRIOV heavy mode, host RM allocates the fault method buffers on
        // behalf of the guest. Since VF channels cannot use sysmem allocated
        // in the host, force the fault buffer aperture to vidmem.
        //
        faultBufApert = ADDR_FBMEM;
        faultBufAttr  = NV_MEMORY_CACHED;
        memDescFlags  |= MEMDESC_FLAGS_OWNED_BY_CURRENT_DEVICE;
    }
    else
    {
        // Default to cached sysmem; honor any instance-location registry
        // override for the fault method buffer
        faultBufApert = ADDR_SYSMEM;
        faultBufAttr  = NV_MEMORY_CACHED;
        memdescOverrideInstLoc(DRF_VAL(_REG_STR_RM, _INST_LOC_3, _FAULT_METHOD_BUFFER, pGpu->instLocOverrides3),
                               "fault method buffer", &faultBufApert, &faultBufAttr);
        if (faultBufApert == ADDR_FBMEM)
            memDescFlags  |= MEMDESC_FLAGS_OWNED_BY_CURRENT_DEVICE;
    }

    // Allocate buffer for each runqueue
    for (index = 0; index < runQueues; index++)
    {
        pFaultMthdBuf = &(pKernelChannelGroup->pMthdBuffers[index]);

        // Allocate and initialize MEMDESC
        status = memdescCreate(&(pFaultMthdBuf->pMemDesc), pGpu, bufSizeInBytes, 0,
                               NV_TRUE, faultBufApert, faultBufAttr, memDescFlags);
        if (status != NV_OK)
        {
            DBG_BREAKPOINT();
            goto fail;
        }

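        // Allocate the physical backing memory described by the memdesc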
        memdescTagAlloc(status, NV_FB_ALLOC_RM_INTERNAL_OWNER_UNNAMED_TAG_34,
                        pFaultMthdBuf->pMemDesc);
        if (status != NV_OK)
        {
            DBG_BREAKPOINT();
            memdescDestroy(pFaultMthdBuf->pMemDesc);
            pFaultMthdBuf->pMemDesc = NULL;
            goto fail;
        }

        memdescSetName(pGpu, pFaultMthdBuf->pMemDesc, NV_RM_SURF_NAME_CE_FAULT_METHOD_BUFFER, NULL);

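        // Zero-fill the new buffer through the memory transfer interface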
        surf.pMemDesc = pFaultMthdBuf->pMemDesc;
        surf.offset = 0;

        status = memmgrMemSet(pMemoryManager, &surf, 0, bufSizeInBytes,
                              TRANSFER_FLAGS_NONE);
        if (status != NV_OK)
        {
            DBG_BREAKPOINT();
            goto fail;
        }

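        // The BAR2 mapping is established separately by kchangrpMapFaultMethodBuffers_GV100()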
        pFaultMthdBuf->bar2Addr = 0;
    }

    return status;

fail:
    kchangrpFreeFaultMethodBuffers_HAL(pGpu, pKernelChannelGroup);
    return status;
}

/*!
 * @brief Free method buffers
 */
NV_STATUS
kchangrpFreeFaultMethodBuffers_GV100
(
    OBJGPU             *pGpu,
    KernelChannelGroup *pKernelChannelGroup
)
{
    NV_STATUS                   status         = NV_OK;
    HW_ENG_FAULT_METHOD_BUFFER *pFaultMthdBuf  = NULL;
    KernelFifo                 *pKernelFifo    = GPU_GET_KERNEL_FIFO(pGpu);
    NvU32                       runQueues      = kfifoGetNumRunqueues_HAL(pGpu, pKernelFifo);
    NvU32                       index          = 0;
    NvU32                       gfid           = pKernelChannelGroup->gfid;

    //
    // Free the method buffer if applicable
    // For SR-IOV, Guest RM allocates the mthd buffers and later RPCs into the
    // host to populate the data structure, so they must be freed only by
    // guest RM.
    // For SR-IOV HEAVY and legacy vGpu, mthd buffers must be freed by host RM
    // For GSP config, the method buffer is freed in GSP-RM
    //
    // Skip free for the rest
    //
    if ((IS_GFID_VF(gfid) && !IS_SRIOV_HEAVY(pGpu)) ||  // SRIOV guest on Host
        IS_VIRTUAL_WITHOUT_SRIOV(pGpu) ||               // legacy vgpu
        IS_SRIOV_HEAVY_GUEST(pGpu))                     // SRIOV-heavy guest
    {
        return NV_OK;
    }

    NV_ASSERT_OR_RETURN((pKernelChannelGroup->pMthdBuffers != NULL), NV_ERR_INVALID_STATE);

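    // Note: only the per-runqueue memory descriptors are released here; the
    // pMthdBuffers array itself remains owned by the caller.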
    // Free method buffer memdesc if allocated
    for (index = 0; index < runQueues; index++)
    {
        pFaultMthdBuf = &((pKernelChannelGroup->pMthdBuffers)[index]);
        if ((pFaultMthdBuf != NULL) && (pFaultMthdBuf->pMemDesc != NULL))
        {
            // Free the memory
            memdescFree(pFaultMthdBuf->pMemDesc);
            memdescDestroy(pFaultMthdBuf->pMemDesc);
            pFaultMthdBuf->pMemDesc = NULL;
        }
    }

    return status;
}

/*!
 * @brief Map method buffer to invisible BAR2 region
 */
NV_STATUS
kchangrpMapFaultMethodBuffers_GV100
(
    OBJGPU                      *pGpu,
    KernelChannelGroup          *pKernelChannelGroup,
    NvU32                        runqueue
)
{
    NvU32                        gfid           = pKernelChannelGroup->gfid;
    MemoryManager               *pMemoryManager = GPU_GET_MEMORY_MANAGER(pGpu);
    KernelFifo                  *pKernelFifo    = GPU_GET_KERNEL_FIFO(pGpu);
    NvU32                        runQueues      = kfifoGetNumRunqueues_HAL(pGpu, pKernelFifo);
    KernelBus                   *pKernelBus     = GPU_GET_KERNEL_BUS(pGpu);
    HW_ENG_FAULT_METHOD_BUFFER  *pFaultMthdBuf  = NULL;

    //
    // Map method buffer to invisible BAR2 if applicable
    // For SR-IOV, Guest RM maps the mthd buffers, no work done by host-RM
    // For SR-IOV HEAVY and legacy vGpu, mthd buffers mapped to BAR2 by host RM
    // For GSP config, method buffer BAR2 mapping is done by GSP-RM
    //
    // Skip method buffer mapping for the rest
    //
    if ((IS_GFID_VF(gfid) && !IS_SRIOV_HEAVY(pGpu)) ||  // SRIOV guest on Host
        IS_GSP_CLIENT(pGpu) ||                          // CPU-RM
        IS_VIRTUAL_WITHOUT_SRIOV(pGpu) ||               // legacy vgpu
        IS_SRIOV_HEAVY_GUEST(pGpu))                     // SRIOV-heavy guest
    {
        return NV_OK;
    }

    // Pre-reqs
    NV_ASSERT_OR_RETURN((pKernelChannelGroup->pMthdBuffers != NULL), NV_ERR_INVALID_STATE);
    NV_ASSERT_OR_RETURN((runqueue < runQueues), NV_ERR_INVALID_STATE);

    // Get method buffer handle
    pFaultMthdBuf = &(pKernelChannelGroup->pMthdBuffers[runqueue]);

    // Skip mapping if memdesc not allocated or if bar2 mapping has already been done
    if ((pFaultMthdBuf->pMemDesc == NULL) || (pFaultMthdBuf->bar2Addr != 0))
    {
        return NV_OK;
    }

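    // Method buffers are mapped into BAR2 with 4KB pages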
    memmgrSetMemDescPageSize_HAL(pGpu, pMemoryManager, pFaultMthdBuf->pMemDesc,
                                 AT_GPU, RM_ATTR_PAGE_SIZE_4KB);

    // Map the buffer to BAR2 invisible region
    NV_ASSERT_OK_OR_RETURN(kbusMapCpuInvisibleBar2Aperture_HAL(pGpu, pKernelBus,
                                                               pFaultMthdBuf->pMemDesc,
                                                               &(pFaultMthdBuf->bar2Addr),
                                                               pFaultMthdBuf->pMemDesc->Size,
                                                               0,
                                                               gfid));

    NV_PRINTF(LEVEL_INFO,
              "Mapped method buffer with Bar2Addr LO 0x%08x Bar2Addr "
              "HI 0x%08x runqueue 0x%x\n",
              NvU64_LO32(pFaultMthdBuf->bar2Addr),
              NvU64_HI32(pFaultMthdBuf->bar2Addr), runqueue);

    return NV_OK;
}

/*!
 * @brief Unmap method buffers from CPU invisible BAR2
 */
NV_STATUS
kchangrpUnmapFaultMethodBuffers_GV100
(
    OBJGPU             *pGpu,
    KernelChannelGroup *pKernelChannelGroup,
    NvU32               runqueue
)
{
    NvU32                       gfid           = pKernelChannelGroup->gfid;
    KernelBus                  *pKernelBus     = GPU_GET_KERNEL_BUS(pGpu);
    KernelFifo                 *pKernelFifo    = GPU_GET_KERNEL_FIFO(pGpu);
    NvU32                       runQueues      = kfifoGetNumRunqueues_HAL(pGpu, pKernelFifo);
    HW_ENG_FAULT_METHOD_BUFFER *pFaultMthdBuf  = NULL;

    //
    // Unmap the method buffer if applicable
    // For SR-IOV, guest RM maps the mthd buffers, so they are unmapped only by
    // guest RM
    // For SR-IOV HEAVY and legacy vGpu, mthd buffers must be unmapped by host RM
    // For GSP config, method buffer BAR2 unmapping is done by GSP-RM
    //
    // Skip unmap for the rest
    //
    if ((IS_GFID_VF(gfid) && !IS_SRIOV_HEAVY(pGpu)) ||   // SRIOV guest on Host
        IS_GSP_CLIENT(pGpu) ||                           // CPU-RM
        IS_VIRTUAL_WITHOUT_SRIOV(pGpu) ||                // legacy vgpu
        IS_SRIOV_HEAVY_GUEST(pGpu))                      // SRIOV-heavy guest
    {
        return NV_OK;
    }

    // Pre-reqs
    NV_ASSERT_OR_RETURN((pKernelChannelGroup->pMthdBuffers != NULL), NV_ERR_INVALID_ARGUMENT);
    NV_ASSERT_OR_RETURN((runqueue < runQueues), NV_ERR_INVALID_STATE);

    // Get method buffer handle
    pFaultMthdBuf = &(pKernelChannelGroup->pMthdBuffers[runqueue]);

    // TODO: Check to be removed as part of fix for Bug 200691429
    if (!RMCFG_FEATURE_PLATFORM_GSP)
        NV_ASSERT_OR_RETURN((pFaultMthdBuf->pMemDesc != NULL), NV_ERR_INVALID_STATE);

    // Unmap method buffer from bar2 invisible region and reset bar2addr
    if (pFaultMthdBuf->bar2Addr != 0)
    {
        kbusUnmapCpuInvisibleBar2Aperture_HAL(pGpu, pKernelBus, pFaultMthdBuf->pMemDesc,
                                              pFaultMthdBuf->bar2Addr, gfid);
        pFaultMthdBuf->bar2Addr = 0;
    }

    return NV_OK;
}
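
/*
 * Illustrative lifecycle sketch (hypothetical, not part of this file's build):
 * buffers are allocated once per channel group, mapped into the CPU-invisible
 * BAR2 region per runqueue, and torn down in reverse order. The function name
 * below is an assumption for illustration only; real callers reach these entry
 * points through their _HAL wrappers during channel-group setup and teardown.
 */
#if 0
static NV_STATUS
kchangrpExampleMethodBufferLifecycle
(
    OBJGPU             *pGpu,
    KernelChannelGroup *pKernelChannelGroup
)
{
    KernelFifo *pKernelFifo = GPU_GET_KERNEL_FIFO(pGpu);
    NvU32       runQueues   = kfifoGetNumRunqueues_HAL(pGpu, pKernelFifo);
    NvU32       rq;

    // Allocate the per-runqueue fault method buffers
    NV_ASSERT_OK_OR_RETURN(
        kchangrpAllocFaultMethodBuffers_GV100(pGpu, pKernelChannelGroup));

    // Map each runqueue's buffer into the CPU-invisible BAR2 aperture
    for (rq = 0; rq < runQueues; rq++)
    {
        NV_ASSERT_OK_OR_RETURN(
            kchangrpMapFaultMethodBuffers_GV100(pGpu, pKernelChannelGroup, rq));
    }

    // Teardown: unmap each runqueue's buffer, then free the backing memory
    for (rq = 0; rq < runQueues; rq++)
    {
        (void)kchangrpUnmapFaultMethodBuffers_GV100(pGpu, pKernelChannelGroup, rq);
    }

    return kchangrpFreeFaultMethodBuffers_GV100(pGpu, pKernelChannelGroup);
}
#endif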