/*
 * SPDX-FileCopyrightText: Copyright (c) 2020-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 * SPDX-License-Identifier: MIT
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 */

#include "gpu/mem_mgr/mem_mgr.h"
#include "gpu/mem_mgr/heap_base.h"
#include "gpu/mem_mgr/mem_utils.h"
#include "gpu/mem_mgr/virt_mem_allocator_common.h"
#include "os/nv_memory_type.h"
#include "core/locks.h"
#include "ctrl/ctrl2080.h"

#include "gpu/bus/kern_bus.h"
// Memory copy block size used when we need to cut up a mapping
#define MEMORY_COPY_BLOCK_SIZE (1024 * 1024)

/* ------------------------ Private functions --------------------------------------- */

/*!
 * @brief Determines the appropriate memory transfer technique to use
 */
static TRANSFER_TYPE
memmgrGetMemTransferType
(
    MemoryManager    *pMemoryManager,
    TRANSFER_SURFACE *pDst,
    TRANSFER_SURFACE *pSrc
)
{
    TRANSFER_TYPE transferType = TRANSFER_TYPE_PROCESSOR;
    OBJGPU    *pGpu       = ENG_GET_GPU(pMemoryManager);
    KernelBus *pKernelBus = GPU_GET_KERNEL_BUS(pGpu);

    //
    // In case of copy, both dest and src will be passed
    // In case of memset/memread/memwrite either dest or src will be passed
    //
    if ((pDst != NULL) && (pSrc != NULL) &&
        (memdescGetAddressSpace(pDst->pMemDesc) == ADDR_SYSMEM) &&
        (memdescGetAddressSpace(pSrc->pMemDesc) == ADDR_SYSMEM))
    {
        transferType = TRANSFER_TYPE_PROCESSOR;
    }
    else if (((pDst != NULL) &&
             (memdescGetAddressSpace(pDst->pMemDesc) == ADDR_SYSMEM)) ||
             ((pSrc != NULL) &&
             (memdescGetAddressSpace(pSrc->pMemDesc) == ADDR_SYSMEM)))
    {
        transferType = TRANSFER_TYPE_PROCESSOR;
    }
    else if (kbusIsBarAccessBlocked(pKernelBus))
    {
        transferType = TRANSFER_TYPE_GSP_DMA;
    }
    return transferType;
}
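
//
// Decision summary (mirrors the logic above):
//
//     any surface in sysmem            -> TRANSFER_TYPE_PROCESSOR
//     vidmem only, BAR access blocked  -> TRANSFER_TYPE_GSP_DMA
//     vidmem only, BAR access allowed  -> TRANSFER_TYPE_PROCESSOR (default)
//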

static NV_STATUS
_memmgrAllocAndMapSurface
(
    OBJGPU             *pGpu,
    NvU64               size,
    MEMORY_DESCRIPTOR **ppMemDesc,
    void              **ppMap,
    void              **ppPriv
)
{
    NV_STATUS status;
    NvU64 flags = 0;

    NV_ASSERT_OR_RETURN(ppMemDesc != NULL, NV_ERR_INVALID_ARGUMENT);
    NV_ASSERT_OR_RETURN(ppMap != NULL, NV_ERR_INVALID_ARGUMENT);
    NV_ASSERT_OR_RETURN(ppPriv != NULL, NV_ERR_INVALID_ARGUMENT);

    flags = MEMDESC_FLAGS_ALLOC_IN_UNPROTECTED_MEMORY;

    NV_ASSERT_OK_OR_RETURN(
        memdescCreate(ppMemDesc, pGpu, size, RM_PAGE_SIZE, NV_TRUE,
                      ADDR_SYSMEM, NV_MEMORY_UNCACHED, flags));

    NV_ASSERT_OK_OR_GOTO(status, memdescAlloc(*ppMemDesc), failed);

    NV_ASSERT_OK_OR_GOTO(status,
        memdescMapOld(*ppMemDesc, 0, size, NV_TRUE, NV_PROTECT_READ_WRITE,
                      ppMap, ppPriv),
        failed);

    // Clear surface before use
    portMemSet(*ppMap, 0, size);

    return NV_OK;
failed:
    memdescFree(*ppMemDesc);
    memdescDestroy(*ppMemDesc);

    *ppMemDesc = NULL;
    *ppMap = NULL;
    *ppPriv = NULL;

    return status;
}

static void
_memmgrUnmapAndFreeSurface
(
    MEMORY_DESCRIPTOR *pMemDesc,
    void              *pMap,
    void              *pPriv
)
{
    memdescUnmapOld(pMemDesc, NV_TRUE, 0, pMap, pPriv);

    memdescFree(pMemDesc);
    memdescDestroy(pMemDesc);
}
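
//
// Illustrative usage sketch (comment only, not compiled): the intended
// pairing of the two helpers above. The size and fill value here are
// hypothetical.
//
//     MEMORY_DESCRIPTOR *pMemDesc = NULL;
//     void *pMap  = NULL;
//     void *pPriv = NULL;
//
//     if (_memmgrAllocAndMapSurface(pGpu, RM_PAGE_SIZE, &pMemDesc,
//                                   &pMap, &pPriv) == NV_OK)
//     {
//         // CPU access to the zero-initialized, unprotected sysmem surface
//         portMemSet(pMap, 0xAB, RM_PAGE_SIZE);
//
//         _memmgrUnmapAndFreeSurface(pMemDesc, pMap, pPriv);
//     }
//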

/*!
 * @brief Writes data from a client-provided buffer to a region in vidmem,
 *        or reads data from that region back into the buffer, depending on
 *        bRead
 *
 * @param[in] pDst    TRANSFER_SURFACE info for the vidmem region
 * @param[in] pBuf    Client-provided buffer
 * @param[in] size    Size in bytes of the memory transfer
 * @param[in] bRead   TRUE for read and FALSE for write
 */
static NV_STATUS
_memmgrMemReadOrWriteWithGsp
(
    OBJGPU           *pGpu,
    TRANSFER_SURFACE *pDst,
    void             *pBuf,
    NvU64             size,
    NvBool            bRead
)
{
    NV2080_CTRL_INTERNAL_MEMMGR_MEMORY_TRANSFER_WITH_GSP_PARAMS gspParams;
    NV_STATUS status;
    MEMORY_DESCRIPTOR *pStagingBuf = NULL;
    void *pStagingBufMap = NULL;
    void *pStagingBufPriv = NULL;
    RM_API *pRmApi = GPU_GET_PHYSICAL_RMAPI(pGpu);

    // Do not expect GSP to be used for reading/writing from/to sysmem
    if (memdescGetAddressSpace(pDst->pMemDesc) == ADDR_SYSMEM)
        return NV_ERR_NOT_SUPPORTED;

    // Allocate and map the staging buffer
    NV_ASSERT_OK_OR_RETURN(
        _memmgrAllocAndMapSurface(pGpu, size, &pStagingBuf, &pStagingBufMap,
                                  &pStagingBufPriv));

    // Copy the data to staging buffer before poking GSP for copying
    if (!bRead)
        portMemCopy(pStagingBufMap, size, pBuf, size);

    // Setup control call params
    portMemSet(&gspParams, 0, sizeof(gspParams));

    gspParams.memop = NV2080_CTRL_MEMMGR_MEMORY_OP_MEMCPY;
    gspParams.transferSize = size;

    if (bRead)
    {
        // Source surface in vidmem
        gspParams.src.baseAddr = memdescGetPhysAddr(pDst->pMemDesc, AT_GPU, 0);
        gspParams.src.size = memdescGetSize(pDst->pMemDesc);
        gspParams.src.offset = pDst->offset;
        gspParams.src.cpuCacheAttrib = memdescGetCpuCacheAttrib(pDst->pMemDesc);
        gspParams.src.aperture = memdescGetAddressSpace(pDst->pMemDesc);

        // Destination surface in unprotected sysmem
        gspParams.dst.baseAddr = memdescGetPhysAddr(pStagingBuf, AT_GPU, 0);
        gspParams.dst.size = memdescGetSize(pStagingBuf);
        gspParams.dst.offset = 0;
        gspParams.dst.cpuCacheAttrib = memdescGetCpuCacheAttrib(pStagingBuf);
        gspParams.dst.aperture = memdescGetAddressSpace(pStagingBuf);
    }
    else
    {
        // Source surface in unprotected sysmem
        gspParams.src.baseAddr = memdescGetPhysAddr(pStagingBuf, AT_GPU, 0);
        gspParams.src.size = memdescGetSize(pStagingBuf);
        gspParams.src.offset = 0;
        gspParams.src.cpuCacheAttrib = memdescGetCpuCacheAttrib(pStagingBuf);
        gspParams.src.aperture = memdescGetAddressSpace(pStagingBuf);

        // Destination surface in vidmem
        gspParams.dst.baseAddr = memdescGetPhysAddr(pDst->pMemDesc, AT_GPU, 0);
        gspParams.dst.size = memdescGetSize(pDst->pMemDesc);
        gspParams.dst.offset = pDst->offset;
        gspParams.dst.cpuCacheAttrib = memdescGetCpuCacheAttrib(pDst->pMemDesc);
        gspParams.dst.aperture = memdescGetAddressSpace(pDst->pMemDesc);
    }

    // Send the control call
    NV_ASSERT_OK_OR_GOTO(status,
        pRmApi->Control(pRmApi,
                        pGpu->hInternalClient,
                        pGpu->hInternalSubdevice,
                        NV2080_CTRL_CMD_INTERNAL_MEMMGR_MEMORY_TRANSFER_WITH_GSP,
                        &gspParams,
                        sizeof(gspParams)),
        failed);

    // Read contents from staging buffer after GSP is done copying
    if (bRead)
        portMemCopy(pBuf, size, pStagingBufMap, size);

failed:
    _memmgrUnmapAndFreeSurface(pStagingBuf, pStagingBufMap, pStagingBufPriv);
    return status;
}

/*!
 * @brief This function is used for copying data between two memory regions
 *        using GSP.
 *
 * @param[in] pDst    TRANSFER_SURFACE info for destination region
 * @param[in] pSrc    TRANSFER_SURFACE info for source region
 * @param[in] size    Size in bytes of the memory transfer
 */
static NV_STATUS
_memmgrMemcpyWithGsp
(
    OBJGPU           *pGpu,
    TRANSFER_SURFACE *pDst,
    TRANSFER_SURFACE *pSrc,
    NvU64             size
)
{
    NV2080_CTRL_INTERNAL_MEMMGR_MEMORY_TRANSFER_WITH_GSP_PARAMS gspParams;
    NV_STATUS status;
    MEMORY_DESCRIPTOR *pStagingBuf = NULL;
    void *pStagingBufMap = NULL;
    void *pStagingBufPriv = NULL;
    NvU8 *pMap = NULL;
    void *pPriv = NULL;
    RM_API *pRmApi = GPU_GET_PHYSICAL_RMAPI(pGpu);

    //
    // Do not expect GSP to be used for copying data between two surfaces
    // in sysmem. For SPT, there is no non-CPR vidmem. So, allow vidmem
    // to vidmem copies in plain text. For copies between CPR and non-CPR
    // vidmem, encryption/decryption needs to happen at the endpoints.
    //
    if (memdescGetAddressSpace(pSrc->pMemDesc) == ADDR_SYSMEM &&
        memdescGetAddressSpace(pDst->pMemDesc) == ADDR_SYSMEM)
    {
        return NV_ERR_NOT_SUPPORTED;
    }

    // Allocate and map the bounce buffer
    NV_ASSERT_OK_OR_RETURN(
        _memmgrAllocAndMapSurface(pGpu, size, &pStagingBuf, &pStagingBufMap,
                                  &pStagingBufPriv));

    // Setup control call params
    portMemSet(&gspParams, 0, sizeof(gspParams));

    gspParams.memop = NV2080_CTRL_MEMMGR_MEMORY_OP_MEMCPY;
    gspParams.transferSize = size;

    if (memdescGetAddressSpace(pSrc->pMemDesc) == ADDR_SYSMEM)
    {
        NV_ASSERT_OK_OR_GOTO(status,
            memdescMapOld(pSrc->pMemDesc, 0, size, NV_TRUE,
                          NV_PROTECT_READ_WRITE, (void**)&pMap, &pPriv),
            failed);

        // Copy to staging buffer
        portMemCopy(pStagingBufMap, size, pMap + pSrc->offset, size);

        memdescUnmapOld(pSrc->pMemDesc, NV_TRUE, 0, (void*)pMap, pPriv);

        // Source surface in unprotected sysmem
        gspParams.src.baseAddr = memdescGetPhysAddr(pStagingBuf, AT_GPU, 0);
        gspParams.src.size = memdescGetSize(pStagingBuf);
        gspParams.src.offset = 0;
        gspParams.src.cpuCacheAttrib = memdescGetCpuCacheAttrib(pStagingBuf);
        gspParams.src.aperture = memdescGetAddressSpace(pStagingBuf);

        // Destination surface in vidmem
        gspParams.dst.baseAddr = memdescGetPhysAddr(pDst->pMemDesc, AT_GPU, 0);
        gspParams.dst.size = memdescGetSize(pDst->pMemDesc);
        gspParams.dst.offset = pDst->offset;
        gspParams.dst.cpuCacheAttrib = memdescGetCpuCacheAttrib(pDst->pMemDesc);
        gspParams.dst.aperture = memdescGetAddressSpace(pDst->pMemDesc);
    }
    else
    {
        // Source surface in vidmem
        gspParams.src.baseAddr = memdescGetPhysAddr(pSrc->pMemDesc, AT_GPU, 0);
        gspParams.src.size = memdescGetSize(pSrc->pMemDesc);
        gspParams.src.offset = pSrc->offset;
        gspParams.src.cpuCacheAttrib = memdescGetCpuCacheAttrib(pSrc->pMemDesc);
        gspParams.src.aperture = memdescGetAddressSpace(pSrc->pMemDesc);

        if (memdescGetAddressSpace(pDst->pMemDesc) == ADDR_FBMEM)
        {
            // Destination surface in vidmem
            gspParams.dst.baseAddr = memdescGetPhysAddr(pDst->pMemDesc, AT_GPU, 0);
            gspParams.dst.size = memdescGetSize(pDst->pMemDesc);
            gspParams.dst.offset = pDst->offset;
            gspParams.dst.cpuCacheAttrib = memdescGetCpuCacheAttrib(pDst->pMemDesc);
            gspParams.dst.aperture = memdescGetAddressSpace(pDst->pMemDesc);
        }
        else
        {
            // Destination surface in unprotected sysmem
            gspParams.dst.baseAddr = memdescGetPhysAddr(pStagingBuf, AT_GPU, 0);
            gspParams.dst.size = memdescGetSize(pStagingBuf);
            gspParams.dst.offset = 0;
            gspParams.dst.cpuCacheAttrib = memdescGetCpuCacheAttrib(pStagingBuf);
            gspParams.dst.aperture = memdescGetAddressSpace(pStagingBuf);
        }
    }

    // Send the control call
    NV_ASSERT_OK_OR_GOTO(status,
        pRmApi->Control(pRmApi,
                        pGpu->hInternalClient,
                        pGpu->hInternalSubdevice,
                        NV2080_CTRL_CMD_INTERNAL_MEMMGR_MEMORY_TRANSFER_WITH_GSP,
                        &gspParams,
                        sizeof(gspParams)),
        failed);

    // Copy from staging buffer to destination
    if (memdescGetAddressSpace(pDst->pMemDesc) == ADDR_SYSMEM)
    {
        NV_ASSERT_OK_OR_GOTO(status,
            memdescMapOld(pDst->pMemDesc, 0, size, NV_TRUE,
                          NV_PROTECT_READ_WRITE, (void**)&pMap, &pPriv),
            failed);

        portMemCopy(pMap + pDst->offset, size, pStagingBufMap, size);

        memdescUnmapOld(pDst->pMemDesc, NV_TRUE, 0, (void*)pMap, pPriv);
    }

failed:
    _memmgrUnmapAndFreeSurface(pStagingBuf, pStagingBufMap, pStagingBufPriv);
    return status;
}

static NV_STATUS
_memmgrMemsetWithGsp
(
    OBJGPU           *pGpu,
    TRANSFER_SURFACE *pDst,
    NvU32             value,
    NvU64             size
)
{
    NV2080_CTRL_INTERNAL_MEMMGR_MEMORY_TRANSFER_WITH_GSP_PARAMS gspParams;
    RM_API *pRmApi = GPU_GET_PHYSICAL_RMAPI(pGpu);

    // Do not expect to use GSP to memset surfaces in sysmem
    if (memdescGetAddressSpace(pDst->pMemDesc) == ADDR_SYSMEM)
        return NV_ERR_NOT_SUPPORTED;

    portMemSet(&gspParams, 0, sizeof(gspParams));

    gspParams.memop = NV2080_CTRL_MEMMGR_MEMORY_OP_MEMSET;
    gspParams.transferSize = size;
    gspParams.value = value;
    gspParams.dst.baseAddr = memdescGetPhysAddr(pDst->pMemDesc, AT_GPU, 0);
    gspParams.dst.size = memdescGetSize(pDst->pMemDesc);
    gspParams.dst.offset = pDst->offset;
    gspParams.dst.cpuCacheAttrib = memdescGetCpuCacheAttrib(pDst->pMemDesc);
    gspParams.dst.aperture = memdescGetAddressSpace(pDst->pMemDesc);

    // Send the control call
    NV_ASSERT_OK_OR_RETURN(
        pRmApi->Control(pRmApi,
                        pGpu->hInternalClient,
                        pGpu->hInternalSubdevice,
                        NV2080_CTRL_CMD_INTERNAL_MEMMGR_MEMORY_TRANSFER_WITH_GSP,
                        &gspParams,
                        sizeof(gspParams)));

    return NV_OK;
}

/*!
 * @brief This function is used for copying data between two memory regions
 *        using the specified memory transfer technique. Both memory regions
 *        can be in the same aperture or in different apertures.
 *
 * @param[in] pDstInfo      TRANSFER_SURFACE info for destination region
 * @param[in] pSrcInfo      TRANSFER_SURFACE info for source region
 * @param[in] size          Size in bytes of the memory transfer
 * @param[in] transferType  Memory transfer technique to be used
 * @param[in] flags         Flags
 */
static NV_STATUS
memmgrMemCopyWithTransferType
(
    MemoryManager    *pMemoryManager,
    TRANSFER_SURFACE *pDstInfo,
    TRANSFER_SURFACE *pSrcInfo,
    NvU32             size,
    TRANSFER_TYPE     transferType,
    NvU32             flags
)
{
    OBJGPU *pGpu = ENG_GET_GPU(pMemoryManager);
    NvU8 *pSrc;
    NvU8 *pDst;

    // Sanitize the input
    NV_ASSERT_OR_RETURN(pDstInfo != NULL, NV_ERR_INVALID_ARGUMENT);
    NV_ASSERT_OR_RETURN(pSrcInfo != NULL, NV_ERR_INVALID_ARGUMENT);
    NV_ASSERT_OR_RETURN(pDstInfo->pMemDesc != NULL, NV_ERR_INVALID_ARGUMENT);
    NV_ASSERT_OR_RETURN(pSrcInfo->pMemDesc != NULL, NV_ERR_INVALID_ARGUMENT);
    NV_ASSERT_OR_RETURN(!memdescDescIsEqual(pDstInfo->pMemDesc, pSrcInfo->pMemDesc),
                        NV_ERR_INVALID_ARGUMENT);

    switch (transferType)
    {
        case TRANSFER_TYPE_PROCESSOR:
            pDst = memdescMapInternal(pGpu, pDstInfo->pMemDesc, TRANSFER_FLAGS_NONE);
            NV_ASSERT_OR_RETURN(pDst != NULL, NV_ERR_INSUFFICIENT_RESOURCES);
            pSrc = memdescMapInternal(pGpu, pSrcInfo->pMemDesc, TRANSFER_FLAGS_NONE);
            if (pSrc == NULL)
            {
                memdescUnmapInternal(pGpu, pDstInfo->pMemDesc, 0);
                NV_ASSERT_OR_RETURN(0, NV_ERR_INSUFFICIENT_RESOURCES);
            }

            portMemCopy(pDst + pDstInfo->offset, size, pSrc + pSrcInfo->offset, size);

            memdescUnmapInternal(pGpu, pSrcInfo->pMemDesc, TRANSFER_FLAGS_NONE);
            memdescUnmapInternal(pGpu, pDstInfo->pMemDesc, flags);
            break;
        case TRANSFER_TYPE_GSP_DMA:
            if (IS_GSP_CLIENT(pGpu))
            {
                NV_PRINTF(LEVEL_INFO, "Calling GSP DMA task\n");
                NV_ASSERT_OK_OR_RETURN(
                    _memmgrMemcpyWithGsp(pGpu, pDstInfo, pSrcInfo, size));
            }
            else
            {
                NV_ASSERT_OR_RETURN(0, NV_ERR_INVALID_ARGUMENT);
            }
            break;
        case TRANSFER_TYPE_CE:
            NV_PRINTF(LEVEL_INFO, "Add call to CE\n");
            break;
    }

    return NV_OK;
}

/*!
 * @brief This function is used for setting a memory region to a constant state
 *        using a specified memory transfer technique
 *
 * @param[in] pDstInfo      TRANSFER_SURFACE info for destination region
 * @param[in] value         Value to be written to the region
 * @param[in] size          Size in bytes of the memory to be initialized
 * @param[in] transferType  Memory transfer technique to be used
 * @param[in] flags         Flags
 */
static NV_STATUS
memmgrMemSetWithTransferType
(
    MemoryManager    *pMemoryManager,
    TRANSFER_SURFACE *pDstInfo,
    NvU32             value,
    NvU32             size,
    TRANSFER_TYPE     transferType,
    NvU32             flags
)
{
    OBJGPU *pGpu = ENG_GET_GPU(pMemoryManager);
    NvU8 *pDst;

    // Sanitize the input
    NV_ASSERT_OR_RETURN(pDstInfo != NULL, NV_ERR_INVALID_ARGUMENT);
    NV_ASSERT_OR_RETURN(pDstInfo->pMemDesc != NULL, NV_ERR_INVALID_ARGUMENT);
    NV_ASSERT_OR_RETURN(size > 0, NV_ERR_INVALID_ARGUMENT);
    NV_ASSERT_OR_RETURN(pDstInfo->offset + size <= pDstInfo->pMemDesc->Size, NV_ERR_INVALID_ARGUMENT);

    switch (transferType)
    {
        case TRANSFER_TYPE_PROCESSOR:
            pDst = memdescMapInternal(pGpu, pDstInfo->pMemDesc, TRANSFER_FLAGS_NONE);
            NV_ASSERT_OR_RETURN(pDst != NULL, NV_ERR_INSUFFICIENT_RESOURCES);

            portMemSet(pDst + pDstInfo->offset, value, size);

            memdescUnmapInternal(pGpu, pDstInfo->pMemDesc, flags);
            break;
        case TRANSFER_TYPE_GSP_DMA:
            if (IS_GSP_CLIENT(pGpu))
            {
                NV_PRINTF(LEVEL_INFO, "Calling GSP DMA task\n");
                NV_ASSERT_OK_OR_RETURN(
                    _memmgrMemsetWithGsp(pGpu, pDstInfo, value, size));
            }
            else
            {
                NV_ASSERT_OR_RETURN(0, NV_ERR_INVALID_ARGUMENT);
            }
            break;
        case TRANSFER_TYPE_CE:
            NV_PRINTF(LEVEL_INFO, "Add call to CE\n");
            break;
    }

    return NV_OK;
}

/*!
 * @brief This function is used to map the appropriate memory descriptor,
 *        copy the memory from the given buffer, and then unmap.
 *
 * @param[in] pMemDesc Memory descriptor of buffer to write
 * @param[in] pBuf     Buffer allocated by caller
 * @param[in] offset   Offset of buffer to write
 * @param[in] size     Size in bytes of the buffer
 * @param[in] flags    Flags
 */
static NV_STATUS
memmgrMemWriteMapAndCopy
(
    MemoryManager     *pMemoryManager,
    MEMORY_DESCRIPTOR *pMemDesc,
    void              *pBuf,
    NvU64              offset,
    NvU64              size,
    NvU32              flags
)
{
    NvU8   *pDst = NULL;
    OBJGPU *pGpu = ENG_GET_GPU(pMemoryManager);

    pDst = memdescMapInternal(pGpu, pMemDesc, TRANSFER_FLAGS_NONE);
    NV_CHECK_OR_RETURN(LEVEL_SILENT, pDst != NULL, NV_ERR_INSUFFICIENT_RESOURCES);

    portMemCopy(pDst + offset, size, pBuf, size);
    memdescUnmapInternal(pGpu, pMemDesc, flags);

    return NV_OK;
}

/*!
 * @brief This function is used for writing data from a caller-provided buffer
 *        to a given memory region, mapping at most blockSize bytes at a time.
 *
 * @param[in] pMemDesc   Memory descriptor of buffer to write
 * @param[in] pBuf       Buffer allocated by caller
 * @param[in] baseOffset Offset of entire buffer to write
 * @param[in] size       Size in bytes of the buffer
 * @param[in] flags      Flags
 * @param[in] blockSize  Maximum size of a mapping to use
 */
static NV_STATUS
memmgrMemWriteInBlocks
(
    MemoryManager     *pMemoryManager,
    MEMORY_DESCRIPTOR *pMemDesc,
    void              *pBuf,
    NvU64              baseOffset,
    NvU64              size,
    NvU32              flags,
    NvU32              blockSize
)
{
    NV_STATUS  status    = NV_OK;
    OBJGPU    *pGpu      = ENG_GET_GPU(pMemoryManager);
    NvU64      remaining = size;
    NvU64      offset    = 0;

    while ((remaining > 0) && (status == NV_OK))
    {
        MEMORY_DESCRIPTOR *pSubMemDesc = NULL;
        NvU32              mapSize     = NV_MIN(blockSize, remaining);

        NV_CHECK_OK_OR_RETURN(LEVEL_SILENT, memdescCreateSubMem(&pSubMemDesc, pMemDesc, pGpu, offset + baseOffset, mapSize));

        // Set the offset to 0, as the sub descriptor already starts at the offset
        status = memmgrMemWriteMapAndCopy(pMemoryManager, pSubMemDesc, (NvU8 *)pBuf + offset,
                                          0, mapSize, flags);

        memdescFree(pSubMemDesc);
        memdescDestroy(pSubMemDesc);

        offset += mapSize;
        remaining -= mapSize;
    }

    return status;
}
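
//
// Worked example (illustrative): with the default 1 MB MEMORY_COPY_BLOCK_SIZE,
// a 5 MB write is split into five iterations. Iteration n creates a
// submemdesc covering [baseOffset + n MB, baseOffset + (n + 1) MB) and copies
// from buffer offset n MB, so no single internal mapping ever exceeds
// blockSize bytes.
//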

/*!
 * @brief This function is used for writing data from a caller-provided buffer
 *        to a given memory region using the specified memory transfer technique
 *
 * @param[in] pDstInfo      TRANSFER_SURFACE info for the destination region
 * @param[in] pBuf          Buffer allocated by caller
 * @param[in] size          Size in bytes of the buffer
 * @param[in] transferType  Memory transfer technique to be used
 * @param[in] flags         Flags
 */
static NV_STATUS
memmgrMemWriteWithTransferType
(
    MemoryManager    *pMemoryManager,
    TRANSFER_SURFACE *pDstInfo,
    void             *pBuf,
    NvU64             size,
    TRANSFER_TYPE     transferType,
    NvU32             flags
)
{
    NvU8 *pMapping = memdescGetKernelMapping(pDstInfo->pMemDesc);
    OBJGPU *pGpu = ENG_GET_GPU(pMemoryManager);

    // Sanitize the input
    NV_ASSERT_OR_RETURN(pDstInfo != NULL, NV_ERR_INVALID_ARGUMENT);
    NV_ASSERT_OR_RETURN(pDstInfo->pMemDesc != NULL, NV_ERR_INVALID_ARGUMENT);
    NV_ASSERT_OR_RETURN(pBuf != NULL, NV_ERR_INVALID_ARGUMENT);
    NV_ASSERT_OR_RETURN(size > 0, NV_ERR_INVALID_ARGUMENT);
    NV_ASSERT_OR_RETURN(pDstInfo->offset + size <= pDstInfo->pMemDesc->Size, NV_ERR_INVALID_ARGUMENT);

    if (pMapping != NULL)
    {
        portMemCopy(pMapping + pDstInfo->offset, size, pBuf, size);
        return NV_OK;
    }

    switch (transferType)
    {
        case TRANSFER_TYPE_PROCESSOR:
            if (memmgrMemWriteMapAndCopy(pMemoryManager, pDstInfo->pMemDesc, pBuf, pDstInfo->offset, size, flags) != NV_OK)
            {
                // If we fail to map a block large enough for the entire transfer, split up the mapping.
                NV_ASSERT_OK_OR_RETURN(memmgrMemWriteInBlocks(pMemoryManager, pDstInfo->pMemDesc, pBuf,
                                                              pDstInfo->offset, size, flags, MEMORY_COPY_BLOCK_SIZE));
            }
            break;
        case TRANSFER_TYPE_GSP_DMA:
            if (IS_GSP_CLIENT(pGpu))
            {
                NV_PRINTF(LEVEL_INFO, "Calling GSP DMA task\n");
                NV_ASSERT_OK_OR_RETURN(
                    _memmgrMemReadOrWriteWithGsp(pGpu, pDstInfo, pBuf, size,
                                                 NV_FALSE /* bRead */));
            }
            else
            {
                NV_ASSERT_OR_RETURN(0, NV_ERR_INVALID_ARGUMENT);
            }
            break;
        case TRANSFER_TYPE_CE:
            NV_PRINTF(LEVEL_INFO, "Add call to CE\n");
            break;
    }

    return NV_OK;
}

/*!
 * @brief This function is used for reading a specified number of bytes from
 *        a source memory region into a caller-provided buffer using a specified
 *        memory transfer technique
 *
 * @param[in] pSrcInfo      TRANSFER_SURFACE info for the source region
 * @param[in] pBuf          Caller allocated buffer
 * @param[in] size          Size in bytes of the buffer
 * @param[in] transferType  Memory transfer technique to be used
 * @param[in] flags         Flags
 */
static NV_STATUS
memmgrMemReadWithTransferType
(
    MemoryManager    *pMemoryManager,
    TRANSFER_SURFACE *pSrcInfo,
    void             *pBuf,
    NvU64             size,
    TRANSFER_TYPE     transferType,
    NvU32             flags
)
{
    OBJGPU *pGpu = ENG_GET_GPU(pMemoryManager);
    NvU8   *pSrc;
    NvU8   *pMapping = memdescGetKernelMapping(pSrcInfo->pMemDesc);

    // Sanitize the input
    NV_ASSERT_OR_RETURN(pSrcInfo != NULL, NV_ERR_INVALID_ARGUMENT);
    NV_ASSERT_OR_RETURN(pSrcInfo->pMemDesc != NULL, NV_ERR_INVALID_ARGUMENT);
    NV_ASSERT_OR_RETURN(pBuf != NULL, NV_ERR_INVALID_ARGUMENT);
    NV_ASSERT_OR_RETURN(size > 0, NV_ERR_INVALID_ARGUMENT);
    NV_ASSERT_OR_RETURN(pSrcInfo->offset + size <= pSrcInfo->pMemDesc->Size, NV_ERR_INVALID_ARGUMENT);

    if (pMapping != NULL)
    {
        portMemCopy(pBuf, size, pMapping + pSrcInfo->offset, size);
        return NV_OK;
    }

    switch (transferType)
    {
        case TRANSFER_TYPE_PROCESSOR:
            pSrc = memdescMapInternal(pGpu, pSrcInfo->pMemDesc, TRANSFER_FLAGS_NONE);
            NV_ASSERT_OR_RETURN(pSrc != NULL, NV_ERR_INSUFFICIENT_RESOURCES);

            portMemCopy(pBuf, size, pSrc + pSrcInfo->offset, size);

            memdescUnmapInternal(pGpu, pSrcInfo->pMemDesc, 0);
            break;
        case TRANSFER_TYPE_GSP_DMA:
            if (IS_GSP_CLIENT(pGpu))
            {
                NV_PRINTF(LEVEL_INFO, "Calling GSP DMA task\n");
                NV_ASSERT_OK_OR_RETURN(
                    _memmgrMemReadOrWriteWithGsp(pGpu, pSrcInfo, pBuf, size,
                                                 NV_TRUE /* bRead */));
            }
            else
            {
                NV_ASSERT_OR_RETURN(0, NV_ERR_INVALID_ARGUMENT);
            }
            break;
        case TRANSFER_TYPE_CE:
            NV_PRINTF(LEVEL_INFO, "Add call to CE\n");
            break;
    }

    return NV_OK;
}

/* ------------------------ Public functions --------------------------------------- */

NvU64 memUtilsLeastCommonAlignment(NvU64 align1, NvU64 align2)
{
    NvU64 a, b;  // For Euclid's algorithm
    NvU64 lcm;   // Least Common Multiple of align1 and align2
    NvU64 maxAlignment = NV_U64_MAX;

    // WLOG (without loss of generality), make sure align1 >= align2.
    //
    if (align2 > align1)
    {
        NvU64 tmp = align1;
        align1 = align2;
        align2 = tmp;
    }

    // If align2 is 0, return min(align1, maxAlignment)
    //
    if (align2 == 0)
    {
        return align1 < maxAlignment ? align1 : maxAlignment;
    }

    // Use Euclid's algorithm (GCD(a, b) = GCD(b, a % b)) to find the
    // GCD of the two alignments, and use the GCD to find the LCM.
    //
    a = align1;
    b = align2;
    while (b != 0)
    {
        NvU64 old_a = a;
        a = b;
        b = old_a % b;
        NV_ASSERT(a > b);  // Ensure termination.  Should never fail.
    }
    lcm = align1 * (align2 / a);  // May overflow

    // Return min(lcm, maxAlignment).  Also return maxAlignment if the
    // lcm calculation overflowed, since that means it must have been
    // much bigger than maxAlignment.
    //
    if (lcm > maxAlignment || lcm < align1 ||
        0 != (lcm % align1) || 0 != (lcm % align2))
    {
        NV_CHECK_FAILED(LEVEL_ERROR, "Alignment limit exceeded");
        return maxAlignment;
    }
    return lcm;
}
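
//
// Worked example: align1 = 0x6000 (24 KB), align2 = 0x10000 (64 KB).
// Euclid's loop yields GCD(0x10000, 0x6000) = 0x2000, so
// lcm = 0x10000 * (0x6000 / 0x2000) = 0x30000 (192 KB), the smallest
// alignment that satisfies both requirements.
//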

void memUtilsInitFBAllocInfo
(
    NV_MEMORY_ALLOCATION_PARAMS *pAllocParams,
    FB_ALLOC_INFO *pFbAllocInfo,
    NvHandle hClient,
    NvHandle hDevice
)
{
    pFbAllocInfo->pageFormat->type  = pAllocParams->type;
    pFbAllocInfo->owner             = pAllocParams->owner;
    pFbAllocInfo->hwResId           = 0;
    pFbAllocInfo->pad               = 0;
    pFbAllocInfo->alignPad          = 0;
    pFbAllocInfo->height            = pAllocParams->height;
    pFbAllocInfo->width             = pAllocParams->width;
    pFbAllocInfo->pitch             = pAllocParams->pitch;
    pFbAllocInfo->size              = pAllocParams->size;
    pFbAllocInfo->origSize          = pAllocParams->size;
    pFbAllocInfo->adjustedSize      = pAllocParams->size;
    pFbAllocInfo->offset            = ~0;
    pFbAllocInfo->pageFormat->flags = pAllocParams->flags;
    pFbAllocInfo->pageFormat->attr  = pAllocParams->attr;
    pFbAllocInfo->retAttr           = pAllocParams->attr;
    pFbAllocInfo->pageFormat->attr2 = pAllocParams->attr2;
    pFbAllocInfo->retAttr2          = pAllocParams->attr2;
    pFbAllocInfo->format            = pAllocParams->format;
    pFbAllocInfo->comprCovg         = pAllocParams->comprCovg;
    pFbAllocInfo->zcullCovg         = 0;
    pFbAllocInfo->ctagOffset        = pAllocParams->ctagOffset;
    pFbAllocInfo->bIsKernelAlloc    = NV_FALSE;
    pFbAllocInfo->internalflags     = 0;
    pFbAllocInfo->hClient           = hClient;
    pFbAllocInfo->hDevice           = hDevice;

    if ((pAllocParams->flags & NVOS32_ALLOC_FLAGS_ALIGNMENT_HINT) ||
        (pAllocParams->flags & NVOS32_ALLOC_FLAGS_ALIGNMENT_FORCE))
        pFbAllocInfo->align = pAllocParams->alignment;
    else
        pFbAllocInfo->align = RM_PAGE_SIZE;

    if (pAllocParams->flags & NVOS32_ALLOC_FLAGS_FIXED_ADDRESS_ALLOCATE)
    {
        pFbAllocInfo->offset = pAllocParams->offset;
        pFbAllocInfo->desiredOffset = pAllocParams->offset;
    }
}

/*!
 * @brief This function is used for copying data between two memory regions.
 *        Both memory regions can be in the same aperture or in different apertures.
 *
 * @param[in] pDstInfo  TRANSFER_SURFACE info for destination region
 * @param[in] pSrcInfo  TRANSFER_SURFACE info for source region
 * @param[in] size      Size in bytes of the memory transfer
 * @param[in] flags     Flags
 */
NV_STATUS
memmgrMemCopy_IMPL
(
    MemoryManager    *pMemoryManager,
    TRANSFER_SURFACE *pDstInfo,
    TRANSFER_SURFACE *pSrcInfo,
    NvU32             size,
    NvU32             flags
)
{
    TRANSFER_TYPE transferType = memmgrGetMemTransferType(pMemoryManager,
                                                          pDstInfo, pSrcInfo);

    return memmgrMemCopyWithTransferType(pMemoryManager, pDstInfo, pSrcInfo,
                                         size, transferType, flags);
}

/*!
 * @brief This function is used for setting a memory region to a constant state
 *
 * @param[in] pDstInfo  TRANSFER_SURFACE info for the destination region
 * @param[in] value     Value to be written to the region
 * @param[in] size      Size in bytes of the memory to be initialized
 * @param[in] flags     Flags
 */
NV_STATUS
memmgrMemSet_IMPL
(
    MemoryManager    *pMemoryManager,
    TRANSFER_SURFACE *pDstInfo,
    NvU32             value,
    NvU32             size,
    NvU32             flags
)
{
    TRANSFER_TYPE transferType = memmgrGetMemTransferType(pMemoryManager,
                                                          pDstInfo, NULL);

    return memmgrMemSetWithTransferType(pMemoryManager, pDstInfo, value,
                                        size, transferType, flags);
}

/*!
 * @brief This function is used for setting an entire memdesc-described
 *        memory region to a constant state
 *
 * @param[in] pMemDesc  Memory descriptor of the region to memset
 * @param[in] value     Value to be written to the region
 * @param[in] flags     Flags
 */
NV_STATUS
memmgrMemDescMemSet_IMPL
(
    MemoryManager     *pMemoryManager,
    MEMORY_DESCRIPTOR *pMemDesc,
    NvU32              value,
    NvU32              flags
)
{
    TRANSFER_SURFACE transferSurface = {.offset = 0, .pMemDesc = pMemDesc};
    TRANSFER_TYPE    transferType = memmgrGetMemTransferType(pMemoryManager,
                                                             &transferSurface, NULL);

    return memmgrMemSetWithTransferType(pMemoryManager, &transferSurface, value,
                                        (NvU32)memdescGetSize(pMemDesc),
                                        transferType, flags);
}
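
//
// Illustrative sketch (comment only, not compiled; assumes the usual NVOC
// wrapper name for memmgrMemDescMemSet_IMPL): clearing an entire
// memdesc-described region. pMemoryManager and pMemDesc are assumed valid.
//
//     NV_ASSERT_OK(memmgrMemDescMemSet(pMemoryManager, pMemDesc, 0,
//                                      TRANSFER_FLAGS_NONE));
//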

/*!
 * @brief This function is used for writing data from a caller-provided buffer
 *        to a given memory region
 *
 * @param[in] pDstInfo  TRANSFER_SURFACE info for the destination region
 * @param[in] pBuf      Buffer allocated by caller
 * @param[in] size      Size in bytes of the buffer
 * @param[in] flags     Flags
 */
NV_STATUS
memmgrMemWrite_IMPL
(
    MemoryManager    *pMemoryManager,
    TRANSFER_SURFACE *pDstInfo,
    void             *pBuf,
    NvU64             size,
    NvU32             flags
)
{
    TRANSFER_TYPE transferType = memmgrGetMemTransferType(pMemoryManager,
                                                          pDstInfo, NULL);

    return memmgrMemWriteWithTransferType(pMemoryManager, pDstInfo, pBuf,
                                          size, transferType, flags);
}

/*!
 * @brief This function is used for reading a specified number of bytes from
 *        a source memory region into a caller-provided buffer
 *
 * @param[in] pSrcInfo  TRANSFER_SURFACE info for the source region
 * @param[in] pBuf      Caller allocated buffer
 * @param[in] size      Size in bytes of the buffer
 * @param[in] flags     Flags
 */
NV_STATUS
memmgrMemRead_IMPL
(
    MemoryManager    *pMemoryManager,
    TRANSFER_SURFACE *pSrcInfo,
    void             *pBuf,
    NvU64             size,
    NvU32             flags
)
{
    TRANSFER_TYPE transferType = memmgrGetMemTransferType(pMemoryManager,
                                                          NULL, pSrcInfo);

    return memmgrMemReadWithTransferType(pMemoryManager, pSrcInfo, pBuf,
                                         size, transferType, flags);
}
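
//
// Illustrative sketch (comment only, not compiled): round-tripping a small
// buffer through a surface with the two entry points above. pMemoryManager
// and pMemDesc are assumed to be valid, with the memdesc at least 64 bytes.
//
//     TRANSFER_SURFACE surf = {.offset = 0, .pMemDesc = pMemDesc};
//     NvU8 buf[64];
//
//     portMemSet(buf, 0x5A, sizeof(buf));
//     NV_ASSERT_OK(memmgrMemWrite(pMemoryManager, &surf, buf, sizeof(buf),
//                                 TRANSFER_FLAGS_NONE));
//     NV_ASSERT_OK(memmgrMemRead(pMemoryManager, &surf, buf, sizeof(buf),
//                                TRANSFER_FLAGS_NONE));
//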

/*!
 * @brief This helper function can be used to begin transfers
 *
 * @param[in] pTransferInfo      Transfer information
 * @param[in] shadowBufSize      Size of allocated shadow buffer in case of shadow mapping
 * @param[in] flags              Flags
 */
NvU8 *
memmgrMemBeginTransfer_IMPL
(
    MemoryManager     *pMemoryManager,
    TRANSFER_SURFACE  *pTransferInfo,
    NvU64              shadowBufSize,
    NvU32              flags
)
{
    TRANSFER_TYPE      transferType = memmgrGetMemTransferType(pMemoryManager,
                                                               pTransferInfo, NULL);
    MEMORY_DESCRIPTOR *pMemDesc     = pTransferInfo->pMemDesc;
    NvU64              offset       = pTransferInfo->offset;
    OBJGPU            *pGpu         = ENG_GET_GPU(pMemoryManager);
    NvU8              *pPtr         = NULL;
    NvU64              memSz        = 0;

    NV_ASSERT_OR_RETURN(pMemDesc != NULL, NULL);
    NV_ASSERT_OR_RETURN((memSz = memdescGetSize(pMemDesc)) >= shadowBufSize, NULL);
    NV_ASSERT_OR_RETURN(memdescGetKernelMapping(pMemDesc) == NULL, NULL);

    memSz = shadowBufSize == 0 ? memSz : shadowBufSize;

    switch (transferType)
    {
        case TRANSFER_TYPE_PROCESSOR:
            if (flags & TRANSFER_FLAGS_USE_BAR1)
            {
                NvP64 pPriv;
                NvU32 protect = NV_PROTECT_READ_WRITE;

                if (flags & TRANSFER_FLAGS_MAP_PROTECT_READABLE)
                {
                    protect = NV_PROTECT_READABLE;
                }
                else if (flags & TRANSFER_FLAGS_MAP_PROTECT_WRITEABLE)
                {
                    protect = NV_PROTECT_WRITEABLE;
                }

                NV_ASSERT_OR_RETURN(memdescMap(pMemDesc, offset, memSz, NV_TRUE, protect,
                    (NvP64*) &pPtr, &pPriv) == NV_OK, NULL);
                // Save the map priv so memmgrMemEndTransfer() can unmap with it
                memdescSetKernelMappingPriv(pMemDesc, pPriv);
                break;
            }
            NV_ASSERT_OR_RETURN((pPtr = memdescMapInternal(pGpu, pMemDesc, flags)) != NULL, NULL);
            pPtr = &pPtr[offset];

            break;
        case TRANSFER_TYPE_GSP_DMA:
        case TRANSFER_TYPE_CE:
            if (flags & TRANSFER_FLAGS_SHADOW_ALLOC)
            {
                NV_ASSERT_OR_RETURN((pPtr = portMemAllocNonPaged(memSz)), NULL);
                if (flags & TRANSFER_FLAGS_SHADOW_INIT_MEM)
                {
                    NV_ASSERT_OK(memmgrMemRead(pMemoryManager, pTransferInfo, pPtr, memSz, flags));
                }
            }
            break;
        default:
            NV_ASSERT(0);
    }
    memdescSetKernelMapping(pMemDesc, pPtr);
    return pPtr;
}

/*!
 * @brief This helper function can be used to end transfers
 *
 * @param[in] pTransferInfo      Transfer information
 * @param[in] shadowBufSize      Size of allocated shadow buffer in case of shadow mapping
 * @param[in] flags              Flags
 */
void
memmgrMemEndTransfer_IMPL
(
    MemoryManager     *pMemoryManager,
    TRANSFER_SURFACE  *pTransferInfo,
    NvU64              shadowBufSize,
    NvU32              flags
)
{
    TRANSFER_TYPE      transferType = memmgrGetMemTransferType(pMemoryManager,
                                                               pTransferInfo, NULL);
    MEMORY_DESCRIPTOR *pMemDesc     = pTransferInfo->pMemDesc;
    NvU64              offset       = pTransferInfo->offset;
    OBJGPU            *pGpu         = ENG_GET_GPU(pMemoryManager);
    NvU64              memSz        = 0;
    NvU8              *pMapping     = memdescGetKernelMapping(pMemDesc);

    NV_ASSERT_OR_RETURN_VOID(pMemDesc != NULL);
    NV_ASSERT_OR_RETURN_VOID((memSz = memdescGetSize(pMemDesc)) >= (shadowBufSize + offset));

    memSz = shadowBufSize == 0 ? memSz : shadowBufSize;

    memdescSetKernelMapping(pMemDesc, NULL);

    switch (transferType)
    {
        case TRANSFER_TYPE_PROCESSOR:
            if (flags & TRANSFER_FLAGS_USE_BAR1)
            {
                NvP64 pPriv = memdescGetKernelMappingPriv(pMemDesc);
                memdescSetKernelMappingPriv(pMemDesc, NULL);
                memdescUnmap(pMemDesc, NV_TRUE, 0, pMapping, pPriv);
                return;
            }
            memdescUnmapInternal(pGpu, pMemDesc, flags);
            return;
        case TRANSFER_TYPE_GSP_DMA:
        case TRANSFER_TYPE_CE:
            if (pMapping != NULL)
            {
                NV_ASSERT_OK(memmgrMemWrite(pMemoryManager, pTransferInfo, pMapping, memSz, flags));
                portMemFree(pMapping);
            }
            return;
        default:
            NV_ASSERT(0);
    }
    return;
}

/*!
 * @brief Helper function that ends transfers to a memdesc with default offset/size
 *
 * @param[in] pMemDesc           Memory descriptor to end transfer to
 * @param[in] flags              Flags
 */
void
memmgrMemDescEndTransfer_IMPL
(
    MemoryManager *pMemoryManager,
    MEMORY_DESCRIPTOR *pMemDesc,
    NvU32 flags
)
{
    TRANSFER_SURFACE transferSurface = {.offset = 0, .pMemDesc = pMemDesc};
    memmgrMemEndTransfer(pMemoryManager, &transferSurface, memdescGetSize(pMemDesc), flags);
}

/*!
 * @brief Helper function that begins transfers to a memdesc with default offset/size
 *
 * @param[in] pMemDesc           Memory descriptor to begin transfer to
 * @param[in] flags              Flags
 */
NvU8 *
memmgrMemDescBeginTransfer_IMPL
(
    MemoryManager *pMemoryManager,
    MEMORY_DESCRIPTOR *pMemDesc,
    NvU32 flags
)
{
    TRANSFER_SURFACE transferSurface = {.offset = 0, .pMemDesc = pMemDesc};
    return memmgrMemBeginTransfer(pMemoryManager, &transferSurface, memdescGetSize(pMemDesc), flags);
}
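
//
// Illustrative sketch (comment only, not compiled): the expected
// begin/modify/end pattern with the helpers above. Depending on the transfer
// type, pMap is either a direct mapping or a shadow buffer that
// memmgrMemEndTransfer() writes back to the surface.
//
//     TRANSFER_SURFACE surf = {.offset = 0, .pMemDesc = pMemDesc};
//     NvU64 size = memdescGetSize(pMemDesc);
//     NvU8 *pMap = memmgrMemBeginTransfer(pMemoryManager, &surf, size,
//                                         TRANSFER_FLAGS_SHADOW_ALLOC);
//     if (pMap != NULL)
//     {
//         pMap[0] = 0xFF;  // CPU access to the surface (or its shadow)
//         memmgrMemEndTransfer(pMemoryManager, &surf, size,
//                              TRANSFER_FLAGS_SHADOW_ALLOC);
//     }
//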

/*!
 * @brief This function is used to allocate common resources across memory
 *        classes, and must be used before memory-specific resource alloc.
 *
 * @param[in/out] pAllocRequest     User-provided alloc request struct
 * @param[in/out] pFbAllocInfo      Initialized FB_ALLOC_INFO struct to alloc
 */
NV_STATUS
memmgrAllocResources_IMPL
(
    OBJGPU                      *pGpu,
    MemoryManager               *pMemoryManager,
    MEMORY_ALLOCATION_REQUEST   *pAllocRequest,
    FB_ALLOC_INFO               *pFbAllocInfo
)
{
    NV_STATUS                    status        = NV_OK;
    NvU64                        alignment     = 0;
    NV_MEMORY_ALLOCATION_PARAMS *pVidHeapAlloc = pAllocRequest->pUserParams;
    NV_ADDRESS_SPACE             addrSpace     = memmgrAllocGetAddrSpace(pMemoryManager, pVidHeapAlloc->flags,
                                                                         pFbAllocInfo->retAttr);

    NvU64                        pageSize      = 0;
    NvBool                       bAllocedHwRes = NV_FALSE;

    // IRQL TEST:  must be running at equivalent of passive-level
    IRQL_ASSERT_AND_RETURN(!osIsRaisedIRQL());

    //
    // Check for valid size.
    //
    if (pVidHeapAlloc->size == 0)
        return NV_ERR_INVALID_ARGUMENT;

    //
    // Ensure a valid allocation pVidHeapAlloc->type was passed in
    //
    if (pVidHeapAlloc->type > NVOS32_NUM_MEM_TYPES - 1)
        return NV_ERR_INVALID_ARGUMENT;

    if (ADDR_VIRTUAL != addrSpace)
    {
        // If vidmem not requested explicitly, decide on the physical location.
        if (FLD_TEST_DRF(OS32, _ATTR, _LOCATION, _PCI, pFbAllocInfo->retAttr) ||
            FLD_TEST_DRF(OS32, _ATTR, _LOCATION, _ANY, pFbAllocInfo->retAttr))
        {
            if (ADDR_FBMEM == addrSpace)
            {
                pFbAllocInfo->retAttr = FLD_SET_DRF(OS32, _ATTR, _LOCATION, _VIDMEM, pFbAllocInfo->retAttr);
            }
            else
            {
                pFbAllocInfo->retAttr = FLD_SET_DRF(OS32, _ATTR, _LOCATION, _PCI, pFbAllocInfo->retAttr);
            }
        }
    }
    else // Virtual
    {
        // Clear location to ANY since virtual does not associate with location.
        pFbAllocInfo->retAttr = FLD_SET_DRF(OS32, _ATTR, _LOCATION, _ANY, pFbAllocInfo->retAttr);
    }

    // Fetch RM page size
    pageSize = memmgrDeterminePageSize(pMemoryManager, pFbAllocInfo->hClient, pFbAllocInfo->size,
                                       pFbAllocInfo->format, pFbAllocInfo->pageFormat->flags,
                                       &pFbAllocInfo->retAttr, &pFbAllocInfo->retAttr2);
    if (!IsAMODEL(pGpu) && pageSize == 0)
    {
        status = NV_ERR_INVALID_STATE;
        NV_PRINTF(LEVEL_ERROR, "memmgrDeterminePageSize failed, status: 0x%x\n", status);
        goto failed;
    }

    // Fetch memory alignment
    status = memmgrAllocDetermineAlignment_HAL(pGpu, pMemoryManager, &pFbAllocInfo->size, &pFbAllocInfo->align,
                                               pFbAllocInfo->alignPad, pFbAllocInfo->pageFormat->flags,
                                               pFbAllocInfo->retAttr, pFbAllocInfo->retAttr2, 0);
    if (status != NV_OK)
    {
        NV_PRINTF(LEVEL_ERROR, "memmgrAllocDetermineAlignment failed, status: 0x%x\n", status);
        goto failed;
    }

    //
    // Call into HAL to reserve any hardware resources for
    // the specified memory pVidHeapAlloc->type.
    // If the alignment was changed due to a HW limitation, and the
    // flag NVOS32_ALLOC_FLAGS_ALIGNMENT_FORCE is set, bad_argument
    // will be passed back from the HAL
    //
    status = memmgrAllocHwResources(pGpu, pMemoryManager, pFbAllocInfo);
    bAllocedHwRes = NV_TRUE;

    pVidHeapAlloc->attr  = pFbAllocInfo->retAttr;
    pVidHeapAlloc->attr2 = pFbAllocInfo->retAttr2;
    pVidHeapAlloc->format = pFbAllocInfo->format;
    pVidHeapAlloc->comprCovg = pFbAllocInfo->comprCovg;
    pVidHeapAlloc->zcullCovg = pFbAllocInfo->zcullCovg;

    if (status != NV_OK)
    {
        //
        // probably means we passed in a bogus pVidHeapAlloc->type or no tiling resources available
        // when tiled memory attribute was set to REQUIRED
        //
        NV_PRINTF(LEVEL_ERROR, "fbAlloc failure!\n");
        goto failed;
    }

    // call HAL to set resources
    status = memmgrSetAllocParameters_HAL(pGpu, pMemoryManager, pFbAllocInfo);

    if (status != NV_OK)
    {
        //
        // Two possibilities: either some attribute was set to REQUIRED and we
        // ran out of resources, or an unaligned address / size was passed down.
        // Free up memory and fail this call. heapFree will fix up heap pointers.
        //
        goto failed;
    }

    //
    // for fixed allocations, check if the alignment needs to be adjusted.
    // some hardware units request allocations aligned to smaller than
    // page sizes, which can be handled through alignPad
    //
    if (pVidHeapAlloc->flags & NVOS32_ALLOC_FLAGS_FIXED_ADDRESS_ALLOCATE)
    {
        //
        // is our desired offset suitably aligned?
        // if not, adjust alignment using alignPad (the offset into a page); the
        // allocation is page size aligned as required for swizzling.
        //
        if (pFbAllocInfo->desiredOffset % (pFbAllocInfo->align + 1))
        {
            pFbAllocInfo->alignPad = pFbAllocInfo->desiredOffset % (pFbAllocInfo->align + 1);
            pFbAllocInfo->desiredOffset -= pFbAllocInfo->alignPad;
        }
    }

    //
    // Refresh search parameters.
    //
    pFbAllocInfo->adjustedSize = pFbAllocInfo->size - pFbAllocInfo->alignPad;
    pVidHeapAlloc->height = pFbAllocInfo->height;
    pVidHeapAlloc->pitch  = pFbAllocInfo->pitch;

    //
    // The api takes alignment-1 (used to be a mask).
    //
    alignment = pFbAllocInfo->align + 1;
    pVidHeapAlloc->alignment = pFbAllocInfo->align + 1;      // convert mask to size

    //
    // Allow caller to request host page alignment to make it easier
    // to move things around with host os VM subsystem
    //
    if ((pVidHeapAlloc->flags & NVOS32_ALLOC_FLAGS_FORCE_ALIGN_HOST_PAGE) &&
        (addrSpace == ADDR_FBMEM))
    {
        OBJSYS *pSys         = SYS_GET_INSTANCE();
        NvU64   hostPageSize = pSys->cpuInfo.hostPageSize;

        // hostPageSize *should* always be set, but....
        if (hostPageSize == 0)
            hostPageSize = RM_PAGE_SIZE;

        alignment = memUtilsLeastCommonAlignment(alignment, hostPageSize);
    }

    pVidHeapAlloc->alignment = alignment;
    pFbAllocInfo->align = alignment - 1;

    return status;

failed:
    if (bAllocedHwRes)
    {
        memmgrFreeHwResources(pGpu, pMemoryManager, pFbAllocInfo);
    }

    return status;
}

/*!
 * @brief This function is used to create a memory descriptor if needed.
 *
 * @param[in/out] pAllocRequest     User-provided alloc request struct
 * @param[in/out] pFbAllocInfo      Initialized FB_ALLOC_INFO struct to alloc
 * @param[out]    ppMemDesc         Double pointer to created descriptor
 * @param[in]     pHeap             Heap pointer to store in descriptor
 * @param[in]     addrSpace         Address space identifier
 * @param[in]     bContig           Requested physical contiguity
 * @param[out]    bAllocedMemDesc   NV_TRUE if a descriptor was created
 */
NV_STATUS
memUtilsAllocMemDesc
(
    OBJGPU                     *pGpu,
    MEMORY_ALLOCATION_REQUEST  *pAllocRequest,
    FB_ALLOC_INFO              *pFbAllocInfo,
    MEMORY_DESCRIPTOR         **ppMemDesc,
    Heap                       *pHeap,
    NV_ADDRESS_SPACE            addrSpace,
    NvBool                      bContig,
    NvBool                     *bAllocedMemDesc
)
{
    NV_STATUS status = NV_OK;

    //
    // Allocate a memory descriptor if needed. We do this after the fbHwAllocResources() call
    // so we have the updated size information.  Linear callers like memdescAlloc() can live with
    // only having access to the requested size in bytes, but block linear callers really do
    // need to allocate after fbAlloc() rounding takes place.
    //
    if (pAllocRequest->pMemDesc == NULL)
    {
        NvU64 memDescFlags = MEMDESC_FLAGS_SKIP_RESOURCE_COMPUTE;

        //
        // Allocate a contig vidmem descriptor now; if needed we'll
        // allocate a new noncontig memdesc later
        //
        status = memdescCreate(&pAllocRequest->pMemDesc, pGpu, pFbAllocInfo->adjustedSize, 0, bContig,
                               addrSpace, NV_MEMORY_UNCACHED, memDescFlags);

        if (status != NV_OK)
        {
            NV_PRINTF(LEVEL_ERROR, "cannot alloc memDesc!\n");
            return status;
        }

        *bAllocedMemDesc = NV_TRUE;
    }

    *ppMemDesc = pAllocRequest->pMemDesc;
    (*ppMemDesc)->pHeap = pHeap;

    // Set attributes tracked by the memdesc
    memdescSetPteKind(*ppMemDesc, pFbAllocInfo->format);
    memdescSetHwResId(*ppMemDesc, pFbAllocInfo->hwResId);

    return status;
}
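
//
// Illustrative call ordering (a sketch, not a complete allocation path): the
// helpers in this file are intended to be used in roughly this sequence by a
// memory-class allocator; handles, heap, and flags here are hypothetical.
//
//     memUtilsInitFBAllocInfo(pAllocRequest->pUserParams, pFbAllocInfo,
//                             hClient, hDevice);
//     memmgrAllocResources(pGpu, pMemoryManager, pAllocRequest, pFbAllocInfo);
//     memUtilsAllocMemDesc(pGpu, pAllocRequest, pFbAllocInfo, &pMemDesc,
//                          pHeap, addrSpace, bContig, &bAllocedMemDesc);
//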

/*!
 * Memsets the memory for the given memory descriptor with the given value.
 * This function assumes that BAR2 is not yet available. Thus either the BAR0
 * window to FB or a memmap to SYSMEM will be used, depending on the memory
 * location.
 *
 * @param[in] pGpu      GPU object pointer
 * @param[in] pMemDesc  Memory descriptor for the memory to memset
 * @param[in] value     Value to memset to.
 */
NV_STATUS
memUtilsMemSetNoBAR2(OBJGPU *pGpu, PMEMORY_DESCRIPTOR pMemDesc, NvU8 value)
{
    KernelBus  *pKernelBus = GPU_GET_KERNEL_BUS(pGpu);
    NvU8       *pMap  = NULL;
    void       *pPriv = NULL;
    RmPhysAddr  physAddr;
    RmPhysAddr  physAddrOrig;
    NvU64       sizeInDWord;
    NvU32       sizeOfDWord = sizeof(NvU32);
    NvU32       bar0Addr;
    NvU32       i;

    NV_ASSERT((pMemDesc != NULL) &&
              (pMemDesc->Size & (sizeOfDWord-1)) == 0);
    sizeInDWord = pMemDesc->Size / sizeOfDWord;

    //
    // BAR2 is not yet initialized. Thus use either the BAR0 window or
    // memmap to initialize the given surface.
    //
    NV_ASSERT(pKernelBus->virtualBar2[GPU_GFID_PF].pCpuMapping == NULL);
    switch (memdescGetAddressSpace(pMemDesc))
    {
        case ADDR_FBMEM:
            if (KBUS_BAR0_PRAMIN_DISABLED(pGpu))
            {
                pMap = kbusMapRmAperture_HAL(pGpu, pMemDesc);
                NV_ASSERT_OR_RETURN(pMap != NULL, NV_ERR_INSUFFICIENT_RESOURCES);
                portMemSet(pMap, value, pMemDesc->Size);
                kbusUnmapRmAperture_HAL(pGpu, pMemDesc, &pMap, NV_TRUE);

                break;
            }
            //
            // Set the BAR0 window to encompass the given surface while
            // saving off the location to where the BAR0 window was
            // previously pointing.
            //
            physAddr = memdescGetPhysAddr(pMemDesc, AT_GPU, 0);
            NV_ASSERT((physAddr & (sizeOfDWord-1)) == 0);

            physAddrOrig = kbusGetBAR0WindowVidOffset_HAL(pGpu, pKernelBus);
            NV_ASSERT_OK_OR_RETURN(
                kbusSetBAR0WindowVidOffset_HAL(pGpu,
                                               pKernelBus,
                                               physAddr & ~0xffffULL));
            bar0Addr =
                NvU64_LO32(kbusGetBAR0WindowAddress_HAL(pKernelBus) +
                          (physAddr - kbusGetBAR0WindowVidOffset_HAL(pGpu, pKernelBus)));

            //
            // Iterate and initialize the given surface with BAR0
            // writes.
            //
            for (i = 0; i < sizeInDWord; i++)
            {
                GPU_REG_WR32(pGpu,
                             bar0Addr + (sizeOfDWord * i),
                             value);
            }

            //
            // Restore where the BAR0 window was previously pointing
            // to.
            //
            NV_ASSERT_OK_OR_RETURN(
                kbusSetBAR0WindowVidOffset_HAL(pGpu, pKernelBus, physAddrOrig));

            break;

        case ADDR_SYSMEM:
            // Plain old memmap.
            NV_ASSERT_OK_OR_RETURN(
                memdescMapOld(pMemDesc, 0,
                              pMemDesc->Size,
                              NV_TRUE, // kernel,
                              NV_PROTECT_READ_WRITE,
                              (void **)&pMap,
                              &pPriv));
            portMemSet(pMap, value, NvU64_LO32(pMemDesc->Size));
            memdescUnmapOld(pMemDesc, NV_TRUE, 0, pMap, pPriv);
            break;

        default:
            // Should not happen.
            NV_ASSERT(0);
            break;
    }

    return NV_OK;
}
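
//
// Worked example for the BAR0-window path in memUtilsMemSetNoBAR2 (numbers
// are illustrative): for a vidmem surface at physAddr = 0x12345678, the
// window is pointed at physAddr & ~0xffffULL = 0x12340000, so
//
//     bar0Addr = kbusGetBAR0WindowAddress_HAL(pKernelBus) + 0x5678
//
// i.e. the window base plus the surface's offset within the 64 KB-aligned
// window, and each DWORD of the surface is then written through that
// aperture with GPU_REG_WR32().
//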