/*
 * SPDX-FileCopyrightText: Copyright (c) 2020-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 * SPDX-License-Identifier: MIT
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 */

#include "gpu/mem_mgr/mem_mgr.h"
#include "gpu/mem_mgr/heap_base.h"
#include "gpu/mem_mgr/mem_utils.h"
#include "gpu/mem_mgr/virt_mem_allocator_common.h"
#include "os/nv_memory_type.h"
#include "core/locks.h"
#include "ctrl/ctrl2080.h"
#include "rmapi/rs_utils.h"

#include "gpu/bus/kern_bus.h"

// Memory copy block size used when a large mapping must be cut into smaller chunks
#define MEMORY_COPY_BLOCK_SIZE (1024 * 1024)

/* ------------------------ Private functions --------------------------------------- */

/*!
 * @brief This utility routine helps in determining the appropriate
 *        memory transfer technique to be used
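 *
 * The policy implemented below: any transfer that touches sysmem is handled
 * by the CPU (TRANSFER_TYPE_PROCESSOR); otherwise, when BAR access is
 * blocked, the transfer is routed through GSP (TRANSFER_TYPE_GSP_DMA).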
 */
static TRANSFER_TYPE
memmgrGetMemTransferType
(
    MemoryManager    *pMemoryManager,
    TRANSFER_SURFACE *pDst,
    TRANSFER_SURFACE *pSrc
)
{
    TRANSFER_TYPE transferType        = TRANSFER_TYPE_PROCESSOR;
    OBJGPU    *pGpu       = ENG_GET_GPU(pMemoryManager);
    KernelBus *pKernelBus = GPU_GET_KERNEL_BUS(pGpu);

    //
    // In case of copy, both dest and src will be passed
    // In case of memset/memread/memwrite either dest or src will be passed
    //
    if ((pDst != NULL) && (pSrc != NULL) &&
        (memdescGetAddressSpace(pDst->pMemDesc) == ADDR_SYSMEM) &&
        (memdescGetAddressSpace(pSrc->pMemDesc) == ADDR_SYSMEM))
    {
        transferType = TRANSFER_TYPE_PROCESSOR;
    }
    else if (((pDst != NULL) &&
             (memdescGetAddressSpace(pDst->pMemDesc) == ADDR_SYSMEM)) ||
             ((pSrc != NULL) &&
             (memdescGetAddressSpace(pSrc->pMemDesc) == ADDR_SYSMEM)))
    {
        transferType = TRANSFER_TYPE_PROCESSOR;
    }
    else if (kbusIsBarAccessBlocked(pKernelBus))
    {
        transferType = TRANSFER_TYPE_GSP_DMA;
    }
    return transferType;
}

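/*!
 * @brief Allocates a surface in unprotected sysmem, maps it for CPU access,
 *        and zeroes it before use.
 *
 * @param[in]  pGpu       GPU object pointer
 * @param[in]  size       Size in bytes of the surface
 * @param[out] ppMemDesc  Memory descriptor created for the surface
 * @param[out] ppMap      CPU mapping of the surface
 * @param[out] ppPriv     Opaque mapping handle needed later to unmap
 */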
static NV_STATUS
_memmgrAllocAndMapSurface
(
    OBJGPU             *pGpu,
    NvU64               size,
    MEMORY_DESCRIPTOR **ppMemDesc,
    void              **ppMap,
    void              **ppPriv
)
{
    NV_STATUS status;
    NvU64 flags = 0;

    NV_ASSERT_OR_RETURN(ppMemDesc != NULL, NV_ERR_INVALID_ARGUMENT);
    NV_ASSERT_OR_RETURN(ppMap != NULL, NV_ERR_INVALID_ARGUMENT);
    NV_ASSERT_OR_RETURN(ppPriv != NULL, NV_ERR_INVALID_ARGUMENT);

    flags = MEMDESC_FLAGS_ALLOC_IN_UNPROTECTED_MEMORY;

    NV_ASSERT_OK_OR_RETURN(
        memdescCreate(ppMemDesc, pGpu, size, RM_PAGE_SIZE, NV_TRUE,
                      ADDR_SYSMEM, NV_MEMORY_UNCACHED, flags));

    NV_ASSERT_OK_OR_GOTO(status, memdescAlloc(*ppMemDesc), failed);

    NV_ASSERT_OK_OR_GOTO(status,
        memdescMapOld(*ppMemDesc, 0, size, NV_TRUE, NV_PROTECT_READ_WRITE,
                      ppMap, ppPriv),
        failed);

    // Clear surface before use
    portMemSet(*ppMap, 0, size);

    return NV_OK;
failed:
    memdescFree(*ppMemDesc);
    memdescDestroy(*ppMemDesc);

    *ppMemDesc = NULL;
    *ppMap = NULL;
    *ppPriv = NULL;

    return status;
}

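/*!
 * @brief Unmaps and frees a surface set up by _memmgrAllocAndMapSurface().
 *
 * @param[in] pMemDesc  Memory descriptor of the surface
 * @param[in] pMap      CPU mapping of the surface
 * @param[in] pPriv     Opaque mapping handle returned at map time
 */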
static void
_memmgrUnmapAndFreeSurface
(
    MEMORY_DESCRIPTOR *pMemDesc,
    void              *pMap,
    void              *pPriv
)
{
    memdescUnmapOld(pMemDesc, NV_TRUE, 0, pMap, pPriv);

    memdescFree(pMemDesc);
    memdescDestroy(pMemDesc);
}

/*!
 * @brief This function writes data from a client provided buffer to a region
 *        in vidmem, or reads data from that region into the buffer, by
 *        staging the transfer through unprotected sysmem and asking GSP to
 *        perform the copy
 *
 * @param[in] pDst    TRANSFER_SURFACE info for the vidmem region
 * @param[in] pBuf    Client provided buffer
 * @param[in] size    Size in bytes of the memory transfer
 * @param[in] bRead   TRUE for read and FALSE for write
 */
static NV_STATUS
_memmgrMemReadOrWriteWithGsp
(
    OBJGPU           *pGpu,
    TRANSFER_SURFACE *pDst,
    void             *pBuf,
    NvU64             size,
    NvBool            bRead
)
{
    NV2080_CTRL_INTERNAL_MEMMGR_MEMORY_TRANSFER_WITH_GSP_PARAMS gspParams;
    NV_STATUS status;
    MEMORY_DESCRIPTOR *pStagingBuf = NULL;
    void *pStagingBufMap = NULL;
    void *pStagingBufPriv = NULL;
    RM_API *pRmApi = GPU_GET_PHYSICAL_RMAPI(pGpu);

    // Do not expect GSP to be used for reading/writing from/to sysmem
    if (memdescGetAddressSpace(pDst->pMemDesc) == ADDR_SYSMEM)
        return NV_ERR_NOT_SUPPORTED;

    // Allocate and map the staging buffer
    NV_ASSERT_OK_OR_RETURN(
        _memmgrAllocAndMapSurface(pGpu, size, &pStagingBuf, &pStagingBufMap,
                                  &pStagingBufPriv));

    // Copy the data to staging buffer before poking GSP for copying
    if (!bRead)
        portMemCopy(pStagingBufMap, size, pBuf, size);

    // Setup control call params
    portMemSet(&gspParams, 0, sizeof(gspParams));

    gspParams.memop = NV2080_CTRL_MEMMGR_MEMORY_OP_MEMCPY;
    gspParams.transferSize = size;

    if (bRead)
    {
        // Source surface in vidmem
        gspParams.src.baseAddr = memdescGetPhysAddr(pDst->pMemDesc, AT_GPU, 0);
        gspParams.src.size = memdescGetSize(pDst->pMemDesc);
        gspParams.src.offset = pDst->offset;
        gspParams.src.cpuCacheAttrib = memdescGetCpuCacheAttrib(pDst->pMemDesc);
        gspParams.src.aperture = memdescGetAddressSpace(pDst->pMemDesc);

        // Destination surface in unprotected sysmem
        gspParams.dst.baseAddr = memdescGetPhysAddr(pStagingBuf, AT_GPU, 0);
        gspParams.dst.size = memdescGetSize(pStagingBuf);
        gspParams.dst.offset = 0;
        gspParams.dst.cpuCacheAttrib = memdescGetCpuCacheAttrib(pStagingBuf);
        gspParams.dst.aperture = memdescGetAddressSpace(pStagingBuf);
    }
    else
    {
        // Source surface in unprotected sysmem
        gspParams.src.baseAddr = memdescGetPhysAddr(pStagingBuf, AT_GPU, 0);
        gspParams.src.size = memdescGetSize(pStagingBuf);
        gspParams.src.offset = 0;
        gspParams.src.cpuCacheAttrib = memdescGetCpuCacheAttrib(pStagingBuf);
        gspParams.src.aperture = memdescGetAddressSpace(pStagingBuf);

        // Destination surface in vidmem
        gspParams.dst.baseAddr = memdescGetPhysAddr(pDst->pMemDesc, AT_GPU, 0);
        gspParams.dst.size = memdescGetSize(pDst->pMemDesc);
        gspParams.dst.offset = pDst->offset;
        gspParams.dst.cpuCacheAttrib = memdescGetCpuCacheAttrib(pDst->pMemDesc);
        gspParams.dst.aperture = memdescGetAddressSpace(pDst->pMemDesc);
    }

    // Send the control call
    NV_ASSERT_OK_OR_GOTO(status,
        pRmApi->Control(pRmApi,
                        pGpu->hInternalClient,
                        pGpu->hInternalSubdevice,
                        NV2080_CTRL_CMD_INTERNAL_MEMMGR_MEMORY_TRANSFER_WITH_GSP,
                        &gspParams,
                        sizeof(gspParams)),
        failed);

    // Read contents from staging buffer after GSP is done copying
    if (bRead)
        portMemCopy(pBuf, size, pStagingBufMap, size);

failed:
    _memmgrUnmapAndFreeSurface(pStagingBuf, pStagingBufMap, pStagingBufPriv);
    return status;
}

/*!
 * @brief This function is used for copying data b/w two memory regions
 *        using GSP.
 *
 * @param[in] pDst    TRANSFER_SURFACE info for destination region
 * @param[in] pSrc    TRANSFER_SURFACE info for source region
 * @param[in] size    Size in bytes of the memory transfer
 */
static NV_STATUS
_memmgrMemcpyWithGsp
(
    OBJGPU           *pGpu,
    TRANSFER_SURFACE *pDst,
    TRANSFER_SURFACE *pSrc,
    NvU64             size
)
{
    NV2080_CTRL_INTERNAL_MEMMGR_MEMORY_TRANSFER_WITH_GSP_PARAMS gspParams;
    NV_STATUS status;
    MEMORY_DESCRIPTOR *pStagingBuf = NULL;
    void *pStagingBufMap = NULL;
    void *pStagingBufPriv = NULL;
    NvU8 *pMap = NULL;
    void *pPriv = NULL;
    RM_API *pRmApi = GPU_GET_PHYSICAL_RMAPI(pGpu);

    //
    // Do not expect GSP to be used for copying data b/w two surfaces
    // in sysmem. For SPT, there is no non-CPR vidmem. So, allow vidmem
    // to vidmem copies in plain text. For copies b/w CPR and non-CPR
    // vidmem, encryption/decryption needs to happen at the endpoints.
    //
    if (memdescGetAddressSpace(pSrc->pMemDesc) == ADDR_SYSMEM &&
        memdescGetAddressSpace(pDst->pMemDesc) == ADDR_SYSMEM)
    {
        return NV_ERR_NOT_SUPPORTED;
    }

    // Allocate and map the bounce buffer
    NV_ASSERT_OK_OR_RETURN(
        _memmgrAllocAndMapSurface(pGpu, size, &pStagingBuf, &pStagingBufMap,
                                  &pStagingBufPriv));

    // Setup control call params
    portMemSet(&gspParams, 0, sizeof(gspParams));

    gspParams.memop = NV2080_CTRL_MEMMGR_MEMORY_OP_MEMCPY;
    gspParams.transferSize = size;

    if (memdescGetAddressSpace(pSrc->pMemDesc) == ADDR_SYSMEM)
    {
        NV_ASSERT_OK_OR_GOTO(status,
            memdescMapOld(pSrc->pMemDesc, 0, size, NV_TRUE,
                          NV_PROTECT_READ_WRITE, (void**)&pMap, &pPriv),
            failed);

        // Copy to staging buffer
        portMemCopy(pStagingBufMap, size, pMap + pSrc->offset, size);

        memdescUnmapOld(pSrc->pMemDesc, NV_TRUE, 0, (void*)pMap, pPriv);

        // Source surface in unprotected sysmem
        gspParams.src.baseAddr = memdescGetPhysAddr(pStagingBuf, AT_GPU, 0);
        gspParams.src.size = memdescGetSize(pStagingBuf);
        gspParams.src.offset = 0;
        gspParams.src.cpuCacheAttrib = memdescGetCpuCacheAttrib(pStagingBuf);
        gspParams.src.aperture = memdescGetAddressSpace(pStagingBuf);

        // Destination surface in vidmem
        gspParams.dst.baseAddr = memdescGetPhysAddr(pDst->pMemDesc, AT_GPU, 0);
        gspParams.dst.size = memdescGetSize(pDst->pMemDesc);
        gspParams.dst.offset = pDst->offset;
        gspParams.dst.cpuCacheAttrib = memdescGetCpuCacheAttrib(pDst->pMemDesc);
        gspParams.dst.aperture = memdescGetAddressSpace(pDst->pMemDesc);
    }
    else
    {
        // Source surface in vidmem
        gspParams.src.baseAddr = memdescGetPhysAddr(pSrc->pMemDesc, AT_GPU, 0);
        gspParams.src.size = memdescGetSize(pSrc->pMemDesc);
        gspParams.src.offset = pSrc->offset;
        gspParams.src.cpuCacheAttrib = memdescGetCpuCacheAttrib(pSrc->pMemDesc);
        gspParams.src.aperture = memdescGetAddressSpace(pSrc->pMemDesc);

        if (memdescGetAddressSpace(pDst->pMemDesc) == ADDR_FBMEM)
        {
            // Destination surface in vidmem
            gspParams.dst.baseAddr = memdescGetPhysAddr(pDst->pMemDesc, AT_GPU, 0);
            gspParams.dst.size = memdescGetSize(pDst->pMemDesc);
            gspParams.dst.offset = pDst->offset;
            gspParams.dst.cpuCacheAttrib = memdescGetCpuCacheAttrib(pDst->pMemDesc);
            gspParams.dst.aperture = memdescGetAddressSpace(pDst->pMemDesc);
        }
        else
        {
            // Destination surface in unprotected sysmem
            gspParams.dst.baseAddr = memdescGetPhysAddr(pStagingBuf, AT_GPU, 0);
            gspParams.dst.size = memdescGetSize(pStagingBuf);
            gspParams.dst.offset = 0;
            gspParams.dst.cpuCacheAttrib = memdescGetCpuCacheAttrib(pStagingBuf);
            gspParams.dst.aperture = memdescGetAddressSpace(pStagingBuf);
        }
    }

    // Send the control call
    NV_ASSERT_OK_OR_GOTO(status,
        pRmApi->Control(pRmApi,
                        pGpu->hInternalClient,
                        pGpu->hInternalSubdevice,
                        NV2080_CTRL_CMD_INTERNAL_MEMMGR_MEMORY_TRANSFER_WITH_GSP,
                        &gspParams,
                        sizeof(gspParams)),
        failed);

    // Copy from staging buffer to destination
    if (memdescGetAddressSpace(pDst->pMemDesc) == ADDR_SYSMEM)
    {
        NV_ASSERT_OK_OR_GOTO(status,
            memdescMapOld(pDst->pMemDesc, 0, size, NV_TRUE,
                          NV_PROTECT_READ_WRITE, (void**)&pMap, &pPriv),
            failed);

        portMemCopy(pMap + pDst->offset, size, pStagingBufMap, size);

        memdescUnmapOld(pDst->pMemDesc, NV_TRUE, 0, (void*)pMap, pPriv);
    }

failed:
    _memmgrUnmapAndFreeSurface(pStagingBuf, pStagingBufMap, pStagingBufPriv);
    return status;
}

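/*!
 * @brief This function is used for setting a vidmem region to a constant
 *        value using GSP.
 *
 * @param[in] pDst   TRANSFER_SURFACE info for destination region
 * @param[in] value  Value to be written to the region
 * @param[in] size   Size in bytes of the memset
 */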
static NV_STATUS
_memmgrMemsetWithGsp
(
    OBJGPU           *pGpu,
    TRANSFER_SURFACE *pDst,
    NvU32             value,
    NvU64             size
)
{
    NV2080_CTRL_INTERNAL_MEMMGR_MEMORY_TRANSFER_WITH_GSP_PARAMS gspParams;
    RM_API *pRmApi = GPU_GET_PHYSICAL_RMAPI(pGpu);

    // Do not expect to use GSP to memset surfaces in sysmem
    if (memdescGetAddressSpace(pDst->pMemDesc) == ADDR_SYSMEM)
        return NV_ERR_NOT_SUPPORTED;

    portMemSet(&gspParams, 0, sizeof(gspParams));

    gspParams.memop = NV2080_CTRL_MEMMGR_MEMORY_OP_MEMSET;
    gspParams.transferSize = size;
    gspParams.value = value;
    gspParams.dst.baseAddr = memdescGetPhysAddr(pDst->pMemDesc, AT_GPU, 0);
    gspParams.dst.size = memdescGetSize(pDst->pMemDesc);
    gspParams.dst.offset = pDst->offset;
    gspParams.dst.cpuCacheAttrib = memdescGetCpuCacheAttrib(pDst->pMemDesc);
    gspParams.dst.aperture = memdescGetAddressSpace(pDst->pMemDesc);

    // Send the control call
    NV_ASSERT_OK_OR_RETURN(
        pRmApi->Control(pRmApi,
                        pGpu->hInternalClient,
                        pGpu->hInternalSubdevice,
                        NV2080_CTRL_CMD_INTERNAL_MEMMGR_MEMORY_TRANSFER_WITH_GSP,
                        &gspParams,
                        sizeof(gspParams)));

    return NV_OK;
}

/*!
 * @brief This function is used for copying data b/w two memory regions
 *        using the specified memory transfer technique. Both memory regions
 *        can be in the same aperture or in different apertures.
 *
 * @param[in] pDstInfo      TRANSFER_SURFACE info for destination region
 * @param[in] pSrcInfo      TRANSFER_SURFACE info for source region
 * @param[in] size          Size in bytes of the memory transfer
 * @param[in] transferType  Memory transfer technique to be used
 * @param[in] flags         Flags
 */
static NV_STATUS
memmgrMemCopyWithTransferType
(
    MemoryManager    *pMemoryManager,
    TRANSFER_SURFACE *pDstInfo,
    TRANSFER_SURFACE *pSrcInfo,
    NvU32             size,
    TRANSFER_TYPE     transferType,
    NvU32             flags
)
{
    OBJGPU *pGpu = ENG_GET_GPU(pMemoryManager);
    NvU8 *pSrc;
    NvU8 *pDst;

    // Sanitize the input
    NV_ASSERT_OR_RETURN(pDstInfo != NULL, NV_ERR_INVALID_ARGUMENT);
    NV_ASSERT_OR_RETURN(pSrcInfo != NULL, NV_ERR_INVALID_ARGUMENT);
    NV_ASSERT_OR_RETURN(pDstInfo->pMemDesc != NULL, NV_ERR_INVALID_ARGUMENT);
    NV_ASSERT_OR_RETURN(pSrcInfo->pMemDesc != NULL, NV_ERR_INVALID_ARGUMENT);
    NV_ASSERT_OR_RETURN(!memdescDescIsEqual(pDstInfo->pMemDesc, pSrcInfo->pMemDesc),
                        NV_ERR_INVALID_ARGUMENT);

    switch (transferType)
    {
        case TRANSFER_TYPE_PROCESSOR:
            pDst = memdescMapInternal(pGpu, pDstInfo->pMemDesc, TRANSFER_FLAGS_NONE);
            NV_ASSERT_OR_RETURN(pDst != NULL, NV_ERR_INSUFFICIENT_RESOURCES);
            pSrc = memdescMapInternal(pGpu, pSrcInfo->pMemDesc, TRANSFER_FLAGS_NONE);
            if (pSrc == NULL)
            {
                memdescUnmapInternal(pGpu, pDstInfo->pMemDesc, 0);
                NV_ASSERT_OR_RETURN(0, NV_ERR_INSUFFICIENT_RESOURCES);
            }

            portMemCopy(pDst + pDstInfo->offset, size, pSrc + pSrcInfo->offset, size);

            memdescUnmapInternal(pGpu, pSrcInfo->pMemDesc, TRANSFER_FLAGS_NONE);
            memdescUnmapInternal(pGpu, pDstInfo->pMemDesc, flags);
            break;
        case TRANSFER_TYPE_GSP_DMA:
            if (IS_GSP_CLIENT(pGpu))
            {
                NV_PRINTF(LEVEL_INFO, "Calling GSP DMA task\n");
                NV_ASSERT_OK_OR_RETURN(
                    _memmgrMemcpyWithGsp(pGpu, pDstInfo, pSrcInfo, size));
            }
            else
            {
                NV_ASSERT_OR_RETURN(0, NV_ERR_INVALID_ARGUMENT);
            }
            break;
        case TRANSFER_TYPE_CE:
            NV_PRINTF(LEVEL_INFO, "Add call to CE\n");
            break;
    }

    return NV_OK;
}

/*!
 * @brief This function is used for setting a memory region to a constant state
 *        using a specified memory transfer technique
 *
 * @param[in] pDstInfo      TRANSFER_SURFACE info for destination region
 * @param[in] value         Value to be written to the region
 * @param[in] size          Size in bytes of the memory to be initialized
 * @param[in] transferType  Memory transfer technique to be used
 * @param[in] flags         Flags
 */
static NV_STATUS
memmgrMemSetWithTransferType
(
    MemoryManager    *pMemoryManager,
    TRANSFER_SURFACE *pDstInfo,
    NvU32             value,
    NvU32             size,
    TRANSFER_TYPE     transferType,
    NvU32             flags
)
{
    OBJGPU *pGpu = ENG_GET_GPU(pMemoryManager);
    NvU8 *pDst;

    // Sanitize the input
    NV_ASSERT_OR_RETURN(pDstInfo != NULL, NV_ERR_INVALID_ARGUMENT);
    NV_ASSERT_OR_RETURN(pDstInfo->pMemDesc != NULL, NV_ERR_INVALID_ARGUMENT);
    NV_ASSERT_OR_RETURN(size > 0, NV_ERR_INVALID_ARGUMENT);
    NV_ASSERT_OR_RETURN(pDstInfo->offset + size <= pDstInfo->pMemDesc->Size, NV_ERR_INVALID_ARGUMENT);

    switch (transferType)
    {
        case TRANSFER_TYPE_PROCESSOR:
            pDst = memdescMapInternal(pGpu, pDstInfo->pMemDesc, TRANSFER_FLAGS_NONE);
            NV_ASSERT_OR_RETURN(pDst != NULL, NV_ERR_INSUFFICIENT_RESOURCES);

            portMemSet(pDst + pDstInfo->offset, value, size);

            memdescUnmapInternal(pGpu, pDstInfo->pMemDesc, flags);
            break;
        case TRANSFER_TYPE_GSP_DMA:
            if (IS_GSP_CLIENT(pGpu))
            {
                NV_PRINTF(LEVEL_INFO, "Calling GSP DMA task\n");
                NV_ASSERT_OK_OR_RETURN(
                    _memmgrMemsetWithGsp(pGpu, pDstInfo, value, size));
            }
            else
            {
                NV_ASSERT_OR_RETURN(0, NV_ERR_INVALID_ARGUMENT);
            }
            break;
        case TRANSFER_TYPE_CE:
            NV_PRINTF(LEVEL_INFO, "Add call to CE\n");
            break;
    }

    return NV_OK;
}

/*!
 * @brief This function is used to map the appropriate memory descriptor,
 *        copy the memory from the given buffer, and then unmap.
 *
 * @param[in] pMemDesc Memory descriptor of buffer to write
 * @param[in] pBuf     Buffer allocated by caller
 * @param[in] offset   Offset of buffer to write
 * @param[in] size     Size in bytes of the buffer
 * @param[in] flags    Flags
 */
static NV_STATUS
memmgrMemWriteMapAndCopy
(
    MemoryManager     *pMemoryManager,
    MEMORY_DESCRIPTOR *pMemDesc,
    void              *pBuf,
    NvU64              offset,
    NvU64              size,
    NvU32              flags
)
{
    NvU8   *pDst = NULL;
    OBJGPU *pGpu = ENG_GET_GPU(pMemoryManager);

    pDst = memdescMapInternal(pGpu, pMemDesc, TRANSFER_FLAGS_NONE);
    NV_CHECK_OR_RETURN(LEVEL_SILENT, pDst != NULL, NV_ERR_INSUFFICIENT_RESOURCES);

    portMemCopy(pDst + offset, size, pBuf, size);
    memdescUnmapInternal(pGpu, pMemDesc, flags);

    return NV_OK;
}

/*!
 * @brief This function is used for writing data placed in a caller passed buffer
 *        to a given memory region while only mapping regions as large as the given
 *        block size.
 *
 * @param[in] pMemDesc   Memory descriptor of buffer to write
 * @param[in] pBuf       Buffer allocated by caller
 * @param[in] baseOffset Offset of entire buffer to write
 * @param[in] size       Size in bytes of the buffer
 * @param[in] flags      Flags
 * @param[in] blockSize  Maximum size of a mapping to use
 */
static NV_STATUS
memmgrMemWriteInBlocks
(
    MemoryManager     *pMemoryManager,
    MEMORY_DESCRIPTOR *pMemDesc,
    void              *pBuf,
    NvU64              baseOffset,
    NvU64              size,
    NvU32              flags,
    NvU32              blockSize
)
{
    NV_STATUS  status    = NV_OK;
    OBJGPU    *pGpu      = ENG_GET_GPU(pMemoryManager);
    NvU64      remaining = size;
    NvU64      offset    = 0;

    while ((remaining > 0) && (status == NV_OK))
    {
        MEMORY_DESCRIPTOR *pSubMemDesc = NULL;
        // Capped at blockSize, so the result always fits in 32 bits
        NvU32              mapSize     = (NvU32)NV_MIN((NvU64)blockSize, remaining);

        NV_CHECK_OK_OR_RETURN(LEVEL_SILENT, memdescCreateSubMem(&pSubMemDesc, pMemDesc, pGpu, offset + baseOffset, mapSize));

        // Set the offset to 0, as the sub descriptor already starts at the offset
        status = memmgrMemWriteMapAndCopy(pMemoryManager, pSubMemDesc, (NvU8 *)pBuf + offset,
                                          0, mapSize, flags);

        memdescFree(pSubMemDesc);
        memdescDestroy(pSubMemDesc);

        offset += mapSize;
        remaining -= mapSize;
    }

    return status;
}

/*!
 * @brief This function is used for writing data placed in a caller passed buffer
 *        to a given memory region using the specified memory transfer technique
 *
 * @param[in] pDstInfo      TRANSFER_SURFACE info for the destination region
 * @param[in] pBuf          Buffer allocated by caller
 * @param[in] size          Size in bytes of the buffer
 * @param[in] transferType  Memory transfer technique to be used
 * @param[in] flags         Flags
 */
static NV_STATUS
memmgrMemWriteWithTransferType
(
    MemoryManager    *pMemoryManager,
    TRANSFER_SURFACE *pDstInfo,
    void             *pBuf,
    NvU64             size,
    TRANSFER_TYPE     transferType,
    NvU32             flags
)
{
    OBJGPU *pGpu = ENG_GET_GPU(pMemoryManager);
    NvU8 *pMapping;

    // Sanitize the input before dereferencing it
    NV_ASSERT_OR_RETURN(pDstInfo != NULL, NV_ERR_INVALID_ARGUMENT);
    NV_ASSERT_OR_RETURN(pDstInfo->pMemDesc != NULL, NV_ERR_INVALID_ARGUMENT);
    NV_ASSERT_OR_RETURN(pBuf != NULL, NV_ERR_INVALID_ARGUMENT);
    NV_ASSERT_OR_RETURN(size > 0, NV_ERR_INVALID_ARGUMENT);
    NV_ASSERT_OR_RETURN(pDstInfo->offset + size <= pDstInfo->pMemDesc->Size, NV_ERR_INVALID_ARGUMENT);

    // Reuse an existing kernel mapping if the surface already has one
    pMapping = memdescGetKernelMapping(pDstInfo->pMemDesc);
    if (pMapping != NULL)
    {
        portMemCopy(pMapping + pDstInfo->offset, size, pBuf, size);
        return NV_OK;
    }

    switch (transferType)
    {
        case TRANSFER_TYPE_PROCESSOR:
            if (memmgrMemWriteMapAndCopy(pMemoryManager, pDstInfo->pMemDesc, pBuf, pDstInfo->offset, size, flags) != NV_OK)
            {
                // If we fail to map a block large enough for the entire transfer, split up the mapping.
                NV_ASSERT_OK_OR_RETURN(memmgrMemWriteInBlocks(pMemoryManager, pDstInfo->pMemDesc, pBuf,
                                                              pDstInfo->offset, size, flags, MEMORY_COPY_BLOCK_SIZE));
            }
            break;
        case TRANSFER_TYPE_GSP_DMA:
            if (IS_GSP_CLIENT(pGpu))
            {
                NV_PRINTF(LEVEL_INFO, "Calling GSP DMA task\n");
                NV_ASSERT_OK_OR_RETURN(
                    _memmgrMemReadOrWriteWithGsp(pGpu, pDstInfo, pBuf, size,
                                                 NV_FALSE /* bRead */));
            }
            else
            {
                NV_ASSERT_OR_RETURN(0, NV_ERR_INVALID_ARGUMENT);
            }
            break;
        case TRANSFER_TYPE_CE:
            NV_PRINTF(LEVEL_INFO, "Add call to CE\n");
            break;
    }

    return NV_OK;
}

/*!
 * @brief This function is used for reading specified number of bytes from
 *        a source memory region into a caller passed buffer using a specified
 *        memory transfer technique
 *
 * @param[in] pSrcInfo      TRANSFER_SURFACE info for the source region
 * @param[in] pBuf          Caller allocated buffer
 * @param[in] size          Size in bytes of the buffer
 * @param[in] transferType  Memory transfer technique to be used
 * @param[in] flags         Flags
 */
static NV_STATUS
memmgrMemReadWithTransferType
(
    MemoryManager    *pMemoryManager,
    TRANSFER_SURFACE *pSrcInfo,
    void             *pBuf,
    NvU64             size,
    TRANSFER_TYPE     transferType,
    NvU32             flags
)
{
    OBJGPU *pGpu = ENG_GET_GPU(pMemoryManager);
    NvU8   *pSrc;
    NvU8   *pMapping;

    // Sanitize the input before dereferencing it
    NV_ASSERT_OR_RETURN(pSrcInfo != NULL, NV_ERR_INVALID_ARGUMENT);
    NV_ASSERT_OR_RETURN(pSrcInfo->pMemDesc != NULL, NV_ERR_INVALID_ARGUMENT);
    NV_ASSERT_OR_RETURN(pBuf != NULL, NV_ERR_INVALID_ARGUMENT);
    NV_ASSERT_OR_RETURN(size > 0, NV_ERR_INVALID_ARGUMENT);
    NV_ASSERT_OR_RETURN(pSrcInfo->offset + size <= pSrcInfo->pMemDesc->Size, NV_ERR_INVALID_ARGUMENT);

    // Reuse an existing kernel mapping if the surface already has one
    pMapping = memdescGetKernelMapping(pSrcInfo->pMemDesc);
    if (pMapping != NULL)
    {
        portMemCopy(pBuf, size, pMapping + pSrcInfo->offset, size);
        return NV_OK;
    }

    switch (transferType)
    {
        case TRANSFER_TYPE_PROCESSOR:
            pSrc = memdescMapInternal(pGpu, pSrcInfo->pMemDesc, TRANSFER_FLAGS_NONE);
            NV_ASSERT_OR_RETURN(pSrc != NULL, NV_ERR_INSUFFICIENT_RESOURCES);

            portMemCopy(pBuf, size, pSrc + pSrcInfo->offset, size);

            memdescUnmapInternal(pGpu, pSrcInfo->pMemDesc, 0);
            break;
        case TRANSFER_TYPE_GSP_DMA:
            if (IS_GSP_CLIENT(pGpu))
            {
                NV_PRINTF(LEVEL_INFO, "Calling GSP DMA task\n");
                NV_ASSERT_OK_OR_RETURN(
                    _memmgrMemReadOrWriteWithGsp(pGpu, pSrcInfo, pBuf, size,
                                                 NV_TRUE /* bRead */));
            }
            else
            {
                NV_ASSERT_OR_RETURN(0, NV_ERR_INVALID_ARGUMENT);
            }
            break;
        case TRANSFER_TYPE_CE:
            NV_PRINTF(LEVEL_INFO, "Add call to CE\n");
            break;
    }

    return NV_OK;
}

/* ------------------------ Public functions --------------------------------------- */

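/*!
 * @brief Returns the smallest alignment that satisfies both given alignments,
 *        i.e. their least common multiple, clamped to NV_U64_MAX on overflow.
 *
 * Worked example: for align1 = 0x3000 and align2 = 0x4000 the GCD is 0x1000,
 * so the result is 0x3000 * (0x4000 / 0x1000) = 0xC000.
 */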
NvU64 memUtilsLeastCommonAlignment(NvU64 align1, NvU64 align2)
{
    NvU64 a, b;  // For Euclid's algorithm
    NvU64 lcm;   // Least Common Multiple of align1 and align2
    NvU64 maxAlignment = NV_U64_MAX;

    // WOLOG, make sure align1 >= align2.
    //
    if (align2 > align1)
    {
        NvU64 tmp = align1;
        align1 = align2;
        align2 = tmp;
    }

    // If align2 is 0, return min(align1, maxAlignment)
    //
    if (align2 == 0)
    {
        return align1 < maxAlignment ? align1 : maxAlignment;
    }

    // Use Euclid's algorithm (GCD(a, b) = GCD(b, a % b)) to find the
    // GCD of the two alignments, and use the GCD to find the LCM.
    //
    a = align1;
    b = align2;
    while (b != 0)
    {
        NvU64 old_a = a;
        a = b;
        b = old_a % b;
        NV_ASSERT(a > b);  // Ensure termination.  Should never fail.
    }
    lcm = align1 * (align2 / a);  // May overflow

    // Return min(lcm, maxAlignment).  Also return maxAlignment if the
    // lcm calculation overflowed, since that means it must have been
    // much bigger than maxAlignment.
    //
    if (lcm > maxAlignment || lcm < align1 ||
        0 != (lcm % align1) || 0 != (lcm % align2))
    {
        NV_CHECK_FAILED(LEVEL_ERROR, "Alignment limit exceeded");
        return maxAlignment;
    }
    return lcm;
}

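/*!
 * @brief Seeds an FB_ALLOC_INFO struct from the user allocation parameters.
 *        Fields that have no user-supplied counterpart are reset to defaults.
 *
 * @param[in]  pAllocParams  User allocation parameters
 * @param[out] pFbAllocInfo  FB_ALLOC_INFO struct to initialize
 * @param[in]  hClient       Client handle to record in the struct
 * @param[in]  hDevice       Device handle to record in the struct
 */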
void memUtilsInitFBAllocInfo
(
    NV_MEMORY_ALLOCATION_PARAMS *pAllocParams,
    FB_ALLOC_INFO *pFbAllocInfo,
    NvHandle hClient,
    NvHandle hDevice
)
{
    pFbAllocInfo->pageFormat->type  = pAllocParams->type;
    pFbAllocInfo->owner             = pAllocParams->owner;
    pFbAllocInfo->hwResId           = 0;
    pFbAllocInfo->pad               = 0;
    pFbAllocInfo->alignPad          = 0;
    pFbAllocInfo->height            = pAllocParams->height;
    pFbAllocInfo->width             = pAllocParams->width;
    pFbAllocInfo->pitch             = pAllocParams->pitch;
    pFbAllocInfo->size              = pAllocParams->size;
    pFbAllocInfo->origSize          = pAllocParams->size;
    pFbAllocInfo->adjustedSize      = pAllocParams->size;
    pFbAllocInfo->offset            = ~0;
    pFbAllocInfo->pageFormat->flags = pAllocParams->flags;
    pFbAllocInfo->pageFormat->attr  = pAllocParams->attr;
    pFbAllocInfo->retAttr           = pAllocParams->attr;
    pFbAllocInfo->pageFormat->attr2 = pAllocParams->attr2;
    pFbAllocInfo->retAttr2          = pAllocParams->attr2;
    pFbAllocInfo->format            = pAllocParams->format;
    pFbAllocInfo->comprCovg         = pAllocParams->comprCovg;
    pFbAllocInfo->zcullCovg         = 0;
    pFbAllocInfo->ctagOffset        = pAllocParams->ctagOffset;
    pFbAllocInfo->bIsKernelAlloc    = NV_FALSE;
    pFbAllocInfo->internalflags     = 0;
    pFbAllocInfo->hClient           = hClient;
    pFbAllocInfo->hDevice           = hDevice;

    if ((pAllocParams->flags & NVOS32_ALLOC_FLAGS_ALIGNMENT_HINT) ||
        (pAllocParams->flags & NVOS32_ALLOC_FLAGS_ALIGNMENT_FORCE))
        pFbAllocInfo->align = pAllocParams->alignment;
    else
        pFbAllocInfo->align = RM_PAGE_SIZE;

    if (pAllocParams->flags & NVOS32_ALLOC_FLAGS_FIXED_ADDRESS_ALLOCATE)
    {
        pFbAllocInfo->offset = pAllocParams->offset;
        pFbAllocInfo->desiredOffset = pAllocParams->offset;
    }
}
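/*!
 * @brief Resolves a client memory handle to its backing memory descriptor.
 *        Returns NULL if the handle does not refer to a Memory object.
 *
 * @param[in] pMemoryManager  MemoryManager pointer
 * @param[in] hClient         Client handle
 * @param[in] hMemory         Memory handle to look up
 */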
MEMORY_DESCRIPTOR *
memmgrMemUtilsGetMemDescFromHandle_IMPL
(
    MemoryManager *pMemoryManager,
    NvHandle hClient,
    NvHandle hMemory
)
{
    RsResourceRef *pMemoryRef;
    Memory        *pMemory;

    if (serverutilGetResourceRef(hClient, hMemory, &pMemoryRef) != NV_OK)
    {
        return NULL;
    }

    pMemory = dynamicCast(pMemoryRef->pResource, Memory);
    if (pMemory == NULL)
    {
        return NULL;
    }
    return pMemory->pMemDesc;
}

/*!
 * @brief This function is used for copying data b/w two memory regions
 *        Both memory regions can be in the same aperture or in different apertures
 *
 * @param[in] pDstInfo  TRANSFER_SURFACE info for destination region
 * @param[in] pSrcInfo  TRANSFER_SURFACE info for source region
 * @param[in] size      Size in bytes of the memory transfer
 * @param[in] flags     Flags
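 *
 * Usage sketch (illustrative only; the memdescs come from the caller):
 *
 *   TRANSFER_SURFACE src = {.pMemDesc = pSrcMemDesc, .offset = 0};
 *   TRANSFER_SURFACE dst = {.pMemDesc = pDstMemDesc, .offset = 0};
 *   NV_ASSERT_OK(memmgrMemCopy(pMemoryManager, &dst, &src, size,
 *                              TRANSFER_FLAGS_NONE));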
 */
NV_STATUS
memmgrMemCopy_IMPL
(
    MemoryManager    *pMemoryManager,
    TRANSFER_SURFACE *pDstInfo,
    TRANSFER_SURFACE *pSrcInfo,
    NvU32             size,
    NvU32             flags
)
{
    TRANSFER_TYPE transferType = memmgrGetMemTransferType(pMemoryManager,
                                                          pDstInfo, pSrcInfo);

    return memmgrMemCopyWithTransferType(pMemoryManager, pDstInfo, pSrcInfo,
                                         size, transferType, flags);
}

/*!
 * @brief This function is used for setting a memory region to a constant state
 *
 * @param[in] pDstInfo  TRANSFER_SURFACE info for the destination region
 * @param[in] value     Value to be written to the region
 * @param[in] size      Size in bytes of the memory to be initialized
 * @param[in] flags     Flags
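 *
 * Usage sketch (illustrative only): zero out the first page of a surface:
 *
 *   TRANSFER_SURFACE dst = {.pMemDesc = pMemDesc, .offset = 0};
 *   NV_ASSERT_OK(memmgrMemSet(pMemoryManager, &dst, 0, RM_PAGE_SIZE,
 *                             TRANSFER_FLAGS_NONE));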
 */
NV_STATUS
memmgrMemSet_IMPL
(
    MemoryManager    *pMemoryManager,
    TRANSFER_SURFACE *pDstInfo,
    NvU32             value,
    NvU32             size,
    NvU32             flags
)
{
    TRANSFER_TYPE transferType = memmgrGetMemTransferType(pMemoryManager,
                                                          pDstInfo, NULL);

    return memmgrMemSetWithTransferType(pMemoryManager, pDstInfo, value,
                                        size, transferType, flags);
}

/*!
 * @brief This function is used for setting an entire memory region described
 *        by a memory descriptor to a constant state
 *
 * @param[in] pMemDesc  Memory descriptor of the region to memset
 * @param[in] value     Value to be written to the region
 * @param[in] flags     Flags
 */
NV_STATUS
memmgrMemDescMemSet_IMPL
(
    MemoryManager     *pMemoryManager,
    MEMORY_DESCRIPTOR *pMemDesc,
    NvU32              value,
    NvU32              flags
)
{
    TRANSFER_SURFACE transferSurface = {.offset = 0, .pMemDesc = pMemDesc};
    TRANSFER_TYPE    transferType = memmgrGetMemTransferType(pMemoryManager,
                                                             &transferSurface, NULL);

    return memmgrMemSetWithTransferType(pMemoryManager, &transferSurface, value,
                                        (NvU32)memdescGetSize(pMemDesc),
                                        transferType, flags);
}

/*!
 * @brief This function is used for writing data placed in a user buffer
 *        to a given memory region
 *
 * @param[in] pDstInfo  TRANSFER_SURFACE info for the destination region
 * @param[in] pBuf      Buffer allocated by caller
 * @param[in] size      Size in bytes of the buffer
 * @param[in] flags     Flags
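 *
 * Usage sketch (illustrative only; pBuf and bufSize come from the caller):
 *
 *   TRANSFER_SURFACE dst = {.pMemDesc = pMemDesc, .offset = 0};
 *   NV_ASSERT_OK(memmgrMemWrite(pMemoryManager, &dst, pBuf, bufSize,
 *                               TRANSFER_FLAGS_NONE));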
 */
NV_STATUS
memmgrMemWrite_IMPL
(
    MemoryManager    *pMemoryManager,
    TRANSFER_SURFACE *pDstInfo,
    void             *pBuf,
    NvU64             size,
    NvU32             flags
)
{
    TRANSFER_TYPE transferType = memmgrGetMemTransferType(pMemoryManager,
                                                          pDstInfo, NULL);

    return memmgrMemWriteWithTransferType(pMemoryManager, pDstInfo, pBuf,
                                          size, transferType, flags);
}

/*!
 * @brief This function is used for reading specified number of bytes from
 *        a source memory region into a caller passed buffer
 *
 * @param[in] pSrcInfo  TRANSFER_SURFACE info for the source region
 * @param[in] pBuf      Caller allocated buffer
 * @param[in] size      Size in bytes of the buffer
 * @param[in] flags     Flags
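 *
 * Usage sketch (illustrative only; pBuf and bufSize come from the caller):
 *
 *   TRANSFER_SURFACE src = {.pMemDesc = pMemDesc, .offset = 0};
 *   NV_ASSERT_OK(memmgrMemRead(pMemoryManager, &src, pBuf, bufSize,
 *                              TRANSFER_FLAGS_NONE));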
 */
NV_STATUS
memmgrMemRead_IMPL
(
    MemoryManager    *pMemoryManager,
    TRANSFER_SURFACE *pSrcInfo,
    void             *pBuf,
    NvU64             size,
    NvU32             flags
)
{
    TRANSFER_TYPE transferType = memmgrGetMemTransferType(pMemoryManager,
                                                          NULL, pSrcInfo);

    return memmgrMemReadWithTransferType(pMemoryManager, pSrcInfo, pBuf,
                                         size, transferType, flags);
}

/*!
 * @brief This helper function can be used to begin transfers
 *
 * @param[in] pTransferInfo      Transfer information
 * @param[in] shadowBufSize      Size of allocated shadow buffer in case of shadow mapping
 * @param[in] flags              Flags
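 *
 * Usage sketch (illustrative only): every successful begin must be paired
 * with an end on the same surface:
 *
 *   NvU8 *pPtr = memmgrMemBeginTransfer(pMemoryManager, &surf, 0, flags);
 *   if (pPtr != NULL)
 *   {
 *       // CPU reads/writes through pPtr go here
 *       memmgrMemEndTransfer(pMemoryManager, &surf, 0, flags);
 *   }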
 */
NvU8 *
memmgrMemBeginTransfer_IMPL
(
    MemoryManager     *pMemoryManager,
    TRANSFER_SURFACE  *pTransferInfo,
    NvU64              shadowBufSize,
    NvU32              flags
)
{
    TRANSFER_TYPE      transferType = memmgrGetMemTransferType(pMemoryManager,
                                                               pTransferInfo, NULL);
    MEMORY_DESCRIPTOR *pMemDesc     = pTransferInfo->pMemDesc;
    NvU64              offset       = pTransferInfo->offset;
    OBJGPU            *pGpu         = ENG_GET_GPU(pMemoryManager);
    NvU8              *pPtr         = NULL;
    NvU64              memSz        = 0;

    NV_ASSERT_OR_RETURN(pMemDesc != NULL, NULL);
    NV_ASSERT_OR_RETURN((memSz = memdescGetSize(pMemDesc)) >= shadowBufSize, NULL);
    NV_ASSERT_OR_RETURN(memdescGetKernelMapping(pMemDesc) == NULL, NULL);

    memSz = shadowBufSize == 0 ? memSz : shadowBufSize;

    switch (transferType)
    {
        case TRANSFER_TYPE_PROCESSOR:
            if (flags & TRANSFER_FLAGS_USE_BAR1)
            {
                NvP64 pPriv;
                NvU32 protect = NV_PROTECT_READ_WRITE;

                if (flags & TRANSFER_FLAGS_MAP_PROTECT_READABLE)
                {
                    protect = NV_PROTECT_READABLE;
                }
                else if (flags & TRANSFER_FLAGS_MAP_PROTECT_WRITEABLE)
                {
                    protect = NV_PROTECT_WRITEABLE;
                }

                NV_ASSERT_OR_RETURN(memdescMap(pMemDesc, offset, memSz, NV_TRUE, protect,
                    (NvP64*) &pPtr, &pPriv) == NV_OK, NULL);
                memdescSetKernelMappingPriv(pMemDesc, pPriv);
                break;
            }
            NV_ASSERT_OR_RETURN((pPtr = memdescMapInternal(pGpu, pMemDesc, flags)) != NULL, NULL);
            pPtr = &pPtr[offset];

            break;
        case TRANSFER_TYPE_GSP_DMA:
        case TRANSFER_TYPE_CE:
            if (flags & TRANSFER_FLAGS_SHADOW_ALLOC)
            {
                NV_ASSERT_OR_RETURN((pPtr = portMemAllocNonPaged(memSz)), NULL);
                if (flags & TRANSFER_FLAGS_SHADOW_INIT_MEM)
                {
                    NV_ASSERT_OK(memmgrMemRead(pMemoryManager, pTransferInfo, pPtr, memSz, flags));
                }
            }
            break;
        default:
            NV_ASSERT(0);
    }
    memdescSetKernelMapping(pMemDesc, pPtr);
    return pPtr;
}

/*!
 * @brief This helper function can be used to end transfers
 *
 * @param[in] pTransferInfo      Transfer information
 * @param[in] shadowBufSize      Size of allocated shadow buffer in case of shadow mapping
 * @param[in] flags              Flags
 */
void
memmgrMemEndTransfer_IMPL
(
    MemoryManager     *pMemoryManager,
    TRANSFER_SURFACE  *pTransferInfo,
    NvU64              shadowBufSize,
    NvU32              flags
)
{
    TRANSFER_TYPE      transferType = memmgrGetMemTransferType(pMemoryManager,
                                                               pTransferInfo, NULL);
    MEMORY_DESCRIPTOR *pMemDesc     = pTransferInfo->pMemDesc;
    NvU64              offset       = pTransferInfo->offset;
    OBJGPU            *pGpu         = ENG_GET_GPU(pMemoryManager);
    NvU64              memSz        = 0;
    NvU8              *pMapping     = NULL;

    NV_ASSERT_OR_RETURN_VOID(pMemDesc != NULL);
    pMapping = memdescGetKernelMapping(pMemDesc);

    NV_ASSERT_OR_RETURN_VOID((memSz = memdescGetSize(pMemDesc)) >= (shadowBufSize + offset));
    memSz = shadowBufSize == 0 ? memSz : shadowBufSize;

    memdescSetKernelMapping(pMemDesc, NULL);

    switch (transferType)
    {
        case TRANSFER_TYPE_PROCESSOR:
            if (flags & TRANSFER_FLAGS_USE_BAR1)
            {
                NvP64 pPriv = memdescGetKernelMappingPriv(pMemDesc);
                memdescSetKernelMappingPriv(pMemDesc, NULL);
                if (pMapping != NULL)
                {
                    memdescUnmap(pMemDesc, NV_TRUE, 0, pMapping, pPriv);
                }
                return;
            }
            memdescUnmapInternal(pGpu, pMemDesc, flags);
            return;
        case TRANSFER_TYPE_GSP_DMA:
        case TRANSFER_TYPE_CE:
            if (pMapping != NULL)
            {
                NV_ASSERT_OK(memmgrMemWrite(pMemoryManager, pTransferInfo, pMapping, memSz, flags));
                portMemFree(pMapping);
            }
            return;
        default:
            NV_ASSERT(0);
    }
    return;
}

/*!
 * @brief Helper function that ends transfers to a memdesc with default offset/size
 *
 * @param[in] pMemDesc           Memory descriptor to end transfer to
 * @param[in] flags              Flags
 */
void
memmgrMemDescEndTransfer_IMPL
(
    MemoryManager *pMemoryManager,
    MEMORY_DESCRIPTOR *pMemDesc,
    NvU32 flags
)
{
    if (pMemDesc == NULL)
    {
        return;
    }

    TRANSFER_SURFACE transferSurface = {.offset = 0, .pMemDesc = pMemDesc};
    memmgrMemEndTransfer(pMemoryManager, &transferSurface, memdescGetSize(pMemDesc), flags);
}

/*!
 * @brief Helper function that begins transfers to a memdesc with default offset/size
 *
 * @param[in] pMemDesc           Memory descriptor to begin transfer to
 * @param[in] flags              Flags
 */
NvU8 *
memmgrMemDescBeginTransfer_IMPL
(
    MemoryManager *pMemoryManager,
    MEMORY_DESCRIPTOR *pMemDesc,
    NvU32 flags
)
{
    NV_ASSERT_OR_RETURN(pMemDesc != NULL, NULL);
    TRANSFER_SURFACE transferSurface = {.offset = 0, .pMemDesc = pMemDesc};
    return memmgrMemBeginTransfer(pMemoryManager, &transferSurface, memdescGetSize(pMemDesc), flags);
}

/*!
 * @brief This function is used to allocate common resources across memory
 *        classes, and must be used before memory-specific resource alloc.
 *
 * @param[in/out] pAllocRequest     User-provided alloc request struct
 * @param[in/out] pFbAllocInfo      Initialized FB_ALLOC_INFO struct to alloc
 */
NV_STATUS
memmgrAllocResources_IMPL
(
    OBJGPU                      *pGpu,
    MemoryManager               *pMemoryManager,
    MEMORY_ALLOCATION_REQUEST   *pAllocRequest,
    FB_ALLOC_INFO               *pFbAllocInfo
)
{
    NV_STATUS                    status        = NV_OK;
    NvU64                        alignment     = 0;
    NV_MEMORY_ALLOCATION_PARAMS *pVidHeapAlloc = pAllocRequest->pUserParams;
    NV_ADDRESS_SPACE             addrSpace     = memmgrAllocGetAddrSpace(pMemoryManager, pVidHeapAlloc->flags,
                                                                         pFbAllocInfo->retAttr);

    NvU64                        pageSize      = 0;
    NvBool                       bAllocedHwRes = NV_FALSE;

    // IRQL TEST:  must be running at equivalent of passive-level
    IRQL_ASSERT_AND_RETURN(!osIsRaisedIRQL());

    //
    // Check for valid size.
    //
    if (pVidHeapAlloc->size == 0)
        return NV_ERR_INVALID_ARGUMENT;

    //
    // Ensure a valid allocation pVidHeapAlloc->type was passed in
    //
    if (pVidHeapAlloc->type > NVOS32_NUM_MEM_TYPES - 1)
        return NV_ERR_INVALID_ARGUMENT;

    if (ADDR_VIRTUAL != addrSpace)
    {
        // If vidmem not requested explicitly, decide on the physical location.
        if (FLD_TEST_DRF(OS32, _ATTR, _LOCATION, _PCI, pFbAllocInfo->retAttr) ||
            FLD_TEST_DRF(OS32, _ATTR, _LOCATION, _ANY, pFbAllocInfo->retAttr))
        {
            if (ADDR_FBMEM == addrSpace)
            {
                pFbAllocInfo->retAttr = FLD_SET_DRF(OS32, _ATTR, _LOCATION, _VIDMEM, pFbAllocInfo->retAttr);
            }
            else
            {
                pFbAllocInfo->retAttr = FLD_SET_DRF(OS32, _ATTR, _LOCATION, _PCI, pFbAllocInfo->retAttr);
            }
        }
    }
    else // Virtual
    {
        // Clear location to ANY since virtual does not associate with location.
        pFbAllocInfo->retAttr = FLD_SET_DRF(OS32, _ATTR, _LOCATION, _ANY, pFbAllocInfo->retAttr);
    }

    // Fetch RM page size
    pageSize = memmgrDeterminePageSize(pMemoryManager, pFbAllocInfo->hClient, pFbAllocInfo->size,
                                       pFbAllocInfo->format, pFbAllocInfo->pageFormat->flags,
                                       &pFbAllocInfo->retAttr, &pFbAllocInfo->retAttr2);
    if (!IsAMODEL(pGpu) && pageSize == 0)
    {
        status = NV_ERR_INVALID_STATE;
        NV_PRINTF(LEVEL_ERROR, "memmgrDeterminePageSize failed, status: 0x%x\n", status);
        goto failed;
    }

    // Fetch memory alignment
    status = memmgrAllocDetermineAlignment_HAL(pGpu, pMemoryManager, &pFbAllocInfo->size, &pFbAllocInfo->align,
                                               pFbAllocInfo->alignPad, pFbAllocInfo->pageFormat->flags,
                                               pFbAllocInfo->retAttr, pFbAllocInfo->retAttr2, 0);
    if (status != NV_OK)
    {
        NV_PRINTF(LEVEL_ERROR, "memmgrAllocDetermineAlignment failed, status: 0x%x\n", status);
        goto failed;
    }

    //
    // Call into HAL to reserve any hardware resources for
    // the specified memory pVidHeapAlloc->type.
    // If the alignment was changed due to a HW limitation, and the
    // flag NVOS32_ALLOC_FLAGS_ALIGNMENT_FORCE is set, bad_argument
    // will be passed back from the HAL
    //
    status = memmgrAllocHwResources(pGpu, pMemoryManager, pFbAllocInfo);
    bAllocedHwRes = NV_TRUE;

    pVidHeapAlloc->attr  = pFbAllocInfo->retAttr;
    pVidHeapAlloc->attr2 = pFbAllocInfo->retAttr2;
    pVidHeapAlloc->format = pFbAllocInfo->format;
    pVidHeapAlloc->comprCovg = pFbAllocInfo->comprCovg;
    pVidHeapAlloc->zcullCovg = pFbAllocInfo->zcullCovg;

    if (status != NV_OK)
    {
        //
        // probably means we passed in a bogus pVidHeapAlloc->type or no tiling resources available
        // when tiled memory attribute was set to REQUIRED
        //
        NV_PRINTF(LEVEL_ERROR, "fbAlloc failure!\n");
        goto failed;
    }

    // call HAL to set resources
    status = memmgrSetAllocParameters_HAL(pGpu, pMemoryManager, pFbAllocInfo);

    if (status != NV_OK)
    {
        //
        // Two possibilities: either some attribute was set to REQUIRED and we
        // ran out of resources, or an unaligned address / size was passed down.
        // Free up memory and fail this call. heapFree will fix up heap pointers.
        //
        goto failed;
    }

    //
    // For fixed allocations, check if the alignment needs to be adjusted.
    // Some hardware units request allocations aligned to less than the
    // page size, which can be handled through alignPad.
    //
    if (pVidHeapAlloc->flags & NVOS32_ALLOC_FLAGS_FIXED_ADDRESS_ALLOCATE)
    {
        //
        // is our desired offset suitably aligned?
        // if not adjust alignment using alignPad(offset into a page), the
        // allocation is page size aligned as required for swizzling.
        //
        if (pFbAllocInfo->desiredOffset % (pFbAllocInfo->align + 1))
        {
           pFbAllocInfo->alignPad = pFbAllocInfo->desiredOffset % (pFbAllocInfo->align + 1);
           pFbAllocInfo->desiredOffset -= pFbAllocInfo->alignPad;
        }
    }

    //
    // Refresh search parameters.
    //
    pFbAllocInfo->adjustedSize = pFbAllocInfo->size - pFbAllocInfo->alignPad;
    pVidHeapAlloc->height = pFbAllocInfo->height;
    pVidHeapAlloc->pitch  = pFbAllocInfo->pitch;

    //
    // The api takes alignment-1 (used to be a mask).
    //
    alignment = pFbAllocInfo->align + 1;
    pVidHeapAlloc->alignment = pFbAllocInfo->align + 1;      // convert mask to size

    //
    // Allow caller to request host page alignment to make it easier
    // to move things around with host os VM subsystem
    //
    if ((pVidHeapAlloc->flags & NVOS32_ALLOC_FLAGS_FORCE_ALIGN_HOST_PAGE) &&
        (addrSpace == ADDR_FBMEM))
    {
        OBJSYS *pSys         = SYS_GET_INSTANCE();
        NvU64   hostPageSize = pSys->cpuInfo.hostPageSize;

        // hostPageSize *should* always be set, but....
        if (hostPageSize == 0)
            hostPageSize = RM_PAGE_SIZE;

        alignment = memUtilsLeastCommonAlignment(alignment, hostPageSize);
    }

    pVidHeapAlloc->alignment = alignment;
    pFbAllocInfo->align = alignment - 1;

    return status;

failed:
    if (bAllocedHwRes)
    {
        memmgrFreeHwResources(pGpu, pMemoryManager, pFbAllocInfo);
    }

    return status;
}

/*!
 * @brief This function is used to create a memory descriptor if needed.
 *
 * @param[in/out] pAllocRequest     User-provided alloc request struct
 * @param[in/out] pFbAllocInfo      Initialized FB_ALLOC_INFO struct to alloc
 * @param[out]    ppMemDesc         Double pointer to created descriptor
 * @param[in]     pHeap             Heap pointer to store in descriptor
 * @param[in]     addrSpace         Address space identifier
 * @param[in]     bContig           Whether the allocation must be physically contiguous
 * @param[out]    bAllocedMemDesc   NV_TRUE if a descriptor was created
 */
NV_STATUS
memUtilsAllocMemDesc
(
    OBJGPU                     *pGpu,
    MEMORY_ALLOCATION_REQUEST  *pAllocRequest,
    FB_ALLOC_INFO              *pFbAllocInfo,
    MEMORY_DESCRIPTOR         **ppMemDesc,
    Heap                       *pHeap,
    NV_ADDRESS_SPACE            addrSpace,
    NvBool                      bContig,
    NvBool                     *bAllocedMemDesc
)
{
    NV_STATUS status = NV_OK;

    //
    // Allocate a memory descriptor if needed. We do this after the fbHwAllocResources() call
    // so we have the updated size information.  Linear callers like memdescAlloc() can live with
    // only having access to the requested size in bytes, but block linear callers really do
    // need to allocate after fbAlloc() rounding takes place.
    //
    if (pAllocRequest->pMemDesc == NULL)
    {
        NvU64 memDescFlags = MEMDESC_FLAGS_SKIP_RESOURCE_COMPUTE;

        //
        // Allocate a contig vidmem descriptor now; if needed we'll
        // allocate a new noncontig memdesc later
        //
        status = memdescCreate(&pAllocRequest->pMemDesc, pGpu, pFbAllocInfo->adjustedSize, 0, bContig,
                               addrSpace, NV_MEMORY_UNCACHED, memDescFlags);

        if (status != NV_OK)
        {
            NV_PRINTF(LEVEL_ERROR, "cannot alloc memDesc!\n");
            return status;
        }

        *bAllocedMemDesc = NV_TRUE;
    }

    *ppMemDesc = pAllocRequest->pMemDesc;
    (*ppMemDesc)->pHeap = pHeap;

    // Set attributes tracked by the memdesc
    memdescSetPteKind(*ppMemDesc, pFbAllocInfo->format);
    memdescSetHwResId(*ppMemDesc, pFbAllocInfo->hwResId);

    return status;
}

/*!
 * Memsets the memory for the given memory descriptor with the given value.
 * This function assumes that BAR2 is not yet available. Thus either the BAR0
 * window to FB or a memmap to SYSMEM will be used, depending on the memory
 * location.
 *
 * @param[in] pGpu      GPU object pointer
 * @param[in] pMemDesc  Memory descriptor for the memory to memset
 * @param[in] value     Value to memset to.
 */
NV_STATUS
memUtilsMemSetNoBAR2(OBJGPU *pGpu, PMEMORY_DESCRIPTOR pMemDesc, NvU8 value)
{
    KernelBus  *pKernelBus = GPU_GET_KERNEL_BUS(pGpu);
    NvU8       *pMap  = NULL;
    void       *pPriv = NULL;
    RmPhysAddr  physAddr;
    RmPhysAddr  physAddrOrig;
    NvU64       sizeInDWord;
    NvU32       sizeOfDWord = sizeof(NvU32);
    NvU32       bar0Addr;
    NvU32       i;

    NV_ASSERT((pMemDesc != NULL) &&
              (pMemDesc->Size & (sizeOfDWord-1)) == 0);
    sizeInDWord = pMemDesc->Size / sizeOfDWord;

    //
    // BAR2 is not yet initialized. Thus use either the BAR0 window or
    // memmap to initialize the given surface.
    //
    NV_ASSERT(pKernelBus->virtualBar2[GPU_GFID_PF].pCpuMapping == NULL);
    switch (memdescGetAddressSpace(pMemDesc))
    {
        case ADDR_FBMEM:
            if (KBUS_BAR0_PRAMIN_DISABLED(pGpu))
            {
                NvU8 *pMap = kbusMapRmAperture_HAL(pGpu, pMemDesc);
                NV_ASSERT_OR_RETURN(pMap != NULL, NV_ERR_INSUFFICIENT_RESOURCES);
                portMemSet(pMap, value, pMemDesc->Size);
                kbusUnmapRmAperture_HAL(pGpu, pMemDesc, &pMap, NV_TRUE);

                break;
            }
            //
            // Set the BAR0 window to encompass the given surface while
            // saving off the location to where the BAR0 window was
            // previously pointing.
            //
            physAddr = memdescGetPhysAddr(pMemDesc, AT_GPU, 0);
            NV_ASSERT((physAddr & (sizeOfDWord-1)) == 0);

            physAddrOrig = kbusGetBAR0WindowVidOffset_HAL(pGpu, pKernelBus);
            NV_ASSERT_OK_OR_RETURN(
                kbusSetBAR0WindowVidOffset_HAL(pGpu,
                                               pKernelBus,
                                               physAddr & ~0xffffULL));
            bar0Addr =
                NvU64_LO32(kbusGetBAR0WindowAddress_HAL(pKernelBus) +
                          (physAddr - kbusGetBAR0WindowVidOffset_HAL(pGpu, pKernelBus)));

            //
            // Iterate and initialize the given surface with BAR0
            // writes.
            //
            for (i = 0; i < sizeInDWord; i++)
            {
                GPU_REG_WR32(pGpu,
                             bar0Addr + (sizeOfDWord * i),
                             value);
            }

            //
            // Restore where the BAR0 window was previously pointing
            // to.
            //
            NV_ASSERT_OK_OR_RETURN(
                kbusSetBAR0WindowVidOffset_HAL(pGpu, pKernelBus, physAddrOrig));

            break;

        case ADDR_SYSMEM:
            // Plain old memmap.
            NV_ASSERT_OK_OR_RETURN(
                memdescMapOld(pMemDesc, 0,
                              pMemDesc->Size,
                              NV_TRUE, // kernel,
                              NV_PROTECT_READ_WRITE,
                              (void **)&pMap,
                              &pPriv));
            portMemSet(pMap, value, NvU64_LO32(pMemDesc->Size));
            memdescUnmapOld(pMemDesc, NV_TRUE, 0, pMap, pPriv);
            break;

        default:
            // Should not happen.
            NV_ASSERT(0);
            break;
    }

    return NV_OK;
}