1 /*
2  * SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
3  * SPDX-License-Identifier: MIT
4  *
5  * Permission is hereby granted, free of charge, to any person obtaining a
6  * copy of this software and associated documentation files (the "Software"),
7  * to deal in the Software without restriction, including without limitation
8  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
9  * and/or sell copies of the Software, and to permit persons to whom the
10  * Software is furnished to do so, subject to the following conditions:
11  *
12  * The above copyright notice and this permission notice shall be included in
13  * all copies or substantial portions of the Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
21  * DEALINGS IN THE SOFTWARE.
22  */
23 
24 /*!
25  * @file
26  * @brief Standard local frame buffer allocation and management routines
27  */
28 
29 #include "os/os.h"
30 #include "gpu/gpu.h"
31 #include "gpu/mem_mgr/mem_mgr.h"
32 #include "gpu/mem_mgr/heap.h"
33 #include "gpu/mem_sys/kern_mem_sys.h"
34 #include "mem_mgr/video_mem.h"
35 #include "mem_mgr/vaspace.h"
36 #include "mem_mgr/system_mem.h"
37 #include "gpu/mem_mgr/mem_utils.h"
38 #include "gpu/mem_mgr/virt_mem_allocator.h"
39 #include "gpu/mem_mgr/mem_desc.h"
40 #include "gpu_mgr/gpu_mgr.h"
41 #include "core/locks.h"
42 #include "class/cl0040.h" // NV01_MEMORY_LOCAL_USER
43 #include "vgpu/rpc.h"
44 #include "vgpu/vgpu_util.h"
45 #include "gpu/mmu/kern_gmmu.h"
46 #include "virtualization/hypervisor/hypervisor.h"
47 #include "gpu/device/device.h"
48 #include "kernel/gpu/intr/intr.h"
49 #include "platform/sli/sli.h"
50 
// Describes how a MEM_BLOCK changed so that the heap's auxiliary
// bookkeeping (block tree, noncontig free list) can be kept in sync
// via _heapUpdate().
typedef enum
{
    BLOCK_ADD,                // block inserted into the heap
    BLOCK_REMOVE,             // block removed from the heap
    BLOCK_SIZE_CHANGED,       // block's begin/end range changed
    BLOCK_FREE_STATE_CHANGED, // block toggled between free and owned
} BlockAction;
58 
59 //
60 // Statics
61 //
// Block lifetime and placement helpers
static NV_STATUS _heapBlockFree(OBJGPU *, Heap *, NvHandle, NvHandle, MEM_BLOCK *);
static void      _heapSetTexturePlacement(Heap *, NvU32, NvU32, NvBool*,
                                          NvU32*, NvU8*);
static NV_STATUS _heapGetMaxFree(Heap *, NvU64 *, NvU64 *);
static NV_STATUS _heapGetBankPlacement(OBJGPU *, Heap *, NvU32,
                                       NvU32 *, NvU32, NvU32, NvU32 *);
static MEM_BLOCK *_heapFindAlignedBlockWithOwner(OBJGPU *, Heap *, NvU32,
                                                NvU64/* aligned*/);
static NV_STATUS _heapProcessFreeBlock(OBJGPU *, MEM_BLOCK *, MEM_BLOCK **,
                                       MEM_BLOCK **, Heap *,
                                       MEMORY_ALLOCATION_REQUEST *,
                                       NvHandle, OBJHEAP_ALLOC_DATA *,
                                       FB_ALLOC_INFO *, NvU64, NvU64 *);
// Noncontiguous-allocation support
static void _heapAddBlockToNoncontigList(Heap *, MEM_BLOCK *);
static void _heapRemoveBlockFromNoncontigList(Heap *, MEM_BLOCK *);
static NV_STATUS _heapFindBlockByOffset(OBJGPU *, Heap *, NvU32,
                                        MEMORY_DESCRIPTOR *, NvU64,
                                        MEM_BLOCK **);
static NV_STATUS _heapAllocNoncontig(OBJGPU *, NvHandle, Heap *,
                                     MEMORY_ALLOCATION_REQUEST *, NvHandle,
                                     OBJHEAP_ALLOC_DATA *, FB_ALLOC_INFO *,
                                     NvU32, NvU64, NvU64 *, MEMORY_DESCRIPTOR *,
                                     HWRESOURCE_INFO **);
// Bookkeeping and blacklist maintenance
static NV_STATUS _heapUpdate(Heap *, MEM_BLOCK *, BlockAction);
static void _heapAdjustFree(Heap *pHeap, NvS64 blockSize, NvBool internalHeap);
static void _heapBlacklistChunksInFreeBlocks(OBJGPU *, Heap *);
88 
89 #ifdef DEBUG
90 
91 /****************************************************************************/
92 /*                                                                          */
93 /*                             DEBUG support!                               */
94 /*                                                                          */
95 /****************************************************************************/
96 
NvU32 dbgDumpHeap = 0;        // nonzero: also dump the heap on every HEAP_VALIDATE
NvU32 dbgReverseDumpHeap = 0; // nonzero: walk block lists backwards when dumping

static void      _heapDump(Heap *);
static void      _heapValidate(Heap *);

// Consistency-check the heap (DEBUG builds only); optionally dump it too.
#define HEAP_VALIDATE(h)    {_heapValidate(h);if(dbgDumpHeap)_heapDump(h);}
104 
/*!
 * Render a 32-bit owner tag as a printable 4-character string.
 *
 * @param[in]  owner   Owner tag packed as four ASCII bytes, MSB first.
 * @param[out] string  Receives 4 characters plus a NUL terminator (caller
 *                     supplies at least 5 bytes); any byte outside the
 *                     printable ASCII range is replaced with '?'.
 */
static void ConvertOwnerToString(NvU32 owner, char *string)
{
    int shift;
    int idx = 0;

    // Unpack most-significant byte first.
    for (shift = 24; shift >= 0; shift -= 8)
    {
        char c = (char)((owner >> shift) & 0xFF);

        // Assuming ASCII these should be "safe" printable characters.
        if ((c < ' ') || (c > 0x7E))
        {
            c = '?';
        }
        string[idx++] = c;
    }
    string[idx] = 0;
}
120 
/*!
 * @brief DEBUG helper: print heap totals, the full block list, the free
 *        block list, and the free byte count recomputed from the list.
 *
 * Traversal direction follows the global dbgReverseDumpHeap toggle.
 * Both lists are circular, so each walk stops when it returns to the
 * list head.  No-op if pHeap is NULL.
 */
static void _heapDump
(
    Heap *pHeap
)
{
    NvU64       free;
    MEM_BLOCK  *pBlock;
    char        ownerString[5];   // 4 owner-tag characters + NUL

    if (!pHeap) return;

    NV_PRINTF(LEVEL_INFO, "Heap dump.  Size = 0x%08llx\n", pHeap->total);
    NV_PRINTF(LEVEL_INFO, "            Free = 0x%08llx\n", pHeap->free);
    NV_PRINTF(LEVEL_INFO, "        Reserved = 0x%08llx\n", pHeap->reserved);
    NV_PRINTF(LEVEL_INFO,
              "=================================================================\n");
    NV_PRINTF(LEVEL_INFO,
              "\t\t    Begin         End         Size    \t Type     ResId      Owner"
              "   \"owns\"\n");
    NV_PRINTF(LEVEL_INFO, "Block List %s\n",
              dbgReverseDumpHeap ? "Reverse" : "Forward");
    pBlock = pHeap->pBlockList;
    do
    {
        // In reverse mode step before printing, so the head prints last.
        if ( dbgReverseDumpHeap )
            pBlock = pBlock->prev;

        NV_PRINTF(LEVEL_INFO, "\t\t0x%08llx 0x%08llx 0x%08llx", pBlock->begin,
                  pBlock->end, 1 + (pBlock->end - pBlock->begin));

        if (pBlock->owner == NVOS32_BLOCK_TYPE_FREE) {
            NV_PRINTF_EX(NV_PRINTF_MODULE, LEVEL_INFO, "\tFREE\n");
        }
        else
        {
            ConvertOwnerToString(pBlock->owner, ownerString);
            NV_PRINTF_EX(NV_PRINTF_MODULE, LEVEL_INFO,
                         "\t0x%04x 0x%08x \"%s\"\n", pBlock->u0.type,
                         pBlock->owner, ownerString);
        }

        // In forward mode step after printing, so the head prints first.
        if ( !dbgReverseDumpHeap )
            pBlock = pBlock->next;
    } while (pBlock != pHeap->pBlockList);

    NV_PRINTF(LEVEL_INFO, "FREE Block List %s\n",
              dbgReverseDumpHeap ? "Reverse" : "Forward");
    free  = 0;
    pBlock = pHeap->pFreeBlockList;
    if (pBlock)
        do
        {
            if ( dbgReverseDumpHeap )
                pBlock = pBlock->u0.prevFree;

            NV_PRINTF(LEVEL_INFO, "\t\t0x%08llx 0x%08llx 0x%08llx\tFREE\n",
                      pBlock->begin, pBlock->end,
                      1 + (pBlock->end - pBlock->begin));

            // Recompute the free total independently of pHeap->free so a
            // reader can compare the two.
            free += pBlock->end - pBlock->begin + 1;

            if ( !dbgReverseDumpHeap )
                pBlock = pBlock->u1.nextFree;
        } while (pBlock != pHeap->pFreeBlockList);

    NV_PRINTF(LEVEL_INFO, "\tCalculated free count = 0x%08llx\n", free);
}
188 
/*!
 * @brief DEBUG helper: walk the circular block list once and verify heap
 *        invariants, dumping the heap and hitting a breakpoint on any
 *        inconsistency.
 *
 * Checks performed:
 *   - every free block appears, in order, on the free list;
 *   - consecutive blocks are adjacent (no holes);
 *   - first/last blocks line up with [pHeap->base, base + total - 1];
 *   - each block has begin <= end;
 *   - recomputed free/used byte counts agree with the cached totals.
 *
 * No-op if pHeap is NULL.
 */
static void _heapValidate
(
    Heap *pHeap
)
{
    MEM_BLOCK  *pBlock, *pBlockFree;
    NvU64       free, used;

    if (!pHeap) return;

    /*
     * Scan the blocks and check for consistency.
     */
    free      = 0;
    used      = 0;
    pBlock     = pHeap->pBlockList;
    pBlockFree = pHeap->pFreeBlockList;
    do
    {
        if (pBlock->owner == NVOS32_BLOCK_TYPE_FREE)
        {
            // Free blocks must appear on the free list in the same order
            // they appear on the block list.
            if (!pBlockFree)
            {
                NV_PRINTF(LEVEL_ERROR,
                          "Invalid free list with free blocks found.\n");
                _heapDump(pHeap);
                DBG_BREAKPOINT();
            }
            free += pBlock->end - pBlock->begin + 1;
            if (pBlock != pBlockFree)
            {
                NV_PRINTF(LEVEL_ERROR,
                          "Free list not consistent with block list.\n");
                _heapDump(pHeap);
                DBG_BREAKPOINT();
            }
            pBlockFree = pBlockFree->u1.nextFree;
        }
        else
        {
            used += pBlock->end - pBlock->begin + 1;
        }
        if (pBlock->next != pHeap->pBlockList)
        {
            // Interior block: must abut its successor exactly.
            if (pBlock->end != pBlock->next->begin - 1)
            {
                NV_PRINTF(LEVEL_ERROR,
                          "Hole between blocks at offset = 0x%llx\n",
                          pBlock->end);
                _heapDump(pHeap);
                DBG_BREAKPOINT();
            }
        }
        else
        {
            // Last block: must end at the heap top, and (since the list is
            // circular) its successor is the first block, which must start
            // at the heap base.
            if (pBlock->end != pHeap->base + pHeap->total - 1)
            {
                NV_PRINTF(LEVEL_ERROR, "Last block doesn't end at top.\n");
                _heapDump(pHeap);
                DBG_BREAKPOINT();
            }
            if (pBlock->next->begin != pHeap->base)
            {
                NV_PRINTF(LEVEL_ERROR,
                          "First block doesn't start at bottom.\n");
                _heapDump(pHeap);
                DBG_BREAKPOINT();
            }
        }
        if (pBlock->end < pBlock->begin)
        {
            NV_PRINTF(LEVEL_ERROR,
                      "Validate: Invalid block begin = 0x%08llx\n",
                      pBlock->begin);
            NV_PRINTF(LEVEL_ERROR,
                      "                        end   = 0x%08llx\n",
                      pBlock->end);
            _heapDump(pHeap);
            DBG_BREAKPOINT();
        }
        pBlock = pBlock->next;
    } while (pBlock != pHeap->pBlockList);
    if (free != pHeap->free)
    {
        NV_PRINTF(LEVEL_ERROR,
                  "Calculated free count (%llx) not consistent with heap free count (%llx).\n",
                  free, pHeap->free);
        _heapDump(pHeap);
        DBG_BREAKPOINT();
    }
    if ((used + free) > pHeap->total)
    {
        NV_PRINTF(LEVEL_ERROR,
                  "Calculated used count (%llx) not consistent with heap size (%llx).\n",
                  used + free, pHeap->total);
        _heapDump(pHeap);
        DBG_BREAKPOINT();
    }
}
288 #else
289 #define HEAP_VALIDATE(h)
290 #endif // DEBUG
291 
292 
293 /****************************************************************************/
294 /*                                                                          */
295 /*                             Heap Manager                                 */
296 /*                                                                          */
297 /****************************************************************************/
298 
/*!
 * @brief Carves a fixed-address reserved region out of the heap by making
 *        an internal contiguous allocation owned by RM (or PMA).
 *
 * @param[in]  pMemoryManager MemoryManager object pointer
 * @param[in]  pHeap          Heap to reserve from
 * @param[in]  offset         Absolute start offset of the region
 * @param[in]  size           Requested size; clamped so the region does not
 *                            extend past the heap end
 * @param[out] ppMemDesc      Receives the memdesc backing the reservation
 * @param[in]  isRmRsvdRegion NV_TRUE: RM-reserved region (counted in
 *                            pHeap->reserved); NV_FALSE: PMA-managed region
 * @param[in]  bProtected     NV_TRUE to allocate from protected memory
 *
 * @return NV_OK on success, or an allocation error.  Note an offset at or
 *         beyond the heap end returns NV_OK (not an error) so callers
 *         simply skip such regions — presumably intentional; see the
 *         NV_ASSERT_OR_RETURN below.
 */
static NV_STATUS heapReserveRegion
(
    MemoryManager      *pMemoryManager,
    Heap               *pHeap,
    NvU64               offset,
    NvU64               size,
    MEMORY_DESCRIPTOR **ppMemDesc,
    NvBool              isRmRsvdRegion,
    NvBool              bProtected
)
{
    NV_STATUS                    rmStatus           = NV_OK;
    OBJGPU                      *pGpu               = ENG_GET_GPU(pMemoryManager);
    // NOTE: "heapSize" is really the end address of the heap (base + total).
    NvU64                        heapSize           = (pHeap->base + pHeap->total);
    FB_ALLOC_INFO               *pFbAllocInfo       = NULL;
    FB_ALLOC_PAGE_FORMAT        *pFbAllocPageFormat = NULL;

    MEMORY_ALLOCATION_REQUEST allocRequest = {0};
    NV_MEMORY_ALLOCATION_PARAMS allocData = {0};

    NvU64 align = 0;
    NvU32 height = 1;
    NvU32 pitch = 1;
    // 4KB pages, physically contiguous — the reservation must be an exact
    // in-place carve-out, not a scattered allocation.
    NvU32 attr = DRF_DEF(OS32, _ATTR, _PAGE_SIZE, _4KB) |
        DRF_DEF(OS32, _ATTR, _PHYSICALITY, _CONTIGUOUS);
    NvU32 attr2 = DRF_DEF(OS32, _ATTR2, _INTERNAL, _YES);

    // Regions entirely past the heap end are skipped without error.
    NV_ASSERT_OR_RETURN((offset < heapSize), NV_OK);

    allocRequest.pUserParams = &allocData;

    // Fixed-address allocation at 'offset', tagged with the reserved owner
    // so heapDestruct can recognize (and free) it later.
    allocData.owner = ((isRmRsvdRegion) ? HEAP_OWNER_RM_RESERVED_REGION : HEAP_OWNER_PMA_RESERVED_REGION);
    allocData.height = height;
    allocData.type = ((isRmRsvdRegion) ?  NVOS32_TYPE_RESERVED : NVOS32_TYPE_PMA);
    allocData.flags = NVOS32_ALLOC_FLAGS_FIXED_ADDRESS_ALLOCATE;
    allocData.attr = attr;
    allocData.attr2 = attr2;
    allocData.pitch = pitch;
    allocData.alignment = align;
    allocData.size = NV_MIN(size, (heapSize - offset));  // clamp to heap end
    allocData.offset = offset;

    if (bProtected)
        allocData.flags |= NVOS32_ALLOC_FLAGS_PROTECTED;

    pFbAllocInfo = portMemAllocNonPaged(sizeof(FB_ALLOC_INFO));
    NV_ASSERT_TRUE_OR_GOTO(rmStatus, pFbAllocInfo != NULL, NV_ERR_NO_MEMORY, done);

    pFbAllocPageFormat = portMemAllocNonPaged(sizeof(FB_ALLOC_PAGE_FORMAT));
    NV_ASSERT_TRUE_OR_GOTO(rmStatus, pFbAllocPageFormat != NULL, NV_ERR_NO_MEMORY, done);

    portMemSet(pFbAllocInfo, 0, sizeof(FB_ALLOC_INFO));
    portMemSet(pFbAllocPageFormat, 0, sizeof(FB_ALLOC_PAGE_FORMAT));
    pFbAllocInfo->pageFormat = pFbAllocPageFormat;

    memUtilsInitFBAllocInfo(&allocData, pFbAllocInfo, 0, 0);

    // Standard two-step internal allocation: generic resource bookkeeping,
    // then the video-memory allocation against this heap.
    NV_ASSERT_OK_OR_GOTO(rmStatus,
        memmgrAllocResources(pGpu, pMemoryManager, &allocRequest, pFbAllocInfo),
        done);

    NV_ASSERT_OK_OR_GOTO(rmStatus,
        vidmemAllocResources(pGpu, pMemoryManager, &allocRequest, pFbAllocInfo, pHeap),
        done);

    NV_PRINTF(LEVEL_INFO, "Reserved heap for %s %llx..%llx\n",
              ((isRmRsvdRegion) ? "RM" : "PMA"), offset, (offset+size-1));

    *ppMemDesc = allocRequest.pMemDesc;

    // Account for reserved size removed from the total address space size
    if (isRmRsvdRegion)
    {
        pHeap->reserved += allocData.size;
    }

done:
    // Scratch structures only; the reservation itself lives on in the heap.
    portMemFree(pFbAllocPageFormat);
    portMemFree(pFbAllocInfo);

    return rmStatus;
}
381 
382 /*!
383  * @brief Initializes a heap object
384  *
385  * @param[in]     pFb           FB object ptr
386  * @param[in/out] pHeap         HEAP object ptr
387  * @param[in]     base          Base for this heap
388  * @param[in]     size          Size of this heap
389  * @param[in]     heapType      Heap type (Global or PMSA)
390  * @param[in]     pPtr          A generic pointer which will be typecasted based on heapType
391  */
NV_STATUS heapInitInternal_IMPL
(
    OBJGPU            *pGpu,
    Heap              *pHeap,
    NvU64              base,
    NvU64              size,
    HEAP_TYPE_INTERNAL heapType,
    void              *pPtr
)
{
    MEM_BLOCK              *pBlock;
    NvU32                   i;
    NV_STATUS               status;
    MemoryManager          *pMemoryManager = GPU_GET_MEMORY_MANAGER(pGpu);
    NvU32                   typeDataSize = 0;
    FB_REGION_DESCRIPTOR   *pFbRegion;
    MEMORY_DESCRIPTOR      *pPmsaMemDesc = NULL;

    //
    // Simply create a free heap.
    //
    NV_PRINTF(LEVEL_INFO,
              "Heap Manager: HEAP ABOUT TO BE CREATED. (Base: 0x%llx Size: 0x%llx)\n",
              base, size);

    // The whole range starts out free; reservations below subtract from it.
    pHeap->base  = base;
    pHeap->total = size;
    pHeap->free  = size;
    pHeap->reserved = 0;
    pHeap->heapType = heapType;

    pHeap->peakInternalUsage = 0;
    pHeap->peakExternalUsage = 0;
    pHeap->currInternalUsage = 0;
    pHeap->currExternalUsage = 0;


    // Set the flags based on HEAP type
    switch (heapType)
    {
        case HEAP_TYPE_RM_GLOBAL:
            pHeap->bHasFbRegions      = NV_TRUE;
            break;
        case HEAP_TYPE_PHYS_MEM_SUBALLOCATOR:
            // pPtr carries the suballocator's PHYS_MEM_SUBALLOCATOR_DATA;
            // remember its memdesc so we can take a reference on success.
            NV_ASSERT(pPtr != NULL);

            pHeap->bHasFbRegions      = NV_FALSE;
            typeDataSize = sizeof(PHYS_MEM_SUBALLOCATOR_DATA);
            pPmsaMemDesc = ((PHYS_MEM_SUBALLOCATOR_DATA *)pPtr)->pMemDesc;
            break;
        case HEAP_TYPE_PARTITION_LOCAL:
            pHeap->bHasFbRegions      = NV_TRUE;
            break;
        default:
            return NV_ERR_INVALID_ARGUMENT;
    }

    // Keep a private copy of the type-specific data (if any was supplied).
    pHeap->pHeapTypeSpecificData = NULL;
    if ((pPtr != NULL) && (typeDataSize > 0))
    {
        pHeap->pHeapTypeSpecificData = portMemAllocNonPaged(typeDataSize);
        if (pHeap->pHeapTypeSpecificData == NULL)
        {
            return NV_ERR_OPERATING_SYSTEM;
        }
        NV_ASSERT(pHeap->pHeapTypeSpecificData != NULL);
        portMemCopy(pHeap->pHeapTypeSpecificData, typeDataSize, pPtr, typeDataSize);
    }

    //
    // The heap begins life as a single free block spanning [base, base+size-1].
    // Both the block list and the free list are circular, so the block's
    // next/prev (and nextFree/prevFree) pointers all point back to itself.
    //
    pBlock = portMemAllocNonPaged(sizeof(MEM_BLOCK));
    if (pBlock == NULL)
    {
        return NV_ERR_OPERATING_SYSTEM;
    }
    portMemSet(pBlock, 0, sizeof(MEM_BLOCK));

    pBlock->owner    = NVOS32_BLOCK_TYPE_FREE;
    pBlock->textureId= 0;
    pBlock->begin    = base;
    pBlock->align    = 0;
    pBlock->alignPad = 0;
    pBlock->end      = base + size - 1;
    pBlock->u0.prevFree = pBlock;
    pBlock->u1.nextFree = pBlock;
    pBlock->next     = pBlock;
    pBlock->prev     = pBlock;
    pBlock->format   = 0;

    pHeap->pBlockList     = pBlock;
    pHeap->pFreeBlockList = pBlock;
    // Seed for handles given to internal allocations; the 0xcafe prefix is
    // presumably a debug-recognizable marker — TODO confirm.
    pHeap->memHandle      = 0xcafe0000;
    pHeap->numBlocks      = 1;
    pHeap->pBlockTree     = NULL;

    //
    // Set the client id as invalid since there isn't one that exists
    // Initialize the client texture data structure
    //
    portMemSet(pHeap->textureData, 0,
               sizeof(TEX_INFO) * MAX_TEXTURE_CLIENT_IDS);

    //
    // Call into the hal to get bank placement policy.  Note this will vary chip to chip, but let's allow the HAL to tell us
    // the implementation details.
    //
    status = memmgrGetBankPlacementData_HAL(pGpu, pMemoryManager, pHeap->placementStrategy);
    if (status != NV_OK)
    {
        //
        // ooops, can't get HAL version of where to place things - let's default to something
        //
        NV_PRINTF(LEVEL_ERROR,
                  "Heap Manager unable to get bank placement policy from HAL.\n");
        NV_PRINTF(LEVEL_ERROR,
                  "Heap Manager defaulting to BAD placement policy.\n");

        // Fallback: images grow up, everything else grows down, no bank
        // preference (0xFFFFFF00 fills the remaining strategy slots).
        pHeap->placementStrategy[BANK_PLACEMENT_IMAGE]  = ((0)
                                                            | BANK_MEM_GROW_UP
                                                            | MEM_GROW_UP
                                                            | 0xFFFFFF00);
        pHeap->placementStrategy[BANK_PLACEMENT_DEPTH]  = ((0)
                                                            | BANK_MEM_GROW_DOWN
                                                            | MEM_GROW_DOWN
                                                            | 0xFFFFFF00);
        pHeap->placementStrategy[BANK_PLACEMENT_TEX_OVERLAY_FONT]  = ((0)
                                                            | BANK_MEM_GROW_DOWN
                                                            | MEM_GROW_DOWN
                                                            | 0xFFFFFF00);
        pHeap->placementStrategy[BANK_PLACEMENT_OTHER]  = ((0)
                                                            | BANK_MEM_GROW_DOWN
                                                            | MEM_GROW_DOWN
                                                            | 0xFFFFFF00);
        status = NV_OK;
    }

    // Setup noncontig list
    pHeap->pNoncontigFreeBlockList = NULL;

    // insert first block into rb-tree
    if (NV_OK != _heapUpdate(pHeap, pBlock, BLOCK_ADD))
    {
        // NOTE(review): pBlock stays allocated and linked into the heap on
        // this path; cleanup is presumably left to heapDestruct — verify.
        return NV_ERR_INVALID_STATE;
    }

    //
    // If there are FB regions defined, check to see if any of them are
    // marked reserved. Tag those regions as reserved in the heap.
    //
    if ((pMemoryManager->Ram.numFBRegions > 0) && (pHeap->bHasFbRegions))
    {
        NvBool bConsoleFbRegionContentPreserved;
        FB_REGION_DESCRIPTOR consoleFbRegion;
        portMemSet(&consoleFbRegion, 0, sizeof(consoleFbRegion));

        if (heapType != HEAP_TYPE_PARTITION_LOCAL)
        {
            //
            // If a region of FB is actively being used for console display memory
            // on this GPU, mark it reserved in-place.
            //
            memmgrReserveConsoleRegion_HAL(pGpu, pMemoryManager, &consoleFbRegion);
            status = memmgrAllocateConsoleRegion_HAL(pGpu, pMemoryManager, &consoleFbRegion);
            if (status != NV_OK)
            {
                // Console reservation failure is deliberately non-fatal.
                NV_PRINTF(LEVEL_WARNING, "Squashing the error status after failing to allocate console region, status: %x\n",
                            status);
                status = NV_OK;
            }
        }

        //
        // Define PMA-managed regions
        // This will be moved to memmgr once we refactor SMC partitions
        //
        if (memmgrIsPmaEnabled(pMemoryManager) &&
            memmgrIsPmaSupportedOnPlatform(pMemoryManager) &&
            (heapType != HEAP_TYPE_PARTITION_LOCAL))
        {
            memmgrSetPmaInitialized(pMemoryManager, NV_TRUE);
            memmgrRegionSetupForPma(pGpu, pMemoryManager);
        }

        bConsoleFbRegionContentPreserved = NV_FALSE;

        if (heapType != HEAP_TYPE_PARTITION_LOCAL)
        {
            // For GSP RM, all PMA candidate regions are given to CPU RM for its use
            if (RMCFG_FEATURE_PLATFORM_GSP)
            {
                memmgrRegionSetupForPma(pGpu, pMemoryManager);
            }

            // Reserve every region that is either explicitly reserved or
            // (when PMA owns the FB) not part of RM's internal heap.
            for (i = 0; i < pMemoryManager->Ram.numFBRegions; i++)
            {
                pFbRegion = &pMemoryManager->Ram.fbRegion[i];

                // If the region is marked reserved, reserve it in the heap
                if (pFbRegion->bRsvdRegion ||
                    ((memmgrIsPmaInitialized(pMemoryManager) ||
                      RMCFG_FEATURE_PLATFORM_GSP) &&
                     !pFbRegion->bInternalHeap))
                {
                    NvU64 fbRegionBase;
                    MEMORY_DESCRIPTOR *pMemDesc = NULL;

                    // Skip regions which are outside the heap boundaries
                    if (pFbRegion->base < base && pFbRegion->limit < base)
                    {
                        continue;
                    }

                    // TODO: Remove SRIOV check and enable on baremetal as well.
                    if (IS_VIRTUAL_WITH_SRIOV(pGpu) && (pFbRegion->base >= (base + size)))
                    {
                        continue;
                    }

                    // Adjust base of reserved region on heap
                    fbRegionBase = NV_MAX(base, pFbRegion->base);

                    NV_PRINTF(LEVEL_INFO, "Reserve at %llx of size %llx\n",
                        fbRegionBase, (pFbRegion->limit - fbRegionBase + 1));

                    status = heapReserveRegion(
                        pMemoryManager,
                        pHeap,
                        fbRegionBase,
                        (pFbRegion->limit - fbRegionBase + 1),
                        &pMemDesc,
                        pFbRegion->bRsvdRegion,
                        pFbRegion->bProtected);

                    if (status != NV_OK || pMemDesc == NULL)
                    {
                        NV_PRINTF(LEVEL_ERROR, "failed to reserve %llx..%llx\n",
                                  pFbRegion->base, pFbRegion->limit);
                        return status;
                    }

                    // If this reserved region backs the active console, keep
                    // its contents across suspend/resume.
                    if ((pMemoryManager->Ram.ReservedConsoleDispMemSize > 0) &&
                        (pFbRegion->base == consoleFbRegion.base) && (pFbRegion->limit == consoleFbRegion.limit))
                    {
                        memdescSetFlag(pMemDesc, MEMDESC_FLAGS_LOST_ON_SUSPEND, NV_FALSE);
                        memdescSetFlag(pMemDesc, MEMDESC_FLAGS_PRESERVE_CONTENT_ON_SUSPEND, NV_TRUE);

                        bConsoleFbRegionContentPreserved = NV_TRUE;
                    }
                }
            }

            if ((pMemoryManager->Ram.ReservedConsoleDispMemSize > 0) &&
                !bConsoleFbRegionContentPreserved)
            {
                NV_PRINTF(LEVEL_ERROR,
                          "failed to preserve content of console display memory\n");
            }
        }

#ifdef DEBUG
        _heapDump(pHeap);
#endif
    } //if ((pMemoryManager->Ram.numFBRegions > 0) && (pHeap->bHasFbRegions))

    // Hand over all the memory of partition-heap to partition-PMA
    if ((heapType == HEAP_TYPE_PARTITION_LOCAL) &&
        (memmgrIsPmaInitialized(pMemoryManager)))
    {
        MEMORY_DESCRIPTOR *pMemDesc = NULL;
        NvBool bProtected = NV_FALSE;

    bProtected = gpuIsCCFeatureEnabled(pGpu);
        status = heapReserveRegion(
            pMemoryManager,
            pHeap,
            base,
            size,
            &pMemDesc,
            NV_FALSE,
            bProtected);

        if (status != NV_OK || pMemDesc == NULL)
        {
            NV_PRINTF(LEVEL_ERROR, "failed to reserve %llx..%llx\n", base,
                      base + size - 1);

                return status;
        }
    }

    // If PHYS_MEM_SUBALLOCATOR, increase its refCount
    if ((status == NV_OK) && (pPmsaMemDesc != NULL))
    {
        memdescAddRef(pPmsaMemDesc);
    }

    return (status);
}
689 
/*!
 * @brief Tears down a heap: frees blacklisted pages, releases every block,
 *        the console region, type-specific data, and (when applicable) the
 *        heap's PMA object.
 *
 * @param[in] pHeap  Heap to destroy.
 */
void
heapDestruct_IMPL
(
    Heap *pHeap
)
{
    MEM_BLOCK            *pBlock, *pBlockFirst, *pBlockNext;
    OBJGPU               *pGpu = ENG_GET_GPU(pHeap);
    MemoryManager        *pMemoryManager = GPU_GET_MEMORY_MANAGER(pGpu);
    NvBool                headptr_updated;
    MEMORY_DESCRIPTOR    *pPmsaMemDesc = NULL;

    NV_PRINTF(LEVEL_INFO, "Heap Manager: HEAP ABOUT TO BE DESTROYED.\n");

#ifdef DEBUG
    _heapDump(pHeap);
#endif

    // Free all blacklisted pages
    if (pHeap->blackListAddresses.count != 0)
    {
        heapFreeBlackListedPages(pGpu, pHeap);
    }

    //
    // Free all allocated blocks, but preserve primary surfaces.
    // If the head of our list changes, restart the search, since our terminating
    // block pointer may not be in the list anymore.
    //
    do
    {
        pBlock = pBlockFirst = pHeap->pBlockList;
        if (pBlock == NULL)
        {
            break;
        }

        headptr_updated = NV_FALSE;

        do
        {
            // Capture the successor first: _heapBlockFree may merge/unlink
            // the current block.
            pBlockNext = pBlock->next;

            // If we are freeing the reserved region created at heapInit, free the memory descriptor too
            if ((pBlock->allocedMemDesc) && ((pBlock->owner == HEAP_OWNER_RM_RESERVED_REGION) ||
                (pBlock->owner == HEAP_OWNER_PMA_RESERVED_REGION)))
            {
                memdescDestroy(pBlock->pMemDesc);
                pBlock->pMemDesc = NULL;
                pBlock->allocedMemDesc = NV_FALSE;
            }

            _heapBlockFree(pGpu, pHeap, NV01_NULL_OBJECT, NV01_NULL_OBJECT, pBlock);

            // restart scanning the list, if the heap->pBlockList changed
            if (pBlockFirst != pHeap->pBlockList)
            {
                headptr_updated = NV_TRUE;
                break;
            }

            pBlock = pBlockNext;

        } while (pBlock != pHeap->pBlockList);

    } while (headptr_updated);

    //
    // Now that the console region is no longer reserved, free the console
    // memdesc.
    //
    if (pHeap->heapType != HEAP_TYPE_PARTITION_LOCAL)
        memmgrReleaseConsoleRegion(pGpu, pMemoryManager);

    //
    // Free the heap structure, if we freed everything
    // (the first block represents the entire free space of the heap).
    // this is only done if the "internal" interface is used.
    // heapDestroy is an exported function now to user/display driver land,
    // and we don't want the heap structures being freed unless we've been
    // called from RM-land during a STATE_DESTROY
    //
    if ((pHeap->pBlockList != NULL) &&
        (pHeap->pBlockList->begin == pHeap->base) &&
        (pHeap->pBlockList->end == (pHeap->base + pHeap->total - 1)))
    {
        portMemFree(pHeap->pBlockList);
    }

    // Free the type specific data allocated
    if (pHeap->pHeapTypeSpecificData != NULL)
    {
        if (pHeap->heapType == HEAP_TYPE_PHYS_MEM_SUBALLOCATOR)
        {
            // Drop the reference taken on the parent memdesc at heap init.
            pPmsaMemDesc = ((PHYS_MEM_SUBALLOCATOR_DATA *)(pHeap->pHeapTypeSpecificData))->pMemDesc;
            memdescDestroy(pPmsaMemDesc);
        }
        portMemFree(pHeap->pHeapTypeSpecificData);
        pHeap->pHeapTypeSpecificData = NULL;
    }

    if ((pHeap->bHasFbRegions) && (memmgrIsPmaInitialized(pMemoryManager)))
    {
        if (pHeap->heapType != HEAP_TYPE_PARTITION_LOCAL)
            memmgrSetPmaInitialized(pMemoryManager, NV_FALSE);

        pmaDestroy(&pHeap->pmaObject);
        portMemSet(&pHeap->pmaObject, 0, sizeof(pHeap->pmaObject));
    }
}
800 
_heapGetBankPlacement(OBJGPU * pGpu,Heap * pHeap,NvU32 owner,NvU32 * flags,NvU32 type,NvU32 bank,NvU32 * placement)801 static NV_STATUS _heapGetBankPlacement
802 (
803     OBJGPU *pGpu,
804     Heap   *pHeap,
805     NvU32   owner,
806     NvU32  *flags,
807     NvU32   type,
808     NvU32   bank,
809     NvU32  *placement
810 )
811 {
812     NvU32    bankPlacement, i;
813 
814     if (type != NVOS32_TYPE_PRIMARY)
815     {
816         NvU32 bankPlacementType;
817 
818         // what kind of allocation is it?
819         switch (type)
820         {
821         case NVOS32_TYPE_IMAGE:
822         case NVOS32_TYPE_NOTIFIER:
823             bankPlacementType = BANK_PLACEMENT_IMAGE;
824             break;
825         case NVOS32_TYPE_DEPTH:
826         case NVOS32_TYPE_ZCULL:
827         case NVOS32_TYPE_STENCIL:
828             bankPlacementType = BANK_PLACEMENT_DEPTH;
829             break;
830         case NVOS32_TYPE_TEXTURE:
831         case NVOS32_TYPE_VIDEO:
832         case NVOS32_TYPE_FONT:
833             bankPlacementType = BANK_PLACEMENT_TEX_OVERLAY_FONT;
834             break;
835         default:
836             bankPlacementType = BANK_PLACEMENT_OTHER;
837         }
838 
839         //
840         // NV50+ doesn't care about bank placement since the fb has bank
841         // striding and we dont need to care about allocating primary surfaces
842         // in special areas to avoid bank conflicts. This strategy management
843         // should be removed in the future.
844         //
845         bankPlacement = pHeap->placementStrategy[bankPlacementType];
846     }
847     else
848     {
849         //
850         // primary allocation, default grow direction is up, starting at bank 0
851         // Can be overridden with NVOS32_ALLOC_FLAGS_FORCE_MEM_*
852         //
853         bankPlacement = ((0)
854                         | BANK_MEM_GROW_UP
855                         | MEM_GROW_UP
856                         | 0xFFFFFF00);
857     }
858 
859     //
860     // check if bank placement force was passed in - hint is handled in the first loop below
861     //
862     if (*flags & NVOS32_ALLOC_FLAGS_BANK_FORCE)
863     {
864         // replace data in bankplacement
865         if (*flags & NVOS32_ALLOC_FLAGS_BANK_GROW_DOWN)
866             bankPlacement = bank | BANK_MEM_GROW_DOWN | 0xFFFFFF00;
867         else
868             bankPlacement = bank | BANK_MEM_GROW_UP   | 0xFFFFFF00;
869         *flags &= ~(NVOS32_ALLOC_FLAGS_BANK_HINT);   // remove hint flag
870     }
871 
872     //
873     // Check if FORCE_MEM_GROWS_UP or FORCE_MEM_GROWS_DOWN was passed in
874     // to override the MEM_GROWS direction for this allocation.  Make sure
875     // to override each of the first MEM_NUM_BANKS_TO_TRY bytes in the NvU32
876     //
877     if (*flags & NVOS32_ALLOC_FLAGS_FORCE_MEM_GROWS_UP)
878     {
879         *flags |= NVOS32_ALLOC_FLAGS_IGNORE_BANK_PLACEMENT;
880         for (i = 0; i < MEM_NUM_BANKS_TO_TRY; i++)
881         {
882             bankPlacement = (bankPlacement & ~(MEM_GROW_MASK << (i*MEM_BANK_DATA_SIZE))) |
883                             (MEM_GROW_UP << (i*MEM_BANK_DATA_SIZE));
884         }
885     }
886     if (*flags & NVOS32_ALLOC_FLAGS_FORCE_MEM_GROWS_DOWN)
887     {
888         *flags |= NVOS32_ALLOC_FLAGS_IGNORE_BANK_PLACEMENT;
889         for (i = 0; i < MEM_NUM_BANKS_TO_TRY; i++)
890         {
891             bankPlacement = (bankPlacement & ~(MEM_GROW_MASK << (i*MEM_BANK_DATA_SIZE))) |
892                             (MEM_GROW_DOWN << (i*MEM_BANK_DATA_SIZE));
893         }
894     }
895 
896     // return the bank placement to use
897     *placement = bankPlacement;
898     return (NV_OK);
899 }
900 
901 //
902 // Workaround for Bug 67690:
903 //    NV28M-WinXP: (Lindbergh) StencilFloor OpenGL Sample Locks Up when Maximized on Secondary DualView Display
904 //
905 // Change heap placement for textures if more than two clients
906 // are detected.  In the case of two or more clients, ignoreBankPlacement, textureClientIndex,
907 // and currentBankInfo are modified.  IgnoreBankPlacement flag is set to true, textureClientIndex
908 // is returned with the index of the client to be used as heap->textureData[textureClientIndex]
909 // which pertains to the current client.  Lastly, currentBankInfo is modified to grow in the
910 // opposite direction of the most recently allocated client.
911 //
static void _heapSetTexturePlacement
(
    Heap   *pHeap,
    NvU32   client,
    NvU32   type,
    NvBool *ignoreBankPlacement,
    NvU32  *textureClientIndex,
    NvU8   *currentBankInfo
)
{
    // NOTE(review): 'type' is not referenced anywhere in this function.
    NvU32 index, numClients, clientFound, mostRecentIndex;
    // 0xFFFFFFFF is the sentinel for "no texture client allocated yet"
    mostRecentIndex     = 0xFFFFFFFF;
    clientFound         = NV_FALSE;
    numClients          = 0;

    //
    // let's first check to see if the client is already registered
    // We will iterate thru to find number of clients
    //
    for (index = 0; index < MAX_TEXTURE_CLIENT_IDS; index++)
    {
        // client already registered
        if (pHeap->textureData[index].clientId == client)
        {
            // give the currentBankInfo the new flags
            *currentBankInfo = pHeap->textureData[index].placementFlags;
            //
            // Set the client as found so that we skip allocation
            // of the client in the texture data structure
            //
            clientFound = NV_TRUE;
            *textureClientIndex = index;
        }

        //
        // We loop through the whole structure to determine the
        // number of texture clients currently listed
        // (clientId == 0 marks a free slot in textureData)
        //
        if (pHeap->textureData[index].clientId != 0)
            numClients++;

        //
        // This is used to assign new textures to the buffer
        // A value of 0xFFFFFFFF indicates that this is the first allocation
        //
        if (pHeap->textureData[index].mostRecentAllocatedFlag == NV_TRUE)
            mostRecentIndex = index;
    }

    //
    // If more than one client is detected, ignore bank placement
    // otherwise, defaults to bank placement
    //
    if (numClients > 1)
        *ignoreBankPlacement = NV_TRUE;

    //
    // We fall into this if statement if no client was listed
    // or if we have exceeded the allowable clients available
    //
    if (clientFound == NV_FALSE)
    {
        index = 0;
        while (clientFound == NV_FALSE)
        {
            // the case of full texture buffer of clients, greater than 4 clients
            if (index == MAX_TEXTURE_CLIENT_IDS)
            {
                // recycle the slot after the most recently allocated one
                index = (mostRecentIndex + 1) % MAX_TEXTURE_CLIENT_IDS;

                // assign the new client and update the texture data
                pHeap->textureData[index].clientId                           = client;
                pHeap->textureData[index].mostRecentAllocatedFlag            = NV_TRUE;
                pHeap->textureData[mostRecentIndex].mostRecentAllocatedFlag  = NV_FALSE;
                pHeap->textureData[index].refCount                           = 0;

                //
                // Reverse the placementFlags from the one that was previously allocated
                //
                if (pHeap->textureData[mostRecentIndex].placementFlags & MEM_GROW_MASK)
                    *currentBankInfo = MEM_GROW_UP;
                else
                    *currentBankInfo = MEM_GROW_DOWN;

                // Assign the new value to the texture data structure
                pHeap->textureData[index].placementFlags = *currentBankInfo;
                clientFound                             = NV_TRUE;
                *ignoreBankPlacement                    = NV_TRUE;
                *textureClientIndex                     = index;
            }

            // the case in which there is still room available in the buffer
            // (assumes client != 0, since 0 marks a free slot — see loop above)
            if (pHeap->textureData[index].clientId == 0)
            {
                // If we fall in here, it means there is still room available
                pHeap->textureData[index].clientId = client;

                // deal with the grow directivity
                if (mostRecentIndex == 0xFFFFFFFF)
                {
                    // this is the very first client to be allocated
                    pHeap->textureData[index].placementFlags = *currentBankInfo;
                    // hand back the opposite direction for the next allocation
                    if (pHeap->textureData[index].placementFlags & MEM_GROW_MASK)
                        *currentBankInfo = MEM_GROW_DOWN;
                    else
                        *currentBankInfo = MEM_GROW_UP;
                    pHeap->textureData[index].mostRecentAllocatedFlag = NV_TRUE;
                }
                else
                {
                    // grow opposite to the most recently allocated client
                    if (pHeap->textureData[mostRecentIndex].placementFlags & MEM_GROW_MASK)
                        *currentBankInfo = MEM_GROW_UP;
                    else
                        *currentBankInfo = MEM_GROW_DOWN;

                    // Set the last client allocated to the new client allocated
                    pHeap->textureData[mostRecentIndex].mostRecentAllocatedFlag  = NV_FALSE;
                    pHeap->textureData[index].mostRecentAllocatedFlag            = NV_TRUE;

                    // update the placement flags
                    pHeap->textureData[index].placementFlags                     = *currentBankInfo;

                    // if this isn't the first client in the heap, then we ignore bank placement
                    *ignoreBankPlacement                                         = NV_TRUE;
                }

                clientFound = NV_TRUE;
                *textureClientIndex = index;
            }
            index++;
        }           // while (clientFound == NV_FALSE)
    }               // if (clientFound == NV_FALSE)
}
1045 
1046 //
1047 // If we have two different alignment requirements for a memory
1048 // allocation, this routine calculates the LCM (least common multiple)
1049 // to satisfy both requirements.
1050 //
// An alignment of 0 means "no preferred alignment".  The return value
// is capped at maxAlignment = NV_U64_MAX; if the computed LCM would
// exceed that limit, maxAlignment is returned instead.
1054 //
1055 // Called by heapAlloc and heapAllocHint.
1056 //
1057 
1058 
1059 /*!
1060  *  @Is Alloc Valid For FB Region
1061  *
1062  * Check the prospective allocation to see if the candidate block supports
1063  * the requested surface type.
1064  *
1065  * NOTE: The FB region and FB heap allocation code assume that free blocks
1066  * reside in a single FB region.  This is true in current implementations that
1067  * have the regions separated by a reserved block, but may not be true in future
1068  * implementations.
1069  *
1070  *  @param[in]   pGpu           GPU object
1071  *  @param[in]   pHeap          heap object
1072  *  @param[in]   pFbAllocInfo   allocation request information
1073  *  @param[in]   pAllocData     allocation candidate information
1074  *
1075  *  @returns NV_TRUE if block can be allocated at the prospective address
1076  *
1077  */
1078 static NvBool
_isAllocValidForFBRegion(OBJGPU * pGpu,Heap * pHeap,FB_ALLOC_INFO * pFbAllocInfo,OBJHEAP_ALLOC_DATA * pAllocData)1079 _isAllocValidForFBRegion
1080 (
1081     OBJGPU             *pGpu,
1082     Heap               *pHeap,
1083     FB_ALLOC_INFO      *pFbAllocInfo,
1084     OBJHEAP_ALLOC_DATA *pAllocData
1085 )
1086 {
1087     MemoryManager        *pMemoryManager = GPU_GET_MEMORY_MANAGER(pGpu);
1088     NvBool                isValid        = NV_FALSE;
1089     FB_REGION_DESCRIPTOR *fbRegion;
1090 
1091     // Check if any regions are defined.  If not, then we are done.
1092     if (pMemoryManager->Ram.numFBRegions > 0)
1093     {
1094         fbRegion = memmgrLookupFbRegionByOffset(pGpu, pMemoryManager, pAllocData->allocLo, pAllocData->allocHi);
1095 
1096         if (fbRegion != NULL)
1097         {
1098             // Because we heapAlloc the reserved region.
1099             if (pFbAllocInfo->pageFormat->type == NVOS32_TYPE_PMA &&
1100                 pFbAllocInfo->owner == HEAP_OWNER_PMA_RESERVED_REGION)
1101             {
1102                 if (!fbRegion->bInternalHeap && !fbRegion->bRsvdRegion)
1103                 {
1104                     isValid = NV_TRUE;
1105                 }
1106                 return isValid;
1107             }
1108             // Check if the region is reserved/not usable
1109             if (fbRegion->bRsvdRegion &&
1110                 (pFbAllocInfo->pageFormat->type != NVOS32_TYPE_RESERVED))
1111             {
1112                 NV_PRINTF(LEVEL_INFO,
1113                           "Reserved region.  Rejecting placement\n");
1114                 return NV_FALSE;
1115             }
1116 
1117             //
1118             // Check if the region supports compression and if we need it.
1119             // Surfaces that *require* compression can be allocated *only* in
1120             // regions that support compression.  *Optionally* compressed surfaces
1121             // can be allocated anywhere though -- the selection of an uncompressed
1122             // KIND will be handled in dmaUpdateVASpace.
1123             //
1124             if (!fbRegion->bSupportCompressed)
1125             {
1126                 if (DRF_VAL(OS32, _ATTR, _COMPR , pFbAllocInfo->pageFormat->attr) == NVOS32_ATTR_COMPR_REQUIRED)
1127                 {
1128                     NV_PRINTF(LEVEL_INFO,
1129                               "Compression not supported.  Rejecting placement\n");
1130                     return NV_FALSE;
1131                 }
1132             }
1133 
1134             // Check if the allocation type is specifically not allowed
1135             if (pFbAllocInfo->pageFormat->type < NVOS32_NUM_MEM_TYPES)
1136             {
1137                 if ((!fbRegion->bSupportISO) &&
1138                     ((pFbAllocInfo->pageFormat->type == NVOS32_TYPE_PRIMARY) ||
1139                      (pFbAllocInfo->pageFormat->type == NVOS32_TYPE_CURSOR) ||
1140                      (pFbAllocInfo->pageFormat->type == NVOS32_TYPE_VIDEO)))
1141                 {
1142                     NV_PRINTF(LEVEL_INFO,
1143                               "ISO surface type #%d not supported.  Rejecting placement\n",
1144                               pFbAllocInfo->pageFormat->type);
1145                     return NV_FALSE;
1146                 }
1147             }
1148 
1149             if (!!fbRegion->bProtected ^
1150                 !!(pFbAllocInfo->pageFormat->flags & NVOS32_ALLOC_FLAGS_PROTECTED))
1151             {
1152                 NV_PRINTF(LEVEL_INFO,
1153                           "Protection mismatch.  Rejecting placement\n");
1154                 return NV_FALSE;
1155             }
1156 
1157         }
1158         else if (pFbAllocInfo->pageFormat->type != NVOS32_TYPE_RESERVED)
1159         {
1160             //
1161             // Allow reserved allocs outside of valid regions, but everything else
1162             // must be allocated in a region.
1163             //
1164             NV_PRINTF(LEVEL_INFO,
1165                       "pFbAllocInfo->type != NVOS32_TYPE_RESERVED\n");
1166             return NV_FALSE;
1167         }
1168 
1169     }
1170 
1171     return NV_TRUE;
1172 }
1173 
1174 /**
1175  * Blacklists a single page
1176  * This function will allocate the memory descriptor with a fixed memory offset
1177  * and allocate the FB physical offset. Will replace the blacklist allocation
1178  * path in the heapBlackListPages_IMPL.
1179  *
1180  * @param[in]    pGpu            OBJGPU pointer
1181  * @param[in]    pHeap           Heap pointer
1182  * @param[in]    pBlacklistChunk BLACKLIST_CHUNK pointer
1183  *
1184  * @returns NV_OK on success
1185  *          NV_ERR_OUT_OF_MEMORY, if the memory is already blacklisted
1186  */
1187 
1188 static NV_STATUS
_heapBlacklistSingleChunk(OBJGPU * pGpu,Heap * pHeap,BLACKLIST_CHUNK * pBlacklistChunk)1189 _heapBlacklistSingleChunk
1190 (
1191     OBJGPU             *pGpu,
1192     Heap               *pHeap,
1193     BLACKLIST_CHUNK    *pBlacklistChunk
1194 )
1195 {
1196     NV_STATUS status = NV_OK;
1197     MemoryManager *pMemoryManager = GPU_GET_MEMORY_MANAGER(pGpu);
1198     NV_ASSERT(pBlacklistChunk != NULL);
1199 
1200     status = memdescCreate(&pBlacklistChunk->pMemDesc,
1201                            pGpu, pBlacklistChunk->size, RM_PAGE_SIZE,
1202                            NV_TRUE, ADDR_FBMEM, NV_MEMORY_UNCACHED,
1203                            MEMDESC_FLAGS_FIXED_ADDRESS_ALLOCATE |
1204                            MEMDESC_FLAGS_SKIP_RESOURCE_COMPUTE);
1205     if (NV_OK != status)
1206     {
1207         NV_PRINTF(LEVEL_FATAL,
1208                   "Error 0x%x creating memdesc for blacklisted chunk for address0x%llx, skipping\n",
1209                   status, pBlacklistChunk->physOffset);
1210         NV_ASSERT(NV_FALSE);
1211         return status;
1212     }
1213 
1214     // this is how FIXED_ADDRESS_ALLOCATE works
1215     memdescSetPte(pBlacklistChunk->pMemDesc, AT_GPU, 0, RM_PAGE_ALIGN_DOWN(pBlacklistChunk->physOffset));
1216 
1217     if (pHeap->heapType != HEAP_TYPE_PHYS_MEM_SUBALLOCATOR)
1218     {
1219         //
1220         // Allocate memory for this page. This is marked as an internal RM allocation
1221         // and will be saved/restored during suspend/resume
1222         //
1223         memdescTagAlloc(status, NV_FB_ALLOC_RM_INTERNAL_OWNER_UNNAMED_TAG_78,
1224                     pBlacklistChunk->pMemDesc);
1225         if (NV_OK != status)
1226         {
1227             // no use for the memdesc if page couldn't be allocated
1228             memdescDestroy(pBlacklistChunk->pMemDesc);
1229 
1230             NV_PRINTF(LEVEL_FATAL,
1231                       "Error 0x%x creating page for blacklisting address: 0x%llx, skipping\n",
1232                       status, pBlacklistChunk->physOffset);
1233             NV_ASSERT(NV_FALSE);
1234             return status;
1235         }
1236     }
1237 
1238     // set the flags properly
1239     pBlacklistChunk->bIsValid                = NV_TRUE;
1240 
1241     // if dynamic blacklisteing is enabled, clear the pending retirement flag
1242     if (pMemoryManager->bEnableDynamicPageOfflining)
1243     {
1244         pBlacklistChunk->bPendingRetirement = NV_FALSE;
1245     }
1246     return status;
1247 }
1248 
1249 /**
1250  * Free-s the blacklisted pages within the range [begin, begin+size-1]
1251  * This function will iterate the blacklisted chunks data structure,
1252  * and free the blacklisted pages within the range [begin, begin+size-1]
1253  *
1254  * @param[in]    pGpu           OBJGPU pointer
1255  * @param[in]    pMemoryManager MemoryManager pointer
1256  * @param[in]    pBlackList     BLACKLIST pointer
1257  * @param[in]    begin          starting address of the range
1258  * @param[in]    size           Size of the region, where blacklisted pages to be free-d
1259  *
1260  * @returns NV_OK on success
1261  */
1262 static NV_STATUS
_heapFreeBlacklistPages(OBJGPU * pGpu,MemoryManager * pMemoryManager,BLACKLIST * pBlackList,NvU64 begin,NvU64 size)1263 _heapFreeBlacklistPages
1264 (
1265     OBJGPU        *pGpu,
1266     MemoryManager *pMemoryManager,
1267     BLACKLIST     *pBlackList,
1268     NvU64          begin,
1269     NvU64          size
1270 )
1271 {
1272     NvU32               chunk               = 0;
1273     NvU64               baseChunkAddress    = 0;
1274     NvU64               endChunkAddress     = 0;
1275     BLACKLIST_CHUNK    *pBlacklistChunks    = pBlackList->pBlacklistChunks;
1276 
1277     for (chunk = 0; chunk < pBlackList->count; chunk++)
1278     {
1279         baseChunkAddress = 0;
1280         endChunkAddress  = 0;
1281         // No need to process the chunk if it's not a valid chunk
1282         if (pBlacklistChunks[chunk].bIsValid != NV_TRUE ||
1283            (pMemoryManager->bEnableDynamicPageOfflining &&
1284             pBlacklistChunks[chunk].bPendingRetirement))
1285             continue;
1286 
1287         baseChunkAddress = pBlacklistChunks[chunk].physOffset;
1288         endChunkAddress =  baseChunkAddress + pBlacklistChunks[chunk].size - 1;
1289 
1290         if (baseChunkAddress >= begin && endChunkAddress <= (begin + size - 1))
1291         {
1292             //
1293             // free the mem desc, set the excludeGlobalListFlag
1294             // invalidate the entry
1295             //
1296              NV_PRINTF(LEVEL_FATAL,
1297                        "removing from blacklist... page start %llx, page end:%llx\n",
1298                        baseChunkAddress, endChunkAddress);
1299 
1300             memdescFree(pBlacklistChunks[chunk].pMemDesc);
1301             memdescDestroy(pBlacklistChunks[chunk].pMemDesc);
1302 
1303             pBlacklistChunks[chunk].bIsValid                = NV_FALSE;
1304         }
1305     }
1306     return NV_OK;
1307 }
1308 
1309 /**
1310  * Blacklist pages within the range [begin, begin+size-1]
1311  * This function will iterate the blacklisted chunks data structure,
1312  * and blacklist pages within the range [begin, begin+size-1]
1313  *
1314  * @param[in]    pGpu       OBJGPU pointer
1315  * @param[in]    pHeap      Heap pointer
1316  * @param[in]    pBlackList BLACKLIST pointer
1317  * @param[in]    begin      starting address of the range
1318  * @param[in]    size       Size of the region, where pages will be blacklisted
1319  *
1320  * @returns NV_OK on success
1321  *           error, if _heapBlacklistSingleChunk fails
1322  */
1323 static NV_STATUS
_heapBlacklistChunks(OBJGPU * pGpu,Heap * pHeap,BLACKLIST * pBlackList,NvU64 begin,NvU64 size)1324 _heapBlacklistChunks
1325 (
1326     OBJGPU         *pGpu,
1327     Heap           *pHeap,
1328     BLACKLIST      *pBlackList,
1329     NvU64           begin,
1330     NvU64           size
1331 )
1332 {
1333     NvU32              chunk                = 0;
1334     NvU64              baseAddress          = 0;
1335     NvU64              endAddress           = 0;
1336     BLACKLIST_CHUNK   *pBlacklistChunks     = pBlackList->pBlacklistChunks;
1337     NV_STATUS          status               = NV_OK;
1338 
1339 
1340     for (chunk = 0; chunk < pBlackList->count; chunk++)
1341     {
1342         baseAddress     = 0;
1343         endAddress      = 0;
1344 
1345         // No need to process the chunk if it's a valid chunk
1346         if (pBlacklistChunks[chunk].bIsValid == NV_TRUE)
1347             continue;
1348 
1349         baseAddress   = pBlacklistChunks[chunk].physOffset;
1350         endAddress    = baseAddress + pBlacklistChunks[chunk].size - 1;
1351 
1352         //TODO: what if the blacklisted chunk is halfway inside the allocated region??
1353         if (baseAddress >= begin && endAddress <= (begin + size - 1))
1354         {
1355             NV_PRINTF(LEVEL_ERROR,
1356                       "blacklisting chunk from addr: 0x%llx to 0x%llx, new begin :0x%llx, end:0x%llx\n",
1357                       baseAddress, endAddress, begin, begin + size - 1);
1358             status = _heapBlacklistSingleChunk(pGpu, pHeap, &pBlacklistChunks[chunk]);
1359             NV_ASSERT(status == NV_OK);
1360         }
1361     }
1362     return status;
1363 }
1364 
1365 /*!
1366  * @brief allocate memory from heap
1367  *
1368  * Allocates a memory region with requested parameters from heap.
1369  * If requested contiguous allocation is not possible, tries to allocate non-contiguous memory.
1370  *
1371  * @param[in]     pGpu                 GPU object
1372  * @param[in]     hClient              client handle
1373  * @param[in]     pHeap                heap object
1374  * @param[in]     pAllocRequest        allocation request
1375  * @param[in]     memHandle            memory handle
1376  * @param[in/out] pAllocData           heap-specific allocation data
1377  * @param[in/out] pFbAllocInfo         allocation data
1378  * @param[out]    pHwResource          pointer to allocation HW resource info
1379  * @param[in/out] pNoncontigAllocation the requested/provided allocation is noncotig
1380  * @param[in]     bNoncontigAllowed    allocation can be made noncontig
1381  * @param[in]     bAllocedMemdesc      memdesc should be freed if a new one is created
1382  */
heapAlloc_IMPL(OBJGPU * pGpu,NvHandle hClient,Heap * pHeap,MEMORY_ALLOCATION_REQUEST * pAllocRequest,NvHandle memHandle,OBJHEAP_ALLOC_DATA * pAllocData,FB_ALLOC_INFO * pFbAllocInfo,HWRESOURCE_INFO ** pHwResource,NvBool * pNoncontigAllocation,NvBool bNoncontigAllowed,NvBool bAllocedMemdesc)1383 NV_STATUS heapAlloc_IMPL
1384 (
1385     OBJGPU                        *pGpu,
1386     NvHandle                       hClient,
1387     Heap                          *pHeap,
1388     MEMORY_ALLOCATION_REQUEST     *pAllocRequest,
1389     NvHandle                       memHandle,
1390     OBJHEAP_ALLOC_DATA            *pAllocData,
1391     FB_ALLOC_INFO                 *pFbAllocInfo,
1392     HWRESOURCE_INFO              **pHwResource,
1393     NvBool                        *pNoncontigAllocation,
1394     NvBool                         bNoncontigAllowed,
1395     NvBool                         bAllocedMemdesc
1396 )
1397 {
1398     NV_MEMORY_ALLOCATION_PARAMS   *pVidHeapAlloc        = pAllocRequest->pUserParams;
1399     MEMORY_DESCRIPTOR             *pMemDesc             = pAllocRequest->pMemDesc;
1400     MemoryManager                 *pMemoryManager       = GPU_GET_MEMORY_MANAGER(pGpu);
1401     NvU32                          textureClientIndex   = 0xFFFFFFFF;
1402     NvU64                          desiredOffset        = pFbAllocInfo->offset;
1403     NvU64                          adjustedSize         = pFbAllocInfo->size - pFbAllocInfo->alignPad;
1404     NvU32                          bankPlacement        = 0;
1405     NvBool                         ignoreBankPlacement  = NV_FALSE;
1406     NvU8                           currentBankInfo;
1407     MEM_BLOCK                     *pBlockFirstFree;
1408     MEM_BLOCK                     *pBlockFree;
1409     MEM_BLOCK                     *pBlockNew            = NULL;
1410     MEM_BLOCK                     *pBlockSplit          = NULL;
1411     NvU64                          allocatedOffset      = 0;
1412     NvBool                         bTurnBlacklistOff    = NV_FALSE;
1413     NvBool                         bDone                = NV_FALSE;
1414     NV_STATUS                      status               = NV_OK;
1415     NvU32                          i;
1416 
1417     NV_ASSERT_OR_RETURN(
1418         (memmgrAllocGetAddrSpace(GPU_GET_MEMORY_MANAGER(pGpu), pVidHeapAlloc->flags, pVidHeapAlloc->attr)
1419             == ADDR_FBMEM) &&
1420         (pAllocRequest->pPmaAllocInfo[gpumgrGetSubDeviceInstanceFromGpu(pGpu)] == NULL),
1421         NV_ERR_INVALID_ARGUMENT);
1422 
1423     if (pVidHeapAlloc->flags & NVOS32_ALLOC_FLAGS_FIXED_ADDRESS_ALLOCATE)
1424         desiredOffset -= pFbAllocInfo->alignPad;
1425 
1426     if (pGpu->getProperty(pGpu, PDB_PROP_GPU_ALLOW_PAGE_RETIREMENT) &&
1427         gpuCheckPageRetirementSupport_HAL(pGpu) &&
1428         FLD_TEST_DRF(OS32, _ATTR2, _BLACKLIST, _OFF, pVidHeapAlloc->attr2))
1429     {
1430         NV_PRINTF(LEVEL_INFO,
1431                   "Trying to turn blacklisting pages off for this allocation of size: %llx\n",
1432                   pVidHeapAlloc->size);
1433         if (!hypervisorIsVgxHyper())
1434             _heapFreeBlacklistPages(pGpu, pMemoryManager, &pHeap->blackList, desiredOffset, pVidHeapAlloc->size);
1435         else
1436             _heapFreeBlacklistPages(pGpu, pMemoryManager, &pHeap->blackList, pHeap->base, pHeap->total);
1437         bTurnBlacklistOff = NV_TRUE;
1438         // Now continue with the heap allocation.
1439     }
1440 
1441     //
1442     // Check for range-limited request.
1443     // Range of [0,0] is a special case that means to use the entire heap.
1444     //
1445     // A range-limited request allows caller to say: I really want memory
1446     //   which only falls completely within a particular range.  Returns
1447     //   error if can't allocate within that range.
1448     //
1449     //   Used on Windows by OpenGL.  On Windows during a modeswitch, the
1450     //   display driver frees all vidmem surfaces.  Unfortunately, OpenGL
1451     //   writes to some vidmem surface with the CPU from user mode.  If these
1452     //   surfaces are freed during the modeswitch, then the user mode OpenGL
1453     //   app might scribble on someone else's surface if that video memory is
1454     //   reused before OpenGL notices the modeswitch.  Because modeswitches
1455     //   are asynchronous to the OpenGL client, it does not notice the
1456     //   modeswitches right away.
1457     //
1458     //   A solution is for OpenGL to restrict vidmem surfaces that have
1459     //   this problem to a range of memory where it is safe *not* to free
1460     //   the surface during a modeswitch.
1461     //
1462     // virtual allocation are checked in dmaAllocVA()
1463     if (pVidHeapAlloc->rangeLo == 0 && pVidHeapAlloc->rangeHi == 0)
1464     {
1465         pVidHeapAlloc->rangeHi = pHeap->base + pHeap->total - 1;
1466     }
1467     if (pVidHeapAlloc->rangeHi > pHeap->base + pHeap->total - 1)
1468     {
1469         pVidHeapAlloc->rangeHi = pHeap->base + pHeap->total - 1;
1470     }
1471 
1472     if ((pVidHeapAlloc->flags & NVOS32_ALLOC_FLAGS_FIXED_ADDRESS_ALLOCATE) == 0)
1473     {
1474         // Only want to override in one direction at a time
1475         if (pMemoryManager->overrideInitHeapMin == 0)
1476         {
1477             pVidHeapAlloc->rangeHi = NV_MIN(pVidHeapAlloc->rangeHi, pMemoryManager->overrideHeapMax);
1478         }
1479         else
1480         {
1481             pVidHeapAlloc->rangeLo = NV_MAX(pVidHeapAlloc->rangeLo, pMemoryManager->overrideInitHeapMin);
1482         }
1483     }
1484 
1485     //
1486     // Check for valid range.
1487     //
1488     if (pVidHeapAlloc->rangeLo > pVidHeapAlloc->rangeHi)
1489     {
1490         status = NV_ERR_INVALID_ARGUMENT;
1491         goto return_early;
1492     }
1493 
1494     //
1495     // The bank placement loop does not know how to limit allocations to be
1496     // within a range.
1497     //
1498     if (((pVidHeapAlloc->rangeLo > 0) || (pVidHeapAlloc->rangeHi < pHeap->base + pHeap->total - 1)))
1499     {
1500         pVidHeapAlloc->flags |= NVOS32_ALLOC_FLAGS_IGNORE_BANK_PLACEMENT;
1501     }
1502 
1503     //
1504     // Set up bank placement data - should have been preselected in heapCreate
1505     //
1506     status = _heapGetBankPlacement(pGpu, pHeap, pVidHeapAlloc->owner,
1507                                    &pVidHeapAlloc->flags,
1508                                    pVidHeapAlloc->type,
1509                                    0,
1510                                    &bankPlacement);
1511     if (status != NV_OK)
1512     {
1513         NV_PRINTF(LEVEL_ERROR,
1514                   "_heapGetBankPlacement failed for current allocation\n");
1515         goto return_early;
1516     }
1517 
1518     //
1519     // Find the best bank to start looking in for this pVidHeapAlloc->type, but only if we're
1520     // not ignoring bank placement rules.  Save the current bank info.
1521     //
1522     currentBankInfo = (NvU8)bankPlacement; // this is always non zero from above
1523 
1524     //
1525     // Check for fixed address request.
1526     // This allows caller to say: I really want this memory at a particular
1527     //   offset.  Returns error if can't get that offset.
1528     //   Used initially by Mac display driver twinview code.
1529     //   On the Mac it is a very bad thing to *ever* move the primary
1530     //   during a modeset since a lot of sw caches the value and never
1531     //   checks again.
1532     //
1533     if (pVidHeapAlloc->flags & NVOS32_ALLOC_FLAGS_FIXED_ADDRESS_ALLOCATE)
1534     {
1535 
1536         // is our desired offset suitably aligned?
1537         if (desiredOffset % pAllocData->alignment)
1538         {
1539             NV_PRINTF(LEVEL_ERROR,
1540                       "offset 0x%llx not aligned to 0x%llx\n",
1541                       desiredOffset, pAllocData->alignment);
1542             goto failed;
1543         }
1544 
1545         pBlockFree = pHeap->pFreeBlockList;
1546 
1547         if (pBlockFree == NULL)
1548         {
1549             NV_PRINTF(LEVEL_ERROR, "no free blocks\n");
1550             goto failed;
1551         }
1552 
1553         do {
1554             //
1555             // Allocate from the bottom of the memory block.
1556             //
1557             pBlockFree = pBlockFree->u1.nextFree;
1558 
1559             // Does this block contain our desired range?
1560             if ((desiredOffset >= pBlockFree->begin) &&
1561                 (desiredOffset + pAllocData->allocSize - 1) <= pBlockFree->end)
1562             {
1563                 // we have a match, now remove it from the pool
1564                 pAllocData->allocLo = desiredOffset;
1565                 pAllocData->allocHi = desiredOffset + pAllocData->allocSize - 1;
1566                 pAllocData->allocAl = pAllocData->allocLo;
1567 
1568                 // Check that the candidate block can support the allocation type
1569                 if (_isAllocValidForFBRegion(pGpu, pHeap, pFbAllocInfo, pAllocData))
1570                     goto got_one;
1571             }
1572 
1573         } while (pBlockFree != pHeap->pFreeBlockList);
1574 
1575         // return error if can't get that particular address
1576         NV_PRINTF(LEVEL_ERROR,
1577                   "failed NVOS32_ALLOC_FLAGS_FIXED_ADDRESS_ALLOCATE @%llx (%lld bytes)\n",
1578                   desiredOffset, pAllocData->allocSize);
1579         goto failed;
1580     }
1581 
1582     //
1583     // Check if NVOS32_ALLOC_FLAGS_IGNORE_BANK_PLACEMENT was passed in with
1584     // the pVidHeapAlloc->type to ignore placing this allocation in a particular bank.
1585     // This means we default to the second loop where we choose first fit.
1586     //
1587     if (pVidHeapAlloc->flags & NVOS32_ALLOC_FLAGS_IGNORE_BANK_PLACEMENT)
1588         ignoreBankPlacement = NV_TRUE;
1589 
1590     //
1591     // Bug 67690: Treat textures differently for more than one client (eg. opengl),
1592     // [IN]:  client, pVidHeapAlloc->type, ignoreBankPlacement
1593     // [OUT]: heap, ignoreBankPlacement, textureClientIndex
1594     //
1595     // Bug 69385: Treat textures differently only if pVidHeapAlloc->flags are also set to zero.
1596     //   NV30GL-WinXP: Unable to run 3DMark2001SE @ 1600x1200x32bpp.
1597     //
1598     if ((pVidHeapAlloc->type == NVOS32_TYPE_TEXTURE) && (!pVidHeapAlloc->flags))
1599         _heapSetTexturePlacement(pHeap, hClient, pVidHeapAlloc->type, &ignoreBankPlacement, &textureClientIndex, &currentBankInfo);
1600 
1601     if (!ignoreBankPlacement)
1602     {
1603         currentBankInfo = (NvU8)bankPlacement & BANK_MEM_GROW_MASK;
1604 
1605         if (pVidHeapAlloc->flags & NVOS32_ALLOC_FLAGS_BANK_HINT)
1606         {
1607             if (pVidHeapAlloc->flags & NVOS32_ALLOC_FLAGS_BANK_GROW_DOWN)
1608                 currentBankInfo = MEM_GROW_DOWN;
1609             else
1610                 currentBankInfo = MEM_GROW_UP;
1611             pVidHeapAlloc->flags &= ~(NVOS32_ALLOC_FLAGS_BANK_HINT); // hint flag only lasts for 1 loop
1612         }
1613         else
1614         {
1615             // Convert bank grow up/down to mem grow up/down
1616             currentBankInfo = (currentBankInfo & BANK_MEM_GROW_DOWN ? MEM_GROW_DOWN : MEM_GROW_UP);
1617         }
1618     } // if (!ignoreBankPlacement)
1619 
1620     pBlockFirstFree = pHeap->pFreeBlockList;
1621     if (!pBlockFirstFree)
1622     {
1623         NV_PRINTF(LEVEL_ERROR, "no free blocks\n");
1624         goto failed;
1625     }
1626 
1627     if (*pNoncontigAllocation)
1628     {
1629         NV_PRINTF(LEVEL_INFO, "non-contig vidmem requested\n");
1630         goto non_contig_alloc;
1631     }
1632 
1633     //
1634     // Loop through all available regions.
1635     // Note we don't check for bRsvdRegion here because when blacklisting
1636     // those regions we need them to succeed.
1637     //
1638     bDone = NV_FALSE;
1639     i = 0;
1640     while (!bDone)
1641     {
1642         NvU64 saveRangeLo = pVidHeapAlloc->rangeLo;
1643         NvU64 saveRangeHi = pVidHeapAlloc->rangeHi;
1644 
1645         if (!memmgrAreFbRegionsSupported(pMemoryManager) ||
1646              gpuIsCacheOnlyModeEnabled(pGpu))
1647         {
1648             bDone = NV_TRUE;
1649         }
1650         else
1651         {
1652             NV_ASSERT( pMemoryManager->Ram.numFBRegionPriority > 0 );
1653 
1654             if (FLD_TEST_DRF(OS32, _ATTR2, _PRIORITY, _LOW, pFbAllocInfo->pageFormat->attr2) ||
1655                 (pMemoryManager->bPreferSlowRegion &&
1656                 !FLD_TEST_DRF(OS32, _ATTR2, _PRIORITY, _HIGH, pFbAllocInfo->pageFormat->attr2)))
1657             {
1658                 NV_ASSERT( pMemoryManager->Ram.fbRegionPriority[pMemoryManager->Ram.numFBRegionPriority-1-i] < pMemoryManager->Ram.numFBRegions );
1659                 NV_ASSERT( !pMemoryManager->Ram.fbRegion[pMemoryManager->Ram.fbRegionPriority[pMemoryManager->Ram.numFBRegionPriority-1-i]].bRsvdRegion );
1660                 //
1661                 // We prefer slow memory, or we want _LOW priority
1662                 // ==>> Try allocations in increasing order of performance,
1663                 // slowest first
1664                 //
1665                 pVidHeapAlloc->rangeLo = NV_MAX(pVidHeapAlloc->rangeLo, pMemoryManager->Ram.fbRegion[pMemoryManager->Ram.fbRegionPriority[pMemoryManager->Ram.numFBRegionPriority-1-i]].base);
1666                 pVidHeapAlloc->rangeHi = NV_MIN(pVidHeapAlloc->rangeHi, pMemoryManager->Ram.fbRegion[pMemoryManager->Ram.fbRegionPriority[pMemoryManager->Ram.numFBRegionPriority-1-i]].limit);
1667             }
1668             else
1669             {
1670                 NV_ASSERT( pMemoryManager->Ram.fbRegionPriority[i] < pMemoryManager->Ram.numFBRegions );
1671                 NV_ASSERT( !pMemoryManager->Ram.fbRegion[pMemoryManager->Ram.fbRegionPriority[i]].bRsvdRegion );
1672                //
1673                 // We don't explicitly want slow memory or we don't prefer
1674                 // allocations in the slow memory
1675                 // ==>> Try allocations in decreasing order of performance,
1676                 // fastest first
1677                 //
1678                 pVidHeapAlloc->rangeLo = NV_MAX(pVidHeapAlloc->rangeLo, pMemoryManager->Ram.fbRegion[pMemoryManager->Ram.fbRegionPriority[i]].base);
1679                 pVidHeapAlloc->rangeHi = NV_MIN(pVidHeapAlloc->rangeHi, pMemoryManager->Ram.fbRegion[pMemoryManager->Ram.fbRegionPriority[i]].limit);
1680             }
1681             i++;
1682 
1683             bDone = !(i < pMemoryManager->Ram.numFBRegionPriority);
1684         }
1685 
1686         //
1687         // When scanning upwards, start at the bottom - 1 so the following loop looks symetrical.
1688         //
1689         if ( ! (currentBankInfo & MEM_GROW_DOWN))
1690             pBlockFirstFree = pBlockFirstFree->u0.prevFree;
1691         pBlockFree = pBlockFirstFree;
1692 
1693         do
1694         {
1695             NvU64 blockLo;
1696             NvU64 blockHi;
1697 
1698             if (currentBankInfo & MEM_GROW_DOWN)
1699                 pBlockFree = pBlockFree->u0.prevFree;
1700             else
1701                 pBlockFree = pBlockFree->u1.nextFree;
1702 
1703             //
1704             // Is this block completely in requested range?
1705             //
1706             // We *should* check that pBlockFree is wholely resident in the range, but the
1707             // old check didn't and checking it causes some tests to fail.
1708             // So check that at least *some* of the block resides within the requested range.
1709             //
1710             if ((pBlockFree->end >= pVidHeapAlloc->rangeLo) && (pBlockFree->begin <= pVidHeapAlloc->rangeHi))
1711             {
1712                 //
1713                 // Find the intersection of the free block and the specified range.
1714                 //
1715                 blockLo = (pVidHeapAlloc->rangeLo > pBlockFree->begin) ? pVidHeapAlloc->rangeLo : pBlockFree->begin;
1716                 blockHi = (pVidHeapAlloc->rangeHi < pBlockFree->end) ? pVidHeapAlloc->rangeHi : pBlockFree->end;
1717 
1718                 if (currentBankInfo & MEM_GROW_DOWN)
1719                 {
1720                     //
1721                     // Allocate from the top of the memory block.
1722                     //
1723                     pAllocData->allocLo = (blockHi - pAllocData->allocSize + 1) / pAllocData->alignment * pAllocData->alignment;
1724                     pAllocData->allocAl = pAllocData->allocLo;
1725                     pAllocData->allocHi = pAllocData->allocAl + pAllocData->allocSize - 1;
1726                 }
1727                 else
1728                 {
1729                     //
1730                     // Allocate from the bottom of the memory block.
1731                     //
1732                     pAllocData->allocAl = (blockLo + (pAllocData->alignment - 1)) / pAllocData->alignment * pAllocData->alignment;
1733                     pAllocData->allocLo = pAllocData->allocAl;
1734                     pAllocData->allocHi = pAllocData->allocAl + pAllocData->allocSize - 1;
1735                 }
1736 
1737                 //
1738                 // Does the desired range fall completely within this block?
1739                 // Also make sure it does not wrap-around.
1740                 // Also make sure it is within the desired range.
1741                 //
1742                 if ((pAllocData->allocLo >= pBlockFree->begin) && (pAllocData->allocHi <= pBlockFree->end))
1743                 {
1744                     if (pAllocData->allocLo <= pAllocData->allocHi)
1745                     {
1746                         if ((pAllocData->allocLo >= pVidHeapAlloc->rangeLo) && (pAllocData->allocHi <= pVidHeapAlloc->rangeHi))
1747                         {
1748                             // Check that the candidate block can support the allocation type
1749                             if (_isAllocValidForFBRegion(pGpu, pHeap, pFbAllocInfo, pAllocData))
1750                             {
1751                                 pVidHeapAlloc->rangeLo = saveRangeLo;
1752                                 pVidHeapAlloc->rangeHi = saveRangeHi;
1753                                 goto got_one;
1754                             }
1755                         }
1756                     }
1757                 }
1758             }
1759 
1760         } while (pBlockFree != pBlockFirstFree);
1761 
1762         pVidHeapAlloc->rangeLo = saveRangeLo;
1763         pVidHeapAlloc->rangeHi = saveRangeHi;
1764     }
1765 
1766 non_contig_alloc:
1767     if (!bNoncontigAllowed)
1768         goto failed;
1769 
1770     if (!*pNoncontigAllocation)
1771     {
1772         NV_PRINTF(LEVEL_INFO,
1773                   "Contig vidmem allocation failed, running noncontig allocator\n");
1774 
1775         // Create a new noncontig memdescriptor
1776         memdescDestroy(pAllocRequest->pMemDesc);
1777 
1778         status = memdescCreate(&pAllocRequest->pMemDesc, pGpu, adjustedSize,
1779                                0, NV_FALSE, ADDR_FBMEM, NV_MEMORY_UNCACHED,
1780                                MEMDESC_FLAGS_NONE);
1781 
1782         if (status != NV_OK)
1783         {
1784             NV_PRINTF(LEVEL_ERROR, "cannot alloc memDesc!\n");
1785             pMemDesc = pAllocRequest->pMemDesc = NULL;
1786             goto failed;
1787         }
1788 
1789         pMemDesc = pAllocRequest->pMemDesc;
1790         pMemDesc->pHeap = pHeap;
1791 
1792         memdescSetPteKind(pMemDesc, pFbAllocInfo->format);
1793         memdescSetHwResId(pMemDesc, pFbAllocInfo->hwResId);
1794     }
1795 
1796     // Try the noncontig allocator
1797     if (NV_OK == _heapAllocNoncontig(pGpu,
1798                                      hClient,
1799                                      pHeap,
1800                                      pAllocRequest,
1801                                      memHandle,
1802                                      pAllocData,
1803                                      pFbAllocInfo,
1804                                      textureClientIndex,
1805                                      pFbAllocInfo->alignPad,
1806                                      &allocatedOffset,
1807                                      pMemDesc,
1808                                      pHwResource))
1809     {
1810         *pNoncontigAllocation = NV_TRUE;
1811 
1812         //
1813         // The noncontig allocator calls _heapProcessFreeBlock()
1814         // by itself, so we goto done: straight
1815         //
1816         status = NV_OK;
1817         goto return_early;
1818     }
1819 
1820     NV_PRINTF(LEVEL_INFO,
1821               "failed to allocate block.  Heap total=0x%llx free=0x%llx\n",
1822               pHeap->total, pHeap->free);
1823     // Out of memory.
1824     goto failed;
1825 
1826     //
1827     // We have a match.  Now link it in, trimming or splitting
1828     // any slop from the enclosing block as needed.
1829     //
1830 
1831 got_one:
1832     if (NV_OK != _heapProcessFreeBlock(pGpu, pBlockFree,
1833                                        &pBlockNew, &pBlockSplit,
1834                                        pHeap, pAllocRequest,
1835                                        memHandle,
1836                                        pAllocData, pFbAllocInfo,
1837                                        pFbAllocInfo->alignPad,
1838                                        &allocatedOffset) ||
1839         NV_OK != _heapUpdate(pHeap, pBlockNew, BLOCK_FREE_STATE_CHANGED))
1840 failed:
1841     {
1842 
1843         NV_PRINTF(LEVEL_INFO,
1844                   "failed to allocate block.  Heap total=0x%llx free=0x%llx\n",
1845                   pHeap->total, pHeap->free);
1846 
1847         portMemFree(pBlockNew);
1848         pBlockNew = NULL;
1849         portMemFree(pBlockSplit);
1850         status = NV_ERR_NO_MEMORY;
1851         goto return_early;
1852     }
1853 
1854     //
1855     // If a client calls us with pVidHeapAlloc->type == NVOS32_TYPE_TEXTURE, but where flags
1856     // are non-zero, we won't call _heapSetTexturePlacement and initialize
1857     // textureClientIndex to a proper value (default is 0xFFFFFFFF). In that
1858     // case, we won't track this texture allocation. Bug 79586.
1859     //
1860     if (pVidHeapAlloc->type == NVOS32_TYPE_TEXTURE &&
1861         textureClientIndex != 0xFFFFFFFF)
1862     {
1863         pBlockNew->textureId = hClient;
1864         pHeap->textureData[textureClientIndex].refCount++;
1865     }
1866     else
1867     {
1868         pBlockNew->textureId = 0;
1869     }
1870 
1871     pFbAllocInfo->offset = allocatedOffset;
1872 
1873     // TODO : This must be inside *all* blocks of a noncontig allocation
1874     if (!*pNoncontigAllocation)
1875     {
1876         pBlockNew->pitch = pFbAllocInfo->pitch;
1877         pBlockNew->height = pFbAllocInfo->height;
1878         pBlockNew->width = pFbAllocInfo->width;
1879     }
1880 
1881     *pHwResource = &pBlockNew->hwResource;
1882 
1883     // Remember memory descriptor
1884     memdescDescribe(pMemDesc, ADDR_FBMEM, allocatedOffset, adjustedSize);
1885     pBlockNew->pMemDesc = pMemDesc;
1886     pBlockNew->allocedMemDesc = bAllocedMemdesc;
1887 
1888     status = NV_OK;
1889 
1890 return_early:
1891     HEAP_VALIDATE(pHeap);
1892 
1893     if (bTurnBlacklistOff)
1894     {
1895         if (!hypervisorIsVgxHyper())
1896             _heapBlacklistChunks(pGpu, pHeap, &pHeap->blackList, desiredOffset, pVidHeapAlloc->size);
1897         else
1898             _heapBlacklistChunksInFreeBlocks(pGpu, pHeap);
1899     }
1900 
1901     return status;
1902 }
1903 
/*!
 * @brief Re-run chunk blacklisting over every block on the heap free list.
 *
 * Walks the circular free-block list once and invokes _heapBlacklistChunks()
 * on the full [begin, end] extent of each free block, so that retired pages
 * falling inside currently-free memory get (re)blacklisted.
 *
 * @param[in] pGpu   GPU whose heap is being scanned
 * @param[in] pHeap  Heap whose free list is walked
 */
static void _heapBlacklistChunksInFreeBlocks
(
    OBJGPU *pGpu,
    Heap   *pHeap
)
{
    MEM_BLOCK *pAnchor = pHeap->pFreeBlockList;
    MEM_BLOCK *pCursor;

    // Empty free list: nothing to blacklist.
    if (pAnchor == NULL)
        return;

    //
    // Start at the head's predecessor so that the first u1.nextFree step in
    // the loop lands on the list head; the walk ends when the cursor comes
    // back around to the anchor (the list is circular).
    //
    pAnchor = pAnchor->u0.prevFree;
    pCursor = pAnchor;

    do
    {
        NvU64 start;
        NvU64 span;

        pCursor = pCursor->u1.nextFree;
        start   = pCursor->begin;
        span    = pCursor->end - start + 1;

        _heapBlacklistChunks(pGpu, pHeap, &pHeap->blackList, start, span);
    } while (pCursor != pAnchor);
}
1933 
/*!
 * @brief Release an allocated block back to the heap free list.
 *
 * Marks the block as free, returns its bytes to the heap's free accounting,
 * releases any HW resources (compression tags, ctags, etc.) associated with
 * it, drops per-client texture tracking, and finally either coalesces the
 * block with adjacent free neighbors or links it into the sorted circular
 * free list. Merged neighbor nodes are portMemFree'd, so the caller must not
 * use pBlock after this returns.
 *
 * @param[in] pGpu     GPU the heap belongs to
 * @param[in] pHeap    Heap that owns pBlock
 * @param[in] hClient  Client handle passed through to memmgrFreeHwResources
 * @param[in] hDevice  Device handle passed through to memmgrFreeHwResources
 * @param[in] pBlock   Block to free (must currently have a non-FREE owner)
 *
 * @return NV_OK on success;
 *         NV_ERR_INVALID_STATE on a double free or rb-tree update failure;
 *         NV_ERR_NO_MEMORY if the scratch FB_ALLOC_INFO allocation fails.
 */
static NV_STATUS _heapBlockFree
(
    OBJGPU      *pGpu,
    Heap        *pHeap,
    NvHandle     hClient,
    NvHandle     hDevice,
    MEM_BLOCK   *pBlock
)
{
    MEM_BLOCK       *pBlockTmp;
    NvU32            i;
    OBJOS           *pOS            = GPU_GET_OS(pGpu);
    MemoryManager   *pMemoryManager = GPU_GET_MEMORY_MANAGER(pGpu);
    NvBool           bBlocksMerged  = NV_FALSE;

    //
    // Check for valid owner: a block whose owner is already FREE is being
    // double-freed.
    //
    if (pBlock->owner == NVOS32_BLOCK_TYPE_FREE)
        return NV_ERR_INVALID_STATE;

    pBlock->owner = NVOS32_BLOCK_TYPE_FREE;

    // Re-key the block in the heap's tracking structures as a free block.
    if (NV_OK != _heapUpdate(pHeap, pBlock, BLOCK_FREE_STATE_CHANGED))
    {
        return NV_ERR_INVALID_STATE;
    }

    //
    // Update free count.
    //
    _heapAdjustFree(pHeap, pBlock->end - pBlock->begin + 1,
        FLD_TEST_DRF(OS32, _ATTR2, _INTERNAL, _YES, pBlock->hwResource.attr2));

    //
    // Release any HW resources that might've been in use
    //
    {
        FB_ALLOC_INFO        *pFbAllocInfo       = NULL;
        FB_ALLOC_PAGE_FORMAT *pFbAllocPageFormat = NULL;

        pFbAllocInfo = portMemAllocNonPaged(sizeof(FB_ALLOC_INFO));
        if (pFbAllocInfo == NULL)
        {
            NV_ASSERT(0);
            return NV_ERR_NO_MEMORY;
        }

        pFbAllocPageFormat = portMemAllocNonPaged(sizeof(FB_ALLOC_PAGE_FORMAT));
        if (pFbAllocPageFormat == NULL) {
            NV_ASSERT(0);
            portMemFree(pFbAllocInfo);
            return NV_ERR_NO_MEMORY;
        }

        portMemSet(pFbAllocInfo, 0, sizeof(FB_ALLOC_INFO));
        portMemSet(pFbAllocPageFormat, 0, sizeof(FB_ALLOC_PAGE_FORMAT));
        pFbAllocInfo->pageFormat = pFbAllocPageFormat;

        //
        // Reconstruct the FB_ALLOC_INFO that described this allocation from
        // the data cached on the block, so the memory manager can tear down
        // the matching HW resources.
        //
        pFbAllocInfo->pageFormat->type = pBlock->u0.type;
        pFbAllocInfo->hwResId = pBlock->hwResource.hwResId;
        pFbAllocInfo->height = 0;
        pFbAllocInfo->pitch = 0;
        pFbAllocInfo->size = pBlock->end - pBlock->begin + 1;
        pFbAllocInfo->align = pBlock->align;
        pFbAllocInfo->alignPad = pBlock->alignPad;
        pFbAllocInfo->offset = pBlock->begin;
        pFbAllocInfo->format = pBlock->format;
        pFbAllocInfo->comprCovg = pBlock->hwResource.comprCovg;
        pFbAllocInfo->zcullCovg = 0;
        pFbAllocInfo->pageFormat->attr  = pBlock->hwResource.attr;
        pFbAllocInfo->pageFormat->attr2 = pBlock->hwResource.attr2;
        pFbAllocInfo->ctagOffset = pBlock->hwResource.ctagOffset;
        pFbAllocInfo->hClient = hClient;
        pFbAllocInfo->hDevice = hDevice;

        memmgrFreeHwResources(pGpu, pMemoryManager, pFbAllocInfo);

        // Internal (RM-reserved) allocations get an OS free-notification hook.
        if (FLD_TEST_DRF(OS32, _ATTR2, _INTERNAL, _YES, pFbAllocInfo->pageFormat->attr2))
        {
            pOS->osInternalReserveFreeCallback(pFbAllocInfo->offset, pGpu->gpuId);
        }

        // Clear the HW resource associations since this block can be reused or merged.
        portMemSet(&pBlock->hwResource, 0, sizeof(pBlock->hwResource));

        portMemFree(pFbAllocPageFormat);
        portMemFree(pFbAllocInfo);
    }

    if ((pBlock->u0.type == NVOS32_TYPE_TEXTURE) && (pBlock->textureId != 0))
    {
        for (i = 0; i < MAX_TEXTURE_CLIENT_IDS; i++)
        {
            //
            // 1. Find the client within the textureData structure
            // 2. Once found, set the value to 0
            // 3. Then decrement its refCount
            // 4. If refCount goes to zero, reset the textureData structure
            //    that pertains to that index.
            //
            if (pHeap->textureData[i].clientId == pBlock->textureId)
            {
                pBlock->textureId = 0;
                pHeap->textureData[i].refCount--;
                if (pHeap->textureData[i].refCount == 0)
                    portMemSet(&pHeap->textureData[i], 0,
                               sizeof(TEX_INFO));
                break;
            }
        }
    }

    //
    // Account for freeing any reserved RM region.
    // NOTE(review): pBlock->owner was overwritten to NVOS32_BLOCK_TYPE_FREE
    // above, so this condition only matches if HEAP_OWNER_RM_RESERVED_REGION
    // has that same value — confirm whether the owner should have been
    // captured before it was overwritten.
    //
    if ((pBlock->u0.type == NVOS32_TYPE_RESERVED) && (pBlock->owner == HEAP_OWNER_RM_RESERVED_REGION))
    {
        NV_ASSERT(pHeap->reserved >= pBlock->end - pBlock->begin + 1);
        pHeap->reserved -= pBlock->end - pBlock->begin + 1;
    }

    //
    //
    // Can this merge with any surrounding free blocks?
    //
    if ((pBlock->prev->owner == NVOS32_BLOCK_TYPE_FREE) && (pBlock != pHeap->pBlockList))
    {
        //
        // Remove block to be freed and previous one since nodes will be
        // combined into single one.
        //
        if (NV_OK != _heapUpdate(pHeap, pBlock, BLOCK_REMOVE))
        {
            return NV_ERR_INVALID_STATE;
        }
        if (NV_OK != _heapUpdate(pHeap, pBlock->prev, BLOCK_REMOVE))
        {
            return NV_ERR_INVALID_STATE;
        }

        //
        // Merge with previous block: the predecessor absorbs this block's
        // extent and this node is freed; pBlock then refers to the survivor.
        //
        pBlock->prev->next = pBlock->next;
        pBlock->next->prev = pBlock->prev;
        pBlock->prev->end  = pBlock->end;
        pBlockTmp = pBlock;
        pBlock    = pBlock->prev;
        pHeap->numBlocks--;
        portMemFree(pBlockTmp);

        // re-insert updated free block into rb-tree
        if (NV_OK != _heapUpdate(pHeap, pBlock, BLOCK_SIZE_CHANGED))
        {
            return NV_ERR_INVALID_STATE;
        }

        bBlocksMerged = NV_TRUE;
    }

    if ((pBlock->next->owner == NVOS32_BLOCK_TYPE_FREE) && (pBlock->next != pHeap->pBlockList))
    {
        //
        // Remove block to be freed and next one since nodes will be
        // combined into single one.
        //
        if (NV_OK != _heapUpdate(pHeap, pBlock, BLOCK_REMOVE))
        {
            return NV_ERR_INVALID_STATE;
        }
        if (NV_OK != _heapUpdate(pHeap, pBlock->next, BLOCK_REMOVE))
        {
            return NV_ERR_INVALID_STATE;
        }

        //
        // Merge with next block: the successor absorbs this block's extent
        // (its begin moves down) and this node is freed.
        //
        pBlock->prev->next    = pBlock->next;
        pBlock->next->prev    = pBlock->prev;
        pBlock->next->begin   = pBlock->begin;

        if (pHeap->pBlockList == pBlock)
            pHeap->pBlockList  = pBlock->next;

        //
        // If we already merged with the previous block, pBlock is on the
        // free list and must be unlinked from it before being discarded.
        //
        if (bBlocksMerged)
        {
            if (pHeap->pFreeBlockList == pBlock)
                pHeap->pFreeBlockList  = pBlock->u1.nextFree;

            pBlock->u1.nextFree->u0.prevFree = pBlock->u0.prevFree;
            pBlock->u0.prevFree->u1.nextFree = pBlock->u1.nextFree;
        }

        pBlockTmp = pBlock;
        pBlock    = pBlock->next;
        pHeap->numBlocks--;
        portMemFree(pBlockTmp);

        // re-insert updated free block into rb-tree
        if (NV_OK != _heapUpdate(pHeap, pBlock, BLOCK_SIZE_CHANGED))
        {
            return NV_ERR_INVALID_STATE;
        }

        bBlocksMerged = NV_TRUE;
    }

    if (!bBlocksMerged)
    {
        //
        // Nothing was merged.  Add to free list, keeping the circular list
        // sorted by block offset.
        //
        pBlockTmp = pHeap->pFreeBlockList;
        if (!pBlockTmp)
        {
            // Free list was empty: this block becomes the whole list.
            pHeap->pFreeBlockList = pBlock;
            pBlock->u1.nextFree      = pBlock;
            pBlock->u0.prevFree      = pBlock;
        }
        else
        {
            if (pBlockTmp->begin > pBlock->begin)
                //
                // Insert into beginning of free list.
                //
                pHeap->pFreeBlockList = pBlock;
            else if (pBlockTmp->u0.prevFree->begin > pBlock->begin)
                //
                // Insert into the middle of the free list: advance to the
                // first entry whose begin is past ours.  (The do-while is
                // the sole statement of this else-if — no braces.)
                //
                do
                {
                    pBlockTmp = pBlockTmp->u1.nextFree;
                } while (pBlockTmp->begin < pBlock->begin);
                //
                // else: insert at end of list — pBlockTmp stays at the head,
                // and splicing before the head appends in a circular list.
                //
            pBlock->u1.nextFree = pBlockTmp;
            pBlock->u0.prevFree = pBlockTmp->u0.prevFree;
            pBlock->u0.prevFree->u1.nextFree = pBlock;
            pBlockTmp->u0.prevFree           = pBlock;
        }
    }

    // Scrub per-allocation metadata now that the block is free.
    pBlock->mhandle = 0x0;
    pBlock->align   = pBlock->begin;
    pBlock->alignPad = 0;
    pBlock->format  = 0;

    HEAP_VALIDATE(pHeap);
    return (NV_OK);
}
2187 
heapReference_IMPL(OBJGPU * pGpu,Heap * pHeap,NvU32 owner,MEMORY_DESCRIPTOR * pMemDesc)2188 NV_STATUS heapReference_IMPL
2189 (
2190     OBJGPU             *pGpu,
2191     Heap               *pHeap,
2192     NvU32                owner,
2193     MEMORY_DESCRIPTOR  *pMemDesc
2194 )
2195 {
2196     NvU64       offsetAlign = memdescGetPhysAddr(pMemDesc, AT_GPU, 0);
2197     MEM_BLOCK  *pBlock;
2198 
2199     // Bail out in case allocation is in PMA owned FB region.
2200     if (pMemDesc->pPmaAllocInfo)
2201     {
2202         if (0 != pMemDesc->pPmaAllocInfo->refCount)
2203         {
2204             pMemDesc->pPmaAllocInfo->refCount++;
2205             if (IsSLIEnabled(pGpu) &&
2206                 (memdescGetAddressSpace(pMemDesc) == ADDR_FBMEM))
2207             {                        //
2208                 memdescAddRef(pMemDesc); // Otherwise we have a fake parent descriptor removed with existing submem descriptors.
2209                                      // In SLI only (not fully understood yet!). In non SLI, that memAddref() causes a memleak.
2210                                      //
2211             }
2212         }
2213         return NV_OK;
2214     }
2215 
2216     if (owner == NVOS32_BLOCK_TYPE_FREE)
2217         return NV_ERR_INVALID_STATE;
2218 
2219     pBlock = _heapFindAlignedBlockWithOwner(pGpu, pHeap, owner, offsetAlign);
2220 
2221     if (!pBlock)
2222         return NV_ERR_INVALID_OFFSET;
2223 
2224     if (pBlock->refCount == HEAP_MAX_REF_COUNT)
2225     {
2226         NV_PRINTF(LEVEL_ERROR,
2227                   "heapReference: reference count %x will exceed maximum 0x%x:\n",
2228                   pBlock->refCount, HEAP_MAX_REF_COUNT);
2229         return NV_ERR_GENERIC;
2230     }
2231 
2232     pBlock->refCount++;
2233     if (IsSLIEnabled(pGpu) &&
2234         (memdescGetAddressSpace(pMemDesc) == ADDR_FBMEM))
2235     {                        //
2236         memdescAddRef(pMemDesc); // Otherwise we have a fake parent descriptor removed with existing submem descriptors.
2237                              // In SLI only (not fully understood yet!). In non SLI, that memAddref() causes a memleak.
2238                              //
2239     }
2240     return NV_OK;
2241 }
2242 
2243 static NV_STATUS
_heapFindBlockByOffset(OBJGPU * pGpu,Heap * pHeap,NvU32 owner,MEMORY_DESCRIPTOR * pMemDesc,NvU64 offset,MEM_BLOCK ** ppBlock)2244 _heapFindBlockByOffset
2245 (
2246     OBJGPU             *pGpu,
2247     Heap               *pHeap,
2248     NvU32               owner,
2249     MEMORY_DESCRIPTOR  *pMemDesc,
2250     NvU64               offset,
2251     MEM_BLOCK         **ppBlock
2252 )
2253 {
2254     NV_STATUS status;
2255 
2256     // IRQL TEST: must be running at equivalent of passive-level
2257     IRQL_ASSERT_AND_RETURN(!osIsRaisedIRQL());
2258 
2259     *ppBlock = _heapFindAlignedBlockWithOwner(pGpu, pHeap, owner,
2260                                               offset);
2261 
2262     if (!*ppBlock)
2263     {
2264         // Try finding block based solely on offset.  This is primarily needed
2265         // to successfully locate a block that was allocated multiple times via
2266         // NVOS32_ALLOC_FLAGS_FIXED_ADDRESS_ALLOCATE in heapAlloc:  there can
2267         // be multiple owners, so that _heapFindAlignedBlockWithOwner may fail.
2268         if ((status = heapGetBlock(pHeap, offset, ppBlock)) != NV_OK
2269                 || !*ppBlock)
2270             return NV_ERR_INVALID_OFFSET;
2271     }
2272 
2273     return NV_OK;
2274 }
2275 
2276 NV_STATUS
heapFree_IMPL(OBJGPU * pGpu,Heap * pHeap,NvHandle hClient,NvHandle hDevice,NvU32 owner,MEMORY_DESCRIPTOR * pMemDesc)2277 heapFree_IMPL
2278 (
2279     OBJGPU             *pGpu,
2280     Heap               *pHeap,
2281     NvHandle            hClient,
2282     NvHandle            hDevice,
2283     NvU32               owner,
2284     MEMORY_DESCRIPTOR  *pMemDesc
2285 )
2286 {
2287     NV_STATUS   status;
2288     MEM_BLOCK  *pBlock;
2289     MEM_BLOCK  *pNextBlock;
2290     NvU64       offsetAlign       = memdescGetPhysAddr(pMemDesc, AT_GPU, 0);
2291     NvU64       allocBegin        = 0;
2292     NvU64       allocEnd          = 0;
2293     NvBool      bTurnBlacklistOff = NV_FALSE;
2294     MemoryManager *pMemoryManager = GPU_GET_MEMORY_MANAGER(pGpu);
2295 
2296     NV_ASSERT_OR_RETURN(pMemDesc->pHeap == pHeap, NV_ERR_INVALID_ARGUMENT);
2297 
2298     if (memdescGetContiguity(pMemDesc, AT_GPU))
2299     {
2300         status = _heapFindBlockByOffset(pGpu, pHeap,
2301                                         owner, pMemDesc, offsetAlign,
2302                                         &pBlock);
2303         if (NV_OK != status)
2304         {
2305             return status;
2306         }
2307 
2308         if (pBlock->allocedMemDesc)
2309         {
2310             if (pMemDesc != pBlock->pMemDesc)
2311             {
2312                 NV_ASSERT(pMemDesc == pBlock->pMemDesc);
2313                 return NV_ERR_INVALID_ARGUMENT;
2314             }
2315 
2316             // Clear only if the memdesc is about to be freed by memdescDestroy()
2317             if (pMemDesc->RefCount == 1)
2318             {
2319                 pBlock->pMemDesc = NULL;
2320             }
2321 
2322             memdescFree(pMemDesc);
2323             memdescDestroy(pMemDesc);
2324         }
2325 
2326         if (--pBlock->refCount != 0)
2327             return NV_OK;
2328 
2329 
2330         if(pGpu->getProperty(pGpu, PDB_PROP_GPU_ALLOW_PAGE_RETIREMENT) &&
2331             gpuCheckPageRetirementSupport_HAL(pGpu))
2332         {
2333             if (FLD_TEST_DRF(OS32, _ATTR2, _BLACKLIST, _OFF, pBlock->hwResource.attr2))
2334             {
2335                     bTurnBlacklistOff = NV_TRUE;
2336                     allocBegin        = pBlock->begin;
2337                     allocEnd          = pBlock->end;
2338             }
2339         }
2340 
2341         //
2342         // Since _heapBlockFree() unconditionally releases HW resources
2343         // such as compression tags, some memory descriptor fields
2344         // are rendered stale.  These fields need to be reset to safer
2345         // default values (e.g. invalid HW resource ID, pitch PTE
2346         // kind, etc.) - they may be referenced again before the memory
2347         // descriptor itself is freed.
2348         //
2349         if (pBlock->allocedMemDesc && (pBlock->pMemDesc != NULL))
2350         {
2351             memdescSetHwResId(pMemDesc, 0);
2352             // XXX We cannot reset the PTE kind here since it cause corruption
2353             // in RAGE. See bug 949059
2354             //
2355             // This is an ugly hack to help OGL recover from modeswitch.
2356             // A cleaner fix would be to change the way memory is managed in OGL,
2357             // but it doesn't worth the effort to fix that on XP, since the OS is
2358             // close to end of life. The OGL linux team have plan to change their
2359             // memory management in the future, so later this hack may not be
2360             // required anymore
2361             // pMemDesc->PteKind = 0;
2362         }
2363 
2364         if ((status = _heapBlockFree(pGpu, pHeap, hClient, hDevice, pBlock)) != NV_OK)
2365         {
2366             NV_ASSERT(0);
2367         }
2368 
2369         //
2370         // since the mem desc  is freed, now we can reallocate the blacklisted pages
2371         // in the [allocBegin, allocEnd]
2372         //
2373         if (bTurnBlacklistOff)
2374             status = _heapBlacklistChunks(pGpu, pHeap, &pHeap->blackList, allocBegin, allocEnd-allocBegin+1);
2375 
2376         if (pMemoryManager->bEnableDynamicPageOfflining)
2377         {
2378             NvU32            i = 0;
2379             BLACKLIST       *pBlacklist       = &pHeap->blackList;
2380             BLACKLIST_CHUNK *pBlacklistChunks = pBlacklist->pBlacklistChunks;
2381 
2382             for (i = 0; i < pBlacklist->count; i++)
2383             {
2384                 if (pBlacklistChunks[i].bPendingRetirement &&
2385                    (pBlacklistChunks[i].physOffset >= allocBegin &&
2386                    pBlacklistChunks[i].physOffset <= allocEnd))
2387                 {
2388                     status = _heapBlacklistSingleChunk(pGpu, pHeap, &pBlacklist->pBlacklistChunks[i]);
2389                     if (NV_OK != status)
2390                     {
2391                         NV_PRINTF(LEVEL_ERROR, "heapBlacklistSingleChunk, status: %x!\n", status);
2392                         return status;
2393                     }
2394                 }
2395             }
2396         }
2397         return status;
2398     }
2399     else
2400     {
2401         NvBool bBlacklistFailed = NV_FALSE;
2402         //
2403         // Use the pMemDesc->PteArray[0] to find the first block
2404         // The remaining blocks can be found from each block's
2405         // noncontigAllocListNext pointer
2406         //
2407         status = _heapFindBlockByOffset(pGpu, pHeap,
2408                 owner, pMemDesc,
2409                 memdescGetPte(pMemDesc, AT_GPU, 0), &pBlock);
2410 
2411         if (NV_OK != status)
2412         {
2413             return status;
2414         }
2415 
2416         while (pBlock != NULL)
2417         {
2418             // _heapBlockFree() clears pBlock, so save the next pointer
2419             pNextBlock = pBlock->noncontigAllocListNext;
2420 
2421             if (--pBlock->refCount != 0)
2422             {
2423                 // Remove this block from the noncontig allocation list
2424                 pBlock->noncontigAllocListNext = NULL;
2425                 pBlock = pNextBlock;
2426                 continue;
2427             }
2428 
2429             if (NV_OK != (status = _heapBlockFree(pGpu, pHeap, hClient, hDevice, pBlock)))
2430                 return status;
2431 
2432             // check if we need to dynamically blacklist the page
2433             if (pMemoryManager->bEnableDynamicPageOfflining)
2434             {
2435                 NvU32            i = 0;
2436                 BLACKLIST       *pBlacklist       = &pHeap->blackList;
2437                 BLACKLIST_CHUNK *pBlacklistChunks = pBlacklist->pBlacklistChunks;
2438                 for (i = 0; i < pBlacklist->count; i++)
2439                 {
2440                     if (pBlacklistChunks[i].bPendingRetirement &&
2441                     (pBlacklistChunks[i].physOffset >= pBlock->begin &&
2442                     pBlacklistChunks[i].physOffset <= pBlock->end))
2443                     {
2444                         status = _heapBlacklistSingleChunk(pGpu, pHeap, &pBlacklist->pBlacklistChunks[i]);
2445                         if (NV_OK != status)
2446                         {
2447                             NV_PRINTF(LEVEL_ERROR, "heapBlacklistSingleChunk, status: %x!\n", status);
2448                             bBlacklistFailed = NV_TRUE;
2449                         }
2450                     }
2451                 }
2452             }
2453             pBlock = pNextBlock;
2454         }
2455 
2456         memdescFree(pMemDesc);
2457         memdescDestroy(pMemDesc);
2458 
2459         if (bBlacklistFailed)
2460         {
2461             return NV_ERR_INVALID_STATE;
2462         }
2463         else
2464         {
2465             return status;
2466         }
2467     }
2468 }
2469 
heapGetBlock_IMPL(Heap * pHeap,NvU64 offset,MEM_BLOCK ** ppMemBlock)2470 NV_STATUS heapGetBlock_IMPL
2471 (
2472     Heap       *pHeap,
2473     NvU64       offset,
2474     MEM_BLOCK **ppMemBlock
2475 )
2476 {
2477     NODE *pNode;
2478 
2479     if (btreeSearch(offset, &pNode, pHeap->pBlockTree) != NV_OK)
2480     {
2481         if (ppMemBlock)
2482         {
2483             *ppMemBlock = NULL;
2484         }
2485         return NV_ERR_GENERIC;
2486     }
2487 
2488     if (ppMemBlock)
2489     {
2490         *ppMemBlock = (MEM_BLOCK *)pNode->Data;
2491     }
2492 
2493     return NV_OK;
2494 }
2495 
_heapFindAlignedBlockWithOwner(OBJGPU * pGpu,Heap * pHeap,NvU32 owner,NvU64 offset)2496 static MEM_BLOCK *_heapFindAlignedBlockWithOwner
2497 (
2498     OBJGPU   *pGpu,
2499     Heap     *pHeap,
2500     NvU32     owner,
2501     NvU64     offset // aligned
2502 )
2503 {
2504     MEM_BLOCK  *pBlock;
2505     NODE       *pNode;
2506 
2507     HEAP_VALIDATE(pHeap);
2508 
2509     if (btreeSearch(offset, &pNode, pHeap->pBlockTree) != NV_OK)
2510     {
2511         return NULL;
2512     }
2513 
2514     pBlock = (MEM_BLOCK *)pNode->Data;
2515     if (pBlock->owner != owner)
2516     {
2517         return NULL;
2518     }
2519 
2520     return pBlock;
2521 }
2522 
heapGetSize_IMPL(Heap * pHeap,NvU64 * size)2523 NV_STATUS heapGetSize_IMPL
2524 (
2525     Heap  *pHeap,
2526     NvU64 *size
2527 )
2528 {
2529     *size = pHeap->total;
2530     HEAP_VALIDATE(pHeap);
2531     return (NV_OK);
2532 }
2533 
heapGetUsableSize_IMPL(Heap * pHeap,NvU64 * usableSize)2534 NV_STATUS heapGetUsableSize_IMPL
2535 (
2536     Heap  *pHeap,
2537     NvU64 *usableSize
2538 )
2539 {
2540     *usableSize = pHeap->total - pHeap->reserved;
2541     HEAP_VALIDATE(pHeap);
2542     return (NV_OK);
2543 }
2544 
heapGetFree_IMPL(Heap * pHeap,NvU64 * free)2545 NV_STATUS heapGetFree_IMPL
2546 (
2547     Heap  *pHeap,
2548     NvU64 *free
2549 )
2550 {
2551     *free = pHeap->free;
2552     HEAP_VALIDATE(pHeap);
2553     return (NV_OK);
2554 }
2555 
heapGetBase_IMPL(Heap * pHeap,NvU64 * base)2556 NV_STATUS heapGetBase_IMPL
2557 (
2558     Heap  *pHeap,
2559     NvU64 *base
2560 )
2561 {
2562     *base = pHeap->base;
2563     HEAP_VALIDATE(pHeap);
2564     return (NV_OK);
2565 }
2566 
_heapGetMaxFree(Heap * pHeap,NvU64 * maxOffset,NvU64 * maxFree)2567 static NV_STATUS _heapGetMaxFree
2568 (
2569     Heap  *pHeap,
2570     NvU64 *maxOffset,
2571     NvU64 *maxFree
2572 )
2573 {
2574     MEM_BLOCK  *pBlockFirstFree, *pBlockFree;
2575     NvU64       freeBlockSize;
2576 
2577     *maxFree = 0;
2578 
2579     pBlockFirstFree = pHeap->pFreeBlockList;
2580     if (!pBlockFirstFree)
2581         // There are no free blocks. Max free is already set to 0
2582         return (NV_OK);
2583 
2584     // Walk the free block list.
2585     pBlockFree = pBlockFirstFree;
2586     do {
2587         freeBlockSize = pBlockFree->end - pBlockFree->begin + 1;
2588         if (freeBlockSize > *maxFree)
2589         {
2590             *maxOffset = pBlockFree->begin;
2591             *maxFree = freeBlockSize;
2592         }
2593         pBlockFree = pBlockFree->u1.nextFree;
2594     } while (pBlockFree != pBlockFirstFree);
2595 
2596     return (NV_OK);
2597 }
2598 
heapInfo_IMPL(Heap * pHeap,NvU64 * bytesFree,NvU64 * bytesTotal,NvU64 * base,NvU64 * largestOffset,NvU64 * largestFree)2599 NV_STATUS heapInfo_IMPL
2600 (
2601     Heap  *pHeap,
2602     NvU64 *bytesFree,
2603     NvU64 *bytesTotal,
2604     NvU64 *base,
2605     NvU64 *largestOffset,      // largest free blocks offset
2606     NvU64 *largestFree         // largest free blocks size
2607 )
2608 {
2609     NV_STATUS status;
2610 
2611     *bytesFree  = pHeap->free;
2612     *bytesTotal = pHeap->total - pHeap->reserved;
2613     *base  = pHeap->base;
2614     status = _heapGetMaxFree(pHeap, largestOffset, largestFree);
2615     HEAP_VALIDATE(pHeap);
2616 
2617     return status;
2618 }
2619 
heapInfoTypeAllocBlocks_IMPL(Heap * pHeap,NvU32 type,NvU64 * bytesTotal)2620 NV_STATUS heapInfoTypeAllocBlocks_IMPL
2621 (
2622     Heap   *pHeap,
2623     NvU32   type,
2624     NvU64  *bytesTotal
2625 )
2626 {
2627     MEM_BLOCK  *pBlock;
2628     NvU64       total;
2629 
2630     if (type >= NVOS32_NUM_MEM_TYPES) return (NV_ERR_GENERIC);
2631 
2632     pBlock = pHeap->pBlockList;
2633     total = 0;
2634 
2635     if (type == NVOS32_TYPE_OWNER_RM)
2636     {
2637         //
2638         // Scan for all the blocks whose owner is within
2639         // HEAP_OWNER_RM_SCRATCH_BEGIN and HEAP_OWNER_RM_SCRATCH_END
2640         // this is strictly speaking not 'type' search. Also note that this
2641         // includes reserved space in any,.like in case of 3FB mixed density mode.
2642         //
2643         do
2644         {
2645             if ( (pBlock->owner > HEAP_OWNER_RM_SCRATCH_BEGIN) &&
2646                  (pBlock->owner < HEAP_OWNER_RM_SCRATCH_END) )
2647             {
2648                 total += (pBlock->end - pBlock->begin + 1);
2649             }
2650             pBlock = pBlock->next;
2651         } while (pBlock != pHeap->pBlockList);
2652     }
2653     else
2654     {
2655         //
2656         // Scan for all the blocks belonging to this type.
2657         //
2658         do
2659         {
2660             if (pBlock->u0.type == type)
2661                 total += (pBlock->end - pBlock->begin + 1);
2662             pBlock = pBlock->next;
2663         } while (pBlock != pHeap->pBlockList);
2664     }
2665 
2666     *bytesTotal = total;
2667 
2668     HEAP_VALIDATE(pHeap);
2669     return NV_OK;
2670 }
2671 
heapGetBlockHandle_IMPL(Heap * pHeap,NvU32 owner,NvU32 type,NvU64 offset,NvBool bSkipCheck,NvHandle * puHandle)2672 NV_STATUS heapGetBlockHandle_IMPL(
2673     Heap       *pHeap,
2674     NvU32       owner,
2675     NvU32       type,
2676     NvU64       offset,
2677     NvBool      bSkipCheck,     // NV_TRUE if skip alignment/type check
2678     NvHandle   *puHandle
2679 )
2680 {
2681     MEM_BLOCK *pBlock;
2682     NV_STATUS status;
2683 
2684     if (offset > (pHeap->base + pHeap->total - 1)) return (NV_ERR_GENERIC);
2685 
2686     status = heapGetBlock(pHeap, offset, &pBlock);
2687     if (status != NV_OK)
2688     {
2689         return status;
2690     }
2691 
2692     if (!((pBlock->owner == owner) &&
2693           (((pBlock->u0.type == type) && (pBlock->align == offset)) || bSkipCheck)))
2694     {
2695         return NV_ERR_GENERIC;
2696     }
2697 
2698     *puHandle = pBlock->mhandle;
2699     return NV_OK;
2700 }
2701 
2702 //
2703 // Returns the number of blocks (free or allocated) currently in the heap
2704 //
heapGetNumBlocks_IMPL(Heap * pHeap)2705 NvU32 heapGetNumBlocks_IMPL
2706 (
2707     Heap *pHeap
2708 )
2709 {
2710     return pHeap->numBlocks;
2711 }
2712 
2713 //
2714 // Copies over block information for each block in the heap into the provided buffer
2715 //
heapGetBlockInfo_IMPL(Heap * pHeap,NvU32 size,NVOS32_HEAP_DUMP_BLOCK * pBlockBuffer)2716 NV_STATUS heapGetBlockInfo_IMPL
2717 (
2718     Heap                   *pHeap,
2719     NvU32                   size,
2720     NVOS32_HEAP_DUMP_BLOCK *pBlockBuffer
2721 )
2722 {
2723     MEM_BLOCK *pBlock;
2724     NvU32                   heapSize, i;
2725     NV_STATUS               rmStatus = NV_OK;
2726 
2727     // ensure buffer is the same size
2728     heapSize = heapGetNumBlocks(pHeap);
2729     NV_ASSERT_OR_RETURN(heapSize == size, NV_ERR_INVALID_ARGUMENT);
2730 
2731     pBlock = pHeap->pBlockList;
2732     for (i=0; i<heapSize; i++)
2733     {
2734         pBlockBuffer->begin = pBlock->begin;
2735         pBlockBuffer->align = pBlock->align;
2736         pBlockBuffer->end = pBlock->end;
2737         pBlockBuffer->owner = pBlock->owner;
2738         pBlockBuffer->format = pBlock->format;
2739         pBlock = pBlock->next;
2740         pBlockBuffer++;
2741     }
2742 
2743     return rmStatus;
2744 }
2745 
/*!
 * @brief Compute allocation hints — alignment, bank placement, attributes,
 *        kind — for a prospective heap allocation, WITHOUT allocating memory.
 *
 * HW resources are probed via memmgrAllocHwResources() with
 * NVOS32_ALLOC_FLAGS_SKIP_RESOURCE_ALLOC set, so nothing is actually
 * reserved; the resulting attributes are returned through pAllocHint.
 *
 * @param[in]     pGpu        OBJGPU pointer
 * @param[in]     pHeap       Heap the allocation would come from
 * @param[in]     hClient     Client handle passed to the memory manager
 * @param[in]     hDevice     Device handle passed to the memory manager
 * @param[in,out] pAllocHint  In: requested type/flags/size/attrs.
 *                            Out: refined size, alignment, attrs, kind,
 *                            bank placement.
 *
 * @return NV_OK on success, or an NV_STATUS error code.
 */
NV_STATUS heapAllocHint_IMPL
(
    OBJGPU                 *pGpu,
    Heap                   *pHeap,
    NvHandle                hClient,
    NvHandle                hDevice,
    HEAP_ALLOC_HINT_PARAMS *pAllocHint
)
{
    MemoryManager          *pMemoryManager      = GPU_GET_MEMORY_MANAGER(pGpu);
    NvU64                   alignment;
    NV_STATUS               status;
    NvBool                  ignoreBankPlacement;
    NvU32                   textureClientIndex  = 0xFFFFFFFF;
    NvU32                   bankPlacement       = 0;
    NvU8                    currentBankInfo     = 0;
    FB_ALLOC_INFO          *pFbAllocInfo        = NULL;
    FB_ALLOC_PAGE_FORMAT   *pFbAllocPageFormat  = NULL;
    NvU64                   pageSize            = 0;
    NvU32                   flags;
    NvU32                   owner;

    // Check for valid size.
    NV_ASSERT_OR_RETURN((pAllocHint->pSize != NULL), NV_ERR_INVALID_ARGUMENT);

    // Ensure a valid allocation type was passed in
    NV_ASSERT_OR_RETURN((pAllocHint->type < NVOS32_NUM_MEM_TYPES), NV_ERR_INVALID_ARGUMENT);

    // As we will dereference these two later, we should not allow NULL value.
    NV_ASSERT_OR_RETURN(((pAllocHint->pHeight != NULL) && (pAllocHint->pAttr != NULL)), NV_ERR_INVALID_ARGUMENT);

    // Owner 0 is used for the placement query only; no block is claimed.
    owner = 0x0;
    status = _heapGetBankPlacement(pGpu, pHeap, owner,
                                   &pAllocHint->flags, pAllocHint->type,
                                   0x0, &bankPlacement);
    if (status != NV_OK)
    {
        NV_PRINTF(LEVEL_ERROR,
                  "_heapGetBankPlacement failed for current allocation\n");
        goto exit;
    }

    pFbAllocInfo = portMemAllocNonPaged(sizeof(FB_ALLOC_INFO));
    if (pFbAllocInfo == NULL)
    {
        NV_ASSERT(0);
        status = NV_ERR_NO_MEMORY;
        goto exit;
    }

    pFbAllocPageFormat = portMemAllocNonPaged(sizeof(FB_ALLOC_PAGE_FORMAT));
    if (pFbAllocPageFormat == NULL) {
        NV_ASSERT(0);
        status = NV_ERR_NO_MEMORY;
        goto exit;
    }

    portMemSet(pFbAllocInfo, 0, sizeof(FB_ALLOC_INFO));
    portMemSet(pFbAllocPageFormat, 0, sizeof(FB_ALLOC_PAGE_FORMAT));
    pFbAllocInfo->pageFormat = pFbAllocPageFormat;

    // Populate the scratch FB_ALLOC_INFO from the caller's hint parameters.
    pFbAllocInfo->pageFormat->type  = pAllocHint->type;
    pFbAllocInfo->hwResId       = 0;
    pFbAllocInfo->pad           = 0;
    pFbAllocInfo->height        = *pAllocHint->pHeight;
    pFbAllocInfo->width         = *pAllocHint->pWidth;
    pFbAllocInfo->pitch         = (pAllocHint->pPitch) ? (*pAllocHint->pPitch) : 0;
    pFbAllocInfo->size          = *pAllocHint->pSize;
    pFbAllocInfo->pageFormat->kind  = 0;
    pFbAllocInfo->offset        = ~0;
    pFbAllocInfo->hClient       = hClient;
    pFbAllocInfo->hDevice       = hDevice;
    pFbAllocInfo->pageFormat->flags = pAllocHint->flags;
    pFbAllocInfo->pageFormat->attr  = *pAllocHint->pAttr;
    pFbAllocInfo->retAttr       = *pAllocHint->pAttr;
    pFbAllocInfo->pageFormat->attr2 = *pAllocHint->pAttr2;
    pFbAllocInfo->retAttr2      = *pAllocHint->pAttr2;
    pFbAllocInfo->format        = pAllocHint->format;

    // Honor a caller-supplied alignment only when explicitly hinted/forced.
    if ((pAllocHint->flags & NVOS32_ALLOC_FLAGS_ALIGNMENT_HINT) ||
        (pAllocHint->flags & NVOS32_ALLOC_FLAGS_ALIGNMENT_FORCE))
        pFbAllocInfo->align = *pAllocHint->pAlignment;
    else
        pFbAllocInfo->align = RM_PAGE_SIZE;

    // Fetch RM page size
    pageSize = memmgrDeterminePageSize(pMemoryManager, pFbAllocInfo->hClient, pFbAllocInfo->size,
                                       pFbAllocInfo->format, pFbAllocInfo->pageFormat->flags,
                                       &pFbAllocInfo->retAttr, &pFbAllocInfo->retAttr2);
    if (pageSize == 0)
    {
        status = NV_ERR_INVALID_STATE;
        NV_PRINTF(LEVEL_ERROR, "memmgrDeterminePageSize failed, status: 0x%x\n", status);
        goto exit;
    }

    // Fetch memory alignment
    status = memmgrAllocDetermineAlignment_HAL(pGpu, pMemoryManager, &pFbAllocInfo->size, &pFbAllocInfo->align,
                                               pFbAllocInfo->alignPad, pFbAllocInfo->pageFormat->flags,
                                               pFbAllocInfo->retAttr, pFbAllocInfo->retAttr2, 0);
    if (status != NV_OK)
    {
        NV_PRINTF(LEVEL_ERROR, "memmgrAllocDetermineAlignment failed, status: 0x%x\n", status);
        goto exit;
    }

    //
    // Call into HAL to reserve any hardware resources for
    // the specified memory type.
    // If the alignment was changed due to a HW limitation, and the
    // flag NVOS32_ALLOC_FLAGS_ALIGNMENT_FORCE is set, bad_argument
    // will be passed back from the HAL
    //
    // SKIP_RESOURCE_ALLOC is set (and restored afterwards) so this is only
    // a probe: attributes/kind are computed but nothing is reserved.
    //
    flags = pFbAllocInfo->pageFormat->flags;
    pFbAllocInfo->pageFormat->flags |= NVOS32_ALLOC_FLAGS_SKIP_RESOURCE_ALLOC;
    status = memmgrAllocHwResources(pGpu, pMemoryManager, pFbAllocInfo);
    pFbAllocInfo->pageFormat->flags = flags;
    *pAllocHint->pAttr  = pFbAllocInfo->retAttr;
    *pAllocHint->pAttr2 = pFbAllocInfo->retAttr2;
    *pAllocHint->pKind  = pFbAllocInfo->pageFormat->kind;

    // Save retAttr as Possible Attributes that have passed error checking and
    // clear retAttr because we have not allocated them yet
    pFbAllocInfo->possAttr = pFbAllocInfo->retAttr;
    // pFbAllocInfo->possAttr2 = pFbAllocInfo->retAttr2;
    pFbAllocInfo->retAttr = 0x0;
    pFbAllocInfo->retAttr2 = 0x0;
    if (status != NV_OK)
    {
        //
        // probably means we passed in a bogus type or no tiling resources available
        // when tiled memory attribute was set to REQUIRED
        //
        NV_PRINTF(LEVEL_ERROR, "memmgrAllocHwResources failed, status: 0x%x\n",
                  status);
        goto exit;
    }

    //
    // Refresh search parameters.
    //
    // Height/pitch are only meaningful for non-block-linear formats.
    if ((DRF_VAL(OS32, _ATTR, _FORMAT, *pAllocHint->pAttr) != NVOS32_ATTR_FORMAT_BLOCK_LINEAR))
    {
        *pAllocHint->pHeight     = pFbAllocInfo->height;
        if (pAllocHint->pPitch)
            *pAllocHint->pPitch  = pFbAllocInfo->pitch;
    }

    //
    // The heap allocator has assumed required alignments are powers of 2
    // (aligning FB offsets has been done using bit masks).
    //
    //
    *pAllocHint->pAlignment = pFbAllocInfo->align + 1;      // convert mask to size
    alignment = pFbAllocInfo->align + 1;

    //
    // Allow caller to request host page alignment to make it easier
    // to move things around with host os VM subsystem
    //

    if (pAllocHint->flags & NVOS32_ALLOC_FLAGS_FORCE_ALIGN_HOST_PAGE)
    {
        OBJSYS *pSys = SYS_GET_INSTANCE();
        NvU64   hostPageSize = pSys->cpuInfo.hostPageSize;

        // hostPageSize *should* always be set, but....
        if (hostPageSize == 0)
            hostPageSize = RM_PAGE_SIZE;

        alignment = memUtilsLeastCommonAlignment(alignment, hostPageSize);
    }

    // An alignment as large as the whole heap can never be satisfied in FB.
    if (memmgrAllocGetAddrSpace(pMemoryManager, pAllocHint->flags, *pAllocHint->pAttr) == ADDR_FBMEM)
    {
        if (alignment >= pHeap->total)
        {
            status = NV_ERR_INVALID_ARGUMENT;
            NV_PRINTF(LEVEL_ERROR, "heapAllocHint failed due to alignmend >= pHeap->total\n");
            goto exit;
        }
    }

    //
    // Check if NVOS32_ALLOC_FLAGS_IGNORE_BANK_PLACEMENT was passed in with
    // the type to ignore placing this allocation in a particular bank.
    // This means we default to the second loop where we choose first fit.
    //
    ignoreBankPlacement = NV_FALSE;
    if (pAllocHint->flags & NVOS32_ALLOC_FLAGS_IGNORE_BANK_PLACEMENT)
        ignoreBankPlacement = NV_TRUE;

    // Textures with no flags get the per-client texture placement policy.
    if ((pAllocHint->type == NVOS32_TYPE_TEXTURE) && (!pAllocHint->flags))
        _heapSetTexturePlacement(pHeap, pAllocHint->client, pAllocHint->type, &ignoreBankPlacement, &textureClientIndex, &currentBankInfo);

    pAllocHint->bankPlacement = bankPlacement;
    pAllocHint->ignoreBankPlacement = ignoreBankPlacement;

    *pAllocHint->pHeight = pFbAllocInfo->height;
    pAllocHint->pad = pFbAllocInfo->pad;

    *pAllocHint->pSize = pFbAllocInfo->size;           // returned to caller

    pAllocHint->alignAdjust = 0;

exit:
    // portMemFree() tolerates NULL, so the early-error paths are safe here.
    portMemFree(pFbAllocPageFormat);
    portMemFree(pFbAllocInfo);

    return status;
}
2957 
heapHwAlloc_IMPL(OBJGPU * pGpu,Heap * pHeap,NvHandle hClient,NvHandle hDevice,NvHandle hMemory,MEMORY_HW_RESOURCES_ALLOCATION_REQUEST * pHwAlloc,NvU32 * pAttr,NvU32 * pAttr2)2958 NV_STATUS heapHwAlloc_IMPL
2959 (
2960     OBJGPU         *pGpu,
2961     Heap           *pHeap,
2962     NvHandle        hClient,
2963     NvHandle        hDevice,
2964     NvHandle        hMemory,
2965     MEMORY_HW_RESOURCES_ALLOCATION_REQUEST *pHwAlloc,
2966     NvU32           *pAttr,
2967     NvU32           *pAttr2
2968 )
2969 {
2970     MemoryManager          *pMemoryManager = GPU_GET_MEMORY_MANAGER(pGpu);
2971     NV_STATUS               status = NV_OK;
2972     FB_ALLOC_INFO          *pFbAllocInfo = NULL;
2973     FB_ALLOC_PAGE_FORMAT   *pFbAllocPageFormat = NULL;
2974     NvU64                   pageSize = 0;
2975     NV_MEMORY_HW_RESOURCES_ALLOCATION_PARAMS *pUserParams = pHwAlloc->pUserParams;
2976 
2977     // Ensure a valid allocation type was passed in
2978     if (pUserParams->type > NVOS32_NUM_MEM_TYPES - 1)
2979         return NV_ERR_GENERIC;
2980 
2981     pFbAllocInfo = portMemAllocNonPaged(sizeof(FB_ALLOC_INFO));
2982     if (NULL == pFbAllocInfo)
2983     {
2984         NV_PRINTF(LEVEL_ERROR, "No memory for Resource %p\n",
2985                   pHwAlloc->pHandle);
2986         status = NV_ERR_GENERIC;
2987         goto failed;
2988     }
2989     pFbAllocPageFormat = portMemAllocNonPaged(sizeof(FB_ALLOC_PAGE_FORMAT));
2990     if (NULL == pFbAllocPageFormat)
2991     {
2992         NV_PRINTF(LEVEL_ERROR, "No memory for Resource %p\n",
2993                   pHwAlloc->pHandle);
2994         status = NV_ERR_GENERIC;
2995         goto failed;
2996     }
2997 
2998     portMemSet(pFbAllocInfo, 0x0, sizeof(FB_ALLOC_INFO));
2999     portMemSet(pFbAllocPageFormat, 0x0, sizeof(FB_ALLOC_PAGE_FORMAT));
3000     pFbAllocInfo->pageFormat        = pFbAllocPageFormat;
3001     pFbAllocInfo->pageFormat->type  = pUserParams->type;
3002     pFbAllocInfo->hwResId       = 0;
3003     pFbAllocInfo->pad           = 0;
3004     pFbAllocInfo->height        = pUserParams->height;
3005     pFbAllocInfo->width         = pUserParams->width;
3006     pFbAllocInfo->pitch         = pUserParams->pitch;
3007     pFbAllocInfo->size          = pUserParams->size;
3008     pFbAllocInfo->origSize      = pUserParams->size;
3009     pFbAllocInfo->pageFormat->kind  = pUserParams->kind;
3010     pFbAllocInfo->offset        = memmgrGetInvalidOffset_HAL(pGpu, pMemoryManager);
3011     pFbAllocInfo->hClient       = hClient;
3012     pFbAllocInfo->hDevice       = hDevice;
3013     pFbAllocInfo->pageFormat->flags = pUserParams->flags;
3014     pFbAllocInfo->pageFormat->attr  = pUserParams->attr;
3015     pFbAllocInfo->pageFormat->attr2 = pUserParams->attr2;
3016     pFbAllocInfo->retAttr       = pUserParams->attr;
3017     pFbAllocInfo->retAttr2      = pUserParams->attr2;
3018     pFbAllocInfo->comprCovg     = pUserParams->comprCovg;
3019     pFbAllocInfo->zcullCovg     = 0;
3020     pFbAllocInfo->internalflags = 0;
3021 
3022     if ((pUserParams->flags & NVOS32_ALLOC_FLAGS_ALIGNMENT_HINT) ||
3023         (pUserParams->flags & NVOS32_ALLOC_FLAGS_ALIGNMENT_FORCE))
3024         pFbAllocInfo->align = pUserParams->alignment;
3025     else
3026         pFbAllocInfo->align = RM_PAGE_SIZE;
3027 
3028     // Fetch RM page size
3029     pageSize = memmgrDeterminePageSize(pMemoryManager, pFbAllocInfo->hClient, pFbAllocInfo->size,
3030                                        pFbAllocInfo->format, pFbAllocInfo->pageFormat->flags,
3031                                        &pFbAllocInfo->retAttr, &pFbAllocInfo->retAttr2);
3032     if (pageSize == 0)
3033     {
3034         status = NV_ERR_INVALID_STATE;
3035         NV_PRINTF(LEVEL_ERROR, "memmgrDeterminePageSize failed\n");
3036     }
3037 
3038     // Fetch memory alignment
3039     status = memmgrAllocDetermineAlignment_HAL(pGpu, pMemoryManager, &pFbAllocInfo->size, &pFbAllocInfo->align,
3040                                                pFbAllocInfo->alignPad, pFbAllocInfo->pageFormat->flags,
3041                                                pFbAllocInfo->retAttr, pFbAllocInfo->retAttr2, 0);
3042     if (status != NV_OK)
3043     {
3044         NV_PRINTF(LEVEL_ERROR, "memmgrAllocDetermineAlignment failed\n");
3045     }
3046 
3047     //
3048     // vGPU:
3049     //
3050     // Since vGPU does all real hardware management in the
3051     // host, if we are in guest OS (where IS_VIRTUAL(pGpu) is true),
3052     // do an RPC to the host to do the hardware update.
3053     //
3054     if ((status == NV_OK) && IS_VIRTUAL(pGpu))
3055     {
3056         if (vgpuIsGuestManagedHwAlloc(pGpu) &&
3057             (FLD_TEST_DRF(OS32, _ATTR, _COMPR, _NONE, pFbAllocInfo->pageFormat->attr)))
3058         {
3059             status = memmgrAllocHwResources(pGpu, pMemoryManager, pFbAllocInfo);
3060             pHwAlloc->hwResource.isVgpuHostAllocated = NV_FALSE;
3061             NV_ASSERT(status == NV_OK);
3062         }
3063         else
3064         {
3065             NV_RM_RPC_MANAGE_HW_RESOURCE_ALLOC(pGpu,
3066                                                hClient,
3067                                                hDevice,
3068                                                hMemory,
3069                                                pFbAllocInfo,
3070                                                status);
3071             pHwAlloc->hwResource.isVgpuHostAllocated = NV_TRUE;
3072         }
3073 
3074         pUserParams->uncompressedKind      = pFbAllocInfo->uncompressedKind;
3075         pUserParams->compPageShift         = pFbAllocInfo->compPageShift;
3076         pUserParams->compressedKind        = pFbAllocInfo->compressedKind;
3077         pUserParams->compTagLineMin        = pFbAllocInfo->compTagLineMin;
3078         pUserParams->compPageIndexLo       = pFbAllocInfo->compPageIndexLo;
3079         pUserParams->compPageIndexHi       = pFbAllocInfo->compPageIndexHi;
3080         pUserParams->compTagLineMultiplier = pFbAllocInfo->compTagLineMultiplier;
3081     }
3082     else
3083     {
3084         //
3085         // Call into HAL to reserve any hardware resources for
3086         // the specified memory type.
3087         // If the alignment was changed due to a HW limitation, and the
3088         // flag NVOS32_ALLOC_FLAGS_ALIGNMENT_FORCE is set, bad_argument
3089         // will be passed back from the HAL
3090         //
3091         status = memmgrAllocHwResources(pGpu, pMemoryManager, pFbAllocInfo);
3092     }
3093 
3094     // Is status bad or did we request attributes and they failed
3095     if ((status != NV_OK) || ((pUserParams->attr) && (0x0 == pFbAllocInfo->retAttr)))
3096     {
3097         //
3098         // probably means we passed in a bogus type or no tiling resources available
3099         // when tiled memory attribute was set to REQUIRED
3100         //
3101         if (status != NV_OK)
3102         {
3103             NV_PRINTF(LEVEL_ERROR,
3104                       "nvHalFbAlloc failure status = 0x%x Requested Attr 0x%x!\n",
3105                       status, pUserParams->attr);
3106         }
3107         else
3108         {
3109             NV_PRINTF(LEVEL_WARNING,
3110                       "nvHalFbAlloc Out of Resources Requested=%x Returned=%x !\n",
3111                       pUserParams->attr, pFbAllocInfo->retAttr);
3112         }
3113         goto failed;
3114     }
3115 
3116     //
3117     // Refresh search parameters.
3118     //
3119     pUserParams->pitch  = pFbAllocInfo->pitch;
3120 
3121     pUserParams->height = pFbAllocInfo->height;
3122     pHwAlloc->pad = NvU64_LO32(pFbAllocInfo->pad);
3123     pUserParams->kind = pFbAllocInfo->pageFormat->kind;
3124     pHwAlloc->hwResId = pFbAllocInfo->hwResId;
3125 
3126     pUserParams->size = pFbAllocInfo->size;           // returned to caller
3127 
3128     pHwAlloc->hwResource.attr = pFbAllocInfo->retAttr;
3129     pHwAlloc->hwResource.attr2 = pFbAllocInfo->retAttr2;
3130     pHwAlloc->hwResource.comprCovg = pFbAllocInfo->comprCovg;
3131     pHwAlloc->hwResource.ctagOffset = pFbAllocInfo->ctagOffset;
3132     pHwAlloc->hwResource.hwResId = pFbAllocInfo->hwResId;
3133 
3134     *pAttr  = pFbAllocInfo->retAttr;
3135     *pAttr2 = pFbAllocInfo->retAttr2;
3136 
3137 failed:
3138     portMemFree(pFbAllocPageFormat);
3139     portMemFree(pFbAllocInfo);
3140 
3141     return status;
3142 }
3143 
/*!
 * @brief Release the HW resources backing a Memory allocation.
 *
 * Builds an FB_ALLOC_INFO descriptor from the Memory object and either frees
 * the HW resources locally or, when running as a vGPU guest whose host owns
 * the allocation, issues an RPC so the host performs the free.
 *
 * @param[in] pGpu     OBJGPU pointer
 * @param[in] pHeap    Heap pointer (part of the interface; not read here)
 * @param[in] pMemory  Memory object whose HW resources are to be freed
 * @param[in] flags    Free flags forwarded on the host RPC path
 */
void heapHwFree_IMPL
(
    OBJGPU   *pGpu,
    Heap     *pHeap,
    Memory   *pMemory,
    NvU32     flags
)
{
    MemoryManager        *pMemoryManager = GPU_GET_MEMORY_MANAGER(pGpu);
    FB_ALLOC_INFO        *pInfo   = NULL;
    FB_ALLOC_PAGE_FORMAT *pFormat = NULL;

    pInfo = portMemAllocNonPaged(sizeof(FB_ALLOC_INFO));
    if (pInfo == NULL)
    {
        NV_ASSERT(0);
        goto cleanup;
    }

    pFormat = portMemAllocNonPaged(sizeof(FB_ALLOC_PAGE_FORMAT));
    if (pFormat == NULL)
    {
        NV_ASSERT(0);
        goto cleanup;
    }

    portMemSet(pInfo, 0, sizeof(*pInfo));
    portMemSet(pFormat, 0, sizeof(*pFormat));

    // Reconstruct the allocation descriptor from the Memory object.
    pInfo->pageFormat = pFormat;
    pFormat->type     = pMemory->Type;
    pFormat->attr     = pMemory->pHwResource->attr;
    pFormat->attr2    = pMemory->pHwResource->attr2;
    pInfo->hwResId    = pMemory->pHwResource->hwResId;
    pInfo->size       = pMemory->Length;
    pInfo->format     = memdescGetPteKind(pMemory->pMemDesc);
    pInfo->offset     = ~0;
    pInfo->hClient    = RES_GET_CLIENT_HANDLE(pMemory);
    pInfo->hDevice    = RES_GET_HANDLE(pMemory->pDevice);

    //
    // vGPU: the host does all real hardware management, so in a guest OS
    // (IS_VIRTUAL(pGpu) true) the hardware update is done via RPC unless the
    // guest manages this particular allocation itself.
    //
    if (IS_VIRTUAL(pGpu))
    {
        if (vgpuIsGuestManagedHwAlloc(pGpu) && !pMemory->pHwResource->isVgpuHostAllocated)
        {
            memmgrFreeHwResources(pGpu, pMemoryManager, pInfo);
        }
        else
        {
            NV_STATUS rmStatus = NV_OK;

            NV_RM_RPC_MANAGE_HW_RESOURCE_FREE(pGpu,
                    RES_GET_CLIENT_HANDLE(pMemory),
                    RES_GET_HANDLE(pMemory->pDevice),
                    RES_GET_HANDLE(pMemory),
                    flags,
                    rmStatus);
        }
    }
    else
    {
        memmgrFreeHwResources(pGpu, pMemoryManager, pInfo);
    }

cleanup:
    portMemFree(pFormat);
    portMemFree(pInfo);
}
3218 
heapFreeBlockCount_IMPL(OBJGPU * pGpu,Heap * pHeap,NvU32 * pCount)3219 NV_STATUS heapFreeBlockCount_IMPL(OBJGPU *pGpu, Heap *pHeap, NvU32 *pCount)
3220 {
3221     MEM_BLOCK *pMemBlock;
3222 
3223     pMemBlock = pHeap->pFreeBlockList;
3224     *pCount = 0;
3225 
3226     if (pMemBlock == NULL)
3227     {
3228         return NV_OK;
3229     }
3230 
3231     do
3232     {
3233         (*pCount)++;
3234         pMemBlock = pMemBlock->u1.nextFree;
3235     } while (pMemBlock != pHeap->pFreeBlockList);
3236 
3237     return NV_OK;
3238 }
3239 
heapFreeBlockInfo_IMPL(OBJGPU * pGpu,Heap * pHeap,NvU32 Count,void * pVoidInfo)3240 NV_STATUS heapFreeBlockInfo_IMPL(OBJGPU *pGpu, Heap *pHeap, NvU32 Count, void *pVoidInfo)
3241 {
3242     NVOS32_BLOCKINFO   *pBlockInfo = pVoidInfo;
3243     NvU32               actualCount;
3244     MEM_BLOCK          *pMemBlock;
3245     NV_STATUS           rmStatus = NV_ERR_GENERIC;
3246     MemoryManager      *pMemoryManager = GPU_GET_MEMORY_MANAGER(pGpu);
3247     NvU64               maxCpuOffset;
3248 
3249     heapFreeBlockCount(pGpu, pHeap, &actualCount);
3250 
3251     if ((actualCount == Count) && (NULL != pBlockInfo))
3252     {
3253         if (actualCount == 0)
3254         {
3255             return NV_OK;
3256         }
3257 
3258         maxCpuOffset = (pMemoryManager->Ram.mapRamSizeMb*0x100000) - 1;
3259         pMemBlock = pHeap->pFreeBlockList;
3260         actualCount = 0;
3261         do
3262         {
3263             pBlockInfo->startOffset = pMemBlock->begin;
3264             pBlockInfo->size = pMemBlock->end - pMemBlock->begin + 1;
3265             pBlockInfo->flags = 0x0;
3266             if (pBlockInfo->startOffset < maxCpuOffset)
3267             {
3268                 pBlockInfo->flags |= NVOS32_FLAGS_BLOCKINFO_VISIBILITY_CPU;
3269             }
3270             pMemBlock = pMemBlock->u1.nextFree;
3271             pBlockInfo++;
3272             actualCount++;
3273         } while ((pMemBlock != pHeap->pFreeBlockList) && (actualCount < Count));
3274 
3275         rmStatus = NV_OK;
3276     }
3277 
3278     return rmStatus;
3279 }
3280 
3281 /*!
3282  * @brief: Adjust heap free accounting
3283  *
3284  * @param[in] pHeap         Heap pointer
3285  * @param[in] blockSize     +: Size of block being freed
3286  *                          -: Size of block being allocated
3287  * @param[in] internalHeap  NV_TRUE if the allocation is 'INTERNAL'
3288  *
3289  * @return                  void
3290  */
3291 
3292 static void
_heapAdjustFree(Heap * pHeap,NvS64 blockSize,NvBool internalHeap)3293 _heapAdjustFree
3294 (
3295     Heap     *pHeap,
3296     NvS64     blockSize,
3297     NvBool    internalHeap
3298 )
3299 {
3300     pHeap->free += blockSize;
3301 
3302     NV_ASSERT(pHeap->free <= pHeap->total);
3303     if(pHeap->free > pHeap->total)
3304     {
3305         DBG_BREAKPOINT();
3306     }
3307 
3308     // Collect data on internal/external heap usage
3309     if (internalHeap)
3310     {
3311         pHeap->currInternalUsage -= blockSize;
3312         pHeap->peakInternalUsage = NV_MAX(pHeap->peakInternalUsage, pHeap->currInternalUsage);
3313     }
3314     else
3315     {
3316         pHeap->currExternalUsage -= blockSize;
3317         pHeap->peakExternalUsage = NV_MAX(pHeap->peakExternalUsage, pHeap->currExternalUsage);
3318     }
3319 }
3320 
/*!
 * @brief Carve an allocation (pAllocData->allocLo..allocHi) out of the free
 *        block pBlockFree, updating the heap's block list, free list, counts
 *        and rb-tree bookkeeping.
 *
 * Four placement cases are handled:
 *   1. exact fit    - the free block itself is converted into the allocation
 *   2. interior fit - the free block is split in two with the new block between
 *   3. tail fit     - the new block is placed after the shrunken free block
 *   4. head fit     - the new block is placed before the shrunken free block
 *
 * @param[in]  pGpu          OBJGPU pointer
 * @param[in]  pBlockFree    Free block the allocation is cut from
 * @param[out] ppBlockNew    Receives the allocated block (NULL on failure)
 * @param[out] ppBlockSplit  Receives the trailing split block, if one was made
 * @param[in]  pHeap         Heap being allocated from
 * @param[in]  pAllocRequest Allocation request (owner/type taken from it)
 * @param[in]  memHandle     Memory handle recorded on the new block
 * @param[in]  pAllocData    Resolved allocation range/alignment data
 * @param[in]  pFbAllocInfo  FB allocation info (format, size, attr2)
 * @param[in]  alignPad      Padding between aligned base and usable offset
 * @param[out] offset        Receives the final allocation offset
 *
 * @return NV_OK on success (including when only the rb-tree noncontig info
 *         could not be updated), NV_ERR_NO_MEMORY otherwise.
 */
static NV_STATUS
_heapProcessFreeBlock
(
    OBJGPU             *pGpu,
    MEM_BLOCK          *pBlockFree,
    MEM_BLOCK         **ppBlockNew,
    MEM_BLOCK         **ppBlockSplit,
    Heap               *pHeap,
    MEMORY_ALLOCATION_REQUEST *pAllocRequest,
    NvHandle            memHandle,
    OBJHEAP_ALLOC_DATA *pAllocData,
    FB_ALLOC_INFO      *pFbAllocInfo,
    NvU64               alignPad,
    NvU64              *offset
)
{
    NV_MEMORY_ALLOCATION_PARAMS *pVidHeapAlloc = pAllocRequest->pUserParams;
    MEM_BLOCK  *pBlockNew = NULL, *pBlockSplit = NULL;
    OBJOS      *pOS       = GPU_GET_OS(pGpu);
    NV_STATUS   status    = NV_OK;

    // Case 1: the requested range covers the free block exactly.
    if ((pAllocData->allocLo == pBlockFree->begin) &&
         (pAllocData->allocHi == pBlockFree->end))
    {
        //
        // Wow, exact match so replace free block.
        // Remove from free list.
        //
        pBlockFree->u1.nextFree->u0.prevFree = pBlockFree->u0.prevFree;
        pBlockFree->u0.prevFree->u1.nextFree = pBlockFree->u1.nextFree;

        if (pHeap->pFreeBlockList == pBlockFree)
        {
            //
            // This could be the last free block.
            //
            if (pBlockFree->u1.nextFree == pBlockFree)
                pHeap->pFreeBlockList = NULL;
            else
                pHeap->pFreeBlockList = pBlockFree->u1.nextFree;
        }

        //
        // Set pVidHeapAlloc->owner/pVidHeapAlloc->type values here.
        // Don't move because some fields are unions.
        //
        pBlockFree->owner    = pVidHeapAlloc->owner;
        pBlockFree->mhandle  = memHandle;
        pBlockFree->refCount = 1;
        pBlockFree->u0.type  = pVidHeapAlloc->type;
        pBlockFree->align    = pAllocData->allocAl;
        pBlockFree->alignPad  = alignPad;
        pBlockFree->format    = pFbAllocInfo->format;

        // tail end code below assumes 'blockNew' is the new block
        pBlockNew = pBlockFree;
    }
    // Cases 2-4: the requested range fits strictly inside the free block.
    else if ((pAllocData->allocLo >= pBlockFree->begin) &&
         (pAllocData->allocHi <= pBlockFree->end))
    {
        //
        // Found a fit.
        // It isn't exact, so we'll have to do a split
        //
        pBlockNew = portMemAllocNonPaged(sizeof(MEM_BLOCK));
        if (pBlockNew == NULL)
        {
            // Exit with failure and free any local allocations
            NV_ASSERT(0);
            status = NV_ERR_NO_MEMORY;
            goto _heapProcessFreeBlock_error;
        }

        portMemSet(pBlockNew, 0, sizeof(MEM_BLOCK));

        pBlockNew->owner     = pVidHeapAlloc->owner;
        pBlockNew->mhandle   = memHandle;
        pBlockNew->refCount  = 1;
        pBlockNew->u0.type   = pVidHeapAlloc->type;
        pBlockNew->begin     = pAllocData->allocLo;
        pBlockNew->align     = pAllocData->allocAl;
        pBlockNew->alignPad  = alignPad;
        pBlockNew->end       = pAllocData->allocHi;
        pBlockNew->format    = pFbAllocInfo->format;

        if (gpuIsCacheOnlyModeEnabled(pGpu))
        {
            //
            // In L2 Cache only mode, set the beginning of the new allocation
            // block to aligned (allocAl) offset rather then the start of
            // the free block (allocLo). And that the end of the new block is
            // is calculated as (allocSize - 1) from the beginning.
            // This insures that we don't "over allocate"  for the surface in the
            // case where start of the free block is not properly aligned for both
            // the grow down and grow up cases.
            // Only applying this in L2 cache mode for now, as we don't want to "waste"
            // L2 cache space, though wonder if there are any implications to doing
            // it this way in normal operation.
            //
            pBlockNew->begin = pAllocData->allocAl;
            pBlockNew->end   = pBlockNew->begin + pAllocData->allocSize - 1;
        }

        // Case 2: free space remains on both sides -> split into three blocks.
        if ((pBlockFree->begin < pBlockNew->begin) &&
             (pBlockFree->end > pBlockNew->end))
        {
            // Split free block in two.
            pBlockSplit = portMemAllocNonPaged(sizeof(MEM_BLOCK));
            if (pBlockSplit == NULL)
            {
                // Exit with failure and free any local allocations
                status = NV_ERR_NO_MEMORY;
                goto _heapProcessFreeBlock_error;
            }

            portMemSet(pBlockSplit, 0, sizeof(MEM_BLOCK));

            // remove free block from rb-tree since node's range will be changed
            if (NV_OK != (status = _heapUpdate(pHeap, pBlockFree, BLOCK_REMOVE)))
            {
                // Exit with failure and free any local allocations
                goto _heapProcessFreeBlock_error;
            }

            pBlockSplit->owner = NVOS32_BLOCK_TYPE_FREE;
            pBlockSplit->format= 0;
            pBlockSplit->begin = pBlockNew->end + 1;
            pBlockSplit->align = pBlockSplit->begin;
            pBlockSplit->alignPad = 0;
            pBlockSplit->end   = pBlockFree->end;
            pBlockFree->end    = pBlockNew->begin - 1;
            //
            // Insert free split block into free list.
            //
            pBlockSplit->u1.nextFree = pBlockFree->u1.nextFree;
            pBlockSplit->u0.prevFree = pBlockFree;
            pBlockSplit->u1.nextFree->u0.prevFree = pBlockSplit;
            pBlockFree->u1.nextFree = pBlockSplit;
            //
            //  Insert new and split blocks into block list.
            //
            pBlockNew->next   = pBlockSplit;
            pBlockNew->prev   = pBlockFree;
            pBlockSplit->next = pBlockFree->next;
            pBlockSplit->prev = pBlockNew;
            pBlockFree->next  = pBlockNew;
            pBlockSplit->next->prev = pBlockSplit;

            // update numBlocks count
            pHeap->numBlocks++;

            // re-insert updated free block into rb-tree
            if (NV_OK != (status = _heapUpdate(pHeap, pBlockFree, BLOCK_SIZE_CHANGED)))
            {
                //
                // Exit and report success.  The new block was allocated, but the
                // noncontig info is now out-of-sync with reality.
                //
                NV_PRINTF(LEVEL_ERROR,
                          "_heapUpdate failed to _SIZE_CHANGE block\n");
                goto _heapProcessFreeBlock_exit;
            }

            // insert new and split blocks into rb-tree
            if (NV_OK != (status = _heapUpdate(pHeap, pBlockNew, BLOCK_ADD)))
            {
                //
                // Exit and report success.  The new block was allocated, but the
                // noncontig info is now out-of-sync with reality.
                //
                NV_PRINTF(LEVEL_ERROR, "_heapUpdate failed to _ADD block\n");
                goto _heapProcessFreeBlock_exit;
            }

            if (NV_OK != (status = _heapUpdate(pHeap, pBlockSplit, BLOCK_ADD)))
            {
                //
                // Exit and report success.  The new block was allocated, but the
                // noncontig info is now out-of-sync with reality.
                //
                NV_PRINTF(LEVEL_ERROR, "_heapUpdate failed to _ADD block\n");
                goto _heapProcessFreeBlock_exit;
            }
        }
        // Case 3: allocation reaches the end of the free block.
        else if (pBlockFree->end == pBlockNew->end)
        {
            // remove free block from rb-tree since node's range will be changed
            if (NV_OK != (status = _heapUpdate(pHeap, pBlockFree, BLOCK_REMOVE)))
            {
                // Exit with failure and free any local allocations
                goto _heapProcessFreeBlock_error;
            }

            //
            // New block inserted after free block.
            //
            pBlockFree->end = pBlockNew->begin - 1;
            pBlockNew->next = pBlockFree->next;
            pBlockNew->prev = pBlockFree;
            pBlockFree->next->prev = pBlockNew;
            pBlockFree->next       = pBlockNew;

            // re-insert updated free block into rb-tree
            if (NV_OK != (status = _heapUpdate(pHeap, pBlockFree, BLOCK_SIZE_CHANGED)))
            {
                //
                // Exit and report success.  The new block was allocated, but the
                // noncontig info is now out-of-sync with reality.
                //
                NV_PRINTF(LEVEL_ERROR,
                          "_heapUpdate failed to _SIZE_CHANGE block\n");
                goto _heapProcessFreeBlock_exit;
            }

            // insert new block into rb-tree
            if (NV_OK != (status = _heapUpdate(pHeap, pBlockNew, BLOCK_ADD)))
            {
                //
                // Exit and report success.  The new block was allocated, but the
                // noncontig info is now out-of-sync with reality.
                //
                NV_PRINTF(LEVEL_ERROR, "_heapUpdate failed to _ADD block\n");
                goto _heapProcessFreeBlock_exit;
            }
        }
        // Case 4: allocation starts at the beginning of the free block.
        else if (pBlockFree->begin == pBlockNew->begin)
        {
            // remove free block from rb-tree since node's range will be changed
            if (NV_OK != (status = _heapUpdate(pHeap, pBlockFree, BLOCK_REMOVE)))
            {
                // Exit with failure and free any local allocations
                goto _heapProcessFreeBlock_error;
            }

            //
            // New block inserted before free block.
            //
            pBlockFree->begin = pBlockNew->end + 1;
            pBlockFree->align = pBlockFree->begin;
            pBlockNew->next   = pBlockFree;
            pBlockNew->prev   = pBlockFree->prev;
            pBlockFree->prev->next = pBlockNew;
            pBlockFree->prev       = pBlockNew;
            if (pHeap->pBlockList == pBlockFree)
                pHeap->pBlockList  = pBlockNew;

            // re-insert updated free block into rb-tree
            if (NV_OK != (status = _heapUpdate(pHeap, pBlockFree, BLOCK_SIZE_CHANGED)))
            {
                //
                // Exit and report success.  The new block was allocated, but the
                // noncontig info is now out-of-sync with reality.
                //
                NV_PRINTF(LEVEL_ERROR,
                          "_heapUpdate failed to _SIZE_CHANGE block\n");
                goto _heapProcessFreeBlock_exit;
            }

            // insert new block into rb-tree
            if (NV_OK != (status = _heapUpdate(pHeap, pBlockNew, BLOCK_ADD)))
            {
                //
                // Exit and report success.  The new block was allocated, but the
                // noncontig info is now out-of-sync with reality.
                //
                NV_PRINTF(LEVEL_ERROR, "_heapUpdate failed to _ADD block\n");
                goto _heapProcessFreeBlock_exit;
            }
        }
        else
        {
            status = NV_ERR_NO_MEMORY;
            // Exit with failure and free any local allocations
            goto _heapProcessFreeBlock_error;
        }

        pHeap->numBlocks++;
    }

    if (NULL == pBlockNew)
        status = NV_ERR_NO_MEMORY;

// NOTE: reached by fall-through on success as well as by goto on failure;
// it only unwinds when status != NV_OK.
_heapProcessFreeBlock_error:
    if (status != NV_OK)
    {
        NV_PRINTF(LEVEL_ERROR, "failed to allocate block\n");

        portMemFree(pBlockNew);
        portMemFree(pBlockSplit);

        *ppBlockNew = NULL;
        *ppBlockSplit = NULL;

        return status;
    }

_heapProcessFreeBlock_exit:
    *ppBlockNew = pBlockNew;
    *ppBlockSplit = pBlockSplit;

    // alignPad == 0 for all but >= NV5x
    *offset = pBlockNew->align + pBlockNew->alignPad;

    // Reduce free amount by allocated block size.
    _heapAdjustFree(pHeap, -((NvS64) (pBlockNew->end - pBlockNew->begin + 1)),
        FLD_TEST_DRF(OS32, _ATTR2, _INTERNAL, _YES, pFbAllocInfo->pageFormat->attr2));

    // Notify the OS layer about internal-heap consumption.
    if (FLD_TEST_DRF(OS32, _ATTR2, _INTERNAL, _YES, pFbAllocInfo->pageFormat->attr2))
    {
        pOS->osInternalReserveAllocCallback(*offset, pFbAllocInfo->size, pGpu->gpuId);
    }

    return NV_OK;
}
3635 
3636 static void
_heapAddBlockToNoncontigList(Heap * pHeap,MEM_BLOCK * pBlock)3637 _heapAddBlockToNoncontigList
3638 (
3639     Heap       *pHeap,
3640     MEM_BLOCK  *pBlock
3641 )
3642 {
3643     if (NULL == pHeap->pNoncontigFreeBlockList)
3644     {
3645         pHeap->pNoncontigFreeBlockList = pBlock;
3646         pBlock->nextFreeNoncontig = NULL;
3647         pBlock->prevFreeNoncontig = NULL;
3648     }
3649     else
3650     {
3651         MEM_BLOCK *pNextBlock = pHeap->pNoncontigFreeBlockList;
3652         NvU64 size, nextSize = 0;
3653         size = pBlock->end - pBlock->begin + 1;
3654 
3655         NV_ASSERT(pBlock->prevFreeNoncontig == NULL &&
3656                   pBlock->nextFreeNoncontig == NULL);
3657 
3658         // The noncontig block list is arranged in the descending order of size
3659         while (NULL != pNextBlock)
3660         {
3661             nextSize = pNextBlock->end - pNextBlock->begin + 1;
3662 
3663             if (size > nextSize)
3664             {
3665                 // Insert pBlock in front of pNextBlock
3666                 pBlock->prevFreeNoncontig = pNextBlock->prevFreeNoncontig;
3667                 pBlock->nextFreeNoncontig = pNextBlock;
3668                 pNextBlock->prevFreeNoncontig = pBlock;
3669 
3670                 if (pHeap->pNoncontigFreeBlockList == pNextBlock)
3671                 {
3672                     // We inserted at the head of the list
3673                     pHeap->pNoncontigFreeBlockList = pBlock;
3674                 }
3675                 else
3676                 {
3677                     pBlock->prevFreeNoncontig->nextFreeNoncontig = pBlock;
3678                 }
3679 
3680                 break;
3681             }
3682 
3683             if (NULL == pNextBlock->nextFreeNoncontig)
3684             {
3685                 // We reached the end of the list, insert here
3686                 pNextBlock->nextFreeNoncontig = pBlock;
3687                 pBlock->prevFreeNoncontig = pNextBlock;
3688                 pBlock->nextFreeNoncontig = NULL;
3689 
3690                 break;
3691             }
3692 
3693             pNextBlock = pNextBlock->nextFreeNoncontig;
3694         }
3695     }
3696 }
3697 
3698 static void
_heapRemoveBlockFromNoncontigList(Heap * pHeap,MEM_BLOCK * pBlock)3699 _heapRemoveBlockFromNoncontigList
3700 (
3701     Heap       *pHeap,
3702     MEM_BLOCK  *pBlock
3703 )
3704 {
3705     //
3706     // Unless pBlock is at the head of the list (and is the only element in the
3707     // list), both prev and nextFreeNoncontig cannot be NULL at the same time.
3708     // That would imply a bug in the noncontig list building code.
3709     //
3710     NV_ASSERT(pBlock == pHeap->pNoncontigFreeBlockList ||
3711               pBlock->prevFreeNoncontig != NULL ||
3712               pBlock->nextFreeNoncontig != NULL);
3713 
3714     // Removing first block?
3715     if (pHeap->pNoncontigFreeBlockList == pBlock)
3716     {
3717         pHeap->pNoncontigFreeBlockList = pBlock->nextFreeNoncontig;
3718     }
3719     else
3720     {
3721         if (NULL != pBlock->prevFreeNoncontig)
3722         {
3723             pBlock->prevFreeNoncontig->nextFreeNoncontig
3724                 = pBlock->nextFreeNoncontig;
3725         }
3726     }
3727 
3728     // Removing last block?
3729     if (NULL != pBlock->nextFreeNoncontig)
3730     {
3731         pBlock->nextFreeNoncontig->prevFreeNoncontig
3732             = pBlock->prevFreeNoncontig;
3733     }
3734 
3735     pBlock->nextFreeNoncontig = pBlock->prevFreeNoncontig = NULL;
3736 }
3737 
3738 //
3739 // The allocation is done using two loops. The first loop traverses the heap's
3740 // free list to build a list of blocks that can satisfy the allocation. If we
3741 // don't find enough blocks, we can exit quickly without needing to unwind,
3742 // which can happen quite frequently in low memory or heavy fragmentation
3743 // conditions.
3744 //
3745 // The second loop does the actual allocations. It calls _heapProcessFreeBlock()
3746 // to cut down a free block into the required size, which can fail, albeit
3747 // rarely. We need to unwind at that point. The two loops keep the unwinding
3748 // as infrequent as possible.
3749 //
3750 static NV_STATUS
_heapAllocNoncontig(OBJGPU * pGpu,NvHandle hClient,Heap * pHeap,MEMORY_ALLOCATION_REQUEST * pAllocRequest,NvHandle memHandle,OBJHEAP_ALLOC_DATA * pAllocData,FB_ALLOC_INFO * pFbAllocInfo,NvU32 textureClientIndex,NvU64 alignPad,NvU64 * offset,MEMORY_DESCRIPTOR * pMemDesc,HWRESOURCE_INFO ** ppHwResource)3751 _heapAllocNoncontig
3752 (
3753     OBJGPU             *pGpu,
3754     NvHandle            hClient,
3755     Heap               *pHeap,
3756     MEMORY_ALLOCATION_REQUEST *pAllocRequest,
3757     NvHandle            memHandle,
3758     OBJHEAP_ALLOC_DATA *pAllocData,
3759     FB_ALLOC_INFO      *pFbAllocInfo,
3760     NvU32               textureClientIndex,
3761     NvU64               alignPad,
3762     NvU64              *offset,
3763     MEMORY_DESCRIPTOR  *pMemDesc,
3764     HWRESOURCE_INFO   **ppHwResource
3765 )
3766 {
3767     KernelGmmu *pKernelGmmu = GPU_GET_KERNEL_GMMU(pGpu);
3768     NV_MEMORY_ALLOCATION_PARAMS *pVidHeapAlloc = pAllocRequest->pUserParams;
3769     NvBool      bFirstBlock = NV_TRUE;
3770     NvU32       pteIndexOffset = 0, i = 0;
3771     NvU32       blockId = 0;
3772     NV_STATUS   status = NV_OK;
3773     NvU64       pageSize = 0;
3774     NvS64       numPagesLeft;
3775     MEM_BLOCK  *pCurrBlock;
3776     MEM_BLOCK  *pNextBlock;
3777     MEM_BLOCK  *pSavedAllocList = NULL;
3778     MEM_BLOCK  *pLastBlock = NULL;
3779     MEM_BLOCK  *pBlockNew, *pBlockSplit;
3780     NvU32       k, shuffleStride = 1;
3781     NvU64       addr, j, numPages;
3782     RM_ATTR_PAGE_SIZE pageSizeAttr = dmaNvos32ToPageSizeAttr(pFbAllocInfo->retAttr, pFbAllocInfo->retAttr2);
3783 
3784     switch (pageSizeAttr)
3785     {
3786         case RM_ATTR_PAGE_SIZE_DEFAULT:
3787         case RM_ATTR_PAGE_SIZE_INVALID:
3788             NV_PRINTF(LEVEL_ERROR, "Invalid page size attribute!\n");
3789             return NV_ERR_INVALID_ARGUMENT;
3790         case RM_ATTR_PAGE_SIZE_4KB:
3791             pageSize = RM_PAGE_SIZE;
3792             break;
3793         case RM_ATTR_PAGE_SIZE_BIG:
3794         {
3795             pageSize = kgmmuGetMaxBigPageSize_HAL(pKernelGmmu);
3796             break;
3797         }
3798         case RM_ATTR_PAGE_SIZE_HUGE:
3799         {
3800             NV_ASSERT_OR_RETURN(kgmmuIsHugePageSupported(pKernelGmmu),
3801                                 NV_ERR_INVALID_ARGUMENT);
3802             pageSize = RM_PAGE_SIZE_HUGE;
3803             break;
3804         }
3805         case RM_ATTR_PAGE_SIZE_512MB:
3806         {
3807             NV_ASSERT_OR_RETURN(kgmmuIsPageSize512mbSupported(pKernelGmmu),
3808                               NV_ERR_INVALID_ARGUMENT);
3809             pageSize = RM_PAGE_SIZE_512M;
3810             break;
3811         }
3812     }
3813 
3814     //
3815     // pAllocData->allocSize already incorporates pFbAllocInfo->size,
3816     // which in turn is up aligned to pFbAllocInfo->align and alignPad,
3817     // so nothing else needs to be added here.
3818     //
3819     numPagesLeft = RM_ALIGN_UP(pAllocData->allocSize, pageSize) / pageSize;
3820     NV_PRINTF(LEVEL_INFO,
3821               "pageSize: 0x%llx, numPagesLeft: 0x%llx, allocSize: 0x%llx\n",
3822               pageSize / 1024, numPagesLeft, pAllocData->allocSize);
3823 
3824     for (pCurrBlock = pHeap->pNoncontigFreeBlockList;
3825         numPagesLeft > 0 && NULL != pCurrBlock;
3826         pCurrBlock = pNextBlock)
3827     {
3828         NvU64 blockBegin = 0;
3829         NvU64 blockEnd = 0;
3830         NvU64 blockAligned;
3831         NvU64 blockSizeInPages, blockSize;
3832         NvU64 alignPad;
3833         NvU64 pteAddress;
3834         NvU64 offset;
3835 
3836         // Get the next free block pointer before lists get re-linked
3837         pNextBlock = pCurrBlock->nextFreeNoncontig;
3838 
3839         // Selecting blocks: Is this block completely out of range?
3840         if ((pCurrBlock->end < pVidHeapAlloc->rangeLo) ||
3841             (pCurrBlock->begin > pVidHeapAlloc->rangeHi))
3842         {
3843             continue;
3844         }
3845 
3846         // Find the intersection of the block and the specified range.
3847         blockBegin = ((pVidHeapAlloc->rangeLo >= pCurrBlock->begin) ?
3848                 pVidHeapAlloc->rangeLo : pCurrBlock->begin);
3849         blockEnd = ((pVidHeapAlloc->rangeHi <= pCurrBlock->end) ?
3850                 pVidHeapAlloc->rangeHi : pCurrBlock->end);
3851 
3852         // Check if the derived block is usable
3853         if ((blockBegin >= blockEnd) ||
3854             (blockEnd-blockBegin+1 < pageSize))
3855         {
3856             // Skip if the usable size is invalid or insufficient.
3857             continue;
3858         }
3859 
3860         //
3861         // Checks above should protect against underflow, but we might still
3862         // end up with a post-aligned block that is unusable.
3863         // "end" should be RM_PAGE_SIZE-1 aligned.
3864         //
3865         blockBegin = RM_ALIGN_UP(blockBegin, pageSize);
3866         blockEnd = RM_ALIGN_DOWN(blockEnd+1, pageSize)-1;
3867 
3868         if (blockBegin >= blockEnd)
3869         {
3870             //
3871             // When blockSize < page_size and blockBegin and/or blockEnd are
3872             // not page aligned initially, the above alignment can cause
3873             // blockBegin to become > blockEnd.
3874             //
3875             continue;
3876         }
3877 
3878         // The first block has to handle pAllocData->alignment
3879         if (bFirstBlock)
3880         {
3881             // Align the starting address of the block to
3882             // pAllocData->alignment.
3883             blockAligned = (blockBegin +
3884                     pAllocData->alignment - 1) / pAllocData->alignment
3885                 * pAllocData->alignment;
3886 
3887             //
3888             // Check that we'll still be within this block when
3889             // alignPad is added.
3890             //
3891             if (blockAligned + pFbAllocInfo->alignPad > blockEnd)
3892             {
3893                 continue;
3894             }
3895 
3896             // Then make sure this is page aligned.
3897             blockBegin = RM_ALIGN_DOWN(blockAligned, pageSize);
3898 
3899             //
3900             // blockBegin is now the page aligned starting address of a
3901             // block that holds an address aligned to
3902             // pAllocData->alignment, and can take padding from
3903             // alignPad.
3904             //
3905         }
3906         else
3907         {
3908             blockAligned = blockBegin;
3909         }
3910 
3911         blockSizeInPages = (blockEnd - blockBegin + 1) / pageSize;
3912 
3913         // A usable block has to supply at least one page
3914         if (blockSizeInPages < 1)
3915         {
3916             continue;
3917         }
3918 
3919         // blockEnd may need to be corrected for the last page
3920         if (((NvU64)numPagesLeft < blockSizeInPages))
3921         {
3922             blockEnd = blockBegin + pageSize * numPagesLeft - 1;
3923             blockSizeInPages = numPagesLeft;
3924         }
3925 
3926         blockSize = blockEnd - blockBegin + 1;
3927 
3928         numPagesLeft -= blockSizeInPages;
3929 
3930         NV_PRINTF(LEVEL_INFO,
3931                   "\tblockId: %d, blockBegin: 0x%llx, blockEnd: 0x%llx, blockSize: "
3932                   "0x%llx, blockSizeInPages: 0x%llx, numPagesLeft: 0x%llx\n",
3933                   blockId, blockBegin, blockEnd, blockSize, blockSizeInPages,
3934                   numPagesLeft >= 0 ? numPagesLeft : 0);
3935 
3936         blockId++;
3937 
3938         //
3939         // Set pAllocData values before the call to
3940         // _heapProcessFreeBlock()
3941         //
3942         pAllocData->allocLo = blockBegin;
3943         pAllocData->allocHi = blockEnd;
3944         pAllocData->allocAl = blockAligned;
3945         pAllocData->allocSize = blockSize;
3946 
3947         if (bFirstBlock)
3948         {
3949             alignPad = pFbAllocInfo->alignPad;
3950         }
3951         else
3952         {
3953             alignPad = 0;
3954         }
3955 
3956         //
3957         // Cut this new block down to size. pBlockNew will be the block to use
3958         // when this returns.
3959         //
3960         if (NV_OK != (status = _heapProcessFreeBlock(pGpu, pCurrBlock,
3961                     &pBlockNew, &pBlockSplit, pHeap, pAllocRequest,
3962                     memHandle, pAllocData, pFbAllocInfo,
3963                     alignPad, &offset)))
3964         {
3965             NV_PRINTF(LEVEL_ERROR,
3966                       "ERROR: Could not process free block, error: 0x%x\n",
3967                       status);
3968             goto unwind_and_exit;
3969         }
3970 
3971         // Never fails
3972         (void)_heapUpdate(pHeap, pBlockNew, BLOCK_FREE_STATE_CHANGED);
3973 
3974         //
3975         // Save the allocation off in case we need to unwind
3976         // This also ensures that all blocks that make up the noncontig
3977         // allocation are strung together in a list, which is useful when
3978         // freeing them.
3979         //
3980         if (pSavedAllocList == NULL)
3981         {
3982             // First block
3983             pSavedAllocList = pLastBlock = pBlockNew;
3984             pSavedAllocList->noncontigAllocListNext = NULL;
3985         }
3986         else
3987         {
3988             pLastBlock->noncontigAllocListNext = pBlockNew;
3989             pLastBlock = pBlockNew;
3990             pLastBlock->noncontigAllocListNext = NULL;
3991         }
3992 
3993         pteAddress = RM_PAGE_ALIGN_DOWN(pBlockNew->begin);
3994 
3995         numPages = NV_MIN(blockSizeInPages, ((pMemDesc->PageCount - pteIndexOffset) * RM_PAGE_SIZE) / pageSize);
3996 
3997         if (pHeap->getProperty(pHeap, PDB_PROP_HEAP_PAGE_SHUFFLE))
3998         {
3999             i             = pHeap->shuffleStrideIndex;
4000             shuffleStride = pHeap->shuffleStrides[i];
4001 
            // Select a stride no greater than the number of pages
4003             while(numPages < shuffleStride && i > 0)
4004             {
4005                 i--;
4006                 shuffleStride = pHeap->shuffleStrides[i];
4007             }
4008 
4009             pHeap->shuffleStrideIndex = (pHeap->shuffleStrideIndex + 1) % SHUFFLE_STRIDE_MAX;
4010         }
4011 
4012         //
4013         // Shuffling logic.
4014         // We scatter the contiguous pages at multiple of stride length.
4015         // For 5 pages with stride length 2, we have the following shuffling.
4016         // Before: 0, 1, 2, 3, 4
4017         // After : 0, 2, 4, 1, 3
4018         //
4019         for (i = 0; i < shuffleStride; i++)
4020         {
4021             for(j = i; j < numPages; j = j + shuffleStride)
4022             {
4023                 addr = pteAddress + j * pageSize;
4024                 for (k = 0; k < pageSize/RM_PAGE_SIZE; k++)
4025                 {
4026                     //
4027                     // The memDesc has everything in terms of 4k pages.
4028                     // If allocationSize % pageSize != 0, there will not be enough PTEs in
4029                     // the memdesc for completely specifying the final block, but that's
4030                     // ok. The mapping code will be mapping in the whole pageSize final
4031                     // block anyway, and the heapBlockFree() code will free the whole
4032                     // block.
4033                     //
4034                     memdescSetPte(pMemDesc, AT_GPU, pteIndexOffset, addr);
4035                     pteIndexOffset++;
4036                     addr += RM_PAGE_SIZE;
4037                 }
4038             }
4039         }
4040 
4041         //
4042         // If a client calls us with pVidHeapAlloc->type ==
4043         // NVOS32_TYPE_TEXTURE, but where flags are non-zero, we won't
4044         // call objHeapSetTexturePlacement and initialize
4045         // textureClientIndex to a proper value (default is 0xFFFFFFFF).
4046         // In that case, we won't track this texture allocation. Bug
4047         // 79586.
4048         //
4049         if (pVidHeapAlloc->type == NVOS32_TYPE_TEXTURE &&
4050                 textureClientIndex != 0xFFFFFFFF)
4051         {
4052             pBlockNew->textureId = hClient;
4053             if (bFirstBlock)
4054                 pHeap->textureData[textureClientIndex].refCount++;
4055         }
4056         else
4057         {
4058             pBlockNew->textureId = 0;
4059         }
4060 
4061         if (bFirstBlock)
4062         {
4063             pFbAllocInfo->offset = offset;
4064             *ppHwResource = &pBlockNew->hwResource;
4065         }
4066 
4067         pBlockNew->pMemDesc = pMemDesc;
4068         pBlockNew->allocedMemDesc = bFirstBlock; // avoid multiple frees
4069 
4070         bFirstBlock = NV_FALSE;
4071     }
4072 
4073     // Did we find enough pages?
4074     if (numPagesLeft > 0)
4075     {
4076         NV_PRINTF(LEVEL_INFO,
4077                   "Could not satisfy request: allocSize: 0x%llx\n",
4078                   pAllocData->allocSize);
4079 
4080         status = NV_ERR_NO_MEMORY;
4081 
4082 unwind_and_exit:
4083 
4084         while (pSavedAllocList != NULL)
4085         {
4086             NV_STATUS unwindStatus;
4087 
4088             pCurrBlock = pSavedAllocList->noncontigAllocListNext;
4089 
4090             unwindStatus = _heapBlockFree(pGpu, pHeap, hClient, pFbAllocInfo->hDevice, pSavedAllocList);
4091 
4092             if (unwindStatus != NV_OK)
4093             {
4094                 NV_PRINTF(LEVEL_ERROR,
4095                           "ERROR: Could not free block,  error 0x%x!\n",
4096                           unwindStatus);
4097             }
4098 
4099             pSavedAllocList = pCurrBlock;
4100         }
4101     }
4102     return status;
4103 }
4104 
4105 //
4106 // Explanation of BlockAction values:
4107 // - BLOCK_ADD,
4108 //    A new block is added to the heap
4109 //      o The block's node structure needs to be inited.
4110 //      o The block is added to the rb-tree.
4111 //      o The block is added to the noncontig freelist.
4112 // - BLOCK_REMOVE
4113 //    A block is removed from the heap for good
4114 //      o The block is removed from the rb-tree.
4115 //      o The block is removed from the noncontig freelist.
4116 // - BLOCK_SIZE_CHANGED
4117 //    A block's size has changed
4118 //      o The rb-tree needs to be updated.
4119 //      o The noncontig freelist needs to be updated.
4120 // - BLOCK_FREE_STATE_CHANGED
4121 //    if pBlock->owner != NVOS32_BLOCK_TYPE_FREE
4122 //      A block is allocated to a client
4123 //       o The block is removed from the noncontig freelist.
4124 //    else
4125 //      A block is freed by the client
4126 //       o The block is added to the noncontig freelist.
4127 //
4128 static NV_STATUS
_heapUpdate(Heap * pHeap,MEM_BLOCK * pBlock,BlockAction action)4129 _heapUpdate
4130 (
4131     Heap       *pHeap,
4132     MEM_BLOCK  *pBlock,
4133     BlockAction action
4134 )
4135 {
4136     // A new block is to be added, init its node structure.
4137     if (BLOCK_ADD == action)
4138     {
4139         portMemSet((void *)&pBlock->node, 0, sizeof(NODE));
4140         pBlock->node.Data     = (void *)pBlock;
4141     }
4142 
4143     // Both new and updated blocks need to be re-inserted into the rb tree.
4144     if ((BLOCK_SIZE_CHANGED == action) ||
4145         (BLOCK_ADD == action))
4146     {
4147         pBlock->node.keyStart = pBlock->begin;
4148         pBlock->node.keyEnd = pBlock->end;
4149 
4150         if (btreeInsert(&pBlock->node, &pHeap->pBlockTree) != NV_OK)
4151         {
4152             NV_ASSERT_FAILED("btreeInsert failed to ADD/SIZE_CHANGE block");
4153             return NV_ERR_INVALID_STATE;
4154         }
4155     }
4156 
4157     //
4158     // Updated, new and freed blocks need to be added back to the noncontig
4159     // freelist.
4160     //
4161     if ((BLOCK_SIZE_CHANGED == action) ||
4162         (BLOCK_ADD == action) ||
4163         (BLOCK_FREE_STATE_CHANGED == action &&
4164          pBlock->owner == NVOS32_BLOCK_TYPE_FREE))
4165     {
4166         _heapAddBlockToNoncontigList(pHeap, pBlock);
4167     }
4168 
4169     // Remove the block from the heap
4170     if (BLOCK_REMOVE == action)
4171     {
4172         if (btreeUnlink(&pBlock->node, &pHeap->pBlockTree) != NV_OK)
4173         {
4174             NV_ASSERT_FAILED("btreeUnlink failed to REMOVE block");
4175             return NV_ERR_INVALID_STATE;
4176         }
4177     }
4178 
4179     // An allocated block is only removed from the noncontig freelist.
4180     if ((BLOCK_REMOVE == action) ||
4181         ((BLOCK_FREE_STATE_CHANGED == action &&
4182           pBlock->owner != NVOS32_BLOCK_TYPE_FREE)))
4183     {
4184         _heapRemoveBlockFromNoncontigList(pHeap, pBlock);
4185     }
4186 
4187     return NV_OK;
4188 }
4189 
4190 static NvU32
_heapGetPageBlackListGranularity(void)4191 _heapGetPageBlackListGranularity(void)
4192 {
4193     return RM_PAGE_SIZE;
4194 }
4195 
//
// This function blacklists pages from the heap.
// The addresses of the pages to blacklist are available from
// pHeap->blackListAddresses.
//
// Walks pHeap->blackListAddresses and, for each usable entry, creates a
// fixed-address memdesc pinning the bad page so it cannot be allocated.
// Entries that are invalid, PMA-managed, or fail allocation are skipped
// (not treated as fatal). On exit pHeap->blackList holds one chunk per
// successfully blacklisted page. Returns NV_OK unless the blacklist
// already exists or the chunk array cannot be allocated.
//
NV_STATUS
heapBlackListPages_IMPL
(
    OBJGPU *pGpu,
    Heap   *pHeap
)
{
    MemoryManager *pMemoryManager = GPU_GET_MEMORY_MANAGER(pGpu);
    KernelMemorySystem *pKernelMemorySystem = GPU_GET_KERNEL_MEMORY_SYSTEM(pGpu);
    PMA                 *pPma       = &pHeap->pmaObject;
    NvU32                i = 0, j = 0;  // i: input address index, j: chunks actually blacklisted
    NV_STATUS            status     = NV_OK;
    BLACKLIST           *pBlackList = &pHeap->blackList;
    BLACKLIST_ADDRESSES *pAddresses = &pHeap->blackListAddresses;
    NvU32                count = pHeap->blackListAddresses.count;
    NvU32                staticBlacklistSize, dynamicBlacklistSize;
    NvU32                dynamicRmBlackListedCount;
    NvU32                staticRmBlackListedCount;
    NvU16                maximumBlacklistPages = kmemsysGetMaximumBlacklistPages(pGpu, pKernelMemorySystem);

    if (NULL == pAddresses)
    {
        return NV_ERR_INVALID_ARGUMENT;
    }

    // A non-empty blacklist means this was already done; refuse to rebuild.
    if (pBlackList->count != 0)
    {
        NV_PRINTF(LEVEL_ERROR, "Error: BlackList already exists!\n");
        return NV_ERR_INVALID_STATE;
    }

    //
    // We may not be able to allocate all pages requested, but alloc enough
    // space anyway
    //
    pBlackList->pBlacklistChunks = portMemAllocNonPaged(sizeof(BLACKLIST_CHUNK) * maximumBlacklistPages);
    if (NULL == pBlackList->pBlacklistChunks)
    {
        NV_PRINTF(LEVEL_ERROR, "Could not allocate memory for blackList!\n");
        return NV_ERR_NO_MEMORY;
    }

    portMemSet(pBlackList->pBlacklistChunks, 0, sizeof(BLACKLIST_CHUNK) * maximumBlacklistPages);

    dynamicRmBlackListedCount = 0;
    staticRmBlackListedCount  = 0;
    for (i = 0, j = 0; i < count; i++)
    {
        // Skip entries marked invalid by earlier filtering.
        if (NV2080_CTRL_FB_OFFLINED_PAGES_INVALID_ADDRESS == pAddresses->data[i].address)
        {
            continue;
        }

        //
        // If PMA is enabled, only blacklist pages in the internal heap.
        // PMA blacklisting is handled in pmaRegisterRegion.
        //
        if (memmgrIsPmaInitialized(pMemoryManager))
        {
            if (heapIsPmaManaged(pGpu, pHeap, pAddresses->data[i].address, pAddresses->data[i].address))
            {
                // Skipping non-internal address
                continue;
            }
        }

        // Tally dynamically (DPR) vs statically sourced pages for size reporting below.
        if ((pAddresses->data[i].type == NV2080_CTRL_FB_OFFLINED_PAGES_SOURCE_DPR_MULTIPLE_SBE) ||
            (pAddresses->data[i].type == NV2080_CTRL_FB_OFFLINED_PAGES_SOURCE_DPR_DBE))
        {
            dynamicRmBlackListedCount++;
        }
        else
        {
            staticRmBlackListedCount++;
        }

        // Create a memdesc
        status = memdescCreate(&pBlackList->pBlacklistChunks[j].pMemDesc,
                               pGpu,
                               RM_PAGE_SIZE,
                               RM_PAGE_SIZE,
                               NV_TRUE,
                               ADDR_FBMEM,
                               NV_MEMORY_UNCACHED,
                               MEMDESC_FLAGS_FIXED_ADDRESS_ALLOCATE |
                               MEMDESC_FLAGS_SKIP_RESOURCE_COMPUTE);
        if (NV_OK != status)
        {
            // Failure is non-fatal: clear the chunk and move to the next page.
            portMemSet(&pBlackList->pBlacklistChunks[j], 0, sizeof(BLACKLIST_CHUNK));
            NV_PRINTF(LEVEL_ERROR,
                      "Error 0x%x creating blacklisted page memdesc for address 0x%llx, skipping\n",
                      status, pAddresses->data[i].address);
            continue;
        }

        if (pHeap->heapType == HEAP_TYPE_PHYS_MEM_SUBALLOCATOR)
            pBlackList->pBlacklistChunks[j].pMemDesc->pHeap = pHeap;

        // This is how _FIXED_ADDRESS_ALLOCATE works
        memdescSetPte(pBlackList->pBlacklistChunks[j].pMemDesc,
                      AT_GPU, 0, RM_PAGE_ALIGN_DOWN(pAddresses->data[i].address));

        if (pHeap->heapType != HEAP_TYPE_PHYS_MEM_SUBALLOCATOR)
        {
            //
            // Allocate memory for this page. This is marked as an internal RM
            // allocation and WILL be saved/restored during suspend/resume.
            //
            memdescTagAlloc(status, NV_FB_ALLOC_RM_INTERNAL_OWNER_UNNAMED_TAG_79,
                    pBlackList->pBlacklistChunks[j].pMemDesc);
            if (NV_OK != status)
            {
                // No use for the memdesc if the page couldn't be allocated
                memdescDestroy(pBlackList->pBlacklistChunks[j].pMemDesc);

                portMemSet(&pBlackList->pBlacklistChunks[j], 0, sizeof(BLACKLIST_CHUNK));

                NV_PRINTF(LEVEL_ERROR,
                          "Error 0x%x blacklisting page at address  0x%llx, skipping\n",
                          status, pAddresses->data[i].address);
                continue;
            }
        }

        // Page blacklisting is successful, add entries to the BLACKLIST
        pBlackList->pBlacklistChunks[j].physOffset              = pAddresses->data[i].address;
        pBlackList->pBlacklistChunks[j].size                    = RM_PAGE_SIZE;
        pBlackList->pBlacklistChunks[j].bIsValid                = NV_TRUE;

        // If the page was successfully blacklisted, move to the next entry
        j++;
    }

    pBlackList->count = j;

    // Report blacklist footprint in KiB: PMA-tracked sizes plus RM-tracked pages.
    pmaGetBlacklistSize(pPma, &dynamicBlacklistSize, &staticBlacklistSize);
    dynamicBlacklistSize = dynamicBlacklistSize >> 10;
    staticBlacklistSize  = staticBlacklistSize  >> 10;

    dynamicBlacklistSize += (dynamicRmBlackListedCount * _heapGetPageBlackListGranularity()) >> 10;
    staticBlacklistSize  += (staticRmBlackListedCount  * _heapGetPageBlackListGranularity()) >> 10;

    pHeap->dynamicBlacklistSize = dynamicBlacklistSize;
    pHeap->staticBlacklistSize = staticBlacklistSize;

    if (0 == pBlackList->count)
    {
        // No address was blacklisted
        portMemFree(pBlackList->pBlacklistChunks);
        pBlackList->pBlacklistChunks = NULL;
    }

    return NV_OK;
}
4355 
4356 //
4357 // This function frees all blacklisted pages.
4358 // The pHeap->blackList structure holds a list of memdescs, one for each
4359 // blacklisted page.
4360 //
4361 NV_STATUS
heapFreeBlackListedPages_IMPL(OBJGPU * pGpu,Heap * pHeap)4362 heapFreeBlackListedPages_IMPL
4363 (
4364     OBJGPU *pGpu,
4365     Heap   *pHeap
4366 )
4367 {
4368     NvU32 i;
4369     BLACKLIST *pBlackList = &pHeap->blackList;
4370 
4371     // Also free the blacklistAddresses data here
4372     if (pHeap->blackListAddresses.data)
4373     {
4374         portMemFree(pHeap->blackListAddresses.data);
4375         pHeap->blackListAddresses.count = 0;
4376         pHeap->blackListAddresses.data  = NULL;
4377     }
4378 
4379     if (0 == pBlackList->count)
4380     {
4381         return NV_OK;
4382     }
4383 
4384     if (NULL == pBlackList->pBlacklistChunks)
4385     {
4386         return NV_ERR_INVALID_STATE;
4387     }
4388 
4389     for (i = 0; i < pBlackList->count; i++)
4390     {
4391         if (pBlackList->pBlacklistChunks[i].bIsValid)
4392         {
4393             // Free the blacklisted page
4394             memdescFree(pBlackList->pBlacklistChunks[i].pMemDesc);
4395 
4396             // Free the memdesc
4397             memdescDestroy(pBlackList->pBlacklistChunks[i].pMemDesc);
4398         }
4399     }
4400 
4401     portMemFree(pBlackList->pBlacklistChunks);
4402 
4403     pBlackList->count            = 0;
4404     pBlackList->pBlacklistChunks = NULL;
4405 
4406     return NV_OK;
4407 }
4408 
4409 NV_STATUS
heapStorePendingBlackList_IMPL(OBJGPU * pGpu,Heap * pHeap,NvU64 pageAddressesWithEccOn,NvU64 pageAddressWithEccOff)4410 heapStorePendingBlackList_IMPL
4411 (
4412     OBJGPU  *pGpu,
4413     Heap    *pHeap,
4414     NvU64    pageAddressesWithEccOn,
4415     NvU64    pageAddressWithEccOff
4416 )
4417 {
4418     MemoryManager *pMemoryManager = GPU_GET_MEMORY_MANAGER(pGpu);
4419     KernelMemorySystem *pKernelMemorySystem = GPU_GET_KERNEL_MEMORY_SYSTEM(pGpu);
4420     NV_STATUS status = NV_OK;
4421     NvU64  physicalAddress;
4422     NvU64  pageNumber;
4423     BLACKLIST *pBlacklist = &pHeap->blackList;
4424     const MEMORY_SYSTEM_STATIC_CONFIG *pMemorySystemConfig =
4425         kmemsysGetStaticConfig(pGpu, GPU_GET_KERNEL_MEMORY_SYSTEM(pGpu));
4426 
4427     if (pMemorySystemConfig->bEnabledEccFBPA)
4428     {
4429         physicalAddress = pageAddressesWithEccOn;
4430     }
4431     else
4432     {
4433         physicalAddress = pageAddressWithEccOff;
4434     }
4435 
4436     pageNumber = (physicalAddress >> RM_PAGE_SHIFT);
4437 
4438     // This code is called only when DBE happens, so marking it as type DBE
4439     status = heapAddPageToBlackList(pGpu, pHeap,
4440              DRF_VAL64(_HEAP, _PAGE_OFFLINE, _PAGE_NUMBER, pageNumber),
4441              NV2080_CTRL_FB_OFFLINED_PAGES_SOURCE_DPR_DBE);
4442     if (NV_OK != status)
4443     {
4444         // No more space in the blacklist
4445         NV_PRINTF(LEVEL_ERROR, "No more space in blacklist, status: %x!\n", status);
4446         return status;
4447     }
4448 
4449     if (memmgrIsPmaInitialized(pMemoryManager))
4450     {
4451         if (heapIsPmaManaged(pGpu, pHeap, physicalAddress, physicalAddress))
4452         {
4453             NV_PRINTF(LEVEL_INFO, "Calling PMA helper function to blacklist page offset: %llx\n", physicalAddress);
4454             status = pmaAddToBlacklistTracking(&pHeap->pmaObject, physicalAddress);
4455             return status;
4456         }
4457         else
4458         {
4459              // blacklisting needs to be done like CBC error recovery
4460              return NV_ERR_RESET_REQUIRED;
4461         }
4462     }
4463     else
4464     {
4465         if (pMemoryManager->bEnableDynamicPageOfflining)
4466         {
4467             // adding a new entry to heap managed blacklist
4468             if (pBlacklist->count == kmemsysGetMaximumBlacklistPages(pGpu, pKernelMemorySystem))
4469             {
4470                 NV_PRINTF(LEVEL_ERROR, "We have blacklisted maximum number of pages possible. returning error \n");
4471                 return NV_ERR_INSUFFICIENT_RESOURCES;
4472             }
4473             portMemSet(&pBlacklist->pBlacklistChunks[pBlacklist->count], 0 , sizeof(BLACKLIST_CHUNK));
4474             pBlacklist->pBlacklistChunks[pBlacklist->count].physOffset = physicalAddress;
4475             pBlacklist->pBlacklistChunks[pBlacklist->count].size = RM_PAGE_SIZE;
4476             pBlacklist->pBlacklistChunks[pBlacklist->count].bPendingRetirement = NV_TRUE;
4477             pBlacklist->count++;
4478         }
4479     }
4480     return status;
4481 }
4482 
4483 //
4484 // This function copies the addresses of pages to be blacklisted from
4485 // pPageNumbers into Heap's internal blackListAddresses structure.
4486 //
4487 NV_STATUS
heapStoreBlackList_IMPL(OBJGPU * pGpu,Heap * pHeap,NvU64 * pPageNumbersWithEccOn,NvU64 * pPageNumbersWithECcOff,NvU32 maxInputPages)4488 heapStoreBlackList_IMPL
4489 (
4490     OBJGPU *pGpu,
4491     Heap   *pHeap,
4492     NvU64 *pPageNumbersWithEccOn,
4493     NvU64 *pPageNumbersWithECcOff,
4494     NvU32 maxInputPages
4495 )
4496 {
4497     NvU32       i;
4498     NvU64      *pPageNumbers;
4499     NV_STATUS   status  = NV_OK;
4500     const MEMORY_SYSTEM_STATIC_CONFIG *pMemorySystemConfig =
4501         kmemsysGetStaticConfig(pGpu, GPU_GET_KERNEL_MEMORY_SYSTEM(pGpu));
4502 
4503     if (pMemorySystemConfig->bEnabledEccFBPA)
4504     {
4505         pPageNumbers = pPageNumbersWithEccOn;
4506     }
4507     else
4508     {
4509         pPageNumbers = pPageNumbersWithECcOff;
4510     }
4511 
4512     for (i = 0; i < maxInputPages; i++)
4513     {
4514         //
4515         // Bug: 2999257
4516         // currently pre-Hopper we have 37b FB PA, whose PFN will be 25b
4517         // From Hopper+ we have 52b PA, whose PFN will be 40b PA and hence
4518         // the macro NV_INFOROM_BLACKLIST_PAGE_NUMBER width of 28b will not be
4519         // sufficient to capture the entire address, this needs to be fixed.
4520         //
4521         status = heapAddPageToBlackList(pGpu, pHeap,
4522                 DRF_VAL64(_HEAP, _PAGE_OFFLINE, _PAGE_NUMBER, pPageNumbers[i]),
4523                 (NvU32)DRF_VAL64(_HEAP, _PAGE_OFFLINE, _TYPE, pPageNumbers[i]));
4524         if (NV_OK != status)
4525         {
4526             // No more space in the blacklist
4527             NV_PRINTF(LEVEL_ERROR, "No more space in blacklist!\n");
4528             return status;
4529         }
4530     }
4531 
4532     return status;
4533 }
4534 
4535 NV_STATUS
heapAddPageToBlackList_IMPL(OBJGPU * pGpu,Heap * pHeap,NvU64 pageNumber,NvU32 type)4536 heapAddPageToBlackList_IMPL
4537 (
4538     OBJGPU *pGpu,
4539     Heap   *pHeap,
4540     NvU64   pageNumber,
4541     NvU32   type
4542 )
4543 {
4544     KernelMemorySystem *pKernelMemorySystem = GPU_GET_KERNEL_MEMORY_SYSTEM(pGpu);
4545     NvU16 maximumBlacklistPages = kmemsysGetMaximumBlacklistPages(pGpu, pKernelMemorySystem);
4546     NvU32 index = pHeap->blackListAddresses.count;
4547 
4548     if (index == maximumBlacklistPages)
4549     {
4550         return NV_ERR_INSUFFICIENT_RESOURCES;
4551     }
4552 
4553     if (pHeap->blackListAddresses.data == NULL)
4554     {
4555         NvU64 listSize = sizeof(BLACKLIST_ADDRESS) * maximumBlacklistPages;
4556 
4557         pHeap->blackListAddresses.data = portMemAllocNonPaged(listSize);
4558         if (pHeap->blackListAddresses.data == NULL)
4559         {
4560             return NV_ERR_NO_MEMORY;
4561         }
4562 
4563         portMemSet(pHeap->blackListAddresses.data, 0, listSize);
4564     }
4565 
4566     pHeap->blackListAddresses.data[index].address = (pageNumber << RM_PAGE_SHIFT);
4567     pHeap->blackListAddresses.data[index].type = type;
4568 
4569     pHeap->blackListAddresses.count++;
4570 
4571     NV_PRINTF(LEVEL_INFO, "Added 0x%0llx (blacklist count: %u)\n",
4572               pHeap->blackListAddresses.data[index].address,
4573               pHeap->blackListAddresses.count);
4574 
4575     return NV_OK;
4576 }
4577 
4578 /*!
4579  * @brief: Identify if an FB range is PMA-managed
4580  *
4581  * @param[in] pGpu      OBJGPU pointer
4582  * @param[in] pHeap     Heap pointer
4583  * @param[in] offset    FB block offset
4584  * @param[in] limit     FB block limit
4585  *
4586  * @return NV_TRUE      offset is PMA-managed
4587  *         NV_FALSE     offset is not managed by PMA
4588  */
4589 NvBool
heapIsPmaManaged_IMPL(OBJGPU * pGpu,Heap * pHeap,NvU64 offset,NvU64 limit)4590 heapIsPmaManaged_IMPL
4591 (
4592     OBJGPU *pGpu,
4593     Heap   *pHeap,
4594     NvU64   offset,
4595     NvU64   limit
4596 )
4597 {
4598     MemoryManager *pMemoryManager = GPU_GET_MEMORY_MANAGER(pGpu);
4599 
4600     if (memmgrIsPmaInitialized(pMemoryManager))
4601     {
4602         NvU32 i;
4603 
4604         NV_ASSERT(offset <= limit);
4605 
4606         for (i = 0; i < pHeap->pmaObject.regSize; i++)
4607         {
4608             if ((offset >= pHeap->pmaObject.pRegDescriptors[i]->base) &&
4609                 (limit  <= pHeap->pmaObject.pRegDescriptors[i]->limit))
4610             {
4611                 NV_PRINTF(LEVEL_INFO,
4612                           "range %llx..%llx resides in PMA region=%llx..%llx\n",
4613                           offset, limit,
4614                           pHeap->pmaObject.pRegDescriptors[i]->base,
4615                           pHeap->pmaObject.pRegDescriptors[i]->limit);
4616                 return NV_TRUE;
4617             }
4618 #if defined(DEBUG)
4619             // Check for straddling
4620             else if (
4621                 (limit >= pHeap->pmaObject.pRegDescriptors[i]->base) &&
4622                 (offset <= pHeap->pmaObject.pRegDescriptors[i]->limit))
4623             {
4624                 NV_PRINTF(LEVEL_ERROR,
4625                           "range %llx..%llx straddles in PMA region=%llx..%llx\n",
4626                           offset, limit,
4627                           pHeap->pmaObject.pRegDescriptors[i]->base,
4628                           pHeap->pmaObject.pRegDescriptors[i]->limit);
4629             }
4630 #endif  //defined(DEBUG)
4631         }
4632     }
4633 
4634     return(NV_FALSE);
4635 }
4636 
/*!
 * @brief Increment the heap's reference count
 *
 * @param[in] pHeap    Heap pointer (NULL is tolerated)
 *
 * @return Refcount value after the increment, or 0 if pHeap is NULL
 */
NvU32
heapAddRef_IMPL
(
    Heap *pHeap
)
{
    if (pHeap == NULL)
        return 0;

    // NOTE(review): the atomic op returns a 64-bit value which is implicitly
    // truncated to NvU32 here — presumably refcounts never approach 2^32.
    return portAtomicExIncrementU64(&pHeap->refCount);
}
4656 
/*!
 * @brief Decrement the heap's reference count, deleting the heap when it
 *        reaches zero
 *
 * @param[in] pHeap    Heap pointer (NULL is tolerated)
 *
 * @return Refcount value after the decrement, or 0 if pHeap is NULL
 */
NvU32
heapRemoveRef_IMPL
(
    Heap   *pHeap
)
{
    NvU64 refCount = 0;

    if (pHeap == NULL)
        return 0;

    refCount = portAtomicExDecrementU64(&pHeap->refCount);
    // Last reference dropped: destroy the heap object.
    if (refCount == 0)
    {
        objDelete(pHeap);
    }

    // NOTE(review): 64-bit count implicitly truncated to the NvU32 return.
    return refCount;
}
4684 
4685 /*!
4686  * @brief Adjust the heap size
4687  *
4688  * @param[in] pHeap    Heap pointer
4689  * @param[in] resizeBy NVS64 resizeBy value
4690  */
4691 
heapResize_IMPL(Heap * pHeap,NvS64 resizeBy)4692 NV_STATUS heapResize_IMPL
4693 (
4694     Heap *pHeap,
4695     NvS64 resizeBy
4696 )
4697 {
4698     MEM_BLOCK  *pBlockLast;
4699     MEM_BLOCK  *pBlockNew;
4700     NV_STATUS   status  = NV_OK;
4701     OBJGPU     *pGpu    = ENG_GET_GPU(pHeap);
4702 
4703     NV_ASSERT_OR_RETURN(pHeap->heapType == HEAP_TYPE_PHYS_MEM_SUBALLOCATOR, NV_ERR_NOT_SUPPORTED);
4704 
4705     // Free all blacklisted pages
4706     if ((pHeap->blackListAddresses.count != 0) &&
4707          pGpu->getProperty(pGpu, PDB_PROP_GPU_ALLOW_PAGE_RETIREMENT) &&
4708          gpuCheckPageRetirementSupport_HAL(pGpu))
4709     {
4710         heapFreeBlackListedPages(pGpu, pHeap);
4711     }
4712 
4713     // Go to last block if the heap w.r.t. the start address
4714     pBlockLast = pHeap->pBlockList;
4715     while (pBlockLast->next != pHeap->pBlockList)
4716         pBlockLast = pBlockLast->next;
4717 
4718     if (resizeBy < 0) // Shrink the allocation
4719     {
4720         NvS64 newSize;
4721 
4722         NV_ASSERT_OR_RETURN(pBlockLast->owner == NVOS32_BLOCK_TYPE_FREE, NV_ERR_NO_MEMORY);
4723         NV_CHECK_OR_RETURN(LEVEL_ERROR, portSafeAddS64(pBlockLast->end - pBlockLast->begin, resizeBy, &newSize) &&
4724                                         (newSize > 0), NV_ERR_INVALID_LIMIT);
4725         pBlockLast->end += resizeBy;
4726     }
4727     else // Grow the allocation
4728     {
4729         if (pBlockLast->owner == NVOS32_BLOCK_TYPE_FREE)
4730         {
4731             // Found a free block at the end Just resize it.
4732             pBlockLast->end += resizeBy;
4733         }
4734         else
4735         {
4736             // Could not find a free block at the end. Add a new free block.
4737             pBlockNew = portMemAllocNonPaged(sizeof(MEM_BLOCK));
4738             if (pBlockNew != NULL)
4739             {
4740 
4741                 portMemSet(pBlockNew, 0, sizeof(MEM_BLOCK));
4742 
4743                 pBlockNew->owner    = NVOS32_BLOCK_TYPE_FREE;
4744                 pBlockNew->refCount = 1;
4745 
4746                 // Set block boundaries
4747                 pBlockNew->begin    = pBlockLast->end + 1;
4748                 pBlockNew->end      = pBlockLast->end + resizeBy;
4749 
4750                 if (pHeap->pFreeBlockList == NULL)
4751                     pHeap->pFreeBlockList = pBlockNew;
4752 
4753                 // Add the block in the free blocks list
4754                 pBlockNew->u1.nextFree              = pHeap->pFreeBlockList;
4755                 pBlockNew->u0.prevFree              = pHeap->pFreeBlockList->u0.prevFree;
4756                 pBlockNew->u1.nextFree->u0.prevFree = pBlockNew;
4757                 pBlockNew->u0.prevFree->u1.nextFree = pBlockNew;
4758 
4759                 // Add the block in the blocks list
4760                 pBlockNew->next       = pBlockLast->next;
4761                 pBlockNew->prev       = pBlockLast;
4762                 pBlockNew->next->prev = pBlockNew;
4763                 pBlockNew->prev->next = pBlockNew;
4764 
4765                 if ((status = _heapUpdate(pHeap, pBlockNew, BLOCK_ADD)) != NV_OK)
4766                 {
4767                     NV_PRINTF(LEVEL_ERROR,
4768                               "_heapUpdate failed to _ADD block\n");
4769 
4770                     if (pHeap->pFreeBlockList == pBlockNew) // There was no free block in the heap.
4771                         pHeap->pFreeBlockList = NULL;       // We had added this one.
4772                     portMemFree(pBlockNew);
4773                 }
4774                 else
4775                 {
4776                     pHeap->numBlocks++;
4777                 }
4778             }
4779         }
4780     }
4781 
4782     if (status == NV_OK)
4783     {
4784         pHeap->total += resizeBy;
4785         pHeap->free  += resizeBy;
4786 
4787         status = memmgrGetBlackListPagesForHeap_HAL(pGpu, GPU_GET_MEMORY_MANAGER(pGpu), pHeap);
4788         if (status != NV_OK)
4789         {
4790             NV_PRINTF(LEVEL_INFO,
4791                       "Failed to read blackList pages (0x%x).\n",
4792                       status);
4793         }
4794 
4795         heapFilterBlackListPages(pHeap, pHeap->base, pHeap->total);
4796 
4797         if (pHeap->blackListAddresses.count != 0)
4798         {
4799             status = heapBlackListPages(pGpu, pHeap);
4800 
4801             if (status != NV_OK)
4802             {
4803                 NV_PRINTF(LEVEL_WARNING,
4804                           "Error 0x%x creating blacklist\n",
4805                           status);
4806             }
4807         }
4808     }
4809     return status;
4810 }
4811