1 /*
2 * SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
3 * SPDX-License-Identifier: MIT
4 *
5 * Permission is hereby granted, free of charge, to any person obtaining a
6 * copy of this software and associated documentation files (the "Software"),
7 * to deal in the Software without restriction, including without limitation
8 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
9 * and/or sell copies of the Software, and to permit persons to whom the
10 * Software is furnished to do so, subject to the following conditions:
11 *
12 * The above copyright notice and this permission notice shall be included in
13 * all copies or substantial portions of the Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
21 * DEALINGS IN THE SOFTWARE.
22 */
23
24 /*!
25 * @file
26 * @brief Standard local frame buffer allocation and management routines
27 */
28
29 #include "os/os.h"
30 #include "gpu/gpu.h"
31 #include "gpu/mem_mgr/mem_mgr.h"
32 #include "gpu/mem_mgr/heap.h"
33 #include "gpu/mem_sys/kern_mem_sys.h"
34 #include "mem_mgr/video_mem.h"
35 #include "mem_mgr/vaspace.h"
36 #include "mem_mgr/system_mem.h"
37 #include "gpu/mem_mgr/mem_utils.h"
38 #include "gpu/mem_mgr/virt_mem_allocator.h"
39 #include "gpu/mem_mgr/mem_desc.h"
40 #include "gpu_mgr/gpu_mgr.h"
41 #include "core/locks.h"
42 #include "class/cl0040.h" // NV01_MEMORY_LOCAL_USER
43 #include "vgpu/rpc.h"
44 #include "vgpu/vgpu_util.h"
45 #include "gpu/mmu/kern_gmmu.h"
46 #include "virtualization/hypervisor/hypervisor.h"
47 #include "gpu/device/device.h"
48 #include "kernel/gpu/intr/intr.h"
49 #include "platform/sli/sli.h"
50
//
// Kinds of mutation made to the heap's block list; passed to _heapUpdate()
// (declared below) so it can refresh derived bookkeeping such as the block
// RB-tree after the block list changes.
//
typedef enum
{
    BLOCK_ADD,                // a new block was inserted into the block list
    BLOCK_REMOVE,             // a block is being removed from the block list
    BLOCK_SIZE_CHANGED,       // an existing block's begin/end range changed
    BLOCK_FREE_STATE_CHANGED, // a block toggled between free and owned
} BlockAction;
58
59 //
60 // Statics
61 //
62 static NV_STATUS _heapBlockFree(OBJGPU *, Heap *, NvHandle, NvHandle, MEM_BLOCK *);
63 static void _heapSetTexturePlacement(Heap *, NvU32, NvU32, NvBool*,
64 NvU32*, NvU8*);
65 static NV_STATUS _heapGetMaxFree(Heap *, NvU64 *, NvU64 *);
66 static NV_STATUS _heapGetBankPlacement(OBJGPU *, Heap *, NvU32,
67 NvU32 *, NvU32, NvU32, NvU32 *);
68 static MEM_BLOCK *_heapFindAlignedBlockWithOwner(OBJGPU *, Heap *, NvU32,
69 NvU64/* aligned*/);
70 static NV_STATUS _heapProcessFreeBlock(OBJGPU *, MEM_BLOCK *, MEM_BLOCK **,
71 MEM_BLOCK **, Heap *,
72 MEMORY_ALLOCATION_REQUEST *,
73 NvHandle, OBJHEAP_ALLOC_DATA *,
74 FB_ALLOC_INFO *, NvU64, NvU64 *);
75 static void _heapAddBlockToNoncontigList(Heap *, MEM_BLOCK *);
76 static void _heapRemoveBlockFromNoncontigList(Heap *, MEM_BLOCK *);
77 static NV_STATUS _heapFindBlockByOffset(OBJGPU *, Heap *, NvU32,
78 MEMORY_DESCRIPTOR *, NvU64,
79 MEM_BLOCK **);
80 static NV_STATUS _heapAllocNoncontig(OBJGPU *, NvHandle, Heap *,
81 MEMORY_ALLOCATION_REQUEST *, NvHandle,
82 OBJHEAP_ALLOC_DATA *, FB_ALLOC_INFO *,
83 NvU32, NvU64, NvU64 *, MEMORY_DESCRIPTOR *,
84 HWRESOURCE_INFO **);
85 static NV_STATUS _heapUpdate(Heap *, MEM_BLOCK *, BlockAction);
86 static void _heapAdjustFree(Heap *pHeap, NvS64 blockSize, NvBool internalHeap);
87 static void _heapBlacklistChunksInFreeBlocks(OBJGPU *, Heap *);
88
89 #ifdef DEBUG
90
91 /****************************************************************************/
92 /* */
93 /* DEBUG support! */
94 /* */
95 /****************************************************************************/
96
97 NvU32 dbgDumpHeap = 0;
98 NvU32 dbgReverseDumpHeap = 0;
99
100 static void _heapDump(Heap *);
101 static void _heapValidate(Heap *);
102
103 #define HEAP_VALIDATE(h) {_heapValidate(h);if(dbgDumpHeap)_heapDump(h);}
104
/*!
 * @brief Decodes a 4-byte owner tag into a printable ASCII string.
 *
 * The owner value is unpacked big-endian (most significant byte first)
 * into string[0..3]; any byte outside the printable ASCII range is
 * replaced with '?'. The result is NUL-terminated.
 *
 * @param[in]  owner   4-character owner tag packed into an NvU32
 * @param[out] string  caller-provided buffer of at least 5 bytes
 */
static void ConvertOwnerToString(NvU32 owner, char *string)
{
    int shift;
    int pos = 0;

    // Unpack one byte per iteration, most significant byte first.
    for (shift = 24; shift >= 0; shift -= 8)
    {
        char c = (char)((owner >> shift) & 0xFF);

        // Assuming ASCII, anything in [' ', 0x7E] is a "safe" printable
        // character; substitute '?' for everything else.
        string[pos++] = ((c < ' ') || (c > 0x7E)) ? '?' : c;
    }
    string[pos] = 0;
}
120
/*!
 * @brief DEBUG-only dump of the heap's block list and free block list.
 *
 * Prints the heap totals, then every block (owner tags decoded via
 * ConvertOwnerToString), then the free list along with a recomputed free
 * byte count. Walks the circular lists forward, or backward when the
 * dbgReverseDumpHeap debug knob is nonzero.
 *
 * Fix: the block-list walk now guards against a NULL pBlockList, matching
 * the guard the free-list walk already had; previously an empty heap would
 * dereference NULL in the do/while.
 *
 * @param[in] pHeap  heap to dump; NULL is tolerated and ignored
 */
static void _heapDump
(
    Heap *pHeap
)
{
    NvU64 free;
    MEM_BLOCK *pBlock;
    char ownerString[5];

    if (!pHeap) return;

    NV_PRINTF(LEVEL_INFO, "Heap dump. Size = 0x%08llx\n", pHeap->total);
    NV_PRINTF(LEVEL_INFO, " Free = 0x%08llx\n", pHeap->free);
    NV_PRINTF(LEVEL_INFO, " Reserved = 0x%08llx\n", pHeap->reserved);
    NV_PRINTF(LEVEL_INFO,
              "=================================================================\n");
    NV_PRINTF(LEVEL_INFO,
              "\t\t Begin End Size \t Type ResId Owner"
              " \"owns\"\n");
    NV_PRINTF(LEVEL_INFO, "Block List %s\n",
              dbgReverseDumpHeap ? "Reverse" : "Forward");
    pBlock = pHeap->pBlockList;
    if (pBlock) // guard: list may be empty (mirrors the free-list walk below)
    do
    {
        if ( dbgReverseDumpHeap )
            pBlock = pBlock->prev;

        NV_PRINTF(LEVEL_INFO, "\t\t0x%08llx 0x%08llx 0x%08llx", pBlock->begin,
                  pBlock->end, 1 + (pBlock->end - pBlock->begin));

        if (pBlock->owner == NVOS32_BLOCK_TYPE_FREE) {
            NV_PRINTF_EX(NV_PRINTF_MODULE, LEVEL_INFO, "\tFREE\n");
        }
        else
        {
            // Owned block: show type, owner id, and the decoded owner tag.
            ConvertOwnerToString(pBlock->owner, ownerString);
            NV_PRINTF_EX(NV_PRINTF_MODULE, LEVEL_INFO,
                         "\t0x%04x 0x%08x \"%s\"\n", pBlock->u0.type,
                         pBlock->owner, ownerString);
        }

        if ( !dbgReverseDumpHeap )
            pBlock = pBlock->next;
    } while (pBlock != pHeap->pBlockList);

    NV_PRINTF(LEVEL_INFO, "FREE Block List %s\n",
              dbgReverseDumpHeap ? "Reverse" : "Forward");
    free = 0;
    pBlock = pHeap->pFreeBlockList;
    if (pBlock)
    do
    {
        if ( dbgReverseDumpHeap )
            pBlock = pBlock->u0.prevFree;

        NV_PRINTF(LEVEL_INFO, "\t\t0x%08llx 0x%08llx 0x%08llx\tFREE\n",
                  pBlock->begin, pBlock->end,
                  1 + (pBlock->end - pBlock->begin));

        // Recompute the free total so it can be compared with pHeap->free.
        free += pBlock->end - pBlock->begin + 1;

        if ( !dbgReverseDumpHeap )
            pBlock = pBlock->u1.nextFree;
    } while (pBlock != pHeap->pFreeBlockList);

    NV_PRINTF(LEVEL_INFO, "\tCalculated free count = 0x%08llx\n", free);
}
188
/*!
 * @brief DEBUG-only heap consistency check.
 *
 * Walks the circular block list once and verifies:
 *  - every free block appears on the free list, in block-list order,
 *  - adjacent blocks are contiguous (no holes between end and next begin),
 *  - the list wraps exactly at [pHeap->base, pHeap->base + pHeap->total - 1],
 *  - each block satisfies begin <= end,
 *  - the accumulated free byte count equals pHeap->free,
 *  - used + free does not exceed pHeap->total.
 *
 * On any inconsistency the heap is dumped and DBG_BREAKPOINT() is hit;
 * validation continues afterwards so all problems get reported.
 *
 * @param[in] pHeap  heap to validate; NULL is tolerated and ignored
 */
static void _heapValidate
(
    Heap *pHeap
)
{
    MEM_BLOCK *pBlock, *pBlockFree;
    NvU64 free, used;

    if (!pHeap) return;

    /*
     * Scan the blocks and check for consistency.
     */
    free = 0;
    used = 0;
    pBlock = pHeap->pBlockList;
    pBlockFree = pHeap->pFreeBlockList;
    do
    {
        if (pBlock->owner == NVOS32_BLOCK_TYPE_FREE)
        {
            // A free block exists but the free list is empty/exhausted.
            if (!pBlockFree)
            {
                NV_PRINTF(LEVEL_ERROR,
                          "Invalid free list with free blocks found.\n");
                _heapDump(pHeap);
                DBG_BREAKPOINT();
            }
            free += pBlock->end - pBlock->begin + 1;
            // Free blocks must appear on the free list in block-list order.
            if (pBlock != pBlockFree)
            {
                NV_PRINTF(LEVEL_ERROR,
                          "Free list not consistent with block list.\n");
                _heapDump(pHeap);
                DBG_BREAKPOINT();
            }
            pBlockFree = pBlockFree->u1.nextFree;
        }
        else
        {
            used += pBlock->end - pBlock->begin + 1;
        }
        if (pBlock->next != pHeap->pBlockList)
        {
            // Interior neighbors must butt up against each other exactly.
            if (pBlock->end != pBlock->next->begin - 1)
            {
                NV_PRINTF(LEVEL_ERROR,
                          "Hole between blocks at offset = 0x%llx\n",
                          pBlock->end);
                _heapDump(pHeap);
                DBG_BREAKPOINT();
            }
        }
        else
        {
            // Wrap-around point: last block must end at the heap top and
            // the first block must start at the heap base.
            if (pBlock->end != pHeap->base + pHeap->total - 1)
            {
                NV_PRINTF(LEVEL_ERROR, "Last block doesn't end at top.\n");
                _heapDump(pHeap);
                DBG_BREAKPOINT();
            }
            if (pBlock->next->begin != pHeap->base)
            {
                NV_PRINTF(LEVEL_ERROR,
                          "First block doesn't start at bottom.\n");
                _heapDump(pHeap);
                DBG_BREAKPOINT();
            }
        }
        if (pBlock->end < pBlock->begin)
        {
            NV_PRINTF(LEVEL_ERROR,
                      "Validate: Invalid block begin = 0x%08llx\n",
                      pBlock->begin);
            NV_PRINTF(LEVEL_ERROR,
                      " end = 0x%08llx\n",
                      pBlock->end);
            _heapDump(pHeap);
            DBG_BREAKPOINT();
        }
        pBlock = pBlock->next;
    } while (pBlock != pHeap->pBlockList);
    // Cross-check the accumulated totals against the heap's cached counters.
    if (free != pHeap->free)
    {
        NV_PRINTF(LEVEL_ERROR,
                  "Calculated free count (%llx) not consistent with heap free count (%llx).\n",
                  free, pHeap->free);
        _heapDump(pHeap);
        DBG_BREAKPOINT();
    }
    if ((used + free) > pHeap->total)
    {
        NV_PRINTF(LEVEL_ERROR,
                  "Calculated used count (%llx) not consistent with heap size (%llx).\n",
                  used + free, pHeap->total);
        _heapDump(pHeap);
        DBG_BREAKPOINT();
    }
}
288 #else
289 #define HEAP_VALIDATE(h)
290 #endif // DEBUG
291
292
293 /****************************************************************************/
294 /* */
295 /* Heap Manager */
296 /* */
297 /****************************************************************************/
298
/*!
 * @brief Carves a fixed-address reservation out of the heap.
 *
 * Used during heap initialization to take a region out of the general
 * allocation pool by allocating it in place, either as an RM-reserved
 * region or as a PMA-managed region.
 *
 * @param[in]  pMemoryManager  MemoryManager object pointer
 * @param[in]  pHeap           heap to reserve from
 * @param[in]  offset          start of the region (absolute FB offset)
 * @param[in]  size            size of the region in bytes (clamped to heap end)
 * @param[out] ppMemDesc       receives the memory descriptor of the reservation
 * @param[in]  isRmRsvdRegion  NV_TRUE for RM-reserved, NV_FALSE for PMA-managed
 * @param[in]  bProtected      NV_TRUE to allocate from protected memory
 *
 * @returns NV_OK on success. Note: an offset at/past the end of the heap
 *          also returns NV_OK (the region is silently skipped).
 */
static NV_STATUS heapReserveRegion
(
    MemoryManager      *pMemoryManager,
    Heap               *pHeap,
    NvU64               offset,
    NvU64               size,
    MEMORY_DESCRIPTOR **ppMemDesc,
    NvBool              isRmRsvdRegion,
    NvBool              bProtected
)
{
    NV_STATUS             rmStatus           = NV_OK;
    OBJGPU               *pGpu               = ENG_GET_GPU(pMemoryManager);
    NvU64                 heapSize           = (pHeap->base + pHeap->total);
    FB_ALLOC_INFO        *pFbAllocInfo       = NULL;
    FB_ALLOC_PAGE_FORMAT *pFbAllocPageFormat = NULL;

    MEMORY_ALLOCATION_REQUEST   allocRequest = {0};
    NV_MEMORY_ALLOCATION_PARAMS allocData    = {0};

    NvU64 align  = 0;
    NvU32 height = 1;
    NvU32 pitch  = 1;
    NvU32 attr   = DRF_DEF(OS32, _ATTR, _PAGE_SIZE, _4KB) |
                   DRF_DEF(OS32, _ATTR, _PHYSICALITY, _CONTIGUOUS);
    NvU32 attr2  = DRF_DEF(OS32, _ATTR2, _INTERNAL, _YES);

    // Regions lying entirely beyond the heap are skipped, not failed.
    NV_ASSERT_OR_RETURN((offset < heapSize), NV_OK);

    allocRequest.pUserParams = &allocData;

    // Describe a contiguous, fixed-address, internal allocation over the region.
    allocData.owner     = ((isRmRsvdRegion) ? HEAP_OWNER_RM_RESERVED_REGION : HEAP_OWNER_PMA_RESERVED_REGION);
    allocData.height    = height;
    allocData.type      = ((isRmRsvdRegion) ? NVOS32_TYPE_RESERVED : NVOS32_TYPE_PMA);
    allocData.flags     = NVOS32_ALLOC_FLAGS_FIXED_ADDRESS_ALLOCATE;
    allocData.attr      = attr;
    allocData.attr2     = attr2;
    allocData.pitch     = pitch;
    allocData.alignment = align;
    // Clamp so the reservation never extends past the end of the heap.
    allocData.size      = NV_MIN(size, (heapSize - offset));
    allocData.offset    = offset;

    if (bProtected)
        allocData.flags |= NVOS32_ALLOC_FLAGS_PROTECTED;

    pFbAllocInfo = portMemAllocNonPaged(sizeof(FB_ALLOC_INFO));
    NV_ASSERT_TRUE_OR_GOTO(rmStatus, pFbAllocInfo != NULL, NV_ERR_NO_MEMORY, done);

    pFbAllocPageFormat = portMemAllocNonPaged(sizeof(FB_ALLOC_PAGE_FORMAT));
    NV_ASSERT_TRUE_OR_GOTO(rmStatus, pFbAllocPageFormat != NULL, NV_ERR_NO_MEMORY, done);

    portMemSet(pFbAllocInfo, 0, sizeof(FB_ALLOC_INFO));
    portMemSet(pFbAllocPageFormat, 0, sizeof(FB_ALLOC_PAGE_FORMAT));
    pFbAllocInfo->pageFormat = pFbAllocPageFormat;

    memUtilsInitFBAllocInfo(&allocData, pFbAllocInfo, 0, 0);

    // Acquire any HW resources the allocation needs, then place it in the heap.
    NV_ASSERT_OK_OR_GOTO(rmStatus,
        memmgrAllocResources(pGpu, pMemoryManager, &allocRequest, pFbAllocInfo),
        done);

    NV_ASSERT_OK_OR_GOTO(rmStatus,
        vidmemAllocResources(pGpu, pMemoryManager, &allocRequest, pFbAllocInfo, pHeap),
        done);

    NV_PRINTF(LEVEL_INFO, "Reserved heap for %s %llx..%llx\n",
              ((isRmRsvdRegion) ? "RM" : "PMA"), offset, (offset+size-1));

    *ppMemDesc = allocRequest.pMemDesc;

    // Account for reserved size removed from the total address space size
    if (isRmRsvdRegion)
    {
        pHeap->reserved += allocData.size;
    }

done:
    // goto-based cleanup: both temporaries are freed on every path
    // (portMemFree of NULL is harmless).
    portMemFree(pFbAllocPageFormat);
    portMemFree(pFbAllocInfo);

    return rmStatus;
}
381
382 /*!
383 * @brief Initializes a heap object
384 *
385 * @param[in] pFb FB object ptr
386 * @param[in/out] pHeap HEAP object ptr
387 * @param[in] base Base for this heap
388 * @param[in] size Size of this heap
389 * @param[in] heapType Heap type (Global or PMSA)
390 * @param[in] pPtr A generic pointer which will be typecasted based on heapType
391 */
NV_STATUS heapInitInternal_IMPL
(
    OBJGPU             *pGpu,
    Heap               *pHeap,
    NvU64               base,
    NvU64               size,
    HEAP_TYPE_INTERNAL  heapType,
    void               *pPtr
)
{
    MEM_BLOCK            *pBlock;
    NvU32                 i;
    NV_STATUS             status;
    MemoryManager        *pMemoryManager = GPU_GET_MEMORY_MANAGER(pGpu);
    NvU32                 typeDataSize = 0;
    FB_REGION_DESCRIPTOR *pFbRegion;
    MEMORY_DESCRIPTOR    *pPmsaMemDesc = NULL;

    //
    // Simply create a free heap.
    //
    NV_PRINTF(LEVEL_INFO,
              "Heap Manager: HEAP ABOUT TO BE CREATED. (Base: 0x%llx Size: 0x%llx)\n",
              base, size);

    // The heap starts out fully free, with nothing reserved.
    pHeap->base = base;
    pHeap->total = size;
    pHeap->free = size;
    pHeap->reserved = 0;
    pHeap->heapType = heapType;

    pHeap->peakInternalUsage = 0;
    pHeap->peakExternalUsage = 0;
    pHeap->currInternalUsage = 0;
    pHeap->currExternalUsage = 0;


    // Set the flags based on HEAP type
    switch (heapType)
    {
        case HEAP_TYPE_RM_GLOBAL:
            pHeap->bHasFbRegions = NV_TRUE;
            break;
        case HEAP_TYPE_PHYS_MEM_SUBALLOCATOR:
            // PMSA heaps carry extra per-type data and back onto a memdesc.
            NV_ASSERT(pPtr != NULL);

            pHeap->bHasFbRegions = NV_FALSE;
            typeDataSize = sizeof(PHYS_MEM_SUBALLOCATOR_DATA);
            pPmsaMemDesc = ((PHYS_MEM_SUBALLOCATOR_DATA *)pPtr)->pMemDesc;
            break;
        case HEAP_TYPE_PARTITION_LOCAL:
            pHeap->bHasFbRegions = NV_TRUE;
            break;
        default:
            return NV_ERR_INVALID_ARGUMENT;
    }

    // Copy the caller's type-specific data, if this heap type has any.
    pHeap->pHeapTypeSpecificData = NULL;
    if ((pPtr != NULL) && (typeDataSize > 0))
    {
        pHeap->pHeapTypeSpecificData = portMemAllocNonPaged(typeDataSize);
        if (pHeap->pHeapTypeSpecificData == NULL)
        {
            return NV_ERR_OPERATING_SYSTEM;
        }
        NV_ASSERT(pHeap->pHeapTypeSpecificData != NULL);
        portMemCopy(pHeap->pHeapTypeSpecificData, typeDataSize, pPtr, typeDataSize);
    }

    //
    // Create the single free block spanning the whole heap. It is its own
    // neighbor on both the circular block list and the circular free list.
    //
    // NOTE(review): on later error returns in this function the block and
    // pHeapTypeSpecificData are not freed here — presumably the caller tears
    // the heap down via heapDestruct on failure; TODO confirm.
    //
    pBlock = portMemAllocNonPaged(sizeof(MEM_BLOCK));
    if (pBlock == NULL)
    {
        return NV_ERR_OPERATING_SYSTEM;
    }
    portMemSet(pBlock, 0, sizeof(MEM_BLOCK));

    pBlock->owner = NVOS32_BLOCK_TYPE_FREE;
    pBlock->textureId= 0;
    pBlock->begin = base;
    pBlock->align = 0;
    pBlock->alignPad = 0;
    pBlock->end = base + size - 1;
    pBlock->u0.prevFree = pBlock;
    pBlock->u1.nextFree = pBlock;
    pBlock->next = pBlock;
    pBlock->prev = pBlock;
    pBlock->format = 0;

    pHeap->pBlockList = pBlock;
    pHeap->pFreeBlockList = pBlock;
    pHeap->memHandle = 0xcafe0000; // seed for internally generated mem handles
    pHeap->numBlocks = 1;
    pHeap->pBlockTree = NULL;

    //
    // Set the client id as invalid since there isn't one that exists
    // Initialize the client texture data structure
    //
    portMemSet(pHeap->textureData, 0,
               sizeof(TEX_INFO) * MAX_TEXTURE_CLIENT_IDS);

    //
    // Call into the hal to get bank placement policy. Note this will vary chip to chip, but let's allow the HAL to tell us
    // the implementation details.
    //
    status = memmgrGetBankPlacementData_HAL(pGpu, pMemoryManager, pHeap->placementStrategy);
    if (status != NV_OK)
    {
        //
        // ooops, can't get HAL version of where to place things - let's default to something
        //
        NV_PRINTF(LEVEL_ERROR,
                  "Heap Manager unable to get bank placement policy from HAL.\n");
        NV_PRINTF(LEVEL_ERROR,
                  "Heap Manager defaulting to BAD placement policy.\n");

        // Fallback strategies: images grow up, everything else grows down.
        pHeap->placementStrategy[BANK_PLACEMENT_IMAGE] = ((0)
                                                          | BANK_MEM_GROW_UP
                                                          | MEM_GROW_UP
                                                          | 0xFFFFFF00);
        pHeap->placementStrategy[BANK_PLACEMENT_DEPTH] = ((0)
                                                          | BANK_MEM_GROW_DOWN
                                                          | MEM_GROW_DOWN
                                                          | 0xFFFFFF00);
        pHeap->placementStrategy[BANK_PLACEMENT_TEX_OVERLAY_FONT] = ((0)
                                                                     | BANK_MEM_GROW_DOWN
                                                                     | MEM_GROW_DOWN
                                                                     | 0xFFFFFF00);
        pHeap->placementStrategy[BANK_PLACEMENT_OTHER] = ((0)
                                                          | BANK_MEM_GROW_DOWN
                                                          | MEM_GROW_DOWN
                                                          | 0xFFFFFF00);
        status = NV_OK;
    }

    // Setup noncontig list
    pHeap->pNoncontigFreeBlockList = NULL;

    // insert first block into rb-tree
    if (NV_OK != _heapUpdate(pHeap, pBlock, BLOCK_ADD))
    {
        return NV_ERR_INVALID_STATE;
    }

    //
    // If there are FB regions defined, check to see if any of them are
    // marked reserved. Tag those regions as reserved in the heap.
    //
    if ((pMemoryManager->Ram.numFBRegions > 0) && (pHeap->bHasFbRegions))
    {
        NvBool bConsoleFbRegionContentPreserved;
        FB_REGION_DESCRIPTOR consoleFbRegion;
        portMemSet(&consoleFbRegion, 0, sizeof(consoleFbRegion));

        if (heapType != HEAP_TYPE_PARTITION_LOCAL)
        {
            //
            // If a region of FB is actively being used for console display memory
            // on this GPU, mark it reserved in-place.
            //
            memmgrReserveConsoleRegion_HAL(pGpu, pMemoryManager, &consoleFbRegion);
            status = memmgrAllocateConsoleRegion_HAL(pGpu, pMemoryManager, &consoleFbRegion);
            if (status != NV_OK)
            {
                // Console region failure is non-fatal; continue heap setup.
                NV_PRINTF(LEVEL_WARNING, "Squashing the error status after failing to allocate console region, status: %x\n",
                          status);
                status = NV_OK;
            }
        }

        //
        // Define PMA-managed regions
        // This will be moved to memmgr once we refactor SMC partitions
        //
        if (memmgrIsPmaEnabled(pMemoryManager) &&
            memmgrIsPmaSupportedOnPlatform(pMemoryManager) &&
            (heapType != HEAP_TYPE_PARTITION_LOCAL))
        {
            memmgrSetPmaInitialized(pMemoryManager, NV_TRUE);
            memmgrRegionSetupForPma(pGpu, pMemoryManager);
        }

        bConsoleFbRegionContentPreserved = NV_FALSE;

        if (heapType != HEAP_TYPE_PARTITION_LOCAL)
        {
            // For GSP RM, all PMA candidate regions are given to CPU RM for its use
            if (RMCFG_FEATURE_PLATFORM_GSP)
            {
                memmgrRegionSetupForPma(pGpu, pMemoryManager);
            }

            for (i = 0; i < pMemoryManager->Ram.numFBRegions; i++)
            {
                pFbRegion = &pMemoryManager->Ram.fbRegion[i];

                // If the region is marked reserved, reserve it in the heap
                if (pFbRegion->bRsvdRegion ||
                    ((memmgrIsPmaInitialized(pMemoryManager) ||
                      RMCFG_FEATURE_PLATFORM_GSP) &&
                     !pFbRegion->bInternalHeap))
                {
                    NvU64 fbRegionBase;
                    MEMORY_DESCRIPTOR *pMemDesc = NULL;

                    // Skip regions which are outside the heap boundaries
                    if (pFbRegion->base < base && pFbRegion->limit < base)
                    {
                        continue;
                    }

                    // TODO: Remove SRIOV check and enable on baremetal as well.
                    if (IS_VIRTUAL_WITH_SRIOV(pGpu) && (pFbRegion->base >= (base + size)))
                    {
                        continue;
                    }

                    // Adjust base of reserved region on heap
                    fbRegionBase = NV_MAX(base, pFbRegion->base);

                    NV_PRINTF(LEVEL_INFO, "Reserve at %llx of size %llx\n",
                              fbRegionBase, (pFbRegion->limit - fbRegionBase + 1));

                    status = heapReserveRegion(
                        pMemoryManager,
                        pHeap,
                        fbRegionBase,
                        (pFbRegion->limit - fbRegionBase + 1),
                        &pMemDesc,
                        pFbRegion->bRsvdRegion,
                        pFbRegion->bProtected);

                    if (status != NV_OK || pMemDesc == NULL)
                    {
                        NV_PRINTF(LEVEL_ERROR, "failed to reserve %llx..%llx\n",
                                  pFbRegion->base, pFbRegion->limit);
                        return status;
                    }

                    // Console region: keep its contents across suspend/resume.
                    if ((pMemoryManager->Ram.ReservedConsoleDispMemSize > 0) &&
                        (pFbRegion->base == consoleFbRegion.base) && (pFbRegion->limit == consoleFbRegion.limit))
                    {
                        memdescSetFlag(pMemDesc, MEMDESC_FLAGS_LOST_ON_SUSPEND, NV_FALSE);
                        memdescSetFlag(pMemDesc, MEMDESC_FLAGS_PRESERVE_CONTENT_ON_SUSPEND, NV_TRUE);

                        bConsoleFbRegionContentPreserved = NV_TRUE;
                    }
                }
            }

            if ((pMemoryManager->Ram.ReservedConsoleDispMemSize > 0) &&
                !bConsoleFbRegionContentPreserved)
            {
                NV_PRINTF(LEVEL_ERROR,
                          "failed to preserve content of console display memory\n");
            }
        }

#ifdef DEBUG
        _heapDump(pHeap);
#endif
    } //if ((pMemoryManager->Ram.numFBRegions > 0) && (pHeap->bHasFbRegions))

    // Hand over all the memory of partition-heap to partition-PMA
    if ((heapType == HEAP_TYPE_PARTITION_LOCAL) &&
        (memmgrIsPmaInitialized(pMemoryManager)))
    {
        MEMORY_DESCRIPTOR *pMemDesc = NULL;
        NvBool bProtected = NV_FALSE;

        bProtected = gpuIsCCFeatureEnabled(pGpu);
        status = heapReserveRegion(
            pMemoryManager,
            pHeap,
            base,
            size,
            &pMemDesc,
            NV_FALSE,
            bProtected);

        if (status != NV_OK || pMemDesc == NULL)
        {
            NV_PRINTF(LEVEL_ERROR, "failed to reserve %llx..%llx\n", base,
                      base + size - 1);

            return status;
        }
    }

    // If PHYS_MEM_SUBALLOCATOR, increase its refCount
    if ((status == NV_OK) && (pPmsaMemDesc != NULL))
    {
        memdescAddRef(pPmsaMemDesc);
    }

    return (status);
}
689
/*!
 * @brief Tears down a heap: frees blacklisted pages, all blocks, and
 *        type-specific data, and (for FB-region heaps) shuts down PMA.
 *
 * The block-freeing loop restarts from the head whenever freeing a block
 * changes pHeap->pBlockList, since the saved terminator pointer may no
 * longer be on the list. The heap's block list storage itself is only
 * freed when a single block spanning the whole heap remains (i.e.
 * everything was successfully released).
 *
 * @param[in,out] pHeap  heap to destroy
 */
void
heapDestruct_IMPL
(
    Heap *pHeap
)
{
    MEM_BLOCK         *pBlock, *pBlockFirst, *pBlockNext;
    OBJGPU            *pGpu = ENG_GET_GPU(pHeap);
    MemoryManager     *pMemoryManager = GPU_GET_MEMORY_MANAGER(pGpu);
    NvBool             headptr_updated;
    MEMORY_DESCRIPTOR *pPmsaMemDesc = NULL;

    NV_PRINTF(LEVEL_INFO, "Heap Manager: HEAP ABOUT TO BE DESTROYED.\n");

#ifdef DEBUG
    _heapDump(pHeap);
#endif

    // Free all blacklisted pages
    if (pHeap->blackListAddresses.count != 0)
    {
        heapFreeBlackListedPages(pGpu, pHeap);
    }

    //
    // Free all allocated blocks, but preserve primary surfaces.
    // If the head of our list changes, restart the search, since our terminating
    // block pointer may not be in the list anymore.
    //
    do
    {
        pBlock = pBlockFirst = pHeap->pBlockList;
        if (pBlock == NULL)
        {
            break;
        }

        headptr_updated = NV_FALSE;

        do
        {
            // Save the successor now: _heapBlockFree may unlink pBlock.
            pBlockNext = pBlock->next;

            // If we are freeing the reserved region created at heapInit, free the memory descriptor too
            if ((pBlock->allocedMemDesc) && ((pBlock->owner == HEAP_OWNER_RM_RESERVED_REGION) ||
                                             (pBlock->owner == HEAP_OWNER_PMA_RESERVED_REGION)))
            {
                memdescDestroy(pBlock->pMemDesc);
                pBlock->pMemDesc = NULL;
                pBlock->allocedMemDesc = NV_FALSE;
            }

            _heapBlockFree(pGpu, pHeap, NV01_NULL_OBJECT, NV01_NULL_OBJECT, pBlock);

            // restart scanning the list, if the heap->pBlockList changed
            if (pBlockFirst != pHeap->pBlockList)
            {
                headptr_updated = NV_TRUE;
                break;
            }

            pBlock = pBlockNext;

        } while (pBlock != pHeap->pBlockList);

    } while (headptr_updated);

    //
    // Now that the console region is no longer reserved, free the console
    // memdesc.
    //
    if (pHeap->heapType != HEAP_TYPE_PARTITION_LOCAL)
        memmgrReleaseConsoleRegion(pGpu, pMemoryManager);

    //
    // Free the heap structure, if we freed everything
    // (the first block represents the entire free space of the heap).
    // this is only done if the "internal" interface is used.
    // heapDestroy is an exported function now to user/display driver land,
    // and we don't want the heap structures being freed unless we've been
    // called from RM-land during a STATE_DESTROY
    //
    if ((pHeap->pBlockList != NULL) &&
        (pHeap->pBlockList->begin == pHeap->base) &&
        (pHeap->pBlockList->end == (pHeap->base + pHeap->total - 1)))
    {
        portMemFree(pHeap->pBlockList);
    }

    // Free the type specific data allocated
    if (pHeap->pHeapTypeSpecificData != NULL)
    {
        // PMSA heaps took a ref on their backing memdesc at init; drop it.
        if (pHeap->heapType == HEAP_TYPE_PHYS_MEM_SUBALLOCATOR)
        {
            pPmsaMemDesc = ((PHYS_MEM_SUBALLOCATOR_DATA *)(pHeap->pHeapTypeSpecificData))->pMemDesc;
            memdescDestroy(pPmsaMemDesc);
        }
        portMemFree(pHeap->pHeapTypeSpecificData);
        pHeap->pHeapTypeSpecificData = NULL;
    }

    if ((pHeap->bHasFbRegions) && (memmgrIsPmaInitialized(pMemoryManager)))
    {
        if (pHeap->heapType != HEAP_TYPE_PARTITION_LOCAL)
            memmgrSetPmaInitialized(pMemoryManager, NV_FALSE);

        pmaDestroy(&pHeap->pmaObject);
        portMemSet(&pHeap->pmaObject, 0, sizeof(pHeap->pmaObject));
    }
}
800
_heapGetBankPlacement(OBJGPU * pGpu,Heap * pHeap,NvU32 owner,NvU32 * flags,NvU32 type,NvU32 bank,NvU32 * placement)801 static NV_STATUS _heapGetBankPlacement
802 (
803 OBJGPU *pGpu,
804 Heap *pHeap,
805 NvU32 owner,
806 NvU32 *flags,
807 NvU32 type,
808 NvU32 bank,
809 NvU32 *placement
810 )
811 {
812 NvU32 bankPlacement, i;
813
814 if (type != NVOS32_TYPE_PRIMARY)
815 {
816 NvU32 bankPlacementType;
817
818 // what kind of allocation is it?
819 switch (type)
820 {
821 case NVOS32_TYPE_IMAGE:
822 case NVOS32_TYPE_NOTIFIER:
823 bankPlacementType = BANK_PLACEMENT_IMAGE;
824 break;
825 case NVOS32_TYPE_DEPTH:
826 case NVOS32_TYPE_ZCULL:
827 case NVOS32_TYPE_STENCIL:
828 bankPlacementType = BANK_PLACEMENT_DEPTH;
829 break;
830 case NVOS32_TYPE_TEXTURE:
831 case NVOS32_TYPE_VIDEO:
832 case NVOS32_TYPE_FONT:
833 bankPlacementType = BANK_PLACEMENT_TEX_OVERLAY_FONT;
834 break;
835 default:
836 bankPlacementType = BANK_PLACEMENT_OTHER;
837 }
838
839 //
840 // NV50+ doesn't care about bank placement since the fb has bank
841 // striding and we dont need to care about allocating primary surfaces
842 // in special areas to avoid bank conflicts. This strategy management
843 // should be removed in the future.
844 //
845 bankPlacement = pHeap->placementStrategy[bankPlacementType];
846 }
847 else
848 {
849 //
850 // primary allocation, default grow direction is up, starting at bank 0
851 // Can be overridden with NVOS32_ALLOC_FLAGS_FORCE_MEM_*
852 //
853 bankPlacement = ((0)
854 | BANK_MEM_GROW_UP
855 | MEM_GROW_UP
856 | 0xFFFFFF00);
857 }
858
859 //
860 // check if bank placement force was passed in - hint is handled in the first loop below
861 //
862 if (*flags & NVOS32_ALLOC_FLAGS_BANK_FORCE)
863 {
864 // replace data in bankplacement
865 if (*flags & NVOS32_ALLOC_FLAGS_BANK_GROW_DOWN)
866 bankPlacement = bank | BANK_MEM_GROW_DOWN | 0xFFFFFF00;
867 else
868 bankPlacement = bank | BANK_MEM_GROW_UP | 0xFFFFFF00;
869 *flags &= ~(NVOS32_ALLOC_FLAGS_BANK_HINT); // remove hint flag
870 }
871
872 //
873 // Check if FORCE_MEM_GROWS_UP or FORCE_MEM_GROWS_DOWN was passed in
874 // to override the MEM_GROWS direction for this allocation. Make sure
875 // to override each of the first MEM_NUM_BANKS_TO_TRY bytes in the NvU32
876 //
877 if (*flags & NVOS32_ALLOC_FLAGS_FORCE_MEM_GROWS_UP)
878 {
879 *flags |= NVOS32_ALLOC_FLAGS_IGNORE_BANK_PLACEMENT;
880 for (i = 0; i < MEM_NUM_BANKS_TO_TRY; i++)
881 {
882 bankPlacement = (bankPlacement & ~(MEM_GROW_MASK << (i*MEM_BANK_DATA_SIZE))) |
883 (MEM_GROW_UP << (i*MEM_BANK_DATA_SIZE));
884 }
885 }
886 if (*flags & NVOS32_ALLOC_FLAGS_FORCE_MEM_GROWS_DOWN)
887 {
888 *flags |= NVOS32_ALLOC_FLAGS_IGNORE_BANK_PLACEMENT;
889 for (i = 0; i < MEM_NUM_BANKS_TO_TRY; i++)
890 {
891 bankPlacement = (bankPlacement & ~(MEM_GROW_MASK << (i*MEM_BANK_DATA_SIZE))) |
892 (MEM_GROW_DOWN << (i*MEM_BANK_DATA_SIZE));
893 }
894 }
895
896 // return the bank placement to use
897 *placement = bankPlacement;
898 return (NV_OK);
899 }
900
901 //
902 // Workaround for Bug 67690:
903 // NV28M-WinXP: (Lindbergh) StencilFloor OpenGL Sample Locks Up when Maximized on Secondary DualView Display
904 //
905 // Change heap placement for textures if more than two clients
906 // are detected. In the case of two or more clients, ignoreBankPlacement, textureClientIndex,
907 // and currentBankInfo are modified. IgnoreBankPlacement flag is set to true, textureClientIndex
908 // is returned with the index of the client to be used as heap->textureData[textureClientIndex]
909 // which pertains to the current client. Lastly, currentBankInfo is modified to grow in the
910 // opposite direction of the most recently allocated client.
911 //
/*!
 * @brief Tracks texture clients and picks a grow direction for each.
 *
 * Looks the client up in pHeap->textureData[]. If found, returns its stored
 * placement flags. If not found, registers it — reusing the slot after the
 * most recently allocated client when the table is full — and assigns a
 * grow direction opposite to the most recent client's. With more than one
 * client (or any non-first registration) bank placement is disabled.
 *
 * @param[in]     pHeap               heap whose textureData table is used
 * @param[in]     client              client id to look up / register
 * @param[in]     type                surface type (unused in this routine)
 * @param[in,out] ignoreBankPlacement set NV_TRUE when bank placement must be skipped
 * @param[out]    textureClientIndex  index of the client's textureData slot
 * @param[in,out] currentBankInfo     grow-direction placement flags for this client
 */
static void _heapSetTexturePlacement
(
    Heap   *pHeap,
    NvU32   client,
    NvU32   type,
    NvBool *ignoreBankPlacement,
    NvU32  *textureClientIndex,
    NvU8   *currentBankInfo
)
{
    NvU32 index, numClients, clientFound, mostRecentIndex;
    // 0xFFFFFFFF means "no client has been allocated yet".
    mostRecentIndex = 0xFFFFFFFF;
    clientFound = NV_FALSE;
    numClients = 0;

    //
    // let's first check to see if the client is already registered
    // We will iterate through to find number of clients
    //
    for (index = 0; index < MAX_TEXTURE_CLIENT_IDS; index++)
    {
        // client already registered
        if (pHeap->textureData[index].clientId == client)
        {
            // give the currentBankInfo the new flags
            *currentBankInfo = pHeap->textureData[index].placementFlags;
            //
            // Set the client as found so that we skip allocation
            // of the client in the texture data structure
            //
            clientFound = NV_TRUE;
            *textureClientIndex = index;
        }

        //
        // We loop through the whole structure to determine the
        // number of texture clients currently listed
        //
        if (pHeap->textureData[index].clientId != 0)
            numClients++;

        //
        // This is used to assign new textures to the buffer
        // A value of 0xFFFFFFFF indicates that this is the first allocation
        //
        if (pHeap->textureData[index].mostRecentAllocatedFlag == NV_TRUE)
            mostRecentIndex = index;
    }

    //
    // If more than one client is detected, ignore bank placement
    // otherwise, defaults to bank placement
    //
    if (numClients > 1)
        *ignoreBankPlacement = NV_TRUE;

    //
    // We fall into this if statement if no client was listed
    // or if we have exceeded the allowable clients available
    //
    if (clientFound == NV_FALSE)
    {
        index = 0;
        while (clientFound == NV_FALSE)
        {
            // the case of full texture buffer of clients, greater than 4 clients
            if (index == MAX_TEXTURE_CLIENT_IDS)
            {
                // Evict the slot after the most recent client (round-robin).
                index = (mostRecentIndex + 1) % MAX_TEXTURE_CLIENT_IDS;

                // assign the new client and update the texture data
                pHeap->textureData[index].clientId = client;
                pHeap->textureData[index].mostRecentAllocatedFlag = NV_TRUE;
                pHeap->textureData[mostRecentIndex].mostRecentAllocatedFlag = NV_FALSE;
                pHeap->textureData[index].refCount = 0;

                //
                // Reverse the placementFlags from the one that was previously allocated
                //
                if (pHeap->textureData[mostRecentIndex].placementFlags & MEM_GROW_MASK)
                    *currentBankInfo = MEM_GROW_UP;
                else
                    *currentBankInfo = MEM_GROW_DOWN;

                // Assign the new value to the texture data structure
                pHeap->textureData[index].placementFlags = *currentBankInfo;
                clientFound = NV_TRUE;
                *ignoreBankPlacement = NV_TRUE;
                *textureClientIndex = index;
            }

            // the case in which there is still room available in the buffer
            if (pHeap->textureData[index].clientId == 0)
            {
                // If we fall in here, it means there is still room available
                pHeap->textureData[index].clientId = client;

                // deal with the grow directivity
                if (mostRecentIndex == 0xFFFFFFFF)
                {
                    // this is the very first client to be allocated
                    pHeap->textureData[index].placementFlags = *currentBankInfo;
                    // Flip the returned direction relative to what was stored.
                    if (pHeap->textureData[index].placementFlags & MEM_GROW_MASK)
                        *currentBankInfo = MEM_GROW_DOWN;
                    else
                        *currentBankInfo = MEM_GROW_UP;
                    pHeap->textureData[index].mostRecentAllocatedFlag = NV_TRUE;
                }
                else
                {
                    // Grow opposite to the most recently allocated client.
                    if (pHeap->textureData[mostRecentIndex].placementFlags & MEM_GROW_MASK)
                        *currentBankInfo = MEM_GROW_UP;
                    else
                        *currentBankInfo = MEM_GROW_DOWN;

                    // Set the last client allocated to the new client allocated
                    pHeap->textureData[mostRecentIndex].mostRecentAllocatedFlag = NV_FALSE;
                    pHeap->textureData[index].mostRecentAllocatedFlag = NV_TRUE;

                    // update the placement flags
                    pHeap->textureData[index].placementFlags = *currentBankInfo;

                    // if this isn't the first client in the heap, then we ignore bank placement
                    *ignoreBankPlacement = NV_TRUE;
                }

                clientFound = NV_TRUE;
                *textureClientIndex = index;
            }
            index++;
        } // while (clientFound == NV_FALSE)
    } // if (clientFound == NV_FALSE)
}
1045
1046 //
1047 // If we have two different alignment requirements for a memory
1048 // allocation, this routine calculates the LCM (least common multiple)
1049 // to satisfy both requirements.
1050 //
1051 // An alignment of 0 means "no preferred alignment". The return value
1052 // will not exceed maxAlignment = NV_U64_MAX; it returns maxAlignment if the limit
1053 // is exceeded.
1054 //
1055 // Called by heapAlloc and heapAllocHint.
1056 //
1057
1058
1059 /*!
1060 * @Is Alloc Valid For FB Region
1061 *
1062 * Check the prospective allocation to see if the candidate block supports
1063 * the requested surface type.
1064 *
1065 * NOTE: The FB region and FB heap allocation code assume that free blocks
1066 * reside in a single FB region. This is true in current implementations that
1067 * have the regions separated by a reserved block, but may not be true in future
1068 * implementations.
1069 *
1070 * @param[in] pGpu GPU object
1071 * @param[in] pHeap heap object
1072 * @param[in] pFbAllocInfo allocation request information
1073 * @param[in] pAllocData allocation candidate information
1074 *
1075 * @returns NV_TRUE if block can be allocated at the prospective address
1076 *
1077 */
1078 static NvBool
_isAllocValidForFBRegion(OBJGPU * pGpu,Heap * pHeap,FB_ALLOC_INFO * pFbAllocInfo,OBJHEAP_ALLOC_DATA * pAllocData)1079 _isAllocValidForFBRegion
1080 (
1081 OBJGPU *pGpu,
1082 Heap *pHeap,
1083 FB_ALLOC_INFO *pFbAllocInfo,
1084 OBJHEAP_ALLOC_DATA *pAllocData
1085 )
1086 {
1087 MemoryManager *pMemoryManager = GPU_GET_MEMORY_MANAGER(pGpu);
1088 NvBool isValid = NV_FALSE;
1089 FB_REGION_DESCRIPTOR *fbRegion;
1090
1091 // Check if any regions are defined. If not, then we are done.
1092 if (pMemoryManager->Ram.numFBRegions > 0)
1093 {
1094 fbRegion = memmgrLookupFbRegionByOffset(pGpu, pMemoryManager, pAllocData->allocLo, pAllocData->allocHi);
1095
1096 if (fbRegion != NULL)
1097 {
1098 // Because we heapAlloc the reserved region.
1099 if (pFbAllocInfo->pageFormat->type == NVOS32_TYPE_PMA &&
1100 pFbAllocInfo->owner == HEAP_OWNER_PMA_RESERVED_REGION)
1101 {
1102 if (!fbRegion->bInternalHeap && !fbRegion->bRsvdRegion)
1103 {
1104 isValid = NV_TRUE;
1105 }
1106 return isValid;
1107 }
1108 // Check if the region is reserved/not usable
1109 if (fbRegion->bRsvdRegion &&
1110 (pFbAllocInfo->pageFormat->type != NVOS32_TYPE_RESERVED))
1111 {
1112 NV_PRINTF(LEVEL_INFO,
1113 "Reserved region. Rejecting placement\n");
1114 return NV_FALSE;
1115 }
1116
1117 //
1118 // Check if the region supports compression and if we need it.
1119 // Surfaces that *require* compression can be allocated *only* in
1120 // regions that support compression. *Optionally* compressed surfaces
1121 // can be allocated anywhere though -- the selection of an uncompressed
1122 // KIND will be handled in dmaUpdateVASpace.
1123 //
1124 if (!fbRegion->bSupportCompressed)
1125 {
1126 if (DRF_VAL(OS32, _ATTR, _COMPR , pFbAllocInfo->pageFormat->attr) == NVOS32_ATTR_COMPR_REQUIRED)
1127 {
1128 NV_PRINTF(LEVEL_INFO,
1129 "Compression not supported. Rejecting placement\n");
1130 return NV_FALSE;
1131 }
1132 }
1133
1134 // Check if the allocation type is specifically not allowed
1135 if (pFbAllocInfo->pageFormat->type < NVOS32_NUM_MEM_TYPES)
1136 {
1137 if ((!fbRegion->bSupportISO) &&
1138 ((pFbAllocInfo->pageFormat->type == NVOS32_TYPE_PRIMARY) ||
1139 (pFbAllocInfo->pageFormat->type == NVOS32_TYPE_CURSOR) ||
1140 (pFbAllocInfo->pageFormat->type == NVOS32_TYPE_VIDEO)))
1141 {
1142 NV_PRINTF(LEVEL_INFO,
1143 "ISO surface type #%d not supported. Rejecting placement\n",
1144 pFbAllocInfo->pageFormat->type);
1145 return NV_FALSE;
1146 }
1147 }
1148
1149 if (!!fbRegion->bProtected ^
1150 !!(pFbAllocInfo->pageFormat->flags & NVOS32_ALLOC_FLAGS_PROTECTED))
1151 {
1152 NV_PRINTF(LEVEL_INFO,
1153 "Protection mismatch. Rejecting placement\n");
1154 return NV_FALSE;
1155 }
1156
1157 }
1158 else if (pFbAllocInfo->pageFormat->type != NVOS32_TYPE_RESERVED)
1159 {
1160 //
1161 // Allow reserved allocs outside of valid regions, but everything else
1162 // must be allocated in a region.
1163 //
1164 NV_PRINTF(LEVEL_INFO,
1165 "pFbAllocInfo->type != NVOS32_TYPE_RESERVED\n");
1166 return NV_FALSE;
1167 }
1168
1169 }
1170
1171 return NV_TRUE;
1172 }
1173
1174 /**
1175 * Blacklists a single page
1176 * This function will allocate the memory descriptor with a fixed memory offset
1177 * and allocate the FB physical offset. Will replace the blacklist allocation
1178 * path in the heapBlackListPages_IMPL.
1179 *
1180 * @param[in] pGpu OBJGPU pointer
1181 * @param[in] pHeap Heap pointer
1182 * @param[in] pBlacklistChunk BLACKLIST_CHUNK pointer
1183 *
1184 * @returns NV_OK on success
1185 * NV_ERR_OUT_OF_MEMORY, if the memory is already blacklisted
1186 */
1187
static NV_STATUS
_heapBlacklistSingleChunk
(
    OBJGPU          *pGpu,
    Heap            *pHeap,
    BLACKLIST_CHUNK *pBlacklistChunk
)
{
    NV_STATUS status = NV_OK;
    MemoryManager *pMemoryManager = GPU_GET_MEMORY_MANAGER(pGpu);
    NV_ASSERT(pBlacklistChunk != NULL);

    //
    // Describe the bad page as a fixed-address, uncached FB allocation so the
    // heap carves out exactly this physical range.
    //
    status = memdescCreate(&pBlacklistChunk->pMemDesc,
                           pGpu, pBlacklistChunk->size, RM_PAGE_SIZE,
                           NV_TRUE, ADDR_FBMEM, NV_MEMORY_UNCACHED,
                           MEMDESC_FLAGS_FIXED_ADDRESS_ALLOCATE |
                           MEMDESC_FLAGS_SKIP_RESOURCE_COMPUTE);
    if (NV_OK != status)
    {
        NV_PRINTF(LEVEL_FATAL,
                  "Error 0x%x creating memdesc for blacklisted chunk for address0x%llx, skipping\n",
                  status, pBlacklistChunk->physOffset);
        NV_ASSERT(NV_FALSE);
        return status;
    }

    // this is how FIXED_ADDRESS_ALLOCATE works: seed PTE 0 with the desired
    // page-aligned physical address before the allocation call
    memdescSetPte(pBlacklistChunk->pMemDesc, AT_GPU, 0, RM_PAGE_ALIGN_DOWN(pBlacklistChunk->physOffset));

    if (pHeap->heapType != HEAP_TYPE_PHYS_MEM_SUBALLOCATOR)
    {
        //
        // Allocate memory for this page. This is marked as an internal RM allocation
        // and will be saved/restored during suspend/resume
        //
        memdescTagAlloc(status, NV_FB_ALLOC_RM_INTERNAL_OWNER_UNNAMED_TAG_78,
                        pBlacklistChunk->pMemDesc);
        if (NV_OK != status)
        {
            // no use for the memdesc if page couldn't be allocated
            memdescDestroy(pBlacklistChunk->pMemDesc);

            NV_PRINTF(LEVEL_FATAL,
                      "Error 0x%x creating page for blacklisting address: 0x%llx, skipping\n",
                      status, pBlacklistChunk->physOffset);
            NV_ASSERT(NV_FALSE);
            return status;
        }
    }

    // set the flags properly: the chunk is now an active blacklist entry
    pBlacklistChunk->bIsValid = NV_TRUE;

    // if dynamic blacklisting is enabled, clear the pending retirement flag
    if (pMemoryManager->bEnableDynamicPageOfflining)
    {
        pBlacklistChunk->bPendingRetirement = NV_FALSE;
    }
    return status;
}
1248
1249 /**
1250 * Free-s the blacklisted pages within the range [begin, begin+size-1]
1251 * This function will iterate the blacklisted chunks data structure,
1252 * and free the blacklisted pages within the range [begin, begin+size-1]
1253 *
1254 * @param[in] pGpu OBJGPU pointer
1255 * @param[in] pMemoryManager MemoryManager pointer
1256 * @param[in] pBlackList BLACKLIST pointer
1257 * @param[in] begin starting address of the range
1258 * @param[in] size Size of the region, where blacklisted pages to be free-d
1259 *
1260 * @returns NV_OK on success
1261 */
1262 static NV_STATUS
_heapFreeBlacklistPages(OBJGPU * pGpu,MemoryManager * pMemoryManager,BLACKLIST * pBlackList,NvU64 begin,NvU64 size)1263 _heapFreeBlacklistPages
1264 (
1265 OBJGPU *pGpu,
1266 MemoryManager *pMemoryManager,
1267 BLACKLIST *pBlackList,
1268 NvU64 begin,
1269 NvU64 size
1270 )
1271 {
1272 NvU32 chunk = 0;
1273 NvU64 baseChunkAddress = 0;
1274 NvU64 endChunkAddress = 0;
1275 BLACKLIST_CHUNK *pBlacklistChunks = pBlackList->pBlacklistChunks;
1276
1277 for (chunk = 0; chunk < pBlackList->count; chunk++)
1278 {
1279 baseChunkAddress = 0;
1280 endChunkAddress = 0;
1281 // No need to process the chunk if it's not a valid chunk
1282 if (pBlacklistChunks[chunk].bIsValid != NV_TRUE ||
1283 (pMemoryManager->bEnableDynamicPageOfflining &&
1284 pBlacklistChunks[chunk].bPendingRetirement))
1285 continue;
1286
1287 baseChunkAddress = pBlacklistChunks[chunk].physOffset;
1288 endChunkAddress = baseChunkAddress + pBlacklistChunks[chunk].size - 1;
1289
1290 if (baseChunkAddress >= begin && endChunkAddress <= (begin + size - 1))
1291 {
1292 //
1293 // free the mem desc, set the excludeGlobalListFlag
1294 // invalidate the entry
1295 //
1296 NV_PRINTF(LEVEL_FATAL,
1297 "removing from blacklist... page start %llx, page end:%llx\n",
1298 baseChunkAddress, endChunkAddress);
1299
1300 memdescFree(pBlacklistChunks[chunk].pMemDesc);
1301 memdescDestroy(pBlacklistChunks[chunk].pMemDesc);
1302
1303 pBlacklistChunks[chunk].bIsValid = NV_FALSE;
1304 }
1305 }
1306 return NV_OK;
1307 }
1308
1309 /**
1310 * Blacklist pages within the range [begin, begin+size-1]
1311 * This function will iterate the blacklisted chunks data structure,
1312 * and blacklist pages within the range [begin, begin+size-1]
1313 *
1314 * @param[in] pGpu OBJGPU pointer
1315 * @param[in] pHeap Heap pointer
1316 * @param[in] pBlackList BLACKLIST pointer
1317 * @param[in] begin starting address of the range
1318 * @param[in] size Size of the region, where pages will be blacklisted
1319 *
1320 * @returns NV_OK on success
1321 * error, if _heapBlacklistSingleChunk fails
1322 */
1323 static NV_STATUS
_heapBlacklistChunks(OBJGPU * pGpu,Heap * pHeap,BLACKLIST * pBlackList,NvU64 begin,NvU64 size)1324 _heapBlacklistChunks
1325 (
1326 OBJGPU *pGpu,
1327 Heap *pHeap,
1328 BLACKLIST *pBlackList,
1329 NvU64 begin,
1330 NvU64 size
1331 )
1332 {
1333 NvU32 chunk = 0;
1334 NvU64 baseAddress = 0;
1335 NvU64 endAddress = 0;
1336 BLACKLIST_CHUNK *pBlacklistChunks = pBlackList->pBlacklistChunks;
1337 NV_STATUS status = NV_OK;
1338
1339
1340 for (chunk = 0; chunk < pBlackList->count; chunk++)
1341 {
1342 baseAddress = 0;
1343 endAddress = 0;
1344
1345 // No need to process the chunk if it's a valid chunk
1346 if (pBlacklistChunks[chunk].bIsValid == NV_TRUE)
1347 continue;
1348
1349 baseAddress = pBlacklistChunks[chunk].physOffset;
1350 endAddress = baseAddress + pBlacklistChunks[chunk].size - 1;
1351
1352 //TODO: what if the blacklisted chunk is halfway inside the allocated region??
1353 if (baseAddress >= begin && endAddress <= (begin + size - 1))
1354 {
1355 NV_PRINTF(LEVEL_ERROR,
1356 "blacklisting chunk from addr: 0x%llx to 0x%llx, new begin :0x%llx, end:0x%llx\n",
1357 baseAddress, endAddress, begin, begin + size - 1);
1358 status = _heapBlacklistSingleChunk(pGpu, pHeap, &pBlacklistChunks[chunk]);
1359 NV_ASSERT(status == NV_OK);
1360 }
1361 }
1362 return status;
1363 }
1364
1365 /*!
1366 * @brief allocate memory from heap
1367 *
1368 * Allocates a memory region with requested parameters from heap.
1369 * If requested contiguous allocation is not possible, tries to allocate non-contiguous memory.
1370 *
1371 * @param[in] pGpu GPU object
1372 * @param[in] hClient client handle
1373 * @param[in] pHeap heap object
1374 * @param[in] pAllocRequest allocation request
1375 * @param[in] memHandle memory handle
1376 * @param[in/out] pAllocData heap-specific allocation data
1377 * @param[in/out] pFbAllocInfo allocation data
1378 * @param[out] pHwResource pointer to allocation HW resource info
1379 * @param[in/out] pNoncontigAllocation the requested/provided allocation is noncotig
1380 * @param[in] bNoncontigAllowed allocation can be made noncontig
1381 * @param[in] bAllocedMemdesc memdesc should be freed if a new one is created
1382 */
NV_STATUS heapAlloc_IMPL
(
    OBJGPU                    *pGpu,
    NvHandle                   hClient,
    Heap                      *pHeap,
    MEMORY_ALLOCATION_REQUEST *pAllocRequest,
    NvHandle                   memHandle,
    OBJHEAP_ALLOC_DATA        *pAllocData,
    FB_ALLOC_INFO             *pFbAllocInfo,
    HWRESOURCE_INFO          **pHwResource,
    NvBool                    *pNoncontigAllocation,
    NvBool                     bNoncontigAllowed,
    NvBool                     bAllocedMemdesc
)
{
    NV_MEMORY_ALLOCATION_PARAMS *pVidHeapAlloc = pAllocRequest->pUserParams;
    MEMORY_DESCRIPTOR *pMemDesc = pAllocRequest->pMemDesc;
    MemoryManager *pMemoryManager = GPU_GET_MEMORY_MANAGER(pGpu);
    NvU32 textureClientIndex = 0xFFFFFFFF;    // 0xFFFFFFFF == "not a tracked texture client"
    NvU64 desiredOffset = pFbAllocInfo->offset;
    NvU64 adjustedSize = pFbAllocInfo->size - pFbAllocInfo->alignPad;
    NvU32 bankPlacement = 0;
    NvBool ignoreBankPlacement = NV_FALSE;
    NvU8 currentBankInfo;
    MEM_BLOCK *pBlockFirstFree;
    MEM_BLOCK *pBlockFree;
    MEM_BLOCK *pBlockNew = NULL;
    MEM_BLOCK *pBlockSplit = NULL;
    NvU64 allocatedOffset = 0;
    NvBool bTurnBlacklistOff = NV_FALSE;
    NvBool bDone = NV_FALSE;
    NV_STATUS status = NV_OK;
    NvU32 i;

    // Heap path only serves FB allocations that are not already PMA-backed.
    NV_ASSERT_OR_RETURN(
        (memmgrAllocGetAddrSpace(GPU_GET_MEMORY_MANAGER(pGpu), pVidHeapAlloc->flags, pVidHeapAlloc->attr)
            == ADDR_FBMEM) &&
        (pAllocRequest->pPmaAllocInfo[gpumgrGetSubDeviceInstanceFromGpu(pGpu)] == NULL),
        NV_ERR_INVALID_ARGUMENT);

    // For fixed-address requests, back the alignment padding out of the
    // caller-supplied offset to get the raw block offset.
    if (pVidHeapAlloc->flags & NVOS32_ALLOC_FLAGS_FIXED_ADDRESS_ALLOCATE)
        desiredOffset -= pFbAllocInfo->alignPad;

    //
    // If the request explicitly opts out of blacklisting, temporarily free the
    // blacklisted pages overlapping the target range (or the whole heap when
    // running under vGPU hypervisor); they are re-blacklisted at return_early.
    //
    if (pGpu->getProperty(pGpu, PDB_PROP_GPU_ALLOW_PAGE_RETIREMENT) &&
        gpuCheckPageRetirementSupport_HAL(pGpu) &&
        FLD_TEST_DRF(OS32, _ATTR2, _BLACKLIST, _OFF, pVidHeapAlloc->attr2))
    {
        NV_PRINTF(LEVEL_INFO,
                  "Trying to turn blacklisting pages off for this allocation of size: %llx\n",
                  pVidHeapAlloc->size);
        if (!hypervisorIsVgxHyper())
            _heapFreeBlacklistPages(pGpu, pMemoryManager, &pHeap->blackList, desiredOffset, pVidHeapAlloc->size);
        else
            _heapFreeBlacklistPages(pGpu, pMemoryManager, &pHeap->blackList, pHeap->base, pHeap->total);
        bTurnBlacklistOff = NV_TRUE;
        // Now continue with the heap allocation.
    }

    //
    // Check for range-limited request.
    // Range of [0,0] is a special case that means to use the entire heap.
    //
    // A range-limited request allows caller to say: I really want memory
    // which only falls completely within a particular range. Returns
    // error if can't allocate within that range.
    //
    // Used on Windows by OpenGL. On Windows during a modeswitch, the
    // display driver frees all vidmem surfaces. Unfortunately, OpenGL
    // writes to some vidmem surface with the CPU from user mode. If these
    // surfaces are freed during the modeswitch, then the user mode OpenGL
    // app might scribble on someone else's surface if that video memory is
    // reused before OpenGL notices the modeswitch. Because modeswitches
    // are asynchronous to the OpenGL client, it does not notice the
    // modeswitches right away.
    //
    // A solution is for OpenGL to restrict vidmem surfaces that have
    // this problem to a range of memory where it is safe *not* to free
    // the surface during a modeswitch.
    //
    // virtual allocation are checked in dmaAllocVA()
    if (pVidHeapAlloc->rangeLo == 0 && pVidHeapAlloc->rangeHi == 0)
    {
        pVidHeapAlloc->rangeHi = pHeap->base + pHeap->total - 1;
    }
    if (pVidHeapAlloc->rangeHi > pHeap->base + pHeap->total - 1)
    {
        pVidHeapAlloc->rangeHi = pHeap->base + pHeap->total - 1;
    }

    if ((pVidHeapAlloc->flags & NVOS32_ALLOC_FLAGS_FIXED_ADDRESS_ALLOCATE) == 0)
    {
        // Only want to override in one direction at a time
        if (pMemoryManager->overrideInitHeapMin == 0)
        {
            pVidHeapAlloc->rangeHi = NV_MIN(pVidHeapAlloc->rangeHi, pMemoryManager->overrideHeapMax);
        }
        else
        {
            pVidHeapAlloc->rangeLo = NV_MAX(pVidHeapAlloc->rangeLo, pMemoryManager->overrideInitHeapMin);
        }
    }

    //
    // Check for valid range.
    //
    if (pVidHeapAlloc->rangeLo > pVidHeapAlloc->rangeHi)
    {
        status = NV_ERR_INVALID_ARGUMENT;
        goto return_early;
    }

    //
    // The bank placement loop does not know how to limit allocations to be
    // within a range.
    //
    if (((pVidHeapAlloc->rangeLo > 0) || (pVidHeapAlloc->rangeHi < pHeap->base + pHeap->total - 1)))
    {
        pVidHeapAlloc->flags |= NVOS32_ALLOC_FLAGS_IGNORE_BANK_PLACEMENT;
    }

    //
    // Set up bank placement data - should have been preselected in heapCreate
    //
    status = _heapGetBankPlacement(pGpu, pHeap, pVidHeapAlloc->owner,
                                   &pVidHeapAlloc->flags,
                                   pVidHeapAlloc->type,
                                   0,
                                   &bankPlacement);
    if (status != NV_OK)
    {
        NV_PRINTF(LEVEL_ERROR,
                  "_heapGetBankPlacement failed for current allocation\n");
        goto return_early;
    }

    //
    // Find the best bank to start looking in for this pVidHeapAlloc->type, but only if we're
    // not ignoring bank placement rules. Save the current bank info.
    //
    currentBankInfo = (NvU8)bankPlacement; // this is always non zero from above

    //
    // Check for fixed address request.
    // This allows caller to say: I really want this memory at a particular
    // offset. Returns error if can't get that offset.
    // Used initially by Mac display driver twinview code.
    // On the Mac it is a very bad thing to *ever* move the primary
    // during a modeset since a lot of sw caches the value and never
    // checks again.
    //
    if (pVidHeapAlloc->flags & NVOS32_ALLOC_FLAGS_FIXED_ADDRESS_ALLOCATE)
    {

        // is our desired offset suitably aligned?
        if (desiredOffset % pAllocData->alignment)
        {
            NV_PRINTF(LEVEL_ERROR,
                      "offset 0x%llx not aligned to 0x%llx\n",
                      desiredOffset, pAllocData->alignment);
            goto failed;
        }

        pBlockFree = pHeap->pFreeBlockList;

        if (pBlockFree == NULL)
        {
            NV_PRINTF(LEVEL_ERROR, "no free blocks\n");
            goto failed;
        }

        // Walk the circular free list for a block containing the fixed range.
        do {
            //
            // Allocate from the bottom of the memory block.
            //
            pBlockFree = pBlockFree->u1.nextFree;

            // Does this block contain our desired range?
            if ((desiredOffset >= pBlockFree->begin) &&
                (desiredOffset + pAllocData->allocSize - 1) <= pBlockFree->end)
            {
                // we have a match, now remove it from the pool
                pAllocData->allocLo = desiredOffset;
                pAllocData->allocHi = desiredOffset + pAllocData->allocSize - 1;
                pAllocData->allocAl = pAllocData->allocLo;

                // Check that the candidate block can support the allocation type
                if (_isAllocValidForFBRegion(pGpu, pHeap, pFbAllocInfo, pAllocData))
                    goto got_one;
            }

        } while (pBlockFree != pHeap->pFreeBlockList);

        // return error if can't get that particular address
        NV_PRINTF(LEVEL_ERROR,
                  "failed NVOS32_ALLOC_FLAGS_FIXED_ADDRESS_ALLOCATE @%llx (%lld bytes)\n",
                  desiredOffset, pAllocData->allocSize);
        goto failed;
    }

    //
    // Check if NVOS32_ALLOC_FLAGS_IGNORE_BANK_PLACEMENT was passed in with
    // the pVidHeapAlloc->type to ignore placing this allocation in a particular bank.
    // This means we default to the second loop where we choose first fit.
    //
    if (pVidHeapAlloc->flags & NVOS32_ALLOC_FLAGS_IGNORE_BANK_PLACEMENT)
        ignoreBankPlacement = NV_TRUE;

    //
    // Bug 67690: Treat textures differently for more than one client (eg. opengl),
    // [IN]: client, pVidHeapAlloc->type, ignoreBankPlacement
    // [OUT]: heap, ignoreBankPlacement, textureClientIndex
    //
    // Bug 69385: Treat textures differently only if pVidHeapAlloc->flags are also set to zero.
    // NV30GL-WinXP: Unable to run 3DMark2001SE @ 1600x1200x32bpp.
    //
    if ((pVidHeapAlloc->type == NVOS32_TYPE_TEXTURE) && (!pVidHeapAlloc->flags))
        _heapSetTexturePlacement(pHeap, hClient, pVidHeapAlloc->type, &ignoreBankPlacement, &textureClientIndex, &currentBankInfo);

    if (!ignoreBankPlacement)
    {
        currentBankInfo = (NvU8)bankPlacement & BANK_MEM_GROW_MASK;

        if (pVidHeapAlloc->flags & NVOS32_ALLOC_FLAGS_BANK_HINT)
        {
            if (pVidHeapAlloc->flags & NVOS32_ALLOC_FLAGS_BANK_GROW_DOWN)
                currentBankInfo = MEM_GROW_DOWN;
            else
                currentBankInfo = MEM_GROW_UP;
            pVidHeapAlloc->flags &= ~(NVOS32_ALLOC_FLAGS_BANK_HINT); // hint flag only lasts for 1 loop
        }
        else
        {
            // Convert bank grow up/down to mem grow up/down
            currentBankInfo = (currentBankInfo & BANK_MEM_GROW_DOWN ? MEM_GROW_DOWN : MEM_GROW_UP);
        }
    } // if (!ignoreBankPlacement)

    pBlockFirstFree = pHeap->pFreeBlockList;
    if (!pBlockFirstFree)
    {
        NV_PRINTF(LEVEL_ERROR, "no free blocks\n");
        goto failed;
    }

    if (*pNoncontigAllocation)
    {
        NV_PRINTF(LEVEL_INFO, "non-contig vidmem requested\n");
        goto non_contig_alloc;
    }

    //
    // Loop through all available regions.
    // Note we don't check for bRsvdRegion here because when blacklisting
    // those regions we need them to succeed.
    //
    // Each iteration narrows rangeLo/rangeHi to one priority-ordered FB
    // region, scans the free list within it, then restores the saved range.
    //
    bDone = NV_FALSE;
    i = 0;
    while (!bDone)
    {
        NvU64 saveRangeLo = pVidHeapAlloc->rangeLo;
        NvU64 saveRangeHi = pVidHeapAlloc->rangeHi;

        if (!memmgrAreFbRegionsSupported(pMemoryManager) ||
            gpuIsCacheOnlyModeEnabled(pGpu))
        {
            bDone = NV_TRUE;
        }
        else
        {
            NV_ASSERT( pMemoryManager->Ram.numFBRegionPriority > 0 );

            if (FLD_TEST_DRF(OS32, _ATTR2, _PRIORITY, _LOW, pFbAllocInfo->pageFormat->attr2) ||
                (pMemoryManager->bPreferSlowRegion &&
                 !FLD_TEST_DRF(OS32, _ATTR2, _PRIORITY, _HIGH, pFbAllocInfo->pageFormat->attr2)))
            {
                NV_ASSERT( pMemoryManager->Ram.fbRegionPriority[pMemoryManager->Ram.numFBRegionPriority-1-i] < pMemoryManager->Ram.numFBRegions );
                NV_ASSERT( !pMemoryManager->Ram.fbRegion[pMemoryManager->Ram.fbRegionPriority[pMemoryManager->Ram.numFBRegionPriority-1-i]].bRsvdRegion );
                //
                // We prefer slow memory, or we want _LOW priority
                // ==>> Try allocations in increasing order of performance,
                // slowest first
                //
                pVidHeapAlloc->rangeLo = NV_MAX(pVidHeapAlloc->rangeLo, pMemoryManager->Ram.fbRegion[pMemoryManager->Ram.fbRegionPriority[pMemoryManager->Ram.numFBRegionPriority-1-i]].base);
                pVidHeapAlloc->rangeHi = NV_MIN(pVidHeapAlloc->rangeHi, pMemoryManager->Ram.fbRegion[pMemoryManager->Ram.fbRegionPriority[pMemoryManager->Ram.numFBRegionPriority-1-i]].limit);
            }
            else
            {
                NV_ASSERT( pMemoryManager->Ram.fbRegionPriority[i] < pMemoryManager->Ram.numFBRegions );
                NV_ASSERT( !pMemoryManager->Ram.fbRegion[pMemoryManager->Ram.fbRegionPriority[i]].bRsvdRegion );
                //
                // We don't explicitly want slow memory or we don't prefer
                // allocations in the slow memory
                // ==>> Try allocations in decreasing order of performance,
                // fastest first
                //
                pVidHeapAlloc->rangeLo = NV_MAX(pVidHeapAlloc->rangeLo, pMemoryManager->Ram.fbRegion[pMemoryManager->Ram.fbRegionPriority[i]].base);
                pVidHeapAlloc->rangeHi = NV_MIN(pVidHeapAlloc->rangeHi, pMemoryManager->Ram.fbRegion[pMemoryManager->Ram.fbRegionPriority[i]].limit);
            }
            i++;

            bDone = !(i < pMemoryManager->Ram.numFBRegionPriority);
        }

        //
        // When scanning upwards, start at the bottom - 1 so the following loop looks symmetrical.
        //
        if ( ! (currentBankInfo & MEM_GROW_DOWN))
            pBlockFirstFree = pBlockFirstFree->u0.prevFree;
        pBlockFree = pBlockFirstFree;

        do
        {
            NvU64 blockLo;
            NvU64 blockHi;

            // Walk the circular free list in the grow direction.
            if (currentBankInfo & MEM_GROW_DOWN)
                pBlockFree = pBlockFree->u0.prevFree;
            else
                pBlockFree = pBlockFree->u1.nextFree;

            //
            // Is this block completely in requested range?
            //
            // We *should* check that pBlockFree is wholly resident in the range, but the
            // old check didn't and checking it causes some tests to fail.
            // So check that at least *some* of the block resides within the requested range.
            //
            if ((pBlockFree->end >= pVidHeapAlloc->rangeLo) && (pBlockFree->begin <= pVidHeapAlloc->rangeHi))
            {
                //
                // Find the intersection of the free block and the specified range.
                //
                blockLo = (pVidHeapAlloc->rangeLo > pBlockFree->begin) ? pVidHeapAlloc->rangeLo : pBlockFree->begin;
                blockHi = (pVidHeapAlloc->rangeHi < pBlockFree->end) ? pVidHeapAlloc->rangeHi : pBlockFree->end;

                if (currentBankInfo & MEM_GROW_DOWN)
                {
                    //
                    // Allocate from the top of the memory block.
                    //
                    pAllocData->allocLo = (blockHi - pAllocData->allocSize + 1) / pAllocData->alignment * pAllocData->alignment;
                    pAllocData->allocAl = pAllocData->allocLo;
                    pAllocData->allocHi = pAllocData->allocAl + pAllocData->allocSize - 1;
                }
                else
                {
                    //
                    // Allocate from the bottom of the memory block.
                    //
                    pAllocData->allocAl = (blockLo + (pAllocData->alignment - 1)) / pAllocData->alignment * pAllocData->alignment;
                    pAllocData->allocLo = pAllocData->allocAl;
                    pAllocData->allocHi = pAllocData->allocAl + pAllocData->allocSize - 1;
                }

                //
                // Does the desired range fall completely within this block?
                // Also make sure it does not wrap-around.
                // Also make sure it is within the desired range.
                //
                if ((pAllocData->allocLo >= pBlockFree->begin) && (pAllocData->allocHi <= pBlockFree->end))
                {
                    if (pAllocData->allocLo <= pAllocData->allocHi)
                    {
                        if ((pAllocData->allocLo >= pVidHeapAlloc->rangeLo) && (pAllocData->allocHi <= pVidHeapAlloc->rangeHi))
                        {
                            // Check that the candidate block can support the allocation type
                            if (_isAllocValidForFBRegion(pGpu, pHeap, pFbAllocInfo, pAllocData))
                            {
                                // Restore the caller's range before leaving the loop.
                                pVidHeapAlloc->rangeLo = saveRangeLo;
                                pVidHeapAlloc->rangeHi = saveRangeHi;
                                goto got_one;
                            }
                        }
                    }
                }
            }

        } while (pBlockFree != pBlockFirstFree);

        pVidHeapAlloc->rangeLo = saveRangeLo;
        pVidHeapAlloc->rangeHi = saveRangeHi;
    }

non_contig_alloc:
    if (!bNoncontigAllowed)
        goto failed;

    if (!*pNoncontigAllocation)
    {
        NV_PRINTF(LEVEL_INFO,
                  "Contig vidmem allocation failed, running noncontig allocator\n");

        // Create a new noncontig memdescriptor
        memdescDestroy(pAllocRequest->pMemDesc);

        status = memdescCreate(&pAllocRequest->pMemDesc, pGpu, adjustedSize,
                               0, NV_FALSE, ADDR_FBMEM, NV_MEMORY_UNCACHED,
                               MEMDESC_FLAGS_NONE);

        if (status != NV_OK)
        {
            NV_PRINTF(LEVEL_ERROR, "cannot alloc memDesc!\n");
            pMemDesc = pAllocRequest->pMemDesc = NULL;
            goto failed;
        }

        pMemDesc = pAllocRequest->pMemDesc;
        pMemDesc->pHeap = pHeap;

        memdescSetPteKind(pMemDesc, pFbAllocInfo->format);
        memdescSetHwResId(pMemDesc, pFbAllocInfo->hwResId);
    }

    // Try the noncontig allocator
    if (NV_OK == _heapAllocNoncontig(pGpu,
                                     hClient,
                                     pHeap,
                                     pAllocRequest,
                                     memHandle,
                                     pAllocData,
                                     pFbAllocInfo,
                                     textureClientIndex,
                                     pFbAllocInfo->alignPad,
                                     &allocatedOffset,
                                     pMemDesc,
                                     pHwResource))
    {
        *pNoncontigAllocation = NV_TRUE;

        //
        // The noncontig allocator calls _heapProcessFreeBlock()
        // by itself, so we goto done: straight
        //
        status = NV_OK;
        goto return_early;
    }

    NV_PRINTF(LEVEL_INFO,
              "failed to allocate block. Heap total=0x%llx free=0x%llx\n",
              pHeap->total, pHeap->free);
    // Out of memory.
    goto failed;

    //
    // We have a match. Now link it in, trimming or splitting
    // any slop from the enclosing block as needed.
    //
    // NOTE: the failed: label sits inside the if's failure body so that a
    // "goto failed" from above shares this cleanup path with a processing
    // failure here.
    //

got_one:
    if (NV_OK != _heapProcessFreeBlock(pGpu, pBlockFree,
                                       &pBlockNew, &pBlockSplit,
                                       pHeap, pAllocRequest,
                                       memHandle,
                                       pAllocData, pFbAllocInfo,
                                       pFbAllocInfo->alignPad,
                                       &allocatedOffset) ||
        NV_OK != _heapUpdate(pHeap, pBlockNew, BLOCK_FREE_STATE_CHANGED))
failed:
    {

        NV_PRINTF(LEVEL_INFO,
                  "failed to allocate block. Heap total=0x%llx free=0x%llx\n",
                  pHeap->total, pHeap->free);

        portMemFree(pBlockNew);
        pBlockNew = NULL;
        portMemFree(pBlockSplit);
        status = NV_ERR_NO_MEMORY;
        goto return_early;
    }

    //
    // If a client calls us with pVidHeapAlloc->type == NVOS32_TYPE_TEXTURE, but where flags
    // are non-zero, we won't call _heapSetTexturePlacement and initialize
    // textureClientIndex to a proper value (default is 0xFFFFFFFF). In that
    // case, we won't track this texture allocation. Bug 79586.
    //
    if (pVidHeapAlloc->type == NVOS32_TYPE_TEXTURE &&
        textureClientIndex != 0xFFFFFFFF)
    {
        pBlockNew->textureId = hClient;
        pHeap->textureData[textureClientIndex].refCount++;
    }
    else
    {
        pBlockNew->textureId = 0;
    }

    pFbAllocInfo->offset = allocatedOffset;

    // TODO : This must be inside *all* blocks of a noncontig allocation
    if (!*pNoncontigAllocation)
    {
        pBlockNew->pitch = pFbAllocInfo->pitch;
        pBlockNew->height = pFbAllocInfo->height;
        pBlockNew->width = pFbAllocInfo->width;
    }

    *pHwResource = &pBlockNew->hwResource;

    // Remember memory descriptor
    memdescDescribe(pMemDesc, ADDR_FBMEM, allocatedOffset, adjustedSize);
    pBlockNew->pMemDesc = pMemDesc;
    pBlockNew->allocedMemDesc = bAllocedMemdesc;

    status = NV_OK;

return_early:
    HEAP_VALIDATE(pHeap);

    // Re-establish any blacklist entries that were lifted for this request.
    if (bTurnBlacklistOff)
    {
        if (!hypervisorIsVgxHyper())
            _heapBlacklistChunks(pGpu, pHeap, &pHeap->blackList, desiredOffset, pVidHeapAlloc->size);
        else
            _heapBlacklistChunksInFreeBlocks(pGpu, pHeap);
    }

    return status;
}
1903
static void _heapBlacklistChunksInFreeBlocks
(
    OBJGPU *pGpu,
    Heap   *pHeap
)
{
    MEM_BLOCK *pAnchor;
    MEM_BLOCK *pCurrent;
    NvU64      rangeBase;
    NvU64      rangeSize;

    // Nothing to scan when the free list is empty.
    if (pHeap->pFreeBlockList == NULL)
        return;

    // Anchor one entry behind the list head so the do/while below visits
    // every free block exactly once.
    pAnchor  = pHeap->pFreeBlockList->u0.prevFree;
    pCurrent = pAnchor;

    do
    {
        pCurrent  = pCurrent->u1.nextFree;
        rangeBase = pCurrent->begin;
        rangeSize = pCurrent->end - rangeBase + 1;

        // Re-blacklist any retired chunks that fall inside this free block.
        _heapBlacklistChunks(pGpu, pHeap, &pHeap->blackList, rangeBase, rangeSize);

    } while (pCurrent != pAnchor);
}
1933
/*!
 * @brief Return an allocated block to the heap's free pool.
 *
 * Marks the block free, releases any HW resources (e.g. compression tags)
 * tied to it, drops texture-client bookkeeping, adjusts the heap's free and
 * reserved accounting, and coalesces the block with free neighbors.
 *
 * WARNING: when the block is merged with an adjacent free block, the caller's
 * pBlock is passed to portMemFree() (see the pBlockTmp frees below). Callers
 * must not dereference pBlock after this function returns.
 *
 * @param[in] pGpu     GPU object
 * @param[in] pHeap    Heap that owns the block
 * @param[in] hClient  Client handle used when HW resources were allocated
 * @param[in] hDevice  Device handle used when HW resources were allocated
 * @param[in] pBlock   Block to free; must not already be free
 *
 * @return NV_OK on success;
 *         NV_ERR_INVALID_STATE if the block is already free or a tree/list
 *         update fails;
 *         NV_ERR_NO_MEMORY if a temporary allocation fails.
 */
static NV_STATUS _heapBlockFree
(
    OBJGPU     *pGpu,
    Heap       *pHeap,
    NvHandle    hClient,
    NvHandle    hDevice,
    MEM_BLOCK  *pBlock
)
{
    MEM_BLOCK *pBlockTmp;
    NvU32 i;
    OBJOS *pOS = GPU_GET_OS(pGpu);
    MemoryManager *pMemoryManager = GPU_GET_MEMORY_MANAGER(pGpu);
    NvBool bBlocksMerged = NV_FALSE;

    //
    // Check for valid owner.
    //
    if (pBlock->owner == NVOS32_BLOCK_TYPE_FREE)
        return NV_ERR_INVALID_STATE;

    pBlock->owner = NVOS32_BLOCK_TYPE_FREE;

    // Re-file the block in the heap's tracking structures as a free block.
    if (NV_OK != _heapUpdate(pHeap, pBlock, BLOCK_FREE_STATE_CHANGED))
    {
        return NV_ERR_INVALID_STATE;
    }

    //
    // Update free count.
    //
    _heapAdjustFree(pHeap, pBlock->end - pBlock->begin + 1,
        FLD_TEST_DRF(OS32, _ATTR2, _INTERNAL, _YES, pBlock->hwResource.attr2));

    //
    // Release any HW resources that might've been in use
    //
    {
        FB_ALLOC_INFO *pFbAllocInfo = NULL;
        FB_ALLOC_PAGE_FORMAT *pFbAllocPageFormat = NULL;

        pFbAllocInfo = portMemAllocNonPaged(sizeof(FB_ALLOC_INFO));
        if (pFbAllocInfo == NULL)
        {
            NV_ASSERT(0);
            return NV_ERR_NO_MEMORY;
        }

        pFbAllocPageFormat = portMemAllocNonPaged(sizeof(FB_ALLOC_PAGE_FORMAT));
        if (pFbAllocPageFormat == NULL) {
            NV_ASSERT(0);
            portMemFree(pFbAllocInfo);
            return NV_ERR_NO_MEMORY;
        }

        portMemSet(pFbAllocInfo, 0, sizeof(FB_ALLOC_INFO));
        portMemSet(pFbAllocPageFormat, 0, sizeof(FB_ALLOC_PAGE_FORMAT));
        pFbAllocInfo->pageFormat = pFbAllocPageFormat;

        // Reconstruct the allocation parameters from the block so the
        // memory manager can locate and release the HW resources.
        pFbAllocInfo->pageFormat->type = pBlock->u0.type;
        pFbAllocInfo->hwResId = pBlock->hwResource.hwResId;
        pFbAllocInfo->height = 0;
        pFbAllocInfo->pitch = 0;
        pFbAllocInfo->size = pBlock->end - pBlock->begin + 1;
        pFbAllocInfo->align = pBlock->align;
        pFbAllocInfo->alignPad = pBlock->alignPad;
        pFbAllocInfo->offset = pBlock->begin;
        pFbAllocInfo->format = pBlock->format;
        pFbAllocInfo->comprCovg = pBlock->hwResource.comprCovg;
        pFbAllocInfo->zcullCovg = 0;
        pFbAllocInfo->pageFormat->attr = pBlock->hwResource.attr;
        pFbAllocInfo->pageFormat->attr2 = pBlock->hwResource.attr2;
        pFbAllocInfo->ctagOffset = pBlock->hwResource.ctagOffset;
        pFbAllocInfo->hClient = hClient;
        pFbAllocInfo->hDevice = hDevice;

        memmgrFreeHwResources(pGpu, pMemoryManager, pFbAllocInfo);

        // Notify the OS layer when an RM-internal reservation is released.
        if (FLD_TEST_DRF(OS32, _ATTR2, _INTERNAL, _YES, pFbAllocInfo->pageFormat->attr2))
        {
            pOS->osInternalReserveFreeCallback(pFbAllocInfo->offset, pGpu->gpuId);
        }

        // Clear the HW resource associations since this block can be reused or merged.
        portMemSet(&pBlock->hwResource, 0, sizeof(pBlock->hwResource));

        portMemFree(pFbAllocPageFormat);
        portMemFree(pFbAllocInfo);
    }

    // Drop the texture-client reference this block held, if any.
    if ((pBlock->u0.type == NVOS32_TYPE_TEXTURE) && (pBlock->textureId != 0))
    {
        for (i = 0; i < MAX_TEXTURE_CLIENT_IDS; i++)
        {
            //
            // 1. Find the client within the textureData structure
            // 2. Once found, set the value to 0
            // 3. Then decrement its refCount
            // 4. If refCount goes to zero, reset the textureData structure
            //    that pertains to that index.
            //
            if (pHeap->textureData[i].clientId == pBlock->textureId)
            {
                pBlock->textureId = 0;
                pHeap->textureData[i].refCount--;
                if (pHeap->textureData[i].refCount == 0)
                    portMemSet(&pHeap->textureData[i], 0,
                               sizeof(TEX_INFO));
                break;
            }
        }
    }

    // Account for freeing any reserved RM region
    //
    // NOTE(review): pBlock->owner was set to NVOS32_BLOCK_TYPE_FREE above,
    // so this condition looks unreachable unless HEAP_OWNER_RM_RESERVED_REGION
    // aliases that value — confirm whether pHeap->reserved accounting ever
    // runs here.
    //
    if ((pBlock->u0.type == NVOS32_TYPE_RESERVED) && (pBlock->owner == HEAP_OWNER_RM_RESERVED_REGION))
    {
        NV_ASSERT(pHeap->reserved >= pBlock->end - pBlock->begin + 1);
        pHeap->reserved -= pBlock->end - pBlock->begin + 1;
    }

    //
    //
    // Can this merge with any surrounding free blocks?
    //
    if ((pBlock->prev->owner == NVOS32_BLOCK_TYPE_FREE) && (pBlock != pHeap->pBlockList))
    {
        //
        // Remove block to be freed and previous one since nodes will be
        // combined into single one.
        //
        if (NV_OK != _heapUpdate(pHeap, pBlock, BLOCK_REMOVE))
        {
            return NV_ERR_INVALID_STATE;
        }
        if (NV_OK != _heapUpdate(pHeap, pBlock->prev, BLOCK_REMOVE))
        {
            return NV_ERR_INVALID_STATE;
        }

        //
        // Merge with previous block.
        //
        pBlock->prev->next = pBlock->next;
        pBlock->next->prev = pBlock->prev;
        pBlock->prev->end = pBlock->end;
        // The caller's pBlock is freed here; continue with the merged block.
        pBlockTmp = pBlock;
        pBlock = pBlock->prev;
        pHeap->numBlocks--;
        portMemFree(pBlockTmp);

        // re-insert updated free block into rb-tree
        if (NV_OK != _heapUpdate(pHeap, pBlock, BLOCK_SIZE_CHANGED))
        {
            return NV_ERR_INVALID_STATE;
        }

        bBlocksMerged = NV_TRUE;
    }

    if ((pBlock->next->owner == NVOS32_BLOCK_TYPE_FREE) && (pBlock->next != pHeap->pBlockList))
    {
        //
        // Remove block to be freed and next one since nodes will be
        // combined into single one.
        //
        if (NV_OK != _heapUpdate(pHeap, pBlock, BLOCK_REMOVE))
        {
            return NV_ERR_INVALID_STATE;
        }
        if (NV_OK != _heapUpdate(pHeap, pBlock->next, BLOCK_REMOVE))
        {
            return NV_ERR_INVALID_STATE;
        }

        //
        // Merge with next block.
        //
        pBlock->prev->next = pBlock->next;
        pBlock->next->prev = pBlock->prev;
        pBlock->next->begin = pBlock->begin;

        if (pHeap->pBlockList == pBlock)
            pHeap->pBlockList = pBlock->next;

        // If pBlock was already merged above it sits on the free list;
        // unlink it before it is freed.
        if (bBlocksMerged)
        {
            if (pHeap->pFreeBlockList == pBlock)
                pHeap->pFreeBlockList = pBlock->u1.nextFree;

            pBlock->u1.nextFree->u0.prevFree = pBlock->u0.prevFree;
            pBlock->u0.prevFree->u1.nextFree = pBlock->u1.nextFree;
        }

        pBlockTmp = pBlock;
        pBlock = pBlock->next;
        pHeap->numBlocks--;
        portMemFree(pBlockTmp);

        // re-insert updated free block into rb-tree
        if (NV_OK != _heapUpdate(pHeap, pBlock, BLOCK_SIZE_CHANGED))
        {
            return NV_ERR_INVALID_STATE;
        }

        bBlocksMerged = NV_TRUE;
    }

    if (!bBlocksMerged)
    {
        //
        // Nothing was merged. Add to free list.
        //
        pBlockTmp = pHeap->pFreeBlockList;
        if (!pBlockTmp)
        {
            // Empty free list: the block becomes a circular list of one.
            pHeap->pFreeBlockList = pBlock;
            pBlock->u1.nextFree = pBlock;
            pBlock->u0.prevFree = pBlock;
        }
        else
        {
            // Keep the circular free list sorted by begin offset.
            if (pBlockTmp->begin > pBlock->begin)
                //
                // Insert into beginning of free list.
                //
                pHeap->pFreeBlockList = pBlock;
            else if (pBlockTmp->u0.prevFree->begin > pBlock->begin)
                //
                // Insert into free list.
                //
                do
                {
                    pBlockTmp = pBlockTmp->u1.nextFree;
                } while (pBlockTmp->begin < pBlock->begin);
                /*
                else
                 * Insert at end of list.
                 */
            pBlock->u1.nextFree = pBlockTmp;
            pBlock->u0.prevFree = pBlockTmp->u0.prevFree;
            pBlock->u0.prevFree->u1.nextFree = pBlock;
            pBlockTmp->u0.prevFree = pBlock;
        }
    }

    // Scrub per-allocation fields of the (possibly merged) free block.
    pBlock->mhandle = 0x0;
    pBlock->align = pBlock->begin;
    pBlock->alignPad = 0;
    pBlock->format = 0;

    HEAP_VALIDATE(pHeap);
    return (NV_OK);
}
2187
heapReference_IMPL(OBJGPU * pGpu,Heap * pHeap,NvU32 owner,MEMORY_DESCRIPTOR * pMemDesc)2188 NV_STATUS heapReference_IMPL
2189 (
2190 OBJGPU *pGpu,
2191 Heap *pHeap,
2192 NvU32 owner,
2193 MEMORY_DESCRIPTOR *pMemDesc
2194 )
2195 {
2196 NvU64 offsetAlign = memdescGetPhysAddr(pMemDesc, AT_GPU, 0);
2197 MEM_BLOCK *pBlock;
2198
2199 // Bail out in case allocation is in PMA owned FB region.
2200 if (pMemDesc->pPmaAllocInfo)
2201 {
2202 if (0 != pMemDesc->pPmaAllocInfo->refCount)
2203 {
2204 pMemDesc->pPmaAllocInfo->refCount++;
2205 if (IsSLIEnabled(pGpu) &&
2206 (memdescGetAddressSpace(pMemDesc) == ADDR_FBMEM))
2207 { //
2208 memdescAddRef(pMemDesc); // Otherwise we have a fake parent descriptor removed with existing submem descriptors.
2209 // In SLI only (not fully understood yet!). In non SLI, that memAddref() causes a memleak.
2210 //
2211 }
2212 }
2213 return NV_OK;
2214 }
2215
2216 if (owner == NVOS32_BLOCK_TYPE_FREE)
2217 return NV_ERR_INVALID_STATE;
2218
2219 pBlock = _heapFindAlignedBlockWithOwner(pGpu, pHeap, owner, offsetAlign);
2220
2221 if (!pBlock)
2222 return NV_ERR_INVALID_OFFSET;
2223
2224 if (pBlock->refCount == HEAP_MAX_REF_COUNT)
2225 {
2226 NV_PRINTF(LEVEL_ERROR,
2227 "heapReference: reference count %x will exceed maximum 0x%x:\n",
2228 pBlock->refCount, HEAP_MAX_REF_COUNT);
2229 return NV_ERR_GENERIC;
2230 }
2231
2232 pBlock->refCount++;
2233 if (IsSLIEnabled(pGpu) &&
2234 (memdescGetAddressSpace(pMemDesc) == ADDR_FBMEM))
2235 { //
2236 memdescAddRef(pMemDesc); // Otherwise we have a fake parent descriptor removed with existing submem descriptors.
2237 // In SLI only (not fully understood yet!). In non SLI, that memAddref() causes a memleak.
2238 //
2239 }
2240 return NV_OK;
2241 }
2242
2243 static NV_STATUS
_heapFindBlockByOffset(OBJGPU * pGpu,Heap * pHeap,NvU32 owner,MEMORY_DESCRIPTOR * pMemDesc,NvU64 offset,MEM_BLOCK ** ppBlock)2244 _heapFindBlockByOffset
2245 (
2246 OBJGPU *pGpu,
2247 Heap *pHeap,
2248 NvU32 owner,
2249 MEMORY_DESCRIPTOR *pMemDesc,
2250 NvU64 offset,
2251 MEM_BLOCK **ppBlock
2252 )
2253 {
2254 NV_STATUS status;
2255
2256 // IRQL TEST: must be running at equivalent of passive-level
2257 IRQL_ASSERT_AND_RETURN(!osIsRaisedIRQL());
2258
2259 *ppBlock = _heapFindAlignedBlockWithOwner(pGpu, pHeap, owner,
2260 offset);
2261
2262 if (!*ppBlock)
2263 {
2264 // Try finding block based solely on offset. This is primarily needed
2265 // to successfully locate a block that was allocated multiple times via
2266 // NVOS32_ALLOC_FLAGS_FIXED_ADDRESS_ALLOCATE in heapAlloc: there can
2267 // be multiple owners, so that _heapFindAlignedBlockWithOwner may fail.
2268 if ((status = heapGetBlock(pHeap, offset, ppBlock)) != NV_OK
2269 || !*ppBlock)
2270 return NV_ERR_INVALID_OFFSET;
2271 }
2272
2273 return NV_OK;
2274 }
2275
2276 NV_STATUS
heapFree_IMPL(OBJGPU * pGpu,Heap * pHeap,NvHandle hClient,NvHandle hDevice,NvU32 owner,MEMORY_DESCRIPTOR * pMemDesc)2277 heapFree_IMPL
2278 (
2279 OBJGPU *pGpu,
2280 Heap *pHeap,
2281 NvHandle hClient,
2282 NvHandle hDevice,
2283 NvU32 owner,
2284 MEMORY_DESCRIPTOR *pMemDesc
2285 )
2286 {
2287 NV_STATUS status;
2288 MEM_BLOCK *pBlock;
2289 MEM_BLOCK *pNextBlock;
2290 NvU64 offsetAlign = memdescGetPhysAddr(pMemDesc, AT_GPU, 0);
2291 NvU64 allocBegin = 0;
2292 NvU64 allocEnd = 0;
2293 NvBool bTurnBlacklistOff = NV_FALSE;
2294 MemoryManager *pMemoryManager = GPU_GET_MEMORY_MANAGER(pGpu);
2295
2296 NV_ASSERT_OR_RETURN(pMemDesc->pHeap == pHeap, NV_ERR_INVALID_ARGUMENT);
2297
2298 if (memdescGetContiguity(pMemDesc, AT_GPU))
2299 {
2300 status = _heapFindBlockByOffset(pGpu, pHeap,
2301 owner, pMemDesc, offsetAlign,
2302 &pBlock);
2303 if (NV_OK != status)
2304 {
2305 return status;
2306 }
2307
2308 if (pBlock->allocedMemDesc)
2309 {
2310 if (pMemDesc != pBlock->pMemDesc)
2311 {
2312 NV_ASSERT(pMemDesc == pBlock->pMemDesc);
2313 return NV_ERR_INVALID_ARGUMENT;
2314 }
2315
2316 // Clear only if the memdesc is about to be freed by memdescDestroy()
2317 if (pMemDesc->RefCount == 1)
2318 {
2319 pBlock->pMemDesc = NULL;
2320 }
2321
2322 memdescFree(pMemDesc);
2323 memdescDestroy(pMemDesc);
2324 }
2325
2326 if (--pBlock->refCount != 0)
2327 return NV_OK;
2328
2329
2330 if(pGpu->getProperty(pGpu, PDB_PROP_GPU_ALLOW_PAGE_RETIREMENT) &&
2331 gpuCheckPageRetirementSupport_HAL(pGpu))
2332 {
2333 if (FLD_TEST_DRF(OS32, _ATTR2, _BLACKLIST, _OFF, pBlock->hwResource.attr2))
2334 {
2335 bTurnBlacklistOff = NV_TRUE;
2336 allocBegin = pBlock->begin;
2337 allocEnd = pBlock->end;
2338 }
2339 }
2340
2341 //
2342 // Since _heapBlockFree() unconditionally releases HW resources
2343 // such as compression tags, some memory descriptor fields
2344 // are rendered stale. These fields need to be reset to safer
2345 // default values (e.g. invalid HW resource ID, pitch PTE
2346 // kind, etc.) - they may be referenced again before the memory
2347 // descriptor itself is freed.
2348 //
2349 if (pBlock->allocedMemDesc && (pBlock->pMemDesc != NULL))
2350 {
2351 memdescSetHwResId(pMemDesc, 0);
2352 // XXX We cannot reset the PTE kind here since it cause corruption
2353 // in RAGE. See bug 949059
2354 //
2355 // This is an ugly hack to help OGL recover from modeswitch.
2356 // A cleaner fix would be to change the way memory is managed in OGL,
2357 // but it doesn't worth the effort to fix that on XP, since the OS is
2358 // close to end of life. The OGL linux team have plan to change their
2359 // memory management in the future, so later this hack may not be
2360 // required anymore
2361 // pMemDesc->PteKind = 0;
2362 }
2363
2364 if ((status = _heapBlockFree(pGpu, pHeap, hClient, hDevice, pBlock)) != NV_OK)
2365 {
2366 NV_ASSERT(0);
2367 }
2368
2369 //
2370 // since the mem desc is freed, now we can reallocate the blacklisted pages
2371 // in the [allocBegin, allocEnd]
2372 //
2373 if (bTurnBlacklistOff)
2374 status = _heapBlacklistChunks(pGpu, pHeap, &pHeap->blackList, allocBegin, allocEnd-allocBegin+1);
2375
2376 if (pMemoryManager->bEnableDynamicPageOfflining)
2377 {
2378 NvU32 i = 0;
2379 BLACKLIST *pBlacklist = &pHeap->blackList;
2380 BLACKLIST_CHUNK *pBlacklistChunks = pBlacklist->pBlacklistChunks;
2381
2382 for (i = 0; i < pBlacklist->count; i++)
2383 {
2384 if (pBlacklistChunks[i].bPendingRetirement &&
2385 (pBlacklistChunks[i].physOffset >= allocBegin &&
2386 pBlacklistChunks[i].physOffset <= allocEnd))
2387 {
2388 status = _heapBlacklistSingleChunk(pGpu, pHeap, &pBlacklist->pBlacklistChunks[i]);
2389 if (NV_OK != status)
2390 {
2391 NV_PRINTF(LEVEL_ERROR, "heapBlacklistSingleChunk, status: %x!\n", status);
2392 return status;
2393 }
2394 }
2395 }
2396 }
2397 return status;
2398 }
2399 else
2400 {
2401 NvBool bBlacklistFailed = NV_FALSE;
2402 //
2403 // Use the pMemDesc->PteArray[0] to find the first block
2404 // The remaining blocks can be found from each block's
2405 // noncontigAllocListNext pointer
2406 //
2407 status = _heapFindBlockByOffset(pGpu, pHeap,
2408 owner, pMemDesc,
2409 memdescGetPte(pMemDesc, AT_GPU, 0), &pBlock);
2410
2411 if (NV_OK != status)
2412 {
2413 return status;
2414 }
2415
2416 while (pBlock != NULL)
2417 {
2418 // _heapBlockFree() clears pBlock, so save the next pointer
2419 pNextBlock = pBlock->noncontigAllocListNext;
2420
2421 if (--pBlock->refCount != 0)
2422 {
2423 // Remove this block from the noncontig allocation list
2424 pBlock->noncontigAllocListNext = NULL;
2425 pBlock = pNextBlock;
2426 continue;
2427 }
2428
2429 if (NV_OK != (status = _heapBlockFree(pGpu, pHeap, hClient, hDevice, pBlock)))
2430 return status;
2431
2432 // check if we need to dynamically blacklist the page
2433 if (pMemoryManager->bEnableDynamicPageOfflining)
2434 {
2435 NvU32 i = 0;
2436 BLACKLIST *pBlacklist = &pHeap->blackList;
2437 BLACKLIST_CHUNK *pBlacklistChunks = pBlacklist->pBlacklistChunks;
2438 for (i = 0; i < pBlacklist->count; i++)
2439 {
2440 if (pBlacklistChunks[i].bPendingRetirement &&
2441 (pBlacklistChunks[i].physOffset >= pBlock->begin &&
2442 pBlacklistChunks[i].physOffset <= pBlock->end))
2443 {
2444 status = _heapBlacklistSingleChunk(pGpu, pHeap, &pBlacklist->pBlacklistChunks[i]);
2445 if (NV_OK != status)
2446 {
2447 NV_PRINTF(LEVEL_ERROR, "heapBlacklistSingleChunk, status: %x!\n", status);
2448 bBlacklistFailed = NV_TRUE;
2449 }
2450 }
2451 }
2452 }
2453 pBlock = pNextBlock;
2454 }
2455
2456 memdescFree(pMemDesc);
2457 memdescDestroy(pMemDesc);
2458
2459 if (bBlacklistFailed)
2460 {
2461 return NV_ERR_INVALID_STATE;
2462 }
2463 else
2464 {
2465 return status;
2466 }
2467 }
2468 }
2469
heapGetBlock_IMPL(Heap * pHeap,NvU64 offset,MEM_BLOCK ** ppMemBlock)2470 NV_STATUS heapGetBlock_IMPL
2471 (
2472 Heap *pHeap,
2473 NvU64 offset,
2474 MEM_BLOCK **ppMemBlock
2475 )
2476 {
2477 NODE *pNode;
2478
2479 if (btreeSearch(offset, &pNode, pHeap->pBlockTree) != NV_OK)
2480 {
2481 if (ppMemBlock)
2482 {
2483 *ppMemBlock = NULL;
2484 }
2485 return NV_ERR_GENERIC;
2486 }
2487
2488 if (ppMemBlock)
2489 {
2490 *ppMemBlock = (MEM_BLOCK *)pNode->Data;
2491 }
2492
2493 return NV_OK;
2494 }
2495
_heapFindAlignedBlockWithOwner(OBJGPU * pGpu,Heap * pHeap,NvU32 owner,NvU64 offset)2496 static MEM_BLOCK *_heapFindAlignedBlockWithOwner
2497 (
2498 OBJGPU *pGpu,
2499 Heap *pHeap,
2500 NvU32 owner,
2501 NvU64 offset // aligned
2502 )
2503 {
2504 MEM_BLOCK *pBlock;
2505 NODE *pNode;
2506
2507 HEAP_VALIDATE(pHeap);
2508
2509 if (btreeSearch(offset, &pNode, pHeap->pBlockTree) != NV_OK)
2510 {
2511 return NULL;
2512 }
2513
2514 pBlock = (MEM_BLOCK *)pNode->Data;
2515 if (pBlock->owner != owner)
2516 {
2517 return NULL;
2518 }
2519
2520 return pBlock;
2521 }
2522
heapGetSize_IMPL(Heap * pHeap,NvU64 * size)2523 NV_STATUS heapGetSize_IMPL
2524 (
2525 Heap *pHeap,
2526 NvU64 *size
2527 )
2528 {
2529 *size = pHeap->total;
2530 HEAP_VALIDATE(pHeap);
2531 return (NV_OK);
2532 }
2533
heapGetUsableSize_IMPL(Heap * pHeap,NvU64 * usableSize)2534 NV_STATUS heapGetUsableSize_IMPL
2535 (
2536 Heap *pHeap,
2537 NvU64 *usableSize
2538 )
2539 {
2540 *usableSize = pHeap->total - pHeap->reserved;
2541 HEAP_VALIDATE(pHeap);
2542 return (NV_OK);
2543 }
2544
heapGetFree_IMPL(Heap * pHeap,NvU64 * free)2545 NV_STATUS heapGetFree_IMPL
2546 (
2547 Heap *pHeap,
2548 NvU64 *free
2549 )
2550 {
2551 *free = pHeap->free;
2552 HEAP_VALIDATE(pHeap);
2553 return (NV_OK);
2554 }
2555
heapGetBase_IMPL(Heap * pHeap,NvU64 * base)2556 NV_STATUS heapGetBase_IMPL
2557 (
2558 Heap *pHeap,
2559 NvU64 *base
2560 )
2561 {
2562 *base = pHeap->base;
2563 HEAP_VALIDATE(pHeap);
2564 return (NV_OK);
2565 }
2566
_heapGetMaxFree(Heap * pHeap,NvU64 * maxOffset,NvU64 * maxFree)2567 static NV_STATUS _heapGetMaxFree
2568 (
2569 Heap *pHeap,
2570 NvU64 *maxOffset,
2571 NvU64 *maxFree
2572 )
2573 {
2574 MEM_BLOCK *pBlockFirstFree, *pBlockFree;
2575 NvU64 freeBlockSize;
2576
2577 *maxFree = 0;
2578
2579 pBlockFirstFree = pHeap->pFreeBlockList;
2580 if (!pBlockFirstFree)
2581 // There are no free blocks. Max free is already set to 0
2582 return (NV_OK);
2583
2584 // Walk the free block list.
2585 pBlockFree = pBlockFirstFree;
2586 do {
2587 freeBlockSize = pBlockFree->end - pBlockFree->begin + 1;
2588 if (freeBlockSize > *maxFree)
2589 {
2590 *maxOffset = pBlockFree->begin;
2591 *maxFree = freeBlockSize;
2592 }
2593 pBlockFree = pBlockFree->u1.nextFree;
2594 } while (pBlockFree != pBlockFirstFree);
2595
2596 return (NV_OK);
2597 }
2598
heapInfo_IMPL(Heap * pHeap,NvU64 * bytesFree,NvU64 * bytesTotal,NvU64 * base,NvU64 * largestOffset,NvU64 * largestFree)2599 NV_STATUS heapInfo_IMPL
2600 (
2601 Heap *pHeap,
2602 NvU64 *bytesFree,
2603 NvU64 *bytesTotal,
2604 NvU64 *base,
2605 NvU64 *largestOffset, // largest free blocks offset
2606 NvU64 *largestFree // largest free blocks size
2607 )
2608 {
2609 NV_STATUS status;
2610
2611 *bytesFree = pHeap->free;
2612 *bytesTotal = pHeap->total - pHeap->reserved;
2613 *base = pHeap->base;
2614 status = _heapGetMaxFree(pHeap, largestOffset, largestFree);
2615 HEAP_VALIDATE(pHeap);
2616
2617 return status;
2618 }
2619
heapInfoTypeAllocBlocks_IMPL(Heap * pHeap,NvU32 type,NvU64 * bytesTotal)2620 NV_STATUS heapInfoTypeAllocBlocks_IMPL
2621 (
2622 Heap *pHeap,
2623 NvU32 type,
2624 NvU64 *bytesTotal
2625 )
2626 {
2627 MEM_BLOCK *pBlock;
2628 NvU64 total;
2629
2630 if (type >= NVOS32_NUM_MEM_TYPES) return (NV_ERR_GENERIC);
2631
2632 pBlock = pHeap->pBlockList;
2633 total = 0;
2634
2635 if (type == NVOS32_TYPE_OWNER_RM)
2636 {
2637 //
2638 // Scan for all the blocks whose owner is within
2639 // HEAP_OWNER_RM_SCRATCH_BEGIN and HEAP_OWNER_RM_SCRATCH_END
2640 // this is strictly speaking not 'type' search. Also note that this
2641 // includes reserved space in any,.like in case of 3FB mixed density mode.
2642 //
2643 do
2644 {
2645 if ( (pBlock->owner > HEAP_OWNER_RM_SCRATCH_BEGIN) &&
2646 (pBlock->owner < HEAP_OWNER_RM_SCRATCH_END) )
2647 {
2648 total += (pBlock->end - pBlock->begin + 1);
2649 }
2650 pBlock = pBlock->next;
2651 } while (pBlock != pHeap->pBlockList);
2652 }
2653 else
2654 {
2655 //
2656 // Scan for all the blocks belonging to this type.
2657 //
2658 do
2659 {
2660 if (pBlock->u0.type == type)
2661 total += (pBlock->end - pBlock->begin + 1);
2662 pBlock = pBlock->next;
2663 } while (pBlock != pHeap->pBlockList);
2664 }
2665
2666 *bytesTotal = total;
2667
2668 HEAP_VALIDATE(pHeap);
2669 return NV_OK;
2670 }
2671
heapGetBlockHandle_IMPL(Heap * pHeap,NvU32 owner,NvU32 type,NvU64 offset,NvBool bSkipCheck,NvHandle * puHandle)2672 NV_STATUS heapGetBlockHandle_IMPL(
2673 Heap *pHeap,
2674 NvU32 owner,
2675 NvU32 type,
2676 NvU64 offset,
2677 NvBool bSkipCheck, // NV_TRUE if skip alignment/type check
2678 NvHandle *puHandle
2679 )
2680 {
2681 MEM_BLOCK *pBlock;
2682 NV_STATUS status;
2683
2684 if (offset > (pHeap->base + pHeap->total - 1)) return (NV_ERR_GENERIC);
2685
2686 status = heapGetBlock(pHeap, offset, &pBlock);
2687 if (status != NV_OK)
2688 {
2689 return status;
2690 }
2691
2692 if (!((pBlock->owner == owner) &&
2693 (((pBlock->u0.type == type) && (pBlock->align == offset)) || bSkipCheck)))
2694 {
2695 return NV_ERR_GENERIC;
2696 }
2697
2698 *puHandle = pBlock->mhandle;
2699 return NV_OK;
2700 }
2701
2702 //
2703 // Returns the number of blocks (free or allocated) currently in the heap
2704 //
heapGetNumBlocks_IMPL(Heap * pHeap)2705 NvU32 heapGetNumBlocks_IMPL
2706 (
2707 Heap *pHeap
2708 )
2709 {
2710 return pHeap->numBlocks;
2711 }
2712
2713 //
2714 // Copies over block information for each block in the heap into the provided buffer
2715 //
heapGetBlockInfo_IMPL(Heap * pHeap,NvU32 size,NVOS32_HEAP_DUMP_BLOCK * pBlockBuffer)2716 NV_STATUS heapGetBlockInfo_IMPL
2717 (
2718 Heap *pHeap,
2719 NvU32 size,
2720 NVOS32_HEAP_DUMP_BLOCK *pBlockBuffer
2721 )
2722 {
2723 MEM_BLOCK *pBlock;
2724 NvU32 heapSize, i;
2725 NV_STATUS rmStatus = NV_OK;
2726
2727 // ensure buffer is the same size
2728 heapSize = heapGetNumBlocks(pHeap);
2729 NV_ASSERT_OR_RETURN(heapSize == size, NV_ERR_INVALID_ARGUMENT);
2730
2731 pBlock = pHeap->pBlockList;
2732 for (i=0; i<heapSize; i++)
2733 {
2734 pBlockBuffer->begin = pBlock->begin;
2735 pBlockBuffer->align = pBlock->align;
2736 pBlockBuffer->end = pBlock->end;
2737 pBlockBuffer->owner = pBlock->owner;
2738 pBlockBuffer->format = pBlock->format;
2739 pBlock = pBlock->next;
2740 pBlockBuffer++;
2741 }
2742
2743 return rmStatus;
2744 }
2745
heapAllocHint_IMPL(OBJGPU * pGpu,Heap * pHeap,NvHandle hClient,NvHandle hDevice,HEAP_ALLOC_HINT_PARAMS * pAllocHint)2746 NV_STATUS heapAllocHint_IMPL
2747 (
2748 OBJGPU *pGpu,
2749 Heap *pHeap,
2750 NvHandle hClient,
2751 NvHandle hDevice,
2752 HEAP_ALLOC_HINT_PARAMS *pAllocHint
2753 )
2754 {
2755 MemoryManager *pMemoryManager = GPU_GET_MEMORY_MANAGER(pGpu);
2756 NvU64 alignment;
2757 NV_STATUS status;
2758 NvBool ignoreBankPlacement;
2759 NvU32 textureClientIndex = 0xFFFFFFFF;
2760 NvU32 bankPlacement = 0;
2761 NvU8 currentBankInfo = 0;
2762 FB_ALLOC_INFO *pFbAllocInfo = NULL;
2763 FB_ALLOC_PAGE_FORMAT *pFbAllocPageFormat = NULL;
2764 NvU64 pageSize = 0;
2765 NvU32 flags;
2766 NvU32 owner;
2767
2768 // Check for valid size.
2769 NV_ASSERT_OR_RETURN((pAllocHint->pSize != NULL), NV_ERR_INVALID_ARGUMENT);
2770
2771 // Ensure a valid allocation type was passed in
2772 NV_ASSERT_OR_RETURN((pAllocHint->type < NVOS32_NUM_MEM_TYPES), NV_ERR_INVALID_ARGUMENT);
2773
2774 // As we will dereference these two later, we should not allow NULL value.
2775 NV_ASSERT_OR_RETURN(((pAllocHint->pHeight != NULL) && (pAllocHint->pAttr != NULL)), NV_ERR_INVALID_ARGUMENT);
2776
2777 owner = 0x0;
2778 status = _heapGetBankPlacement(pGpu, pHeap, owner,
2779 &pAllocHint->flags, pAllocHint->type,
2780 0x0, &bankPlacement);
2781 if (status != NV_OK)
2782 {
2783 NV_PRINTF(LEVEL_ERROR,
2784 "_heapGetBankPlacement failed for current allocation\n");
2785 goto exit;
2786 }
2787
2788 pFbAllocInfo = portMemAllocNonPaged(sizeof(FB_ALLOC_INFO));
2789 if (pFbAllocInfo == NULL)
2790 {
2791 NV_ASSERT(0);
2792 status = NV_ERR_NO_MEMORY;
2793 goto exit;
2794 }
2795
2796 pFbAllocPageFormat = portMemAllocNonPaged(sizeof(FB_ALLOC_PAGE_FORMAT));
2797 if (pFbAllocPageFormat == NULL) {
2798 NV_ASSERT(0);
2799 status = NV_ERR_NO_MEMORY;
2800 goto exit;
2801 }
2802
2803 portMemSet(pFbAllocInfo, 0, sizeof(FB_ALLOC_INFO));
2804 portMemSet(pFbAllocPageFormat, 0, sizeof(FB_ALLOC_PAGE_FORMAT));
2805 pFbAllocInfo->pageFormat = pFbAllocPageFormat;
2806
2807 pFbAllocInfo->pageFormat->type = pAllocHint->type;
2808 pFbAllocInfo->hwResId = 0;
2809 pFbAllocInfo->pad = 0;
2810 pFbAllocInfo->height = *pAllocHint->pHeight;
2811 pFbAllocInfo->width = *pAllocHint->pWidth;
2812 pFbAllocInfo->pitch = (pAllocHint->pPitch) ? (*pAllocHint->pPitch) : 0;
2813 pFbAllocInfo->size = *pAllocHint->pSize;
2814 pFbAllocInfo->pageFormat->kind = 0;
2815 pFbAllocInfo->offset = ~0;
2816 pFbAllocInfo->hClient = hClient;
2817 pFbAllocInfo->hDevice = hDevice;
2818 pFbAllocInfo->pageFormat->flags = pAllocHint->flags;
2819 pFbAllocInfo->pageFormat->attr = *pAllocHint->pAttr;
2820 pFbAllocInfo->retAttr = *pAllocHint->pAttr;
2821 pFbAllocInfo->pageFormat->attr2 = *pAllocHint->pAttr2;
2822 pFbAllocInfo->retAttr2 = *pAllocHint->pAttr2;
2823 pFbAllocInfo->format = pAllocHint->format;
2824
2825 if ((pAllocHint->flags & NVOS32_ALLOC_FLAGS_ALIGNMENT_HINT) ||
2826 (pAllocHint->flags & NVOS32_ALLOC_FLAGS_ALIGNMENT_FORCE))
2827 pFbAllocInfo->align = *pAllocHint->pAlignment;
2828 else
2829 pFbAllocInfo->align = RM_PAGE_SIZE;
2830
2831 // Fetch RM page size
2832 pageSize = memmgrDeterminePageSize(pMemoryManager, pFbAllocInfo->hClient, pFbAllocInfo->size,
2833 pFbAllocInfo->format, pFbAllocInfo->pageFormat->flags,
2834 &pFbAllocInfo->retAttr, &pFbAllocInfo->retAttr2);
2835 if (pageSize == 0)
2836 {
2837 status = NV_ERR_INVALID_STATE;
2838 NV_PRINTF(LEVEL_ERROR, "memmgrDeterminePageSize failed, status: 0x%x\n", status);
2839 goto exit;
2840 }
2841
2842 // Fetch memory alignment
2843 status = memmgrAllocDetermineAlignment_HAL(pGpu, pMemoryManager, &pFbAllocInfo->size, &pFbAllocInfo->align,
2844 pFbAllocInfo->alignPad, pFbAllocInfo->pageFormat->flags,
2845 pFbAllocInfo->retAttr, pFbAllocInfo->retAttr2, 0);
2846 if (status != NV_OK)
2847 {
2848 NV_PRINTF(LEVEL_ERROR, "memmgrAllocDetermineAlignment failed, status: 0x%x\n", status);
2849 goto exit;
2850 }
2851
2852 //
2853 // Call into HAL to reserve any hardware resources for
2854 // the specified memory type.
2855 // If the alignment was changed due to a HW limitation, and the
2856 // flag NVOS32_ALLOC_FLAGS_ALIGNMENT_FORCE is set, bad_argument
2857 // will be passed back from the HAL
2858 //
2859 flags = pFbAllocInfo->pageFormat->flags;
2860 pFbAllocInfo->pageFormat->flags |= NVOS32_ALLOC_FLAGS_SKIP_RESOURCE_ALLOC;
2861 status = memmgrAllocHwResources(pGpu, pMemoryManager, pFbAllocInfo);
2862 pFbAllocInfo->pageFormat->flags = flags;
2863 *pAllocHint->pAttr = pFbAllocInfo->retAttr;
2864 *pAllocHint->pAttr2 = pFbAllocInfo->retAttr2;
2865 *pAllocHint->pKind = pFbAllocInfo->pageFormat->kind;
2866
2867 // Save retAttr as Possible Attributes that have passed error checking and
2868 // clear retAttr because we have not allocated them yet
2869 pFbAllocInfo->possAttr = pFbAllocInfo->retAttr;
2870 // pFbAllocInfo->possAttr2 = pFbAllocInfo->retAttr2;
2871 pFbAllocInfo->retAttr = 0x0;
2872 pFbAllocInfo->retAttr2 = 0x0;
2873 if (status != NV_OK)
2874 {
2875 //
2876 // probably means we passed in a bogus type or no tiling resources available
2877 // when tiled memory attribute was set to REQUIRED
2878 //
2879 NV_PRINTF(LEVEL_ERROR, "memmgrAllocHwResources failed, status: 0x%x\n",
2880 status);
2881 goto exit;
2882 }
2883
2884 //
2885 // Refresh search parameters.
2886 //
2887 if ((DRF_VAL(OS32, _ATTR, _FORMAT, *pAllocHint->pAttr) != NVOS32_ATTR_FORMAT_BLOCK_LINEAR))
2888 {
2889 *pAllocHint->pHeight = pFbAllocInfo->height;
2890 if (pAllocHint->pPitch)
2891 *pAllocHint->pPitch = pFbAllocInfo->pitch;
2892 }
2893
2894 //
2895 // The heap allocator has assumed required alignments are powers of 2
2896 // (aligning FB offsets has been done using bit masks).
2897 //
2898 //
2899 *pAllocHint->pAlignment = pFbAllocInfo->align + 1; // convert mask to size
2900 alignment = pFbAllocInfo->align + 1;
2901
2902 //
2903 // Allow caller to request host page alignment to make it easier
2904 // to move things around with host os VM subsystem
2905 //
2906
2907 if (pAllocHint->flags & NVOS32_ALLOC_FLAGS_FORCE_ALIGN_HOST_PAGE)
2908 {
2909 OBJSYS *pSys = SYS_GET_INSTANCE();
2910 NvU64 hostPageSize = pSys->cpuInfo.hostPageSize;
2911
2912 // hostPageSize *should* always be set, but....
2913 if (hostPageSize == 0)
2914 hostPageSize = RM_PAGE_SIZE;
2915
2916 alignment = memUtilsLeastCommonAlignment(alignment, hostPageSize);
2917 }
2918
2919 if (memmgrAllocGetAddrSpace(pMemoryManager, pAllocHint->flags, *pAllocHint->pAttr) == ADDR_FBMEM)
2920 {
2921 if (alignment >= pHeap->total)
2922 {
2923 status = NV_ERR_INVALID_ARGUMENT;
2924 NV_PRINTF(LEVEL_ERROR, "heapAllocHint failed due to alignmend >= pHeap->total\n");
2925 goto exit;
2926 }
2927 }
2928
2929 //
2930 // Check if NVOS32_ALLOC_FLAGS_IGNORE_BANK_PLACEMENT was passed in with
2931 // the type to ignore placing this allocation in a particular bank.
2932 // This means we default to the second loop where we choose first fit.
2933 //
2934 ignoreBankPlacement = NV_FALSE;
2935 if (pAllocHint->flags & NVOS32_ALLOC_FLAGS_IGNORE_BANK_PLACEMENT)
2936 ignoreBankPlacement = NV_TRUE;
2937
2938 if ((pAllocHint->type == NVOS32_TYPE_TEXTURE) && (!pAllocHint->flags))
2939 _heapSetTexturePlacement(pHeap, pAllocHint->client, pAllocHint->type, &ignoreBankPlacement, &textureClientIndex, ¤tBankInfo);
2940
2941 pAllocHint->bankPlacement = bankPlacement;
2942 pAllocHint->ignoreBankPlacement = ignoreBankPlacement;
2943
2944 *pAllocHint->pHeight = pFbAllocInfo->height;
2945 pAllocHint->pad = pFbAllocInfo->pad;
2946
2947 *pAllocHint->pSize = pFbAllocInfo->size; // returned to caller
2948
2949 pAllocHint->alignAdjust = 0;
2950
2951 exit:
2952 portMemFree(pFbAllocPageFormat);
2953 portMemFree(pFbAllocInfo);
2954
2955 return status;
2956 }
2957
heapHwAlloc_IMPL(OBJGPU * pGpu,Heap * pHeap,NvHandle hClient,NvHandle hDevice,NvHandle hMemory,MEMORY_HW_RESOURCES_ALLOCATION_REQUEST * pHwAlloc,NvU32 * pAttr,NvU32 * pAttr2)2958 NV_STATUS heapHwAlloc_IMPL
2959 (
2960 OBJGPU *pGpu,
2961 Heap *pHeap,
2962 NvHandle hClient,
2963 NvHandle hDevice,
2964 NvHandle hMemory,
2965 MEMORY_HW_RESOURCES_ALLOCATION_REQUEST *pHwAlloc,
2966 NvU32 *pAttr,
2967 NvU32 *pAttr2
2968 )
2969 {
2970 MemoryManager *pMemoryManager = GPU_GET_MEMORY_MANAGER(pGpu);
2971 NV_STATUS status = NV_OK;
2972 FB_ALLOC_INFO *pFbAllocInfo = NULL;
2973 FB_ALLOC_PAGE_FORMAT *pFbAllocPageFormat = NULL;
2974 NvU64 pageSize = 0;
2975 NV_MEMORY_HW_RESOURCES_ALLOCATION_PARAMS *pUserParams = pHwAlloc->pUserParams;
2976
2977 // Ensure a valid allocation type was passed in
2978 if (pUserParams->type > NVOS32_NUM_MEM_TYPES - 1)
2979 return NV_ERR_GENERIC;
2980
2981 pFbAllocInfo = portMemAllocNonPaged(sizeof(FB_ALLOC_INFO));
2982 if (NULL == pFbAllocInfo)
2983 {
2984 NV_PRINTF(LEVEL_ERROR, "No memory for Resource %p\n",
2985 pHwAlloc->pHandle);
2986 status = NV_ERR_GENERIC;
2987 goto failed;
2988 }
2989 pFbAllocPageFormat = portMemAllocNonPaged(sizeof(FB_ALLOC_PAGE_FORMAT));
2990 if (NULL == pFbAllocPageFormat)
2991 {
2992 NV_PRINTF(LEVEL_ERROR, "No memory for Resource %p\n",
2993 pHwAlloc->pHandle);
2994 status = NV_ERR_GENERIC;
2995 goto failed;
2996 }
2997
2998 portMemSet(pFbAllocInfo, 0x0, sizeof(FB_ALLOC_INFO));
2999 portMemSet(pFbAllocPageFormat, 0x0, sizeof(FB_ALLOC_PAGE_FORMAT));
3000 pFbAllocInfo->pageFormat = pFbAllocPageFormat;
3001 pFbAllocInfo->pageFormat->type = pUserParams->type;
3002 pFbAllocInfo->hwResId = 0;
3003 pFbAllocInfo->pad = 0;
3004 pFbAllocInfo->height = pUserParams->height;
3005 pFbAllocInfo->width = pUserParams->width;
3006 pFbAllocInfo->pitch = pUserParams->pitch;
3007 pFbAllocInfo->size = pUserParams->size;
3008 pFbAllocInfo->origSize = pUserParams->size;
3009 pFbAllocInfo->pageFormat->kind = pUserParams->kind;
3010 pFbAllocInfo->offset = memmgrGetInvalidOffset_HAL(pGpu, pMemoryManager);
3011 pFbAllocInfo->hClient = hClient;
3012 pFbAllocInfo->hDevice = hDevice;
3013 pFbAllocInfo->pageFormat->flags = pUserParams->flags;
3014 pFbAllocInfo->pageFormat->attr = pUserParams->attr;
3015 pFbAllocInfo->pageFormat->attr2 = pUserParams->attr2;
3016 pFbAllocInfo->retAttr = pUserParams->attr;
3017 pFbAllocInfo->retAttr2 = pUserParams->attr2;
3018 pFbAllocInfo->comprCovg = pUserParams->comprCovg;
3019 pFbAllocInfo->zcullCovg = 0;
3020 pFbAllocInfo->internalflags = 0;
3021
3022 if ((pUserParams->flags & NVOS32_ALLOC_FLAGS_ALIGNMENT_HINT) ||
3023 (pUserParams->flags & NVOS32_ALLOC_FLAGS_ALIGNMENT_FORCE))
3024 pFbAllocInfo->align = pUserParams->alignment;
3025 else
3026 pFbAllocInfo->align = RM_PAGE_SIZE;
3027
3028 // Fetch RM page size
3029 pageSize = memmgrDeterminePageSize(pMemoryManager, pFbAllocInfo->hClient, pFbAllocInfo->size,
3030 pFbAllocInfo->format, pFbAllocInfo->pageFormat->flags,
3031 &pFbAllocInfo->retAttr, &pFbAllocInfo->retAttr2);
3032 if (pageSize == 0)
3033 {
3034 status = NV_ERR_INVALID_STATE;
3035 NV_PRINTF(LEVEL_ERROR, "memmgrDeterminePageSize failed\n");
3036 }
3037
3038 // Fetch memory alignment
3039 status = memmgrAllocDetermineAlignment_HAL(pGpu, pMemoryManager, &pFbAllocInfo->size, &pFbAllocInfo->align,
3040 pFbAllocInfo->alignPad, pFbAllocInfo->pageFormat->flags,
3041 pFbAllocInfo->retAttr, pFbAllocInfo->retAttr2, 0);
3042 if (status != NV_OK)
3043 {
3044 NV_PRINTF(LEVEL_ERROR, "memmgrAllocDetermineAlignment failed\n");
3045 }
3046
3047 //
3048 // vGPU:
3049 //
3050 // Since vGPU does all real hardware management in the
3051 // host, if we are in guest OS (where IS_VIRTUAL(pGpu) is true),
3052 // do an RPC to the host to do the hardware update.
3053 //
3054 if ((status == NV_OK) && IS_VIRTUAL(pGpu))
3055 {
3056 if (vgpuIsGuestManagedHwAlloc(pGpu) &&
3057 (FLD_TEST_DRF(OS32, _ATTR, _COMPR, _NONE, pFbAllocInfo->pageFormat->attr)))
3058 {
3059 status = memmgrAllocHwResources(pGpu, pMemoryManager, pFbAllocInfo);
3060 pHwAlloc->hwResource.isVgpuHostAllocated = NV_FALSE;
3061 NV_ASSERT(status == NV_OK);
3062 }
3063 else
3064 {
3065 NV_RM_RPC_MANAGE_HW_RESOURCE_ALLOC(pGpu,
3066 hClient,
3067 hDevice,
3068 hMemory,
3069 pFbAllocInfo,
3070 status);
3071 pHwAlloc->hwResource.isVgpuHostAllocated = NV_TRUE;
3072 }
3073
3074 pUserParams->uncompressedKind = pFbAllocInfo->uncompressedKind;
3075 pUserParams->compPageShift = pFbAllocInfo->compPageShift;
3076 pUserParams->compressedKind = pFbAllocInfo->compressedKind;
3077 pUserParams->compTagLineMin = pFbAllocInfo->compTagLineMin;
3078 pUserParams->compPageIndexLo = pFbAllocInfo->compPageIndexLo;
3079 pUserParams->compPageIndexHi = pFbAllocInfo->compPageIndexHi;
3080 pUserParams->compTagLineMultiplier = pFbAllocInfo->compTagLineMultiplier;
3081 }
3082 else
3083 {
3084 //
3085 // Call into HAL to reserve any hardware resources for
3086 // the specified memory type.
3087 // If the alignment was changed due to a HW limitation, and the
3088 // flag NVOS32_ALLOC_FLAGS_ALIGNMENT_FORCE is set, bad_argument
3089 // will be passed back from the HAL
3090 //
3091 status = memmgrAllocHwResources(pGpu, pMemoryManager, pFbAllocInfo);
3092 }
3093
3094 // Is status bad or did we request attributes and they failed
3095 if ((status != NV_OK) || ((pUserParams->attr) && (0x0 == pFbAllocInfo->retAttr)))
3096 {
3097 //
3098 // probably means we passed in a bogus type or no tiling resources available
3099 // when tiled memory attribute was set to REQUIRED
3100 //
3101 if (status != NV_OK)
3102 {
3103 NV_PRINTF(LEVEL_ERROR,
3104 "nvHalFbAlloc failure status = 0x%x Requested Attr 0x%x!\n",
3105 status, pUserParams->attr);
3106 }
3107 else
3108 {
3109 NV_PRINTF(LEVEL_WARNING,
3110 "nvHalFbAlloc Out of Resources Requested=%x Returned=%x !\n",
3111 pUserParams->attr, pFbAllocInfo->retAttr);
3112 }
3113 goto failed;
3114 }
3115
3116 //
3117 // Refresh search parameters.
3118 //
3119 pUserParams->pitch = pFbAllocInfo->pitch;
3120
3121 pUserParams->height = pFbAllocInfo->height;
3122 pHwAlloc->pad = NvU64_LO32(pFbAllocInfo->pad);
3123 pUserParams->kind = pFbAllocInfo->pageFormat->kind;
3124 pHwAlloc->hwResId = pFbAllocInfo->hwResId;
3125
3126 pUserParams->size = pFbAllocInfo->size; // returned to caller
3127
3128 pHwAlloc->hwResource.attr = pFbAllocInfo->retAttr;
3129 pHwAlloc->hwResource.attr2 = pFbAllocInfo->retAttr2;
3130 pHwAlloc->hwResource.comprCovg = pFbAllocInfo->comprCovg;
3131 pHwAlloc->hwResource.ctagOffset = pFbAllocInfo->ctagOffset;
3132 pHwAlloc->hwResource.hwResId = pFbAllocInfo->hwResId;
3133
3134 *pAttr = pFbAllocInfo->retAttr;
3135 *pAttr2 = pFbAllocInfo->retAttr2;
3136
3137 failed:
3138 portMemFree(pFbAllocPageFormat);
3139 portMemFree(pFbAllocInfo);
3140
3141 return status;
3142 }
3143
void heapHwFree_IMPL
(
    OBJGPU *pGpu,
    Heap *pHeap,
    Memory *pMemory,
    NvU32 flags
)
{
    MemoryManager        *pMemoryManager = GPU_GET_MEMORY_MANAGER(pGpu);
    FB_ALLOC_INFO        *pInfo          = NULL;
    FB_ALLOC_PAGE_FORMAT *pPageFormat    = NULL;

    // Scratch descriptors for the HW resource teardown request.
    pInfo = portMemAllocNonPaged(sizeof(FB_ALLOC_INFO));
    if (pInfo == NULL)
    {
        NV_ASSERT(0);
        goto cleanup;
    }

    pPageFormat = portMemAllocNonPaged(sizeof(FB_ALLOC_PAGE_FORMAT));
    if (pPageFormat == NULL)
    {
        NV_ASSERT(0);
        goto cleanup;
    }

    portMemSet(pInfo, 0, sizeof(FB_ALLOC_INFO));
    portMemSet(pPageFormat, 0, sizeof(FB_ALLOC_PAGE_FORMAT));
    pInfo->pageFormat = pPageFormat;

    //
    // Reconstruct enough of the original allocation description for the
    // HAL (or the vGPU host) to identify the HW resources being released.
    //
    pPageFormat->type  = pMemory->Type;
    pPageFormat->attr  = pMemory->pHwResource->attr;
    pPageFormat->attr2 = pMemory->pHwResource->attr2;
    pInfo->hwResId     = pMemory->pHwResource->hwResId;
    pInfo->size        = pMemory->Length;
    pInfo->format      = memdescGetPteKind(pMemory->pMemDesc);
    pInfo->offset      = ~0;
    pInfo->hClient     = RES_GET_CLIENT_HANDLE(pMemory);
    pInfo->hDevice     = RES_GET_HANDLE(pMemory->pDevice);

    //
    // vGPU: real hardware management lives in the host, so a guest
    // (IS_VIRTUAL) normally RPCs the free to the host. Guest-managed
    // allocations are the exception and are released locally.
    //
    if (!IS_VIRTUAL(pGpu))
    {
        memmgrFreeHwResources(pGpu, pMemoryManager, pInfo);
    }
    else if (vgpuIsGuestManagedHwAlloc(pGpu) &&
             !pMemory->pHwResource->isVgpuHostAllocated)
    {
        memmgrFreeHwResources(pGpu, pMemoryManager, pInfo);
    }
    else
    {
        NV_STATUS rmStatus = NV_OK;

        NV_RM_RPC_MANAGE_HW_RESOURCE_FREE(pGpu,
                                          RES_GET_CLIENT_HANDLE(pMemory),
                                          RES_GET_HANDLE(pMemory->pDevice),
                                          RES_GET_HANDLE(pMemory),
                                          flags,
                                          rmStatus);
    }

cleanup:
    // portMemFree() tolerates NULL, so partial-allocation paths land here too.
    portMemFree(pPageFormat);
    portMemFree(pInfo);
}
3218
heapFreeBlockCount_IMPL(OBJGPU * pGpu,Heap * pHeap,NvU32 * pCount)3219 NV_STATUS heapFreeBlockCount_IMPL(OBJGPU *pGpu, Heap *pHeap, NvU32 *pCount)
3220 {
3221 MEM_BLOCK *pMemBlock;
3222
3223 pMemBlock = pHeap->pFreeBlockList;
3224 *pCount = 0;
3225
3226 if (pMemBlock == NULL)
3227 {
3228 return NV_OK;
3229 }
3230
3231 do
3232 {
3233 (*pCount)++;
3234 pMemBlock = pMemBlock->u1.nextFree;
3235 } while (pMemBlock != pHeap->pFreeBlockList);
3236
3237 return NV_OK;
3238 }
3239
heapFreeBlockInfo_IMPL(OBJGPU * pGpu,Heap * pHeap,NvU32 Count,void * pVoidInfo)3240 NV_STATUS heapFreeBlockInfo_IMPL(OBJGPU *pGpu, Heap *pHeap, NvU32 Count, void *pVoidInfo)
3241 {
3242 NVOS32_BLOCKINFO *pBlockInfo = pVoidInfo;
3243 NvU32 actualCount;
3244 MEM_BLOCK *pMemBlock;
3245 NV_STATUS rmStatus = NV_ERR_GENERIC;
3246 MemoryManager *pMemoryManager = GPU_GET_MEMORY_MANAGER(pGpu);
3247 NvU64 maxCpuOffset;
3248
3249 heapFreeBlockCount(pGpu, pHeap, &actualCount);
3250
3251 if ((actualCount == Count) && (NULL != pBlockInfo))
3252 {
3253 if (actualCount == 0)
3254 {
3255 return NV_OK;
3256 }
3257
3258 maxCpuOffset = (pMemoryManager->Ram.mapRamSizeMb*0x100000) - 1;
3259 pMemBlock = pHeap->pFreeBlockList;
3260 actualCount = 0;
3261 do
3262 {
3263 pBlockInfo->startOffset = pMemBlock->begin;
3264 pBlockInfo->size = pMemBlock->end - pMemBlock->begin + 1;
3265 pBlockInfo->flags = 0x0;
3266 if (pBlockInfo->startOffset < maxCpuOffset)
3267 {
3268 pBlockInfo->flags |= NVOS32_FLAGS_BLOCKINFO_VISIBILITY_CPU;
3269 }
3270 pMemBlock = pMemBlock->u1.nextFree;
3271 pBlockInfo++;
3272 actualCount++;
3273 } while ((pMemBlock != pHeap->pFreeBlockList) && (actualCount < Count));
3274
3275 rmStatus = NV_OK;
3276 }
3277
3278 return rmStatus;
3279 }
3280
3281 /*!
3282 * @brief: Adjust heap free accounting
3283 *
3284 * @param[in] pHeap Heap pointer
3285 * @param[in] blockSize +: Size of block being freed
3286 * -: Size of block being allocated
3287 * @param[in] internalHeap NV_TRUE if the allocation is 'INTERNAL'
3288 *
3289 * @return void
3290 */
3291
3292 static void
_heapAdjustFree(Heap * pHeap,NvS64 blockSize,NvBool internalHeap)3293 _heapAdjustFree
3294 (
3295 Heap *pHeap,
3296 NvS64 blockSize,
3297 NvBool internalHeap
3298 )
3299 {
3300 pHeap->free += blockSize;
3301
3302 NV_ASSERT(pHeap->free <= pHeap->total);
3303 if(pHeap->free > pHeap->total)
3304 {
3305 DBG_BREAKPOINT();
3306 }
3307
3308 // Collect data on internal/external heap usage
3309 if (internalHeap)
3310 {
3311 pHeap->currInternalUsage -= blockSize;
3312 pHeap->peakInternalUsage = NV_MAX(pHeap->peakInternalUsage, pHeap->currInternalUsage);
3313 }
3314 else
3315 {
3316 pHeap->currExternalUsage -= blockSize;
3317 pHeap->peakExternalUsage = NV_MAX(pHeap->peakExternalUsage, pHeap->currExternalUsage);
3318 }
3319 }
3320
/*!
 * @brief Carve an allocation out of a candidate free block.
 *
 * Given a free block known to contain the range [allocLo, allocHi], this
 * either consumes the block exactly, trims it from one end, or splits it in
 * two, updating the heap's block list, free list, and rb-tree accordingly.
 *
 * @param[in]  pGpu           OBJGPU pointer
 * @param[in]  pBlockFree     Free block to allocate from
 * @param[out] ppBlockNew     Receives the newly allocated block (NULL on error)
 * @param[out] ppBlockSplit   Receives the split remainder block, if any
 * @param[in]  pHeap          Heap being modified
 * @param[in]  pAllocRequest  Allocation request (owner/type come from here)
 * @param[in]  memHandle      Memory handle recorded on the new block
 * @param[in]  pAllocData     Resolved allocation bounds (allocLo/Hi/Al/Size)
 * @param[in]  pFbAllocInfo   FB allocation info (format, attr2, size)
 * @param[in]  alignPad       Padding between aligned base and usable offset
 * @param[out] offset         Receives the usable offset (align + alignPad)
 *
 * @return NV_OK on success; NV_ERR_NO_MEMORY if a needed MEM_BLOCK cannot be
 *         allocated or the request does not fit in pBlockFree. Note that
 *         rb-tree update failures after the list surgery has begun are
 *         reported as success with the noncontig info out of sync (see the
 *         _exit comments below).
 */
static NV_STATUS
_heapProcessFreeBlock
(
    OBJGPU *pGpu,
    MEM_BLOCK *pBlockFree,
    MEM_BLOCK **ppBlockNew,
    MEM_BLOCK **ppBlockSplit,
    Heap *pHeap,
    MEMORY_ALLOCATION_REQUEST *pAllocRequest,
    NvHandle memHandle,
    OBJHEAP_ALLOC_DATA *pAllocData,
    FB_ALLOC_INFO *pFbAllocInfo,
    NvU64 alignPad,
    NvU64 *offset
)
{
    NV_MEMORY_ALLOCATION_PARAMS *pVidHeapAlloc = pAllocRequest->pUserParams;
    MEM_BLOCK *pBlockNew = NULL, *pBlockSplit = NULL;
    OBJOS *pOS = GPU_GET_OS(pGpu);
    NV_STATUS status = NV_OK;

    // Case 1: the request covers the free block exactly — reuse it in place.
    if ((pAllocData->allocLo == pBlockFree->begin) &&
        (pAllocData->allocHi == pBlockFree->end))
    {
        //
        // Wow, exact match so replace free block.
        // Remove from free list.
        //
        pBlockFree->u1.nextFree->u0.prevFree = pBlockFree->u0.prevFree;
        pBlockFree->u0.prevFree->u1.nextFree = pBlockFree->u1.nextFree;

        if (pHeap->pFreeBlockList == pBlockFree)
        {
            //
            // This could be the last free block.
            //
            if (pBlockFree->u1.nextFree == pBlockFree)
                pHeap->pFreeBlockList = NULL;
            else
                pHeap->pFreeBlockList = pBlockFree->u1.nextFree;
        }

        //
        // Set pVidHeapAlloc->owner/pVidHeapAlloc->type values here.
        // Don't move because some fields are unions.
        //
        pBlockFree->owner    = pVidHeapAlloc->owner;
        pBlockFree->mhandle  = memHandle;
        pBlockFree->refCount = 1;
        pBlockFree->u0.type  = pVidHeapAlloc->type;
        pBlockFree->align    = pAllocData->allocAl;
        pBlockFree->alignPad = alignPad;
        pBlockFree->format   = pFbAllocInfo->format;

        // tail end code below assumes 'blockNew' is the new block
        pBlockNew = pBlockFree;
    }
    // Case 2: the request fits strictly inside the free block — trim or split.
    else if ((pAllocData->allocLo >= pBlockFree->begin) &&
             (pAllocData->allocHi <= pBlockFree->end))
    {
        //
        // Found a fit.
        // It isn't exact, so we'll have to do a split
        //
        pBlockNew = portMemAllocNonPaged(sizeof(MEM_BLOCK));
        if (pBlockNew == NULL)
        {
            // Exit with failure and free any local allocations
            NV_ASSERT(0);
            status = NV_ERR_NO_MEMORY;
            goto _heapProcessFreeBlock_error;
        }

        portMemSet(pBlockNew, 0, sizeof(MEM_BLOCK));

        pBlockNew->owner    = pVidHeapAlloc->owner;
        pBlockNew->mhandle  = memHandle;
        pBlockNew->refCount = 1;
        pBlockNew->u0.type  = pVidHeapAlloc->type;
        pBlockNew->begin    = pAllocData->allocLo;
        pBlockNew->align    = pAllocData->allocAl;
        pBlockNew->alignPad = alignPad;
        pBlockNew->end      = pAllocData->allocHi;
        pBlockNew->format   = pFbAllocInfo->format;

        if (gpuIsCacheOnlyModeEnabled(pGpu))
        {
            //
            // In L2 Cache only mode, set the beginning of the new allocation
            // block to aligned (allocAl) offset rather then the start of
            // the free block (allocLo). And that the end of the new block is
            // is calculated as (allocSize - 1) from the beginning.
            // This insures that we don't "over allocate" for the surface in the
            // case where start of the free block is not properly aligned for both
            // the grow down and grow up cases.
            // Only applying this in L2 cache mode for now, as we don't want to "waste"
            // L2 cache space, though wonder if there are any implications to doing
            // it this way in normal operation.
            //
            pBlockNew->begin = pAllocData->allocAl;
            pBlockNew->end   = pBlockNew->begin + pAllocData->allocSize - 1;
        }

        // Case 2a: free space remains on BOTH sides — split into three pieces.
        if ((pBlockFree->begin < pBlockNew->begin) &&
            (pBlockFree->end > pBlockNew->end))
        {
            // Split free block in two.
            pBlockSplit = portMemAllocNonPaged(sizeof(MEM_BLOCK));
            if (pBlockSplit == NULL)
            {
                // Exit with failure and free any local allocations
                status = NV_ERR_NO_MEMORY;
                goto _heapProcessFreeBlock_error;
            }

            portMemSet(pBlockSplit, 0, sizeof(MEM_BLOCK));

            // remove free block from rb-tree since node's range will be changed
            if (NV_OK != (status = _heapUpdate(pHeap, pBlockFree, BLOCK_REMOVE)))
            {
                // Exit with failure and free any local allocations
                goto _heapProcessFreeBlock_error;
            }

            // pBlockSplit takes the high remainder; pBlockFree keeps the low one.
            pBlockSplit->owner = NVOS32_BLOCK_TYPE_FREE;
            pBlockSplit->format= 0;
            pBlockSplit->begin = pBlockNew->end + 1;
            pBlockSplit->align = pBlockSplit->begin;
            pBlockSplit->alignPad = 0;
            pBlockSplit->end   = pBlockFree->end;
            pBlockFree->end    = pBlockNew->begin - 1;
            //
            // Insert free split block into free list.
            //
            pBlockSplit->u1.nextFree = pBlockFree->u1.nextFree;
            pBlockSplit->u0.prevFree = pBlockFree;
            pBlockSplit->u1.nextFree->u0.prevFree = pBlockSplit;
            pBlockFree->u1.nextFree = pBlockSplit;
            //
            // Insert new and split blocks into block list.
            //
            pBlockNew->next = pBlockSplit;
            pBlockNew->prev = pBlockFree;
            pBlockSplit->next = pBlockFree->next;
            pBlockSplit->prev = pBlockNew;
            pBlockFree->next = pBlockNew;
            pBlockSplit->next->prev = pBlockSplit;

            // update numBlocks count
            pHeap->numBlocks++;

            // re-insert updated free block into rb-tree
            if (NV_OK != (status = _heapUpdate(pHeap, pBlockFree, BLOCK_SIZE_CHANGED)))
            {
                //
                // Exit and report success. The new block was allocated, but the
                // noncontig info is now out-of-sync with reality.
                //
                NV_PRINTF(LEVEL_ERROR,
                          "_heapUpdate failed to _SIZE_CHANGE block\n");
                goto _heapProcessFreeBlock_exit;
            }

            // insert new and split blocks into rb-tree
            if (NV_OK != (status = _heapUpdate(pHeap, pBlockNew, BLOCK_ADD)))
            {
                //
                // Exit and report success. The new block was allocated, but the
                // noncontig info is now out-of-sync with reality.
                //
                NV_PRINTF(LEVEL_ERROR, "_heapUpdate failed to _ADD block\n");
                goto _heapProcessFreeBlock_exit;
            }

            if (NV_OK != (status = _heapUpdate(pHeap, pBlockSplit, BLOCK_ADD)))
            {
                //
                // Exit and report success. The new block was allocated, but the
                // noncontig info is now out-of-sync with reality.
                //
                NV_PRINTF(LEVEL_ERROR, "_heapUpdate failed to _ADD block\n");
                goto _heapProcessFreeBlock_exit;
            }
        }
        // Case 2b: the allocation abuts the free block's end — trim the tail.
        else if (pBlockFree->end == pBlockNew->end)
        {
            // remove free block from rb-tree since node's range will be changed
            if (NV_OK != (status = _heapUpdate(pHeap, pBlockFree, BLOCK_REMOVE)))
            {
                // Exit with failure and free any local allocations
                goto _heapProcessFreeBlock_error;
            }

            //
            // New block inserted after free block.
            //
            pBlockFree->end = pBlockNew->begin - 1;
            pBlockNew->next = pBlockFree->next;
            pBlockNew->prev = pBlockFree;
            pBlockFree->next->prev = pBlockNew;
            pBlockFree->next = pBlockNew;

            // re-insert updated free block into rb-tree
            if (NV_OK != (status = _heapUpdate(pHeap, pBlockFree, BLOCK_SIZE_CHANGED)))
            {
                //
                // Exit and report success. The new block was allocated, but the
                // noncontig info is now out-of-sync with reality.
                //
                NV_PRINTF(LEVEL_ERROR,
                          "_heapUpdate failed to _SIZE_CHANGE block\n");
                goto _heapProcessFreeBlock_exit;
            }

            // insert new block into rb-tree
            if (NV_OK != (status = _heapUpdate(pHeap, pBlockNew, BLOCK_ADD)))
            {
                //
                // Exit and report success. The new block was allocated, but the
                // noncontig info is now out-of-sync with reality.
                //
                NV_PRINTF(LEVEL_ERROR, "_heapUpdate failed to _ADD block\n");
                goto _heapProcessFreeBlock_exit;
            }
        }
        // Case 2c: the allocation abuts the free block's start — trim the head.
        else if (pBlockFree->begin == pBlockNew->begin)
        {
            // remove free block from rb-tree since node's range will be changed
            if (NV_OK != (status = _heapUpdate(pHeap, pBlockFree, BLOCK_REMOVE)))
            {
                // Exit with failure and free any local allocations
                goto _heapProcessFreeBlock_error;
            }

            //
            // New block inserted before free block.
            //
            pBlockFree->begin = pBlockNew->end + 1;
            pBlockFree->align = pBlockFree->begin;
            pBlockNew->next = pBlockFree;
            pBlockNew->prev = pBlockFree->prev;
            pBlockFree->prev->next = pBlockNew;
            pBlockFree->prev = pBlockNew;
            if (pHeap->pBlockList == pBlockFree)
                pHeap->pBlockList = pBlockNew;

            // re-insert updated free block into rb-tree
            if (NV_OK != (status = _heapUpdate(pHeap, pBlockFree, BLOCK_SIZE_CHANGED)))
            {
                //
                // Exit and report success. The new block was allocated, but the
                // noncontig info is now out-of-sync with reality.
                //
                NV_PRINTF(LEVEL_ERROR,
                          "_heapUpdate failed to _SIZE_CHANGE block\n");
                goto _heapProcessFreeBlock_exit;
            }

            // insert new block into rb-tree
            if (NV_OK != (status = _heapUpdate(pHeap, pBlockNew, BLOCK_ADD)))
            {
                //
                // Exit and report success. The new block was allocated, but the
                // noncontig info is now out-of-sync with reality.
                //
                NV_PRINTF(LEVEL_ERROR, "_heapUpdate failed to _ADD block\n");
                goto _heapProcessFreeBlock_exit;
            }
        }
        else
        {
            // Unreachable given the containment check above; treated as failure.
            status = NV_ERR_NO_MEMORY;
            // Exit with failure and free any local allocations
            goto _heapProcessFreeBlock_error;
        }

        pHeap->numBlocks++;
    }

    // The request did not fit in pBlockFree at all.
    if (NULL == pBlockNew)
        status = NV_ERR_NO_MEMORY;

    _heapProcessFreeBlock_error:
    if (status != NV_OK)
    {
        NV_PRINTF(LEVEL_ERROR, "failed to allocate block\n");

        // free(NULL) is a no-op, so partially-allocated paths are safe here.
        portMemFree(pBlockNew);
        portMemFree(pBlockSplit);

        *ppBlockNew = NULL;
        *ppBlockSplit = NULL;

        return status;
    }

    // Reached on success, or on a non-fatal rb-tree update failure (the gotos
    // above) — in both cases the allocation stands and NV_OK is returned.
    _heapProcessFreeBlock_exit:
    *ppBlockNew = pBlockNew;
    *ppBlockSplit = pBlockSplit;

    // alignPad == 0 for all but >= NV5x
    *offset = pBlockNew->align + pBlockNew->alignPad;

    // Reduce free amount by allocated block size.
    _heapAdjustFree(pHeap, -((NvS64) (pBlockNew->end - pBlockNew->begin + 1)),
        FLD_TEST_DRF(OS32, _ATTR2, _INTERNAL, _YES, pFbAllocInfo->pageFormat->attr2));

    // Notify the OS layer of internal (RM-owned) reservations.
    if (FLD_TEST_DRF(OS32, _ATTR2, _INTERNAL, _YES, pFbAllocInfo->pageFormat->attr2))
    {
        pOS->osInternalReserveAllocCallback(*offset, pFbAllocInfo->size, pGpu->gpuId);
    }

    return NV_OK;
}
3635
3636 static void
_heapAddBlockToNoncontigList(Heap * pHeap,MEM_BLOCK * pBlock)3637 _heapAddBlockToNoncontigList
3638 (
3639 Heap *pHeap,
3640 MEM_BLOCK *pBlock
3641 )
3642 {
3643 if (NULL == pHeap->pNoncontigFreeBlockList)
3644 {
3645 pHeap->pNoncontigFreeBlockList = pBlock;
3646 pBlock->nextFreeNoncontig = NULL;
3647 pBlock->prevFreeNoncontig = NULL;
3648 }
3649 else
3650 {
3651 MEM_BLOCK *pNextBlock = pHeap->pNoncontigFreeBlockList;
3652 NvU64 size, nextSize = 0;
3653 size = pBlock->end - pBlock->begin + 1;
3654
3655 NV_ASSERT(pBlock->prevFreeNoncontig == NULL &&
3656 pBlock->nextFreeNoncontig == NULL);
3657
3658 // The noncontig block list is arranged in the descending order of size
3659 while (NULL != pNextBlock)
3660 {
3661 nextSize = pNextBlock->end - pNextBlock->begin + 1;
3662
3663 if (size > nextSize)
3664 {
3665 // Insert pBlock in front of pNextBlock
3666 pBlock->prevFreeNoncontig = pNextBlock->prevFreeNoncontig;
3667 pBlock->nextFreeNoncontig = pNextBlock;
3668 pNextBlock->prevFreeNoncontig = pBlock;
3669
3670 if (pHeap->pNoncontigFreeBlockList == pNextBlock)
3671 {
3672 // We inserted at the head of the list
3673 pHeap->pNoncontigFreeBlockList = pBlock;
3674 }
3675 else
3676 {
3677 pBlock->prevFreeNoncontig->nextFreeNoncontig = pBlock;
3678 }
3679
3680 break;
3681 }
3682
3683 if (NULL == pNextBlock->nextFreeNoncontig)
3684 {
3685 // We reached the end of the list, insert here
3686 pNextBlock->nextFreeNoncontig = pBlock;
3687 pBlock->prevFreeNoncontig = pNextBlock;
3688 pBlock->nextFreeNoncontig = NULL;
3689
3690 break;
3691 }
3692
3693 pNextBlock = pNextBlock->nextFreeNoncontig;
3694 }
3695 }
3696 }
3697
3698 static void
_heapRemoveBlockFromNoncontigList(Heap * pHeap,MEM_BLOCK * pBlock)3699 _heapRemoveBlockFromNoncontigList
3700 (
3701 Heap *pHeap,
3702 MEM_BLOCK *pBlock
3703 )
3704 {
3705 //
3706 // Unless pBlock is at the head of the list (and is the only element in the
3707 // list), both prev and nextFreeNoncontig cannot be NULL at the same time.
3708 // That would imply a bug in the noncontig list building code.
3709 //
3710 NV_ASSERT(pBlock == pHeap->pNoncontigFreeBlockList ||
3711 pBlock->prevFreeNoncontig != NULL ||
3712 pBlock->nextFreeNoncontig != NULL);
3713
3714 // Removing first block?
3715 if (pHeap->pNoncontigFreeBlockList == pBlock)
3716 {
3717 pHeap->pNoncontigFreeBlockList = pBlock->nextFreeNoncontig;
3718 }
3719 else
3720 {
3721 if (NULL != pBlock->prevFreeNoncontig)
3722 {
3723 pBlock->prevFreeNoncontig->nextFreeNoncontig
3724 = pBlock->nextFreeNoncontig;
3725 }
3726 }
3727
3728 // Removing last block?
3729 if (NULL != pBlock->nextFreeNoncontig)
3730 {
3731 pBlock->nextFreeNoncontig->prevFreeNoncontig
3732 = pBlock->prevFreeNoncontig;
3733 }
3734
3735 pBlock->nextFreeNoncontig = pBlock->prevFreeNoncontig = NULL;
3736 }
3737
3738 //
3739 // The allocation is done using two loops. The first loop traverses the heap's
3740 // free list to build a list of blocks that can satisfy the allocation. If we
3741 // don't find enough blocks, we can exit quickly without needing to unwind,
3742 // which can happen quite frequently in low memory or heavy fragmentation
3743 // conditions.
3744 //
3745 // The second loop does the actual allocations. It calls _heapProcessFreeBlock()
3746 // to cut down a free block into the required size, which can fail, albeit
3747 // rarely. We need to unwind at that point. The two loops keep the unwinding
3748 // as infrequent as possible.
3749 //
/*!
 * @brief Satisfy an allocation as a chain of noncontiguous free blocks.
 *
 * Walks the heap's noncontig free list (kept sorted by descending size),
 * carving whole pages out of each usable block until the requested page
 * count is met, and programs the PTEs of @p pMemDesc along the way. On any
 * failure, every block taken so far is freed back to the heap.
 *
 * @param[in]  pGpu                OBJGPU pointer
 * @param[in]  hClient             Client handle (used for texture tracking)
 * @param[in]  pHeap               Heap to allocate from
 * @param[in]  pAllocRequest       Allocation request (range, type, flags)
 * @param[in]  memHandle           Memory handle recorded on each block
 * @param[in]  pAllocData          Resolved size/alignment data; its alloc*
 *                                 fields are rewritten per carved block
 * @param[in]  pFbAllocInfo        FB allocation info (page size attrs, pad)
 * @param[in]  textureClientIndex  Texture client slot, or 0xFFFFFFFF if
 *                                 untracked
 * @param[in]  alignPad            Padding applied to the first block only
 * @param[out] offset              NOTE(review): shadowed by a loop-local
 *                                 'offset' below; the result is returned via
 *                                 pFbAllocInfo->offset instead — confirm
 *                                 callers do not rely on this out-param
 * @param[in]  pMemDesc            Memory descriptor whose PTEs are filled in
 * @param[out] ppHwResource        Receives the first block's HWRESOURCE_INFO
 *
 * @return NV_OK on success, NV_ERR_INVALID_ARGUMENT for bad page size
 *         attributes, NV_ERR_NO_MEMORY if not enough free pages exist.
 */
static NV_STATUS
_heapAllocNoncontig
(
    OBJGPU *pGpu,
    NvHandle hClient,
    Heap *pHeap,
    MEMORY_ALLOCATION_REQUEST *pAllocRequest,
    NvHandle memHandle,
    OBJHEAP_ALLOC_DATA *pAllocData,
    FB_ALLOC_INFO *pFbAllocInfo,
    NvU32 textureClientIndex,
    NvU64 alignPad,
    NvU64 *offset,
    MEMORY_DESCRIPTOR *pMemDesc,
    HWRESOURCE_INFO **ppHwResource
)
{
    KernelGmmu *pKernelGmmu = GPU_GET_KERNEL_GMMU(pGpu);
    NV_MEMORY_ALLOCATION_PARAMS *pVidHeapAlloc = pAllocRequest->pUserParams;
    NvBool bFirstBlock = NV_TRUE;
    NvU32 pteIndexOffset = 0, i = 0;
    NvU32 blockId = 0;
    NV_STATUS status = NV_OK;
    NvU64 pageSize = 0;
    NvS64 numPagesLeft;
    MEM_BLOCK *pCurrBlock;
    MEM_BLOCK *pNextBlock;
    MEM_BLOCK *pSavedAllocList = NULL;
    MEM_BLOCK *pLastBlock = NULL;
    MEM_BLOCK *pBlockNew, *pBlockSplit;
    NvU32 k, shuffleStride = 1;
    NvU64 addr, j, numPages;
    RM_ATTR_PAGE_SIZE pageSizeAttr = dmaNvos32ToPageSizeAttr(pFbAllocInfo->retAttr, pFbAllocInfo->retAttr2);

    // Translate the page-size attribute into a byte page size, rejecting
    // sizes the GMMU cannot support.
    switch (pageSizeAttr)
    {
        case RM_ATTR_PAGE_SIZE_DEFAULT:
        case RM_ATTR_PAGE_SIZE_INVALID:
            NV_PRINTF(LEVEL_ERROR, "Invalid page size attribute!\n");
            return NV_ERR_INVALID_ARGUMENT;
        case RM_ATTR_PAGE_SIZE_4KB:
            pageSize = RM_PAGE_SIZE;
            break;
        case RM_ATTR_PAGE_SIZE_BIG:
        {
            pageSize = kgmmuGetMaxBigPageSize_HAL(pKernelGmmu);
            break;
        }
        case RM_ATTR_PAGE_SIZE_HUGE:
        {
            NV_ASSERT_OR_RETURN(kgmmuIsHugePageSupported(pKernelGmmu),
                              NV_ERR_INVALID_ARGUMENT);
            pageSize = RM_PAGE_SIZE_HUGE;
            break;
        }
        case RM_ATTR_PAGE_SIZE_512MB:
        {
            NV_ASSERT_OR_RETURN(kgmmuIsPageSize512mbSupported(pKernelGmmu),
                              NV_ERR_INVALID_ARGUMENT);
            pageSize = RM_PAGE_SIZE_512M;
            break;
        }
    }

    //
    // pAllocData->allocSize already incorporates pFbAllocInfo->size,
    // which in turn is up aligned to pFbAllocInfo->align and alignPad,
    // so nothing else needs to be added here.
    //
    numPagesLeft = RM_ALIGN_UP(pAllocData->allocSize, pageSize) / pageSize;
    NV_PRINTF(LEVEL_INFO,
              "pageSize: 0x%llx, numPagesLeft: 0x%llx, allocSize: 0x%llx\n",
              pageSize / 1024, numPagesLeft, pAllocData->allocSize);

    // Main loop: take pages from the largest-first noncontig free list until
    // the request is satisfied or the list is exhausted.
    for (pCurrBlock = pHeap->pNoncontigFreeBlockList;
         numPagesLeft > 0 && NULL != pCurrBlock;
         pCurrBlock = pNextBlock)
    {
        NvU64 blockBegin = 0;
        NvU64 blockEnd = 0;
        NvU64 blockAligned;
        NvU64 blockSizeInPages, blockSize;
        // NOTE(review): these two locals shadow the function parameters of
        // the same names; inside the loop only the locals are visible.
        NvU64 alignPad;
        NvU64 pteAddress;
        NvU64 offset;

        // Get the next free block pointer before lists get re-linked
        pNextBlock = pCurrBlock->nextFreeNoncontig;

        // Selecting blocks: Is this block completely out of range?
        if ((pCurrBlock->end < pVidHeapAlloc->rangeLo) ||
            (pCurrBlock->begin > pVidHeapAlloc->rangeHi))
        {
            continue;
        }

        // Find the intersection of the block and the specified range.
        blockBegin = ((pVidHeapAlloc->rangeLo >= pCurrBlock->begin) ?
            pVidHeapAlloc->rangeLo : pCurrBlock->begin);
        blockEnd = ((pVidHeapAlloc->rangeHi <= pCurrBlock->end) ?
            pVidHeapAlloc->rangeHi : pCurrBlock->end);

        // Check if the derived block is usable
        if ((blockBegin >= blockEnd) ||
            (blockEnd-blockBegin+1 < pageSize))
        {
            // Skip if the usable size is invalid or insufficient.
            continue;
        }

        //
        // Checks above should protect against underflow, but we might still
        // end up with a post-aligned block that is unusable.
        // "end" should be RM_PAGE_SIZE-1 aligned.
        //
        blockBegin = RM_ALIGN_UP(blockBegin, pageSize);
        blockEnd = RM_ALIGN_DOWN(blockEnd+1, pageSize)-1;

        if (blockBegin >= blockEnd)
        {
            //
            // When blockSize < page_size and blockBegin and/or blockEnd are
            // not page aligned initially, the above alignment can cause
            // blockBegin to become > blockEnd.
            //
            continue;
        }

        // The first block has to handle pAllocData->alignment
        if (bFirstBlock)
        {
            // Align the starting address of the block to
            // pAllocData->alignment.
            blockAligned = (blockBegin +
                pAllocData->alignment - 1) / pAllocData->alignment
                * pAllocData->alignment;

            //
            // Check that we'll still be within this block when
            // alignPad is added.
            //
            if (blockAligned + pFbAllocInfo->alignPad > blockEnd)
            {
                continue;
            }

            // Then make sure this is page aligned.
            blockBegin = RM_ALIGN_DOWN(blockAligned, pageSize);

            //
            // blockBegin is now the page aligned starting address of a
            // block that holds an address aligned to
            // pAllocData->alignment, and can take padding from
            // alignPad.
            //
        }
        else
        {
            blockAligned = blockBegin;
        }

        blockSizeInPages = (blockEnd - blockBegin + 1) / pageSize;

        // A usable block has to supply at least one page
        if (blockSizeInPages < 1)
        {
            continue;
        }

        // blockEnd may need to be corrected for the last page
        if (((NvU64)numPagesLeft < blockSizeInPages))
        {
            blockEnd = blockBegin + pageSize * numPagesLeft - 1;
            blockSizeInPages = numPagesLeft;
        }

        blockSize = blockEnd - blockBegin + 1;

        numPagesLeft -= blockSizeInPages;

        NV_PRINTF(LEVEL_INFO,
                  "\tblockId: %d, blockBegin: 0x%llx, blockEnd: 0x%llx, blockSize: "
                  "0x%llx, blockSizeInPages: 0x%llx, numPagesLeft: 0x%llx\n",
                  blockId, blockBegin, blockEnd, blockSize, blockSizeInPages,
                  numPagesLeft >= 0 ? numPagesLeft : 0);

        blockId++;

        //
        // Set pAllocData values before the call to
        // _heapProcessFreeBlock()
        //
        pAllocData->allocLo = blockBegin;
        pAllocData->allocHi = blockEnd;
        pAllocData->allocAl = blockAligned;
        pAllocData->allocSize = blockSize;

        // Only the first block carries the caller's alignment padding.
        if (bFirstBlock)
        {
            alignPad = pFbAllocInfo->alignPad;
        }
        else
        {
            alignPad = 0;
        }

        //
        // Cut this new block down to size. pBlockNew will be the block to use
        // when this returns.
        //
        if (NV_OK != (status = _heapProcessFreeBlock(pGpu, pCurrBlock,
                        &pBlockNew, &pBlockSplit, pHeap, pAllocRequest,
                        memHandle, pAllocData, pFbAllocInfo,
                        alignPad, &offset)))
        {
            NV_PRINTF(LEVEL_ERROR,
                      "ERROR: Could not process free block, error: 0x%x\n",
                      status);
            goto unwind_and_exit;
        }

        // Never fails
        (void)_heapUpdate(pHeap, pBlockNew, BLOCK_FREE_STATE_CHANGED);

        //
        // Save the allocation off in case we need to unwind
        // This also ensures that all blocks that make up the noncontig
        // allocation are strung together in a list, which is useful when
        // freeing them.
        //
        if (pSavedAllocList == NULL)
        {
            // First block
            pSavedAllocList = pLastBlock = pBlockNew;
            pSavedAllocList->noncontigAllocListNext = NULL;
        }
        else
        {
            pLastBlock->noncontigAllocListNext = pBlockNew;
            pLastBlock = pBlockNew;
            pLastBlock->noncontigAllocListNext = NULL;
        }

        pteAddress = RM_PAGE_ALIGN_DOWN(pBlockNew->begin);

        // Number of pages this block contributes, clamped to the PTEs the
        // memdesc still has room for (memdesc PTEs are 4K-granular).
        numPages = NV_MIN(blockSizeInPages, ((pMemDesc->PageCount - pteIndexOffset) * RM_PAGE_SIZE) / pageSize);

        if (pHeap->getProperty(pHeap, PDB_PROP_HEAP_PAGE_SHUFFLE))
        {
            i = pHeap->shuffleStrideIndex;
            shuffleStride = pHeap->shuffleStrides[i];

            // Select a stride greater the the number of pages
            while(numPages < shuffleStride && i > 0)
            {
                i--;
                shuffleStride = pHeap->shuffleStrides[i];
            }

            pHeap->shuffleStrideIndex = (pHeap->shuffleStrideIndex + 1) % SHUFFLE_STRIDE_MAX;
        }

        //
        // Shuffling logic.
        // We scatter the contiguous pages at multiple of stride length.
        // For 5 pages with stride length 2, we have the following shuffling.
        // Before: 0, 1, 2, 3, 4
        // After : 0, 2, 4, 1, 3
        //
        for (i = 0; i < shuffleStride; i++)
        {
            for(j = i; j < numPages; j = j + shuffleStride)
            {
                addr = pteAddress + j * pageSize;
                for (k = 0; k < pageSize/RM_PAGE_SIZE; k++)
                {
                    //
                    // The memDesc has everything in terms of 4k pages.
                    // If allocationSize % pageSize != 0, there will not be enough PTEs in
                    // the memdesc for completely specifying the final block, but that's
                    // ok. The mapping code will be mapping in the whole pageSize final
                    // block anyway, and the heapBlockFree() code will free the whole
                    // block.
                    //
                    memdescSetPte(pMemDesc, AT_GPU, pteIndexOffset, addr);
                    pteIndexOffset++;
                    addr += RM_PAGE_SIZE;
                }
            }
        }

        //
        // If a client calls us with pVidHeapAlloc->type ==
        // NVOS32_TYPE_TEXTURE, but where flags are non-zero, we won't
        // call objHeapSetTexturePlacement and initialize
        // textureClientIndex to a proper value (default is 0xFFFFFFFF).
        // In that case, we won't track this texture allocation. Bug
        // 79586.
        //
        if (pVidHeapAlloc->type == NVOS32_TYPE_TEXTURE &&
            textureClientIndex != 0xFFFFFFFF)
        {
            pBlockNew->textureId = hClient;
            if (bFirstBlock)
                pHeap->textureData[textureClientIndex].refCount++;
        }
        else
        {
            pBlockNew->textureId = 0;
        }

        // The first block supplies the reported offset and HW resource info.
        if (bFirstBlock)
        {
            pFbAllocInfo->offset = offset;
            *ppHwResource = &pBlockNew->hwResource;
        }

        pBlockNew->pMemDesc = pMemDesc;
        pBlockNew->allocedMemDesc = bFirstBlock; // avoid multiple frees

        bFirstBlock = NV_FALSE;
    }

    // Did we find enough pages?
    if (numPagesLeft > 0)
    {
        NV_PRINTF(LEVEL_INFO,
                  "Could not satisfy request: allocSize: 0x%llx\n",
                  pAllocData->allocSize);

        status = NV_ERR_NO_MEMORY;

    unwind_and_exit:

        // Return every block taken so far to the heap; failures to free are
        // logged but do not stop the unwind.
        while (pSavedAllocList != NULL)
        {
            NV_STATUS unwindStatus;

            pCurrBlock = pSavedAllocList->noncontigAllocListNext;

            unwindStatus = _heapBlockFree(pGpu, pHeap, hClient, pFbAllocInfo->hDevice, pSavedAllocList);

            if (unwindStatus != NV_OK)
            {
                NV_PRINTF(LEVEL_ERROR,
                          "ERROR: Could not free block, error 0x%x!\n",
                          unwindStatus);
            }

            pSavedAllocList = pCurrBlock;
        }
    }
    return status;
}
4104
4105 //
4106 // Explanation of BlockAction values:
4107 // - BLOCK_ADD,
4108 // A new block is added to the heap
4109 // o The block's node structure needs to be inited.
4110 // o The block is added to the rb-tree.
4111 // o The block is added to the noncontig freelist.
4112 // - BLOCK_REMOVE
4113 // A block is removed from the heap for good
4114 // o The block is removed from the rb-tree.
4115 // o The block is removed from the noncontig freelist.
4116 // - BLOCK_SIZE_CHANGED
4117 // A block's size has changed
4118 // o The rb-tree needs to be updated.
4119 // o The noncontig freelist needs to be updated.
4120 // - BLOCK_FREE_STATE_CHANGED
4121 // if pBlock->owner != NVOS32_BLOCK_TYPE_FREE
4122 // A block is allocated to a client
4123 // o The block is removed from the noncontig freelist.
4124 // else
4125 // A block is freed by the client
4126 // o The block is added to the noncontig freelist.
4127 //
4128 static NV_STATUS
_heapUpdate(Heap * pHeap,MEM_BLOCK * pBlock,BlockAction action)4129 _heapUpdate
4130 (
4131 Heap *pHeap,
4132 MEM_BLOCK *pBlock,
4133 BlockAction action
4134 )
4135 {
4136 // A new block is to be added, init its node structure.
4137 if (BLOCK_ADD == action)
4138 {
4139 portMemSet((void *)&pBlock->node, 0, sizeof(NODE));
4140 pBlock->node.Data = (void *)pBlock;
4141 }
4142
4143 // Both new and updated blocks need to be re-inserted into the rb tree.
4144 if ((BLOCK_SIZE_CHANGED == action) ||
4145 (BLOCK_ADD == action))
4146 {
4147 pBlock->node.keyStart = pBlock->begin;
4148 pBlock->node.keyEnd = pBlock->end;
4149
4150 if (btreeInsert(&pBlock->node, &pHeap->pBlockTree) != NV_OK)
4151 {
4152 NV_ASSERT_FAILED("btreeInsert failed to ADD/SIZE_CHANGE block");
4153 return NV_ERR_INVALID_STATE;
4154 }
4155 }
4156
4157 //
4158 // Updated, new and freed blocks need to be added back to the noncontig
4159 // freelist.
4160 //
4161 if ((BLOCK_SIZE_CHANGED == action) ||
4162 (BLOCK_ADD == action) ||
4163 (BLOCK_FREE_STATE_CHANGED == action &&
4164 pBlock->owner == NVOS32_BLOCK_TYPE_FREE))
4165 {
4166 _heapAddBlockToNoncontigList(pHeap, pBlock);
4167 }
4168
4169 // Remove the block from the heap
4170 if (BLOCK_REMOVE == action)
4171 {
4172 if (btreeUnlink(&pBlock->node, &pHeap->pBlockTree) != NV_OK)
4173 {
4174 NV_ASSERT_FAILED("btreeUnlink failed to REMOVE block");
4175 return NV_ERR_INVALID_STATE;
4176 }
4177 }
4178
4179 // An allocated block is only removed from the noncontig freelist.
4180 if ((BLOCK_REMOVE == action) ||
4181 ((BLOCK_FREE_STATE_CHANGED == action &&
4182 pBlock->owner != NVOS32_BLOCK_TYPE_FREE)))
4183 {
4184 _heapRemoveBlockFromNoncontigList(pHeap, pBlock);
4185 }
4186
4187 return NV_OK;
4188 }
4189
4190 static NvU32
_heapGetPageBlackListGranularity(void)4191 _heapGetPageBlackListGranularity(void)
4192 {
4193 return RM_PAGE_SIZE;
4194 }
4195
//
// This function blacklists pages from the heap.
// The addresses of the pages to blacklist are available from
// pHeap->blackListAddresses.
//
// For each requested address this creates a fixed-address memdesc and (for
// non-suballocator heaps) allocates the page so it can never be handed out
// again. Entries that cannot be processed are skipped, not treated as fatal.
//
NV_STATUS
heapBlackListPages_IMPL
(
    OBJGPU *pGpu,
    Heap   *pHeap
)
{
    MemoryManager *pMemoryManager = GPU_GET_MEMORY_MANAGER(pGpu);
    KernelMemorySystem *pKernelMemorySystem = GPU_GET_KERNEL_MEMORY_SYSTEM(pGpu);
    PMA *pPma = &pHeap->pmaObject;
    // i scans the input address list; j counts successfully blacklisted pages.
    NvU32 i = 0, j = 0;
    NV_STATUS status = NV_OK;
    BLACKLIST *pBlackList = &pHeap->blackList;
    BLACKLIST_ADDRESSES *pAddresses = &pHeap->blackListAddresses;
    NvU32 count = pHeap->blackListAddresses.count;
    NvU32 staticBlacklistSize, dynamicBlacklistSize;
    NvU32 dynamicRmBlackListedCount;
    NvU32 staticRmBlackListedCount;
    NvU16 maximumBlacklistPages = kmemsysGetMaximumBlacklistPages(pGpu, pKernelMemorySystem);

    // NOTE(review): pAddresses is the address of an embedded member, so this
    // check can never fire; kept as defensive boilerplate.
    if (NULL == pAddresses)
    {
        return NV_ERR_INVALID_ARGUMENT;
    }

    // Refuse to build the blacklist twice; caller must free it first.
    if (pBlackList->count != 0)
    {
        NV_PRINTF(LEVEL_ERROR, "Error: BlackList already exists!\n");
        return NV_ERR_INVALID_STATE;
    }

    //
    // We may not be able to allocate all pages requested, but alloc enough
    // space anyway
    //
    pBlackList->pBlacklistChunks = portMemAllocNonPaged(sizeof(BLACKLIST_CHUNK) * maximumBlacklistPages);
    if (NULL == pBlackList->pBlacklistChunks)
    {
        NV_PRINTF(LEVEL_ERROR, "Could not allocate memory for blackList!\n");
        return NV_ERR_NO_MEMORY;
    }

    portMemSet(pBlackList->pBlacklistChunks, 0, sizeof(BLACKLIST_CHUNK) * maximumBlacklistPages);

    dynamicRmBlackListedCount = 0;
    staticRmBlackListedCount = 0;
    for (i = 0, j = 0; i < count; i++)
    {
        // Skip entries that were invalidated (e.g. filtered out earlier).
        if (NV2080_CTRL_FB_OFFLINED_PAGES_INVALID_ADDRESS == pAddresses->data[i].address)
        {
            continue;
        }

        //
        // If PMA is enabled, only blacklist pages in the internal heap.
        // PMA blacklisting is handled in pmaRegisterRegion.
        //
        if (memmgrIsPmaInitialized(pMemoryManager))
        {
            if (heapIsPmaManaged(pGpu, pHeap, pAddresses->data[i].address, pAddresses->data[i].address))
            {
                // Skipping non-internal address
                continue;
            }
        }

        // Tally dynamically (DPR) vs statically offlined pages for size reporting below.
        if ((pAddresses->data[i].type == NV2080_CTRL_FB_OFFLINED_PAGES_SOURCE_DPR_MULTIPLE_SBE) ||
            (pAddresses->data[i].type == NV2080_CTRL_FB_OFFLINED_PAGES_SOURCE_DPR_DBE))
        {
            dynamicRmBlackListedCount++;
        }
        else
        {
            staticRmBlackListedCount++;
        }

        // Create a memdesc
        status = memdescCreate(&pBlackList->pBlacklistChunks[j].pMemDesc,
                               pGpu,
                               RM_PAGE_SIZE,
                               RM_PAGE_SIZE,
                               NV_TRUE,
                               ADDR_FBMEM,
                               NV_MEMORY_UNCACHED,
                               MEMDESC_FLAGS_FIXED_ADDRESS_ALLOCATE |
                               MEMDESC_FLAGS_SKIP_RESOURCE_COMPUTE);
        if (NV_OK != status)
        {
            // Non-fatal: clear the partially-filled chunk and move on.
            portMemSet(&pBlackList->pBlacklistChunks[j], 0, sizeof(BLACKLIST_CHUNK));
            NV_PRINTF(LEVEL_ERROR,
                      "Error 0x%x creating blacklisted page memdesc for address 0x%llx, skipping\n",
                      status, pAddresses->data[i].address);
            continue;
        }

        if (pHeap->heapType == HEAP_TYPE_PHYS_MEM_SUBALLOCATOR)
            pBlackList->pBlacklistChunks[j].pMemDesc->pHeap = pHeap;

        // This is how _FIXED_ADDRESS_ALLOCATE works
        memdescSetPte(pBlackList->pBlacklistChunks[j].pMemDesc,
                      AT_GPU, 0, RM_PAGE_ALIGN_DOWN(pAddresses->data[i].address));

        if (pHeap->heapType != HEAP_TYPE_PHYS_MEM_SUBALLOCATOR)
        {
            //
            // Allocate memory for this page. This is marked as an internal RM
            // allocation and WILL be saved/restored during suspend/resume.
            //
            memdescTagAlloc(status, NV_FB_ALLOC_RM_INTERNAL_OWNER_UNNAMED_TAG_79,
                            pBlackList->pBlacklistChunks[j].pMemDesc);
            if (NV_OK != status)
            {
                // No use for the memdesc if the page couldn't be allocated
                memdescDestroy(pBlackList->pBlacklistChunks[j].pMemDesc);

                portMemSet(&pBlackList->pBlacklistChunks[j], 0, sizeof(BLACKLIST_CHUNK));

                NV_PRINTF(LEVEL_ERROR,
                          "Error 0x%x blacklisting page at address 0x%llx, skipping\n",
                          status, pAddresses->data[i].address);
                continue;
            }
        }

        // Page blacklisting is successful, add entries to the BLACKLIST
        pBlackList->pBlacklistChunks[j].physOffset = pAddresses->data[i].address;
        pBlackList->pBlacklistChunks[j].size = RM_PAGE_SIZE;
        pBlackList->pBlacklistChunks[j].bIsValid = NV_TRUE;

        // If the page was successfully blacklisted, move to the next entry
        j++;
    }

    pBlackList->count = j;

    // Report blacklist sizes in KB: PMA-tracked sizes plus the RM-tracked pages above.
    pmaGetBlacklistSize(pPma, &dynamicBlacklistSize, &staticBlacklistSize);
    dynamicBlacklistSize = dynamicBlacklistSize >> 10;
    staticBlacklistSize  = staticBlacklistSize >> 10;

    dynamicBlacklistSize += (dynamicRmBlackListedCount * _heapGetPageBlackListGranularity()) >> 10;
    staticBlacklistSize  += (staticRmBlackListedCount * _heapGetPageBlackListGranularity()) >> 10;

    pHeap->dynamicBlacklistSize = dynamicBlacklistSize;
    pHeap->staticBlacklistSize  = staticBlacklistSize;

    if (0 == pBlackList->count)
    {
        // No address was blacklisted
        portMemFree(pBlackList->pBlacklistChunks);
        pBlackList->pBlacklistChunks = NULL;
    }

    return NV_OK;
}
4355
4356 //
4357 // This function frees all blacklisted pages.
4358 // The pHeap->blackList structure holds a list of memdescs, one for each
4359 // blacklisted page.
4360 //
4361 NV_STATUS
heapFreeBlackListedPages_IMPL(OBJGPU * pGpu,Heap * pHeap)4362 heapFreeBlackListedPages_IMPL
4363 (
4364 OBJGPU *pGpu,
4365 Heap *pHeap
4366 )
4367 {
4368 NvU32 i;
4369 BLACKLIST *pBlackList = &pHeap->blackList;
4370
4371 // Also free the blacklistAddresses data here
4372 if (pHeap->blackListAddresses.data)
4373 {
4374 portMemFree(pHeap->blackListAddresses.data);
4375 pHeap->blackListAddresses.count = 0;
4376 pHeap->blackListAddresses.data = NULL;
4377 }
4378
4379 if (0 == pBlackList->count)
4380 {
4381 return NV_OK;
4382 }
4383
4384 if (NULL == pBlackList->pBlacklistChunks)
4385 {
4386 return NV_ERR_INVALID_STATE;
4387 }
4388
4389 for (i = 0; i < pBlackList->count; i++)
4390 {
4391 if (pBlackList->pBlacklistChunks[i].bIsValid)
4392 {
4393 // Free the blacklisted page
4394 memdescFree(pBlackList->pBlacklistChunks[i].pMemDesc);
4395
4396 // Free the memdesc
4397 memdescDestroy(pBlackList->pBlacklistChunks[i].pMemDesc);
4398 }
4399 }
4400
4401 portMemFree(pBlackList->pBlacklistChunks);
4402
4403 pBlackList->count = 0;
4404 pBlackList->pBlacklistChunks = NULL;
4405
4406 return NV_OK;
4407 }
4408
4409 NV_STATUS
heapStorePendingBlackList_IMPL(OBJGPU * pGpu,Heap * pHeap,NvU64 pageAddressesWithEccOn,NvU64 pageAddressWithEccOff)4410 heapStorePendingBlackList_IMPL
4411 (
4412 OBJGPU *pGpu,
4413 Heap *pHeap,
4414 NvU64 pageAddressesWithEccOn,
4415 NvU64 pageAddressWithEccOff
4416 )
4417 {
4418 MemoryManager *pMemoryManager = GPU_GET_MEMORY_MANAGER(pGpu);
4419 KernelMemorySystem *pKernelMemorySystem = GPU_GET_KERNEL_MEMORY_SYSTEM(pGpu);
4420 NV_STATUS status = NV_OK;
4421 NvU64 physicalAddress;
4422 NvU64 pageNumber;
4423 BLACKLIST *pBlacklist = &pHeap->blackList;
4424 const MEMORY_SYSTEM_STATIC_CONFIG *pMemorySystemConfig =
4425 kmemsysGetStaticConfig(pGpu, GPU_GET_KERNEL_MEMORY_SYSTEM(pGpu));
4426
4427 if (pMemorySystemConfig->bEnabledEccFBPA)
4428 {
4429 physicalAddress = pageAddressesWithEccOn;
4430 }
4431 else
4432 {
4433 physicalAddress = pageAddressWithEccOff;
4434 }
4435
4436 pageNumber = (physicalAddress >> RM_PAGE_SHIFT);
4437
4438 // This code is called only when DBE happens, so marking it as type DBE
4439 status = heapAddPageToBlackList(pGpu, pHeap,
4440 DRF_VAL64(_HEAP, _PAGE_OFFLINE, _PAGE_NUMBER, pageNumber),
4441 NV2080_CTRL_FB_OFFLINED_PAGES_SOURCE_DPR_DBE);
4442 if (NV_OK != status)
4443 {
4444 // No more space in the blacklist
4445 NV_PRINTF(LEVEL_ERROR, "No more space in blacklist, status: %x!\n", status);
4446 return status;
4447 }
4448
4449 if (memmgrIsPmaInitialized(pMemoryManager))
4450 {
4451 if (heapIsPmaManaged(pGpu, pHeap, physicalAddress, physicalAddress))
4452 {
4453 NV_PRINTF(LEVEL_INFO, "Calling PMA helper function to blacklist page offset: %llx\n", physicalAddress);
4454 status = pmaAddToBlacklistTracking(&pHeap->pmaObject, physicalAddress);
4455 return status;
4456 }
4457 else
4458 {
4459 // blacklisting needs to be done like CBC error recovery
4460 return NV_ERR_RESET_REQUIRED;
4461 }
4462 }
4463 else
4464 {
4465 if (pMemoryManager->bEnableDynamicPageOfflining)
4466 {
4467 // adding a new entry to heap managed blacklist
4468 if (pBlacklist->count == kmemsysGetMaximumBlacklistPages(pGpu, pKernelMemorySystem))
4469 {
4470 NV_PRINTF(LEVEL_ERROR, "We have blacklisted maximum number of pages possible. returning error \n");
4471 return NV_ERR_INSUFFICIENT_RESOURCES;
4472 }
4473 portMemSet(&pBlacklist->pBlacklistChunks[pBlacklist->count], 0 , sizeof(BLACKLIST_CHUNK));
4474 pBlacklist->pBlacklistChunks[pBlacklist->count].physOffset = physicalAddress;
4475 pBlacklist->pBlacklistChunks[pBlacklist->count].size = RM_PAGE_SIZE;
4476 pBlacklist->pBlacklistChunks[pBlacklist->count].bPendingRetirement = NV_TRUE;
4477 pBlacklist->count++;
4478 }
4479 }
4480 return status;
4481 }
4482
4483 //
4484 // This function copies the addresses of pages to be blacklisted from
4485 // pPageNumbers into Heap's internal blackListAddresses structure.
4486 //
4487 NV_STATUS
heapStoreBlackList_IMPL(OBJGPU * pGpu,Heap * pHeap,NvU64 * pPageNumbersWithEccOn,NvU64 * pPageNumbersWithECcOff,NvU32 maxInputPages)4488 heapStoreBlackList_IMPL
4489 (
4490 OBJGPU *pGpu,
4491 Heap *pHeap,
4492 NvU64 *pPageNumbersWithEccOn,
4493 NvU64 *pPageNumbersWithECcOff,
4494 NvU32 maxInputPages
4495 )
4496 {
4497 NvU32 i;
4498 NvU64 *pPageNumbers;
4499 NV_STATUS status = NV_OK;
4500 const MEMORY_SYSTEM_STATIC_CONFIG *pMemorySystemConfig =
4501 kmemsysGetStaticConfig(pGpu, GPU_GET_KERNEL_MEMORY_SYSTEM(pGpu));
4502
4503 if (pMemorySystemConfig->bEnabledEccFBPA)
4504 {
4505 pPageNumbers = pPageNumbersWithEccOn;
4506 }
4507 else
4508 {
4509 pPageNumbers = pPageNumbersWithECcOff;
4510 }
4511
4512 for (i = 0; i < maxInputPages; i++)
4513 {
4514 //
4515 // Bug: 2999257
4516 // currently pre-Hopper we have 37b FB PA, whose PFN will be 25b
4517 // From Hopper+ we have 52b PA, whose PFN will be 40b PA and hence
4518 // the macro NV_INFOROM_BLACKLIST_PAGE_NUMBER width of 28b will not be
4519 // sufficient to capture the entire address, this needs to be fixed.
4520 //
4521 status = heapAddPageToBlackList(pGpu, pHeap,
4522 DRF_VAL64(_HEAP, _PAGE_OFFLINE, _PAGE_NUMBER, pPageNumbers[i]),
4523 (NvU32)DRF_VAL64(_HEAP, _PAGE_OFFLINE, _TYPE, pPageNumbers[i]));
4524 if (NV_OK != status)
4525 {
4526 // No more space in the blacklist
4527 NV_PRINTF(LEVEL_ERROR, "No more space in blacklist!\n");
4528 return status;
4529 }
4530 }
4531
4532 return status;
4533 }
4534
4535 NV_STATUS
heapAddPageToBlackList_IMPL(OBJGPU * pGpu,Heap * pHeap,NvU64 pageNumber,NvU32 type)4536 heapAddPageToBlackList_IMPL
4537 (
4538 OBJGPU *pGpu,
4539 Heap *pHeap,
4540 NvU64 pageNumber,
4541 NvU32 type
4542 )
4543 {
4544 KernelMemorySystem *pKernelMemorySystem = GPU_GET_KERNEL_MEMORY_SYSTEM(pGpu);
4545 NvU16 maximumBlacklistPages = kmemsysGetMaximumBlacklistPages(pGpu, pKernelMemorySystem);
4546 NvU32 index = pHeap->blackListAddresses.count;
4547
4548 if (index == maximumBlacklistPages)
4549 {
4550 return NV_ERR_INSUFFICIENT_RESOURCES;
4551 }
4552
4553 if (pHeap->blackListAddresses.data == NULL)
4554 {
4555 NvU64 listSize = sizeof(BLACKLIST_ADDRESS) * maximumBlacklistPages;
4556
4557 pHeap->blackListAddresses.data = portMemAllocNonPaged(listSize);
4558 if (pHeap->blackListAddresses.data == NULL)
4559 {
4560 return NV_ERR_NO_MEMORY;
4561 }
4562
4563 portMemSet(pHeap->blackListAddresses.data, 0, listSize);
4564 }
4565
4566 pHeap->blackListAddresses.data[index].address = (pageNumber << RM_PAGE_SHIFT);
4567 pHeap->blackListAddresses.data[index].type = type;
4568
4569 pHeap->blackListAddresses.count++;
4570
4571 NV_PRINTF(LEVEL_INFO, "Added 0x%0llx (blacklist count: %u)\n",
4572 pHeap->blackListAddresses.data[index].address,
4573 pHeap->blackListAddresses.count);
4574
4575 return NV_OK;
4576 }
4577
4578 /*!
4579 * @brief: Identify if an FB range is PMA-managed
4580 *
4581 * @param[in] pGpu OBJGPU pointer
4582 * @param[in] pHeap Heap pointer
4583 * @param[in] offset FB block offset
4584 * @param[in] limit FB block limit
4585 *
4586 * @return NV_TRUE offset is PMA-managed
4587 * NV_FALSE offset is not managed by PMA
4588 */
4589 NvBool
heapIsPmaManaged_IMPL(OBJGPU * pGpu,Heap * pHeap,NvU64 offset,NvU64 limit)4590 heapIsPmaManaged_IMPL
4591 (
4592 OBJGPU *pGpu,
4593 Heap *pHeap,
4594 NvU64 offset,
4595 NvU64 limit
4596 )
4597 {
4598 MemoryManager *pMemoryManager = GPU_GET_MEMORY_MANAGER(pGpu);
4599
4600 if (memmgrIsPmaInitialized(pMemoryManager))
4601 {
4602 NvU32 i;
4603
4604 NV_ASSERT(offset <= limit);
4605
4606 for (i = 0; i < pHeap->pmaObject.regSize; i++)
4607 {
4608 if ((offset >= pHeap->pmaObject.pRegDescriptors[i]->base) &&
4609 (limit <= pHeap->pmaObject.pRegDescriptors[i]->limit))
4610 {
4611 NV_PRINTF(LEVEL_INFO,
4612 "range %llx..%llx resides in PMA region=%llx..%llx\n",
4613 offset, limit,
4614 pHeap->pmaObject.pRegDescriptors[i]->base,
4615 pHeap->pmaObject.pRegDescriptors[i]->limit);
4616 return NV_TRUE;
4617 }
4618 #if defined(DEBUG)
4619 // Check for straddling
4620 else if (
4621 (limit >= pHeap->pmaObject.pRegDescriptors[i]->base) &&
4622 (offset <= pHeap->pmaObject.pRegDescriptors[i]->limit))
4623 {
4624 NV_PRINTF(LEVEL_ERROR,
4625 "range %llx..%llx straddles in PMA region=%llx..%llx\n",
4626 offset, limit,
4627 pHeap->pmaObject.pRegDescriptors[i]->base,
4628 pHeap->pmaObject.pRegDescriptors[i]->limit);
4629 }
4630 #endif //defined(DEBUG)
4631 }
4632 }
4633
4634 return(NV_FALSE);
4635 }
4636
/*!
 * @brief Increase the heap reference count atomically
 *
 * @param[in] pHeap Heap pointer (NULL is tolerated and returns 0)
 *
 * @return Refcount value after the increment
 *         (64-bit counter truncated to NvU32 by the return type)
 */
NvU32
heapAddRef_IMPL
(
    Heap *pHeap
)
{
    if (pHeap == NULL)
        return 0;

    return portAtomicExIncrementU64(&pHeap->refCount);
}
4656
/*!
 * @brief Decrease the heap reference count atomically, destroying the heap
 *        when it drops to zero
 *
 * @param[in] pHeap Heap pointer (NULL is tolerated and returns 0)
 *
 * @return Refcount value after the decrement
 *         (64-bit counter truncated to NvU32 by the return type)
 */
NvU32
heapRemoveRef_IMPL
(
    Heap *pHeap
)
{
    NvU64 refCount = 0;

    if (pHeap == NULL)
        return 0;

    refCount = portAtomicExDecrementU64(&pHeap->refCount);
    // Last reference dropped: tear down the heap object itself.
    if (refCount == 0)
    {
        objDelete(pHeap);
    }

    return refCount;
}
4684
4685 /*!
4686 * @brief Adjust the heap size
4687 *
4688 * @param[in] pHeap Heap pointer
4689 * @param[in] resizeBy NVS64 resizeBy value
4690 */
4691
heapResize_IMPL(Heap * pHeap,NvS64 resizeBy)4692 NV_STATUS heapResize_IMPL
4693 (
4694 Heap *pHeap,
4695 NvS64 resizeBy
4696 )
4697 {
4698 MEM_BLOCK *pBlockLast;
4699 MEM_BLOCK *pBlockNew;
4700 NV_STATUS status = NV_OK;
4701 OBJGPU *pGpu = ENG_GET_GPU(pHeap);
4702
4703 NV_ASSERT_OR_RETURN(pHeap->heapType == HEAP_TYPE_PHYS_MEM_SUBALLOCATOR, NV_ERR_NOT_SUPPORTED);
4704
4705 // Free all blacklisted pages
4706 if ((pHeap->blackListAddresses.count != 0) &&
4707 pGpu->getProperty(pGpu, PDB_PROP_GPU_ALLOW_PAGE_RETIREMENT) &&
4708 gpuCheckPageRetirementSupport_HAL(pGpu))
4709 {
4710 heapFreeBlackListedPages(pGpu, pHeap);
4711 }
4712
4713 // Go to last block if the heap w.r.t. the start address
4714 pBlockLast = pHeap->pBlockList;
4715 while (pBlockLast->next != pHeap->pBlockList)
4716 pBlockLast = pBlockLast->next;
4717
4718 if (resizeBy < 0) // Shrink the allocation
4719 {
4720 NvS64 newSize;
4721
4722 NV_ASSERT_OR_RETURN(pBlockLast->owner == NVOS32_BLOCK_TYPE_FREE, NV_ERR_NO_MEMORY);
4723 NV_CHECK_OR_RETURN(LEVEL_ERROR, portSafeAddS64(pBlockLast->end - pBlockLast->begin, resizeBy, &newSize) &&
4724 (newSize > 0), NV_ERR_INVALID_LIMIT);
4725 pBlockLast->end += resizeBy;
4726 }
4727 else // Grow the allocation
4728 {
4729 if (pBlockLast->owner == NVOS32_BLOCK_TYPE_FREE)
4730 {
4731 // Found a free block at the end Just resize it.
4732 pBlockLast->end += resizeBy;
4733 }
4734 else
4735 {
4736 // Could not find a free block at the end. Add a new free block.
4737 pBlockNew = portMemAllocNonPaged(sizeof(MEM_BLOCK));
4738 if (pBlockNew != NULL)
4739 {
4740
4741 portMemSet(pBlockNew, 0, sizeof(MEM_BLOCK));
4742
4743 pBlockNew->owner = NVOS32_BLOCK_TYPE_FREE;
4744 pBlockNew->refCount = 1;
4745
4746 // Set block boundaries
4747 pBlockNew->begin = pBlockLast->end + 1;
4748 pBlockNew->end = pBlockLast->end + resizeBy;
4749
4750 if (pHeap->pFreeBlockList == NULL)
4751 pHeap->pFreeBlockList = pBlockNew;
4752
4753 // Add the block in the free blocks list
4754 pBlockNew->u1.nextFree = pHeap->pFreeBlockList;
4755 pBlockNew->u0.prevFree = pHeap->pFreeBlockList->u0.prevFree;
4756 pBlockNew->u1.nextFree->u0.prevFree = pBlockNew;
4757 pBlockNew->u0.prevFree->u1.nextFree = pBlockNew;
4758
4759 // Add the block in the blocks list
4760 pBlockNew->next = pBlockLast->next;
4761 pBlockNew->prev = pBlockLast;
4762 pBlockNew->next->prev = pBlockNew;
4763 pBlockNew->prev->next = pBlockNew;
4764
4765 if ((status = _heapUpdate(pHeap, pBlockNew, BLOCK_ADD)) != NV_OK)
4766 {
4767 NV_PRINTF(LEVEL_ERROR,
4768 "_heapUpdate failed to _ADD block\n");
4769
4770 if (pHeap->pFreeBlockList == pBlockNew) // There was no free block in the heap.
4771 pHeap->pFreeBlockList = NULL; // We had added this one.
4772 portMemFree(pBlockNew);
4773 }
4774 else
4775 {
4776 pHeap->numBlocks++;
4777 }
4778 }
4779 }
4780 }
4781
4782 if (status == NV_OK)
4783 {
4784 pHeap->total += resizeBy;
4785 pHeap->free += resizeBy;
4786
4787 status = memmgrGetBlackListPagesForHeap_HAL(pGpu, GPU_GET_MEMORY_MANAGER(pGpu), pHeap);
4788 if (status != NV_OK)
4789 {
4790 NV_PRINTF(LEVEL_INFO,
4791 "Failed to read blackList pages (0x%x).\n",
4792 status);
4793 }
4794
4795 heapFilterBlackListPages(pHeap, pHeap->base, pHeap->total);
4796
4797 if (pHeap->blackListAddresses.count != 0)
4798 {
4799 status = heapBlackListPages(pGpu, pHeap);
4800
4801 if (status != NV_OK)
4802 {
4803 NV_PRINTF(LEVEL_WARNING,
4804 "Error 0x%x creating blacklist\n",
4805 status);
4806 }
4807 }
4808 }
4809 return status;
4810 }
4811