1 /*
2  * SPDX-FileCopyrightText: Copyright (c) 2015-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
3  * SPDX-License-Identifier: MIT
4  *
5  * Permission is hereby granted, free of charge, to any person obtaining a
6  * copy of this software and associated documentation files (the "Software"),
7  * to deal in the Software without restriction, including without limitation
8  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
9  * and/or sell copies of the Software, and to permit persons to whom the
10  * Software is furnished to do so, subject to the following conditions:
11  *
12  * The above copyright notice and this permission notice shall be included in
13  * all copies or substantial portions of the Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
21  * DEALINGS IN THE SOFTWARE.
22  */
23 
24 /*!
25  * @file
26  *
27  * @brief Implementation for the NUMA interfaces, used by parent module PMA only.
28  * This file interfaces with the RM Linux layer which interfaces with the
29  * Linux kernel.
30  */
31 
32 #include "gpu/mem_mgr/phys_mem_allocator/numa.h"
33 #include "gpu/mem_mgr/phys_mem_allocator/phys_mem_allocator_util.h"
34 #include "gpu/mem_mgr/mem_scrub.h"
35 #include "utils/nvprintf.h"
36 #include "utils/nvassert.h"
37 #include "os/os.h"
38 
39 //
40 // Local helper functions and declarations
41 //
42 
// TODO: Merge or remove these forward-declared helper functions.
44 static NV_STATUS _pmaNumaAvailableEvictablePage(PMA *pPma, NvS32 *validRegionList);
45 static NV_STATUS _pmaNumaAvailableEvictableRange(PMA *pPma, NvS32 *validRegionList,
46     NvLength actualSize, NvU64 pageSize, NvU64 *evictStart, NvU64 *evictEnd);
47 static NV_STATUS _pmaNumaAllocateRange(PMA *pPma, NvU32 numaNodeId, NvLength actualSize,
48     NvU64 pageSize, NvU64 *pPages, NvBool bScrubOnAlloc, NvBool allowEvict, NvS32 *validRegionList,
49     NvU64 *allocatedCount);
50 static NV_STATUS _pmaNumaAllocatePages (PMA *pPma, NvU32 numaNodeId, NvU64 pageSize,
51     NvLength allocationCount, NvU64 *pPages, NvBool bScrubOnAlloc, NvBool allowEvict, NvS32 *validRegionList,
52     NvU64 *allocatedPages);
53 
54 /*!
55  * @brief Check if there is at least one evictable page from UVM.
56  */
57 static NV_STATUS _pmaNumaAvailableEvictablePage
58 (
59     PMA     *pPma,
60     NvS32   *validRegionList
61 )
62 {
63     NvU32           regionIdx;
64     PMA_PAGESTATUS  frameState;
65     void           *pMap   = NULL;
66     NV_STATUS       status = NV_ERR_NO_MEMORY;
67 
68     for (regionIdx = 0; regionIdx < pPma->regSize; regionIdx++)
69     {
70         NvU32 regId, frameNum;
71         NvU64 totalFrames;
72 
73         regId = (NvU32)validRegionList[regionIdx];
74 
75         if (validRegionList[regionIdx] == -1)
76             continue;
77 
78         pMap = pPma->pRegions[regId];
79         pPma->pMapInfo->pmaMapGetSize(pMap, &totalFrames);
80         totalFrames >>= PMA_PAGE_SHIFT;
81 
82         for (frameNum = 0; frameNum < totalFrames; frameNum++)
83         {
84             frameState = pPma->pMapInfo->pmaMapRead(pMap, frameNum, NV_TRUE);
85             if ((frameState & STATE_MASK) == STATE_UNPIN)
86             {
87                 status = NV_OK;
88                 break;
89             }
90         }
91 
92         if (status == NV_OK)
93             break;
94     }
95 
96     if (status == NV_OK)
97         NV_PRINTF(LEVEL_INFO, "Evictable frame: FOUND\n");
98     else
99         NV_PRINTF(LEVEL_INFO, "Evictable frame: NOT FOUND\n");
100 
101     return status;
102 }
103 
104 /*!
105  * @brief  Check if there is a contiguous range of
106  *         evictable frame with UVM and get the start
107  *         and end address if there is
108  * In NUMA, OS manages memory and PMA will only track allocated memory in ALLOC_PIN
109  * and ALLOC_UNPIN state. FREE memory is managed by OS and cannot be tracked by PMA
110  * and hence PMA cannot consider FREE memory for eviction and can only consider frames
111  * in known state to PMA or eviction. ALLOC_PIN cannot be evicted and hence only ALLOC_UNPIN
112  * can be evictable.
113  */
114 NV_STATUS _pmaNumaAvailableEvictableRange
115 (
116     PMA      *pPma,
117     NvS32    *validRegionList,
118     NvLength  actualSize,
119     NvU64     pageSize,
120     NvU64    *evictStart,
121     NvU64    *evictEnd
122 )
123 {
124     void     *pMap  = NULL;
125     NvU32     regionIdx;
126     NV_STATUS status = NV_ERR_NO_MEMORY;
127 
128     if ((evictStart == NULL) || (evictEnd == NULL))
129     {
130         return NV_ERR_INVALID_ARGUMENT;
131     }
132 
133     *evictStart = 0;
134     *evictEnd   = 0;
135 
136     for (regionIdx = 0; regionIdx < pPma->regSize; regionIdx++)
137     {
138         NvU64 addrBase;
139         NvU32 regId;
140 
141         if (validRegionList[regionIdx] == -1)
142             continue;
143 
144         regId = (NvU32)validRegionList[regionIdx];
145         pMap  = pPma->pRegions[regId];
146         addrBase = pPma->pRegDescriptors[regId]->base;
147 
148 
149         if ((status = pPma->pMapInfo->pmaMapScanContiguousNumaEviction(pMap, addrBase, actualSize,
150                                                                        pageSize, evictStart, evictEnd)) == NV_OK)
151         {
152             break;
153         }
154     }
155 
156     return status;
157 }
158 
159 /*!
160  * Check if the number of free frames is below the skip threshold percentage of total.
161  * @return NV_TRUE  free frame count is below threshold.
162  *         NV_FALSE otherwise.
163  */
164 static NvBool _pmaCheckFreeFramesToSkipReclaim(PMA *pPma)
165 {
166     return (100 * pPma->pmaStats.numFreeFrames <
167              (pPma->pmaStats.num2mbPages * (_PMA_2MB >> PMA_PAGE_SHIFT) * pPma->numaReclaimSkipThreshold));
168 }
169 
170 /*!
171  * @brief  Allocate contiguous memory for Numa
172  *
173  */
174 NV_STATUS _pmaNumaAllocateRange
175 (
176     PMA     *pPma,
177     NvU32    numaNodeId,
178     NvLength actualSize,
179     NvU64    pageSize,
180     NvU64   *pPages,
181     NvBool   bScrubOnAlloc,
182     NvBool   allowEvict,
183     NvS32   *validRegionList,
184     NvU64   *allocatedCount
185 )
186 {
187     NV_STATUS   status = NV_ERR_NO_MEMORY;
188     NvU64  sysPhysAddr = 0, gpaPhysAddr = 0, evictStart = 0, evictEnd = 0;
189     NvU32 flags = OS_ALLOC_PAGES_NODE_NONE;
190     *allocatedCount    = 0;
191 
192     NV_ASSERT_OR_RETURN(actualSize >= osGetPageSize(), NV_ERR_INVALID_ARGUMENT);
193 
194     // check if numFreeFrames(64KB) are below a certain % of PMA managed memory(indicated by num2mbPages).
195     if (_pmaCheckFreeFramesToSkipReclaim(pPma))
196     {
197         flags = OS_ALLOC_PAGES_NODE_SKIP_RECLAIM;
198     }
199 
200     portSyncSpinlockRelease(pPma->pPmaLock);
201 
202     // Try to allocate contiguous allocation of actualSize from OS. Do not force RECLAIM
203     status = osAllocPagesNode((int)numaNodeId, (NvLength)actualSize, flags, &sysPhysAddr);
204 
205     if (status == NV_OK)
206     {
207         NvU8 osPageShift = osGetPageShift();
208 
209         // Skip the first page as it is refcounted at allocation.
210         osAllocAcquirePage(sysPhysAddr + (1 << osPageShift), (actualSize >> osPageShift) - 1);
211 
212         gpaPhysAddr = sysPhysAddr - pPma->coherentCpuFbBase;
213         NV_ASSERT(gpaPhysAddr < pPma->coherentCpuFbBase);
214         *allocatedCount = 1;
215 
216         if (bScrubOnAlloc)
217         {
218             PSCRUB_NODE pPmaScrubList = NULL;
219             NvU64 count;
220 
221             if ((status = scrubSubmitPages(pPma->pScrubObj, (NvU32)actualSize, &gpaPhysAddr,
222                                            1, &pPmaScrubList, &count)) != NV_OK)
223             {
224                 status = NV_ERR_INSUFFICIENT_RESOURCES;
225                 goto scrub_exit;
226             }
227 
228             if (count > 0)
229                 _pmaClearScrubBit(pPma, pPmaScrubList, count);
230 
231             if ((status = _pmaCheckScrubbedPages(pPma, actualSize, &gpaPhysAddr, 1)) != NV_OK)
232             {
233                 status = NV_ERR_INSUFFICIENT_RESOURCES;
234             }
235 
236 scrub_exit:
237             portMemFree(pPmaScrubList);
238 
239             if (status == NV_ERR_INSUFFICIENT_RESOURCES)
240             {
241                 NV_PRINTF(LEVEL_ERROR, "ERROR: scrubber OOM!\n");
242             }
243         }
244 
245         portSyncSpinlockAcquire(pPma->pPmaLock);
246         goto allocated;
247     }
248 
249     portSyncSpinlockAcquire(pPma->pPmaLock);
250 
251     NV_PRINTF(LEVEL_INFO, "Allocate from OS failed for allocation size = %lld!\n",
252                                (NvU64) actualSize);
253 
254 
255     if (allowEvict)
256     {
257         // Check if UVM has evictable contiguous allocations of actualSize
258         status = _pmaNumaAvailableEvictableRange(pPma, validRegionList,
259                                              actualSize, pageSize,
260                                              &evictStart, &evictEnd);
261     }
262 
263     if ((status == NV_OK) && (evictEnd - evictStart + 1) >=  actualSize)
264     {
265         void *pMap = NULL;
266         NvU32 regId;
267         MEMORY_PROTECTION prot;
268 
269         NV_ASSERT((evictEnd - evictStart + 1) ==  actualSize);
270         status = NV_ERR_NO_MEMORY;
271         regId = findRegionID(pPma, evictStart);
272         pMap  = pPma->pRegions[regId];
273         prot = pPma->pRegDescriptors[regId]->bProtected ? MEMORY_PROTECTION_PROTECTED :
274                                                           MEMORY_PROTECTION_UNPROTECTED;
275 
276         if (pMap != NULL)
277         {
278             //
279             // Call UVM to evict the contiguous allocation and evict the rest to OS
280             // UVM will call into PMA to free this contiguous range along with any excesses.
281             // PMA will release only the excess allocation to OS in the free routine.
282             // i.e., region evictStart to evictEnd is marked as 'ATTRIB_EVICTING' and will not
283             // be returned to OS.
284             //
285             status = _pmaEvictContiguous(pPma, pMap, evictStart, evictEnd, prot);
286 
287             if (status == NV_ERR_NO_MEMORY)
288             {
289                 NV_PRINTF(LEVEL_INFO, "Eviction Failed = %llx to %llx!\n", evictStart, evictEnd);
290             }
291             else
292             {
293                 NV_PRINTF(LEVEL_INFO, "Eviction succeeded = %llx to %llx Scrub status 0x%x!\n",
294                                       evictStart, evictEnd, status);
295                 gpaPhysAddr =  evictStart;
296                 *allocatedCount = 1;
297             }
298         }
299         else
300         {
301             NV_PRINTF(LEVEL_INFO, "pMap NULL cannot perform eviction\n");
302         }
303     }
304 
305 
306 allocated:
307 
308     // GPA needs to be acquired by shifting by the ATS aperture base address
309     pPages[0] = gpaPhysAddr;
310 
311     return status;
312 }
313 
314 /*!
315  * @brief  Allocate discontiguous pages for Numa
316  *
317  */
318 static NV_STATUS _pmaNumaAllocatePages
319 (
320     PMA     *pPma,
321     NvU32    numaNodeId,
322     NvU64    pageSize,
323     NvLength allocationCount,
324     NvU64   *pPages,
325     NvBool   bScrubOnAlloc,
326     NvBool   allowEvict,
327     NvS32   *validRegionList,
328     NvU64   *allocatedPages
329 )
330 {
331     NV_STATUS status = NV_ERR_NO_MEMORY;
332     NvU64     sysPhysAddr;
333     NvU64     i = 0;
334     NvU32     flags = OS_ALLOC_PAGES_NODE_NONE;
335     NvU8      osPageShift = osGetPageShift();
336 
337     NV_ASSERT(allocationCount);
338     NV_ASSERT_OR_RETURN(pageSize >= osGetPageSize(), NV_ERR_INVALID_ARGUMENT);
339 
340     // check if numFreeFrames are below certain % of PMA managed memory.
341     if (_pmaCheckFreeFramesToSkipReclaim(pPma))
342     {
343         flags = OS_ALLOC_PAGES_NODE_SKIP_RECLAIM;
344     }
345 
346     portSyncSpinlockRelease(pPma->pPmaLock);
347 
348     for (; i < allocationCount; i++)
349     {
350         status = osAllocPagesNode((int)numaNodeId, (NvLength) pageSize, flags, &sysPhysAddr);
351         if (status != NV_OK)
352         {
353             NV_PRINTF(LEVEL_INFO, "Alloc from OS failed for i= %lld allocationCount = %lld pageSize = %lld!\n",
354                                    i, (NvU64) allocationCount, (NvU64) pageSize);
355             break;
356         }
357 
358         // GPA needs to be acquired by shifting by the ATS aperture base address
359         NV_ASSERT(sysPhysAddr >= pPma->coherentCpuFbBase);
360         pPages[i] = sysPhysAddr - pPma->coherentCpuFbBase;
361 
362         // Skip the first page as it is refcounted at allocation.
363         osAllocAcquirePage(sysPhysAddr + (1 << osPageShift), (pageSize >> osPageShift) - 1);
364     }
365 
366     if (bScrubOnAlloc && (i > 0))
367     {
368         PSCRUB_NODE pPmaScrubList = NULL;
369         NvU64 count;
370 
371         if ((status = scrubSubmitPages(pPma->pScrubObj, pageSize, pPages,
372                                        i, &pPmaScrubList, &count)) != NV_OK)
373         {
374             status = NV_ERR_INSUFFICIENT_RESOURCES;
375             goto scrub_exit;
376         }
377 
378         if (count > 0)
379             _pmaClearScrubBit(pPma, pPmaScrubList, count);
380 
381         if ((status = _pmaCheckScrubbedPages(pPma, pageSize, pPages, (NvU32)i)) != NV_OK)
382         {
383             status = NV_ERR_INSUFFICIENT_RESOURCES;
384         }
385 
386 scrub_exit:
387         portMemFree(pPmaScrubList);
388 
389         if (status == NV_ERR_INSUFFICIENT_RESOURCES)
390         {
391             NV_PRINTF(LEVEL_ERROR, "ERROR: scrubber OOM!\n");
392             portSyncSpinlockAcquire(pPma->pPmaLock);
393             goto exit;
394         }
395     }
396 
397     portSyncSpinlockAcquire(pPma->pPmaLock);
398 
399     if (( i < allocationCount) && allowEvict)
400     {
401         NvU32 regionIdx;
402 
403         // Check if there is atleast one evictable page
404         status = _pmaNumaAvailableEvictablePage(pPma, validRegionList);
405 
406         if (status != NV_OK)
407         {
408             goto exit;
409         }
410 
411         status = NV_ERR_NO_MEMORY;
412 
413         for (regionIdx = 0; regionIdx < pPma->regSize; regionIdx++)
414         {
415             NvU32 regId;
416             NvU64 addrBase, addrLimit;
417             void *pMap = NULL;
418             MEMORY_PROTECTION prot;
419 
420             if (validRegionList[regionIdx] == -1)
421             {
422                 continue;
423             }
424 
425             regId = (NvU32)validRegionList[regionIdx];
426             pMap  = pPma->pRegions[regId];
427 
428             addrBase = pPma->pRegDescriptors[regId]->base;
429             addrLimit = pPma->pRegDescriptors[regId]->limit;
430             prot = pPma->pRegDescriptors[regId]->bProtected ? MEMORY_PROTECTION_PROTECTED :
431                                                               MEMORY_PROTECTION_UNPROTECTED;
432 
433             status = _pmaEvictPages(pPma, pMap,
434                                     &pPages[i], (NvU32)(allocationCount - i),
435                                     &pPages[0], i,
436                                     pageSize, addrBase, addrLimit, prot);
437 
438             if (status != NV_ERR_NO_MEMORY)
439             {
440                 NV_PRINTF(LEVEL_INFO, "Frames %lld evicted in region %d of total allocationCount %lld Scrub status 0x%x!\n",
441                                       i, regionIdx,  (NvU64) allocationCount, status);
442                 //
443                 // UVM can over evict, but will call into PMA only to evict the excess.
444                 // free startAddr + actualSize, (uvmAllocatedSize - actualSize) to OS.
445                 // Assume no under eviction. Overeviction is taken care of by the free routine.
446                 //
447                 i = allocationCount;
448                 break;
449             }
450 
451             NV_PRINTF(LEVEL_INFO, "Eviction Failed %d pages !\n", (NvU32) (allocationCount - i));
452         }
453 
454     }
455 
456 exit:
457     *allocatedPages = i;
458 
459     return status;
460 }
461 
462 
/*!
 * @brief Allocate pages from the PMA-managed NUMA node.
 *
 * Entry point for allocation in NUMA (coherent CPU FB) mode. Validates the
 * request, optionally takes the scrubber locks, then dispatches to the
 * contiguous-range or discontiguous-pages helper. On success the allocated
 * frames are marked PIN/UNPIN in the region map; on any failure the partial
 * allocation is freed and NV_ERR_NO_MEMORY is returned.
 *
 * @param[in]     pPma              PMA object pointer.
 * @param[in]     allocationCount   Number of pages requested.
 * @param[in]     pageSize          Page size of the request (<= 512MB).
 * @param[in,out] allocationOptions Request flags in; resultFlags and
 *                                  numPagesAllocated out.
 * @param[out]    pPages            Receives the GPAs of the allocated pages.
 *
 * @return NV_OK on success; NV_ERR_INVALID_ARGUMENT, NV_ERR_INVALID_STATE,
 *         or NV_ERR_NO_MEMORY on failure.
 */
NV_STATUS pmaNumaAllocate
(
    PMA                    *pPma,
    NvLength                allocationCount,
    NvU64                   pageSize,
    PMA_ALLOCATION_OPTIONS *allocationOptions,
    NvU64                  *pPages
)
{
    NvU32    i;
    NV_STATUS  status     = NV_OK;
    NvU32    numaNodeId   = pPma->numaNodeId;
    NvS32    regionList[PMA_REGION_SIZE];
    NvU32    flags        = allocationOptions->flags;
    NvLength allocSize    = 0;
    NvLength allocCount   = 0;
    NvU32    contigFlag   = !!(flags & PMA_ALLOCATE_CONTIGUOUS);
    // As per bug #2444368, kernel scrubbing is too slow. Use the GPU scrubber instead
    NvBool bScrubOnAlloc  = !(flags & PMA_ALLOCATE_NO_ZERO);
    NvBool    allowEvict  = !(flags & PMA_ALLOCATE_DONT_EVICT);
    NvBool   partialFlag  = !!(flags & PMA_ALLOCATE_ALLOW_PARTIAL);
    NvBool bSkipScrubFlag = !!(flags & PMA_ALLOCATE_NO_ZERO);

    NvU64    finalAllocatedCount = 0;

    if (!pPma->bNuma)
    {
        NV_PRINTF(LEVEL_FATAL, "Cannot allocate from NUMA node %d on a non-NUMA system.\n",
                                numaNodeId);
        return NV_ERR_INVALID_ARGUMENT;
    }

    if (pageSize > _PMA_512MB)
    {
        NV_PRINTF(LEVEL_FATAL, "Cannot allocate with more than 512MB contiguity.\n");
        return NV_ERR_INVALID_ARGUMENT;
    }

    if (pPma->nodeOnlined != NV_TRUE)
    {
        NV_PRINTF(LEVEL_INFO, "Cannot allocate from NUMA node %d before it is onlined.\n",
                               numaNodeId);
        return NV_ERR_INVALID_STATE;
    }

    if (contigFlag)
    {
        // Total contiguous size must fit the 32-bit limit used downstream.
        if (((NvU64)allocationCount) * ((NvU64) pageSize) > NV_U32_MAX)
        {
            NV_PRINTF(LEVEL_FATAL, "Cannot allocate more than 4GB contiguous memory in one call.\n");
            return NV_ERR_INVALID_ARGUMENT;
        }
    }

    // We are not changing the state. Can be outside the lock perhaps
    NV_CHECK_OK_OR_RETURN(LEVEL_FATAL, pmaSelector(pPma, allocationOptions, regionList));

    //
    // Scrub on free is enabled for this allocation request if the feature is enabled and the
    // caller does not want to skip scrubber.
    // Caller may want to skip scrubber when it knows the memory is zero'ed or when we are
    // initializing RM structures needed by the scrubber itself.
    //
    if (pPma->bScrubOnFree && !bSkipScrubFlag)
    {
        portSyncMutexAcquire(pPma->pAllocLock);
        portSyncRwLockAcquireRead(pPma->pScrubberValidLock);

        if (pmaPortAtomicGet(&pPma->scrubberValid) != PMA_SCRUBBER_VALID)
        {
            NV_PRINTF(LEVEL_WARNING, "PMA object is not valid\n");
            portSyncRwLockReleaseRead(pPma->pScrubberValidLock);
            portSyncMutexRelease(pPma->pAllocLock);
            return NV_ERR_INVALID_STATE;
        }
    }
    else
    {
        //
        // Scrub-on-free feature is OFF, therefore we cannot do scrub-on-alloc
        // either because it uses the same HW
        //
        bScrubOnAlloc = NV_FALSE;
    }

    //
    // In the NUMA path, scrub on free does not provide enough safety guarantees
    // because pages are released to the kernel and they can be reused by other
    // processes. Therefore, we can only guarantee that the returned pages are
    // zero if scrub on alloc is used.
    //
    allocationOptions->resultFlags = (bScrubOnAlloc)? PMA_ALLOCATE_RESULT_IS_ZERO : 0;

    portSyncSpinlockAcquire(pPma->pPmaLock);

    if (contigFlag)
    {
        allocCount = 1;
        allocSize  = allocationCount * pageSize;
        status     = _pmaNumaAllocateRange(pPma, numaNodeId, allocSize, pageSize, pPages, bScrubOnAlloc, allowEvict, regionList, &finalAllocatedCount);
    }
    else
    {
        allocCount = allocationCount;
        allocSize  = pageSize;
        status     = _pmaNumaAllocatePages(pPma, numaNodeId, (NvU32) allocSize, allocCount, pPages, bScrubOnAlloc, allowEvict, regionList, &finalAllocatedCount);
    }

    // A partial allocation is a success when the caller allows it.
    if ((status == NV_ERR_NO_MEMORY) && partialFlag && (finalAllocatedCount > 0))
    {
        status = NV_OK;
    }

    if (status == NV_OK)
    {
        NvU32  regId;
        void  *pMap = NULL;
        NvU64  regAddrBase;
        NvU64  frameOffset;
        NvU64  frameCount = 0;
        PMA_PAGESTATUS curStatus = STATE_FREE;
        PMA_PAGESTATUS allocOption = !!(flags & PMA_ALLOCATE_PINNED) ?
                                        STATE_PIN : STATE_UNPIN;

        NV_PRINTF(LEVEL_INFO, "SUCCESS allocCount %lld, allocsize %lld eviction? %s pinned ? %s contig? %s\n",
                              (NvU64) allocCount,(NvU64) allocSize, (flags & PMA_ALLOCATE_DONT_EVICT) ?  "NOTALLOWED" : "ALLOWED",
                               !!(flags & PMA_ALLOCATE_PINNED) ? "PINNED" : "UNPINNED", contigFlag ? "CONTIG":"DISCONTIG");

        // Mark every frame of every allocated page PIN/UNPIN in the region
        // map; a frame still being evicted fails the whole allocation.
        for (i = 0; i < finalAllocatedCount; i++)
        {
            NvU32 j;

            regId = findRegionID(pPma, pPages[i]);
            pMap  = pPma->pRegions[regId];
            regAddrBase = pPma->pRegDescriptors[regId]->base;
            frameCount  = allocSize >> PMA_PAGE_SHIFT;

            for (j = 0; j < frameCount; j++)
            {
                frameOffset = PMA_ADDR2FRAME(pPages[i], regAddrBase) + j;

                curStatus = pPma->pMapInfo->pmaMapRead(pMap, frameOffset, NV_TRUE);

                if (curStatus & ATTRIB_EVICTING)
                {
                    status = NV_ERR_NO_MEMORY;
                    break;
                }
                pPma->pMapInfo->pmaMapChangeStateAttrib(pMap, frameOffset, allocOption, NV_TRUE);
            }
            if (status != NV_OK)
                break;
        }

        if (status == NV_OK)
        {
            allocationOptions->numPagesAllocated = (NvLength)finalAllocatedCount;
        }
    }


    if (status != NV_OK)
    {
        NV_PRINTF(LEVEL_INFO, "FAILED allocCount %lld, allocsize %lld eviction? %s pinned ? %s contig? %s\n",
                              (NvU64) allocCount, (NvU64) allocSize, (flags & PMA_ALLOCATE_DONT_EVICT) ?  "NOTALLOWED" : "ALLOWED",
                              !!(flags & PMA_ALLOCATE_PINNED) ? "PINNED" : "UNPINNED", contigFlag ? "CONTIG":"DISCONTIG");
        //
        // Free the entire allocation if scrubbing failed or if we had allocated evicting allocations.
        // Evicting allocation will be handled in the pmaEvictContiguous
        //
        if (finalAllocatedCount > 0)
            pmaNumaFreeInternal(pPma, pPages, finalAllocatedCount, pageSize, 0);

        status = NV_ERR_NO_MEMORY;
    }

    portSyncSpinlockRelease(pPma->pPmaLock);

    if (pPma->bScrubOnFree && !bSkipScrubFlag)
    {
        portSyncRwLockReleaseRead(pPma->pScrubberValidLock);
        portSyncMutexRelease(pPma->pAllocLock);
    }

    return status;
}
649 
/*!
 * @brief Free NUMA pages back to the OS, honoring in-flight eviction.
 *
 * For each page, every 64KB PMA frame is released to the OS unless it is
 * marked ATTRIB_EVICTING, in which case the frame is kept so the evicting
 * allocation can reuse it (unpinned evicting frames additionally get
 * ATTRIB_NUMA_REUSE so they can be released later if the eviction fails).
 * Caller is expected to hold the PMA spinlock.
 *
 * @param[in] pPma      PMA object pointer.
 * @param[in] pPages    GPAs of the pages to free.
 * @param[in] pageCount Number of entries in pPages.
 * @param[in] size      Size of each page in bytes.
 * @param[in] flag      Unused in this function (kept for interface symmetry).
 */
void pmaNumaFreeInternal
(
    PMA   *pPma,
    NvU64 *pPages,
    NvU64  pageCount,
    NvU64  size,
    NvU32  flag
)
{
    NvU64 i, j;
    NvU8 osPageShift = osGetPageShift();

    // A PMA frame must span one or more OS pages for the release math below.
    NV_ASSERT_OR_RETURN_VOID(PMA_PAGE_SHIFT >= osPageShift);

    NV_PRINTF(LEVEL_INFO, "Freeing pPage[0] = %llx pageCount %lld\n", pPages[0], pageCount);

    for (i = 0; i < pageCount; i++)
    {
        NvU32 regId;
        NvU64 addrBase;
        NvU64 sysPhysAddr = 0;
        NvU64 frameNum;
        NvU64 framesPerPage;

        // Shift the GPA to acquire the bus address (SPA)
        NV_ASSERT(pPages[i] < pPma->coherentCpuFbSize);

        regId    = findRegionID(pPma, pPages[i]);
        addrBase = pPma->pRegDescriptors[regId]->base;
        frameNum = PMA_ADDR2FRAME(pPages[i], addrBase);
        framesPerPage = size >> PMA_PAGE_SHIFT;
        sysPhysAddr   = pPages[i] + pPma->coherentCpuFbBase;

        for (j = 0; j < framesPerPage; j++)
        {
            PMA_PAGESTATUS newStatus = STATE_FREE;
            PMA_PAGESTATUS currentStatus;
            NvU64 sysPagePhysAddr = 0;
            currentStatus = pPma->pMapInfo->pmaMapRead(pPma->pRegions[regId], (frameNum + j), NV_TRUE);

            //
            // When the pages are marked for evicting, we will skip free the page to OS
            // in order to reuse the page.
            //
            if (currentStatus & ATTRIB_EVICTING)
            {
                //
                // Evicting allocations are returned to new client and will be freed later.
                // We set the ATTRIB_NUMA_REUSE bit here just in case eviction fails later and we
                // need to release the page to OS in the allocation path.
                //
                if (currentStatus & STATE_UNPIN)
                {
                    pPma->pMapInfo->pmaMapChangeStateAttribEx(pPma->pRegions[regId], (frameNum + j),
                                                              ATTRIB_NUMA_REUSE, ATTRIB_NUMA_REUSE);
                }
                continue;
            }
            // Release the OS pages backing this frame and mark it FREE,
            // clearing every attribute except ATTRIB_EVICTING.
            sysPagePhysAddr = sysPhysAddr + (j << PMA_PAGE_SHIFT);
            osAllocReleasePage(sysPagePhysAddr, 1 << (PMA_PAGE_SHIFT - osPageShift));
            pPma->pMapInfo->pmaMapChangeStateAttribEx(pPma->pRegions[regId], (frameNum + j), newStatus, ~ATTRIB_EVICTING);
        }
    }
}
714 
/*!
 * @brief Set the percentage threshold below which OS reclaim is skipped.
 *
 * The update is done under the PMA spinlock so allocation paths reading
 * numaReclaimSkipThreshold see a consistent value.
 *
 * @param[in] pPma               PMA object pointer.
 * @param[in] skipReclaimPercent Threshold as a percentage of managed memory.
 */
void pmaNumaSetReclaimSkipThreshold(PMA *pPma, NvU32 skipReclaimPercent)
{
    portSyncSpinlockAcquire(pPma->pPmaLock);
    pPma->numaReclaimSkipThreshold = skipReclaimPercent;
    portSyncSpinlockRelease(pPma->pPmaLock);
}
721