1 /*
2  * SPDX-FileCopyrightText: Copyright (c) 2015-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
3  * SPDX-License-Identifier: MIT
4  *
5  * Permission is hereby granted, free of charge, to any person obtaining a
6  * copy of this software and associated documentation files (the "Software"),
7  * to deal in the Software without restriction, including without limitation
8  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
9  * and/or sell copies of the Software, and to permit persons to whom the
10  * Software is furnished to do so, subject to the following conditions:
11  *
12  * The above copyright notice and this permission notice shall be included in
13  * all copies or substantial portions of the Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
21  * DEALINGS IN THE SOFTWARE.
22  */
23 
24 /*!
25  * @file
26  *
27  * @brief Implementation for the NUMA interfaces, used by parent module PMA only.
28  * This file interfaces with the RM Linux layer which interfaces with the
29  * Linux kernel.
30  */
31 
32 #include "gpu/mem_mgr/phys_mem_allocator/numa.h"
33 #include "gpu/mem_mgr/phys_mem_allocator/phys_mem_allocator_util.h"
34 #include "gpu/mem_mgr/mem_scrub.h"
35 #include "utils/nvprintf.h"
36 #include "utils/nvassert.h"
37 #include "os/os.h"
38 
39 //
40 // Local helper functions and declarations
41 //
42 
//TODO merge or nuke these functions
// Scan all valid regions for at least one ALLOC_UNPIN (evictable) frame.
static NV_STATUS _pmaNumaAvailableEvictablePage(PMA *pPma, NvS32 *validRegionList);
// Find a contiguous evictable (ALLOC_UNPIN) range of actualSize bytes across the valid regions.
static NV_STATUS _pmaNumaAvailableEvictableRange(PMA *pPma, NvS32 *validRegionList,
    NvLength actualSize, NvU64 pageSize, NvU64 *evictStart, NvU64 *evictEnd);
// Allocate one contiguous range of actualSize bytes from the OS, evicting from UVM if allowed.
static NV_STATUS _pmaNumaAllocateRange(PMA *pPma, NvU32 numaNodeId, NvLength actualSize,
    NvU64 pageSize, NvU64 *pPages, NvBool bScrubOnAlloc, NvBool allowEvict, NvS32 *validRegionList,
    NvU64 *allocatedCount);
// Allocate allocationCount discontiguous pages of pageSize bytes each, evicting if allowed.
static NV_STATUS _pmaNumaAllocatePages (PMA *pPma, NvU32 numaNodeId, NvU64 pageSize,
    NvLength allocationCount, NvU64 *pPages, NvBool bScrubOnAlloc, NvBool allowEvict, NvS32 *validRegionList,
    NvU64 *allocatedPages);
53 
54 /*!
55  * @brief Check if there is at least one evictable page from UVM.
56  */
_pmaNumaAvailableEvictablePage(PMA * pPma,NvS32 * validRegionList)57 static NV_STATUS _pmaNumaAvailableEvictablePage
58 (
59     PMA     *pPma,
60     NvS32   *validRegionList
61 )
62 {
63     NvU32           regionIdx;
64     PMA_PAGESTATUS  frameState;
65     void           *pMap   = NULL;
66     NV_STATUS       status = NV_ERR_NO_MEMORY;
67 
68     for (regionIdx = 0; regionIdx < pPma->regSize; regionIdx++)
69     {
70         NvU32 regId, frameNum;
71         NvU64 totalFrames;
72 
73         regId = (NvU32)validRegionList[regionIdx];
74 
75         if (validRegionList[regionIdx] == -1)
76             continue;
77 
78         pMap = pPma->pRegions[regId];
79         pPma->pMapInfo->pmaMapGetSize(pMap, &totalFrames);
80         totalFrames >>= PMA_PAGE_SHIFT;
81 
82         for (frameNum = 0; frameNum < totalFrames; frameNum++)
83         {
84             frameState = pPma->pMapInfo->pmaMapRead(pMap, frameNum, NV_TRUE);
85             if ((frameState & STATE_MASK) == STATE_UNPIN)
86             {
87                 status = NV_OK;
88                 break;
89             }
90         }
91 
92         if (status == NV_OK)
93             break;
94     }
95 
96     if (status == NV_OK)
97         NV_PRINTF(LEVEL_INFO, "Evictable frame: FOUND\n");
98     else
99         NV_PRINTF(LEVEL_INFO, "Evictable frame: NOT FOUND\n");
100 
101     return status;
102 }
103 
104 /*!
105  * @brief  Check if there is a contiguous range of
106  *         evictable frame with UVM and get the start
107  *         and end address if there is
108  * In NUMA, OS manages memory and PMA will only track allocated memory in ALLOC_PIN
109  * and ALLOC_UNPIN state. FREE memory is managed by OS and cannot be tracked by PMA
110  * and hence PMA cannot consider FREE memory for eviction and can only consider frames
111  * in known state to PMA or eviction. ALLOC_PIN cannot be evicted and hence only ALLOC_UNPIN
112  * can be evictable.
113  */
_pmaNumaAvailableEvictableRange(PMA * pPma,NvS32 * validRegionList,NvLength actualSize,NvU64 pageSize,NvU64 * evictStart,NvU64 * evictEnd)114 NV_STATUS _pmaNumaAvailableEvictableRange
115 (
116     PMA      *pPma,
117     NvS32    *validRegionList,
118     NvLength  actualSize,
119     NvU64     pageSize,
120     NvU64    *evictStart,
121     NvU64    *evictEnd
122 )
123 {
124     void     *pMap  = NULL;
125     NvU32     regionIdx;
126     NV_STATUS status = NV_ERR_NO_MEMORY;
127 
128     if ((evictStart == NULL) || (evictEnd == NULL))
129     {
130         return NV_ERR_INVALID_ARGUMENT;
131     }
132 
133     *evictStart = 0;
134     *evictEnd   = 0;
135 
136     for (regionIdx = 0; regionIdx < pPma->regSize; regionIdx++)
137     {
138         NvU64 addrBase;
139         NvU32 regId;
140 
141         if (validRegionList[regionIdx] == -1)
142             continue;
143 
144         regId = (NvU32)validRegionList[regionIdx];
145         pMap  = pPma->pRegions[regId];
146         addrBase = pPma->pRegDescriptors[regId]->base;
147 
148 
149         if ((status = pPma->pMapInfo->pmaMapScanContiguousNumaEviction(pMap, addrBase, actualSize,
150                                                                        pageSize, evictStart, evictEnd)) == NV_OK)
151         {
152             break;
153         }
154     }
155 
156     return status;
157 }
158 
159 /*!
160  * Check if the number of free frames is below the skip threshold percentage of total.
161  * @return NV_TRUE  free frame count is below threshold.
162  *         NV_FALSE otherwise.
163  */
_pmaCheckFreeFramesToSkipReclaim(PMA * pPma)164 static NvBool _pmaCheckFreeFramesToSkipReclaim(PMA *pPma)
165 {
166     return (100 * pPma->pmaStats.numFreeFrames <
167              (pPma->pmaStats.num2mbPages * (_PMA_2MB >> PMA_PAGE_SHIFT) * pPma->numaReclaimSkipThreshold));
168 }
169 
170 /*!
171  * Translate a page returned by kernel to internal PMA page offset.
172  * @return NV_OK if the translation is successful.
173  *         NV_ERR_INVALID_STATE if the address is out of bound of PMA region
174  */
175 static NV_STATUS
_pmaTranslateKernelPage(PMA * pPma,NvU64 sysPhysAddr,NvU64 pageSize,NvU64 * pGpaPhysAddr)176 _pmaTranslateKernelPage
177 (
178     PMA   *pPma,
179     NvU64  sysPhysAddr,
180     NvU64  pageSize,
181     NvU64 *pGpaPhysAddr
182 )
183 {
184     NV_ASSERT_OR_RETURN(pGpaPhysAddr != NULL, NV_ERR_INVALID_ARGUMENT);
185 
186     // Check returned page against online region
187     if ((sysPhysAddr < pPma->coherentCpuFbBase) ||
188         ((sysPhysAddr + pageSize) > (pPma->coherentCpuFbBase + pPma->coherentCpuFbSize)))
189     {
190         return NV_ERR_INVALID_STATE;
191     }
192 
193     *pGpaPhysAddr = sysPhysAddr - pPma->coherentCpuFbBase;
194 
195     // Check returned page against internal PMA structures
196     return pmaCheckRangeAgainstRegionDesc(pPma, *pGpaPhysAddr, pageSize);
197 }
198 
199 /*!
200  * @brief  Allocate contiguous memory for Numa
201  *
202  */
_pmaNumaAllocateRange(PMA * pPma,NvU32 numaNodeId,NvLength actualSize,NvU64 pageSize,NvU64 * pPages,NvBool bScrubOnAlloc,NvBool allowEvict,NvS32 * validRegionList,NvU64 * allocatedCount)203 NV_STATUS _pmaNumaAllocateRange
204 (
205     PMA     *pPma,
206     NvU32    numaNodeId,
207     NvLength actualSize,
208     NvU64    pageSize,
209     NvU64   *pPages,
210     NvBool   bScrubOnAlloc,
211     NvBool   allowEvict,
212     NvS32   *validRegionList,
213     NvU64   *allocatedCount
214 )
215 {
216     NV_STATUS   status = NV_ERR_NO_MEMORY;
217     NvU64  sysPhysAddr = 0, gpaPhysAddr = 0, evictStart = 0, evictEnd = 0;
218     NvU32 flags = OS_ALLOC_PAGES_NODE_NONE;
219     *allocatedCount    = 0;
220 
221     NV_ASSERT_OR_RETURN(actualSize >= osGetPageSize(), NV_ERR_INVALID_ARGUMENT);
222 
223     // check if numFreeFrames(64KB) are below a certain % of PMA managed memory(indicated by num2mbPages).
224     if (_pmaCheckFreeFramesToSkipReclaim(pPma))
225     {
226         flags = OS_ALLOC_PAGES_NODE_SKIP_RECLAIM;
227     }
228 
229     portSyncSpinlockRelease(pPma->pPmaLock);
230 
231     // Try to allocate contiguous allocation of actualSize from OS. Do not force RECLAIM
232     status = osAllocPagesNode((int)numaNodeId, (NvLength)actualSize, flags, &sysPhysAddr);
233 
234     if (status == NV_OK)
235     {
236         NvU8 osPageShift = osGetPageShift();
237 
238         // Skip the first page as it is refcounted at allocation.
239         osAllocAcquirePage(sysPhysAddr + (1 << osPageShift), (actualSize >> osPageShift) - 1);
240 
241         // GPA needs to be acquired by shifting by the ATS aperture base address
242         status = _pmaTranslateKernelPage(pPma, sysPhysAddr, actualSize, &gpaPhysAddr);
243         if (status != NV_OK)
244         {
245             NV_PRINTF(LEVEL_ERROR, "Alloc from OS invalid for sysPhysAddr = 0x%llx actualSize = 0x%llx!\n",
246                                    sysPhysAddr, actualSize);
247             goto exit;
248         }
249 
250         *allocatedCount = 1;
251 
252         if (bScrubOnAlloc)
253         {
254             PSCRUB_NODE pPmaScrubList = NULL;
255             NvU64 count;
256 
257             if ((status = scrubSubmitPages(pPma->pScrubObj, (NvU32)actualSize, &gpaPhysAddr,
258                                            1, &pPmaScrubList, &count)) != NV_OK)
259             {
260                 status = NV_ERR_INSUFFICIENT_RESOURCES;
261                 goto scrub_exit;
262             }
263 
264             if (count > 0)
265                 _pmaClearScrubBit(pPma, pPmaScrubList, count);
266 
267             if ((status = _pmaCheckScrubbedPages(pPma, actualSize, &gpaPhysAddr, 1)) != NV_OK)
268             {
269                 status = NV_ERR_INSUFFICIENT_RESOURCES;
270             }
271 
272 scrub_exit:
273             portMemFree(pPmaScrubList);
274 
275             if (status == NV_ERR_INSUFFICIENT_RESOURCES)
276             {
277                 NV_PRINTF(LEVEL_ERROR, "ERROR: scrubber OOM!\n");
278             }
279         }
280 
281         portSyncSpinlockAcquire(pPma->pPmaLock);
282         goto allocated;
283     }
284 
285 exit:
286     portSyncSpinlockAcquire(pPma->pPmaLock);
287 
288     NV_PRINTF(LEVEL_INFO, "Allocate from OS failed for allocation size = %lld!\n",
289                                (NvU64) actualSize);
290 
291 
292     if (allowEvict)
293     {
294         // Check if UVM has evictable contiguous allocations of actualSize
295         status = _pmaNumaAvailableEvictableRange(pPma, validRegionList,
296                                              actualSize, pageSize,
297                                              &evictStart, &evictEnd);
298     }
299 
300     if ((status == NV_OK) && (evictEnd - evictStart + 1) >=  actualSize)
301     {
302         void *pMap = NULL;
303         NvU32 regId;
304         MEMORY_PROTECTION prot;
305 
306         NV_ASSERT((evictEnd - evictStart + 1) ==  actualSize);
307         status = NV_ERR_NO_MEMORY;
308         regId = findRegionID(pPma, evictStart);
309         pMap  = pPma->pRegions[regId];
310         prot = pPma->pRegDescriptors[regId]->bProtected ? MEMORY_PROTECTION_PROTECTED :
311                                                           MEMORY_PROTECTION_UNPROTECTED;
312 
313         if (pMap != NULL)
314         {
315             //
316             // Call UVM to evict the contiguous allocation and evict the rest to OS
317             // UVM will call into PMA to free this contiguous range along with any excesses.
318             // PMA will release only the excess allocation to OS in the free routine.
319             // i.e., region evictStart to evictEnd is marked as 'ATTRIB_EVICTING' and will not
320             // be returned to OS.
321             //
322             status = _pmaEvictContiguous(pPma, pMap, evictStart, evictEnd, prot);
323 
324             if (status == NV_ERR_NO_MEMORY)
325             {
326                 NV_PRINTF(LEVEL_INFO, "Eviction Failed = %llx to %llx!\n", evictStart, evictEnd);
327             }
328             else
329             {
330                 NV_PRINTF(LEVEL_INFO, "Eviction succeeded = %llx to %llx Scrub status 0x%x!\n",
331                                       evictStart, evictEnd, status);
332                 gpaPhysAddr =  evictStart;
333                 *allocatedCount = 1;
334             }
335         }
336         else
337         {
338             NV_PRINTF(LEVEL_INFO, "pMap NULL cannot perform eviction\n");
339         }
340     }
341 
342 
343 allocated:
344 
345     // GPA needs to be acquired by shifting by the ATS aperture base address
346     pPages[0] = gpaPhysAddr;
347 
348     return status;
349 }
350 
/*!
 * @brief  Allocate discontiguous pages for Numa
 *
 * Allocates allocationCount pages of pageSize bytes each from the given
 * NUMA node via the OS allocator; when the OS falls short and allowEvict
 * is set, tries to evict ALLOC_UNPIN pages from UVM for the remainder.
 *
 * NOTE: entered with the PMA spinlock held; the lock is dropped across the
 * OS allocation and GPU scrubbing, then re-acquired before returning.
 *
 * @param[in]  pPma            PMA object
 * @param[in]  numaNodeId      NUMA node to allocate from
 * @param[in]  pageSize        size of each page in bytes
 * @param[in]  allocationCount number of pages requested
 * @param[out] pPages          receives the GPA of each allocated page
 * @param[in]  bScrubOnAlloc   scrub allocated pages with the GPU scrubber
 * @param[in]  allowEvict      allow UVM eviction for the shortfall
 * @param[in]  validRegionList candidate region ids; entries of -1 are skipped
 * @param[out] allocatedPages  number of pages actually allocated
 *
 * @return NV_OK on full success; NV_ERR_NO_MEMORY / NV_ERR_INSUFFICIENT_RESOURCES
 *         otherwise (allocatedPages may still report a partial count).
 */
static NV_STATUS _pmaNumaAllocatePages
(
    PMA     *pPma,
    NvU32    numaNodeId,
    NvU64    pageSize,
    NvLength allocationCount,
    NvU64   *pPages,
    NvBool   bScrubOnAlloc,
    NvBool   allowEvict,
    NvS32   *validRegionList,
    NvU64   *allocatedPages
)
{
    NV_STATUS status = NV_ERR_NO_MEMORY;
    NvU64     sysPhysAddr;
    NvU64     i = 0;
    NvU32     flags = OS_ALLOC_PAGES_NODE_NONE;
    NvU8      osPageShift = osGetPageShift();

    NV_ASSERT(allocationCount);
    NV_ASSERT_OR_RETURN(pageSize >= osGetPageSize(), NV_ERR_INVALID_ARGUMENT);

    // check if numFreeFrames are below certain % of PMA managed memory.
    if (_pmaCheckFreeFramesToSkipReclaim(pPma))
    {
        flags = OS_ALLOC_PAGES_NODE_SKIP_RECLAIM;
    }

    // Drop the PMA lock across the OS allocation loop, which may block.
    portSyncSpinlockRelease(pPma->pPmaLock);

    for (; i < allocationCount; i++)
    {
        status = osAllocPagesNode((int)numaNodeId, (NvLength) pageSize, flags, &sysPhysAddr);
        if (status != NV_OK)
        {
            NV_PRINTF(LEVEL_ERROR, "Alloc from OS failed for i= %lld allocationCount = %lld pageSize = %lld!\n",
                                   i, (NvU64) allocationCount, (NvU64) pageSize);
            break;
        }

        // GPA needs to be acquired by shifting by the ATS aperture base address
        status = _pmaTranslateKernelPage(pPma, sysPhysAddr, pageSize, &pPages[i]);
        if (status != NV_OK)
        {
            NV_PRINTF(LEVEL_ERROR, "Alloc from OS invalid for i= %lld allocationCount = %lld pageSize = %lld!\n",
                                   i, (NvU64) allocationCount, (NvU64) pageSize);
            break;
        }

        // Skip the first page as it is refcounted at allocation.
        osAllocAcquirePage(sysPhysAddr + (1 << osPageShift), (pageSize >> osPageShift) - 1);
    }

    // Scrub whatever was obtained so far (i pages), even on a partial break.
    if (bScrubOnAlloc && (i > 0))
    {
        PSCRUB_NODE pPmaScrubList = NULL;
        NvU64 count;

        if ((status = scrubSubmitPages(pPma->pScrubObj, pageSize, pPages,
                                       i, &pPmaScrubList, &count)) != NV_OK)
        {
            status = NV_ERR_INSUFFICIENT_RESOURCES;
            goto scrub_exit;
        }

        if (count > 0)
            _pmaClearScrubBit(pPma, pPmaScrubList, count);

        // Wait for the submitted scrub work to complete.
        if ((status = _pmaCheckScrubbedPages(pPma, pageSize, pPages, (NvU32)i)) != NV_OK)
        {
            status = NV_ERR_INSUFFICIENT_RESOURCES;
        }

scrub_exit:
        portMemFree(pPmaScrubList);

        if (status == NV_ERR_INSUFFICIENT_RESOURCES)
        {
            NV_PRINTF(LEVEL_ERROR, "ERROR: scrubber OOM!\n");
            portSyncSpinlockAcquire(pPma->pPmaLock);
            goto exit;
        }
    }

    portSyncSpinlockAcquire(pPma->pPmaLock);

    // OS could not satisfy the full request; fall back to UVM eviction.
    if (( i < allocationCount) && allowEvict)
    {
        NvU32 regionIdx;

        // Check if there is atleast one evictable page
        status = _pmaNumaAvailableEvictablePage(pPma, validRegionList);

        if (status != NV_OK)
        {
            goto exit;
        }

        status = NV_ERR_NO_MEMORY;

        for (regionIdx = 0; regionIdx < pPma->regSize; regionIdx++)
        {
            NvU32 regId;
            NvU64 addrBase, addrLimit;
            void *pMap = NULL;
            MEMORY_PROTECTION prot;

            if (validRegionList[regionIdx] == -1)
            {
                continue;
            }

            regId = (NvU32)validRegionList[regionIdx];
            pMap  = pPma->pRegions[regId];

            addrBase = pPma->pRegDescriptors[regId]->base;
            addrLimit = pPma->pRegDescriptors[regId]->limit;
            prot = pPma->pRegDescriptors[regId]->bProtected ? MEMORY_PROTECTION_PROTECTED :
                                                              MEMORY_PROTECTION_UNPROTECTED;

            // Evict the remaining (allocationCount - i) pages; the already
            // allocated pPages[0..i) are passed so they are not re-chosen.
            status = _pmaEvictPages(pPma, pMap,
                                    &pPages[i], (NvU32)(allocationCount - i),
                                    &pPages[0], i,
                                    pageSize, addrBase, addrLimit, prot);

            if (status != NV_ERR_NO_MEMORY)
            {
                NV_PRINTF(LEVEL_INFO, "Frames %lld evicted in region %d of total allocationCount %lld Scrub status 0x%x!\n",
                                      i, regionIdx,  (NvU64) allocationCount, status);
                //
                // UVM can over evict, but will call into PMA only to evict the excess.
                // free startAddr + actualSize, (uvmAllocatedSize - actualSize) to OS.
                // Assume no under eviction. Overeviction is taken care of by the free routine.
                //
                i = allocationCount;
                break;
            }

            NV_PRINTF(LEVEL_INFO, "Eviction Failed %d pages !\n", (NvU32) (allocationCount - i));
        }

    }

exit:
    *allocatedPages = i;

    return status;
}
503 
504 
/*!
 * @brief Top-level NUMA allocation entry point for PMA.
 *
 * Validates the request, selects candidate regions, then dispatches to the
 * contiguous (_pmaNumaAllocateRange) or discontiguous (_pmaNumaAllocatePages)
 * path. On success, marks the allocated frames PIN/UNPIN in the region maps;
 * on any failure, frees everything that was allocated and reports
 * NV_ERR_NO_MEMORY.
 *
 * @param[in]     pPma              PMA object (must be in NUMA mode and onlined)
 * @param[in]     allocationCount   number of pages requested
 * @param[in]     pageSize          page size in bytes (<= 512MB contiguity)
 * @param[in,out] allocationOptions flags in; resultFlags/numPagesAllocated out
 * @param[out]    pPages            receives the GPA of each allocated page
 *
 * @return NV_OK, NV_ERR_INVALID_ARGUMENT, NV_ERR_INVALID_STATE, or
 *         NV_ERR_NO_MEMORY.
 */
NV_STATUS pmaNumaAllocate
(
    PMA                    *pPma,
    NvLength                allocationCount,
    NvU64                   pageSize,
    PMA_ALLOCATION_OPTIONS *allocationOptions,
    NvU64                  *pPages
)
{
    NvU32    i;
    NV_STATUS  status     = NV_OK;
    NvU32    numaNodeId   = pPma->numaNodeId;
    NvS32    regionList[PMA_REGION_SIZE];
    NvU32    flags        = allocationOptions->flags;
    NvLength allocSize    = 0;
    NvLength allocCount   = 0;
    NvU32    contigFlag   = !!(flags & PMA_ALLOCATE_CONTIGUOUS);
    // As per bug #2444368, kernel scrubbing is too slow. Use the GPU scrubber instead
    NvBool bScrubOnAlloc  = !(flags & PMA_ALLOCATE_NO_ZERO);
    NvBool    allowEvict  = !(flags & PMA_ALLOCATE_DONT_EVICT);
    NvBool   partialFlag  = !!(flags & PMA_ALLOCATE_ALLOW_PARTIAL);
    NvBool bSkipScrubFlag = !!(flags & PMA_ALLOCATE_NO_ZERO);

    NvU64    finalAllocatedCount = 0;

    if (!pPma->bNuma)
    {
        NV_PRINTF(LEVEL_FATAL, "Cannot allocate from NUMA node %d on a non-NUMA system.\n",
                                numaNodeId);
        return NV_ERR_INVALID_ARGUMENT;
    }

    if (pageSize > _PMA_512MB)
    {
        NV_PRINTF(LEVEL_FATAL, "Cannot allocate with more than 512MB contiguity.\n");
        return NV_ERR_INVALID_ARGUMENT;
    }

    if (pPma->nodeOnlined != NV_TRUE)
    {
        NV_PRINTF(LEVEL_INFO, "Cannot allocate from NUMA node %d before it is onlined.\n",
                               numaNodeId);
        return NV_ERR_INVALID_STATE;
    }

    if (contigFlag)
    {
        // Guard against overflow of allocationCount * pageSize and cap at 4GB.
        NvU64 contigTotal;
        if (!portSafeMulU64(allocationCount, pageSize, &contigTotal) || contigTotal > NV_U32_MAX)
        {
            NV_PRINTF(LEVEL_FATAL, "Cannot allocate more than 4GB contiguous memory in one call.\n");
            return NV_ERR_INVALID_ARGUMENT;
        }
    }

    // We are not changing the state. Can be outside the lock perhaps
    NV_CHECK_OK_OR_RETURN(LEVEL_FATAL, pmaSelector(pPma, allocationOptions, regionList));

    //
    // Scrub on free is enabled for this allocation request if the feature is enabled and the
    // caller does not want to skip scrubber.
    // Caller may want to skip scrubber when it knows the memory is zero'ed or when we are
    // initializing RM structures needed by the scrubber itself.
    //
    if (pPma->bScrubOnFree && !bSkipScrubFlag)
    {
        portSyncMutexAcquire(pPma->pAllocLock);
        portSyncRwLockAcquireRead(pPma->pScrubberValidLock);

        if (pmaPortAtomicGet(&pPma->scrubberValid) != PMA_SCRUBBER_VALID)
        {
            NV_PRINTF(LEVEL_WARNING, "PMA object is not valid\n");
            portSyncRwLockReleaseRead(pPma->pScrubberValidLock);
            portSyncMutexRelease(pPma->pAllocLock);
            return NV_ERR_INVALID_STATE;
        }
    }
    else
    {
        //
        // Scrub-on-free feature is OFF, therefore we cannot do scrub-on-alloc
        // either because it uses the same HW
        //
        bScrubOnAlloc = NV_FALSE;
    }

    //
    // In the NUMA path, scrub on free does not provide enough safety guarantees
    // because pages are released to the kernel and they can be reused by other
    // processes. Therefore, we can only guarantee that the returned pages are
    // zero if scrub on alloc is used.
    //
    allocationOptions->resultFlags = (bScrubOnAlloc)? PMA_ALLOCATE_RESULT_IS_ZERO : 0;

    portSyncSpinlockAcquire(pPma->pPmaLock);

    if (contigFlag)
    {
        // Contiguous: one range of allocationCount * pageSize bytes.
        allocCount = 1;
        allocSize  = allocationCount * pageSize;
        status     = _pmaNumaAllocateRange(pPma, numaNodeId, allocSize, pageSize, pPages, bScrubOnAlloc, allowEvict, regionList, &finalAllocatedCount);
    }
    else
    {
        // Discontiguous: allocationCount independent pages of pageSize bytes.
        allocCount = allocationCount;
        allocSize  = pageSize;
        status     = _pmaNumaAllocatePages(pPma, numaNodeId, (NvU32) allocSize, allocCount, pPages, bScrubOnAlloc, allowEvict, regionList, &finalAllocatedCount);
    }

    // Partial allocations are accepted as success when the caller allows them.
    if ((status == NV_ERR_NO_MEMORY) && partialFlag && (finalAllocatedCount > 0))
    {
        status = NV_OK;
    }

    if (status == NV_OK)
    {
        NvU32  regId;
        void  *pMap = NULL;
        NvU64  regAddrBase;
        NvU64  frameOffset;
        NvU64  frameCount = 0;
        PMA_PAGESTATUS curStatus = STATE_FREE;
        PMA_PAGESTATUS allocOption = !!(flags & PMA_ALLOCATE_PINNED) ?
                                        STATE_PIN : STATE_UNPIN;

        NV_PRINTF(LEVEL_INFO, "SUCCESS allocCount %lld, allocsize %lld eviction? %s pinned ? %s contig? %s\n",
                              (NvU64) allocCount,(NvU64) allocSize, (flags & PMA_ALLOCATE_DONT_EVICT) ?  "NOTALLOWED" : "ALLOWED",
                               !!(flags & PMA_ALLOCATE_PINNED) ? "PINNED" : "UNPINNED", contigFlag ? "CONTIG":"DISCONTIG");

        // Mark every frame of every allocated page PIN/UNPIN in the region maps.
        for (i = 0; i < finalAllocatedCount; i++)
        {
            NvU32 j;

            regId = findRegionID(pPma, pPages[i]);
            pMap  = pPma->pRegions[regId];
            regAddrBase = pPma->pRegDescriptors[regId]->base;
            frameCount  = allocSize >> PMA_PAGE_SHIFT;

            for (j = 0; j < frameCount; j++)
            {
                frameOffset = PMA_ADDR2FRAME(pPages[i], regAddrBase) + j;

                curStatus = pPma->pMapInfo->pmaMapRead(pMap, frameOffset, NV_TRUE);

                // A frame still marked ATTRIB_EVICTING means eviction did not
                // complete; bail out and free the whole allocation below.
                if (curStatus & ATTRIB_EVICTING)
                {
                    status = NV_ERR_NO_MEMORY;
                    break;
                }
                pPma->pMapInfo->pmaMapChangeStateAttrib(pMap, frameOffset, allocOption, MAP_MASK);
            }
            if (status != NV_OK)
                break;
        }

        pPma->pStatsUpdateCb(pPma->pStatsUpdateCtx, pPma->pmaStats.numFreeFrames);

        if (status == NV_OK)
        {
            allocationOptions->numPagesAllocated = (NvLength)finalAllocatedCount;
        }
    }

    if (status != NV_OK)
    {
        NV_PRINTF(LEVEL_INFO, "FAILED allocCount %lld, allocsize %lld eviction? %s pinned ? %s contig? %s\n",
                              (NvU64) allocCount, (NvU64) allocSize, (flags & PMA_ALLOCATE_DONT_EVICT) ?  "NOTALLOWED" : "ALLOWED",
                              !!(flags & PMA_ALLOCATE_PINNED) ? "PINNED" : "UNPINNED", contigFlag ? "CONTIG":"DISCONTIG");
        //
        // Free the entire allocation if scrubbing failed or if we had allocated evicting allocations.
        // Evicting allocation will be handled in the pmaEvictContiguous
        //
        if (finalAllocatedCount > 0)
            pmaNumaFreeInternal(pPma, pPages, finalAllocatedCount, pageSize, 0);

        status = NV_ERR_NO_MEMORY;
    }

    portSyncSpinlockRelease(pPma->pPmaLock);

    if (pPma->bScrubOnFree && !bSkipScrubFlag)
    {
        portSyncRwLockReleaseRead(pPma->pScrubberValidLock);
        portSyncMutexRelease(pPma->pAllocLock);
    }

    return status;
}
694 
/*!
 * @brief Free NUMA pages: release frames to the OS and mark them FREE in PMA.
 *
 * Frames carrying ATTRIB_EVICTING are NOT released to the OS — they are being
 * handed to a new owner by the eviction path and only get tagged for reuse.
 *
 * @param[in] pPma      PMA object
 * @param[in] pPages    GPAs of the pages to free
 * @param[in] pageCount number of entries in pPages
 * @param[in] size      size of each page in bytes
 * @param[in] flag      unused in this routine
 */
void pmaNumaFreeInternal
(
    PMA   *pPma,
    NvU64 *pPages,
    NvU64  pageCount,
    NvU64  size,
    NvU32  flag
)
{
    NvU64 i, j;
    NvU8 osPageShift = osGetPageShift();

    // PMA frames must be at least as large as OS pages for the release math below.
    NV_ASSERT_OR_RETURN_VOID(PMA_PAGE_SHIFT >= osPageShift);

    NV_PRINTF(LEVEL_INFO, "Freeing pPage[0] = %llx pageCount %lld\n", pPages[0], pageCount);

    for (i = 0; i < pageCount; i++)
    {
        NvU32 regId;
        NvU64 addrBase;
        NvU64 sysPhysAddr = 0;
        NvU64 frameNum;
        NvU64 framesPerPage;

        // Shift the GPA to acquire the bus address (SPA)
        NV_ASSERT(pPages[i] < pPma->coherentCpuFbSize);

        regId    = findRegionID(pPma, pPages[i]);
        addrBase = pPma->pRegDescriptors[regId]->base;
        frameNum = PMA_ADDR2FRAME(pPages[i], addrBase);
        framesPerPage = size >> PMA_PAGE_SHIFT;
        sysPhysAddr   = pPages[i] + pPma->coherentCpuFbBase;

        for (j = 0; j < framesPerPage; j++)
        {
            PMA_PAGESTATUS newStatus = STATE_FREE;
            PMA_PAGESTATUS currentStatus;
            NvU64 sysPagePhysAddr = 0;
            currentStatus = pPma->pMapInfo->pmaMapRead(pPma->pRegions[regId], (frameNum + j), NV_TRUE);

            //
            // When the pages are marked for evicting, we will skip free the page to OS
            // in order to reuse the page.
            //
            if (currentStatus & ATTRIB_EVICTING)
            {
                //
                // Evicting allocations are returned to new client and will be freed later.
                // We set the ATTRIB_NUMA_REUSE bit here just in case eviction fails later and we
                // need to release the page to OS in the allocation path.
                //
                if (currentStatus & STATE_UNPIN)
                {
                    pPma->pMapInfo->pmaMapChangeStateAttrib(pPma->pRegions[regId], (frameNum + j),
                                                            ATTRIB_NUMA_REUSE, ATTRIB_NUMA_REUSE);
                }
                continue;
            }
            // Release the OS pages backing this PMA frame, then mark it FREE
            // (clearing everything except ATTRIB_EVICTING, which is preserved).
            sysPagePhysAddr = sysPhysAddr + (j << PMA_PAGE_SHIFT);
            osAllocReleasePage(sysPagePhysAddr, 1 << (PMA_PAGE_SHIFT - osPageShift));
            pPma->pMapInfo->pmaMapChangeStateAttrib(pPma->pRegions[regId], (frameNum + j), newStatus, ~ATTRIB_EVICTING);
        }
    }

    pPma->pStatsUpdateCb(pPma->pStatsUpdateCtx, pPma->pmaStats.numFreeFrames);
}
761 
/*!
 * @brief Publish a new skip-reclaim threshold percentage.
 *
 * Takes the PMA spinlock so the update is serialized with allocation paths
 * that read numaReclaimSkipThreshold.
 *
 * @param[in] pPma               PMA object
 * @param[in] skipReclaimPercent new threshold percentage
 */
void pmaNumaSetReclaimSkipThreshold(PMA *pPma, NvU32 skipReclaimPercent)
{
    portSyncSpinlockAcquire(pPma->pPmaLock);

    pPma->numaReclaimSkipThreshold = skipReclaimPercent;

    portSyncSpinlockRelease(pPma->pPmaLock);
}
768