1 /*
2 * SPDX-FileCopyrightText: Copyright (c) 2015-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
3 * SPDX-License-Identifier: MIT
4 *
5 * Permission is hereby granted, free of charge, to any person obtaining a
6 * copy of this software and associated documentation files (the "Software"),
7 * to deal in the Software without restriction, including without limitation
8 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
9 * and/or sell copies of the Software, and to permit persons to whom the
10 * Software is furnished to do so, subject to the following conditions:
11 *
12 * The above copyright notice and this permission notice shall be included in
13 * all copies or substantial portions of the Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
21 * DEALINGS IN THE SOFTWARE.
22 */
23
24 /*!
25 * @file
26 *
27 * @brief Implementation for the NUMA interfaces, used by parent module PMA only.
28 * This file interfaces with the RM Linux layer which interfaces with the
29 * Linux kernel.
30 */
31
32 #include "gpu/mem_mgr/phys_mem_allocator/numa.h"
33 #include "gpu/mem_mgr/phys_mem_allocator/phys_mem_allocator_util.h"
34 #include "gpu/mem_mgr/mem_scrub.h"
35 #include "utils/nvprintf.h"
36 #include "utils/nvassert.h"
37 #include "os/os.h"
38
39 //
40 // Local helper functions and declarations
41 //
42
43 //TODO merge or nuke these functions
44 static NV_STATUS _pmaNumaAvailableEvictablePage(PMA *pPma, NvS32 *validRegionList);
45 static NV_STATUS _pmaNumaAvailableEvictableRange(PMA *pPma, NvS32 *validRegionList,
46 NvLength actualSize, NvU64 pageSize, NvU64 *evictStart, NvU64 *evictEnd);
47 static NV_STATUS _pmaNumaAllocateRange(PMA *pPma, NvU32 numaNodeId, NvLength actualSize,
48 NvU64 pageSize, NvU64 *pPages, NvBool bScrubOnAlloc, NvBool allowEvict, NvS32 *validRegionList,
49 NvU64 *allocatedCount);
50 static NV_STATUS _pmaNumaAllocatePages (PMA *pPma, NvU32 numaNodeId, NvU64 pageSize,
51 NvLength allocationCount, NvU64 *pPages, NvBool bScrubOnAlloc, NvBool allowEvict, NvS32 *validRegionList,
52 NvU64 *allocatedPages);
53
54 /*!
55 * @brief Check if there is at least one evictable page from UVM.
56 */
_pmaNumaAvailableEvictablePage(PMA * pPma,NvS32 * validRegionList)57 static NV_STATUS _pmaNumaAvailableEvictablePage
58 (
59 PMA *pPma,
60 NvS32 *validRegionList
61 )
62 {
63 NvU32 regionIdx;
64 PMA_PAGESTATUS frameState;
65 void *pMap = NULL;
66 NV_STATUS status = NV_ERR_NO_MEMORY;
67
68 for (regionIdx = 0; regionIdx < pPma->regSize; regionIdx++)
69 {
70 NvU32 regId, frameNum;
71 NvU64 totalFrames;
72
73 regId = (NvU32)validRegionList[regionIdx];
74
75 if (validRegionList[regionIdx] == -1)
76 continue;
77
78 pMap = pPma->pRegions[regId];
79 pPma->pMapInfo->pmaMapGetSize(pMap, &totalFrames);
80 totalFrames >>= PMA_PAGE_SHIFT;
81
82 for (frameNum = 0; frameNum < totalFrames; frameNum++)
83 {
84 frameState = pPma->pMapInfo->pmaMapRead(pMap, frameNum, NV_TRUE);
85 if ((frameState & STATE_MASK) == STATE_UNPIN)
86 {
87 status = NV_OK;
88 break;
89 }
90 }
91
92 if (status == NV_OK)
93 break;
94 }
95
96 if (status == NV_OK)
97 NV_PRINTF(LEVEL_INFO, "Evictable frame: FOUND\n");
98 else
99 NV_PRINTF(LEVEL_INFO, "Evictable frame: NOT FOUND\n");
100
101 return status;
102 }
103
104 /*!
105 * @brief Check if there is a contiguous range of
106 * evictable frame with UVM and get the start
107 * and end address if there is
108 * In NUMA, OS manages memory and PMA will only track allocated memory in ALLOC_PIN
109 * and ALLOC_UNPIN state. FREE memory is managed by OS and cannot be tracked by PMA
110 * and hence PMA cannot consider FREE memory for eviction and can only consider frames
111 * in known state to PMA or eviction. ALLOC_PIN cannot be evicted and hence only ALLOC_UNPIN
112 * can be evictable.
113 */
_pmaNumaAvailableEvictableRange(PMA * pPma,NvS32 * validRegionList,NvLength actualSize,NvU64 pageSize,NvU64 * evictStart,NvU64 * evictEnd)114 NV_STATUS _pmaNumaAvailableEvictableRange
115 (
116 PMA *pPma,
117 NvS32 *validRegionList,
118 NvLength actualSize,
119 NvU64 pageSize,
120 NvU64 *evictStart,
121 NvU64 *evictEnd
122 )
123 {
124 void *pMap = NULL;
125 NvU32 regionIdx;
126 NV_STATUS status = NV_ERR_NO_MEMORY;
127
128 if ((evictStart == NULL) || (evictEnd == NULL))
129 {
130 return NV_ERR_INVALID_ARGUMENT;
131 }
132
133 *evictStart = 0;
134 *evictEnd = 0;
135
136 for (regionIdx = 0; regionIdx < pPma->regSize; regionIdx++)
137 {
138 NvU64 addrBase;
139 NvU32 regId;
140
141 if (validRegionList[regionIdx] == -1)
142 continue;
143
144 regId = (NvU32)validRegionList[regionIdx];
145 pMap = pPma->pRegions[regId];
146 addrBase = pPma->pRegDescriptors[regId]->base;
147
148
149 if ((status = pPma->pMapInfo->pmaMapScanContiguousNumaEviction(pMap, addrBase, actualSize,
150 pageSize, evictStart, evictEnd)) == NV_OK)
151 {
152 break;
153 }
154 }
155
156 return status;
157 }
158
159 /*!
160 * Check if the number of free frames is below the skip threshold percentage of total.
161 * @return NV_TRUE free frame count is below threshold.
162 * NV_FALSE otherwise.
163 */
_pmaCheckFreeFramesToSkipReclaim(PMA * pPma)164 static NvBool _pmaCheckFreeFramesToSkipReclaim(PMA *pPma)
165 {
166 return (100 * pPma->pmaStats.numFreeFrames <
167 (pPma->pmaStats.num2mbPages * (_PMA_2MB >> PMA_PAGE_SHIFT) * pPma->numaReclaimSkipThreshold));
168 }
169
170 /*!
171 * Translate a page returned by kernel to internal PMA page offset.
172 * @return NV_OK if the translation is successful.
173 * NV_ERR_INVALID_STATE if the address is out of bound of PMA region
174 */
175 static NV_STATUS
_pmaTranslateKernelPage(PMA * pPma,NvU64 sysPhysAddr,NvU64 pageSize,NvU64 * pGpaPhysAddr)176 _pmaTranslateKernelPage
177 (
178 PMA *pPma,
179 NvU64 sysPhysAddr,
180 NvU64 pageSize,
181 NvU64 *pGpaPhysAddr
182 )
183 {
184 NV_ASSERT_OR_RETURN(pGpaPhysAddr != NULL, NV_ERR_INVALID_ARGUMENT);
185
186 // Check returned page against online region
187 if ((sysPhysAddr < pPma->coherentCpuFbBase) ||
188 ((sysPhysAddr + pageSize) > (pPma->coherentCpuFbBase + pPma->coherentCpuFbSize)))
189 {
190 return NV_ERR_INVALID_STATE;
191 }
192
193 *pGpaPhysAddr = sysPhysAddr - pPma->coherentCpuFbBase;
194
195 // Check returned page against internal PMA structures
196 return pmaCheckRangeAgainstRegionDesc(pPma, *pGpaPhysAddr, pageSize);
197 }
198
199 /*!
200 * @brief Allocate contiguous memory for Numa
201 *
202 */
_pmaNumaAllocateRange(PMA * pPma,NvU32 numaNodeId,NvLength actualSize,NvU64 pageSize,NvU64 * pPages,NvBool bScrubOnAlloc,NvBool allowEvict,NvS32 * validRegionList,NvU64 * allocatedCount)203 NV_STATUS _pmaNumaAllocateRange
204 (
205 PMA *pPma,
206 NvU32 numaNodeId,
207 NvLength actualSize,
208 NvU64 pageSize,
209 NvU64 *pPages,
210 NvBool bScrubOnAlloc,
211 NvBool allowEvict,
212 NvS32 *validRegionList,
213 NvU64 *allocatedCount
214 )
215 {
216 NV_STATUS status = NV_ERR_NO_MEMORY;
217 NvU64 sysPhysAddr = 0, gpaPhysAddr = 0, evictStart = 0, evictEnd = 0;
218 NvU32 flags = OS_ALLOC_PAGES_NODE_NONE;
219 *allocatedCount = 0;
220
221 NV_ASSERT_OR_RETURN(actualSize >= osGetPageSize(), NV_ERR_INVALID_ARGUMENT);
222
223 // check if numFreeFrames(64KB) are below a certain % of PMA managed memory(indicated by num2mbPages).
224 if (_pmaCheckFreeFramesToSkipReclaim(pPma))
225 {
226 flags = OS_ALLOC_PAGES_NODE_SKIP_RECLAIM;
227 }
228
229 portSyncSpinlockRelease(pPma->pPmaLock);
230
231 // Try to allocate contiguous allocation of actualSize from OS. Do not force RECLAIM
232 status = osAllocPagesNode((int)numaNodeId, (NvLength)actualSize, flags, &sysPhysAddr);
233
234 if (status == NV_OK)
235 {
236 NvU8 osPageShift = osGetPageShift();
237
238 // Skip the first page as it is refcounted at allocation.
239 osAllocAcquirePage(sysPhysAddr + (1 << osPageShift), (actualSize >> osPageShift) - 1);
240
241 // GPA needs to be acquired by shifting by the ATS aperture base address
242 status = _pmaTranslateKernelPage(pPma, sysPhysAddr, actualSize, &gpaPhysAddr);
243 if (status != NV_OK)
244 {
245 NV_PRINTF(LEVEL_ERROR, "Alloc from OS invalid for sysPhysAddr = 0x%llx actualSize = 0x%llx!\n",
246 sysPhysAddr, actualSize);
247 goto exit;
248 }
249
250 *allocatedCount = 1;
251
252 if (bScrubOnAlloc)
253 {
254 PSCRUB_NODE pPmaScrubList = NULL;
255 NvU64 count;
256
257 if ((status = scrubSubmitPages(pPma->pScrubObj, (NvU32)actualSize, &gpaPhysAddr,
258 1, &pPmaScrubList, &count)) != NV_OK)
259 {
260 status = NV_ERR_INSUFFICIENT_RESOURCES;
261 goto scrub_exit;
262 }
263
264 if (count > 0)
265 _pmaClearScrubBit(pPma, pPmaScrubList, count);
266
267 if ((status = _pmaCheckScrubbedPages(pPma, actualSize, &gpaPhysAddr, 1)) != NV_OK)
268 {
269 status = NV_ERR_INSUFFICIENT_RESOURCES;
270 }
271
272 scrub_exit:
273 portMemFree(pPmaScrubList);
274
275 if (status == NV_ERR_INSUFFICIENT_RESOURCES)
276 {
277 NV_PRINTF(LEVEL_ERROR, "ERROR: scrubber OOM!\n");
278 }
279 }
280
281 portSyncSpinlockAcquire(pPma->pPmaLock);
282 goto allocated;
283 }
284
285 exit:
286 portSyncSpinlockAcquire(pPma->pPmaLock);
287
288 NV_PRINTF(LEVEL_INFO, "Allocate from OS failed for allocation size = %lld!\n",
289 (NvU64) actualSize);
290
291
292 if (allowEvict)
293 {
294 // Check if UVM has evictable contiguous allocations of actualSize
295 status = _pmaNumaAvailableEvictableRange(pPma, validRegionList,
296 actualSize, pageSize,
297 &evictStart, &evictEnd);
298 }
299
300 if ((status == NV_OK) && (evictEnd - evictStart + 1) >= actualSize)
301 {
302 void *pMap = NULL;
303 NvU32 regId;
304 MEMORY_PROTECTION prot;
305
306 NV_ASSERT((evictEnd - evictStart + 1) == actualSize);
307 status = NV_ERR_NO_MEMORY;
308 regId = findRegionID(pPma, evictStart);
309 pMap = pPma->pRegions[regId];
310 prot = pPma->pRegDescriptors[regId]->bProtected ? MEMORY_PROTECTION_PROTECTED :
311 MEMORY_PROTECTION_UNPROTECTED;
312
313 if (pMap != NULL)
314 {
315 //
316 // Call UVM to evict the contiguous allocation and evict the rest to OS
317 // UVM will call into PMA to free this contiguous range along with any excesses.
318 // PMA will release only the excess allocation to OS in the free routine.
319 // i.e., region evictStart to evictEnd is marked as 'ATTRIB_EVICTING' and will not
320 // be returned to OS.
321 //
322 status = _pmaEvictContiguous(pPma, pMap, evictStart, evictEnd, prot);
323
324 if (status == NV_ERR_NO_MEMORY)
325 {
326 NV_PRINTF(LEVEL_INFO, "Eviction Failed = %llx to %llx!\n", evictStart, evictEnd);
327 }
328 else
329 {
330 NV_PRINTF(LEVEL_INFO, "Eviction succeeded = %llx to %llx Scrub status 0x%x!\n",
331 evictStart, evictEnd, status);
332 gpaPhysAddr = evictStart;
333 *allocatedCount = 1;
334 }
335 }
336 else
337 {
338 NV_PRINTF(LEVEL_INFO, "pMap NULL cannot perform eviction\n");
339 }
340 }
341
342
343 allocated:
344
345 // GPA needs to be acquired by shifting by the ATS aperture base address
346 pPages[0] = gpaPhysAddr;
347
348 return status;
349 }
350
/*!
 * @brief Allocate discontiguous pages for Numa.
 *
 * Allocates up to allocationCount pages of pageSize bytes each from the
 * kernel (osAllocPagesNode), translating each returned system address into
 * a PMA-internal GPA stored in pPages[]. If the kernel cannot supply all
 * pages and allowEvict is set, the remainder is obtained by evicting UVM
 * ALLOC_UNPIN frames.
 *
 * Locking: entered with pPma->pPmaLock held; the lock is released around
 * the OS allocation/scrub phase and re-acquired before the eviction path
 * and before returning.
 *
 * @param[in]  pPma             PMA object
 * @param[in]  numaNodeId       NUMA node to allocate from
 * @param[in]  pageSize         Size in bytes of each page
 * @param[in]  allocationCount  Number of pages requested
 * @param[out] pPages           Receives the GPA of each allocated page
 * @param[in]  bScrubOnAlloc    Scrub allocated pages with the GPU scrubber
 * @param[in]  allowEvict       Allow eviction when the OS cannot supply pages
 * @param[in]  validRegionList  Region indices to consider; -1 marks unused slots
 * @param[out] allocatedPages   Number of pages actually allocated
 *
 * @return NV_OK when the request is satisfied; NV_ERR_NO_MEMORY or
 *         NV_ERR_INSUFFICIENT_RESOURCES otherwise (allocatedPages still
 *         reports the count obtained, for partial-allocation handling).
 */
static NV_STATUS _pmaNumaAllocatePages
(
    PMA     *pPma,
    NvU32    numaNodeId,
    NvU64    pageSize,
    NvLength allocationCount,
    NvU64   *pPages,
    NvBool   bScrubOnAlloc,
    NvBool   allowEvict,
    NvS32   *validRegionList,
    NvU64   *allocatedPages
)
{
    NV_STATUS status = NV_ERR_NO_MEMORY;
    NvU64     sysPhysAddr;
    NvU64     i = 0;
    NvU32     flags = OS_ALLOC_PAGES_NODE_NONE;
    NvU8      osPageShift = osGetPageShift();

    NV_ASSERT(allocationCount);
    NV_ASSERT_OR_RETURN(pageSize >= osGetPageSize(), NV_ERR_INVALID_ARGUMENT);

    // check if numFreeFrames are below certain % of PMA managed memory.
    if (_pmaCheckFreeFramesToSkipReclaim(pPma))
    {
        flags = OS_ALLOC_PAGES_NODE_SKIP_RECLAIM;
    }

    // Drop the spinlock: the OS allocation below may sleep/reclaim.
    portSyncSpinlockRelease(pPma->pPmaLock);

    for (; i < allocationCount; i++)
    {
        status = osAllocPagesNode((int)numaNodeId, (NvLength) pageSize, flags, &sysPhysAddr);
        if (status != NV_OK)
        {
            NV_PRINTF(LEVEL_ERROR, "Alloc from OS failed for i= %lld allocationCount = %lld pageSize = %lld!\n",
                                    i, (NvU64) allocationCount, (NvU64) pageSize);
            break;
        }

        // GPA needs to be acquired by shifting by the ATS aperture base address
        status = _pmaTranslateKernelPage(pPma, sysPhysAddr, pageSize, &pPages[i]);
        if (status != NV_OK)
        {
            NV_PRINTF(LEVEL_ERROR, "Alloc from OS invalid for i= %lld allocationCount = %lld pageSize = %lld!\n",
                                    i, (NvU64) allocationCount, (NvU64) pageSize);
            break;
        }

        // Skip the first page as it is refcounted at allocation.
        osAllocAcquirePage(sysPhysAddr + (1 << osPageShift), (pageSize >> osPageShift) - 1);
    }

    // i now holds the number of pages successfully obtained from the OS.
    if (bScrubOnAlloc && (i > 0))
    {
        PSCRUB_NODE pPmaScrubList = NULL;
        NvU64 count;

        if ((status = scrubSubmitPages(pPma->pScrubObj, pageSize, pPages,
                                       i, &pPmaScrubList, &count)) != NV_OK)
        {
            status = NV_ERR_INSUFFICIENT_RESOURCES;
            goto scrub_exit;
        }

        if (count > 0)
            _pmaClearScrubBit(pPma, pPmaScrubList, count);

        if ((status = _pmaCheckScrubbedPages(pPma, pageSize, pPages, (NvU32)i)) != NV_OK)
        {
            status = NV_ERR_INSUFFICIENT_RESOURCES;
        }

scrub_exit:
        // The scrub list is allocated by scrubSubmitPages; always free it.
        portMemFree(pPmaScrubList);

        if (status == NV_ERR_INSUFFICIENT_RESOURCES)
        {
            NV_PRINTF(LEVEL_ERROR, "ERROR: scrubber OOM!\n");
            portSyncSpinlockAcquire(pPma->pPmaLock);
            goto exit;
        }
    }

    portSyncSpinlockAcquire(pPma->pPmaLock);

    // OS could not supply everything: fall back to evicting UVM pages.
    if (( i < allocationCount) && allowEvict)
    {
        NvU32 regionIdx;

        // Check if there is atleast one evictable page
        status = _pmaNumaAvailableEvictablePage(pPma, validRegionList);

        if (status != NV_OK)
        {
            goto exit;
        }

        status = NV_ERR_NO_MEMORY;

        for (regionIdx = 0; regionIdx < pPma->regSize; regionIdx++)
        {
            NvU32 regId;
            NvU64 addrBase, addrLimit;
            void *pMap = NULL;
            MEMORY_PROTECTION prot;

            // -1 marks an unused slot in the region list.
            if (validRegionList[regionIdx] == -1)
            {
                continue;
            }

            regId = (NvU32)validRegionList[regionIdx];
            pMap = pPma->pRegions[regId];

            addrBase = pPma->pRegDescriptors[regId]->base;
            addrLimit = pPma->pRegDescriptors[regId]->limit;
            prot = pPma->pRegDescriptors[regId]->bProtected ? MEMORY_PROTECTION_PROTECTED :
                                                              MEMORY_PROTECTION_UNPROTECTED;

            // Evict the (allocationCount - i) missing pages; the i pages
            // already allocated are passed so eviction does not reuse them.
            status = _pmaEvictPages(pPma, pMap,
                                    &pPages[i], (NvU32)(allocationCount - i),
                                    &pPages[0], i,
                                    pageSize, addrBase, addrLimit, prot);

            if (status != NV_ERR_NO_MEMORY)
            {
                NV_PRINTF(LEVEL_INFO, "Frames %lld evicted in region %d of total allocationCount %lld Scrub status 0x%x!\n",
                                       i, regionIdx, (NvU64) allocationCount, status);
                //
                // UVM can over evict, but will call into PMA only to evict the excess.
                // free startAddr + actualSize, (uvmAllocatedSize - actualSize) to OS.
                // Assume no under eviction. Overeviction is taken care of by the free routine.
                //
                i = allocationCount;
                break;
            }

            NV_PRINTF(LEVEL_INFO, "Eviction Failed %d pages !\n", (NvU32) (allocationCount - i));
        }

    }

exit:
    *allocatedPages = i;

    return status;
}
503
504
/*!
 * @brief NUMA allocation entry point, called by the parent PMA module.
 *
 * Validates the request, selects candidate regions, then dispatches to
 * _pmaNumaAllocateRange (contiguous) or _pmaNumaAllocatePages
 * (discontiguous). On success, marks each allocated PMA frame PIN/UNPIN in
 * the frame map; on any failure the whole allocation is rolled back via
 * pmaNumaFreeInternal.
 *
 * @param[in]     pPma              PMA object
 * @param[in]     allocationCount   Number of pages requested
 * @param[in]     pageSize          Page size (must not exceed _PMA_512MB)
 * @param[in,out] allocationOptions Request flags in; resultFlags and
 *                                  numPagesAllocated out
 * @param[out]    pPages            Receives the GPA of each allocated page
 *
 * @return NV_OK on (possibly partial) success, NV_ERR_INVALID_ARGUMENT /
 *         NV_ERR_INVALID_STATE on bad input or state, NV_ERR_NO_MEMORY on
 *         allocation failure.
 */
NV_STATUS pmaNumaAllocate
(
    PMA                    *pPma,
    NvLength                allocationCount,
    NvU64                   pageSize,
    PMA_ALLOCATION_OPTIONS *allocationOptions,
    NvU64                  *pPages
)
{
    NvU32    i;
    NV_STATUS  status = NV_OK;
    NvU32    numaNodeId = pPma->numaNodeId;
    NvS32    regionList[PMA_REGION_SIZE];
    NvU32    flags = allocationOptions->flags;
    NvLength allocSize = 0;
    NvLength allocCount = 0;
    NvU32    contigFlag = !!(flags & PMA_ALLOCATE_CONTIGUOUS);
    // As per bug #2444368, kernel scrubbing is too slow. Use the GPU scrubber instead
    NvBool bScrubOnAlloc = !(flags & PMA_ALLOCATE_NO_ZERO);
    NvBool    allowEvict = !(flags & PMA_ALLOCATE_DONT_EVICT);
    NvBool   partialFlag = !!(flags & PMA_ALLOCATE_ALLOW_PARTIAL);
    NvBool bSkipScrubFlag = !!(flags & PMA_ALLOCATE_NO_ZERO);

    NvU64    finalAllocatedCount = 0;

    if (!pPma->bNuma)
    {
        NV_PRINTF(LEVEL_FATAL, "Cannot allocate from NUMA node %d on a non-NUMA system.\n",
                                numaNodeId);
        return NV_ERR_INVALID_ARGUMENT;
    }

    if (pageSize > _PMA_512MB)
    {
        NV_PRINTF(LEVEL_FATAL, "Cannot allocate with more than 512MB contiguity.\n");
        return NV_ERR_INVALID_ARGUMENT;
    }

    if (pPma->nodeOnlined != NV_TRUE)
    {
        NV_PRINTF(LEVEL_INFO, "Cannot allocate from NUMA node %d before it is onlined.\n",
                               numaNodeId);
        return NV_ERR_INVALID_STATE;
    }

    if (contigFlag)
    {
        // Guard the allocationCount * pageSize multiply against overflow
        // and enforce the 4GB contiguous-size ceiling.
        NvU64 contigTotal;
        if (!portSafeMulU64(allocationCount, pageSize, &contigTotal) || contigTotal > NV_U32_MAX)
        {
            NV_PRINTF(LEVEL_FATAL, "Cannot allocate more than 4GB contiguous memory in one call.\n");
            return NV_ERR_INVALID_ARGUMENT;
        }
    }

    // We are not changing the state. Can be outside the lock perhaps
    NV_CHECK_OK_OR_RETURN(LEVEL_FATAL, pmaSelector(pPma, allocationOptions, regionList));

    //
    // Scrub on free is enabled for this allocation request if the feature is enabled and the
    // caller does not want to skip scrubber.
    // Caller may want to skip scrubber when it knows the memory is zero'ed or when we are
    // initializing RM structures needed by the scrubber itself.
    //
    if (pPma->bScrubOnFree && !bSkipScrubFlag)
    {
        portSyncMutexAcquire(pPma->pAllocLock);
        portSyncRwLockAcquireRead(pPma->pScrubberValidLock);

        if (pmaPortAtomicGet(&pPma->scrubberValid) != PMA_SCRUBBER_VALID)
        {
            NV_PRINTF(LEVEL_WARNING, "PMA object is not valid\n");
            // Unwind both locks before bailing out.
            portSyncRwLockReleaseRead(pPma->pScrubberValidLock);
            portSyncMutexRelease(pPma->pAllocLock);
            return NV_ERR_INVALID_STATE;
        }
    }
    else
    {
        //
        // Scrub-on-free feature is OFF, therefore we cannot do scrub-on-alloc
        // either because it uses the same HW
        //
        bScrubOnAlloc = NV_FALSE;
    }

    //
    // In the NUMA path, scrub on free does not provide enough safety guarantees
    // because pages are released to the kernel and they can be reused by other
    // processes. Therefore, we can only guarantee that the returned pages are
    // zero if scrub on alloc is used.
    //
    allocationOptions->resultFlags = (bScrubOnAlloc)? PMA_ALLOCATE_RESULT_IS_ZERO : 0;

    portSyncSpinlockAcquire(pPma->pPmaLock);

    if (contigFlag)
    {
        // Contiguous: one range of (allocationCount * pageSize) bytes.
        allocCount = 1;
        allocSize = allocationCount * pageSize;
        status = _pmaNumaAllocateRange(pPma, numaNodeId, allocSize, pageSize, pPages, bScrubOnAlloc, allowEvict, regionList, &finalAllocatedCount);
    }
    else
    {
        // Discontiguous: allocationCount pages of pageSize bytes each.
        allocCount = allocationCount;
        allocSize = pageSize;
        status = _pmaNumaAllocatePages(pPma, numaNodeId, (NvU32) allocSize, allocCount, pPages, bScrubOnAlloc, allowEvict, regionList, &finalAllocatedCount);
    }

    // Partial allocation is acceptable when the caller opted in.
    if ((status == NV_ERR_NO_MEMORY) && partialFlag && (finalAllocatedCount > 0))
    {
        status = NV_OK;
    }

    if (status == NV_OK)
    {
        NvU32 regId;
        void *pMap = NULL;
        NvU64 regAddrBase;
        NvU64 frameOffset;
        NvU64 frameCount = 0;
        PMA_PAGESTATUS curStatus = STATE_FREE;
        PMA_PAGESTATUS allocOption = !!(flags & PMA_ALLOCATE_PINNED) ?
                                        STATE_PIN : STATE_UNPIN;

        NV_PRINTF(LEVEL_INFO, "SUCCESS allocCount %lld, allocsize %lld eviction? %s pinned ? %s contig? %s\n",
                              (NvU64) allocCount,(NvU64) allocSize, (flags & PMA_ALLOCATE_DONT_EVICT) ?  "NOTALLOWED" : "ALLOWED",
                              !!(flags & PMA_ALLOCATE_PINNED) ? "PINNED" : "UNPINNED", contigFlag ? "CONTIG":"DISCONTIG");

        // Mark every frame of every allocated page PIN/UNPIN in the map.
        for (i = 0; i < finalAllocatedCount; i++)
        {
            NvU32 j;

            regId = findRegionID(pPma, pPages[i]);
            pMap = pPma->pRegions[regId];
            regAddrBase = pPma->pRegDescriptors[regId]->base;
            frameCount = allocSize >> PMA_PAGE_SHIFT;

            for (j = 0; j < frameCount; j++)
            {
                frameOffset = PMA_ADDR2FRAME(pPages[i], regAddrBase) + j;

                curStatus = pPma->pMapInfo->pmaMapRead(pMap, frameOffset, NV_TRUE);

                // A frame still marked ATTRIB_EVICTING means its eviction
                // did not complete; fail the allocation and roll back below.
                if (curStatus & ATTRIB_EVICTING)
                {
                    status = NV_ERR_NO_MEMORY;
                    break;
                }
                pPma->pMapInfo->pmaMapChangeStateAttrib(pMap, frameOffset, allocOption, MAP_MASK);
            }
            if (status != NV_OK)
                break;
        }

        pPma->pStatsUpdateCb(pPma->pStatsUpdateCtx, pPma->pmaStats.numFreeFrames);

        if (status == NV_OK)
        {
            allocationOptions->numPagesAllocated = (NvLength)finalAllocatedCount;
        }
    }


    if (status != NV_OK)
    {
        NV_PRINTF(LEVEL_INFO, "FAILED allocCount %lld, allocsize %lld eviction? %s pinned ? %s contig? %s\n",
                              (NvU64) allocCount, (NvU64) allocSize, (flags & PMA_ALLOCATE_DONT_EVICT) ?  "NOTALLOWED" : "ALLOWED",
                              !!(flags & PMA_ALLOCATE_PINNED) ? "PINNED" : "UNPINNED", contigFlag ? "CONTIG":"DISCONTIG");
        //
        // Free the entire allocation if scrubbing failed or if we had allocated evicting allocations.
        // Evicting allocation will be handled in the pmaEvictContiguous
        //
        if (finalAllocatedCount > 0)
            pmaNumaFreeInternal(pPma, pPages, finalAllocatedCount, pageSize, 0);

        status = NV_ERR_NO_MEMORY;
    }

    portSyncSpinlockRelease(pPma->pPmaLock);

    if (pPma->bScrubOnFree && !bSkipScrubFlag)
    {
        portSyncRwLockReleaseRead(pPma->pScrubberValidLock);
        portSyncMutexRelease(pPma->pAllocLock);
    }

    return status;
}
694
/*!
 * @brief Free NUMA pages: release them to the OS and mark the PMA frames FREE.
 *
 * For each page, every PMA frame it covers is released back to the kernel
 * via osAllocReleasePage and its map state is reset — except frames marked
 * ATTRIB_EVICTING, which are being handed to a new client by the eviction
 * path and must not be returned to the OS here.
 *
 * @param[in] pPma      PMA object
 * @param[in] pPages    GPAs of the pages to free
 * @param[in] pageCount Number of entries in pPages
 * @param[in] size      Size in bytes of each page
 * @param[in] flag      Unused in this path (kept for interface parity —
 *                      no reads of 'flag' appear in this function)
 */
void pmaNumaFreeInternal
(
    PMA   *pPma,
    NvU64 *pPages,
    NvU64  pageCount,
    NvU64  size,
    NvU32  flag
)
{
    NvU64 i, j;
    NvU8 osPageShift = osGetPageShift();

    // A PMA frame must span at least one OS page for the release math below.
    NV_ASSERT_OR_RETURN_VOID(PMA_PAGE_SHIFT >= osPageShift);

    NV_PRINTF(LEVEL_INFO, "Freeing pPage[0] = %llx pageCount %lld\n", pPages[0], pageCount);

    for (i = 0; i < pageCount; i++)
    {
        NvU32 regId;
        NvU64 addrBase;
        NvU64 sysPhysAddr = 0;
        NvU64 frameNum;
        NvU64 framesPerPage;

        // Shift the GPA to acquire the bus address (SPA)
        NV_ASSERT(pPages[i] < pPma->coherentCpuFbSize);

        regId = findRegionID(pPma, pPages[i]);
        addrBase = pPma->pRegDescriptors[regId]->base;
        frameNum = PMA_ADDR2FRAME(pPages[i], addrBase);
        framesPerPage = size >> PMA_PAGE_SHIFT;
        sysPhysAddr = pPages[i] + pPma->coherentCpuFbBase;

        for (j = 0; j < framesPerPage; j++)
        {
            PMA_PAGESTATUS newStatus = STATE_FREE;
            PMA_PAGESTATUS currentStatus;
            NvU64 sysPagePhysAddr = 0;
            currentStatus = pPma->pMapInfo->pmaMapRead(pPma->pRegions[regId], (frameNum + j), NV_TRUE);

            //
            // When the pages are marked for evicting, we will skip free the page to OS
            // in order to reuse the page.
            //
            if (currentStatus & ATTRIB_EVICTING)
            {
                //
                // Evicting allocations are returned to new client and will be freed later.
                // We set the ATTRIB_NUMA_REUSE bit here just in case eviction fails later and we
                // need to release the page to OS in the allocation path.
                //
                if (currentStatus & STATE_UNPIN)
                {
                    pPma->pMapInfo->pmaMapChangeStateAttrib(pPma->pRegions[regId], (frameNum + j),
                                                            ATTRIB_NUMA_REUSE, ATTRIB_NUMA_REUSE);
                }
                continue;
            }
            // Release this frame's worth of OS pages, then mark the frame
            // FREE while preserving the (clear) ATTRIB_EVICTING bit mask.
            sysPagePhysAddr = sysPhysAddr + (j << PMA_PAGE_SHIFT);
            osAllocReleasePage(sysPagePhysAddr, 1 << (PMA_PAGE_SHIFT - osPageShift));
            pPma->pMapInfo->pmaMapChangeStateAttrib(pPma->pRegions[regId], (frameNum + j), newStatus, ~ATTRIB_EVICTING);
        }
    }

    pPma->pStatsUpdateCb(pPma->pStatsUpdateCtx, pPma->pmaStats.numFreeFrames);
}
761
/*!
 * @brief Set the free-frame percentage below which OS reclaim is skipped.
 *
 * The threshold is read by _pmaCheckFreeFramesToSkipReclaim to decide
 * whether to pass OS_ALLOC_PAGES_NODE_SKIP_RECLAIM to the kernel allocator.
 * Taken/released under the PMA spinlock to serialize with allocation paths.
 *
 * @param[in] pPma               PMA object
 * @param[in] skipReclaimPercent Threshold percentage (0 disables skipping)
 */
void pmaNumaSetReclaimSkipThreshold(PMA *pPma, NvU32 skipReclaimPercent)
{
    portSyncSpinlockAcquire(pPma->pPmaLock);
    pPma->numaReclaimSkipThreshold = skipReclaimPercent;
    portSyncSpinlockRelease(pPma->pPmaLock);
}
768