xref: /reactos/ntoskrnl/cache/section/swapout.c (revision 8c2e9189)
1 /*
2  * Copyright (C) 1998-2005 ReactOS Team (and the authors from the programmers section)
3  *
4  * This program is free software; you can redistribute it and/or
5  * modify it under the terms of the GNU General Public License
6  * as published by the Free Software Foundation; either version 2
7  * of the License, or (at your option) any later version.
8  *
9  * This program is distributed in the hope that it will be useful,
10  * but WITHOUT ANY WARRANTY; without even the implied warranty of
11  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12  * GNU General Public License for more details.
13  *
14  * You should have received a copy of the GNU General Public License
15  * along with this program; if not, write to the Free Software
16  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
17  *
18  *
19  * PROJECT:         ReactOS kernel
20  * FILE:            ntoskrnl/cache/section/swapout.c
21  * PURPOSE:         Consolidate fault handlers for sections
22  *
23  * PROGRAMMERS:     Arty
24  *                  Rex Jolliff
25  *                  David Welch
26  *                  Eric Kohl
27  *                  Emanuele Aliberti
28  *                  Eugene Ingerman
29  *                  Casper Hornstrup
30  *                  KJK::Hyperion
31  *                  Guido de Jong
32  *                  Ge van Geldorp
33  *                  Royce Mitchell III
34  *                  Filip Navara
35  *                  Aleksey Bragin
36  *                  Jason Filby
37  *                  Thomas Weidenmueller
38  *                  Gunnar Andre' Dalsnes
39  *                  Mike Nordell
40  *                  Alex Ionescu
41  *                  Gregor Anich
42  *                  Steven Edwards
43  *                  Herve Poussineau
44  */
45 
46 /*
47 
48 This file implements page out infrastructure for cache type sections.  This
49 is implemented a little differently from the legacy mm because mapping in an
50 address space and membership in a segment are considered separate.
51 
52 The general strategy here is to try to remove all mappings as gently as
53 possible, then to remove the page entry from the section itself as a final
54 step.  If at any time during the page out operation, the page is mapped in
55 a new address space by a competing thread, the operation will abort before
56 the segment page is finally removed, and the page will be naturally faulted
57 back into any address spaces required in the normal way.
58 
59 */
60 
61 /* INCLUDES *****************************************************************/
62 
63 #include <ntoskrnl.h>
64 #include "newmm.h"
65 #define NDEBUG
66 #include <debug.h>
67 
/* Route this file's DPRINTC trace calls through the regular DPRINT macro. */
#define DPRINTC DPRINT

/* Declared here but not referenced by the functions visible in this file. */
extern KEVENT MmWaitPageEvent;
/* Held by MmpPageOutPhysicalAddress whenever it reads a page's rmap list. */
extern FAST_MUTEX RmapListLock;
/* Declared here but not referenced by the functions visible in this file. */
extern PMMWSL MmWorkingSetList;

/* Held by MmpPageOutPhysicalAddress for the whole of each page-out attempt. */
FAST_MUTEX MiGlobalPageOperation;
75 
76 /*
77 
78 MmWithdrawSectionPage removes a page entry from the section segment, replacing
79 it with a wait entry.  The caller must replace the wait entry with a 0, when
80 any required writing is done.  The wait entry must remain until the page is
81 written to protect against cases where a fault brings a stale copy of the page
82 back before writing is complete.
83 
84 */
85 PFN_NUMBER
86 NTAPI
87 MmWithdrawSectionPage(PMM_SECTION_SEGMENT Segment,
88                       PLARGE_INTEGER FileOffset,
89                       BOOLEAN *Dirty)
90 {
91     ULONG_PTR Entry;
92 
93     DPRINT("MmWithdrawSectionPage(%p,%08x%08x,%p)\n",
94            Segment,
95            FileOffset->HighPart,
96            FileOffset->LowPart,
97            Dirty);
98 
99     MmLockSectionSegment(Segment);
100     Entry = MmGetPageEntrySectionSegment(Segment, FileOffset);
101 
102     *Dirty = !!IS_DIRTY_SSE(Entry);
103 
104     DPRINT("Withdraw %x (%x) of %wZ\n",
105            FileOffset->LowPart,
106            Entry,
107            Segment->FileObject ? &Segment->FileObject->FileName : NULL);
108 
109     if (!Entry)
110     {
111         DPRINT("Stoeled!\n");
112         MmUnlockSectionSegment(Segment);
113         return 0;
114     }
115     else if (MM_IS_WAIT_PTE(Entry))
116     {
117         DPRINT("WAIT\n");
118         MmUnlockSectionSegment(Segment);
119         return MM_WAIT_ENTRY;
120     }
121     else if (Entry && !IS_SWAP_FROM_SSE(Entry))
122     {
123         DPRINT("Page %x\n", PFN_FROM_SSE(Entry));
124 
125         *Dirty |= (Entry & 2);
126 
127         MmSetPageEntrySectionSegment(Segment,
128                                      FileOffset,
129                                      MAKE_SWAP_SSE(MM_WAIT_ENTRY));
130 
131         MmUnlockSectionSegment(Segment);
132         return PFN_FROM_SSE(Entry);
133     }
134     else
135     {
136         DPRINT1("SWAP ENTRY?! (%p:%08x%08x)\n",
137                 Segment,
138                 FileOffset->HighPart,
139                 FileOffset->LowPart);
140 
141         ASSERT(FALSE);
142         MmUnlockSectionSegment(Segment);
143         return 0;
144     }
145 }
146 
147 /*
148 
149 This function determines whether the segment holds the very last reference to
150 the page being considered and if so, writes it back or discards it as
151 approriate.  One small niggle here is that we might be holding the last
152 reference to the section segment associated with this page.  That happens
153 when the segment is destroyed at the same time that an active swap operation
154 is occurring, and all maps were already withdrawn.  In that case, it's our
155 responsiblity for finalizing the segment.
156 
157 Note that in the current code, WriteZero is always TRUE because the section
158 always backs a file.  In the ultimate form of this code, it also writes back
159 pages without necessarily evicting them.  In reactos' trunk, this is vestigal.
160 
161 */
162 
163 NTSTATUS
164 NTAPI
165 MmFinalizeSectionPageOut(PMM_SECTION_SEGMENT Segment,
166                          PLARGE_INTEGER FileOffset,
167                          PFN_NUMBER Page,
168                          BOOLEAN Dirty)
169 {
170     NTSTATUS Status = STATUS_SUCCESS;
171     BOOLEAN WriteZero = FALSE, WritePage = FALSE;
172     SWAPENTRY Swap = MmGetSavedSwapEntryPage(Page);
173 
174     /* Bail early if the reference count isn't where we need it */
175     if (MmGetReferenceCountPage(Page) != 1)
176     {
177         DPRINT1("Cannot page out locked page %x with ref count %lu\n",
178                 Page,
179                 MmGetReferenceCountPage(Page));
180         return STATUS_UNSUCCESSFUL;
181     }
182 
183     MmLockSectionSegment(Segment);
184     (void)InterlockedIncrementUL(&Segment->ReferenceCount);
185 
186     if (Dirty)
187     {
188         DPRINT("Finalize (dirty) Segment %p Page %x\n", Segment, Page);
189         DPRINT("Segment->FileObject %p\n", Segment->FileObject);
190         DPRINT("Segment->Flags %x\n", Segment->Flags);
191 
192         WriteZero = TRUE;
193         WritePage = TRUE;
194     }
195     else
196     {
197         WriteZero = TRUE;
198     }
199 
200     DPRINT("Status %x\n", Status);
201 
202     MmUnlockSectionSegment(Segment);
203 
204     if (WritePage)
205     {
206         DPRINT("MiWriteBackPage(Segment %p FileObject %p Offset %x)\n",
207                Segment,
208                Segment->FileObject,
209                FileOffset->LowPart);
210 
211         Status = MiWriteBackPage(Segment->FileObject,
212                                  FileOffset,
213                                  PAGE_SIZE,
214                                  Page);
215     }
216 
217     MmLockSectionSegment(Segment);
218 
219     if (WriteZero && NT_SUCCESS(Status))
220     {
221         DPRINT("Setting page entry in segment %p:%x to swap %x\n",
222                Segment,
223                FileOffset->LowPart,
224                Swap);
225 
226         MmSetPageEntrySectionSegment(Segment,
227                                      FileOffset,
228                                      Swap ? MAKE_SWAP_SSE(Swap) : 0);
229     }
230     else
231     {
232         DPRINT("Setting page entry in segment %p:%x to page %x\n",
233                Segment,
234                FileOffset->LowPart,
235                Page);
236 
237         MmSetPageEntrySectionSegment(Segment,
238                                      FileOffset,
239                                      Page ? (Dirty ? DIRTY_SSE(MAKE_PFN_SSE(Page)) : MAKE_PFN_SSE(Page)) : 0);
240     }
241 
242     if (NT_SUCCESS(Status))
243     {
244         DPRINT("Removing page %x for real\n", Page);
245         MmSetSavedSwapEntryPage(Page, 0);
246         MmReleasePageMemoryConsumer(MC_CACHE, Page);
247     }
248 
249     MmUnlockSectionSegment(Segment);
250 
251     if (InterlockedDecrementUL(&Segment->ReferenceCount) == 0)
252     {
253         MmFinalizeSegment(Segment);
254     }
255 
256     /* Note: Writing may evict the segment... Nothing is guaranteed from here down */
257     MiSetPageEvent(Segment, (ULONG_PTR)FileOffset->QuadPart);
258 
259     DPRINT("Status %x\n", Status);
260     return Status;
261 }
262 
263 /*
264 
265 The slightly misnamed MmPageOutCacheSection removes a page from an address
266 space in the manner of fault handlers found in fault.c.  In the ultimate form
267 of the code, this is one of the function pointers stored in a memory area
268 to control how pages in that memory area are managed.
269 
270 Also misleading is the call to MmReleasePageMemoryConsumer, which releases
271 the reference held by this address space only.  After all address spaces
272 have had MmPageOutCacheSection succeed on them for the indicated page,
273 then paging out of a cache page can continue.
274 
275 */
276 
277 NTSTATUS
278 NTAPI
279 MmPageOutCacheSection(PMMSUPPORT AddressSpace,
280                       MEMORY_AREA* MemoryArea,
281                       PVOID Address,
282                       PBOOLEAN Dirty,
283                       PMM_REQUIRED_RESOURCES Required)
284 {
285     ULONG_PTR Entry;
286     PFN_NUMBER OurPage;
287     PEPROCESS Process = MmGetAddressSpaceOwner(AddressSpace);
288     LARGE_INTEGER TotalOffset;
289     PMM_SECTION_SEGMENT Segment;
290     PVOID PAddress = MM_ROUND_DOWN(Address, PAGE_SIZE);
291 
292     TotalOffset.QuadPart = (ULONG_PTR)PAddress -
293                            MA_GetStartingAddress(MemoryArea) +
294                            MemoryArea->Data.SectionData.ViewOffset.QuadPart;
295 
296     Segment = MemoryArea->Data.SectionData.Segment;
297 
298     MmLockSectionSegment(Segment);
299     ASSERT(KeGetCurrentIrql() <= APC_LEVEL);
300 
301     Entry = MmGetPageEntrySectionSegment(Segment, &TotalOffset);
302     DBG_UNREFERENCED_LOCAL_VARIABLE(Entry);
303 
304     if (MmIsPageSwapEntry(Process, PAddress))
305     {
306         SWAPENTRY SwapEntry;
307         MmGetPageFileMapping(Process, PAddress, &SwapEntry);
308         MmUnlockSectionSegment(Segment);
309         return SwapEntry == MM_WAIT_ENTRY ? STATUS_SUCCESS + 1 : STATUS_UNSUCCESSFUL;
310     }
311 
312     MmDeleteRmap(Required->Page[0], Process, Address);
313     MmDeleteVirtualMapping(Process, Address, Dirty, &OurPage);
314     ASSERT(OurPage == Required->Page[0]);
315 
316     /* Note: this releases the reference held by this address space only. */
317     MmReleasePageMemoryConsumer(MC_CACHE, Required->Page[0]);
318 
319     MmUnlockSectionSegment(Segment);
320     MiSetPageEvent(Process, Address);
321     return STATUS_SUCCESS;
322 }
323 
324 /*
325 
326 This function is called by rmap when spare pages are needed by the blancer.
327 It attempts first to release the page from every address space in which it
328 appears, and, after a final check that no competing thread has mapped the
329 page again, uses MmFinalizeSectionPageOut to completely evict the page.  If
330 that's successful, then a suitable non-page map will be left in the segment
331 page table, otherwise, the original page is replaced in the section page
332 map.  Failure may result from a variety of conditions, but always leaves
333 the page mapped.
334 
335 This code is like the other fault handlers, in that MmPageOutCacheSection has
336 the option of returning either STATUS_SUCCESS + 1 to wait for a wait entry
337 to disppear or to use the blocking callout facility by returning
338 STATUS_MORE_PROCESSING_REQUIRED and placing a pointer to a function from
339 reqtools.c in the MM_REQUIRED_RESOURCES struct.
340 
341 */
342 
343 NTSTATUS
344 NTAPI
345 MmpPageOutPhysicalAddress(PFN_NUMBER Page)
346 {
347     BOOLEAN ProcRef = FALSE, PageDirty;
348     PFN_NUMBER SectionPage = 0;
349     PMM_RMAP_ENTRY entry;
350     PMM_SECTION_SEGMENT Segment = NULL;
351     LARGE_INTEGER FileOffset;
352     PMEMORY_AREA MemoryArea;
353     PMMSUPPORT AddressSpace = NULL;
354     BOOLEAN Dirty = FALSE;
355     PVOID Address = NULL;
356     PEPROCESS Process = NULL;
357     NTSTATUS Status = STATUS_SUCCESS;
358     MM_REQUIRED_RESOURCES Resources = { 0 };
359 
360     DPRINTC("Page out %x (ref ct %x)\n", Page, MmGetReferenceCountPage(Page));
361 
362     ExAcquireFastMutex(&MiGlobalPageOperation);
363     if ((Segment = MmGetSectionAssociation(Page, &FileOffset)))
364     {
365         DPRINTC("Withdrawing page (%x) %p:%x\n",
366                 Page,
367                 Segment,
368                 FileOffset.LowPart);
369 
370         SectionPage = MmWithdrawSectionPage(Segment, &FileOffset, &Dirty);
371         DPRINTC("SectionPage %x\n", SectionPage);
372 
373         if (SectionPage == MM_WAIT_ENTRY || SectionPage == 0)
374         {
375             DPRINT1("In progress page out %x\n", SectionPage);
376             ExReleaseFastMutex(&MiGlobalPageOperation);
377             return STATUS_UNSUCCESSFUL;
378         }
379         else
380         {
381             ASSERT(SectionPage == Page);
382         }
383         Resources.State = Dirty ? 1 : 0;
384     }
385     else
386     {
387         DPRINT("No segment association for %x\n", Page);
388     }
389 
390     Dirty = MmIsDirtyPageRmap(Page);
391 
392     DPRINTC("Trying to unmap all instances of %x\n", Page);
393     ExAcquireFastMutex(&RmapListLock);
394     entry = MmGetRmapListHeadPage(Page);
395 
396     // Entry and Segment might be null here in the case that the page
397     // is new and is in the process of being swapped in
398     if (!entry && !Segment)
399     {
400         Status = STATUS_UNSUCCESSFUL;
401         DPRINT1("Page %x is in transit\n", Page);
402         ExReleaseFastMutex(&RmapListLock);
403         goto bail;
404     }
405 
406     while (entry != NULL && NT_SUCCESS(Status))
407     {
408         Process = entry->Process;
409         Address = entry->Address;
410 
411         DPRINTC("Process %p Address %p Page %x\n", Process, Address, Page);
412 
413         if (RMAP_IS_SEGMENT(Address))
414         {
415             entry = entry->Next;
416             continue;
417         }
418 
419         if (Process && Address < MmSystemRangeStart)
420         {
421             /* Make sure we don't try to page out part of an exiting process */
422             if (PspIsProcessExiting(Process))
423             {
424                 DPRINT("bail\n");
425                 ExReleaseFastMutex(&RmapListLock);
426                 goto bail;
427             }
428             ObReferenceObject(Process);
429             ProcRef = TRUE;
430             AddressSpace = &Process->Vm;
431         }
432         else
433         {
434             AddressSpace = MmGetKernelAddressSpace();
435         }
436         ExReleaseFastMutex(&RmapListLock);
437 
438         RtlZeroMemory(&Resources, sizeof(Resources));
439 
440         if ((((ULONG_PTR)Address) & 0xFFF) != 0)
441         {
442             KeBugCheck(MEMORY_MANAGEMENT);
443         }
444 
445         do
446         {
447             MmLockAddressSpace(AddressSpace);
448 
449             MemoryArea = MmLocateMemoryAreaByAddress(AddressSpace, Address);
450             if (MemoryArea == NULL || MemoryArea->DeleteInProgress)
451             {
452                 Status = STATUS_UNSUCCESSFUL;
453                 MmUnlockAddressSpace(AddressSpace);
454                 DPRINTC("bail\n");
455                 goto bail;
456             }
457 
458             DPRINTC("Type %x (%p -> %p)\n",
459                     MemoryArea->Type,
460                     MA_GetStartingAddress(MemoryArea),
461                     MA_GetEndingAddress(MemoryArea));
462 
463             Resources.DoAcquisition = NULL;
464             Resources.Page[0] = Page;
465 
466             ASSERT(KeGetCurrentIrql() <= APC_LEVEL);
467 
468             DPRINT("%p:%p, page %x %x\n",
469                    Process,
470                    Address,
471                    Page,
472                    Resources.Page[0]);
473 
474             PageDirty = FALSE;
475 
476             Status = MmPageOutCacheSection(AddressSpace,
477                                            MemoryArea,
478                                            Address,
479                                            &PageDirty,
480                                            &Resources);
481 
482             Dirty |= PageDirty;
483             DPRINT("%x\n", Status);
484 
485             ASSERT(KeGetCurrentIrql() <= APC_LEVEL);
486 
487             MmUnlockAddressSpace(AddressSpace);
488 
489             if (Status == STATUS_SUCCESS + 1)
490             {
491                 // Wait page ... the other guy has it, so we'll just fail for now
492                 DPRINT1("Wait entry ... can't continue\n");
493                 Status = STATUS_UNSUCCESSFUL;
494                 goto bail;
495             }
496             else if (Status == STATUS_MORE_PROCESSING_REQUIRED)
497             {
498                 DPRINTC("DoAcquisition %p\n", Resources.DoAcquisition);
499 
500                 Status = Resources.DoAcquisition(AddressSpace,
501                                                  MemoryArea,
502                                                  &Resources);
503 
504                 DPRINTC("Status %x\n", Status);
505                 if (!NT_SUCCESS(Status))
506                 {
507                     DPRINT1("bail\n");
508                     goto bail;
509                 }
510                 else
511                 {
512                     Status = STATUS_MM_RESTART_OPERATION;
513                 }
514             }
515         }
516         while (Status == STATUS_MM_RESTART_OPERATION);
517 
518         if (ProcRef)
519         {
520             ObDereferenceObject(Process);
521             ProcRef = FALSE;
522         }
523 
524         ExAcquireFastMutex(&RmapListLock);
525         ASSERT(!MM_IS_WAIT_PTE(MmGetPfnForProcess(Process, Address)));
526         entry = MmGetRmapListHeadPage(Page);
527 
528         DPRINTC("Entry %p\n", entry);
529     }
530 
531     ExReleaseFastMutex(&RmapListLock);
532 
533 bail:
534     DPRINTC("BAIL %x\n", Status);
535 
536     if (Segment)
537     {
538         ULONG RefCount;
539 
540         DPRINTC("About to finalize section page %x (%p:%x) Status %x %s\n",
541                 Page,
542                 Segment,
543                 FileOffset.LowPart,
544                 Status,
545                 Dirty ? "dirty" : "clean");
546 
547         if (!NT_SUCCESS(Status) ||
548             !NT_SUCCESS(Status = MmFinalizeSectionPageOut(Segment,
549                                                           &FileOffset,
550                                                           Page,
551                                                           Dirty)))
552         {
553             DPRINTC("Failed to page out %x, replacing %x at %x in segment %x\n",
554                     SectionPage,
555                     FileOffset.LowPart,
556                     Segment);
557 
558             MmLockSectionSegment(Segment);
559 
560             MmSetPageEntrySectionSegment(Segment,
561                                          &FileOffset,
562                                          Dirty ? MAKE_PFN_SSE(Page) : DIRTY_SSE(MAKE_PFN_SSE(Page)));
563 
564             MmUnlockSectionSegment(Segment);
565         }
566 
567         /* Alas, we had the last reference */
568         if ((RefCount = InterlockedDecrementUL(&Segment->ReferenceCount)) == 0)
569             MmFinalizeSegment(Segment);
570     }
571 
572     if (ProcRef)
573     {
574         DPRINTC("Dereferencing process...\n");
575         ObDereferenceObject(Process);
576     }
577 
578     ExReleaseFastMutex(&MiGlobalPageOperation);
579 
580     DPRINTC("%s %x %x\n",
581             NT_SUCCESS(Status) ? "Evicted" : "Spared",
582             Page,
583             Status);
584 
585     return NT_SUCCESS(Status) ? STATUS_SUCCESS : STATUS_UNSUCCESSFUL;
586 }
587 
588 ULONG
589 NTAPI
590 MiCacheEvictPages(PMM_SECTION_SEGMENT Segment,
591                   ULONG Target)
592 {
593     ULONG_PTR Entry;
594     ULONG Result = 0, i, j;
595     NTSTATUS Status;
596     PFN_NUMBER Page;
597     LARGE_INTEGER Offset;
598 
599     MmLockSectionSegment(Segment);
600 
601     for (i = 0; i < RtlNumberGenericTableElements(&Segment->PageTable); i++) {
602 
603         PCACHE_SECTION_PAGE_TABLE Element = RtlGetElementGenericTable(&Segment->PageTable,
604                                                                       i);
605 
606         ASSERT(Element);
607 
608         Offset = Element->FileOffset;
609         for (j = 0; j < ENTRIES_PER_ELEMENT; j++, Offset.QuadPart += PAGE_SIZE) {
610             Entry = MmGetPageEntrySectionSegment(Segment, &Offset);
611             if (Entry && !IS_SWAP_FROM_SSE(Entry)) {
612                 Page = PFN_FROM_SSE(Entry);
613                 MmUnlockSectionSegment(Segment);
614                 Status = MmpPageOutPhysicalAddress(Page);
615                 if (NT_SUCCESS(Status))
616                     Result++;
617                 MmLockSectionSegment(Segment);
618             }
619         }
620     }
621 
622     MmUnlockSectionSegment(Segment);
623 
624     return Result;
625 }
626 
627 extern LIST_ENTRY MiSegmentList;
628 
629 // Interact with legacy balance manager for now
630 // This can fall away when our section implementation supports
631 // demand paging properly
632 NTSTATUS
633 MiRosTrimCache(ULONG Target,
634                ULONG Priority,
635                PULONG NrFreed)
636 {
637     ULONG Freed;
638     PLIST_ENTRY Entry;
639     PMM_SECTION_SEGMENT Segment;
640     *NrFreed = 0;
641 
642     DPRINT1("Need to trim %lu cache pages\n", Target);
643     for (Entry = MiSegmentList.Flink;
644          *NrFreed < Target && Entry != &MiSegmentList;
645          Entry = Entry->Flink) {
646         Segment = CONTAINING_RECORD(Entry, MM_SECTION_SEGMENT, ListOfSegments);
647         /* Defer to MM to try recovering pages from it */
648         Freed = MiCacheEvictPages(Segment, Target);
649         *NrFreed += Freed;
650     }
651     DPRINT1("Evicted %lu cache pages\n", Target);
652 
653     if (!IsListEmpty(&MiSegmentList)) {
654         Entry = MiSegmentList.Flink;
655         RemoveEntryList(Entry);
656         InsertTailList(&MiSegmentList, Entry);
657     }
658 
659     return STATUS_SUCCESS;
660 }
661