1 /*
2 * Copyright (C) 1998-2005 ReactOS Team (and the authors from the programmers section)
3 *
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU General Public License
6 * as published by the Free Software Foundation; either version 2
7 * of the License, or (at your option) any later version.
8 *
9 * This program is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details.
13 *
14 * You should have received a copy of the GNU General Public License
15 * along with this program; if not, write to the Free Software
16 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
17 *
18 *
19 * PROJECT: ReactOS kernel
20 * FILE: ntoskrnl/cache/section/swapout.c
21 * PURPOSE: Consolidate fault handlers for sections
22 *
23 * PROGRAMMERS: Arty
24 * Rex Jolliff
25 * David Welch
26 * Eric Kohl
27 * Emanuele Aliberti
28 * Eugene Ingerman
29 * Casper Hornstrup
30 * KJK::Hyperion
31 * Guido de Jong
32 * Ge van Geldorp
33 * Royce Mitchell III
34 * Filip Navara
35 * Aleksey Bragin
36 * Jason Filby
37 * Thomas Weidenmueller
38 * Gunnar Andre' Dalsnes
39 * Mike Nordell
40 * Alex Ionescu
41 * Gregor Anich
42 * Steven Edwards
43 * Herve Poussineau
44 */
45
46 /*
47
48 This file implements page out infrastructure for cache type sections. This
49 is implemented a little differently from the legacy mm because mapping in an
50 address space and membership in a segment are considered separate.
51
52 The general strategy here is to try to remove all mappings as gently as
53 possible, then to remove the page entry from the section itself as a final
54 step. If at any time during the page out operation, the page is mapped in
55 a new address space by a competing thread, the operation will abort before
56 the segment page is finally removed, and the page will be naturally faulted
57 back into any address spaces required in the normal way.
58
59 */
60
61 /* INCLUDES *****************************************************************/
62
63 #include <ntoskrnl.h>
64 #include "newmm.h"
65 #define NDEBUG
66 #include <debug.h>
67
/* DPRINTC is an alias for DPRINT; MmpPageOutPhysicalAddress logs through it */
#define DPRINTC DPRINT

/* Declared for this file; not referenced by the code visible here */
extern KEVENT MmWaitPageEvent;
/* Acquired around the MmGetRmapListHeadPage lookups in MmpPageOutPhysicalAddress */
extern FAST_MUTEX RmapListLock;
/* Declared for this file; not referenced by the code visible here */
extern PMMWSL MmWorkingSetList;

/*
 * Acquired at the start of MmpPageOutPhysicalAddress and released on each of
 * its exits. NOTE(review): its initialization is not visible in this file.
 */
FAST_MUTEX MiGlobalPageOperation;
75
76 /*
77
78 MmWithdrawSectionPage removes a page entry from the section segment, replacing
79 it with a wait entry. The caller must replace the wait entry with a 0, when
80 any required writing is done. The wait entry must remain until the page is
81 written to protect against cases where a fault brings a stale copy of the page
82 back before writing is complete.
83
84 */
85 PFN_NUMBER
86 NTAPI
MmWithdrawSectionPage(PMM_SECTION_SEGMENT Segment,PLARGE_INTEGER FileOffset,BOOLEAN * Dirty)87 MmWithdrawSectionPage(PMM_SECTION_SEGMENT Segment,
88 PLARGE_INTEGER FileOffset,
89 BOOLEAN *Dirty)
90 {
91 ULONG_PTR Entry;
92
93 DPRINT("MmWithdrawSectionPage(%p,%08x%08x,%p)\n",
94 Segment,
95 FileOffset->HighPart,
96 FileOffset->LowPart,
97 Dirty);
98
99 MmLockSectionSegment(Segment);
100 Entry = MmGetPageEntrySectionSegment(Segment, FileOffset);
101
102 *Dirty = !!IS_DIRTY_SSE(Entry);
103
104 DPRINT("Withdraw %x (%x) of %wZ\n",
105 FileOffset->LowPart,
106 Entry,
107 Segment->FileObject ? &Segment->FileObject->FileName : NULL);
108
109 if (!Entry)
110 {
111 DPRINT("Stoeled!\n");
112 MmUnlockSectionSegment(Segment);
113 return 0;
114 }
115 else if (MM_IS_WAIT_PTE(Entry))
116 {
117 DPRINT("WAIT\n");
118 MmUnlockSectionSegment(Segment);
119 return MM_WAIT_ENTRY;
120 }
121 else if (Entry && !IS_SWAP_FROM_SSE(Entry))
122 {
123 DPRINT("Page %x\n", PFN_FROM_SSE(Entry));
124
125 *Dirty |= (Entry & 2);
126
127 MmSetPageEntrySectionSegment(Segment,
128 FileOffset,
129 MAKE_SWAP_SSE(MM_WAIT_ENTRY));
130
131 MmUnlockSectionSegment(Segment);
132 return PFN_FROM_SSE(Entry);
133 }
134 else
135 {
136 DPRINT1("SWAP ENTRY?! (%p:%08x%08x)\n",
137 Segment,
138 FileOffset->HighPart,
139 FileOffset->LowPart);
140
141 ASSERT(FALSE);
142 MmUnlockSectionSegment(Segment);
143 return 0;
144 }
145 }
146
/*

This function determines whether the segment holds the very last reference to
the page being considered and if so, writes it back or discards it as
appropriate. One small niggle here is that we might be holding the last
reference to the section segment associated with this page. That happens
when the segment is destroyed at the same time that an active swap operation
is occurring, and all maps were already withdrawn. In that case, it's our
responsibility to finalize the segment.

Note that in the current code, WriteZero is always TRUE because the section
always backs a file. In the ultimate form of this code, it also writes back
pages without necessarily evicting them. In reactos' trunk, this is vestigial.

*/
162
163 NTSTATUS
164 NTAPI
MmFinalizeSectionPageOut(PMM_SECTION_SEGMENT Segment,PLARGE_INTEGER FileOffset,PFN_NUMBER Page,BOOLEAN Dirty)165 MmFinalizeSectionPageOut(PMM_SECTION_SEGMENT Segment,
166 PLARGE_INTEGER FileOffset,
167 PFN_NUMBER Page,
168 BOOLEAN Dirty)
169 {
170 NTSTATUS Status = STATUS_SUCCESS;
171 BOOLEAN WriteZero = FALSE, WritePage = FALSE;
172 SWAPENTRY Swap = MmGetSavedSwapEntryPage(Page);
173
174 /* Bail early if the reference count isn't where we need it */
175 if (MmGetReferenceCountPageWithoutLock(Page) != 1)
176 {
177 DPRINT1("Cannot page out locked page %x with ref count %lu\n",
178 Page,
179 MmGetReferenceCountPageWithoutLock(Page));
180 return STATUS_UNSUCCESSFUL;
181 }
182
183 MmLockSectionSegment(Segment);
184 (void)InterlockedIncrementUL(&Segment->ReferenceCount);
185
186 if (Dirty)
187 {
188 DPRINT("Finalize (dirty) Segment %p Page %x\n", Segment, Page);
189 DPRINT("Segment->FileObject %p\n", Segment->FileObject);
190 DPRINT("Segment->Flags %x\n", Segment->Flags);
191
192 WriteZero = TRUE;
193 WritePage = TRUE;
194 }
195 else
196 {
197 WriteZero = TRUE;
198 }
199
200 DPRINT("Status %x\n", Status);
201
202 MmUnlockSectionSegment(Segment);
203
204 if (WritePage)
205 {
206 DPRINT("MiWriteBackPage(Segment %p FileObject %p Offset %x)\n",
207 Segment,
208 Segment->FileObject,
209 FileOffset->LowPart);
210
211 Status = MiWriteBackPage(Segment->FileObject,
212 FileOffset,
213 PAGE_SIZE,
214 Page);
215 }
216
217 MmLockSectionSegment(Segment);
218
219 if (WriteZero && NT_SUCCESS(Status))
220 {
221 DPRINT("Setting page entry in segment %p:%x to swap %x\n",
222 Segment,
223 FileOffset->LowPart,
224 Swap);
225
226 MmSetPageEntrySectionSegment(Segment,
227 FileOffset,
228 Swap ? MAKE_SWAP_SSE(Swap) : 0);
229 }
230 else
231 {
232 DPRINT("Setting page entry in segment %p:%x to page %x\n",
233 Segment,
234 FileOffset->LowPart,
235 Page);
236
237 MmSetPageEntrySectionSegment(Segment,
238 FileOffset,
239 Page ? (Dirty ? DIRTY_SSE(MAKE_PFN_SSE(Page)) : MAKE_PFN_SSE(Page)) : 0);
240 }
241
242 if (NT_SUCCESS(Status))
243 {
244 DPRINT("Removing page %x for real\n", Page);
245 MmSetSavedSwapEntryPage(Page, 0);
246 MmReleasePageMemoryConsumer(MC_CACHE, Page);
247 }
248
249 MmUnlockSectionSegment(Segment);
250
251 if (InterlockedDecrementUL(&Segment->ReferenceCount) == 0)
252 {
253 MmFinalizeSegment(Segment);
254 }
255
256 /* Note: Writing may evict the segment... Nothing is guaranteed from here down */
257 MiSetPageEvent(Segment, (ULONG_PTR)FileOffset->QuadPart);
258
259 DPRINT("Status %x\n", Status);
260 return Status;
261 }
262
263 /*
264
265 The slightly misnamed MmPageOutCacheSection removes a page from an address
266 space in the manner of fault handlers found in fault.c. In the ultimate form
267 of the code, this is one of the function pointers stored in a memory area
268 to control how pages in that memory area are managed.
269
270 Also misleading is the call to MmReleasePageMemoryConsumer, which releases
271 the reference held by this address space only. After all address spaces
272 have had MmPageOutCacheSection succeed on them for the indicated page,
273 then paging out of a cache page can continue.
274
275 */
276
277 NTSTATUS
278 NTAPI
MmPageOutCacheSection(PMMSUPPORT AddressSpace,MEMORY_AREA * MemoryArea,PVOID Address,PBOOLEAN Dirty,PMM_REQUIRED_RESOURCES Required)279 MmPageOutCacheSection(PMMSUPPORT AddressSpace,
280 MEMORY_AREA* MemoryArea,
281 PVOID Address,
282 PBOOLEAN Dirty,
283 PMM_REQUIRED_RESOURCES Required)
284 {
285 ULONG_PTR Entry;
286 PFN_NUMBER OurPage;
287 PEPROCESS Process = MmGetAddressSpaceOwner(AddressSpace);
288 LARGE_INTEGER TotalOffset;
289 PMM_SECTION_SEGMENT Segment;
290 PVOID PAddress = MM_ROUND_DOWN(Address, PAGE_SIZE);
291
292 TotalOffset.QuadPart = (ULONG_PTR)PAddress -
293 MA_GetStartingAddress(MemoryArea) +
294 MemoryArea->Data.SectionData.ViewOffset.QuadPart;
295
296 Segment = MemoryArea->Data.SectionData.Segment;
297
298 MmLockSectionSegment(Segment);
299 ASSERT(KeGetCurrentIrql() <= APC_LEVEL);
300
301 Entry = MmGetPageEntrySectionSegment(Segment, &TotalOffset);
302 DBG_UNREFERENCED_LOCAL_VARIABLE(Entry);
303
304 if (MmIsPageSwapEntry(Process, PAddress))
305 {
306 SWAPENTRY SwapEntry;
307 MmGetPageFileMapping(Process, PAddress, &SwapEntry);
308 MmUnlockSectionSegment(Segment);
309 return SwapEntry == MM_WAIT_ENTRY ? STATUS_SUCCESS + 1 : STATUS_UNSUCCESSFUL;
310 }
311
312 MmDeleteRmap(Required->Page[0], Process, Address);
313 MmDeleteVirtualMapping(Process, Address, Dirty, &OurPage);
314 ASSERT(OurPage == Required->Page[0]);
315
316 /* Note: this releases the reference held by this address space only. */
317 MmReleasePageMemoryConsumer(MC_CACHE, Required->Page[0]);
318
319 MmUnlockSectionSegment(Segment);
320 MiSetPageEvent(Process, Address);
321 return STATUS_SUCCESS;
322 }
323
/*

This function is called by rmap when spare pages are needed by the balancer.
It attempts first to release the page from every address space in which it
appears, and, after a final check that no competing thread has mapped the
page again, uses MmFinalizeSectionPageOut to completely evict the page. If
that's successful, then a suitable non-page map will be left in the segment
page table, otherwise, the original page is replaced in the section page
map. Failure may result from a variety of conditions, but always leaves
the page mapped.

This code is like the other fault handlers, in that MmPageOutCacheSection has
the option of returning either STATUS_SUCCESS + 1 to wait for a wait entry
to disappear or to use the blocking callout facility by returning
STATUS_MORE_PROCESSING_REQUIRED and placing a pointer to a function from
reqtools.c in the MM_REQUIRED_RESOURCES struct.

*/
342
343 NTSTATUS
344 NTAPI
MmpPageOutPhysicalAddress(PFN_NUMBER Page)345 MmpPageOutPhysicalAddress(PFN_NUMBER Page)
346 {
347 BOOLEAN ProcRef = FALSE, PageDirty;
348 PFN_NUMBER SectionPage = 0;
349 PMM_RMAP_ENTRY entry;
350 PMM_SECTION_SEGMENT Segment = NULL;
351 LARGE_INTEGER FileOffset;
352 PMEMORY_AREA MemoryArea;
353 PMMSUPPORT AddressSpace = NULL;
354 BOOLEAN Dirty = FALSE;
355 PVOID Address = NULL;
356 PEPROCESS Process = NULL;
357 NTSTATUS Status = STATUS_SUCCESS;
358 MM_REQUIRED_RESOURCES Resources = { 0 };
359
360 DPRINTC("Page out %x (ref ct %x)\n", Page, MmGetReferenceCountPageWithoutLock(Page));
361
362 ExAcquireFastMutex(&MiGlobalPageOperation);
363 if ((Segment = MmGetSectionAssociation(Page, &FileOffset)))
364 {
365 DPRINTC("Withdrawing page (%x) %p:%x\n",
366 Page,
367 Segment,
368 FileOffset.LowPart);
369
370 SectionPage = MmWithdrawSectionPage(Segment, &FileOffset, &Dirty);
371 DPRINTC("SectionPage %x\n", SectionPage);
372
373 if (SectionPage == MM_WAIT_ENTRY || SectionPage == 0)
374 {
375 DPRINT1("In progress page out %x\n", SectionPage);
376 ExReleaseFastMutex(&MiGlobalPageOperation);
377 return STATUS_UNSUCCESSFUL;
378 }
379 else
380 {
381 ASSERT(SectionPage == Page);
382 }
383 Resources.State = Dirty ? 1 : 0;
384 }
385 else
386 {
387 DPRINT("No segment association for %x\n", Page);
388 }
389
390 Dirty = MmIsDirtyPageRmap(Page);
391
392 DPRINTC("Trying to unmap all instances of %x\n", Page);
393 ExAcquireFastMutex(&RmapListLock);
394 entry = MmGetRmapListHeadPage(Page);
395
396 // Entry and Segment might be null here in the case that the page
397 // is new and is in the process of being swapped in
398 if (!entry && !Segment)
399 {
400 Status = STATUS_UNSUCCESSFUL;
401 DPRINT1("Page %x is in transit\n", Page);
402 ExReleaseFastMutex(&RmapListLock);
403 goto bail;
404 }
405
406 while (entry != NULL && NT_SUCCESS(Status))
407 {
408 Process = entry->Process;
409 Address = entry->Address;
410
411 DPRINTC("Process %p Address %p Page %x\n", Process, Address, Page);
412
413 if (RMAP_IS_SEGMENT(Address))
414 {
415 entry = entry->Next;
416 continue;
417 }
418
419 if (Process && Address < MmSystemRangeStart)
420 {
421 /* Make sure we don't try to page out part of an exiting process */
422 if (PspIsProcessExiting(Process))
423 {
424 DPRINT("bail\n");
425 ExReleaseFastMutex(&RmapListLock);
426 goto bail;
427 }
428 ObReferenceObject(Process);
429 ProcRef = TRUE;
430 AddressSpace = &Process->Vm;
431 }
432 else
433 {
434 AddressSpace = MmGetKernelAddressSpace();
435 }
436 ExReleaseFastMutex(&RmapListLock);
437
438 RtlZeroMemory(&Resources, sizeof(Resources));
439
440 if ((((ULONG_PTR)Address) & 0xFFF) != 0)
441 {
442 KeBugCheck(MEMORY_MANAGEMENT);
443 }
444
445 do
446 {
447 MmLockAddressSpace(AddressSpace);
448
449 MemoryArea = MmLocateMemoryAreaByAddress(AddressSpace, Address);
450 if (MemoryArea == NULL || MemoryArea->DeleteInProgress)
451 {
452 Status = STATUS_UNSUCCESSFUL;
453 MmUnlockAddressSpace(AddressSpace);
454 DPRINTC("bail\n");
455 goto bail;
456 }
457
458 DPRINTC("Type %x (%p -> %p)\n",
459 MemoryArea->Type,
460 MA_GetStartingAddress(MemoryArea),
461 MA_GetEndingAddress(MemoryArea));
462
463 Resources.DoAcquisition = NULL;
464 Resources.Page[0] = Page;
465
466 ASSERT(KeGetCurrentIrql() <= APC_LEVEL);
467
468 DPRINT("%p:%p, page %x %x\n",
469 Process,
470 Address,
471 Page,
472 Resources.Page[0]);
473
474 PageDirty = FALSE;
475
476 Status = MmPageOutCacheSection(AddressSpace,
477 MemoryArea,
478 Address,
479 &PageDirty,
480 &Resources);
481
482 Dirty |= PageDirty;
483 DPRINT("%x\n", Status);
484
485 ASSERT(KeGetCurrentIrql() <= APC_LEVEL);
486
487 MmUnlockAddressSpace(AddressSpace);
488
489 if (Status == STATUS_SUCCESS + 1)
490 {
491 // Wait page ... the other guy has it, so we'll just fail for now
492 DPRINT1("Wait entry ... can't continue\n");
493 Status = STATUS_UNSUCCESSFUL;
494 goto bail;
495 }
496 else if (Status == STATUS_MORE_PROCESSING_REQUIRED)
497 {
498 DPRINTC("DoAcquisition %p\n", Resources.DoAcquisition);
499
500 Status = Resources.DoAcquisition(AddressSpace,
501 MemoryArea,
502 &Resources);
503
504 DPRINTC("Status %x\n", Status);
505 if (!NT_SUCCESS(Status))
506 {
507 DPRINT1("bail\n");
508 goto bail;
509 }
510 else
511 {
512 Status = STATUS_MM_RESTART_OPERATION;
513 }
514 }
515 }
516 while (Status == STATUS_MM_RESTART_OPERATION);
517
518 if (ProcRef)
519 {
520 ObDereferenceObject(Process);
521 ProcRef = FALSE;
522 }
523
524 ExAcquireFastMutex(&RmapListLock);
525 ASSERT(!MM_IS_WAIT_PTE(MmGetPfnForProcess(Process, Address)));
526 entry = MmGetRmapListHeadPage(Page);
527
528 DPRINTC("Entry %p\n", entry);
529 }
530
531 ExReleaseFastMutex(&RmapListLock);
532
533 bail:
534 DPRINTC("BAIL %x\n", Status);
535
536 if (Segment)
537 {
538 ULONG RefCount;
539
540 DPRINTC("About to finalize section page %x (%p:%x) Status %x %s\n",
541 Page,
542 Segment,
543 FileOffset.LowPart,
544 Status,
545 Dirty ? "dirty" : "clean");
546
547 if (!NT_SUCCESS(Status) ||
548 !NT_SUCCESS(Status = MmFinalizeSectionPageOut(Segment,
549 &FileOffset,
550 Page,
551 Dirty)))
552 {
553 DPRINTC("Failed to page out %x, replacing %x at %x in segment %x\n",
554 SectionPage,
555 FileOffset.LowPart,
556 Segment);
557
558 MmLockSectionSegment(Segment);
559
560 MmSetPageEntrySectionSegment(Segment,
561 &FileOffset,
562 Dirty ? MAKE_PFN_SSE(Page) : DIRTY_SSE(MAKE_PFN_SSE(Page)));
563
564 MmUnlockSectionSegment(Segment);
565 }
566
567 /* Alas, we had the last reference */
568 if ((RefCount = InterlockedDecrementUL(&Segment->ReferenceCount)) == 0)
569 MmFinalizeSegment(Segment);
570 }
571
572 if (ProcRef)
573 {
574 DPRINTC("Dereferencing process...\n");
575 ObDereferenceObject(Process);
576 }
577
578 ExReleaseFastMutex(&MiGlobalPageOperation);
579
580 DPRINTC("%s %x %x\n",
581 NT_SUCCESS(Status) ? "Evicted" : "Spared",
582 Page,
583 Status);
584
585 return NT_SUCCESS(Status) ? STATUS_SUCCESS : STATUS_UNSUCCESSFUL;
586 }
587
588 ULONG
589 NTAPI
MiCacheEvictPages(PMM_SECTION_SEGMENT Segment,ULONG Target)590 MiCacheEvictPages(PMM_SECTION_SEGMENT Segment,
591 ULONG Target)
592 {
593 ULONG_PTR Entry;
594 ULONG Result = 0, i, j;
595 NTSTATUS Status;
596 PFN_NUMBER Page;
597 LARGE_INTEGER Offset;
598
599 MmLockSectionSegment(Segment);
600
601 for (i = 0; i < RtlNumberGenericTableElements(&Segment->PageTable); i++) {
602
603 PCACHE_SECTION_PAGE_TABLE Element = RtlGetElementGenericTable(&Segment->PageTable,
604 i);
605
606 ASSERT(Element);
607
608 Offset = Element->FileOffset;
609 for (j = 0; j < ENTRIES_PER_ELEMENT; j++, Offset.QuadPart += PAGE_SIZE) {
610 Entry = MmGetPageEntrySectionSegment(Segment, &Offset);
611 if (Entry && !IS_SWAP_FROM_SSE(Entry)) {
612 Page = PFN_FROM_SSE(Entry);
613 MmUnlockSectionSegment(Segment);
614 Status = MmpPageOutPhysicalAddress(Page);
615 if (NT_SUCCESS(Status))
616 Result++;
617 MmLockSectionSegment(Segment);
618 }
619 }
620 }
621
622 MmUnlockSectionSegment(Segment);
623
624 return Result;
625 }
626
627 extern LIST_ENTRY MiSegmentList;
628
629 // Interact with legacy balance manager for now
630 // This can fall away when our section implementation supports
631 // demand paging properly
632 NTSTATUS
MiRosTrimCache(ULONG Target,ULONG Priority,PULONG NrFreed)633 MiRosTrimCache(ULONG Target,
634 ULONG Priority,
635 PULONG NrFreed)
636 {
637 ULONG Freed;
638 PLIST_ENTRY Entry;
639 PMM_SECTION_SEGMENT Segment;
640 *NrFreed = 0;
641
642 DPRINT1("Need to trim %lu cache pages\n", Target);
643 for (Entry = MiSegmentList.Flink;
644 *NrFreed < Target && Entry != &MiSegmentList;
645 Entry = Entry->Flink) {
646 Segment = CONTAINING_RECORD(Entry, MM_SECTION_SEGMENT, ListOfSegments);
647 /* Defer to MM to try recovering pages from it */
648 Freed = MiCacheEvictPages(Segment, Target);
649 *NrFreed += Freed;
650 }
651 DPRINT1("Evicted %lu cache pages\n", Target);
652
653 if (!IsListEmpty(&MiSegmentList)) {
654 Entry = MiSegmentList.Flink;
655 RemoveEntryList(Entry);
656 InsertTailList(&MiSegmentList, Entry);
657 }
658
659 return STATUS_SUCCESS;
660 }
661