/*-
 * Copyright (c) 1982, 1986, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)vmmeter.h	8.2 (Berkeley) 7/10/94
 * $FreeBSD: src/sys/sys/vmmeter.h,v 1.21.2.2 2002/10/10 19:28:21 dillon Exp $
 */

#ifndef _VM_VM_PAGE2_H_
#define _VM_VM_PAGE2_H_

#ifdef _KERNEL

#ifndef _SYS_VMMETER_H_
#include <sys/vmmeter.h>
#endif
#ifndef _SYS_QUEUE_H_
#include <sys/queue.h>
#endif
#ifndef _VM_VM_PAGE_H_
#include <vm/vm_page.h>
#endif
#ifndef _SYS_SPINLOCK_H_
#include <sys/spinlock.h>
#endif
#ifndef _SYS_SPINLOCK2_H_
#include <sys/spinlock2.h>
#endif

/*
 * SMP NOTE
 *
 * VM fault rates are highly dependent on SMP locking conflicts and, on
 * multi-socket systems, cache mastership changes for globals due to atomic
 * ops (even simple atomic_add_*() calls).  Cache mastership changes can
 * limit the aggregate fault rate.
 *
 * For this reason we go through some hoops to access VM statistics for
 * low-memory handling, pageout, and other triggers.  Each cpu collects
 * adjustments in gd->gd_vmstats_adj.  These get rolled up into the global
 * vmstats structure.  The global vmstats structure is then pulled into
 * gd->gd_vmstats by each cpu when it needs it.  Critical path checks always
 * use the pcpu gd->gd_vmstats structure.
 */
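/*
 * Illustrative sketch of the statistics flow described above.  This is a
 * conceptual example only -- the actual rollup code lives elsewhere in the
 * VM system and the exact helpers may differ:
 *
 *	cpu N, hot path (cheap, no global cache-line ping-pong):
 *		adjust gd->gd_vmstats_adj.v_free_count	(pcpu adjustment)
 *
 *	periodically, or when the adjustment grows large:
 *		fold gd_vmstats_adj into the global vmstats and zero it
 *
 *	consumer on cpu N:
 *		gd->gd_vmstats = vmstats;		(pcpu snapshot)
 *		test gd->gd_vmstats.v_free_count ...	(checks below)
 */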
/*
 * Return TRUE if we are under our severe low-free-pages threshold.
 *
 * This causes user processes to stall to avoid exhausting memory that
 * the kernel might need.
 *
 * reserved < severe < minimum < target < paging_target
 */
static __inline
int
vm_page_count_severe(void)
{
    globaldata_t gd = mycpu;

    return (gd->gd_vmstats.v_free_severe >
	    gd->gd_vmstats.v_free_count + gd->gd_vmstats.v_cache_count ||
	    gd->gd_vmstats.v_free_reserved > gd->gd_vmstats.v_free_count);
}

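/*
 * Worked example for the test above (illustrative numbers only, not real
 * tuning values): with v_free_severe = 2048, v_free_reserved = 512,
 * v_free_count = 1000 and v_cache_count = 900, free+cache = 1900 < 2048,
 * so vm_page_count_severe() returns TRUE and user processes stall until
 * the pageout daemon recovers some memory.
 */
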
/*
 * Return TRUE if we are under our minimum low-free-pages threshold.
 * This activates the pageout daemon.  The pageout daemon tries to
 * reach the target but may stop once it satisfies the minimum.
 *
 * reserved < severe < minimum < target < paging_target
 */
static __inline
int
vm_page_count_min(int donotcount)
{
    globaldata_t gd = mycpu;

    return (gd->gd_vmstats.v_free_min + donotcount >
	    (gd->gd_vmstats.v_free_count + gd->gd_vmstats.v_cache_count) ||
	    gd->gd_vmstats.v_free_reserved > gd->gd_vmstats.v_free_count);
}

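/*
 * Note on the donotcount argument: it is simply added to v_free_min, so a
 * caller can treat that many pages as if they were unavailable, raising
 * the effective minimum for this particular check.
 */
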
/*
 * Return TRUE if we are under our free page target.  The pageout daemon
 * tries to reach the target but may stop once it gets past the min.
 *
 * User threads doing normal allocations might wait based on this
 * function but MUST NOT wait in a loop based on this function as the
 * VM load may prevent the target from being reached.
 */
static __inline
int
vm_page_count_target(void)
{
    globaldata_t gd = mycpu;

    return (gd->gd_vmstats.v_free_target >
	    (gd->gd_vmstats.v_free_count + gd->gd_vmstats.v_cache_count) ||
	    gd->gd_vmstats.v_free_reserved > gd->gd_vmstats.v_free_count);
}

/*
 * Return the number of pages the pageout daemon needs to move into the
 * cache or free lists.  A negative number means we have sufficient free
 * pages.
 *
 * The free+cache target used here is greater than the one tested by
 * vm_page_count_target().  The frontend uses vm_page_count_target() while
 * the backend continues freeing based on vm_paging_target().
 *
 * This function DOES NOT return TRUE or FALSE.
 */
static __inline
int
vm_paging_target(void)
{
    globaldata_t gd = mycpu;

    return ((gd->gd_vmstats.v_free_target + gd->gd_vmstats.v_cache_min) -
	    (gd->gd_vmstats.v_free_count + gd->gd_vmstats.v_cache_count));
}

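/*
 * Worked example (illustrative numbers only): with v_free_target = 12000,
 * v_cache_min = 4000, v_free_count = 9000 and v_cache_count = 5000, the
 * routine returns (12000 + 4000) - (9000 + 5000) = 2000, i.e. the pageout
 * daemon should still free or cache roughly 2000 more pages.  A negative
 * result means no further work is needed.
 */
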
/*
 * Return TRUE if hysteresis dictates we should nominally wake up the
 * pageout daemon to start working on freeing up some memory.  This
 * routine should NOT be used to determine when to block on the VM system.
 * We want to wake up the pageout daemon before we might otherwise block.
 *
 * Paging begins when cache+free drops below cache_min + free_min.
 */
static __inline
int
vm_paging_needed(void)
{
    globaldata_t gd = mycpu;

    if (gd->gd_vmstats.v_free_min + gd->gd_vmstats.v_cache_min >
	gd->gd_vmstats.v_free_count + gd->gd_vmstats.v_cache_count) {
		return 1;
    }
    if (gd->gd_vmstats.v_free_min > gd->gd_vmstats.v_free_count)
		return 1;
    return 0;
}

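/*
 * Example (illustrative numbers only): with v_free_min = 4000 and
 * v_cache_min = 8000, a system holding 5000 free and 6500 cache pages
 * (11500 total, below the 12000 combined minimum) wakes the pageout
 * daemon even though the free count alone is still above v_free_min.
 */
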
static __inline
void
vm_page_event(vm_page_t m, vm_page_event_t event)
{
    if (m->flags & PG_ACTIONLIST)
	vm_page_event_internal(m, event);
}

static __inline
void
vm_page_init_action(vm_page_t m, vm_page_action_t action,
		    void (*func)(vm_page_t, vm_page_action_t), void *data)
{
    action->m = m;
    action->func = func;
    action->data = data;
}

/*
 * Clear dirty bits in the VM page but truncate the
 * end to a DEV_BSIZE'd boundary.
 *
 * Used when reading data in, typically via getpages.
 * The partial device block at the end of the truncation
 * range should not lose its dirty bit.
 *
 * NOTE: This function does not clear the pmap modified bit.
 */
static __inline
void
vm_page_clear_dirty_end_nonincl(vm_page_t m, int base, int size)
{
    size = (base + size) & ~DEV_BMASK;
    if (base < size)
	vm_page_clear_dirty(m, base, size - base);
}

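/*
 * Worked example (assuming the usual DEV_BSIZE of 512, DEV_BMASK 511):
 * base = 100, size = 1000 gives an end of (1100 & ~511) = 1024, so
 * vm_page_clear_dirty() is asked to clear [100, 1024) and the partial
 * device block covering [1024, 1100) keeps its dirty bit.
 */
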
/*
 * Clear dirty bits in the VM page but truncate the
 * beginning to a DEV_BSIZE'd boundary.
 *
 * Used when truncating a buffer.  The partial device
 * block at the beginning of the truncation range
 * should not lose its dirty bit.
 *
 * NOTE: This function does not clear the pmap modified bit.
 */
static __inline
void
vm_page_clear_dirty_beg_nonincl(vm_page_t m, int base, int size)
{
    size += base;
    base = (base + DEV_BMASK) & ~DEV_BMASK;
    if (base < size)
	vm_page_clear_dirty(m, base, size - base);
}

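/*
 * Mirror-image example (again assuming DEV_BSIZE 512): base = 100,
 * size = 1000 rounds the start up to 512, so vm_page_clear_dirty() is
 * asked to clear [512, 1100) and the partial device block covering
 * [100, 512) keeps its dirty bit.
 */
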
static __inline
void
vm_page_spin_lock(vm_page_t m)
{
    spin_lock(&m->spin);
}

static __inline
void
vm_page_spin_unlock(vm_page_t m)
{
    spin_unlock(&m->spin);
}

/*
 * Wire a vm_page that is already wired.  Does not require a busied
 * page.
 */
static __inline
void
vm_page_wire_quick(vm_page_t m)
{
    if (atomic_fetchadd_int(&m->wire_count, 1) == 0)
	panic("vm_page_wire_quick: wire_count was 0");
}

/*
 * Unwire a vm_page quickly; does not require a busied page.
 *
 * This routine refuses to drop the wire_count to 0 and will return
 * TRUE if it would have had to (instead of decrementing it to 0).
 * The caller can then busy the page and deal with it.
 */
static __inline
int
vm_page_unwire_quick(vm_page_t m)
{
    KKASSERT(m->wire_count > 0);
    for (;;) {
	u_int wire_count = m->wire_count;

	cpu_ccfence();
	if (wire_count == 1)
		return TRUE;
	if (atomic_cmpset_int(&m->wire_count, wire_count, wire_count - 1))
		return FALSE;
    }
}

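/*
 * How the loop above behaves (descriptive, no new semantics): with
 * wire_count == 3 the cmpset attempts 3 -> 2 and the routine returns
 * FALSE on success; if another cpu changed the count in the meantime the
 * cmpset fails, the count is re-read, and the loop retries.  A count of 1
 * is never decremented here -- the routine returns TRUE and leaves the
 * final unwire to a caller holding the page busy.
 */
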
/*
 *	Functions implemented as macros
 */

static __inline void
vm_page_flag_set(vm_page_t m, unsigned int bits)
{
	atomic_set_int(&(m)->flags, bits);
}

static __inline void
vm_page_flag_clear(vm_page_t m, unsigned int bits)
{
	atomic_clear_int(&(m)->flags, bits);
}

/*
 * Wake up anyone waiting for the page after potentially unbusying
 * (hard or soft) or doing other work on a page that might make a
 * waiter ready.  The setting of PG_WANTED is integrated into the
 * related flags and it can't be set once the flags are already
 * clear, so there should be no races here.
 */

static __inline void
vm_page_flash(vm_page_t m)
{
	if (m->flags & PG_WANTED) {
		vm_page_flag_clear(m, PG_WANTED);
		wakeup(m);
	}
}

/*
 * Reduce the protection of a page.  This routine never raises the
 * protection and therefore can be safely called if the page is already
 * at VM_PROT_NONE (it will effectively be a NOP).
 *
 * VM_PROT_NONE will remove all user mappings of a page.  This is often
 * necessary when a page changes state (for example, turns into a copy-on-write
 * page or needs to be frozen for write I/O) in order to force a fault, or
 * to force a page's dirty bits to be synchronized and avoid hardware
 * (modified/accessed) bit update races with pmap changes.
 *
 * Since 'prot' is usually a constant, this inline usually winds up optimizing
 * out the primary conditional.
 *
 * WARNING: VM_PROT_NONE can block, but will loop until all mappings have
 * been cleared.  Callers should be aware that other page related elements
 * might have changed, however.
 */
static __inline void
vm_page_protect(vm_page_t m, int prot)
{
	KKASSERT(m->flags & PG_BUSY);
	if (prot == VM_PROT_NONE) {
		if (m->flags & (PG_WRITEABLE|PG_MAPPED)) {
			pmap_page_protect(m, VM_PROT_NONE);
			/* PG_WRITEABLE & PG_MAPPED cleared by call */
		}
	} else if ((prot == VM_PROT_READ) && (m->flags & PG_WRITEABLE)) {
		pmap_page_protect(m, VM_PROT_READ);
		/* PG_WRITEABLE cleared by call */
	}
}

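/*
 * Typical (illustrative) use: before starting write I/O on a busied page a
 * caller can issue vm_page_protect(m, VM_PROT_READ) so that any subsequent
 * user-mode write refaults and re-dirties the page instead of racing the
 * I/O; vm_page_protect(m, VM_PROT_NONE) is the heavier hammer used when all
 * user mappings must go away (e.g. the copy-on-write transition noted
 * above).
 */
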
/*
 * Zero-fill the specified page.  The entire contents of the page will be
 * zero'd out.
 */
static __inline boolean_t
vm_page_zero_fill(vm_page_t m)
{
	pmap_zero_page(VM_PAGE_TO_PHYS(m));
	return (TRUE);
}

/*
 * Copy the contents of src_m to dest_m.  The pages must be stable but spl
 * and other protections depend on context.
 */
static __inline void
vm_page_copy(vm_page_t src_m, vm_page_t dest_m)
{
	pmap_copy_page(VM_PAGE_TO_PHYS(src_m), VM_PAGE_TO_PHYS(dest_m));
	dest_m->valid = VM_PAGE_BITS_ALL;
	dest_m->dirty = VM_PAGE_BITS_ALL;
}

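/*
 * The destination is marked fully valid and fully dirty so the freshly
 * copied contents are not mistaken for a clean page and discarded by the
 * pageout code before they are written back.
 */
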
/*
 * Free a page.  The page must be marked BUSY.
 */
static __inline void
vm_page_free(vm_page_t m)
{
	vm_page_free_toq(m);
}

/*
 * Free a page to the zeroed-pages queue.  The caller must ensure that the
 * page has been zeroed.
 */
static __inline void
vm_page_free_zero(vm_page_t m)
{
#ifdef PMAP_DEBUG
#ifdef PHYS_TO_DMAP
	char *p = (char *)PHYS_TO_DMAP(VM_PAGE_TO_PHYS(m));
	int i;

	for (i = 0; i < PAGE_SIZE; i++) {
		if (p[i] != 0) {
			panic("non-zero page in vm_page_free_zero()");
		}
	}
#endif
#endif
	vm_page_free_toq(m);
}

/*
 * Set page to not be dirty.  Note: does not clear pmap modify bits.
 */
static __inline void
vm_page_undirty(vm_page_t m)
{
	m->dirty = 0;
}

#endif	/* _KERNEL */
#endif	/* _VM_VM_PAGE2_H_ */