xref: /freebsd/share/man/man9/vm_page_alloc.9 (revision 4b9d6057)
1.\"
2.\" Copyright (C) 2001 Chad David <davidc@acns.ab.ca>. All rights reserved.
3.\" Copyright (c) 2021 The FreeBSD Foundation
4.\"
5.\" Portions of this documentation were written by Mark Johnston under
6.\" sponsorship from the FreeBSD Foundation.
7.\"
8.\" Redistribution and use in source and binary forms, with or without
9.\" modification, are permitted provided that the following conditions
10.\" are met:
11.\" 1. Redistributions of source code must retain the above copyright
12.\"    notice(s), this list of conditions and the following disclaimer as
13.\"    the first lines of this file unmodified other than the possible
14.\"    addition of one or more copyright notices.
15.\" 2. Redistributions in binary form must reproduce the above copyright
16.\"    notice(s), this list of conditions and the following disclaimer in the
17.\"    documentation and/or other materials provided with the distribution.
18.\"
19.\" THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) ``AS IS'' AND ANY
20.\" EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
21.\" WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
22.\" DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT HOLDER(S) BE LIABLE FOR ANY
23.\" DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
24.\" (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
25.\" SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
26.\" CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
27.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
28.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
29.\" DAMAGE.
30.\"
31.Dd November 11, 2021
32.Dt VM_PAGE_ALLOC 9
33.Os
34.Sh NAME
35.Nm vm_page_alloc
36.Nd "allocate a page of memory"
37.Sh SYNOPSIS
38.In sys/param.h
39.In vm/vm.h
40.In vm/vm_page.h
41.Ft vm_page_t
42.Fn vm_page_alloc "vm_object_t object" "vm_pindex_t pindex" "int req"
43.Ft vm_page_t
44.Fo vm_page_alloc_after
45.Fa "vm_object_t object"
46.Fa "vm_pindex_t pindex"
47.Fa "int req"
48.Fa "vm_page_t mpred"
49.Fc
50.Ft vm_page_t
51.Fo vm_page_alloc_contig
52.Fa "vm_object_t object"
53.Fa "vm_pindex_t pindex"
54.Fa "int req"
55.Fa "u_long npages"
56.Fa "vm_paddr_t low"
57.Fa "vm_paddr_t high"
58.Fa "u_long alignment"
59.Fa "vm_paddr_t boundary"
60.Fa "vm_memattr_t memattr"
61.Fc
62.Ft vm_page_t
63.Fo vm_page_alloc_contig_domain
64.Fa "vm_object_t object"
65.Fa "vm_pindex_t pindex"
66.Fa "int domain" "int req"
67.Fa "u_long npages"
68.Fa "vm_paddr_t low"
69.Fa "vm_paddr_t high"
70.Fa "u_long alignment"
71.Fa "vm_paddr_t boundary"
72.Fa "vm_memattr_t memattr"
73.Fc
74.Ft vm_page_t
75.Fo vm_page_alloc_domain
76.Fa "vm_object_t object"
77.Fa "vm_pindex_t pindex"
78.Fa "int domain"
79.Fa "int req"
80.Fc
81.Ft vm_page_t
82.Fo vm_page_alloc_domain_after
83.Fa "vm_object_t object"
84.Fa "vm_pindex_t pindex"
85.Fa "int domain"
86.Fa "int req"
87.Fa "vm_page_t mpred"
88.Fc
89.Ft vm_page_t
90.Fo vm_page_alloc_freelist
91.Fa "int freelist"
92.Fa "int req"
93.Fc
94.Ft vm_page_t
95.Fo vm_page_alloc_freelist_domain
96.Fa "int domain"
97.Fa "int freelist"
98.Fa "int req"
99.Fc
100.Ft vm_page_t
101.Fo vm_page_alloc_noobj
102.Fa "int req"
103.Fc
104.Ft vm_page_t
105.Fo vm_page_alloc_noobj_contig
106.Fa "int req"
107.Fa "u_long npages"
108.Fa "vm_paddr_t low"
109.Fa "vm_paddr_t high"
110.Fa "u_long alignment"
111.Fa "vm_paddr_t boundary"
112.Fa "vm_memattr_t memattr"
113.Fc
114.Ft vm_page_t
115.Fo vm_page_alloc_noobj_contig_domain
116.Fa "int domain"
117.Fa "int req"
118.Fa "u_long npages"
119.Fa "vm_paddr_t low"
120.Fa "vm_paddr_t high"
121.Fa "u_long alignment"
122.Fa "vm_paddr_t boundary"
123.Fa "vm_memattr_t memattr"
124.Fc
125.Ft vm_page_t
126.Fo vm_page_alloc_noobj_domain
127.Fa "int domain"
128.Fa "int req"
129.Fc
130.Sh DESCRIPTION
131The
132.Fn vm_page_alloc
133family of functions allocates one or more pages of physical memory.
134Most kernel code should not call these functions directly but should instead
135use a kernel memory allocator such as
136.Xr malloc 9
137or
138.Xr uma 9 ,
139or should use a higher-level interface to the page cache, such as
140.Xr vm_page_grab 9 .
141.Pp
142All of the functions take a
143.Fa req
144parameter which encodes the allocation priority and optional modifier flags,
145described below.
146The functions whose names do not include
147.Dq noobj
148additionally insert the pages starting at index
149.Fa pindex
150in the
151VM object
152.Fa object .
153The object must be write-locked and not have a page already resident at the
154specified index.
155The functions whose names include
156.Dq domain
157support NUMA-aware allocation by returning pages from the
158.Xr numa 4
159domain specified by
160.Fa domain .
161.Pp
162The
163.Fn vm_page_alloc_after
164and
165.Fn vm_page_alloc_domain_after
166functions behave identically to
167.Fn vm_page_alloc
168and
169.Fn vm_page_alloc_domain ,
170respectively, except that they take an additional parameter
171.Fa mpred
172which must be the page resident in
173.Fa object
174with the largest index smaller than
175.Fa pindex ,
176or
177.Dv NULL
178if no such page exists.
179These functions exist to optimize the common case of loops that allocate
180multiple pages at successive indices within an object.
181.Pp
182The
183.Fn vm_page_alloc_contig
184and
185.Fn vm_page_alloc_noobj_contig
186functions and their NUMA-aware variants allocate a physically contiguous run of
187.Fa npages
188pages which satisfies the specified constraints.
189The
190.Fa low
191and
192.Fa high
193parameters specify a physical address range from which the run is to
194be allocated.
195The
196.Fa alignment
197parameter specifies the requested alignment of the first page in the run
198and must be a power of two.
199If the
200.Fa boundary
201parameter is non-zero, the pages constituting the run will not cross a
202physical address that is a multiple of the parameter value, which must be a
203power of two.
204If
205.Fa memattr
206is not equal to
207.Dv VM_MEMATTR_DEFAULT ,
208then mappings of the returned pages created by, e.g.,
209.Xr pmap_enter 9
210or
211.Xr pmap_qenter 9 ,
212will carry the machine-dependent encoding of the memory attribute.
213Additionally, the direct mapping of the page, if any, will be updated to
214reflect the requested memory attribute.
215.Pp
216The
217.Fn vm_page_alloc_freelist
218and
219.Fn vm_page_alloc_freelist_domain
220functions behave identically to
221.Fn vm_page_alloc_noobj
222and
223.Fn vm_page_alloc_noobj_domain ,
224respectively, except that a successful allocation will return a page from the
225specified physical memory freelist.
226These functions are not intended for use outside of the virtual memory
227subsystem and exist only to support the requirements of certain platforms.
228.Sh REQUEST FLAGS
229All page allocator functions accept a
230.Fa req
231parameter that governs certain aspects of the function's behavior.
232.Pp
233The
234.Dv VM_ALLOC_WAITOK ,
235.Dv VM_ALLOC_WAITFAIL ,
236and
237.Dv VM_ALLOC_NOWAIT
238flags specify the behavior of the allocator if free pages could not be
239immediately allocated.
240The
241.Dv VM_ALLOC_WAITOK
242flag can only be used with the
243.Dq noobj
244variants.
245If
246.Dv VM_ALLOC_NOWAIT
247is specified, then the allocator gives up and returns
248.Dv NULL .
249.Dv VM_ALLOC_NOWAIT
250is specified implicitly if none of the flags are present in the request.
251If either
252.Dv VM_ALLOC_WAITOK
253or
254.Dv VM_ALLOC_WAITFAIL
255is specified, the allocator will put the calling thread to sleep until
256sufficient free pages become available.
257At this point, if
258.Dv VM_ALLOC_WAITFAIL
259is specified the allocator will return
260.Dv NULL ,
261and if
262.Dv VM_ALLOC_WAITOK
263is specified the allocator will retry the allocation.
264After a failed
265.Dv VM_ALLOC_WAITFAIL
266allocation returns, the VM object, if any, will have been unlocked while the
267thread was sleeping.
268In this case the VM object write lock will be re-acquired before the function
269call returns.
270.Pp
271.Fa req
272also encodes the allocation request priority.
273By default the page(s) are allocated with no special treatment.
274If the number of available free pages is below a certain watermark, the
275allocation will fail or the allocating thread will sleep, depending on
276the specified wait flag.
277The watermark is computed at boot time and corresponds to a small (less than
278one percent) fraction of the system's total physical memory.
279To allocate memory more aggressively, one of the following flags may be specified.
280.Bl -tag -width ".Dv VM_ALLOC_INTERRUPT"
281.It Dv VM_ALLOC_SYSTEM
282The page can be allocated if the free page count is above the
283interrupt-reserved watermark.
284This flag should be used only when the system really needs the page.
285.It Dv VM_ALLOC_INTERRUPT
286The allocation will fail only if zero free pages are available.
287This flag should be used only if the consequences of an allocation failure
288are worse than leaving the system without free memory.
289For example, this flag is used when allocating kernel page table pages, where
290allocation failures trigger a kernel panic.
291.El
292.Pp
293The following optional flags can further modify allocator behavior:
294.Bl -tag -width ".Dv VM_ALLOC_NOBUSY"
295.It Dv VM_ALLOC_SBUSY
296The returned page will be shared-busy.
297This flag may only be specified when allocating pages in a VM object.
298.It Dv VM_ALLOC_NOBUSY
299The returned page will not be busy.
300This flag is implicit when allocating pages without a VM object.
301When allocating pages in a VM object, and neither
302.Dv VM_ALLOC_SBUSY
303nor
304.Dv VM_ALLOC_NOBUSY
305is specified, the returned pages will be exclusively busied.
306.It Dv VM_ALLOC_NODUMP
307The returned page will not be included in any kernel core dumps
308regardless of whether or not it is mapped into KVA.
309.It Dv VM_ALLOC_WIRED
310The returned page will be wired.
311.It Dv VM_ALLOC_ZERO
312If this flag is specified, the
313.Dq noobj
314variants will return zeroed pages.
315The other allocator interfaces ignore this flag.
316.It Dv VM_ALLOC_NORECLAIM
317If this flag is specified and the request cannot be immediately satisfied,
318the allocator will not attempt to break superpage reservations to satisfy the
319allocation.
320This may be useful when the overhead of scanning the reservation queue
321outweighs the cost of a failed allocation.
322This flag may be used only with the
323.Dq contig
324variants, and must not be specified in combination with
325.Dv VM_ALLOC_WAITOK .
326.It Dv VM_ALLOC_COUNT(n)
327Hint that at least
328.Fa n
329pages will be allocated by the caller in the near future.
330.Fa n
331must be no larger than 65535.
332If the system is short of free pages, this hint may cause the kernel
333to reclaim memory more aggressively than it would otherwise.
334.El
335.Sh RETURN VALUES
336If the allocation was successful, a pointer to the
337.Vt struct vm_page
338corresponding to the allocated page is returned.
339If the allocation request specified multiple pages, the returned
340pointer points to an array of
341.Vt struct vm_page
342constituting the run.
343Upon failure,
344.Dv NULL
345is returned.
346Regardless of whether the allocation succeeds or fails, the VM
347object
348.Fa object
349will be write-locked upon return.
350.Sh SEE ALSO
351.Xr numa 4 ,
352.Xr malloc 9 ,
353.Xr uma 9 ,
354.Xr vm_page_grab 9 ,
355.Xr vm_page_sbusy 9
356.Sh AUTHORS
357This manual page was written by
358.An Chad David Aq Mt davidc@acns.ab.ca .
359