xref: /freebsd/share/man/man9/zone.9 (revision 069ac184)
1.\"-
2.\" Copyright (c) 2001 Dag-Erling Smørgrav
3.\" All rights reserved.
4.\"
5.\" Redistribution and use in source and binary forms, with or without
6.\" modification, are permitted provided that the following conditions
7.\" are met:
8.\" 1. Redistributions of source code must retain the above copyright
9.\"    notice, this list of conditions and the following disclaimer.
10.\" 2. Redistributions in binary form must reproduce the above copyright
11.\"    notice, this list of conditions and the following disclaimer in the
12.\"    documentation and/or other materials provided with the distribution.
13.\"
14.\" THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
15.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
17.\" ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
18.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
20.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
22.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
23.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
24.\" SUCH DAMAGE.
25.\"
26.Dd January 16, 2023
27.Dt UMA 9
28.Os
29.Sh NAME
30.Nm UMA
31.Nd general-purpose kernel object allocator
32.Sh SYNOPSIS
33.In sys/param.h
34.In sys/queue.h
35.In vm/uma.h
36.Bd -literal
37typedef int (*uma_ctor)(void *mem, int size, void *arg, int flags);
38typedef void (*uma_dtor)(void *mem, int size, void *arg);
39typedef int (*uma_init)(void *mem, int size, int flags);
40typedef void (*uma_fini)(void *mem, int size);
41typedef int (*uma_import)(void *arg, void **store, int count, int domain,
42    int flags);
43typedef void (*uma_release)(void *arg, void **store, int count);
44typedef void *(*uma_alloc)(uma_zone_t zone, vm_size_t size, int domain,
45    uint8_t *pflag, int wait);
46typedef void (*uma_free)(void *item, vm_size_t size, uint8_t pflag);
47
48.Ed
49.Ft uma_zone_t
50.Fo uma_zcreate
51.Fa "char *name" "size_t size"
52.Fa "uma_ctor ctor" "uma_dtor dtor" "uma_init zinit" "uma_fini zfini"
53.Fa "int align" "uint16_t flags"
54.Fc
55.Ft uma_zone_t
56.Fo uma_zcache_create
57.Fa "char *name" "int size"
58.Fa "uma_ctor ctor" "uma_dtor dtor" "uma_init zinit" "uma_fini zfini"
59.Fa "uma_import zimport" "uma_release zrelease"
60.Fa "void *arg" "int flags"
61.Fc
62.Ft uma_zone_t
63.Fo uma_zsecond_create
64.Fa "char *name"
65.Fa "uma_ctor ctor" "uma_dtor dtor" "uma_init zinit" "uma_fini zfini"
66.Fa "uma_zone_t master"
67.Fc
68.Ft void
69.Fn uma_zdestroy "uma_zone_t zone"
70.Ft "void *"
71.Fn uma_zalloc "uma_zone_t zone" "int flags"
72.Ft "void *"
73.Fn uma_zalloc_arg "uma_zone_t zone" "void *arg" "int flags"
74.Ft "void *"
75.Fn uma_zalloc_domain "uma_zone_t zone" "void *arg" "int domain" "int flags"
76.Ft "void *"
77.Fn uma_zalloc_pcpu "uma_zone_t zone" "int flags"
78.Ft "void *"
79.Fn uma_zalloc_pcpu_arg "uma_zone_t zone" "void *arg" "int flags"
80.Ft "void *"
81.Fn uma_zalloc_smr "uma_zone_t zone" "int flags"
82.Ft void
83.Fn uma_zfree "uma_zone_t zone" "void *item"
84.Ft void
85.Fn uma_zfree_arg "uma_zone_t zone" "void *item" "void *arg"
86.Ft void
87.Fn uma_zfree_pcpu "uma_zone_t zone" "void *item"
88.Ft void
89.Fn uma_zfree_pcpu_arg "uma_zone_t zone" "void *item" "void *arg"
90.Ft void
91.Fn uma_zfree_smr "uma_zone_t zone" "void *item"
92.Ft void
93.Fn uma_prealloc "uma_zone_t zone" "int nitems"
94.Ft void
95.Fn uma_zone_reserve "uma_zone_t zone" "int nitems"
96.Ft void
97.Fn uma_zone_reserve_kva "uma_zone_t zone" "int nitems"
98.Ft void
99.Fn uma_reclaim "int req"
100.Ft void
101.Fn uma_reclaim_domain "int req" "int domain"
102.Ft void
103.Fn uma_zone_reclaim "uma_zone_t zone" "int req"
104.Ft void
105.Fn uma_zone_reclaim_domain "uma_zone_t zone" "int req" "int domain"
106.Ft void
107.Fn uma_zone_set_allocf "uma_zone_t zone" "uma_alloc allocf"
108.Ft void
109.Fn uma_zone_set_freef "uma_zone_t zone" "uma_free freef"
110.Ft int
111.Fn uma_zone_set_max "uma_zone_t zone" "int nitems"
112.Ft void
113.Fn uma_zone_set_maxcache "uma_zone_t zone" "int nitems"
114.Ft int
115.Fn uma_zone_get_max "uma_zone_t zone"
116.Ft int
117.Fn uma_zone_get_cur "uma_zone_t zone"
118.Ft void
119.Fn uma_zone_set_warning "uma_zone_t zone" "const char *warning"
120.Ft void
121.Fn uma_zone_set_maxaction "uma_zone_t zone" "void (*maxaction)(uma_zone_t)"
122.Ft smr_t
123.Fn uma_zone_get_smr "uma_zone_t zone"
124.Ft void
125.Fn uma_zone_set_smr "uma_zone_t zone" "smr_t smr"
126.In sys/sysctl.h
127.Fn SYSCTL_UMA_MAX parent nbr name access zone descr
128.Fn SYSCTL_ADD_UMA_MAX ctx parent nbr name access zone descr
129.Fn SYSCTL_UMA_CUR parent nbr name access zone descr
130.Fn SYSCTL_ADD_UMA_CUR ctx parent nbr name access zone descr
131.Sh DESCRIPTION
132UMA (Universal Memory Allocator) provides an efficient interface for managing
133dynamically-sized collections of items of identical size, referred to as zones.
134Zones keep track of which items are in use and which
135are not, and UMA provides functions for allocating items from a zone and
136for releasing them back, making them available for subsequent allocation requests.
137Zones maintain per-CPU caches with linear scalability on SMP
138systems as well as round-robin and first-touch policies for NUMA
139systems.
140The number of items cached per CPU is bounded, and each zone additionally
141maintains an unbounded cache of items that is used to quickly satisfy
142per-CPU cache allocation misses.
143.Pp
144Two types of zones exist: regular zones and cache zones.
145In a regular zone, items are allocated from a slab, which is one or more
146virtually contiguous memory pages that have been allocated from the kernel's
147page allocator.
148Internally, slabs are managed by a UMA keg, which is responsible for allocating
149slabs and keeping track of their usage by one or more zones.
150In typical usage, there is one keg per zone, so slabs are not shared among
151multiple zones.
152.Pp
153Regular zones import items from a keg, and release items back to that keg if
154requested.
155Cache zones do not have a keg, and instead use custom import and release
156methods.
157For example, some collections of kernel objects are statically allocated
158at boot-time, and the size of the collection does not change.
159A cache zone can be used to implement an efficient allocator for the objects in
160such a collection.
161.Pp
162The
163.Fn uma_zcreate
164and
165.Fn uma_zcache_create
166functions create a new regular zone and cache zone, respectively.
167The
168.Fn uma_zsecond_create
169function creates a regular zone which shares the keg of the zone
170specified by the
171.Fa master
172argument.
173The
174.Fa name
175argument is a text name of the zone for debugging and stats; this memory
176should not be freed until the zone has been deallocated.
177.Pp
178The
179.Fa ctor
180and
181.Fa dtor
182arguments are callback functions that are called by
183the UMA subsystem at the time of the call to
184.Fn uma_zalloc
185and
186.Fn uma_zfree
187respectively.
188Their purpose is to provide hooks for any initialization or
189teardown work that needs to be done at the time of the allocation
190or release of a resource.
191A good usage for the
192.Fa ctor
193and
194.Fa dtor
195callbacks might be to initialize a data structure embedded in the item,
196such as a
197.Xr queue 3
198head.
199.Pp
200The
201.Fa zinit
202and
203.Fa zfini
204arguments are used to optimize the allocation of items from the zone.
205They are called by the UMA subsystem whenever
206it needs to allocate or free items to satisfy requests or memory pressure.
207A good use for the
208.Fa zinit
209and
210.Fa zfini
211callbacks might be to
212initialize and destroy a mutex contained within an item.
213This would allow one to avoid destroying and re-initializing the mutex
214each time the item is freed and re-allocated.
215They are not called on each call to
216.Fn uma_zalloc
217and
218.Fn uma_zfree
219but rather when an item is imported into a zone's cache, and when a zone
220releases an item to the slab allocator, typically as a response to memory
221pressure.
222.Pp
223For
224.Fn uma_zcache_create ,
225the
226.Fa zimport
227and
228.Fa zrelease
229functions are called to import items into the zone and to release items
230from the zone, respectively.
231The
232.Fa zimport
233function should store pointers to items in the
234.Fa store
235array, which contains a maximum of
236.Fa count
237entries.
238The function must return the number of imported items, which may be less than
239the maximum.
240Similarly, the
241.Fa store
242parameter to the
243.Fa zrelease
244function contains an array of
245.Fa count
246pointers to items.
247The
248.Fa arg
249parameter passed to
250.Fn uma_zcache_create
251is provided to the import and release functions.
252The
253.Fa domain
254parameter to
255.Fa zimport
256specifies the requested
257.Xr numa 4
258domain for the allocation.
259It is either a NUMA domain number or the special value
260.Dv UMA_ANYDOMAIN .
261.Pp
262The
263.Fa flags
264argument of
265.Fn uma_zcreate
266and
267.Fn uma_zcache_create
268is a subset of the following flags:
269.Bl -tag -width "foo"
270.It Dv UMA_ZONE_NOFREE
271Slabs allocated to the zone's keg are never freed.
272.It Dv UMA_ZONE_NODUMP
273Pages belonging to the zone will not be included in minidumps.
274.It Dv UMA_ZONE_PCPU
275An allocation from the zone has
276.Va mp_ncpu
277shadow copies that are privately assigned to CPUs.
278A CPU can address its private copy using the base allocation address plus
279a multiple of the current CPU ID and
280.Fn sizeof "struct pcpu" :
281.Bd -literal -offset indent
282foo_zone = uma_zcreate(..., UMA_ZONE_PCPU);
283 ...
284foo_base = uma_zalloc(foo_zone, ...);
285 ...
286critical_enter();
287foo_pcpu = (foo_t *)zpcpu_get(foo_base);
288/* do something with foo_pcpu */
289critical_exit();
290
291.Ed
292Note that
293.Dv M_ZERO
294cannot be used when allocating items from a PCPU zone.
295To obtain zeroed memory from a PCPU zone, use the
296.Fn uma_zalloc_pcpu
297function and its variants instead, and pass
298.Dv M_ZERO .
299.It Dv UMA_ZONE_NOTOUCH
300The UMA subsystem may not directly touch (i.e. read or write) the slab memory.
301Otherwise, by default, book-keeping of items within a slab may be done in the
302slab page itself, and
303.Dv INVARIANTS
304kernels may also do use-after-free checking by accessing the slab memory.
305.It Dv UMA_ZONE_ZINIT
306The zone will have its
307.Ft uma_init
308method set to an internal method that initializes a newly allocated slab
309to all zeros.
310Do not confuse the
311.Ft uma_init
312method with
313.Ft uma_ctor .
314A zone with the
315.Dv UMA_ZONE_ZINIT
316flag does not return zeroed memory on every call to
317.Fn uma_zalloc .
318.It Dv UMA_ZONE_NOTPAGE
319An allocator function will be supplied with
320.Fn uma_zone_set_allocf
321and the memory that it returns may not be kernel virtual memory backed by VM
322pages in the page array.
323.It Dv UMA_ZONE_MALLOC
324The zone is for the
325.Xr malloc 9
326subsystem.
327.It Dv UMA_ZONE_VM
328The zone is for the VM subsystem.
329.It Dv UMA_ZONE_CONTIG
330Items in this zone must be contiguous in physical address space.
331Items will follow normal alignment constraints and may span page boundaries
332between pages with contiguous physical addresses.
333.It Dv UMA_ZONE_UNMANAGED
334By default, UMA zone caches are shrunk to help resolve free page shortages.
335Cached items that have not been used for a long period may also be freed from
336the zone.
337When this flag is set, the system will not reclaim memory from the zone's
338caches.
339.It Dv UMA_ZONE_SMR
340Create a zone whose items will be synchronized using the
341.Xr smr 9
342mechanism.
343Upon creation the zone will have an associated
344.Vt smr_t
345structure which can be fetched using
346.Fn uma_zone_get_smr .
347.El
348.Pp
349Zones can be destroyed using
350.Fn uma_zdestroy ,
351freeing all memory that is cached in the zone.
352All items allocated from the zone must be freed to the zone before the zone
353may be safely destroyed.
354.Pp
355To allocate an item from a zone, simply call
356.Fn uma_zalloc
357with a pointer to that zone and set the
358.Fa flags
359argument to selected flags as documented in
360.Xr malloc 9 .
361It will return a pointer to an item if successful, or
362.Dv NULL
363in the rare case where all items in the zone are in use and the
364allocator is unable to grow the zone and
365.Dv M_NOWAIT
366is specified.
367.Pp
368Items are released back to the zone from which they were allocated by
369calling
370.Fn uma_zfree
371with a pointer to the zone and a pointer to the item.
372If
373.Fa item
374is
375.Dv NULL ,
376then
377.Fn uma_zfree
378does nothing.
379.Pp
380The variants
381.Fn uma_zalloc_arg
382and
383.Fn uma_zfree_arg
384allow callers to
385specify an argument for the
386.Fa ctor
387and
388.Fa dtor
389functions of the zone, respectively.
390The variants
391.Fn uma_zalloc_pcpu
392and
393.Fn uma_zfree_pcpu
394allocate and free
395.Va mp_ncpu
396shadow copies as described for
397.Dv UMA_ZONE_PCPU .
398If
399.Fa item
400is
401.Dv NULL ,
402then
403.Fn uma_zfree_pcpu
404does nothing.
405.Pp
406The
407.Fn uma_zalloc_smr
408and
409.Fn uma_zfree_smr
410functions allocate and free items from an SMR-enabled zone, that is,
411a zone created with
412.Dv UMA_ZONE_SMR
413or a zone that has had
414.Fn uma_zone_set_smr
415called.
416.Pp
417The
418.Fn uma_zalloc_domain
419function allows callers to specify a fixed
420.Xr numa 4
421domain to allocate from.
422This uses a guaranteed but slow path in the allocator which reduces
423concurrency.
424.Pp
425The
426.Fn uma_prealloc
427function allocates slabs for the requested number of items, typically following
428the initial creation of a zone.
429Subsequent allocations from the zone will be satisfied using the pre-allocated
430slabs.
431Note that slab allocation is performed with the
432.Dv M_WAITOK
433flag, so
434.Fn uma_prealloc
435may sleep.
436.Pp
437The
438.Fn uma_zone_reserve
439function sets the number of reserved items for the zone.
440.Fn uma_zalloc
441and variants will ensure that the zone contains at least the reserved number
442of free items.
443Reserved items may be allocated by specifying
444.Dv M_USE_RESERVE
445in the allocation request flags.
446.Fn uma_zone_reserve
447does not perform any pre-allocation by itself.
448.Pp
449The
450.Fn uma_zone_reserve_kva
451function pre-allocates kernel virtual address space for the requested
452number of items.
453Subsequent allocations from the zone will be satisfied using the pre-allocated
454address space.
455Note that unlike
456.Fn uma_zone_reserve ,
457.Fn uma_zone_reserve_kva
458does not restrict the use of the pre-allocation to
459.Dv M_USE_RESERVE
460requests.
461.Pp
462The
463.Fn uma_reclaim
464and
465.Fn uma_zone_reclaim
466functions reclaim cached items from UMA zones, releasing unused memory.
467The
468.Fn uma_reclaim
469function reclaims items from all regular zones, while
470.Fn uma_zone_reclaim
471reclaims items only from the specified zone.
472The
473.Fa req
474parameter must be one of three values which specify how aggressively
475items are to be reclaimed:
476.Bl -tag -width indent
477.It Dv UMA_RECLAIM_TRIM
478Reclaim items only in excess of the zone's estimated working set size.
479The working set size is periodically updated and tracks the recent history
480of the zone's usage.
481.It Dv UMA_RECLAIM_DRAIN
482Reclaim all items from the unbounded cache.
483Free items in the per-CPU caches are left alone.
484.It Dv UMA_RECLAIM_DRAIN_CPU
485Reclaim all cached items.
486.El
487The
488.Fn uma_reclaim_domain
489and
490.Fn uma_zone_reclaim_domain
491functions apply only to items allocated from the specified domain.
492In the case of domains using a round-robin NUMA policy, cached items from all
493domains are freed to the keg, but only slabs from the specific domain will
494be freed.
495.Pp
496The
497.Fn uma_zone_set_allocf
498and
499.Fn uma_zone_set_freef
500functions allow a zone's default slab allocation and free functions to be
501overridden.
502This is useful if memory with special constraints such as attributes,
503alignment, or address ranges must be used.
504.Pp
505The
506.Fn uma_zone_set_max
507function limits the number of items
508.Pq and therefore memory
509that can be allocated to
510.Fa zone .
511The
512.Fa nitems
513argument specifies the requested upper limit number of items.
514The effective limit is returned to the caller, as it may end up being higher
515than requested due to the implementation rounding up to ensure all memory pages
516allocated to the zone are utilised to capacity.
517The limit applies to the total number of items in the zone, which includes
518allocated items, free items and free items in the per-CPU caches.
519On systems with more than one CPU it may not be possible to allocate
520the specified number of items even when there is no shortage of memory,
521because all of the remaining free items may be in the caches of the
522other CPUs when the limit is hit.
523.Pp
524The
525.Fn uma_zone_set_maxcache
526function limits the number of free items which may be cached in the zone.
527This limit applies to both the per-CPU caches and the cache of free buckets.
528.Pp
529The
530.Fn uma_zone_get_max
531function returns the effective upper limit number of items for a zone.
532.Pp
533The
534.Fn uma_zone_get_cur
535function returns an approximation of the number of items currently allocated
536from the zone.
537The returned value is approximate because appropriate synchronisation to
538determine an exact value is not performed by the implementation.
539This ensures low overhead at the expense of potentially stale data being used
540in the calculation.
541.Pp
542The
543.Fn uma_zone_set_warning
544function sets a warning that will be printed on the system console when the
545given zone becomes full and fails to allocate an item.
546The warning will be printed no more often than every five minutes.
547Warnings can be turned off globally by setting the
548.Va vm.zone_warnings
549sysctl tunable to
550.Va 0 .
551.Pp
552The
553.Fn uma_zone_set_maxaction
554function sets a function that will be called when the given zone becomes full
555and fails to allocate an item.
556The function will be called with the zone locked.
557Also, the function
558that called the allocation function may have held additional locks.
559Therefore,
560this function should do very little work (similar to a signal handler).
561.Pp
562The
563.Fn uma_zone_set_smr
564function associates an existing
565.Xr smr 9
566structure with a UMA zone.
567The effect is similar to creating a zone with the
568.Dv UMA_ZONE_SMR
569flag, except that a new SMR structure is not created.
570This function must be called before any allocations from the zone are performed.
571.Pp
572The
573.Fn SYSCTL_UMA_MAX parent nbr name access zone descr
574macro declares a static
575.Xr sysctl 9
576oid that exports the effective upper limit number of items for a zone.
577The
578.Fa zone
579argument should be a pointer to
580.Vt uma_zone_t .
581A read of the oid returns the value obtained through
582.Fn uma_zone_get_max .
583A write to the oid sets a new value via
584.Fn uma_zone_set_max .
585The
586.Fn SYSCTL_ADD_UMA_MAX ctx parent nbr name access zone descr
587macro is provided to create this type of oid dynamically.
588.Pp
589The
590.Fn SYSCTL_UMA_CUR parent nbr name access zone descr
591macro declares a static read-only
592.Xr sysctl 9
593oid that exports the approximate current occupancy of the zone.
594The
595.Fa zone
596argument should be a pointer to
597.Vt uma_zone_t .
598A read of the oid returns the value obtained through
599.Fn uma_zone_get_cur .
600The
601.Fn SYSCTL_ADD_UMA_CUR ctx parent nbr name access zone descr
602macro is provided to create this type of oid dynamically.
603.Sh IMPLEMENTATION NOTES
604The memory that these allocation calls return is not executable.
605The
606.Fn uma_zalloc
607function does not support the
608.Dv M_EXEC
609flag to allocate executable memory.
610Not all platforms enforce a distinction between executable and
611non-executable memory.
612.Sh SEE ALSO
613.Xr numa 4 ,
614.Xr vmstat 8 ,
615.Xr malloc 9 ,
616.Xr smr 9
617.Rs
618.%A Jeff Bonwick
619.%T "The Slab Allocator: An Object-Caching Kernel Memory Allocator"
620.%D 1994
621.Re
622.Sh HISTORY
623The zone allocator first appeared in
624.Fx 3.0 .
625It was radically changed in
626.Fx 5.0
627to function as a slab allocator.
628.Sh AUTHORS
629.An -nosplit
630The zone allocator was written by
631.An John S. Dyson .
632The zone allocator was rewritten in large parts by
633.An Jeff Roberson Aq Mt jeff@FreeBSD.org
634to function as a slab allocator.
635.Pp
636This manual page was written by
637.An Dag-Erling Sm\(/orgrav Aq Mt des@FreeBSD.org .
638Changes for UMA by
639.An Jeroen Ruigrok van der Werven Aq Mt asmodai@FreeBSD.org .
640