/*
 * Copyright (c) 1997, 1998 John S. Dyson
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *	notice immediately at the beginning of the file, without modification,
 *	this list of conditions, and the following disclaimer.
 * 2. Absolutely no warranty of function or purpose is made by the author
 *	John S. Dyson.
 *
 * $FreeBSD: src/sys/vm/vm_zone.c,v 1.30.2.6 2002/10/10 19:50:16 dillon Exp $
 * $DragonFly: src/sys/vm/vm_zone.c,v 1.28 2008/01/23 17:35:48 nth Exp $
 */

#include <sys/param.h>
#include <sys/queue.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/lock.h>
#include <sys/malloc.h>
#include <sys/sysctl.h>
#include <sys/vmmeter.h>

#include <vm/vm.h>
#include <vm/vm_object.h>
#include <vm/vm_page.h>
#include <vm/vm_map.h>
#include <vm/vm_kern.h>
#include <vm/vm_extern.h>
#include <vm/vm_zone.h>
#include <sys/spinlock2.h>		/* XXX */

static MALLOC_DEFINE(M_ZONE, "ZONE", "Zone header");

#define	ZONE_ERROR_INVALID 0
#define	ZONE_ERROR_NOTFREE 1
#define	ZONE_ERROR_ALREADYFREE 2

#define	ZONE_ROUNDING	32

#define	ZENTRY_FREE	0x12342378

static void *zget(vm_zone_t z);

/*
 * Return an item from the specified zone.  This function is non-blocking
 * for ZONE_INTERRUPT zones.
 */
void *
zalloc(vm_zone_t z)
{
	void *item;

#ifdef INVARIANTS
	if (z == NULL)
		zerror(ZONE_ERROR_INVALID);
#endif
	spin_lock_wr(&z->zlock);
	if (z->zfreecnt > z->zfreemin) {
		item = z->zitems;
#ifdef INVARIANTS
		KASSERT(item != NULL, ("zitems unexpectedly NULL"));
		if (((void **) item)[1] != (void *) ZENTRY_FREE)
			zerror(ZONE_ERROR_NOTFREE);
		((void **) item)[1] = 0;
#endif
		z->zitems = ((void **) item)[0];
		z->zfreecnt--;
		z->znalloc++;
		spin_unlock_wr(&z->zlock);
	} else {
		spin_unlock_wr(&z->zlock);
		item = zget(z);
		/*
		 * PANICFAIL allows the caller to assume that the zalloc()
		 * will always succeed.  If it doesn't, we panic here.
		 */
		if (item == NULL && (z->zflags & ZONE_PANICFAIL))
			panic("zalloc(%s) failed", z->zname);
	}
	return item;
}

/*
 * Free an item to the specified zone.
 */
void
zfree(vm_zone_t z, void *item)
{

	spin_lock_wr(&z->zlock);
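	/*
	 * The free list is intrusive: the first pointer-sized word of a
	 * free item links to the next free item and, under INVARIANTS,
	 * the second word carries the ZENTRY_FREE magic number.
	 */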
	((void **) item)[0] = z->zitems;
#ifdef INVARIANTS
	if (((void **) item)[1] == (void *) ZENTRY_FREE)
		zerror(ZONE_ERROR_ALREADYFREE);
	((void **) item)[1] = (void *) ZENTRY_FREE;
#endif
	z->zitems = item;
	z->zfreecnt++;
	spin_unlock_wr(&z->zlock);
}

/*
 * This file implements a very simple zone allocator, used in lieu of
 * the malloc allocator where it is needed or more optimal.
 *
 * Note that the initial implementation had page coloring, which yielded
 * no improvement (actually a performance degradation).
 *
 * Note also that the zones are type stable.  The only restriction is
 * that the first two longwords of a data structure may be changed by
 * the allocator between allocations.  Any data that must remain stable
 * between allocations must reside after the first two longwords.
 *
 * zinitna, zinit and zbootinit are the initialization routines.
 * zalloc and zfree are the allocation/free routines.
 */
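
/*
 * Illustrative usage sketch only (the "mydata" zone and the parameter
 * values below are hypothetical, not code from this file):
 *
 *	static vm_zone_t mydata_zone;
 *
 *	mydata_zone = zinit("MYDATA", sizeof(struct mydata), 0, 0, 1);
 *	struct mydata *md = zalloc(mydata_zone);
 *	...
 *	zfree(mydata_zone, md);
 *
 * Because the free list lives in the first two longwords of an item,
 * any field that must survive a zfree()/zalloc() cycle must be placed
 * after those two longwords.
 */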

LIST_HEAD(zlist, vm_zone) zlist = LIST_HEAD_INITIALIZER(zlist);
static int sysctl_vm_zone(SYSCTL_HANDLER_ARGS);
static int zone_kmem_pages, zone_kern_pages, zone_kmem_kvaspace;

/*
 * Create a zone, but don't allocate the zone structure.  If the
 * zone had been previously created by the zone boot code, initialize
 * various parts of the zone code.
 *
 * If waits are not allowed during allocation (e.g. during interrupt
 * code), allocate the kernel virtual space a priori and allocate
 * pages only as needed.
 *
 * Arguments:
 * z		pointer to zone structure.
 * obj		pointer to VM object (opt).
 * name		name of zone.
 * size		size of zone entries.
 * nentries	number of zone entries allocated (ZONE_INTERRUPT only).
 * flags	ZONE_INTERRUPT -- items can be allocated at interrupt time.
 * zalloc	number of pages allocated when memory is needed.
 *
 * Note that when using ZONE_INTERRUPT, the size of the zone is limited
 * by the nentries argument.  If ZONE_INTERRUPT is not set, the amount
 * of allocatable memory is unlimited.
 */
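
/*
 * For example, an interrupt-safe zone backed by a static zone header
 * might be created as follows (a sketch; the "foo" names are
 * hypothetical):
 *
 *	static struct vm_zone foo_zone;
 *
 *	foo_zone.zflags = 0;
 *	zinitna(&foo_zone, NULL, "FOO", sizeof(struct foo),
 *		1024, ZONE_INTERRUPT, 1);
 *
 * This reserves KVA for 1024 entries up front and populates the
 * backing pages on demand.
 */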
int
zinitna(vm_zone_t z, vm_object_t obj, char *name, int size,
	int nentries, int flags, int zalloc)
{
	int totsize;

	/*
	 * Only zones created with zinit() are destroyable.
	 */
	if (z->zflags & ZONE_DESTROYABLE)
		panic("zinitna: can't create destroyable zone");

	if ((z->zflags & ZONE_BOOT) == 0) {
		z->zsize = (size + ZONE_ROUNDING - 1) & ~(ZONE_ROUNDING - 1);
		spin_init(&z->zlock);
		z->zfreecnt = 0;
		z->ztotal = 0;
		z->zmax = 0;
		z->zname = name;
		z->znalloc = 0;
		z->zitems = NULL;

		LIST_INSERT_HEAD(&zlist, z, zlink);
	}

	z->zkmvec = NULL;
	z->zkmcur = z->zkmmax = 0;
	z->zflags |= flags;

	/*
	 * If we cannot wait, allocate KVA space up front, and we will fill
	 * in pages as needed.  This is particularly required when creating
	 * an allocation space for map entries in kernel_map, because we
	 * do not want to go into a recursion deadlock with
	 * vm_map_entry_reserve().
	 */
	if (z->zflags & ZONE_INTERRUPT) {
		totsize = round_page(z->zsize * nentries);
		zone_kmem_kvaspace += totsize;

		z->zkva = kmem_alloc_pageable(&kernel_map, totsize);
		if (z->zkva == 0) {
			LIST_REMOVE(z, zlink);
			return 0;
		}

		z->zpagemax = totsize / PAGE_SIZE;
		if (obj == NULL) {
			z->zobj = vm_object_allocate(OBJT_DEFAULT, z->zpagemax);
		} else {
			z->zobj = obj;
			_vm_object_allocate(OBJT_DEFAULT, z->zpagemax, obj);
		}
		z->zallocflag = VM_ALLOC_SYSTEM | VM_ALLOC_INTERRUPT;
		z->zmax += nentries;
	} else {
		z->zallocflag = VM_ALLOC_NORMAL | VM_ALLOC_SYSTEM;
		z->zmax = 0;
	}

	if (z->zsize > PAGE_SIZE)
		z->zfreemin = 1;
	else
		z->zfreemin = PAGE_SIZE / z->zsize;

	z->zpagecount = 0;
	if (zalloc)
		z->zalloc = zalloc;
	else
		z->zalloc = 1;

	/*
	 * Populate the interrupt zone at creation time rather than
	 * on first allocation, as this is a potentially long operation.
	 */
	if (z->zflags & ZONE_INTERRUPT) {
		void *buf;

		buf = zget(z);
		zfree(z, buf);
	}

	return 1;
}

/*
 * The same as zinitna, except the zone data structure is allocated
 * automatically by kmalloc.  This routine should normally be used,
 * except in certain tricky startup conditions in the VM system -- then
 * zbootinit and zinitna can be used.  zinit is the standard zone
 * initialization call.
 */
vm_zone_t
zinit(char *name, int size, int nentries, int flags, int zalloc)
{
	vm_zone_t z;

	z = (vm_zone_t) kmalloc(sizeof (struct vm_zone), M_ZONE, M_NOWAIT);
	if (z == NULL)
		return NULL;

	z->zflags = 0;
	if (zinitna(z, NULL, name, size, nentries,
	            flags & ~ZONE_DESTROYABLE, zalloc) == 0) {
		kfree(z, M_ZONE);
		return NULL;
	}

	if (flags & ZONE_DESTROYABLE)
		z->zflags |= ZONE_DESTROYABLE;

	return z;
}

/*
 * Initialize a zone before the system is fully up.  This routine should
 * only be called before full VM startup.
 */
void
zbootinit(vm_zone_t z, char *name, int size, void *item, int nitems)
{
	int i;

	z->zname = name;
	z->zsize = size;
	z->zpagemax = 0;
	z->zobj = NULL;
	z->zflags = ZONE_BOOT;
	z->zfreemin = 0;
	z->zallocflag = 0;
	z->zpagecount = 0;
	z->zalloc = 0;
	z->znalloc = 0;
	spin_init(&z->zlock);

	bzero(item, nitems * z->zsize);
	z->zitems = NULL;
	for (i = 0; i < nitems; i++) {
		((void **) item)[0] = z->zitems;
#ifdef INVARIANTS
		((void **) item)[1] = (void *) ZENTRY_FREE;
#endif
		z->zitems = item;
		item = (uint8_t *)item + z->zsize;
	}
	z->zfreecnt = nitems;
	z->zmax = nitems;
	z->ztotal = nitems;

	LIST_INSERT_HEAD(&zlist, z, zlink);
}
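
/*
 * A boot-time zone is seeded from a static buffer, for example (a
 * sketch with hypothetical "bar" names):
 *
 *	static struct bar bar_init[NBAR];
 *	static struct vm_zone bar_zone;
 *
 *	zbootinit(&bar_zone, "BAR", sizeof(struct bar), bar_init, NBAR);
 *
 * Such a zone can later be passed to zinitna(), which detects ZONE_BOOT
 * and skips the parts already initialized here.
 */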

/*
 * Release all resources owned by a zone created with zinit().
 */
void
zdestroy(vm_zone_t z)
{
	int i;

	if (z == NULL)
		panic("zdestroy: null zone");
	if ((z->zflags & ZONE_DESTROYABLE) == 0)
		panic("zdestroy: undestroyable zone");

	LIST_REMOVE(z, zlink);

	/*
	 * Release virtual mappings, physical memory and update sysctl stats.
	 */
	if (z->zflags & ZONE_INTERRUPT) {
		/*
		 * Free the mapping.
		 */
		kmem_free(&kernel_map, z->zkva, z->zpagemax * PAGE_SIZE);
		atomic_subtract_int(&zone_kmem_kvaspace,
		    z->zpagemax * PAGE_SIZE);
		/*
		 * Free the backing object and physical pages.
		 */
		vm_object_deallocate(z->zobj);
		atomic_subtract_int(&zone_kmem_pages, z->zpagecount);
	} else {
		for (i = 0; i < z->zkmcur; i++) {
			kmem_free(&kernel_map, z->zkmvec[i],
			    z->zalloc * PAGE_SIZE);
			atomic_subtract_int(&zone_kern_pages, z->zalloc);
		}
		if (z->zkmvec != NULL)
			kfree(z->zkmvec, M_ZONE);
	}

	spin_uninit(&z->zlock);
	kfree(z, M_ZONE);
}

/*
 * void *zalloc(vm_zone_t zone) --
 *	Returns an item from a specified zone.  May not be called from a
 *	FAST interrupt or IPI function.
 *
 * void zfree(vm_zone_t zone, void *item) --
 *	Frees an item back to a specified zone.  May not be called from a
 *	FAST interrupt or IPI function.
 */

/*
 * Internal zone routine.  Not to be called from external (non vm_zone) code.
 */
static void *
zget(vm_zone_t z)
{
	int i;
	vm_page_t m;
	int nitems, nbytes;
	void *item;

	if (z == NULL)
		panic("zget: null zone");

	if (z->zflags & ZONE_INTERRUPT) {
		/*
		 * Interrupt zones do not mess with the kernel_map, they
		 * simply populate an existing mapping.
		 */
		nbytes = z->zpagecount * PAGE_SIZE;
		nbytes -= nbytes % z->zsize;
		item = (char *) z->zkva + nbytes;
		for (i = 0; ((i < z->zalloc) && (z->zpagecount < z->zpagemax));
		     i++) {
			vm_offset_t zkva;

			m = vm_page_alloc(z->zobj, z->zpagecount,
					  z->zallocflag);
			/* note: z might be modified due to blocking */
			if (m == NULL)
				break;

			/*
			 * Unbusy the page so it can be freed in zdestroy().
			 * Make sure it is not on any queue and so cannot be
			 * recycled under our feet.
			 */
			KKASSERT(m->queue == PQ_NONE);
			vm_page_flag_clear(m, PG_BUSY);

			zkva = z->zkva + z->zpagecount * PAGE_SIZE;
			pmap_kenter(zkva, VM_PAGE_TO_PHYS(m)); /* YYY */
			bzero((void *)zkva, PAGE_SIZE);
			z->zpagecount++;
			zone_kmem_pages++;
			vmstats.v_wire_count++;
		}
		nitems = ((z->zpagecount * PAGE_SIZE) - nbytes) / z->zsize;
	} else if (z->zflags & ZONE_SPECIAL) {
		/*
		 * The special zone is the one used for vm_map_entry_t's.
		 * We have to avoid an infinite recursion in
		 * vm_map_entry_reserve() by using vm_map_entry_kreserve()
		 * instead.  The map entries are pre-reserved by the kernel
		 * by vm_map_entry_reserve_cpu_init().
		 */
		nbytes = z->zalloc * PAGE_SIZE;

		item = (void *)kmem_alloc3(&kernel_map, nbytes, KM_KRESERVE);

		/* note: z might be modified due to blocking */
		if (item != NULL) {
			zone_kern_pages += z->zalloc;	/* not MP-safe XXX */
			bzero(item, nbytes);
		} else {
			nbytes = 0;
		}
		nitems = nbytes / z->zsize;
	} else {
		/*
		 * Otherwise allocate KVA from the kernel_map.
		 */
		nbytes = z->zalloc * PAGE_SIZE;

		item = (void *)kmem_alloc3(&kernel_map, nbytes, 0);

		/* note: z might be modified due to blocking */
		if (item != NULL) {
			zone_kern_pages += z->zalloc;	/* not MP-safe XXX */
			bzero(item, nbytes);

			if (z->zflags & ZONE_DESTROYABLE) {
				if (z->zkmcur == z->zkmmax) {
					z->zkmmax =
					    z->zkmmax == 0 ? 1 : z->zkmmax * 2;
					z->zkmvec = krealloc(z->zkmvec,
					    z->zkmmax * sizeof(z->zkmvec[0]),
					    M_ZONE, M_WAITOK);
				}
				z->zkmvec[z->zkmcur++] = (vm_offset_t)item;
			}
		} else {
			nbytes = 0;
		}
		nitems = nbytes / z->zsize;
	}

	spin_lock_wr(&z->zlock);
	z->ztotal += nitems;
	/*
	 * Save one item for immediate allocation.
	 */
	if (nitems != 0) {
		nitems -= 1;
		for (i = 0; i < nitems; i++) {
			((void **) item)[0] = z->zitems;
#ifdef INVARIANTS
			((void **) item)[1] = (void *) ZENTRY_FREE;
#endif
			z->zitems = item;
			item = (uint8_t *)item + z->zsize;
		}
		z->zfreecnt += nitems;
		z->znalloc++;
	} else if (z->zfreecnt > 0) {
		item = z->zitems;
		z->zitems = ((void **) item)[0];
#ifdef INVARIANTS
		if (((void **) item)[1] != (void *) ZENTRY_FREE)
			zerror(ZONE_ERROR_NOTFREE);
		((void **) item)[1] = 0;
#endif
		z->zfreecnt--;
		z->znalloc++;
	} else {
		item = NULL;
	}
	spin_unlock_wr(&z->zlock);

	/*
	 * A special zone may have used a kernel-reserved vm_map_entry.  If
	 * so we have to be sure to recover our reserve so we don't run out.
	 * We will panic if we run out.
	 */
	if (z->zflags & ZONE_SPECIAL)
		vm_map_entry_reserve(0);

	return item;
}

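/*
 * sysctl handler backing the vm.zone oid (registered below): emits a
 * header line followed by one line per zone with the entry size, limit,
 * used and free counts, and the cumulative allocation count.
 */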
static int
sysctl_vm_zone(SYSCTL_HANDLER_ARGS)
{
	int error = 0;
	vm_zone_t curzone;
	char tmpbuf[128];
	char tmpname[14];

	ksnprintf(tmpbuf, sizeof(tmpbuf),
	    "\nITEM            SIZE     LIMIT    USED    FREE  REQUESTS\n");
	error = SYSCTL_OUT(req, tmpbuf, strlen(tmpbuf));
	if (error)
		return (error);

	LIST_FOREACH(curzone, &zlist, zlink) {
		int i;
		int len;
		int offset;

		len = strlen(curzone->zname);
		if (len >= (sizeof(tmpname) - 1))
			len = (sizeof(tmpname) - 1);
		for (i = 0; i < sizeof(tmpname) - 1; i++)
			tmpname[i] = ' ';
		tmpname[i] = 0;
		memcpy(tmpname, curzone->zname, len);
		tmpname[len] = ':';
		offset = 0;
		if (curzone == LIST_FIRST(&zlist)) {
			offset = 1;
			tmpbuf[0] = '\n';
		}

		ksnprintf(tmpbuf + offset, sizeof(tmpbuf) - offset,
			"%s %6.6u, %8.8u, %6.6u, %6.6u, %8.8u\n",
			tmpname, curzone->zsize, curzone->zmax,
			(curzone->ztotal - curzone->zfreecnt),
			curzone->zfreecnt, curzone->znalloc);

		len = strlen(tmpbuf);
		if (LIST_NEXT(curzone, zlink) == NULL)
			tmpbuf[len - 1] = 0;

		error = SYSCTL_OUT(req, tmpbuf, len);

		if (error)
			return (error);
	}
	return (0);
}

#if defined(INVARIANTS)
void
zerror(int error)
{
	char *msg;

	switch (error) {
	case ZONE_ERROR_INVALID:
		msg = "zone: invalid zone";
		break;
	case ZONE_ERROR_NOTFREE:
		msg = "zone: entry not free";
		break;
	case ZONE_ERROR_ALREADYFREE:
		msg = "zone: freeing free entry";
		break;
	default:
		msg = "zone: invalid error";
		break;
	}
	panic("%s", msg);
}
#endif

SYSCTL_OID(_vm, OID_AUTO, zone, CTLTYPE_STRING | CTLFLAG_RD,
	NULL, 0, sysctl_vm_zone, "A", "Zone Info");

SYSCTL_INT(_vm, OID_AUTO, zone_kmem_pages, CTLFLAG_RD, &zone_kmem_pages, 0,
	"Number of interrupt safe pages allocated by zone");
SYSCTL_INT(_vm, OID_AUTO, zone_kmem_kvaspace, CTLFLAG_RD,
	&zone_kmem_kvaspace, 0, "KVA space allocated by zone");
SYSCTL_INT(_vm, OID_AUTO, zone_kern_pages, CTLFLAG_RD, &zone_kern_pages, 0,
	"Number of non-interrupt safe pages allocated by zone");
580