/*	$OpenBSD: subr_extent.c,v 1.65 2024/01/19 22:12:24 kettenis Exp $	*/
/*	$NetBSD: subr_extent.c,v 1.7 1996/11/21 18:46:34 cgd Exp $	*/

/*-
 * Copyright (c) 1996, 1998 The NetBSD Foundation, Inc.
 * All rights reserved.
 *
 * This code is derived from software contributed to The NetBSD Foundation
 * by Jason R. Thorpe and Matthias Drochner.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

/*
 * General purpose extent manager.
 */

#ifdef _KERNEL
#include <sys/param.h>
#include <sys/extent.h>
#include <sys/malloc.h>
#include <sys/systm.h>
#include <sys/queue.h>
#include <sys/pool.h>
#include <ddb/db_output.h>
#else
/*
 * User-land definitions, so this code can be compiled into a testing harness.
 */
#include <sys/param.h>
#include <sys/extent.h>
#include <sys/queue.h>
#include <errno.h>
#include <err.h>
#include <stdlib.h>
#include <stdio.h>
#include <string.h>

#define	malloc(s, t, flags)		malloc(s)
#define	free(p, t, s)			free(p)

#define	tsleep_nsec(c, p, s, t)		(EWOULDBLOCK)
#define	wakeup(chan)			((void)0)

struct pool {
	size_t pr_size;
};

#define	pool_init(a, b, c, d, e, f, g)	do { (a)->pr_size = (b); } while (0)
#define	pool_get(pp, flags)		malloc((pp)->pr_size, 0, 0)
#define	pool_put(pp, rp)		free((rp), 0, 0)

#define	panic(...)		do { warnx(__VA_ARGS__); abort(); } while (0)
#endif

#if defined(DIAGNOSTIC) || defined(DDB)
void	extent_print1(struct extent *, int (*)(const char *, ...)
	    __attribute__((__format__(__kprintf__,1,2))));
#endif

static	void extent_insert_and_optimize(struct extent *, u_long, u_long,
	    struct extent_region *, struct extent_region *);
static	struct extent_region *extent_alloc_region_descriptor(struct extent *, int);
static	void extent_free_region_descriptor(struct extent *,
	    struct extent_region *);
int	extent_do_alloc(struct extent *, u_long, u_long, u_long, u_long,
	    u_long, u_long, int, struct extent_region *, u_long *);

/*
 * Shortcut to align to an arbitrary power-of-two boundary.
 */
static __inline__ u_long
extent_align(u_long start, u_long align, u_long skew)
{
	return ((((start - skew) + (align - 1)) & (-align)) + skew);
}

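/*
 * For illustration: with align = 0x100 and skew = 0, extent_align(0x1003,
 * 0x100, 0) rounds up to 0x1100.  A non-zero skew offsets the alignment
 * grid, e.g. extent_align(0x1003, 0x100, 0x10) yields 0x1010, the first
 * value >= 0x1003 that is 0x10 past a 0x100 boundary.
 */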

#if defined(DIAGNOSTIC) || defined(DDB)
/*
 * Register the extent on a doubly linked list.
 * Should work, no?
 */
static LIST_HEAD(listhead, extent) ext_list;
static	void extent_register(struct extent *);

static void
extent_register(struct extent *ex)
{
#ifdef DIAGNOSTIC
	struct extent *ep;
#endif
	static int initialized;

	if (!initialized) {
		LIST_INIT(&ext_list);
		initialized = 1;
	}

#ifdef DIAGNOSTIC
	LIST_FOREACH(ep, &ext_list, ex_link) {
		if (ep == ex)
			panic("%s: already registered", __func__);
	}
#endif

	/* Insert into list */
	LIST_INSERT_HEAD(&ext_list, ex, ex_link);
}
#endif	/* DIAGNOSTIC || DDB */

struct pool ex_region_pl;

static void
extent_pool_init(void)
{
	static int inited;

	if (!inited) {
		pool_init(&ex_region_pl, sizeof(struct extent_region), 0,
		    IPL_VM, 0, "extentpl", NULL);
		inited = 1;
	}
}

#ifdef DDB
/*
 * Print out all registered extents.  This is used by the
 * DDB "show extents" command.
 */
void
extent_print_all(void)
{
	struct extent *ep;

	LIST_FOREACH(ep, &ext_list, ex_link) {
		extent_print1(ep, db_printf);
	}
}
#endif

/*
 * Allocate and initialize an extent map.
 */
struct extent *
extent_create(char *name, u_long start, u_long end, int mtype, caddr_t storage,
    size_t storagesize, int flags)
{
	struct extent *ex;
	caddr_t cp = storage;
	size_t sz = storagesize;
	struct extent_region *rp;
	int fixed_extent = (storage != NULL);

#ifdef DIAGNOSTIC
	/* Check arguments. */
	if (name == NULL)
		panic("%s: name == NULL", __func__);
	if (end < start) {
		printf("%s: extent `%s', start 0x%lx, end 0x%lx\n",
		    __func__, name, start, end);
		panic("%s: end < start", __func__);
	}
	if (fixed_extent && (storagesize < sizeof(struct extent_fixed)))
		panic("%s: fixed extent, bad storagesize 0x%lx",
		    __func__, (u_long)storagesize);
	if (fixed_extent == 0 && (storagesize != 0 || storage != NULL))
		panic("%s: storage provided for non-fixed", __func__);
#endif

	extent_pool_init();

	/* Allocate extent descriptor. */
	if (fixed_extent) {
		struct extent_fixed *fex;

		memset(storage, 0, storagesize);

		/*
		 * Align all descriptors on "long" boundaries.
		 */
		fex = (struct extent_fixed *)cp;
		ex = (struct extent *)fex;
		cp += ALIGN(sizeof(struct extent_fixed));
		sz -= ALIGN(sizeof(struct extent_fixed));
		fex->fex_storage = storage;
		fex->fex_storagesize = storagesize;

		/*
		 * In a fixed extent, we have to pre-allocate region
		 * descriptors and place them in the extent's freelist.
		 */
		LIST_INIT(&fex->fex_freelist);
		while (sz >= ALIGN(sizeof(struct extent_region))) {
			rp = (struct extent_region *)cp;
			cp += ALIGN(sizeof(struct extent_region));
			sz -= ALIGN(sizeof(struct extent_region));
			LIST_INSERT_HEAD(&fex->fex_freelist, rp, er_link);
		}
	} else {
		ex = (struct extent *)malloc(sizeof(struct extent),
		    mtype, (flags & EX_WAITOK) ? M_WAITOK : M_NOWAIT);
		if (ex == NULL)
			return (NULL);
	}

	/* Fill in the extent descriptor and return it to the caller. */
	LIST_INIT(&ex->ex_regions);
	ex->ex_name = name;
	ex->ex_start = start;
	ex->ex_end = end;
	ex->ex_mtype = mtype;
	ex->ex_flags = 0;
	if (fixed_extent)
		ex->ex_flags |= EXF_FIXED;
	if (flags & EX_NOCOALESCE)
		ex->ex_flags |= EXF_NOCOALESCE;

	if (flags & EX_FILLED) {
		rp = extent_alloc_region_descriptor(ex, flags);
		if (rp == NULL) {
			if (!fixed_extent)
				free(ex, mtype, sizeof(struct extent));
			return (NULL);
		}
		rp->er_start = start;
		rp->er_end = end;
		LIST_INSERT_HEAD(&ex->ex_regions, rp, er_link);
	}

#if defined(DIAGNOSTIC) || defined(DDB)
	extent_register(ex);
#endif
	return (ex);
}

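/*
 * Illustrative sketch (not taken from any particular caller): a driver
 * managing a 16-bit I/O port space without dynamic allocation might create
 * a fixed extent backed by static storage, sized with the
 * EXTENT_FIXED_STORAGE_SIZE() helper from <sys/extent.h>:
 *
 *	static long ioport_storage[EXTENT_FIXED_STORAGE_SIZE(16) /
 *	    sizeof(long)];
 *	struct extent *ioport_ex = extent_create("ioport", 0x0, 0xffff,
 *	    M_DEVBUF, (caddr_t)ioport_storage, sizeof(ioport_storage),
 *	    EX_NOCOALESCE | EX_NOWAIT);
 *
 * Passing storage == NULL and storagesize == 0 instead yields a non-fixed
 * extent whose descriptor is malloc(9)'d and whose region descriptors come
 * from the "extentpl" pool above.
 */
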
/*
 * Destroy an extent map.
 */
void
extent_destroy(struct extent *ex)
{
	struct extent_region *rp, *orp;

#ifdef DIAGNOSTIC
	/* Check arguments. */
	if (ex == NULL)
		panic("%s: NULL extent", __func__);
#endif

	/* Free all region descriptors in extent. */
	for (rp = LIST_FIRST(&ex->ex_regions); rp != NULL; ) {
		orp = rp;
		rp = LIST_NEXT(rp, er_link);
		LIST_REMOVE(orp, er_link);
		extent_free_region_descriptor(ex, orp);
	}

#if defined(DIAGNOSTIC) || defined(DDB)
	/* Remove from the list of all extents. */
	LIST_REMOVE(ex, ex_link);
#endif

	/* If we're not a fixed extent, free the extent descriptor itself. */
	if ((ex->ex_flags & EXF_FIXED) == 0)
		free(ex, ex->ex_mtype, sizeof(*ex));
}

/*
 * Insert a region descriptor into the sorted region list after the
 * entry "after" or at the head of the list (if "after" is NULL).
 * The region descriptor we insert is passed in "rp".  We must
 * allocate the region descriptor before calling this function!
 * If we don't need the region descriptor, it will be freed here.
 */
static void
extent_insert_and_optimize(struct extent *ex, u_long start, u_long size,
    struct extent_region *after, struct extent_region *rp)
{
	struct extent_region *nextr;
	int appended = 0;

	if (after == NULL) {
		/*
		 * We're the first in the region list.  If there's
		 * a region after us, attempt to coalesce to save
		 * descriptor overhead.
		 */
		if (((ex->ex_flags & EXF_NOCOALESCE) == 0) &&
		    !LIST_EMPTY(&ex->ex_regions) &&
		    ((start + size) == LIST_FIRST(&ex->ex_regions)->er_start)) {
			/*
			 * We can coalesce.  Prepend us to the first region.
			 */
			LIST_FIRST(&ex->ex_regions)->er_start = start;
			extent_free_region_descriptor(ex, rp);
			return;
		}

		/*
		 * Can't coalesce.  Fill in the region descriptor
		 * and insert ourselves at the head of the region list.
		 */
		rp->er_start = start;
		rp->er_end = start + (size - 1);
		LIST_INSERT_HEAD(&ex->ex_regions, rp, er_link);
		return;
	}

	/*
	 * If EXF_NOCOALESCE is set, coalescing is disallowed.
	 */
	if (ex->ex_flags & EXF_NOCOALESCE)
		goto cant_coalesce;

	/*
	 * Attempt to coalesce with the region before us.
	 */
	if ((after->er_end + 1) == start) {
		/*
		 * We can coalesce.  Append ourselves and make
		 * note of it.
		 */
		after->er_end = start + (size - 1);
		appended = 1;
	}

	/*
	 * Attempt to coalesce with the region after us.
	 */
	if (LIST_NEXT(after, er_link) != NULL &&
	    ((start + size) == LIST_NEXT(after, er_link)->er_start)) {
		/*
		 * We can coalesce.  Note that if we appended ourselves
		 * to the previous region, we exactly fit the gap, and
		 * can free the "next" region descriptor.
		 */
		if (appended) {
			/*
			 * Yup, we can free it up.
			 */
			after->er_end = LIST_NEXT(after, er_link)->er_end;
			nextr = LIST_NEXT(after, er_link);
			LIST_REMOVE(nextr, er_link);
			extent_free_region_descriptor(ex, nextr);
		} else {
			/*
			 * Nope, just prepend us to the next region.
			 */
			LIST_NEXT(after, er_link)->er_start = start;
		}

		extent_free_region_descriptor(ex, rp);
		return;
	}

	/*
	 * We weren't able to coalesce with the next region, but
	 * we don't need to allocate a region descriptor if we
	 * appended ourselves to the previous region.
	 */
	if (appended) {
		extent_free_region_descriptor(ex, rp);
		return;
	}

 cant_coalesce:

	/*
	 * Fill in the region descriptor and insert ourselves
	 * into the region list.
	 */
	rp->er_start = start;
	rp->er_end = start + (size - 1);
	LIST_INSERT_AFTER(after, rp, er_link);
}

/*
 * Allocate a specific region in an extent map.
 */
int
extent_do_alloc_region(struct extent *ex, u_long start, u_long size,
    int flags, struct extent_region *myrp)
{
	struct extent_region *rp, *last;
	u_long end = start + (size - 1);
	int error;

#ifdef DIAGNOSTIC
	/* Check arguments. */
	if (ex == NULL)
		panic("%s: NULL extent", __func__);
	if (size < 1) {
		printf("%s: extent `%s', size 0x%lx\n",
		    __func__, ex->ex_name, size);
		panic("%s: bad size", __func__);
	}
	if (end < start) {
		printf("%s: extent `%s', start 0x%lx, size 0x%lx\n",
		    __func__, ex->ex_name, start, size);
		panic("%s: overflow", __func__);
	}
	if ((flags & EX_CONFLICTOK) && (flags & EX_WAITSPACE))
		panic("%s: EX_CONFLICTOK and EX_WAITSPACE "
		    "are mutually exclusive", __func__);
#endif

	/*
	 * Make sure the requested region lies within the
	 * extent.
	 */
	if ((start < ex->ex_start) || (end > ex->ex_end)) {
#ifdef DIAGNOSTIC
		printf("%s: extent `%s' (0x%lx - 0x%lx)\n",
		    __func__, ex->ex_name, ex->ex_start, ex->ex_end);
		printf("%s: start 0x%lx, end 0x%lx\n",
		    __func__, start, end);
		panic("%s: region lies outside extent", __func__);
#else
		extent_free_region_descriptor(ex, myrp);
		return (EINVAL);
#endif
	}

 alloc_start:
	/*
	 * Attempt to place ourselves in the desired area of the
	 * extent.  We save ourselves some work by keeping the list sorted.
	 * In other words, if the start of the current region is greater
	 * than the end of our region, we don't have to search any further.
	 */

	/*
	 * Keep a pointer to the last region we looked at so
	 * that we don't have to traverse the list again when
	 * we insert ourselves.  If "last" is NULL when we
	 * finally insert ourselves, we go at the head of the
	 * list.  See extent_insert_and_optimize() for details.
	 */
	last = NULL;

	LIST_FOREACH(rp, &ex->ex_regions, er_link) {
		if (rp->er_start > end) {
			/*
			 * We lie before this region and don't
			 * conflict.
			 */
			break;
		}

		/*
		 * The current region begins before we end.
		 * Check for a conflict.
		 */
		if (rp->er_end >= start) {
			/*
			 * We conflict.  If we can (and want to) wait,
			 * do so.
			 */
			if (flags & EX_WAITSPACE) {
				ex->ex_flags |= EXF_WANTED;
				error = tsleep_nsec(ex,
				    PRIBIO | ((flags & EX_CATCH) ? PCATCH : 0),
				    "extnt", INFSLP);
				if (error)
					return (error);
				goto alloc_start;
			}

			/*
			 * If we tolerate conflicts, adjust things such
			 * that all space in the requested region is
			 * allocated.
			 */
			if (flags & EX_CONFLICTOK) {
				/*
				 * There are four possibilities:
				 *
				 * 1. The current region overlaps with
				 *    the start of the requested region.
				 *    Adjust the requested region to
				 *    start at the end of the current
				 *    region and try again.
				 *
				 * 2. The current region falls
				 *    completely within the requested
				 *    region.  Free the current region
				 *    and try again.
				 *
				 * 3. The current region overlaps with
				 *    the end of the requested region.
				 *    Adjust the requested region to
				 *    end at the start of the current
				 *    region and try again.
				 *
				 * 4. The requested region falls
				 *    completely within the current
				 *    region.  We're done.
				 */
				if (rp->er_start <= start) {
					if (rp->er_end < ex->ex_end) {
						start = rp->er_end + 1;
						size = end - start + 1;
						goto alloc_start;
					}
				} else if (rp->er_end < end) {
					LIST_REMOVE(rp, er_link);
					extent_free_region_descriptor(ex, rp);
					goto alloc_start;
				} else if (rp->er_start < end) {
					if (rp->er_start > ex->ex_start) {
						end = rp->er_start - 1;
						size = end - start + 1;
						goto alloc_start;
					}
				}
				return (0);
			}

			extent_free_region_descriptor(ex, myrp);
			return (EAGAIN);
		}
		/*
		 * We don't conflict, but this region lies before
		 * us.  Keep a pointer to this region, and keep
		 * trying.
		 */
		last = rp;
	}

	/*
	 * We don't conflict with any regions.  "last" points
	 * to the region we fall after, or is NULL if we belong
	 * at the beginning of the region list.  Insert ourselves.
	 */
	extent_insert_and_optimize(ex, start, size, last, myrp);
	return (0);
}

int
extent_alloc_region(struct extent *ex, u_long start, u_long size, int flags)
{
	struct extent_region *rp;

	/*
	 * Allocate the region descriptor.  It will be freed later
	 * if we can coalesce with another region.
	 */
	rp = extent_alloc_region_descriptor(ex, flags);
	if (rp == NULL) {
#ifdef DIAGNOSTIC
		printf("%s: can't allocate region descriptor\n", __func__);
#endif
		return (ENOMEM);
	}

	return extent_do_alloc_region(ex, start, size, flags, rp);
}

int
extent_alloc_region_with_descr(struct extent *ex, u_long start,
    u_long size, int flags, struct extent_region *rp)
{
#ifdef DIAGNOSTIC
	if ((ex->ex_flags & EXF_NOCOALESCE) == 0)
		panic("%s: EX_NOCOALESCE not set", __func__);
#endif

	rp->er_flags = ER_DISCARD;
	return extent_do_alloc_region(ex, start, size, flags, rp);
}

/*
 * Macro to check (x + y) <= z.  This check is designed to fail
 * if an overflow occurs.
 */
#define LE_OV(x, y, z)	((((x) + (y)) >= (x)) && (((x) + (y)) <= (z)))
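/*
 * For example, on a 32-bit u_long, LE_OV(0xffffff00, 0x200, 0xffffffff)
 * evaluates to 0: the sum wraps around to 0x100, so the ((x) + (y)) >= (x)
 * clause catches the overflow that a plain "x + y <= z" test would miss.
 */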

/*
 * Allocate a region in an extent map subregion.
 *
 * If EX_FAST is specified, we return the first fit in the map.
 * Otherwise, we try to minimize fragmentation by finding the
 * smallest gap that will hold the request.
 *
 * The allocated region is aligned to "alignment", which must be
 * a power of 2.
 */
int
extent_do_alloc(struct extent *ex, u_long substart, u_long subend,
    u_long size, u_long alignment, u_long skew, u_long boundary, int flags,
    struct extent_region *myrp, u_long *result)
{
	struct extent_region *rp, *last, *bestlast;
	u_long newstart, newend, exend, beststart, bestovh, ovh;
	u_long dontcross;
	int error;

#ifdef DIAGNOSTIC
	/* Check arguments. */
	if (ex == NULL)
		panic("%s: NULL extent", __func__);
	if (myrp == NULL)
		panic("%s: NULL region descriptor", __func__);
	if (result == NULL)
		panic("%s: NULL result pointer", __func__);
	if ((substart < ex->ex_start) || (substart > ex->ex_end) ||
	    (subend > ex->ex_end) || (subend < ex->ex_start)) {
		printf("%s: extent `%s', ex_start 0x%lx, ex_end 0x%lx\n",
		    __func__, ex->ex_name, ex->ex_start, ex->ex_end);
		printf("%s: substart 0x%lx, subend 0x%lx\n",
		    __func__, substart, subend);
		panic("%s: bad subregion", __func__);
	}
	if ((size < 1) || ((size - 1) > (subend - substart))) {
		printf("%s: extent `%s', size 0x%lx\n",
		    __func__, ex->ex_name, size);
		panic("%s: bad size", __func__);
	}
	if (alignment == 0)
		panic("%s: bad alignment", __func__);
	if (boundary && (boundary < size)) {
		printf("%s: extent `%s', size 0x%lx, boundary 0x%lx\n",
		    __func__, ex->ex_name, size, boundary);
		panic("%s: bad boundary", __func__);
	}
#endif

 alloc_start:
	/*
	 * Keep a pointer to the last region we looked at so
	 * that we don't have to traverse the list again when
	 * we insert ourselves.  If "last" is NULL when we
	 * finally insert ourselves, we go at the head of the
	 * list.  See extent_insert_and_optimize() for details.
	 */
	last = NULL;

	/*
	 * Keep track of size and location of the smallest
	 * chunk we fit in.
	 *
	 * Since the extent can be as large as the numeric range
	 * of the CPU (0 - 0xffffffff for 32-bit systems), the
	 * best overhead value can be the maximum unsigned integer.
	 * Thus, we initialize "bestovh" to 0, since we insert ourselves
	 * into the region list immediately on an exact match (which
	 * is the only case where "bestovh" would be set to 0).
	 */
	bestovh = 0;
	beststart = 0;
	bestlast = NULL;

	/*
	 * Keep track of the end of the free region.  This is either
	 * the end of the extent or the start of a region past the subend.
	 */
	exend = ex->ex_end;

	/*
	 * For N allocated regions, we must make (N + 1)
	 * checks for unallocated space.  The first chunk we
	 * check is the area from the beginning of the subregion
	 * to the first allocated region after that point.
	 */
	newstart = extent_align(substart, alignment, skew);
	if (newstart < ex->ex_start) {
#ifdef DIAGNOSTIC
		printf("%s: extent `%s' (0x%lx - 0x%lx), alignment 0x%lx\n",
		    __func__, ex->ex_name, ex->ex_start, ex->ex_end,
		    alignment);
		panic("%s: overflow after alignment", __func__);
#else
		extent_free_region_descriptor(ex, myrp);
		return (EINVAL);
#endif
	}

	/*
	 * Find the first allocated region that begins on or after
	 * the subregion start, advancing the "last" pointer along
	 * the way.
	 */
	LIST_FOREACH(rp, &ex->ex_regions, er_link) {
		if (rp->er_start >= newstart)
			break;
		last = rp;
	}

	/*
	 * Relocate the start of our candidate region to the end of
	 * the last allocated region (if there was one overlapping
	 * our subrange).
	 */
	if (last != NULL && last->er_end >= newstart)
		newstart = extent_align((last->er_end + 1), alignment, skew);

	for (; rp != NULL; rp = LIST_NEXT(rp, er_link)) {
		/*
		 * If the region starts past the subend, bail out and
		 * see if we fit against the subend.
		 */
		if (rp->er_start > subend) {
			exend = rp->er_start;
			break;
		}

		/*
		 * Check the chunk before "rp".  Note that our
		 * comparison is safe from overflow conditions.
		 */
		if (LE_OV(newstart, size, rp->er_start)) {
			/*
			 * Do a boundary check, if necessary.  Note
			 * that a region may *begin* on the boundary,
			 * but it must end before the boundary.
			 */
			if (boundary) {
				newend = newstart + (size - 1);

				/*
				 * Calculate the next boundary after the start
				 * of this region.
				 */
				dontcross = extent_align(newstart+1, boundary,
				    (flags & EX_BOUNDZERO) ? 0 : ex->ex_start)
				    - 1;

#if 0
				printf("newstart=%lx newend=%lx ex_start=%lx ex_end=%lx boundary=%lx dontcross=%lx\n",
				    newstart, newend, ex->ex_start, ex->ex_end,
				    boundary, dontcross);
#endif

				/* Check for overflow */
				if (dontcross < ex->ex_start)
					dontcross = ex->ex_end;
				else if (newend > dontcross) {
					/*
					 * Candidate region crosses boundary.
					 * Throw away the leading part and see
					 * if we still fit.
					 */
					newstart = dontcross + 1;
					newend = newstart + (size - 1);
					dontcross += boundary;
					if (!LE_OV(newstart, size, rp->er_start))
						goto skip;
				}

				/*
				 * If we run past the end of
				 * the extent or the boundary
				 * overflows, then the request
				 * can't fit.
				 */
				if (newstart + size - 1 > ex->ex_end ||
				    dontcross < newstart)
					goto fail;
			}

			/*
			 * We would fit into this space.  Calculate
			 * the overhead (wasted space).  If we exactly
			 * fit, or we're taking the first fit, insert
			 * ourselves into the region list.
			 */
			ovh = rp->er_start - newstart - size;
			if ((flags & EX_FAST) || (ovh == 0))
				goto found;

			/*
			 * Don't exactly fit, but check to see
			 * if we're better than any current choice.
			 */
			if ((bestovh == 0) || (ovh < bestovh)) {
				bestovh = ovh;
				beststart = newstart;
				bestlast = last;
			}
		}

skip:
		/*
		 * Skip past the current region and check again.
		 */
		newstart = extent_align((rp->er_end + 1), alignment, skew);
		if (newstart < rp->er_end) {
			/*
			 * Overflow condition.  Don't error out, since
			 * we might have a chunk of space that we can
			 * use.
			 */
			goto fail;
		}

		last = rp;
	}

	/*
	 * The final check is from the current starting point to the
	 * end of the subregion.  "newstart" is set to the beginning of
	 * the subregion if there were no allocated regions, or to just
	 * past the end of the last allocated region otherwise, adjusted
	 * for alignment in either case.
	 */
	if (LE_OV(newstart, (size - 1), subend)) {
		/*
		 * Do a boundary check, if necessary.  Note
		 * that a region may *begin* on the boundary,
		 * but it must end before the boundary.
		 */
		if (boundary) {
			newend = newstart + (size - 1);

			/*
			 * Calculate the next boundary after the start
			 * of this region.
			 */
			dontcross = extent_align(newstart+1, boundary,
			    (flags & EX_BOUNDZERO) ? 0 : ex->ex_start)
			    - 1;

#if 0
			printf("newstart=%lx newend=%lx ex_start=%lx ex_end=%lx boundary=%lx dontcross=%lx\n",
			    newstart, newend, ex->ex_start, ex->ex_end,
			    boundary, dontcross);
#endif

			/* Check for overflow */
			if (dontcross < ex->ex_start)
				dontcross = ex->ex_end;
			else if (newend > dontcross) {
				/*
				 * Candidate region crosses boundary.
				 * Throw away the leading part and see
				 * if we still fit.
				 */
				newstart = dontcross + 1;
				newend = newstart + (size - 1);
				dontcross += boundary;
				if (!LE_OV(newstart, (size - 1), subend))
					goto fail;
			}

			/*
			 * If we run past the end of
			 * the extent or the boundary
			 * overflows, then the request
			 * can't fit.
			 */
			if (newstart + size - 1 > ex->ex_end ||
			    dontcross < newstart)
				goto fail;
		}

		/*
		 * We would fit into this space.  Calculate
		 * the overhead (wasted space).  If we exactly
		 * fit, or we're taking the first fit, insert
		 * ourselves into the region list.
		 */
		ovh = exend - newstart - (size - 1);
		if ((flags & EX_FAST) || (ovh == 0))
			goto found;

		/*
		 * Don't exactly fit, but check to see
		 * if we're better than any current choice.
		 */
		if ((bestovh == 0) || (ovh < bestovh)) {
			bestovh = ovh;
			beststart = newstart;
			bestlast = last;
		}
	}

 fail:
	/*
	 * One of the following two conditions has
	 * occurred:
	 *
	 *	There is no chunk large enough to hold the request.
	 *
	 *	If EX_FAST was not specified, there is not an
	 *	exact match for the request.
	 *
	 * Note that if we reach this point and EX_FAST is
	 * set, then we know there is no space in the extent for
	 * the request.
	 */
	if (((flags & EX_FAST) == 0) && (bestovh != 0)) {
		/*
		 * We have a match that's "good enough".
		 */
		newstart = beststart;
		last = bestlast;
		goto found;
	}

	/*
	 * No space currently available.  Wait for it to free up,
	 * if possible.
	 */
	if (flags & EX_WAITSPACE) {
		ex->ex_flags |= EXF_WANTED;
		error = tsleep_nsec(ex,
		    PRIBIO | ((flags & EX_CATCH) ? PCATCH : 0),
		    "extnt", INFSLP);
		if (error)
			return (error);
		goto alloc_start;
	}

	extent_free_region_descriptor(ex, myrp);
	return (EAGAIN);

 found:
	/*
	 * Insert ourselves into the region list.
	 */
	extent_insert_and_optimize(ex, newstart, size, last, myrp);
	*result = newstart;
	return (0);
}

int
extent_alloc_subregion(struct extent *ex, u_long substart, u_long subend,
    u_long size, u_long alignment, u_long skew, u_long boundary, int flags,
    u_long *result)
{
	struct extent_region *rp;

	/*
	 * Allocate the region descriptor.  It will be freed later
	 * if we can coalesce with another region.
	 */
	rp = extent_alloc_region_descriptor(ex, flags);
	if (rp == NULL) {
#ifdef DIAGNOSTIC
		printf("%s: can't allocate region descriptor\n", __func__);
#endif
		return (ENOMEM);
	}

	return extent_do_alloc(ex, substart, subend, size, alignment, skew,
	    boundary, flags, rp, result);
}

int
extent_alloc_subregion_with_descr(struct extent *ex, u_long substart,
    u_long subend, u_long size, u_long alignment, u_long skew,
    u_long boundary, int flags, struct extent_region *rp, u_long *result)
{
#ifdef DIAGNOSTIC
	if ((ex->ex_flags & EXF_NOCOALESCE) == 0)
		panic("%s: EX_NOCOALESCE not set", __func__);
#endif

	rp->er_flags = ER_DISCARD;
	return extent_do_alloc(ex, substart, subend, size, alignment, skew,
	    boundary, flags, rp, result);
}

int
extent_free(struct extent *ex, u_long start, u_long size, int flags)
{
	struct extent_region *rp, *nrp = NULL;
	struct extent_region *tmp;
	u_long end = start + (size - 1);
	int exflags;
	int error = 0;

#ifdef DIAGNOSTIC
	/* Check arguments. */
	if (ex == NULL)
		panic("%s: NULL extent", __func__);
	if ((start < ex->ex_start) || (end > ex->ex_end)) {
		extent_print(ex);
		printf("%s: extent `%s', start 0x%lx, size 0x%lx\n",
		    __func__, ex->ex_name, start, size);
		panic("%s: extent `%s', region not within extent",
		    __func__, ex->ex_name);
	}
	/* Check for an overflow. */
	if (end < start) {
		extent_print(ex);
		printf("%s: extent `%s', start 0x%lx, size 0x%lx\n",
		    __func__, ex->ex_name, start, size);
		panic("%s: overflow", __func__);
	}
#endif

	/*
	 * If we're allowing coalescing, we must allocate a region
	 * descriptor now, since it might block.
	 *
	 * XXX Make a static, create-time flags word, so we don't
	 * XXX have to lock to read it!
	 */
	exflags = ex->ex_flags;

	if ((exflags & EXF_NOCOALESCE) == 0) {
		/* Allocate a region descriptor. */
		nrp = extent_alloc_region_descriptor(ex, flags);
		if (nrp == NULL)
			return (ENOMEM);
	}

	/*
	 * Find region and deallocate.  Several possibilities:
	 *
	 *	1. (start == er_start) && (end == er_end):
	 *	   Free descriptor.
	 *
	 *	2. (start == er_start) && (end < er_end):
	 *	   Adjust er_start.
	 *
	 *	3. (start > er_start) && (end == er_end):
	 *	   Adjust er_end.
	 *
	 *	4. (start > er_start) && (end < er_end):
	 *	   Fragment region.  Requires descriptor alloc.
	 *
	 * Cases 2, 3, and 4 require that the EXF_NOCOALESCE flag
	 * is not set.
	 *
	 * If the EX_CONFLICTOK flag is set, partially overlapping
	 * regions are allowed.  This is handled in cases 1a, 2a and
	 * 3a below.
	 */
	LIST_FOREACH_SAFE(rp, &ex->ex_regions, er_link, tmp) {
		/*
		 * Save ourselves some comparisons; does the current
		 * region end before the chunk to be freed begins?  If so,
		 * then we haven't found the appropriate region descriptor.
		 */
		if (rp->er_end < start)
			continue;

		/*
		 * Save ourselves some traversal; does the current
		 * region begin after the chunk to be freed ends?  If so,
		 * then we've already passed any possible region descriptors
		 * that might have contained the chunk to be freed.
		 */
		if (rp->er_start > end)
			break;

		/* Case 1. */
		if ((start == rp->er_start) && (end == rp->er_end)) {
			LIST_REMOVE(rp, er_link);
			extent_free_region_descriptor(ex, rp);
			goto done;
		}

		/*
		 * The following cases all require that EXF_NOCOALESCE
		 * is not set.
		 */
		if (ex->ex_flags & EXF_NOCOALESCE)
			continue;

		/* Case 2. */
		if ((start == rp->er_start) && (end < rp->er_end)) {
			rp->er_start = (end + 1);
			goto done;
		}

		/* Case 3. */
		if ((start > rp->er_start) && (end == rp->er_end)) {
			rp->er_end = (start - 1);
			goto done;
		}

		/* Case 4. */
		if ((start > rp->er_start) && (end < rp->er_end)) {
			/* Fill in new descriptor. */
			nrp->er_start = end + 1;
			nrp->er_end = rp->er_end;

			/* Adjust current descriptor. */
			rp->er_end = start - 1;

			/* Insert new descriptor after current. */
			LIST_INSERT_AFTER(rp, nrp, er_link);

			/* We used the new descriptor, so don't free it below. */
			nrp = NULL;
			goto done;
		}

		if ((flags & EX_CONFLICTOK) == 0)
			continue;

		/* Case 1a. */
		if ((start <= rp->er_start && end >= rp->er_end)) {
			LIST_REMOVE(rp, er_link);
			extent_free_region_descriptor(ex, rp);
			continue;
		}

		/* Case 2a. */
		if ((start <= rp->er_start) && (end >= rp->er_start))
			rp->er_start = (end + 1);

		/* Case 3a. */
		if ((start <= rp->er_end) && (end >= rp->er_end))
			rp->er_end = (start - 1);
	}

	if (flags & EX_CONFLICTOK)
		goto done;

	/* Region not found, or request otherwise invalid. */
#if defined(DIAGNOSTIC) || defined(DDB)
	extent_print(ex);
#endif
	printf("%s: start 0x%lx, end 0x%lx\n", __func__, start, end);
	panic("%s: region not found", __func__);

 done:
	if (nrp != NULL)
		extent_free_region_descriptor(ex, nrp);
	if (ex->ex_flags & EXF_WANTED) {
		ex->ex_flags &= ~EXF_WANTED;
		wakeup(ex);
	}
	return (error);
}

static struct extent_region *
extent_alloc_region_descriptor(struct extent *ex, int flags)
{
	struct extent_region *rp;

	if (ex->ex_flags & EXF_FIXED) {
		struct extent_fixed *fex = (struct extent_fixed *)ex;

		while (LIST_EMPTY(&fex->fex_freelist)) {
			if (flags & EX_MALLOCOK)
				goto alloc;

			if ((flags & EX_WAITOK) == 0)
				return (NULL);
			ex->ex_flags |= EXF_FLWANTED;
			if (tsleep_nsec(&fex->fex_freelist,
			    PRIBIO | ((flags & EX_CATCH) ? PCATCH : 0),
			    "extnt", INFSLP))
				return (NULL);
		}
		rp = LIST_FIRST(&fex->fex_freelist);
		LIST_REMOVE(rp, er_link);

		/*
		 * Don't muck with flags after pulling it off the
		 * freelist; it may be a dynamically allocated
		 * region pointer that was kindly given to us,
		 * and we need to preserve that information.
		 */

		return (rp);
	}

 alloc:
	rp = pool_get(&ex_region_pl, (flags & EX_WAITOK) ? PR_WAITOK :
	    PR_NOWAIT);
	if (rp != NULL)
		rp->er_flags = ER_ALLOC;

	return (rp);
}

static void
extent_free_region_descriptor(struct extent *ex, struct extent_region *rp)
{
	if (rp->er_flags & ER_DISCARD)
		return;

	if (ex->ex_flags & EXF_FIXED) {
		struct extent_fixed *fex = (struct extent_fixed *)ex;

		/*
		 * If someone's waiting for a region descriptor,
		 * be nice and give them this one, rather than
		 * just free'ing it back to the system.
		 */
		if (rp->er_flags & ER_ALLOC) {
			if (ex->ex_flags & EXF_FLWANTED) {
				/* Clear all but ER_ALLOC flag. */
				rp->er_flags = ER_ALLOC;
				LIST_INSERT_HEAD(&fex->fex_freelist, rp,
				    er_link);
				goto wake_em_up;
			} else {
				pool_put(&ex_region_pl, rp);
			}
		} else {
			/* Clear all flags. */
			rp->er_flags = 0;
			LIST_INSERT_HEAD(&fex->fex_freelist, rp, er_link);
		}

		if (ex->ex_flags & EXF_FLWANTED) {
 wake_em_up:
			ex->ex_flags &= ~EXF_FLWANTED;
			wakeup(&fex->fex_freelist);
		}
		return;
	}

	/*
	 * We know it's dynamically allocated if we get here.
	 */
	pool_put(&ex_region_pl, rp);
}


#if defined(DIAGNOSTIC) || defined(DDB) || !defined(_KERNEL)

void
extent_print(struct extent *ex)
{
	extent_print1(ex, printf);
}

void
extent_print1(struct extent *ex,
    int (*pr)(const char *, ...) __attribute__((__format__(__kprintf__,1,2))))
{
	struct extent_region *rp;

	if (ex == NULL)
		panic("%s: NULL extent", __func__);

#ifdef _KERNEL
	(*pr)("extent `%s' (0x%lx - 0x%lx), flags=%b\n", ex->ex_name,
	    ex->ex_start, ex->ex_end, ex->ex_flags, EXF_BITS);
#else
	(*pr)("extent `%s' (0x%lx - 0x%lx), flags = 0x%x\n", ex->ex_name,
	    ex->ex_start, ex->ex_end, ex->ex_flags);
#endif

	LIST_FOREACH(rp, &ex->ex_regions, er_link)
		(*pr)("     0x%lx - 0x%lx\n", rp->er_start, rp->er_end);
}
#endif