/*
 * Copyright (c) 1982, 1986, 1989 Regents of the University of California.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms are permitted
 * provided that the above copyright notice and this paragraph are
 * duplicated in all such forms and that any documentation,
 * advertising materials, and other materials related to such
 * distribution and use acknowledge that the software was developed
 * by the University of California, Berkeley.  The name of the
 * University may not be used to endorse or promote products derived
 * from this software without specific prior written permission.
 * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED
 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE.
 *
 *	@(#)lfs_alloc.c	7.10 (Berkeley) 06/07/89
 */

#include "param.h"
#include "systm.h"
#include "mount.h"
#include "buf.h"
#include "user.h"
#include "vnode.h"
#include "kernel.h"
#include "syslog.h"
#include "cmap.h"
#include "../ufs/quota.h"
#include "../ufs/inode.h"
#include "../ufs/fs.h"

extern u_long		hashalloc();
extern ino_t		ialloccg();
extern daddr_t		alloccg();
extern daddr_t		alloccgblk();
extern daddr_t		fragextend();
extern daddr_t		blkpref();
extern daddr_t		mapsearch();
extern int		inside[], around[];
extern unsigned char	*fragtbl[];

/*
 * Allocate a block in the file system.
 *
 * The size of the requested block is given, which must be some
 * multiple of fs_fsize and <= fs_bsize.
 * A preference may be optionally specified. If a preference is given
 * the following hierarchy is used to allocate a block:
 *   1) allocate the requested block.
 *   2) allocate a rotationally optimal block in the same cylinder.
 *   3) allocate a block in the same cylinder group.
 *   4) quadratically rehash into other cylinder groups, until an
 *      available block is located.
 * If no block preference is given the following hierarchy is used
 * to allocate a block:
 *   1) allocate a block in the cylinder group that contains the
 *      inode for the file.
 *   2) quadratically rehash into other cylinder groups, until an
 *      available block is located.
 */
alloc(ip, bpref, size, bpp, flags)
	register struct inode *ip;
	daddr_t bpref;
	int size;
	struct buf **bpp;
	int flags;
{
	daddr_t bno;
	register struct fs *fs;
	register struct buf *bp;
	int cg, error;

	*bpp = 0;
	fs = ip->i_fs;
	if ((unsigned)size > fs->fs_bsize || fragoff(fs, size) != 0) {
		printf("dev = 0x%x, bsize = %d, size = %d, fs = %s\n",
		    ip->i_dev, fs->fs_bsize, size, fs->fs_fsmnt);
		panic("alloc: bad size");
	}
	if (size == fs->fs_bsize && fs->fs_cstotal.cs_nbfree == 0)
		goto nospace;
	if (u.u_uid != 0 && freespace(fs, fs->fs_minfree) <= 0)
		goto nospace;
#ifdef QUOTA
	if (error = chkdq(ip, (long)btodb(size), 0))
		return (error);
#endif
	if (bpref >= fs->fs_size)
		bpref = 0;
	if (bpref == 0)
		cg = itog(fs, ip->i_number);
	else
		cg = dtog(fs, bpref);
	bno = (daddr_t)hashalloc(ip, cg, (long)bpref, size,
		(u_long (*)())alloccg);
	if (bno <= 0)
		goto nospace;
	ip->i_blocks += btodb(size);
	ip->i_flag |= IUPD|ICHG;
	bp = getblk(ip->i_devvp, fsbtodb(fs, bno), size);
	if (flags & B_CLRBUF)
		clrbuf(bp);
	*bpp = bp;
	return (0);
nospace:
	fserr(fs, "file system full");
	uprintf("\n%s: write failed, file system is full\n", fs->fs_fsmnt);
	return (ENOSPC);
}

/*
 * Reallocate a fragment to a bigger size.
 *
 * The number and size of the old block are given, and a preference
 * and new size are also specified. The allocator attempts to extend
 * the original block. Failing that, the regular block allocator is
 * invoked to get an appropriate block.
 */
realloccg(ip, bprev, bpref, osize, nsize, bpp)
	register struct inode *ip;
	daddr_t bprev, bpref;
	int osize, nsize;
	struct buf **bpp;
{
	register struct fs *fs;
	struct buf *bp, *obp;
	int cg, request;
	daddr_t bno, bn;
	int i, error, count;

	*bpp = 0;
	fs = ip->i_fs;
	if ((unsigned)osize > fs->fs_bsize || fragoff(fs, osize) != 0 ||
	    (unsigned)nsize > fs->fs_bsize || fragoff(fs, nsize) != 0) {
		printf("dev = 0x%x, bsize = %d, osize = %d, nsize = %d, fs = %s\n",
		    ip->i_dev, fs->fs_bsize, osize, nsize, fs->fs_fsmnt);
		panic("realloccg: bad size");
	}
	if (u.u_uid != 0 && freespace(fs, fs->fs_minfree) <= 0)
		goto nospace;
	if (bprev == 0) {
		printf("dev = 0x%x, bsize = %d, bprev = %d, fs = %s\n",
		    ip->i_dev, fs->fs_bsize, bprev, fs->fs_fsmnt);
		panic("realloccg: bad bprev");
	}
#ifdef QUOTA
	if (error = chkdq(ip, (long)btodb(nsize - osize), 0))
		return (error);
#endif
	cg = dtog(fs, bprev);
	bno = fragextend(ip, cg, (long)bprev, osize, nsize);
	if (bno != 0) {
		do {
			error = bread(ip->i_devvp, fsbtodb(fs, bno),
				osize, &bp);
			if (error) {
				brelse(bp);
				return (error);
			}
		} while (brealloc(bp, nsize) == 0);
		bp->b_flags |= B_DONE;
		bzero(bp->b_un.b_addr + osize, (unsigned)nsize - osize);
		ip->i_blocks += btodb(nsize - osize);
		ip->i_flag |= IUPD|ICHG;
		*bpp = bp;
		return (0);
	}
	if (bpref >= fs->fs_size)
		bpref = 0;
	switch ((int)fs->fs_optim) {
	case FS_OPTSPACE:
		/*
		 * Allocate an exact sized fragment. Although this makes
		 * best use of space, we will waste time relocating it if
		 * the file continues to grow. If the fragmentation is
		 * less than half of the minimum free reserve, we choose
		 * to begin optimizing for time.
		 */
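		/*
		 * Illustration (hypothetical values): with
		 * fs_minfree = 10%, the threshold below is
		 * fs_dsize * 10 / 200, so the switch to time
		 * optimization happens only while free fragments
		 * amount to no more than 5% of the data blocks;
		 * above that, space optimization is kept.
		 */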
		request = nsize;
		if (fs->fs_minfree < 5 ||
		    fs->fs_cstotal.cs_nffree >
		    fs->fs_dsize * fs->fs_minfree / (2 * 100))
			break;
		log(LOG_NOTICE, "%s: optimization changed from SPACE to TIME\n",
			fs->fs_fsmnt);
		fs->fs_optim = FS_OPTTIME;
		break;
	case FS_OPTTIME:
		/*
		 * At this point we have discovered a file that is trying
		 * to grow a small fragment to a larger fragment. To save
		 * time, we allocate a full sized block, then free the
		 * unused portion. If the file continues to grow, the
		 * `fragextend' call above will be able to grow it in place
		 * without further copying. If aberrant programs cause
		 * disk fragmentation to grow within 2% of the free reserve,
		 * we choose to begin optimizing for space.
		 */
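		/*
		 * Illustration (hypothetical values): with
		 * fs_minfree = 10%, the threshold below is
		 * fs_dsize * (10 - 2) / 100, so once free fragments
		 * reach 8% of the data blocks the file system falls
		 * back to space optimization.
		 */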
		request = fs->fs_bsize;
		if (fs->fs_cstotal.cs_nffree <
		    fs->fs_dsize * (fs->fs_minfree - 2) / 100)
			break;
		log(LOG_NOTICE, "%s: optimization changed from TIME to SPACE\n",
			fs->fs_fsmnt);
		fs->fs_optim = FS_OPTSPACE;
		break;
	default:
		printf("dev = 0x%x, optim = %d, fs = %s\n",
		    ip->i_dev, fs->fs_optim, fs->fs_fsmnt);
		panic("realloccg: bad optim");
		/* NOTREACHED */
	}
	bno = (daddr_t)hashalloc(ip, cg, (long)bpref, request,
		(u_long (*)())alloccg);
	if (bno > 0) {
		error = bread(ip->i_devvp, fsbtodb(fs, bprev), osize, &obp);
		if (error) {
			brelse(obp);
			return (error);
		}
		bn = fsbtodb(fs, bno);
		bp = getblk(ip->i_devvp, bn, nsize);
		bcopy(obp->b_un.b_addr, bp->b_un.b_addr, (u_int)osize);
		count = howmany(osize, CLBYTES);
		for (i = 0; i < count; i++)
			munhash(ip->i_devvp, bn + i * CLBYTES / DEV_BSIZE);
		bzero(bp->b_un.b_addr + osize, (unsigned)nsize - osize);
		if (obp->b_flags & B_DELWRI) {
			obp->b_flags &= ~B_DELWRI;
			u.u_ru.ru_oublock--;		/* delete charge */
		}
		brelse(obp);
		blkfree(ip, bprev, (off_t)osize);
		if (nsize < request)
			blkfree(ip, bno + numfrags(fs, nsize),
				(off_t)(request - nsize));
		ip->i_blocks += btodb(nsize - osize);
		ip->i_flag |= IUPD|ICHG;
		*bpp = bp;
		return (0);
	}
nospace:
	/*
	 * no space available
	 */
	fserr(fs, "file system full");
	uprintf("\n%s: write failed, file system is full\n", fs->fs_fsmnt);
	return (ENOSPC);
}

/*
 * Allocate an inode in the file system.
 *
 * A preference may be optionally specified. If a preference is given
 * the following hierarchy is used to allocate an inode:
 *   1) allocate the requested inode.
 *   2) allocate an inode in the same cylinder group.
 *   3) quadratically rehash into other cylinder groups, until an
 *      available inode is located.
 * If no inode preference is given the following hierarchy is used
 * to allocate an inode:
 *   1) allocate an inode in cylinder group 0.
 *   2) quadratically rehash into other cylinder groups, until an
 *      available inode is located.
 */
ialloc(pip, ipref, mode, ipp)
	register struct inode *pip;
	ino_t ipref;
	int mode;
	struct inode **ipp;
{
	ino_t ino;
	register struct fs *fs;
	register struct inode *ip;
	int cg, error;

	*ipp = 0;
	fs = pip->i_fs;
	if (fs->fs_cstotal.cs_nifree == 0)
		goto noinodes;
#ifdef QUOTA
	if (error = chkiq(pip->i_dev, (struct inode *)NULL, u.u_uid, 0))
		return (error);
#endif
	if (ipref >= fs->fs_ncg * fs->fs_ipg)
		ipref = 0;
	cg = itog(fs, ipref);
	ino = (ino_t)hashalloc(pip, cg, (long)ipref, mode, ialloccg);
	if (ino == 0)
		goto noinodes;
	error = iget(pip, ino, ipp);
	if (error) {
		ifree(pip, ino, 0);
		return (error);
	}
	ip = *ipp;
	if (ip->i_mode) {
		printf("mode = 0%o, inum = %d, fs = %s\n",
		    ip->i_mode, ip->i_number, fs->fs_fsmnt);
		panic("ialloc: dup alloc");
	}
	if (ip->i_blocks) {				/* XXX */
		printf("free inode %s/%d had %d blocks\n",
		    fs->fs_fsmnt, ino, ip->i_blocks);
		ip->i_blocks = 0;
	}
	/*
	 * Set up a new generation number for this inode.
	 */
	if (++nextgennumber < (u_long)time.tv_sec)
		nextgennumber = time.tv_sec;
	ip->i_gen = nextgennumber;
	return (0);
noinodes:
	fserr(fs, "out of inodes");
	uprintf("\n%s: create/symlink failed, no inodes free\n", fs->fs_fsmnt);
	return (ENOSPC);
}

/*
 * Find a cylinder group in which to place a directory.
 *
 * The policy implemented by this algorithm is to select, from
 * among those cylinder groups with above the average number of
 * free inodes, the one with the smallest number of directories.
 */
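/*
 * Illustration (hypothetical counts): with three cylinder groups
 * holding (nifree, ndir) = (80, 9), (120, 4) and (150, 7), the
 * average is about 116 free inodes, so only the last two groups
 * qualify and group 1 wins with the fewer directories.  The value
 * returned, fs_ipg * mincg, is simply the first inode number in
 * that group; ialloc() treats it as a preference, not a commitment.
 */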
ino_t
dirpref(fs)
	register struct fs *fs;
{
	int cg, minndir, mincg, avgifree;

	avgifree = fs->fs_cstotal.cs_nifree / fs->fs_ncg;
	minndir = fs->fs_ipg;
	mincg = 0;
	for (cg = 0; cg < fs->fs_ncg; cg++)
		if (fs->fs_cs(fs, cg).cs_ndir < minndir &&
		    fs->fs_cs(fs, cg).cs_nifree >= avgifree) {
			mincg = cg;
			minndir = fs->fs_cs(fs, cg).cs_ndir;
		}
	return ((ino_t)(fs->fs_ipg * mincg));
}

/*
 * Select the desired position for the next block in a file.  The file is
 * logically divided into sections. The first section is composed of the
 * direct blocks. Each additional section contains fs_maxbpg blocks.
 *
 * If no blocks have been allocated in the first section, the policy is to
 * request a block in the same cylinder group as the inode that describes
 * the file. If no blocks have been allocated in any other section, the
 * policy is to place the section in a cylinder group with a greater than
 * average number of free blocks.  An appropriate cylinder group is found
 * by using a rotor that sweeps the cylinder groups. When a new group of
 * blocks is needed, the sweep begins in the cylinder group following the
 * cylinder group from which the previous allocation was made. The sweep
 * continues until a cylinder group with greater than the average number
 * of free blocks is found. If the allocation is for the first block in an
 * indirect block, the information on the previous allocation is unavailable;
 * here a best guess is made based upon the logical block number being
 * allocated.
 *
 * If a section is already partially allocated, the policy is to
 * contiguously allocate fs_maxcontig blocks.  The end of one of these
 * contiguous blocks and the beginning of the next is physically separated
 * so that the disk head will be in transit between them for at least
 * fs_rotdelay milliseconds.  This is to allow time for the processor to
 * schedule another I/O transfer.
 */
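/*
 * Illustration (fs_maxbpg value hypothetical): with NDADDR = 12
 * direct blocks and fs_maxbpg = 256, a new section starts whenever
 * the index into the block map is a multiple of 256 (or the prior
 * map slot is empty), and each new section is steered to a fresh
 * cylinder group by the rotor sweep below.
 */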
daddr_t
blkpref(ip, lbn, indx, bap)
	struct inode *ip;
	daddr_t lbn;
	int indx;
	daddr_t *bap;
{
	register struct fs *fs;
	register int cg;
	int avgbfree, startcg;
	daddr_t nextblk;

	fs = ip->i_fs;
	if (indx % fs->fs_maxbpg == 0 || bap[indx - 1] == 0) {
		if (lbn < NDADDR) {
			cg = itog(fs, ip->i_number);
			return (fs->fs_fpg * cg + fs->fs_frag);
		}
		/*
		 * Find a cylinder with greater than average number of
		 * unused data blocks.
		 */
		if (indx == 0 || bap[indx - 1] == 0)
			startcg = itog(fs, ip->i_number) + lbn / fs->fs_maxbpg;
		else
			startcg = dtog(fs, bap[indx - 1]) + 1;
		startcg %= fs->fs_ncg;
		avgbfree = fs->fs_cstotal.cs_nbfree / fs->fs_ncg;
		for (cg = startcg; cg < fs->fs_ncg; cg++)
			if (fs->fs_cs(fs, cg).cs_nbfree >= avgbfree) {
				fs->fs_cgrotor = cg;
				return (fs->fs_fpg * cg + fs->fs_frag);
			}
		for (cg = 0; cg <= startcg; cg++)
			if (fs->fs_cs(fs, cg).cs_nbfree >= avgbfree) {
				fs->fs_cgrotor = cg;
				return (fs->fs_fpg * cg + fs->fs_frag);
			}
		return (NULL);
	}
	/*
	 * One or more previous blocks have been laid out. If less
	 * than fs_maxcontig previous blocks are contiguous, the
	 * next block is requested contiguously, otherwise it is
	 * requested rotationally delayed by fs_rotdelay milliseconds.
	 */
	nextblk = bap[indx - 1] + fs->fs_frag;
	if (indx > fs->fs_maxcontig &&
	    bap[indx - fs->fs_maxcontig] + blkstofrags(fs, fs->fs_maxcontig)
	    != nextblk)
		return (nextblk);
	if (fs->fs_rotdelay != 0)
		/*
		 * Here we convert ms of delay to frags as:
		 * (frags) = (ms) * (rev/sec) * (sect/rev) /
		 *	((sect/frag) * (ms/sec))
		 * then round up to the next block.
		 */
		nextblk += roundup(fs->fs_rotdelay * fs->fs_rps * fs->fs_nsect /
		    (NSPF(fs) * 1000), fs->fs_frag);
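	/*
	 * Worked example (hypothetical geometry): fs_rotdelay = 4ms,
	 * fs_rps = 60, fs_nsect = 32 and NSPF(fs) = 2 give
	 * 4 * 60 * 32 / (2 * 1000) = 3 frags of head transit, which
	 * roundup() then pads to a full block of 8 frags when
	 * fs_frag = 8.
	 */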
	return (nextblk);
}

/*
 * Implement the cylinder overflow algorithm.
 *
 * The policy implemented by this algorithm is:
 *   1) allocate the block in its requested cylinder group.
 *   2) quadratically rehash on the cylinder group number.
 *   3) brute force search for a free block.
 */
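/*
 * Illustration (hypothetical layout): with fs_ncg = 16 and a
 * preferred group of 5, step 1 probes group 5; the quadratic
 * rehash then probes groups 6, 8, 12 and 4 (offsets 1, 3, 7 and
 * 15); finally the brute force pass scans the remaining groups
 * starting at (5 + 2) % 16 = 7.
 */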
/*VARARGS5*/
u_long
hashalloc(ip, cg, pref, size, allocator)
	struct inode *ip;
	int cg;
	long pref;
	int size;	/* size for data blocks, mode for inodes */
	u_long (*allocator)();
{
	register struct fs *fs;
	long result;
	int i, icg = cg;

	fs = ip->i_fs;
	/*
	 * 1: preferred cylinder group
	 */
	result = (*allocator)(ip, cg, pref, size);
	if (result)
		return (result);
	/*
	 * 2: quadratic rehash
	 */
	for (i = 1; i < fs->fs_ncg; i *= 2) {
		cg += i;
		if (cg >= fs->fs_ncg)
			cg -= fs->fs_ncg;
		result = (*allocator)(ip, cg, 0, size);
		if (result)
			return (result);
	}
	/*
	 * 3: brute force search
	 * Note that we start at i == 2, since 0 was checked initially,
	 * and 1 is always checked in the quadratic rehash.
	 */
	cg = (icg + 2) % fs->fs_ncg;
	for (i = 2; i < fs->fs_ncg; i++) {
		result = (*allocator)(ip, cg, 0, size);
		if (result)
			return (result);
		cg++;
		if (cg == fs->fs_ncg)
			cg = 0;
	}
	return (NULL);
}

/*
 * Determine whether a fragment can be extended.
 *
 * Check to see if the necessary fragments are available, and
 * if they are, allocate them.
 */
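/*
 * Illustration (hypothetical sizes): on a file system with
 * fs_fsize = 1024 and fs_bsize = 8192, growing a 1024-byte
 * fragment to 3072 bytes succeeds only if the two frags
 * immediately following bprev are free and the extended piece
 * does not cross into the next block.
 */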
daddr_t
fragextend(ip, cg, bprev, osize, nsize)
	struct inode *ip;
	int cg;
	long bprev;
	int osize, nsize;
{
	register struct fs *fs;
	register struct cg *cgp;
	struct buf *bp;
	long bno;
	int frags, bbase;
	int i, error;

	fs = ip->i_fs;
	if (fs->fs_cs(fs, cg).cs_nffree < numfrags(fs, nsize - osize))
		return (NULL);
	frags = numfrags(fs, nsize);
	bbase = fragnum(fs, bprev);
	if (bbase > fragnum(fs, (bprev + frags - 1))) {
		/* cannot extend across a block boundary */
		return (NULL);
	}
	error = bread(ip->i_devvp, fsbtodb(fs, cgtod(fs, cg)),
		(int)fs->fs_cgsize, &bp);
	if (error) {
		brelse(bp);
		return (NULL);
	}
	cgp = bp->b_un.b_cg;
	if (!cg_chkmagic(cgp)) {
		brelse(bp);
		return (NULL);
	}
	cgp->cg_time = time.tv_sec;
	bno = dtogd(fs, bprev);
	for (i = numfrags(fs, osize); i < frags; i++)
		if (isclr(cg_blksfree(cgp), bno + i)) {
			brelse(bp);
			return (NULL);
		}
	/*
	 * The current fragment can be extended:
	 * deduct the count on the fragment being extended into,
	 * increase the count on the remaining fragment (if any),
	 * and allocate the extended piece.
	 */
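	/*
	 * Accounting example (hypothetical map state): cg_frsum[n]
	 * counts free runs of exactly n frags.  If the old piece is
	 * 1 frag, the new size is 3 frags, and a free run of 4 frags
	 * follows it, then cg_frsum[4] is decremented and, since 2
	 * free frags remain, cg_frsum[2] is incremented.
	 */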
	for (i = frags; i < fs->fs_frag - bbase; i++)
		if (isclr(cg_blksfree(cgp), bno + i))
			break;
	cgp->cg_frsum[i - numfrags(fs, osize)]--;
	if (i != frags)
		cgp->cg_frsum[i - frags]++;
	for (i = numfrags(fs, osize); i < frags; i++) {
		clrbit(cg_blksfree(cgp), bno + i);
		cgp->cg_cs.cs_nffree--;
		fs->fs_cstotal.cs_nffree--;
		fs->fs_cs(fs, cg).cs_nffree--;
	}
	fs->fs_fmod++;
	bdwrite(bp);
	return (bprev);
}

/*
 * Determine whether a block can be allocated.
 *
 * Check to see if a block of the appropriate size is available,
 * and if it is, allocate it.
 */
daddr_t
alloccg(ip, cg, bpref, size)
	struct inode *ip;
	int cg;
	daddr_t bpref;
	int size;
{
	register struct fs *fs;
	register struct cg *cgp;
	struct buf *bp;
	register int i;
	int error, bno, frags, allocsiz;

	fs = ip->i_fs;
	if (fs->fs_cs(fs, cg).cs_nbfree == 0 && size == fs->fs_bsize)
		return (NULL);
	error = bread(ip->i_devvp, fsbtodb(fs, cgtod(fs, cg)),
		(int)fs->fs_cgsize, &bp);
	if (error) {
		brelse(bp);
		return (NULL);
	}
	cgp = bp->b_un.b_cg;
	if (!cg_chkmagic(cgp) ||
	    (cgp->cg_cs.cs_nbfree == 0 && size == fs->fs_bsize)) {
		brelse(bp);
		return (NULL);
	}
	cgp->cg_time = time.tv_sec;
	if (size == fs->fs_bsize) {
		bno = alloccgblk(fs, cgp, bpref);
		bdwrite(bp);
		return (bno);
	}
	/*
	 * Check to see if any fragments are already available.
	 * allocsiz is the size of the free run that will be
	 * allocated; if no run of the exact size is free, a
	 * larger run is found and hacked down to size.
	 */
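	/*
	 * Illustration (hypothetical summary state): if 2 frags are
	 * wanted but cg_frsum[2] == 0 while cg_frsum[3] != 0, the
	 * search below settles on allocsiz = 3; the leftover frag is
	 * returned to the map further down, moving a count from
	 * cg_frsum[3] to cg_frsum[1].
	 */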
	frags = numfrags(fs, size);
	for (allocsiz = frags; allocsiz < fs->fs_frag; allocsiz++)
		if (cgp->cg_frsum[allocsiz] != 0)
			break;
	if (allocsiz == fs->fs_frag) {
		/*
		 * no fragments were available, so a block will be
		 * allocated, and hacked up
		 */
		if (cgp->cg_cs.cs_nbfree == 0) {
			brelse(bp);
			return (NULL);
		}
		bno = alloccgblk(fs, cgp, bpref);
		bpref = dtogd(fs, bno);
		for (i = frags; i < fs->fs_frag; i++)
			setbit(cg_blksfree(cgp), bpref + i);
		i = fs->fs_frag - frags;
		cgp->cg_cs.cs_nffree += i;
		fs->fs_cstotal.cs_nffree += i;
		fs->fs_cs(fs, cg).cs_nffree += i;
		fs->fs_fmod++;
		cgp->cg_frsum[i]++;
		bdwrite(bp);
		return (bno);
	}
	bno = mapsearch(fs, cgp, bpref, allocsiz);
	if (bno < 0) {
		brelse(bp);
		return (NULL);
	}
	for (i = 0; i < frags; i++)
		clrbit(cg_blksfree(cgp), bno + i);
	cgp->cg_cs.cs_nffree -= frags;
	fs->fs_cstotal.cs_nffree -= frags;
	fs->fs_cs(fs, cg).cs_nffree -= frags;
	fs->fs_fmod++;
	cgp->cg_frsum[allocsiz]--;
	if (frags != allocsiz)
		cgp->cg_frsum[allocsiz - frags]++;
	bdwrite(bp);
	return (cg * fs->fs_fpg + bno);
}

/*
 * Allocate a block in a cylinder group.
 *
 * This algorithm implements the following policy:
 *   1) allocate the requested block.
 *   2) allocate a rotationally optimal block in the same cylinder.
 *   3) allocate the next available block on the block rotor for the
 *      specified cylinder group.
 * Note that this routine only allocates fs_bsize blocks; these
 * blocks may be fragmented by the routine that allocates them.
 */
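/*
 * Layout note (as used in the search below, details are disk
 * dependent): fs_postbl(fs, pos) gives, for each rotational
 * position, a starting block offset within the cylinder, and
 * fs_rotbl(fs) chains forward to the next block at the same
 * rotational position; the search walks that chain until it
 * reaches a free block.
 */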
daddr_t
alloccgblk(fs, cgp, bpref)
	register struct fs *fs;
	register struct cg *cgp;
	daddr_t bpref;
{
	daddr_t bno;
	int cylno, pos, delta;
	short *cylbp;
	register int i;

	if (bpref == 0) {
		bpref = cgp->cg_rotor;
		goto norot;
	}
	bpref = blknum(fs, bpref);
	bpref = dtogd(fs, bpref);
	/*
	 * if the requested block is available, use it
	 */
	if (isblock(fs, cg_blksfree(cgp), fragstoblks(fs, bpref))) {
		bno = bpref;
		goto gotit;
	}
	/*
	 * check for a block available on the same cylinder
	 */
	cylno = cbtocylno(fs, bpref);
	if (cg_blktot(cgp)[cylno] == 0)
		goto norot;
	if (fs->fs_cpc == 0) {
		/*
		 * block layout info is not available, so just have
		 * to take any block in this cylinder.
		 */
		bpref = howmany(fs->fs_spc * cylno, NSPF(fs));
		goto norot;
	}
	/*
	 * check the summary information to see if a block is
	 * available in the requested cylinder starting at the
	 * requested rotational position and proceeding around.
	 */
	cylbp = cg_blks(fs, cgp, cylno);
	pos = cbtorpos(fs, bpref);
	for (i = pos; i < fs->fs_nrpos; i++)
		if (cylbp[i] > 0)
			break;
	if (i == fs->fs_nrpos)
		for (i = 0; i < pos; i++)
			if (cylbp[i] > 0)
				break;
	if (cylbp[i] > 0) {
		/*
		 * Found a rotational position; now find the actual
		 * block.  It is a panic if none is actually there.
		 */
		pos = cylno % fs->fs_cpc;
		bno = (cylno - pos) * fs->fs_spc / NSPB(fs);
		if (fs_postbl(fs, pos)[i] == -1) {
			printf("pos = %d, i = %d, fs = %s\n",
			    pos, i, fs->fs_fsmnt);
			panic("alloccgblk: cyl groups corrupted");
		}
		for (i = fs_postbl(fs, pos)[i];; ) {
			if (isblock(fs, cg_blksfree(cgp), bno + i)) {
				bno = blkstofrags(fs, (bno + i));
				goto gotit;
			}
			delta = fs_rotbl(fs)[i];
			if (delta <= 0 ||
			    delta + i > fragstoblks(fs, fs->fs_fpg))
				break;
			i += delta;
		}
		printf("pos = %d, i = %d, fs = %s\n", pos, i, fs->fs_fsmnt);
		panic("alloccgblk: can't find blk in cyl");
	}
norot:
	/*
	 * no blocks in the requested cylinder, so take next
	 * available one in this cylinder group.
	 */
	bno = mapsearch(fs, cgp, bpref, (int)fs->fs_frag);
	if (bno < 0)
		return (NULL);
	cgp->cg_rotor = bno;
gotit:
	clrblock(fs, cg_blksfree(cgp), (long)fragstoblks(fs, bno));
	cgp->cg_cs.cs_nbfree--;
	fs->fs_cstotal.cs_nbfree--;
	fs->fs_cs(fs, cgp->cg_cgx).cs_nbfree--;
	cylno = cbtocylno(fs, bno);
	cg_blks(fs, cgp, cylno)[cbtorpos(fs, bno)]--;
	cg_blktot(cgp)[cylno]--;
	fs->fs_fmod++;
	return (cgp->cg_cgx * fs->fs_fpg + bno);
}

/*
 * Determine whether an inode can be allocated.
 *
 * Check to see if an inode is available, and if it is,
 * allocate it using the following policy:
 *   1) allocate the requested inode.
 *   2) allocate the next available inode after the requested
 *      inode in the specified cylinder group.
 */
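/*
 * Scan illustration (hypothetical map contents): with NBBY = 8 and
 * cg_irotor = 50, the skpc() scan below starts at byte 6 of the
 * inode map and skips 0xff (fully allocated) bytes; if byte 7 is
 * the first with a hole, say 0x3f, the bit loop finds bit 6 clear
 * and allocates inode 7 * 8 + 6 = 62 of this cylinder group.
 */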
ino_t
ialloccg(ip, cg, ipref, mode)
	struct inode *ip;
	int cg;
	daddr_t ipref;
	int mode;
{
	register struct fs *fs;
	register struct cg *cgp;
	struct buf *bp;
	int error, start, len, loc, map, i;

	fs = ip->i_fs;
	if (fs->fs_cs(fs, cg).cs_nifree == 0)
		return (NULL);
	error = bread(ip->i_devvp, fsbtodb(fs, cgtod(fs, cg)),
		(int)fs->fs_cgsize, &bp);
	if (error) {
		brelse(bp);
		return (NULL);
	}
	cgp = bp->b_un.b_cg;
	if (!cg_chkmagic(cgp) || cgp->cg_cs.cs_nifree == 0) {
		brelse(bp);
		return (NULL);
	}
	cgp->cg_time = time.tv_sec;
	if (ipref) {
		ipref %= fs->fs_ipg;
		if (isclr(cg_inosused(cgp), ipref))
			goto gotit;
	}
	start = cgp->cg_irotor / NBBY;
	len = howmany(fs->fs_ipg - cgp->cg_irotor, NBBY);
	loc = skpc(0xff, len, &cg_inosused(cgp)[start]);
	if (loc == 0) {
		len = start + 1;
		start = 0;
		loc = skpc(0xff, len, &cg_inosused(cgp)[0]);
		if (loc == 0) {
			printf("cg = %d, irotor = %d, fs = %s\n",
			    cg, cgp->cg_irotor, fs->fs_fsmnt);
			panic("ialloccg: map corrupted");
			/* NOTREACHED */
		}
	}
	i = start + len - loc;
	map = cg_inosused(cgp)[i];
	ipref = i * NBBY;
	for (i = 1; i < (1 << NBBY); i <<= 1, ipref++) {
		if ((map & i) == 0) {
			cgp->cg_irotor = ipref;
			goto gotit;
		}
	}
	printf("fs = %s\n", fs->fs_fsmnt);
	panic("ialloccg: block not in map");
	/* NOTREACHED */
gotit:
	setbit(cg_inosused(cgp), ipref);
	cgp->cg_cs.cs_nifree--;
	fs->fs_cstotal.cs_nifree--;
	fs->fs_cs(fs, cg).cs_nifree--;
	fs->fs_fmod++;
	if ((mode & IFMT) == IFDIR) {
		cgp->cg_cs.cs_ndir++;
		fs->fs_cstotal.cs_ndir++;
		fs->fs_cs(fs, cg).cs_ndir++;
	}
	bdwrite(bp);
	return (cg * fs->fs_ipg + ipref);
}

/*
 * Free a block or fragment.
 *
 * The specified block or fragment is placed back in the
 * free map. If a fragment is deallocated, a check is made to see
 * whether the freed fragments reassemble a complete block.
 */
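/*
 * Reassembly example (hypothetical map state): with fs_frag = 8,
 * freeing the last 2 busy frags of a block whose other 6 frags are
 * already free rebuilds a full block; the code below then moves the
 * 8 frags out of the nffree counts and credits nbfree instead.
 */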
blkfree(ip, bno, size)
	register struct inode *ip;
	daddr_t bno;
	off_t size;
{
	register struct fs *fs;
	register struct cg *cgp;
	struct buf *bp;
	int error, cg, blk, frags, bbase;
	register int i;

	fs = ip->i_fs;
	if ((unsigned)size > fs->fs_bsize || fragoff(fs, size) != 0) {
		printf("dev = 0x%x, bsize = %d, size = %d, fs = %s\n",
		    ip->i_dev, fs->fs_bsize, size, fs->fs_fsmnt);
		panic("blkfree: bad size");
	}
	cg = dtog(fs, bno);
	if (badblock(fs, bno)) {
		printf("bad block %d, ino %d\n", bno, ip->i_number);
		return;
	}
	error = bread(ip->i_devvp, fsbtodb(fs, cgtod(fs, cg)),
		(int)fs->fs_cgsize, &bp);
	if (error) {
		brelse(bp);
		return;
	}
	cgp = bp->b_un.b_cg;
	if (!cg_chkmagic(cgp)) {
		brelse(bp);
		return;
	}
	cgp->cg_time = time.tv_sec;
	bno = dtogd(fs, bno);
	if (size == fs->fs_bsize) {
		if (isblock(fs, cg_blksfree(cgp), fragstoblks(fs, bno))) {
			printf("dev = 0x%x, block = %d, fs = %s\n",
			    ip->i_dev, bno, fs->fs_fsmnt);
			panic("blkfree: freeing free block");
		}
		setblock(fs, cg_blksfree(cgp), fragstoblks(fs, bno));
		cgp->cg_cs.cs_nbfree++;
		fs->fs_cstotal.cs_nbfree++;
		fs->fs_cs(fs, cg).cs_nbfree++;
		i = cbtocylno(fs, bno);
		cg_blks(fs, cgp, i)[cbtorpos(fs, bno)]++;
		cg_blktot(cgp)[i]++;
	} else {
		bbase = bno - fragnum(fs, bno);
		/*
		 * decrement the counts associated with the old frags
		 */
		blk = blkmap(fs, cg_blksfree(cgp), bbase);
		fragacct(fs, blk, cgp->cg_frsum, -1);
		/*
		 * deallocate the fragment
		 */
		frags = numfrags(fs, size);
		for (i = 0; i < frags; i++) {
			if (isset(cg_blksfree(cgp), bno + i)) {
				printf("dev = 0x%x, block = %d, fs = %s\n",
				    ip->i_dev, bno + i, fs->fs_fsmnt);
				panic("blkfree: freeing free frag");
			}
			setbit(cg_blksfree(cgp), bno + i);
		}
		cgp->cg_cs.cs_nffree += i;
		fs->fs_cstotal.cs_nffree += i;
		fs->fs_cs(fs, cg).cs_nffree += i;
		/*
		 * add back in counts associated with the new frags
		 */
		blk = blkmap(fs, cg_blksfree(cgp), bbase);
		fragacct(fs, blk, cgp->cg_frsum, 1);
		/*
		 * if a complete block has been reassembled, account for it
		 */
		if (isblock(fs, cg_blksfree(cgp),
		    (daddr_t)fragstoblks(fs, bbase))) {
			cgp->cg_cs.cs_nffree -= fs->fs_frag;
			fs->fs_cstotal.cs_nffree -= fs->fs_frag;
			fs->fs_cs(fs, cg).cs_nffree -= fs->fs_frag;
			cgp->cg_cs.cs_nbfree++;
			fs->fs_cstotal.cs_nbfree++;
			fs->fs_cs(fs, cg).cs_nbfree++;
			i = cbtocylno(fs, bbase);
			cg_blks(fs, cgp, i)[cbtorpos(fs, bbase)]++;
			cg_blktot(cgp)[i]++;
		}
	}
	fs->fs_fmod++;
	bdwrite(bp);
}

/*
 * Free an inode.
 *
 * The specified inode is placed back in the free map.
 */
ifree(ip, ino, mode)
	struct inode *ip;
	ino_t ino;
	int mode;
{
	register struct fs *fs;
	register struct cg *cgp;
	struct buf *bp;
	int error, cg;

	fs = ip->i_fs;
	if ((unsigned)ino >= fs->fs_ipg * fs->fs_ncg) {
		printf("dev = 0x%x, ino = %d, fs = %s\n",
		    ip->i_dev, ino, fs->fs_fsmnt);
		panic("ifree: range");
	}
	cg = itog(fs, ino);
	error = bread(ip->i_devvp, fsbtodb(fs, cgtod(fs, cg)),
		(int)fs->fs_cgsize, &bp);
	if (error) {
		brelse(bp);
		return;
	}
	cgp = bp->b_un.b_cg;
	if (!cg_chkmagic(cgp)) {
		brelse(bp);
		return;
	}
	cgp->cg_time = time.tv_sec;
	ino %= fs->fs_ipg;
	if (isclr(cg_inosused(cgp), ino)) {
		printf("dev = 0x%x, ino = %d, fs = %s\n",
		    ip->i_dev, ino, fs->fs_fsmnt);
		panic("ifree: freeing free inode");
	}
	clrbit(cg_inosused(cgp), ino);
	if (ino < cgp->cg_irotor)
		cgp->cg_irotor = ino;
	cgp->cg_cs.cs_nifree++;
	fs->fs_cstotal.cs_nifree++;
	fs->fs_cs(fs, cg).cs_nifree++;
	if ((mode & IFMT) == IFDIR) {
		cgp->cg_cs.cs_ndir--;
		fs->fs_cstotal.cs_ndir--;
		fs->fs_cs(fs, cg).cs_ndir--;
	}
	fs->fs_fmod++;
	bdwrite(bp);
}

/*
 * Find a block of the specified size in the specified cylinder group.
 *
 * It is a panic if a request is made to find a block when none
 * are available.
 */
daddr_t
mapsearch(fs, cgp, bpref, allocsiz)
	register struct fs *fs;
	register struct cg *cgp;
	daddr_t bpref;
	int allocsiz;
{
	daddr_t bno;
	int start, len, loc, i;
	int blk, field, subfield, pos;

	/*
	 * find the fragment by searching through the free block
	 * map for an appropriate bit pattern
	 */
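	/*
	 * How the scan works (summary of the mechanism below):
	 * fragtbl[fs->fs_frag] maps each byte of the free map to a
	 * bit set describing the sizes of the free runs inside that
	 * byte, so scanc() can locate the first byte containing a
	 * usable run without testing individual bits; the around[]
	 * and inside[] masks then pinpoint the exact fragment.
	 */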
	if (bpref)
		start = dtogd(fs, bpref) / NBBY;
	else
		start = cgp->cg_frotor / NBBY;
	len = howmany(fs->fs_fpg, NBBY) - start;
	loc = scanc((unsigned)len, (u_char *)&cg_blksfree(cgp)[start],
		(u_char *)fragtbl[fs->fs_frag],
		(u_char)(1 << (allocsiz - 1 + (fs->fs_frag % NBBY))));
	if (loc == 0) {
		len = start + 1;
		start = 0;
		loc = scanc((unsigned)len, (u_char *)&cg_blksfree(cgp)[0],
			(u_char *)fragtbl[fs->fs_frag],
			(u_char)(1 << (allocsiz - 1 + (fs->fs_frag % NBBY))));
		if (loc == 0) {
			printf("start = %d, len = %d, fs = %s\n",
			    start, len, fs->fs_fsmnt);
			panic("mapsearch: map corrupted");
			/* NOTREACHED */
		}
	}
	bno = (start + len - loc) * NBBY;
	cgp->cg_frotor = bno;
	/*
	 * found the byte in the map
	 * sift through the bits to find the selected frag
	 */
	for (i = bno + NBBY; bno < i; bno += fs->fs_frag) {
		blk = blkmap(fs, cg_blksfree(cgp), bno);
		blk <<= 1;
		field = around[allocsiz];
		subfield = inside[allocsiz];
		for (pos = 0; pos <= fs->fs_frag - allocsiz; pos++) {
			if ((blk & field) == subfield)
				return (bno + pos);
			field <<= 1;
			subfield <<= 1;
		}
	}
	printf("bno = %d, fs = %s\n", bno, fs->fs_fsmnt);
	panic("mapsearch: block not in map");
	return (-1);
}

/*
 * Fserr prints the name of a file system with an error diagnostic.
 *
 * The form of the error message is:
 *	fs: error message
 */
fserr(fs, cp)
	struct fs *fs;
	char *cp;
{

	log(LOG_ERR, "%s: %s\n", fs->fs_fsmnt, cp);
}
1077