/*
 * Copyright (c) 1982, 1986 Regents of the University of California.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms are permitted
 * provided that the above copyright notice and this paragraph are
 * duplicated in all such forms and that any documentation,
 * advertising materials, and other materials related to such
 * distribution and use acknowledge that the software was developed
 * by the University of California, Berkeley.  The name of the
 * University may not be used to endorse or promote products derived
 * from this software without specific prior written permission.
 * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED
 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE.
 *
 *	@(#)lfs_alloc.c	7.8 (Berkeley) 06/29/88
 */
19 
20 #include "param.h"
21 #include "systm.h"
22 #include "mount.h"
23 #include "fs.h"
24 #include "buf.h"
25 #include "inode.h"
26 #include "dir.h"
27 #include "user.h"
28 #include "quota.h"
29 #include "kernel.h"
30 #include "syslog.h"
31 #include "cmap.h"
32 
33 extern u_long		hashalloc();
34 extern ino_t		ialloccg();
35 extern daddr_t		alloccg();
36 extern daddr_t		alloccgblk();
37 extern daddr_t		fragextend();
38 extern daddr_t		blkpref();
39 extern daddr_t		mapsearch();
40 extern int		inside[], around[];
41 extern unsigned char	*fragtbl[];
42 
/*
 * Allocate a block in the file system.
 *
 * The size of the requested block is given, which must be some
 * multiple of fs_fsize and <= fs_bsize.
 * A preference may be optionally specified. If a preference is given
 * the following hierarchy is used to allocate a block:
 *   1) allocate the requested block.
 *   2) allocate a rotationally optimal block in the same cylinder.
 *   3) allocate a block in the same cylinder group.
 *   4) quadratically rehash into other cylinder groups, until an
 *      available block is located.
 * If no block preference is given the following hierarchy is used
 * to allocate a block:
 *   1) allocate a block in the cylinder group that contains the
 *      inode for the file.
 *   2) quadratically rehash into other cylinder groups, until an
 *      available block is located.
 */
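/*
 * A minimal sketch of a typical call, assuming a 4.3BSD-era caller
 * such as the file-extension path in bmap(); the caller shown here
 * is illustrative, not taken from this file:
 *
 *	bp = alloc(ip, bpref, size);
 *	if (bp == NULL)
 *		return;		(alloc() has already set u.u_error)
 *
 * On success the returned buffer is zeroed and ip->i_blocks has
 * already been charged for the new allocation.
 */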
struct buf *
alloc(ip, bpref, size)
	register struct inode *ip;
	daddr_t bpref;
	int size;
{
	daddr_t bno;
	register struct fs *fs;
	register struct buf *bp;
	int cg;

	fs = ip->i_fs;
	if ((unsigned)size > fs->fs_bsize || fragoff(fs, size) != 0) {
		printf("dev = 0x%x, bsize = %d, size = %d, fs = %s\n",
		    ip->i_dev, fs->fs_bsize, size, fs->fs_fsmnt);
		panic("alloc: bad size");
	}
	if (size == fs->fs_bsize && fs->fs_cstotal.cs_nbfree == 0)
		goto nospace;
	if (u.u_uid != 0 && freespace(fs, fs->fs_minfree) <= 0)
		goto nospace;
#ifdef QUOTA
	u.u_error = chkdq(ip, (long)btodb(size), 0);
	if (u.u_error)
		return (NULL);
#endif
	if (bpref >= fs->fs_size)
		bpref = 0;
	if (bpref == 0)
		cg = itog(fs, ip->i_number);
	else
		cg = dtog(fs, bpref);
	bno = (daddr_t)hashalloc(ip, cg, (long)bpref, size,
		(u_long (*)())alloccg);
	if (bno <= 0)
		goto nospace;
	ip->i_blocks += btodb(size);
	ip->i_flag |= IUPD|ICHG;
	bp = getblk(ip->i_dev, fsbtodb(fs, bno), size);
	clrbuf(bp);
	return (bp);
nospace:
	fserr(fs, "file system full");
	uprintf("\n%s: write failed, file system is full\n", fs->fs_fsmnt);
	u.u_error = ENOSPC;
	return (NULL);
}

/*
 * Reallocate a fragment to a bigger size.
 *
 * The number and size of the old block are given, and a preference
 * and new size are also specified. The allocator attempts to extend
 * the original block. Failing that, the regular block allocator is
 * invoked to get an appropriate block.
 */
struct buf *
realloccg(ip, bprev, bpref, osize, nsize)
	register struct inode *ip;
	daddr_t bprev, bpref;
	int osize, nsize;
{
	register struct fs *fs;
	register struct buf *bp, *obp;
	int cg, request;
	daddr_t bno, bn;
	int i, count;

	fs = ip->i_fs;
	if ((unsigned)osize > fs->fs_bsize || fragoff(fs, osize) != 0 ||
	    (unsigned)nsize > fs->fs_bsize || fragoff(fs, nsize) != 0) {
		printf("dev = 0x%x, bsize = %d, osize = %d, nsize = %d, fs = %s\n",
		    ip->i_dev, fs->fs_bsize, osize, nsize, fs->fs_fsmnt);
		panic("realloccg: bad size");
	}
	if (u.u_uid != 0 && freespace(fs, fs->fs_minfree) <= 0)
		goto nospace;
	if (bprev == 0) {
		printf("dev = 0x%x, bsize = %d, bprev = %d, fs = %s\n",
		    ip->i_dev, fs->fs_bsize, bprev, fs->fs_fsmnt);
		panic("realloccg: bad bprev");
	}
#ifdef QUOTA
	u.u_error = chkdq(ip, (long)btodb(nsize - osize), 0);
	if (u.u_error)
		return (NULL);
#endif
	cg = dtog(fs, bprev);
	bno = fragextend(ip, cg, (long)bprev, osize, nsize);
	if (bno != 0) {
		do {
			bp = bread(ip->i_dev, fsbtodb(fs, bno), osize);
			if (bp->b_flags & B_ERROR) {
				brelse(bp);
				return (NULL);
			}
		} while (brealloc(bp, nsize) == 0);
		bp->b_flags |= B_DONE;
		bzero(bp->b_un.b_addr + osize, (unsigned)nsize - osize);
		ip->i_blocks += btodb(nsize - osize);
		ip->i_flag |= IUPD|ICHG;
		return (bp);
	}
	if (bpref >= fs->fs_size)
		bpref = 0;
	switch ((int)fs->fs_optim) {
	case FS_OPTSPACE:
		/*
		 * Allocate an exact sized fragment. Although this makes
		 * best use of space, we will waste time relocating it if
		 * the file continues to grow. If the fragmentation is
		 * less than half of the minimum free reserve, we choose
		 * to begin optimizing for time.
		 */
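		/*
		 * For example, with fs_minfree = 10 the test below keeps
		 * space optimization while cs_nffree exceeds
		 * fs_dsize * 10 / 200, i.e. while fragmented free space
		 * is more than 5% of the data blocks.
		 */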
		request = nsize;
		if (fs->fs_minfree < 5 ||
		    fs->fs_cstotal.cs_nffree >
		    fs->fs_dsize * fs->fs_minfree / (2 * 100))
			break;
		log(LOG_NOTICE, "%s: optimization changed from SPACE to TIME\n",
			fs->fs_fsmnt);
		fs->fs_optim = FS_OPTTIME;
		break;
	case FS_OPTTIME:
		/*
		 * At this point we have discovered a file that is trying
		 * to grow a small fragment to a larger fragment. To save
		 * time, we allocate a full sized block, then free the
		 * unused portion. If the file continues to grow, the
		 * `fragextend' call above will be able to grow it in place
		 * without further copying. If aberrant programs cause
		 * disk fragmentation to grow within 2% of the free reserve,
		 * we choose to begin optimizing for space.
		 */
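		/*
		 * For example, with fs_minfree = 10 the test below reverts
		 * to space optimization once cs_nffree reaches
		 * fs_dsize * (10 - 2) / 100, i.e. once fragmented free
		 * space amounts to 8% of the data blocks.
		 */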
		request = fs->fs_bsize;
		if (fs->fs_cstotal.cs_nffree <
		    fs->fs_dsize * (fs->fs_minfree - 2) / 100)
			break;
		log(LOG_NOTICE, "%s: optimization changed from TIME to SPACE\n",
			fs->fs_fsmnt);
		fs->fs_optim = FS_OPTSPACE;
		break;
	default:
		printf("dev = 0x%x, optim = %d, fs = %s\n",
		    ip->i_dev, fs->fs_optim, fs->fs_fsmnt);
		panic("realloccg: bad optim");
		/* NOTREACHED */
	}
	bno = (daddr_t)hashalloc(ip, cg, (long)bpref, request,
		(u_long (*)())alloccg);
	if (bno > 0) {
		obp = bread(ip->i_dev, fsbtodb(fs, bprev), osize);
		if (obp->b_flags & B_ERROR) {
			brelse(obp);
			return (NULL);
		}
		bn = fsbtodb(fs, bno);
		bp = getblk(ip->i_dev, bn, nsize);
		bcopy(obp->b_un.b_addr, bp->b_un.b_addr, (u_int)osize);
		count = howmany(osize, CLBYTES);
		for (i = 0; i < count; i++)
			munhash(ip->i_dev, bn + i * CLBYTES / DEV_BSIZE);
		bzero(bp->b_un.b_addr + osize, (unsigned)nsize - osize);
		if (obp->b_flags & B_DELWRI) {
			obp->b_flags &= ~B_DELWRI;
			u.u_ru.ru_oublock--;		/* delete charge */
		}
		brelse(obp);
		blkfree(ip, bprev, (off_t)osize);
		if (nsize < request)
			blkfree(ip, bno + numfrags(fs, nsize),
				(off_t)(request - nsize));
		ip->i_blocks += btodb(nsize - osize);
		ip->i_flag |= IUPD|ICHG;
		return (bp);
	}
nospace:
	/*
	 * no space available
	 */
	fserr(fs, "file system full");
	uprintf("\n%s: write failed, file system is full\n", fs->fs_fsmnt);
	u.u_error = ENOSPC;
	return (NULL);
}

/*
 * Allocate an inode in the file system.
 *
 * A preference may be optionally specified. If a preference is given
 * the following hierarchy is used to allocate an inode:
 *   1) allocate the requested inode.
 *   2) allocate an inode in the same cylinder group.
 *   3) quadratically rehash into other cylinder groups, until an
 *      available inode is located.
 * If no inode preference is given the following hierarchy is used
 * to allocate an inode:
 *   1) allocate an inode in cylinder group 0.
 *   2) quadratically rehash into other cylinder groups, until an
 *      available inode is located.
 */
struct inode *
ialloc(pip, ipref, mode)
	register struct inode *pip;
	ino_t ipref;
	int mode;
{
	ino_t ino;
	register struct fs *fs;
	register struct inode *ip;
	int cg;

	fs = pip->i_fs;
	if (fs->fs_cstotal.cs_nifree == 0)
		goto noinodes;
#ifdef QUOTA
	u.u_error = chkiq(pip->i_dev, (struct inode *)NULL, u.u_uid, 0);
	if (u.u_error)
		return (NULL);
#endif
	if (ipref >= fs->fs_ncg * fs->fs_ipg)
		ipref = 0;
	cg = itog(fs, ipref);
	ino = (ino_t)hashalloc(pip, cg, (long)ipref, mode, ialloccg);
	if (ino == 0)
		goto noinodes;
	ip = iget(pip->i_dev, pip->i_fs, ino);
	if (ip == NULL) {
		ifree(pip, ino, 0);
		return (NULL);
	}
	if (ip->i_mode) {
		printf("mode = 0%o, inum = %d, fs = %s\n",
		    ip->i_mode, ip->i_number, fs->fs_fsmnt);
		panic("ialloc: dup alloc");
	}
	if (ip->i_blocks) {				/* XXX */
		printf("free inode %s/%d had %d blocks\n",
		    fs->fs_fsmnt, ino, ip->i_blocks);
		ip->i_blocks = 0;
	}
	return (ip);
noinodes:
	fserr(fs, "out of inodes");
	uprintf("\n%s: create/symlink failed, no inodes free\n", fs->fs_fsmnt);
	u.u_error = ENOSPC;
	return (NULL);
}

/*
 * Find a cylinder group in which to place a directory.
 *
 * The policy implemented by this algorithm is to select, from
 * among those cylinder groups with above the average number of
 * free inodes, the one with the smallest number of directories.
 */
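/*
 * For example, with fs_ipg = 1880 and cylinder group 3 holding the
 * fewest directories among the sufficiently free groups, this returns
 * inode number 5640, the first inode of that group, which ialloc()
 * then uses as its preference.
 */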
ino_t
dirpref(fs)
	register struct fs *fs;
{
	int cg, minndir, mincg, avgifree;

	avgifree = fs->fs_cstotal.cs_nifree / fs->fs_ncg;
	minndir = fs->fs_ipg;
	mincg = 0;
	for (cg = 0; cg < fs->fs_ncg; cg++)
		if (fs->fs_cs(fs, cg).cs_ndir < minndir &&
		    fs->fs_cs(fs, cg).cs_nifree >= avgifree) {
			mincg = cg;
			minndir = fs->fs_cs(fs, cg).cs_ndir;
		}
	return ((ino_t)(fs->fs_ipg * mincg));
}

/*
 * Select the desired position for the next block in a file.  The file is
 * logically divided into sections. The first section is composed of the
 * direct blocks. Each additional section contains fs_maxbpg blocks.
 *
 * If no blocks have been allocated in the first section, the policy is to
 * request a block in the same cylinder group as the inode that describes
 * the file. If no blocks have been allocated in any other section, the
 * policy is to place the section in a cylinder group with a greater than
 * average number of free blocks.  An appropriate cylinder group is found
 * by using a rotor that sweeps the cylinder groups. When a new group of
 * blocks is needed, the sweep begins in the cylinder group following the
 * cylinder group from which the previous allocation was made. The sweep
 * continues until a cylinder group with greater than the average number
 * of free blocks is found. If the allocation is for the first block in an
 * indirect block, the information on the previous allocation is unavailable;
 * here a best guess is made based upon the logical block number being
 * allocated.
 *
 * If a section is already partially allocated, the policy is to
 * contiguously allocate fs_maxcontig blocks.  The end of one of these
 * contiguous blocks and the beginning of the next is physically separated
 * so that the disk head will be in transit between them for at least
 * fs_rotdelay milliseconds.  This is to allow time for the processor to
 * schedule another I/O transfer.
 */
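/*
 * For example (fs_maxbpg = 256 is hypothetical): the first indirect
 * section covers roughly logical blocks NDADDR through NDADDR + 255.
 * The first allocation in that section starts the rotor sweep below,
 * and each later one is placed relative to its predecessor.
 */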
daddr_t
blkpref(ip, lbn, indx, bap)
	struct inode *ip;
	daddr_t lbn;
	int indx;
	daddr_t *bap;
{
	register struct fs *fs;
	register int cg;
	int avgbfree, startcg;
	daddr_t nextblk;

	fs = ip->i_fs;
	if (indx % fs->fs_maxbpg == 0 || bap[indx - 1] == 0) {
		if (lbn < NDADDR) {
			cg = itog(fs, ip->i_number);
			return (fs->fs_fpg * cg + fs->fs_frag);
		}
		/*
		 * Find a cylinder with greater than average number of
		 * unused data blocks.
		 */
		if (indx == 0 || bap[indx - 1] == 0)
			startcg = itog(fs, ip->i_number) + lbn / fs->fs_maxbpg;
		else
			startcg = dtog(fs, bap[indx - 1]) + 1;
		startcg %= fs->fs_ncg;
		avgbfree = fs->fs_cstotal.cs_nbfree / fs->fs_ncg;
		for (cg = startcg; cg < fs->fs_ncg; cg++)
			if (fs->fs_cs(fs, cg).cs_nbfree >= avgbfree) {
				fs->fs_cgrotor = cg;
				return (fs->fs_fpg * cg + fs->fs_frag);
			}
		for (cg = 0; cg <= startcg; cg++)
			if (fs->fs_cs(fs, cg).cs_nbfree >= avgbfree) {
				fs->fs_cgrotor = cg;
				return (fs->fs_fpg * cg + fs->fs_frag);
			}
		return (NULL);
	}
	/*
	 * One or more previous blocks have been laid out. If less
	 * than fs_maxcontig previous blocks are contiguous, the
	 * next block is requested contiguously, otherwise it is
	 * requested rotationally delayed by fs_rotdelay milliseconds.
	 */
	nextblk = bap[indx - 1] + fs->fs_frag;
	if (indx > fs->fs_maxcontig &&
	    bap[indx - fs->fs_maxcontig] + blkstofrags(fs, fs->fs_maxcontig)
	    != nextblk)
		return (nextblk);
	if (fs->fs_rotdelay != 0)
		/*
		 * Here we convert ms of delay to frags as:
		 * (frags) = (ms) * (rev/sec) * (sect/rev) /
		 *	((sect/frag) * (ms/sec))
		 * then round up to the next block.
		 */
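		/*
		 * For example (hypothetical geometry): with fs_rotdelay = 4
		 * ms, fs_rps = 60 rev/sec, fs_nsect = 32 sect/rev, and
		 * NSPF(fs) = 2 sect/frag, the quotient is
		 * 4 * 60 * 32 / (2 * 1000) = 3 frags; with fs_frag = 8,
		 * roundup(3, 8) = 8, i.e. one full block of skip.
		 */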
		nextblk += roundup(fs->fs_rotdelay * fs->fs_rps * fs->fs_nsect /
		    (NSPF(fs) * 1000), fs->fs_frag);
	return (nextblk);
}

/*
 * Implement the cylinder overflow algorithm.
 *
 * The policy implemented by this algorithm is:
 *   1) allocate the block in its requested cylinder group.
 *   2) quadratically rehash on the cylinder group number.
 *   3) brute force search for a free block.
 */
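/*
 * For example, with fs_ncg = 16 and a starting group of 5, the
 * quadratic rehash probes groups 6, 8, 12, and 4 (5 + 1, + 2, + 4,
 * + 8, wrapping mod 16); the brute force pass then starts at group 7
 * and walks every remaining group in order.
 */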
/*VARARGS5*/
u_long
hashalloc(ip, cg, pref, size, allocator)
	struct inode *ip;
	int cg;
	long pref;
	int size;	/* size for data blocks, mode for inodes */
	u_long (*allocator)();
{
	register struct fs *fs;
	long result;
	int i, icg = cg;

	fs = ip->i_fs;
	/*
	 * 1: preferred cylinder group
	 */
	result = (*allocator)(ip, cg, pref, size);
	if (result)
		return (result);
	/*
	 * 2: quadratic rehash
	 */
	for (i = 1; i < fs->fs_ncg; i *= 2) {
		cg += i;
		if (cg >= fs->fs_ncg)
			cg -= fs->fs_ncg;
		result = (*allocator)(ip, cg, 0, size);
		if (result)
			return (result);
	}
	/*
	 * 3: brute force search
	 * Note that we start at i == 2, since 0 was checked initially,
	 * and 1 is always checked in the quadratic rehash.
	 */
	cg = (icg + 2) % fs->fs_ncg;
	for (i = 2; i < fs->fs_ncg; i++) {
		result = (*allocator)(ip, cg, 0, size);
		if (result)
			return (result);
		cg++;
		if (cg == fs->fs_ncg)
			cg = 0;
	}
	return (NULL);
}

/*
 * Determine whether a fragment can be extended.
 *
 * Check to see if the necessary fragments are available, and
 * if they are, allocate them.
 */
daddr_t
fragextend(ip, cg, bprev, osize, nsize)
	struct inode *ip;
	int cg;
	long bprev;
	int osize, nsize;
{
	register struct fs *fs;
	register struct buf *bp;
	register struct cg *cgp;
	long bno;
	int frags, bbase;
	int i;

	fs = ip->i_fs;
	if (fs->fs_cs(fs, cg).cs_nffree < numfrags(fs, nsize - osize))
		return (NULL);
	frags = numfrags(fs, nsize);
	bbase = fragnum(fs, bprev);
	if (bbase > fragnum(fs, (bprev + frags - 1))) {
		/* cannot extend across a block boundary */
		return (NULL);
	}
	bp = bread(ip->i_dev, fsbtodb(fs, cgtod(fs, cg)), (int)fs->fs_cgsize);
	cgp = bp->b_un.b_cg;
	if (bp->b_flags & B_ERROR || !cg_chkmagic(cgp)) {
		brelse(bp);
		return (NULL);
	}
	cgp->cg_time = time.tv_sec;
	bno = dtogd(fs, bprev);
	for (i = numfrags(fs, osize); i < frags; i++)
		if (isclr(cg_blksfree(cgp), bno + i)) {
			brelse(bp);
			return (NULL);
		}
	/*
	 * the current fragment can be extended
	 * deduct the count on fragment being extended into
	 * increase the count on the remaining fragment (if any)
	 * allocate the extended piece
	 */
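	/*
	 * For example, growing a 2-frag fragment to 5 frags at the head
	 * of a block whose remaining 6 frags are free: the loop below
	 * finds the free run reaches the block boundary, so cg_frsum[6]
	 * is decremented and cg_frsum[3] incremented for the 3 frags
	 * left free after the extension.
	 */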
	for (i = frags; i < fs->fs_frag - bbase; i++)
		if (isclr(cg_blksfree(cgp), bno + i))
			break;
	cgp->cg_frsum[i - numfrags(fs, osize)]--;
	if (i != frags)
		cgp->cg_frsum[i - frags]++;
	for (i = numfrags(fs, osize); i < frags; i++) {
		clrbit(cg_blksfree(cgp), bno + i);
		cgp->cg_cs.cs_nffree--;
		fs->fs_cstotal.cs_nffree--;
		fs->fs_cs(fs, cg).cs_nffree--;
	}
	fs->fs_fmod++;
	bdwrite(bp);
	return (bprev);
}

/*
 * Determine whether a block can be allocated.
 *
 * Check to see if a block of the appropriate size is available,
 * and if it is, allocate it.
 */
daddr_t
alloccg(ip, cg, bpref, size)
	struct inode *ip;
	int cg;
	daddr_t bpref;
	int size;
{
	register struct fs *fs;
	register struct buf *bp;
	register struct cg *cgp;
	int bno, frags;
	int allocsiz;
	register int i;

	fs = ip->i_fs;
	if (fs->fs_cs(fs, cg).cs_nbfree == 0 && size == fs->fs_bsize)
		return (NULL);
	bp = bread(ip->i_dev, fsbtodb(fs, cgtod(fs, cg)), (int)fs->fs_cgsize);
	cgp = bp->b_un.b_cg;
	if (bp->b_flags & B_ERROR || !cg_chkmagic(cgp) ||
	    (cgp->cg_cs.cs_nbfree == 0 && size == fs->fs_bsize)) {
		brelse(bp);
		return (NULL);
	}
	cgp->cg_time = time.tv_sec;
	if (size == fs->fs_bsize) {
		bno = alloccgblk(fs, cgp, bpref);
		bdwrite(bp);
		return (bno);
	}
	/*
	 * check to see if any fragments are already available
	 * allocsiz is the size which will be allocated, hacking
	 * it down to a smaller size if necessary
	 */
	frags = numfrags(fs, size);
	for (allocsiz = frags; allocsiz < fs->fs_frag; allocsiz++)
		if (cgp->cg_frsum[allocsiz] != 0)
			break;
	if (allocsiz == fs->fs_frag) {
		/*
		 * no fragments were available, so a block will be
		 * allocated, and hacked up
		 */
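		/*
		 * For example, a 3-frag request with fs_frag = 8 takes a
		 * full block, keeps frags 0-2, and returns frags 3-7 to
		 * the free map: cs_nffree grows by 5 and cg_frsum[5] is
		 * incremented for the new 5-frag run.
		 */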
		if (cgp->cg_cs.cs_nbfree == 0) {
			brelse(bp);
			return (NULL);
		}
		bno = alloccgblk(fs, cgp, bpref);
		bpref = dtogd(fs, bno);
		for (i = frags; i < fs->fs_frag; i++)
			setbit(cg_blksfree(cgp), bpref + i);
		i = fs->fs_frag - frags;
		cgp->cg_cs.cs_nffree += i;
		fs->fs_cstotal.cs_nffree += i;
		fs->fs_cs(fs, cg).cs_nffree += i;
		fs->fs_fmod++;
		cgp->cg_frsum[i]++;
		bdwrite(bp);
		return (bno);
	}
	bno = mapsearch(fs, cgp, bpref, allocsiz);
	if (bno < 0) {
		brelse(bp);
		return (NULL);
	}
	for (i = 0; i < frags; i++)
		clrbit(cg_blksfree(cgp), bno + i);
	cgp->cg_cs.cs_nffree -= frags;
	fs->fs_cstotal.cs_nffree -= frags;
	fs->fs_cs(fs, cg).cs_nffree -= frags;
	fs->fs_fmod++;
	cgp->cg_frsum[allocsiz]--;
	if (frags != allocsiz)
		cgp->cg_frsum[allocsiz - frags]++;
	bdwrite(bp);
	return (cg * fs->fs_fpg + bno);
}

/*
 * Allocate a block in a cylinder group.
 *
 * This algorithm implements the following policy:
 *   1) allocate the requested block.
 *   2) allocate a rotationally optimal block in the same cylinder.
 *   3) allocate the next available block on the block rotor for the
 *      specified cylinder group.
 * Note that this routine only allocates fs_bsize blocks; these
 * blocks may be fragmented by the routine that allocates them.
 */
daddr_t
alloccgblk(fs, cgp, bpref)
	register struct fs *fs;
	register struct cg *cgp;
	daddr_t bpref;
{
	daddr_t bno;
	int cylno, pos, delta;
	short *cylbp;
	register int i;

	if (bpref == 0) {
		bpref = cgp->cg_rotor;
		goto norot;
	}
	bpref = blknum(fs, bpref);
	bpref = dtogd(fs, bpref);
	/*
	 * if the requested block is available, use it
	 */
	if (isblock(fs, cg_blksfree(cgp), fragstoblks(fs, bpref))) {
		bno = bpref;
		goto gotit;
	}
	/*
	 * check for a block available on the same cylinder
	 */
	cylno = cbtocylno(fs, bpref);
	if (cg_blktot(cgp)[cylno] == 0)
		goto norot;
	if (fs->fs_cpc == 0) {
		/*
		 * block layout info is not available, so just have
		 * to take any block in this cylinder.
		 */
		bpref = howmany(fs->fs_spc * cylno, NSPF(fs));
		goto norot;
	}
	/*
	 * check the summary information to see if a block is
	 * available in the requested cylinder starting at the
	 * requested rotational position and proceeding around.
	 */
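	/*
	 * cylbp[i] counts the free blocks at rotational position i
	 * within this cylinder, so the scan below starts at the
	 * preferred position and wraps around through the earlier
	 * positions before giving up on the cylinder.
	 */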
	cylbp = cg_blks(fs, cgp, cylno);
	pos = cbtorpos(fs, bpref);
	for (i = pos; i < fs->fs_nrpos; i++)
		if (cylbp[i] > 0)
			break;
	if (i == fs->fs_nrpos)
		for (i = 0; i < pos; i++)
			if (cylbp[i] > 0)
				break;
	if (cylbp[i] > 0) {
		/*
		 * found a rotational position; now find the actual
		 * block.  It is a panic if none is actually there.
		 */
		pos = cylno % fs->fs_cpc;
		bno = (cylno - pos) * fs->fs_spc / NSPB(fs);
		if (fs_postbl(fs, pos)[i] == -1) {
			printf("pos = %d, i = %d, fs = %s\n",
			    pos, i, fs->fs_fsmnt);
			panic("alloccgblk: cyl groups corrupted");
		}
		for (i = fs_postbl(fs, pos)[i];; ) {
			if (isblock(fs, cg_blksfree(cgp), bno + i)) {
				bno = blkstofrags(fs, (bno + i));
				goto gotit;
			}
			delta = fs_rotbl(fs)[i];
			if (delta <= 0 ||
			    delta + i > fragstoblks(fs, fs->fs_fpg))
				break;
			i += delta;
		}
		printf("pos = %d, i = %d, fs = %s\n", pos, i, fs->fs_fsmnt);
		panic("alloccgblk: can't find blk in cyl");
	}
norot:
	/*
	 * no blocks in the requested cylinder, so take next
	 * available one in this cylinder group.
	 */
	bno = mapsearch(fs, cgp, bpref, (int)fs->fs_frag);
	if (bno < 0)
		return (NULL);
	cgp->cg_rotor = bno;
gotit:
	clrblock(fs, cg_blksfree(cgp), (long)fragstoblks(fs, bno));
	cgp->cg_cs.cs_nbfree--;
	fs->fs_cstotal.cs_nbfree--;
	fs->fs_cs(fs, cgp->cg_cgx).cs_nbfree--;
	cylno = cbtocylno(fs, bno);
	cg_blks(fs, cgp, cylno)[cbtorpos(fs, bno)]--;
	cg_blktot(cgp)[cylno]--;
	fs->fs_fmod++;
	return (cgp->cg_cgx * fs->fs_fpg + bno);
}

/*
 * Determine whether an inode can be allocated.
 *
 * Check to see if an inode is available, and if it is,
 * allocate it using the following policy:
 *   1) allocate the requested inode.
 *   2) allocate the next available inode after the requested
 *      inode in the specified cylinder group.
 */
ino_t
ialloccg(ip, cg, ipref, mode)
	struct inode *ip;
	int cg;
	daddr_t ipref;
	int mode;
{
	register struct fs *fs;
	register struct cg *cgp;
	struct buf *bp;
	int start, len, loc, map, i;

	fs = ip->i_fs;
	if (fs->fs_cs(fs, cg).cs_nifree == 0)
		return (NULL);
	bp = bread(ip->i_dev, fsbtodb(fs, cgtod(fs, cg)), (int)fs->fs_cgsize);
	cgp = bp->b_un.b_cg;
	if (bp->b_flags & B_ERROR || !cg_chkmagic(cgp) ||
	    cgp->cg_cs.cs_nifree == 0) {
		brelse(bp);
		return (NULL);
	}
	cgp->cg_time = time.tv_sec;
	if (ipref) {
		ipref %= fs->fs_ipg;
		if (isclr(cg_inosused(cgp), ipref))
			goto gotit;
	}
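	/*
	 * Scan the inode map starting at the rotor: skpc() skips over
	 * bytes that are all ones (fully allocated); the first byte
	 * with a clear bit is then searched bit by bit below.  If the
	 * tail of the map is exhausted, the scan wraps to the front.
	 */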
	start = cgp->cg_irotor / NBBY;
	len = howmany(fs->fs_ipg - cgp->cg_irotor, NBBY);
	loc = skpc(0xff, len, &cg_inosused(cgp)[start]);
	if (loc == 0) {
		len = start + 1;
		start = 0;
		loc = skpc(0xff, len, &cg_inosused(cgp)[0]);
		if (loc == 0) {
			printf("cg = %d, irotor = %d, fs = %s\n",
			    cg, cgp->cg_irotor, fs->fs_fsmnt);
			panic("ialloccg: map corrupted");
			/* NOTREACHED */
		}
	}
	i = start + len - loc;
	map = cg_inosused(cgp)[i];
	ipref = i * NBBY;
	for (i = 1; i < (1 << NBBY); i <<= 1, ipref++) {
		if ((map & i) == 0) {
			cgp->cg_irotor = ipref;
			goto gotit;
		}
	}
	printf("fs = %s\n", fs->fs_fsmnt);
	panic("ialloccg: block not in map");
	/* NOTREACHED */
gotit:
	setbit(cg_inosused(cgp), ipref);
	cgp->cg_cs.cs_nifree--;
	fs->fs_cstotal.cs_nifree--;
	fs->fs_cs(fs, cg).cs_nifree--;
	fs->fs_fmod++;
	if ((mode & IFMT) == IFDIR) {
		cgp->cg_cs.cs_ndir++;
		fs->fs_cstotal.cs_ndir++;
		fs->fs_cs(fs, cg).cs_ndir++;
	}
	bdwrite(bp);
	return (cg * fs->fs_ipg + ipref);
}

/*
 * Free a block or fragment.
 *
 * The specified block or fragment is placed back in the
 * free map. If a fragment is deallocated, a possible
 * block reassembly is checked.
 */
blkfree(ip, bno, size)
	register struct inode *ip;
	daddr_t bno;
	off_t size;
{
	register struct fs *fs;
	register struct cg *cgp;
	register struct buf *bp;
	int cg, blk, frags, bbase;
	register int i;

	fs = ip->i_fs;
	if ((unsigned)size > fs->fs_bsize || fragoff(fs, size) != 0) {
		printf("dev = 0x%x, bsize = %d, size = %d, fs = %s\n",
		    ip->i_dev, fs->fs_bsize, size, fs->fs_fsmnt);
		panic("blkfree: bad size");
	}
	cg = dtog(fs, bno);
	if (badblock(fs, bno)) {
		printf("bad block %d, ino %d\n", bno, ip->i_number);
		return;
	}
	bp = bread(ip->i_dev, fsbtodb(fs, cgtod(fs, cg)), (int)fs->fs_cgsize);
	cgp = bp->b_un.b_cg;
	if (bp->b_flags & B_ERROR || !cg_chkmagic(cgp)) {
		brelse(bp);
		return;
	}
	cgp->cg_time = time.tv_sec;
	bno = dtogd(fs, bno);
	if (size == fs->fs_bsize) {
		if (isblock(fs, cg_blksfree(cgp), fragstoblks(fs, bno))) {
			printf("dev = 0x%x, block = %d, fs = %s\n",
			    ip->i_dev, bno, fs->fs_fsmnt);
			panic("blkfree: freeing free block");
		}
		setblock(fs, cg_blksfree(cgp), fragstoblks(fs, bno));
		cgp->cg_cs.cs_nbfree++;
		fs->fs_cstotal.cs_nbfree++;
		fs->fs_cs(fs, cg).cs_nbfree++;
		i = cbtocylno(fs, bno);
		cg_blks(fs, cgp, i)[cbtorpos(fs, bno)]++;
		cg_blktot(cgp)[i]++;
	} else {
		bbase = bno - fragnum(fs, bno);
		/*
		 * decrement the counts associated with the old frags
		 */
		blk = blkmap(fs, cg_blksfree(cgp), bbase);
		fragacct(fs, blk, cgp->cg_frsum, -1);
		/*
		 * deallocate the fragment
		 */
		frags = numfrags(fs, size);
		for (i = 0; i < frags; i++) {
			if (isset(cg_blksfree(cgp), bno + i)) {
				printf("dev = 0x%x, block = %d, fs = %s\n",
				    ip->i_dev, bno + i, fs->fs_fsmnt);
				panic("blkfree: freeing free frag");
			}
			setbit(cg_blksfree(cgp), bno + i);
		}
		cgp->cg_cs.cs_nffree += i;
		fs->fs_cstotal.cs_nffree += i;
		fs->fs_cs(fs, cg).cs_nffree += i;
		/*
		 * add back in counts associated with the new frags
		 */
		blk = blkmap(fs, cg_blksfree(cgp), bbase);
		fragacct(fs, blk, cgp->cg_frsum, 1);
		/*
		 * if a complete block has been reassembled, account for it
		 */
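		/*
		 * For example, if this call freed the last 3 frags of a
		 * block whose other 5 were already free, every bit of the
		 * block is now set, so the fs_frag fragments are taken
		 * back out of the fragment counts and recounted as one
		 * free block.
		 */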
		if (isblock(fs, cg_blksfree(cgp),
		    (daddr_t)fragstoblks(fs, bbase))) {
			cgp->cg_cs.cs_nffree -= fs->fs_frag;
			fs->fs_cstotal.cs_nffree -= fs->fs_frag;
			fs->fs_cs(fs, cg).cs_nffree -= fs->fs_frag;
			cgp->cg_cs.cs_nbfree++;
			fs->fs_cstotal.cs_nbfree++;
			fs->fs_cs(fs, cg).cs_nbfree++;
			i = cbtocylno(fs, bbase);
			cg_blks(fs, cgp, i)[cbtorpos(fs, bbase)]++;
			cg_blktot(cgp)[i]++;
		}
	}
	fs->fs_fmod++;
	bdwrite(bp);
}

/*
 * Free an inode.
 *
 * The specified inode is placed back in the free map.
 */
ifree(ip, ino, mode)
	struct inode *ip;
	ino_t ino;
	int mode;
{
	register struct fs *fs;
	register struct cg *cgp;
	register struct buf *bp;
	int cg;

	fs = ip->i_fs;
	if ((unsigned)ino >= fs->fs_ipg*fs->fs_ncg) {
		printf("dev = 0x%x, ino = %d, fs = %s\n",
		    ip->i_dev, ino, fs->fs_fsmnt);
		panic("ifree: range");
	}
	cg = itog(fs, ino);
	bp = bread(ip->i_dev, fsbtodb(fs, cgtod(fs, cg)), (int)fs->fs_cgsize);
	cgp = bp->b_un.b_cg;
	if (bp->b_flags & B_ERROR || !cg_chkmagic(cgp)) {
		brelse(bp);
		return;
	}
	cgp->cg_time = time.tv_sec;
	ino %= fs->fs_ipg;
	if (isclr(cg_inosused(cgp), ino)) {
		printf("dev = 0x%x, ino = %d, fs = %s\n",
		    ip->i_dev, ino, fs->fs_fsmnt);
		panic("ifree: freeing free inode");
	}
	clrbit(cg_inosused(cgp), ino);
	if (ino < cgp->cg_irotor)
		cgp->cg_irotor = ino;
	cgp->cg_cs.cs_nifree++;
	fs->fs_cstotal.cs_nifree++;
	fs->fs_cs(fs, cg).cs_nifree++;
	if ((mode & IFMT) == IFDIR) {
		cgp->cg_cs.cs_ndir--;
		fs->fs_cstotal.cs_ndir--;
		fs->fs_cs(fs, cg).cs_ndir--;
	}
	fs->fs_fmod++;
	bdwrite(bp);
}

/*
 * Find a block of the specified size in the specified cylinder group.
 *
 * It is a panic if a request is made to find a block when none are
 * available.
 */
daddr_t
mapsearch(fs, cgp, bpref, allocsiz)
	register struct fs *fs;
	register struct cg *cgp;
	daddr_t bpref;
	int allocsiz;
{
	daddr_t bno;
	int start, len, loc, i;
	int blk, field, subfield, pos;

	/*
	 * find the fragment by searching through the free block
	 * map for an appropriate bit pattern
	 */
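	/*
	 * Each entry of fragtbl[fs_frag] summarizes one byte of the free
	 * map as a bit vector of the free-run sizes occurring in it;
	 * scanc() stops at the first byte whose summary matches the
	 * single-bit mask built from allocsiz, i.e. a byte known to
	 * contain a run of the size the caller chose from cg_frsum.
	 */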
	if (bpref)
		start = dtogd(fs, bpref) / NBBY;
	else
		start = cgp->cg_frotor / NBBY;
	len = howmany(fs->fs_fpg, NBBY) - start;
	loc = scanc((unsigned)len, (u_char *)&cg_blksfree(cgp)[start],
		(u_char *)fragtbl[fs->fs_frag],
		(u_char)(1 << (allocsiz - 1 + (fs->fs_frag % NBBY))));
	if (loc == 0) {
		len = start + 1;
		start = 0;
		loc = scanc((unsigned)len, (u_char *)&cg_blksfree(cgp)[0],
			(u_char *)fragtbl[fs->fs_frag],
			(u_char)(1 << (allocsiz - 1 + (fs->fs_frag % NBBY))));
		if (loc == 0) {
			printf("start = %d, len = %d, fs = %s\n",
			    start, len, fs->fs_fsmnt);
			panic("alloccg: map corrupted");
			/* NOTREACHED */
		}
	}
	bno = (start + len - loc) * NBBY;
	cgp->cg_frotor = bno;
	/*
	 * found the byte in the map
	 * sift through the bits to find the selected frag
	 */
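	/*
	 * around[allocsiz] masks a window of allocsiz free bits plus a
	 * boundary bit on either side, and inside[allocsiz] is the
	 * pattern that window must match; sliding both left one bit at
	 * a time locates the run's exact position within the block.
	 * (The blk <<= 1 below makes room for the low boundary bit.)
	 */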
	for (i = bno + NBBY; bno < i; bno += fs->fs_frag) {
		blk = blkmap(fs, cg_blksfree(cgp), bno);
		blk <<= 1;
		field = around[allocsiz];
		subfield = inside[allocsiz];
		for (pos = 0; pos <= fs->fs_frag - allocsiz; pos++) {
			if ((blk & field) == subfield)
				return (bno + pos);
			field <<= 1;
			subfield <<= 1;
		}
	}
	printf("bno = %d, fs = %s\n", bno, fs->fs_fsmnt);
	panic("alloccg: block not in map");
	return (-1);
}

/*
 * Fserr prints the name of a file system with an error diagnostic.
 *
 * The form of the error message is:
 *	fs: error message
 */
fserr(fs, cp)
	struct fs *fs;
	char *cp;
{

	log(LOG_ERR, "%s: %s\n", fs->fs_fsmnt, cp);
}
1044