/*
 * Copyright (c) 1982, 1986, 1989 Regents of the University of California.
 * All rights reserved.
 *
 * %sccs.include.redist.c%
 *
 *	@(#)ffs_alloc.c	7.23 (Berkeley) 12/05/90
 */

#include "param.h"
#include "systm.h"
#include "buf.h"
#include "user.h"
#include "vnode.h"
#include "kernel.h"
#include "syslog.h"
#include "cmap.h"
#include "../ufs/quota.h"
#include "../ufs/inode.h"
#include "../ufs/fs.h"

extern u_long		hashalloc();
extern ino_t		ialloccg();
extern daddr_t		alloccg();
extern daddr_t		alloccgblk();
extern daddr_t		fragextend();
extern daddr_t		blkpref();
extern daddr_t		mapsearch();
extern int		inside[], around[];
extern unsigned char	*fragtbl[];

/*
 * Allocate a block in the file system.
 *
 * The size of the requested block is given, which must be some
 * multiple of fs_fsize and <= fs_bsize.
 * A preference may be optionally specified. If a preference is given
 * the following hierarchy is used to allocate a block:
 *   1) allocate the requested block.
 *   2) allocate a rotationally optimal block in the same cylinder.
 *   3) allocate a block in the same cylinder group.
 *   4) quadratically rehash into other cylinder groups, until an
 *      available block is located.
 * If no block preference is given the following hierarchy is used
 * to allocate a block:
 *   1) allocate a block in the cylinder group that contains the
 *      inode for the file.
 *   2) quadratically rehash into other cylinder groups, until an
 *      available block is located.
 */
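/*
 * Example (hypothetical calling sequence; the actual call sites live
 * in the block-mapping code): a caller growing a file by one full
 * block might pass a preference from blkpref() and take the new disk
 * address back through bnp:
 *
 *	daddr_t bno;
 *	int error;
 *
 *	error = alloc(ip, lbn, blkpref(ip, lbn, indx, &ip->i_db[0]),
 *	    (int)fs->fs_bsize, &bno);
 *	if (error)
 *		return (error);
 */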
alloc(ip, lbn, bpref, size, bnp)
	register struct inode *ip;
	daddr_t lbn, bpref;
	int size;
	daddr_t *bnp;
{
	daddr_t bno;
	register struct fs *fs;
	register struct buf *bp;
	int cg, error;
	struct ucred *cred = u.u_cred;		/* XXX */

	*bnp = 0;
	fs = ip->i_fs;
	if ((unsigned)size > fs->fs_bsize || fragoff(fs, size) != 0) {
		printf("dev = 0x%x, bsize = %d, size = %d, fs = %s\n",
		    ip->i_dev, fs->fs_bsize, size, fs->fs_fsmnt);
		panic("alloc: bad size");
	}
	if (size == fs->fs_bsize && fs->fs_cstotal.cs_nbfree == 0)
		goto nospace;
	if (cred->cr_uid != 0 && freespace(fs, fs->fs_minfree) <= 0)
		goto nospace;
#ifdef QUOTA
	if (error = chkdq(ip, (long)btodb(size), cred, 0))
		return (error);
#endif
	if (bpref >= fs->fs_size)
		bpref = 0;
	if (bpref == 0)
		cg = itog(fs, ip->i_number);
	else
		cg = dtog(fs, bpref);
	bno = (daddr_t)hashalloc(ip, cg, (long)bpref, size,
		(u_long (*)())alloccg);
	if (bno > 0) {
		ip->i_blocks += btodb(size);
		ip->i_flag |= IUPD|ICHG;
		*bnp = bno;
		return (0);
	}
#ifdef QUOTA
	/*
	 * Restore user's disk quota because allocation failed.
	 */
	(void) chkdq(ip, (long)-btodb(size), cred, FORCE);
#endif
nospace:
	fserr(fs, cred->cr_uid, "file system full");
	uprintf("\n%s: write failed, file system is full\n", fs->fs_fsmnt);
	return (ENOSPC);
}

/*
 * Reallocate a fragment to a bigger size.
 *
 * The number and size of the old block are given, and a preference
 * and new size are also specified. The allocator attempts to extend
 * the original block. Failing that, the regular block allocator is
 * invoked to get an appropriate block.
 */
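/*
 * Example (illustrative sizes, assuming fs_bsize = 8192 and
 * fs_fsize = 1024): growing the last fragment of a file from 2048
 * to 3072 bytes would be requested as
 *
 *	error = realloccg(ip, lbprev, bpref, 2048, 3072, &bp);
 *
 * fragextend() is tried first; only if the adjoining 1024-byte
 * fragment is taken does the code fall back to allocating a new
 * location, the data moving with the buffer when b_blkno is updated.
 */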
realloccg(ip, lbprev, bpref, osize, nsize, bpp)
	register struct inode *ip;
	off_t lbprev;
	daddr_t bpref;
	int osize, nsize;
	struct buf **bpp;
{
	register struct fs *fs;
	struct buf *bp, *obp;
	int cg, request, error;
	daddr_t bprev, bno;
	struct ucred *cred = u.u_cred;		/* XXX */

	*bpp = 0;
	fs = ip->i_fs;
	if ((unsigned)osize > fs->fs_bsize || fragoff(fs, osize) != 0 ||
	    (unsigned)nsize > fs->fs_bsize || fragoff(fs, nsize) != 0) {
		printf("dev = 0x%x, bsize = %d, osize = %d, nsize = %d, fs = %s\n",
		    ip->i_dev, fs->fs_bsize, osize, nsize, fs->fs_fsmnt);
		panic("realloccg: bad size");
	}
	if (cred->cr_uid != 0 && freespace(fs, fs->fs_minfree) <= 0)
		goto nospace;
	if ((bprev = ip->i_db[lbprev]) == 0) {
		printf("dev = 0x%x, bsize = %d, bprev = %d, fs = %s\n",
		    ip->i_dev, fs->fs_bsize, bprev, fs->fs_fsmnt);
		panic("realloccg: bad bprev");
	}
	/*
	 * Allocate the extra space in the buffer.
	 */
	if (error = bread(ITOV(ip), lbprev, osize, NOCRED, &bp)) {
		brelse(bp);
		return (error);
	}
#ifdef QUOTA
	if (error = chkdq(ip, (long)btodb(nsize - osize), cred, 0)) {
		brelse(bp);
		return (error);
	}
#endif
	allocbuf(bp, nsize);
	bp->b_flags |= B_DONE;
	bzero(bp->b_un.b_addr + osize, (unsigned)nsize - osize);
	/*
	 * Check for extension in the existing location.
	 */
	cg = dtog(fs, bprev);
	if (bno = fragextend(ip, cg, (long)bprev, osize, nsize)) {
		if (bp->b_blkno != fsbtodb(fs, bno))
			panic("bad blockno");
		ip->i_blocks += btodb(nsize - osize);
		ip->i_flag |= IUPD|ICHG;
		*bpp = bp;
		return (0);
	}
	/*
	 * Allocate a new disk location.
	 */
	if (bpref >= fs->fs_size)
		bpref = 0;
	switch ((int)fs->fs_optim) {
	case FS_OPTSPACE:
		/*
		 * Allocate an exact sized fragment. Although this makes
		 * best use of space, we will waste time relocating it if
		 * the file continues to grow. If the fragmentation is
		 * less than half of the minimum free reserve, we choose
		 * to begin optimizing for time.
		 */
		request = nsize;
		if (fs->fs_minfree < 5 ||
		    fs->fs_cstotal.cs_nffree >
		    fs->fs_dsize * fs->fs_minfree / (2 * 100))
			break;
		log(LOG_NOTICE, "%s: optimization changed from SPACE to TIME\n",
			fs->fs_fsmnt);
		fs->fs_optim = FS_OPTTIME;
		break;
	case FS_OPTTIME:
		/*
		 * At this point we have discovered a file that is trying
		 * to grow a small fragment to a larger fragment. To save
		 * time, we allocate a full sized block, then free the
		 * unused portion. If the file continues to grow, the
		 * `fragextend' call above will be able to grow it in place
		 * without further copying. If aberrant programs cause
		 * disk fragmentation to grow within 2% of the free reserve,
		 * we choose to begin optimizing for space.
		 */
		request = fs->fs_bsize;
		if (fs->fs_cstotal.cs_nffree <
		    fs->fs_dsize * (fs->fs_minfree - 2) / 100)
			break;
		log(LOG_NOTICE, "%s: optimization changed from TIME to SPACE\n",
			fs->fs_fsmnt);
		fs->fs_optim = FS_OPTSPACE;
		break;
	default:
		printf("dev = 0x%x, optim = %d, fs = %s\n",
		    ip->i_dev, fs->fs_optim, fs->fs_fsmnt);
		panic("realloccg: bad optim");
		/* NOTREACHED */
	}
	bno = (daddr_t)hashalloc(ip, cg, (long)bpref, request,
		(u_long (*)())alloccg);
	if (bno > 0) {
		bp->b_blkno = fsbtodb(fs, bno);
		(void) vnode_pager_uncache(ITOV(ip));
		blkfree(ip, bprev, (off_t)osize);
		if (nsize < request)
			blkfree(ip, bno + numfrags(fs, nsize),
				(off_t)(request - nsize));
		ip->i_blocks += btodb(nsize - osize);
		ip->i_flag |= IUPD|ICHG;
		*bpp = bp;
		return (0);
	}
#ifdef QUOTA
	/*
	 * Restore user's disk quota because allocation failed.
	 */
	(void) chkdq(ip, (long)-btodb(nsize - osize), cred, FORCE);
#endif
	brelse(bp);
nospace:
	/*
	 * no space available
	 */
	fserr(fs, cred->cr_uid, "file system full");
	uprintf("\n%s: write failed, file system is full\n", fs->fs_fsmnt);
	return (ENOSPC);
}

/*
 * Allocate an inode in the file system.
 *
 * A preference may be optionally specified. If a preference is given
 * the following hierarchy is used to allocate an inode:
 *   1) allocate the requested inode.
 *   2) allocate an inode in the same cylinder group.
 *   3) quadratically rehash into other cylinder groups, until an
 *      available inode is located.
 * If no inode preference is given the following hierarchy is used
 * to allocate an inode:
 *   1) allocate an inode in cylinder group 0.
 *   2) quadratically rehash into other cylinder groups, until an
 *      available inode is located.
 */
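/*
 * Example (hypothetical caller, sketching the usual preference
 * choice): directories are spread out via dirpref() while plain
 * files are kept near their parent directory:
 *
 *	if ((mode & IFMT) == IFDIR)
 *		ipref = dirpref(fs);
 *	else
 *		ipref = pip->i_number;
 *	error = ialloc(pip, ipref, mode, cred, &ip);
 */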
ialloc(pip, ipref, mode, cred, ipp)
	register struct inode *pip;
	ino_t ipref;
	int mode;
	struct ucred *cred;
	struct inode **ipp;
{
	ino_t ino;
	register struct fs *fs;
	register struct inode *ip;
	int cg, error;

	*ipp = 0;
	fs = pip->i_fs;
	if (fs->fs_cstotal.cs_nifree == 0)
		goto noinodes;
	if (ipref >= fs->fs_ncg * fs->fs_ipg)
		ipref = 0;
	cg = itog(fs, ipref);
	ino = (ino_t)hashalloc(pip, cg, (long)ipref, mode, ialloccg);
	if (ino == 0)
		goto noinodes;
	error = iget(pip, ino, ipp);
	if (error) {
		ifree(pip, ino, mode);
		return (error);
	}
	ip = *ipp;
	if (ip->i_mode) {
		printf("mode = 0%o, inum = %d, fs = %s\n",
		    ip->i_mode, ip->i_number, fs->fs_fsmnt);
		panic("ialloc: dup alloc");
	}
	if (ip->i_blocks) {				/* XXX */
		printf("free inode %s/%d had %d blocks\n",
		    fs->fs_fsmnt, ino, ip->i_blocks);
		ip->i_blocks = 0;
	}
	ip->i_flags = 0;
	/*
	 * Set up a new generation number for this inode.
	 */
	if (++nextgennumber < (u_long)time.tv_sec)
		nextgennumber = time.tv_sec;
	ip->i_gen = nextgennumber;
	return (0);
noinodes:
	fserr(fs, cred->cr_uid, "out of inodes");
	uprintf("\n%s: create/symlink failed, no inodes free\n", fs->fs_fsmnt);
	return (ENOSPC);
}

/*
 * Find a cylinder group in which to place a directory.
 *
 * The policy implemented by this algorithm is to select, from
 * among those cylinder groups with an above-average number of
 * free inodes, the one with the smallest number of directories.
 */
ino_t
dirpref(fs)
	register struct fs *fs;
{
	int cg, minndir, mincg, avgifree;

	avgifree = fs->fs_cstotal.cs_nifree / fs->fs_ncg;
	minndir = fs->fs_ipg;
	mincg = 0;
	for (cg = 0; cg < fs->fs_ncg; cg++)
		if (fs->fs_cs(fs, cg).cs_ndir < minndir &&
		    fs->fs_cs(fs, cg).cs_nifree >= avgifree) {
			mincg = cg;
			minndir = fs->fs_cs(fs, cg).cs_ndir;
		}
	return ((ino_t)(fs->fs_ipg * mincg));
}

/*
 * Select the desired position for the next block in a file.  The file is
 * logically divided into sections. The first section is composed of the
 * direct blocks. Each additional section contains fs_maxbpg blocks.
 *
 * If no blocks have been allocated in the first section, the policy is to
 * request a block in the same cylinder group as the inode that describes
 * the file. If no blocks have been allocated in any other section, the
 * policy is to place the section in a cylinder group with a greater than
 * average number of free blocks.  An appropriate cylinder group is found
 * by using a rotor that sweeps the cylinder groups. When a new group of
 * blocks is needed, the sweep begins in the cylinder group following the
 * cylinder group from which the previous allocation was made. The sweep
 * continues until a cylinder group with greater than the average number
 * of free blocks is found. If the allocation is for the first block in an
 * indirect block, the information on the previous allocation is unavailable;
 * here a best guess is made based upon the logical block number being
 * allocated.
 *
 * If a section is already partially allocated, the policy is to
 * contiguously allocate fs_maxcontig blocks.  The end of one of these
 * contiguous blocks and the beginning of the next is physically separated
 * so that the disk head will be in transit between them for at least
 * fs_rotdelay milliseconds.  This is to allow time for the processor to
 * schedule another I/O transfer.
 */
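/*
 * Example (illustrative, assuming NDADDR = 12 and fs_maxbpg = 256):
 * logical blocks 0-11 are placed in the inode's cylinder group;
 * block 12 is index 0 of the first indirect block and starts a new
 * section, triggering the rotor sweep; blocks 13-267 then stay in
 * that section; block 268 (index 256) starts the next sweep.
 */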
daddr_t
blkpref(ip, lbn, indx, bap)
	struct inode *ip;
	daddr_t lbn;
	int indx;
	daddr_t *bap;
{
	register struct fs *fs;
	register int cg;
	int avgbfree, startcg;
	daddr_t nextblk;

	fs = ip->i_fs;
	if (indx % fs->fs_maxbpg == 0 || bap[indx - 1] == 0) {
		if (lbn < NDADDR) {
			cg = itog(fs, ip->i_number);
			return (fs->fs_fpg * cg + fs->fs_frag);
		}
		/*
		 * Find a cylinder with greater than average number of
		 * unused data blocks.
		 */
		if (indx == 0 || bap[indx - 1] == 0)
			startcg = itog(fs, ip->i_number) + lbn / fs->fs_maxbpg;
		else
			startcg = dtog(fs, bap[indx - 1]) + 1;
		startcg %= fs->fs_ncg;
		avgbfree = fs->fs_cstotal.cs_nbfree / fs->fs_ncg;
		for (cg = startcg; cg < fs->fs_ncg; cg++)
			if (fs->fs_cs(fs, cg).cs_nbfree >= avgbfree) {
				fs->fs_cgrotor = cg;
				return (fs->fs_fpg * cg + fs->fs_frag);
			}
		for (cg = 0; cg <= startcg; cg++)
			if (fs->fs_cs(fs, cg).cs_nbfree >= avgbfree) {
				fs->fs_cgrotor = cg;
				return (fs->fs_fpg * cg + fs->fs_frag);
			}
		return (NULL);
	}
	/*
	 * One or more previous blocks have been laid out. If less
	 * than fs_maxcontig previous blocks are contiguous, the
	 * next block is requested contiguously, otherwise it is
	 * requested rotationally delayed by fs_rotdelay milliseconds.
	 */
	nextblk = bap[indx - 1] + fs->fs_frag;
	if (indx > fs->fs_maxcontig &&
	    bap[indx - fs->fs_maxcontig] + blkstofrags(fs, fs->fs_maxcontig)
	    != nextblk)
		return (nextblk);
	if (fs->fs_rotdelay != 0)
		/*
		 * Here we convert ms of delay to frags as:
		 * (frags) = (ms) * (rev/sec) * (sect/rev) /
		 *	((sect/frag) * (ms/sec))
		 * then round up to the next block.
		 */
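		/*
		 * Worked example (illustrative parameters): with
		 * fs_rotdelay = 4 ms, fs_rps = 60, fs_nsect = 32 and
		 * NSPF(fs) = 2, the quotient is 4 * 60 * 32 / 2000 = 3
		 * frags, and roundup(3, fs_frag) with fs_frag = 8
		 * advances nextblk by a full block.
		 */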
		nextblk += roundup(fs->fs_rotdelay * fs->fs_rps * fs->fs_nsect /
		    (NSPF(fs) * 1000), fs->fs_frag);
	return (nextblk);
}

/*
 * Implement the cylinder overflow algorithm.
 *
 * The policy implemented by this algorithm is:
 *   1) allocate the block in its requested cylinder group.
 *   2) quadratically rehash on the cylinder group number.
 *   3) brute force search for a free block.
 */
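/*
 * Example (illustrative): starting from group icg = c, the quadratic
 * rehash probes c + 1, c + 3, c + 7, c + 15, ... (mod fs_ncg),
 * doubling the step each time; the brute force pass then tries every
 * remaining group in order, beginning at c + 2.
 */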
/*VARARGS5*/
u_long
hashalloc(ip, cg, pref, size, allocator)
	struct inode *ip;
	int cg;
	long pref;
	int size;	/* size for data blocks, mode for inodes */
	u_long (*allocator)();
{
	register struct fs *fs;
	long result;
	int i, icg = cg;

	fs = ip->i_fs;
	/*
	 * 1: preferred cylinder group
	 */
	result = (*allocator)(ip, cg, pref, size);
	if (result)
		return (result);
	/*
	 * 2: quadratic rehash
	 */
	for (i = 1; i < fs->fs_ncg; i *= 2) {
		cg += i;
		if (cg >= fs->fs_ncg)
			cg -= fs->fs_ncg;
		result = (*allocator)(ip, cg, 0, size);
		if (result)
			return (result);
	}
	/*
	 * 3: brute force search
	 * Note that we start at i == 2, since 0 was checked initially,
	 * and 1 is always checked in the quadratic rehash.
	 */
	cg = (icg + 2) % fs->fs_ncg;
	for (i = 2; i < fs->fs_ncg; i++) {
		result = (*allocator)(ip, cg, 0, size);
		if (result)
			return (result);
		cg++;
		if (cg == fs->fs_ncg)
			cg = 0;
	}
	return (NULL);
}

/*
 * Determine whether a fragment can be extended.
 *
 * Check to see if the necessary fragments are available, and
 * if they are, allocate them.
 */
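/*
 * Example (illustrative, assuming fs_frag = 8): extending a
 * 2048-byte fragment to 3072 bytes in place requires only that the
 * single 1024-byte fragment following it be free, and is possible
 * only if the old and new pieces fall within one block (the
 * fragnum() comparison below).
 */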
daddr_t
fragextend(ip, cg, bprev, osize, nsize)
	struct inode *ip;
	int cg;
	long bprev;
	int osize, nsize;
{
	register struct fs *fs;
	register struct cg *cgp;
	struct buf *bp;
	long bno;
	int frags, bbase;
	int i, error;

	fs = ip->i_fs;
	if (fs->fs_cs(fs, cg).cs_nffree < numfrags(fs, nsize - osize))
		return (NULL);
	frags = numfrags(fs, nsize);
	bbase = fragnum(fs, bprev);
	if (bbase > fragnum(fs, (bprev + frags - 1))) {
		/* cannot extend across a block boundary */
		return (NULL);
	}
	error = bread(ip->i_devvp, fsbtodb(fs, cgtod(fs, cg)),
		(int)fs->fs_cgsize, NOCRED, &bp);
	if (error) {
		brelse(bp);
		return (NULL);
	}
	cgp = bp->b_un.b_cg;
	if (!cg_chkmagic(cgp)) {
		brelse(bp);
		return (NULL);
	}
	cgp->cg_time = time.tv_sec;
	bno = dtogd(fs, bprev);
	for (i = numfrags(fs, osize); i < frags; i++)
		if (isclr(cg_blksfree(cgp), bno + i)) {
			brelse(bp);
			return (NULL);
		}
	/*
	 * the current fragment can be extended
	 * deduct the count on fragment being extended into
	 * increase the count on the remaining fragment (if any)
	 * allocate the extended piece
	 */
	for (i = frags; i < fs->fs_frag - bbase; i++)
		if (isclr(cg_blksfree(cgp), bno + i))
			break;
	cgp->cg_frsum[i - numfrags(fs, osize)]--;
	if (i != frags)
		cgp->cg_frsum[i - frags]++;
	for (i = numfrags(fs, osize); i < frags; i++) {
		clrbit(cg_blksfree(cgp), bno + i);
		cgp->cg_cs.cs_nffree--;
		fs->fs_cstotal.cs_nffree--;
		fs->fs_cs(fs, cg).cs_nffree--;
	}
	fs->fs_fmod++;
	bdwrite(bp);
	return (bprev);
}

/*
 * Determine whether a block can be allocated.
 *
 * Check to see if a block of the appropriate size is available,
 * and if it is, allocate it.
 */
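/*
 * Example (illustrative, assuming fs_frag = 8): a request for 3
 * frags when no suitable free run exists carves up a whole block:
 * 3 frags are handed out, the remaining 5 are marked free again,
 * and cg_frsum[5] is incremented to record one free run of length 5
 * for later searches.
 */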
daddr_t
alloccg(ip, cg, bpref, size)
	struct inode *ip;
	int cg;
	daddr_t bpref;
	int size;
{
	register struct fs *fs;
	register struct cg *cgp;
	struct buf *bp;
	register int i;
	int error, bno, frags, allocsiz;

	fs = ip->i_fs;
	if (fs->fs_cs(fs, cg).cs_nbfree == 0 && size == fs->fs_bsize)
		return (NULL);
	error = bread(ip->i_devvp, fsbtodb(fs, cgtod(fs, cg)),
		(int)fs->fs_cgsize, NOCRED, &bp);
	if (error) {
		brelse(bp);
		return (NULL);
	}
	cgp = bp->b_un.b_cg;
	if (!cg_chkmagic(cgp) ||
	    (cgp->cg_cs.cs_nbfree == 0 && size == fs->fs_bsize)) {
		brelse(bp);
		return (NULL);
	}
	cgp->cg_time = time.tv_sec;
	if (size == fs->fs_bsize) {
		bno = alloccgblk(fs, cgp, bpref);
		bdwrite(bp);
		return (bno);
	}
	/*
	 * check to see if any fragments are already available
	 * allocsiz is the size which will be allocated, hacking
	 * it down to a smaller size if necessary
	 */
	frags = numfrags(fs, size);
	for (allocsiz = frags; allocsiz < fs->fs_frag; allocsiz++)
		if (cgp->cg_frsum[allocsiz] != 0)
			break;
	if (allocsiz == fs->fs_frag) {
		/*
		 * no fragments were available, so a block will be
		 * allocated, and hacked up
		 */
		if (cgp->cg_cs.cs_nbfree == 0) {
			brelse(bp);
			return (NULL);
		}
		bno = alloccgblk(fs, cgp, bpref);
		bpref = dtogd(fs, bno);
		for (i = frags; i < fs->fs_frag; i++)
			setbit(cg_blksfree(cgp), bpref + i);
		i = fs->fs_frag - frags;
		cgp->cg_cs.cs_nffree += i;
		fs->fs_cstotal.cs_nffree += i;
		fs->fs_cs(fs, cg).cs_nffree += i;
		fs->fs_fmod++;
		cgp->cg_frsum[i]++;
		bdwrite(bp);
		return (bno);
	}
	bno = mapsearch(fs, cgp, bpref, allocsiz);
	if (bno < 0) {
		brelse(bp);
		return (NULL);
	}
	for (i = 0; i < frags; i++)
		clrbit(cg_blksfree(cgp), bno + i);
	cgp->cg_cs.cs_nffree -= frags;
	fs->fs_cstotal.cs_nffree -= frags;
	fs->fs_cs(fs, cg).cs_nffree -= frags;
	fs->fs_fmod++;
	cgp->cg_frsum[allocsiz]--;
	if (frags != allocsiz)
		cgp->cg_frsum[allocsiz - frags]++;
	bdwrite(bp);
	return (cg * fs->fs_fpg + bno);
}

/*
 * Allocate a block in a cylinder group.
 *
 * This algorithm implements the following policy:
 *   1) allocate the requested block.
 *   2) allocate a rotationally optimal block in the same cylinder.
 *   3) allocate the next available block on the block rotor for the
 *      specified cylinder group.
 * Note that this routine only allocates fs_bsize blocks; these
 * blocks may be fragmented by the routine that allocates them.
 */
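/*
 * Example (illustrative, assuming fs_frag = 8): a preference of
 * frag 100 is rounded down by blknum() to its block base, frag 96,
 * and made group-relative by dtogd(); if that block is busy, the
 * per-cylinder summary and the rotational position tables are
 * consulted before falling back to the rotor.
 */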
daddr_t
alloccgblk(fs, cgp, bpref)
	register struct fs *fs;
	register struct cg *cgp;
	daddr_t bpref;
{
	daddr_t bno;
	int cylno, pos, delta;
	short *cylbp;
	register int i;

	if (bpref == 0) {
		bpref = cgp->cg_rotor;
		goto norot;
	}
	bpref = blknum(fs, bpref);
	bpref = dtogd(fs, bpref);
	/*
	 * if the requested block is available, use it
	 */
	if (isblock(fs, cg_blksfree(cgp), fragstoblks(fs, bpref))) {
		bno = bpref;
		goto gotit;
	}
	/*
	 * check for a block available on the same cylinder
	 */
	cylno = cbtocylno(fs, bpref);
	if (cg_blktot(cgp)[cylno] == 0)
		goto norot;
	if (fs->fs_cpc == 0) {
		/*
		 * block layout info is not available, so just have
		 * to take any block in this cylinder.
		 */
		bpref = howmany(fs->fs_spc * cylno, NSPF(fs));
		goto norot;
	}
	/*
	 * check the summary information to see if a block is
	 * available in the requested cylinder starting at the
	 * requested rotational position and proceeding around.
	 */
	cylbp = cg_blks(fs, cgp, cylno);
	pos = cbtorpos(fs, bpref);
	for (i = pos; i < fs->fs_nrpos; i++)
		if (cylbp[i] > 0)
			break;
	if (i == fs->fs_nrpos)
		for (i = 0; i < pos; i++)
			if (cylbp[i] > 0)
				break;
	if (cylbp[i] > 0) {
		/*
		 * found a rotational position; now find the actual
		 * block. It is a panic if none is actually there.
		 */
		pos = cylno % fs->fs_cpc;
		bno = (cylno - pos) * fs->fs_spc / NSPB(fs);
		if (fs_postbl(fs, pos)[i] == -1) {
			printf("pos = %d, i = %d, fs = %s\n",
			    pos, i, fs->fs_fsmnt);
			panic("alloccgblk: cyl groups corrupted");
		}
		for (i = fs_postbl(fs, pos)[i];; ) {
			if (isblock(fs, cg_blksfree(cgp), bno + i)) {
				bno = blkstofrags(fs, (bno + i));
				goto gotit;
			}
			delta = fs_rotbl(fs)[i];
			if (delta <= 0 ||
			    delta + i > fragstoblks(fs, fs->fs_fpg))
				break;
			i += delta;
		}
		printf("pos = %d, i = %d, fs = %s\n", pos, i, fs->fs_fsmnt);
		panic("alloccgblk: can't find blk in cyl");
	}
norot:
	/*
	 * no blocks in the requested cylinder, so take next
	 * available one in this cylinder group.
	 */
	bno = mapsearch(fs, cgp, bpref, (int)fs->fs_frag);
	if (bno < 0)
		return (NULL);
	cgp->cg_rotor = bno;
gotit:
	clrblock(fs, cg_blksfree(cgp), (long)fragstoblks(fs, bno));
	cgp->cg_cs.cs_nbfree--;
	fs->fs_cstotal.cs_nbfree--;
	fs->fs_cs(fs, cgp->cg_cgx).cs_nbfree--;
	cylno = cbtocylno(fs, bno);
	cg_blks(fs, cgp, cylno)[cbtorpos(fs, bno)]--;
	cg_blktot(cgp)[cylno]--;
	fs->fs_fmod++;
	return (cgp->cg_cgx * fs->fs_fpg + bno);
}

/*
 * Determine whether an inode can be allocated.
 *
 * Check to see if an inode is available, and if it is,
 * allocate it using the following policy:
 *   1) allocate the requested inode.
 *   2) allocate the next available inode after the requested
 *      inode in the specified cylinder group.
 */
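/*
 * Example (illustrative): if start = 4 and len = 10, and the first
 * byte of cg_inosused() that is not 0xff (i.e. still has a clear
 * bit) is byte 7, skpc() returns the bytes remaining from that
 * point, loc = 7, so i = start + len - loc = 7; the bit loop below
 * then picks out the clear bit within that byte.
 */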
ino_t
ialloccg(ip, cg, ipref, mode)
	struct inode *ip;
	int cg;
	daddr_t ipref;
	int mode;
{
	register struct fs *fs;
	register struct cg *cgp;
	struct buf *bp;
	int error, start, len, loc, map, i;

	fs = ip->i_fs;
	if (fs->fs_cs(fs, cg).cs_nifree == 0)
		return (NULL);
	error = bread(ip->i_devvp, fsbtodb(fs, cgtod(fs, cg)),
		(int)fs->fs_cgsize, NOCRED, &bp);
	if (error) {
		brelse(bp);
		return (NULL);
	}
	cgp = bp->b_un.b_cg;
	if (!cg_chkmagic(cgp) || cgp->cg_cs.cs_nifree == 0) {
		brelse(bp);
		return (NULL);
	}
	cgp->cg_time = time.tv_sec;
	if (ipref) {
		ipref %= fs->fs_ipg;
		if (isclr(cg_inosused(cgp), ipref))
			goto gotit;
	}
	start = cgp->cg_irotor / NBBY;
	len = howmany(fs->fs_ipg - cgp->cg_irotor, NBBY);
	loc = skpc(0xff, len, &cg_inosused(cgp)[start]);
	if (loc == 0) {
		len = start + 1;
		start = 0;
		loc = skpc(0xff, len, &cg_inosused(cgp)[0]);
		if (loc == 0) {
			printf("cg = %d, irotor = %d, fs = %s\n",
			    cg, cgp->cg_irotor, fs->fs_fsmnt);
			panic("ialloccg: map corrupted");
			/* NOTREACHED */
		}
	}
	i = start + len - loc;
	map = cg_inosused(cgp)[i];
	ipref = i * NBBY;
	for (i = 1; i < (1 << NBBY); i <<= 1, ipref++) {
		if ((map & i) == 0) {
			cgp->cg_irotor = ipref;
			goto gotit;
		}
	}
	printf("fs = %s\n", fs->fs_fsmnt);
	panic("ialloccg: block not in map");
	/* NOTREACHED */
gotit:
	setbit(cg_inosused(cgp), ipref);
	cgp->cg_cs.cs_nifree--;
	fs->fs_cstotal.cs_nifree--;
	fs->fs_cs(fs, cg).cs_nifree--;
	fs->fs_fmod++;
	if ((mode & IFMT) == IFDIR) {
		cgp->cg_cs.cs_ndir++;
		fs->fs_cstotal.cs_ndir++;
		fs->fs_cs(fs, cg).cs_ndir++;
	}
	bdwrite(bp);
	return (cg * fs->fs_ipg + ipref);
}

/*
 * Free a block or fragment.
 *
 * The specified block or fragment is placed back in the
 * free map. If a fragment is deallocated, a possible
 * block reassembly is checked.
 */
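/*
 * Example (illustrative, assuming fs_frag = 8): freeing the last
 * busy fragment of a block whose other seven fragments are already
 * free first counts it as a free fragment (cs_nffree += 1); the
 * reassembly check then converts all eight at once, cs_nffree -= 8
 * and cs_nbfree += 1, so the summaries describe one whole free
 * block rather than eight loose fragments.
 */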
blkfree(ip, bno, size)
	register struct inode *ip;
	daddr_t bno;
	off_t size;
{
	register struct fs *fs;
	register struct cg *cgp;
	struct buf *bp;
	int error, cg, blk, frags, bbase;
	register int i;
	struct ucred *cred = u.u_cred;	/* XXX */

	fs = ip->i_fs;
	if ((unsigned)size > fs->fs_bsize || fragoff(fs, size) != 0) {
		printf("dev = 0x%x, bsize = %d, size = %d, fs = %s\n",
		    ip->i_dev, fs->fs_bsize, size, fs->fs_fsmnt);
		panic("blkfree: bad size");
	}
	cg = dtog(fs, bno);
	if ((unsigned)bno >= fs->fs_size) {
		printf("bad block %d, ino %d\n", bno, ip->i_number);
		fserr(fs, cred->cr_uid, "bad block");
		return;
	}
	error = bread(ip->i_devvp, fsbtodb(fs, cgtod(fs, cg)),
		(int)fs->fs_cgsize, NOCRED, &bp);
	if (error) {
		brelse(bp);
		return;
	}
	cgp = bp->b_un.b_cg;
	if (!cg_chkmagic(cgp)) {
		brelse(bp);
		return;
	}
	cgp->cg_time = time.tv_sec;
	bno = dtogd(fs, bno);
	if (size == fs->fs_bsize) {
		if (isblock(fs, cg_blksfree(cgp), fragstoblks(fs, bno))) {
			printf("dev = 0x%x, block = %d, fs = %s\n",
			    ip->i_dev, bno, fs->fs_fsmnt);
			panic("blkfree: freeing free block");
		}
		setblock(fs, cg_blksfree(cgp), fragstoblks(fs, bno));
		cgp->cg_cs.cs_nbfree++;
		fs->fs_cstotal.cs_nbfree++;
		fs->fs_cs(fs, cg).cs_nbfree++;
		i = cbtocylno(fs, bno);
		cg_blks(fs, cgp, i)[cbtorpos(fs, bno)]++;
		cg_blktot(cgp)[i]++;
	} else {
		bbase = bno - fragnum(fs, bno);
		/*
		 * decrement the counts associated with the old frags
		 */
		blk = blkmap(fs, cg_blksfree(cgp), bbase);
		fragacct(fs, blk, cgp->cg_frsum, -1);
		/*
		 * deallocate the fragment
		 */
		frags = numfrags(fs, size);
		for (i = 0; i < frags; i++) {
			if (isset(cg_blksfree(cgp), bno + i)) {
				printf("dev = 0x%x, block = %d, fs = %s\n",
				    ip->i_dev, bno + i, fs->fs_fsmnt);
				panic("blkfree: freeing free frag");
			}
			setbit(cg_blksfree(cgp), bno + i);
		}
		cgp->cg_cs.cs_nffree += i;
		fs->fs_cstotal.cs_nffree += i;
		fs->fs_cs(fs, cg).cs_nffree += i;
		/*
		 * add back in counts associated with the new frags
		 */
		blk = blkmap(fs, cg_blksfree(cgp), bbase);
		fragacct(fs, blk, cgp->cg_frsum, 1);
		/*
		 * if a complete block has been reassembled, account for it
		 */
		if (isblock(fs, cg_blksfree(cgp),
		    (daddr_t)fragstoblks(fs, bbase))) {
			cgp->cg_cs.cs_nffree -= fs->fs_frag;
			fs->fs_cstotal.cs_nffree -= fs->fs_frag;
			fs->fs_cs(fs, cg).cs_nffree -= fs->fs_frag;
			cgp->cg_cs.cs_nbfree++;
			fs->fs_cstotal.cs_nbfree++;
			fs->fs_cs(fs, cg).cs_nbfree++;
			i = cbtocylno(fs, bbase);
			cg_blks(fs, cgp, i)[cbtorpos(fs, bbase)]++;
			cg_blktot(cgp)[i]++;
		}
	}
	fs->fs_fmod++;
	bdwrite(bp);
}

/*
 * Free an inode.
 *
 * The specified inode is placed back in the free map.
 */
ifree(ip, ino, mode)
	struct inode *ip;
	ino_t ino;
	int mode;
{
	register struct fs *fs;
	register struct cg *cgp;
	struct buf *bp;
	int error, cg;

	fs = ip->i_fs;
	if ((unsigned)ino >= fs->fs_ipg*fs->fs_ncg) {
		printf("dev = 0x%x, ino = %d, fs = %s\n",
		    ip->i_dev, ino, fs->fs_fsmnt);
		panic("ifree: range");
	}
	cg = itog(fs, ino);
	error = bread(ip->i_devvp, fsbtodb(fs, cgtod(fs, cg)),
		(int)fs->fs_cgsize, NOCRED, &bp);
	if (error) {
		brelse(bp);
		return;
	}
	cgp = bp->b_un.b_cg;
	if (!cg_chkmagic(cgp)) {
		brelse(bp);
		return;
	}
	cgp->cg_time = time.tv_sec;
	ino %= fs->fs_ipg;
	if (isclr(cg_inosused(cgp), ino)) {
		printf("dev = 0x%x, ino = %d, fs = %s\n",
		    ip->i_dev, ino, fs->fs_fsmnt);
		panic("ifree: freeing free inode");
	}
	clrbit(cg_inosused(cgp), ino);
	if (ino < cgp->cg_irotor)
		cgp->cg_irotor = ino;
	cgp->cg_cs.cs_nifree++;
	fs->fs_cstotal.cs_nifree++;
	fs->fs_cs(fs, cg).cs_nifree++;
	if ((mode & IFMT) == IFDIR) {
		cgp->cg_cs.cs_ndir--;
		fs->fs_cstotal.cs_ndir--;
		fs->fs_cs(fs, cg).cs_ndir--;
	}
	fs->fs_fmod++;
	bdwrite(bp);
}

/*
 * Find a block of the specified size in the specified cylinder group.
 *
 * It is a panic if a request is made to find a block when none are
 * available.
 */
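/*
 * Example (illustrative): scanc() skips bytes of the free map whose
 * fragtbl entry shows no free run of the desired length; if it
 * matches with loc bytes left to scan, the first frag of that byte
 * is (start + len - loc) * NBBY, and the inside[]/around[] masks
 * then isolate the exact starting frag within each block.
 */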
daddr_t
mapsearch(fs, cgp, bpref, allocsiz)
	register struct fs *fs;
	register struct cg *cgp;
	daddr_t bpref;
	int allocsiz;
{
	daddr_t bno;
	int start, len, loc, i;
	int blk, field, subfield, pos;

	/*
	 * find the fragment by searching through the free block
	 * map for an appropriate bit pattern
	 */
	if (bpref)
		start = dtogd(fs, bpref) / NBBY;
	else
		start = cgp->cg_frotor / NBBY;
	len = howmany(fs->fs_fpg, NBBY) - start;
	loc = scanc((unsigned)len, (u_char *)&cg_blksfree(cgp)[start],
		(u_char *)fragtbl[fs->fs_frag],
		(u_char)(1 << (allocsiz - 1 + (fs->fs_frag % NBBY))));
	if (loc == 0) {
		len = start + 1;
		start = 0;
		loc = scanc((unsigned)len, (u_char *)&cg_blksfree(cgp)[0],
			(u_char *)fragtbl[fs->fs_frag],
			(u_char)(1 << (allocsiz - 1 + (fs->fs_frag % NBBY))));
		if (loc == 0) {
			printf("start = %d, len = %d, fs = %s\n",
			    start, len, fs->fs_fsmnt);
			panic("alloccg: map corrupted");
			/* NOTREACHED */
		}
	}
	bno = (start + len - loc) * NBBY;
	cgp->cg_frotor = bno;
	/*
	 * found the byte in the map
	 * sift through the bits to find the selected frag
	 */
	for (i = bno + NBBY; bno < i; bno += fs->fs_frag) {
		blk = blkmap(fs, cg_blksfree(cgp), bno);
		blk <<= 1;
		field = around[allocsiz];
		subfield = inside[allocsiz];
		for (pos = 0; pos <= fs->fs_frag - allocsiz; pos++) {
			if ((blk & field) == subfield)
				return (bno + pos);
			field <<= 1;
			subfield <<= 1;
		}
	}
	printf("bno = %d, fs = %s\n", bno, fs->fs_fsmnt);
	panic("alloccg: block not in map");
	return (-1);
}

/*
 * Fserr prints the name of a file system with an error diagnostic.
 *
 * The form of the error message is:
 *	fs: error message
 */
fserr(fs, uid, cp)
	struct fs *fs;
	uid_t uid;
	char *cp;
{

	log(LOG_ERR, "uid %d on %s: %s\n", uid, fs->fs_fsmnt, cp);
}