/*
 * Copyright (c) 1982, 1986, 1989 Regents of the University of California.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms are permitted
 * provided that the above copyright notice and this paragraph are
 * duplicated in all such forms and that any documentation,
 * advertising materials, and other materials related to such
 * distribution and use acknowledge that the software was developed
 * by the University of California, Berkeley.  The name of the
 * University may not be used to endorse or promote products derived
 * from this software without specific prior written permission.
 * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED
 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE.
 *
 *	@(#)lfs_alloc.c	7.19 (Berkeley) 05/24/90
 */

#include "param.h"
#include "systm.h"
#include "buf.h"
#include "user.h"
#include "vnode.h"
#include "kernel.h"
#include "syslog.h"
#include "cmap.h"
#include "../ufs/quota.h"
#include "../ufs/inode.h"
#include "../ufs/fs.h"

extern u_long		hashalloc();
extern ino_t		ialloccg();
extern daddr_t		alloccg();
extern daddr_t		alloccgblk();
extern daddr_t		fragextend();
extern daddr_t		blkpref();
extern daddr_t		mapsearch();
extern int		inside[], around[];
extern unsigned char	*fragtbl[];

/*
 * Allocate a block in the file system.
 *
 * The size of the requested block is given, which must be some
 * multiple of fs_fsize and <= fs_bsize.
 * A preference may be optionally specified. If a preference is given
 * the following hierarchy is used to allocate a block:
 *   1) allocate the requested block.
 *   2) allocate a rotationally optimal block in the same cylinder.
 *   3) allocate a block in the same cylinder group.
 *   4) quadratically rehash into other cylinder groups, until an
 *      available block is located.
 * If no block preference is given the following hierarchy is used
 * to allocate a block:
 *   1) allocate a block in the cylinder group that contains the
 *      inode for the file.
 *   2) quadratically rehash into other cylinder groups, until an
 *      available block is located.
 */
alloc(ip, lbn, bpref, size, bnp)
	register struct inode *ip;
	daddr_t lbn, bpref;
	int size;
	daddr_t *bnp;
{
	daddr_t bno;
	register struct fs *fs;
	register struct buf *bp;
	int cg, error;
	struct ucred *cred = u.u_cred;		/* XXX */

	*bnp = 0;
	fs = ip->i_fs;
	if ((unsigned)size > fs->fs_bsize || fragoff(fs, size) != 0) {
		printf("dev = 0x%x, bsize = %d, size = %d, fs = %s\n",
		    ip->i_dev, fs->fs_bsize, size, fs->fs_fsmnt);
		panic("alloc: bad size");
	}
	if (size == fs->fs_bsize && fs->fs_cstotal.cs_nbfree == 0)
		goto nospace;
	if (cred->cr_uid != 0 && freespace(fs, fs->fs_minfree) <= 0)
		goto nospace;
#ifdef QUOTA
	if (error = chkdq(ip, (long)btodb(size), cred, 0))
		return (error);
#endif
	if (bpref >= fs->fs_size)
		bpref = 0;
	if (bpref == 0)
		cg = itog(fs, ip->i_number);
	else
		cg = dtog(fs, bpref);
	bno = (daddr_t)hashalloc(ip, cg, (long)bpref, size,
		(u_long (*)())alloccg);
	if (bno > 0) {
		ip->i_blocks += btodb(size);
		ip->i_flag |= IUPD|ICHG;
		*bnp = bno;
		return (0);
	}
nospace:
	fserr(fs, cred->cr_uid, "file system full");
	uprintf("\n%s: write failed, file system is full\n", fs->fs_fsmnt);
	return (ENOSPC);
}

/*
 * Reallocate a fragment to a bigger size
 *
 * The number and size of the old block are given, and a preference
 * and new size are also specified. The allocator attempts to extend
 * the original block. Failing that, the regular block allocator is
 * invoked to get an appropriate block.
 */
realloccg(ip, lbprev, bpref, osize, nsize, bpp)
	register struct inode *ip;
	off_t lbprev;
	daddr_t bpref;
	int osize, nsize;
	struct buf **bpp;
{
	register struct fs *fs;
	struct buf *bp, *obp;
	int cg, request;
	daddr_t bprev, bno, bn;
	int i, error, count;
	struct ucred *cred = u.u_cred;		/* XXX */

	*bpp = 0;
	fs = ip->i_fs;
	if ((unsigned)osize > fs->fs_bsize || fragoff(fs, osize) != 0 ||
	    (unsigned)nsize > fs->fs_bsize || fragoff(fs, nsize) != 0) {
		printf("dev = 0x%x, bsize = %d, osize = %d, nsize = %d, fs = %s\n",
		    ip->i_dev, fs->fs_bsize, osize, nsize, fs->fs_fsmnt);
		panic("realloccg: bad size");
	}
	if (cred->cr_uid != 0 && freespace(fs, fs->fs_minfree) <= 0)
		goto nospace;
	if ((bprev = ip->i_db[lbprev]) == 0) {
		printf("dev = 0x%x, bsize = %d, bprev = %d, fs = %s\n",
		    ip->i_dev, fs->fs_bsize, bprev, fs->fs_fsmnt);
		panic("realloccg: bad bprev");
	}
#ifdef QUOTA
	if (error = chkdq(ip, (long)btodb(nsize - osize), cred, 0))
		return (error);
#endif
	/*
	 * Allocate the extra space in the buffer.
	 */
	if (error = bread(ITOV(ip), lbprev, osize, NOCRED, &bp)) {
		brelse(bp);
		return (error);
	}
	brealloc(bp, nsize);
	bp->b_flags |= B_DONE;
	bzero(bp->b_un.b_addr + osize, (unsigned)nsize - osize);
	/*
	 * Check for extension in the existing location.
	 */
	cg = dtog(fs, bprev);
	if (bno = fragextend(ip, cg, (long)bprev, osize, nsize)) {
		if (bp->b_blkno != fsbtodb(fs, bno))
			panic("bad blockno");
		ip->i_blocks += btodb(nsize - osize);
		ip->i_flag |= IUPD|ICHG;
		*bpp = bp;
		return (0);
	}
	/*
	 * Allocate a new disk location.
	 */
	if (bpref >= fs->fs_size)
		bpref = 0;
	switch ((int)fs->fs_optim) {
	case FS_OPTSPACE:
		/*
		 * Allocate an exact sized fragment. Although this makes
		 * best use of space, we will waste time relocating it if
		 * the file continues to grow. If the fragmentation is
		 * less than half of the minimum free reserve, we choose
		 * to begin optimizing for time.
		 */
		request = nsize;
		if (fs->fs_minfree < 5 ||
		    fs->fs_cstotal.cs_nffree >
		    fs->fs_dsize * fs->fs_minfree / (2 * 100))
			break;
		log(LOG_NOTICE, "%s: optimization changed from SPACE to TIME\n",
			fs->fs_fsmnt);
		fs->fs_optim = FS_OPTTIME;
		break;
	case FS_OPTTIME:
		/*
		 * At this point we have discovered a file that is trying
		 * to grow a small fragment to a larger fragment. To save
		 * time, we allocate a full sized block, then free the
		 * unused portion. If the file continues to grow, the
		 * `fragextend' call above will be able to grow it in place
		 * without further copying. If aberrant programs cause
		 * disk fragmentation to grow within 2% of the free reserve,
		 * we choose to begin optimizing for space.
		 */
		request = fs->fs_bsize;
		if (fs->fs_cstotal.cs_nffree <
		    fs->fs_dsize * (fs->fs_minfree - 2) / 100)
			break;
		log(LOG_NOTICE, "%s: optimization changed from TIME to SPACE\n",
			fs->fs_fsmnt);
		fs->fs_optim = FS_OPTSPACE;
		break;
	default:
		printf("dev = 0x%x, optim = %d, fs = %s\n",
		    ip->i_dev, fs->fs_optim, fs->fs_fsmnt);
		panic("realloccg: bad optim");
		/* NOTREACHED */
	}
	bno = (daddr_t)hashalloc(ip, cg, (long)bpref, request,
		(u_long (*)())alloccg);
	if (bno > 0) {
		bp->b_blkno = bn = fsbtodb(fs, bno);
		count = howmany(osize, CLBYTES);
		for (i = 0; i < count; i++)
			munhash(ip->i_devvp, bn + i * CLBYTES / DEV_BSIZE);
		blkfree(ip, bprev, (off_t)osize);
		if (nsize < request)
			blkfree(ip, bno + numfrags(fs, nsize),
				(off_t)(request - nsize));
		ip->i_blocks += btodb(nsize - osize);
		ip->i_flag |= IUPD|ICHG;
		*bpp = bp;
		return (0);
	}
	brelse(bp);
nospace:
	/*
	 * no space available
	 */
	fserr(fs, cred->cr_uid, "file system full");
	uprintf("\n%s: write failed, file system is full\n", fs->fs_fsmnt);
	return (ENOSPC);
}

/*
 * Allocate an inode in the file system.
 *
 * A preference may be optionally specified. If a preference is given
 * the following hierarchy is used to allocate an inode:
 *   1) allocate the requested inode.
 *   2) allocate an inode in the same cylinder group.
 *   3) quadratically rehash into other cylinder groups, until an
 *      available inode is located.
 * If no inode preference is given the following hierarchy is used
 * to allocate an inode:
 *   1) allocate an inode in cylinder group 0.
 *   2) quadratically rehash into other cylinder groups, until an
 *      available inode is located.
 */
ialloc(pip, ipref, mode, cred, ipp)
	register struct inode *pip;
	ino_t ipref;
	int mode;
	struct ucred *cred;
	struct inode **ipp;
{
	ino_t ino;
	register struct fs *fs;
	register struct inode *ip;
	int cg, error;

	*ipp = 0;
	fs = pip->i_fs;
	if (fs->fs_cstotal.cs_nifree == 0)
		goto noinodes;
	if (ipref >= fs->fs_ncg * fs->fs_ipg)
		ipref = 0;
	cg = itog(fs, ipref);
	ino = (ino_t)hashalloc(pip, cg, (long)ipref, mode, ialloccg);
	if (ino == 0)
		goto noinodes;
	error = iget(pip, ino, ipp);
	if (error) {
		ifree(pip, ino, mode);
		return (error);
	}
	ip = *ipp;
	if (ip->i_mode) {
		printf("mode = 0%o, inum = %d, fs = %s\n",
		    ip->i_mode, ip->i_number, fs->fs_fsmnt);
		panic("ialloc: dup alloc");
	}
	if (ip->i_blocks) {				/* XXX */
		printf("free inode %s/%d had %d blocks\n",
		    fs->fs_fsmnt, ino, ip->i_blocks);
		ip->i_blocks = 0;
	}
	ip->i_flags = 0;
	/*
	 * Set up a new generation number for this inode.
	 */
	if (++nextgennumber < (u_long)time.tv_sec)
		nextgennumber = time.tv_sec;
	ip->i_gen = nextgennumber;
	return (0);
noinodes:
	fserr(fs, cred->cr_uid, "out of inodes");
	uprintf("\n%s: create/symlink failed, no inodes free\n", fs->fs_fsmnt);
	return (ENOSPC);
}

/*
 * Find a cylinder group in which to place a directory.
 *
 * The policy implemented by this algorithm is to select from
 * among those cylinder groups with above the average number of
 * free inodes, the one with the smallest number of directories.
 */
ino_t
dirpref(fs)
	register struct fs *fs;
{
	int cg, minndir, mincg, avgifree;

	avgifree = fs->fs_cstotal.cs_nifree / fs->fs_ncg;
	minndir = fs->fs_ipg;
	mincg = 0;
	for (cg = 0; cg < fs->fs_ncg; cg++)
		if (fs->fs_cs(fs, cg).cs_ndir < minndir &&
		    fs->fs_cs(fs, cg).cs_nifree >= avgifree) {
			mincg = cg;
			minndir = fs->fs_cs(fs, cg).cs_ndir;
		}
	return ((ino_t)(fs->fs_ipg * mincg));
}

/*
 * Select the desired position for the next block in a file.  The file is
 * logically divided into sections. The first section is composed of the
 * direct blocks. Each additional section contains fs_maxbpg blocks.
 *
 * If no blocks have been allocated in the first section, the policy is to
 * request a block in the same cylinder group as the inode that describes
 * the file. If no blocks have been allocated in any other section, the
 * policy is to place the section in a cylinder group with a greater than
 * average number of free blocks.  An appropriate cylinder group is found
 * by using a rotor that sweeps the cylinder groups. When a new group of
 * blocks is needed, the sweep begins in the cylinder group following the
 * cylinder group from which the previous allocation was made. The sweep
 * continues until a cylinder group with greater than the average number
 * of free blocks is found. If the allocation is for the first block in an
 * indirect block, the information on the previous allocation is unavailable;
 * here a best guess is made based upon the logical block number being
 * allocated.
 *
 * If a section is already partially allocated, the policy is to
 * contiguously allocate fs_maxcontig blocks.  The end of one of these
 * contiguous blocks and the beginning of the next is physically separated
 * so that the disk head will be in transit between them for at least
 * fs_rotdelay milliseconds.  This is to allow time for the processor to
 * schedule another I/O transfer.
 */
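/*
 * Editor's illustration (values hypothetical): in the code below a new
 * section starts whenever indx is a multiple of fs_maxbpg or the
 * previous slot in bap[] is unallocated; with fs_maxbpg = 2048, slots
 * 0, 2048, 4096, ... of an indirect block each begin a fresh section
 * and trigger the rotor sweep described above.
 */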
daddr_t
blkpref(ip, lbn, indx, bap)
	struct inode *ip;
	daddr_t lbn;
	int indx;
	daddr_t *bap;
{
	register struct fs *fs;
	register int cg;
	int avgbfree, startcg;
	daddr_t nextblk;

	fs = ip->i_fs;
	if (indx % fs->fs_maxbpg == 0 || bap[indx - 1] == 0) {
		if (lbn < NDADDR) {
			cg = itog(fs, ip->i_number);
			return (fs->fs_fpg * cg + fs->fs_frag);
		}
		/*
		 * Find a cylinder with greater than average number of
		 * unused data blocks.
		 */
		if (indx == 0 || bap[indx - 1] == 0)
			startcg = itog(fs, ip->i_number) + lbn / fs->fs_maxbpg;
		else
			startcg = dtog(fs, bap[indx - 1]) + 1;
		startcg %= fs->fs_ncg;
		avgbfree = fs->fs_cstotal.cs_nbfree / fs->fs_ncg;
		for (cg = startcg; cg < fs->fs_ncg; cg++)
			if (fs->fs_cs(fs, cg).cs_nbfree >= avgbfree) {
				fs->fs_cgrotor = cg;
				return (fs->fs_fpg * cg + fs->fs_frag);
			}
		for (cg = 0; cg <= startcg; cg++)
			if (fs->fs_cs(fs, cg).cs_nbfree >= avgbfree) {
				fs->fs_cgrotor = cg;
				return (fs->fs_fpg * cg + fs->fs_frag);
			}
		return (NULL);
	}
	/*
	 * One or more previous blocks have been laid out. If less
	 * than fs_maxcontig previous blocks are contiguous, the
	 * next block is requested contiguously, otherwise it is
	 * requested rotationally delayed by fs_rotdelay milliseconds.
	 */
	nextblk = bap[indx - 1] + fs->fs_frag;
	if (indx > fs->fs_maxcontig &&
	    bap[indx - fs->fs_maxcontig] + blkstofrags(fs, fs->fs_maxcontig)
	    != nextblk)
		return (nextblk);
	if (fs->fs_rotdelay != 0)
		/*
		 * Here we convert ms of delay to frags as:
		 * (frags) = (ms) * (rev/sec) * (sect/rev) /
		 *	((sect/frag) * (ms/sec))
		 * then round up to the next block.
		 */
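		/*
		 * Editor's worked example (hypothetical geometry): with
		 * fs_rotdelay = 4 ms, fs_rps = 60 rev/s, fs_nsect = 32
		 * sectors/track and NSPF(fs) = 2, this is
		 * 4 * 60 * 32 / 2000 = 3 frags, which roundup() raises
		 * to one full block (8 frags) when fs_frag is 8.
		 */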
		nextblk += roundup(fs->fs_rotdelay * fs->fs_rps * fs->fs_nsect /
		    (NSPF(fs) * 1000), fs->fs_frag);
	return (nextblk);
}

/*
 * Implement the cylinder overflow algorithm.
 *
 * The policy implemented by this algorithm is:
 *   1) allocate the block in its requested cylinder group.
 *   2) quadratically rehash on the cylinder group number.
 *   3) brute force search for a free block.
 */
/*VARARGS5*/
u_long
hashalloc(ip, cg, pref, size, allocator)
	struct inode *ip;
	int cg;
	long pref;
	int size;	/* size for data blocks, mode for inodes */
	u_long (*allocator)();
{
	register struct fs *fs;
	long result;
	int i, icg = cg;

	fs = ip->i_fs;
	/*
	 * 1: preferred cylinder group
	 */
	result = (*allocator)(ip, cg, pref, size);
	if (result)
		return (result);
	/*
	 * 2: quadratic rehash
	 */
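	/*
	 * The step doubles each pass, so starting from the preferred
	 * group the probes land at cg+1, cg+3, cg+7, cg+15, ...
	 * (mod fs_ncg), i.e. at cumulative offsets of 2^k - 1.
	 */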
	for (i = 1; i < fs->fs_ncg; i *= 2) {
		cg += i;
		if (cg >= fs->fs_ncg)
			cg -= fs->fs_ncg;
		result = (*allocator)(ip, cg, 0, size);
		if (result)
			return (result);
	}
	/*
	 * 3: brute force search
	 * Note that we start at i == 2, since 0 was checked initially,
	 * and 1 is always checked in the quadratic rehash.
	 */
	cg = (icg + 2) % fs->fs_ncg;
	for (i = 2; i < fs->fs_ncg; i++) {
		result = (*allocator)(ip, cg, 0, size);
		if (result)
			return (result);
		cg++;
		if (cg == fs->fs_ncg)
			cg = 0;
	}
	return (NULL);
}

/*
 * Determine whether a fragment can be extended.
 *
 * Check to see if the necessary fragments are available, and
 * if they are, allocate them.
 */
daddr_t
fragextend(ip, cg, bprev, osize, nsize)
	struct inode *ip;
	int cg;
	long bprev;
	int osize, nsize;
{
	register struct fs *fs;
	register struct cg *cgp;
	struct buf *bp;
	long bno;
	int frags, bbase;
	int i, error;

	fs = ip->i_fs;
	if (fs->fs_cs(fs, cg).cs_nffree < numfrags(fs, nsize - osize))
		return (NULL);
	frags = numfrags(fs, nsize);
	bbase = fragnum(fs, bprev);
	if (bbase > fragnum(fs, (bprev + frags - 1))) {
		/* cannot extend across a block boundary */
		return (NULL);
	}
	error = bread(ip->i_devvp, fsbtodb(fs, cgtod(fs, cg)),
		(int)fs->fs_cgsize, NOCRED, &bp);
	if (error) {
		brelse(bp);
		return (NULL);
	}
	cgp = bp->b_un.b_cg;
	if (!cg_chkmagic(cgp)) {
		brelse(bp);
		return (NULL);
	}
	cgp->cg_time = time.tv_sec;
	bno = dtogd(fs, bprev);
	for (i = numfrags(fs, osize); i < frags; i++)
		if (isclr(cg_blksfree(cgp), bno + i)) {
			brelse(bp);
			return (NULL);
		}
	/*
	 * The current fragment can be extended:
	 * deduct the count of the fragment run being extended into,
	 * increase the count of the remaining run (if any), and
	 * allocate the extended piece.
	 */
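	/*
	 * Editor's illustration: growing a 1-frag piece to 3 frags when
	 * the 4 frags following it are free (so i ends up 5 below):
	 * cg_frsum[4] loses one run and cg_frsum[2] gains the 2-frag
	 * remainder.
	 */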
	for (i = frags; i < fs->fs_frag - bbase; i++)
		if (isclr(cg_blksfree(cgp), bno + i))
			break;
	cgp->cg_frsum[i - numfrags(fs, osize)]--;
	if (i != frags)
		cgp->cg_frsum[i - frags]++;
	for (i = numfrags(fs, osize); i < frags; i++) {
		clrbit(cg_blksfree(cgp), bno + i);
		cgp->cg_cs.cs_nffree--;
		fs->fs_cstotal.cs_nffree--;
		fs->fs_cs(fs, cg).cs_nffree--;
	}
	fs->fs_fmod++;
	bdwrite(bp);
	return (bprev);
}

/*
 * Determine whether a block can be allocated.
 *
 * Check to see if a block of the appropriate size is available,
 * and if it is, allocate it.
 */
daddr_t
alloccg(ip, cg, bpref, size)
	struct inode *ip;
	int cg;
	daddr_t bpref;
	int size;
{
	register struct fs *fs;
	register struct cg *cgp;
	struct buf *bp;
	register int i;
	int error, bno, frags, allocsiz;

	fs = ip->i_fs;
	if (fs->fs_cs(fs, cg).cs_nbfree == 0 && size == fs->fs_bsize)
		return (NULL);
	error = bread(ip->i_devvp, fsbtodb(fs, cgtod(fs, cg)),
		(int)fs->fs_cgsize, NOCRED, &bp);
	if (error) {
		brelse(bp);
		return (NULL);
	}
	cgp = bp->b_un.b_cg;
	if (!cg_chkmagic(cgp) ||
	    (cgp->cg_cs.cs_nbfree == 0 && size == fs->fs_bsize)) {
		brelse(bp);
		return (NULL);
	}
	cgp->cg_time = time.tv_sec;
	if (size == fs->fs_bsize) {
		bno = alloccgblk(fs, cgp, bpref);
		bdwrite(bp);
		return (bno);
	}
	/*
	 * check to see if any fragments are already available
	 * allocsiz is the size which will be allocated, hacking
	 * it down to a smaller size if necessary
	 */
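	/*
	 * cg_frsum[n] counts the free runs of exactly n frags in this
	 * cylinder group, so the loop below settles on the smallest
	 * available run that still fits the request.
	 */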
	frags = numfrags(fs, size);
	for (allocsiz = frags; allocsiz < fs->fs_frag; allocsiz++)
		if (cgp->cg_frsum[allocsiz] != 0)
			break;
	if (allocsiz == fs->fs_frag) {
		/*
		 * no fragments were available, so a block will be
		 * allocated, and hacked up
		 */
		if (cgp->cg_cs.cs_nbfree == 0) {
			brelse(bp);
			return (NULL);
		}
		bno = alloccgblk(fs, cgp, bpref);
		bpref = dtogd(fs, bno);
		for (i = frags; i < fs->fs_frag; i++)
			setbit(cg_blksfree(cgp), bpref + i);
		i = fs->fs_frag - frags;
		cgp->cg_cs.cs_nffree += i;
		fs->fs_cstotal.cs_nffree += i;
		fs->fs_cs(fs, cg).cs_nffree += i;
		fs->fs_fmod++;
		cgp->cg_frsum[i]++;
		bdwrite(bp);
		return (bno);
	}
	bno = mapsearch(fs, cgp, bpref, allocsiz);
	if (bno < 0) {
		brelse(bp);
		return (NULL);
	}
	for (i = 0; i < frags; i++)
		clrbit(cg_blksfree(cgp), bno + i);
	cgp->cg_cs.cs_nffree -= frags;
	fs->fs_cstotal.cs_nffree -= frags;
	fs->fs_cs(fs, cg).cs_nffree -= frags;
	fs->fs_fmod++;
	cgp->cg_frsum[allocsiz]--;
	if (frags != allocsiz)
		cgp->cg_frsum[allocsiz - frags]++;
	bdwrite(bp);
	return (cg * fs->fs_fpg + bno);
}

/*
 * Allocate a block in a cylinder group.
 *
 * This algorithm implements the following policy:
 *   1) allocate the requested block.
 *   2) allocate a rotationally optimal block in the same cylinder.
 *   3) allocate the next available block on the block rotor for the
 *      specified cylinder group.
 * Note that this routine only allocates fs_bsize blocks; these
 * blocks may be fragmented by the routine that allocates them.
 */
daddr_t
alloccgblk(fs, cgp, bpref)
	register struct fs *fs;
	register struct cg *cgp;
	daddr_t bpref;
{
	daddr_t bno;
	int cylno, pos, delta;
	short *cylbp;
	register int i;

	if (bpref == 0) {
		bpref = cgp->cg_rotor;
		goto norot;
	}
	bpref = blknum(fs, bpref);
	bpref = dtogd(fs, bpref);
	/*
	 * if the requested block is available, use it
	 */
	if (isblock(fs, cg_blksfree(cgp), fragstoblks(fs, bpref))) {
		bno = bpref;
		goto gotit;
	}
	/*
	 * check for a block available on the same cylinder
	 */
	cylno = cbtocylno(fs, bpref);
	if (cg_blktot(cgp)[cylno] == 0)
		goto norot;
	if (fs->fs_cpc == 0) {
		/*
		 * block layout info is not available, so just have
		 * to take any block in this cylinder.
		 */
		bpref = howmany(fs->fs_spc * cylno, NSPF(fs));
		goto norot;
	}
	/*
	 * check the summary information to see if a block is
	 * available in the requested cylinder starting at the
	 * requested rotational position and proceeding around.
	 */
	cylbp = cg_blks(fs, cgp, cylno);
	pos = cbtorpos(fs, bpref);
	for (i = pos; i < fs->fs_nrpos; i++)
		if (cylbp[i] > 0)
			break;
	if (i == fs->fs_nrpos)
		for (i = 0; i < pos; i++)
			if (cylbp[i] > 0)
				break;
	if (cylbp[i] > 0) {
		/*
		 * found a rotational position, now find the actual
		 * block.  Panic if none is actually there.
		 */
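		/*
		 * Editor's note: fs_postbl(fs, pos)[i] gives the first
		 * block in this cylinder at rotational position i, and
		 * fs_rotbl(fs)[b] the forward distance to the next block
		 * at the same position, so the loop below hops along
		 * blocks sharing that rotational position.
		 */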
		pos = cylno % fs->fs_cpc;
		bno = (cylno - pos) * fs->fs_spc / NSPB(fs);
		if (fs_postbl(fs, pos)[i] == -1) {
			printf("pos = %d, i = %d, fs = %s\n",
			    pos, i, fs->fs_fsmnt);
			panic("alloccgblk: cyl groups corrupted");
		}
		for (i = fs_postbl(fs, pos)[i];; ) {
			if (isblock(fs, cg_blksfree(cgp), bno + i)) {
				bno = blkstofrags(fs, (bno + i));
				goto gotit;
			}
			delta = fs_rotbl(fs)[i];
			if (delta <= 0 ||
			    delta + i > fragstoblks(fs, fs->fs_fpg))
				break;
			i += delta;
		}
		printf("pos = %d, i = %d, fs = %s\n", pos, i, fs->fs_fsmnt);
		panic("alloccgblk: can't find blk in cyl");
	}
norot:
	/*
	 * no blocks in the requested cylinder, so take next
	 * available one in this cylinder group.
	 */
	bno = mapsearch(fs, cgp, bpref, (int)fs->fs_frag);
	if (bno < 0)
		return (NULL);
	cgp->cg_rotor = bno;
gotit:
	clrblock(fs, cg_blksfree(cgp), (long)fragstoblks(fs, bno));
	cgp->cg_cs.cs_nbfree--;
	fs->fs_cstotal.cs_nbfree--;
	fs->fs_cs(fs, cgp->cg_cgx).cs_nbfree--;
	cylno = cbtocylno(fs, bno);
	cg_blks(fs, cgp, cylno)[cbtorpos(fs, bno)]--;
	cg_blktot(cgp)[cylno]--;
	fs->fs_fmod++;
	return (cgp->cg_cgx * fs->fs_fpg + bno);
}

/*
 * Determine whether an inode can be allocated.
 *
 * Check to see if an inode is available, and if it is,
 * allocate it using the following policy:
 *   1) allocate the requested inode.
 *   2) allocate the next available inode after the requested
 *      inode in the specified cylinder group.
 */
ino_t
ialloccg(ip, cg, ipref, mode)
	struct inode *ip;
	int cg;
	daddr_t ipref;
	int mode;
{
	register struct fs *fs;
	register struct cg *cgp;
	struct buf *bp;
	int error, start, len, loc, map, i;

	fs = ip->i_fs;
	if (fs->fs_cs(fs, cg).cs_nifree == 0)
		return (NULL);
	error = bread(ip->i_devvp, fsbtodb(fs, cgtod(fs, cg)),
		(int)fs->fs_cgsize, NOCRED, &bp);
	if (error) {
		brelse(bp);
		return (NULL);
	}
	cgp = bp->b_un.b_cg;
	if (!cg_chkmagic(cgp) || cgp->cg_cs.cs_nifree == 0) {
		brelse(bp);
		return (NULL);
	}
	cgp->cg_time = time.tv_sec;
	if (ipref) {
		ipref %= fs->fs_ipg;
		if (isclr(cg_inosused(cgp), ipref))
			goto gotit;
	}
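	/*
	 * skpc() skips leading 0xff (fully allocated) bytes of the
	 * inode map and returns how many bytes remain, so
	 * start + len - loc below indexes the first byte with a free
	 * bit; the scan starts at the rotor and wraps once.
	 */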
	start = cgp->cg_irotor / NBBY;
	len = howmany(fs->fs_ipg - cgp->cg_irotor, NBBY);
	loc = skpc(0xff, len, &cg_inosused(cgp)[start]);
	if (loc == 0) {
		len = start + 1;
		start = 0;
		loc = skpc(0xff, len, &cg_inosused(cgp)[0]);
		if (loc == 0) {
			printf("cg = %d, irotor = %d, fs = %s\n",
			    cg, cgp->cg_irotor, fs->fs_fsmnt);
			panic("ialloccg: map corrupted");
			/* NOTREACHED */
		}
	}
	i = start + len - loc;
	map = cg_inosused(cgp)[i];
	ipref = i * NBBY;
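	/* scan the byte from its low-order bit for the first clear (free) bit */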
	for (i = 1; i < (1 << NBBY); i <<= 1, ipref++) {
		if ((map & i) == 0) {
			cgp->cg_irotor = ipref;
			goto gotit;
		}
	}
	printf("fs = %s\n", fs->fs_fsmnt);
	panic("ialloccg: block not in map");
	/* NOTREACHED */
gotit:
	setbit(cg_inosused(cgp), ipref);
	cgp->cg_cs.cs_nifree--;
	fs->fs_cstotal.cs_nifree--;
	fs->fs_cs(fs, cg).cs_nifree--;
	fs->fs_fmod++;
	if ((mode & IFMT) == IFDIR) {
		cgp->cg_cs.cs_ndir++;
		fs->fs_cstotal.cs_ndir++;
		fs->fs_cs(fs, cg).cs_ndir++;
	}
	bdwrite(bp);
	return (cg * fs->fs_ipg + ipref);
}

/*
 * Free a block or fragment.
 *
 * The specified block or fragment is placed back in the
 * free map. If a fragment is deallocated, a possible
 * block reassembly is checked.
 */
blkfree(ip, bno, size)
	register struct inode *ip;
	daddr_t bno;
	off_t size;
{
	register struct fs *fs;
	register struct cg *cgp;
	struct buf *bp;
	int error, cg, blk, frags, bbase;
	register int i;
	struct ucred *cred = u.u_cred;	/* XXX */

	fs = ip->i_fs;
	if ((unsigned)size > fs->fs_bsize || fragoff(fs, size) != 0) {
		printf("dev = 0x%x, bsize = %d, size = %d, fs = %s\n",
		    ip->i_dev, fs->fs_bsize, size, fs->fs_fsmnt);
		panic("blkfree: bad size");
	}
	cg = dtog(fs, bno);
	if ((unsigned)bno >= fs->fs_size) {
		printf("bad block %d, ino %d\n", bno, ip->i_number);
		fserr(fs, cred->cr_uid, "bad block");
		return;
	}
	error = bread(ip->i_devvp, fsbtodb(fs, cgtod(fs, cg)),
		(int)fs->fs_cgsize, NOCRED, &bp);
	if (error) {
		brelse(bp);
		return;
	}
	cgp = bp->b_un.b_cg;
	if (!cg_chkmagic(cgp)) {
		brelse(bp);
		return;
	}
	cgp->cg_time = time.tv_sec;
	bno = dtogd(fs, bno);
	if (size == fs->fs_bsize) {
		if (isblock(fs, cg_blksfree(cgp), fragstoblks(fs, bno))) {
			printf("dev = 0x%x, block = %d, fs = %s\n",
			    ip->i_dev, bno, fs->fs_fsmnt);
			panic("blkfree: freeing free block");
		}
		setblock(fs, cg_blksfree(cgp), fragstoblks(fs, bno));
		cgp->cg_cs.cs_nbfree++;
		fs->fs_cstotal.cs_nbfree++;
		fs->fs_cs(fs, cg).cs_nbfree++;
		i = cbtocylno(fs, bno);
		cg_blks(fs, cgp, i)[cbtorpos(fs, bno)]++;
		cg_blktot(cgp)[i]++;
	} else {
		bbase = bno - fragnum(fs, bno);
		/*
		 * decrement the counts associated with the old frags
		 */
		blk = blkmap(fs, cg_blksfree(cgp), bbase);
		fragacct(fs, blk, cgp->cg_frsum, -1);
		/*
		 * deallocate the fragment
		 */
		frags = numfrags(fs, size);
		for (i = 0; i < frags; i++) {
			if (isset(cg_blksfree(cgp), bno + i)) {
				printf("dev = 0x%x, block = %d, fs = %s\n",
				    ip->i_dev, bno + i, fs->fs_fsmnt);
				panic("blkfree: freeing free frag");
			}
			setbit(cg_blksfree(cgp), bno + i);
		}
		cgp->cg_cs.cs_nffree += i;
		fs->fs_cstotal.cs_nffree += i;
		fs->fs_cs(fs, cg).cs_nffree += i;
		/*
		 * add back in counts associated with the new frags
		 */
		blk = blkmap(fs, cg_blksfree(cgp), bbase);
		fragacct(fs, blk, cgp->cg_frsum, 1);
		/*
		 * if a complete block has been reassembled, account for it
		 */
		if (isblock(fs, cg_blksfree(cgp),
		    (daddr_t)fragstoblks(fs, bbase))) {
			cgp->cg_cs.cs_nffree -= fs->fs_frag;
			fs->fs_cstotal.cs_nffree -= fs->fs_frag;
			fs->fs_cs(fs, cg).cs_nffree -= fs->fs_frag;
			cgp->cg_cs.cs_nbfree++;
			fs->fs_cstotal.cs_nbfree++;
			fs->fs_cs(fs, cg).cs_nbfree++;
			i = cbtocylno(fs, bbase);
			cg_blks(fs, cgp, i)[cbtorpos(fs, bbase)]++;
			cg_blktot(cgp)[i]++;
		}
	}
	fs->fs_fmod++;
	bdwrite(bp);
}

/*
 * Free an inode.
 *
 * The specified inode is placed back in the free map.
 */
ifree(ip, ino, mode)
	struct inode *ip;
	ino_t ino;
	int mode;
{
	register struct fs *fs;
	register struct cg *cgp;
	struct buf *bp;
	int error, cg;

	fs = ip->i_fs;
	if ((unsigned)ino >= fs->fs_ipg*fs->fs_ncg) {
		printf("dev = 0x%x, ino = %d, fs = %s\n",
		    ip->i_dev, ino, fs->fs_fsmnt);
		panic("ifree: range");
	}
	cg = itog(fs, ino);
	error = bread(ip->i_devvp, fsbtodb(fs, cgtod(fs, cg)),
		(int)fs->fs_cgsize, NOCRED, &bp);
	if (error) {
		brelse(bp);
		return;
	}
	cgp = bp->b_un.b_cg;
	if (!cg_chkmagic(cgp)) {
		brelse(bp);
		return;
	}
	cgp->cg_time = time.tv_sec;
	ino %= fs->fs_ipg;
	if (isclr(cg_inosused(cgp), ino)) {
		printf("dev = 0x%x, ino = %d, fs = %s\n",
		    ip->i_dev, ino, fs->fs_fsmnt);
		panic("ifree: freeing free inode");
	}
	clrbit(cg_inosused(cgp), ino);
	if (ino < cgp->cg_irotor)
		cgp->cg_irotor = ino;
	cgp->cg_cs.cs_nifree++;
	fs->fs_cstotal.cs_nifree++;
	fs->fs_cs(fs, cg).cs_nifree++;
	if ((mode & IFMT) == IFDIR) {
		cgp->cg_cs.cs_ndir--;
		fs->fs_cstotal.cs_ndir--;
		fs->fs_cs(fs, cg).cs_ndir--;
	}
	fs->fs_fmod++;
	bdwrite(bp);
}

/*
 * Find a block of the specified size in the specified cylinder group.
 *
 * It is a panic if a request is made to find a block when none
 * are available.
 */
daddr_t
mapsearch(fs, cgp, bpref, allocsiz)
	register struct fs *fs;
	register struct cg *cgp;
	daddr_t bpref;
	int allocsiz;
{
	daddr_t bno;
	int start, len, loc, i;
	int blk, field, subfield, pos;

	/*
	 * find the fragment by searching through the free block
	 * map for an appropriate bit pattern
	 */
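	/*
	 * fragtbl[fs_frag] maps each possible free-map byte to a
	 * summary of the fragment-run sizes it contains; scanc()
	 * returns the bytes remaining past the first byte whose
	 * summary matches the size mask, or 0 if none does.
	 */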
	if (bpref)
		start = dtogd(fs, bpref) / NBBY;
	else
		start = cgp->cg_frotor / NBBY;
	len = howmany(fs->fs_fpg, NBBY) - start;
	loc = scanc((unsigned)len, (u_char *)&cg_blksfree(cgp)[start],
		(u_char *)fragtbl[fs->fs_frag],
		(u_char)(1 << (allocsiz - 1 + (fs->fs_frag % NBBY))));
	if (loc == 0) {
		len = start + 1;
		start = 0;
		loc = scanc((unsigned)len, (u_char *)&cg_blksfree(cgp)[0],
			(u_char *)fragtbl[fs->fs_frag],
			(u_char)(1 << (allocsiz - 1 + (fs->fs_frag % NBBY))));
		if (loc == 0) {
			printf("start = %d, len = %d, fs = %s\n",
			    start, len, fs->fs_fsmnt);
			panic("alloccg: map corrupted");
			/* NOTREACHED */
		}
	}
	bno = (start + len - loc) * NBBY;
	cgp->cg_frotor = bno;
	/*
	 * found the byte in the map
	 * sift through the bits to find the selected frag
	 */
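	/*
	 * Editor's note: around[allocsiz] masks a window of
	 * allocsiz + 2 bits and inside[allocsiz] sets just the inner
	 * allocsiz bits, so a match below means a free run of exactly
	 * that length bounded by allocated frags (the blk <<= 1
	 * supplies an artificial edge bit at the low end).
	 */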
	for (i = bno + NBBY; bno < i; bno += fs->fs_frag) {
		blk = blkmap(fs, cg_blksfree(cgp), bno);
		blk <<= 1;
		field = around[allocsiz];
		subfield = inside[allocsiz];
		for (pos = 0; pos <= fs->fs_frag - allocsiz; pos++) {
			if ((blk & field) == subfield)
				return (bno + pos);
			field <<= 1;
			subfield <<= 1;
		}
	}
	printf("bno = %d, fs = %s\n", bno, fs->fs_fsmnt);
	panic("alloccg: block not in map");
	return (-1);
}

/*
 * Fserr prints the name of a file system with an error diagnostic.
 *
 * The form of the error message is:
 *	fs: error message
 */
fserr(fs, uid, cp)
	struct fs *fs;
	uid_t uid;
	char *cp;
{

	log(LOG_ERR, "uid %d on %s: %s\n", uid, fs->fs_fsmnt, cp);
}