xref: /original-bsd/sys/ufs/ufs/ufs_disksubr.c (revision 3705696b)
1 /*
2  * Copyright (c) 1982, 1986, 1988, 1993
3  *	The Regents of the University of California.  All rights reserved.
4  *
5  * %sccs.include.redist.c%
6  *
7  *	@(#)ufs_disksubr.c	8.1 (Berkeley) 06/11/93
8  */
9 
10 #include <sys/param.h>
11 #include <sys/systm.h>
12 #include <sys/buf.h>
13 #include <sys/disklabel.h>
14 #include <sys/syslog.h>
15 
16 /*
17  * Seek sort for disks.  We depend on the driver which calls us using b_resid
18  * as the current cylinder number.
19  *
20  * The argument dp structure holds a b_actf activity chain pointer on which we
21  * keep two queues, sorted in ascending cylinder order.  The first queue holds
22  * those requests which are positioned after the current cylinder (in the first
23  * request); the second holds requests which came in after their cylinder number
24  * was passed.  Thus we implement a one way scan, retracting after reaching the
25  * end of the drive to the first request on the second queue, at which time it
26  * becomes the first queue.
27  *
28  * A one-way scan is natural because of the way UNIX read-ahead blocks are
29  * allocated.
30  */
31 
/* Cylinder number used for seek sorting; it is carried in the buffer's b_resid field. */
#define	b_cylin	b_resid
33 
34 void
35 disksort(dp, bp)
36 	register struct buf *dp, *bp;
37 {
38 	register struct buf *ap;
39 
40 	/*
41 	 * If nothing on the activity queue, then
42 	 * we become the only thing.
43 	 */
44 	ap = dp->b_actf;
45 	if(ap == NULL) {
46 		dp->b_actf = bp;
47 		bp->b_actf = NULL;
48 		return;
49 	}
50 	/*
51 	 * If we lie after the first (currently active)
52 	 * request, then we must locate the second request list
53 	 * and add ourselves to it.
54 	 */
55 	if (bp->b_cylin < ap->b_cylin) {
56 		while (ap->b_actf) {
57 			/*
58 			 * Check for an ``inversion'' in the
59 			 * normally ascending cylinder numbers,
60 			 * indicating the start of the second request list.
61 			 */
62 			if (ap->b_actf->b_cylin < ap->b_cylin) {
63 				/*
64 				 * Search the second request list
65 				 * for the first request at a larger
66 				 * cylinder number.  We go before that;
67 				 * if there is no such request, we go at end.
68 				 */
69 				do {
70 					if (bp->b_cylin < ap->b_actf->b_cylin)
71 						goto insert;
72 					if (bp->b_cylin == ap->b_actf->b_cylin &&
73 					    bp->b_blkno < ap->b_actf->b_blkno)
74 						goto insert;
75 					ap = ap->b_actf;
76 				} while (ap->b_actf);
77 				goto insert;		/* after last */
78 			}
79 			ap = ap->b_actf;
80 		}
81 		/*
82 		 * No inversions... we will go after the last, and
83 		 * be the first request in the second request list.
84 		 */
85 		goto insert;
86 	}
87 	/*
88 	 * Request is at/after the current request...
89 	 * sort in the first request list.
90 	 */
91 	while (ap->b_actf) {
92 		/*
93 		 * We want to go after the current request
94 		 * if there is an inversion after it (i.e. it is
95 		 * the end of the first request list), or if
96 		 * the next request is a larger cylinder than our request.
97 		 */
98 		if (ap->b_actf->b_cylin < ap->b_cylin ||
99 		    bp->b_cylin < ap->b_actf->b_cylin ||
100 		    (bp->b_cylin == ap->b_actf->b_cylin &&
101 		    bp->b_blkno < ap->b_actf->b_blkno))
102 			goto insert;
103 		ap = ap->b_actf;
104 	}
105 	/*
106 	 * Neither a second list nor a larger
107 	 * request... we go at the end of the first list,
108 	 * which is the same as the end of the whole schebang.
109 	 */
110 insert:
111 	bp->b_actf = ap->b_actf;
112 	ap->b_actf = bp;
113 }
114 
115 /*
116  * Attempt to read a disk label from a device using the indicated stategy
117  * routine.  The label must be partly set up before this: secpercyl and
118  * anything required in the strategy routine (e.g., sector size) must be
119  * filled in before calling us.  Returns NULL on success and an error
120  * string on failure.
121  */
122 char *
123 readdisklabel(dev, strat, lp)
124 	dev_t dev;
125 	int (*strat)();
126 	register struct disklabel *lp;
127 {
128 	register struct buf *bp;
129 	struct disklabel *dlp;
130 	char *msg = NULL;
131 
132 	if (lp->d_secperunit == 0)
133 		lp->d_secperunit = 0x1fffffff;
134 	lp->d_npartitions = 1;
135 	if (lp->d_partitions[0].p_size == 0)
136 		lp->d_partitions[0].p_size = 0x1fffffff;
137 	lp->d_partitions[0].p_offset = 0;
138 
139 	bp = geteblk((int)lp->d_secsize);
140 	bp->b_dev = dev;
141 	bp->b_blkno = LABELSECTOR;
142 	bp->b_bcount = lp->d_secsize;
143 	bp->b_flags = B_BUSY | B_READ;
144 	bp->b_cylin = LABELSECTOR / lp->d_secpercyl;
145 	(*strat)(bp);
146 	if (biowait(bp)) {
147 		msg = "I/O error";
148 	} else for (dlp = (struct disklabel *)bp->b_un.b_addr;
149 	    dlp <= (struct disklabel *)(bp->b_un.b_addr+DEV_BSIZE-sizeof(*dlp));
150 	    dlp = (struct disklabel *)((char *)dlp + sizeof(long))) {
151 		if (dlp->d_magic != DISKMAGIC || dlp->d_magic2 != DISKMAGIC) {
152 			if (msg == NULL)
153 				msg = "no disk label";
154 		} else if (dlp->d_npartitions > MAXPARTITIONS ||
155 			   dkcksum(dlp) != 0)
156 			msg = "disk label corrupted";
157 		else {
158 			*lp = *dlp;
159 			msg = NULL;
160 			break;
161 		}
162 	}
163 	bp->b_flags = B_INVAL | B_AGE;
164 	brelse(bp);
165 	return (msg);
166 }
167 
168 /*
169  * Check new disk label for sensibility before setting it.
170  */
171 int
172 setdisklabel(olp, nlp, openmask)
173 	register struct disklabel *olp, *nlp;
174 	u_long openmask;
175 {
176 	register i;
177 	register struct partition *opp, *npp;
178 
179 	if (nlp->d_magic != DISKMAGIC || nlp->d_magic2 != DISKMAGIC ||
180 	    dkcksum(nlp) != 0)
181 		return (EINVAL);
182 	while ((i = ffs((long)openmask)) != 0) {
183 		i--;
184 		openmask &= ~(1 << i);
185 		if (nlp->d_npartitions <= i)
186 			return (EBUSY);
187 		opp = &olp->d_partitions[i];
188 		npp = &nlp->d_partitions[i];
189 		if (npp->p_offset != opp->p_offset || npp->p_size < opp->p_size)
190 			return (EBUSY);
191 		/*
192 		 * Copy internally-set partition information
193 		 * if new label doesn't include it.		XXX
194 		 */
195 		if (npp->p_fstype == FS_UNUSED && opp->p_fstype != FS_UNUSED) {
196 			npp->p_fstype = opp->p_fstype;
197 			npp->p_fsize = opp->p_fsize;
198 			npp->p_frag = opp->p_frag;
199 			npp->p_cpg = opp->p_cpg;
200 		}
201 	}
202  	nlp->d_checksum = 0;
203  	nlp->d_checksum = dkcksum(nlp);
204 	*olp = *nlp;
205 	return (0);
206 }
207 
/*
 * Encoding of disk minor numbers, should be elsewhere...
 * The low three bits of the minor number select the partition and the
 * remaining upper bits select the unit.
 */
#define dkunit(dev)		(minor(dev) >> 3)
#define dkpart(dev)		(minor(dev) & 07)
#define dkminor(unit, part)	(((unit) << 3) | (part))
212 
213 /*
214  * Write disk label back to device after modification.
215  */
216 int
217 writedisklabel(dev, strat, lp)
218 	dev_t dev;
219 	int (*strat)();
220 	register struct disklabel *lp;
221 {
222 	struct buf *bp;
223 	struct disklabel *dlp;
224 	int labelpart;
225 	int error = 0;
226 
227 	labelpart = dkpart(dev);
228 	if (lp->d_partitions[labelpart].p_offset != 0) {
229 		if (lp->d_partitions[0].p_offset != 0)
230 			return (EXDEV);			/* not quite right */
231 		labelpart = 0;
232 	}
233 	bp = geteblk((int)lp->d_secsize);
234 	bp->b_dev = makedev(major(dev), dkminor(dkunit(dev), labelpart));
235 	bp->b_blkno = LABELSECTOR;
236 	bp->b_bcount = lp->d_secsize;
237 	bp->b_flags = B_READ;
238 	(*strat)(bp);
239 	if (error = biowait(bp))
240 		goto done;
241 	for (dlp = (struct disklabel *)bp->b_un.b_addr;
242 	    dlp <= (struct disklabel *)
243 	      (bp->b_un.b_addr + lp->d_secsize - sizeof(*dlp));
244 	    dlp = (struct disklabel *)((char *)dlp + sizeof(long))) {
245 		if (dlp->d_magic == DISKMAGIC && dlp->d_magic2 == DISKMAGIC &&
246 		    dkcksum(dlp) == 0) {
247 			*dlp = *lp;
248 			bp->b_flags = B_WRITE;
249 			(*strat)(bp);
250 			error = biowait(bp);
251 			goto done;
252 		}
253 	}
254 	error = ESRCH;
255 done:
256 	brelse(bp);
257 	return (error);
258 }
259 
260 /*
261  * Compute checksum for disk label.
262  */
263 dkcksum(lp)
264 	register struct disklabel *lp;
265 {
266 	register u_short *start, *end;
267 	register u_short sum = 0;
268 
269 	start = (u_short *)lp;
270 	end = (u_short *)&lp->d_partitions[lp->d_npartitions];
271 	while (start < end)
272 		sum ^= *start++;
273 	return (sum);
274 }
275 
276 /*
277  * Disk error is the preface to plaintive error messages
278  * about failing disk transfers.  It prints messages of the form
279 
280 hp0g: hard error reading fsbn 12345 of 12344-12347 (hp0 bn %d cn %d tn %d sn %d)
281 
282  * if the offset of the error in the transfer and a disk label
283  * are both available.  blkdone should be -1 if the position of the error
284  * is unknown; the disklabel pointer may be null from drivers that have not
285  * been converted to use them.  The message is printed with printf
286  * if pri is LOG_PRINTF, otherwise it uses log at the specified priority.
287  * The message should be completed (with at least a newline) with printf
288  * or addlog, respectively.  There is no trailing space.
289  */
290 void
291 diskerr(bp, dname, what, pri, blkdone, lp)
292 	register struct buf *bp;
293 	char *dname, *what;
294 	int pri, blkdone;
295 	register struct disklabel *lp;
296 {
297 	int unit = dkunit(bp->b_dev), part = dkpart(bp->b_dev);
298 	register void (*pr) __P((const char *, ...));
299 	char partname = 'a' + part;
300 	int sn;
301 
302 	if (pri != LOG_PRINTF) {
303 		log(pri, "");
304 		pr = addlog;
305 	} else
306 		pr = printf;
307 	(*pr)("%s%d%c: %s %sing fsbn ", dname, unit, partname, what,
308 	    bp->b_flags & B_READ ? "read" : "writ");
309 	sn = bp->b_blkno;
310 	if (bp->b_bcount <= DEV_BSIZE)
311 		(*pr)("%d", sn);
312 	else {
313 		if (blkdone >= 0) {
314 			sn += blkdone;
315 			(*pr)("%d of ", sn);
316 		}
317 		(*pr)("%d-%d", bp->b_blkno,
318 		    bp->b_blkno + (bp->b_bcount - 1) / DEV_BSIZE);
319 	}
320 	if (lp && (blkdone >= 0 || bp->b_bcount <= lp->d_secsize)) {
321 #ifdef tahoe
322 		sn *= DEV_BSIZE / lp->d_secsize;		/* XXX */
323 #endif
324 		sn += lp->d_partitions[part].p_offset;
325 		(*pr)(" (%s%d bn %d; cn %d", dname, unit, sn,
326 		    sn / lp->d_secpercyl);
327 		sn %= lp->d_secpercyl;
328 		(*pr)(" tn %d sn %d)", sn / lp->d_nsectors, sn % lp->d_nsectors);
329 	}
330 }
331