xref: /openbsd/sys/kern/subr_disk.c (revision 07ea8d15)
1 /*	$NetBSD: subr_disk.c,v 1.17 1996/03/16 23:17:08 christos Exp $	*/
2 
3 /*
4  * Copyright (c) 1995 Jason R. Thorpe.  All rights reserved.
5  * Copyright (c) 1982, 1986, 1988, 1993
6  *	The Regents of the University of California.  All rights reserved.
7  * (c) UNIX System Laboratories, Inc.
8  * All or some portions of this file are derived from material licensed
9  * to the University of California by American Telephone and Telegraph
10  * Co. or Unix System Laboratories, Inc. and are reproduced herein with
11  * the permission of UNIX System Laboratories, Inc.
12  *
13  * Redistribution and use in source and binary forms, with or without
14  * modification, are permitted provided that the following conditions
15  * are met:
16  * 1. Redistributions of source code must retain the above copyright
17  *    notice, this list of conditions and the following disclaimer.
18  * 2. Redistributions in binary form must reproduce the above copyright
19  *    notice, this list of conditions and the following disclaimer in the
20  *    documentation and/or other materials provided with the distribution.
21  * 3. All advertising materials mentioning features or use of this software
22  *    must display the following acknowledgement:
23  *	This product includes software developed by the University of
24  *	California, Berkeley and its contributors.
25  * 4. Neither the name of the University nor the names of its contributors
26  *    may be used to endorse or promote products derived from this software
27  *    without specific prior written permission.
28  *
29  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
30  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
31  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
32  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
33  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
34  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
35  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
36  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
37  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
38  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
39  * SUCH DAMAGE.
40  *
41  *	@(#)ufs_disksubr.c	8.5 (Berkeley) 1/21/94
42  */
43 
44 #include <sys/param.h>
45 #include <sys/systm.h>
46 #include <sys/kernel.h>
47 #include <sys/malloc.h>
48 #include <sys/fcntl.h>
49 #include <sys/buf.h>
50 #include <sys/stat.h>
51 #include <sys/syslog.h>
52 #include <sys/time.h>
53 #include <sys/disklabel.h>
54 #include <sys/conf.h>
55 #include <sys/disk.h>
56 #include <sys/dkio.h>
57 #include <sys/dkstat.h>		/* XXX */
58 
/*
 * A global list of all disks attached to the system.  May grow or
 * shrink over time: disk_attach() appends an entry and disk_detach()
 * removes one, and both keep disk_count in step with the list.
 */
struct	disklist_head disklist;	/* TAILQ_HEAD */
int	disk_count;		/* number of drives in global disklist */

/*
 * Old-style disk instrumentation structures.  These will go away
 * someday.  The per-drive arrays each have DK_NDRIVE slots.
 */
long	dk_seek[DK_NDRIVE];
long	dk_time[DK_NDRIVE];
long	dk_wds[DK_NDRIVE];
long	dk_wpms[DK_NDRIVE];
long	dk_xfer[DK_NDRIVE];
int	dk_busy;
int	dk_ndrive;		/* set to DK_NDRIVE by disk_init() */
int	dkn;			/* number of slots filled so far */
78 
/*
 * Seek sort for disks.  We depend on the driver which calls us using b_resid
 * as the current cylinder number.
 *
 * The argument ap structure holds a b_actf activity chain pointer on which we
 * keep two queues, sorted in ascending cylinder order.  The first queue holds
 * those requests which are positioned after the current cylinder (in the first
 * request); the second holds requests which came in after their cylinder number
 * was passed.  Thus we implement a one way scan, retracting after reaching the
 * end of the drive to the first request on the second queue, at which time it
 * becomes the first queue.
 *
 * A one-way scan is natural because of the way UNIX read-ahead blocks are
 * allocated.
 *
 * Arguments:
 *	ap	queue head; ap->b_actf points at the first queued request
 *		(NULL when the queue is empty).
 *	bp	the new request to link into the chain.
 *
 * Requests on the same cylinder are ordered by ascending b_blkno; a new
 * request that ties on both is placed after the existing ones.
 */

void
disksort(ap, bp)
	register struct buf *ap, *bp;
{
	register struct buf *bq;	/* element we will be inserted after */

	/* If the queue is empty, then it's easy. */
	if (ap->b_actf == NULL) {
		bp->b_actf = NULL;
		ap->b_actf = bp;
		return;
	}

	/*
	 * If we lie before the first (currently active) request, then we
	 * must locate the second request list and add ourselves to it.
	 */
	bq = ap->b_actf;
	if (bp->b_cylinder < bq->b_cylinder) {
		while (bq->b_actf) {
			/*
			 * Check for an ``inversion'' in the normally ascending
			 * cylinder numbers, indicating the start of the second
			 * request list.
			 */
			if (bq->b_actf->b_cylinder < bq->b_cylinder) {
				/*
				 * Search the second request list for the first
				 * request at a larger cylinder number.  We go
				 * before that; if there is no such request, we
				 * go at end.  Within one cylinder, we go before
				 * the first request with a larger block number.
				 */
				do {
					if (bp->b_cylinder <
					    bq->b_actf->b_cylinder)
						goto insert;
					if (bp->b_cylinder ==
					    bq->b_actf->b_cylinder &&
					    bp->b_blkno < bq->b_actf->b_blkno)
						goto insert;
					bq = bq->b_actf;
				} while (bq->b_actf);
				goto insert;		/* after last */
			}
			bq = bq->b_actf;
		}
		/*
		 * No inversions... we will go after the last, and
		 * be the first request in the second request list.
		 */
		goto insert;
	}
	/*
	 * Request is at/after the current request...
	 * sort in the first request list.
	 */
	while (bq->b_actf) {
		/*
		 * We want to go after the current request if there is an
		 * inversion after it (i.e. it is the end of the first
		 * request list), or if the next request is a larger cylinder
		 * than our request (or the same cylinder but a larger block
		 * number).
		 */
		if (bq->b_actf->b_cylinder < bq->b_cylinder ||
		    bp->b_cylinder < bq->b_actf->b_cylinder ||
		    (bp->b_cylinder == bq->b_actf->b_cylinder &&
		    bp->b_blkno < bq->b_actf->b_blkno))
			goto insert;
		bq = bq->b_actf;
	}
	/*
	 * Neither a second list nor a larger request... we go at the end of
	 * the first list, which is the same as the end of the whole schebang.
	 */
	/* Link bp into the chain immediately after bq. */
insert:	bp->b_actf = bq->b_actf;
	bq->b_actf = bp;
}
172 
/*
 * Encoding of disk minor numbers, should be elsewhere...
 *
 * The low three bits of the minor number select the partition and the
 * bits above them select the unit; dkminor() builds a minor number
 * from a unit/partition pair.
 */
#define dkunit(dev)		(minor(dev) >> 3)
#define dkpart(dev)		(minor(dev) & 07)
#define dkminor(unit, part)	(((unit) << 3) | (part))
177 
178 /*
179  * Compute checksum for disk label.
180  */
181 u_int
182 dkcksum(lp)
183 	register struct disklabel *lp;
184 {
185 	register u_short *start, *end;
186 	register u_short sum = 0;
187 
188 	start = (u_short *)lp;
189 	end = (u_short *)&lp->d_partitions[lp->d_npartitions];
190 	while (start < end)
191 		sum ^= *start++;
192 	return (sum);
193 }
194 
195 /*
196  * Disk error is the preface to plaintive error messages
197  * about failing disk transfers.  It prints messages of the form
198 
199 hp0g: hard error reading fsbn 12345 of 12344-12347 (hp0 bn %d cn %d tn %d sn %d)
200 
201  * if the offset of the error in the transfer and a disk label
202  * are both available.  blkdone should be -1 if the position of the error
203  * is unknown; the disklabel pointer may be null from drivers that have not
204  * been converted to use them.  The message is printed with printf
205  * if pri is LOG_PRINTF, otherwise it uses log at the specified priority.
206  * The message should be completed (with at least a newline) with printf
207  * or addlog, respectively.  There is no trailing space.
208  */
209 void
210 diskerr(bp, dname, what, pri, blkdone, lp)
211 	register struct buf *bp;
212 	char *dname, *what;
213 	int pri, blkdone;
214 	register struct disklabel *lp;
215 {
216 	int unit = dkunit(bp->b_dev), part = dkpart(bp->b_dev);
217 	register int (*pr) __P((const char *, ...));
218 	char partname = 'a' + part;
219 	int sn;
220 
221 	if (pri != LOG_PRINTF) {
222 		static const char fmt[] = "";
223 		log(pri, fmt);
224 		pr = addlog;
225 	} else
226 		pr = printf;
227 	(*pr)("%s%d%c: %s %sing fsbn ", dname, unit, partname, what,
228 	    bp->b_flags & B_READ ? "read" : "writ");
229 	sn = bp->b_blkno;
230 	if (bp->b_bcount <= DEV_BSIZE)
231 		(*pr)("%d", sn);
232 	else {
233 		if (blkdone >= 0) {
234 			sn += blkdone;
235 			(*pr)("%d of ", sn);
236 		}
237 		(*pr)("%d-%d", bp->b_blkno,
238 		    bp->b_blkno + (bp->b_bcount - 1) / DEV_BSIZE);
239 	}
240 	if (lp && (blkdone >= 0 || bp->b_bcount <= lp->d_secsize)) {
241 #ifdef tahoe
242 		sn *= DEV_BSIZE / lp->d_secsize;		/* XXX */
243 #endif
244 		sn += lp->d_partitions[part].p_offset;
245 		(*pr)(" (%s%d bn %d; cn %d", dname, unit, sn,
246 		    sn / lp->d_secpercyl);
247 		sn %= lp->d_secpercyl;
248 		(*pr)(" tn %d sn %d)", sn / lp->d_nsectors, sn % lp->d_nsectors);
249 	}
250 }
251 
252 /*
253  * Initialize the disklist.  Called by main() before autoconfiguration.
254  */
255 void
256 disk_init()
257 {
258 
259 	TAILQ_INIT(&disklist);
260 	disk_count = 0;
261 	dk_ndrive = DK_NDRIVE;		/* XXX */
262 }
263 
264 /*
265  * Searches the disklist for the disk corresponding to the
266  * name provided.
267  */
268 struct disk *
269 disk_find(name)
270 	char *name;
271 {
272 	struct disk *diskp;
273 
274 	if ((name == NULL) || (disk_count <= 0))
275 		return (NULL);
276 
277 	for (diskp = disklist.tqh_first; diskp != NULL;
278 	    diskp = diskp->dk_link.tqe_next)
279 		if (strcmp(diskp->dk_name, name) == 0)
280 			return (diskp);
281 
282 	return (NULL);
283 }
284 
285 /*
286  * Attach a disk.
287  */
288 void
289 disk_attach(diskp)
290 	struct disk *diskp;
291 {
292 	int s;
293 
294 	/*
295 	 * Allocate and initialize the disklabel structures.  Note that
296 	 * it's not safe to sleep here, since we're probably going to be
297 	 * called during autoconfiguration.
298 	 */
299 	diskp->dk_label = malloc(sizeof(struct disklabel), M_DEVBUF, M_NOWAIT);
300 	diskp->dk_cpulabel = malloc(sizeof(struct cpu_disklabel), M_DEVBUF,
301 	    M_NOWAIT);
302 	if ((diskp->dk_label == NULL) || (diskp->dk_cpulabel == NULL))
303 		panic("disk_attach: can't allocate storage for disklabel");
304 
305 	bzero(diskp->dk_label, sizeof(struct disklabel));
306 	bzero(diskp->dk_cpulabel, sizeof(struct cpu_disklabel));
307 
308 	/*
309 	 * Set the attached timestamp.
310 	 */
311 	s = splclock();
312 	diskp->dk_attachtime = mono_time;
313 	splx(s);
314 
315 	/*
316 	 * Link into the disklist.
317 	 */
318 	TAILQ_INSERT_TAIL(&disklist, diskp, dk_link);
319 	++disk_count;
320 }
321 
322 /*
323  * Detach a disk.
324  */
325 void
326 disk_detach(diskp)
327 	struct disk *diskp;
328 {
329 
330 	/*
331 	 * Free the space used by the disklabel structures.
332 	 */
333 	free(diskp->dk_label, M_DEVBUF);
334 	free(diskp->dk_cpulabel, M_DEVBUF);
335 
336 	/*
337 	 * Remove from the disklist.
338 	 */
339 	TAILQ_REMOVE(&disklist, diskp, dk_link);
340 	if (--disk_count < 0)
341 		panic("disk_detach: disk_count < 0");
342 }
343 
344 /*
345  * Increment a disk's busy counter.  If the counter is going from
346  * 0 to 1, set the timestamp.
347  */
348 void
349 disk_busy(diskp)
350 	struct disk *diskp;
351 {
352 	int s;
353 
354 	/*
355 	 * XXX We'd like to use something as accurate as microtime(),
356 	 * but that doesn't depend on the system TOD clock.
357 	 */
358 	if (diskp->dk_busy++ == 0) {
359 		s = splclock();
360 		diskp->dk_timestamp = mono_time;
361 		splx(s);
362 	}
363 }
364 
365 /*
366  * Decrement a disk's busy counter, increment the byte count, total busy
367  * time, and reset the timestamp.
368  */
369 void
370 disk_unbusy(diskp, bcount)
371 	struct disk *diskp;
372 	long bcount;
373 {
374 	int s;
375 	struct timeval dv_time, diff_time;
376 
377 	if (diskp->dk_busy-- == 0)
378 		printf("disk_unbusy: %s: dk_busy < 0\n", diskp->dk_name);
379 
380 	s = splclock();
381 	dv_time = mono_time;
382 	splx(s);
383 
384 	timersub(&dv_time, &diskp->dk_timestamp, &diff_time);
385 	timeradd(&diskp->dk_time, &diff_time, &diskp->dk_time);
386 
387 	diskp->dk_timestamp = dv_time;
388 	if (bcount > 0) {
389 		diskp->dk_bytes += bcount;
390 		diskp->dk_xfer++;
391 	}
392 	diskp->dk_seek++;
393 }
394 
395 /*
396  * Reset the metrics counters on the given disk.  Note that we cannot
397  * reset the busy counter, as it may case a panic in disk_unbusy().
398  * We also must avoid playing with the timestamp information, as it
399  * may skew any pending transfer results.
400  */
401 void
402 disk_resetstat(diskp)
403 	struct disk *diskp;
404 {
405 	int s = splbio(), t;
406 
407 	diskp->dk_xfer = 0;
408 	diskp->dk_bytes = 0;
409 	diskp->dk_seek = 0;
410 
411 	t = splclock();
412 	diskp->dk_attachtime = mono_time;
413 	splx(t);
414 
415 	timerclear(&diskp->dk_time);
416 
417 	splx(s);
418 }
419 
420 
421 int
422 dk_mountroot()
423 {
424 	dev_t rawdev, rrootdev;
425 	int part = DISKPART(rootdev);
426 	int (*mountrootfn) __P((void));
427 	extern struct proc *curproc;
428 	struct disklabel dl;
429 	int error;
430 
431 	rrootdev = blktochr(rootdev);
432 	rawdev = MAKEDISKDEV(major(rrootdev), DISKUNIT(rootdev), RAW_PART);
433 	printf("rootdev=0x%x rrootdev=0x%x rawdev=0x%x\n", rootdev,
434 	    rrootdev, rawdev);
435 
436 	/*
437 	 * open device, ioctl for the disklabel, and close it.
438 	 */
439 	error = (cdevsw[major(rrootdev)].d_open)(rawdev, FREAD,
440 	    S_IFCHR, curproc);
441 	if (error)
442 		panic("cannot open disk, 0x%x/0x%x, error %d",
443 		    rootdev, rrootdev, error);
444 	error = (cdevsw[major(rrootdev)].d_ioctl)(rawdev, DIOCGDINFO,
445 	    (caddr_t)&dl, FREAD, curproc);
446 	if (error)
447 		panic("cannot read disk label, 0x%x/0x%x, error %d",
448 		    rootdev, rrootdev, error);
449 	(void) (cdevsw[major(rrootdev)].d_close)(rawdev, FREAD,
450 	    S_IFCHR, curproc);
451 
452 	if (dl.d_partitions[part].p_size == 0)
453 		panic("root filesystem has size 0");
454 	switch (dl.d_partitions[part].p_fstype) {
455 #ifdef EXT2FS
456 	case FS_EXT2FS:
457 		{
458 		extern int ext2fs_mountroot __P((void));
459 		mountrootfn = ext2fs_mountroot;
460 		}
461 		break;
462 #endif
463 #ifdef FFS
464 	case FS_BSDFFS:
465 		{
466 		extern int ffs_mountroot __P((void));
467 		mountrootfn = ffs_mountroot;
468 		}
469 		break;
470 #endif
471 #ifdef LFS
472 	case FS_BSDLFS:
473 		{
474 		extern int lfs_mountroot __P((void));
475 		mountrootfn = lfs_mountroot;
476 		}
477 		break;
478 #endif
479 #ifdef CD9660
480 	case FS_ISO9660:
481 		{
482 		extern int cd9660_mountroot __P((void));
483 		mountrootfn = cd9660_mountroot;
484 		}
485 		break;
486 #endif
487 	default:
488 		panic("filesystem type %d not known",
489 		    dl.d_partitions[part].p_fstype);
490 	}
491 	return (*mountrootfn)();
492 }
493