1 /* $NetBSD: subr_disk.c,v 1.17 1996/03/16 23:17:08 christos Exp $ */ 2 3 /* 4 * Copyright (c) 1995 Jason R. Thorpe. All rights reserved. 5 * Copyright (c) 1982, 1986, 1988, 1993 6 * The Regents of the University of California. All rights reserved. 7 * (c) UNIX System Laboratories, Inc. 8 * All or some portions of this file are derived from material licensed 9 * to the University of California by American Telephone and Telegraph 10 * Co. or Unix System Laboratories, Inc. and are reproduced herein with 11 * the permission of UNIX System Laboratories, Inc. 12 * 13 * Redistribution and use in source and binary forms, with or without 14 * modification, are permitted provided that the following conditions 15 * are met: 16 * 1. Redistributions of source code must retain the above copyright 17 * notice, this list of conditions and the following disclaimer. 18 * 2. Redistributions in binary form must reproduce the above copyright 19 * notice, this list of conditions and the following disclaimer in the 20 * documentation and/or other materials provided with the distribution. 21 * 3. All advertising materials mentioning features or use of this software 22 * must display the following acknowledgement: 23 * This product includes software developed by the University of 24 * California, Berkeley and its contributors. 25 * 4. Neither the name of the University nor the names of its contributors 26 * may be used to endorse or promote products derived from this software 27 * without specific prior written permission. 28 * 29 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 30 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 31 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 32 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 33 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 34 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 35 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 36 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 37 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 38 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 39 * SUCH DAMAGE. 40 * 41 * @(#)ufs_disksubr.c 8.5 (Berkeley) 1/21/94 42 */ 43 44 #include <sys/param.h> 45 #include <sys/systm.h> 46 #include <sys/kernel.h> 47 #include <sys/malloc.h> 48 #include <sys/fcntl.h> 49 #include <sys/buf.h> 50 #include <sys/stat.h> 51 #include <sys/syslog.h> 52 #include <sys/time.h> 53 #include <sys/disklabel.h> 54 #include <sys/conf.h> 55 #include <sys/disk.h> 56 #include <sys/dkio.h> 57 #include <sys/dkstat.h> /* XXX */ 58 59 /* 60 * A global list of all disks attached to the system. May grow or 61 * shrink over time. 62 */ 63 struct disklist_head disklist; /* TAILQ_HEAD */ 64 int disk_count; /* number of drives in global disklist */ 65 66 /* 67 * Old-style disk instrumentation structures. These will go away 68 * someday. 69 */ 70 long dk_seek[DK_NDRIVE]; 71 long dk_time[DK_NDRIVE]; 72 long dk_wds[DK_NDRIVE]; 73 long dk_wpms[DK_NDRIVE]; 74 long dk_xfer[DK_NDRIVE]; 75 int dk_busy; 76 int dk_ndrive; 77 int dkn; /* number of slots filled so far */ 78 79 /* 80 * Seek sort for disks. We depend on the driver which calls us using b_resid 81 * as the current cylinder number. 82 * 83 * The argument ap structure holds a b_actf activity chain pointer on which we 84 * keep two queues, sorted in ascending cylinder order. The first queue holds 85 * those requests which are positioned after the current cylinder (in the first 86 * request); the second holds requests which came in after their cylinder number 87 * was passed. Thus we implement a one way scan, retracting after reaching the 88 * end of the drive to the first request on the second queue, at which time it 89 * becomes the first queue. 90 * 91 * A one-way scan is natural because of the way UNIX read-ahead blocks are 92 * allocated. 93 */ 94 95 void 96 disksort(ap, bp) 97 register struct buf *ap, *bp; 98 { 99 register struct buf *bq; 100 101 /* If the queue is empty, then it's easy. */ 102 if (ap->b_actf == NULL) { 103 bp->b_actf = NULL; 104 ap->b_actf = bp; 105 return; 106 } 107 108 /* 109 * If we lie after the first (currently active) request, then we 110 * must locate the second request list and add ourselves to it. 111 */ 112 bq = ap->b_actf; 113 if (bp->b_cylinder < bq->b_cylinder) { 114 while (bq->b_actf) { 115 /* 116 * Check for an ``inversion'' in the normally ascending 117 * cylinder numbers, indicating the start of the second 118 * request list. 119 */ 120 if (bq->b_actf->b_cylinder < bq->b_cylinder) { 121 /* 122 * Search the second request list for the first 123 * request at a larger cylinder number. We go 124 * before that; if there is no such request, we 125 * go at end. 126 */ 127 do { 128 if (bp->b_cylinder < 129 bq->b_actf->b_cylinder) 130 goto insert; 131 if (bp->b_cylinder == 132 bq->b_actf->b_cylinder && 133 bp->b_blkno < bq->b_actf->b_blkno) 134 goto insert; 135 bq = bq->b_actf; 136 } while (bq->b_actf); 137 goto insert; /* after last */ 138 } 139 bq = bq->b_actf; 140 } 141 /* 142 * No inversions... we will go after the last, and 143 * be the first request in the second request list. 144 */ 145 goto insert; 146 } 147 /* 148 * Request is at/after the current request... 149 * sort in the first request list. 150 */ 151 while (bq->b_actf) { 152 /* 153 * We want to go after the current request if there is an 154 * inversion after it (i.e. it is the end of the first 155 * request list), or if the next request is a larger cylinder 156 * than our request. 157 */ 158 if (bq->b_actf->b_cylinder < bq->b_cylinder || 159 bp->b_cylinder < bq->b_actf->b_cylinder || 160 (bp->b_cylinder == bq->b_actf->b_cylinder && 161 bp->b_blkno < bq->b_actf->b_blkno)) 162 goto insert; 163 bq = bq->b_actf; 164 } 165 /* 166 * Neither a second list nor a larger request... we go at the end of 167 * the first list, which is the same as the end of the whole schebang. 168 */ 169 insert: bp->b_actf = bq->b_actf; 170 bq->b_actf = bp; 171 } 172 173 /* encoding of disk minor numbers, should be elsewhere... */ 174 #define dkunit(dev) (minor(dev) >> 3) 175 #define dkpart(dev) (minor(dev) & 07) 176 #define dkminor(unit, part) (((unit) << 3) | (part)) 177 178 /* 179 * Compute checksum for disk label. 180 */ 181 u_int 182 dkcksum(lp) 183 register struct disklabel *lp; 184 { 185 register u_short *start, *end; 186 register u_short sum = 0; 187 188 start = (u_short *)lp; 189 end = (u_short *)&lp->d_partitions[lp->d_npartitions]; 190 while (start < end) 191 sum ^= *start++; 192 return (sum); 193 } 194 195 /* 196 * Disk error is the preface to plaintive error messages 197 * about failing disk transfers. It prints messages of the form 198 199 hp0g: hard error reading fsbn 12345 of 12344-12347 (hp0 bn %d cn %d tn %d sn %d) 200 201 * if the offset of the error in the transfer and a disk label 202 * are both available. blkdone should be -1 if the position of the error 203 * is unknown; the disklabel pointer may be null from drivers that have not 204 * been converted to use them. The message is printed with printf 205 * if pri is LOG_PRINTF, otherwise it uses log at the specified priority. 206 * The message should be completed (with at least a newline) with printf 207 * or addlog, respectively. There is no trailing space. 208 */ 209 void 210 diskerr(bp, dname, what, pri, blkdone, lp) 211 register struct buf *bp; 212 char *dname, *what; 213 int pri, blkdone; 214 register struct disklabel *lp; 215 { 216 int unit = dkunit(bp->b_dev), part = dkpart(bp->b_dev); 217 register int (*pr) __P((const char *, ...)); 218 char partname = 'a' + part; 219 int sn; 220 221 if (pri != LOG_PRINTF) { 222 static const char fmt[] = ""; 223 log(pri, fmt); 224 pr = addlog; 225 } else 226 pr = printf; 227 (*pr)("%s%d%c: %s %sing fsbn ", dname, unit, partname, what, 228 bp->b_flags & B_READ ? "read" : "writ"); 229 sn = bp->b_blkno; 230 if (bp->b_bcount <= DEV_BSIZE) 231 (*pr)("%d", sn); 232 else { 233 if (blkdone >= 0) { 234 sn += blkdone; 235 (*pr)("%d of ", sn); 236 } 237 (*pr)("%d-%d", bp->b_blkno, 238 bp->b_blkno + (bp->b_bcount - 1) / DEV_BSIZE); 239 } 240 if (lp && (blkdone >= 0 || bp->b_bcount <= lp->d_secsize)) { 241 #ifdef tahoe 242 sn *= DEV_BSIZE / lp->d_secsize; /* XXX */ 243 #endif 244 sn += lp->d_partitions[part].p_offset; 245 (*pr)(" (%s%d bn %d; cn %d", dname, unit, sn, 246 sn / lp->d_secpercyl); 247 sn %= lp->d_secpercyl; 248 (*pr)(" tn %d sn %d)", sn / lp->d_nsectors, sn % lp->d_nsectors); 249 } 250 } 251 252 /* 253 * Initialize the disklist. Called by main() before autoconfiguration. 254 */ 255 void 256 disk_init() 257 { 258 259 TAILQ_INIT(&disklist); 260 disk_count = 0; 261 dk_ndrive = DK_NDRIVE; /* XXX */ 262 } 263 264 /* 265 * Searches the disklist for the disk corresponding to the 266 * name provided. 267 */ 268 struct disk * 269 disk_find(name) 270 char *name; 271 { 272 struct disk *diskp; 273 274 if ((name == NULL) || (disk_count <= 0)) 275 return (NULL); 276 277 for (diskp = disklist.tqh_first; diskp != NULL; 278 diskp = diskp->dk_link.tqe_next) 279 if (strcmp(diskp->dk_name, name) == 0) 280 return (diskp); 281 282 return (NULL); 283 } 284 285 /* 286 * Attach a disk. 287 */ 288 void 289 disk_attach(diskp) 290 struct disk *diskp; 291 { 292 int s; 293 294 /* 295 * Allocate and initialize the disklabel structures. Note that 296 * it's not safe to sleep here, since we're probably going to be 297 * called during autoconfiguration. 298 */ 299 diskp->dk_label = malloc(sizeof(struct disklabel), M_DEVBUF, M_NOWAIT); 300 diskp->dk_cpulabel = malloc(sizeof(struct cpu_disklabel), M_DEVBUF, 301 M_NOWAIT); 302 if ((diskp->dk_label == NULL) || (diskp->dk_cpulabel == NULL)) 303 panic("disk_attach: can't allocate storage for disklabel"); 304 305 bzero(diskp->dk_label, sizeof(struct disklabel)); 306 bzero(diskp->dk_cpulabel, sizeof(struct cpu_disklabel)); 307 308 /* 309 * Set the attached timestamp. 310 */ 311 s = splclock(); 312 diskp->dk_attachtime = mono_time; 313 splx(s); 314 315 /* 316 * Link into the disklist. 317 */ 318 TAILQ_INSERT_TAIL(&disklist, diskp, dk_link); 319 ++disk_count; 320 } 321 322 /* 323 * Detach a disk. 324 */ 325 void 326 disk_detach(diskp) 327 struct disk *diskp; 328 { 329 330 /* 331 * Free the space used by the disklabel structures. 332 */ 333 free(diskp->dk_label, M_DEVBUF); 334 free(diskp->dk_cpulabel, M_DEVBUF); 335 336 /* 337 * Remove from the disklist. 338 */ 339 TAILQ_REMOVE(&disklist, diskp, dk_link); 340 if (--disk_count < 0) 341 panic("disk_detach: disk_count < 0"); 342 } 343 344 /* 345 * Increment a disk's busy counter. If the counter is going from 346 * 0 to 1, set the timestamp. 347 */ 348 void 349 disk_busy(diskp) 350 struct disk *diskp; 351 { 352 int s; 353 354 /* 355 * XXX We'd like to use something as accurate as microtime(), 356 * but that doesn't depend on the system TOD clock. 357 */ 358 if (diskp->dk_busy++ == 0) { 359 s = splclock(); 360 diskp->dk_timestamp = mono_time; 361 splx(s); 362 } 363 } 364 365 /* 366 * Decrement a disk's busy counter, increment the byte count, total busy 367 * time, and reset the timestamp. 368 */ 369 void 370 disk_unbusy(diskp, bcount) 371 struct disk *diskp; 372 long bcount; 373 { 374 int s; 375 struct timeval dv_time, diff_time; 376 377 if (diskp->dk_busy-- == 0) 378 printf("disk_unbusy: %s: dk_busy < 0\n", diskp->dk_name); 379 380 s = splclock(); 381 dv_time = mono_time; 382 splx(s); 383 384 timersub(&dv_time, &diskp->dk_timestamp, &diff_time); 385 timeradd(&diskp->dk_time, &diff_time, &diskp->dk_time); 386 387 diskp->dk_timestamp = dv_time; 388 if (bcount > 0) { 389 diskp->dk_bytes += bcount; 390 diskp->dk_xfer++; 391 } 392 diskp->dk_seek++; 393 } 394 395 /* 396 * Reset the metrics counters on the given disk. Note that we cannot 397 * reset the busy counter, as it may case a panic in disk_unbusy(). 398 * We also must avoid playing with the timestamp information, as it 399 * may skew any pending transfer results. 400 */ 401 void 402 disk_resetstat(diskp) 403 struct disk *diskp; 404 { 405 int s = splbio(), t; 406 407 diskp->dk_xfer = 0; 408 diskp->dk_bytes = 0; 409 diskp->dk_seek = 0; 410 411 t = splclock(); 412 diskp->dk_attachtime = mono_time; 413 splx(t); 414 415 timerclear(&diskp->dk_time); 416 417 splx(s); 418 } 419 420 421 int 422 dk_mountroot() 423 { 424 dev_t rawdev, rrootdev; 425 int part = DISKPART(rootdev); 426 int (*mountrootfn) __P((void)); 427 extern struct proc *curproc; 428 struct disklabel dl; 429 int error; 430 431 rrootdev = blktochr(rootdev); 432 rawdev = MAKEDISKDEV(major(rrootdev), DISKUNIT(rootdev), RAW_PART); 433 printf("rootdev=0x%x rrootdev=0x%x rawdev=0x%x\n", rootdev, 434 rrootdev, rawdev); 435 436 /* 437 * open device, ioctl for the disklabel, and close it. 438 */ 439 error = (cdevsw[major(rrootdev)].d_open)(rawdev, FREAD, 440 S_IFCHR, curproc); 441 if (error) 442 panic("cannot open disk, 0x%x/0x%x, error %d", 443 rootdev, rrootdev, error); 444 error = (cdevsw[major(rrootdev)].d_ioctl)(rawdev, DIOCGDINFO, 445 (caddr_t)&dl, FREAD, curproc); 446 if (error) 447 panic("cannot read disk label, 0x%x/0x%x, error %d", 448 rootdev, rrootdev, error); 449 (void) (cdevsw[major(rrootdev)].d_close)(rawdev, FREAD, 450 S_IFCHR, curproc); 451 452 if (dl.d_partitions[part].p_size == 0) 453 panic("root filesystem has size 0"); 454 switch (dl.d_partitions[part].p_fstype) { 455 #ifdef EXT2FS 456 case FS_EXT2FS: 457 { 458 extern int ext2fs_mountroot __P((void)); 459 mountrootfn = ext2fs_mountroot; 460 } 461 break; 462 #endif 463 #ifdef FFS 464 case FS_BSDFFS: 465 { 466 extern int ffs_mountroot __P((void)); 467 mountrootfn = ffs_mountroot; 468 } 469 break; 470 #endif 471 #ifdef LFS 472 case FS_BSDLFS: 473 { 474 extern int lfs_mountroot __P((void)); 475 mountrootfn = lfs_mountroot; 476 } 477 break; 478 #endif 479 #ifdef CD9660 480 case FS_ISO9660: 481 { 482 extern int cd9660_mountroot __P((void)); 483 mountrootfn = cd9660_mountroot; 484 } 485 break; 486 #endif 487 default: 488 panic("filesystem type %d not known", 489 dl.d_partitions[part].p_fstype); 490 } 491 return (*mountrootfn)(); 492 } 493