1 /* 2 * Copyright (c) 1982, 1986, 1988, 1993 3 * The Regents of the University of California. All rights reserved. 4 * 5 * %sccs.include.redist.c% 6 * 7 * @(#)ufs_disksubr.c 8.4 (Berkeley) 09/23/93 8 */ 9 10 #include <sys/param.h> 11 #include <sys/systm.h> 12 #include <sys/buf.h> 13 #include <sys/disklabel.h> 14 #include <sys/syslog.h> 15 16 /* 17 * Seek sort for disks. We depend on the driver which calls us using b_resid 18 * as the current cylinder number. 19 * 20 * The argument ap structure holds a b_actf activity chain pointer on which we 21 * keep two queues, sorted in ascending cylinder order. The first queue holds 22 * those requests which are positioned after the current cylinder (in the first 23 * request); the second holds requests which came in after their cylinder number 24 * was passed. Thus we implement a one way scan, retracting after reaching the 25 * end of the drive to the first request on the second queue, at which time it 26 * becomes the first queue. 27 * 28 * A one-way scan is natural because of the way UNIX read-ahead blocks are 29 * allocated. 30 */ 31 32 /* 33 * For portability with historic industry practice, the 34 * cylinder number has to be maintained in the `b_resid' 35 * field. 36 */ 37 #define b_cylinder b_resid 38 39 void 40 disksort(ap, bp) 41 register struct buf *ap, *bp; 42 { 43 register struct buf *bq; 44 45 /* If the queue is empty, then it's easy. */ 46 if (ap->b_actf == NULL) { 47 bp->b_actf = NULL; 48 ap->b_actf = bp; 49 return; 50 } 51 52 /* 53 * If we lie after the first (currently active) request, then we 54 * must locate the second request list and add ourselves to it. 55 */ 56 bq = ap->b_actf; 57 if (bp->b_cylinder < bq->b_cylinder) { 58 while (bq->b_actf) { 59 /* 60 * Check for an ``inversion'' in the normally ascending 61 * cylinder numbers, indicating the start of the second 62 * request list. 63 */ 64 if (bq->b_actf->b_cylinder < bq->b_cylinder) { 65 /* 66 * Search the second request list for the first 67 * request at a larger cylinder number. We go 68 * before that; if there is no such request, we 69 * go at end. 70 */ 71 do { 72 if (bp->b_cylinder < 73 bq->b_actf->b_cylinder) 74 goto insert; 75 if (bp->b_cylinder == 76 bq->b_actf->b_cylinder && 77 bp->b_blkno < bq->b_actf->b_blkno) 78 goto insert; 79 bq = bq->b_actf; 80 } while (bq->b_actf); 81 goto insert; /* after last */ 82 } 83 bq = bq->b_actf; 84 } 85 /* 86 * No inversions... we will go after the last, and 87 * be the first request in the second request list. 88 */ 89 goto insert; 90 } 91 /* 92 * Request is at/after the current request... 93 * sort in the first request list. 94 */ 95 while (bq->b_actf) { 96 /* 97 * We want to go after the current request if there is an 98 * inversion after it (i.e. it is the end of the first 99 * request list), or if the next request is a larger cylinder 100 * than our request. 101 */ 102 if (bq->b_actf->b_cylinder < bq->b_cylinder || 103 bp->b_cylinder < bq->b_actf->b_cylinder || 104 (bp->b_cylinder == bq->b_actf->b_cylinder && 105 bp->b_blkno < bq->b_actf->b_blkno)) 106 goto insert; 107 bq = bq->b_actf; 108 } 109 /* 110 * Neither a second list nor a larger request... we go at the end of 111 * the first list, which is the same as the end of the whole schebang. 112 */ 113 insert: bp->b_actf = bq->b_actf; 114 bq->b_actf = bp; 115 } 116 117 /* 118 * Attempt to read a disk label from a device using the indicated stategy 119 * routine. The label must be partly set up before this: secpercyl and 120 * anything required in the strategy routine (e.g., sector size) must be 121 * filled in before calling us. Returns NULL on success and an error 122 * string on failure. 123 */ 124 char * 125 readdisklabel(dev, strat, lp) 126 dev_t dev; 127 int (*strat)(); 128 register struct disklabel *lp; 129 { 130 register struct buf *bp; 131 struct disklabel *dlp; 132 char *msg = NULL; 133 134 if (lp->d_secperunit == 0) 135 lp->d_secperunit = 0x1fffffff; 136 lp->d_npartitions = 1; 137 if (lp->d_partitions[0].p_size == 0) 138 lp->d_partitions[0].p_size = 0x1fffffff; 139 lp->d_partitions[0].p_offset = 0; 140 141 bp = geteblk((int)lp->d_secsize); 142 bp->b_dev = dev; 143 bp->b_blkno = LABELSECTOR; 144 bp->b_bcount = lp->d_secsize; 145 bp->b_flags = B_BUSY | B_READ; 146 bp->b_cylinder = LABELSECTOR / lp->d_secpercyl; 147 (*strat)(bp); 148 if (biowait(bp)) 149 msg = "I/O error"; 150 else for (dlp = (struct disklabel *)bp->b_data; 151 dlp <= (struct disklabel *)((char *)bp->b_data + 152 DEV_BSIZE - sizeof(*dlp)); 153 dlp = (struct disklabel *)((char *)dlp + sizeof(long))) { 154 if (dlp->d_magic != DISKMAGIC || dlp->d_magic2 != DISKMAGIC) { 155 if (msg == NULL) 156 msg = "no disk label"; 157 } else if (dlp->d_npartitions > MAXPARTITIONS || 158 dkcksum(dlp) != 0) 159 msg = "disk label corrupted"; 160 else { 161 *lp = *dlp; 162 msg = NULL; 163 break; 164 } 165 } 166 bp->b_flags = B_INVAL | B_AGE; 167 brelse(bp); 168 return (msg); 169 } 170 171 /* 172 * Check new disk label for sensibility before setting it. 173 */ 174 int 175 setdisklabel(olp, nlp, openmask) 176 register struct disklabel *olp, *nlp; 177 u_long openmask; 178 { 179 register i; 180 register struct partition *opp, *npp; 181 182 if (nlp->d_magic != DISKMAGIC || nlp->d_magic2 != DISKMAGIC || 183 dkcksum(nlp) != 0) 184 return (EINVAL); 185 while ((i = ffs((long)openmask)) != 0) { 186 i--; 187 openmask &= ~(1 << i); 188 if (nlp->d_npartitions <= i) 189 return (EBUSY); 190 opp = &olp->d_partitions[i]; 191 npp = &nlp->d_partitions[i]; 192 if (npp->p_offset != opp->p_offset || npp->p_size < opp->p_size) 193 return (EBUSY); 194 /* 195 * Copy internally-set partition information 196 * if new label doesn't include it. XXX 197 */ 198 if (npp->p_fstype == FS_UNUSED && opp->p_fstype != FS_UNUSED) { 199 npp->p_fstype = opp->p_fstype; 200 npp->p_fsize = opp->p_fsize; 201 npp->p_frag = opp->p_frag; 202 npp->p_cpg = opp->p_cpg; 203 } 204 } 205 nlp->d_checksum = 0; 206 nlp->d_checksum = dkcksum(nlp); 207 *olp = *nlp; 208 return (0); 209 } 210 211 /* encoding of disk minor numbers, should be elsewhere... */ 212 #define dkunit(dev) (minor(dev) >> 3) 213 #define dkpart(dev) (minor(dev) & 07) 214 #define dkminor(unit, part) (((unit) << 3) | (part)) 215 216 /* 217 * Write disk label back to device after modification. 218 */ 219 int 220 writedisklabel(dev, strat, lp) 221 dev_t dev; 222 int (*strat)(); 223 register struct disklabel *lp; 224 { 225 struct buf *bp; 226 struct disklabel *dlp; 227 int labelpart; 228 int error = 0; 229 230 labelpart = dkpart(dev); 231 if (lp->d_partitions[labelpart].p_offset != 0) { 232 if (lp->d_partitions[0].p_offset != 0) 233 return (EXDEV); /* not quite right */ 234 labelpart = 0; 235 } 236 bp = geteblk((int)lp->d_secsize); 237 bp->b_dev = makedev(major(dev), dkminor(dkunit(dev), labelpart)); 238 bp->b_blkno = LABELSECTOR; 239 bp->b_bcount = lp->d_secsize; 240 bp->b_flags = B_READ; 241 (*strat)(bp); 242 if (error = biowait(bp)) 243 goto done; 244 for (dlp = (struct disklabel *)bp->b_data; 245 dlp <= (struct disklabel *) 246 ((char *)bp->b_data + lp->d_secsize - sizeof(*dlp)); 247 dlp = (struct disklabel *)((char *)dlp + sizeof(long))) { 248 if (dlp->d_magic == DISKMAGIC && dlp->d_magic2 == DISKMAGIC && 249 dkcksum(dlp) == 0) { 250 *dlp = *lp; 251 bp->b_flags = B_WRITE; 252 (*strat)(bp); 253 error = biowait(bp); 254 goto done; 255 } 256 } 257 error = ESRCH; 258 done: 259 brelse(bp); 260 return (error); 261 } 262 263 /* 264 * Compute checksum for disk label. 265 */ 266 dkcksum(lp) 267 register struct disklabel *lp; 268 { 269 register u_short *start, *end; 270 register u_short sum = 0; 271 272 start = (u_short *)lp; 273 end = (u_short *)&lp->d_partitions[lp->d_npartitions]; 274 while (start < end) 275 sum ^= *start++; 276 return (sum); 277 } 278 279 /* 280 * Disk error is the preface to plaintive error messages 281 * about failing disk transfers. It prints messages of the form 282 283 hp0g: hard error reading fsbn 12345 of 12344-12347 (hp0 bn %d cn %d tn %d sn %d) 284 285 * if the offset of the error in the transfer and a disk label 286 * are both available. blkdone should be -1 if the position of the error 287 * is unknown; the disklabel pointer may be null from drivers that have not 288 * been converted to use them. The message is printed with printf 289 * if pri is LOG_PRINTF, otherwise it uses log at the specified priority. 290 * The message should be completed (with at least a newline) with printf 291 * or addlog, respectively. There is no trailing space. 292 */ 293 void 294 diskerr(bp, dname, what, pri, blkdone, lp) 295 register struct buf *bp; 296 char *dname, *what; 297 int pri, blkdone; 298 register struct disklabel *lp; 299 { 300 int unit = dkunit(bp->b_dev), part = dkpart(bp->b_dev); 301 register void (*pr) __P((const char *, ...)); 302 char partname = 'a' + part; 303 int sn; 304 305 if (pri != LOG_PRINTF) { 306 log(pri, ""); 307 pr = addlog; 308 } else 309 pr = printf; 310 (*pr)("%s%d%c: %s %sing fsbn ", dname, unit, partname, what, 311 bp->b_flags & B_READ ? "read" : "writ"); 312 sn = bp->b_blkno; 313 if (bp->b_bcount <= DEV_BSIZE) 314 (*pr)("%d", sn); 315 else { 316 if (blkdone >= 0) { 317 sn += blkdone; 318 (*pr)("%d of ", sn); 319 } 320 (*pr)("%d-%d", bp->b_blkno, 321 bp->b_blkno + (bp->b_bcount - 1) / DEV_BSIZE); 322 } 323 if (lp && (blkdone >= 0 || bp->b_bcount <= lp->d_secsize)) { 324 #ifdef tahoe 325 sn *= DEV_BSIZE / lp->d_secsize; /* XXX */ 326 #endif 327 sn += lp->d_partitions[part].p_offset; 328 (*pr)(" (%s%d bn %d; cn %d", dname, unit, sn, 329 sn / lp->d_secpercyl); 330 sn %= lp->d_secpercyl; 331 (*pr)(" tn %d sn %d)", sn / lp->d_nsectors, sn % lp->d_nsectors); 332 } 333 } 334