1 /* $FreeBSD: src/sys/dev/ccd/ccd.c,v 1.73.2.1 2001/09/11 09:49:52 kris Exp $ */ 2 /* $DragonFly: src/sys/dev/disk/ccd/ccd.c,v 1.23 2006/03/08 17:14:11 dillon Exp $ */ 3 4 /* $NetBSD: ccd.c,v 1.22 1995/12/08 19:13:26 thorpej Exp $ */ 5 6 /* 7 * Copyright (c) 1995 Jason R. Thorpe. 8 * All rights reserved. 9 * 10 * Redistribution and use in source and binary forms, with or without 11 * modification, are permitted provided that the following conditions 12 * are met: 13 * 1. Redistributions of source code must retain the above copyright 14 * notice, this list of conditions and the following disclaimer. 15 * 2. Redistributions in binary form must reproduce the above copyright 16 * notice, this list of conditions and the following disclaimer in the 17 * documentation and/or other materials provided with the distribution. 18 * 3. All advertising materials mentioning features or use of this software 19 * must display the following acknowledgement: 20 * This product includes software developed for the NetBSD Project 21 * by Jason R. Thorpe. 22 * 4. The name of the author may not be used to endorse or promote products 23 * derived from this software without specific prior written permission. 24 * 25 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR 26 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 27 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 28 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, 29 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, 30 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 31 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED 32 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 33 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 34 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 35 * SUCH DAMAGE. 36 */ 37 38 /* 39 * Copyright (c) 1988 University of Utah. 40 * Copyright (c) 1990, 1993 41 * The Regents of the University of California. All rights reserved. 42 * 43 * This code is derived from software contributed to Berkeley by 44 * the Systems Programming Group of the University of Utah Computer 45 * Science Department. 46 * 47 * Redistribution and use in source and binary forms, with or without 48 * modification, are permitted provided that the following conditions 49 * are met: 50 * 1. Redistributions of source code must retain the above copyright 51 * notice, this list of conditions and the following disclaimer. 52 * 2. Redistributions in binary form must reproduce the above copyright 53 * notice, this list of conditions and the following disclaimer in the 54 * documentation and/or other materials provided with the distribution. 55 * 3. All advertising materials mentioning features or use of this software 56 * must display the following acknowledgement: 57 * This product includes software developed by the University of 58 * California, Berkeley and its contributors. 59 * 4. Neither the name of the University nor the names of its contributors 60 * may be used to endorse or promote products derived from this software 61 * without specific prior written permission. 62 * 63 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 64 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 65 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 66 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 67 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 68 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 69 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 70 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 71 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 72 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 73 * SUCH DAMAGE. 74 * 75 * from: Utah $Hdr: cd.c 1.6 90/11/28$ 76 * 77 * @(#)cd.c 8.2 (Berkeley) 11/16/93 78 */ 79 80 /* 81 * "Concatenated" disk driver. 82 * 83 * Dynamic configuration and disklabel support by: 84 * Jason R. Thorpe <thorpej@nas.nasa.gov> 85 * Numerical Aerodynamic Simulation Facility 86 * Mail Stop 258-6 87 * NASA Ames Research Center 88 * Moffett Field, CA 94035 89 */ 90 91 #include "use_ccd.h" 92 93 #include <sys/param.h> 94 #include <sys/systm.h> 95 #include <sys/kernel.h> 96 #include <sys/module.h> 97 #include <sys/proc.h> 98 #include <sys/buf.h> 99 #include <sys/malloc.h> 100 #include <sys/nlookup.h> 101 #include <sys/conf.h> 102 #include <sys/stat.h> 103 #include <sys/sysctl.h> 104 #include <sys/disklabel.h> 105 #include <vfs/ufs/fs.h> 106 #include <sys/devicestat.h> 107 #include <sys/fcntl.h> 108 #include <sys/vnode.h> 109 #include <sys/buf2.h> 110 111 #include <sys/ccdvar.h> 112 113 #include <sys/thread2.h> 114 115 #include <vm/vm_zone.h> 116 117 #if defined(CCDDEBUG) && !defined(DEBUG) 118 #define DEBUG 119 #endif 120 121 #ifdef DEBUG 122 #define CCDB_FOLLOW 0x01 123 #define CCDB_INIT 0x02 124 #define CCDB_IO 0x04 125 #define CCDB_LABEL 0x08 126 #define CCDB_VNODE 0x10 127 static int ccddebug = CCDB_FOLLOW | CCDB_INIT | CCDB_IO | CCDB_LABEL | 128 CCDB_VNODE; 129 SYSCTL_INT(_debug, OID_AUTO, ccddebug, CTLFLAG_RW, &ccddebug, 0, ""); 130 #undef DEBUG 131 #endif 132 133 #define ccdunit(x) dkunit(x) 134 #define ccdpart(x) dkpart(x) 135 136 /* 137 This is how mirroring works (only writes are special): 138 139 When initiating a write, ccdbuffer() returns two "struct ccdbuf *"s 140 linked together by the cb_mirror field. "cb_pflags & 141 CCDPF_MIRROR_DONE" is set to 0 on both of them. 142 143 When a component returns to ccdiodone(), it checks if "cb_pflags & 144 CCDPF_MIRROR_DONE" is set or not. If not, it sets the partner's 145 flag and returns. If it is, it means its partner has already 146 returned, so it will go to the regular cleanup. 147 148 */ 149 150 struct ccdbuf { 151 struct buf cb_buf; /* new I/O buf */ 152 struct bio *cb_obio; /* ptr. to original I/O buf */ 153 struct ccdbuf *cb_freenext; /* free list link */ 154 int cb_unit; /* target unit */ 155 int cb_comp; /* target component */ 156 int cb_pflags; /* mirror/parity status flag */ 157 struct ccdbuf *cb_mirror; /* mirror counterpart */ 158 }; 159 160 /* bits in cb_pflags */ 161 #define CCDPF_MIRROR_DONE 1 /* if set, mirror counterpart is done */ 162 163 #define CCDLABELDEV(dev) \ 164 (make_sub_dev(dev, dkmakeminor(ccdunit((dev)), 0, RAW_PART))) 165 166 static d_open_t ccdopen; 167 static d_close_t ccdclose; 168 static d_strategy_t ccdstrategy; 169 static d_ioctl_t ccdioctl; 170 static d_dump_t ccddump; 171 static d_psize_t ccdsize; 172 173 #define NCCDFREEHIWAT 16 174 175 #define CDEV_MAJOR 74 176 177 static struct cdevsw ccd_cdevsw = { 178 /* name */ "ccd", 179 /* maj */ CDEV_MAJOR, 180 /* flags */ D_DISK, 181 /* port */ NULL, 182 /* clone */ NULL, 183 184 /* open */ ccdopen, 185 /* close */ ccdclose, 186 /* read */ physread, 187 /* write */ physwrite, 188 /* ioctl */ ccdioctl, 189 /* poll */ nopoll, 190 /* mmap */ nommap, 191 /* strategy */ ccdstrategy, 192 /* dump */ ccddump, 193 /* psize */ ccdsize 194 }; 195 196 /* called during module initialization */ 197 static void ccdattach (void); 198 static int ccd_modevent (module_t, int, void *); 199 200 /* called by biodone() at interrupt time */ 201 static void ccdiodone (struct bio *bio); 202 203 static void ccdstart (struct ccd_softc *, struct bio *); 204 static void ccdinterleave (struct ccd_softc *, int); 205 static void ccdintr (struct ccd_softc *, struct bio *); 206 static int ccdinit (struct ccddevice *, char **, struct thread *); 207 static int ccdlookup (char *, struct thread *td, struct vnode **); 208 static void ccdbuffer (struct ccdbuf **ret, struct ccd_softc *, 209 struct bio *, daddr_t, caddr_t, long); 210 static void ccdgetdisklabel (dev_t); 211 static void ccdmakedisklabel (struct ccd_softc *); 212 static int ccdlock (struct ccd_softc *); 213 static void ccdunlock (struct ccd_softc *); 214 215 #ifdef DEBUG 216 static void printiinfo (struct ccdiinfo *); 217 #endif 218 219 /* Non-private for the benefit of libkvm. */ 220 struct ccd_softc *ccd_softc; 221 struct ccddevice *ccddevs; 222 struct ccdbuf *ccdfreebufs; 223 static int numccdfreebufs; 224 static int numccd = 0; 225 226 /* 227 * getccdbuf() - Allocate and zero a ccd buffer. 228 * 229 * This routine is called at splbio(). 230 */ 231 232 static __inline 233 struct ccdbuf * 234 getccdbuf(void) 235 { 236 struct ccdbuf *cbp; 237 238 /* 239 * Allocate from freelist or malloc as necessary 240 */ 241 if ((cbp = ccdfreebufs) != NULL) { 242 ccdfreebufs = cbp->cb_freenext; 243 --numccdfreebufs; 244 reinitbufbio(&cbp->cb_buf); 245 } else { 246 cbp = malloc(sizeof(struct ccdbuf), M_DEVBUF, M_WAITOK|M_ZERO); 247 initbufbio(&cbp->cb_buf); 248 } 249 250 /* 251 * independant struct buf initialization 252 */ 253 LIST_INIT(&cbp->cb_buf.b_dep); 254 BUF_LOCKINIT(&cbp->cb_buf); 255 BUF_LOCK(&cbp->cb_buf, LK_EXCLUSIVE); 256 BUF_KERNPROC(&cbp->cb_buf); 257 258 return(cbp); 259 } 260 261 /* 262 * putccdbuf() - Free a ccd buffer. 263 * 264 * This routine is called at splbio(). 265 */ 266 267 static __inline 268 void 269 putccdbuf(struct ccdbuf *cbp) 270 { 271 BUF_UNLOCK(&cbp->cb_buf); 272 BUF_LOCKFREE(&cbp->cb_buf); 273 274 if (numccdfreebufs < NCCDFREEHIWAT) { 275 cbp->cb_freenext = ccdfreebufs; 276 ccdfreebufs = cbp; 277 ++numccdfreebufs; 278 } else { 279 free((caddr_t)cbp, M_DEVBUF); 280 } 281 } 282 283 284 /* 285 * Number of blocks to untouched in front of a component partition. 286 * This is to avoid violating its disklabel area when it starts at the 287 * beginning of the slice. 288 */ 289 #if !defined(CCD_OFFSET) 290 #define CCD_OFFSET 16 291 #endif 292 293 /* 294 * Called by main() during pseudo-device attachment. All we need 295 * to do is allocate enough space for devices to be configured later, and 296 * add devsw entries. 297 */ 298 static void 299 ccdattach(void) 300 { 301 int i; 302 int num = NCCD; 303 304 if (num > 1) 305 printf("ccd0-%d: Concatenated disk drivers\n", num-1); 306 else 307 printf("ccd0: Concatenated disk driver\n"); 308 309 ccd_softc = malloc(num * sizeof(struct ccd_softc), M_DEVBUF, 310 M_WAITOK | M_ZERO); 311 ccddevs = malloc(num * sizeof(struct ccddevice), M_DEVBUF, 312 M_WAITOK | M_ZERO); 313 numccd = num; 314 315 cdevsw_add(&ccd_cdevsw, 0, 0); 316 /* XXX: is this necessary? */ 317 for (i = 0; i < numccd; ++i) 318 ccddevs[i].ccd_dk = -1; 319 } 320 321 static int 322 ccd_modevent(module_t mod, int type, void *data) 323 { 324 int error = 0; 325 326 switch (type) { 327 case MOD_LOAD: 328 ccdattach(); 329 break; 330 331 case MOD_UNLOAD: 332 printf("ccd0: Unload not supported!\n"); 333 error = EOPNOTSUPP; 334 break; 335 336 default: /* MOD_SHUTDOWN etc */ 337 break; 338 } 339 return (error); 340 } 341 342 DEV_MODULE(ccd, ccd_modevent, NULL); 343 344 static int 345 ccdinit(struct ccddevice *ccd, char **cpaths, struct thread *td) 346 { 347 struct ccd_softc *cs = &ccd_softc[ccd->ccd_unit]; 348 struct ccdcinfo *ci = NULL; /* XXX */ 349 size_t size; 350 int ix; 351 struct vnode *vp; 352 size_t minsize; 353 int maxsecsize; 354 struct partinfo dpart; 355 struct ccdgeom *ccg = &cs->sc_geom; 356 char tmppath[MAXPATHLEN]; 357 int error = 0; 358 struct ucred *cred; 359 360 KKASSERT(td->td_proc); 361 cred = td->td_proc->p_ucred; 362 363 #ifdef DEBUG 364 if (ccddebug & (CCDB_FOLLOW|CCDB_INIT)) 365 printf("ccdinit: unit %d\n", ccd->ccd_unit); 366 #endif 367 368 cs->sc_size = 0; 369 cs->sc_ileave = ccd->ccd_interleave; 370 cs->sc_nccdisks = ccd->ccd_ndev; 371 372 /* Allocate space for the component info. */ 373 cs->sc_cinfo = malloc(cs->sc_nccdisks * sizeof(struct ccdcinfo), 374 M_DEVBUF, M_WAITOK); 375 376 /* 377 * Verify that each component piece exists and record 378 * relevant information about it. 379 */ 380 maxsecsize = 0; 381 minsize = 0; 382 for (ix = 0; ix < cs->sc_nccdisks; ix++) { 383 vp = ccd->ccd_vpp[ix]; 384 ci = &cs->sc_cinfo[ix]; 385 ci->ci_vp = vp; 386 387 /* 388 * Copy in the pathname of the component. 389 */ 390 bzero(tmppath, sizeof(tmppath)); /* sanity */ 391 if ((error = copyinstr(cpaths[ix], tmppath, 392 MAXPATHLEN, &ci->ci_pathlen)) != 0) { 393 #ifdef DEBUG 394 if (ccddebug & (CCDB_FOLLOW|CCDB_INIT)) 395 printf("ccd%d: can't copy path, error = %d\n", 396 ccd->ccd_unit, error); 397 #endif 398 goto fail; 399 } 400 ci->ci_path = malloc(ci->ci_pathlen, M_DEVBUF, M_WAITOK); 401 bcopy(tmppath, ci->ci_path, ci->ci_pathlen); 402 403 ci->ci_dev = vn_todev(vp); 404 405 /* 406 * Get partition information for the component. 407 */ 408 if ((error = VOP_IOCTL(vp, DIOCGPART, (caddr_t)&dpart, 409 FREAD, cred, td)) != 0) { 410 #ifdef DEBUG 411 if (ccddebug & (CCDB_FOLLOW|CCDB_INIT)) 412 printf("ccd%d: %s: ioctl failed, error = %d\n", 413 ccd->ccd_unit, ci->ci_path, error); 414 #endif 415 goto fail; 416 } 417 if (dpart.part->p_fstype == FS_BSDFFS) { 418 maxsecsize = 419 ((dpart.disklab->d_secsize > maxsecsize) ? 420 dpart.disklab->d_secsize : maxsecsize); 421 size = dpart.part->p_size - CCD_OFFSET; 422 } else { 423 #ifdef DEBUG 424 if (ccddebug & (CCDB_FOLLOW|CCDB_INIT)) 425 printf("ccd%d: %s: incorrect partition type\n", 426 ccd->ccd_unit, ci->ci_path); 427 #endif 428 error = EFTYPE; 429 goto fail; 430 } 431 432 /* 433 * Calculate the size, truncating to an interleave 434 * boundary if necessary. 435 */ 436 437 if (cs->sc_ileave > 1) 438 size -= size % cs->sc_ileave; 439 440 if (size == 0) { 441 #ifdef DEBUG 442 if (ccddebug & (CCDB_FOLLOW|CCDB_INIT)) 443 printf("ccd%d: %s: size == 0\n", 444 ccd->ccd_unit, ci->ci_path); 445 #endif 446 error = ENODEV; 447 goto fail; 448 } 449 450 if (minsize == 0 || size < minsize) 451 minsize = size; 452 ci->ci_size = size; 453 cs->sc_size += size; 454 } 455 456 /* 457 * Don't allow the interleave to be smaller than 458 * the biggest component sector. 459 */ 460 if ((cs->sc_ileave > 0) && 461 (cs->sc_ileave < (maxsecsize / DEV_BSIZE))) { 462 #ifdef DEBUG 463 if (ccddebug & (CCDB_FOLLOW|CCDB_INIT)) 464 printf("ccd%d: interleave must be at least %d\n", 465 ccd->ccd_unit, (maxsecsize / DEV_BSIZE)); 466 #endif 467 error = EINVAL; 468 goto fail; 469 } 470 471 /* 472 * If uniform interleave is desired set all sizes to that of 473 * the smallest component. This will guarentee that a single 474 * interleave table is generated. 475 * 476 * Lost space must be taken into account when calculating the 477 * overall size. Half the space is lost when CCDF_MIRROR is 478 * specified. One disk is lost when CCDF_PARITY is specified. 479 */ 480 if (ccd->ccd_flags & CCDF_UNIFORM) { 481 for (ci = cs->sc_cinfo; 482 ci < &cs->sc_cinfo[cs->sc_nccdisks]; ci++) { 483 ci->ci_size = minsize; 484 } 485 if (ccd->ccd_flags & CCDF_MIRROR) { 486 /* 487 * Check to see if an even number of components 488 * have been specified. The interleave must also 489 * be non-zero in order for us to be able to 490 * guarentee the topology. 491 */ 492 if (cs->sc_nccdisks % 2) { 493 printf("ccd%d: mirroring requires an even number of disks\n", ccd->ccd_unit ); 494 error = EINVAL; 495 goto fail; 496 } 497 if (cs->sc_ileave == 0) { 498 printf("ccd%d: an interleave must be specified when mirroring\n", ccd->ccd_unit); 499 error = EINVAL; 500 goto fail; 501 } 502 cs->sc_size = (cs->sc_nccdisks/2) * minsize; 503 } else if (ccd->ccd_flags & CCDF_PARITY) { 504 cs->sc_size = (cs->sc_nccdisks-1) * minsize; 505 } else { 506 if (cs->sc_ileave == 0) { 507 printf("ccd%d: an interleave must be specified when using parity\n", ccd->ccd_unit); 508 error = EINVAL; 509 goto fail; 510 } 511 cs->sc_size = cs->sc_nccdisks * minsize; 512 } 513 } 514 515 /* 516 * Construct the interleave table. 517 */ 518 ccdinterleave(cs, ccd->ccd_unit); 519 520 /* 521 * Create pseudo-geometry based on 1MB cylinders. It's 522 * pretty close. 523 */ 524 ccg->ccg_secsize = maxsecsize; 525 ccg->ccg_ntracks = 1; 526 ccg->ccg_nsectors = 1024 * 1024 / ccg->ccg_secsize; 527 ccg->ccg_ncylinders = cs->sc_size / ccg->ccg_nsectors; 528 529 /* 530 * Add an devstat entry for this device. 531 */ 532 devstat_add_entry(&cs->device_stats, "ccd", ccd->ccd_unit, 533 ccg->ccg_secsize, DEVSTAT_ALL_SUPPORTED, 534 DEVSTAT_TYPE_STORARRAY |DEVSTAT_TYPE_IF_OTHER, 535 DEVSTAT_PRIORITY_ARRAY); 536 537 cs->sc_flags |= CCDF_INITED; 538 cs->sc_cflags = ccd->ccd_flags; /* So we can find out later... */ 539 cs->sc_unit = ccd->ccd_unit; 540 return (0); 541 fail: 542 while (ci > cs->sc_cinfo) { 543 ci--; 544 free(ci->ci_path, M_DEVBUF); 545 } 546 free(cs->sc_cinfo, M_DEVBUF); 547 return (error); 548 } 549 550 static void 551 ccdinterleave(struct ccd_softc *cs, int unit) 552 { 553 struct ccdcinfo *ci, *smallci; 554 struct ccdiinfo *ii; 555 daddr_t bn, lbn; 556 int ix; 557 u_long size; 558 559 #ifdef DEBUG 560 if (ccddebug & CCDB_INIT) 561 printf("ccdinterleave(%x): ileave %d\n", cs, cs->sc_ileave); 562 #endif 563 564 /* 565 * Allocate an interleave table. The worst case occurs when each 566 * of N disks is of a different size, resulting in N interleave 567 * tables. 568 * 569 * Chances are this is too big, but we don't care. 570 */ 571 size = (cs->sc_nccdisks + 1) * sizeof(struct ccdiinfo); 572 cs->sc_itable = (struct ccdiinfo *)malloc(size, M_DEVBUF, M_WAITOK); 573 bzero((caddr_t)cs->sc_itable, size); 574 575 /* 576 * Trivial case: no interleave (actually interleave of disk size). 577 * Each table entry represents a single component in its entirety. 578 * 579 * An interleave of 0 may not be used with a mirror or parity setup. 580 */ 581 if (cs->sc_ileave == 0) { 582 bn = 0; 583 ii = cs->sc_itable; 584 585 for (ix = 0; ix < cs->sc_nccdisks; ix++) { 586 /* Allocate space for ii_index. */ 587 ii->ii_index = malloc(sizeof(int), M_DEVBUF, M_WAITOK); 588 ii->ii_ndisk = 1; 589 ii->ii_startblk = bn; 590 ii->ii_startoff = 0; 591 ii->ii_index[0] = ix; 592 bn += cs->sc_cinfo[ix].ci_size; 593 ii++; 594 } 595 ii->ii_ndisk = 0; 596 #ifdef DEBUG 597 if (ccddebug & CCDB_INIT) 598 printiinfo(cs->sc_itable); 599 #endif 600 return; 601 } 602 603 /* 604 * The following isn't fast or pretty; it doesn't have to be. 605 */ 606 size = 0; 607 bn = lbn = 0; 608 for (ii = cs->sc_itable; ; ii++) { 609 /* 610 * Allocate space for ii_index. We might allocate more then 611 * we use. 612 */ 613 ii->ii_index = malloc((sizeof(int) * cs->sc_nccdisks), 614 M_DEVBUF, M_WAITOK); 615 616 /* 617 * Locate the smallest of the remaining components 618 */ 619 smallci = NULL; 620 for (ci = cs->sc_cinfo; ci < &cs->sc_cinfo[cs->sc_nccdisks]; 621 ci++) { 622 if (ci->ci_size > size && 623 (smallci == NULL || 624 ci->ci_size < smallci->ci_size)) { 625 smallci = ci; 626 } 627 } 628 629 /* 630 * Nobody left, all done 631 */ 632 if (smallci == NULL) { 633 ii->ii_ndisk = 0; 634 break; 635 } 636 637 /* 638 * Record starting logical block using an sc_ileave blocksize. 639 */ 640 ii->ii_startblk = bn / cs->sc_ileave; 641 642 /* 643 * Record starting comopnent block using an sc_ileave 644 * blocksize. This value is relative to the beginning of 645 * a component disk. 646 */ 647 ii->ii_startoff = lbn; 648 649 /* 650 * Determine how many disks take part in this interleave 651 * and record their indices. 652 */ 653 ix = 0; 654 for (ci = cs->sc_cinfo; 655 ci < &cs->sc_cinfo[cs->sc_nccdisks]; ci++) { 656 if (ci->ci_size >= smallci->ci_size) { 657 ii->ii_index[ix++] = ci - cs->sc_cinfo; 658 } 659 } 660 ii->ii_ndisk = ix; 661 bn += ix * (smallci->ci_size - size); 662 lbn = smallci->ci_size / cs->sc_ileave; 663 size = smallci->ci_size; 664 } 665 #ifdef DEBUG 666 if (ccddebug & CCDB_INIT) 667 printiinfo(cs->sc_itable); 668 #endif 669 } 670 671 /* ARGSUSED */ 672 static int 673 ccdopen(dev_t dev, int flags, int fmt, d_thread_t *td) 674 { 675 int unit = ccdunit(dev); 676 struct ccd_softc *cs; 677 struct disklabel *lp; 678 int error = 0, part, pmask; 679 680 #ifdef DEBUG 681 if (ccddebug & CCDB_FOLLOW) 682 printf("ccdopen(%x, %x)\n", dev, flags); 683 #endif 684 if (unit >= numccd) 685 return (ENXIO); 686 cs = &ccd_softc[unit]; 687 688 if ((error = ccdlock(cs)) != 0) 689 return (error); 690 691 lp = &cs->sc_label; 692 693 part = ccdpart(dev); 694 pmask = (1 << part); 695 696 /* 697 * If we're initialized, check to see if there are any other 698 * open partitions. If not, then it's safe to update 699 * the in-core disklabel. 700 */ 701 if ((cs->sc_flags & CCDF_INITED) && (cs->sc_openmask == 0)) 702 ccdgetdisklabel(dev); 703 704 /* Check that the partition exists. */ 705 if (part != RAW_PART && ((part >= lp->d_npartitions) || 706 (lp->d_partitions[part].p_fstype == FS_UNUSED))) { 707 error = ENXIO; 708 goto done; 709 } 710 711 cs->sc_openmask |= pmask; 712 done: 713 ccdunlock(cs); 714 return (0); 715 } 716 717 /* ARGSUSED */ 718 static int 719 ccdclose(dev_t dev, int flags, int fmt, d_thread_t *td) 720 { 721 int unit = ccdunit(dev); 722 struct ccd_softc *cs; 723 int error = 0, part; 724 725 #ifdef DEBUG 726 if (ccddebug & CCDB_FOLLOW) 727 printf("ccdclose(%x, %x)\n", dev, flags); 728 #endif 729 730 if (unit >= numccd) 731 return (ENXIO); 732 cs = &ccd_softc[unit]; 733 734 if ((error = ccdlock(cs)) != 0) 735 return (error); 736 737 part = ccdpart(dev); 738 739 /* ...that much closer to allowing unconfiguration... */ 740 cs->sc_openmask &= ~(1 << part); 741 ccdunlock(cs); 742 return (0); 743 } 744 745 static void 746 ccdstrategy(dev_t dev, struct bio *bio) 747 { 748 int unit = ccdunit(dev); 749 struct bio *nbio; 750 struct buf *bp = bio->bio_buf; 751 struct ccd_softc *cs = &ccd_softc[unit]; 752 int wlabel; 753 struct disklabel *lp; 754 755 #ifdef DEBUG 756 if (ccddebug & CCDB_FOLLOW) 757 printf("ccdstrategy(%x): unit %d\n", bp, unit); 758 #endif 759 if ((cs->sc_flags & CCDF_INITED) == 0) { 760 bp->b_error = ENXIO; 761 bp->b_flags |= B_ERROR; 762 goto done; 763 } 764 765 /* If it's a nil transfer, wake up the top half now. */ 766 if (bp->b_bcount == 0) 767 goto done; 768 769 lp = &cs->sc_label; 770 771 /* 772 * Do bounds checking and adjust transfer. If there's an 773 * error, the bounds check will flag that for us. 774 */ 775 wlabel = cs->sc_flags & (CCDF_WLABEL|CCDF_LABELLING); 776 if (ccdpart(dev) != RAW_PART) { 777 nbio = bounds_check_with_label(dev, bio, lp, wlabel); 778 if (nbio == NULL) 779 goto done; 780 } else { 781 int pbn; /* in sc_secsize chunks */ 782 long sz; /* in sc_secsize chunks */ 783 784 pbn = bio->bio_blkno / (cs->sc_geom.ccg_secsize / DEV_BSIZE); 785 sz = howmany(bp->b_bcount, cs->sc_geom.ccg_secsize); 786 787 /* 788 * If out of bounds return an error. If at the EOF point, 789 * simply read or write less. 790 */ 791 792 if (pbn < 0 || pbn >= cs->sc_size) { 793 bp->b_resid = bp->b_bcount; 794 if (pbn != cs->sc_size) { 795 bp->b_error = EINVAL; 796 bp->b_flags |= B_ERROR | B_INVAL; 797 } 798 goto done; 799 } 800 801 /* 802 * If the request crosses EOF, truncate the request. 803 */ 804 if (pbn + sz > cs->sc_size) { 805 bp->b_bcount = (cs->sc_size - pbn) * 806 cs->sc_geom.ccg_secsize; 807 } 808 nbio = bio; 809 } 810 811 bp->b_resid = bp->b_bcount; 812 nbio->bio_driver_info = dev; 813 814 /* 815 * "Start" the unit. 816 */ 817 crit_enter(); 818 ccdstart(cs, nbio); 819 crit_exit(); 820 return; 821 822 /* 823 * note: bio, not nbio, is valid at the done label. 824 */ 825 done: 826 biodone(bio); 827 } 828 829 static void 830 ccdstart(struct ccd_softc *cs, struct bio *bio) 831 { 832 long bcount, rcount; 833 struct ccdbuf *cbp[4]; 834 struct buf *bp = bio->bio_buf; 835 dev_t dev = bio->bio_driver_info; 836 /* XXX! : 2 reads and 2 writes for RAID 4/5 */ 837 caddr_t addr; 838 daddr_t bn; 839 struct partition *pp; 840 841 #ifdef DEBUG 842 if (ccddebug & CCDB_FOLLOW) 843 printf("ccdstart(%x, %x)\n", cs, bp); 844 #endif 845 846 /* Record the transaction start */ 847 devstat_start_transaction(&cs->device_stats); 848 849 /* 850 * Translate the partition-relative block number to an absolute. 851 */ 852 bn = bio->bio_blkno; 853 if (ccdpart(dev) != RAW_PART) { 854 pp = &cs->sc_label.d_partitions[ccdpart(dev)]; 855 bn += pp->p_offset; 856 } 857 858 /* 859 * Allocate component buffers and fire off the requests 860 */ 861 addr = bp->b_data; 862 for (bcount = bp->b_bcount; bcount > 0; bcount -= rcount) { 863 ccdbuffer(cbp, cs, bio, bn, addr, bcount); 864 rcount = cbp[0]->cb_buf.b_bcount; 865 866 if (cs->sc_cflags & CCDF_MIRROR) { 867 /* 868 * Mirroring. Writes go to both disks, reads are 869 * taken from whichever disk seems most appropriate. 870 * 871 * We attempt to localize reads to the disk whos arm 872 * is nearest the read request. We ignore seeks due 873 * to writes when making this determination and we 874 * also try to avoid hogging. 875 */ 876 if ((cbp[0]->cb_buf.b_flags & B_READ) == 0) { 877 vn_strategy(cbp[0]->cb_buf.b_vp, 878 &cbp[0]->cb_buf.b_bio1); 879 vn_strategy(cbp[1]->cb_buf.b_vp, 880 &cbp[1]->cb_buf.b_bio1); 881 } else { 882 int pick = cs->sc_pick; 883 daddr_t range = cs->sc_size / 16; 884 885 if (bn < cs->sc_blk[pick] - range || 886 bn > cs->sc_blk[pick] + range 887 ) { 888 cs->sc_pick = pick = 1 - pick; 889 } 890 cs->sc_blk[pick] = bn + btodb(rcount); 891 vn_strategy(cbp[pick]->cb_buf.b_vp, 892 &cbp[pick]->cb_buf.b_bio1); 893 } 894 } else { 895 /* 896 * Not mirroring 897 */ 898 vn_strategy(cbp[0]->cb_buf.b_vp, 899 &cbp[0]->cb_buf.b_bio1); 900 } 901 bn += btodb(rcount); 902 addr += rcount; 903 } 904 } 905 906 /* 907 * Build a component buffer header. 908 */ 909 static void 910 ccdbuffer(struct ccdbuf **cb, struct ccd_softc *cs, struct bio *bio, daddr_t bn, 911 caddr_t addr, long bcount) 912 { 913 struct ccdcinfo *ci, *ci2 = NULL; /* XXX */ 914 struct ccdbuf *cbp; 915 daddr_t cbn, cboff; 916 off_t cbc; 917 918 #ifdef DEBUG 919 if (ccddebug & CCDB_IO) 920 printf("ccdbuffer(%x, %x, %d, %x, %d)\n", 921 cs, bp, bn, addr, bcount); 922 #endif 923 /* 924 * Determine which component bn falls in. 925 */ 926 cbn = bn; 927 cboff = 0; 928 929 if (cs->sc_ileave == 0) { 930 /* 931 * Serially concatenated and neither a mirror nor a parity 932 * config. This is a special case. 933 */ 934 daddr_t sblk; 935 936 sblk = 0; 937 for (ci = cs->sc_cinfo; cbn >= sblk + ci->ci_size; ci++) 938 sblk += ci->ci_size; 939 cbn -= sblk; 940 } else { 941 struct ccdiinfo *ii; 942 int ccdisk, off; 943 944 /* 945 * Calculate cbn, the logical superblock (sc_ileave chunks), 946 * and cboff, a normal block offset (DEV_BSIZE chunks) relative 947 * to cbn. 948 */ 949 cboff = cbn % cs->sc_ileave; /* DEV_BSIZE gran */ 950 cbn = cbn / cs->sc_ileave; /* DEV_BSIZE * ileave gran */ 951 952 /* 953 * Figure out which interleave table to use. 954 */ 955 for (ii = cs->sc_itable; ii->ii_ndisk; ii++) { 956 if (ii->ii_startblk > cbn) 957 break; 958 } 959 ii--; 960 961 /* 962 * off is the logical superblock relative to the beginning 963 * of this interleave block. 964 */ 965 off = cbn - ii->ii_startblk; 966 967 /* 968 * We must calculate which disk component to use (ccdisk), 969 * and recalculate cbn to be the superblock relative to 970 * the beginning of the component. This is typically done by 971 * adding 'off' and ii->ii_startoff together. However, 'off' 972 * must typically be divided by the number of components in 973 * this interleave array to be properly convert it from a 974 * CCD-relative logical superblock number to a 975 * component-relative superblock number. 976 */ 977 if (ii->ii_ndisk == 1) { 978 /* 979 * When we have just one disk, it can't be a mirror 980 * or a parity config. 981 */ 982 ccdisk = ii->ii_index[0]; 983 cbn = ii->ii_startoff + off; 984 } else { 985 if (cs->sc_cflags & CCDF_MIRROR) { 986 /* 987 * We have forced a uniform mapping, resulting 988 * in a single interleave array. We double 989 * up on the first half of the available 990 * components and our mirror is in the second 991 * half. This only works with a single 992 * interleave array because doubling up 993 * doubles the number of sectors, so there 994 * cannot be another interleave array because 995 * the next interleave array's calculations 996 * would be off. 997 */ 998 int ndisk2 = ii->ii_ndisk / 2; 999 ccdisk = ii->ii_index[off % ndisk2]; 1000 cbn = ii->ii_startoff + off / ndisk2; 1001 ci2 = &cs->sc_cinfo[ccdisk + ndisk2]; 1002 } else if (cs->sc_cflags & CCDF_PARITY) { 1003 /* 1004 * XXX not implemented yet 1005 */ 1006 int ndisk2 = ii->ii_ndisk - 1; 1007 ccdisk = ii->ii_index[off % ndisk2]; 1008 cbn = ii->ii_startoff + off / ndisk2; 1009 if (cbn % ii->ii_ndisk <= ccdisk) 1010 ccdisk++; 1011 } else { 1012 ccdisk = ii->ii_index[off % ii->ii_ndisk]; 1013 cbn = ii->ii_startoff + off / ii->ii_ndisk; 1014 } 1015 } 1016 1017 ci = &cs->sc_cinfo[ccdisk]; 1018 1019 /* 1020 * Convert cbn from a superblock to a normal block so it 1021 * can be used to calculate (along with cboff) the normal 1022 * block index into this particular disk. 1023 */ 1024 cbn *= cs->sc_ileave; 1025 } 1026 1027 /* 1028 * Fill in the component buf structure. 1029 */ 1030 cbp = getccdbuf(); 1031 cbp->cb_buf.b_flags = bio->bio_buf->b_flags; 1032 cbp->cb_buf.b_data = addr; 1033 cbp->cb_buf.b_vp = ci->ci_vp; 1034 if (cs->sc_ileave == 0) 1035 cbc = dbtob((off_t)(ci->ci_size - cbn)); 1036 else 1037 cbc = dbtob((off_t)(cs->sc_ileave - cboff)); 1038 cbp->cb_buf.b_bcount = (cbc < bcount) ? cbc : bcount; 1039 cbp->cb_buf.b_bufsize = cbp->cb_buf.b_bcount; 1040 1041 cbp->cb_buf.b_bio1.bio_done = ccdiodone; 1042 cbp->cb_buf.b_bio1.bio_caller_info1.ptr = cbp; 1043 cbp->cb_buf.b_bio1.bio_blkno = cbn + cboff + CCD_OFFSET; 1044 cbp->cb_buf.b_bio1.bio_offset = dbtob(cbn + cboff + CCD_OFFSET); 1045 1046 /* 1047 * context for ccdiodone 1048 */ 1049 cbp->cb_obio = bio; 1050 cbp->cb_unit = cs - ccd_softc; 1051 cbp->cb_comp = ci - cs->sc_cinfo; 1052 1053 #ifdef DEBUG 1054 if (ccddebug & CCDB_IO) 1055 printf(" dev %x(u%d): cbp %x bn %d addr %x bcnt %d\n", 1056 ci->ci_dev, ci-cs->sc_cinfo, cbp, 1057 cbp->cb_buf.b_bio1.bio_blkno, 1058 cbp->cb_buf.b_data, cbp->cb_buf.b_bcount); 1059 #endif 1060 cb[0] = cbp; 1061 1062 /* 1063 * Note: both I/O's setup when reading from mirror, but only one 1064 * will be executed. 1065 */ 1066 if (cs->sc_cflags & CCDF_MIRROR) { 1067 /* mirror, setup second I/O */ 1068 cbp = getccdbuf(); 1069 1070 cbp->cb_buf.b_flags = bio->bio_buf->b_flags; 1071 cbp->cb_buf.b_data = addr; 1072 cbp->cb_buf.b_vp = ci2->ci_vp; 1073 if (cs->sc_ileave == 0) 1074 cbc = dbtob((off_t)(ci->ci_size - cbn)); 1075 else 1076 cbc = dbtob((off_t)(cs->sc_ileave - cboff)); 1077 cbp->cb_buf.b_bcount = (cbc < bcount) ? cbc : bcount; 1078 cbp->cb_buf.b_bufsize = cbp->cb_buf.b_bcount; 1079 1080 cbp->cb_buf.b_bio1.bio_done = ccdiodone; 1081 cbp->cb_buf.b_bio1.bio_caller_info1.ptr = cbp; 1082 cbp->cb_buf.b_bio1.bio_blkno = cbn + cboff + CCD_OFFSET; 1083 cbp->cb_buf.b_bio1.bio_offset = dbtob(cbn + cboff + CCD_OFFSET); 1084 1085 /* 1086 * context for ccdiodone 1087 */ 1088 cbp->cb_obio = bio; 1089 cbp->cb_unit = cs - ccd_softc; 1090 cbp->cb_comp = ci2 - cs->sc_cinfo; 1091 cb[1] = cbp; 1092 /* link together the ccdbuf's and clear "mirror done" flag */ 1093 cb[0]->cb_mirror = cb[1]; 1094 cb[1]->cb_mirror = cb[0]; 1095 cb[0]->cb_pflags &= ~CCDPF_MIRROR_DONE; 1096 cb[1]->cb_pflags &= ~CCDPF_MIRROR_DONE; 1097 } 1098 } 1099 1100 static void 1101 ccdintr(struct ccd_softc *cs, struct bio *bio) 1102 { 1103 struct buf *bp = bio->bio_buf; 1104 1105 #ifdef DEBUG 1106 if (ccddebug & CCDB_FOLLOW) 1107 printf("ccdintr(%x, %x)\n", cs, bp); 1108 #endif 1109 /* 1110 * Request is done for better or worse, wakeup the top half. 1111 */ 1112 if (bp->b_flags & B_ERROR) 1113 bp->b_resid = bp->b_bcount; 1114 devstat_end_transaction_buf(&cs->device_stats, bp); 1115 biodone(bio); 1116 } 1117 1118 /* 1119 * Called at interrupt time. 1120 * Mark the component as done and if all components are done, 1121 * take a ccd interrupt. 1122 */ 1123 static void 1124 ccdiodone(struct bio *bio) 1125 { 1126 struct ccdbuf *cbp = bio->bio_caller_info1.ptr; 1127 struct bio *obio = cbp->cb_obio; 1128 struct buf *obp = obio->bio_buf; 1129 int unit = cbp->cb_unit; 1130 int count; 1131 1132 /* 1133 * Since we do not have exclusive access to underlying devices, 1134 * we can't keep cache translations around. 1135 */ 1136 clearbiocache(bio->bio_next); 1137 1138 crit_enter(); 1139 #ifdef DEBUG 1140 if (ccddebug & CCDB_FOLLOW) 1141 printf("ccdiodone(%x)\n", cbp); 1142 if (ccddebug & CCDB_IO) { 1143 printf("ccdiodone: bp %x bcount %d resid %d\n", 1144 obp, obp->b_bcount, obp->b_resid); 1145 printf(" dev %x(u%d), cbp %x bn %d addr %x bcnt %d\n", 1146 cbp->cb_buf.b_dev, cbp->cb_comp, cbp, 1147 cbp->cb_buf.b_lblkno, cbp->cb_buf.b_data, 1148 cbp->cb_buf.b_bcount); 1149 } 1150 #endif 1151 /* 1152 * If an error occured, report it. If this is a mirrored 1153 * configuration and the first of two possible reads, do not 1154 * set the error in the bp yet because the second read may 1155 * succeed. 1156 */ 1157 if (cbp->cb_buf.b_flags & B_ERROR) { 1158 const char *msg = ""; 1159 1160 if ((ccd_softc[unit].sc_cflags & CCDF_MIRROR) && 1161 (cbp->cb_buf.b_flags & B_READ) && 1162 (cbp->cb_pflags & CCDPF_MIRROR_DONE) == 0) { 1163 /* 1164 * We will try our read on the other disk down 1165 * below, also reverse the default pick so if we 1166 * are doing a scan we do not keep hitting the 1167 * bad disk first. 1168 */ 1169 struct ccd_softc *cs = &ccd_softc[unit]; 1170 1171 msg = ", trying other disk"; 1172 cs->sc_pick = 1 - cs->sc_pick; 1173 cs->sc_blk[cs->sc_pick] = obio->bio_blkno; 1174 } else { 1175 obp->b_flags |= B_ERROR; 1176 obp->b_error = cbp->cb_buf.b_error ? 1177 cbp->cb_buf.b_error : EIO; 1178 } 1179 printf("ccd%d: error %d on component %d block %d (ccd block %d)%s\n", 1180 unit, obp->b_error, cbp->cb_comp, 1181 (int)cbp->cb_buf.b_bio2.bio_blkno, 1182 obio->bio_blkno, msg); 1183 } 1184 1185 /* 1186 * Process mirror. If we are writing, I/O has been initiated on both 1187 * buffers and we fall through only after both are finished. 1188 * 1189 * If we are reading only one I/O is initiated at a time. If an 1190 * error occurs we initiate the second I/O and return, otherwise 1191 * we free the second I/O without initiating it. 1192 */ 1193 1194 if (ccd_softc[unit].sc_cflags & CCDF_MIRROR) { 1195 if ((cbp->cb_buf.b_flags & B_READ) == 0) { 1196 /* 1197 * When writing, handshake with the second buffer 1198 * to determine when both are done. If both are not 1199 * done, return here. 1200 */ 1201 if ((cbp->cb_pflags & CCDPF_MIRROR_DONE) == 0) { 1202 cbp->cb_mirror->cb_pflags |= CCDPF_MIRROR_DONE; 1203 putccdbuf(cbp); 1204 crit_exit(); 1205 return; 1206 } 1207 } else { 1208 /* 1209 * When reading, either dispose of the second buffer 1210 * or initiate I/O on the second buffer if an error 1211 * occured with this one. 1212 */ 1213 if ((cbp->cb_pflags & CCDPF_MIRROR_DONE) == 0) { 1214 if (cbp->cb_buf.b_flags & B_ERROR) { 1215 cbp->cb_mirror->cb_pflags |= 1216 CCDPF_MIRROR_DONE; 1217 vn_strategy( 1218 cbp->cb_mirror->cb_buf.b_vp, 1219 &cbp->cb_mirror->cb_buf.b_bio1 1220 ); 1221 putccdbuf(cbp); 1222 crit_exit(); 1223 return; 1224 } else { 1225 putccdbuf(cbp->cb_mirror); 1226 /* fall through */ 1227 } 1228 } 1229 } 1230 } 1231 1232 /* 1233 * use b_bufsize to determine how big the original request was rather 1234 * then b_bcount, because b_bcount may have been truncated for EOF. 1235 * 1236 * XXX We check for an error, but we do not test the resid for an 1237 * aligned EOF condition. This may result in character & block 1238 * device access not recognizing EOF properly when read or written 1239 * sequentially, but will not effect filesystems. 1240 */ 1241 count = cbp->cb_buf.b_bufsize; 1242 putccdbuf(cbp); 1243 1244 /* 1245 * If all done, "interrupt". 1246 */ 1247 obp->b_resid -= count; 1248 if (obp->b_resid < 0) 1249 panic("ccdiodone: count"); 1250 if (obp->b_resid == 0) 1251 ccdintr(&ccd_softc[unit], obio); 1252 crit_exit(); 1253 } 1254 1255 static int 1256 ccdioctl(dev_t dev, u_long cmd, caddr_t data, int flag, d_thread_t *td) 1257 { 1258 int unit = ccdunit(dev); 1259 int i, j, lookedup = 0, error = 0; 1260 int part, pmask; 1261 struct ccd_softc *cs; 1262 struct ccd_ioctl *ccio = (struct ccd_ioctl *)data; 1263 struct ccddevice ccd; 1264 char **cpp; 1265 struct vnode **vpp; 1266 struct ucred *cred; 1267 1268 KKASSERT(td->td_proc != NULL); 1269 cred = td->td_proc->p_ucred; 1270 1271 if (unit >= numccd) 1272 return (ENXIO); 1273 cs = &ccd_softc[unit]; 1274 1275 bzero(&ccd, sizeof(ccd)); 1276 1277 switch (cmd) { 1278 case CCDIOCSET: 1279 if (cs->sc_flags & CCDF_INITED) 1280 return (EBUSY); 1281 1282 if ((flag & FWRITE) == 0) 1283 return (EBADF); 1284 1285 if ((error = ccdlock(cs)) != 0) 1286 return (error); 1287 1288 if (ccio->ccio_ndisks > CCD_MAXNDISKS) 1289 return (EINVAL); 1290 1291 /* Fill in some important bits. */ 1292 ccd.ccd_unit = unit; 1293 ccd.ccd_interleave = ccio->ccio_ileave; 1294 if (ccd.ccd_interleave == 0 && 1295 ((ccio->ccio_flags & CCDF_MIRROR) || 1296 (ccio->ccio_flags & CCDF_PARITY))) { 1297 printf("ccd%d: disabling mirror/parity, interleave is 0\n", unit); 1298 ccio->ccio_flags &= ~(CCDF_MIRROR | CCDF_PARITY); 1299 } 1300 if ((ccio->ccio_flags & CCDF_MIRROR) && 1301 (ccio->ccio_flags & CCDF_PARITY)) { 1302 printf("ccd%d: can't specify both mirror and parity, using mirror\n", unit); 1303 ccio->ccio_flags &= ~CCDF_PARITY; 1304 } 1305 if ((ccio->ccio_flags & (CCDF_MIRROR | CCDF_PARITY)) && 1306 !(ccio->ccio_flags & CCDF_UNIFORM)) { 1307 printf("ccd%d: mirror/parity forces uniform flag\n", 1308 unit); 1309 ccio->ccio_flags |= CCDF_UNIFORM; 1310 } 1311 ccd.ccd_flags = ccio->ccio_flags & CCDF_USERMASK; 1312 1313 /* 1314 * Allocate space for and copy in the array of 1315 * componet pathnames and device numbers. 1316 */ 1317 cpp = malloc(ccio->ccio_ndisks * sizeof(char *), 1318 M_DEVBUF, M_WAITOK); 1319 vpp = malloc(ccio->ccio_ndisks * sizeof(struct vnode *), 1320 M_DEVBUF, M_WAITOK); 1321 1322 error = copyin((caddr_t)ccio->ccio_disks, (caddr_t)cpp, 1323 ccio->ccio_ndisks * sizeof(char **)); 1324 if (error) { 1325 free(vpp, M_DEVBUF); 1326 free(cpp, M_DEVBUF); 1327 ccdunlock(cs); 1328 return (error); 1329 } 1330 1331 #ifdef DEBUG 1332 if (ccddebug & CCDB_INIT) 1333 for (i = 0; i < ccio->ccio_ndisks; ++i) 1334 printf("ccdioctl: component %d: 0x%x\n", 1335 i, cpp[i]); 1336 #endif 1337 1338 for (i = 0; i < ccio->ccio_ndisks; ++i) { 1339 #ifdef DEBUG 1340 if (ccddebug & CCDB_INIT) 1341 printf("ccdioctl: lookedup = %d\n", lookedup); 1342 #endif 1343 if ((error = ccdlookup(cpp[i], td, &vpp[i])) != 0) { 1344 for (j = 0; j < lookedup; ++j) 1345 (void)vn_close(vpp[j], FREAD|FWRITE, td); 1346 free(vpp, M_DEVBUF); 1347 free(cpp, M_DEVBUF); 1348 ccdunlock(cs); 1349 return (error); 1350 } 1351 ++lookedup; 1352 } 1353 ccd.ccd_cpp = cpp; 1354 ccd.ccd_vpp = vpp; 1355 ccd.ccd_ndev = ccio->ccio_ndisks; 1356 1357 /* 1358 * Initialize the ccd. Fills in the softc for us. 1359 */ 1360 if ((error = ccdinit(&ccd, cpp, td)) != 0) { 1361 for (j = 0; j < lookedup; ++j) 1362 (void)vn_close(vpp[j], FREAD|FWRITE, td); 1363 bzero(&ccd_softc[unit], sizeof(struct ccd_softc)); 1364 free(vpp, M_DEVBUF); 1365 free(cpp, M_DEVBUF); 1366 ccdunlock(cs); 1367 return (error); 1368 } 1369 1370 /* 1371 * The ccd has been successfully initialized, so 1372 * we can place it into the array and read the disklabel. 1373 */ 1374 bcopy(&ccd, &ccddevs[unit], sizeof(ccd)); 1375 ccio->ccio_unit = unit; 1376 ccio->ccio_size = cs->sc_size; 1377 ccdgetdisklabel(dev); 1378 1379 ccdunlock(cs); 1380 1381 break; 1382 1383 case CCDIOCCLR: 1384 if ((cs->sc_flags & CCDF_INITED) == 0) 1385 return (ENXIO); 1386 1387 if ((flag & FWRITE) == 0) 1388 return (EBADF); 1389 1390 if ((error = ccdlock(cs)) != 0) 1391 return (error); 1392 1393 /* Don't unconfigure if any other partitions are open */ 1394 part = ccdpart(dev); 1395 pmask = (1 << part); 1396 if ((cs->sc_openmask & ~pmask)) { 1397 ccdunlock(cs); 1398 return (EBUSY); 1399 } 1400 1401 /* 1402 * Free ccd_softc information and clear entry. 1403 */ 1404 1405 /* Close the components and free their pathnames. */ 1406 for (i = 0; i < cs->sc_nccdisks; ++i) { 1407 /* 1408 * XXX: this close could potentially fail and 1409 * cause Bad Things. Maybe we need to force 1410 * the close to happen? 1411 */ 1412 #ifdef DEBUG 1413 if (ccddebug & CCDB_VNODE) 1414 vprint("CCDIOCCLR: vnode info", 1415 cs->sc_cinfo[i].ci_vp); 1416 #endif 1417 (void)vn_close(cs->sc_cinfo[i].ci_vp, FREAD|FWRITE, td); 1418 free(cs->sc_cinfo[i].ci_path, M_DEVBUF); 1419 } 1420 1421 /* Free interleave index. */ 1422 for (i = 0; cs->sc_itable[i].ii_ndisk; ++i) 1423 free(cs->sc_itable[i].ii_index, M_DEVBUF); 1424 1425 /* Free component info and interleave table. */ 1426 free(cs->sc_cinfo, M_DEVBUF); 1427 free(cs->sc_itable, M_DEVBUF); 1428 cs->sc_flags &= ~CCDF_INITED; 1429 1430 /* 1431 * Free ccddevice information and clear entry. 1432 */ 1433 free(ccddevs[unit].ccd_cpp, M_DEVBUF); 1434 free(ccddevs[unit].ccd_vpp, M_DEVBUF); 1435 ccd.ccd_dk = -1; 1436 bcopy(&ccd, &ccddevs[unit], sizeof(ccd)); 1437 1438 /* 1439 * And remove the devstat entry. 1440 */ 1441 devstat_remove_entry(&cs->device_stats); 1442 1443 /* This must be atomic. */ 1444 crit_enter(); 1445 ccdunlock(cs); 1446 bzero(cs, sizeof(struct ccd_softc)); 1447 crit_exit(); 1448 1449 break; 1450 1451 case DIOCGDINFO: 1452 if ((cs->sc_flags & CCDF_INITED) == 0) 1453 return (ENXIO); 1454 1455 *(struct disklabel *)data = cs->sc_label; 1456 break; 1457 1458 case DIOCGPART: 1459 if ((cs->sc_flags & CCDF_INITED) == 0) 1460 return (ENXIO); 1461 1462 ((struct partinfo *)data)->disklab = &cs->sc_label; 1463 ((struct partinfo *)data)->part = 1464 &cs->sc_label.d_partitions[ccdpart(dev)]; 1465 break; 1466 1467 case DIOCWDINFO: 1468 case DIOCSDINFO: 1469 if ((cs->sc_flags & CCDF_INITED) == 0) 1470 return (ENXIO); 1471 1472 if ((flag & FWRITE) == 0) 1473 return (EBADF); 1474 1475 if ((error = ccdlock(cs)) != 0) 1476 return (error); 1477 1478 cs->sc_flags |= CCDF_LABELLING; 1479 1480 error = setdisklabel(&cs->sc_label, 1481 (struct disklabel *)data, 0); 1482 if (error == 0) { 1483 if (cmd == DIOCWDINFO) { 1484 dev_t cdev = CCDLABELDEV(dev); 1485 error = writedisklabel(cdev, &cs->sc_label); 1486 } 1487 } 1488 1489 cs->sc_flags &= ~CCDF_LABELLING; 1490 1491 ccdunlock(cs); 1492 1493 if (error) 1494 return (error); 1495 break; 1496 1497 case DIOCWLABEL: 1498 if ((cs->sc_flags & CCDF_INITED) == 0) 1499 return (ENXIO); 1500 1501 if ((flag & FWRITE) == 0) 1502 return (EBADF); 1503 if (*(int *)data != 0) 1504 cs->sc_flags |= CCDF_WLABEL; 1505 else 1506 cs->sc_flags &= ~CCDF_WLABEL; 1507 break; 1508 1509 default: 1510 return (ENOTTY); 1511 } 1512 1513 return (0); 1514 } 1515 1516 static int 1517 ccdsize(dev_t dev) 1518 { 1519 struct ccd_softc *cs; 1520 int part, size; 1521 1522 if (ccdopen(dev, 0, S_IFCHR, curthread)) 1523 return (-1); 1524 1525 cs = &ccd_softc[ccdunit(dev)]; 1526 part = ccdpart(dev); 1527 1528 if ((cs->sc_flags & CCDF_INITED) == 0) 1529 return (-1); 1530 1531 if (cs->sc_label.d_partitions[part].p_fstype != FS_SWAP) 1532 size = -1; 1533 else 1534 size = cs->sc_label.d_partitions[part].p_size; 1535 1536 if (ccdclose(dev, 0, S_IFCHR, curthread)) 1537 return (-1); 1538 1539 return (size); 1540 } 1541 1542 static int 1543 ccddump(dev_t dev, u_int count, u_int blkno, u_int secsize) 1544 { 1545 /* Not implemented. */ 1546 return ENXIO; 1547 } 1548 1549 /* 1550 * Lookup the provided name in the filesystem. If the file exists, 1551 * is a valid block device, and isn't being used by anyone else, 1552 * set *vpp to the file's vnode. 1553 */ 1554 static int 1555 ccdlookup(char *path, struct thread *td, struct vnode **vpp) 1556 { 1557 struct nlookupdata nd; 1558 struct ucred *cred; 1559 struct vnode *vp; 1560 int error; 1561 1562 KKASSERT(td->td_proc); 1563 cred = td->td_proc->p_ucred; 1564 *vpp = NULL; 1565 1566 error = nlookup_init(&nd, path, UIO_USERSPACE, NLC_FOLLOW|NLC_LOCKVP); 1567 if (error) 1568 return (error); 1569 if ((error = vn_open(&nd, NULL, FREAD|FWRITE, 0)) != 0) { 1570 #ifdef DEBUG 1571 if (ccddebug & CCDB_FOLLOW|CCDB_INIT) 1572 printf("ccdlookup: vn_open error = %d\n", error); 1573 #endif 1574 goto done; 1575 } 1576 vp = nd.nl_open_vp; 1577 1578 if (vp->v_usecount > 1) { 1579 error = EBUSY; 1580 goto done; 1581 } 1582 1583 if (!vn_isdisk(vp, &error)) 1584 goto done; 1585 1586 #ifdef DEBUG 1587 if (ccddebug & CCDB_VNODE) 1588 vprint("ccdlookup: vnode info", vp); 1589 #endif 1590 1591 VOP_UNLOCK(vp, 0, td); 1592 nd.nl_open_vp = NULL; 1593 nlookup_done(&nd); 1594 *vpp = vp; /* leave ref intact */ 1595 return (0); 1596 done: 1597 nlookup_done(&nd); 1598 return (error); 1599 } 1600 1601 /* 1602 * Read the disklabel from the ccd. If one is not present, fake one 1603 * up. 1604 */ 1605 static void 1606 ccdgetdisklabel(dev_t dev) 1607 { 1608 int unit = ccdunit(dev); 1609 struct ccd_softc *cs = &ccd_softc[unit]; 1610 char *errstring; 1611 struct disklabel *lp = &cs->sc_label; 1612 struct ccdgeom *ccg = &cs->sc_geom; 1613 dev_t cdev; 1614 1615 bzero(lp, sizeof(*lp)); 1616 1617 lp->d_secperunit = cs->sc_size; 1618 lp->d_secsize = ccg->ccg_secsize; 1619 lp->d_nsectors = ccg->ccg_nsectors; 1620 lp->d_ntracks = ccg->ccg_ntracks; 1621 lp->d_ncylinders = ccg->ccg_ncylinders; 1622 lp->d_secpercyl = lp->d_ntracks * lp->d_nsectors; 1623 1624 strncpy(lp->d_typename, "ccd", sizeof(lp->d_typename)); 1625 lp->d_type = DTYPE_CCD; 1626 strncpy(lp->d_packname, "fictitious", sizeof(lp->d_packname)); 1627 lp->d_rpm = 3600; 1628 lp->d_interleave = 1; 1629 lp->d_flags = 0; 1630 1631 lp->d_partitions[RAW_PART].p_offset = 0; 1632 lp->d_partitions[RAW_PART].p_size = cs->sc_size; 1633 lp->d_partitions[RAW_PART].p_fstype = FS_UNUSED; 1634 lp->d_npartitions = RAW_PART + 1; 1635 1636 lp->d_bbsize = BBSIZE; /* XXX */ 1637 lp->d_sbsize = SBSIZE; /* XXX */ 1638 1639 lp->d_magic = DISKMAGIC; 1640 lp->d_magic2 = DISKMAGIC; 1641 lp->d_checksum = dkcksum(&cs->sc_label); 1642 1643 /* 1644 * Call the generic disklabel extraction routine. 1645 */ 1646 cdev = CCDLABELDEV(dev); 1647 errstring = readdisklabel(cdev, &cs->sc_label); 1648 if (errstring != NULL) 1649 ccdmakedisklabel(cs); 1650 1651 #ifdef DEBUG 1652 /* It's actually extremely common to have unlabeled ccds. */ 1653 if (ccddebug & CCDB_LABEL) 1654 if (errstring != NULL) 1655 printf("ccd%d: %s\n", unit, errstring); 1656 #endif 1657 } 1658 1659 /* 1660 * Take care of things one might want to take care of in the event 1661 * that a disklabel isn't present. 1662 */ 1663 static void 1664 ccdmakedisklabel(struct ccd_softc *cs) 1665 { 1666 struct disklabel *lp = &cs->sc_label; 1667 1668 /* 1669 * For historical reasons, if there's no disklabel present 1670 * the raw partition must be marked FS_BSDFFS. 1671 */ 1672 lp->d_partitions[RAW_PART].p_fstype = FS_BSDFFS; 1673 1674 strncpy(lp->d_packname, "default label", sizeof(lp->d_packname)); 1675 } 1676 1677 /* 1678 * Wait interruptibly for an exclusive lock. 1679 * 1680 * XXX 1681 * Several drivers do this; it should be abstracted and made MP-safe. 1682 */ 1683 static int 1684 ccdlock(struct ccd_softc *cs) 1685 { 1686 int error; 1687 1688 while ((cs->sc_flags & CCDF_LOCKED) != 0) { 1689 cs->sc_flags |= CCDF_WANTED; 1690 if ((error = tsleep(cs, PCATCH, "ccdlck", 0)) != 0) 1691 return (error); 1692 } 1693 cs->sc_flags |= CCDF_LOCKED; 1694 return (0); 1695 } 1696 1697 /* 1698 * Unlock and wake up any waiters. 1699 */ 1700 static void 1701 ccdunlock(struct ccd_softc *cs) 1702 { 1703 1704 cs->sc_flags &= ~CCDF_LOCKED; 1705 if ((cs->sc_flags & CCDF_WANTED) != 0) { 1706 cs->sc_flags &= ~CCDF_WANTED; 1707 wakeup(cs); 1708 } 1709 } 1710 1711 #ifdef DEBUG 1712 static void 1713 printiinfo(struct ccdiinfo *ii) 1714 { 1715 int ix, i; 1716 1717 for (ix = 0; ii->ii_ndisk; ix++, ii++) { 1718 printf(" itab[%d]: #dk %d sblk %d soff %d", 1719 ix, ii->ii_ndisk, ii->ii_startblk, ii->ii_startoff); 1720 for (i = 0; i < ii->ii_ndisk; i++) 1721 printf(" %d", ii->ii_index[i]); 1722 printf("\n"); 1723 } 1724 } 1725 #endif 1726 1727 1728 /* Local Variables: */ 1729 /* c-argdecl-indent: 8 */ 1730 /* c-continued-statement-offset: 8 */ 1731 /* c-indent-level: 8 */ 1732 /* End: */ 1733