1 /* $FreeBSD: src/sys/dev/ccd/ccd.c,v 1.73.2.1 2001/09/11 09:49:52 kris Exp $ */ 2 /* $DragonFly: src/sys/dev/disk/ccd/ccd.c,v 1.24 2006/03/24 18:35:32 dillon Exp $ */ 3 4 /* $NetBSD: ccd.c,v 1.22 1995/12/08 19:13:26 thorpej Exp $ */ 5 6 /* 7 * Copyright (c) 1995 Jason R. Thorpe. 8 * All rights reserved. 9 * 10 * Redistribution and use in source and binary forms, with or without 11 * modification, are permitted provided that the following conditions 12 * are met: 13 * 1. Redistributions of source code must retain the above copyright 14 * notice, this list of conditions and the following disclaimer. 15 * 2. Redistributions in binary form must reproduce the above copyright 16 * notice, this list of conditions and the following disclaimer in the 17 * documentation and/or other materials provided with the distribution. 18 * 3. All advertising materials mentioning features or use of this software 19 * must display the following acknowledgement: 20 * This product includes software developed for the NetBSD Project 21 * by Jason R. Thorpe. 22 * 4. The name of the author may not be used to endorse or promote products 23 * derived from this software without specific prior written permission. 24 * 25 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR 26 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 27 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 28 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, 29 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, 30 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 31 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED 32 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 33 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 34 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 35 * SUCH DAMAGE. 
36 */ 37 38 /* 39 * Copyright (c) 1988 University of Utah. 40 * Copyright (c) 1990, 1993 41 * The Regents of the University of California. All rights reserved. 42 * 43 * This code is derived from software contributed to Berkeley by 44 * the Systems Programming Group of the University of Utah Computer 45 * Science Department. 46 * 47 * Redistribution and use in source and binary forms, with or without 48 * modification, are permitted provided that the following conditions 49 * are met: 50 * 1. Redistributions of source code must retain the above copyright 51 * notice, this list of conditions and the following disclaimer. 52 * 2. Redistributions in binary form must reproduce the above copyright 53 * notice, this list of conditions and the following disclaimer in the 54 * documentation and/or other materials provided with the distribution. 55 * 3. All advertising materials mentioning features or use of this software 56 * must display the following acknowledgement: 57 * This product includes software developed by the University of 58 * California, Berkeley and its contributors. 59 * 4. Neither the name of the University nor the names of its contributors 60 * may be used to endorse or promote products derived from this software 61 * without specific prior written permission. 62 * 63 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 64 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 65 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 66 * ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * from: Utah $Hdr: cd.c 1.6 90/11/28$
 *
 *	@(#)cd.c	8.2 (Berkeley) 11/16/93
 */

/*
 * "Concatenated" disk driver.
 *
 * Dynamic configuration and disklabel support by:
 *	Jason R. Thorpe <thorpej@nas.nasa.gov>
 *	Numerical Aerodynamic Simulation Facility
 *	Mail Stop 258-6
 *	NASA Ames Research Center
 *	Moffett Field, CA 94035
 */

#include "use_ccd.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/module.h>
#include <sys/proc.h>
#include <sys/buf.h>
#include <sys/malloc.h>
#include <sys/nlookup.h>
#include <sys/conf.h>
#include <sys/stat.h>
#include <sys/sysctl.h>
#include <sys/disklabel.h>
#include <vfs/ufs/fs.h>
#include <sys/devicestat.h>
#include <sys/fcntl.h>
#include <sys/vnode.h>
#include <sys/buf2.h>

#include <sys/ccdvar.h>

#include <sys/thread2.h>

#include <vm/vm_zone.h>

/* Building with CCDDEBUG turns on DEBUG for the declarations below. */
#if defined(CCDDEBUG) && !defined(DEBUG)
#define DEBUG
#endif

/*
 * Debug categories, OR'ed together in the ccddebug sysctl variable.
 *
 * NOTE(review): DEBUG is #undef'ed again at the end of this section, so
 * the later "#ifdef DEBUG" trace blocks in this file are compiled out even
 * when CCDDEBUG is set.  Confirm that this is intentional.
 */
#ifdef DEBUG
#define CCDB_FOLLOW	0x01
#define CCDB_INIT	0x02
#define CCDB_IO		0x04
#define CCDB_LABEL	0x08
#define CCDB_VNODE	0x10
static int ccddebug = CCDB_FOLLOW | CCDB_INIT | CCDB_IO | CCDB_LABEL |
    CCDB_VNODE;
SYSCTL_INT(_debug, OID_AUTO, ccddebug, CTLFLAG_RW, &ccddebug, 0, "");
#undef DEBUG
#endif

/* Unit and partition number encoded in a ccd dev_t. */
#define ccdunit(x)	dkunit(x)
#define ccdpart(x)	dkpart(x)

/*
 * This is how mirroring works (only writes are special):
 *
 * When initiating a write, ccdbuffer() returns two "struct ccdbuf *"s
 * linked together by the cb_mirror field.  "cb_pflags &
 * CCDPF_MIRROR_DONE" is set to 0 on both of them.
 *
 * When a component returns to ccdiodone(), it checks if "cb_pflags &
 * CCDPF_MIRROR_DONE" is set or not.  If not, it sets the partner's
 * flag and returns.  If it is, it means its partner has already
 * returned, so it will go to the regular cleanup.
 */

/*
 * Per-component I/O request.  One (or, when mirroring, two) of these is
 * built by ccdbuffer() for every piece of an original request.
 */
struct ccdbuf {
	struct buf	cb_buf;		/* new I/O buf */
	struct bio	*cb_obio;	/* ptr. to original I/O buf */
	struct ccdbuf	*cb_freenext;	/* free list link */
	int		cb_unit;	/* target unit */
	int		cb_comp;	/* target component */
	int		cb_pflags;	/* mirror/parity status flag */
	struct ccdbuf	*cb_mirror;	/* mirror counterpart */
};

/* bits in cb_pflags */
#define CCDPF_MIRROR_DONE 1	/* if set, mirror counterpart is done */

/* dev_t of the raw partition of the unit that dev belongs to. */
#define CCDLABELDEV(dev)	\
	(make_sub_dev(dev, dkmakeminor(ccdunit((dev)), 0, RAW_PART)))

static d_open_t ccdopen;
static d_close_t ccdclose;
static d_strategy_t ccdstrategy;
static d_ioctl_t ccdioctl;
static d_dump_t ccddump;
static d_psize_t ccdsize;

/* Maximum number of idle ccdbufs kept on the free list (see putccdbuf()). */
#define NCCDFREEHIWAT	16

#define CDEV_MAJOR 74

static struct cdevsw ccd_cdevsw = {
	/* name */	"ccd",
	/* maj */	CDEV_MAJOR,
	/* flags */	D_DISK,
	/* port */	NULL,
	/* clone */	NULL,

	/* open */	ccdopen,
	/* close */	ccdclose,
	/* read */	physread,
	/* write */	physwrite,
	/* ioctl */	ccdioctl,
	/* poll */	nopoll,
	/* mmap */	nommap,
	/* strategy */	ccdstrategy,
	/* dump */	ccddump,
	/* psize */	ccdsize
};

/* called during module initialization */
static	void ccdattach (void);
static	int ccd_modevent (module_t, int, void *);

/* called by biodone() at interrupt time */
static	void ccdiodone (struct bio *bio);

static	void ccdstart (struct ccd_softc *, struct bio *);
static	void ccdinterleave (struct ccd_softc *, int);
static	void ccdintr (struct ccd_softc *, struct bio *);
static	int ccdinit (struct ccddevice *, char **, struct thread *);
static	int ccdlookup (char *, struct thread *td, struct vnode **);
static	void ccdbuffer (struct ccdbuf **ret, struct ccd_softc *,
		struct bio *, off_t, caddr_t, long);
static	void ccdgetdisklabel (dev_t);
static	void ccdmakedisklabel (struct ccd_softc *);
static	int ccdlock (struct ccd_softc *);
static	void ccdunlock (struct ccd_softc *);

#ifdef DEBUG
static	void printiinfo (struct ccdiinfo *);
#endif

/* Non-private for the benefit of libkvm. */
struct ccd_softc *ccd_softc;		/* per-unit state, numccd entries */
struct ccddevice *ccddevs;		/* per-unit config, numccd entries */
struct ccdbuf	*ccdfreebufs;		/* head of the ccdbuf free list */
static int	numccdfreebufs;		/* current length of that list */
static int	numccd = 0;		/* number of units allocated */

/*
 * getccdbuf() - Allocate a ccd buffer.
 *
 * A buffer is taken from the free list when one is available, otherwise
 * a zeroed one is malloc'ed (M_WAITOK).  Either way the embedded struct
 * buf is (re)initialized and its lock is set up and taken exclusively
 * before the buffer is returned.
 *
 * NOTE(review): only the freshly malloc'ed case is zeroed here.  A buffer
 * recycled from the free list keeps its previous cb_* fields unless
 * reinitbufbio() resets them; confirm callers set every field they read.
 *
 * This routine is called at splbio().
 */

static __inline
struct ccdbuf *
getccdbuf(void)
{
	struct ccdbuf *cbp;

	/*
	 * Allocate from freelist or malloc as necessary
	 */
	if ((cbp = ccdfreebufs) != NULL) {
		ccdfreebufs = cbp->cb_freenext;
		--numccdfreebufs;
		reinitbufbio(&cbp->cb_buf);
	} else {
		cbp = malloc(sizeof(struct ccdbuf), M_DEVBUF, M_WAITOK|M_ZERO);
		initbufbio(&cbp->cb_buf);
	}

	/*
	 * independent struct buf initialization
	 */
	LIST_INIT(&cbp->cb_buf.b_dep);
	BUF_LOCKINIT(&cbp->cb_buf);
	BUF_LOCK(&cbp->cb_buf, LK_EXCLUSIVE);
	BUF_KERNPROC(&cbp->cb_buf);

	return(cbp);
}

/*
 * putccdbuf() - Free a ccd buffer.
 *
 * The buffer lock taken in getccdbuf() is released and torn down.  The
 * buffer is then cached on the free list, or freed outright once the
 * list already holds NCCDFREEHIWAT entries.
 *
 * This routine is called at splbio().
 */

static __inline
void
putccdbuf(struct ccdbuf *cbp)
{
	BUF_UNLOCK(&cbp->cb_buf);
	BUF_LOCKFREE(&cbp->cb_buf);

	if (numccdfreebufs < NCCDFREEHIWAT) {
		cbp->cb_freenext = ccdfreebufs;
		ccdfreebufs = cbp;
		++numccdfreebufs;
	} else {
		free((caddr_t)cbp, M_DEVBUF);
	}
}


/*
 * Number of blocks to leave untouched in front of a component partition.
 * This is to avoid violating its disklabel area when it starts at the
 * beginning of the slice.
 */
#if !defined(CCD_OFFSET)
#define CCD_OFFSET 16
#endif

/*
 * Called from ccd_modevent() on MOD_LOAD.  Allocates zeroed softc and
 * ccddevice arrays for NCCD units, so devices can be configured later,
 * and adds the devsw entry.
 */
static void
ccdattach(void)
{
	int i;
	int num = NCCD;

	if (num > 1)
		printf("ccd0-%d: Concatenated disk drivers\n", num-1);
	else
		printf("ccd0: Concatenated disk driver\n");

	ccd_softc = malloc(num * sizeof(struct ccd_softc), M_DEVBUF,
	    M_WAITOK | M_ZERO);
	ccddevs = malloc(num * sizeof(struct ccddevice), M_DEVBUF,
	    M_WAITOK | M_ZERO);
	numccd = num;

	cdevsw_add(&ccd_cdevsw, 0, 0);
	/* XXX: is this necessary?
*/ 317 for (i = 0; i < numccd; ++i) 318 ccddevs[i].ccd_dk = -1; 319 } 320 321 static int 322 ccd_modevent(module_t mod, int type, void *data) 323 { 324 int error = 0; 325 326 switch (type) { 327 case MOD_LOAD: 328 ccdattach(); 329 break; 330 331 case MOD_UNLOAD: 332 printf("ccd0: Unload not supported!\n"); 333 error = EOPNOTSUPP; 334 break; 335 336 default: /* MOD_SHUTDOWN etc */ 337 break; 338 } 339 return (error); 340 } 341 342 DEV_MODULE(ccd, ccd_modevent, NULL); 343 344 static int 345 ccdinit(struct ccddevice *ccd, char **cpaths, struct thread *td) 346 { 347 struct ccd_softc *cs = &ccd_softc[ccd->ccd_unit]; 348 struct ccdcinfo *ci = NULL; /* XXX */ 349 size_t size; 350 int ix; 351 struct vnode *vp; 352 size_t minsize; 353 int maxsecsize; 354 struct partinfo dpart; 355 struct ccdgeom *ccg = &cs->sc_geom; 356 char tmppath[MAXPATHLEN]; 357 int error = 0; 358 struct ucred *cred; 359 360 KKASSERT(td->td_proc); 361 cred = td->td_proc->p_ucred; 362 363 #ifdef DEBUG 364 if (ccddebug & (CCDB_FOLLOW|CCDB_INIT)) 365 printf("ccdinit: unit %d\n", ccd->ccd_unit); 366 #endif 367 368 cs->sc_size = 0; 369 cs->sc_ileave = ccd->ccd_interleave; 370 cs->sc_nccdisks = ccd->ccd_ndev; 371 372 /* Allocate space for the component info. */ 373 cs->sc_cinfo = malloc(cs->sc_nccdisks * sizeof(struct ccdcinfo), 374 M_DEVBUF, M_WAITOK); 375 376 /* 377 * Verify that each component piece exists and record 378 * relevant information about it. 379 */ 380 maxsecsize = 0; 381 minsize = 0; 382 for (ix = 0; ix < cs->sc_nccdisks; ix++) { 383 vp = ccd->ccd_vpp[ix]; 384 ci = &cs->sc_cinfo[ix]; 385 ci->ci_vp = vp; 386 387 /* 388 * Copy in the pathname of the component. 
389 */ 390 bzero(tmppath, sizeof(tmppath)); /* sanity */ 391 if ((error = copyinstr(cpaths[ix], tmppath, 392 MAXPATHLEN, &ci->ci_pathlen)) != 0) { 393 #ifdef DEBUG 394 if (ccddebug & (CCDB_FOLLOW|CCDB_INIT)) 395 printf("ccd%d: can't copy path, error = %d\n", 396 ccd->ccd_unit, error); 397 #endif 398 goto fail; 399 } 400 ci->ci_path = malloc(ci->ci_pathlen, M_DEVBUF, M_WAITOK); 401 bcopy(tmppath, ci->ci_path, ci->ci_pathlen); 402 403 ci->ci_dev = vn_todev(vp); 404 405 /* 406 * Get partition information for the component. 407 */ 408 if ((error = VOP_IOCTL(vp, DIOCGPART, (caddr_t)&dpart, 409 FREAD, cred, td)) != 0) { 410 #ifdef DEBUG 411 if (ccddebug & (CCDB_FOLLOW|CCDB_INIT)) 412 printf("ccd%d: %s: ioctl failed, error = %d\n", 413 ccd->ccd_unit, ci->ci_path, error); 414 #endif 415 goto fail; 416 } 417 if (dpart.part->p_fstype == FS_BSDFFS) { 418 maxsecsize = 419 ((dpart.disklab->d_secsize > maxsecsize) ? 420 dpart.disklab->d_secsize : maxsecsize); 421 size = dpart.part->p_size - CCD_OFFSET; 422 } else { 423 #ifdef DEBUG 424 if (ccddebug & (CCDB_FOLLOW|CCDB_INIT)) 425 printf("ccd%d: %s: incorrect partition type\n", 426 ccd->ccd_unit, ci->ci_path); 427 #endif 428 error = EFTYPE; 429 goto fail; 430 } 431 432 /* 433 * Calculate the size, truncating to an interleave 434 * boundary if necessary. 435 */ 436 437 if (cs->sc_ileave > 1) 438 size -= size % cs->sc_ileave; 439 440 if (size == 0) { 441 #ifdef DEBUG 442 if (ccddebug & (CCDB_FOLLOW|CCDB_INIT)) 443 printf("ccd%d: %s: size == 0\n", 444 ccd->ccd_unit, ci->ci_path); 445 #endif 446 error = ENODEV; 447 goto fail; 448 } 449 450 if (minsize == 0 || size < minsize) 451 minsize = size; 452 ci->ci_size = size; 453 cs->sc_size += size; 454 } 455 456 /* 457 * Don't allow the interleave to be smaller than 458 * the biggest component sector. 
459 */ 460 if ((cs->sc_ileave > 0) && 461 (cs->sc_ileave < (maxsecsize / DEV_BSIZE))) { 462 #ifdef DEBUG 463 if (ccddebug & (CCDB_FOLLOW|CCDB_INIT)) 464 printf("ccd%d: interleave must be at least %d\n", 465 ccd->ccd_unit, (maxsecsize / DEV_BSIZE)); 466 #endif 467 error = EINVAL; 468 goto fail; 469 } 470 471 /* 472 * If uniform interleave is desired set all sizes to that of 473 * the smallest component. This will guarentee that a single 474 * interleave table is generated. 475 * 476 * Lost space must be taken into account when calculating the 477 * overall size. Half the space is lost when CCDF_MIRROR is 478 * specified. One disk is lost when CCDF_PARITY is specified. 479 */ 480 if (ccd->ccd_flags & CCDF_UNIFORM) { 481 for (ci = cs->sc_cinfo; 482 ci < &cs->sc_cinfo[cs->sc_nccdisks]; ci++) { 483 ci->ci_size = minsize; 484 } 485 if (ccd->ccd_flags & CCDF_MIRROR) { 486 /* 487 * Check to see if an even number of components 488 * have been specified. The interleave must also 489 * be non-zero in order for us to be able to 490 * guarentee the topology. 491 */ 492 if (cs->sc_nccdisks % 2) { 493 printf("ccd%d: mirroring requires an even number of disks\n", ccd->ccd_unit ); 494 error = EINVAL; 495 goto fail; 496 } 497 if (cs->sc_ileave == 0) { 498 printf("ccd%d: an interleave must be specified when mirroring\n", ccd->ccd_unit); 499 error = EINVAL; 500 goto fail; 501 } 502 cs->sc_size = (cs->sc_nccdisks/2) * minsize; 503 } else if (ccd->ccd_flags & CCDF_PARITY) { 504 cs->sc_size = (cs->sc_nccdisks-1) * minsize; 505 } else { 506 if (cs->sc_ileave == 0) { 507 printf("ccd%d: an interleave must be specified when using parity\n", ccd->ccd_unit); 508 error = EINVAL; 509 goto fail; 510 } 511 cs->sc_size = cs->sc_nccdisks * minsize; 512 } 513 } 514 515 /* 516 * Construct the interleave table. 517 */ 518 ccdinterleave(cs, ccd->ccd_unit); 519 520 /* 521 * Create pseudo-geometry based on 1MB cylinders. It's 522 * pretty close. 
523 */ 524 ccg->ccg_secsize = maxsecsize; 525 ccg->ccg_ntracks = 1; 526 ccg->ccg_nsectors = 1024 * 1024 / ccg->ccg_secsize; 527 ccg->ccg_ncylinders = cs->sc_size / ccg->ccg_nsectors; 528 529 /* 530 * Add an devstat entry for this device. 531 */ 532 devstat_add_entry(&cs->device_stats, "ccd", ccd->ccd_unit, 533 ccg->ccg_secsize, DEVSTAT_ALL_SUPPORTED, 534 DEVSTAT_TYPE_STORARRAY |DEVSTAT_TYPE_IF_OTHER, 535 DEVSTAT_PRIORITY_ARRAY); 536 537 cs->sc_flags |= CCDF_INITED; 538 cs->sc_cflags = ccd->ccd_flags; /* So we can find out later... */ 539 cs->sc_unit = ccd->ccd_unit; 540 return (0); 541 fail: 542 while (ci > cs->sc_cinfo) { 543 ci--; 544 free(ci->ci_path, M_DEVBUF); 545 } 546 free(cs->sc_cinfo, M_DEVBUF); 547 return (error); 548 } 549 550 static void 551 ccdinterleave(struct ccd_softc *cs, int unit) 552 { 553 struct ccdcinfo *ci, *smallci; 554 struct ccdiinfo *ii; 555 daddr_t bn, lbn; 556 int ix; 557 u_long size; 558 559 #ifdef DEBUG 560 if (ccddebug & CCDB_INIT) 561 printf("ccdinterleave(%x): ileave %d\n", cs, cs->sc_ileave); 562 #endif 563 564 /* 565 * Allocate an interleave table. The worst case occurs when each 566 * of N disks is of a different size, resulting in N interleave 567 * tables. 568 * 569 * Chances are this is too big, but we don't care. 570 */ 571 size = (cs->sc_nccdisks + 1) * sizeof(struct ccdiinfo); 572 cs->sc_itable = (struct ccdiinfo *)malloc(size, M_DEVBUF, M_WAITOK); 573 bzero((caddr_t)cs->sc_itable, size); 574 575 /* 576 * Trivial case: no interleave (actually interleave of disk size). 577 * Each table entry represents a single component in its entirety. 578 * 579 * An interleave of 0 may not be used with a mirror or parity setup. 580 */ 581 if (cs->sc_ileave == 0) { 582 bn = 0; 583 ii = cs->sc_itable; 584 585 for (ix = 0; ix < cs->sc_nccdisks; ix++) { 586 /* Allocate space for ii_index. 
*/ 587 ii->ii_index = malloc(sizeof(int), M_DEVBUF, M_WAITOK); 588 ii->ii_ndisk = 1; 589 ii->ii_startblk = bn; 590 ii->ii_startoff = 0; 591 ii->ii_index[0] = ix; 592 bn += cs->sc_cinfo[ix].ci_size; 593 ii++; 594 } 595 ii->ii_ndisk = 0; 596 #ifdef DEBUG 597 if (ccddebug & CCDB_INIT) 598 printiinfo(cs->sc_itable); 599 #endif 600 return; 601 } 602 603 /* 604 * The following isn't fast or pretty; it doesn't have to be. 605 */ 606 size = 0; 607 bn = lbn = 0; 608 for (ii = cs->sc_itable; ; ii++) { 609 /* 610 * Allocate space for ii_index. We might allocate more then 611 * we use. 612 */ 613 ii->ii_index = malloc((sizeof(int) * cs->sc_nccdisks), 614 M_DEVBUF, M_WAITOK); 615 616 /* 617 * Locate the smallest of the remaining components 618 */ 619 smallci = NULL; 620 for (ci = cs->sc_cinfo; ci < &cs->sc_cinfo[cs->sc_nccdisks]; 621 ci++) { 622 if (ci->ci_size > size && 623 (smallci == NULL || 624 ci->ci_size < smallci->ci_size)) { 625 smallci = ci; 626 } 627 } 628 629 /* 630 * Nobody left, all done 631 */ 632 if (smallci == NULL) { 633 ii->ii_ndisk = 0; 634 break; 635 } 636 637 /* 638 * Record starting logical block using an sc_ileave blocksize. 639 */ 640 ii->ii_startblk = bn / cs->sc_ileave; 641 642 /* 643 * Record starting comopnent block using an sc_ileave 644 * blocksize. This value is relative to the beginning of 645 * a component disk. 646 */ 647 ii->ii_startoff = lbn; 648 649 /* 650 * Determine how many disks take part in this interleave 651 * and record their indices. 
652 */ 653 ix = 0; 654 for (ci = cs->sc_cinfo; 655 ci < &cs->sc_cinfo[cs->sc_nccdisks]; ci++) { 656 if (ci->ci_size >= smallci->ci_size) { 657 ii->ii_index[ix++] = ci - cs->sc_cinfo; 658 } 659 } 660 ii->ii_ndisk = ix; 661 bn += ix * (smallci->ci_size - size); 662 lbn = smallci->ci_size / cs->sc_ileave; 663 size = smallci->ci_size; 664 } 665 #ifdef DEBUG 666 if (ccddebug & CCDB_INIT) 667 printiinfo(cs->sc_itable); 668 #endif 669 } 670 671 /* ARGSUSED */ 672 static int 673 ccdopen(dev_t dev, int flags, int fmt, d_thread_t *td) 674 { 675 int unit = ccdunit(dev); 676 struct ccd_softc *cs; 677 struct disklabel *lp; 678 int error = 0, part, pmask; 679 680 #ifdef DEBUG 681 if (ccddebug & CCDB_FOLLOW) 682 printf("ccdopen(%x, %x)\n", dev, flags); 683 #endif 684 if (unit >= numccd) 685 return (ENXIO); 686 cs = &ccd_softc[unit]; 687 688 if ((error = ccdlock(cs)) != 0) 689 return (error); 690 691 lp = &cs->sc_label; 692 693 part = ccdpart(dev); 694 pmask = (1 << part); 695 696 /* 697 * If we're initialized, check to see if there are any other 698 * open partitions. If not, then it's safe to update 699 * the in-core disklabel. 700 */ 701 if ((cs->sc_flags & CCDF_INITED) && (cs->sc_openmask == 0)) 702 ccdgetdisklabel(dev); 703 704 /* Check that the partition exists. 
*/ 705 if (part != RAW_PART && ((part >= lp->d_npartitions) || 706 (lp->d_partitions[part].p_fstype == FS_UNUSED))) { 707 error = ENXIO; 708 goto done; 709 } 710 711 cs->sc_openmask |= pmask; 712 done: 713 ccdunlock(cs); 714 return (0); 715 } 716 717 /* ARGSUSED */ 718 static int 719 ccdclose(dev_t dev, int flags, int fmt, d_thread_t *td) 720 { 721 int unit = ccdunit(dev); 722 struct ccd_softc *cs; 723 int error = 0, part; 724 725 #ifdef DEBUG 726 if (ccddebug & CCDB_FOLLOW) 727 printf("ccdclose(%x, %x)\n", dev, flags); 728 #endif 729 730 if (unit >= numccd) 731 return (ENXIO); 732 cs = &ccd_softc[unit]; 733 734 if ((error = ccdlock(cs)) != 0) 735 return (error); 736 737 part = ccdpart(dev); 738 739 /* ...that much closer to allowing unconfiguration... */ 740 cs->sc_openmask &= ~(1 << part); 741 ccdunlock(cs); 742 return (0); 743 } 744 745 static void 746 ccdstrategy(dev_t dev, struct bio *bio) 747 { 748 int unit = ccdunit(dev); 749 struct bio *nbio; 750 struct buf *bp = bio->bio_buf; 751 struct ccd_softc *cs = &ccd_softc[unit]; 752 int wlabel; 753 struct disklabel *lp; 754 755 #ifdef DEBUG 756 if (ccddebug & CCDB_FOLLOW) 757 printf("ccdstrategy(%x): unit %d\n", bp, unit); 758 #endif 759 if ((cs->sc_flags & CCDF_INITED) == 0) { 760 bp->b_error = ENXIO; 761 bp->b_flags |= B_ERROR; 762 goto done; 763 } 764 765 /* If it's a nil transfer, wake up the top half now. */ 766 if (bp->b_bcount == 0) 767 goto done; 768 769 lp = &cs->sc_label; 770 771 /* 772 * Do bounds checking and adjust transfer. If there's an 773 * error, the bounds check will flag that for us. 
774 */ 775 wlabel = cs->sc_flags & (CCDF_WLABEL|CCDF_LABELLING); 776 if (ccdpart(dev) != RAW_PART) { 777 nbio = bounds_check_with_label(dev, bio, lp, wlabel); 778 if (nbio == NULL) 779 goto done; 780 } else { 781 int pbn; /* in sc_secsize chunks */ 782 long sz; /* in sc_secsize chunks */ 783 784 pbn = (int)(bio->bio_offset / cs->sc_geom.ccg_secsize); 785 sz = howmany(bp->b_bcount, cs->sc_geom.ccg_secsize); 786 787 /* 788 * If out of bounds return an error. If at the EOF point, 789 * simply read or write less. 790 */ 791 792 if (pbn < 0 || pbn >= cs->sc_size) { 793 bp->b_resid = bp->b_bcount; 794 if (pbn != cs->sc_size) { 795 bp->b_error = EINVAL; 796 bp->b_flags |= B_ERROR | B_INVAL; 797 } 798 goto done; 799 } 800 801 /* 802 * If the request crosses EOF, truncate the request. 803 */ 804 if (pbn + sz > cs->sc_size) { 805 bp->b_bcount = (cs->sc_size - pbn) * 806 cs->sc_geom.ccg_secsize; 807 } 808 nbio = bio; 809 } 810 811 bp->b_resid = bp->b_bcount; 812 nbio->bio_driver_info = dev; 813 814 /* 815 * "Start" the unit. 816 */ 817 crit_enter(); 818 ccdstart(cs, nbio); 819 crit_exit(); 820 return; 821 822 /* 823 * note: bio, not nbio, is valid at the done label. 824 */ 825 done: 826 biodone(bio); 827 } 828 829 static void 830 ccdstart(struct ccd_softc *cs, struct bio *bio) 831 { 832 long bcount, rcount; 833 struct ccdbuf *cbp[4]; 834 struct buf *bp = bio->bio_buf; 835 dev_t dev = bio->bio_driver_info; 836 /* XXX! : 2 reads and 2 writes for RAID 4/5 */ 837 caddr_t addr; 838 off_t doffset; 839 struct partition *pp; 840 841 #ifdef DEBUG 842 if (ccddebug & CCDB_FOLLOW) 843 printf("ccdstart(%x, %x)\n", cs, bp); 844 #endif 845 846 /* Record the transaction start */ 847 devstat_start_transaction(&cs->device_stats); 848 849 /* 850 * Translate the partition-relative block number to an absolute. 
851 */ 852 doffset = bio->bio_offset; 853 if (ccdpart(dev) != RAW_PART) { 854 pp = &cs->sc_label.d_partitions[ccdpart(dev)]; 855 doffset += pp->p_offset * cs->sc_label.d_secsize; 856 } 857 858 /* 859 * Allocate component buffers and fire off the requests 860 */ 861 addr = bp->b_data; 862 for (bcount = bp->b_bcount; bcount > 0; bcount -= rcount) { 863 ccdbuffer(cbp, cs, bio, doffset, addr, bcount); 864 rcount = cbp[0]->cb_buf.b_bcount; 865 866 if (cs->sc_cflags & CCDF_MIRROR) { 867 /* 868 * Mirroring. Writes go to both disks, reads are 869 * taken from whichever disk seems most appropriate. 870 * 871 * We attempt to localize reads to the disk whos arm 872 * is nearest the read request. We ignore seeks due 873 * to writes when making this determination and we 874 * also try to avoid hogging. 875 */ 876 if ((cbp[0]->cb_buf.b_flags & B_READ) == 0) { 877 vn_strategy(cbp[0]->cb_buf.b_vp, 878 &cbp[0]->cb_buf.b_bio1); 879 vn_strategy(cbp[1]->cb_buf.b_vp, 880 &cbp[1]->cb_buf.b_bio1); 881 } else { 882 int pick = cs->sc_pick; 883 daddr_t range = cs->sc_size / 16 * cs->sc_label.d_secsize; 884 885 if (doffset < cs->sc_blk[pick] - range || 886 doffset > cs->sc_blk[pick] + range 887 ) { 888 cs->sc_pick = pick = 1 - pick; 889 } 890 cs->sc_blk[pick] = doffset + rcount; 891 vn_strategy(cbp[pick]->cb_buf.b_vp, 892 &cbp[pick]->cb_buf.b_bio1); 893 } 894 } else { 895 /* 896 * Not mirroring 897 */ 898 vn_strategy(cbp[0]->cb_buf.b_vp, 899 &cbp[0]->cb_buf.b_bio1); 900 } 901 doffset += rcount; 902 addr += rcount; 903 } 904 } 905 906 /* 907 * Build a component buffer header. 
908 */ 909 static void 910 ccdbuffer(struct ccdbuf **cb, struct ccd_softc *cs, struct bio *bio, 911 off_t doffset, caddr_t addr, long bcount) 912 { 913 struct ccdcinfo *ci, *ci2 = NULL; /* XXX */ 914 struct ccdbuf *cbp; 915 daddr_t bn, cbn, cboff; 916 off_t cbc; 917 918 #ifdef DEBUG 919 if (ccddebug & CCDB_IO) 920 printf("ccdbuffer(%x, %x, %d, %x, %d)\n", 921 cs, bp, bn, addr, bcount); 922 #endif 923 /* 924 * Determine which component bn falls in. 925 */ 926 bn = (daddr_t)(doffset / cs->sc_geom.ccg_secsize); 927 cbn = bn; 928 cboff = 0; 929 930 if (cs->sc_ileave == 0) { 931 /* 932 * Serially concatenated and neither a mirror nor a parity 933 * config. This is a special case. 934 */ 935 daddr_t sblk; 936 937 sblk = 0; 938 for (ci = cs->sc_cinfo; cbn >= sblk + ci->ci_size; ci++) 939 sblk += ci->ci_size; 940 cbn -= sblk; 941 } else { 942 struct ccdiinfo *ii; 943 int ccdisk, off; 944 945 /* 946 * Calculate cbn, the logical superblock (sc_ileave chunks), 947 * and cboff, a normal block offset (DEV_BSIZE chunks) relative 948 * to cbn. 949 */ 950 cboff = cbn % cs->sc_ileave; /* DEV_BSIZE gran */ 951 cbn = cbn / cs->sc_ileave; /* DEV_BSIZE * ileave gran */ 952 953 /* 954 * Figure out which interleave table to use. 955 */ 956 for (ii = cs->sc_itable; ii->ii_ndisk; ii++) { 957 if (ii->ii_startblk > cbn) 958 break; 959 } 960 ii--; 961 962 /* 963 * off is the logical superblock relative to the beginning 964 * of this interleave block. 965 */ 966 off = cbn - ii->ii_startblk; 967 968 /* 969 * We must calculate which disk component to use (ccdisk), 970 * and recalculate cbn to be the superblock relative to 971 * the beginning of the component. This is typically done by 972 * adding 'off' and ii->ii_startoff together. However, 'off' 973 * must typically be divided by the number of components in 974 * this interleave array to be properly convert it from a 975 * CCD-relative logical superblock number to a 976 * component-relative superblock number. 
977 */ 978 if (ii->ii_ndisk == 1) { 979 /* 980 * When we have just one disk, it can't be a mirror 981 * or a parity config. 982 */ 983 ccdisk = ii->ii_index[0]; 984 cbn = ii->ii_startoff + off; 985 } else { 986 if (cs->sc_cflags & CCDF_MIRROR) { 987 /* 988 * We have forced a uniform mapping, resulting 989 * in a single interleave array. We double 990 * up on the first half of the available 991 * components and our mirror is in the second 992 * half. This only works with a single 993 * interleave array because doubling up 994 * doubles the number of sectors, so there 995 * cannot be another interleave array because 996 * the next interleave array's calculations 997 * would be off. 998 */ 999 int ndisk2 = ii->ii_ndisk / 2; 1000 ccdisk = ii->ii_index[off % ndisk2]; 1001 cbn = ii->ii_startoff + off / ndisk2; 1002 ci2 = &cs->sc_cinfo[ccdisk + ndisk2]; 1003 } else if (cs->sc_cflags & CCDF_PARITY) { 1004 /* 1005 * XXX not implemented yet 1006 */ 1007 int ndisk2 = ii->ii_ndisk - 1; 1008 ccdisk = ii->ii_index[off % ndisk2]; 1009 cbn = ii->ii_startoff + off / ndisk2; 1010 if (cbn % ii->ii_ndisk <= ccdisk) 1011 ccdisk++; 1012 } else { 1013 ccdisk = ii->ii_index[off % ii->ii_ndisk]; 1014 cbn = ii->ii_startoff + off / ii->ii_ndisk; 1015 } 1016 } 1017 1018 ci = &cs->sc_cinfo[ccdisk]; 1019 1020 /* 1021 * Convert cbn from a superblock to a normal block so it 1022 * can be used to calculate (along with cboff) the normal 1023 * block index into this particular disk. 1024 */ 1025 cbn *= cs->sc_ileave; 1026 } 1027 1028 /* 1029 * Fill in the component buf structure. 1030 */ 1031 cbp = getccdbuf(); 1032 cbp->cb_buf.b_flags = bio->bio_buf->b_flags; 1033 cbp->cb_buf.b_data = addr; 1034 cbp->cb_buf.b_vp = ci->ci_vp; 1035 if (cs->sc_ileave == 0) 1036 cbc = dbtob((off_t)(ci->ci_size - cbn)); 1037 else 1038 cbc = dbtob((off_t)(cs->sc_ileave - cboff)); 1039 cbp->cb_buf.b_bcount = (cbc < bcount) ? 
cbc : bcount; 1040 cbp->cb_buf.b_bufsize = cbp->cb_buf.b_bcount; 1041 1042 cbp->cb_buf.b_bio1.bio_done = ccdiodone; 1043 cbp->cb_buf.b_bio1.bio_caller_info1.ptr = cbp; 1044 cbp->cb_buf.b_bio1.bio_offset = dbtob(cbn + cboff + CCD_OFFSET); 1045 1046 /* 1047 * context for ccdiodone 1048 */ 1049 cbp->cb_obio = bio; 1050 cbp->cb_unit = cs - ccd_softc; 1051 cbp->cb_comp = ci - cs->sc_cinfo; 1052 1053 #ifdef DEBUG 1054 if (ccddebug & CCDB_IO) 1055 printf(" dev %x(u%d): cbp %x off %lld addr %x bcnt %d\n", 1056 ci->ci_dev, ci-cs->sc_cinfo, cbp, 1057 cbp->cb_buf.b_bio1.bio_offset, 1058 cbp->cb_buf.b_data, cbp->cb_buf.b_bcount); 1059 #endif 1060 cb[0] = cbp; 1061 1062 /* 1063 * Note: both I/O's setup when reading from mirror, but only one 1064 * will be executed. 1065 */ 1066 if (cs->sc_cflags & CCDF_MIRROR) { 1067 /* mirror, setup second I/O */ 1068 cbp = getccdbuf(); 1069 1070 cbp->cb_buf.b_flags = bio->bio_buf->b_flags; 1071 cbp->cb_buf.b_data = addr; 1072 cbp->cb_buf.b_vp = ci2->ci_vp; 1073 if (cs->sc_ileave == 0) 1074 cbc = dbtob((off_t)(ci->ci_size - cbn)); 1075 else 1076 cbc = dbtob((off_t)(cs->sc_ileave - cboff)); 1077 cbp->cb_buf.b_bcount = (cbc < bcount) ? 
cbc : bcount; 1078 cbp->cb_buf.b_bufsize = cbp->cb_buf.b_bcount; 1079 1080 cbp->cb_buf.b_bio1.bio_done = ccdiodone; 1081 cbp->cb_buf.b_bio1.bio_caller_info1.ptr = cbp; 1082 cbp->cb_buf.b_bio1.bio_offset = dbtob(cbn + cboff + CCD_OFFSET); 1083 1084 /* 1085 * context for ccdiodone 1086 */ 1087 cbp->cb_obio = bio; 1088 cbp->cb_unit = cs - ccd_softc; 1089 cbp->cb_comp = ci2 - cs->sc_cinfo; 1090 cb[1] = cbp; 1091 /* link together the ccdbuf's and clear "mirror done" flag */ 1092 cb[0]->cb_mirror = cb[1]; 1093 cb[1]->cb_mirror = cb[0]; 1094 cb[0]->cb_pflags &= ~CCDPF_MIRROR_DONE; 1095 cb[1]->cb_pflags &= ~CCDPF_MIRROR_DONE; 1096 } 1097 } 1098 1099 static void 1100 ccdintr(struct ccd_softc *cs, struct bio *bio) 1101 { 1102 struct buf *bp = bio->bio_buf; 1103 1104 #ifdef DEBUG 1105 if (ccddebug & CCDB_FOLLOW) 1106 printf("ccdintr(%x, %x)\n", cs, bp); 1107 #endif 1108 /* 1109 * Request is done for better or worse, wakeup the top half. 1110 */ 1111 if (bp->b_flags & B_ERROR) 1112 bp->b_resid = bp->b_bcount; 1113 devstat_end_transaction_buf(&cs->device_stats, bp); 1114 biodone(bio); 1115 } 1116 1117 /* 1118 * Called at interrupt time. 1119 * Mark the component as done and if all components are done, 1120 * take a ccd interrupt. 1121 */ 1122 static void 1123 ccdiodone(struct bio *bio) 1124 { 1125 struct ccdbuf *cbp = bio->bio_caller_info1.ptr; 1126 struct bio *obio = cbp->cb_obio; 1127 struct buf *obp = obio->bio_buf; 1128 int unit = cbp->cb_unit; 1129 int count; 1130 1131 /* 1132 * Since we do not have exclusive access to underlying devices, 1133 * we can't keep cache translations around. 
1134 */ 1135 clearbiocache(bio->bio_next); 1136 1137 crit_enter(); 1138 #ifdef DEBUG 1139 if (ccddebug & CCDB_FOLLOW) 1140 printf("ccdiodone(%x)\n", cbp); 1141 if (ccddebug & CCDB_IO) { 1142 printf("ccdiodone: bp %x bcount %d resid %d\n", 1143 obp, obp->b_bcount, obp->b_resid); 1144 printf(" dev %x(u%d), cbp %x off %lld addr %x bcnt %d\n", 1145 cbp->cb_buf.b_dev, cbp->cb_comp, cbp, 1146 cbp->cb_buf.b_loffset, cbp->cb_buf.b_data, 1147 cbp->cb_buf.b_bcount); 1148 } 1149 #endif 1150 /* 1151 * If an error occured, report it. If this is a mirrored 1152 * configuration and the first of two possible reads, do not 1153 * set the error in the bp yet because the second read may 1154 * succeed. 1155 */ 1156 if (cbp->cb_buf.b_flags & B_ERROR) { 1157 const char *msg = ""; 1158 1159 if ((ccd_softc[unit].sc_cflags & CCDF_MIRROR) && 1160 (cbp->cb_buf.b_flags & B_READ) && 1161 (cbp->cb_pflags & CCDPF_MIRROR_DONE) == 0) { 1162 /* 1163 * We will try our read on the other disk down 1164 * below, also reverse the default pick so if we 1165 * are doing a scan we do not keep hitting the 1166 * bad disk first. 1167 */ 1168 struct ccd_softc *cs = &ccd_softc[unit]; 1169 1170 msg = ", trying other disk"; 1171 cs->sc_pick = 1 - cs->sc_pick; 1172 cs->sc_blk[cs->sc_pick] = obio->bio_offset; 1173 } else { 1174 obp->b_flags |= B_ERROR; 1175 obp->b_error = cbp->cb_buf.b_error ? 1176 cbp->cb_buf.b_error : EIO; 1177 } 1178 printf("ccd%d: error %d on component %d offset %lld (ccd offset %lld)%s\n", 1179 unit, obp->b_error, cbp->cb_comp, 1180 cbp->cb_buf.b_bio2.bio_offset, 1181 obio->bio_offset, msg); 1182 } 1183 1184 /* 1185 * Process mirror. If we are writing, I/O has been initiated on both 1186 * buffers and we fall through only after both are finished. 1187 * 1188 * If we are reading only one I/O is initiated at a time. If an 1189 * error occurs we initiate the second I/O and return, otherwise 1190 * we free the second I/O without initiating it. 
1191 */ 1192 1193 if (ccd_softc[unit].sc_cflags & CCDF_MIRROR) { 1194 if ((cbp->cb_buf.b_flags & B_READ) == 0) { 1195 /* 1196 * When writing, handshake with the second buffer 1197 * to determine when both are done. If both are not 1198 * done, return here. 1199 */ 1200 if ((cbp->cb_pflags & CCDPF_MIRROR_DONE) == 0) { 1201 cbp->cb_mirror->cb_pflags |= CCDPF_MIRROR_DONE; 1202 putccdbuf(cbp); 1203 crit_exit(); 1204 return; 1205 } 1206 } else { 1207 /* 1208 * When reading, either dispose of the second buffer 1209 * or initiate I/O on the second buffer if an error 1210 * occured with this one. 1211 */ 1212 if ((cbp->cb_pflags & CCDPF_MIRROR_DONE) == 0) { 1213 if (cbp->cb_buf.b_flags & B_ERROR) { 1214 cbp->cb_mirror->cb_pflags |= 1215 CCDPF_MIRROR_DONE; 1216 vn_strategy( 1217 cbp->cb_mirror->cb_buf.b_vp, 1218 &cbp->cb_mirror->cb_buf.b_bio1 1219 ); 1220 putccdbuf(cbp); 1221 crit_exit(); 1222 return; 1223 } else { 1224 putccdbuf(cbp->cb_mirror); 1225 /* fall through */ 1226 } 1227 } 1228 } 1229 } 1230 1231 /* 1232 * use b_bufsize to determine how big the original request was rather 1233 * then b_bcount, because b_bcount may have been truncated for EOF. 1234 * 1235 * XXX We check for an error, but we do not test the resid for an 1236 * aligned EOF condition. This may result in character & block 1237 * device access not recognizing EOF properly when read or written 1238 * sequentially, but will not effect filesystems. 1239 */ 1240 count = cbp->cb_buf.b_bufsize; 1241 putccdbuf(cbp); 1242 1243 /* 1244 * If all done, "interrupt". 
1245 */ 1246 obp->b_resid -= count; 1247 if (obp->b_resid < 0) 1248 panic("ccdiodone: count"); 1249 if (obp->b_resid == 0) 1250 ccdintr(&ccd_softc[unit], obio); 1251 crit_exit(); 1252 } 1253 1254 static int 1255 ccdioctl(dev_t dev, u_long cmd, caddr_t data, int flag, d_thread_t *td) 1256 { 1257 int unit = ccdunit(dev); 1258 int i, j, lookedup = 0, error = 0; 1259 int part, pmask; 1260 struct ccd_softc *cs; 1261 struct ccd_ioctl *ccio = (struct ccd_ioctl *)data; 1262 struct ccddevice ccd; 1263 char **cpp; 1264 struct vnode **vpp; 1265 struct ucred *cred; 1266 1267 KKASSERT(td->td_proc != NULL); 1268 cred = td->td_proc->p_ucred; 1269 1270 if (unit >= numccd) 1271 return (ENXIO); 1272 cs = &ccd_softc[unit]; 1273 1274 bzero(&ccd, sizeof(ccd)); 1275 1276 switch (cmd) { 1277 case CCDIOCSET: 1278 if (cs->sc_flags & CCDF_INITED) 1279 return (EBUSY); 1280 1281 if ((flag & FWRITE) == 0) 1282 return (EBADF); 1283 1284 if ((error = ccdlock(cs)) != 0) 1285 return (error); 1286 1287 if (ccio->ccio_ndisks > CCD_MAXNDISKS) 1288 return (EINVAL); 1289 1290 /* Fill in some important bits. 
*/ 1291 ccd.ccd_unit = unit; 1292 ccd.ccd_interleave = ccio->ccio_ileave; 1293 if (ccd.ccd_interleave == 0 && 1294 ((ccio->ccio_flags & CCDF_MIRROR) || 1295 (ccio->ccio_flags & CCDF_PARITY))) { 1296 printf("ccd%d: disabling mirror/parity, interleave is 0\n", unit); 1297 ccio->ccio_flags &= ~(CCDF_MIRROR | CCDF_PARITY); 1298 } 1299 if ((ccio->ccio_flags & CCDF_MIRROR) && 1300 (ccio->ccio_flags & CCDF_PARITY)) { 1301 printf("ccd%d: can't specify both mirror and parity, using mirror\n", unit); 1302 ccio->ccio_flags &= ~CCDF_PARITY; 1303 } 1304 if ((ccio->ccio_flags & (CCDF_MIRROR | CCDF_PARITY)) && 1305 !(ccio->ccio_flags & CCDF_UNIFORM)) { 1306 printf("ccd%d: mirror/parity forces uniform flag\n", 1307 unit); 1308 ccio->ccio_flags |= CCDF_UNIFORM; 1309 } 1310 ccd.ccd_flags = ccio->ccio_flags & CCDF_USERMASK; 1311 1312 /* 1313 * Allocate space for and copy in the array of 1314 * componet pathnames and device numbers. 1315 */ 1316 cpp = malloc(ccio->ccio_ndisks * sizeof(char *), 1317 M_DEVBUF, M_WAITOK); 1318 vpp = malloc(ccio->ccio_ndisks * sizeof(struct vnode *), 1319 M_DEVBUF, M_WAITOK); 1320 1321 error = copyin((caddr_t)ccio->ccio_disks, (caddr_t)cpp, 1322 ccio->ccio_ndisks * sizeof(char **)); 1323 if (error) { 1324 free(vpp, M_DEVBUF); 1325 free(cpp, M_DEVBUF); 1326 ccdunlock(cs); 1327 return (error); 1328 } 1329 1330 #ifdef DEBUG 1331 if (ccddebug & CCDB_INIT) 1332 for (i = 0; i < ccio->ccio_ndisks; ++i) 1333 printf("ccdioctl: component %d: 0x%x\n", 1334 i, cpp[i]); 1335 #endif 1336 1337 for (i = 0; i < ccio->ccio_ndisks; ++i) { 1338 #ifdef DEBUG 1339 if (ccddebug & CCDB_INIT) 1340 printf("ccdioctl: lookedup = %d\n", lookedup); 1341 #endif 1342 if ((error = ccdlookup(cpp[i], td, &vpp[i])) != 0) { 1343 for (j = 0; j < lookedup; ++j) 1344 (void)vn_close(vpp[j], FREAD|FWRITE, td); 1345 free(vpp, M_DEVBUF); 1346 free(cpp, M_DEVBUF); 1347 ccdunlock(cs); 1348 return (error); 1349 } 1350 ++lookedup; 1351 } 1352 ccd.ccd_cpp = cpp; 1353 ccd.ccd_vpp = vpp; 1354 ccd.ccd_ndev 
= ccio->ccio_ndisks; 1355 1356 /* 1357 * Initialize the ccd. Fills in the softc for us. 1358 */ 1359 if ((error = ccdinit(&ccd, cpp, td)) != 0) { 1360 for (j = 0; j < lookedup; ++j) 1361 (void)vn_close(vpp[j], FREAD|FWRITE, td); 1362 bzero(&ccd_softc[unit], sizeof(struct ccd_softc)); 1363 free(vpp, M_DEVBUF); 1364 free(cpp, M_DEVBUF); 1365 ccdunlock(cs); 1366 return (error); 1367 } 1368 1369 /* 1370 * The ccd has been successfully initialized, so 1371 * we can place it into the array and read the disklabel. 1372 */ 1373 bcopy(&ccd, &ccddevs[unit], sizeof(ccd)); 1374 ccio->ccio_unit = unit; 1375 ccio->ccio_size = cs->sc_size; 1376 ccdgetdisklabel(dev); 1377 1378 ccdunlock(cs); 1379 1380 break; 1381 1382 case CCDIOCCLR: 1383 if ((cs->sc_flags & CCDF_INITED) == 0) 1384 return (ENXIO); 1385 1386 if ((flag & FWRITE) == 0) 1387 return (EBADF); 1388 1389 if ((error = ccdlock(cs)) != 0) 1390 return (error); 1391 1392 /* Don't unconfigure if any other partitions are open */ 1393 part = ccdpart(dev); 1394 pmask = (1 << part); 1395 if ((cs->sc_openmask & ~pmask)) { 1396 ccdunlock(cs); 1397 return (EBUSY); 1398 } 1399 1400 /* 1401 * Free ccd_softc information and clear entry. 1402 */ 1403 1404 /* Close the components and free their pathnames. */ 1405 for (i = 0; i < cs->sc_nccdisks; ++i) { 1406 /* 1407 * XXX: this close could potentially fail and 1408 * cause Bad Things. Maybe we need to force 1409 * the close to happen? 1410 */ 1411 #ifdef DEBUG 1412 if (ccddebug & CCDB_VNODE) 1413 vprint("CCDIOCCLR: vnode info", 1414 cs->sc_cinfo[i].ci_vp); 1415 #endif 1416 (void)vn_close(cs->sc_cinfo[i].ci_vp, FREAD|FWRITE, td); 1417 free(cs->sc_cinfo[i].ci_path, M_DEVBUF); 1418 } 1419 1420 /* Free interleave index. */ 1421 for (i = 0; cs->sc_itable[i].ii_ndisk; ++i) 1422 free(cs->sc_itable[i].ii_index, M_DEVBUF); 1423 1424 /* Free component info and interleave table. 
*/ 1425 free(cs->sc_cinfo, M_DEVBUF); 1426 free(cs->sc_itable, M_DEVBUF); 1427 cs->sc_flags &= ~CCDF_INITED; 1428 1429 /* 1430 * Free ccddevice information and clear entry. 1431 */ 1432 free(ccddevs[unit].ccd_cpp, M_DEVBUF); 1433 free(ccddevs[unit].ccd_vpp, M_DEVBUF); 1434 ccd.ccd_dk = -1; 1435 bcopy(&ccd, &ccddevs[unit], sizeof(ccd)); 1436 1437 /* 1438 * And remove the devstat entry. 1439 */ 1440 devstat_remove_entry(&cs->device_stats); 1441 1442 /* This must be atomic. */ 1443 crit_enter(); 1444 ccdunlock(cs); 1445 bzero(cs, sizeof(struct ccd_softc)); 1446 crit_exit(); 1447 1448 break; 1449 1450 case DIOCGDINFO: 1451 if ((cs->sc_flags & CCDF_INITED) == 0) 1452 return (ENXIO); 1453 1454 *(struct disklabel *)data = cs->sc_label; 1455 break; 1456 1457 case DIOCGPART: 1458 if ((cs->sc_flags & CCDF_INITED) == 0) 1459 return (ENXIO); 1460 1461 ((struct partinfo *)data)->disklab = &cs->sc_label; 1462 ((struct partinfo *)data)->part = 1463 &cs->sc_label.d_partitions[ccdpart(dev)]; 1464 break; 1465 1466 case DIOCWDINFO: 1467 case DIOCSDINFO: 1468 if ((cs->sc_flags & CCDF_INITED) == 0) 1469 return (ENXIO); 1470 1471 if ((flag & FWRITE) == 0) 1472 return (EBADF); 1473 1474 if ((error = ccdlock(cs)) != 0) 1475 return (error); 1476 1477 cs->sc_flags |= CCDF_LABELLING; 1478 1479 error = setdisklabel(&cs->sc_label, 1480 (struct disklabel *)data, 0); 1481 if (error == 0) { 1482 if (cmd == DIOCWDINFO) { 1483 dev_t cdev = CCDLABELDEV(dev); 1484 error = writedisklabel(cdev, &cs->sc_label); 1485 } 1486 } 1487 1488 cs->sc_flags &= ~CCDF_LABELLING; 1489 1490 ccdunlock(cs); 1491 1492 if (error) 1493 return (error); 1494 break; 1495 1496 case DIOCWLABEL: 1497 if ((cs->sc_flags & CCDF_INITED) == 0) 1498 return (ENXIO); 1499 1500 if ((flag & FWRITE) == 0) 1501 return (EBADF); 1502 if (*(int *)data != 0) 1503 cs->sc_flags |= CCDF_WLABEL; 1504 else 1505 cs->sc_flags &= ~CCDF_WLABEL; 1506 break; 1507 1508 default: 1509 return (ENOTTY); 1510 } 1511 1512 return (0); 1513 } 1514 1515 static int 
1516 ccdsize(dev_t dev) 1517 { 1518 struct ccd_softc *cs; 1519 int part, size; 1520 1521 if (ccdopen(dev, 0, S_IFCHR, curthread)) 1522 return (-1); 1523 1524 cs = &ccd_softc[ccdunit(dev)]; 1525 part = ccdpart(dev); 1526 1527 if ((cs->sc_flags & CCDF_INITED) == 0) 1528 return (-1); 1529 1530 if (cs->sc_label.d_partitions[part].p_fstype != FS_SWAP) 1531 size = -1; 1532 else 1533 size = cs->sc_label.d_partitions[part].p_size; 1534 1535 if (ccdclose(dev, 0, S_IFCHR, curthread)) 1536 return (-1); 1537 1538 return (size); 1539 } 1540 1541 static int 1542 ccddump(dev_t dev, u_int count, u_int blkno, u_int secsize) 1543 { 1544 /* Not implemented. */ 1545 return ENXIO; 1546 } 1547 1548 /* 1549 * Lookup the provided name in the filesystem. If the file exists, 1550 * is a valid block device, and isn't being used by anyone else, 1551 * set *vpp to the file's vnode. 1552 */ 1553 static int 1554 ccdlookup(char *path, struct thread *td, struct vnode **vpp) 1555 { 1556 struct nlookupdata nd; 1557 struct ucred *cred; 1558 struct vnode *vp; 1559 int error; 1560 1561 KKASSERT(td->td_proc); 1562 cred = td->td_proc->p_ucred; 1563 *vpp = NULL; 1564 1565 error = nlookup_init(&nd, path, UIO_USERSPACE, NLC_FOLLOW|NLC_LOCKVP); 1566 if (error) 1567 return (error); 1568 if ((error = vn_open(&nd, NULL, FREAD|FWRITE, 0)) != 0) { 1569 #ifdef DEBUG 1570 if (ccddebug & CCDB_FOLLOW|CCDB_INIT) 1571 printf("ccdlookup: vn_open error = %d\n", error); 1572 #endif 1573 goto done; 1574 } 1575 vp = nd.nl_open_vp; 1576 1577 if (vp->v_usecount > 1) { 1578 error = EBUSY; 1579 goto done; 1580 } 1581 1582 if (!vn_isdisk(vp, &error)) 1583 goto done; 1584 1585 #ifdef DEBUG 1586 if (ccddebug & CCDB_VNODE) 1587 vprint("ccdlookup: vnode info", vp); 1588 #endif 1589 1590 VOP_UNLOCK(vp, 0, td); 1591 nd.nl_open_vp = NULL; 1592 nlookup_done(&nd); 1593 *vpp = vp; /* leave ref intact */ 1594 return (0); 1595 done: 1596 nlookup_done(&nd); 1597 return (error); 1598 } 1599 1600 /* 1601 * Read the disklabel from the ccd. 
If one is not present, fake one 1602 * up. 1603 */ 1604 static void 1605 ccdgetdisklabel(dev_t dev) 1606 { 1607 int unit = ccdunit(dev); 1608 struct ccd_softc *cs = &ccd_softc[unit]; 1609 char *errstring; 1610 struct disklabel *lp = &cs->sc_label; 1611 struct ccdgeom *ccg = &cs->sc_geom; 1612 dev_t cdev; 1613 1614 bzero(lp, sizeof(*lp)); 1615 1616 lp->d_secperunit = cs->sc_size; 1617 lp->d_secsize = ccg->ccg_secsize; 1618 lp->d_nsectors = ccg->ccg_nsectors; 1619 lp->d_ntracks = ccg->ccg_ntracks; 1620 lp->d_ncylinders = ccg->ccg_ncylinders; 1621 lp->d_secpercyl = lp->d_ntracks * lp->d_nsectors; 1622 1623 strncpy(lp->d_typename, "ccd", sizeof(lp->d_typename)); 1624 lp->d_type = DTYPE_CCD; 1625 strncpy(lp->d_packname, "fictitious", sizeof(lp->d_packname)); 1626 lp->d_rpm = 3600; 1627 lp->d_interleave = 1; 1628 lp->d_flags = 0; 1629 1630 lp->d_partitions[RAW_PART].p_offset = 0; 1631 lp->d_partitions[RAW_PART].p_size = cs->sc_size; 1632 lp->d_partitions[RAW_PART].p_fstype = FS_UNUSED; 1633 lp->d_npartitions = RAW_PART + 1; 1634 1635 lp->d_bbsize = BBSIZE; /* XXX */ 1636 lp->d_sbsize = SBSIZE; /* XXX */ 1637 1638 lp->d_magic = DISKMAGIC; 1639 lp->d_magic2 = DISKMAGIC; 1640 lp->d_checksum = dkcksum(&cs->sc_label); 1641 1642 /* 1643 * Call the generic disklabel extraction routine. 1644 */ 1645 cdev = CCDLABELDEV(dev); 1646 errstring = readdisklabel(cdev, &cs->sc_label); 1647 if (errstring != NULL) 1648 ccdmakedisklabel(cs); 1649 1650 #ifdef DEBUG 1651 /* It's actually extremely common to have unlabeled ccds. */ 1652 if (ccddebug & CCDB_LABEL) 1653 if (errstring != NULL) 1654 printf("ccd%d: %s\n", unit, errstring); 1655 #endif 1656 } 1657 1658 /* 1659 * Take care of things one might want to take care of in the event 1660 * that a disklabel isn't present. 
1661 */ 1662 static void 1663 ccdmakedisklabel(struct ccd_softc *cs) 1664 { 1665 struct disklabel *lp = &cs->sc_label; 1666 1667 /* 1668 * For historical reasons, if there's no disklabel present 1669 * the raw partition must be marked FS_BSDFFS. 1670 */ 1671 lp->d_partitions[RAW_PART].p_fstype = FS_BSDFFS; 1672 1673 strncpy(lp->d_packname, "default label", sizeof(lp->d_packname)); 1674 } 1675 1676 /* 1677 * Wait interruptibly for an exclusive lock. 1678 * 1679 * XXX 1680 * Several drivers do this; it should be abstracted and made MP-safe. 1681 */ 1682 static int 1683 ccdlock(struct ccd_softc *cs) 1684 { 1685 int error; 1686 1687 while ((cs->sc_flags & CCDF_LOCKED) != 0) { 1688 cs->sc_flags |= CCDF_WANTED; 1689 if ((error = tsleep(cs, PCATCH, "ccdlck", 0)) != 0) 1690 return (error); 1691 } 1692 cs->sc_flags |= CCDF_LOCKED; 1693 return (0); 1694 } 1695 1696 /* 1697 * Unlock and wake up any waiters. 1698 */ 1699 static void 1700 ccdunlock(struct ccd_softc *cs) 1701 { 1702 1703 cs->sc_flags &= ~CCDF_LOCKED; 1704 if ((cs->sc_flags & CCDF_WANTED) != 0) { 1705 cs->sc_flags &= ~CCDF_WANTED; 1706 wakeup(cs); 1707 } 1708 } 1709 1710 #ifdef DEBUG 1711 static void 1712 printiinfo(struct ccdiinfo *ii) 1713 { 1714 int ix, i; 1715 1716 for (ix = 0; ii->ii_ndisk; ix++, ii++) { 1717 printf(" itab[%d]: #dk %d sblk %d soff %d", 1718 ix, ii->ii_ndisk, ii->ii_startblk, ii->ii_startoff); 1719 for (i = 0; i < ii->ii_ndisk; i++) 1720 printf(" %d", ii->ii_index[i]); 1721 printf("\n"); 1722 } 1723 } 1724 #endif 1725 1726 1727 /* Local Variables: */ 1728 /* c-argdecl-indent: 8 */ 1729 /* c-continued-statement-offset: 8 */ 1730 /* c-indent-level: 8 */ 1731 /* End: */ 1732