1 /* $FreeBSD: src/sys/dev/ccd/ccd.c,v 1.73.2.1 2001/09/11 09:49:52 kris Exp $ */ 2 /* $DragonFly: src/sys/dev/disk/ccd/ccd.c,v 1.32 2006/05/06 02:43:02 dillon Exp $ */ 3 4 /* $NetBSD: ccd.c,v 1.22 1995/12/08 19:13:26 thorpej Exp $ */ 5 6 /* 7 * Copyright (c) 1995 Jason R. Thorpe. 8 * All rights reserved. 9 * 10 * Redistribution and use in source and binary forms, with or without 11 * modification, are permitted provided that the following conditions 12 * are met: 13 * 1. Redistributions of source code must retain the above copyright 14 * notice, this list of conditions and the following disclaimer. 15 * 2. Redistributions in binary form must reproduce the above copyright 16 * notice, this list of conditions and the following disclaimer in the 17 * documentation and/or other materials provided with the distribution. 18 * 3. All advertising materials mentioning features or use of this software 19 * must display the following acknowledgement: 20 * This product includes software developed for the NetBSD Project 21 * by Jason R. Thorpe. 22 * 4. The name of the author may not be used to endorse or promote products 23 * derived from this software without specific prior written permission. 24 * 25 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR 26 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 27 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 28 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, 29 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, 30 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 31 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED 32 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 33 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 34 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 35 * SUCH DAMAGE. 36 */ 37 38 /* 39 * Copyright (c) 1988 University of Utah. 40 * Copyright (c) 1990, 1993 41 * The Regents of the University of California. All rights reserved. 42 * 43 * This code is derived from software contributed to Berkeley by 44 * the Systems Programming Group of the University of Utah Computer 45 * Science Department. 46 * 47 * Redistribution and use in source and binary forms, with or without 48 * modification, are permitted provided that the following conditions 49 * are met: 50 * 1. Redistributions of source code must retain the above copyright 51 * notice, this list of conditions and the following disclaimer. 52 * 2. Redistributions in binary form must reproduce the above copyright 53 * notice, this list of conditions and the following disclaimer in the 54 * documentation and/or other materials provided with the distribution. 55 * 3. All advertising materials mentioning features or use of this software 56 * must display the following acknowledgement: 57 * This product includes software developed by the University of 58 * California, Berkeley and its contributors. 59 * 4. Neither the name of the University nor the names of its contributors 60 * may be used to endorse or promote products derived from this software 61 * without specific prior written permission. 62 * 63 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 64 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 65 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 66 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 67 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 68 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 69 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 70 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 71 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 72 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 73 * SUCH DAMAGE. 74 * 75 * from: Utah $Hdr: cd.c 1.6 90/11/28$ 76 * 77 * @(#)cd.c 8.2 (Berkeley) 11/16/93 78 */ 79 80 /* 81 * "Concatenated" disk driver. 82 * 83 * Dynamic configuration and disklabel support by: 84 * Jason R. Thorpe <thorpej@nas.nasa.gov> 85 * Numerical Aerodynamic Simulation Facility 86 * Mail Stop 258-6 87 * NASA Ames Research Center 88 * Moffett Field, CA 94035 89 */ 90 91 #include "use_ccd.h" 92 93 #include <sys/param.h> 94 #include <sys/systm.h> 95 #include <sys/kernel.h> 96 #include <sys/module.h> 97 #include <sys/proc.h> 98 #include <sys/buf.h> 99 #include <sys/malloc.h> 100 #include <sys/nlookup.h> 101 #include <sys/conf.h> 102 #include <sys/stat.h> 103 #include <sys/sysctl.h> 104 #include <sys/disklabel.h> 105 #include <sys/devicestat.h> 106 #include <sys/fcntl.h> 107 #include <sys/vnode.h> 108 #include <sys/buf2.h> 109 #include <sys/ccdvar.h> 110 111 #include <vm/vm_zone.h> 112 113 #include <vfs/ufs/dinode.h> /* XXX Used only for fs.h */ 114 #include <vfs/ufs/fs.h> /* XXX used only to get BBSIZE and SBSIZE */ 115 116 #include <sys/thread2.h> 117 118 #if defined(CCDDEBUG) && !defined(DEBUG) 119 #define DEBUG 120 #endif 121 122 #ifdef DEBUG 123 #define CCDB_FOLLOW 0x01 124 #define CCDB_INIT 0x02 125 #define CCDB_IO 0x04 126 #define CCDB_LABEL 0x08 127 #define CCDB_VNODE 0x10 128 static int ccddebug = CCDB_FOLLOW | CCDB_INIT | CCDB_IO | CCDB_LABEL | 129 CCDB_VNODE; 130 SYSCTL_INT(_debug, OID_AUTO, ccddebug, CTLFLAG_RW, &ccddebug, 0, ""); 131 #undef DEBUG 132 #endif 133 134 #define ccdunit(x) dkunit(x) 135 #define ccdpart(x) dkpart(x) 136 137 /* 138 This is how mirroring works (only writes are special): 139 140 When initiating a write, ccdbuffer() returns two "struct ccdbuf *"s 141 linked together by the cb_mirror field. "cb_pflags & 142 CCDPF_MIRROR_DONE" is set to 0 on both of them. 143 144 When a component returns to ccdiodone(), it checks if "cb_pflags & 145 CCDPF_MIRROR_DONE" is set or not. If not, it sets the partner's 146 flag and returns. If it is, it means its partner has already 147 returned, so it will go to the regular cleanup. 148 149 */ 150 151 struct ccdbuf { 152 struct buf cb_buf; /* new I/O buf */ 153 struct vnode *cb_vp; /* related vnode */ 154 struct bio *cb_obio; /* ptr. to original I/O buf */ 155 struct ccdbuf *cb_freenext; /* free list link */ 156 int cb_unit; /* target unit */ 157 int cb_comp; /* target component */ 158 int cb_pflags; /* mirror/parity status flag */ 159 struct ccdbuf *cb_mirror; /* mirror counterpart */ 160 }; 161 162 /* bits in cb_pflags */ 163 #define CCDPF_MIRROR_DONE 1 /* if set, mirror counterpart is done */ 164 165 #define CCDLABELDEV(dev) \ 166 (make_sub_dev(dev, dkmakeminor(ccdunit((dev)), 0, RAW_PART))) 167 168 static d_open_t ccdopen; 169 static d_close_t ccdclose; 170 static d_strategy_t ccdstrategy; 171 static d_ioctl_t ccdioctl; 172 static d_dump_t ccddump; 173 static d_psize_t ccdsize; 174 175 #define NCCDFREEHIWAT 16 176 177 #define CDEV_MAJOR 74 178 179 static struct cdevsw ccd_cdevsw = { 180 /* name */ "ccd", 181 /* maj */ CDEV_MAJOR, 182 /* flags */ D_DISK, 183 /* port */ NULL, 184 /* clone */ NULL, 185 186 /* open */ ccdopen, 187 /* close */ ccdclose, 188 /* read */ physread, 189 /* write */ physwrite, 190 /* ioctl */ ccdioctl, 191 /* poll */ nopoll, 192 /* mmap */ nommap, 193 /* strategy */ ccdstrategy, 194 /* dump */ ccddump, 195 /* psize */ ccdsize 196 }; 197 198 /* called during module initialization */ 199 static void ccdattach (void); 200 static int ccd_modevent (module_t, int, void *); 201 202 /* called by biodone() at interrupt time */ 203 static void ccdiodone (struct bio *bio); 204 205 static void ccdstart (struct ccd_softc *, struct bio *); 206 static void ccdinterleave (struct ccd_softc *, int); 207 static void ccdintr (struct ccd_softc *, struct bio *); 208 static int ccdinit (struct ccddevice *, char **, struct thread *); 209 static int ccdlookup (char *, struct thread *td, struct vnode **); 210 static void ccdbuffer (struct ccdbuf **ret, struct ccd_softc *, 211 struct bio *, off_t, caddr_t, long); 212 static void ccdgetdisklabel (dev_t); 213 static void ccdmakedisklabel (struct ccd_softc *); 214 static int ccdlock (struct ccd_softc *); 215 static void ccdunlock (struct ccd_softc *); 216 217 #ifdef DEBUG 218 static void printiinfo (struct ccdiinfo *); 219 #endif 220 221 /* Non-private for the benefit of libkvm. */ 222 struct ccd_softc *ccd_softc; 223 struct ccddevice *ccddevs; 224 struct ccdbuf *ccdfreebufs; 225 static int numccdfreebufs; 226 static int numccd = 0; 227 228 /* 229 * getccdbuf() - Allocate and zero a ccd buffer. 230 * 231 * This routine is called at splbio(). 232 */ 233 234 static __inline 235 struct ccdbuf * 236 getccdbuf(void) 237 { 238 struct ccdbuf *cbp; 239 240 /* 241 * Allocate from freelist or malloc as necessary 242 */ 243 if ((cbp = ccdfreebufs) != NULL) { 244 ccdfreebufs = cbp->cb_freenext; 245 --numccdfreebufs; 246 reinitbufbio(&cbp->cb_buf); 247 } else { 248 cbp = malloc(sizeof(struct ccdbuf), M_DEVBUF, M_WAITOK|M_ZERO); 249 initbufbio(&cbp->cb_buf); 250 } 251 252 /* 253 * independant struct buf initialization 254 */ 255 LIST_INIT(&cbp->cb_buf.b_dep); 256 BUF_LOCKINIT(&cbp->cb_buf); 257 BUF_LOCK(&cbp->cb_buf, LK_EXCLUSIVE); 258 BUF_KERNPROC(&cbp->cb_buf); 259 cbp->cb_buf.b_flags = B_PAGING | B_BNOCLIP; 260 261 return(cbp); 262 } 263 264 /* 265 * putccdbuf() - Free a ccd buffer. 266 * 267 * This routine is called at splbio(). 268 */ 269 270 static __inline 271 void 272 putccdbuf(struct ccdbuf *cbp) 273 { 274 BUF_UNLOCK(&cbp->cb_buf); 275 BUF_LOCKFREE(&cbp->cb_buf); 276 277 if (numccdfreebufs < NCCDFREEHIWAT) { 278 cbp->cb_freenext = ccdfreebufs; 279 ccdfreebufs = cbp; 280 ++numccdfreebufs; 281 } else { 282 free((caddr_t)cbp, M_DEVBUF); 283 } 284 } 285 286 287 /* 288 * Number of blocks to untouched in front of a component partition. 289 * This is to avoid violating its disklabel area when it starts at the 290 * beginning of the slice. 291 */ 292 #if !defined(CCD_OFFSET) 293 #define CCD_OFFSET 16 294 #endif 295 296 /* 297 * Called by main() during pseudo-device attachment. All we need 298 * to do is allocate enough space for devices to be configured later, and 299 * add devsw entries. 300 */ 301 static void 302 ccdattach(void) 303 { 304 int i; 305 int num = NCCD; 306 307 if (num > 1) 308 printf("ccd0-%d: Concatenated disk drivers\n", num-1); 309 else 310 printf("ccd0: Concatenated disk driver\n"); 311 312 ccd_softc = malloc(num * sizeof(struct ccd_softc), M_DEVBUF, 313 M_WAITOK | M_ZERO); 314 ccddevs = malloc(num * sizeof(struct ccddevice), M_DEVBUF, 315 M_WAITOK | M_ZERO); 316 numccd = num; 317 318 cdevsw_add(&ccd_cdevsw, 0, 0); 319 /* XXX: is this necessary? */ 320 for (i = 0; i < numccd; ++i) 321 ccddevs[i].ccd_dk = -1; 322 } 323 324 static int 325 ccd_modevent(module_t mod, int type, void *data) 326 { 327 int error = 0; 328 329 switch (type) { 330 case MOD_LOAD: 331 ccdattach(); 332 break; 333 334 case MOD_UNLOAD: 335 printf("ccd0: Unload not supported!\n"); 336 error = EOPNOTSUPP; 337 break; 338 339 default: /* MOD_SHUTDOWN etc */ 340 break; 341 } 342 return (error); 343 } 344 345 DEV_MODULE(ccd, ccd_modevent, NULL); 346 347 static int 348 ccdinit(struct ccddevice *ccd, char **cpaths, struct thread *td) 349 { 350 struct ccd_softc *cs = &ccd_softc[ccd->ccd_unit]; 351 struct ccdcinfo *ci = NULL; /* XXX */ 352 size_t size; 353 int ix; 354 struct vnode *vp; 355 size_t minsize; 356 int maxsecsize; 357 struct partinfo dpart; 358 struct ccdgeom *ccg = &cs->sc_geom; 359 char tmppath[MAXPATHLEN]; 360 int error = 0; 361 struct ucred *cred; 362 363 KKASSERT(td->td_proc); 364 cred = td->td_proc->p_ucred; 365 366 #ifdef DEBUG 367 if (ccddebug & (CCDB_FOLLOW|CCDB_INIT)) 368 printf("ccdinit: unit %d\n", ccd->ccd_unit); 369 #endif 370 371 cs->sc_size = 0; 372 cs->sc_ileave = ccd->ccd_interleave; 373 cs->sc_nccdisks = ccd->ccd_ndev; 374 375 /* Allocate space for the component info. */ 376 cs->sc_cinfo = malloc(cs->sc_nccdisks * sizeof(struct ccdcinfo), 377 M_DEVBUF, M_WAITOK); 378 379 /* 380 * Verify that each component piece exists and record 381 * relevant information about it. 382 */ 383 maxsecsize = 0; 384 minsize = 0; 385 for (ix = 0; ix < cs->sc_nccdisks; ix++) { 386 vp = ccd->ccd_vpp[ix]; 387 ci = &cs->sc_cinfo[ix]; 388 ci->ci_vp = vp; 389 390 /* 391 * Copy in the pathname of the component. 392 */ 393 bzero(tmppath, sizeof(tmppath)); /* sanity */ 394 if ((error = copyinstr(cpaths[ix], tmppath, 395 MAXPATHLEN, &ci->ci_pathlen)) != 0) { 396 #ifdef DEBUG 397 if (ccddebug & (CCDB_FOLLOW|CCDB_INIT)) 398 printf("ccd%d: can't copy path, error = %d\n", 399 ccd->ccd_unit, error); 400 #endif 401 goto fail; 402 } 403 ci->ci_path = malloc(ci->ci_pathlen, M_DEVBUF, M_WAITOK); 404 bcopy(tmppath, ci->ci_path, ci->ci_pathlen); 405 406 ci->ci_dev = vn_todev(vp); 407 408 /* 409 * Get partition information for the component. 410 */ 411 if ((error = VOP_IOCTL(vp, DIOCGPART, (caddr_t)&dpart, 412 FREAD, cred)) != 0) { 413 #ifdef DEBUG 414 if (ccddebug & (CCDB_FOLLOW|CCDB_INIT)) 415 printf("ccd%d: %s: ioctl failed, error = %d\n", 416 ccd->ccd_unit, ci->ci_path, error); 417 #endif 418 goto fail; 419 } 420 if (dpart.part->p_fstype == FS_BSDFFS) { 421 maxsecsize = 422 ((dpart.disklab->d_secsize > maxsecsize) ? 423 dpart.disklab->d_secsize : maxsecsize); 424 size = dpart.part->p_size - CCD_OFFSET; 425 } else { 426 #ifdef DEBUG 427 if (ccddebug & (CCDB_FOLLOW|CCDB_INIT)) 428 printf("ccd%d: %s: incorrect partition type\n", 429 ccd->ccd_unit, ci->ci_path); 430 #endif 431 error = EFTYPE; 432 goto fail; 433 } 434 435 /* 436 * Calculate the size, truncating to an interleave 437 * boundary if necessary. 438 */ 439 440 if (cs->sc_ileave > 1) 441 size -= size % cs->sc_ileave; 442 443 if (size == 0) { 444 #ifdef DEBUG 445 if (ccddebug & (CCDB_FOLLOW|CCDB_INIT)) 446 printf("ccd%d: %s: size == 0\n", 447 ccd->ccd_unit, ci->ci_path); 448 #endif 449 error = ENODEV; 450 goto fail; 451 } 452 453 if (minsize == 0 || size < minsize) 454 minsize = size; 455 ci->ci_size = size; 456 cs->sc_size += size; 457 } 458 459 /* 460 * Don't allow the interleave to be smaller than 461 * the biggest component sector. 462 */ 463 if ((cs->sc_ileave > 0) && 464 (cs->sc_ileave < (maxsecsize / DEV_BSIZE))) { 465 #ifdef DEBUG 466 if (ccddebug & (CCDB_FOLLOW|CCDB_INIT)) 467 printf("ccd%d: interleave must be at least %d\n", 468 ccd->ccd_unit, (maxsecsize / DEV_BSIZE)); 469 #endif 470 error = EINVAL; 471 goto fail; 472 } 473 474 /* 475 * If uniform interleave is desired set all sizes to that of 476 * the smallest component. This will guarentee that a single 477 * interleave table is generated. 478 * 479 * Lost space must be taken into account when calculating the 480 * overall size. Half the space is lost when CCDF_MIRROR is 481 * specified. One disk is lost when CCDF_PARITY is specified. 482 */ 483 if (ccd->ccd_flags & CCDF_UNIFORM) { 484 for (ci = cs->sc_cinfo; 485 ci < &cs->sc_cinfo[cs->sc_nccdisks]; ci++) { 486 ci->ci_size = minsize; 487 } 488 if (ccd->ccd_flags & CCDF_MIRROR) { 489 /* 490 * Check to see if an even number of components 491 * have been specified. The interleave must also 492 * be non-zero in order for us to be able to 493 * guarentee the topology. 494 */ 495 if (cs->sc_nccdisks % 2) { 496 printf("ccd%d: mirroring requires an even number of disks\n", ccd->ccd_unit ); 497 error = EINVAL; 498 goto fail; 499 } 500 if (cs->sc_ileave == 0) { 501 printf("ccd%d: an interleave must be specified when mirroring\n", ccd->ccd_unit); 502 error = EINVAL; 503 goto fail; 504 } 505 cs->sc_size = (cs->sc_nccdisks/2) * minsize; 506 } else if (ccd->ccd_flags & CCDF_PARITY) { 507 cs->sc_size = (cs->sc_nccdisks-1) * minsize; 508 } else { 509 if (cs->sc_ileave == 0) { 510 printf("ccd%d: an interleave must be specified when using parity\n", ccd->ccd_unit); 511 error = EINVAL; 512 goto fail; 513 } 514 cs->sc_size = cs->sc_nccdisks * minsize; 515 } 516 } 517 518 /* 519 * Construct the interleave table. 520 */ 521 ccdinterleave(cs, ccd->ccd_unit); 522 523 /* 524 * Create pseudo-geometry based on 1MB cylinders. It's 525 * pretty close. 526 */ 527 ccg->ccg_secsize = maxsecsize; 528 ccg->ccg_ntracks = 1; 529 ccg->ccg_nsectors = 1024 * 1024 / ccg->ccg_secsize; 530 ccg->ccg_ncylinders = cs->sc_size / ccg->ccg_nsectors; 531 532 /* 533 * Add an devstat entry for this device. 534 */ 535 devstat_add_entry(&cs->device_stats, "ccd", ccd->ccd_unit, 536 ccg->ccg_secsize, DEVSTAT_ALL_SUPPORTED, 537 DEVSTAT_TYPE_STORARRAY |DEVSTAT_TYPE_IF_OTHER, 538 DEVSTAT_PRIORITY_ARRAY); 539 540 cs->sc_flags |= CCDF_INITED; 541 cs->sc_cflags = ccd->ccd_flags; /* So we can find out later... */ 542 cs->sc_unit = ccd->ccd_unit; 543 return (0); 544 fail: 545 while (ci > cs->sc_cinfo) { 546 ci--; 547 free(ci->ci_path, M_DEVBUF); 548 } 549 free(cs->sc_cinfo, M_DEVBUF); 550 return (error); 551 } 552 553 static void 554 ccdinterleave(struct ccd_softc *cs, int unit) 555 { 556 struct ccdcinfo *ci, *smallci; 557 struct ccdiinfo *ii; 558 daddr_t bn, lbn; 559 int ix; 560 u_long size; 561 562 #ifdef DEBUG 563 if (ccddebug & CCDB_INIT) 564 printf("ccdinterleave(%x): ileave %d\n", cs, cs->sc_ileave); 565 #endif 566 567 /* 568 * Allocate an interleave table. The worst case occurs when each 569 * of N disks is of a different size, resulting in N interleave 570 * tables. 571 * 572 * Chances are this is too big, but we don't care. 573 */ 574 size = (cs->sc_nccdisks + 1) * sizeof(struct ccdiinfo); 575 cs->sc_itable = (struct ccdiinfo *)malloc(size, M_DEVBUF, M_WAITOK); 576 bzero((caddr_t)cs->sc_itable, size); 577 578 /* 579 * Trivial case: no interleave (actually interleave of disk size). 580 * Each table entry represents a single component in its entirety. 581 * 582 * An interleave of 0 may not be used with a mirror or parity setup. 583 */ 584 if (cs->sc_ileave == 0) { 585 bn = 0; 586 ii = cs->sc_itable; 587 588 for (ix = 0; ix < cs->sc_nccdisks; ix++) { 589 /* Allocate space for ii_index. */ 590 ii->ii_index = malloc(sizeof(int), M_DEVBUF, M_WAITOK); 591 ii->ii_ndisk = 1; 592 ii->ii_startblk = bn; 593 ii->ii_startoff = 0; 594 ii->ii_index[0] = ix; 595 bn += cs->sc_cinfo[ix].ci_size; 596 ii++; 597 } 598 ii->ii_ndisk = 0; 599 #ifdef DEBUG 600 if (ccddebug & CCDB_INIT) 601 printiinfo(cs->sc_itable); 602 #endif 603 return; 604 } 605 606 /* 607 * The following isn't fast or pretty; it doesn't have to be. 608 */ 609 size = 0; 610 bn = lbn = 0; 611 for (ii = cs->sc_itable; ; ii++) { 612 /* 613 * Allocate space for ii_index. We might allocate more then 614 * we use. 615 */ 616 ii->ii_index = malloc((sizeof(int) * cs->sc_nccdisks), 617 M_DEVBUF, M_WAITOK); 618 619 /* 620 * Locate the smallest of the remaining components 621 */ 622 smallci = NULL; 623 for (ci = cs->sc_cinfo; ci < &cs->sc_cinfo[cs->sc_nccdisks]; 624 ci++) { 625 if (ci->ci_size > size && 626 (smallci == NULL || 627 ci->ci_size < smallci->ci_size)) { 628 smallci = ci; 629 } 630 } 631 632 /* 633 * Nobody left, all done 634 */ 635 if (smallci == NULL) { 636 ii->ii_ndisk = 0; 637 break; 638 } 639 640 /* 641 * Record starting logical block using an sc_ileave blocksize. 642 */ 643 ii->ii_startblk = bn / cs->sc_ileave; 644 645 /* 646 * Record starting comopnent block using an sc_ileave 647 * blocksize. This value is relative to the beginning of 648 * a component disk. 649 */ 650 ii->ii_startoff = lbn; 651 652 /* 653 * Determine how many disks take part in this interleave 654 * and record their indices. 655 */ 656 ix = 0; 657 for (ci = cs->sc_cinfo; 658 ci < &cs->sc_cinfo[cs->sc_nccdisks]; ci++) { 659 if (ci->ci_size >= smallci->ci_size) { 660 ii->ii_index[ix++] = ci - cs->sc_cinfo; 661 } 662 } 663 ii->ii_ndisk = ix; 664 bn += ix * (smallci->ci_size - size); 665 lbn = smallci->ci_size / cs->sc_ileave; 666 size = smallci->ci_size; 667 } 668 #ifdef DEBUG 669 if (ccddebug & CCDB_INIT) 670 printiinfo(cs->sc_itable); 671 #endif 672 } 673 674 /* ARGSUSED */ 675 static int 676 ccdopen(dev_t dev, int flags, int fmt, d_thread_t *td) 677 { 678 int unit = ccdunit(dev); 679 struct ccd_softc *cs; 680 struct disklabel *lp; 681 int error = 0, part, pmask; 682 683 #ifdef DEBUG 684 if (ccddebug & CCDB_FOLLOW) 685 printf("ccdopen(%x, %x)\n", dev, flags); 686 #endif 687 if (unit >= numccd) 688 return (ENXIO); 689 cs = &ccd_softc[unit]; 690 691 if ((error = ccdlock(cs)) != 0) 692 return (error); 693 694 lp = &cs->sc_label; 695 696 part = ccdpart(dev); 697 pmask = (1 << part); 698 699 /* 700 * If we're initialized, check to see if there are any other 701 * open partitions. If not, then it's safe to update 702 * the in-core disklabel. 703 */ 704 if ((cs->sc_flags & CCDF_INITED) && (cs->sc_openmask == 0)) 705 ccdgetdisklabel(dev); 706 707 /* Check that the partition exists. */ 708 if (part != RAW_PART && ((part >= lp->d_npartitions) || 709 (lp->d_partitions[part].p_fstype == FS_UNUSED))) { 710 error = ENXIO; 711 goto done; 712 } 713 714 cs->sc_openmask |= pmask; 715 done: 716 ccdunlock(cs); 717 return (0); 718 } 719 720 /* ARGSUSED */ 721 static int 722 ccdclose(dev_t dev, int flags, int fmt, d_thread_t *td) 723 { 724 int unit = ccdunit(dev); 725 struct ccd_softc *cs; 726 int error = 0, part; 727 728 #ifdef DEBUG 729 if (ccddebug & CCDB_FOLLOW) 730 printf("ccdclose(%x, %x)\n", dev, flags); 731 #endif 732 733 if (unit >= numccd) 734 return (ENXIO); 735 cs = &ccd_softc[unit]; 736 737 if ((error = ccdlock(cs)) != 0) 738 return (error); 739 740 part = ccdpart(dev); 741 742 /* ...that much closer to allowing unconfiguration... */ 743 cs->sc_openmask &= ~(1 << part); 744 ccdunlock(cs); 745 return (0); 746 } 747 748 static void 749 ccdstrategy(dev_t dev, struct bio *bio) 750 { 751 int unit = ccdunit(dev); 752 struct bio *nbio; 753 struct buf *bp = bio->bio_buf; 754 struct ccd_softc *cs = &ccd_softc[unit]; 755 int wlabel; 756 struct disklabel *lp; 757 758 #ifdef DEBUG 759 if (ccddebug & CCDB_FOLLOW) 760 printf("ccdstrategy(%x): unit %d\n", bp, unit); 761 #endif 762 if ((cs->sc_flags & CCDF_INITED) == 0) { 763 bp->b_error = ENXIO; 764 goto error; 765 } 766 767 /* If it's a nil transfer, wake up the top half now. */ 768 if (bp->b_bcount == 0) { 769 bp->b_resid = 0; 770 goto done; 771 } 772 773 lp = &cs->sc_label; 774 775 /* 776 * Do bounds checking and adjust transfer. If there's an 777 * error, the bounds check will flag that for us. 778 */ 779 wlabel = cs->sc_flags & (CCDF_WLABEL|CCDF_LABELLING); 780 if (ccdpart(dev) != RAW_PART) { 781 nbio = bounds_check_with_label(dev, bio, lp, wlabel); 782 if (nbio == NULL) 783 goto done; 784 } else { 785 int pbn; /* in sc_secsize chunks */ 786 long sz; /* in sc_secsize chunks */ 787 788 pbn = (int)(bio->bio_offset / cs->sc_geom.ccg_secsize); 789 sz = howmany(bp->b_bcount, cs->sc_geom.ccg_secsize); 790 791 /* 792 * If out of bounds return an error. If the request goes 793 * past EOF, clip the request as appropriate. If exactly 794 * at EOF, return success (don't clip), but with 0 bytes 795 * of I/O. 796 * 797 * Mark EOF B_INVAL (just like bad), indicating that the 798 * contents of the buffer, if any, is invalid. 799 */ 800 if (pbn < 0) 801 goto bad; 802 if (pbn + sz > cs->sc_size) { 803 if (pbn > cs->sc_size || (bp->b_flags & B_BNOCLIP)) 804 goto bad; 805 if (pbn == cs->sc_size) { 806 bp->b_resid = bp->b_bcount; 807 bp->b_flags |= B_INVAL; 808 goto done; 809 } 810 sz = cs->sc_size - pbn; 811 bp->b_bcount = sz * cs->sc_geom.ccg_secsize; 812 } 813 nbio = bio; 814 } 815 816 bp->b_resid = bp->b_bcount; 817 nbio->bio_driver_info = dev; 818 819 /* 820 * "Start" the unit. 821 */ 822 crit_enter(); 823 ccdstart(cs, nbio); 824 crit_exit(); 825 return; 826 827 /* 828 * note: bio, not nbio, is valid at the done label. 829 */ 830 bad: 831 bp->b_error = EINVAL; 832 error: 833 bp->b_resid = bp->b_bcount; 834 bp->b_flags |= B_ERROR | B_INVAL; 835 done: 836 biodone(bio); 837 } 838 839 static void 840 ccdstart(struct ccd_softc *cs, struct bio *bio) 841 { 842 long bcount, rcount; 843 struct ccdbuf *cbp[4]; 844 struct buf *bp = bio->bio_buf; 845 dev_t dev = bio->bio_driver_info; 846 /* XXX! : 2 reads and 2 writes for RAID 4/5 */ 847 caddr_t addr; 848 off_t doffset; 849 struct partition *pp; 850 851 #ifdef DEBUG 852 if (ccddebug & CCDB_FOLLOW) 853 printf("ccdstart(%x, %x)\n", cs, bp); 854 #endif 855 856 /* Record the transaction start */ 857 devstat_start_transaction(&cs->device_stats); 858 859 /* 860 * Translate the partition-relative block number to an absolute. 861 */ 862 doffset = bio->bio_offset; 863 if (ccdpart(dev) != RAW_PART) { 864 pp = &cs->sc_label.d_partitions[ccdpart(dev)]; 865 doffset += pp->p_offset * cs->sc_label.d_secsize; 866 } 867 868 /* 869 * Allocate component buffers and fire off the requests 870 */ 871 addr = bp->b_data; 872 for (bcount = bp->b_bcount; bcount > 0; bcount -= rcount) { 873 ccdbuffer(cbp, cs, bio, doffset, addr, bcount); 874 rcount = cbp[0]->cb_buf.b_bcount; 875 876 if (cs->sc_cflags & CCDF_MIRROR) { 877 /* 878 * Mirroring. Writes go to both disks, reads are 879 * taken from whichever disk seems most appropriate. 880 * 881 * We attempt to localize reads to the disk whos arm 882 * is nearest the read request. We ignore seeks due 883 * to writes when making this determination and we 884 * also try to avoid hogging. 885 */ 886 if (cbp[0]->cb_buf.b_cmd != BUF_CMD_READ) { 887 vn_strategy(cbp[0]->cb_vp, 888 &cbp[0]->cb_buf.b_bio1); 889 vn_strategy(cbp[1]->cb_vp, 890 &cbp[1]->cb_buf.b_bio1); 891 } else { 892 int pick = cs->sc_pick; 893 daddr_t range = cs->sc_size / 16 * cs->sc_label.d_secsize; 894 895 if (doffset < cs->sc_blk[pick] - range || 896 doffset > cs->sc_blk[pick] + range 897 ) { 898 cs->sc_pick = pick = 1 - pick; 899 } 900 cs->sc_blk[pick] = doffset + rcount; 901 vn_strategy(cbp[pick]->cb_vp, 902 &cbp[pick]->cb_buf.b_bio1); 903 } 904 } else { 905 /* 906 * Not mirroring 907 */ 908 vn_strategy(cbp[0]->cb_vp, 909 &cbp[0]->cb_buf.b_bio1); 910 } 911 doffset += rcount; 912 addr += rcount; 913 } 914 } 915 916 /* 917 * Build a component buffer header. 918 */ 919 static void 920 ccdbuffer(struct ccdbuf **cb, struct ccd_softc *cs, struct bio *bio, 921 off_t doffset, caddr_t addr, long bcount) 922 { 923 struct ccdcinfo *ci, *ci2 = NULL; /* XXX */ 924 struct ccdbuf *cbp; 925 daddr_t bn, cbn, cboff; 926 off_t cbc; 927 928 #ifdef DEBUG 929 if (ccddebug & CCDB_IO) 930 printf("ccdbuffer(%x, %x, %d, %x, %d)\n", 931 cs, bp, bn, addr, bcount); 932 #endif 933 /* 934 * Determine which component bn falls in. 935 */ 936 bn = (daddr_t)(doffset / cs->sc_geom.ccg_secsize); 937 cbn = bn; 938 cboff = 0; 939 940 if (cs->sc_ileave == 0) { 941 /* 942 * Serially concatenated and neither a mirror nor a parity 943 * config. This is a special case. 944 */ 945 daddr_t sblk; 946 947 sblk = 0; 948 for (ci = cs->sc_cinfo; cbn >= sblk + ci->ci_size; ci++) 949 sblk += ci->ci_size; 950 cbn -= sblk; 951 } else { 952 struct ccdiinfo *ii; 953 int ccdisk, off; 954 955 /* 956 * Calculate cbn, the logical superblock (sc_ileave chunks), 957 * and cboff, a normal block offset (DEV_BSIZE chunks) relative 958 * to cbn. 959 */ 960 cboff = cbn % cs->sc_ileave; /* DEV_BSIZE gran */ 961 cbn = cbn / cs->sc_ileave; /* DEV_BSIZE * ileave gran */ 962 963 /* 964 * Figure out which interleave table to use. 965 */ 966 for (ii = cs->sc_itable; ii->ii_ndisk; ii++) { 967 if (ii->ii_startblk > cbn) 968 break; 969 } 970 ii--; 971 972 /* 973 * off is the logical superblock relative to the beginning 974 * of this interleave block. 975 */ 976 off = cbn - ii->ii_startblk; 977 978 /* 979 * We must calculate which disk component to use (ccdisk), 980 * and recalculate cbn to be the superblock relative to 981 * the beginning of the component. This is typically done by 982 * adding 'off' and ii->ii_startoff together. However, 'off' 983 * must typically be divided by the number of components in 984 * this interleave array to be properly convert it from a 985 * CCD-relative logical superblock number to a 986 * component-relative superblock number. 987 */ 988 if (ii->ii_ndisk == 1) { 989 /* 990 * When we have just one disk, it can't be a mirror 991 * or a parity config. 992 */ 993 ccdisk = ii->ii_index[0]; 994 cbn = ii->ii_startoff + off; 995 } else { 996 if (cs->sc_cflags & CCDF_MIRROR) { 997 /* 998 * We have forced a uniform mapping, resulting 999 * in a single interleave array. We double 1000 * up on the first half of the available 1001 * components and our mirror is in the second 1002 * half. This only works with a single 1003 * interleave array because doubling up 1004 * doubles the number of sectors, so there 1005 * cannot be another interleave array because 1006 * the next interleave array's calculations 1007 * would be off. 1008 */ 1009 int ndisk2 = ii->ii_ndisk / 2; 1010 ccdisk = ii->ii_index[off % ndisk2]; 1011 cbn = ii->ii_startoff + off / ndisk2; 1012 ci2 = &cs->sc_cinfo[ccdisk + ndisk2]; 1013 } else if (cs->sc_cflags & CCDF_PARITY) { 1014 /* 1015 * XXX not implemented yet 1016 */ 1017 int ndisk2 = ii->ii_ndisk - 1; 1018 ccdisk = ii->ii_index[off % ndisk2]; 1019 cbn = ii->ii_startoff + off / ndisk2; 1020 if (cbn % ii->ii_ndisk <= ccdisk) 1021 ccdisk++; 1022 } else { 1023 ccdisk = ii->ii_index[off % ii->ii_ndisk]; 1024 cbn = ii->ii_startoff + off / ii->ii_ndisk; 1025 } 1026 } 1027 1028 ci = &cs->sc_cinfo[ccdisk]; 1029 1030 /* 1031 * Convert cbn from a superblock to a normal block so it 1032 * can be used to calculate (along with cboff) the normal 1033 * block index into this particular disk. 1034 */ 1035 cbn *= cs->sc_ileave; 1036 } 1037 1038 /* 1039 * Fill in the component buf structure. 1040 * 1041 * NOTE: devices do not use b_bufsize, only b_bcount, but b_bcount 1042 * will be truncated on device EOF so we use b_bufsize to detect 1043 * the case. 1044 */ 1045 cbp = getccdbuf(); 1046 cbp->cb_buf.b_cmd = bio->bio_buf->b_cmd; 1047 cbp->cb_buf.b_flags |= bio->bio_buf->b_flags; 1048 cbp->cb_buf.b_data = addr; 1049 cbp->cb_vp = ci->ci_vp; 1050 if (cs->sc_ileave == 0) 1051 cbc = dbtob((off_t)(ci->ci_size - cbn)); 1052 else 1053 cbc = dbtob((off_t)(cs->sc_ileave - cboff)); 1054 cbp->cb_buf.b_bcount = (cbc < bcount) ? cbc : bcount; 1055 cbp->cb_buf.b_bufsize = cbp->cb_buf.b_bcount; 1056 1057 cbp->cb_buf.b_bio1.bio_done = ccdiodone; 1058 cbp->cb_buf.b_bio1.bio_caller_info1.ptr = cbp; 1059 cbp->cb_buf.b_bio1.bio_offset = dbtob(cbn + cboff + CCD_OFFSET); 1060 1061 /* 1062 * context for ccdiodone 1063 */ 1064 cbp->cb_obio = bio; 1065 cbp->cb_unit = cs - ccd_softc; 1066 cbp->cb_comp = ci - cs->sc_cinfo; 1067 1068 #ifdef DEBUG 1069 if (ccddebug & CCDB_IO) 1070 printf(" dev %x(u%d): cbp %x off %lld addr %x bcnt %d\n", 1071 ci->ci_dev, ci-cs->sc_cinfo, cbp, 1072 cbp->cb_buf.b_bio1.bio_offset, 1073 cbp->cb_buf.b_data, cbp->cb_buf.b_bcount); 1074 #endif 1075 cb[0] = cbp; 1076 1077 /* 1078 * Note: both I/O's setup when reading from mirror, but only one 1079 * will be executed. 1080 */ 1081 if (cs->sc_cflags & CCDF_MIRROR) { 1082 /* mirror, setup second I/O */ 1083 cbp = getccdbuf(); 1084 1085 cbp->cb_buf.b_cmd = bio->bio_buf->b_cmd; 1086 cbp->cb_buf.b_flags |= bio->bio_buf->b_flags; 1087 cbp->cb_buf.b_data = addr; 1088 cbp->cb_vp = ci2->ci_vp; 1089 if (cs->sc_ileave == 0) 1090 cbc = dbtob((off_t)(ci->ci_size - cbn)); 1091 else 1092 cbc = dbtob((off_t)(cs->sc_ileave - cboff)); 1093 cbp->cb_buf.b_bcount = (cbc < bcount) ? cbc : bcount; 1094 cbp->cb_buf.b_bufsize = cbp->cb_buf.b_bcount; 1095 1096 cbp->cb_buf.b_bio1.bio_done = ccdiodone; 1097 cbp->cb_buf.b_bio1.bio_caller_info1.ptr = cbp; 1098 cbp->cb_buf.b_bio1.bio_offset = dbtob(cbn + cboff + CCD_OFFSET); 1099 1100 /* 1101 * context for ccdiodone 1102 */ 1103 cbp->cb_obio = bio; 1104 cbp->cb_unit = cs - ccd_softc; 1105 cbp->cb_comp = ci2 - cs->sc_cinfo; 1106 cb[1] = cbp; 1107 /* link together the ccdbuf's and clear "mirror done" flag */ 1108 cb[0]->cb_mirror = cb[1]; 1109 cb[1]->cb_mirror = cb[0]; 1110 cb[0]->cb_pflags &= ~CCDPF_MIRROR_DONE; 1111 cb[1]->cb_pflags &= ~CCDPF_MIRROR_DONE; 1112 } 1113 } 1114 1115 static void 1116 ccdintr(struct ccd_softc *cs, struct bio *bio) 1117 { 1118 struct buf *bp = bio->bio_buf; 1119 1120 #ifdef DEBUG 1121 if (ccddebug & CCDB_FOLLOW) 1122 printf("ccdintr(%x, %x)\n", cs, bp); 1123 #endif 1124 /* 1125 * Request is done for better or worse, wakeup the top half. 1126 */ 1127 if (bp->b_flags & B_ERROR) 1128 bp->b_resid = bp->b_bcount; 1129 devstat_end_transaction_buf(&cs->device_stats, bp); 1130 biodone(bio); 1131 } 1132 1133 /* 1134 * Called at interrupt time. 1135 * Mark the component as done and if all components are done, 1136 * take a ccd interrupt. 1137 */ 1138 static void 1139 ccdiodone(struct bio *bio) 1140 { 1141 struct ccdbuf *cbp = bio->bio_caller_info1.ptr; 1142 struct bio *obio = cbp->cb_obio; 1143 struct buf *obp = obio->bio_buf; 1144 int unit = cbp->cb_unit; 1145 int count; 1146 1147 /* 1148 * Since we do not have exclusive access to underlying devices, 1149 * we can't keep cache translations around. 1150 */ 1151 clearbiocache(bio->bio_next); 1152 1153 crit_enter(); 1154 #ifdef DEBUG 1155 if (ccddebug & CCDB_FOLLOW) 1156 printf("ccdiodone(%x)\n", cbp); 1157 if (ccddebug & CCDB_IO) { 1158 printf("ccdiodone: bp %x bcount %d resid %d\n", 1159 obp, obp->b_bcount, obp->b_resid); 1160 printf(" dev %x(u%d), cbp %x off %lld addr %x bcnt %d\n", 1161 cbp->cb_buf.b_dev, cbp->cb_comp, cbp, 1162 cbp->cb_buf.b_loffset, cbp->cb_buf.b_data, 1163 cbp->cb_buf.b_bcount); 1164 } 1165 #endif 1166 1167 /* 1168 * If an error occured, report it. If this is a mirrored 1169 * configuration and the first of two possible reads, do not 1170 * set the error in the bp yet because the second read may 1171 * succeed. 1172 */ 1173 if (cbp->cb_buf.b_flags & B_ERROR) { 1174 const char *msg = ""; 1175 1176 if ((ccd_softc[unit].sc_cflags & CCDF_MIRROR) && 1177 (cbp->cb_buf.b_cmd == BUF_CMD_READ) && 1178 (cbp->cb_pflags & CCDPF_MIRROR_DONE) == 0) { 1179 /* 1180 * We will try our read on the other disk down 1181 * below, also reverse the default pick so if we 1182 * are doing a scan we do not keep hitting the 1183 * bad disk first. 1184 */ 1185 struct ccd_softc *cs = &ccd_softc[unit]; 1186 1187 msg = ", trying other disk"; 1188 cs->sc_pick = 1 - cs->sc_pick; 1189 cs->sc_blk[cs->sc_pick] = obio->bio_offset; 1190 } else { 1191 obp->b_flags |= B_ERROR; 1192 obp->b_error = cbp->cb_buf.b_error ? 1193 cbp->cb_buf.b_error : EIO; 1194 } 1195 printf("ccd%d: error %d on component %d offset %lld (ccd offset %lld)%s\n", 1196 unit, obp->b_error, cbp->cb_comp, 1197 cbp->cb_buf.b_bio2.bio_offset, 1198 obio->bio_offset, msg); 1199 } 1200 1201 /* 1202 * Process mirror. If we are writing, I/O has been initiated on both 1203 * buffers and we fall through only after both are finished. 1204 * 1205 * If we are reading only one I/O is initiated at a time. If an 1206 * error occurs we initiate the second I/O and return, otherwise 1207 * we free the second I/O without initiating it. 1208 */ 1209 1210 if (ccd_softc[unit].sc_cflags & CCDF_MIRROR) { 1211 if (cbp->cb_buf.b_cmd != BUF_CMD_READ) { 1212 /* 1213 * When writing, handshake with the second buffer 1214 * to determine when both are done. If both are not 1215 * done, return here. 1216 */ 1217 if ((cbp->cb_pflags & CCDPF_MIRROR_DONE) == 0) { 1218 cbp->cb_mirror->cb_pflags |= CCDPF_MIRROR_DONE; 1219 putccdbuf(cbp); 1220 crit_exit(); 1221 return; 1222 } 1223 } else { 1224 /* 1225 * When reading, either dispose of the second buffer 1226 * or initiate I/O on the second buffer if an error 1227 * occured with this one. 1228 */ 1229 if ((cbp->cb_pflags & CCDPF_MIRROR_DONE) == 0) { 1230 if (cbp->cb_buf.b_flags & B_ERROR) { 1231 cbp->cb_mirror->cb_pflags |= 1232 CCDPF_MIRROR_DONE; 1233 vn_strategy( 1234 cbp->cb_mirror->cb_vp, 1235 &cbp->cb_mirror->cb_buf.b_bio1 1236 ); 1237 putccdbuf(cbp); 1238 crit_exit(); 1239 return; 1240 } else { 1241 putccdbuf(cbp->cb_mirror); 1242 /* fall through */ 1243 } 1244 } 1245 } 1246 } 1247 1248 /* 1249 * Use our saved b_bufsize to determine if an unexpected EOF occured. 1250 */ 1251 count = cbp->cb_buf.b_bufsize; 1252 putccdbuf(cbp); 1253 1254 /* 1255 * If all done, "interrupt". 1256 */ 1257 obp->b_resid -= count; 1258 if (obp->b_resid < 0) 1259 panic("ccdiodone: count"); 1260 if (obp->b_resid == 0) 1261 ccdintr(&ccd_softc[unit], obio); 1262 crit_exit(); 1263 } 1264 1265 static int 1266 ccdioctl(dev_t dev, u_long cmd, caddr_t data, int flag, d_thread_t *td) 1267 { 1268 int unit = ccdunit(dev); 1269 int i, j, lookedup = 0, error = 0; 1270 int part, pmask; 1271 struct ccd_softc *cs; 1272 struct ccd_ioctl *ccio = (struct ccd_ioctl *)data; 1273 struct ccddevice ccd; 1274 char **cpp; 1275 struct vnode **vpp; 1276 struct ucred *cred; 1277 1278 KKASSERT(td->td_proc != NULL); 1279 cred = td->td_proc->p_ucred; 1280 1281 if (unit >= numccd) 1282 return (ENXIO); 1283 cs = &ccd_softc[unit]; 1284 1285 bzero(&ccd, sizeof(ccd)); 1286 1287 switch (cmd) { 1288 case CCDIOCSET: 1289 if (cs->sc_flags & CCDF_INITED) 1290 return (EBUSY); 1291 1292 if ((flag & FWRITE) == 0) 1293 return (EBADF); 1294 1295 if ((error = ccdlock(cs)) != 0) 1296 return (error); 1297 1298 if (ccio->ccio_ndisks > CCD_MAXNDISKS) 1299 return (EINVAL); 1300 1301 /* Fill in some important bits. */ 1302 ccd.ccd_unit = unit; 1303 ccd.ccd_interleave = ccio->ccio_ileave; 1304 if (ccd.ccd_interleave == 0 && 1305 ((ccio->ccio_flags & CCDF_MIRROR) || 1306 (ccio->ccio_flags & CCDF_PARITY))) { 1307 printf("ccd%d: disabling mirror/parity, interleave is 0\n", unit); 1308 ccio->ccio_flags &= ~(CCDF_MIRROR | CCDF_PARITY); 1309 } 1310 if ((ccio->ccio_flags & CCDF_MIRROR) && 1311 (ccio->ccio_flags & CCDF_PARITY)) { 1312 printf("ccd%d: can't specify both mirror and parity, using mirror\n", unit); 1313 ccio->ccio_flags &= ~CCDF_PARITY; 1314 } 1315 if ((ccio->ccio_flags & (CCDF_MIRROR | CCDF_PARITY)) && 1316 !(ccio->ccio_flags & CCDF_UNIFORM)) { 1317 printf("ccd%d: mirror/parity forces uniform flag\n", 1318 unit); 1319 ccio->ccio_flags |= CCDF_UNIFORM; 1320 } 1321 ccd.ccd_flags = ccio->ccio_flags & CCDF_USERMASK; 1322 1323 /* 1324 * Allocate space for and copy in the array of 1325 * componet pathnames and device numbers. 1326 */ 1327 cpp = malloc(ccio->ccio_ndisks * sizeof(char *), 1328 M_DEVBUF, M_WAITOK); 1329 vpp = malloc(ccio->ccio_ndisks * sizeof(struct vnode *), 1330 M_DEVBUF, M_WAITOK); 1331 1332 error = copyin((caddr_t)ccio->ccio_disks, (caddr_t)cpp, 1333 ccio->ccio_ndisks * sizeof(char **)); 1334 if (error) { 1335 free(vpp, M_DEVBUF); 1336 free(cpp, M_DEVBUF); 1337 ccdunlock(cs); 1338 return (error); 1339 } 1340 1341 #ifdef DEBUG 1342 if (ccddebug & CCDB_INIT) 1343 for (i = 0; i < ccio->ccio_ndisks; ++i) 1344 printf("ccdioctl: component %d: 0x%x\n", 1345 i, cpp[i]); 1346 #endif 1347 1348 for (i = 0; i < ccio->ccio_ndisks; ++i) { 1349 #ifdef DEBUG 1350 if (ccddebug & CCDB_INIT) 1351 printf("ccdioctl: lookedup = %d\n", lookedup); 1352 #endif 1353 if ((error = ccdlookup(cpp[i], td, &vpp[i])) != 0) { 1354 for (j = 0; j < lookedup; ++j) 1355 (void)vn_close(vpp[j], FREAD|FWRITE); 1356 free(vpp, M_DEVBUF); 1357 free(cpp, M_DEVBUF); 1358 ccdunlock(cs); 1359 return (error); 1360 } 1361 ++lookedup; 1362 } 1363 ccd.ccd_cpp = cpp; 1364 ccd.ccd_vpp = vpp; 1365 ccd.ccd_ndev = ccio->ccio_ndisks; 1366 1367 /* 1368 * Initialize the ccd. Fills in the softc for us. 1369 */ 1370 if ((error = ccdinit(&ccd, cpp, td)) != 0) { 1371 for (j = 0; j < lookedup; ++j) 1372 (void)vn_close(vpp[j], FREAD|FWRITE); 1373 bzero(&ccd_softc[unit], sizeof(struct ccd_softc)); 1374 free(vpp, M_DEVBUF); 1375 free(cpp, M_DEVBUF); 1376 ccdunlock(cs); 1377 return (error); 1378 } 1379 1380 /* 1381 * The ccd has been successfully initialized, so 1382 * we can place it into the array and read the disklabel. 1383 */ 1384 bcopy(&ccd, &ccddevs[unit], sizeof(ccd)); 1385 ccio->ccio_unit = unit; 1386 ccio->ccio_size = cs->sc_size; 1387 ccdgetdisklabel(dev); 1388 1389 ccdunlock(cs); 1390 1391 break; 1392 1393 case CCDIOCCLR: 1394 if ((cs->sc_flags & CCDF_INITED) == 0) 1395 return (ENXIO); 1396 1397 if ((flag & FWRITE) == 0) 1398 return (EBADF); 1399 1400 if ((error = ccdlock(cs)) != 0) 1401 return (error); 1402 1403 /* Don't unconfigure if any other partitions are open */ 1404 part = ccdpart(dev); 1405 pmask = (1 << part); 1406 if ((cs->sc_openmask & ~pmask)) { 1407 ccdunlock(cs); 1408 return (EBUSY); 1409 } 1410 1411 /* 1412 * Free ccd_softc information and clear entry. 1413 */ 1414 1415 /* Close the components and free their pathnames. */ 1416 for (i = 0; i < cs->sc_nccdisks; ++i) { 1417 /* 1418 * XXX: this close could potentially fail and 1419 * cause Bad Things. Maybe we need to force 1420 * the close to happen? 1421 */ 1422 #ifdef DEBUG 1423 if (ccddebug & CCDB_VNODE) 1424 vprint("CCDIOCCLR: vnode info", 1425 cs->sc_cinfo[i].ci_vp); 1426 #endif 1427 (void)vn_close(cs->sc_cinfo[i].ci_vp, FREAD|FWRITE); 1428 free(cs->sc_cinfo[i].ci_path, M_DEVBUF); 1429 } 1430 1431 /* Free interleave index. */ 1432 for (i = 0; cs->sc_itable[i].ii_ndisk; ++i) 1433 free(cs->sc_itable[i].ii_index, M_DEVBUF); 1434 1435 /* Free component info and interleave table. */ 1436 free(cs->sc_cinfo, M_DEVBUF); 1437 free(cs->sc_itable, M_DEVBUF); 1438 cs->sc_flags &= ~CCDF_INITED; 1439 1440 /* 1441 * Free ccddevice information and clear entry. 1442 */ 1443 free(ccddevs[unit].ccd_cpp, M_DEVBUF); 1444 free(ccddevs[unit].ccd_vpp, M_DEVBUF); 1445 ccd.ccd_dk = -1; 1446 bcopy(&ccd, &ccddevs[unit], sizeof(ccd)); 1447 1448 /* 1449 * And remove the devstat entry. 1450 */ 1451 devstat_remove_entry(&cs->device_stats); 1452 1453 /* This must be atomic. */ 1454 crit_enter(); 1455 ccdunlock(cs); 1456 bzero(cs, sizeof(struct ccd_softc)); 1457 crit_exit(); 1458 1459 break; 1460 1461 case DIOCGDINFO: 1462 if ((cs->sc_flags & CCDF_INITED) == 0) 1463 return (ENXIO); 1464 1465 *(struct disklabel *)data = cs->sc_label; 1466 break; 1467 1468 case DIOCGPART: 1469 if ((cs->sc_flags & CCDF_INITED) == 0) 1470 return (ENXIO); 1471 1472 ((struct partinfo *)data)->disklab = &cs->sc_label; 1473 ((struct partinfo *)data)->part = 1474 &cs->sc_label.d_partitions[ccdpart(dev)]; 1475 break; 1476 1477 case DIOCWDINFO: 1478 case DIOCSDINFO: 1479 if ((cs->sc_flags & CCDF_INITED) == 0) 1480 return (ENXIO); 1481 1482 if ((flag & FWRITE) == 0) 1483 return (EBADF); 1484 1485 if ((error = ccdlock(cs)) != 0) 1486 return (error); 1487 1488 cs->sc_flags |= CCDF_LABELLING; 1489 1490 error = setdisklabel(&cs->sc_label, 1491 (struct disklabel *)data, 0); 1492 if (error == 0) { 1493 if (cmd == DIOCWDINFO) { 1494 dev_t cdev = CCDLABELDEV(dev); 1495 error = writedisklabel(cdev, &cs->sc_label); 1496 } 1497 } 1498 1499 cs->sc_flags &= ~CCDF_LABELLING; 1500 1501 ccdunlock(cs); 1502 1503 if (error) 1504 return (error); 1505 break; 1506 1507 case DIOCWLABEL: 1508 if ((cs->sc_flags & CCDF_INITED) == 0) 1509 return (ENXIO); 1510 1511 if ((flag & FWRITE) == 0) 1512 return (EBADF); 1513 if (*(int *)data != 0) 1514 cs->sc_flags |= CCDF_WLABEL; 1515 else 1516 cs->sc_flags &= ~CCDF_WLABEL; 1517 break; 1518 1519 default: 1520 return (ENOTTY); 1521 } 1522 1523 return (0); 1524 } 1525 1526 static int 1527 ccdsize(dev_t dev) 1528 { 1529 struct ccd_softc *cs; 1530 int part, size; 1531 1532 if (ccdopen(dev, 0, S_IFCHR, curthread)) 1533 return (-1); 1534 1535 cs = &ccd_softc[ccdunit(dev)]; 1536 part = ccdpart(dev); 1537 1538 if ((cs->sc_flags & CCDF_INITED) == 0) 1539 return (-1); 1540 1541 if (cs->sc_label.d_partitions[part].p_fstype != FS_SWAP) 1542 size = -1; 1543 else 1544 size = cs->sc_label.d_partitions[part].p_size; 1545 1546 if (ccdclose(dev, 0, S_IFCHR, curthread)) 1547 return (-1); 1548 1549 return (size); 1550 } 1551 1552 static int 1553 ccddump(dev_t dev, u_int count, u_int blkno, u_int secsize) 1554 { 1555 /* Not implemented. */ 1556 return ENXIO; 1557 } 1558 1559 /* 1560 * Lookup the provided name in the filesystem. If the file exists, 1561 * is a valid block device, and isn't being used by anyone else, 1562 * set *vpp to the file's vnode. 1563 */ 1564 static int 1565 ccdlookup(char *path, struct thread *td, struct vnode **vpp) 1566 { 1567 struct nlookupdata nd; 1568 struct ucred *cred; 1569 struct vnode *vp; 1570 int error; 1571 1572 KKASSERT(td->td_proc); 1573 cred = td->td_proc->p_ucred; 1574 *vpp = NULL; 1575 1576 error = nlookup_init(&nd, path, UIO_USERSPACE, NLC_FOLLOW|NLC_LOCKVP); 1577 if (error) 1578 return (error); 1579 if ((error = vn_open(&nd, NULL, FREAD|FWRITE, 0)) != 0) { 1580 #ifdef DEBUG 1581 if (ccddebug & CCDB_FOLLOW|CCDB_INIT) 1582 printf("ccdlookup: vn_open error = %d\n", error); 1583 #endif 1584 goto done; 1585 } 1586 vp = nd.nl_open_vp; 1587 1588 if (vp->v_usecount > 1) { 1589 error = EBUSY; 1590 goto done; 1591 } 1592 1593 if (!vn_isdisk(vp, &error)) 1594 goto done; 1595 1596 #ifdef DEBUG 1597 if (ccddebug & CCDB_VNODE) 1598 vprint("ccdlookup: vnode info", vp); 1599 #endif 1600 1601 VOP_UNLOCK(vp, 0); 1602 nd.nl_open_vp = NULL; 1603 nlookup_done(&nd); 1604 *vpp = vp; /* leave ref intact */ 1605 return (0); 1606 done: 1607 nlookup_done(&nd); 1608 return (error); 1609 } 1610 1611 /* 1612 * Read the disklabel from the ccd. If one is not present, fake one 1613 * up. 1614 */ 1615 static void 1616 ccdgetdisklabel(dev_t dev) 1617 { 1618 int unit = ccdunit(dev); 1619 struct ccd_softc *cs = &ccd_softc[unit]; 1620 char *errstring; 1621 struct disklabel *lp = &cs->sc_label; 1622 struct ccdgeom *ccg = &cs->sc_geom; 1623 dev_t cdev; 1624 1625 bzero(lp, sizeof(*lp)); 1626 1627 lp->d_secperunit = cs->sc_size; 1628 lp->d_secsize = ccg->ccg_secsize; 1629 lp->d_nsectors = ccg->ccg_nsectors; 1630 lp->d_ntracks = ccg->ccg_ntracks; 1631 lp->d_ncylinders = ccg->ccg_ncylinders; 1632 lp->d_secpercyl = lp->d_ntracks * lp->d_nsectors; 1633 1634 strncpy(lp->d_typename, "ccd", sizeof(lp->d_typename)); 1635 lp->d_type = DTYPE_CCD; 1636 strncpy(lp->d_packname, "fictitious", sizeof(lp->d_packname)); 1637 lp->d_rpm = 3600; 1638 lp->d_interleave = 1; 1639 lp->d_flags = 0; 1640 1641 lp->d_partitions[RAW_PART].p_offset = 0; 1642 lp->d_partitions[RAW_PART].p_size = cs->sc_size; 1643 lp->d_partitions[RAW_PART].p_fstype = FS_UNUSED; 1644 lp->d_npartitions = RAW_PART + 1; 1645 1646 lp->d_bbsize = BBSIZE; /* XXX */ 1647 lp->d_sbsize = SBSIZE; /* XXX */ 1648 1649 lp->d_magic = DISKMAGIC; 1650 lp->d_magic2 = DISKMAGIC; 1651 lp->d_checksum = dkcksum(&cs->sc_label); 1652 1653 /* 1654 * Call the generic disklabel extraction routine. 1655 */ 1656 cdev = CCDLABELDEV(dev); 1657 errstring = readdisklabel(cdev, &cs->sc_label); 1658 if (errstring != NULL) 1659 ccdmakedisklabel(cs); 1660 1661 #ifdef DEBUG 1662 /* It's actually extremely common to have unlabeled ccds. */ 1663 if (ccddebug & CCDB_LABEL) 1664 if (errstring != NULL) 1665 printf("ccd%d: %s\n", unit, errstring); 1666 #endif 1667 } 1668 1669 /* 1670 * Take care of things one might want to take care of in the event 1671 * that a disklabel isn't present. 1672 */ 1673 static void 1674 ccdmakedisklabel(struct ccd_softc *cs) 1675 { 1676 struct disklabel *lp = &cs->sc_label; 1677 1678 /* 1679 * For historical reasons, if there's no disklabel present 1680 * the raw partition must be marked FS_BSDFFS. 1681 */ 1682 lp->d_partitions[RAW_PART].p_fstype = FS_BSDFFS; 1683 1684 strncpy(lp->d_packname, "default label", sizeof(lp->d_packname)); 1685 } 1686 1687 /* 1688 * Wait interruptibly for an exclusive lock. 1689 * 1690 * XXX 1691 * Several drivers do this; it should be abstracted and made MP-safe. 1692 */ 1693 static int 1694 ccdlock(struct ccd_softc *cs) 1695 { 1696 int error; 1697 1698 while ((cs->sc_flags & CCDF_LOCKED) != 0) { 1699 cs->sc_flags |= CCDF_WANTED; 1700 if ((error = tsleep(cs, PCATCH, "ccdlck", 0)) != 0) 1701 return (error); 1702 } 1703 cs->sc_flags |= CCDF_LOCKED; 1704 return (0); 1705 } 1706 1707 /* 1708 * Unlock and wake up any waiters. 1709 */ 1710 static void 1711 ccdunlock(struct ccd_softc *cs) 1712 { 1713 1714 cs->sc_flags &= ~CCDF_LOCKED; 1715 if ((cs->sc_flags & CCDF_WANTED) != 0) { 1716 cs->sc_flags &= ~CCDF_WANTED; 1717 wakeup(cs); 1718 } 1719 } 1720 1721 #ifdef DEBUG 1722 static void 1723 printiinfo(struct ccdiinfo *ii) 1724 { 1725 int ix, i; 1726 1727 for (ix = 0; ii->ii_ndisk; ix++, ii++) { 1728 printf(" itab[%d]: #dk %d sblk %d soff %d", 1729 ix, ii->ii_ndisk, ii->ii_startblk, ii->ii_startoff); 1730 for (i = 0; i < ii->ii_ndisk; i++) 1731 printf(" %d", ii->ii_index[i]); 1732 printf("\n"); 1733 } 1734 } 1735 #endif 1736 1737 1738 /* Local Variables: */ 1739 /* c-argdecl-indent: 8 */ 1740 /* c-continued-statement-offset: 8 */ 1741 /* c-indent-level: 8 */ 1742 /* End: */ 1743