1 /* $FreeBSD: src/sys/dev/ccd/ccd.c,v 1.73.2.1 2001/09/11 09:49:52 kris Exp $ */ 2 /* $DragonFly: src/sys/dev/disk/ccd/ccd.c,v 1.21 2005/12/11 01:54:07 swildner Exp $ */ 3 4 /* $NetBSD: ccd.c,v 1.22 1995/12/08 19:13:26 thorpej Exp $ */ 5 6 /* 7 * Copyright (c) 1995 Jason R. Thorpe. 8 * All rights reserved. 9 * 10 * Redistribution and use in source and binary forms, with or without 11 * modification, are permitted provided that the following conditions 12 * are met: 13 * 1. Redistributions of source code must retain the above copyright 14 * notice, this list of conditions and the following disclaimer. 15 * 2. Redistributions in binary form must reproduce the above copyright 16 * notice, this list of conditions and the following disclaimer in the 17 * documentation and/or other materials provided with the distribution. 18 * 3. All advertising materials mentioning features or use of this software 19 * must display the following acknowledgement: 20 * This product includes software developed for the NetBSD Project 21 * by Jason R. Thorpe. 22 * 4. The name of the author may not be used to endorse or promote products 23 * derived from this software without specific prior written permission. 24 * 25 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR 26 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 27 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 28 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, 29 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, 30 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 31 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED 32 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 33 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 34 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 35 * SUCH DAMAGE. 36 */ 37 38 /* 39 * Copyright (c) 1988 University of Utah. 40 * Copyright (c) 1990, 1993 41 * The Regents of the University of California. All rights reserved. 42 * 43 * This code is derived from software contributed to Berkeley by 44 * the Systems Programming Group of the University of Utah Computer 45 * Science Department. 46 * 47 * Redistribution and use in source and binary forms, with or without 48 * modification, are permitted provided that the following conditions 49 * are met: 50 * 1. Redistributions of source code must retain the above copyright 51 * notice, this list of conditions and the following disclaimer. 52 * 2. Redistributions in binary form must reproduce the above copyright 53 * notice, this list of conditions and the following disclaimer in the 54 * documentation and/or other materials provided with the distribution. 55 * 3. All advertising materials mentioning features or use of this software 56 * must display the following acknowledgement: 57 * This product includes software developed by the University of 58 * California, Berkeley and its contributors. 59 * 4. Neither the name of the University nor the names of its contributors 60 * may be used to endorse or promote products derived from this software 61 * without specific prior written permission. 62 * 63 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 64 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 65 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 66 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 67 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 68 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 69 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 70 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 71 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 72 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 73 * SUCH DAMAGE. 74 * 75 * from: Utah $Hdr: cd.c 1.6 90/11/28$ 76 * 77 * @(#)cd.c 8.2 (Berkeley) 11/16/93 78 */ 79 80 /* 81 * "Concatenated" disk driver. 82 * 83 * Dynamic configuration and disklabel support by: 84 * Jason R. Thorpe <thorpej@nas.nasa.gov> 85 * Numerical Aerodynamic Simulation Facility 86 * Mail Stop 258-6 87 * NASA Ames Research Center 88 * Moffett Field, CA 94035 89 */ 90 91 #include "use_ccd.h" 92 93 #include <sys/param.h> 94 #include <sys/systm.h> 95 #include <sys/kernel.h> 96 #include <sys/module.h> 97 #include <sys/proc.h> 98 #include <sys/buf.h> 99 #include <sys/malloc.h> 100 #include <sys/nlookup.h> 101 #include <sys/conf.h> 102 #include <sys/stat.h> 103 #include <sys/sysctl.h> 104 #include <sys/disklabel.h> 105 #include <vfs/ufs/fs.h> 106 #include <sys/devicestat.h> 107 #include <sys/fcntl.h> 108 #include <sys/vnode.h> 109 #include <sys/buf2.h> 110 111 #include <sys/ccdvar.h> 112 113 #include <sys/thread2.h> 114 115 #include <vm/vm_zone.h> 116 117 #if defined(CCDDEBUG) && !defined(DEBUG) 118 #define DEBUG 119 #endif 120 121 #ifdef DEBUG 122 #define CCDB_FOLLOW 0x01 123 #define CCDB_INIT 0x02 124 #define CCDB_IO 0x04 125 #define CCDB_LABEL 0x08 126 #define CCDB_VNODE 0x10 127 static int ccddebug = CCDB_FOLLOW | CCDB_INIT | CCDB_IO | CCDB_LABEL | 128 CCDB_VNODE; 129 SYSCTL_INT(_debug, OID_AUTO, ccddebug, CTLFLAG_RW, &ccddebug, 0, ""); 130 #undef DEBUG 131 #endif 132 133 #define ccdunit(x) dkunit(x) 134 #define ccdpart(x) dkpart(x) 135 136 /* 137 This is how mirroring works (only writes are special): 138 139 When initiating a write, ccdbuffer() returns two "struct ccdbuf *"s 140 linked together by the cb_mirror field. "cb_pflags & 141 CCDPF_MIRROR_DONE" is set to 0 on both of them. 142 143 When a component returns to ccdiodone(), it checks if "cb_pflags & 144 CCDPF_MIRROR_DONE" is set or not. If not, it sets the partner's 145 flag and returns. If it is, it means its partner has already 146 returned, so it will go to the regular cleanup. 147 148 */ 149 150 struct ccdbuf { 151 struct buf cb_buf; /* new I/O buf */ 152 struct buf *cb_obp; /* ptr. to original I/O buf */ 153 struct ccdbuf *cb_freenext; /* free list link */ 154 int cb_unit; /* target unit */ 155 int cb_comp; /* target component */ 156 int cb_pflags; /* mirror/parity status flag */ 157 struct ccdbuf *cb_mirror; /* mirror counterpart */ 158 }; 159 160 /* bits in cb_pflags */ 161 #define CCDPF_MIRROR_DONE 1 /* if set, mirror counterpart is done */ 162 163 #define CCDLABELDEV(dev) \ 164 (make_sub_dev(dev, dkmakeminor(ccdunit((dev)), 0, RAW_PART))) 165 166 static d_open_t ccdopen; 167 static d_close_t ccdclose; 168 static d_strategy_t ccdstrategy; 169 static d_ioctl_t ccdioctl; 170 static d_dump_t ccddump; 171 static d_psize_t ccdsize; 172 173 #define NCCDFREEHIWAT 16 174 175 #define CDEV_MAJOR 74 176 177 static struct cdevsw ccd_cdevsw = { 178 /* name */ "ccd", 179 /* maj */ CDEV_MAJOR, 180 /* flags */ D_DISK, 181 /* port */ NULL, 182 /* clone */ NULL, 183 184 /* open */ ccdopen, 185 /* close */ ccdclose, 186 /* read */ physread, 187 /* write */ physwrite, 188 /* ioctl */ ccdioctl, 189 /* poll */ nopoll, 190 /* mmap */ nommap, 191 /* strategy */ ccdstrategy, 192 /* dump */ ccddump, 193 /* psize */ ccdsize 194 }; 195 196 /* called during module initialization */ 197 static void ccdattach (void); 198 static int ccd_modevent (module_t, int, void *); 199 200 /* called by biodone() at interrupt time */ 201 static void ccdiodone (struct ccdbuf *cbp); 202 203 static void ccdstart (struct ccd_softc *, struct buf *); 204 static void ccdinterleave (struct ccd_softc *, int); 205 static void ccdintr (struct ccd_softc *, struct buf *); 206 static int ccdinit (struct ccddevice *, char **, struct thread *); 207 static int ccdlookup (char *, struct thread *td, struct vnode **); 208 static void ccdbuffer (struct ccdbuf **ret, struct ccd_softc *, 209 struct buf *, daddr_t, caddr_t, long); 210 static void ccdgetdisklabel (dev_t); 211 static void ccdmakedisklabel (struct ccd_softc *); 212 static int ccdlock (struct ccd_softc *); 213 static void ccdunlock (struct ccd_softc *); 214 215 #ifdef DEBUG 216 static void printiinfo (struct ccdiinfo *); 217 #endif 218 219 /* Non-private for the benefit of libkvm. */ 220 struct ccd_softc *ccd_softc; 221 struct ccddevice *ccddevs; 222 struct ccdbuf *ccdfreebufs; 223 static int numccdfreebufs; 224 static int numccd = 0; 225 226 /* 227 * getccdbuf() - Allocate and zero a ccd buffer. 228 * 229 * This routine is called at splbio(). 230 */ 231 232 static __inline 233 struct ccdbuf * 234 getccdbuf(struct ccdbuf *cpy) 235 { 236 struct ccdbuf *cbp; 237 238 /* 239 * Allocate from freelist or malloc as necessary 240 */ 241 if ((cbp = ccdfreebufs) != NULL) { 242 ccdfreebufs = cbp->cb_freenext; 243 --numccdfreebufs; 244 } else { 245 cbp = malloc(sizeof(struct ccdbuf), M_DEVBUF, M_WAITOK); 246 } 247 248 /* 249 * Used by mirroring code 250 */ 251 if (cpy) 252 bcopy(cpy, cbp, sizeof(struct ccdbuf)); 253 else 254 bzero(cbp, sizeof(struct ccdbuf)); 255 256 /* 257 * independant struct buf initialization 258 */ 259 LIST_INIT(&cbp->cb_buf.b_dep); 260 BUF_LOCKINIT(&cbp->cb_buf); 261 BUF_LOCK(&cbp->cb_buf, LK_EXCLUSIVE); 262 BUF_KERNPROC(&cbp->cb_buf); 263 264 return(cbp); 265 } 266 267 /* 268 * putccdbuf() - Free a ccd buffer. 269 * 270 * This routine is called at splbio(). 271 */ 272 273 static __inline 274 void 275 putccdbuf(struct ccdbuf *cbp) 276 { 277 BUF_UNLOCK(&cbp->cb_buf); 278 BUF_LOCKFREE(&cbp->cb_buf); 279 280 if (numccdfreebufs < NCCDFREEHIWAT) { 281 cbp->cb_freenext = ccdfreebufs; 282 ccdfreebufs = cbp; 283 ++numccdfreebufs; 284 } else { 285 free((caddr_t)cbp, M_DEVBUF); 286 } 287 } 288 289 290 /* 291 * Number of blocks to untouched in front of a component partition. 292 * This is to avoid violating its disklabel area when it starts at the 293 * beginning of the slice. 294 */ 295 #if !defined(CCD_OFFSET) 296 #define CCD_OFFSET 16 297 #endif 298 299 /* 300 * Called by main() during pseudo-device attachment. All we need 301 * to do is allocate enough space for devices to be configured later, and 302 * add devsw entries. 303 */ 304 static void 305 ccdattach(void) 306 { 307 int i; 308 int num = NCCD; 309 310 if (num > 1) 311 printf("ccd0-%d: Concatenated disk drivers\n", num-1); 312 else 313 printf("ccd0: Concatenated disk driver\n"); 314 315 ccd_softc = malloc(num * sizeof(struct ccd_softc), M_DEVBUF, 316 M_WAITOK | M_ZERO); 317 ccddevs = malloc(num * sizeof(struct ccddevice), M_DEVBUF, 318 M_WAITOK | M_ZERO); 319 numccd = num; 320 321 cdevsw_add(&ccd_cdevsw, 0, 0); 322 /* XXX: is this necessary? */ 323 for (i = 0; i < numccd; ++i) 324 ccddevs[i].ccd_dk = -1; 325 } 326 327 static int 328 ccd_modevent(module_t mod, int type, void *data) 329 { 330 int error = 0; 331 332 switch (type) { 333 case MOD_LOAD: 334 ccdattach(); 335 break; 336 337 case MOD_UNLOAD: 338 printf("ccd0: Unload not supported!\n"); 339 error = EOPNOTSUPP; 340 break; 341 342 default: /* MOD_SHUTDOWN etc */ 343 break; 344 } 345 return (error); 346 } 347 348 DEV_MODULE(ccd, ccd_modevent, NULL); 349 350 static int 351 ccdinit(struct ccddevice *ccd, char **cpaths, struct thread *td) 352 { 353 struct ccd_softc *cs = &ccd_softc[ccd->ccd_unit]; 354 struct ccdcinfo *ci = NULL; /* XXX */ 355 size_t size; 356 int ix; 357 struct vnode *vp; 358 size_t minsize; 359 int maxsecsize; 360 struct partinfo dpart; 361 struct ccdgeom *ccg = &cs->sc_geom; 362 char tmppath[MAXPATHLEN]; 363 int error = 0; 364 struct ucred *cred; 365 366 KKASSERT(td->td_proc); 367 cred = td->td_proc->p_ucred; 368 369 #ifdef DEBUG 370 if (ccddebug & (CCDB_FOLLOW|CCDB_INIT)) 371 printf("ccdinit: unit %d\n", ccd->ccd_unit); 372 #endif 373 374 cs->sc_size = 0; 375 cs->sc_ileave = ccd->ccd_interleave; 376 cs->sc_nccdisks = ccd->ccd_ndev; 377 378 /* Allocate space for the component info. */ 379 cs->sc_cinfo = malloc(cs->sc_nccdisks * sizeof(struct ccdcinfo), 380 M_DEVBUF, M_WAITOK); 381 382 /* 383 * Verify that each component piece exists and record 384 * relevant information about it. 385 */ 386 maxsecsize = 0; 387 minsize = 0; 388 for (ix = 0; ix < cs->sc_nccdisks; ix++) { 389 vp = ccd->ccd_vpp[ix]; 390 ci = &cs->sc_cinfo[ix]; 391 ci->ci_vp = vp; 392 393 /* 394 * Copy in the pathname of the component. 395 */ 396 bzero(tmppath, sizeof(tmppath)); /* sanity */ 397 if ((error = copyinstr(cpaths[ix], tmppath, 398 MAXPATHLEN, &ci->ci_pathlen)) != 0) { 399 #ifdef DEBUG 400 if (ccddebug & (CCDB_FOLLOW|CCDB_INIT)) 401 printf("ccd%d: can't copy path, error = %d\n", 402 ccd->ccd_unit, error); 403 #endif 404 goto fail; 405 } 406 ci->ci_path = malloc(ci->ci_pathlen, M_DEVBUF, M_WAITOK); 407 bcopy(tmppath, ci->ci_path, ci->ci_pathlen); 408 409 ci->ci_dev = vn_todev(vp); 410 411 /* 412 * Get partition information for the component. 413 */ 414 if ((error = VOP_IOCTL(vp, DIOCGPART, (caddr_t)&dpart, 415 FREAD, cred, td)) != 0) { 416 #ifdef DEBUG 417 if (ccddebug & (CCDB_FOLLOW|CCDB_INIT)) 418 printf("ccd%d: %s: ioctl failed, error = %d\n", 419 ccd->ccd_unit, ci->ci_path, error); 420 #endif 421 goto fail; 422 } 423 if (dpart.part->p_fstype == FS_BSDFFS) { 424 maxsecsize = 425 ((dpart.disklab->d_secsize > maxsecsize) ? 426 dpart.disklab->d_secsize : maxsecsize); 427 size = dpart.part->p_size - CCD_OFFSET; 428 } else { 429 #ifdef DEBUG 430 if (ccddebug & (CCDB_FOLLOW|CCDB_INIT)) 431 printf("ccd%d: %s: incorrect partition type\n", 432 ccd->ccd_unit, ci->ci_path); 433 #endif 434 error = EFTYPE; 435 goto fail; 436 } 437 438 /* 439 * Calculate the size, truncating to an interleave 440 * boundary if necessary. 441 */ 442 443 if (cs->sc_ileave > 1) 444 size -= size % cs->sc_ileave; 445 446 if (size == 0) { 447 #ifdef DEBUG 448 if (ccddebug & (CCDB_FOLLOW|CCDB_INIT)) 449 printf("ccd%d: %s: size == 0\n", 450 ccd->ccd_unit, ci->ci_path); 451 #endif 452 error = ENODEV; 453 goto fail; 454 } 455 456 if (minsize == 0 || size < minsize) 457 minsize = size; 458 ci->ci_size = size; 459 cs->sc_size += size; 460 } 461 462 /* 463 * Don't allow the interleave to be smaller than 464 * the biggest component sector. 465 */ 466 if ((cs->sc_ileave > 0) && 467 (cs->sc_ileave < (maxsecsize / DEV_BSIZE))) { 468 #ifdef DEBUG 469 if (ccddebug & (CCDB_FOLLOW|CCDB_INIT)) 470 printf("ccd%d: interleave must be at least %d\n", 471 ccd->ccd_unit, (maxsecsize / DEV_BSIZE)); 472 #endif 473 error = EINVAL; 474 goto fail; 475 } 476 477 /* 478 * If uniform interleave is desired set all sizes to that of 479 * the smallest component. This will guarentee that a single 480 * interleave table is generated. 481 * 482 * Lost space must be taken into account when calculating the 483 * overall size. Half the space is lost when CCDF_MIRROR is 484 * specified. One disk is lost when CCDF_PARITY is specified. 485 */ 486 if (ccd->ccd_flags & CCDF_UNIFORM) { 487 for (ci = cs->sc_cinfo; 488 ci < &cs->sc_cinfo[cs->sc_nccdisks]; ci++) { 489 ci->ci_size = minsize; 490 } 491 if (ccd->ccd_flags & CCDF_MIRROR) { 492 /* 493 * Check to see if an even number of components 494 * have been specified. The interleave must also 495 * be non-zero in order for us to be able to 496 * guarentee the topology. 497 */ 498 if (cs->sc_nccdisks % 2) { 499 printf("ccd%d: mirroring requires an even number of disks\n", ccd->ccd_unit ); 500 error = EINVAL; 501 goto fail; 502 } 503 if (cs->sc_ileave == 0) { 504 printf("ccd%d: an interleave must be specified when mirroring\n", ccd->ccd_unit); 505 error = EINVAL; 506 goto fail; 507 } 508 cs->sc_size = (cs->sc_nccdisks/2) * minsize; 509 } else if (ccd->ccd_flags & CCDF_PARITY) { 510 cs->sc_size = (cs->sc_nccdisks-1) * minsize; 511 } else { 512 if (cs->sc_ileave == 0) { 513 printf("ccd%d: an interleave must be specified when using parity\n", ccd->ccd_unit); 514 error = EINVAL; 515 goto fail; 516 } 517 cs->sc_size = cs->sc_nccdisks * minsize; 518 } 519 } 520 521 /* 522 * Construct the interleave table. 523 */ 524 ccdinterleave(cs, ccd->ccd_unit); 525 526 /* 527 * Create pseudo-geometry based on 1MB cylinders. It's 528 * pretty close. 529 */ 530 ccg->ccg_secsize = maxsecsize; 531 ccg->ccg_ntracks = 1; 532 ccg->ccg_nsectors = 1024 * 1024 / ccg->ccg_secsize; 533 ccg->ccg_ncylinders = cs->sc_size / ccg->ccg_nsectors; 534 535 /* 536 * Add an devstat entry for this device. 537 */ 538 devstat_add_entry(&cs->device_stats, "ccd", ccd->ccd_unit, 539 ccg->ccg_secsize, DEVSTAT_ALL_SUPPORTED, 540 DEVSTAT_TYPE_STORARRAY |DEVSTAT_TYPE_IF_OTHER, 541 DEVSTAT_PRIORITY_ARRAY); 542 543 cs->sc_flags |= CCDF_INITED; 544 cs->sc_cflags = ccd->ccd_flags; /* So we can find out later... */ 545 cs->sc_unit = ccd->ccd_unit; 546 return (0); 547 fail: 548 while (ci > cs->sc_cinfo) { 549 ci--; 550 free(ci->ci_path, M_DEVBUF); 551 } 552 free(cs->sc_cinfo, M_DEVBUF); 553 return (error); 554 } 555 556 static void 557 ccdinterleave(struct ccd_softc *cs, int unit) 558 { 559 struct ccdcinfo *ci, *smallci; 560 struct ccdiinfo *ii; 561 daddr_t bn, lbn; 562 int ix; 563 u_long size; 564 565 #ifdef DEBUG 566 if (ccddebug & CCDB_INIT) 567 printf("ccdinterleave(%x): ileave %d\n", cs, cs->sc_ileave); 568 #endif 569 570 /* 571 * Allocate an interleave table. The worst case occurs when each 572 * of N disks is of a different size, resulting in N interleave 573 * tables. 574 * 575 * Chances are this is too big, but we don't care. 576 */ 577 size = (cs->sc_nccdisks + 1) * sizeof(struct ccdiinfo); 578 cs->sc_itable = (struct ccdiinfo *)malloc(size, M_DEVBUF, M_WAITOK); 579 bzero((caddr_t)cs->sc_itable, size); 580 581 /* 582 * Trivial case: no interleave (actually interleave of disk size). 583 * Each table entry represents a single component in its entirety. 584 * 585 * An interleave of 0 may not be used with a mirror or parity setup. 586 */ 587 if (cs->sc_ileave == 0) { 588 bn = 0; 589 ii = cs->sc_itable; 590 591 for (ix = 0; ix < cs->sc_nccdisks; ix++) { 592 /* Allocate space for ii_index. */ 593 ii->ii_index = malloc(sizeof(int), M_DEVBUF, M_WAITOK); 594 ii->ii_ndisk = 1; 595 ii->ii_startblk = bn; 596 ii->ii_startoff = 0; 597 ii->ii_index[0] = ix; 598 bn += cs->sc_cinfo[ix].ci_size; 599 ii++; 600 } 601 ii->ii_ndisk = 0; 602 #ifdef DEBUG 603 if (ccddebug & CCDB_INIT) 604 printiinfo(cs->sc_itable); 605 #endif 606 return; 607 } 608 609 /* 610 * The following isn't fast or pretty; it doesn't have to be. 611 */ 612 size = 0; 613 bn = lbn = 0; 614 for (ii = cs->sc_itable; ; ii++) { 615 /* 616 * Allocate space for ii_index. We might allocate more then 617 * we use. 618 */ 619 ii->ii_index = malloc((sizeof(int) * cs->sc_nccdisks), 620 M_DEVBUF, M_WAITOK); 621 622 /* 623 * Locate the smallest of the remaining components 624 */ 625 smallci = NULL; 626 for (ci = cs->sc_cinfo; ci < &cs->sc_cinfo[cs->sc_nccdisks]; 627 ci++) { 628 if (ci->ci_size > size && 629 (smallci == NULL || 630 ci->ci_size < smallci->ci_size)) { 631 smallci = ci; 632 } 633 } 634 635 /* 636 * Nobody left, all done 637 */ 638 if (smallci == NULL) { 639 ii->ii_ndisk = 0; 640 break; 641 } 642 643 /* 644 * Record starting logical block using an sc_ileave blocksize. 645 */ 646 ii->ii_startblk = bn / cs->sc_ileave; 647 648 /* 649 * Record starting comopnent block using an sc_ileave 650 * blocksize. This value is relative to the beginning of 651 * a component disk. 652 */ 653 ii->ii_startoff = lbn; 654 655 /* 656 * Determine how many disks take part in this interleave 657 * and record their indices. 658 */ 659 ix = 0; 660 for (ci = cs->sc_cinfo; 661 ci < &cs->sc_cinfo[cs->sc_nccdisks]; ci++) { 662 if (ci->ci_size >= smallci->ci_size) { 663 ii->ii_index[ix++] = ci - cs->sc_cinfo; 664 } 665 } 666 ii->ii_ndisk = ix; 667 bn += ix * (smallci->ci_size - size); 668 lbn = smallci->ci_size / cs->sc_ileave; 669 size = smallci->ci_size; 670 } 671 #ifdef DEBUG 672 if (ccddebug & CCDB_INIT) 673 printiinfo(cs->sc_itable); 674 #endif 675 } 676 677 /* ARGSUSED */ 678 static int 679 ccdopen(dev_t dev, int flags, int fmt, d_thread_t *td) 680 { 681 int unit = ccdunit(dev); 682 struct ccd_softc *cs; 683 struct disklabel *lp; 684 int error = 0, part, pmask; 685 686 #ifdef DEBUG 687 if (ccddebug & CCDB_FOLLOW) 688 printf("ccdopen(%x, %x)\n", dev, flags); 689 #endif 690 if (unit >= numccd) 691 return (ENXIO); 692 cs = &ccd_softc[unit]; 693 694 if ((error = ccdlock(cs)) != 0) 695 return (error); 696 697 lp = &cs->sc_label; 698 699 part = ccdpart(dev); 700 pmask = (1 << part); 701 702 /* 703 * If we're initialized, check to see if there are any other 704 * open partitions. If not, then it's safe to update 705 * the in-core disklabel. 706 */ 707 if ((cs->sc_flags & CCDF_INITED) && (cs->sc_openmask == 0)) 708 ccdgetdisklabel(dev); 709 710 /* Check that the partition exists. */ 711 if (part != RAW_PART && ((part >= lp->d_npartitions) || 712 (lp->d_partitions[part].p_fstype == FS_UNUSED))) { 713 error = ENXIO; 714 goto done; 715 } 716 717 cs->sc_openmask |= pmask; 718 done: 719 ccdunlock(cs); 720 return (0); 721 } 722 723 /* ARGSUSED */ 724 static int 725 ccdclose(dev_t dev, int flags, int fmt, d_thread_t *td) 726 { 727 int unit = ccdunit(dev); 728 struct ccd_softc *cs; 729 int error = 0, part; 730 731 #ifdef DEBUG 732 if (ccddebug & CCDB_FOLLOW) 733 printf("ccdclose(%x, %x)\n", dev, flags); 734 #endif 735 736 if (unit >= numccd) 737 return (ENXIO); 738 cs = &ccd_softc[unit]; 739 740 if ((error = ccdlock(cs)) != 0) 741 return (error); 742 743 part = ccdpart(dev); 744 745 /* ...that much closer to allowing unconfiguration... */ 746 cs->sc_openmask &= ~(1 << part); 747 ccdunlock(cs); 748 return (0); 749 } 750 751 static void 752 ccdstrategy(struct buf *bp) 753 { 754 int unit = ccdunit(bp->b_dev); 755 struct ccd_softc *cs = &ccd_softc[unit]; 756 int wlabel; 757 struct disklabel *lp; 758 759 #ifdef DEBUG 760 if (ccddebug & CCDB_FOLLOW) 761 printf("ccdstrategy(%x): unit %d\n", bp, unit); 762 #endif 763 if ((cs->sc_flags & CCDF_INITED) == 0) { 764 bp->b_error = ENXIO; 765 bp->b_flags |= B_ERROR; 766 goto done; 767 } 768 769 /* If it's a nil transfer, wake up the top half now. */ 770 if (bp->b_bcount == 0) 771 goto done; 772 773 lp = &cs->sc_label; 774 775 /* 776 * Do bounds checking and adjust transfer. If there's an 777 * error, the bounds check will flag that for us. 778 */ 779 wlabel = cs->sc_flags & (CCDF_WLABEL|CCDF_LABELLING); 780 if (ccdpart(bp->b_dev) != RAW_PART) { 781 if (bounds_check_with_label(bp, lp, wlabel) <= 0) 782 goto done; 783 } else { 784 int pbn; /* in sc_secsize chunks */ 785 long sz; /* in sc_secsize chunks */ 786 787 pbn = bp->b_blkno / (cs->sc_geom.ccg_secsize / DEV_BSIZE); 788 sz = howmany(bp->b_bcount, cs->sc_geom.ccg_secsize); 789 790 /* 791 * If out of bounds return an error. If at the EOF point, 792 * simply read or write less. 793 */ 794 795 if (pbn < 0 || pbn >= cs->sc_size) { 796 bp->b_resid = bp->b_bcount; 797 if (pbn != cs->sc_size) { 798 bp->b_error = EINVAL; 799 bp->b_flags |= B_ERROR | B_INVAL; 800 } 801 goto done; 802 } 803 804 /* 805 * If the request crosses EOF, truncate the request. 806 */ 807 if (pbn + sz > cs->sc_size) { 808 bp->b_bcount = (cs->sc_size - pbn) * 809 cs->sc_geom.ccg_secsize; 810 } 811 } 812 813 bp->b_resid = bp->b_bcount; 814 815 /* 816 * "Start" the unit. 817 */ 818 crit_enter(); 819 ccdstart(cs, bp); 820 crit_exit(); 821 return; 822 done: 823 biodone(bp); 824 } 825 826 static void 827 ccdstart(struct ccd_softc *cs, struct buf *bp) 828 { 829 long bcount, rcount; 830 struct ccdbuf *cbp[4]; 831 /* XXX! : 2 reads and 2 writes for RAID 4/5 */ 832 caddr_t addr; 833 daddr_t bn; 834 struct partition *pp; 835 836 #ifdef DEBUG 837 if (ccddebug & CCDB_FOLLOW) 838 printf("ccdstart(%x, %x)\n", cs, bp); 839 #endif 840 841 /* Record the transaction start */ 842 devstat_start_transaction(&cs->device_stats); 843 844 /* 845 * Translate the partition-relative block number to an absolute. 846 */ 847 bn = bp->b_blkno; 848 if (ccdpart(bp->b_dev) != RAW_PART) { 849 pp = &cs->sc_label.d_partitions[ccdpart(bp->b_dev)]; 850 bn += pp->p_offset; 851 } 852 853 /* 854 * Allocate component buffers and fire off the requests 855 */ 856 addr = bp->b_data; 857 for (bcount = bp->b_bcount; bcount > 0; bcount -= rcount) { 858 ccdbuffer(cbp, cs, bp, bn, addr, bcount); 859 rcount = cbp[0]->cb_buf.b_bcount; 860 861 if (cs->sc_cflags & CCDF_MIRROR) { 862 /* 863 * Mirroring. Writes go to both disks, reads are 864 * taken from whichever disk seems most appropriate. 865 * 866 * We attempt to localize reads to the disk whos arm 867 * is nearest the read request. We ignore seeks due 868 * to writes when making this determination and we 869 * also try to avoid hogging. 870 */ 871 if ((cbp[0]->cb_buf.b_flags & B_READ) == 0) { 872 cbp[0]->cb_buf.b_vp->v_numoutput++; 873 cbp[1]->cb_buf.b_vp->v_numoutput++; 874 VOP_STRATEGY(cbp[0]->cb_buf.b_vp, 875 &cbp[0]->cb_buf); 876 VOP_STRATEGY(cbp[1]->cb_buf.b_vp, 877 &cbp[1]->cb_buf); 878 } else { 879 int pick = cs->sc_pick; 880 daddr_t range = cs->sc_size / 16; 881 882 if (bn < cs->sc_blk[pick] - range || 883 bn > cs->sc_blk[pick] + range 884 ) { 885 cs->sc_pick = pick = 1 - pick; 886 } 887 cs->sc_blk[pick] = bn + btodb(rcount); 888 VOP_STRATEGY(cbp[pick]->cb_buf.b_vp, 889 &cbp[pick]->cb_buf); 890 } 891 } else { 892 /* 893 * Not mirroring 894 */ 895 if ((cbp[0]->cb_buf.b_flags & B_READ) == 0) 896 cbp[0]->cb_buf.b_vp->v_numoutput++; 897 VOP_STRATEGY(cbp[0]->cb_buf.b_vp, &cbp[0]->cb_buf); 898 } 899 bn += btodb(rcount); 900 addr += rcount; 901 } 902 } 903 904 /* 905 * Build a component buffer header. 906 */ 907 static void 908 ccdbuffer(struct ccdbuf **cb, struct ccd_softc *cs, struct buf *bp, daddr_t bn, 909 caddr_t addr, long bcount) 910 { 911 struct ccdcinfo *ci, *ci2 = NULL; /* XXX */ 912 struct ccdbuf *cbp; 913 daddr_t cbn, cboff; 914 off_t cbc; 915 916 #ifdef DEBUG 917 if (ccddebug & CCDB_IO) 918 printf("ccdbuffer(%x, %x, %d, %x, %d)\n", 919 cs, bp, bn, addr, bcount); 920 #endif 921 /* 922 * Determine which component bn falls in. 923 */ 924 cbn = bn; 925 cboff = 0; 926 927 if (cs->sc_ileave == 0) { 928 /* 929 * Serially concatenated and neither a mirror nor a parity 930 * config. This is a special case. 931 */ 932 daddr_t sblk; 933 934 sblk = 0; 935 for (ci = cs->sc_cinfo; cbn >= sblk + ci->ci_size; ci++) 936 sblk += ci->ci_size; 937 cbn -= sblk; 938 } else { 939 struct ccdiinfo *ii; 940 int ccdisk, off; 941 942 /* 943 * Calculate cbn, the logical superblock (sc_ileave chunks), 944 * and cboff, a normal block offset (DEV_BSIZE chunks) relative 945 * to cbn. 946 */ 947 cboff = cbn % cs->sc_ileave; /* DEV_BSIZE gran */ 948 cbn = cbn / cs->sc_ileave; /* DEV_BSIZE * ileave gran */ 949 950 /* 951 * Figure out which interleave table to use. 952 */ 953 for (ii = cs->sc_itable; ii->ii_ndisk; ii++) { 954 if (ii->ii_startblk > cbn) 955 break; 956 } 957 ii--; 958 959 /* 960 * off is the logical superblock relative to the beginning 961 * of this interleave block. 962 */ 963 off = cbn - ii->ii_startblk; 964 965 /* 966 * We must calculate which disk component to use (ccdisk), 967 * and recalculate cbn to be the superblock relative to 968 * the beginning of the component. This is typically done by 969 * adding 'off' and ii->ii_startoff together. However, 'off' 970 * must typically be divided by the number of components in 971 * this interleave array to be properly convert it from a 972 * CCD-relative logical superblock number to a 973 * component-relative superblock number. 974 */ 975 if (ii->ii_ndisk == 1) { 976 /* 977 * When we have just one disk, it can't be a mirror 978 * or a parity config. 979 */ 980 ccdisk = ii->ii_index[0]; 981 cbn = ii->ii_startoff + off; 982 } else { 983 if (cs->sc_cflags & CCDF_MIRROR) { 984 /* 985 * We have forced a uniform mapping, resulting 986 * in a single interleave array. We double 987 * up on the first half of the available 988 * components and our mirror is in the second 989 * half. This only works with a single 990 * interleave array because doubling up 991 * doubles the number of sectors, so there 992 * cannot be another interleave array because 993 * the next interleave array's calculations 994 * would be off. 995 */ 996 int ndisk2 = ii->ii_ndisk / 2; 997 ccdisk = ii->ii_index[off % ndisk2]; 998 cbn = ii->ii_startoff + off / ndisk2; 999 ci2 = &cs->sc_cinfo[ccdisk + ndisk2]; 1000 } else if (cs->sc_cflags & CCDF_PARITY) { 1001 /* 1002 * XXX not implemented yet 1003 */ 1004 int ndisk2 = ii->ii_ndisk - 1; 1005 ccdisk = ii->ii_index[off % ndisk2]; 1006 cbn = ii->ii_startoff + off / ndisk2; 1007 if (cbn % ii->ii_ndisk <= ccdisk) 1008 ccdisk++; 1009 } else { 1010 ccdisk = ii->ii_index[off % ii->ii_ndisk]; 1011 cbn = ii->ii_startoff + off / ii->ii_ndisk; 1012 } 1013 } 1014 1015 ci = &cs->sc_cinfo[ccdisk]; 1016 1017 /* 1018 * Convert cbn from a superblock to a normal block so it 1019 * can be used to calculate (along with cboff) the normal 1020 * block index into this particular disk. 1021 */ 1022 cbn *= cs->sc_ileave; 1023 } 1024 1025 /* 1026 * Fill in the component buf structure. 1027 */ 1028 cbp = getccdbuf(NULL); 1029 cbp->cb_buf.b_flags = bp->b_flags; 1030 cbp->cb_buf.b_iodone = (void (*)(struct buf *))ccdiodone; 1031 cbp->cb_buf.b_dev = ci->ci_dev; /* XXX */ 1032 cbp->cb_buf.b_blkno = cbn + cboff + CCD_OFFSET; 1033 cbp->cb_buf.b_offset = dbtob(cbn + cboff + CCD_OFFSET); 1034 cbp->cb_buf.b_data = addr; 1035 cbp->cb_buf.b_vp = ci->ci_vp; 1036 if (cs->sc_ileave == 0) 1037 cbc = dbtob((off_t)(ci->ci_size - cbn)); 1038 else 1039 cbc = dbtob((off_t)(cs->sc_ileave - cboff)); 1040 cbp->cb_buf.b_bcount = (cbc < bcount) ? cbc : bcount; 1041 cbp->cb_buf.b_bufsize = cbp->cb_buf.b_bcount; 1042 1043 /* 1044 * context for ccdiodone 1045 */ 1046 cbp->cb_obp = bp; 1047 cbp->cb_unit = cs - ccd_softc; 1048 cbp->cb_comp = ci - cs->sc_cinfo; 1049 1050 #ifdef DEBUG 1051 if (ccddebug & CCDB_IO) 1052 printf(" dev %x(u%d): cbp %x bn %d addr %x bcnt %d\n", 1053 ci->ci_dev, ci-cs->sc_cinfo, cbp, cbp->cb_buf.b_blkno, 1054 cbp->cb_buf.b_data, cbp->cb_buf.b_bcount); 1055 #endif 1056 cb[0] = cbp; 1057 1058 /* 1059 * Note: both I/O's setup when reading from mirror, but only one 1060 * will be executed. 1061 */ 1062 if (cs->sc_cflags & CCDF_MIRROR) { 1063 /* mirror, setup second I/O */ 1064 cbp = getccdbuf(cb[0]); 1065 cbp->cb_buf.b_dev = ci2->ci_dev; 1066 cbp->cb_buf.b_vp = ci2->ci_vp; 1067 cbp->cb_comp = ci2 - cs->sc_cinfo; 1068 cb[1] = cbp; 1069 /* link together the ccdbuf's and clear "mirror done" flag */ 1070 cb[0]->cb_mirror = cb[1]; 1071 cb[1]->cb_mirror = cb[0]; 1072 cb[0]->cb_pflags &= ~CCDPF_MIRROR_DONE; 1073 cb[1]->cb_pflags &= ~CCDPF_MIRROR_DONE; 1074 } 1075 } 1076 1077 static void 1078 ccdintr(struct ccd_softc *cs, struct buf *bp) 1079 { 1080 #ifdef DEBUG 1081 if (ccddebug & CCDB_FOLLOW) 1082 printf("ccdintr(%x, %x)\n", cs, bp); 1083 #endif 1084 /* 1085 * Request is done for better or worse, wakeup the top half. 1086 */ 1087 if (bp->b_flags & B_ERROR) 1088 bp->b_resid = bp->b_bcount; 1089 devstat_end_transaction_buf(&cs->device_stats, bp); 1090 biodone(bp); 1091 } 1092 1093 /* 1094 * Called at interrupt time. 1095 * Mark the component as done and if all components are done, 1096 * take a ccd interrupt. 1097 */ 1098 static void 1099 ccdiodone(struct ccdbuf *cbp) 1100 { 1101 struct buf *bp = cbp->cb_obp; 1102 int unit = cbp->cb_unit; 1103 int count; 1104 1105 crit_enter(); 1106 #ifdef DEBUG 1107 if (ccddebug & CCDB_FOLLOW) 1108 printf("ccdiodone(%x)\n", cbp); 1109 if (ccddebug & CCDB_IO) { 1110 printf("ccdiodone: bp %x bcount %d resid %d\n", 1111 bp, bp->b_bcount, bp->b_resid); 1112 printf(" dev %x(u%d), cbp %x bn %d addr %x bcnt %d\n", 1113 cbp->cb_buf.b_dev, cbp->cb_comp, cbp, 1114 cbp->cb_buf.b_blkno, cbp->cb_buf.b_data, 1115 cbp->cb_buf.b_bcount); 1116 } 1117 #endif 1118 /* 1119 * If an error occured, report it. If this is a mirrored 1120 * configuration and the first of two possible reads, do not 1121 * set the error in the bp yet because the second read may 1122 * succeed. 1123 */ 1124 1125 if (cbp->cb_buf.b_flags & B_ERROR) { 1126 const char *msg = ""; 1127 1128 if ((ccd_softc[unit].sc_cflags & CCDF_MIRROR) && 1129 (cbp->cb_buf.b_flags & B_READ) && 1130 (cbp->cb_pflags & CCDPF_MIRROR_DONE) == 0) { 1131 /* 1132 * We will try our read on the other disk down 1133 * below, also reverse the default pick so if we 1134 * are doing a scan we do not keep hitting the 1135 * bad disk first. 1136 */ 1137 struct ccd_softc *cs = &ccd_softc[unit]; 1138 1139 msg = ", trying other disk"; 1140 cs->sc_pick = 1 - cs->sc_pick; 1141 cs->sc_blk[cs->sc_pick] = bp->b_blkno; 1142 } else { 1143 bp->b_flags |= B_ERROR; 1144 bp->b_error = cbp->cb_buf.b_error ? 1145 cbp->cb_buf.b_error : EIO; 1146 } 1147 printf("ccd%d: error %d on component %d block %d (ccd block %d)%s\n", 1148 unit, bp->b_error, cbp->cb_comp, 1149 (int)cbp->cb_buf.b_blkno, bp->b_blkno, msg); 1150 } 1151 1152 /* 1153 * Process mirror. If we are writing, I/O has been initiated on both 1154 * buffers and we fall through only after both are finished. 1155 * 1156 * If we are reading only one I/O is initiated at a time. If an 1157 * error occurs we initiate the second I/O and return, otherwise 1158 * we free the second I/O without initiating it. 1159 */ 1160 1161 if (ccd_softc[unit].sc_cflags & CCDF_MIRROR) { 1162 if ((cbp->cb_buf.b_flags & B_READ) == 0) { 1163 /* 1164 * When writing, handshake with the second buffer 1165 * to determine when both are done. If both are not 1166 * done, return here. 1167 */ 1168 if ((cbp->cb_pflags & CCDPF_MIRROR_DONE) == 0) { 1169 cbp->cb_mirror->cb_pflags |= CCDPF_MIRROR_DONE; 1170 putccdbuf(cbp); 1171 crit_exit(); 1172 return; 1173 } 1174 } else { 1175 /* 1176 * When reading, either dispose of the second buffer 1177 * or initiate I/O on the second buffer if an error 1178 * occured with this one. 1179 */ 1180 if ((cbp->cb_pflags & CCDPF_MIRROR_DONE) == 0) { 1181 if (cbp->cb_buf.b_flags & B_ERROR) { 1182 cbp->cb_mirror->cb_pflags |= 1183 CCDPF_MIRROR_DONE; 1184 VOP_STRATEGY( 1185 cbp->cb_mirror->cb_buf.b_vp, 1186 &cbp->cb_mirror->cb_buf 1187 ); 1188 putccdbuf(cbp); 1189 crit_exit(); 1190 return; 1191 } else { 1192 putccdbuf(cbp->cb_mirror); 1193 /* fall through */ 1194 } 1195 } 1196 } 1197 } 1198 1199 /* 1200 * use b_bufsize to determine how big the original request was rather 1201 * then b_bcount, because b_bcount may have been truncated for EOF. 1202 * 1203 * XXX We check for an error, but we do not test the resid for an 1204 * aligned EOF condition. This may result in character & block 1205 * device access not recognizing EOF properly when read or written 1206 * sequentially, but will not effect filesystems. 1207 */ 1208 count = cbp->cb_buf.b_bufsize; 1209 putccdbuf(cbp); 1210 1211 /* 1212 * If all done, "interrupt". 1213 */ 1214 bp->b_resid -= count; 1215 if (bp->b_resid < 0) 1216 panic("ccdiodone: count"); 1217 if (bp->b_resid == 0) 1218 ccdintr(&ccd_softc[unit], bp); 1219 crit_exit(); 1220 } 1221 1222 static int 1223 ccdioctl(dev_t dev, u_long cmd, caddr_t data, int flag, d_thread_t *td) 1224 { 1225 int unit = ccdunit(dev); 1226 int i, j, lookedup = 0, error = 0; 1227 int part, pmask; 1228 struct ccd_softc *cs; 1229 struct ccd_ioctl *ccio = (struct ccd_ioctl *)data; 1230 struct ccddevice ccd; 1231 char **cpp; 1232 struct vnode **vpp; 1233 struct ucred *cred; 1234 1235 KKASSERT(td->td_proc != NULL); 1236 cred = td->td_proc->p_ucred; 1237 1238 if (unit >= numccd) 1239 return (ENXIO); 1240 cs = &ccd_softc[unit]; 1241 1242 bzero(&ccd, sizeof(ccd)); 1243 1244 switch (cmd) { 1245 case CCDIOCSET: 1246 if (cs->sc_flags & CCDF_INITED) 1247 return (EBUSY); 1248 1249 if ((flag & FWRITE) == 0) 1250 return (EBADF); 1251 1252 if ((error = ccdlock(cs)) != 0) 1253 return (error); 1254 1255 if (ccio->ccio_ndisks > CCD_MAXNDISKS) 1256 return (EINVAL); 1257 1258 /* Fill in some important bits. */ 1259 ccd.ccd_unit = unit; 1260 ccd.ccd_interleave = ccio->ccio_ileave; 1261 if (ccd.ccd_interleave == 0 && 1262 ((ccio->ccio_flags & CCDF_MIRROR) || 1263 (ccio->ccio_flags & CCDF_PARITY))) { 1264 printf("ccd%d: disabling mirror/parity, interleave is 0\n", unit); 1265 ccio->ccio_flags &= ~(CCDF_MIRROR | CCDF_PARITY); 1266 } 1267 if ((ccio->ccio_flags & CCDF_MIRROR) && 1268 (ccio->ccio_flags & CCDF_PARITY)) { 1269 printf("ccd%d: can't specify both mirror and parity, using mirror\n", unit); 1270 ccio->ccio_flags &= ~CCDF_PARITY; 1271 } 1272 if ((ccio->ccio_flags & (CCDF_MIRROR | CCDF_PARITY)) && 1273 !(ccio->ccio_flags & CCDF_UNIFORM)) { 1274 printf("ccd%d: mirror/parity forces uniform flag\n", 1275 unit); 1276 ccio->ccio_flags |= CCDF_UNIFORM; 1277 } 1278 ccd.ccd_flags = ccio->ccio_flags & CCDF_USERMASK; 1279 1280 /* 1281 * Allocate space for and copy in the array of 1282 * componet pathnames and device numbers. 1283 */ 1284 cpp = malloc(ccio->ccio_ndisks * sizeof(char *), 1285 M_DEVBUF, M_WAITOK); 1286 vpp = malloc(ccio->ccio_ndisks * sizeof(struct vnode *), 1287 M_DEVBUF, M_WAITOK); 1288 1289 error = copyin((caddr_t)ccio->ccio_disks, (caddr_t)cpp, 1290 ccio->ccio_ndisks * sizeof(char **)); 1291 if (error) { 1292 free(vpp, M_DEVBUF); 1293 free(cpp, M_DEVBUF); 1294 ccdunlock(cs); 1295 return (error); 1296 } 1297 1298 #ifdef DEBUG 1299 if (ccddebug & CCDB_INIT) 1300 for (i = 0; i < ccio->ccio_ndisks; ++i) 1301 printf("ccdioctl: component %d: 0x%x\n", 1302 i, cpp[i]); 1303 #endif 1304 1305 for (i = 0; i < ccio->ccio_ndisks; ++i) { 1306 #ifdef DEBUG 1307 if (ccddebug & CCDB_INIT) 1308 printf("ccdioctl: lookedup = %d\n", lookedup); 1309 #endif 1310 if ((error = ccdlookup(cpp[i], td, &vpp[i])) != 0) { 1311 for (j = 0; j < lookedup; ++j) 1312 (void)vn_close(vpp[j], FREAD|FWRITE, td); 1313 free(vpp, M_DEVBUF); 1314 free(cpp, M_DEVBUF); 1315 ccdunlock(cs); 1316 return (error); 1317 } 1318 ++lookedup; 1319 } 1320 ccd.ccd_cpp = cpp; 1321 ccd.ccd_vpp = vpp; 1322 ccd.ccd_ndev = ccio->ccio_ndisks; 1323 1324 /* 1325 * Initialize the ccd. Fills in the softc for us. 1326 */ 1327 if ((error = ccdinit(&ccd, cpp, td)) != 0) { 1328 for (j = 0; j < lookedup; ++j) 1329 (void)vn_close(vpp[j], FREAD|FWRITE, td); 1330 bzero(&ccd_softc[unit], sizeof(struct ccd_softc)); 1331 free(vpp, M_DEVBUF); 1332 free(cpp, M_DEVBUF); 1333 ccdunlock(cs); 1334 return (error); 1335 } 1336 1337 /* 1338 * The ccd has been successfully initialized, so 1339 * we can place it into the array and read the disklabel. 1340 */ 1341 bcopy(&ccd, &ccddevs[unit], sizeof(ccd)); 1342 ccio->ccio_unit = unit; 1343 ccio->ccio_size = cs->sc_size; 1344 ccdgetdisklabel(dev); 1345 1346 ccdunlock(cs); 1347 1348 break; 1349 1350 case CCDIOCCLR: 1351 if ((cs->sc_flags & CCDF_INITED) == 0) 1352 return (ENXIO); 1353 1354 if ((flag & FWRITE) == 0) 1355 return (EBADF); 1356 1357 if ((error = ccdlock(cs)) != 0) 1358 return (error); 1359 1360 /* Don't unconfigure if any other partitions are open */ 1361 part = ccdpart(dev); 1362 pmask = (1 << part); 1363 if ((cs->sc_openmask & ~pmask)) { 1364 ccdunlock(cs); 1365 return (EBUSY); 1366 } 1367 1368 /* 1369 * Free ccd_softc information and clear entry. 1370 */ 1371 1372 /* Close the components and free their pathnames. */ 1373 for (i = 0; i < cs->sc_nccdisks; ++i) { 1374 /* 1375 * XXX: this close could potentially fail and 1376 * cause Bad Things. Maybe we need to force 1377 * the close to happen? 1378 */ 1379 #ifdef DEBUG 1380 if (ccddebug & CCDB_VNODE) 1381 vprint("CCDIOCCLR: vnode info", 1382 cs->sc_cinfo[i].ci_vp); 1383 #endif 1384 (void)vn_close(cs->sc_cinfo[i].ci_vp, FREAD|FWRITE, td); 1385 free(cs->sc_cinfo[i].ci_path, M_DEVBUF); 1386 } 1387 1388 /* Free interleave index. */ 1389 for (i = 0; cs->sc_itable[i].ii_ndisk; ++i) 1390 free(cs->sc_itable[i].ii_index, M_DEVBUF); 1391 1392 /* Free component info and interleave table. */ 1393 free(cs->sc_cinfo, M_DEVBUF); 1394 free(cs->sc_itable, M_DEVBUF); 1395 cs->sc_flags &= ~CCDF_INITED; 1396 1397 /* 1398 * Free ccddevice information and clear entry. 1399 */ 1400 free(ccddevs[unit].ccd_cpp, M_DEVBUF); 1401 free(ccddevs[unit].ccd_vpp, M_DEVBUF); 1402 ccd.ccd_dk = -1; 1403 bcopy(&ccd, &ccddevs[unit], sizeof(ccd)); 1404 1405 /* 1406 * And remove the devstat entry. 1407 */ 1408 devstat_remove_entry(&cs->device_stats); 1409 1410 /* This must be atomic. */ 1411 crit_enter(); 1412 ccdunlock(cs); 1413 bzero(cs, sizeof(struct ccd_softc)); 1414 crit_exit(); 1415 1416 break; 1417 1418 case DIOCGDINFO: 1419 if ((cs->sc_flags & CCDF_INITED) == 0) 1420 return (ENXIO); 1421 1422 *(struct disklabel *)data = cs->sc_label; 1423 break; 1424 1425 case DIOCGPART: 1426 if ((cs->sc_flags & CCDF_INITED) == 0) 1427 return (ENXIO); 1428 1429 ((struct partinfo *)data)->disklab = &cs->sc_label; 1430 ((struct partinfo *)data)->part = 1431 &cs->sc_label.d_partitions[ccdpart(dev)]; 1432 break; 1433 1434 case DIOCWDINFO: 1435 case DIOCSDINFO: 1436 if ((cs->sc_flags & CCDF_INITED) == 0) 1437 return (ENXIO); 1438 1439 if ((flag & FWRITE) == 0) 1440 return (EBADF); 1441 1442 if ((error = ccdlock(cs)) != 0) 1443 return (error); 1444 1445 cs->sc_flags |= CCDF_LABELLING; 1446 1447 error = setdisklabel(&cs->sc_label, 1448 (struct disklabel *)data, 0); 1449 if (error == 0) { 1450 if (cmd == DIOCWDINFO) { 1451 dev_t cdev = CCDLABELDEV(dev); 1452 error = writedisklabel(cdev, &cs->sc_label); 1453 } 1454 } 1455 1456 cs->sc_flags &= ~CCDF_LABELLING; 1457 1458 ccdunlock(cs); 1459 1460 if (error) 1461 return (error); 1462 break; 1463 1464 case DIOCWLABEL: 1465 if ((cs->sc_flags & CCDF_INITED) == 0) 1466 return (ENXIO); 1467 1468 if ((flag & FWRITE) == 0) 1469 return (EBADF); 1470 if (*(int *)data != 0) 1471 cs->sc_flags |= CCDF_WLABEL; 1472 else 1473 cs->sc_flags &= ~CCDF_WLABEL; 1474 break; 1475 1476 default: 1477 return (ENOTTY); 1478 } 1479 1480 return (0); 1481 } 1482 1483 static int 1484 ccdsize(dev_t dev) 1485 { 1486 struct ccd_softc *cs; 1487 int part, size; 1488 1489 if (ccdopen(dev, 0, S_IFCHR, curthread)) 1490 return (-1); 1491 1492 cs = &ccd_softc[ccdunit(dev)]; 1493 part = ccdpart(dev); 1494 1495 if ((cs->sc_flags & CCDF_INITED) == 0) 1496 return (-1); 1497 1498 if (cs->sc_label.d_partitions[part].p_fstype != FS_SWAP) 1499 size = -1; 1500 else 1501 size = cs->sc_label.d_partitions[part].p_size; 1502 1503 if (ccdclose(dev, 0, S_IFCHR, curthread)) 1504 return (-1); 1505 1506 return (size); 1507 } 1508 1509 static int 1510 ccddump(dev_t dev, u_int count, u_int blkno, u_int secsize) 1511 { 1512 /* Not implemented. */ 1513 return ENXIO; 1514 } 1515 1516 /* 1517 * Lookup the provided name in the filesystem. If the file exists, 1518 * is a valid block device, and isn't being used by anyone else, 1519 * set *vpp to the file's vnode. 1520 */ 1521 static int 1522 ccdlookup(char *path, struct thread *td, struct vnode **vpp) 1523 { 1524 struct nlookupdata nd; 1525 struct ucred *cred; 1526 struct vnode *vp; 1527 int error; 1528 1529 KKASSERT(td->td_proc); 1530 cred = td->td_proc->p_ucred; 1531 *vpp = NULL; 1532 1533 error = nlookup_init(&nd, path, UIO_USERSPACE, NLC_FOLLOW|NLC_LOCKVP); 1534 if (error) 1535 return (error); 1536 if ((error = vn_open(&nd, NULL, FREAD|FWRITE, 0)) != 0) { 1537 #ifdef DEBUG 1538 if (ccddebug & CCDB_FOLLOW|CCDB_INIT) 1539 printf("ccdlookup: vn_open error = %d\n", error); 1540 #endif 1541 goto done; 1542 } 1543 vp = nd.nl_open_vp; 1544 1545 if (vp->v_usecount > 1) { 1546 error = EBUSY; 1547 goto done; 1548 } 1549 1550 if (!vn_isdisk(vp, &error)) 1551 goto done; 1552 1553 #ifdef DEBUG 1554 if (ccddebug & CCDB_VNODE) 1555 vprint("ccdlookup: vnode info", vp); 1556 #endif 1557 1558 VOP_UNLOCK(vp, 0, td); 1559 nd.nl_open_vp = NULL; 1560 nlookup_done(&nd); 1561 *vpp = vp; /* leave ref intact */ 1562 return (0); 1563 done: 1564 nlookup_done(&nd); 1565 return (error); 1566 } 1567 1568 /* 1569 * Read the disklabel from the ccd. If one is not present, fake one 1570 * up. 1571 */ 1572 static void 1573 ccdgetdisklabel(dev_t dev) 1574 { 1575 int unit = ccdunit(dev); 1576 struct ccd_softc *cs = &ccd_softc[unit]; 1577 char *errstring; 1578 struct disklabel *lp = &cs->sc_label; 1579 struct ccdgeom *ccg = &cs->sc_geom; 1580 dev_t cdev; 1581 1582 bzero(lp, sizeof(*lp)); 1583 1584 lp->d_secperunit = cs->sc_size; 1585 lp->d_secsize = ccg->ccg_secsize; 1586 lp->d_nsectors = ccg->ccg_nsectors; 1587 lp->d_ntracks = ccg->ccg_ntracks; 1588 lp->d_ncylinders = ccg->ccg_ncylinders; 1589 lp->d_secpercyl = lp->d_ntracks * lp->d_nsectors; 1590 1591 strncpy(lp->d_typename, "ccd", sizeof(lp->d_typename)); 1592 lp->d_type = DTYPE_CCD; 1593 strncpy(lp->d_packname, "fictitious", sizeof(lp->d_packname)); 1594 lp->d_rpm = 3600; 1595 lp->d_interleave = 1; 1596 lp->d_flags = 0; 1597 1598 lp->d_partitions[RAW_PART].p_offset = 0; 1599 lp->d_partitions[RAW_PART].p_size = cs->sc_size; 1600 lp->d_partitions[RAW_PART].p_fstype = FS_UNUSED; 1601 lp->d_npartitions = RAW_PART + 1; 1602 1603 lp->d_bbsize = BBSIZE; /* XXX */ 1604 lp->d_sbsize = SBSIZE; /* XXX */ 1605 1606 lp->d_magic = DISKMAGIC; 1607 lp->d_magic2 = DISKMAGIC; 1608 lp->d_checksum = dkcksum(&cs->sc_label); 1609 1610 /* 1611 * Call the generic disklabel extraction routine. 1612 */ 1613 cdev = CCDLABELDEV(dev); 1614 errstring = readdisklabel(cdev, &cs->sc_label); 1615 if (errstring != NULL) 1616 ccdmakedisklabel(cs); 1617 1618 #ifdef DEBUG 1619 /* It's actually extremely common to have unlabeled ccds. */ 1620 if (ccddebug & CCDB_LABEL) 1621 if (errstring != NULL) 1622 printf("ccd%d: %s\n", unit, errstring); 1623 #endif 1624 } 1625 1626 /* 1627 * Take care of things one might want to take care of in the event 1628 * that a disklabel isn't present. 1629 */ 1630 static void 1631 ccdmakedisklabel(struct ccd_softc *cs) 1632 { 1633 struct disklabel *lp = &cs->sc_label; 1634 1635 /* 1636 * For historical reasons, if there's no disklabel present 1637 * the raw partition must be marked FS_BSDFFS. 1638 */ 1639 lp->d_partitions[RAW_PART].p_fstype = FS_BSDFFS; 1640 1641 strncpy(lp->d_packname, "default label", sizeof(lp->d_packname)); 1642 } 1643 1644 /* 1645 * Wait interruptibly for an exclusive lock. 1646 * 1647 * XXX 1648 * Several drivers do this; it should be abstracted and made MP-safe. 1649 */ 1650 static int 1651 ccdlock(struct ccd_softc *cs) 1652 { 1653 int error; 1654 1655 while ((cs->sc_flags & CCDF_LOCKED) != 0) { 1656 cs->sc_flags |= CCDF_WANTED; 1657 if ((error = tsleep(cs, PCATCH, "ccdlck", 0)) != 0) 1658 return (error); 1659 } 1660 cs->sc_flags |= CCDF_LOCKED; 1661 return (0); 1662 } 1663 1664 /* 1665 * Unlock and wake up any waiters. 1666 */ 1667 static void 1668 ccdunlock(struct ccd_softc *cs) 1669 { 1670 1671 cs->sc_flags &= ~CCDF_LOCKED; 1672 if ((cs->sc_flags & CCDF_WANTED) != 0) { 1673 cs->sc_flags &= ~CCDF_WANTED; 1674 wakeup(cs); 1675 } 1676 } 1677 1678 #ifdef DEBUG 1679 static void 1680 printiinfo(struct ccdiinfo *ii) 1681 { 1682 int ix, i; 1683 1684 for (ix = 0; ii->ii_ndisk; ix++, ii++) { 1685 printf(" itab[%d]: #dk %d sblk %d soff %d", 1686 ix, ii->ii_ndisk, ii->ii_startblk, ii->ii_startoff); 1687 for (i = 0; i < ii->ii_ndisk; i++) 1688 printf(" %d", ii->ii_index[i]); 1689 printf("\n"); 1690 } 1691 } 1692 #endif 1693 1694 1695 /* Local Variables: */ 1696 /* c-argdecl-indent: 8 */ 1697 /* c-continued-statement-offset: 8 */ 1698 /* c-indent-level: 8 */ 1699 /* End: */ 1700