1 /* $FreeBSD: src/sys/dev/ccd/ccd.c,v 1.73.2.1 2001/09/11 09:49:52 kris Exp $ */ 2 /* $DragonFly: src/sys/dev/disk/ccd/ccd.c,v 1.22 2006/02/17 19:17:55 dillon Exp $ */ 3 4 /* $NetBSD: ccd.c,v 1.22 1995/12/08 19:13:26 thorpej Exp $ */ 5 6 /* 7 * Copyright (c) 1995 Jason R. Thorpe. 8 * All rights reserved. 9 * 10 * Redistribution and use in source and binary forms, with or without 11 * modification, are permitted provided that the following conditions 12 * are met: 13 * 1. Redistributions of source code must retain the above copyright 14 * notice, this list of conditions and the following disclaimer. 15 * 2. Redistributions in binary form must reproduce the above copyright 16 * notice, this list of conditions and the following disclaimer in the 17 * documentation and/or other materials provided with the distribution. 18 * 3. All advertising materials mentioning features or use of this software 19 * must display the following acknowledgement: 20 * This product includes software developed for the NetBSD Project 21 * by Jason R. Thorpe. 22 * 4. The name of the author may not be used to endorse or promote products 23 * derived from this software without specific prior written permission. 24 * 25 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR 26 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 27 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 28 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, 29 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, 30 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 31 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED 32 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 33 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 34 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 35 * SUCH DAMAGE. 
36 */ 37 38 /* 39 * Copyright (c) 1988 University of Utah. 40 * Copyright (c) 1990, 1993 41 * The Regents of the University of California. All rights reserved. 42 * 43 * This code is derived from software contributed to Berkeley by 44 * the Systems Programming Group of the University of Utah Computer 45 * Science Department. 46 * 47 * Redistribution and use in source and binary forms, with or without 48 * modification, are permitted provided that the following conditions 49 * are met: 50 * 1. Redistributions of source code must retain the above copyright 51 * notice, this list of conditions and the following disclaimer. 52 * 2. Redistributions in binary form must reproduce the above copyright 53 * notice, this list of conditions and the following disclaimer in the 54 * documentation and/or other materials provided with the distribution. 55 * 3. All advertising materials mentioning features or use of this software 56 * must display the following acknowledgement: 57 * This product includes software developed by the University of 58 * California, Berkeley and its contributors. 59 * 4. Neither the name of the University nor the names of its contributors 60 * may be used to endorse or promote products derived from this software 61 * without specific prior written permission. 62 * 63 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 64 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 65 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 66 * ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * from: Utah $Hdr: cd.c 1.6 90/11/28$
 *
 *	@(#)cd.c	8.2 (Berkeley) 11/16/93
 */

/*
 * "Concatenated" disk driver.
 *
 * Dynamic configuration and disklabel support by:
 *	Jason R. Thorpe <thorpej@nas.nasa.gov>
 *	Numerical Aerodynamic Simulation Facility
 *	Mail Stop 258-6
 *	NASA Ames Research Center
 *	Moffett Field, CA 94035
 */

#include "use_ccd.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/module.h>
#include <sys/proc.h>
#include <sys/buf.h>
#include <sys/malloc.h>
#include <sys/nlookup.h>
#include <sys/conf.h>
#include <sys/stat.h>
#include <sys/sysctl.h>
#include <sys/disklabel.h>
#include <vfs/ufs/fs.h>
#include <sys/devicestat.h>
#include <sys/fcntl.h>
#include <sys/vnode.h>
#include <sys/buf2.h>

#include <sys/ccdvar.h>

#include <sys/thread2.h>

#include <vm/vm_zone.h>

#if defined(CCDDEBUG) && !defined(DEBUG)
#define DEBUG
#endif

/*
 * Debug flag bits for the debug.ccddebug sysctl.
 *
 * Note that DEBUG is #undef'd again at the end of this block, so unless
 * it is redefined later, the "#ifdef DEBUG" sections further down in this
 * file are compiled out even when the block below is compiled in.
 */
#ifdef DEBUG
#define CCDB_FOLLOW	0x01
#define CCDB_INIT	0x02
#define CCDB_IO		0x04
#define CCDB_LABEL	0x08
#define CCDB_VNODE	0x10
static int ccddebug = CCDB_FOLLOW | CCDB_INIT | CCDB_IO | CCDB_LABEL |
    CCDB_VNODE;
SYSCTL_INT(_debug, OID_AUTO, ccddebug, CTLFLAG_RW, &ccddebug, 0, "");
#undef DEBUG
#endif

/* Unit and partition number of a ccd dev_t. */
#define	ccdunit(x)	dkunit(x)
#define ccdpart(x)	dkpart(x)

/*
   This is how mirroring works (only writes are special):

   When initiating a write, ccdbuffer() returns two "struct ccdbuf *"s
   linked together by the cb_mirror field.  "cb_pflags &
   CCDPF_MIRROR_DONE" is set to 0 on both of them.

   When a component returns to ccdiodone(), it checks if "cb_pflags &
   CCDPF_MIRROR_DONE" is set or not.  If not, it sets the partner's
   flag and returns.  If it is, it means its partner has already
   returned, so it will go to the regular cleanup.

 */

/*
 * Per-component I/O descriptor: wraps the struct buf sent to one
 * component together with the context ccdiodone() needs to complete
 * the original request.
 */
struct ccdbuf {
	struct buf	cb_buf;		/* new I/O buf */
	struct bio	*cb_obio;	/* ptr. to original I/O buf */
	struct ccdbuf	*cb_freenext;	/* free list link */
	int		cb_unit;	/* target unit */
	int		cb_comp;	/* target component */
	int		cb_pflags;	/* mirror/parity status flag */
	struct ccdbuf	*cb_mirror;	/* mirror counterpart */
};

/* bits in cb_pflags */
#define CCDPF_MIRROR_DONE 1	/* if set, mirror counterpart is done */

/* dev_t of the raw partition of the ccd unit that dev belongs to. */
#define CCDLABELDEV(dev)	\
	(make_sub_dev(dev, dkmakeminor(ccdunit((dev)), 0, RAW_PART)))

static d_open_t ccdopen;
static d_close_t ccdclose;
static d_strategy_t ccdstrategy;
static d_ioctl_t ccdioctl;
static d_dump_t ccddump;
static d_psize_t ccdsize;

/* Upper bound on the number of ccdbufs kept on the free list. */
#define NCCDFREEHIWAT	16

#define CDEV_MAJOR 74

static struct cdevsw ccd_cdevsw = {
	/* name */	"ccd",
	/* maj */	CDEV_MAJOR,
	/* flags */	D_DISK,
	/* port */	NULL,
	/* clone */	NULL,

	/* open */	ccdopen,
	/* close */	ccdclose,
	/* read */	physread,
	/* write */	physwrite,
	/* ioctl */	ccdioctl,
	/* poll */	nopoll,
	/* mmap */	nommap,
	/* strategy */	ccdstrategy,
	/* dump */	ccddump,
	/* psize */	ccdsize
};

/* called during module initialization */
static	void ccdattach (void);
static	int ccd_modevent (module_t, int, void *);

/* called by biodone() at interrupt time */
static	void ccdiodone (struct bio *bio);

static	void ccdstart (struct ccd_softc *, struct bio *);
static	void ccdinterleave (struct ccd_softc *, int);
static	void ccdintr (struct ccd_softc *, struct bio *);
static	int ccdinit (struct ccddevice *, char **, struct thread *);
static	int ccdlookup (char *, struct thread *td, struct vnode **);
static	void ccdbuffer (struct ccdbuf **ret, struct ccd_softc *,
		struct bio *, daddr_t, caddr_t, long);
static	void ccdgetdisklabel (dev_t);
static	void ccdmakedisklabel (struct ccd_softc *);
static	int ccdlock (struct ccd_softc *);
static	void ccdunlock (struct ccd_softc *);

#ifdef DEBUG
static	void printiinfo (struct ccdiinfo *);
#endif

/* Non-private for the benefit of libkvm. */
struct	ccd_softc *ccd_softc;
struct	ccddevice *ccddevs;
struct	ccdbuf *ccdfreebufs;		/* head of the ccdbuf free list */
static	int numccdfreebufs;		/* number of entries on ccdfreebufs */
static	int numccd = 0;			/* number of units, set by ccdattach() */

/*
 * getccdbuf() - Allocate a ccd buffer, either zeroed or copied from cpy.
 *
 * The buffer is taken from the ccdfreebufs list when one is available and
 * malloc'd otherwise.  When cpy is non-NULL the whole ccdbuf is copied
 * from it (used to build the mirror twin); otherwise it is zeroed.  The
 * embedded buf is returned locked (BUF_LOCK/BUF_KERNPROC).
 *
 * This routine is called at splbio().
 */

static __inline
struct ccdbuf *
getccdbuf(struct ccdbuf *cpy)
{
	struct ccdbuf *cbp;

	/*
	 * Allocate from freelist or malloc as necessary
	 */
	if ((cbp = ccdfreebufs) != NULL) {
		ccdfreebufs = cbp->cb_freenext;
		--numccdfreebufs;
		reinitbufbio(&cbp->cb_buf);
	} else {
		cbp = malloc(sizeof(struct ccdbuf), M_DEVBUF, M_WAITOK);
		initbufbio(&cbp->cb_buf);
	}

	/*
	 * Used by mirroring code
	 *
	 * NOTE(review): both branches overwrite the entire ccdbuf,
	 * including the cb_buf that initbufbio()/reinitbufbio() was just
	 * applied to.  Confirm that nothing those helpers set up needs
	 * to survive the bcopy()/bzero().
	 */
	if (cpy)
		bcopy(cpy, cbp, sizeof(struct ccdbuf));
	else
		bzero(cbp, sizeof(struct ccdbuf));

	/*
	 * independent struct buf initialization
	 */
	LIST_INIT(&cbp->cb_buf.b_dep);
	BUF_LOCKINIT(&cbp->cb_buf);
	BUF_LOCK(&cbp->cb_buf, LK_EXCLUSIVE);
	BUF_KERNPROC(&cbp->cb_buf);

	return(cbp);
}

/*
 * putccdbuf() - Free a ccd buffer.
 *
 * The embedded buf is unlocked and its lock released; the ccdbuf then
 * goes back on the free list unless the list already holds
 * NCCDFREEHIWAT entries, in which case it is freed.
 *
 * This routine is called at splbio().
 */

static __inline
void
putccdbuf(struct ccdbuf *cbp)
{
	BUF_UNLOCK(&cbp->cb_buf);
	BUF_LOCKFREE(&cbp->cb_buf);

	if (numccdfreebufs < NCCDFREEHIWAT) {
		cbp->cb_freenext = ccdfreebufs;
		ccdfreebufs = cbp;
		++numccdfreebufs;
	} else {
		free((caddr_t)cbp, M_DEVBUF);
	}
}


/*
 * Number of blocks to leave untouched in front of a component partition.
 * This is to avoid violating its disklabel area when it starts at the
 * beginning of the slice.
 */
#if !defined(CCD_OFFSET)
#define CCD_OFFSET 16
#endif

/*
 * Called from ccd_modevent() on MOD_LOAD.  All we need to do is
 * allocate enough space for devices to be configured later, and
 * add devsw entries.
 */
static void
ccdattach(void)
{
	int i;
	int num = NCCD;

	if (num > 1)
		printf("ccd0-%d: Concatenated disk drivers\n", num-1);
	else
		printf("ccd0: Concatenated disk driver\n");

	/* Both tables are zero-filled and indexed by unit number. */
	ccd_softc = malloc(num * sizeof(struct ccd_softc), M_DEVBUF,
	    M_WAITOK | M_ZERO);
	ccddevs = malloc(num * sizeof(struct ccddevice), M_DEVBUF,
	    M_WAITOK | M_ZERO);
	numccd = num;

	cdevsw_add(&ccd_cdevsw, 0, 0);
	/* XXX: is this necessary? 
*/ 325 for (i = 0; i < numccd; ++i) 326 ccddevs[i].ccd_dk = -1; 327 } 328 329 static int 330 ccd_modevent(module_t mod, int type, void *data) 331 { 332 int error = 0; 333 334 switch (type) { 335 case MOD_LOAD: 336 ccdattach(); 337 break; 338 339 case MOD_UNLOAD: 340 printf("ccd0: Unload not supported!\n"); 341 error = EOPNOTSUPP; 342 break; 343 344 default: /* MOD_SHUTDOWN etc */ 345 break; 346 } 347 return (error); 348 } 349 350 DEV_MODULE(ccd, ccd_modevent, NULL); 351 352 static int 353 ccdinit(struct ccddevice *ccd, char **cpaths, struct thread *td) 354 { 355 struct ccd_softc *cs = &ccd_softc[ccd->ccd_unit]; 356 struct ccdcinfo *ci = NULL; /* XXX */ 357 size_t size; 358 int ix; 359 struct vnode *vp; 360 size_t minsize; 361 int maxsecsize; 362 struct partinfo dpart; 363 struct ccdgeom *ccg = &cs->sc_geom; 364 char tmppath[MAXPATHLEN]; 365 int error = 0; 366 struct ucred *cred; 367 368 KKASSERT(td->td_proc); 369 cred = td->td_proc->p_ucred; 370 371 #ifdef DEBUG 372 if (ccddebug & (CCDB_FOLLOW|CCDB_INIT)) 373 printf("ccdinit: unit %d\n", ccd->ccd_unit); 374 #endif 375 376 cs->sc_size = 0; 377 cs->sc_ileave = ccd->ccd_interleave; 378 cs->sc_nccdisks = ccd->ccd_ndev; 379 380 /* Allocate space for the component info. */ 381 cs->sc_cinfo = malloc(cs->sc_nccdisks * sizeof(struct ccdcinfo), 382 M_DEVBUF, M_WAITOK); 383 384 /* 385 * Verify that each component piece exists and record 386 * relevant information about it. 387 */ 388 maxsecsize = 0; 389 minsize = 0; 390 for (ix = 0; ix < cs->sc_nccdisks; ix++) { 391 vp = ccd->ccd_vpp[ix]; 392 ci = &cs->sc_cinfo[ix]; 393 ci->ci_vp = vp; 394 395 /* 396 * Copy in the pathname of the component. 
397 */ 398 bzero(tmppath, sizeof(tmppath)); /* sanity */ 399 if ((error = copyinstr(cpaths[ix], tmppath, 400 MAXPATHLEN, &ci->ci_pathlen)) != 0) { 401 #ifdef DEBUG 402 if (ccddebug & (CCDB_FOLLOW|CCDB_INIT)) 403 printf("ccd%d: can't copy path, error = %d\n", 404 ccd->ccd_unit, error); 405 #endif 406 goto fail; 407 } 408 ci->ci_path = malloc(ci->ci_pathlen, M_DEVBUF, M_WAITOK); 409 bcopy(tmppath, ci->ci_path, ci->ci_pathlen); 410 411 ci->ci_dev = vn_todev(vp); 412 413 /* 414 * Get partition information for the component. 415 */ 416 if ((error = VOP_IOCTL(vp, DIOCGPART, (caddr_t)&dpart, 417 FREAD, cred, td)) != 0) { 418 #ifdef DEBUG 419 if (ccddebug & (CCDB_FOLLOW|CCDB_INIT)) 420 printf("ccd%d: %s: ioctl failed, error = %d\n", 421 ccd->ccd_unit, ci->ci_path, error); 422 #endif 423 goto fail; 424 } 425 if (dpart.part->p_fstype == FS_BSDFFS) { 426 maxsecsize = 427 ((dpart.disklab->d_secsize > maxsecsize) ? 428 dpart.disklab->d_secsize : maxsecsize); 429 size = dpart.part->p_size - CCD_OFFSET; 430 } else { 431 #ifdef DEBUG 432 if (ccddebug & (CCDB_FOLLOW|CCDB_INIT)) 433 printf("ccd%d: %s: incorrect partition type\n", 434 ccd->ccd_unit, ci->ci_path); 435 #endif 436 error = EFTYPE; 437 goto fail; 438 } 439 440 /* 441 * Calculate the size, truncating to an interleave 442 * boundary if necessary. 443 */ 444 445 if (cs->sc_ileave > 1) 446 size -= size % cs->sc_ileave; 447 448 if (size == 0) { 449 #ifdef DEBUG 450 if (ccddebug & (CCDB_FOLLOW|CCDB_INIT)) 451 printf("ccd%d: %s: size == 0\n", 452 ccd->ccd_unit, ci->ci_path); 453 #endif 454 error = ENODEV; 455 goto fail; 456 } 457 458 if (minsize == 0 || size < minsize) 459 minsize = size; 460 ci->ci_size = size; 461 cs->sc_size += size; 462 } 463 464 /* 465 * Don't allow the interleave to be smaller than 466 * the biggest component sector. 
467 */ 468 if ((cs->sc_ileave > 0) && 469 (cs->sc_ileave < (maxsecsize / DEV_BSIZE))) { 470 #ifdef DEBUG 471 if (ccddebug & (CCDB_FOLLOW|CCDB_INIT)) 472 printf("ccd%d: interleave must be at least %d\n", 473 ccd->ccd_unit, (maxsecsize / DEV_BSIZE)); 474 #endif 475 error = EINVAL; 476 goto fail; 477 } 478 479 /* 480 * If uniform interleave is desired set all sizes to that of 481 * the smallest component. This will guarentee that a single 482 * interleave table is generated. 483 * 484 * Lost space must be taken into account when calculating the 485 * overall size. Half the space is lost when CCDF_MIRROR is 486 * specified. One disk is lost when CCDF_PARITY is specified. 487 */ 488 if (ccd->ccd_flags & CCDF_UNIFORM) { 489 for (ci = cs->sc_cinfo; 490 ci < &cs->sc_cinfo[cs->sc_nccdisks]; ci++) { 491 ci->ci_size = minsize; 492 } 493 if (ccd->ccd_flags & CCDF_MIRROR) { 494 /* 495 * Check to see if an even number of components 496 * have been specified. The interleave must also 497 * be non-zero in order for us to be able to 498 * guarentee the topology. 499 */ 500 if (cs->sc_nccdisks % 2) { 501 printf("ccd%d: mirroring requires an even number of disks\n", ccd->ccd_unit ); 502 error = EINVAL; 503 goto fail; 504 } 505 if (cs->sc_ileave == 0) { 506 printf("ccd%d: an interleave must be specified when mirroring\n", ccd->ccd_unit); 507 error = EINVAL; 508 goto fail; 509 } 510 cs->sc_size = (cs->sc_nccdisks/2) * minsize; 511 } else if (ccd->ccd_flags & CCDF_PARITY) { 512 cs->sc_size = (cs->sc_nccdisks-1) * minsize; 513 } else { 514 if (cs->sc_ileave == 0) { 515 printf("ccd%d: an interleave must be specified when using parity\n", ccd->ccd_unit); 516 error = EINVAL; 517 goto fail; 518 } 519 cs->sc_size = cs->sc_nccdisks * minsize; 520 } 521 } 522 523 /* 524 * Construct the interleave table. 525 */ 526 ccdinterleave(cs, ccd->ccd_unit); 527 528 /* 529 * Create pseudo-geometry based on 1MB cylinders. It's 530 * pretty close. 
531 */ 532 ccg->ccg_secsize = maxsecsize; 533 ccg->ccg_ntracks = 1; 534 ccg->ccg_nsectors = 1024 * 1024 / ccg->ccg_secsize; 535 ccg->ccg_ncylinders = cs->sc_size / ccg->ccg_nsectors; 536 537 /* 538 * Add an devstat entry for this device. 539 */ 540 devstat_add_entry(&cs->device_stats, "ccd", ccd->ccd_unit, 541 ccg->ccg_secsize, DEVSTAT_ALL_SUPPORTED, 542 DEVSTAT_TYPE_STORARRAY |DEVSTAT_TYPE_IF_OTHER, 543 DEVSTAT_PRIORITY_ARRAY); 544 545 cs->sc_flags |= CCDF_INITED; 546 cs->sc_cflags = ccd->ccd_flags; /* So we can find out later... */ 547 cs->sc_unit = ccd->ccd_unit; 548 return (0); 549 fail: 550 while (ci > cs->sc_cinfo) { 551 ci--; 552 free(ci->ci_path, M_DEVBUF); 553 } 554 free(cs->sc_cinfo, M_DEVBUF); 555 return (error); 556 } 557 558 static void 559 ccdinterleave(struct ccd_softc *cs, int unit) 560 { 561 struct ccdcinfo *ci, *smallci; 562 struct ccdiinfo *ii; 563 daddr_t bn, lbn; 564 int ix; 565 u_long size; 566 567 #ifdef DEBUG 568 if (ccddebug & CCDB_INIT) 569 printf("ccdinterleave(%x): ileave %d\n", cs, cs->sc_ileave); 570 #endif 571 572 /* 573 * Allocate an interleave table. The worst case occurs when each 574 * of N disks is of a different size, resulting in N interleave 575 * tables. 576 * 577 * Chances are this is too big, but we don't care. 578 */ 579 size = (cs->sc_nccdisks + 1) * sizeof(struct ccdiinfo); 580 cs->sc_itable = (struct ccdiinfo *)malloc(size, M_DEVBUF, M_WAITOK); 581 bzero((caddr_t)cs->sc_itable, size); 582 583 /* 584 * Trivial case: no interleave (actually interleave of disk size). 585 * Each table entry represents a single component in its entirety. 586 * 587 * An interleave of 0 may not be used with a mirror or parity setup. 588 */ 589 if (cs->sc_ileave == 0) { 590 bn = 0; 591 ii = cs->sc_itable; 592 593 for (ix = 0; ix < cs->sc_nccdisks; ix++) { 594 /* Allocate space for ii_index. 
*/ 595 ii->ii_index = malloc(sizeof(int), M_DEVBUF, M_WAITOK); 596 ii->ii_ndisk = 1; 597 ii->ii_startblk = bn; 598 ii->ii_startoff = 0; 599 ii->ii_index[0] = ix; 600 bn += cs->sc_cinfo[ix].ci_size; 601 ii++; 602 } 603 ii->ii_ndisk = 0; 604 #ifdef DEBUG 605 if (ccddebug & CCDB_INIT) 606 printiinfo(cs->sc_itable); 607 #endif 608 return; 609 } 610 611 /* 612 * The following isn't fast or pretty; it doesn't have to be. 613 */ 614 size = 0; 615 bn = lbn = 0; 616 for (ii = cs->sc_itable; ; ii++) { 617 /* 618 * Allocate space for ii_index. We might allocate more then 619 * we use. 620 */ 621 ii->ii_index = malloc((sizeof(int) * cs->sc_nccdisks), 622 M_DEVBUF, M_WAITOK); 623 624 /* 625 * Locate the smallest of the remaining components 626 */ 627 smallci = NULL; 628 for (ci = cs->sc_cinfo; ci < &cs->sc_cinfo[cs->sc_nccdisks]; 629 ci++) { 630 if (ci->ci_size > size && 631 (smallci == NULL || 632 ci->ci_size < smallci->ci_size)) { 633 smallci = ci; 634 } 635 } 636 637 /* 638 * Nobody left, all done 639 */ 640 if (smallci == NULL) { 641 ii->ii_ndisk = 0; 642 break; 643 } 644 645 /* 646 * Record starting logical block using an sc_ileave blocksize. 647 */ 648 ii->ii_startblk = bn / cs->sc_ileave; 649 650 /* 651 * Record starting comopnent block using an sc_ileave 652 * blocksize. This value is relative to the beginning of 653 * a component disk. 654 */ 655 ii->ii_startoff = lbn; 656 657 /* 658 * Determine how many disks take part in this interleave 659 * and record their indices. 
660 */ 661 ix = 0; 662 for (ci = cs->sc_cinfo; 663 ci < &cs->sc_cinfo[cs->sc_nccdisks]; ci++) { 664 if (ci->ci_size >= smallci->ci_size) { 665 ii->ii_index[ix++] = ci - cs->sc_cinfo; 666 } 667 } 668 ii->ii_ndisk = ix; 669 bn += ix * (smallci->ci_size - size); 670 lbn = smallci->ci_size / cs->sc_ileave; 671 size = smallci->ci_size; 672 } 673 #ifdef DEBUG 674 if (ccddebug & CCDB_INIT) 675 printiinfo(cs->sc_itable); 676 #endif 677 } 678 679 /* ARGSUSED */ 680 static int 681 ccdopen(dev_t dev, int flags, int fmt, d_thread_t *td) 682 { 683 int unit = ccdunit(dev); 684 struct ccd_softc *cs; 685 struct disklabel *lp; 686 int error = 0, part, pmask; 687 688 #ifdef DEBUG 689 if (ccddebug & CCDB_FOLLOW) 690 printf("ccdopen(%x, %x)\n", dev, flags); 691 #endif 692 if (unit >= numccd) 693 return (ENXIO); 694 cs = &ccd_softc[unit]; 695 696 if ((error = ccdlock(cs)) != 0) 697 return (error); 698 699 lp = &cs->sc_label; 700 701 part = ccdpart(dev); 702 pmask = (1 << part); 703 704 /* 705 * If we're initialized, check to see if there are any other 706 * open partitions. If not, then it's safe to update 707 * the in-core disklabel. 708 */ 709 if ((cs->sc_flags & CCDF_INITED) && (cs->sc_openmask == 0)) 710 ccdgetdisklabel(dev); 711 712 /* Check that the partition exists. 
*/ 713 if (part != RAW_PART && ((part >= lp->d_npartitions) || 714 (lp->d_partitions[part].p_fstype == FS_UNUSED))) { 715 error = ENXIO; 716 goto done; 717 } 718 719 cs->sc_openmask |= pmask; 720 done: 721 ccdunlock(cs); 722 return (0); 723 } 724 725 /* ARGSUSED */ 726 static int 727 ccdclose(dev_t dev, int flags, int fmt, d_thread_t *td) 728 { 729 int unit = ccdunit(dev); 730 struct ccd_softc *cs; 731 int error = 0, part; 732 733 #ifdef DEBUG 734 if (ccddebug & CCDB_FOLLOW) 735 printf("ccdclose(%x, %x)\n", dev, flags); 736 #endif 737 738 if (unit >= numccd) 739 return (ENXIO); 740 cs = &ccd_softc[unit]; 741 742 if ((error = ccdlock(cs)) != 0) 743 return (error); 744 745 part = ccdpart(dev); 746 747 /* ...that much closer to allowing unconfiguration... */ 748 cs->sc_openmask &= ~(1 << part); 749 ccdunlock(cs); 750 return (0); 751 } 752 753 static void 754 ccdstrategy(dev_t dev, struct bio *bio) 755 { 756 int unit = ccdunit(dev); 757 struct bio *nbio; 758 struct buf *bp = bio->bio_buf; 759 struct ccd_softc *cs = &ccd_softc[unit]; 760 int wlabel; 761 struct disklabel *lp; 762 763 #ifdef DEBUG 764 if (ccddebug & CCDB_FOLLOW) 765 printf("ccdstrategy(%x): unit %d\n", bp, unit); 766 #endif 767 if ((cs->sc_flags & CCDF_INITED) == 0) { 768 bp->b_error = ENXIO; 769 bp->b_flags |= B_ERROR; 770 goto done; 771 } 772 773 /* If it's a nil transfer, wake up the top half now. */ 774 if (bp->b_bcount == 0) 775 goto done; 776 777 lp = &cs->sc_label; 778 779 /* 780 * Do bounds checking and adjust transfer. If there's an 781 * error, the bounds check will flag that for us. 
*/
	wlabel = cs->sc_flags & (CCDF_WLABEL|CCDF_LABELLING);
	if (ccdpart(dev) != RAW_PART) {
		nbio = bounds_check_with_label(dev, bio, lp, wlabel);
		if (nbio == NULL)
			goto done;
	} else {
		int pbn;	/* in sc_secsize chunks */
		long sz;	/* in sc_secsize chunks */

		pbn = bio->bio_blkno / (cs->sc_geom.ccg_secsize / DEV_BSIZE);
		sz = howmany(bp->b_bcount, cs->sc_geom.ccg_secsize);

		/*
		 * If out of bounds return an error.  If at the EOF point,
		 * simply read or write less.
		 */

		if (pbn < 0 || pbn >= cs->sc_size) {
			bp->b_resid = bp->b_bcount;
			if (pbn != cs->sc_size) {
				bp->b_error = EINVAL;
				bp->b_flags |= B_ERROR | B_INVAL;
			}
			goto done;
		}

		/*
		 * If the request crosses EOF, truncate the request.
		 */
		if (pbn + sz > cs->sc_size) {
			bp->b_bcount = (cs->sc_size - pbn) *
			    cs->sc_geom.ccg_secsize;
		}
		nbio = bio;
	}

	/* Nothing transferred yet; ccdiodone() counts b_resid down. */
	bp->b_resid = bp->b_bcount;
	/* Remember the device so ccdstart() can recover the partition. */
	nbio->bio_driver_info = dev;

	/*
	 * "Start" the unit.
	 */
	crit_enter();
	ccdstart(cs, nbio);
	crit_exit();
	return;

	/*
	 * note: bio, not nbio, is valid at the done label.
	 */
done:
	biodone(bio);
}

/*
 * ccdstart() - Split the request described by bio into component
 * requests and issue them.
 *
 * Each pass of the loop asks ccdbuffer() for the component buffer(s)
 * covering the next piece of the transfer and hands them to
 * vn_strategy(); the piece length is taken from the first buffer's
 * b_bcount.  Called from ccdstrategy() inside a critical section.
 */
static void
ccdstart(struct ccd_softc *cs, struct bio *bio)
{
	long bcount, rcount;
	struct ccdbuf *cbp[4];
	struct buf *bp = bio->bio_buf;
	dev_t dev = bio->bio_driver_info;
	/* XXX! : 2 reads and 2 writes for RAID 4/5 */
	caddr_t addr;
	daddr_t bn;
	struct partition *pp;

#ifdef DEBUG
	if (ccddebug & CCDB_FOLLOW)
		printf("ccdstart(%x, %x)\n", cs, bp);
#endif

	/* Record the transaction start  */
	devstat_start_transaction(&cs->device_stats);

	/*
	 * Translate the partition-relative block number to an absolute.
	 */
	bn = bio->bio_blkno;
	if (ccdpart(dev) != RAW_PART) {
		pp = &cs->sc_label.d_partitions[ccdpart(dev)];
		bn += pp->p_offset;
	}

	/*
	 * Allocate component buffers and fire off the requests
	 */
	addr = bp->b_data;
	for (bcount = bp->b_bcount; bcount > 0; bcount -= rcount) {
		ccdbuffer(cbp, cs, bio, bn, addr, bcount);
		rcount = cbp[0]->cb_buf.b_bcount;

		if (cs->sc_cflags & CCDF_MIRROR) {
			/*
			 * Mirroring.  Writes go to both disks, reads are
			 * taken from whichever disk seems most appropriate.
			 *
			 * We attempt to localize reads to the disk whose arm
			 * is nearest the read request.  We ignore seeks due
			 * to writes when making this determination and we
			 * also try to avoid hogging.
			 */
			if ((cbp[0]->cb_buf.b_flags & B_READ) == 0) {
				vn_strategy(cbp[0]->cb_buf.b_vp,
				    &cbp[0]->cb_buf.b_bio1);
				vn_strategy(cbp[1]->cb_buf.b_vp,
				    &cbp[1]->cb_buf.b_bio1);
			} else {
				int pick = cs->sc_pick;
				daddr_t range = cs->sc_size / 16;

				/*
				 * Switch to the other half of the mirror
				 * when the request is further than range
				 * blocks from where the current pick was
				 * last left.
				 */
				if (bn < cs->sc_blk[pick] - range ||
				    bn > cs->sc_blk[pick] + range
				) {
					cs->sc_pick = pick = 1 - pick;
				}
				cs->sc_blk[pick] = bn + btodb(rcount);
				vn_strategy(cbp[pick]->cb_buf.b_vp,
				    &cbp[pick]->cb_buf.b_bio1);
			}
		} else {
			/*
			 * Not mirroring
			 */
			vn_strategy(cbp[0]->cb_buf.b_vp,
			     &cbp[0]->cb_buf.b_bio1);
		}
		bn += btodb(rcount);
		addr += rcount;
	}
}

/*
 * Build a component buffer header.
 *
 * Maps ccd block bn onto a component and a block within it, and fills
 * in cb[0] with a request for at most bcount bytes at addr that does
 * not run past the current interleave chunk (or past the component
 * when there is no interleave).  For mirrored units cb[1] receives a
 * copy aimed at the mirror component.
 */
static void
ccdbuffer(struct ccdbuf **cb, struct ccd_softc *cs, struct bio *bio, daddr_t bn,
	  caddr_t addr, long bcount)
{
	struct ccdcinfo *ci, *ci2 = NULL;	/* XXX */
	struct ccdbuf *cbp;
	daddr_t cbn, cboff;
	off_t cbc;

	/*
	 * NOTE(review): the debug printf below refers to "bp", which is
	 * not declared in this function, so it would not compile if
	 * DEBUG were defined here.
	 */
#ifdef DEBUG
	if (ccddebug & CCDB_IO)
		printf("ccdbuffer(%x, %x, %d, %x, %d)\n",
		       cs, bp, bn, addr, bcount);
#endif
	/*
	 * Determine which component bn falls in. 
*/
	cbn = bn;
	cboff = 0;

	if (cs->sc_ileave == 0) {
		/*
		 * Serially concatenated and neither a mirror nor a parity
		 * config.  This is a special case.
		 */
		daddr_t sblk;

		/* Walk the components until the one containing cbn. */
		sblk = 0;
		for (ci = cs->sc_cinfo; cbn >= sblk + ci->ci_size; ci++)
			sblk += ci->ci_size;
		cbn -= sblk;
	} else {
		struct ccdiinfo *ii;
		int ccdisk, off;

		/*
		 * Calculate cbn, the logical superblock (sc_ileave chunks),
		 * and cboff, a normal block offset (DEV_BSIZE chunks) relative
		 * to cbn.
		 */
		cboff = cbn % cs->sc_ileave;	/* DEV_BSIZE gran */
		cbn = cbn / cs->sc_ileave;	/* DEV_BSIZE * ileave gran */

		/*
		 * Figure out which interleave table to use.
		 */
		for (ii = cs->sc_itable; ii->ii_ndisk; ii++) {
			if (ii->ii_startblk > cbn)
				break;
		}
		/* Step back to the last entry starting at or before cbn. */
		ii--;

		/*
		 * off is the logical superblock relative to the beginning
		 * of this interleave block.
		 */
		off = cbn - ii->ii_startblk;

		/*
		 * We must calculate which disk component to use (ccdisk),
		 * and recalculate cbn to be the superblock relative to
		 * the beginning of the component.  This is typically done by
		 * adding 'off' and ii->ii_startoff together.  However, 'off'
		 * must typically be divided by the number of components in
		 * this interleave array to properly convert it from a
		 * CCD-relative logical superblock number to a
		 * component-relative superblock number.
		 */
		if (ii->ii_ndisk == 1) {
			/*
			 * When we have just one disk, it can't be a mirror
			 * or a parity config.
			 */
			ccdisk = ii->ii_index[0];
			cbn = ii->ii_startoff + off;
		} else {
			if (cs->sc_cflags & CCDF_MIRROR) {
				/*
				 * We have forced a uniform mapping, resulting
				 * in a single interleave array.  We double
				 * up on the first half of the available
				 * components and our mirror is in the second
				 * half.  This only works with a single
				 * interleave array because doubling up
				 * doubles the number of sectors, so there
				 * cannot be another interleave array because
				 * the next interleave array's calculations
				 * would be off.
				 */
				int ndisk2 = ii->ii_ndisk / 2;
				ccdisk = ii->ii_index[off % ndisk2];
				cbn = ii->ii_startoff + off / ndisk2;
				ci2 = &cs->sc_cinfo[ccdisk + ndisk2];
			} else if (cs->sc_cflags & CCDF_PARITY) {
				/*
				 * XXX not implemented yet
				 */
				int ndisk2 = ii->ii_ndisk - 1;
				ccdisk = ii->ii_index[off % ndisk2];
				cbn = ii->ii_startoff + off / ndisk2;
				if (cbn % ii->ii_ndisk <= ccdisk)
					ccdisk++;
			} else {
				ccdisk = ii->ii_index[off % ii->ii_ndisk];
				cbn = ii->ii_startoff + off / ii->ii_ndisk;
			}
		}

		ci = &cs->sc_cinfo[ccdisk];

		/*
		 * Convert cbn from a superblock to a normal block so it
		 * can be used to calculate (along with cboff) the normal
		 * block index into this particular disk.
		 */
		cbn *= cs->sc_ileave;
	}

	/*
	 * Fill in the component buf structure.
	 */
	cbp = getccdbuf(NULL);
	cbp->cb_buf.b_flags = bio->bio_buf->b_flags;
	/*cbp->cb_buf.b_dev = ci->ci_dev; */
	cbp->cb_buf.b_data = addr;
	cbp->cb_buf.b_vp = ci->ci_vp;
	/*
	 * cbc is the number of bytes left up to the end of the component
	 * (no interleave) or of the current interleave chunk; the request
	 * is clipped to it.
	 */
	if (cs->sc_ileave == 0)
		cbc = dbtob((off_t)(ci->ci_size - cbn));
	else
		cbc = dbtob((off_t)(cs->sc_ileave - cboff));
	cbp->cb_buf.b_bcount = (cbc < bcount) ? cbc : bcount;
	cbp->cb_buf.b_bufsize = cbp->cb_buf.b_bcount;

	/* Component block address, skipping the reserved CCD_OFFSET. */
	cbp->cb_buf.b_bio1.bio_done = ccdiodone;
	cbp->cb_buf.b_bio1.bio_caller_info1.ptr = cbp;
	cbp->cb_buf.b_bio1.bio_blkno = cbn + cboff + CCD_OFFSET;
	cbp->cb_buf.b_bio1.bio_offset = dbtob(cbn + cboff + CCD_OFFSET);

	/*
	 * context for ccdiodone
	 */
	cbp->cb_obio = bio;
	cbp->cb_unit = cs - ccd_softc;
	cbp->cb_comp = ci - cs->sc_cinfo;

#ifdef DEBUG
	if (ccddebug & CCDB_IO)
		printf(" dev %x(u%d): cbp %x bn %d addr %x bcnt %d\n",
		       ci->ci_dev, ci-cs->sc_cinfo, cbp,
		       cbp->cb_buf.b_bio1.bio_blkno,
		       cbp->cb_buf.b_data, cbp->cb_buf.b_bcount);
#endif
	cb[0] = cbp;

	/*
	 * Note: both I/O's setup when reading from mirror, but only one
	 * will be executed.
	 *
	 * NOTE(review): ci2 is assigned only in the multi-disk mirror
	 * branch above.  If CCDF_MIRROR could be set together with
	 * sc_ileave == 0 or a single-disk interleave entry, ci2 would
	 * still be NULL here - confirm that configuration rules this out.
	 */
	if (cs->sc_cflags & CCDF_MIRROR) {
		/* mirror, setup second I/O */
		cbp = getccdbuf(cb[0]);
		/* cbp->cb_buf.b_dev = ci2->ci_dev; */
		cbp->cb_buf.b_vp = ci2->ci_vp;
		cbp->cb_comp = ci2 - cs->sc_cinfo;
		cb[1] = cbp;
		/* link together the ccdbuf's and clear "mirror done" flag */
		cb[0]->cb_mirror = cb[1];
		cb[1]->cb_mirror = cb[0];
		cb[0]->cb_pflags &= ~CCDPF_MIRROR_DONE;
		cb[1]->cb_pflags &= ~CCDPF_MIRROR_DONE;
	}
}

/*
 * ccdintr() - Complete the original request: on error report the whole
 * transfer as residual, close the devstat transaction and biodone()
 * the original bio.
 */
static void
ccdintr(struct ccd_softc *cs, struct bio *bio)
{
	struct buf *bp = bio->bio_buf;

#ifdef DEBUG
	if (ccddebug & CCDB_FOLLOW)
		printf("ccdintr(%x, %x)\n", cs, bp);
#endif
	/*
	 * Request is done for better or worse, wakeup the top half.
	 */
	if (bp->b_flags & B_ERROR)
		bp->b_resid = bp->b_bcount;
	devstat_end_transaction_buf(&cs->device_stats, bp);
	biodone(bio);
}

/*
 * Called at interrupt time.
 * Mark the component as done and if all components are done,
 * take a ccd interrupt. 
*/
static void
ccdiodone(struct bio *bio)
{
	struct ccdbuf *cbp = bio->bio_caller_info1.ptr;
	struct bio *obio = cbp->cb_obio;
	struct buf *obp = obio->bio_buf;
	int unit = cbp->cb_unit;
	int count;

	/*
	 * Since we do not have exclusive access to underlying devices,
	 * we can't keep cache translations around.
	 */
	clearbiocache(bio->bio_next);

	crit_enter();
#ifdef DEBUG
	if (ccddebug & CCDB_FOLLOW)
		printf("ccdiodone(%x)\n", cbp);
	if (ccddebug & CCDB_IO) {
		printf("ccdiodone: bp %x bcount %d resid %d\n",
		       obp, obp->b_bcount, obp->b_resid);
		printf(" dev %x(u%d), cbp %x bn %d addr %x bcnt %d\n",
		       cbp->cb_buf.b_dev, cbp->cb_comp, cbp,
		       cbp->cb_buf.b_lblkno, cbp->cb_buf.b_data,
		       cbp->cb_buf.b_bcount);
	}
#endif
	/*
	 * If an error occurred, report it.  If this is a mirrored
	 * configuration and the first of two possible reads, do not
	 * set the error in the bp yet because the second read may
	 * succeed.
	 */
	if (cbp->cb_buf.b_flags & B_ERROR) {
		const char *msg = "";

		if ((ccd_softc[unit].sc_cflags & CCDF_MIRROR) &&
		    (cbp->cb_buf.b_flags & B_READ) &&
		    (cbp->cb_pflags & CCDPF_MIRROR_DONE) == 0) {
			/*
			 * We will try our read on the other disk down
			 * below, also reverse the default pick so if we
			 * are doing a scan we do not keep hitting the
			 * bad disk first.
			 */
			struct ccd_softc *cs = &ccd_softc[unit];

			msg = ", trying other disk";
			cs->sc_pick = 1 - cs->sc_pick;
			cs->sc_blk[cs->sc_pick] = obio->bio_blkno;
		} else {
			obp->b_flags |= B_ERROR;
			obp->b_error = cbp->cb_buf.b_error ?
			    cbp->cb_buf.b_error : EIO;
		}
		/*
		 * NOTE(review): the message prints obp->b_error, which the
		 * retry branch above does not update, and the component
		 * block is read from b_bio2 although ccdbuffer() stores
		 * the block number in b_bio1 - confirm both are intended.
		 */
		printf("ccd%d: error %d on component %d block %d (ccd block %d)%s\n",
		       unit, obp->b_error, cbp->cb_comp,
		       (int)cbp->cb_buf.b_bio2.bio_blkno,
		       obio->bio_blkno, msg);
	}

	/*
	 * Process mirror.  If we are writing, I/O has been initiated on both
	 * buffers and we fall through only after both are finished.
	 *
	 * If we are reading only one I/O is initiated at a time.  If an
	 * error occurs we initiate the second I/O and return, otherwise
	 * we free the second I/O without initiating it.
	 */

	if (ccd_softc[unit].sc_cflags & CCDF_MIRROR) {
		if ((cbp->cb_buf.b_flags & B_READ) == 0) {
			/*
			 * When writing, handshake with the second buffer
			 * to determine when both are done.  If both are not
			 * done, return here.
			 */
			if ((cbp->cb_pflags & CCDPF_MIRROR_DONE) == 0) {
				cbp->cb_mirror->cb_pflags |= CCDPF_MIRROR_DONE;
				putccdbuf(cbp);
				crit_exit();
				return;
			}
		} else {
			/*
			 * When reading, either dispose of the second buffer
			 * or initiate I/O on the second buffer if an error
			 * occurred with this one.
			 */
			if ((cbp->cb_pflags & CCDPF_MIRROR_DONE) == 0) {
				if (cbp->cb_buf.b_flags & B_ERROR) {
					/*
					 * Mark the twin so that its own
					 * completion takes the normal
					 * path instead of retrying again.
					 */
					cbp->cb_mirror->cb_pflags |=
					    CCDPF_MIRROR_DONE;
					vn_strategy(
					    cbp->cb_mirror->cb_buf.b_vp,
					    &cbp->cb_mirror->cb_buf.b_bio1
					);
					putccdbuf(cbp);
					crit_exit();
					return;
				} else {
					putccdbuf(cbp->cb_mirror);
					/* fall through */
				}
			}
		}
	}

	/*
	 * use b_bufsize to determine how big the original request was rather
	 * than b_bcount, because b_bcount may have been truncated for EOF.
	 *
	 * XXX We check for an error, but we do not test the resid for an
	 * aligned EOF condition.  This may result in character & block
	 * device access not recognizing EOF properly when read or written
	 * sequentially, but will not affect filesystems.
	 */
	count = cbp->cb_buf.b_bufsize;
	putccdbuf(cbp);

	/*
	 * If all done, "interrupt".
	 */
	obp->b_resid -= count;
	if (obp->b_resid < 0)
		panic("ccdiodone: count");
	if (obp->b_resid == 0)
		ccdintr(&ccd_softc[unit], obio);
	crit_exit();
}

static int
ccdioctl(dev_t dev, u_long cmd, caddr_t data, int flag, d_thread_t *td)
{
	int unit = ccdunit(dev);
	int i, j, lookedup = 0, error = 0;
	int part, pmask;
	struct ccd_softc *cs;
	struct ccd_ioctl *ccio = (struct ccd_ioctl *)data;
	struct ccddevice ccd;
	char **cpp;
	struct vnode **vpp;
	struct ucred *cred;

	KKASSERT(td->td_proc != NULL);
	cred = td->td_proc->p_ucred;

	if (unit >= numccd)
		return (ENXIO);
	cs = &ccd_softc[unit];

	bzero(&ccd, sizeof(ccd));

	switch (cmd) {
	case CCDIOCSET:
		if (cs->sc_flags & CCDF_INITED)
			return (EBUSY);

		if ((flag & FWRITE) == 0)
			return (EBADF);

		if ((error = ccdlock(cs)) != 0)
			return (error);

		/*
		 * NOTE(review): this return happens after ccdlock()
		 * succeeded but without a matching ccdunlock() - confirm
		 * whether the lock is leaked on this path.
		 */
		if (ccio->ccio_ndisks > CCD_MAXNDISKS)
			return (EINVAL);

		/* Fill in some important bits. 
*/ 1282 ccd.ccd_unit = unit; 1283 ccd.ccd_interleave = ccio->ccio_ileave; 1284 if (ccd.ccd_interleave == 0 && 1285 ((ccio->ccio_flags & CCDF_MIRROR) || 1286 (ccio->ccio_flags & CCDF_PARITY))) { 1287 printf("ccd%d: disabling mirror/parity, interleave is 0\n", unit); 1288 ccio->ccio_flags &= ~(CCDF_MIRROR | CCDF_PARITY); 1289 } 1290 if ((ccio->ccio_flags & CCDF_MIRROR) && 1291 (ccio->ccio_flags & CCDF_PARITY)) { 1292 printf("ccd%d: can't specify both mirror and parity, using mirror\n", unit); 1293 ccio->ccio_flags &= ~CCDF_PARITY; 1294 } 1295 if ((ccio->ccio_flags & (CCDF_MIRROR | CCDF_PARITY)) && 1296 !(ccio->ccio_flags & CCDF_UNIFORM)) { 1297 printf("ccd%d: mirror/parity forces uniform flag\n", 1298 unit); 1299 ccio->ccio_flags |= CCDF_UNIFORM; 1300 } 1301 ccd.ccd_flags = ccio->ccio_flags & CCDF_USERMASK; 1302 1303 /* 1304 * Allocate space for and copy in the array of 1305 * componet pathnames and device numbers. 1306 */ 1307 cpp = malloc(ccio->ccio_ndisks * sizeof(char *), 1308 M_DEVBUF, M_WAITOK); 1309 vpp = malloc(ccio->ccio_ndisks * sizeof(struct vnode *), 1310 M_DEVBUF, M_WAITOK); 1311 1312 error = copyin((caddr_t)ccio->ccio_disks, (caddr_t)cpp, 1313 ccio->ccio_ndisks * sizeof(char **)); 1314 if (error) { 1315 free(vpp, M_DEVBUF); 1316 free(cpp, M_DEVBUF); 1317 ccdunlock(cs); 1318 return (error); 1319 } 1320 1321 #ifdef DEBUG 1322 if (ccddebug & CCDB_INIT) 1323 for (i = 0; i < ccio->ccio_ndisks; ++i) 1324 printf("ccdioctl: component %d: 0x%x\n", 1325 i, cpp[i]); 1326 #endif 1327 1328 for (i = 0; i < ccio->ccio_ndisks; ++i) { 1329 #ifdef DEBUG 1330 if (ccddebug & CCDB_INIT) 1331 printf("ccdioctl: lookedup = %d\n", lookedup); 1332 #endif 1333 if ((error = ccdlookup(cpp[i], td, &vpp[i])) != 0) { 1334 for (j = 0; j < lookedup; ++j) 1335 (void)vn_close(vpp[j], FREAD|FWRITE, td); 1336 free(vpp, M_DEVBUF); 1337 free(cpp, M_DEVBUF); 1338 ccdunlock(cs); 1339 return (error); 1340 } 1341 ++lookedup; 1342 } 1343 ccd.ccd_cpp = cpp; 1344 ccd.ccd_vpp = vpp; 1345 ccd.ccd_ndev 
= ccio->ccio_ndisks; 1346 1347 /* 1348 * Initialize the ccd. Fills in the softc for us. 1349 */ 1350 if ((error = ccdinit(&ccd, cpp, td)) != 0) { 1351 for (j = 0; j < lookedup; ++j) 1352 (void)vn_close(vpp[j], FREAD|FWRITE, td); 1353 bzero(&ccd_softc[unit], sizeof(struct ccd_softc)); 1354 free(vpp, M_DEVBUF); 1355 free(cpp, M_DEVBUF); 1356 ccdunlock(cs); 1357 return (error); 1358 } 1359 1360 /* 1361 * The ccd has been successfully initialized, so 1362 * we can place it into the array and read the disklabel. 1363 */ 1364 bcopy(&ccd, &ccddevs[unit], sizeof(ccd)); 1365 ccio->ccio_unit = unit; 1366 ccio->ccio_size = cs->sc_size; 1367 ccdgetdisklabel(dev); 1368 1369 ccdunlock(cs); 1370 1371 break; 1372 1373 case CCDIOCCLR: 1374 if ((cs->sc_flags & CCDF_INITED) == 0) 1375 return (ENXIO); 1376 1377 if ((flag & FWRITE) == 0) 1378 return (EBADF); 1379 1380 if ((error = ccdlock(cs)) != 0) 1381 return (error); 1382 1383 /* Don't unconfigure if any other partitions are open */ 1384 part = ccdpart(dev); 1385 pmask = (1 << part); 1386 if ((cs->sc_openmask & ~pmask)) { 1387 ccdunlock(cs); 1388 return (EBUSY); 1389 } 1390 1391 /* 1392 * Free ccd_softc information and clear entry. 1393 */ 1394 1395 /* Close the components and free their pathnames. */ 1396 for (i = 0; i < cs->sc_nccdisks; ++i) { 1397 /* 1398 * XXX: this close could potentially fail and 1399 * cause Bad Things. Maybe we need to force 1400 * the close to happen? 1401 */ 1402 #ifdef DEBUG 1403 if (ccddebug & CCDB_VNODE) 1404 vprint("CCDIOCCLR: vnode info", 1405 cs->sc_cinfo[i].ci_vp); 1406 #endif 1407 (void)vn_close(cs->sc_cinfo[i].ci_vp, FREAD|FWRITE, td); 1408 free(cs->sc_cinfo[i].ci_path, M_DEVBUF); 1409 } 1410 1411 /* Free interleave index. */ 1412 for (i = 0; cs->sc_itable[i].ii_ndisk; ++i) 1413 free(cs->sc_itable[i].ii_index, M_DEVBUF); 1414 1415 /* Free component info and interleave table. 
*/ 1416 free(cs->sc_cinfo, M_DEVBUF); 1417 free(cs->sc_itable, M_DEVBUF); 1418 cs->sc_flags &= ~CCDF_INITED; 1419 1420 /* 1421 * Free ccddevice information and clear entry. 1422 */ 1423 free(ccddevs[unit].ccd_cpp, M_DEVBUF); 1424 free(ccddevs[unit].ccd_vpp, M_DEVBUF); 1425 ccd.ccd_dk = -1; 1426 bcopy(&ccd, &ccddevs[unit], sizeof(ccd)); 1427 1428 /* 1429 * And remove the devstat entry. 1430 */ 1431 devstat_remove_entry(&cs->device_stats); 1432 1433 /* This must be atomic. */ 1434 crit_enter(); 1435 ccdunlock(cs); 1436 bzero(cs, sizeof(struct ccd_softc)); 1437 crit_exit(); 1438 1439 break; 1440 1441 case DIOCGDINFO: 1442 if ((cs->sc_flags & CCDF_INITED) == 0) 1443 return (ENXIO); 1444 1445 *(struct disklabel *)data = cs->sc_label; 1446 break; 1447 1448 case DIOCGPART: 1449 if ((cs->sc_flags & CCDF_INITED) == 0) 1450 return (ENXIO); 1451 1452 ((struct partinfo *)data)->disklab = &cs->sc_label; 1453 ((struct partinfo *)data)->part = 1454 &cs->sc_label.d_partitions[ccdpart(dev)]; 1455 break; 1456 1457 case DIOCWDINFO: 1458 case DIOCSDINFO: 1459 if ((cs->sc_flags & CCDF_INITED) == 0) 1460 return (ENXIO); 1461 1462 if ((flag & FWRITE) == 0) 1463 return (EBADF); 1464 1465 if ((error = ccdlock(cs)) != 0) 1466 return (error); 1467 1468 cs->sc_flags |= CCDF_LABELLING; 1469 1470 error = setdisklabel(&cs->sc_label, 1471 (struct disklabel *)data, 0); 1472 if (error == 0) { 1473 if (cmd == DIOCWDINFO) { 1474 dev_t cdev = CCDLABELDEV(dev); 1475 error = writedisklabel(cdev, &cs->sc_label); 1476 } 1477 } 1478 1479 cs->sc_flags &= ~CCDF_LABELLING; 1480 1481 ccdunlock(cs); 1482 1483 if (error) 1484 return (error); 1485 break; 1486 1487 case DIOCWLABEL: 1488 if ((cs->sc_flags & CCDF_INITED) == 0) 1489 return (ENXIO); 1490 1491 if ((flag & FWRITE) == 0) 1492 return (EBADF); 1493 if (*(int *)data != 0) 1494 cs->sc_flags |= CCDF_WLABEL; 1495 else 1496 cs->sc_flags &= ~CCDF_WLABEL; 1497 break; 1498 1499 default: 1500 return (ENOTTY); 1501 } 1502 1503 return (0); 1504 } 1505 1506 static int 
1507 ccdsize(dev_t dev) 1508 { 1509 struct ccd_softc *cs; 1510 int part, size; 1511 1512 if (ccdopen(dev, 0, S_IFCHR, curthread)) 1513 return (-1); 1514 1515 cs = &ccd_softc[ccdunit(dev)]; 1516 part = ccdpart(dev); 1517 1518 if ((cs->sc_flags & CCDF_INITED) == 0) 1519 return (-1); 1520 1521 if (cs->sc_label.d_partitions[part].p_fstype != FS_SWAP) 1522 size = -1; 1523 else 1524 size = cs->sc_label.d_partitions[part].p_size; 1525 1526 if (ccdclose(dev, 0, S_IFCHR, curthread)) 1527 return (-1); 1528 1529 return (size); 1530 } 1531 1532 static int 1533 ccddump(dev_t dev, u_int count, u_int blkno, u_int secsize) 1534 { 1535 /* Not implemented. */ 1536 return ENXIO; 1537 } 1538 1539 /* 1540 * Lookup the provided name in the filesystem. If the file exists, 1541 * is a valid block device, and isn't being used by anyone else, 1542 * set *vpp to the file's vnode. 1543 */ 1544 static int 1545 ccdlookup(char *path, struct thread *td, struct vnode **vpp) 1546 { 1547 struct nlookupdata nd; 1548 struct ucred *cred; 1549 struct vnode *vp; 1550 int error; 1551 1552 KKASSERT(td->td_proc); 1553 cred = td->td_proc->p_ucred; 1554 *vpp = NULL; 1555 1556 error = nlookup_init(&nd, path, UIO_USERSPACE, NLC_FOLLOW|NLC_LOCKVP); 1557 if (error) 1558 return (error); 1559 if ((error = vn_open(&nd, NULL, FREAD|FWRITE, 0)) != 0) { 1560 #ifdef DEBUG 1561 if (ccddebug & CCDB_FOLLOW|CCDB_INIT) 1562 printf("ccdlookup: vn_open error = %d\n", error); 1563 #endif 1564 goto done; 1565 } 1566 vp = nd.nl_open_vp; 1567 1568 if (vp->v_usecount > 1) { 1569 error = EBUSY; 1570 goto done; 1571 } 1572 1573 if (!vn_isdisk(vp, &error)) 1574 goto done; 1575 1576 #ifdef DEBUG 1577 if (ccddebug & CCDB_VNODE) 1578 vprint("ccdlookup: vnode info", vp); 1579 #endif 1580 1581 VOP_UNLOCK(vp, 0, td); 1582 nd.nl_open_vp = NULL; 1583 nlookup_done(&nd); 1584 *vpp = vp; /* leave ref intact */ 1585 return (0); 1586 done: 1587 nlookup_done(&nd); 1588 return (error); 1589 } 1590 1591 /* 1592 * Read the disklabel from the ccd. 
If one is not present, fake one 1593 * up. 1594 */ 1595 static void 1596 ccdgetdisklabel(dev_t dev) 1597 { 1598 int unit = ccdunit(dev); 1599 struct ccd_softc *cs = &ccd_softc[unit]; 1600 char *errstring; 1601 struct disklabel *lp = &cs->sc_label; 1602 struct ccdgeom *ccg = &cs->sc_geom; 1603 dev_t cdev; 1604 1605 bzero(lp, sizeof(*lp)); 1606 1607 lp->d_secperunit = cs->sc_size; 1608 lp->d_secsize = ccg->ccg_secsize; 1609 lp->d_nsectors = ccg->ccg_nsectors; 1610 lp->d_ntracks = ccg->ccg_ntracks; 1611 lp->d_ncylinders = ccg->ccg_ncylinders; 1612 lp->d_secpercyl = lp->d_ntracks * lp->d_nsectors; 1613 1614 strncpy(lp->d_typename, "ccd", sizeof(lp->d_typename)); 1615 lp->d_type = DTYPE_CCD; 1616 strncpy(lp->d_packname, "fictitious", sizeof(lp->d_packname)); 1617 lp->d_rpm = 3600; 1618 lp->d_interleave = 1; 1619 lp->d_flags = 0; 1620 1621 lp->d_partitions[RAW_PART].p_offset = 0; 1622 lp->d_partitions[RAW_PART].p_size = cs->sc_size; 1623 lp->d_partitions[RAW_PART].p_fstype = FS_UNUSED; 1624 lp->d_npartitions = RAW_PART + 1; 1625 1626 lp->d_bbsize = BBSIZE; /* XXX */ 1627 lp->d_sbsize = SBSIZE; /* XXX */ 1628 1629 lp->d_magic = DISKMAGIC; 1630 lp->d_magic2 = DISKMAGIC; 1631 lp->d_checksum = dkcksum(&cs->sc_label); 1632 1633 /* 1634 * Call the generic disklabel extraction routine. 1635 */ 1636 cdev = CCDLABELDEV(dev); 1637 errstring = readdisklabel(cdev, &cs->sc_label); 1638 if (errstring != NULL) 1639 ccdmakedisklabel(cs); 1640 1641 #ifdef DEBUG 1642 /* It's actually extremely common to have unlabeled ccds. */ 1643 if (ccddebug & CCDB_LABEL) 1644 if (errstring != NULL) 1645 printf("ccd%d: %s\n", unit, errstring); 1646 #endif 1647 } 1648 1649 /* 1650 * Take care of things one might want to take care of in the event 1651 * that a disklabel isn't present. 
1652 */ 1653 static void 1654 ccdmakedisklabel(struct ccd_softc *cs) 1655 { 1656 struct disklabel *lp = &cs->sc_label; 1657 1658 /* 1659 * For historical reasons, if there's no disklabel present 1660 * the raw partition must be marked FS_BSDFFS. 1661 */ 1662 lp->d_partitions[RAW_PART].p_fstype = FS_BSDFFS; 1663 1664 strncpy(lp->d_packname, "default label", sizeof(lp->d_packname)); 1665 } 1666 1667 /* 1668 * Wait interruptibly for an exclusive lock. 1669 * 1670 * XXX 1671 * Several drivers do this; it should be abstracted and made MP-safe. 1672 */ 1673 static int 1674 ccdlock(struct ccd_softc *cs) 1675 { 1676 int error; 1677 1678 while ((cs->sc_flags & CCDF_LOCKED) != 0) { 1679 cs->sc_flags |= CCDF_WANTED; 1680 if ((error = tsleep(cs, PCATCH, "ccdlck", 0)) != 0) 1681 return (error); 1682 } 1683 cs->sc_flags |= CCDF_LOCKED; 1684 return (0); 1685 } 1686 1687 /* 1688 * Unlock and wake up any waiters. 1689 */ 1690 static void 1691 ccdunlock(struct ccd_softc *cs) 1692 { 1693 1694 cs->sc_flags &= ~CCDF_LOCKED; 1695 if ((cs->sc_flags & CCDF_WANTED) != 0) { 1696 cs->sc_flags &= ~CCDF_WANTED; 1697 wakeup(cs); 1698 } 1699 } 1700 1701 #ifdef DEBUG 1702 static void 1703 printiinfo(struct ccdiinfo *ii) 1704 { 1705 int ix, i; 1706 1707 for (ix = 0; ii->ii_ndisk; ix++, ii++) { 1708 printf(" itab[%d]: #dk %d sblk %d soff %d", 1709 ix, ii->ii_ndisk, ii->ii_startblk, ii->ii_startoff); 1710 for (i = 0; i < ii->ii_ndisk; i++) 1711 printf(" %d", ii->ii_index[i]); 1712 printf("\n"); 1713 } 1714 } 1715 #endif 1716 1717 1718 /* Local Variables: */ 1719 /* c-argdecl-indent: 8 */ 1720 /* c-continued-statement-offset: 8 */ 1721 /* c-indent-level: 8 */ 1722 /* End: */ 1723