1 /* $FreeBSD: src/sys/dev/ccd/ccd.c,v 1.73.2.1 2001/09/11 09:49:52 kris Exp $ */ 2 /* $DragonFly: src/sys/dev/disk/ccd/ccd.c,v 1.18 2004/11/12 00:09:03 dillon Exp $ */ 3 4 /* $NetBSD: ccd.c,v 1.22 1995/12/08 19:13:26 thorpej Exp $ */ 5 6 /* 7 * Copyright (c) 1995 Jason R. Thorpe. 8 * All rights reserved. 9 * 10 * Redistribution and use in source and binary forms, with or without 11 * modification, are permitted provided that the following conditions 12 * are met: 13 * 1. Redistributions of source code must retain the above copyright 14 * notice, this list of conditions and the following disclaimer. 15 * 2. Redistributions in binary form must reproduce the above copyright 16 * notice, this list of conditions and the following disclaimer in the 17 * documentation and/or other materials provided with the distribution. 18 * 3. All advertising materials mentioning features or use of this software 19 * must display the following acknowledgement: 20 * This product includes software developed for the NetBSD Project 21 * by Jason R. Thorpe. 22 * 4. The name of the author may not be used to endorse or promote products 23 * derived from this software without specific prior written permission. 24 * 25 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR 26 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 27 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 28 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, 29 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, 30 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 31 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED 32 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 33 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 34 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 35 * SUCH DAMAGE. 36 */ 37 38 /* 39 * Copyright (c) 1988 University of Utah. 40 * Copyright (c) 1990, 1993 41 * The Regents of the University of California. All rights reserved. 42 * 43 * This code is derived from software contributed to Berkeley by 44 * the Systems Programming Group of the University of Utah Computer 45 * Science Department. 46 * 47 * Redistribution and use in source and binary forms, with or without 48 * modification, are permitted provided that the following conditions 49 * are met: 50 * 1. Redistributions of source code must retain the above copyright 51 * notice, this list of conditions and the following disclaimer. 52 * 2. Redistributions in binary form must reproduce the above copyright 53 * notice, this list of conditions and the following disclaimer in the 54 * documentation and/or other materials provided with the distribution. 55 * 3. All advertising materials mentioning features or use of this software 56 * must display the following acknowledgement: 57 * This product includes software developed by the University of 58 * California, Berkeley and its contributors. 59 * 4. Neither the name of the University nor the names of its contributors 60 * may be used to endorse or promote products derived from this software 61 * without specific prior written permission. 62 * 63 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 64 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 65 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 66 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 67 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 68 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 69 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 70 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 71 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 72 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 73 * SUCH DAMAGE. 74 * 75 * from: Utah $Hdr: cd.c 1.6 90/11/28$ 76 * 77 * @(#)cd.c 8.2 (Berkeley) 11/16/93 78 */ 79 80 /* 81 * "Concatenated" disk driver. 82 * 83 * Dynamic configuration and disklabel support by: 84 * Jason R. Thorpe <thorpej@nas.nasa.gov> 85 * Numerical Aerodynamic Simulation Facility 86 * Mail Stop 258-6 87 * NASA Ames Research Center 88 * Moffett Field, CA 94035 89 */ 90 91 #include "use_ccd.h" 92 93 #include <sys/param.h> 94 #include <sys/systm.h> 95 #include <sys/kernel.h> 96 #include <sys/module.h> 97 #include <sys/proc.h> 98 #include <sys/buf.h> 99 #include <sys/malloc.h> 100 #include <sys/nlookup.h> 101 #include <sys/conf.h> 102 #include <sys/stat.h> 103 #include <sys/sysctl.h> 104 #include <sys/disklabel.h> 105 #include <vfs/ufs/fs.h> 106 #include <sys/devicestat.h> 107 #include <sys/fcntl.h> 108 #include <sys/vnode.h> 109 #include <sys/buf2.h> 110 111 #include <sys/ccdvar.h> 112 113 #include <vm/vm_zone.h> 114 115 #if defined(CCDDEBUG) && !defined(DEBUG) 116 #define DEBUG 117 #endif 118 119 #ifdef DEBUG 120 #define CCDB_FOLLOW 0x01 121 #define CCDB_INIT 0x02 122 #define CCDB_IO 0x04 123 #define CCDB_LABEL 0x08 124 #define CCDB_VNODE 0x10 125 static int ccddebug = CCDB_FOLLOW | CCDB_INIT | CCDB_IO | CCDB_LABEL | 126 CCDB_VNODE; 127 SYSCTL_INT(_debug, OID_AUTO, ccddebug, CTLFLAG_RW, &ccddebug, 0, ""); 128 #undef DEBUG 129 #endif 130 131 #define ccdunit(x) dkunit(x) 132 #define ccdpart(x) dkpart(x) 133 134 /* 135 This is how mirroring works (only writes are special): 136 137 When initiating a write, ccdbuffer() returns two "struct ccdbuf *"s 138 linked together by the cb_mirror field. "cb_pflags & 139 CCDPF_MIRROR_DONE" is set to 0 on both of them. 140 141 When a component returns to ccdiodone(), it checks if "cb_pflags & 142 CCDPF_MIRROR_DONE" is set or not. If not, it sets the partner's 143 flag and returns. If it is, it means its partner has already 144 returned, so it will go to the regular cleanup. 145 146 */ 147 148 struct ccdbuf { 149 struct buf cb_buf; /* new I/O buf */ 150 struct buf *cb_obp; /* ptr. to original I/O buf */ 151 struct ccdbuf *cb_freenext; /* free list link */ 152 int cb_unit; /* target unit */ 153 int cb_comp; /* target component */ 154 int cb_pflags; /* mirror/parity status flag */ 155 struct ccdbuf *cb_mirror; /* mirror counterpart */ 156 }; 157 158 /* bits in cb_pflags */ 159 #define CCDPF_MIRROR_DONE 1 /* if set, mirror counterpart is done */ 160 161 #define CCDLABELDEV(dev) \ 162 (make_sub_dev(dev, dkmakeminor(ccdunit((dev)), 0, RAW_PART))) 163 164 static d_open_t ccdopen; 165 static d_close_t ccdclose; 166 static d_strategy_t ccdstrategy; 167 static d_ioctl_t ccdioctl; 168 static d_dump_t ccddump; 169 static d_psize_t ccdsize; 170 171 #define NCCDFREEHIWAT 16 172 173 #define CDEV_MAJOR 74 174 175 static struct cdevsw ccd_cdevsw = { 176 /* name */ "ccd", 177 /* maj */ CDEV_MAJOR, 178 /* flags */ D_DISK, 179 /* port */ NULL, 180 /* clone */ NULL, 181 182 /* open */ ccdopen, 183 /* close */ ccdclose, 184 /* read */ physread, 185 /* write */ physwrite, 186 /* ioctl */ ccdioctl, 187 /* poll */ nopoll, 188 /* mmap */ nommap, 189 /* strategy */ ccdstrategy, 190 /* dump */ ccddump, 191 /* psize */ ccdsize 192 }; 193 194 /* called during module initialization */ 195 static void ccdattach (void); 196 static int ccd_modevent (module_t, int, void *); 197 198 /* called by biodone() at interrupt time */ 199 static void ccdiodone (struct ccdbuf *cbp); 200 201 static void ccdstart (struct ccd_softc *, struct buf *); 202 static void ccdinterleave (struct ccd_softc *, int); 203 static void ccdintr (struct ccd_softc *, struct buf *); 204 static int ccdinit (struct ccddevice *, char **, struct thread *); 205 static int ccdlookup (char *, struct thread *td, struct vnode **); 206 static void ccdbuffer (struct ccdbuf **ret, struct ccd_softc *, 207 struct buf *, daddr_t, caddr_t, long); 208 static void ccdgetdisklabel (dev_t); 209 static void ccdmakedisklabel (struct ccd_softc *); 210 static int ccdlock (struct ccd_softc *); 211 static void ccdunlock (struct ccd_softc *); 212 213 #ifdef DEBUG 214 static void printiinfo (struct ccdiinfo *); 215 #endif 216 217 /* Non-private for the benefit of libkvm. */ 218 struct ccd_softc *ccd_softc; 219 struct ccddevice *ccddevs; 220 struct ccdbuf *ccdfreebufs; 221 static int numccdfreebufs; 222 static int numccd = 0; 223 224 /* 225 * getccdbuf() - Allocate and zero a ccd buffer. 226 * 227 * This routine is called at splbio(). 228 */ 229 230 static __inline 231 struct ccdbuf * 232 getccdbuf(struct ccdbuf *cpy) 233 { 234 struct ccdbuf *cbp; 235 236 /* 237 * Allocate from freelist or malloc as necessary 238 */ 239 if ((cbp = ccdfreebufs) != NULL) { 240 ccdfreebufs = cbp->cb_freenext; 241 --numccdfreebufs; 242 } else { 243 cbp = malloc(sizeof(struct ccdbuf), M_DEVBUF, M_WAITOK); 244 } 245 246 /* 247 * Used by mirroring code 248 */ 249 if (cpy) 250 bcopy(cpy, cbp, sizeof(struct ccdbuf)); 251 else 252 bzero(cbp, sizeof(struct ccdbuf)); 253 254 /* 255 * independant struct buf initialization 256 */ 257 LIST_INIT(&cbp->cb_buf.b_dep); 258 BUF_LOCKINIT(&cbp->cb_buf); 259 BUF_LOCK(&cbp->cb_buf, LK_EXCLUSIVE); 260 BUF_KERNPROC(&cbp->cb_buf); 261 262 return(cbp); 263 } 264 265 /* 266 * putccdbuf() - Free a ccd buffer. 267 * 268 * This routine is called at splbio(). 269 */ 270 271 static __inline 272 void 273 putccdbuf(struct ccdbuf *cbp) 274 { 275 BUF_UNLOCK(&cbp->cb_buf); 276 BUF_LOCKFREE(&cbp->cb_buf); 277 278 if (numccdfreebufs < NCCDFREEHIWAT) { 279 cbp->cb_freenext = ccdfreebufs; 280 ccdfreebufs = cbp; 281 ++numccdfreebufs; 282 } else { 283 free((caddr_t)cbp, M_DEVBUF); 284 } 285 } 286 287 288 /* 289 * Number of blocks to untouched in front of a component partition. 290 * This is to avoid violating its disklabel area when it starts at the 291 * beginning of the slice. 292 */ 293 #if !defined(CCD_OFFSET) 294 #define CCD_OFFSET 16 295 #endif 296 297 /* 298 * Called by main() during pseudo-device attachment. All we need 299 * to do is allocate enough space for devices to be configured later, and 300 * add devsw entries. 301 */ 302 static void 303 ccdattach() 304 { 305 int i; 306 int num = NCCD; 307 308 if (num > 1) 309 printf("ccd0-%d: Concatenated disk drivers\n", num-1); 310 else 311 printf("ccd0: Concatenated disk driver\n"); 312 313 ccd_softc = malloc(num * sizeof(struct ccd_softc), M_DEVBUF, 314 M_WAITOK | M_ZERO); 315 ccddevs = malloc(num * sizeof(struct ccddevice), M_DEVBUF, 316 M_WAITOK | M_ZERO); 317 numccd = num; 318 319 cdevsw_add(&ccd_cdevsw, 0, 0); 320 /* XXX: is this necessary? */ 321 for (i = 0; i < numccd; ++i) 322 ccddevs[i].ccd_dk = -1; 323 } 324 325 static int 326 ccd_modevent(mod, type, data) 327 module_t mod; 328 int type; 329 void *data; 330 { 331 int error = 0; 332 333 switch (type) { 334 case MOD_LOAD: 335 ccdattach(); 336 break; 337 338 case MOD_UNLOAD: 339 printf("ccd0: Unload not supported!\n"); 340 error = EOPNOTSUPP; 341 break; 342 343 default: /* MOD_SHUTDOWN etc */ 344 break; 345 } 346 return (error); 347 } 348 349 DEV_MODULE(ccd, ccd_modevent, NULL); 350 351 static int 352 ccdinit(struct ccddevice *ccd, char **cpaths, struct thread *td) 353 { 354 struct ccd_softc *cs = &ccd_softc[ccd->ccd_unit]; 355 struct ccdcinfo *ci = NULL; /* XXX */ 356 size_t size; 357 int ix; 358 struct vnode *vp; 359 size_t minsize; 360 int maxsecsize; 361 struct partinfo dpart; 362 struct ccdgeom *ccg = &cs->sc_geom; 363 char tmppath[MAXPATHLEN]; 364 int error = 0; 365 struct ucred *cred; 366 367 KKASSERT(td->td_proc); 368 cred = td->td_proc->p_ucred; 369 370 #ifdef DEBUG 371 if (ccddebug & (CCDB_FOLLOW|CCDB_INIT)) 372 printf("ccdinit: unit %d\n", ccd->ccd_unit); 373 #endif 374 375 cs->sc_size = 0; 376 cs->sc_ileave = ccd->ccd_interleave; 377 cs->sc_nccdisks = ccd->ccd_ndev; 378 379 /* Allocate space for the component info. */ 380 cs->sc_cinfo = malloc(cs->sc_nccdisks * sizeof(struct ccdcinfo), 381 M_DEVBUF, M_WAITOK); 382 383 /* 384 * Verify that each component piece exists and record 385 * relevant information about it. 386 */ 387 maxsecsize = 0; 388 minsize = 0; 389 for (ix = 0; ix < cs->sc_nccdisks; ix++) { 390 vp = ccd->ccd_vpp[ix]; 391 ci = &cs->sc_cinfo[ix]; 392 ci->ci_vp = vp; 393 394 /* 395 * Copy in the pathname of the component. 396 */ 397 bzero(tmppath, sizeof(tmppath)); /* sanity */ 398 if ((error = copyinstr(cpaths[ix], tmppath, 399 MAXPATHLEN, &ci->ci_pathlen)) != 0) { 400 #ifdef DEBUG 401 if (ccddebug & (CCDB_FOLLOW|CCDB_INIT)) 402 printf("ccd%d: can't copy path, error = %d\n", 403 ccd->ccd_unit, error); 404 #endif 405 goto fail; 406 } 407 ci->ci_path = malloc(ci->ci_pathlen, M_DEVBUF, M_WAITOK); 408 bcopy(tmppath, ci->ci_path, ci->ci_pathlen); 409 410 ci->ci_dev = vn_todev(vp); 411 412 /* 413 * Get partition information for the component. 414 */ 415 if ((error = VOP_IOCTL(vp, DIOCGPART, (caddr_t)&dpart, 416 FREAD, cred, td)) != 0) { 417 #ifdef DEBUG 418 if (ccddebug & (CCDB_FOLLOW|CCDB_INIT)) 419 printf("ccd%d: %s: ioctl failed, error = %d\n", 420 ccd->ccd_unit, ci->ci_path, error); 421 #endif 422 goto fail; 423 } 424 if (dpart.part->p_fstype == FS_BSDFFS) { 425 maxsecsize = 426 ((dpart.disklab->d_secsize > maxsecsize) ? 427 dpart.disklab->d_secsize : maxsecsize); 428 size = dpart.part->p_size - CCD_OFFSET; 429 } else { 430 #ifdef DEBUG 431 if (ccddebug & (CCDB_FOLLOW|CCDB_INIT)) 432 printf("ccd%d: %s: incorrect partition type\n", 433 ccd->ccd_unit, ci->ci_path); 434 #endif 435 error = EFTYPE; 436 goto fail; 437 } 438 439 /* 440 * Calculate the size, truncating to an interleave 441 * boundary if necessary. 442 */ 443 444 if (cs->sc_ileave > 1) 445 size -= size % cs->sc_ileave; 446 447 if (size == 0) { 448 #ifdef DEBUG 449 if (ccddebug & (CCDB_FOLLOW|CCDB_INIT)) 450 printf("ccd%d: %s: size == 0\n", 451 ccd->ccd_unit, ci->ci_path); 452 #endif 453 error = ENODEV; 454 goto fail; 455 } 456 457 if (minsize == 0 || size < minsize) 458 minsize = size; 459 ci->ci_size = size; 460 cs->sc_size += size; 461 } 462 463 /* 464 * Don't allow the interleave to be smaller than 465 * the biggest component sector. 466 */ 467 if ((cs->sc_ileave > 0) && 468 (cs->sc_ileave < (maxsecsize / DEV_BSIZE))) { 469 #ifdef DEBUG 470 if (ccddebug & (CCDB_FOLLOW|CCDB_INIT)) 471 printf("ccd%d: interleave must be at least %d\n", 472 ccd->ccd_unit, (maxsecsize / DEV_BSIZE)); 473 #endif 474 error = EINVAL; 475 goto fail; 476 } 477 478 /* 479 * If uniform interleave is desired set all sizes to that of 480 * the smallest component. This will guarentee that a single 481 * interleave table is generated. 482 * 483 * Lost space must be taken into account when calculating the 484 * overall size. Half the space is lost when CCDF_MIRROR is 485 * specified. One disk is lost when CCDF_PARITY is specified. 486 */ 487 if (ccd->ccd_flags & CCDF_UNIFORM) { 488 for (ci = cs->sc_cinfo; 489 ci < &cs->sc_cinfo[cs->sc_nccdisks]; ci++) { 490 ci->ci_size = minsize; 491 } 492 if (ccd->ccd_flags & CCDF_MIRROR) { 493 /* 494 * Check to see if an even number of components 495 * have been specified. The interleave must also 496 * be non-zero in order for us to be able to 497 * guarentee the topology. 498 */ 499 if (cs->sc_nccdisks % 2) { 500 printf("ccd%d: mirroring requires an even number of disks\n", ccd->ccd_unit ); 501 error = EINVAL; 502 goto fail; 503 } 504 if (cs->sc_ileave == 0) { 505 printf("ccd%d: an interleave must be specified when mirroring\n", ccd->ccd_unit); 506 error = EINVAL; 507 goto fail; 508 } 509 cs->sc_size = (cs->sc_nccdisks/2) * minsize; 510 } else if (ccd->ccd_flags & CCDF_PARITY) { 511 cs->sc_size = (cs->sc_nccdisks-1) * minsize; 512 } else { 513 if (cs->sc_ileave == 0) { 514 printf("ccd%d: an interleave must be specified when using parity\n", ccd->ccd_unit); 515 error = EINVAL; 516 goto fail; 517 } 518 cs->sc_size = cs->sc_nccdisks * minsize; 519 } 520 } 521 522 /* 523 * Construct the interleave table. 524 */ 525 ccdinterleave(cs, ccd->ccd_unit); 526 527 /* 528 * Create pseudo-geometry based on 1MB cylinders. It's 529 * pretty close. 530 */ 531 ccg->ccg_secsize = maxsecsize; 532 ccg->ccg_ntracks = 1; 533 ccg->ccg_nsectors = 1024 * 1024 / ccg->ccg_secsize; 534 ccg->ccg_ncylinders = cs->sc_size / ccg->ccg_nsectors; 535 536 /* 537 * Add an devstat entry for this device. 538 */ 539 devstat_add_entry(&cs->device_stats, "ccd", ccd->ccd_unit, 540 ccg->ccg_secsize, DEVSTAT_ALL_SUPPORTED, 541 DEVSTAT_TYPE_STORARRAY |DEVSTAT_TYPE_IF_OTHER, 542 DEVSTAT_PRIORITY_ARRAY); 543 544 cs->sc_flags |= CCDF_INITED; 545 cs->sc_cflags = ccd->ccd_flags; /* So we can find out later... */ 546 cs->sc_unit = ccd->ccd_unit; 547 return (0); 548 fail: 549 while (ci > cs->sc_cinfo) { 550 ci--; 551 free(ci->ci_path, M_DEVBUF); 552 } 553 free(cs->sc_cinfo, M_DEVBUF); 554 return (error); 555 } 556 557 static void 558 ccdinterleave(cs, unit) 559 struct ccd_softc *cs; 560 int unit; 561 { 562 struct ccdcinfo *ci, *smallci; 563 struct ccdiinfo *ii; 564 daddr_t bn, lbn; 565 int ix; 566 u_long size; 567 568 #ifdef DEBUG 569 if (ccddebug & CCDB_INIT) 570 printf("ccdinterleave(%x): ileave %d\n", cs, cs->sc_ileave); 571 #endif 572 573 /* 574 * Allocate an interleave table. The worst case occurs when each 575 * of N disks is of a different size, resulting in N interleave 576 * tables. 577 * 578 * Chances are this is too big, but we don't care. 579 */ 580 size = (cs->sc_nccdisks + 1) * sizeof(struct ccdiinfo); 581 cs->sc_itable = (struct ccdiinfo *)malloc(size, M_DEVBUF, M_WAITOK); 582 bzero((caddr_t)cs->sc_itable, size); 583 584 /* 585 * Trivial case: no interleave (actually interleave of disk size). 586 * Each table entry represents a single component in its entirety. 587 * 588 * An interleave of 0 may not be used with a mirror or parity setup. 589 */ 590 if (cs->sc_ileave == 0) { 591 bn = 0; 592 ii = cs->sc_itable; 593 594 for (ix = 0; ix < cs->sc_nccdisks; ix++) { 595 /* Allocate space for ii_index. */ 596 ii->ii_index = malloc(sizeof(int), M_DEVBUF, M_WAITOK); 597 ii->ii_ndisk = 1; 598 ii->ii_startblk = bn; 599 ii->ii_startoff = 0; 600 ii->ii_index[0] = ix; 601 bn += cs->sc_cinfo[ix].ci_size; 602 ii++; 603 } 604 ii->ii_ndisk = 0; 605 #ifdef DEBUG 606 if (ccddebug & CCDB_INIT) 607 printiinfo(cs->sc_itable); 608 #endif 609 return; 610 } 611 612 /* 613 * The following isn't fast or pretty; it doesn't have to be. 614 */ 615 size = 0; 616 bn = lbn = 0; 617 for (ii = cs->sc_itable; ; ii++) { 618 /* 619 * Allocate space for ii_index. We might allocate more then 620 * we use. 621 */ 622 ii->ii_index = malloc((sizeof(int) * cs->sc_nccdisks), 623 M_DEVBUF, M_WAITOK); 624 625 /* 626 * Locate the smallest of the remaining components 627 */ 628 smallci = NULL; 629 for (ci = cs->sc_cinfo; ci < &cs->sc_cinfo[cs->sc_nccdisks]; 630 ci++) { 631 if (ci->ci_size > size && 632 (smallci == NULL || 633 ci->ci_size < smallci->ci_size)) { 634 smallci = ci; 635 } 636 } 637 638 /* 639 * Nobody left, all done 640 */ 641 if (smallci == NULL) { 642 ii->ii_ndisk = 0; 643 break; 644 } 645 646 /* 647 * Record starting logical block using an sc_ileave blocksize. 648 */ 649 ii->ii_startblk = bn / cs->sc_ileave; 650 651 /* 652 * Record starting comopnent block using an sc_ileave 653 * blocksize. This value is relative to the beginning of 654 * a component disk. 655 */ 656 ii->ii_startoff = lbn; 657 658 /* 659 * Determine how many disks take part in this interleave 660 * and record their indices. 661 */ 662 ix = 0; 663 for (ci = cs->sc_cinfo; 664 ci < &cs->sc_cinfo[cs->sc_nccdisks]; ci++) { 665 if (ci->ci_size >= smallci->ci_size) { 666 ii->ii_index[ix++] = ci - cs->sc_cinfo; 667 } 668 } 669 ii->ii_ndisk = ix; 670 bn += ix * (smallci->ci_size - size); 671 lbn = smallci->ci_size / cs->sc_ileave; 672 size = smallci->ci_size; 673 } 674 #ifdef DEBUG 675 if (ccddebug & CCDB_INIT) 676 printiinfo(cs->sc_itable); 677 #endif 678 } 679 680 /* ARGSUSED */ 681 static int 682 ccdopen(dev_t dev, int flags, int fmt, d_thread_t *td) 683 { 684 int unit = ccdunit(dev); 685 struct ccd_softc *cs; 686 struct disklabel *lp; 687 int error = 0, part, pmask; 688 689 #ifdef DEBUG 690 if (ccddebug & CCDB_FOLLOW) 691 printf("ccdopen(%x, %x)\n", dev, flags); 692 #endif 693 if (unit >= numccd) 694 return (ENXIO); 695 cs = &ccd_softc[unit]; 696 697 if ((error = ccdlock(cs)) != 0) 698 return (error); 699 700 lp = &cs->sc_label; 701 702 part = ccdpart(dev); 703 pmask = (1 << part); 704 705 /* 706 * If we're initialized, check to see if there are any other 707 * open partitions. If not, then it's safe to update 708 * the in-core disklabel. 709 */ 710 if ((cs->sc_flags & CCDF_INITED) && (cs->sc_openmask == 0)) 711 ccdgetdisklabel(dev); 712 713 /* Check that the partition exists. */ 714 if (part != RAW_PART && ((part >= lp->d_npartitions) || 715 (lp->d_partitions[part].p_fstype == FS_UNUSED))) { 716 error = ENXIO; 717 goto done; 718 } 719 720 cs->sc_openmask |= pmask; 721 done: 722 ccdunlock(cs); 723 return (0); 724 } 725 726 /* ARGSUSED */ 727 static int 728 ccdclose(dev_t dev, int flags, int fmt, d_thread_t *td) 729 { 730 int unit = ccdunit(dev); 731 struct ccd_softc *cs; 732 int error = 0, part; 733 734 #ifdef DEBUG 735 if (ccddebug & CCDB_FOLLOW) 736 printf("ccdclose(%x, %x)\n", dev, flags); 737 #endif 738 739 if (unit >= numccd) 740 return (ENXIO); 741 cs = &ccd_softc[unit]; 742 743 if ((error = ccdlock(cs)) != 0) 744 return (error); 745 746 part = ccdpart(dev); 747 748 /* ...that much closer to allowing unconfiguration... */ 749 cs->sc_openmask &= ~(1 << part); 750 ccdunlock(cs); 751 return (0); 752 } 753 754 static void 755 ccdstrategy(bp) 756 struct buf *bp; 757 { 758 int unit = ccdunit(bp->b_dev); 759 struct ccd_softc *cs = &ccd_softc[unit]; 760 int s; 761 int wlabel; 762 struct disklabel *lp; 763 764 #ifdef DEBUG 765 if (ccddebug & CCDB_FOLLOW) 766 printf("ccdstrategy(%x): unit %d\n", bp, unit); 767 #endif 768 if ((cs->sc_flags & CCDF_INITED) == 0) { 769 bp->b_error = ENXIO; 770 bp->b_flags |= B_ERROR; 771 goto done; 772 } 773 774 /* If it's a nil transfer, wake up the top half now. */ 775 if (bp->b_bcount == 0) 776 goto done; 777 778 lp = &cs->sc_label; 779 780 /* 781 * Do bounds checking and adjust transfer. If there's an 782 * error, the bounds check will flag that for us. 783 */ 784 wlabel = cs->sc_flags & (CCDF_WLABEL|CCDF_LABELLING); 785 if (ccdpart(bp->b_dev) != RAW_PART) { 786 if (bounds_check_with_label(bp, lp, wlabel) <= 0) 787 goto done; 788 } else { 789 int pbn; /* in sc_secsize chunks */ 790 long sz; /* in sc_secsize chunks */ 791 792 pbn = bp->b_blkno / (cs->sc_geom.ccg_secsize / DEV_BSIZE); 793 sz = howmany(bp->b_bcount, cs->sc_geom.ccg_secsize); 794 795 /* 796 * If out of bounds return an error. If at the EOF point, 797 * simply read or write less. 798 */ 799 800 if (pbn < 0 || pbn >= cs->sc_size) { 801 bp->b_resid = bp->b_bcount; 802 if (pbn != cs->sc_size) { 803 bp->b_error = EINVAL; 804 bp->b_flags |= B_ERROR | B_INVAL; 805 } 806 goto done; 807 } 808 809 /* 810 * If the request crosses EOF, truncate the request. 811 */ 812 if (pbn + sz > cs->sc_size) { 813 bp->b_bcount = (cs->sc_size - pbn) * 814 cs->sc_geom.ccg_secsize; 815 } 816 } 817 818 bp->b_resid = bp->b_bcount; 819 820 /* 821 * "Start" the unit. 822 */ 823 s = splbio(); 824 ccdstart(cs, bp); 825 splx(s); 826 return; 827 done: 828 biodone(bp); 829 } 830 831 static void 832 ccdstart(cs, bp) 833 struct ccd_softc *cs; 834 struct buf *bp; 835 { 836 long bcount, rcount; 837 struct ccdbuf *cbp[4]; 838 /* XXX! : 2 reads and 2 writes for RAID 4/5 */ 839 caddr_t addr; 840 daddr_t bn; 841 struct partition *pp; 842 843 #ifdef DEBUG 844 if (ccddebug & CCDB_FOLLOW) 845 printf("ccdstart(%x, %x)\n", cs, bp); 846 #endif 847 848 /* Record the transaction start */ 849 devstat_start_transaction(&cs->device_stats); 850 851 /* 852 * Translate the partition-relative block number to an absolute. 853 */ 854 bn = bp->b_blkno; 855 if (ccdpart(bp->b_dev) != RAW_PART) { 856 pp = &cs->sc_label.d_partitions[ccdpart(bp->b_dev)]; 857 bn += pp->p_offset; 858 } 859 860 /* 861 * Allocate component buffers and fire off the requests 862 */ 863 addr = bp->b_data; 864 for (bcount = bp->b_bcount; bcount > 0; bcount -= rcount) { 865 ccdbuffer(cbp, cs, bp, bn, addr, bcount); 866 rcount = cbp[0]->cb_buf.b_bcount; 867 868 if (cs->sc_cflags & CCDF_MIRROR) { 869 /* 870 * Mirroring. Writes go to both disks, reads are 871 * taken from whichever disk seems most appropriate. 872 * 873 * We attempt to localize reads to the disk whos arm 874 * is nearest the read request. We ignore seeks due 875 * to writes when making this determination and we 876 * also try to avoid hogging. 877 */ 878 if ((cbp[0]->cb_buf.b_flags & B_READ) == 0) { 879 cbp[0]->cb_buf.b_vp->v_numoutput++; 880 cbp[1]->cb_buf.b_vp->v_numoutput++; 881 VOP_STRATEGY(cbp[0]->cb_buf.b_vp, 882 &cbp[0]->cb_buf); 883 VOP_STRATEGY(cbp[1]->cb_buf.b_vp, 884 &cbp[1]->cb_buf); 885 } else { 886 int pick = cs->sc_pick; 887 daddr_t range = cs->sc_size / 16; 888 889 if (bn < cs->sc_blk[pick] - range || 890 bn > cs->sc_blk[pick] + range 891 ) { 892 cs->sc_pick = pick = 1 - pick; 893 } 894 cs->sc_blk[pick] = bn + btodb(rcount); 895 VOP_STRATEGY(cbp[pick]->cb_buf.b_vp, 896 &cbp[pick]->cb_buf); 897 } 898 } else { 899 /* 900 * Not mirroring 901 */ 902 if ((cbp[0]->cb_buf.b_flags & B_READ) == 0) 903 cbp[0]->cb_buf.b_vp->v_numoutput++; 904 VOP_STRATEGY(cbp[0]->cb_buf.b_vp, &cbp[0]->cb_buf); 905 } 906 bn += btodb(rcount); 907 addr += rcount; 908 } 909 } 910 911 /* 912 * Build a component buffer header. 913 */ 914 static void 915 ccdbuffer(cb, cs, bp, bn, addr, bcount) 916 struct ccdbuf **cb; 917 struct ccd_softc *cs; 918 struct buf *bp; 919 daddr_t bn; 920 caddr_t addr; 921 long bcount; 922 { 923 struct ccdcinfo *ci, *ci2 = NULL; /* XXX */ 924 struct ccdbuf *cbp; 925 daddr_t cbn, cboff; 926 off_t cbc; 927 928 #ifdef DEBUG 929 if (ccddebug & CCDB_IO) 930 printf("ccdbuffer(%x, %x, %d, %x, %d)\n", 931 cs, bp, bn, addr, bcount); 932 #endif 933 /* 934 * Determine which component bn falls in. 935 */ 936 cbn = bn; 937 cboff = 0; 938 939 if (cs->sc_ileave == 0) { 940 /* 941 * Serially concatenated and neither a mirror nor a parity 942 * config. This is a special case. 943 */ 944 daddr_t sblk; 945 946 sblk = 0; 947 for (ci = cs->sc_cinfo; cbn >= sblk + ci->ci_size; ci++) 948 sblk += ci->ci_size; 949 cbn -= sblk; 950 } else { 951 struct ccdiinfo *ii; 952 int ccdisk, off; 953 954 /* 955 * Calculate cbn, the logical superblock (sc_ileave chunks), 956 * and cboff, a normal block offset (DEV_BSIZE chunks) relative 957 * to cbn. 958 */ 959 cboff = cbn % cs->sc_ileave; /* DEV_BSIZE gran */ 960 cbn = cbn / cs->sc_ileave; /* DEV_BSIZE * ileave gran */ 961 962 /* 963 * Figure out which interleave table to use. 964 */ 965 for (ii = cs->sc_itable; ii->ii_ndisk; ii++) { 966 if (ii->ii_startblk > cbn) 967 break; 968 } 969 ii--; 970 971 /* 972 * off is the logical superblock relative to the beginning 973 * of this interleave block. 974 */ 975 off = cbn - ii->ii_startblk; 976 977 /* 978 * We must calculate which disk component to use (ccdisk), 979 * and recalculate cbn to be the superblock relative to 980 * the beginning of the component. This is typically done by 981 * adding 'off' and ii->ii_startoff together. However, 'off' 982 * must typically be divided by the number of components in 983 * this interleave array to be properly convert it from a 984 * CCD-relative logical superblock number to a 985 * component-relative superblock number. 986 */ 987 if (ii->ii_ndisk == 1) { 988 /* 989 * When we have just one disk, it can't be a mirror 990 * or a parity config. 991 */ 992 ccdisk = ii->ii_index[0]; 993 cbn = ii->ii_startoff + off; 994 } else { 995 if (cs->sc_cflags & CCDF_MIRROR) { 996 /* 997 * We have forced a uniform mapping, resulting 998 * in a single interleave array. We double 999 * up on the first half of the available 1000 * components and our mirror is in the second 1001 * half. This only works with a single 1002 * interleave array because doubling up 1003 * doubles the number of sectors, so there 1004 * cannot be another interleave array because 1005 * the next interleave array's calculations 1006 * would be off. 1007 */ 1008 int ndisk2 = ii->ii_ndisk / 2; 1009 ccdisk = ii->ii_index[off % ndisk2]; 1010 cbn = ii->ii_startoff + off / ndisk2; 1011 ci2 = &cs->sc_cinfo[ccdisk + ndisk2]; 1012 } else if (cs->sc_cflags & CCDF_PARITY) { 1013 /* 1014 * XXX not implemented yet 1015 */ 1016 int ndisk2 = ii->ii_ndisk - 1; 1017 ccdisk = ii->ii_index[off % ndisk2]; 1018 cbn = ii->ii_startoff + off / ndisk2; 1019 if (cbn % ii->ii_ndisk <= ccdisk) 1020 ccdisk++; 1021 } else { 1022 ccdisk = ii->ii_index[off % ii->ii_ndisk]; 1023 cbn = ii->ii_startoff + off / ii->ii_ndisk; 1024 } 1025 } 1026 1027 ci = &cs->sc_cinfo[ccdisk]; 1028 1029 /* 1030 * Convert cbn from a superblock to a normal block so it 1031 * can be used to calculate (along with cboff) the normal 1032 * block index into this particular disk. 1033 */ 1034 cbn *= cs->sc_ileave; 1035 } 1036 1037 /* 1038 * Fill in the component buf structure. 1039 */ 1040 cbp = getccdbuf(NULL); 1041 cbp->cb_buf.b_flags = bp->b_flags | B_CALL; 1042 cbp->cb_buf.b_iodone = (void (*)(struct buf *))ccdiodone; 1043 cbp->cb_buf.b_dev = ci->ci_dev; /* XXX */ 1044 cbp->cb_buf.b_blkno = cbn + cboff + CCD_OFFSET; 1045 cbp->cb_buf.b_offset = dbtob(cbn + cboff + CCD_OFFSET); 1046 cbp->cb_buf.b_data = addr; 1047 cbp->cb_buf.b_vp = ci->ci_vp; 1048 if (cs->sc_ileave == 0) 1049 cbc = dbtob((off_t)(ci->ci_size - cbn)); 1050 else 1051 cbc = dbtob((off_t)(cs->sc_ileave - cboff)); 1052 cbp->cb_buf.b_bcount = (cbc < bcount) ? cbc : bcount; 1053 cbp->cb_buf.b_bufsize = cbp->cb_buf.b_bcount; 1054 1055 /* 1056 * context for ccdiodone 1057 */ 1058 cbp->cb_obp = bp; 1059 cbp->cb_unit = cs - ccd_softc; 1060 cbp->cb_comp = ci - cs->sc_cinfo; 1061 1062 #ifdef DEBUG 1063 if (ccddebug & CCDB_IO) 1064 printf(" dev %x(u%d): cbp %x bn %d addr %x bcnt %d\n", 1065 ci->ci_dev, ci-cs->sc_cinfo, cbp, cbp->cb_buf.b_blkno, 1066 cbp->cb_buf.b_data, cbp->cb_buf.b_bcount); 1067 #endif 1068 cb[0] = cbp; 1069 1070 /* 1071 * Note: both I/O's setup when reading from mirror, but only one 1072 * will be executed. 1073 */ 1074 if (cs->sc_cflags & CCDF_MIRROR) { 1075 /* mirror, setup second I/O */ 1076 cbp = getccdbuf(cb[0]); 1077 cbp->cb_buf.b_dev = ci2->ci_dev; 1078 cbp->cb_buf.b_vp = ci2->ci_vp; 1079 cbp->cb_comp = ci2 - cs->sc_cinfo; 1080 cb[1] = cbp; 1081 /* link together the ccdbuf's and clear "mirror done" flag */ 1082 cb[0]->cb_mirror = cb[1]; 1083 cb[1]->cb_mirror = cb[0]; 1084 cb[0]->cb_pflags &= ~CCDPF_MIRROR_DONE; 1085 cb[1]->cb_pflags &= ~CCDPF_MIRROR_DONE; 1086 } 1087 } 1088 1089 static void 1090 ccdintr(cs, bp) 1091 struct ccd_softc *cs; 1092 struct buf *bp; 1093 { 1094 #ifdef DEBUG 1095 if (ccddebug & CCDB_FOLLOW) 1096 printf("ccdintr(%x, %x)\n", cs, bp); 1097 #endif 1098 /* 1099 * Request is done for better or worse, wakeup the top half. 1100 */ 1101 if (bp->b_flags & B_ERROR) 1102 bp->b_resid = bp->b_bcount; 1103 devstat_end_transaction_buf(&cs->device_stats, bp); 1104 biodone(bp); 1105 } 1106 1107 /* 1108 * Called at interrupt time. 1109 * Mark the component as done and if all components are done, 1110 * take a ccd interrupt. 1111 */ 1112 static void 1113 ccdiodone(cbp) 1114 struct ccdbuf *cbp; 1115 { 1116 struct buf *bp = cbp->cb_obp; 1117 int unit = cbp->cb_unit; 1118 int count, s; 1119 1120 s = splbio(); 1121 #ifdef DEBUG 1122 if (ccddebug & CCDB_FOLLOW) 1123 printf("ccdiodone(%x)\n", cbp); 1124 if (ccddebug & CCDB_IO) { 1125 printf("ccdiodone: bp %x bcount %d resid %d\n", 1126 bp, bp->b_bcount, bp->b_resid); 1127 printf(" dev %x(u%d), cbp %x bn %d addr %x bcnt %d\n", 1128 cbp->cb_buf.b_dev, cbp->cb_comp, cbp, 1129 cbp->cb_buf.b_blkno, cbp->cb_buf.b_data, 1130 cbp->cb_buf.b_bcount); 1131 } 1132 #endif 1133 /* 1134 * If an error occured, report it. If this is a mirrored 1135 * configuration and the first of two possible reads, do not 1136 * set the error in the bp yet because the second read may 1137 * succeed. 1138 */ 1139 1140 if (cbp->cb_buf.b_flags & B_ERROR) { 1141 const char *msg = ""; 1142 1143 if ((ccd_softc[unit].sc_cflags & CCDF_MIRROR) && 1144 (cbp->cb_buf.b_flags & B_READ) && 1145 (cbp->cb_pflags & CCDPF_MIRROR_DONE) == 0) { 1146 /* 1147 * We will try our read on the other disk down 1148 * below, also reverse the default pick so if we 1149 * are doing a scan we do not keep hitting the 1150 * bad disk first. 1151 */ 1152 struct ccd_softc *cs = &ccd_softc[unit]; 1153 1154 msg = ", trying other disk"; 1155 cs->sc_pick = 1 - cs->sc_pick; 1156 cs->sc_blk[cs->sc_pick] = bp->b_blkno; 1157 } else { 1158 bp->b_flags |= B_ERROR; 1159 bp->b_error = cbp->cb_buf.b_error ? 1160 cbp->cb_buf.b_error : EIO; 1161 } 1162 printf("ccd%d: error %d on component %d block %d (ccd block %d)%s\n", 1163 unit, bp->b_error, cbp->cb_comp, 1164 (int)cbp->cb_buf.b_blkno, bp->b_blkno, msg); 1165 } 1166 1167 /* 1168 * Process mirror. If we are writing, I/O has been initiated on both 1169 * buffers and we fall through only after both are finished. 1170 * 1171 * If we are reading only one I/O is initiated at a time. If an 1172 * error occurs we initiate the second I/O and return, otherwise 1173 * we free the second I/O without initiating it. 1174 */ 1175 1176 if (ccd_softc[unit].sc_cflags & CCDF_MIRROR) { 1177 if ((cbp->cb_buf.b_flags & B_READ) == 0) { 1178 /* 1179 * When writing, handshake with the second buffer 1180 * to determine when both are done. If both are not 1181 * done, return here. 1182 */ 1183 if ((cbp->cb_pflags & CCDPF_MIRROR_DONE) == 0) { 1184 cbp->cb_mirror->cb_pflags |= CCDPF_MIRROR_DONE; 1185 putccdbuf(cbp); 1186 splx(s); 1187 return; 1188 } 1189 } else { 1190 /* 1191 * When reading, either dispose of the second buffer 1192 * or initiate I/O on the second buffer if an error 1193 * occured with this one. 1194 */ 1195 if ((cbp->cb_pflags & CCDPF_MIRROR_DONE) == 0) { 1196 if (cbp->cb_buf.b_flags & B_ERROR) { 1197 cbp->cb_mirror->cb_pflags |= 1198 CCDPF_MIRROR_DONE; 1199 VOP_STRATEGY( 1200 cbp->cb_mirror->cb_buf.b_vp, 1201 &cbp->cb_mirror->cb_buf 1202 ); 1203 putccdbuf(cbp); 1204 splx(s); 1205 return; 1206 } else { 1207 putccdbuf(cbp->cb_mirror); 1208 /* fall through */ 1209 } 1210 } 1211 } 1212 } 1213 1214 /* 1215 * use b_bufsize to determine how big the original request was rather 1216 * then b_bcount, because b_bcount may have been truncated for EOF. 1217 * 1218 * XXX We check for an error, but we do not test the resid for an 1219 * aligned EOF condition. This may result in character & block 1220 * device access not recognizing EOF properly when read or written 1221 * sequentially, but will not effect filesystems. 1222 */ 1223 count = cbp->cb_buf.b_bufsize; 1224 putccdbuf(cbp); 1225 1226 /* 1227 * If all done, "interrupt". 1228 */ 1229 bp->b_resid -= count; 1230 if (bp->b_resid < 0) 1231 panic("ccdiodone: count"); 1232 if (bp->b_resid == 0) 1233 ccdintr(&ccd_softc[unit], bp); 1234 splx(s); 1235 } 1236 1237 static int 1238 ccdioctl(dev_t dev, u_long cmd, caddr_t data, int flag, d_thread_t *td) 1239 { 1240 int unit = ccdunit(dev); 1241 int i, j, lookedup = 0, error = 0; 1242 int part, pmask, s; 1243 struct ccd_softc *cs; 1244 struct ccd_ioctl *ccio = (struct ccd_ioctl *)data; 1245 struct ccddevice ccd; 1246 char **cpp; 1247 struct vnode **vpp; 1248 struct ucred *cred; 1249 1250 KKASSERT(td->td_proc != NULL); 1251 cred = td->td_proc->p_ucred; 1252 1253 if (unit >= numccd) 1254 return (ENXIO); 1255 cs = &ccd_softc[unit]; 1256 1257 bzero(&ccd, sizeof(ccd)); 1258 1259 switch (cmd) { 1260 case CCDIOCSET: 1261 if (cs->sc_flags & CCDF_INITED) 1262 return (EBUSY); 1263 1264 if ((flag & FWRITE) == 0) 1265 return (EBADF); 1266 1267 if ((error = ccdlock(cs)) != 0) 1268 return (error); 1269 1270 if (ccio->ccio_ndisks > CCD_MAXNDISKS) 1271 return (EINVAL); 1272 1273 /* Fill in some important bits. */ 1274 ccd.ccd_unit = unit; 1275 ccd.ccd_interleave = ccio->ccio_ileave; 1276 if (ccd.ccd_interleave == 0 && 1277 ((ccio->ccio_flags & CCDF_MIRROR) || 1278 (ccio->ccio_flags & CCDF_PARITY))) { 1279 printf("ccd%d: disabling mirror/parity, interleave is 0\n", unit); 1280 ccio->ccio_flags &= ~(CCDF_MIRROR | CCDF_PARITY); 1281 } 1282 if ((ccio->ccio_flags & CCDF_MIRROR) && 1283 (ccio->ccio_flags & CCDF_PARITY)) { 1284 printf("ccd%d: can't specify both mirror and parity, using mirror\n", unit); 1285 ccio->ccio_flags &= ~CCDF_PARITY; 1286 } 1287 if ((ccio->ccio_flags & (CCDF_MIRROR | CCDF_PARITY)) && 1288 !(ccio->ccio_flags & CCDF_UNIFORM)) { 1289 printf("ccd%d: mirror/parity forces uniform flag\n", 1290 unit); 1291 ccio->ccio_flags |= CCDF_UNIFORM; 1292 } 1293 ccd.ccd_flags = ccio->ccio_flags & CCDF_USERMASK; 1294 1295 /* 1296 * Allocate space for and copy in the array of 1297 * componet pathnames and device numbers. 1298 */ 1299 cpp = malloc(ccio->ccio_ndisks * sizeof(char *), 1300 M_DEVBUF, M_WAITOK); 1301 vpp = malloc(ccio->ccio_ndisks * sizeof(struct vnode *), 1302 M_DEVBUF, M_WAITOK); 1303 1304 error = copyin((caddr_t)ccio->ccio_disks, (caddr_t)cpp, 1305 ccio->ccio_ndisks * sizeof(char **)); 1306 if (error) { 1307 free(vpp, M_DEVBUF); 1308 free(cpp, M_DEVBUF); 1309 ccdunlock(cs); 1310 return (error); 1311 } 1312 1313 #ifdef DEBUG 1314 if (ccddebug & CCDB_INIT) 1315 for (i = 0; i < ccio->ccio_ndisks; ++i) 1316 printf("ccdioctl: component %d: 0x%x\n", 1317 i, cpp[i]); 1318 #endif 1319 1320 for (i = 0; i < ccio->ccio_ndisks; ++i) { 1321 #ifdef DEBUG 1322 if (ccddebug & CCDB_INIT) 1323 printf("ccdioctl: lookedup = %d\n", lookedup); 1324 #endif 1325 if ((error = ccdlookup(cpp[i], td, &vpp[i])) != 0) { 1326 for (j = 0; j < lookedup; ++j) 1327 (void)vn_close(vpp[j], FREAD|FWRITE, td); 1328 free(vpp, M_DEVBUF); 1329 free(cpp, M_DEVBUF); 1330 ccdunlock(cs); 1331 return (error); 1332 } 1333 ++lookedup; 1334 } 1335 ccd.ccd_cpp = cpp; 1336 ccd.ccd_vpp = vpp; 1337 ccd.ccd_ndev = ccio->ccio_ndisks; 1338 1339 /* 1340 * Initialize the ccd. Fills in the softc for us. 1341 */ 1342 if ((error = ccdinit(&ccd, cpp, td)) != 0) { 1343 for (j = 0; j < lookedup; ++j) 1344 (void)vn_close(vpp[j], FREAD|FWRITE, td); 1345 bzero(&ccd_softc[unit], sizeof(struct ccd_softc)); 1346 free(vpp, M_DEVBUF); 1347 free(cpp, M_DEVBUF); 1348 ccdunlock(cs); 1349 return (error); 1350 } 1351 1352 /* 1353 * The ccd has been successfully initialized, so 1354 * we can place it into the array and read the disklabel. 1355 */ 1356 bcopy(&ccd, &ccddevs[unit], sizeof(ccd)); 1357 ccio->ccio_unit = unit; 1358 ccio->ccio_size = cs->sc_size; 1359 ccdgetdisklabel(dev); 1360 1361 ccdunlock(cs); 1362 1363 break; 1364 1365 case CCDIOCCLR: 1366 if ((cs->sc_flags & CCDF_INITED) == 0) 1367 return (ENXIO); 1368 1369 if ((flag & FWRITE) == 0) 1370 return (EBADF); 1371 1372 if ((error = ccdlock(cs)) != 0) 1373 return (error); 1374 1375 /* Don't unconfigure if any other partitions are open */ 1376 part = ccdpart(dev); 1377 pmask = (1 << part); 1378 if ((cs->sc_openmask & ~pmask)) { 1379 ccdunlock(cs); 1380 return (EBUSY); 1381 } 1382 1383 /* 1384 * Free ccd_softc information and clear entry. 1385 */ 1386 1387 /* Close the components and free their pathnames. */ 1388 for (i = 0; i < cs->sc_nccdisks; ++i) { 1389 /* 1390 * XXX: this close could potentially fail and 1391 * cause Bad Things. Maybe we need to force 1392 * the close to happen? 1393 */ 1394 #ifdef DEBUG 1395 if (ccddebug & CCDB_VNODE) 1396 vprint("CCDIOCCLR: vnode info", 1397 cs->sc_cinfo[i].ci_vp); 1398 #endif 1399 (void)vn_close(cs->sc_cinfo[i].ci_vp, FREAD|FWRITE, td); 1400 free(cs->sc_cinfo[i].ci_path, M_DEVBUF); 1401 } 1402 1403 /* Free interleave index. */ 1404 for (i = 0; cs->sc_itable[i].ii_ndisk; ++i) 1405 free(cs->sc_itable[i].ii_index, M_DEVBUF); 1406 1407 /* Free component info and interleave table. */ 1408 free(cs->sc_cinfo, M_DEVBUF); 1409 free(cs->sc_itable, M_DEVBUF); 1410 cs->sc_flags &= ~CCDF_INITED; 1411 1412 /* 1413 * Free ccddevice information and clear entry. 1414 */ 1415 free(ccddevs[unit].ccd_cpp, M_DEVBUF); 1416 free(ccddevs[unit].ccd_vpp, M_DEVBUF); 1417 ccd.ccd_dk = -1; 1418 bcopy(&ccd, &ccddevs[unit], sizeof(ccd)); 1419 1420 /* 1421 * And remove the devstat entry. 1422 */ 1423 devstat_remove_entry(&cs->device_stats); 1424 1425 /* This must be atomic. */ 1426 s = splhigh(); 1427 ccdunlock(cs); 1428 bzero(cs, sizeof(struct ccd_softc)); 1429 splx(s); 1430 1431 break; 1432 1433 case DIOCGDINFO: 1434 if ((cs->sc_flags & CCDF_INITED) == 0) 1435 return (ENXIO); 1436 1437 *(struct disklabel *)data = cs->sc_label; 1438 break; 1439 1440 case DIOCGPART: 1441 if ((cs->sc_flags & CCDF_INITED) == 0) 1442 return (ENXIO); 1443 1444 ((struct partinfo *)data)->disklab = &cs->sc_label; 1445 ((struct partinfo *)data)->part = 1446 &cs->sc_label.d_partitions[ccdpart(dev)]; 1447 break; 1448 1449 case DIOCWDINFO: 1450 case DIOCSDINFO: 1451 if ((cs->sc_flags & CCDF_INITED) == 0) 1452 return (ENXIO); 1453 1454 if ((flag & FWRITE) == 0) 1455 return (EBADF); 1456 1457 if ((error = ccdlock(cs)) != 0) 1458 return (error); 1459 1460 cs->sc_flags |= CCDF_LABELLING; 1461 1462 error = setdisklabel(&cs->sc_label, 1463 (struct disklabel *)data, 0); 1464 if (error == 0) { 1465 if (cmd == DIOCWDINFO) { 1466 dev_t cdev = CCDLABELDEV(dev); 1467 error = writedisklabel(cdev, &cs->sc_label); 1468 } 1469 } 1470 1471 cs->sc_flags &= ~CCDF_LABELLING; 1472 1473 ccdunlock(cs); 1474 1475 if (error) 1476 return (error); 1477 break; 1478 1479 case DIOCWLABEL: 1480 if ((cs->sc_flags & CCDF_INITED) == 0) 1481 return (ENXIO); 1482 1483 if ((flag & FWRITE) == 0) 1484 return (EBADF); 1485 if (*(int *)data != 0) 1486 cs->sc_flags |= CCDF_WLABEL; 1487 else 1488 cs->sc_flags &= ~CCDF_WLABEL; 1489 break; 1490 1491 default: 1492 return (ENOTTY); 1493 } 1494 1495 return (0); 1496 } 1497 1498 static int 1499 ccdsize(dev_t dev) 1500 { 1501 struct ccd_softc *cs; 1502 int part, size; 1503 1504 if (ccdopen(dev, 0, S_IFCHR, curthread)) 1505 return (-1); 1506 1507 cs = &ccd_softc[ccdunit(dev)]; 1508 part = ccdpart(dev); 1509 1510 if ((cs->sc_flags & CCDF_INITED) == 0) 1511 return (-1); 1512 1513 if (cs->sc_label.d_partitions[part].p_fstype != FS_SWAP) 1514 size = -1; 1515 else 1516 size = cs->sc_label.d_partitions[part].p_size; 1517 1518 if (ccdclose(dev, 0, S_IFCHR, curthread)) 1519 return (-1); 1520 1521 return (size); 1522 } 1523 1524 static int 1525 ccddump(dev_t dev, u_int count, u_int blkno, u_int secsize) 1526 { 1527 /* Not implemented. */ 1528 return ENXIO; 1529 } 1530 1531 /* 1532 * Lookup the provided name in the filesystem. If the file exists, 1533 * is a valid block device, and isn't being used by anyone else, 1534 * set *vpp to the file's vnode. 1535 */ 1536 static int 1537 ccdlookup(char *path, struct thread *td, struct vnode **vpp) 1538 { 1539 struct nlookupdata nd; 1540 struct ucred *cred; 1541 struct vnode *vp; 1542 int error; 1543 1544 KKASSERT(td->td_proc); 1545 cred = td->td_proc->p_ucred; 1546 *vpp = NULL; 1547 1548 error = nlookup_init(&nd, path, UIO_USERSPACE, NLC_FOLLOW|NLC_LOCKVP); 1549 if (error) 1550 return (error); 1551 if ((error = vn_open(&nd, NULL, FREAD|FWRITE, 0)) != 0) { 1552 #ifdef DEBUG 1553 if (ccddebug & CCDB_FOLLOW|CCDB_INIT) 1554 printf("ccdlookup: vn_open error = %d\n", error); 1555 #endif 1556 goto done; 1557 } 1558 vp = nd.nl_open_vp; 1559 1560 if (vp->v_usecount > 1) { 1561 error = EBUSY; 1562 goto done; 1563 } 1564 1565 if (!vn_isdisk(vp, &error)) 1566 goto done; 1567 1568 #ifdef DEBUG 1569 if (ccddebug & CCDB_VNODE) 1570 vprint("ccdlookup: vnode info", vp); 1571 #endif 1572 1573 VOP_UNLOCK(vp, 0, td); 1574 nd.nl_open_vp = NULL; 1575 nlookup_done(&nd); 1576 *vpp = vp; /* leave ref intact */ 1577 return (0); 1578 done: 1579 nlookup_done(&nd); 1580 return (error); 1581 } 1582 1583 /* 1584 * Read the disklabel from the ccd. If one is not present, fake one 1585 * up. 1586 */ 1587 static void 1588 ccdgetdisklabel(dev) 1589 dev_t dev; 1590 { 1591 int unit = ccdunit(dev); 1592 struct ccd_softc *cs = &ccd_softc[unit]; 1593 char *errstring; 1594 struct disklabel *lp = &cs->sc_label; 1595 struct ccdgeom *ccg = &cs->sc_geom; 1596 dev_t cdev; 1597 1598 bzero(lp, sizeof(*lp)); 1599 1600 lp->d_secperunit = cs->sc_size; 1601 lp->d_secsize = ccg->ccg_secsize; 1602 lp->d_nsectors = ccg->ccg_nsectors; 1603 lp->d_ntracks = ccg->ccg_ntracks; 1604 lp->d_ncylinders = ccg->ccg_ncylinders; 1605 lp->d_secpercyl = lp->d_ntracks * lp->d_nsectors; 1606 1607 strncpy(lp->d_typename, "ccd", sizeof(lp->d_typename)); 1608 lp->d_type = DTYPE_CCD; 1609 strncpy(lp->d_packname, "fictitious", sizeof(lp->d_packname)); 1610 lp->d_rpm = 3600; 1611 lp->d_interleave = 1; 1612 lp->d_flags = 0; 1613 1614 lp->d_partitions[RAW_PART].p_offset = 0; 1615 lp->d_partitions[RAW_PART].p_size = cs->sc_size; 1616 lp->d_partitions[RAW_PART].p_fstype = FS_UNUSED; 1617 lp->d_npartitions = RAW_PART + 1; 1618 1619 lp->d_bbsize = BBSIZE; /* XXX */ 1620 lp->d_sbsize = SBSIZE; /* XXX */ 1621 1622 lp->d_magic = DISKMAGIC; 1623 lp->d_magic2 = DISKMAGIC; 1624 lp->d_checksum = dkcksum(&cs->sc_label); 1625 1626 /* 1627 * Call the generic disklabel extraction routine. 1628 */ 1629 cdev = CCDLABELDEV(dev); 1630 errstring = readdisklabel(cdev, &cs->sc_label); 1631 if (errstring != NULL) 1632 ccdmakedisklabel(cs); 1633 1634 #ifdef DEBUG 1635 /* It's actually extremely common to have unlabeled ccds. */ 1636 if (ccddebug & CCDB_LABEL) 1637 if (errstring != NULL) 1638 printf("ccd%d: %s\n", unit, errstring); 1639 #endif 1640 } 1641 1642 /* 1643 * Take care of things one might want to take care of in the event 1644 * that a disklabel isn't present. 1645 */ 1646 static void 1647 ccdmakedisklabel(cs) 1648 struct ccd_softc *cs; 1649 { 1650 struct disklabel *lp = &cs->sc_label; 1651 1652 /* 1653 * For historical reasons, if there's no disklabel present 1654 * the raw partition must be marked FS_BSDFFS. 1655 */ 1656 lp->d_partitions[RAW_PART].p_fstype = FS_BSDFFS; 1657 1658 strncpy(lp->d_packname, "default label", sizeof(lp->d_packname)); 1659 } 1660 1661 /* 1662 * Wait interruptibly for an exclusive lock. 1663 * 1664 * XXX 1665 * Several drivers do this; it should be abstracted and made MP-safe. 1666 */ 1667 static int 1668 ccdlock(cs) 1669 struct ccd_softc *cs; 1670 { 1671 int error; 1672 1673 while ((cs->sc_flags & CCDF_LOCKED) != 0) { 1674 cs->sc_flags |= CCDF_WANTED; 1675 if ((error = tsleep(cs, PCATCH, "ccdlck", 0)) != 0) 1676 return (error); 1677 } 1678 cs->sc_flags |= CCDF_LOCKED; 1679 return (0); 1680 } 1681 1682 /* 1683 * Unlock and wake up any waiters. 1684 */ 1685 static void 1686 ccdunlock(cs) 1687 struct ccd_softc *cs; 1688 { 1689 1690 cs->sc_flags &= ~CCDF_LOCKED; 1691 if ((cs->sc_flags & CCDF_WANTED) != 0) { 1692 cs->sc_flags &= ~CCDF_WANTED; 1693 wakeup(cs); 1694 } 1695 } 1696 1697 #ifdef DEBUG 1698 static void 1699 printiinfo(ii) 1700 struct ccdiinfo *ii; 1701 { 1702 int ix, i; 1703 1704 for (ix = 0; ii->ii_ndisk; ix++, ii++) { 1705 printf(" itab[%d]: #dk %d sblk %d soff %d", 1706 ix, ii->ii_ndisk, ii->ii_startblk, ii->ii_startoff); 1707 for (i = 0; i < ii->ii_ndisk; i++) 1708 printf(" %d", ii->ii_index[i]); 1709 printf("\n"); 1710 } 1711 } 1712 #endif 1713 1714 1715 /* Local Variables: */ 1716 /* c-argdecl-indent: 8 */ 1717 /* c-continued-statement-offset: 8 */ 1718 /* c-indent-level: 8 */ 1719 /* End: */ 1720