1 /* $FreeBSD: src/sys/dev/ccd/ccd.c,v 1.73.2.1 2001/09/11 09:49:52 kris Exp $ */ 2 /* $DragonFly: src/sys/dev/disk/ccd/ccd.c,v 1.12 2003/09/23 05:03:40 dillon Exp $ */ 3 4 /* $NetBSD: ccd.c,v 1.22 1995/12/08 19:13:26 thorpej Exp $ */ 5 6 /* 7 * Copyright (c) 1995 Jason R. Thorpe. 8 * All rights reserved. 9 * 10 * Redistribution and use in source and binary forms, with or without 11 * modification, are permitted provided that the following conditions 12 * are met: 13 * 1. Redistributions of source code must retain the above copyright 14 * notice, this list of conditions and the following disclaimer. 15 * 2. Redistributions in binary form must reproduce the above copyright 16 * notice, this list of conditions and the following disclaimer in the 17 * documentation and/or other materials provided with the distribution. 18 * 3. All advertising materials mentioning features or use of this software 19 * must display the following acknowledgement: 20 * This product includes software developed for the NetBSD Project 21 * by Jason R. Thorpe. 22 * 4. The name of the author may not be used to endorse or promote products 23 * derived from this software without specific prior written permission. 24 * 25 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR 26 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 27 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 28 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, 29 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, 30 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 31 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED 32 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 33 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 34 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 35 * SUCH DAMAGE. 36 */ 37 38 /* 39 * Copyright (c) 1988 University of Utah. 40 * Copyright (c) 1990, 1993 41 * The Regents of the University of California. All rights reserved. 42 * 43 * This code is derived from software contributed to Berkeley by 44 * the Systems Programming Group of the University of Utah Computer 45 * Science Department. 46 * 47 * Redistribution and use in source and binary forms, with or without 48 * modification, are permitted provided that the following conditions 49 * are met: 50 * 1. Redistributions of source code must retain the above copyright 51 * notice, this list of conditions and the following disclaimer. 52 * 2. Redistributions in binary form must reproduce the above copyright 53 * notice, this list of conditions and the following disclaimer in the 54 * documentation and/or other materials provided with the distribution. 55 * 3. All advertising materials mentioning features or use of this software 56 * must display the following acknowledgement: 57 * This product includes software developed by the University of 58 * California, Berkeley and its contributors. 59 * 4. Neither the name of the University nor the names of its contributors 60 * may be used to endorse or promote products derived from this software 61 * without specific prior written permission. 62 * 63 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 64 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 65 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 66 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 67 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 68 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 69 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 70 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 71 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 72 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 73 * SUCH DAMAGE. 74 * 75 * from: Utah $Hdr: cd.c 1.6 90/11/28$ 76 * 77 * @(#)cd.c 8.2 (Berkeley) 11/16/93 78 */ 79 80 /* 81 * "Concatenated" disk driver. 82 * 83 * Dynamic configuration and disklabel support by: 84 * Jason R. Thorpe <thorpej@nas.nasa.gov> 85 * Numerical Aerodynamic Simulation Facility 86 * Mail Stop 258-6 87 * NASA Ames Research Center 88 * Moffett Field, CA 94035 89 */ 90 91 #include "use_ccd.h" 92 93 #include <sys/param.h> 94 #include <sys/systm.h> 95 #include <sys/kernel.h> 96 #include <sys/module.h> 97 #include <sys/proc.h> 98 #include <sys/buf.h> 99 #include <sys/malloc.h> 100 #include <sys/namei.h> 101 #include <sys/conf.h> 102 #include <sys/stat.h> 103 #include <sys/sysctl.h> 104 #include <sys/disklabel.h> 105 #include <vfs/ufs/fs.h> 106 #include <sys/devicestat.h> 107 #include <sys/fcntl.h> 108 #include <sys/vnode.h> 109 #include <sys/buf2.h> 110 111 #include <sys/ccdvar.h> 112 113 #include <vm/vm_zone.h> 114 115 #if defined(CCDDEBUG) && !defined(DEBUG) 116 #define DEBUG 117 #endif 118 119 #ifdef DEBUG 120 #define CCDB_FOLLOW 0x01 121 #define CCDB_INIT 0x02 122 #define CCDB_IO 0x04 123 #define CCDB_LABEL 0x08 124 #define CCDB_VNODE 0x10 125 static int ccddebug = CCDB_FOLLOW | CCDB_INIT | CCDB_IO | CCDB_LABEL | 126 CCDB_VNODE; 127 SYSCTL_INT(_debug, OID_AUTO, ccddebug, CTLFLAG_RW, &ccddebug, 0, ""); 128 #undef DEBUG 129 #endif 130 131 #define ccdunit(x) dkunit(x) 132 #define ccdpart(x) dkpart(x) 133 134 /* 135 This is how mirroring works (only writes are special): 136 137 When initiating a write, ccdbuffer() returns two "struct ccdbuf *"s 138 linked together by the cb_mirror field. "cb_pflags & 139 CCDPF_MIRROR_DONE" is set to 0 on both of them. 140 141 When a component returns to ccdiodone(), it checks if "cb_pflags & 142 CCDPF_MIRROR_DONE" is set or not. If not, it sets the partner's 143 flag and returns. If it is, it means its partner has already 144 returned, so it will go to the regular cleanup. 145 146 */ 147 148 struct ccdbuf { 149 struct buf cb_buf; /* new I/O buf */ 150 struct buf *cb_obp; /* ptr. to original I/O buf */ 151 struct ccdbuf *cb_freenext; /* free list link */ 152 int cb_unit; /* target unit */ 153 int cb_comp; /* target component */ 154 int cb_pflags; /* mirror/parity status flag */ 155 struct ccdbuf *cb_mirror; /* mirror counterpart */ 156 }; 157 158 /* bits in cb_pflags */ 159 #define CCDPF_MIRROR_DONE 1 /* if set, mirror counterpart is done */ 160 161 #define CCDLABELDEV(dev) \ 162 (makedev(major((dev)), dkmakeminor(ccdunit((dev)), 0, RAW_PART))) 163 164 static d_open_t ccdopen; 165 static d_close_t ccdclose; 166 static d_strategy_t ccdstrategy; 167 static d_ioctl_t ccdioctl; 168 static d_dump_t ccddump; 169 static d_psize_t ccdsize; 170 171 #define NCCDFREEHIWAT 16 172 173 #define CDEV_MAJOR 74 174 175 static struct cdevsw ccd_cdevsw = { 176 /* name */ "ccd", 177 /* maj */ CDEV_MAJOR, 178 /* flags */ D_DISK, 179 /* port */ NULL, 180 /* autoq */ 0, 181 182 /* open */ ccdopen, 183 /* close */ ccdclose, 184 /* read */ physread, 185 /* write */ physwrite, 186 /* ioctl */ ccdioctl, 187 /* poll */ nopoll, 188 /* mmap */ nommap, 189 /* strategy */ ccdstrategy, 190 /* dump */ ccddump, 191 /* psize */ ccdsize 192 }; 193 194 /* called during module initialization */ 195 static void ccdattach (void); 196 static int ccd_modevent (module_t, int, void *); 197 198 /* called by biodone() at interrupt time */ 199 static void ccdiodone (struct ccdbuf *cbp); 200 201 static void ccdstart (struct ccd_softc *, struct buf *); 202 static void ccdinterleave (struct ccd_softc *, int); 203 static void ccdintr (struct ccd_softc *, struct buf *); 204 static int ccdinit (struct ccddevice *, char **, struct thread *); 205 static int ccdlookup (char *, struct thread *td, struct vnode **); 206 static void ccdbuffer (struct ccdbuf **ret, struct ccd_softc *, 207 struct buf *, daddr_t, caddr_t, long); 208 static void ccdgetdisklabel (dev_t); 209 static void ccdmakedisklabel (struct ccd_softc *); 210 static int ccdlock (struct ccd_softc *); 211 static void ccdunlock (struct ccd_softc *); 212 213 #ifdef DEBUG 214 static void printiinfo (struct ccdiinfo *); 215 #endif 216 217 /* Non-private for the benefit of libkvm. */ 218 struct ccd_softc *ccd_softc; 219 struct ccddevice *ccddevs; 220 struct ccdbuf *ccdfreebufs; 221 static int numccdfreebufs; 222 static int numccd = 0; 223 224 /* 225 * getccdbuf() - Allocate and zero a ccd buffer. 226 * 227 * This routine is called at splbio(). 228 */ 229 230 static __inline 231 struct ccdbuf * 232 getccdbuf(struct ccdbuf *cpy) 233 { 234 struct ccdbuf *cbp; 235 236 /* 237 * Allocate from freelist or malloc as necessary 238 */ 239 if ((cbp = ccdfreebufs) != NULL) { 240 ccdfreebufs = cbp->cb_freenext; 241 --numccdfreebufs; 242 } else { 243 cbp = malloc(sizeof(struct ccdbuf), M_DEVBUF, M_WAITOK); 244 } 245 246 /* 247 * Used by mirroring code 248 */ 249 if (cpy) 250 bcopy(cpy, cbp, sizeof(struct ccdbuf)); 251 else 252 bzero(cbp, sizeof(struct ccdbuf)); 253 254 /* 255 * independant struct buf initialization 256 */ 257 LIST_INIT(&cbp->cb_buf.b_dep); 258 BUF_LOCKINIT(&cbp->cb_buf); 259 BUF_LOCK(&cbp->cb_buf, LK_EXCLUSIVE); 260 BUF_KERNPROC(&cbp->cb_buf); 261 262 return(cbp); 263 } 264 265 /* 266 * putccdbuf() - Free a ccd buffer. 267 * 268 * This routine is called at splbio(). 269 */ 270 271 static __inline 272 void 273 putccdbuf(struct ccdbuf *cbp) 274 { 275 BUF_UNLOCK(&cbp->cb_buf); 276 BUF_LOCKFREE(&cbp->cb_buf); 277 278 if (numccdfreebufs < NCCDFREEHIWAT) { 279 cbp->cb_freenext = ccdfreebufs; 280 ccdfreebufs = cbp; 281 ++numccdfreebufs; 282 } else { 283 free((caddr_t)cbp, M_DEVBUF); 284 } 285 } 286 287 288 /* 289 * Number of blocks to untouched in front of a component partition. 290 * This is to avoid violating its disklabel area when it starts at the 291 * beginning of the slice. 292 */ 293 #if !defined(CCD_OFFSET) 294 #define CCD_OFFSET 16 295 #endif 296 297 /* 298 * Called by main() during pseudo-device attachment. All we need 299 * to do is allocate enough space for devices to be configured later, and 300 * add devsw entries. 301 */ 302 static void 303 ccdattach() 304 { 305 int i; 306 int num = NCCD; 307 308 if (num > 1) 309 printf("ccd0-%d: Concatenated disk drivers\n", num-1); 310 else 311 printf("ccd0: Concatenated disk driver\n"); 312 313 ccd_softc = (struct ccd_softc *)malloc(num * sizeof(struct ccd_softc), 314 M_DEVBUF, M_NOWAIT); 315 ccddevs = (struct ccddevice *)malloc(num * sizeof(struct ccddevice), 316 M_DEVBUF, M_NOWAIT); 317 if ((ccd_softc == NULL) || (ccddevs == NULL)) { 318 printf("WARNING: no memory for concatenated disks\n"); 319 if (ccd_softc != NULL) 320 free(ccd_softc, M_DEVBUF); 321 if (ccddevs != NULL) 322 free(ccddevs, M_DEVBUF); 323 return; 324 } 325 numccd = num; 326 bzero(ccd_softc, num * sizeof(struct ccd_softc)); 327 bzero(ccddevs, num * sizeof(struct ccddevice)); 328 329 cdevsw_add(&ccd_cdevsw); 330 /* XXX: is this necessary? */ 331 for (i = 0; i < numccd; ++i) 332 ccddevs[i].ccd_dk = -1; 333 } 334 335 static int 336 ccd_modevent(mod, type, data) 337 module_t mod; 338 int type; 339 void *data; 340 { 341 int error = 0; 342 343 switch (type) { 344 case MOD_LOAD: 345 ccdattach(); 346 break; 347 348 case MOD_UNLOAD: 349 printf("ccd0: Unload not supported!\n"); 350 error = EOPNOTSUPP; 351 break; 352 353 default: /* MOD_SHUTDOWN etc */ 354 break; 355 } 356 return (error); 357 } 358 359 DEV_MODULE(ccd, ccd_modevent, NULL); 360 361 static int 362 ccdinit(struct ccddevice *ccd, char **cpaths, struct thread *td) 363 { 364 struct ccd_softc *cs = &ccd_softc[ccd->ccd_unit]; 365 struct ccdcinfo *ci = NULL; /* XXX */ 366 size_t size; 367 int ix; 368 struct vnode *vp; 369 size_t minsize; 370 int maxsecsize; 371 struct partinfo dpart; 372 struct ccdgeom *ccg = &cs->sc_geom; 373 char tmppath[MAXPATHLEN]; 374 int error = 0; 375 struct ucred *cred; 376 377 KKASSERT(td->td_proc); 378 cred = td->td_proc->p_ucred; 379 380 #ifdef DEBUG 381 if (ccddebug & (CCDB_FOLLOW|CCDB_INIT)) 382 printf("ccdinit: unit %d\n", ccd->ccd_unit); 383 #endif 384 385 cs->sc_size = 0; 386 cs->sc_ileave = ccd->ccd_interleave; 387 cs->sc_nccdisks = ccd->ccd_ndev; 388 389 /* Allocate space for the component info. */ 390 cs->sc_cinfo = malloc(cs->sc_nccdisks * sizeof(struct ccdcinfo), 391 M_DEVBUF, M_WAITOK); 392 393 /* 394 * Verify that each component piece exists and record 395 * relevant information about it. 396 */ 397 maxsecsize = 0; 398 minsize = 0; 399 for (ix = 0; ix < cs->sc_nccdisks; ix++) { 400 vp = ccd->ccd_vpp[ix]; 401 ci = &cs->sc_cinfo[ix]; 402 ci->ci_vp = vp; 403 404 /* 405 * Copy in the pathname of the component. 406 */ 407 bzero(tmppath, sizeof(tmppath)); /* sanity */ 408 if ((error = copyinstr(cpaths[ix], tmppath, 409 MAXPATHLEN, &ci->ci_pathlen)) != 0) { 410 #ifdef DEBUG 411 if (ccddebug & (CCDB_FOLLOW|CCDB_INIT)) 412 printf("ccd%d: can't copy path, error = %d\n", 413 ccd->ccd_unit, error); 414 #endif 415 goto fail; 416 } 417 ci->ci_path = malloc(ci->ci_pathlen, M_DEVBUF, M_WAITOK); 418 bcopy(tmppath, ci->ci_path, ci->ci_pathlen); 419 420 ci->ci_dev = vn_todev(vp); 421 422 /* 423 * Get partition information for the component. 424 */ 425 if ((error = VOP_IOCTL(vp, DIOCGPART, (caddr_t)&dpart, 426 FREAD, cred, td)) != 0) { 427 #ifdef DEBUG 428 if (ccddebug & (CCDB_FOLLOW|CCDB_INIT)) 429 printf("ccd%d: %s: ioctl failed, error = %d\n", 430 ccd->ccd_unit, ci->ci_path, error); 431 #endif 432 goto fail; 433 } 434 if (dpart.part->p_fstype == FS_BSDFFS) { 435 maxsecsize = 436 ((dpart.disklab->d_secsize > maxsecsize) ? 437 dpart.disklab->d_secsize : maxsecsize); 438 size = dpart.part->p_size - CCD_OFFSET; 439 } else { 440 #ifdef DEBUG 441 if (ccddebug & (CCDB_FOLLOW|CCDB_INIT)) 442 printf("ccd%d: %s: incorrect partition type\n", 443 ccd->ccd_unit, ci->ci_path); 444 #endif 445 error = EFTYPE; 446 goto fail; 447 } 448 449 /* 450 * Calculate the size, truncating to an interleave 451 * boundary if necessary. 452 */ 453 454 if (cs->sc_ileave > 1) 455 size -= size % cs->sc_ileave; 456 457 if (size == 0) { 458 #ifdef DEBUG 459 if (ccddebug & (CCDB_FOLLOW|CCDB_INIT)) 460 printf("ccd%d: %s: size == 0\n", 461 ccd->ccd_unit, ci->ci_path); 462 #endif 463 error = ENODEV; 464 goto fail; 465 } 466 467 if (minsize == 0 || size < minsize) 468 minsize = size; 469 ci->ci_size = size; 470 cs->sc_size += size; 471 } 472 473 /* 474 * Don't allow the interleave to be smaller than 475 * the biggest component sector. 476 */ 477 if ((cs->sc_ileave > 0) && 478 (cs->sc_ileave < (maxsecsize / DEV_BSIZE))) { 479 #ifdef DEBUG 480 if (ccddebug & (CCDB_FOLLOW|CCDB_INIT)) 481 printf("ccd%d: interleave must be at least %d\n", 482 ccd->ccd_unit, (maxsecsize / DEV_BSIZE)); 483 #endif 484 error = EINVAL; 485 goto fail; 486 } 487 488 /* 489 * If uniform interleave is desired set all sizes to that of 490 * the smallest component. This will guarentee that a single 491 * interleave table is generated. 492 * 493 * Lost space must be taken into account when calculating the 494 * overall size. Half the space is lost when CCDF_MIRROR is 495 * specified. One disk is lost when CCDF_PARITY is specified. 496 */ 497 if (ccd->ccd_flags & CCDF_UNIFORM) { 498 for (ci = cs->sc_cinfo; 499 ci < &cs->sc_cinfo[cs->sc_nccdisks]; ci++) { 500 ci->ci_size = minsize; 501 } 502 if (ccd->ccd_flags & CCDF_MIRROR) { 503 /* 504 * Check to see if an even number of components 505 * have been specified. The interleave must also 506 * be non-zero in order for us to be able to 507 * guarentee the topology. 508 */ 509 if (cs->sc_nccdisks % 2) { 510 printf("ccd%d: mirroring requires an even number of disks\n", ccd->ccd_unit ); 511 error = EINVAL; 512 goto fail; 513 } 514 if (cs->sc_ileave == 0) { 515 printf("ccd%d: an interleave must be specified when mirroring\n", ccd->ccd_unit); 516 error = EINVAL; 517 goto fail; 518 } 519 cs->sc_size = (cs->sc_nccdisks/2) * minsize; 520 } else if (ccd->ccd_flags & CCDF_PARITY) { 521 cs->sc_size = (cs->sc_nccdisks-1) * minsize; 522 } else { 523 if (cs->sc_ileave == 0) { 524 printf("ccd%d: an interleave must be specified when using parity\n", ccd->ccd_unit); 525 error = EINVAL; 526 goto fail; 527 } 528 cs->sc_size = cs->sc_nccdisks * minsize; 529 } 530 } 531 532 /* 533 * Construct the interleave table. 534 */ 535 ccdinterleave(cs, ccd->ccd_unit); 536 537 /* 538 * Create pseudo-geometry based on 1MB cylinders. It's 539 * pretty close. 540 */ 541 ccg->ccg_secsize = maxsecsize; 542 ccg->ccg_ntracks = 1; 543 ccg->ccg_nsectors = 1024 * 1024 / ccg->ccg_secsize; 544 ccg->ccg_ncylinders = cs->sc_size / ccg->ccg_nsectors; 545 546 /* 547 * Add an devstat entry for this device. 548 */ 549 devstat_add_entry(&cs->device_stats, "ccd", ccd->ccd_unit, 550 ccg->ccg_secsize, DEVSTAT_ALL_SUPPORTED, 551 DEVSTAT_TYPE_STORARRAY |DEVSTAT_TYPE_IF_OTHER, 552 DEVSTAT_PRIORITY_ARRAY); 553 554 cs->sc_flags |= CCDF_INITED; 555 cs->sc_cflags = ccd->ccd_flags; /* So we can find out later... */ 556 cs->sc_unit = ccd->ccd_unit; 557 return (0); 558 fail: 559 while (ci > cs->sc_cinfo) { 560 ci--; 561 free(ci->ci_path, M_DEVBUF); 562 } 563 free(cs->sc_cinfo, M_DEVBUF); 564 return (error); 565 } 566 567 static void 568 ccdinterleave(cs, unit) 569 struct ccd_softc *cs; 570 int unit; 571 { 572 struct ccdcinfo *ci, *smallci; 573 struct ccdiinfo *ii; 574 daddr_t bn, lbn; 575 int ix; 576 u_long size; 577 578 #ifdef DEBUG 579 if (ccddebug & CCDB_INIT) 580 printf("ccdinterleave(%x): ileave %d\n", cs, cs->sc_ileave); 581 #endif 582 583 /* 584 * Allocate an interleave table. The worst case occurs when each 585 * of N disks is of a different size, resulting in N interleave 586 * tables. 587 * 588 * Chances are this is too big, but we don't care. 589 */ 590 size = (cs->sc_nccdisks + 1) * sizeof(struct ccdiinfo); 591 cs->sc_itable = (struct ccdiinfo *)malloc(size, M_DEVBUF, M_WAITOK); 592 bzero((caddr_t)cs->sc_itable, size); 593 594 /* 595 * Trivial case: no interleave (actually interleave of disk size). 596 * Each table entry represents a single component in its entirety. 597 * 598 * An interleave of 0 may not be used with a mirror or parity setup. 599 */ 600 if (cs->sc_ileave == 0) { 601 bn = 0; 602 ii = cs->sc_itable; 603 604 for (ix = 0; ix < cs->sc_nccdisks; ix++) { 605 /* Allocate space for ii_index. */ 606 ii->ii_index = malloc(sizeof(int), M_DEVBUF, M_WAITOK); 607 ii->ii_ndisk = 1; 608 ii->ii_startblk = bn; 609 ii->ii_startoff = 0; 610 ii->ii_index[0] = ix; 611 bn += cs->sc_cinfo[ix].ci_size; 612 ii++; 613 } 614 ii->ii_ndisk = 0; 615 #ifdef DEBUG 616 if (ccddebug & CCDB_INIT) 617 printiinfo(cs->sc_itable); 618 #endif 619 return; 620 } 621 622 /* 623 * The following isn't fast or pretty; it doesn't have to be. 624 */ 625 size = 0; 626 bn = lbn = 0; 627 for (ii = cs->sc_itable; ; ii++) { 628 /* 629 * Allocate space for ii_index. We might allocate more then 630 * we use. 631 */ 632 ii->ii_index = malloc((sizeof(int) * cs->sc_nccdisks), 633 M_DEVBUF, M_WAITOK); 634 635 /* 636 * Locate the smallest of the remaining components 637 */ 638 smallci = NULL; 639 for (ci = cs->sc_cinfo; ci < &cs->sc_cinfo[cs->sc_nccdisks]; 640 ci++) { 641 if (ci->ci_size > size && 642 (smallci == NULL || 643 ci->ci_size < smallci->ci_size)) { 644 smallci = ci; 645 } 646 } 647 648 /* 649 * Nobody left, all done 650 */ 651 if (smallci == NULL) { 652 ii->ii_ndisk = 0; 653 break; 654 } 655 656 /* 657 * Record starting logical block using an sc_ileave blocksize. 658 */ 659 ii->ii_startblk = bn / cs->sc_ileave; 660 661 /* 662 * Record starting comopnent block using an sc_ileave 663 * blocksize. This value is relative to the beginning of 664 * a component disk. 665 */ 666 ii->ii_startoff = lbn; 667 668 /* 669 * Determine how many disks take part in this interleave 670 * and record their indices. 671 */ 672 ix = 0; 673 for (ci = cs->sc_cinfo; 674 ci < &cs->sc_cinfo[cs->sc_nccdisks]; ci++) { 675 if (ci->ci_size >= smallci->ci_size) { 676 ii->ii_index[ix++] = ci - cs->sc_cinfo; 677 } 678 } 679 ii->ii_ndisk = ix; 680 bn += ix * (smallci->ci_size - size); 681 lbn = smallci->ci_size / cs->sc_ileave; 682 size = smallci->ci_size; 683 } 684 #ifdef DEBUG 685 if (ccddebug & CCDB_INIT) 686 printiinfo(cs->sc_itable); 687 #endif 688 } 689 690 /* ARGSUSED */ 691 static int 692 ccdopen(dev_t dev, int flags, int fmt, d_thread_t *td) 693 { 694 int unit = ccdunit(dev); 695 struct ccd_softc *cs; 696 struct disklabel *lp; 697 int error = 0, part, pmask; 698 699 #ifdef DEBUG 700 if (ccddebug & CCDB_FOLLOW) 701 printf("ccdopen(%x, %x)\n", dev, flags); 702 #endif 703 if (unit >= numccd) 704 return (ENXIO); 705 cs = &ccd_softc[unit]; 706 707 if ((error = ccdlock(cs)) != 0) 708 return (error); 709 710 lp = &cs->sc_label; 711 712 part = ccdpart(dev); 713 pmask = (1 << part); 714 715 /* 716 * If we're initialized, check to see if there are any other 717 * open partitions. If not, then it's safe to update 718 * the in-core disklabel. 719 */ 720 if ((cs->sc_flags & CCDF_INITED) && (cs->sc_openmask == 0)) 721 ccdgetdisklabel(dev); 722 723 /* Check that the partition exists. */ 724 if (part != RAW_PART && ((part >= lp->d_npartitions) || 725 (lp->d_partitions[part].p_fstype == FS_UNUSED))) { 726 error = ENXIO; 727 goto done; 728 } 729 730 cs->sc_openmask |= pmask; 731 done: 732 ccdunlock(cs); 733 return (0); 734 } 735 736 /* ARGSUSED */ 737 static int 738 ccdclose(dev_t dev, int flags, int fmt, d_thread_t *td) 739 { 740 int unit = ccdunit(dev); 741 struct ccd_softc *cs; 742 int error = 0, part; 743 744 #ifdef DEBUG 745 if (ccddebug & CCDB_FOLLOW) 746 printf("ccdclose(%x, %x)\n", dev, flags); 747 #endif 748 749 if (unit >= numccd) 750 return (ENXIO); 751 cs = &ccd_softc[unit]; 752 753 if ((error = ccdlock(cs)) != 0) 754 return (error); 755 756 part = ccdpart(dev); 757 758 /* ...that much closer to allowing unconfiguration... */ 759 cs->sc_openmask &= ~(1 << part); 760 ccdunlock(cs); 761 return (0); 762 } 763 764 static void 765 ccdstrategy(bp) 766 struct buf *bp; 767 { 768 int unit = ccdunit(bp->b_dev); 769 struct ccd_softc *cs = &ccd_softc[unit]; 770 int s; 771 int wlabel; 772 struct disklabel *lp; 773 774 #ifdef DEBUG 775 if (ccddebug & CCDB_FOLLOW) 776 printf("ccdstrategy(%x): unit %d\n", bp, unit); 777 #endif 778 if ((cs->sc_flags & CCDF_INITED) == 0) { 779 bp->b_error = ENXIO; 780 bp->b_flags |= B_ERROR; 781 goto done; 782 } 783 784 /* If it's a nil transfer, wake up the top half now. */ 785 if (bp->b_bcount == 0) 786 goto done; 787 788 lp = &cs->sc_label; 789 790 /* 791 * Do bounds checking and adjust transfer. If there's an 792 * error, the bounds check will flag that for us. 793 */ 794 wlabel = cs->sc_flags & (CCDF_WLABEL|CCDF_LABELLING); 795 if (ccdpart(bp->b_dev) != RAW_PART) { 796 if (bounds_check_with_label(bp, lp, wlabel) <= 0) 797 goto done; 798 } else { 799 int pbn; /* in sc_secsize chunks */ 800 long sz; /* in sc_secsize chunks */ 801 802 pbn = bp->b_blkno / (cs->sc_geom.ccg_secsize / DEV_BSIZE); 803 sz = howmany(bp->b_bcount, cs->sc_geom.ccg_secsize); 804 805 /* 806 * If out of bounds return an error. If at the EOF point, 807 * simply read or write less. 808 */ 809 810 if (pbn < 0 || pbn >= cs->sc_size) { 811 bp->b_resid = bp->b_bcount; 812 if (pbn != cs->sc_size) { 813 bp->b_error = EINVAL; 814 bp->b_flags |= B_ERROR | B_INVAL; 815 } 816 goto done; 817 } 818 819 /* 820 * If the request crosses EOF, truncate the request. 821 */ 822 if (pbn + sz > cs->sc_size) { 823 bp->b_bcount = (cs->sc_size - pbn) * 824 cs->sc_geom.ccg_secsize; 825 } 826 } 827 828 bp->b_resid = bp->b_bcount; 829 830 /* 831 * "Start" the unit. 832 */ 833 s = splbio(); 834 ccdstart(cs, bp); 835 splx(s); 836 return; 837 done: 838 biodone(bp); 839 } 840 841 static void 842 ccdstart(cs, bp) 843 struct ccd_softc *cs; 844 struct buf *bp; 845 { 846 long bcount, rcount; 847 struct ccdbuf *cbp[4]; 848 /* XXX! : 2 reads and 2 writes for RAID 4/5 */ 849 caddr_t addr; 850 daddr_t bn; 851 struct partition *pp; 852 853 #ifdef DEBUG 854 if (ccddebug & CCDB_FOLLOW) 855 printf("ccdstart(%x, %x)\n", cs, bp); 856 #endif 857 858 /* Record the transaction start */ 859 devstat_start_transaction(&cs->device_stats); 860 861 /* 862 * Translate the partition-relative block number to an absolute. 863 */ 864 bn = bp->b_blkno; 865 if (ccdpart(bp->b_dev) != RAW_PART) { 866 pp = &cs->sc_label.d_partitions[ccdpart(bp->b_dev)]; 867 bn += pp->p_offset; 868 } 869 870 /* 871 * Allocate component buffers and fire off the requests 872 */ 873 addr = bp->b_data; 874 for (bcount = bp->b_bcount; bcount > 0; bcount -= rcount) { 875 ccdbuffer(cbp, cs, bp, bn, addr, bcount); 876 rcount = cbp[0]->cb_buf.b_bcount; 877 878 if (cs->sc_cflags & CCDF_MIRROR) { 879 /* 880 * Mirroring. Writes go to both disks, reads are 881 * taken from whichever disk seems most appropriate. 882 * 883 * We attempt to localize reads to the disk whos arm 884 * is nearest the read request. We ignore seeks due 885 * to writes when making this determination and we 886 * also try to avoid hogging. 887 */ 888 if ((cbp[0]->cb_buf.b_flags & B_READ) == 0) { 889 cbp[0]->cb_buf.b_vp->v_numoutput++; 890 cbp[1]->cb_buf.b_vp->v_numoutput++; 891 VOP_STRATEGY(cbp[0]->cb_buf.b_vp, 892 &cbp[0]->cb_buf); 893 VOP_STRATEGY(cbp[1]->cb_buf.b_vp, 894 &cbp[1]->cb_buf); 895 } else { 896 int pick = cs->sc_pick; 897 daddr_t range = cs->sc_size / 16; 898 899 if (bn < cs->sc_blk[pick] - range || 900 bn > cs->sc_blk[pick] + range 901 ) { 902 cs->sc_pick = pick = 1 - pick; 903 } 904 cs->sc_blk[pick] = bn + btodb(rcount); 905 VOP_STRATEGY(cbp[pick]->cb_buf.b_vp, 906 &cbp[pick]->cb_buf); 907 } 908 } else { 909 /* 910 * Not mirroring 911 */ 912 if ((cbp[0]->cb_buf.b_flags & B_READ) == 0) 913 cbp[0]->cb_buf.b_vp->v_numoutput++; 914 VOP_STRATEGY(cbp[0]->cb_buf.b_vp, &cbp[0]->cb_buf); 915 } 916 bn += btodb(rcount); 917 addr += rcount; 918 } 919 } 920 921 /* 922 * Build a component buffer header. 923 */ 924 static void 925 ccdbuffer(cb, cs, bp, bn, addr, bcount) 926 struct ccdbuf **cb; 927 struct ccd_softc *cs; 928 struct buf *bp; 929 daddr_t bn; 930 caddr_t addr; 931 long bcount; 932 { 933 struct ccdcinfo *ci, *ci2 = NULL; /* XXX */ 934 struct ccdbuf *cbp; 935 daddr_t cbn, cboff; 936 off_t cbc; 937 938 #ifdef DEBUG 939 if (ccddebug & CCDB_IO) 940 printf("ccdbuffer(%x, %x, %d, %x, %d)\n", 941 cs, bp, bn, addr, bcount); 942 #endif 943 /* 944 * Determine which component bn falls in. 945 */ 946 cbn = bn; 947 cboff = 0; 948 949 if (cs->sc_ileave == 0) { 950 /* 951 * Serially concatenated and neither a mirror nor a parity 952 * config. This is a special case. 953 */ 954 daddr_t sblk; 955 956 sblk = 0; 957 for (ci = cs->sc_cinfo; cbn >= sblk + ci->ci_size; ci++) 958 sblk += ci->ci_size; 959 cbn -= sblk; 960 } else { 961 struct ccdiinfo *ii; 962 int ccdisk, off; 963 964 /* 965 * Calculate cbn, the logical superblock (sc_ileave chunks), 966 * and cboff, a normal block offset (DEV_BSIZE chunks) relative 967 * to cbn. 968 */ 969 cboff = cbn % cs->sc_ileave; /* DEV_BSIZE gran */ 970 cbn = cbn / cs->sc_ileave; /* DEV_BSIZE * ileave gran */ 971 972 /* 973 * Figure out which interleave table to use. 974 */ 975 for (ii = cs->sc_itable; ii->ii_ndisk; ii++) { 976 if (ii->ii_startblk > cbn) 977 break; 978 } 979 ii--; 980 981 /* 982 * off is the logical superblock relative to the beginning 983 * of this interleave block. 984 */ 985 off = cbn - ii->ii_startblk; 986 987 /* 988 * We must calculate which disk component to use (ccdisk), 989 * and recalculate cbn to be the superblock relative to 990 * the beginning of the component. This is typically done by 991 * adding 'off' and ii->ii_startoff together. However, 'off' 992 * must typically be divided by the number of components in 993 * this interleave array to be properly convert it from a 994 * CCD-relative logical superblock number to a 995 * component-relative superblock number. 996 */ 997 if (ii->ii_ndisk == 1) { 998 /* 999 * When we have just one disk, it can't be a mirror 1000 * or a parity config. 1001 */ 1002 ccdisk = ii->ii_index[0]; 1003 cbn = ii->ii_startoff + off; 1004 } else { 1005 if (cs->sc_cflags & CCDF_MIRROR) { 1006 /* 1007 * We have forced a uniform mapping, resulting 1008 * in a single interleave array. We double 1009 * up on the first half of the available 1010 * components and our mirror is in the second 1011 * half. This only works with a single 1012 * interleave array because doubling up 1013 * doubles the number of sectors, so there 1014 * cannot be another interleave array because 1015 * the next interleave array's calculations 1016 * would be off. 1017 */ 1018 int ndisk2 = ii->ii_ndisk / 2; 1019 ccdisk = ii->ii_index[off % ndisk2]; 1020 cbn = ii->ii_startoff + off / ndisk2; 1021 ci2 = &cs->sc_cinfo[ccdisk + ndisk2]; 1022 } else if (cs->sc_cflags & CCDF_PARITY) { 1023 /* 1024 * XXX not implemented yet 1025 */ 1026 int ndisk2 = ii->ii_ndisk - 1; 1027 ccdisk = ii->ii_index[off % ndisk2]; 1028 cbn = ii->ii_startoff + off / ndisk2; 1029 if (cbn % ii->ii_ndisk <= ccdisk) 1030 ccdisk++; 1031 } else { 1032 ccdisk = ii->ii_index[off % ii->ii_ndisk]; 1033 cbn = ii->ii_startoff + off / ii->ii_ndisk; 1034 } 1035 } 1036 1037 ci = &cs->sc_cinfo[ccdisk]; 1038 1039 /* 1040 * Convert cbn from a superblock to a normal block so it 1041 * can be used to calculate (along with cboff) the normal 1042 * block index into this particular disk. 1043 */ 1044 cbn *= cs->sc_ileave; 1045 } 1046 1047 /* 1048 * Fill in the component buf structure. 1049 */ 1050 cbp = getccdbuf(NULL); 1051 cbp->cb_buf.b_flags = bp->b_flags | B_CALL; 1052 cbp->cb_buf.b_iodone = (void (*)(struct buf *))ccdiodone; 1053 cbp->cb_buf.b_dev = ci->ci_dev; /* XXX */ 1054 cbp->cb_buf.b_blkno = cbn + cboff + CCD_OFFSET; 1055 cbp->cb_buf.b_offset = dbtob(cbn + cboff + CCD_OFFSET); 1056 cbp->cb_buf.b_data = addr; 1057 cbp->cb_buf.b_vp = ci->ci_vp; 1058 if (cs->sc_ileave == 0) 1059 cbc = dbtob((off_t)(ci->ci_size - cbn)); 1060 else 1061 cbc = dbtob((off_t)(cs->sc_ileave - cboff)); 1062 cbp->cb_buf.b_bcount = (cbc < bcount) ? cbc : bcount; 1063 cbp->cb_buf.b_bufsize = cbp->cb_buf.b_bcount; 1064 1065 /* 1066 * context for ccdiodone 1067 */ 1068 cbp->cb_obp = bp; 1069 cbp->cb_unit = cs - ccd_softc; 1070 cbp->cb_comp = ci - cs->sc_cinfo; 1071 1072 #ifdef DEBUG 1073 if (ccddebug & CCDB_IO) 1074 printf(" dev %x(u%d): cbp %x bn %d addr %x bcnt %d\n", 1075 ci->ci_dev, ci-cs->sc_cinfo, cbp, cbp->cb_buf.b_blkno, 1076 cbp->cb_buf.b_data, cbp->cb_buf.b_bcount); 1077 #endif 1078 cb[0] = cbp; 1079 1080 /* 1081 * Note: both I/O's setup when reading from mirror, but only one 1082 * will be executed. 1083 */ 1084 if (cs->sc_cflags & CCDF_MIRROR) { 1085 /* mirror, setup second I/O */ 1086 cbp = getccdbuf(cb[0]); 1087 cbp->cb_buf.b_dev = ci2->ci_dev; 1088 cbp->cb_buf.b_vp = ci2->ci_vp; 1089 cbp->cb_comp = ci2 - cs->sc_cinfo; 1090 cb[1] = cbp; 1091 /* link together the ccdbuf's and clear "mirror done" flag */ 1092 cb[0]->cb_mirror = cb[1]; 1093 cb[1]->cb_mirror = cb[0]; 1094 cb[0]->cb_pflags &= ~CCDPF_MIRROR_DONE; 1095 cb[1]->cb_pflags &= ~CCDPF_MIRROR_DONE; 1096 } 1097 } 1098 1099 static void 1100 ccdintr(cs, bp) 1101 struct ccd_softc *cs; 1102 struct buf *bp; 1103 { 1104 #ifdef DEBUG 1105 if (ccddebug & CCDB_FOLLOW) 1106 printf("ccdintr(%x, %x)\n", cs, bp); 1107 #endif 1108 /* 1109 * Request is done for better or worse, wakeup the top half. 1110 */ 1111 if (bp->b_flags & B_ERROR) 1112 bp->b_resid = bp->b_bcount; 1113 devstat_end_transaction_buf(&cs->device_stats, bp); 1114 biodone(bp); 1115 } 1116 1117 /* 1118 * Called at interrupt time. 1119 * Mark the component as done and if all components are done, 1120 * take a ccd interrupt. 1121 */ 1122 static void 1123 ccdiodone(cbp) 1124 struct ccdbuf *cbp; 1125 { 1126 struct buf *bp = cbp->cb_obp; 1127 int unit = cbp->cb_unit; 1128 int count, s; 1129 1130 s = splbio(); 1131 #ifdef DEBUG 1132 if (ccddebug & CCDB_FOLLOW) 1133 printf("ccdiodone(%x)\n", cbp); 1134 if (ccddebug & CCDB_IO) { 1135 printf("ccdiodone: bp %x bcount %d resid %d\n", 1136 bp, bp->b_bcount, bp->b_resid); 1137 printf(" dev %x(u%d), cbp %x bn %d addr %x bcnt %d\n", 1138 cbp->cb_buf.b_dev, cbp->cb_comp, cbp, 1139 cbp->cb_buf.b_blkno, cbp->cb_buf.b_data, 1140 cbp->cb_buf.b_bcount); 1141 } 1142 #endif 1143 /* 1144 * If an error occured, report it. If this is a mirrored 1145 * configuration and the first of two possible reads, do not 1146 * set the error in the bp yet because the second read may 1147 * succeed. 1148 */ 1149 1150 if (cbp->cb_buf.b_flags & B_ERROR) { 1151 const char *msg = ""; 1152 1153 if ((ccd_softc[unit].sc_cflags & CCDF_MIRROR) && 1154 (cbp->cb_buf.b_flags & B_READ) && 1155 (cbp->cb_pflags & CCDPF_MIRROR_DONE) == 0) { 1156 /* 1157 * We will try our read on the other disk down 1158 * below, also reverse the default pick so if we 1159 * are doing a scan we do not keep hitting the 1160 * bad disk first. 1161 */ 1162 struct ccd_softc *cs = &ccd_softc[unit]; 1163 1164 msg = ", trying other disk"; 1165 cs->sc_pick = 1 - cs->sc_pick; 1166 cs->sc_blk[cs->sc_pick] = bp->b_blkno; 1167 } else { 1168 bp->b_flags |= B_ERROR; 1169 bp->b_error = cbp->cb_buf.b_error ? 1170 cbp->cb_buf.b_error : EIO; 1171 } 1172 printf("ccd%d: error %d on component %d block %d (ccd block %d)%s\n", 1173 unit, bp->b_error, cbp->cb_comp, 1174 (int)cbp->cb_buf.b_blkno, bp->b_blkno, msg); 1175 } 1176 1177 /* 1178 * Process mirror. If we are writing, I/O has been initiated on both 1179 * buffers and we fall through only after both are finished. 1180 * 1181 * If we are reading only one I/O is initiated at a time. If an 1182 * error occurs we initiate the second I/O and return, otherwise 1183 * we free the second I/O without initiating it. 1184 */ 1185 1186 if (ccd_softc[unit].sc_cflags & CCDF_MIRROR) { 1187 if ((cbp->cb_buf.b_flags & B_READ) == 0) { 1188 /* 1189 * When writing, handshake with the second buffer 1190 * to determine when both are done. If both are not 1191 * done, return here. 1192 */ 1193 if ((cbp->cb_pflags & CCDPF_MIRROR_DONE) == 0) { 1194 cbp->cb_mirror->cb_pflags |= CCDPF_MIRROR_DONE; 1195 putccdbuf(cbp); 1196 splx(s); 1197 return; 1198 } 1199 } else { 1200 /* 1201 * When reading, either dispose of the second buffer 1202 * or initiate I/O on the second buffer if an error 1203 * occured with this one. 1204 */ 1205 if ((cbp->cb_pflags & CCDPF_MIRROR_DONE) == 0) { 1206 if (cbp->cb_buf.b_flags & B_ERROR) { 1207 cbp->cb_mirror->cb_pflags |= 1208 CCDPF_MIRROR_DONE; 1209 VOP_STRATEGY( 1210 cbp->cb_mirror->cb_buf.b_vp, 1211 &cbp->cb_mirror->cb_buf 1212 ); 1213 putccdbuf(cbp); 1214 splx(s); 1215 return; 1216 } else { 1217 putccdbuf(cbp->cb_mirror); 1218 /* fall through */ 1219 } 1220 } 1221 } 1222 } 1223 1224 /* 1225 * use b_bufsize to determine how big the original request was rather 1226 * then b_bcount, because b_bcount may have been truncated for EOF. 1227 * 1228 * XXX We check for an error, but we do not test the resid for an 1229 * aligned EOF condition. This may result in character & block 1230 * device access not recognizing EOF properly when read or written 1231 * sequentially, but will not effect filesystems. 1232 */ 1233 count = cbp->cb_buf.b_bufsize; 1234 putccdbuf(cbp); 1235 1236 /* 1237 * If all done, "interrupt". 1238 */ 1239 bp->b_resid -= count; 1240 if (bp->b_resid < 0) 1241 panic("ccdiodone: count"); 1242 if (bp->b_resid == 0) 1243 ccdintr(&ccd_softc[unit], bp); 1244 splx(s); 1245 } 1246 1247 static int 1248 ccdioctl(dev_t dev, u_long cmd, caddr_t data, int flag, d_thread_t *td) 1249 { 1250 int unit = ccdunit(dev); 1251 int i, j, lookedup = 0, error = 0; 1252 int part, pmask, s; 1253 struct ccd_softc *cs; 1254 struct ccd_ioctl *ccio = (struct ccd_ioctl *)data; 1255 struct ccddevice ccd; 1256 char **cpp; 1257 struct vnode **vpp; 1258 struct ucred *cred; 1259 1260 KKASSERT(td->td_proc != NULL); 1261 cred = td->td_proc->p_ucred; 1262 1263 if (unit >= numccd) 1264 return (ENXIO); 1265 cs = &ccd_softc[unit]; 1266 1267 bzero(&ccd, sizeof(ccd)); 1268 1269 switch (cmd) { 1270 case CCDIOCSET: 1271 if (cs->sc_flags & CCDF_INITED) 1272 return (EBUSY); 1273 1274 if ((flag & FWRITE) == 0) 1275 return (EBADF); 1276 1277 if ((error = ccdlock(cs)) != 0) 1278 return (error); 1279 1280 if (ccio->ccio_ndisks > CCD_MAXNDISKS) 1281 return (EINVAL); 1282 1283 /* Fill in some important bits. */ 1284 ccd.ccd_unit = unit; 1285 ccd.ccd_interleave = ccio->ccio_ileave; 1286 if (ccd.ccd_interleave == 0 && 1287 ((ccio->ccio_flags & CCDF_MIRROR) || 1288 (ccio->ccio_flags & CCDF_PARITY))) { 1289 printf("ccd%d: disabling mirror/parity, interleave is 0\n", unit); 1290 ccio->ccio_flags &= ~(CCDF_MIRROR | CCDF_PARITY); 1291 } 1292 if ((ccio->ccio_flags & CCDF_MIRROR) && 1293 (ccio->ccio_flags & CCDF_PARITY)) { 1294 printf("ccd%d: can't specify both mirror and parity, using mirror\n", unit); 1295 ccio->ccio_flags &= ~CCDF_PARITY; 1296 } 1297 if ((ccio->ccio_flags & (CCDF_MIRROR | CCDF_PARITY)) && 1298 !(ccio->ccio_flags & CCDF_UNIFORM)) { 1299 printf("ccd%d: mirror/parity forces uniform flag\n", 1300 unit); 1301 ccio->ccio_flags |= CCDF_UNIFORM; 1302 } 1303 ccd.ccd_flags = ccio->ccio_flags & CCDF_USERMASK; 1304 1305 /* 1306 * Allocate space for and copy in the array of 1307 * componet pathnames and device numbers. 1308 */ 1309 cpp = malloc(ccio->ccio_ndisks * sizeof(char *), 1310 M_DEVBUF, M_WAITOK); 1311 vpp = malloc(ccio->ccio_ndisks * sizeof(struct vnode *), 1312 M_DEVBUF, M_WAITOK); 1313 1314 error = copyin((caddr_t)ccio->ccio_disks, (caddr_t)cpp, 1315 ccio->ccio_ndisks * sizeof(char **)); 1316 if (error) { 1317 free(vpp, M_DEVBUF); 1318 free(cpp, M_DEVBUF); 1319 ccdunlock(cs); 1320 return (error); 1321 } 1322 1323 #ifdef DEBUG 1324 if (ccddebug & CCDB_INIT) 1325 for (i = 0; i < ccio->ccio_ndisks; ++i) 1326 printf("ccdioctl: component %d: 0x%x\n", 1327 i, cpp[i]); 1328 #endif 1329 1330 for (i = 0; i < ccio->ccio_ndisks; ++i) { 1331 #ifdef DEBUG 1332 if (ccddebug & CCDB_INIT) 1333 printf("ccdioctl: lookedup = %d\n", lookedup); 1334 #endif 1335 if ((error = ccdlookup(cpp[i], td, &vpp[i])) != 0) { 1336 for (j = 0; j < lookedup; ++j) 1337 (void)vn_close(vpp[j], FREAD|FWRITE, td); 1338 free(vpp, M_DEVBUF); 1339 free(cpp, M_DEVBUF); 1340 ccdunlock(cs); 1341 return (error); 1342 } 1343 ++lookedup; 1344 } 1345 ccd.ccd_cpp = cpp; 1346 ccd.ccd_vpp = vpp; 1347 ccd.ccd_ndev = ccio->ccio_ndisks; 1348 1349 /* 1350 * Initialize the ccd. Fills in the softc for us. 1351 */ 1352 if ((error = ccdinit(&ccd, cpp, td)) != 0) { 1353 for (j = 0; j < lookedup; ++j) 1354 (void)vn_close(vpp[j], FREAD|FWRITE, td); 1355 bzero(&ccd_softc[unit], sizeof(struct ccd_softc)); 1356 free(vpp, M_DEVBUF); 1357 free(cpp, M_DEVBUF); 1358 ccdunlock(cs); 1359 return (error); 1360 } 1361 1362 /* 1363 * The ccd has been successfully initialized, so 1364 * we can place it into the array and read the disklabel. 1365 */ 1366 bcopy(&ccd, &ccddevs[unit], sizeof(ccd)); 1367 ccio->ccio_unit = unit; 1368 ccio->ccio_size = cs->sc_size; 1369 ccdgetdisklabel(dev); 1370 1371 ccdunlock(cs); 1372 1373 break; 1374 1375 case CCDIOCCLR: 1376 if ((cs->sc_flags & CCDF_INITED) == 0) 1377 return (ENXIO); 1378 1379 if ((flag & FWRITE) == 0) 1380 return (EBADF); 1381 1382 if ((error = ccdlock(cs)) != 0) 1383 return (error); 1384 1385 /* Don't unconfigure if any other partitions are open */ 1386 part = ccdpart(dev); 1387 pmask = (1 << part); 1388 if ((cs->sc_openmask & ~pmask)) { 1389 ccdunlock(cs); 1390 return (EBUSY); 1391 } 1392 1393 /* 1394 * Free ccd_softc information and clear entry. 1395 */ 1396 1397 /* Close the components and free their pathnames. */ 1398 for (i = 0; i < cs->sc_nccdisks; ++i) { 1399 /* 1400 * XXX: this close could potentially fail and 1401 * cause Bad Things. Maybe we need to force 1402 * the close to happen? 1403 */ 1404 #ifdef DEBUG 1405 if (ccddebug & CCDB_VNODE) 1406 vprint("CCDIOCCLR: vnode info", 1407 cs->sc_cinfo[i].ci_vp); 1408 #endif 1409 (void)vn_close(cs->sc_cinfo[i].ci_vp, FREAD|FWRITE, td); 1410 free(cs->sc_cinfo[i].ci_path, M_DEVBUF); 1411 } 1412 1413 /* Free interleave index. */ 1414 for (i = 0; cs->sc_itable[i].ii_ndisk; ++i) 1415 free(cs->sc_itable[i].ii_index, M_DEVBUF); 1416 1417 /* Free component info and interleave table. */ 1418 free(cs->sc_cinfo, M_DEVBUF); 1419 free(cs->sc_itable, M_DEVBUF); 1420 cs->sc_flags &= ~CCDF_INITED; 1421 1422 /* 1423 * Free ccddevice information and clear entry. 1424 */ 1425 free(ccddevs[unit].ccd_cpp, M_DEVBUF); 1426 free(ccddevs[unit].ccd_vpp, M_DEVBUF); 1427 ccd.ccd_dk = -1; 1428 bcopy(&ccd, &ccddevs[unit], sizeof(ccd)); 1429 1430 /* 1431 * And remove the devstat entry. 1432 */ 1433 devstat_remove_entry(&cs->device_stats); 1434 1435 /* This must be atomic. */ 1436 s = splhigh(); 1437 ccdunlock(cs); 1438 bzero(cs, sizeof(struct ccd_softc)); 1439 splx(s); 1440 1441 break; 1442 1443 case DIOCGDINFO: 1444 if ((cs->sc_flags & CCDF_INITED) == 0) 1445 return (ENXIO); 1446 1447 *(struct disklabel *)data = cs->sc_label; 1448 break; 1449 1450 case DIOCGPART: 1451 if ((cs->sc_flags & CCDF_INITED) == 0) 1452 return (ENXIO); 1453 1454 ((struct partinfo *)data)->disklab = &cs->sc_label; 1455 ((struct partinfo *)data)->part = 1456 &cs->sc_label.d_partitions[ccdpart(dev)]; 1457 break; 1458 1459 case DIOCWDINFO: 1460 case DIOCSDINFO: 1461 if ((cs->sc_flags & CCDF_INITED) == 0) 1462 return (ENXIO); 1463 1464 if ((flag & FWRITE) == 0) 1465 return (EBADF); 1466 1467 if ((error = ccdlock(cs)) != 0) 1468 return (error); 1469 1470 cs->sc_flags |= CCDF_LABELLING; 1471 1472 error = setdisklabel(&cs->sc_label, 1473 (struct disklabel *)data, 0); 1474 if (error == 0) { 1475 if (cmd == DIOCWDINFO) 1476 error = writedisklabel(CCDLABELDEV(dev), 1477 &cs->sc_label); 1478 } 1479 1480 cs->sc_flags &= ~CCDF_LABELLING; 1481 1482 ccdunlock(cs); 1483 1484 if (error) 1485 return (error); 1486 break; 1487 1488 case DIOCWLABEL: 1489 if ((cs->sc_flags & CCDF_INITED) == 0) 1490 return (ENXIO); 1491 1492 if ((flag & FWRITE) == 0) 1493 return (EBADF); 1494 if (*(int *)data != 0) 1495 cs->sc_flags |= CCDF_WLABEL; 1496 else 1497 cs->sc_flags &= ~CCDF_WLABEL; 1498 break; 1499 1500 default: 1501 return (ENOTTY); 1502 } 1503 1504 return (0); 1505 } 1506 1507 static int 1508 ccdsize(dev_t dev) 1509 { 1510 struct ccd_softc *cs; 1511 int part, size; 1512 1513 if (ccdopen(dev, 0, S_IFCHR, curthread)) 1514 return (-1); 1515 1516 cs = &ccd_softc[ccdunit(dev)]; 1517 part = ccdpart(dev); 1518 1519 if ((cs->sc_flags & CCDF_INITED) == 0) 1520 return (-1); 1521 1522 if (cs->sc_label.d_partitions[part].p_fstype != FS_SWAP) 1523 size = -1; 1524 else 1525 size = cs->sc_label.d_partitions[part].p_size; 1526 1527 if (ccdclose(dev, 0, S_IFCHR, curthread)) 1528 return (-1); 1529 1530 return (size); 1531 } 1532 1533 static int 1534 ccddump(dev) 1535 dev_t dev; 1536 { 1537 1538 /* Not implemented. */ 1539 return ENXIO; 1540 } 1541 1542 /* 1543 * Lookup the provided name in the filesystem. If the file exists, 1544 * is a valid block device, and isn't being used by anyone else, 1545 * set *vpp to the file's vnode. 1546 */ 1547 static int 1548 ccdlookup(char *path, struct thread *td, struct vnode **vpp) 1549 { 1550 struct nameidata nd; 1551 struct vnode *vp; 1552 int error; 1553 struct ucred *cred; 1554 1555 KKASSERT(td->td_proc); 1556 cred = td->td_proc->p_ucred; 1557 1558 NDINIT(&nd, NAMEI_LOOKUP, CNP_FOLLOW, UIO_USERSPACE, path, td); 1559 if ((error = vn_open(&nd, FREAD|FWRITE, 0)) != 0) { 1560 #ifdef DEBUG 1561 if (ccddebug & CCDB_FOLLOW|CCDB_INIT) 1562 printf("ccdlookup: vn_open error = %d\n", error); 1563 #endif 1564 return (error); 1565 } 1566 vp = nd.ni_vp; 1567 1568 if (vp->v_usecount > 1) { 1569 error = EBUSY; 1570 goto bad; 1571 } 1572 1573 if (!vn_isdisk(vp, &error)) 1574 goto bad; 1575 1576 #ifdef DEBUG 1577 if (ccddebug & CCDB_VNODE) 1578 vprint("ccdlookup: vnode info", vp); 1579 #endif 1580 1581 VOP_UNLOCK(vp, 0, td); 1582 NDFREE(&nd, NDF_ONLY_PNBUF); 1583 *vpp = vp; 1584 return (0); 1585 bad: 1586 VOP_UNLOCK(vp, 0, td); 1587 NDFREE(&nd, NDF_ONLY_PNBUF); 1588 /* vn_close does vrele() for vp */ 1589 (void)vn_close(vp, FREAD|FWRITE, td); 1590 return (error); 1591 } 1592 1593 /* 1594 * Read the disklabel from the ccd. If one is not present, fake one 1595 * up. 1596 */ 1597 static void 1598 ccdgetdisklabel(dev) 1599 dev_t dev; 1600 { 1601 int unit = ccdunit(dev); 1602 struct ccd_softc *cs = &ccd_softc[unit]; 1603 char *errstring; 1604 struct disklabel *lp = &cs->sc_label; 1605 struct ccdgeom *ccg = &cs->sc_geom; 1606 1607 bzero(lp, sizeof(*lp)); 1608 1609 lp->d_secperunit = cs->sc_size; 1610 lp->d_secsize = ccg->ccg_secsize; 1611 lp->d_nsectors = ccg->ccg_nsectors; 1612 lp->d_ntracks = ccg->ccg_ntracks; 1613 lp->d_ncylinders = ccg->ccg_ncylinders; 1614 lp->d_secpercyl = lp->d_ntracks * lp->d_nsectors; 1615 1616 strncpy(lp->d_typename, "ccd", sizeof(lp->d_typename)); 1617 lp->d_type = DTYPE_CCD; 1618 strncpy(lp->d_packname, "fictitious", sizeof(lp->d_packname)); 1619 lp->d_rpm = 3600; 1620 lp->d_interleave = 1; 1621 lp->d_flags = 0; 1622 1623 lp->d_partitions[RAW_PART].p_offset = 0; 1624 lp->d_partitions[RAW_PART].p_size = cs->sc_size; 1625 lp->d_partitions[RAW_PART].p_fstype = FS_UNUSED; 1626 lp->d_npartitions = RAW_PART + 1; 1627 1628 lp->d_bbsize = BBSIZE; /* XXX */ 1629 lp->d_sbsize = SBSIZE; /* XXX */ 1630 1631 lp->d_magic = DISKMAGIC; 1632 lp->d_magic2 = DISKMAGIC; 1633 lp->d_checksum = dkcksum(&cs->sc_label); 1634 1635 /* 1636 * Call the generic disklabel extraction routine. 1637 */ 1638 errstring = readdisklabel(CCDLABELDEV(dev), &cs->sc_label); 1639 if (errstring != NULL) 1640 ccdmakedisklabel(cs); 1641 1642 #ifdef DEBUG 1643 /* It's actually extremely common to have unlabeled ccds. */ 1644 if (ccddebug & CCDB_LABEL) 1645 if (errstring != NULL) 1646 printf("ccd%d: %s\n", unit, errstring); 1647 #endif 1648 } 1649 1650 /* 1651 * Take care of things one might want to take care of in the event 1652 * that a disklabel isn't present. 1653 */ 1654 static void 1655 ccdmakedisklabel(cs) 1656 struct ccd_softc *cs; 1657 { 1658 struct disklabel *lp = &cs->sc_label; 1659 1660 /* 1661 * For historical reasons, if there's no disklabel present 1662 * the raw partition must be marked FS_BSDFFS. 1663 */ 1664 lp->d_partitions[RAW_PART].p_fstype = FS_BSDFFS; 1665 1666 strncpy(lp->d_packname, "default label", sizeof(lp->d_packname)); 1667 } 1668 1669 /* 1670 * Wait interruptibly for an exclusive lock. 1671 * 1672 * XXX 1673 * Several drivers do this; it should be abstracted and made MP-safe. 1674 */ 1675 static int 1676 ccdlock(cs) 1677 struct ccd_softc *cs; 1678 { 1679 int error; 1680 1681 while ((cs->sc_flags & CCDF_LOCKED) != 0) { 1682 cs->sc_flags |= CCDF_WANTED; 1683 if ((error = tsleep(cs, PCATCH, "ccdlck", 0)) != 0) 1684 return (error); 1685 } 1686 cs->sc_flags |= CCDF_LOCKED; 1687 return (0); 1688 } 1689 1690 /* 1691 * Unlock and wake up any waiters. 1692 */ 1693 static void 1694 ccdunlock(cs) 1695 struct ccd_softc *cs; 1696 { 1697 1698 cs->sc_flags &= ~CCDF_LOCKED; 1699 if ((cs->sc_flags & CCDF_WANTED) != 0) { 1700 cs->sc_flags &= ~CCDF_WANTED; 1701 wakeup(cs); 1702 } 1703 } 1704 1705 #ifdef DEBUG 1706 static void 1707 printiinfo(ii) 1708 struct ccdiinfo *ii; 1709 { 1710 int ix, i; 1711 1712 for (ix = 0; ii->ii_ndisk; ix++, ii++) { 1713 printf(" itab[%d]: #dk %d sblk %d soff %d", 1714 ix, ii->ii_ndisk, ii->ii_startblk, ii->ii_startoff); 1715 for (i = 0; i < ii->ii_ndisk; i++) 1716 printf(" %d", ii->ii_index[i]); 1717 printf("\n"); 1718 } 1719 } 1720 #endif 1721 1722 1723 /* Local Variables: */ 1724 /* c-argdecl-indent: 8 */ 1725 /* c-continued-statement-offset: 8 */ 1726 /* c-indent-level: 8 */ 1727 /* End: */ 1728