1 /* $FreeBSD: src/sys/dev/ccd/ccd.c,v 1.73.2.1 2001/09/11 09:49:52 kris Exp $ */ 2 /* $DragonFly: src/sys/dev/disk/ccd/ccd.c,v 1.20 2005/08/03 16:36:33 hmp Exp $ */ 3 4 /* $NetBSD: ccd.c,v 1.22 1995/12/08 19:13:26 thorpej Exp $ */ 5 6 /* 7 * Copyright (c) 1995 Jason R. Thorpe. 8 * All rights reserved. 9 * 10 * Redistribution and use in source and binary forms, with or without 11 * modification, are permitted provided that the following conditions 12 * are met: 13 * 1. Redistributions of source code must retain the above copyright 14 * notice, this list of conditions and the following disclaimer. 15 * 2. Redistributions in binary form must reproduce the above copyright 16 * notice, this list of conditions and the following disclaimer in the 17 * documentation and/or other materials provided with the distribution. 18 * 3. All advertising materials mentioning features or use of this software 19 * must display the following acknowledgement: 20 * This product includes software developed for the NetBSD Project 21 * by Jason R. Thorpe. 22 * 4. The name of the author may not be used to endorse or promote products 23 * derived from this software without specific prior written permission. 24 * 25 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR 26 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 27 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 28 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, 29 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, 30 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 31 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED 32 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 33 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 34 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 35 * SUCH DAMAGE. 36 */ 37 38 /* 39 * Copyright (c) 1988 University of Utah. 40 * Copyright (c) 1990, 1993 41 * The Regents of the University of California. All rights reserved. 42 * 43 * This code is derived from software contributed to Berkeley by 44 * the Systems Programming Group of the University of Utah Computer 45 * Science Department. 46 * 47 * Redistribution and use in source and binary forms, with or without 48 * modification, are permitted provided that the following conditions 49 * are met: 50 * 1. Redistributions of source code must retain the above copyright 51 * notice, this list of conditions and the following disclaimer. 52 * 2. Redistributions in binary form must reproduce the above copyright 53 * notice, this list of conditions and the following disclaimer in the 54 * documentation and/or other materials provided with the distribution. 55 * 3. All advertising materials mentioning features or use of this software 56 * must display the following acknowledgement: 57 * This product includes software developed by the University of 58 * California, Berkeley and its contributors. 59 * 4. Neither the name of the University nor the names of its contributors 60 * may be used to endorse or promote products derived from this software 61 * without specific prior written permission. 62 * 63 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 64 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 65 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 66 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 67 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 68 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 69 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 70 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 71 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 72 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 73 * SUCH DAMAGE. 74 * 75 * from: Utah $Hdr: cd.c 1.6 90/11/28$ 76 * 77 * @(#)cd.c 8.2 (Berkeley) 11/16/93 78 */ 79 80 /* 81 * "Concatenated" disk driver. 82 * 83 * Dynamic configuration and disklabel support by: 84 * Jason R. Thorpe <thorpej@nas.nasa.gov> 85 * Numerical Aerodynamic Simulation Facility 86 * Mail Stop 258-6 87 * NASA Ames Research Center 88 * Moffett Field, CA 94035 89 */ 90 91 #include "use_ccd.h" 92 93 #include <sys/param.h> 94 #include <sys/systm.h> 95 #include <sys/kernel.h> 96 #include <sys/module.h> 97 #include <sys/proc.h> 98 #include <sys/buf.h> 99 #include <sys/malloc.h> 100 #include <sys/nlookup.h> 101 #include <sys/conf.h> 102 #include <sys/stat.h> 103 #include <sys/sysctl.h> 104 #include <sys/disklabel.h> 105 #include <vfs/ufs/fs.h> 106 #include <sys/devicestat.h> 107 #include <sys/fcntl.h> 108 #include <sys/vnode.h> 109 #include <sys/buf2.h> 110 111 #include <sys/ccdvar.h> 112 113 #include <sys/thread2.h> 114 115 #include <vm/vm_zone.h> 116 117 #if defined(CCDDEBUG) && !defined(DEBUG) 118 #define DEBUG 119 #endif 120 121 #ifdef DEBUG 122 #define CCDB_FOLLOW 0x01 123 #define CCDB_INIT 0x02 124 #define CCDB_IO 0x04 125 #define CCDB_LABEL 0x08 126 #define CCDB_VNODE 0x10 127 static int ccddebug = CCDB_FOLLOW | CCDB_INIT | CCDB_IO | CCDB_LABEL | 128 CCDB_VNODE; 129 SYSCTL_INT(_debug, OID_AUTO, ccddebug, CTLFLAG_RW, &ccddebug, 0, ""); 130 #undef DEBUG 131 #endif 132 133 #define ccdunit(x) dkunit(x) 134 #define ccdpart(x) dkpart(x) 135 136 /* 137 This is how mirroring works (only writes are special): 138 139 When initiating a write, ccdbuffer() returns two "struct ccdbuf *"s 140 linked together by the cb_mirror field. "cb_pflags & 141 CCDPF_MIRROR_DONE" is set to 0 on both of them. 142 143 When a component returns to ccdiodone(), it checks if "cb_pflags & 144 CCDPF_MIRROR_DONE" is set or not. If not, it sets the partner's 145 flag and returns. If it is, it means its partner has already 146 returned, so it will go to the regular cleanup. 147 148 */ 149 150 struct ccdbuf { 151 struct buf cb_buf; /* new I/O buf */ 152 struct buf *cb_obp; /* ptr. to original I/O buf */ 153 struct ccdbuf *cb_freenext; /* free list link */ 154 int cb_unit; /* target unit */ 155 int cb_comp; /* target component */ 156 int cb_pflags; /* mirror/parity status flag */ 157 struct ccdbuf *cb_mirror; /* mirror counterpart */ 158 }; 159 160 /* bits in cb_pflags */ 161 #define CCDPF_MIRROR_DONE 1 /* if set, mirror counterpart is done */ 162 163 #define CCDLABELDEV(dev) \ 164 (make_sub_dev(dev, dkmakeminor(ccdunit((dev)), 0, RAW_PART))) 165 166 static d_open_t ccdopen; 167 static d_close_t ccdclose; 168 static d_strategy_t ccdstrategy; 169 static d_ioctl_t ccdioctl; 170 static d_dump_t ccddump; 171 static d_psize_t ccdsize; 172 173 #define NCCDFREEHIWAT 16 174 175 #define CDEV_MAJOR 74 176 177 static struct cdevsw ccd_cdevsw = { 178 /* name */ "ccd", 179 /* maj */ CDEV_MAJOR, 180 /* flags */ D_DISK, 181 /* port */ NULL, 182 /* clone */ NULL, 183 184 /* open */ ccdopen, 185 /* close */ ccdclose, 186 /* read */ physread, 187 /* write */ physwrite, 188 /* ioctl */ ccdioctl, 189 /* poll */ nopoll, 190 /* mmap */ nommap, 191 /* strategy */ ccdstrategy, 192 /* dump */ ccddump, 193 /* psize */ ccdsize 194 }; 195 196 /* called during module initialization */ 197 static void ccdattach (void); 198 static int ccd_modevent (module_t, int, void *); 199 200 /* called by biodone() at interrupt time */ 201 static void ccdiodone (struct ccdbuf *cbp); 202 203 static void ccdstart (struct ccd_softc *, struct buf *); 204 static void ccdinterleave (struct ccd_softc *, int); 205 static void ccdintr (struct ccd_softc *, struct buf *); 206 static int ccdinit (struct ccddevice *, char **, struct thread *); 207 static int ccdlookup (char *, struct thread *td, struct vnode **); 208 static void ccdbuffer (struct ccdbuf **ret, struct ccd_softc *, 209 struct buf *, daddr_t, caddr_t, long); 210 static void ccdgetdisklabel (dev_t); 211 static void ccdmakedisklabel (struct ccd_softc *); 212 static int ccdlock (struct ccd_softc *); 213 static void ccdunlock (struct ccd_softc *); 214 215 #ifdef DEBUG 216 static void printiinfo (struct ccdiinfo *); 217 #endif 218 219 /* Non-private for the benefit of libkvm. */ 220 struct ccd_softc *ccd_softc; 221 struct ccddevice *ccddevs; 222 struct ccdbuf *ccdfreebufs; 223 static int numccdfreebufs; 224 static int numccd = 0; 225 226 /* 227 * getccdbuf() - Allocate and zero a ccd buffer. 228 * 229 * This routine is called at splbio(). 230 */ 231 232 static __inline 233 struct ccdbuf * 234 getccdbuf(struct ccdbuf *cpy) 235 { 236 struct ccdbuf *cbp; 237 238 /* 239 * Allocate from freelist or malloc as necessary 240 */ 241 if ((cbp = ccdfreebufs) != NULL) { 242 ccdfreebufs = cbp->cb_freenext; 243 --numccdfreebufs; 244 } else { 245 cbp = malloc(sizeof(struct ccdbuf), M_DEVBUF, M_WAITOK); 246 } 247 248 /* 249 * Used by mirroring code 250 */ 251 if (cpy) 252 bcopy(cpy, cbp, sizeof(struct ccdbuf)); 253 else 254 bzero(cbp, sizeof(struct ccdbuf)); 255 256 /* 257 * independant struct buf initialization 258 */ 259 LIST_INIT(&cbp->cb_buf.b_dep); 260 BUF_LOCKINIT(&cbp->cb_buf); 261 BUF_LOCK(&cbp->cb_buf, LK_EXCLUSIVE); 262 BUF_KERNPROC(&cbp->cb_buf); 263 264 return(cbp); 265 } 266 267 /* 268 * putccdbuf() - Free a ccd buffer. 269 * 270 * This routine is called at splbio(). 271 */ 272 273 static __inline 274 void 275 putccdbuf(struct ccdbuf *cbp) 276 { 277 BUF_UNLOCK(&cbp->cb_buf); 278 BUF_LOCKFREE(&cbp->cb_buf); 279 280 if (numccdfreebufs < NCCDFREEHIWAT) { 281 cbp->cb_freenext = ccdfreebufs; 282 ccdfreebufs = cbp; 283 ++numccdfreebufs; 284 } else { 285 free((caddr_t)cbp, M_DEVBUF); 286 } 287 } 288 289 290 /* 291 * Number of blocks to untouched in front of a component partition. 292 * This is to avoid violating its disklabel area when it starts at the 293 * beginning of the slice. 294 */ 295 #if !defined(CCD_OFFSET) 296 #define CCD_OFFSET 16 297 #endif 298 299 /* 300 * Called by main() during pseudo-device attachment. All we need 301 * to do is allocate enough space for devices to be configured later, and 302 * add devsw entries. 303 */ 304 static void 305 ccdattach() 306 { 307 int i; 308 int num = NCCD; 309 310 if (num > 1) 311 printf("ccd0-%d: Concatenated disk drivers\n", num-1); 312 else 313 printf("ccd0: Concatenated disk driver\n"); 314 315 ccd_softc = malloc(num * sizeof(struct ccd_softc), M_DEVBUF, 316 M_WAITOK | M_ZERO); 317 ccddevs = malloc(num * sizeof(struct ccddevice), M_DEVBUF, 318 M_WAITOK | M_ZERO); 319 numccd = num; 320 321 cdevsw_add(&ccd_cdevsw, 0, 0); 322 /* XXX: is this necessary? */ 323 for (i = 0; i < numccd; ++i) 324 ccddevs[i].ccd_dk = -1; 325 } 326 327 static int 328 ccd_modevent(mod, type, data) 329 module_t mod; 330 int type; 331 void *data; 332 { 333 int error = 0; 334 335 switch (type) { 336 case MOD_LOAD: 337 ccdattach(); 338 break; 339 340 case MOD_UNLOAD: 341 printf("ccd0: Unload not supported!\n"); 342 error = EOPNOTSUPP; 343 break; 344 345 default: /* MOD_SHUTDOWN etc */ 346 break; 347 } 348 return (error); 349 } 350 351 DEV_MODULE(ccd, ccd_modevent, NULL); 352 353 static int 354 ccdinit(struct ccddevice *ccd, char **cpaths, struct thread *td) 355 { 356 struct ccd_softc *cs = &ccd_softc[ccd->ccd_unit]; 357 struct ccdcinfo *ci = NULL; /* XXX */ 358 size_t size; 359 int ix; 360 struct vnode *vp; 361 size_t minsize; 362 int maxsecsize; 363 struct partinfo dpart; 364 struct ccdgeom *ccg = &cs->sc_geom; 365 char tmppath[MAXPATHLEN]; 366 int error = 0; 367 struct ucred *cred; 368 369 KKASSERT(td->td_proc); 370 cred = td->td_proc->p_ucred; 371 372 #ifdef DEBUG 373 if (ccddebug & (CCDB_FOLLOW|CCDB_INIT)) 374 printf("ccdinit: unit %d\n", ccd->ccd_unit); 375 #endif 376 377 cs->sc_size = 0; 378 cs->sc_ileave = ccd->ccd_interleave; 379 cs->sc_nccdisks = ccd->ccd_ndev; 380 381 /* Allocate space for the component info. */ 382 cs->sc_cinfo = malloc(cs->sc_nccdisks * sizeof(struct ccdcinfo), 383 M_DEVBUF, M_WAITOK); 384 385 /* 386 * Verify that each component piece exists and record 387 * relevant information about it. 388 */ 389 maxsecsize = 0; 390 minsize = 0; 391 for (ix = 0; ix < cs->sc_nccdisks; ix++) { 392 vp = ccd->ccd_vpp[ix]; 393 ci = &cs->sc_cinfo[ix]; 394 ci->ci_vp = vp; 395 396 /* 397 * Copy in the pathname of the component. 398 */ 399 bzero(tmppath, sizeof(tmppath)); /* sanity */ 400 if ((error = copyinstr(cpaths[ix], tmppath, 401 MAXPATHLEN, &ci->ci_pathlen)) != 0) { 402 #ifdef DEBUG 403 if (ccddebug & (CCDB_FOLLOW|CCDB_INIT)) 404 printf("ccd%d: can't copy path, error = %d\n", 405 ccd->ccd_unit, error); 406 #endif 407 goto fail; 408 } 409 ci->ci_path = malloc(ci->ci_pathlen, M_DEVBUF, M_WAITOK); 410 bcopy(tmppath, ci->ci_path, ci->ci_pathlen); 411 412 ci->ci_dev = vn_todev(vp); 413 414 /* 415 * Get partition information for the component. 416 */ 417 if ((error = VOP_IOCTL(vp, DIOCGPART, (caddr_t)&dpart, 418 FREAD, cred, td)) != 0) { 419 #ifdef DEBUG 420 if (ccddebug & (CCDB_FOLLOW|CCDB_INIT)) 421 printf("ccd%d: %s: ioctl failed, error = %d\n", 422 ccd->ccd_unit, ci->ci_path, error); 423 #endif 424 goto fail; 425 } 426 if (dpart.part->p_fstype == FS_BSDFFS) { 427 maxsecsize = 428 ((dpart.disklab->d_secsize > maxsecsize) ? 429 dpart.disklab->d_secsize : maxsecsize); 430 size = dpart.part->p_size - CCD_OFFSET; 431 } else { 432 #ifdef DEBUG 433 if (ccddebug & (CCDB_FOLLOW|CCDB_INIT)) 434 printf("ccd%d: %s: incorrect partition type\n", 435 ccd->ccd_unit, ci->ci_path); 436 #endif 437 error = EFTYPE; 438 goto fail; 439 } 440 441 /* 442 * Calculate the size, truncating to an interleave 443 * boundary if necessary. 444 */ 445 446 if (cs->sc_ileave > 1) 447 size -= size % cs->sc_ileave; 448 449 if (size == 0) { 450 #ifdef DEBUG 451 if (ccddebug & (CCDB_FOLLOW|CCDB_INIT)) 452 printf("ccd%d: %s: size == 0\n", 453 ccd->ccd_unit, ci->ci_path); 454 #endif 455 error = ENODEV; 456 goto fail; 457 } 458 459 if (minsize == 0 || size < minsize) 460 minsize = size; 461 ci->ci_size = size; 462 cs->sc_size += size; 463 } 464 465 /* 466 * Don't allow the interleave to be smaller than 467 * the biggest component sector. 468 */ 469 if ((cs->sc_ileave > 0) && 470 (cs->sc_ileave < (maxsecsize / DEV_BSIZE))) { 471 #ifdef DEBUG 472 if (ccddebug & (CCDB_FOLLOW|CCDB_INIT)) 473 printf("ccd%d: interleave must be at least %d\n", 474 ccd->ccd_unit, (maxsecsize / DEV_BSIZE)); 475 #endif 476 error = EINVAL; 477 goto fail; 478 } 479 480 /* 481 * If uniform interleave is desired set all sizes to that of 482 * the smallest component. This will guarentee that a single 483 * interleave table is generated. 484 * 485 * Lost space must be taken into account when calculating the 486 * overall size. Half the space is lost when CCDF_MIRROR is 487 * specified. One disk is lost when CCDF_PARITY is specified. 488 */ 489 if (ccd->ccd_flags & CCDF_UNIFORM) { 490 for (ci = cs->sc_cinfo; 491 ci < &cs->sc_cinfo[cs->sc_nccdisks]; ci++) { 492 ci->ci_size = minsize; 493 } 494 if (ccd->ccd_flags & CCDF_MIRROR) { 495 /* 496 * Check to see if an even number of components 497 * have been specified. The interleave must also 498 * be non-zero in order for us to be able to 499 * guarentee the topology. 500 */ 501 if (cs->sc_nccdisks % 2) { 502 printf("ccd%d: mirroring requires an even number of disks\n", ccd->ccd_unit ); 503 error = EINVAL; 504 goto fail; 505 } 506 if (cs->sc_ileave == 0) { 507 printf("ccd%d: an interleave must be specified when mirroring\n", ccd->ccd_unit); 508 error = EINVAL; 509 goto fail; 510 } 511 cs->sc_size = (cs->sc_nccdisks/2) * minsize; 512 } else if (ccd->ccd_flags & CCDF_PARITY) { 513 cs->sc_size = (cs->sc_nccdisks-1) * minsize; 514 } else { 515 if (cs->sc_ileave == 0) { 516 printf("ccd%d: an interleave must be specified when using parity\n", ccd->ccd_unit); 517 error = EINVAL; 518 goto fail; 519 } 520 cs->sc_size = cs->sc_nccdisks * minsize; 521 } 522 } 523 524 /* 525 * Construct the interleave table. 526 */ 527 ccdinterleave(cs, ccd->ccd_unit); 528 529 /* 530 * Create pseudo-geometry based on 1MB cylinders. It's 531 * pretty close. 532 */ 533 ccg->ccg_secsize = maxsecsize; 534 ccg->ccg_ntracks = 1; 535 ccg->ccg_nsectors = 1024 * 1024 / ccg->ccg_secsize; 536 ccg->ccg_ncylinders = cs->sc_size / ccg->ccg_nsectors; 537 538 /* 539 * Add an devstat entry for this device. 540 */ 541 devstat_add_entry(&cs->device_stats, "ccd", ccd->ccd_unit, 542 ccg->ccg_secsize, DEVSTAT_ALL_SUPPORTED, 543 DEVSTAT_TYPE_STORARRAY |DEVSTAT_TYPE_IF_OTHER, 544 DEVSTAT_PRIORITY_ARRAY); 545 546 cs->sc_flags |= CCDF_INITED; 547 cs->sc_cflags = ccd->ccd_flags; /* So we can find out later... */ 548 cs->sc_unit = ccd->ccd_unit; 549 return (0); 550 fail: 551 while (ci > cs->sc_cinfo) { 552 ci--; 553 free(ci->ci_path, M_DEVBUF); 554 } 555 free(cs->sc_cinfo, M_DEVBUF); 556 return (error); 557 } 558 559 static void 560 ccdinterleave(cs, unit) 561 struct ccd_softc *cs; 562 int unit; 563 { 564 struct ccdcinfo *ci, *smallci; 565 struct ccdiinfo *ii; 566 daddr_t bn, lbn; 567 int ix; 568 u_long size; 569 570 #ifdef DEBUG 571 if (ccddebug & CCDB_INIT) 572 printf("ccdinterleave(%x): ileave %d\n", cs, cs->sc_ileave); 573 #endif 574 575 /* 576 * Allocate an interleave table. The worst case occurs when each 577 * of N disks is of a different size, resulting in N interleave 578 * tables. 579 * 580 * Chances are this is too big, but we don't care. 581 */ 582 size = (cs->sc_nccdisks + 1) * sizeof(struct ccdiinfo); 583 cs->sc_itable = (struct ccdiinfo *)malloc(size, M_DEVBUF, M_WAITOK); 584 bzero((caddr_t)cs->sc_itable, size); 585 586 /* 587 * Trivial case: no interleave (actually interleave of disk size). 588 * Each table entry represents a single component in its entirety. 589 * 590 * An interleave of 0 may not be used with a mirror or parity setup. 591 */ 592 if (cs->sc_ileave == 0) { 593 bn = 0; 594 ii = cs->sc_itable; 595 596 for (ix = 0; ix < cs->sc_nccdisks; ix++) { 597 /* Allocate space for ii_index. */ 598 ii->ii_index = malloc(sizeof(int), M_DEVBUF, M_WAITOK); 599 ii->ii_ndisk = 1; 600 ii->ii_startblk = bn; 601 ii->ii_startoff = 0; 602 ii->ii_index[0] = ix; 603 bn += cs->sc_cinfo[ix].ci_size; 604 ii++; 605 } 606 ii->ii_ndisk = 0; 607 #ifdef DEBUG 608 if (ccddebug & CCDB_INIT) 609 printiinfo(cs->sc_itable); 610 #endif 611 return; 612 } 613 614 /* 615 * The following isn't fast or pretty; it doesn't have to be. 616 */ 617 size = 0; 618 bn = lbn = 0; 619 for (ii = cs->sc_itable; ; ii++) { 620 /* 621 * Allocate space for ii_index. We might allocate more then 622 * we use. 623 */ 624 ii->ii_index = malloc((sizeof(int) * cs->sc_nccdisks), 625 M_DEVBUF, M_WAITOK); 626 627 /* 628 * Locate the smallest of the remaining components 629 */ 630 smallci = NULL; 631 for (ci = cs->sc_cinfo; ci < &cs->sc_cinfo[cs->sc_nccdisks]; 632 ci++) { 633 if (ci->ci_size > size && 634 (smallci == NULL || 635 ci->ci_size < smallci->ci_size)) { 636 smallci = ci; 637 } 638 } 639 640 /* 641 * Nobody left, all done 642 */ 643 if (smallci == NULL) { 644 ii->ii_ndisk = 0; 645 break; 646 } 647 648 /* 649 * Record starting logical block using an sc_ileave blocksize. 650 */ 651 ii->ii_startblk = bn / cs->sc_ileave; 652 653 /* 654 * Record starting comopnent block using an sc_ileave 655 * blocksize. This value is relative to the beginning of 656 * a component disk. 657 */ 658 ii->ii_startoff = lbn; 659 660 /* 661 * Determine how many disks take part in this interleave 662 * and record their indices. 663 */ 664 ix = 0; 665 for (ci = cs->sc_cinfo; 666 ci < &cs->sc_cinfo[cs->sc_nccdisks]; ci++) { 667 if (ci->ci_size >= smallci->ci_size) { 668 ii->ii_index[ix++] = ci - cs->sc_cinfo; 669 } 670 } 671 ii->ii_ndisk = ix; 672 bn += ix * (smallci->ci_size - size); 673 lbn = smallci->ci_size / cs->sc_ileave; 674 size = smallci->ci_size; 675 } 676 #ifdef DEBUG 677 if (ccddebug & CCDB_INIT) 678 printiinfo(cs->sc_itable); 679 #endif 680 } 681 682 /* ARGSUSED */ 683 static int 684 ccdopen(dev_t dev, int flags, int fmt, d_thread_t *td) 685 { 686 int unit = ccdunit(dev); 687 struct ccd_softc *cs; 688 struct disklabel *lp; 689 int error = 0, part, pmask; 690 691 #ifdef DEBUG 692 if (ccddebug & CCDB_FOLLOW) 693 printf("ccdopen(%x, %x)\n", dev, flags); 694 #endif 695 if (unit >= numccd) 696 return (ENXIO); 697 cs = &ccd_softc[unit]; 698 699 if ((error = ccdlock(cs)) != 0) 700 return (error); 701 702 lp = &cs->sc_label; 703 704 part = ccdpart(dev); 705 pmask = (1 << part); 706 707 /* 708 * If we're initialized, check to see if there are any other 709 * open partitions. If not, then it's safe to update 710 * the in-core disklabel. 711 */ 712 if ((cs->sc_flags & CCDF_INITED) && (cs->sc_openmask == 0)) 713 ccdgetdisklabel(dev); 714 715 /* Check that the partition exists. */ 716 if (part != RAW_PART && ((part >= lp->d_npartitions) || 717 (lp->d_partitions[part].p_fstype == FS_UNUSED))) { 718 error = ENXIO; 719 goto done; 720 } 721 722 cs->sc_openmask |= pmask; 723 done: 724 ccdunlock(cs); 725 return (0); 726 } 727 728 /* ARGSUSED */ 729 static int 730 ccdclose(dev_t dev, int flags, int fmt, d_thread_t *td) 731 { 732 int unit = ccdunit(dev); 733 struct ccd_softc *cs; 734 int error = 0, part; 735 736 #ifdef DEBUG 737 if (ccddebug & CCDB_FOLLOW) 738 printf("ccdclose(%x, %x)\n", dev, flags); 739 #endif 740 741 if (unit >= numccd) 742 return (ENXIO); 743 cs = &ccd_softc[unit]; 744 745 if ((error = ccdlock(cs)) != 0) 746 return (error); 747 748 part = ccdpart(dev); 749 750 /* ...that much closer to allowing unconfiguration... */ 751 cs->sc_openmask &= ~(1 << part); 752 ccdunlock(cs); 753 return (0); 754 } 755 756 static void 757 ccdstrategy(bp) 758 struct buf *bp; 759 { 760 int unit = ccdunit(bp->b_dev); 761 struct ccd_softc *cs = &ccd_softc[unit]; 762 int wlabel; 763 struct disklabel *lp; 764 765 #ifdef DEBUG 766 if (ccddebug & CCDB_FOLLOW) 767 printf("ccdstrategy(%x): unit %d\n", bp, unit); 768 #endif 769 if ((cs->sc_flags & CCDF_INITED) == 0) { 770 bp->b_error = ENXIO; 771 bp->b_flags |= B_ERROR; 772 goto done; 773 } 774 775 /* If it's a nil transfer, wake up the top half now. */ 776 if (bp->b_bcount == 0) 777 goto done; 778 779 lp = &cs->sc_label; 780 781 /* 782 * Do bounds checking and adjust transfer. If there's an 783 * error, the bounds check will flag that for us. 784 */ 785 wlabel = cs->sc_flags & (CCDF_WLABEL|CCDF_LABELLING); 786 if (ccdpart(bp->b_dev) != RAW_PART) { 787 if (bounds_check_with_label(bp, lp, wlabel) <= 0) 788 goto done; 789 } else { 790 int pbn; /* in sc_secsize chunks */ 791 long sz; /* in sc_secsize chunks */ 792 793 pbn = bp->b_blkno / (cs->sc_geom.ccg_secsize / DEV_BSIZE); 794 sz = howmany(bp->b_bcount, cs->sc_geom.ccg_secsize); 795 796 /* 797 * If out of bounds return an error. If at the EOF point, 798 * simply read or write less. 799 */ 800 801 if (pbn < 0 || pbn >= cs->sc_size) { 802 bp->b_resid = bp->b_bcount; 803 if (pbn != cs->sc_size) { 804 bp->b_error = EINVAL; 805 bp->b_flags |= B_ERROR | B_INVAL; 806 } 807 goto done; 808 } 809 810 /* 811 * If the request crosses EOF, truncate the request. 812 */ 813 if (pbn + sz > cs->sc_size) { 814 bp->b_bcount = (cs->sc_size - pbn) * 815 cs->sc_geom.ccg_secsize; 816 } 817 } 818 819 bp->b_resid = bp->b_bcount; 820 821 /* 822 * "Start" the unit. 823 */ 824 crit_enter(); 825 ccdstart(cs, bp); 826 crit_exit(); 827 return; 828 done: 829 biodone(bp); 830 } 831 832 static void 833 ccdstart(cs, bp) 834 struct ccd_softc *cs; 835 struct buf *bp; 836 { 837 long bcount, rcount; 838 struct ccdbuf *cbp[4]; 839 /* XXX! : 2 reads and 2 writes for RAID 4/5 */ 840 caddr_t addr; 841 daddr_t bn; 842 struct partition *pp; 843 844 #ifdef DEBUG 845 if (ccddebug & CCDB_FOLLOW) 846 printf("ccdstart(%x, %x)\n", cs, bp); 847 #endif 848 849 /* Record the transaction start */ 850 devstat_start_transaction(&cs->device_stats); 851 852 /* 853 * Translate the partition-relative block number to an absolute. 854 */ 855 bn = bp->b_blkno; 856 if (ccdpart(bp->b_dev) != RAW_PART) { 857 pp = &cs->sc_label.d_partitions[ccdpart(bp->b_dev)]; 858 bn += pp->p_offset; 859 } 860 861 /* 862 * Allocate component buffers and fire off the requests 863 */ 864 addr = bp->b_data; 865 for (bcount = bp->b_bcount; bcount > 0; bcount -= rcount) { 866 ccdbuffer(cbp, cs, bp, bn, addr, bcount); 867 rcount = cbp[0]->cb_buf.b_bcount; 868 869 if (cs->sc_cflags & CCDF_MIRROR) { 870 /* 871 * Mirroring. Writes go to both disks, reads are 872 * taken from whichever disk seems most appropriate. 873 * 874 * We attempt to localize reads to the disk whos arm 875 * is nearest the read request. We ignore seeks due 876 * to writes when making this determination and we 877 * also try to avoid hogging. 878 */ 879 if ((cbp[0]->cb_buf.b_flags & B_READ) == 0) { 880 cbp[0]->cb_buf.b_vp->v_numoutput++; 881 cbp[1]->cb_buf.b_vp->v_numoutput++; 882 VOP_STRATEGY(cbp[0]->cb_buf.b_vp, 883 &cbp[0]->cb_buf); 884 VOP_STRATEGY(cbp[1]->cb_buf.b_vp, 885 &cbp[1]->cb_buf); 886 } else { 887 int pick = cs->sc_pick; 888 daddr_t range = cs->sc_size / 16; 889 890 if (bn < cs->sc_blk[pick] - range || 891 bn > cs->sc_blk[pick] + range 892 ) { 893 cs->sc_pick = pick = 1 - pick; 894 } 895 cs->sc_blk[pick] = bn + btodb(rcount); 896 VOP_STRATEGY(cbp[pick]->cb_buf.b_vp, 897 &cbp[pick]->cb_buf); 898 } 899 } else { 900 /* 901 * Not mirroring 902 */ 903 if ((cbp[0]->cb_buf.b_flags & B_READ) == 0) 904 cbp[0]->cb_buf.b_vp->v_numoutput++; 905 VOP_STRATEGY(cbp[0]->cb_buf.b_vp, &cbp[0]->cb_buf); 906 } 907 bn += btodb(rcount); 908 addr += rcount; 909 } 910 } 911 912 /* 913 * Build a component buffer header. 914 */ 915 static void 916 ccdbuffer(cb, cs, bp, bn, addr, bcount) 917 struct ccdbuf **cb; 918 struct ccd_softc *cs; 919 struct buf *bp; 920 daddr_t bn; 921 caddr_t addr; 922 long bcount; 923 { 924 struct ccdcinfo *ci, *ci2 = NULL; /* XXX */ 925 struct ccdbuf *cbp; 926 daddr_t cbn, cboff; 927 off_t cbc; 928 929 #ifdef DEBUG 930 if (ccddebug & CCDB_IO) 931 printf("ccdbuffer(%x, %x, %d, %x, %d)\n", 932 cs, bp, bn, addr, bcount); 933 #endif 934 /* 935 * Determine which component bn falls in. 936 */ 937 cbn = bn; 938 cboff = 0; 939 940 if (cs->sc_ileave == 0) { 941 /* 942 * Serially concatenated and neither a mirror nor a parity 943 * config. This is a special case. 944 */ 945 daddr_t sblk; 946 947 sblk = 0; 948 for (ci = cs->sc_cinfo; cbn >= sblk + ci->ci_size; ci++) 949 sblk += ci->ci_size; 950 cbn -= sblk; 951 } else { 952 struct ccdiinfo *ii; 953 int ccdisk, off; 954 955 /* 956 * Calculate cbn, the logical superblock (sc_ileave chunks), 957 * and cboff, a normal block offset (DEV_BSIZE chunks) relative 958 * to cbn. 959 */ 960 cboff = cbn % cs->sc_ileave; /* DEV_BSIZE gran */ 961 cbn = cbn / cs->sc_ileave; /* DEV_BSIZE * ileave gran */ 962 963 /* 964 * Figure out which interleave table to use. 965 */ 966 for (ii = cs->sc_itable; ii->ii_ndisk; ii++) { 967 if (ii->ii_startblk > cbn) 968 break; 969 } 970 ii--; 971 972 /* 973 * off is the logical superblock relative to the beginning 974 * of this interleave block. 975 */ 976 off = cbn - ii->ii_startblk; 977 978 /* 979 * We must calculate which disk component to use (ccdisk), 980 * and recalculate cbn to be the superblock relative to 981 * the beginning of the component. This is typically done by 982 * adding 'off' and ii->ii_startoff together. However, 'off' 983 * must typically be divided by the number of components in 984 * this interleave array to be properly convert it from a 985 * CCD-relative logical superblock number to a 986 * component-relative superblock number. 987 */ 988 if (ii->ii_ndisk == 1) { 989 /* 990 * When we have just one disk, it can't be a mirror 991 * or a parity config. 992 */ 993 ccdisk = ii->ii_index[0]; 994 cbn = ii->ii_startoff + off; 995 } else { 996 if (cs->sc_cflags & CCDF_MIRROR) { 997 /* 998 * We have forced a uniform mapping, resulting 999 * in a single interleave array. We double 1000 * up on the first half of the available 1001 * components and our mirror is in the second 1002 * half. This only works with a single 1003 * interleave array because doubling up 1004 * doubles the number of sectors, so there 1005 * cannot be another interleave array because 1006 * the next interleave array's calculations 1007 * would be off. 1008 */ 1009 int ndisk2 = ii->ii_ndisk / 2; 1010 ccdisk = ii->ii_index[off % ndisk2]; 1011 cbn = ii->ii_startoff + off / ndisk2; 1012 ci2 = &cs->sc_cinfo[ccdisk + ndisk2]; 1013 } else if (cs->sc_cflags & CCDF_PARITY) { 1014 /* 1015 * XXX not implemented yet 1016 */ 1017 int ndisk2 = ii->ii_ndisk - 1; 1018 ccdisk = ii->ii_index[off % ndisk2]; 1019 cbn = ii->ii_startoff + off / ndisk2; 1020 if (cbn % ii->ii_ndisk <= ccdisk) 1021 ccdisk++; 1022 } else { 1023 ccdisk = ii->ii_index[off % ii->ii_ndisk]; 1024 cbn = ii->ii_startoff + off / ii->ii_ndisk; 1025 } 1026 } 1027 1028 ci = &cs->sc_cinfo[ccdisk]; 1029 1030 /* 1031 * Convert cbn from a superblock to a normal block so it 1032 * can be used to calculate (along with cboff) the normal 1033 * block index into this particular disk. 1034 */ 1035 cbn *= cs->sc_ileave; 1036 } 1037 1038 /* 1039 * Fill in the component buf structure. 1040 */ 1041 cbp = getccdbuf(NULL); 1042 cbp->cb_buf.b_flags = bp->b_flags; 1043 cbp->cb_buf.b_iodone = (void (*)(struct buf *))ccdiodone; 1044 cbp->cb_buf.b_dev = ci->ci_dev; /* XXX */ 1045 cbp->cb_buf.b_blkno = cbn + cboff + CCD_OFFSET; 1046 cbp->cb_buf.b_offset = dbtob(cbn + cboff + CCD_OFFSET); 1047 cbp->cb_buf.b_data = addr; 1048 cbp->cb_buf.b_vp = ci->ci_vp; 1049 if (cs->sc_ileave == 0) 1050 cbc = dbtob((off_t)(ci->ci_size - cbn)); 1051 else 1052 cbc = dbtob((off_t)(cs->sc_ileave - cboff)); 1053 cbp->cb_buf.b_bcount = (cbc < bcount) ? cbc : bcount; 1054 cbp->cb_buf.b_bufsize = cbp->cb_buf.b_bcount; 1055 1056 /* 1057 * context for ccdiodone 1058 */ 1059 cbp->cb_obp = bp; 1060 cbp->cb_unit = cs - ccd_softc; 1061 cbp->cb_comp = ci - cs->sc_cinfo; 1062 1063 #ifdef DEBUG 1064 if (ccddebug & CCDB_IO) 1065 printf(" dev %x(u%d): cbp %x bn %d addr %x bcnt %d\n", 1066 ci->ci_dev, ci-cs->sc_cinfo, cbp, cbp->cb_buf.b_blkno, 1067 cbp->cb_buf.b_data, cbp->cb_buf.b_bcount); 1068 #endif 1069 cb[0] = cbp; 1070 1071 /* 1072 * Note: both I/O's setup when reading from mirror, but only one 1073 * will be executed. 1074 */ 1075 if (cs->sc_cflags & CCDF_MIRROR) { 1076 /* mirror, setup second I/O */ 1077 cbp = getccdbuf(cb[0]); 1078 cbp->cb_buf.b_dev = ci2->ci_dev; 1079 cbp->cb_buf.b_vp = ci2->ci_vp; 1080 cbp->cb_comp = ci2 - cs->sc_cinfo; 1081 cb[1] = cbp; 1082 /* link together the ccdbuf's and clear "mirror done" flag */ 1083 cb[0]->cb_mirror = cb[1]; 1084 cb[1]->cb_mirror = cb[0]; 1085 cb[0]->cb_pflags &= ~CCDPF_MIRROR_DONE; 1086 cb[1]->cb_pflags &= ~CCDPF_MIRROR_DONE; 1087 } 1088 } 1089 1090 static void 1091 ccdintr(cs, bp) 1092 struct ccd_softc *cs; 1093 struct buf *bp; 1094 { 1095 #ifdef DEBUG 1096 if (ccddebug & CCDB_FOLLOW) 1097 printf("ccdintr(%x, %x)\n", cs, bp); 1098 #endif 1099 /* 1100 * Request is done for better or worse, wakeup the top half. 1101 */ 1102 if (bp->b_flags & B_ERROR) 1103 bp->b_resid = bp->b_bcount; 1104 devstat_end_transaction_buf(&cs->device_stats, bp); 1105 biodone(bp); 1106 } 1107 1108 /* 1109 * Called at interrupt time. 1110 * Mark the component as done and if all components are done, 1111 * take a ccd interrupt. 1112 */ 1113 static void 1114 ccdiodone(cbp) 1115 struct ccdbuf *cbp; 1116 { 1117 struct buf *bp = cbp->cb_obp; 1118 int unit = cbp->cb_unit; 1119 int count; 1120 1121 crit_enter(); 1122 #ifdef DEBUG 1123 if (ccddebug & CCDB_FOLLOW) 1124 printf("ccdiodone(%x)\n", cbp); 1125 if (ccddebug & CCDB_IO) { 1126 printf("ccdiodone: bp %x bcount %d resid %d\n", 1127 bp, bp->b_bcount, bp->b_resid); 1128 printf(" dev %x(u%d), cbp %x bn %d addr %x bcnt %d\n", 1129 cbp->cb_buf.b_dev, cbp->cb_comp, cbp, 1130 cbp->cb_buf.b_blkno, cbp->cb_buf.b_data, 1131 cbp->cb_buf.b_bcount); 1132 } 1133 #endif 1134 /* 1135 * If an error occured, report it. If this is a mirrored 1136 * configuration and the first of two possible reads, do not 1137 * set the error in the bp yet because the second read may 1138 * succeed. 1139 */ 1140 1141 if (cbp->cb_buf.b_flags & B_ERROR) { 1142 const char *msg = ""; 1143 1144 if ((ccd_softc[unit].sc_cflags & CCDF_MIRROR) && 1145 (cbp->cb_buf.b_flags & B_READ) && 1146 (cbp->cb_pflags & CCDPF_MIRROR_DONE) == 0) { 1147 /* 1148 * We will try our read on the other disk down 1149 * below, also reverse the default pick so if we 1150 * are doing a scan we do not keep hitting the 1151 * bad disk first. 1152 */ 1153 struct ccd_softc *cs = &ccd_softc[unit]; 1154 1155 msg = ", trying other disk"; 1156 cs->sc_pick = 1 - cs->sc_pick; 1157 cs->sc_blk[cs->sc_pick] = bp->b_blkno; 1158 } else { 1159 bp->b_flags |= B_ERROR; 1160 bp->b_error = cbp->cb_buf.b_error ? 1161 cbp->cb_buf.b_error : EIO; 1162 } 1163 printf("ccd%d: error %d on component %d block %d (ccd block %d)%s\n", 1164 unit, bp->b_error, cbp->cb_comp, 1165 (int)cbp->cb_buf.b_blkno, bp->b_blkno, msg); 1166 } 1167 1168 /* 1169 * Process mirror. If we are writing, I/O has been initiated on both 1170 * buffers and we fall through only after both are finished. 1171 * 1172 * If we are reading only one I/O is initiated at a time. If an 1173 * error occurs we initiate the second I/O and return, otherwise 1174 * we free the second I/O without initiating it. 1175 */ 1176 1177 if (ccd_softc[unit].sc_cflags & CCDF_MIRROR) { 1178 if ((cbp->cb_buf.b_flags & B_READ) == 0) { 1179 /* 1180 * When writing, handshake with the second buffer 1181 * to determine when both are done. If both are not 1182 * done, return here. 1183 */ 1184 if ((cbp->cb_pflags & CCDPF_MIRROR_DONE) == 0) { 1185 cbp->cb_mirror->cb_pflags |= CCDPF_MIRROR_DONE; 1186 putccdbuf(cbp); 1187 crit_exit(); 1188 return; 1189 } 1190 } else { 1191 /* 1192 * When reading, either dispose of the second buffer 1193 * or initiate I/O on the second buffer if an error 1194 * occured with this one. 1195 */ 1196 if ((cbp->cb_pflags & CCDPF_MIRROR_DONE) == 0) { 1197 if (cbp->cb_buf.b_flags & B_ERROR) { 1198 cbp->cb_mirror->cb_pflags |= 1199 CCDPF_MIRROR_DONE; 1200 VOP_STRATEGY( 1201 cbp->cb_mirror->cb_buf.b_vp, 1202 &cbp->cb_mirror->cb_buf 1203 ); 1204 putccdbuf(cbp); 1205 crit_exit(); 1206 return; 1207 } else { 1208 putccdbuf(cbp->cb_mirror); 1209 /* fall through */ 1210 } 1211 } 1212 } 1213 } 1214 1215 /* 1216 * use b_bufsize to determine how big the original request was rather 1217 * then b_bcount, because b_bcount may have been truncated for EOF. 1218 * 1219 * XXX We check for an error, but we do not test the resid for an 1220 * aligned EOF condition. This may result in character & block 1221 * device access not recognizing EOF properly when read or written 1222 * sequentially, but will not effect filesystems. 1223 */ 1224 count = cbp->cb_buf.b_bufsize; 1225 putccdbuf(cbp); 1226 1227 /* 1228 * If all done, "interrupt". 1229 */ 1230 bp->b_resid -= count; 1231 if (bp->b_resid < 0) 1232 panic("ccdiodone: count"); 1233 if (bp->b_resid == 0) 1234 ccdintr(&ccd_softc[unit], bp); 1235 crit_exit(); 1236 } 1237 1238 static int 1239 ccdioctl(dev_t dev, u_long cmd, caddr_t data, int flag, d_thread_t *td) 1240 { 1241 int unit = ccdunit(dev); 1242 int i, j, lookedup = 0, error = 0; 1243 int part, pmask; 1244 struct ccd_softc *cs; 1245 struct ccd_ioctl *ccio = (struct ccd_ioctl *)data; 1246 struct ccddevice ccd; 1247 char **cpp; 1248 struct vnode **vpp; 1249 struct ucred *cred; 1250 1251 KKASSERT(td->td_proc != NULL); 1252 cred = td->td_proc->p_ucred; 1253 1254 if (unit >= numccd) 1255 return (ENXIO); 1256 cs = &ccd_softc[unit]; 1257 1258 bzero(&ccd, sizeof(ccd)); 1259 1260 switch (cmd) { 1261 case CCDIOCSET: 1262 if (cs->sc_flags & CCDF_INITED) 1263 return (EBUSY); 1264 1265 if ((flag & FWRITE) == 0) 1266 return (EBADF); 1267 1268 if ((error = ccdlock(cs)) != 0) 1269 return (error); 1270 1271 if (ccio->ccio_ndisks > CCD_MAXNDISKS) 1272 return (EINVAL); 1273 1274 /* Fill in some important bits. */ 1275 ccd.ccd_unit = unit; 1276 ccd.ccd_interleave = ccio->ccio_ileave; 1277 if (ccd.ccd_interleave == 0 && 1278 ((ccio->ccio_flags & CCDF_MIRROR) || 1279 (ccio->ccio_flags & CCDF_PARITY))) { 1280 printf("ccd%d: disabling mirror/parity, interleave is 0\n", unit); 1281 ccio->ccio_flags &= ~(CCDF_MIRROR | CCDF_PARITY); 1282 } 1283 if ((ccio->ccio_flags & CCDF_MIRROR) && 1284 (ccio->ccio_flags & CCDF_PARITY)) { 1285 printf("ccd%d: can't specify both mirror and parity, using mirror\n", unit); 1286 ccio->ccio_flags &= ~CCDF_PARITY; 1287 } 1288 if ((ccio->ccio_flags & (CCDF_MIRROR | CCDF_PARITY)) && 1289 !(ccio->ccio_flags & CCDF_UNIFORM)) { 1290 printf("ccd%d: mirror/parity forces uniform flag\n", 1291 unit); 1292 ccio->ccio_flags |= CCDF_UNIFORM; 1293 } 1294 ccd.ccd_flags = ccio->ccio_flags & CCDF_USERMASK; 1295 1296 /* 1297 * Allocate space for and copy in the array of 1298 * componet pathnames and device numbers. 1299 */ 1300 cpp = malloc(ccio->ccio_ndisks * sizeof(char *), 1301 M_DEVBUF, M_WAITOK); 1302 vpp = malloc(ccio->ccio_ndisks * sizeof(struct vnode *), 1303 M_DEVBUF, M_WAITOK); 1304 1305 error = copyin((caddr_t)ccio->ccio_disks, (caddr_t)cpp, 1306 ccio->ccio_ndisks * sizeof(char **)); 1307 if (error) { 1308 free(vpp, M_DEVBUF); 1309 free(cpp, M_DEVBUF); 1310 ccdunlock(cs); 1311 return (error); 1312 } 1313 1314 #ifdef DEBUG 1315 if (ccddebug & CCDB_INIT) 1316 for (i = 0; i < ccio->ccio_ndisks; ++i) 1317 printf("ccdioctl: component %d: 0x%x\n", 1318 i, cpp[i]); 1319 #endif 1320 1321 for (i = 0; i < ccio->ccio_ndisks; ++i) { 1322 #ifdef DEBUG 1323 if (ccddebug & CCDB_INIT) 1324 printf("ccdioctl: lookedup = %d\n", lookedup); 1325 #endif 1326 if ((error = ccdlookup(cpp[i], td, &vpp[i])) != 0) { 1327 for (j = 0; j < lookedup; ++j) 1328 (void)vn_close(vpp[j], FREAD|FWRITE, td); 1329 free(vpp, M_DEVBUF); 1330 free(cpp, M_DEVBUF); 1331 ccdunlock(cs); 1332 return (error); 1333 } 1334 ++lookedup; 1335 } 1336 ccd.ccd_cpp = cpp; 1337 ccd.ccd_vpp = vpp; 1338 ccd.ccd_ndev = ccio->ccio_ndisks; 1339 1340 /* 1341 * Initialize the ccd. Fills in the softc for us. 1342 */ 1343 if ((error = ccdinit(&ccd, cpp, td)) != 0) { 1344 for (j = 0; j < lookedup; ++j) 1345 (void)vn_close(vpp[j], FREAD|FWRITE, td); 1346 bzero(&ccd_softc[unit], sizeof(struct ccd_softc)); 1347 free(vpp, M_DEVBUF); 1348 free(cpp, M_DEVBUF); 1349 ccdunlock(cs); 1350 return (error); 1351 } 1352 1353 /* 1354 * The ccd has been successfully initialized, so 1355 * we can place it into the array and read the disklabel. 1356 */ 1357 bcopy(&ccd, &ccddevs[unit], sizeof(ccd)); 1358 ccio->ccio_unit = unit; 1359 ccio->ccio_size = cs->sc_size; 1360 ccdgetdisklabel(dev); 1361 1362 ccdunlock(cs); 1363 1364 break; 1365 1366 case CCDIOCCLR: 1367 if ((cs->sc_flags & CCDF_INITED) == 0) 1368 return (ENXIO); 1369 1370 if ((flag & FWRITE) == 0) 1371 return (EBADF); 1372 1373 if ((error = ccdlock(cs)) != 0) 1374 return (error); 1375 1376 /* Don't unconfigure if any other partitions are open */ 1377 part = ccdpart(dev); 1378 pmask = (1 << part); 1379 if ((cs->sc_openmask & ~pmask)) { 1380 ccdunlock(cs); 1381 return (EBUSY); 1382 } 1383 1384 /* 1385 * Free ccd_softc information and clear entry. 1386 */ 1387 1388 /* Close the components and free their pathnames. */ 1389 for (i = 0; i < cs->sc_nccdisks; ++i) { 1390 /* 1391 * XXX: this close could potentially fail and 1392 * cause Bad Things. Maybe we need to force 1393 * the close to happen? 1394 */ 1395 #ifdef DEBUG 1396 if (ccddebug & CCDB_VNODE) 1397 vprint("CCDIOCCLR: vnode info", 1398 cs->sc_cinfo[i].ci_vp); 1399 #endif 1400 (void)vn_close(cs->sc_cinfo[i].ci_vp, FREAD|FWRITE, td); 1401 free(cs->sc_cinfo[i].ci_path, M_DEVBUF); 1402 } 1403 1404 /* Free interleave index. */ 1405 for (i = 0; cs->sc_itable[i].ii_ndisk; ++i) 1406 free(cs->sc_itable[i].ii_index, M_DEVBUF); 1407 1408 /* Free component info and interleave table. */ 1409 free(cs->sc_cinfo, M_DEVBUF); 1410 free(cs->sc_itable, M_DEVBUF); 1411 cs->sc_flags &= ~CCDF_INITED; 1412 1413 /* 1414 * Free ccddevice information and clear entry. 1415 */ 1416 free(ccddevs[unit].ccd_cpp, M_DEVBUF); 1417 free(ccddevs[unit].ccd_vpp, M_DEVBUF); 1418 ccd.ccd_dk = -1; 1419 bcopy(&ccd, &ccddevs[unit], sizeof(ccd)); 1420 1421 /* 1422 * And remove the devstat entry. 1423 */ 1424 devstat_remove_entry(&cs->device_stats); 1425 1426 /* This must be atomic. */ 1427 crit_enter(); 1428 ccdunlock(cs); 1429 bzero(cs, sizeof(struct ccd_softc)); 1430 crit_exit(); 1431 1432 break; 1433 1434 case DIOCGDINFO: 1435 if ((cs->sc_flags & CCDF_INITED) == 0) 1436 return (ENXIO); 1437 1438 *(struct disklabel *)data = cs->sc_label; 1439 break; 1440 1441 case DIOCGPART: 1442 if ((cs->sc_flags & CCDF_INITED) == 0) 1443 return (ENXIO); 1444 1445 ((struct partinfo *)data)->disklab = &cs->sc_label; 1446 ((struct partinfo *)data)->part = 1447 &cs->sc_label.d_partitions[ccdpart(dev)]; 1448 break; 1449 1450 case DIOCWDINFO: 1451 case DIOCSDINFO: 1452 if ((cs->sc_flags & CCDF_INITED) == 0) 1453 return (ENXIO); 1454 1455 if ((flag & FWRITE) == 0) 1456 return (EBADF); 1457 1458 if ((error = ccdlock(cs)) != 0) 1459 return (error); 1460 1461 cs->sc_flags |= CCDF_LABELLING; 1462 1463 error = setdisklabel(&cs->sc_label, 1464 (struct disklabel *)data, 0); 1465 if (error == 0) { 1466 if (cmd == DIOCWDINFO) { 1467 dev_t cdev = CCDLABELDEV(dev); 1468 error = writedisklabel(cdev, &cs->sc_label); 1469 } 1470 } 1471 1472 cs->sc_flags &= ~CCDF_LABELLING; 1473 1474 ccdunlock(cs); 1475 1476 if (error) 1477 return (error); 1478 break; 1479 1480 case DIOCWLABEL: 1481 if ((cs->sc_flags & CCDF_INITED) == 0) 1482 return (ENXIO); 1483 1484 if ((flag & FWRITE) == 0) 1485 return (EBADF); 1486 if (*(int *)data != 0) 1487 cs->sc_flags |= CCDF_WLABEL; 1488 else 1489 cs->sc_flags &= ~CCDF_WLABEL; 1490 break; 1491 1492 default: 1493 return (ENOTTY); 1494 } 1495 1496 return (0); 1497 } 1498 1499 static int 1500 ccdsize(dev_t dev) 1501 { 1502 struct ccd_softc *cs; 1503 int part, size; 1504 1505 if (ccdopen(dev, 0, S_IFCHR, curthread)) 1506 return (-1); 1507 1508 cs = &ccd_softc[ccdunit(dev)]; 1509 part = ccdpart(dev); 1510 1511 if ((cs->sc_flags & CCDF_INITED) == 0) 1512 return (-1); 1513 1514 if (cs->sc_label.d_partitions[part].p_fstype != FS_SWAP) 1515 size = -1; 1516 else 1517 size = cs->sc_label.d_partitions[part].p_size; 1518 1519 if (ccdclose(dev, 0, S_IFCHR, curthread)) 1520 return (-1); 1521 1522 return (size); 1523 } 1524 1525 static int 1526 ccddump(dev_t dev, u_int count, u_int blkno, u_int secsize) 1527 { 1528 /* Not implemented. */ 1529 return ENXIO; 1530 } 1531 1532 /* 1533 * Lookup the provided name in the filesystem. If the file exists, 1534 * is a valid block device, and isn't being used by anyone else, 1535 * set *vpp to the file's vnode. 1536 */ 1537 static int 1538 ccdlookup(char *path, struct thread *td, struct vnode **vpp) 1539 { 1540 struct nlookupdata nd; 1541 struct ucred *cred; 1542 struct vnode *vp; 1543 int error; 1544 1545 KKASSERT(td->td_proc); 1546 cred = td->td_proc->p_ucred; 1547 *vpp = NULL; 1548 1549 error = nlookup_init(&nd, path, UIO_USERSPACE, NLC_FOLLOW|NLC_LOCKVP); 1550 if (error) 1551 return (error); 1552 if ((error = vn_open(&nd, NULL, FREAD|FWRITE, 0)) != 0) { 1553 #ifdef DEBUG 1554 if (ccddebug & CCDB_FOLLOW|CCDB_INIT) 1555 printf("ccdlookup: vn_open error = %d\n", error); 1556 #endif 1557 goto done; 1558 } 1559 vp = nd.nl_open_vp; 1560 1561 if (vp->v_usecount > 1) { 1562 error = EBUSY; 1563 goto done; 1564 } 1565 1566 if (!vn_isdisk(vp, &error)) 1567 goto done; 1568 1569 #ifdef DEBUG 1570 if (ccddebug & CCDB_VNODE) 1571 vprint("ccdlookup: vnode info", vp); 1572 #endif 1573 1574 VOP_UNLOCK(vp, 0, td); 1575 nd.nl_open_vp = NULL; 1576 nlookup_done(&nd); 1577 *vpp = vp; /* leave ref intact */ 1578 return (0); 1579 done: 1580 nlookup_done(&nd); 1581 return (error); 1582 } 1583 1584 /* 1585 * Read the disklabel from the ccd. If one is not present, fake one 1586 * up. 1587 */ 1588 static void 1589 ccdgetdisklabel(dev) 1590 dev_t dev; 1591 { 1592 int unit = ccdunit(dev); 1593 struct ccd_softc *cs = &ccd_softc[unit]; 1594 char *errstring; 1595 struct disklabel *lp = &cs->sc_label; 1596 struct ccdgeom *ccg = &cs->sc_geom; 1597 dev_t cdev; 1598 1599 bzero(lp, sizeof(*lp)); 1600 1601 lp->d_secperunit = cs->sc_size; 1602 lp->d_secsize = ccg->ccg_secsize; 1603 lp->d_nsectors = ccg->ccg_nsectors; 1604 lp->d_ntracks = ccg->ccg_ntracks; 1605 lp->d_ncylinders = ccg->ccg_ncylinders; 1606 lp->d_secpercyl = lp->d_ntracks * lp->d_nsectors; 1607 1608 strncpy(lp->d_typename, "ccd", sizeof(lp->d_typename)); 1609 lp->d_type = DTYPE_CCD; 1610 strncpy(lp->d_packname, "fictitious", sizeof(lp->d_packname)); 1611 lp->d_rpm = 3600; 1612 lp->d_interleave = 1; 1613 lp->d_flags = 0; 1614 1615 lp->d_partitions[RAW_PART].p_offset = 0; 1616 lp->d_partitions[RAW_PART].p_size = cs->sc_size; 1617 lp->d_partitions[RAW_PART].p_fstype = FS_UNUSED; 1618 lp->d_npartitions = RAW_PART + 1; 1619 1620 lp->d_bbsize = BBSIZE; /* XXX */ 1621 lp->d_sbsize = SBSIZE; /* XXX */ 1622 1623 lp->d_magic = DISKMAGIC; 1624 lp->d_magic2 = DISKMAGIC; 1625 lp->d_checksum = dkcksum(&cs->sc_label); 1626 1627 /* 1628 * Call the generic disklabel extraction routine. 1629 */ 1630 cdev = CCDLABELDEV(dev); 1631 errstring = readdisklabel(cdev, &cs->sc_label); 1632 if (errstring != NULL) 1633 ccdmakedisklabel(cs); 1634 1635 #ifdef DEBUG 1636 /* It's actually extremely common to have unlabeled ccds. */ 1637 if (ccddebug & CCDB_LABEL) 1638 if (errstring != NULL) 1639 printf("ccd%d: %s\n", unit, errstring); 1640 #endif 1641 } 1642 1643 /* 1644 * Take care of things one might want to take care of in the event 1645 * that a disklabel isn't present. 1646 */ 1647 static void 1648 ccdmakedisklabel(cs) 1649 struct ccd_softc *cs; 1650 { 1651 struct disklabel *lp = &cs->sc_label; 1652 1653 /* 1654 * For historical reasons, if there's no disklabel present 1655 * the raw partition must be marked FS_BSDFFS. 1656 */ 1657 lp->d_partitions[RAW_PART].p_fstype = FS_BSDFFS; 1658 1659 strncpy(lp->d_packname, "default label", sizeof(lp->d_packname)); 1660 } 1661 1662 /* 1663 * Wait interruptibly for an exclusive lock. 1664 * 1665 * XXX 1666 * Several drivers do this; it should be abstracted and made MP-safe. 1667 */ 1668 static int 1669 ccdlock(cs) 1670 struct ccd_softc *cs; 1671 { 1672 int error; 1673 1674 while ((cs->sc_flags & CCDF_LOCKED) != 0) { 1675 cs->sc_flags |= CCDF_WANTED; 1676 if ((error = tsleep(cs, PCATCH, "ccdlck", 0)) != 0) 1677 return (error); 1678 } 1679 cs->sc_flags |= CCDF_LOCKED; 1680 return (0); 1681 } 1682 1683 /* 1684 * Unlock and wake up any waiters. 1685 */ 1686 static void 1687 ccdunlock(cs) 1688 struct ccd_softc *cs; 1689 { 1690 1691 cs->sc_flags &= ~CCDF_LOCKED; 1692 if ((cs->sc_flags & CCDF_WANTED) != 0) { 1693 cs->sc_flags &= ~CCDF_WANTED; 1694 wakeup(cs); 1695 } 1696 } 1697 1698 #ifdef DEBUG 1699 static void 1700 printiinfo(ii) 1701 struct ccdiinfo *ii; 1702 { 1703 int ix, i; 1704 1705 for (ix = 0; ii->ii_ndisk; ix++, ii++) { 1706 printf(" itab[%d]: #dk %d sblk %d soff %d", 1707 ix, ii->ii_ndisk, ii->ii_startblk, ii->ii_startoff); 1708 for (i = 0; i < ii->ii_ndisk; i++) 1709 printf(" %d", ii->ii_index[i]); 1710 printf("\n"); 1711 } 1712 } 1713 #endif 1714 1715 1716 /* Local Variables: */ 1717 /* c-argdecl-indent: 8 */ 1718 /* c-continued-statement-offset: 8 */ 1719 /* c-indent-level: 8 */ 1720 /* End: */ 1721