1 /* 2 * Copyright (c) 2007 The DragonFly Project. All rights reserved. 3 * 4 * This code is derived from software contributed to The DragonFly Project 5 * by Matthew Dillon <dillon@backplane.com> 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in 15 * the documentation and/or other materials provided with the 16 * distribution. 17 * 3. Neither the name of The DragonFly Project nor the names of its 18 * contributors may be used to endorse or promote products derived 19 * from this software without specific, prior written permission. 20 * 21 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 22 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 23 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS 24 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE 25 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, 26 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING, 27 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 28 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED 29 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 30 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT 31 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 32 * SUCH DAMAGE. 33 * 34 * $DragonFly: src/sys/dev/disk/ccd/ccd.c,v 1.50 2007/11/06 03:50:02 dillon Exp $ 35 */ 36 /* 37 * Copyright (c) 1995 Jason R. Thorpe. 38 * All rights reserved. 39 * 40 * Redistribution and use in source and binary forms, with or without 41 * modification, are permitted provided that the following conditions 42 * are met: 43 * 1. Redistributions of source code must retain the above copyright 44 * notice, this list of conditions and the following disclaimer. 45 * 2. Redistributions in binary form must reproduce the above copyright 46 * notice, this list of conditions and the following disclaimer in the 47 * documentation and/or other materials provided with the distribution. 48 * 3. All advertising materials mentioning features or use of this software 49 * must display the following acknowledgement: 50 * This product includes software developed for the NetBSD Project 51 * by Jason R. Thorpe. 52 * 4. The name of the author may not be used to endorse or promote products 53 * derived from this software without specific prior written permission. 54 * 55 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR 56 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 57 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 58 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, 59 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, 60 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 61 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED 62 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 63 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 64 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 65 * SUCH DAMAGE. 66 */ 67 68 /* 69 * Copyright (c) 1988 University of Utah. 70 * Copyright (c) 1990, 1993 71 * The Regents of the University of California. All rights reserved. 72 * 73 * This code is derived from software contributed to Berkeley by 74 * the Systems Programming Group of the University of Utah Computer 75 * Science Department. 76 * 77 * Redistribution and use in source and binary forms, with or without 78 * modification, are permitted provided that the following conditions 79 * are met: 80 * 1. Redistributions of source code must retain the above copyright 81 * notice, this list of conditions and the following disclaimer. 82 * 2. Redistributions in binary form must reproduce the above copyright 83 * notice, this list of conditions and the following disclaimer in the 84 * documentation and/or other materials provided with the distribution. 85 * 3. All advertising materials mentioning features or use of this software 86 * must display the following acknowledgement: 87 * This product includes software developed by the University of 88 * California, Berkeley and its contributors. 89 * 4. Neither the name of the University nor the names of its contributors 90 * may be used to endorse or promote products derived from this software 91 * without specific prior written permission. 92 * 93 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 94 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 95 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 96 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 97 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 98 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 99 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 100 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 101 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 102 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 103 * SUCH DAMAGE. 104 * 105 * from: Utah $Hdr: cd.c 1.6 90/11/28$ 106 */ 107 /* 108 * @(#)cd.c 8.2 (Berkeley) 11/16/93 109 * $FreeBSD: src/sys/dev/ccd/ccd.c,v 1.73.2.1 2001/09/11 09:49:52 kris Exp $ 110 * $NetBSD: ccd.c,v 1.22 1995/12/08 19:13:26 thorpej Exp $ 111 * $DragonFly: src/sys/dev/disk/ccd/ccd.c,v 1.50 2007/11/06 03:50:02 dillon Exp $ 112 */ 113 114 /* 115 * "Concatenated" disk driver. 116 * 117 * Original dynamic configuration support by: 118 * Jason R. Thorpe <thorpej@nas.nasa.gov> 119 * Numerical Aerodynamic Simulation Facility 120 * Mail Stop 258-6 121 * NASA Ames Research Center 122 * Moffett Field, CA 94035 123 */ 124 125 #include "use_ccd.h" 126 127 #include <sys/param.h> 128 #include <sys/systm.h> 129 #include <sys/kernel.h> 130 #include <sys/module.h> 131 #include <sys/proc.h> 132 #include <sys/buf.h> 133 #include <sys/malloc.h> 134 #include <sys/nlookup.h> 135 #include <sys/conf.h> 136 #include <sys/stat.h> 137 #include <sys/sysctl.h> 138 #include <sys/disk.h> 139 #include <sys/dtype.h> 140 #include <sys/diskslice.h> 141 #include <sys/devicestat.h> 142 #include <sys/fcntl.h> 143 #include <sys/vnode.h> 144 #include <sys/buf2.h> 145 #include <sys/ccdvar.h> 146 147 #include <vm/vm_zone.h> 148 149 #include <vfs/ufs/dinode.h> /* XXX Used only for fs.h */ 150 #include <vfs/ufs/fs.h> /* XXX used only to get BBSIZE and SBSIZE */ 151 152 #include <sys/thread2.h> 153 154 #if defined(CCDDEBUG) && !defined(DEBUG) 155 #define DEBUG 156 #endif 157 158 #ifdef DEBUG 159 #define CCDB_FOLLOW 0x01 160 #define CCDB_INIT 0x02 161 #define CCDB_IO 0x04 162 #define CCDB_LABEL 0x08 163 #define CCDB_VNODE 0x10 164 static int ccddebug = CCDB_FOLLOW | CCDB_INIT | CCDB_IO | CCDB_LABEL | 165 CCDB_VNODE; 166 SYSCTL_INT(_debug, OID_AUTO, ccddebug, CTLFLAG_RW, &ccddebug, 0, ""); 167 #undef DEBUG 168 #endif 169 170 #define ccdunit(x) dkunit(x) 171 #define ccdpart(x) dkpart(x) 172 173 /* 174 This is how mirroring works (only writes are special): 175 176 When initiating a write, ccdbuffer() returns two "struct ccdbuf *"s 177 linked together by the cb_mirror field. "cb_pflags & 178 CCDPF_MIRROR_DONE" is set to 0 on both of them. 179 180 When a component returns to ccdiodone(), it checks if "cb_pflags & 181 CCDPF_MIRROR_DONE" is set or not. If not, it sets the partner's 182 flag and returns. If it is, it means its partner has already 183 returned, so it will go to the regular cleanup. 184 185 */ 186 187 struct ccdbuf { 188 struct buf cb_buf; /* new I/O buf */ 189 struct vnode *cb_vp; /* related vnode */ 190 struct bio *cb_obio; /* ptr. to original I/O buf */ 191 struct ccdbuf *cb_freenext; /* free list link */ 192 int cb_unit; /* target unit */ 193 int cb_comp; /* target component */ 194 int cb_pflags; /* mirror/parity status flag */ 195 struct ccdbuf *cb_mirror; /* mirror counterpart */ 196 }; 197 198 /* bits in cb_pflags */ 199 #define CCDPF_MIRROR_DONE 1 /* if set, mirror counterpart is done */ 200 201 static d_open_t ccdopen; 202 static d_close_t ccdclose; 203 static d_strategy_t ccdstrategy; 204 static d_ioctl_t ccdioctl; 205 static d_dump_t ccddump; 206 207 #define NCCDFREEHIWAT 16 208 209 #define CDEV_MAJOR 74 210 211 static struct dev_ops ccd_ops = { 212 { "ccd", CDEV_MAJOR, D_DISK }, 213 .d_open = ccdopen, 214 .d_close = ccdclose, 215 .d_read = physread, 216 .d_write = physwrite, 217 .d_ioctl = ccdioctl, 218 .d_strategy = ccdstrategy, 219 .d_dump = ccddump 220 }; 221 222 /* called during module initialization */ 223 static void ccdattach (void); 224 static int ccddetach (void); 225 static int ccd_modevent (module_t, int, void *); 226 227 /* called by biodone() at interrupt time */ 228 static void ccdiodone (struct bio *bio); 229 230 static void ccdstart (struct ccd_softc *, struct bio *); 231 static void ccdinterleave (struct ccd_softc *, int); 232 static void ccdintr (struct ccd_softc *, struct bio *); 233 static int ccdinit (struct ccddevice *, char **, struct ucred *); 234 static int ccdlookup (char *, struct vnode **); 235 static void ccdbuffer (struct ccdbuf **ret, struct ccd_softc *, 236 struct bio *, off_t, caddr_t, long); 237 static int ccdlock (struct ccd_softc *); 238 static void ccdunlock (struct ccd_softc *); 239 240 #ifdef DEBUG 241 static void printiinfo (struct ccdiinfo *); 242 #endif 243 244 /* Non-private for the benefit of libkvm. */ 245 struct ccd_softc *ccd_softc; 246 struct ccddevice *ccddevs; 247 struct ccdbuf *ccdfreebufs; 248 static int numccdfreebufs; 249 static int numccd = 0; 250 251 /* 252 * getccdbuf() - Allocate and zero a ccd buffer. 253 * 254 * This routine is called at splbio(). 255 */ 256 257 static __inline 258 struct ccdbuf * 259 getccdbuf(void) 260 { 261 struct ccdbuf *cbp; 262 263 /* 264 * Allocate from freelist or malloc as necessary 265 */ 266 if ((cbp = ccdfreebufs) != NULL) { 267 ccdfreebufs = cbp->cb_freenext; 268 --numccdfreebufs; 269 reinitbufbio(&cbp->cb_buf); 270 } else { 271 cbp = kmalloc(sizeof(struct ccdbuf), M_DEVBUF, M_WAITOK|M_ZERO); 272 initbufbio(&cbp->cb_buf); 273 } 274 275 /* 276 * independant struct buf initialization 277 */ 278 buf_dep_init(&cbp->cb_buf); 279 BUF_LOCKINIT(&cbp->cb_buf); 280 BUF_LOCK(&cbp->cb_buf, LK_EXCLUSIVE); 281 BUF_KERNPROC(&cbp->cb_buf); 282 cbp->cb_buf.b_flags = B_PAGING | B_BNOCLIP; 283 284 return(cbp); 285 } 286 287 /* 288 * putccdbuf() - Free a ccd buffer. 289 * 290 * This routine is called at splbio(). 291 */ 292 293 static __inline 294 void 295 putccdbuf(struct ccdbuf *cbp) 296 { 297 BUF_UNLOCK(&cbp->cb_buf); 298 BUF_LOCKFREE(&cbp->cb_buf); 299 300 if (numccdfreebufs < NCCDFREEHIWAT) { 301 cbp->cb_freenext = ccdfreebufs; 302 ccdfreebufs = cbp; 303 ++numccdfreebufs; 304 } else { 305 kfree((caddr_t)cbp, M_DEVBUF); 306 } 307 } 308 309 /* 310 * Called by main() during pseudo-device attachment. All we need 311 * to do is allocate enough space for devices to be configured later, and 312 * add devsw entries. 313 */ 314 static void 315 ccdattach(void) 316 { 317 struct disk_info info; 318 struct ccd_softc *cs; 319 int i; 320 int num = NCCD; 321 322 if (num > 1) 323 kprintf("ccd0-%d: Concatenated disk drivers\n", num-1); 324 else 325 kprintf("ccd0: Concatenated disk driver\n"); 326 327 ccd_softc = kmalloc(num * sizeof(struct ccd_softc), M_DEVBUF, 328 M_WAITOK | M_ZERO); 329 ccddevs = kmalloc(num * sizeof(struct ccddevice), M_DEVBUF, 330 M_WAITOK | M_ZERO); 331 numccd = num; 332 333 /* 334 * With normal disk devices the open simply fails if the media 335 * is not present. With CCD we have to be able to open the 336 * raw disk to use the ioctl's to set it up, so create a dummy 337 * disk info structure so dscheck() doesn't blow up. 338 */ 339 bzero(&info, sizeof(info)); 340 info.d_media_blksize = DEV_BSIZE; 341 342 for (i = 0; i < numccd; ++i) { 343 cs = &ccd_softc[i]; 344 cs->sc_dev = disk_create(i, &cs->sc_disk, &ccd_ops); 345 cs->sc_dev->si_drv1 = cs; 346 cs->sc_dev->si_iosize_max = 256 * 512; /* XXX */ 347 disk_setdiskinfo(&cs->sc_disk, &info); 348 } 349 } 350 351 static int 352 ccddetach(void) 353 { 354 struct ccd_softc *cs; 355 struct dev_ioctl_args ioctl_args; 356 int i; 357 int error = 0; 358 int eval; 359 360 bzero(&ioctl_args, sizeof(ioctl_args)); 361 362 for (i = 0; i < numccd; ++i) { 363 cs = &ccd_softc[i]; 364 if (cs->sc_dev == NULL) 365 continue; 366 ioctl_args.a_head.a_dev = cs->sc_dev; 367 ioctl_args.a_cmd = CCDIOCCLR; 368 ioctl_args.a_fflag = FWRITE; 369 eval = ccdioctl(&ioctl_args); 370 if (eval && eval != ENXIO) { 371 kprintf("ccd%d: In use, cannot detach\n", i); 372 error = EBUSY; 373 } 374 } 375 if (error == 0) { 376 for (i = 0; i < numccd; ++i) { 377 cs = &ccd_softc[i]; 378 if (cs->sc_dev == NULL) 379 continue; 380 disk_destroy(&cs->sc_disk); 381 cs->sc_dev = NULL; 382 } 383 if (ccd_softc) 384 kfree(ccd_softc, M_DEVBUF); 385 if (ccddevs) 386 kfree(ccddevs, M_DEVBUF); 387 } 388 return (error); 389 } 390 391 static int 392 ccd_modevent(module_t mod, int type, void *data) 393 { 394 int error = 0; 395 396 switch (type) { 397 case MOD_LOAD: 398 ccdattach(); 399 break; 400 401 case MOD_UNLOAD: 402 error = ccddetach(); 403 break; 404 405 default: /* MOD_SHUTDOWN etc */ 406 break; 407 } 408 return (error); 409 } 410 411 DEV_MODULE(ccd, ccd_modevent, NULL); 412 413 static int 414 ccdinit(struct ccddevice *ccd, char **cpaths, struct ucred *cred) 415 { 416 struct ccd_softc *cs = &ccd_softc[ccd->ccd_unit]; 417 struct ccdcinfo *ci = NULL; /* XXX */ 418 int ix; 419 struct vnode *vp; 420 u_int64_t skip; 421 u_int64_t size; 422 u_int64_t minsize; 423 int maxsecsize; 424 struct partinfo dpart; 425 struct ccdgeom *ccg = &cs->sc_geom; 426 char tmppath[MAXPATHLEN]; 427 int error = 0; 428 429 #ifdef DEBUG 430 if (ccddebug & (CCDB_FOLLOW|CCDB_INIT)) 431 kprintf("ccdinit: unit %d\n", ccd->ccd_unit); 432 #endif 433 434 cs->sc_size = 0; 435 cs->sc_ileave = ccd->ccd_interleave; 436 cs->sc_nccdisks = ccd->ccd_ndev; 437 438 /* Allocate space for the component info. */ 439 cs->sc_cinfo = kmalloc(cs->sc_nccdisks * sizeof(struct ccdcinfo), 440 M_DEVBUF, M_WAITOK); 441 cs->sc_maxiosize = MAXPHYS; 442 443 /* 444 * Verify that each component piece exists and record 445 * relevant information about it. 446 */ 447 maxsecsize = 0; 448 minsize = 0; 449 for (ix = 0; ix < cs->sc_nccdisks; ix++) { 450 vp = ccd->ccd_vpp[ix]; 451 ci = &cs->sc_cinfo[ix]; 452 ci->ci_vp = vp; 453 454 /* 455 * Copy in the pathname of the component. 456 */ 457 bzero(tmppath, sizeof(tmppath)); /* sanity */ 458 if ((error = copyinstr(cpaths[ix], tmppath, 459 MAXPATHLEN, &ci->ci_pathlen)) != 0) { 460 #ifdef DEBUG 461 if (ccddebug & (CCDB_FOLLOW|CCDB_INIT)) 462 kprintf("ccd%d: can't copy path, error = %d\n", 463 ccd->ccd_unit, error); 464 #endif 465 goto fail; 466 } 467 ci->ci_path = kmalloc(ci->ci_pathlen, M_DEVBUF, M_WAITOK); 468 bcopy(tmppath, ci->ci_path, ci->ci_pathlen); 469 470 ci->ci_dev = vn_todev(vp); 471 if (ci->ci_dev->si_iosize_max && 472 cs->sc_maxiosize > ci->ci_dev->si_iosize_max) { 473 cs->sc_maxiosize = ci->ci_dev->si_iosize_max; 474 } 475 476 /* 477 * Get partition information for the component. 478 */ 479 error = VOP_IOCTL(vp, DIOCGPART, (caddr_t)&dpart, FREAD, cred); 480 if (error) { 481 #ifdef DEBUG 482 if (ccddebug & (CCDB_FOLLOW|CCDB_INIT)) 483 kprintf("ccd%d: %s: ioctl failed, error = %d\n", 484 ccd->ccd_unit, ci->ci_path, error); 485 #endif 486 goto fail; 487 } 488 if (dpart.fstype != FS_CCD && 489 !kuuid_is_ccd(&dpart.fstype_uuid)) { 490 kprintf("ccd%d: %s: filesystem type must be 'ccd'\n", 491 ccd->ccd_unit, ci->ci_path); 492 error = EFTYPE; 493 goto fail; 494 } 495 if (maxsecsize < dpart.media_blksize) 496 maxsecsize = dpart.media_blksize; 497 498 /* 499 * Skip a certain amount of storage at the beginning of 500 * the component to make sure we don't infringe on any 501 * reserved sectors. This is handled entirely by 502 * dpart.reserved_blocks but we also impose a minimum 503 * of 16 sectors for backwards compatibility. 504 */ 505 skip = 16; 506 if (skip < dpart.reserved_blocks) 507 skip = dpart.reserved_blocks; 508 size = dpart.media_blocks - skip; 509 510 /* 511 * Calculate the size, truncating to an interleave 512 * boundary if necessary. 513 */ 514 if (cs->sc_ileave > 1) 515 size -= size % cs->sc_ileave; 516 517 if ((int64_t)size <= 0) { 518 #ifdef DEBUG 519 if (ccddebug & (CCDB_FOLLOW|CCDB_INIT)) 520 kprintf("ccd%d: %s: size == 0\n", 521 ccd->ccd_unit, ci->ci_path); 522 #endif 523 error = ENODEV; 524 goto fail; 525 } 526 527 /* 528 * Calculate the smallest uniform component, used 529 * elsewhere. 530 */ 531 if (minsize == 0 || minsize > size) 532 minsize = size; 533 ci->ci_skip = skip; 534 ci->ci_size = size; 535 cs->sc_size += size; 536 } 537 kprintf("ccd%d: max component iosize is %d total blocks %lld\n", 538 cs->sc_unit, cs->sc_maxiosize, (long long)cs->sc_size); 539 540 /* 541 * Don't allow the interleave to be smaller than 542 * the biggest component sector. 543 */ 544 if ((cs->sc_ileave > 0) && 545 (cs->sc_ileave % (maxsecsize / DEV_BSIZE))) { 546 #ifdef DEBUG 547 if (ccddebug & (CCDB_FOLLOW|CCDB_INIT)) 548 kprintf("ccd%d: interleave must be at least %d\n", 549 ccd->ccd_unit, (maxsecsize / DEV_BSIZE)); 550 #endif 551 error = EINVAL; 552 goto fail; 553 } 554 555 /* 556 * If uniform interleave is desired set all sizes to that of 557 * the smallest component. This will guarentee that a single 558 * interleave table is generated. 559 * 560 * Lost space must be taken into account when calculating the 561 * overall size. Half the space is lost when CCDF_MIRROR is 562 * specified. One disk is lost when CCDF_PARITY is specified. 563 */ 564 if (ccd->ccd_flags & CCDF_UNIFORM) { 565 for (ci = cs->sc_cinfo; 566 ci < &cs->sc_cinfo[cs->sc_nccdisks]; ci++) { 567 ci->ci_size = minsize; 568 } 569 if (ccd->ccd_flags & CCDF_MIRROR) { 570 /* 571 * Check to see if an even number of components 572 * have been specified. The interleave must also 573 * be non-zero in order for us to be able to 574 * guarentee the topology. 575 */ 576 if (cs->sc_nccdisks % 2) { 577 kprintf("ccd%d: mirroring requires an even number of disks\n", ccd->ccd_unit ); 578 error = EINVAL; 579 goto fail; 580 } 581 if (cs->sc_ileave == 0) { 582 kprintf("ccd%d: an interleave must be specified when mirroring\n", ccd->ccd_unit); 583 error = EINVAL; 584 goto fail; 585 } 586 cs->sc_size = (cs->sc_nccdisks/2) * minsize; 587 } else if (ccd->ccd_flags & CCDF_PARITY) { 588 cs->sc_size = (cs->sc_nccdisks-1) * minsize; 589 } else { 590 if (cs->sc_ileave == 0) { 591 kprintf("ccd%d: an interleave must be specified when using parity\n", ccd->ccd_unit); 592 error = EINVAL; 593 goto fail; 594 } 595 cs->sc_size = cs->sc_nccdisks * minsize; 596 } 597 } 598 599 /* 600 * Construct the interleave table. 601 */ 602 ccdinterleave(cs, ccd->ccd_unit); 603 604 /* 605 * Create pseudo-geometry based on 1MB cylinders. It's 606 * pretty close. 607 */ 608 ccg->ccg_secsize = maxsecsize; 609 ccg->ccg_ntracks = 1; 610 ccg->ccg_nsectors = 1024 * 1024 / ccg->ccg_secsize; 611 ccg->ccg_ncylinders = cs->sc_size / ccg->ccg_nsectors; 612 613 /* 614 * Add an devstat entry for this device. 615 */ 616 devstat_add_entry(&cs->device_stats, "ccd", ccd->ccd_unit, 617 ccg->ccg_secsize, DEVSTAT_ALL_SUPPORTED, 618 DEVSTAT_TYPE_STORARRAY |DEVSTAT_TYPE_IF_OTHER, 619 DEVSTAT_PRIORITY_ARRAY); 620 621 cs->sc_flags |= CCDF_INITED; 622 cs->sc_cflags = ccd->ccd_flags; /* So we can find out later... */ 623 cs->sc_unit = ccd->ccd_unit; 624 return (0); 625 fail: 626 while (ci > cs->sc_cinfo) { 627 ci--; 628 kfree(ci->ci_path, M_DEVBUF); 629 } 630 kfree(cs->sc_cinfo, M_DEVBUF); 631 cs->sc_cinfo = NULL; 632 return (error); 633 } 634 635 static void 636 ccdinterleave(struct ccd_softc *cs, int unit) 637 { 638 struct ccdcinfo *ci, *smallci; 639 struct ccdiinfo *ii; 640 u_int64_t bn; 641 u_int64_t lbn; 642 u_int64_t size; 643 int icount; 644 int ix; 645 646 #ifdef DEBUG 647 if (ccddebug & CCDB_INIT) 648 kprintf("ccdinterleave(%x): ileave %d\n", cs, cs->sc_ileave); 649 #endif 650 651 /* 652 * Allocate an interleave table. The worst case occurs when each 653 * of N disks is of a different size, resulting in N interleave 654 * tables. 655 * 656 * Chances are this is too big, but we don't care. 657 */ 658 icount = cs->sc_nccdisks + 1; 659 cs->sc_itable = kmalloc(icount * sizeof(struct ccdiinfo), 660 M_DEVBUF, M_WAITOK|M_ZERO); 661 662 /* 663 * Trivial case: no interleave (actually interleave of disk size). 664 * Each table entry represents a single component in its entirety. 665 * 666 * An interleave of 0 may not be used with a mirror or parity setup. 667 */ 668 if (cs->sc_ileave == 0) { 669 bn = 0; 670 ii = cs->sc_itable; 671 672 for (ix = 0; ix < cs->sc_nccdisks; ix++) { 673 /* Allocate space for ii_index. */ 674 ii->ii_index = kmalloc(sizeof(int), M_DEVBUF, M_WAITOK); 675 ii->ii_ndisk = 1; 676 ii->ii_startblk = bn; 677 ii->ii_startoff = 0; 678 ii->ii_index[0] = ix; 679 bn += cs->sc_cinfo[ix].ci_size; 680 ii++; 681 } 682 ii->ii_ndisk = 0; 683 #ifdef DEBUG 684 if (ccddebug & CCDB_INIT) 685 printiinfo(cs->sc_itable); 686 #endif 687 return; 688 } 689 690 /* 691 * The following isn't fast or pretty; it doesn't have to be. 692 */ 693 size = 0; 694 bn = lbn = 0; 695 for (ii = cs->sc_itable; ii < &cs->sc_itable[icount]; ++ii) { 696 /* 697 * Allocate space for ii_index. We might allocate more then 698 * we use. 699 */ 700 ii->ii_index = kmalloc((sizeof(int) * cs->sc_nccdisks), 701 M_DEVBUF, M_WAITOK); 702 703 /* 704 * Locate the smallest of the remaining components 705 */ 706 smallci = NULL; 707 ci = cs->sc_cinfo; 708 while (ci < &cs->sc_cinfo[cs->sc_nccdisks]) { 709 if (ci->ci_size > size && 710 (smallci == NULL || 711 ci->ci_size < smallci->ci_size)) { 712 smallci = ci; 713 } 714 ++ci; 715 } 716 717 /* 718 * Nobody left, all done 719 */ 720 if (smallci == NULL) { 721 ii->ii_ndisk = 0; 722 break; 723 } 724 725 /* 726 * Record starting logical block using an sc_ileave blocksize. 727 */ 728 ii->ii_startblk = bn / cs->sc_ileave; 729 730 /* 731 * Record starting component block using an sc_ileave 732 * blocksize. This value is relative to the beginning of 733 * a component disk. 734 */ 735 ii->ii_startoff = lbn; 736 737 /* 738 * Determine how many disks take part in this interleave 739 * and record their indices. 740 */ 741 ix = 0; 742 for (ci = cs->sc_cinfo; 743 ci < &cs->sc_cinfo[cs->sc_nccdisks]; ci++) { 744 if (ci->ci_size >= smallci->ci_size) { 745 ii->ii_index[ix++] = ci - cs->sc_cinfo; 746 } 747 } 748 ii->ii_ndisk = ix; 749 750 /* 751 * Adjust for loop 752 */ 753 bn += ix * (smallci->ci_size - size); 754 lbn = smallci->ci_size / cs->sc_ileave; 755 size = smallci->ci_size; 756 } 757 if (ii == &cs->sc_itable[icount]) 758 panic("ccdinterlave software bug! table exhausted"); 759 #ifdef DEBUG 760 if (ccddebug & CCDB_INIT) 761 printiinfo(cs->sc_itable); 762 #endif 763 } 764 765 /* ARGSUSED */ 766 static int 767 ccdopen(struct dev_open_args *ap) 768 { 769 cdev_t dev = ap->a_head.a_dev; 770 int unit = ccdunit(dev); 771 struct ccd_softc *cs; 772 int error = 0; 773 774 #ifdef DEBUG 775 if (ccddebug & CCDB_FOLLOW) 776 kprintf("ccdopen(%x, %x)\n", dev, flags); 777 #endif 778 if (unit >= numccd) 779 return (ENXIO); 780 cs = &ccd_softc[unit]; 781 782 if ((error = ccdlock(cs)) == 0) { 783 ccdunlock(cs); 784 } 785 return (error); 786 } 787 788 /* ARGSUSED */ 789 static int 790 ccdclose(struct dev_close_args *ap) 791 { 792 cdev_t dev = ap->a_head.a_dev; 793 int unit = ccdunit(dev); 794 struct ccd_softc *cs; 795 int error = 0; 796 797 #ifdef DEBUG 798 if (ccddebug & CCDB_FOLLOW) 799 kprintf("ccdclose(%x, %x)\n", dev, flags); 800 #endif 801 802 if (unit >= numccd) 803 return (ENXIO); 804 cs = &ccd_softc[unit]; 805 if ((error = ccdlock(cs)) == 0) { 806 ccdunlock(cs); 807 } 808 return (error); 809 } 810 811 static int 812 ccdstrategy(struct dev_strategy_args *ap) 813 { 814 cdev_t dev = ap->a_head.a_dev; 815 struct bio *bio = ap->a_bio; 816 int unit = ccdunit(dev); 817 struct bio *nbio; 818 struct buf *bp = bio->bio_buf; 819 struct ccd_softc *cs = &ccd_softc[unit]; 820 u_int64_t pbn; /* in sc_secsize chunks */ 821 u_int32_t sz; /* in sc_secsize chunks */ 822 823 #ifdef DEBUG 824 if (ccddebug & CCDB_FOLLOW) 825 kprintf("ccdstrategy(%x): unit %d\n", bp, unit); 826 #endif 827 if ((cs->sc_flags & CCDF_INITED) == 0) { 828 bp->b_error = ENXIO; 829 goto error; 830 } 831 832 /* If it's a nil transfer, wake up the top half now. */ 833 if (bp->b_bcount == 0) { 834 bp->b_resid = 0; 835 goto done; 836 } 837 838 /* 839 * Do bounds checking and adjust transfer. If there's an 840 * error, the bounds check will flag that for us. 841 */ 842 843 pbn = bio->bio_offset / cs->sc_geom.ccg_secsize; 844 sz = howmany(bp->b_bcount, cs->sc_geom.ccg_secsize); 845 846 /* 847 * If out of bounds return an error. If the request goes 848 * past EOF, clip the request as appropriate. If exactly 849 * at EOF, return success (don't clip), but with 0 bytes 850 * of I/O. 851 * 852 * Mark EOF B_INVAL (just like bad), indicating that the 853 * contents of the buffer, if any, is invalid. 854 */ 855 if ((int64_t)pbn < 0) 856 goto bad; 857 if (pbn + sz > cs->sc_size) { 858 if (pbn > cs->sc_size || (bp->b_flags & B_BNOCLIP)) 859 goto bad; 860 if (pbn == cs->sc_size) { 861 bp->b_resid = bp->b_bcount; 862 bp->b_flags |= B_INVAL; 863 goto done; 864 } 865 sz = (long)(cs->sc_size - pbn); 866 bp->b_bcount = sz * cs->sc_geom.ccg_secsize; 867 } 868 nbio = bio; 869 870 bp->b_resid = bp->b_bcount; 871 nbio->bio_driver_info = dev; 872 873 /* 874 * "Start" the unit. 875 */ 876 crit_enter(); 877 ccdstart(cs, nbio); 878 crit_exit(); 879 return(0); 880 881 /* 882 * note: bio, not nbio, is valid at the done label. 883 */ 884 bad: 885 bp->b_error = EINVAL; 886 error: 887 bp->b_resid = bp->b_bcount; 888 bp->b_flags |= B_ERROR | B_INVAL; 889 done: 890 biodone(bio); 891 return(0); 892 } 893 894 static void 895 ccdstart(struct ccd_softc *cs, struct bio *bio) 896 { 897 long bcount, rcount; 898 struct ccdbuf *cbp[4]; 899 struct buf *bp = bio->bio_buf; 900 /* XXX! : 2 reads and 2 writes for RAID 4/5 */ 901 caddr_t addr; 902 off_t doffset; 903 904 #ifdef DEBUG 905 if (ccddebug & CCDB_FOLLOW) 906 kprintf("ccdstart(%x, %x)\n", cs, bp); 907 #endif 908 909 /* Record the transaction start */ 910 devstat_start_transaction(&cs->device_stats); 911 912 /* 913 * Allocate component buffers and fire off the requests 914 */ 915 doffset = bio->bio_offset; 916 addr = bp->b_data; 917 918 for (bcount = bp->b_bcount; bcount > 0; bcount -= rcount) { 919 ccdbuffer(cbp, cs, bio, doffset, addr, bcount); 920 rcount = cbp[0]->cb_buf.b_bcount; 921 922 if (cs->sc_cflags & CCDF_MIRROR) { 923 /* 924 * Mirroring. Writes go to both disks, reads are 925 * taken from whichever disk seems most appropriate. 926 * 927 * We attempt to localize reads to the disk whos arm 928 * is nearest the read request. We ignore seeks due 929 * to writes when making this determination and we 930 * also try to avoid hogging. 931 */ 932 if (cbp[0]->cb_buf.b_cmd != BUF_CMD_READ) { 933 vn_strategy(cbp[0]->cb_vp, 934 &cbp[0]->cb_buf.b_bio1); 935 vn_strategy(cbp[1]->cb_vp, 936 &cbp[1]->cb_buf.b_bio1); 937 } else { 938 int pick = cs->sc_pick; 939 daddr_t range = cs->sc_size / 16 * cs->sc_geom.ccg_secsize; 940 if (doffset < cs->sc_blk[pick] - range || 941 doffset > cs->sc_blk[pick] + range 942 ) { 943 cs->sc_pick = pick = 1 - pick; 944 } 945 cs->sc_blk[pick] = doffset + rcount; 946 vn_strategy(cbp[pick]->cb_vp, 947 &cbp[pick]->cb_buf.b_bio1); 948 } 949 } else { 950 /* 951 * Not mirroring 952 */ 953 vn_strategy(cbp[0]->cb_vp, 954 &cbp[0]->cb_buf.b_bio1); 955 } 956 doffset += rcount; 957 addr += rcount; 958 } 959 } 960 961 /* 962 * Build a component buffer header. 963 */ 964 static void 965 ccdbuffer(struct ccdbuf **cb, struct ccd_softc *cs, struct bio *bio, 966 off_t doffset, caddr_t addr, long bcount) 967 { 968 struct ccdcinfo *ci, *ci2 = NULL; /* XXX */ 969 struct ccdbuf *cbp; 970 u_int64_t bn; 971 u_int64_t cbn; 972 u_int64_t cboff; 973 off_t cbc; 974 975 #ifdef DEBUG 976 if (ccddebug & CCDB_IO) 977 kprintf("ccdbuffer(%x, %x, %d, %x, %d)\n", 978 cs, bp, bn, addr, bcount); 979 #endif 980 /* 981 * Determine which component bn falls in. 982 */ 983 bn = doffset / cs->sc_geom.ccg_secsize; 984 cbn = bn; 985 cboff = 0; 986 987 if (cs->sc_ileave == 0) { 988 /* 989 * Serially concatenated and neither a mirror nor a parity 990 * config. This is a special case. 991 */ 992 daddr_t sblk; 993 994 sblk = 0; 995 for (ci = cs->sc_cinfo; cbn >= sblk + ci->ci_size; ci++) 996 sblk += ci->ci_size; 997 cbn -= sblk; 998 } else { 999 struct ccdiinfo *ii; 1000 int ccdisk, off; 1001 1002 /* 1003 * Calculate cbn, the logical superblock (sc_ileave chunks), 1004 * and cboff, a normal block offset (DEV_BSIZE chunks) relative 1005 * to cbn. 1006 */ 1007 cboff = cbn % cs->sc_ileave; /* DEV_BSIZE gran */ 1008 cbn = cbn / cs->sc_ileave; /* DEV_BSIZE * ileave gran */ 1009 1010 /* 1011 * Figure out which interleave table to use. 1012 */ 1013 for (ii = cs->sc_itable; ii->ii_ndisk; ii++) { 1014 if (ii->ii_startblk > cbn) 1015 break; 1016 } 1017 ii--; 1018 1019 /* 1020 * off is the logical superblock relative to the beginning 1021 * of this interleave block. 1022 */ 1023 off = cbn - ii->ii_startblk; 1024 1025 /* 1026 * We must calculate which disk component to use (ccdisk), 1027 * and recalculate cbn to be the superblock relative to 1028 * the beginning of the component. This is typically done by 1029 * adding 'off' and ii->ii_startoff together. However, 'off' 1030 * must typically be divided by the number of components in 1031 * this interleave array to be properly convert it from a 1032 * CCD-relative logical superblock number to a 1033 * component-relative superblock number. 1034 */ 1035 if (ii->ii_ndisk == 1) { 1036 /* 1037 * When we have just one disk, it can't be a mirror 1038 * or a parity config. 1039 */ 1040 ccdisk = ii->ii_index[0]; 1041 cbn = ii->ii_startoff + off; 1042 } else { 1043 if (cs->sc_cflags & CCDF_MIRROR) { 1044 /* 1045 * We have forced a uniform mapping, resulting 1046 * in a single interleave array. We double 1047 * up on the first half of the available 1048 * components and our mirror is in the second 1049 * half. This only works with a single 1050 * interleave array because doubling up 1051 * doubles the number of sectors, so there 1052 * cannot be another interleave array because 1053 * the next interleave array's calculations 1054 * would be off. 1055 */ 1056 int ndisk2 = ii->ii_ndisk / 2; 1057 ccdisk = ii->ii_index[off % ndisk2]; 1058 cbn = ii->ii_startoff + off / ndisk2; 1059 ci2 = &cs->sc_cinfo[ccdisk + ndisk2]; 1060 } else if (cs->sc_cflags & CCDF_PARITY) { 1061 /* 1062 * XXX not implemented yet 1063 */ 1064 int ndisk2 = ii->ii_ndisk - 1; 1065 ccdisk = ii->ii_index[off % ndisk2]; 1066 cbn = ii->ii_startoff + off / ndisk2; 1067 if (cbn % ii->ii_ndisk <= ccdisk) 1068 ccdisk++; 1069 } else { 1070 ccdisk = ii->ii_index[off % ii->ii_ndisk]; 1071 cbn = ii->ii_startoff + off / ii->ii_ndisk; 1072 } 1073 } 1074 1075 ci = &cs->sc_cinfo[ccdisk]; 1076 1077 /* 1078 * Convert cbn from a superblock to a normal block so it 1079 * can be used to calculate (along with cboff) the normal 1080 * block index into this particular disk. 1081 */ 1082 cbn *= cs->sc_ileave; 1083 } 1084 1085 /* 1086 * Fill in the component buf structure. 1087 * 1088 * NOTE: devices do not use b_bufsize, only b_bcount, but b_bcount 1089 * will be truncated on device EOF so we use b_bufsize to detect 1090 * the case. 1091 */ 1092 cbp = getccdbuf(); 1093 cbp->cb_buf.b_cmd = bio->bio_buf->b_cmd; 1094 cbp->cb_buf.b_flags |= bio->bio_buf->b_flags; 1095 cbp->cb_buf.b_data = addr; 1096 cbp->cb_vp = ci->ci_vp; 1097 if (cs->sc_ileave == 0) 1098 cbc = dbtob((off_t)(ci->ci_size - cbn)); 1099 else 1100 cbc = dbtob((off_t)(cs->sc_ileave - cboff)); 1101 if (cbc > cs->sc_maxiosize) 1102 cbc = cs->sc_maxiosize; 1103 cbp->cb_buf.b_bcount = (cbc < bcount) ? cbc : bcount; 1104 cbp->cb_buf.b_bufsize = cbp->cb_buf.b_bcount; 1105 1106 cbp->cb_buf.b_bio1.bio_done = ccdiodone; 1107 cbp->cb_buf.b_bio1.bio_caller_info1.ptr = cbp; 1108 cbp->cb_buf.b_bio1.bio_offset = dbtob(cbn + cboff + ci->ci_skip); 1109 1110 /* 1111 * context for ccdiodone 1112 */ 1113 cbp->cb_obio = bio; 1114 cbp->cb_unit = cs - ccd_softc; 1115 cbp->cb_comp = ci - cs->sc_cinfo; 1116 1117 #ifdef DEBUG 1118 if (ccddebug & CCDB_IO) 1119 kprintf(" dev %x(u%d): cbp %x off %lld addr %x bcnt %d\n", 1120 ci->ci_dev, ci-cs->sc_cinfo, cbp, 1121 cbp->cb_buf.b_bio1.bio_offset, 1122 cbp->cb_buf.b_data, cbp->cb_buf.b_bcount); 1123 #endif 1124 cb[0] = cbp; 1125 1126 /* 1127 * Note: both I/O's setup when reading from mirror, but only one 1128 * will be executed. 1129 */ 1130 if (cs->sc_cflags & CCDF_MIRROR) { 1131 /* mirror, setup second I/O */ 1132 cbp = getccdbuf(); 1133 1134 cbp->cb_buf.b_cmd = bio->bio_buf->b_cmd; 1135 cbp->cb_buf.b_flags |= bio->bio_buf->b_flags; 1136 cbp->cb_buf.b_data = addr; 1137 cbp->cb_vp = ci2->ci_vp; 1138 if (cs->sc_ileave == 0) 1139 cbc = dbtob((off_t)(ci->ci_size - cbn)); 1140 else 1141 cbc = dbtob((off_t)(cs->sc_ileave - cboff)); 1142 if (cbc > cs->sc_maxiosize) 1143 cbc = cs->sc_maxiosize; 1144 cbp->cb_buf.b_bcount = (cbc < bcount) ? cbc : bcount; 1145 cbp->cb_buf.b_bufsize = cbp->cb_buf.b_bcount; 1146 1147 cbp->cb_buf.b_bio1.bio_done = ccdiodone; 1148 cbp->cb_buf.b_bio1.bio_caller_info1.ptr = cbp; 1149 cbp->cb_buf.b_bio1.bio_offset = dbtob(cbn + cboff + ci2->ci_skip); 1150 1151 /* 1152 * context for ccdiodone 1153 */ 1154 cbp->cb_obio = bio; 1155 cbp->cb_unit = cs - ccd_softc; 1156 cbp->cb_comp = ci2 - cs->sc_cinfo; 1157 cb[1] = cbp; 1158 /* link together the ccdbuf's and clear "mirror done" flag */ 1159 cb[0]->cb_mirror = cb[1]; 1160 cb[1]->cb_mirror = cb[0]; 1161 cb[0]->cb_pflags &= ~CCDPF_MIRROR_DONE; 1162 cb[1]->cb_pflags &= ~CCDPF_MIRROR_DONE; 1163 } 1164 } 1165 1166 static void 1167 ccdintr(struct ccd_softc *cs, struct bio *bio) 1168 { 1169 struct buf *bp = bio->bio_buf; 1170 1171 #ifdef DEBUG 1172 if (ccddebug & CCDB_FOLLOW) 1173 kprintf("ccdintr(%x, %x)\n", cs, bp); 1174 #endif 1175 /* 1176 * Request is done for better or worse, wakeup the top half. 1177 */ 1178 if (bp->b_flags & B_ERROR) 1179 bp->b_resid = bp->b_bcount; 1180 devstat_end_transaction_buf(&cs->device_stats, bp); 1181 biodone(bio); 1182 } 1183 1184 /* 1185 * Called at interrupt time. 1186 * Mark the component as done and if all components are done, 1187 * take a ccd interrupt. 1188 */ 1189 static void 1190 ccdiodone(struct bio *bio) 1191 { 1192 struct ccdbuf *cbp = bio->bio_caller_info1.ptr; 1193 struct bio *obio = cbp->cb_obio; 1194 struct buf *obp = obio->bio_buf; 1195 int unit = cbp->cb_unit; 1196 int count; 1197 1198 /* 1199 * Since we do not have exclusive access to underlying devices, 1200 * we can't keep cache translations around. 1201 */ 1202 clearbiocache(bio->bio_next); 1203 1204 crit_enter(); 1205 #ifdef DEBUG 1206 if (ccddebug & CCDB_FOLLOW) 1207 kprintf("ccdiodone(%x)\n", cbp); 1208 if (ccddebug & CCDB_IO) { 1209 kprintf("ccdiodone: bp %x bcount %d resid %d\n", 1210 obp, obp->b_bcount, obp->b_resid); 1211 kprintf(" dev %x(u%d), cbp %x off %lld addr %x bcnt %d\n", 1212 cbp->cb_buf.b_dev, cbp->cb_comp, cbp, 1213 cbp->cb_buf.b_loffset, cbp->cb_buf.b_data, 1214 cbp->cb_buf.b_bcount); 1215 } 1216 #endif 1217 1218 /* 1219 * If an error occured, report it. If this is a mirrored 1220 * configuration and the first of two possible reads, do not 1221 * set the error in the bp yet because the second read may 1222 * succeed. 1223 */ 1224 if (cbp->cb_buf.b_flags & B_ERROR) { 1225 const char *msg = ""; 1226 1227 if ((ccd_softc[unit].sc_cflags & CCDF_MIRROR) && 1228 (cbp->cb_buf.b_cmd == BUF_CMD_READ) && 1229 (cbp->cb_pflags & CCDPF_MIRROR_DONE) == 0) { 1230 /* 1231 * We will try our read on the other disk down 1232 * below, also reverse the default pick so if we 1233 * are doing a scan we do not keep hitting the 1234 * bad disk first. 1235 */ 1236 struct ccd_softc *cs = &ccd_softc[unit]; 1237 1238 msg = ", trying other disk"; 1239 cs->sc_pick = 1 - cs->sc_pick; 1240 cs->sc_blk[cs->sc_pick] = obio->bio_offset; 1241 } else { 1242 obp->b_flags |= B_ERROR; 1243 obp->b_error = cbp->cb_buf.b_error ? 1244 cbp->cb_buf.b_error : EIO; 1245 } 1246 kprintf("ccd%d: error %d on component %d offset %lld (ccd offset %lld)%s\n", 1247 unit, obp->b_error, cbp->cb_comp, 1248 cbp->cb_buf.b_bio2.bio_offset, 1249 obio->bio_offset, msg); 1250 } 1251 1252 /* 1253 * Process mirror. If we are writing, I/O has been initiated on both 1254 * buffers and we fall through only after both are finished. 1255 * 1256 * If we are reading only one I/O is initiated at a time. If an 1257 * error occurs we initiate the second I/O and return, otherwise 1258 * we free the second I/O without initiating it. 1259 */ 1260 1261 if (ccd_softc[unit].sc_cflags & CCDF_MIRROR) { 1262 if (cbp->cb_buf.b_cmd != BUF_CMD_READ) { 1263 /* 1264 * When writing, handshake with the second buffer 1265 * to determine when both are done. If both are not 1266 * done, return here. 1267 */ 1268 if ((cbp->cb_pflags & CCDPF_MIRROR_DONE) == 0) { 1269 cbp->cb_mirror->cb_pflags |= CCDPF_MIRROR_DONE; 1270 putccdbuf(cbp); 1271 crit_exit(); 1272 return; 1273 } 1274 } else { 1275 /* 1276 * When reading, either dispose of the second buffer 1277 * or initiate I/O on the second buffer if an error 1278 * occured with this one. 1279 */ 1280 if ((cbp->cb_pflags & CCDPF_MIRROR_DONE) == 0) { 1281 if (cbp->cb_buf.b_flags & B_ERROR) { 1282 cbp->cb_mirror->cb_pflags |= 1283 CCDPF_MIRROR_DONE; 1284 vn_strategy( 1285 cbp->cb_mirror->cb_vp, 1286 &cbp->cb_mirror->cb_buf.b_bio1 1287 ); 1288 putccdbuf(cbp); 1289 crit_exit(); 1290 return; 1291 } else { 1292 putccdbuf(cbp->cb_mirror); 1293 /* fall through */ 1294 } 1295 } 1296 } 1297 } 1298 1299 /* 1300 * Use our saved b_bufsize to determine if an unexpected EOF occured. 1301 */ 1302 count = cbp->cb_buf.b_bufsize; 1303 putccdbuf(cbp); 1304 1305 /* 1306 * If all done, "interrupt". 1307 */ 1308 obp->b_resid -= count; 1309 if (obp->b_resid < 0) 1310 panic("ccdiodone: count"); 1311 if (obp->b_resid == 0) 1312 ccdintr(&ccd_softc[unit], obio); 1313 crit_exit(); 1314 } 1315 1316 static int 1317 ccdioctl(struct dev_ioctl_args *ap) 1318 { 1319 cdev_t dev = ap->a_head.a_dev; 1320 int unit = ccdunit(dev); 1321 int i, j, lookedup = 0, error = 0; 1322 struct ccd_softc *cs; 1323 struct ccd_ioctl *ccio = (struct ccd_ioctl *)ap->a_data; 1324 struct ccddevice ccd; 1325 struct disk_info info; 1326 char **cpp; 1327 struct vnode **vpp; 1328 1329 if (unit >= numccd) 1330 return (ENXIO); 1331 cs = &ccd_softc[unit]; 1332 1333 bzero(&ccd, sizeof(ccd)); 1334 1335 switch (ap->a_cmd) { 1336 case CCDIOCSET: 1337 if (cs->sc_flags & CCDF_INITED) 1338 return (EBUSY); 1339 1340 if ((ap->a_fflag & FWRITE) == 0) 1341 return (EBADF); 1342 1343 if ((error = ccdlock(cs)) != 0) 1344 return (error); 1345 1346 if (ccio->ccio_ndisks > CCD_MAXNDISKS) { 1347 ccdunlock(cs); 1348 return (EINVAL); 1349 } 1350 1351 /* Fill in some important bits. */ 1352 ccd.ccd_unit = unit; 1353 ccd.ccd_interleave = ccio->ccio_ileave; 1354 if (ccd.ccd_interleave == 0 && 1355 ((ccio->ccio_flags & CCDF_MIRROR) || 1356 (ccio->ccio_flags & CCDF_PARITY))) { 1357 kprintf("ccd%d: disabling mirror/parity, interleave is 0\n", unit); 1358 ccio->ccio_flags &= ~(CCDF_MIRROR | CCDF_PARITY); 1359 } 1360 if ((ccio->ccio_flags & CCDF_MIRROR) && 1361 (ccio->ccio_flags & CCDF_PARITY)) { 1362 kprintf("ccd%d: can't specify both mirror and parity, using mirror\n", unit); 1363 ccio->ccio_flags &= ~CCDF_PARITY; 1364 } 1365 if ((ccio->ccio_flags & (CCDF_MIRROR | CCDF_PARITY)) && 1366 !(ccio->ccio_flags & CCDF_UNIFORM)) { 1367 kprintf("ccd%d: mirror/parity forces uniform flag\n", 1368 unit); 1369 ccio->ccio_flags |= CCDF_UNIFORM; 1370 } 1371 ccd.ccd_flags = ccio->ccio_flags & CCDF_USERMASK; 1372 1373 /* 1374 * Allocate space for and copy in the array of 1375 * componet pathnames and device numbers. 1376 */ 1377 cpp = kmalloc(ccio->ccio_ndisks * sizeof(char *), 1378 M_DEVBUF, M_WAITOK); 1379 vpp = kmalloc(ccio->ccio_ndisks * sizeof(struct vnode *), 1380 M_DEVBUF, M_WAITOK); 1381 1382 error = copyin((caddr_t)ccio->ccio_disks, (caddr_t)cpp, 1383 ccio->ccio_ndisks * sizeof(char **)); 1384 if (error) { 1385 kfree(vpp, M_DEVBUF); 1386 kfree(cpp, M_DEVBUF); 1387 ccdunlock(cs); 1388 return (error); 1389 } 1390 1391 #ifdef DEBUG 1392 if (ccddebug & CCDB_INIT) { 1393 for (i = 0; i < ccio->ccio_ndisks; ++i) 1394 kprintf("ccdioctl: component %d: 0x%x\n", 1395 i, cpp[i]); 1396 } 1397 #endif 1398 1399 for (i = 0; i < ccio->ccio_ndisks; ++i) { 1400 #ifdef DEBUG 1401 if (ccddebug & CCDB_INIT) 1402 kprintf("ccdioctl: lookedup = %d\n", lookedup); 1403 #endif 1404 if ((error = ccdlookup(cpp[i], &vpp[i])) != 0) { 1405 for (j = 0; j < lookedup; ++j) 1406 (void)vn_close(vpp[j], FREAD|FWRITE); 1407 kfree(vpp, M_DEVBUF); 1408 kfree(cpp, M_DEVBUF); 1409 ccdunlock(cs); 1410 return (error); 1411 } 1412 ++lookedup; 1413 } 1414 ccd.ccd_cpp = cpp; 1415 ccd.ccd_vpp = vpp; 1416 ccd.ccd_ndev = ccio->ccio_ndisks; 1417 1418 /* 1419 * Initialize the ccd. Fills in the softc for us. 1420 */ 1421 if ((error = ccdinit(&ccd, cpp, ap->a_cred)) != 0) { 1422 for (j = 0; j < lookedup; ++j) 1423 (void)vn_close(vpp[j], FREAD|FWRITE); 1424 kfree(vpp, M_DEVBUF); 1425 kfree(cpp, M_DEVBUF); 1426 ccdunlock(cs); 1427 return (error); 1428 } 1429 1430 /* 1431 * The ccd has been successfully initialized, so 1432 * we can place it into the array and read the disklabel. 1433 */ 1434 bcopy(&ccd, &ccddevs[unit], sizeof(ccd)); 1435 ccio->ccio_unit = unit; 1436 ccio->ccio_size = cs->sc_size; 1437 1438 bzero(&info, sizeof(info)); 1439 info.d_media_blksize = cs->sc_geom.ccg_secsize; 1440 info.d_media_blocks = cs->sc_size; 1441 info.d_nheads = cs->sc_geom.ccg_ntracks; 1442 info.d_secpertrack = cs->sc_geom.ccg_nsectors; 1443 info.d_ncylinders = cs->sc_geom.ccg_ncylinders; 1444 info.d_secpercyl = info.d_nheads * info.d_secpertrack; 1445 1446 /* 1447 * For cases where a label is directly applied to the ccd, 1448 * without slices, DSO_COMPATMBR forces one sector be 1449 * reserved for backwards compatibility. 1450 */ 1451 info.d_dsflags = DSO_COMPATMBR; 1452 disk_setdiskinfo(&cs->sc_disk, &info); 1453 1454 ccdunlock(cs); 1455 1456 break; 1457 1458 case CCDIOCCLR: 1459 if ((cs->sc_flags & CCDF_INITED) == 0) 1460 return (ENXIO); 1461 1462 if ((ap->a_fflag & FWRITE) == 0) 1463 return (EBADF); 1464 1465 if ((error = ccdlock(cs)) != 0) 1466 return (error); 1467 1468 if (dev_drefs(cs->sc_dev) > 1) { 1469 ccdunlock(cs); 1470 return (EBUSY); 1471 } 1472 1473 /* 1474 * Free ccd_softc information and clear entry. 1475 */ 1476 1477 /* Close the components and free their pathnames. */ 1478 for (i = 0; i < cs->sc_nccdisks; ++i) { 1479 /* 1480 * XXX: this close could potentially fail and 1481 * cause Bad Things. Maybe we need to force 1482 * the close to happen? 1483 */ 1484 #ifdef DEBUG 1485 if (ccddebug & CCDB_VNODE) 1486 vprint("CCDIOCCLR: vnode info", 1487 cs->sc_cinfo[i].ci_vp); 1488 #endif 1489 (void)vn_close(cs->sc_cinfo[i].ci_vp, FREAD|FWRITE); 1490 kfree(cs->sc_cinfo[i].ci_path, M_DEVBUF); 1491 } 1492 1493 /* Free interleave index. */ 1494 for (i = 0; cs->sc_itable[i].ii_ndisk; ++i) 1495 kfree(cs->sc_itable[i].ii_index, M_DEVBUF); 1496 1497 /* Free component info and interleave table. */ 1498 kfree(cs->sc_cinfo, M_DEVBUF); 1499 kfree(cs->sc_itable, M_DEVBUF); 1500 cs->sc_cinfo = NULL; 1501 cs->sc_itable = NULL; 1502 cs->sc_flags &= ~CCDF_INITED; 1503 1504 /* 1505 * Free ccddevice information and clear entry. 1506 */ 1507 kfree(ccddevs[unit].ccd_cpp, M_DEVBUF); 1508 kfree(ccddevs[unit].ccd_vpp, M_DEVBUF); 1509 bcopy(&ccd, &ccddevs[unit], sizeof(ccd)); 1510 1511 /* 1512 * And remove the devstat entry. 1513 */ 1514 devstat_remove_entry(&cs->device_stats); 1515 1516 /* This must be atomic. */ 1517 crit_enter(); 1518 ccdunlock(cs); 1519 crit_exit(); 1520 1521 break; 1522 1523 default: 1524 return (ENOTTY); 1525 } 1526 1527 return (0); 1528 } 1529 1530 static int 1531 ccddump(struct dev_dump_args *ap) 1532 { 1533 /* Not implemented. */ 1534 return ENXIO; 1535 } 1536 1537 /* 1538 * Lookup the provided name in the filesystem. If the file exists, 1539 * is a valid block device, and isn't being used by anyone else, 1540 * set *vpp to the file's vnode. 1541 */ 1542 static int 1543 ccdlookup(char *path, struct vnode **vpp) 1544 { 1545 struct nlookupdata nd; 1546 struct vnode *vp; 1547 int error; 1548 1549 *vpp = NULL; 1550 1551 error = nlookup_init(&nd, path, UIO_USERSPACE, NLC_FOLLOW|NLC_LOCKVP); 1552 if (error) 1553 return (error); 1554 if ((error = vn_open(&nd, NULL, FREAD|FWRITE, 0)) != 0) { 1555 #ifdef DEBUG 1556 if (ccddebug & CCDB_FOLLOW|CCDB_INIT) 1557 kprintf("ccdlookup: vn_open error = %d\n", error); 1558 #endif 1559 goto done; 1560 } 1561 vp = nd.nl_open_vp; 1562 1563 if (vp->v_opencount > 1) { 1564 error = EBUSY; 1565 goto done; 1566 } 1567 1568 if (!vn_isdisk(vp, &error)) 1569 goto done; 1570 1571 #ifdef DEBUG 1572 if (ccddebug & CCDB_VNODE) 1573 vprint("ccdlookup: vnode info", vp); 1574 #endif 1575 1576 vn_unlock(vp); 1577 nd.nl_open_vp = NULL; 1578 nlookup_done(&nd); 1579 *vpp = vp; /* leave ref intact */ 1580 return (0); 1581 done: 1582 nlookup_done(&nd); 1583 return (error); 1584 } 1585 1586 /* 1587 * Wait interruptibly for an exclusive lock. 1588 * 1589 * XXX 1590 * Several drivers do this; it should be abstracted and made MP-safe. 1591 */ 1592 static int 1593 ccdlock(struct ccd_softc *cs) 1594 { 1595 int error; 1596 1597 while ((cs->sc_flags & CCDF_LOCKED) != 0) { 1598 cs->sc_flags |= CCDF_WANTED; 1599 if ((error = tsleep(cs, PCATCH, "ccdlck", 0)) != 0) 1600 return (error); 1601 } 1602 cs->sc_flags |= CCDF_LOCKED; 1603 return (0); 1604 } 1605 1606 /* 1607 * Unlock and wake up any waiters. 1608 */ 1609 static void 1610 ccdunlock(struct ccd_softc *cs) 1611 { 1612 1613 cs->sc_flags &= ~CCDF_LOCKED; 1614 if ((cs->sc_flags & CCDF_WANTED) != 0) { 1615 cs->sc_flags &= ~CCDF_WANTED; 1616 wakeup(cs); 1617 } 1618 } 1619 1620 #ifdef DEBUG 1621 static void 1622 printiinfo(struct ccdiinfo *ii) 1623 { 1624 int ix, i; 1625 1626 for (ix = 0; ii->ii_ndisk; ix++, ii++) { 1627 kprintf(" itab[%d]: #dk %d sblk %d soff %d", 1628 ix, ii->ii_ndisk, ii->ii_startblk, ii->ii_startoff); 1629 for (i = 0; i < ii->ii_ndisk; i++) 1630 kprintf(" %d", ii->ii_index[i]); 1631 kprintf("\n"); 1632 } 1633 } 1634 #endif 1635 1636 1637 /* Local Variables: */ 1638 /* c-argdecl-indent: 8 */ 1639 /* c-continued-statement-offset: 8 */ 1640 /* c-indent-level: 8 */ 1641 /* End: */ 1642