1 /* 2 * Copyright (c) 2007 The DragonFly Project. All rights reserved. 3 * 4 * This code is derived from software contributed to The DragonFly Project 5 * by Matthew Dillon <dillon@backplane.com> 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in 15 * the documentation and/or other materials provided with the 16 * distribution. 17 * 3. Neither the name of The DragonFly Project nor the names of its 18 * contributors may be used to endorse or promote products derived 19 * from this software without specific, prior written permission. 20 * 21 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 22 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 23 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS 24 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE 25 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, 26 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING, 27 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 28 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED 29 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 30 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT 31 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 32 * SUCH DAMAGE. 33 * 34 * $DragonFly: src/sys/dev/disk/ccd/ccd.c,v 1.50 2007/11/06 03:50:02 dillon Exp $ 35 */ 36 /* 37 * Copyright (c) 1995 Jason R. Thorpe. 38 * All rights reserved. 39 * 40 * Redistribution and use in source and binary forms, with or without 41 * modification, are permitted provided that the following conditions 42 * are met: 43 * 1. Redistributions of source code must retain the above copyright 44 * notice, this list of conditions and the following disclaimer. 45 * 2. Redistributions in binary form must reproduce the above copyright 46 * notice, this list of conditions and the following disclaimer in the 47 * documentation and/or other materials provided with the distribution. 48 * 3. All advertising materials mentioning features or use of this software 49 * must display the following acknowledgement: 50 * This product includes software developed for the NetBSD Project 51 * by Jason R. Thorpe. 52 * 4. The name of the author may not be used to endorse or promote products 53 * derived from this software without specific prior written permission. 54 * 55 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR 56 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 57 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 58 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, 59 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, 60 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 61 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED 62 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 63 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 64 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 65 * SUCH DAMAGE. 66 */ 67 68 /* 69 * Copyright (c) 1988 University of Utah. 70 * Copyright (c) 1990, 1993 71 * The Regents of the University of California. All rights reserved. 72 * 73 * This code is derived from software contributed to Berkeley by 74 * the Systems Programming Group of the University of Utah Computer 75 * Science Department. 76 * 77 * Redistribution and use in source and binary forms, with or without 78 * modification, are permitted provided that the following conditions 79 * are met: 80 * 1. Redistributions of source code must retain the above copyright 81 * notice, this list of conditions and the following disclaimer. 82 * 2. Redistributions in binary form must reproduce the above copyright 83 * notice, this list of conditions and the following disclaimer in the 84 * documentation and/or other materials provided with the distribution. 85 * 3. All advertising materials mentioning features or use of this software 86 * must display the following acknowledgement: 87 * This product includes software developed by the University of 88 * California, Berkeley and its contributors. 89 * 4. Neither the name of the University nor the names of its contributors 90 * may be used to endorse or promote products derived from this software 91 * without specific prior written permission. 92 * 93 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 94 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 95 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 96 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 97 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 98 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 99 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 100 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 101 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 102 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 103 * SUCH DAMAGE. 104 * 105 * from: Utah $Hdr: cd.c 1.6 90/11/28$ 106 */ 107 /* 108 * @(#)cd.c 8.2 (Berkeley) 11/16/93 109 * $FreeBSD: src/sys/dev/ccd/ccd.c,v 1.73.2.1 2001/09/11 09:49:52 kris Exp $ 110 * $NetBSD: ccd.c,v 1.22 1995/12/08 19:13:26 thorpej Exp $ 111 * $DragonFly: src/sys/dev/disk/ccd/ccd.c,v 1.50 2007/11/06 03:50:02 dillon Exp $ 112 */ 113 114 /* 115 * "Concatenated" disk driver. 116 * 117 * Original dynamic configuration support by: 118 * Jason R. Thorpe <thorpej@nas.nasa.gov> 119 * Numerical Aerodynamic Simulation Facility 120 * Mail Stop 258-6 121 * NASA Ames Research Center 122 * Moffett Field, CA 94035 123 */ 124 125 #include "use_ccd.h" 126 127 #include <sys/param.h> 128 #include <sys/systm.h> 129 #include <sys/kernel.h> 130 #include <sys/module.h> 131 #include <sys/proc.h> 132 #include <sys/buf.h> 133 #include <sys/malloc.h> 134 #include <sys/nlookup.h> 135 #include <sys/conf.h> 136 #include <sys/stat.h> 137 #include <sys/sysctl.h> 138 #include <sys/disk.h> 139 #include <sys/dtype.h> 140 #include <sys/diskslice.h> 141 #include <sys/devicestat.h> 142 #include <sys/fcntl.h> 143 #include <sys/vnode.h> 144 #include <sys/buf2.h> 145 #include <sys/ccdvar.h> 146 147 #include <vm/vm_zone.h> 148 149 #include <vfs/ufs/dinode.h> /* XXX Used only for fs.h */ 150 #include <vfs/ufs/fs.h> /* XXX used only to get BBSIZE and SBSIZE */ 151 152 #include <sys/thread2.h> 153 154 #if defined(CCDDEBUG) && !defined(DEBUG) 155 #define DEBUG 156 #endif 157 158 #ifdef DEBUG 159 #define CCDB_FOLLOW 0x01 160 #define CCDB_INIT 0x02 161 #define CCDB_IO 0x04 162 #define CCDB_LABEL 0x08 163 #define CCDB_VNODE 0x10 164 static int ccddebug = CCDB_FOLLOW | CCDB_INIT | CCDB_IO | CCDB_LABEL | 165 CCDB_VNODE; 166 SYSCTL_INT(_debug, OID_AUTO, ccddebug, CTLFLAG_RW, &ccddebug, 0, ""); 167 #undef DEBUG 168 #endif 169 170 #define ccdunit(x) dkunit(x) 171 #define ccdpart(x) dkpart(x) 172 173 /* 174 This is how mirroring works (only writes are special): 175 176 When initiating a write, ccdbuffer() returns two "struct ccdbuf *"s 177 linked together by the cb_mirror field. "cb_pflags & 178 CCDPF_MIRROR_DONE" is set to 0 on both of them. 179 180 When a component returns to ccdiodone(), it checks if "cb_pflags & 181 CCDPF_MIRROR_DONE" is set or not. If not, it sets the partner's 182 flag and returns. If it is, it means its partner has already 183 returned, so it will go to the regular cleanup. 184 185 */ 186 187 struct ccdbuf { 188 struct buf cb_buf; /* new I/O buf */ 189 struct vnode *cb_vp; /* related vnode */ 190 struct bio *cb_obio; /* ptr. to original I/O buf */ 191 struct ccdbuf *cb_freenext; /* free list link */ 192 int cb_unit; /* target unit */ 193 int cb_comp; /* target component */ 194 int cb_pflags; /* mirror/parity status flag */ 195 struct ccdbuf *cb_mirror; /* mirror counterpart */ 196 }; 197 198 /* bits in cb_pflags */ 199 #define CCDPF_MIRROR_DONE 1 /* if set, mirror counterpart is done */ 200 201 static d_open_t ccdopen; 202 static d_close_t ccdclose; 203 static d_strategy_t ccdstrategy; 204 static d_ioctl_t ccdioctl; 205 static d_dump_t ccddump; 206 207 #define NCCDFREEHIWAT 16 208 209 #define CDEV_MAJOR 74 210 211 static struct dev_ops ccd_ops = { 212 { "ccd", CDEV_MAJOR, D_DISK }, 213 .d_open = ccdopen, 214 .d_close = ccdclose, 215 .d_read = physread, 216 .d_write = physwrite, 217 .d_ioctl = ccdioctl, 218 .d_strategy = ccdstrategy, 219 .d_dump = ccddump 220 }; 221 222 /* called during module initialization */ 223 static void ccdattach (void); 224 static int ccddetach (void); 225 static int ccd_modevent (module_t, int, void *); 226 227 /* called by biodone() at interrupt time */ 228 static void ccdiodone (struct bio *bio); 229 230 static void ccdstart (struct ccd_softc *, struct bio *); 231 static void ccdinterleave (struct ccd_softc *, int); 232 static void ccdintr (struct ccd_softc *, struct bio *); 233 static int ccdinit (struct ccddevice *, char **, struct ucred *); 234 static int ccdlookup (char *, struct vnode **); 235 static void ccdbuffer (struct ccdbuf **ret, struct ccd_softc *, 236 struct bio *, off_t, caddr_t, long); 237 static int ccdlock (struct ccd_softc *); 238 static void ccdunlock (struct ccd_softc *); 239 240 #ifdef DEBUG 241 static void printiinfo (struct ccdiinfo *); 242 #endif 243 244 /* Non-private for the benefit of libkvm. */ 245 struct ccd_softc *ccd_softc; 246 struct ccddevice *ccddevs; 247 struct ccdbuf *ccdfreebufs; 248 static int numccdfreebufs; 249 static int numccd = 0; 250 251 /* 252 * getccdbuf() - Allocate and zero a ccd buffer. 253 * 254 * This routine is called at splbio(). 255 */ 256 257 static __inline 258 struct ccdbuf * 259 getccdbuf(void) 260 { 261 struct ccdbuf *cbp; 262 263 /* 264 * Allocate from freelist or malloc as necessary 265 */ 266 if ((cbp = ccdfreebufs) != NULL) { 267 ccdfreebufs = cbp->cb_freenext; 268 --numccdfreebufs; 269 reinitbufbio(&cbp->cb_buf); 270 } else { 271 cbp = kmalloc(sizeof(struct ccdbuf), M_DEVBUF, M_WAITOK|M_ZERO); 272 initbufbio(&cbp->cb_buf); 273 } 274 275 /* 276 * independant struct buf initialization 277 */ 278 buf_dep_init(&cbp->cb_buf); 279 BUF_LOCKINIT(&cbp->cb_buf); 280 BUF_LOCK(&cbp->cb_buf, LK_EXCLUSIVE); 281 BUF_KERNPROC(&cbp->cb_buf); 282 cbp->cb_buf.b_flags = B_PAGING | B_BNOCLIP; 283 284 return(cbp); 285 } 286 287 /* 288 * putccdbuf() - Free a ccd buffer. 289 * 290 * This routine is called at splbio(). 291 */ 292 293 static __inline 294 void 295 putccdbuf(struct ccdbuf *cbp) 296 { 297 BUF_UNLOCK(&cbp->cb_buf); 298 BUF_LOCKFREE(&cbp->cb_buf); 299 300 if (numccdfreebufs < NCCDFREEHIWAT) { 301 cbp->cb_freenext = ccdfreebufs; 302 ccdfreebufs = cbp; 303 ++numccdfreebufs; 304 } else { 305 kfree((caddr_t)cbp, M_DEVBUF); 306 } 307 } 308 309 /* 310 * Called by main() during pseudo-device attachment. All we need 311 * to do is allocate enough space for devices to be configured later, and 312 * add devsw entries. 313 */ 314 static void 315 ccdattach(void) 316 { 317 struct disk_info info; 318 struct ccd_softc *cs; 319 int i; 320 int num = NCCD; 321 322 if (num > 1) 323 kprintf("ccd0-%d: Concatenated disk drivers\n", num-1); 324 else 325 kprintf("ccd0: Concatenated disk driver\n"); 326 327 ccd_softc = kmalloc(num * sizeof(struct ccd_softc), M_DEVBUF, 328 M_WAITOK | M_ZERO); 329 ccddevs = kmalloc(num * sizeof(struct ccddevice), M_DEVBUF, 330 M_WAITOK | M_ZERO); 331 numccd = num; 332 333 /* 334 * With normal disk devices the open simply fails if the media 335 * is not present. With CCD we have to be able to open the 336 * raw disk to use the ioctl's to set it up, so create a dummy 337 * disk info structure so dscheck() doesn't blow up. 338 */ 339 bzero(&info, sizeof(info)); 340 info.d_media_blksize = DEV_BSIZE; 341 342 for (i = 0; i < numccd; ++i) { 343 cs = &ccd_softc[i]; 344 cs->sc_dev = disk_create(i, &cs->sc_disk, &ccd_ops); 345 cs->sc_dev->si_drv1 = cs; 346 cs->sc_dev->si_iosize_max = 256 * 512; /* XXX */ 347 disk_setdiskinfo(&cs->sc_disk, &info); 348 } 349 } 350 351 static int 352 ccddetach(void) 353 { 354 struct ccd_softc *cs; 355 struct dev_ioctl_args ioctl_args; 356 int i; 357 int error = 0; 358 int eval; 359 360 bzero(&ioctl_args, sizeof(ioctl_args)); 361 362 for (i = 0; i < numccd; ++i) { 363 cs = &ccd_softc[i]; 364 if (cs->sc_dev == NULL) 365 continue; 366 ioctl_args.a_head.a_dev = cs->sc_dev; 367 ioctl_args.a_cmd = CCDIOCCLR; 368 ioctl_args.a_fflag = FWRITE; 369 eval = ccdioctl(&ioctl_args); 370 if (eval && eval != ENXIO) { 371 kprintf("ccd%d: In use, cannot detach\n", i); 372 error = EBUSY; 373 } 374 } 375 if (error == 0) { 376 for (i = 0; i < numccd; ++i) { 377 cs = &ccd_softc[i]; 378 if (cs->sc_dev == NULL) 379 continue; 380 disk_destroy(&cs->sc_disk); 381 cs->sc_dev = NULL; 382 } 383 if (ccd_softc) 384 kfree(ccd_softc, M_DEVBUF); 385 if (ccddevs) 386 kfree(ccddevs, M_DEVBUF); 387 } 388 return (error); 389 } 390 391 static int 392 ccd_modevent(module_t mod, int type, void *data) 393 { 394 int error = 0; 395 396 switch (type) { 397 case MOD_LOAD: 398 ccdattach(); 399 break; 400 401 case MOD_UNLOAD: 402 error = ccddetach(); 403 break; 404 405 default: /* MOD_SHUTDOWN etc */ 406 break; 407 } 408 return (error); 409 } 410 411 DEV_MODULE(ccd, ccd_modevent, NULL); 412 413 static int 414 ccdinit(struct ccddevice *ccd, char **cpaths, struct ucred *cred) 415 { 416 struct ccd_softc *cs = &ccd_softc[ccd->ccd_unit]; 417 struct ccdcinfo *ci = NULL; /* XXX */ 418 int ix; 419 struct vnode *vp; 420 u_int64_t skip; 421 u_int64_t size; 422 u_int64_t minsize; 423 int maxsecsize; 424 struct partinfo dpart; 425 struct ccdgeom *ccg = &cs->sc_geom; 426 char tmppath[MAXPATHLEN]; 427 int error = 0; 428 429 #ifdef DEBUG 430 if (ccddebug & (CCDB_FOLLOW|CCDB_INIT)) 431 kprintf("ccdinit: unit %d\n", ccd->ccd_unit); 432 #endif 433 434 cs->sc_size = 0; 435 cs->sc_ileave = ccd->ccd_interleave; 436 cs->sc_nccdisks = ccd->ccd_ndev; 437 438 /* Allocate space for the component info. */ 439 cs->sc_cinfo = kmalloc(cs->sc_nccdisks * sizeof(struct ccdcinfo), 440 M_DEVBUF, M_WAITOK); 441 cs->sc_maxiosize = MAXPHYS; 442 443 /* 444 * Verify that each component piece exists and record 445 * relevant information about it. 446 */ 447 maxsecsize = 0; 448 minsize = 0; 449 for (ix = 0; ix < cs->sc_nccdisks; ix++) { 450 vp = ccd->ccd_vpp[ix]; 451 ci = &cs->sc_cinfo[ix]; 452 ci->ci_vp = vp; 453 454 /* 455 * Copy in the pathname of the component. 456 */ 457 bzero(tmppath, sizeof(tmppath)); /* sanity */ 458 if ((error = copyinstr(cpaths[ix], tmppath, 459 MAXPATHLEN, &ci->ci_pathlen)) != 0) { 460 #ifdef DEBUG 461 if (ccddebug & (CCDB_FOLLOW|CCDB_INIT)) 462 kprintf("ccd%d: can't copy path, error = %d\n", 463 ccd->ccd_unit, error); 464 #endif 465 goto fail; 466 } 467 ci->ci_path = kmalloc(ci->ci_pathlen, M_DEVBUF, M_WAITOK); 468 bcopy(tmppath, ci->ci_path, ci->ci_pathlen); 469 470 ci->ci_dev = vn_todev(vp); 471 if (ci->ci_dev->si_iosize_max && 472 cs->sc_maxiosize > ci->ci_dev->si_iosize_max) { 473 cs->sc_maxiosize = ci->ci_dev->si_iosize_max; 474 } 475 476 /* 477 * Get partition information for the component. 478 */ 479 error = VOP_IOCTL(vp, DIOCGPART, (caddr_t)&dpart, FREAD, 480 cred, NULL); 481 if (error) { 482 #ifdef DEBUG 483 if (ccddebug & (CCDB_FOLLOW|CCDB_INIT)) 484 kprintf("ccd%d: %s: ioctl failed, error = %d\n", 485 ccd->ccd_unit, ci->ci_path, error); 486 #endif 487 goto fail; 488 } 489 if (dpart.fstype != FS_CCD && 490 !kuuid_is_ccd(&dpart.fstype_uuid)) { 491 kprintf("ccd%d: %s: filesystem type must be 'ccd'\n", 492 ccd->ccd_unit, ci->ci_path); 493 error = EFTYPE; 494 goto fail; 495 } 496 if (maxsecsize < dpart.media_blksize) 497 maxsecsize = dpart.media_blksize; 498 499 /* 500 * Skip a certain amount of storage at the beginning of 501 * the component to make sure we don't infringe on any 502 * reserved sectors. This is handled entirely by 503 * dpart.reserved_blocks but we also impose a minimum 504 * of 16 sectors for backwards compatibility. 505 */ 506 skip = 16; 507 if (skip < dpart.reserved_blocks) 508 skip = dpart.reserved_blocks; 509 size = dpart.media_blocks - skip; 510 511 /* 512 * Calculate the size, truncating to an interleave 513 * boundary if necessary. 514 */ 515 if (cs->sc_ileave > 1) 516 size -= size % cs->sc_ileave; 517 518 if ((int64_t)size <= 0) { 519 #ifdef DEBUG 520 if (ccddebug & (CCDB_FOLLOW|CCDB_INIT)) 521 kprintf("ccd%d: %s: size == 0\n", 522 ccd->ccd_unit, ci->ci_path); 523 #endif 524 error = ENODEV; 525 goto fail; 526 } 527 528 /* 529 * Calculate the smallest uniform component, used 530 * elsewhere. 531 */ 532 if (minsize == 0 || minsize > size) 533 minsize = size; 534 ci->ci_skip = skip; 535 ci->ci_size = size; 536 cs->sc_size += size; 537 } 538 kprintf("ccd%d: max component iosize is %d total blocks %lld\n", 539 cs->sc_unit, cs->sc_maxiosize, (long long)cs->sc_size); 540 541 /* 542 * Don't allow the interleave to be smaller than 543 * the biggest component sector. 544 */ 545 if ((cs->sc_ileave > 0) && 546 (cs->sc_ileave % (maxsecsize / DEV_BSIZE))) { 547 #ifdef DEBUG 548 if (ccddebug & (CCDB_FOLLOW|CCDB_INIT)) 549 kprintf("ccd%d: interleave must be at least %d\n", 550 ccd->ccd_unit, (maxsecsize / DEV_BSIZE)); 551 #endif 552 error = EINVAL; 553 goto fail; 554 } 555 556 /* 557 * If uniform interleave is desired set all sizes to that of 558 * the smallest component. This will guarentee that a single 559 * interleave table is generated. 560 * 561 * Lost space must be taken into account when calculating the 562 * overall size. Half the space is lost when CCDF_MIRROR is 563 * specified. One disk is lost when CCDF_PARITY is specified. 564 */ 565 if (ccd->ccd_flags & CCDF_UNIFORM) { 566 for (ci = cs->sc_cinfo; 567 ci < &cs->sc_cinfo[cs->sc_nccdisks]; ci++) { 568 ci->ci_size = minsize; 569 } 570 if (ccd->ccd_flags & CCDF_MIRROR) { 571 /* 572 * Check to see if an even number of components 573 * have been specified. The interleave must also 574 * be non-zero in order for us to be able to 575 * guarentee the topology. 576 */ 577 if (cs->sc_nccdisks % 2) { 578 kprintf("ccd%d: mirroring requires an even number of disks\n", ccd->ccd_unit ); 579 error = EINVAL; 580 goto fail; 581 } 582 if (cs->sc_ileave == 0) { 583 kprintf("ccd%d: an interleave must be specified when mirroring\n", ccd->ccd_unit); 584 error = EINVAL; 585 goto fail; 586 } 587 cs->sc_size = (cs->sc_nccdisks/2) * minsize; 588 } else if (ccd->ccd_flags & CCDF_PARITY) { 589 cs->sc_size = (cs->sc_nccdisks-1) * minsize; 590 } else { 591 if (cs->sc_ileave == 0) { 592 kprintf("ccd%d: an interleave must be specified when using parity\n", ccd->ccd_unit); 593 error = EINVAL; 594 goto fail; 595 } 596 cs->sc_size = cs->sc_nccdisks * minsize; 597 } 598 } 599 600 /* 601 * Construct the interleave table. 602 */ 603 ccdinterleave(cs, ccd->ccd_unit); 604 605 /* 606 * Create pseudo-geometry based on 1MB cylinders. It's 607 * pretty close. 608 */ 609 ccg->ccg_secsize = maxsecsize; 610 ccg->ccg_ntracks = 1; 611 ccg->ccg_nsectors = 1024 * 1024 / ccg->ccg_secsize; 612 ccg->ccg_ncylinders = cs->sc_size / ccg->ccg_nsectors; 613 614 /* 615 * Add an devstat entry for this device. 616 */ 617 devstat_add_entry(&cs->device_stats, "ccd", ccd->ccd_unit, 618 ccg->ccg_secsize, DEVSTAT_ALL_SUPPORTED, 619 DEVSTAT_TYPE_STORARRAY |DEVSTAT_TYPE_IF_OTHER, 620 DEVSTAT_PRIORITY_ARRAY); 621 622 cs->sc_flags |= CCDF_INITED; 623 cs->sc_cflags = ccd->ccd_flags; /* So we can find out later... */ 624 cs->sc_unit = ccd->ccd_unit; 625 return (0); 626 fail: 627 while (ci > cs->sc_cinfo) { 628 ci--; 629 kfree(ci->ci_path, M_DEVBUF); 630 } 631 kfree(cs->sc_cinfo, M_DEVBUF); 632 cs->sc_cinfo = NULL; 633 return (error); 634 } 635 636 static void 637 ccdinterleave(struct ccd_softc *cs, int unit) 638 { 639 struct ccdcinfo *ci, *smallci; 640 struct ccdiinfo *ii; 641 u_int64_t bn; 642 u_int64_t lbn; 643 u_int64_t size; 644 int icount; 645 int ix; 646 647 #ifdef DEBUG 648 if (ccddebug & CCDB_INIT) 649 kprintf("ccdinterleave(%x): ileave %d\n", cs, cs->sc_ileave); 650 #endif 651 652 /* 653 * Allocate an interleave table. The worst case occurs when each 654 * of N disks is of a different size, resulting in N interleave 655 * tables. 656 * 657 * Chances are this is too big, but we don't care. 658 */ 659 icount = cs->sc_nccdisks + 1; 660 cs->sc_itable = kmalloc(icount * sizeof(struct ccdiinfo), 661 M_DEVBUF, M_WAITOK|M_ZERO); 662 663 /* 664 * Trivial case: no interleave (actually interleave of disk size). 665 * Each table entry represents a single component in its entirety. 666 * 667 * An interleave of 0 may not be used with a mirror or parity setup. 668 */ 669 if (cs->sc_ileave == 0) { 670 bn = 0; 671 ii = cs->sc_itable; 672 673 for (ix = 0; ix < cs->sc_nccdisks; ix++) { 674 /* Allocate space for ii_index. */ 675 ii->ii_index = kmalloc(sizeof(int), M_DEVBUF, M_WAITOK); 676 ii->ii_ndisk = 1; 677 ii->ii_startblk = bn; 678 ii->ii_startoff = 0; 679 ii->ii_index[0] = ix; 680 bn += cs->sc_cinfo[ix].ci_size; 681 ii++; 682 } 683 ii->ii_ndisk = 0; 684 #ifdef DEBUG 685 if (ccddebug & CCDB_INIT) 686 printiinfo(cs->sc_itable); 687 #endif 688 return; 689 } 690 691 /* 692 * The following isn't fast or pretty; it doesn't have to be. 693 */ 694 size = 0; 695 bn = lbn = 0; 696 for (ii = cs->sc_itable; ii < &cs->sc_itable[icount]; ++ii) { 697 /* 698 * Allocate space for ii_index. We might allocate more then 699 * we use. 700 */ 701 ii->ii_index = kmalloc((sizeof(int) * cs->sc_nccdisks), 702 M_DEVBUF, M_WAITOK); 703 704 /* 705 * Locate the smallest of the remaining components 706 */ 707 smallci = NULL; 708 ci = cs->sc_cinfo; 709 while (ci < &cs->sc_cinfo[cs->sc_nccdisks]) { 710 if (ci->ci_size > size && 711 (smallci == NULL || 712 ci->ci_size < smallci->ci_size)) { 713 smallci = ci; 714 } 715 ++ci; 716 } 717 718 /* 719 * Nobody left, all done 720 */ 721 if (smallci == NULL) { 722 ii->ii_ndisk = 0; 723 break; 724 } 725 726 /* 727 * Record starting logical block using an sc_ileave blocksize. 728 */ 729 ii->ii_startblk = bn / cs->sc_ileave; 730 731 /* 732 * Record starting component block using an sc_ileave 733 * blocksize. This value is relative to the beginning of 734 * a component disk. 735 */ 736 ii->ii_startoff = lbn; 737 738 /* 739 * Determine how many disks take part in this interleave 740 * and record their indices. 741 */ 742 ix = 0; 743 for (ci = cs->sc_cinfo; 744 ci < &cs->sc_cinfo[cs->sc_nccdisks]; ci++) { 745 if (ci->ci_size >= smallci->ci_size) { 746 ii->ii_index[ix++] = ci - cs->sc_cinfo; 747 } 748 } 749 ii->ii_ndisk = ix; 750 751 /* 752 * Adjust for loop 753 */ 754 bn += ix * (smallci->ci_size - size); 755 lbn = smallci->ci_size / cs->sc_ileave; 756 size = smallci->ci_size; 757 } 758 if (ii == &cs->sc_itable[icount]) 759 panic("ccdinterlave software bug! table exhausted"); 760 #ifdef DEBUG 761 if (ccddebug & CCDB_INIT) 762 printiinfo(cs->sc_itable); 763 #endif 764 } 765 766 /* ARGSUSED */ 767 static int 768 ccdopen(struct dev_open_args *ap) 769 { 770 cdev_t dev = ap->a_head.a_dev; 771 int unit = ccdunit(dev); 772 struct ccd_softc *cs; 773 int error = 0; 774 775 #ifdef DEBUG 776 if (ccddebug & CCDB_FOLLOW) 777 kprintf("ccdopen(%x, %x)\n", dev, flags); 778 #endif 779 if (unit >= numccd) 780 return (ENXIO); 781 cs = &ccd_softc[unit]; 782 783 if ((error = ccdlock(cs)) == 0) { 784 ccdunlock(cs); 785 } 786 return (error); 787 } 788 789 /* ARGSUSED */ 790 static int 791 ccdclose(struct dev_close_args *ap) 792 { 793 cdev_t dev = ap->a_head.a_dev; 794 int unit = ccdunit(dev); 795 struct ccd_softc *cs; 796 int error = 0; 797 798 #ifdef DEBUG 799 if (ccddebug & CCDB_FOLLOW) 800 kprintf("ccdclose(%x, %x)\n", dev, flags); 801 #endif 802 803 if (unit >= numccd) 804 return (ENXIO); 805 cs = &ccd_softc[unit]; 806 if ((error = ccdlock(cs)) == 0) { 807 ccdunlock(cs); 808 } 809 return (error); 810 } 811 812 static int 813 ccdstrategy(struct dev_strategy_args *ap) 814 { 815 cdev_t dev = ap->a_head.a_dev; 816 struct bio *bio = ap->a_bio; 817 int unit = ccdunit(dev); 818 struct bio *nbio; 819 struct buf *bp = bio->bio_buf; 820 struct ccd_softc *cs = &ccd_softc[unit]; 821 u_int64_t pbn; /* in sc_secsize chunks */ 822 u_int32_t sz; /* in sc_secsize chunks */ 823 824 #ifdef DEBUG 825 if (ccddebug & CCDB_FOLLOW) 826 kprintf("ccdstrategy(%x): unit %d\n", bp, unit); 827 #endif 828 if ((cs->sc_flags & CCDF_INITED) == 0) { 829 bp->b_error = ENXIO; 830 goto error; 831 } 832 833 /* If it's a nil transfer, wake up the top half now. */ 834 if (bp->b_bcount == 0) { 835 bp->b_resid = 0; 836 goto done; 837 } 838 839 /* 840 * Do bounds checking and adjust transfer. If there's an 841 * error, the bounds check will flag that for us. 842 */ 843 844 pbn = bio->bio_offset / cs->sc_geom.ccg_secsize; 845 sz = howmany(bp->b_bcount, cs->sc_geom.ccg_secsize); 846 847 /* 848 * If out of bounds return an error. If the request goes 849 * past EOF, clip the request as appropriate. If exactly 850 * at EOF, return success (don't clip), but with 0 bytes 851 * of I/O. 852 * 853 * Mark EOF B_INVAL (just like bad), indicating that the 854 * contents of the buffer, if any, is invalid. 855 */ 856 if ((int64_t)pbn < 0) 857 goto bad; 858 if (pbn + sz > cs->sc_size) { 859 if (pbn > cs->sc_size || (bp->b_flags & B_BNOCLIP)) 860 goto bad; 861 if (pbn == cs->sc_size) { 862 bp->b_resid = bp->b_bcount; 863 bp->b_flags |= B_INVAL; 864 goto done; 865 } 866 sz = (long)(cs->sc_size - pbn); 867 bp->b_bcount = sz * cs->sc_geom.ccg_secsize; 868 } 869 nbio = bio; 870 871 bp->b_resid = bp->b_bcount; 872 nbio->bio_driver_info = dev; 873 874 /* 875 * "Start" the unit. 876 */ 877 crit_enter(); 878 ccdstart(cs, nbio); 879 crit_exit(); 880 return(0); 881 882 /* 883 * note: bio, not nbio, is valid at the done label. 884 */ 885 bad: 886 bp->b_error = EINVAL; 887 error: 888 bp->b_resid = bp->b_bcount; 889 bp->b_flags |= B_ERROR | B_INVAL; 890 done: 891 biodone(bio); 892 return(0); 893 } 894 895 static void 896 ccdstart(struct ccd_softc *cs, struct bio *bio) 897 { 898 long bcount, rcount; 899 struct ccdbuf *cbp[4]; 900 struct buf *bp = bio->bio_buf; 901 /* XXX! : 2 reads and 2 writes for RAID 4/5 */ 902 caddr_t addr; 903 off_t doffset; 904 905 #ifdef DEBUG 906 if (ccddebug & CCDB_FOLLOW) 907 kprintf("ccdstart(%x, %x)\n", cs, bp); 908 #endif 909 910 /* Record the transaction start */ 911 devstat_start_transaction(&cs->device_stats); 912 913 /* 914 * Allocate component buffers and fire off the requests 915 */ 916 doffset = bio->bio_offset; 917 addr = bp->b_data; 918 919 for (bcount = bp->b_bcount; bcount > 0; bcount -= rcount) { 920 ccdbuffer(cbp, cs, bio, doffset, addr, bcount); 921 rcount = cbp[0]->cb_buf.b_bcount; 922 923 if (cs->sc_cflags & CCDF_MIRROR) { 924 /* 925 * Mirroring. Writes go to both disks, reads are 926 * taken from whichever disk seems most appropriate. 927 * 928 * We attempt to localize reads to the disk whos arm 929 * is nearest the read request. We ignore seeks due 930 * to writes when making this determination and we 931 * also try to avoid hogging. 932 */ 933 if (cbp[0]->cb_buf.b_cmd != BUF_CMD_READ) { 934 vn_strategy(cbp[0]->cb_vp, 935 &cbp[0]->cb_buf.b_bio1); 936 vn_strategy(cbp[1]->cb_vp, 937 &cbp[1]->cb_buf.b_bio1); 938 } else { 939 int pick = cs->sc_pick; 940 daddr_t range = cs->sc_size / 16 * cs->sc_geom.ccg_secsize; 941 if (doffset < cs->sc_blk[pick] - range || 942 doffset > cs->sc_blk[pick] + range 943 ) { 944 cs->sc_pick = pick = 1 - pick; 945 } 946 cs->sc_blk[pick] = doffset + rcount; 947 vn_strategy(cbp[pick]->cb_vp, 948 &cbp[pick]->cb_buf.b_bio1); 949 } 950 } else { 951 /* 952 * Not mirroring 953 */ 954 vn_strategy(cbp[0]->cb_vp, 955 &cbp[0]->cb_buf.b_bio1); 956 } 957 doffset += rcount; 958 addr += rcount; 959 } 960 } 961 962 /* 963 * Build a component buffer header. 964 */ 965 static void 966 ccdbuffer(struct ccdbuf **cb, struct ccd_softc *cs, struct bio *bio, 967 off_t doffset, caddr_t addr, long bcount) 968 { 969 struct ccdcinfo *ci, *ci2 = NULL; /* XXX */ 970 struct ccdbuf *cbp; 971 u_int64_t bn; 972 u_int64_t cbn; 973 u_int64_t cboff; 974 off_t cbc; 975 976 #ifdef DEBUG 977 if (ccddebug & CCDB_IO) 978 kprintf("ccdbuffer(%x, %x, %d, %x, %d)\n", 979 cs, bp, bn, addr, bcount); 980 #endif 981 /* 982 * Determine which component bn falls in. 983 */ 984 bn = doffset / cs->sc_geom.ccg_secsize; 985 cbn = bn; 986 cboff = 0; 987 988 if (cs->sc_ileave == 0) { 989 /* 990 * Serially concatenated and neither a mirror nor a parity 991 * config. This is a special case. 992 */ 993 daddr_t sblk; 994 995 sblk = 0; 996 for (ci = cs->sc_cinfo; cbn >= sblk + ci->ci_size; ci++) 997 sblk += ci->ci_size; 998 cbn -= sblk; 999 } else { 1000 struct ccdiinfo *ii; 1001 int ccdisk, off; 1002 1003 /* 1004 * Calculate cbn, the logical superblock (sc_ileave chunks), 1005 * and cboff, a normal block offset (DEV_BSIZE chunks) relative 1006 * to cbn. 1007 */ 1008 cboff = cbn % cs->sc_ileave; /* DEV_BSIZE gran */ 1009 cbn = cbn / cs->sc_ileave; /* DEV_BSIZE * ileave gran */ 1010 1011 /* 1012 * Figure out which interleave table to use. 1013 */ 1014 for (ii = cs->sc_itable; ii->ii_ndisk; ii++) { 1015 if (ii->ii_startblk > cbn) 1016 break; 1017 } 1018 ii--; 1019 1020 /* 1021 * off is the logical superblock relative to the beginning 1022 * of this interleave block. 1023 */ 1024 off = cbn - ii->ii_startblk; 1025 1026 /* 1027 * We must calculate which disk component to use (ccdisk), 1028 * and recalculate cbn to be the superblock relative to 1029 * the beginning of the component. This is typically done by 1030 * adding 'off' and ii->ii_startoff together. However, 'off' 1031 * must typically be divided by the number of components in 1032 * this interleave array to be properly convert it from a 1033 * CCD-relative logical superblock number to a 1034 * component-relative superblock number. 1035 */ 1036 if (ii->ii_ndisk == 1) { 1037 /* 1038 * When we have just one disk, it can't be a mirror 1039 * or a parity config. 1040 */ 1041 ccdisk = ii->ii_index[0]; 1042 cbn = ii->ii_startoff + off; 1043 } else { 1044 if (cs->sc_cflags & CCDF_MIRROR) { 1045 /* 1046 * We have forced a uniform mapping, resulting 1047 * in a single interleave array. We double 1048 * up on the first half of the available 1049 * components and our mirror is in the second 1050 * half. This only works with a single 1051 * interleave array because doubling up 1052 * doubles the number of sectors, so there 1053 * cannot be another interleave array because 1054 * the next interleave array's calculations 1055 * would be off. 1056 */ 1057 int ndisk2 = ii->ii_ndisk / 2; 1058 ccdisk = ii->ii_index[off % ndisk2]; 1059 cbn = ii->ii_startoff + off / ndisk2; 1060 ci2 = &cs->sc_cinfo[ccdisk + ndisk2]; 1061 } else if (cs->sc_cflags & CCDF_PARITY) { 1062 /* 1063 * XXX not implemented yet 1064 */ 1065 int ndisk2 = ii->ii_ndisk - 1; 1066 ccdisk = ii->ii_index[off % ndisk2]; 1067 cbn = ii->ii_startoff + off / ndisk2; 1068 if (cbn % ii->ii_ndisk <= ccdisk) 1069 ccdisk++; 1070 } else { 1071 ccdisk = ii->ii_index[off % ii->ii_ndisk]; 1072 cbn = ii->ii_startoff + off / ii->ii_ndisk; 1073 } 1074 } 1075 1076 ci = &cs->sc_cinfo[ccdisk]; 1077 1078 /* 1079 * Convert cbn from a superblock to a normal block so it 1080 * can be used to calculate (along with cboff) the normal 1081 * block index into this particular disk. 1082 */ 1083 cbn *= cs->sc_ileave; 1084 } 1085 1086 /* 1087 * Fill in the component buf structure. 1088 * 1089 * NOTE: devices do not use b_bufsize, only b_bcount, but b_bcount 1090 * will be truncated on device EOF so we use b_bufsize to detect 1091 * the case. 1092 */ 1093 cbp = getccdbuf(); 1094 cbp->cb_buf.b_cmd = bio->bio_buf->b_cmd; 1095 cbp->cb_buf.b_flags |= bio->bio_buf->b_flags; 1096 cbp->cb_buf.b_data = addr; 1097 cbp->cb_vp = ci->ci_vp; 1098 if (cs->sc_ileave == 0) 1099 cbc = dbtob((off_t)(ci->ci_size - cbn)); 1100 else 1101 cbc = dbtob((off_t)(cs->sc_ileave - cboff)); 1102 if (cbc > cs->sc_maxiosize) 1103 cbc = cs->sc_maxiosize; 1104 cbp->cb_buf.b_bcount = (cbc < bcount) ? cbc : bcount; 1105 cbp->cb_buf.b_bufsize = cbp->cb_buf.b_bcount; 1106 1107 cbp->cb_buf.b_bio1.bio_done = ccdiodone; 1108 cbp->cb_buf.b_bio1.bio_caller_info1.ptr = cbp; 1109 cbp->cb_buf.b_bio1.bio_offset = dbtob(cbn + cboff + ci->ci_skip); 1110 1111 /* 1112 * context for ccdiodone 1113 */ 1114 cbp->cb_obio = bio; 1115 cbp->cb_unit = cs - ccd_softc; 1116 cbp->cb_comp = ci - cs->sc_cinfo; 1117 1118 #ifdef DEBUG 1119 if (ccddebug & CCDB_IO) 1120 kprintf(" dev %x(u%d): cbp %x off %lld addr %x bcnt %d\n", 1121 ci->ci_dev, ci-cs->sc_cinfo, cbp, 1122 cbp->cb_buf.b_bio1.bio_offset, 1123 cbp->cb_buf.b_data, cbp->cb_buf.b_bcount); 1124 #endif 1125 cb[0] = cbp; 1126 1127 /* 1128 * Note: both I/O's setup when reading from mirror, but only one 1129 * will be executed. 1130 */ 1131 if (cs->sc_cflags & CCDF_MIRROR) { 1132 /* mirror, setup second I/O */ 1133 cbp = getccdbuf(); 1134 1135 cbp->cb_buf.b_cmd = bio->bio_buf->b_cmd; 1136 cbp->cb_buf.b_flags |= bio->bio_buf->b_flags; 1137 cbp->cb_buf.b_data = addr; 1138 cbp->cb_vp = ci2->ci_vp; 1139 if (cs->sc_ileave == 0) 1140 cbc = dbtob((off_t)(ci->ci_size - cbn)); 1141 else 1142 cbc = dbtob((off_t)(cs->sc_ileave - cboff)); 1143 if (cbc > cs->sc_maxiosize) 1144 cbc = cs->sc_maxiosize; 1145 cbp->cb_buf.b_bcount = (cbc < bcount) ? cbc : bcount; 1146 cbp->cb_buf.b_bufsize = cbp->cb_buf.b_bcount; 1147 1148 cbp->cb_buf.b_bio1.bio_done = ccdiodone; 1149 cbp->cb_buf.b_bio1.bio_caller_info1.ptr = cbp; 1150 cbp->cb_buf.b_bio1.bio_offset = dbtob(cbn + cboff + ci2->ci_skip); 1151 1152 /* 1153 * context for ccdiodone 1154 */ 1155 cbp->cb_obio = bio; 1156 cbp->cb_unit = cs - ccd_softc; 1157 cbp->cb_comp = ci2 - cs->sc_cinfo; 1158 cb[1] = cbp; 1159 /* link together the ccdbuf's and clear "mirror done" flag */ 1160 cb[0]->cb_mirror = cb[1]; 1161 cb[1]->cb_mirror = cb[0]; 1162 cb[0]->cb_pflags &= ~CCDPF_MIRROR_DONE; 1163 cb[1]->cb_pflags &= ~CCDPF_MIRROR_DONE; 1164 } 1165 } 1166 1167 static void 1168 ccdintr(struct ccd_softc *cs, struct bio *bio) 1169 { 1170 struct buf *bp = bio->bio_buf; 1171 1172 #ifdef DEBUG 1173 if (ccddebug & CCDB_FOLLOW) 1174 kprintf("ccdintr(%x, %x)\n", cs, bp); 1175 #endif 1176 /* 1177 * Request is done for better or worse, wakeup the top half. 1178 */ 1179 if (bp->b_flags & B_ERROR) 1180 bp->b_resid = bp->b_bcount; 1181 devstat_end_transaction_buf(&cs->device_stats, bp); 1182 biodone(bio); 1183 } 1184 1185 /* 1186 * Called at interrupt time. 1187 * Mark the component as done and if all components are done, 1188 * take a ccd interrupt. 1189 */ 1190 static void 1191 ccdiodone(struct bio *bio) 1192 { 1193 struct ccdbuf *cbp = bio->bio_caller_info1.ptr; 1194 struct bio *obio = cbp->cb_obio; 1195 struct buf *obp = obio->bio_buf; 1196 int unit = cbp->cb_unit; 1197 int count; 1198 1199 /* 1200 * Since we do not have exclusive access to underlying devices, 1201 * we can't keep cache translations around. 1202 */ 1203 clearbiocache(bio->bio_next); 1204 1205 crit_enter(); 1206 #ifdef DEBUG 1207 if (ccddebug & CCDB_FOLLOW) 1208 kprintf("ccdiodone(%x)\n", cbp); 1209 if (ccddebug & CCDB_IO) { 1210 kprintf("ccdiodone: bp %x bcount %d resid %d\n", 1211 obp, obp->b_bcount, obp->b_resid); 1212 kprintf(" dev %x(u%d), cbp %x off %lld addr %x bcnt %d\n", 1213 cbp->cb_buf.b_dev, cbp->cb_comp, cbp, 1214 cbp->cb_buf.b_loffset, cbp->cb_buf.b_data, 1215 cbp->cb_buf.b_bcount); 1216 } 1217 #endif 1218 1219 /* 1220 * If an error occured, report it. If this is a mirrored 1221 * configuration and the first of two possible reads, do not 1222 * set the error in the bp yet because the second read may 1223 * succeed. 1224 */ 1225 if (cbp->cb_buf.b_flags & B_ERROR) { 1226 const char *msg = ""; 1227 1228 if ((ccd_softc[unit].sc_cflags & CCDF_MIRROR) && 1229 (cbp->cb_buf.b_cmd == BUF_CMD_READ) && 1230 (cbp->cb_pflags & CCDPF_MIRROR_DONE) == 0) { 1231 /* 1232 * We will try our read on the other disk down 1233 * below, also reverse the default pick so if we 1234 * are doing a scan we do not keep hitting the 1235 * bad disk first. 1236 */ 1237 struct ccd_softc *cs = &ccd_softc[unit]; 1238 1239 msg = ", trying other disk"; 1240 cs->sc_pick = 1 - cs->sc_pick; 1241 cs->sc_blk[cs->sc_pick] = obio->bio_offset; 1242 } else { 1243 obp->b_flags |= B_ERROR; 1244 obp->b_error = cbp->cb_buf.b_error ? 1245 cbp->cb_buf.b_error : EIO; 1246 } 1247 kprintf("ccd%d: error %d on component %d " 1248 "offset %jd (ccd offset %jd)%s\n", 1249 unit, obp->b_error, cbp->cb_comp, 1250 (intmax_t)cbp->cb_buf.b_bio2.bio_offset, 1251 (intmax_t)obio->bio_offset, 1252 msg); 1253 } 1254 1255 /* 1256 * Process mirror. If we are writing, I/O has been initiated on both 1257 * buffers and we fall through only after both are finished. 1258 * 1259 * If we are reading only one I/O is initiated at a time. If an 1260 * error occurs we initiate the second I/O and return, otherwise 1261 * we free the second I/O without initiating it. 1262 */ 1263 1264 if (ccd_softc[unit].sc_cflags & CCDF_MIRROR) { 1265 if (cbp->cb_buf.b_cmd != BUF_CMD_READ) { 1266 /* 1267 * When writing, handshake with the second buffer 1268 * to determine when both are done. If both are not 1269 * done, return here. 1270 */ 1271 if ((cbp->cb_pflags & CCDPF_MIRROR_DONE) == 0) { 1272 cbp->cb_mirror->cb_pflags |= CCDPF_MIRROR_DONE; 1273 putccdbuf(cbp); 1274 crit_exit(); 1275 return; 1276 } 1277 } else { 1278 /* 1279 * When reading, either dispose of the second buffer 1280 * or initiate I/O on the second buffer if an error 1281 * occured with this one. 1282 */ 1283 if ((cbp->cb_pflags & CCDPF_MIRROR_DONE) == 0) { 1284 if (cbp->cb_buf.b_flags & B_ERROR) { 1285 cbp->cb_mirror->cb_pflags |= 1286 CCDPF_MIRROR_DONE; 1287 vn_strategy( 1288 cbp->cb_mirror->cb_vp, 1289 &cbp->cb_mirror->cb_buf.b_bio1 1290 ); 1291 putccdbuf(cbp); 1292 crit_exit(); 1293 return; 1294 } else { 1295 putccdbuf(cbp->cb_mirror); 1296 /* fall through */ 1297 } 1298 } 1299 } 1300 } 1301 1302 /* 1303 * Use our saved b_bufsize to determine if an unexpected EOF occured. 1304 */ 1305 count = cbp->cb_buf.b_bufsize; 1306 putccdbuf(cbp); 1307 1308 /* 1309 * If all done, "interrupt". 1310 */ 1311 obp->b_resid -= count; 1312 if (obp->b_resid < 0) 1313 panic("ccdiodone: count"); 1314 if (obp->b_resid == 0) 1315 ccdintr(&ccd_softc[unit], obio); 1316 crit_exit(); 1317 } 1318 1319 static int 1320 ccdioctl(struct dev_ioctl_args *ap) 1321 { 1322 cdev_t dev = ap->a_head.a_dev; 1323 int unit = ccdunit(dev); 1324 int i, j, lookedup = 0, error = 0; 1325 struct ccd_softc *cs; 1326 struct ccd_ioctl *ccio = (struct ccd_ioctl *)ap->a_data; 1327 struct ccddevice ccd; 1328 struct disk_info info; 1329 char **cpp; 1330 struct vnode **vpp; 1331 1332 if (unit >= numccd) 1333 return (ENXIO); 1334 cs = &ccd_softc[unit]; 1335 1336 bzero(&ccd, sizeof(ccd)); 1337 1338 switch (ap->a_cmd) { 1339 case CCDIOCSET: 1340 if (cs->sc_flags & CCDF_INITED) 1341 return (EBUSY); 1342 1343 if ((ap->a_fflag & FWRITE) == 0) 1344 return (EBADF); 1345 1346 if ((error = ccdlock(cs)) != 0) 1347 return (error); 1348 1349 if (ccio->ccio_ndisks > CCD_MAXNDISKS) { 1350 ccdunlock(cs); 1351 return (EINVAL); 1352 } 1353 1354 /* Fill in some important bits. */ 1355 ccd.ccd_unit = unit; 1356 ccd.ccd_interleave = ccio->ccio_ileave; 1357 if (ccd.ccd_interleave == 0 && 1358 ((ccio->ccio_flags & CCDF_MIRROR) || 1359 (ccio->ccio_flags & CCDF_PARITY))) { 1360 kprintf("ccd%d: disabling mirror/parity, interleave is 0\n", unit); 1361 ccio->ccio_flags &= ~(CCDF_MIRROR | CCDF_PARITY); 1362 } 1363 if ((ccio->ccio_flags & CCDF_MIRROR) && 1364 (ccio->ccio_flags & CCDF_PARITY)) { 1365 kprintf("ccd%d: can't specify both mirror and parity, using mirror\n", unit); 1366 ccio->ccio_flags &= ~CCDF_PARITY; 1367 } 1368 if ((ccio->ccio_flags & (CCDF_MIRROR | CCDF_PARITY)) && 1369 !(ccio->ccio_flags & CCDF_UNIFORM)) { 1370 kprintf("ccd%d: mirror/parity forces uniform flag\n", 1371 unit); 1372 ccio->ccio_flags |= CCDF_UNIFORM; 1373 } 1374 ccd.ccd_flags = ccio->ccio_flags & CCDF_USERMASK; 1375 1376 /* 1377 * Allocate space for and copy in the array of 1378 * componet pathnames and device numbers. 1379 */ 1380 cpp = kmalloc(ccio->ccio_ndisks * sizeof(char *), 1381 M_DEVBUF, M_WAITOK); 1382 vpp = kmalloc(ccio->ccio_ndisks * sizeof(struct vnode *), 1383 M_DEVBUF, M_WAITOK); 1384 1385 error = copyin((caddr_t)ccio->ccio_disks, (caddr_t)cpp, 1386 ccio->ccio_ndisks * sizeof(char **)); 1387 if (error) { 1388 kfree(vpp, M_DEVBUF); 1389 kfree(cpp, M_DEVBUF); 1390 ccdunlock(cs); 1391 return (error); 1392 } 1393 1394 #ifdef DEBUG 1395 if (ccddebug & CCDB_INIT) { 1396 for (i = 0; i < ccio->ccio_ndisks; ++i) 1397 kprintf("ccdioctl: component %d: 0x%x\n", 1398 i, cpp[i]); 1399 } 1400 #endif 1401 1402 for (i = 0; i < ccio->ccio_ndisks; ++i) { 1403 #ifdef DEBUG 1404 if (ccddebug & CCDB_INIT) 1405 kprintf("ccdioctl: lookedup = %d\n", lookedup); 1406 #endif 1407 if ((error = ccdlookup(cpp[i], &vpp[i])) != 0) { 1408 for (j = 0; j < lookedup; ++j) 1409 (void)vn_close(vpp[j], FREAD|FWRITE); 1410 kfree(vpp, M_DEVBUF); 1411 kfree(cpp, M_DEVBUF); 1412 ccdunlock(cs); 1413 return (error); 1414 } 1415 ++lookedup; 1416 } 1417 ccd.ccd_cpp = cpp; 1418 ccd.ccd_vpp = vpp; 1419 ccd.ccd_ndev = ccio->ccio_ndisks; 1420 1421 /* 1422 * Initialize the ccd. Fills in the softc for us. 1423 */ 1424 if ((error = ccdinit(&ccd, cpp, ap->a_cred)) != 0) { 1425 for (j = 0; j < lookedup; ++j) 1426 (void)vn_close(vpp[j], FREAD|FWRITE); 1427 kfree(vpp, M_DEVBUF); 1428 kfree(cpp, M_DEVBUF); 1429 ccdunlock(cs); 1430 return (error); 1431 } 1432 1433 /* 1434 * The ccd has been successfully initialized, so 1435 * we can place it into the array and read the disklabel. 1436 */ 1437 bcopy(&ccd, &ccddevs[unit], sizeof(ccd)); 1438 ccio->ccio_unit = unit; 1439 ccio->ccio_size = cs->sc_size; 1440 1441 bzero(&info, sizeof(info)); 1442 info.d_media_blksize = cs->sc_geom.ccg_secsize; 1443 info.d_media_blocks = cs->sc_size; 1444 info.d_nheads = cs->sc_geom.ccg_ntracks; 1445 info.d_secpertrack = cs->sc_geom.ccg_nsectors; 1446 info.d_ncylinders = cs->sc_geom.ccg_ncylinders; 1447 info.d_secpercyl = info.d_nheads * info.d_secpertrack; 1448 1449 /* 1450 * For cases where a label is directly applied to the ccd, 1451 * without slices, DSO_COMPATMBR forces one sector be 1452 * reserved for backwards compatibility. 1453 */ 1454 info.d_dsflags = DSO_COMPATMBR; 1455 disk_setdiskinfo(&cs->sc_disk, &info); 1456 1457 ccdunlock(cs); 1458 1459 break; 1460 1461 case CCDIOCCLR: 1462 if ((cs->sc_flags & CCDF_INITED) == 0) 1463 return (ENXIO); 1464 1465 if ((ap->a_fflag & FWRITE) == 0) 1466 return (EBADF); 1467 1468 if ((error = ccdlock(cs)) != 0) 1469 return (error); 1470 1471 if (dev_drefs(cs->sc_dev) > 1) { 1472 ccdunlock(cs); 1473 return (EBUSY); 1474 } 1475 1476 /* 1477 * Free ccd_softc information and clear entry. 1478 */ 1479 1480 /* Close the components and free their pathnames. */ 1481 for (i = 0; i < cs->sc_nccdisks; ++i) { 1482 /* 1483 * XXX: this close could potentially fail and 1484 * cause Bad Things. Maybe we need to force 1485 * the close to happen? 1486 */ 1487 #ifdef DEBUG 1488 if (ccddebug & CCDB_VNODE) 1489 vprint("CCDIOCCLR: vnode info", 1490 cs->sc_cinfo[i].ci_vp); 1491 #endif 1492 (void)vn_close(cs->sc_cinfo[i].ci_vp, FREAD|FWRITE); 1493 kfree(cs->sc_cinfo[i].ci_path, M_DEVBUF); 1494 } 1495 1496 /* Free interleave index. */ 1497 for (i = 0; cs->sc_itable[i].ii_ndisk; ++i) 1498 kfree(cs->sc_itable[i].ii_index, M_DEVBUF); 1499 1500 /* Free component info and interleave table. */ 1501 kfree(cs->sc_cinfo, M_DEVBUF); 1502 kfree(cs->sc_itable, M_DEVBUF); 1503 cs->sc_cinfo = NULL; 1504 cs->sc_itable = NULL; 1505 cs->sc_flags &= ~CCDF_INITED; 1506 1507 /* 1508 * Free ccddevice information and clear entry. 1509 */ 1510 kfree(ccddevs[unit].ccd_cpp, M_DEVBUF); 1511 kfree(ccddevs[unit].ccd_vpp, M_DEVBUF); 1512 bcopy(&ccd, &ccddevs[unit], sizeof(ccd)); 1513 1514 /* 1515 * And remove the devstat entry. 1516 */ 1517 devstat_remove_entry(&cs->device_stats); 1518 1519 /* This must be atomic. */ 1520 crit_enter(); 1521 ccdunlock(cs); 1522 crit_exit(); 1523 1524 break; 1525 1526 default: 1527 return (ENOTTY); 1528 } 1529 1530 return (0); 1531 } 1532 1533 static int 1534 ccddump(struct dev_dump_args *ap) 1535 { 1536 /* Not implemented. */ 1537 return ENXIO; 1538 } 1539 1540 /* 1541 * Lookup the provided name in the filesystem. If the file exists, 1542 * is a valid block device, and isn't being used by anyone else, 1543 * set *vpp to the file's vnode. 1544 */ 1545 static int 1546 ccdlookup(char *path, struct vnode **vpp) 1547 { 1548 struct nlookupdata nd; 1549 struct vnode *vp; 1550 int error; 1551 1552 *vpp = NULL; 1553 1554 error = nlookup_init(&nd, path, UIO_USERSPACE, NLC_FOLLOW|NLC_LOCKVP); 1555 if (error) 1556 return (error); 1557 if ((error = vn_open(&nd, NULL, FREAD|FWRITE, 0)) != 0) { 1558 #ifdef DEBUG 1559 if (ccddebug & CCDB_FOLLOW|CCDB_INIT) 1560 kprintf("ccdlookup: vn_open error = %d\n", error); 1561 #endif 1562 goto done; 1563 } 1564 vp = nd.nl_open_vp; 1565 1566 if (vp->v_opencount > 1) { 1567 error = EBUSY; 1568 goto done; 1569 } 1570 1571 if (!vn_isdisk(vp, &error)) 1572 goto done; 1573 1574 #ifdef DEBUG 1575 if (ccddebug & CCDB_VNODE) 1576 vprint("ccdlookup: vnode info", vp); 1577 #endif 1578 1579 vn_unlock(vp); 1580 nd.nl_open_vp = NULL; 1581 nlookup_done(&nd); 1582 *vpp = vp; /* leave ref intact */ 1583 return (0); 1584 done: 1585 nlookup_done(&nd); 1586 return (error); 1587 } 1588 1589 /* 1590 * Wait interruptibly for an exclusive lock. 1591 * 1592 * XXX 1593 * Several drivers do this; it should be abstracted and made MP-safe. 1594 */ 1595 static int 1596 ccdlock(struct ccd_softc *cs) 1597 { 1598 int error; 1599 1600 while ((cs->sc_flags & CCDF_LOCKED) != 0) { 1601 cs->sc_flags |= CCDF_WANTED; 1602 if ((error = tsleep(cs, PCATCH, "ccdlck", 0)) != 0) 1603 return (error); 1604 } 1605 cs->sc_flags |= CCDF_LOCKED; 1606 return (0); 1607 } 1608 1609 /* 1610 * Unlock and wake up any waiters. 1611 */ 1612 static void 1613 ccdunlock(struct ccd_softc *cs) 1614 { 1615 1616 cs->sc_flags &= ~CCDF_LOCKED; 1617 if ((cs->sc_flags & CCDF_WANTED) != 0) { 1618 cs->sc_flags &= ~CCDF_WANTED; 1619 wakeup(cs); 1620 } 1621 } 1622 1623 #ifdef DEBUG 1624 static void 1625 printiinfo(struct ccdiinfo *ii) 1626 { 1627 int ix, i; 1628 1629 for (ix = 0; ii->ii_ndisk; ix++, ii++) { 1630 kprintf(" itab[%d]: #dk %d sblk %d soff %d", 1631 ix, ii->ii_ndisk, ii->ii_startblk, ii->ii_startoff); 1632 for (i = 0; i < ii->ii_ndisk; i++) 1633 kprintf(" %d", ii->ii_index[i]); 1634 kprintf("\n"); 1635 } 1636 } 1637 #endif 1638 1639 1640 /* Local Variables: */ 1641 /* c-argdecl-indent: 8 */ 1642 /* c-continued-statement-offset: 8 */ 1643 /* c-indent-level: 8 */ 1644 /* End: */ 1645