1 /* 2 * Copyright (c) 2007 The DragonFly Project. All rights reserved. 3 * 4 * This code is derived from software contributed to The DragonFly Project 5 * by Matthew Dillon <dillon@backplane.com> 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in 15 * the documentation and/or other materials provided with the 16 * distribution. 17 * 3. Neither the name of The DragonFly Project nor the names of its 18 * contributors may be used to endorse or promote products derived 19 * from this software without specific, prior written permission. 20 * 21 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 22 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 23 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS 24 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE 25 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, 26 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING, 27 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 28 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED 29 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 30 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT 31 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 32 * SUCH DAMAGE. 33 * 34 */ 35 /* 36 * Copyright (c) 1995 Jason R. Thorpe. 37 * All rights reserved. 38 * 39 * Redistribution and use in source and binary forms, with or without 40 * modification, are permitted provided that the following conditions 41 * are met: 42 * 1. Redistributions of source code must retain the above copyright 43 * notice, this list of conditions and the following disclaimer. 44 * 2. Redistributions in binary form must reproduce the above copyright 45 * notice, this list of conditions and the following disclaimer in the 46 * documentation and/or other materials provided with the distribution. 47 * 3. All advertising materials mentioning features or use of this software 48 * must display the following acknowledgement: 49 * This product includes software developed for the NetBSD Project 50 * by Jason R. Thorpe. 51 * 4. The name of the author may not be used to endorse or promote products 52 * derived from this software without specific prior written permission. 53 * 54 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR 55 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 56 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 57 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, 58 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, 59 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 60 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED 61 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 62 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 63 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 64 * SUCH DAMAGE. 65 */ 66 67 /* 68 * Copyright (c) 1988 University of Utah. 69 * Copyright (c) 1990, 1993 70 * The Regents of the University of California. All rights reserved. 71 * 72 * This code is derived from software contributed to Berkeley by 73 * the Systems Programming Group of the University of Utah Computer 74 * Science Department. 75 * 76 * Redistribution and use in source and binary forms, with or without 77 * modification, are permitted provided that the following conditions 78 * are met: 79 * 1. Redistributions of source code must retain the above copyright 80 * notice, this list of conditions and the following disclaimer. 81 * 2. Redistributions in binary form must reproduce the above copyright 82 * notice, this list of conditions and the following disclaimer in the 83 * documentation and/or other materials provided with the distribution. 84 * 3. All advertising materials mentioning features or use of this software 85 * must display the following acknowledgement: 86 * This product includes software developed by the University of 87 * California, Berkeley and its contributors. 88 * 4. Neither the name of the University nor the names of its contributors 89 * may be used to endorse or promote products derived from this software 90 * without specific prior written permission. 91 * 92 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 93 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 94 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 95 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 96 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 97 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 98 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 99 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 100 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 101 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 102 * SUCH DAMAGE. 103 * 104 * from: Utah $Hdr: cd.c 1.6 90/11/28$ 105 */ 106 /* 107 * @(#)cd.c 8.2 (Berkeley) 11/16/93 108 * $FreeBSD: src/sys/dev/ccd/ccd.c,v 1.73.2.1 2001/09/11 09:49:52 kris Exp $ 109 * $NetBSD: ccd.c,v 1.22 1995/12/08 19:13:26 thorpej Exp $ 110 */ 111 112 /* 113 * "Concatenated" disk driver. 114 * 115 * Original dynamic configuration support by: 116 * Jason R. Thorpe <thorpej@nas.nasa.gov> 117 * Numerical Aerodynamic Simulation Facility 118 * Mail Stop 258-6 119 * NASA Ames Research Center 120 * Moffett Field, CA 94035 121 */ 122 123 #include "use_ccd.h" 124 125 #include <sys/param.h> 126 #include <sys/systm.h> 127 #include <sys/kernel.h> 128 #include <sys/module.h> 129 #include <sys/proc.h> 130 #include <sys/buf.h> 131 #include <sys/malloc.h> 132 #include <sys/nlookup.h> 133 #include <sys/conf.h> 134 #include <sys/stat.h> 135 #include <sys/sysctl.h> 136 #include <sys/disk.h> 137 #include <sys/dtype.h> 138 #include <sys/diskslice.h> 139 #include <sys/devicestat.h> 140 #include <sys/fcntl.h> 141 #include <sys/vnode.h> 142 #include <sys/ccdvar.h> 143 144 #include <vm/vm_zone.h> 145 146 #include <vfs/ufs/dinode.h> /* XXX Used only for fs.h */ 147 #include <vfs/ufs/fs.h> /* XXX used only to get BBSIZE and SBSIZE */ 148 149 #include <sys/thread2.h> 150 #include <sys/buf2.h> 151 #include <sys/mplock2.h> 152 153 #if defined(CCDDEBUG) && !defined(DEBUG) 154 #define DEBUG 155 #endif 156 157 #ifdef DEBUG 158 #define CCDB_FOLLOW 0x01 159 #define CCDB_INIT 0x02 160 #define CCDB_IO 0x04 161 #define CCDB_LABEL 0x08 162 #define CCDB_VNODE 0x10 163 static int ccddebug = CCDB_FOLLOW | CCDB_INIT | CCDB_IO | CCDB_LABEL | 164 CCDB_VNODE; 165 SYSCTL_INT(_debug, OID_AUTO, ccddebug, CTLFLAG_RW, &ccddebug, 0, ""); 166 #undef DEBUG 167 #endif 168 169 #define ccdunit(x) dkunit(x) 170 #define ccdpart(x) dkpart(x) 171 172 /* 173 This is how mirroring works (only writes are special): 174 175 When initiating a write, ccdbuffer() returns two "struct ccdbuf *"s 176 linked together by the cb_mirror field. "cb_pflags & 177 CCDPF_MIRROR_DONE" is set to 0 on both of them. 178 179 When a component returns to ccdiodone(), it checks if "cb_pflags & 180 CCDPF_MIRROR_DONE" is set or not. If not, it sets the partner's 181 flag and returns. If it is, it means its partner has already 182 returned, so it will go to the regular cleanup. 183 184 */ 185 186 struct ccdbuf { 187 struct buf cb_buf; /* new I/O buf */ 188 struct vnode *cb_vp; /* related vnode */ 189 struct bio *cb_obio; /* ptr. to original I/O buf */ 190 struct ccdbuf *cb_freenext; /* free list link */ 191 int cb_unit; /* target unit */ 192 int cb_comp; /* target component */ 193 int cb_pflags; /* mirror/parity status flag */ 194 struct ccdbuf *cb_mirror; /* mirror counterpart */ 195 }; 196 197 /* bits in cb_pflags */ 198 #define CCDPF_MIRROR_DONE 1 /* if set, mirror counterpart is done */ 199 200 static d_open_t ccdopen; 201 static d_close_t ccdclose; 202 static d_strategy_t ccdstrategy; 203 static d_ioctl_t ccdioctl; 204 static d_dump_t ccddump; 205 206 #define NCCDFREEHIWAT 16 207 208 static struct dev_ops ccd_ops = { 209 { "ccd", 0, D_DISK }, 210 .d_open = ccdopen, 211 .d_close = ccdclose, 212 .d_read = physread, 213 .d_write = physwrite, 214 .d_ioctl = ccdioctl, 215 .d_strategy = ccdstrategy, 216 .d_dump = ccddump 217 }; 218 219 /* called during module initialization */ 220 static void ccdattach (void); 221 static int ccddetach (void); 222 static int ccd_modevent (module_t, int, void *); 223 224 /* called by biodone() at interrupt time */ 225 static void ccdiodone (struct bio *bio); 226 227 static void ccdstart (struct ccd_softc *, struct bio *); 228 static void ccdinterleave (struct ccd_softc *, int); 229 static void ccdintr (struct ccd_softc *, struct bio *); 230 static int ccdinit (struct ccddevice *, char **, struct ucred *); 231 static int ccdlookup (char *, struct vnode **); 232 static void ccdbuffer (struct ccdbuf **ret, struct ccd_softc *, 233 struct bio *, off_t, caddr_t, long); 234 static int ccdlock (struct ccd_softc *); 235 static void ccdunlock (struct ccd_softc *); 236 237 #ifdef DEBUG 238 static void printiinfo (struct ccdiinfo *); 239 #endif 240 241 /* Non-private for the benefit of libkvm. */ 242 struct ccd_softc *ccd_softc; 243 struct ccddevice *ccddevs; 244 struct ccdbuf *ccdfreebufs; 245 static int numccdfreebufs; 246 static int numccd = 0; 247 248 /* 249 * getccdbuf() - Allocate and zero a ccd buffer. 250 * 251 * This routine is called at splbio(). 252 */ 253 254 static __inline 255 struct ccdbuf * 256 getccdbuf(void) 257 { 258 struct ccdbuf *cbp; 259 260 /* 261 * Allocate from freelist or malloc as necessary 262 */ 263 if ((cbp = ccdfreebufs) != NULL) { 264 ccdfreebufs = cbp->cb_freenext; 265 --numccdfreebufs; 266 reinitbufbio(&cbp->cb_buf); 267 } else { 268 cbp = kmalloc(sizeof(struct ccdbuf), M_DEVBUF, M_WAITOK|M_ZERO); 269 initbufbio(&cbp->cb_buf); 270 } 271 272 /* 273 * independant struct buf initialization 274 */ 275 buf_dep_init(&cbp->cb_buf); 276 BUF_LOCK(&cbp->cb_buf, LK_EXCLUSIVE); 277 BUF_KERNPROC(&cbp->cb_buf); 278 cbp->cb_buf.b_flags = B_PAGING | B_BNOCLIP; 279 280 return(cbp); 281 } 282 283 /* 284 * putccdbuf() - Free a ccd buffer. 285 * 286 * This routine is called at splbio(). 287 */ 288 289 static __inline 290 void 291 putccdbuf(struct ccdbuf *cbp) 292 { 293 BUF_UNLOCK(&cbp->cb_buf); 294 295 if (numccdfreebufs < NCCDFREEHIWAT) { 296 cbp->cb_freenext = ccdfreebufs; 297 ccdfreebufs = cbp; 298 ++numccdfreebufs; 299 } else { 300 uninitbufbio(&cbp->cb_buf); 301 kfree((caddr_t)cbp, M_DEVBUF); 302 } 303 } 304 305 /* 306 * Called by main() during pseudo-device attachment. All we need 307 * to do is allocate enough space for devices to be configured later, and 308 * add devsw entries. 309 */ 310 static void 311 ccdattach(void) 312 { 313 struct disk_info info; 314 struct ccd_softc *cs; 315 int i; 316 int num = NCCD; 317 318 if (num > 1) 319 kprintf("ccd0-%d: Concatenated disk drivers\n", num-1); 320 else 321 kprintf("ccd0: Concatenated disk driver\n"); 322 323 ccd_softc = kmalloc(num * sizeof(struct ccd_softc), M_DEVBUF, 324 M_WAITOK | M_ZERO); 325 ccddevs = kmalloc(num * sizeof(struct ccddevice), M_DEVBUF, 326 M_WAITOK | M_ZERO); 327 numccd = num; 328 329 /* 330 * With normal disk devices the open simply fails if the media 331 * is not present. With CCD we have to be able to open the 332 * raw disk to use the ioctl's to set it up, so create a dummy 333 * disk info structure so dscheck() doesn't blow up. 334 */ 335 bzero(&info, sizeof(info)); 336 info.d_media_blksize = DEV_BSIZE; 337 338 for (i = 0; i < numccd; ++i) { 339 cs = &ccd_softc[i]; 340 cs->sc_dev = disk_create(i, &cs->sc_disk, &ccd_ops); 341 cs->sc_dev->si_drv1 = cs; 342 cs->sc_dev->si_iosize_max = 256 * 512; /* XXX */ 343 disk_setdiskinfo(&cs->sc_disk, &info); 344 } 345 } 346 347 static int 348 ccddetach(void) 349 { 350 struct ccd_softc *cs; 351 struct dev_ioctl_args ioctl_args; 352 int i; 353 int error = 0; 354 int eval; 355 356 bzero(&ioctl_args, sizeof(ioctl_args)); 357 358 for (i = 0; i < numccd; ++i) { 359 cs = &ccd_softc[i]; 360 if (cs->sc_dev == NULL) 361 continue; 362 ioctl_args.a_head.a_dev = cs->sc_dev; 363 ioctl_args.a_cmd = CCDIOCCLR; 364 ioctl_args.a_fflag = FWRITE; 365 eval = ccdioctl(&ioctl_args); 366 if (eval && eval != ENXIO) { 367 kprintf("ccd%d: In use, cannot detach\n", i); 368 error = EBUSY; 369 } 370 } 371 if (error == 0) { 372 for (i = 0; i < numccd; ++i) { 373 cs = &ccd_softc[i]; 374 if (cs->sc_dev == NULL) 375 continue; 376 disk_destroy(&cs->sc_disk); 377 cs->sc_dev = NULL; 378 } 379 if (ccd_softc) 380 kfree(ccd_softc, M_DEVBUF); 381 if (ccddevs) 382 kfree(ccddevs, M_DEVBUF); 383 } 384 return (error); 385 } 386 387 static int 388 ccd_modevent(module_t mod, int type, void *data) 389 { 390 int error = 0; 391 392 switch (type) { 393 case MOD_LOAD: 394 ccdattach(); 395 break; 396 397 case MOD_UNLOAD: 398 error = ccddetach(); 399 break; 400 401 default: /* MOD_SHUTDOWN etc */ 402 break; 403 } 404 return (error); 405 } 406 407 DEV_MODULE(ccd, ccd_modevent, NULL); 408 409 static int 410 ccdinit(struct ccddevice *ccd, char **cpaths, struct ucred *cred) 411 { 412 struct ccd_softc *cs = &ccd_softc[ccd->ccd_unit]; 413 struct ccdcinfo *ci = NULL; /* XXX */ 414 int ix; 415 struct vnode *vp; 416 u_int64_t skip; 417 u_int64_t size; 418 u_int64_t minsize; 419 int maxsecsize; 420 struct partinfo dpart; 421 struct ccdgeom *ccg = &cs->sc_geom; 422 char tmppath[MAXPATHLEN]; 423 int error = 0; 424 425 #ifdef DEBUG 426 if (ccddebug & (CCDB_FOLLOW|CCDB_INIT)) 427 kprintf("ccdinit: unit %d\n", ccd->ccd_unit); 428 #endif 429 430 cs->sc_size = 0; 431 cs->sc_ileave = ccd->ccd_interleave; 432 cs->sc_nccdisks = ccd->ccd_ndev; 433 434 /* Allocate space for the component info. */ 435 cs->sc_cinfo = kmalloc(cs->sc_nccdisks * sizeof(struct ccdcinfo), 436 M_DEVBUF, M_WAITOK); 437 cs->sc_maxiosize = MAXPHYS; 438 439 /* 440 * Verify that each component piece exists and record 441 * relevant information about it. 442 */ 443 maxsecsize = 0; 444 minsize = 0; 445 for (ix = 0; ix < cs->sc_nccdisks; ix++) { 446 vp = ccd->ccd_vpp[ix]; 447 ci = &cs->sc_cinfo[ix]; 448 ci->ci_vp = vp; 449 450 /* 451 * Copy in the pathname of the component. 452 */ 453 bzero(tmppath, sizeof(tmppath)); /* sanity */ 454 if ((error = copyinstr(cpaths[ix], tmppath, 455 MAXPATHLEN, &ci->ci_pathlen)) != 0) { 456 #ifdef DEBUG 457 if (ccddebug & (CCDB_FOLLOW|CCDB_INIT)) 458 kprintf("ccd%d: can't copy path, error = %d\n", 459 ccd->ccd_unit, error); 460 #endif 461 goto fail; 462 } 463 ci->ci_path = kmalloc(ci->ci_pathlen, M_DEVBUF, M_WAITOK); 464 bcopy(tmppath, ci->ci_path, ci->ci_pathlen); 465 466 ci->ci_dev = vn_todev(vp); 467 if (ci->ci_dev->si_iosize_max && 468 cs->sc_maxiosize > ci->ci_dev->si_iosize_max) { 469 cs->sc_maxiosize = ci->ci_dev->si_iosize_max; 470 } 471 472 /* 473 * Get partition information for the component. 474 */ 475 error = VOP_IOCTL(vp, DIOCGPART, (caddr_t)&dpart, FREAD, 476 cred, NULL); 477 if (error) { 478 #ifdef DEBUG 479 if (ccddebug & (CCDB_FOLLOW|CCDB_INIT)) 480 kprintf("ccd%d: %s: ioctl failed, error = %d\n", 481 ccd->ccd_unit, ci->ci_path, error); 482 #endif 483 goto fail; 484 } 485 if (dpart.fstype != FS_CCD && 486 !kuuid_is_ccd(&dpart.fstype_uuid)) { 487 kprintf("ccd%d: %s: filesystem type must be 'ccd'\n", 488 ccd->ccd_unit, ci->ci_path); 489 error = EFTYPE; 490 goto fail; 491 } 492 if (maxsecsize < dpart.media_blksize) 493 maxsecsize = dpart.media_blksize; 494 495 /* 496 * Skip a certain amount of storage at the beginning of 497 * the component to make sure we don't infringe on any 498 * reserved sectors. This is handled entirely by 499 * dpart.reserved_blocks but we also impose a minimum 500 * of 16 sectors for backwards compatibility. 501 */ 502 skip = 16; 503 if (skip < dpart.reserved_blocks) 504 skip = dpart.reserved_blocks; 505 size = dpart.media_blocks - skip; 506 507 /* 508 * Calculate the size, truncating to an interleave 509 * boundary if necessary. 510 */ 511 if (cs->sc_ileave > 1) 512 size -= size % cs->sc_ileave; 513 514 if ((int64_t)size <= 0) { 515 #ifdef DEBUG 516 if (ccddebug & (CCDB_FOLLOW|CCDB_INIT)) 517 kprintf("ccd%d: %s: size == 0\n", 518 ccd->ccd_unit, ci->ci_path); 519 #endif 520 error = ENODEV; 521 goto fail; 522 } 523 524 /* 525 * Calculate the smallest uniform component, used 526 * elsewhere. 527 */ 528 if (minsize == 0 || minsize > size) 529 minsize = size; 530 ci->ci_skip = skip; 531 ci->ci_size = size; 532 cs->sc_size += size; 533 } 534 kprintf("ccd%d: max component iosize is %d total blocks %lld\n", 535 cs->sc_unit, cs->sc_maxiosize, (long long)cs->sc_size); 536 537 /* 538 * Don't allow the interleave to be smaller than 539 * the biggest component sector. 540 */ 541 if ((cs->sc_ileave > 0) && 542 (cs->sc_ileave % (maxsecsize / DEV_BSIZE))) { 543 #ifdef DEBUG 544 if (ccddebug & (CCDB_FOLLOW|CCDB_INIT)) 545 kprintf("ccd%d: interleave must be at least %d\n", 546 ccd->ccd_unit, (maxsecsize / DEV_BSIZE)); 547 #endif 548 error = EINVAL; 549 goto fail; 550 } 551 552 /* 553 * If uniform interleave is desired set all sizes to that of 554 * the smallest component. This will guarentee that a single 555 * interleave table is generated. 556 * 557 * Lost space must be taken into account when calculating the 558 * overall size. Half the space is lost when CCDF_MIRROR is 559 * specified. One disk is lost when CCDF_PARITY is specified. 560 */ 561 if (ccd->ccd_flags & CCDF_UNIFORM) { 562 for (ci = cs->sc_cinfo; 563 ci < &cs->sc_cinfo[cs->sc_nccdisks]; ci++) { 564 ci->ci_size = minsize; 565 } 566 if (ccd->ccd_flags & CCDF_MIRROR) { 567 /* 568 * Check to see if an even number of components 569 * have been specified. The interleave must also 570 * be non-zero in order for us to be able to 571 * guarentee the topology. 572 */ 573 if (cs->sc_nccdisks % 2) { 574 kprintf("ccd%d: mirroring requires an even number of disks\n", ccd->ccd_unit ); 575 error = EINVAL; 576 goto fail; 577 } 578 if (cs->sc_ileave == 0) { 579 kprintf("ccd%d: an interleave must be specified when mirroring\n", ccd->ccd_unit); 580 error = EINVAL; 581 goto fail; 582 } 583 cs->sc_size = (cs->sc_nccdisks/2) * minsize; 584 } else if (ccd->ccd_flags & CCDF_PARITY) { 585 cs->sc_size = (cs->sc_nccdisks-1) * minsize; 586 } else { 587 if (cs->sc_ileave == 0) { 588 kprintf("ccd%d: an interleave must be specified when using parity\n", ccd->ccd_unit); 589 error = EINVAL; 590 goto fail; 591 } 592 cs->sc_size = cs->sc_nccdisks * minsize; 593 } 594 } 595 596 /* 597 * Construct the interleave table. 598 */ 599 ccdinterleave(cs, ccd->ccd_unit); 600 601 /* 602 * Create pseudo-geometry based on 1MB cylinders. It's 603 * pretty close. 604 */ 605 ccg->ccg_secsize = maxsecsize; 606 ccg->ccg_ntracks = 1; 607 ccg->ccg_nsectors = 1024 * 1024 / ccg->ccg_secsize; 608 ccg->ccg_ncylinders = cs->sc_size / ccg->ccg_nsectors; 609 610 /* 611 * Add an devstat entry for this device. 612 */ 613 devstat_add_entry(&cs->device_stats, "ccd", ccd->ccd_unit, 614 ccg->ccg_secsize, DEVSTAT_ALL_SUPPORTED, 615 DEVSTAT_TYPE_STORARRAY |DEVSTAT_TYPE_IF_OTHER, 616 DEVSTAT_PRIORITY_ARRAY); 617 618 cs->sc_flags |= CCDF_INITED; 619 cs->sc_cflags = ccd->ccd_flags; /* So we can find out later... */ 620 cs->sc_unit = ccd->ccd_unit; 621 return (0); 622 fail: 623 while (ci > cs->sc_cinfo) { 624 ci--; 625 kfree(ci->ci_path, M_DEVBUF); 626 } 627 kfree(cs->sc_cinfo, M_DEVBUF); 628 cs->sc_cinfo = NULL; 629 return (error); 630 } 631 632 static void 633 ccdinterleave(struct ccd_softc *cs, int unit) 634 { 635 struct ccdcinfo *ci, *smallci; 636 struct ccdiinfo *ii; 637 u_int64_t bn; 638 u_int64_t lbn; 639 u_int64_t size; 640 int icount; 641 int ix; 642 643 #ifdef DEBUG 644 if (ccddebug & CCDB_INIT) 645 kprintf("ccdinterleave(%x): ileave %d\n", cs, cs->sc_ileave); 646 #endif 647 648 /* 649 * Allocate an interleave table. The worst case occurs when each 650 * of N disks is of a different size, resulting in N interleave 651 * tables. 652 * 653 * Chances are this is too big, but we don't care. 654 */ 655 icount = cs->sc_nccdisks + 1; 656 cs->sc_itable = kmalloc(icount * sizeof(struct ccdiinfo), 657 M_DEVBUF, M_WAITOK|M_ZERO); 658 659 /* 660 * Trivial case: no interleave (actually interleave of disk size). 661 * Each table entry represents a single component in its entirety. 662 * 663 * An interleave of 0 may not be used with a mirror or parity setup. 664 */ 665 if (cs->sc_ileave == 0) { 666 bn = 0; 667 ii = cs->sc_itable; 668 669 for (ix = 0; ix < cs->sc_nccdisks; ix++) { 670 /* Allocate space for ii_index. */ 671 ii->ii_index = kmalloc(sizeof(int), M_DEVBUF, M_WAITOK); 672 ii->ii_ndisk = 1; 673 ii->ii_startblk = bn; 674 ii->ii_startoff = 0; 675 ii->ii_index[0] = ix; 676 bn += cs->sc_cinfo[ix].ci_size; 677 ii++; 678 } 679 ii->ii_ndisk = 0; 680 #ifdef DEBUG 681 if (ccddebug & CCDB_INIT) 682 printiinfo(cs->sc_itable); 683 #endif 684 return; 685 } 686 687 /* 688 * The following isn't fast or pretty; it doesn't have to be. 689 */ 690 size = 0; 691 bn = lbn = 0; 692 for (ii = cs->sc_itable; ii < &cs->sc_itable[icount]; ++ii) { 693 /* 694 * Allocate space for ii_index. We might allocate more then 695 * we use. 696 */ 697 ii->ii_index = kmalloc((sizeof(int) * cs->sc_nccdisks), 698 M_DEVBUF, M_WAITOK); 699 700 /* 701 * Locate the smallest of the remaining components 702 */ 703 smallci = NULL; 704 ci = cs->sc_cinfo; 705 while (ci < &cs->sc_cinfo[cs->sc_nccdisks]) { 706 if (ci->ci_size > size && 707 (smallci == NULL || 708 ci->ci_size < smallci->ci_size)) { 709 smallci = ci; 710 } 711 ++ci; 712 } 713 714 /* 715 * Nobody left, all done 716 */ 717 if (smallci == NULL) { 718 ii->ii_ndisk = 0; 719 break; 720 } 721 722 /* 723 * Record starting logical block using an sc_ileave blocksize. 724 */ 725 ii->ii_startblk = bn / cs->sc_ileave; 726 727 /* 728 * Record starting component block using an sc_ileave 729 * blocksize. This value is relative to the beginning of 730 * a component disk. 731 */ 732 ii->ii_startoff = lbn; 733 734 /* 735 * Determine how many disks take part in this interleave 736 * and record their indices. 737 */ 738 ix = 0; 739 for (ci = cs->sc_cinfo; 740 ci < &cs->sc_cinfo[cs->sc_nccdisks]; ci++) { 741 if (ci->ci_size >= smallci->ci_size) { 742 ii->ii_index[ix++] = ci - cs->sc_cinfo; 743 } 744 } 745 ii->ii_ndisk = ix; 746 747 /* 748 * Adjust for loop 749 */ 750 bn += ix * (smallci->ci_size - size); 751 lbn = smallci->ci_size / cs->sc_ileave; 752 size = smallci->ci_size; 753 } 754 if (ii == &cs->sc_itable[icount]) 755 panic("ccdinterlave software bug! table exhausted"); 756 #ifdef DEBUG 757 if (ccddebug & CCDB_INIT) 758 printiinfo(cs->sc_itable); 759 #endif 760 } 761 762 /* ARGSUSED */ 763 static int 764 ccdopen(struct dev_open_args *ap) 765 { 766 cdev_t dev = ap->a_head.a_dev; 767 int unit = ccdunit(dev); 768 struct ccd_softc *cs; 769 int error = 0; 770 771 #ifdef DEBUG 772 if (ccddebug & CCDB_FOLLOW) 773 kprintf("ccdopen(%x, %x)\n", dev, flags); 774 #endif 775 if (unit >= numccd) 776 return (ENXIO); 777 cs = &ccd_softc[unit]; 778 779 if ((error = ccdlock(cs)) == 0) { 780 ccdunlock(cs); 781 } 782 return (error); 783 } 784 785 /* ARGSUSED */ 786 static int 787 ccdclose(struct dev_close_args *ap) 788 { 789 cdev_t dev = ap->a_head.a_dev; 790 int unit = ccdunit(dev); 791 struct ccd_softc *cs; 792 int error = 0; 793 794 #ifdef DEBUG 795 if (ccddebug & CCDB_FOLLOW) 796 kprintf("ccdclose(%x, %x)\n", dev, flags); 797 #endif 798 799 if (unit >= numccd) 800 return (ENXIO); 801 cs = &ccd_softc[unit]; 802 if ((error = ccdlock(cs)) == 0) { 803 ccdunlock(cs); 804 } 805 return (error); 806 } 807 808 static int 809 ccdstrategy(struct dev_strategy_args *ap) 810 { 811 cdev_t dev = ap->a_head.a_dev; 812 struct bio *bio = ap->a_bio; 813 int unit = ccdunit(dev); 814 struct bio *nbio; 815 struct buf *bp = bio->bio_buf; 816 struct ccd_softc *cs = &ccd_softc[unit]; 817 u_int64_t pbn; /* in sc_secsize chunks */ 818 u_int32_t sz; /* in sc_secsize chunks */ 819 820 #ifdef DEBUG 821 if (ccddebug & CCDB_FOLLOW) 822 kprintf("ccdstrategy(%x): unit %d\n", bp, unit); 823 #endif 824 if ((cs->sc_flags & CCDF_INITED) == 0) { 825 bp->b_error = ENXIO; 826 goto error; 827 } 828 829 /* If it's a nil transfer, wake up the top half now. */ 830 if (bp->b_bcount == 0) { 831 bp->b_resid = 0; 832 goto done; 833 } 834 835 /* 836 * Do bounds checking and adjust transfer. If there's an 837 * error, the bounds check will flag that for us. 838 */ 839 840 pbn = bio->bio_offset / cs->sc_geom.ccg_secsize; 841 sz = howmany(bp->b_bcount, cs->sc_geom.ccg_secsize); 842 843 /* 844 * If out of bounds return an error. If the request goes 845 * past EOF, clip the request as appropriate. If exactly 846 * at EOF, return success (don't clip), but with 0 bytes 847 * of I/O. 848 * 849 * Mark EOF B_INVAL (just like bad), indicating that the 850 * contents of the buffer, if any, is invalid. 851 */ 852 if ((int64_t)pbn < 0) 853 goto bad; 854 if (pbn + sz > cs->sc_size) { 855 if (pbn > cs->sc_size || (bp->b_flags & B_BNOCLIP)) 856 goto bad; 857 if (pbn == cs->sc_size) { 858 bp->b_resid = bp->b_bcount; 859 bp->b_flags |= B_INVAL; 860 goto done; 861 } 862 sz = (long)(cs->sc_size - pbn); 863 bp->b_bcount = sz * cs->sc_geom.ccg_secsize; 864 } 865 nbio = bio; 866 867 bp->b_resid = bp->b_bcount; 868 nbio->bio_driver_info = dev; 869 870 /* 871 * "Start" the unit. 872 */ 873 crit_enter(); 874 ccdstart(cs, nbio); 875 crit_exit(); 876 return(0); 877 878 /* 879 * note: bio, not nbio, is valid at the done label. 880 */ 881 bad: 882 bp->b_error = EINVAL; 883 error: 884 bp->b_resid = bp->b_bcount; 885 bp->b_flags |= B_ERROR | B_INVAL; 886 done: 887 biodone(bio); 888 return(0); 889 } 890 891 static void 892 ccdstart(struct ccd_softc *cs, struct bio *bio) 893 { 894 long bcount, rcount; 895 struct ccdbuf *cbp[4]; 896 struct buf *bp = bio->bio_buf; 897 /* XXX! : 2 reads and 2 writes for RAID 4/5 */ 898 caddr_t addr; 899 off_t doffset; 900 901 #ifdef DEBUG 902 if (ccddebug & CCDB_FOLLOW) 903 kprintf("ccdstart(%x, %x)\n", cs, bp); 904 #endif 905 906 /* Record the transaction start */ 907 devstat_start_transaction(&cs->device_stats); 908 909 /* 910 * Allocate component buffers and fire off the requests 911 */ 912 doffset = bio->bio_offset; 913 addr = bp->b_data; 914 915 for (bcount = bp->b_bcount; bcount > 0; bcount -= rcount) { 916 ccdbuffer(cbp, cs, bio, doffset, addr, bcount); 917 rcount = cbp[0]->cb_buf.b_bcount; 918 919 if (cs->sc_cflags & CCDF_MIRROR) { 920 /* 921 * Mirroring. Writes go to both disks, reads are 922 * taken from whichever disk seems most appropriate. 923 * 924 * We attempt to localize reads to the disk whos arm 925 * is nearest the read request. We ignore seeks due 926 * to writes when making this determination and we 927 * also try to avoid hogging. 928 */ 929 if (cbp[0]->cb_buf.b_cmd != BUF_CMD_READ) { 930 vn_strategy(cbp[0]->cb_vp, 931 &cbp[0]->cb_buf.b_bio1); 932 vn_strategy(cbp[1]->cb_vp, 933 &cbp[1]->cb_buf.b_bio1); 934 } else { 935 int pick = cs->sc_pick; 936 daddr_t range = cs->sc_size / 16 * cs->sc_geom.ccg_secsize; 937 if (doffset < cs->sc_blk[pick] - range || 938 doffset > cs->sc_blk[pick] + range 939 ) { 940 cs->sc_pick = pick = 1 - pick; 941 } 942 cs->sc_blk[pick] = doffset + rcount; 943 vn_strategy(cbp[pick]->cb_vp, 944 &cbp[pick]->cb_buf.b_bio1); 945 } 946 } else { 947 /* 948 * Not mirroring 949 */ 950 vn_strategy(cbp[0]->cb_vp, 951 &cbp[0]->cb_buf.b_bio1); 952 } 953 doffset += rcount; 954 addr += rcount; 955 } 956 } 957 958 /* 959 * Build a component buffer header. 960 */ 961 static void 962 ccdbuffer(struct ccdbuf **cb, struct ccd_softc *cs, struct bio *bio, 963 off_t doffset, caddr_t addr, long bcount) 964 { 965 struct ccdcinfo *ci, *ci2 = NULL; /* XXX */ 966 struct ccdbuf *cbp; 967 u_int64_t bn; 968 u_int64_t cbn; 969 u_int64_t cboff; 970 off_t cbc; 971 972 #ifdef DEBUG 973 if (ccddebug & CCDB_IO) 974 kprintf("ccdbuffer(%x, %x, %d, %x, %d)\n", 975 cs, bp, bn, addr, bcount); 976 #endif 977 /* 978 * Determine which component bn falls in. 979 */ 980 bn = doffset / cs->sc_geom.ccg_secsize; 981 cbn = bn; 982 cboff = 0; 983 984 if (cs->sc_ileave == 0) { 985 /* 986 * Serially concatenated and neither a mirror nor a parity 987 * config. This is a special case. 988 */ 989 daddr_t sblk; 990 991 sblk = 0; 992 for (ci = cs->sc_cinfo; cbn >= sblk + ci->ci_size; ci++) 993 sblk += ci->ci_size; 994 cbn -= sblk; 995 } else { 996 struct ccdiinfo *ii; 997 int ccdisk, off; 998 999 /* 1000 * Calculate cbn, the logical superblock (sc_ileave chunks), 1001 * and cboff, a normal block offset (DEV_BSIZE chunks) relative 1002 * to cbn. 1003 */ 1004 cboff = cbn % cs->sc_ileave; /* DEV_BSIZE gran */ 1005 cbn = cbn / cs->sc_ileave; /* DEV_BSIZE * ileave gran */ 1006 1007 /* 1008 * Figure out which interleave table to use. 1009 */ 1010 for (ii = cs->sc_itable; ii->ii_ndisk; ii++) { 1011 if (ii->ii_startblk > cbn) 1012 break; 1013 } 1014 ii--; 1015 1016 /* 1017 * off is the logical superblock relative to the beginning 1018 * of this interleave block. 1019 */ 1020 off = cbn - ii->ii_startblk; 1021 1022 /* 1023 * We must calculate which disk component to use (ccdisk), 1024 * and recalculate cbn to be the superblock relative to 1025 * the beginning of the component. This is typically done by 1026 * adding 'off' and ii->ii_startoff together. However, 'off' 1027 * must typically be divided by the number of components in 1028 * this interleave array to be properly convert it from a 1029 * CCD-relative logical superblock number to a 1030 * component-relative superblock number. 1031 */ 1032 if (ii->ii_ndisk == 1) { 1033 /* 1034 * When we have just one disk, it can't be a mirror 1035 * or a parity config. 1036 */ 1037 ccdisk = ii->ii_index[0]; 1038 cbn = ii->ii_startoff + off; 1039 } else { 1040 if (cs->sc_cflags & CCDF_MIRROR) { 1041 /* 1042 * We have forced a uniform mapping, resulting 1043 * in a single interleave array. We double 1044 * up on the first half of the available 1045 * components and our mirror is in the second 1046 * half. This only works with a single 1047 * interleave array because doubling up 1048 * doubles the number of sectors, so there 1049 * cannot be another interleave array because 1050 * the next interleave array's calculations 1051 * would be off. 1052 */ 1053 int ndisk2 = ii->ii_ndisk / 2; 1054 ccdisk = ii->ii_index[off % ndisk2]; 1055 cbn = ii->ii_startoff + off / ndisk2; 1056 ci2 = &cs->sc_cinfo[ccdisk + ndisk2]; 1057 } else if (cs->sc_cflags & CCDF_PARITY) { 1058 /* 1059 * XXX not implemented yet 1060 */ 1061 int ndisk2 = ii->ii_ndisk - 1; 1062 ccdisk = ii->ii_index[off % ndisk2]; 1063 cbn = ii->ii_startoff + off / ndisk2; 1064 if (cbn % ii->ii_ndisk <= ccdisk) 1065 ccdisk++; 1066 } else { 1067 ccdisk = ii->ii_index[off % ii->ii_ndisk]; 1068 cbn = ii->ii_startoff + off / ii->ii_ndisk; 1069 } 1070 } 1071 1072 ci = &cs->sc_cinfo[ccdisk]; 1073 1074 /* 1075 * Convert cbn from a superblock to a normal block so it 1076 * can be used to calculate (along with cboff) the normal 1077 * block index into this particular disk. 1078 */ 1079 cbn *= cs->sc_ileave; 1080 } 1081 1082 /* 1083 * Fill in the component buf structure. 1084 * 1085 * NOTE: devices do not use b_bufsize, only b_bcount, but b_bcount 1086 * will be truncated on device EOF so we use b_bufsize to detect 1087 * the case. 1088 */ 1089 cbp = getccdbuf(); 1090 cbp->cb_buf.b_cmd = bio->bio_buf->b_cmd; 1091 cbp->cb_buf.b_flags |= bio->bio_buf->b_flags; 1092 cbp->cb_buf.b_data = addr; 1093 cbp->cb_vp = ci->ci_vp; 1094 if (cs->sc_ileave == 0) 1095 cbc = dbtob((off_t)(ci->ci_size - cbn)); 1096 else 1097 cbc = dbtob((off_t)(cs->sc_ileave - cboff)); 1098 if (cbc > cs->sc_maxiosize) 1099 cbc = cs->sc_maxiosize; 1100 cbp->cb_buf.b_bcount = (cbc < bcount) ? cbc : bcount; 1101 cbp->cb_buf.b_bufsize = cbp->cb_buf.b_bcount; 1102 1103 cbp->cb_buf.b_bio1.bio_done = ccdiodone; 1104 cbp->cb_buf.b_bio1.bio_caller_info1.ptr = cbp; 1105 cbp->cb_buf.b_bio1.bio_offset = dbtob(cbn + cboff + ci->ci_skip); 1106 1107 /* 1108 * context for ccdiodone 1109 */ 1110 cbp->cb_obio = bio; 1111 cbp->cb_unit = cs - ccd_softc; 1112 cbp->cb_comp = ci - cs->sc_cinfo; 1113 1114 #ifdef DEBUG 1115 if (ccddebug & CCDB_IO) 1116 kprintf(" dev %x(u%d): cbp %x off %lld addr %x bcnt %d\n", 1117 ci->ci_dev, ci-cs->sc_cinfo, cbp, 1118 cbp->cb_buf.b_bio1.bio_offset, 1119 cbp->cb_buf.b_data, cbp->cb_buf.b_bcount); 1120 #endif 1121 cb[0] = cbp; 1122 1123 /* 1124 * Note: both I/O's setup when reading from mirror, but only one 1125 * will be executed. 1126 */ 1127 if (cs->sc_cflags & CCDF_MIRROR) { 1128 /* mirror, setup second I/O */ 1129 cbp = getccdbuf(); 1130 1131 cbp->cb_buf.b_cmd = bio->bio_buf->b_cmd; 1132 cbp->cb_buf.b_flags |= bio->bio_buf->b_flags; 1133 cbp->cb_buf.b_data = addr; 1134 cbp->cb_vp = ci2->ci_vp; 1135 if (cs->sc_ileave == 0) 1136 cbc = dbtob((off_t)(ci->ci_size - cbn)); 1137 else 1138 cbc = dbtob((off_t)(cs->sc_ileave - cboff)); 1139 if (cbc > cs->sc_maxiosize) 1140 cbc = cs->sc_maxiosize; 1141 cbp->cb_buf.b_bcount = (cbc < bcount) ? cbc : bcount; 1142 cbp->cb_buf.b_bufsize = cbp->cb_buf.b_bcount; 1143 1144 cbp->cb_buf.b_bio1.bio_done = ccdiodone; 1145 cbp->cb_buf.b_bio1.bio_caller_info1.ptr = cbp; 1146 cbp->cb_buf.b_bio1.bio_offset = dbtob(cbn + cboff + ci2->ci_skip); 1147 1148 /* 1149 * context for ccdiodone 1150 */ 1151 cbp->cb_obio = bio; 1152 cbp->cb_unit = cs - ccd_softc; 1153 cbp->cb_comp = ci2 - cs->sc_cinfo; 1154 cb[1] = cbp; 1155 /* link together the ccdbuf's and clear "mirror done" flag */ 1156 cb[0]->cb_mirror = cb[1]; 1157 cb[1]->cb_mirror = cb[0]; 1158 cb[0]->cb_pflags &= ~CCDPF_MIRROR_DONE; 1159 cb[1]->cb_pflags &= ~CCDPF_MIRROR_DONE; 1160 } 1161 } 1162 1163 static void 1164 ccdintr(struct ccd_softc *cs, struct bio *bio) 1165 { 1166 struct buf *bp = bio->bio_buf; 1167 1168 #ifdef DEBUG 1169 if (ccddebug & CCDB_FOLLOW) 1170 kprintf("ccdintr(%x, %x)\n", cs, bp); 1171 #endif 1172 /* 1173 * Request is done for better or worse, wakeup the top half. 1174 */ 1175 if (bp->b_flags & B_ERROR) 1176 bp->b_resid = bp->b_bcount; 1177 devstat_end_transaction_buf(&cs->device_stats, bp); 1178 biodone(bio); 1179 } 1180 1181 /* 1182 * Called at interrupt time. 1183 * 1184 * Mark the component as done and if all components are done, 1185 * take a ccd interrupt. 1186 */ 1187 static void 1188 ccdiodone(struct bio *bio) 1189 { 1190 struct ccdbuf *cbp = bio->bio_caller_info1.ptr; 1191 struct bio *obio = cbp->cb_obio; 1192 struct buf *obp = obio->bio_buf; 1193 int unit = cbp->cb_unit; 1194 int count; 1195 1196 /* 1197 * Since we do not have exclusive access to underlying devices, 1198 * we can't keep cache translations around. 1199 */ 1200 clearbiocache(bio->bio_next); 1201 1202 get_mplock(); 1203 crit_enter(); 1204 #ifdef DEBUG 1205 if (ccddebug & CCDB_FOLLOW) 1206 kprintf("ccdiodone(%x)\n", cbp); 1207 if (ccddebug & CCDB_IO) { 1208 kprintf("ccdiodone: bp %x bcount %d resid %d\n", 1209 obp, obp->b_bcount, obp->b_resid); 1210 kprintf(" dev %x(u%d), cbp %x off %lld addr %x bcnt %d\n", 1211 cbp->cb_buf.b_dev, cbp->cb_comp, cbp, 1212 cbp->cb_buf.b_loffset, cbp->cb_buf.b_data, 1213 cbp->cb_buf.b_bcount); 1214 } 1215 #endif 1216 1217 /* 1218 * If an error occured, report it. If this is a mirrored 1219 * configuration and the first of two possible reads, do not 1220 * set the error in the bp yet because the second read may 1221 * succeed. 1222 */ 1223 if (cbp->cb_buf.b_flags & B_ERROR) { 1224 const char *msg = ""; 1225 1226 if ((ccd_softc[unit].sc_cflags & CCDF_MIRROR) && 1227 (cbp->cb_buf.b_cmd == BUF_CMD_READ) && 1228 (cbp->cb_pflags & CCDPF_MIRROR_DONE) == 0) { 1229 /* 1230 * We will try our read on the other disk down 1231 * below, also reverse the default pick so if we 1232 * are doing a scan we do not keep hitting the 1233 * bad disk first. 1234 */ 1235 struct ccd_softc *cs = &ccd_softc[unit]; 1236 1237 msg = ", trying other disk"; 1238 cs->sc_pick = 1 - cs->sc_pick; 1239 cs->sc_blk[cs->sc_pick] = obio->bio_offset; 1240 } else { 1241 obp->b_flags |= B_ERROR; 1242 obp->b_error = cbp->cb_buf.b_error ? 1243 cbp->cb_buf.b_error : EIO; 1244 } 1245 kprintf("ccd%d: error %d on component %d " 1246 "offset %jd (ccd offset %jd)%s\n", 1247 unit, obp->b_error, cbp->cb_comp, 1248 (intmax_t)cbp->cb_buf.b_bio2.bio_offset, 1249 (intmax_t)obio->bio_offset, 1250 msg); 1251 } 1252 1253 /* 1254 * Process mirror. If we are writing, I/O has been initiated on both 1255 * buffers and we fall through only after both are finished. 1256 * 1257 * If we are reading only one I/O is initiated at a time. If an 1258 * error occurs we initiate the second I/O and return, otherwise 1259 * we free the second I/O without initiating it. 1260 */ 1261 1262 if (ccd_softc[unit].sc_cflags & CCDF_MIRROR) { 1263 if (cbp->cb_buf.b_cmd != BUF_CMD_READ) { 1264 /* 1265 * When writing, handshake with the second buffer 1266 * to determine when both are done. If both are not 1267 * done, return here. 1268 */ 1269 if ((cbp->cb_pflags & CCDPF_MIRROR_DONE) == 0) { 1270 cbp->cb_mirror->cb_pflags |= CCDPF_MIRROR_DONE; 1271 putccdbuf(cbp); 1272 crit_exit(); 1273 rel_mplock(); 1274 return; 1275 } 1276 } else { 1277 /* 1278 * When reading, either dispose of the second buffer 1279 * or initiate I/O on the second buffer if an error 1280 * occured with this one. 1281 */ 1282 if ((cbp->cb_pflags & CCDPF_MIRROR_DONE) == 0) { 1283 if (cbp->cb_buf.b_flags & B_ERROR) { 1284 cbp->cb_mirror->cb_pflags |= 1285 CCDPF_MIRROR_DONE; 1286 vn_strategy( 1287 cbp->cb_mirror->cb_vp, 1288 &cbp->cb_mirror->cb_buf.b_bio1 1289 ); 1290 putccdbuf(cbp); 1291 crit_exit(); 1292 rel_mplock(); 1293 return; 1294 } else { 1295 putccdbuf(cbp->cb_mirror); 1296 /* fall through */ 1297 } 1298 } 1299 } 1300 } 1301 1302 /* 1303 * Use our saved b_bufsize to determine if an unexpected EOF occured. 1304 */ 1305 count = cbp->cb_buf.b_bufsize; 1306 putccdbuf(cbp); 1307 1308 /* 1309 * If all done, "interrupt". 1310 */ 1311 obp->b_resid -= count; 1312 if (obp->b_resid < 0) 1313 panic("ccdiodone: count"); 1314 if (obp->b_resid == 0) 1315 ccdintr(&ccd_softc[unit], obio); 1316 crit_exit(); 1317 rel_mplock(); 1318 } 1319 1320 static int 1321 ccdioctl(struct dev_ioctl_args *ap) 1322 { 1323 cdev_t dev = ap->a_head.a_dev; 1324 int unit = ccdunit(dev); 1325 int i, j, lookedup = 0, error = 0; 1326 struct ccd_softc *cs; 1327 struct ccd_ioctl *ccio = (struct ccd_ioctl *)ap->a_data; 1328 struct ccddevice ccd; 1329 struct disk_info info; 1330 char **cpp; 1331 struct vnode **vpp; 1332 1333 if (unit >= numccd) 1334 return (ENXIO); 1335 cs = &ccd_softc[unit]; 1336 1337 bzero(&ccd, sizeof(ccd)); 1338 1339 switch (ap->a_cmd) { 1340 case CCDIOCSET: 1341 if (cs->sc_flags & CCDF_INITED) 1342 return (EBUSY); 1343 1344 if ((ap->a_fflag & FWRITE) == 0) 1345 return (EBADF); 1346 1347 if ((error = ccdlock(cs)) != 0) 1348 return (error); 1349 1350 if (ccio->ccio_ndisks > CCD_MAXNDISKS) { 1351 ccdunlock(cs); 1352 return (EINVAL); 1353 } 1354 1355 /* Fill in some important bits. */ 1356 ccd.ccd_unit = unit; 1357 ccd.ccd_interleave = ccio->ccio_ileave; 1358 if (ccd.ccd_interleave == 0 && 1359 ((ccio->ccio_flags & CCDF_MIRROR) || 1360 (ccio->ccio_flags & CCDF_PARITY))) { 1361 kprintf("ccd%d: disabling mirror/parity, interleave is 0\n", unit); 1362 ccio->ccio_flags &= ~(CCDF_MIRROR | CCDF_PARITY); 1363 } 1364 if ((ccio->ccio_flags & CCDF_MIRROR) && 1365 (ccio->ccio_flags & CCDF_PARITY)) { 1366 kprintf("ccd%d: can't specify both mirror and parity, using mirror\n", unit); 1367 ccio->ccio_flags &= ~CCDF_PARITY; 1368 } 1369 if ((ccio->ccio_flags & (CCDF_MIRROR | CCDF_PARITY)) && 1370 !(ccio->ccio_flags & CCDF_UNIFORM)) { 1371 kprintf("ccd%d: mirror/parity forces uniform flag\n", 1372 unit); 1373 ccio->ccio_flags |= CCDF_UNIFORM; 1374 } 1375 ccd.ccd_flags = ccio->ccio_flags & CCDF_USERMASK; 1376 1377 /* 1378 * Allocate space for and copy in the array of 1379 * componet pathnames and device numbers. 1380 */ 1381 cpp = kmalloc(ccio->ccio_ndisks * sizeof(char *), 1382 M_DEVBUF, M_WAITOK); 1383 vpp = kmalloc(ccio->ccio_ndisks * sizeof(struct vnode *), 1384 M_DEVBUF, M_WAITOK); 1385 1386 error = copyin((caddr_t)ccio->ccio_disks, (caddr_t)cpp, 1387 ccio->ccio_ndisks * sizeof(char **)); 1388 if (error) { 1389 kfree(vpp, M_DEVBUF); 1390 kfree(cpp, M_DEVBUF); 1391 ccdunlock(cs); 1392 return (error); 1393 } 1394 1395 #ifdef DEBUG 1396 if (ccddebug & CCDB_INIT) { 1397 for (i = 0; i < ccio->ccio_ndisks; ++i) 1398 kprintf("ccdioctl: component %d: 0x%x\n", 1399 i, cpp[i]); 1400 } 1401 #endif 1402 1403 for (i = 0; i < ccio->ccio_ndisks; ++i) { 1404 #ifdef DEBUG 1405 if (ccddebug & CCDB_INIT) 1406 kprintf("ccdioctl: lookedup = %d\n", lookedup); 1407 #endif 1408 if ((error = ccdlookup(cpp[i], &vpp[i])) != 0) { 1409 for (j = 0; j < lookedup; ++j) 1410 (void)vn_close(vpp[j], FREAD|FWRITE); 1411 kfree(vpp, M_DEVBUF); 1412 kfree(cpp, M_DEVBUF); 1413 ccdunlock(cs); 1414 return (error); 1415 } 1416 ++lookedup; 1417 } 1418 ccd.ccd_cpp = cpp; 1419 ccd.ccd_vpp = vpp; 1420 ccd.ccd_ndev = ccio->ccio_ndisks; 1421 1422 /* 1423 * Initialize the ccd. Fills in the softc for us. 1424 */ 1425 if ((error = ccdinit(&ccd, cpp, ap->a_cred)) != 0) { 1426 for (j = 0; j < lookedup; ++j) 1427 (void)vn_close(vpp[j], FREAD|FWRITE); 1428 kfree(vpp, M_DEVBUF); 1429 kfree(cpp, M_DEVBUF); 1430 ccdunlock(cs); 1431 return (error); 1432 } 1433 1434 /* 1435 * The ccd has been successfully initialized, so 1436 * we can place it into the array and read the disklabel. 1437 */ 1438 bcopy(&ccd, &ccddevs[unit], sizeof(ccd)); 1439 ccio->ccio_unit = unit; 1440 ccio->ccio_size = cs->sc_size; 1441 1442 bzero(&info, sizeof(info)); 1443 info.d_media_blksize = cs->sc_geom.ccg_secsize; 1444 info.d_media_blocks = cs->sc_size; 1445 info.d_nheads = cs->sc_geom.ccg_ntracks; 1446 info.d_secpertrack = cs->sc_geom.ccg_nsectors; 1447 info.d_ncylinders = cs->sc_geom.ccg_ncylinders; 1448 info.d_secpercyl = info.d_nheads * info.d_secpertrack; 1449 1450 /* 1451 * For cases where a label is directly applied to the ccd, 1452 * without slices, DSO_COMPATMBR forces one sector be 1453 * reserved for backwards compatibility. 1454 */ 1455 info.d_dsflags = DSO_COMPATMBR; 1456 disk_setdiskinfo(&cs->sc_disk, &info); 1457 1458 ccdunlock(cs); 1459 1460 break; 1461 1462 case CCDIOCCLR: 1463 if ((cs->sc_flags & CCDF_INITED) == 0) 1464 return (ENXIO); 1465 1466 if ((ap->a_fflag & FWRITE) == 0) 1467 return (EBADF); 1468 1469 if ((error = ccdlock(cs)) != 0) 1470 return (error); 1471 1472 if (dev_drefs(cs->sc_dev) > 1) { 1473 ccdunlock(cs); 1474 return (EBUSY); 1475 } 1476 1477 /* 1478 * Free ccd_softc information and clear entry. 1479 */ 1480 1481 /* Close the components and free their pathnames. */ 1482 for (i = 0; i < cs->sc_nccdisks; ++i) { 1483 /* 1484 * XXX: this close could potentially fail and 1485 * cause Bad Things. Maybe we need to force 1486 * the close to happen? 1487 */ 1488 #ifdef DEBUG 1489 if (ccddebug & CCDB_VNODE) 1490 vprint("CCDIOCCLR: vnode info", 1491 cs->sc_cinfo[i].ci_vp); 1492 #endif 1493 (void)vn_close(cs->sc_cinfo[i].ci_vp, FREAD|FWRITE); 1494 kfree(cs->sc_cinfo[i].ci_path, M_DEVBUF); 1495 } 1496 1497 /* Free interleave index. */ 1498 for (i = 0; cs->sc_itable[i].ii_ndisk; ++i) 1499 kfree(cs->sc_itable[i].ii_index, M_DEVBUF); 1500 1501 /* Free component info and interleave table. */ 1502 kfree(cs->sc_cinfo, M_DEVBUF); 1503 kfree(cs->sc_itable, M_DEVBUF); 1504 cs->sc_cinfo = NULL; 1505 cs->sc_itable = NULL; 1506 cs->sc_flags &= ~CCDF_INITED; 1507 1508 /* 1509 * Free ccddevice information and clear entry. 1510 */ 1511 kfree(ccddevs[unit].ccd_cpp, M_DEVBUF); 1512 kfree(ccddevs[unit].ccd_vpp, M_DEVBUF); 1513 bcopy(&ccd, &ccddevs[unit], sizeof(ccd)); 1514 1515 /* 1516 * And remove the devstat entry. 1517 */ 1518 devstat_remove_entry(&cs->device_stats); 1519 1520 /* This must be atomic. */ 1521 crit_enter(); 1522 ccdunlock(cs); 1523 crit_exit(); 1524 1525 break; 1526 1527 default: 1528 return (ENOTTY); 1529 } 1530 1531 return (0); 1532 } 1533 1534 static int 1535 ccddump(struct dev_dump_args *ap) 1536 { 1537 /* Not implemented. */ 1538 return ENXIO; 1539 } 1540 1541 /* 1542 * Lookup the provided name in the filesystem. If the file exists, 1543 * is a valid block device, and isn't being used by anyone else, 1544 * set *vpp to the file's vnode. 1545 */ 1546 static int 1547 ccdlookup(char *path, struct vnode **vpp) 1548 { 1549 struct nlookupdata nd; 1550 struct vnode *vp; 1551 int error; 1552 1553 *vpp = NULL; 1554 1555 error = nlookup_init(&nd, path, UIO_USERSPACE, NLC_FOLLOW|NLC_LOCKVP); 1556 if (error) 1557 return (error); 1558 if ((error = vn_open(&nd, NULL, FREAD|FWRITE, 0)) != 0) { 1559 #ifdef DEBUG 1560 if (ccddebug & CCDB_FOLLOW|CCDB_INIT) 1561 kprintf("ccdlookup: vn_open error = %d\n", error); 1562 #endif 1563 goto done; 1564 } 1565 vp = nd.nl_open_vp; 1566 1567 if (vp->v_opencount > 1) { 1568 error = EBUSY; 1569 goto done; 1570 } 1571 1572 if (!vn_isdisk(vp, &error)) 1573 goto done; 1574 1575 #ifdef DEBUG 1576 if (ccddebug & CCDB_VNODE) 1577 vprint("ccdlookup: vnode info", vp); 1578 #endif 1579 1580 vn_unlock(vp); 1581 nd.nl_open_vp = NULL; 1582 nlookup_done(&nd); 1583 *vpp = vp; /* leave ref intact */ 1584 return (0); 1585 done: 1586 nlookup_done(&nd); 1587 return (error); 1588 } 1589 1590 /* 1591 * Wait interruptibly for an exclusive lock. 1592 * 1593 * XXX 1594 * Several drivers do this; it should be abstracted and made MP-safe. 1595 */ 1596 static int 1597 ccdlock(struct ccd_softc *cs) 1598 { 1599 int error; 1600 1601 while ((cs->sc_flags & CCDF_LOCKED) != 0) { 1602 cs->sc_flags |= CCDF_WANTED; 1603 if ((error = tsleep(cs, PCATCH, "ccdlck", 0)) != 0) 1604 return (error); 1605 } 1606 cs->sc_flags |= CCDF_LOCKED; 1607 return (0); 1608 } 1609 1610 /* 1611 * Unlock and wake up any waiters. 1612 */ 1613 static void 1614 ccdunlock(struct ccd_softc *cs) 1615 { 1616 1617 cs->sc_flags &= ~CCDF_LOCKED; 1618 if ((cs->sc_flags & CCDF_WANTED) != 0) { 1619 cs->sc_flags &= ~CCDF_WANTED; 1620 wakeup(cs); 1621 } 1622 } 1623 1624 #ifdef DEBUG 1625 static void 1626 printiinfo(struct ccdiinfo *ii) 1627 { 1628 int ix, i; 1629 1630 for (ix = 0; ii->ii_ndisk; ix++, ii++) { 1631 kprintf(" itab[%d]: #dk %d sblk %d soff %d", 1632 ix, ii->ii_ndisk, ii->ii_startblk, ii->ii_startoff); 1633 for (i = 0; i < ii->ii_ndisk; i++) 1634 kprintf(" %d", ii->ii_index[i]); 1635 kprintf("\n"); 1636 } 1637 } 1638 #endif 1639 1640 1641 /* Local Variables: */ 1642 /* c-argdecl-indent: 8 */ 1643 /* c-continued-statement-offset: 8 */ 1644 /* c-indent-level: 8 */ 1645 /* End: */ 1646