1 /* 2 * Copyright (c) 2007 The DragonFly Project. All rights reserved. 3 * 4 * This code is derived from software contributed to The DragonFly Project 5 * by Matthew Dillon <dillon@backplane.com> 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in 15 * the documentation and/or other materials provided with the 16 * distribution. 17 * 3. Neither the name of The DragonFly Project nor the names of its 18 * contributors may be used to endorse or promote products derived 19 * from this software without specific, prior written permission. 20 * 21 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 22 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 23 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS 24 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE 25 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, 26 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING, 27 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 28 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED 29 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 30 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT 31 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 32 * SUCH DAMAGE. 33 * 34 */ 35 /* 36 * Copyright (c) 1995 Jason R. Thorpe. 37 * All rights reserved. 38 * 39 * Redistribution and use in source and binary forms, with or without 40 * modification, are permitted provided that the following conditions 41 * are met: 42 * 1. Redistributions of source code must retain the above copyright 43 * notice, this list of conditions and the following disclaimer. 44 * 2. Redistributions in binary form must reproduce the above copyright 45 * notice, this list of conditions and the following disclaimer in the 46 * documentation and/or other materials provided with the distribution. 47 * 3. All advertising materials mentioning features or use of this software 48 * must display the following acknowledgement: 49 * This product includes software developed for the NetBSD Project 50 * by Jason R. Thorpe. 51 * 4. The name of the author may not be used to endorse or promote products 52 * derived from this software without specific prior written permission. 53 * 54 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR 55 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 56 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 57 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, 58 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, 59 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 60 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED 61 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 62 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 63 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 64 * SUCH DAMAGE. 65 */ 66 67 /* 68 * Copyright (c) 1988 University of Utah. 69 * Copyright (c) 1990, 1993 70 * The Regents of the University of California. All rights reserved. 71 * 72 * This code is derived from software contributed to Berkeley by 73 * the Systems Programming Group of the University of Utah Computer 74 * Science Department. 75 * 76 * Redistribution and use in source and binary forms, with or without 77 * modification, are permitted provided that the following conditions 78 * are met: 79 * 1. Redistributions of source code must retain the above copyright 80 * notice, this list of conditions and the following disclaimer. 81 * 2. Redistributions in binary form must reproduce the above copyright 82 * notice, this list of conditions and the following disclaimer in the 83 * documentation and/or other materials provided with the distribution. 84 * 3. All advertising materials mentioning features or use of this software 85 * must display the following acknowledgement: 86 * This product includes software developed by the University of 87 * California, Berkeley and its contributors. 88 * 4. Neither the name of the University nor the names of its contributors 89 * may be used to endorse or promote products derived from this software 90 * without specific prior written permission. 91 * 92 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 93 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 94 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 95 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 96 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 97 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 98 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 99 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 100 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 101 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 102 * SUCH DAMAGE. 103 * 104 * from: Utah $Hdr: cd.c 1.6 90/11/28$ 105 */ 106 /* 107 * @(#)cd.c 8.2 (Berkeley) 11/16/93 108 * $FreeBSD: src/sys/dev/ccd/ccd.c,v 1.73.2.1 2001/09/11 09:49:52 kris Exp $ 109 * $NetBSD: ccd.c,v 1.22 1995/12/08 19:13:26 thorpej Exp $ 110 */ 111 112 /* 113 * "Concatenated" disk driver. 114 * 115 * Original dynamic configuration support by: 116 * Jason R. Thorpe <thorpej@nas.nasa.gov> 117 * Numerical Aerodynamic Simulation Facility 118 * Mail Stop 258-6 119 * NASA Ames Research Center 120 * Moffett Field, CA 94035 121 */ 122 123 #include "use_ccd.h" 124 125 #include <sys/param.h> 126 #include <sys/systm.h> 127 #include <sys/kernel.h> 128 #include <sys/module.h> 129 #include <sys/proc.h> 130 #include <sys/buf.h> 131 #include <sys/malloc.h> 132 #include <sys/nlookup.h> 133 #include <sys/conf.h> 134 #include <sys/stat.h> 135 #include <sys/sysctl.h> 136 #include <sys/disk.h> 137 #include <sys/dtype.h> 138 #include <sys/diskslice.h> 139 #include <sys/devicestat.h> 140 #include <sys/fcntl.h> 141 #include <sys/vnode.h> 142 #include <sys/ccdvar.h> 143 144 #include <vm/vm_zone.h> 145 146 #include <vfs/ufs/dinode.h> /* XXX Used only for fs.h */ 147 #include <vfs/ufs/fs.h> /* XXX used only to get BBSIZE and SBSIZE */ 148 149 #include <sys/buf2.h> 150 151 #if defined(CCDDEBUG) && !defined(DEBUG) 152 #define DEBUG 153 #endif 154 155 #ifdef DEBUG 156 #define CCDB_FOLLOW 0x01 157 #define CCDB_INIT 0x02 158 #define CCDB_IO 0x04 159 #define CCDB_LABEL 0x08 160 #define CCDB_VNODE 0x10 161 static int ccddebug = CCDB_FOLLOW | CCDB_INIT | CCDB_IO | CCDB_LABEL | 162 CCDB_VNODE; 163 SYSCTL_INT(_debug, OID_AUTO, ccddebug, CTLFLAG_RW, &ccddebug, 0, ""); 164 #undef DEBUG 165 #endif 166 167 #define ccdunit(x) dkunit(x) 168 #define ccdpart(x) dkpart(x) 169 170 /* 171 This is how mirroring works (only writes are special): 172 173 When initiating a write, ccdbuffer() returns two "struct ccdbuf *"s 174 linked together by the cb_mirror field. "cb_pflags & 175 CCDPF_MIRROR_DONE" is set to 0 on both of them. 176 177 When a component returns to ccdiodone(), it checks if "cb_pflags & 178 CCDPF_MIRROR_DONE" is set or not. If not, it sets the partner's 179 flag and returns. If it is, it means its partner has already 180 returned, so it will go to the regular cleanup. 181 182 */ 183 184 struct ccdbuf { 185 struct buf cb_buf; /* new I/O buf */ 186 struct vnode *cb_vp; /* related vnode */ 187 struct bio *cb_obio; /* ptr. to original I/O buf */ 188 int cb_unit; /* target unit */ 189 int cb_comp; /* target component */ 190 int cb_pflags; /* mirror/parity status flag */ 191 struct ccdbuf *cb_mirror; /* mirror counterpart */ 192 }; 193 194 /* bits in cb_pflags */ 195 #define CCDPF_MIRROR_DONE 1 /* if set, mirror counterpart is done */ 196 197 static d_open_t ccdopen; 198 static d_close_t ccdclose; 199 static d_strategy_t ccdstrategy; 200 static d_ioctl_t ccdioctl; 201 static d_dump_t ccddump; 202 203 static struct dev_ops ccd_ops = { 204 { "ccd", 0, D_DISK | D_MPSAFE }, 205 .d_open = ccdopen, 206 .d_close = ccdclose, 207 .d_read = physread, 208 .d_write = physwrite, 209 .d_ioctl = ccdioctl, 210 .d_strategy = ccdstrategy, 211 .d_dump = ccddump 212 }; 213 214 /* called during module initialization */ 215 static void ccdattach (void); 216 static int ccddetach (void); 217 static int ccd_modevent (module_t, int, void *); 218 219 /* called by biodone() at interrupt time */ 220 static void ccdiodone (struct bio *bio); 221 222 static void ccdstart (struct ccd_softc *, struct bio *); 223 static void ccdinterleave (struct ccd_softc *, int); 224 static void ccdintr (struct ccd_softc *, struct bio *); 225 static int ccdinit (struct ccddevice *, char **, struct ucred *); 226 static int ccdlookup (char *, struct vnode **); 227 static void ccdbuffer (struct ccdbuf **ret, struct ccd_softc *, 228 struct bio *, off_t, caddr_t, long); 229 static int ccdlock (struct ccd_softc *); 230 static void ccdunlock (struct ccd_softc *); 231 232 #ifdef DEBUG 233 static void printiinfo (struct ccdiinfo *); 234 #endif 235 236 /* Non-private for the benefit of libkvm. */ 237 struct ccd_softc *ccd_softc; 238 struct ccddevice *ccddevs; 239 static int numccd = 0; 240 241 /* 242 * getccdbuf() - Allocate and zero a ccd buffer. 243 */ 244 static struct ccdbuf * 245 getccdbuf(void) 246 { 247 struct ccdbuf *cbp; 248 249 cbp = kmalloc(sizeof(struct ccdbuf), M_DEVBUF, M_WAITOK | M_ZERO); 250 initbufbio(&cbp->cb_buf); 251 252 /* 253 * independant struct buf initialization 254 */ 255 buf_dep_init(&cbp->cb_buf); 256 BUF_LOCK(&cbp->cb_buf, LK_EXCLUSIVE); 257 BUF_KERNPROC(&cbp->cb_buf); 258 cbp->cb_buf.b_flags = B_PAGING | B_BNOCLIP; 259 260 return(cbp); 261 } 262 263 /* 264 * putccdbuf() - Free a ccd buffer. 265 */ 266 static void 267 putccdbuf(struct ccdbuf *cbp) 268 { 269 BUF_UNLOCK(&cbp->cb_buf); 270 271 uninitbufbio(&cbp->cb_buf); 272 kfree(cbp, M_DEVBUF); 273 } 274 275 /* 276 * Called by main() during pseudo-device attachment. All we need 277 * to do is allocate enough space for devices to be configured later, and 278 * add devsw entries. 279 */ 280 static void 281 ccdattach(void) 282 { 283 struct disk_info info; 284 struct ccd_softc *cs; 285 int i; 286 int num = NCCD; 287 288 if (num > 1) 289 kprintf("ccd0-%d: Concatenated disk drivers\n", num-1); 290 else 291 kprintf("ccd0: Concatenated disk driver\n"); 292 293 ccd_softc = kmalloc(num * sizeof(struct ccd_softc), M_DEVBUF, 294 M_WAITOK | M_ZERO); 295 ccddevs = kmalloc(num * sizeof(struct ccddevice), M_DEVBUF, 296 M_WAITOK | M_ZERO); 297 numccd = num; 298 299 /* 300 * With normal disk devices the open simply fails if the media 301 * is not present. With CCD we have to be able to open the 302 * raw disk to use the ioctl's to set it up, so create a dummy 303 * disk info structure so dscheck() doesn't blow up. 304 */ 305 bzero(&info, sizeof(info)); 306 info.d_media_blksize = DEV_BSIZE; 307 308 for (i = 0; i < numccd; ++i) { 309 cs = &ccd_softc[i]; 310 cs->sc_dev = disk_create(i, &cs->sc_disk, &ccd_ops); 311 cs->sc_dev->si_drv1 = cs; 312 cs->sc_dev->si_iosize_max = 256 * 512; /* XXX */ 313 disk_setdiskinfo(&cs->sc_disk, &info); 314 } 315 } 316 317 static int 318 ccddetach(void) 319 { 320 struct ccd_softc *cs; 321 struct dev_ioctl_args ioctl_args; 322 int i; 323 int error = 0; 324 int eval; 325 326 bzero(&ioctl_args, sizeof(ioctl_args)); 327 328 for (i = 0; i < numccd; ++i) { 329 cs = &ccd_softc[i]; 330 if (cs->sc_dev == NULL) 331 continue; 332 ioctl_args.a_head.a_dev = cs->sc_dev; 333 ioctl_args.a_cmd = CCDIOCCLR; 334 ioctl_args.a_fflag = FWRITE; 335 eval = ccdioctl(&ioctl_args); 336 if (eval && eval != ENXIO) { 337 kprintf("ccd%d: In use, cannot detach\n", i); 338 error = EBUSY; 339 } 340 } 341 if (error == 0) { 342 for (i = 0; i < numccd; ++i) { 343 cs = &ccd_softc[i]; 344 if (cs->sc_dev == NULL) 345 continue; 346 disk_destroy(&cs->sc_disk); 347 cs->sc_dev = NULL; 348 } 349 if (ccd_softc) 350 kfree(ccd_softc, M_DEVBUF); 351 if (ccddevs) 352 kfree(ccddevs, M_DEVBUF); 353 } 354 return (error); 355 } 356 357 static int 358 ccd_modevent(module_t mod, int type, void *data) 359 { 360 int error = 0; 361 362 switch (type) { 363 case MOD_LOAD: 364 ccdattach(); 365 break; 366 367 case MOD_UNLOAD: 368 error = ccddetach(); 369 break; 370 371 default: /* MOD_SHUTDOWN etc */ 372 break; 373 } 374 return (error); 375 } 376 377 DEV_MODULE(ccd, ccd_modevent, NULL); 378 379 static int 380 ccdinit(struct ccddevice *ccd, char **cpaths, struct ucred *cred) 381 { 382 struct ccd_softc *cs = &ccd_softc[ccd->ccd_unit]; 383 struct ccdcinfo *ci = NULL; /* XXX */ 384 int ix; 385 struct vnode *vp; 386 u_int64_t skip; 387 u_int64_t size; 388 u_int64_t minsize; 389 int maxsecsize; 390 struct partinfo dpart; 391 struct ccdgeom *ccg = &cs->sc_geom; 392 char tmppath[MAXPATHLEN]; 393 int error = 0; 394 395 #ifdef DEBUG 396 if (ccddebug & (CCDB_FOLLOW|CCDB_INIT)) 397 kprintf("ccdinit: unit %d\n", ccd->ccd_unit); 398 #endif 399 400 cs->sc_size = 0; 401 cs->sc_ileave = ccd->ccd_interleave; 402 cs->sc_nccdisks = ccd->ccd_ndev; 403 404 /* Allocate space for the component info. */ 405 cs->sc_cinfo = kmalloc(cs->sc_nccdisks * sizeof(struct ccdcinfo), 406 M_DEVBUF, M_WAITOK); 407 cs->sc_maxiosize = MAXPHYS; 408 409 lockinit(&cs->sc_lock, "ccdlck", 0, 0); 410 ccdlock(cs); 411 412 /* 413 * Verify that each component piece exists and record 414 * relevant information about it. 415 */ 416 maxsecsize = 0; 417 minsize = 0; 418 for (ix = 0; ix < cs->sc_nccdisks; ix++) { 419 vp = ccd->ccd_vpp[ix]; 420 ci = &cs->sc_cinfo[ix]; 421 ci->ci_vp = vp; 422 423 /* 424 * Copy in the pathname of the component. 425 */ 426 bzero(tmppath, sizeof(tmppath)); /* sanity */ 427 if ((error = copyinstr(cpaths[ix], tmppath, 428 MAXPATHLEN, &ci->ci_pathlen)) != 0) { 429 #ifdef DEBUG 430 if (ccddebug & (CCDB_FOLLOW|CCDB_INIT)) 431 kprintf("ccd%d: can't copy path, error = %d\n", 432 ccd->ccd_unit, error); 433 #endif 434 goto fail; 435 } 436 ci->ci_path = kmalloc(ci->ci_pathlen, M_DEVBUF, M_WAITOK); 437 bcopy(tmppath, ci->ci_path, ci->ci_pathlen); 438 439 ci->ci_dev = vn_todev(vp); 440 if (ci->ci_dev->si_iosize_max && 441 cs->sc_maxiosize > ci->ci_dev->si_iosize_max) { 442 cs->sc_maxiosize = ci->ci_dev->si_iosize_max; 443 } 444 445 /* 446 * Get partition information for the component. 447 */ 448 error = VOP_IOCTL(vp, DIOCGPART, (caddr_t)&dpart, FREAD, 449 cred, NULL); 450 if (error) { 451 #ifdef DEBUG 452 if (ccddebug & (CCDB_FOLLOW|CCDB_INIT)) 453 kprintf("ccd%d: %s: ioctl failed, error = %d\n", 454 ccd->ccd_unit, ci->ci_path, error); 455 #endif 456 goto fail; 457 } 458 if (dpart.fstype != FS_CCD && 459 !kuuid_is_ccd(&dpart.fstype_uuid)) { 460 kprintf("ccd%d: %s: filesystem type must be 'ccd'\n", 461 ccd->ccd_unit, ci->ci_path); 462 error = EFTYPE; 463 goto fail; 464 } 465 if (maxsecsize < dpart.media_blksize) 466 maxsecsize = dpart.media_blksize; 467 468 /* 469 * Skip a certain amount of storage at the beginning of 470 * the component to make sure we don't infringe on any 471 * reserved sectors. This is handled entirely by 472 * dpart.reserved_blocks but we also impose a minimum 473 * of 16 sectors for backwards compatibility. 474 */ 475 skip = 16; 476 if (skip < dpart.reserved_blocks) 477 skip = dpart.reserved_blocks; 478 size = dpart.media_blocks - skip; 479 480 /* 481 * Calculate the size, truncating to an interleave 482 * boundary if necessary. 483 */ 484 if (cs->sc_ileave > 1) 485 size -= size % cs->sc_ileave; 486 487 if ((int64_t)size <= 0) { 488 #ifdef DEBUG 489 if (ccddebug & (CCDB_FOLLOW|CCDB_INIT)) 490 kprintf("ccd%d: %s: size == 0\n", 491 ccd->ccd_unit, ci->ci_path); 492 #endif 493 error = ENODEV; 494 goto fail; 495 } 496 497 /* 498 * Calculate the smallest uniform component, used 499 * elsewhere. 500 */ 501 if (minsize == 0 || minsize > size) 502 minsize = size; 503 ci->ci_skip = skip; 504 ci->ci_size = size; 505 cs->sc_size += size; 506 } 507 kprintf("ccd%d: max component iosize is %d total blocks %lld\n", 508 cs->sc_unit, cs->sc_maxiosize, (long long)cs->sc_size); 509 510 /* 511 * Don't allow the interleave to be smaller than 512 * the biggest component sector. 513 */ 514 if ((cs->sc_ileave > 0) && 515 (cs->sc_ileave % (maxsecsize / DEV_BSIZE))) { 516 #ifdef DEBUG 517 if (ccddebug & (CCDB_FOLLOW|CCDB_INIT)) 518 kprintf("ccd%d: interleave must be at least %d\n", 519 ccd->ccd_unit, (maxsecsize / DEV_BSIZE)); 520 #endif 521 error = EINVAL; 522 goto fail; 523 } 524 525 /* 526 * If uniform interleave is desired set all sizes to that of 527 * the smallest component. This will guarentee that a single 528 * interleave table is generated. 529 * 530 * Lost space must be taken into account when calculating the 531 * overall size. Half the space is lost when CCDF_MIRROR is 532 * specified. One disk is lost when CCDF_PARITY is specified. 533 */ 534 if (ccd->ccd_flags & CCDF_UNIFORM) { 535 for (ci = cs->sc_cinfo; 536 ci < &cs->sc_cinfo[cs->sc_nccdisks]; ci++) { 537 ci->ci_size = minsize; 538 } 539 if (ccd->ccd_flags & CCDF_MIRROR) { 540 /* 541 * Check to see if an even number of components 542 * have been specified. The interleave must also 543 * be non-zero in order for us to be able to 544 * guarentee the topology. 545 */ 546 if (cs->sc_nccdisks % 2) { 547 kprintf("ccd%d: mirroring requires an even number of disks\n", ccd->ccd_unit ); 548 error = EINVAL; 549 goto fail; 550 } 551 if (cs->sc_ileave == 0) { 552 kprintf("ccd%d: an interleave must be specified when mirroring\n", ccd->ccd_unit); 553 error = EINVAL; 554 goto fail; 555 } 556 cs->sc_size = (cs->sc_nccdisks/2) * minsize; 557 } else if (ccd->ccd_flags & CCDF_PARITY) { 558 cs->sc_size = (cs->sc_nccdisks-1) * minsize; 559 } else { 560 if (cs->sc_ileave == 0) { 561 kprintf("ccd%d: an interleave must be specified when using parity\n", ccd->ccd_unit); 562 error = EINVAL; 563 goto fail; 564 } 565 cs->sc_size = cs->sc_nccdisks * minsize; 566 } 567 } 568 569 /* 570 * Construct the interleave table. 571 */ 572 ccdinterleave(cs, ccd->ccd_unit); 573 574 /* 575 * Create pseudo-geometry based on 1MB cylinders. It's 576 * pretty close. 577 */ 578 ccg->ccg_secsize = maxsecsize; 579 ccg->ccg_ntracks = 1; 580 ccg->ccg_nsectors = 1024 * 1024 / ccg->ccg_secsize; 581 ccg->ccg_ncylinders = cs->sc_size / ccg->ccg_nsectors; 582 583 /* 584 * Add an devstat entry for this device. 585 */ 586 devstat_add_entry(&cs->device_stats, "ccd", ccd->ccd_unit, 587 ccg->ccg_secsize, DEVSTAT_ALL_SUPPORTED, 588 DEVSTAT_TYPE_STORARRAY |DEVSTAT_TYPE_IF_OTHER, 589 DEVSTAT_PRIORITY_ARRAY); 590 591 cs->sc_flags |= CCDF_INITED; 592 cs->sc_cflags = ccd->ccd_flags; /* So we can find out later... */ 593 cs->sc_unit = ccd->ccd_unit; 594 return (0); 595 fail: 596 while (ci > cs->sc_cinfo) { 597 ci--; 598 kfree(ci->ci_path, M_DEVBUF); 599 } 600 kfree(cs->sc_cinfo, M_DEVBUF); 601 cs->sc_cinfo = NULL; 602 return (error); 603 } 604 605 static void 606 ccdinterleave(struct ccd_softc *cs, int unit) 607 { 608 struct ccdcinfo *ci, *smallci; 609 struct ccdiinfo *ii; 610 u_int64_t bn; 611 u_int64_t lbn; 612 u_int64_t size; 613 int icount; 614 int ix; 615 616 #ifdef DEBUG 617 if (ccddebug & CCDB_INIT) 618 kprintf("ccdinterleave(%x): ileave %d\n", cs, cs->sc_ileave); 619 #endif 620 621 /* 622 * Allocate an interleave table. The worst case occurs when each 623 * of N disks is of a different size, resulting in N interleave 624 * tables. 625 * 626 * Chances are this is too big, but we don't care. 627 */ 628 icount = cs->sc_nccdisks + 1; 629 cs->sc_itable = kmalloc(icount * sizeof(struct ccdiinfo), 630 M_DEVBUF, M_WAITOK|M_ZERO); 631 632 /* 633 * Trivial case: no interleave (actually interleave of disk size). 634 * Each table entry represents a single component in its entirety. 635 * 636 * An interleave of 0 may not be used with a mirror or parity setup. 637 */ 638 if (cs->sc_ileave == 0) { 639 bn = 0; 640 ii = cs->sc_itable; 641 642 for (ix = 0; ix < cs->sc_nccdisks; ix++) { 643 /* Allocate space for ii_index. */ 644 ii->ii_index = kmalloc(sizeof(int), M_DEVBUF, M_WAITOK); 645 ii->ii_ndisk = 1; 646 ii->ii_startblk = bn; 647 ii->ii_startoff = 0; 648 ii->ii_index[0] = ix; 649 bn += cs->sc_cinfo[ix].ci_size; 650 ii++; 651 } 652 ii->ii_ndisk = 0; 653 #ifdef DEBUG 654 if (ccddebug & CCDB_INIT) 655 printiinfo(cs->sc_itable); 656 #endif 657 return; 658 } 659 660 /* 661 * The following isn't fast or pretty; it doesn't have to be. 662 */ 663 size = 0; 664 bn = lbn = 0; 665 for (ii = cs->sc_itable; ii < &cs->sc_itable[icount]; ++ii) { 666 /* 667 * Allocate space for ii_index. We might allocate more then 668 * we use. 669 */ 670 ii->ii_index = kmalloc((sizeof(int) * cs->sc_nccdisks), 671 M_DEVBUF, M_WAITOK); 672 673 /* 674 * Locate the smallest of the remaining components 675 */ 676 smallci = NULL; 677 ci = cs->sc_cinfo; 678 while (ci < &cs->sc_cinfo[cs->sc_nccdisks]) { 679 if (ci->ci_size > size && 680 (smallci == NULL || 681 ci->ci_size < smallci->ci_size)) { 682 smallci = ci; 683 } 684 ++ci; 685 } 686 687 /* 688 * Nobody left, all done 689 */ 690 if (smallci == NULL) { 691 ii->ii_ndisk = 0; 692 break; 693 } 694 695 /* 696 * Record starting logical block using an sc_ileave blocksize. 697 */ 698 ii->ii_startblk = bn / cs->sc_ileave; 699 700 /* 701 * Record starting component block using an sc_ileave 702 * blocksize. This value is relative to the beginning of 703 * a component disk. 704 */ 705 ii->ii_startoff = lbn; 706 707 /* 708 * Determine how many disks take part in this interleave 709 * and record their indices. 710 */ 711 ix = 0; 712 for (ci = cs->sc_cinfo; 713 ci < &cs->sc_cinfo[cs->sc_nccdisks]; ci++) { 714 if (ci->ci_size >= smallci->ci_size) { 715 ii->ii_index[ix++] = ci - cs->sc_cinfo; 716 } 717 } 718 ii->ii_ndisk = ix; 719 720 /* 721 * Adjust for loop 722 */ 723 bn += ix * (smallci->ci_size - size); 724 lbn = smallci->ci_size / cs->sc_ileave; 725 size = smallci->ci_size; 726 } 727 if (ii == &cs->sc_itable[icount]) 728 panic("ccdinterlave software bug! table exhausted"); 729 #ifdef DEBUG 730 if (ccddebug & CCDB_INIT) 731 printiinfo(cs->sc_itable); 732 #endif 733 } 734 735 /* ARGSUSED */ 736 static int 737 ccdopen(struct dev_open_args *ap) 738 { 739 cdev_t dev = ap->a_head.a_dev; 740 int unit = ccdunit(dev); 741 struct ccd_softc *cs; 742 int error = 0; 743 744 #ifdef DEBUG 745 if (ccddebug & CCDB_FOLLOW) 746 kprintf("ccdopen(%x, %x)\n", dev, flags); 747 #endif 748 if (unit >= numccd) 749 return (ENXIO); 750 cs = &ccd_softc[unit]; 751 752 if ((error = ccdlock(cs)) == 0) { 753 ccdunlock(cs); 754 } 755 return (error); 756 } 757 758 /* ARGSUSED */ 759 static int 760 ccdclose(struct dev_close_args *ap) 761 { 762 cdev_t dev = ap->a_head.a_dev; 763 int unit = ccdunit(dev); 764 struct ccd_softc *cs; 765 int error = 0; 766 767 #ifdef DEBUG 768 if (ccddebug & CCDB_FOLLOW) 769 kprintf("ccdclose(%x, %x)\n", dev, flags); 770 #endif 771 772 if (unit >= numccd) 773 return (ENXIO); 774 cs = &ccd_softc[unit]; 775 if ((error = ccdlock(cs)) == 0) { 776 ccdunlock(cs); 777 } 778 return (error); 779 } 780 781 static int 782 ccdstrategy(struct dev_strategy_args *ap) 783 { 784 cdev_t dev = ap->a_head.a_dev; 785 struct bio *bio = ap->a_bio; 786 int unit = ccdunit(dev); 787 struct bio *nbio; 788 struct buf *bp = bio->bio_buf; 789 struct ccd_softc *cs = &ccd_softc[unit]; 790 u_int64_t pbn; /* in sc_secsize chunks */ 791 u_int32_t sz; /* in sc_secsize chunks */ 792 793 #ifdef DEBUG 794 if (ccddebug & CCDB_FOLLOW) 795 kprintf("ccdstrategy(%x): unit %d\n", bp, unit); 796 #endif 797 if ((cs->sc_flags & CCDF_INITED) == 0) { 798 bp->b_error = ENXIO; 799 goto error; 800 } 801 802 /* If it's a nil transfer, wake up the top half now. */ 803 if (bp->b_bcount == 0) { 804 bp->b_resid = 0; 805 goto done; 806 } 807 808 /* 809 * Do bounds checking and adjust transfer. If there's an 810 * error, the bounds check will flag that for us. 811 */ 812 813 pbn = bio->bio_offset / cs->sc_geom.ccg_secsize; 814 sz = howmany(bp->b_bcount, cs->sc_geom.ccg_secsize); 815 816 /* 817 * If out of bounds return an error. If the request goes 818 * past EOF, clip the request as appropriate. If exactly 819 * at EOF, return success (don't clip), but with 0 bytes 820 * of I/O. 821 * 822 * Mark EOF B_INVAL (just like bad), indicating that the 823 * contents of the buffer, if any, is invalid. 824 */ 825 if ((int64_t)pbn < 0) 826 goto bad; 827 if (pbn + sz > cs->sc_size) { 828 if (pbn > cs->sc_size || (bp->b_flags & B_BNOCLIP)) 829 goto bad; 830 if (pbn == cs->sc_size) { 831 bp->b_resid = bp->b_bcount; 832 bp->b_flags |= B_INVAL; 833 goto done; 834 } 835 sz = (long)(cs->sc_size - pbn); 836 bp->b_bcount = sz * cs->sc_geom.ccg_secsize; 837 } 838 nbio = bio; 839 840 bp->b_resid = bp->b_bcount; 841 nbio->bio_driver_info = dev; 842 843 /* 844 * "Start" the unit. 845 */ 846 ccdstart(cs, nbio); 847 return(0); 848 849 /* 850 * note: bio, not nbio, is valid at the done label. 851 */ 852 bad: 853 bp->b_error = EINVAL; 854 error: 855 bp->b_resid = bp->b_bcount; 856 bp->b_flags |= B_ERROR | B_INVAL; 857 done: 858 biodone(bio); 859 return(0); 860 } 861 862 static void 863 ccdstart(struct ccd_softc *cs, struct bio *bio) 864 { 865 long bcount, rcount; 866 struct ccdbuf *cbp[4]; 867 struct buf *bp = bio->bio_buf; 868 /* XXX! : 2 reads and 2 writes for RAID 4/5 */ 869 caddr_t addr; 870 off_t doffset; 871 872 #ifdef DEBUG 873 if (ccddebug & CCDB_FOLLOW) 874 kprintf("ccdstart(%x, %x)\n", cs, bp); 875 #endif 876 877 /* Record the transaction start */ 878 devstat_start_transaction(&cs->device_stats); 879 880 /* 881 * Allocate component buffers and fire off the requests 882 */ 883 doffset = bio->bio_offset; 884 addr = bp->b_data; 885 886 for (bcount = bp->b_bcount; bcount > 0; bcount -= rcount) { 887 ccdbuffer(cbp, cs, bio, doffset, addr, bcount); 888 rcount = cbp[0]->cb_buf.b_bcount; 889 890 if (cs->sc_cflags & CCDF_MIRROR) { 891 /* 892 * Mirroring. Writes go to both disks, reads are 893 * taken from whichever disk seems most appropriate. 894 * 895 * We attempt to localize reads to the disk whos arm 896 * is nearest the read request. We ignore seeks due 897 * to writes when making this determination and we 898 * also try to avoid hogging. 899 */ 900 if (cbp[0]->cb_buf.b_cmd != BUF_CMD_READ) { 901 vn_strategy(cbp[0]->cb_vp, 902 &cbp[0]->cb_buf.b_bio1); 903 vn_strategy(cbp[1]->cb_vp, 904 &cbp[1]->cb_buf.b_bio1); 905 } else { 906 int pick = cs->sc_pick; 907 daddr_t range = cs->sc_size / 16 * cs->sc_geom.ccg_secsize; 908 if (doffset < cs->sc_blk[pick] - range || 909 doffset > cs->sc_blk[pick] + range 910 ) { 911 cs->sc_pick = pick = 1 - pick; 912 } 913 cs->sc_blk[pick] = doffset + rcount; 914 vn_strategy(cbp[pick]->cb_vp, 915 &cbp[pick]->cb_buf.b_bio1); 916 } 917 } else { 918 /* 919 * Not mirroring 920 */ 921 vn_strategy(cbp[0]->cb_vp, 922 &cbp[0]->cb_buf.b_bio1); 923 } 924 doffset += rcount; 925 addr += rcount; 926 } 927 } 928 929 /* 930 * Build a component buffer header. 931 */ 932 static void 933 ccdbuffer(struct ccdbuf **cb, struct ccd_softc *cs, struct bio *bio, 934 off_t doffset, caddr_t addr, long bcount) 935 { 936 struct ccdcinfo *ci, *ci2 = NULL; /* XXX */ 937 struct ccdbuf *cbp; 938 u_int64_t bn; 939 u_int64_t cbn; 940 u_int64_t cboff; 941 off_t cbc; 942 943 #ifdef DEBUG 944 if (ccddebug & CCDB_IO) 945 kprintf("ccdbuffer(%x, %x, %d, %x, %d)\n", 946 cs, bp, bn, addr, bcount); 947 #endif 948 /* 949 * Determine which component bn falls in. 950 */ 951 bn = doffset / cs->sc_geom.ccg_secsize; 952 cbn = bn; 953 cboff = 0; 954 955 if (cs->sc_ileave == 0) { 956 /* 957 * Serially concatenated and neither a mirror nor a parity 958 * config. This is a special case. 959 */ 960 daddr_t sblk; 961 962 sblk = 0; 963 for (ci = cs->sc_cinfo; cbn >= sblk + ci->ci_size; ci++) 964 sblk += ci->ci_size; 965 cbn -= sblk; 966 } else { 967 struct ccdiinfo *ii; 968 int ccdisk, off; 969 970 /* 971 * Calculate cbn, the logical superblock (sc_ileave chunks), 972 * and cboff, a normal block offset (DEV_BSIZE chunks) relative 973 * to cbn. 974 */ 975 cboff = cbn % cs->sc_ileave; /* DEV_BSIZE gran */ 976 cbn = cbn / cs->sc_ileave; /* DEV_BSIZE * ileave gran */ 977 978 /* 979 * Figure out which interleave table to use. 980 */ 981 for (ii = cs->sc_itable; ii->ii_ndisk; ii++) { 982 if (ii->ii_startblk > cbn) 983 break; 984 } 985 ii--; 986 987 /* 988 * off is the logical superblock relative to the beginning 989 * of this interleave block. 990 */ 991 off = cbn - ii->ii_startblk; 992 993 /* 994 * We must calculate which disk component to use (ccdisk), 995 * and recalculate cbn to be the superblock relative to 996 * the beginning of the component. This is typically done by 997 * adding 'off' and ii->ii_startoff together. However, 'off' 998 * must typically be divided by the number of components in 999 * this interleave array to be properly convert it from a 1000 * CCD-relative logical superblock number to a 1001 * component-relative superblock number. 1002 */ 1003 if (ii->ii_ndisk == 1) { 1004 /* 1005 * When we have just one disk, it can't be a mirror 1006 * or a parity config. 1007 */ 1008 ccdisk = ii->ii_index[0]; 1009 cbn = ii->ii_startoff + off; 1010 } else { 1011 if (cs->sc_cflags & CCDF_MIRROR) { 1012 /* 1013 * We have forced a uniform mapping, resulting 1014 * in a single interleave array. We double 1015 * up on the first half of the available 1016 * components and our mirror is in the second 1017 * half. This only works with a single 1018 * interleave array because doubling up 1019 * doubles the number of sectors, so there 1020 * cannot be another interleave array because 1021 * the next interleave array's calculations 1022 * would be off. 1023 */ 1024 int ndisk2 = ii->ii_ndisk / 2; 1025 ccdisk = ii->ii_index[off % ndisk2]; 1026 cbn = ii->ii_startoff + off / ndisk2; 1027 ci2 = &cs->sc_cinfo[ccdisk + ndisk2]; 1028 } else if (cs->sc_cflags & CCDF_PARITY) { 1029 /* 1030 * XXX not implemented yet 1031 */ 1032 int ndisk2 = ii->ii_ndisk - 1; 1033 ccdisk = ii->ii_index[off % ndisk2]; 1034 cbn = ii->ii_startoff + off / ndisk2; 1035 if (cbn % ii->ii_ndisk <= ccdisk) 1036 ccdisk++; 1037 } else { 1038 ccdisk = ii->ii_index[off % ii->ii_ndisk]; 1039 cbn = ii->ii_startoff + off / ii->ii_ndisk; 1040 } 1041 } 1042 1043 ci = &cs->sc_cinfo[ccdisk]; 1044 1045 /* 1046 * Convert cbn from a superblock to a normal block so it 1047 * can be used to calculate (along with cboff) the normal 1048 * block index into this particular disk. 1049 */ 1050 cbn *= cs->sc_ileave; 1051 } 1052 1053 /* 1054 * Fill in the component buf structure. 1055 * 1056 * NOTE: devices do not use b_bufsize, only b_bcount, but b_bcount 1057 * will be truncated on device EOF so we use b_bufsize to detect 1058 * the case. 1059 */ 1060 cbp = getccdbuf(); 1061 cbp->cb_buf.b_cmd = bio->bio_buf->b_cmd; 1062 cbp->cb_buf.b_flags |= bio->bio_buf->b_flags; 1063 cbp->cb_buf.b_data = addr; 1064 cbp->cb_vp = ci->ci_vp; 1065 if (cs->sc_ileave == 0) 1066 cbc = dbtob((off_t)(ci->ci_size - cbn)); 1067 else 1068 cbc = dbtob((off_t)(cs->sc_ileave - cboff)); 1069 if (cbc > cs->sc_maxiosize) 1070 cbc = cs->sc_maxiosize; 1071 cbp->cb_buf.b_bcount = (cbc < bcount) ? cbc : bcount; 1072 cbp->cb_buf.b_bufsize = cbp->cb_buf.b_bcount; 1073 1074 cbp->cb_buf.b_bio1.bio_done = ccdiodone; 1075 cbp->cb_buf.b_bio1.bio_caller_info1.ptr = cbp; 1076 cbp->cb_buf.b_bio1.bio_offset = dbtob(cbn + cboff + ci->ci_skip); 1077 1078 /* 1079 * context for ccdiodone 1080 */ 1081 cbp->cb_obio = bio; 1082 cbp->cb_unit = cs - ccd_softc; 1083 cbp->cb_comp = ci - cs->sc_cinfo; 1084 1085 #ifdef DEBUG 1086 if (ccddebug & CCDB_IO) 1087 kprintf(" dev %x(u%d): cbp %x off %lld addr %x bcnt %d\n", 1088 ci->ci_dev, ci-cs->sc_cinfo, cbp, 1089 cbp->cb_buf.b_bio1.bio_offset, 1090 cbp->cb_buf.b_data, cbp->cb_buf.b_bcount); 1091 #endif 1092 cb[0] = cbp; 1093 1094 /* 1095 * Note: both I/O's setup when reading from mirror, but only one 1096 * will be executed. 1097 */ 1098 if (cs->sc_cflags & CCDF_MIRROR) { 1099 /* mirror, setup second I/O */ 1100 cbp = getccdbuf(); 1101 1102 cbp->cb_buf.b_cmd = bio->bio_buf->b_cmd; 1103 cbp->cb_buf.b_flags |= bio->bio_buf->b_flags; 1104 cbp->cb_buf.b_data = addr; 1105 cbp->cb_vp = ci2->ci_vp; 1106 if (cs->sc_ileave == 0) 1107 cbc = dbtob((off_t)(ci->ci_size - cbn)); 1108 else 1109 cbc = dbtob((off_t)(cs->sc_ileave - cboff)); 1110 if (cbc > cs->sc_maxiosize) 1111 cbc = cs->sc_maxiosize; 1112 cbp->cb_buf.b_bcount = (cbc < bcount) ? cbc : bcount; 1113 cbp->cb_buf.b_bufsize = cbp->cb_buf.b_bcount; 1114 1115 cbp->cb_buf.b_bio1.bio_done = ccdiodone; 1116 cbp->cb_buf.b_bio1.bio_caller_info1.ptr = cbp; 1117 cbp->cb_buf.b_bio1.bio_offset = dbtob(cbn + cboff + ci2->ci_skip); 1118 1119 /* 1120 * context for ccdiodone 1121 */ 1122 cbp->cb_obio = bio; 1123 cbp->cb_unit = cs - ccd_softc; 1124 cbp->cb_comp = ci2 - cs->sc_cinfo; 1125 cb[1] = cbp; 1126 /* link together the ccdbuf's and clear "mirror done" flag */ 1127 cb[0]->cb_mirror = cb[1]; 1128 cb[1]->cb_mirror = cb[0]; 1129 cb[0]->cb_pflags &= ~CCDPF_MIRROR_DONE; 1130 cb[1]->cb_pflags &= ~CCDPF_MIRROR_DONE; 1131 } 1132 } 1133 1134 static void 1135 ccdintr(struct ccd_softc *cs, struct bio *bio) 1136 { 1137 struct buf *bp = bio->bio_buf; 1138 1139 #ifdef DEBUG 1140 if (ccddebug & CCDB_FOLLOW) 1141 kprintf("ccdintr(%x, %x)\n", cs, bp); 1142 #endif 1143 /* 1144 * Request is done for better or worse, wakeup the top half. 1145 */ 1146 if (bp->b_flags & B_ERROR) 1147 bp->b_resid = bp->b_bcount; 1148 devstat_end_transaction_buf(&cs->device_stats, bp); 1149 biodone(bio); 1150 } 1151 1152 /* 1153 * Called at interrupt time. 1154 * 1155 * Mark the component as done and if all components are done, 1156 * take a ccd interrupt. 1157 */ 1158 static void 1159 ccdiodone(struct bio *bio) 1160 { 1161 struct ccdbuf *cbp = bio->bio_caller_info1.ptr; 1162 struct bio *obio = cbp->cb_obio; 1163 struct buf *obp = obio->bio_buf; 1164 int unit = cbp->cb_unit; 1165 struct ccd_softc *sc = &ccd_softc[unit]; 1166 int count; 1167 1168 /* 1169 * Since we do not have exclusive access to underlying devices, 1170 * we can't keep cache translations around. 1171 */ 1172 clearbiocache(bio->bio_next); 1173 1174 ccdlock(sc); 1175 1176 #ifdef DEBUG 1177 if (ccddebug & CCDB_FOLLOW) 1178 kprintf("ccdiodone(%x)\n", cbp); 1179 if (ccddebug & CCDB_IO) { 1180 kprintf("ccdiodone: bp %x bcount %d resid %d\n", 1181 obp, obp->b_bcount, obp->b_resid); 1182 kprintf(" dev %x(u%d), cbp %x off %lld addr %x bcnt %d\n", 1183 cbp->cb_buf.b_dev, cbp->cb_comp, cbp, 1184 cbp->cb_buf.b_loffset, cbp->cb_buf.b_data, 1185 cbp->cb_buf.b_bcount); 1186 } 1187 #endif 1188 1189 /* 1190 * If an error occured, report it. If this is a mirrored 1191 * configuration and the first of two possible reads, do not 1192 * set the error in the bp yet because the second read may 1193 * succeed. 1194 */ 1195 if (cbp->cb_buf.b_flags & B_ERROR) { 1196 const char *msg = ""; 1197 1198 if ((sc->sc_cflags & CCDF_MIRROR) && 1199 (cbp->cb_buf.b_cmd == BUF_CMD_READ) && 1200 (cbp->cb_pflags & CCDPF_MIRROR_DONE) == 0) { 1201 /* 1202 * We will try our read on the other disk down 1203 * below, also reverse the default pick so if we 1204 * are doing a scan we do not keep hitting the 1205 * bad disk first. 1206 */ 1207 msg = ", trying other disk"; 1208 sc->sc_pick = 1 - sc->sc_pick; 1209 sc->sc_blk[sc->sc_pick] = obio->bio_offset; 1210 } else { 1211 obp->b_flags |= B_ERROR; 1212 obp->b_error = cbp->cb_buf.b_error ? 1213 cbp->cb_buf.b_error : EIO; 1214 } 1215 kprintf("ccd%d: error %d on component %d " 1216 "offset %jd (ccd offset %jd)%s\n", 1217 unit, obp->b_error, cbp->cb_comp, 1218 (intmax_t)cbp->cb_buf.b_bio2.bio_offset, 1219 (intmax_t)obio->bio_offset, 1220 msg); 1221 } 1222 1223 /* 1224 * Process mirror. If we are writing, I/O has been initiated on both 1225 * buffers and we fall through only after both are finished. 1226 * 1227 * If we are reading only one I/O is initiated at a time. If an 1228 * error occurs we initiate the second I/O and return, otherwise 1229 * we free the second I/O without initiating it. 1230 */ 1231 1232 if (sc->sc_cflags & CCDF_MIRROR) { 1233 if (cbp->cb_buf.b_cmd != BUF_CMD_READ) { 1234 /* 1235 * When writing, handshake with the second buffer 1236 * to determine when both are done. If both are not 1237 * done, return here. 1238 */ 1239 if ((cbp->cb_pflags & CCDPF_MIRROR_DONE) == 0) { 1240 cbp->cb_mirror->cb_pflags |= CCDPF_MIRROR_DONE; 1241 putccdbuf(cbp); 1242 ccdunlock(sc); 1243 return; 1244 } 1245 } else { 1246 /* 1247 * When reading, either dispose of the second buffer 1248 * or initiate I/O on the second buffer if an error 1249 * occured with this one. 1250 */ 1251 if ((cbp->cb_pflags & CCDPF_MIRROR_DONE) == 0) { 1252 if (cbp->cb_buf.b_flags & B_ERROR) { 1253 cbp->cb_mirror->cb_pflags |= 1254 CCDPF_MIRROR_DONE; 1255 vn_strategy( 1256 cbp->cb_mirror->cb_vp, 1257 &cbp->cb_mirror->cb_buf.b_bio1 1258 ); 1259 putccdbuf(cbp); 1260 ccdunlock(sc); 1261 return; 1262 } else { 1263 putccdbuf(cbp->cb_mirror); 1264 /* fall through */ 1265 } 1266 } 1267 } 1268 } 1269 1270 /* 1271 * Use our saved b_bufsize to determine if an unexpected EOF occured. 1272 */ 1273 count = cbp->cb_buf.b_bufsize; 1274 putccdbuf(cbp); 1275 1276 /* 1277 * If all done, "interrupt". 1278 */ 1279 obp->b_resid -= count; 1280 if (obp->b_resid < 0) 1281 panic("ccdiodone: count"); 1282 1283 ccdunlock(sc); 1284 1285 if (obp->b_resid == 0) 1286 ccdintr(sc, obio); 1287 } 1288 1289 static int 1290 ccdioctl(struct dev_ioctl_args *ap) 1291 { 1292 cdev_t dev = ap->a_head.a_dev; 1293 int unit = ccdunit(dev); 1294 int i, j, lookedup = 0, error = 0; 1295 struct ccd_softc *cs; 1296 struct ccd_ioctl *ccio = (struct ccd_ioctl *)ap->a_data; 1297 struct ccddevice ccd; 1298 struct disk_info info; 1299 char **cpp; 1300 struct vnode **vpp; 1301 1302 if (unit >= numccd) 1303 return (ENXIO); 1304 cs = &ccd_softc[unit]; 1305 1306 bzero(&ccd, sizeof(ccd)); 1307 1308 switch (ap->a_cmd) { 1309 case CCDIOCSET: 1310 if (cs->sc_flags & CCDF_INITED) 1311 return (EBUSY); 1312 1313 if ((ap->a_fflag & FWRITE) == 0) 1314 return (EBADF); 1315 1316 if ((error = ccdlock(cs)) != 0) 1317 return (error); 1318 1319 if (ccio->ccio_ndisks > CCD_MAXNDISKS) { 1320 ccdunlock(cs); 1321 return (EINVAL); 1322 } 1323 1324 /* Fill in some important bits. */ 1325 ccd.ccd_unit = unit; 1326 ccd.ccd_interleave = ccio->ccio_ileave; 1327 if (ccd.ccd_interleave == 0 && 1328 ((ccio->ccio_flags & CCDF_MIRROR) || 1329 (ccio->ccio_flags & CCDF_PARITY))) { 1330 kprintf("ccd%d: disabling mirror/parity, interleave is 0\n", unit); 1331 ccio->ccio_flags &= ~(CCDF_MIRROR | CCDF_PARITY); 1332 } 1333 if ((ccio->ccio_flags & CCDF_MIRROR) && 1334 (ccio->ccio_flags & CCDF_PARITY)) { 1335 kprintf("ccd%d: can't specify both mirror and parity, using mirror\n", unit); 1336 ccio->ccio_flags &= ~CCDF_PARITY; 1337 } 1338 if ((ccio->ccio_flags & (CCDF_MIRROR | CCDF_PARITY)) && 1339 !(ccio->ccio_flags & CCDF_UNIFORM)) { 1340 kprintf("ccd%d: mirror/parity forces uniform flag\n", 1341 unit); 1342 ccio->ccio_flags |= CCDF_UNIFORM; 1343 } 1344 ccd.ccd_flags = ccio->ccio_flags & CCDF_USERMASK; 1345 1346 /* 1347 * Allocate space for and copy in the array of 1348 * componet pathnames and device numbers. 1349 */ 1350 cpp = kmalloc(ccio->ccio_ndisks * sizeof(char *), 1351 M_DEVBUF, M_WAITOK); 1352 vpp = kmalloc(ccio->ccio_ndisks * sizeof(struct vnode *), 1353 M_DEVBUF, M_WAITOK); 1354 1355 error = copyin((caddr_t)ccio->ccio_disks, (caddr_t)cpp, 1356 ccio->ccio_ndisks * sizeof(char **)); 1357 if (error) { 1358 kfree(vpp, M_DEVBUF); 1359 kfree(cpp, M_DEVBUF); 1360 ccdunlock(cs); 1361 return (error); 1362 } 1363 1364 #ifdef DEBUG 1365 if (ccddebug & CCDB_INIT) { 1366 for (i = 0; i < ccio->ccio_ndisks; ++i) 1367 kprintf("ccdioctl: component %d: 0x%x\n", 1368 i, cpp[i]); 1369 } 1370 #endif 1371 1372 for (i = 0; i < ccio->ccio_ndisks; ++i) { 1373 #ifdef DEBUG 1374 if (ccddebug & CCDB_INIT) 1375 kprintf("ccdioctl: lookedup = %d\n", lookedup); 1376 #endif 1377 if ((error = ccdlookup(cpp[i], &vpp[i])) != 0) { 1378 for (j = 0; j < lookedup; ++j) 1379 (void)vn_close(vpp[j], FREAD|FWRITE, NULL); 1380 kfree(vpp, M_DEVBUF); 1381 kfree(cpp, M_DEVBUF); 1382 ccdunlock(cs); 1383 return (error); 1384 } 1385 ++lookedup; 1386 } 1387 ccd.ccd_cpp = cpp; 1388 ccd.ccd_vpp = vpp; 1389 ccd.ccd_ndev = ccio->ccio_ndisks; 1390 1391 /* 1392 * Initialize the ccd. Fills in the softc for us. 1393 */ 1394 if ((error = ccdinit(&ccd, cpp, ap->a_cred)) != 0) { 1395 for (j = 0; j < lookedup; ++j) 1396 vn_close(vpp[j], FREAD|FWRITE, NULL); 1397 kfree(vpp, M_DEVBUF); 1398 kfree(cpp, M_DEVBUF); 1399 ccdunlock(cs); 1400 return (error); 1401 } 1402 1403 /* 1404 * The ccd has been successfully initialized, so 1405 * we can place it into the array and read the disklabel. 1406 */ 1407 bcopy(&ccd, &ccddevs[unit], sizeof(ccd)); 1408 ccio->ccio_unit = unit; 1409 ccio->ccio_size = cs->sc_size; 1410 1411 bzero(&info, sizeof(info)); 1412 info.d_media_blksize = cs->sc_geom.ccg_secsize; 1413 info.d_media_blocks = cs->sc_size; 1414 info.d_nheads = cs->sc_geom.ccg_ntracks; 1415 info.d_secpertrack = cs->sc_geom.ccg_nsectors; 1416 info.d_ncylinders = cs->sc_geom.ccg_ncylinders; 1417 info.d_secpercyl = info.d_nheads * info.d_secpertrack; 1418 1419 /* 1420 * For cases where a label is directly applied to the ccd, 1421 * without slices, DSO_COMPATMBR forces one sector be 1422 * reserved for backwards compatibility. 1423 */ 1424 info.d_dsflags = DSO_COMPATMBR; 1425 disk_setdiskinfo(&cs->sc_disk, &info); 1426 1427 ccdunlock(cs); 1428 1429 break; 1430 1431 case CCDIOCCLR: 1432 if ((cs->sc_flags & CCDF_INITED) == 0) 1433 return (ENXIO); 1434 1435 if ((ap->a_fflag & FWRITE) == 0) 1436 return (EBADF); 1437 1438 if ((error = ccdlock(cs)) != 0) 1439 return (error); 1440 1441 if (dev_drefs(cs->sc_dev) > 1) { 1442 ccdunlock(cs); 1443 return (EBUSY); 1444 } 1445 1446 /* 1447 * Free ccd_softc information and clear entry. 1448 */ 1449 1450 /* Close the components and free their pathnames. */ 1451 for (i = 0; i < cs->sc_nccdisks; ++i) { 1452 /* 1453 * XXX: this close could potentially fail and 1454 * cause Bad Things. Maybe we need to force 1455 * the close to happen? 1456 */ 1457 #ifdef DEBUG 1458 if (ccddebug & CCDB_VNODE) 1459 vprint("CCDIOCCLR: vnode info", 1460 cs->sc_cinfo[i].ci_vp); 1461 #endif 1462 (void)vn_close(cs->sc_cinfo[i].ci_vp, FREAD|FWRITE, NULL); 1463 kfree(cs->sc_cinfo[i].ci_path, M_DEVBUF); 1464 } 1465 1466 /* Free interleave index. */ 1467 for (i = 0; cs->sc_itable[i].ii_ndisk; ++i) 1468 kfree(cs->sc_itable[i].ii_index, M_DEVBUF); 1469 1470 /* Free component info and interleave table. */ 1471 kfree(cs->sc_cinfo, M_DEVBUF); 1472 kfree(cs->sc_itable, M_DEVBUF); 1473 cs->sc_cinfo = NULL; 1474 cs->sc_itable = NULL; 1475 cs->sc_flags &= ~CCDF_INITED; 1476 1477 /* 1478 * Free ccddevice information and clear entry. 1479 */ 1480 kfree(ccddevs[unit].ccd_cpp, M_DEVBUF); 1481 kfree(ccddevs[unit].ccd_vpp, M_DEVBUF); 1482 bcopy(&ccd, &ccddevs[unit], sizeof(ccd)); 1483 1484 /* 1485 * And remove the devstat entry. 1486 */ 1487 devstat_remove_entry(&cs->device_stats); 1488 1489 ccdunlock(cs); 1490 1491 break; 1492 1493 default: 1494 return (ENOTTY); 1495 } 1496 1497 return (0); 1498 } 1499 1500 static int 1501 ccddump(struct dev_dump_args *ap) 1502 { 1503 /* Not implemented. */ 1504 return ENXIO; 1505 } 1506 1507 /* 1508 * Lookup the provided name in the filesystem. If the file exists, 1509 * is a valid block device, and isn't being used by anyone else, 1510 * set *vpp to the file's vnode. 1511 */ 1512 static int 1513 ccdlookup(char *path, struct vnode **vpp) 1514 { 1515 struct nlookupdata nd; 1516 struct vnode *vp; 1517 int error; 1518 1519 *vpp = NULL; 1520 1521 error = nlookup_init(&nd, path, UIO_USERSPACE, NLC_FOLLOW|NLC_LOCKVP); 1522 if (error) 1523 return (error); 1524 if ((error = vn_open(&nd, NULL, FREAD|FWRITE, 0)) != 0) { 1525 #ifdef DEBUG 1526 if (ccddebug & (CCDB_FOLLOW|CCDB_INIT)) 1527 kprintf("ccdlookup: vn_open error = %d\n", error); 1528 #endif 1529 goto done; 1530 } 1531 vp = nd.nl_open_vp; 1532 1533 if (vp->v_opencount > 1) { 1534 error = EBUSY; 1535 goto done; 1536 } 1537 1538 if (!vn_isdisk(vp, &error)) 1539 goto done; 1540 1541 #ifdef DEBUG 1542 if (ccddebug & CCDB_VNODE) 1543 vprint("ccdlookup: vnode info", vp); 1544 #endif 1545 1546 vn_unlock(vp); 1547 nd.nl_open_vp = NULL; 1548 nlookup_done(&nd); 1549 *vpp = vp; /* leave ref intact */ 1550 return (0); 1551 done: 1552 nlookup_done(&nd); 1553 return (error); 1554 } 1555 1556 /* 1557 * Wait interruptibly for an exclusive lock. 1558 */ 1559 static int 1560 ccdlock(struct ccd_softc *cs) 1561 { 1562 lockmgr(&cs->sc_lock, LK_EXCLUSIVE); 1563 1564 return (0); 1565 } 1566 1567 /* 1568 * Unlock and wake up any waiters. 1569 */ 1570 static void 1571 ccdunlock(struct ccd_softc *cs) 1572 { 1573 lockmgr(&cs->sc_lock, LK_RELEASE); 1574 } 1575 1576 #ifdef DEBUG 1577 static void 1578 printiinfo(struct ccdiinfo *ii) 1579 { 1580 int ix, i; 1581 1582 for (ix = 0; ii->ii_ndisk; ix++, ii++) { 1583 kprintf(" itab[%d]: #dk %d sblk %d soff %d", 1584 ix, ii->ii_ndisk, ii->ii_startblk, ii->ii_startoff); 1585 for (i = 0; i < ii->ii_ndisk; i++) 1586 kprintf(" %d", ii->ii_index[i]); 1587 kprintf("\n"); 1588 } 1589 } 1590 #endif 1591