1 /* 2 * Copyright (c) 2007 The DragonFly Project. All rights reserved. 3 * 4 * This code is derived from software contributed to The DragonFly Project 5 * by Matthew Dillon <dillon@backplane.com> 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in 15 * the documentation and/or other materials provided with the 16 * distribution. 17 * 3. Neither the name of The DragonFly Project nor the names of its 18 * contributors may be used to endorse or promote products derived 19 * from this software without specific, prior written permission. 20 * 21 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 22 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 23 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS 24 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE 25 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, 26 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING, 27 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 28 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED 29 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 30 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT 31 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 32 * SUCH DAMAGE. 33 * 34 */ 35 /* 36 * Copyright (c) 1995 Jason R. Thorpe. 37 * All rights reserved. 38 * 39 * Redistribution and use in source and binary forms, with or without 40 * modification, are permitted provided that the following conditions 41 * are met: 42 * 1. Redistributions of source code must retain the above copyright 43 * notice, this list of conditions and the following disclaimer. 44 * 2. Redistributions in binary form must reproduce the above copyright 45 * notice, this list of conditions and the following disclaimer in the 46 * documentation and/or other materials provided with the distribution. 47 * 3. All advertising materials mentioning features or use of this software 48 * must display the following acknowledgement: 49 * This product includes software developed for the NetBSD Project 50 * by Jason R. Thorpe. 51 * 4. The name of the author may not be used to endorse or promote products 52 * derived from this software without specific prior written permission. 53 * 54 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR 55 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 56 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 57 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, 58 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, 59 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 60 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED 61 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 62 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 63 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 64 * SUCH DAMAGE. 65 */ 66 67 /* 68 * Copyright (c) 1988 University of Utah. 69 * Copyright (c) 1990, 1993 70 * The Regents of the University of California. All rights reserved. 71 * 72 * This code is derived from software contributed to Berkeley by 73 * the Systems Programming Group of the University of Utah Computer 74 * Science Department. 75 * 76 * Redistribution and use in source and binary forms, with or without 77 * modification, are permitted provided that the following conditions 78 * are met: 79 * 1. Redistributions of source code must retain the above copyright 80 * notice, this list of conditions and the following disclaimer. 81 * 2. Redistributions in binary form must reproduce the above copyright 82 * notice, this list of conditions and the following disclaimer in the 83 * documentation and/or other materials provided with the distribution. 84 * 3. All advertising materials mentioning features or use of this software 85 * must display the following acknowledgement: 86 * This product includes software developed by the University of 87 * California, Berkeley and its contributors. 88 * 4. Neither the name of the University nor the names of its contributors 89 * may be used to endorse or promote products derived from this software 90 * without specific prior written permission. 91 * 92 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 93 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 94 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 95 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 96 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 97 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 98 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 99 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 100 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 101 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 102 * SUCH DAMAGE. 103 * 104 * from: Utah $Hdr: cd.c 1.6 90/11/28$ 105 */ 106 /* 107 * @(#)cd.c 8.2 (Berkeley) 11/16/93 108 * $FreeBSD: src/sys/dev/ccd/ccd.c,v 1.73.2.1 2001/09/11 09:49:52 kris Exp $ 109 * $NetBSD: ccd.c,v 1.22 1995/12/08 19:13:26 thorpej Exp $ 110 */ 111 112 /* 113 * "Concatenated" disk driver. 114 * 115 * Original dynamic configuration support by: 116 * Jason R. Thorpe <thorpej@nas.nasa.gov> 117 * Numerical Aerodynamic Simulation Facility 118 * Mail Stop 258-6 119 * NASA Ames Research Center 120 * Moffett Field, CA 94035 121 */ 122 123 #include "use_ccd.h" 124 125 #include <sys/param.h> 126 #include <sys/systm.h> 127 #include <sys/kernel.h> 128 #include <sys/module.h> 129 #include <sys/proc.h> 130 #include <sys/buf.h> 131 #include <sys/malloc.h> 132 #include <sys/nlookup.h> 133 #include <sys/conf.h> 134 #include <sys/stat.h> 135 #include <sys/sysctl.h> 136 #include <sys/disk.h> 137 #include <sys/dtype.h> 138 #include <sys/diskslice.h> 139 #include <sys/devicestat.h> 140 #include <sys/fcntl.h> 141 #include <sys/vnode.h> 142 #include <sys/ccdvar.h> 143 144 #include <vm/vm_zone.h> 145 146 #include <vfs/ufs/dinode.h> /* XXX Used only for fs.h */ 147 #include <vfs/ufs/fs.h> /* XXX used only to get BBSIZE and SBSIZE */ 148 149 #include <sys/thread2.h> 150 #include <sys/buf2.h> 151 152 #if defined(CCDDEBUG) && !defined(DEBUG) 153 #define DEBUG 154 #endif 155 156 #ifdef DEBUG 157 #define CCDB_FOLLOW 0x01 158 #define CCDB_INIT 0x02 159 #define CCDB_IO 0x04 160 #define CCDB_LABEL 0x08 161 #define CCDB_VNODE 0x10 162 static int ccddebug = CCDB_FOLLOW | CCDB_INIT | CCDB_IO | CCDB_LABEL | 163 CCDB_VNODE; 164 SYSCTL_INT(_debug, OID_AUTO, ccddebug, CTLFLAG_RW, &ccddebug, 0, ""); 165 #undef DEBUG 166 #endif 167 168 #define ccdunit(x) dkunit(x) 169 #define ccdpart(x) dkpart(x) 170 171 /* 172 This is how mirroring works (only writes are special): 173 174 When initiating a write, ccdbuffer() returns two "struct ccdbuf *"s 175 linked together by the cb_mirror field. "cb_pflags & 176 CCDPF_MIRROR_DONE" is set to 0 on both of them. 177 178 When a component returns to ccdiodone(), it checks if "cb_pflags & 179 CCDPF_MIRROR_DONE" is set or not. If not, it sets the partner's 180 flag and returns. If it is, it means its partner has already 181 returned, so it will go to the regular cleanup. 182 183 */ 184 185 struct ccdbuf { 186 struct buf cb_buf; /* new I/O buf */ 187 struct vnode *cb_vp; /* related vnode */ 188 struct bio *cb_obio; /* ptr. to original I/O buf */ 189 int cb_unit; /* target unit */ 190 int cb_comp; /* target component */ 191 int cb_pflags; /* mirror/parity status flag */ 192 struct ccdbuf *cb_mirror; /* mirror counterpart */ 193 }; 194 195 /* bits in cb_pflags */ 196 #define CCDPF_MIRROR_DONE 1 /* if set, mirror counterpart is done */ 197 198 static d_open_t ccdopen; 199 static d_close_t ccdclose; 200 static d_strategy_t ccdstrategy; 201 static d_ioctl_t ccdioctl; 202 static d_dump_t ccddump; 203 204 static struct dev_ops ccd_ops = { 205 { "ccd", 0, D_DISK | D_MPSAFE }, 206 .d_open = ccdopen, 207 .d_close = ccdclose, 208 .d_read = physread, 209 .d_write = physwrite, 210 .d_ioctl = ccdioctl, 211 .d_strategy = ccdstrategy, 212 .d_dump = ccddump 213 }; 214 215 /* called during module initialization */ 216 static void ccdattach (void); 217 static int ccddetach (void); 218 static int ccd_modevent (module_t, int, void *); 219 220 /* called by biodone() at interrupt time */ 221 static void ccdiodone (struct bio *bio); 222 223 static void ccdstart (struct ccd_softc *, struct bio *); 224 static void ccdinterleave (struct ccd_softc *, int); 225 static void ccdintr (struct ccd_softc *, struct bio *); 226 static int ccdinit (struct ccddevice *, char **, struct ucred *); 227 static int ccdlookup (char *, struct vnode **); 228 static void ccdbuffer (struct ccdbuf **ret, struct ccd_softc *, 229 struct bio *, off_t, caddr_t, long); 230 static int ccdlock (struct ccd_softc *); 231 static void ccdunlock (struct ccd_softc *); 232 233 #ifdef DEBUG 234 static void printiinfo (struct ccdiinfo *); 235 #endif 236 237 /* Non-private for the benefit of libkvm. */ 238 struct ccd_softc *ccd_softc; 239 struct ccddevice *ccddevs; 240 static int numccd = 0; 241 242 /* 243 * getccdbuf() - Allocate and zero a ccd buffer. 244 */ 245 static struct ccdbuf * 246 getccdbuf(void) 247 { 248 struct ccdbuf *cbp; 249 250 cbp = kmalloc(sizeof(struct ccdbuf), M_DEVBUF, M_WAITOK | M_ZERO); 251 initbufbio(&cbp->cb_buf); 252 253 /* 254 * independant struct buf initialization 255 */ 256 buf_dep_init(&cbp->cb_buf); 257 BUF_LOCK(&cbp->cb_buf, LK_EXCLUSIVE); 258 BUF_KERNPROC(&cbp->cb_buf); 259 cbp->cb_buf.b_flags = B_PAGING | B_BNOCLIP; 260 261 return(cbp); 262 } 263 264 /* 265 * putccdbuf() - Free a ccd buffer. 266 */ 267 static void 268 putccdbuf(struct ccdbuf *cbp) 269 { 270 BUF_UNLOCK(&cbp->cb_buf); 271 272 uninitbufbio(&cbp->cb_buf); 273 kfree(cbp, M_DEVBUF); 274 } 275 276 /* 277 * Called by main() during pseudo-device attachment. All we need 278 * to do is allocate enough space for devices to be configured later, and 279 * add devsw entries. 280 */ 281 static void 282 ccdattach(void) 283 { 284 struct disk_info info; 285 struct ccd_softc *cs; 286 int i; 287 int num = NCCD; 288 289 if (num > 1) 290 kprintf("ccd0-%d: Concatenated disk drivers\n", num-1); 291 else 292 kprintf("ccd0: Concatenated disk driver\n"); 293 294 ccd_softc = kmalloc(num * sizeof(struct ccd_softc), M_DEVBUF, 295 M_WAITOK | M_ZERO); 296 ccddevs = kmalloc(num * sizeof(struct ccddevice), M_DEVBUF, 297 M_WAITOK | M_ZERO); 298 numccd = num; 299 300 /* 301 * With normal disk devices the open simply fails if the media 302 * is not present. With CCD we have to be able to open the 303 * raw disk to use the ioctl's to set it up, so create a dummy 304 * disk info structure so dscheck() doesn't blow up. 305 */ 306 bzero(&info, sizeof(info)); 307 info.d_media_blksize = DEV_BSIZE; 308 309 for (i = 0; i < numccd; ++i) { 310 cs = &ccd_softc[i]; 311 cs->sc_dev = disk_create(i, &cs->sc_disk, &ccd_ops); 312 cs->sc_dev->si_drv1 = cs; 313 cs->sc_dev->si_iosize_max = 256 * 512; /* XXX */ 314 disk_setdiskinfo(&cs->sc_disk, &info); 315 } 316 } 317 318 static int 319 ccddetach(void) 320 { 321 struct ccd_softc *cs; 322 struct dev_ioctl_args ioctl_args; 323 int i; 324 int error = 0; 325 int eval; 326 327 bzero(&ioctl_args, sizeof(ioctl_args)); 328 329 for (i = 0; i < numccd; ++i) { 330 cs = &ccd_softc[i]; 331 if (cs->sc_dev == NULL) 332 continue; 333 ioctl_args.a_head.a_dev = cs->sc_dev; 334 ioctl_args.a_cmd = CCDIOCCLR; 335 ioctl_args.a_fflag = FWRITE; 336 eval = ccdioctl(&ioctl_args); 337 if (eval && eval != ENXIO) { 338 kprintf("ccd%d: In use, cannot detach\n", i); 339 error = EBUSY; 340 } 341 } 342 if (error == 0) { 343 for (i = 0; i < numccd; ++i) { 344 cs = &ccd_softc[i]; 345 if (cs->sc_dev == NULL) 346 continue; 347 disk_destroy(&cs->sc_disk); 348 cs->sc_dev = NULL; 349 } 350 if (ccd_softc) 351 kfree(ccd_softc, M_DEVBUF); 352 if (ccddevs) 353 kfree(ccddevs, M_DEVBUF); 354 } 355 return (error); 356 } 357 358 static int 359 ccd_modevent(module_t mod, int type, void *data) 360 { 361 int error = 0; 362 363 switch (type) { 364 case MOD_LOAD: 365 ccdattach(); 366 break; 367 368 case MOD_UNLOAD: 369 error = ccddetach(); 370 break; 371 372 default: /* MOD_SHUTDOWN etc */ 373 break; 374 } 375 return (error); 376 } 377 378 DEV_MODULE(ccd, ccd_modevent, NULL); 379 380 static int 381 ccdinit(struct ccddevice *ccd, char **cpaths, struct ucred *cred) 382 { 383 struct ccd_softc *cs = &ccd_softc[ccd->ccd_unit]; 384 struct ccdcinfo *ci = NULL; /* XXX */ 385 int ix; 386 struct vnode *vp; 387 u_int64_t skip; 388 u_int64_t size; 389 u_int64_t minsize; 390 int maxsecsize; 391 struct partinfo dpart; 392 struct ccdgeom *ccg = &cs->sc_geom; 393 char tmppath[MAXPATHLEN]; 394 int error = 0; 395 396 #ifdef DEBUG 397 if (ccddebug & (CCDB_FOLLOW|CCDB_INIT)) 398 kprintf("ccdinit: unit %d\n", ccd->ccd_unit); 399 #endif 400 401 cs->sc_size = 0; 402 cs->sc_ileave = ccd->ccd_interleave; 403 cs->sc_nccdisks = ccd->ccd_ndev; 404 405 /* Allocate space for the component info. */ 406 cs->sc_cinfo = kmalloc(cs->sc_nccdisks * sizeof(struct ccdcinfo), 407 M_DEVBUF, M_WAITOK); 408 cs->sc_maxiosize = MAXPHYS; 409 410 lockinit(&cs->sc_lock, "ccdlck", 0, 0); 411 ccdlock(cs); 412 413 /* 414 * Verify that each component piece exists and record 415 * relevant information about it. 416 */ 417 maxsecsize = 0; 418 minsize = 0; 419 for (ix = 0; ix < cs->sc_nccdisks; ix++) { 420 vp = ccd->ccd_vpp[ix]; 421 ci = &cs->sc_cinfo[ix]; 422 ci->ci_vp = vp; 423 424 /* 425 * Copy in the pathname of the component. 426 */ 427 bzero(tmppath, sizeof(tmppath)); /* sanity */ 428 if ((error = copyinstr(cpaths[ix], tmppath, 429 MAXPATHLEN, &ci->ci_pathlen)) != 0) { 430 #ifdef DEBUG 431 if (ccddebug & (CCDB_FOLLOW|CCDB_INIT)) 432 kprintf("ccd%d: can't copy path, error = %d\n", 433 ccd->ccd_unit, error); 434 #endif 435 goto fail; 436 } 437 ci->ci_path = kmalloc(ci->ci_pathlen, M_DEVBUF, M_WAITOK); 438 bcopy(tmppath, ci->ci_path, ci->ci_pathlen); 439 440 ci->ci_dev = vn_todev(vp); 441 if (ci->ci_dev->si_iosize_max && 442 cs->sc_maxiosize > ci->ci_dev->si_iosize_max) { 443 cs->sc_maxiosize = ci->ci_dev->si_iosize_max; 444 } 445 446 /* 447 * Get partition information for the component. 448 */ 449 error = VOP_IOCTL(vp, DIOCGPART, (caddr_t)&dpart, FREAD, 450 cred, NULL); 451 if (error) { 452 #ifdef DEBUG 453 if (ccddebug & (CCDB_FOLLOW|CCDB_INIT)) 454 kprintf("ccd%d: %s: ioctl failed, error = %d\n", 455 ccd->ccd_unit, ci->ci_path, error); 456 #endif 457 goto fail; 458 } 459 if (dpart.fstype != FS_CCD && 460 !kuuid_is_ccd(&dpart.fstype_uuid)) { 461 kprintf("ccd%d: %s: filesystem type must be 'ccd'\n", 462 ccd->ccd_unit, ci->ci_path); 463 error = EFTYPE; 464 goto fail; 465 } 466 if (maxsecsize < dpart.media_blksize) 467 maxsecsize = dpart.media_blksize; 468 469 /* 470 * Skip a certain amount of storage at the beginning of 471 * the component to make sure we don't infringe on any 472 * reserved sectors. This is handled entirely by 473 * dpart.reserved_blocks but we also impose a minimum 474 * of 16 sectors for backwards compatibility. 475 */ 476 skip = 16; 477 if (skip < dpart.reserved_blocks) 478 skip = dpart.reserved_blocks; 479 size = dpart.media_blocks - skip; 480 481 /* 482 * Calculate the size, truncating to an interleave 483 * boundary if necessary. 484 */ 485 if (cs->sc_ileave > 1) 486 size -= size % cs->sc_ileave; 487 488 if ((int64_t)size <= 0) { 489 #ifdef DEBUG 490 if (ccddebug & (CCDB_FOLLOW|CCDB_INIT)) 491 kprintf("ccd%d: %s: size == 0\n", 492 ccd->ccd_unit, ci->ci_path); 493 #endif 494 error = ENODEV; 495 goto fail; 496 } 497 498 /* 499 * Calculate the smallest uniform component, used 500 * elsewhere. 501 */ 502 if (minsize == 0 || minsize > size) 503 minsize = size; 504 ci->ci_skip = skip; 505 ci->ci_size = size; 506 cs->sc_size += size; 507 } 508 kprintf("ccd%d: max component iosize is %d total blocks %lld\n", 509 cs->sc_unit, cs->sc_maxiosize, (long long)cs->sc_size); 510 511 /* 512 * Don't allow the interleave to be smaller than 513 * the biggest component sector. 514 */ 515 if ((cs->sc_ileave > 0) && 516 (cs->sc_ileave % (maxsecsize / DEV_BSIZE))) { 517 #ifdef DEBUG 518 if (ccddebug & (CCDB_FOLLOW|CCDB_INIT)) 519 kprintf("ccd%d: interleave must be at least %d\n", 520 ccd->ccd_unit, (maxsecsize / DEV_BSIZE)); 521 #endif 522 error = EINVAL; 523 goto fail; 524 } 525 526 /* 527 * If uniform interleave is desired set all sizes to that of 528 * the smallest component. This will guarentee that a single 529 * interleave table is generated. 530 * 531 * Lost space must be taken into account when calculating the 532 * overall size. Half the space is lost when CCDF_MIRROR is 533 * specified. One disk is lost when CCDF_PARITY is specified. 534 */ 535 if (ccd->ccd_flags & CCDF_UNIFORM) { 536 for (ci = cs->sc_cinfo; 537 ci < &cs->sc_cinfo[cs->sc_nccdisks]; ci++) { 538 ci->ci_size = minsize; 539 } 540 if (ccd->ccd_flags & CCDF_MIRROR) { 541 /* 542 * Check to see if an even number of components 543 * have been specified. The interleave must also 544 * be non-zero in order for us to be able to 545 * guarentee the topology. 546 */ 547 if (cs->sc_nccdisks % 2) { 548 kprintf("ccd%d: mirroring requires an even number of disks\n", ccd->ccd_unit ); 549 error = EINVAL; 550 goto fail; 551 } 552 if (cs->sc_ileave == 0) { 553 kprintf("ccd%d: an interleave must be specified when mirroring\n", ccd->ccd_unit); 554 error = EINVAL; 555 goto fail; 556 } 557 cs->sc_size = (cs->sc_nccdisks/2) * minsize; 558 } else if (ccd->ccd_flags & CCDF_PARITY) { 559 cs->sc_size = (cs->sc_nccdisks-1) * minsize; 560 } else { 561 if (cs->sc_ileave == 0) { 562 kprintf("ccd%d: an interleave must be specified when using parity\n", ccd->ccd_unit); 563 error = EINVAL; 564 goto fail; 565 } 566 cs->sc_size = cs->sc_nccdisks * minsize; 567 } 568 } 569 570 /* 571 * Construct the interleave table. 572 */ 573 ccdinterleave(cs, ccd->ccd_unit); 574 575 /* 576 * Create pseudo-geometry based on 1MB cylinders. It's 577 * pretty close. 578 */ 579 ccg->ccg_secsize = maxsecsize; 580 ccg->ccg_ntracks = 1; 581 ccg->ccg_nsectors = 1024 * 1024 / ccg->ccg_secsize; 582 ccg->ccg_ncylinders = cs->sc_size / ccg->ccg_nsectors; 583 584 /* 585 * Add an devstat entry for this device. 586 */ 587 devstat_add_entry(&cs->device_stats, "ccd", ccd->ccd_unit, 588 ccg->ccg_secsize, DEVSTAT_ALL_SUPPORTED, 589 DEVSTAT_TYPE_STORARRAY |DEVSTAT_TYPE_IF_OTHER, 590 DEVSTAT_PRIORITY_ARRAY); 591 592 cs->sc_flags |= CCDF_INITED; 593 cs->sc_cflags = ccd->ccd_flags; /* So we can find out later... */ 594 cs->sc_unit = ccd->ccd_unit; 595 return (0); 596 fail: 597 while (ci > cs->sc_cinfo) { 598 ci--; 599 kfree(ci->ci_path, M_DEVBUF); 600 } 601 kfree(cs->sc_cinfo, M_DEVBUF); 602 cs->sc_cinfo = NULL; 603 return (error); 604 } 605 606 static void 607 ccdinterleave(struct ccd_softc *cs, int unit) 608 { 609 struct ccdcinfo *ci, *smallci; 610 struct ccdiinfo *ii; 611 u_int64_t bn; 612 u_int64_t lbn; 613 u_int64_t size; 614 int icount; 615 int ix; 616 617 #ifdef DEBUG 618 if (ccddebug & CCDB_INIT) 619 kprintf("ccdinterleave(%x): ileave %d\n", cs, cs->sc_ileave); 620 #endif 621 622 /* 623 * Allocate an interleave table. The worst case occurs when each 624 * of N disks is of a different size, resulting in N interleave 625 * tables. 626 * 627 * Chances are this is too big, but we don't care. 628 */ 629 icount = cs->sc_nccdisks + 1; 630 cs->sc_itable = kmalloc(icount * sizeof(struct ccdiinfo), 631 M_DEVBUF, M_WAITOK|M_ZERO); 632 633 /* 634 * Trivial case: no interleave (actually interleave of disk size). 635 * Each table entry represents a single component in its entirety. 636 * 637 * An interleave of 0 may not be used with a mirror or parity setup. 638 */ 639 if (cs->sc_ileave == 0) { 640 bn = 0; 641 ii = cs->sc_itable; 642 643 for (ix = 0; ix < cs->sc_nccdisks; ix++) { 644 /* Allocate space for ii_index. */ 645 ii->ii_index = kmalloc(sizeof(int), M_DEVBUF, M_WAITOK); 646 ii->ii_ndisk = 1; 647 ii->ii_startblk = bn; 648 ii->ii_startoff = 0; 649 ii->ii_index[0] = ix; 650 bn += cs->sc_cinfo[ix].ci_size; 651 ii++; 652 } 653 ii->ii_ndisk = 0; 654 #ifdef DEBUG 655 if (ccddebug & CCDB_INIT) 656 printiinfo(cs->sc_itable); 657 #endif 658 return; 659 } 660 661 /* 662 * The following isn't fast or pretty; it doesn't have to be. 663 */ 664 size = 0; 665 bn = lbn = 0; 666 for (ii = cs->sc_itable; ii < &cs->sc_itable[icount]; ++ii) { 667 /* 668 * Allocate space for ii_index. We might allocate more then 669 * we use. 670 */ 671 ii->ii_index = kmalloc((sizeof(int) * cs->sc_nccdisks), 672 M_DEVBUF, M_WAITOK); 673 674 /* 675 * Locate the smallest of the remaining components 676 */ 677 smallci = NULL; 678 ci = cs->sc_cinfo; 679 while (ci < &cs->sc_cinfo[cs->sc_nccdisks]) { 680 if (ci->ci_size > size && 681 (smallci == NULL || 682 ci->ci_size < smallci->ci_size)) { 683 smallci = ci; 684 } 685 ++ci; 686 } 687 688 /* 689 * Nobody left, all done 690 */ 691 if (smallci == NULL) { 692 ii->ii_ndisk = 0; 693 break; 694 } 695 696 /* 697 * Record starting logical block using an sc_ileave blocksize. 698 */ 699 ii->ii_startblk = bn / cs->sc_ileave; 700 701 /* 702 * Record starting component block using an sc_ileave 703 * blocksize. This value is relative to the beginning of 704 * a component disk. 705 */ 706 ii->ii_startoff = lbn; 707 708 /* 709 * Determine how many disks take part in this interleave 710 * and record their indices. 711 */ 712 ix = 0; 713 for (ci = cs->sc_cinfo; 714 ci < &cs->sc_cinfo[cs->sc_nccdisks]; ci++) { 715 if (ci->ci_size >= smallci->ci_size) { 716 ii->ii_index[ix++] = ci - cs->sc_cinfo; 717 } 718 } 719 ii->ii_ndisk = ix; 720 721 /* 722 * Adjust for loop 723 */ 724 bn += ix * (smallci->ci_size - size); 725 lbn = smallci->ci_size / cs->sc_ileave; 726 size = smallci->ci_size; 727 } 728 if (ii == &cs->sc_itable[icount]) 729 panic("ccdinterlave software bug! table exhausted"); 730 #ifdef DEBUG 731 if (ccddebug & CCDB_INIT) 732 printiinfo(cs->sc_itable); 733 #endif 734 } 735 736 /* ARGSUSED */ 737 static int 738 ccdopen(struct dev_open_args *ap) 739 { 740 cdev_t dev = ap->a_head.a_dev; 741 int unit = ccdunit(dev); 742 struct ccd_softc *cs; 743 int error = 0; 744 745 #ifdef DEBUG 746 if (ccddebug & CCDB_FOLLOW) 747 kprintf("ccdopen(%x, %x)\n", dev, flags); 748 #endif 749 if (unit >= numccd) 750 return (ENXIO); 751 cs = &ccd_softc[unit]; 752 753 if ((error = ccdlock(cs)) == 0) { 754 ccdunlock(cs); 755 } 756 return (error); 757 } 758 759 /* ARGSUSED */ 760 static int 761 ccdclose(struct dev_close_args *ap) 762 { 763 cdev_t dev = ap->a_head.a_dev; 764 int unit = ccdunit(dev); 765 struct ccd_softc *cs; 766 int error = 0; 767 768 #ifdef DEBUG 769 if (ccddebug & CCDB_FOLLOW) 770 kprintf("ccdclose(%x, %x)\n", dev, flags); 771 #endif 772 773 if (unit >= numccd) 774 return (ENXIO); 775 cs = &ccd_softc[unit]; 776 if ((error = ccdlock(cs)) == 0) { 777 ccdunlock(cs); 778 } 779 return (error); 780 } 781 782 static int 783 ccdstrategy(struct dev_strategy_args *ap) 784 { 785 cdev_t dev = ap->a_head.a_dev; 786 struct bio *bio = ap->a_bio; 787 int unit = ccdunit(dev); 788 struct bio *nbio; 789 struct buf *bp = bio->bio_buf; 790 struct ccd_softc *cs = &ccd_softc[unit]; 791 u_int64_t pbn; /* in sc_secsize chunks */ 792 u_int32_t sz; /* in sc_secsize chunks */ 793 794 #ifdef DEBUG 795 if (ccddebug & CCDB_FOLLOW) 796 kprintf("ccdstrategy(%x): unit %d\n", bp, unit); 797 #endif 798 if ((cs->sc_flags & CCDF_INITED) == 0) { 799 bp->b_error = ENXIO; 800 goto error; 801 } 802 803 /* If it's a nil transfer, wake up the top half now. */ 804 if (bp->b_bcount == 0) { 805 bp->b_resid = 0; 806 goto done; 807 } 808 809 /* 810 * Do bounds checking and adjust transfer. If there's an 811 * error, the bounds check will flag that for us. 812 */ 813 814 pbn = bio->bio_offset / cs->sc_geom.ccg_secsize; 815 sz = howmany(bp->b_bcount, cs->sc_geom.ccg_secsize); 816 817 /* 818 * If out of bounds return an error. If the request goes 819 * past EOF, clip the request as appropriate. If exactly 820 * at EOF, return success (don't clip), but with 0 bytes 821 * of I/O. 822 * 823 * Mark EOF B_INVAL (just like bad), indicating that the 824 * contents of the buffer, if any, is invalid. 825 */ 826 if ((int64_t)pbn < 0) 827 goto bad; 828 if (pbn + sz > cs->sc_size) { 829 if (pbn > cs->sc_size || (bp->b_flags & B_BNOCLIP)) 830 goto bad; 831 if (pbn == cs->sc_size) { 832 bp->b_resid = bp->b_bcount; 833 bp->b_flags |= B_INVAL; 834 goto done; 835 } 836 sz = (long)(cs->sc_size - pbn); 837 bp->b_bcount = sz * cs->sc_geom.ccg_secsize; 838 } 839 nbio = bio; 840 841 bp->b_resid = bp->b_bcount; 842 nbio->bio_driver_info = dev; 843 844 /* 845 * "Start" the unit. 846 */ 847 ccdstart(cs, nbio); 848 return(0); 849 850 /* 851 * note: bio, not nbio, is valid at the done label. 852 */ 853 bad: 854 bp->b_error = EINVAL; 855 error: 856 bp->b_resid = bp->b_bcount; 857 bp->b_flags |= B_ERROR | B_INVAL; 858 done: 859 biodone(bio); 860 return(0); 861 } 862 863 static void 864 ccdstart(struct ccd_softc *cs, struct bio *bio) 865 { 866 long bcount, rcount; 867 struct ccdbuf *cbp[4]; 868 struct buf *bp = bio->bio_buf; 869 /* XXX! : 2 reads and 2 writes for RAID 4/5 */ 870 caddr_t addr; 871 off_t doffset; 872 873 #ifdef DEBUG 874 if (ccddebug & CCDB_FOLLOW) 875 kprintf("ccdstart(%x, %x)\n", cs, bp); 876 #endif 877 878 /* Record the transaction start */ 879 devstat_start_transaction(&cs->device_stats); 880 881 /* 882 * Allocate component buffers and fire off the requests 883 */ 884 doffset = bio->bio_offset; 885 addr = bp->b_data; 886 887 for (bcount = bp->b_bcount; bcount > 0; bcount -= rcount) { 888 ccdbuffer(cbp, cs, bio, doffset, addr, bcount); 889 rcount = cbp[0]->cb_buf.b_bcount; 890 891 if (cs->sc_cflags & CCDF_MIRROR) { 892 /* 893 * Mirroring. Writes go to both disks, reads are 894 * taken from whichever disk seems most appropriate. 895 * 896 * We attempt to localize reads to the disk whos arm 897 * is nearest the read request. We ignore seeks due 898 * to writes when making this determination and we 899 * also try to avoid hogging. 900 */ 901 if (cbp[0]->cb_buf.b_cmd != BUF_CMD_READ) { 902 vn_strategy(cbp[0]->cb_vp, 903 &cbp[0]->cb_buf.b_bio1); 904 vn_strategy(cbp[1]->cb_vp, 905 &cbp[1]->cb_buf.b_bio1); 906 } else { 907 int pick = cs->sc_pick; 908 daddr_t range = cs->sc_size / 16 * cs->sc_geom.ccg_secsize; 909 if (doffset < cs->sc_blk[pick] - range || 910 doffset > cs->sc_blk[pick] + range 911 ) { 912 cs->sc_pick = pick = 1 - pick; 913 } 914 cs->sc_blk[pick] = doffset + rcount; 915 vn_strategy(cbp[pick]->cb_vp, 916 &cbp[pick]->cb_buf.b_bio1); 917 } 918 } else { 919 /* 920 * Not mirroring 921 */ 922 vn_strategy(cbp[0]->cb_vp, 923 &cbp[0]->cb_buf.b_bio1); 924 } 925 doffset += rcount; 926 addr += rcount; 927 } 928 } 929 930 /* 931 * Build a component buffer header. 932 */ 933 static void 934 ccdbuffer(struct ccdbuf **cb, struct ccd_softc *cs, struct bio *bio, 935 off_t doffset, caddr_t addr, long bcount) 936 { 937 struct ccdcinfo *ci, *ci2 = NULL; /* XXX */ 938 struct ccdbuf *cbp; 939 u_int64_t bn; 940 u_int64_t cbn; 941 u_int64_t cboff; 942 off_t cbc; 943 944 #ifdef DEBUG 945 if (ccddebug & CCDB_IO) 946 kprintf("ccdbuffer(%x, %x, %d, %x, %d)\n", 947 cs, bp, bn, addr, bcount); 948 #endif 949 /* 950 * Determine which component bn falls in. 951 */ 952 bn = doffset / cs->sc_geom.ccg_secsize; 953 cbn = bn; 954 cboff = 0; 955 956 if (cs->sc_ileave == 0) { 957 /* 958 * Serially concatenated and neither a mirror nor a parity 959 * config. This is a special case. 960 */ 961 daddr_t sblk; 962 963 sblk = 0; 964 for (ci = cs->sc_cinfo; cbn >= sblk + ci->ci_size; ci++) 965 sblk += ci->ci_size; 966 cbn -= sblk; 967 } else { 968 struct ccdiinfo *ii; 969 int ccdisk, off; 970 971 /* 972 * Calculate cbn, the logical superblock (sc_ileave chunks), 973 * and cboff, a normal block offset (DEV_BSIZE chunks) relative 974 * to cbn. 975 */ 976 cboff = cbn % cs->sc_ileave; /* DEV_BSIZE gran */ 977 cbn = cbn / cs->sc_ileave; /* DEV_BSIZE * ileave gran */ 978 979 /* 980 * Figure out which interleave table to use. 981 */ 982 for (ii = cs->sc_itable; ii->ii_ndisk; ii++) { 983 if (ii->ii_startblk > cbn) 984 break; 985 } 986 ii--; 987 988 /* 989 * off is the logical superblock relative to the beginning 990 * of this interleave block. 991 */ 992 off = cbn - ii->ii_startblk; 993 994 /* 995 * We must calculate which disk component to use (ccdisk), 996 * and recalculate cbn to be the superblock relative to 997 * the beginning of the component. This is typically done by 998 * adding 'off' and ii->ii_startoff together. However, 'off' 999 * must typically be divided by the number of components in 1000 * this interleave array to be properly convert it from a 1001 * CCD-relative logical superblock number to a 1002 * component-relative superblock number. 1003 */ 1004 if (ii->ii_ndisk == 1) { 1005 /* 1006 * When we have just one disk, it can't be a mirror 1007 * or a parity config. 1008 */ 1009 ccdisk = ii->ii_index[0]; 1010 cbn = ii->ii_startoff + off; 1011 } else { 1012 if (cs->sc_cflags & CCDF_MIRROR) { 1013 /* 1014 * We have forced a uniform mapping, resulting 1015 * in a single interleave array. We double 1016 * up on the first half of the available 1017 * components and our mirror is in the second 1018 * half. This only works with a single 1019 * interleave array because doubling up 1020 * doubles the number of sectors, so there 1021 * cannot be another interleave array because 1022 * the next interleave array's calculations 1023 * would be off. 1024 */ 1025 int ndisk2 = ii->ii_ndisk / 2; 1026 ccdisk = ii->ii_index[off % ndisk2]; 1027 cbn = ii->ii_startoff + off / ndisk2; 1028 ci2 = &cs->sc_cinfo[ccdisk + ndisk2]; 1029 } else if (cs->sc_cflags & CCDF_PARITY) { 1030 /* 1031 * XXX not implemented yet 1032 */ 1033 int ndisk2 = ii->ii_ndisk - 1; 1034 ccdisk = ii->ii_index[off % ndisk2]; 1035 cbn = ii->ii_startoff + off / ndisk2; 1036 if (cbn % ii->ii_ndisk <= ccdisk) 1037 ccdisk++; 1038 } else { 1039 ccdisk = ii->ii_index[off % ii->ii_ndisk]; 1040 cbn = ii->ii_startoff + off / ii->ii_ndisk; 1041 } 1042 } 1043 1044 ci = &cs->sc_cinfo[ccdisk]; 1045 1046 /* 1047 * Convert cbn from a superblock to a normal block so it 1048 * can be used to calculate (along with cboff) the normal 1049 * block index into this particular disk. 1050 */ 1051 cbn *= cs->sc_ileave; 1052 } 1053 1054 /* 1055 * Fill in the component buf structure. 1056 * 1057 * NOTE: devices do not use b_bufsize, only b_bcount, but b_bcount 1058 * will be truncated on device EOF so we use b_bufsize to detect 1059 * the case. 1060 */ 1061 cbp = getccdbuf(); 1062 cbp->cb_buf.b_cmd = bio->bio_buf->b_cmd; 1063 cbp->cb_buf.b_flags |= bio->bio_buf->b_flags; 1064 cbp->cb_buf.b_data = addr; 1065 cbp->cb_vp = ci->ci_vp; 1066 if (cs->sc_ileave == 0) 1067 cbc = dbtob((off_t)(ci->ci_size - cbn)); 1068 else 1069 cbc = dbtob((off_t)(cs->sc_ileave - cboff)); 1070 if (cbc > cs->sc_maxiosize) 1071 cbc = cs->sc_maxiosize; 1072 cbp->cb_buf.b_bcount = (cbc < bcount) ? cbc : bcount; 1073 cbp->cb_buf.b_bufsize = cbp->cb_buf.b_bcount; 1074 1075 cbp->cb_buf.b_bio1.bio_done = ccdiodone; 1076 cbp->cb_buf.b_bio1.bio_caller_info1.ptr = cbp; 1077 cbp->cb_buf.b_bio1.bio_offset = dbtob(cbn + cboff + ci->ci_skip); 1078 1079 /* 1080 * context for ccdiodone 1081 */ 1082 cbp->cb_obio = bio; 1083 cbp->cb_unit = cs - ccd_softc; 1084 cbp->cb_comp = ci - cs->sc_cinfo; 1085 1086 #ifdef DEBUG 1087 if (ccddebug & CCDB_IO) 1088 kprintf(" dev %x(u%d): cbp %x off %lld addr %x bcnt %d\n", 1089 ci->ci_dev, ci-cs->sc_cinfo, cbp, 1090 cbp->cb_buf.b_bio1.bio_offset, 1091 cbp->cb_buf.b_data, cbp->cb_buf.b_bcount); 1092 #endif 1093 cb[0] = cbp; 1094 1095 /* 1096 * Note: both I/O's setup when reading from mirror, but only one 1097 * will be executed. 1098 */ 1099 if (cs->sc_cflags & CCDF_MIRROR) { 1100 /* mirror, setup second I/O */ 1101 cbp = getccdbuf(); 1102 1103 cbp->cb_buf.b_cmd = bio->bio_buf->b_cmd; 1104 cbp->cb_buf.b_flags |= bio->bio_buf->b_flags; 1105 cbp->cb_buf.b_data = addr; 1106 cbp->cb_vp = ci2->ci_vp; 1107 if (cs->sc_ileave == 0) 1108 cbc = dbtob((off_t)(ci->ci_size - cbn)); 1109 else 1110 cbc = dbtob((off_t)(cs->sc_ileave - cboff)); 1111 if (cbc > cs->sc_maxiosize) 1112 cbc = cs->sc_maxiosize; 1113 cbp->cb_buf.b_bcount = (cbc < bcount) ? cbc : bcount; 1114 cbp->cb_buf.b_bufsize = cbp->cb_buf.b_bcount; 1115 1116 cbp->cb_buf.b_bio1.bio_done = ccdiodone; 1117 cbp->cb_buf.b_bio1.bio_caller_info1.ptr = cbp; 1118 cbp->cb_buf.b_bio1.bio_offset = dbtob(cbn + cboff + ci2->ci_skip); 1119 1120 /* 1121 * context for ccdiodone 1122 */ 1123 cbp->cb_obio = bio; 1124 cbp->cb_unit = cs - ccd_softc; 1125 cbp->cb_comp = ci2 - cs->sc_cinfo; 1126 cb[1] = cbp; 1127 /* link together the ccdbuf's and clear "mirror done" flag */ 1128 cb[0]->cb_mirror = cb[1]; 1129 cb[1]->cb_mirror = cb[0]; 1130 cb[0]->cb_pflags &= ~CCDPF_MIRROR_DONE; 1131 cb[1]->cb_pflags &= ~CCDPF_MIRROR_DONE; 1132 } 1133 } 1134 1135 static void 1136 ccdintr(struct ccd_softc *cs, struct bio *bio) 1137 { 1138 struct buf *bp = bio->bio_buf; 1139 1140 #ifdef DEBUG 1141 if (ccddebug & CCDB_FOLLOW) 1142 kprintf("ccdintr(%x, %x)\n", cs, bp); 1143 #endif 1144 /* 1145 * Request is done for better or worse, wakeup the top half. 1146 */ 1147 if (bp->b_flags & B_ERROR) 1148 bp->b_resid = bp->b_bcount; 1149 devstat_end_transaction_buf(&cs->device_stats, bp); 1150 biodone(bio); 1151 } 1152 1153 /* 1154 * Called at interrupt time. 1155 * 1156 * Mark the component as done and if all components are done, 1157 * take a ccd interrupt. 1158 */ 1159 static void 1160 ccdiodone(struct bio *bio) 1161 { 1162 struct ccdbuf *cbp = bio->bio_caller_info1.ptr; 1163 struct bio *obio = cbp->cb_obio; 1164 struct buf *obp = obio->bio_buf; 1165 int unit = cbp->cb_unit; 1166 struct ccd_softc *sc = &ccd_softc[unit]; 1167 int count; 1168 1169 /* 1170 * Since we do not have exclusive access to underlying devices, 1171 * we can't keep cache translations around. 1172 */ 1173 clearbiocache(bio->bio_next); 1174 1175 ccdlock(sc); 1176 1177 #ifdef DEBUG 1178 if (ccddebug & CCDB_FOLLOW) 1179 kprintf("ccdiodone(%x)\n", cbp); 1180 if (ccddebug & CCDB_IO) { 1181 kprintf("ccdiodone: bp %x bcount %d resid %d\n", 1182 obp, obp->b_bcount, obp->b_resid); 1183 kprintf(" dev %x(u%d), cbp %x off %lld addr %x bcnt %d\n", 1184 cbp->cb_buf.b_dev, cbp->cb_comp, cbp, 1185 cbp->cb_buf.b_loffset, cbp->cb_buf.b_data, 1186 cbp->cb_buf.b_bcount); 1187 } 1188 #endif 1189 1190 /* 1191 * If an error occured, report it. If this is a mirrored 1192 * configuration and the first of two possible reads, do not 1193 * set the error in the bp yet because the second read may 1194 * succeed. 1195 */ 1196 if (cbp->cb_buf.b_flags & B_ERROR) { 1197 const char *msg = ""; 1198 1199 if ((sc->sc_cflags & CCDF_MIRROR) && 1200 (cbp->cb_buf.b_cmd == BUF_CMD_READ) && 1201 (cbp->cb_pflags & CCDPF_MIRROR_DONE) == 0) { 1202 /* 1203 * We will try our read on the other disk down 1204 * below, also reverse the default pick so if we 1205 * are doing a scan we do not keep hitting the 1206 * bad disk first. 1207 */ 1208 msg = ", trying other disk"; 1209 sc->sc_pick = 1 - sc->sc_pick; 1210 sc->sc_blk[sc->sc_pick] = obio->bio_offset; 1211 } else { 1212 obp->b_flags |= B_ERROR; 1213 obp->b_error = cbp->cb_buf.b_error ? 1214 cbp->cb_buf.b_error : EIO; 1215 } 1216 kprintf("ccd%d: error %d on component %d " 1217 "offset %jd (ccd offset %jd)%s\n", 1218 unit, obp->b_error, cbp->cb_comp, 1219 (intmax_t)cbp->cb_buf.b_bio2.bio_offset, 1220 (intmax_t)obio->bio_offset, 1221 msg); 1222 } 1223 1224 /* 1225 * Process mirror. If we are writing, I/O has been initiated on both 1226 * buffers and we fall through only after both are finished. 1227 * 1228 * If we are reading only one I/O is initiated at a time. If an 1229 * error occurs we initiate the second I/O and return, otherwise 1230 * we free the second I/O without initiating it. 1231 */ 1232 1233 if (sc->sc_cflags & CCDF_MIRROR) { 1234 if (cbp->cb_buf.b_cmd != BUF_CMD_READ) { 1235 /* 1236 * When writing, handshake with the second buffer 1237 * to determine when both are done. If both are not 1238 * done, return here. 1239 */ 1240 if ((cbp->cb_pflags & CCDPF_MIRROR_DONE) == 0) { 1241 cbp->cb_mirror->cb_pflags |= CCDPF_MIRROR_DONE; 1242 putccdbuf(cbp); 1243 ccdunlock(sc); 1244 return; 1245 } 1246 } else { 1247 /* 1248 * When reading, either dispose of the second buffer 1249 * or initiate I/O on the second buffer if an error 1250 * occured with this one. 1251 */ 1252 if ((cbp->cb_pflags & CCDPF_MIRROR_DONE) == 0) { 1253 if (cbp->cb_buf.b_flags & B_ERROR) { 1254 cbp->cb_mirror->cb_pflags |= 1255 CCDPF_MIRROR_DONE; 1256 vn_strategy( 1257 cbp->cb_mirror->cb_vp, 1258 &cbp->cb_mirror->cb_buf.b_bio1 1259 ); 1260 putccdbuf(cbp); 1261 ccdunlock(sc); 1262 return; 1263 } else { 1264 putccdbuf(cbp->cb_mirror); 1265 /* fall through */ 1266 } 1267 } 1268 } 1269 } 1270 1271 /* 1272 * Use our saved b_bufsize to determine if an unexpected EOF occured. 1273 */ 1274 count = cbp->cb_buf.b_bufsize; 1275 putccdbuf(cbp); 1276 1277 /* 1278 * If all done, "interrupt". 1279 */ 1280 obp->b_resid -= count; 1281 if (obp->b_resid < 0) 1282 panic("ccdiodone: count"); 1283 1284 ccdunlock(sc); 1285 1286 if (obp->b_resid == 0) 1287 ccdintr(sc, obio); 1288 } 1289 1290 static int 1291 ccdioctl(struct dev_ioctl_args *ap) 1292 { 1293 cdev_t dev = ap->a_head.a_dev; 1294 int unit = ccdunit(dev); 1295 int i, j, lookedup = 0, error = 0; 1296 struct ccd_softc *cs; 1297 struct ccd_ioctl *ccio = (struct ccd_ioctl *)ap->a_data; 1298 struct ccddevice ccd; 1299 struct disk_info info; 1300 char **cpp; 1301 struct vnode **vpp; 1302 1303 if (unit >= numccd) 1304 return (ENXIO); 1305 cs = &ccd_softc[unit]; 1306 1307 bzero(&ccd, sizeof(ccd)); 1308 1309 switch (ap->a_cmd) { 1310 case CCDIOCSET: 1311 if (cs->sc_flags & CCDF_INITED) 1312 return (EBUSY); 1313 1314 if ((ap->a_fflag & FWRITE) == 0) 1315 return (EBADF); 1316 1317 if ((error = ccdlock(cs)) != 0) 1318 return (error); 1319 1320 if (ccio->ccio_ndisks > CCD_MAXNDISKS) { 1321 ccdunlock(cs); 1322 return (EINVAL); 1323 } 1324 1325 /* Fill in some important bits. */ 1326 ccd.ccd_unit = unit; 1327 ccd.ccd_interleave = ccio->ccio_ileave; 1328 if (ccd.ccd_interleave == 0 && 1329 ((ccio->ccio_flags & CCDF_MIRROR) || 1330 (ccio->ccio_flags & CCDF_PARITY))) { 1331 kprintf("ccd%d: disabling mirror/parity, interleave is 0\n", unit); 1332 ccio->ccio_flags &= ~(CCDF_MIRROR | CCDF_PARITY); 1333 } 1334 if ((ccio->ccio_flags & CCDF_MIRROR) && 1335 (ccio->ccio_flags & CCDF_PARITY)) { 1336 kprintf("ccd%d: can't specify both mirror and parity, using mirror\n", unit); 1337 ccio->ccio_flags &= ~CCDF_PARITY; 1338 } 1339 if ((ccio->ccio_flags & (CCDF_MIRROR | CCDF_PARITY)) && 1340 !(ccio->ccio_flags & CCDF_UNIFORM)) { 1341 kprintf("ccd%d: mirror/parity forces uniform flag\n", 1342 unit); 1343 ccio->ccio_flags |= CCDF_UNIFORM; 1344 } 1345 ccd.ccd_flags = ccio->ccio_flags & CCDF_USERMASK; 1346 1347 /* 1348 * Allocate space for and copy in the array of 1349 * componet pathnames and device numbers. 1350 */ 1351 cpp = kmalloc(ccio->ccio_ndisks * sizeof(char *), 1352 M_DEVBUF, M_WAITOK); 1353 vpp = kmalloc(ccio->ccio_ndisks * sizeof(struct vnode *), 1354 M_DEVBUF, M_WAITOK); 1355 1356 error = copyin((caddr_t)ccio->ccio_disks, (caddr_t)cpp, 1357 ccio->ccio_ndisks * sizeof(char **)); 1358 if (error) { 1359 kfree(vpp, M_DEVBUF); 1360 kfree(cpp, M_DEVBUF); 1361 ccdunlock(cs); 1362 return (error); 1363 } 1364 1365 #ifdef DEBUG 1366 if (ccddebug & CCDB_INIT) { 1367 for (i = 0; i < ccio->ccio_ndisks; ++i) 1368 kprintf("ccdioctl: component %d: 0x%x\n", 1369 i, cpp[i]); 1370 } 1371 #endif 1372 1373 for (i = 0; i < ccio->ccio_ndisks; ++i) { 1374 #ifdef DEBUG 1375 if (ccddebug & CCDB_INIT) 1376 kprintf("ccdioctl: lookedup = %d\n", lookedup); 1377 #endif 1378 if ((error = ccdlookup(cpp[i], &vpp[i])) != 0) { 1379 for (j = 0; j < lookedup; ++j) 1380 (void)vn_close(vpp[j], FREAD|FWRITE, NULL); 1381 kfree(vpp, M_DEVBUF); 1382 kfree(cpp, M_DEVBUF); 1383 ccdunlock(cs); 1384 return (error); 1385 } 1386 ++lookedup; 1387 } 1388 ccd.ccd_cpp = cpp; 1389 ccd.ccd_vpp = vpp; 1390 ccd.ccd_ndev = ccio->ccio_ndisks; 1391 1392 /* 1393 * Initialize the ccd. Fills in the softc for us. 1394 */ 1395 if ((error = ccdinit(&ccd, cpp, ap->a_cred)) != 0) { 1396 for (j = 0; j < lookedup; ++j) 1397 vn_close(vpp[j], FREAD|FWRITE, NULL); 1398 kfree(vpp, M_DEVBUF); 1399 kfree(cpp, M_DEVBUF); 1400 ccdunlock(cs); 1401 return (error); 1402 } 1403 1404 /* 1405 * The ccd has been successfully initialized, so 1406 * we can place it into the array and read the disklabel. 1407 */ 1408 bcopy(&ccd, &ccddevs[unit], sizeof(ccd)); 1409 ccio->ccio_unit = unit; 1410 ccio->ccio_size = cs->sc_size; 1411 1412 bzero(&info, sizeof(info)); 1413 info.d_media_blksize = cs->sc_geom.ccg_secsize; 1414 info.d_media_blocks = cs->sc_size; 1415 info.d_nheads = cs->sc_geom.ccg_ntracks; 1416 info.d_secpertrack = cs->sc_geom.ccg_nsectors; 1417 info.d_ncylinders = cs->sc_geom.ccg_ncylinders; 1418 info.d_secpercyl = info.d_nheads * info.d_secpertrack; 1419 1420 /* 1421 * For cases where a label is directly applied to the ccd, 1422 * without slices, DSO_COMPATMBR forces one sector be 1423 * reserved for backwards compatibility. 1424 */ 1425 info.d_dsflags = DSO_COMPATMBR; 1426 disk_setdiskinfo(&cs->sc_disk, &info); 1427 1428 ccdunlock(cs); 1429 1430 break; 1431 1432 case CCDIOCCLR: 1433 if ((cs->sc_flags & CCDF_INITED) == 0) 1434 return (ENXIO); 1435 1436 if ((ap->a_fflag & FWRITE) == 0) 1437 return (EBADF); 1438 1439 if ((error = ccdlock(cs)) != 0) 1440 return (error); 1441 1442 if (dev_drefs(cs->sc_dev) > 1) { 1443 ccdunlock(cs); 1444 return (EBUSY); 1445 } 1446 1447 /* 1448 * Free ccd_softc information and clear entry. 1449 */ 1450 1451 /* Close the components and free their pathnames. */ 1452 for (i = 0; i < cs->sc_nccdisks; ++i) { 1453 /* 1454 * XXX: this close could potentially fail and 1455 * cause Bad Things. Maybe we need to force 1456 * the close to happen? 1457 */ 1458 #ifdef DEBUG 1459 if (ccddebug & CCDB_VNODE) 1460 vprint("CCDIOCCLR: vnode info", 1461 cs->sc_cinfo[i].ci_vp); 1462 #endif 1463 (void)vn_close(cs->sc_cinfo[i].ci_vp, FREAD|FWRITE, NULL); 1464 kfree(cs->sc_cinfo[i].ci_path, M_DEVBUF); 1465 } 1466 1467 /* Free interleave index. */ 1468 for (i = 0; cs->sc_itable[i].ii_ndisk; ++i) 1469 kfree(cs->sc_itable[i].ii_index, M_DEVBUF); 1470 1471 /* Free component info and interleave table. */ 1472 kfree(cs->sc_cinfo, M_DEVBUF); 1473 kfree(cs->sc_itable, M_DEVBUF); 1474 cs->sc_cinfo = NULL; 1475 cs->sc_itable = NULL; 1476 cs->sc_flags &= ~CCDF_INITED; 1477 1478 /* 1479 * Free ccddevice information and clear entry. 1480 */ 1481 kfree(ccddevs[unit].ccd_cpp, M_DEVBUF); 1482 kfree(ccddevs[unit].ccd_vpp, M_DEVBUF); 1483 bcopy(&ccd, &ccddevs[unit], sizeof(ccd)); 1484 1485 /* 1486 * And remove the devstat entry. 1487 */ 1488 devstat_remove_entry(&cs->device_stats); 1489 1490 ccdunlock(cs); 1491 1492 break; 1493 1494 default: 1495 return (ENOTTY); 1496 } 1497 1498 return (0); 1499 } 1500 1501 static int 1502 ccddump(struct dev_dump_args *ap) 1503 { 1504 /* Not implemented. */ 1505 return ENXIO; 1506 } 1507 1508 /* 1509 * Lookup the provided name in the filesystem. If the file exists, 1510 * is a valid block device, and isn't being used by anyone else, 1511 * set *vpp to the file's vnode. 1512 */ 1513 static int 1514 ccdlookup(char *path, struct vnode **vpp) 1515 { 1516 struct nlookupdata nd; 1517 struct vnode *vp; 1518 int error; 1519 1520 *vpp = NULL; 1521 1522 error = nlookup_init(&nd, path, UIO_USERSPACE, NLC_FOLLOW|NLC_LOCKVP); 1523 if (error) 1524 return (error); 1525 if ((error = vn_open(&nd, NULL, FREAD|FWRITE, 0)) != 0) { 1526 #ifdef DEBUG 1527 if (ccddebug & (CCDB_FOLLOW|CCDB_INIT)) 1528 kprintf("ccdlookup: vn_open error = %d\n", error); 1529 #endif 1530 goto done; 1531 } 1532 vp = nd.nl_open_vp; 1533 1534 if (vp->v_opencount > 1) { 1535 error = EBUSY; 1536 goto done; 1537 } 1538 1539 if (!vn_isdisk(vp, &error)) 1540 goto done; 1541 1542 #ifdef DEBUG 1543 if (ccddebug & CCDB_VNODE) 1544 vprint("ccdlookup: vnode info", vp); 1545 #endif 1546 1547 vn_unlock(vp); 1548 nd.nl_open_vp = NULL; 1549 nlookup_done(&nd); 1550 *vpp = vp; /* leave ref intact */ 1551 return (0); 1552 done: 1553 nlookup_done(&nd); 1554 return (error); 1555 } 1556 1557 /* 1558 * Wait interruptibly for an exclusive lock. 1559 */ 1560 static int 1561 ccdlock(struct ccd_softc *cs) 1562 { 1563 lockmgr(&cs->sc_lock, LK_EXCLUSIVE); 1564 1565 return (0); 1566 } 1567 1568 /* 1569 * Unlock and wake up any waiters. 1570 */ 1571 static void 1572 ccdunlock(struct ccd_softc *cs) 1573 { 1574 lockmgr(&cs->sc_lock, LK_RELEASE); 1575 } 1576 1577 #ifdef DEBUG 1578 static void 1579 printiinfo(struct ccdiinfo *ii) 1580 { 1581 int ix, i; 1582 1583 for (ix = 0; ii->ii_ndisk; ix++, ii++) { 1584 kprintf(" itab[%d]: #dk %d sblk %d soff %d", 1585 ix, ii->ii_ndisk, ii->ii_startblk, ii->ii_startoff); 1586 for (i = 0; i < ii->ii_ndisk; i++) 1587 kprintf(" %d", ii->ii_index[i]); 1588 kprintf("\n"); 1589 } 1590 } 1591 #endif 1592