1 /* 2 * Copyright (c) 2007 The DragonFly Project. All rights reserved. 3 * 4 * This code is derived from software contributed to The DragonFly Project 5 * by Matthew Dillon <dillon@backplane.com> 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in 15 * the documentation and/or other materials provided with the 16 * distribution. 17 * 3. Neither the name of The DragonFly Project nor the names of its 18 * contributors may be used to endorse or promote products derived 19 * from this software without specific, prior written permission. 20 * 21 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 22 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 23 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS 24 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE 25 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, 26 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING, 27 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 28 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED 29 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 30 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT 31 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 32 * SUCH DAMAGE. 33 * 34 * $DragonFly: src/sys/dev/disk/ccd/ccd.c,v 1.50 2007/11/06 03:50:02 dillon Exp $ 35 */ 36 /* 37 * Copyright (c) 1995 Jason R. Thorpe. 38 * All rights reserved. 39 * 40 * Redistribution and use in source and binary forms, with or without 41 * modification, are permitted provided that the following conditions 42 * are met: 43 * 1. Redistributions of source code must retain the above copyright 44 * notice, this list of conditions and the following disclaimer. 45 * 2. Redistributions in binary form must reproduce the above copyright 46 * notice, this list of conditions and the following disclaimer in the 47 * documentation and/or other materials provided with the distribution. 48 * 3. All advertising materials mentioning features or use of this software 49 * must display the following acknowledgement: 50 * This product includes software developed for the NetBSD Project 51 * by Jason R. Thorpe. 52 * 4. The name of the author may not be used to endorse or promote products 53 * derived from this software without specific prior written permission. 54 * 55 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR 56 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 57 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 58 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, 59 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, 60 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 61 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED 62 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 63 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 64 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 65 * SUCH DAMAGE. 66 */ 67 68 /* 69 * Copyright (c) 1988 University of Utah. 70 * Copyright (c) 1990, 1993 71 * The Regents of the University of California. All rights reserved. 72 * 73 * This code is derived from software contributed to Berkeley by 74 * the Systems Programming Group of the University of Utah Computer 75 * Science Department. 76 * 77 * Redistribution and use in source and binary forms, with or without 78 * modification, are permitted provided that the following conditions 79 * are met: 80 * 1. Redistributions of source code must retain the above copyright 81 * notice, this list of conditions and the following disclaimer. 82 * 2. Redistributions in binary form must reproduce the above copyright 83 * notice, this list of conditions and the following disclaimer in the 84 * documentation and/or other materials provided with the distribution. 85 * 3. All advertising materials mentioning features or use of this software 86 * must display the following acknowledgement: 87 * This product includes software developed by the University of 88 * California, Berkeley and its contributors. 89 * 4. Neither the name of the University nor the names of its contributors 90 * may be used to endorse or promote products derived from this software 91 * without specific prior written permission. 92 * 93 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 94 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 95 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 96 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 97 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 98 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 99 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 100 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 101 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 102 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 103 * SUCH DAMAGE. 104 * 105 * from: Utah $Hdr: cd.c 1.6 90/11/28$ 106 */ 107 /* 108 * @(#)cd.c 8.2 (Berkeley) 11/16/93 109 * $FreeBSD: src/sys/dev/ccd/ccd.c,v 1.73.2.1 2001/09/11 09:49:52 kris Exp $ 110 * $NetBSD: ccd.c,v 1.22 1995/12/08 19:13:26 thorpej Exp $ 111 * $DragonFly: src/sys/dev/disk/ccd/ccd.c,v 1.50 2007/11/06 03:50:02 dillon Exp $ 112 */ 113 114 /* 115 * "Concatenated" disk driver. 116 * 117 * Original dynamic configuration support by: 118 * Jason R. Thorpe <thorpej@nas.nasa.gov> 119 * Numerical Aerodynamic Simulation Facility 120 * Mail Stop 258-6 121 * NASA Ames Research Center 122 * Moffett Field, CA 94035 123 */ 124 125 #include "use_ccd.h" 126 127 #include <sys/param.h> 128 #include <sys/systm.h> 129 #include <sys/kernel.h> 130 #include <sys/module.h> 131 #include <sys/proc.h> 132 #include <sys/buf.h> 133 #include <sys/malloc.h> 134 #include <sys/nlookup.h> 135 #include <sys/conf.h> 136 #include <sys/stat.h> 137 #include <sys/sysctl.h> 138 #include <sys/disk.h> 139 #include <sys/dtype.h> 140 #include <sys/diskslice.h> 141 #include <sys/devicestat.h> 142 #include <sys/fcntl.h> 143 #include <sys/vnode.h> 144 #include <sys/buf2.h> 145 #include <sys/ccdvar.h> 146 147 #include <vm/vm_zone.h> 148 149 #include <vfs/ufs/dinode.h> /* XXX Used only for fs.h */ 150 #include <vfs/ufs/fs.h> /* XXX used only to get BBSIZE and SBSIZE */ 151 152 #include <sys/thread2.h> 153 154 #if defined(CCDDEBUG) && !defined(DEBUG) 155 #define DEBUG 156 #endif 157 158 #ifdef DEBUG 159 #define CCDB_FOLLOW 0x01 160 #define CCDB_INIT 0x02 161 #define CCDB_IO 0x04 162 #define CCDB_LABEL 0x08 163 #define CCDB_VNODE 0x10 164 static int ccddebug = CCDB_FOLLOW | CCDB_INIT | CCDB_IO | CCDB_LABEL | 165 CCDB_VNODE; 166 SYSCTL_INT(_debug, OID_AUTO, ccddebug, CTLFLAG_RW, &ccddebug, 0, ""); 167 #undef DEBUG 168 #endif 169 170 #define ccdunit(x) dkunit(x) 171 #define ccdpart(x) dkpart(x) 172 173 /* 174 This is how mirroring works (only writes are special): 175 176 When initiating a write, ccdbuffer() returns two "struct ccdbuf *"s 177 linked together by the cb_mirror field. "cb_pflags & 178 CCDPF_MIRROR_DONE" is set to 0 on both of them. 179 180 When a component returns to ccdiodone(), it checks if "cb_pflags & 181 CCDPF_MIRROR_DONE" is set or not. If not, it sets the partner's 182 flag and returns. If it is, it means its partner has already 183 returned, so it will go to the regular cleanup. 184 185 */ 186 187 struct ccdbuf { 188 struct buf cb_buf; /* new I/O buf */ 189 struct vnode *cb_vp; /* related vnode */ 190 struct bio *cb_obio; /* ptr. to original I/O buf */ 191 struct ccdbuf *cb_freenext; /* free list link */ 192 int cb_unit; /* target unit */ 193 int cb_comp; /* target component */ 194 int cb_pflags; /* mirror/parity status flag */ 195 struct ccdbuf *cb_mirror; /* mirror counterpart */ 196 }; 197 198 /* bits in cb_pflags */ 199 #define CCDPF_MIRROR_DONE 1 /* if set, mirror counterpart is done */ 200 201 static d_open_t ccdopen; 202 static d_close_t ccdclose; 203 static d_strategy_t ccdstrategy; 204 static d_ioctl_t ccdioctl; 205 static d_dump_t ccddump; 206 207 #define NCCDFREEHIWAT 16 208 209 #define CDEV_MAJOR 74 210 211 static struct dev_ops ccd_ops = { 212 { "ccd", CDEV_MAJOR, D_DISK }, 213 .d_open = ccdopen, 214 .d_close = ccdclose, 215 .d_read = physread, 216 .d_write = physwrite, 217 .d_ioctl = ccdioctl, 218 .d_strategy = ccdstrategy, 219 .d_dump = ccddump 220 }; 221 222 /* called during module initialization */ 223 static void ccdattach (void); 224 static int ccd_modevent (module_t, int, void *); 225 226 /* called by biodone() at interrupt time */ 227 static void ccdiodone (struct bio *bio); 228 229 static void ccdstart (struct ccd_softc *, struct bio *); 230 static void ccdinterleave (struct ccd_softc *, int); 231 static void ccdintr (struct ccd_softc *, struct bio *); 232 static int ccdinit (struct ccddevice *, char **, struct ucred *); 233 static int ccdlookup (char *, struct vnode **); 234 static void ccdbuffer (struct ccdbuf **ret, struct ccd_softc *, 235 struct bio *, off_t, caddr_t, long); 236 static int ccdlock (struct ccd_softc *); 237 static void ccdunlock (struct ccd_softc *); 238 239 #ifdef DEBUG 240 static void printiinfo (struct ccdiinfo *); 241 #endif 242 243 /* Non-private for the benefit of libkvm. */ 244 struct ccd_softc *ccd_softc; 245 struct ccddevice *ccddevs; 246 struct ccdbuf *ccdfreebufs; 247 static int numccdfreebufs; 248 static int numccd = 0; 249 250 /* 251 * getccdbuf() - Allocate and zero a ccd buffer. 252 * 253 * This routine is called at splbio(). 254 */ 255 256 static __inline 257 struct ccdbuf * 258 getccdbuf(void) 259 { 260 struct ccdbuf *cbp; 261 262 /* 263 * Allocate from freelist or malloc as necessary 264 */ 265 if ((cbp = ccdfreebufs) != NULL) { 266 ccdfreebufs = cbp->cb_freenext; 267 --numccdfreebufs; 268 reinitbufbio(&cbp->cb_buf); 269 } else { 270 cbp = kmalloc(sizeof(struct ccdbuf), M_DEVBUF, M_WAITOK|M_ZERO); 271 initbufbio(&cbp->cb_buf); 272 } 273 274 /* 275 * independant struct buf initialization 276 */ 277 buf_dep_init(&cbp->cb_buf); 278 BUF_LOCKINIT(&cbp->cb_buf); 279 BUF_LOCK(&cbp->cb_buf, LK_EXCLUSIVE); 280 BUF_KERNPROC(&cbp->cb_buf); 281 cbp->cb_buf.b_flags = B_PAGING | B_BNOCLIP; 282 283 return(cbp); 284 } 285 286 /* 287 * putccdbuf() - Free a ccd buffer. 288 * 289 * This routine is called at splbio(). 290 */ 291 292 static __inline 293 void 294 putccdbuf(struct ccdbuf *cbp) 295 { 296 BUF_UNLOCK(&cbp->cb_buf); 297 BUF_LOCKFREE(&cbp->cb_buf); 298 299 if (numccdfreebufs < NCCDFREEHIWAT) { 300 cbp->cb_freenext = ccdfreebufs; 301 ccdfreebufs = cbp; 302 ++numccdfreebufs; 303 } else { 304 kfree((caddr_t)cbp, M_DEVBUF); 305 } 306 } 307 308 /* 309 * Called by main() during pseudo-device attachment. All we need 310 * to do is allocate enough space for devices to be configured later, and 311 * add devsw entries. 312 */ 313 static void 314 ccdattach(void) 315 { 316 struct disk_info info; 317 struct ccd_softc *cs; 318 int i; 319 int num = NCCD; 320 321 if (num > 1) 322 kprintf("ccd0-%d: Concatenated disk drivers\n", num-1); 323 else 324 kprintf("ccd0: Concatenated disk driver\n"); 325 326 ccd_softc = kmalloc(num * sizeof(struct ccd_softc), M_DEVBUF, 327 M_WAITOK | M_ZERO); 328 ccddevs = kmalloc(num * sizeof(struct ccddevice), M_DEVBUF, 329 M_WAITOK | M_ZERO); 330 numccd = num; 331 332 /* 333 * With normal disk devices the open simply fails if the media 334 * is not present. With CCD we have to be able to open the 335 * raw disk to use the ioctl's to set it up, so create a dummy 336 * disk info structure so dscheck() doesn't blow up. 337 */ 338 bzero(&info, sizeof(info)); 339 info.d_media_blksize = DEV_BSIZE; 340 341 for (i = 0; i < numccd; ++i) { 342 cs = &ccd_softc[i]; 343 cs->sc_dev = disk_create(i, &cs->sc_disk, &ccd_ops); 344 cs->sc_dev->si_drv1 = cs; 345 cs->sc_dev->si_iosize_max = 256 * 512; /* XXX */ 346 disk_setdiskinfo(&cs->sc_disk, &info); 347 } 348 } 349 350 static int 351 ccd_modevent(module_t mod, int type, void *data) 352 { 353 int error = 0; 354 355 switch (type) { 356 case MOD_LOAD: 357 ccdattach(); 358 break; 359 360 case MOD_UNLOAD: 361 kprintf("ccd0: Unload not supported!\n"); 362 error = EOPNOTSUPP; 363 break; 364 365 default: /* MOD_SHUTDOWN etc */ 366 break; 367 } 368 return (error); 369 } 370 371 DEV_MODULE(ccd, ccd_modevent, NULL); 372 373 static int 374 ccdinit(struct ccddevice *ccd, char **cpaths, struct ucred *cred) 375 { 376 struct ccd_softc *cs = &ccd_softc[ccd->ccd_unit]; 377 struct ccdcinfo *ci = NULL; /* XXX */ 378 int ix; 379 struct vnode *vp; 380 u_int64_t skip; 381 u_int64_t size; 382 u_int64_t minsize; 383 int maxsecsize; 384 struct partinfo dpart; 385 struct ccdgeom *ccg = &cs->sc_geom; 386 char tmppath[MAXPATHLEN]; 387 int error = 0; 388 389 #ifdef DEBUG 390 if (ccddebug & (CCDB_FOLLOW|CCDB_INIT)) 391 kprintf("ccdinit: unit %d\n", ccd->ccd_unit); 392 #endif 393 394 cs->sc_size = 0; 395 cs->sc_ileave = ccd->ccd_interleave; 396 cs->sc_nccdisks = ccd->ccd_ndev; 397 398 /* Allocate space for the component info. */ 399 cs->sc_cinfo = kmalloc(cs->sc_nccdisks * sizeof(struct ccdcinfo), 400 M_DEVBUF, M_WAITOK); 401 cs->sc_maxiosize = MAXPHYS; 402 403 /* 404 * Verify that each component piece exists and record 405 * relevant information about it. 406 */ 407 maxsecsize = 0; 408 minsize = 0; 409 for (ix = 0; ix < cs->sc_nccdisks; ix++) { 410 vp = ccd->ccd_vpp[ix]; 411 ci = &cs->sc_cinfo[ix]; 412 ci->ci_vp = vp; 413 414 /* 415 * Copy in the pathname of the component. 416 */ 417 bzero(tmppath, sizeof(tmppath)); /* sanity */ 418 if ((error = copyinstr(cpaths[ix], tmppath, 419 MAXPATHLEN, &ci->ci_pathlen)) != 0) { 420 #ifdef DEBUG 421 if (ccddebug & (CCDB_FOLLOW|CCDB_INIT)) 422 kprintf("ccd%d: can't copy path, error = %d\n", 423 ccd->ccd_unit, error); 424 #endif 425 goto fail; 426 } 427 ci->ci_path = kmalloc(ci->ci_pathlen, M_DEVBUF, M_WAITOK); 428 bcopy(tmppath, ci->ci_path, ci->ci_pathlen); 429 430 ci->ci_dev = vn_todev(vp); 431 if (ci->ci_dev->si_iosize_max && 432 cs->sc_maxiosize > ci->ci_dev->si_iosize_max) { 433 cs->sc_maxiosize = ci->ci_dev->si_iosize_max; 434 } 435 436 /* 437 * Get partition information for the component. 438 */ 439 error = VOP_IOCTL(vp, DIOCGPART, (caddr_t)&dpart, FREAD, cred); 440 if (error) { 441 #ifdef DEBUG 442 if (ccddebug & (CCDB_FOLLOW|CCDB_INIT)) 443 kprintf("ccd%d: %s: ioctl failed, error = %d\n", 444 ccd->ccd_unit, ci->ci_path, error); 445 #endif 446 goto fail; 447 } 448 if (dpart.fstype != FS_CCD && 449 !kuuid_is_ccd(&dpart.fstype_uuid)) { 450 kprintf("ccd%d: %s: filesystem type must be 'ccd'\n", 451 ccd->ccd_unit, ci->ci_path); 452 error = EFTYPE; 453 goto fail; 454 } 455 if (maxsecsize < dpart.media_blksize) 456 maxsecsize = dpart.media_blksize; 457 458 /* 459 * Skip a certain amount of storage at the beginning of 460 * the component to make sure we don't infringe on any 461 * reserved sectors. This is handled entirely by 462 * dpart.reserved_blocks but we also impose a minimum 463 * of 16 sectors for backwards compatibility. 464 */ 465 skip = 16; 466 if (skip < dpart.reserved_blocks) 467 skip = dpart.reserved_blocks; 468 size = dpart.media_blocks - skip; 469 470 /* 471 * Calculate the size, truncating to an interleave 472 * boundary if necessary. 473 */ 474 if (cs->sc_ileave > 1) 475 size -= size % cs->sc_ileave; 476 477 if ((int64_t)size <= 0) { 478 #ifdef DEBUG 479 if (ccddebug & (CCDB_FOLLOW|CCDB_INIT)) 480 kprintf("ccd%d: %s: size == 0\n", 481 ccd->ccd_unit, ci->ci_path); 482 #endif 483 error = ENODEV; 484 goto fail; 485 } 486 487 /* 488 * Calculate the smallest uniform component, used 489 * elsewhere. 490 */ 491 if (minsize == 0 || minsize > size) 492 minsize = size; 493 ci->ci_skip = skip; 494 ci->ci_size = size; 495 cs->sc_size += size; 496 } 497 kprintf("ccd%d: max component iosize is %d\n", 498 cs->sc_unit, cs->sc_maxiosize); 499 500 /* 501 * Don't allow the interleave to be smaller than 502 * the biggest component sector. 503 */ 504 if ((cs->sc_ileave > 0) && 505 (cs->sc_ileave % (maxsecsize / DEV_BSIZE))) { 506 #ifdef DEBUG 507 if (ccddebug & (CCDB_FOLLOW|CCDB_INIT)) 508 kprintf("ccd%d: interleave must be at least %d\n", 509 ccd->ccd_unit, (maxsecsize / DEV_BSIZE)); 510 #endif 511 error = EINVAL; 512 goto fail; 513 } 514 515 /* 516 * If uniform interleave is desired set all sizes to that of 517 * the smallest component. This will guarentee that a single 518 * interleave table is generated. 519 * 520 * Lost space must be taken into account when calculating the 521 * overall size. Half the space is lost when CCDF_MIRROR is 522 * specified. One disk is lost when CCDF_PARITY is specified. 523 */ 524 if (ccd->ccd_flags & CCDF_UNIFORM) { 525 for (ci = cs->sc_cinfo; 526 ci < &cs->sc_cinfo[cs->sc_nccdisks]; ci++) { 527 ci->ci_size = minsize; 528 } 529 if (ccd->ccd_flags & CCDF_MIRROR) { 530 /* 531 * Check to see if an even number of components 532 * have been specified. The interleave must also 533 * be non-zero in order for us to be able to 534 * guarentee the topology. 535 */ 536 if (cs->sc_nccdisks % 2) { 537 kprintf("ccd%d: mirroring requires an even number of disks\n", ccd->ccd_unit ); 538 error = EINVAL; 539 goto fail; 540 } 541 if (cs->sc_ileave == 0) { 542 kprintf("ccd%d: an interleave must be specified when mirroring\n", ccd->ccd_unit); 543 error = EINVAL; 544 goto fail; 545 } 546 cs->sc_size = (cs->sc_nccdisks/2) * minsize; 547 } else if (ccd->ccd_flags & CCDF_PARITY) { 548 cs->sc_size = (cs->sc_nccdisks-1) * minsize; 549 } else { 550 if (cs->sc_ileave == 0) { 551 kprintf("ccd%d: an interleave must be specified when using parity\n", ccd->ccd_unit); 552 error = EINVAL; 553 goto fail; 554 } 555 cs->sc_size = cs->sc_nccdisks * minsize; 556 } 557 } 558 559 /* 560 * Construct the interleave table. 561 */ 562 ccdinterleave(cs, ccd->ccd_unit); 563 564 /* 565 * Create pseudo-geometry based on 1MB cylinders. It's 566 * pretty close. 567 */ 568 ccg->ccg_secsize = maxsecsize; 569 ccg->ccg_ntracks = 1; 570 ccg->ccg_nsectors = 1024 * 1024 / ccg->ccg_secsize; 571 ccg->ccg_ncylinders = cs->sc_size / ccg->ccg_nsectors; 572 573 /* 574 * Add an devstat entry for this device. 575 */ 576 devstat_add_entry(&cs->device_stats, "ccd", ccd->ccd_unit, 577 ccg->ccg_secsize, DEVSTAT_ALL_SUPPORTED, 578 DEVSTAT_TYPE_STORARRAY |DEVSTAT_TYPE_IF_OTHER, 579 DEVSTAT_PRIORITY_ARRAY); 580 581 cs->sc_flags |= CCDF_INITED; 582 cs->sc_cflags = ccd->ccd_flags; /* So we can find out later... */ 583 cs->sc_unit = ccd->ccd_unit; 584 return (0); 585 fail: 586 while (ci > cs->sc_cinfo) { 587 ci--; 588 kfree(ci->ci_path, M_DEVBUF); 589 } 590 kfree(cs->sc_cinfo, M_DEVBUF); 591 cs->sc_cinfo = NULL; 592 return (error); 593 } 594 595 static void 596 ccdinterleave(struct ccd_softc *cs, int unit) 597 { 598 struct ccdcinfo *ci, *smallci; 599 struct ccdiinfo *ii; 600 u_int64_t bn; 601 u_int64_t lbn; 602 u_int64_t size; 603 int icount; 604 int ix; 605 606 #ifdef DEBUG 607 if (ccddebug & CCDB_INIT) 608 kprintf("ccdinterleave(%x): ileave %d\n", cs, cs->sc_ileave); 609 #endif 610 611 /* 612 * Allocate an interleave table. The worst case occurs when each 613 * of N disks is of a different size, resulting in N interleave 614 * tables. 615 * 616 * Chances are this is too big, but we don't care. 617 */ 618 icount = cs->sc_nccdisks + 1; 619 cs->sc_itable = kmalloc(icount * sizeof(struct ccdiinfo), 620 M_DEVBUF, M_WAITOK|M_ZERO); 621 622 /* 623 * Trivial case: no interleave (actually interleave of disk size). 624 * Each table entry represents a single component in its entirety. 625 * 626 * An interleave of 0 may not be used with a mirror or parity setup. 627 */ 628 if (cs->sc_ileave == 0) { 629 bn = 0; 630 ii = cs->sc_itable; 631 632 for (ix = 0; ix < cs->sc_nccdisks; ix++) { 633 /* Allocate space for ii_index. */ 634 ii->ii_index = kmalloc(sizeof(int), M_DEVBUF, M_WAITOK); 635 ii->ii_ndisk = 1; 636 ii->ii_startblk = bn; 637 ii->ii_startoff = 0; 638 ii->ii_index[0] = ix; 639 bn += cs->sc_cinfo[ix].ci_size; 640 ii++; 641 } 642 ii->ii_ndisk = 0; 643 #ifdef DEBUG 644 if (ccddebug & CCDB_INIT) 645 printiinfo(cs->sc_itable); 646 #endif 647 return; 648 } 649 650 /* 651 * The following isn't fast or pretty; it doesn't have to be. 652 */ 653 size = 0; 654 bn = lbn = 0; 655 for (ii = cs->sc_itable; ii < &cs->sc_itable[icount]; ++ii) { 656 /* 657 * Allocate space for ii_index. We might allocate more then 658 * we use. 659 */ 660 ii->ii_index = kmalloc((sizeof(int) * cs->sc_nccdisks), 661 M_DEVBUF, M_WAITOK); 662 663 /* 664 * Locate the smallest of the remaining components 665 */ 666 smallci = NULL; 667 ci = cs->sc_cinfo; 668 while (ci < &cs->sc_cinfo[cs->sc_nccdisks]) { 669 if (ci->ci_size > size && 670 (smallci == NULL || 671 ci->ci_size < smallci->ci_size)) { 672 smallci = ci; 673 } 674 ++ci; 675 } 676 677 /* 678 * Nobody left, all done 679 */ 680 if (smallci == NULL) { 681 ii->ii_ndisk = 0; 682 break; 683 } 684 685 /* 686 * Record starting logical block using an sc_ileave blocksize. 687 */ 688 ii->ii_startblk = bn / cs->sc_ileave; 689 690 /* 691 * Record starting component block using an sc_ileave 692 * blocksize. This value is relative to the beginning of 693 * a component disk. 694 */ 695 ii->ii_startoff = lbn; 696 697 /* 698 * Determine how many disks take part in this interleave 699 * and record their indices. 700 */ 701 ix = 0; 702 for (ci = cs->sc_cinfo; 703 ci < &cs->sc_cinfo[cs->sc_nccdisks]; ci++) { 704 if (ci->ci_size >= smallci->ci_size) { 705 ii->ii_index[ix++] = ci - cs->sc_cinfo; 706 } 707 } 708 ii->ii_ndisk = ix; 709 710 /* 711 * Adjust for loop 712 */ 713 bn += ix * (smallci->ci_size - size); 714 lbn = smallci->ci_size / cs->sc_ileave; 715 size = smallci->ci_size; 716 } 717 if (ii == &cs->sc_itable[icount]) 718 panic("ccdinterlave software bug! table exhausted"); 719 #ifdef DEBUG 720 if (ccddebug & CCDB_INIT) 721 printiinfo(cs->sc_itable); 722 #endif 723 } 724 725 /* ARGSUSED */ 726 static int 727 ccdopen(struct dev_open_args *ap) 728 { 729 cdev_t dev = ap->a_head.a_dev; 730 int unit = ccdunit(dev); 731 struct ccd_softc *cs; 732 int error = 0; 733 734 #ifdef DEBUG 735 if (ccddebug & CCDB_FOLLOW) 736 kprintf("ccdopen(%x, %x)\n", dev, flags); 737 #endif 738 if (unit >= numccd) 739 return (ENXIO); 740 cs = &ccd_softc[unit]; 741 742 if ((error = ccdlock(cs)) == 0) { 743 ccdunlock(cs); 744 } 745 return (error); 746 } 747 748 /* ARGSUSED */ 749 static int 750 ccdclose(struct dev_close_args *ap) 751 { 752 cdev_t dev = ap->a_head.a_dev; 753 int unit = ccdunit(dev); 754 struct ccd_softc *cs; 755 int error = 0; 756 757 #ifdef DEBUG 758 if (ccddebug & CCDB_FOLLOW) 759 kprintf("ccdclose(%x, %x)\n", dev, flags); 760 #endif 761 762 if (unit >= numccd) 763 return (ENXIO); 764 cs = &ccd_softc[unit]; 765 if ((error = ccdlock(cs)) == 0) { 766 ccdunlock(cs); 767 } 768 return (error); 769 } 770 771 static int 772 ccdstrategy(struct dev_strategy_args *ap) 773 { 774 cdev_t dev = ap->a_head.a_dev; 775 struct bio *bio = ap->a_bio; 776 int unit = ccdunit(dev); 777 struct bio *nbio; 778 struct buf *bp = bio->bio_buf; 779 struct ccd_softc *cs = &ccd_softc[unit]; 780 u_int64_t pbn; /* in sc_secsize chunks */ 781 u_int32_t sz; /* in sc_secsize chunks */ 782 783 #ifdef DEBUG 784 if (ccddebug & CCDB_FOLLOW) 785 kprintf("ccdstrategy(%x): unit %d\n", bp, unit); 786 #endif 787 if ((cs->sc_flags & CCDF_INITED) == 0) { 788 bp->b_error = ENXIO; 789 goto error; 790 } 791 792 /* If it's a nil transfer, wake up the top half now. */ 793 if (bp->b_bcount == 0) { 794 bp->b_resid = 0; 795 goto done; 796 } 797 798 /* 799 * Do bounds checking and adjust transfer. If there's an 800 * error, the bounds check will flag that for us. 801 */ 802 803 pbn = bio->bio_offset / cs->sc_geom.ccg_secsize; 804 sz = howmany(bp->b_bcount, cs->sc_geom.ccg_secsize); 805 806 /* 807 * If out of bounds return an error. If the request goes 808 * past EOF, clip the request as appropriate. If exactly 809 * at EOF, return success (don't clip), but with 0 bytes 810 * of I/O. 811 * 812 * Mark EOF B_INVAL (just like bad), indicating that the 813 * contents of the buffer, if any, is invalid. 814 */ 815 if ((int64_t)pbn < 0) 816 goto bad; 817 if (pbn + sz > cs->sc_size) { 818 if (pbn > cs->sc_size || (bp->b_flags & B_BNOCLIP)) 819 goto bad; 820 if (pbn == cs->sc_size) { 821 bp->b_resid = bp->b_bcount; 822 bp->b_flags |= B_INVAL; 823 goto done; 824 } 825 sz = (long)(cs->sc_size - pbn); 826 bp->b_bcount = sz * cs->sc_geom.ccg_secsize; 827 } 828 nbio = bio; 829 830 bp->b_resid = bp->b_bcount; 831 nbio->bio_driver_info = dev; 832 833 /* 834 * "Start" the unit. 835 */ 836 crit_enter(); 837 ccdstart(cs, nbio); 838 crit_exit(); 839 return(0); 840 841 /* 842 * note: bio, not nbio, is valid at the done label. 843 */ 844 bad: 845 bp->b_error = EINVAL; 846 error: 847 bp->b_resid = bp->b_bcount; 848 bp->b_flags |= B_ERROR | B_INVAL; 849 done: 850 biodone(bio); 851 return(0); 852 } 853 854 static void 855 ccdstart(struct ccd_softc *cs, struct bio *bio) 856 { 857 long bcount, rcount; 858 struct ccdbuf *cbp[4]; 859 struct buf *bp = bio->bio_buf; 860 /* XXX! : 2 reads and 2 writes for RAID 4/5 */ 861 caddr_t addr; 862 off_t doffset; 863 864 #ifdef DEBUG 865 if (ccddebug & CCDB_FOLLOW) 866 kprintf("ccdstart(%x, %x)\n", cs, bp); 867 #endif 868 869 /* Record the transaction start */ 870 devstat_start_transaction(&cs->device_stats); 871 872 /* 873 * Allocate component buffers and fire off the requests 874 */ 875 doffset = bio->bio_offset; 876 addr = bp->b_data; 877 878 for (bcount = bp->b_bcount; bcount > 0; bcount -= rcount) { 879 ccdbuffer(cbp, cs, bio, doffset, addr, bcount); 880 rcount = cbp[0]->cb_buf.b_bcount; 881 882 if (cs->sc_cflags & CCDF_MIRROR) { 883 /* 884 * Mirroring. Writes go to both disks, reads are 885 * taken from whichever disk seems most appropriate. 886 * 887 * We attempt to localize reads to the disk whos arm 888 * is nearest the read request. We ignore seeks due 889 * to writes when making this determination and we 890 * also try to avoid hogging. 891 */ 892 if (cbp[0]->cb_buf.b_cmd != BUF_CMD_READ) { 893 vn_strategy(cbp[0]->cb_vp, 894 &cbp[0]->cb_buf.b_bio1); 895 vn_strategy(cbp[1]->cb_vp, 896 &cbp[1]->cb_buf.b_bio1); 897 } else { 898 int pick = cs->sc_pick; 899 daddr_t range = cs->sc_size / 16 * cs->sc_geom.ccg_secsize; 900 if (doffset < cs->sc_blk[pick] - range || 901 doffset > cs->sc_blk[pick] + range 902 ) { 903 cs->sc_pick = pick = 1 - pick; 904 } 905 cs->sc_blk[pick] = doffset + rcount; 906 vn_strategy(cbp[pick]->cb_vp, 907 &cbp[pick]->cb_buf.b_bio1); 908 } 909 } else { 910 /* 911 * Not mirroring 912 */ 913 vn_strategy(cbp[0]->cb_vp, 914 &cbp[0]->cb_buf.b_bio1); 915 } 916 doffset += rcount; 917 addr += rcount; 918 } 919 } 920 921 /* 922 * Build a component buffer header. 923 */ 924 static void 925 ccdbuffer(struct ccdbuf **cb, struct ccd_softc *cs, struct bio *bio, 926 off_t doffset, caddr_t addr, long bcount) 927 { 928 struct ccdcinfo *ci, *ci2 = NULL; /* XXX */ 929 struct ccdbuf *cbp; 930 u_int64_t bn; 931 u_int64_t cbn; 932 u_int64_t cboff; 933 off_t cbc; 934 935 #ifdef DEBUG 936 if (ccddebug & CCDB_IO) 937 kprintf("ccdbuffer(%x, %x, %d, %x, %d)\n", 938 cs, bp, bn, addr, bcount); 939 #endif 940 /* 941 * Determine which component bn falls in. 942 */ 943 bn = doffset / cs->sc_geom.ccg_secsize; 944 cbn = bn; 945 cboff = 0; 946 947 if (cs->sc_ileave == 0) { 948 /* 949 * Serially concatenated and neither a mirror nor a parity 950 * config. This is a special case. 951 */ 952 daddr_t sblk; 953 954 sblk = 0; 955 for (ci = cs->sc_cinfo; cbn >= sblk + ci->ci_size; ci++) 956 sblk += ci->ci_size; 957 cbn -= sblk; 958 } else { 959 struct ccdiinfo *ii; 960 int ccdisk, off; 961 962 /* 963 * Calculate cbn, the logical superblock (sc_ileave chunks), 964 * and cboff, a normal block offset (DEV_BSIZE chunks) relative 965 * to cbn. 966 */ 967 cboff = cbn % cs->sc_ileave; /* DEV_BSIZE gran */ 968 cbn = cbn / cs->sc_ileave; /* DEV_BSIZE * ileave gran */ 969 970 /* 971 * Figure out which interleave table to use. 972 */ 973 for (ii = cs->sc_itable; ii->ii_ndisk; ii++) { 974 if (ii->ii_startblk > cbn) 975 break; 976 } 977 ii--; 978 979 /* 980 * off is the logical superblock relative to the beginning 981 * of this interleave block. 982 */ 983 off = cbn - ii->ii_startblk; 984 985 /* 986 * We must calculate which disk component to use (ccdisk), 987 * and recalculate cbn to be the superblock relative to 988 * the beginning of the component. This is typically done by 989 * adding 'off' and ii->ii_startoff together. However, 'off' 990 * must typically be divided by the number of components in 991 * this interleave array to be properly convert it from a 992 * CCD-relative logical superblock number to a 993 * component-relative superblock number. 994 */ 995 if (ii->ii_ndisk == 1) { 996 /* 997 * When we have just one disk, it can't be a mirror 998 * or a parity config. 999 */ 1000 ccdisk = ii->ii_index[0]; 1001 cbn = ii->ii_startoff + off; 1002 } else { 1003 if (cs->sc_cflags & CCDF_MIRROR) { 1004 /* 1005 * We have forced a uniform mapping, resulting 1006 * in a single interleave array. We double 1007 * up on the first half of the available 1008 * components and our mirror is in the second 1009 * half. This only works with a single 1010 * interleave array because doubling up 1011 * doubles the number of sectors, so there 1012 * cannot be another interleave array because 1013 * the next interleave array's calculations 1014 * would be off. 1015 */ 1016 int ndisk2 = ii->ii_ndisk / 2; 1017 ccdisk = ii->ii_index[off % ndisk2]; 1018 cbn = ii->ii_startoff + off / ndisk2; 1019 ci2 = &cs->sc_cinfo[ccdisk + ndisk2]; 1020 } else if (cs->sc_cflags & CCDF_PARITY) { 1021 /* 1022 * XXX not implemented yet 1023 */ 1024 int ndisk2 = ii->ii_ndisk - 1; 1025 ccdisk = ii->ii_index[off % ndisk2]; 1026 cbn = ii->ii_startoff + off / ndisk2; 1027 if (cbn % ii->ii_ndisk <= ccdisk) 1028 ccdisk++; 1029 } else { 1030 ccdisk = ii->ii_index[off % ii->ii_ndisk]; 1031 cbn = ii->ii_startoff + off / ii->ii_ndisk; 1032 } 1033 } 1034 1035 ci = &cs->sc_cinfo[ccdisk]; 1036 1037 /* 1038 * Convert cbn from a superblock to a normal block so it 1039 * can be used to calculate (along with cboff) the normal 1040 * block index into this particular disk. 1041 */ 1042 cbn *= cs->sc_ileave; 1043 } 1044 1045 /* 1046 * Fill in the component buf structure. 1047 * 1048 * NOTE: devices do not use b_bufsize, only b_bcount, but b_bcount 1049 * will be truncated on device EOF so we use b_bufsize to detect 1050 * the case. 1051 */ 1052 cbp = getccdbuf(); 1053 cbp->cb_buf.b_cmd = bio->bio_buf->b_cmd; 1054 cbp->cb_buf.b_flags |= bio->bio_buf->b_flags; 1055 cbp->cb_buf.b_data = addr; 1056 cbp->cb_vp = ci->ci_vp; 1057 if (cs->sc_ileave == 0) 1058 cbc = dbtob((off_t)(ci->ci_size - cbn)); 1059 else 1060 cbc = dbtob((off_t)(cs->sc_ileave - cboff)); 1061 if (cbc > cs->sc_maxiosize) 1062 cbc = cs->sc_maxiosize; 1063 cbp->cb_buf.b_bcount = (cbc < bcount) ? cbc : bcount; 1064 cbp->cb_buf.b_bufsize = cbp->cb_buf.b_bcount; 1065 1066 cbp->cb_buf.b_bio1.bio_done = ccdiodone; 1067 cbp->cb_buf.b_bio1.bio_caller_info1.ptr = cbp; 1068 cbp->cb_buf.b_bio1.bio_offset = dbtob(cbn + cboff + ci->ci_skip); 1069 1070 /* 1071 * context for ccdiodone 1072 */ 1073 cbp->cb_obio = bio; 1074 cbp->cb_unit = cs - ccd_softc; 1075 cbp->cb_comp = ci - cs->sc_cinfo; 1076 1077 #ifdef DEBUG 1078 if (ccddebug & CCDB_IO) 1079 kprintf(" dev %x(u%d): cbp %x off %lld addr %x bcnt %d\n", 1080 ci->ci_dev, ci-cs->sc_cinfo, cbp, 1081 cbp->cb_buf.b_bio1.bio_offset, 1082 cbp->cb_buf.b_data, cbp->cb_buf.b_bcount); 1083 #endif 1084 cb[0] = cbp; 1085 1086 /* 1087 * Note: both I/O's setup when reading from mirror, but only one 1088 * will be executed. 1089 */ 1090 if (cs->sc_cflags & CCDF_MIRROR) { 1091 /* mirror, setup second I/O */ 1092 cbp = getccdbuf(); 1093 1094 cbp->cb_buf.b_cmd = bio->bio_buf->b_cmd; 1095 cbp->cb_buf.b_flags |= bio->bio_buf->b_flags; 1096 cbp->cb_buf.b_data = addr; 1097 cbp->cb_vp = ci2->ci_vp; 1098 if (cs->sc_ileave == 0) 1099 cbc = dbtob((off_t)(ci->ci_size - cbn)); 1100 else 1101 cbc = dbtob((off_t)(cs->sc_ileave - cboff)); 1102 if (cbc > cs->sc_maxiosize) 1103 cbc = cs->sc_maxiosize; 1104 cbp->cb_buf.b_bcount = (cbc < bcount) ? cbc : bcount; 1105 cbp->cb_buf.b_bufsize = cbp->cb_buf.b_bcount; 1106 1107 cbp->cb_buf.b_bio1.bio_done = ccdiodone; 1108 cbp->cb_buf.b_bio1.bio_caller_info1.ptr = cbp; 1109 cbp->cb_buf.b_bio1.bio_offset = dbtob(cbn + cboff + ci2->ci_skip); 1110 1111 /* 1112 * context for ccdiodone 1113 */ 1114 cbp->cb_obio = bio; 1115 cbp->cb_unit = cs - ccd_softc; 1116 cbp->cb_comp = ci2 - cs->sc_cinfo; 1117 cb[1] = cbp; 1118 /* link together the ccdbuf's and clear "mirror done" flag */ 1119 cb[0]->cb_mirror = cb[1]; 1120 cb[1]->cb_mirror = cb[0]; 1121 cb[0]->cb_pflags &= ~CCDPF_MIRROR_DONE; 1122 cb[1]->cb_pflags &= ~CCDPF_MIRROR_DONE; 1123 } 1124 } 1125 1126 static void 1127 ccdintr(struct ccd_softc *cs, struct bio *bio) 1128 { 1129 struct buf *bp = bio->bio_buf; 1130 1131 #ifdef DEBUG 1132 if (ccddebug & CCDB_FOLLOW) 1133 kprintf("ccdintr(%x, %x)\n", cs, bp); 1134 #endif 1135 /* 1136 * Request is done for better or worse, wakeup the top half. 1137 */ 1138 if (bp->b_flags & B_ERROR) 1139 bp->b_resid = bp->b_bcount; 1140 devstat_end_transaction_buf(&cs->device_stats, bp); 1141 biodone(bio); 1142 } 1143 1144 /* 1145 * Called at interrupt time. 1146 * Mark the component as done and if all components are done, 1147 * take a ccd interrupt. 1148 */ 1149 static void 1150 ccdiodone(struct bio *bio) 1151 { 1152 struct ccdbuf *cbp = bio->bio_caller_info1.ptr; 1153 struct bio *obio = cbp->cb_obio; 1154 struct buf *obp = obio->bio_buf; 1155 int unit = cbp->cb_unit; 1156 int count; 1157 1158 /* 1159 * Since we do not have exclusive access to underlying devices, 1160 * we can't keep cache translations around. 1161 */ 1162 clearbiocache(bio->bio_next); 1163 1164 crit_enter(); 1165 #ifdef DEBUG 1166 if (ccddebug & CCDB_FOLLOW) 1167 kprintf("ccdiodone(%x)\n", cbp); 1168 if (ccddebug & CCDB_IO) { 1169 kprintf("ccdiodone: bp %x bcount %d resid %d\n", 1170 obp, obp->b_bcount, obp->b_resid); 1171 kprintf(" dev %x(u%d), cbp %x off %lld addr %x bcnt %d\n", 1172 cbp->cb_buf.b_dev, cbp->cb_comp, cbp, 1173 cbp->cb_buf.b_loffset, cbp->cb_buf.b_data, 1174 cbp->cb_buf.b_bcount); 1175 } 1176 #endif 1177 1178 /* 1179 * If an error occured, report it. If this is a mirrored 1180 * configuration and the first of two possible reads, do not 1181 * set the error in the bp yet because the second read may 1182 * succeed. 1183 */ 1184 if (cbp->cb_buf.b_flags & B_ERROR) { 1185 const char *msg = ""; 1186 1187 if ((ccd_softc[unit].sc_cflags & CCDF_MIRROR) && 1188 (cbp->cb_buf.b_cmd == BUF_CMD_READ) && 1189 (cbp->cb_pflags & CCDPF_MIRROR_DONE) == 0) { 1190 /* 1191 * We will try our read on the other disk down 1192 * below, also reverse the default pick so if we 1193 * are doing a scan we do not keep hitting the 1194 * bad disk first. 1195 */ 1196 struct ccd_softc *cs = &ccd_softc[unit]; 1197 1198 msg = ", trying other disk"; 1199 cs->sc_pick = 1 - cs->sc_pick; 1200 cs->sc_blk[cs->sc_pick] = obio->bio_offset; 1201 } else { 1202 obp->b_flags |= B_ERROR; 1203 obp->b_error = cbp->cb_buf.b_error ? 1204 cbp->cb_buf.b_error : EIO; 1205 } 1206 kprintf("ccd%d: error %d on component %d offset %lld (ccd offset %lld)%s\n", 1207 unit, obp->b_error, cbp->cb_comp, 1208 cbp->cb_buf.b_bio2.bio_offset, 1209 obio->bio_offset, msg); 1210 } 1211 1212 /* 1213 * Process mirror. If we are writing, I/O has been initiated on both 1214 * buffers and we fall through only after both are finished. 1215 * 1216 * If we are reading only one I/O is initiated at a time. If an 1217 * error occurs we initiate the second I/O and return, otherwise 1218 * we free the second I/O without initiating it. 1219 */ 1220 1221 if (ccd_softc[unit].sc_cflags & CCDF_MIRROR) { 1222 if (cbp->cb_buf.b_cmd != BUF_CMD_READ) { 1223 /* 1224 * When writing, handshake with the second buffer 1225 * to determine when both are done. If both are not 1226 * done, return here. 1227 */ 1228 if ((cbp->cb_pflags & CCDPF_MIRROR_DONE) == 0) { 1229 cbp->cb_mirror->cb_pflags |= CCDPF_MIRROR_DONE; 1230 putccdbuf(cbp); 1231 crit_exit(); 1232 return; 1233 } 1234 } else { 1235 /* 1236 * When reading, either dispose of the second buffer 1237 * or initiate I/O on the second buffer if an error 1238 * occured with this one. 1239 */ 1240 if ((cbp->cb_pflags & CCDPF_MIRROR_DONE) == 0) { 1241 if (cbp->cb_buf.b_flags & B_ERROR) { 1242 cbp->cb_mirror->cb_pflags |= 1243 CCDPF_MIRROR_DONE; 1244 vn_strategy( 1245 cbp->cb_mirror->cb_vp, 1246 &cbp->cb_mirror->cb_buf.b_bio1 1247 ); 1248 putccdbuf(cbp); 1249 crit_exit(); 1250 return; 1251 } else { 1252 putccdbuf(cbp->cb_mirror); 1253 /* fall through */ 1254 } 1255 } 1256 } 1257 } 1258 1259 /* 1260 * Use our saved b_bufsize to determine if an unexpected EOF occured. 1261 */ 1262 count = cbp->cb_buf.b_bufsize; 1263 putccdbuf(cbp); 1264 1265 /* 1266 * If all done, "interrupt". 1267 */ 1268 obp->b_resid -= count; 1269 if (obp->b_resid < 0) 1270 panic("ccdiodone: count"); 1271 if (obp->b_resid == 0) 1272 ccdintr(&ccd_softc[unit], obio); 1273 crit_exit(); 1274 } 1275 1276 static int 1277 ccdioctl(struct dev_ioctl_args *ap) 1278 { 1279 cdev_t dev = ap->a_head.a_dev; 1280 int unit = ccdunit(dev); 1281 int i, j, lookedup = 0, error = 0; 1282 struct ccd_softc *cs; 1283 struct ccd_ioctl *ccio = (struct ccd_ioctl *)ap->a_data; 1284 struct ccddevice ccd; 1285 struct disk_info info; 1286 char **cpp; 1287 struct vnode **vpp; 1288 1289 if (unit >= numccd) 1290 return (ENXIO); 1291 cs = &ccd_softc[unit]; 1292 1293 bzero(&ccd, sizeof(ccd)); 1294 1295 switch (ap->a_cmd) { 1296 case CCDIOCSET: 1297 if (cs->sc_flags & CCDF_INITED) 1298 return (EBUSY); 1299 1300 if ((ap->a_fflag & FWRITE) == 0) 1301 return (EBADF); 1302 1303 if ((error = ccdlock(cs)) != 0) 1304 return (error); 1305 1306 if (ccio->ccio_ndisks > CCD_MAXNDISKS) { 1307 ccdunlock(cs); 1308 return (EINVAL); 1309 } 1310 1311 /* Fill in some important bits. */ 1312 ccd.ccd_unit = unit; 1313 ccd.ccd_interleave = ccio->ccio_ileave; 1314 if (ccd.ccd_interleave == 0 && 1315 ((ccio->ccio_flags & CCDF_MIRROR) || 1316 (ccio->ccio_flags & CCDF_PARITY))) { 1317 kprintf("ccd%d: disabling mirror/parity, interleave is 0\n", unit); 1318 ccio->ccio_flags &= ~(CCDF_MIRROR | CCDF_PARITY); 1319 } 1320 if ((ccio->ccio_flags & CCDF_MIRROR) && 1321 (ccio->ccio_flags & CCDF_PARITY)) { 1322 kprintf("ccd%d: can't specify both mirror and parity, using mirror\n", unit); 1323 ccio->ccio_flags &= ~CCDF_PARITY; 1324 } 1325 if ((ccio->ccio_flags & (CCDF_MIRROR | CCDF_PARITY)) && 1326 !(ccio->ccio_flags & CCDF_UNIFORM)) { 1327 kprintf("ccd%d: mirror/parity forces uniform flag\n", 1328 unit); 1329 ccio->ccio_flags |= CCDF_UNIFORM; 1330 } 1331 ccd.ccd_flags = ccio->ccio_flags & CCDF_USERMASK; 1332 1333 /* 1334 * Allocate space for and copy in the array of 1335 * componet pathnames and device numbers. 1336 */ 1337 cpp = kmalloc(ccio->ccio_ndisks * sizeof(char *), 1338 M_DEVBUF, M_WAITOK); 1339 vpp = kmalloc(ccio->ccio_ndisks * sizeof(struct vnode *), 1340 M_DEVBUF, M_WAITOK); 1341 1342 error = copyin((caddr_t)ccio->ccio_disks, (caddr_t)cpp, 1343 ccio->ccio_ndisks * sizeof(char **)); 1344 if (error) { 1345 kfree(vpp, M_DEVBUF); 1346 kfree(cpp, M_DEVBUF); 1347 ccdunlock(cs); 1348 return (error); 1349 } 1350 1351 #ifdef DEBUG 1352 if (ccddebug & CCDB_INIT) { 1353 for (i = 0; i < ccio->ccio_ndisks; ++i) 1354 kprintf("ccdioctl: component %d: 0x%x\n", 1355 i, cpp[i]); 1356 } 1357 #endif 1358 1359 for (i = 0; i < ccio->ccio_ndisks; ++i) { 1360 #ifdef DEBUG 1361 if (ccddebug & CCDB_INIT) 1362 kprintf("ccdioctl: lookedup = %d\n", lookedup); 1363 #endif 1364 if ((error = ccdlookup(cpp[i], &vpp[i])) != 0) { 1365 for (j = 0; j < lookedup; ++j) 1366 (void)vn_close(vpp[j], FREAD|FWRITE); 1367 kfree(vpp, M_DEVBUF); 1368 kfree(cpp, M_DEVBUF); 1369 ccdunlock(cs); 1370 return (error); 1371 } 1372 ++lookedup; 1373 } 1374 ccd.ccd_cpp = cpp; 1375 ccd.ccd_vpp = vpp; 1376 ccd.ccd_ndev = ccio->ccio_ndisks; 1377 1378 /* 1379 * Initialize the ccd. Fills in the softc for us. 1380 */ 1381 if ((error = ccdinit(&ccd, cpp, ap->a_cred)) != 0) { 1382 for (j = 0; j < lookedup; ++j) 1383 (void)vn_close(vpp[j], FREAD|FWRITE); 1384 kfree(vpp, M_DEVBUF); 1385 kfree(cpp, M_DEVBUF); 1386 ccdunlock(cs); 1387 return (error); 1388 } 1389 1390 /* 1391 * The ccd has been successfully initialized, so 1392 * we can place it into the array and read the disklabel. 1393 */ 1394 bcopy(&ccd, &ccddevs[unit], sizeof(ccd)); 1395 ccio->ccio_unit = unit; 1396 ccio->ccio_size = cs->sc_size; 1397 1398 bzero(&info, sizeof(info)); 1399 info.d_media_blksize = cs->sc_geom.ccg_secsize; 1400 info.d_media_blocks = cs->sc_size; 1401 info.d_nheads = cs->sc_geom.ccg_ntracks; 1402 info.d_secpertrack = cs->sc_geom.ccg_nsectors; 1403 info.d_ncylinders = cs->sc_geom.ccg_ncylinders; 1404 info.d_secpercyl = info.d_nheads * info.d_secpertrack; 1405 1406 /* 1407 * For cases where a label is directly applied to the ccd, 1408 * without slices, DSO_COMPATMBR forces one sector be 1409 * reserved for backwards compatibility. 1410 */ 1411 info.d_dsflags = DSO_COMPATMBR; 1412 disk_setdiskinfo(&cs->sc_disk, &info); 1413 1414 ccdunlock(cs); 1415 1416 break; 1417 1418 case CCDIOCCLR: 1419 if ((cs->sc_flags & CCDF_INITED) == 0) 1420 return (ENXIO); 1421 1422 if ((ap->a_fflag & FWRITE) == 0) 1423 return (EBADF); 1424 1425 if ((error = ccdlock(cs)) != 0) 1426 return (error); 1427 1428 if (dev_drefs(cs->sc_dev) > 1) { 1429 ccdunlock(cs); 1430 return (EBUSY); 1431 } 1432 1433 /* 1434 * Free ccd_softc information and clear entry. 1435 */ 1436 1437 /* Close the components and free their pathnames. */ 1438 for (i = 0; i < cs->sc_nccdisks; ++i) { 1439 /* 1440 * XXX: this close could potentially fail and 1441 * cause Bad Things. Maybe we need to force 1442 * the close to happen? 1443 */ 1444 #ifdef DEBUG 1445 if (ccddebug & CCDB_VNODE) 1446 vprint("CCDIOCCLR: vnode info", 1447 cs->sc_cinfo[i].ci_vp); 1448 #endif 1449 (void)vn_close(cs->sc_cinfo[i].ci_vp, FREAD|FWRITE); 1450 kfree(cs->sc_cinfo[i].ci_path, M_DEVBUF); 1451 } 1452 1453 /* Free interleave index. */ 1454 for (i = 0; cs->sc_itable[i].ii_ndisk; ++i) 1455 kfree(cs->sc_itable[i].ii_index, M_DEVBUF); 1456 1457 /* Free component info and interleave table. */ 1458 kfree(cs->sc_cinfo, M_DEVBUF); 1459 kfree(cs->sc_itable, M_DEVBUF); 1460 cs->sc_cinfo = NULL; 1461 cs->sc_itable = NULL; 1462 cs->sc_flags &= ~CCDF_INITED; 1463 1464 /* 1465 * Free ccddevice information and clear entry. 1466 */ 1467 kfree(ccddevs[unit].ccd_cpp, M_DEVBUF); 1468 kfree(ccddevs[unit].ccd_vpp, M_DEVBUF); 1469 bcopy(&ccd, &ccddevs[unit], sizeof(ccd)); 1470 1471 /* 1472 * And remove the devstat entry. 1473 */ 1474 devstat_remove_entry(&cs->device_stats); 1475 1476 /* This must be atomic. */ 1477 crit_enter(); 1478 ccdunlock(cs); 1479 crit_exit(); 1480 1481 break; 1482 1483 default: 1484 return (ENOTTY); 1485 } 1486 1487 return (0); 1488 } 1489 1490 static int 1491 ccddump(struct dev_dump_args *ap) 1492 { 1493 /* Not implemented. */ 1494 return ENXIO; 1495 } 1496 1497 /* 1498 * Lookup the provided name in the filesystem. If the file exists, 1499 * is a valid block device, and isn't being used by anyone else, 1500 * set *vpp to the file's vnode. 1501 */ 1502 static int 1503 ccdlookup(char *path, struct vnode **vpp) 1504 { 1505 struct nlookupdata nd; 1506 struct vnode *vp; 1507 int error; 1508 1509 *vpp = NULL; 1510 1511 error = nlookup_init(&nd, path, UIO_USERSPACE, NLC_FOLLOW|NLC_LOCKVP); 1512 if (error) 1513 return (error); 1514 if ((error = vn_open(&nd, NULL, FREAD|FWRITE, 0)) != 0) { 1515 #ifdef DEBUG 1516 if (ccddebug & CCDB_FOLLOW|CCDB_INIT) 1517 kprintf("ccdlookup: vn_open error = %d\n", error); 1518 #endif 1519 goto done; 1520 } 1521 vp = nd.nl_open_vp; 1522 1523 if (vp->v_opencount > 1) { 1524 error = EBUSY; 1525 goto done; 1526 } 1527 1528 if (!vn_isdisk(vp, &error)) 1529 goto done; 1530 1531 #ifdef DEBUG 1532 if (ccddebug & CCDB_VNODE) 1533 vprint("ccdlookup: vnode info", vp); 1534 #endif 1535 1536 vn_unlock(vp); 1537 nd.nl_open_vp = NULL; 1538 nlookup_done(&nd); 1539 *vpp = vp; /* leave ref intact */ 1540 return (0); 1541 done: 1542 nlookup_done(&nd); 1543 return (error); 1544 } 1545 1546 /* 1547 * Wait interruptibly for an exclusive lock. 1548 * 1549 * XXX 1550 * Several drivers do this; it should be abstracted and made MP-safe. 1551 */ 1552 static int 1553 ccdlock(struct ccd_softc *cs) 1554 { 1555 int error; 1556 1557 while ((cs->sc_flags & CCDF_LOCKED) != 0) { 1558 cs->sc_flags |= CCDF_WANTED; 1559 if ((error = tsleep(cs, PCATCH, "ccdlck", 0)) != 0) 1560 return (error); 1561 } 1562 cs->sc_flags |= CCDF_LOCKED; 1563 return (0); 1564 } 1565 1566 /* 1567 * Unlock and wake up any waiters. 1568 */ 1569 static void 1570 ccdunlock(struct ccd_softc *cs) 1571 { 1572 1573 cs->sc_flags &= ~CCDF_LOCKED; 1574 if ((cs->sc_flags & CCDF_WANTED) != 0) { 1575 cs->sc_flags &= ~CCDF_WANTED; 1576 wakeup(cs); 1577 } 1578 } 1579 1580 #ifdef DEBUG 1581 static void 1582 printiinfo(struct ccdiinfo *ii) 1583 { 1584 int ix, i; 1585 1586 for (ix = 0; ii->ii_ndisk; ix++, ii++) { 1587 kprintf(" itab[%d]: #dk %d sblk %d soff %d", 1588 ix, ii->ii_ndisk, ii->ii_startblk, ii->ii_startoff); 1589 for (i = 0; i < ii->ii_ndisk; i++) 1590 kprintf(" %d", ii->ii_index[i]); 1591 kprintf("\n"); 1592 } 1593 } 1594 #endif 1595 1596 1597 /* Local Variables: */ 1598 /* c-argdecl-indent: 8 */ 1599 /* c-continued-statement-offset: 8 */ 1600 /* c-indent-level: 8 */ 1601 /* End: */ 1602