1 /* $NetBSD: ccd.c,v 1.172 2017/06/01 02:45:08 chs Exp $ */ 2 3 /*- 4 * Copyright (c) 1996, 1997, 1998, 1999, 2007, 2009 The NetBSD Foundation, Inc. 5 * All rights reserved. 6 * 7 * This code is derived from software contributed to The NetBSD Foundation 8 * by Jason R. Thorpe, and by Andrew Doran. 9 * 10 * Redistribution and use in source and binary forms, with or without 11 * modification, are permitted provided that the following conditions 12 * are met: 13 * 1. Redistributions of source code must retain the above copyright 14 * notice, this list of conditions and the following disclaimer. 15 * 2. Redistributions in binary form must reproduce the above copyright 16 * notice, this list of conditions and the following disclaimer in the 17 * documentation and/or other materials provided with the distribution. 18 * 19 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS 20 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 21 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 22 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS 23 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 24 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 25 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 26 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 27 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 28 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 29 * POSSIBILITY OF SUCH DAMAGE. 30 */ 31 32 /* 33 * Copyright (c) 1988 University of Utah. 34 * Copyright (c) 1990, 1993 35 * The Regents of the University of California. All rights reserved. 36 * 37 * This code is derived from software contributed to Berkeley by 38 * the Systems Programming Group of the University of Utah Computer 39 * Science Department. 40 * 41 * Redistribution and use in source and binary forms, with or without 42 * modification, are permitted provided that the following conditions 43 * are met: 44 * 1. Redistributions of source code must retain the above copyright 45 * notice, this list of conditions and the following disclaimer. 46 * 2. Redistributions in binary form must reproduce the above copyright 47 * notice, this list of conditions and the following disclaimer in the 48 * documentation and/or other materials provided with the distribution. 49 * 3. Neither the name of the University nor the names of its contributors 50 * may be used to endorse or promote products derived from this software 51 * without specific prior written permission. 52 * 53 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 54 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 55 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 56 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 57 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 58 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 59 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 60 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 61 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 62 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 63 * SUCH DAMAGE. 64 * 65 * from: Utah $Hdr: cd.c 1.6 90/11/28$ 66 * 67 * @(#)cd.c 8.2 (Berkeley) 11/16/93 68 */ 69 70 /* 71 * "Concatenated" disk driver. 72 * 73 * Notes on concurrency: 74 * 75 * => sc_dvlock serializes access to the device nodes, excluding block I/O. 76 * 77 * => sc_iolock serializes access to (sc_flags & CCDF_INITED), disk stats, 78 * sc_stop, sc_bufq and b_resid from master buffers. 79 * 80 * => a combination of CCDF_INITED, sc_inflight, and sc_iolock is used to 81 * serialize I/O and configuration changes. 82 * 83 * => the in-core disk label does not change while the device is open. 84 * 85 * On memory consumption: ccd fans out I/O requests and so needs to 86 * allocate memory. If the system is desperately low on memory, we 87 * single thread I/O. 88 */ 89 90 #include <sys/cdefs.h> 91 __KERNEL_RCSID(0, "$NetBSD: ccd.c,v 1.172 2017/06/01 02:45:08 chs Exp $"); 92 93 #if defined(_KERNEL_OPT) 94 #include "opt_compat_netbsd.h" 95 #endif 96 97 #include <sys/param.h> 98 #include <sys/systm.h> 99 #include <sys/kernel.h> 100 #include <sys/proc.h> 101 #include <sys/errno.h> 102 #include <sys/buf.h> 103 #include <sys/kmem.h> 104 #include <sys/pool.h> 105 #include <sys/module.h> 106 #include <sys/namei.h> 107 #include <sys/stat.h> 108 #include <sys/ioctl.h> 109 #include <sys/disklabel.h> 110 #include <sys/device.h> 111 #include <sys/disk.h> 112 #include <sys/syslog.h> 113 #include <sys/fcntl.h> 114 #include <sys/vnode.h> 115 #include <sys/conf.h> 116 #include <sys/mutex.h> 117 #include <sys/queue.h> 118 #include <sys/kauth.h> 119 #include <sys/kthread.h> 120 #include <sys/bufq.h> 121 #include <sys/sysctl.h> 122 123 #include <uvm/uvm_extern.h> 124 125 #include <dev/ccdvar.h> 126 #include <dev/dkvar.h> 127 128 #include <miscfs/specfs/specdev.h> /* for v_rdev */ 129 130 #include "ioconf.h" 131 132 #if defined(CCDDEBUG) && !defined(DEBUG) 133 #define DEBUG 134 #endif 135 136 #ifdef DEBUG 137 #define CCDB_FOLLOW 0x01 138 #define CCDB_INIT 0x02 139 #define CCDB_IO 0x04 140 #define CCDB_LABEL 0x08 141 #define CCDB_VNODE 0x10 142 int ccddebug = 0x00; 143 #endif 144 145 #define ccdunit(x) DISKUNIT(x) 146 147 struct ccdbuf { 148 struct buf cb_buf; /* new I/O buf */ 149 struct buf *cb_obp; /* ptr. to original I/O buf */ 150 struct ccd_softc *cb_sc; /* pointer to ccd softc */ 151 int cb_comp; /* target component */ 152 SIMPLEQ_ENTRY(ccdbuf) cb_q; /* fifo of component buffers */ 153 }; 154 155 /* component buffer pool */ 156 static pool_cache_t ccd_cache; 157 158 #define CCD_GETBUF() pool_cache_get(ccd_cache, PR_WAITOK) 159 #define CCD_PUTBUF(cbp) pool_cache_put(ccd_cache, cbp) 160 161 #define CCDLABELDEV(dev) \ 162 (MAKEDISKDEV(major((dev)), ccdunit((dev)), RAW_PART)) 163 164 /* called by main() at boot time */ 165 void ccddetach(void); 166 167 /* called by biodone() at interrupt time */ 168 static void ccdiodone(struct buf *); 169 170 static void ccdinterleave(struct ccd_softc *); 171 static int ccdinit(struct ccd_softc *, char **, struct vnode **, 172 struct lwp *); 173 static struct ccdbuf *ccdbuffer(struct ccd_softc *, struct buf *, 174 daddr_t, void *, long); 175 static void ccdgetdefaultlabel(struct ccd_softc *, struct disklabel *); 176 static void ccdgetdisklabel(dev_t); 177 static void ccdmakedisklabel(struct ccd_softc *); 178 static void ccdstart(struct ccd_softc *); 179 static void ccdthread(void *); 180 181 static dev_type_open(ccdopen); 182 static dev_type_close(ccdclose); 183 static dev_type_read(ccdread); 184 static dev_type_write(ccdwrite); 185 static dev_type_ioctl(ccdioctl); 186 static dev_type_strategy(ccdstrategy); 187 static dev_type_size(ccdsize); 188 189 const struct bdevsw ccd_bdevsw = { 190 .d_open = ccdopen, 191 .d_close = ccdclose, 192 .d_strategy = ccdstrategy, 193 .d_ioctl = ccdioctl, 194 .d_dump = nodump, 195 .d_psize = ccdsize, 196 .d_discard = nodiscard, 197 .d_flag = D_DISK | D_MPSAFE 198 }; 199 200 const struct cdevsw ccd_cdevsw = { 201 .d_open = ccdopen, 202 .d_close = ccdclose, 203 .d_read = ccdread, 204 .d_write = ccdwrite, 205 .d_ioctl = ccdioctl, 206 .d_stop = nostop, 207 .d_tty = notty, 208 .d_poll = nopoll, 209 .d_mmap = nommap, 210 .d_kqfilter = nokqfilter, 211 .d_discard = nodiscard, 212 .d_flag = D_DISK | D_MPSAFE 213 }; 214 215 #ifdef DEBUG 216 static void printiinfo(struct ccdiinfo *); 217 #endif 218 219 static LIST_HEAD(, ccd_softc) ccds = LIST_HEAD_INITIALIZER(ccds); 220 static kmutex_t ccd_lock; 221 static size_t ccd_nactive = 0; 222 223 #ifdef _MODULE 224 static struct sysctllog *ccd_clog; 225 #endif 226 227 SYSCTL_SETUP_PROTO(sysctl_kern_ccd_setup); 228 229 static struct ccd_softc * 230 ccdcreate(int unit) { 231 struct ccd_softc *sc = kmem_zalloc(sizeof(*sc), KM_SLEEP); 232 233 /* Initialize per-softc structures. */ 234 snprintf(sc->sc_xname, sizeof(sc->sc_xname), "ccd%d", unit); 235 sc->sc_unit = unit; 236 mutex_init(&sc->sc_dvlock, MUTEX_DEFAULT, IPL_NONE); 237 sc->sc_iolock = mutex_obj_alloc(MUTEX_DEFAULT, IPL_NONE); 238 cv_init(&sc->sc_stop, "ccdstop"); 239 cv_init(&sc->sc_push, "ccdthr"); 240 disk_init(&sc->sc_dkdev, sc->sc_xname, NULL); /* XXX */ 241 return sc; 242 } 243 244 static void 245 ccddestroy(struct ccd_softc *sc) { 246 mutex_obj_free(sc->sc_iolock); 247 mutex_exit(&sc->sc_dvlock); 248 mutex_destroy(&sc->sc_dvlock); 249 cv_destroy(&sc->sc_stop); 250 cv_destroy(&sc->sc_push); 251 disk_destroy(&sc->sc_dkdev); 252 kmem_free(sc, sizeof(*sc)); 253 } 254 255 static struct ccd_softc * 256 ccdget(int unit, int make) { 257 struct ccd_softc *sc; 258 if (unit < 0) { 259 #ifdef DIAGNOSTIC 260 panic("%s: unit %d!", __func__, unit); 261 #endif 262 return NULL; 263 } 264 mutex_enter(&ccd_lock); 265 LIST_FOREACH(sc, &ccds, sc_link) { 266 if (sc->sc_unit == unit) { 267 mutex_exit(&ccd_lock); 268 return sc; 269 } 270 } 271 mutex_exit(&ccd_lock); 272 if (!make) 273 return NULL; 274 if ((sc = ccdcreate(unit)) == NULL) 275 return NULL; 276 mutex_enter(&ccd_lock); 277 LIST_INSERT_HEAD(&ccds, sc, sc_link); 278 ccd_nactive++; 279 mutex_exit(&ccd_lock); 280 return sc; 281 } 282 283 static void 284 ccdput(struct ccd_softc *sc) { 285 mutex_enter(&ccd_lock); 286 LIST_REMOVE(sc, sc_link); 287 ccd_nactive--; 288 mutex_exit(&ccd_lock); 289 ccddestroy(sc); 290 } 291 292 /* 293 * Called by main() during pseudo-device attachment. All we need 294 * to do is allocate enough space for devices to be configured later. 295 */ 296 void 297 ccdattach(int num) 298 { 299 mutex_init(&ccd_lock, MUTEX_DEFAULT, IPL_NONE); 300 301 /* Initialize the component buffer pool. */ 302 ccd_cache = pool_cache_init(sizeof(struct ccdbuf), 0, 303 0, 0, "ccdbuf", NULL, IPL_BIO, NULL, NULL, NULL); 304 } 305 306 void 307 ccddetach(void) 308 { 309 pool_cache_destroy(ccd_cache); 310 mutex_destroy(&ccd_lock); 311 } 312 313 static int 314 ccdinit(struct ccd_softc *cs, char **cpaths, struct vnode **vpp, 315 struct lwp *l) 316 { 317 struct ccdcinfo *ci = NULL; 318 int ix; 319 struct ccdgeom *ccg = &cs->sc_geom; 320 char *tmppath; 321 int error, path_alloced; 322 uint64_t psize, minsize; 323 unsigned secsize, maxsecsize; 324 struct disk_geom *dg; 325 326 #ifdef DEBUG 327 if (ccddebug & (CCDB_FOLLOW|CCDB_INIT)) 328 printf("%s: ccdinit\n", cs->sc_xname); 329 #endif 330 331 /* Allocate space for the component info. */ 332 cs->sc_cinfo = kmem_alloc(cs->sc_nccdisks * sizeof(*cs->sc_cinfo), 333 KM_SLEEP); 334 tmppath = kmem_alloc(MAXPATHLEN, KM_SLEEP); 335 336 cs->sc_size = 0; 337 338 /* 339 * Verify that each component piece exists and record 340 * relevant information about it. 341 */ 342 maxsecsize = 0; 343 minsize = 0; 344 for (ix = 0, path_alloced = 0; ix < cs->sc_nccdisks; ix++) { 345 ci = &cs->sc_cinfo[ix]; 346 ci->ci_vp = vpp[ix]; 347 348 /* 349 * Copy in the pathname of the component. 350 */ 351 memset(tmppath, 0, MAXPATHLEN); /* sanity */ 352 error = copyinstr(cpaths[ix], tmppath, 353 MAXPATHLEN, &ci->ci_pathlen); 354 if (ci->ci_pathlen == 0) 355 error = EINVAL; 356 if (error) { 357 #ifdef DEBUG 358 if (ccddebug & (CCDB_FOLLOW|CCDB_INIT)) 359 printf("%s: can't copy path, error = %d\n", 360 cs->sc_xname, error); 361 #endif 362 goto out; 363 } 364 ci->ci_path = kmem_alloc(ci->ci_pathlen, KM_SLEEP); 365 memcpy(ci->ci_path, tmppath, ci->ci_pathlen); 366 path_alloced++; 367 368 /* 369 * XXX: Cache the component's dev_t. 370 */ 371 ci->ci_dev = vpp[ix]->v_rdev; 372 373 /* 374 * Get partition information for the component. 375 */ 376 error = getdisksize(vpp[ix], &psize, &secsize); 377 if (error) { 378 #ifdef DEBUG 379 if (ccddebug & (CCDB_FOLLOW|CCDB_INIT)) 380 printf("%s: %s: disksize failed, error = %d\n", 381 cs->sc_xname, ci->ci_path, error); 382 #endif 383 goto out; 384 } 385 386 /* 387 * Calculate the size, truncating to an interleave 388 * boundary if necessary. 389 */ 390 maxsecsize = secsize > maxsecsize ? secsize : maxsecsize; 391 if (cs->sc_ileave > 1) 392 psize -= psize % cs->sc_ileave; 393 394 if (psize == 0) { 395 #ifdef DEBUG 396 if (ccddebug & (CCDB_FOLLOW|CCDB_INIT)) 397 printf("%s: %s: size == 0\n", 398 cs->sc_xname, ci->ci_path); 399 #endif 400 error = ENODEV; 401 goto out; 402 } 403 404 if (minsize == 0 || psize < minsize) 405 minsize = psize; 406 ci->ci_size = psize; 407 cs->sc_size += psize; 408 } 409 410 /* 411 * Don't allow the interleave to be smaller than 412 * the biggest component sector. 413 */ 414 if ((cs->sc_ileave > 0) && 415 (cs->sc_ileave < (maxsecsize / DEV_BSIZE))) { 416 #ifdef DEBUG 417 if (ccddebug & (CCDB_FOLLOW|CCDB_INIT)) 418 printf("%s: interleave must be at least %d\n", 419 cs->sc_xname, (maxsecsize / DEV_BSIZE)); 420 #endif 421 error = EINVAL; 422 goto out; 423 } 424 425 /* 426 * If uniform interleave is desired set all sizes to that of 427 * the smallest component. 428 */ 429 if (cs->sc_flags & CCDF_UNIFORM) { 430 for (ci = cs->sc_cinfo; 431 ci < &cs->sc_cinfo[cs->sc_nccdisks]; ci++) 432 ci->ci_size = minsize; 433 434 cs->sc_size = cs->sc_nccdisks * minsize; 435 } 436 437 /* 438 * Construct the interleave table. 439 */ 440 ccdinterleave(cs); 441 442 /* 443 * Create pseudo-geometry based on 1MB cylinders. It's 444 * pretty close. 445 */ 446 ccg->ccg_secsize = DEV_BSIZE; 447 ccg->ccg_ntracks = 1; 448 ccg->ccg_nsectors = 1024 * (1024 / ccg->ccg_secsize); 449 ccg->ccg_ncylinders = cs->sc_size / ccg->ccg_nsectors; 450 451 dg = &cs->sc_dkdev.dk_geom; 452 memset(dg, 0, sizeof(*dg)); 453 dg->dg_secperunit = cs->sc_size; 454 dg->dg_secsize = ccg->ccg_secsize; 455 dg->dg_nsectors = ccg->ccg_nsectors; 456 dg->dg_ntracks = ccg->ccg_ntracks; 457 dg->dg_ncylinders = ccg->ccg_ncylinders; 458 459 if (cs->sc_ileave > 0) 460 aprint_normal("%s: Interleaving %d component%s " 461 "(%d block interleave)\n", cs->sc_xname, 462 cs->sc_nccdisks, (cs->sc_nccdisks != 0 ? "s" : ""), 463 cs->sc_ileave); 464 else 465 aprint_normal("%s: Concatenating %d component%s\n", 466 cs->sc_xname, 467 cs->sc_nccdisks, (cs->sc_nccdisks != 0 ? "s" : "")); 468 for (ix = 0; ix < cs->sc_nccdisks; ix++) { 469 ci = &cs->sc_cinfo[ix]; 470 aprint_normal("%s: %s (%ju blocks)\n", cs->sc_xname, 471 ci->ci_path, (uintmax_t)ci->ci_size); 472 } 473 aprint_normal("%s: total %ju blocks\n", cs->sc_xname, cs->sc_size); 474 475 /* 476 * Create thread to handle deferred I/O. 477 */ 478 cs->sc_zap = false; 479 error = kthread_create(PRI_BIO, KTHREAD_MPSAFE, NULL, ccdthread, 480 cs, &cs->sc_thread, "%s", cs->sc_xname); 481 if (error) { 482 printf("ccdinit: can't create thread: %d\n", error); 483 goto out; 484 } 485 486 /* 487 * Only now that everything is set up can we enable the device. 488 */ 489 mutex_enter(cs->sc_iolock); 490 cs->sc_flags |= CCDF_INITED; 491 mutex_exit(cs->sc_iolock); 492 kmem_free(tmppath, MAXPATHLEN); 493 return (0); 494 495 out: 496 for (ix = 0; ix < path_alloced; ix++) { 497 kmem_free(cs->sc_cinfo[ix].ci_path, 498 cs->sc_cinfo[ix].ci_pathlen); 499 } 500 kmem_free(cs->sc_cinfo, cs->sc_nccdisks * sizeof(struct ccdcinfo)); 501 kmem_free(tmppath, MAXPATHLEN); 502 return (error); 503 } 504 505 static void 506 ccdinterleave(struct ccd_softc *cs) 507 { 508 struct ccdcinfo *ci, *smallci; 509 struct ccdiinfo *ii; 510 daddr_t bn, lbn; 511 int ix; 512 u_long size; 513 514 #ifdef DEBUG 515 if (ccddebug & CCDB_INIT) 516 printf("ccdinterleave(%p): ileave %d\n", cs, cs->sc_ileave); 517 #endif 518 /* 519 * Allocate an interleave table. 520 * Chances are this is too big, but we don't care. 521 */ 522 size = (cs->sc_nccdisks + 1) * sizeof(struct ccdiinfo); 523 cs->sc_itable = kmem_zalloc(size, KM_SLEEP); 524 525 /* 526 * Trivial case: no interleave (actually interleave of disk size). 527 * Each table entry represents a single component in its entirety. 528 */ 529 if (cs->sc_ileave == 0) { 530 bn = 0; 531 ii = cs->sc_itable; 532 533 for (ix = 0; ix < cs->sc_nccdisks; ix++) { 534 /* Allocate space for ii_index. */ 535 ii->ii_indexsz = sizeof(int); 536 ii->ii_index = kmem_alloc(ii->ii_indexsz, KM_SLEEP); 537 ii->ii_ndisk = 1; 538 ii->ii_startblk = bn; 539 ii->ii_startoff = 0; 540 ii->ii_index[0] = ix; 541 bn += cs->sc_cinfo[ix].ci_size; 542 ii++; 543 } 544 ii->ii_ndisk = 0; 545 #ifdef DEBUG 546 if (ccddebug & CCDB_INIT) 547 printiinfo(cs->sc_itable); 548 #endif 549 return; 550 } 551 552 /* 553 * The following isn't fast or pretty; it doesn't have to be. 554 */ 555 size = 0; 556 bn = lbn = 0; 557 for (ii = cs->sc_itable; ; ii++) { 558 /* Allocate space for ii_index. */ 559 ii->ii_indexsz = sizeof(int) * cs->sc_nccdisks; 560 ii->ii_index = kmem_alloc(ii->ii_indexsz, KM_SLEEP); 561 562 /* 563 * Locate the smallest of the remaining components 564 */ 565 smallci = NULL; 566 for (ci = cs->sc_cinfo; 567 ci < &cs->sc_cinfo[cs->sc_nccdisks]; ci++) 568 if (ci->ci_size > size && 569 (smallci == NULL || 570 ci->ci_size < smallci->ci_size)) 571 smallci = ci; 572 573 /* 574 * Nobody left, all done 575 */ 576 if (smallci == NULL) { 577 ii->ii_ndisk = 0; 578 break; 579 } 580 581 /* 582 * Record starting logical block and component offset 583 */ 584 ii->ii_startblk = bn / cs->sc_ileave; 585 ii->ii_startoff = lbn; 586 587 /* 588 * Determine how many disks take part in this interleave 589 * and record their indices. 590 */ 591 ix = 0; 592 for (ci = cs->sc_cinfo; 593 ci < &cs->sc_cinfo[cs->sc_nccdisks]; ci++) 594 if (ci->ci_size >= smallci->ci_size) 595 ii->ii_index[ix++] = ci - cs->sc_cinfo; 596 ii->ii_ndisk = ix; 597 bn += ix * (smallci->ci_size - size); 598 lbn = smallci->ci_size / cs->sc_ileave; 599 size = smallci->ci_size; 600 } 601 #ifdef DEBUG 602 if (ccddebug & CCDB_INIT) 603 printiinfo(cs->sc_itable); 604 #endif 605 } 606 607 /* ARGSUSED */ 608 static int 609 ccdopen(dev_t dev, int flags, int fmt, struct lwp *l) 610 { 611 int unit = ccdunit(dev); 612 struct ccd_softc *cs; 613 struct disklabel *lp; 614 int error = 0, part, pmask; 615 616 #ifdef DEBUG 617 if (ccddebug & CCDB_FOLLOW) 618 printf("ccdopen(0x%"PRIx64", 0x%x)\n", dev, flags); 619 #endif 620 if ((cs = ccdget(unit, 1)) == NULL) 621 return ENXIO; 622 623 mutex_enter(&cs->sc_dvlock); 624 625 lp = cs->sc_dkdev.dk_label; 626 627 part = DISKPART(dev); 628 pmask = (1 << part); 629 630 /* 631 * If we're initialized, check to see if there are any other 632 * open partitions. If not, then it's safe to update 633 * the in-core disklabel. Only read the disklabel if it is 634 * not already valid. 635 */ 636 if ((cs->sc_flags & (CCDF_INITED|CCDF_VLABEL)) == CCDF_INITED && 637 cs->sc_dkdev.dk_openmask == 0) 638 ccdgetdisklabel(dev); 639 640 /* Check that the partition exists. */ 641 if (part != RAW_PART) { 642 if (((cs->sc_flags & CCDF_INITED) == 0) || 643 ((part >= lp->d_npartitions) || 644 (lp->d_partitions[part].p_fstype == FS_UNUSED))) { 645 error = ENXIO; 646 goto done; 647 } 648 } 649 650 /* Prevent our unit from being unconfigured while open. */ 651 switch (fmt) { 652 case S_IFCHR: 653 cs->sc_dkdev.dk_copenmask |= pmask; 654 break; 655 656 case S_IFBLK: 657 cs->sc_dkdev.dk_bopenmask |= pmask; 658 break; 659 } 660 cs->sc_dkdev.dk_openmask = 661 cs->sc_dkdev.dk_copenmask | cs->sc_dkdev.dk_bopenmask; 662 663 done: 664 mutex_exit(&cs->sc_dvlock); 665 return (error); 666 } 667 668 /* ARGSUSED */ 669 static int 670 ccdclose(dev_t dev, int flags, int fmt, struct lwp *l) 671 { 672 int unit = ccdunit(dev); 673 struct ccd_softc *cs; 674 int part; 675 676 #ifdef DEBUG 677 if (ccddebug & CCDB_FOLLOW) 678 printf("ccdclose(0x%"PRIx64", 0x%x)\n", dev, flags); 679 #endif 680 681 if ((cs = ccdget(unit, 0)) == NULL) 682 return ENXIO; 683 684 mutex_enter(&cs->sc_dvlock); 685 686 part = DISKPART(dev); 687 688 /* ...that much closer to allowing unconfiguration... */ 689 switch (fmt) { 690 case S_IFCHR: 691 cs->sc_dkdev.dk_copenmask &= ~(1 << part); 692 break; 693 694 case S_IFBLK: 695 cs->sc_dkdev.dk_bopenmask &= ~(1 << part); 696 break; 697 } 698 cs->sc_dkdev.dk_openmask = 699 cs->sc_dkdev.dk_copenmask | cs->sc_dkdev.dk_bopenmask; 700 701 if (cs->sc_dkdev.dk_openmask == 0) { 702 if ((cs->sc_flags & CCDF_KLABEL) == 0) 703 cs->sc_flags &= ~CCDF_VLABEL; 704 } 705 706 mutex_exit(&cs->sc_dvlock); 707 return (0); 708 } 709 710 static bool 711 ccdbackoff(struct ccd_softc *cs) 712 { 713 714 /* XXX Arbitrary, should be a uvm call. */ 715 return uvmexp.free < (uvmexp.freemin >> 1) && 716 disk_isbusy(&cs->sc_dkdev); 717 } 718 719 static void 720 ccdthread(void *cookie) 721 { 722 struct ccd_softc *cs; 723 724 cs = cookie; 725 726 #ifdef DEBUG 727 if (ccddebug & CCDB_FOLLOW) 728 printf("ccdthread: hello\n"); 729 #endif 730 731 mutex_enter(cs->sc_iolock); 732 while (__predict_true(!cs->sc_zap)) { 733 if (bufq_peek(cs->sc_bufq) == NULL) { 734 /* Nothing to do. */ 735 cv_wait(&cs->sc_push, cs->sc_iolock); 736 continue; 737 } 738 if (ccdbackoff(cs)) { 739 /* Wait for memory to become available. */ 740 (void)cv_timedwait(&cs->sc_push, cs->sc_iolock, 1); 741 continue; 742 } 743 #ifdef DEBUG 744 if (ccddebug & CCDB_FOLLOW) 745 printf("ccdthread: dispatching I/O\n"); 746 #endif 747 ccdstart(cs); 748 mutex_enter(cs->sc_iolock); 749 } 750 cs->sc_thread = NULL; 751 mutex_exit(cs->sc_iolock); 752 #ifdef DEBUG 753 if (ccddebug & CCDB_FOLLOW) 754 printf("ccdthread: goodbye\n"); 755 #endif 756 kthread_exit(0); 757 } 758 759 static void 760 ccdstrategy(struct buf *bp) 761 { 762 int unit = ccdunit(bp->b_dev); 763 struct ccd_softc *cs; 764 if ((cs = ccdget(unit, 0)) == NULL) 765 return; 766 767 /* Must be open or reading label. */ 768 KASSERT(cs->sc_dkdev.dk_openmask != 0 || 769 (cs->sc_flags & CCDF_RLABEL) != 0); 770 771 mutex_enter(cs->sc_iolock); 772 /* Synchronize with device init/uninit. */ 773 if (__predict_false((cs->sc_flags & CCDF_INITED) == 0)) { 774 mutex_exit(cs->sc_iolock); 775 #ifdef DEBUG 776 if (ccddebug & CCDB_FOLLOW) 777 printf("ccdstrategy: unit %d: not inited\n", unit); 778 #endif 779 bp->b_error = ENXIO; 780 bp->b_resid = bp->b_bcount; 781 biodone(bp); 782 return; 783 } 784 785 /* Defer to thread if system is low on memory. */ 786 bufq_put(cs->sc_bufq, bp); 787 if (__predict_false(ccdbackoff(cs))) { 788 mutex_exit(cs->sc_iolock); 789 #ifdef DEBUG 790 if (ccddebug & CCDB_FOLLOW) 791 printf("ccdstrategy: holding off on I/O\n"); 792 #endif 793 return; 794 } 795 ccdstart(cs); 796 } 797 798 static void 799 ccdstart(struct ccd_softc *cs) 800 { 801 daddr_t blkno; 802 int wlabel; 803 struct disklabel *lp; 804 long bcount, rcount; 805 struct ccdbuf *cbp; 806 char *addr; 807 daddr_t bn; 808 vnode_t *vp; 809 buf_t *bp; 810 811 KASSERT(mutex_owned(cs->sc_iolock)); 812 813 bp = bufq_get(cs->sc_bufq); 814 KASSERT(bp != NULL); 815 816 disk_busy(&cs->sc_dkdev); 817 818 #ifdef DEBUG 819 if (ccddebug & CCDB_FOLLOW) 820 printf("ccdstart(%s, %p)\n", cs->sc_xname, bp); 821 #endif 822 823 /* If it's a nil transfer, wake up the top half now. */ 824 if (bp->b_bcount == 0) 825 goto done; 826 827 lp = cs->sc_dkdev.dk_label; 828 829 /* 830 * Do bounds checking and adjust transfer. If there's an 831 * error, the bounds check will flag that for us. Convert 832 * the partition relative block number to an absolute. 833 */ 834 blkno = bp->b_blkno; 835 wlabel = cs->sc_flags & (CCDF_WLABEL|CCDF_LABELLING); 836 if (DISKPART(bp->b_dev) != RAW_PART) { 837 if (bounds_check_with_label(&cs->sc_dkdev, bp, wlabel) <= 0) 838 goto done; 839 blkno += lp->d_partitions[DISKPART(bp->b_dev)].p_offset; 840 } 841 mutex_exit(cs->sc_iolock); 842 bp->b_rawblkno = blkno; 843 844 /* Allocate the component buffers and start I/O! */ 845 bp->b_resid = bp->b_bcount; 846 bn = bp->b_rawblkno; 847 addr = bp->b_data; 848 for (bcount = bp->b_bcount; bcount > 0; bcount -= rcount) { 849 cbp = ccdbuffer(cs, bp, bn, addr, bcount); 850 rcount = cbp->cb_buf.b_bcount; 851 bn += btodb(rcount); 852 addr += rcount; 853 vp = cbp->cb_buf.b_vp; 854 if ((cbp->cb_buf.b_flags & B_READ) == 0) { 855 mutex_enter(vp->v_interlock); 856 vp->v_numoutput++; 857 mutex_exit(vp->v_interlock); 858 } 859 (void)VOP_STRATEGY(vp, &cbp->cb_buf); 860 } 861 return; 862 863 done: 864 disk_unbusy(&cs->sc_dkdev, 0, 0); 865 cv_broadcast(&cs->sc_stop); 866 cv_broadcast(&cs->sc_push); 867 mutex_exit(cs->sc_iolock); 868 bp->b_resid = bp->b_bcount; 869 biodone(bp); 870 } 871 872 /* 873 * Build a component buffer header. 874 */ 875 static struct ccdbuf * 876 ccdbuffer(struct ccd_softc *cs, struct buf *bp, daddr_t bn, void *addr, 877 long bcount) 878 { 879 struct ccdcinfo *ci; 880 struct ccdbuf *cbp; 881 daddr_t cbn, cboff; 882 u_int64_t cbc; 883 int ccdisk; 884 885 #ifdef DEBUG 886 if (ccddebug & CCDB_IO) 887 printf("ccdbuffer(%p, %p, %" PRId64 ", %p, %ld)\n", 888 cs, bp, bn, addr, bcount); 889 #endif 890 /* 891 * Determine which component bn falls in. 892 */ 893 cbn = bn; 894 cboff = 0; 895 896 /* 897 * Serially concatenated 898 */ 899 if (cs->sc_ileave == 0) { 900 daddr_t sblk; 901 902 sblk = 0; 903 for (ccdisk = 0, ci = &cs->sc_cinfo[ccdisk]; 904 cbn >= sblk + ci->ci_size; 905 ccdisk++, ci = &cs->sc_cinfo[ccdisk]) 906 sblk += ci->ci_size; 907 cbn -= sblk; 908 } 909 /* 910 * Interleaved 911 */ 912 else { 913 struct ccdiinfo *ii; 914 int off; 915 916 cboff = cbn % cs->sc_ileave; 917 cbn /= cs->sc_ileave; 918 for (ii = cs->sc_itable; ii->ii_ndisk; ii++) 919 if (ii->ii_startblk > cbn) 920 break; 921 ii--; 922 off = cbn - ii->ii_startblk; 923 if (ii->ii_ndisk == 1) { 924 ccdisk = ii->ii_index[0]; 925 cbn = ii->ii_startoff + off; 926 } else { 927 ccdisk = ii->ii_index[off % ii->ii_ndisk]; 928 cbn = ii->ii_startoff + off / ii->ii_ndisk; 929 } 930 cbn *= cs->sc_ileave; 931 ci = &cs->sc_cinfo[ccdisk]; 932 } 933 934 /* 935 * Fill in the component buf structure. 936 */ 937 cbp = CCD_GETBUF(); 938 KASSERT(cbp != NULL); 939 buf_init(&cbp->cb_buf); 940 cbp->cb_buf.b_flags = bp->b_flags; 941 cbp->cb_buf.b_oflags = bp->b_oflags; 942 cbp->cb_buf.b_cflags = bp->b_cflags; 943 cbp->cb_buf.b_iodone = ccdiodone; 944 cbp->cb_buf.b_proc = bp->b_proc; 945 cbp->cb_buf.b_dev = ci->ci_dev; 946 cbp->cb_buf.b_blkno = cbn + cboff; 947 cbp->cb_buf.b_data = addr; 948 cbp->cb_buf.b_vp = ci->ci_vp; 949 cbp->cb_buf.b_objlock = ci->ci_vp->v_interlock; 950 if (cs->sc_ileave == 0) 951 cbc = dbtob((u_int64_t)(ci->ci_size - cbn)); 952 else 953 cbc = dbtob((u_int64_t)(cs->sc_ileave - cboff)); 954 cbp->cb_buf.b_bcount = cbc < bcount ? cbc : bcount; 955 956 /* 957 * context for ccdiodone 958 */ 959 cbp->cb_obp = bp; 960 cbp->cb_sc = cs; 961 cbp->cb_comp = ccdisk; 962 963 BIO_COPYPRIO(&cbp->cb_buf, bp); 964 965 #ifdef DEBUG 966 if (ccddebug & CCDB_IO) 967 printf(" dev 0x%"PRIx64"(u%lu): cbp %p bn %" PRId64 " addr %p" 968 " bcnt %d\n", 969 ci->ci_dev, (unsigned long) (ci-cs->sc_cinfo), cbp, 970 cbp->cb_buf.b_blkno, cbp->cb_buf.b_data, 971 cbp->cb_buf.b_bcount); 972 #endif 973 974 return (cbp); 975 } 976 977 /* 978 * Called at interrupt time. 979 * Mark the component as done and if all components are done, 980 * take a ccd interrupt. 981 */ 982 static void 983 ccdiodone(struct buf *vbp) 984 { 985 struct ccdbuf *cbp = (struct ccdbuf *) vbp; 986 struct buf *bp = cbp->cb_obp; 987 struct ccd_softc *cs = cbp->cb_sc; 988 int count; 989 990 #ifdef DEBUG 991 if (ccddebug & CCDB_FOLLOW) 992 printf("ccdiodone(%p)\n", cbp); 993 if (ccddebug & CCDB_IO) { 994 printf("ccdiodone: bp %p bcount %d resid %d\n", 995 bp, bp->b_bcount, bp->b_resid); 996 printf(" dev 0x%"PRIx64"(u%d), cbp %p bn %" PRId64 " addr %p" 997 " bcnt %d\n", 998 cbp->cb_buf.b_dev, cbp->cb_comp, cbp, 999 cbp->cb_buf.b_blkno, cbp->cb_buf.b_data, 1000 cbp->cb_buf.b_bcount); 1001 } 1002 #endif 1003 1004 if (cbp->cb_buf.b_error != 0) { 1005 bp->b_error = cbp->cb_buf.b_error; 1006 printf("%s: error %d on component %d\n", 1007 cs->sc_xname, bp->b_error, cbp->cb_comp); 1008 } 1009 count = cbp->cb_buf.b_bcount; 1010 buf_destroy(&cbp->cb_buf); 1011 CCD_PUTBUF(cbp); 1012 1013 /* 1014 * If all done, "interrupt". 1015 */ 1016 mutex_enter(cs->sc_iolock); 1017 bp->b_resid -= count; 1018 if (bp->b_resid < 0) 1019 panic("ccdiodone: count"); 1020 if (bp->b_resid == 0) { 1021 /* 1022 * Request is done for better or worse, wakeup the top half. 1023 */ 1024 if (bp->b_error != 0) 1025 bp->b_resid = bp->b_bcount; 1026 disk_unbusy(&cs->sc_dkdev, (bp->b_bcount - bp->b_resid), 1027 (bp->b_flags & B_READ)); 1028 if (!disk_isbusy(&cs->sc_dkdev)) { 1029 if (bufq_peek(cs->sc_bufq) != NULL) { 1030 cv_broadcast(&cs->sc_push); 1031 } 1032 cv_broadcast(&cs->sc_stop); 1033 } 1034 mutex_exit(cs->sc_iolock); 1035 biodone(bp); 1036 } else 1037 mutex_exit(cs->sc_iolock); 1038 } 1039 1040 /* ARGSUSED */ 1041 static int 1042 ccdread(dev_t dev, struct uio *uio, int flags) 1043 { 1044 int unit = ccdunit(dev); 1045 struct ccd_softc *cs; 1046 1047 #ifdef DEBUG 1048 if (ccddebug & CCDB_FOLLOW) 1049 printf("ccdread(0x%"PRIx64", %p)\n", dev, uio); 1050 #endif 1051 if ((cs = ccdget(unit, 0)) == NULL) 1052 return 0; 1053 1054 /* Unlocked advisory check, ccdstrategy check is synchronous. */ 1055 if ((cs->sc_flags & CCDF_INITED) == 0) 1056 return (ENXIO); 1057 1058 return (physio(ccdstrategy, NULL, dev, B_READ, minphys, uio)); 1059 } 1060 1061 /* ARGSUSED */ 1062 static int 1063 ccdwrite(dev_t dev, struct uio *uio, int flags) 1064 { 1065 int unit = ccdunit(dev); 1066 struct ccd_softc *cs; 1067 1068 #ifdef DEBUG 1069 if (ccddebug & CCDB_FOLLOW) 1070 printf("ccdwrite(0x%"PRIx64", %p)\n", dev, uio); 1071 #endif 1072 if ((cs = ccdget(unit, 0)) == NULL) 1073 return ENOENT; 1074 1075 /* Unlocked advisory check, ccdstrategy check is synchronous. */ 1076 if ((cs->sc_flags & CCDF_INITED) == 0) 1077 return (ENXIO); 1078 1079 return (physio(ccdstrategy, NULL, dev, B_WRITE, minphys, uio)); 1080 } 1081 1082 static int 1083 ccdioctl(dev_t dev, u_long cmd, void *data, int flag, struct lwp *l) 1084 { 1085 int unit = ccdunit(dev); 1086 int i, j, lookedup = 0, error = 0; 1087 int part, pmask, make; 1088 struct ccd_softc *cs; 1089 struct ccd_ioctl *ccio = (struct ccd_ioctl *)data; 1090 kauth_cred_t uc; 1091 char **cpp; 1092 struct pathbuf *pb; 1093 struct vnode **vpp; 1094 #ifdef __HAVE_OLD_DISKLABEL 1095 struct disklabel newlabel; 1096 #endif 1097 1098 switch (cmd) { 1099 #if defined(COMPAT_60) && !defined(_LP64) 1100 case CCDIOCSET_60: 1101 #endif 1102 case CCDIOCSET: 1103 make = 1; 1104 break; 1105 default: 1106 make = 0; 1107 break; 1108 } 1109 1110 if ((cs = ccdget(unit, make)) == NULL) 1111 return ENOENT; 1112 uc = kauth_cred_get(); 1113 1114 /* 1115 * Compat code must not be called if on a platform where 1116 * sizeof (size_t) == sizeof (uint64_t) as CCDIOCSET will 1117 * be the same as CCDIOCSET_60 1118 */ 1119 #if defined(COMPAT_60) && !defined(_LP64) 1120 switch (cmd) { 1121 case CCDIOCSET_60: { 1122 struct ccd_ioctl ccionew; 1123 struct ccd_ioctl_60 *ccio60 = 1124 (struct ccd_ioctl_60 *)data; 1125 ccionew.ccio_disks = ccio->ccio_disks; 1126 ccionew.ccio_ndisks = ccio->ccio_ndisks; 1127 ccionew.ccio_ileave = ccio->ccio_ileave; 1128 ccionew.ccio_flags = ccio->ccio_flags; 1129 ccionew.ccio_unit = ccio->ccio_unit; 1130 error = ccdioctl(dev, CCDIOCSET, &ccionew, flag, l); 1131 if (!error) { 1132 /* Copy data back, adjust types if necessary */ 1133 ccio60->ccio_disks = ccionew.ccio_disks; 1134 ccio60->ccio_ndisks = ccionew.ccio_ndisks; 1135 ccio60->ccio_ileave = ccionew.ccio_ileave; 1136 ccio60->ccio_flags = ccionew.ccio_flags; 1137 ccio60->ccio_unit = ccionew.ccio_unit; 1138 ccio60->ccio_size = (size_t)ccionew.ccio_size; 1139 } 1140 return error; 1141 } 1142 break; 1143 1144 case CCDIOCCLR_60: 1145 /* 1146 * ccio_size member not used, so existing struct OK 1147 * drop through to existing non-compat version 1148 */ 1149 cmd = CCDIOCCLR; 1150 break; 1151 } 1152 #endif /* COMPAT_60 && !_LP64*/ 1153 1154 /* Must be open for writes for these commands... */ 1155 switch (cmd) { 1156 case CCDIOCSET: 1157 case CCDIOCCLR: 1158 case DIOCSDINFO: 1159 case DIOCWDINFO: 1160 case DIOCCACHESYNC: 1161 case DIOCAWEDGE: 1162 case DIOCDWEDGE: 1163 case DIOCMWEDGES: 1164 #ifdef __HAVE_OLD_DISKLABEL 1165 case ODIOCSDINFO: 1166 case ODIOCWDINFO: 1167 #endif 1168 case DIOCKLABEL: 1169 case DIOCWLABEL: 1170 if ((flag & FWRITE) == 0) 1171 return (EBADF); 1172 } 1173 1174 mutex_enter(&cs->sc_dvlock); 1175 1176 /* Must be initialized for these... */ 1177 switch (cmd) { 1178 case CCDIOCCLR: 1179 case DIOCGDINFO: 1180 case DIOCGSTRATEGY: 1181 case DIOCGCACHE: 1182 case DIOCCACHESYNC: 1183 case DIOCAWEDGE: 1184 case DIOCDWEDGE: 1185 case DIOCLWEDGES: 1186 case DIOCMWEDGES: 1187 case DIOCSDINFO: 1188 case DIOCWDINFO: 1189 case DIOCGPARTINFO: 1190 case DIOCWLABEL: 1191 case DIOCKLABEL: 1192 case DIOCGDEFLABEL: 1193 #ifdef __HAVE_OLD_DISKLABEL 1194 case ODIOCGDINFO: 1195 case ODIOCSDINFO: 1196 case ODIOCWDINFO: 1197 case ODIOCGDEFLABEL: 1198 #endif 1199 if ((cs->sc_flags & CCDF_INITED) == 0) { 1200 error = ENXIO; 1201 goto out; 1202 } 1203 } 1204 1205 error = disk_ioctl(&cs->sc_dkdev, dev, cmd, data, flag, l); 1206 if (error != EPASSTHROUGH) 1207 goto out; 1208 1209 error = 0; 1210 switch (cmd) { 1211 case CCDIOCSET: 1212 if (cs->sc_flags & CCDF_INITED) { 1213 error = EBUSY; 1214 goto out; 1215 } 1216 1217 /* Validate the flags. */ 1218 if ((ccio->ccio_flags & CCDF_USERMASK) != ccio->ccio_flags) { 1219 error = EINVAL; 1220 goto out; 1221 } 1222 1223 if (ccio->ccio_ndisks > CCD_MAXNDISKS || 1224 ccio->ccio_ndisks == 0) { 1225 error = EINVAL; 1226 goto out; 1227 } 1228 1229 /* Fill in some important bits. */ 1230 cs->sc_ileave = ccio->ccio_ileave; 1231 cs->sc_nccdisks = ccio->ccio_ndisks; 1232 cs->sc_flags = ccio->ccio_flags & CCDF_USERMASK; 1233 1234 /* 1235 * Allocate space for and copy in the array of 1236 * component pathnames and device numbers. 1237 */ 1238 cpp = kmem_alloc(ccio->ccio_ndisks * sizeof(*cpp), KM_SLEEP); 1239 vpp = kmem_alloc(ccio->ccio_ndisks * sizeof(*vpp), KM_SLEEP); 1240 error = copyin(ccio->ccio_disks, cpp, 1241 ccio->ccio_ndisks * sizeof(*cpp)); 1242 if (error) { 1243 kmem_free(vpp, ccio->ccio_ndisks * sizeof(*vpp)); 1244 kmem_free(cpp, ccio->ccio_ndisks * sizeof(*cpp)); 1245 goto out; 1246 } 1247 1248 #ifdef DEBUG 1249 if (ccddebug & CCDB_INIT) 1250 for (i = 0; i < ccio->ccio_ndisks; ++i) 1251 printf("ccdioctl: component %d: %p\n", 1252 i, cpp[i]); 1253 #endif 1254 1255 for (i = 0; i < ccio->ccio_ndisks; ++i) { 1256 #ifdef DEBUG 1257 if (ccddebug & CCDB_INIT) 1258 printf("ccdioctl: lookedup = %d\n", lookedup); 1259 #endif 1260 error = pathbuf_copyin(cpp[i], &pb); 1261 if (error == 0) { 1262 error = dk_lookup(pb, l, &vpp[i]); 1263 } 1264 pathbuf_destroy(pb); 1265 if (error != 0) { 1266 for (j = 0; j < lookedup; ++j) 1267 (void)vn_close(vpp[j], FREAD|FWRITE, 1268 uc); 1269 kmem_free(vpp, ccio->ccio_ndisks * 1270 sizeof(*vpp)); 1271 kmem_free(cpp, ccio->ccio_ndisks * 1272 sizeof(*cpp)); 1273 goto out; 1274 } 1275 ++lookedup; 1276 } 1277 1278 /* Attach the disk. */ 1279 disk_attach(&cs->sc_dkdev); 1280 bufq_alloc(&cs->sc_bufq, "fcfs", 0); 1281 1282 /* 1283 * Initialize the ccd. Fills in the softc for us. 1284 */ 1285 if ((error = ccdinit(cs, cpp, vpp, l)) != 0) { 1286 for (j = 0; j < lookedup; ++j) 1287 (void)vn_close(vpp[j], FREAD|FWRITE, 1288 uc); 1289 kmem_free(vpp, ccio->ccio_ndisks * sizeof(*vpp)); 1290 kmem_free(cpp, ccio->ccio_ndisks * sizeof(*cpp)); 1291 disk_detach(&cs->sc_dkdev); 1292 mutex_exit(&cs->sc_dvlock); 1293 bufq_free(cs->sc_bufq); 1294 return error; 1295 } 1296 1297 /* We can free the temporary variables now. */ 1298 kmem_free(vpp, ccio->ccio_ndisks * sizeof(*vpp)); 1299 kmem_free(cpp, ccio->ccio_ndisks * sizeof(*cpp)); 1300 1301 /* 1302 * The ccd has been successfully initialized, so 1303 * we can place it into the array. Don't try to 1304 * read the disklabel until the disk has been attached, 1305 * because space for the disklabel is allocated 1306 * in disk_attach(); 1307 */ 1308 ccio->ccio_unit = unit; 1309 ccio->ccio_size = cs->sc_size; 1310 1311 /* Try and read the disklabel. */ 1312 ccdgetdisklabel(dev); 1313 disk_set_info(NULL, &cs->sc_dkdev, NULL); 1314 1315 /* discover wedges */ 1316 mutex_exit(&cs->sc_dvlock); 1317 dkwedge_discover(&cs->sc_dkdev); 1318 return 0; 1319 1320 case CCDIOCCLR: 1321 /* 1322 * Don't unconfigure if any other partitions are open 1323 * or if both the character and block flavors of this 1324 * partition are open. 1325 */ 1326 part = DISKPART(dev); 1327 pmask = (1 << part); 1328 if ((cs->sc_dkdev.dk_openmask & ~pmask) || 1329 ((cs->sc_dkdev.dk_bopenmask & pmask) && 1330 (cs->sc_dkdev.dk_copenmask & pmask))) { 1331 error = EBUSY; 1332 goto out; 1333 } 1334 1335 /* Delete all of our wedges. */ 1336 dkwedge_delall(&cs->sc_dkdev); 1337 1338 /* Stop new I/O, wait for in-flight I/O to complete. */ 1339 mutex_enter(cs->sc_iolock); 1340 cs->sc_flags &= ~(CCDF_INITED|CCDF_VLABEL); 1341 cs->sc_zap = true; 1342 while (disk_isbusy(&cs->sc_dkdev) || 1343 bufq_peek(cs->sc_bufq) != NULL || 1344 cs->sc_thread != NULL) { 1345 cv_broadcast(&cs->sc_push); 1346 (void)cv_timedwait(&cs->sc_stop, cs->sc_iolock, hz); 1347 } 1348 mutex_exit(cs->sc_iolock); 1349 1350 /* 1351 * Free ccd_softc information and clear entry. 1352 */ 1353 1354 /* Close the components and free their pathnames. */ 1355 for (i = 0; i < cs->sc_nccdisks; ++i) { 1356 /* 1357 * XXX: this close could potentially fail and 1358 * cause Bad Things. Maybe we need to force 1359 * the close to happen? 1360 */ 1361 #ifdef DEBUG 1362 if (ccddebug & CCDB_VNODE) 1363 vprint("CCDIOCCLR: vnode info", 1364 cs->sc_cinfo[i].ci_vp); 1365 #endif 1366 (void)vn_close(cs->sc_cinfo[i].ci_vp, FREAD|FWRITE, 1367 uc); 1368 kmem_free(cs->sc_cinfo[i].ci_path, 1369 cs->sc_cinfo[i].ci_pathlen); 1370 } 1371 1372 /* Free interleave index. */ 1373 for (i = 0; cs->sc_itable[i].ii_ndisk; ++i) { 1374 kmem_free(cs->sc_itable[i].ii_index, 1375 cs->sc_itable[i].ii_indexsz); 1376 } 1377 1378 /* Free component info and interleave table. */ 1379 kmem_free(cs->sc_cinfo, cs->sc_nccdisks * 1380 sizeof(struct ccdcinfo)); 1381 kmem_free(cs->sc_itable, (cs->sc_nccdisks + 1) * 1382 sizeof(struct ccdiinfo)); 1383 1384 aprint_normal("%s: detached\n", cs->sc_xname); 1385 1386 /* Detach the disk. */ 1387 disk_detach(&cs->sc_dkdev); 1388 bufq_free(cs->sc_bufq); 1389 ccdput(cs); 1390 /* Don't break, otherwise cs is read again. */ 1391 return 0; 1392 1393 case DIOCGSTRATEGY: 1394 { 1395 struct disk_strategy *dks = (void *)data; 1396 1397 mutex_enter(cs->sc_iolock); 1398 if (cs->sc_bufq != NULL) 1399 strlcpy(dks->dks_name, 1400 bufq_getstrategyname(cs->sc_bufq), 1401 sizeof(dks->dks_name)); 1402 else 1403 error = EINVAL; 1404 mutex_exit(cs->sc_iolock); 1405 dks->dks_paramlen = 0; 1406 break; 1407 } 1408 1409 case DIOCGCACHE: 1410 { 1411 int dkcache = 0; 1412 1413 /* 1414 * We pass this call down to all components and report 1415 * intersection of the flags returned by the components. 1416 * If any errors out, we return error. CCD components 1417 * can not change unless the device is unconfigured, so 1418 * device feature flags will remain static. RCE/WCE can change 1419 * of course, if set directly on underlying device. 1420 */ 1421 for (error = 0, i = 0; i < cs->sc_nccdisks; i++) { 1422 error = VOP_IOCTL(cs->sc_cinfo[i].ci_vp, cmd, &j, 1423 flag, uc); 1424 if (error) 1425 break; 1426 1427 if (i == 0) 1428 dkcache = j; 1429 else 1430 dkcache = DKCACHE_COMBINE(dkcache, j); 1431 } 1432 1433 *((int *)data) = dkcache; 1434 break; 1435 } 1436 1437 case DIOCCACHESYNC: 1438 /* 1439 * We pass this call down to all components and report 1440 * the first error we encounter. 1441 */ 1442 for (error = 0, i = 0; i < cs->sc_nccdisks; i++) { 1443 j = VOP_IOCTL(cs->sc_cinfo[i].ci_vp, cmd, data, 1444 flag, uc); 1445 if (j != 0 && error == 0) 1446 error = j; 1447 } 1448 break; 1449 1450 case DIOCWDINFO: 1451 case DIOCSDINFO: 1452 #ifdef __HAVE_OLD_DISKLABEL 1453 case ODIOCWDINFO: 1454 case ODIOCSDINFO: 1455 #endif 1456 { 1457 struct disklabel *lp; 1458 #ifdef __HAVE_OLD_DISKLABEL 1459 if (cmd == ODIOCSDINFO || cmd == ODIOCWDINFO) { 1460 memset(&newlabel, 0, sizeof newlabel); 1461 memcpy(&newlabel, data, sizeof (struct olddisklabel)); 1462 lp = &newlabel; 1463 } else 1464 #endif 1465 lp = (struct disklabel *)data; 1466 1467 cs->sc_flags |= CCDF_LABELLING; 1468 1469 error = setdisklabel(cs->sc_dkdev.dk_label, 1470 lp, 0, cs->sc_dkdev.dk_cpulabel); 1471 if (error == 0) { 1472 if (cmd == DIOCWDINFO 1473 #ifdef __HAVE_OLD_DISKLABEL 1474 || cmd == ODIOCWDINFO 1475 #endif 1476 ) 1477 error = writedisklabel(CCDLABELDEV(dev), 1478 ccdstrategy, cs->sc_dkdev.dk_label, 1479 cs->sc_dkdev.dk_cpulabel); 1480 } 1481 1482 cs->sc_flags &= ~CCDF_LABELLING; 1483 break; 1484 } 1485 1486 case DIOCKLABEL: 1487 if (*(int *)data != 0) 1488 cs->sc_flags |= CCDF_KLABEL; 1489 else 1490 cs->sc_flags &= ~CCDF_KLABEL; 1491 break; 1492 1493 case DIOCWLABEL: 1494 if (*(int *)data != 0) 1495 cs->sc_flags |= CCDF_WLABEL; 1496 else 1497 cs->sc_flags &= ~CCDF_WLABEL; 1498 break; 1499 1500 case DIOCGDEFLABEL: 1501 ccdgetdefaultlabel(cs, (struct disklabel *)data); 1502 break; 1503 1504 #ifdef __HAVE_OLD_DISKLABEL 1505 case ODIOCGDEFLABEL: 1506 ccdgetdefaultlabel(cs, &newlabel); 1507 if (newlabel.d_npartitions > OLDMAXPARTITIONS) 1508 return ENOTTY; 1509 memcpy(data, &newlabel, sizeof (struct olddisklabel)); 1510 break; 1511 #endif 1512 1513 default: 1514 error = ENOTTY; 1515 } 1516 1517 out: 1518 mutex_exit(&cs->sc_dvlock); 1519 return (error); 1520 } 1521 1522 static int 1523 ccdsize(dev_t dev) 1524 { 1525 struct ccd_softc *cs; 1526 struct disklabel *lp; 1527 int part, unit, omask, size; 1528 1529 unit = ccdunit(dev); 1530 if ((cs = ccdget(unit, 0)) == NULL) 1531 return -1; 1532 1533 if ((cs->sc_flags & CCDF_INITED) == 0) 1534 return (-1); 1535 1536 part = DISKPART(dev); 1537 omask = cs->sc_dkdev.dk_openmask & (1 << part); 1538 lp = cs->sc_dkdev.dk_label; 1539 1540 if (omask == 0 && ccdopen(dev, 0, S_IFBLK, curlwp)) 1541 return (-1); 1542 1543 if (lp->d_partitions[part].p_fstype != FS_SWAP) 1544 size = -1; 1545 else 1546 size = lp->d_partitions[part].p_size * 1547 (lp->d_secsize / DEV_BSIZE); 1548 1549 if (omask == 0 && ccdclose(dev, 0, S_IFBLK, curlwp)) 1550 return (-1); 1551 1552 return (size); 1553 } 1554 1555 static void 1556 ccdgetdefaultlabel(struct ccd_softc *cs, struct disklabel *lp) 1557 { 1558 struct ccdgeom *ccg = &cs->sc_geom; 1559 1560 memset(lp, 0, sizeof(*lp)); 1561 1562 if (cs->sc_size > UINT32_MAX) 1563 lp->d_secperunit = UINT32_MAX; 1564 else 1565 lp->d_secperunit = cs->sc_size; 1566 lp->d_secsize = ccg->ccg_secsize; 1567 lp->d_nsectors = ccg->ccg_nsectors; 1568 lp->d_ntracks = ccg->ccg_ntracks; 1569 lp->d_ncylinders = ccg->ccg_ncylinders; 1570 lp->d_secpercyl = lp->d_ntracks * lp->d_nsectors; 1571 1572 strncpy(lp->d_typename, "ccd", sizeof(lp->d_typename)); 1573 lp->d_type = DKTYPE_CCD; 1574 strncpy(lp->d_packname, "fictitious", sizeof(lp->d_packname)); 1575 lp->d_rpm = 3600; 1576 lp->d_interleave = 1; 1577 lp->d_flags = 0; 1578 1579 lp->d_partitions[RAW_PART].p_offset = 0; 1580 lp->d_partitions[RAW_PART].p_size = lp->d_secperunit; 1581 lp->d_partitions[RAW_PART].p_fstype = FS_UNUSED; 1582 lp->d_npartitions = RAW_PART + 1; 1583 1584 lp->d_magic = DISKMAGIC; 1585 lp->d_magic2 = DISKMAGIC; 1586 lp->d_checksum = dkcksum(cs->sc_dkdev.dk_label); 1587 } 1588 1589 /* 1590 * Read the disklabel from the ccd. If one is not present, fake one 1591 * up. 1592 */ 1593 static void 1594 ccdgetdisklabel(dev_t dev) 1595 { 1596 int unit = ccdunit(dev); 1597 struct ccd_softc *cs; 1598 const char *errstring; 1599 struct disklabel *lp; 1600 struct cpu_disklabel *clp; 1601 1602 if ((cs = ccdget(unit, 0)) == NULL) 1603 return; 1604 lp = cs->sc_dkdev.dk_label; 1605 clp = cs->sc_dkdev.dk_cpulabel; 1606 KASSERT(mutex_owned(&cs->sc_dvlock)); 1607 1608 memset(clp, 0, sizeof(*clp)); 1609 1610 ccdgetdefaultlabel(cs, lp); 1611 1612 /* 1613 * Call the generic disklabel extraction routine. 1614 */ 1615 cs->sc_flags |= CCDF_RLABEL; 1616 if ((cs->sc_flags & CCDF_NOLABEL) != 0) 1617 errstring = "CCDF_NOLABEL set; ignoring on-disk label"; 1618 else 1619 errstring = readdisklabel(CCDLABELDEV(dev), ccdstrategy, 1620 cs->sc_dkdev.dk_label, cs->sc_dkdev.dk_cpulabel); 1621 if (errstring) 1622 ccdmakedisklabel(cs); 1623 else { 1624 int i; 1625 struct partition *pp; 1626 1627 /* 1628 * Sanity check whether the found disklabel is valid. 1629 * 1630 * This is necessary since total size of ccd may vary 1631 * when an interleave is changed even though exactly 1632 * same componets are used, and old disklabel may used 1633 * if that is found. 1634 */ 1635 if (lp->d_secperunit < UINT32_MAX ? 1636 lp->d_secperunit != cs->sc_size : 1637 lp->d_secperunit > cs->sc_size) 1638 printf("WARNING: %s: " 1639 "total sector size in disklabel (%ju) != " 1640 "the size of ccd (%ju)\n", cs->sc_xname, 1641 (uintmax_t)lp->d_secperunit, 1642 (uintmax_t)cs->sc_size); 1643 for (i = 0; i < lp->d_npartitions; i++) { 1644 pp = &lp->d_partitions[i]; 1645 if (pp->p_offset + pp->p_size > cs->sc_size) 1646 printf("WARNING: %s: end of partition `%c' " 1647 "exceeds the size of ccd (%ju)\n", 1648 cs->sc_xname, 'a' + i, (uintmax_t)cs->sc_size); 1649 } 1650 } 1651 1652 #ifdef DEBUG 1653 /* It's actually extremely common to have unlabeled ccds. */ 1654 if (ccddebug & CCDB_LABEL) 1655 if (errstring != NULL) 1656 printf("%s: %s\n", cs->sc_xname, errstring); 1657 #endif 1658 1659 /* In-core label now valid. */ 1660 cs->sc_flags = (cs->sc_flags | CCDF_VLABEL) & ~CCDF_RLABEL; 1661 } 1662 1663 /* 1664 * Take care of things one might want to take care of in the event 1665 * that a disklabel isn't present. 1666 */ 1667 static void 1668 ccdmakedisklabel(struct ccd_softc *cs) 1669 { 1670 struct disklabel *lp = cs->sc_dkdev.dk_label; 1671 1672 /* 1673 * For historical reasons, if there's no disklabel present 1674 * the raw partition must be marked FS_BSDFFS. 1675 */ 1676 lp->d_partitions[RAW_PART].p_fstype = FS_BSDFFS; 1677 1678 strncpy(lp->d_packname, "default label", sizeof(lp->d_packname)); 1679 1680 lp->d_checksum = dkcksum(lp); 1681 } 1682 1683 #ifdef DEBUG 1684 static void 1685 printiinfo(struct ccdiinfo *ii) 1686 { 1687 int ix, i; 1688 1689 for (ix = 0; ii->ii_ndisk; ix++, ii++) { 1690 printf(" itab[%d]: #dk %d sblk %" PRId64 " soff %" PRId64, 1691 ix, ii->ii_ndisk, ii->ii_startblk, ii->ii_startoff); 1692 for (i = 0; i < ii->ii_ndisk; i++) 1693 printf(" %d", ii->ii_index[i]); 1694 printf("\n"); 1695 } 1696 } 1697 #endif 1698 1699 MODULE(MODULE_CLASS_DRIVER, ccd, "dk_subr"); 1700 1701 static int 1702 ccd_modcmd(modcmd_t cmd, void *arg) 1703 { 1704 int error = 0; 1705 #ifdef _MODULE 1706 int bmajor = -1, cmajor = -1; 1707 #endif 1708 1709 1710 switch (cmd) { 1711 case MODULE_CMD_INIT: 1712 #ifdef _MODULE 1713 ccdattach(0); 1714 1715 error = devsw_attach("ccd", &ccd_bdevsw, &bmajor, 1716 &ccd_cdevsw, &cmajor); 1717 sysctl_kern_ccd_setup(&ccd_clog); 1718 #endif 1719 break; 1720 1721 case MODULE_CMD_FINI: 1722 #ifdef _MODULE 1723 mutex_enter(&ccd_lock); 1724 if (ccd_nactive) { 1725 mutex_exit(&ccd_lock); 1726 error = EBUSY; 1727 } else { 1728 mutex_exit(&ccd_lock); 1729 error = devsw_detach(&ccd_bdevsw, &ccd_cdevsw); 1730 ccddetach(); 1731 } 1732 sysctl_teardown(&ccd_clog); 1733 #endif 1734 break; 1735 1736 case MODULE_CMD_STAT: 1737 return ENOTTY; 1738 1739 default: 1740 return ENOTTY; 1741 } 1742 1743 return error; 1744 } 1745 1746 static int 1747 ccd_units_sysctl(SYSCTLFN_ARGS) 1748 { 1749 struct sysctlnode node; 1750 struct ccd_softc *sc; 1751 int error, i, nccd, *units; 1752 size_t size; 1753 1754 nccd = 0; 1755 mutex_enter(&ccd_lock); 1756 LIST_FOREACH(sc, &ccds, sc_link) 1757 nccd++; 1758 mutex_exit(&ccd_lock); 1759 1760 if (nccd != 0) { 1761 size = nccd * sizeof(*units); 1762 units = kmem_zalloc(size, KM_SLEEP); 1763 i = 0; 1764 mutex_enter(&ccd_lock); 1765 LIST_FOREACH(sc, &ccds, sc_link) { 1766 if (i >= nccd) 1767 break; 1768 units[i] = sc->sc_unit; 1769 } 1770 mutex_exit(&ccd_lock); 1771 } else { 1772 units = NULL; 1773 size = 0; 1774 } 1775 1776 node = *rnode; 1777 node.sysctl_data = units; 1778 node.sysctl_size = size; 1779 1780 error = sysctl_lookup(SYSCTLFN_CALL(&node)); 1781 if (units) 1782 kmem_free(units, size); 1783 return error; 1784 } 1785 1786 static int 1787 ccd_info_sysctl(SYSCTLFN_ARGS) 1788 { 1789 struct sysctlnode node; 1790 struct ccddiskinfo ccd; 1791 struct ccd_softc *sc; 1792 int unit; 1793 1794 if (newp == NULL || newlen != sizeof(int)) 1795 return EINVAL; 1796 1797 unit = *(const int *)newp; 1798 newp = NULL; 1799 newlen = 0; 1800 ccd.ccd_ndisks = ~0; 1801 mutex_enter(&ccd_lock); 1802 LIST_FOREACH(sc, &ccds, sc_link) { 1803 if (sc->sc_unit == unit) { 1804 ccd.ccd_ileave = sc->sc_ileave; 1805 ccd.ccd_size = sc->sc_size; 1806 ccd.ccd_ndisks = sc->sc_nccdisks; 1807 ccd.ccd_flags = sc->sc_flags; 1808 break; 1809 } 1810 } 1811 mutex_exit(&ccd_lock); 1812 1813 if (ccd.ccd_ndisks == ~0) 1814 return ENOENT; 1815 1816 node = *rnode; 1817 node.sysctl_data = &ccd; 1818 node.sysctl_size = sizeof(ccd); 1819 1820 return sysctl_lookup(SYSCTLFN_CALL(&node)); 1821 } 1822 1823 static int 1824 ccd_components_sysctl(SYSCTLFN_ARGS) 1825 { 1826 struct sysctlnode node; 1827 int error, unit; 1828 size_t size; 1829 char *names, *p, *ep; 1830 struct ccd_softc *sc; 1831 1832 if (newp == NULL || newlen != sizeof(int)) 1833 return EINVAL; 1834 1835 size = 0; 1836 unit = *(const int *)newp; 1837 newp = NULL; 1838 newlen = 0; 1839 mutex_enter(&ccd_lock); 1840 LIST_FOREACH(sc, &ccds, sc_link) 1841 if (sc->sc_unit == unit) { 1842 for (size_t i = 0; i < sc->sc_nccdisks; i++) 1843 size += strlen(sc->sc_cinfo[i].ci_path) + 1; 1844 break; 1845 } 1846 mutex_exit(&ccd_lock); 1847 1848 if (size == 0) 1849 return ENOENT; 1850 names = kmem_zalloc(size, KM_SLEEP); 1851 p = names; 1852 ep = names + size; 1853 mutex_enter(&ccd_lock); 1854 LIST_FOREACH(sc, &ccds, sc_link) 1855 if (sc->sc_unit == unit) { 1856 for (size_t i = 0; i < sc->sc_nccdisks; i++) { 1857 char *d = sc->sc_cinfo[i].ci_path; 1858 while (p < ep && (*p++ = *d++) != '\0') 1859 continue; 1860 } 1861 break; 1862 } 1863 mutex_exit(&ccd_lock); 1864 1865 node = *rnode; 1866 node.sysctl_data = names; 1867 node.sysctl_size = ep - names; 1868 1869 error = sysctl_lookup(SYSCTLFN_CALL(&node)); 1870 kmem_free(names, size); 1871 return error; 1872 } 1873 1874 SYSCTL_SETUP(sysctl_kern_ccd_setup, "sysctl kern.ccd subtree setup") 1875 { 1876 const struct sysctlnode *node = NULL; 1877 1878 sysctl_createv(clog, 0, NULL, &node, 1879 CTLFLAG_PERMANENT, 1880 CTLTYPE_NODE, "ccd", 1881 SYSCTL_DESCR("ConCatenated Disk state"), 1882 NULL, 0, NULL, 0, 1883 CTL_KERN, CTL_CREATE, CTL_EOL); 1884 1885 if (node == NULL) 1886 return; 1887 1888 sysctl_createv(clog, 0, &node, NULL, 1889 CTLFLAG_PERMANENT | CTLFLAG_READONLY, 1890 CTLTYPE_STRUCT, "units", 1891 SYSCTL_DESCR("List of ccd unit numbers"), 1892 ccd_units_sysctl, 0, NULL, 0, 1893 CTL_CREATE, CTL_EOL); 1894 sysctl_createv(clog, 0, &node, NULL, 1895 CTLFLAG_PERMANENT | CTLFLAG_READWRITE, 1896 CTLTYPE_STRUCT, "info", 1897 SYSCTL_DESCR("Information about a CCD unit"), 1898 ccd_info_sysctl, 0, NULL, 0, 1899 CTL_CREATE, CTL_EOL); 1900 sysctl_createv(clog, 0, &node, NULL, 1901 CTLFLAG_PERMANENT | CTLFLAG_READWRITE, 1902 CTLTYPE_STRUCT, "components", 1903 SYSCTL_DESCR("Information about CCD components"), 1904 ccd_components_sysctl, 0, NULL, 0, 1905 CTL_CREATE, CTL_EOL); 1906 } 1907