1 /* $NetBSD: ccd.c,v 1.173 2017/12/19 03:24:09 pgoyette Exp $ */ 2 3 /*- 4 * Copyright (c) 1996, 1997, 1998, 1999, 2007, 2009 The NetBSD Foundation, Inc. 5 * All rights reserved. 6 * 7 * This code is derived from software contributed to The NetBSD Foundation 8 * by Jason R. Thorpe, and by Andrew Doran. 9 * 10 * Redistribution and use in source and binary forms, with or without 11 * modification, are permitted provided that the following conditions 12 * are met: 13 * 1. Redistributions of source code must retain the above copyright 14 * notice, this list of conditions and the following disclaimer. 15 * 2. Redistributions in binary form must reproduce the above copyright 16 * notice, this list of conditions and the following disclaimer in the 17 * documentation and/or other materials provided with the distribution. 18 * 19 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS 20 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 21 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 22 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS 23 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 24 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 25 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 26 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 27 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 28 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 29 * POSSIBILITY OF SUCH DAMAGE. 30 */ 31 32 /* 33 * Copyright (c) 1988 University of Utah. 34 * Copyright (c) 1990, 1993 35 * The Regents of the University of California. All rights reserved. 36 * 37 * This code is derived from software contributed to Berkeley by 38 * the Systems Programming Group of the University of Utah Computer 39 * Science Department. 40 * 41 * Redistribution and use in source and binary forms, with or without 42 * modification, are permitted provided that the following conditions 43 * are met: 44 * 1. Redistributions of source code must retain the above copyright 45 * notice, this list of conditions and the following disclaimer. 46 * 2. Redistributions in binary form must reproduce the above copyright 47 * notice, this list of conditions and the following disclaimer in the 48 * documentation and/or other materials provided with the distribution. 49 * 3. Neither the name of the University nor the names of its contributors 50 * may be used to endorse or promote products derived from this software 51 * without specific prior written permission. 52 * 53 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 54 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 55 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 56 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 57 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 58 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 59 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 60 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 61 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 62 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 63 * SUCH DAMAGE. 64 * 65 * from: Utah $Hdr: cd.c 1.6 90/11/28$ 66 * 67 * @(#)cd.c 8.2 (Berkeley) 11/16/93 68 */ 69 70 /* 71 * "Concatenated" disk driver. 72 * 73 * Notes on concurrency: 74 * 75 * => sc_dvlock serializes access to the device nodes, excluding block I/O. 76 * 77 * => sc_iolock serializes access to (sc_flags & CCDF_INITED), disk stats, 78 * sc_stop, sc_bufq and b_resid from master buffers. 79 * 80 * => a combination of CCDF_INITED, sc_inflight, and sc_iolock is used to 81 * serialize I/O and configuration changes. 82 * 83 * => the in-core disk label does not change while the device is open. 84 * 85 * On memory consumption: ccd fans out I/O requests and so needs to 86 * allocate memory. If the system is desperately low on memory, we 87 * single thread I/O. 88 */ 89 90 #include <sys/cdefs.h> 91 __KERNEL_RCSID(0, "$NetBSD: ccd.c,v 1.173 2017/12/19 03:24:09 pgoyette Exp $"); 92 93 #if defined(_KERNEL_OPT) 94 #include "opt_compat_netbsd.h" 95 #endif 96 97 #include <sys/param.h> 98 #include <sys/systm.h> 99 #include <sys/kernel.h> 100 #include <sys/proc.h> 101 #include <sys/errno.h> 102 #include <sys/buf.h> 103 #include <sys/kmem.h> 104 #include <sys/pool.h> 105 #include <sys/module.h> 106 #include <sys/namei.h> 107 #include <sys/stat.h> 108 #include <sys/ioctl.h> 109 #include <sys/disklabel.h> 110 #include <sys/device.h> 111 #include <sys/disk.h> 112 #include <sys/syslog.h> 113 #include <sys/fcntl.h> 114 #include <sys/vnode.h> 115 #include <sys/conf.h> 116 #include <sys/mutex.h> 117 #include <sys/queue.h> 118 #include <sys/kauth.h> 119 #include <sys/kthread.h> 120 #include <sys/bufq.h> 121 #include <sys/sysctl.h> 122 123 #include <uvm/uvm_extern.h> 124 125 #include <dev/ccdvar.h> 126 #include <dev/dkvar.h> 127 128 #include <miscfs/specfs/specdev.h> /* for v_rdev */ 129 130 #include "ioconf.h" 131 132 #if defined(CCDDEBUG) && !defined(DEBUG) 133 #define DEBUG 134 #endif 135 136 #ifdef DEBUG 137 #define CCDB_FOLLOW 0x01 138 #define CCDB_INIT 0x02 139 #define CCDB_IO 0x04 140 #define CCDB_LABEL 0x08 141 #define CCDB_VNODE 0x10 142 int ccddebug = 0x00; 143 #endif 144 145 #define ccdunit(x) DISKUNIT(x) 146 147 struct ccdbuf { 148 struct buf cb_buf; /* new I/O buf */ 149 struct buf *cb_obp; /* ptr. to original I/O buf */ 150 struct ccd_softc *cb_sc; /* pointer to ccd softc */ 151 int cb_comp; /* target component */ 152 SIMPLEQ_ENTRY(ccdbuf) cb_q; /* fifo of component buffers */ 153 }; 154 155 /* component buffer pool */ 156 static pool_cache_t ccd_cache; 157 158 #define CCD_GETBUF() pool_cache_get(ccd_cache, PR_WAITOK) 159 #define CCD_PUTBUF(cbp) pool_cache_put(ccd_cache, cbp) 160 161 #define CCDLABELDEV(dev) \ 162 (MAKEDISKDEV(major((dev)), ccdunit((dev)), RAW_PART)) 163 164 /* called by main() at boot time */ 165 void ccddetach(void); 166 167 /* called by biodone() at interrupt time */ 168 static void ccdiodone(struct buf *); 169 170 static void ccdinterleave(struct ccd_softc *); 171 static int ccdinit(struct ccd_softc *, char **, struct vnode **, 172 struct lwp *); 173 static struct ccdbuf *ccdbuffer(struct ccd_softc *, struct buf *, 174 daddr_t, void *, long); 175 static void ccdgetdefaultlabel(struct ccd_softc *, struct disklabel *); 176 static void ccdgetdisklabel(dev_t); 177 static void ccdmakedisklabel(struct ccd_softc *); 178 static void ccdstart(struct ccd_softc *); 179 static void ccdthread(void *); 180 181 static dev_type_open(ccdopen); 182 static dev_type_close(ccdclose); 183 static dev_type_read(ccdread); 184 static dev_type_write(ccdwrite); 185 static dev_type_ioctl(ccdioctl); 186 static dev_type_strategy(ccdstrategy); 187 static dev_type_size(ccdsize); 188 189 const struct bdevsw ccd_bdevsw = { 190 .d_open = ccdopen, 191 .d_close = ccdclose, 192 .d_strategy = ccdstrategy, 193 .d_ioctl = ccdioctl, 194 .d_dump = nodump, 195 .d_psize = ccdsize, 196 .d_discard = nodiscard, 197 .d_flag = D_DISK | D_MPSAFE 198 }; 199 200 const struct cdevsw ccd_cdevsw = { 201 .d_open = ccdopen, 202 .d_close = ccdclose, 203 .d_read = ccdread, 204 .d_write = ccdwrite, 205 .d_ioctl = ccdioctl, 206 .d_stop = nostop, 207 .d_tty = notty, 208 .d_poll = nopoll, 209 .d_mmap = nommap, 210 .d_kqfilter = nokqfilter, 211 .d_discard = nodiscard, 212 .d_flag = D_DISK | D_MPSAFE 213 }; 214 215 #ifdef DEBUG 216 static void printiinfo(struct ccdiinfo *); 217 #endif 218 219 static LIST_HEAD(, ccd_softc) ccds = LIST_HEAD_INITIALIZER(ccds); 220 static kmutex_t ccd_lock; 221 222 #ifdef _MODULE 223 static struct sysctllog *ccd_clog; 224 #endif 225 226 SYSCTL_SETUP_PROTO(sysctl_kern_ccd_setup); 227 228 static struct ccd_softc * 229 ccdcreate(int unit) { 230 struct ccd_softc *sc = kmem_zalloc(sizeof(*sc), KM_SLEEP); 231 232 /* Initialize per-softc structures. */ 233 snprintf(sc->sc_xname, sizeof(sc->sc_xname), "ccd%d", unit); 234 sc->sc_unit = unit; 235 mutex_init(&sc->sc_dvlock, MUTEX_DEFAULT, IPL_NONE); 236 sc->sc_iolock = mutex_obj_alloc(MUTEX_DEFAULT, IPL_NONE); 237 cv_init(&sc->sc_stop, "ccdstop"); 238 cv_init(&sc->sc_push, "ccdthr"); 239 disk_init(&sc->sc_dkdev, sc->sc_xname, NULL); /* XXX */ 240 return sc; 241 } 242 243 static void 244 ccddestroy(struct ccd_softc *sc) { 245 mutex_obj_free(sc->sc_iolock); 246 mutex_exit(&sc->sc_dvlock); 247 mutex_destroy(&sc->sc_dvlock); 248 cv_destroy(&sc->sc_stop); 249 cv_destroy(&sc->sc_push); 250 disk_destroy(&sc->sc_dkdev); 251 kmem_free(sc, sizeof(*sc)); 252 } 253 254 static struct ccd_softc * 255 ccdget(int unit, int make) { 256 struct ccd_softc *sc; 257 if (unit < 0) { 258 #ifdef DIAGNOSTIC 259 panic("%s: unit %d!", __func__, unit); 260 #endif 261 return NULL; 262 } 263 mutex_enter(&ccd_lock); 264 LIST_FOREACH(sc, &ccds, sc_link) { 265 if (sc->sc_unit == unit) { 266 mutex_exit(&ccd_lock); 267 return sc; 268 } 269 } 270 mutex_exit(&ccd_lock); 271 if (!make) 272 return NULL; 273 if ((sc = ccdcreate(unit)) == NULL) 274 return NULL; 275 mutex_enter(&ccd_lock); 276 LIST_INSERT_HEAD(&ccds, sc, sc_link); 277 mutex_exit(&ccd_lock); 278 return sc; 279 } 280 281 static void 282 ccdput(struct ccd_softc *sc) { 283 mutex_enter(&ccd_lock); 284 LIST_REMOVE(sc, sc_link); 285 mutex_exit(&ccd_lock); 286 ccddestroy(sc); 287 } 288 289 /* 290 * Called by main() during pseudo-device attachment. All we need 291 * to do is allocate enough space for devices to be configured later. 292 */ 293 void 294 ccdattach(int num) 295 { 296 mutex_init(&ccd_lock, MUTEX_DEFAULT, IPL_NONE); 297 298 /* Initialize the component buffer pool. */ 299 ccd_cache = pool_cache_init(sizeof(struct ccdbuf), 0, 300 0, 0, "ccdbuf", NULL, IPL_BIO, NULL, NULL, NULL); 301 } 302 303 void 304 ccddetach(void) 305 { 306 pool_cache_destroy(ccd_cache); 307 mutex_destroy(&ccd_lock); 308 } 309 310 static int 311 ccdinit(struct ccd_softc *cs, char **cpaths, struct vnode **vpp, 312 struct lwp *l) 313 { 314 struct ccdcinfo *ci = NULL; 315 int ix; 316 struct ccdgeom *ccg = &cs->sc_geom; 317 char *tmppath; 318 int error, path_alloced; 319 uint64_t psize, minsize; 320 unsigned secsize, maxsecsize; 321 struct disk_geom *dg; 322 323 #ifdef DEBUG 324 if (ccddebug & (CCDB_FOLLOW|CCDB_INIT)) 325 printf("%s: ccdinit\n", cs->sc_xname); 326 #endif 327 328 /* Allocate space for the component info. */ 329 cs->sc_cinfo = kmem_alloc(cs->sc_nccdisks * sizeof(*cs->sc_cinfo), 330 KM_SLEEP); 331 tmppath = kmem_alloc(MAXPATHLEN, KM_SLEEP); 332 333 cs->sc_size = 0; 334 335 /* 336 * Verify that each component piece exists and record 337 * relevant information about it. 338 */ 339 maxsecsize = 0; 340 minsize = 0; 341 for (ix = 0, path_alloced = 0; ix < cs->sc_nccdisks; ix++) { 342 ci = &cs->sc_cinfo[ix]; 343 ci->ci_vp = vpp[ix]; 344 345 /* 346 * Copy in the pathname of the component. 347 */ 348 memset(tmppath, 0, MAXPATHLEN); /* sanity */ 349 error = copyinstr(cpaths[ix], tmppath, 350 MAXPATHLEN, &ci->ci_pathlen); 351 if (ci->ci_pathlen == 0) 352 error = EINVAL; 353 if (error) { 354 #ifdef DEBUG 355 if (ccddebug & (CCDB_FOLLOW|CCDB_INIT)) 356 printf("%s: can't copy path, error = %d\n", 357 cs->sc_xname, error); 358 #endif 359 goto out; 360 } 361 ci->ci_path = kmem_alloc(ci->ci_pathlen, KM_SLEEP); 362 memcpy(ci->ci_path, tmppath, ci->ci_pathlen); 363 path_alloced++; 364 365 /* 366 * XXX: Cache the component's dev_t. 367 */ 368 ci->ci_dev = vpp[ix]->v_rdev; 369 370 /* 371 * Get partition information for the component. 372 */ 373 error = getdisksize(vpp[ix], &psize, &secsize); 374 if (error) { 375 #ifdef DEBUG 376 if (ccddebug & (CCDB_FOLLOW|CCDB_INIT)) 377 printf("%s: %s: disksize failed, error = %d\n", 378 cs->sc_xname, ci->ci_path, error); 379 #endif 380 goto out; 381 } 382 383 /* 384 * Calculate the size, truncating to an interleave 385 * boundary if necessary. 386 */ 387 maxsecsize = secsize > maxsecsize ? secsize : maxsecsize; 388 if (cs->sc_ileave > 1) 389 psize -= psize % cs->sc_ileave; 390 391 if (psize == 0) { 392 #ifdef DEBUG 393 if (ccddebug & (CCDB_FOLLOW|CCDB_INIT)) 394 printf("%s: %s: size == 0\n", 395 cs->sc_xname, ci->ci_path); 396 #endif 397 error = ENODEV; 398 goto out; 399 } 400 401 if (minsize == 0 || psize < minsize) 402 minsize = psize; 403 ci->ci_size = psize; 404 cs->sc_size += psize; 405 } 406 407 /* 408 * Don't allow the interleave to be smaller than 409 * the biggest component sector. 410 */ 411 if ((cs->sc_ileave > 0) && 412 (cs->sc_ileave < (maxsecsize / DEV_BSIZE))) { 413 #ifdef DEBUG 414 if (ccddebug & (CCDB_FOLLOW|CCDB_INIT)) 415 printf("%s: interleave must be at least %d\n", 416 cs->sc_xname, (maxsecsize / DEV_BSIZE)); 417 #endif 418 error = EINVAL; 419 goto out; 420 } 421 422 /* 423 * If uniform interleave is desired set all sizes to that of 424 * the smallest component. 425 */ 426 if (cs->sc_flags & CCDF_UNIFORM) { 427 for (ci = cs->sc_cinfo; 428 ci < &cs->sc_cinfo[cs->sc_nccdisks]; ci++) 429 ci->ci_size = minsize; 430 431 cs->sc_size = cs->sc_nccdisks * minsize; 432 } 433 434 /* 435 * Construct the interleave table. 436 */ 437 ccdinterleave(cs); 438 439 /* 440 * Create pseudo-geometry based on 1MB cylinders. It's 441 * pretty close. 442 */ 443 ccg->ccg_secsize = DEV_BSIZE; 444 ccg->ccg_ntracks = 1; 445 ccg->ccg_nsectors = 1024 * (1024 / ccg->ccg_secsize); 446 ccg->ccg_ncylinders = cs->sc_size / ccg->ccg_nsectors; 447 448 dg = &cs->sc_dkdev.dk_geom; 449 memset(dg, 0, sizeof(*dg)); 450 dg->dg_secperunit = cs->sc_size; 451 dg->dg_secsize = ccg->ccg_secsize; 452 dg->dg_nsectors = ccg->ccg_nsectors; 453 dg->dg_ntracks = ccg->ccg_ntracks; 454 dg->dg_ncylinders = ccg->ccg_ncylinders; 455 456 if (cs->sc_ileave > 0) 457 aprint_normal("%s: Interleaving %d component%s " 458 "(%d block interleave)\n", cs->sc_xname, 459 cs->sc_nccdisks, (cs->sc_nccdisks != 0 ? "s" : ""), 460 cs->sc_ileave); 461 else 462 aprint_normal("%s: Concatenating %d component%s\n", 463 cs->sc_xname, 464 cs->sc_nccdisks, (cs->sc_nccdisks != 0 ? "s" : "")); 465 for (ix = 0; ix < cs->sc_nccdisks; ix++) { 466 ci = &cs->sc_cinfo[ix]; 467 aprint_normal("%s: %s (%ju blocks)\n", cs->sc_xname, 468 ci->ci_path, (uintmax_t)ci->ci_size); 469 } 470 aprint_normal("%s: total %ju blocks\n", cs->sc_xname, cs->sc_size); 471 472 /* 473 * Create thread to handle deferred I/O. 474 */ 475 cs->sc_zap = false; 476 error = kthread_create(PRI_BIO, KTHREAD_MPSAFE, NULL, ccdthread, 477 cs, &cs->sc_thread, "%s", cs->sc_xname); 478 if (error) { 479 printf("ccdinit: can't create thread: %d\n", error); 480 goto out; 481 } 482 483 /* 484 * Only now that everything is set up can we enable the device. 485 */ 486 mutex_enter(cs->sc_iolock); 487 cs->sc_flags |= CCDF_INITED; 488 mutex_exit(cs->sc_iolock); 489 kmem_free(tmppath, MAXPATHLEN); 490 return (0); 491 492 out: 493 for (ix = 0; ix < path_alloced; ix++) { 494 kmem_free(cs->sc_cinfo[ix].ci_path, 495 cs->sc_cinfo[ix].ci_pathlen); 496 } 497 kmem_free(cs->sc_cinfo, cs->sc_nccdisks * sizeof(struct ccdcinfo)); 498 kmem_free(tmppath, MAXPATHLEN); 499 return (error); 500 } 501 502 static void 503 ccdinterleave(struct ccd_softc *cs) 504 { 505 struct ccdcinfo *ci, *smallci; 506 struct ccdiinfo *ii; 507 daddr_t bn, lbn; 508 int ix; 509 u_long size; 510 511 #ifdef DEBUG 512 if (ccddebug & CCDB_INIT) 513 printf("ccdinterleave(%p): ileave %d\n", cs, cs->sc_ileave); 514 #endif 515 /* 516 * Allocate an interleave table. 517 * Chances are this is too big, but we don't care. 518 */ 519 size = (cs->sc_nccdisks + 1) * sizeof(struct ccdiinfo); 520 cs->sc_itable = kmem_zalloc(size, KM_SLEEP); 521 522 /* 523 * Trivial case: no interleave (actually interleave of disk size). 524 * Each table entry represents a single component in its entirety. 525 */ 526 if (cs->sc_ileave == 0) { 527 bn = 0; 528 ii = cs->sc_itable; 529 530 for (ix = 0; ix < cs->sc_nccdisks; ix++) { 531 /* Allocate space for ii_index. */ 532 ii->ii_indexsz = sizeof(int); 533 ii->ii_index = kmem_alloc(ii->ii_indexsz, KM_SLEEP); 534 ii->ii_ndisk = 1; 535 ii->ii_startblk = bn; 536 ii->ii_startoff = 0; 537 ii->ii_index[0] = ix; 538 bn += cs->sc_cinfo[ix].ci_size; 539 ii++; 540 } 541 ii->ii_ndisk = 0; 542 #ifdef DEBUG 543 if (ccddebug & CCDB_INIT) 544 printiinfo(cs->sc_itable); 545 #endif 546 return; 547 } 548 549 /* 550 * The following isn't fast or pretty; it doesn't have to be. 551 */ 552 size = 0; 553 bn = lbn = 0; 554 for (ii = cs->sc_itable; ; ii++) { 555 /* Allocate space for ii_index. */ 556 ii->ii_indexsz = sizeof(int) * cs->sc_nccdisks; 557 ii->ii_index = kmem_alloc(ii->ii_indexsz, KM_SLEEP); 558 559 /* 560 * Locate the smallest of the remaining components 561 */ 562 smallci = NULL; 563 for (ci = cs->sc_cinfo; 564 ci < &cs->sc_cinfo[cs->sc_nccdisks]; ci++) 565 if (ci->ci_size > size && 566 (smallci == NULL || 567 ci->ci_size < smallci->ci_size)) 568 smallci = ci; 569 570 /* 571 * Nobody left, all done 572 */ 573 if (smallci == NULL) { 574 ii->ii_ndisk = 0; 575 break; 576 } 577 578 /* 579 * Record starting logical block and component offset 580 */ 581 ii->ii_startblk = bn / cs->sc_ileave; 582 ii->ii_startoff = lbn; 583 584 /* 585 * Determine how many disks take part in this interleave 586 * and record their indices. 587 */ 588 ix = 0; 589 for (ci = cs->sc_cinfo; 590 ci < &cs->sc_cinfo[cs->sc_nccdisks]; ci++) 591 if (ci->ci_size >= smallci->ci_size) 592 ii->ii_index[ix++] = ci - cs->sc_cinfo; 593 ii->ii_ndisk = ix; 594 bn += ix * (smallci->ci_size - size); 595 lbn = smallci->ci_size / cs->sc_ileave; 596 size = smallci->ci_size; 597 } 598 #ifdef DEBUG 599 if (ccddebug & CCDB_INIT) 600 printiinfo(cs->sc_itable); 601 #endif 602 } 603 604 /* ARGSUSED */ 605 static int 606 ccdopen(dev_t dev, int flags, int fmt, struct lwp *l) 607 { 608 int unit = ccdunit(dev); 609 struct ccd_softc *cs; 610 struct disklabel *lp; 611 int error = 0, part, pmask; 612 613 #ifdef DEBUG 614 if (ccddebug & CCDB_FOLLOW) 615 printf("ccdopen(0x%"PRIx64", 0x%x)\n", dev, flags); 616 #endif 617 if ((cs = ccdget(unit, 1)) == NULL) 618 return ENXIO; 619 620 mutex_enter(&cs->sc_dvlock); 621 622 lp = cs->sc_dkdev.dk_label; 623 624 part = DISKPART(dev); 625 pmask = (1 << part); 626 627 /* 628 * If we're initialized, check to see if there are any other 629 * open partitions. If not, then it's safe to update 630 * the in-core disklabel. Only read the disklabel if it is 631 * not already valid. 632 */ 633 if ((cs->sc_flags & (CCDF_INITED|CCDF_VLABEL)) == CCDF_INITED && 634 cs->sc_dkdev.dk_openmask == 0) 635 ccdgetdisklabel(dev); 636 637 /* Check that the partition exists. */ 638 if (part != RAW_PART) { 639 if (((cs->sc_flags & CCDF_INITED) == 0) || 640 ((part >= lp->d_npartitions) || 641 (lp->d_partitions[part].p_fstype == FS_UNUSED))) { 642 error = ENXIO; 643 goto done; 644 } 645 } 646 647 /* Prevent our unit from being unconfigured while open. */ 648 switch (fmt) { 649 case S_IFCHR: 650 cs->sc_dkdev.dk_copenmask |= pmask; 651 break; 652 653 case S_IFBLK: 654 cs->sc_dkdev.dk_bopenmask |= pmask; 655 break; 656 } 657 cs->sc_dkdev.dk_openmask = 658 cs->sc_dkdev.dk_copenmask | cs->sc_dkdev.dk_bopenmask; 659 660 done: 661 mutex_exit(&cs->sc_dvlock); 662 return (error); 663 } 664 665 /* ARGSUSED */ 666 static int 667 ccdclose(dev_t dev, int flags, int fmt, struct lwp *l) 668 { 669 int unit = ccdunit(dev); 670 struct ccd_softc *cs; 671 int part; 672 673 #ifdef DEBUG 674 if (ccddebug & CCDB_FOLLOW) 675 printf("ccdclose(0x%"PRIx64", 0x%x)\n", dev, flags); 676 #endif 677 678 if ((cs = ccdget(unit, 0)) == NULL) 679 return ENXIO; 680 681 mutex_enter(&cs->sc_dvlock); 682 683 part = DISKPART(dev); 684 685 /* ...that much closer to allowing unconfiguration... */ 686 switch (fmt) { 687 case S_IFCHR: 688 cs->sc_dkdev.dk_copenmask &= ~(1 << part); 689 break; 690 691 case S_IFBLK: 692 cs->sc_dkdev.dk_bopenmask &= ~(1 << part); 693 break; 694 } 695 cs->sc_dkdev.dk_openmask = 696 cs->sc_dkdev.dk_copenmask | cs->sc_dkdev.dk_bopenmask; 697 698 if (cs->sc_dkdev.dk_openmask == 0) { 699 if ((cs->sc_flags & CCDF_KLABEL) == 0) 700 cs->sc_flags &= ~CCDF_VLABEL; 701 } 702 703 mutex_exit(&cs->sc_dvlock); 704 return (0); 705 } 706 707 static bool 708 ccdbackoff(struct ccd_softc *cs) 709 { 710 711 /* XXX Arbitrary, should be a uvm call. */ 712 return uvmexp.free < (uvmexp.freemin >> 1) && 713 disk_isbusy(&cs->sc_dkdev); 714 } 715 716 static void 717 ccdthread(void *cookie) 718 { 719 struct ccd_softc *cs; 720 721 cs = cookie; 722 723 #ifdef DEBUG 724 if (ccddebug & CCDB_FOLLOW) 725 printf("ccdthread: hello\n"); 726 #endif 727 728 mutex_enter(cs->sc_iolock); 729 while (__predict_true(!cs->sc_zap)) { 730 if (bufq_peek(cs->sc_bufq) == NULL) { 731 /* Nothing to do. */ 732 cv_wait(&cs->sc_push, cs->sc_iolock); 733 continue; 734 } 735 if (ccdbackoff(cs)) { 736 /* Wait for memory to become available. */ 737 (void)cv_timedwait(&cs->sc_push, cs->sc_iolock, 1); 738 continue; 739 } 740 #ifdef DEBUG 741 if (ccddebug & CCDB_FOLLOW) 742 printf("ccdthread: dispatching I/O\n"); 743 #endif 744 ccdstart(cs); 745 mutex_enter(cs->sc_iolock); 746 } 747 cs->sc_thread = NULL; 748 mutex_exit(cs->sc_iolock); 749 #ifdef DEBUG 750 if (ccddebug & CCDB_FOLLOW) 751 printf("ccdthread: goodbye\n"); 752 #endif 753 kthread_exit(0); 754 } 755 756 static void 757 ccdstrategy(struct buf *bp) 758 { 759 int unit = ccdunit(bp->b_dev); 760 struct ccd_softc *cs; 761 if ((cs = ccdget(unit, 0)) == NULL) 762 return; 763 764 /* Must be open or reading label. */ 765 KASSERT(cs->sc_dkdev.dk_openmask != 0 || 766 (cs->sc_flags & CCDF_RLABEL) != 0); 767 768 mutex_enter(cs->sc_iolock); 769 /* Synchronize with device init/uninit. */ 770 if (__predict_false((cs->sc_flags & CCDF_INITED) == 0)) { 771 mutex_exit(cs->sc_iolock); 772 #ifdef DEBUG 773 if (ccddebug & CCDB_FOLLOW) 774 printf("ccdstrategy: unit %d: not inited\n", unit); 775 #endif 776 bp->b_error = ENXIO; 777 bp->b_resid = bp->b_bcount; 778 biodone(bp); 779 return; 780 } 781 782 /* Defer to thread if system is low on memory. */ 783 bufq_put(cs->sc_bufq, bp); 784 if (__predict_false(ccdbackoff(cs))) { 785 mutex_exit(cs->sc_iolock); 786 #ifdef DEBUG 787 if (ccddebug & CCDB_FOLLOW) 788 printf("ccdstrategy: holding off on I/O\n"); 789 #endif 790 return; 791 } 792 ccdstart(cs); 793 } 794 795 static void 796 ccdstart(struct ccd_softc *cs) 797 { 798 daddr_t blkno; 799 int wlabel; 800 struct disklabel *lp; 801 long bcount, rcount; 802 struct ccdbuf *cbp; 803 char *addr; 804 daddr_t bn; 805 vnode_t *vp; 806 buf_t *bp; 807 808 KASSERT(mutex_owned(cs->sc_iolock)); 809 810 bp = bufq_get(cs->sc_bufq); 811 KASSERT(bp != NULL); 812 813 disk_busy(&cs->sc_dkdev); 814 815 #ifdef DEBUG 816 if (ccddebug & CCDB_FOLLOW) 817 printf("ccdstart(%s, %p)\n", cs->sc_xname, bp); 818 #endif 819 820 /* If it's a nil transfer, wake up the top half now. */ 821 if (bp->b_bcount == 0) 822 goto done; 823 824 lp = cs->sc_dkdev.dk_label; 825 826 /* 827 * Do bounds checking and adjust transfer. If there's an 828 * error, the bounds check will flag that for us. Convert 829 * the partition relative block number to an absolute. 830 */ 831 blkno = bp->b_blkno; 832 wlabel = cs->sc_flags & (CCDF_WLABEL|CCDF_LABELLING); 833 if (DISKPART(bp->b_dev) != RAW_PART) { 834 if (bounds_check_with_label(&cs->sc_dkdev, bp, wlabel) <= 0) 835 goto done; 836 blkno += lp->d_partitions[DISKPART(bp->b_dev)].p_offset; 837 } 838 mutex_exit(cs->sc_iolock); 839 bp->b_rawblkno = blkno; 840 841 /* Allocate the component buffers and start I/O! */ 842 bp->b_resid = bp->b_bcount; 843 bn = bp->b_rawblkno; 844 addr = bp->b_data; 845 for (bcount = bp->b_bcount; bcount > 0; bcount -= rcount) { 846 cbp = ccdbuffer(cs, bp, bn, addr, bcount); 847 rcount = cbp->cb_buf.b_bcount; 848 bn += btodb(rcount); 849 addr += rcount; 850 vp = cbp->cb_buf.b_vp; 851 if ((cbp->cb_buf.b_flags & B_READ) == 0) { 852 mutex_enter(vp->v_interlock); 853 vp->v_numoutput++; 854 mutex_exit(vp->v_interlock); 855 } 856 (void)VOP_STRATEGY(vp, &cbp->cb_buf); 857 } 858 return; 859 860 done: 861 disk_unbusy(&cs->sc_dkdev, 0, 0); 862 cv_broadcast(&cs->sc_stop); 863 cv_broadcast(&cs->sc_push); 864 mutex_exit(cs->sc_iolock); 865 bp->b_resid = bp->b_bcount; 866 biodone(bp); 867 } 868 869 /* 870 * Build a component buffer header. 871 */ 872 static struct ccdbuf * 873 ccdbuffer(struct ccd_softc *cs, struct buf *bp, daddr_t bn, void *addr, 874 long bcount) 875 { 876 struct ccdcinfo *ci; 877 struct ccdbuf *cbp; 878 daddr_t cbn, cboff; 879 u_int64_t cbc; 880 int ccdisk; 881 882 #ifdef DEBUG 883 if (ccddebug & CCDB_IO) 884 printf("ccdbuffer(%p, %p, %" PRId64 ", %p, %ld)\n", 885 cs, bp, bn, addr, bcount); 886 #endif 887 /* 888 * Determine which component bn falls in. 889 */ 890 cbn = bn; 891 cboff = 0; 892 893 /* 894 * Serially concatenated 895 */ 896 if (cs->sc_ileave == 0) { 897 daddr_t sblk; 898 899 sblk = 0; 900 for (ccdisk = 0, ci = &cs->sc_cinfo[ccdisk]; 901 cbn >= sblk + ci->ci_size; 902 ccdisk++, ci = &cs->sc_cinfo[ccdisk]) 903 sblk += ci->ci_size; 904 cbn -= sblk; 905 } 906 /* 907 * Interleaved 908 */ 909 else { 910 struct ccdiinfo *ii; 911 int off; 912 913 cboff = cbn % cs->sc_ileave; 914 cbn /= cs->sc_ileave; 915 for (ii = cs->sc_itable; ii->ii_ndisk; ii++) 916 if (ii->ii_startblk > cbn) 917 break; 918 ii--; 919 off = cbn - ii->ii_startblk; 920 if (ii->ii_ndisk == 1) { 921 ccdisk = ii->ii_index[0]; 922 cbn = ii->ii_startoff + off; 923 } else { 924 ccdisk = ii->ii_index[off % ii->ii_ndisk]; 925 cbn = ii->ii_startoff + off / ii->ii_ndisk; 926 } 927 cbn *= cs->sc_ileave; 928 ci = &cs->sc_cinfo[ccdisk]; 929 } 930 931 /* 932 * Fill in the component buf structure. 933 */ 934 cbp = CCD_GETBUF(); 935 KASSERT(cbp != NULL); 936 buf_init(&cbp->cb_buf); 937 cbp->cb_buf.b_flags = bp->b_flags; 938 cbp->cb_buf.b_oflags = bp->b_oflags; 939 cbp->cb_buf.b_cflags = bp->b_cflags; 940 cbp->cb_buf.b_iodone = ccdiodone; 941 cbp->cb_buf.b_proc = bp->b_proc; 942 cbp->cb_buf.b_dev = ci->ci_dev; 943 cbp->cb_buf.b_blkno = cbn + cboff; 944 cbp->cb_buf.b_data = addr; 945 cbp->cb_buf.b_vp = ci->ci_vp; 946 cbp->cb_buf.b_objlock = ci->ci_vp->v_interlock; 947 if (cs->sc_ileave == 0) 948 cbc = dbtob((u_int64_t)(ci->ci_size - cbn)); 949 else 950 cbc = dbtob((u_int64_t)(cs->sc_ileave - cboff)); 951 cbp->cb_buf.b_bcount = cbc < bcount ? cbc : bcount; 952 953 /* 954 * context for ccdiodone 955 */ 956 cbp->cb_obp = bp; 957 cbp->cb_sc = cs; 958 cbp->cb_comp = ccdisk; 959 960 BIO_COPYPRIO(&cbp->cb_buf, bp); 961 962 #ifdef DEBUG 963 if (ccddebug & CCDB_IO) 964 printf(" dev 0x%"PRIx64"(u%lu): cbp %p bn %" PRId64 " addr %p" 965 " bcnt %d\n", 966 ci->ci_dev, (unsigned long) (ci-cs->sc_cinfo), cbp, 967 cbp->cb_buf.b_blkno, cbp->cb_buf.b_data, 968 cbp->cb_buf.b_bcount); 969 #endif 970 971 return (cbp); 972 } 973 974 /* 975 * Called at interrupt time. 976 * Mark the component as done and if all components are done, 977 * take a ccd interrupt. 978 */ 979 static void 980 ccdiodone(struct buf *vbp) 981 { 982 struct ccdbuf *cbp = (struct ccdbuf *) vbp; 983 struct buf *bp = cbp->cb_obp; 984 struct ccd_softc *cs = cbp->cb_sc; 985 int count; 986 987 #ifdef DEBUG 988 if (ccddebug & CCDB_FOLLOW) 989 printf("ccdiodone(%p)\n", cbp); 990 if (ccddebug & CCDB_IO) { 991 printf("ccdiodone: bp %p bcount %d resid %d\n", 992 bp, bp->b_bcount, bp->b_resid); 993 printf(" dev 0x%"PRIx64"(u%d), cbp %p bn %" PRId64 " addr %p" 994 " bcnt %d\n", 995 cbp->cb_buf.b_dev, cbp->cb_comp, cbp, 996 cbp->cb_buf.b_blkno, cbp->cb_buf.b_data, 997 cbp->cb_buf.b_bcount); 998 } 999 #endif 1000 1001 if (cbp->cb_buf.b_error != 0) { 1002 bp->b_error = cbp->cb_buf.b_error; 1003 printf("%s: error %d on component %d\n", 1004 cs->sc_xname, bp->b_error, cbp->cb_comp); 1005 } 1006 count = cbp->cb_buf.b_bcount; 1007 buf_destroy(&cbp->cb_buf); 1008 CCD_PUTBUF(cbp); 1009 1010 /* 1011 * If all done, "interrupt". 1012 */ 1013 mutex_enter(cs->sc_iolock); 1014 bp->b_resid -= count; 1015 if (bp->b_resid < 0) 1016 panic("ccdiodone: count"); 1017 if (bp->b_resid == 0) { 1018 /* 1019 * Request is done for better or worse, wakeup the top half. 1020 */ 1021 if (bp->b_error != 0) 1022 bp->b_resid = bp->b_bcount; 1023 disk_unbusy(&cs->sc_dkdev, (bp->b_bcount - bp->b_resid), 1024 (bp->b_flags & B_READ)); 1025 if (!disk_isbusy(&cs->sc_dkdev)) { 1026 if (bufq_peek(cs->sc_bufq) != NULL) { 1027 cv_broadcast(&cs->sc_push); 1028 } 1029 cv_broadcast(&cs->sc_stop); 1030 } 1031 mutex_exit(cs->sc_iolock); 1032 biodone(bp); 1033 } else 1034 mutex_exit(cs->sc_iolock); 1035 } 1036 1037 /* ARGSUSED */ 1038 static int 1039 ccdread(dev_t dev, struct uio *uio, int flags) 1040 { 1041 int unit = ccdunit(dev); 1042 struct ccd_softc *cs; 1043 1044 #ifdef DEBUG 1045 if (ccddebug & CCDB_FOLLOW) 1046 printf("ccdread(0x%"PRIx64", %p)\n", dev, uio); 1047 #endif 1048 if ((cs = ccdget(unit, 0)) == NULL) 1049 return 0; 1050 1051 /* Unlocked advisory check, ccdstrategy check is synchronous. */ 1052 if ((cs->sc_flags & CCDF_INITED) == 0) 1053 return (ENXIO); 1054 1055 return (physio(ccdstrategy, NULL, dev, B_READ, minphys, uio)); 1056 } 1057 1058 /* ARGSUSED */ 1059 static int 1060 ccdwrite(dev_t dev, struct uio *uio, int flags) 1061 { 1062 int unit = ccdunit(dev); 1063 struct ccd_softc *cs; 1064 1065 #ifdef DEBUG 1066 if (ccddebug & CCDB_FOLLOW) 1067 printf("ccdwrite(0x%"PRIx64", %p)\n", dev, uio); 1068 #endif 1069 if ((cs = ccdget(unit, 0)) == NULL) 1070 return ENOENT; 1071 1072 /* Unlocked advisory check, ccdstrategy check is synchronous. */ 1073 if ((cs->sc_flags & CCDF_INITED) == 0) 1074 return (ENXIO); 1075 1076 return (physio(ccdstrategy, NULL, dev, B_WRITE, minphys, uio)); 1077 } 1078 1079 static int 1080 ccdioctl(dev_t dev, u_long cmd, void *data, int flag, struct lwp *l) 1081 { 1082 int unit = ccdunit(dev); 1083 int i, j, lookedup = 0, error = 0; 1084 int part, pmask, make; 1085 struct ccd_softc *cs; 1086 struct ccd_ioctl *ccio = (struct ccd_ioctl *)data; 1087 kauth_cred_t uc; 1088 char **cpp; 1089 struct pathbuf *pb; 1090 struct vnode **vpp; 1091 #ifdef __HAVE_OLD_DISKLABEL 1092 struct disklabel newlabel; 1093 #endif 1094 1095 switch (cmd) { 1096 #if defined(COMPAT_60) && !defined(_LP64) 1097 case CCDIOCSET_60: 1098 #endif 1099 case CCDIOCSET: 1100 make = 1; 1101 break; 1102 default: 1103 make = 0; 1104 break; 1105 } 1106 1107 if ((cs = ccdget(unit, make)) == NULL) 1108 return ENOENT; 1109 uc = kauth_cred_get(); 1110 1111 /* 1112 * Compat code must not be called if on a platform where 1113 * sizeof (size_t) == sizeof (uint64_t) as CCDIOCSET will 1114 * be the same as CCDIOCSET_60 1115 */ 1116 #if defined(COMPAT_60) && !defined(_LP64) 1117 switch (cmd) { 1118 case CCDIOCSET_60: { 1119 struct ccd_ioctl ccionew; 1120 struct ccd_ioctl_60 *ccio60 = 1121 (struct ccd_ioctl_60 *)data; 1122 ccionew.ccio_disks = ccio->ccio_disks; 1123 ccionew.ccio_ndisks = ccio->ccio_ndisks; 1124 ccionew.ccio_ileave = ccio->ccio_ileave; 1125 ccionew.ccio_flags = ccio->ccio_flags; 1126 ccionew.ccio_unit = ccio->ccio_unit; 1127 error = ccdioctl(dev, CCDIOCSET, &ccionew, flag, l); 1128 if (!error) { 1129 /* Copy data back, adjust types if necessary */ 1130 ccio60->ccio_disks = ccionew.ccio_disks; 1131 ccio60->ccio_ndisks = ccionew.ccio_ndisks; 1132 ccio60->ccio_ileave = ccionew.ccio_ileave; 1133 ccio60->ccio_flags = ccionew.ccio_flags; 1134 ccio60->ccio_unit = ccionew.ccio_unit; 1135 ccio60->ccio_size = (size_t)ccionew.ccio_size; 1136 } 1137 return error; 1138 } 1139 break; 1140 1141 case CCDIOCCLR_60: 1142 /* 1143 * ccio_size member not used, so existing struct OK 1144 * drop through to existing non-compat version 1145 */ 1146 cmd = CCDIOCCLR; 1147 break; 1148 } 1149 #endif /* COMPAT_60 && !_LP64*/ 1150 1151 /* Must be open for writes for these commands... */ 1152 switch (cmd) { 1153 case CCDIOCSET: 1154 case CCDIOCCLR: 1155 case DIOCSDINFO: 1156 case DIOCWDINFO: 1157 case DIOCCACHESYNC: 1158 case DIOCAWEDGE: 1159 case DIOCDWEDGE: 1160 case DIOCMWEDGES: 1161 #ifdef __HAVE_OLD_DISKLABEL 1162 case ODIOCSDINFO: 1163 case ODIOCWDINFO: 1164 #endif 1165 case DIOCKLABEL: 1166 case DIOCWLABEL: 1167 if ((flag & FWRITE) == 0) 1168 return (EBADF); 1169 } 1170 1171 mutex_enter(&cs->sc_dvlock); 1172 1173 /* Must be initialized for these... */ 1174 switch (cmd) { 1175 case CCDIOCCLR: 1176 case DIOCGDINFO: 1177 case DIOCGSTRATEGY: 1178 case DIOCGCACHE: 1179 case DIOCCACHESYNC: 1180 case DIOCAWEDGE: 1181 case DIOCDWEDGE: 1182 case DIOCLWEDGES: 1183 case DIOCMWEDGES: 1184 case DIOCSDINFO: 1185 case DIOCWDINFO: 1186 case DIOCGPARTINFO: 1187 case DIOCWLABEL: 1188 case DIOCKLABEL: 1189 case DIOCGDEFLABEL: 1190 #ifdef __HAVE_OLD_DISKLABEL 1191 case ODIOCGDINFO: 1192 case ODIOCSDINFO: 1193 case ODIOCWDINFO: 1194 case ODIOCGDEFLABEL: 1195 #endif 1196 if ((cs->sc_flags & CCDF_INITED) == 0) { 1197 error = ENXIO; 1198 goto out; 1199 } 1200 } 1201 1202 error = disk_ioctl(&cs->sc_dkdev, dev, cmd, data, flag, l); 1203 if (error != EPASSTHROUGH) 1204 goto out; 1205 1206 error = 0; 1207 switch (cmd) { 1208 case CCDIOCSET: 1209 if (cs->sc_flags & CCDF_INITED) { 1210 error = EBUSY; 1211 goto out; 1212 } 1213 1214 /* Validate the flags. */ 1215 if ((ccio->ccio_flags & CCDF_USERMASK) != ccio->ccio_flags) { 1216 error = EINVAL; 1217 goto out; 1218 } 1219 1220 if (ccio->ccio_ndisks > CCD_MAXNDISKS || 1221 ccio->ccio_ndisks == 0) { 1222 error = EINVAL; 1223 goto out; 1224 } 1225 1226 /* Fill in some important bits. */ 1227 cs->sc_ileave = ccio->ccio_ileave; 1228 cs->sc_nccdisks = ccio->ccio_ndisks; 1229 cs->sc_flags = ccio->ccio_flags & CCDF_USERMASK; 1230 1231 /* 1232 * Allocate space for and copy in the array of 1233 * component pathnames and device numbers. 1234 */ 1235 cpp = kmem_alloc(ccio->ccio_ndisks * sizeof(*cpp), KM_SLEEP); 1236 vpp = kmem_alloc(ccio->ccio_ndisks * sizeof(*vpp), KM_SLEEP); 1237 error = copyin(ccio->ccio_disks, cpp, 1238 ccio->ccio_ndisks * sizeof(*cpp)); 1239 if (error) { 1240 kmem_free(vpp, ccio->ccio_ndisks * sizeof(*vpp)); 1241 kmem_free(cpp, ccio->ccio_ndisks * sizeof(*cpp)); 1242 goto out; 1243 } 1244 1245 #ifdef DEBUG 1246 if (ccddebug & CCDB_INIT) 1247 for (i = 0; i < ccio->ccio_ndisks; ++i) 1248 printf("ccdioctl: component %d: %p\n", 1249 i, cpp[i]); 1250 #endif 1251 1252 for (i = 0; i < ccio->ccio_ndisks; ++i) { 1253 #ifdef DEBUG 1254 if (ccddebug & CCDB_INIT) 1255 printf("ccdioctl: lookedup = %d\n", lookedup); 1256 #endif 1257 error = pathbuf_copyin(cpp[i], &pb); 1258 if (error == 0) { 1259 error = dk_lookup(pb, l, &vpp[i]); 1260 } 1261 pathbuf_destroy(pb); 1262 if (error != 0) { 1263 for (j = 0; j < lookedup; ++j) 1264 (void)vn_close(vpp[j], FREAD|FWRITE, 1265 uc); 1266 kmem_free(vpp, ccio->ccio_ndisks * 1267 sizeof(*vpp)); 1268 kmem_free(cpp, ccio->ccio_ndisks * 1269 sizeof(*cpp)); 1270 goto out; 1271 } 1272 ++lookedup; 1273 } 1274 1275 /* Attach the disk. */ 1276 disk_attach(&cs->sc_dkdev); 1277 bufq_alloc(&cs->sc_bufq, "fcfs", 0); 1278 1279 /* 1280 * Initialize the ccd. Fills in the softc for us. 1281 */ 1282 if ((error = ccdinit(cs, cpp, vpp, l)) != 0) { 1283 for (j = 0; j < lookedup; ++j) 1284 (void)vn_close(vpp[j], FREAD|FWRITE, 1285 uc); 1286 kmem_free(vpp, ccio->ccio_ndisks * sizeof(*vpp)); 1287 kmem_free(cpp, ccio->ccio_ndisks * sizeof(*cpp)); 1288 disk_detach(&cs->sc_dkdev); 1289 mutex_exit(&cs->sc_dvlock); 1290 bufq_free(cs->sc_bufq); 1291 return error; 1292 } 1293 1294 /* We can free the temporary variables now. */ 1295 kmem_free(vpp, ccio->ccio_ndisks * sizeof(*vpp)); 1296 kmem_free(cpp, ccio->ccio_ndisks * sizeof(*cpp)); 1297 1298 /* 1299 * The ccd has been successfully initialized, so 1300 * we can place it into the array. Don't try to 1301 * read the disklabel until the disk has been attached, 1302 * because space for the disklabel is allocated 1303 * in disk_attach(); 1304 */ 1305 ccio->ccio_unit = unit; 1306 ccio->ccio_size = cs->sc_size; 1307 1308 /* Try and read the disklabel. */ 1309 ccdgetdisklabel(dev); 1310 disk_set_info(NULL, &cs->sc_dkdev, NULL); 1311 1312 /* discover wedges */ 1313 mutex_exit(&cs->sc_dvlock); 1314 dkwedge_discover(&cs->sc_dkdev); 1315 return 0; 1316 1317 case CCDIOCCLR: 1318 /* 1319 * Don't unconfigure if any other partitions are open 1320 * or if both the character and block flavors of this 1321 * partition are open. 1322 */ 1323 part = DISKPART(dev); 1324 pmask = (1 << part); 1325 if ((cs->sc_dkdev.dk_openmask & ~pmask) || 1326 ((cs->sc_dkdev.dk_bopenmask & pmask) && 1327 (cs->sc_dkdev.dk_copenmask & pmask))) { 1328 error = EBUSY; 1329 goto out; 1330 } 1331 1332 /* Delete all of our wedges. */ 1333 dkwedge_delall(&cs->sc_dkdev); 1334 1335 /* Stop new I/O, wait for in-flight I/O to complete. */ 1336 mutex_enter(cs->sc_iolock); 1337 cs->sc_flags &= ~(CCDF_INITED|CCDF_VLABEL); 1338 cs->sc_zap = true; 1339 while (disk_isbusy(&cs->sc_dkdev) || 1340 bufq_peek(cs->sc_bufq) != NULL || 1341 cs->sc_thread != NULL) { 1342 cv_broadcast(&cs->sc_push); 1343 (void)cv_timedwait(&cs->sc_stop, cs->sc_iolock, hz); 1344 } 1345 mutex_exit(cs->sc_iolock); 1346 1347 /* 1348 * Free ccd_softc information and clear entry. 1349 */ 1350 1351 /* Close the components and free their pathnames. */ 1352 for (i = 0; i < cs->sc_nccdisks; ++i) { 1353 /* 1354 * XXX: this close could potentially fail and 1355 * cause Bad Things. Maybe we need to force 1356 * the close to happen? 1357 */ 1358 #ifdef DEBUG 1359 if (ccddebug & CCDB_VNODE) 1360 vprint("CCDIOCCLR: vnode info", 1361 cs->sc_cinfo[i].ci_vp); 1362 #endif 1363 (void)vn_close(cs->sc_cinfo[i].ci_vp, FREAD|FWRITE, 1364 uc); 1365 kmem_free(cs->sc_cinfo[i].ci_path, 1366 cs->sc_cinfo[i].ci_pathlen); 1367 } 1368 1369 /* Free interleave index. */ 1370 for (i = 0; cs->sc_itable[i].ii_ndisk; ++i) { 1371 kmem_free(cs->sc_itable[i].ii_index, 1372 cs->sc_itable[i].ii_indexsz); 1373 } 1374 1375 /* Free component info and interleave table. */ 1376 kmem_free(cs->sc_cinfo, cs->sc_nccdisks * 1377 sizeof(struct ccdcinfo)); 1378 kmem_free(cs->sc_itable, (cs->sc_nccdisks + 1) * 1379 sizeof(struct ccdiinfo)); 1380 1381 aprint_normal("%s: detached\n", cs->sc_xname); 1382 1383 /* Detach the disk. */ 1384 disk_detach(&cs->sc_dkdev); 1385 bufq_free(cs->sc_bufq); 1386 ccdput(cs); 1387 /* Don't break, otherwise cs is read again. */ 1388 return 0; 1389 1390 case DIOCGSTRATEGY: 1391 { 1392 struct disk_strategy *dks = (void *)data; 1393 1394 mutex_enter(cs->sc_iolock); 1395 if (cs->sc_bufq != NULL) 1396 strlcpy(dks->dks_name, 1397 bufq_getstrategyname(cs->sc_bufq), 1398 sizeof(dks->dks_name)); 1399 else 1400 error = EINVAL; 1401 mutex_exit(cs->sc_iolock); 1402 dks->dks_paramlen = 0; 1403 break; 1404 } 1405 1406 case DIOCGCACHE: 1407 { 1408 int dkcache = 0; 1409 1410 /* 1411 * We pass this call down to all components and report 1412 * intersection of the flags returned by the components. 1413 * If any errors out, we return error. CCD components 1414 * can not change unless the device is unconfigured, so 1415 * device feature flags will remain static. RCE/WCE can change 1416 * of course, if set directly on underlying device. 1417 */ 1418 for (error = 0, i = 0; i < cs->sc_nccdisks; i++) { 1419 error = VOP_IOCTL(cs->sc_cinfo[i].ci_vp, cmd, &j, 1420 flag, uc); 1421 if (error) 1422 break; 1423 1424 if (i == 0) 1425 dkcache = j; 1426 else 1427 dkcache = DKCACHE_COMBINE(dkcache, j); 1428 } 1429 1430 *((int *)data) = dkcache; 1431 break; 1432 } 1433 1434 case DIOCCACHESYNC: 1435 /* 1436 * We pass this call down to all components and report 1437 * the first error we encounter. 1438 */ 1439 for (error = 0, i = 0; i < cs->sc_nccdisks; i++) { 1440 j = VOP_IOCTL(cs->sc_cinfo[i].ci_vp, cmd, data, 1441 flag, uc); 1442 if (j != 0 && error == 0) 1443 error = j; 1444 } 1445 break; 1446 1447 case DIOCWDINFO: 1448 case DIOCSDINFO: 1449 #ifdef __HAVE_OLD_DISKLABEL 1450 case ODIOCWDINFO: 1451 case ODIOCSDINFO: 1452 #endif 1453 { 1454 struct disklabel *lp; 1455 #ifdef __HAVE_OLD_DISKLABEL 1456 if (cmd == ODIOCSDINFO || cmd == ODIOCWDINFO) { 1457 memset(&newlabel, 0, sizeof newlabel); 1458 memcpy(&newlabel, data, sizeof (struct olddisklabel)); 1459 lp = &newlabel; 1460 } else 1461 #endif 1462 lp = (struct disklabel *)data; 1463 1464 cs->sc_flags |= CCDF_LABELLING; 1465 1466 error = setdisklabel(cs->sc_dkdev.dk_label, 1467 lp, 0, cs->sc_dkdev.dk_cpulabel); 1468 if (error == 0) { 1469 if (cmd == DIOCWDINFO 1470 #ifdef __HAVE_OLD_DISKLABEL 1471 || cmd == ODIOCWDINFO 1472 #endif 1473 ) 1474 error = writedisklabel(CCDLABELDEV(dev), 1475 ccdstrategy, cs->sc_dkdev.dk_label, 1476 cs->sc_dkdev.dk_cpulabel); 1477 } 1478 1479 cs->sc_flags &= ~CCDF_LABELLING; 1480 break; 1481 } 1482 1483 case DIOCKLABEL: 1484 if (*(int *)data != 0) 1485 cs->sc_flags |= CCDF_KLABEL; 1486 else 1487 cs->sc_flags &= ~CCDF_KLABEL; 1488 break; 1489 1490 case DIOCWLABEL: 1491 if (*(int *)data != 0) 1492 cs->sc_flags |= CCDF_WLABEL; 1493 else 1494 cs->sc_flags &= ~CCDF_WLABEL; 1495 break; 1496 1497 case DIOCGDEFLABEL: 1498 ccdgetdefaultlabel(cs, (struct disklabel *)data); 1499 break; 1500 1501 #ifdef __HAVE_OLD_DISKLABEL 1502 case ODIOCGDEFLABEL: 1503 ccdgetdefaultlabel(cs, &newlabel); 1504 if (newlabel.d_npartitions > OLDMAXPARTITIONS) 1505 return ENOTTY; 1506 memcpy(data, &newlabel, sizeof (struct olddisklabel)); 1507 break; 1508 #endif 1509 1510 default: 1511 error = ENOTTY; 1512 } 1513 1514 out: 1515 mutex_exit(&cs->sc_dvlock); 1516 return (error); 1517 } 1518 1519 static int 1520 ccdsize(dev_t dev) 1521 { 1522 struct ccd_softc *cs; 1523 struct disklabel *lp; 1524 int part, unit, omask, size; 1525 1526 unit = ccdunit(dev); 1527 if ((cs = ccdget(unit, 0)) == NULL) 1528 return -1; 1529 1530 if ((cs->sc_flags & CCDF_INITED) == 0) 1531 return (-1); 1532 1533 part = DISKPART(dev); 1534 omask = cs->sc_dkdev.dk_openmask & (1 << part); 1535 lp = cs->sc_dkdev.dk_label; 1536 1537 if (omask == 0 && ccdopen(dev, 0, S_IFBLK, curlwp)) 1538 return (-1); 1539 1540 if (lp->d_partitions[part].p_fstype != FS_SWAP) 1541 size = -1; 1542 else 1543 size = lp->d_partitions[part].p_size * 1544 (lp->d_secsize / DEV_BSIZE); 1545 1546 if (omask == 0 && ccdclose(dev, 0, S_IFBLK, curlwp)) 1547 return (-1); 1548 1549 return (size); 1550 } 1551 1552 static void 1553 ccdgetdefaultlabel(struct ccd_softc *cs, struct disklabel *lp) 1554 { 1555 struct ccdgeom *ccg = &cs->sc_geom; 1556 1557 memset(lp, 0, sizeof(*lp)); 1558 1559 if (cs->sc_size > UINT32_MAX) 1560 lp->d_secperunit = UINT32_MAX; 1561 else 1562 lp->d_secperunit = cs->sc_size; 1563 lp->d_secsize = ccg->ccg_secsize; 1564 lp->d_nsectors = ccg->ccg_nsectors; 1565 lp->d_ntracks = ccg->ccg_ntracks; 1566 lp->d_ncylinders = ccg->ccg_ncylinders; 1567 lp->d_secpercyl = lp->d_ntracks * lp->d_nsectors; 1568 1569 strncpy(lp->d_typename, "ccd", sizeof(lp->d_typename)); 1570 lp->d_type = DKTYPE_CCD; 1571 strncpy(lp->d_packname, "fictitious", sizeof(lp->d_packname)); 1572 lp->d_rpm = 3600; 1573 lp->d_interleave = 1; 1574 lp->d_flags = 0; 1575 1576 lp->d_partitions[RAW_PART].p_offset = 0; 1577 lp->d_partitions[RAW_PART].p_size = lp->d_secperunit; 1578 lp->d_partitions[RAW_PART].p_fstype = FS_UNUSED; 1579 lp->d_npartitions = RAW_PART + 1; 1580 1581 lp->d_magic = DISKMAGIC; 1582 lp->d_magic2 = DISKMAGIC; 1583 lp->d_checksum = dkcksum(cs->sc_dkdev.dk_label); 1584 } 1585 1586 /* 1587 * Read the disklabel from the ccd. If one is not present, fake one 1588 * up. 1589 */ 1590 static void 1591 ccdgetdisklabel(dev_t dev) 1592 { 1593 int unit = ccdunit(dev); 1594 struct ccd_softc *cs; 1595 const char *errstring; 1596 struct disklabel *lp; 1597 struct cpu_disklabel *clp; 1598 1599 if ((cs = ccdget(unit, 0)) == NULL) 1600 return; 1601 lp = cs->sc_dkdev.dk_label; 1602 clp = cs->sc_dkdev.dk_cpulabel; 1603 KASSERT(mutex_owned(&cs->sc_dvlock)); 1604 1605 memset(clp, 0, sizeof(*clp)); 1606 1607 ccdgetdefaultlabel(cs, lp); 1608 1609 /* 1610 * Call the generic disklabel extraction routine. 1611 */ 1612 cs->sc_flags |= CCDF_RLABEL; 1613 if ((cs->sc_flags & CCDF_NOLABEL) != 0) 1614 errstring = "CCDF_NOLABEL set; ignoring on-disk label"; 1615 else 1616 errstring = readdisklabel(CCDLABELDEV(dev), ccdstrategy, 1617 cs->sc_dkdev.dk_label, cs->sc_dkdev.dk_cpulabel); 1618 if (errstring) 1619 ccdmakedisklabel(cs); 1620 else { 1621 int i; 1622 struct partition *pp; 1623 1624 /* 1625 * Sanity check whether the found disklabel is valid. 1626 * 1627 * This is necessary since total size of ccd may vary 1628 * when an interleave is changed even though exactly 1629 * same componets are used, and old disklabel may used 1630 * if that is found. 1631 */ 1632 if (lp->d_secperunit < UINT32_MAX ? 1633 lp->d_secperunit != cs->sc_size : 1634 lp->d_secperunit > cs->sc_size) 1635 printf("WARNING: %s: " 1636 "total sector size in disklabel (%ju) != " 1637 "the size of ccd (%ju)\n", cs->sc_xname, 1638 (uintmax_t)lp->d_secperunit, 1639 (uintmax_t)cs->sc_size); 1640 for (i = 0; i < lp->d_npartitions; i++) { 1641 pp = &lp->d_partitions[i]; 1642 if (pp->p_offset + pp->p_size > cs->sc_size) 1643 printf("WARNING: %s: end of partition `%c' " 1644 "exceeds the size of ccd (%ju)\n", 1645 cs->sc_xname, 'a' + i, (uintmax_t)cs->sc_size); 1646 } 1647 } 1648 1649 #ifdef DEBUG 1650 /* It's actually extremely common to have unlabeled ccds. */ 1651 if (ccddebug & CCDB_LABEL) 1652 if (errstring != NULL) 1653 printf("%s: %s\n", cs->sc_xname, errstring); 1654 #endif 1655 1656 /* In-core label now valid. */ 1657 cs->sc_flags = (cs->sc_flags | CCDF_VLABEL) & ~CCDF_RLABEL; 1658 } 1659 1660 /* 1661 * Take care of things one might want to take care of in the event 1662 * that a disklabel isn't present. 1663 */ 1664 static void 1665 ccdmakedisklabel(struct ccd_softc *cs) 1666 { 1667 struct disklabel *lp = cs->sc_dkdev.dk_label; 1668 1669 /* 1670 * For historical reasons, if there's no disklabel present 1671 * the raw partition must be marked FS_BSDFFS. 1672 */ 1673 lp->d_partitions[RAW_PART].p_fstype = FS_BSDFFS; 1674 1675 strncpy(lp->d_packname, "default label", sizeof(lp->d_packname)); 1676 1677 lp->d_checksum = dkcksum(lp); 1678 } 1679 1680 #ifdef DEBUG 1681 static void 1682 printiinfo(struct ccdiinfo *ii) 1683 { 1684 int ix, i; 1685 1686 for (ix = 0; ii->ii_ndisk; ix++, ii++) { 1687 printf(" itab[%d]: #dk %d sblk %" PRId64 " soff %" PRId64, 1688 ix, ii->ii_ndisk, ii->ii_startblk, ii->ii_startoff); 1689 for (i = 0; i < ii->ii_ndisk; i++) 1690 printf(" %d", ii->ii_index[i]); 1691 printf("\n"); 1692 } 1693 } 1694 #endif 1695 1696 MODULE(MODULE_CLASS_DRIVER, ccd, "dk_subr"); 1697 1698 static int 1699 ccd_modcmd(modcmd_t cmd, void *arg) 1700 { 1701 int error = 0; 1702 #ifdef _MODULE 1703 int bmajor = -1, cmajor = -1; 1704 #endif 1705 1706 1707 switch (cmd) { 1708 case MODULE_CMD_INIT: 1709 #ifdef _MODULE 1710 ccdattach(0); 1711 1712 error = devsw_attach("ccd", &ccd_bdevsw, &bmajor, 1713 &ccd_cdevsw, &cmajor); 1714 sysctl_kern_ccd_setup(&ccd_clog); 1715 #endif 1716 break; 1717 1718 case MODULE_CMD_FINI: 1719 #ifdef _MODULE 1720 mutex_enter(&ccd_lock); 1721 if (!LIST_EMPTY(ccds)) { 1722 mutex_exit(&ccd_lock); 1723 error = EBUSY; 1724 } else { 1725 mutex_exit(&ccd_lock); 1726 error = devsw_detach(&ccd_bdevsw, &ccd_cdevsw); 1727 ccddetach(); 1728 } 1729 sysctl_teardown(&ccd_clog); 1730 #endif 1731 break; 1732 1733 case MODULE_CMD_STAT: 1734 return ENOTTY; 1735 1736 default: 1737 return ENOTTY; 1738 } 1739 1740 return error; 1741 } 1742 1743 static int 1744 ccd_units_sysctl(SYSCTLFN_ARGS) 1745 { 1746 struct sysctlnode node; 1747 struct ccd_softc *sc; 1748 int error, i, nccd, *units; 1749 size_t size; 1750 1751 nccd = 0; 1752 mutex_enter(&ccd_lock); 1753 LIST_FOREACH(sc, &ccds, sc_link) 1754 nccd++; 1755 mutex_exit(&ccd_lock); 1756 1757 if (nccd != 0) { 1758 size = nccd * sizeof(*units); 1759 units = kmem_zalloc(size, KM_SLEEP); 1760 i = 0; 1761 mutex_enter(&ccd_lock); 1762 LIST_FOREACH(sc, &ccds, sc_link) { 1763 if (i >= nccd) 1764 break; 1765 units[i] = sc->sc_unit; 1766 } 1767 mutex_exit(&ccd_lock); 1768 } else { 1769 units = NULL; 1770 size = 0; 1771 } 1772 1773 node = *rnode; 1774 node.sysctl_data = units; 1775 node.sysctl_size = size; 1776 1777 error = sysctl_lookup(SYSCTLFN_CALL(&node)); 1778 if (units) 1779 kmem_free(units, size); 1780 return error; 1781 } 1782 1783 static int 1784 ccd_info_sysctl(SYSCTLFN_ARGS) 1785 { 1786 struct sysctlnode node; 1787 struct ccddiskinfo ccd; 1788 struct ccd_softc *sc; 1789 int unit; 1790 1791 if (newp == NULL || newlen != sizeof(int)) 1792 return EINVAL; 1793 1794 unit = *(const int *)newp; 1795 newp = NULL; 1796 newlen = 0; 1797 ccd.ccd_ndisks = ~0; 1798 mutex_enter(&ccd_lock); 1799 LIST_FOREACH(sc, &ccds, sc_link) { 1800 if (sc->sc_unit == unit) { 1801 ccd.ccd_ileave = sc->sc_ileave; 1802 ccd.ccd_size = sc->sc_size; 1803 ccd.ccd_ndisks = sc->sc_nccdisks; 1804 ccd.ccd_flags = sc->sc_flags; 1805 break; 1806 } 1807 } 1808 mutex_exit(&ccd_lock); 1809 1810 if (ccd.ccd_ndisks == ~0) 1811 return ENOENT; 1812 1813 node = *rnode; 1814 node.sysctl_data = &ccd; 1815 node.sysctl_size = sizeof(ccd); 1816 1817 return sysctl_lookup(SYSCTLFN_CALL(&node)); 1818 } 1819 1820 static int 1821 ccd_components_sysctl(SYSCTLFN_ARGS) 1822 { 1823 struct sysctlnode node; 1824 int error, unit; 1825 size_t size; 1826 char *names, *p, *ep; 1827 struct ccd_softc *sc; 1828 1829 if (newp == NULL || newlen != sizeof(int)) 1830 return EINVAL; 1831 1832 size = 0; 1833 unit = *(const int *)newp; 1834 newp = NULL; 1835 newlen = 0; 1836 mutex_enter(&ccd_lock); 1837 LIST_FOREACH(sc, &ccds, sc_link) 1838 if (sc->sc_unit == unit) { 1839 for (size_t i = 0; i < sc->sc_nccdisks; i++) 1840 size += strlen(sc->sc_cinfo[i].ci_path) + 1; 1841 break; 1842 } 1843 mutex_exit(&ccd_lock); 1844 1845 if (size == 0) 1846 return ENOENT; 1847 names = kmem_zalloc(size, KM_SLEEP); 1848 p = names; 1849 ep = names + size; 1850 mutex_enter(&ccd_lock); 1851 LIST_FOREACH(sc, &ccds, sc_link) 1852 if (sc->sc_unit == unit) { 1853 for (size_t i = 0; i < sc->sc_nccdisks; i++) { 1854 char *d = sc->sc_cinfo[i].ci_path; 1855 while (p < ep && (*p++ = *d++) != '\0') 1856 continue; 1857 } 1858 break; 1859 } 1860 mutex_exit(&ccd_lock); 1861 1862 node = *rnode; 1863 node.sysctl_data = names; 1864 node.sysctl_size = ep - names; 1865 1866 error = sysctl_lookup(SYSCTLFN_CALL(&node)); 1867 kmem_free(names, size); 1868 return error; 1869 } 1870 1871 SYSCTL_SETUP(sysctl_kern_ccd_setup, "sysctl kern.ccd subtree setup") 1872 { 1873 const struct sysctlnode *node = NULL; 1874 1875 sysctl_createv(clog, 0, NULL, &node, 1876 CTLFLAG_PERMANENT, 1877 CTLTYPE_NODE, "ccd", 1878 SYSCTL_DESCR("ConCatenated Disk state"), 1879 NULL, 0, NULL, 0, 1880 CTL_KERN, CTL_CREATE, CTL_EOL); 1881 1882 if (node == NULL) 1883 return; 1884 1885 sysctl_createv(clog, 0, &node, NULL, 1886 CTLFLAG_PERMANENT | CTLFLAG_READONLY, 1887 CTLTYPE_STRUCT, "units", 1888 SYSCTL_DESCR("List of ccd unit numbers"), 1889 ccd_units_sysctl, 0, NULL, 0, 1890 CTL_CREATE, CTL_EOL); 1891 sysctl_createv(clog, 0, &node, NULL, 1892 CTLFLAG_PERMANENT | CTLFLAG_READWRITE, 1893 CTLTYPE_STRUCT, "info", 1894 SYSCTL_DESCR("Information about a CCD unit"), 1895 ccd_info_sysctl, 0, NULL, 0, 1896 CTL_CREATE, CTL_EOL); 1897 sysctl_createv(clog, 0, &node, NULL, 1898 CTLFLAG_PERMANENT | CTLFLAG_READWRITE, 1899 CTLTYPE_STRUCT, "components", 1900 SYSCTL_DESCR("Information about CCD components"), 1901 ccd_components_sysctl, 0, NULL, 0, 1902 CTL_CREATE, CTL_EOL); 1903 } 1904