1 /* $NetBSD: ccd.c,v 1.177 2019/01/27 02:08:41 pgoyette Exp $ */ 2 3 /*- 4 * Copyright (c) 1996, 1997, 1998, 1999, 2007, 2009 The NetBSD Foundation, Inc. 5 * All rights reserved. 6 * 7 * This code is derived from software contributed to The NetBSD Foundation 8 * by Jason R. Thorpe, and by Andrew Doran. 9 * 10 * Redistribution and use in source and binary forms, with or without 11 * modification, are permitted provided that the following conditions 12 * are met: 13 * 1. Redistributions of source code must retain the above copyright 14 * notice, this list of conditions and the following disclaimer. 15 * 2. Redistributions in binary form must reproduce the above copyright 16 * notice, this list of conditions and the following disclaimer in the 17 * documentation and/or other materials provided with the distribution. 18 * 19 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS 20 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 21 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 22 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS 23 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 24 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 25 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 26 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 27 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 28 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 29 * POSSIBILITY OF SUCH DAMAGE. 30 */ 31 32 /* 33 * Copyright (c) 1988 University of Utah. 34 * Copyright (c) 1990, 1993 35 * The Regents of the University of California. All rights reserved. 36 * 37 * This code is derived from software contributed to Berkeley by 38 * the Systems Programming Group of the University of Utah Computer 39 * Science Department. 40 * 41 * Redistribution and use in source and binary forms, with or without 42 * modification, are permitted provided that the following conditions 43 * are met: 44 * 1. Redistributions of source code must retain the above copyright 45 * notice, this list of conditions and the following disclaimer. 46 * 2. Redistributions in binary form must reproduce the above copyright 47 * notice, this list of conditions and the following disclaimer in the 48 * documentation and/or other materials provided with the distribution. 49 * 3. Neither the name of the University nor the names of its contributors 50 * may be used to endorse or promote products derived from this software 51 * without specific prior written permission. 52 * 53 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 54 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 55 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 56 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 57 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 58 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 59 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 60 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 61 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 62 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 63 * SUCH DAMAGE. 64 * 65 * from: Utah $Hdr: cd.c 1.6 90/11/28$ 66 * 67 * @(#)cd.c 8.2 (Berkeley) 11/16/93 68 */ 69 70 /* 71 * "Concatenated" disk driver. 72 * 73 * Notes on concurrency: 74 * 75 * => sc_dvlock serializes access to the device nodes, excluding block I/O. 76 * 77 * => sc_iolock serializes access to (sc_flags & CCDF_INITED), disk stats, 78 * sc_stop, sc_bufq and b_resid from master buffers. 79 * 80 * => a combination of CCDF_INITED, sc_inflight, and sc_iolock is used to 81 * serialize I/O and configuration changes. 82 * 83 * => the in-core disk label does not change while the device is open. 84 * 85 * On memory consumption: ccd fans out I/O requests and so needs to 86 * allocate memory. If the system is desperately low on memory, we 87 * single thread I/O. 88 */ 89 90 #include <sys/cdefs.h> 91 __KERNEL_RCSID(0, "$NetBSD: ccd.c,v 1.177 2019/01/27 02:08:41 pgoyette Exp $"); 92 93 #include <sys/param.h> 94 #include <sys/systm.h> 95 #include <sys/kernel.h> 96 #include <sys/proc.h> 97 #include <sys/errno.h> 98 #include <sys/buf.h> 99 #include <sys/kmem.h> 100 #include <sys/pool.h> 101 #include <sys/module.h> 102 #include <sys/namei.h> 103 #include <sys/stat.h> 104 #include <sys/ioctl.h> 105 #include <sys/disklabel.h> 106 #include <sys/device.h> 107 #include <sys/disk.h> 108 #include <sys/syslog.h> 109 #include <sys/fcntl.h> 110 #include <sys/vnode.h> 111 #include <sys/conf.h> 112 #include <sys/mutex.h> 113 #include <sys/queue.h> 114 #include <sys/kauth.h> 115 #include <sys/kthread.h> 116 #include <sys/bufq.h> 117 #include <sys/sysctl.h> 118 #include <sys/compat_stub.h> 119 120 #include <uvm/uvm_extern.h> 121 122 #include <dev/ccdvar.h> 123 #include <dev/dkvar.h> 124 125 #include <miscfs/specfs/specdev.h> /* for v_rdev */ 126 127 #include "ioconf.h" 128 129 #if defined(CCDDEBUG) && !defined(DEBUG) 130 #define DEBUG 131 #endif 132 133 #ifdef DEBUG 134 #define CCDB_FOLLOW 0x01 135 #define CCDB_INIT 0x02 136 #define CCDB_IO 0x04 137 #define CCDB_LABEL 0x08 138 #define CCDB_VNODE 0x10 139 int ccddebug = 0x00; 140 #endif 141 142 #define ccdunit(x) DISKUNIT(x) 143 144 struct ccdbuf { 145 struct buf cb_buf; /* new I/O buf */ 146 struct buf *cb_obp; /* ptr. to original I/O buf */ 147 struct ccd_softc *cb_sc; /* pointer to ccd softc */ 148 int cb_comp; /* target component */ 149 SIMPLEQ_ENTRY(ccdbuf) cb_q; /* fifo of component buffers */ 150 }; 151 152 /* component buffer pool */ 153 static pool_cache_t ccd_cache; 154 155 #define CCD_GETBUF() pool_cache_get(ccd_cache, PR_WAITOK) 156 #define CCD_PUTBUF(cbp) pool_cache_put(ccd_cache, cbp) 157 158 #define CCDLABELDEV(dev) \ 159 (MAKEDISKDEV(major((dev)), ccdunit((dev)), RAW_PART)) 160 161 /* called by main() at boot time */ 162 void ccddetach(void); 163 164 /* called by biodone() at interrupt time */ 165 static void ccdiodone(struct buf *); 166 167 static void ccdinterleave(struct ccd_softc *); 168 static int ccdinit(struct ccd_softc *, char **, struct vnode **, 169 struct lwp *); 170 static struct ccdbuf *ccdbuffer(struct ccd_softc *, struct buf *, 171 daddr_t, void *, long); 172 static void ccdgetdefaultlabel(struct ccd_softc *, struct disklabel *); 173 static void ccdgetdisklabel(dev_t); 174 static void ccdmakedisklabel(struct ccd_softc *); 175 static void ccdstart(struct ccd_softc *); 176 static void ccdthread(void *); 177 178 static dev_type_open(ccdopen); 179 static dev_type_close(ccdclose); 180 static dev_type_read(ccdread); 181 static dev_type_write(ccdwrite); 182 static dev_type_ioctl(ccdioctl); 183 static dev_type_strategy(ccdstrategy); 184 static dev_type_size(ccdsize); 185 186 const struct bdevsw ccd_bdevsw = { 187 .d_open = ccdopen, 188 .d_close = ccdclose, 189 .d_strategy = ccdstrategy, 190 .d_ioctl = ccdioctl, 191 .d_dump = nodump, 192 .d_psize = ccdsize, 193 .d_discard = nodiscard, 194 .d_flag = D_DISK | D_MPSAFE 195 }; 196 197 const struct cdevsw ccd_cdevsw = { 198 .d_open = ccdopen, 199 .d_close = ccdclose, 200 .d_read = ccdread, 201 .d_write = ccdwrite, 202 .d_ioctl = ccdioctl, 203 .d_stop = nostop, 204 .d_tty = notty, 205 .d_poll = nopoll, 206 .d_mmap = nommap, 207 .d_kqfilter = nokqfilter, 208 .d_discard = nodiscard, 209 .d_flag = D_DISK | D_MPSAFE 210 }; 211 212 #ifdef DEBUG 213 static void printiinfo(struct ccdiinfo *); 214 #endif 215 216 static LIST_HEAD(, ccd_softc) ccds = LIST_HEAD_INITIALIZER(ccds); 217 static kmutex_t ccd_lock; 218 219 #ifdef _MODULE 220 static struct sysctllog *ccd_clog; 221 #endif 222 223 SYSCTL_SETUP_PROTO(sysctl_kern_ccd_setup); 224 225 static struct ccd_softc * 226 ccdcreate(int unit) { 227 struct ccd_softc *sc = kmem_zalloc(sizeof(*sc), KM_SLEEP); 228 229 /* Initialize per-softc structures. */ 230 snprintf(sc->sc_xname, sizeof(sc->sc_xname), "ccd%d", unit); 231 sc->sc_unit = unit; 232 mutex_init(&sc->sc_dvlock, MUTEX_DEFAULT, IPL_NONE); 233 sc->sc_iolock = mutex_obj_alloc(MUTEX_DEFAULT, IPL_NONE); 234 cv_init(&sc->sc_stop, "ccdstop"); 235 cv_init(&sc->sc_push, "ccdthr"); 236 disk_init(&sc->sc_dkdev, sc->sc_xname, NULL); /* XXX */ 237 return sc; 238 } 239 240 static void 241 ccddestroy(struct ccd_softc *sc) { 242 mutex_obj_free(sc->sc_iolock); 243 mutex_exit(&sc->sc_dvlock); 244 mutex_destroy(&sc->sc_dvlock); 245 cv_destroy(&sc->sc_stop); 246 cv_destroy(&sc->sc_push); 247 disk_destroy(&sc->sc_dkdev); 248 kmem_free(sc, sizeof(*sc)); 249 } 250 251 static struct ccd_softc * 252 ccdget(int unit, int make) { 253 struct ccd_softc *sc; 254 if (unit < 0) { 255 #ifdef DIAGNOSTIC 256 panic("%s: unit %d!", __func__, unit); 257 #endif 258 return NULL; 259 } 260 mutex_enter(&ccd_lock); 261 LIST_FOREACH(sc, &ccds, sc_link) { 262 if (sc->sc_unit == unit) { 263 mutex_exit(&ccd_lock); 264 return sc; 265 } 266 } 267 mutex_exit(&ccd_lock); 268 if (!make) 269 return NULL; 270 if ((sc = ccdcreate(unit)) == NULL) 271 return NULL; 272 mutex_enter(&ccd_lock); 273 LIST_INSERT_HEAD(&ccds, sc, sc_link); 274 mutex_exit(&ccd_lock); 275 return sc; 276 } 277 278 static void 279 ccdput(struct ccd_softc *sc) { 280 mutex_enter(&ccd_lock); 281 LIST_REMOVE(sc, sc_link); 282 mutex_exit(&ccd_lock); 283 ccddestroy(sc); 284 } 285 286 /* 287 * Called by main() during pseudo-device attachment. All we need 288 * to do is allocate enough space for devices to be configured later. 289 */ 290 void 291 ccdattach(int num) 292 { 293 mutex_init(&ccd_lock, MUTEX_DEFAULT, IPL_NONE); 294 295 /* Initialize the component buffer pool. */ 296 ccd_cache = pool_cache_init(sizeof(struct ccdbuf), 0, 297 0, 0, "ccdbuf", NULL, IPL_BIO, NULL, NULL, NULL); 298 } 299 300 void 301 ccddetach(void) 302 { 303 pool_cache_destroy(ccd_cache); 304 mutex_destroy(&ccd_lock); 305 } 306 307 static int 308 ccdinit(struct ccd_softc *cs, char **cpaths, struct vnode **vpp, 309 struct lwp *l) 310 { 311 struct ccdcinfo *ci = NULL; 312 int ix; 313 struct ccdgeom *ccg = &cs->sc_geom; 314 char *tmppath; 315 int error, path_alloced; 316 uint64_t psize, minsize; 317 unsigned secsize, maxsecsize; 318 struct disk_geom *dg; 319 320 #ifdef DEBUG 321 if (ccddebug & (CCDB_FOLLOW|CCDB_INIT)) 322 printf("%s: ccdinit\n", cs->sc_xname); 323 #endif 324 325 /* Allocate space for the component info. */ 326 cs->sc_cinfo = kmem_alloc(cs->sc_nccdisks * sizeof(*cs->sc_cinfo), 327 KM_SLEEP); 328 tmppath = kmem_alloc(MAXPATHLEN, KM_SLEEP); 329 330 cs->sc_size = 0; 331 332 /* 333 * Verify that each component piece exists and record 334 * relevant information about it. 335 */ 336 maxsecsize = 0; 337 minsize = 0; 338 for (ix = 0, path_alloced = 0; ix < cs->sc_nccdisks; ix++) { 339 ci = &cs->sc_cinfo[ix]; 340 ci->ci_vp = vpp[ix]; 341 342 /* 343 * Copy in the pathname of the component. 344 */ 345 memset(tmppath, 0, MAXPATHLEN); /* sanity */ 346 error = copyinstr(cpaths[ix], tmppath, 347 MAXPATHLEN, &ci->ci_pathlen); 348 if (ci->ci_pathlen == 0) 349 error = EINVAL; 350 if (error) { 351 #ifdef DEBUG 352 if (ccddebug & (CCDB_FOLLOW|CCDB_INIT)) 353 printf("%s: can't copy path, error = %d\n", 354 cs->sc_xname, error); 355 #endif 356 goto out; 357 } 358 ci->ci_path = kmem_alloc(ci->ci_pathlen, KM_SLEEP); 359 memcpy(ci->ci_path, tmppath, ci->ci_pathlen); 360 path_alloced++; 361 362 /* 363 * XXX: Cache the component's dev_t. 364 */ 365 ci->ci_dev = vpp[ix]->v_rdev; 366 367 /* 368 * Get partition information for the component. 369 */ 370 error = getdisksize(vpp[ix], &psize, &secsize); 371 if (error) { 372 #ifdef DEBUG 373 if (ccddebug & (CCDB_FOLLOW|CCDB_INIT)) 374 printf("%s: %s: disksize failed, error = %d\n", 375 cs->sc_xname, ci->ci_path, error); 376 #endif 377 goto out; 378 } 379 380 /* 381 * Calculate the size, truncating to an interleave 382 * boundary if necessary. 383 */ 384 maxsecsize = secsize > maxsecsize ? secsize : maxsecsize; 385 if (cs->sc_ileave > 1) 386 psize -= psize % cs->sc_ileave; 387 388 if (psize == 0) { 389 #ifdef DEBUG 390 if (ccddebug & (CCDB_FOLLOW|CCDB_INIT)) 391 printf("%s: %s: size == 0\n", 392 cs->sc_xname, ci->ci_path); 393 #endif 394 error = ENODEV; 395 goto out; 396 } 397 398 if (minsize == 0 || psize < minsize) 399 minsize = psize; 400 ci->ci_size = psize; 401 cs->sc_size += psize; 402 } 403 404 /* 405 * Don't allow the interleave to be smaller than 406 * the biggest component sector. 407 */ 408 if ((cs->sc_ileave > 0) && 409 (cs->sc_ileave < (maxsecsize / DEV_BSIZE))) { 410 #ifdef DEBUG 411 if (ccddebug & (CCDB_FOLLOW|CCDB_INIT)) 412 printf("%s: interleave must be at least %d\n", 413 cs->sc_xname, (maxsecsize / DEV_BSIZE)); 414 #endif 415 error = EINVAL; 416 goto out; 417 } 418 419 /* 420 * If uniform interleave is desired set all sizes to that of 421 * the smallest component. 422 */ 423 if (cs->sc_flags & CCDF_UNIFORM) { 424 for (ci = cs->sc_cinfo; 425 ci < &cs->sc_cinfo[cs->sc_nccdisks]; ci++) 426 ci->ci_size = minsize; 427 428 cs->sc_size = cs->sc_nccdisks * minsize; 429 } 430 431 /* 432 * Construct the interleave table. 433 */ 434 ccdinterleave(cs); 435 436 /* 437 * Create pseudo-geometry based on 1MB cylinders. It's 438 * pretty close. 439 */ 440 ccg->ccg_secsize = DEV_BSIZE; 441 ccg->ccg_ntracks = 1; 442 ccg->ccg_nsectors = 1024 * (1024 / ccg->ccg_secsize); 443 ccg->ccg_ncylinders = cs->sc_size / ccg->ccg_nsectors; 444 445 dg = &cs->sc_dkdev.dk_geom; 446 memset(dg, 0, sizeof(*dg)); 447 dg->dg_secperunit = cs->sc_size; 448 dg->dg_secsize = ccg->ccg_secsize; 449 dg->dg_nsectors = ccg->ccg_nsectors; 450 dg->dg_ntracks = ccg->ccg_ntracks; 451 dg->dg_ncylinders = ccg->ccg_ncylinders; 452 453 if (cs->sc_ileave > 0) 454 aprint_normal("%s: Interleaving %d component%s " 455 "(%d block interleave)\n", cs->sc_xname, 456 cs->sc_nccdisks, (cs->sc_nccdisks != 0 ? "s" : ""), 457 cs->sc_ileave); 458 else 459 aprint_normal("%s: Concatenating %d component%s\n", 460 cs->sc_xname, 461 cs->sc_nccdisks, (cs->sc_nccdisks != 0 ? "s" : "")); 462 for (ix = 0; ix < cs->sc_nccdisks; ix++) { 463 ci = &cs->sc_cinfo[ix]; 464 aprint_normal("%s: %s (%ju blocks)\n", cs->sc_xname, 465 ci->ci_path, (uintmax_t)ci->ci_size); 466 } 467 aprint_normal("%s: total %ju blocks\n", cs->sc_xname, cs->sc_size); 468 469 /* 470 * Create thread to handle deferred I/O. 471 */ 472 cs->sc_zap = false; 473 error = kthread_create(PRI_BIO, KTHREAD_MPSAFE, NULL, ccdthread, 474 cs, &cs->sc_thread, "%s", cs->sc_xname); 475 if (error) { 476 printf("ccdinit: can't create thread: %d\n", error); 477 goto out; 478 } 479 480 /* 481 * Only now that everything is set up can we enable the device. 482 */ 483 mutex_enter(cs->sc_iolock); 484 cs->sc_flags |= CCDF_INITED; 485 mutex_exit(cs->sc_iolock); 486 kmem_free(tmppath, MAXPATHLEN); 487 return (0); 488 489 out: 490 for (ix = 0; ix < path_alloced; ix++) { 491 kmem_free(cs->sc_cinfo[ix].ci_path, 492 cs->sc_cinfo[ix].ci_pathlen); 493 } 494 kmem_free(cs->sc_cinfo, cs->sc_nccdisks * sizeof(struct ccdcinfo)); 495 kmem_free(tmppath, MAXPATHLEN); 496 return (error); 497 } 498 499 static void 500 ccdinterleave(struct ccd_softc *cs) 501 { 502 struct ccdcinfo *ci, *smallci; 503 struct ccdiinfo *ii; 504 daddr_t bn, lbn; 505 int ix; 506 u_long size; 507 508 #ifdef DEBUG 509 if (ccddebug & CCDB_INIT) 510 printf("ccdinterleave(%p): ileave %d\n", cs, cs->sc_ileave); 511 #endif 512 /* 513 * Allocate an interleave table. 514 * Chances are this is too big, but we don't care. 515 */ 516 size = (cs->sc_nccdisks + 1) * sizeof(struct ccdiinfo); 517 cs->sc_itable = kmem_zalloc(size, KM_SLEEP); 518 519 /* 520 * Trivial case: no interleave (actually interleave of disk size). 521 * Each table entry represents a single component in its entirety. 522 */ 523 if (cs->sc_ileave == 0) { 524 bn = 0; 525 ii = cs->sc_itable; 526 527 for (ix = 0; ix < cs->sc_nccdisks; ix++) { 528 /* Allocate space for ii_index. */ 529 ii->ii_indexsz = sizeof(int); 530 ii->ii_index = kmem_alloc(ii->ii_indexsz, KM_SLEEP); 531 ii->ii_ndisk = 1; 532 ii->ii_startblk = bn; 533 ii->ii_startoff = 0; 534 ii->ii_index[0] = ix; 535 bn += cs->sc_cinfo[ix].ci_size; 536 ii++; 537 } 538 ii->ii_ndisk = 0; 539 #ifdef DEBUG 540 if (ccddebug & CCDB_INIT) 541 printiinfo(cs->sc_itable); 542 #endif 543 return; 544 } 545 546 /* 547 * The following isn't fast or pretty; it doesn't have to be. 548 */ 549 size = 0; 550 bn = lbn = 0; 551 for (ii = cs->sc_itable; ; ii++) { 552 /* Allocate space for ii_index. */ 553 ii->ii_indexsz = sizeof(int) * cs->sc_nccdisks; 554 ii->ii_index = kmem_alloc(ii->ii_indexsz, KM_SLEEP); 555 556 /* 557 * Locate the smallest of the remaining components 558 */ 559 smallci = NULL; 560 for (ci = cs->sc_cinfo; 561 ci < &cs->sc_cinfo[cs->sc_nccdisks]; ci++) 562 if (ci->ci_size > size && 563 (smallci == NULL || 564 ci->ci_size < smallci->ci_size)) 565 smallci = ci; 566 567 /* 568 * Nobody left, all done 569 */ 570 if (smallci == NULL) { 571 ii->ii_ndisk = 0; 572 break; 573 } 574 575 /* 576 * Record starting logical block and component offset 577 */ 578 ii->ii_startblk = bn / cs->sc_ileave; 579 ii->ii_startoff = lbn; 580 581 /* 582 * Determine how many disks take part in this interleave 583 * and record their indices. 584 */ 585 ix = 0; 586 for (ci = cs->sc_cinfo; 587 ci < &cs->sc_cinfo[cs->sc_nccdisks]; ci++) 588 if (ci->ci_size >= smallci->ci_size) 589 ii->ii_index[ix++] = ci - cs->sc_cinfo; 590 ii->ii_ndisk = ix; 591 bn += ix * (smallci->ci_size - size); 592 lbn = smallci->ci_size / cs->sc_ileave; 593 size = smallci->ci_size; 594 } 595 #ifdef DEBUG 596 if (ccddebug & CCDB_INIT) 597 printiinfo(cs->sc_itable); 598 #endif 599 } 600 601 /* ARGSUSED */ 602 static int 603 ccdopen(dev_t dev, int flags, int fmt, struct lwp *l) 604 { 605 int unit = ccdunit(dev); 606 struct ccd_softc *cs; 607 struct disklabel *lp; 608 int error = 0, part, pmask; 609 610 #ifdef DEBUG 611 if (ccddebug & CCDB_FOLLOW) 612 printf("ccdopen(0x%"PRIx64", 0x%x)\n", dev, flags); 613 #endif 614 if ((cs = ccdget(unit, 1)) == NULL) 615 return ENXIO; 616 617 mutex_enter(&cs->sc_dvlock); 618 619 lp = cs->sc_dkdev.dk_label; 620 621 part = DISKPART(dev); 622 pmask = (1 << part); 623 624 /* 625 * If we're initialized, check to see if there are any other 626 * open partitions. If not, then it's safe to update 627 * the in-core disklabel. Only read the disklabel if it is 628 * not already valid. 629 */ 630 if ((cs->sc_flags & (CCDF_INITED|CCDF_VLABEL)) == CCDF_INITED && 631 cs->sc_dkdev.dk_openmask == 0) 632 ccdgetdisklabel(dev); 633 634 /* Check that the partition exists. */ 635 if (part != RAW_PART) { 636 if (((cs->sc_flags & CCDF_INITED) == 0) || 637 ((part >= lp->d_npartitions) || 638 (lp->d_partitions[part].p_fstype == FS_UNUSED))) { 639 error = ENXIO; 640 goto done; 641 } 642 } 643 644 /* Prevent our unit from being unconfigured while open. */ 645 switch (fmt) { 646 case S_IFCHR: 647 cs->sc_dkdev.dk_copenmask |= pmask; 648 break; 649 650 case S_IFBLK: 651 cs->sc_dkdev.dk_bopenmask |= pmask; 652 break; 653 } 654 cs->sc_dkdev.dk_openmask = 655 cs->sc_dkdev.dk_copenmask | cs->sc_dkdev.dk_bopenmask; 656 657 done: 658 mutex_exit(&cs->sc_dvlock); 659 return (error); 660 } 661 662 /* ARGSUSED */ 663 static int 664 ccdclose(dev_t dev, int flags, int fmt, struct lwp *l) 665 { 666 int unit = ccdunit(dev); 667 struct ccd_softc *cs; 668 int part; 669 670 #ifdef DEBUG 671 if (ccddebug & CCDB_FOLLOW) 672 printf("ccdclose(0x%"PRIx64", 0x%x)\n", dev, flags); 673 #endif 674 675 if ((cs = ccdget(unit, 0)) == NULL) 676 return ENXIO; 677 678 mutex_enter(&cs->sc_dvlock); 679 680 part = DISKPART(dev); 681 682 /* ...that much closer to allowing unconfiguration... */ 683 switch (fmt) { 684 case S_IFCHR: 685 cs->sc_dkdev.dk_copenmask &= ~(1 << part); 686 break; 687 688 case S_IFBLK: 689 cs->sc_dkdev.dk_bopenmask &= ~(1 << part); 690 break; 691 } 692 cs->sc_dkdev.dk_openmask = 693 cs->sc_dkdev.dk_copenmask | cs->sc_dkdev.dk_bopenmask; 694 695 if (cs->sc_dkdev.dk_openmask == 0) { 696 if ((cs->sc_flags & CCDF_KLABEL) == 0) 697 cs->sc_flags &= ~CCDF_VLABEL; 698 } 699 700 mutex_exit(&cs->sc_dvlock); 701 return (0); 702 } 703 704 static bool 705 ccdbackoff(struct ccd_softc *cs) 706 { 707 708 /* XXX Arbitrary, should be a uvm call. */ 709 return uvmexp.free < (uvmexp.freemin >> 1) && 710 disk_isbusy(&cs->sc_dkdev); 711 } 712 713 static void 714 ccdthread(void *cookie) 715 { 716 struct ccd_softc *cs; 717 718 cs = cookie; 719 720 #ifdef DEBUG 721 if (ccddebug & CCDB_FOLLOW) 722 printf("ccdthread: hello\n"); 723 #endif 724 725 mutex_enter(cs->sc_iolock); 726 while (__predict_true(!cs->sc_zap)) { 727 if (bufq_peek(cs->sc_bufq) == NULL) { 728 /* Nothing to do. */ 729 cv_wait(&cs->sc_push, cs->sc_iolock); 730 continue; 731 } 732 if (ccdbackoff(cs)) { 733 /* Wait for memory to become available. */ 734 (void)cv_timedwait(&cs->sc_push, cs->sc_iolock, 1); 735 continue; 736 } 737 #ifdef DEBUG 738 if (ccddebug & CCDB_FOLLOW) 739 printf("ccdthread: dispatching I/O\n"); 740 #endif 741 ccdstart(cs); 742 mutex_enter(cs->sc_iolock); 743 } 744 cs->sc_thread = NULL; 745 mutex_exit(cs->sc_iolock); 746 #ifdef DEBUG 747 if (ccddebug & CCDB_FOLLOW) 748 printf("ccdthread: goodbye\n"); 749 #endif 750 kthread_exit(0); 751 } 752 753 static void 754 ccdstrategy(struct buf *bp) 755 { 756 int unit = ccdunit(bp->b_dev); 757 struct ccd_softc *cs; 758 if ((cs = ccdget(unit, 0)) == NULL) 759 return; 760 761 /* Must be open or reading label. */ 762 KASSERT(cs->sc_dkdev.dk_openmask != 0 || 763 (cs->sc_flags & CCDF_RLABEL) != 0); 764 765 mutex_enter(cs->sc_iolock); 766 /* Synchronize with device init/uninit. */ 767 if (__predict_false((cs->sc_flags & CCDF_INITED) == 0)) { 768 mutex_exit(cs->sc_iolock); 769 #ifdef DEBUG 770 if (ccddebug & CCDB_FOLLOW) 771 printf("ccdstrategy: unit %d: not inited\n", unit); 772 #endif 773 bp->b_error = ENXIO; 774 bp->b_resid = bp->b_bcount; 775 biodone(bp); 776 return; 777 } 778 779 /* Defer to thread if system is low on memory. */ 780 bufq_put(cs->sc_bufq, bp); 781 if (__predict_false(ccdbackoff(cs))) { 782 mutex_exit(cs->sc_iolock); 783 #ifdef DEBUG 784 if (ccddebug & CCDB_FOLLOW) 785 printf("ccdstrategy: holding off on I/O\n"); 786 #endif 787 return; 788 } 789 ccdstart(cs); 790 } 791 792 static void 793 ccdstart(struct ccd_softc *cs) 794 { 795 daddr_t blkno; 796 int wlabel; 797 struct disklabel *lp; 798 long bcount, rcount; 799 struct ccdbuf *cbp; 800 char *addr; 801 daddr_t bn; 802 vnode_t *vp; 803 buf_t *bp; 804 805 KASSERT(mutex_owned(cs->sc_iolock)); 806 807 bp = bufq_get(cs->sc_bufq); 808 KASSERT(bp != NULL); 809 810 disk_busy(&cs->sc_dkdev); 811 812 #ifdef DEBUG 813 if (ccddebug & CCDB_FOLLOW) 814 printf("ccdstart(%s, %p)\n", cs->sc_xname, bp); 815 #endif 816 817 /* If it's a nil transfer, wake up the top half now. */ 818 if (bp->b_bcount == 0) 819 goto done; 820 821 lp = cs->sc_dkdev.dk_label; 822 823 /* 824 * Do bounds checking and adjust transfer. If there's an 825 * error, the bounds check will flag that for us. Convert 826 * the partition relative block number to an absolute. 827 */ 828 blkno = bp->b_blkno; 829 wlabel = cs->sc_flags & (CCDF_WLABEL|CCDF_LABELLING); 830 if (DISKPART(bp->b_dev) != RAW_PART) { 831 if (bounds_check_with_label(&cs->sc_dkdev, bp, wlabel) <= 0) 832 goto done; 833 blkno += lp->d_partitions[DISKPART(bp->b_dev)].p_offset; 834 } 835 mutex_exit(cs->sc_iolock); 836 bp->b_rawblkno = blkno; 837 838 /* Allocate the component buffers and start I/O! */ 839 bp->b_resid = bp->b_bcount; 840 bn = bp->b_rawblkno; 841 addr = bp->b_data; 842 for (bcount = bp->b_bcount; bcount > 0; bcount -= rcount) { 843 cbp = ccdbuffer(cs, bp, bn, addr, bcount); 844 rcount = cbp->cb_buf.b_bcount; 845 bn += btodb(rcount); 846 addr += rcount; 847 vp = cbp->cb_buf.b_vp; 848 if ((cbp->cb_buf.b_flags & B_READ) == 0) { 849 mutex_enter(vp->v_interlock); 850 vp->v_numoutput++; 851 mutex_exit(vp->v_interlock); 852 } 853 (void)VOP_STRATEGY(vp, &cbp->cb_buf); 854 } 855 return; 856 857 done: 858 disk_unbusy(&cs->sc_dkdev, 0, 0); 859 cv_broadcast(&cs->sc_stop); 860 cv_broadcast(&cs->sc_push); 861 mutex_exit(cs->sc_iolock); 862 bp->b_resid = bp->b_bcount; 863 biodone(bp); 864 } 865 866 /* 867 * Build a component buffer header. 868 */ 869 static struct ccdbuf * 870 ccdbuffer(struct ccd_softc *cs, struct buf *bp, daddr_t bn, void *addr, 871 long bcount) 872 { 873 struct ccdcinfo *ci; 874 struct ccdbuf *cbp; 875 daddr_t cbn, cboff; 876 u_int64_t cbc; 877 int ccdisk; 878 879 #ifdef DEBUG 880 if (ccddebug & CCDB_IO) 881 printf("ccdbuffer(%p, %p, %" PRId64 ", %p, %ld)\n", 882 cs, bp, bn, addr, bcount); 883 #endif 884 /* 885 * Determine which component bn falls in. 886 */ 887 cbn = bn; 888 cboff = 0; 889 890 /* 891 * Serially concatenated 892 */ 893 if (cs->sc_ileave == 0) { 894 daddr_t sblk; 895 896 sblk = 0; 897 for (ccdisk = 0, ci = &cs->sc_cinfo[ccdisk]; 898 cbn >= sblk + ci->ci_size; 899 ccdisk++, ci = &cs->sc_cinfo[ccdisk]) 900 sblk += ci->ci_size; 901 cbn -= sblk; 902 } 903 /* 904 * Interleaved 905 */ 906 else { 907 struct ccdiinfo *ii; 908 int off; 909 910 cboff = cbn % cs->sc_ileave; 911 cbn /= cs->sc_ileave; 912 for (ii = cs->sc_itable; ii->ii_ndisk; ii++) 913 if (ii->ii_startblk > cbn) 914 break; 915 ii--; 916 off = cbn - ii->ii_startblk; 917 if (ii->ii_ndisk == 1) { 918 ccdisk = ii->ii_index[0]; 919 cbn = ii->ii_startoff + off; 920 } else { 921 ccdisk = ii->ii_index[off % ii->ii_ndisk]; 922 cbn = ii->ii_startoff + off / ii->ii_ndisk; 923 } 924 cbn *= cs->sc_ileave; 925 ci = &cs->sc_cinfo[ccdisk]; 926 } 927 928 /* 929 * Fill in the component buf structure. 930 */ 931 cbp = CCD_GETBUF(); 932 KASSERT(cbp != NULL); 933 buf_init(&cbp->cb_buf); 934 cbp->cb_buf.b_flags = bp->b_flags; 935 cbp->cb_buf.b_oflags = bp->b_oflags; 936 cbp->cb_buf.b_cflags = bp->b_cflags; 937 cbp->cb_buf.b_iodone = ccdiodone; 938 cbp->cb_buf.b_proc = bp->b_proc; 939 cbp->cb_buf.b_dev = ci->ci_dev; 940 cbp->cb_buf.b_blkno = cbn + cboff; 941 cbp->cb_buf.b_data = addr; 942 cbp->cb_buf.b_vp = ci->ci_vp; 943 cbp->cb_buf.b_objlock = ci->ci_vp->v_interlock; 944 if (cs->sc_ileave == 0) 945 cbc = dbtob((u_int64_t)(ci->ci_size - cbn)); 946 else 947 cbc = dbtob((u_int64_t)(cs->sc_ileave - cboff)); 948 cbp->cb_buf.b_bcount = cbc < bcount ? cbc : bcount; 949 950 /* 951 * context for ccdiodone 952 */ 953 cbp->cb_obp = bp; 954 cbp->cb_sc = cs; 955 cbp->cb_comp = ccdisk; 956 957 BIO_COPYPRIO(&cbp->cb_buf, bp); 958 959 #ifdef DEBUG 960 if (ccddebug & CCDB_IO) 961 printf(" dev 0x%"PRIx64"(u%lu): cbp %p bn %" PRId64 " addr %p" 962 " bcnt %d\n", 963 ci->ci_dev, (unsigned long) (ci-cs->sc_cinfo), cbp, 964 cbp->cb_buf.b_blkno, cbp->cb_buf.b_data, 965 cbp->cb_buf.b_bcount); 966 #endif 967 968 return (cbp); 969 } 970 971 /* 972 * Called at interrupt time. 973 * Mark the component as done and if all components are done, 974 * take a ccd interrupt. 975 */ 976 static void 977 ccdiodone(struct buf *vbp) 978 { 979 struct ccdbuf *cbp = (struct ccdbuf *) vbp; 980 struct buf *bp = cbp->cb_obp; 981 struct ccd_softc *cs = cbp->cb_sc; 982 int count; 983 984 #ifdef DEBUG 985 if (ccddebug & CCDB_FOLLOW) 986 printf("ccdiodone(%p)\n", cbp); 987 if (ccddebug & CCDB_IO) { 988 printf("ccdiodone: bp %p bcount %d resid %d\n", 989 bp, bp->b_bcount, bp->b_resid); 990 printf(" dev 0x%"PRIx64"(u%d), cbp %p bn %" PRId64 " addr %p" 991 " bcnt %d\n", 992 cbp->cb_buf.b_dev, cbp->cb_comp, cbp, 993 cbp->cb_buf.b_blkno, cbp->cb_buf.b_data, 994 cbp->cb_buf.b_bcount); 995 } 996 #endif 997 998 if (cbp->cb_buf.b_error != 0) { 999 bp->b_error = cbp->cb_buf.b_error; 1000 printf("%s: error %d on component %d\n", 1001 cs->sc_xname, bp->b_error, cbp->cb_comp); 1002 } 1003 count = cbp->cb_buf.b_bcount; 1004 buf_destroy(&cbp->cb_buf); 1005 CCD_PUTBUF(cbp); 1006 1007 /* 1008 * If all done, "interrupt". 1009 */ 1010 mutex_enter(cs->sc_iolock); 1011 bp->b_resid -= count; 1012 if (bp->b_resid < 0) 1013 panic("ccdiodone: count"); 1014 if (bp->b_resid == 0) { 1015 /* 1016 * Request is done for better or worse, wakeup the top half. 1017 */ 1018 if (bp->b_error != 0) 1019 bp->b_resid = bp->b_bcount; 1020 disk_unbusy(&cs->sc_dkdev, (bp->b_bcount - bp->b_resid), 1021 (bp->b_flags & B_READ)); 1022 if (!disk_isbusy(&cs->sc_dkdev)) { 1023 if (bufq_peek(cs->sc_bufq) != NULL) { 1024 cv_broadcast(&cs->sc_push); 1025 } 1026 cv_broadcast(&cs->sc_stop); 1027 } 1028 mutex_exit(cs->sc_iolock); 1029 biodone(bp); 1030 } else 1031 mutex_exit(cs->sc_iolock); 1032 } 1033 1034 /* ARGSUSED */ 1035 static int 1036 ccdread(dev_t dev, struct uio *uio, int flags) 1037 { 1038 int unit = ccdunit(dev); 1039 struct ccd_softc *cs; 1040 1041 #ifdef DEBUG 1042 if (ccddebug & CCDB_FOLLOW) 1043 printf("ccdread(0x%"PRIx64", %p)\n", dev, uio); 1044 #endif 1045 if ((cs = ccdget(unit, 0)) == NULL) 1046 return 0; 1047 1048 /* Unlocked advisory check, ccdstrategy check is synchronous. */ 1049 if ((cs->sc_flags & CCDF_INITED) == 0) 1050 return (ENXIO); 1051 1052 return (physio(ccdstrategy, NULL, dev, B_READ, minphys, uio)); 1053 } 1054 1055 /* ARGSUSED */ 1056 static int 1057 ccdwrite(dev_t dev, struct uio *uio, int flags) 1058 { 1059 int unit = ccdunit(dev); 1060 struct ccd_softc *cs; 1061 1062 #ifdef DEBUG 1063 if (ccddebug & CCDB_FOLLOW) 1064 printf("ccdwrite(0x%"PRIx64", %p)\n", dev, uio); 1065 #endif 1066 if ((cs = ccdget(unit, 0)) == NULL) 1067 return ENOENT; 1068 1069 /* Unlocked advisory check, ccdstrategy check is synchronous. */ 1070 if ((cs->sc_flags & CCDF_INITED) == 0) 1071 return (ENXIO); 1072 1073 return (physio(ccdstrategy, NULL, dev, B_WRITE, minphys, uio)); 1074 } 1075 1076 int (*compat_ccd_ioctl_60)(dev_t, u_long, void *, int, struct lwp *, 1077 int (*)(dev_t, u_long, void *, int, struct lwp *)) = (void *)enosys; 1078 1079 static int 1080 ccdioctl(dev_t dev, u_long cmd, void *data, int flag, struct lwp *l) 1081 { 1082 int unit = ccdunit(dev); 1083 int i, j, lookedup = 0, error = 0; 1084 int part, pmask, make, hook; 1085 struct ccd_softc *cs; 1086 struct ccd_ioctl *ccio = (struct ccd_ioctl *)data; 1087 kauth_cred_t uc; 1088 char **cpp; 1089 struct pathbuf *pb; 1090 struct vnode **vpp; 1091 #ifdef __HAVE_OLD_DISKLABEL 1092 struct disklabel newlabel; 1093 #endif 1094 1095 switch (cmd) { 1096 case CCDIOCSET: 1097 make = 1; 1098 break; 1099 default: 1100 MODULE_CALL_HOOK(ccd_ioctl_60_hook, 1101 (0, cmd, NULL, 0, NULL, NULL), 1102 enosys(), hook); 1103 if (hook == 0) 1104 make = 1; 1105 else 1106 make = 0; 1107 break; 1108 } 1109 1110 if ((cs = ccdget(unit, make)) == NULL) 1111 return ENOENT; 1112 uc = kauth_cred_get(); 1113 1114 MODULE_CALL_HOOK(ccd_ioctl_60_hook, 1115 (dev, cmd, data, flag, l, ccdioctl), 1116 enosys(), error); 1117 if (error != ENOSYS) 1118 return error; 1119 1120 /* Must be open for writes for these commands... */ 1121 switch (cmd) { 1122 case CCDIOCSET: 1123 case CCDIOCCLR: 1124 case DIOCSDINFO: 1125 case DIOCWDINFO: 1126 case DIOCCACHESYNC: 1127 case DIOCAWEDGE: 1128 case DIOCDWEDGE: 1129 case DIOCMWEDGES: 1130 #ifdef __HAVE_OLD_DISKLABEL 1131 case ODIOCSDINFO: 1132 case ODIOCWDINFO: 1133 #endif 1134 case DIOCKLABEL: 1135 case DIOCWLABEL: 1136 if ((flag & FWRITE) == 0) 1137 return (EBADF); 1138 } 1139 1140 mutex_enter(&cs->sc_dvlock); 1141 1142 /* Must be initialized for these... */ 1143 switch (cmd) { 1144 case CCDIOCCLR: 1145 case DIOCGDINFO: 1146 case DIOCGSTRATEGY: 1147 case DIOCGCACHE: 1148 case DIOCCACHESYNC: 1149 case DIOCAWEDGE: 1150 case DIOCDWEDGE: 1151 case DIOCLWEDGES: 1152 case DIOCMWEDGES: 1153 case DIOCSDINFO: 1154 case DIOCWDINFO: 1155 case DIOCGPARTINFO: 1156 case DIOCWLABEL: 1157 case DIOCKLABEL: 1158 case DIOCGDEFLABEL: 1159 #ifdef __HAVE_OLD_DISKLABEL 1160 case ODIOCGDINFO: 1161 case ODIOCSDINFO: 1162 case ODIOCWDINFO: 1163 case ODIOCGDEFLABEL: 1164 #endif 1165 if ((cs->sc_flags & CCDF_INITED) == 0) { 1166 error = ENXIO; 1167 goto out; 1168 } 1169 } 1170 1171 error = disk_ioctl(&cs->sc_dkdev, dev, cmd, data, flag, l); 1172 if (error != EPASSTHROUGH) 1173 goto out; 1174 1175 error = 0; 1176 switch (cmd) { 1177 case CCDIOCSET: 1178 if (cs->sc_flags & CCDF_INITED) { 1179 error = EBUSY; 1180 goto out; 1181 } 1182 1183 /* Validate the flags. */ 1184 if ((ccio->ccio_flags & CCDF_USERMASK) != ccio->ccio_flags) { 1185 error = EINVAL; 1186 goto out; 1187 } 1188 1189 if (ccio->ccio_ndisks > CCD_MAXNDISKS || 1190 ccio->ccio_ndisks == 0) { 1191 error = EINVAL; 1192 goto out; 1193 } 1194 1195 /* Fill in some important bits. */ 1196 cs->sc_ileave = ccio->ccio_ileave; 1197 cs->sc_nccdisks = ccio->ccio_ndisks; 1198 cs->sc_flags = ccio->ccio_flags & CCDF_USERMASK; 1199 1200 /* 1201 * Allocate space for and copy in the array of 1202 * component pathnames and device numbers. 1203 */ 1204 cpp = kmem_alloc(ccio->ccio_ndisks * sizeof(*cpp), KM_SLEEP); 1205 vpp = kmem_alloc(ccio->ccio_ndisks * sizeof(*vpp), KM_SLEEP); 1206 error = copyin(ccio->ccio_disks, cpp, 1207 ccio->ccio_ndisks * sizeof(*cpp)); 1208 if (error) { 1209 kmem_free(vpp, ccio->ccio_ndisks * sizeof(*vpp)); 1210 kmem_free(cpp, ccio->ccio_ndisks * sizeof(*cpp)); 1211 goto out; 1212 } 1213 1214 #ifdef DEBUG 1215 if (ccddebug & CCDB_INIT) 1216 for (i = 0; i < ccio->ccio_ndisks; ++i) 1217 printf("ccdioctl: component %d: %p\n", 1218 i, cpp[i]); 1219 #endif 1220 1221 for (i = 0; i < ccio->ccio_ndisks; ++i) { 1222 #ifdef DEBUG 1223 if (ccddebug & CCDB_INIT) 1224 printf("ccdioctl: lookedup = %d\n", lookedup); 1225 #endif 1226 error = pathbuf_copyin(cpp[i], &pb); 1227 if (error == 0) { 1228 error = dk_lookup(pb, l, &vpp[i]); 1229 } 1230 pathbuf_destroy(pb); 1231 if (error != 0) { 1232 for (j = 0; j < lookedup; ++j) 1233 (void)vn_close(vpp[j], FREAD|FWRITE, 1234 uc); 1235 kmem_free(vpp, ccio->ccio_ndisks * 1236 sizeof(*vpp)); 1237 kmem_free(cpp, ccio->ccio_ndisks * 1238 sizeof(*cpp)); 1239 goto out; 1240 } 1241 ++lookedup; 1242 } 1243 1244 /* Attach the disk. */ 1245 disk_attach(&cs->sc_dkdev); 1246 bufq_alloc(&cs->sc_bufq, "fcfs", 0); 1247 1248 /* 1249 * Initialize the ccd. Fills in the softc for us. 1250 */ 1251 if ((error = ccdinit(cs, cpp, vpp, l)) != 0) { 1252 for (j = 0; j < lookedup; ++j) 1253 (void)vn_close(vpp[j], FREAD|FWRITE, 1254 uc); 1255 kmem_free(vpp, ccio->ccio_ndisks * sizeof(*vpp)); 1256 kmem_free(cpp, ccio->ccio_ndisks * sizeof(*cpp)); 1257 disk_detach(&cs->sc_dkdev); 1258 mutex_exit(&cs->sc_dvlock); 1259 bufq_free(cs->sc_bufq); 1260 return error; 1261 } 1262 1263 /* We can free the temporary variables now. */ 1264 kmem_free(vpp, ccio->ccio_ndisks * sizeof(*vpp)); 1265 kmem_free(cpp, ccio->ccio_ndisks * sizeof(*cpp)); 1266 1267 /* 1268 * The ccd has been successfully initialized, so 1269 * we can place it into the array. Don't try to 1270 * read the disklabel until the disk has been attached, 1271 * because space for the disklabel is allocated 1272 * in disk_attach(); 1273 */ 1274 ccio->ccio_unit = unit; 1275 ccio->ccio_size = cs->sc_size; 1276 1277 /* Try and read the disklabel. */ 1278 ccdgetdisklabel(dev); 1279 disk_set_info(NULL, &cs->sc_dkdev, NULL); 1280 1281 /* discover wedges */ 1282 mutex_exit(&cs->sc_dvlock); 1283 dkwedge_discover(&cs->sc_dkdev); 1284 return 0; 1285 1286 case CCDIOCCLR: 1287 /* 1288 * Don't unconfigure if any other partitions are open 1289 * or if both the character and block flavors of this 1290 * partition are open. 1291 */ 1292 part = DISKPART(dev); 1293 pmask = (1 << part); 1294 if ((cs->sc_dkdev.dk_openmask & ~pmask) || 1295 ((cs->sc_dkdev.dk_bopenmask & pmask) && 1296 (cs->sc_dkdev.dk_copenmask & pmask))) { 1297 error = EBUSY; 1298 goto out; 1299 } 1300 1301 /* Delete all of our wedges. */ 1302 dkwedge_delall(&cs->sc_dkdev); 1303 1304 /* Stop new I/O, wait for in-flight I/O to complete. */ 1305 mutex_enter(cs->sc_iolock); 1306 cs->sc_flags &= ~(CCDF_INITED|CCDF_VLABEL); 1307 cs->sc_zap = true; 1308 while (disk_isbusy(&cs->sc_dkdev) || 1309 bufq_peek(cs->sc_bufq) != NULL || 1310 cs->sc_thread != NULL) { 1311 cv_broadcast(&cs->sc_push); 1312 (void)cv_timedwait(&cs->sc_stop, cs->sc_iolock, hz); 1313 } 1314 mutex_exit(cs->sc_iolock); 1315 1316 /* 1317 * Free ccd_softc information and clear entry. 1318 */ 1319 1320 /* Close the components and free their pathnames. */ 1321 for (i = 0; i < cs->sc_nccdisks; ++i) { 1322 /* 1323 * XXX: this close could potentially fail and 1324 * cause Bad Things. Maybe we need to force 1325 * the close to happen? 1326 */ 1327 #ifdef DEBUG 1328 if (ccddebug & CCDB_VNODE) 1329 vprint("CCDIOCCLR: vnode info", 1330 cs->sc_cinfo[i].ci_vp); 1331 #endif 1332 (void)vn_close(cs->sc_cinfo[i].ci_vp, FREAD|FWRITE, 1333 uc); 1334 kmem_free(cs->sc_cinfo[i].ci_path, 1335 cs->sc_cinfo[i].ci_pathlen); 1336 } 1337 1338 /* Free interleave index. */ 1339 for (i = 0; cs->sc_itable[i].ii_ndisk; ++i) { 1340 kmem_free(cs->sc_itable[i].ii_index, 1341 cs->sc_itable[i].ii_indexsz); 1342 } 1343 1344 /* Free component info and interleave table. */ 1345 kmem_free(cs->sc_cinfo, cs->sc_nccdisks * 1346 sizeof(struct ccdcinfo)); 1347 kmem_free(cs->sc_itable, (cs->sc_nccdisks + 1) * 1348 sizeof(struct ccdiinfo)); 1349 1350 aprint_normal("%s: detached\n", cs->sc_xname); 1351 1352 /* Detach the disk. */ 1353 disk_detach(&cs->sc_dkdev); 1354 bufq_free(cs->sc_bufq); 1355 ccdput(cs); 1356 /* Don't break, otherwise cs is read again. */ 1357 return 0; 1358 1359 case DIOCGSTRATEGY: 1360 { 1361 struct disk_strategy *dks = (void *)data; 1362 1363 mutex_enter(cs->sc_iolock); 1364 if (cs->sc_bufq != NULL) 1365 strlcpy(dks->dks_name, 1366 bufq_getstrategyname(cs->sc_bufq), 1367 sizeof(dks->dks_name)); 1368 else 1369 error = EINVAL; 1370 mutex_exit(cs->sc_iolock); 1371 dks->dks_paramlen = 0; 1372 break; 1373 } 1374 1375 case DIOCGCACHE: 1376 { 1377 int dkcache = 0; 1378 1379 /* 1380 * We pass this call down to all components and report 1381 * intersection of the flags returned by the components. 1382 * If any errors out, we return error. CCD components 1383 * can not change unless the device is unconfigured, so 1384 * device feature flags will remain static. RCE/WCE can change 1385 * of course, if set directly on underlying device. 1386 */ 1387 for (error = 0, i = 0; i < cs->sc_nccdisks; i++) { 1388 error = VOP_IOCTL(cs->sc_cinfo[i].ci_vp, cmd, &j, 1389 flag, uc); 1390 if (error) 1391 break; 1392 1393 if (i == 0) 1394 dkcache = j; 1395 else 1396 dkcache = DKCACHE_COMBINE(dkcache, j); 1397 } 1398 1399 *((int *)data) = dkcache; 1400 break; 1401 } 1402 1403 case DIOCCACHESYNC: 1404 /* 1405 * We pass this call down to all components and report 1406 * the first error we encounter. 1407 */ 1408 for (error = 0, i = 0; i < cs->sc_nccdisks; i++) { 1409 j = VOP_IOCTL(cs->sc_cinfo[i].ci_vp, cmd, data, 1410 flag, uc); 1411 if (j != 0 && error == 0) 1412 error = j; 1413 } 1414 break; 1415 1416 case DIOCWDINFO: 1417 case DIOCSDINFO: 1418 #ifdef __HAVE_OLD_DISKLABEL 1419 case ODIOCWDINFO: 1420 case ODIOCSDINFO: 1421 #endif 1422 { 1423 struct disklabel *lp; 1424 #ifdef __HAVE_OLD_DISKLABEL 1425 if (cmd == ODIOCSDINFO || cmd == ODIOCWDINFO) { 1426 memset(&newlabel, 0, sizeof newlabel); 1427 memcpy(&newlabel, data, sizeof (struct olddisklabel)); 1428 lp = &newlabel; 1429 } else 1430 #endif 1431 lp = (struct disklabel *)data; 1432 1433 cs->sc_flags |= CCDF_LABELLING; 1434 1435 error = setdisklabel(cs->sc_dkdev.dk_label, 1436 lp, 0, cs->sc_dkdev.dk_cpulabel); 1437 if (error == 0) { 1438 if (cmd == DIOCWDINFO 1439 #ifdef __HAVE_OLD_DISKLABEL 1440 || cmd == ODIOCWDINFO 1441 #endif 1442 ) 1443 error = writedisklabel(CCDLABELDEV(dev), 1444 ccdstrategy, cs->sc_dkdev.dk_label, 1445 cs->sc_dkdev.dk_cpulabel); 1446 } 1447 1448 cs->sc_flags &= ~CCDF_LABELLING; 1449 break; 1450 } 1451 1452 case DIOCKLABEL: 1453 if (*(int *)data != 0) 1454 cs->sc_flags |= CCDF_KLABEL; 1455 else 1456 cs->sc_flags &= ~CCDF_KLABEL; 1457 break; 1458 1459 case DIOCWLABEL: 1460 if (*(int *)data != 0) 1461 cs->sc_flags |= CCDF_WLABEL; 1462 else 1463 cs->sc_flags &= ~CCDF_WLABEL; 1464 break; 1465 1466 case DIOCGDEFLABEL: 1467 ccdgetdefaultlabel(cs, (struct disklabel *)data); 1468 break; 1469 1470 #ifdef __HAVE_OLD_DISKLABEL 1471 case ODIOCGDEFLABEL: 1472 ccdgetdefaultlabel(cs, &newlabel); 1473 if (newlabel.d_npartitions > OLDMAXPARTITIONS) 1474 return ENOTTY; 1475 memcpy(data, &newlabel, sizeof (struct olddisklabel)); 1476 break; 1477 #endif 1478 1479 default: 1480 error = ENOTTY; 1481 } 1482 1483 out: 1484 mutex_exit(&cs->sc_dvlock); 1485 return (error); 1486 } 1487 1488 static int 1489 ccdsize(dev_t dev) 1490 { 1491 struct ccd_softc *cs; 1492 struct disklabel *lp; 1493 int part, unit, omask, size; 1494 1495 unit = ccdunit(dev); 1496 if ((cs = ccdget(unit, 0)) == NULL) 1497 return -1; 1498 1499 if ((cs->sc_flags & CCDF_INITED) == 0) 1500 return (-1); 1501 1502 part = DISKPART(dev); 1503 omask = cs->sc_dkdev.dk_openmask & (1 << part); 1504 lp = cs->sc_dkdev.dk_label; 1505 1506 if (omask == 0 && ccdopen(dev, 0, S_IFBLK, curlwp)) 1507 return (-1); 1508 1509 if (lp->d_partitions[part].p_fstype != FS_SWAP) 1510 size = -1; 1511 else 1512 size = lp->d_partitions[part].p_size * 1513 (lp->d_secsize / DEV_BSIZE); 1514 1515 if (omask == 0 && ccdclose(dev, 0, S_IFBLK, curlwp)) 1516 return (-1); 1517 1518 return (size); 1519 } 1520 1521 static void 1522 ccdgetdefaultlabel(struct ccd_softc *cs, struct disklabel *lp) 1523 { 1524 struct ccdgeom *ccg = &cs->sc_geom; 1525 1526 memset(lp, 0, sizeof(*lp)); 1527 1528 if (cs->sc_size > UINT32_MAX) 1529 lp->d_secperunit = UINT32_MAX; 1530 else 1531 lp->d_secperunit = cs->sc_size; 1532 lp->d_secsize = ccg->ccg_secsize; 1533 lp->d_nsectors = ccg->ccg_nsectors; 1534 lp->d_ntracks = ccg->ccg_ntracks; 1535 lp->d_ncylinders = ccg->ccg_ncylinders; 1536 lp->d_secpercyl = lp->d_ntracks * lp->d_nsectors; 1537 1538 strncpy(lp->d_typename, "ccd", sizeof(lp->d_typename)); 1539 lp->d_type = DKTYPE_CCD; 1540 strncpy(lp->d_packname, "fictitious", sizeof(lp->d_packname)); 1541 lp->d_rpm = 3600; 1542 lp->d_interleave = 1; 1543 lp->d_flags = 0; 1544 1545 lp->d_partitions[RAW_PART].p_offset = 0; 1546 lp->d_partitions[RAW_PART].p_size = lp->d_secperunit; 1547 lp->d_partitions[RAW_PART].p_fstype = FS_UNUSED; 1548 lp->d_npartitions = RAW_PART + 1; 1549 1550 lp->d_magic = DISKMAGIC; 1551 lp->d_magic2 = DISKMAGIC; 1552 lp->d_checksum = dkcksum(cs->sc_dkdev.dk_label); 1553 } 1554 1555 /* 1556 * Read the disklabel from the ccd. If one is not present, fake one 1557 * up. 1558 */ 1559 static void 1560 ccdgetdisklabel(dev_t dev) 1561 { 1562 int unit = ccdunit(dev); 1563 struct ccd_softc *cs; 1564 const char *errstring; 1565 struct disklabel *lp; 1566 struct cpu_disklabel *clp; 1567 1568 if ((cs = ccdget(unit, 0)) == NULL) 1569 return; 1570 lp = cs->sc_dkdev.dk_label; 1571 clp = cs->sc_dkdev.dk_cpulabel; 1572 KASSERT(mutex_owned(&cs->sc_dvlock)); 1573 1574 memset(clp, 0, sizeof(*clp)); 1575 1576 ccdgetdefaultlabel(cs, lp); 1577 1578 /* 1579 * Call the generic disklabel extraction routine. 1580 */ 1581 cs->sc_flags |= CCDF_RLABEL; 1582 if ((cs->sc_flags & CCDF_NOLABEL) != 0) 1583 errstring = "CCDF_NOLABEL set; ignoring on-disk label"; 1584 else 1585 errstring = readdisklabel(CCDLABELDEV(dev), ccdstrategy, 1586 cs->sc_dkdev.dk_label, cs->sc_dkdev.dk_cpulabel); 1587 if (errstring) 1588 ccdmakedisklabel(cs); 1589 else { 1590 int i; 1591 struct partition *pp; 1592 1593 /* 1594 * Sanity check whether the found disklabel is valid. 1595 * 1596 * This is necessary since total size of ccd may vary 1597 * when an interleave is changed even though exactly 1598 * same componets are used, and old disklabel may used 1599 * if that is found. 1600 */ 1601 if (lp->d_secperunit < UINT32_MAX ? 1602 lp->d_secperunit != cs->sc_size : 1603 lp->d_secperunit > cs->sc_size) 1604 printf("WARNING: %s: " 1605 "total sector size in disklabel (%ju) != " 1606 "the size of ccd (%ju)\n", cs->sc_xname, 1607 (uintmax_t)lp->d_secperunit, 1608 (uintmax_t)cs->sc_size); 1609 for (i = 0; i < lp->d_npartitions; i++) { 1610 pp = &lp->d_partitions[i]; 1611 if (pp->p_offset + pp->p_size > cs->sc_size) 1612 printf("WARNING: %s: end of partition `%c' " 1613 "exceeds the size of ccd (%ju)\n", 1614 cs->sc_xname, 'a' + i, (uintmax_t)cs->sc_size); 1615 } 1616 } 1617 1618 #ifdef DEBUG 1619 /* It's actually extremely common to have unlabeled ccds. */ 1620 if (ccddebug & CCDB_LABEL) 1621 if (errstring != NULL) 1622 printf("%s: %s\n", cs->sc_xname, errstring); 1623 #endif 1624 1625 /* In-core label now valid. */ 1626 cs->sc_flags = (cs->sc_flags | CCDF_VLABEL) & ~CCDF_RLABEL; 1627 } 1628 1629 /* 1630 * Take care of things one might want to take care of in the event 1631 * that a disklabel isn't present. 1632 */ 1633 static void 1634 ccdmakedisklabel(struct ccd_softc *cs) 1635 { 1636 struct disklabel *lp = cs->sc_dkdev.dk_label; 1637 1638 /* 1639 * For historical reasons, if there's no disklabel present 1640 * the raw partition must be marked FS_BSDFFS. 1641 */ 1642 lp->d_partitions[RAW_PART].p_fstype = FS_BSDFFS; 1643 1644 strncpy(lp->d_packname, "default label", sizeof(lp->d_packname)); 1645 1646 lp->d_checksum = dkcksum(lp); 1647 } 1648 1649 #ifdef DEBUG 1650 static void 1651 printiinfo(struct ccdiinfo *ii) 1652 { 1653 int ix, i; 1654 1655 for (ix = 0; ii->ii_ndisk; ix++, ii++) { 1656 printf(" itab[%d]: #dk %d sblk %" PRId64 " soff %" PRId64, 1657 ix, ii->ii_ndisk, ii->ii_startblk, ii->ii_startoff); 1658 for (i = 0; i < ii->ii_ndisk; i++) 1659 printf(" %d", ii->ii_index[i]); 1660 printf("\n"); 1661 } 1662 } 1663 #endif 1664 1665 MODULE(MODULE_CLASS_DRIVER, ccd, "dk_subr,bufq_fcfs"); 1666 1667 static int 1668 ccd_modcmd(modcmd_t cmd, void *arg) 1669 { 1670 int error = 0; 1671 #ifdef _MODULE 1672 int bmajor = -1, cmajor = -1; 1673 #endif 1674 1675 1676 switch (cmd) { 1677 case MODULE_CMD_INIT: 1678 #ifdef _MODULE 1679 ccdattach(0); 1680 1681 error = devsw_attach("ccd", &ccd_bdevsw, &bmajor, 1682 &ccd_cdevsw, &cmajor); 1683 sysctl_kern_ccd_setup(&ccd_clog); 1684 #endif 1685 break; 1686 1687 case MODULE_CMD_FINI: 1688 #ifdef _MODULE 1689 mutex_enter(&ccd_lock); 1690 if (!LIST_EMPTY(&ccds)) { 1691 mutex_exit(&ccd_lock); 1692 error = EBUSY; 1693 } else { 1694 mutex_exit(&ccd_lock); 1695 error = devsw_detach(&ccd_bdevsw, &ccd_cdevsw); 1696 ccddetach(); 1697 } 1698 sysctl_teardown(&ccd_clog); 1699 #endif 1700 break; 1701 1702 case MODULE_CMD_STAT: 1703 return ENOTTY; 1704 1705 default: 1706 return ENOTTY; 1707 } 1708 1709 return error; 1710 } 1711 1712 static int 1713 ccd_units_sysctl(SYSCTLFN_ARGS) 1714 { 1715 struct sysctlnode node; 1716 struct ccd_softc *sc; 1717 int error, i, nccd, *units; 1718 size_t size; 1719 1720 nccd = 0; 1721 mutex_enter(&ccd_lock); 1722 LIST_FOREACH(sc, &ccds, sc_link) 1723 nccd++; 1724 mutex_exit(&ccd_lock); 1725 1726 if (nccd != 0) { 1727 size = nccd * sizeof(*units); 1728 units = kmem_zalloc(size, KM_SLEEP); 1729 i = 0; 1730 mutex_enter(&ccd_lock); 1731 LIST_FOREACH(sc, &ccds, sc_link) { 1732 if (i >= nccd) 1733 break; 1734 units[i] = sc->sc_unit; 1735 } 1736 mutex_exit(&ccd_lock); 1737 } else { 1738 units = NULL; 1739 size = 0; 1740 } 1741 1742 node = *rnode; 1743 node.sysctl_data = units; 1744 node.sysctl_size = size; 1745 1746 error = sysctl_lookup(SYSCTLFN_CALL(&node)); 1747 if (units) 1748 kmem_free(units, size); 1749 return error; 1750 } 1751 1752 static int 1753 ccd_info_sysctl(SYSCTLFN_ARGS) 1754 { 1755 struct sysctlnode node; 1756 struct ccddiskinfo ccd; 1757 struct ccd_softc *sc; 1758 int unit; 1759 1760 if (newp == NULL || newlen != sizeof(int)) 1761 return EINVAL; 1762 1763 unit = *(const int *)newp; 1764 newp = NULL; 1765 newlen = 0; 1766 ccd.ccd_ndisks = ~0; 1767 mutex_enter(&ccd_lock); 1768 LIST_FOREACH(sc, &ccds, sc_link) { 1769 if (sc->sc_unit == unit) { 1770 ccd.ccd_ileave = sc->sc_ileave; 1771 ccd.ccd_size = sc->sc_size; 1772 ccd.ccd_ndisks = sc->sc_nccdisks; 1773 ccd.ccd_flags = sc->sc_flags; 1774 break; 1775 } 1776 } 1777 mutex_exit(&ccd_lock); 1778 1779 if (ccd.ccd_ndisks == ~0) 1780 return ENOENT; 1781 1782 node = *rnode; 1783 node.sysctl_data = &ccd; 1784 node.sysctl_size = sizeof(ccd); 1785 1786 return sysctl_lookup(SYSCTLFN_CALL(&node)); 1787 } 1788 1789 static int 1790 ccd_components_sysctl(SYSCTLFN_ARGS) 1791 { 1792 struct sysctlnode node; 1793 int error, unit; 1794 size_t size; 1795 char *names, *p, *ep; 1796 struct ccd_softc *sc; 1797 1798 if (newp == NULL || newlen != sizeof(int)) 1799 return EINVAL; 1800 1801 size = 0; 1802 unit = *(const int *)newp; 1803 newp = NULL; 1804 newlen = 0; 1805 mutex_enter(&ccd_lock); 1806 LIST_FOREACH(sc, &ccds, sc_link) 1807 if (sc->sc_unit == unit) { 1808 for (size_t i = 0; i < sc->sc_nccdisks; i++) 1809 size += strlen(sc->sc_cinfo[i].ci_path) + 1; 1810 break; 1811 } 1812 mutex_exit(&ccd_lock); 1813 1814 if (size == 0) 1815 return ENOENT; 1816 names = kmem_zalloc(size, KM_SLEEP); 1817 p = names; 1818 ep = names + size; 1819 mutex_enter(&ccd_lock); 1820 LIST_FOREACH(sc, &ccds, sc_link) 1821 if (sc->sc_unit == unit) { 1822 for (size_t i = 0; i < sc->sc_nccdisks; i++) { 1823 char *d = sc->sc_cinfo[i].ci_path; 1824 while (p < ep && (*p++ = *d++) != '\0') 1825 continue; 1826 } 1827 break; 1828 } 1829 mutex_exit(&ccd_lock); 1830 1831 node = *rnode; 1832 node.sysctl_data = names; 1833 node.sysctl_size = ep - names; 1834 1835 error = sysctl_lookup(SYSCTLFN_CALL(&node)); 1836 kmem_free(names, size); 1837 return error; 1838 } 1839 1840 SYSCTL_SETUP(sysctl_kern_ccd_setup, "sysctl kern.ccd subtree setup") 1841 { 1842 const struct sysctlnode *node = NULL; 1843 1844 sysctl_createv(clog, 0, NULL, &node, 1845 CTLFLAG_PERMANENT, 1846 CTLTYPE_NODE, "ccd", 1847 SYSCTL_DESCR("ConCatenated Disk state"), 1848 NULL, 0, NULL, 0, 1849 CTL_KERN, CTL_CREATE, CTL_EOL); 1850 1851 if (node == NULL) 1852 return; 1853 1854 sysctl_createv(clog, 0, &node, NULL, 1855 CTLFLAG_PERMANENT | CTLFLAG_READONLY, 1856 CTLTYPE_STRUCT, "units", 1857 SYSCTL_DESCR("List of ccd unit numbers"), 1858 ccd_units_sysctl, 0, NULL, 0, 1859 CTL_CREATE, CTL_EOL); 1860 sysctl_createv(clog, 0, &node, NULL, 1861 CTLFLAG_PERMANENT | CTLFLAG_READWRITE, 1862 CTLTYPE_STRUCT, "info", 1863 SYSCTL_DESCR("Information about a CCD unit"), 1864 ccd_info_sysctl, 0, NULL, 0, 1865 CTL_CREATE, CTL_EOL); 1866 sysctl_createv(clog, 0, &node, NULL, 1867 CTLFLAG_PERMANENT | CTLFLAG_READWRITE, 1868 CTLTYPE_STRUCT, "components", 1869 SYSCTL_DESCR("Information about CCD components"), 1870 ccd_components_sysctl, 0, NULL, 0, 1871 CTL_CREATE, CTL_EOL); 1872 } 1873