1 /* $NetBSD: ccd.c,v 1.182 2019/12/21 13:00:24 ad Exp $ */ 2 3 /*- 4 * Copyright (c) 1996, 1997, 1998, 1999, 2007, 2009 The NetBSD Foundation, Inc. 5 * All rights reserved. 6 * 7 * This code is derived from software contributed to The NetBSD Foundation 8 * by Jason R. Thorpe, and by Andrew Doran. 9 * 10 * Redistribution and use in source and binary forms, with or without 11 * modification, are permitted provided that the following conditions 12 * are met: 13 * 1. Redistributions of source code must retain the above copyright 14 * notice, this list of conditions and the following disclaimer. 15 * 2. Redistributions in binary form must reproduce the above copyright 16 * notice, this list of conditions and the following disclaimer in the 17 * documentation and/or other materials provided with the distribution. 18 * 19 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS 20 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 21 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 22 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS 23 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 24 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 25 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 26 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 27 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 28 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 29 * POSSIBILITY OF SUCH DAMAGE. 30 */ 31 32 /* 33 * Copyright (c) 1988 University of Utah. 34 * Copyright (c) 1990, 1993 35 * The Regents of the University of California. All rights reserved. 36 * 37 * This code is derived from software contributed to Berkeley by 38 * the Systems Programming Group of the University of Utah Computer 39 * Science Department. 40 * 41 * Redistribution and use in source and binary forms, with or without 42 * modification, are permitted provided that the following conditions 43 * are met: 44 * 1. Redistributions of source code must retain the above copyright 45 * notice, this list of conditions and the following disclaimer. 46 * 2. Redistributions in binary form must reproduce the above copyright 47 * notice, this list of conditions and the following disclaimer in the 48 * documentation and/or other materials provided with the distribution. 49 * 3. Neither the name of the University nor the names of its contributors 50 * may be used to endorse or promote products derived from this software 51 * without specific prior written permission. 52 * 53 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 54 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 55 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 56 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 57 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 58 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 59 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 60 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 61 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 62 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 63 * SUCH DAMAGE. 64 * 65 * from: Utah $Hdr: cd.c 1.6 90/11/28$ 66 * 67 * @(#)cd.c 8.2 (Berkeley) 11/16/93 68 */ 69 70 /* 71 * "Concatenated" disk driver. 72 * 73 * Notes on concurrency: 74 * 75 * => sc_dvlock serializes access to the device nodes, excluding block I/O. 76 * 77 * => sc_iolock serializes access to (sc_flags & CCDF_INITED), disk stats, 78 * sc_stop, sc_bufq and b_resid from master buffers. 79 * 80 * => a combination of CCDF_INITED, sc_inflight, and sc_iolock is used to 81 * serialize I/O and configuration changes. 82 * 83 * => the in-core disk label does not change while the device is open. 84 * 85 * On memory consumption: ccd fans out I/O requests and so needs to 86 * allocate memory. If the system is desperately low on memory, we 87 * single thread I/O. 88 */ 89 90 #include <sys/cdefs.h> 91 __KERNEL_RCSID(0, "$NetBSD: ccd.c,v 1.182 2019/12/21 13:00:24 ad Exp $"); 92 93 #include <sys/param.h> 94 #include <sys/systm.h> 95 #include <sys/kernel.h> 96 #include <sys/proc.h> 97 #include <sys/errno.h> 98 #include <sys/buf.h> 99 #include <sys/kmem.h> 100 #include <sys/pool.h> 101 #include <sys/module.h> 102 #include <sys/namei.h> 103 #include <sys/stat.h> 104 #include <sys/ioctl.h> 105 #include <sys/disklabel.h> 106 #include <sys/device.h> 107 #include <sys/disk.h> 108 #include <sys/syslog.h> 109 #include <sys/fcntl.h> 110 #include <sys/vnode.h> 111 #include <sys/conf.h> 112 #include <sys/mutex.h> 113 #include <sys/queue.h> 114 #include <sys/kauth.h> 115 #include <sys/kthread.h> 116 #include <sys/bufq.h> 117 #include <sys/sysctl.h> 118 #include <sys/compat_stub.h> 119 120 #include <uvm/uvm_extern.h> 121 122 #include <dev/ccdvar.h> 123 #include <dev/dkvar.h> 124 125 #include <miscfs/specfs/specdev.h> /* for v_rdev */ 126 127 #include "ioconf.h" 128 129 #if defined(CCDDEBUG) && !defined(DEBUG) 130 #define DEBUG 131 #endif 132 133 #ifdef DEBUG 134 #define CCDB_FOLLOW 0x01 135 #define CCDB_INIT 0x02 136 #define CCDB_IO 0x04 137 #define CCDB_LABEL 0x08 138 #define CCDB_VNODE 0x10 139 int ccddebug = 0x00; 140 #endif 141 142 #define ccdunit(x) DISKUNIT(x) 143 144 struct ccdbuf { 145 struct buf cb_buf; /* new I/O buf */ 146 struct buf *cb_obp; /* ptr. to original I/O buf */ 147 struct ccd_softc *cb_sc; /* pointer to ccd softc */ 148 int cb_comp; /* target component */ 149 SIMPLEQ_ENTRY(ccdbuf) cb_q; /* fifo of component buffers */ 150 }; 151 152 /* component buffer pool */ 153 static pool_cache_t ccd_cache; 154 155 #define CCD_GETBUF() pool_cache_get(ccd_cache, PR_WAITOK) 156 #define CCD_PUTBUF(cbp) pool_cache_put(ccd_cache, cbp) 157 158 #define CCDLABELDEV(dev) \ 159 (MAKEDISKDEV(major((dev)), ccdunit((dev)), RAW_PART)) 160 161 /* called by main() at boot time */ 162 void ccddetach(void); 163 164 /* called by biodone() at interrupt time */ 165 static void ccdiodone(struct buf *); 166 167 static void ccdinterleave(struct ccd_softc *); 168 static int ccdinit(struct ccd_softc *, char **, struct vnode **, 169 struct lwp *); 170 static struct ccdbuf *ccdbuffer(struct ccd_softc *, struct buf *, 171 daddr_t, void *, long); 172 static void ccdgetdefaultlabel(struct ccd_softc *, struct disklabel *); 173 static void ccdgetdisklabel(dev_t); 174 static void ccdmakedisklabel(struct ccd_softc *); 175 static void ccdstart(struct ccd_softc *); 176 static void ccdthread(void *); 177 178 static dev_type_open(ccdopen); 179 static dev_type_close(ccdclose); 180 static dev_type_read(ccdread); 181 static dev_type_write(ccdwrite); 182 static dev_type_ioctl(ccdioctl); 183 static dev_type_strategy(ccdstrategy); 184 static dev_type_size(ccdsize); 185 186 const struct bdevsw ccd_bdevsw = { 187 .d_open = ccdopen, 188 .d_close = ccdclose, 189 .d_strategy = ccdstrategy, 190 .d_ioctl = ccdioctl, 191 .d_dump = nodump, 192 .d_psize = ccdsize, 193 .d_discard = nodiscard, 194 .d_flag = D_DISK | D_MPSAFE 195 }; 196 197 const struct cdevsw ccd_cdevsw = { 198 .d_open = ccdopen, 199 .d_close = ccdclose, 200 .d_read = ccdread, 201 .d_write = ccdwrite, 202 .d_ioctl = ccdioctl, 203 .d_stop = nostop, 204 .d_tty = notty, 205 .d_poll = nopoll, 206 .d_mmap = nommap, 207 .d_kqfilter = nokqfilter, 208 .d_discard = nodiscard, 209 .d_flag = D_DISK | D_MPSAFE 210 }; 211 212 #ifdef DEBUG 213 static void printiinfo(struct ccdiinfo *); 214 #endif 215 216 static LIST_HEAD(, ccd_softc) ccds = LIST_HEAD_INITIALIZER(ccds); 217 static kmutex_t ccd_lock; 218 219 SYSCTL_SETUP_PROTO(sysctl_kern_ccd_setup); 220 221 static struct ccd_softc * 222 ccdcreate(int unit) { 223 struct ccd_softc *sc = kmem_zalloc(sizeof(*sc), KM_SLEEP); 224 225 /* Initialize per-softc structures. */ 226 snprintf(sc->sc_xname, sizeof(sc->sc_xname), "ccd%d", unit); 227 sc->sc_unit = unit; 228 mutex_init(&sc->sc_dvlock, MUTEX_DEFAULT, IPL_NONE); 229 sc->sc_iolock = mutex_obj_alloc(MUTEX_DEFAULT, IPL_NONE); 230 cv_init(&sc->sc_stop, "ccdstop"); 231 cv_init(&sc->sc_push, "ccdthr"); 232 disk_init(&sc->sc_dkdev, sc->sc_xname, NULL); /* XXX */ 233 return sc; 234 } 235 236 static void 237 ccddestroy(struct ccd_softc *sc) { 238 mutex_obj_free(sc->sc_iolock); 239 mutex_exit(&sc->sc_dvlock); 240 mutex_destroy(&sc->sc_dvlock); 241 cv_destroy(&sc->sc_stop); 242 cv_destroy(&sc->sc_push); 243 disk_destroy(&sc->sc_dkdev); 244 kmem_free(sc, sizeof(*sc)); 245 } 246 247 static struct ccd_softc * 248 ccdget(int unit, int make) { 249 struct ccd_softc *sc; 250 if (unit < 0) { 251 #ifdef DIAGNOSTIC 252 panic("%s: unit %d!", __func__, unit); 253 #endif 254 return NULL; 255 } 256 mutex_enter(&ccd_lock); 257 LIST_FOREACH(sc, &ccds, sc_link) { 258 if (sc->sc_unit == unit) { 259 mutex_exit(&ccd_lock); 260 return sc; 261 } 262 } 263 mutex_exit(&ccd_lock); 264 if (!make) 265 return NULL; 266 if ((sc = ccdcreate(unit)) == NULL) 267 return NULL; 268 mutex_enter(&ccd_lock); 269 LIST_INSERT_HEAD(&ccds, sc, sc_link); 270 mutex_exit(&ccd_lock); 271 return sc; 272 } 273 274 static void 275 ccdput(struct ccd_softc *sc) { 276 mutex_enter(&ccd_lock); 277 LIST_REMOVE(sc, sc_link); 278 mutex_exit(&ccd_lock); 279 ccddestroy(sc); 280 } 281 282 /* 283 * Called by main() during pseudo-device attachment. All we need 284 * to do is allocate enough space for devices to be configured later. 285 */ 286 void 287 ccdattach(int num) 288 { 289 mutex_init(&ccd_lock, MUTEX_DEFAULT, IPL_NONE); 290 291 /* Initialize the component buffer pool. */ 292 ccd_cache = pool_cache_init(sizeof(struct ccdbuf), 0, 293 0, 0, "ccdbuf", NULL, IPL_BIO, NULL, NULL, NULL); 294 } 295 296 void 297 ccddetach(void) 298 { 299 pool_cache_destroy(ccd_cache); 300 mutex_destroy(&ccd_lock); 301 } 302 303 static int 304 ccdinit(struct ccd_softc *cs, char **cpaths, struct vnode **vpp, 305 struct lwp *l) 306 { 307 struct ccdcinfo *ci = NULL; 308 int ix; 309 struct ccdgeom *ccg = &cs->sc_geom; 310 char *tmppath; 311 int error, path_alloced; 312 uint64_t psize, minsize; 313 unsigned secsize, maxsecsize; 314 struct disk_geom *dg; 315 316 #ifdef DEBUG 317 if (ccddebug & (CCDB_FOLLOW|CCDB_INIT)) 318 printf("%s: ccdinit\n", cs->sc_xname); 319 #endif 320 321 /* Allocate space for the component info. */ 322 cs->sc_cinfo = kmem_alloc(cs->sc_nccdisks * sizeof(*cs->sc_cinfo), 323 KM_SLEEP); 324 tmppath = kmem_alloc(MAXPATHLEN, KM_SLEEP); 325 326 cs->sc_size = 0; 327 328 /* 329 * Verify that each component piece exists and record 330 * relevant information about it. 331 */ 332 maxsecsize = 0; 333 minsize = 0; 334 for (ix = 0, path_alloced = 0; ix < cs->sc_nccdisks; ix++) { 335 ci = &cs->sc_cinfo[ix]; 336 ci->ci_vp = vpp[ix]; 337 338 /* 339 * Copy in the pathname of the component. 340 */ 341 memset(tmppath, 0, MAXPATHLEN); /* sanity */ 342 error = copyinstr(cpaths[ix], tmppath, 343 MAXPATHLEN, &ci->ci_pathlen); 344 if (ci->ci_pathlen == 0) 345 error = EINVAL; 346 if (error) { 347 #ifdef DEBUG 348 if (ccddebug & (CCDB_FOLLOW|CCDB_INIT)) 349 printf("%s: can't copy path, error = %d\n", 350 cs->sc_xname, error); 351 #endif 352 goto out; 353 } 354 ci->ci_path = kmem_alloc(ci->ci_pathlen, KM_SLEEP); 355 memcpy(ci->ci_path, tmppath, ci->ci_pathlen); 356 path_alloced++; 357 358 /* 359 * XXX: Cache the component's dev_t. 360 */ 361 ci->ci_dev = vpp[ix]->v_rdev; 362 363 /* 364 * Get partition information for the component. 365 */ 366 error = getdisksize(vpp[ix], &psize, &secsize); 367 if (error) { 368 #ifdef DEBUG 369 if (ccddebug & (CCDB_FOLLOW|CCDB_INIT)) 370 printf("%s: %s: disksize failed, error = %d\n", 371 cs->sc_xname, ci->ci_path, error); 372 #endif 373 goto out; 374 } 375 376 /* 377 * Calculate the size, truncating to an interleave 378 * boundary if necessary. 379 */ 380 maxsecsize = secsize > maxsecsize ? secsize : maxsecsize; 381 if (cs->sc_ileave > 1) 382 psize -= psize % cs->sc_ileave; 383 384 if (psize == 0) { 385 #ifdef DEBUG 386 if (ccddebug & (CCDB_FOLLOW|CCDB_INIT)) 387 printf("%s: %s: size == 0\n", 388 cs->sc_xname, ci->ci_path); 389 #endif 390 error = ENODEV; 391 goto out; 392 } 393 394 if (minsize == 0 || psize < minsize) 395 minsize = psize; 396 ci->ci_size = psize; 397 cs->sc_size += psize; 398 } 399 400 /* 401 * Don't allow the interleave to be smaller than 402 * the biggest component sector. 403 */ 404 if ((cs->sc_ileave > 0) && 405 (cs->sc_ileave < (maxsecsize / DEV_BSIZE))) { 406 #ifdef DEBUG 407 if (ccddebug & (CCDB_FOLLOW|CCDB_INIT)) 408 printf("%s: interleave must be at least %d\n", 409 cs->sc_xname, (maxsecsize / DEV_BSIZE)); 410 #endif 411 error = EINVAL; 412 goto out; 413 } 414 415 /* 416 * If uniform interleave is desired set all sizes to that of 417 * the smallest component. 418 */ 419 if (cs->sc_flags & CCDF_UNIFORM) { 420 for (ci = cs->sc_cinfo; 421 ci < &cs->sc_cinfo[cs->sc_nccdisks]; ci++) 422 ci->ci_size = minsize; 423 424 cs->sc_size = cs->sc_nccdisks * minsize; 425 } 426 427 /* 428 * Construct the interleave table. 429 */ 430 ccdinterleave(cs); 431 432 /* 433 * Create pseudo-geometry based on 1MB cylinders. It's 434 * pretty close. 435 */ 436 ccg->ccg_secsize = DEV_BSIZE; 437 ccg->ccg_ntracks = 1; 438 ccg->ccg_nsectors = 1024 * (1024 / ccg->ccg_secsize); 439 ccg->ccg_ncylinders = cs->sc_size / ccg->ccg_nsectors; 440 441 dg = &cs->sc_dkdev.dk_geom; 442 memset(dg, 0, sizeof(*dg)); 443 dg->dg_secperunit = cs->sc_size; 444 dg->dg_secsize = ccg->ccg_secsize; 445 dg->dg_nsectors = ccg->ccg_nsectors; 446 dg->dg_ntracks = ccg->ccg_ntracks; 447 dg->dg_ncylinders = ccg->ccg_ncylinders; 448 449 if (cs->sc_ileave > 0) 450 aprint_normal("%s: Interleaving %d component%s " 451 "(%d block interleave)\n", cs->sc_xname, 452 cs->sc_nccdisks, (cs->sc_nccdisks != 0 ? "s" : ""), 453 cs->sc_ileave); 454 else 455 aprint_normal("%s: Concatenating %d component%s\n", 456 cs->sc_xname, 457 cs->sc_nccdisks, (cs->sc_nccdisks != 0 ? "s" : "")); 458 for (ix = 0; ix < cs->sc_nccdisks; ix++) { 459 ci = &cs->sc_cinfo[ix]; 460 aprint_normal("%s: %s (%ju blocks)\n", cs->sc_xname, 461 ci->ci_path, (uintmax_t)ci->ci_size); 462 } 463 aprint_normal("%s: total %ju blocks\n", cs->sc_xname, cs->sc_size); 464 465 /* 466 * Create thread to handle deferred I/O. 467 */ 468 cs->sc_zap = false; 469 error = kthread_create(PRI_BIO, KTHREAD_MPSAFE, NULL, ccdthread, 470 cs, &cs->sc_thread, "%s", cs->sc_xname); 471 if (error) { 472 printf("ccdinit: can't create thread: %d\n", error); 473 goto out; 474 } 475 476 /* 477 * Only now that everything is set up can we enable the device. 478 */ 479 mutex_enter(cs->sc_iolock); 480 cs->sc_flags |= CCDF_INITED; 481 mutex_exit(cs->sc_iolock); 482 kmem_free(tmppath, MAXPATHLEN); 483 return (0); 484 485 out: 486 for (ix = 0; ix < path_alloced; ix++) { 487 kmem_free(cs->sc_cinfo[ix].ci_path, 488 cs->sc_cinfo[ix].ci_pathlen); 489 } 490 kmem_free(cs->sc_cinfo, cs->sc_nccdisks * sizeof(struct ccdcinfo)); 491 kmem_free(tmppath, MAXPATHLEN); 492 return (error); 493 } 494 495 static void 496 ccdinterleave(struct ccd_softc *cs) 497 { 498 struct ccdcinfo *ci, *smallci; 499 struct ccdiinfo *ii; 500 daddr_t bn, lbn; 501 int ix; 502 u_long size; 503 504 #ifdef DEBUG 505 if (ccddebug & CCDB_INIT) 506 printf("ccdinterleave(%p): ileave %d\n", cs, cs->sc_ileave); 507 #endif 508 /* 509 * Allocate an interleave table. 510 * Chances are this is too big, but we don't care. 511 */ 512 size = (cs->sc_nccdisks + 1) * sizeof(struct ccdiinfo); 513 cs->sc_itable = kmem_zalloc(size, KM_SLEEP); 514 515 /* 516 * Trivial case: no interleave (actually interleave of disk size). 517 * Each table entry represents a single component in its entirety. 518 */ 519 if (cs->sc_ileave == 0) { 520 bn = 0; 521 ii = cs->sc_itable; 522 523 for (ix = 0; ix < cs->sc_nccdisks; ix++) { 524 /* Allocate space for ii_index. */ 525 ii->ii_indexsz = sizeof(int); 526 ii->ii_index = kmem_alloc(ii->ii_indexsz, KM_SLEEP); 527 ii->ii_ndisk = 1; 528 ii->ii_startblk = bn; 529 ii->ii_startoff = 0; 530 ii->ii_index[0] = ix; 531 bn += cs->sc_cinfo[ix].ci_size; 532 ii++; 533 } 534 ii->ii_ndisk = 0; 535 #ifdef DEBUG 536 if (ccddebug & CCDB_INIT) 537 printiinfo(cs->sc_itable); 538 #endif 539 return; 540 } 541 542 /* 543 * The following isn't fast or pretty; it doesn't have to be. 544 */ 545 size = 0; 546 bn = lbn = 0; 547 for (ii = cs->sc_itable; ; ii++) { 548 /* Allocate space for ii_index. */ 549 ii->ii_indexsz = sizeof(int) * cs->sc_nccdisks; 550 ii->ii_index = kmem_alloc(ii->ii_indexsz, KM_SLEEP); 551 552 /* 553 * Locate the smallest of the remaining components 554 */ 555 smallci = NULL; 556 for (ci = cs->sc_cinfo; 557 ci < &cs->sc_cinfo[cs->sc_nccdisks]; ci++) 558 if (ci->ci_size > size && 559 (smallci == NULL || 560 ci->ci_size < smallci->ci_size)) 561 smallci = ci; 562 563 /* 564 * Nobody left, all done 565 */ 566 if (smallci == NULL) { 567 ii->ii_ndisk = 0; 568 break; 569 } 570 571 /* 572 * Record starting logical block and component offset 573 */ 574 ii->ii_startblk = bn / cs->sc_ileave; 575 ii->ii_startoff = lbn; 576 577 /* 578 * Determine how many disks take part in this interleave 579 * and record their indices. 580 */ 581 ix = 0; 582 for (ci = cs->sc_cinfo; 583 ci < &cs->sc_cinfo[cs->sc_nccdisks]; ci++) 584 if (ci->ci_size >= smallci->ci_size) 585 ii->ii_index[ix++] = ci - cs->sc_cinfo; 586 ii->ii_ndisk = ix; 587 bn += ix * (smallci->ci_size - size); 588 lbn = smallci->ci_size / cs->sc_ileave; 589 size = smallci->ci_size; 590 } 591 #ifdef DEBUG 592 if (ccddebug & CCDB_INIT) 593 printiinfo(cs->sc_itable); 594 #endif 595 } 596 597 /* ARGSUSED */ 598 static int 599 ccdopen(dev_t dev, int flags, int fmt, struct lwp *l) 600 { 601 int unit = ccdunit(dev); 602 struct ccd_softc *cs; 603 struct disklabel *lp; 604 int error = 0, part, pmask; 605 606 #ifdef DEBUG 607 if (ccddebug & CCDB_FOLLOW) 608 printf("ccdopen(0x%"PRIx64", 0x%x)\n", dev, flags); 609 #endif 610 if ((cs = ccdget(unit, 1)) == NULL) 611 return ENXIO; 612 613 mutex_enter(&cs->sc_dvlock); 614 615 lp = cs->sc_dkdev.dk_label; 616 617 part = DISKPART(dev); 618 pmask = (1 << part); 619 620 /* 621 * If we're initialized, check to see if there are any other 622 * open partitions. If not, then it's safe to update 623 * the in-core disklabel. Only read the disklabel if it is 624 * not already valid. 625 */ 626 if ((cs->sc_flags & (CCDF_INITED|CCDF_VLABEL)) == CCDF_INITED && 627 cs->sc_dkdev.dk_openmask == 0) 628 ccdgetdisklabel(dev); 629 630 /* Check that the partition exists. */ 631 if (part != RAW_PART) { 632 if (((cs->sc_flags & CCDF_INITED) == 0) || 633 ((part >= lp->d_npartitions) || 634 (lp->d_partitions[part].p_fstype == FS_UNUSED))) { 635 error = ENXIO; 636 goto done; 637 } 638 } 639 640 /* Prevent our unit from being unconfigured while open. */ 641 switch (fmt) { 642 case S_IFCHR: 643 cs->sc_dkdev.dk_copenmask |= pmask; 644 break; 645 646 case S_IFBLK: 647 cs->sc_dkdev.dk_bopenmask |= pmask; 648 break; 649 } 650 cs->sc_dkdev.dk_openmask = 651 cs->sc_dkdev.dk_copenmask | cs->sc_dkdev.dk_bopenmask; 652 653 done: 654 mutex_exit(&cs->sc_dvlock); 655 return (error); 656 } 657 658 /* ARGSUSED */ 659 static int 660 ccdclose(dev_t dev, int flags, int fmt, struct lwp *l) 661 { 662 int unit = ccdunit(dev); 663 struct ccd_softc *cs; 664 int part; 665 666 #ifdef DEBUG 667 if (ccddebug & CCDB_FOLLOW) 668 printf("ccdclose(0x%"PRIx64", 0x%x)\n", dev, flags); 669 #endif 670 671 if ((cs = ccdget(unit, 0)) == NULL) 672 return ENXIO; 673 674 mutex_enter(&cs->sc_dvlock); 675 676 part = DISKPART(dev); 677 678 /* ...that much closer to allowing unconfiguration... */ 679 switch (fmt) { 680 case S_IFCHR: 681 cs->sc_dkdev.dk_copenmask &= ~(1 << part); 682 break; 683 684 case S_IFBLK: 685 cs->sc_dkdev.dk_bopenmask &= ~(1 << part); 686 break; 687 } 688 cs->sc_dkdev.dk_openmask = 689 cs->sc_dkdev.dk_copenmask | cs->sc_dkdev.dk_bopenmask; 690 691 if (cs->sc_dkdev.dk_openmask == 0) { 692 if ((cs->sc_flags & CCDF_KLABEL) == 0) 693 cs->sc_flags &= ~CCDF_VLABEL; 694 } 695 696 mutex_exit(&cs->sc_dvlock); 697 return (0); 698 } 699 700 static bool 701 ccdbackoff(struct ccd_softc *cs) 702 { 703 704 /* XXX Arbitrary, should be a uvm call. */ 705 return uvm_free() < (uvmexp.freemin >> 1) && 706 disk_isbusy(&cs->sc_dkdev); 707 } 708 709 static void 710 ccdthread(void *cookie) 711 { 712 struct ccd_softc *cs; 713 714 cs = cookie; 715 716 #ifdef DEBUG 717 if (ccddebug & CCDB_FOLLOW) 718 printf("ccdthread: hello\n"); 719 #endif 720 721 mutex_enter(cs->sc_iolock); 722 while (__predict_true(!cs->sc_zap)) { 723 if (bufq_peek(cs->sc_bufq) == NULL) { 724 /* Nothing to do. */ 725 cv_wait(&cs->sc_push, cs->sc_iolock); 726 continue; 727 } 728 if (ccdbackoff(cs)) { 729 /* Wait for memory to become available. */ 730 (void)cv_timedwait(&cs->sc_push, cs->sc_iolock, 1); 731 continue; 732 } 733 #ifdef DEBUG 734 if (ccddebug & CCDB_FOLLOW) 735 printf("ccdthread: dispatching I/O\n"); 736 #endif 737 ccdstart(cs); 738 mutex_enter(cs->sc_iolock); 739 } 740 cs->sc_thread = NULL; 741 mutex_exit(cs->sc_iolock); 742 #ifdef DEBUG 743 if (ccddebug & CCDB_FOLLOW) 744 printf("ccdthread: goodbye\n"); 745 #endif 746 kthread_exit(0); 747 } 748 749 static void 750 ccdstrategy(struct buf *bp) 751 { 752 int unit = ccdunit(bp->b_dev); 753 struct ccd_softc *cs; 754 if ((cs = ccdget(unit, 0)) == NULL) 755 return; 756 757 /* Must be open or reading label. */ 758 KASSERT(cs->sc_dkdev.dk_openmask != 0 || 759 (cs->sc_flags & CCDF_RLABEL) != 0); 760 761 mutex_enter(cs->sc_iolock); 762 /* Synchronize with device init/uninit. */ 763 if (__predict_false((cs->sc_flags & CCDF_INITED) == 0)) { 764 mutex_exit(cs->sc_iolock); 765 #ifdef DEBUG 766 if (ccddebug & CCDB_FOLLOW) 767 printf("ccdstrategy: unit %d: not inited\n", unit); 768 #endif 769 bp->b_error = ENXIO; 770 bp->b_resid = bp->b_bcount; 771 biodone(bp); 772 return; 773 } 774 775 /* Defer to thread if system is low on memory. */ 776 bufq_put(cs->sc_bufq, bp); 777 if (__predict_false(ccdbackoff(cs))) { 778 mutex_exit(cs->sc_iolock); 779 #ifdef DEBUG 780 if (ccddebug & CCDB_FOLLOW) 781 printf("ccdstrategy: holding off on I/O\n"); 782 #endif 783 return; 784 } 785 ccdstart(cs); 786 } 787 788 static void 789 ccdstart(struct ccd_softc *cs) 790 { 791 daddr_t blkno; 792 int wlabel; 793 struct disklabel *lp; 794 long bcount, rcount; 795 struct ccdbuf *cbp; 796 char *addr; 797 daddr_t bn; 798 vnode_t *vp; 799 buf_t *bp; 800 801 KASSERT(mutex_owned(cs->sc_iolock)); 802 803 bp = bufq_get(cs->sc_bufq); 804 KASSERT(bp != NULL); 805 806 disk_busy(&cs->sc_dkdev); 807 808 #ifdef DEBUG 809 if (ccddebug & CCDB_FOLLOW) 810 printf("ccdstart(%s, %p)\n", cs->sc_xname, bp); 811 #endif 812 813 /* If it's a nil transfer, wake up the top half now. */ 814 if (bp->b_bcount == 0) 815 goto done; 816 817 lp = cs->sc_dkdev.dk_label; 818 819 /* 820 * Do bounds checking and adjust transfer. If there's an 821 * error, the bounds check will flag that for us. Convert 822 * the partition relative block number to an absolute. 823 */ 824 blkno = bp->b_blkno; 825 wlabel = cs->sc_flags & (CCDF_WLABEL|CCDF_LABELLING); 826 if (DISKPART(bp->b_dev) != RAW_PART) { 827 if (bounds_check_with_label(&cs->sc_dkdev, bp, wlabel) <= 0) 828 goto done; 829 blkno += lp->d_partitions[DISKPART(bp->b_dev)].p_offset; 830 } 831 mutex_exit(cs->sc_iolock); 832 bp->b_rawblkno = blkno; 833 834 /* Allocate the component buffers and start I/O! */ 835 bp->b_resid = bp->b_bcount; 836 bn = bp->b_rawblkno; 837 addr = bp->b_data; 838 for (bcount = bp->b_bcount; bcount > 0; bcount -= rcount) { 839 cbp = ccdbuffer(cs, bp, bn, addr, bcount); 840 rcount = cbp->cb_buf.b_bcount; 841 bn += btodb(rcount); 842 addr += rcount; 843 vp = cbp->cb_buf.b_vp; 844 if ((cbp->cb_buf.b_flags & B_READ) == 0) { 845 mutex_enter(vp->v_interlock); 846 vp->v_numoutput++; 847 mutex_exit(vp->v_interlock); 848 } 849 (void)VOP_STRATEGY(vp, &cbp->cb_buf); 850 } 851 return; 852 853 done: 854 disk_unbusy(&cs->sc_dkdev, 0, 0); 855 cv_broadcast(&cs->sc_stop); 856 cv_broadcast(&cs->sc_push); 857 mutex_exit(cs->sc_iolock); 858 bp->b_resid = bp->b_bcount; 859 biodone(bp); 860 } 861 862 /* 863 * Build a component buffer header. 864 */ 865 static struct ccdbuf * 866 ccdbuffer(struct ccd_softc *cs, struct buf *bp, daddr_t bn, void *addr, 867 long bcount) 868 { 869 struct ccdcinfo *ci; 870 struct ccdbuf *cbp; 871 daddr_t cbn, cboff; 872 u_int64_t cbc; 873 int ccdisk; 874 875 #ifdef DEBUG 876 if (ccddebug & CCDB_IO) 877 printf("ccdbuffer(%p, %p, %" PRId64 ", %p, %ld)\n", 878 cs, bp, bn, addr, bcount); 879 #endif 880 /* 881 * Determine which component bn falls in. 882 */ 883 cbn = bn; 884 cboff = 0; 885 886 /* 887 * Serially concatenated 888 */ 889 if (cs->sc_ileave == 0) { 890 daddr_t sblk; 891 892 sblk = 0; 893 for (ccdisk = 0, ci = &cs->sc_cinfo[ccdisk]; 894 cbn >= sblk + ci->ci_size; 895 ccdisk++, ci = &cs->sc_cinfo[ccdisk]) 896 sblk += ci->ci_size; 897 cbn -= sblk; 898 } 899 /* 900 * Interleaved 901 */ 902 else { 903 struct ccdiinfo *ii; 904 int off; 905 906 cboff = cbn % cs->sc_ileave; 907 cbn /= cs->sc_ileave; 908 for (ii = cs->sc_itable; ii->ii_ndisk; ii++) 909 if (ii->ii_startblk > cbn) 910 break; 911 ii--; 912 off = cbn - ii->ii_startblk; 913 if (ii->ii_ndisk == 1) { 914 ccdisk = ii->ii_index[0]; 915 cbn = ii->ii_startoff + off; 916 } else { 917 ccdisk = ii->ii_index[off % ii->ii_ndisk]; 918 cbn = ii->ii_startoff + off / ii->ii_ndisk; 919 } 920 cbn *= cs->sc_ileave; 921 ci = &cs->sc_cinfo[ccdisk]; 922 } 923 924 /* 925 * Fill in the component buf structure. 926 */ 927 cbp = CCD_GETBUF(); 928 KASSERT(cbp != NULL); 929 buf_init(&cbp->cb_buf); 930 cbp->cb_buf.b_flags = bp->b_flags; 931 cbp->cb_buf.b_oflags = bp->b_oflags; 932 cbp->cb_buf.b_cflags = bp->b_cflags; 933 cbp->cb_buf.b_iodone = ccdiodone; 934 cbp->cb_buf.b_proc = bp->b_proc; 935 cbp->cb_buf.b_dev = ci->ci_dev; 936 cbp->cb_buf.b_blkno = cbn + cboff; 937 cbp->cb_buf.b_data = addr; 938 cbp->cb_buf.b_vp = ci->ci_vp; 939 cbp->cb_buf.b_objlock = ci->ci_vp->v_interlock; 940 if (cs->sc_ileave == 0) 941 cbc = dbtob((u_int64_t)(ci->ci_size - cbn)); 942 else 943 cbc = dbtob((u_int64_t)(cs->sc_ileave - cboff)); 944 cbp->cb_buf.b_bcount = cbc < bcount ? cbc : bcount; 945 946 /* 947 * context for ccdiodone 948 */ 949 cbp->cb_obp = bp; 950 cbp->cb_sc = cs; 951 cbp->cb_comp = ccdisk; 952 953 BIO_COPYPRIO(&cbp->cb_buf, bp); 954 955 #ifdef DEBUG 956 if (ccddebug & CCDB_IO) 957 printf(" dev 0x%"PRIx64"(u%lu): cbp %p bn %" PRId64 " addr %p" 958 " bcnt %d\n", 959 ci->ci_dev, (unsigned long) (ci-cs->sc_cinfo), cbp, 960 cbp->cb_buf.b_blkno, cbp->cb_buf.b_data, 961 cbp->cb_buf.b_bcount); 962 #endif 963 964 return (cbp); 965 } 966 967 /* 968 * Called at interrupt time. 969 * Mark the component as done and if all components are done, 970 * take a ccd interrupt. 971 */ 972 static void 973 ccdiodone(struct buf *vbp) 974 { 975 struct ccdbuf *cbp = (struct ccdbuf *) vbp; 976 struct buf *bp = cbp->cb_obp; 977 struct ccd_softc *cs = cbp->cb_sc; 978 int count; 979 980 #ifdef DEBUG 981 if (ccddebug & CCDB_FOLLOW) 982 printf("ccdiodone(%p)\n", cbp); 983 if (ccddebug & CCDB_IO) { 984 printf("ccdiodone: bp %p bcount %d resid %d\n", 985 bp, bp->b_bcount, bp->b_resid); 986 printf(" dev 0x%"PRIx64"(u%d), cbp %p bn %" PRId64 " addr %p" 987 " bcnt %d\n", 988 cbp->cb_buf.b_dev, cbp->cb_comp, cbp, 989 cbp->cb_buf.b_blkno, cbp->cb_buf.b_data, 990 cbp->cb_buf.b_bcount); 991 } 992 #endif 993 994 if (cbp->cb_buf.b_error != 0) { 995 bp->b_error = cbp->cb_buf.b_error; 996 printf("%s: error %d on component %d\n", 997 cs->sc_xname, bp->b_error, cbp->cb_comp); 998 } 999 count = cbp->cb_buf.b_bcount; 1000 buf_destroy(&cbp->cb_buf); 1001 CCD_PUTBUF(cbp); 1002 1003 /* 1004 * If all done, "interrupt". 1005 */ 1006 mutex_enter(cs->sc_iolock); 1007 bp->b_resid -= count; 1008 if (bp->b_resid < 0) 1009 panic("ccdiodone: count"); 1010 if (bp->b_resid == 0) { 1011 /* 1012 * Request is done for better or worse, wakeup the top half. 1013 */ 1014 if (bp->b_error != 0) 1015 bp->b_resid = bp->b_bcount; 1016 disk_unbusy(&cs->sc_dkdev, (bp->b_bcount - bp->b_resid), 1017 (bp->b_flags & B_READ)); 1018 if (!disk_isbusy(&cs->sc_dkdev)) { 1019 if (bufq_peek(cs->sc_bufq) != NULL) { 1020 cv_broadcast(&cs->sc_push); 1021 } 1022 cv_broadcast(&cs->sc_stop); 1023 } 1024 mutex_exit(cs->sc_iolock); 1025 biodone(bp); 1026 } else 1027 mutex_exit(cs->sc_iolock); 1028 } 1029 1030 /* ARGSUSED */ 1031 static int 1032 ccdread(dev_t dev, struct uio *uio, int flags) 1033 { 1034 int unit = ccdunit(dev); 1035 struct ccd_softc *cs; 1036 1037 #ifdef DEBUG 1038 if (ccddebug & CCDB_FOLLOW) 1039 printf("ccdread(0x%"PRIx64", %p)\n", dev, uio); 1040 #endif 1041 if ((cs = ccdget(unit, 0)) == NULL) 1042 return 0; 1043 1044 /* Unlocked advisory check, ccdstrategy check is synchronous. */ 1045 if ((cs->sc_flags & CCDF_INITED) == 0) 1046 return (ENXIO); 1047 1048 return (physio(ccdstrategy, NULL, dev, B_READ, minphys, uio)); 1049 } 1050 1051 /* ARGSUSED */ 1052 static int 1053 ccdwrite(dev_t dev, struct uio *uio, int flags) 1054 { 1055 int unit = ccdunit(dev); 1056 struct ccd_softc *cs; 1057 1058 #ifdef DEBUG 1059 if (ccddebug & CCDB_FOLLOW) 1060 printf("ccdwrite(0x%"PRIx64", %p)\n", dev, uio); 1061 #endif 1062 if ((cs = ccdget(unit, 0)) == NULL) 1063 return ENOENT; 1064 1065 /* Unlocked advisory check, ccdstrategy check is synchronous. */ 1066 if ((cs->sc_flags & CCDF_INITED) == 0) 1067 return (ENXIO); 1068 1069 return (physio(ccdstrategy, NULL, dev, B_WRITE, minphys, uio)); 1070 } 1071 1072 int (*compat_ccd_ioctl_60)(dev_t, u_long, void *, int, struct lwp *, 1073 int (*)(dev_t, u_long, void *, int, struct lwp *)) = (void *)enosys; 1074 1075 static int 1076 ccdioctl(dev_t dev, u_long cmd, void *data, int flag, struct lwp *l) 1077 { 1078 int unit = ccdunit(dev); 1079 int i, j, lookedup = 0, error = 0; 1080 int part, pmask, make, hook; 1081 struct ccd_softc *cs; 1082 struct ccd_ioctl *ccio = (struct ccd_ioctl *)data; 1083 kauth_cred_t uc; 1084 char **cpp; 1085 struct pathbuf *pb; 1086 struct vnode **vpp; 1087 #ifdef __HAVE_OLD_DISKLABEL 1088 struct disklabel newlabel; 1089 #endif 1090 1091 switch (cmd) { 1092 case CCDIOCSET: 1093 make = 1; 1094 break; 1095 default: 1096 MODULE_HOOK_CALL(ccd_ioctl_60_hook, 1097 (0, cmd, NULL, 0, NULL, NULL), 1098 enosys(), hook); 1099 if (hook == 0) 1100 make = 1; 1101 else 1102 make = 0; 1103 break; 1104 } 1105 1106 if ((cs = ccdget(unit, make)) == NULL) 1107 return ENOENT; 1108 uc = kauth_cred_get(); 1109 1110 MODULE_HOOK_CALL(ccd_ioctl_60_hook, 1111 (dev, cmd, data, flag, l, ccdioctl), 1112 enosys(), error); 1113 if (error != ENOSYS) 1114 return error; 1115 1116 /* Must be open for writes for these commands... */ 1117 switch (cmd) { 1118 case CCDIOCSET: 1119 case CCDIOCCLR: 1120 case DIOCSDINFO: 1121 case DIOCWDINFO: 1122 case DIOCCACHESYNC: 1123 case DIOCAWEDGE: 1124 case DIOCDWEDGE: 1125 case DIOCRMWEDGES: 1126 case DIOCMWEDGES: 1127 #ifdef __HAVE_OLD_DISKLABEL 1128 case ODIOCSDINFO: 1129 case ODIOCWDINFO: 1130 #endif 1131 case DIOCKLABEL: 1132 case DIOCWLABEL: 1133 if ((flag & FWRITE) == 0) 1134 return (EBADF); 1135 } 1136 1137 mutex_enter(&cs->sc_dvlock); 1138 1139 /* Must be initialized for these... */ 1140 switch (cmd) { 1141 case CCDIOCCLR: 1142 case DIOCGDINFO: 1143 case DIOCGSTRATEGY: 1144 case DIOCGCACHE: 1145 case DIOCCACHESYNC: 1146 case DIOCAWEDGE: 1147 case DIOCDWEDGE: 1148 case DIOCLWEDGES: 1149 case DIOCMWEDGES: 1150 case DIOCSDINFO: 1151 case DIOCWDINFO: 1152 case DIOCGPARTINFO: 1153 case DIOCWLABEL: 1154 case DIOCKLABEL: 1155 case DIOCGDEFLABEL: 1156 #ifdef __HAVE_OLD_DISKLABEL 1157 case ODIOCGDINFO: 1158 case ODIOCSDINFO: 1159 case ODIOCWDINFO: 1160 case ODIOCGDEFLABEL: 1161 #endif 1162 if ((cs->sc_flags & CCDF_INITED) == 0) { 1163 error = ENXIO; 1164 goto out; 1165 } 1166 } 1167 1168 error = disk_ioctl(&cs->sc_dkdev, dev, cmd, data, flag, l); 1169 if (error != EPASSTHROUGH) 1170 goto out; 1171 1172 error = 0; 1173 switch (cmd) { 1174 case CCDIOCSET: 1175 if (cs->sc_flags & CCDF_INITED) { 1176 error = EBUSY; 1177 goto out; 1178 } 1179 1180 /* Validate the flags. */ 1181 if ((ccio->ccio_flags & CCDF_USERMASK) != ccio->ccio_flags) { 1182 error = EINVAL; 1183 goto out; 1184 } 1185 1186 if (ccio->ccio_ndisks > CCD_MAXNDISKS || 1187 ccio->ccio_ndisks == 0) { 1188 error = EINVAL; 1189 goto out; 1190 } 1191 1192 /* Fill in some important bits. */ 1193 cs->sc_ileave = ccio->ccio_ileave; 1194 cs->sc_nccdisks = ccio->ccio_ndisks; 1195 cs->sc_flags = ccio->ccio_flags & CCDF_USERMASK; 1196 1197 /* 1198 * Allocate space for and copy in the array of 1199 * component pathnames and device numbers. 1200 */ 1201 cpp = kmem_alloc(ccio->ccio_ndisks * sizeof(*cpp), KM_SLEEP); 1202 vpp = kmem_alloc(ccio->ccio_ndisks * sizeof(*vpp), KM_SLEEP); 1203 error = copyin(ccio->ccio_disks, cpp, 1204 ccio->ccio_ndisks * sizeof(*cpp)); 1205 if (error) { 1206 kmem_free(vpp, ccio->ccio_ndisks * sizeof(*vpp)); 1207 kmem_free(cpp, ccio->ccio_ndisks * sizeof(*cpp)); 1208 goto out; 1209 } 1210 1211 #ifdef DEBUG 1212 if (ccddebug & CCDB_INIT) 1213 for (i = 0; i < ccio->ccio_ndisks; ++i) 1214 printf("ccdioctl: component %d: %p\n", 1215 i, cpp[i]); 1216 #endif 1217 1218 for (i = 0; i < ccio->ccio_ndisks; ++i) { 1219 #ifdef DEBUG 1220 if (ccddebug & CCDB_INIT) 1221 printf("ccdioctl: lookedup = %d\n", lookedup); 1222 #endif 1223 error = pathbuf_copyin(cpp[i], &pb); 1224 if (error == 0) { 1225 error = vn_bdev_openpath(pb, &vpp[i], l); 1226 } 1227 pathbuf_destroy(pb); 1228 if (error != 0) { 1229 for (j = 0; j < lookedup; ++j) 1230 (void)vn_close(vpp[j], FREAD|FWRITE, 1231 uc); 1232 kmem_free(vpp, ccio->ccio_ndisks * 1233 sizeof(*vpp)); 1234 kmem_free(cpp, ccio->ccio_ndisks * 1235 sizeof(*cpp)); 1236 goto out; 1237 } 1238 ++lookedup; 1239 } 1240 1241 /* Attach the disk. */ 1242 disk_attach(&cs->sc_dkdev); 1243 bufq_alloc(&cs->sc_bufq, "fcfs", 0); 1244 1245 /* 1246 * Initialize the ccd. Fills in the softc for us. 1247 */ 1248 if ((error = ccdinit(cs, cpp, vpp, l)) != 0) { 1249 for (j = 0; j < lookedup; ++j) 1250 (void)vn_close(vpp[j], FREAD|FWRITE, 1251 uc); 1252 kmem_free(vpp, ccio->ccio_ndisks * sizeof(*vpp)); 1253 kmem_free(cpp, ccio->ccio_ndisks * sizeof(*cpp)); 1254 disk_detach(&cs->sc_dkdev); 1255 mutex_exit(&cs->sc_dvlock); 1256 bufq_free(cs->sc_bufq); 1257 return error; 1258 } 1259 1260 /* We can free the temporary variables now. */ 1261 kmem_free(vpp, ccio->ccio_ndisks * sizeof(*vpp)); 1262 kmem_free(cpp, ccio->ccio_ndisks * sizeof(*cpp)); 1263 1264 /* 1265 * The ccd has been successfully initialized, so 1266 * we can place it into the array. Don't try to 1267 * read the disklabel until the disk has been attached, 1268 * because space for the disklabel is allocated 1269 * in disk_attach(); 1270 */ 1271 ccio->ccio_unit = unit; 1272 ccio->ccio_size = cs->sc_size; 1273 1274 /* Try and read the disklabel. */ 1275 ccdgetdisklabel(dev); 1276 disk_set_info(NULL, &cs->sc_dkdev, NULL); 1277 1278 /* discover wedges */ 1279 mutex_exit(&cs->sc_dvlock); 1280 dkwedge_discover(&cs->sc_dkdev); 1281 return 0; 1282 1283 case CCDIOCCLR: 1284 /* 1285 * Don't unconfigure if any other partitions are open 1286 * or if both the character and block flavors of this 1287 * partition are open. 1288 */ 1289 part = DISKPART(dev); 1290 pmask = (1 << part); 1291 if ((cs->sc_dkdev.dk_openmask & ~pmask) || 1292 ((cs->sc_dkdev.dk_bopenmask & pmask) && 1293 (cs->sc_dkdev.dk_copenmask & pmask))) { 1294 error = EBUSY; 1295 goto out; 1296 } 1297 1298 /* Delete all of our wedges. */ 1299 dkwedge_delall(&cs->sc_dkdev); 1300 1301 /* Stop new I/O, wait for in-flight I/O to complete. */ 1302 mutex_enter(cs->sc_iolock); 1303 cs->sc_flags &= ~(CCDF_INITED|CCDF_VLABEL); 1304 cs->sc_zap = true; 1305 while (disk_isbusy(&cs->sc_dkdev) || 1306 bufq_peek(cs->sc_bufq) != NULL || 1307 cs->sc_thread != NULL) { 1308 cv_broadcast(&cs->sc_push); 1309 (void)cv_timedwait(&cs->sc_stop, cs->sc_iolock, hz); 1310 } 1311 mutex_exit(cs->sc_iolock); 1312 1313 /* 1314 * Free ccd_softc information and clear entry. 1315 */ 1316 1317 /* Close the components and free their pathnames. */ 1318 for (i = 0; i < cs->sc_nccdisks; ++i) { 1319 /* 1320 * XXX: this close could potentially fail and 1321 * cause Bad Things. Maybe we need to force 1322 * the close to happen? 1323 */ 1324 #ifdef DEBUG 1325 if (ccddebug & CCDB_VNODE) 1326 vprint("CCDIOCCLR: vnode info", 1327 cs->sc_cinfo[i].ci_vp); 1328 #endif 1329 (void)vn_close(cs->sc_cinfo[i].ci_vp, FREAD|FWRITE, 1330 uc); 1331 kmem_free(cs->sc_cinfo[i].ci_path, 1332 cs->sc_cinfo[i].ci_pathlen); 1333 } 1334 1335 /* Free interleave index. */ 1336 for (i = 0; cs->sc_itable[i].ii_ndisk; ++i) { 1337 kmem_free(cs->sc_itable[i].ii_index, 1338 cs->sc_itable[i].ii_indexsz); 1339 } 1340 1341 /* Free component info and interleave table. */ 1342 kmem_free(cs->sc_cinfo, cs->sc_nccdisks * 1343 sizeof(struct ccdcinfo)); 1344 kmem_free(cs->sc_itable, (cs->sc_nccdisks + 1) * 1345 sizeof(struct ccdiinfo)); 1346 1347 aprint_normal("%s: detached\n", cs->sc_xname); 1348 1349 /* Detach the disk. */ 1350 disk_detach(&cs->sc_dkdev); 1351 bufq_free(cs->sc_bufq); 1352 ccdput(cs); 1353 /* Don't break, otherwise cs is read again. */ 1354 return 0; 1355 1356 case DIOCGSTRATEGY: 1357 { 1358 struct disk_strategy *dks = (void *)data; 1359 1360 mutex_enter(cs->sc_iolock); 1361 if (cs->sc_bufq != NULL) 1362 strlcpy(dks->dks_name, 1363 bufq_getstrategyname(cs->sc_bufq), 1364 sizeof(dks->dks_name)); 1365 else 1366 error = EINVAL; 1367 mutex_exit(cs->sc_iolock); 1368 dks->dks_paramlen = 0; 1369 break; 1370 } 1371 1372 case DIOCGCACHE: 1373 { 1374 int dkcache = 0; 1375 1376 /* 1377 * We pass this call down to all components and report 1378 * intersection of the flags returned by the components. 1379 * If any errors out, we return error. CCD components 1380 * can not change unless the device is unconfigured, so 1381 * device feature flags will remain static. RCE/WCE can change 1382 * of course, if set directly on underlying device. 1383 */ 1384 for (error = 0, i = 0; i < cs->sc_nccdisks; i++) { 1385 error = VOP_IOCTL(cs->sc_cinfo[i].ci_vp, cmd, &j, 1386 flag, uc); 1387 if (error) 1388 break; 1389 1390 if (i == 0) 1391 dkcache = j; 1392 else 1393 dkcache = DKCACHE_COMBINE(dkcache, j); 1394 } 1395 1396 *((int *)data) = dkcache; 1397 break; 1398 } 1399 1400 case DIOCCACHESYNC: 1401 /* 1402 * We pass this call down to all components and report 1403 * the first error we encounter. 1404 */ 1405 for (error = 0, i = 0; i < cs->sc_nccdisks; i++) { 1406 j = VOP_IOCTL(cs->sc_cinfo[i].ci_vp, cmd, data, 1407 flag, uc); 1408 if (j != 0 && error == 0) 1409 error = j; 1410 } 1411 break; 1412 1413 case DIOCWDINFO: 1414 case DIOCSDINFO: 1415 #ifdef __HAVE_OLD_DISKLABEL 1416 case ODIOCWDINFO: 1417 case ODIOCSDINFO: 1418 #endif 1419 { 1420 struct disklabel *lp; 1421 #ifdef __HAVE_OLD_DISKLABEL 1422 if (cmd == ODIOCSDINFO || cmd == ODIOCWDINFO) { 1423 memset(&newlabel, 0, sizeof newlabel); 1424 memcpy(&newlabel, data, sizeof (struct olddisklabel)); 1425 lp = &newlabel; 1426 } else 1427 #endif 1428 lp = (struct disklabel *)data; 1429 1430 cs->sc_flags |= CCDF_LABELLING; 1431 1432 error = setdisklabel(cs->sc_dkdev.dk_label, 1433 lp, 0, cs->sc_dkdev.dk_cpulabel); 1434 if (error == 0) { 1435 if (cmd == DIOCWDINFO 1436 #ifdef __HAVE_OLD_DISKLABEL 1437 || cmd == ODIOCWDINFO 1438 #endif 1439 ) 1440 error = writedisklabel(CCDLABELDEV(dev), 1441 ccdstrategy, cs->sc_dkdev.dk_label, 1442 cs->sc_dkdev.dk_cpulabel); 1443 } 1444 1445 cs->sc_flags &= ~CCDF_LABELLING; 1446 break; 1447 } 1448 1449 case DIOCKLABEL: 1450 if (*(int *)data != 0) 1451 cs->sc_flags |= CCDF_KLABEL; 1452 else 1453 cs->sc_flags &= ~CCDF_KLABEL; 1454 break; 1455 1456 case DIOCWLABEL: 1457 if (*(int *)data != 0) 1458 cs->sc_flags |= CCDF_WLABEL; 1459 else 1460 cs->sc_flags &= ~CCDF_WLABEL; 1461 break; 1462 1463 case DIOCGDEFLABEL: 1464 ccdgetdefaultlabel(cs, (struct disklabel *)data); 1465 break; 1466 1467 #ifdef __HAVE_OLD_DISKLABEL 1468 case ODIOCGDEFLABEL: 1469 ccdgetdefaultlabel(cs, &newlabel); 1470 if (newlabel.d_npartitions > OLDMAXPARTITIONS) 1471 return ENOTTY; 1472 memcpy(data, &newlabel, sizeof (struct olddisklabel)); 1473 break; 1474 #endif 1475 1476 default: 1477 error = ENOTTY; 1478 } 1479 1480 out: 1481 mutex_exit(&cs->sc_dvlock); 1482 return (error); 1483 } 1484 1485 static int 1486 ccdsize(dev_t dev) 1487 { 1488 struct ccd_softc *cs; 1489 struct disklabel *lp; 1490 int part, unit, omask, size; 1491 1492 unit = ccdunit(dev); 1493 if ((cs = ccdget(unit, 0)) == NULL) 1494 return -1; 1495 1496 if ((cs->sc_flags & CCDF_INITED) == 0) 1497 return (-1); 1498 1499 part = DISKPART(dev); 1500 omask = cs->sc_dkdev.dk_openmask & (1 << part); 1501 lp = cs->sc_dkdev.dk_label; 1502 1503 if (omask == 0 && ccdopen(dev, 0, S_IFBLK, curlwp)) 1504 return (-1); 1505 1506 if (lp->d_partitions[part].p_fstype != FS_SWAP) 1507 size = -1; 1508 else 1509 size = lp->d_partitions[part].p_size * 1510 (lp->d_secsize / DEV_BSIZE); 1511 1512 if (omask == 0 && ccdclose(dev, 0, S_IFBLK, curlwp)) 1513 return (-1); 1514 1515 return (size); 1516 } 1517 1518 static void 1519 ccdgetdefaultlabel(struct ccd_softc *cs, struct disklabel *lp) 1520 { 1521 struct ccdgeom *ccg = &cs->sc_geom; 1522 1523 memset(lp, 0, sizeof(*lp)); 1524 1525 if (cs->sc_size > UINT32_MAX) 1526 lp->d_secperunit = UINT32_MAX; 1527 else 1528 lp->d_secperunit = cs->sc_size; 1529 lp->d_secsize = ccg->ccg_secsize; 1530 lp->d_nsectors = ccg->ccg_nsectors; 1531 lp->d_ntracks = ccg->ccg_ntracks; 1532 lp->d_ncylinders = ccg->ccg_ncylinders; 1533 lp->d_secpercyl = lp->d_ntracks * lp->d_nsectors; 1534 1535 strncpy(lp->d_typename, "ccd", sizeof(lp->d_typename)); 1536 lp->d_type = DKTYPE_CCD; 1537 strncpy(lp->d_packname, "fictitious", sizeof(lp->d_packname)); 1538 lp->d_rpm = 3600; 1539 lp->d_interleave = 1; 1540 lp->d_flags = 0; 1541 1542 lp->d_partitions[RAW_PART].p_offset = 0; 1543 lp->d_partitions[RAW_PART].p_size = lp->d_secperunit; 1544 lp->d_partitions[RAW_PART].p_fstype = FS_UNUSED; 1545 lp->d_npartitions = RAW_PART + 1; 1546 1547 lp->d_magic = DISKMAGIC; 1548 lp->d_magic2 = DISKMAGIC; 1549 lp->d_checksum = dkcksum(cs->sc_dkdev.dk_label); 1550 } 1551 1552 /* 1553 * Read the disklabel from the ccd. If one is not present, fake one 1554 * up. 1555 */ 1556 static void 1557 ccdgetdisklabel(dev_t dev) 1558 { 1559 int unit = ccdunit(dev); 1560 struct ccd_softc *cs; 1561 const char *errstring; 1562 struct disklabel *lp; 1563 struct cpu_disklabel *clp; 1564 1565 if ((cs = ccdget(unit, 0)) == NULL) 1566 return; 1567 lp = cs->sc_dkdev.dk_label; 1568 clp = cs->sc_dkdev.dk_cpulabel; 1569 KASSERT(mutex_owned(&cs->sc_dvlock)); 1570 1571 memset(clp, 0, sizeof(*clp)); 1572 1573 ccdgetdefaultlabel(cs, lp); 1574 1575 /* 1576 * Call the generic disklabel extraction routine. 1577 */ 1578 cs->sc_flags |= CCDF_RLABEL; 1579 if ((cs->sc_flags & CCDF_NOLABEL) != 0) 1580 errstring = "CCDF_NOLABEL set; ignoring on-disk label"; 1581 else 1582 errstring = readdisklabel(CCDLABELDEV(dev), ccdstrategy, 1583 cs->sc_dkdev.dk_label, cs->sc_dkdev.dk_cpulabel); 1584 if (errstring) 1585 ccdmakedisklabel(cs); 1586 else { 1587 int i; 1588 struct partition *pp; 1589 1590 /* 1591 * Sanity check whether the found disklabel is valid. 1592 * 1593 * This is necessary since total size of ccd may vary 1594 * when an interleave is changed even though exactly 1595 * same componets are used, and old disklabel may used 1596 * if that is found. 1597 */ 1598 if (lp->d_secperunit < UINT32_MAX ? 1599 lp->d_secperunit != cs->sc_size : 1600 lp->d_secperunit > cs->sc_size) 1601 printf("WARNING: %s: " 1602 "total sector size in disklabel (%ju) != " 1603 "the size of ccd (%ju)\n", cs->sc_xname, 1604 (uintmax_t)lp->d_secperunit, 1605 (uintmax_t)cs->sc_size); 1606 for (i = 0; i < lp->d_npartitions; i++) { 1607 pp = &lp->d_partitions[i]; 1608 if (pp->p_offset + pp->p_size > cs->sc_size) 1609 printf("WARNING: %s: end of partition `%c' " 1610 "exceeds the size of ccd (%ju)\n", 1611 cs->sc_xname, 'a' + i, (uintmax_t)cs->sc_size); 1612 } 1613 } 1614 1615 #ifdef DEBUG 1616 /* It's actually extremely common to have unlabeled ccds. */ 1617 if (ccddebug & CCDB_LABEL) 1618 if (errstring != NULL) 1619 printf("%s: %s\n", cs->sc_xname, errstring); 1620 #endif 1621 1622 /* In-core label now valid. */ 1623 cs->sc_flags = (cs->sc_flags | CCDF_VLABEL) & ~CCDF_RLABEL; 1624 } 1625 1626 /* 1627 * Take care of things one might want to take care of in the event 1628 * that a disklabel isn't present. 1629 */ 1630 static void 1631 ccdmakedisklabel(struct ccd_softc *cs) 1632 { 1633 struct disklabel *lp = cs->sc_dkdev.dk_label; 1634 1635 /* 1636 * For historical reasons, if there's no disklabel present 1637 * the raw partition must be marked FS_BSDFFS. 1638 */ 1639 lp->d_partitions[RAW_PART].p_fstype = FS_BSDFFS; 1640 1641 strncpy(lp->d_packname, "default label", sizeof(lp->d_packname)); 1642 1643 lp->d_checksum = dkcksum(lp); 1644 } 1645 1646 #ifdef DEBUG 1647 static void 1648 printiinfo(struct ccdiinfo *ii) 1649 { 1650 int ix, i; 1651 1652 for (ix = 0; ii->ii_ndisk; ix++, ii++) { 1653 printf(" itab[%d]: #dk %d sblk %" PRId64 " soff %" PRId64, 1654 ix, ii->ii_ndisk, ii->ii_startblk, ii->ii_startoff); 1655 for (i = 0; i < ii->ii_ndisk; i++) 1656 printf(" %d", ii->ii_index[i]); 1657 printf("\n"); 1658 } 1659 } 1660 #endif 1661 1662 MODULE(MODULE_CLASS_DRIVER, ccd, "dk_subr,bufq_fcfs"); 1663 1664 static int 1665 ccd_modcmd(modcmd_t cmd, void *arg) 1666 { 1667 int error = 0; 1668 #ifdef _MODULE 1669 int bmajor = -1, cmajor = -1; 1670 #endif 1671 1672 1673 switch (cmd) { 1674 case MODULE_CMD_INIT: 1675 #ifdef _MODULE 1676 ccdattach(0); 1677 1678 error = devsw_attach("ccd", &ccd_bdevsw, &bmajor, 1679 &ccd_cdevsw, &cmajor); 1680 #endif 1681 break; 1682 1683 case MODULE_CMD_FINI: 1684 #ifdef _MODULE 1685 mutex_enter(&ccd_lock); 1686 if (!LIST_EMPTY(&ccds)) { 1687 mutex_exit(&ccd_lock); 1688 error = EBUSY; 1689 } else { 1690 mutex_exit(&ccd_lock); 1691 error = devsw_detach(&ccd_bdevsw, &ccd_cdevsw); 1692 ccddetach(); 1693 } 1694 #endif 1695 break; 1696 1697 case MODULE_CMD_STAT: 1698 return ENOTTY; 1699 1700 default: 1701 return ENOTTY; 1702 } 1703 1704 return error; 1705 } 1706 1707 static int 1708 ccd_units_sysctl(SYSCTLFN_ARGS) 1709 { 1710 struct sysctlnode node; 1711 struct ccd_softc *sc; 1712 int error, i, nccd, *units; 1713 size_t size; 1714 1715 nccd = 0; 1716 mutex_enter(&ccd_lock); 1717 LIST_FOREACH(sc, &ccds, sc_link) 1718 nccd++; 1719 mutex_exit(&ccd_lock); 1720 1721 if (nccd != 0) { 1722 size = nccd * sizeof(*units); 1723 units = kmem_zalloc(size, KM_SLEEP); 1724 i = 0; 1725 mutex_enter(&ccd_lock); 1726 LIST_FOREACH(sc, &ccds, sc_link) { 1727 if (i >= nccd) 1728 break; 1729 units[i] = sc->sc_unit; 1730 } 1731 mutex_exit(&ccd_lock); 1732 } else { 1733 units = NULL; 1734 size = 0; 1735 } 1736 1737 node = *rnode; 1738 node.sysctl_data = units; 1739 node.sysctl_size = size; 1740 1741 error = sysctl_lookup(SYSCTLFN_CALL(&node)); 1742 if (units) 1743 kmem_free(units, size); 1744 return error; 1745 } 1746 1747 static int 1748 ccd_info_sysctl(SYSCTLFN_ARGS) 1749 { 1750 struct sysctlnode node; 1751 struct ccddiskinfo ccd; 1752 struct ccd_softc *sc; 1753 int unit; 1754 1755 if (newp == NULL || newlen != sizeof(int)) 1756 return EINVAL; 1757 1758 unit = *(const int *)newp; 1759 newp = NULL; 1760 newlen = 0; 1761 ccd.ccd_ndisks = ~0; 1762 mutex_enter(&ccd_lock); 1763 LIST_FOREACH(sc, &ccds, sc_link) { 1764 if (sc->sc_unit == unit) { 1765 ccd.ccd_ileave = sc->sc_ileave; 1766 ccd.ccd_size = sc->sc_size; 1767 ccd.ccd_ndisks = sc->sc_nccdisks; 1768 ccd.ccd_flags = sc->sc_flags; 1769 break; 1770 } 1771 } 1772 mutex_exit(&ccd_lock); 1773 1774 if (ccd.ccd_ndisks == ~0) 1775 return ENOENT; 1776 1777 node = *rnode; 1778 node.sysctl_data = &ccd; 1779 node.sysctl_size = sizeof(ccd); 1780 1781 return sysctl_lookup(SYSCTLFN_CALL(&node)); 1782 } 1783 1784 static int 1785 ccd_components_sysctl(SYSCTLFN_ARGS) 1786 { 1787 struct sysctlnode node; 1788 int error, unit; 1789 size_t size; 1790 char *names, *p, *ep; 1791 struct ccd_softc *sc; 1792 1793 if (newp == NULL || newlen != sizeof(int)) 1794 return EINVAL; 1795 1796 size = 0; 1797 unit = *(const int *)newp; 1798 newp = NULL; 1799 newlen = 0; 1800 mutex_enter(&ccd_lock); 1801 LIST_FOREACH(sc, &ccds, sc_link) 1802 if (sc->sc_unit == unit) { 1803 for (size_t i = 0; i < sc->sc_nccdisks; i++) 1804 size += strlen(sc->sc_cinfo[i].ci_path) + 1; 1805 break; 1806 } 1807 mutex_exit(&ccd_lock); 1808 1809 if (size == 0) 1810 return ENOENT; 1811 names = kmem_zalloc(size, KM_SLEEP); 1812 p = names; 1813 ep = names + size; 1814 mutex_enter(&ccd_lock); 1815 LIST_FOREACH(sc, &ccds, sc_link) 1816 if (sc->sc_unit == unit) { 1817 for (size_t i = 0; i < sc->sc_nccdisks; i++) { 1818 char *d = sc->sc_cinfo[i].ci_path; 1819 while (p < ep && (*p++ = *d++) != '\0') 1820 continue; 1821 } 1822 break; 1823 } 1824 mutex_exit(&ccd_lock); 1825 1826 node = *rnode; 1827 node.sysctl_data = names; 1828 node.sysctl_size = ep - names; 1829 1830 error = sysctl_lookup(SYSCTLFN_CALL(&node)); 1831 kmem_free(names, size); 1832 return error; 1833 } 1834 1835 SYSCTL_SETUP(sysctl_kern_ccd_setup, "sysctl kern.ccd subtree setup") 1836 { 1837 const struct sysctlnode *node = NULL; 1838 1839 sysctl_createv(clog, 0, NULL, &node, 1840 CTLFLAG_PERMANENT, 1841 CTLTYPE_NODE, "ccd", 1842 SYSCTL_DESCR("ConCatenated Disk state"), 1843 NULL, 0, NULL, 0, 1844 CTL_KERN, CTL_CREATE, CTL_EOL); 1845 1846 if (node == NULL) 1847 return; 1848 1849 sysctl_createv(clog, 0, &node, NULL, 1850 CTLFLAG_PERMANENT | CTLFLAG_READONLY, 1851 CTLTYPE_STRUCT, "units", 1852 SYSCTL_DESCR("List of ccd unit numbers"), 1853 ccd_units_sysctl, 0, NULL, 0, 1854 CTL_CREATE, CTL_EOL); 1855 sysctl_createv(clog, 0, &node, NULL, 1856 CTLFLAG_PERMANENT | CTLFLAG_READWRITE, 1857 CTLTYPE_STRUCT, "info", 1858 SYSCTL_DESCR("Information about a CCD unit"), 1859 ccd_info_sysctl, 0, NULL, 0, 1860 CTL_CREATE, CTL_EOL); 1861 sysctl_createv(clog, 0, &node, NULL, 1862 CTLFLAG_PERMANENT | CTLFLAG_READWRITE, 1863 CTLTYPE_STRUCT, "components", 1864 SYSCTL_DESCR("Information about CCD components"), 1865 ccd_components_sysctl, 0, NULL, 0, 1866 CTL_CREATE, CTL_EOL); 1867 } 1868