xref: /openbsd/sys/scsi/sd.c (revision 91f110e0)
1 /*	$OpenBSD: sd.c,v 1.253 2014/02/19 10:15:35 mpi Exp $	*/
2 /*	$NetBSD: sd.c,v 1.111 1997/04/02 02:29:41 mycroft Exp $	*/
3 
4 /*-
5  * Copyright (c) 1998 The NetBSD Foundation, Inc.
6  * All rights reserved.
7  *
8  * This code is derived from software contributed to The NetBSD Foundation
9  * by Charles M. Hannum.
10  *
11  * Redistribution and use in source and binary forms, with or without
12  * modification, are permitted provided that the following conditions
13  * are met:
14  * 1. Redistributions of source code must retain the above copyright
15  *    notice, this list of conditions and the following disclaimer.
16  * 2. Redistributions in binary form must reproduce the above copyright
17  *    notice, this list of conditions and the following disclaimer in the
18  *    documentation and/or other materials provided with the distribution.
19  *
20  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
21  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
22  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
23  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
24  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
25  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
26  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
27  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
28  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
29  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
30  * POSSIBILITY OF SUCH DAMAGE.
31  */
32 
33 /*
34  * Originally written by Julian Elischer (julian@dialix.oz.au)
35  * for TRW Financial Systems for use under the MACH(2.5) operating system.
36  *
37  * TRW Financial Systems, in accordance with their agreement with Carnegie
38  * Mellon University, makes this software available to CMU to distribute
39  * or use in any manner that they see fit as long as this message is kept with
40  * the software. For this reason TFS also grants any other persons or
41  * organisations permission to use or modify this software.
42  *
43  * TFS supplies this software to be publicly redistributed
44  * on the understanding that TFS is not responsible for the correct
45  * functioning of this software in any circumstances.
46  *
47  * Ported to run under 386BSD by Julian Elischer (julian@dialix.oz.au) Sept 1992
48  */
49 
50 #include <sys/stdint.h>
51 #include <sys/types.h>
52 #include <sys/param.h>
53 #include <sys/systm.h>
54 #include <sys/timeout.h>
55 #include <sys/file.h>
56 #include <sys/stat.h>
57 #include <sys/ioctl.h>
58 #include <sys/mtio.h>
59 #include <sys/mutex.h>
60 #include <sys/buf.h>
61 #include <sys/uio.h>
62 #include <sys/malloc.h>
63 #include <sys/pool.h>
64 #include <sys/errno.h>
65 #include <sys/device.h>
66 #include <sys/disklabel.h>
67 #include <sys/disk.h>
68 #include <sys/proc.h>
69 #include <sys/conf.h>
70 #include <sys/scsiio.h>
71 #include <sys/dkio.h>
72 #include <sys/reboot.h>
73 
74 #include <scsi/scsi_all.h>
75 #include <scsi/scsi_disk.h>
76 #include <scsi/scsiconf.h>
77 #include <scsi/sdvar.h>
78 
79 #include <ufs/ffs/fs.h>			/* for BBSIZE and SBSIZE */
80 
81 #include <sys/vnode.h>
82 
/* Autoconf entry points. */
int	sdmatch(struct device *, void *, void *);
void	sdattach(struct device *, struct device *, void *);
int	sdactivate(struct device *, int);
int	sddetach(struct device *, int);

/* Internal helpers: I/O sizing, label, command start/completion, sensing. */
void	sdminphys(struct buf *);
int	sdgetdisklabel(dev_t, struct sd_softc *, struct disklabel *, int);
void	sdstart(struct scsi_xfer *);
void	sd_shutdown(void *);
int	sd_interpret_sense(struct scsi_xfer *);
/* Capacity/parameter discovery (READ CAPACITY, VPD pages, thin provisioning). */
int	sd_read_cap_10(struct sd_softc *, int);
int	sd_read_cap_16(struct sd_softc *, int);
int	sd_size(struct sd_softc *, int);
int	sd_thin_pages(struct sd_softc *, int);
int	sd_vpd_block_limits(struct sd_softc *, int);
int	sd_vpd_thin(struct sd_softc *, int);
int	sd_thin_params(struct sd_softc *, int);
int	sd_get_parms(struct sd_softc *, struct disk_parms *, int);
void	sd_flush(struct sd_softc *, int);

void	viscpy(u_char *, u_char *, int);

/* ioctl(2) backends. */
int	sd_ioctl_inquiry(struct sd_softc *, struct dk_inquiry *);
int	sd_ioctl_cache(struct sd_softc *, long, struct dk_cache *);

/* CDB builders for the four READ/WRITE command sizes. */
void	sd_cmd_rw6(struct scsi_xfer *, int, u_int64_t, u_int);
void	sd_cmd_rw10(struct scsi_xfer *, int, u_int64_t, u_int);
void	sd_cmd_rw12(struct scsi_xfer *, int, u_int64_t, u_int);
void	sd_cmd_rw16(struct scsi_xfer *, int, u_int64_t, u_int);

void	sd_buf_done(struct scsi_xfer *);
114 
/* Autoconf attachment glue: softc size plus match/attach/detach/activate. */
struct cfattach sd_ca = {
	sizeof(struct sd_softc), sdmatch, sdattach,
	sddetach, sdactivate
};
119 
/* Driver definition: unit "sd", classed as a disk device. */
struct cfdriver sd_cd = {
	NULL, "sd", DV_DISK
};
123 
/*
 * Inquiry patterns accepted by sdmatch().  Empty vendor/product/revision
 * strings act as wildcards, so any direct-access, reduced-block-command
 * or optical device matches, fixed or removable.
 */
const struct scsi_inquiry_pattern sd_patterns[] = {
	{T_DIRECT, T_FIXED,
	 "",         "",                 ""},
	{T_DIRECT, T_REMOV,
	 "",         "",                 ""},
	{T_RDIRECT, T_FIXED,
	 "",         "",                 ""},
	{T_RDIRECT, T_REMOV,
	 "",         "",                 ""},
	{T_OPTICAL, T_FIXED,
	 "",         "",                 ""},
	{T_OPTICAL, T_REMOV,
	 "",         "",                 ""},
};
138 
139 #define sdlookup(unit) (struct sd_softc *)disk_lookup(&sd_cd, (unit))
140 
141 int
142 sdmatch(struct device *parent, void *match, void *aux)
143 {
144 	struct scsi_attach_args *sa = aux;
145 	int priority;
146 
147 	(void)scsi_inqmatch(sa->sa_inqbuf,
148 	    sd_patterns, nitems(sd_patterns),
149 	    sizeof(sd_patterns[0]), &priority);
150 
151 	return (priority);
152 }
153 
154 /*
155  * The routine called by the low level scsi routine when it discovers
156  * a device suitable for this driver.
157  */
void
sdattach(struct device *parent, struct device *self, void *aux)
{
	struct sd_softc *sc = (struct sd_softc *)self;
	struct scsi_attach_args *sa = aux;
	struct disk_parms *dp = &sc->params;
	struct scsi_link *sc_link = sa->sa_sc_link;
	/* Quiet, tolerant flags for all probing commands issued below. */
	int sd_autoconf = scsi_autoconf | SCSI_SILENT |
	    SCSI_IGNORE_ILLEGAL_REQUEST | SCSI_IGNORE_MEDIA_CHANGE;
	struct dk_cache dkc;
	int error, result, sortby = BUFQ_DEFAULT;

	SC_DEBUG(sc_link, SDEV_DB2, ("sdattach:\n"));

	/*
	 * Store information needed to contact our base driver
	 */
	sc->sc_link = sc_link;
	sc_link->interpret_sense = sd_interpret_sense;
	sc_link->device_softc = sc;

	/* Removable ATAPI devices are assumed not to handle SYNC CACHE. */
	if ((sc_link->flags & SDEV_ATAPI) && (sc_link->flags & SDEV_REMOVABLE))
		sc_link->quirks |= SDEV_NOSYNCCACHE;

	/*
	 * SDEV_ONLYBIG makes sdstart() avoid 6-byte READ/WRITE CDBs.
	 * NOTE(review): keyed off the SID_RelAdr inquiry bit being clear --
	 * presumably a heuristic for modern devices; confirm intent.
	 */
	if (!(sc_link->inqdata.flags & SID_RelAdr))
		sc_link->quirks |= SDEV_ONLYBIG;

	/*
	 * Note if this device is ancient.  This is used in sdminphys().
	 */
	if (!(sc_link->flags & SDEV_ATAPI) &&
	    SCSISPC(sa->sa_inqbuf->version) == 0)
		sc->flags |= SDF_ANCIENT;

	/*
	 * Use the subdriver to request information regarding
	 * the drive. We cannot use interrupts yet, so the
	 * request must specify this.
	 */
	printf("\n");

	/* Hook up the xfer handler; the timeout just re-queues it. */
	scsi_xsh_set(&sc->sc_xsh, sc_link, sdstart);
	timeout_set(&sc->sc_timeout, (void (*)(void *))scsi_xsh_add,
	    &sc->sc_xsh);

	/* Spin up non-UMASS devices ready or not. */
	if ((sc->sc_link->flags & SDEV_UMASS) == 0)
		scsi_start(sc_link, SSS_START, sd_autoconf);

	/*
	 * Some devices (e.g. Blackberry Pearl) won't admit they have
	 * media loaded unless its been locked in.
	 */
	if ((sc_link->flags & SDEV_REMOVABLE) != 0)
		scsi_prevent(sc_link, PR_PREVENT, sd_autoconf);

	/* Check that it is still responding and ok. */
	error = scsi_test_unit_ready(sc->sc_link, TEST_READY_RETRIES * 3,
	    sd_autoconf);

	if (error)
		result = SDGP_RESULT_OFFLINE;
	else
		result = sd_get_parms(sc, &sc->params, sd_autoconf);

	/* Undo the PR_PREVENT issued above; the device is not open yet. */
	if ((sc_link->flags & SDEV_REMOVABLE) != 0)
		scsi_prevent(sc_link, PR_ALLOW, sd_autoconf);

	switch (result) {
	case SDGP_RESULT_OK:
		printf("%s: %lluMB, %lu bytes/sector, %llu sectors",
		    sc->sc_dev.dv_xname,
		    dp->disksize / (1048576 / dp->secsize), dp->secsize,
		    dp->disksize);
		/* Thin-provisioned disks get FIFO queueing, not sorting. */
		if (ISSET(sc->flags, SDF_THIN)) {
			sortby = BUFQ_FIFO;
			printf(", thin");
		}
		if (ISSET(sc_link->flags, SDEV_READONLY)) {
			printf(", readonly");
		}
		printf("\n");
		break;

	case SDGP_RESULT_OFFLINE:
		break;

#ifdef DIAGNOSTIC
	default:
		panic("sdattach: unknown result (%#x) from get_parms", result);
		break;
#endif
	}

	/*
	 * Initialize disk structures.
	 */
	sc->sc_dk.dk_name = sc->sc_dev.dv_xname;
	bufq_init(&sc->sc_bufq, sortby);

	/*
	 * Enable write cache by default.
	 */
	memset(&dkc, 0, sizeof(dkc));
	if (sd_ioctl_cache(sc, DIOCGCACHE, &dkc) == 0 && dkc.wrcache == 0) {
		dkc.wrcache = 1;
		sd_ioctl_cache(sc, DIOCSCACHE, &dkc);
	}

	/*
	 * Establish a shutdown hook so that we can ensure that
	 * our data has actually made it onto the platter at
	 * shutdown time.  Note that this relies on the fact
	 * that the shutdown hook code puts us at the head of
	 * the list (thus guaranteeing that our hook runs before
	 * our ancestors').
	 */
	if ((sc->sc_sdhook =
	    shutdownhook_establish(sd_shutdown, sc)) == NULL)
		printf("%s: WARNING: unable to establish shutdown hook\n",
		    sc->sc_dev.dv_xname);

	/* Attach disk. */
	disk_attach(&sc->sc_dev, &sc->sc_dk);
}
283 
/*
 * Autoconf activate hook: handle suspend/resume/powerdown/deactivate
 * transitions.  Always returns 0.
 */
int
sdactivate(struct device *self, int act)
{
	struct sd_softc *sc = (struct sd_softc *)self;
	int rv = 0;

	switch (act) {
	case DVACT_SUSPEND:
		/*
		 * We flush the cache, since our next step before
		 * DVACT_POWERDOWN might be a hibernate operation.
		 */
		if ((sc->flags & SDF_DIRTY) != 0)
			sd_flush(sc, SCSI_AUTOCONF);
		break;
	case DVACT_POWERDOWN:
		/*
		 * Stop the disk.  Stopping the disk should flush the
		 * cache, but we are paranoid so we flush the cache
		 * first.
		 */
		if ((sc->flags & SDF_DIRTY) != 0)
			sd_flush(sc, SCSI_AUTOCONF);
		/* Only spin the disk down when actually powering off. */
		if (boothowto & RB_POWERDOWN)
			scsi_start(sc->sc_link, SSS_STOP,
			    SCSI_IGNORE_ILLEGAL_REQUEST | SCSI_AUTOCONF);
		break;
	case DVACT_RESUME:
		scsi_start(sc->sc_link, SSS_START,
		    SCSI_IGNORE_ILLEGAL_REQUEST | SCSI_AUTOCONF);
		break;
	case DVACT_DEACTIVATE:
		/* Mark the device dying and cancel any pending I/O start. */
		sc->flags |= SDF_DYING;
		scsi_xsh_del(&sc->sc_xsh);
		break;
	}
	return (rv);
}
322 
/*
 * Autoconf detach hook: drain and tear down the buffer queue, revoke
 * open instances, remove the shutdown hook and detach the disk.
 */
int
sddetach(struct device *self, int flags)
{
	struct sd_softc *sc = (struct sd_softc *)self;

	/* Error out any bufs still queued before tearing anything down. */
	bufq_drain(&sc->sc_bufq);

	/* Revoke vnodes/open instances referring to this unit. */
	disk_gone(sdopen, self->dv_unit);

	/* Get rid of the shutdown hook. */
	if (sc->sc_sdhook != NULL)
		shutdownhook_disestablish(sc->sc_sdhook);

	/* Detach disk. */
	bufq_destroy(&sc->sc_bufq);
	disk_detach(&sc->sc_dk);

	return (0);
}
342 
343 /*
344  * Open the device. Make sure the partition info is as up-to-date as can be.
345  */
int
sdopen(dev_t dev, int flag, int fmt, struct proc *p)
{
	struct scsi_link *sc_link;
	struct sd_softc *sc;
	int error = 0, part, rawopen, unit;

	unit = DISKUNIT(dev);
	part = DISKPART(dev);

	/* "Raw" open: character device on the raw partition; less strict. */
	rawopen = (part == RAW_PART) && (fmt == S_IFCHR);

	sc = sdlookup(unit);
	if (sc == NULL)
		return (ENXIO);
	sc_link = sc->sc_link;

	if (sc->flags & SDF_DYING) {
		device_unref(&sc->sc_dev);
		return (ENXIO);
	}
	/* Refuse write opens of read-only media. */
	if (ISSET(flag, FWRITE) && ISSET(sc_link->flags, SDEV_READONLY)) {
		device_unref(&sc->sc_dev);
		return (EACCES);
	}

	SC_DEBUG(sc_link, SDEV_DB1,
	    ("sdopen: dev=0x%x (unit %d (of %d), partition %d)\n", dev, unit,
	    sd_cd.cd_ndevs, part));

	if ((error = disk_lock(&sc->sc_dk)) != 0) {
		device_unref(&sc->sc_dev);
		return (error);
	}

	if (sc->sc_dk.dk_openmask != 0) {
		/*
		 * If any partition is open, but the disk has been invalidated,
		 * disallow further opens of non-raw partition.
		 */
		if ((sc_link->flags & SDEV_MEDIA_LOADED) == 0) {
			if (rawopen)
				goto out;
			error = EIO;
			goto bad;
		}
	} else {
		/* First open: bring the device fully on line. */
		/* Spin up non-UMASS devices ready or not. */
		if ((sc->sc_link->flags & SDEV_UMASS) == 0)
			scsi_start(sc_link, SSS_START, (rawopen ? SCSI_SILENT :
			    0) | SCSI_IGNORE_ILLEGAL_REQUEST |
			    SCSI_IGNORE_MEDIA_CHANGE);

		/* Use sd_interpret_sense() for sense errors.
		 *
		 * But only after spinning the disk up! Just in case a broken
		 * device returns "Initialization command required." and causes
		 * a loop of scsi_start() calls.
		 */
		sc_link->flags |= SDEV_OPEN;

		/*
		 * Try to prevent the unloading of a removable device while
		 * it's open. But allow the open to proceed if the device can't
		 * be locked in.
		 */
		if ((sc_link->flags & SDEV_REMOVABLE) != 0) {
			scsi_prevent(sc_link, PR_PREVENT, SCSI_SILENT |
			    SCSI_IGNORE_ILLEGAL_REQUEST |
			    SCSI_IGNORE_MEDIA_CHANGE);
		}

		/* Check that it is still responding and ok. */
		error = scsi_test_unit_ready(sc_link,
		    TEST_READY_RETRIES, SCSI_SILENT |
		    SCSI_IGNORE_ILLEGAL_REQUEST | SCSI_IGNORE_MEDIA_CHANGE);

		/* Raw opens are allowed to succeed with no media present. */
		if (error) {
			if (rawopen) {
				error = 0;
				goto out;
			} else
				goto bad;
		}

		/* Load the physical device parameters. */
		sc_link->flags |= SDEV_MEDIA_LOADED;
		if (sd_get_parms(sc, &sc->params, (rawopen ? SCSI_SILENT : 0))
		    == SDGP_RESULT_OFFLINE) {
			sc_link->flags &= ~SDEV_MEDIA_LOADED;
			error = ENXIO;
			goto bad;
		}
		SC_DEBUG(sc_link, SDEV_DB3, ("Params loaded\n"));

		/* Load the partition info if not already loaded. */
		if (sdgetdisklabel(dev, sc, sc->sc_dk.dk_label, 0) == EIO) {
			error = EIO;
			goto bad;
		}
		SC_DEBUG(sc_link, SDEV_DB3, ("Disklabel loaded\n"));
	}

out:
	if ((error = disk_openpart(&sc->sc_dk, part, fmt, 1)) != 0)
		goto bad;

	SC_DEBUG(sc_link, SDEV_DB3, ("open complete\n"));

	/* It's OK to fall through because dk_openmask is now non-zero. */
bad:
	/* On a failed first open, release the media lock and open state. */
	if (sc->sc_dk.dk_openmask == 0) {
		if ((sc->sc_link->flags & SDEV_REMOVABLE) != 0)
			scsi_prevent(sc_link, PR_ALLOW, SCSI_SILENT |
			    SCSI_IGNORE_ILLEGAL_REQUEST |
			    SCSI_IGNORE_MEDIA_CHANGE);
		sc_link->flags &= ~(SDEV_OPEN | SDEV_MEDIA_LOADED);
	}

	disk_unlock(&sc->sc_dk);
	device_unref(&sc->sc_dev);
	return (error);
}
469 
470 /*
471  * Close the device. Only called if we are the last occurrence of an open
472  * device.  Convenient now but usually a pain.
473  */
int
sdclose(dev_t dev, int flag, int fmt, struct proc *p)
{
	struct sd_softc *sc;
	int part = DISKPART(dev);

	sc = sdlookup(DISKUNIT(dev));
	if (sc == NULL)
		return (ENXIO);
	if (sc->flags & SDF_DYING) {
		device_unref(&sc->sc_dev);
		return (ENXIO);
	}

	disk_lock_nointr(&sc->sc_dk);

	disk_closepart(&sc->sc_dk, part, fmt);

	/* Last close of the whole device: flush, unlock media, stop I/O. */
	if (sc->sc_dk.dk_openmask == 0) {
		if ((sc->flags & SDF_DIRTY) != 0)
			sd_flush(sc, 0);

		/* Allow removable media to be ejected again. */
		if ((sc->sc_link->flags & SDEV_REMOVABLE) != 0)
			scsi_prevent(sc->sc_link, PR_ALLOW,
			    SCSI_IGNORE_ILLEGAL_REQUEST |
			    SCSI_IGNORE_NOT_READY | SCSI_SILENT);
		sc->sc_link->flags &= ~(SDEV_OPEN | SDEV_MEDIA_LOADED);

		/* Honour a pending DIOCEJECT by ejecting on last close. */
		if (sc->sc_link->flags & SDEV_EJECTING) {
			scsi_start(sc->sc_link, SSS_STOP|SSS_LOEJ, 0);
			sc->sc_link->flags &= ~SDEV_EJECTING;
		}

		timeout_del(&sc->sc_timeout);
		scsi_xsh_del(&sc->sc_xsh);
	}

	disk_unlock(&sc->sc_dk);
	device_unref(&sc->sc_dev);
	return 0;
}
515 
516 /*
517  * Actually translate the requested transfer into one the physical driver
518  * can understand.  The transfer is described by a buf and will include
519  * only one physical transfer.
520  */
void
sdstrategy(struct buf *bp)
{
	struct sd_softc *sc;
	int s;

	sc = sdlookup(DISKUNIT(bp->b_dev));
	if (sc == NULL) {
		bp->b_error = ENXIO;
		goto bad;
	}
	if (sc->flags & SDF_DYING) {
		bp->b_error = ENXIO;
		goto bad;
	}

	SC_DEBUG(sc->sc_link, SDEV_DB2, ("sdstrategy: %ld bytes @ blk %lld\n",
	    bp->b_bcount, (long long)bp->b_blkno));
	/*
	 * If the device has been made invalid, error out
	 */
	if ((sc->sc_link->flags & SDEV_MEDIA_LOADED) == 0) {
		/* EIO if someone still has it open, ENODEV otherwise. */
		if (sc->sc_link->flags & SDEV_OPEN)
			bp->b_error = EIO;
		else
			bp->b_error = ENODEV;
		goto bad;
	}

	/* Validate the request. */
	if (bounds_check_with_label(bp, sc->sc_dk.dk_label) == -1)
		goto done;

	/* Place it in the queue of disk activities for this disk. */
	bufq_queue(&sc->sc_bufq, bp);

	/*
	 * Tell the device to get going on the transfer if it's
	 * not doing anything, otherwise just wait for completion
	 */
	scsi_xsh_add(&sc->sc_xsh);

	device_unref(&sc->sc_dev);
	return;

 bad:
	bp->b_flags |= B_ERROR;
	bp->b_resid = bp->b_bcount;
 done:
	/* Complete the buf at splbio; sc may be NULL on the lookup-fail path. */
	s = splbio();
	biodone(bp);
	splx(s);
	if (sc != NULL)
		device_unref(&sc->sc_dev);
}
576 
577 void
578 sd_cmd_rw6(struct scsi_xfer *xs, int read, u_int64_t secno, u_int nsecs)
579 {
580 	struct scsi_rw *cmd = (struct scsi_rw *)xs->cmd;
581 
582 	cmd->opcode = read ? READ_COMMAND : WRITE_COMMAND;
583 	_lto3b(secno, cmd->addr);
584 	cmd->length = nsecs;
585 
586 	xs->cmdlen = sizeof(*cmd);
587 }
588 
589 void
590 sd_cmd_rw10(struct scsi_xfer *xs, int read, u_int64_t secno, u_int nsecs)
591 {
592 	struct scsi_rw_big *cmd = (struct scsi_rw_big *)xs->cmd;
593 
594 	cmd->opcode = read ? READ_BIG : WRITE_BIG;
595 	_lto4b(secno, cmd->addr);
596 	_lto2b(nsecs, cmd->length);
597 
598 	xs->cmdlen = sizeof(*cmd);
599 }
600 
601 void
602 sd_cmd_rw12(struct scsi_xfer *xs, int read, u_int64_t secno, u_int nsecs)
603 {
604 	struct scsi_rw_12 *cmd = (struct scsi_rw_12 *)xs->cmd;
605 
606 	cmd->opcode = read ? READ_12 : WRITE_12;
607 	_lto4b(secno, cmd->addr);
608 	_lto4b(nsecs, cmd->length);
609 
610 	xs->cmdlen = sizeof(*cmd);
611 }
612 
613 void
614 sd_cmd_rw16(struct scsi_xfer *xs, int read, u_int64_t secno, u_int nsecs)
615 {
616 	struct scsi_rw_16 *cmd = (struct scsi_rw_16 *)xs->cmd;
617 
618 	cmd->opcode = read ? READ_16 : WRITE_16;
619 	_lto8b(secno, cmd->addr);
620 	_lto4b(nsecs, cmd->length);
621 
622 	xs->cmdlen = sizeof(*cmd);
623 }
624 
625 /*
626  * sdstart looks to see if there is a buf waiting for the device
627  * and that the device is not already busy. If both are true,
628  * It dequeues the buf and creates a scsi command to perform the
629  * transfer in the buf. The transfer request will call scsi_done
630  * on completion, which will in turn call this routine again
631  * so that the next queued transfer is performed.
632  * The bufs are queued by the strategy routine (sdstrategy)
633  *
634  * This routine is also called after other non-queued requests
635  * have been made of the scsi driver, to ensure that the queue
636  * continues to be drained.
637  */
void
sdstart(struct scsi_xfer *xs)
{
	struct scsi_link *link = xs->sc_link;
	struct sd_softc *sc = link->device_softc;
	struct buf *bp;
	u_int64_t secno;
	int nsecs;
	int read;
	struct partition *p;

	/* Device going away: give the xfer back. */
	if (sc->flags & SDF_DYING) {
		scsi_xs_put(xs);
		return;
	}
	/* Media gone: error out everything queued and stop. */
	if ((link->flags & SDEV_MEDIA_LOADED) == 0) {
		bufq_drain(&sc->sc_bufq);
		scsi_xs_put(xs);
		return;
	}

	bp = bufq_dequeue(&sc->sc_bufq);
	if (bp == NULL) {
		scsi_xs_put(xs);
		return;
	}

	/* Convert the block address to an absolute sector number. */
	secno = DL_BLKTOSEC(sc->sc_dk.dk_label, bp->b_blkno);

	p = &sc->sc_dk.dk_label->d_partitions[DISKPART(bp->b_dev)];
	secno += DL_GETPOFFSET(p);
	nsecs = howmany(bp->b_bcount, sc->sc_dk.dk_label->d_secsize);
	read = bp->b_flags & B_READ;

	/*
	 *  Fill out the scsi command.  If the transfer will
	 *  fit in a "small" cdb, use it.
	 */
	if (!(link->flags & SDEV_ATAPI) &&
	    !(link->quirks & SDEV_ONLYBIG) &&
	    ((secno & 0x1fffff) == secno) &&
	    ((nsecs & 0xff) == nsecs))
		sd_cmd_rw6(xs, read, secno, nsecs);
	else if (((secno & 0xffffffff) == secno) &&
	    ((nsecs & 0xffff) == nsecs))
		sd_cmd_rw10(xs, read, secno, nsecs);
	else if (((secno & 0xffffffff) == secno) &&
	    ((nsecs & 0xffffffff) == nsecs))
		sd_cmd_rw12(xs, read, secno, nsecs);
	else
		sd_cmd_rw16(xs, read, secno, nsecs);

	xs->flags |= (read ? SCSI_DATA_IN : SCSI_DATA_OUT);
	xs->timeout = 60000;
	xs->data = bp->b_data;
	xs->datalen = bp->b_bcount;

	/* bp rides along in the cookie so sd_buf_done() can finish it. */
	xs->done = sd_buf_done;
	xs->cookie = bp;
	xs->bp = bp;

	/* Instrumentation. */
	disk_busy(&sc->sc_dk);

	/* Mark disk as dirty. */
	if (!read)
		sc->flags |= SDF_DIRTY;

	scsi_xs_exec(xs);

	/* move onto the next io */
	if (ISSET(sc->flags, SDF_WAITING))
		CLR(sc->flags, SDF_WAITING);
	else if (bufq_peek(&sc->sc_bufq))
		scsi_xsh_add(&sc->sc_xsh);
}
714 
/*
 * Completion handler for buf-backed transfers issued by sdstart().
 * Decides between finishing the buf, requeueing it, or retrying the
 * xfer in place.
 */
void
sd_buf_done(struct scsi_xfer *xs)
{
	struct sd_softc *sc = xs->sc_link->device_softc;
	struct buf *bp = xs->cookie;
	int error, s;

	switch (xs->error) {
	case XS_NOERROR:
		bp->b_error = 0;
		bp->b_resid = xs->resid;
		break;

	case XS_NO_CCB:
		/* The adapter is busy, requeue the buf and try it later. */
		disk_unbusy(&sc->sc_dk, bp->b_bcount - xs->resid,
		    bp->b_flags & B_READ);
		bufq_requeue(&sc->sc_bufq, bp);
		scsi_xs_put(xs);
		/* SDF_WAITING stops sdstart() from rescheduling itself. */
		SET(sc->flags, SDF_WAITING);
		timeout_add(&sc->sc_timeout, 1);
		return;

	case XS_SENSE:
	case XS_SHORTSENSE:
#ifdef SCSIDEBUG
		scsi_sense_print_debug(xs);
#endif
		/* 0 = recovered, ERESTART = retry, else hard error. */
		error = sd_interpret_sense(xs);
		if (error == 0) {
			bp->b_error = 0;
			bp->b_resid = xs->resid;
			break;
		}
		if (error != ERESTART) {
			bp->b_error = error;
			xs->retries = 0;
		}
		goto retry;

	case XS_BUSY:
		/* scsi_delay() decides whether the retry is worthwhile. */
		if (xs->retries) {
			if (scsi_delay(xs, 1) != ERESTART)
				xs->retries = 0;
		}
		goto retry;

	case XS_TIMEOUT:
retry:
		/* Re-issue the same xfer until the retry budget runs out. */
		if (xs->retries--) {
			scsi_xs_exec(xs);
			return;
		}
		/* FALLTHROUGH */

	default:
		if (bp->b_error == 0)
			bp->b_error = EIO;
		bp->b_flags |= B_ERROR;
		bp->b_resid = bp->b_bcount;
		break;
	}

	disk_unbusy(&sc->sc_dk, bp->b_bcount - xs->resid,
	    bp->b_flags & B_READ);

	s = splbio();
	biodone(bp);
	splx(s);
	scsi_xs_put(xs);
}
786 
/*
 * Clamp a transfer to what this device can handle, then defer to the
 * adapter's own minphys routine.
 */
void
sdminphys(struct buf *bp)
{
	struct sd_softc *sc;
	long max;

	sc = sdlookup(DISKUNIT(bp->b_dev));
	if (sc == NULL)
		return;  /* XXX - right way to fail this? */

	/*
	 * If the device is ancient, we want to make sure that
	 * the transfer fits into a 6-byte cdb.
	 *
	 * XXX Note that the SCSI-I spec says that 256-block transfers
	 * are allowed in a 6-byte read/write, and are specified
	 * by setting the "length" to 0.  However, we're conservative
	 * here, allowing only 255-block transfers in case an
	 * ancient device gets confused by length == 0.  A length of 0
	 * in a 10-byte read/write actually means 0 blocks.
	 */
	if (sc->flags & SDF_ANCIENT) {
		max = sc->sc_dk.dk_label->d_secsize * 0xff;

		if (bp->b_bcount > max)
			bp->b_bcount = max;
	}

	/* Let the adapter apply its own, possibly tighter, limit. */
	(*sc->sc_link->adapter->scsi_minphys)(bp, sc->sc_link);

	device_unref(&sc->sc_dev);
}
819 
/* Character-device read: raw I/O through physio(). */
int
sdread(dev_t dev, struct uio *uio, int ioflag)
{
	return (physio(sdstrategy, dev, B_READ, sdminphys, uio));
}
825 
/* Character-device write: raw I/O through physio(). */
int
sdwrite(dev_t dev, struct uio *uio, int ioflag)
{
	return (physio(sdstrategy, dev, B_WRITE, sdminphys, uio));
}
831 
832 /*
833  * Perform special action on behalf of the user
834  * Knows about the internals of this device
835  */
int
sdioctl(dev_t dev, u_long cmd, caddr_t addr, int flag, struct proc *p)
{
	struct sd_softc *sc;
	struct disklabel *lp;
	int error = 0;
	int part = DISKPART(dev);

	sc = sdlookup(DISKUNIT(dev));
	if (sc == NULL)
		return (ENXIO);
	if (sc->flags & SDF_DYING) {
		device_unref(&sc->sc_dev);
		return (ENXIO);
	}

	SC_DEBUG(sc->sc_link, SDEV_DB2, ("sdioctl 0x%lx\n", cmd));

	/*
	 * If the device is not valid.. abandon ship
	 */
	if ((sc->sc_link->flags & SDEV_MEDIA_LOADED) == 0) {
		switch (cmd) {
		/* These are still allowed on the raw partition. */
		case DIOCLOCK:
		case DIOCEJECT:
		case SCIOCIDENTIFY:
		case SCIOCCOMMAND:
		case SCIOCDEBUG:
			if (part == RAW_PART)
				break;
		/* FALLTHROUGH */
		default:
			if ((sc->sc_link->flags & SDEV_OPEN) == 0) {
				error = ENODEV;
				goto exit;
			} else {
				error = EIO;
				goto exit;
			}
		}
	}

	switch (cmd) {
	case DIOCRLDINFO:
		/* Re-read the label into a scratch buffer, then install it. */
		lp = malloc(sizeof(*lp), M_TEMP, M_WAITOK);
		sdgetdisklabel(dev, sc, lp, 0);
		bcopy(lp, sc->sc_dk.dk_label, sizeof(*lp));
		free(lp, M_TEMP);
		goto exit;

	case DIOCGPDINFO:
		/* Spoofed (default) label only; no on-disk read. */
		sdgetdisklabel(dev, sc, (struct disklabel *)addr, 1);
		goto exit;

	case DIOCGDINFO:
		*(struct disklabel *)addr = *(sc->sc_dk.dk_label);
		goto exit;

	case DIOCGPART:
		((struct partinfo *)addr)->disklab = sc->sc_dk.dk_label;
		((struct partinfo *)addr)->part =
		    &sc->sc_dk.dk_label->d_partitions[DISKPART(dev)];
		goto exit;

	case DIOCWDINFO:
	case DIOCSDINFO:
		if ((flag & FWRITE) == 0) {
			error = EBADF;
			goto exit;
		}

		if ((error = disk_lock(&sc->sc_dk)) != 0)
			goto exit;

		error = setdisklabel(sc->sc_dk.dk_label,
		    (struct disklabel *)addr, sc->sc_dk.dk_openmask);
		if (error == 0) {
			/* DIOCWDINFO additionally writes the label to disk. */
			if (cmd == DIOCWDINFO)
				error = writedisklabel(DISKLABELDEV(dev),
				    sdstrategy, sc->sc_dk.dk_label);
		}

		disk_unlock(&sc->sc_dk);
		goto exit;

	case DIOCLOCK:
		error = scsi_prevent(sc->sc_link,
		    (*(int *)addr) ? PR_PREVENT : PR_ALLOW, 0);
		goto exit;

	case MTIOCTOP:
		/* Only the "offline" tape op maps onto a disk (as eject). */
		if (((struct mtop *)addr)->mt_op != MTOFFL) {
			error = EIO;
			goto exit;
		}
		/* FALLTHROUGH */
	case DIOCEJECT:
		if ((sc->sc_link->flags & SDEV_REMOVABLE) == 0) {
			error = ENOTTY;
			goto exit;
		}
		/* Deferred: the actual eject happens on last close. */
		sc->sc_link->flags |= SDEV_EJECTING;
		goto exit;

	case DIOCINQ:
		/* Give the adapter first refusal, then fall back to VPD. */
		error = scsi_do_ioctl(sc->sc_link, cmd, addr, flag);
		if (error == ENOTTY)
			error = sd_ioctl_inquiry(sc,
			    (struct dk_inquiry *)addr);
		goto exit;

	case DIOCSCACHE:
		if (!ISSET(flag, FWRITE)) {
			error = EBADF;
			goto exit;
		}
		/* FALLTHROUGH */
	case DIOCGCACHE:
		error = sd_ioctl_cache(sc, cmd, (struct dk_cache *)addr);
		goto exit;

	default:
		/* Unknown commands pass through, raw partition only. */
		if (part != RAW_PART) {
			error = ENOTTY;
			goto exit;
		}
		error = scsi_do_ioctl(sc->sc_link, cmd, addr, flag);
	}

 exit:
	device_unref(&sc->sc_dev);
	return (error);
}
969 
970 int
971 sd_ioctl_inquiry(struct sd_softc *sc, struct dk_inquiry *di)
972 {
973 	struct scsi_vpd_serial *vpd;
974 
975 	vpd = dma_alloc(sizeof(*vpd), PR_WAITOK | PR_ZERO);
976 
977 	bzero(di, sizeof(struct dk_inquiry));
978 	scsi_strvis(di->vendor, sc->sc_link->inqdata.vendor,
979 	    sizeof(sc->sc_link->inqdata.vendor));
980 	scsi_strvis(di->product, sc->sc_link->inqdata.product,
981 	    sizeof(sc->sc_link->inqdata.product));
982 	scsi_strvis(di->revision, sc->sc_link->inqdata.revision,
983 	    sizeof(sc->sc_link->inqdata.revision));
984 
985 	/* the serial vpd page is optional */
986 	if (scsi_inquire_vpd(sc->sc_link, vpd, sizeof(*vpd),
987 	    SI_PG_SERIAL, 0) == 0)
988 		scsi_strvis(di->serial, vpd->serial, sizeof(vpd->serial));
989 	else
990 		strlcpy(di->serial, "(unknown)", sizeof(vpd->serial));
991 
992 	dma_free(vpd, sizeof(*vpd));
993 	return (0);
994 }
995 
/*
 * DIOCGCACHE/DIOCSCACHE backend: read (and for DIOCSCACHE, modify) the
 * write-cache-enable and read-cache-disable bits of the caching mode
 * page.  Returns 0 on success or an errno.
 */
int
sd_ioctl_cache(struct sd_softc *sc, long cmd, struct dk_cache *dkc)
{
	union scsi_mode_sense_buf *buf;
	struct page_caching_mode *mode = NULL;
	u_int wrcache, rdcache;
	int big;
	int rv;

	/* USB mass storage devices do not support mode page manipulation. */
	if (ISSET(sc->sc_link->flags, SDEV_UMASS))
		return (EOPNOTSUPP);

	/* see if the adapter has special handling */
	rv = scsi_do_ioctl(sc->sc_link, cmd, (caddr_t)dkc, 0);
	if (rv != ENOTTY)
		return (rv);

	buf = dma_alloc(sizeof(*buf), PR_WAITOK);
	if (buf == NULL)
		return (ENOMEM);

	/* Fetch the caching mode page; "big" notes which header was used. */
	rv = scsi_do_mode_sense(sc->sc_link, PAGE_CACHING_MODE,
	    buf, (void **)&mode, NULL, NULL, NULL,
	    sizeof(*mode) - 4, scsi_autoconf | SCSI_SILENT, &big);
	if (rv != 0)
		goto done;

	if ((mode == NULL) || (!DISK_PGCODE(mode, PAGE_CACHING_MODE))) {
		rv = EIO;
		goto done;
	}

	/* WCE set = write cache on; RCD set = read cache *disabled*. */
	wrcache = (ISSET(mode->flags, PG_CACHE_FL_WCE) ? 1 : 0);
	rdcache = (ISSET(mode->flags, PG_CACHE_FL_RCD) ? 0 : 1);

	switch (cmd) {
	case DIOCGCACHE:
		dkc->wrcache = wrcache;
		dkc->rdcache = rdcache;
		break;

	case DIOCSCACHE:
		/* Nothing to do if the requested state is already set. */
		if (dkc->wrcache == wrcache && dkc->rdcache == rdcache)
			break;

		if (dkc->wrcache)
			SET(mode->flags, PG_CACHE_FL_WCE);
		else
			CLR(mode->flags, PG_CACHE_FL_WCE);

		if (dkc->rdcache)
			CLR(mode->flags, PG_CACHE_FL_RCD);
		else
			SET(mode->flags, PG_CACHE_FL_RCD);

		/* Write back with the same header format we sensed with. */
		if (big) {
			rv = scsi_mode_select_big(sc->sc_link, SMS_PF,
			    &buf->hdr_big, scsi_autoconf | SCSI_SILENT, 20000);
		} else {
			rv = scsi_mode_select(sc->sc_link, SMS_PF,
			    &buf->hdr, scsi_autoconf | SCSI_SILENT, 20000);
		}
		break;
	}

done:
	dma_free(buf, sizeof(*buf));
	return (rv);
}
1065 
1066 /*
1067  * Load the label information on the named device
1068  */
int
sdgetdisklabel(dev_t dev, struct sd_softc *sc, struct disklabel *lp,
    int spoofonly)
{
	size_t len;
	char packname[sizeof(lp->d_packname) + 1];
	char product[17], vendor[9];

	bzero(lp, sizeof(struct disklabel));

	/* Seed geometry from the parameters sd_get_parms() discovered. */
	lp->d_secsize = sc->params.secsize;
	lp->d_ntracks = sc->params.heads;
	lp->d_nsectors = sc->params.sectors;
	lp->d_ncylinders = sc->params.cyls;
	lp->d_secpercyl = lp->d_ntracks * lp->d_nsectors;
	if (lp->d_secpercyl == 0) {
		lp->d_secpercyl = 100;
		/* as long as it's not 0 - readdisklabel divides by it */
	}

	lp->d_type = DTYPE_SCSI;
	if ((sc->sc_link->inqdata.device & SID_TYPE) == T_OPTICAL)
		strncpy(lp->d_typename, "SCSI optical",
		    sizeof(lp->d_typename));
	else
		strncpy(lp->d_typename, "SCSI disk",
		    sizeof(lp->d_typename));

	/*
	 * Try to fit '<vendor> <product>' into d_packname. If that doesn't fit
	 * then leave out '<vendor> ' and use only as much of '<product>' as
	 * does fit.
	 */
	viscpy(vendor, sc->sc_link->inqdata.vendor, 8);
	viscpy(product, sc->sc_link->inqdata.product, 16);
	len = snprintf(packname, sizeof(packname), "%s %s", vendor, product);
	if (len > sizeof(lp->d_packname)) {
		strlcpy(packname, product, sizeof(packname));
		len = strlen(packname);
	}
	/*
	 * It is safe to use len as the count of characters to copy because
	 * packname is sizeof(lp->d_packname)+1, the string in packname is
	 * always null terminated and len does not count the terminating null.
	 * d_packname is not a null terminated string.
	 */
	bcopy(packname, lp->d_packname, len);

	DL_SETDSIZE(lp, sc->params.disksize);
	lp->d_version = 1;
	lp->d_flags = 0;

	/* XXX - these values for BBSIZE and SBSIZE assume ffs */
	lp->d_bbsize = BBSIZE;
	lp->d_sbsize = SBSIZE;

	/* Checksum must be computed last, over the finished label. */
	lp->d_magic = DISKMAGIC;
	lp->d_magic2 = DISKMAGIC;
	lp->d_checksum = dkcksum(lp);

	/*
	 * Call the generic disklabel extraction routine
	 */
	return readdisklabel(DISKLABELDEV(dev), sdstrategy, lp, spoofonly);
}
1134 
1135 
1136 void
1137 sd_shutdown(void *arg)
1138 {
1139 	struct sd_softc *sc = (struct sd_softc *)arg;
1140 
1141 	/*
1142 	 * If the disk cache needs to be flushed, and the disk supports
1143 	 * it, flush it.  We're cold at this point, so we poll for
1144 	 * completion.
1145 	 */
1146 	if ((sc->flags & SDF_DIRTY) != 0)
1147 		sd_flush(sc, SCSI_AUTOCONF);
1148 }
1149 
1150 /*
1151  * Check Errors
1152  */
1153 int
1154 sd_interpret_sense(struct scsi_xfer *xs)
1155 {
1156 	struct scsi_sense_data *sense = &xs->sense;
1157 	struct scsi_link *sc_link = xs->sc_link;
1158 	struct sd_softc *sc = sc_link->device_softc;
1159 	u_int8_t serr = sense->error_code & SSD_ERRCODE;
1160 	int retval;
1161 
1162 	/*
1163 	 * Let the generic code handle everything except a few categories of
1164 	 * LUN not ready errors on open devices.
1165 	 */
1166 	if (((sc_link->flags & SDEV_OPEN) == 0) ||
1167 	    (serr != SSD_ERRCODE_CURRENT && serr != SSD_ERRCODE_DEFERRED) ||
1168 	    ((sense->flags & SSD_KEY) != SKEY_NOT_READY) ||
1169 	    (sense->extra_len < 6))
1170 		return (scsi_interpret_sense(xs));
1171 
1172 	switch (ASC_ASCQ(sense)) {
1173 	case SENSE_NOT_READY_BECOMING_READY:
1174 		SC_DEBUG(sc_link, SDEV_DB1, ("becoming ready.\n"));
1175 		retval = scsi_delay(xs, 5);
1176 		break;
1177 
1178 	case SENSE_NOT_READY_INIT_REQUIRED:
1179 		SC_DEBUG(sc_link, SDEV_DB1, ("spinning up\n"));
1180 		retval = scsi_start(sc->sc_link, SSS_START,
1181 		    SCSI_IGNORE_ILLEGAL_REQUEST | SCSI_NOSLEEP);
1182 		if (retval == 0)
1183 			retval = ERESTART;
1184 		else if (retval == ENOMEM)
1185 			/* Can't issue the command. Fall back on a delay. */
1186 			retval = scsi_delay(xs, 5);
1187 		else
1188 			SC_DEBUG(sc_link, SDEV_DB1, ("spin up failed (%#x)\n",
1189 			    retval));
1190 		break;
1191 
1192 	default:
1193 		retval = scsi_interpret_sense(xs);
1194 		break;
1195 	}
1196 
1197 	return (retval);
1198 }
1199 
1200 daddr_t
1201 sdsize(dev_t dev)
1202 {
1203 	struct disklabel *lp;
1204 	struct sd_softc *sc;
1205 	int part, omask;
1206 	daddr_t size;
1207 
1208 	sc = sdlookup(DISKUNIT(dev));
1209 	if (sc == NULL)
1210 		return -1;
1211 	if (sc->flags & SDF_DYING) {
1212 		size = -1;
1213 		goto exit;
1214 	}
1215 
1216 	part = DISKPART(dev);
1217 	omask = sc->sc_dk.dk_openmask & (1 << part);
1218 
1219 	if (omask == 0 && sdopen(dev, 0, S_IFBLK, NULL) != 0) {
1220 		size = -1;
1221 		goto exit;
1222 	}
1223 
1224 	lp = sc->sc_dk.dk_label;
1225 	if ((sc->sc_link->flags & SDEV_MEDIA_LOADED) == 0)
1226 		size = -1;
1227 	else if (lp->d_partitions[part].p_fstype != FS_SWAP)
1228 		size = -1;
1229 	else
1230 		size = DL_SECTOBLK(lp, DL_GETPSIZE(&lp->d_partitions[part]));
1231 	if (omask == 0 && sdclose(dev, 0, S_IFBLK, NULL) != 0)
1232 		size = -1;
1233 
1234  exit:
1235 	device_unref(&sc->sc_dev);
1236 	return size;
1237 }
1238 
/* #define SD_DUMP_NOT_TRUSTED if you just want to watch */
static int sddoingadump;

/*
 * dump all of physical memory into the partition specified, starting
 * at offset 'dumplo' into the partition.
 */
int
sddump(dev_t dev, daddr_t blkno, caddr_t va, size_t size)
{
	struct sd_softc *sc;	/* disk unit to do the I/O */
	struct disklabel *lp;	/* disk's disklabel */
	int	unit, part;
	u_int32_t sectorsize;	/* size of a disk sector */
	u_int64_t nsects;	/* number of sectors in partition */
	u_int64_t sectoff;	/* sector offset of partition */
	u_int64_t totwrt;	/* total number of sectors left to write */
	u_int32_t nwrt;		/* current number of sectors to write */
	struct scsi_xfer *xs;	/* ... convenience */
	int rv;

	/* Check if recursive dump; if so, punt. */
	if (sddoingadump)
		return EFAULT;
	if (blkno < 0)
		return EINVAL;

	/*
	 * Mark as active early.
	 * NOTE(review): error paths below return without clearing this
	 * flag, so a later call would fail the recursion check. Presumably
	 * harmless since the system is going down — confirm.
	 */
	sddoingadump = 1;

	unit = DISKUNIT(dev);	/* Decompose unit & partition. */
	part = DISKPART(dev);

	/* Check for acceptable drive number. */
	if (unit >= sd_cd.cd_ndevs || (sc = sd_cd.cd_devs[unit]) == NULL)
		return ENXIO;

	/*
	 * XXX Can't do this check, since the media might have been
	 * XXX marked `invalid' by successful unmounting of all
	 * XXX filesystems.
	 */
#if 0
	/* Make sure it was initialized. */
	if ((sc->sc_link->flags & SDEV_MEDIA_LOADED) != SDEV_MEDIA_LOADED)
		return ENXIO;
#endif

	/* Convert to disk sectors.  Request must be a multiple of size. */
	lp = sc->sc_dk.dk_label;
	sectorsize = lp->d_secsize;
	if ((size % sectorsize) != 0)
		return EFAULT;
	if ((blkno % DL_BLKSPERSEC(lp)) != 0)
		return EFAULT;
	totwrt = size / sectorsize;
	blkno = DL_BLKTOSEC(lp, blkno);	/* DEV_BSIZE blocks -> disk sectors */

	nsects = DL_GETPSIZE(&lp->d_partitions[part]);
	sectoff = DL_GETPOFFSET(&lp->d_partitions[part]);

	/* Check transfer bounds against partition size. */
	if ((blkno + totwrt) > nsects)
		return EINVAL;

	/* Offset block number to start of partition. */
	blkno += sectoff;

	/* Write in chunks; nwrt is capped at what fits in 32 bits. */
	while (totwrt > 0) {
		if (totwrt > UINT32_MAX)
			nwrt = UINT32_MAX;
		else
			nwrt = totwrt;

#ifndef	SD_DUMP_NOT_TRUSTED
		/* Polled, non-sleeping I/O: we may be in interrupt context. */
		xs = scsi_xs_get(sc->sc_link, SCSI_NOSLEEP);
		if (xs == NULL)
			return (ENOMEM);

		xs->timeout = 10000;
		xs->flags |= SCSI_DATA_OUT;
		xs->data = va;
		xs->datalen = nwrt * sectorsize;

		sd_cmd_rw10(xs, 0, blkno, nwrt); /* XXX */

		rv = scsi_xs_sync(xs);
		scsi_xs_put(xs);
		if (rv != 0)
			return (ENXIO);
#else	/* SD_DUMP_NOT_TRUSTED */
		/* Let's just talk about this first... */
		/* NOTE(review): %x paired with a pointer argument — confirm
		 * the intended format specifier if this branch is enabled. */
		printf("sd%d: dump addr 0x%x, blk %lld\n", unit, va,
		    (long long)blkno);
		delay(500 * 1000);	/* half a second */
#endif	/* SD_DUMP_NOT_TRUSTED */

		/* update block count */
		totwrt -= nwrt;
		blkno += nwrt;
		va += sectorsize * nwrt;
	}

	sddoingadump = 0;

	return (0);
}
1346 
1347 /*
1348  * Copy up to len chars from src to dst, ignoring non-printables.
1349  * Must be room for len+1 chars in dst so we can write the NUL.
1350  * Does not assume src is NUL-terminated.
1351  */
1352 void
1353 viscpy(u_char *dst, u_char *src, int len)
1354 {
1355 	while (len > 0 && *src != '\0') {
1356 		if (*src < 0x20 || *src >= 0x80) {
1357 			src++;
1358 			continue;
1359 		}
1360 		*dst++ = *src++;
1361 		len--;
1362 	}
1363 	*dst = '\0';
1364 }
1365 
1366 int
1367 sd_read_cap_10(struct sd_softc *sc, int flags)
1368 {
1369 	struct scsi_read_capacity cdb;
1370 	struct scsi_read_cap_data *rdcap;
1371 	struct scsi_xfer *xs;
1372 	int rv = ENOMEM;
1373 
1374 	CLR(flags, SCSI_IGNORE_ILLEGAL_REQUEST);
1375 
1376 	rdcap = dma_alloc(sizeof(*rdcap), (ISSET(flags, SCSI_NOSLEEP) ?
1377 	    PR_NOWAIT : PR_WAITOK) | PR_ZERO);
1378 	if (rdcap == NULL)
1379 		return (ENOMEM);
1380 
1381 	xs = scsi_xs_get(sc->sc_link, flags | SCSI_DATA_IN | SCSI_SILENT);
1382 	if (xs == NULL)
1383 		goto done;
1384 
1385 	bzero(&cdb, sizeof(cdb));
1386 	cdb.opcode = READ_CAPACITY;
1387 
1388 	memcpy(xs->cmd, &cdb, sizeof(cdb));
1389 	xs->cmdlen = sizeof(cdb);
1390 	xs->data = (void *)rdcap;
1391 	xs->datalen = sizeof(*rdcap);
1392 	xs->timeout = 20000;
1393 
1394 	rv = scsi_xs_sync(xs);
1395 	scsi_xs_put(xs);
1396 
1397 	if (rv == 0) {
1398 		sc->params.disksize = _4btol(rdcap->addr) + 1ll;
1399 		sc->params.secsize = _4btol(rdcap->length);
1400 		CLR(sc->flags, SDF_THIN);
1401 	}
1402 
1403  done:
1404 	dma_free(rdcap, sizeof(*rdcap));
1405 	return (rv);
1406 }
1407 
1408 int
1409 sd_read_cap_16(struct sd_softc *sc, int flags)
1410 {
1411 	struct scsi_read_capacity_16 cdb;
1412 	struct scsi_read_cap_data_16 *rdcap;
1413 	struct scsi_xfer *xs;
1414 	int rv = ENOMEM;
1415 
1416 	CLR(flags, SCSI_IGNORE_ILLEGAL_REQUEST);
1417 
1418 	rdcap = dma_alloc(sizeof(*rdcap), (ISSET(flags, SCSI_NOSLEEP) ?
1419 	    PR_NOWAIT : PR_WAITOK) | PR_ZERO);
1420 	if (rdcap == NULL)
1421 		return (ENOMEM);
1422 
1423 	xs = scsi_xs_get(sc->sc_link, flags | SCSI_DATA_IN | SCSI_SILENT);
1424 	if (xs == NULL)
1425 		goto done;
1426 
1427 	bzero(&cdb, sizeof(cdb));
1428 	cdb.opcode = READ_CAPACITY_16;
1429 	cdb.byte2 = SRC16_SERVICE_ACTION;
1430 	_lto4b(sizeof(*rdcap), cdb.length);
1431 
1432 	memcpy(xs->cmd, &cdb, sizeof(cdb));
1433 	xs->cmdlen = sizeof(cdb);
1434 	xs->data = (void *)rdcap;
1435 	xs->datalen = sizeof(*rdcap);
1436 	xs->timeout = 20000;
1437 
1438 	rv = scsi_xs_sync(xs);
1439 	scsi_xs_put(xs);
1440 
1441 	if (rv == 0) {
1442 		if (_8btol(rdcap->addr) == 0) {
1443 			rv = EIO;
1444 			goto done;
1445 		}
1446 
1447 		sc->params.disksize = _8btol(rdcap->addr) + 1;
1448 		sc->params.secsize = _4btol(rdcap->length);
1449 		if (ISSET(_2btol(rdcap->lowest_aligned), READ_CAP_16_TPE))
1450 			SET(sc->flags, SDF_THIN);
1451 		else
1452 			CLR(sc->flags, SDF_THIN);
1453 	}
1454 
1455  done:
1456 	dma_free(rdcap, sizeof(*rdcap));
1457 	return (rv);
1458 }
1459 
1460 int
1461 sd_size(struct sd_softc *sc, int flags)
1462 {
1463 	int rv;
1464 
1465 	if (SCSISPC(sc->sc_link->inqdata.version) >= 3) {
1466 		rv = sd_read_cap_16(sc, flags);
1467 		if (rv != 0)
1468 			rv = sd_read_cap_10(sc, flags);
1469 	} else {
1470 		rv = sd_read_cap_10(sc, flags);
1471 		if (rv == 0 && sc->params.disksize == 0x100000000ll)
1472 			rv = sd_read_cap_16(sc, flags);
1473 	}
1474 
1475 	return (rv);
1476 }
1477 
/*
 * Check whether the device advertises both VPD pages needed for thin
 * provisioning (SI_PG_DISK_LIMITS and SI_PG_DISK_THIN) in its list of
 * supported VPD pages.
 *
 * Returns 0 when both pages are present, EOPNOTSUPP when not, ENOMEM
 * on allocation failure, EIO on a malformed response, or the error
 * from scsi_inquire_vpd().
 */
int
sd_thin_pages(struct sd_softc *sc, int flags)
{
	struct scsi_vpd_hdr *pg;
	size_t len = 0;
	u_int8_t *pages;
	int i, score = 0;
	int rv;

	pg = dma_alloc(sizeof(*pg), (ISSET(flags, SCSI_NOSLEEP) ?
	    PR_NOWAIT : PR_WAITOK) | PR_ZERO);
	if (pg == NULL)
		return (ENOMEM);

	/* First pass: fetch only the header to learn the page list length. */
	rv = scsi_inquire_vpd(sc->sc_link, pg, sizeof(*pg),
	    SI_PG_SUPPORTED, flags);
	if (rv != 0)
		goto done;

	/* len also sizes the dma_free() at 'done' — keep it in sync. */
	len = _2btol(pg->page_length);

	/* Second pass: re-allocate with room for the full page list. */
	dma_free(pg, sizeof(*pg));
	pg = dma_alloc(sizeof(*pg) + len, (ISSET(flags, SCSI_NOSLEEP) ?
	    PR_NOWAIT : PR_WAITOK) | PR_ZERO);
	if (pg == NULL)
		return (ENOMEM);

	rv = scsi_inquire_vpd(sc->sc_link, pg, sizeof(*pg) + len,
	    SI_PG_SUPPORTED, flags);
	if (rv != 0)
		goto done;

	/* The supported-page codes follow directly after the header. */
	pages = (u_int8_t *)(pg + 1);
	if (pages[0] != SI_PG_SUPPORTED) {
		rv = EIO;
		goto done;
	}

	/* Count the two page codes that thin provisioning requires. */
	for (i = 1; i < len; i++) {
		switch (pages[i]) {
		case SI_PG_DISK_LIMITS:
		case SI_PG_DISK_THIN:
			score++;
			break;
		}
	}

	if (score < 2)
		rv = EOPNOTSUPP;

 done:
	dma_free(pg, sizeof(*pg) + len);
	return (rv);
}
1532 
1533 int
1534 sd_vpd_block_limits(struct sd_softc *sc, int flags)
1535 {
1536 	struct scsi_vpd_disk_limits *pg;
1537 	int rv;
1538 
1539 	pg = dma_alloc(sizeof(*pg), (ISSET(flags, SCSI_NOSLEEP) ?
1540 	    PR_NOWAIT : PR_WAITOK) | PR_ZERO);
1541 	if (pg == NULL)
1542 		return (ENOMEM);
1543 
1544 	rv = scsi_inquire_vpd(sc->sc_link, pg, sizeof(*pg),
1545 	    SI_PG_DISK_LIMITS, flags);
1546 	if (rv != 0)
1547 		goto done;
1548 
1549 	if (_2btol(pg->hdr.page_length) == SI_PG_DISK_LIMITS_LEN_THIN) {
1550 		sc->params.unmap_sectors = _4btol(pg->max_unmap_lba_count);
1551 		sc->params.unmap_descs = _4btol(pg->max_unmap_desc_count);
1552 	} else
1553 		rv = EOPNOTSUPP;
1554 
1555  done:
1556 	dma_free(pg, sizeof(*pg));
1557 	return (rv);
1558 }
1559 
1560 int
1561 sd_vpd_thin(struct sd_softc *sc, int flags)
1562 {
1563 	struct scsi_vpd_disk_thin *pg;
1564 	int rv;
1565 
1566 	pg = dma_alloc(sizeof(*pg), (ISSET(flags, SCSI_NOSLEEP) ?
1567 	    PR_NOWAIT : PR_WAITOK) | PR_ZERO);
1568 	if (pg == NULL)
1569 		return (ENOMEM);
1570 
1571 	rv = scsi_inquire_vpd(sc->sc_link, pg, sizeof(*pg),
1572 	    SI_PG_DISK_THIN, flags);
1573 	if (rv != 0)
1574 		goto done;
1575 
1576 #ifdef notyet
1577 	if (ISSET(pg->flags, VPD_DISK_THIN_TPU))
1578 		sc->sc_delete = sd_unmap;
1579 	else if (ISSET(pg->flags, VPD_DISK_THIN_TPWS)) {
1580 		sc->sc_delete = sd_write_same_16;
1581 		sc->params.unmap_descs = 1; /* WRITE SAME 16 only does one */
1582 	} else
1583 		rv = EOPNOTSUPP;
1584 #endif
1585 
1586  done:
1587 	dma_free(pg, sizeof(*pg));
1588 	return (rv);
1589 }
1590 
/*
 * Probe all thin-provisioning parameters. Each of the three probes
 * must succeed; the first failure is returned and aborts the rest.
 */
int
sd_thin_params(struct sd_softc *sc, int flags)
{
	int error;

	error = sd_thin_pages(sc, flags);
	if (error == 0)
		error = sd_vpd_block_limits(sc, flags);
	if (error == 0)
		error = sd_vpd_thin(sc, flags);

	return (error);
}
1610 
1611 /*
1612  * Fill out the disk parameter structure. Return SDGP_RESULT_OK if the
1613  * structure is correctly filled in, SDGP_RESULT_OFFLINE otherwise. The caller
1614  * is responsible for clearing the SDEV_MEDIA_LOADED flag if the structure
1615  * cannot be completed.
1616  */
1617 int
1618 sd_get_parms(struct sd_softc *sc, struct disk_parms *dp, int flags)
1619 {
1620 	union scsi_mode_sense_buf *buf = NULL;
1621 	struct page_rigid_geometry *rigid = NULL;
1622 	struct page_flex_geometry *flex = NULL;
1623 	struct page_reduced_geometry *reduced = NULL;
1624 	u_char *page0 = NULL;
1625 	u_int32_t heads = 0, sectors = 0, cyls = 0, secsize = 0;
1626 	int err = 0, big;
1627 
1628 	if (sd_size(sc, flags) != 0)
1629 		return (SDGP_RESULT_OFFLINE);
1630 
1631 	if (ISSET(sc->flags, SDF_THIN) && sd_thin_params(sc, flags) != 0) {
1632 		/* we dont know the unmap limits, so we cant use thin shizz */
1633 		CLR(sc->flags, SDF_THIN);
1634 	}
1635 
1636 	buf = dma_alloc(sizeof(*buf), PR_NOWAIT);
1637 	if (buf == NULL)
1638 		goto validate;
1639 
1640 	/*
1641 	 * Ask for page 0 (vendor specific) mode sense data to find
1642 	 * READONLY info. The only thing USB devices will ask for.
1643 	 */
1644 	err = scsi_do_mode_sense(sc->sc_link, 0, buf, (void **)&page0,
1645 	    NULL, NULL, NULL, 1, flags | SCSI_SILENT, &big);
1646 	if (err == 0) {
1647 		if (big && buf->hdr_big.dev_spec & SMH_DSP_WRITE_PROT)
1648 			SET(sc->sc_link->flags, SDEV_READONLY);
1649 		else if (!big && buf->hdr.dev_spec & SMH_DSP_WRITE_PROT)
1650 			SET(sc->sc_link->flags, SDEV_READONLY);
1651 		else
1652 			CLR(sc->sc_link->flags, SDEV_READONLY);
1653 	}
1654 
1655 	/*
1656 	 * Many UMASS devices choke when asked about their geometry. Most
1657 	 * don't have a meaningful geometry anyway, so just fake it if
1658 	 * scsi_size() worked.
1659 	 */
1660 	if ((sc->sc_link->flags & SDEV_UMASS) && (dp->disksize > 0))
1661 		goto validate;
1662 
1663 	switch (sc->sc_link->inqdata.device & SID_TYPE) {
1664 	case T_OPTICAL:
1665 		/* No more information needed or available. */
1666 		break;
1667 
1668 	case T_RDIRECT:
1669 		/* T_RDIRECT supports only PAGE_REDUCED_GEOMETRY (6). */
1670 		err = scsi_do_mode_sense(sc->sc_link, PAGE_REDUCED_GEOMETRY,
1671 		    buf, (void **)&reduced, NULL, NULL, &secsize,
1672 		    sizeof(*reduced), flags | SCSI_SILENT, NULL);
1673 		if (!err && reduced &&
1674 		    DISK_PGCODE(reduced, PAGE_REDUCED_GEOMETRY)) {
1675 			if (dp->disksize == 0)
1676 				dp->disksize = _5btol(reduced->sectors);
1677 			if (secsize == 0)
1678 				secsize = _2btol(reduced->bytes_s);
1679 		}
1680 		break;
1681 
1682 	default:
1683 		/*
1684 		 * NOTE: Some devices leave off the last four bytes of
1685 		 * PAGE_RIGID_GEOMETRY and PAGE_FLEX_GEOMETRY mode sense pages.
1686 		 * The only information in those four bytes is RPM information
1687 		 * so accept the page. The extra bytes will be zero and RPM will
1688 		 * end up with the default value of 3600.
1689 		 */
1690 		if (((sc->sc_link->flags & SDEV_ATAPI) == 0) ||
1691 		    ((sc->sc_link->flags & SDEV_REMOVABLE) == 0))
1692 			err = scsi_do_mode_sense(sc->sc_link,
1693 			    PAGE_RIGID_GEOMETRY, buf, (void **)&rigid, NULL,
1694 			    NULL, &secsize, sizeof(*rigid) - 4,
1695 			    flags | SCSI_SILENT, NULL);
1696 		if (!err && rigid && DISK_PGCODE(rigid, PAGE_RIGID_GEOMETRY)) {
1697 			heads = rigid->nheads;
1698 			cyls = _3btol(rigid->ncyl);
1699 			if (heads * cyls > 0)
1700 				sectors = dp->disksize / (heads * cyls);
1701 		} else {
1702 			err = scsi_do_mode_sense(sc->sc_link,
1703 			    PAGE_FLEX_GEOMETRY, buf, (void **)&flex, NULL, NULL,
1704 			    &secsize, sizeof(*flex) - 4,
1705 			    flags | SCSI_SILENT, NULL);
1706 			if (!err && flex &&
1707 			    DISK_PGCODE(flex, PAGE_FLEX_GEOMETRY)) {
1708 				sectors = flex->ph_sec_tr;
1709 				heads = flex->nheads;
1710 				cyls = _2btol(flex->ncyl);
1711 				if (secsize == 0)
1712 					secsize = _2btol(flex->bytes_s);
1713 				if (dp->disksize == 0)
1714 					dp->disksize = heads * cyls * sectors;
1715 			}
1716 		}
1717 		break;
1718 	}
1719 
1720 validate:
1721 	if (buf)
1722 		dma_free(buf, sizeof(*buf));
1723 
1724 	if (dp->disksize == 0)
1725 		return (SDGP_RESULT_OFFLINE);
1726 
1727 	if (dp->secsize == 0)
1728 		dp->secsize = (secsize == 0) ? 512 : secsize;
1729 
1730 	/*
1731 	 * Restrict secsize values to powers of two between 512 and 64k.
1732 	 */
1733 	switch (dp->secsize) {
1734 	case 0x200:	/* == 512, == DEV_BSIZE on all architectures. */
1735 	case 0x400:
1736 	case 0x800:
1737 	case 0x1000:
1738 	case 0x2000:
1739 	case 0x4000:
1740 	case 0x8000:
1741 	case 0x10000:
1742 		break;
1743 	default:
1744 		SC_DEBUG(sc->sc_link, SDEV_DB1,
1745 		    ("sd_get_parms: bad secsize: %#x\n", dp->secsize));
1746 		return (SDGP_RESULT_OFFLINE);
1747 	}
1748 
1749 	/*
1750 	 * XXX THINK ABOUT THIS!!  Using values such that sectors * heads *
1751 	 * cyls is <= disk_size can lead to wasted space. We need a more
1752 	 * careful calculation/validation to make everything work out
1753 	 * optimally.
1754 	 */
1755 	if (dp->disksize > 0xffffffff && (dp->heads * dp->sectors) < 0xffff) {
1756 		dp->heads = 511;
1757 		dp->sectors = 255;
1758 		cyls = 0;
1759 	} else {
1760 		/*
1761 		 * Use standard geometry values for anything we still don't
1762 		 * know.
1763 		 */
1764 		dp->heads = (heads == 0) ? 255 : heads;
1765 		dp->sectors = (sectors == 0) ? 63 : sectors;
1766 	}
1767 
1768 	dp->cyls = (cyls == 0) ? dp->disksize / (dp->heads * dp->sectors) :
1769 	    cyls;
1770 
1771 	if (dp->cyls == 0) {
1772 		dp->heads = dp->cyls = 1;
1773 		dp->sectors = dp->disksize;
1774 	}
1775 
1776 	return (SDGP_RESULT_OK);
1777 }
1778 
1779 void
1780 sd_flush(struct sd_softc *sc, int flags)
1781 {
1782 	struct scsi_link *link = sc->sc_link;
1783 	struct scsi_xfer *xs;
1784 	struct scsi_synchronize_cache *cmd;
1785 
1786 	if (link->quirks & SDEV_NOSYNCCACHE)
1787 		return;
1788 
1789 	/*
1790 	 * Issue a SYNCHRONIZE CACHE. Address 0, length 0 means "all remaining
1791 	 * blocks starting at address 0". Ignore ILLEGAL REQUEST in the event
1792 	 * that the command is not supported by the device.
1793 	 */
1794 
1795 	xs = scsi_xs_get(link, flags);
1796 	if (xs == NULL) {
1797 		SC_DEBUG(link, SDEV_DB1, ("cache sync failed to get xs\n"));
1798 		return;
1799 	}
1800 
1801 	cmd = (struct scsi_synchronize_cache *)xs->cmd;
1802 	cmd->opcode = SYNCHRONIZE_CACHE;
1803 
1804 	xs->cmdlen = sizeof(*cmd);
1805 	xs->timeout = 100000;
1806 	xs->flags |= SCSI_IGNORE_ILLEGAL_REQUEST;
1807 
1808 	if (scsi_xs_sync(xs) == 0)
1809 		sc->flags &= ~SDF_DIRTY;
1810 	else
1811 		SC_DEBUG(link, SDEV_DB1, ("cache sync failed\n"));
1812 
1813 	scsi_xs_put(xs);
1814 }
1815