xref: /illumos-gate/usr/src/uts/common/io/scsi/targets/sd.c (revision 148fd93e)
/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright (c) 1990, 2010, Oracle and/or its affiliates. All rights reserved.
 */
/*
 * Copyright (c) 2011 Bayard G. Bell.  All rights reserved.
 * Copyright (c) 2012, 2016 by Delphix. All rights reserved.
 * Copyright 2012 DEY Storage Systems, Inc.  All rights reserved.
 * Copyright 2019 Joyent, Inc.
 * Copyright 2017 Nexenta Systems, Inc.
 * Copyright 2019 Racktop Systems
 * Copyright 2022 OmniOS Community Edition (OmniOSce) Association.
 */
/*
 * Copyright 2011 cyril.galibern@opensvc.com
 */

/*
 * SCSI disk target driver.
 */
#include <sys/scsi/scsi.h>
#include <sys/dkbad.h>
#include <sys/dklabel.h>
#include <sys/dkio.h>
#include <sys/fdio.h>
#include <sys/cdio.h>
#include <sys/mhd.h>
#include <sys/vtoc.h>
#include <sys/dktp/fdisk.h>
#include <sys/kstat.h>
#include <sys/vtrace.h>
#include <sys/note.h>
#include <sys/thread.h>
#include <sys/proc.h>
#include <sys/efi_partition.h>
#include <sys/var.h>
#include <sys/aio_req.h>
#include <sys/dkioc_free_util.h>

#ifdef __lock_lint
#define	_LP64
#define	__amd64
#endif

#if (defined(__fibre))
/* Note: is there a leadville version of the following? */
#include <sys/fc4/fcal_linkapp.h>
#endif
#include <sys/taskq.h>
#include <sys/uuid.h>
#include <sys/byteorder.h>
#include <sys/sdt.h>

#include "sd_xbuf.h"

#include <sys/scsi/targets/sddef.h>
#include <sys/cmlb.h>
#include <sys/sysevent/eventdefs.h>
#include <sys/sysevent/dev.h>

#include <sys/fm/protocol.h>

/*
 * Loadable module info.
 */
#if (defined(__fibre))
#define	SD_MODULE_NAME	"SCSI SSA/FCAL Disk Driver"
#else /* !__fibre */
#define	SD_MODULE_NAME	"SCSI Disk Driver"
#endif /* !__fibre */

/*
 * Define the interconnect type, to allow the driver to distinguish
 * between parallel SCSI (sd) and fibre channel (ssd) behaviors.
 *
 * This is really for backward compatibility. In the future, the driver
 * should actually check the "interconnect-type" property as reported by
 * the HBA; however at present this property is not defined by all HBAs,
 * so we will use this #define (1) to permit the driver to run in
 * backward-compatibility mode; and (2) to print a notification message
 * if an FC HBA does not support the "interconnect-type" property.  The
 * behavior of the driver will be to assume parallel SCSI behaviors unless
 * the "interconnect-type" property is defined by the HBA **AND** has a
 * value of either INTERCONNECT_FIBRE, INTERCONNECT_SSA, or
 * INTERCONNECT_FABRIC, in which case the driver will assume Fibre
 * Channel behaviors (as per the old ssd).  (Note that the
 * INTERCONNECT_1394 and INTERCONNECT_USB types are not supported and
 * will result in the driver assuming parallel SCSI behaviors.)
 *
 * (see common/sys/scsi/impl/services.h)
 *
 * Note: For ssd semantics, don't use INTERCONNECT_FABRIC as the default
 * since some FC HBAs may already support that, and there is some code in
 * the driver that already looks for it.  Using INTERCONNECT_FABRIC as the
 * default would confuse that code, and besides things should work fine
 * anyways if the FC HBA already reports INTERCONNECT_FABRIC for the
 * "interconnect-type" property.
 *
 */
#if (defined(__fibre))
#define	SD_DEFAULT_INTERCONNECT_TYPE	SD_INTERCONNECT_FIBRE
#else
#define	SD_DEFAULT_INTERCONNECT_TYPE	SD_INTERCONNECT_PARALLEL
#endif
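
/*
 * Illustrative sketch (not compiled in) of the backward-compatibility
 * check described above: consult the HBA's "interconnect-type" property
 * and fall back to SD_DEFAULT_INTERCONNECT_TYPE when it is absent or
 * unrecognized.  The helper name sd_example_interconnect is hypothetical;
 * the driver's real probe logic lives in sd_unit_attach().
 */
#if 0
static int
sd_example_interconnect(dev_info_t *devi)
{
	int ictype = ddi_prop_get_int(DDI_DEV_T_ANY, devi, 0,
	    "interconnect-type", -1);

	switch (ictype) {
	case INTERCONNECT_FIBRE:
	case INTERCONNECT_SSA:
	case INTERCONNECT_FABRIC:
		return (SD_INTERCONNECT_FIBRE);	/* ssd semantics */
	default:
		/* property absent or unrecognized: parallel SCSI mode */
		return (SD_DEFAULT_INTERCONNECT_TYPE);
	}
}
#endif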

/*
 * The name of the driver, established from the module name in _init.
 */
static	char *sd_label			= NULL;

/*
 * Driver name is unfortunately prefixed on some driver.conf properties.
 */
#if (defined(__fibre))
#define	sd_max_xfer_size		ssd_max_xfer_size
#define	sd_config_list			ssd_config_list
static	char *sd_max_xfer_size		= "ssd_max_xfer_size";
static	char *sd_config_list		= "ssd-config-list";
#else
static	char *sd_max_xfer_size		= "sd_max_xfer_size";
static	char *sd_config_list		= "sd-config-list";
#endif

/*
 * Driver global variables
 */

#if (defined(__fibre))
/*
 * These #defines are to avoid namespace collisions that occur because this
 * code is currently used to compile two separate driver modules: sd and ssd.
 * All global variables need to be treated this way (even if declared static)
 * in order to allow the debugger to resolve the names properly.
 * It is anticipated that in the near future the ssd module will be obsoleted,
 * at which time this namespace issue should go away.
 */
#define	sd_state			ssd_state
#define	sd_io_time			ssd_io_time
#define	sd_failfast_enable		ssd_failfast_enable
#define	sd_ua_retry_count		ssd_ua_retry_count
#define	sd_report_pfa			ssd_report_pfa
#define	sd_max_throttle			ssd_max_throttle
#define	sd_min_throttle			ssd_min_throttle
#define	sd_rot_delay			ssd_rot_delay

#define	sd_retry_on_reservation_conflict	\
					ssd_retry_on_reservation_conflict
#define	sd_reinstate_resv_delay		ssd_reinstate_resv_delay
#define	sd_resv_conflict_name		ssd_resv_conflict_name

#define	sd_component_mask		ssd_component_mask
#define	sd_level_mask			ssd_level_mask
#define	sd_debug_un			ssd_debug_un
#define	sd_error_level			ssd_error_level

#define	sd_xbuf_active_limit		ssd_xbuf_active_limit
#define	sd_xbuf_reserve_limit		ssd_xbuf_reserve_limit

#define	sd_tr				ssd_tr
#define	sd_reset_throttle_timeout	ssd_reset_throttle_timeout
#define	sd_qfull_throttle_timeout	ssd_qfull_throttle_timeout
#define	sd_qfull_throttle_enable	ssd_qfull_throttle_enable
#define	sd_check_media_time		ssd_check_media_time
#define	sd_wait_cmds_complete		ssd_wait_cmds_complete
#define	sd_label_mutex			ssd_label_mutex
#define	sd_detach_mutex			ssd_detach_mutex
#define	sd_log_buf			ssd_log_buf
#define	sd_log_mutex			ssd_log_mutex

#define	sd_disk_table			ssd_disk_table
#define	sd_disk_table_size		ssd_disk_table_size
#define	sd_sense_mutex			ssd_sense_mutex
#define	sd_cdbtab			ssd_cdbtab

#define	sd_cb_ops			ssd_cb_ops
#define	sd_ops				ssd_ops
#define	sd_additional_codes		ssd_additional_codes
#define	sd_tgops			ssd_tgops

#define	sd_minor_data			ssd_minor_data
#define	sd_minor_data_efi		ssd_minor_data_efi

#define	sd_tq				ssd_tq
#define	sd_wmr_tq			ssd_wmr_tq
#define	sd_taskq_name			ssd_taskq_name
#define	sd_wmr_taskq_name		ssd_wmr_taskq_name
#define	sd_taskq_minalloc		ssd_taskq_minalloc
#define	sd_taskq_maxalloc		ssd_taskq_maxalloc

#define	sd_dump_format_string		ssd_dump_format_string

#define	sd_iostart_chain		ssd_iostart_chain
#define	sd_iodone_chain			ssd_iodone_chain

#define	sd_pm_idletime			ssd_pm_idletime

#define	sd_force_pm_supported		ssd_force_pm_supported

#define	sd_dtype_optical_bind		ssd_dtype_optical_bind

#define	sd_ssc_init			ssd_ssc_init
#define	sd_ssc_send			ssd_ssc_send
#define	sd_ssc_fini			ssd_ssc_fini
#define	sd_ssc_assessment		ssd_ssc_assessment
#define	sd_ssc_post			ssd_ssc_post
#define	sd_ssc_print			ssd_ssc_print
#define	sd_ssc_ereport_post		ssd_ssc_ereport_post
#define	sd_ssc_set_info			ssd_ssc_set_info
#define	sd_ssc_extract_info		ssd_ssc_extract_info

#endif

#ifdef	SDDEBUG
int	sd_force_pm_supported		= 0;
#endif	/* SDDEBUG */

void *sd_state				= NULL;
int sd_io_time				= SD_IO_TIME;
int sd_failfast_enable			= 1;
int sd_ua_retry_count			= SD_UA_RETRY_COUNT;
int sd_report_pfa			= 1;
int sd_max_throttle			= SD_MAX_THROTTLE;
int sd_min_throttle			= SD_MIN_THROTTLE;
int sd_rot_delay			= 4; /* Default 4ms Rotation delay */
int sd_qfull_throttle_enable		= TRUE;

int sd_retry_on_reservation_conflict	= 1;
int sd_reinstate_resv_delay		= SD_REINSTATE_RESV_DELAY;
_NOTE(SCHEME_PROTECTS_DATA("safe sharing", sd_reinstate_resv_delay))

static int sd_dtype_optical_bind	= -1;

/* Note: the following is not a bug, it really is "sd_" and not "ssd_" */
static	char *sd_resv_conflict_name	= "sd_retry_on_reservation_conflict";

/*
 * Global data for debug logging. To enable debug printing, sd_component_mask
 * and sd_level_mask should be set to the desired bit patterns as outlined in
 * sddef.h.
 */
uint_t	sd_component_mask		= 0x0;
uint_t	sd_level_mask			= 0x0;
struct	sd_lun *sd_debug_un		= NULL;
uint_t	sd_error_level			= SCSI_ERR_RETRYABLE;

/* Note: these may go away in the future... */
static uint32_t	sd_xbuf_active_limit	= 512;
static uint32_t sd_xbuf_reserve_limit	= 16;

static struct sd_resv_reclaim_request	sd_tr = { NULL, NULL, NULL, 0, 0, 0 };

/*
 * Timer value used to reset the throttle after it has been reduced
 * (typically in response to TRAN_BUSY or STATUS_QFULL)
 */
static int sd_reset_throttle_timeout	= SD_RESET_THROTTLE_TIMEOUT;
static int sd_qfull_throttle_timeout	= SD_QFULL_THROTTLE_TIMEOUT;

/*
 * Interval value associated with the media change scsi watch.
 */
static int sd_check_media_time		= 3000000;

/*
 * Wait value used for in progress operations during a DDI_SUSPEND
 */
static int sd_wait_cmds_complete	= SD_WAIT_CMDS_COMPLETE;

/*
 * sd_label_mutex protects a static buffer used in the disk label
 * component of the driver
 */
static kmutex_t sd_label_mutex;

/*
 * sd_detach_mutex protects un_layer_count, un_detach_count, and
 * un_opens_in_progress in the sd_lun structure.
 */
static kmutex_t sd_detach_mutex;

_NOTE(MUTEX_PROTECTS_DATA(sd_detach_mutex,
	sd_lun::{un_layer_count un_detach_count un_opens_in_progress}))

/*
 * Global buffer and mutex for debug logging
 */
static char	sd_log_buf[1024];
static kmutex_t	sd_log_mutex;

/*
 * Structs and globals for recording attached lun information.
 * This maintains a chain. Each node in the chain represents a SCSI controller.
 * The structure records the number of luns attached to each target connected
 * to the controller.
 * For parallel SCSI devices only.
 */
struct sd_scsi_hba_tgt_lun {
	struct sd_scsi_hba_tgt_lun	*next;
	dev_info_t			*pdip;
	int				nlun[NTARGETS_WIDE];
};

/*
 * Flag to indicate the lun is attached or detached
 */
#define	SD_SCSI_LUN_ATTACH	0
#define	SD_SCSI_LUN_DETACH	1

static kmutex_t	sd_scsi_target_lun_mutex;
static struct sd_scsi_hba_tgt_lun	*sd_scsi_target_lun_head = NULL;

_NOTE(MUTEX_PROTECTS_DATA(sd_scsi_target_lun_mutex,
    sd_scsi_hba_tgt_lun::next sd_scsi_hba_tgt_lun::pdip))

_NOTE(MUTEX_PROTECTS_DATA(sd_scsi_target_lun_mutex,
    sd_scsi_target_lun_head))

/*
 * "Smart" Probe Caching structs, globals, #defines, etc.
 * For parallel SCSI and non-self-identifying devices only.
 */

/*
 * The following resources and routines are implemented to support
 * "smart" probing, which caches the scsi_probe() results in an array,
 * in order to help avoid long probe times.
 */
struct sd_scsi_probe_cache {
	struct	sd_scsi_probe_cache	*next;
	dev_info_t	*pdip;
	int		cache[NTARGETS_WIDE];
};

static kmutex_t	sd_scsi_probe_cache_mutex;
static struct	sd_scsi_probe_cache *sd_scsi_probe_cache_head = NULL;

/*
 * Really we only need protection on the head of the linked list, but
 * better safe than sorry.
 */
_NOTE(MUTEX_PROTECTS_DATA(sd_scsi_probe_cache_mutex,
    sd_scsi_probe_cache::next sd_scsi_probe_cache::pdip))

_NOTE(MUTEX_PROTECTS_DATA(sd_scsi_probe_cache_mutex,
    sd_scsi_probe_cache_head))
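
/*
 * A minimal sketch (not compiled in) of the caching idea implemented by
 * sd_scsi_probe_with_cache(): look up the HBA's cache node by pdip and
 * reuse a prior SCSIPROBE_NORESP result for a target instead of paying
 * for another scsi_probe().  The helper name and the omitted locking are
 * illustrative only.
 */
#if 0
static int
sd_example_cached_probe(struct sd_scsi_probe_cache *cp, int tgt,
    struct scsi_device *devp, int (*waitfn)())
{
	if (cp->cache[tgt] == SCSIPROBE_NORESP)
		return (SCSIPROBE_NORESP);	/* cache hit: skip probe */
	cp->cache[tgt] = scsi_probe(devp, waitfn);
	return (cp->cache[tgt]);
}
#endif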

/*
 * Power attribute table
 */
static sd_power_attr_ss sd_pwr_ss = {
	{ "NAME=spindle-motor", "0=off", "1=on", NULL },
	{0, 100},
	{30, 0},
	{20000, 0}
};

static sd_power_attr_pc sd_pwr_pc = {
	{ "NAME=spindle-motor", "0=stopped", "1=standby", "2=idle",
		"3=active", NULL },
	{0, 0, 0, 100},
	{90, 90, 20, 0},
	{15000, 15000, 1000, 0}
};

/*
 * Power level to power condition
 */
static int sd_pl2pc[] = {
	SD_TARGET_START_VALID,
	SD_TARGET_STANDBY,
	SD_TARGET_IDLE,
	SD_TARGET_ACTIVE
};
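
/*
 * For example (illustrative): a power management request to enter level 2
 * would be translated via sd_pl2pc[2] to the SD_TARGET_IDLE power
 * condition used with the START STOP UNIT command.
 */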

/*
 * Vendor specific data name property declarations
 */

#if defined(__fibre) || defined(__x86)

static sd_tunables seagate_properties = {
	SEAGATE_THROTTLE_VALUE,
	0,
	0,
	0,
	0,
	0,
	0,
	0,
	0
};


static sd_tunables fujitsu_properties = {
	FUJITSU_THROTTLE_VALUE,
	0,
	0,
	0,
	0,
	0,
	0,
	0,
	0
};

static sd_tunables ibm_properties = {
	IBM_THROTTLE_VALUE,
	0,
	0,
	0,
	0,
	0,
	0,
	0,
	0
};

static sd_tunables sve_properties = {
	SVE_THROTTLE_VALUE,
	0,
	0,
	SVE_BUSY_RETRIES,
	SVE_RESET_RETRY_COUNT,
	SVE_RESERVE_RELEASE_TIME,
	SVE_MIN_THROTTLE_VALUE,
	SVE_DISKSORT_DISABLED_FLAG,
	0
};

static sd_tunables maserati_properties = {
	0,
	0,
	0,
	0,
	0,
	0,
	0,
	MASERATI_DISKSORT_DISABLED_FLAG,
	MASERATI_LUN_RESET_ENABLED_FLAG
};

static sd_tunables pirus_properties = {
	PIRUS_THROTTLE_VALUE,
	0,
	PIRUS_NRR_COUNT,
	PIRUS_BUSY_RETRIES,
	PIRUS_RESET_RETRY_COUNT,
	0,
	PIRUS_MIN_THROTTLE_VALUE,
	PIRUS_DISKSORT_DISABLED_FLAG,
	PIRUS_LUN_RESET_ENABLED_FLAG
};

#endif

#if (defined(__sparc) && !defined(__fibre)) || \
	(defined(__x86))


static sd_tunables elite_properties = {
	ELITE_THROTTLE_VALUE,
	0,
	0,
	0,
	0,
	0,
	0,
	0,
	0
};

static sd_tunables st31200n_properties = {
	ST31200N_THROTTLE_VALUE,
	0,
	0,
	0,
	0,
	0,
	0,
	0,
	0
};

#endif /* Fibre or not */

static sd_tunables lsi_properties_scsi = {
	LSI_THROTTLE_VALUE,
	0,
	LSI_NOTREADY_RETRIES,
	0,
	0,
	0,
	0,
	0,
	0
};

static sd_tunables symbios_properties = {
	SYMBIOS_THROTTLE_VALUE,
	0,
	SYMBIOS_NOTREADY_RETRIES,
	0,
	0,
	0,
	0,
	0,
	0
};

static sd_tunables lsi_properties = {
	0,
	0,
	LSI_NOTREADY_RETRIES,
	0,
	0,
	0,
	0,
	0,
	0
};

static sd_tunables lsi_oem_properties = {
	0,
	0,
	LSI_OEM_NOTREADY_RETRIES,
	0,
	0,
	0,
	0,
	0,
	0,
	1
};



#if (defined(SD_PROP_TST))

#define	SD_TST_CTYPE_VAL	CTYPE_CDROM
#define	SD_TST_THROTTLE_VAL	16
#define	SD_TST_NOTREADY_VAL	12
#define	SD_TST_BUSY_VAL		60
#define	SD_TST_RST_RETRY_VAL	36
#define	SD_TST_RSV_REL_TIME	60

static sd_tunables tst_properties = {
	SD_TST_THROTTLE_VAL,
	SD_TST_CTYPE_VAL,
	SD_TST_NOTREADY_VAL,
	SD_TST_BUSY_VAL,
	SD_TST_RST_RETRY_VAL,
	SD_TST_RSV_REL_TIME,
	0,
	0,
	0
};
#endif

/* This is similar to the ANSI toupper implementation */
#define	SD_TOUPPER(C)	(((C) >= 'a' && (C) <= 'z') ? (C) - 'a' + 'A' : (C))

/*
 * Static Driver Configuration Table
 *
 * This is the table of disks which need throttle adjustment (or perhaps
 * something else, as defined by the flags, at a future time).  device_id
 * is a string consisting of concatenated vid (vendor), pid (product/model)
 * and revision strings as defined in the scsi_inquiry structure.  Offsets of
 * the parts of the string are as defined by the sizes in the scsi_inquiry
 * structure.  Device type is searched as far as the device_id string is
 * defined.  Flags defines which values are to be set in the driver from the
 * properties list.
 *
 * Entries below which begin and end with a "*" are a special case.
 * These do not have a specific vendor, and the string which follows
 * can appear anywhere in the 16 byte PID portion of the inquiry data.
 *
 * Entries below which begin and end with a " " (blank) are a special
 * case. The comparison function will treat multiple consecutive blanks
 * as equivalent to a single blank. For example, this causes a
 * sd_disk_table entry of " NEC CDROM " to match a device's id string
 * of "NEC       CDROM".
 *
 * Note: The MD21 controller type has been obsoleted.
 *	 ST318202F is a Legacy device
 *	 MAM3182FC, MAM3364FC, MAM3738FC do not appear to have ever been
 *	 made with an FC connection. The entries here are a legacy.
 */
static sd_disk_config_t sd_disk_table[] = {
#if defined(__fibre) || defined(__x86)
	{ "SEAGATE ST34371FC", SD_CONF_BSET_THROTTLE, &seagate_properties },
	{ "SEAGATE ST19171FC", SD_CONF_BSET_THROTTLE, &seagate_properties },
	{ "SEAGATE ST39102FC", SD_CONF_BSET_THROTTLE, &seagate_properties },
	{ "SEAGATE ST39103FC", SD_CONF_BSET_THROTTLE, &seagate_properties },
	{ "SEAGATE ST118273F", SD_CONF_BSET_THROTTLE, &seagate_properties },
	{ "SEAGATE ST318202F", SD_CONF_BSET_THROTTLE, &seagate_properties },
	{ "SEAGATE ST318203F", SD_CONF_BSET_THROTTLE, &seagate_properties },
	{ "SEAGATE ST136403F", SD_CONF_BSET_THROTTLE, &seagate_properties },
	{ "SEAGATE ST318304F", SD_CONF_BSET_THROTTLE, &seagate_properties },
	{ "SEAGATE ST336704F", SD_CONF_BSET_THROTTLE, &seagate_properties },
	{ "SEAGATE ST373405F", SD_CONF_BSET_THROTTLE, &seagate_properties },
	{ "SEAGATE ST336605F", SD_CONF_BSET_THROTTLE, &seagate_properties },
	{ "SEAGATE ST336752F", SD_CONF_BSET_THROTTLE, &seagate_properties },
	{ "SEAGATE ST318452F", SD_CONF_BSET_THROTTLE, &seagate_properties },
	{ "FUJITSU MAG3091F",  SD_CONF_BSET_THROTTLE, &fujitsu_properties },
	{ "FUJITSU MAG3182F",  SD_CONF_BSET_THROTTLE, &fujitsu_properties },
	{ "FUJITSU MAA3182F",  SD_CONF_BSET_THROTTLE, &fujitsu_properties },
	{ "FUJITSU MAF3364F",  SD_CONF_BSET_THROTTLE, &fujitsu_properties },
	{ "FUJITSU MAL3364F",  SD_CONF_BSET_THROTTLE, &fujitsu_properties },
	{ "FUJITSU MAL3738F",  SD_CONF_BSET_THROTTLE, &fujitsu_properties },
	{ "FUJITSU MAM3182FC",  SD_CONF_BSET_THROTTLE, &fujitsu_properties },
	{ "FUJITSU MAM3364FC",  SD_CONF_BSET_THROTTLE, &fujitsu_properties },
	{ "FUJITSU MAM3738FC",  SD_CONF_BSET_THROTTLE, &fujitsu_properties },
	{ "IBM     DDYFT1835",  SD_CONF_BSET_THROTTLE, &ibm_properties },
	{ "IBM     DDYFT3695",  SD_CONF_BSET_THROTTLE, &ibm_properties },
	{ "IBM     IC35LF2D2",  SD_CONF_BSET_THROTTLE, &ibm_properties },
	{ "IBM     IC35LF2PR",  SD_CONF_BSET_THROTTLE, &ibm_properties },
	{ "IBM     1724-100",   SD_CONF_BSET_NRR_COUNT, &lsi_oem_properties },
	{ "IBM     1726-2xx",   SD_CONF_BSET_NRR_COUNT, &lsi_oem_properties },
	{ "IBM     1726-22x",   SD_CONF_BSET_NRR_COUNT, &lsi_oem_properties },
	{ "IBM     1726-4xx",   SD_CONF_BSET_NRR_COUNT, &lsi_oem_properties },
	{ "IBM     1726-42x",   SD_CONF_BSET_NRR_COUNT, &lsi_oem_properties },
	{ "IBM     1726-3xx",   SD_CONF_BSET_NRR_COUNT, &lsi_oem_properties },
	{ "IBM     3526",	SD_CONF_BSET_NRR_COUNT, &lsi_oem_properties },
	{ "IBM     3542",	SD_CONF_BSET_NRR_COUNT, &lsi_oem_properties },
	{ "IBM     3552",	SD_CONF_BSET_NRR_COUNT, &lsi_oem_properties },
	{ "IBM     1722",	SD_CONF_BSET_NRR_COUNT, &lsi_oem_properties },
	{ "IBM     1742",	SD_CONF_BSET_NRR_COUNT, &lsi_oem_properties },
	{ "IBM     1815",	SD_CONF_BSET_NRR_COUNT, &lsi_oem_properties },
	{ "IBM     FAStT",	SD_CONF_BSET_NRR_COUNT, &lsi_oem_properties },
	{ "IBM     1814",	SD_CONF_BSET_NRR_COUNT, &lsi_oem_properties },
	{ "IBM     1814-200",	SD_CONF_BSET_NRR_COUNT, &lsi_oem_properties },
	{ "IBM     1818",	SD_CONF_BSET_NRR_COUNT, &lsi_oem_properties },
	{ "DELL    MD3000",	SD_CONF_BSET_NRR_COUNT, &lsi_oem_properties },
	{ "DELL    MD3000i",	SD_CONF_BSET_NRR_COUNT, &lsi_oem_properties },
	{ "LSI     INF",	SD_CONF_BSET_NRR_COUNT, &lsi_oem_properties },
	{ "ENGENIO INF",	SD_CONF_BSET_NRR_COUNT, &lsi_oem_properties },
	{ "SGI     TP",		SD_CONF_BSET_NRR_COUNT, &lsi_oem_properties },
	{ "SGI     IS",		SD_CONF_BSET_NRR_COUNT, &lsi_oem_properties },
	{ "*CSM100_*",		SD_CONF_BSET_NRR_COUNT |
			SD_CONF_BSET_CACHE_IS_NV, &lsi_oem_properties },
	{ "*CSM200_*",		SD_CONF_BSET_NRR_COUNT |
			SD_CONF_BSET_CACHE_IS_NV, &lsi_oem_properties },
	{ "Fujitsu SX300",	SD_CONF_BSET_THROTTLE,  &lsi_oem_properties },
	{ "LSI",		SD_CONF_BSET_NRR_COUNT, &lsi_properties },
	{ "SUN     SESS01", SD_CONF_BSET_THROTTLE |
		SD_CONF_BSET_BSY_RETRY_COUNT|
		SD_CONF_BSET_RST_RETRIES|
		SD_CONF_BSET_RSV_REL_TIME|
		SD_CONF_BSET_MIN_THROTTLE|
		SD_CONF_BSET_DISKSORT_DISABLED,
		&sve_properties },
	{ "SUN     SVE01", SD_CONF_BSET_DISKSORT_DISABLED |
		SD_CONF_BSET_LUN_RESET_ENABLED,
		&maserati_properties },
	{ "SUN     SE6920", SD_CONF_BSET_THROTTLE |
		SD_CONF_BSET_NRR_COUNT|
		SD_CONF_BSET_BSY_RETRY_COUNT|
		SD_CONF_BSET_RST_RETRIES|
		SD_CONF_BSET_MIN_THROTTLE|
		SD_CONF_BSET_DISKSORT_DISABLED|
		SD_CONF_BSET_LUN_RESET_ENABLED,
		&pirus_properties },
	{ "SUN     SE6940", SD_CONF_BSET_THROTTLE |
		SD_CONF_BSET_NRR_COUNT|
		SD_CONF_BSET_BSY_RETRY_COUNT|
		SD_CONF_BSET_RST_RETRIES|
		SD_CONF_BSET_MIN_THROTTLE|
		SD_CONF_BSET_DISKSORT_DISABLED|
		SD_CONF_BSET_LUN_RESET_ENABLED,
		&pirus_properties },
	{ "SUN     StorageTek 6920", SD_CONF_BSET_THROTTLE |
		SD_CONF_BSET_NRR_COUNT|
		SD_CONF_BSET_BSY_RETRY_COUNT|
		SD_CONF_BSET_RST_RETRIES|
		SD_CONF_BSET_MIN_THROTTLE|
		SD_CONF_BSET_DISKSORT_DISABLED|
		SD_CONF_BSET_LUN_RESET_ENABLED,
		&pirus_properties },
	{ "SUN     StorageTek 6940", SD_CONF_BSET_THROTTLE |
		SD_CONF_BSET_NRR_COUNT|
		SD_CONF_BSET_BSY_RETRY_COUNT|
		SD_CONF_BSET_RST_RETRIES|
		SD_CONF_BSET_MIN_THROTTLE|
		SD_CONF_BSET_DISKSORT_DISABLED|
		SD_CONF_BSET_LUN_RESET_ENABLED,
		&pirus_properties },
	{ "SUN     PSX1000", SD_CONF_BSET_THROTTLE |
		SD_CONF_BSET_NRR_COUNT|
		SD_CONF_BSET_BSY_RETRY_COUNT|
		SD_CONF_BSET_RST_RETRIES|
		SD_CONF_BSET_MIN_THROTTLE|
		SD_CONF_BSET_DISKSORT_DISABLED|
		SD_CONF_BSET_LUN_RESET_ENABLED,
		&pirus_properties },
	{ "SUN     SE6330", SD_CONF_BSET_THROTTLE |
		SD_CONF_BSET_NRR_COUNT|
		SD_CONF_BSET_BSY_RETRY_COUNT|
		SD_CONF_BSET_RST_RETRIES|
		SD_CONF_BSET_MIN_THROTTLE|
		SD_CONF_BSET_DISKSORT_DISABLED|
		SD_CONF_BSET_LUN_RESET_ENABLED,
		&pirus_properties },
	{ "SUN     STK6580_6780", SD_CONF_BSET_NRR_COUNT, &lsi_oem_properties },
	{ "SUN     SUN_6180", SD_CONF_BSET_NRR_COUNT, &lsi_oem_properties },
	{ "STK     OPENstorage", SD_CONF_BSET_NRR_COUNT, &lsi_oem_properties },
	{ "STK     OpenStorage", SD_CONF_BSET_NRR_COUNT, &lsi_oem_properties },
	{ "STK     BladeCtlr",	SD_CONF_BSET_NRR_COUNT, &lsi_oem_properties },
	{ "STK     FLEXLINE",	SD_CONF_BSET_NRR_COUNT, &lsi_oem_properties },
	{ "SYMBIOS", SD_CONF_BSET_NRR_COUNT, &symbios_properties },
#endif /* fibre or NON-sparc platforms */
#if ((defined(__sparc) && !defined(__fibre)) ||\
	(defined(__x86)))
	{ "SEAGATE ST42400N", SD_CONF_BSET_THROTTLE, &elite_properties },
	{ "SEAGATE ST31200N", SD_CONF_BSET_THROTTLE, &st31200n_properties },
	{ "SEAGATE ST41600N", SD_CONF_BSET_TUR_CHECK, NULL },
	{ "CONNER  CP30540",  SD_CONF_BSET_NOCACHE,  NULL },
	{ "*SUN0104*", SD_CONF_BSET_FAB_DEVID, NULL },
	{ "*SUN0207*", SD_CONF_BSET_FAB_DEVID, NULL },
	{ "*SUN0327*", SD_CONF_BSET_FAB_DEVID, NULL },
	{ "*SUN0340*", SD_CONF_BSET_FAB_DEVID, NULL },
	{ "*SUN0424*", SD_CONF_BSET_FAB_DEVID, NULL },
	{ "*SUN0669*", SD_CONF_BSET_FAB_DEVID, NULL },
	{ "*SUN1.0G*", SD_CONF_BSET_FAB_DEVID, NULL },
	{ "SYMBIOS INF-01-00       ", SD_CONF_BSET_FAB_DEVID, NULL },
	{ "SYMBIOS", SD_CONF_BSET_THROTTLE|SD_CONF_BSET_NRR_COUNT,
	    &symbios_properties },
	{ "LSI", SD_CONF_BSET_THROTTLE | SD_CONF_BSET_NRR_COUNT,
	    &lsi_properties_scsi },
#if defined(__x86)
	{ " NEC CD-ROM DRIVE:260 ", (SD_CONF_BSET_PLAYMSF_BCD
				    | SD_CONF_BSET_READSUB_BCD
				    | SD_CONF_BSET_READ_TOC_ADDR_BCD
				    | SD_CONF_BSET_NO_READ_HEADER
				    | SD_CONF_BSET_READ_CD_XD4), NULL },

	{ " NEC CD-ROM DRIVE:270 ", (SD_CONF_BSET_PLAYMSF_BCD
				    | SD_CONF_BSET_READSUB_BCD
				    | SD_CONF_BSET_READ_TOC_ADDR_BCD
				    | SD_CONF_BSET_NO_READ_HEADER
				    | SD_CONF_BSET_READ_CD_XD4), NULL },
#endif /* __x86 */
#endif /* sparc NON-fibre or NON-sparc platforms */

#if (defined(SD_PROP_TST))
	{ "VENDOR  PRODUCT ", (SD_CONF_BSET_THROTTLE
				| SD_CONF_BSET_CTYPE
				| SD_CONF_BSET_NRR_COUNT
				| SD_CONF_BSET_FAB_DEVID
				| SD_CONF_BSET_NOCACHE
				| SD_CONF_BSET_BSY_RETRY_COUNT
				| SD_CONF_BSET_PLAYMSF_BCD
				| SD_CONF_BSET_READSUB_BCD
				| SD_CONF_BSET_READ_TOC_TRK_BCD
				| SD_CONF_BSET_READ_TOC_ADDR_BCD
				| SD_CONF_BSET_NO_READ_HEADER
				| SD_CONF_BSET_READ_CD_XD4
				| SD_CONF_BSET_RST_RETRIES
				| SD_CONF_BSET_RSV_REL_TIME
				| SD_CONF_BSET_TUR_CHECK), &tst_properties},
#endif
};

static const int sd_disk_table_size =
	sizeof (sd_disk_table)/ sizeof (sd_disk_config_t);
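
/*
 * Illustrative sketch (not compiled in) of how a table device_id is
 * matched: the vid/pid fields of the inquiry data are compared as one
 * concatenated string, for only as many bytes as the table entry
 * defines.  The helper name is hypothetical; the real comparison,
 * including "*" and blank handling, is in sd_sdconf_id_match() and
 * sd_blank_cmp().
 */
#if 0
static int
sd_example_id_match(struct scsi_inquiry *inq, const char *device_id)
{
	/*
	 * inq_vid (8 bytes) is immediately followed by inq_pid (16 bytes)
	 * in struct scsi_inquiry, so comparing from inq_vid covers the
	 * concatenated "vid + pid" prefix used by the table above.
	 */
	return (bcmp(inq->inq_vid, device_id, strlen(device_id)) == 0);
}
#endif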

/*
 * Emulation mode disk drive VID/PID table
 */
static char sd_flash_dev_table[][25] = {
	"ATA     MARVELL SD88SA02",
	"MARVELL SD88SA02",
	"TOSHIBA THNSNV05",
};

static const int sd_flash_dev_table_size =
	sizeof (sd_flash_dev_table) / sizeof (sd_flash_dev_table[0]);

#define	SD_INTERCONNECT_PARALLEL	0
#define	SD_INTERCONNECT_FABRIC		1
#define	SD_INTERCONNECT_FIBRE		2
#define	SD_INTERCONNECT_SSA		3
#define	SD_INTERCONNECT_SATA		4
#define	SD_INTERCONNECT_SAS		5

#define	SD_IS_PARALLEL_SCSI(un)		\
	((un)->un_interconnect_type == SD_INTERCONNECT_PARALLEL)
#define	SD_IS_SERIAL(un)		\
	(((un)->un_interconnect_type == SD_INTERCONNECT_SATA) ||\
	((un)->un_interconnect_type == SD_INTERCONNECT_SAS))

/*
 * Definitions used by device id registration routines
 */
#define	VPD_HEAD_OFFSET		3	/* size of head for vpd page */
#define	VPD_PAGE_LENGTH		3	/* offset for page length data */
#define	VPD_MODE_PAGE		1	/* offset into vpd pg for "page code" */

static kmutex_t sd_sense_mutex = {0};

/*
 * Macros for updates of the driver state
 */
#define	New_state(un, s)        \
	(un)->un_last_state = (un)->un_state, (un)->un_state = (s)
#define	Restore_state(un)	\
	{ uchar_t tmp = (un)->un_last_state; New_state((un), tmp); }
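
/*
 * Usage sketch: New_state(un, SD_STATE_SUSPENDED) records the current
 * state in un_last_state before switching, so a later Restore_state(un)
 * can swap back.  Both macros assume the caller holds the per-unit
 * softstate mutex (SD_MUTEX(un)).
 */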

static struct sd_cdbinfo sd_cdbtab[] = {
	{ CDB_GROUP0, 0x00,	   0x1FFFFF,   0xFF,	    },
	{ CDB_GROUP1, SCMD_GROUP1, 0xFFFFFFFF, 0xFFFF,	    },
	{ CDB_GROUP5, SCMD_GROUP5, 0xFFFFFFFF, 0xFFFFFFFF,  },
	{ CDB_GROUP4, SCMD_GROUP4, 0xFFFFFFFFFFFFFFFF, 0xFFFFFFFF, },
};
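
/*
 * Each entry above pairs a CDB group with the maximum LBA and transfer
 * length it can address.  For example: a request at LBA 0x100000000,
 * beyond the 32-bit limit of the group 1 (10-byte) CDBs, must use the
 * group 4 (16-byte) CDBs, whose 64-bit LBA field covers it.
 */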

/*
 * Specifies the number of seconds that must have elapsed since the last
 * command completed for a device to be declared idle to the PM framework.
 */
static int sd_pm_idletime = 1;

/*
 * Internal function prototypes
 */

#if (defined(__fibre))
/*
 * These #defines are to avoid namespace collisions that occur because this
 * code is currently used to compile two separate driver modules: sd and ssd.
 * All function names need to be treated this way (even if declared static)
 * in order to allow the debugger to resolve the names properly.
 * It is anticipated that in the near future the ssd module will be obsoleted,
 * at which time this ugliness should go away.
 */
#define	sd_log_trace			ssd_log_trace
#define	sd_log_info			ssd_log_info
#define	sd_log_err			ssd_log_err
#define	sdprobe				ssdprobe
#define	sdinfo				ssdinfo
#define	sd_prop_op			ssd_prop_op
#define	sd_scsi_probe_cache_init	ssd_scsi_probe_cache_init
#define	sd_scsi_probe_cache_fini	ssd_scsi_probe_cache_fini
#define	sd_scsi_clear_probe_cache	ssd_scsi_clear_probe_cache
#define	sd_scsi_probe_with_cache	ssd_scsi_probe_with_cache
#define	sd_scsi_target_lun_init		ssd_scsi_target_lun_init
#define	sd_scsi_target_lun_fini		ssd_scsi_target_lun_fini
#define	sd_scsi_get_target_lun_count	ssd_scsi_get_target_lun_count
#define	sd_scsi_update_lun_on_target	ssd_scsi_update_lun_on_target
#define	sd_spin_up_unit			ssd_spin_up_unit
#define	sd_enable_descr_sense		ssd_enable_descr_sense
#define	sd_reenable_dsense_task		ssd_reenable_dsense_task
#define	sd_set_mmc_caps			ssd_set_mmc_caps
#define	sd_read_unit_properties		ssd_read_unit_properties
#define	sd_process_sdconf_file		ssd_process_sdconf_file
#define	sd_process_sdconf_table		ssd_process_sdconf_table
#define	sd_sdconf_id_match		ssd_sdconf_id_match
#define	sd_blank_cmp			ssd_blank_cmp
#define	sd_chk_vers1_data		ssd_chk_vers1_data
#define	sd_set_vers1_properties		ssd_set_vers1_properties
#define	sd_check_bdc_vpd		ssd_check_bdc_vpd
#define	sd_check_emulation_mode		ssd_check_emulation_mode

#define	sd_get_physical_geometry	ssd_get_physical_geometry
#define	sd_get_virtual_geometry		ssd_get_virtual_geometry
#define	sd_update_block_info		ssd_update_block_info
#define	sd_register_devid		ssd_register_devid
#define	sd_get_devid			ssd_get_devid
#define	sd_create_devid			ssd_create_devid
#define	sd_write_deviceid		ssd_write_deviceid
#define	sd_check_vpd_page_support	ssd_check_vpd_page_support
#define	sd_setup_pm			ssd_setup_pm
#define	sd_create_pm_components		ssd_create_pm_components
#define	sd_ddi_suspend			ssd_ddi_suspend
#define	sd_ddi_resume			ssd_ddi_resume
#define	sd_pm_state_change		ssd_pm_state_change
#define	sdpower				ssdpower
#define	sdattach			ssdattach
#define	sddetach			ssddetach
#define	sd_unit_attach			ssd_unit_attach
#define	sd_unit_detach			ssd_unit_detach
#define	sd_set_unit_attributes		ssd_set_unit_attributes
#define	sd_create_errstats		ssd_create_errstats
#define	sd_set_errstats			ssd_set_errstats
#define	sd_set_pstats			ssd_set_pstats
#define	sddump				ssddump
#define	sd_scsi_poll			ssd_scsi_poll
#define	sd_send_polled_RQS		ssd_send_polled_RQS
#define	sd_ddi_scsi_poll		ssd_ddi_scsi_poll
#define	sd_init_event_callbacks		ssd_init_event_callbacks
#define	sd_event_callback		ssd_event_callback
#define	sd_cache_control		ssd_cache_control
#define	sd_get_write_cache_enabled	ssd_get_write_cache_enabled
#define	sd_get_write_cache_changeable	ssd_get_write_cache_changeable
#define	sd_get_nv_sup			ssd_get_nv_sup
#define	sd_make_device			ssd_make_device
#define	sdopen				ssdopen
#define	sdclose				ssdclose
#define	sd_ready_and_valid		ssd_ready_and_valid
#define	sdmin				ssdmin
#define	sdread				ssdread
#define	sdwrite				ssdwrite
#define	sdaread				ssdaread
#define	sdawrite			ssdawrite
#define	sdstrategy			ssdstrategy
#define	sdioctl				ssdioctl
#define	sd_mapblockaddr_iostart		ssd_mapblockaddr_iostart
#define	sd_mapblocksize_iostart		ssd_mapblocksize_iostart
#define	sd_checksum_iostart		ssd_checksum_iostart
#define	sd_checksum_uscsi_iostart	ssd_checksum_uscsi_iostart
#define	sd_pm_iostart			ssd_pm_iostart
#define	sd_core_iostart			ssd_core_iostart
#define	sd_mapblockaddr_iodone		ssd_mapblockaddr_iodone
#define	sd_mapblocksize_iodone		ssd_mapblocksize_iodone
#define	sd_checksum_iodone		ssd_checksum_iodone
#define	sd_checksum_uscsi_iodone	ssd_checksum_uscsi_iodone
#define	sd_pm_iodone			ssd_pm_iodone
#define	sd_initpkt_for_buf		ssd_initpkt_for_buf
#define	sd_destroypkt_for_buf		ssd_destroypkt_for_buf
#define	sd_setup_rw_pkt			ssd_setup_rw_pkt
#define	sd_setup_next_rw_pkt		ssd_setup_next_rw_pkt
#define	sd_buf_iodone			ssd_buf_iodone
#define	sd_uscsi_strategy		ssd_uscsi_strategy
#define	sd_initpkt_for_uscsi		ssd_initpkt_for_uscsi
#define	sd_destroypkt_for_uscsi		ssd_destroypkt_for_uscsi
#define	sd_uscsi_iodone			ssd_uscsi_iodone
#define	sd_xbuf_strategy		ssd_xbuf_strategy
#define	sd_xbuf_init			ssd_xbuf_init
#define	sd_pm_entry			ssd_pm_entry
#define	sd_pm_exit			ssd_pm_exit

#define	sd_pm_idletimeout_handler	ssd_pm_idletimeout_handler
#define	sd_pm_timeout_handler		ssd_pm_timeout_handler

#define	sd_add_buf_to_waitq		ssd_add_buf_to_waitq
#define	sdintr				ssdintr
#define	sd_start_cmds			ssd_start_cmds
#define	sd_send_scsi_cmd		ssd_send_scsi_cmd
#define	sd_bioclone_alloc		ssd_bioclone_alloc
#define	sd_bioclone_free		ssd_bioclone_free
#define	sd_shadow_buf_alloc		ssd_shadow_buf_alloc
#define	sd_shadow_buf_free		ssd_shadow_buf_free
#define	sd_print_transport_rejected_message	\
					ssd_print_transport_rejected_message
#define	sd_retry_command		ssd_retry_command
#define	sd_set_retry_bp			ssd_set_retry_bp
#define	sd_send_request_sense_command	ssd_send_request_sense_command
#define	sd_start_retry_command		ssd_start_retry_command
#define	sd_start_direct_priority_command	\
					ssd_start_direct_priority_command
#define	sd_return_failed_command	ssd_return_failed_command
#define	sd_return_failed_command_no_restart	\
					ssd_return_failed_command_no_restart
#define	sd_return_command		ssd_return_command
#define	sd_sync_with_callback		ssd_sync_with_callback
#define	sdrunout			ssdrunout
#define	sd_mark_rqs_busy		ssd_mark_rqs_busy
#define	sd_mark_rqs_idle		ssd_mark_rqs_idle
#define	sd_reduce_throttle		ssd_reduce_throttle
#define	sd_restore_throttle		ssd_restore_throttle
#define	sd_print_incomplete_msg		ssd_print_incomplete_msg
#define	sd_init_cdb_limits		ssd_init_cdb_limits
#define	sd_pkt_status_good		ssd_pkt_status_good
#define	sd_pkt_status_check_condition	ssd_pkt_status_check_condition
#define	sd_pkt_status_busy		ssd_pkt_status_busy
#define	sd_pkt_status_reservation_conflict	\
					ssd_pkt_status_reservation_conflict
#define	sd_pkt_status_qfull		ssd_pkt_status_qfull
#define	sd_handle_request_sense		ssd_handle_request_sense
#define	sd_handle_auto_request_sense	ssd_handle_auto_request_sense
#define	sd_print_sense_failed_msg	ssd_print_sense_failed_msg
#define	sd_validate_sense_data		ssd_validate_sense_data
#define	sd_decode_sense			ssd_decode_sense
#define	sd_print_sense_msg		ssd_print_sense_msg
#define	sd_sense_key_no_sense		ssd_sense_key_no_sense
#define	sd_sense_key_recoverable_error	ssd_sense_key_recoverable_error
#define	sd_sense_key_not_ready		ssd_sense_key_not_ready
#define	sd_sense_key_medium_or_hardware_error	\
					ssd_sense_key_medium_or_hardware_error
#define	sd_sense_key_illegal_request	ssd_sense_key_illegal_request
#define	sd_sense_key_unit_attention	ssd_sense_key_unit_attention
#define	sd_sense_key_fail_command	ssd_sense_key_fail_command
#define	sd_sense_key_blank_check	ssd_sense_key_blank_check
#define	sd_sense_key_aborted_command	ssd_sense_key_aborted_command
#define	sd_sense_key_default		ssd_sense_key_default
#define	sd_print_retry_msg		ssd_print_retry_msg
#define	sd_print_cmd_incomplete_msg	ssd_print_cmd_incomplete_msg
#define	sd_pkt_reason_cmd_incomplete	ssd_pkt_reason_cmd_incomplete
#define	sd_pkt_reason_cmd_tran_err	ssd_pkt_reason_cmd_tran_err
#define	sd_pkt_reason_cmd_reset		ssd_pkt_reason_cmd_reset
#define	sd_pkt_reason_cmd_aborted	ssd_pkt_reason_cmd_aborted
#define	sd_pkt_reason_cmd_timeout	ssd_pkt_reason_cmd_timeout
#define	sd_pkt_reason_cmd_unx_bus_free	ssd_pkt_reason_cmd_unx_bus_free
#define	sd_pkt_reason_cmd_tag_reject	ssd_pkt_reason_cmd_tag_reject
#define	sd_pkt_reason_default		ssd_pkt_reason_default
#define	sd_reset_target			ssd_reset_target
#define	sd_start_stop_unit_callback	ssd_start_stop_unit_callback
#define	sd_start_stop_unit_task		ssd_start_stop_unit_task
#define	sd_taskq_create			ssd_taskq_create
#define	sd_taskq_delete			ssd_taskq_delete
#define	sd_target_change_task		ssd_target_change_task
#define	sd_log_dev_status_event		ssd_log_dev_status_event
#define	sd_log_lun_expansion_event	ssd_log_lun_expansion_event
#define	sd_log_eject_request_event	ssd_log_eject_request_event
#define	sd_media_change_task		ssd_media_change_task
#define	sd_handle_mchange		ssd_handle_mchange
#define	sd_send_scsi_DOORLOCK		ssd_send_scsi_DOORLOCK
#define	sd_send_scsi_READ_CAPACITY	ssd_send_scsi_READ_CAPACITY
#define	sd_send_scsi_READ_CAPACITY_16	ssd_send_scsi_READ_CAPACITY_16
#define	sd_send_scsi_GET_CONFIGURATION	ssd_send_scsi_GET_CONFIGURATION
#define	sd_send_scsi_feature_GET_CONFIGURATION	\
					ssd_send_scsi_feature_GET_CONFIGURATION
#define	sd_send_scsi_START_STOP_UNIT	ssd_send_scsi_START_STOP_UNIT
#define	sd_send_scsi_INQUIRY		ssd_send_scsi_INQUIRY
#define	sd_send_scsi_TEST_UNIT_READY	ssd_send_scsi_TEST_UNIT_READY
#define	sd_send_scsi_PERSISTENT_RESERVE_IN	\
					ssd_send_scsi_PERSISTENT_RESERVE_IN
#define	sd_send_scsi_PERSISTENT_RESERVE_OUT	\
					ssd_send_scsi_PERSISTENT_RESERVE_OUT
#define	sd_send_scsi_SYNCHRONIZE_CACHE	ssd_send_scsi_SYNCHRONIZE_CACHE
#define	sd_send_scsi_SYNCHRONIZE_CACHE_biodone	\
					ssd_send_scsi_SYNCHRONIZE_CACHE_biodone
#define	sd_send_scsi_MODE_SENSE		ssd_send_scsi_MODE_SENSE
#define	sd_send_scsi_MODE_SELECT	ssd_send_scsi_MODE_SELECT
#define	sd_send_scsi_RDWR		ssd_send_scsi_RDWR
#define	sd_send_scsi_LOG_SENSE		ssd_send_scsi_LOG_SENSE
#define	sd_send_scsi_GET_EVENT_STATUS_NOTIFICATION	\
				ssd_send_scsi_GET_EVENT_STATUS_NOTIFICATION
#define	sd_gesn_media_data_valid	ssd_gesn_media_data_valid
#define	sd_alloc_rqs			ssd_alloc_rqs
#define	sd_free_rqs			ssd_free_rqs
#define	sd_dump_memory			ssd_dump_memory
#define	sd_get_media_info_com		ssd_get_media_info_com
#define	sd_get_media_info		ssd_get_media_info
#define	sd_get_media_info_ext		ssd_get_media_info_ext
#define	sd_dkio_ctrl_info		ssd_dkio_ctrl_info
#define	sd_nvpair_str_decode		ssd_nvpair_str_decode
#define	sd_strtok_r			ssd_strtok_r
#define	sd_set_properties		ssd_set_properties
#define	sd_get_tunables_from_conf	ssd_get_tunables_from_conf
#define	sd_setup_next_xfer		ssd_setup_next_xfer
#define	sd_dkio_get_temp		ssd_dkio_get_temp
#define	sd_check_mhd			ssd_check_mhd
#define	sd_mhd_watch_cb			ssd_mhd_watch_cb
#define	sd_mhd_watch_incomplete		ssd_mhd_watch_incomplete
#define	sd_sname			ssd_sname
#define	sd_mhd_resvd_recover		ssd_mhd_resvd_recover
#define	sd_resv_reclaim_thread		ssd_resv_reclaim_thread
#define	sd_take_ownership		ssd_take_ownership
#define	sd_reserve_release		ssd_reserve_release
#define	sd_rmv_resv_reclaim_req		ssd_rmv_resv_reclaim_req
#define	sd_mhd_reset_notify_cb		ssd_mhd_reset_notify_cb
#define	sd_persistent_reservation_in_read_keys	\
					ssd_persistent_reservation_in_read_keys
#define	sd_persistent_reservation_in_read_resv	\
					ssd_persistent_reservation_in_read_resv
#define	sd_mhdioc_takeown		ssd_mhdioc_takeown
#define	sd_mhdioc_failfast		ssd_mhdioc_failfast
#define	sd_mhdioc_release		ssd_mhdioc_release
#define	sd_mhdioc_register_devid	ssd_mhdioc_register_devid
#define	sd_mhdioc_inkeys		ssd_mhdioc_inkeys
#define	sd_mhdioc_inresv		ssd_mhdioc_inresv
#define	sr_change_blkmode		ssr_change_blkmode
#define	sr_change_speed			ssr_change_speed
#define	sr_atapi_change_speed		ssr_atapi_change_speed
#define	sr_pause_resume			ssr_pause_resume
#define	sr_play_msf			ssr_play_msf
#define	sr_play_trkind			ssr_play_trkind
#define	sr_read_all_subcodes		ssr_read_all_subcodes
#define	sr_read_subchannel		ssr_read_subchannel
#define	sr_read_tocentry		ssr_read_tocentry
#define	sr_read_tochdr			ssr_read_tochdr
#define	sr_read_cdda			ssr_read_cdda
#define	sr_read_cdxa			ssr_read_cdxa
#define	sr_read_mode1			ssr_read_mode1
#define	sr_read_mode2			ssr_read_mode2
#define	sr_read_cd_mode2		ssr_read_cd_mode2
#define	sr_sector_mode			ssr_sector_mode
#define	sr_eject			ssr_eject
#define	sr_ejected			ssr_ejected
#define	sr_check_wp			ssr_check_wp
#define	sd_watch_request_submit		ssd_watch_request_submit
#define	sd_check_media			ssd_check_media
#define	sd_media_watch_cb		ssd_media_watch_cb
#define	sd_delayed_cv_broadcast		ssd_delayed_cv_broadcast
#define	sr_volume_ctrl			ssr_volume_ctrl
#define	sr_read_sony_session_offset	ssr_read_sony_session_offset
#define	sd_log_page_supported		ssd_log_page_supported
#define	sd_check_for_writable_cd	ssd_check_for_writable_cd
#define	sd_wm_cache_constructor		ssd_wm_cache_constructor
#define	sd_wm_cache_destructor		ssd_wm_cache_destructor
#define	sd_range_lock			ssd_range_lock
#define	sd_get_range			ssd_get_range
#define	sd_free_inlist_wmap		ssd_free_inlist_wmap
#define	sd_range_unlock			ssd_range_unlock
#define	sd_read_modify_write_task	ssd_read_modify_write_task
#define	sddump_do_read_of_rmw		ssddump_do_read_of_rmw

#define	sd_iostart_chain		ssd_iostart_chain
#define	sd_iodone_chain			ssd_iodone_chain
#define	sd_initpkt_map			ssd_initpkt_map
#define	sd_destroypkt_map		ssd_destroypkt_map
#define	sd_chain_type_map		ssd_chain_type_map
#define	sd_chain_index_map		ssd_chain_index_map

#define	sd_failfast_flushctl		ssd_failfast_flushctl
#define	sd_failfast_flushq		ssd_failfast_flushq
#define	sd_failfast_flushq_callback	ssd_failfast_flushq_callback

#define	sd_is_lsi			ssd_is_lsi
#define	sd_tg_rdwr			ssd_tg_rdwr
#define	sd_tg_getinfo			ssd_tg_getinfo
#define	sd_rmw_msg_print_handler	ssd_rmw_msg_print_handler

#endif	/* #if (defined(__fibre)) */

typedef struct unmap_param_hdr_s {
	uint16_t	uph_data_len;
	uint16_t	uph_descr_data_len;
	uint32_t	uph_reserved;
} unmap_param_hdr_t;

typedef struct unmap_blk_descr_s {
	uint64_t	ubd_lba;
	uint32_t	ubd_lba_cnt;
	uint32_t	ubd_reserved;
} unmap_blk_descr_t;

/* Max number of block descriptors in UNMAP command */
#define	SD_UNMAP_MAX_DESCR \
	((UINT16_MAX - sizeof (unmap_param_hdr_t)) / sizeof (unmap_blk_descr_t))
/* Max size of the UNMAP parameter list in bytes */
#define	SD_UNMAP_PARAM_LIST_MAXSZ	(sizeof (unmap_param_hdr_t) + \
	SD_UNMAP_MAX_DESCR * sizeof (unmap_blk_descr_t))
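
/*
 * Worked example: with the 8-byte parameter header and 16-byte block
 * descriptors above, SD_UNMAP_MAX_DESCR is (65535 - 8) / 16 = 4095, so
 * SD_UNMAP_PARAM_LIST_MAXSZ is 8 + 4095 * 16 = 65528 bytes, which fits
 * within the 16-bit parameter list length field of the UNMAP CDB.
 */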

int _init(void);
int _fini(void);
int _info(struct modinfo *modinfop);

/*PRINTFLIKE3*/
static void sd_log_trace(uint_t comp, struct sd_lun *un, const char *fmt, ...);
/*PRINTFLIKE3*/
static void sd_log_info(uint_t comp, struct sd_lun *un, const char *fmt, ...);
/*PRINTFLIKE3*/
static void sd_log_err(uint_t comp, struct sd_lun *un, const char *fmt, ...);

static int sdprobe(dev_info_t *devi);
static int sdinfo(dev_info_t *dip, ddi_info_cmd_t infocmd, void *arg,
    void **result);
static int sd_prop_op(dev_t dev, dev_info_t *dip, ddi_prop_op_t prop_op,
    int mod_flags, char *name, caddr_t valuep, int *lengthp);

/*
 * Smart probe for parallel scsi
 */
static void sd_scsi_probe_cache_init(void);
static void sd_scsi_probe_cache_fini(void);
static void sd_scsi_clear_probe_cache(void);
static int  sd_scsi_probe_with_cache(struct scsi_device *devp, int (*fn)());

/*
 * Attached luns on target for parallel scsi
 */
static void sd_scsi_target_lun_init(void);
static void sd_scsi_target_lun_fini(void);
static int  sd_scsi_get_target_lun_count(dev_info_t *dip, int target);
static void sd_scsi_update_lun_on_target(dev_info_t *dip, int target, int flag);

static int sd_spin_up_unit(sd_ssc_t *ssc);

/*
 * Use sd_ssc_init to establish an sd_ssc_t struct,
 * sd_ssc_send to send a uscsi internal command, and
 * sd_ssc_fini to free the sd_ssc_t struct.
 */
static sd_ssc_t *sd_ssc_init(struct sd_lun *un);
static int sd_ssc_send(sd_ssc_t *ssc, struct uscsi_cmd *incmd,
    int flag, enum uio_seg dataspace, int path_flag);
static void sd_ssc_fini(sd_ssc_t *ssc);

/*
 * Use sd_ssc_assessment to set the correct type of assessment, and
 * sd_ssc_post to post an ereport & system log.
 *       sd_ssc_post will call sd_ssc_print to print the system log, and
 *       sd_ssc_ereport_post to post the ereport.
 */
static void sd_ssc_assessment(sd_ssc_t *ssc,
    enum sd_type_assessment tp_assess);

static void sd_ssc_post(sd_ssc_t *ssc, enum sd_driver_assessment sd_assess);
static void sd_ssc_print(sd_ssc_t *ssc, int sd_severity);
static void sd_ssc_ereport_post(sd_ssc_t *ssc,
    enum sd_driver_assessment drv_assess);

/*
 * Use sd_ssc_set_info to record an error whose data cannot be decoded, and
 * sd_ssc_extract_info to transfer information from internal
 *       data structures to the sd_ssc_t.
 */
static void sd_ssc_set_info(sd_ssc_t *ssc, int ssc_flags, uint_t comp,
    const char *fmt, ...);
static void sd_ssc_extract_info(sd_ssc_t *ssc, struct sd_lun *un,
    struct scsi_pkt *pktp, struct buf *bp, struct sd_xbuf *xp);
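
/*
 * A minimal usage sketch (not compiled in) of the sd_ssc_t lifecycle
 * described above, assuming a caller that already holds a struct sd_lun;
 * the uscsi_cmd setup is elided and the helper name is hypothetical.
 */
#if 0
static void
sd_example_ssc_cycle(struct sd_lun *un, struct uscsi_cmd *ucmd)
{
	sd_ssc_t	*ssc = sd_ssc_init(un);
	int		status;

	status = sd_ssc_send(ssc, ucmd, FKIOCTL, UIO_SYSSPACE, SD_PATH_DIRECT);
	if (status == 0)
		sd_ssc_assessment(ssc, SD_FMT_STANDARD);
	else
		sd_ssc_assessment(ssc, SD_FMT_IGNORE);
	sd_ssc_fini(ssc);
}
#endif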

static int sd_send_scsi_cmd(dev_t dev, struct uscsi_cmd *incmd, int flag,
    enum uio_seg dataspace, int path_flag);

#ifdef _LP64
static void	sd_enable_descr_sense(sd_ssc_t *ssc);
static void	sd_reenable_dsense_task(void *arg);
#endif /* _LP64 */

static void	sd_set_mmc_caps(sd_ssc_t *ssc);

static void sd_read_unit_properties(struct sd_lun *un);
static int  sd_process_sdconf_file(struct sd_lun *un);
static void sd_nvpair_str_decode(struct sd_lun *un, char *nvpair_str);
static char *sd_strtok_r(char *string, const char *sepset, char **lasts);
static void sd_set_properties(struct sd_lun *un, char *name, char *value);
static void sd_get_tunables_from_conf(struct sd_lun *un, int flags,
    int *data_list, sd_tunables *values);
static void sd_process_sdconf_table(struct sd_lun *un);
static int  sd_sdconf_id_match(struct sd_lun *un, char *id, int idlen);
static int  sd_blank_cmp(struct sd_lun *un, char *id, int idlen);
static int  sd_chk_vers1_data(struct sd_lun *un, int flags, int *prop_list,
    int list_len, char *dataname_ptr);
static void sd_set_vers1_properties(struct sd_lun *un, int flags,
    sd_tunables *prop_list);

static void sd_register_devid(sd_ssc_t *ssc, dev_info_t *devi,
    int reservation_flag);
static int  sd_get_devid(sd_ssc_t *ssc);
static ddi_devid_t sd_create_devid(sd_ssc_t *ssc);
static int  sd_write_deviceid(sd_ssc_t *ssc);
static int  sd_check_vpd_page_support(sd_ssc_t *ssc);

static void sd_setup_pm(sd_ssc_t *ssc, dev_info_t *devi);
static void sd_create_pm_components(dev_info_t *devi, struct sd_lun *un);

static int  sd_ddi_suspend(dev_info_t *devi);
static int  sd_ddi_resume(dev_info_t *devi);
static int  sd_pm_state_change(struct sd_lun *un, int level, int flag);
static int  sdpower(dev_info_t *devi, int component, int level);

static int  sdattach(dev_info_t *devi, ddi_attach_cmd_t cmd);
static int  sddetach(dev_info_t *devi, ddi_detach_cmd_t cmd);
static int  sd_unit_attach(dev_info_t *devi);
static int  sd_unit_detach(dev_info_t *devi);

static void sd_set_unit_attributes(struct sd_lun *un, dev_info_t *devi);
static void sd_create_errstats(struct sd_lun *un, int instance);
static void sd_set_errstats(struct sd_lun *un);
static void sd_set_pstats(struct sd_lun *un);

static int  sddump(dev_t dev, caddr_t addr, daddr_t blkno, int nblk);
static int  sd_scsi_poll(struct sd_lun *un, struct scsi_pkt *pkt);
static int  sd_send_polled_RQS(struct sd_lun *un);
static int  sd_ddi_scsi_poll(struct scsi_pkt *pkt);

#if (defined(__fibre))
/*
 * Event callbacks (photon)
 */
static void sd_init_event_callbacks(struct sd_lun *un);
static void  sd_event_callback(dev_info_t *, ddi_eventcookie_t, void *, void *);
#endif

/*
 * Defines for sd_cache_control
 */

#define	SD_CACHE_ENABLE		1
#define	SD_CACHE_DISABLE	0
#define	SD_CACHE_NOCHANGE	-1
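
/*
 * Usage sketch: sd_cache_control(ssc, SD_CACHE_NOCHANGE, SD_CACHE_ENABLE)
 * would enable the write cache while leaving the read cache setting
 * untouched (rcd_flag and wce_flag respectively, per the prototype below).
 */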

static int   sd_cache_control(sd_ssc_t *ssc, int rcd_flag, int wce_flag);
static int   sd_get_write_cache_enabled(sd_ssc_t *ssc, int *is_enabled);
static void  sd_get_write_cache_changeable(sd_ssc_t *ssc, int *is_changeable);
static void  sd_get_nv_sup(sd_ssc_t *ssc);
static dev_t sd_make_device(dev_info_t *devi);
static void  sd_check_bdc_vpd(sd_ssc_t *ssc);
static void  sd_check_emulation_mode(sd_ssc_t *ssc);
static void  sd_update_block_info(struct sd_lun *un, uint32_t lbasize,
    uint64_t capacity);

/*
 * Driver entry point functions.
 */
static int  sdopen(dev_t *dev_p, int flag, int otyp, cred_t *cred_p);
static int  sdclose(dev_t dev, int flag, int otyp, cred_t *cred_p);
static int  sd_ready_and_valid(sd_ssc_t *ssc, int part);

static void sdmin(struct buf *bp);
static int sdread(dev_t dev, struct uio *uio, cred_t *cred_p);
static int sdwrite(dev_t dev, struct uio *uio, cred_t *cred_p);
static int sdaread(dev_t dev, struct aio_req *aio, cred_t *cred_p);
static int sdawrite(dev_t dev, struct aio_req *aio, cred_t *cred_p);

static int sdstrategy(struct buf *bp);
static int sdioctl(dev_t, int, intptr_t, int, cred_t *, int *);

/*
 * Function prototypes for layering functions in the iostart chain.
 */
static void sd_mapblockaddr_iostart(int index, struct sd_lun *un,
    struct buf *bp);
static void sd_mapblocksize_iostart(int index, struct sd_lun *un,
    struct buf *bp);
static void sd_checksum_iostart(int index, struct sd_lun *un, struct buf *bp);
static void sd_checksum_uscsi_iostart(int index, struct sd_lun *un,
    struct buf *bp);
static void sd_pm_iostart(int index, struct sd_lun *un, struct buf *bp);
static void sd_core_iostart(int index, struct sd_lun *un, struct buf *bp);

/*
 * Function prototypes for layering functions in the iodone chain.
 */
static void sd_buf_iodone(int index, struct sd_lun *un, struct buf *bp);
static void sd_uscsi_iodone(int index, struct sd_lun *un, struct buf *bp);
static void sd_mapblockaddr_iodone(int index, struct sd_lun *un,
    struct buf *bp);
static void sd_mapblocksize_iodone(int index, struct sd_lun *un,
    struct buf *bp);
static void sd_checksum_iodone(int index, struct sd_lun *un, struct buf *bp);
static void sd_checksum_uscsi_iodone(int index, struct sd_lun *un,
    struct buf *bp);
static void sd_pm_iodone(int index, struct sd_lun *un, struct buf *bp);

/*
 * Prototypes for functions to support buf(9S) based IO.
 */
static void sd_xbuf_strategy(struct buf *bp, ddi_xbuf_t xp, void *arg);
static int sd_initpkt_for_buf(struct buf *, struct scsi_pkt **);
static void sd_destroypkt_for_buf(struct buf *);
static int sd_setup_rw_pkt(struct sd_lun *un, struct scsi_pkt **pktpp,
    struct buf *bp, int flags,
    int (*callback)(caddr_t), caddr_t callback_arg,
    diskaddr_t lba, uint32_t blockcount);
static int sd_setup_next_rw_pkt(struct sd_lun *un, struct scsi_pkt *pktp,
    struct buf *bp, diskaddr_t lba, uint32_t blockcount);

/*
 * Prototypes for functions to support USCSI IO.
 */
static int sd_uscsi_strategy(struct buf *bp);
static int sd_initpkt_for_uscsi(struct buf *, struct scsi_pkt **);
static void sd_destroypkt_for_uscsi(struct buf *);

static void sd_xbuf_init(struct sd_lun *un, struct buf *bp, struct sd_xbuf *xp,
    uchar_t chain_type, void *pktinfop);

static int  sd_pm_entry(struct sd_lun *un);
static void sd_pm_exit(struct sd_lun *un);

static void sd_pm_idletimeout_handler(void *arg);

/*
 * sd_core internal functions (used at the sd_core_io layer).
 */
static void sd_add_buf_to_waitq(struct sd_lun *un, struct buf *bp);
static void sdintr(struct scsi_pkt *pktp);
static void sd_start_cmds(struct sd_lun *un, struct buf *immed_bp);

static int sd_send_scsi_cmd(dev_t dev, struct uscsi_cmd *incmd, int flag,
    enum uio_seg dataspace, int path_flag);

static struct buf *sd_bioclone_alloc(struct buf *bp, size_t datalen,
    daddr_t blkno, int (*func)(struct buf *));
static struct buf *sd_shadow_buf_alloc(struct buf *bp, size_t datalen,
    uint_t bflags, daddr_t blkno, int (*func)(struct buf *));
static void sd_bioclone_free(struct buf *bp);
static void sd_shadow_buf_free(struct buf *bp);

static void sd_print_transport_rejected_message(struct sd_lun *un,
    struct sd_xbuf *xp, int code);
static void sd_print_incomplete_msg(struct sd_lun *un, struct buf *bp,
    void *arg, int code);
static void sd_print_sense_failed_msg(struct sd_lun *un, struct buf *bp,
    void *arg, int code);
static void sd_print_cmd_incomplete_msg(struct sd_lun *un, struct buf *bp,
    void *arg, int code);

static void sd_retry_command(struct sd_lun *un, struct buf *bp,
    int retry_check_flag,
    void (*user_funcp)(struct sd_lun *un, struct buf *bp, void *argp, int c),
    void *user_arg, int failure_code,  clock_t retry_delay,
    void (*statp)(kstat_io_t *));

static void sd_set_retry_bp(struct sd_lun *un, struct buf *bp,
    clock_t retry_delay, void (*statp)(kstat_io_t *));

static void sd_send_request_sense_command(struct sd_lun *un, struct buf *bp,
    struct scsi_pkt *pktp);
static void sd_start_retry_command(void *arg);
static void sd_start_direct_priority_command(void *arg);
static void sd_return_failed_command(struct sd_lun *un, struct buf *bp,
    int errcode);
static void sd_return_failed_command_no_restart(struct sd_lun *un,
    struct buf *bp, int errcode);
static void sd_return_command(struct sd_lun *un, struct buf *bp);
static void sd_sync_with_callback(struct sd_lun *un);
static int sdrunout(caddr_t arg);

static void sd_mark_rqs_busy(struct sd_lun *un, struct buf *bp);
static struct buf *sd_mark_rqs_idle(struct sd_lun *un, struct sd_xbuf *xp);

static void sd_reduce_throttle(struct sd_lun *un, int throttle_type);
static void sd_restore_throttle(void *arg);

static void sd_init_cdb_limits(struct sd_lun *un);

static void sd_pkt_status_good(struct sd_lun *un, struct buf *bp,
    struct sd_xbuf *xp, struct scsi_pkt *pktp);

/*
 * Error handling functions
 */
static void sd_pkt_status_check_condition(struct sd_lun *un, struct buf *bp,
    struct sd_xbuf *xp, struct scsi_pkt *pktp);
static void sd_pkt_status_busy(struct sd_lun *un, struct buf *bp,
    struct sd_xbuf *xp, struct scsi_pkt *pktp);
static void sd_pkt_status_reservation_conflict(struct sd_lun *un,
    struct buf *bp, struct sd_xbuf *xp, struct scsi_pkt *pktp);
static void sd_pkt_status_qfull(struct sd_lun *un, struct buf *bp,
    struct sd_xbuf *xp, struct scsi_pkt *pktp);

static void sd_handle_request_sense(struct sd_lun *un, struct buf *bp,
    struct sd_xbuf *xp, struct scsi_pkt *pktp);
static void sd_handle_auto_request_sense(struct sd_lun *un, struct buf *bp,
    struct sd_xbuf *xp, struct scsi_pkt *pktp);
static int sd_validate_sense_data(struct sd_lun *un, struct buf *bp,
    struct sd_xbuf *xp, size_t actual_len);
static void sd_decode_sense(struct sd_lun *un, struct buf *bp,
    struct sd_xbuf *xp, struct scsi_pkt *pktp);

static void sd_print_sense_msg(struct sd_lun *un, struct buf *bp,
    void *arg, int code);

static void sd_sense_key_no_sense(struct sd_lun *un, struct buf *bp,
    struct sd_xbuf *xp, struct scsi_pkt *pktp);
static void sd_sense_key_recoverable_error(struct sd_lun *un,
    uint8_t *sense_datap,
    struct buf *bp, struct sd_xbuf *xp, struct scsi_pkt *pktp);
static void sd_sense_key_not_ready(struct sd_lun *un,
    uint8_t *sense_datap,
    struct buf *bp, struct sd_xbuf *xp, struct scsi_pkt *pktp);
static void sd_sense_key_medium_or_hardware_error(struct sd_lun *un,
    uint8_t *sense_datap,
    struct buf *bp, struct sd_xbuf *xp, struct scsi_pkt *pktp);
static void sd_sense_key_illegal_request(struct sd_lun *un, struct buf *bp,
    struct sd_xbuf *xp, struct scsi_pkt *pktp);
static void sd_sense_key_unit_attention(struct sd_lun *un,
    uint8_t *sense_datap,
    struct buf *bp, struct sd_xbuf *xp, struct scsi_pkt *pktp);
static void sd_sense_key_fail_command(struct sd_lun *un, struct buf *bp,
    struct sd_xbuf *xp, struct scsi_pkt *pktp);
static void sd_sense_key_blank_check(struct sd_lun *un, struct buf *bp,
    struct sd_xbuf *xp, struct scsi_pkt *pktp);
static void sd_sense_key_aborted_command(struct sd_lun *un, struct buf *bp,
    struct sd_xbuf *xp, struct scsi_pkt *pktp);
static void sd_sense_key_default(struct sd_lun *un,
    uint8_t *sense_datap,
    struct buf *bp, struct sd_xbuf *xp, struct scsi_pkt *pktp);

static void sd_print_retry_msg(struct sd_lun *un, struct buf *bp,
    void *arg, int flag);

static void sd_pkt_reason_cmd_incomplete(struct sd_lun *un, struct buf *bp,
    struct sd_xbuf *xp, struct scsi_pkt *pktp);
static void sd_pkt_reason_cmd_tran_err(struct sd_lun *un, struct buf *bp,
    struct sd_xbuf *xp, struct scsi_pkt *pktp);
static void sd_pkt_reason_cmd_reset(struct sd_lun *un, struct buf *bp,
    struct sd_xbuf *xp, struct scsi_pkt *pktp);
static void sd_pkt_reason_cmd_aborted(struct sd_lun *un, struct buf *bp,
1494     struct sd_xbuf *xp, struct scsi_pkt *pktp);
1495 static void sd_pkt_reason_cmd_timeout(struct sd_lun *un, struct buf *bp,
1496     struct sd_xbuf *xp, struct scsi_pkt *pktp);
1497 static void sd_pkt_reason_cmd_unx_bus_free(struct sd_lun *un, struct buf *bp,
1498     struct sd_xbuf *xp, struct scsi_pkt *pktp);
1499 static void sd_pkt_reason_cmd_tag_reject(struct sd_lun *un, struct buf *bp,
1500     struct sd_xbuf *xp, struct scsi_pkt *pktp);
1501 static void sd_pkt_reason_default(struct sd_lun *un, struct buf *bp,
1502     struct sd_xbuf *xp, struct scsi_pkt *pktp);
1503 
1504 static void sd_reset_target(struct sd_lun *un, struct scsi_pkt *pktp);
1505 
1506 static void sd_start_stop_unit_callback(void *arg);
1507 static void sd_start_stop_unit_task(void *arg);
1508 
1509 static void sd_taskq_create(void);
1510 static void sd_taskq_delete(void);
1511 static void sd_target_change_task(void *arg);
1512 static void sd_log_dev_status_event(struct sd_lun *un, char *esc, int km_flag);
1513 static void sd_log_lun_expansion_event(struct sd_lun *un, int km_flag);
1514 static void sd_log_eject_request_event(struct sd_lun *un, int km_flag);
1515 static void sd_media_change_task(void *arg);
1516 
1517 static int sd_handle_mchange(struct sd_lun *un);
1518 static int sd_send_scsi_DOORLOCK(sd_ssc_t *ssc, int flag, int path_flag);
1519 static int sd_send_scsi_READ_CAPACITY(sd_ssc_t *ssc, uint64_t *capp,
1520     uint32_t *lbap, int path_flag);
1521 static int sd_send_scsi_READ_CAPACITY_16(sd_ssc_t *ssc, uint64_t *capp,
1522     uint32_t *lbap, uint32_t *psp, int path_flag);
1523 static int sd_send_scsi_START_STOP_UNIT(sd_ssc_t *ssc, int pc_flag,
1524     int flag, int path_flag);
1525 static int sd_send_scsi_INQUIRY(sd_ssc_t *ssc, uchar_t *bufaddr,
1526     size_t buflen, uchar_t evpd, uchar_t page_code, size_t *residp);
1527 static int sd_send_scsi_TEST_UNIT_READY(sd_ssc_t *ssc, int flag);
1528 static int sd_send_scsi_PERSISTENT_RESERVE_IN(sd_ssc_t *ssc,
1529     uchar_t usr_cmd, uint16_t data_len, uchar_t *data_bufp);
1530 static int sd_send_scsi_PERSISTENT_RESERVE_OUT(sd_ssc_t *ssc,
1531     uchar_t usr_cmd, uchar_t *usr_bufp);
1532 static int sd_send_scsi_SYNCHRONIZE_CACHE(struct sd_lun *un,
1533     struct dk_callback *dkc);
1534 static int sd_send_scsi_SYNCHRONIZE_CACHE_biodone(struct buf *bp);
1535 static int sd_send_scsi_UNMAP(dev_t dev, sd_ssc_t *ssc, dkioc_free_list_t *dfl,
1536     int flag);
1537 static int sd_send_scsi_GET_CONFIGURATION(sd_ssc_t *ssc,
1538     struct uscsi_cmd *ucmdbuf, uchar_t *rqbuf, uint_t rqbuflen,
1539     uchar_t *bufaddr, uint_t buflen, int path_flag);
1540 static int sd_send_scsi_feature_GET_CONFIGURATION(sd_ssc_t *ssc,
1541     struct uscsi_cmd *ucmdbuf, uchar_t *rqbuf, uint_t rqbuflen,
1542     uchar_t *bufaddr, uint_t buflen, char feature, int path_flag);
1543 static int sd_send_scsi_MODE_SENSE(sd_ssc_t *ssc, int cdbsize,
1544     uchar_t *bufaddr, size_t buflen, uchar_t page_code, int path_flag);
1545 static int sd_send_scsi_MODE_SELECT(sd_ssc_t *ssc, int cdbsize,
1546     uchar_t *bufaddr, size_t buflen, uchar_t save_page, int path_flag);
1547 static int sd_send_scsi_RDWR(sd_ssc_t *ssc, uchar_t cmd, void *bufaddr,
1548     size_t buflen, daddr_t start_block, int path_flag);
1549 #define	sd_send_scsi_READ(ssc, bufaddr, buflen, start_block, path_flag)	\
1550     sd_send_scsi_RDWR(ssc, SCMD_READ, bufaddr, buflen, start_block, \
1551     path_flag)
1552 #define	sd_send_scsi_WRITE(ssc, bufaddr, buflen, start_block, path_flag)\
1553     sd_send_scsi_RDWR(ssc, SCMD_WRITE, bufaddr, buflen, start_block,\
1554     path_flag)
1555 
1556 static int sd_send_scsi_LOG_SENSE(sd_ssc_t *ssc, uchar_t *bufaddr,
1557     uint16_t buflen, uchar_t page_code, uchar_t page_control,
1558     uint16_t param_ptr, int path_flag);
1559 static int sd_send_scsi_GET_EVENT_STATUS_NOTIFICATION(sd_ssc_t *ssc,
1560     uchar_t *bufaddr, size_t buflen, uchar_t class_req);
1561 static boolean_t sd_gesn_media_data_valid(uchar_t *data);
1562 
1563 static int  sd_alloc_rqs(struct scsi_device *devp, struct sd_lun *un);
1564 static void sd_free_rqs(struct sd_lun *un);
1565 
1566 static void sd_dump_memory(struct sd_lun *un, uint_t comp, char *title,
1567     uchar_t *data, int len, int fmt);
1568 static void sd_panic_for_res_conflict(struct sd_lun *un);
1569 
1570 /*
1571  * Disk Ioctl Function Prototypes
1572  */
1573 static int sd_get_media_info(dev_t dev, caddr_t arg, int flag);
1574 static int sd_get_media_info_ext(dev_t dev, caddr_t arg, int flag);
1575 static int sd_dkio_ctrl_info(dev_t dev, caddr_t arg, int flag);
1576 static int sd_dkio_get_temp(dev_t dev, caddr_t arg, int flag);
1577 
1578 /*
1579  * Multi-host Ioctl Prototypes
1580  */
1581 static int sd_check_mhd(dev_t dev, int interval);
1582 static int sd_mhd_watch_cb(caddr_t arg, struct scsi_watch_result *resultp);
1583 static void sd_mhd_watch_incomplete(struct sd_lun *un, struct scsi_pkt *pkt);
1584 static char *sd_sname(uchar_t status);
1585 static void sd_mhd_resvd_recover(void *arg);
1586 static void sd_resv_reclaim_thread(void);
1587 static int sd_take_ownership(dev_t dev, struct mhioctkown *p);
1588 static int sd_reserve_release(dev_t dev, int cmd);
1589 static void sd_rmv_resv_reclaim_req(dev_t dev);
1590 static void sd_mhd_reset_notify_cb(caddr_t arg);
1591 static int sd_persistent_reservation_in_read_keys(struct sd_lun *un,
1592     mhioc_inkeys_t *usrp, int flag);
1593 static int sd_persistent_reservation_in_read_resv(struct sd_lun *un,
1594     mhioc_inresvs_t *usrp, int flag);
1595 static int sd_mhdioc_takeown(dev_t dev, caddr_t arg, int flag);
1596 static int sd_mhdioc_failfast(dev_t dev, caddr_t arg, int flag);
1597 static int sd_mhdioc_release(dev_t dev);
1598 static int sd_mhdioc_register_devid(dev_t dev);
1599 static int sd_mhdioc_inkeys(dev_t dev, caddr_t arg, int flag);
1600 static int sd_mhdioc_inresv(dev_t dev, caddr_t arg, int flag);
1601 
1602 /*
1603  * SCSI removable prototypes
1604  */
1605 static int sr_change_blkmode(dev_t dev, int cmd, intptr_t data, int flag);
1606 static int sr_change_speed(dev_t dev, int cmd, intptr_t data, int flag);
1607 static int sr_atapi_change_speed(dev_t dev, int cmd, intptr_t data, int flag);
1608 static int sr_pause_resume(dev_t dev, int mode);
1609 static int sr_play_msf(dev_t dev, caddr_t data, int flag);
1610 static int sr_play_trkind(dev_t dev, caddr_t data, int flag);
1611 static int sr_read_all_subcodes(dev_t dev, caddr_t data, int flag);
1612 static int sr_read_subchannel(dev_t dev, caddr_t data, int flag);
1613 static int sr_read_tocentry(dev_t dev, caddr_t data, int flag);
1614 static int sr_read_tochdr(dev_t dev, caddr_t data, int flag);
1615 static int sr_read_cdda(dev_t dev, caddr_t data, int flag);
1616 static int sr_read_cdxa(dev_t dev, caddr_t data, int flag);
1617 static int sr_read_mode1(dev_t dev, caddr_t data, int flag);
1618 static int sr_read_mode2(dev_t dev, caddr_t data, int flag);
1619 static int sr_read_cd_mode2(dev_t dev, caddr_t data, int flag);
1620 static int sr_sector_mode(dev_t dev, uint32_t blksize);
1621 static int sr_eject(dev_t dev);
1622 static void sr_ejected(struct sd_lun *un);
1623 static int sr_check_wp(dev_t dev);
1624 static opaque_t sd_watch_request_submit(struct sd_lun *un);
1625 static int sd_check_media(dev_t dev, enum dkio_state state);
1626 static int sd_media_watch_cb(caddr_t arg, struct scsi_watch_result *resultp);
1627 static void sd_delayed_cv_broadcast(void *arg);
1628 static int sr_volume_ctrl(dev_t dev, caddr_t data, int flag);
1629 static int sr_read_sony_session_offset(dev_t dev, caddr_t data, int flag);
1630 
1631 static int sd_log_page_supported(sd_ssc_t *ssc, int log_page);
1632 
1633 /*
1634  * Function Prototype for the non-512 support (DVDRAM, MO etc.) functions.
1635  */
1636 static void sd_check_for_writable_cd(sd_ssc_t *ssc, int path_flag);
1637 static int sd_wm_cache_constructor(void *wm, void *un, int flags);
1638 static void sd_wm_cache_destructor(void *wm, void *un);
1639 static struct sd_w_map *sd_range_lock(struct sd_lun *un, daddr_t startb,
1640     daddr_t endb, ushort_t typ);
1641 static struct sd_w_map *sd_get_range(struct sd_lun *un, daddr_t startb,
1642     daddr_t endb);
1643 static void sd_free_inlist_wmap(struct sd_lun *un, struct sd_w_map *wmp);
1644 static void sd_range_unlock(struct sd_lun *un, struct sd_w_map *wm);
1645 static void sd_read_modify_write_task(void *arg);
1646 static int sddump_do_read_of_rmw(struct sd_lun *un, uint64_t blkno,
1647     uint64_t nblk, struct buf **bpp);
1649 
1650 
1651 /*
1652  * Function prototypes for failfast support.
1653  */
1654 static void sd_failfast_flushq(struct sd_lun *un);
1655 static int sd_failfast_flushq_callback(struct buf *bp);
1656 
1657 /*
1658  * Function prototypes to check for lsi devices
1659  * Function prototypes to check for LSI devices
1660 static void sd_is_lsi(struct sd_lun *un);
1661 
1662 /*
1663  * Function prototypes for partial DMA support
1664  */
1665 static int sd_setup_next_xfer(struct sd_lun *un, struct buf *bp,
1666     struct scsi_pkt *pkt, struct sd_xbuf *xp);
1667 
1668 
1669 /* Function prototypes for cmlb */
1670 static int sd_tg_rdwr(dev_info_t *devi, uchar_t cmd, void *bufaddr,
1671     diskaddr_t start_block, size_t reqlength, void *tg_cookie);
1672 
1673 static int sd_tg_getinfo(dev_info_t *devi, int cmd, void *arg, void *tg_cookie);
1674 
1675 /*
1676  * For printing RMW warning message timely
1677  * For timely printing of the RMW warning message
1678 static void sd_rmw_msg_print_handler(void *arg);
1679 
1680 /*
1681  * Constants for failfast support:
1682  *
1683  * SD_FAILFAST_INACTIVE: Instance is currently in a normal state, with NO
1684  * failfast processing being performed.
1685  *
1686  * SD_FAILFAST_ACTIVE: Instance is in the failfast state and is performing
1687  * failfast processing on all bufs with B_FAILFAST set.
1688  */
1689 
1690 #define	SD_FAILFAST_INACTIVE		0
1691 #define	SD_FAILFAST_ACTIVE		1
1692 
1693 /*
1694  * Bitmask to control behavior of buf(9S) flushes when a transition to
1695  * the failfast state occurs. Optional bits include:
1696  *
1697  * SD_FAILFAST_FLUSH_ALL_BUFS: When set, flush ALL bufs including those that
1698  * do NOT have B_FAILFAST set. When clear, only bufs with B_FAILFAST will
1699  * be flushed.
1700  *
1701  * SD_FAILFAST_FLUSH_ALL_QUEUES: When set, flush any/all other queues in the
1702  * driver, in addition to the regular wait queue. This includes the xbuf
1703  * queues. When clear, only the driver's wait queue will be flushed.
1704  */
1705 #define	SD_FAILFAST_FLUSH_ALL_BUFS	0x01
1706 #define	SD_FAILFAST_FLUSH_ALL_QUEUES	0x02
1707 
1708 /*
1709  * The default behavior is to only flush bufs that have B_FAILFAST set, but
1710  * to flush all queues within the driver.
1711  */
1712 static int sd_failfast_flushctl = SD_FAILFAST_FLUSH_ALL_QUEUES;
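
/*
 * For example (an illustrative sketch, not a shipped default): to flush
 * ALL bufs and ALL driver queues on a failfast transition, the tunable
 * could be set from /etc/system, assuming the symbol is reachable as a
 * tunable on the platform in question:
 *
 *	set sd:sd_failfast_flushctl = 0x3
 *
 * i.e. SD_FAILFAST_FLUSH_ALL_BUFS | SD_FAILFAST_FLUSH_ALL_QUEUES.
 */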
1713 
1714 
1715 /*
1716  * SD Testing Fault Injection
1717  */
1718 #ifdef SD_FAULT_INJECTION
1719 static void sd_faultinjection_ioctl(int cmd, intptr_t arg, struct sd_lun *un);
1720 static void sd_faultinjection(struct scsi_pkt *pktp);
1721 static void sd_injection_log(char *buf, struct sd_lun *un);
1722 #endif
1723 
1724 /*
1725  * Device driver ops vector
1726  */
1727 static struct cb_ops sd_cb_ops = {
1728 	sdopen,			/* open */
1729 	sdclose,		/* close */
1730 	sdstrategy,		/* strategy */
1731 	nodev,			/* print */
1732 	sddump,			/* dump */
1733 	sdread,			/* read */
1734 	sdwrite,		/* write */
1735 	sdioctl,		/* ioctl */
1736 	nodev,			/* devmap */
1737 	nodev,			/* mmap */
1738 	nodev,			/* segmap */
1739 	nochpoll,		/* poll */
1740 	sd_prop_op,		/* cb_prop_op */
1741 	0,			/* streamtab  */
1742 	D_64BIT | D_MP | D_NEW | D_HOTPLUG, /* Driver compatibility flags */
1743 	CB_REV,			/* cb_rev */
1744 	sdaread,		/* async I/O read entry point */
1745 	sdawrite		/* async I/O write entry point */
1746 };
1747 
1748 struct dev_ops sd_ops = {
1749 	DEVO_REV,		/* devo_rev, */
1750 	0,			/* refcnt  */
1751 	sdinfo,			/* info */
1752 	nulldev,		/* identify */
1753 	sdprobe,		/* probe */
1754 	sdattach,		/* attach */
1755 	sddetach,		/* detach */
1756 	nodev,			/* reset */
1757 	&sd_cb_ops,		/* driver operations */
1758 	NULL,			/* bus operations */
1759 	sdpower,		/* power */
1760 	ddi_quiesce_not_needed,		/* quiesce */
1761 };
1762 
1763 /*
1764  * This is the loadable module wrapper.
1765  */
1766 #include <sys/modctl.h>
1767 
1768 static struct modldrv modldrv = {
1769 	&mod_driverops,		/* Type of module. This one is a driver */
1770 	SD_MODULE_NAME,		/* Module name. */
1771 	&sd_ops			/* driver ops */
1772 };
1773 
1774 static struct modlinkage modlinkage = {
1775 	MODREV_1, &modldrv, NULL
1776 };
1777 
1778 static cmlb_tg_ops_t sd_tgops = {
1779 	TG_DK_OPS_VERSION_1,
1780 	sd_tg_rdwr,
1781 	sd_tg_getinfo
1782 };
1783 
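/*
 * Vendor-unique additions to the standard sense-key/ASC message tables.
 * As a sketch of how this table is consumed (see sd_print_sense_msg()
 * for the actual call site), it is handed to scsi_vu_errmsg(9F), which
 * consults these strings when decoding sense data for printing.
 */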
1784 static struct scsi_asq_key_strings sd_additional_codes[] = {
1785 	0x81, 0, "Logical Unit is Reserved",
1786 	0x85, 0, "Audio Address Not Valid",
1787 	0xb6, 0, "Media Load Mechanism Failed",
1788 	0xB9, 0, "Audio Play Operation Aborted",
1789 	0xbf, 0, "Buffer Overflow for Read All Subcodes Command",
1790 	0x53, 2, "Medium removal prevented",
1791 	0x6f, 0, "Authentication failed during key exchange",
1792 	0x6f, 1, "Key not present",
1793 	0x6f, 2, "Key not established",
1794 	0x6f, 3, "Read without proper authentication",
1795 	0x6f, 4, "Mismatched region to this logical unit",
1796 	0x6f, 5, "Region reset count error",
1797 	0xffff, 0x0, NULL
1798 };
1799 
1800 
1801 /*
1802  * Struct for passing printing information for sense data messages
1803  */
1804 struct sd_sense_info {
1805 	int	ssi_severity;
1806 	int	ssi_pfa_flag;
1807 };
1808 
1809 /*
1810  * Table of function pointers for iostart-side routines. Separate "chains"
1811  * of layered function calls are formed by placing the function pointers
1812  * sequentially in the desired order. Functions are called according to an
1813  * incrementing table index ordering. The last function in each chain must
1814  * be sd_core_iostart(). The corresponding iodone-side routines are expected
1815  * in the sd_iodone_chain[] array.
1816  *
1817  * Note: It may seem more natural to organize both the iostart and iodone
1818  * functions together, into an array of structures (or some similar
1819  * organization) with a common index, rather than two separate arrays which
1820  * must be maintained in synchronization. The purpose of this division is
1821  * to achieve improved performance: individual arrays allow for more
1822  * effective cache line utilization on certain platforms.
1823  */
1824 
1825 typedef void (*sd_chain_t)(int index, struct sd_lun *un, struct buf *bp);
1826 
1827 
1828 static sd_chain_t sd_iostart_chain[] = {
1829 
1830 	/* Chain for buf IO for disk drive targets (PM enabled) */
1831 	sd_mapblockaddr_iostart,	/* Index: 0 */
1832 	sd_pm_iostart,			/* Index: 1 */
1833 	sd_core_iostart,		/* Index: 2 */
1834 
1835 	/* Chain for buf IO for disk drive targets (PM disabled) */
1836 	sd_mapblockaddr_iostart,	/* Index: 3 */
1837 	sd_core_iostart,		/* Index: 4 */
1838 
1839 	/*
1840 	 * Chain for buf IO for removable-media or large sector size
1841 	 * disk drive targets with RMW needed (PM enabled)
1842 	 */
1843 	sd_mapblockaddr_iostart,	/* Index: 5 */
1844 	sd_mapblocksize_iostart,	/* Index: 6 */
1845 	sd_pm_iostart,			/* Index: 7 */
1846 	sd_core_iostart,		/* Index: 8 */
1847 
1848 	/*
1849 	 * Chain for buf IO for removable-media or large sector size
1850 	 * disk drive targets with RMW needed (PM disabled)
1851 	 */
1852 	sd_mapblockaddr_iostart,	/* Index: 9 */
1853 	sd_mapblocksize_iostart,	/* Index: 10 */
1854 	sd_core_iostart,		/* Index: 11 */
1855 
1856 	/* Chain for buf IO for disk drives with checksumming (PM enabled) */
1857 	sd_mapblockaddr_iostart,	/* Index: 12 */
1858 	sd_checksum_iostart,		/* Index: 13 */
1859 	sd_pm_iostart,			/* Index: 14 */
1860 	sd_core_iostart,		/* Index: 15 */
1861 
1862 	/* Chain for buf IO for disk drives with checksumming (PM disabled) */
1863 	sd_mapblockaddr_iostart,	/* Index: 16 */
1864 	sd_checksum_iostart,		/* Index: 17 */
1865 	sd_core_iostart,		/* Index: 18 */
1866 
1867 	/* Chain for USCSI commands (all targets) */
1868 	sd_pm_iostart,			/* Index: 19 */
1869 	sd_core_iostart,		/* Index: 20 */
1870 
1871 	/* Chain for checksumming USCSI commands (all targets) */
1872 	sd_checksum_uscsi_iostart,	/* Index: 21 */
1873 	sd_pm_iostart,			/* Index: 22 */
1874 	sd_core_iostart,		/* Index: 23 */
1875 
1876 	/* Chain for "direct" USCSI commands (all targets) */
1877 	sd_core_iostart,		/* Index: 24 */
1878 
1879 	/* Chain for "direct priority" USCSI commands (all targets) */
1880 	sd_core_iostart,		/* Index: 25 */
1881 
1882 	/*
1883 	 * Chain for buf IO for large sector size disk drive targets
1884 	 * with RMW needed with checksumming (PM enabled)
1885 	 */
1886 	sd_mapblockaddr_iostart,	/* Index: 26 */
1887 	sd_mapblocksize_iostart,	/* Index: 27 */
1888 	sd_checksum_iostart,		/* Index: 28 */
1889 	sd_pm_iostart,			/* Index: 29 */
1890 	sd_core_iostart,		/* Index: 30 */
1891 
1892 	/*
1893 	 * Chain for buf IO for large sector size disk drive targets
1894 	 * with RMW needed with checksumming (PM disabled)
1895 	 */
1896 	sd_mapblockaddr_iostart,	/* Index: 31 */
1897 	sd_mapblocksize_iostart,	/* Index: 32 */
1898 	sd_checksum_iostart,		/* Index: 33 */
1899 	sd_core_iostart,		/* Index: 34 */
1900 
1901 };
1902 
1903 /*
1904  * Macros to locate the first function of each iostart chain in the
1905  * sd_iostart_chain[] array. These are located by the index in the array.
1906  */
1907 #define	SD_CHAIN_DISK_IOSTART			0
1908 #define	SD_CHAIN_DISK_IOSTART_NO_PM		3
1909 #define	SD_CHAIN_MSS_DISK_IOSTART		5
1910 #define	SD_CHAIN_RMMEDIA_IOSTART		5
1911 #define	SD_CHAIN_MSS_DISK_IOSTART_NO_PM		9
1912 #define	SD_CHAIN_RMMEDIA_IOSTART_NO_PM		9
1913 #define	SD_CHAIN_CHKSUM_IOSTART			12
1914 #define	SD_CHAIN_CHKSUM_IOSTART_NO_PM		16
1915 #define	SD_CHAIN_USCSI_CMD_IOSTART		19
1916 #define	SD_CHAIN_USCSI_CHKSUM_IOSTART		21
1917 #define	SD_CHAIN_DIRECT_CMD_IOSTART		24
1918 #define	SD_CHAIN_PRIORITY_CMD_IOSTART		25
1919 #define	SD_CHAIN_MSS_CHKSUM_IOSTART		26
1920 #define	SD_CHAIN_MSS_CHKSUM_IOSTART_NO_PM	31
1921 
1922 
1923 /*
1924  * Table of function pointers for the iodone-side routines for the driver-
1925  * internal layering mechanism.  The calling sequence for iodone routines
1926  * uses a decrementing table index, so the last routine called in a chain
1927  * must be at the lowest array index location for that chain.  The last
1928  * routine for each chain must be either sd_buf_iodone() (for buf(9S) IOs)
1929  * or sd_uscsi_iodone() (for uscsi IOs).  Other than this, the ordering
1930  * of the functions in an iodone side chain must correspond to the ordering
1931  * of the iostart routines for that chain.  Note that there is no iodone
1932  * side routine that corresponds to sd_core_iostart(), so there is no
1933  * entry in the table for this.
1934  */
1935 
1936 static sd_chain_t sd_iodone_chain[] = {
1937 
1938 	/* Chain for buf IO for disk drive targets (PM enabled) */
1939 	sd_buf_iodone,			/* Index: 0 */
1940 	sd_mapblockaddr_iodone,		/* Index: 1 */
1941 	sd_pm_iodone,			/* Index: 2 */
1942 
1943 	/* Chain for buf IO for disk drive targets (PM disabled) */
1944 	sd_buf_iodone,			/* Index: 3 */
1945 	sd_mapblockaddr_iodone,		/* Index: 4 */
1946 
1947 	/*
1948 	 * Chain for buf IO for removable-media or large sector size
1949 	 * disk drive targets with RMW needed (PM enabled)
1950 	 */
1951 	sd_buf_iodone,			/* Index: 5 */
1952 	sd_mapblockaddr_iodone,		/* Index: 6 */
1953 	sd_mapblocksize_iodone,		/* Index: 7 */
1954 	sd_pm_iodone,			/* Index: 8 */
1955 
1956 	/*
1957 	 * Chain for buf IO for removable-media or large sector size
1958 	 * disk drive targets with RMW needed (PM disabled)
1959 	 */
1960 	sd_buf_iodone,			/* Index: 9 */
1961 	sd_mapblockaddr_iodone,		/* Index: 10 */
1962 	sd_mapblocksize_iodone,		/* Index: 11 */
1963 
1964 	/* Chain for buf IO for disk drives with checksumming (PM enabled) */
1965 	sd_buf_iodone,			/* Index: 12 */
1966 	sd_mapblockaddr_iodone,		/* Index: 13 */
1967 	sd_checksum_iodone,		/* Index: 14 */
1968 	sd_pm_iodone,			/* Index: 15 */
1969 
1970 	/* Chain for buf IO for disk drives with checksumming (PM disabled) */
1971 	sd_buf_iodone,			/* Index: 16 */
1972 	sd_mapblockaddr_iodone,		/* Index: 17 */
1973 	sd_checksum_iodone,		/* Index: 18 */
1974 
1975 	/* Chain for USCSI commands (non-checksum targets) */
1976 	sd_uscsi_iodone,		/* Index: 19 */
1977 	sd_pm_iodone,			/* Index: 20 */
1978 
1979 	/* Chain for USCSI commands (checksum targets) */
1980 	sd_uscsi_iodone,		/* Index: 21 */
1981 	sd_checksum_uscsi_iodone,	/* Index: 22 */
1982 	sd_pm_iodone,			/* Index: 23 */
1983 
1984 	/* Chain for "direct" USCSI commands (all targets) */
1985 	sd_uscsi_iodone,		/* Index: 24 */
1986 
1987 	/* Chain for "direct priority" USCSI commands (all targets) */
1988 	sd_uscsi_iodone,		/* Index: 25 */
1989 
1990 	/*
1991 	 * Chain for buf IO for large sector size disk drive targets
1992 	 * with checksumming (PM enabled)
1993 	 */
1994 	sd_buf_iodone,			/* Index: 26 */
1995 	sd_mapblockaddr_iodone,		/* Index: 27 */
1996 	sd_mapblocksize_iodone,		/* Index: 28 */
1997 	sd_checksum_iodone,		/* Index: 29 */
1998 	sd_pm_iodone,			/* Index: 30 */
1999 
2000 	/*
2001 	 * Chain for buf IO for large sector size disk drive targets
2002 	 * with checksumming (PM disabled)
2003 	 */
2004 	sd_buf_iodone,			/* Index: 31 */
2005 	sd_mapblockaddr_iodone,		/* Index: 32 */
2006 	sd_mapblocksize_iodone,		/* Index: 33 */
2007 	sd_checksum_iodone,		/* Index: 34 */
2008 };
2009 
2010 
2011 /*
2012  * Macros to locate the "first" function in the sd_iodone_chain[] array for
2013  * each iodone-side chain. These are located by the array index, but as the
2014  * iodone side functions are called in a decrementing-index order, the
2015  * highest index number in each chain must be specified (as these correspond
2016  * to the first function in the iodone chain that will be called by the core
2017  * at IO completion time).
2018  */
2019 
2020 #define	SD_CHAIN_DISK_IODONE			2
2021 #define	SD_CHAIN_DISK_IODONE_NO_PM		4
2022 #define	SD_CHAIN_RMMEDIA_IODONE			8
2023 #define	SD_CHAIN_MSS_DISK_IODONE		8
2024 #define	SD_CHAIN_RMMEDIA_IODONE_NO_PM		11
2025 #define	SD_CHAIN_MSS_DISK_IODONE_NO_PM		11
2026 #define	SD_CHAIN_CHKSUM_IODONE			15
2027 #define	SD_CHAIN_CHKSUM_IODONE_NO_PM		18
2028 #define	SD_CHAIN_USCSI_CMD_IODONE		20
2029 #define	SD_CHAIN_USCSI_CHKSUM_IODONE		22
2030 #define	SD_CHAIN_DIRECT_CMD_IODONE		24
2031 #define	SD_CHAIN_PRIORITY_CMD_IODONE		25
2032 #define	SD_CHAIN_MSS_CHKSUM_IODONE		30
2033 #define	SD_CHAIN_MSS_CHKSUM_IODONE_NO_PM	34
2034 
2035 
2036 
2037 /*
2038  * Array to map a layering chain index to the appropriate initpkt routine.
2039  * The redundant entries are present so that the index used for accessing
2040  * the above sd_iostart_chain and sd_iodone_chain tables can be used directly
2041  * with this table as well.
2042  */
2043 typedef int (*sd_initpkt_t)(struct buf *, struct scsi_pkt **);
2044 
2045 static sd_initpkt_t	sd_initpkt_map[] = {
2046 
2047 	/* Chain for buf IO for disk drive targets (PM enabled) */
2048 	sd_initpkt_for_buf,		/* Index: 0 */
2049 	sd_initpkt_for_buf,		/* Index: 1 */
2050 	sd_initpkt_for_buf,		/* Index: 2 */
2051 
2052 	/* Chain for buf IO for disk drive targets (PM disabled) */
2053 	sd_initpkt_for_buf,		/* Index: 3 */
2054 	sd_initpkt_for_buf,		/* Index: 4 */
2055 
2056 	/*
2057 	 * Chain for buf IO for removable-media or large sector size
2058 	 * disk drive targets (PM enabled)
2059 	 */
2060 	sd_initpkt_for_buf,		/* Index: 5 */
2061 	sd_initpkt_for_buf,		/* Index: 6 */
2062 	sd_initpkt_for_buf,		/* Index: 7 */
2063 	sd_initpkt_for_buf,		/* Index: 8 */
2064 
2065 	/*
2066 	 * Chain for buf IO for removable-media or large sector size
2067 	 * disk drive targets (PM disabled)
2068 	 */
2069 	sd_initpkt_for_buf,		/* Index: 9 */
2070 	sd_initpkt_for_buf,		/* Index: 10 */
2071 	sd_initpkt_for_buf,		/* Index: 11 */
2072 
2073 	/* Chain for buf IO for disk drives with checksumming (PM enabled) */
2074 	sd_initpkt_for_buf,		/* Index: 12 */
2075 	sd_initpkt_for_buf,		/* Index: 13 */
2076 	sd_initpkt_for_buf,		/* Index: 14 */
2077 	sd_initpkt_for_buf,		/* Index: 15 */
2078 
2079 	/* Chain for buf IO for disk drives with checksumming (PM disabled) */
2080 	sd_initpkt_for_buf,		/* Index: 16 */
2081 	sd_initpkt_for_buf,		/* Index: 17 */
2082 	sd_initpkt_for_buf,		/* Index: 18 */
2083 
2084 	/* Chain for USCSI commands (non-checksum targets) */
2085 	sd_initpkt_for_uscsi,		/* Index: 19 */
2086 	sd_initpkt_for_uscsi,		/* Index: 20 */
2087 
2088 	/* Chain for USCSI commands (checksum targets) */
2089 	sd_initpkt_for_uscsi,		/* Index: 21 */
2090 	sd_initpkt_for_uscsi,		/* Index: 22 */
2091 	sd_initpkt_for_uscsi,		/* Index: 23 */
2092 
2093 	/* Chain for "direct" USCSI commands (all targets) */
2094 	sd_initpkt_for_uscsi,		/* Index: 24 */
2095 
2096 	/* Chain for "direct priority" USCSI commands (all targets) */
2097 	sd_initpkt_for_uscsi,		/* Index: 25 */
2098 
2099 	/*
2100 	 * Chain for buf IO for large sector size disk drive targets
2101 	 * with checksumming (PM enabled)
2102 	 */
2103 	sd_initpkt_for_buf,		/* Index: 26 */
2104 	sd_initpkt_for_buf,		/* Index: 27 */
2105 	sd_initpkt_for_buf,		/* Index: 28 */
2106 	sd_initpkt_for_buf,		/* Index: 29 */
2107 	sd_initpkt_for_buf,		/* Index: 30 */
2108 
2109 	/*
2110 	 * Chain for buf IO for large sector size disk drive targets
2111 	 * with checksumming (PM disabled)
2112 	 */
2113 	sd_initpkt_for_buf,		/* Index: 31 */
2114 	sd_initpkt_for_buf,		/* Index: 32 */
2115 	sd_initpkt_for_buf,		/* Index: 33 */
2116 	sd_initpkt_for_buf,		/* Index: 34 */
2117 };
2118 
2119 
2120 /*
2121  * Array to map a layering chain index to the appropriate destroypkt routine.
2122  * The redundant entries are present so that the index used for accessing
2123  * the above sd_iostart_chain and sd_iodone_chain tables can be used directly
2124  * with this table as well.
2125  */
2126 typedef void (*sd_destroypkt_t)(struct buf *);
2127 
2128 static sd_destroypkt_t	sd_destroypkt_map[] = {
2129 
2130 	/* Chain for buf IO for disk drive targets (PM enabled) */
2131 	sd_destroypkt_for_buf,		/* Index: 0 */
2132 	sd_destroypkt_for_buf,		/* Index: 1 */
2133 	sd_destroypkt_for_buf,		/* Index: 2 */
2134 
2135 	/* Chain for buf IO for disk drive targets (PM disabled) */
2136 	sd_destroypkt_for_buf,		/* Index: 3 */
2137 	sd_destroypkt_for_buf,		/* Index: 4 */
2138 
2139 	/*
2140 	 * Chain for buf IO for removable-media or large sector size
2141 	 * disk drive targets (PM enabled)
2142 	 */
2143 	sd_destroypkt_for_buf,		/* Index: 5 */
2144 	sd_destroypkt_for_buf,		/* Index: 6 */
2145 	sd_destroypkt_for_buf,		/* Index: 7 */
2146 	sd_destroypkt_for_buf,		/* Index: 8 */
2147 
2148 	/*
2149 	 * Chain for buf IO for removable-media or large sector size
2150 	 * disk drive targets (PM disabled)
2151 	 */
2152 	sd_destroypkt_for_buf,		/* Index: 9 */
2153 	sd_destroypkt_for_buf,		/* Index: 10 */
2154 	sd_destroypkt_for_buf,		/* Index: 11 */
2155 
2156 	/* Chain for buf IO for disk drives with checksumming (PM enabled) */
2157 	sd_destroypkt_for_buf,		/* Index: 12 */
2158 	sd_destroypkt_for_buf,		/* Index: 13 */
2159 	sd_destroypkt_for_buf,		/* Index: 14 */
2160 	sd_destroypkt_for_buf,		/* Index: 15 */
2161 
2162 	/* Chain for buf IO for disk drives with checksumming (PM disabled) */
2163 	sd_destroypkt_for_buf,		/* Index: 16 */
2164 	sd_destroypkt_for_buf,		/* Index: 17 */
2165 	sd_destroypkt_for_buf,		/* Index: 18 */
2166 
2167 	/* Chain for USCSI commands (non-checksum targets) */
2168 	sd_destroypkt_for_uscsi,	/* Index: 19 */
2169 	sd_destroypkt_for_uscsi,	/* Index: 20 */
2170 
2171 	/* Chain for USCSI commands (checksum targets) */
2172 	sd_destroypkt_for_uscsi,	/* Index: 21 */
2173 	sd_destroypkt_for_uscsi,	/* Index: 22 */
2174 	sd_destroypkt_for_uscsi,	/* Index: 23 */
2175 
2176 	/* Chain for "direct" USCSI commands (all targets) */
2177 	sd_destroypkt_for_uscsi,	/* Index: 24 */
2178 
2179 	/* Chain for "direct priority" USCSI commands (all targets) */
2180 	sd_destroypkt_for_uscsi,	/* Index: 25 */
2181 
2182 	/*
2183 	 * Chain for buf IO for large sector size disk drive targets
2184 	 * with checksumming (PM enabled)
2185 	 */
2186 	sd_destroypkt_for_buf,		/* Index: 26 */
2187 	sd_destroypkt_for_buf,		/* Index: 27 */
2188 	sd_destroypkt_for_buf,		/* Index: 28 */
2189 	sd_destroypkt_for_buf,		/* Index: 29 */
2190 	sd_destroypkt_for_buf,		/* Index: 30 */
2191 
2192 	/*
2193 	 * Chain for buf IO for large sector size disk drive targets
2194 	 * with checksumming (PM disabled)
2195 	 */
2196 	sd_destroypkt_for_buf,		/* Index: 31 */
2197 	sd_destroypkt_for_buf,		/* Index: 32 */
2198 	sd_destroypkt_for_buf,		/* Index: 33 */
2199 	sd_destroypkt_for_buf,		/* Index: 34 */
2200 };
2201 
2202 
2203 
2204 /*
2205  * Array to map a layering chain index to the appropriate chain "type".
2206  * The chain type indicates a specific property/usage of the chain.
2207  * The redundant entries are present so that the index used for accessing
2208  * the above sd_iostart_chain and sd_iodone_chain tables can be used directly
2209  * with this table as well.
2210  */
2211 
2212 #define	SD_CHAIN_NULL			0	/* for the special RQS cmd */
2213 #define	SD_CHAIN_BUFIO			1	/* regular buf IO */
2214 #define	SD_CHAIN_USCSI			2	/* regular USCSI commands */
2215 #define	SD_CHAIN_DIRECT			3	/* uscsi, w/ bypass power mgt */
2216 #define	SD_CHAIN_DIRECT_PRIORITY	4	/* uscsi, w/ bypass power mgt */
2217 						/* (for error recovery) */
2218 
2219 static int sd_chain_type_map[] = {
2220 
2221 	/* Chain for buf IO for disk drive targets (PM enabled) */
2222 	SD_CHAIN_BUFIO,			/* Index: 0 */
2223 	SD_CHAIN_BUFIO,			/* Index: 1 */
2224 	SD_CHAIN_BUFIO,			/* Index: 2 */
2225 
2226 	/* Chain for buf IO for disk drive targets (PM disabled) */
2227 	SD_CHAIN_BUFIO,			/* Index: 3 */
2228 	SD_CHAIN_BUFIO,			/* Index: 4 */
2229 
2230 	/*
2231 	 * Chain for buf IO for removable-media or large sector size
2232 	 * disk drive targets (PM enabled)
2233 	 */
2234 	SD_CHAIN_BUFIO,			/* Index: 5 */
2235 	SD_CHAIN_BUFIO,			/* Index: 6 */
2236 	SD_CHAIN_BUFIO,			/* Index: 7 */
2237 	SD_CHAIN_BUFIO,			/* Index: 8 */
2238 
2239 	/*
2240 	 * Chain for buf IO for removable-media or large sector size
2241 	 * disk drive targets (PM disabled)
2242 	 */
2243 	SD_CHAIN_BUFIO,			/* Index: 9 */
2244 	SD_CHAIN_BUFIO,			/* Index: 10 */
2245 	SD_CHAIN_BUFIO,			/* Index: 11 */
2246 
2247 	/* Chain for buf IO for disk drives with checksumming (PM enabled) */
2248 	SD_CHAIN_BUFIO,			/* Index: 12 */
2249 	SD_CHAIN_BUFIO,			/* Index: 13 */
2250 	SD_CHAIN_BUFIO,			/* Index: 14 */
2251 	SD_CHAIN_BUFIO,			/* Index: 15 */
2252 
2253 	/* Chain for buf IO for disk drives with checksumming (PM disabled) */
2254 	SD_CHAIN_BUFIO,			/* Index: 16 */
2255 	SD_CHAIN_BUFIO,			/* Index: 17 */
2256 	SD_CHAIN_BUFIO,			/* Index: 18 */
2257 
2258 	/* Chain for USCSI commands (non-checksum targets) */
2259 	SD_CHAIN_USCSI,			/* Index: 19 */
2260 	SD_CHAIN_USCSI,			/* Index: 20 */
2261 
2262 	/* Chain for USCSI commands (checksum targets) */
2263 	SD_CHAIN_USCSI,			/* Index: 21 */
2264 	SD_CHAIN_USCSI,			/* Index: 22 */
2265 	SD_CHAIN_USCSI,			/* Index: 23 */
2266 
2267 	/* Chain for "direct" USCSI commands (all targets) */
2268 	SD_CHAIN_DIRECT,		/* Index: 24 */
2269 
2270 	/* Chain for "direct priority" USCSI commands (all targets) */
2271 	SD_CHAIN_DIRECT_PRIORITY,	/* Index: 25 */
2272 
2273 	/*
2274 	 * Chain for buf IO for large sector size disk drive targets
2275 	 * with checksumming (PM enabled)
2276 	 */
2277 	SD_CHAIN_BUFIO,			/* Index: 26 */
2278 	SD_CHAIN_BUFIO,			/* Index: 27 */
2279 	SD_CHAIN_BUFIO,			/* Index: 28 */
2280 	SD_CHAIN_BUFIO,			/* Index: 29 */
2281 	SD_CHAIN_BUFIO,			/* Index: 30 */
2282 
2283 	/*
2284 	 * Chain for buf IO for large sector size disk drive targets
2285 	 * with checksumming (PM disabled)
2286 	 */
2287 	SD_CHAIN_BUFIO,			/* Index: 31 */
2288 	SD_CHAIN_BUFIO,			/* Index: 32 */
2289 	SD_CHAIN_BUFIO,			/* Index: 33 */
2290 	SD_CHAIN_BUFIO,			/* Index: 34 */
2291 };
2292 
2293 
2294 /* Macro to return TRUE if the IO has come from the sd_buf_iostart() chain. */
2295 #define	SD_IS_BUFIO(xp)			\
2296 	(sd_chain_type_map[(xp)->xb_chain_iostart] == SD_CHAIN_BUFIO)
2297 
2298 /* Macro to return TRUE if the IO has come from the "direct priority" chain. */
2299 #define	SD_IS_DIRECT_PRIORITY(xp)	\
2300 	(sd_chain_type_map[(xp)->xb_chain_iostart] == SD_CHAIN_DIRECT_PRIORITY)
2301 
2302 
2303 
2304 /*
2305  * Struct, array, and macros to map a specific chain to the appropriate
2306  * layering indexes in the sd_iostart_chain[] and sd_iodone_chain[] arrays.
2307  *
2308  * The sd_chain_index_map[] array is used at attach time to set the various
2309  * un_xxx_chain type members of the sd_lun softstate to the specific layering
2310  * chain to be used with the instance. This allows different instances to use
2311  * different chains for buf IO, uscsi IO, etc. Also, since the xb_chain_iostart
2312  * and xb_chain_iodone index values in the sd_xbuf are initialized to these
2313  * values at sd_xbuf init time, this allows (1) layering chains to be changed
2314  * dynamically and without locking; and (2) a layer to update the
2315  * xb_chain_io[start|done] member in a given xbuf with its current index value,
2316  * to allow for deferred processing of an IO within the same chain from a
2317  * different execution context.
2318  */
2319 
2320 struct sd_chain_index {
2321 	int	sci_iostart_index;
2322 	int	sci_iodone_index;
2323 };
2324 
2325 static struct sd_chain_index	sd_chain_index_map[] = {
2326 	{ SD_CHAIN_DISK_IOSTART,		SD_CHAIN_DISK_IODONE },
2327 	{ SD_CHAIN_DISK_IOSTART_NO_PM,		SD_CHAIN_DISK_IODONE_NO_PM },
2328 	{ SD_CHAIN_RMMEDIA_IOSTART,		SD_CHAIN_RMMEDIA_IODONE },
2329 	{ SD_CHAIN_RMMEDIA_IOSTART_NO_PM,	SD_CHAIN_RMMEDIA_IODONE_NO_PM },
2330 	{ SD_CHAIN_CHKSUM_IOSTART,		SD_CHAIN_CHKSUM_IODONE },
2331 	{ SD_CHAIN_CHKSUM_IOSTART_NO_PM,	SD_CHAIN_CHKSUM_IODONE_NO_PM },
2332 	{ SD_CHAIN_USCSI_CMD_IOSTART,		SD_CHAIN_USCSI_CMD_IODONE },
2333 	{ SD_CHAIN_USCSI_CHKSUM_IOSTART,	SD_CHAIN_USCSI_CHKSUM_IODONE },
2334 	{ SD_CHAIN_DIRECT_CMD_IOSTART,		SD_CHAIN_DIRECT_CMD_IODONE },
2335 	{ SD_CHAIN_PRIORITY_CMD_IOSTART,	SD_CHAIN_PRIORITY_CMD_IODONE },
2336 	{ SD_CHAIN_MSS_CHKSUM_IOSTART,		SD_CHAIN_MSS_CHKSUM_IODONE },
2337 	{ SD_CHAIN_MSS_CHKSUM_IOSTART_NO_PM, SD_CHAIN_MSS_CHKSUM_IODONE_NO_PM },
2338 
2339 };
2340 
2341 
2342 /*
2343  * The following are indexes into the sd_chain_index_map[] array.
2344  */
2345 
2346 /* un->un_buf_chain_type must be set to one of these */
2347 #define	SD_CHAIN_INFO_DISK		0
2348 #define	SD_CHAIN_INFO_DISK_NO_PM	1
2349 #define	SD_CHAIN_INFO_RMMEDIA		2
2350 #define	SD_CHAIN_INFO_MSS_DISK		2
2351 #define	SD_CHAIN_INFO_RMMEDIA_NO_PM	3
2352 #define	SD_CHAIN_INFO_MSS_DSK_NO_PM	3
2353 #define	SD_CHAIN_INFO_CHKSUM		4
2354 #define	SD_CHAIN_INFO_CHKSUM_NO_PM	5
2355 #define	SD_CHAIN_INFO_MSS_DISK_CHKSUM	10
2356 #define	SD_CHAIN_INFO_MSS_DISK_CHKSUM_NO_PM	11
2357 
2358 /* un->un_uscsi_chain_type must be set to one of these */
2359 #define	SD_CHAIN_INFO_USCSI_CMD		6
2360 /* USCSI with PM disabled is the same as DIRECT */
2361 #define	SD_CHAIN_INFO_USCSI_CMD_NO_PM	8
2362 #define	SD_CHAIN_INFO_USCSI_CHKSUM	7
2363 
2364 /* un->un_direct_chain_type must be set to one of these */
2365 #define	SD_CHAIN_INFO_DIRECT_CMD	8
2366 
2367 /* un->un_priority_chain_type must be set to one of these */
2368 #define	SD_CHAIN_INFO_PRIORITY_CMD	9
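
/*
 * A simplified sketch (for illustration; see sd_xbuf_init() for the
 * real code) of how the map and indexes above are consumed: attach-time
 * code records a chain for the instance, e.g.
 *
 *	un->un_buf_chain_type = SD_CHAIN_INFO_DISK;
 *
 * and each sd_xbuf is then seeded from sd_chain_index_map[]:
 *
 *	xp->xb_chain_iostart =
 *	    sd_chain_index_map[chain_type].sci_iostart_index;
 *	xp->xb_chain_iodone =
 *	    sd_chain_index_map[chain_type].sci_iodone_index;
 */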
2369 
2370 /* size for devid inquiries */
2371 #define	MAX_INQUIRY_SIZE		0xF0
2372 
2373 /*
2374  * Macros used by functions to pass a given buf(9S) struct along to the
2375  * next function in the layering chain for further processing.
2376  *
2377  * In the following macros, passing more than three arguments to the called
2378  * routines causes the optimizer for the SPARC compiler to stop doing tail
2379  * call elimination which results in significant performance degradation.
2380  * call elimination, which results in significant performance degradation.
2381 #define	SD_BEGIN_IOSTART(index, un, bp)	\
2382 	((*(sd_iostart_chain[index]))(index, un, bp))
2383 
2384 #define	SD_BEGIN_IODONE(index, un, bp)	\
2385 	((*(sd_iodone_chain[index]))(index, un, bp))
2386 
2387 #define	SD_NEXT_IOSTART(index, un, bp)				\
2388 	((*(sd_iostart_chain[(index) + 1]))((index) + 1, un, bp))
2389 
2390 #define	SD_NEXT_IODONE(index, un, bp)				\
2391 	((*(sd_iodone_chain[(index) - 1]))((index) - 1, un, bp))
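
/*
 * A hypothetical layer function, shown only to illustrate how these
 * macros are used (no such function exists in the driver): an
 * iostart-side layer does its own processing and then passes the buf to
 * the next layer with SD_NEXT_IOSTART(); if its setup fails, it unwinds
 * by starting the iodone chain at its own index:
 *
 *	static void
 *	sd_example_iostart(int index, struct sd_lun *un, struct buf *bp)
 *	{
 *		if (sd_example_setup(un, bp) != 0) {
 *			bioerror(bp, EIO);
 *			bp->b_resid = bp->b_bcount;
 *			SD_BEGIN_IODONE(index, un, bp);
 *			return;
 *		}
 *		SD_NEXT_IOSTART(index, un, bp);
 *	}
 */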
2392 
2393 /*
2394  *    Function: _init
2395  *
2396  * Description: This is the driver _init(9E) entry point.
2397  *
2398  * Return Code: Returns the value from mod_install(9F) or
2399  *		ddi_soft_state_init(9F) as appropriate.
2400  *
2401  *     Context: Called when driver module loaded.
2402  */
2403 
2404 int
2405 _init(void)
2406 {
2407 	int	err;
2408 
2409 	/* establish driver name from module name */
2410 	sd_label = (char *)mod_modname(&modlinkage);
2411 
2412 	err = ddi_soft_state_init(&sd_state, sizeof (struct sd_lun),
2413 	    SD_MAXUNIT);
2414 	if (err != 0) {
2415 		return (err);
2416 	}
2417 
2418 	mutex_init(&sd_detach_mutex, NULL, MUTEX_DRIVER, NULL);
2419 	mutex_init(&sd_log_mutex,    NULL, MUTEX_DRIVER, NULL);
2420 	mutex_init(&sd_label_mutex,  NULL, MUTEX_DRIVER, NULL);
2421 
2422 	mutex_init(&sd_tr.srq_resv_reclaim_mutex, NULL, MUTEX_DRIVER, NULL);
2423 	cv_init(&sd_tr.srq_resv_reclaim_cv, NULL, CV_DRIVER, NULL);
2424 	cv_init(&sd_tr.srq_inprocess_cv, NULL, CV_DRIVER, NULL);
2425 
2426 	/*
2427 	 * it's ok to init here even for fibre devices
2428 	 */
2429 	sd_scsi_probe_cache_init();
2430 
2431 	sd_scsi_target_lun_init();
2432 
2433 	/*
2434 	 * Creating taskq before mod_install ensures that all callers (threads)
2435 	 * that enter the module after a successful mod_install encounter
2436 	 * a valid taskq.
2437 	 */
2438 	sd_taskq_create();
2439 
2440 	err = mod_install(&modlinkage);
2441 	if (err != 0) {
2442 		/* delete taskq if install fails */
2443 		sd_taskq_delete();
2444 
2445 		mutex_destroy(&sd_detach_mutex);
2446 		mutex_destroy(&sd_log_mutex);
2447 		mutex_destroy(&sd_label_mutex);
2448 
2449 		mutex_destroy(&sd_tr.srq_resv_reclaim_mutex);
2450 		cv_destroy(&sd_tr.srq_resv_reclaim_cv);
2451 		cv_destroy(&sd_tr.srq_inprocess_cv);
2452 
2453 		sd_scsi_probe_cache_fini();
2454 
2455 		sd_scsi_target_lun_fini();
2456 
2457 		ddi_soft_state_fini(&sd_state);
2458 
2459 		return (err);
2460 	}
2461 
2462 	return (err);
2463 }
2464 
2465 
2466 /*
2467  *    Function: _fini
2468  *
2469  * Description: This is the driver _fini(9E) entry point.
2470  *
2471  * Return Code: Returns the value from mod_remove(9F)
2472  *
2473  *     Context: Called when driver module is unloaded.
2474  */
2475 
2476 int
2477 _fini(void)
2478 {
2479 	int err;
2480 
2481 	if ((err = mod_remove(&modlinkage)) != 0) {
2482 		return (err);
2483 	}
2484 
2485 	sd_taskq_delete();
2486 
2487 	mutex_destroy(&sd_detach_mutex);
2488 	mutex_destroy(&sd_log_mutex);
2489 	mutex_destroy(&sd_label_mutex);
2490 	mutex_destroy(&sd_tr.srq_resv_reclaim_mutex);
2491 
2492 	sd_scsi_probe_cache_fini();
2493 
2494 	sd_scsi_target_lun_fini();
2495 
2496 	cv_destroy(&sd_tr.srq_resv_reclaim_cv);
2497 	cv_destroy(&sd_tr.srq_inprocess_cv);
2498 
2499 	ddi_soft_state_fini(&sd_state);
2500 
2501 	return (err);
2502 }
2503 
2504 
2505 /*
2506  *    Function: _info
2507  *
2508  * Description: This is the driver _info(9E) entry point.
2509  *
2510  *   Arguments: modinfop - pointer to the driver modinfo structure
2511  *
2512  * Return Code: Returns the value from mod_info(9F).
2513  *
2514  *     Context: Kernel thread context
2515  */
2516 
2517 int
2518 _info(struct modinfo *modinfop)
2519 {
2520 	return (mod_info(&modlinkage, modinfop));
2521 }
2522 
2523 
2524 /*
2525  * The following routines implement the driver message logging facility.
2526  * They provide component- and level-based debug output filtering.
2527  * Output may also be restricted to messages for a single instance by
2528  * specifying a soft state pointer in sd_debug_un. If sd_debug_un is set
2529  * to NULL, then messages for all instances are printed.
2530  *
2531  * These routines have been cloned from each other due to the language
2532  * constraints of macros and variable argument list processing.
2533  */
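
/*
 * For example (illustrative only): with SD_LOG_IO set in
 * sd_component_mask and SD_LOGMASK_TRACE set in sd_level_mask, a call
 * such as
 *
 *	SD_TRACE(SD_LOG_IO, un, "sdstrategy: entry: buf:0x%p\n", (void *)bp);
 *
 * is printed via scsi_log(9F); pointing sd_debug_un at a specific soft
 * state pointer restricts output to that single instance.
 */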
2534 
2535 
2536 /*
2537  *    Function: sd_log_err
2538  *
2539  * Description: This routine is called by the SD_ERROR macro for debug
2540  *		logging of error conditions.
2541  *
2542  *   Arguments: comp - driver component being logged
2543  *		un   - pointer to the sd_lun soft state structure
2544  *		fmt  - error string and format to be logged
2545  */
2546 
2547 static void
2548 sd_log_err(uint_t comp, struct sd_lun *un, const char *fmt, ...)
2549 {
2550 	va_list		ap;
2551 	dev_info_t	*dev;
2552 
2553 	ASSERT(un != NULL);
2554 	dev = SD_DEVINFO(un);
2555 	ASSERT(dev != NULL);
2556 
2557 	/*
2558 	 * Filter messages based on the global component and level masks.
2559 	 * Also print if un matches the value of sd_debug_un, or if
2560 	 * sd_debug_un is set to NULL.
2561 	 */
2562 	if ((sd_component_mask & comp) && (sd_level_mask & SD_LOGMASK_ERROR) &&
2563 	    ((sd_debug_un == NULL) || (sd_debug_un == un))) {
2564 		mutex_enter(&sd_log_mutex);
2565 		va_start(ap, fmt);
2566 		(void) vsprintf(sd_log_buf, fmt, ap);
2567 		va_end(ap);
2568 		scsi_log(dev, sd_label, CE_CONT, "%s", sd_log_buf);
2569 		mutex_exit(&sd_log_mutex);
2570 	}
2571 #ifdef SD_FAULT_INJECTION
2572 	_NOTE(DATA_READABLE_WITHOUT_LOCK(sd_lun::sd_injection_mask));
2573 	if (un->sd_injection_mask & comp) {
2574 		mutex_enter(&sd_log_mutex);
2575 		va_start(ap, fmt);
2576 		(void) vsprintf(sd_log_buf, fmt, ap);
2577 		va_end(ap);
2578 		sd_injection_log(sd_log_buf, un);
2579 		mutex_exit(&sd_log_mutex);
2580 	}
2581 #endif
2582 }
2583 
2584 
2585 /*
2586  *    Function: sd_log_info
2587  *
2588  * Description: This routine is called by the SD_INFO macro for debug
2589  *		logging of general purpose informational conditions.
2590  *
2591  *   Arguments: comp - driver component being logged
2592  *		un   - pointer to the sd_lun soft state structure
2593  *		fmt  - info string and format to be logged
2594  */
2595 
2596 static void
2597 sd_log_info(uint_t component, struct sd_lun *un, const char *fmt, ...)
2598 {
2599 	va_list		ap;
2600 	dev_info_t	*dev;
2601 
2602 	ASSERT(un != NULL);
2603 	dev = SD_DEVINFO(un);
2604 	ASSERT(dev != NULL);
2605 
2606 	/*
2607 	 * Filter messages based on the global component and level masks.
2608 	 * Also print if un matches the value of sd_debug_un, or if
2609 	 * sd_debug_un is set to NULL.
2610 	 */
2611 	if ((sd_component_mask & component) &&
2612 	    (sd_level_mask & SD_LOGMASK_INFO) &&
2613 	    ((sd_debug_un == NULL) || (sd_debug_un == un))) {
2614 		mutex_enter(&sd_log_mutex);
2615 		va_start(ap, fmt);
2616 		(void) vsprintf(sd_log_buf, fmt, ap);
2617 		va_end(ap);
2618 		scsi_log(dev, sd_label, CE_CONT, "%s", sd_log_buf);
2619 		mutex_exit(&sd_log_mutex);
2620 	}
2621 #ifdef SD_FAULT_INJECTION
2622 	_NOTE(DATA_READABLE_WITHOUT_LOCK(sd_lun::sd_injection_mask));
2623 	if (un->sd_injection_mask & component) {
2624 		mutex_enter(&sd_log_mutex);
2625 		va_start(ap, fmt);
2626 		(void) vsprintf(sd_log_buf, fmt, ap);
2627 		va_end(ap);
2628 		sd_injection_log(sd_log_buf, un);
2629 		mutex_exit(&sd_log_mutex);
2630 	}
2631 #endif
2632 }
2633 
2634 
2635 /*
2636  *    Function: sd_log_trace
2637  *
2638  * Description: This routine is called by the SD_TRACE macro for debug
2639  *		logging of trace conditions (i.e. function entry/exit).
2640  *
2641  *   Arguments: comp - driver component being logged
2642  *		un   - pointer to the sd_lun soft state structure
2643  *		fmt  - trace string and format to be logged
2644  */
2645 
2646 static void
2647 sd_log_trace(uint_t component, struct sd_lun *un, const char *fmt, ...)
2648 {
2649 	va_list		ap;
2650 	dev_info_t	*dev;
2651 
2652 	ASSERT(un != NULL);
2653 	dev = SD_DEVINFO(un);
2654 	ASSERT(dev != NULL);
2655 
2656 	/*
2657 	 * Filter messages based on the global component and level masks.
2658 	 * Also print if un matches the value of sd_debug_un, or if
2659 	 * sd_debug_un is set to NULL.
2660 	 */
2661 	if ((sd_component_mask & component) &&
2662 	    (sd_level_mask & SD_LOGMASK_TRACE) &&
2663 	    ((sd_debug_un == NULL) || (sd_debug_un == un))) {
2664 		mutex_enter(&sd_log_mutex);
2665 		va_start(ap, fmt);
2666 		(void) vsprintf(sd_log_buf, fmt, ap);
2667 		va_end(ap);
2668 		scsi_log(dev, sd_label, CE_CONT, "%s", sd_log_buf);
2669 		mutex_exit(&sd_log_mutex);
2670 	}
2671 #ifdef SD_FAULT_INJECTION
2672 	_NOTE(DATA_READABLE_WITHOUT_LOCK(sd_lun::sd_injection_mask));
2673 	if (un->sd_injection_mask & component) {
2674 		mutex_enter(&sd_log_mutex);
2675 		va_start(ap, fmt);
2676 		(void) vsprintf(sd_log_buf, fmt, ap);
2677 		va_end(ap);
2678 		sd_injection_log(sd_log_buf, un);
2679 		mutex_exit(&sd_log_mutex);
2680 	}
2681 #endif
2682 }
2683 
2684 
2685 /*
2686  *    Function: sdprobe
2687  *
2688  * Description: This is the driver probe(9e) entry point function.
2689  *
2690  *   Arguments: devi - opaque device info handle
2691  *
2692  * Return Code: DDI_PROBE_SUCCESS: If the probe was successful.
2693  *              DDI_PROBE_FAILURE: If the probe failed.
2694  *              DDI_PROBE_PARTIAL: If the instance is not present now,
2695  *				   but may be present in the future.
2696  */
2697 
2698 static int
2699 sdprobe(dev_info_t *devi)
2700 {
2701 	struct scsi_device	*devp;
2702 	int			rval;
2703 	int			instance = ddi_get_instance(devi);
2704 
2705 	/*
2706 	 * if it weren't for pln, sdprobe could actually be nulldev
2707 	 * in the "__fibre" case.
2708 	 */
2709 	if (ddi_dev_is_sid(devi) == DDI_SUCCESS) {
2710 		return (DDI_PROBE_DONTCARE);
2711 	}
2712 
2713 	devp = ddi_get_driver_private(devi);
2714 
2715 	if (devp == NULL) {
2716 		/* Oops... the nexus driver is misconfigured... */
2717 		return (DDI_PROBE_FAILURE);
2718 	}
2719 
2720 	if (ddi_get_soft_state(sd_state, instance) != NULL) {
2721 		return (DDI_PROBE_PARTIAL);
2722 	}
2723 
2724 	/*
2725 	 * Call the SCSA utility probe routine to see if we actually
2726 	 * have a target at this SCSI nexus.
2727 	 */
2728 	switch (sd_scsi_probe_with_cache(devp, NULL_FUNC)) {
2729 	case SCSIPROBE_EXISTS:
2730 		switch (devp->sd_inq->inq_dtype) {
2731 		case DTYPE_DIRECT:
2732 			rval = DDI_PROBE_SUCCESS;
2733 			break;
2734 		case DTYPE_RODIRECT:
2735 			/* CDs etc. Can be removable media */
2736 			rval = DDI_PROBE_SUCCESS;
2737 			break;
2738 		case DTYPE_OPTICAL:
2739 			/*
2740 			 * Rewritable optical drive, e.g. HP115AA.
2741 			 * Can also be removable media.
2742 			 */
2743 
2744 			/*
2745 			 * Do not attempt to bind to DTYPE_OPTICAL if
2746 			 * pre-Solaris 9 SPARC sd behavior is required.
2747 			 *
2748 			 * If this is the first time through and sd_dtype_optical_bind
2749 			 * has not been set in /etc/system, check the properties.
2750 			 */
2751 
2752 			if (sd_dtype_optical_bind < 0) {
2753 				sd_dtype_optical_bind = ddi_prop_get_int
2754 				    (DDI_DEV_T_ANY, devi, 0,
2755 				    "optical-device-bind", 1);
2756 			}
2757 
2758 			if (sd_dtype_optical_bind == 0) {
2759 				rval = DDI_PROBE_FAILURE;
2760 			} else {
2761 				rval = DDI_PROBE_SUCCESS;
2762 			}
2763 			break;
2764 
2765 		case DTYPE_NOTPRESENT:
2766 		default:
2767 			rval = DDI_PROBE_FAILURE;
2768 			break;
2769 		}
2770 		break;
2771 	default:
2772 		rval = DDI_PROBE_PARTIAL;
2773 		break;
2774 	}
2775 
2776 	/*
2777 	 * This routine checks for resource allocation prior to freeing,
2778 	 * so it will take care of the "smart probing" case where a
2779 	 * scsi_probe() may or may not have been issued and will *not*
2780 	 * free previously-freed resources.
2781 	 */
2782 	scsi_unprobe(devp);
2783 	return (rval);
2784 }
2785 
2786 
2787 /*
2788  *    Function: sdinfo
2789  *
2790  * Description: This is the driver getinfo(9e) entry point function.
2791  *		Given the device number, return the devinfo pointer from
2792  *		the scsi_device structure or the instance number
2793  *		associated with the dev_t.
2794  *
2795  *   Arguments: dip     - pointer to device info structure
2796  *		infocmd - command argument (DDI_INFO_DEVT2DEVINFO,
2797  *			  DDI_INFO_DEVT2INSTANCE)
2798  *		arg     - driver dev_t
2799  *		resultp - user buffer for request response
2800  *
2801  * Return Code: DDI_SUCCESS
2802  *              DDI_FAILURE
2803  */
2804 /* ARGSUSED */
2805 static int
2806 sdinfo(dev_info_t *dip, ddi_info_cmd_t infocmd, void *arg, void **result)
2807 {
2808 	struct sd_lun	*un;
2809 	dev_t		dev;
2810 	int		instance;
2811 	int		error;
2812 
2813 	switch (infocmd) {
2814 	case DDI_INFO_DEVT2DEVINFO:
2815 		dev = (dev_t)arg;
2816 		instance = SDUNIT(dev);
2817 		if ((un = ddi_get_soft_state(sd_state, instance)) == NULL) {
2818 			return (DDI_FAILURE);
2819 		}
2820 		*result = (void *) SD_DEVINFO(un);
2821 		error = DDI_SUCCESS;
2822 		break;
2823 	case DDI_INFO_DEVT2INSTANCE:
2824 		dev = (dev_t)arg;
2825 		instance = SDUNIT(dev);
2826 		*result = (void *)(uintptr_t)instance;
2827 		error = DDI_SUCCESS;
2828 		break;
2829 	default:
2830 		error = DDI_FAILURE;
2831 	}
2832 	return (error);
2833 }
2834 
2835 /*
2836  *    Function: sd_prop_op
2837  *
2838  * Description: This is the driver prop_op(9e) entry point function.
2839  *		Return the number of blocks for the partition in question
2840  *		or forward the request to the property facilities.
2841  *
2842  *   Arguments: dev       - device number
2843  *		dip       - pointer to device info structure
2844  *		prop_op   - property operator
2845  *		mod_flags - DDI_PROP_DONTPASS, don't pass to parent
2846  *		name      - pointer to property name
2847  *		valuep    - pointer or address of the user buffer
2848  *		lengthp   - property length
2849  *
2850  * Return Code: DDI_PROP_SUCCESS
2851  *              DDI_PROP_NOT_FOUND
2852  *              DDI_PROP_UNDEFINED
2853  *              DDI_PROP_NO_MEMORY
2854  *              DDI_PROP_BUF_TOO_SMALL
2855  */
2856 
2857 static int
2858 sd_prop_op(dev_t dev, dev_info_t *dip, ddi_prop_op_t prop_op, int mod_flags,
2859     char *name, caddr_t valuep, int *lengthp)
2860 {
2861 	struct sd_lun	*un;
2862 
2863 	if ((un = ddi_get_soft_state(sd_state, ddi_get_instance(dip))) == NULL)
2864 		return (ddi_prop_op(dev, dip, prop_op, mod_flags,
2865 		    name, valuep, lengthp));
2866 
2867 	return (cmlb_prop_op(un->un_cmlbhandle,
2868 	    dev, dip, prop_op, mod_flags, name, valuep, lengthp,
2869 	    SDPART(dev), (void *)SD_PATH_DIRECT));
2870 }
2871 
2872 /*
2873  * The following functions are for smart probing:
2874  * sd_scsi_probe_cache_init()
2875  * sd_scsi_probe_cache_fini()
2876  * sd_scsi_clear_probe_cache()
2877  * sd_scsi_probe_with_cache()
2878  */
2879 
2880 /*
2881  *    Function: sd_scsi_probe_cache_init
2882  *
2883  * Description: Initializes the probe response cache mutex and head pointer.
2884  *
2885  *     Context: Kernel thread context
2886  */
2887 
2888 static void
2889 sd_scsi_probe_cache_init(void)
2890 {
2891 	mutex_init(&sd_scsi_probe_cache_mutex, NULL, MUTEX_DRIVER, NULL);
2892 	sd_scsi_probe_cache_head = NULL;
2893 }
2894 
2895 
2896 /*
2897  *    Function: sd_scsi_probe_cache_fini
2898  *
2899  * Description: Frees all resources associated with the probe response cache.
2900  *
2901  *     Context: Kernel thread context
2902  */
2903 
2904 static void
2905 sd_scsi_probe_cache_fini(void)
2906 {
2907 	struct sd_scsi_probe_cache *cp;
2908 	struct sd_scsi_probe_cache *ncp;
2909 
2910 	/* Clean up our smart probing linked list */
2911 	for (cp = sd_scsi_probe_cache_head; cp != NULL; cp = ncp) {
2912 		ncp = cp->next;
2913 		kmem_free(cp, sizeof (struct sd_scsi_probe_cache));
2914 	}
2915 	sd_scsi_probe_cache_head = NULL;
2916 	mutex_destroy(&sd_scsi_probe_cache_mutex);
2917 }
2918 
2919 
2920 /*
2921  *    Function: sd_scsi_clear_probe_cache
2922  *
2923  * Description: This routine clears the probe response cache. This is
2924  *		done when open() returns ENXIO so that when deferred
2925  *		attach is attempted (possibly after a device has been
2926  *		turned on) we will retry the probe. Since we don't know
2927  *		which target we failed to open, we just clear the
2928  *		entire cache.
2929  *
2930  *     Context: Kernel thread context
2931  */
2932 
2933 static void
2934 sd_scsi_clear_probe_cache(void)
2935 {
2936 	struct sd_scsi_probe_cache	*cp;
2937 	int				i;
2938 
2939 	mutex_enter(&sd_scsi_probe_cache_mutex);
2940 	for (cp = sd_scsi_probe_cache_head; cp != NULL; cp = cp->next) {
2941 		/*
2942 		 * Reset all entries to SCSIPROBE_EXISTS.  This will
2943 		 * force probing to be performed the next time
2944 		 * sd_scsi_probe_with_cache is called.
2945 		 */
2946 		for (i = 0; i < NTARGETS_WIDE; i++) {
2947 			cp->cache[i] = SCSIPROBE_EXISTS;
2948 		}
2949 	}
2950 	mutex_exit(&sd_scsi_probe_cache_mutex);
2951 }
2952 
2953 
2954 /*
2955  *    Function: sd_scsi_probe_with_cache
2956  *
2957  * Description: This routine implements support for a scsi device probe
2958  *		with cache. The driver maintains a cache of the target
2959  *		responses to scsi probes. If we get no response from a
2960  *		target during a probe inquiry, we remember that, and we
2961  *		avoid additional calls to scsi_probe on non-zero LUNs
2962  *		on the same target until the cache is cleared. By doing
2963  *		so we avoid the 1/4 sec selection timeout for nonzero
2964  *		LUNs. LUN 0 of a target is always probed.
2965  *
2966  *   Arguments: devp     - Pointer to a scsi_device(9S) structure
2967  *              waitfn   - indicates what the allocator routines should
2968  *			   do when resources are not available. This value
2969  *			   is passed on to scsi_probe() when that routine
2970  *			   is called.
2971  *
2972  * Return Code: SCSIPROBE_NORESP if a NORESP is cached for the target;
2973  *		otherwise the value returned by scsi_probe(9F).
2974  *
2975  *     Context: Kernel thread context
2976  */
2977 
2978 static int
2979 sd_scsi_probe_with_cache(struct scsi_device *devp, int (*waitfn)())
2980 {
2981 	struct sd_scsi_probe_cache	*cp;
2982 	dev_info_t	*pdip = ddi_get_parent(devp->sd_dev);
2983 	int		lun, tgt;
2984 
2985 	lun = ddi_prop_get_int(DDI_DEV_T_ANY, devp->sd_dev, DDI_PROP_DONTPASS,
2986 	    SCSI_ADDR_PROP_LUN, 0);
2987 	tgt = ddi_prop_get_int(DDI_DEV_T_ANY, devp->sd_dev, DDI_PROP_DONTPASS,
2988 	    SCSI_ADDR_PROP_TARGET, -1);
2989 
2990 	/* Make sure caching enabled and target in range */
2991 	if ((tgt < 0) || (tgt >= NTARGETS_WIDE)) {
2992 		/* do it the old way (no cache) */
2993 		return (scsi_probe(devp, waitfn));
2994 	}
2995 
2996 	mutex_enter(&sd_scsi_probe_cache_mutex);
2997 
2998 	/* Find the cache for this scsi bus instance */
2999 	for (cp = sd_scsi_probe_cache_head; cp != NULL; cp = cp->next) {
3000 		if (cp->pdip == pdip) {
3001 			break;
3002 		}
3003 	}
3004 
3005 	/* If we can't find a cache for this pdip, create one */
3006 	if (cp == NULL) {
3007 		int i;
3008 
3009 		cp = kmem_zalloc(sizeof (struct sd_scsi_probe_cache),
3010 		    KM_SLEEP);
3011 		cp->pdip = pdip;
3012 		cp->next = sd_scsi_probe_cache_head;
3013 		sd_scsi_probe_cache_head = cp;
3014 		for (i = 0; i < NTARGETS_WIDE; i++) {
3015 			cp->cache[i] = SCSIPROBE_EXISTS;
3016 		}
3017 	}
3018 
3019 	mutex_exit(&sd_scsi_probe_cache_mutex);
3020 
3021 	/* Reset the cache entry for this target when probing LUN 0 */
3022 	if (lun == 0) {
3023 		cp->cache[tgt] = SCSIPROBE_EXISTS;
3024 	}
3025 
3026 	/* Don't probe if cache remembers a NORESP from a previous LUN. */
3027 	if (cp->cache[tgt] != SCSIPROBE_EXISTS) {
3028 		return (SCSIPROBE_NORESP);
3029 	}
3030 
3031 	/* Do the actual probe; save & return the result */
3032 	return (cp->cache[tgt] = scsi_probe(devp, waitfn));
3033 }
3034 
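/*
 * Usage sketch (illustrative; modeled on how this driver's probe entry
 * point invokes the routine): the cached variant is a drop-in
 * replacement for scsi_probe(9F), e.g.
 *
 *	if (sd_scsi_probe_with_cache(devp, NULL_FUNC) != SCSIPROBE_EXISTS)
 *		return (DDI_PROBE_FAILURE);
 *
 * NULL_FUNC requests non-blocking resource allocation, exactly as it
 * does for scsi_probe(9F) itself.
 */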
3035 
3036 /*
3037  *    Function: sd_scsi_target_lun_init
3038  *
3039  * Description: Initializes the attached lun chain mutex and head pointer.
3040  *
3041  *     Context: Kernel thread context
3042  */
3043 
3044 static void
3045 sd_scsi_target_lun_init(void)
3046 {
3047 	mutex_init(&sd_scsi_target_lun_mutex, NULL, MUTEX_DRIVER, NULL);
3048 	sd_scsi_target_lun_head = NULL;
3049 }
3050 
3051 
3052 /*
3053  *    Function: sd_scsi_target_lun_fini
3054  *
3055  * Description: Frees all resources associated with the attached lun
3056  *              chain.
3057  *
3058  *     Context: Kernel thread context
3059  */
3060 
3061 static void
3062 sd_scsi_target_lun_fini(void)
3063 {
3064 	struct sd_scsi_hba_tgt_lun	*cp;
3065 	struct sd_scsi_hba_tgt_lun	*ncp;
3066 
3067 	for (cp = sd_scsi_target_lun_head; cp != NULL; cp = ncp) {
3068 		ncp = cp->next;
3069 		kmem_free(cp, sizeof (struct sd_scsi_hba_tgt_lun));
3070 	}
3071 	sd_scsi_target_lun_head = NULL;
3072 	mutex_destroy(&sd_scsi_target_lun_mutex);
3073 }
3074 
3075 
3076 /*
3077  *    Function: sd_scsi_get_target_lun_count
3078  *
3079  * Description: This routine checks the attached lun chain to see how
3080  *		many luns are attached on the required SCSI controller and
3081  *		target. Currently, some capabilities, such as tagged
3082  *		queueing, are supported by the HBA on a per-target basis,
3083  *		so all luns on a target share the same capabilities. Based
3084  *		on this assumption, sd should set these capabilities only
3085  *		once per target. This function is called when sd needs to
3086  *		know how many luns are already attached on a target.
3087  *
3088  *   Arguments: dip	- Pointer to the system's dev_info_t for the SCSI
3089  *			  controller device.
3090  *              target	- The target ID on the controller's SCSI bus.
3091  *
3092  * Return Code: The number of luns attached on the required target and
3093  *		controller.
3094  *		-1 if target ID is not in parallel SCSI scope or the given
3095  *		dip is not in the chain.
3096  *
3097  *     Context: Kernel thread context
3098  */
3099 
3100 static int
3101 sd_scsi_get_target_lun_count(dev_info_t *dip, int target)
3102 {
3103 	struct sd_scsi_hba_tgt_lun	*cp;
3104 
3105 	if ((target < 0) || (target >= NTARGETS_WIDE)) {
3106 		return (-1);
3107 	}
3108 
3109 	mutex_enter(&sd_scsi_target_lun_mutex);
3110 
3111 	for (cp = sd_scsi_target_lun_head; cp != NULL; cp = cp->next) {
3112 		if (cp->pdip == dip) {
3113 			break;
3114 		}
3115 	}
3116 
3117 	mutex_exit(&sd_scsi_target_lun_mutex);
3118 
3119 	if (cp == NULL) {
3120 		return (-1);
3121 	}
3122 
3123 	return (cp->nlun[target]);
3124 }
3125 
3126 
3127 /*
3128  *    Function: sd_scsi_update_lun_on_target
3129  *
3130  * Description: This routine is used to update the attached lun chain when a
3131  *		lun is attached or detached on a target.
3132  *
3133  *   Arguments: dip     - Pointer to the system's dev_info_t for the SCSI
3134  *                        controller device.
3135  *              target  - The target ID on the controller's SCSI bus.
3136 *		flag	- Indicates whether the lun is being attached or detached.
3137  *
3138  *     Context: Kernel thread context
3139  */
3140 
3141 static void
3142 sd_scsi_update_lun_on_target(dev_info_t *dip, int target, int flag)
3143 {
3144 	struct sd_scsi_hba_tgt_lun	*cp;
3145 
3146 	mutex_enter(&sd_scsi_target_lun_mutex);
3147 
3148 	for (cp = sd_scsi_target_lun_head; cp != NULL; cp = cp->next) {
3149 		if (cp->pdip == dip) {
3150 			break;
3151 		}
3152 	}
3153 
3154 	if ((cp == NULL) && (flag == SD_SCSI_LUN_ATTACH)) {
3155 		cp = kmem_zalloc(sizeof (struct sd_scsi_hba_tgt_lun),
3156 		    KM_SLEEP);
3157 		cp->pdip = dip;
3158 		cp->next = sd_scsi_target_lun_head;
3159 		sd_scsi_target_lun_head = cp;
3160 	}
3161 
3162 	mutex_exit(&sd_scsi_target_lun_mutex);
3163 
3164 	if (cp != NULL) {
3165 		if (flag == SD_SCSI_LUN_ATTACH) {
3166 			cp->nlun[target]++;
3167 		} else {
3168 			cp->nlun[target]--;
3169 		}
3170 	}
3171 }
3172 
3173 
3174 /*
3175  *    Function: sd_spin_up_unit
3176  *
3177  * Description: Issues the following commands to spin up the device:
3178  *		START STOP UNIT and INQUIRY.
3179  *
3180  *   Arguments: ssc   - ssc contains pointer to driver soft state (unit)
3181  *                      structure for this target.
3182  *
3183  * Return Code: 0 - success
3184  *		EIO - failure
3185  *		EACCES - reservation conflict
3186  *
3187  *     Context: Kernel thread context
3188  */
3189 
3190 static int
3191 sd_spin_up_unit(sd_ssc_t *ssc)
3192 {
3193 	size_t	resid		= 0;
3194 	int	has_conflict	= FALSE;
3195 	uchar_t *bufaddr;
3196 	int	status;
3197 	struct sd_lun	*un;
3198 
3199 	ASSERT(ssc != NULL);
3200 	un = ssc->ssc_un;
3201 	ASSERT(un != NULL);
3202 
3203 	/*
3204 	 * Send a throwaway START UNIT command.
3205 	 *
3206 	 * If we fail on this, we don't care presently what precisely
3207 	 * is wrong.  EMC's arrays will also fail this with a check
3208 	 * condition (0x2/0x4/0x3) if the device is "inactive," but
3209 	 * we don't want to fail the attach because it may become
3210 	 * "active" later.
3211 	 * We don't know whether power condition is supported at this
3212 	 * stage, so use the START STOP bit.
3213 	 */
3214 	status = sd_send_scsi_START_STOP_UNIT(ssc, SD_START_STOP,
3215 	    SD_TARGET_START, SD_PATH_DIRECT);
3216 
3217 	if (status != 0) {
3218 		if (status == EACCES)
3219 			has_conflict = TRUE;
3220 		sd_ssc_assessment(ssc, SD_FMT_IGNORE);
3221 	}
3222 
3223 	/*
3224 	 * Send another INQUIRY command to the target. This is necessary for
3225 	 * non-removable media direct access devices because their INQUIRY data
3226 	 * may not be fully qualified until they are spun up (perhaps via the
3227 	 * START command above).  (Note: This seems to be needed for some
3228 	 * legacy devices only.)  The INQUIRY command should succeed even if a
3229 	 * Reservation Conflict is present.
3230 	 */
3231 	bufaddr = kmem_zalloc(SUN_INQSIZE, KM_SLEEP);
3232 
3233 	if (sd_send_scsi_INQUIRY(ssc, bufaddr, SUN_INQSIZE, 0, 0, &resid)
3234 	    != 0) {
3235 		kmem_free(bufaddr, SUN_INQSIZE);
3236 		sd_ssc_assessment(ssc, SD_FMT_STATUS_CHECK);
3237 		return (EIO);
3238 	}
3239 
3240 	/*
3241 	 * If we got enough INQUIRY data, copy it over the old INQUIRY data.
3242 	 * Note that this routine does not return a failure here even if the
3243 	 * INQUIRY command did not return any data.  This is a legacy behavior.
3244 	 */
3245 	if ((SUN_INQSIZE - resid) >= SUN_MIN_INQLEN) {
3246 		bcopy(bufaddr, SD_INQUIRY(un), SUN_INQSIZE);
3247 	}
3248 
3249 	kmem_free(bufaddr, SUN_INQSIZE);
3250 
3251 	/* If we hit a reservation conflict above, tell the caller. */
3252 	if (has_conflict == TRUE) {
3253 		return (EACCES);
3254 	}
3255 
3256 	return (0);
3257 }
3258 
3259 #ifdef _LP64
3260 /*
3261  *    Function: sd_enable_descr_sense
3262  *
3263  * Description: This routine attempts to select descriptor sense format
3264  *		using the Control mode page.  Devices that support 64 bit
3265  *		LBAs (for >2TB luns) should also implement descriptor
3266  *		sense data so we will call this function whenever we see
3267  *		a lun larger than 2TB.  If for some reason the device
3268  *		supports 64 bit LBAs but doesn't support descriptor sense,
3269  *		the mode select will presumably fail.  Everything will
3270  *		continue to work normally except that we will not get
3271  *		complete sense data for commands that fail with an LBA
3272  *		larger than 32 bits.
3273  *
3274  *   Arguments: ssc   - ssc contains pointer to driver soft state (unit)
3275  *                      structure for this target.
3276  *
3277  *     Context: Kernel thread context only
3278  */
3279 
3280 static void
3281 sd_enable_descr_sense(sd_ssc_t *ssc)
3282 {
3283 	uchar_t			*header;
3284 	struct mode_control_scsi3 *ctrl_bufp;
3285 	size_t			buflen;
3286 	size_t			bd_len;
3287 	int			status;
3288 	struct sd_lun		*un;
3289 
3290 	ASSERT(ssc != NULL);
3291 	un = ssc->ssc_un;
3292 	ASSERT(un != NULL);
3293 
3294 	/*
3295 	 * Read MODE SENSE page 0xA, Control Mode Page
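	 *
	 * Sketch of the returned data layout (bd_len may be 0 if the
	 * device returns no block descriptor):
	 *
	 *	header + 0				mode parameter header
	 *	header + MODE_HEADER_LENGTH		block descriptor
	 *	header + MODE_HEADER_LENGTH + bd_len	control mode page (0xA)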
3296 	 */
3297 	buflen = MODE_HEADER_LENGTH + MODE_BLK_DESC_LENGTH +
3298 	    sizeof (struct mode_control_scsi3);
3299 	header = kmem_zalloc(buflen, KM_SLEEP);
3300 
3301 	status = sd_send_scsi_MODE_SENSE(ssc, CDB_GROUP0, header, buflen,
3302 	    MODEPAGE_CTRL_MODE, SD_PATH_DIRECT);
3303 
3304 	if (status != 0) {
3305 		SD_ERROR(SD_LOG_COMMON, un,
3306 		    "sd_enable_descr_sense: mode sense ctrl page failed\n");
3307 		goto eds_exit;
3308 	}
3309 
3310 	/*
3311 	 * Determine size of Block Descriptors in order to locate
3312 	 * the mode page data. ATAPI devices return 0, SCSI devices
3313 	 * should return MODE_BLK_DESC_LENGTH.
3314 	 */
3315 	bd_len = ((struct mode_header *)header)->bdesc_length;
3316 
3317 	/* Clear the mode data length field for MODE SELECT */
3318 	((struct mode_header *)header)->length = 0;
3319 
3320 	ctrl_bufp = (struct mode_control_scsi3 *)
3321 	    (header + MODE_HEADER_LENGTH + bd_len);
3322 
3323 	/*
3324 	 * If the page length is smaller than the expected value,
3325 	 * the target device doesn't support D_SENSE. Bail out here.
3326 	 */
3327 	if (ctrl_bufp->mode_page.length <
3328 	    sizeof (struct mode_control_scsi3) - 2) {
3329 		SD_ERROR(SD_LOG_COMMON, un,
3330 		    "sd_enable_descr_sense: enable D_SENSE failed\n");
3331 		goto eds_exit;
3332 	}
3333 
3334 	/*
3335 	 * Clear PS bit for MODE SELECT
3336 	 */
3337 	ctrl_bufp->mode_page.ps = 0;
3338 
3339 	/*
3340 	 * Set D_SENSE to enable descriptor sense format.
3341 	 */
3342 	ctrl_bufp->d_sense = 1;
3343 
3344 	sd_ssc_assessment(ssc, SD_FMT_IGNORE);
3345 
3346 	/*
3347 	 * Use MODE SELECT to commit the change to the D_SENSE bit
3348 	 */
3349 	status = sd_send_scsi_MODE_SELECT(ssc, CDB_GROUP0, header,
3350 	    buflen, SD_DONTSAVE_PAGE, SD_PATH_DIRECT);
3351 
3352 	if (status != 0) {
3353 		SD_INFO(SD_LOG_COMMON, un,
3354 		    "sd_enable_descr_sense: mode select ctrl page failed\n");
3355 	} else {
3356 		kmem_free(header, buflen);
3357 		return;
3358 	}
3359 
3360 eds_exit:
3361 	sd_ssc_assessment(ssc, SD_FMT_IGNORE);
3362 	kmem_free(header, buflen);
3363 }
3364 
3365 /*
3366  *    Function: sd_reenable_dsense_task
3367  *
3368  * Description: Re-enable descriptor sense after device or bus reset
3369  *
3370  *     Context: Executes in a taskq() thread context
3371  */
3372 static void
3373 sd_reenable_dsense_task(void *arg)
3374 {
3375 	struct	sd_lun	*un = arg;
3376 	sd_ssc_t	*ssc;
3377 
3378 	ASSERT(un != NULL);
3379 
3380 	ssc = sd_ssc_init(un);
3381 	sd_enable_descr_sense(ssc);
3382 	sd_ssc_fini(ssc);
3383 }
3384 #endif /* _LP64 */
3385 
3386 /*
3387  *    Function: sd_set_mmc_caps
3388  *
3389  * Description: This routine determines whether the device is MMC compliant
3390  *		and whether it supports CDDA, via a mode sense of the CD/DVD
3391  *		capabilities mode page (0x2A). It also checks whether the
3392  *		device is a writable DVD-RAM device.
3393  *
3394  *   Arguments: ssc   - ssc contains pointer to driver soft state (unit)
3395  *                      structure for this target.
3396  *
3397  *     Context: Kernel thread context only
3398  */
3399 
3400 static void
3401 sd_set_mmc_caps(sd_ssc_t *ssc)
3402 {
3403 	struct mode_header_grp2		*sense_mhp;
3404 	uchar_t				*sense_page;
3405 	caddr_t				buf;
3406 	int				bd_len;
3407 	int				status;
3408 	struct uscsi_cmd		com;
3409 	int				rtn;
3410 	uchar_t				*out_data_rw, *out_data_hd;
3411 	uchar_t				*rqbuf_rw, *rqbuf_hd;
3412 	uchar_t				*out_data_gesn;
3413 	int				gesn_len;
3414 	struct sd_lun			*un;
3415 
3416 	ASSERT(ssc != NULL);
3417 	un = ssc->ssc_un;
3418 	ASSERT(un != NULL);
3419 
3420 	/*
3421 	 * The flags set in this function are: MMC compliant, DVD-RAM
3422 	 * writable device, and CDDA support. Initialize them to FALSE;
3423 	 * if a capability is detected, the flag will be set to TRUE.
3424 	 */
3425 	un->un_f_mmc_cap = FALSE;
3426 	un->un_f_dvdram_writable_device = FALSE;
3427 	un->un_f_cfg_cdda = FALSE;
3428 
3429 	buf = kmem_zalloc(BUFLEN_MODE_CDROM_CAP, KM_SLEEP);
3430 	status = sd_send_scsi_MODE_SENSE(ssc, CDB_GROUP1, (uchar_t *)buf,
3431 	    BUFLEN_MODE_CDROM_CAP, MODEPAGE_CDROM_CAP, SD_PATH_DIRECT);
3432 
3433 	sd_ssc_assessment(ssc, SD_FMT_IGNORE);
3434 
3435 	if (status != 0) {
3436 		/* command failed; just return */
3437 		kmem_free(buf, BUFLEN_MODE_CDROM_CAP);
3438 		return;
3439 	}
3440 	/*
3441 	 * If the mode sense request for the CDROM CAPABILITIES
3442 	 * page (0x2A) succeeds the device is assumed to be MMC.
3443 	 */
3444 	un->un_f_mmc_cap = TRUE;
3445 
3446 	/* See if GET EVENT STATUS NOTIFICATION is supported */
3447 	if (un->un_f_mmc_gesn_polling) {
3448 		gesn_len = SD_GESN_HEADER_LEN + SD_GESN_MEDIA_DATA_LEN;
3449 		out_data_gesn = kmem_zalloc(gesn_len, KM_SLEEP);
3450 
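		/*
		 * The event-class bitmask for GET EVENT STATUS NOTIFICATION
		 * carries one bit per event class; request only the media
		 * class here.
		 */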
3451 		rtn = sd_send_scsi_GET_EVENT_STATUS_NOTIFICATION(ssc,
3452 		    out_data_gesn, gesn_len, 1 << SD_GESN_MEDIA_CLASS);
3453 
3454 		sd_ssc_assessment(ssc, SD_FMT_IGNORE);
3455 
3456 		if ((rtn != 0) || !sd_gesn_media_data_valid(out_data_gesn)) {
3457 			un->un_f_mmc_gesn_polling = FALSE;
3458 			SD_INFO(SD_LOG_ATTACH_DETACH, un,
3459 			    "sd_set_mmc_caps: gesn not supported "
3460 			    "%d %x %x %x %x\n", rtn,
3461 			    out_data_gesn[0], out_data_gesn[1],
3462 			    out_data_gesn[2], out_data_gesn[3]);
3463 		}
3464 
3465 		kmem_free(out_data_gesn, gesn_len);
3466 	}
3467 
3468 	/* Get to the page data */
3469 	sense_mhp = (struct mode_header_grp2 *)buf;
3470 	bd_len = (sense_mhp->bdesc_length_hi << 8) |
3471 	    sense_mhp->bdesc_length_lo;
3472 	if (bd_len > MODE_BLK_DESC_LENGTH) {
3473 		/*
3474 		 * We did not get back the expected block descriptor
3475 		 * length so we cannot determine if the device supports
3476 		 * CDDA. However, we still indicate the device is MMC
3477 		 * according to the successful response to the page
3478 		 * 0x2A mode sense request.
3479 		 */
3480 		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
3481 		    "sd_set_mmc_caps: Mode Sense returned "
3482 		    "invalid block descriptor length\n");
3483 		kmem_free(buf, BUFLEN_MODE_CDROM_CAP);
3484 		return;
3485 	}
3486 
3487 	/* See if read CDDA is supported */
3488 	sense_page = (uchar_t *)(buf + MODE_HEADER_LENGTH_GRP2 +
3489 	    bd_len);
3490 	un->un_f_cfg_cdda = (sense_page[5] & 0x01) ? TRUE : FALSE;
3491 
3492 	/* See if writing DVD RAM is supported. */
3493 	un->un_f_dvdram_writable_device = (sense_page[3] & 0x20) ? TRUE : FALSE;
3494 	if (un->un_f_dvdram_writable_device == TRUE) {
3495 		kmem_free(buf, BUFLEN_MODE_CDROM_CAP);
3496 		return;
3497 	}
3498 
3499 	/*
3500 	 * If the device presents DVD or CD capabilities in the mode
3501 	 * page, we can return here since an RRD will not have
3502 	 * these capabilities.
3503 	 */
3504 	if ((sense_page[2] & 0x3f) || (sense_page[3] & 0x3f)) {
3505 		kmem_free(buf, BUFLEN_MODE_CDROM_CAP);
3506 		return;
3507 	}
3508 	kmem_free(buf, BUFLEN_MODE_CDROM_CAP);
3509 
3510 	/*
3511 	 * If un->un_f_dvdram_writable_device is still FALSE,
3512 	 * check for a Removable Rigid Disk (RRD).  An RRD
3513 	 * device is identified by the features RANDOM_WRITABLE and
3514 	 * HARDWARE_DEFECT_MANAGEMENT.
3515 	 */
3516 	out_data_rw = kmem_zalloc(SD_CURRENT_FEATURE_LEN, KM_SLEEP);
3517 	rqbuf_rw = kmem_zalloc(SENSE_LENGTH, KM_SLEEP);
3518 
3519 	rtn = sd_send_scsi_feature_GET_CONFIGURATION(ssc, &com, rqbuf_rw,
3520 	    SENSE_LENGTH, out_data_rw, SD_CURRENT_FEATURE_LEN,
3521 	    RANDOM_WRITABLE, SD_PATH_STANDARD);
3522 
3523 	sd_ssc_assessment(ssc, SD_FMT_IGNORE);
3524 
3525 	if (rtn != 0) {
3526 		kmem_free(out_data_rw, SD_CURRENT_FEATURE_LEN);
3527 		kmem_free(rqbuf_rw, SENSE_LENGTH);
3528 		return;
3529 	}
3530 
3531 	out_data_hd = kmem_zalloc(SD_CURRENT_FEATURE_LEN, KM_SLEEP);
3532 	rqbuf_hd = kmem_zalloc(SENSE_LENGTH, KM_SLEEP);
3533 
3534 	rtn = sd_send_scsi_feature_GET_CONFIGURATION(ssc, &com, rqbuf_hd,
3535 	    SENSE_LENGTH, out_data_hd, SD_CURRENT_FEATURE_LEN,
3536 	    HARDWARE_DEFECT_MANAGEMENT, SD_PATH_STANDARD);
3537 
3538 	sd_ssc_assessment(ssc, SD_FMT_IGNORE);
3539 
3540 	if (rtn == 0) {
3541 		/*
3542 		 * We have good information, check for random writable
3543 		 * and hardware defect features.
3544 		 */
3545 		if ((out_data_rw[9] & RANDOM_WRITABLE) &&
3546 		    (out_data_hd[9] & HARDWARE_DEFECT_MANAGEMENT)) {
3547 			un->un_f_dvdram_writable_device = TRUE;
3548 		}
3549 	}
3550 
3551 	kmem_free(out_data_rw, SD_CURRENT_FEATURE_LEN);
3552 	kmem_free(rqbuf_rw, SENSE_LENGTH);
3553 	kmem_free(out_data_hd, SD_CURRENT_FEATURE_LEN);
3554 	kmem_free(rqbuf_hd, SENSE_LENGTH);
3555 }
3556 
3557 /*
3558  *    Function: sd_check_for_writable_cd
3559  *
3560  * Description: This routine determines whether the media in the device
3561  *		is writable. It uses the GET CONFIGURATION command (0x46)
3562  *		to make that determination.
3563  *
3564  *   Arguments: ssc - ssc contains pointer to driver soft state (unit) structure
3565  *              path_flag - SD_PATH_DIRECT to use the USCSI "direct"
3566  *                           chain and the normal command waitq, or
3567  *                           SD_PATH_DIRECT_PRIORITY to use the USCSI
3568  *                           "direct" chain and bypass the normal command
3569  *                           waitq.
3570  *
3571  *     Context: Never called at interrupt context.
3572  */
3573 
3574 static void
3575 sd_check_for_writable_cd(sd_ssc_t *ssc, int path_flag)
3576 {
3577 	struct uscsi_cmd		com;
3578 	uchar_t				*out_data;
3579 	uchar_t				*rqbuf;
3580 	int				rtn;
3581 	uchar_t				*out_data_rw, *out_data_hd;
3582 	uchar_t				*rqbuf_rw, *rqbuf_hd;
3583 	struct mode_header_grp2		*sense_mhp;
3584 	uchar_t				*sense_page;
3585 	caddr_t				buf;
3586 	int				bd_len;
3587 	int				status;
3588 	struct sd_lun			*un;
3589 
3590 	ASSERT(ssc != NULL);
3591 	un = ssc->ssc_un;
3592 	ASSERT(un != NULL);
3593 	ASSERT(mutex_owned(SD_MUTEX(un)));
3594 
3595 	/*
3596 	 * Initialize writable media to FALSE; we set it to TRUE only if
3597 	 * the configuration info tells us the media is writable.
3598 	 */
3599 	un->un_f_mmc_writable_media = FALSE;
3600 	mutex_exit(SD_MUTEX(un));
3601 
3602 	out_data = kmem_zalloc(SD_PROFILE_HEADER_LEN, KM_SLEEP);
3603 	rqbuf = kmem_zalloc(SENSE_LENGTH, KM_SLEEP);
3604 
3605 	rtn = sd_send_scsi_GET_CONFIGURATION(ssc, &com, rqbuf, SENSE_LENGTH,
3606 	    out_data, SD_PROFILE_HEADER_LEN, path_flag);
3607 
3608 	if (rtn != 0)
3609 		sd_ssc_assessment(ssc, SD_FMT_IGNORE);
3610 
3611 	mutex_enter(SD_MUTEX(un));
3612 	if (rtn == 0) {
3613 		/*
3614 		 * We have good information, check for writable DVD.
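		 * Bytes 6-7 of the GET CONFIGURATION header hold the
		 * device's current profile; per MMC, profile 0x0012 is
		 * DVD-RAM, which is writable.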
3615 		 */
3616 		if ((out_data[6] == 0) && (out_data[7] == 0x12)) {
3617 			un->un_f_mmc_writable_media = TRUE;
3618 			kmem_free(out_data, SD_PROFILE_HEADER_LEN);
3619 			kmem_free(rqbuf, SENSE_LENGTH);
3620 			return;
3621 		}
3622 	}
3623 
3624 	kmem_free(out_data, SD_PROFILE_HEADER_LEN);
3625 	kmem_free(rqbuf, SENSE_LENGTH);
3626 
3627 	/*
3628 	 * Determine if this is a RRD type device.
3629 	 */
3630 	mutex_exit(SD_MUTEX(un));
3631 	buf = kmem_zalloc(BUFLEN_MODE_CDROM_CAP, KM_SLEEP);
3632 	status = sd_send_scsi_MODE_SENSE(ssc, CDB_GROUP1, (uchar_t *)buf,
3633 	    BUFLEN_MODE_CDROM_CAP, MODEPAGE_CDROM_CAP, path_flag);
3634 
3635 	sd_ssc_assessment(ssc, SD_FMT_IGNORE);
3636 
3637 	mutex_enter(SD_MUTEX(un));
3638 	if (status != 0) {
3639 		/* command failed; just return */
3640 		kmem_free(buf, BUFLEN_MODE_CDROM_CAP);
3641 		return;
3642 	}
3643 
3644 	/* Get to the page data */
3645 	sense_mhp = (struct mode_header_grp2 *)buf;
3646 	bd_len = (sense_mhp->bdesc_length_hi << 8) | sense_mhp->bdesc_length_lo;
3647 	if (bd_len > MODE_BLK_DESC_LENGTH) {
3648 		/*
3649 		 * We did not get back the expected block descriptor length so
3650 		 * we cannot check the mode page.
3651 		 */
3652 		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
3653 		    "sd_check_for_writable_cd: Mode Sense returned "
3654 		    "invalid block descriptor length\n");
3655 		kmem_free(buf, BUFLEN_MODE_CDROM_CAP);
3656 		return;
3657 	}
3658 
3659 	/*
3660 	 * If the device presents DVD or CD capabilities in the mode
3661 	 * page, we can return here since an RRD device will not have
3662 	 * these capabilities.
3663 	 */
3664 	sense_page = (uchar_t *)(buf + MODE_HEADER_LENGTH_GRP2 + bd_len);
3665 	if ((sense_page[2] & 0x3f) || (sense_page[3] & 0x3f)) {
3666 		kmem_free(buf, BUFLEN_MODE_CDROM_CAP);
3667 		return;
3668 	}
3669 	kmem_free(buf, BUFLEN_MODE_CDROM_CAP);
3670 
3671 	/*
3672 	 * If un->un_f_mmc_writable_media is still FALSE,
3673 	 * check for RRD type media.  An RRD device is identified
3674 	 * by the features RANDOM_WRITABLE and HARDWARE_DEFECT_MANAGEMENT.
3675 	 */
3676 	mutex_exit(SD_MUTEX(un));
3677 	out_data_rw = kmem_zalloc(SD_CURRENT_FEATURE_LEN, KM_SLEEP);
3678 	rqbuf_rw = kmem_zalloc(SENSE_LENGTH, KM_SLEEP);
3679 
3680 	rtn = sd_send_scsi_feature_GET_CONFIGURATION(ssc, &com, rqbuf_rw,
3681 	    SENSE_LENGTH, out_data_rw, SD_CURRENT_FEATURE_LEN,
3682 	    RANDOM_WRITABLE, path_flag);
3683 
3684 	sd_ssc_assessment(ssc, SD_FMT_IGNORE);
3685 	if (rtn != 0) {
3686 		kmem_free(out_data_rw, SD_CURRENT_FEATURE_LEN);
3687 		kmem_free(rqbuf_rw, SENSE_LENGTH);
3688 		mutex_enter(SD_MUTEX(un));
3689 		return;
3690 	}
3691 
3692 	out_data_hd = kmem_zalloc(SD_CURRENT_FEATURE_LEN, KM_SLEEP);
3693 	rqbuf_hd = kmem_zalloc(SENSE_LENGTH, KM_SLEEP);
3694 
3695 	rtn = sd_send_scsi_feature_GET_CONFIGURATION(ssc, &com, rqbuf_hd,
3696 	    SENSE_LENGTH, out_data_hd, SD_CURRENT_FEATURE_LEN,
3697 	    HARDWARE_DEFECT_MANAGEMENT, path_flag);
3698 
3699 	sd_ssc_assessment(ssc, SD_FMT_IGNORE);
3700 	mutex_enter(SD_MUTEX(un));
3701 	if (rtn == 0) {
3702 		/*
3703 		 * We have good information, check for random writable
3704 		 * and hardware defect features as current.
3705 		 */
3706 		if ((out_data_rw[9] & RANDOM_WRITABLE) &&
3707 		    (out_data_rw[10] & 0x1) &&
3708 		    (out_data_hd[9] & HARDWARE_DEFECT_MANAGEMENT) &&
3709 		    (out_data_hd[10] & 0x1)) {
3710 			un->un_f_mmc_writable_media = TRUE;
3711 		}
3712 	}
3713 
3714 	kmem_free(out_data_rw, SD_CURRENT_FEATURE_LEN);
3715 	kmem_free(rqbuf_rw, SENSE_LENGTH);
3716 	kmem_free(out_data_hd, SD_CURRENT_FEATURE_LEN);
3717 	kmem_free(rqbuf_hd, SENSE_LENGTH);
3718 }
3719 
3720 /*
3721  *    Function: sd_read_unit_properties
3722  *
3723  * Description: The following implements a property lookup mechanism.
3724  *		Properties for particular disks (keyed on vendor, model
3725  *		and rev numbers) are sought in the sd.conf file via
3726  *		sd_process_sdconf_file(), and if not found there, are
3727  *		looked for in a list hardcoded in this driver via
3728  *		sd_process_sdconf_table() Once located the properties
3729  *		are used to update the driver unit structure.
3730  *
3731  *   Arguments: un - driver soft state (unit) structure
3732  */
3733 
3734 static void
3735 sd_read_unit_properties(struct sd_lun *un)
3736 {
3737 	/*
3738 	 * sd_process_sdconf_file returns SD_FAILURE if it cannot find
3739 	 * the "sd-config-list" property (from the sd.conf file) or if
3740 	 * there was not a match for the inquiry vid/pid. If this event
3741 	 * occurs the static driver configuration table is searched for
3742 	 * a match.
3743 	 */
3744 	ASSERT(un != NULL);
3745 	if (sd_process_sdconf_file(un) == SD_FAILURE) {
3746 		sd_process_sdconf_table(un);
3747 	}
3748 
3749 	/* check for LSI device */
3750 	sd_is_lsi(un);
3753 }
3754 
3755 
3756 /*
3757  *    Function: sd_process_sdconf_file
3758  *
3759  * Description: Use ddi_prop_lookup(9F) to obtain the properties from the
3760  *		driver's config file (ie, sd.conf) and update the driver
3761  *		soft state structure accordingly.
3762  *
3763  *   Arguments: un - driver soft state (unit) structure
3764  *
3765  * Return Code: SD_SUCCESS - The properties were successfully set according
3766  *			     to the driver configuration file.
3767  *		SD_FAILURE - The driver config list was not obtained or
3768  *			     there was no vid/pid match. This indicates that
3769  *			     the static config table should be used.
3770  *
3771  * The config file has a property, "sd-config-list". Currently we support
3772  * two kinds of formats. For both formats, the value of this property
3773  * is a list of duplets:
3774  *
3775  *  sd-config-list=
3776  *	<duplet>,
3777  *	[,<duplet>]*;
3778  *
3779  * The improved format is defined as follows:
3780  *
3781  *     <duplet>:= "<vid+pid>","<tunable-list>"
3782  *
3783  * and
3784  *
3785  *     <tunable-list>:=   <tunable> [, <tunable> ]*;
3786  *     <tunable> =        <name> : <value>
3787  *
3788  * The <vid+pid> is the string that is returned by the target device on a
3789  * SCSI inquiry command, the <tunable-list> contains one or more tunables
3790  * to apply to all target devices with the specified <vid+pid>.
3791  *
3792  * Each <tunable> is a "<name> : <value>" pair.
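 *
 * For example (hypothetical vid/pid shown; the tunable names are those
 * recognized by sd_set_properties() below):
 *
 *  sd-config-list =
 *	"SEAGATE ST32550W", "throttle-max:64, disksort:false",
 *	"ACME    EXAMPLE1", "cache-nonvolatile:true, retries-busy:6";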
3793  *
3794  * For the old format, the structure of each duplet is as follows:
3795  *
3796  *  <duplet>:= "<vid+pid>","<data-property-name_list>"
3797  *
3798  * The first entry of the duplet is the device ID string (the concatenated
3799  * vid & pid; not to be confused with a device_id).  This is defined in
3800  * the same way as in the sd_disk_table.
3801  *
3802  * The second part of the duplet is a string that identifies a
3803  * data-property-name-list. The data-property-name-list is defined as
3804  * follows:
3805  *
3806  *  <data-property-name-list>:=<data-property-name> [<data-property-name>]
3807  *
3808  * The syntax of <data-property-name> depends on the <version> field.
3809  *
3810  * If version = SD_CONF_VERSION_1 we have the following syntax:
3811  *
3812  *	<data-property-name>:=<version>,<flags>,<prop0>,<prop1>,.....<propN>
3813  *
3814  * where the prop0 value will be used to set prop0 if bit0 is set in the
3815  * flags, prop1 if bit1 is set, etc., and N = SD_CONF_MAX_ITEMS - 1.
3816  *
3817  */
3818 
3819 static int
3820 sd_process_sdconf_file(struct sd_lun *un)
3821 {
3822 	char	**config_list = NULL;
3823 	uint_t	nelements;
3824 	char	*vidptr;
3825 	int	vidlen;
3826 	char	*dnlist_ptr;
3827 	char	*dataname_ptr;
3828 	char	*dataname_lasts;
3829 	int	*data_list = NULL;
3830 	uint_t	data_list_len;
3831 	int	rval = SD_FAILURE;
3832 	int	i;
3833 
3834 	ASSERT(un != NULL);
3835 
3836 	/* Obtain the configuration list associated with the .conf file */
3837 	if (ddi_prop_lookup_string_array(DDI_DEV_T_ANY, SD_DEVINFO(un),
3838 	    DDI_PROP_DONTPASS | DDI_PROP_NOTPROM, sd_config_list,
3839 	    &config_list, &nelements) != DDI_PROP_SUCCESS) {
3840 		return (SD_FAILURE);
3841 	}
3842 
3843 	/*
3844 	 * Compare vids in each duplet to the inquiry vid - if a match is
3845 	 * made, get the data value and update the soft state structure
3846 	 * accordingly.
3847 	 *
3848 	 * Each duplet should appear as a pair of strings; return SD_FAILURE
3849 	 * otherwise.
3850 	 */
3851 	if (nelements & 1) {
3852 		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
3853 		    "sd-config-list should show as pairs of strings.\n");
3854 		if (config_list)
3855 			ddi_prop_free(config_list);
3856 		return (SD_FAILURE);
3857 	}
3858 
3859 	for (i = 0; i < nelements; i += 2) {
3860 		/*
3861 		 * Note: The assumption here is that each vid entry is on
3862 		 * a unique line from its associated duplet.
3863 		 */
3864 		vidptr = config_list[i];
3865 		vidlen = (int)strlen(vidptr);
3866 		if (sd_sdconf_id_match(un, vidptr, vidlen) != SD_SUCCESS) {
3867 			continue;
3868 		}
3869 
3870 		/*
3871 		 * dnlist contains 1 or more blank separated
3872 		 * data-property-name entries
3873 		 */
3874 		dnlist_ptr = config_list[i + 1];
3875 
3876 		if (strchr(dnlist_ptr, ':') != NULL) {
3877 			/*
3878 			 * Decode the improved format sd-config-list.
3879 			 */
3880 			sd_nvpair_str_decode(un, dnlist_ptr);
3881 		} else {
3882 			/*
3883 			 * This is the old format sd-config-list: loop
3884 			 * through all data-property-name entries in the
3885 			 * data-property-name-list, setting the
3886 			 * properties for each.
3887 			 */
3888 			for (dataname_ptr = sd_strtok_r(dnlist_ptr, " \t",
3889 			    &dataname_lasts); dataname_ptr != NULL;
3890 			    dataname_ptr = sd_strtok_r(NULL, " \t",
3891 			    &dataname_lasts)) {
3892 				int version;
3893 
3894 				SD_INFO(SD_LOG_ATTACH_DETACH, un,
3895 				    "sd_process_sdconf_file: disk:%s, "
3896 				    "data:%s\n", vidptr, dataname_ptr);
3897 
3898 				/* Get the data list */
3899 				if (ddi_prop_lookup_int_array(DDI_DEV_T_ANY,
3900 				    SD_DEVINFO(un), 0, dataname_ptr, &data_list,
3901 				    &data_list_len) != DDI_PROP_SUCCESS) {
3902 					SD_INFO(SD_LOG_ATTACH_DETACH, un,
3903 					    "sd_process_sdconf_file: data "
3904 					    "property (%s) has no value\n",
3905 					    dataname_ptr);
3906 					continue;
3907 				}
3908 
3909 				version = data_list[0];
3910 
3911 				if (version == SD_CONF_VERSION_1) {
3912 					sd_tunables values;
3913 
3914 					/* Set the properties */
3915 					if (sd_chk_vers1_data(un, data_list[1],
3916 					    &data_list[2], data_list_len,
3917 					    dataname_ptr) == SD_SUCCESS) {
3918 						sd_get_tunables_from_conf(un,
3919 						    data_list[1], &data_list[2],
3920 						    &values);
3921 						sd_set_vers1_properties(un,
3922 						    data_list[1], &values);
3923 						rval = SD_SUCCESS;
3924 					} else {
3925 						rval = SD_FAILURE;
3926 					}
3927 				} else {
3928 					scsi_log(SD_DEVINFO(un), sd_label,
3929 					    CE_WARN, "data property %s version "
3930 					    "0x%x is invalid.",
3931 					    dataname_ptr, version);
3932 					rval = SD_FAILURE;
3933 				}
3934 				if (data_list)
3935 					ddi_prop_free(data_list);
3936 			}
3937 		}
3938 	}
3939 
3940 	/* free up the memory allocated by ddi_prop_lookup_string_array(). */
3941 	if (config_list) {
3942 		ddi_prop_free(config_list);
3943 	}
3944 
3945 	return (rval);
3946 }
3947 
3948 /*
3949  *    Function: sd_nvpair_str_decode()
3950  *
3951  * Description: Parse the improved format sd-config-list to get
3952  *    each entry of tunable, which includes a name-value pair.
3953  *    Then call sd_set_properties() to set the property.
3954  *
3955  *   Arguments: un - driver soft state (unit) structure
3956  *    nvpair_str - the tunable list
3957  */
3958 static void
3959 sd_nvpair_str_decode(struct sd_lun *un, char *nvpair_str)
3960 {
3961 	char	*nv, *name, *value, *token;
3962 	char	*nv_lasts, *v_lasts, *x_lasts;
3963 
3964 	for (nv = sd_strtok_r(nvpair_str, ",", &nv_lasts); nv != NULL;
3965 	    nv = sd_strtok_r(NULL, ",", &nv_lasts)) {
3966 		token = sd_strtok_r(nv, ":", &v_lasts);
3967 		name  = sd_strtok_r(token, " \t", &x_lasts);
3968 		token = sd_strtok_r(NULL, ":", &v_lasts);
3969 		value = sd_strtok_r(token, " \t", &x_lasts);
3970 		if (name == NULL || value == NULL) {
3971 			SD_INFO(SD_LOG_ATTACH_DETACH, un,
3972 			    "sd_nvpair_str_decode: "
3973 			    "name or value is not valid!\n");
3974 		} else {
3975 			sd_set_properties(un, name, value);
3976 		}
3977 	}
3978 }
3979 
3980 /*
3981  *    Function: sd_strtok_r()
3982  *
3983  * Description: This function uses strpbrk and strspn to break
3984  *    the string into tokens across successive calls. It returns
3985  *    NULL when no non-separator characters remain. Pass NULL as
3986  *    the first argument on subsequent calls.
3987  */
3988 static char *
3989 sd_strtok_r(char *string, const char *sepset, char **lasts)
3990 {
3991 	char	*q, *r;
3992 
3993 	/* First or subsequent call */
3994 	if (string == NULL)
3995 		string = *lasts;
3996 
3997 	if (string == NULL)
3998 		return (NULL);
3999 
4000 	/* Skip leading separators */
4001 	q = string + strspn(string, sepset);
4002 
4003 	if (*q == '\0')
4004 		return (NULL);
4005 
4006 	if ((r = strpbrk(q, sepset)) == NULL) {
4007 		*lasts = NULL;
4008 	} else {
4009 		*r = '\0';
4010 		*lasts = r + 1;
4011 	}
4012 	return (q);
4013 }
4014 
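/*
 * Usage sketch (illustrative only): tokenizing a single-entry tunable
 * string such as "throttle-max : 64":
 *
 *	char *lasts, *tok;
 *	tok = sd_strtok_r(str, ",", &lasts);	returns "throttle-max : 64"
 *	tok = sd_strtok_r(NULL, ",", &lasts);	returns NULL (exhausted)
 *
 * Unlike strtok(3C), the caller supplies the "lasts" cursor, keeping
 * this variant reentrant; that is how sd_nvpair_str_decode() above can
 * run the comma, colon and blank tokenizations concurrently.
 */
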
4015 /*
4016  *    Function: sd_set_properties()
4017  *
4018  * Description: Set device properties based on the improved
4019  *    format sd-config-list.
4020  *
4021  *   Arguments: un - driver soft state (unit) structure
4022  *    name  - supported tunable name
4023  *    value - tunable value
4024  */
4025 static void
4026 sd_set_properties(struct sd_lun *un, char *name, char *value)
4027 {
4028 	char	*endptr = NULL;
4029 	long	val = 0;
4030 
4031 	if (strcasecmp(name, "cache-nonvolatile") == 0) {
4032 		if (strcasecmp(value, "true") == 0) {
4033 			un->un_f_suppress_cache_flush = TRUE;
4034 		} else if (strcasecmp(value, "false") == 0) {
4035 			un->un_f_suppress_cache_flush = FALSE;
4036 		} else {
4037 			goto value_invalid;
4038 		}
4039 		SD_INFO(SD_LOG_ATTACH_DETACH, un, "sd_set_properties: "
4040 		    "suppress_cache_flush flag set to %d\n",
4041 		    un->un_f_suppress_cache_flush);
4042 		return;
4043 	}
4044 
4045 	if (strcasecmp(name, "controller-type") == 0) {
4046 		if (ddi_strtol(value, &endptr, 0, &val) == 0) {
4047 			un->un_ctype = val;
4048 		} else {
4049 			goto value_invalid;
4050 		}
4051 		SD_INFO(SD_LOG_ATTACH_DETACH, un, "sd_set_properties: "
4052 		    "ctype set to %d\n", un->un_ctype);
4053 		return;
4054 	}
4055 
4056 	if (strcasecmp(name, "delay-busy") == 0) {
4057 		if (ddi_strtol(value, &endptr, 0, &val) == 0) {
4058 			un->un_busy_timeout = drv_usectohz(val / 1000);
4059 		} else {
4060 			goto value_invalid;
4061 		}
4062 		SD_INFO(SD_LOG_ATTACH_DETACH, un, "sd_set_properties: "
4063 		    "busy_timeout set to %d\n", un->un_busy_timeout);
4064 		return;
4065 	}
4066 
4067 	if (strcasecmp(name, "disksort") == 0) {
4068 		if (strcasecmp(value, "true") == 0) {
4069 			un->un_f_disksort_disabled = FALSE;
4070 		} else if (strcasecmp(value, "false") == 0) {
4071 			un->un_f_disksort_disabled = TRUE;
4072 		} else {
4073 			goto value_invalid;
4074 		}
4075 		SD_INFO(SD_LOG_ATTACH_DETACH, un, "sd_set_properties: "
4076 		    "disksort disabled flag set to %d\n",
4077 		    un->un_f_disksort_disabled);
4078 		return;
4079 	}
4080 
4081 	if (strcasecmp(name, "power-condition") == 0) {
4082 		if (strcasecmp(value, "true") == 0) {
4083 			un->un_f_power_condition_disabled = FALSE;
4084 		} else if (strcasecmp(value, "false") == 0) {
4085 			un->un_f_power_condition_disabled = TRUE;
4086 		} else {
4087 			goto value_invalid;
4088 		}
4089 		SD_INFO(SD_LOG_ATTACH_DETACH, un, "sd_set_properties: "
4090 		    "power condition disabled flag set to %d\n",
4091 		    un->un_f_power_condition_disabled);
4092 		return;
4093 	}
4094 
4095 	if (strcasecmp(name, "timeout-releasereservation") == 0) {
4096 		if (ddi_strtol(value, &endptr, 0, &val) == 0) {
4097 			un->un_reserve_release_time = val;
4098 		} else {
4099 			goto value_invalid;
4100 		}
4101 		SD_INFO(SD_LOG_ATTACH_DETACH, un, "sd_set_properties: "
4102 		    "reservation release timeout set to %d\n",
4103 		    un->un_reserve_release_time);
4104 		return;
4105 	}
4106 
4107 	if (strcasecmp(name, "reset-lun") == 0) {
4108 		if (strcasecmp(value, "true") == 0) {
4109 			un->un_f_lun_reset_enabled = TRUE;
4110 		} else if (strcasecmp(value, "false") == 0) {
4111 			un->un_f_lun_reset_enabled = FALSE;
4112 		} else {
4113 			goto value_invalid;
4114 		}
4115 		SD_INFO(SD_LOG_ATTACH_DETACH, un, "sd_set_properties: "
4116 		    "lun reset enabled flag set to %d\n",
4117 		    un->un_f_lun_reset_enabled);
4118 		return;
4119 	}
4120 
4121 	if (strcasecmp(name, "retries-busy") == 0) {
4122 		if (ddi_strtol(value, &endptr, 0, &val) == 0) {
4123 			un->un_busy_retry_count = val;
4124 		} else {
4125 			goto value_invalid;
4126 		}
4127 		SD_INFO(SD_LOG_ATTACH_DETACH, un, "sd_set_properties: "
4128 		    "busy retry count set to %d\n", un->un_busy_retry_count);
4129 		return;
4130 	}
4131 
4132 	if (strcasecmp(name, "retries-timeout") == 0) {
4133 		if (ddi_strtol(value, &endptr, 0, &val) == 0) {
4134 			un->un_retry_count = val;
4135 		} else {
4136 			goto value_invalid;
4137 		}
4138 		SD_INFO(SD_LOG_ATTACH_DETACH, un, "sd_set_properties: "
4139 		    "timeout retry count set to %d\n", un->un_retry_count);
4140 		return;
4141 	}
4142 
4143 	if (strcasecmp(name, "retries-notready") == 0) {
4144 		if (ddi_strtol(value, &endptr, 0, &val) == 0) {
4145 			un->un_notready_retry_count = val;
4146 		} else {
4147 			goto value_invalid;
4148 		}
4149 		SD_INFO(SD_LOG_ATTACH_DETACH, un, "sd_set_properties: "
4150 		    "notready retry count set to %d\n",
4151 		    un->un_notready_retry_count);
4152 		return;
4153 	}
4154 
4155 	if (strcasecmp(name, "retries-reset") == 0) {
4156 		if (ddi_strtol(value, &endptr, 0, &val) == 0) {
4157 			un->un_reset_retry_count = val;
4158 		} else {
4159 			goto value_invalid;
4160 		}
4161 		SD_INFO(SD_LOG_ATTACH_DETACH, un, "sd_set_properties: "
4162 		    "reset retry count set to %d\n",
4163 		    un->un_reset_retry_count);
4164 		return;
4165 	}
4166 
4167 	if (strcasecmp(name, "throttle-max") == 0) {
4168 		if (ddi_strtol(value, &endptr, 0, &val) == 0) {
4169 			un->un_saved_throttle = un->un_throttle = val;
4170 		} else {
4171 			goto value_invalid;
4172 		}
4173 		SD_INFO(SD_LOG_ATTACH_DETACH, un, "sd_set_properties: "
4174 		    "throttle set to %d\n", un->un_throttle);
4175 	}
4176 
4177 	if (strcasecmp(name, "throttle-min") == 0) {
4178 		if (ddi_strtol(value, &endptr, 0, &val) == 0) {
4179 			un->un_min_throttle = val;
4180 		} else {
4181 			goto value_invalid;
4182 		}
4183 		SD_INFO(SD_LOG_ATTACH_DETACH, un, "sd_set_properties: "
4184 		    "min throttle set to %d\n", un->un_min_throttle);
4185 	}
4186 
4187 	if (strcasecmp(name, "rmw-type") == 0) {
4188 		if (ddi_strtol(value, &endptr, 0, &val) == 0) {
4189 			un->un_f_rmw_type = val;
4190 		} else {
4191 			goto value_invalid;
4192 		}
4193 		SD_INFO(SD_LOG_ATTACH_DETACH, un, "sd_set_properties: "
4194 		    "RMW type set to %d\n", un->un_f_rmw_type);
4195 	}
4196 
4197 	if (strcasecmp(name, "physical-block-size") == 0) {
4198 		if (ddi_strtol(value, &endptr, 0, &val) == 0 &&
4199 		    ISP2(val) && val >= un->un_tgt_blocksize &&
4200 		    val >= un->un_sys_blocksize) {
4201 			un->un_phy_blocksize = val;
4202 		} else {
4203 			goto value_invalid;
4204 		}
4205 		SD_INFO(SD_LOG_ATTACH_DETACH, un, "sd_set_properties: "
4206 		    "physical block size set to %d\n", un->un_phy_blocksize);
4207 	}
4208 
4209 	if (strcasecmp(name, "retries-victim") == 0) {
4210 		if (ddi_strtol(value, &endptr, 0, &val) == 0) {
4211 			un->un_victim_retry_count = val;
4212 		} else {
4213 			goto value_invalid;
4214 		}
4215 		SD_INFO(SD_LOG_ATTACH_DETACH, un, "sd_set_properties: "
4216 		    "victim retry count set to %d\n",
4217 		    un->un_victim_retry_count);
4218 		return;
4219 	}
4220 
4221 	/*
4222 	 * Validate the throttle values.
4223 	 * If any of the numbers are invalid, set everything to defaults.
4224 	 */
4225 	if ((un->un_throttle < SD_LOWEST_VALID_THROTTLE) ||
4226 	    (un->un_min_throttle < SD_LOWEST_VALID_THROTTLE) ||
4227 	    (un->un_min_throttle > un->un_throttle)) {
4228 		un->un_saved_throttle = un->un_throttle = sd_max_throttle;
4229 		un->un_min_throttle = sd_min_throttle;
4230 	}
4231 
4232 	if (strcasecmp(name, "mmc-gesn-polling") == 0) {
4233 		if (strcasecmp(value, "true") == 0) {
4234 			un->un_f_mmc_gesn_polling = TRUE;
4235 		} else if (strcasecmp(value, "false") == 0) {
4236 			un->un_f_mmc_gesn_polling = FALSE;
4237 		} else {
4238 			goto value_invalid;
4239 		}
4240 		SD_INFO(SD_LOG_ATTACH_DETACH, un, "sd_set_properties: "
4241 		    "mmc-gesn-polling set to %d\n",
4242 		    un->un_f_mmc_gesn_polling);
4243 	}
4244 
4245 	return;
4246 
4247 value_invalid:
4248 	SD_INFO(SD_LOG_ATTACH_DETACH, un, "sd_set_properties: "
4249 	    "value of prop %s is invalid\n", name);
4250 }
4251 
4252 /*
4253  *    Function: sd_get_tunables_from_conf()
4254  *
4255  *
4256  *    This function reads the data list from the sd.conf file and
4257  *    pulls the numeric values, placing each one in the appropriate
4258  *    sd_tunables member.
4259  *    Since the order of the data list members varies across platforms,
4260  *    this function reads them from the data list in a platform-specific
4261  *    order and places them into the correct sd_tunables member, which
4262  *    is consistent across all platforms.
4263  */
4264 static void
4265 sd_get_tunables_from_conf(struct sd_lun *un, int flags, int *data_list,
4266     sd_tunables *values)
4267 {
4268 	int i;
4269 	int mask;
4270 
4271 	bzero(values, sizeof (sd_tunables));
4272 
4273 	for (i = 0; i < SD_CONF_MAX_ITEMS; i++) {
4274 
4275 		mask = 1 << i;
4276 		if (mask > flags) {
4277 			break;
4278 		}
4279 
4280 		switch (mask & flags) {
4281 		case 0:	/* This mask bit not set in flags */
4282 			continue;
4283 		case SD_CONF_BSET_THROTTLE:
4284 			values->sdt_throttle = data_list[i];
4285 			SD_INFO(SD_LOG_ATTACH_DETACH, un,
4286 			    "sd_get_tunables_from_conf: throttle = %d\n",
4287 			    values->sdt_throttle);
4288 			break;
4289 		case SD_CONF_BSET_CTYPE:
4290 			values->sdt_ctype = data_list[i];
4291 			SD_INFO(SD_LOG_ATTACH_DETACH, un,
4292 			    "sd_get_tunables_from_conf: ctype = %d\n",
4293 			    values->sdt_ctype);
4294 			break;
4295 		case SD_CONF_BSET_NRR_COUNT:
4296 			values->sdt_not_rdy_retries = data_list[i];
4297 			SD_INFO(SD_LOG_ATTACH_DETACH, un,
4298 			    "sd_get_tunables_from_conf: not_rdy_retries = %d\n",
4299 			    values->sdt_not_rdy_retries);
4300 			break;
4301 		case SD_CONF_BSET_BSY_RETRY_COUNT:
4302 			values->sdt_busy_retries = data_list[i];
4303 			SD_INFO(SD_LOG_ATTACH_DETACH, un,
4304 			    "sd_get_tunables_from_conf: busy_retries = %d\n",
4305 			    values->sdt_busy_retries);
4306 			break;
4307 		case SD_CONF_BSET_RST_RETRIES:
4308 			values->sdt_reset_retries = data_list[i];
4309 			SD_INFO(SD_LOG_ATTACH_DETACH, un,
4310 			    "sd_get_tunables_from_conf: reset_retries = %d\n",
4311 			    values->sdt_reset_retries);
4312 			break;
4313 		case SD_CONF_BSET_RSV_REL_TIME:
4314 			values->sdt_reserv_rel_time = data_list[i];
4315 			SD_INFO(SD_LOG_ATTACH_DETACH, un,
4316 			    "sd_get_tunables_from_conf: reserv_rel_time = %d\n",
4317 			    values->sdt_reserv_rel_time);
4318 			break;
4319 		case SD_CONF_BSET_MIN_THROTTLE:
4320 			values->sdt_min_throttle = data_list[i];
4321 			SD_INFO(SD_LOG_ATTACH_DETACH, un,
4322 			    "sd_get_tunables_from_conf: min_throttle = %d\n",
4323 			    values->sdt_min_throttle);
4324 			break;
4325 		case SD_CONF_BSET_DISKSORT_DISABLED:
4326 			values->sdt_disk_sort_dis = data_list[i];
4327 			SD_INFO(SD_LOG_ATTACH_DETACH, un,
4328 			    "sd_get_tunables_from_conf: disk_sort_dis = %d\n",
4329 			    values->sdt_disk_sort_dis);
4330 			break;
4331 		case SD_CONF_BSET_LUN_RESET_ENABLED:
4332 			values->sdt_lun_reset_enable = data_list[i];
4333 			SD_INFO(SD_LOG_ATTACH_DETACH, un,
4334 			    "sd_get_tunables_from_conf: lun_reset_enable = %d"
4335 			    "\n", values->sdt_lun_reset_enable);
4336 			break;
4337 		case SD_CONF_BSET_CACHE_IS_NV:
4338 			values->sdt_suppress_cache_flush = data_list[i];
4339 			SD_INFO(SD_LOG_ATTACH_DETACH, un,
4340 			    "sd_get_tunables_from_conf: \
4341 			    suppress_cache_flush = %d"
4342 			    "\n", values->sdt_suppress_cache_flush);
4343 			break;
4344 		case SD_CONF_BSET_PC_DISABLED:
4345 			values->sdt_power_condition_dis = data_list[i];
4346 			SD_INFO(SD_LOG_ATTACH_DETACH, un,
4347 			    "sd_get_tunables_from_conf: power_condition_dis = "
4348 			    "%d\n", values->sdt_power_condition_dis);
4349 			break;
4350 		}
4351 	}
4352 }
4353 
4354 /*
4355  *    Function: sd_process_sdconf_table
4356  *
4357  * Description: Search the static configuration table for a match on the
4358  *		inquiry vid/pid and update the driver soft state structure
4359  *		according to the table property values for the device.
4360  *
4361  *		The form of a configuration table entry is:
4362  *		  <vid+pid>,<flags>,<property-data>
4363  *		  "SEAGATE ST42400N",1,0x40000,
4364  *		  0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1;
4365  *
4366  *   Arguments: un - driver soft state (unit) structure
4367  */
4368 
4369 static void
4370 sd_process_sdconf_table(struct sd_lun *un)
4371 {
4372 	char	*id = NULL;
4373 	int	table_index;
4374 	int	idlen;
4375 
4376 	ASSERT(un != NULL);
4377 	for (table_index = 0; table_index < sd_disk_table_size;
4378 	    table_index++) {
4379 		id = sd_disk_table[table_index].device_id;
4380 		idlen = strlen(id);
4381 
4382 		/*
4383 		 * The static configuration table currently does not
4384 		 * implement version 10 properties. Additionally,
4385 		 * multiple data-property-name entries are not
4386 		 * implemented in the static configuration table.
4387 		 */
4388 		if (sd_sdconf_id_match(un, id, idlen) == SD_SUCCESS) {
4389 			SD_INFO(SD_LOG_ATTACH_DETACH, un,
4390 			    "sd_process_sdconf_table: disk %s\n", id);
4391 			sd_set_vers1_properties(un,
4392 			    sd_disk_table[table_index].flags,
4393 			    sd_disk_table[table_index].properties);
4394 			break;
4395 		}
4396 	}
4397 }
4398 
4399 
4400 /*
4401  *    Function: sd_sdconf_id_match
4402  *
4403  * Description: This local function implements a case insensitive vid/pid
4404  *		comparison as well as the boundary cases of wild card and
4405  *		multiple blanks.
4406  *
4407  *		Note: An implicit assumption made here is that the scsi
4408  *		inquiry structure will always keep the vid, pid and
4409  *		revision strings in consecutive sequence, so they can be
4410  *		read as a single string. If this assumption is not the
4411  *		case, a separate string, to be used for the check, needs
4412  *		to be built with these strings concatenated.
4413  *
4414  *   Arguments: un - driver soft state (unit) structure
4415  *		id - table or config file vid/pid
4416  *		idlen  - length of the vid/pid (bytes)
4417  *
4418  * Return Code: SD_SUCCESS - Indicates a match with the inquiry vid/pid
4419  *		SD_FAILURE - Indicates no match with the inquiry vid/pid
4420  */
4421 
4422 static int
4423 sd_sdconf_id_match(struct sd_lun *un, char *id, int idlen)
4424 {
4425 	struct scsi_inquiry	*sd_inq;
4426 	int			rval = SD_SUCCESS;
4427 
4428 	ASSERT(un != NULL);
4429 	sd_inq = un->un_sd->sd_inq;
4430 	ASSERT(id != NULL);
4431 
4432 	/*
4433 	 * We use the inq_vid as a pointer to a buffer containing the
4434 	 * vid and pid and use the entire vid/pid length of the table
4435 	 * entry for the comparison. This works because the inq_pid
4436 	 * data member follows inq_vid in the scsi_inquiry structure.
4437 	 */
4438 	if (strncasecmp(sd_inq->inq_vid, id, idlen) != 0) {
4439 		/*
4440 		 * The user id string is compared to the inquiry vid/pid
4441 		 * using a case insensitive comparison and ignoring
4442 		 * multiple spaces.
4443 		 */
4444 		rval = sd_blank_cmp(un, id, idlen);
4445 		if (rval != SD_SUCCESS) {
4446 			/*
4447 			 * User id strings that start and end with a "*"
4448 			 * are a special case. These do not have a
4449 			 * specific vendor, and the product string can
4450 			 * appear anywhere in the 16 byte PID portion of
4451 			 * the inquiry data. This is a simple strstr()
4452 			 * type search for the user id in the inquiry data.
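			 * E.g. a (hypothetical) entry of "*ST32550*" matches
			 * any device whose 16-byte PID contains "ST32550",
			 * regardless of the vendor field.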
4453 			 */
4454 			if ((id[0] == '*') && (id[idlen - 1] == '*')) {
4455 				char	*pidptr = &id[1];
4456 				int	i;
4457 				int	j;
4458 				int	pidstrlen = idlen - 2;
4459 				j = sizeof (SD_INQUIRY(un)->inq_pid) -
4460 				    pidstrlen;
4461 
4462 				if (j < 0) {
4463 					return (SD_FAILURE);
4464 				}
4465 				for (i = 0; i < j; i++) {
4466 					if (bcmp(&SD_INQUIRY(un)->inq_pid[i],
4467 					    pidptr, pidstrlen) == 0) {
4468 						rval = SD_SUCCESS;
4469 						break;
4470 					}
4471 				}
4472 			}
4473 		}
4474 	}
4475 	return (rval);
4476 }
4477 
4478 
4479 /*
4480  *    Function: sd_blank_cmp
4481  *
4482  * Description: If the id string starts and ends with a space, treat
4483  *		multiple consecutive spaces as equivalent to a single
4484  *		space. For example, this causes a sd_disk_table entry
4485  *		of " NEC CDROM " to match a device's id string of
4486  *		"NEC       CDROM".
4487  *
4488  *		Note: The success exit condition for this routine is when
4489  *		the table-entry pointer reaches '\0' and the remaining
4490  *		inquiry count is zero. This will happen if the inquiry
4491  *		string returned by the device is padded with spaces to be
4492  *		exactly 24 bytes in length (8 byte vid + 16 byte pid). The
4493  *		SCSI spec states that the inquiry string is to be padded with
4494  *		spaces.
4495  *
4496  *   Arguments: un - driver soft state (unit) structure
4497  *		id - table or config file vid/pid
4498  *		idlen  - length of the vid/pid (bytes)
4499  *
4500  * Return Code: SD_SUCCESS - Indicates a match with the inquiry vid/pid
4501  *		SD_FAILURE - Indicates no match with the inquiry vid/pid
4502  */
4503 
4504 static int
4505 sd_blank_cmp(struct sd_lun *un, char *id, int idlen)
4506 {
4507 	char		*p1;
4508 	char		*p2;
4509 	int		cnt;
4510 	cnt = sizeof (SD_INQUIRY(un)->inq_vid) +
4511 	    sizeof (SD_INQUIRY(un)->inq_pid);
4512 
4513 	ASSERT(un != NULL);
4514 	p2 = un->un_sd->sd_inq->inq_vid;
4515 	ASSERT(id != NULL);
4516 	p1 = id;
4517 
4518 	if ((id[0] == ' ') && (id[idlen - 1] == ' ')) {
4519 		/*
4520 		 * Note: string p1 is terminated by a NUL but string p2
4521 		 * isn't.  The end of p2 is determined by cnt.
4522 		 */
4523 		for (;;) {
4524 			/* skip over any extra blanks in both strings */
4525 			while ((*p1 != '\0') && (*p1 == ' ')) {
4526 				p1++;
4527 			}
4528 			while ((cnt != 0) && (*p2 == ' ')) {
4529 				p2++;
4530 				cnt--;
4531 			}
4532 
4533 			/* compare the two strings */
4534 			if ((cnt == 0) ||
4535 			    (SD_TOUPPER(*p1) != SD_TOUPPER(*p2))) {
4536 				break;
4537 			}
4538 			while ((cnt > 0) &&
4539 			    (SD_TOUPPER(*p1) == SD_TOUPPER(*p2))) {
4540 				p1++;
4541 				p2++;
4542 				cnt--;
4543 			}
4544 		}
4545 	}
4546 
4547 	/* return SD_SUCCESS if both strings match */
4548 	return (((*p1 == '\0') && (cnt == 0)) ? SD_SUCCESS : SD_FAILURE);
4549 }
4550 
4551 
4552 /*
4553  *    Function: sd_chk_vers1_data
4554  *
4555  * Description: Verify the version 1 device properties provided by the
4556  *		user via the configuration file
4557  *
4558  *   Arguments: un	     - driver soft state (unit) structure
4559  *		flags	     - integer mask indicating properties to be set
4560  *		prop_list    - integer list of property values
4561  *		list_len     - number of elements in the property list
4562  *
4563  * Return Code: SD_SUCCESS - Indicates the user provided data is valid
4564  *		SD_FAILURE - Indicates the user provided data is invalid
4565  */
4566 
4567 static int
4568 sd_chk_vers1_data(struct sd_lun *un, int flags, int *prop_list,
4569     int list_len, char *dataname_ptr)
4570 {
4571 	int i;
4572 	int mask = 1;
4573 	int index = 0;
4574 
4575 	ASSERT(un != NULL);
4576 
4577 	/* Check for a NULL property name and list */
4578 	if (dataname_ptr == NULL) {
4579 		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
4580 		    "sd_chk_vers1_data: NULL data property name.");
4581 		return (SD_FAILURE);
4582 	}
4583 	if (prop_list == NULL) {
4584 		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
4585 		    "sd_chk_vers1_data: %s NULL data property list.",
4586 		    dataname_ptr);
4587 		return (SD_FAILURE);
4588 	}
4589 
4590 	/* Display a warning if undefined bits are set in the flags */
4591 	if (flags & ~SD_CONF_BIT_MASK) {
4592 		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
4593 		    "sd_chk_vers1_data: invalid bits 0x%x in data list %s. "
4594 		    "Properties not set.",
4595 		    (flags & ~SD_CONF_BIT_MASK), dataname_ptr);
4596 		return (SD_FAILURE);
4597 	}
4598 
4599 	/*
4600 	 * Verify the length of the list by counting the number of bits
4601 	 * set in the flags and validating that the property list holds
4602 	 * a value for each bit that is set.
4603 	 */
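	/*
	 * For example (illustrative values): flags of 0x3 set prop0 and
	 * prop1, so index becomes 2 and the list must hold at least
	 * index + 2 = 4 ints: <version>, <flags>, <prop0>, <prop1>.
	 */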
4604 	for (i = 0; i < SD_CONF_MAX_ITEMS; i++) {
4605 		if (flags & mask) {
4606 			index++;
4607 		}
4608 		mask <<= 1;	/* advance to the next flag bit */
4609 	}
4610 	if (list_len < (index + 2)) {
4611 		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
4612 		    "sd_chk_vers1_data: "
4613 		    "Data property list %s size is incorrect. "
4614 		    "Properties not set.", dataname_ptr);
4615 		scsi_log(SD_DEVINFO(un), sd_label, CE_CONT, "Size expected: "
4616 		    "version + 1 flagword + %d properties", SD_CONF_MAX_ITEMS);
4617 		return (SD_FAILURE);
4618 	}
4619 	return (SD_SUCCESS);
4620 }
4621 
4622 
4623 /*
4624  *    Function: sd_set_vers1_properties
4625  *
4626  * Description: Set version 1 device properties based on a property list
4627  *		retrieved from the driver configuration file or static
4628  *		configuration table. Version 1 properties have the format:
4629  *
4630  *	<data-property-name>:=<version>,<flags>,<prop0>,<prop1>,.....<propN>
4631  *
4632  *		where the prop0 value will be used to set prop0 if bit0
4633  *		is set in the flags
4634  *
4635  *   Arguments: un	     - driver soft state (unit) structure
4636  *		flags	     - integer mask indicating properties to be set
4637  *		prop_list    - integer list of property values
4638  */
4639 
4640 static void
4641 sd_set_vers1_properties(struct sd_lun *un, int flags, sd_tunables *prop_list)
4642 {
4643 	ASSERT(un != NULL);
4644 
4645 	/*
4646 	 * Set the flag to indicate cache is to be disabled. An attempt
4647 	 * to disable the cache via sd_cache_control() will be made
4648 	 * later during attach once the basic initialization is complete.
4649 	 */
4650 	if (flags & SD_CONF_BSET_NOCACHE) {
4651 		un->un_f_opt_disable_cache = TRUE;
4652 		SD_INFO(SD_LOG_ATTACH_DETACH, un,
4653 		    "sd_set_vers1_properties: caching disabled flag set\n");
4654 	}
4655 
4656 	/* CD-specific configuration parameters */
4657 	if (flags & SD_CONF_BSET_PLAYMSF_BCD) {
4658 		un->un_f_cfg_playmsf_bcd = TRUE;
4659 		SD_INFO(SD_LOG_ATTACH_DETACH, un,
4660 		    "sd_set_vers1_properties: playmsf_bcd set\n");
4661 	}
4662 	if (flags & SD_CONF_BSET_READSUB_BCD) {
4663 		un->un_f_cfg_readsub_bcd = TRUE;
4664 		SD_INFO(SD_LOG_ATTACH_DETACH, un,
4665 		    "sd_set_vers1_properties: readsub_bcd set\n");
4666 	}
4667 	if (flags & SD_CONF_BSET_READ_TOC_TRK_BCD) {
4668 		un->un_f_cfg_read_toc_trk_bcd = TRUE;
4669 		SD_INFO(SD_LOG_ATTACH_DETACH, un,
4670 		    "sd_set_vers1_properties: read_toc_trk_bcd set\n");
4671 	}
4672 	if (flags & SD_CONF_BSET_READ_TOC_ADDR_BCD) {
4673 		un->un_f_cfg_read_toc_addr_bcd = TRUE;
4674 		SD_INFO(SD_LOG_ATTACH_DETACH, un,
4675 		    "sd_set_vers1_properties: read_toc_addr_bcd set\n");
4676 	}
4677 	if (flags & SD_CONF_BSET_NO_READ_HEADER) {
4678 		un->un_f_cfg_no_read_header = TRUE;
4679 		SD_INFO(SD_LOG_ATTACH_DETACH, un,
4680 		    "sd_set_vers1_properties: no_read_header set\n");
4681 	}
4682 	if (flags & SD_CONF_BSET_READ_CD_XD4) {
4683 		un->un_f_cfg_read_cd_xd4 = TRUE;
4684 		SD_INFO(SD_LOG_ATTACH_DETACH, un,
4685 		    "sd_set_vers1_properties: read_cd_xd4 set\n");
4686 	}
4687 
4688 	/* Support for devices which do not have valid/unique serial numbers */
4689 	if (flags & SD_CONF_BSET_FAB_DEVID) {
4690 		un->un_f_opt_fab_devid = TRUE;
4691 		SD_INFO(SD_LOG_ATTACH_DETACH, un,
4692 		    "sd_set_vers1_properties: fab_devid bit set\n");
4693 	}
4694 
4695 	/* Support for user throttle configuration */
4696 	if (flags & SD_CONF_BSET_THROTTLE) {
4697 		ASSERT(prop_list != NULL);
4698 		un->un_saved_throttle = un->un_throttle =
4699 		    prop_list->sdt_throttle;
4700 		SD_INFO(SD_LOG_ATTACH_DETACH, un,
4701 		    "sd_set_vers1_properties: throttle set to %d\n",
4702 		    prop_list->sdt_throttle);
4703 	}
4704 
4705 	/* Set the per disk retry count according to the conf file or table. */
4706 	if (flags & SD_CONF_BSET_NRR_COUNT) {
4707 		ASSERT(prop_list != NULL);
4708 		if (prop_list->sdt_not_rdy_retries) {
4709 			un->un_notready_retry_count =
4710 			    prop_list->sdt_not_rdy_retries;
4711 			SD_INFO(SD_LOG_ATTACH_DETACH, un,
4712 			    "sd_set_vers1_properties: not ready retry count"
4713 			    " set to %d\n", un->un_notready_retry_count);
4714 		}
4715 	}
4716 
4717 	/* The controller type is reported for generic disk driver ioctls */
4718 	if (flags & SD_CONF_BSET_CTYPE) {
4719 		ASSERT(prop_list != NULL);
4720 		switch (prop_list->sdt_ctype) {
4721 		case CTYPE_CDROM:
4722 			un->un_ctype = prop_list->sdt_ctype;
4723 			SD_INFO(SD_LOG_ATTACH_DETACH, un,
4724 			    "sd_set_vers1_properties: ctype set to "
4725 			    "CTYPE_CDROM\n");
4726 			break;
4727 		case CTYPE_CCS:
4728 			un->un_ctype = prop_list->sdt_ctype;
4729 			SD_INFO(SD_LOG_ATTACH_DETACH, un,
4730 			    "sd_set_vers1_properties: ctype set to "
4731 			    "CTYPE_CCS\n");
4732 			break;
4733 		case CTYPE_ROD:		/* RW optical */
4734 			un->un_ctype = prop_list->sdt_ctype;
4735 			SD_INFO(SD_LOG_ATTACH_DETACH, un,
4736 			    "sd_set_vers1_properties: ctype set to "
4737 			    "CTYPE_ROD\n");
4738 			break;
4739 		default:
4740 			scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
4741 			    "sd_set_vers1_properties: Could not set "
4742 			    "invalid ctype value (%d)",
4743 			    prop_list->sdt_ctype);
4744 		}
4745 	}
4746 
4747 	/* Busy retry count (historically the Purple failover timeout) */
4748 	if (flags & SD_CONF_BSET_BSY_RETRY_COUNT) {
4749 		ASSERT(prop_list != NULL);
4750 		un->un_busy_retry_count =
4751 		    prop_list->sdt_busy_retries;
4752 		SD_INFO(SD_LOG_ATTACH_DETACH, un,
4753 		    "sd_set_vers1_properties: "
4754 		    "busy retry count set to %d\n",
4755 		    un->un_busy_retry_count);
4756 	}
4757 
4758 	/* Purple reset retry count */
4759 	if (flags & SD_CONF_BSET_RST_RETRIES) {
4760 		ASSERT(prop_list != NULL);
4761 		un->un_reset_retry_count =
4762 		    prop_list->sdt_reset_retries;
4763 		SD_INFO(SD_LOG_ATTACH_DETACH, un,
4764 		    "sd_set_vers1_properties: "
4765 		    "reset retry count set to %d\n",
4766 		    un->un_reset_retry_count);
4767 	}
4768 
4769 	/* Purple reservation release timeout */
4770 	if (flags & SD_CONF_BSET_RSV_REL_TIME) {
4771 		ASSERT(prop_list != NULL);
4772 		un->un_reserve_release_time =
4773 		    prop_list->sdt_reserv_rel_time;
4774 		SD_INFO(SD_LOG_ATTACH_DETACH, un,
4775 		    "sd_set_vers1_properties: "
4776 		    "reservation release timeout set to %d\n",
4777 		    un->un_reserve_release_time);
4778 	}
4779 
4780 	/*
4781 	 * Flag telling the driver to verify that no commands are pending for
4782 	 * a device before issuing a Test Unit Ready. This is a workaround
4783 	 * for a firmware bug in some Seagate eliteI drives.
4784 	 */
4785 	if (flags & SD_CONF_BSET_TUR_CHECK) {
4786 		un->un_f_cfg_tur_check = TRUE;
4787 		SD_INFO(SD_LOG_ATTACH_DETACH, un,
4788 		    "sd_set_vers1_properties: tur queue check set\n");
4789 	}
4790 
4791 	if (flags & SD_CONF_BSET_MIN_THROTTLE) {
4792 		un->un_min_throttle = prop_list->sdt_min_throttle;
4793 		SD_INFO(SD_LOG_ATTACH_DETACH, un,
4794 		    "sd_set_vers1_properties: min throttle set to %d\n",
4795 		    un->un_min_throttle);
4796 	}
4797 
4798 	if (flags & SD_CONF_BSET_DISKSORT_DISABLED) {
4799 		un->un_f_disksort_disabled =
4800 		    (prop_list->sdt_disk_sort_dis != 0) ?
4801 		    TRUE : FALSE;
4802 		SD_INFO(SD_LOG_ATTACH_DETACH, un,
4803 		    "sd_set_vers1_properties: disksort disabled "
4804 		    "flag set to %d\n",
4805 		    prop_list->sdt_disk_sort_dis);
4806 	}
4807 
4808 	if (flags & SD_CONF_BSET_LUN_RESET_ENABLED) {
4809 		un->un_f_lun_reset_enabled =
4810 		    (prop_list->sdt_lun_reset_enable != 0) ?
4811 		    TRUE : FALSE;
4812 		SD_INFO(SD_LOG_ATTACH_DETACH, un,
4813 		    "sd_set_vers1_properties: lun reset enabled "
4814 		    "flag set to %d\n",
4815 		    prop_list->sdt_lun_reset_enable);
4816 	}
4817 
4818 	if (flags & SD_CONF_BSET_CACHE_IS_NV) {
4819 		un->un_f_suppress_cache_flush =
4820 		    (prop_list->sdt_suppress_cache_flush != 0) ?
4821 		    TRUE : FALSE;
4822 		SD_INFO(SD_LOG_ATTACH_DETACH, un,
4823 		    "sd_set_vers1_properties: suppress_cache_flush "
4824 		    "flag set to %d\n",
4825 		    prop_list->sdt_suppress_cache_flush);
4826 	}
4827 
4828 	if (flags & SD_CONF_BSET_PC_DISABLED) {
4829 		un->un_f_power_condition_disabled =
4830 		    (prop_list->sdt_power_condition_dis != 0) ?
4831 		    TRUE : FALSE;
4832 		SD_INFO(SD_LOG_ATTACH_DETACH, un,
4833 		    "sd_set_vers1_properties: power_condition_disabled "
4834 		    "flag set to %d\n",
4835 		    prop_list->sdt_power_condition_dis);
4836 	}
4837 
4838 	/*
4839 	 * Validate the throttle values.
4840 	 * If any of the numbers are invalid, set everything to defaults.
4841 	 */
4842 	if ((un->un_throttle < SD_LOWEST_VALID_THROTTLE) ||
4843 	    (un->un_min_throttle < SD_LOWEST_VALID_THROTTLE) ||
4844 	    (un->un_min_throttle > un->un_throttle)) {
4845 		un->un_saved_throttle = un->un_throttle = sd_max_throttle;
4846 		un->un_min_throttle = sd_min_throttle;
4847 	}
4848 }
4849 
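/*
 * Illustrative sketch (not part of the driver): a vers1 entry as it
 * might appear in sd.conf. The vendor/product string and values are
 * hypothetical, and the flag word assumes SD_CONF_BSET_THROTTLE is
 * bit 0, as consumed by sd_set_vers1_properties() above:
 *
 *	sd-config-list = "ACME    ExampleDisk", "acme-disk-data";
 *	acme-disk-data = 1,0x1,32;
 *
 * Here "1" is the version, "0x1" sets only the throttle bit, and "32"
 * becomes un_throttle/un_saved_throttle for matching targets.
 */
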
4850 /*
4851  *   Function: sd_is_lsi()
4852  *
4853  *   Description: Check for LSI devices by stepping through the static
4854  *	device table to match the vid/pid.
4855  *
4856  *   Args: un - ptr to sd_lun
4857  *
4858  *   Notes:  When a new LSI property is created, it must also be added
4859  *		to this function.
4860  */
4861 static void
4862 sd_is_lsi(struct sd_lun *un)
4863 {
4864 	char	*id = NULL;
4865 	int	table_index;
4866 	int	idlen;
4867 	void	*prop;
4868 
4869 	ASSERT(un != NULL);
4870 	for (table_index = 0; table_index < sd_disk_table_size;
4871 	    table_index++) {
4872 		id = sd_disk_table[table_index].device_id;
4873 		idlen = strlen(id);
4874 		if (idlen == 0) {
4875 			continue;
4876 		}
4877 
4878 		if (sd_sdconf_id_match(un, id, idlen) == SD_SUCCESS) {
4879 			prop = sd_disk_table[table_index].properties;
4880 			if (prop == &lsi_properties ||
4881 			    prop == &lsi_oem_properties ||
4882 			    prop == &lsi_properties_scsi ||
4883 			    prop == &symbios_properties) {
4884 				un->un_f_cfg_is_lsi = TRUE;
4885 			}
4886 			break;
4887 		}
4888 	}
4889 }
4890 
4891 /*
4892  *    Function: sd_get_physical_geometry
4893  *
4894  * Description: Retrieve the MODE SENSE page 3 (Format Device Page) and
4895  *		MODE SENSE page 4 (Rigid Disk Drive Geometry Page) from the
4896  *		target, and use this information to initialize the physical
4897  *		geometry cache specified by pgeom_p.
4898  *
4899  *		MODE SENSE is an optional command, so failure in this case
4900  *		does not necessarily denote an error. We want to use the
4901  *		MODE SENSE commands to derive the physical geometry of the
4902  *		device, but if either command fails, the logical geometry is
4903  *		used as the fallback for disk label geometry in cmlb.
4904  *
4905  *		This requires that un->un_blockcount and un->un_tgt_blocksize
4906  *		have already been initialized for the current target and
4907  *		that the current values be passed as args so that we don't
4908  *		end up ever trying to use -1 as a valid value. This could
4909  *		happen if either value is reset while we're not holding
4910  *		the mutex.
4911  *
4912  *   Arguments: un - driver soft state (unit) structure
4913  *		path_flag - SD_PATH_DIRECT to use the USCSI "direct" chain and
4914  *			the normal command waitq, or SD_PATH_DIRECT_PRIORITY
4915  *			to use the USCSI "direct" chain and bypass the normal
4916  *			command waitq.
4917  *
4918  *     Context: Kernel thread only (can sleep).
4919  */
4920 
4921 static int
4922 sd_get_physical_geometry(struct sd_lun *un, cmlb_geom_t *pgeom_p,
4923     diskaddr_t capacity, int lbasize, int path_flag)
4924 {
4925 	struct	mode_format	*page3p;
4926 	struct	mode_geometry	*page4p;
4927 	struct	mode_header	*headerp;
4928 	int	sector_size;
4929 	int	nsect;
4930 	int	nhead;
4931 	int	ncyl;
4932 	int	intrlv;
4933 	int	spc;
4934 	diskaddr_t	modesense_capacity;
4935 	int	rpm;
4936 	int	bd_len;
4937 	int	mode_header_length;
4938 	uchar_t	*p3bufp;
4939 	uchar_t	*p4bufp;
4940 	int	cdbsize;
4941 	int	ret = EIO;
4942 	sd_ssc_t *ssc;
4943 	int	status;
4944 
4945 	ASSERT(un != NULL);
4946 
4947 	if (lbasize == 0) {
4948 		if (ISCD(un)) {
4949 			lbasize = 2048;
4950 		} else {
4951 			lbasize = un->un_sys_blocksize;
4952 		}
4953 	}
4954 	pgeom_p->g_secsize = (unsigned short)lbasize;
4955 
4956 	/*
4957 	 * If the unit is a CD/DVD drive, MODE SENSE pages three
4958 	 * and four are reserved (see the SBC and MMC specs). To
4959 	 * prevent soft errors, just return using the default LBA
4960 	 * size.
4961 	 *
4962 	 * Since SATA MODE SENSE function (sata_txlt_mode_sense()) does not
4963 	 * implement support for mode pages 3 and 4 return here to prevent
4964 	 * illegal requests on SATA drives.
4965 	 *
4966 	 * These pages are also reserved in SBC-2 and later.  We assume SBC-2
4967 	 * or later for a direct-attached block device if the SCSI version is
4968 	 * at least SPC-3.
4969 	 */
4970 
4971 	if (ISCD(un) ||
4972 	    un->un_interconnect_type == SD_INTERCONNECT_SATA ||
4973 	    (un->un_ctype == CTYPE_CCS && SD_INQUIRY(un)->inq_ansi >= 5))
4974 		return (ret);
4975 
4976 	cdbsize = (un->un_f_cfg_is_atapi == TRUE) ? CDB_GROUP2 : CDB_GROUP0;
4977 
4978 	/*
4979 	 * Retrieve MODE SENSE page 3 - Format Device Page
4980 	 */
4981 	p3bufp = kmem_zalloc(SD_MODE_SENSE_PAGE3_LENGTH, KM_SLEEP);
4982 	ssc = sd_ssc_init(un);
4983 	status = sd_send_scsi_MODE_SENSE(ssc, cdbsize, p3bufp,
4984 	    SD_MODE_SENSE_PAGE3_LENGTH, SD_MODE_SENSE_PAGE3_CODE, path_flag);
4985 	if (status != 0) {
4986 		SD_ERROR(SD_LOG_COMMON, un,
4987 		    "sd_get_physical_geometry: mode sense page 3 failed\n");
4988 		goto page3_exit;
4989 	}
4990 
4991 	/*
4992 	 * Determine size of Block Descriptors in order to locate the mode
4993 	 * page data.  ATAPI devices return 0, SCSI devices should return
4994 	 * MODE_BLK_DESC_LENGTH.
4995 	 */
4996 	headerp = (struct mode_header *)p3bufp;
4997 	if (un->un_f_cfg_is_atapi == TRUE) {
4998 		struct mode_header_grp2 *mhp =
4999 		    (struct mode_header_grp2 *)headerp;
5000 		mode_header_length = MODE_HEADER_LENGTH_GRP2;
5001 		bd_len = (mhp->bdesc_length_hi << 8) | mhp->bdesc_length_lo;
5002 	} else {
5003 		mode_header_length = MODE_HEADER_LENGTH;
5004 		bd_len = ((struct mode_header *)headerp)->bdesc_length;
5005 	}
5006 
5007 	if (bd_len > MODE_BLK_DESC_LENGTH) {
5008 		sd_ssc_set_info(ssc, SSC_FLAGS_INVALID_DATA, SD_LOG_COMMON,
5009 		    "sd_get_physical_geometry: received unexpected bd_len "
5010 		    "of %d, page3\n", bd_len);
5011 		status = EIO;
5012 		goto page3_exit;
5013 	}
5014 
5015 	page3p = (struct mode_format *)
5016 	    ((caddr_t)headerp + mode_header_length + bd_len);
5017 
5018 	if (page3p->mode_page.code != SD_MODE_SENSE_PAGE3_CODE) {
5019 		sd_ssc_set_info(ssc, SSC_FLAGS_INVALID_DATA, SD_LOG_COMMON,
5020 		    "sd_get_physical_geometry: mode sense pg3 code mismatch "
5021 		    "%d\n", page3p->mode_page.code);
5022 		status = EIO;
5023 		goto page3_exit;
5024 	}
5025 
5026 	/*
5027 	 * Use this physical geometry data only if BOTH MODE SENSE commands
5028 	 * complete successfully; otherwise, revert to the logical geometry.
5029 	 * So, we need to save everything in temporary variables.
5030 	 */
5031 	sector_size = BE_16(page3p->data_bytes_sect);
5032 
5033 	/*
5034 	 * 1243403: The NEC D38x7 drives do not support MODE SENSE sector size
5035 	 */
5036 	if (sector_size == 0) {
5037 		sector_size = un->un_sys_blocksize;
5038 	} else {
5039 		sector_size &= ~(un->un_sys_blocksize - 1);
5040 	}
5041 
5042 	nsect  = BE_16(page3p->sect_track);
5043 	intrlv = BE_16(page3p->interleave);
5044 
5045 	SD_INFO(SD_LOG_COMMON, un,
5046 	    "sd_get_physical_geometry: Format Parameters (page 3)\n");
5047 	SD_INFO(SD_LOG_COMMON, un,
5048 	    "   mode page: %d; nsect: %d; sector size: %d;\n",
5049 	    page3p->mode_page.code, nsect, sector_size);
5050 	SD_INFO(SD_LOG_COMMON, un,
5051 	    "   interleave: %d; track skew: %d; cylinder skew: %d;\n", intrlv,
5052 	    BE_16(page3p->track_skew),
5053 	    BE_16(page3p->cylinder_skew));
5054 
5055 	sd_ssc_assessment(ssc, SD_FMT_STANDARD);
5056 
5057 	/*
5058 	 * Retrieve MODE SENSE page 4 - Rigid Disk Drive Geometry Page
5059 	 */
5060 	p4bufp = kmem_zalloc(SD_MODE_SENSE_PAGE4_LENGTH, KM_SLEEP);
5061 	status = sd_send_scsi_MODE_SENSE(ssc, cdbsize, p4bufp,
5062 	    SD_MODE_SENSE_PAGE4_LENGTH, SD_MODE_SENSE_PAGE4_CODE, path_flag);
5063 	if (status != 0) {
5064 		SD_ERROR(SD_LOG_COMMON, un,
5065 		    "sd_get_physical_geometry: mode sense page 4 failed\n");
5066 		goto page4_exit;
5067 	}
5068 
5069 	/*
5070 	 * Determine size of Block Descriptors in order to locate the mode
5071 	 * page data.  ATAPI devices return 0, SCSI devices should return
5072 	 * MODE_BLK_DESC_LENGTH.
5073 	 */
5074 	headerp = (struct mode_header *)p4bufp;
5075 	if (un->un_f_cfg_is_atapi == TRUE) {
5076 		struct mode_header_grp2 *mhp =
5077 		    (struct mode_header_grp2 *)headerp;
5078 		bd_len = (mhp->bdesc_length_hi << 8) | mhp->bdesc_length_lo;
5079 	} else {
5080 		bd_len = ((struct mode_header *)headerp)->bdesc_length;
5081 	}
5082 
5083 	if (bd_len > MODE_BLK_DESC_LENGTH) {
5084 		sd_ssc_set_info(ssc, SSC_FLAGS_INVALID_DATA, SD_LOG_COMMON,
5085 		    "sd_get_physical_geometry: received unexpected bd_len of "
5086 		    "%d, page4\n", bd_len);
5087 		status = EIO;
5088 		goto page4_exit;
5089 	}
5090 
5091 	page4p = (struct mode_geometry *)
5092 	    ((caddr_t)headerp + mode_header_length + bd_len);
5093 
5094 	if (page4p->mode_page.code != SD_MODE_SENSE_PAGE4_CODE) {
5095 		sd_ssc_set_info(ssc, SSC_FLAGS_INVALID_DATA, SD_LOG_COMMON,
5096 		    "sd_get_physical_geometry: mode sense pg4 code mismatch "
5097 		    "%d\n", page4p->mode_page.code);
5098 		status = EIO;
5099 		goto page4_exit;
5100 	}
5101 
5102 	/*
5103 	 * Stash the data now, after we know that both commands completed.
5104 	 */
5105 
5106 
5107 	nhead = (int)page4p->heads;	/* uchar, so no conversion needed */
5108 	spc   = nhead * nsect;
5109 	ncyl  = (page4p->cyl_ub << 16) + (page4p->cyl_mb << 8) + page4p->cyl_lb;
5110 	rpm   = BE_16(page4p->rpm);
5111 
5112 	modesense_capacity = spc * ncyl;
5113 
5114 	SD_INFO(SD_LOG_COMMON, un,
5115 	    "sd_get_physical_geometry: Geometry Parameters (page 4)\n");
5116 	SD_INFO(SD_LOG_COMMON, un,
5117 	    "   cylinders: %d; heads: %d; rpm: %d;\n", ncyl, nhead, rpm);
5118 	SD_INFO(SD_LOG_COMMON, un,
5119 	    "   computed capacity(h*s*c): %d;\n", modesense_capacity);
5120 	SD_INFO(SD_LOG_COMMON, un, "   pgeom_p: %p; read cap: %d\n",
5121 	    (void *)pgeom_p, capacity);
5122 
5123 	/*
5124 	 * Compensate if the drive's geometry is not rectangular, i.e.,
5125 	 * the product of C * H * S returned by MODE SENSE >= that returned
5126 	 * by read capacity. This is an idiosyncrasy of the original x86
5127 	 * disk subsystem.
5128 	 */
5129 	if (modesense_capacity >= capacity) {
5130 		SD_INFO(SD_LOG_COMMON, un,
5131 		    "sd_get_physical_geometry: adjusting acyl; "
5132 		    "old: %d; new: %d\n", pgeom_p->g_acyl,
5133 		    (modesense_capacity - capacity + spc - 1) / spc);
5134 		if (sector_size != 0) {
5135 			/* 1243403: NEC D38x7 drives don't support sec size */
5136 			pgeom_p->g_secsize = (unsigned short)sector_size;
5137 		}
5138 		pgeom_p->g_nsect    = (unsigned short)nsect;
5139 		pgeom_p->g_nhead    = (unsigned short)nhead;
5140 		pgeom_p->g_capacity = capacity;
5141 		pgeom_p->g_acyl	    =
5142 		    (modesense_capacity - pgeom_p->g_capacity + spc - 1) / spc;
5143 		pgeom_p->g_ncyl	    = ncyl - pgeom_p->g_acyl;
5144 	}
5145 
5146 	pgeom_p->g_rpm    = (unsigned short)rpm;
5147 	pgeom_p->g_intrlv = (unsigned short)intrlv;
5148 	ret = 0;
5149 
5150 	SD_INFO(SD_LOG_COMMON, un,
5151 	    "sd_get_physical_geometry: mode sense geometry:\n");
5152 	SD_INFO(SD_LOG_COMMON, un,
5153 	    "   nsect: %d; sector size: %d; interlv: %d\n",
5154 	    nsect, sector_size, intrlv);
5155 	SD_INFO(SD_LOG_COMMON, un,
5156 	    "   nhead: %d; ncyl: %d; rpm: %d; capacity(ms): %d\n",
5157 	    nhead, ncyl, rpm, modesense_capacity);
5158 	SD_INFO(SD_LOG_COMMON, un,
5159 	    "sd_get_physical_geometry: (cached)\n");
5160 	SD_INFO(SD_LOG_COMMON, un,
5161 	    "   ncyl: %ld; acyl: %d; nhead: %d; nsect: %d\n",
5162 	    pgeom_p->g_ncyl,  pgeom_p->g_acyl,
5163 	    pgeom_p->g_nhead, pgeom_p->g_nsect);
5164 	SD_INFO(SD_LOG_COMMON, un,
5165 	    "   lbasize: %d; capacity: %ld; intrlv: %d; rpm: %d\n",
5166 	    pgeom_p->g_secsize, pgeom_p->g_capacity,
5167 	    pgeom_p->g_intrlv, pgeom_p->g_rpm);
5168 	sd_ssc_assessment(ssc, SD_FMT_STANDARD);
5169 
5170 page4_exit:
5171 	kmem_free(p4bufp, SD_MODE_SENSE_PAGE4_LENGTH);
5172 
5173 page3_exit:
5174 	kmem_free(p3bufp, SD_MODE_SENSE_PAGE3_LENGTH);
5175 
5176 	if (status != 0) {
5177 		if (status == EIO) {
5178 			/*
5179 			 * Some disks do not support MODE SENSE(6); this
5180 			 * kind of error (sense key 0x5, ILLEGAL REQUEST)
5181 			 * should be ignored.
5182 			 */
5183 			uint8_t *sensep;
5184 			int senlen;
5185 
5186 			sensep = (uint8_t *)ssc->ssc_uscsi_cmd->uscsi_rqbuf;
5187 			senlen = (int)(ssc->ssc_uscsi_cmd->uscsi_rqlen -
5188 			    ssc->ssc_uscsi_cmd->uscsi_rqresid);
5189 
5190 			if (senlen > 0 &&
5191 			    scsi_sense_key(sensep) == KEY_ILLEGAL_REQUEST) {
5192 				sd_ssc_assessment(ssc,
5193 				    SD_FMT_IGNORE_COMPROMISE);
5194 			} else {
5195 				sd_ssc_assessment(ssc, SD_FMT_STATUS_CHECK);
5196 			}
5197 		} else {
5198 			sd_ssc_assessment(ssc, SD_FMT_IGNORE);
5199 		}
5200 	}
5201 	sd_ssc_fini(ssc);
5202 	return (ret);
5203 }
5204 
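/*
 * Worked example (hypothetical numbers) of the alternate-cylinder
 * adjustment in sd_get_physical_geometry() above: suppose MODE SENSE
 * reports nhead = 16 and nsect = 128 (so spc = 2048) over ncyl = 1000
 * cylinders, giving modesense_capacity = 2048000, while READ CAPACITY
 * reported capacity = 2040000. Then
 *
 *	g_acyl = (2048000 - 2040000 + 2048 - 1) / 2048 = 4
 *	g_ncyl = 1000 - 4 = 996
 *
 * so the cylinders beyond the usable capacity become alternate
 * cylinders, and ncyl * nhead * nsect (996 * 2048 = 2039808) no
 * longer exceeds the capacity reported by READ CAPACITY.
 */
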
5205 /*
5206  *    Function: sd_get_virtual_geometry
5207  *
5208  * Description: Ask the controller to tell us about the target device.
5209  *
5210  *   Arguments: un - pointer to softstate
5211  *		capacity - disk capacity in #blocks
5212  *		lbasize - disk block size in bytes
5213  *
5214  *     Context: Kernel thread only
5215  */
5216 
5217 static int
5218 sd_get_virtual_geometry(struct sd_lun *un, cmlb_geom_t *lgeom_p,
5219     diskaddr_t capacity, int lbasize)
5220 {
5221 	uint_t	geombuf;
5222 	int	spc;
5223 
5224 	ASSERT(un != NULL);
5225 
5226 	/* Set sector size, and total number of sectors */
5227 	(void) scsi_ifsetcap(SD_ADDRESS(un), "sector-size",   lbasize,  1);
5228 	(void) scsi_ifsetcap(SD_ADDRESS(un), "total-sectors", capacity, 1);
5229 
5230 	/* Let the HBA tell us its geometry */
5231 	geombuf = (uint_t)scsi_ifgetcap(SD_ADDRESS(un), "geometry", 1);
5232 
5233 	/* A value of -1 indicates an undefined "geometry" property */
5234 	if (geombuf == (-1)) {
5235 		return (EINVAL);
5236 	}
5237 
5238 	/* Initialize the logical geometry cache. */
5239 	lgeom_p->g_nhead   = (geombuf >> 16) & 0xffff;
5240 	lgeom_p->g_nsect   = geombuf & 0xffff;
5241 	lgeom_p->g_secsize = un->un_sys_blocksize;
5242 
5243 	spc = lgeom_p->g_nhead * lgeom_p->g_nsect;
5244 
5245 	/*
5246 	 * Note: The driver originally converted the capacity value from
5247 	 * target blocks to system blocks. However, the capacity value passed
5248 	 * to this routine is already in terms of system blocks (this scaling
5249 	 * is done when the READ CAPACITY command is issued and processed).
5250 	 * This 'error' may have gone undetected because the usage of g_ncyl
5251 	 * (which is based upon g_capacity) is very limited within the driver.
5252 	 */
5253 	lgeom_p->g_capacity = capacity;
5254 
5255 	/*
5256 	 * Set ncyl to zero if the HBA returned a zero nhead or nsect value.
5257 	 * The HBA may return zero values if the device has been removed.
5258 	 */
5259 	if (spc == 0) {
5260 		lgeom_p->g_ncyl = 0;
5261 	} else {
5262 		lgeom_p->g_ncyl = lgeom_p->g_capacity / spc;
5263 	}
5264 	lgeom_p->g_acyl = 0;
5265 
5266 	SD_INFO(SD_LOG_COMMON, un, "sd_get_virtual_geometry: (cached)\n");
5267 	return (0);
5268 
5269 }
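
/*
 * Worked example (hypothetical value) of the "geometry" capability
 * decoding in sd_get_virtual_geometry() above: if the HBA returns
 * geombuf = 0x00FF003F, then
 *
 *	g_nhead = (geombuf >> 16) & 0xffff = 255
 *	g_nsect =  geombuf        & 0xffff = 63
 *	spc     = 255 * 63 = 16065 sectors per cylinder
 *
 * and a capacity of 16777216 blocks yields
 * g_ncyl = 16777216 / 16065 = 1044.
 */
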
5270 /*
5271  *    Function: sd_update_block_info
5272  *
5273  * Description: Calculate a byte count to sector count bitshift value
5274  *		from sector size.
5275  *
5276  *   Arguments: un: unit struct.
5277  *		lbasize: new target sector size
5278  *		capacity: new target capacity, i.e., block count
5279  *
5280  *     Context: Kernel thread context
5281  */
5282 
5283 static void
5284 sd_update_block_info(struct sd_lun *un, uint32_t lbasize, uint64_t capacity)
5285 {
5286 	if (lbasize != 0) {
5287 		un->un_tgt_blocksize = lbasize;
5288 		un->un_f_tgt_blocksize_is_valid = TRUE;
5289 		if (!un->un_f_has_removable_media) {
5290 			un->un_sys_blocksize = lbasize;
5291 		}
5292 	}
5293 
5294 	if (capacity != 0) {
5295 		un->un_blockcount		= capacity;
5296 		un->un_f_blockcount_is_valid	= TRUE;
5297 
5298 		/*
5299 		 * The capacity has changed, so update the errstats.
5300 		 */
5301 		if (un->un_errstats != NULL) {
5302 			struct sd_errstats *stp;
5303 
5304 			capacity *= un->un_sys_blocksize;
5305 			stp = (struct sd_errstats *)un->un_errstats->ks_data;
5306 			if (stp->sd_capacity.value.ui64 < capacity)
5307 				stp->sd_capacity.value.ui64 = capacity;
5308 		}
5309 	}
5310 }
5311 
5312 /*
5313  * Parses the SCSI Block Limits VPD page (0xB0). It's legal to pass NULL for
5314  * vpd_pg, in which case all the block limits will be reset to the defaults.
5315  */
5316 static void
5317 sd_parse_blk_limits_vpd(struct sd_lun *un, uchar_t *vpd_pg)
5318 {
5319 	sd_blk_limits_t *lim = &un->un_blk_lim;
5320 	unsigned pg_len;
5321 
5322 	if (vpd_pg != NULL)
5323 		pg_len = BE_IN16(&vpd_pg[2]);
5324 	else
5325 		pg_len = 0;
5326 
5327 	/* Block Limits VPD can be 16 bytes or 64 bytes long - support both */
5328 	if (pg_len >= 0x10) {
5329 		lim->lim_opt_xfer_len_gran = BE_IN16(&vpd_pg[6]);
5330 		lim->lim_max_xfer_len = BE_IN32(&vpd_pg[8]);
5331 		lim->lim_opt_xfer_len = BE_IN32(&vpd_pg[12]);
5332 
5333 		/* Zero means not reported, so use "unlimited" */
5334 		if (lim->lim_max_xfer_len == 0)
5335 			lim->lim_max_xfer_len = UINT32_MAX;
5336 		if (lim->lim_opt_xfer_len == 0)
5337 			lim->lim_opt_xfer_len = UINT32_MAX;
5338 	} else {
5339 		lim->lim_opt_xfer_len_gran = 0;
5340 		lim->lim_max_xfer_len = UINT32_MAX;
5341 		lim->lim_opt_xfer_len = UINT32_MAX;
5342 	}
5343 	if (pg_len >= 0x3c) {
5344 		lim->lim_max_pfetch_len = BE_IN32(&vpd_pg[16]);
5345 		/*
5346 		 * A zero in either of the following two fields indicates lack
5347 		 * of UNMAP support.
5348 		 */
5349 		lim->lim_max_unmap_lba_cnt = BE_IN32(&vpd_pg[20]);
5350 		lim->lim_max_unmap_descr_cnt = BE_IN32(&vpd_pg[24]);
5351 		lim->lim_opt_unmap_gran = BE_IN32(&vpd_pg[28]);
5352 		if ((vpd_pg[32] >> 7) == 1) {
5353 			lim->lim_unmap_gran_align =
5354 			    ((vpd_pg[32] & 0x7f) << 24) | (vpd_pg[33] << 16) |
5355 			    (vpd_pg[34] << 8) | vpd_pg[35];
5356 		} else {
5357 			lim->lim_unmap_gran_align = 0;
5358 		}
5359 		lim->lim_max_write_same_len = BE_IN64(&vpd_pg[36]);
5360 	} else {
5361 		lim->lim_max_pfetch_len = UINT32_MAX;
5362 		lim->lim_max_unmap_lba_cnt = UINT32_MAX;
5363 		lim->lim_max_unmap_descr_cnt = SD_UNMAP_MAX_DESCR;
5364 		lim->lim_opt_unmap_gran = 0;
5365 		lim->lim_unmap_gran_align = 0;
5366 		lim->lim_max_write_same_len = UINT64_MAX;
5367 	}
5368 }
5369 
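/*
 * Worked example (hypothetical bytes) of the UNMAP granularity
 * alignment decoding above. Bit 7 of vpd_pg[32] is the UGAVALID bit;
 * the low 31 bits of bytes 32-35 form the alignment. Given
 *
 *	vpd_pg[32..35] = 0x80 0x00 0x00 0x08
 *
 * UGAVALID is set and lim_unmap_gran_align decodes to
 * ((0x80 & 0x7f) << 24) | (0x00 << 16) | (0x00 << 8) | 0x08 = 8.
 * Were UGAVALID clear, the alignment would be taken as 0 regardless
 * of the remaining bits.
 */
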
5370 /*
5371  * Collects VPD page B0 data if available (block limits). If the data is
5372  * not available or querying the device failed, we revert to the defaults.
5373  */
5374 static void
5375 sd_setup_blk_limits(sd_ssc_t *ssc)
5376 {
5377 	struct sd_lun	*un		= ssc->ssc_un;
5378 	uchar_t		*inqB0		= NULL;
5379 	size_t		inqB0_resid	= 0;
5380 	int		rval;
5381 
5382 	if (un->un_vpd_page_mask & SD_VPD_BLK_LIMITS_PG) {
5383 		inqB0 = kmem_zalloc(MAX_INQUIRY_SIZE, KM_SLEEP);
5384 		rval = sd_send_scsi_INQUIRY(ssc, inqB0, MAX_INQUIRY_SIZE, 0x01,
5385 		    0xB0, &inqB0_resid);
5386 		if (rval != 0) {
5387 			sd_ssc_assessment(ssc, SD_FMT_IGNORE);
5388 			kmem_free(inqB0, MAX_INQUIRY_SIZE);
5389 			inqB0 = NULL;
5390 		}
5391 	}
5392 	/* passing NULL inqB0 will reset to defaults */
5393 	sd_parse_blk_limits_vpd(ssc->ssc_un, inqB0);
5394 	if (inqB0)
5395 		kmem_free(inqB0, MAX_INQUIRY_SIZE);
5396 }
5397 
5398 /*
5399  *    Function: sd_register_devid
5400  *
5401  * Description: This routine will obtain the device id information from the
5402  *		target, obtain the serial number, and register the device
5403  *		id with the ddi framework.
5404  *
5405  *   Arguments: devi - the system's dev_info_t for the device.
5406  *		un - driver soft state (unit) structure
5407  *		reservation_flag - indicates if a reservation conflict
5408  *		occurred during attach
5409  *
5410  *     Context: Kernel Thread
5411  */
5412 static void
5413 sd_register_devid(sd_ssc_t *ssc, dev_info_t *devi, int reservation_flag)
5414 {
5415 	int		rval		= 0;
5416 	uchar_t		*inq80		= NULL;
5417 	size_t		inq80_len	= MAX_INQUIRY_SIZE;
5418 	size_t		inq80_resid	= 0;
5419 	uchar_t		*inq83		= NULL;
5420 	size_t		inq83_len	= MAX_INQUIRY_SIZE;
5421 	size_t		inq83_resid	= 0;
5422 	int		dlen, len;
5423 	char		*sn;
5424 	struct sd_lun	*un;
5425 
5426 	ASSERT(ssc != NULL);
5427 	un = ssc->ssc_un;
5428 	ASSERT(un != NULL);
5429 	ASSERT(mutex_owned(SD_MUTEX(un)));
5430 	ASSERT((SD_DEVINFO(un)) == devi);
5431 
5432 
5433 	/*
5434 	 * We check the availability of the World Wide Name (0x83) and Unit
5435 	 * Serial Number (0x80) pages in sd_check_vpd_page_support(), which
5436 	 * records the results in un_vpd_page_mask; from that we decide how
5437 	 * to get the WWN.  Page 0x83 is the best choice and 0x80 is next.
5438 	 * If neither is available, we munge the devid from the device
5439 	 * vid/pid/serial # for Sun qualified disks, or use the ddi
5440 	 * framework to fabricate a devid for non-Sun qualified disks.
5441 	 */
5442 	if (sd_check_vpd_page_support(ssc) == 0) {
5443 		/* collect page 80 data if available */
5444 		if (un->un_vpd_page_mask & SD_VPD_UNIT_SERIAL_PG) {
5445 
5446 			mutex_exit(SD_MUTEX(un));
5447 			inq80 = kmem_zalloc(inq80_len, KM_SLEEP);
5448 
5449 			rval = sd_send_scsi_INQUIRY(ssc, inq80, inq80_len,
5450 			    0x01, 0x80, &inq80_resid);
5451 
5452 			if (rval != 0) {
5453 				sd_ssc_assessment(ssc, SD_FMT_IGNORE);
5454 				kmem_free(inq80, inq80_len);
5455 				inq80 = NULL;
5456 				inq80_len = 0;
5457 			} else if (ddi_prop_exists(
5458 			    DDI_DEV_T_NONE, SD_DEVINFO(un),
5459 			    DDI_PROP_NOTPROM | DDI_PROP_DONTPASS,
5460 			    INQUIRY_SERIAL_NO) == 0) {
5461 				/*
5462 				 * If we don't already have a serial number
5463 				 * property, do quick verify of data returned
5464 				 * and define property.
5465 				 */
5466 				dlen = inq80_len - inq80_resid;
5467 				len = (size_t)inq80[3];
5468 				if ((dlen >= 4) && ((len + 4) <= dlen)) {
5469 					/*
5470 					 * Ensure sn termination, skip leading
5471 					 * blanks, and create property
5472 					 * 'inquiry-serial-no'.
5473 					 */
5474 					sn = (char *)&inq80[4];
5475 					sn[len] = 0;
5476 					while (*sn && (*sn == ' '))
5477 						sn++;
5478 					if (*sn) {
5479 						(void) ddi_prop_update_string(
5480 						    DDI_DEV_T_NONE,
5481 						    SD_DEVINFO(un),
5482 						    INQUIRY_SERIAL_NO, sn);
5483 					}
5484 				}
5485 			}
5486 			mutex_enter(SD_MUTEX(un));
5487 		}
5488 
5489 		/* collect page 83 data if available */
5490 		if (un->un_vpd_page_mask & SD_VPD_DEVID_WWN_PG) {
5491 			mutex_exit(SD_MUTEX(un));
5492 			inq83 = kmem_zalloc(inq83_len, KM_SLEEP);
5493 
5494 			rval = sd_send_scsi_INQUIRY(ssc, inq83, inq83_len,
5495 			    0x01, 0x83, &inq83_resid);
5496 
5497 			if (rval != 0) {
5498 				sd_ssc_assessment(ssc, SD_FMT_IGNORE);
5499 				kmem_free(inq83, inq83_len);
5500 				inq83 = NULL;
5501 				inq83_len = 0;
5502 			}
5503 			mutex_enter(SD_MUTEX(un));
5504 		}
5505 	}
5506 
5507 	/*
5508 	 * If transport has already registered a devid for this target
5509 	 * then that takes precedence over the driver's determination
5510 	 * of the devid.
5511 	 *
5512 	 * NOTE: The reason this check is done here instead of at the beginning
5513 	 * of the function is to allow the code above to create the
5514 	 * 'inquiry-serial-no' property.
5515 	 */
5516 	if (ddi_devid_get(SD_DEVINFO(un), &un->un_devid) == DDI_SUCCESS) {
5517 		ASSERT(un->un_devid);
5518 		un->un_f_devid_transport_defined = TRUE;
5519 		goto cleanup; /* use devid registered by the transport */
5520 	}
5521 
5522 	/*
5523 	 * This is the case of antiquated Sun disk drives that have the
5524 	 * FAB_DEVID property set in the disk_table.  These drives manage
5525 	 * their devids by storing them in the last two available sectors
5526 	 * on the drive and have them fabricated by the ddi layer by calling
5527 	 * ddi_devid_init and passing the DEVID_FAB flag.
5528 	 */
5529 	if (un->un_f_opt_fab_devid == TRUE) {
5530 		/*
5531 		 * Relying on EINVAL alone isn't reliable, since a reserved
5532 		 * disk may result in invalid geometry, so check to make sure
5533 		 * a reservation conflict did not occur during attach.
5534 		 */
5535 		if ((sd_get_devid(ssc) == EINVAL) &&
5536 		    (reservation_flag != SD_TARGET_IS_RESERVED)) {
5537 			/*
5538 			 * The devid is invalid AND there is no reservation
5539 			 * conflict.  Fabricate a new devid.
5540 			 */
5541 			(void) sd_create_devid(ssc);
5542 		}
5543 
5544 		/* Register the devid if it exists */
5545 		if (un->un_devid != NULL) {
5546 			(void) ddi_devid_register(SD_DEVINFO(un),
5547 			    un->un_devid);
5548 			SD_INFO(SD_LOG_ATTACH_DETACH, un,
5549 			    "sd_register_devid: Devid Fabricated\n");
5550 		}
5551 		goto cleanup;
5552 	}
5553 
5554 	/* encode best devid possible based on data available */
5555 	if (ddi_devid_scsi_encode(DEVID_SCSI_ENCODE_VERSION_LATEST,
5556 	    (char *)ddi_driver_name(SD_DEVINFO(un)),
5557 	    (uchar_t *)SD_INQUIRY(un), sizeof (*SD_INQUIRY(un)),
5558 	    inq80, inq80_len - inq80_resid, inq83, inq83_len -
5559 	    inq83_resid, &un->un_devid) == DDI_SUCCESS) {
5560 
5561 		/* devid successfully encoded, register devid */
5562 		(void) ddi_devid_register(SD_DEVINFO(un), un->un_devid);
5563 
5564 	} else {
5565 		/*
5566 		 * Unable to encode a devid based on the available data;
5567 		 * this is not a Sun qualified disk.  Older Sun disk
5568 		 * drives that have the SD_FAB_DEVID property set in the
5569 		 * disk_table and non-Sun qualified disks are treated in
5570 		 * the same manner: these drives manage their devids by
5571 		 * storing them in the last two available sectors on the
5572 		 * drive and have them fabricated by the ddi layer by
5573 		 * calling ddi_devid_init and passing the DEVID_FAB flag.
5574 		 *
5575 		 * Create a fabricated devid only if one does not already
5576 		 * exist.
5577 		 */
5579 		if (sd_get_devid(ssc) == EINVAL) {
5580 			(void) sd_create_devid(ssc);
5581 		}
5582 		un->un_f_opt_fab_devid = TRUE;
5583 
5584 		/* Register the devid if it exists */
5585 		if (un->un_devid != NULL) {
5586 			(void) ddi_devid_register(SD_DEVINFO(un),
5587 			    un->un_devid);
5588 			SD_INFO(SD_LOG_ATTACH_DETACH, un,
5589 			    "sd_register_devid: devid fabricated using "
5590 			    "ddi framework\n");
5591 		}
5592 	}
5593 
5594 cleanup:
5595 	/* clean up resources */
5596 	if (inq80 != NULL) {
5597 		kmem_free(inq80, inq80_len);
5598 	}
5599 	if (inq83 != NULL) {
5600 		kmem_free(inq83, inq83_len);
5601 	}
5602 }
5603 
5604 
5605 
5606 /*
5607  *    Function: sd_get_devid
5608  *
5609  * Description: This routine will return 0 if a valid device id has been
5610  *		obtained from the target and stored in the soft state. If a
5611  *		valid device id has not been previously read and stored, a
5612  *		read attempt will be made.
5613  *
5614  *   Arguments: un - driver soft state (unit) structure
5615  *
5616  * Return Code: 0 if we successfully get the device id
5617  *
5618  *     Context: Kernel Thread
5619  */
5620 
5621 static int
5622 sd_get_devid(sd_ssc_t *ssc)
5623 {
5624 	struct dk_devid		*dkdevid;
5625 	ddi_devid_t		tmpid;
5626 	uint_t			*ip;
5627 	size_t			sz;
5628 	diskaddr_t		blk;
5629 	int			status;
5630 	int			chksum;
5631 	int			i;
5632 	size_t			buffer_size;
5633 	struct sd_lun		*un;
5634 
5635 	ASSERT(ssc != NULL);
5636 	un = ssc->ssc_un;
5637 	ASSERT(un != NULL);
5638 	ASSERT(mutex_owned(SD_MUTEX(un)));
5639 
5640 	SD_TRACE(SD_LOG_ATTACH_DETACH, un, "sd_get_devid: entry: un: 0x%p\n",
5641 	    un);
5642 
5643 	if (un->un_devid != NULL) {
5644 		return (0);
5645 	}
5646 
5647 	mutex_exit(SD_MUTEX(un));
5648 	if (cmlb_get_devid_block(un->un_cmlbhandle, &blk,
5649 	    (void *)SD_PATH_DIRECT) != 0) {
5650 		mutex_enter(SD_MUTEX(un));
5651 		return (EINVAL);
5652 	}
5653 
5654 	/*
5655 	 * Read and verify the device id, stored in the reserved cylinders
5656 	 * at the end of the disk. The backup label is on the odd sectors
5657 	 * of the last track of the last cylinder; the device id is on a
5658 	 * track of the next-to-last cylinder.
5659 	 */
5660 	mutex_enter(SD_MUTEX(un));
5661 	buffer_size = SD_REQBYTES2TGTBYTES(un, sizeof (struct dk_devid));
5662 	mutex_exit(SD_MUTEX(un));
5663 	dkdevid = kmem_alloc(buffer_size, KM_SLEEP);
5664 	status = sd_send_scsi_READ(ssc, dkdevid, buffer_size, blk,
5665 	    SD_PATH_DIRECT);
5666 
5667 	if (status != 0) {
5668 		sd_ssc_assessment(ssc, SD_FMT_IGNORE);
5669 		goto error;
5670 	}
5671 
5672 	/* Validate the revision */
5673 	if ((dkdevid->dkd_rev_hi != DK_DEVID_REV_MSB) ||
5674 	    (dkdevid->dkd_rev_lo != DK_DEVID_REV_LSB)) {
5675 		status = EINVAL;
5676 		goto error;
5677 	}
5678 
5679 	/* Calculate the checksum */
5680 	chksum = 0;
5681 	ip = (uint_t *)dkdevid;
5682 	for (i = 0; i < ((DEV_BSIZE - sizeof (int)) / sizeof (int));
5683 	    i++) {
5684 		chksum ^= ip[i];
5685 	}
5686 
5687 	/* Compare the checksums */
5688 	if (DKD_GETCHKSUM(dkdevid) != chksum) {
5689 		status = EINVAL;
5690 		goto error;
5691 	}
5692 
5693 	/* Validate the device id */
5694 	if (ddi_devid_valid((ddi_devid_t)&dkdevid->dkd_devid) != DDI_SUCCESS) {
5695 		status = EINVAL;
5696 		goto error;
5697 	}
5698 
5699 	/*
5700 	 * Store the device id in the driver soft state
5701 	 */
5702 	sz = ddi_devid_sizeof((ddi_devid_t)&dkdevid->dkd_devid);
5703 	tmpid = kmem_alloc(sz, KM_SLEEP);
5704 
5705 	mutex_enter(SD_MUTEX(un));
5706 
5707 	un->un_devid = tmpid;
5708 	bcopy(&dkdevid->dkd_devid, un->un_devid, sz);
5709 
5710 	kmem_free(dkdevid, buffer_size);
5711 
5712 	SD_TRACE(SD_LOG_ATTACH_DETACH, un, "sd_get_devid: exit: un:0x%p\n", un);
5713 
5714 	return (status);
5715 error:
5716 	mutex_enter(SD_MUTEX(un));
5717 	kmem_free(dkdevid, buffer_size);
5718 	return (status);
5719 }
5720 
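/*
 * Minimal sketch (illustration only, not part of the driver) of the
 * XOR checksum used by sd_get_devid() above and sd_write_deviceid()
 * below: the DEV_BSIZE devid sector is treated as an array of ints
 * and every word except the trailing checksum word is XORed together.
 */
#if 0
static uint_t
dk_devid_chksum(const uint_t *sector)
{
	uint_t	chksum = 0;
	int	i;

	/* All of DEV_BSIZE except the final word, which holds the checksum */
	for (i = 0; i < ((DEV_BSIZE - sizeof (int)) / sizeof (int)); i++)
		chksum ^= sector[i];

	return (chksum);
}
#endif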
5721 
5722 /*
5723  *    Function: sd_create_devid
5724  *
5725  * Description: This routine will fabricate the device id and write it
5726  *		to the disk.
5727  *
5728  *   Arguments: un - driver soft state (unit) structure
5729  *
5730  * Return Code: the fabricated device id, or NULL on failure
5731  *
5732  *     Context: Kernel Thread
5733  */
5734 
5735 static ddi_devid_t
5736 sd_create_devid(sd_ssc_t *ssc)
5737 {
5738 	struct sd_lun	*un;
5739 
5740 	ASSERT(ssc != NULL);
5741 	un = ssc->ssc_un;
5742 	ASSERT(un != NULL);
5743 
5744 	/* Fabricate the devid */
5745 	if (ddi_devid_init(SD_DEVINFO(un), DEVID_FAB, 0, NULL, &un->un_devid)
5746 	    == DDI_FAILURE) {
5747 		return (NULL);
5748 	}
5749 
5750 	/* Write the devid to disk */
5751 	if (sd_write_deviceid(ssc) != 0) {
5752 		ddi_devid_free(un->un_devid);
5753 		un->un_devid = NULL;
5754 	}
5755 
5756 	return (un->un_devid);
5757 }
5758 
5759 
5760 /*
5761  *    Function: sd_write_deviceid
5762  *
5763  * Description: This routine will write the device id to the disk
5764  *		reserved sector.
5765  *
5766  *   Arguments: un - driver soft state (unit) structure
5767  *
5768  * Return Code: -1 if the location of the devid block cannot be determined
5769  *		otherwise, the value returned by sd_send_scsi_WRITE()
5770  *
5771  *     Context: Kernel Thread
5772  */
5773 
5774 static int
5775 sd_write_deviceid(sd_ssc_t *ssc)
5776 {
5777 	struct dk_devid		*dkdevid;
5778 	uchar_t			*buf;
5779 	diskaddr_t		blk;
5780 	uint_t			*ip, chksum;
5781 	int			status;
5782 	int			i;
5783 	struct sd_lun		*un;
5784 
5785 	ASSERT(ssc != NULL);
5786 	un = ssc->ssc_un;
5787 	ASSERT(un != NULL);
5788 	ASSERT(mutex_owned(SD_MUTEX(un)));
5789 
5790 	mutex_exit(SD_MUTEX(un));
5791 	if (cmlb_get_devid_block(un->un_cmlbhandle, &blk,
5792 	    (void *)SD_PATH_DIRECT) != 0) {
5793 		mutex_enter(SD_MUTEX(un));
5794 		return (-1);
5795 	}
5796 
5797 
5798 	/* Allocate the buffer */
5799 	buf = kmem_zalloc(un->un_sys_blocksize, KM_SLEEP);
5800 	dkdevid = (struct dk_devid *)buf;
5801 
5802 	/* Fill in the revision */
5803 	dkdevid->dkd_rev_hi = DK_DEVID_REV_MSB;
5804 	dkdevid->dkd_rev_lo = DK_DEVID_REV_LSB;
5805 
5806 	/* Copy in the device id */
5807 	mutex_enter(SD_MUTEX(un));
5808 	bcopy(un->un_devid, &dkdevid->dkd_devid,
5809 	    ddi_devid_sizeof(un->un_devid));
5810 	mutex_exit(SD_MUTEX(un));
5811 
5812 	/* Calculate the checksum */
5813 	chksum = 0;
5814 	ip = (uint_t *)dkdevid;
5815 	for (i = 0; i < ((DEV_BSIZE - sizeof (int)) / sizeof (int));
5816 	    i++) {
5817 		chksum ^= ip[i];
5818 	}
5819 
5820 	/* Fill-in checksum */
5821 	DKD_FORMCHKSUM(chksum, dkdevid);
5822 
5823 	/* Write the reserved sector */
5824 	status = sd_send_scsi_WRITE(ssc, buf, un->un_sys_blocksize, blk,
5825 	    SD_PATH_DIRECT);
5826 	if (status != 0)
5827 		sd_ssc_assessment(ssc, SD_FMT_IGNORE);
5828 
5829 	kmem_free(buf, un->un_sys_blocksize);
5830 
5831 	mutex_enter(SD_MUTEX(un));
5832 	return (status);
5833 }
5834 
5835 
5836 /*
5837  *    Function: sd_check_vpd_page_support
5838  *
5839  * Description: This routine sends an inquiry command with the EVPD bit set
5840  *		and a page code of 0x00 to the device. It is used to determine
5841  *		which vital product pages are available, primarily to find the
5842  *		devid. We are looking for pages 0x80, 0x83, 0xB0, and 0xB1.  A
5843  *		return value of -1 means the device does not support VPD pages.
5844  *
5845  *   Arguments: un  - driver soft state (unit) structure
5846  *
5847  * Return Code: 0 - success
5848  *		-1 - the device does not support VPD pages
5849  *
5850  *     Context: This routine can sleep.
5851  */
5852 
5853 static int
5854 sd_check_vpd_page_support(sd_ssc_t *ssc)
5855 {
5856 	uchar_t	*page_list	= NULL;
5857 	uchar_t	page_length	= 0xff;	/* Use max possible length */
5858 	uchar_t	evpd		= 0x01;	/* Set the EVPD bit */
5859 	uchar_t	page_code	= 0x00;	/* Supported VPD Pages */
5860 	int	rval		= 0;
5861 	int	counter;
5862 	struct sd_lun		*un;
5863 
5864 	ASSERT(ssc != NULL);
5865 	un = ssc->ssc_un;
5866 	ASSERT(un != NULL);
5867 	ASSERT(mutex_owned(SD_MUTEX(un)));
5868 
5869 	mutex_exit(SD_MUTEX(un));
5870 
5871 	/*
5872 	 * We'll set the page length to the maximum to save figuring it out
5873 	 * with an additional call.
5874 	 */
5875 	page_list =  kmem_zalloc(page_length, KM_SLEEP);
5876 
5877 	rval = sd_send_scsi_INQUIRY(ssc, page_list, page_length, evpd,
5878 	    page_code, NULL);
5879 
5880 	if (rval != 0)
5881 		sd_ssc_assessment(ssc, SD_FMT_IGNORE);
5882 
5883 	mutex_enter(SD_MUTEX(un));
5884 
5885 	/*
5886 	 * Now we must validate that the device accepted the command, as some
5887 	 * drives do not support it.  If the drive does support it, we will
5888 	 * return 0, and the supported pages will be in un_vpd_page_mask.  If
5889 	 * not, we return -1.
5890 	 */
5891 	if ((rval == 0) && (page_list[VPD_MODE_PAGE] == 0x00)) {
5892 		/* Loop over the list of supported pages */
5893 		counter = 4;  /* Supported pages start at byte 4, with 0x00 */
5894 
5895 		/*
5896 		 * Pages are returned in ascending order, and 0x83 is what we
5897 		 * are hoping for.
5898 		 */
5899 		while ((page_list[counter] <= 0xB1) &&
5900 		    (counter <= (page_list[VPD_PAGE_LENGTH] +
5901 		    VPD_HEAD_OFFSET))) {
5902 			/*
5903 			 * The loop bound adds VPD_HEAD_OFFSET because the page
5904 			 * length byte counts only the codes after the header.
5905 			 */
5906 
5907 			switch (page_list[counter]) {
5908 			case 0x00:
5909 				un->un_vpd_page_mask |= SD_VPD_SUPPORTED_PG;
5910 				break;
5911 			case 0x80:
5912 				un->un_vpd_page_mask |= SD_VPD_UNIT_SERIAL_PG;
5913 				break;
5914 			case 0x81:
5915 				un->un_vpd_page_mask |= SD_VPD_OPERATING_PG;
5916 				break;
5917 			case 0x82:
5918 				un->un_vpd_page_mask |= SD_VPD_ASCII_OP_PG;
5919 				break;
5920 			case 0x83:
5921 				un->un_vpd_page_mask |= SD_VPD_DEVID_WWN_PG;
5922 				break;
5923 			case 0x86:
5924 				un->un_vpd_page_mask |= SD_VPD_EXTENDED_DATA_PG;
5925 				break;
5926 			case 0xB0:
5927 				un->un_vpd_page_mask |= SD_VPD_BLK_LIMITS_PG;
5928 				break;
5929 			case 0xB1:
5930 				un->un_vpd_page_mask |= SD_VPD_DEV_CHARACTER_PG;
5931 				break;
5932 			}
5933 			counter++;
5934 		}
5935 
5936 	} else {
5937 		rval = -1;
5938 
5939 		SD_INFO(SD_LOG_ATTACH_DETACH, un,
5940 		    "sd_check_vpd_page_support: This drive does not implement "
5941 		    "VPD pages.\n");
5942 	}
5943 
5944 	kmem_free(page_list, page_length);
5945 
5946 	return (rval);
5947 }
5948 
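/*
 * Worked example (hypothetical response) of the parsing above: a
 * device returning the supported-pages list
 *
 *	page_list[0..3] = 0x00 0x00 0x00 0x04	(header; 4 page codes follow)
 *	page_list[4..7] = 0x00 0x80 0x83 0xB0
 *
 * leaves un_vpd_page_mask with SD_VPD_SUPPORTED_PG,
 * SD_VPD_UNIT_SERIAL_PG, SD_VPD_DEVID_WWN_PG and SD_VPD_BLK_LIMITS_PG
 * set, so sd_register_devid() can use page 0x83 and
 * sd_setup_blk_limits() can fetch page 0xB0.
 */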
5949 
5950 /*
5951  *    Function: sd_setup_pm
5952  *
5953  * Description: Initialize Power Management on the device
5954  *
5955  *     Context: Kernel Thread
5956  */
5957 
5958 static void
5959 sd_setup_pm(sd_ssc_t *ssc, dev_info_t *devi)
5960 {
5961 	uint_t		log_page_size;
5962 	uchar_t		*log_page_data;
5963 	int		rval = 0;
5964 	struct sd_lun	*un;
5965 
5966 	ASSERT(ssc != NULL);
5967 	un = ssc->ssc_un;
5968 	ASSERT(un != NULL);
5969 
5970 	/*
5971 	 * Since we are called from attach, holding a mutex for
5972 	 * un is unnecessary. Because some of the routines called
5973 	 * from here require SD_MUTEX to not be held, assert this
5974 	 * right up front.
5975 	 */
5976 	ASSERT(!mutex_owned(SD_MUTEX(un)));
5977 	/*
5978 	 * Since the sd device does not have the 'reg' property,
5979 	 * cpr will not call its DDI_SUSPEND/DDI_RESUME entries.
5980 	 * The following code is to tell cpr that this device
5981 	 * DOES need to be suspended and resumed.
5982 	 */
5983 	(void) ddi_prop_update_string(DDI_DEV_T_NONE, devi,
5984 	    "pm-hardware-state", "needs-suspend-resume");
5985 
5986 	/*
5987 	 * This complies with the new power management framework
5988 	 * for certain desktop machines. Create the pm_components
5989 	 * property as a string array property.
5990 	 * If un_f_pm_supported is TRUE, that means the disk
5991 	 * attached HBA has set the "pm-capable" property and
5992 	 * the value of this property is bigger than 0.
5993 	 */
5994 	if (un->un_f_pm_supported) {
5995 		/*
5996 		 * Not all devices have a motor, so try it first.
5997 		 * Some devices may return ILLEGAL REQUEST, and some
5998 		 * will hang.
5999 		 * The following START_STOP_UNIT command is used to check
6000 		 * whether the target device has a motor.
6001 		 */
6002 		un->un_f_start_stop_supported = TRUE;
6003 
6004 		if (un->un_f_power_condition_supported) {
6005 			rval = sd_send_scsi_START_STOP_UNIT(ssc,
6006 			    SD_POWER_CONDITION, SD_TARGET_ACTIVE,
6007 			    SD_PATH_DIRECT);
6008 			if (rval != 0) {
6009 				un->un_f_power_condition_supported = FALSE;
6010 			}
6011 		}
6012 		if (!un->un_f_power_condition_supported) {
6013 			rval = sd_send_scsi_START_STOP_UNIT(ssc,
6014 			    SD_START_STOP, SD_TARGET_START, SD_PATH_DIRECT);
6015 		}
6016 		if (rval != 0) {
6017 			sd_ssc_assessment(ssc, SD_FMT_IGNORE);
6018 			un->un_f_start_stop_supported = FALSE;
6019 		}
6020 
6021 		/*
6022 		 * Create the pm properties anyway; otherwise the parent
6023 		 * cannot go to sleep.
6024 		 */
6025 		un->un_f_pm_is_enabled = TRUE;
6026 		(void) sd_create_pm_components(devi, un);
6027 
6028 		/*
6029 		 * If it claims that log sense is supported, check it out.
6030 		 */
6031 		if (un->un_f_log_sense_supported) {
6032 			rval = sd_log_page_supported(ssc,
6033 			    START_STOP_CYCLE_PAGE);
6034 			if (rval == 1) {
6035 				/* Page found, use it. */
6036 				un->un_start_stop_cycle_page =
6037 				    START_STOP_CYCLE_PAGE;
6038 			} else {
6039 				/*
6040 				 * Page not found or log sense is not
6041 				 * supported.
6042 				 * Notice we do not check the old style
6043 				 * START_STOP_CYCLE_VU_PAGE because this
6044 				 * code path does not apply to old disks.
6045 				 */
6046 				un->un_f_log_sense_supported = FALSE;
6047 				un->un_f_pm_log_sense_smart = FALSE;
6048 			}
6049 		}
6050 
6051 		return;
6052 	}
6053 
6054 	/*
6055 	 * For a disk whose attached HBA has not set the "pm-capable"
6056 	 * property, check whether the device supports power management.
6057 	 */
6058 	if (!un->un_f_log_sense_supported) {
6059 		un->un_power_level = SD_SPINDLE_ON;
6060 		un->un_f_pm_is_enabled = FALSE;
6061 		return;
6062 	}
6063 
6064 	rval = sd_log_page_supported(ssc, START_STOP_CYCLE_PAGE);
6065 
6066 #ifdef	SDDEBUG
6067 	if (sd_force_pm_supported) {
6068 		/* Force a successful result */
6069 		rval = 1;
6070 	}
6071 #endif
6072 
6073 	/*
6074 	 * If the start-stop cycle counter log page is not supported
6075 	 * or if the pm-capable property is set to be false (0),
6076 	 * then we should not create the pm_components property.
6077 	 */
6078 	if (rval == -1) {
6079 		/*
6080 		 * Error.
6081 		 * Reading log sense failed, most likely this is
6082 		 * an older drive that does not support log sense.
6083 		 * If this fails auto-pm is not supported.
6084 		 */
6085 		un->un_power_level = SD_SPINDLE_ON;
6086 		un->un_f_pm_is_enabled = FALSE;
6087 
6088 	} else if (rval == 0) {
6089 		/*
6090 		 * Page not found.
6091 		 * The start stop cycle counter is implemented as page
6092 		 * START_STOP_CYCLE_PAGE_VU_PAGE (0x31) in older disks. For
6093 		 * newer disks it is implemented as START_STOP_CYCLE_PAGE (0xE).
6094 		 */
6095 		if (sd_log_page_supported(ssc, START_STOP_CYCLE_VU_PAGE) == 1) {
6096 			/*
6097 			 * Page found, use this one.
6098 			 */
6099 			un->un_start_stop_cycle_page = START_STOP_CYCLE_VU_PAGE;
6100 			un->un_f_pm_is_enabled = TRUE;
6101 		} else {
6102 			/*
6103 			 * Error or page not found.
6104 			 * auto-pm is not supported for this device.
6105 			 */
6106 			un->un_power_level = SD_SPINDLE_ON;
6107 			un->un_f_pm_is_enabled = FALSE;
6108 		}
6109 	} else {
6110 		/*
6111 		 * Page found, use it.
6112 		 */
6113 		un->un_start_stop_cycle_page = START_STOP_CYCLE_PAGE;
6114 		un->un_f_pm_is_enabled = TRUE;
6115 	}
6116 
6117 
6118 	if (un->un_f_pm_is_enabled == TRUE) {
6119 		log_page_size = START_STOP_CYCLE_COUNTER_PAGE_SIZE;
6120 		log_page_data = kmem_zalloc(log_page_size, KM_SLEEP);
6121 
6122 		rval = sd_send_scsi_LOG_SENSE(ssc, log_page_data,
6123 		    log_page_size, un->un_start_stop_cycle_page,
6124 		    0x01, 0, SD_PATH_DIRECT);
6125 
6126 		if (rval != 0) {
6127 			sd_ssc_assessment(ssc, SD_FMT_IGNORE);
6128 		}
6129 
6130 #ifdef	SDDEBUG
6131 		if (sd_force_pm_supported) {
6132 			/* Force a successful result */
6133 			rval = 0;
6134 		}
6135 #endif
6136 
6137 		/*
6138 		 * If the LOG SENSE for the start/stop cycle counter page
6139 		 * succeeds, then power management is supported and we can
6140 		 * enable auto-pm.
6141 		 */
6142 		if (rval == 0)  {
6143 			(void) sd_create_pm_components(devi, un);
6144 		} else {
6145 			un->un_power_level = SD_SPINDLE_ON;
6146 			un->un_f_pm_is_enabled = FALSE;
6147 		}
6148 
6149 		kmem_free(log_page_data, log_page_size);
6150 	}
6151 }
6152 
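/*
 * Summary of the log-page probing above, keyed on the value returned
 * by sd_log_page_supported():
 *
 *	-1	LOG SENSE failed; assume an older drive and disable auto-pm
 *	 0	page 0xE not listed; fall back to the vendor-unique page
 *		0x31 and enable auto-pm only if that page is supported
 *	 1	page 0xE supported; use it and enable auto-pm
 */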
6153 
6154 /*
6155  *    Function: sd_create_pm_components
6156  *
6157  * Description: Initialize PM property.
6158  *
6159  *     Context: Kernel thread context
6160  */
6161 
6162 static void
6163 sd_create_pm_components(dev_info_t *devi, struct sd_lun *un)
6164 {
6165 	ASSERT(!mutex_owned(SD_MUTEX(un)));
6166 
6167 	if (un->un_f_power_condition_supported) {
6168 		if (ddi_prop_update_string_array(DDI_DEV_T_NONE, devi,
6169 		    "pm-components", sd_pwr_pc.pm_comp, 5)
6170 		    != DDI_PROP_SUCCESS) {
6171 			un->un_power_level = SD_SPINDLE_ACTIVE;
6172 			un->un_f_pm_is_enabled = FALSE;
6173 			return;
6174 		}
6175 	} else {
6176 		if (ddi_prop_update_string_array(DDI_DEV_T_NONE, devi,
6177 		    "pm-components", sd_pwr_ss.pm_comp, 3)
6178 		    != DDI_PROP_SUCCESS) {
6179 			un->un_power_level = SD_SPINDLE_ON;
6180 			un->un_f_pm_is_enabled = FALSE;
6181 			return;
6182 		}
6183 	}
6184 	/*
6185 	 * When components are initially created they are idle;
6186 	 * power up any non-removables.
6187 	 * Note: the return value of pm_raise_power can't be used
6188 	 * for determining if PM should be enabled for this device.
6189 	 * Even if you check the return values and remove this
6190 	 * property created above, the PM framework will not honor the
6191 	 * change after the first call to pm_raise_power. Hence,
6192 	 * removal of that property does not help if pm_raise_power
6193 	 * fails. In the case of removable media, the start/stop
6194 	 * will fail if the media is not present.
6195 	 */
6196 	if (un->un_f_attach_spinup && (pm_raise_power(SD_DEVINFO(un), 0,
6197 	    SD_PM_STATE_ACTIVE(un)) == DDI_SUCCESS)) {
6198 		mutex_enter(SD_MUTEX(un));
6199 		un->un_power_level = SD_PM_STATE_ACTIVE(un);
6200 		mutex_enter(&un->un_pm_mutex);
6201 		/* Set to on and not busy. */
6202 		un->un_pm_count = 0;
6203 	} else {
6204 		mutex_enter(SD_MUTEX(un));
6205 		un->un_power_level = SD_PM_STATE_STOPPED(un);
6206 		mutex_enter(&un->un_pm_mutex);
6207 		/* Set to off. */
6208 		un->un_pm_count = -1;
6209 	}
6210 	mutex_exit(&un->un_pm_mutex);
6211 	mutex_exit(SD_MUTEX(un));
6212 }
6213 
6214 
6215 /*
6216  *    Function: sd_ddi_suspend
6217  *
6218  * Description: Performs system power-down operations. This includes
6219  *		setting the drive state to indicate it is suspended so
6220  *		that no new commands will be accepted. Also, wait for
6221  *		all commands that are in transport or queued to a timer
6222  *		for retry to complete. All timeout threads are cancelled.
6223  *
6224  * Return Code: DDI_FAILURE or DDI_SUCCESS
6225  *
6226  *     Context: Kernel thread context
6227  */
6228 
6229 static int
6230 sd_ddi_suspend(dev_info_t *devi)
6231 {
6232 	struct	sd_lun	*un;
6233 	clock_t		wait_cmds_complete;
6234 
6235 	un = ddi_get_soft_state(sd_state, ddi_get_instance(devi));
6236 	if (un == NULL) {
6237 		return (DDI_FAILURE);
6238 	}
6239 
6240 	SD_TRACE(SD_LOG_IO_PM, un, "sd_ddi_suspend: entry\n");
6241 
6242 	mutex_enter(SD_MUTEX(un));
6243 
6244 	/* Return success if the device is already suspended. */
6245 	if (un->un_state == SD_STATE_SUSPENDED) {
6246 		mutex_exit(SD_MUTEX(un));
6247 		SD_TRACE(SD_LOG_IO_PM, un, "sd_ddi_suspend: "
6248 		    "device already suspended, exiting\n");
6249 		return (DDI_SUCCESS);
6250 	}
6251 
6252 	/* Return failure if the device is being used by HA */
6253 	if (un->un_resvd_status &
6254 	    (SD_RESERVE | SD_WANT_RESERVE | SD_LOST_RESERVE)) {
6255 		mutex_exit(SD_MUTEX(un));
6256 		SD_TRACE(SD_LOG_IO_PM, un, "sd_ddi_suspend: "
6257 		    "device in use by HA, exiting\n");
6258 		return (DDI_FAILURE);
6259 	}
6260 
6261 	/*
6262 	 * Return failure if the device is in a resource wait
6263 	 * or power changing state.
6264 	 */
6265 	if ((un->un_state == SD_STATE_RWAIT) ||
6266 	    (un->un_state == SD_STATE_PM_CHANGING)) {
6267 		mutex_exit(SD_MUTEX(un));
6268 		SD_TRACE(SD_LOG_IO_PM, un, "sd_ddi_suspend: "
6269 		    "device in resource wait state, exiting\n");
6270 		return (DDI_FAILURE);
6271 	}
6272 
6273 
6274 	un->un_save_state = un->un_last_state;
6275 	New_state(un, SD_STATE_SUSPENDED);
6276 
6277 	/*
6278 	 * Wait for all commands that are in transport or queued to a timer
6279 	 * for retry to complete.
6280 	 *
6281 	 * While waiting, no new commands will be accepted or sent because of
6282 	 * the new state we set above.
6283 	 *
6284 	 * Wait till current operation has completed. If we are in the resource
6285 	 * wait state (with an intr outstanding) then we need to wait till the
6286 	 * intr completes and starts the next cmd. We want to wait for
6287 	 * SD_WAIT_CMDS_COMPLETE seconds before failing the DDI_SUSPEND.
6288 	 */
6289 	wait_cmds_complete = ddi_get_lbolt() +
6290 	    (sd_wait_cmds_complete * drv_usectohz(1000000));
6291 
6292 	while (un->un_ncmds_in_transport != 0) {
6293 		/*
6294 		 * Fail if commands do not finish in the specified time.
6295 		 */
6296 		if (cv_timedwait(&un->un_disk_busy_cv, SD_MUTEX(un),
6297 		    wait_cmds_complete) == -1) {
6298 			/*
6299 			 * Undo the state changes made above. Everything
6300 			 * must go back to its original value.
6301 			 */
6302 			Restore_state(un);
6303 			un->un_last_state = un->un_save_state;
6304 			/* Wake up any threads that might be waiting. */
6305 			cv_broadcast(&un->un_suspend_cv);
6306 			mutex_exit(SD_MUTEX(un));
6307 			SD_ERROR(SD_LOG_IO_PM, un,
6308 			    "sd_ddi_suspend: failed due to outstanding cmds\n");
6309 			SD_TRACE(SD_LOG_IO_PM, un, "sd_ddi_suspend: exiting\n");
6310 			return (DDI_FAILURE);
6311 		}
6312 	}
6313 
6314 	/*
6315 	 * Cancel SCSI watch thread and timeouts, if any are active
6316 	 */
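
	/*
	 * Note on the pattern used below: each timeout id is captured and
	 * cleared while SD_MUTEX is held, so no other thread will try to
	 * cancel or reuse the same id. The mutex is then dropped across
	 * untimeout(9F), because untimeout may wait for a callback that is
	 * already running, and that callback may itself need SD_MUTEX;
	 * calling untimeout with the mutex held could deadlock.
	 */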
6317 
6318 	if (SD_OK_TO_SUSPEND_SCSI_WATCHER(un)) {
6319 		opaque_t temp_token = un->un_swr_token;
6320 		mutex_exit(SD_MUTEX(un));
6321 		scsi_watch_suspend(temp_token);
6322 		mutex_enter(SD_MUTEX(un));
6323 	}
6324 
6325 	if (un->un_reset_throttle_timeid != NULL) {
6326 		timeout_id_t temp_id = un->un_reset_throttle_timeid;
6327 		un->un_reset_throttle_timeid = NULL;
6328 		mutex_exit(SD_MUTEX(un));
6329 		(void) untimeout(temp_id);
6330 		mutex_enter(SD_MUTEX(un));
6331 	}
6332 
6333 	if (un->un_dcvb_timeid != NULL) {
6334 		timeout_id_t temp_id = un->un_dcvb_timeid;
6335 		un->un_dcvb_timeid = NULL;
6336 		mutex_exit(SD_MUTEX(un));
6337 		(void) untimeout(temp_id);
6338 		mutex_enter(SD_MUTEX(un));
6339 	}
6340 
6341 	mutex_enter(&un->un_pm_mutex);
6342 	if (un->un_pm_timeid != NULL) {
6343 		timeout_id_t temp_id = un->un_pm_timeid;
6344 		un->un_pm_timeid = NULL;
6345 		mutex_exit(&un->un_pm_mutex);
6346 		mutex_exit(SD_MUTEX(un));
6347 		(void) untimeout(temp_id);
6348 		mutex_enter(SD_MUTEX(un));
6349 	} else {
6350 		mutex_exit(&un->un_pm_mutex);
6351 	}
6352 
6353 	if (un->un_rmw_msg_timeid != NULL) {
6354 		timeout_id_t temp_id = un->un_rmw_msg_timeid;
6355 		un->un_rmw_msg_timeid = NULL;
6356 		mutex_exit(SD_MUTEX(un));
6357 		(void) untimeout(temp_id);
6358 		mutex_enter(SD_MUTEX(un));
6359 	}
6360 
6361 	if (un->un_retry_timeid != NULL) {
6362 		timeout_id_t temp_id = un->un_retry_timeid;
6363 		un->un_retry_timeid = NULL;
6364 		mutex_exit(SD_MUTEX(un));
6365 		(void) untimeout(temp_id);
6366 		mutex_enter(SD_MUTEX(un));
6367 
6368 		if (un->un_retry_bp != NULL) {
6369 			un->un_retry_bp->av_forw = un->un_waitq_headp;
6370 			un->un_waitq_headp = un->un_retry_bp;
6371 			if (un->un_waitq_tailp == NULL) {
6372 				un->un_waitq_tailp = un->un_retry_bp;
6373 			}
6374 			un->un_retry_bp = NULL;
6375 			un->un_retry_statp = NULL;
6376 		}
6377 	}
6378 
6379 	if (un->un_direct_priority_timeid != NULL) {
6380 		timeout_id_t temp_id = un->un_direct_priority_timeid;
6381 		un->un_direct_priority_timeid = NULL;
6382 		mutex_exit(SD_MUTEX(un));
6383 		(void) untimeout(temp_id);
6384 		mutex_enter(SD_MUTEX(un));
6385 	}
6386 
6387 	if (un->un_f_is_fibre == TRUE) {
6388 		/*
6389 		 * Remove callbacks for insert and remove events
6390 		 */
6391 		if (un->un_insert_event != NULL) {
6392 			mutex_exit(SD_MUTEX(un));
6393 			(void) ddi_remove_event_handler(un->un_insert_cb_id);
6394 			mutex_enter(SD_MUTEX(un));
6395 			un->un_insert_event = NULL;
6396 		}
6397 
6398 		if (un->un_remove_event != NULL) {
6399 			mutex_exit(SD_MUTEX(un));
6400 			(void) ddi_remove_event_handler(un->un_remove_cb_id);
6401 			mutex_enter(SD_MUTEX(un));
6402 			un->un_remove_event = NULL;
6403 		}
6404 	}
6405 
6406 	mutex_exit(SD_MUTEX(un));
6407 
6408 	SD_TRACE(SD_LOG_IO_PM, un, "sd_ddi_suspend: exit\n");
6409 
6410 	return (DDI_SUCCESS);
6411 }
6412 
6413 
6414 /*
6415  *    Function: sd_ddi_resume
6416  *
6417  * Description: Performs system power-up operations.
6418  *
6419  * Return Code: DDI_SUCCESS
6420  *		DDI_FAILURE
6421  *
6422  *     Context: Kernel thread context
6423  */
6424 
6425 static int
6426 sd_ddi_resume(dev_info_t *devi)
6427 {
6428 	struct	sd_lun	*un;
6429 
6430 	un = ddi_get_soft_state(sd_state, ddi_get_instance(devi));
6431 	if (un == NULL) {
6432 		return (DDI_FAILURE);
6433 	}
6434 
6435 	SD_TRACE(SD_LOG_IO_PM, un, "sd_ddi_resume: entry\n");
6436 
6437 	mutex_enter(SD_MUTEX(un));
6438 	Restore_state(un);
6439 
6440 	/*
6441 	 * Restore the state which was saved, to give the
6442 	 * right state in un_last_state.
6443 	 */
6444 	un->un_last_state = un->un_save_state;
6445 	/*
6446 	 * Note: throttle comes back at full.
6447 	 * Also note: this MUST be done before calling pm_raise_power
6448 	 * otherwise the system can get hung in biowait. The scenario where
6449 	 * this'll happen is under cpr suspend. Writing of the system
6450 	 * state goes through sddump, which writes 0 to un_throttle. If
6451 	 * writing the system state then fails, for example if the partition
6452 	 * is too small, then cpr attempts a resume. If throttle isn't restored
6453 	 * from the saved value until after calling pm_raise_power then
6454 	 * cmds sent in sdpower are not transported and sd_send_scsi_cmd hangs
6455 	 * in biowait.
6456 	 */
6457 	un->un_throttle = un->un_saved_throttle;
6458 
6459 	/*
6460 	 * The chance of failure is very rare, as the only command issued in the
6461 	 * power entry point is a START command on the transition from 0->1 or
6462 	 * unknown->1. Put the device in the SPINDLE ON state irrespective of
6463 	 * the state in which suspend was done. Ignore the return value, as the
6464 	 * resume should not fail. In the case of removable media, the media
6465 	 * need not be inserted, so there is a chance that raise power will
6466 	 * fail with media not present.
6467 	 */
6468 	if (un->un_f_attach_spinup) {
6469 		mutex_exit(SD_MUTEX(un));
6470 		(void) pm_raise_power(SD_DEVINFO(un), 0,
6471 		    SD_PM_STATE_ACTIVE(un));
6472 		mutex_enter(SD_MUTEX(un));
6473 	}
6474 
6475 	/*
6476 	 * Don't broadcast to the suspend cv and therefore possibly
6477 	 * start I/O until after power has been restored.
6478 	 */
6479 	cv_broadcast(&un->un_suspend_cv);
6480 	cv_broadcast(&un->un_state_cv);
6481 
6482 	/* restart thread */
6483 	if (SD_OK_TO_RESUME_SCSI_WATCHER(un)) {
6484 		scsi_watch_resume(un->un_swr_token);
6485 	}
6486 
6487 #if (defined(__fibre))
6488 	if (un->un_f_is_fibre == TRUE) {
6489 		/*
6490 		 * Add callbacks for insert and remove events
6491 		 */
6492 		if (strcmp(un->un_node_type, DDI_NT_BLOCK_CHAN)) {
6493 			sd_init_event_callbacks(un);
6494 		}
6495 	}
6496 #endif
6497 
6498 	/*
6499 	 * Transport any pending commands to the target.
6500 	 *
6501 	 * If this is a low-activity device, commands in the queue will have to
6502 	 * wait until new commands come in, which may take a while. Also, we
6503 	 * specifically don't check un_ncmds_in_transport because we know that
6504 	 * there really are no commands in progress after the unit was
6505 	 * suspended and we could have reached the throttle level, been
6506 	 * suspended, and have no new commands coming in for a while. Highly
6507 	 * unlikely, but so is the low-activity disk scenario.
6508 	 */
6509 	ddi_xbuf_dispatch(un->un_xbuf_attr);
6510 
6511 	sd_start_cmds(un, NULL);
6512 	mutex_exit(SD_MUTEX(un));
6513 
6514 	SD_TRACE(SD_LOG_IO_PM, un, "sd_ddi_resume: exit\n");
6515 
6516 	return (DDI_SUCCESS);
6517 }
6518 
6519 
6520 /*
6521  *    Function: sd_pm_state_change
6522  *
6523  * Description: Change the driver power state.
6524  *		Someone else is required to actually change the driver
6525  *		power level.
6526  *
6527  *   Arguments: un - driver soft state (unit) structure
6528  *              level - the power level that is changed to
6529  *              flag - to decide how to change the power state
6530  *
6531  * Return Code: DDI_SUCCESS
6532  *
6533  *     Context: Kernel thread context
6534  */
6535 static int
6536 sd_pm_state_change(struct sd_lun *un, int level, int flag)
6537 {
6538 	ASSERT(un != NULL);
6539 	SD_TRACE(SD_LOG_POWER, un, "sd_pm_state_change: entry\n");
6540 
6541 	ASSERT(!mutex_owned(SD_MUTEX(un)));
6542 	mutex_enter(SD_MUTEX(un));
6543 
6544 	if (flag == SD_PM_STATE_ROLLBACK || SD_PM_IS_IO_CAPABLE(un, level)) {
6545 		un->un_power_level = level;
6546 		ASSERT(!mutex_owned(&un->un_pm_mutex));
6547 		mutex_enter(&un->un_pm_mutex);
6548 		if (SD_DEVICE_IS_IN_LOW_POWER(un)) {
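			/*
			 * While the device is in low power, un_pm_count is
			 * expected to be -1; the increment below should
			 * bring it back to exactly 0, per the ASSERT.
			 */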
6549 			un->un_pm_count++;
6550 			ASSERT(un->un_pm_count == 0);
6551 		}
6552 		mutex_exit(&un->un_pm_mutex);
6553 	} else {
6554 		/*
6555 		 * Exit if power management is not enabled for this device,
6556 		 * or if the device is being used by HA.
6557 		 */
6558 		if ((un->un_f_pm_is_enabled == FALSE) || (un->un_resvd_status &
6559 		    (SD_RESERVE | SD_WANT_RESERVE | SD_LOST_RESERVE))) {
6560 			mutex_exit(SD_MUTEX(un));
6561 			SD_TRACE(SD_LOG_POWER, un,
6562 			    "sd_pm_state_change: exiting\n");
6563 			return (DDI_FAILURE);
6564 		}
6565 
6566 		SD_INFO(SD_LOG_POWER, un, "sd_pm_state_change: "
6567 		    "un_ncmds_in_driver=%ld\n", un->un_ncmds_in_driver);
6568 
6569 		/*
6570 		 * See if the device is not busy, i.e.:
6571 		 *    - we have no commands in the driver for this device
6572 		 *    - not waiting for resources
6573 		 */
6574 		if ((un->un_ncmds_in_driver == 0) &&
6575 		    (un->un_state != SD_STATE_RWAIT)) {
6576 			/*
6577 			 * The device is not busy, so it is OK to go to low
6578 			 * power state. Indicate low power, but rely on someone
6579 			 * else to actually change it.
6580 			 */
6581 			mutex_enter(&un->un_pm_mutex);
6582 			un->un_pm_count = -1;
6583 			mutex_exit(&un->un_pm_mutex);
6584 			un->un_power_level = level;
6585 		}
6586 	}
6587 
6588 	mutex_exit(SD_MUTEX(un));
6589 
6590 	SD_TRACE(SD_LOG_POWER, un, "sd_pm_state_change: exit\n");
6591 
6592 	return (DDI_SUCCESS);
6593 }
6594 
6595 
6596 /*
6597  *    Function: sd_pm_idletimeout_handler
6598  *
6599  * Description: A timer routine that's active only while a device is busy.
6600  *		The purpose is to extend slightly the pm framework's busy
6601  *		view of the device to prevent busy/idle thrashing for
6602  *		back-to-back commands. Do this by comparing the current time
6603  *		to the time at which the last command completed and when the
6604  *		difference is greater than sd_pm_idletime, call
6605  *		pm_idle_component. In addition to indicating idle to the pm
6606  *		framework, update the chain type to again use the internal pm
6607  *		layers of the driver.
6608  *
6609  *   Arguments: arg - driver soft state (unit) structure
6610  *
6611  *     Context: Executes in a timeout(9F) thread context
6612  */
6613 
6614 static void
6615 sd_pm_idletimeout_handler(void *arg)
6616 {
6617 	const hrtime_t idletime = sd_pm_idletime * NANOSEC;
6618 	struct sd_lun *un = arg;
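	/*
	 * gethrtime() returns nanoseconds, so sd_pm_idletime (presumably
	 * expressed in seconds) is scaled by NANOSEC above for the
	 * comparison against the last-command-completion time below.
	 */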
6619 
6620 	mutex_enter(&sd_detach_mutex);
6621 	if (un->un_detach_count != 0) {
6622 		/* Abort if the instance is detaching */
6623 		mutex_exit(&sd_detach_mutex);
6624 		return;
6625 	}
6626 	mutex_exit(&sd_detach_mutex);
6627 
6628 	/*
6629 	 * Grab both mutexes, in the proper order, since we're accessing
6630 	 * both PM and softstate variables.
6631 	 */
6632 	mutex_enter(SD_MUTEX(un));
6633 	mutex_enter(&un->un_pm_mutex);
6634 	if (((gethrtime() - un->un_pm_idle_time) > idletime) &&
6635 	    (un->un_ncmds_in_driver == 0) && (un->un_pm_count == 0)) {
6636 		/*
6637 		 * Update the chain types.
6638 		 * This takes effect on the next new command received.
6639 		 */
6640 		if (un->un_f_non_devbsize_supported) {
6641 			un->un_buf_chain_type = SD_CHAIN_INFO_RMMEDIA;
6642 		} else {
6643 			un->un_buf_chain_type = SD_CHAIN_INFO_DISK;
6644 		}
6645 		un->un_uscsi_chain_type = SD_CHAIN_INFO_USCSI_CMD;
6646 
6647 		SD_TRACE(SD_LOG_IO_PM, un,
6648 		    "sd_pm_idletimeout_handler: idling device\n");
6649 		(void) pm_idle_component(SD_DEVINFO(un), 0);
6650 		un->un_pm_idle_timeid = NULL;
6651 	} else {
6652 		un->un_pm_idle_timeid =
6653 		    timeout(sd_pm_idletimeout_handler, un,
6654 		    (drv_usectohz((clock_t)300000))); /* 300 ms. */
6655 	}
6656 	mutex_exit(&un->un_pm_mutex);
6657 	mutex_exit(SD_MUTEX(un));
6658 }
6659 
6660 
6661 /*
6662  *    Function: sd_pm_timeout_handler
6663  *
6664  * Description: Callback to tell framework we are idle.
6665  *
6666  *     Context: timeout(9F) thread context.
6667  */
6668 
6669 static void
6670 sd_pm_timeout_handler(void *arg)
6671 {
6672 	struct sd_lun *un = arg;
6673 
6674 	(void) pm_idle_component(SD_DEVINFO(un), 0);
6675 	mutex_enter(&un->un_pm_mutex);
6676 	un->un_pm_timeid = NULL;
6677 	mutex_exit(&un->un_pm_mutex);
6678 }
6679 
6680 
6681 /*
6682  *    Function: sdpower
6683  *
6684  * Description: PM entry point.
6685  *
6686  * Return Code: DDI_SUCCESS
6687  *		DDI_FAILURE
6688  *
6689  *     Context: Kernel thread context
6690  */
6691 
6692 static int
6693 sdpower(dev_info_t *devi, int component, int level)
6694 {
6695 	struct sd_lun	*un;
6696 	int		instance;
6697 	int		rval = DDI_SUCCESS;
6698 	uint_t		i, log_page_size, maxcycles, ncycles;
6699 	uchar_t		*log_page_data;
6700 	int		log_sense_page;
6701 	int		medium_present;
6702 	time_t		intvlp;
6703 	struct pm_trans_data	sd_pm_tran_data;
6704 	uchar_t		save_state = SD_STATE_NORMAL;
6705 	int		sval;
6706 	uchar_t		state_before_pm;
6707 	int		got_semaphore_here;
6708 	sd_ssc_t	*ssc;
6709 	int	last_power_level = SD_SPINDLE_UNINIT;
6710 
6711 	instance = ddi_get_instance(devi);
6712 
6713 	if (((un = ddi_get_soft_state(sd_state, instance)) == NULL) ||
6714 	    !SD_PM_IS_LEVEL_VALID(un, level) || component != 0) {
6715 		return (DDI_FAILURE);
6716 	}
6717 
6718 	ssc = sd_ssc_init(un);
6719 
6720 	SD_TRACE(SD_LOG_IO_PM, un, "sdpower: entry, level = %d\n", level);
6721 
6722 	/*
6723 	 * Must synchronize power down with close.
6724 	 * Attempt to decrement/acquire the open/close semaphore,
6725 	 * but do NOT wait on it. If it's not greater than zero,
6726 	 * i.e. it can't be decremented without waiting, then
6727 	 * someone else, either open or close, already has it
6728 	 * and the try returns 0. Use that knowledge here to determine
6729 	 * if it's OK to change the device power level.
6730 	 * Also, only increment it on exit if it was decremented, i.e. gotten,
6731 	 * here.
6732 	 */
6733 	got_semaphore_here = sema_tryp(&un->un_semoclose);
6734 
6735 	mutex_enter(SD_MUTEX(un));
6736 
6737 	SD_INFO(SD_LOG_POWER, un, "sdpower: un_ncmds_in_driver = %ld\n",
6738 	    un->un_ncmds_in_driver);
6739 
6740 	/*
6741 	 * If un_ncmds_in_driver is non-zero, it indicates commands are
6742 	 * already being processed in the driver; if the semaphore was not
6743 	 * acquired here, it indicates an open or close is being processed.
6744 	 * In either case, a concurrent request to go to a lower power level
6745 	 * that can't perform I/O cannot be honored, therefore we need to
6746 	 * return failure.
6747 	 */
6748 	if ((!SD_PM_IS_IO_CAPABLE(un, level)) &&
6749 	    ((un->un_ncmds_in_driver != 0) || (got_semaphore_here == 0))) {
6750 		mutex_exit(SD_MUTEX(un));
6751 
6752 		if (got_semaphore_here != 0) {
6753 			sema_v(&un->un_semoclose);
6754 		}
6755 		SD_TRACE(SD_LOG_IO_PM, un,
6756 		    "sdpower: exit, device has queued cmds.\n");
6757 
6758 		goto sdpower_failed;
6759 	}
6760 
6761 	/*
6762 	 * If the state is OFFLINE, the disk is completely dead; we would
6763 	 * have to put the disk on or off by sending commands, which would
6764 	 * fail anyway, so fail here.
6765 	 *
6766 	 * Power changes to a device that's OFFLINE or SUSPENDED
6767 	 * are not allowed.
6768 	 */
6769 	if ((un->un_state == SD_STATE_OFFLINE) ||
6770 	    (un->un_state == SD_STATE_SUSPENDED)) {
6771 		mutex_exit(SD_MUTEX(un));
6772 
6773 		if (got_semaphore_here != 0) {
6774 			sema_v(&un->un_semoclose);
6775 		}
6776 		SD_TRACE(SD_LOG_IO_PM, un,
6777 		    "sdpower: exit, device is off-line.\n");
6778 
6779 		goto sdpower_failed;
6780 	}
6781 
6782 	/*
6783 	 * Change the device's state to indicate its power level
6784 	 * is being changed. Do this to prevent a power off in the
6785 	 * middle of commands, which is especially bad on devices
6786 	 * that are really powered off instead of just spun down.
6787 	 */
6788 	state_before_pm = un->un_state;
6789 	un->un_state = SD_STATE_PM_CHANGING;
6790 
6791 	mutex_exit(SD_MUTEX(un));
6792 
6793 	/*
6794 	 * If the log sense command is not supported, bypass the
6795 	 * following check; otherwise, examine the log sense
6796 	 * information for this device.
6797 	 */
6798 	if (SD_PM_STOP_MOTOR_NEEDED(un, level) &&
6799 	    un->un_f_log_sense_supported) {
6800 		/*
6801 		 * Get the log sense information to understand whether the
6802 		 * power-cycle counts have gone beyond the threshold.
6803 		 */
6804 		log_page_size = START_STOP_CYCLE_COUNTER_PAGE_SIZE;
6805 		log_page_data = kmem_zalloc(log_page_size, KM_SLEEP);
6806 
6807 		mutex_enter(SD_MUTEX(un));
6808 		log_sense_page = un->un_start_stop_cycle_page;
6809 		mutex_exit(SD_MUTEX(un));
6810 
6811 		rval = sd_send_scsi_LOG_SENSE(ssc, log_page_data,
6812 		    log_page_size, log_sense_page, 0x01, 0, SD_PATH_DIRECT);
6813 
6814 		if (rval != 0) {
6815 			if (rval == EIO)
6816 				sd_ssc_assessment(ssc, SD_FMT_STATUS_CHECK);
6817 			else
6818 				sd_ssc_assessment(ssc, SD_FMT_IGNORE);
6819 		}
6820 
6821 #ifdef	SDDEBUG
6822 		if (sd_force_pm_supported) {
6823 			/* Force a successful result */
6824 			rval = 0;
6825 		}
6826 #endif
6827 		if (rval != 0) {
6828 			scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
6829 			    "Log Sense Failed\n");
6830 
6831 			kmem_free(log_page_data, log_page_size);
6832 			/* Cannot support power management on those drives */
6833 
6834 			if (got_semaphore_here != 0) {
6835 				sema_v(&un->un_semoclose);
6836 			}
6837 			/*
6838 			 * On exit put the state back to its original value
6839 			 * and broadcast to anyone waiting for the power
6840 			 * change completion.
6841 			 */
6842 			mutex_enter(SD_MUTEX(un));
6843 			un->un_state = state_before_pm;
6844 			cv_broadcast(&un->un_suspend_cv);
6845 			mutex_exit(SD_MUTEX(un));
6846 			SD_TRACE(SD_LOG_IO_PM, un,
6847 			    "sdpower: exit, Log Sense Failed.\n");
6848 
6849 			goto sdpower_failed;
6850 		}
6851 
6852 		/*
6853 		 * Convert the essential information from the page data to
6854 		 * pm_trans_data.
6855 		 */
6856 		maxcycles =
6857 		    (log_page_data[0x1c] << 24) | (log_page_data[0x1d] << 16) |
6858 		    (log_page_data[0x1E] << 8)  | log_page_data[0x1F];
6859 
6860 		ncycles =
6861 		    (log_page_data[0x24] << 24) | (log_page_data[0x25] << 16) |
6862 		    (log_page_data[0x26] << 8)  | log_page_data[0x27];
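		/*
		 * Both values above are assembled from 4-byte big-endian
		 * fields in the log page data. The fixed offsets (0x1C and
		 * 0x24) are assumed to hold the lifetime-maximum and
		 * accumulated start/stop cycle counts in the START/STOP
		 * cycle counter page layout this driver expects.
		 */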
6863 
6864 		if (un->un_f_pm_log_sense_smart) {
6865 			sd_pm_tran_data.un.smart_count.allowed = maxcycles;
6866 			sd_pm_tran_data.un.smart_count.consumed = ncycles;
6867 			sd_pm_tran_data.un.smart_count.flag = 0;
6868 			sd_pm_tran_data.format = DC_SMART_FORMAT;
6869 		} else {
6870 			sd_pm_tran_data.un.scsi_cycles.lifemax = maxcycles;
6871 			sd_pm_tran_data.un.scsi_cycles.ncycles = ncycles;
6872 			for (i = 0; i < DC_SCSI_MFR_LEN; i++) {
6873 				sd_pm_tran_data.un.scsi_cycles.svc_date[i] =
6874 				    log_page_data[8+i];
6875 			}
6876 			sd_pm_tran_data.un.scsi_cycles.flag = 0;
6877 			sd_pm_tran_data.format = DC_SCSI_FORMAT;
6878 		}
6879 
6880 		kmem_free(log_page_data, log_page_size);
6881 
6882 		/*
6883 		 * Call the pm_trans_check routine to get the OK from
6884 		 * the global policy.
6885 		 */
6886 		rval = pm_trans_check(&sd_pm_tran_data, &intvlp);
6887 #ifdef	SDDEBUG
6888 		if (sd_force_pm_supported) {
6889 			/* Force a successful result */
6890 			rval = 1;
6891 		}
6892 #endif
6893 		switch (rval) {
6894 		case 0:
6895 			/*
6896 			 * It is not OK to power cycle, or there was an error in
6897 			 * the parameters passed. The framework has given the
6898 			 * advised time to wait before considering a power cycle.
6899 			 * Based on the new intvlp parameter we are supposed to
6900 			 * pretend we are busy so that the pm framework will never
6901 			 * call our power entry point. To do that, install a
6902 			 * timeout handler and wait for the recommended time to
6903 			 * elapse so that power management can be effective again.
6904 			 *
6905 			 * To effect this behavior, call pm_busy_component to
6906 			 * indicate to the framework this device is busy. By not
6907 			 * adjusting un_pm_count, the rest of PM in the driver
6908 			 * functions normally, independent of this; but because the
6909 			 * framework is told the device is busy, it won't attempt
6910 			 * powering down until it gets a matching idle. The timeout
6911 			 * handler sends this.
6912 			 * Note: sd_pm_entry can't be called here to do this
6913 			 * because sdpower may have been called as a result
6914 			 * of a call to pm_raise_power from within sd_pm_entry.
6915 			 *
6916 			 * If a timeout handler is already active then
6917 			 * don't install another.
6918 			 */
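			/*
			 * intvlp is the advised interval before power
			 * management can be effective again. Since
			 * drv_usectohz(1000000) is the number of clock ticks
			 * in one second, the multiplication below treats
			 * intvlp as a value in seconds and converts it to
			 * ticks for timeout(9F).
			 */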
6919 			mutex_enter(&un->un_pm_mutex);
6920 			if (un->un_pm_timeid == NULL) {
6921 				un->un_pm_timeid =
6922 				    timeout(sd_pm_timeout_handler,
6923 				    un, intvlp * drv_usectohz(1000000));
6924 				mutex_exit(&un->un_pm_mutex);
6925 				(void) pm_busy_component(SD_DEVINFO(un), 0);
6926 			} else {
6927 				mutex_exit(&un->un_pm_mutex);
6928 			}
6929 			if (got_semaphore_here != 0) {
6930 				sema_v(&un->un_semoclose);
6931 			}
6932 			/*
6933 			 * On exit put the state back to its original value
6934 			 * and broadcast to anyone waiting for the power
6935 			 * change completion.
6936 			 */
6937 			mutex_enter(SD_MUTEX(un));
6938 			un->un_state = state_before_pm;
6939 			cv_broadcast(&un->un_suspend_cv);
6940 			mutex_exit(SD_MUTEX(un));
6941 
6942 			SD_TRACE(SD_LOG_IO_PM, un, "sdpower: exit, "
6943 			    "trans check Failed, not ok to power cycle.\n");
6944 
6945 			goto sdpower_failed;
6946 		case -1:
6947 			if (got_semaphore_here != 0) {
6948 				sema_v(&un->un_semoclose);
6949 			}
6950 			/*
6951 			 * On exit put the state back to its original value
6952 			 * and broadcast to anyone waiting for the power
6953 			 * change completion.
6954 			 */
6955 			mutex_enter(SD_MUTEX(un));
6956 			un->un_state = state_before_pm;
6957 			cv_broadcast(&un->un_suspend_cv);
6958 			mutex_exit(SD_MUTEX(un));
6959 			SD_TRACE(SD_LOG_IO_PM, un,
6960 			    "sdpower: exit, trans check command Failed.\n");
6961 
6962 			goto sdpower_failed;
6963 		}
6964 	}
6965 
6966 	if (!SD_PM_IS_IO_CAPABLE(un, level)) {
6967 		/*
6968 		 * Save the last state... if the STOP fails we need it
6969 		 * for restoring.
6970 		 */
6971 		mutex_enter(SD_MUTEX(un));
6972 		save_state = un->un_last_state;
6973 		last_power_level = un->un_power_level;
6974 		/*
6975 		 * There must not be any commands getting processed
6976 		 * in the driver when we get here. Power to the
6977 		 * device is potentially going off.
6978 		 */
6979 		ASSERT(un->un_ncmds_in_driver == 0);
6980 		mutex_exit(SD_MUTEX(un));
6981 
6982 		/*
6983 		 * For now PM suspend the device completely before spindle is
6984 		 * turned off
6985 		 */
6986 		if ((rval = sd_pm_state_change(un, level, SD_PM_STATE_CHANGE))
6987 		    == DDI_FAILURE) {
6988 			if (got_semaphore_here != 0) {
6989 				sema_v(&un->un_semoclose);
6990 			}
6991 			/*
6992 			 * On exit put the state back to its original value
6993 			 * and broadcast to anyone waiting for the power
6994 			 * change completion.
6995 			 */
6996 			mutex_enter(SD_MUTEX(un));
6997 			un->un_state = state_before_pm;
6998 			un->un_power_level = last_power_level;
6999 			cv_broadcast(&un->un_suspend_cv);
7000 			mutex_exit(SD_MUTEX(un));
7001 			SD_TRACE(SD_LOG_IO_PM, un,
7002 			    "sdpower: exit, PM suspend Failed.\n");
7003 
7004 			goto sdpower_failed;
7005 		}
7006 	}
7007 
7008 	/*
7009 	 * The transition from SPINDLE_OFF to SPINDLE_ON can happen in open,
7010 	 * close, or strategy. Dump no longer uses this routine; it uses its
7011 	 * own code so it can be done in polled mode.
7012 	 */
7013 
7014 	medium_present = TRUE;
7015 
7016 	/*
7017 	 * When powering up, issue a TUR in case the device is at unit
7018 	 * attention.  Don't do retries. Bypass the PM layer, otherwise
7019 	 * a deadlock on un_pm_busy_cv will occur.
7020 	 */
7021 	if (SD_PM_IS_IO_CAPABLE(un, level)) {
7022 		sval = sd_send_scsi_TEST_UNIT_READY(ssc,
7023 		    SD_DONT_RETRY_TUR | SD_BYPASS_PM);
7024 		if (sval != 0)
7025 			sd_ssc_assessment(ssc, SD_FMT_IGNORE);
7026 	}
7027 
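	/*
	 * If the device supports the power conditions field of START STOP
	 * UNIT, request the target power condition directly; sd_pl2pc[] maps
	 * the driver power level to the corresponding SCSI power condition
	 * value. Otherwise fall back to a plain START (spin up) or STOP
	 * (spin down).
	 */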
7028 	if (un->un_f_power_condition_supported) {
7029 		char *pm_condition_name[] = {"STOPPED", "STANDBY",
7030 		    "IDLE", "ACTIVE"};
7031 		SD_TRACE(SD_LOG_IO_PM, un,
7032 		    "sdpower: sending \'%s\' power condition",
7033 		    pm_condition_name[level]);
7034 		sval = sd_send_scsi_START_STOP_UNIT(ssc, SD_POWER_CONDITION,
7035 		    sd_pl2pc[level], SD_PATH_DIRECT);
7036 	} else {
7037 		SD_TRACE(SD_LOG_IO_PM, un, "sdpower: sending \'%s\' unit\n",
7038 		    ((level == SD_SPINDLE_ON) ? "START" : "STOP"));
7039 		sval = sd_send_scsi_START_STOP_UNIT(ssc, SD_START_STOP,
7040 		    ((level == SD_SPINDLE_ON) ? SD_TARGET_START :
7041 		    SD_TARGET_STOP), SD_PATH_DIRECT);
7042 	}
7043 	if (sval != 0) {
7044 		if (sval == EIO)
7045 			sd_ssc_assessment(ssc, SD_FMT_STATUS_CHECK);
7046 		else
7047 			sd_ssc_assessment(ssc, SD_FMT_IGNORE);
7048 	}
7049 
7050 	/* Command failed, check for media present. */
7051 	if ((sval == ENXIO) && un->un_f_has_removable_media) {
7052 		medium_present = FALSE;
7053 	}
7054 
7055 	/*
7056 	 * The conditions of interest here are:
7057 	 *   if a spindle off with media present fails,
7058 	 *	then restore the state and return an error.
7059 	 *   else if a spindle on fails,
7060 	 *	then return an error (there's no state to restore).
7061 	 * In all other cases we set up for the new state
7062 	 * and return success.
7063 	 */
7064 	if (!SD_PM_IS_IO_CAPABLE(un, level)) {
7065 		if ((medium_present == TRUE) && (sval != 0)) {
7066 			/* The stop command from above failed */
7067 			rval = DDI_FAILURE;
7068 			/*
7069 			 * The stop command failed, and we have media
7070 			 * present. Put the level back by calling
7071 			 * sd_pm_state_change(SD_PM_STATE_ROLLBACK) and set
7072 			 * the state back to its previous value.
7073 			 */
7074 			(void) sd_pm_state_change(un, last_power_level,
7075 			    SD_PM_STATE_ROLLBACK);
7076 			mutex_enter(SD_MUTEX(un));
7077 			un->un_last_state = save_state;
7078 			mutex_exit(SD_MUTEX(un));
7079 		} else if (un->un_f_monitor_media_state) {
7080 			/*
7081 			 * The stop command from above succeeded.
7082 			 * Terminate watch thread in case of removable media
7083 			 * devices going into low power state. This is per
7084 			 * the pm framework's requirements; otherwise commands
7085 			 * will be generated for the device (through watch
7086 			 * thread), even when the device is in low power state.
7087 			 */
7088 			mutex_enter(SD_MUTEX(un));
7089 			un->un_f_watcht_stopped = FALSE;
7090 			if (un->un_swr_token != NULL) {
7091 				opaque_t temp_token = un->un_swr_token;
7092 				un->un_f_watcht_stopped = TRUE;
7093 				un->un_swr_token = NULL;
7094 				mutex_exit(SD_MUTEX(un));
7095 				(void) scsi_watch_request_terminate(temp_token,
7096 				    SCSI_WATCH_TERMINATE_ALL_WAIT);
7097 			} else {
7098 				mutex_exit(SD_MUTEX(un));
7099 			}
7100 		}
7101 	} else {
7102 		/*
7103 		 * The level requested is I/O capable.
7104 		 * Legacy behavior: return success on a failed spinup
7105 		 * if there is no media in the drive.
7106 		 * Do this by looking at medium_present here.
7107 		 */
7108 		if ((sval != 0) && medium_present) {
7109 			/* The start command from above failed */
7110 			rval = DDI_FAILURE;
7111 		} else {
7112 			/*
7113 			 * The start command from above succeeded.
7114 			 * PM-resume the device now that we have
7115 			 * started the disk.
7116 			 */
7117 			(void) sd_pm_state_change(un, level,
7118 			    SD_PM_STATE_CHANGE);
7119 
7120 			/*
7121 			 * Resume the watch thread since it was suspended
7122 			 * when the device went into low power mode.
7123 			 */
7124 			if (un->un_f_monitor_media_state) {
7125 				mutex_enter(SD_MUTEX(un));
7126 				if (un->un_f_watcht_stopped == TRUE) {
7127 					opaque_t temp_token;
7128 
7129 					un->un_f_watcht_stopped = FALSE;
7130 					mutex_exit(SD_MUTEX(un));
7131 					temp_token =
7132 					    sd_watch_request_submit(un);
7133 					mutex_enter(SD_MUTEX(un));
7134 					un->un_swr_token = temp_token;
7135 				}
7136 				mutex_exit(SD_MUTEX(un));
7137 			}
7138 		}
7139 	}
7140 
7141 	if (got_semaphore_here != 0) {
7142 		sema_v(&un->un_semoclose);
7143 	}
7144 	/*
7145 	 * On exit put the state back to its original value
7146 	 * and broadcast to anyone waiting for the power
7147 	 * change completion.
7148 	 */
7149 	mutex_enter(SD_MUTEX(un));
7150 	un->un_state = state_before_pm;
7151 	cv_broadcast(&un->un_suspend_cv);
7152 	mutex_exit(SD_MUTEX(un));
7153 
7154 	SD_TRACE(SD_LOG_IO_PM, un, "sdpower: exit, status = 0x%x\n", rval);
7155 
7156 	sd_ssc_fini(ssc);
7157 	return (rval);
7158 
7159 sdpower_failed:
7160 
7161 	sd_ssc_fini(ssc);
7162 	return (DDI_FAILURE);
7163 }
7164 
7165 
7166 
7167 /*
7168  *    Function: sdattach
7169  *
7170  * Description: Driver's attach(9e) entry point function.
7171  *
7172  *   Arguments: devi - opaque device info handle
7173  *		cmd  - attach  type
7174  *
7175  * Return Code: DDI_SUCCESS
7176  *		DDI_FAILURE
7177  *
7178  *     Context: Kernel thread context
7179  */
7180 
7181 static int
7182 sdattach(dev_info_t *devi, ddi_attach_cmd_t cmd)
7183 {
7184 	switch (cmd) {
7185 	case DDI_ATTACH:
7186 		return (sd_unit_attach(devi));
7187 	case DDI_RESUME:
7188 		return (sd_ddi_resume(devi));
7189 	default:
7190 		break;
7191 	}
7192 	return (DDI_FAILURE);
7193 }
7194 
7195 
7196 /*
7197  *    Function: sddetach
7198  *
7199  * Description: Driver's detach(9E) entry point function.
7200  *
7201  *   Arguments: devi - opaque device info handle
7202  *		cmd  - detach  type
7203  *
7204  * Return Code: DDI_SUCCESS
7205  *		DDI_FAILURE
7206  *
7207  *     Context: Kernel thread context
7208  */
7209 
7210 static int
7211 sddetach(dev_info_t *devi, ddi_detach_cmd_t cmd)
7212 {
7213 	switch (cmd) {
7214 	case DDI_DETACH:
7215 		return (sd_unit_detach(devi));
7216 	case DDI_SUSPEND:
7217 		return (sd_ddi_suspend(devi));
7218 	default:
7219 		break;
7220 	}
7221 	return (DDI_FAILURE);
7222 }
7223 
7224 
7225 /*
7226  *     Function: sd_sync_with_callback
7227  *
7228  *  Description: Prevents sd_unit_attach or sd_unit_detach from freeing the soft
7229  *		 state while the callback routine is active.
7230  *
7231  *    Arguments: un: softstate structure for the instance
7232  *
7233  *	Context: Kernel thread context
7234  */
7235 
7236 static void
7237 sd_sync_with_callback(struct sd_lun *un)
7238 {
7239 	ASSERT(un != NULL);
7240 
7241 	mutex_enter(SD_MUTEX(un));
7242 
7243 	ASSERT(un->un_in_callback >= 0);
7244 
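	/*
	 * Poll until the callback count drains to zero, dropping SD_MUTEX
	 * around a short delay (two clock ticks) so the callback itself can
	 * acquire the mutex and make progress.
	 */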
7245 	while (un->un_in_callback > 0) {
7246 		mutex_exit(SD_MUTEX(un));
7247 		delay(2);
7248 		mutex_enter(SD_MUTEX(un));
7249 	}
7250 
7251 	mutex_exit(SD_MUTEX(un));
7252 }
7253 
7254 /*
7255  *    Function: sd_unit_attach
7256  *
7257  * Description: Performs DDI_ATTACH processing for sdattach(). Allocates
7258  *		the soft state structure for the device and performs
7259  *		all necessary structure and device initializations.
7260  *
7261  *   Arguments: devi: the system's dev_info_t for the device.
7262  *
7263  * Return Code: DDI_SUCCESS if attach is successful.
7264  *		DDI_FAILURE if any part of the attach fails.
7265  *
7266  *     Context: Called at attach(9e) time for the DDI_ATTACH flag.
7267  *		Kernel thread context only.  Can sleep.
7268  */
7269 
7270 static int
7271 sd_unit_attach(dev_info_t *devi)
7272 {
7273 	struct	scsi_device	*devp;
7274 	struct	sd_lun		*un;
7275 	char			*variantp;
7276 	char			name_str[48];
7277 	int	reservation_flag = SD_TARGET_IS_UNRESERVED;
7278 	int	instance;
7279 	int	rval;
7280 	int	wc_enabled;
7281 	int	wc_changeable;
7282 	int	tgt;
7283 	uint64_t	capacity;
7284 	uint_t		lbasize = 0;
7285 	dev_info_t	*pdip = ddi_get_parent(devi);
7286 	int		offbyone = 0;
7287 	int		geom_label_valid = 0;
7288 	sd_ssc_t	*ssc;
7289 	int		status;
7290 	struct sd_fm_internal	*sfip = NULL;
7291 	int		max_xfer_size;
7292 
7293 	/*
7294 	 * Retrieve the target driver's private data area. This was set
7295 	 * up by the HBA.
7296 	 */
7297 	devp = ddi_get_driver_private(devi);
7298 
7299 	/*
7300 	 * Retrieve the target ID of the device.
7301 	 */
7302 	tgt = ddi_prop_get_int(DDI_DEV_T_ANY, devi, DDI_PROP_DONTPASS,
7303 	    SCSI_ADDR_PROP_TARGET, -1);
7304 
7305 	/*
7306 	 * Since we have no idea what state things were left in by the last
7307 	 * user of the device, set up some 'default' settings, i.e. turn 'em
7308 	 * off. The scsi_ifsetcap calls force re-negotiations with the drive.
7309 	 * Do this before the scsi_probe, which sends an inquiry.
7310 	 * This is a fix for bug (4430280).
7311 	 * Of special importance is wide-xfer. The drive could have been left
7312 	 * in wide transfer mode by the last driver to communicate with it,
7313 	 * this includes us. If that's the case, and if the following is not
7314 	 * setup properly or we don't re-negotiate with the drive prior to
7315 	 * transferring data to/from the drive, it causes bus parity errors,
7316 	 * data overruns, and unexpected interrupts. This first occurred when
7317 	 * the fix for bug (4378686) was made.
7318 	 */
7319 	(void) scsi_ifsetcap(&devp->sd_address, "lun-reset", 0, 1);
7320 	(void) scsi_ifsetcap(&devp->sd_address, "wide-xfer", 0, 1);
7321 	(void) scsi_ifsetcap(&devp->sd_address, "auto-rqsense", 0, 1);
7322 
7323 	/*
7324 	 * Currently, scsi_ifsetcap sets tagged-qing capability for all LUNs
7325 	 * on a target. Setting it per lun instance actually sets the
7326 	 * capability of this target, which affects those luns already
7327 	 * attached on the same target. So during attach, we can disable
7328 	 * this capability only when no other lun has been attached on this
7329 	 * target. By doing this, we assume a target has the same tagged-qing
7330 	 * capability for every lun. The condition can be removed when HBA
7331 	 * is changed to support per lun based tagged-qing capability.
7332 	 */
7333 	if (sd_scsi_get_target_lun_count(pdip, tgt) < 1) {
7334 		(void) scsi_ifsetcap(&devp->sd_address, "tagged-qing", 0, 1);
7335 	}
7336 
7337 	/*
7338 	 * Use scsi_probe() to issue an INQUIRY command to the device.
7339 	 * This call will allocate and fill in the scsi_inquiry structure
7340 	 * and point the sd_inq member of the scsi_device structure to it.
7341 	 * If the attach succeeds, then this memory will not be de-allocated
7342 	 * (via scsi_unprobe()) until the instance is detached.
7343 	 */
7344 	if (scsi_probe(devp, SLEEP_FUNC) != SCSIPROBE_EXISTS) {
7345 		goto probe_failed;
7346 	}
7347 
7348 	/*
7349 	 * Check the device type as specified in the inquiry data and
7350 	 * claim it if it is of a type that we support.
7351 	 */
7352 	switch (devp->sd_inq->inq_dtype) {
7353 	case DTYPE_DIRECT:
7354 		break;
7355 	case DTYPE_RODIRECT:
7356 		break;
7357 	case DTYPE_OPTICAL:
7358 		break;
7359 	case DTYPE_NOTPRESENT:
7360 	default:
7361 		/* Unsupported device type; fail the attach. */
7362 		goto probe_failed;
7363 	}
7364 
7365 	/*
7366 	 * Allocate the soft state structure for this unit.
7367 	 *
7368 	 * We rely upon this memory being set to all zeroes by
7369 	 * ddi_soft_state_zalloc().  We assume that any member of the
7370 	 * soft state structure that is not explicitly initialized by
7371 	 * this routine will have a value of zero.
7372 	 */
7373 	instance = ddi_get_instance(devp->sd_dev);
7374 	if (ddi_soft_state_zalloc(sd_state, instance) != DDI_SUCCESS) {
7375 		goto probe_failed;
7376 	}
7377 
7378 	/*
7379 	 * Retrieve a pointer to the newly-allocated soft state.
7380 	 *
7381 	 * This should NEVER fail if the ddi_soft_state_zalloc() call above
7382 	 * was successful, unless something has gone horribly wrong and the
7383 	 * ddi's soft state internals are corrupt (in which case it is
7384 	 * probably better to halt here than just fail the attach....)
7385 	 */
7386 	if ((un = ddi_get_soft_state(sd_state, instance)) == NULL) {
7387 		panic("sd_unit_attach: NULL soft state on instance:0x%x",
7388 		    instance);
7389 		/*NOTREACHED*/
7390 	}
7391 
7392 	/*
7393 	 * Link the back ptr of the driver soft state to the scsi_device
7394 	 * struct for this lun.
7395 	 * Save a pointer to the softstate in the driver-private area of
7396 	 * the scsi_device struct.
7397 	 * Note: We cannot call SD_INFO, SD_TRACE, SD_ERROR, or SD_DIAG until
7398 	 * we first set un->un_sd below.
7399 	 */
7400 	un->un_sd = devp;
7401 	devp->sd_private = (opaque_t)un;
7402 
7403 	/*
7404 	 * The following must be after devp is stored in the soft state struct.
7405 	 */
7406 #ifdef SDDEBUG
7407 	SD_TRACE(SD_LOG_ATTACH_DETACH, un,
7408 	    "%s_unit_attach: un:0x%p instance:%d\n",
7409 	    ddi_driver_name(devi), un, instance);
7410 #endif
7411 
7412 	/*
7413 	 * Set up the device type and node type (for the minor nodes).
7414 	 * By default we assume that the device can at least support the
7415 	 * Common Command Set. Call it a CD-ROM if it reports itself
7416 	 * as a RODIRECT device.
7417 	 */
7418 	switch (devp->sd_inq->inq_dtype) {
7419 	case DTYPE_RODIRECT:
7420 		un->un_node_type = DDI_NT_CD_CHAN;
7421 		un->un_ctype	 = CTYPE_CDROM;
7422 		break;
7423 	case DTYPE_OPTICAL:
7424 		un->un_node_type = DDI_NT_BLOCK_CHAN;
7425 		un->un_ctype	 = CTYPE_ROD;
7426 		break;
7427 	default:
7428 		un->un_node_type = DDI_NT_BLOCK_CHAN;
7429 		un->un_ctype	 = CTYPE_CCS;
7430 		break;
7431 	}
7432 
7433 	/*
7434 	 * Try to read the interconnect type from the HBA.
7435 	 *
7436 	 * Note: This driver is currently compiled as two binaries, a parallel
7437 	 * scsi version (sd) and a fibre channel version (ssd). All functional
7438 	 * differences are determined at compile time. In the future a single
7439 	 * binary will be provided and the interconnect type will be used to
7440 	 * differentiate between fibre and parallel scsi behaviors. At that time
7441 	 * it will be necessary for all fibre channel HBAs to support this
7442 	 * property.
7443 	 *
7444 	 * Set un_f_is_fibre to TRUE (default to fibre).
7445 	 */
7446 	un->un_f_is_fibre = TRUE;
7447 	switch (scsi_ifgetcap(SD_ADDRESS(un), "interconnect-type", -1)) {
7448 	case INTERCONNECT_SSA:
7449 		un->un_interconnect_type = SD_INTERCONNECT_SSA;
7450 		SD_INFO(SD_LOG_ATTACH_DETACH, un,
7451 		    "sd_unit_attach: un:0x%p SD_INTERCONNECT_SSA\n", un);
7452 		break;
7453 	case INTERCONNECT_PARALLEL:
7454 		un->un_f_is_fibre = FALSE;
7455 		un->un_interconnect_type = SD_INTERCONNECT_PARALLEL;
7456 		SD_INFO(SD_LOG_ATTACH_DETACH, un,
7457 		    "sd_unit_attach: un:0x%p SD_INTERCONNECT_PARALLEL\n", un);
7458 		break;
7459 	case INTERCONNECT_SAS:
7460 		un->un_f_is_fibre = FALSE;
7461 		un->un_interconnect_type = SD_INTERCONNECT_SAS;
7462 		un->un_node_type = DDI_NT_BLOCK_SAS;
7463 		SD_INFO(SD_LOG_ATTACH_DETACH, un,
7464 		    "sd_unit_attach: un:0x%p SD_INTERCONNECT_SAS\n", un);
7465 		break;
7466 	case INTERCONNECT_SATA:
7467 		un->un_f_is_fibre = FALSE;
7468 		un->un_interconnect_type = SD_INTERCONNECT_SATA;
7469 		SD_INFO(SD_LOG_ATTACH_DETACH, un,
7470 		    "sd_unit_attach: un:0x%p SD_INTERCONNECT_SATA\n", un);
7471 		break;
7472 	case INTERCONNECT_FIBRE:
7473 		un->un_interconnect_type = SD_INTERCONNECT_FIBRE;
7474 		SD_INFO(SD_LOG_ATTACH_DETACH, un,
7475 		    "sd_unit_attach: un:0x%p SD_INTERCONNECT_FIBRE\n", un);
7476 		break;
7477 	case INTERCONNECT_FABRIC:
7478 		un->un_interconnect_type = SD_INTERCONNECT_FABRIC;
7479 		un->un_node_type = DDI_NT_BLOCK_FABRIC;
7480 		SD_INFO(SD_LOG_ATTACH_DETACH, un,
7481 		    "sd_unit_attach: un:0x%p SD_INTERCONNECT_FABRIC\n", un);
7482 		break;
7483 	default:
7484 #ifdef SD_DEFAULT_INTERCONNECT_TYPE
7485 		/*
7486 		 * The HBA does not support the "interconnect-type" property
7487 		 * (or did not provide a recognized type).
7488 		 *
7489 		 * Note: This will be obsoleted when a single fibre channel
7490 		 * and parallel scsi driver is delivered. In the meantime the
7491 		 * interconnect type will be set to the platform default. If that
7492 		 * type is not parallel SCSI, it means that we should be
7493 		 * assuming "ssd" semantics. However, here this also means that
7494 		 * the FC HBA is not supporting the "interconnect-type" property
7495 		 * like we expect it to, so log this occurrence.
7496 		 */
7497 		un->un_interconnect_type = SD_DEFAULT_INTERCONNECT_TYPE;
7498 		if (!SD_IS_PARALLEL_SCSI(un)) {
7499 			SD_INFO(SD_LOG_ATTACH_DETACH, un,
7500 			    "sd_unit_attach: un:0x%p Assuming "
7501 			    "INTERCONNECT_FIBRE\n", un);
7502 		} else {
7503 			SD_INFO(SD_LOG_ATTACH_DETACH, un,
7504 			    "sd_unit_attach: un:0x%p Assuming "
7505 			    "INTERCONNECT_PARALLEL\n", un);
7506 			un->un_f_is_fibre = FALSE;
7507 		}
7508 #else
7509 		/*
7510 		 * Note: This source will be implemented when a single fibre
7511 		 * channel and parallel scsi driver is delivered. The default
7512 		 * will be to assume that if a device does not support the
7513 		 * "interconnect-type" property it is a parallel SCSI HBA and
7514 		 * we will set the interconnect type for parallel scsi.
7515 		 */
7516 		un->un_interconnect_type = SD_INTERCONNECT_PARALLEL;
7517 		un->un_f_is_fibre = FALSE;
7518 #endif
7519 		break;
7520 	}
7521 
7522 	if (un->un_f_is_fibre == TRUE) {
7523 		if (scsi_ifgetcap(SD_ADDRESS(un), "scsi-version", 1) ==
7524 		    SCSI_VERSION_3) {
7525 			switch (un->un_interconnect_type) {
7526 			case SD_INTERCONNECT_FIBRE:
7527 			case SD_INTERCONNECT_SSA:
7528 				un->un_node_type = DDI_NT_BLOCK_WWN;
7529 				break;
7530 			default:
7531 				break;
7532 			}
7533 		}
7534 	}
7535 
7536 	/*
7537 	 * Initialize the Request Sense command for the target
7538 	 */
7539 	if (sd_alloc_rqs(devp, un) != DDI_SUCCESS) {
7540 		goto alloc_rqs_failed;
7541 	}
7542 
7543 	/*
7544 	 * Set un_retry_count to SD_RETRY_COUNT; this is OK for SPARC
7545 	 * with separate binaries for sd and ssd.
7546 	 *
7547 	 * x86 has one binary, and un_retry_count is set based on the
7548 	 * interconnect type. The hardcoded values will go away when
7549 	 * SPARC uses one binary for sd and ssd. These hardcoded values
7550 	 * need to match SD_RETRY_COUNT in sddef.h.
7551 	 * The value used is based on interconnect type:
7552 	 * fibre = 3, parallel = 5.
7553 	 */
7554 #if defined(__x86)
7555 	un->un_retry_count = un->un_f_is_fibre ? 3 : 5;
7556 #else
7557 	un->un_retry_count = SD_RETRY_COUNT;
7558 #endif
7559 
7560 	/*
7561 	 * Set the per disk retry count to the default number of retries
7562 	 * for disks and CDROMs. This value can be overridden by the
7563 	 * disk property list or an entry in sd.conf.
7564 	 */
7565 	un->un_notready_retry_count =
7566 	    ISCD(un) ? CD_NOT_READY_RETRY_COUNT(un)
7567 	    : DISK_NOT_READY_RETRY_COUNT(un);
7568 
7569 	/*
7570 	 * Set the busy retry count to the default value of un_retry_count.
7571 	 * This can be overridden by entries in sd.conf or the device
7572 	 * config table.
7573 	 */
7574 	un->un_busy_retry_count = un->un_retry_count;
7575 
7576 	/*
7577 	 * Init the reset threshold for retries.  This number determines
7578 	 * how many retries must be performed before a reset can be issued
7579 	 * (for certain error conditions). This can be overridden by entries
7580 	 * in sd.conf or the device config table.
7581 	 */
7582 	un->un_reset_retry_count = (un->un_retry_count / 2);
7583 
7584 	/*
7585 	 * Set the victim_retry_count to the default un_retry_count
7586 	 */
7587 	un->un_victim_retry_count = (2 * un->un_retry_count);
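	/*
	 * For example, with un_retry_count == 5 (the parallel SCSI value
	 * noted above), this yields a reset threshold of 2 retries and a
	 * victim retry count of 10.
	 */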
7588 
7589 	/*
7590 	 * Set the reservation release timeout to the default value of
7591 	 * 5 seconds. This can be overridden by entries in ssd.conf or the
7592 	 * device config table.
7593 	 */
7594 	un->un_reserve_release_time = 5;
7595 
7596 	/*
7597 	 * Set up the default maximum transfer size. Note that this may
7598 	 * get updated later in the attach, when setting up default wide
7599 	 * operations for disks.
7600 	 */
7601 #if defined(__x86)
7602 	un->un_max_xfer_size = (uint_t)SD_DEFAULT_MAX_XFER_SIZE;
7603 	un->un_partial_dma_supported = 1;
7604 #else
7605 	un->un_max_xfer_size = (uint_t)maxphys;
7606 #endif
7607 
7608 	/*
7609 	 * Get "allow bus device reset" property (defaults to "enabled" if
7610 	 * the property was not defined). This is to disable bus resets for
7611 	 * certain kinds of error recovery. Note: In the future when a run-time
7612 	 * fibre check is available the soft state flag should default to
7613 	 * enabled.
7614 	 */
7615 	if (un->un_f_is_fibre == TRUE) {
7616 		un->un_f_allow_bus_device_reset = TRUE;
7617 	} else {
7618 		if (ddi_getprop(DDI_DEV_T_ANY, devi, DDI_PROP_DONTPASS,
7619 		    "allow-bus-device-reset", 1) != 0) {
7620 			un->un_f_allow_bus_device_reset = TRUE;
7621 			SD_INFO(SD_LOG_ATTACH_DETACH, un,
7622 			    "sd_unit_attach: un:0x%p Bus device reset "
7623 			    "enabled\n", un);
7624 		} else {
7625 			un->un_f_allow_bus_device_reset = FALSE;
7626 			SD_INFO(SD_LOG_ATTACH_DETACH, un,
7627 			    "sd_unit_attach: un:0x%p Bus device reset "
7628 			    "disabled\n", un);
7629 		}
7630 	}
7631 
7632 	/*
7633 	 * Check if this is an ATAPI device. ATAPI devices use Group 1
7634 	 * Read/Write commands and Group 2 Mode Sense/Select commands.
7635 	 *
7636 	 * Note: The "obsolete" way of doing this is to check for the "atapi"
7637 	 * property. The new "variant" property with a value of "atapi" has been
7638 	 * introduced so that future 'variants' of standard SCSI behavior (like
7639 	 * atapi) could be specified by the underlying HBA drivers by supplying
7640 	 * a new value for the "variant" property, instead of having to define a
7641 	 * new property.
7642 	 */
7643 	if (ddi_prop_get_int(DDI_DEV_T_ANY, devi, 0, "atapi", -1) != -1) {
7644 		un->un_f_cfg_is_atapi = TRUE;
7645 		SD_INFO(SD_LOG_ATTACH_DETACH, un,
7646 		    "sd_unit_attach: un:0x%p Atapi device\n", un);
7647 	}
7648 	if (ddi_prop_lookup_string(DDI_DEV_T_ANY, devi, 0, "variant",
7649 	    &variantp) == DDI_PROP_SUCCESS) {
7650 		if (strcmp(variantp, "atapi") == 0) {
7651 			un->un_f_cfg_is_atapi = TRUE;
7652 			SD_INFO(SD_LOG_ATTACH_DETACH, un,
7653 			    "sd_unit_attach: un:0x%p Atapi device\n", un);
7654 		}
7655 		ddi_prop_free(variantp);
7656 	}
7657 
7658 	un->un_cmd_timeout	= SD_IO_TIME;
7659 
7660 	un->un_busy_timeout  = SD_BSY_TIMEOUT;
7661 
7662 	/* Info on current states, statuses, etc. (Updated frequently) */
7663 	un->un_state		= SD_STATE_NORMAL;
7664 	un->un_last_state	= SD_STATE_NORMAL;
7665 
7666 	/* Control & status info for command throttling */
7667 	un->un_throttle		= sd_max_throttle;
7668 	un->un_saved_throttle	= sd_max_throttle;
7669 	un->un_min_throttle	= sd_min_throttle;
7670 
7671 	if (un->un_f_is_fibre == TRUE) {
7672 		un->un_f_use_adaptive_throttle = TRUE;
7673 	} else {
7674 		un->un_f_use_adaptive_throttle = FALSE;
7675 	}
7676 
7677 	/* Removable media support. */
7678 	cv_init(&un->un_state_cv, NULL, CV_DRIVER, NULL);
7679 	un->un_mediastate		= DKIO_NONE;
7680 	un->un_specified_mediastate	= DKIO_NONE;
7681 
7682 	/* CVs for suspend/resume (PM or DR) */
7683 	cv_init(&un->un_suspend_cv,   NULL, CV_DRIVER, NULL);
7684 	cv_init(&un->un_disk_busy_cv, NULL, CV_DRIVER, NULL);
7685 
7686 	/* Power management support. */
7687 	un->un_power_level = SD_SPINDLE_UNINIT;
7688 
7689 	cv_init(&un->un_wcc_cv,   NULL, CV_DRIVER, NULL);
7690 	un->un_f_wcc_inprog = 0;
7691 
7692 	/*
7693 	 * The open/close semaphore is used to serialize threads executing
7694 	 * in the driver's open & close entry point routines for a given
7695 	 * instance.
7696 	 */
7697 	(void) sema_init(&un->un_semoclose, 1, NULL, SEMA_DRIVER, NULL);
7698 
7699 	/*
7700 	 * The conf file entry and softstate variable are a forceful override,
7701 	 * meaning a non-zero value must be entered to change the default.
7702 	 */
7703 	un->un_f_disksort_disabled = FALSE;
7704 	un->un_f_rmw_type = SD_RMW_TYPE_DEFAULT;
7705 	un->un_f_enable_rmw = FALSE;
7706 
7707 	/*
7708 	 * GET EVENT STATUS NOTIFICATION media polling enabled by default, but
7709 	 * can be overridden via [s]sd-config-list "mmc-gesn-polling" property.
7710 	 */
7711 	un->un_f_mmc_gesn_polling = TRUE;
7712 
7713 	/*
7714 	 * Physical sector size defaults to DEV_BSIZE currently. We can
7715 	 * override this value via the driver configuration file so we must
7716 	 * set it before calling sd_read_unit_properties().
7717 	 */
7718 	un->un_phy_blocksize = DEV_BSIZE;
7719 
7720 	/*
7721 	 * Retrieve the properties from the static driver table or the driver
7722 	 * configuration file (.conf) for this unit and update the soft state
7723 	 * for the device as needed for the indicated properties.
7724 	 * Note: the property configuration needs to occur here as some of the
7725 	 * following routines may have dependencies on soft state flags set
7726 	 * as part of the driver property configuration.
7727 	 */
7728 	sd_read_unit_properties(un);
7729 	SD_TRACE(SD_LOG_ATTACH_DETACH, un,
7730 	    "sd_unit_attach: un:0x%p property configuration complete.\n", un);
7731 
7732 	/*
7733 	 * A device is treated as hotpluggable only if it has the
7734 	 * "hotpluggable" property. Otherwise, it is regarded as
7735 	 * non-hotpluggable.
7736 	 */
7737 	if (ddi_prop_get_int(DDI_DEV_T_ANY, devi, 0, "hotpluggable",
7738 	    -1) != -1) {
7739 		un->un_f_is_hotpluggable = TRUE;
7740 	}
7741 
7742 	/*
7743 	 * Set the unit's attributes (flags) according to "hotpluggable" and
7744 	 * the RMB bit in the INQUIRY data.
7745 	 */
7746 	sd_set_unit_attributes(un, devi);
7747 
7748 	/*
7749 	 * By default, we mark the capacity, lbasize, and geometry
7750 	 * as invalid. Only if we successfully read a valid capacity
7751 	 * will we update the un_blockcount and un_tgt_blocksize with the
7752 	 * valid values (the geometry will be validated later).
7753 	 */
7754 	un->un_f_blockcount_is_valid	= FALSE;
7755 	un->un_f_tgt_blocksize_is_valid	= FALSE;
7756 
7757 	/*
7758 	 * Use DEV_BSIZE and DEV_BSHIFT as defaults, until we can determine
7759 	 * otherwise.
7760 	 */
7761 	un->un_tgt_blocksize  = un->un_sys_blocksize  = DEV_BSIZE;
7762 	un->un_blockcount = 0;
7763 
7764 	/*
7765 	 * Set up the per-instance info needed to determine the correct
7766 	 * CDBs and other info for issuing commands to the target.
7767 	 */
7768 	sd_init_cdb_limits(un);
7769 
7770 	/*
7771 	 * Set up the IO chains to use, based upon the target type.
7772 	 */
7773 	if (un->un_f_non_devbsize_supported) {
7774 		un->un_buf_chain_type = SD_CHAIN_INFO_RMMEDIA;
7775 	} else {
7776 		un->un_buf_chain_type = SD_CHAIN_INFO_DISK;
7777 	}
7778 	un->un_uscsi_chain_type  = SD_CHAIN_INFO_USCSI_CMD;
7779 	un->un_direct_chain_type = SD_CHAIN_INFO_DIRECT_CMD;
7780 	un->un_priority_chain_type = SD_CHAIN_INFO_PRIORITY_CMD;
7781 
7782 	un->un_xbuf_attr = ddi_xbuf_attr_create(sizeof (struct sd_xbuf),
7783 	    sd_xbuf_strategy, un, sd_xbuf_active_limit,  sd_xbuf_reserve_limit,
7784 	    ddi_driver_major(devi), DDI_XBUF_QTHREAD_DRIVER);
7785 	ddi_xbuf_attr_register_devinfo(un->un_xbuf_attr, devi);
7786 
7787 
7788 	if (ISCD(un)) {
7789 		un->un_additional_codes = sd_additional_codes;
7790 	} else {
7791 		un->un_additional_codes = NULL;
7792 	}
7793 
7794 	/*
7795 	 * Create the kstats here so they can be available for attach-time
7796 	 * routines that send commands to the unit (either polled or via
7797 	 * sd_send_scsi_cmd).
7798 	 *
7799 	 * Note: This is a critical sequence that needs to be maintained:
7800 	 *	1) Instantiate the kstats here, before any routines using the
7801 	 *	   iopath (i.e. sd_send_scsi_cmd).
7802 	 *	2) Instantiate and initialize the partition stats
7803 	 *	   (sd_set_pstats).
7804 	 *	3) Initialize the error stats (sd_set_errstats), following
7805 	 *	   sd_validate_geometry(),sd_register_devid(),
7806 	 *	   and sd_cache_control().
7807 	 */
7808 
7809 	un->un_stats = kstat_create(sd_label, instance,
7810 	    NULL, "disk", KSTAT_TYPE_IO, 1, KSTAT_FLAG_PERSISTENT);
7811 	if (un->un_stats != NULL) {
7812 		un->un_stats->ks_lock = SD_MUTEX(un);
7813 		kstat_install(un->un_stats);
7814 	}
7815 	SD_TRACE(SD_LOG_ATTACH_DETACH, un,
7816 	    "sd_unit_attach: un:0x%p un_stats created\n", un);
7817 
7818 	un->un_unmapstats_ks = kstat_create(sd_label, instance, "unmapstats",
7819 	    "misc", KSTAT_TYPE_NAMED, sizeof (*un->un_unmapstats) /
7820 	    sizeof (kstat_named_t), 0);
7821 	if (un->un_unmapstats_ks) {
7822 		un->un_unmapstats = un->un_unmapstats_ks->ks_data;
7823 
7824 		kstat_named_init(&un->un_unmapstats->us_cmds,
7825 		    "commands", KSTAT_DATA_UINT64);
7826 		kstat_named_init(&un->un_unmapstats->us_errs,
7827 		    "errors", KSTAT_DATA_UINT64);
7828 		kstat_named_init(&un->un_unmapstats->us_extents,
7829 		    "extents", KSTAT_DATA_UINT64);
7830 		kstat_named_init(&un->un_unmapstats->us_bytes,
7831 		    "bytes", KSTAT_DATA_UINT64);
7832 
7833 		kstat_install(un->un_unmapstats_ks);
7834 	} else {
7835 		cmn_err(CE_NOTE, "!Cannot create unmap kstats for disk %d",
7836 		    instance);
7837 	}
7838 
7839 	sd_create_errstats(un, instance);
7840 	if (un->un_errstats == NULL) {
7841 		goto create_errstats_failed;
7842 	}
7843 	SD_TRACE(SD_LOG_ATTACH_DETACH, un,
7844 	    "sd_unit_attach: un:0x%p errstats created\n", un);
7845 
7846 	/*
7847 	 * The following if/else code was relocated here from below as part
7848 	 * of the fix for bug (4430280). However with the default setup added
7849 	 * on entry to this routine, it's no longer absolutely necessary for
7850 	 * this to be before the call to sd_spin_up_unit.
7851 	 */
7852 	if (SD_IS_PARALLEL_SCSI(un) || SD_IS_SERIAL(un)) {
7853 		int tq_trigger_flag = (((devp->sd_inq->inq_ansi == 4) ||
7854 		    (devp->sd_inq->inq_ansi == 5)) &&
7855 		    devp->sd_inq->inq_bque) || devp->sd_inq->inq_cmdque;
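		/*
		 * tq_trigger_flag is non-zero when the INQUIRY data suggests
		 * the target can queue commands: either CmdQue is set, or
		 * BQue (basic queueing) is set on a device reporting ANSI
		 * version 4 or 5.
		 */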
7856 
7857 		/*
7858 		 * If tagged queueing is supported by the target
7859 		 * and by the host adapter then we will enable it
7860 		 */
7861 		un->un_tagflags = 0;
7862 		if ((devp->sd_inq->inq_rdf == RDF_SCSI2) && tq_trigger_flag &&
7863 		    (un->un_f_arq_enabled == TRUE)) {
7864 			if (scsi_ifsetcap(SD_ADDRESS(un), "tagged-qing",
7865 			    1, 1) == 1) {
7866 				un->un_tagflags = FLAG_STAG;
7867 				SD_INFO(SD_LOG_ATTACH_DETACH, un,
7868 				    "sd_unit_attach: un:0x%p tag queueing "
7869 				    "enabled\n", un);
7870 			} else if (scsi_ifgetcap(SD_ADDRESS(un),
7871 			    "untagged-qing", 0) == 1) {
7872 				un->un_f_opt_queueing = TRUE;
7873 				un->un_saved_throttle = un->un_throttle =
7874 				    min(un->un_throttle, 3);
7875 			} else {
7876 				un->un_f_opt_queueing = FALSE;
7877 				un->un_saved_throttle = un->un_throttle = 1;
7878 			}
7879 		} else if ((scsi_ifgetcap(SD_ADDRESS(un), "untagged-qing", 0)
7880 		    == 1) && (un->un_f_arq_enabled == TRUE)) {
7881 			/* The Host Adapter supports internal queueing. */
7882 			un->un_f_opt_queueing = TRUE;
7883 			un->un_saved_throttle = un->un_throttle =
7884 			    min(un->un_throttle, 3);
7885 		} else {
7886 			un->un_f_opt_queueing = FALSE;
7887 			un->un_saved_throttle = un->un_throttle = 1;
7888 			SD_INFO(SD_LOG_ATTACH_DETACH, un,
7889 			    "sd_unit_attach: un:0x%p no tag queueing\n", un);
7890 		}
7891 
7892 		/*
7893 		 * Enable large transfers for SATA/SAS drives
7894 		 */
7895 		if (SD_IS_SERIAL(un)) {
7896 			un->un_max_xfer_size =
7897 			    ddi_getprop(DDI_DEV_T_ANY, devi, 0,
7898 			    sd_max_xfer_size, SD_MAX_XFER_SIZE);
7899 			SD_INFO(SD_LOG_ATTACH_DETACH, un,
7900 			    "sd_unit_attach: un:0x%p max transfer "
7901 			    "size=0x%x\n", un, un->un_max_xfer_size);
7902 
7903 		}
7904 
7905 		/* Set up or tear down default wide operations for disks */
7906 
7907 		/*
7908 		 * Note: Legacy: it may be possible for both "sd_max_xfer_size"
7909 		 * and "ssd_max_xfer_size" to exist simultaneously on the same
7910 		 * system and be set to different values. In the future this
7911 		 * code may need to be updated when the ssd module is
7912 		 * obsoleted and removed from the system. (4299588)
7913 		 */
7914 		if (SD_IS_PARALLEL_SCSI(un) &&
7915 		    (devp->sd_inq->inq_rdf == RDF_SCSI2) &&
7916 		    (devp->sd_inq->inq_wbus16 || devp->sd_inq->inq_wbus32)) {
7917 			if (scsi_ifsetcap(SD_ADDRESS(un), "wide-xfer",
7918 			    1, 1) == 1) {
7919 				SD_INFO(SD_LOG_ATTACH_DETACH, un,
7920 				    "sd_unit_attach: un:0x%p Wide Transfer "
7921 				    "enabled\n", un);
7922 			}
7923 
7924 			/*
7925 			 * If tagged queuing has also been enabled, then
7926 			 * enable large xfers
7927 			 */
7928 			if (un->un_saved_throttle == sd_max_throttle) {
7929 				un->un_max_xfer_size =
7930 				    ddi_getprop(DDI_DEV_T_ANY, devi, 0,
7931 				    sd_max_xfer_size, SD_MAX_XFER_SIZE);
7932 				SD_INFO(SD_LOG_ATTACH_DETACH, un,
7933 				    "sd_unit_attach: un:0x%p max transfer "
7934 				    "size=0x%x\n", un, un->un_max_xfer_size);
7935 			}
7936 		} else {
7937 			if (scsi_ifsetcap(SD_ADDRESS(un), "wide-xfer",
7938 			    0, 1) == 1) {
7939 				SD_INFO(SD_LOG_ATTACH_DETACH, un,
7940 				    "sd_unit_attach: un:0x%p "
7941 				    "Wide Transfer disabled\n", un);
7942 			}
7943 		}
7944 	} else {
7945 		un->un_tagflags = FLAG_STAG;
7946 		un->un_max_xfer_size = ddi_getprop(DDI_DEV_T_ANY,
7947 		    devi, 0, sd_max_xfer_size, SD_MAX_XFER_SIZE);
7948 	}
7949 
7950 	/*
7951 	 * If this target supports LUN reset, try to enable it.
7952 	 */
7953 	if (un->un_f_lun_reset_enabled) {
7954 		if (scsi_ifsetcap(SD_ADDRESS(un), "lun-reset", 1, 1) == 1) {
7955 			SD_INFO(SD_LOG_ATTACH_DETACH, un, "sd_unit_attach: "
7956 			    "un:0x%p lun_reset capability set\n", un);
7957 		} else {
7958 			SD_INFO(SD_LOG_ATTACH_DETACH, un, "sd_unit_attach: "
7959 			    "un:0x%p lun-reset capability not set\n", un);
7960 		}
7961 	}
7962 
7963 	/*
7964 	 * Adjust the maximum transfer size. This is to fix
7965 	 * the problem of partial DMA support on SPARC. Some
7966 	 * HBA drivers, such as aac, have a very small dma_attr_maxxfer
7967 	 * size, which requires partial DMA support on SPARC.
7968 	 * In the future the SPARC pci nexus driver may solve
7969 	 * the problem instead of this fix.
7970 	 */
7971 	max_xfer_size = scsi_ifgetcap(SD_ADDRESS(un), "dma-max", 1);
7972 	if ((max_xfer_size > 0) && (max_xfer_size < un->un_max_xfer_size)) {
7973 		/* We need DMA partial even on sparc to ensure sddump() works */
7974 		un->un_max_xfer_size = max_xfer_size;
7975 		if (un->un_partial_dma_supported == 0)
7976 			un->un_partial_dma_supported = 1;
7977 	}
7978 	if (ddi_prop_get_int(DDI_DEV_T_ANY, SD_DEVINFO(un),
7979 	    DDI_PROP_DONTPASS, "buf_break", 0) == 1) {
7980 		if (ddi_xbuf_attr_setup_brk(un->un_xbuf_attr,
7981 		    un->un_max_xfer_size) == 1) {
7982 			un->un_buf_breakup_supported = 1;
7983 			SD_INFO(SD_LOG_ATTACH_DETACH, un, "sd_unit_attach: "
7984 			    "un:0x%p Buf breakup enabled\n", un);
7985 		}
7986 	}
7987 
7988 	/*
7989 	 * Set PKT_DMA_PARTIAL flag.
7990 	 */
7991 	if (un->un_partial_dma_supported == 1) {
7992 		un->un_pkt_flags = PKT_DMA_PARTIAL;
7993 	} else {
7994 		un->un_pkt_flags = 0;
7995 	}
7996 
7997 	/* Initialize sd_ssc_t for internal uscsi commands */
7998 	ssc = sd_ssc_init(un);
7999 	scsi_fm_init(devp);
8000 
8001 	/*
8002 	 * Allocate memory for SCSI FMA stuffs.
8003 	 */
8004 	un->un_fm_private =
8005 	    kmem_zalloc(sizeof (struct sd_fm_internal), KM_SLEEP);
8006 	sfip = (struct sd_fm_internal *)un->un_fm_private;
8007 	sfip->fm_ssc.ssc_uscsi_cmd = &sfip->fm_ucmd;
8008 	sfip->fm_ssc.ssc_uscsi_info = &sfip->fm_uinfo;
8009 	sfip->fm_ssc.ssc_un = un;
8010 
8011 	if (ISCD(un) ||
8012 	    un->un_f_has_removable_media ||
8013 	    devp->sd_fm_capable == DDI_FM_NOT_CAPABLE) {
8014 		/*
8015 		 * We don't touch CD-ROMs or DDI_FM_NOT_CAPABLE devices.
8016 		 * Their logging is unchanged.
8017 		 */
8018 		sfip->fm_log_level = SD_FM_LOG_NSUP;
8019 	} else {
8020 		/*
8021 		 * If we get here, this is a non-CDROM, FM-capable
8022 		 * device, and it will not keep the old scsi_log as before
8023 		 * in /var/adm/messages. However, the property
8024 		 * "fm-scsi-log" will control whether the FM telemetry will
8025 		 * be logged in /var/adm/messages.
8026 		 */
8027 		int fm_scsi_log;
8028 		fm_scsi_log = ddi_prop_get_int(DDI_DEV_T_ANY, SD_DEVINFO(un),
8029 		    DDI_PROP_DONTPASS | DDI_PROP_NOTPROM, "fm-scsi-log", 0);
8030 
8031 		if (fm_scsi_log)
8032 			sfip->fm_log_level = SD_FM_LOG_EREPORT;
8033 		else
8034 			sfip->fm_log_level = SD_FM_LOG_SILENT;
8035 	}
8036 
8037 	/*
8038 	 * At this point in the attach, we have enough info in the
8039 	 * soft state to be able to issue commands to the target.
8040 	 *
8041 	 * All command paths used below MUST issue their commands as
8042 	 * SD_PATH_DIRECT. This is important as intermediate layers
8043 	 * are not all initialized yet (such as PM).
8044 	 */
8045 
8046 	/*
8047 	 * Send a TEST UNIT READY command to the device. This should clear
8048 	 * any outstanding UNIT ATTENTION that may be present.
8049 	 *
8050 	 * Note: Don't check for success; just track if there is a reservation.
8051 	 * This is a throw-away command to clear any unit attentions.
8052 	 *
8053 	 * Note: This MUST be the first command issued to the target during
8054 	 * attach to ensure power on UNIT ATTENTIONS are cleared.
8055 	 * Pass in flag SD_DONT_RETRY_TUR to prevent the long delays associated
8056 	 * with attempts at spinning up a device with no media.
8057 	 */
8058 	status = sd_send_scsi_TEST_UNIT_READY(ssc, SD_DONT_RETRY_TUR);
8059 	if (status != 0) {
8060 		if (status == EACCES)
8061 			reservation_flag = SD_TARGET_IS_RESERVED;
8062 		sd_ssc_assessment(ssc, SD_FMT_IGNORE);
8063 	}
8064 
8065 	/*
8066 	 * If the device is NOT a removable media device, attempt to spin
8067 	 * it up (using the START_STOP_UNIT command) and read its capacity
8068 	 * (using the READ CAPACITY command).  Note, however, that either
8069 	 * of these could fail and in some cases we would continue with
8070 	 * the attach despite the failure (see below).
8071 	 */
8072 	if (un->un_f_descr_format_supported) {
8073 
8074 		switch (sd_spin_up_unit(ssc)) {
8075 		case 0:
8076 			/*
8077 			 * Spin-up was successful; now try to read the
8078 			 * capacity.  If successful then save the results
8079 			 * and mark the capacity & lbasize as valid.
8080 			 */
8081 			SD_TRACE(SD_LOG_ATTACH_DETACH, un,
8082 			    "sd_unit_attach: un:0x%p spin-up successful\n", un);
8083 
8084 			status = sd_send_scsi_READ_CAPACITY(ssc, &capacity,
8085 			    &lbasize, SD_PATH_DIRECT);
8086 
8087 			switch (status) {
8088 			case 0: {
8089 				if (capacity > DK_MAX_BLOCKS) {
8090 #ifdef _LP64
8091 					if ((capacity + 1) >
8092 					    SD_GROUP1_MAX_ADDRESS) {
8093 						/*
8094 						 * Enable descriptor format
8095 						 * sense data so that we can
8096 						 * get 64 bit sense data
8097 						 * fields.
8098 						 */
8099 						sd_enable_descr_sense(ssc);
8100 					}
8101 #else
8102 					/* 32-bit kernels can't handle this */
8103 					scsi_log(SD_DEVINFO(un),
8104 					    sd_label, CE_WARN,
8105 					    "disk has %llu blocks, which "
8106 					    "is too large for a 32-bit "
8107 					    "kernel", capacity);
8108 
8109 #if defined(__x86)
8110 					/*
8111 					 * A 1TB disk was treated as
8112 					 * (1TB - 512B) in the past, so it
8113 					 * might have a valid VTOC and Solaris
8114 					 * partitions; we have to allow it to
8115 					 * continue to work.
8116 					 */
8117 					if (capacity - 1 > DK_MAX_BLOCKS)
8118 #endif
8119 					goto spinup_failed;
8120 #endif
8121 				}
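				/*
				 * (Reasoning sketch: group 1 READ/WRITE(10)
				 * CDBs carry 32-bit LBAs, so once the highest
				 * LBA exceeds SD_GROUP1_MAX_ADDRESS the driver
				 * must move to the 64-bit group 4 CDBs, and
				 * descriptor-format sense is needed to report
				 * 64-bit information fields.)
				 */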
8122 
8123 				/*
8124 				 * It is not necessary to check here whether
8125 				 * the capacity of the device exceeds what
8126 				 * the max HBA CDB can support, because
8127 				 * sd_send_scsi_READ_CAPACITY retrieves the
8128 				 * capacity by sending a USCSI command, which
8129 				 * is itself constrained by the max HBA CDB.
8130 				 * sd_send_scsi_READ_CAPACITY returns EINVAL
8131 				 * when the required CDB length is bigger
8132 				 * than the HBA allows; that case is handled
8133 				 * below in "case EINVAL".
8134 				 */
8135 
8136 				/*
8137 				 * The following relies on
8138 				 * sd_send_scsi_READ_CAPACITY never
8139 				 * returning 0 for capacity and/or lbasize.
8140 				 */
8141 				sd_update_block_info(un, lbasize, capacity);
8142 
8143 				SD_INFO(SD_LOG_ATTACH_DETACH, un,
8144 				    "sd_unit_attach: un:0x%p capacity = %ld "
8145 				    "blocks; lbasize= %ld.\n", un,
8146 				    un->un_blockcount, un->un_tgt_blocksize);
8147 
8148 				break;
8149 			}
8150 			case EINVAL:
8151 				/*
8152 				 * In the case where the max-cdb-length property
8153 				 * is smaller than the required CDB length for
8154 				 * a SCSI device, a target driver can fail to
8155 				 * attach to that device.
8156 				 */
8157 				scsi_log(SD_DEVINFO(un),
8158 				    sd_label, CE_WARN,
8159 				    "disk capacity is too large "
8160 				    "for current cdb length");
8161 				sd_ssc_assessment(ssc, SD_FMT_IGNORE);
8162 
8163 				goto spinup_failed;
8164 			case EACCES:
8165 				/*
8166 				 * Should never get here if the spin-up
8167 				 * succeeded, but code it in anyway.
8168 				 * From here, just continue with the attach...
8169 				 */
8170 				SD_INFO(SD_LOG_ATTACH_DETACH, un,
8171 				    "sd_unit_attach: un:0x%p "
8172 				    "sd_send_scsi_READ_CAPACITY "
8173 				    "returned reservation conflict\n", un);
8174 				reservation_flag = SD_TARGET_IS_RESERVED;
8175 				sd_ssc_assessment(ssc, SD_FMT_IGNORE);
8176 				break;
8177 			default:
8178 				/*
8179 				 * Likewise, should never get here if the
8180 				 * spin-up succeeded. Just continue with
8181 				 * the attach...
8182 				 */
8183 				if (status == EIO)
8184 					sd_ssc_assessment(ssc,
8185 					    SD_FMT_STATUS_CHECK);
8186 				else
8187 					sd_ssc_assessment(ssc,
8188 					    SD_FMT_IGNORE);
8189 				break;
8190 			}
8191 			break;
8192 		case EACCES:
8193 			/*
8194 			 * Device is reserved by another host.  In this case
8195 			 * we could not spin it up or read the capacity, but
8196 			 * we continue with the attach anyway.
8197 			 */
8198 			SD_INFO(SD_LOG_ATTACH_DETACH, un,
8199 			    "sd_unit_attach: un:0x%p spin-up reservation "
8200 			    "conflict.\n", un);
8201 			reservation_flag = SD_TARGET_IS_RESERVED;
8202 			break;
8203 		default:
8204 			/* Fail the attach if the spin-up failed. */
8205 			SD_INFO(SD_LOG_ATTACH_DETACH, un,
8206 			    "sd_unit_attach: un:0x%p spin-up failed.", un);
8207 			goto spinup_failed;
8208 		}
8209 
8210 	}
8211 
8212 	/*
8213 	 * Check to see if this is a MMC drive
8214 	 */
8215 	if (ISCD(un)) {
8216 		sd_set_mmc_caps(ssc);
8217 	}
8218 
8219 	/*
8220 	 * Add a zero-length attribute to tell the world we support
8221 	 * kernel ioctls (for layered drivers)
8222 	 */
8223 	(void) ddi_prop_create(DDI_DEV_T_NONE, devi, DDI_PROP_CANSLEEP,
8224 	    DDI_KERNEL_IOCTL, NULL, 0);
8225 
8226 	/*
8227 	 * Add a boolean property to tell the world we support
8228 	 * the B_FAILFAST flag (for layered drivers)
8229 	 */
8230 	(void) ddi_prop_create(DDI_DEV_T_NONE, devi, DDI_PROP_CANSLEEP,
8231 	    "ddi-failfast-supported", NULL, 0);
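	/*
	 * A minimal sketch (hypothetical layered consumer; "dip" and "bp"
	 * are its own devinfo pointer and buf) of how the properties
	 * created above can be probed with ddi_prop_exists(9F):
	 *
	 *	if (ddi_prop_exists(DDI_DEV_T_ANY, dip, DDI_PROP_DONTPASS,
	 *	    "ddi-failfast-supported") == 1)
	 *		bp->b_flags |= B_FAILFAST;
	 */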
8232 
8233 	/*
8234 	 * Initialize power management
8235 	 */
8236 	mutex_init(&un->un_pm_mutex, NULL, MUTEX_DRIVER, NULL);
8237 	cv_init(&un->un_pm_busy_cv, NULL, CV_DRIVER, NULL);
8238 	sd_setup_pm(ssc, devi);
8239 	if (un->un_f_pm_is_enabled == FALSE) {
8240 		/*
8241 		 * For performance, point to a jump table that does
8242 		 * not include pm.
8243 		 * The direct and priority chains don't change with PM.
8244 		 *
8245 		 * Note: this is currently done based on individual device
8246 		 * capabilities. When an interface for determining system
8247 		 * power enabled state becomes available, or when additional
8248 		 * layers are added to the command chain, these values will
8249 		 * have to be re-evaluated for correctness.
8250 		 */
8251 		if (un->un_f_non_devbsize_supported) {
8252 			un->un_buf_chain_type = SD_CHAIN_INFO_RMMEDIA_NO_PM;
8253 		} else {
8254 			un->un_buf_chain_type = SD_CHAIN_INFO_DISK_NO_PM;
8255 		}
8256 		un->un_uscsi_chain_type  = SD_CHAIN_INFO_USCSI_CMD_NO_PM;
8257 	}
8258 
8259 	/*
8260 	 * This property is set to 0 by HA software to avoid retries
8261 	 * on a reserved disk. (The preferred property name is
8262 	 * "retry-on-reservation-conflict") (1189689)
8263 	 *
8264 	 * Note: The use of a global here can have unintended consequences. A
8265 	 * per instance variable is preferable to match the capabilities of
8266 	 * different underlying hba's (4402600)
8267 	 */
8268 	sd_retry_on_reservation_conflict = ddi_getprop(DDI_DEV_T_ANY, devi,
8269 	    DDI_PROP_DONTPASS, "retry-on-reservation-conflict",
8270 	    sd_retry_on_reservation_conflict);
8271 	if (sd_retry_on_reservation_conflict != 0) {
8272 		sd_retry_on_reservation_conflict = ddi_getprop(DDI_DEV_T_ANY,
8273 		    devi, DDI_PROP_DONTPASS, sd_resv_conflict_name,
8274 		    sd_retry_on_reservation_conflict);
8275 	}
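	/*
	 * Illustrative driver.conf(5) sketch: HA software typically
	 * disables these retries from sd.conf with
	 *
	 *	retry-on-reservation-conflict=0;
	 */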
8276 
8277 	/* Set up options for QFULL handling. */
8278 	if ((rval = ddi_getprop(DDI_DEV_T_ANY, devi, 0,
8279 	    "qfull-retries", -1)) != -1) {
8280 		(void) scsi_ifsetcap(SD_ADDRESS(un), "qfull-retries",
8281 		    rval, 1);
8282 	}
8283 	if ((rval = ddi_getprop(DDI_DEV_T_ANY, devi, 0,
8284 	    "qfull-retry-interval", -1)) != -1) {
8285 		(void) scsi_ifsetcap(SD_ADDRESS(un), "qfull-retry-interval",
8286 		    rval, 1);
8287 	}
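	/*
	 * A driver.conf(5) sketch (values illustrative only): both QFULL
	 * knobs can be supplied from sd.conf, e.g.
	 *
	 *	qfull-retries=255;
	 *	qfull-retry-interval=1000;
	 */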
8288 
8289 	/*
8290 	 * This just prints a message that announces the existence of the
8291 	 * device. The message is always printed in the system logfile, but
8292 	 * only appears on the console if the system is booted with the
8293 	 * -v (verbose) argument.
8294 	 */
8295 	ddi_report_dev(devi);
8296 
8297 	un->un_mediastate = DKIO_NONE;
8298 
8299 	/*
8300 	 * Check Block Device Characteristics VPD.
8301 	 */
8302 	sd_check_bdc_vpd(ssc);
8303 
8304 	/*
8305 	 * Check whether the drive is in emulation mode.
8306 	 */
8307 	sd_check_emulation_mode(ssc);
8308 
8309 	cmlb_alloc_handle(&un->un_cmlbhandle);
8310 
8311 #if defined(__x86)
8312 	/*
8313 	 * On x86, compensate for the legacy off-by-one capacity error.
8314 	 */
8315 	if (!un->un_f_has_removable_media && !un->un_f_is_hotpluggable &&
8316 	    (lbasize == un->un_sys_blocksize))
8317 		offbyone = CMLB_OFF_BY_ONE;
8318 #endif
8319 
8320 	if (cmlb_attach(devi, &sd_tgops, (int)devp->sd_inq->inq_dtype,
8321 	    VOID2BOOLEAN(un->un_f_has_removable_media != 0),
8322 	    VOID2BOOLEAN(un->un_f_is_hotpluggable != 0),
8323 	    un->un_node_type, offbyone, un->un_cmlbhandle,
8324 	    (void *)SD_PATH_DIRECT) != 0) {
8325 		goto cmlb_attach_failed;
8326 	}
8327 
8328 
8329 	/*
8330 	 * Read and validate the device's geometry (i.e., disk label).
8331 	 * A new unformatted drive will not have a valid geometry, but
8332 	 * the driver needs to successfully attach to this device so
8333 	 * the drive can be formatted via ioctls.
8334 	 */
8335 	geom_label_valid = (cmlb_validate(un->un_cmlbhandle, 0,
8336 	    (void *)SD_PATH_DIRECT) == 0) ? 1: 0;
8337 
8338 	mutex_enter(SD_MUTEX(un));
8339 
8340 	/*
8341 	 * Read and initialize the devid for the unit.
8342 	 */
8343 	if (un->un_f_devid_supported) {
8344 		sd_register_devid(ssc, devi, reservation_flag);
8345 	}
8346 	mutex_exit(SD_MUTEX(un));
8347 
8348 #if (defined(__fibre))
8349 	/*
8350 	 * Register callbacks for fibre only.  You can't do this solely
8351 	 * on the basis of the devid_type because this is hba specific.
8352 	 * on the basis of the devid_type because this is HBA-specific.
8353 	 * register or not.
8354 	 */
8355 	if (un->un_f_is_fibre) {
8356 		if (strcmp(un->un_node_type, DDI_NT_BLOCK_CHAN)) {
8357 			sd_init_event_callbacks(un);
8358 			SD_TRACE(SD_LOG_ATTACH_DETACH, un,
8359 			    "sd_unit_attach: un:0x%p event callbacks inserted",
8360 			    un);
8361 		}
8362 	}
8363 #endif
8364 
8365 	if (un->un_f_opt_disable_cache == TRUE) {
8366 		/*
8367 		 * Disable both read cache and write cache.  This is
8368 		 * the historic behavior of the keywords in the config file.
8369 		 */
8370 		if (sd_cache_control(ssc, SD_CACHE_DISABLE, SD_CACHE_DISABLE) !=
8371 		    0) {
8372 			SD_ERROR(SD_LOG_ATTACH_DETACH, un,
8373 			    "sd_unit_attach: un:0x%p Could not disable "
8374 			    "caching", un);
8375 			goto devid_failed;
8376 		}
8377 	}
8378 
8379 	/*
8380 	 * Check the value of the WCE bit and if it's allowed to be changed,
8381 	 * set un_f_write_cache_enabled and un_f_cache_mode_changeable
8382 	 * accordingly.
8383 	 */
8384 	(void) sd_get_write_cache_enabled(ssc, &wc_enabled);
8385 	sd_get_write_cache_changeable(ssc, &wc_changeable);
8386 	mutex_enter(SD_MUTEX(un));
8387 	un->un_f_write_cache_enabled = (wc_enabled != 0);
8388 	un->un_f_cache_mode_changeable = (wc_changeable != 0);
8389 	mutex_exit(SD_MUTEX(un));
8390 
8391 	if ((un->un_f_rmw_type != SD_RMW_TYPE_RETURN_ERROR &&
8392 	    un->un_tgt_blocksize != DEV_BSIZE) ||
8393 	    un->un_f_enable_rmw) {
8394 		if (!(un->un_wm_cache)) {
8395 			(void) snprintf(name_str, sizeof (name_str),
8396 			    "%s%d_cache",
8397 			    ddi_driver_name(SD_DEVINFO(un)),
8398 			    ddi_get_instance(SD_DEVINFO(un)));
8399 			un->un_wm_cache = kmem_cache_create(
8400 			    name_str, sizeof (struct sd_w_map),
8401 			    8, sd_wm_cache_constructor,
8402 			    sd_wm_cache_destructor, NULL,
8403 			    (void *)un, NULL, 0);
8404 			if (!(un->un_wm_cache)) {
8405 				goto wm_cache_failed;
8406 			}
8407 		}
8408 	}
8409 
8410 	/*
8411 	 * Check the value of the NV_SUP bit and set
8412 	 * un_f_suppress_cache_flush accordingly.
8413 	 */
8414 	sd_get_nv_sup(ssc);
8415 
8416 	/*
8417 	 * Find out what type of reservation this disk supports.
8418 	 */
8419 	status = sd_send_scsi_PERSISTENT_RESERVE_IN(ssc, SD_READ_KEYS, 0, NULL);
8420 
8421 	switch (status) {
8422 	case 0:
8423 		/*
8424 		 * SCSI-3 reservations are supported.
8425 		 */
8426 		un->un_reservation_type = SD_SCSI3_RESERVATION;
8427 		SD_INFO(SD_LOG_ATTACH_DETACH, un,
8428 		    "sd_unit_attach: un:0x%p SCSI-3 reservations\n", un);
8429 		break;
8430 	case ENOTSUP:
8431 		/*
8432 		 * The PERSISTENT RESERVE IN command would not be recognized by
8433 		 * a SCSI-2 device, so assume the reservation type is SCSI-2.
8434 		 */
8435 		SD_INFO(SD_LOG_ATTACH_DETACH, un,
8436 		    "sd_unit_attach: un:0x%p SCSI-2 reservations\n", un);
8437 		un->un_reservation_type = SD_SCSI2_RESERVATION;
8438 
8439 		sd_ssc_assessment(ssc, SD_FMT_IGNORE);
8440 		break;
8441 	default:
8442 		/*
8443 		 * default to SCSI-3 reservations
8444 		 */
8445 		SD_INFO(SD_LOG_ATTACH_DETACH, un,
8446 		    "sd_unit_attach: un:0x%p default SCSI3 reservations\n", un);
8447 		un->un_reservation_type = SD_SCSI3_RESERVATION;
8448 
8449 		sd_ssc_assessment(ssc, SD_FMT_IGNORE);
8450 		break;
8451 	}
8452 
8453 	/*
8454 	 * Set the pstat and error stat values here, so data obtained during the
8455 	 * previous attach-time routines is available.
8456 	 *
8457 	 * Note: This is a critical sequence that needs to be maintained:
8458 	 *	1) Instantiate the kstats before any routines using the iopath
8459 	 *	   (i.e. sd_send_scsi_cmd).
8460 	 *	2) Initialize the error stats (sd_set_errstats) and partition
8461 	 *	   stats (sd_set_pstats) here, following
8462 	 *	   cmlb_validate_geometry(), sd_register_devid(), and
8463 	 *	   sd_cache_control().
8464 	 */
8465 
8466 	if (un->un_f_pkstats_enabled && geom_label_valid) {
8467 		sd_set_pstats(un);
8468 		SD_TRACE(SD_LOG_IO_PARTITION, un,
8469 		    "sd_unit_attach: un:0x%p pstats created and set\n", un);
8470 	}
8471 
8472 	sd_set_errstats(un);
8473 	SD_TRACE(SD_LOG_ATTACH_DETACH, un,
8474 	    "sd_unit_attach: un:0x%p errstats set\n", un);
8475 
8476 	sd_setup_blk_limits(ssc);
8477 
8478 	/*
8479 	 * After successfully attaching an instance, we record how many LUNs
8480 	 * have been attached on the corresponding target and controller for
8481 	 * parallel SCSI. This information is used when sd tries to set the
8482 	 * tagged queuing capability in the HBA.
8483 	 */
8484 	if (SD_IS_PARALLEL_SCSI(un) && (tgt >= 0) && (tgt < NTARGETS_WIDE)) {
8485 		sd_scsi_update_lun_on_target(pdip, tgt, SD_SCSI_LUN_ATTACH);
8486 	}
8487 
8488 	SD_TRACE(SD_LOG_ATTACH_DETACH, un,
8489 	    "sd_unit_attach: un:0x%p exit success\n", un);
8490 
8491 	/* Uninitialize sd_ssc_t pointer */
8492 	sd_ssc_fini(ssc);
8493 
8494 	return (DDI_SUCCESS);
8495 
8496 	/*
8497 	 * An error occurred during the attach; clean up & return failure.
8498 	 */
8499 wm_cache_failed:
8500 devid_failed:
8501 	ddi_remove_minor_node(devi, NULL);
8502 
8503 cmlb_attach_failed:
8504 	/*
8505 	 * Cleanup from the scsi_ifsetcap() calls (437868)
8506 	 */
8507 	(void) scsi_ifsetcap(SD_ADDRESS(un), "lun-reset", 0, 1);
8508 	(void) scsi_ifsetcap(SD_ADDRESS(un), "wide-xfer", 0, 1);
8509 
8510 	/*
8511 	 * Refer to the comments on setting tagged-qing at the beginning of
8512 	 * sd_unit_attach. We can only disable tagged queuing when there is
8513 	 * no LUN attached to the target.
8514 	 */
8515 	if (sd_scsi_get_target_lun_count(pdip, tgt) < 1) {
8516 		(void) scsi_ifsetcap(SD_ADDRESS(un), "tagged-qing", 0, 1);
8517 	}
8518 
8519 	if (un->un_f_is_fibre == FALSE) {
8520 		(void) scsi_ifsetcap(SD_ADDRESS(un), "auto-rqsense", 0, 1);
8521 	}
8522 
8523 spinup_failed:
8524 
8525 	/* Uninitialize sd_ssc_t pointer */
8526 	sd_ssc_fini(ssc);
8527 
8528 	mutex_enter(SD_MUTEX(un));
8529 
8530 	/* Deallocate SCSI FMA memory spaces */
8531 	kmem_free(un->un_fm_private, sizeof (struct sd_fm_internal));
8532 
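	/*
	 * The cancellation idiom used below (and again in sd_unit_detach)
	 * is deliberate: each timeout id is copied and cleared under
	 * SD_MUTEX, then untimeout(9F) is called with the mutex dropped.
	 * untimeout() does not return until any in-flight callback has
	 * completed, and the callbacks themselves take SD_MUTEX, so holding
	 * the mutex across the call could deadlock.  A minimal sketch of
	 * the pattern ("un_foo_timeid" is a placeholder):
	 *
	 *	if (un->un_foo_timeid != NULL) {
	 *		timeout_id_t temp_id = un->un_foo_timeid;
	 *		un->un_foo_timeid = NULL;
	 *		mutex_exit(SD_MUTEX(un));
	 *		(void) untimeout(temp_id);
	 *		mutex_enter(SD_MUTEX(un));
	 *	}
	 */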
8533 	/* Cancel any pending SD_PATH_DIRECT_PRIORITY command restart callback */
8534 	if (un->un_direct_priority_timeid != NULL) {
8535 		timeout_id_t temp_id = un->un_direct_priority_timeid;
8536 		un->un_direct_priority_timeid = NULL;
8537 		mutex_exit(SD_MUTEX(un));
8538 		(void) untimeout(temp_id);
8539 		mutex_enter(SD_MUTEX(un));
8540 	}
8541 
8542 	/* Cancel any pending start/stop timeouts */
8543 	if (un->un_startstop_timeid != NULL) {
8544 		timeout_id_t temp_id = un->un_startstop_timeid;
8545 		un->un_startstop_timeid = NULL;
8546 		mutex_exit(SD_MUTEX(un));
8547 		(void) untimeout(temp_id);
8548 		mutex_enter(SD_MUTEX(un));
8549 	}
8550 
8551 	/* Cancel any pending reset-throttle timeouts */
8552 	if (un->un_reset_throttle_timeid != NULL) {
8553 		timeout_id_t temp_id = un->un_reset_throttle_timeid;
8554 		un->un_reset_throttle_timeid = NULL;
8555 		mutex_exit(SD_MUTEX(un));
8556 		(void) untimeout(temp_id);
8557 		mutex_enter(SD_MUTEX(un));
8558 	}
8559 
8560 	/* Cancel rmw warning message timeouts */
8561 	if (un->un_rmw_msg_timeid != NULL) {
8562 		timeout_id_t temp_id = un->un_rmw_msg_timeid;
8563 		un->un_rmw_msg_timeid = NULL;
8564 		mutex_exit(SD_MUTEX(un));
8565 		(void) untimeout(temp_id);
8566 		mutex_enter(SD_MUTEX(un));
8567 	}
8568 
8569 	/* Cancel any pending retry timeouts */
8570 	if (un->un_retry_timeid != NULL) {
8571 		timeout_id_t temp_id = un->un_retry_timeid;
8572 		un->un_retry_timeid = NULL;
8573 		mutex_exit(SD_MUTEX(un));
8574 		(void) untimeout(temp_id);
8575 		mutex_enter(SD_MUTEX(un));
8576 	}
8577 
8578 	/* Cancel any pending delayed cv broadcast timeouts */
8579 	if (un->un_dcvb_timeid != NULL) {
8580 		timeout_id_t temp_id = un->un_dcvb_timeid;
8581 		un->un_dcvb_timeid = NULL;
8582 		mutex_exit(SD_MUTEX(un));
8583 		(void) untimeout(temp_id);
8584 		mutex_enter(SD_MUTEX(un));
8585 	}
8586 
8587 	mutex_exit(SD_MUTEX(un));
8588 
8589 	/* There should not be any in-progress I/O so ASSERT this check */
8590 	ASSERT(un->un_ncmds_in_transport == 0);
8591 	ASSERT(un->un_ncmds_in_driver == 0);
8592 
8593 	/* Do not free the softstate if the callback routine is active */
8594 	sd_sync_with_callback(un);
8595 
8596 	/*
8597 	 * Partition stats apparently are not used with removables. These would
8598 	 * not have been created during attach, so no need to clean them up...
8599 	 */
8600 	if (un->un_errstats != NULL) {
8601 		kstat_delete(un->un_errstats);
8602 		un->un_errstats = NULL;
8603 	}
8604 
8605 create_errstats_failed:
8606 
8607 	if (un->un_stats != NULL) {
8608 		kstat_delete(un->un_stats);
8609 		un->un_stats = NULL;
8610 	}
8611 
8612 	ddi_xbuf_attr_unregister_devinfo(un->un_xbuf_attr, devi);
8613 	ddi_xbuf_attr_destroy(un->un_xbuf_attr);
8614 
8615 	ddi_prop_remove_all(devi);
8616 	sema_destroy(&un->un_semoclose);
8617 	cv_destroy(&un->un_state_cv);
8618 
8619 	sd_free_rqs(un);
8620 
8621 alloc_rqs_failed:
8622 
8623 	devp->sd_private = NULL;
8624 	bzero(un, sizeof (struct sd_lun));	/* Clear any stale data! */
8625 
8626 	/*
8627 	 * Note: the man pages are unclear as to whether or not doing a
8628 	 * ddi_soft_state_free(sd_state, instance) is the right way to
8629 	 * clean up after the ddi_soft_state_zalloc() if the subsequent
8630 	 * ddi_get_soft_state() fails.  The implication seems to be
8631 	 * that the get_soft_state cannot fail if the zalloc succeeds.
8632 	 */
8633 #ifndef XPV_HVM_DRIVER
8634 	ddi_soft_state_free(sd_state, instance);
8635 #endif /* !XPV_HVM_DRIVER */
8636 
8637 probe_failed:
8638 	scsi_unprobe(devp);
8639 
8640 	return (DDI_FAILURE);
8641 }
8642 
8643 
8644 /*
8645  *    Function: sd_unit_detach
8646  *
8647  * Description: Performs DDI_DETACH processing for sddetach().
8648  *
8649  * Return Code: DDI_SUCCESS
8650  *		DDI_FAILURE
8651  *
8652  *     Context: Kernel thread context
8653  */
8654 
8655 static int
8656 sd_unit_detach(dev_info_t *devi)
8657 {
8658 	struct scsi_device	*devp;
8659 	struct sd_lun		*un;
8660 	int			i;
8661 	int			tgt;
8662 	dev_t			dev;
8663 	dev_info_t		*pdip = ddi_get_parent(devi);
8664 	int			instance = ddi_get_instance(devi);
8665 
8666 	mutex_enter(&sd_detach_mutex);
8667 
8668 	/*
8669 	 * Fail the detach for any of the following:
8670 	 *  - Unable to get the sd_lun struct for the instance
8671 	 *  - A layered driver has an outstanding open on the instance
8672 	 *  - Another thread is already detaching this instance
8673 	 *  - Another thread is currently performing an open
8674 	 */
8675 	devp = ddi_get_driver_private(devi);
8676 	if ((devp == NULL) ||
8677 	    ((un = (struct sd_lun *)devp->sd_private) == NULL) ||
8678 	    (un->un_ncmds_in_driver != 0) || (un->un_layer_count != 0) ||
8679 	    (un->un_detach_count != 0) || (un->un_opens_in_progress != 0)) {
8680 		mutex_exit(&sd_detach_mutex);
8681 		return (DDI_FAILURE);
8682 	}
8683 
8684 	SD_TRACE(SD_LOG_ATTACH_DETACH, un, "sd_unit_detach: entry 0x%p\n", un);
8685 
8686 	/*
8687 	 * Mark this instance as currently in a detach, to inhibit any
8688 	 * opens from a layered driver.
8689 	 */
8690 	un->un_detach_count++;
8691 	mutex_exit(&sd_detach_mutex);
8692 
8693 	tgt = ddi_prop_get_int(DDI_DEV_T_ANY, devi, DDI_PROP_DONTPASS,
8694 	    SCSI_ADDR_PROP_TARGET, -1);
8695 
8696 	dev = sd_make_device(SD_DEVINFO(un));
8697 
8698 #ifndef lint
8699 	_NOTE(COMPETING_THREADS_NOW);
8700 #endif
8701 
8702 	mutex_enter(SD_MUTEX(un));
8703 
8704 	/*
8705 	 * Fail the detach if there are any outstanding layered
8706 	 * opens on this device.
8707 	 */
8708 	for (i = 0; i < NDKMAP; i++) {
8709 		if (un->un_ocmap.lyropen[i] != 0) {
8710 			goto err_notclosed;
8711 		}
8712 	}
8713 
8714 	/*
8715 	 * Verify there are NO outstanding commands issued to this device.
8716 	 * i.e., un_ncmds_in_transport == 0.
8717 	 * It's possible to have outstanding commands through the physio
8718 	 * code path, even though everything's closed.
8719 	 */
8720 	if ((un->un_ncmds_in_transport != 0) || (un->un_retry_timeid != NULL) ||
8721 	    (un->un_direct_priority_timeid != NULL) ||
8722 	    (un->un_state == SD_STATE_RWAIT)) {
8723 		mutex_exit(SD_MUTEX(un));
8724 		SD_ERROR(SD_LOG_ATTACH_DETACH, un,
8725 		    "sd_dr_detach: Detach failure due to outstanding cmds\n");
8726 		goto err_stillbusy;
8727 	}
8728 
8729 	/*
8730 	 * If we have the device reserved, release the reservation.
8731 	 */
8732 	if ((un->un_resvd_status & SD_RESERVE) &&
8733 	    !(un->un_resvd_status & SD_LOST_RESERVE)) {
8734 		mutex_exit(SD_MUTEX(un));
8735 		/*
8736 		 * Note: sd_reserve_release sends a command to the device
8737 		 * via the sd_ioctlcmd() path, and can sleep.
8738 		 */
8739 		if (sd_reserve_release(dev, SD_RELEASE) != 0) {
8740 			SD_ERROR(SD_LOG_ATTACH_DETACH, un,
8741 			    "sd_dr_detach: Cannot release reservation \n");
8742 		}
8743 	} else {
8744 		mutex_exit(SD_MUTEX(un));
8745 	}
8746 
8747 	/*
8748 	 * Untimeout any reserve recover, throttle reset, restart unit
8749 	 * and delayed broadcast timeout threads. Protect the timeout pointer
8750 	 * from getting nulled by their callback functions.
8751 	 */
8752 	mutex_enter(SD_MUTEX(un));
8753 	if (un->un_resvd_timeid != NULL) {
8754 		timeout_id_t temp_id = un->un_resvd_timeid;
8755 		un->un_resvd_timeid = NULL;
8756 		mutex_exit(SD_MUTEX(un));
8757 		(void) untimeout(temp_id);
8758 		mutex_enter(SD_MUTEX(un));
8759 	}
8760 
8761 	if (un->un_reset_throttle_timeid != NULL) {
8762 		timeout_id_t temp_id = un->un_reset_throttle_timeid;
8763 		un->un_reset_throttle_timeid = NULL;
8764 		mutex_exit(SD_MUTEX(un));
8765 		(void) untimeout(temp_id);
8766 		mutex_enter(SD_MUTEX(un));
8767 	}
8768 
8769 	if (un->un_startstop_timeid != NULL) {
8770 		timeout_id_t temp_id = un->un_startstop_timeid;
8771 		un->un_startstop_timeid = NULL;
8772 		mutex_exit(SD_MUTEX(un));
8773 		(void) untimeout(temp_id);
8774 		mutex_enter(SD_MUTEX(un));
8775 	}
8776 
8777 	if (un->un_rmw_msg_timeid != NULL) {
8778 		timeout_id_t temp_id = un->un_rmw_msg_timeid;
8779 		un->un_rmw_msg_timeid = NULL;
8780 		mutex_exit(SD_MUTEX(un));
8781 		(void) untimeout(temp_id);
8782 		mutex_enter(SD_MUTEX(un));
8783 	}
8784 
8785 	if (un->un_dcvb_timeid != NULL) {
8786 		timeout_id_t temp_id = un->un_dcvb_timeid;
8787 		un->un_dcvb_timeid = NULL;
8788 		mutex_exit(SD_MUTEX(un));
8789 		(void) untimeout(temp_id);
8790 	} else {
8791 		mutex_exit(SD_MUTEX(un));
8792 	}
8793 
8794 	/* Remove any pending reservation reclaim requests for this device */
8795 	sd_rmv_resv_reclaim_req(dev);
8796 
8797 	mutex_enter(SD_MUTEX(un));
8798 
8799 	/* Cancel any pending callbacks for SD_PATH_DIRECT_PRIORITY cmd. */
8800 	if (un->un_direct_priority_timeid != NULL) {
8801 		timeout_id_t temp_id = un->un_direct_priority_timeid;
8802 		un->un_direct_priority_timeid = NULL;
8803 		mutex_exit(SD_MUTEX(un));
8804 		(void) untimeout(temp_id);
8805 		mutex_enter(SD_MUTEX(un));
8806 	}
8807 
8808 	/* Cancel any active multi-host disk watch thread requests */
8809 	if (un->un_mhd_token != NULL) {
8810 		mutex_exit(SD_MUTEX(un));
8811 		_NOTE(DATA_READABLE_WITHOUT_LOCK(sd_lun::un_mhd_token));
8812 		if (scsi_watch_request_terminate(un->un_mhd_token,
8813 		    SCSI_WATCH_TERMINATE_NOWAIT)) {
8814 			SD_ERROR(SD_LOG_ATTACH_DETACH, un,
8815 			    "sd_dr_detach: Cannot cancel mhd watch request\n");
8816 			/*
8817 			 * Note: We are returning here after having removed
8818 			 * some driver timeouts above. This is consistent with
8819 			 * the legacy implementation but perhaps the watch
8820 			 * terminate call should be made with the wait flag set.
8821 			 */
8822 			goto err_stillbusy;
8823 		}
8824 		mutex_enter(SD_MUTEX(un));
8825 		un->un_mhd_token = NULL;
8826 	}
8827 
8828 	if (un->un_swr_token != NULL) {
8829 		mutex_exit(SD_MUTEX(un));
8830 		_NOTE(DATA_READABLE_WITHOUT_LOCK(sd_lun::un_swr_token));
8831 		if (scsi_watch_request_terminate(un->un_swr_token,
8832 		    SCSI_WATCH_TERMINATE_NOWAIT)) {
8833 			SD_ERROR(SD_LOG_ATTACH_DETACH, un,
8834 			    "sd_dr_detach: Cannot cancel swr watch request\n");
8835 			/*
8836 			 * Note: We are returning here after having removed
8837 			 * some driver timeouts above. This is consistent with
8838 			 * the legacy implementation but perhaps the watch
8839 			 * terminate call should be made with the wait flag set.
8840 			 */
8841 			goto err_stillbusy;
8842 		}
8843 		mutex_enter(SD_MUTEX(un));
8844 		un->un_swr_token = NULL;
8845 	}
8846 
8847 	mutex_exit(SD_MUTEX(un));
8848 
8849 	/*
8850 	 * Clear any scsi_reset_notifies. We issue the cancel even if we
8851 	 * have not registered a notify; it is harmless in that case.
8852 	 * Note: The sd_mhd_reset_notify_cb() fn tries to acquire SD_MUTEX!
8853 	 */
8854 	(void) scsi_reset_notify(SD_ADDRESS(un), SCSI_RESET_CANCEL,
8855 	    sd_mhd_reset_notify_cb, (caddr_t)un);
8856 
8857 	/*
8858 	 * Protect the timeout pointers from getting nulled by their
8859 	 * callback functions during the cancellation process; otherwise
8860 	 * untimeout could be invoked with a null value.
8861 	 */
8862 	_NOTE(NO_COMPETING_THREADS_NOW);
8863 
8864 	mutex_enter(&un->un_pm_mutex);
8865 	if (un->un_pm_idle_timeid != NULL) {
8866 		timeout_id_t temp_id = un->un_pm_idle_timeid;
8867 		un->un_pm_idle_timeid = NULL;
8868 		mutex_exit(&un->un_pm_mutex);
8869 
8870 		/*
8871 		 * Timeout is active; cancel it.
8872 		 * Note that it'll never be active on a device
8873 		 * that does not support PM, so we don't
8874 		 * have to check before calling pm_idle_component.
8875 		 */
8876 		(void) untimeout(temp_id);
8877 		(void) pm_idle_component(SD_DEVINFO(un), 0);
8878 		mutex_enter(&un->un_pm_mutex);
8879 	}
8880 
8881 	/*
8882 	 * Check whether there is already a timeout scheduled for power
8883 	 * management. If so, don't lower the power here; that's the
8884 	 * timeout handler's job.
8885 	 */
8886 	if (un->un_pm_timeid != NULL) {
8887 		timeout_id_t temp_id = un->un_pm_timeid;
8888 		un->un_pm_timeid = NULL;
8889 		mutex_exit(&un->un_pm_mutex);
8890 		/*
8891 		 * Timeout is active; cancel it.
8892 		 * Note that it'll never be active on a device
8893 		 * that does not support PM, so we don't
8894 		 * have to check before calling pm_idle_component.
8895 		 */
8896 		(void) untimeout(temp_id);
8897 		(void) pm_idle_component(SD_DEVINFO(un), 0);
8898 
8899 	} else {
8900 		mutex_exit(&un->un_pm_mutex);
8901 		if ((un->un_f_pm_is_enabled == TRUE) &&
8902 		    (pm_lower_power(SD_DEVINFO(un), 0, SD_PM_STATE_STOPPED(un))
8903 		    != DDI_SUCCESS)) {
8904 			SD_ERROR(SD_LOG_ATTACH_DETACH, un,
8905 		    "sd_dr_detach: Lower power request failed, ignoring.\n");
8906 			/*
8907 			 * Fix for bug: 4297749, item # 13
8908 			 * The above test now includes a check to see if PM is
8909 			 * supported by this device before calling
8910 			 * pm_lower_power().
8911 			 * Note, the following is not dead code. The call to
8912 			 * pm_lower_power above will generate a call back into
8913 			 * our sdpower routine which might result in a timeout
8914 			 * handler getting activated. Therefore the following
8915 			 * code is valid and necessary.
8916 			 */
8917 			mutex_enter(&un->un_pm_mutex);
8918 			if (un->un_pm_timeid != NULL) {
8919 				timeout_id_t temp_id = un->un_pm_timeid;
8920 				un->un_pm_timeid = NULL;
8921 				mutex_exit(&un->un_pm_mutex);
8922 				(void) untimeout(temp_id);
8923 				(void) pm_idle_component(SD_DEVINFO(un), 0);
8924 			} else {
8925 				mutex_exit(&un->un_pm_mutex);
8926 			}
8927 		}
8928 	}
8929 
8930 	/*
8931 	 * Cleanup from the scsi_ifsetcap() calls (437868)
8932 	 * Relocated here from above to be after the call to
8933 	 * pm_lower_power, which was getting errors.
8934 	 */
8935 	(void) scsi_ifsetcap(SD_ADDRESS(un), "lun-reset", 0, 1);
8936 	(void) scsi_ifsetcap(SD_ADDRESS(un), "wide-xfer", 0, 1);
8937 
8938 	/*
8939 	 * Currently, tagged queuing is supported per-target by the HBA.
8940 	 * Setting it on a per-LUN instance actually sets the capability
8941 	 * of the target in the HBA, which affects those LUNs already
8942 	 * attached on the same target. So during detach, we can disable
8943 	 * this capability only when this is the only LUN left on the
8944 	 * target. By doing this, we assume a target has the same tagged
8945 	 * queuing capability for every LUN. The condition can be removed
8946 	 * when HBAs support a per-LUN tagged queuing capability.
8947 	 */
8948 	if (sd_scsi_get_target_lun_count(pdip, tgt) <= 1) {
8949 		(void) scsi_ifsetcap(SD_ADDRESS(un), "tagged-qing", 0, 1);
8950 	}
8951 
8952 	if (un->un_f_is_fibre == FALSE) {
8953 		(void) scsi_ifsetcap(SD_ADDRESS(un), "auto-rqsense", 0, 1);
8954 	}
8955 
8956 	/*
8957 	 * Remove any event callbacks, fibre only
8958 	 */
8959 	if (un->un_f_is_fibre == TRUE) {
8960 		if ((un->un_insert_event != NULL) &&
8961 		    (ddi_remove_event_handler(un->un_insert_cb_id) !=
8962 		    DDI_SUCCESS)) {
8963 			/*
8964 			 * Note: We are returning here after having done
8965 			 * substantial cleanup above. This is consistent
8966 			 * with the legacy implementation but this may not
8967 			 * be the right thing to do.
8968 			 */
8969 			SD_ERROR(SD_LOG_ATTACH_DETACH, un,
8970 			    "sd_dr_detach: Cannot cancel insert event\n");
8971 			goto err_remove_event;
8972 		}
8973 		un->un_insert_event = NULL;
8974 
8975 		if ((un->un_remove_event != NULL) &&
8976 		    (ddi_remove_event_handler(un->un_remove_cb_id) !=
8977 		    DDI_SUCCESS)) {
8978 			/*
8979 			 * Note: We are returning here after having done
8980 			 * substantial cleanup above. This is consistent
8981 			 * with the legacy implementation but this may not
8982 			 * be the right thing to do.
8983 			 */
8984 			SD_ERROR(SD_LOG_ATTACH_DETACH, un,
8985 			    "sd_dr_detach: Cannot cancel remove event\n");
8986 			goto err_remove_event;
8987 		}
8988 		un->un_remove_event = NULL;
8989 	}
8990 
8991 	/* Do not free the softstate if the callback routine is active */
8992 	sd_sync_with_callback(un);
8993 
8994 	cmlb_detach(un->un_cmlbhandle, (void *)SD_PATH_DIRECT);
8995 	cmlb_free_handle(&un->un_cmlbhandle);
8996 
8997 	/*
8998 	 * Hold the detach mutex here, to make sure that no other threads ever
8999 	 * can access a (partially) freed soft state structure.
9000 	 */
9001 	mutex_enter(&sd_detach_mutex);
9002 
9003 	/*
9004 	 * Clean up the soft state struct.
9005 	 * Cleanup is done in reverse order of allocs/inits.
9006 	 * At this point there should be no competing threads anymore.
9007 	 */
9008 
9009 	scsi_fm_fini(devp);
9010 
9011 	/*
9012 	 * Deallocate memory for SCSI FMA.
9013 	 */
9014 	kmem_free(un->un_fm_private, sizeof (struct sd_fm_internal));
9015 
9016 	/*
9017 	 * Unregister and free device id if it was not registered
9018 	 * by the transport.
9019 	 */
9020 	if (un->un_f_devid_transport_defined == FALSE)
9021 		ddi_devid_unregister(devi);
9022 
9023 	/*
9024 	 * free the devid structure if allocated before (by ddi_devid_init()
9025 	 * or ddi_devid_get()).
9026 	 */
9027 	if (un->un_devid) {
9028 		ddi_devid_free(un->un_devid);
9029 		un->un_devid = NULL;
9030 	}
9031 
9032 	/*
9033 	 * Destroy wmap cache if it exists.
9034 	 */
9035 	if (un->un_wm_cache != NULL) {
9036 		kmem_cache_destroy(un->un_wm_cache);
9037 		un->un_wm_cache = NULL;
9038 	}
9039 
9040 	/*
9041 	 * kstat cleanup is done in detach for all device types (4363169).
9042 	 * We do not want to fail detach if the device kstats are not deleted
9043 	 * since there is confusion about the devo_refcnt for the device.
9044 	 * We just delete the kstats and let detach complete successfully.
9045 	 */
9046 	if (un->un_stats != NULL) {
9047 		kstat_delete(un->un_stats);
9048 		un->un_stats = NULL;
9049 	}
9050 	if (un->un_unmapstats != NULL) {
9051 		kstat_delete(un->un_unmapstats_ks);
9052 		un->un_unmapstats_ks = NULL;
9053 		un->un_unmapstats = NULL;
9054 	}
9055 	if (un->un_errstats != NULL) {
9056 		kstat_delete(un->un_errstats);
9057 		un->un_errstats = NULL;
9058 	}
9059 
9060 	/* Remove partition stats */
9061 	if (un->un_f_pkstats_enabled) {
9062 		for (i = 0; i < NSDMAP; i++) {
9063 			if (un->un_pstats[i] != NULL) {
9064 				kstat_delete(un->un_pstats[i]);
9065 				un->un_pstats[i] = NULL;
9066 			}
9067 		}
9068 	}
9069 
9070 	/* Remove xbuf registration */
9071 	ddi_xbuf_attr_unregister_devinfo(un->un_xbuf_attr, devi);
9072 	ddi_xbuf_attr_destroy(un->un_xbuf_attr);
9073 
9074 	/* Remove driver properties */
9075 	ddi_prop_remove_all(devi);
9076 
9077 	mutex_destroy(&un->un_pm_mutex);
9078 	cv_destroy(&un->un_pm_busy_cv);
9079 
9080 	cv_destroy(&un->un_wcc_cv);
9081 
9082 	/* Open/close semaphore */
9083 	sema_destroy(&un->un_semoclose);
9084 
9085 	/* Removable media condvar. */
9086 	cv_destroy(&un->un_state_cv);
9087 
9088 	/* Suspend/resume condvar. */
9089 	cv_destroy(&un->un_suspend_cv);
9090 	cv_destroy(&un->un_disk_busy_cv);
9091 
9092 	sd_free_rqs(un);
9093 
9094 	/* Free up soft state */
9095 	devp->sd_private = NULL;
9096 
9097 	bzero(un, sizeof (struct sd_lun));
9098 
9099 	ddi_soft_state_free(sd_state, instance);
9100 
9101 	mutex_exit(&sd_detach_mutex);
9102 
9103 	/* This frees up the INQUIRY data associated with the device. */
9104 	scsi_unprobe(devp);
9105 
9106 	/*
9107 	 * After successfully detaching an instance, we update how many LUNs
9108 	 * remain attached on the corresponding target and controller for
9109 	 * parallel SCSI. This information is used when sd tries to set the
9110 	 * tagged queuing capability in the HBA.
9111 	 * Since un has been released, we can't use SD_IS_PARALLEL_SCSI(un) to
9112 	 * check if the device is parallel SCSI. However, we don't need to
9113 	 * check here because we've already checked during attach. No device
9114 	 * that is not parallel SCSI is in the chain.
9115 	 */
9116 	if ((tgt >= 0) && (tgt < NTARGETS_WIDE)) {
9117 		sd_scsi_update_lun_on_target(pdip, tgt, SD_SCSI_LUN_DETACH);
9118 	}
9119 
9120 	return (DDI_SUCCESS);
9121 
9122 err_notclosed:
9123 	mutex_exit(SD_MUTEX(un));
9124 
9125 err_stillbusy:
9126 	_NOTE(NO_COMPETING_THREADS_NOW);
9127 
9128 err_remove_event:
9129 	mutex_enter(&sd_detach_mutex);
9130 	un->un_detach_count--;
9131 	mutex_exit(&sd_detach_mutex);
9132 
9133 	SD_TRACE(SD_LOG_ATTACH_DETACH, un, "sd_unit_detach: exit failure\n");
9134 	return (DDI_FAILURE);
9135 }
9136 
9137 
9138 /*
9139  *    Function: sd_create_errstats
9140  *
9141  * Description: This routine instantiates the device error stats.
9142  *
9143  *		Note: During attach the stats are instantiated first so they are
9144  *		available for attach-time routines that utilize the driver
9145  *		iopath to send commands to the device. The stats are initialized
9146  *		separately so data obtained during some attach-time routines is
9147  *		available. (4362483)
9148  *
9149  *   Arguments: un - driver soft state (unit) structure
9150  *		instance - driver instance
9151  *
9152  *     Context: Kernel thread context
9153  */
9154 
9155 static void
9156 sd_create_errstats(struct sd_lun *un, int instance)
9157 {
9158 	struct	sd_errstats	*stp;
9159 	char	kstatmodule_err[KSTAT_STRLEN];
9160 	char	kstatname[KSTAT_STRLEN];
9161 	int	ndata = (sizeof (struct sd_errstats) / sizeof (kstat_named_t));
9162 
9163 	ASSERT(un != NULL);
9164 
9165 	if (un->un_errstats != NULL) {
9166 		return;
9167 	}
9168 
9169 	(void) snprintf(kstatmodule_err, sizeof (kstatmodule_err),
9170 	    "%serr", sd_label);
9171 	(void) snprintf(kstatname, sizeof (kstatname),
9172 	    "%s%d,err", sd_label, instance);
9173 
9174 	un->un_errstats = kstat_create(kstatmodule_err, instance, kstatname,
9175 	    "device_error", KSTAT_TYPE_NAMED, ndata, KSTAT_FLAG_PERSISTENT);
9176 
9177 	if (un->un_errstats == NULL) {
9178 		SD_ERROR(SD_LOG_ATTACH_DETACH, un,
9179 		    "sd_create_errstats: Failed kstat_create\n");
9180 		return;
9181 	}
9182 
9183 	stp = (struct sd_errstats *)un->un_errstats->ks_data;
9184 	kstat_named_init(&stp->sd_softerrs,	"Soft Errors",
9185 	    KSTAT_DATA_UINT32);
9186 	kstat_named_init(&stp->sd_harderrs,	"Hard Errors",
9187 	    KSTAT_DATA_UINT32);
9188 	kstat_named_init(&stp->sd_transerrs,	"Transport Errors",
9189 	    KSTAT_DATA_UINT32);
9190 	kstat_named_init(&stp->sd_vid,		"Vendor",
9191 	    KSTAT_DATA_CHAR);
9192 	kstat_named_init(&stp->sd_pid,		"Product",
9193 	    KSTAT_DATA_CHAR);
9194 	kstat_named_init(&stp->sd_revision,	"Revision",
9195 	    KSTAT_DATA_CHAR);
9196 	kstat_named_init(&stp->sd_serial,	"Serial No",
9197 	    KSTAT_DATA_CHAR);
9198 	kstat_named_init(&stp->sd_capacity,	"Size",
9199 	    KSTAT_DATA_ULONGLONG);
9200 	kstat_named_init(&stp->sd_rq_media_err,	"Media Error",
9201 	    KSTAT_DATA_UINT32);
9202 	kstat_named_init(&stp->sd_rq_ntrdy_err,	"Device Not Ready",
9203 	    KSTAT_DATA_UINT32);
9204 	kstat_named_init(&stp->sd_rq_nodev_err,	"No Device",
9205 	    KSTAT_DATA_UINT32);
9206 	kstat_named_init(&stp->sd_rq_recov_err,	"Recoverable",
9207 	    KSTAT_DATA_UINT32);
9208 	kstat_named_init(&stp->sd_rq_illrq_err,	"Illegal Request",
9209 	    KSTAT_DATA_UINT32);
9210 	kstat_named_init(&stp->sd_rq_pfa_err,	"Predictive Failure Analysis",
9211 	    KSTAT_DATA_UINT32);
9212 
9213 	un->un_errstats->ks_private = un;
9214 	un->un_errstats->ks_update  = nulldev;
9215 
9216 	kstat_install(un->un_errstats);
9217 }
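/*
 * For reference, the error kstats created above are the ones surfaced by
 * "iostat -E".  They can also be read directly with kstat(8); a sketch for
 * a hypothetical instance 0:
 *
 *	# kstat sderr:0:sd0,err
 */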
9218 
9219 
9220 /*
9221  *    Function: sd_set_errstats
9222  *
9223  * Description: This routine sets the value of the vendor id, product id,
9224  *		revision, serial number, and capacity device error stats.
9225  *
9226  *		Note: During attach the stats are instantiated first so they are
9227  *		available for attach-time routines that utilize the driver
9228  *		iopath to send commands to the device. The stats are initialized
9229  *		separately so data obtained during some attach-time routines is
9230  *		available. (4362483)
9231  *
9232  *   Arguments: un - driver soft state (unit) structure
9233  *
9234  *     Context: Kernel thread context
9235  */
9236 
9237 static void
9238 sd_set_errstats(struct sd_lun *un)
9239 {
9240 	struct	sd_errstats	*stp;
9241 	char			*sn;
9242 
9243 	ASSERT(un != NULL);
9244 	ASSERT(un->un_errstats != NULL);
9245 	stp = (struct sd_errstats *)un->un_errstats->ks_data;
9246 	ASSERT(stp != NULL);
9247 	(void) strncpy(stp->sd_vid.value.c, un->un_sd->sd_inq->inq_vid, 8);
9248 	(void) strncpy(stp->sd_pid.value.c, un->un_sd->sd_inq->inq_pid, 16);
9249 	(void) strncpy(stp->sd_revision.value.c,
9250 	    un->un_sd->sd_inq->inq_revision, 4);
9251 
9252 	/*
9253 	 * All the errstats are persistent across detach/attach, so
9254 	 * reset all of them here in case a disk drive was
9255 	 * hot-replaced, except for Sun-qualified drives that have
9256 	 * not changed.
9257 	 */
9258 	if ((bcmp(&SD_INQUIRY(un)->inq_pid[9], "SUN", 3) != 0) ||
9259 	    (bcmp(&SD_INQUIRY(un)->inq_serial, stp->sd_serial.value.c,
9260 	    sizeof (SD_INQUIRY(un)->inq_serial)) != 0)) {
9261 		stp->sd_softerrs.value.ui32 = 0;
9262 		stp->sd_harderrs.value.ui32 = 0;
9263 		stp->sd_transerrs.value.ui32 = 0;
9264 		stp->sd_rq_media_err.value.ui32 = 0;
9265 		stp->sd_rq_ntrdy_err.value.ui32 = 0;
9266 		stp->sd_rq_nodev_err.value.ui32 = 0;
9267 		stp->sd_rq_recov_err.value.ui32 = 0;
9268 		stp->sd_rq_illrq_err.value.ui32 = 0;
9269 		stp->sd_rq_pfa_err.value.ui32 = 0;
9270 	}
9271 
9272 	/*
9273 	 * Set the "Serial No" kstat for Sun qualified drives (indicated by
9274 	 * "SUN" in bytes 25-27 of the inquiry data (bytes 9-11 of the pid)
9275 	 * (4376302))
9276 	 */
9277 	if (bcmp(&SD_INQUIRY(un)->inq_pid[9], "SUN", 3) == 0) {
9278 		bcopy(&SD_INQUIRY(un)->inq_serial, stp->sd_serial.value.c,
9279 		    sizeof (SD_INQUIRY(un)->inq_serial));
9280 	} else {
9281 		/*
9282 		 * Set the "Serial No" kstat for non-Sun qualified drives
9283 		 */
9284 		if (ddi_prop_lookup_string(DDI_DEV_T_ANY, SD_DEVINFO(un),
9285 		    DDI_PROP_NOTPROM | DDI_PROP_DONTPASS,
9286 		    INQUIRY_SERIAL_NO, &sn) == DDI_SUCCESS) {
9287 			(void) strlcpy(stp->sd_serial.value.c, sn,
9288 			    sizeof (stp->sd_serial.value.c));
9289 			ddi_prop_free(sn);
9290 		}
9291 	}
9292 
9293 	if (un->un_f_blockcount_is_valid != TRUE) {
9294 		/*
9295 		 * Set capacity error stat to 0 for no media. This ensures
9296 		 * a valid capacity is displayed in response to 'iostat -E'
9297 		 * when no media is present in the device.
9298 		 */
9299 		stp->sd_capacity.value.ui64 = 0;
9300 	} else {
9301 		/*
9302 		 * Multiply un_blockcount by un->un_sys_blocksize to get
9303 		 * capacity.
9304 		 *
9305 		 * Note: for non-512 blocksize devices "un_blockcount" has been
9306 		 * "scaled" in sd_send_scsi_READ_CAPACITY by multiplying by
9307 		 * (un_tgt_blocksize / un->un_sys_blocksize).
9308 		 */
9309 		stp->sd_capacity.value.ui64 = (uint64_t)
9310 		    ((uint64_t)un->un_blockcount * un->un_sys_blocksize);
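		/*
		 * Worked example (illustrative figures): a 4096-byte-native
		 * drive reporting 4194304 target blocks is scaled to
		 * un_blockcount = 33554432 at a 512-byte un_sys_blocksize,
		 * so sd_capacity = 33554432 * 512 = 17179869184 (16 GiB).
		 */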
9311 	}
9312 }
9313 
9314 
9315 /*
9316  *    Function: sd_set_pstats
9317  *
9318  * Description: This routine instantiates and initializes the partition
9319  *              stats for each partition with more than zero blocks.
9320  *		(4363169)
9321  *
9322  *   Arguments: un - driver soft state (unit) structure
9323  *
9324  *     Context: Kernel thread context
9325  */
9326 
9327 static void
9328 sd_set_pstats(struct sd_lun *un)
9329 {
9330 	char	kstatname[KSTAT_STRLEN];
9331 	int	instance;
9332 	int	i;
9333 	diskaddr_t	nblks = 0;
9334 	char	*partname = NULL;
9335 
9336 	ASSERT(un != NULL);
9337 
9338 	instance = ddi_get_instance(SD_DEVINFO(un));
9339 
9340 	/* Note:x86: is this a VTOC8/VTOC16 difference? */
9341 	for (i = 0; i < NSDMAP; i++) {
9342 
9343 		if (cmlb_partinfo(un->un_cmlbhandle, i,
9344 		    &nblks, NULL, &partname, NULL, (void *)SD_PATH_DIRECT) != 0)
9345 			continue;
9346 		mutex_enter(SD_MUTEX(un));
9347 
9348 		if ((un->un_pstats[i] == NULL) &&
9349 		    (nblks != 0)) {
9350 
9351 			(void) snprintf(kstatname, sizeof (kstatname),
9352 			    "%s%d,%s", sd_label, instance,
9353 			    partname);
9354 
9355 			un->un_pstats[i] = kstat_create(sd_label,
9356 			    instance, kstatname, "partition", KSTAT_TYPE_IO,
9357 			    1, KSTAT_FLAG_PERSISTENT);
9358 			if (un->un_pstats[i] != NULL) {
9359 				un->un_pstats[i]->ks_lock = SD_MUTEX(un);
9360 				kstat_install(un->un_pstats[i]);
9361 			}
9362 		}
9363 		mutex_exit(SD_MUTEX(un));
9364 	}
9365 }
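/*
 * The kstats created above are named "<label><instance>,<partition>",
 * e.g. "sd0,a"; they back the per-partition statistics that iostat(8)
 * reports when partition reporting (-p) is requested.
 */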
9366 
9367 
9368 #if (defined(__fibre))
9369 /*
9370  *    Function: sd_init_event_callbacks
9371  *
9372  * Description: This routine initializes the insertion and removal event
9373  *		callbacks. (fibre only)
9374  *
9375  *   Arguments: un - driver soft state (unit) structure
9376  *
9377  *     Context: Kernel thread context
9378  */
9379 
9380 static void
9381 sd_init_event_callbacks(struct sd_lun *un)
9382 {
9383 	ASSERT(un != NULL);
9384 
9385 	if ((un->un_insert_event == NULL) &&
9386 	    (ddi_get_eventcookie(SD_DEVINFO(un), FCAL_INSERT_EVENT,
9387 	    &un->un_insert_event) == DDI_SUCCESS)) {
9388 		/*
9389 		 * Add the callback for an insertion event
9390 		 */
9391 		(void) ddi_add_event_handler(SD_DEVINFO(un),
9392 		    un->un_insert_event, sd_event_callback, (void *)un,
9393 		    &(un->un_insert_cb_id));
9394 	}
9395 
9396 	if ((un->un_remove_event == NULL) &&
9397 	    (ddi_get_eventcookie(SD_DEVINFO(un), FCAL_REMOVE_EVENT,
9398 	    &un->un_remove_event) == DDI_SUCCESS)) {
9399 		/*
9400 		 * Add the callback for a removal event
9401 		 */
9402 		(void) ddi_add_event_handler(SD_DEVINFO(un),
9403 		    un->un_remove_event, sd_event_callback, (void *)un,
9404 		    &(un->un_remove_cb_id));
9405 	}
9406 }
9407 
9408 
9409 /*
9410  *    Function: sd_event_callback
9411  *
9412  * Description: This routine handles insert/remove events (photon). The
9413  *		state is changed to OFFLINE, which can be used to suppress
9414  *		error messages. (fibre only)
9415  *
9416  *   Arguments: un - driver soft state (unit) structure
9417  *
9418  *     Context: Callout thread context
9419  */
9420 /* ARGSUSED */
9421 static void
9422 sd_event_callback(dev_info_t *dip, ddi_eventcookie_t event, void *arg,
9423     void *bus_impldata)
9424 {
9425 	struct sd_lun *un = (struct sd_lun *)arg;
9426 
9427 	_NOTE(DATA_READABLE_WITHOUT_LOCK(sd_lun::un_insert_event));
9428 	if (event == un->un_insert_event) {
9429 		SD_TRACE(SD_LOG_COMMON, un, "sd_event_callback: insert event");
9430 		mutex_enter(SD_MUTEX(un));
9431 		if (un->un_state == SD_STATE_OFFLINE) {
9432 			if (un->un_last_state != SD_STATE_SUSPENDED) {
9433 				un->un_state = un->un_last_state;
9434 			} else {
9435 				/*
9436 				 * We have gone through SUSPEND/RESUME while
9437 				 * we were offline. Restore the last state
9438 				 */
9439 				un->un_state = un->un_save_state;
9440 			}
9441 		}
9442 		mutex_exit(SD_MUTEX(un));
9443 
9444 	_NOTE(DATA_READABLE_WITHOUT_LOCK(sd_lun::un_remove_event));
9445 	} else if (event == un->un_remove_event) {
9446 		SD_TRACE(SD_LOG_COMMON, un, "sd_event_callback: remove event");
9447 		mutex_enter(SD_MUTEX(un));
9448 		/*
9449 		 * We need to handle an event callback that occurs during
9450 		 * the suspend operation, since we don't prevent it.
9451 		 */
9452 		if (un->un_state != SD_STATE_OFFLINE) {
9453 			if (un->un_state != SD_STATE_SUSPENDED) {
9454 				New_state(un, SD_STATE_OFFLINE);
9455 			} else {
9456 				un->un_last_state = SD_STATE_OFFLINE;
9457 			}
9458 		}
9459 		mutex_exit(SD_MUTEX(un));
9460 	} else {
9461 		scsi_log(SD_DEVINFO(un), sd_label, CE_NOTE,
9462 		    "!Unknown event\n");
9463 	}
9464 
9465 }
9466 #endif
9467 
9468 /*
9469  * Values related to caching mode page depending on whether the unit is ATAPI.
9470  */
9471 #define	SDC_CDB_GROUP(un) ((un->un_f_cfg_is_atapi == TRUE) ? \
9472 	CDB_GROUP1 : CDB_GROUP0)
9473 #define	SDC_HDRLEN(un) ((un->un_f_cfg_is_atapi == TRUE) ? \
9474 	MODE_HEADER_LENGTH_GRP2 : MODE_HEADER_LENGTH)
9475 /*
9476  * Use mode_cache_scsi3 to ensure we get all of the mode sense data, otherwise
9477  * the mode select will fail (mode_cache_scsi3 is a superset of mode_caching).
9478  */
9479 #define	SDC_BUFLEN(un) (SDC_HDRLEN(un) + MODE_BLK_DESC_LENGTH + \
9480 	sizeof (struct mode_cache_scsi3))
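/*
 * A worked sizing example, assuming the usual <sys/scsi/generic/mode.h>
 * values (MODE_HEADER_LENGTH = 4, MODE_HEADER_LENGTH_GRP2 = 8,
 * MODE_BLK_DESC_LENGTH = 8) and a 20-byte SCSI-3 caching page:
 * SDC_BUFLEN works out to 4 + 8 + 20 = 32 bytes for SCSI units and
 * 8 + 8 + 20 = 36 bytes for ATAPI units.
 */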
9481 
9482 static int
9483 sd_get_caching_mode_page(sd_ssc_t *ssc, uchar_t page_control, uchar_t **header,
9484     int *bdlen)
9485 {
9486 	struct sd_lun	*un = ssc->ssc_un;
9487 	struct mode_caching *mode_caching_page;
9488 	size_t		buflen = SDC_BUFLEN(un);
9489 	int		hdrlen = SDC_HDRLEN(un);
9490 	int		rval;
9491 
9492 	/*
9493 	 * Do a test unit ready, otherwise a mode sense may not work if this
9494 	 * is the first command sent to the device after boot.
9495 	 */
9496 	if (sd_send_scsi_TEST_UNIT_READY(ssc, 0) != 0)
9497 		sd_ssc_assessment(ssc, SD_FMT_IGNORE);
9498 
9499 	/*
9500 	 * Allocate memory for the retrieved mode page and its headers.  Set
9501 	 * a pointer to the page itself.
9502 	 */
9503 	*header = kmem_zalloc(buflen, KM_SLEEP);
9504 
9505 	/* Get the information from the device */
9506 	rval = sd_send_scsi_MODE_SENSE(ssc, SDC_CDB_GROUP(un), *header, buflen,
9507 	    page_control | MODEPAGE_CACHING, SD_PATH_DIRECT);
9508 	if (rval != 0) {
9509 		SD_ERROR(SD_LOG_IOCTL_RMMEDIA, un, "%s: Mode Sense Failed\n",
9510 		    __func__);
9511 		goto mode_sense_failed;
9512 	}
9513 
9514 	/*
9515 	 * Determine size of Block Descriptors in order to locate
9516 	 * the mode page data. ATAPI devices return 0, SCSI devices
9517 	 * should return MODE_BLK_DESC_LENGTH.
9518 	 */
9519 	if (un->un_f_cfg_is_atapi == TRUE) {
9520 		struct mode_header_grp2 *mhp =
9521 		    (struct mode_header_grp2 *)(*header);
9522 		*bdlen = (mhp->bdesc_length_hi << 8) | mhp->bdesc_length_lo;
9523 	} else {
9524 		*bdlen = ((struct mode_header *)(*header))->bdesc_length;
9525 	}
9526 
9527 	if (*bdlen > MODE_BLK_DESC_LENGTH) {
9528 		sd_ssc_set_info(ssc, SSC_FLAGS_INVALID_DATA, 0,
9529 		    "%s: Mode Sense returned invalid block descriptor length\n",
9530 		    __func__);
9531 		rval = EIO;
9532 		goto mode_sense_failed;
9533 	}
9534 
9535 	mode_caching_page = (struct mode_caching *)(*header + hdrlen + *bdlen);
9536 	if (mode_caching_page->mode_page.code != MODEPAGE_CACHING) {
9537 		sd_ssc_set_info(ssc, SSC_FLAGS_INVALID_DATA, SD_LOG_COMMON,
9538 		    "%s: Mode Sense caching page code mismatch %d\n",
9539 		    __func__, mode_caching_page->mode_page.code);
9540 		rval = EIO;
9541 	}
9542 
9543 mode_sense_failed:
9544 	if (rval != 0) {
9545 		kmem_free(*header, buflen);
9546 		*header = NULL;
9547 		*bdlen = 0;
9548 	}
9549 	return (rval);
9550 }
9551 
9552 /*
9553  *    Function: sd_cache_control()
9554  *
9555  * Description: This routine is the driver entry point for setting
9556  *		read and write caching by modifying the WCE (write cache
9557  *		enable) and RCD (read cache disable) bits of mode
9558  *		page 8 (MODEPAGE_CACHING).
9559  *
9560  *   Arguments: ssc		- ssc contains pointer to driver soft state
9561  *				  (unit) structure for this target.
9562  *		rcd_flag	- flag for controlling the read cache
9563  *		wce_flag	- flag for controlling the write cache
9564  *
9565  * Return Code: EIO
9566  *		code returned by sd_send_scsi_MODE_SENSE and
9567  *		sd_send_scsi_MODE_SELECT
9568  *
9569  *     Context: Kernel Thread
9570  */
9571 
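/*
 * Caching mode page bit semantics (MODEPAGE_CACHING, page 8): WCE = 1
 * means the write cache is enabled, while RCD = 1 means the read cache
 * is DISABLED.  Note the inverted sense of RCD, which is why
 * SD_CACHE_ENABLE maps to rcd = 0 below.
 */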
9572 static int
9573 sd_cache_control(sd_ssc_t *ssc, int rcd_flag, int wce_flag)
9574 {
9575 	struct sd_lun	*un = ssc->ssc_un;
9576 	struct mode_caching *mode_caching_page;
9577 	uchar_t		*header;
9578 	size_t		buflen = SDC_BUFLEN(un);
9579 	int		hdrlen = SDC_HDRLEN(un);
9580 	int		bdlen;
9581 	int		rval;
9582 
9583 	rval = sd_get_caching_mode_page(ssc, MODEPAGE_CURRENT, &header, &bdlen);
9584 	switch (rval) {
9585 	case 0:
9586 		/* Check the relevant bits on successful mode sense */
9587 		mode_caching_page = (struct mode_caching *)(header + hdrlen +
9588 		    bdlen);
9589 		if ((mode_caching_page->rcd && rcd_flag == SD_CACHE_ENABLE) ||
9590 		    (!mode_caching_page->rcd && rcd_flag == SD_CACHE_DISABLE) ||
9591 		    (mode_caching_page->wce && wce_flag == SD_CACHE_DISABLE) ||
9592 		    (!mode_caching_page->wce && wce_flag == SD_CACHE_ENABLE)) {
9593 			size_t sbuflen;
9594 			uchar_t save_pg;
9595 
9596 			/*
9597 			 * Construct select buffer length based on the
9598 			 * length of the sense data returned.
9599 			 */
9600 			sbuflen = hdrlen + bdlen + sizeof (struct mode_page) +
9601 			    (int)mode_caching_page->mode_page.length;
9602 
9603 			/* Set the caching bits as requested */
9604 			if (rcd_flag == SD_CACHE_ENABLE)
9605 				mode_caching_page->rcd = 0;
9606 			else if (rcd_flag == SD_CACHE_DISABLE)
9607 				mode_caching_page->rcd = 1;
9608 
9609 			if (wce_flag == SD_CACHE_ENABLE)
9610 				mode_caching_page->wce = 1;
9611 			else if (wce_flag == SD_CACHE_DISABLE)
9612 				mode_caching_page->wce = 0;
9613 
9614 			/*
9615 			 * Save the page if the mode sense says the
9616 			 * drive supports it.
9617 			 */
9618 			save_pg = mode_caching_page->mode_page.ps ?
9619 			    SD_SAVE_PAGE : SD_DONTSAVE_PAGE;
9620 
9621 			/* Clear reserved bits before mode select */
9622 			mode_caching_page->mode_page.ps = 0;
9623 
9624 			/*
9625 			 * Clear out mode header for mode select.
9626 			 * The rest of the retrieved page will be reused.
9627 			 */
9628 			bzero(header, hdrlen);
9629 
9630 			if (un->un_f_cfg_is_atapi == TRUE) {
9631 				struct mode_header_grp2 *mhp =
9632 				    (struct mode_header_grp2 *)header;
9633 				mhp->bdesc_length_hi = bdlen >> 8;
9634 				mhp->bdesc_length_lo = (uchar_t)bdlen & 0xff;
9635 			} else {
9636 				((struct mode_header *)header)->bdesc_length =
9637 				    bdlen;
9638 			}
9639 
9640 			sd_ssc_assessment(ssc, SD_FMT_IGNORE);
9641 
9642 			/* Issue mode select to change the cache settings */
9643 			rval = sd_send_scsi_MODE_SELECT(ssc, SDC_CDB_GROUP(un),
9644 			    header, sbuflen, save_pg, SD_PATH_DIRECT);
9645 		}
9646 		kmem_free(header, buflen);
9647 		break;
9648 	case EIO:
9649 		sd_ssc_assessment(ssc, SD_FMT_STATUS_CHECK);
9650 		break;
9651 	default:
9652 		sd_ssc_assessment(ssc, SD_FMT_IGNORE);
9653 		break;
9654 	}
9655 
9656 	return (rval);
9657 }
9658 
9659 
9660 /*
9661  *    Function: sd_get_write_cache_enabled()
9662  *
9663  * Description: This routine is the driver entry point for determining if write
9664  *		caching is enabled.  It examines the WCE (write cache enable)
9665  *		bits of mode page 8 (MODEPAGE_CACHING) with Page Control field
9666  *		bits set to MODEPAGE_CURRENT.
9667  *
9668  *   Arguments: ssc		- ssc contains pointer to driver soft state
9669  *				  (unit) structure for this target.
9670  *		is_enabled	- pointer to int where write cache enabled state
9671  *				  is returned (non-zero -> write cache enabled)
9672  *
9673  * Return Code: EIO
9674  *		code returned by sd_send_scsi_MODE_SENSE
9675  *
9676  *     Context: Kernel Thread
9677  *
9678  * NOTE: If ioctl is added to disable write cache, this sequence should
9679  * be followed so that no locking is required for accesses to
9680  * un->un_f_write_cache_enabled:
9681  *	do mode select to clear wce
9682  *	do synchronize cache to flush cache
9683  *	set un->un_f_write_cache_enabled = FALSE
9684  *
9685  * Conversely, an ioctl to enable the write cache should be done
9686  * in this order:
9687  *	set un->un_f_write_cache_enabled = TRUE
9688  *	do mode select to set wce
9689  */
9690 
9691 static int
9692 sd_get_write_cache_enabled(sd_ssc_t *ssc, int *is_enabled)
9693 {
9694 	struct sd_lun	*un = ssc->ssc_un;
9695 	struct mode_caching *mode_caching_page;
9696 	uchar_t		*header;
9697 	size_t		buflen = SDC_BUFLEN(un);
9698 	int		hdrlen = SDC_HDRLEN(un);
9699 	int		bdlen;
9700 	int		rval;
9701 
9702 	/* In case of error, flag as enabled */
9703 	*is_enabled = TRUE;
9704 
9705 	rval = sd_get_caching_mode_page(ssc, MODEPAGE_CURRENT, &header, &bdlen);
9706 	switch (rval) {
9707 	case 0:
9708 		mode_caching_page = (struct mode_caching *)(header + hdrlen +
9709 		    bdlen);
9710 		*is_enabled = mode_caching_page->wce;
9711 		sd_ssc_assessment(ssc, SD_FMT_STANDARD);
9712 		kmem_free(header, buflen);
9713 		break;
9714 	case EIO: {
9715 		/*
9716 		 * Some disks do not support Mode Sense(6); we
9717 		 * should ignore this kind of error (sense key
9718 		 * 0x5, ILLEGAL REQUEST).
9719 		 */
9720 		uint8_t *sensep;
9721 		int senlen;
9722 
9723 		sensep = (uint8_t *)ssc->ssc_uscsi_cmd->uscsi_rqbuf;
9724 		senlen = (int)(ssc->ssc_uscsi_cmd->uscsi_rqlen -
9725 		    ssc->ssc_uscsi_cmd->uscsi_rqresid);
9726 
9727 		if (senlen > 0 &&
9728 		    scsi_sense_key(sensep) == KEY_ILLEGAL_REQUEST) {
9729 			sd_ssc_assessment(ssc, SD_FMT_IGNORE_COMPROMISE);
9730 		} else {
9731 			sd_ssc_assessment(ssc, SD_FMT_STATUS_CHECK);
9732 		}
9733 		break;
9734 	}
9735 	default:
9736 		sd_ssc_assessment(ssc, SD_FMT_IGNORE);
9737 		break;
9738 	}
9739 
9740 	return (rval);
9741 }
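
/*
 * Typical caller pattern (an illustrative sketch, not the actual
 * attach-time code):
 *
 *	int wce;
 *
 *	if (sd_get_write_cache_enabled(ssc, &wce) == 0)
 *		un->un_f_write_cache_enabled = (wce != 0);
 */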
9742 
9743 /*
9744  *    Function: sd_get_write_cache_changeable()
9745  *
9746  * Description: This routine is the driver entry point for determining if write
9747  *		caching is changeable.  It examines the WCE (write cache enable)
9748  *		bits of mode page 8 (MODEPAGE_CACHING) with Page Control field
9749  *		bits set to MODEPAGE_CHANGEABLE.
9750  *
9751  *   Arguments: ssc		- ssc contains pointer to driver soft state
9752  *				  (unit) structure for this target.
9753  *		is_changeable	- pointer to int where write cache changeable
9754  *				  state is returned (non-zero -> write cache
9755  *				  changeable)
9756  *
9757  *     Context: Kernel Thread
9758  */
9759 
9760 static void
9761 sd_get_write_cache_changeable(sd_ssc_t *ssc, int *is_changeable)
9762 {
9763 	struct sd_lun	*un = ssc->ssc_un;
9764 	struct mode_caching *mode_caching_page;
9765 	uchar_t		*header;
9766 	size_t		buflen = SDC_BUFLEN(un);
9767 	int		hdrlen = SDC_HDRLEN(un);
9768 	int		bdlen;
9769 	int		rval;
9770 
9771 	/* In case of error, flag as enabled */
9772 	*is_changeable = TRUE;
9773 
9774 	rval = sd_get_caching_mode_page(ssc, MODEPAGE_CHANGEABLE, &header,
9775 	    &bdlen);
9776 	switch (rval) {
9777 	case 0:
9778 		mode_caching_page = (struct mode_caching *)(header + hdrlen +
9779 		    bdlen);
9780 		*is_changeable = mode_caching_page->wce;
9781 		kmem_free(header, buflen);
9782 		sd_ssc_assessment(ssc, SD_FMT_STANDARD);
9783 		break;
9784 	case EIO:
9785 		sd_ssc_assessment(ssc, SD_FMT_STATUS_CHECK);
9786 		break;
9787 	default:
9788 		sd_ssc_assessment(ssc, SD_FMT_IGNORE);
9789 		break;
9790 	}
9791 }
9792 
9793 /*
9794  *    Function: sd_get_nv_sup()
9795  *
9796  * Description: This routine is the driver entry point for
9797  * determining whether the device has a non-volatile cache.
9798  * The determination works as follows:
9799  *
9800  * 1. sd first checks whether sd.conf sets the
9801  * suppress_cache_flush bit for this device.
9802  *
9803  * 2. If sd.conf does not, sd queries the internal disk table.
9804  *
9805  * 3. If either sd.conf or the internal disk table specifies
9806  * that cache flushes be suppressed, the NV_SUP bit is not
9807  * checked at all.
9808  *
9809  * If the SUPPRESS_CACHE_FLUSH bit is not set to 1, sd queries
9810  * the optional INQUIRY VPD page 0x86. If the device
9811  * supports VPD page 0x86, sd examines the NV_SUP
9812  * (non-volatile cache support) bit in the INQUIRY VPD page
9813  * 0x86:
9814  *   o If the NV_SUP bit is set, sd assumes the device has a
9815  *   non-volatile cache and sets un_f_sync_nv_supported to TRUE.
9816  *   o Otherwise the cache is not non-volatile and
9817  *   un_f_sync_nv_supported is set to FALSE.
9818  *
9819  * Arguments: ssc - ssc contains pointer to driver soft state
9820  *	      (unit) structure for this target
9821  *
9822  * Return Code: none (void function)
9823  *
9824  *     Context: Kernel Thread
9825  */
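
/*
 * Reference sketch of the fields examined (per the Extended INQUIRY
 * Data VPD page, SPC-3/SPC-4; shown for orientation only):
 *
 *	inq86[1]	page code (0x86)
 *	inq86[3]	page length
 *	inq86[6]	flags byte; NV_SUP is bit 1 (tested via
 *			SD_VPD_NV_SUP below), with V_SUP in bit 0
 */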
9826 
9827 static void
9828 sd_get_nv_sup(sd_ssc_t *ssc)
9829 {
9830 	int		rval		= 0;
9831 	uchar_t		*inq86		= NULL;
9832 	size_t		inq86_len	= MAX_INQUIRY_SIZE;
9833 	size_t		inq86_resid	= 0;
9834 	struct		dk_callback *dkc;
9835 	struct sd_lun	*un;
9836 
9837 	ASSERT(ssc != NULL);
9838 	un = ssc->ssc_un;
9839 	ASSERT(un != NULL);
9840 
9841 	mutex_enter(SD_MUTEX(un));
9842 
9843 	/*
9844 	 * Be conservative on the device's support of
9845 	 * SYNC_NV bit: un_f_sync_nv_supported is
9846 	 * initialized to be false.
9847 	 */
9848 	un->un_f_sync_nv_supported = FALSE;
9849 
9850 	/*
9851 	 * If either sd.conf or the internal disk table
9852 	 * specifies that cache flushes be suppressed, then
9853 	 * we don't bother checking the NV_SUP bit.
9854 	 */
9855 	if (un->un_f_suppress_cache_flush == TRUE) {
9856 		mutex_exit(SD_MUTEX(un));
9857 		return;
9858 	}
9859 
9860 	if (sd_check_vpd_page_support(ssc) == 0 &&
9861 	    un->un_vpd_page_mask & SD_VPD_EXTENDED_DATA_PG) {
9862 		mutex_exit(SD_MUTEX(un));
9863 		/* collect page 86 data if available */
9864 		inq86 = kmem_zalloc(inq86_len, KM_SLEEP);
9865 
9866 		rval = sd_send_scsi_INQUIRY(ssc, inq86, inq86_len,
9867 		    0x01, 0x86, &inq86_resid);
9868 
9869 		if (rval == 0 && (inq86_len - inq86_resid > 6)) {
9870 			SD_TRACE(SD_LOG_COMMON, un,
9871 			    "sd_get_nv_sup: \
9872 			    successfully get VPD page: %x \
9873 			    PAGE LENGTH: %x BYTE 6: %x\n",
9874 			    inq86[1], inq86[3], inq86[6]);
9875 
9876 			mutex_enter(SD_MUTEX(un));
9877 			/*
9878 			 * check the value of NV_SUP bit: only if the device
9879 			 * reports NV_SUP bit to be 1, the
9880 			 * un_f_sync_nv_supported bit will be set to true.
9881 			 */
9882 			if (inq86[6] & SD_VPD_NV_SUP) {
9883 				un->un_f_sync_nv_supported = TRUE;
9884 			}
9885 			mutex_exit(SD_MUTEX(un));
9886 		} else if (rval != 0) {
9887 			sd_ssc_assessment(ssc, SD_FMT_IGNORE);
9888 		}
9889 
9890 		kmem_free(inq86, inq86_len);
9891 	} else {
9892 		mutex_exit(SD_MUTEX(un));
9893 	}
9894 
9895 	/*
9896 	 * Send a SYNC CACHE command to check whether the
9897 	 * SYNC_NV bit is supported. By this point
9898 	 * un_f_sync_nv_supported holds the correct value.
9899 	 */
9900 	mutex_enter(SD_MUTEX(un));
9901 	if (un->un_f_sync_nv_supported) {
9902 		mutex_exit(SD_MUTEX(un));
9903 		dkc = kmem_zalloc(sizeof (struct dk_callback), KM_SLEEP);
9904 		dkc->dkc_flag = FLUSH_VOLATILE;
9905 		(void) sd_send_scsi_SYNCHRONIZE_CACHE(un, dkc);
9906 
9907 		/*
9908 		 * Send a TEST UNIT READY command to the device. This should
9909 		 * clear any outstanding UNIT ATTENTION that may be present.
9910 		 */
9911 		rval = sd_send_scsi_TEST_UNIT_READY(ssc, SD_DONT_RETRY_TUR);
9912 		if (rval != 0)
9913 			sd_ssc_assessment(ssc, SD_FMT_IGNORE);
9914 
9915 		kmem_free(dkc, sizeof (struct dk_callback));
9916 	} else {
9917 		mutex_exit(SD_MUTEX(un));
9918 	}
9919 
9920 	SD_TRACE(SD_LOG_COMMON, un, "sd_get_nv_sup: \
9921 	    un_f_suppress_cache_flush is set to %d\n",
9922 	    un->un_f_suppress_cache_flush);
9923 }
9924 
9925 /*
9926  *    Function: sd_make_device
9927  *
9928  * Description: Utility routine to return the Solaris device number from
9929  *		the data in the device's dev_info structure.
9930  *
9931  * Return Code: The Solaris device number
9932  *
9933  *     Context: Any
9934  */
9935 
9936 static dev_t
9937 sd_make_device(dev_info_t *devi)
9938 {
9939 	return (makedevice(ddi_driver_major(devi),
9940 	    ddi_get_instance(devi) << SDUNIT_SHIFT));
9941 }
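
/*
 * For example (illustrative only): if SDUNIT_SHIFT were 6, instance 2
 * would own minor numbers 128..191, one per partition/slice, and the
 * SDUNIT()/SDPART() macros would recover the instance and partition
 * from a dev_t by reversing this encoding.
 */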
9942 
9943 
9944 /*
9945  *    Function: sd_pm_entry
9946  *
9947  * Description: Called at the start of a new command to manage power
9948  *		and busy status of a device. This includes determining whether
9949  *		the current power state of the device is sufficient for
9950  *		performing the command or whether it must be changed.
9951  *		The PM framework is notified appropriately.
9952  *		Only with a return status of DDI_SUCCESS will the
9953  *		component be marked busy to the framework.
9954  *
9955  *		All callers of sd_pm_entry must check the return status
9956  *		and only call sd_pm_exit if it was DDI_SUCCESS. A status
9957  *		of DDI_FAILURE indicates the device failed to power up.
9958  *		In this case un_pm_count has been adjusted so the result
9959  *		on exit is still powered down, i.e. count is less than 0.
9960  *		Calling sd_pm_exit with this count value hits an ASSERT.
9961  *
9962  * Return Code: DDI_SUCCESS or DDI_FAILURE
9963  *
9964  *     Context: Kernel thread context.
9965  */
9966 
9967 static int
9968 sd_pm_entry(struct sd_lun *un)
9969 {
9970 	int return_status = DDI_SUCCESS;
9971 
9972 	ASSERT(!mutex_owned(SD_MUTEX(un)));
9973 	ASSERT(!mutex_owned(&un->un_pm_mutex));
9974 
9975 	SD_TRACE(SD_LOG_IO_PM, un, "sd_pm_entry: entry\n");
9976 
9977 	if (un->un_f_pm_is_enabled == FALSE) {
9978 		SD_TRACE(SD_LOG_IO_PM, un,
9979 		    "sd_pm_entry: exiting, PM not enabled\n");
9980 		return (return_status);
9981 	}
9982 
9983 	/*
9984 	 * Just increment a counter if PM is enabled. On the transition from
9985 	 * 0 ==> 1, mark the device as busy.  The iodone side will decrement
9986 	 * the count with each IO and mark the device as idle when the count
9987 	 * hits 0.
9988 	 *
9989 	 * If the count is less than 0 the device is powered down. If a powered
9990 	 * down device is successfully powered up then the count must be
9991 	 * incremented to reflect the power up. Note that it'll get incremented
9992 	 * a second time to become busy.
9993 	 *
9994 	 * Because the following has the potential to change the device state
9995 	 * and must release the un_pm_mutex to do so, only one thread can be
9996 	 * allowed through at a time.
9997 	 */
9998 
9999 	mutex_enter(&un->un_pm_mutex);
10000 	while (un->un_pm_busy == TRUE) {
10001 		cv_wait(&un->un_pm_busy_cv, &un->un_pm_mutex);
10002 	}
10003 	un->un_pm_busy = TRUE;
10004 
10005 	if (un->un_pm_count < 1) {
10006 
10007 		SD_TRACE(SD_LOG_IO_PM, un, "sd_pm_entry: busy component\n");
10008 
10009 		/*
10010 		 * Indicate we are now busy so the framework won't attempt to
10011 		 * power down the device. This call will only fail if either
10012 		 * we passed a bad component number or the device has no
10013 		 * components. Neither of these should ever happen.
10014 		 */
10015 		mutex_exit(&un->un_pm_mutex);
10016 		return_status = pm_busy_component(SD_DEVINFO(un), 0);
10017 		ASSERT(return_status == DDI_SUCCESS);
10018 
10019 		mutex_enter(&un->un_pm_mutex);
10020 
10021 		if (un->un_pm_count < 0) {
10022 			mutex_exit(&un->un_pm_mutex);
10023 
10024 			SD_TRACE(SD_LOG_IO_PM, un,
10025 			    "sd_pm_entry: power up component\n");
10026 
10027 			/*
10028 			 * pm_raise_power will cause sdpower to be called
10029 			 * which brings the device power level to the
10030 			 * desired state, If successful, un_pm_count and
10031 			 * un_power_level will be updated appropriately.
10032 			 */
10033 			return_status = pm_raise_power(SD_DEVINFO(un), 0,
10034 			    SD_PM_STATE_ACTIVE(un));
10035 
10036 			mutex_enter(&un->un_pm_mutex);
10037 
10038 			if (return_status != DDI_SUCCESS) {
10039 				/*
10040 				 * Power up failed.
10041 				 * Idle the device and adjust the count
10042 				 * so the result on exit is that we're
10043 				 * still powered down, ie. count is less than 0.
10044 				 */
10045 				SD_TRACE(SD_LOG_IO_PM, un,
10046 				    "sd_pm_entry: power up failed,"
10047 				    " idle the component\n");
10048 
10049 				(void) pm_idle_component(SD_DEVINFO(un), 0);
10050 				un->un_pm_count--;
10051 			} else {
10052 				/*
10053 				 * Device is powered up, verify the
10054 				 * count is non-negative.
10055 				 * This is debug only.
10056 				 */
10057 				ASSERT(un->un_pm_count == 0);
10058 			}
10059 		}
10060 
10061 		if (return_status == DDI_SUCCESS) {
10062 			/*
10063 			 * For performance, now that the device has been tagged
10064 			 * as busy, and it's known to be powered up, update the
10065 			 * chain types to use jump tables that do not include
10066 			 * pm. This significantly lowers the overhead and
10067 			 * therefore improves performance.
10068 			 */
10069 
10070 			mutex_exit(&un->un_pm_mutex);
10071 			mutex_enter(SD_MUTEX(un));
10072 			SD_TRACE(SD_LOG_IO_PM, un,
10073 			    "sd_pm_entry: changing uscsi_chain_type from %d\n",
10074 			    un->un_uscsi_chain_type);
10075 
10076 			if (un->un_f_non_devbsize_supported) {
10077 				un->un_buf_chain_type =
10078 				    SD_CHAIN_INFO_RMMEDIA_NO_PM;
10079 			} else {
10080 				un->un_buf_chain_type =
10081 				    SD_CHAIN_INFO_DISK_NO_PM;
10082 			}
10083 			un->un_uscsi_chain_type = SD_CHAIN_INFO_USCSI_CMD_NO_PM;
10084 
10085 			SD_TRACE(SD_LOG_IO_PM, un,
10086 			    "             changed  uscsi_chain_type to   %d\n",
10087 			    un->un_uscsi_chain_type);
10088 			mutex_exit(SD_MUTEX(un));
10089 			mutex_enter(&un->un_pm_mutex);
10090 
10091 			if (un->un_pm_idle_timeid == NULL) {
10092 				/* 300 ms. */
10093 				un->un_pm_idle_timeid =
10094 				    timeout(sd_pm_idletimeout_handler, un,
10095 				    (drv_usectohz((clock_t)300000)));
10096 				/*
10097 				 * Include an extra call to busy which keeps the
10098 				 * device busy with respect to the PM layer
10099 				 * until the timer fires, at which time it'll
10100 				 * get the extra idle call.
10101 				 */
10102 				(void) pm_busy_component(SD_DEVINFO(un), 0);
10103 			}
10104 		}
10105 	}
10106 	un->un_pm_busy = FALSE;
10107 	/* Next... */
10108 	cv_signal(&un->un_pm_busy_cv);
10109 
10110 	un->un_pm_count++;
10111 
10112 	SD_TRACE(SD_LOG_IO_PM, un,
10113 	    "sd_pm_entry: exiting, un_pm_count = %d\n", un->un_pm_count);
10114 
10115 	mutex_exit(&un->un_pm_mutex);
10116 
10117 	return (return_status);
10118 }
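
/*
 * Typical caller pairing (an illustrative sketch): every successful
 * sd_pm_entry() is balanced by exactly one sd_pm_exit() once the
 * command completes, as sdopen()/sdclose() below demonstrate:
 *
 *	if (sd_pm_entry(un) != DDI_SUCCESS)
 *		return (EIO);
 *	... issue the command ...
 *	sd_pm_exit(un);
 */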
10119 
10120 
10121 /*
10122  *    Function: sd_pm_exit
10123  *
10124  * Description: Called at the completion of a command to manage busy
10125  *		status for the device. If the device becomes idle the
10126  *		PM framework is notified.
10127  *
10128  *     Context: Kernel thread context
10129  */
10130 
10131 static void
10132 sd_pm_exit(struct sd_lun *un)
10133 {
10134 	ASSERT(!mutex_owned(SD_MUTEX(un)));
10135 	ASSERT(!mutex_owned(&un->un_pm_mutex));
10136 
10137 	SD_TRACE(SD_LOG_IO_PM, un, "sd_pm_exit: entry\n");
10138 
10139 	/*
10140 	 * After attach the following flag is only read, so don't
10141 	 * take the penalty of acquiring a mutex for it.
10142 	 */
10143 	if (un->un_f_pm_is_enabled == TRUE) {
10144 
10145 		mutex_enter(&un->un_pm_mutex);
10146 		un->un_pm_count--;
10147 
10148 		SD_TRACE(SD_LOG_IO_PM, un,
10149 		    "sd_pm_exit: un_pm_count = %d\n", un->un_pm_count);
10150 
10151 		ASSERT(un->un_pm_count >= 0);
10152 		if (un->un_pm_count == 0) {
10153 			mutex_exit(&un->un_pm_mutex);
10154 
10155 			SD_TRACE(SD_LOG_IO_PM, un,
10156 			    "sd_pm_exit: idle component\n");
10157 
10158 			(void) pm_idle_component(SD_DEVINFO(un), 0);
10159 
10160 		} else {
10161 			mutex_exit(&un->un_pm_mutex);
10162 		}
10163 	}
10164 
10165 	SD_TRACE(SD_LOG_IO_PM, un, "sd_pm_exit: exiting\n");
10166 }
10167 
10168 
10169 /*
10170  *    Function: sdopen
10171  *
10172  * Description: Driver's open(9e) entry point function.
10173  *
10174  *   Arguments: dev_p   - pointer to device number
10175  *		flag    - how to open file (FEXCL, FNDELAY, FREAD, FWRITE)
10176  *		otyp    - open type (OTYP_BLK, OTYP_CHR, OTYP_LYR)
10177  *		cred_p  - user credential pointer
10178  *
10179  * Return Code: EINVAL
10180  *		ENXIO
10181  *		EIO
10182  *		EROFS
10183  *		EBUSY
10184  *
10185  *     Context: Kernel thread context
10186  */
10187 /* ARGSUSED */
10188 static int
10189 sdopen(dev_t *dev_p, int flag, int otyp, cred_t *cred_p)
10190 {
10191 	struct sd_lun	*un;
10192 	int		nodelay;
10193 	int		part;
10194 	uint64_t	partmask;
10195 	int		instance;
10196 	dev_t		dev;
10197 	int		rval = EIO;
10198 	diskaddr_t	nblks = 0;
10199 	diskaddr_t	label_cap;
10200 
10201 	/* Validate the open type */
10202 	if (otyp >= OTYPCNT) {
10203 		return (EINVAL);
10204 	}
10205 
10206 	dev = *dev_p;
10207 	instance = SDUNIT(dev);
10208 	mutex_enter(&sd_detach_mutex);
10209 
10210 	/*
10211 	 * Fail the open if there is no softstate for the instance, or
10212 	 * if another thread somewhere is trying to detach the instance.
10213 	 */
10214 	if (((un = ddi_get_soft_state(sd_state, instance)) == NULL) ||
10215 	    (un->un_detach_count != 0)) {
10216 		mutex_exit(&sd_detach_mutex);
10217 		/*
10218 		 * The probe cache only needs to be cleared when open(9e) fails
10219 		 * with ENXIO (4238046).
10220 		 *
10221 		 * Unconditionally clearing the probe cache is fine when sd and
10222 		 * ssd are separate binaries; on x86, where a single binary
10223 		 * handles both parallel SCSI and fibre channel devices, doing
10224 		 * so can be an issue.
10225 		 */
10227 		sd_scsi_clear_probe_cache();
10228 		return (ENXIO);
10229 	}
10230 
10231 	/*
10232 	 * The un_layer_count is to prevent another thread in specfs from
10233 	 * trying to detach the instance, which can happen when we are
10234 	 * called from a higher-layer driver instead of thru specfs.
10235 	 * This will not be needed when DDI provides a layered driver
10236 	 * interface that allows specfs to know that an instance is in
10237 	 * use by a layered driver & should not be detached.
10238 	 *
10239 	 * Note: the semantics for layered driver opens are exactly one
10240 	 * close for every open.
10241 	 */
10242 	if (otyp == OTYP_LYR) {
10243 		un->un_layer_count++;
10244 	}
10245 
10246 	/*
10247 	 * Keep a count of the current # of opens in progress. This is because
10248 	 * some layered drivers try to call us as a regular open. This can
10249 	 * cause problems that we cannot prevent, however by keeping this count
10250 	 * we can at least keep our open and detach routines from racing against
10251 	 * each other under such conditions.
10252 	 */
10253 	un->un_opens_in_progress++;
10254 	mutex_exit(&sd_detach_mutex);
10255 
10256 	nodelay  = (flag & (FNDELAY | FNONBLOCK));
10257 	part	 = SDPART(dev);
10258 	partmask = 1 << part;
10259 
10260 	/*
10261 	 * We use a semaphore here in order to serialize
10262 	 * open and close requests on the device.
10263 	 */
10264 	sema_p(&un->un_semoclose);
10265 
10266 	mutex_enter(SD_MUTEX(un));
10267 
10268 	/*
10269 	 * All device accesses go thru sdstrategy(), where we check
10270 	 * the suspend status; but a scsi_poll command can bypass
10271 	 * sdstrategy(), so we need to check the pm status here
10272 	 * as well.
10273 	 */
10274 
10275 	if (!nodelay) {
10276 		while ((un->un_state == SD_STATE_SUSPENDED) ||
10277 		    (un->un_state == SD_STATE_PM_CHANGING)) {
10278 			cv_wait(&un->un_suspend_cv, SD_MUTEX(un));
10279 		}
10280 
10281 		mutex_exit(SD_MUTEX(un));
10282 		if (sd_pm_entry(un) != DDI_SUCCESS) {
10283 			rval = EIO;
10284 			SD_ERROR(SD_LOG_OPEN_CLOSE, un,
10285 			    "sdopen: sd_pm_entry failed\n");
10286 			goto open_failed_with_pm;
10287 		}
10288 		mutex_enter(SD_MUTEX(un));
10289 	}
10290 
10291 	/* check for previous exclusive open */
10292 	SD_TRACE(SD_LOG_OPEN_CLOSE, un, "sdopen: un=%p\n", (void *)un);
10293 	SD_TRACE(SD_LOG_OPEN_CLOSE, un,
10294 	    "sdopen: exclopen=%x, flag=%x, regopen=%x\n",
10295 	    un->un_exclopen, flag, un->un_ocmap.regopen[otyp]);
10296 
10297 	if (un->un_exclopen & (partmask)) {
10298 		goto excl_open_fail;
10299 	}
10300 
10301 	if (flag & FEXCL) {
10302 		int i;
10303 		if (un->un_ocmap.lyropen[part]) {
10304 			goto excl_open_fail;
10305 		}
10306 		for (i = 0; i < (OTYPCNT - 1); i++) {
10307 			if (un->un_ocmap.regopen[i] & (partmask)) {
10308 				goto excl_open_fail;
10309 			}
10310 		}
10311 	}
10312 
10313 	/*
10314 	 * Check the write permission if this is a removable media device,
10315 	 * NDELAY has not been set, and writable permission is requested.
10316 	 *
10317 	 * Note: If NDELAY was set and this is write-protected media the WRITE
10318 	 * attempt will fail with EIO as part of the I/O processing. This is a
10319 	 * more permissive implementation that allows the open to succeed and
10320 	 * WRITE attempts to fail when appropriate.
10321 	 */
10322 	if (un->un_f_chk_wp_open) {
10323 		if ((flag & FWRITE) && (!nodelay)) {
10324 			mutex_exit(SD_MUTEX(un));
10325 			/*
10326 			 * For a writable DVD drive, defer the write-permission
10327 			 * check until sdstrategy, and do not fail the open even
10328 			 * if FWRITE is set, since whether the device is writable
10329 			 * depends on the media, and the media can change after
10330 			 * the call to open().
10331 			 */
10332 			if (un->un_f_dvdram_writable_device == FALSE) {
10333 				if (ISCD(un) || sr_check_wp(dev)) {
10334 					rval = EROFS;
10335 					mutex_enter(SD_MUTEX(un));
10336 					SD_ERROR(SD_LOG_OPEN_CLOSE, un, "sdopen: "
10337 					    "write to cd or write protected media\n");
10338 					goto open_fail;
10339 				}
10340 			}
10341 			mutex_enter(SD_MUTEX(un));
10342 		}
10343 	}
10344 
10345 	/*
10346 	 * If opening in NDELAY/NONBLOCK mode, just return.
10347 	 * Check if disk is ready and has a valid geometry later.
10348 	 */
10349 	if (!nodelay) {
10350 		sd_ssc_t	*ssc;
10351 
10352 		mutex_exit(SD_MUTEX(un));
10353 		ssc = sd_ssc_init(un);
10354 		rval = sd_ready_and_valid(ssc, part);
10355 		sd_ssc_fini(ssc);
10356 		mutex_enter(SD_MUTEX(un));
10357 		/*
10358 		 * Fail if device is not ready or if the number of disk
10359 		 * blocks is zero or negative for non CD devices.
10360 		 */
10361 
10362 		nblks = 0;
10363 
10364 		if (rval == SD_READY_VALID && (!ISCD(un))) {
10365 			/* if cmlb_partinfo fails, nblks remains 0 */
10366 			mutex_exit(SD_MUTEX(un));
10367 			(void) cmlb_partinfo(un->un_cmlbhandle, part, &nblks,
10368 			    NULL, NULL, NULL, (void *)SD_PATH_DIRECT);
10369 			mutex_enter(SD_MUTEX(un));
10370 		}
10371 
10372 		if ((rval != SD_READY_VALID) ||
10373 		    (!ISCD(un) && nblks <= 0)) {
10374 			rval = un->un_f_has_removable_media ? ENXIO : EIO;
10375 			SD_ERROR(SD_LOG_OPEN_CLOSE, un, "sdopen: "
10376 			    "device not ready or invalid disk block value\n");
10377 			goto open_fail;
10378 		}
10379 #if defined(__x86)
10380 	} else {
10381 		uchar_t *cp;
10382 		/*
10383 		 * x86 requires special nodelay handling, so that p0 is
10384 		 * always defined and accessible.
10385 		 * Invalidate geometry only if device is not already open.
10386 		 */
10387 		cp = &un->un_ocmap.chkd[0];
10388 		while (cp < &un->un_ocmap.chkd[OCSIZE]) {
10389 			if (*cp != (uchar_t)0) {
10390 				break;
10391 			}
10392 			cp++;
10393 		}
10394 		if (cp == &un->un_ocmap.chkd[OCSIZE]) {
10395 			mutex_exit(SD_MUTEX(un));
10396 			cmlb_invalidate(un->un_cmlbhandle,
10397 			    (void *)SD_PATH_DIRECT);
10398 			mutex_enter(SD_MUTEX(un));
10399 		}
10400 
10401 #endif
10402 	}
10403 
10404 	if (otyp == OTYP_LYR) {
10405 		un->un_ocmap.lyropen[part]++;
10406 	} else {
10407 		un->un_ocmap.regopen[otyp] |= partmask;
10408 	}
10409 
10410 	/* Set up open and exclusive open flags */
10411 	if (flag & FEXCL) {
10412 		un->un_exclopen |= (partmask);
10413 	}
10414 
10415 	/*
10416 	 * If the lun is EFI labeled and lun capacity is greater than the
10417 	 * capacity contained in the label, log a sys-event to notify the
10418 	 * interested module.
10419 	 * To avoid an infinite loop of logging sys-event, we only log the
10420 	 * event when the lun is not opened in NDELAY mode. The event handler
10421 	 * should open the lun in NDELAY mode.
10422 	 */
10423 	if (!nodelay) {
10424 		mutex_exit(SD_MUTEX(un));
10425 		if (cmlb_efi_label_capacity(un->un_cmlbhandle, &label_cap,
10426 		    (void*)SD_PATH_DIRECT) == 0) {
10427 			mutex_enter(SD_MUTEX(un));
10428 			if (un->un_f_blockcount_is_valid &&
10429 			    un->un_blockcount > label_cap &&
10430 			    un->un_f_expnevent == B_FALSE) {
10431 				un->un_f_expnevent = B_TRUE;
10432 				mutex_exit(SD_MUTEX(un));
10433 				sd_log_lun_expansion_event(un,
10434 				    (nodelay ? KM_NOSLEEP : KM_SLEEP));
10435 				mutex_enter(SD_MUTEX(un));
10436 			}
10437 		} else {
10438 			mutex_enter(SD_MUTEX(un));
10439 		}
10440 	}
10441 
10442 	SD_TRACE(SD_LOG_OPEN_CLOSE, un, "sdopen: "
10443 	    "open of part %d type %d\n", part, otyp);
10444 
10445 	mutex_exit(SD_MUTEX(un));
10446 	if (!nodelay) {
10447 		sd_pm_exit(un);
10448 	}
10449 
10450 	sema_v(&un->un_semoclose);
10451 
10452 	mutex_enter(&sd_detach_mutex);
10453 	un->un_opens_in_progress--;
10454 	mutex_exit(&sd_detach_mutex);
10455 
10456 	SD_TRACE(SD_LOG_OPEN_CLOSE, un, "sdopen: exit success\n");
10457 	return (DDI_SUCCESS);
10458 
10459 excl_open_fail:
10460 	SD_ERROR(SD_LOG_OPEN_CLOSE, un, "sdopen: fail exclusive open\n");
10461 	rval = EBUSY;
10462 
10463 open_fail:
10464 	mutex_exit(SD_MUTEX(un));
10465 
10466 	/*
10467 	 * On a failed open we must exit the pm management.
10468 	 */
10469 	if (!nodelay) {
10470 		sd_pm_exit(un);
10471 	}
10472 open_failed_with_pm:
10473 	sema_v(&un->un_semoclose);
10474 
10475 	mutex_enter(&sd_detach_mutex);
10476 	un->un_opens_in_progress--;
10477 	if (otyp == OTYP_LYR) {
10478 		un->un_layer_count--;
10479 	}
10480 	mutex_exit(&sd_detach_mutex);
10481 
10482 	return (rval);
10483 }
10484 
10485 
10486 /*
10487  *    Function: sdclose
10488  *
10489  * Description: Driver's close(9e) entry point function.
10490  *
10491  *   Arguments: dev    - device number
10492  *		flag   - file status flag, informational only
10493  *		otyp   - close type (OTYP_BLK, OTYP_CHR, OTYP_LYR)
10494  *		cred_p - user credential pointer
10495  *
10496  * Return Code: ENXIO
10497  *
10498  *     Context: Kernel thread context
10499  */
10500 /* ARGSUSED */
10501 static int
10502 sdclose(dev_t dev, int flag, int otyp, cred_t *cred_p)
10503 {
10504 	struct sd_lun	*un;
10505 	uchar_t		*cp;
10506 	int		part;
10507 	int		nodelay;
10508 	int		rval = 0;
10509 
10510 	/* Validate the open type */
10511 	if (otyp >= OTYPCNT) {
10512 		return (ENXIO);
10513 	}
10514 
10515 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
10516 		return (ENXIO);
10517 	}
10518 
10519 	part = SDPART(dev);
10520 	nodelay = flag & (FNDELAY | FNONBLOCK);
10521 
10522 	SD_TRACE(SD_LOG_OPEN_CLOSE, un,
10523 	    "sdclose: close of part %d type %d\n", part, otyp);
10524 
10525 	/*
10526 	 * We use a semaphore here in order to serialize
10527 	 * open and close requests on the device.
10528 	 */
10529 	sema_p(&un->un_semoclose);
10530 
10531 	mutex_enter(SD_MUTEX(un));
10532 
10533 	/* Don't proceed if power is being changed. */
10534 	while (un->un_state == SD_STATE_PM_CHANGING) {
10535 		cv_wait(&un->un_suspend_cv, SD_MUTEX(un));
10536 	}
10537 
10538 	if (un->un_exclopen & (1 << part)) {
10539 		un->un_exclopen &= ~(1 << part);
10540 	}
10541 
10542 	/* Update the open partition map */
10543 	if (otyp == OTYP_LYR) {
10544 		un->un_ocmap.lyropen[part] -= 1;
10545 	} else {
10546 		un->un_ocmap.regopen[otyp] &= ~(1 << part);
10547 	}
10548 
10549 	cp = &un->un_ocmap.chkd[0];
10550 	while (cp < &un->un_ocmap.chkd[OCSIZE]) {
10551 		if (*cp != '\0') {
10552 			break;
10553 		}
10554 		cp++;
10555 	}
10556 
10557 	if (cp == &un->un_ocmap.chkd[OCSIZE]) {
10558 		SD_TRACE(SD_LOG_OPEN_CLOSE, un, "sdclose: last close\n");
10559 
10560 		/*
10561 		 * We avoid persistence upon the last close, and set
10562 		 * the throttle back to the maximum.
10563 		 */
10564 		un->un_throttle = un->un_saved_throttle;
10565 
10566 		if (un->un_state == SD_STATE_OFFLINE) {
10567 			if (un->un_f_is_fibre == FALSE) {
10568 				scsi_log(SD_DEVINFO(un), sd_label,
10569 				    CE_WARN, "offline\n");
10570 			}
10571 			mutex_exit(SD_MUTEX(un));
10572 			cmlb_invalidate(un->un_cmlbhandle,
10573 			    (void *)SD_PATH_DIRECT);
10574 			mutex_enter(SD_MUTEX(un));
10575 
10576 		} else {
10577 			/*
10578 			 * Flush any outstanding writes in NVRAM cache.
10579 			 * Note: SYNCHRONIZE CACHE is an optional SCSI-2
10580 			 * cmd; it may not work for non-Pluto devices.
10581 			 * SYNCHRONIZE CACHE is not required for removables,
10582 			 * except DVD-RAM drives.
10583 			 *
10584 			 * Also note: because SYNCHRONIZE CACHE is currently
10585 			 * the only command issued here that requires the
10586 			 * drive be powered up, only do the power up before
10587 			 * sending the Sync Cache command. If additional
10588 			 * commands are added which require a powered up
10589 			 * drive, the following sequence may have to change.
10590 			 *
10591 			 * And finally, note that parallel SCSI on SPARC
10592 			 * only issues a Sync Cache to DVD-RAM, a newly
10593 			 * supported device.
10594 			 */
10595 #if defined(__x86)
10596 			if ((un->un_f_sync_cache_supported &&
10597 			    un->un_f_sync_cache_required) ||
10598 			    un->un_f_dvdram_writable_device == TRUE) {
10599 #else
10600 			if (un->un_f_dvdram_writable_device == TRUE) {
10601 #endif
10602 				mutex_exit(SD_MUTEX(un));
10603 				if (sd_pm_entry(un) == DDI_SUCCESS) {
10604 					rval =
10605 					    sd_send_scsi_SYNCHRONIZE_CACHE(un,
10606 					    NULL);
10607 					/* ignore error if not supported */
10608 					if (rval == ENOTSUP) {
10609 						rval = 0;
10610 					} else if (rval != 0) {
10611 						rval = EIO;
10612 					}
10613 					sd_pm_exit(un);
10614 				} else {
10615 					rval = EIO;
10616 				}
10617 				mutex_enter(SD_MUTEX(un));
10618 			}
10619 
10620 			/*
10621 			 * For devices which support DOOR_LOCK, send an ALLOW
10622 			 * MEDIA REMOVAL command, but don't get upset if it
10623 			 * fails. We need to raise the power of the drive before
10624 			 * we can call sd_send_scsi_DOORLOCK()
10625 			 */
10626 			if (un->un_f_doorlock_supported) {
10627 				mutex_exit(SD_MUTEX(un));
10628 				if (sd_pm_entry(un) == DDI_SUCCESS) {
10629 					sd_ssc_t	*ssc;
10630 
10631 					ssc = sd_ssc_init(un);
10632 					rval = sd_send_scsi_DOORLOCK(ssc,
10633 					    SD_REMOVAL_ALLOW, SD_PATH_DIRECT);
10634 					if (rval != 0)
10635 						sd_ssc_assessment(ssc,
10636 						    SD_FMT_IGNORE);
10637 					sd_ssc_fini(ssc);
10638 
10639 					sd_pm_exit(un);
10640 					if (ISCD(un) && (rval != 0) &&
10641 					    (nodelay != 0)) {
10642 						rval = ENXIO;
10643 					}
10644 				} else {
10645 					rval = EIO;
10646 				}
10647 				mutex_enter(SD_MUTEX(un));
10648 			}
10649 
10650 			/*
10651 			 * If a device has removable media, invalidate all
10652 			 * parameters related to media, such as geometry,
10653 			 * blocksize, and blockcount.
10654 			 */
10655 			if (un->un_f_has_removable_media) {
10656 				sr_ejected(un);
10657 			}
10658 
10659 			/*
10660 			 * Destroy the cache (if it exists) which was
10661 			 * allocated for the write maps since this is
10662 			 * the last close for this media.
10663 			 */
10664 			if (un->un_wm_cache) {
10665 				/*
10666 				 * Check if there are pending commands;
10667 				 * if there are, give a warning and
10668 				 * do not destroy the cache.
10669 				 */
10670 				if (un->un_ncmds_in_driver > 0) {
10671 					scsi_log(SD_DEVINFO(un),
10672 					    sd_label, CE_WARN,
10673 					    "Unable to clean up memory "
10674 					    "because of pending I/O\n");
10675 				} else {
10676 					kmem_cache_destroy(
10677 					    un->un_wm_cache);
10678 					un->un_wm_cache = NULL;
10679 				}
10680 			}
10681 		}
10682 	}
10683 
10684 	mutex_exit(SD_MUTEX(un));
10685 	sema_v(&un->un_semoclose);
10686 
10687 	if (otyp == OTYP_LYR) {
10688 		mutex_enter(&sd_detach_mutex);
10689 		/*
10690 		 * The detach routine may run when the layer count
10691 		 * drops to zero.
10692 		 */
10693 		un->un_layer_count--;
10694 		mutex_exit(&sd_detach_mutex);
10695 	}
10696 
10697 	return (rval);
10698 }
10699 
10700 
10701 /*
10702  *    Function: sd_ready_and_valid
10703  *
10704  * Description: Test if device is ready and has a valid geometry.
10705  *
10706  *   Arguments: ssc  - sd_ssc_t containing the driver soft state (un)
10707  *		part - partition number to validate
10708  *
10709  * Return Code: SD_READY_VALID		ready and valid label
10710  *		SD_NOT_READY_VALID	not ready, no label
10711  *		SD_RESERVED_BY_OTHERS	reservation conflict
10712  *		ENOMEM			could not allocate the wmap cache
10713  *     Context: Never called at interrupt context.
10714  */
10715 
10716 static int
10717 sd_ready_and_valid(sd_ssc_t *ssc, int part)
10718 {
10719 	struct sd_errstats	*stp;
10720 	uint64_t		capacity;
10721 	uint_t			lbasize;
10722 	int			rval = SD_READY_VALID;
10723 	char			name_str[48];
10724 	boolean_t		is_valid;
10725 	struct sd_lun		*un;
10726 	int			status;
10727 
10728 	ASSERT(ssc != NULL);
10729 	un = ssc->ssc_un;
10730 	ASSERT(un != NULL);
10731 	ASSERT(!mutex_owned(SD_MUTEX(un)));
10732 
10733 	mutex_enter(SD_MUTEX(un));
10734 	/*
10735 	 * If a device has removable media, we must check if media is
10736 	 * ready when checking if this device is ready and valid.
10737 	 */
10738 	if (un->un_f_has_removable_media) {
10739 		mutex_exit(SD_MUTEX(un));
10740 		status = sd_send_scsi_TEST_UNIT_READY(ssc, 0);
10741 
10742 		if (status != 0) {
10743 			rval = SD_NOT_READY_VALID;
10744 			mutex_enter(SD_MUTEX(un));
10745 
10746 			/* Ignore all failed status for removable media */
10747 			sd_ssc_assessment(ssc, SD_FMT_IGNORE);
10748 
10749 			goto done;
10750 		}
10751 
10752 		is_valid = SD_IS_VALID_LABEL(un);
10753 		mutex_enter(SD_MUTEX(un));
10754 		if (!is_valid ||
10755 		    (un->un_f_blockcount_is_valid == FALSE) ||
10756 		    (un->un_f_tgt_blocksize_is_valid == FALSE)) {
10757 
10758 			/* capacity has to be read every open. */
10759 			mutex_exit(SD_MUTEX(un));
10760 			status = sd_send_scsi_READ_CAPACITY(ssc, &capacity,
10761 			    &lbasize, SD_PATH_DIRECT);
10762 
10763 			if (status != 0) {
10764 				sd_ssc_assessment(ssc, SD_FMT_IGNORE);
10765 
10766 				cmlb_invalidate(un->un_cmlbhandle,
10767 				    (void *)SD_PATH_DIRECT);
10768 				mutex_enter(SD_MUTEX(un));
10769 				rval = SD_NOT_READY_VALID;
10770 
10771 				goto done;
10772 			} else {
10773 				mutex_enter(SD_MUTEX(un));
10774 				sd_update_block_info(un, lbasize, capacity);
10775 			}
10776 		}
10777 
10778 		/*
10779 		 * Check if the media in the device is writable or not.
10780 		 */
10781 		if (!is_valid && ISCD(un)) {
10782 			sd_check_for_writable_cd(ssc, SD_PATH_DIRECT);
10783 		}
10784 
10785 	} else {
10786 		/*
10787 		 * Do a test unit ready to clear any unit attention from non-cd
10788 		 * devices.
10789 		 */
10790 		mutex_exit(SD_MUTEX(un));
10791 
10792 		status = sd_send_scsi_TEST_UNIT_READY(ssc, 0);
10793 		if (status != 0) {
10794 			sd_ssc_assessment(ssc, SD_FMT_IGNORE);
10795 		}
10796 
10797 		mutex_enter(SD_MUTEX(un));
10798 	}
10799 
10800 
10801 	/*
10802 	 * If this is a non 512 block device, allocate space for
10803 	 * the wmap cache. This is being done here since every time
10804 	 * a media is changed this routine will be called and the
10805 	 * block size is a function of media rather than device.
10806 	 */
10807 	if (((un->un_f_rmw_type != SD_RMW_TYPE_RETURN_ERROR ||
10808 	    un->un_f_non_devbsize_supported) &&
10809 	    un->un_tgt_blocksize != DEV_BSIZE) ||
10810 	    un->un_f_enable_rmw) {
10811 		if (!(un->un_wm_cache)) {
10812 			(void) snprintf(name_str, sizeof (name_str),
10813 			    "%s%d_cache",
10814 			    ddi_driver_name(SD_DEVINFO(un)),
10815 			    ddi_get_instance(SD_DEVINFO(un)));
10816 			un->un_wm_cache = kmem_cache_create(
10817 			    name_str, sizeof (struct sd_w_map),
10818 			    8, sd_wm_cache_constructor,
10819 			    sd_wm_cache_destructor, NULL,
10820 			    (void *)un, NULL, 0);
10821 			if (!(un->un_wm_cache)) {
10822 				rval = ENOMEM;
10823 				goto done;
10824 			}
10825 		}
10826 	}
10827 
10828 	if (un->un_state == SD_STATE_NORMAL) {
10829 		/*
10830 		 * If the target is not yet ready here (defined by a TUR
10831 		 * failure), invalidate the geometry and print an 'offline'
10832 		 * message. This is a legacy message, as the state of the
10833 		 * target is not actually changed to SD_STATE_OFFLINE.
10834 		 *
10835 		 * If the TUR fails for EACCES (Reservation Conflict),
10836 		 * SD_RESERVED_BY_OTHERS will be returned to indicate
10837 		 * reservation conflict. If the TUR fails for other
10838 		 * reasons, SD_NOT_READY_VALID will be returned.
10839 		 */
10840 		int err;
10841 
10842 		mutex_exit(SD_MUTEX(un));
10843 		err = sd_send_scsi_TEST_UNIT_READY(ssc, 0);
10844 		mutex_enter(SD_MUTEX(un));
10845 
10846 		if (err != 0) {
10847 			mutex_exit(SD_MUTEX(un));
10848 			cmlb_invalidate(un->un_cmlbhandle,
10849 			    (void *)SD_PATH_DIRECT);
10850 			mutex_enter(SD_MUTEX(un));
10851 			if (err == EACCES) {
10852 				scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
10853 				    "reservation conflict\n");
10854 				rval = SD_RESERVED_BY_OTHERS;
10855 				sd_ssc_assessment(ssc, SD_FMT_IGNORE);
10856 			} else {
10857 				scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
10858 				    "drive offline\n");
10859 				rval = SD_NOT_READY_VALID;
10860 				sd_ssc_assessment(ssc, SD_FMT_STATUS_CHECK);
10861 			}
10862 			goto done;
10863 		}
10864 	}
10865 
10866 	if (un->un_f_format_in_progress == FALSE) {
10867 		mutex_exit(SD_MUTEX(un));
10868 
10869 		(void) cmlb_validate(un->un_cmlbhandle, 0,
10870 		    (void *)SD_PATH_DIRECT);
10871 		if (cmlb_partinfo(un->un_cmlbhandle, part, NULL, NULL, NULL,
10872 		    NULL, (void *) SD_PATH_DIRECT) != 0) {
10873 			rval = SD_NOT_READY_VALID;
10874 			mutex_enter(SD_MUTEX(un));
10875 
10876 			goto done;
10877 		}
10878 		if (un->un_f_pkstats_enabled) {
10879 			sd_set_pstats(un);
10880 			SD_TRACE(SD_LOG_IO_PARTITION, un,
10881 			    "sd_ready_and_valid: un:0x%p pstats created and "
10882 			    "set\n", un);
10883 		}
10884 		mutex_enter(SD_MUTEX(un));
10885 	}
10886 
10887 	/*
10888 	 * If this device supports the DOOR_LOCK command, try to send
10889 	 * it to PREVENT MEDIA REMOVAL, but don't get upset
10890 	 * if it fails. For a CD, however, a failure is an error.
10891 	 */
10892 	if (un->un_f_doorlock_supported) {
10893 		mutex_exit(SD_MUTEX(un));
10894 		status = sd_send_scsi_DOORLOCK(ssc, SD_REMOVAL_PREVENT,
10895 		    SD_PATH_DIRECT);
10896 
10897 		if ((status != 0) && ISCD(un)) {
10898 			rval = SD_NOT_READY_VALID;
10899 			mutex_enter(SD_MUTEX(un));
10900 
10901 			sd_ssc_assessment(ssc, SD_FMT_IGNORE);
10902 
10903 			goto done;
10904 		} else if (status != 0)
10905 			sd_ssc_assessment(ssc, SD_FMT_IGNORE);
10906 		mutex_enter(SD_MUTEX(un));
10907 	}
10908 
10909 	/* The state has changed, inform the media watch routines */
10910 	un->un_mediastate = DKIO_INSERTED;
10911 	cv_broadcast(&un->un_state_cv);
10912 	rval = SD_READY_VALID;
10913 
10914 done:
10915 
10916 	/*
10917 	 * Initialize the capacity kstat value, if no media previously
10918 	 * (capacity kstat is 0) and a media has been inserted
10919 	 * (un_blockcount > 0).
10920 	 */
10921 	if (un->un_errstats != NULL) {
10922 		stp = (struct sd_errstats *)un->un_errstats->ks_data;
10923 		if ((stp->sd_capacity.value.ui64 == 0) &&
10924 		    (un->un_f_blockcount_is_valid == TRUE)) {
10925 			stp->sd_capacity.value.ui64 =
10926 			    (uint64_t)((uint64_t)un->un_blockcount *
10927 			    un->un_sys_blocksize);
10928 		}
10929 	}
10930 
10931 	mutex_exit(SD_MUTEX(un));
10932 	return (rval);
10933 }
10934 
10935 
10936 /*
10937  *    Function: sdmin
10938  *
10939  * Description: Routine to limit the size of a data transfer. Used in
10940  *		conjunction with physio(9F).
10941  *
10942  *   Arguments: bp - pointer to the indicated buf(9S) struct.
10943  *
10944  *     Context: Kernel thread context.
10945  */
10946 
10947 static void
10948 sdmin(struct buf *bp)
10949 {
10950 	struct sd_lun	*un;
10951 	int		instance;
10952 
10953 	instance = SDUNIT(bp->b_edev);
10954 
10955 	un = ddi_get_soft_state(sd_state, instance);
10956 	ASSERT(un != NULL);
10957 
10958 	/*
10959 	 * We depend on buf breakup to restrict
10960 	 * IO size if it is enabled.
10961 	 */
10962 	if (un->un_buf_breakup_supported) {
10963 		return;
10964 	}
10965 
10966 	if (bp->b_bcount > un->un_max_xfer_size) {
10967 		bp->b_bcount = un->un_max_xfer_size;
10968 	}
10969 }
10970 
10971 
10972 /*
10973  *    Function: sdread
10974  *
10975  * Description: Driver's read(9e) entry point function.
10976  *
10977  *   Arguments: dev   - device number
10978  *		uio   - structure pointer describing where data is to be stored
10979  *			in user's space
10980  *		cred_p  - user credential pointer
10981  *
10982  * Return Code: ENXIO
10983  *		EIO
10984  *		EINVAL
10985  *		value returned by physio
10986  *
10987  *     Context: Kernel thread context.
10988  */
10989 /* ARGSUSED */
10990 static int
10991 sdread(dev_t dev, struct uio *uio, cred_t *cred_p)
10992 {
10993 	struct sd_lun	*un = NULL;
10994 	int		secmask;
10995 	int		err = 0;
10996 	sd_ssc_t	*ssc;
10997 
10998 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
10999 		return (ENXIO);
11000 	}
11001 
11002 	ASSERT(!mutex_owned(SD_MUTEX(un)));
11003 
11004 
11005 	if (!SD_IS_VALID_LABEL(un) && !ISCD(un)) {
11006 		mutex_enter(SD_MUTEX(un));
11007 		/*
11008 		 * Because the call to sd_ready_and_valid will issue I/O we
11009 		 * must wait here if either the device is suspended or
11010 		 * if its power level is changing.
11011 		 */
11012 		while ((un->un_state == SD_STATE_SUSPENDED) ||
11013 		    (un->un_state == SD_STATE_PM_CHANGING)) {
11014 			cv_wait(&un->un_suspend_cv, SD_MUTEX(un));
11015 		}
11016 		un->un_ncmds_in_driver++;
11017 		mutex_exit(SD_MUTEX(un));
11018 
11019 		/* Initialize sd_ssc_t for internal uscsi commands */
11020 		ssc = sd_ssc_init(un);
11021 		if ((sd_ready_and_valid(ssc, SDPART(dev))) != SD_READY_VALID) {
11022 			err = EIO;
11023 		} else {
11024 			err = 0;
11025 		}
11026 		sd_ssc_fini(ssc);
11027 
11028 		mutex_enter(SD_MUTEX(un));
11029 		un->un_ncmds_in_driver--;
11030 		ASSERT(un->un_ncmds_in_driver >= 0);
11031 		mutex_exit(SD_MUTEX(un));
11032 		if (err != 0)
11033 			return (err);
11034 	}
11035 
11036 	/*
11037 	 * Read requests are restricted to multiples of the system block size.
11038 	 */
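	/*
	 * For example, with a 512-byte block size secmask is 0x1ff, so an
	 * offset of 1024 (0x400) passes the alignment checks below while an
	 * offset of 1000 (0x3e8) fails them with EINVAL.
	 */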
11039 	if (un->un_f_rmw_type == SD_RMW_TYPE_RETURN_ERROR &&
11040 	    !un->un_f_enable_rmw)
11041 		secmask = un->un_tgt_blocksize - 1;
11042 	else
11043 		secmask = DEV_BSIZE - 1;
11044 
11045 	if (uio->uio_loffset & ((offset_t)(secmask))) {
11046 		SD_ERROR(SD_LOG_READ_WRITE, un,
11047 		    "sdread: file offset not modulo %d\n",
11048 		    secmask + 1);
11049 		err = EINVAL;
11050 	} else if (uio->uio_iov->iov_len & (secmask)) {
11051 		SD_ERROR(SD_LOG_READ_WRITE, un,
11052 		    "sdread: transfer length not modulo %d\n",
11053 		    secmask + 1);
11054 		err = EINVAL;
11055 	} else {
11056 		err = physio(sdstrategy, NULL, dev, B_READ, sdmin, uio);
11057 	}
11058 
11059 	return (err);
11060 }
11061 
11062 
11063 /*
11064  *    Function: sdwrite
11065  *
11066  * Description: Driver's write(9e) entry point function.
11067  *
11068  *   Arguments: dev   - device number
11069  *		uio   - structure pointer describing where data is stored in
11070  *			user's space
11071  *		cred_p  - user credential pointer
11072  *
11073  * Return Code: ENXIO
11074  *		EIO
11075  *		EINVAL
11076  *		value returned by physio
11077  *
11078  *     Context: Kernel thread context.
11079  */
11080 /* ARGSUSED */
11081 static int
11082 sdwrite(dev_t dev, struct uio *uio, cred_t *cred_p)
11083 {
11084 	struct sd_lun	*un = NULL;
11085 	int		secmask;
11086 	int		err = 0;
11087 	sd_ssc_t	*ssc;
11088 
11089 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
11090 		return (ENXIO);
11091 	}
11092 
11093 	ASSERT(!mutex_owned(SD_MUTEX(un)));
11094 
11095 	if (!SD_IS_VALID_LABEL(un) && !ISCD(un)) {
11096 		mutex_enter(SD_MUTEX(un));
11097 		/*
11098 		 * Because the call to sd_ready_and_valid will issue I/O we
11099 		 * must wait here if either the device is suspended or
11100 		 * if its power level is changing.
11101 		 */
11102 		while ((un->un_state == SD_STATE_SUSPENDED) ||
11103 		    (un->un_state == SD_STATE_PM_CHANGING)) {
11104 			cv_wait(&un->un_suspend_cv, SD_MUTEX(un));
11105 		}
11106 		un->un_ncmds_in_driver++;
11107 		mutex_exit(SD_MUTEX(un));
11108 
11109 		/* Initialize sd_ssc_t for internal uscsi commands */
11110 		ssc = sd_ssc_init(un);
11111 		if ((sd_ready_and_valid(ssc, SDPART(dev))) != SD_READY_VALID) {
11112 			err = EIO;
11113 		} else {
11114 			err = 0;
11115 		}
11116 		sd_ssc_fini(ssc);
11117 
11118 		mutex_enter(SD_MUTEX(un));
11119 		un->un_ncmds_in_driver--;
11120 		ASSERT(un->un_ncmds_in_driver >= 0);
11121 		mutex_exit(SD_MUTEX(un));
11122 		if (err != 0)
11123 			return (err);
11124 	}
11125 
11126 	/*
11127 	 * Write requests are restricted to multiples of the system block size.
11128 	 */
11129 	if (un->un_f_rmw_type == SD_RMW_TYPE_RETURN_ERROR &&
11130 	    !un->un_f_enable_rmw)
11131 		secmask = un->un_tgt_blocksize - 1;
11132 	else
11133 		secmask = DEV_BSIZE - 1;
11134 
11135 	if (uio->uio_loffset & ((offset_t)(secmask))) {
11136 		SD_ERROR(SD_LOG_READ_WRITE, un,
11137 		    "sdwrite: file offset not modulo %d\n",
11138 		    secmask + 1);
11139 		err = EINVAL;
11140 	} else if (uio->uio_iov->iov_len & (secmask)) {
11141 		SD_ERROR(SD_LOG_READ_WRITE, un,
11142 		    "sdwrite: transfer length not modulo %d\n",
11143 		    secmask + 1);
11144 		err = EINVAL;
11145 	} else {
11146 		err = physio(sdstrategy, NULL, dev, B_WRITE, sdmin, uio);
11147 	}
11148 
11149 	return (err);
11150 }
11151 
11152 
11153 /*
11154  *    Function: sdaread
11155  *
11156  * Description: Driver's aread(9e) entry point function.
11157  *
11158  *   Arguments: dev   - device number
11159  *		aio   - structure pointer describing where data is to be stored
11160  *		cred_p  - user credential pointer
11161  *
11162  * Return Code: ENXIO
11163  *		EIO
11164  *		EINVAL
11165  *		value returned by aphysio
11166  *
11167  *     Context: Kernel thread context.
11168  */
11169 /* ARGSUSED */
11170 static int
11171 sdaread(dev_t dev, struct aio_req *aio, cred_t *cred_p)
11172 {
11173 	struct sd_lun	*un = NULL;
11174 	struct uio	*uio = aio->aio_uio;
11175 	int		secmask;
11176 	int		err = 0;
11177 	sd_ssc_t	*ssc;
11178 
11179 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
11180 		return (ENXIO);
11181 	}
11182 
11183 	ASSERT(!mutex_owned(SD_MUTEX(un)));
11184 
11185 	if (!SD_IS_VALID_LABEL(un) && !ISCD(un)) {
11186 		mutex_enter(SD_MUTEX(un));
11187 		/*
11188 		 * Because the call to sd_ready_and_valid will issue I/O we
11189 		 * must wait here if either the device is suspended or
11190 		 * if its power level is changing.
11191 		 */
11192 		while ((un->un_state == SD_STATE_SUSPENDED) ||
11193 		    (un->un_state == SD_STATE_PM_CHANGING)) {
11194 			cv_wait(&un->un_suspend_cv, SD_MUTEX(un));
11195 		}
11196 		un->un_ncmds_in_driver++;
11197 		mutex_exit(SD_MUTEX(un));
11198 
11199 		/* Initialize sd_ssc_t for internal uscsi commands */
11200 		ssc = sd_ssc_init(un);
11201 		if ((sd_ready_and_valid(ssc, SDPART(dev))) != SD_READY_VALID) {
11202 			err = EIO;
11203 		} else {
11204 			err = 0;
11205 		}
11206 		sd_ssc_fini(ssc);
11207 
11208 		mutex_enter(SD_MUTEX(un));
11209 		un->un_ncmds_in_driver--;
11210 		ASSERT(un->un_ncmds_in_driver >= 0);
11211 		mutex_exit(SD_MUTEX(un));
11212 		if (err != 0)
11213 			return (err);
11214 	}
11215 
11216 	/*
11217 	 * Read requests are restricted to multiples of the system block size.
11218 	 */
11219 	if (un->un_f_rmw_type == SD_RMW_TYPE_RETURN_ERROR &&
11220 	    !un->un_f_enable_rmw)
11221 		secmask = un->un_tgt_blocksize - 1;
11222 	else
11223 		secmask = DEV_BSIZE - 1;
11224 
11225 	if (uio->uio_loffset & ((offset_t)(secmask))) {
11226 		SD_ERROR(SD_LOG_READ_WRITE, un,
11227 		    "sdaread: file offset not modulo %d\n",
11228 		    secmask + 1);
11229 		err = EINVAL;
11230 	} else if (uio->uio_iov->iov_len & (secmask)) {
11231 		SD_ERROR(SD_LOG_READ_WRITE, un,
11232 		    "sdaread: transfer length not modulo %d\n",
11233 		    secmask + 1);
11234 		err = EINVAL;
11235 	} else {
11236 		err = aphysio(sdstrategy, anocancel, dev, B_READ, sdmin, aio);
11237 	}
11238 
11239 	return (err);
11240 }
11241 
11242 
11243 /*
11244  *    Function: sdawrite
11245  *
11246  * Description: Driver's awrite(9e) entry point function.
11247  *
11248  *   Arguments: dev   - device number
11249  *		aio   - structure pointer describing where data is stored
11250  *		cred_p  - user credential pointer
11251  *
11252  * Return Code: ENXIO
11253  *		EIO
11254  *		EINVAL
11255  *		value returned by aphysio
11256  *
11257  *     Context: Kernel thread context.
11258  */
11259 /* ARGSUSED */
11260 static int
11261 sdawrite(dev_t dev, struct aio_req *aio, cred_t *cred_p)
11262 {
11263 	struct sd_lun	*un = NULL;
11264 	struct uio	*uio = aio->aio_uio;
11265 	int		secmask;
11266 	int		err = 0;
11267 	sd_ssc_t	*ssc;
11268 
11269 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
11270 		return (ENXIO);
11271 	}
11272 
11273 	ASSERT(!mutex_owned(SD_MUTEX(un)));
11274 
11275 	if (!SD_IS_VALID_LABEL(un) && !ISCD(un)) {
11276 		mutex_enter(SD_MUTEX(un));
11277 		/*
11278 		 * Because the call to sd_ready_and_valid will issue I/O we
11279 		 * must wait here if either the device is suspended or
11280 		 * if its power level is changing.
11281 		 */
11282 		while ((un->un_state == SD_STATE_SUSPENDED) ||
11283 		    (un->un_state == SD_STATE_PM_CHANGING)) {
11284 			cv_wait(&un->un_suspend_cv, SD_MUTEX(un));
11285 		}
11286 		un->un_ncmds_in_driver++;
11287 		mutex_exit(SD_MUTEX(un));
11288 
11289 		/* Initialize sd_ssc_t for internal uscsi commands */
11290 		ssc = sd_ssc_init(un);
11291 		if ((sd_ready_and_valid(ssc, SDPART(dev))) != SD_READY_VALID) {
11292 			err = EIO;
11293 		} else {
11294 			err = 0;
11295 		}
11296 		sd_ssc_fini(ssc);
11297 
11298 		mutex_enter(SD_MUTEX(un));
11299 		un->un_ncmds_in_driver--;
11300 		ASSERT(un->un_ncmds_in_driver >= 0);
11301 		mutex_exit(SD_MUTEX(un));
11302 		if (err != 0)
11303 			return (err);
11304 	}
11305 
11306 	/*
11307 	 * Write requests are restricted to multiples of the system block size.
11308 	 */
11309 	if (un->un_f_rmw_type == SD_RMW_TYPE_RETURN_ERROR &&
11310 	    !un->un_f_enable_rmw)
11311 		secmask = un->un_tgt_blocksize - 1;
11312 	else
11313 		secmask = DEV_BSIZE - 1;
11314 
11315 	if (uio->uio_loffset & ((offset_t)(secmask))) {
11316 		SD_ERROR(SD_LOG_READ_WRITE, un,
11317 		    "sdawrite: file offset not modulo %d\n",
11318 		    secmask + 1);
11319 		err = EINVAL;
11320 	} else if (uio->uio_iov->iov_len & (secmask)) {
11321 		SD_ERROR(SD_LOG_READ_WRITE, un,
11322 		    "sdawrite: transfer length not modulo %d\n",
11323 		    secmask + 1);
11324 		err = EINVAL;
11325 	} else {
11326 		err = aphysio(sdstrategy, anocancel, dev, B_WRITE, sdmin, aio);
11327 	}
11328 
11329 	return (err);
11330 }
11331 
11332 
11333 
11334 
11335 
11336 /*
11337  * Driver IO processing follows the following sequence:
11338  *
11339  *     sdioctl(9E)     sdstrategy(9E)         biodone(9F)
11340  *         |                |                     ^
11341  *         v                v                     |
11342  * sd_send_scsi_cmd()  ddi_xbuf_qstrategy()       +-------------------+
11343  *         |                |                     |                   |
11344  *         v                |                     |                   |
11345  * sd_uscsi_strategy() sd_xbuf_strategy()   sd_buf_iodone()   sd_uscsi_iodone()
11346  *         |                |                     ^                   ^
11347  *         v                v                     |                   |
11348  * SD_BEGIN_IOSTART()  SD_BEGIN_IOSTART()         |                   |
11349  *         |                |                     |                   |
11350  *     +---+                |                     +------------+      +-------+
11351  *     |                    |                                  |              |
11352  *     |   SD_NEXT_IOSTART()|                  SD_NEXT_IODONE()|              |
11353  *     |                    v                                  |              |
11354  *     |         sd_mapblockaddr_iostart()           sd_mapblockaddr_iodone() |
11355  *     |                    |                                  ^              |
11356  *     |   SD_NEXT_IOSTART()|                  SD_NEXT_IODONE()|              |
11357  *     |                    v                                  |              |
11358  *     |         sd_mapblocksize_iostart()           sd_mapblocksize_iodone() |
11359  *     |                    |                                  ^              |
11360  *     |   SD_NEXT_IOSTART()|                  SD_NEXT_IODONE()|              |
11361  *     |                    v                                  |              |
11362  *     |           sd_checksum_iostart()               sd_checksum_iodone()   |
11363  *     |                    |                                  ^              |
11364  *     +-> SD_NEXT_IOSTART()|                  SD_NEXT_IODONE()+------------->+
11365  *     |                    v                                  |              |
11366  *     |              sd_pm_iostart()                     sd_pm_iodone()      |
11367  *     |                    |                                  ^              |
11368  *     |                    |                                  |              |
11369  *     +-> SD_NEXT_IOSTART()|               SD_BEGIN_IODONE()--+--------------+
11370  *                          |                           ^
11371  *                          v                           |
11372  *                   sd_core_iostart()                  |
11373  *                          |                           |
11374  *                          |                           +------>(*destroypkt)()
11375  *                          +-> sd_start_cmds() <-+     |           |
11376  *                          |                     |     |           v
11377  *                          |                     |     |  scsi_destroy_pkt(9F)
11378  *                          |                     |     |
11379  *                          +->(*initpkt)()       +- sdintr()
11380  *                          |  |                        |  |
11381  *                          |  +-> scsi_init_pkt(9F)    |  +-> sd_handle_xxx()
11382  *                          |  +-> scsi_setup_cdb(9F)   |
11383  *                          |                           |
11384  *                          +--> scsi_transport(9F)     |
11385  *                                     |                |
11386  *                                     +----> SCSA ---->+
11387  *
11388  *
11389  * This code is based upon the following presumptions:
11390  *
11391  *   - iostart and iodone functions operate on buf(9S) structures. These
11392  *     functions perform the necessary operations on the buf(9S) and pass
11393  *     them along to the next function in the chain by using the macros
11394  *     SD_NEXT_IOSTART() (for iostart side functions) and SD_NEXT_IODONE()
11395  *     (for iodone side functions).
11396  *
11397  *   - The iostart side functions may sleep. The iodone side functions
11398  *     are called under interrupt context and may NOT sleep. Therefore
11399  *     iodone side functions also may not call iostart side functions.
11400  *     (NOTE: iostart side functions should NOT sleep for memory, as
11401  *     this could result in deadlock.)
11402  *
11403  *   - An iostart side function may call its corresponding iodone side
11404  *     function directly (if necessary).
11405  *
11406  *   - In the event of an error, an iostart side function can return a buf(9S)
11407  *     to its caller by calling SD_BEGIN_IODONE() (after setting B_ERROR and
11408  *     b_error in the usual way of course).
11409  *
11410  *   - The taskq mechanism may be used by the iodone side functions to dispatch
11411  *     requests to the iostart side functions.  The iostart side functions in
11412  *     this case would be called under the context of a taskq thread, so it's
11413  *     OK for them to block/sleep/spin in this case.
11414  *
11415  *   - iostart side functions may allocate "shadow" buf(9S) structs and
11416  *     pass them along to the next function in the chain.  The corresponding
11417  *     iodone side functions must coalesce the "shadow" bufs and return
11418  *     the "original" buf to the next higher layer.
11419  *
11420  *   - The b_private field of the buf(9S) struct holds a pointer to
11421  *     an sd_xbuf struct, which contains information needed to
11422  *     construct the scsi_pkt for the command.
11423  *
11424  *   - The SD_MUTEX(un) is NOT held across calls to the next layer. Each
11425  *     layer must acquire & release the SD_MUTEX(un) as needed.
11426  */
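
/*
 * Illustrative skeleton of an iostart-side layer (a sketch only; each
 * real layer performs its own processing before passing the buf on):
 *
 *	static void
 *	sd_example_iostart(int index, struct sd_lun *un, struct buf *bp)
 *	{
 *		if (<fatal error detected>) {
 *			bioerror(bp, EIO);
 *			bp->b_resid = bp->b_bcount;
 *			SD_BEGIN_IODONE(index, un, bp);
 *			return;
 *		}
 *		<per-layer processing of bp>
 *		SD_NEXT_IOSTART(index, un, bp);
 *	}
 */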
11427 
11428 
11429 /*
11430  * Create taskq for all targets in the system. This is created at
11431  * _init(9E) and destroyed at _fini(9E).
11432  *
11433  * Note: here we set the minalloc to a reasonably high number to ensure that
11434  * we will have an adequate supply of task entries available at interrupt time.
11435  * This is used in conjunction with the TASKQ_PREPOPULATE flag in
11436  * sd_create_taskq().  Since we do not want to sleep for allocations at
11437  * interrupt time, set maxalloc equal to minalloc. That way we will just fail
11438  * the command if we ever try to dispatch more than SD_TASKQ_MAXALLOC taskq
11439  * requests any one instant in time.
11440  */
11441 #define	SD_TASKQ_NUMTHREADS	8
11442 #define	SD_TASKQ_MINALLOC	256
11443 #define	SD_TASKQ_MAXALLOC	256
11444 
11445 static taskq_t	*sd_tq = NULL;
11446 _NOTE(SCHEME_PROTECTS_DATA("stable data", sd_tq))
11447 
11448 static int	sd_taskq_minalloc = SD_TASKQ_MINALLOC;
11449 static int	sd_taskq_maxalloc = SD_TASKQ_MAXALLOC;
11450 
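/*
 * Illustrative sketch, not driver code: because maxalloc == minalloc and
 * the queue is prepopulated, an iodone-side dispatch with TQ_NOSLEEP
 * fails fast instead of sleeping once all SD_TASKQ_MAXALLOC entries are
 * in use.  The sd_example_* names are hypothetical; taskq_dispatch(9F)
 * and TQ_NOSLEEP are the real interfaces.
 */
#ifdef SD_DOC_EXAMPLE	/* hypothetical guard macro; never defined */
static void
sd_example_retry_task(void *arg)
{
	struct buf *bp = arg;

	/* Taskq thread context: blocking/sleeping is allowed here. */
	biodone(bp);
}

static void
sd_example_dispatch(struct buf *bp)
{
	/* Interrupt context: TQ_NOSLEEP means no blocking for an entry. */
	if (taskq_dispatch(sd_tq, sd_example_retry_task, bp, TQ_NOSLEEP) ==
	    TASKQID_INVALID) {
		/* No preallocated task entry left; fail the command. */
		bioerror(bp, EIO);
		biodone(bp);
	}
}
#endif	/* SD_DOC_EXAMPLE */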
11451 /*
11452  * The following task queue is created for the write part of
11453  * read-modify-write on devices with a non-512-byte block size.
11454  * Limit the number of threads to 1 for now; the queue currently applies
11455  * only to DVD-RAM and MO drives, for which performance is not the main
11456  * criterion at this stage.
11457  * Note: it remains to be explored whether a single taskq can be used in future.
11458  */
11459 #define	SD_WMR_TASKQ_NUMTHREADS	1
11460 static taskq_t	*sd_wmr_tq = NULL;
11461 _NOTE(SCHEME_PROTECTS_DATA("stable data", sd_wmr_tq))
11462 
11463 /*
11464  *    Function: sd_taskq_create
11465  *
11466  * Description: Create taskq thread(s) and preallocate task entries
11467  *
11468  * Return Code: Returns a pointer to the allocated taskq_t.
11469  *
11470  *     Context: Can sleep. Requires blockable context.
11471  *
11472  *       Notes: - The taskq() facility currently is NOT part of the DDI.
11473  *		  (definitely NOT recommended for 3rd-party drivers!) :-)
11474  *		- taskq_create() will block for memory, and it will panic
11475  *		  if it cannot create the requested number of threads.
11476  *		- Currently taskq_create() creates threads that cannot be
11477  *		  swapped.
11478  *		- We use TASKQ_PREPOPULATE to ensure we have an adequate
11479  *		  supply of taskq entries at interrupt time (i.e., so that
11480  *		  we do not have to sleep for memory).
11481  */
11482 
11483 static void
11484 sd_taskq_create(void)
11485 {
11486 	char	taskq_name[TASKQ_NAMELEN];
11487 
11488 	ASSERT(sd_tq == NULL);
11489 	ASSERT(sd_wmr_tq == NULL);
11490 
11491 	(void) snprintf(taskq_name, sizeof (taskq_name),
11492 	    "%s_drv_taskq", sd_label);
11493 	sd_tq = (taskq_create(taskq_name, SD_TASKQ_NUMTHREADS,
11494 	    (v.v_maxsyspri - 2), sd_taskq_minalloc, sd_taskq_maxalloc,
11495 	    TASKQ_PREPOPULATE));
11496 
11497 	(void) snprintf(taskq_name, sizeof (taskq_name),
11498 	    "%s_rmw_taskq", sd_label);
11499 	sd_wmr_tq = (taskq_create(taskq_name, SD_WMR_TASKQ_NUMTHREADS,
11500 	    (v.v_maxsyspri - 2), sd_taskq_minalloc, sd_taskq_maxalloc,
11501 	    TASKQ_PREPOPULATE));
11502 }
11503 
11504 
11505 /*
11506  *    Function: sd_taskq_delete
11507  *
11508  * Description: Complementary cleanup routine for sd_taskq_create().
11509  *
11510  *     Context: Kernel thread context.
11511  */
11512 
11513 static void
11514 sd_taskq_delete(void)
11515 {
11516 	ASSERT(sd_tq != NULL);
11517 	ASSERT(sd_wmr_tq != NULL);
11518 	taskq_destroy(sd_tq);
11519 	taskq_destroy(sd_wmr_tq);
11520 	sd_tq = NULL;
11521 	sd_wmr_tq = NULL;
11522 }
11523 
11524 
11525 /*
11526  *    Function: sdstrategy
11527  *
11528  * Description: Driver's strategy (9E) entry point function.
11529  *
11530  *   Arguments: bp - pointer to buf(9S)
11531  *
11532  * Return Code: Always returns zero
11533  *
11534  *     Context: Kernel thread context.
11535  */
11536 
11537 static int
11538 sdstrategy(struct buf *bp)
11539 {
11540 	struct sd_lun *un;
11541 
11542 	un = ddi_get_soft_state(sd_state, SD_GET_INSTANCE_FROM_BUF(bp));
11543 	if (un == NULL) {
11544 		bioerror(bp, EIO);
11545 		bp->b_resid = bp->b_bcount;
11546 		biodone(bp);
11547 		return (0);
11548 	}
11549 
11550 	/* As was done in the past, fail new commands if the state is dumping. */
11551 	if (un->un_state == SD_STATE_DUMPING) {
11552 		bioerror(bp, ENXIO);
11553 		bp->b_resid = bp->b_bcount;
11554 		biodone(bp);
11555 		return (0);
11556 	}
11557 
11558 	ASSERT(!mutex_owned(SD_MUTEX(un)));
11559 
11560 	/*
11561 	 * Commands may sneak in while we release the mutex in
11562 	 * DDI_SUSPEND, so we should block new commands. However, old
11563 	 * commands that are still in the driver at this point should
11564 	 * still be allowed to drain.
11565 	 */
11566 	mutex_enter(SD_MUTEX(un));
11567 	/*
11568 	 * Must wait here if either the device is suspended or
11569 	 * its power level is changing.
11570 	 */
11571 	while ((un->un_state == SD_STATE_SUSPENDED) ||
11572 	    (un->un_state == SD_STATE_PM_CHANGING)) {
11573 		cv_wait(&un->un_suspend_cv, SD_MUTEX(un));
11574 	}
11575 
11576 	un->un_ncmds_in_driver++;
11577 
11578 	/*
11579 	 * atapi: Since we are running the CD in PIO mode for now, we need to
11580 	 * call bp_mapin here to avoid bp_mapin being called in interrupt
11581 	 * context under the HBA's init_pkt routine.
11582 	 */
11583 	if (un->un_f_cfg_is_atapi == TRUE) {
11584 		mutex_exit(SD_MUTEX(un));
11585 		bp_mapin(bp);
11586 		mutex_enter(SD_MUTEX(un));
11587 	}
11588 	SD_INFO(SD_LOG_IO, un, "sdstrategy: un_ncmds_in_driver = %ld\n",
11589 	    un->un_ncmds_in_driver);
11590 
11591 	if (bp->b_flags & B_WRITE)
11592 		un->un_f_sync_cache_required = TRUE;
11593 
11594 	mutex_exit(SD_MUTEX(un));
11595 
11596 	/*
11597 	 * This will (eventually) allocate the sd_xbuf area and
11598 	 * call sd_xbuf_strategy().  We just want to return the
11599 	 * result of ddi_xbuf_qstrategy so that we have an
11600 	 * optimized tail call, which saves us a stack frame.
11601 	 */
11602 	return (ddi_xbuf_qstrategy(bp, un->un_xbuf_attr));
11603 }
11604 
11605 
11606 /*
11607  *    Function: sd_xbuf_strategy
11608  *
11609  * Description: Function for initiating IO operations via the
11610  *		ddi_xbuf_qstrategy() mechanism.
11611  *
11612  *     Context: Kernel thread context.
11613  */
11614 
11615 static void
11616 sd_xbuf_strategy(struct buf *bp, ddi_xbuf_t xp, void *arg)
11617 {
11618 	struct sd_lun *un = arg;
11619 
11620 	ASSERT(bp != NULL);
11621 	ASSERT(xp != NULL);
11622 	ASSERT(un != NULL);
11623 	ASSERT(!mutex_owned(SD_MUTEX(un)));
11624 
11625 	/*
11626 	 * Initialize the fields in the xbuf and save a pointer to the
11627 	 * xbuf in bp->b_private.
11628 	 */
11629 	sd_xbuf_init(un, bp, xp, SD_CHAIN_BUFIO, NULL);
11630 
11631 	/* Send the buf down the iostart chain */
11632 	SD_BEGIN_IOSTART(((struct sd_xbuf *)xp)->xb_chain_iostart, un, bp);
11633 }
11634 
11635 
11636 /*
11637  *    Function: sd_xbuf_init
11638  *
11639  * Description: Prepare the given sd_xbuf struct for use.
11640  *
11641  *   Arguments: un - ptr to softstate
11642  *		bp - ptr to associated buf(9S)
11643  *		xp - ptr to associated sd_xbuf
11644  *		chain_type - IO chain type to use:
11645  *			SD_CHAIN_NULL
11646  *			SD_CHAIN_BUFIO
11647  *			SD_CHAIN_USCSI
11648  *			SD_CHAIN_DIRECT
11649  *			SD_CHAIN_DIRECT_PRIORITY
11650  *		pktinfop - ptr to private data struct for scsi_pkt(9S)
11651  *			initialization; may be NULL if none.
11652  *
11653  *     Context: Kernel thread context
11654  */
11655 
11656 static void
11657 sd_xbuf_init(struct sd_lun *un, struct buf *bp, struct sd_xbuf *xp,
11658     uchar_t chain_type, void *pktinfop)
11659 {
11660 	int index;
11661 
11662 	ASSERT(un != NULL);
11663 	ASSERT(bp != NULL);
11664 	ASSERT(xp != NULL);
11665 
11666 	SD_INFO(SD_LOG_IO, un, "sd_xbuf_init: buf:0x%p chain type:0x%x\n",
11667 	    bp, chain_type);
11668 
11669 	xp->xb_un	= un;
11670 	xp->xb_pktp	= NULL;
11671 	xp->xb_pktinfo	= pktinfop;
11672 	xp->xb_private	= bp->b_private;
11673 	xp->xb_blkno	= (daddr_t)bp->b_blkno;
11674 
11675 	/*
11676 	 * Set up the iostart and iodone chain indexes in the xbuf, based
11677 	 * upon the specified chain type to use.
11678 	 */
11679 	switch (chain_type) {
11680 	case SD_CHAIN_NULL:
11681 		/*
11682 		 * Fall through to just use the values for the buf type, even
11683 		 * though for the NULL chain these values will never be used.
11684 		 */
11685 		/* FALLTHRU */
11686 	case SD_CHAIN_BUFIO:
11687 		index = un->un_buf_chain_type;
11688 		if ((!un->un_f_has_removable_media) &&
11689 		    (un->un_tgt_blocksize != 0) &&
11690 		    (un->un_tgt_blocksize != DEV_BSIZE ||
11691 		    un->un_f_enable_rmw)) {
11692 			int secmask = 0, blknomask = 0;
11693 			if (un->un_f_enable_rmw) {
11694 				blknomask =
11695 				    (un->un_phy_blocksize / DEV_BSIZE) - 1;
11696 				secmask = un->un_phy_blocksize - 1;
11697 			} else {
11698 				blknomask =
11699 				    (un->un_tgt_blocksize / DEV_BSIZE) - 1;
11700 				secmask = un->un_tgt_blocksize - 1;
11701 			}
11702 
11703 			if ((bp->b_lblkno & (blknomask)) ||
11704 			    (bp->b_bcount & (secmask))) {
11705 				if ((un->un_f_rmw_type !=
11706 				    SD_RMW_TYPE_RETURN_ERROR) ||
11707 				    un->un_f_enable_rmw) {
11708 					if (un->un_f_pm_is_enabled == FALSE)
11709 						index =
11710 						    SD_CHAIN_INFO_MSS_DSK_NO_PM;
11711 					else
11712 						index =
11713 						    SD_CHAIN_INFO_MSS_DISK;
11714 				}
11715 			}
11716 		}
11717 		break;
11718 	case SD_CHAIN_USCSI:
11719 		index = un->un_uscsi_chain_type;
11720 		break;
11721 	case SD_CHAIN_DIRECT:
11722 		index = un->un_direct_chain_type;
11723 		break;
11724 	case SD_CHAIN_DIRECT_PRIORITY:
11725 		index = un->un_priority_chain_type;
11726 		break;
11727 	default:
11728 		/* We're really broken if we ever get here... */
11729 		panic("sd_xbuf_init: illegal chain type!");
11730 		/*NOTREACHED*/
11731 	}
11732 
11733 	xp->xb_chain_iostart = sd_chain_index_map[index].sci_iostart_index;
11734 	xp->xb_chain_iodone = sd_chain_index_map[index].sci_iodone_index;
11735 
11736 	/*
11737 	 * It might be a bit easier to simply bzero the entire xbuf above,
11738 	 * but it turns out that since we init a fair number of members anyway,
11739 	 * we save a fair number of cycles by doing explicit assignments of zero.
11740 	 */
11741 	xp->xb_pkt_flags	= 0;
11742 	xp->xb_dma_resid	= 0;
11743 	xp->xb_retry_count	= 0;
11744 	xp->xb_victim_retry_count = 0;
11745 	xp->xb_ua_retry_count	= 0;
11746 	xp->xb_nr_retry_count	= 0;
11747 	xp->xb_sense_bp		= NULL;
11748 	xp->xb_sense_status	= 0;
11749 	xp->xb_sense_state	= 0;
11750 	xp->xb_sense_resid	= 0;
11751 	xp->xb_ena		= 0;
11752 
11753 	bp->b_private	= xp;
11754 	bp->b_flags	&= ~(B_DONE | B_ERROR);
11755 	bp->b_resid	= 0;
11756 	bp->av_forw	= NULL;
11757 	bp->av_back	= NULL;
11758 	bioerror(bp, 0);
11759 
11760 	SD_INFO(SD_LOG_IO, un, "sd_xbuf_init: done.\n");
11761 }
11762 
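/*
 * A worked example of the mask arithmetic in sd_xbuf_init above (the
 * numbers are assumed purely for illustration): with un_tgt_blocksize ==
 * 4096 and DEV_BSIZE == 512, blknomask == (4096 / 512) - 1 == 7 and
 * secmask == 4095.  A buf with b_lblkno == 3 or b_bcount == 1024 then
 * yields a nonzero (b_lblkno & blknomask) or (b_bcount & secmask) and is
 * routed to the misaligned (MSS) chain, while b_lblkno == 8 with
 * b_bcount == 8192 passes both tests and stays on the regular buf chain.
 */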
11763 
11764 /*
11765  *    Function: sd_uscsi_strategy
11766  *
11767  * Description: Wrapper for calling into the USCSI chain via physio(9F)
11768  *
11769  *   Arguments: bp - buf struct ptr
11770  *
11771  * Return Code: Always returns 0
11772  *
11773  *     Context: Kernel thread context
11774  */
11775 
11776 static int
11777 sd_uscsi_strategy(struct buf *bp)
11778 {
11779 	struct sd_lun		*un;
11780 	struct sd_uscsi_info	*uip;
11781 	struct sd_xbuf		*xp;
11782 	uchar_t			chain_type;
11783 	uchar_t			cmd;
11784 
11785 	ASSERT(bp != NULL);
11786 
11787 	un = ddi_get_soft_state(sd_state, SD_GET_INSTANCE_FROM_BUF(bp));
11788 	if (un == NULL) {
11789 		bioerror(bp, EIO);
11790 		bp->b_resid = bp->b_bcount;
11791 		biodone(bp);
11792 		return (0);
11793 	}
11794 
11795 	ASSERT(!mutex_owned(SD_MUTEX(un)));
11796 
11797 	SD_TRACE(SD_LOG_IO, un, "sd_uscsi_strategy: entry: buf:0x%p\n", bp);
11798 
11799 	/*
11800 	 * A pointer to a struct sd_uscsi_info is expected in bp->b_private
11801 	 */
11802 	ASSERT(bp->b_private != NULL);
11803 	uip = (struct sd_uscsi_info *)bp->b_private;
11804 	cmd = ((struct uscsi_cmd *)(uip->ui_cmdp))->uscsi_cdb[0];
11805 
11806 	mutex_enter(SD_MUTEX(un));
11807 	/*
11808 	 * atapi: Since we are running the CD in PIO mode for now, we need to
11809 	 * call bp_mapin here to avoid bp_mapin being called in interrupt
11810 	 * context under the HBA's init_pkt routine.
11811 	 */
11812 	if (un->un_f_cfg_is_atapi == TRUE) {
11813 		mutex_exit(SD_MUTEX(un));
11814 		bp_mapin(bp);
11815 		mutex_enter(SD_MUTEX(un));
11816 	}
11817 	un->un_ncmds_in_driver++;
11818 	SD_INFO(SD_LOG_IO, un, "sd_uscsi_strategy: un_ncmds_in_driver = %ld\n",
11819 	    un->un_ncmds_in_driver);
11820 
11821 	if ((bp->b_flags & B_WRITE) && (bp->b_bcount != 0) &&
11822 	    (cmd != SCMD_MODE_SELECT) && (cmd != SCMD_MODE_SELECT_G1))
11823 		un->un_f_sync_cache_required = TRUE;
11824 
11825 	mutex_exit(SD_MUTEX(un));
11826 
11827 	switch (uip->ui_flags) {
11828 	case SD_PATH_DIRECT:
11829 		chain_type = SD_CHAIN_DIRECT;
11830 		break;
11831 	case SD_PATH_DIRECT_PRIORITY:
11832 		chain_type = SD_CHAIN_DIRECT_PRIORITY;
11833 		break;
11834 	default:
11835 		chain_type = SD_CHAIN_USCSI;
11836 		break;
11837 	}
11838 
11839 	/*
11840 	 * We may allocate an extra buf for external USCSI commands. If the
11841 	 * application asks for more than 20 bytes of sense data via USCSI,
11842 	 * the SCSA layer will allocate a 252-byte sense buf for that command.
11843 	 */
11844 	if (((struct uscsi_cmd *)(uip->ui_cmdp))->uscsi_rqlen >
11845 	    SENSE_LENGTH) {
11846 		xp = kmem_zalloc(sizeof (struct sd_xbuf) - SENSE_LENGTH +
11847 		    MAX_SENSE_LENGTH, KM_SLEEP);
11848 	} else {
11849 		xp = kmem_zalloc(sizeof (struct sd_xbuf), KM_SLEEP);
11850 	}
11851 
11852 	sd_xbuf_init(un, bp, xp, chain_type, uip->ui_cmdp);
11853 
11854 	/* Use the index obtained within xbuf_init */
11855 	SD_BEGIN_IOSTART(xp->xb_chain_iostart, un, bp);
11856 
11857 	SD_TRACE(SD_LOG_IO, un, "sd_uscsi_strategy: exit: buf:0x%p\n", bp);
11858 
11859 	return (0);
11860 }
11861 
11862 /*
11863  *    Function: sd_send_scsi_cmd
11864  *
11865  * Description: Runs a USCSI command for a user (when called through
11866  *		sdioctl), or for the driver.
11867  *
11868  *   Arguments: dev - the dev_t for the device
11869  *		incmd - ptr to a valid uscsi_cmd struct
11870  *		flag - bit flag, indicating open settings, 32/64 bit type
11871  *		dataspace - UIO_USERSPACE or UIO_SYSSPACE
11872  *		path_flag - SD_PATH_DIRECT to use the USCSI "direct" chain and
11873  *			the normal command waitq, or SD_PATH_DIRECT_PRIORITY
11874  *			to use the USCSI "direct" chain and bypass the normal
11875  *			command waitq.
11876  *
11877  * Return Code: 0 -  successful completion of the given command
11878  *		EIO - scsi_uscsi_handle_command() failed
11879  *		ENXIO  - soft state not found for specified dev
11880  *		EINVAL
11881  *		EFAULT - copyin/copyout error
11882  *		return code of scsi_uscsi_handle_command():
11883  *			EIO
11884  *			ENXIO
11885  *			EACCES
11886  *
11887  *     Context: Waits for command to complete. Can sleep.
11888  */
11889 
11890 static int
11891 sd_send_scsi_cmd(dev_t dev, struct uscsi_cmd *incmd, int flag,
11892     enum uio_seg dataspace, int path_flag)
11893 {
11894 	struct sd_lun	*un;
11895 	sd_ssc_t	*ssc;
11896 	int		rval;
11897 
11898 	un = ddi_get_soft_state(sd_state, SDUNIT(dev));
11899 	if (un == NULL) {
11900 		return (ENXIO);
11901 	}
11902 
11903 	/*
11904 	 * Using sd_ssc_send to handle uscsi cmd
11905 	 */
11906 	ssc = sd_ssc_init(un);
11907 	rval = sd_ssc_send(ssc, incmd, flag, dataspace, path_flag);
11908 	sd_ssc_fini(ssc);
11909 
11910 	return (rval);
11911 }
11912 
11913 /*
11914  *    Function: sd_ssc_init
11915  *
11916  * Description: Uscsi end-user call this function to initialize necessary
11917  * Description: Uscsi end-users call this function to initialize the
11918  *              necessary fields, such as the uscsi_cmd and sd_uscsi_info
11919  *              structs.
11920  *
11921  *              The return value of sd_send_scsi_cmd is treated as a
11922  *              fault under various conditions. Even when it is non-zero,
11923  *              some callers may ignore the return value. That is to say,
11924  *              we cannot make an accurate assessment in sdintr, since a
11925  *              command failing in sdintr does not mean the caller of
11926  *              sd_send_scsi_cmd will treat it as a real failure.
11927  *
11928  *              To avoid printing too many error logs for a failed uscsi
11929  *              packet that the caller may not treat as a failure, sd
11930  *              keeps silent while handling all uscsi commands.
11931  *
11932  *              During detach->attach and attach-open, for some types of
11933  *              problems, the driver should be providing information about
11934  *              the problem encountered. Devices use USCSI_SILENT, which
11935  *              suppresses all driver information. The result is that no
11936  *              information about the problem is available. Being
11937  *              completely silent during this time is inappropriate. The
11938  *              driver needs a more selective filter than USCSI_SILENT, so
11939  *              that information related to faults is provided.
11940  *
11941  *              To make an accurate assessment, the caller of
11942  *              sd_send_scsi_USCSI_CMD should take ownership and gather
11943  *              the information necessary to print error messages.
11944  *
11945  *              If we want to print the necessary info for a uscsi command,
11946  *              we need to keep the uscsi_cmd and sd_uscsi_info around until
11947  *              we can make the assessment. We use sd_ssc_init to allocate
11948  *              the structs needed for sending a uscsi command, and we are
11949  *              responsible for freeing that memory by calling sd_ssc_fini.
11950  *              The calling secquences will look like:
11951  *              The calling sequence will look like:
11952  *
11953  *                  ...
11954  *
11955  *                  sd_send_scsi_USCSI_CMD->
11956  *                      sd_ssc_send-> - - - sdintr
11957  *                  ...
11958  *
11959  *                  if we think the return value should be treated as a
11960  *                  if we think the return value should be treated as a
11961  *                  failure, we make the assessment here and print out the
11962  *                  necessary info by retrieving uscsi_cmd and sd_uscsi_info
11963  *                  ...
11964  *
11965  *              sd_ssc_fini
11966  *
11967  *
11968  *   Arguments: un - pointer to driver soft state (unit) structure for this
11969  *                   target.
11970  *
11971  * Return code: sd_ssc_t - pointer to the allocated sd_ssc_t struct, which
11972  *                         contains uscsi_cmd and sd_uscsi_info.
11973  *                  NULL - if memory for the sd_ssc_t struct cannot be allocated
11974  *
11975  *     Context: Kernel Thread.
11976  */
11977 static sd_ssc_t *
11978 sd_ssc_init(struct sd_lun *un)
11979 {
11980 	sd_ssc_t		*ssc;
11981 	struct uscsi_cmd	*ucmdp;
11982 	struct sd_uscsi_info	*uip;
11983 
11984 	ASSERT(un != NULL);
11985 	ASSERT(!mutex_owned(SD_MUTEX(un)));
11986 
11987 	/*
11988 	 * Allocate sd_ssc_t structure
11989 	 */
11990 	ssc = kmem_zalloc(sizeof (sd_ssc_t), KM_SLEEP);
11991 
11992 	/*
11993 	 * Allocate uscsi_cmd by calling scsi_uscsi_alloc common routine
11994 	 */
11995 	ucmdp = scsi_uscsi_alloc();
11996 
11997 	/*
11998 	 * Allocate sd_uscsi_info structure
11999 	 */
12000 	uip = kmem_zalloc(sizeof (struct sd_uscsi_info), KM_SLEEP);
12001 
12002 	ssc->ssc_uscsi_cmd = ucmdp;
12003 	ssc->ssc_uscsi_info = uip;
12004 	ssc->ssc_un = un;
12005 
12006 	return (ssc);
12007 }
12008 
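/*
 * Illustrative sketch, not part of the driver: the sd_ssc_init ->
 * sd_ssc_send -> sd_ssc_assessment -> sd_ssc_fini pairing described
 * above, using TEST UNIT READY as the example command.  The function
 * name sd_example_tur() and the SD_DOC_EXAMPLE guard are hypothetical;
 * the sd_ssc_* calls, flags, and uscsi_cmd fields are the real
 * interfaces used in this file.
 */
#ifdef SD_DOC_EXAMPLE	/* hypothetical guard macro; never defined */
static int
sd_example_tur(struct sd_lun *un)
{
	union scsi_cdb	cdb;
	struct uscsi_cmd ucmd;
	sd_ssc_t	*ssc;
	int		rval;

	bzero(&cdb, sizeof (cdb));
	cdb.scc_cmd = SCMD_TEST_UNIT_READY;

	bzero(&ucmd, sizeof (ucmd));
	ucmd.uscsi_cdb = (caddr_t)&cdb;
	ucmd.uscsi_cdblen = CDB_GROUP0;
	ucmd.uscsi_flags = USCSI_SILENT;
	ucmd.uscsi_timeout = 60;

	ssc = sd_ssc_init(un);
	rval = sd_ssc_send(ssc, &ucmd, FKIOCTL, UIO_SYSSPACE, SD_PATH_DIRECT);

	/* Every sd_ssc_send must be paired with exactly one assessment. */
	if (rval != 0)
		sd_ssc_assessment(ssc, SD_FMT_STATUS_CHECK);
	else
		sd_ssc_assessment(ssc, SD_FMT_STANDARD);

	sd_ssc_fini(ssc);
	return (rval);
}
#endif	/* SD_DOC_EXAMPLE */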
12009 /*
12010  * Function: sd_ssc_fini
12011  *
12012  * Description: To free sd_ssc_t and it's hanging off
12013  * Description: Free the sd_ssc_t struct and everything hanging off it.
12014  *
12015  * Arguments: ssc - pointer to the sd_ssc_t struct.
12016 static void
12017 sd_ssc_fini(sd_ssc_t *ssc)
12018 {
12019 	scsi_uscsi_free(ssc->ssc_uscsi_cmd);
12020 
12021 	if (ssc->ssc_uscsi_info != NULL) {
12022 		kmem_free(ssc->ssc_uscsi_info, sizeof (struct sd_uscsi_info));
12023 		ssc->ssc_uscsi_info = NULL;
12024 	}
12025 
12026 	kmem_free(ssc, sizeof (sd_ssc_t));
12027 	ssc = NULL;
12028 }
12029 
12030 /*
12031  * Function: sd_ssc_send
12032  *
12033  * Description: Runs a USCSI command for a user when called through sdioctl,
12034  *              or for the driver.
12035  *
12036  *   Arguments: ssc - pointer to the sd_ssc_t struct, which carries the
12037  *                    uscsi_cmd and sd_uscsi_info.
12038  *		incmd - ptr to a valid uscsi_cmd struct
12039  *		flag - bit flag, indicating open settings, 32/64 bit type
12040  *		dataspace - UIO_USERSPACE or UIO_SYSSPACE
12041  *		path_flag - SD_PATH_DIRECT to use the USCSI "direct" chain and
12042  *			the normal command waitq, or SD_PATH_DIRECT_PRIORITY
12043  *			to use the USCSI "direct" chain and bypass the normal
12044  *			command waitq.
12045  *
12046  * Return Code: 0 -  successful completion of the given command
12047  *		EIO - scsi_uscsi_handle_command() failed
12048  *		ENXIO  - soft state not found for specified dev
12049  *		ECANCELED - command cancelled due to low power
12050  *		EINVAL
12051  *		EFAULT - copyin/copyout error
12052  *		return code of scsi_uscsi_handle_command():
12053  *			EIO
12054  *			ENXIO
12055  *			EACCES
12056  *
12057  *     Context: Kernel Thread;
12058  *              Waits for command to complete. Can sleep.
12059  */
12060 static int
12061 sd_ssc_send(sd_ssc_t *ssc, struct uscsi_cmd *incmd, int flag,
12062     enum uio_seg dataspace, int path_flag)
12063 {
12064 	struct sd_uscsi_info	*uip;
12065 	struct uscsi_cmd	*uscmd;
12066 	struct sd_lun		*un;
12067 	dev_t			dev;
12068 
12069 	int	format = 0;
12070 	int	rval;
12071 
12072 	ASSERT(ssc != NULL);
12073 	un = ssc->ssc_un;
12074 	ASSERT(un != NULL);
12075 	uscmd = ssc->ssc_uscsi_cmd;
12076 	ASSERT(uscmd != NULL);
12077 	ASSERT(!mutex_owned(SD_MUTEX(un)));
12078 	if (ssc->ssc_flags & SSC_FLAGS_NEED_ASSESSMENT) {
12079 		/*
12080 		 * If we enter here, it indicates that the previous uscsi
12081 		 * command has not been processed by sd_ssc_assessment.
12082 		 * This violates our rules of FMA telemetry processing.
12083 		 * We should print this message along with the last undisposed
12084 		 * uscsi command.
12085 		 */
12086 		if (uscmd->uscsi_cdb != NULL) {
12087 			SD_INFO(SD_LOG_SDTEST, un,
12088 			    "sd_ssc_send is missing the matching "
12089 			    "sd_ssc_assessment when running command 0x%x.\n",
12090 			    uscmd->uscsi_cdb[0]);
12091 		}
12092 		/*
12093 		 * Set the ssc_flags to SSC_FLAGS_UNKNOWN, which should be
12094 		 * the initial status.
12095 		 */
12096 		ssc->ssc_flags = SSC_FLAGS_UNKNOWN;
12097 	}
12098 
12099 	/*
12100 	 * We need to make sure sd_ssc_send is always followed by
12101 	 * sd_ssc_assessment, to avoid missing FMA telemetry.
12102 	 */
12103 	ssc->ssc_flags |= SSC_FLAGS_NEED_ASSESSMENT;
12104 
12105 	/*
12106 	 * if USCSI_PMFAILFAST is set and un is in low power, fail the
12107 	 * command immediately.
12108 	 */
12109 	mutex_enter(SD_MUTEX(un));
12110 	mutex_enter(&un->un_pm_mutex);
12111 	if ((uscmd->uscsi_flags & USCSI_PMFAILFAST) &&
12112 	    SD_DEVICE_IS_IN_LOW_POWER(un)) {
12113 		SD_TRACE(SD_LOG_IO, un, "sd_ssc_send:"
12114 		    "un:0x%p is in low power\n", un);
12115 		mutex_exit(&un->un_pm_mutex);
12116 		mutex_exit(SD_MUTEX(un));
12117 		return (ECANCELED);
12118 	}
12119 	mutex_exit(&un->un_pm_mutex);
12120 	mutex_exit(SD_MUTEX(un));
12121 
12122 #ifdef SDDEBUG
12123 	switch (dataspace) {
12124 	case UIO_USERSPACE:
12125 		SD_TRACE(SD_LOG_IO, un,
12126 		    "sd_ssc_send: entry: un:0x%p UIO_USERSPACE\n", un);
12127 		break;
12128 	case UIO_SYSSPACE:
12129 		SD_TRACE(SD_LOG_IO, un,
12130 		    "sd_ssc_send: entry: un:0x%p UIO_SYSSPACE\n", un);
12131 		break;
12132 	default:
12133 		SD_TRACE(SD_LOG_IO, un,
12134 		    "sd_ssc_send: entry: un:0x%p UNEXPECTED SPACE\n", un);
12135 		break;
12136 	}
12137 #endif
12138 
12139 	rval = scsi_uscsi_copyin((intptr_t)incmd, flag,
12140 	    SD_ADDRESS(un), &uscmd);
12141 	if (rval != 0) {
12142 		SD_TRACE(SD_LOG_IO, un, "sd_ssc_send: "
12143 		    "scsi_uscsi_copyin failed\n");
12144 		return (rval);
12145 	}
12146 
12147 	if ((uscmd->uscsi_cdb != NULL) &&
12148 	    (uscmd->uscsi_cdb[0] == SCMD_FORMAT)) {
12149 		mutex_enter(SD_MUTEX(un));
12150 		un->un_f_format_in_progress = TRUE;
12151 		mutex_exit(SD_MUTEX(un));
12152 		format = 1;
12153 	}
12154 
12155 	/*
12156 	 * Fill in the (pre-allocated) sd_uscsi_info struct with the info
12157 	 * needed by sd_initpkt_for_uscsi().  Then put the pointer into
12158 	 * b_private in the buf for sd_initpkt_for_uscsi().  Note that
12159 	 * since we allocate the buf here in this function, we do not
12160 	 * need to preserve the prior contents of b_private.
12161 	 * The sd_uscsi_info struct is also used by sd_uscsi_strategy()
12162 	 */
12163 	uip = ssc->ssc_uscsi_info;
12164 	uip->ui_flags = path_flag;
12165 	uip->ui_cmdp = uscmd;
12166 
12167 	/*
12168 	 * Commands sent with priority are intended for error recovery
12169 	 * situations, and do not have retries performed.
12170 	 */
12171 	if (path_flag == SD_PATH_DIRECT_PRIORITY) {
12172 		uscmd->uscsi_flags |= USCSI_DIAGNOSE;
12173 	}
12174 	uscmd->uscsi_flags &= ~USCSI_NOINTR;
12175 
12176 	dev = SD_GET_DEV(un);
12177 	rval = scsi_uscsi_handle_cmd(dev, dataspace, uscmd,
12178 	    sd_uscsi_strategy, NULL, uip);
12179 
12180 	/*
12181 	 * Mark ssc_flags right after handle_cmd to record that
12182 	 * the uscsi command has been sent.
12183 	 */
12184 	ssc->ssc_flags |= SSC_FLAGS_CMD_ISSUED;
12185 
12186 #ifdef SDDEBUG
12187 	SD_INFO(SD_LOG_IO, un, "sd_ssc_send: "
12188 	    "uscsi_status: 0x%02x  uscsi_resid:0x%x\n",
12189 	    uscmd->uscsi_status, uscmd->uscsi_resid);
12190 	if (uscmd->uscsi_bufaddr != NULL) {
12191 		SD_INFO(SD_LOG_IO, un, "sd_ssc_send: "
12192 		    "uscmd->uscsi_bufaddr: 0x%p  uscmd->uscsi_buflen:%d\n",
12193 		    uscmd->uscsi_bufaddr, uscmd->uscsi_buflen);
12194 		if (dataspace == UIO_SYSSPACE) {
12195 			SD_DUMP_MEMORY(un, SD_LOG_IO,
12196 			    "data", (uchar_t *)uscmd->uscsi_bufaddr,
12197 			    uscmd->uscsi_buflen, SD_LOG_HEX);
12198 		}
12199 	}
12200 #endif
12201 
12202 	if (format == 1) {
12203 		mutex_enter(SD_MUTEX(un));
12204 		un->un_f_format_in_progress = FALSE;
12205 		mutex_exit(SD_MUTEX(un));
12206 	}
12207 
12208 	(void) scsi_uscsi_copyout((intptr_t)incmd, uscmd);
12209 
12210 	return (rval);
12211 }
12212 
12213 /*
12214  *     Function: sd_ssc_print
12215  *
12216  * Description: Print information available to the console.
12217  *
12218  * Arguments: ssc - pointer to the sd_ssc_t struct, which carries the
12219  *                    uscsi_cmd and sd_uscsi_info.
12220  *            sd_severity - log level.
12221  *     Context: Kernel thread or interrupt context.
12222  */
12223 static void
12224 sd_ssc_print(sd_ssc_t *ssc, int sd_severity)
12225 {
12226 	struct uscsi_cmd	*ucmdp;
12227 	struct scsi_device	*devp;
12228 	dev_info_t		*devinfo;
12229 	uchar_t			*sensep;
12230 	int			senlen;
12231 	union scsi_cdb		*cdbp;
12232 	uchar_t			com;
12233 	extern struct scsi_key_strings scsi_cmds[];
12234 
12235 	ASSERT(ssc != NULL);
12236 	ASSERT(ssc->ssc_un != NULL);
12237 
12238 	if (SD_FM_LOG(ssc->ssc_un) != SD_FM_LOG_EREPORT)
12239 		return;
12240 	ucmdp = ssc->ssc_uscsi_cmd;
12241 	devp = SD_SCSI_DEVP(ssc->ssc_un);
12242 	devinfo = SD_DEVINFO(ssc->ssc_un);
12243 	ASSERT(ucmdp != NULL);
12244 	ASSERT(devp != NULL);
12245 	ASSERT(devinfo != NULL);
12246 	sensep = (uint8_t *)ucmdp->uscsi_rqbuf;
12247 	senlen = ucmdp->uscsi_rqlen - ucmdp->uscsi_rqresid;
12248 	cdbp = (union scsi_cdb *)ucmdp->uscsi_cdb;
12249 
12250 	/* In certain cases (like DOORLOCK), the cdb could be NULL. */
12251 	if (cdbp == NULL)
12252 		return;
12253 	/* We don't print a log message if no sense data is available. */
12254 	if (senlen == 0)
12255 		sensep = NULL;
12256 	com = cdbp->scc_cmd;
12257 	scsi_generic_errmsg(devp, sd_label, sd_severity, 0, 0, com,
12258 	    scsi_cmds, sensep, ssc->ssc_un->un_additional_codes, NULL);
12259 }
12260 
12261 /*
12262  *     Function: sd_ssc_assessment
12263  *
12264  * Description: We use this function to make an assessment at the point
12265  *              where the SD driver may encounter a potential error.
12266  *
12267  * Arguments: ssc - pointer to the sd_ssc_t struct, which carries the
12268  *                  uscsi_cmd and sd_uscsi_info.
12269  *            tp_assess - a hint of strategy for ereport posting.
12270  *            Possible values of tp_assess include:
12271  *                SD_FMT_IGNORE - we don't post any ereport because we're
12272  *                sure that it is OK to ignore the underlying problems.
12273  *                SD_FMT_IGNORE_COMPROMISE - we don't post any ereport for now,
12274  *                but it might not be correct to ignore the underlying hardware
12275  *                error.
12276  *                SD_FMT_STATUS_CHECK - we will post an ereport with the
12277  *                payload driver-assessment of value "fail" or "fatal"
12278  *                (depending on what information we have here). This
12279  *                assessment value is usually set when the SD driver thinks
12280  *                there is a potential error (typically, when the return value
12281  *                of the SCSI command is EIO).
12282  *                SD_FMT_STANDARD - we will post an ereport with the payload
12283  *                driver-assessment of value "info". This assessment value is
12284  *                set when the SCSI command returned successfully and with
12285  *                sense data sent back.
12286  *
12287  *     Context: Kernel thread.
12288  */
12289 static void
12290 sd_ssc_assessment(sd_ssc_t *ssc, enum sd_type_assessment tp_assess)
12291 {
12292 	int senlen = 0;
12293 	struct uscsi_cmd *ucmdp = NULL;
12294 	struct sd_lun *un;
12295 
12296 	ASSERT(ssc != NULL);
12297 	un = ssc->ssc_un;
12298 	ASSERT(un != NULL);
12299 	ucmdp = ssc->ssc_uscsi_cmd;
12300 	ASSERT(ucmdp != NULL);
12301 
12302 	if (ssc->ssc_flags & SSC_FLAGS_NEED_ASSESSMENT) {
12303 		ssc->ssc_flags &= ~SSC_FLAGS_NEED_ASSESSMENT;
12304 	} else {
12305 		/*
12306 		 * If we enter here, it indicates an incorrect calling
12307 		 * sequence of sd_ssc_send and sd_ssc_assessment; the two
12308 		 * should always be called as a pair, or FMA telemetry
12309 		 * may be lost.
12310 		 */
12311 		if (ucmdp->uscsi_cdb != NULL) {
12312 			SD_INFO(SD_LOG_SDTEST, un,
12313 			    "sd_ssc_assessment is missing the matching "
12314 			    "sd_ssc_send when running 0x%x, "
12315 			    "or there are superfluous sd_ssc_assessment calls "
12316 			    "for the same sd_ssc_send.\n",
12317 			    ucmdp->uscsi_cdb[0]);
12318 		}
12319 		/*
12320 		 * Set the ssc_flags to the initial value to avoid passing
12321 		 * down dirty flags to the following sd_ssc_send function.
12322 		 */
12323 		ssc->ssc_flags = SSC_FLAGS_UNKNOWN;
12324 		return;
12325 	}
12326 
12327 	/*
12328 	 * Only handle an issued command which is waiting for assessment.
12329 	 * A command which has not been issued will not have
12330 	 * SSC_FLAGS_INVALID_DATA set, so it's OK to just return here.
12331 	 */
12332 	if (!(ssc->ssc_flags & SSC_FLAGS_CMD_ISSUED)) {
12333 		sd_ssc_print(ssc, SCSI_ERR_INFO);
12334 		return;
12335 	} else {
12336 		/*
12337 		 * For an issued command, we should clear this flag so
12338 		 * that the sd_ssc_t structure can be reused across
12339 		 * multiple uscsi commands.
12340 		 */
12341 		ssc->ssc_flags &= ~SSC_FLAGS_CMD_ISSUED;
12342 	}
12343 
12344 	/*
12345 	 * We will not deal with non-retryable (flag USCSI_DIAGNOSE set)
12346 	 * commands here, and we should clear the ssc_flags before returning.
12347 	 */
12348 	if (ucmdp->uscsi_flags & USCSI_DIAGNOSE) {
12349 		ssc->ssc_flags = SSC_FLAGS_UNKNOWN;
12350 		return;
12351 	}
12352 
12353 	switch (tp_assess) {
12354 	case SD_FMT_IGNORE:
12355 	case SD_FMT_IGNORE_COMPROMISE:
12356 		break;
12357 	case SD_FMT_STATUS_CHECK:
12358 		/*
12359 		 * For a failed command (including a succeeded command
12360 		 * with invalid data sent back).
12361 		 */
12362 		sd_ssc_post(ssc, SD_FM_DRV_FATAL);
12363 		break;
12364 	case SD_FMT_STANDARD:
12365 		/*
12366 		 * Always for succeeded commands, possibly with sense
12367 		 * data sent back.
12368 		 * Limitation:
12369 		 *	We can only handle a succeeded command with sense
12370 		 *	data sent back when auto-request-sense is enabled.
12371 		 */
12372 		senlen = ssc->ssc_uscsi_cmd->uscsi_rqlen -
12373 		    ssc->ssc_uscsi_cmd->uscsi_rqresid;
12374 		if ((ssc->ssc_uscsi_info->ui_pkt_state & STATE_ARQ_DONE) &&
12375 		    (un->un_f_arq_enabled == TRUE) &&
12376 		    senlen > 0 &&
12377 		    ssc->ssc_uscsi_cmd->uscsi_rqbuf != NULL) {
12378 			sd_ssc_post(ssc, SD_FM_DRV_NOTICE);
12379 		}
12380 		break;
12381 	default:
12382 		/*
12383 		 * Should not have other type of assessment.
12384 		 */
12385 		scsi_log(SD_DEVINFO(un), sd_label, CE_CONT,
12386 		    "sd_ssc_assessment got wrong "
12387 		    "sd_type_assessment %d.\n", tp_assess);
12388 		break;
12389 	}
12390 	/*
12391 	 * Clear up the ssc_flags before return.
12392 	 */
12393 	ssc->ssc_flags = SSC_FLAGS_UNKNOWN;
12394 }
12395 
12396 /*
12397  *    Function: sd_ssc_post
12398  *
12399  * Description: 1. Read the driver property to get the fm-scsi-log flag.
12400  *              2. Print a log message if fm_log_capable is non-zero.
12401  *              3. Call sd_ssc_ereport_post to post an ereport if possible.
12402  *
12403  *    Context: May be called from kernel thread or interrupt context.
12404  */
12405 static void
12406 sd_ssc_post(sd_ssc_t *ssc, enum sd_driver_assessment sd_assess)
12407 {
12408 	struct sd_lun	*un;
12409 	int		sd_severity;
12410 
12411 	ASSERT(ssc != NULL);
12412 	un = ssc->ssc_un;
12413 	ASSERT(un != NULL);
12414 
12415 	/*
12416 	 * We may enter here from sd_ssc_assessment (for a USCSI command)
12417 	 * or directly from sdintr context.
12418 	 * We don't handle non-disk drives (CD-ROM, removable media).
12419 	 * Clear the ssc_flags before returning in case we've set
12420 	 * SSC_FLAGS_INVALID_XXX, which should be skipped for a non-disk
12421 	 * drive.
12422 	 */
12423 	if (ISCD(un) || un->un_f_has_removable_media) {
12424 		ssc->ssc_flags = SSC_FLAGS_UNKNOWN;
12425 		return;
12426 	}
12427 
12428 	switch (sd_assess) {
12429 		case SD_FM_DRV_FATAL:
12430 			sd_severity = SCSI_ERR_FATAL;
12431 			break;
12432 		case SD_FM_DRV_RECOVERY:
12433 			sd_severity = SCSI_ERR_RECOVERED;
12434 			break;
12435 		case SD_FM_DRV_RETRY:
12436 			sd_severity = SCSI_ERR_RETRYABLE;
12437 			break;
12438 		case SD_FM_DRV_NOTICE:
12439 			sd_severity = SCSI_ERR_INFO;
12440 			break;
12441 		default:
12442 			sd_severity = SCSI_ERR_UNKNOWN;
12443 	}
12444 	/* print log */
12445 	sd_ssc_print(ssc, sd_severity);
12446 
12447 	/* always post ereport */
12448 	sd_ssc_ereport_post(ssc, sd_assess);
12449 }
12450 
12451 /*
12452  *    Function: sd_ssc_set_info
12453  *
12454  * Description: Mark ssc_flags and set ssc_info, which becomes the
12455  *              payload of the uderr ereport. This function causes
12456  *              sd_ssc_ereport_post to post the uderr ereport only.
12457  *              In addition, when ssc_flags == SSC_FLAGS_INVALID_DATA (USCSI),
12458  *              the function also calls SD_ERROR or scsi_log for a
12459  *              CDROM/removable-media/DDI_FM_NOT_CAPABLE device.
12460  *
12461  * Arguments: ssc - pointer to the sd_ssc_t struct, which carries the
12462  *                  uscsi_cmd and sd_uscsi_info.
12463  *            ssc_flags - indicate the sub-category of a uderr.
12464  *            comp - this argument is meaningful only when
12465  *                   ssc_flags == SSC_FLAGS_INVALID_DATA, and its possible
12466  *                   values include:
12467  *                   > 0, SD_ERROR is used with comp as the driver logging
12468  *                   component;
12469  *                   = 0, scsi-log is used to log error telemetries;
12470  *                   < 0, no log available for this telemetry.
12471  *
12472  *    Context: Kernel thread or interrupt context
12473  */
12474 static void
12475 sd_ssc_set_info(sd_ssc_t *ssc, int ssc_flags, uint_t comp, const char *fmt, ...)
12476 {
12477 	va_list	ap;
12478 
12479 	ASSERT(ssc != NULL);
12480 	ASSERT(ssc->ssc_un != NULL);
12481 
12482 	ssc->ssc_flags |= ssc_flags;
12483 	va_start(ap, fmt);
12484 	(void) vsnprintf(ssc->ssc_info, sizeof (ssc->ssc_info), fmt, ap);
12485 	va_end(ap);
12486 
12487 	/*
12488 	 * If SSC_FLAGS_INVALID_DATA is set, it should be a uscsi command
12489 	 * with invalid data sent back. For non-uscsi command, the
12490 	 * following code will be bypassed.
12491 	 */
12492 	if (ssc_flags & SSC_FLAGS_INVALID_DATA) {
12493 		if (SD_FM_LOG(ssc->ssc_un) == SD_FM_LOG_NSUP) {
12494 			/*
12495 			 * If the error belongs to a certain component and we
12496 			 * do not want it to show up on the console, we
12497 			 * use SD_ERROR; otherwise scsi_log is
12498 			 * preferred.
12499 			 */
12500 			if (comp > 0) {
12501 				SD_ERROR(comp, ssc->ssc_un, ssc->ssc_info);
12502 			} else if (comp == 0) {
12503 				scsi_log(SD_DEVINFO(ssc->ssc_un), sd_label,
12504 				    CE_WARN, ssc->ssc_info);
12505 			}
12506 		}
12507 	}
12508 }
12509 
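/*
 * Usage sketch for sd_ssc_set_info (not driver code; page_len is a
 * hypothetical variable used only for illustration):
 *
 *	sd_ssc_set_info(ssc, SSC_FLAGS_INVALID_DATA, 0,
 *	    "mode page has unexpected length %d", page_len);
 *
 * With comp == 0 as above, the message is logged via scsi_log on
 * SD_FM_LOG_NSUP devices; with comp > 0 it goes through SD_ERROR using
 * comp as the logging component, and with comp < 0 it is not logged.
 */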
12510 /*
12511  *    Function: sd_buf_iodone
12512  *
12513  * Description: Frees the sd_xbuf & returns the buf to its originator.
12514  *
12515  *     Context: May be called from interrupt context.
12516  */
12517 /* ARGSUSED */
12518 static void
12519 sd_buf_iodone(int index, struct sd_lun *un, struct buf *bp)
12520 {
12521 	struct sd_xbuf *xp;
12522 
12523 	ASSERT(un != NULL);
12524 	ASSERT(bp != NULL);
12525 	ASSERT(!mutex_owned(SD_MUTEX(un)));
12526 
12527 	SD_TRACE(SD_LOG_IO_CORE, un, "sd_buf_iodone: entry.\n");
12528 
12529 	xp = SD_GET_XBUF(bp);
12530 	ASSERT(xp != NULL);
12531 
12532 	/* xbuf is gone after this */
12533 	if (ddi_xbuf_done(bp, un->un_xbuf_attr)) {
12534 		mutex_enter(SD_MUTEX(un));
12535 
12536 		/*
12537 		 * Record the time at which the command completed.
12538 		 * This is used to determine whether the device has been
12539 		 * idle long enough to report it as idle to the PM framework,
12540 		 * lowering overhead and therefore improving per-I/O
12541 		 * performance.
12542 		 */
12543 		un->un_pm_idle_time = gethrtime();
12544 
12545 		un->un_ncmds_in_driver--;
12546 		ASSERT(un->un_ncmds_in_driver >= 0);
12547 		SD_INFO(SD_LOG_IO, un,
12548 		    "sd_buf_iodone: un_ncmds_in_driver = %ld\n",
12549 		    un->un_ncmds_in_driver);
12550 
12551 		mutex_exit(SD_MUTEX(un));
12552 	}
12553 
12554 	biodone(bp);				/* bp is gone after this */
12555 
12556 	SD_TRACE(SD_LOG_IO_CORE, un, "sd_buf_iodone: exit.\n");
12557 }
12558 
12559 
12560 /*
12561  *    Function: sd_uscsi_iodone
12562  *
12563  * Description: Frees the sd_xbuf & returns the buf to its originator.
12564  *
12565  *     Context: May be called from interrupt context.
12566  */
12567 /* ARGSUSED */
12568 static void
12569 sd_uscsi_iodone(int index, struct sd_lun *un, struct buf *bp)
12570 {
12571 	struct sd_xbuf *xp;
12572 
12573 	ASSERT(un != NULL);
12574 	ASSERT(bp != NULL);
12575 
12576 	xp = SD_GET_XBUF(bp);
12577 	ASSERT(xp != NULL);
12578 	ASSERT(!mutex_owned(SD_MUTEX(un)));
12579 
12580 	SD_INFO(SD_LOG_IO, un, "sd_uscsi_iodone: entry.\n");
12581 
12582 	bp->b_private = xp->xb_private;
12583 
12584 	mutex_enter(SD_MUTEX(un));
12585 
12586 	/*
12587 	 * Record the time at which the command completed.
12588 	 * This is used to determine whether the device has been
12589 	 * idle long enough to report it as idle to the PM framework,
12590 	 * lowering overhead and therefore improving per-I/O
12591 	 * performance.
12592 	 */
12593 	un->un_pm_idle_time = gethrtime();
12594 
12595 	un->un_ncmds_in_driver--;
12596 	ASSERT(un->un_ncmds_in_driver >= 0);
12597 	SD_INFO(SD_LOG_IO, un, "sd_uscsi_iodone: un_ncmds_in_driver = %ld\n",
12598 	    un->un_ncmds_in_driver);
12599 
12600 	mutex_exit(SD_MUTEX(un));
12601 
12602 	if (((struct uscsi_cmd *)(xp->xb_pktinfo))->uscsi_rqlen >
12603 	    SENSE_LENGTH) {
12604 		kmem_free(xp, sizeof (struct sd_xbuf) - SENSE_LENGTH +
12605 		    MAX_SENSE_LENGTH);
12606 	} else {
12607 		kmem_free(xp, sizeof (struct sd_xbuf));
12608 	}
12609 
12610 	biodone(bp);
12611 
12612 	SD_INFO(SD_LOG_IO, un, "sd_uscsi_iodone: exit.\n");
12613 }
12614 
12615 
12616 /*
12617  *    Function: sd_mapblockaddr_iostart
12618  *
12619  * Description: Verify request lies within the partition limits for
12620  *		the indicated minor device.  Issue "overrun" buf if
12621  *		request would exceed partition range.  Converts the
12622  *		partition-relative block address to an absolute one.
12623  *
12624  *              Upon exit of this function:
12625  *              1. I/O is aligned:
12626  *                 xp->xb_blkno represents the absolute sector address.
12627  *              2. I/O is misaligned:
12628  *                 xp->xb_blkno represents the absolute logical block address
12629  *                 based on DEV_BSIZE. The logical block address will be
12630  *                 converted to a physical sector address in
12631  *                 sd_mapblocksize_iostart.
12632  *              3. I/O is misaligned but is aligned in the "overrun" buf:
12633  *                 xp->xb_blkno represents the absolute logical block address
12634  *                 based on DEV_BSIZE. The logical block address will be
12635  *                 converted to a physical sector address in
12636  *                 sd_mapblocksize_iostart, but no RMW will be issued in
12637  *                 this case.
12638  *     Context: Can sleep
12639  *
12640  *      Issues: This follows what the old code did, in terms of accessing
12641  *		some of the partition info in the unit struct without holding
12642  *		the mutex.  This is a general issue: if the partition info
12643  *		can be altered while IO is in progress... as soon as we send
12644  *		a buf, its partitioning can be invalid before it gets to the
12645  *		device.  Probably the right fix is to move partitioning out
12646  *		of the driver entirely.
12647  */
12648 
12649 static void
12650 sd_mapblockaddr_iostart(int index, struct sd_lun *un, struct buf *bp)
12651 {
12652 	diskaddr_t	nblocks;	/* #blocks in the given partition */
12653 	daddr_t	blocknum;	/* Block number specified by the buf */
12654 	size_t	requested_nblocks;
12655 	size_t	available_nblocks;
12656 	int	partition;
12657 	diskaddr_t	partition_offset;
12658 	struct sd_xbuf *xp;
12659 	int secmask = 0, blknomask = 0;
12660 	ushort_t is_aligned = TRUE;
12661 
12662 	ASSERT(un != NULL);
12663 	ASSERT(bp != NULL);
12664 	ASSERT(!mutex_owned(SD_MUTEX(un)));
12665 
12666 	SD_TRACE(SD_LOG_IO_PARTITION, un,
12667 	    "sd_mapblockaddr_iostart: entry: buf:0x%p\n", bp);
12668 
12669 	xp = SD_GET_XBUF(bp);
12670 	ASSERT(xp != NULL);
12671 
12672 	/*
12673 	 * If the geometry is not indicated as valid, attempt to access
12674 	 * the unit & verify the geometry/label. This can be the case for
12675 	 * removable-media devices, or if the device was opened in
12676 	 * NDELAY/NONBLOCK mode.
12677 	 */
12678 	partition = SDPART(bp->b_edev);
12679 
12680 	if (!SD_IS_VALID_LABEL(un)) {
12681 		sd_ssc_t *ssc;
12682 		/*
12683 		 * Initialize sd_ssc_t for internal uscsi commands.
12684 		 * To avoid a potential performance issue, we allocate
12685 		 * memory only if the label is invalid.
12686 		 */
12687 		ssc = sd_ssc_init(un);
12688 
12689 		if (sd_ready_and_valid(ssc, partition) != SD_READY_VALID) {
12690 			/*
12691 			 * For removable devices it is possible to start an
12692 			 * I/O without a media by opening the device in nodelay
12693 			 * mode. Also for writable CDs there can be many
12694 			 * scenarios where there is no geometry yet but the volume
12695 			 * manager is trying to issue a read() just because
12696 			 * it can see the TOC on the CD. So do not print a message
12697 			 * for removables.
12698 			 */
12699 			if (!un->un_f_has_removable_media) {
12700 				scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
12701 				    "i/o to invalid geometry\n");
12702 			}
12703 			bioerror(bp, EIO);
12704 			bp->b_resid = bp->b_bcount;
12705 			SD_BEGIN_IODONE(index, un, bp);
12706 
12707 			sd_ssc_fini(ssc);
12708 			return;
12709 		}
12710 		sd_ssc_fini(ssc);
12711 	}
12712 
12713 	nblocks = 0;
12714 	(void) cmlb_partinfo(un->un_cmlbhandle, partition,
12715 	    &nblocks, &partition_offset, NULL, NULL, (void *)SD_PATH_DIRECT);
12716 
12717 	if (un->un_f_enable_rmw) {
12718 		blknomask = (un->un_phy_blocksize / DEV_BSIZE) - 1;
12719 		secmask = un->un_phy_blocksize - 1;
12720 	} else {
12721 		blknomask = (un->un_tgt_blocksize / DEV_BSIZE) - 1;
12722 		secmask = un->un_tgt_blocksize - 1;
12723 	}
12724 
12725 	if ((bp->b_lblkno & (blknomask)) || (bp->b_bcount & (secmask))) {
12726 		is_aligned = FALSE;
12727 	}
12728 
12729 	if (!(NOT_DEVBSIZE(un)) || un->un_f_enable_rmw) {
12730 		/*
12731 		 * If I/O is aligned, no need to involve RMW(Read Modify Write)
12732 		 * Convert the logical block number to target's physical sector
12733 		 * number.
12734 		 */
12735 		if (is_aligned) {
12736 			xp->xb_blkno = SD_SYS2TGTBLOCK(un, xp->xb_blkno);
12737 		} else {
12738 			/*
12739 			 * There is no RMW if we're just reading, so don't
12740 			 * warn or error out because of it.
12741 			 */
12742 			if (bp->b_flags & B_READ) {
12743 				/*EMPTY*/
12744 			} else if (!un->un_f_enable_rmw &&
12745 			    un->un_f_rmw_type == SD_RMW_TYPE_RETURN_ERROR) {
12746 				bp->b_flags |= B_ERROR;
12747 				goto error_exit;
12748 			} else if (un->un_f_rmw_type == SD_RMW_TYPE_DEFAULT) {
12749 				mutex_enter(SD_MUTEX(un));
12750 				if (!un->un_f_enable_rmw &&
12751 				    un->un_rmw_msg_timeid == NULL) {
12752 					scsi_log(SD_DEVINFO(un), sd_label,
12753 					    CE_WARN, "I/O request is not "
12754 					    "aligned with the %d-byte disk "
12755 					    "sector size. It is handled through "
12756 					    "Read-Modify-Write, but performance "
12757 					    "is very low.\n",
12758 					    un->un_tgt_blocksize);
12759 					un->un_rmw_msg_timeid =
12760 					    timeout(sd_rmw_msg_print_handler,
12761 					    un, SD_RMW_MSG_PRINT_TIMEOUT);
12762 				} else {
12763 					un->un_rmw_incre_count++;
12764 				}
12765 				mutex_exit(SD_MUTEX(un));
12766 			}
12767 
12768 			nblocks = SD_TGT2SYSBLOCK(un, nblocks);
12769 			partition_offset = SD_TGT2SYSBLOCK(un,
12770 			    partition_offset);
12771 		}
12772 	}
12773 
12774 	/*
12775 	 * blocknum is the starting block number of the request. At this
12776 	 * point it is still relative to the start of the minor device.
12777 	 */
12778 	blocknum = xp->xb_blkno;
12779 
12780 	/*
12781 	 * Legacy: If the starting block number is one past the last block
12782 	 * in the partition, do not set B_ERROR in the buf.
12783 	 */
12784 	if (blocknum == nblocks)  {
12785 		goto error_exit;
12786 	}
12787 
12788 	/*
12789 	 * Confirm that the first block of the request lies within the
12790 	 * partition limits. Also the requested number of bytes must be
12791 	 * a multiple of the system block size.
12792 	 */
12793 	if ((blocknum < 0) || (blocknum >= nblocks) ||
12794 	    ((bp->b_bcount & (DEV_BSIZE - 1)) != 0)) {
12795 		bp->b_flags |= B_ERROR;
12796 		goto error_exit;
12797 	}
12798 
12799 	/*
12800 	 * If the requested # blocks exceeds the available # blocks, that
12801 	 * is an overrun of the partition.
12802 	 */
12803 	if ((!NOT_DEVBSIZE(un)) && is_aligned) {
12804 		requested_nblocks = SD_BYTES2TGTBLOCKS(un, bp->b_bcount);
12805 	} else {
12806 		requested_nblocks = SD_BYTES2SYSBLOCKS(bp->b_bcount);
12807 	}
12808 
12809 	available_nblocks = (size_t)(nblocks - blocknum);
12810 	ASSERT(nblocks >= blocknum);
12811 
12812 	if (requested_nblocks > available_nblocks) {
12813 		size_t resid;
12814 
12815 		/*
12816 		 * Allocate an "overrun" buf to allow the request to proceed
12817 		 * for the amount of space available in the partition. The
12818 		 * amount not transferred will be added into the b_resid
12819 		 * when the operation is complete. The overrun buf
12820 		 * replaces the original buf here, and the original buf
12821 		 * is saved inside the overrun buf, for later use.
12822 		 */
12823 		if ((!NOT_DEVBSIZE(un)) && is_aligned) {
12824 			resid = SD_TGTBLOCKS2BYTES(un,
12825 			    (offset_t)(requested_nblocks - available_nblocks));
12826 		} else {
12827 			resid = SD_SYSBLOCKS2BYTES(
12828 			    (offset_t)(requested_nblocks - available_nblocks));
12829 		}
12830 
12831 		size_t count = bp->b_bcount - resid;
12832 		/*
12833 		 * Note: count is an unsigned entity, thus it can NEVER
12834 		 * be less than 0, so ASSERT that the original values are
12835 		 * correct.
12836 		 */
12837 		ASSERT(bp->b_bcount >= resid);
12838 
12839 		bp = sd_bioclone_alloc(bp, count, blocknum,
12840 		    (int (*)(struct buf *))(uintptr_t)sd_mapblockaddr_iodone);
12841 		xp = SD_GET_XBUF(bp); /* Update for 'new' bp! */
12842 		ASSERT(xp != NULL);
12843 	}
12844 
12845 	/* At this point there should be no residual for this buf. */
12846 	ASSERT(bp->b_resid == 0);
12847 
12848 	/* Convert the block number to an absolute address. */
12849 	xp->xb_blkno += partition_offset;
12850 
12851 	SD_NEXT_IOSTART(index, un, bp);
12852 
12853 	SD_TRACE(SD_LOG_IO_PARTITION, un,
12854 	    "sd_mapblockaddr_iostart: exit 0: buf:0x%p\n", bp);
12855 
12856 	return;
12857 
12858 error_exit:
12859 	bp->b_resid = bp->b_bcount;
12860 	SD_BEGIN_IODONE(index, un, bp);
12861 	SD_TRACE(SD_LOG_IO_PARTITION, un,
12862 	    "sd_mapblockaddr_iostart: exit 1: buf:0x%p\n", bp);
12863 }
12864 
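/*
 * A worked example of the overrun math in sd_mapblockaddr_iostart above
 * (the numbers are assumed purely for illustration, with a 512-byte
 * block size throughout): for a partition with nblocks == 100, a request
 * at blocknum == 96 with bp->b_bcount == 4096 asks for requested_nblocks
 * == 8 while available_nblocks == 4.  The overrun buf then proceeds for
 * count == 4096 - resid == 2048 bytes, and the remaining resid == 2048
 * bytes are added back into b_resid by sd_mapblockaddr_iodone() when the
 * operation completes.
 */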
12865 
12866 /*
12867  *    Function: sd_mapblockaddr_iodone
12868  *
12869  * Description: Completion-side processing for partition management.
12870  *
12871  *     Context: May be called under interrupt context
12872  */
12873 
12874 static void
12875 sd_mapblockaddr_iodone(int index, struct sd_lun *un, struct buf *bp)
12876 {
12877 	/* int	partition; */	/* Not used, see below. */
12878 	ASSERT(un != NULL);
12879 	ASSERT(bp != NULL);
12880 	ASSERT(!mutex_owned(SD_MUTEX(un)));
12881 
12882 	SD_TRACE(SD_LOG_IO_PARTITION, un,
12883 	    "sd_mapblockaddr_iodone: entry: buf:0x%p\n", bp);
12884 
12885 	if ((uintptr_t)bp->b_iodone == (uintptr_t)sd_mapblockaddr_iodone) {
12886 		/*
12887 		 * We have an "overrun" buf to deal with...
12888 		 */
12889 		struct sd_xbuf	*xp;
12890 		struct buf	*obp;	/* ptr to the original buf */
12891 
12892 		xp = SD_GET_XBUF(bp);
12893 		ASSERT(xp != NULL);
12894 
12895 		/* Retrieve the pointer to the original buf */
12896 		obp = (struct buf *)xp->xb_private;
12897 		ASSERT(obp != NULL);
12898 
12899 		obp->b_resid = obp->b_bcount - (bp->b_bcount - bp->b_resid);
12900 		bioerror(obp, bp->b_error);
12901 
12902 		sd_bioclone_free(bp);
12903 
12904 		/*
12905 		 * Get back the original buf.
12906 		 * Note that since the restoration of xb_blkno below
12907 		 * was removed, the sd_xbuf is not needed.
12908 		 */
12909 		bp = obp;
12910 		/*
12911 		 * xp = SD_GET_XBUF(bp);
12912 		 * ASSERT(xp != NULL);
12913 		 */
12914 	}
12915 
12916 	/*
12917 	 * Convert sd->xb_blkno back to a minor-device relative value.
12918 	 * Note: this has been commented out, as it is not needed in the
12919 	 * current implementation of the driver (ie, since this function
12920 	 * is at the top of the layering chains, so the info will be
12921 	 * discarded) and it is in the "hot" IO path.
12922 	 *
12923 	 * partition = getminor(bp->b_edev) & SDPART_MASK;
12924 	 * xp->xb_blkno -= un->un_offset[partition];
12925 	 */
12926 
12927 	SD_NEXT_IODONE(index, un, bp);
12928 
12929 	SD_TRACE(SD_LOG_IO_PARTITION, un,
12930 	    "sd_mapblockaddr_iodone: exit: buf:0x%p\n", bp);
12931 }
12932 
12933 
12934 /*
12935  *    Function: sd_mapblocksize_iostart
12936  *
12937  * Description: Convert between system block size (un->un_sys_blocksize)
12938  *		and target block size (un->un_tgt_blocksize).
12939  *
12940  *     Context: Can sleep to allocate resources.
12941  *
12942  * Assumptions: A higher layer has already performed any partition validation,
12943  *		and converted the xp->xb_blkno to an absolute value relative
12944  *		to the start of the device.
12945  *
12946  *		It is also assumed that the higher layer has implemented
12947  *		an "overrun" mechanism for the case where the request would
12948  *		read/write beyond the end of a partition.  In this case we
12949  *		assume (and ASSERT) that bp->b_resid == 0.
12950  *
12951  *		Note: The implementation for this routine assumes the target
12952  *		block size remains constant between allocation and transport.
12953  */
12954 
12955 static void
12956 sd_mapblocksize_iostart(int index, struct sd_lun *un, struct buf *bp)
12957 {
12958 	struct sd_mapblocksize_info	*bsp;
12959 	struct sd_xbuf			*xp;
12960 	offset_t first_byte;
12961 	daddr_t	start_block, end_block;
12962 	daddr_t	request_bytes;
12963 	ushort_t is_aligned = FALSE;
12964 
12965 	ASSERT(un != NULL);
12966 	ASSERT(bp != NULL);
12967 	ASSERT(!mutex_owned(SD_MUTEX(un)));
12968 	ASSERT(bp->b_resid == 0);
12969 
12970 	SD_TRACE(SD_LOG_IO_RMMEDIA, un,
12971 	    "sd_mapblocksize_iostart: entry: buf:0x%p\n", bp);
12972 
12973 	/*
12974 	 * For a non-writable CD, a write request is an error
12975 	 */
12976 	if (ISCD(un) && ((bp->b_flags & B_READ) == 0) &&
12977 	    (un->un_f_mmc_writable_media == FALSE)) {
12978 		bioerror(bp, EIO);
12979 		bp->b_resid = bp->b_bcount;
12980 		SD_BEGIN_IODONE(index, un, bp);
12981 		return;
12982 	}
12983 
12984 	/*
12985 	 * We do not need a shadow buf if the device is using
12986 	 * un->un_sys_blocksize as its block size or if bcount == 0.
12987 	 * In this case there is no layer-private data block allocated.
12988 	 */
12989 	if ((un->un_tgt_blocksize == DEV_BSIZE && !un->un_f_enable_rmw) ||
12990 	    (bp->b_bcount == 0)) {
12991 		goto done;
12992 	}
12993 
12994 #if defined(__x86)
12995 	/* We do not support non-block-aligned transfers for ROD devices */
12996 	ASSERT(!ISROD(un));
12997 #endif
12998 
12999 	xp = SD_GET_XBUF(bp);
13000 	ASSERT(xp != NULL);
13001 
13002 	SD_INFO(SD_LOG_IO_RMMEDIA, un, "sd_mapblocksize_iostart: "
13003 	    "tgt_blocksize:0x%x sys_blocksize: 0x%x\n",
13004 	    un->un_tgt_blocksize, DEV_BSIZE);
13005 	SD_INFO(SD_LOG_IO_RMMEDIA, un, "sd_mapblocksize_iostart: "
13006 	    "request start block:0x%x\n", xp->xb_blkno);
13007 	SD_INFO(SD_LOG_IO_RMMEDIA, un, "sd_mapblocksize_iostart: "
13008 	    "request len:0x%x\n", bp->b_bcount);
13009 
13010 	/*
13011 	 * Allocate the layer-private data area for the mapblocksize layer.
13012 	 * Layers are allowed to use the xp_private member of the sd_xbuf
13013 	 * Layers are allowed to use the xb_private member of the sd_xbuf
13014 	 * each layer also has the responsibility of restoring the prior
13015 	 * contents of xb_private before returning the buf/xbuf to the
13016 	 * higher layer that sent it.
13017 	 *
13018 	 * Here we save the prior contents of xp->xb_private into the
13019 	 * bsp->mbs_oprivate field of our layer-private data area. This value
13020 	 * is restored by sd_mapblocksize_iodone() just prior to freeing up
13021 	 * the layer-private area and returning the buf/xbuf to the layer
13022 	 * that sent it.
13023 	 *
13024 	 * Note that here we use kmem_zalloc for the allocation as there are
13025 	 * parts of the mapblocksize code that expect certain fields to be
13026 	 * zero unless explicitly set to a required value.
13027 	 */
13028 	bsp = kmem_zalloc(sizeof (struct sd_mapblocksize_info), KM_SLEEP);
13029 	bsp->mbs_oprivate = xp->xb_private;
13030 	xp->xb_private = bsp;
13031 
13032 	/*
13033 	 * This treats the data on the disk (target) as an array of bytes.
13034 	 * first_byte is the byte offset, from the beginning of the device,
13035 	 * to the location of the request. This is converted from a
13036 	 * un->un_sys_blocksize block address to a byte offset, and then back
13037 	 * to a block address based upon a un->un_tgt_blocksize block size.
13038 	 *
13039 	 * xp->xb_blkno should be absolute upon entry into this function,
13040 	 * but it is based upon partitions that use the "system"
13041 	 * block size. It must be adjusted to reflect the block size of
13042 	 * the target.
13043 	 *
13044 	 * Note that end_block is actually the block that follows the last
13045 	 * block of the request, but that's what is needed for the computation.
13046 	 */
13047 	first_byte  = SD_SYSBLOCKS2BYTES((offset_t)xp->xb_blkno);
13048 	if (un->un_f_enable_rmw) {
13049 		start_block = xp->xb_blkno =
13050 		    (first_byte / un->un_phy_blocksize) *
13051 		    (un->un_phy_blocksize / DEV_BSIZE);
13052 		end_block   = ((first_byte + bp->b_bcount +
13053 		    un->un_phy_blocksize - 1) / un->un_phy_blocksize) *
13054 		    (un->un_phy_blocksize / DEV_BSIZE);
13055 	} else {
13056 		start_block = xp->xb_blkno = first_byte / un->un_tgt_blocksize;
13057 		end_block   = (first_byte + bp->b_bcount +
13058 		    un->un_tgt_blocksize - 1) / un->un_tgt_blocksize;
13059 	}
13060 
13061 	/* request_bytes is rounded up to a multiple of the target block size */
13062 	request_bytes = (end_block - start_block) * un->un_tgt_blocksize;
13063 
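	/*
	 * Worked example (numbers assumed for illustration): with
	 * un_tgt_blocksize == 2048 and a 512-byte system block size, a
	 * request at xb_blkno == 3 for b_bcount == 1024 gives
	 * first_byte == 1536, start_block == 0, end_block == 2, and
	 * request_bytes == 4096.  The shadow READ allocated below then
	 * covers target blocks 0-1, and mbs_copy_offset == 1536 locates
	 * the start of the user data within that shadow buffer.
	 */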
13064 	/*
13065 	 * See if the starting address of the request and the request
13066 	 * length are aligned on a un->un_tgt_blocksize boundary. If aligned
13067 	 * then we do not need to allocate a shadow buf to handle the request.
13068 	 */
13069 	if (un->un_f_enable_rmw) {
13070 		if (((first_byte % un->un_phy_blocksize) == 0) &&
13071 		    ((bp->b_bcount % un->un_phy_blocksize) == 0)) {
13072 			is_aligned = TRUE;
13073 		}
13074 	} else {
13075 		if (((first_byte % un->un_tgt_blocksize) == 0) &&
13076 		    ((bp->b_bcount % un->un_tgt_blocksize) == 0)) {
13077 			is_aligned = TRUE;
13078 		}
13079 	}
13080 
13081 	if ((bp->b_flags & B_READ) == 0) {
13082 		/*
13083 		 * Lock the range for a write operation. An aligned request is
13084 		 * considered a simple write; otherwise the request must be a
13085 		 * read-modify-write.
13086 		 */
13087 		bsp->mbs_wmp = sd_range_lock(un, start_block, end_block - 1,
13088 		    (is_aligned == TRUE) ? SD_WTYPE_SIMPLE : SD_WTYPE_RMW);
13089 	}
13090 
13091 	/*
13092 	 * Alloc a shadow buf if the request is not aligned. Also, this is
13093 	 * where the READ command is generated for a read-modify-write. (The
13094 	 * write phase is deferred until after the read completes.)
13095 	 */
13096 	if (is_aligned == FALSE) {
13097 
13098 		struct sd_mapblocksize_info	*shadow_bsp;
13099 		struct sd_xbuf	*shadow_xp;
13100 		struct buf	*shadow_bp;
13101 
13102 		/*
13103 		 * Allocate the shadow buf and its associated xbuf. Note that
13104 		 * after this call the xb_blkno value in both the original
13105 		 * buf's sd_xbuf _and_ the shadow buf's sd_xbuf will be the
13106 		 * same: absolute (relative to the start of the device) and
13107 		 * adjusted for the target block size. The b_blkno in the
13108 		 * shadow buf will also be set to this value. We should never
13109 		 * change b_blkno in the original bp however.
13110 		 *
13111 		 * Note also that the shadow buf will always need to be a
13112 		 * READ command, regardless of whether the incoming command
13113 		 * is a READ or a WRITE.
13114 		 */
13115 		shadow_bp = sd_shadow_buf_alloc(bp, request_bytes, B_READ,
13116 		    xp->xb_blkno,
13117 		    (int (*)(struct buf *))(uintptr_t)sd_mapblocksize_iodone);
13118 
13119 		shadow_xp = SD_GET_XBUF(shadow_bp);
13120 
13121 		/*
13122 		 * Allocate the layer-private data for the shadow buf.
13123 		 * (No need to preserve xb_private in the shadow xbuf.)
13124 		 */
13125 		shadow_xp->xb_private = shadow_bsp =
13126 		    kmem_zalloc(sizeof (struct sd_mapblocksize_info), KM_SLEEP);
13127 
13128 		/*
13129 		 * bsp->mbs_copy_offset is used later by sd_mapblocksize_iodone
13130 		 * to figure out where the start of the user data is (based upon
13131 		 * the system block size) in the data returned by the READ
13132 		 * command (which will be based upon the target blocksize). Note
13133 		 * that this is only really used if the request is unaligned.
13134 		 */
13135 		if (un->un_f_enable_rmw) {
13136 			bsp->mbs_copy_offset = (ssize_t)(first_byte -
13137 			    ((offset_t)xp->xb_blkno * un->un_sys_blocksize));
13138 			ASSERT((bsp->mbs_copy_offset >= 0) &&
13139 			    (bsp->mbs_copy_offset < un->un_phy_blocksize));
13140 		} else {
13141 			bsp->mbs_copy_offset = (ssize_t)(first_byte -
13142 			    ((offset_t)xp->xb_blkno * un->un_tgt_blocksize));
13143 			ASSERT((bsp->mbs_copy_offset >= 0) &&
13144 			    (bsp->mbs_copy_offset < un->un_tgt_blocksize));
13145 		}
13146 
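		/*
		 * Continuing the hypothetical example above: copy_offset =
		 * 1536 - (0 * 2048) = 1536, i.e. the user data begins 1536
		 * bytes into the 4096-byte shadow buffer.
		 */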
13147 		shadow_bsp->mbs_copy_offset = bsp->mbs_copy_offset;
13148 
13149 		shadow_bsp->mbs_layer_index = bsp->mbs_layer_index = index;
13150 
13151 		/* Transfer the wmap (if any) to the shadow buf */
13152 		shadow_bsp->mbs_wmp = bsp->mbs_wmp;
13153 		bsp->mbs_wmp = NULL;
13154 
13155 		/*
13156 		 * The shadow buf goes on from here in place of the
13157 		 * original buf.
13158 		 */
13159 		shadow_bsp->mbs_orig_bp = bp;
13160 		bp = shadow_bp;
13161 	}
13162 
13163 	SD_INFO(SD_LOG_IO_RMMEDIA, un,
13164 	    "sd_mapblocksize_iostart: tgt start block:0x%x\n", xp->xb_blkno);
13165 	SD_INFO(SD_LOG_IO_RMMEDIA, un,
13166 	    "sd_mapblocksize_iostart: tgt request len:0x%x\n",
13167 	    request_bytes);
13168 	SD_INFO(SD_LOG_IO_RMMEDIA, un,
13169 	    "sd_mapblocksize_iostart: shadow buf:0x%x\n", bp);
13170 
13171 done:
13172 	SD_NEXT_IOSTART(index, un, bp);
13173 
13174 	SD_TRACE(SD_LOG_IO_RMMEDIA, un,
13175 	    "sd_mapblocksize_iostart: exit: buf:0x%p\n", bp);
13176 }
13177 
13178 
13179 /*
13180  *    Function: sd_mapblocksize_iodone
13181  *
13182  * Description: Completion side processing for block-size mapping.
13183  *
13184  *     Context: May be called under interrupt context
13185  */
13186 
13187 static void
13188 sd_mapblocksize_iodone(int index, struct sd_lun *un, struct buf *bp)
13189 {
13190 	struct sd_mapblocksize_info	*bsp;
13191 	struct sd_xbuf	*xp;
13192 	struct sd_xbuf	*orig_xp;	/* sd_xbuf for the original buf */
13193 	struct buf	*orig_bp;	/* ptr to the original buf */
13194 	offset_t	shadow_end;
13195 	offset_t	request_end;
13196 	offset_t	shadow_start;
13197 	ssize_t		copy_offset;
13198 	size_t		copy_length;
13199 	size_t		shortfall;
13200 	uint_t		is_write;	/* TRUE if this bp is a WRITE */
13201 	uint_t		has_wmap;	/* TRUE if this bp has a wmap */
13202 
13203 	ASSERT(un != NULL);
13204 	ASSERT(bp != NULL);
13205 
13206 	SD_TRACE(SD_LOG_IO_RMMEDIA, un,
13207 	    "sd_mapblocksize_iodone: entry: buf:0x%p\n", bp);
13208 
13209 	/*
13210 	 * There is no shadow buf or layer-private data if the target is
13211 	 * using un->un_sys_blocksize as its block size or if bcount == 0.
13212 	 */
13213 	if ((un->un_tgt_blocksize == DEV_BSIZE && !un->un_f_enable_rmw) ||
13214 	    (bp->b_bcount == 0)) {
13215 		goto exit;
13216 	}
13217 
13218 	xp = SD_GET_XBUF(bp);
13219 	ASSERT(xp != NULL);
13220 
13221 	/* Retrieve the pointer to the layer-private data area from the xbuf. */
13222 	bsp = xp->xb_private;
13223 
13224 	is_write = ((bp->b_flags & B_READ) == 0) ? TRUE : FALSE;
13225 	has_wmap = (bsp->mbs_wmp != NULL) ? TRUE : FALSE;
13226 
13227 	if (is_write) {
13228 		/*
13229 		 * For a WRITE request we must free up the block range that
13230 		 * we have locked up.  This holds regardless of whether this is
13231 		 * an aligned write request or a read-modify-write request.
13232 		 */
13233 		sd_range_unlock(un, bsp->mbs_wmp);
13234 		bsp->mbs_wmp = NULL;
13235 	}
13236 
13237 	if ((uintptr_t)bp->b_iodone != (uintptr_t)sd_mapblocksize_iodone) {
13238 		/*
13239 		 * An aligned read or write command will have no shadow buf;
13240 		 * there is not much else to do with it.
13241 		 */
13242 		goto done;
13243 	}
13244 
13245 	orig_bp = bsp->mbs_orig_bp;
13246 	ASSERT(orig_bp != NULL);
13247 	orig_xp = SD_GET_XBUF(orig_bp);
13248 	ASSERT(orig_xp != NULL);
13249 	ASSERT(!mutex_owned(SD_MUTEX(un)));
13250 
13251 	if (!is_write && has_wmap) {
13252 		/*
13253 		 * A READ with a wmap means this is the READ phase of a
13254 		 * read-modify-write. If an error occurred on the READ then
13255 		 * we do not proceed with the WRITE phase or copy any data.
13256 		 * Just release the write maps and return with an error.
13257 		 */
13258 		if ((bp->b_resid != 0) || (bp->b_error != 0)) {
13259 			orig_bp->b_resid = orig_bp->b_bcount;
13260 			bioerror(orig_bp, bp->b_error);
13261 			sd_range_unlock(un, bsp->mbs_wmp);
13262 			goto freebuf_done;
13263 		}
13264 	}
13265 
13266 	/*
13267 	 * Here is where we set up to copy the data from the shadow buf
13268 	 * into the space associated with the original buf.
13269 	 *
13270 	 * To deal with the conversion between block sizes, these
13271 	 * computations treat the data as an array of bytes, with the
13272 	 * first byte (byte 0) corresponding to the first byte in the
13273 	 * first block on the disk.
13274 	 */
13275 
13276 	/*
13277 	 * shadow_start and shadow_end indicate the location and size of
13278 	 * the data returned with the shadow IO request.
13279 	 */
13280 	if (un->un_f_enable_rmw) {
13281 		shadow_start  = SD_SYSBLOCKS2BYTES((offset_t)xp->xb_blkno);
13282 	} else {
13283 		shadow_start  = SD_TGTBLOCKS2BYTES(un, (offset_t)xp->xb_blkno);
13284 	}
13285 	shadow_end    = shadow_start + bp->b_bcount - bp->b_resid;
13286 
13287 	/*
13288 	 * copy_offset gives the offset (in bytes) from the start of the first
13289 	 * block of the READ request to the beginning of the data.  We retrieve
13290 	 * this value from the layer-private data area, where it was saved
13291 	 * by sd_mapblocksize_iostart(). copy_length gives the amount of
13292 	 * data to be copied (in bytes).
13293 	 */
13294 	copy_offset  = bsp->mbs_copy_offset;
13295 	if (un->un_f_enable_rmw) {
13296 		ASSERT((copy_offset >= 0) &&
13297 		    (copy_offset < un->un_phy_blocksize));
13298 	} else {
13299 		ASSERT((copy_offset >= 0) &&
13300 		    (copy_offset < un->un_tgt_blocksize));
13301 	}
13302 
13303 	copy_length  = orig_bp->b_bcount;
13304 	request_end  = shadow_start + copy_offset + orig_bp->b_bcount;
13305 
13306 	/*
13307 	 * Set up the resid and error fields of orig_bp as appropriate.
13308 	 */
13309 	if (shadow_end >= request_end) {
13310 		/* We got all the requested data; set resid to zero */
13311 		orig_bp->b_resid = 0;
13312 	} else {
13313 		/*
13314 		 * We failed to get enough data to fully satisfy the original
13315 		 * request. Just copy back whatever data we got and set
13316 		 * up the residual and error code as required.
13317 		 *
13318 		 * 'shortfall' is the amount by which the data received with the
13319 		 * shadow buf has "fallen short" of the requested amount.
13320 		 */
13321 		shortfall = (size_t)(request_end - shadow_end);
13322 
13323 		if (shortfall > orig_bp->b_bcount) {
13324 			/*
13325 			 * We did not get enough data to even partially
13326 			 * fulfill the original request.  The residual is
13327 			 * equal to the amount requested.
13328 			 */
13329 			orig_bp->b_resid = orig_bp->b_bcount;
13330 		} else {
13331 			/*
13332 			 * We did not get all the data that we requested
13333 			 * from the device, but we will try to return what
13334 			 * portion we did get.
13335 			 */
13336 			orig_bp->b_resid = shortfall;
13337 		}
13338 		ASSERT(copy_length >= orig_bp->b_resid);
13339 		copy_length  -= orig_bp->b_resid;
13340 	}
13341 
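	/*
	 * Continuing the hypothetical example above: request_end =
	 * 0 + 1536 + 1024 = 2560.  If the READ returned only 2048 of the
	 * 4096 bytes requested (b_resid == 2048), then shadow_end = 2048
	 * and shortfall = 2560 - 2048 = 512, so orig_bp->b_resid = 512
	 * and only the first 1024 - 512 = 512 bytes are copied back.
	 */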
13342 	/* Propagate the error code from the shadow buf to the original buf */
13343 	bioerror(orig_bp, bp->b_error);
13344 
13345 	if (is_write) {
13346 		goto freebuf_done;	/* No data copying for a WRITE */
13347 	}
13348 
13349 	if (has_wmap) {
13350 		/*
13351 		 * This is a READ command from the READ phase of a
13352 		 * read-modify-write request. We have to copy the data given
13353 		 * by the user OVER the data returned by the READ command,
13354 		 * then convert the command from a READ to a WRITE and send
13355 		 * it back to the target.
13356 		 */
13357 		bcopy(orig_bp->b_un.b_addr, bp->b_un.b_addr + copy_offset,
13358 		    copy_length);
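		/*
		 * In the hypothetical example above, the user's 1024 bytes
		 * overwrite the shadow data at byte offset 1536; the full
		 * two-block (4096-byte) image is then written back,
		 * preserving the bytes surrounding the user data.
		 */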
13359 
13360 		bp->b_flags &= ~((int)B_READ);	/* Convert to a WRITE */
13361 
13362 		/*
13363 		 * Dispatch the WRITE command to the taskq thread, which
13364 		 * will in turn send the command to the target. When the
13365 		 * WRITE command completes, we (sd_mapblocksize_iodone())
13366 		 * will get called again as part of the iodone chain
13367 		 * processing for it. Note that we will still be dealing
13368 		 * with the shadow buf at that point.
13369 		 */
13370 		if (taskq_dispatch(sd_wmr_tq, sd_read_modify_write_task, bp,
13371 		    KM_NOSLEEP) != TASKQID_INVALID) {
13372 			/*
13373 			 * Dispatch was successful so we are done. Return
13374 			 * without going any higher up the iodone chain. Do
13375 			 * not free up any layer-private data until after the
13376 			 * WRITE completes.
13377 			 */
13378 			return;
13379 		}
13380 
13381 		/*
13382 		 * Dispatch of the WRITE command failed; set up the error
13383 		 * condition and send this IO back up the iodone chain.
13384 		 */
13385 		bioerror(orig_bp, EIO);
13386 		orig_bp->b_resid = orig_bp->b_bcount;
13387 
13388 	} else {
13389 		/*
13390 		 * This is a regular READ request (ie, not a RMW). Copy the
13391 		 * data from the shadow buf into the original buf. The
13392 		 * copy_offset compensates for any "misalignment" between the
13393 		 * shadow buf (with its un->un_tgt_blocksize blocks) and the
13394 		 * original buf (with its un->un_sys_blocksize blocks).
13395 		 */
13396 		bcopy(bp->b_un.b_addr + copy_offset, orig_bp->b_un.b_addr,
13397 		    copy_length);
13398 	}
13399 
13400 freebuf_done:
13401 
13402 	/*
13403 	 * At this point we still have both the shadow buf AND the original
13404 	 * buf to deal with, as well as the layer-private data area in each.
13405 	 * Local variables are as follows:
13406 	 *
13407 	 * bp -- points to shadow buf
13408 	 * xp -- points to xbuf of shadow buf
13409 	 * bsp -- points to layer-private data area of shadow buf
13410 	 * orig_bp -- points to original buf
13411 	 *
13412 	 * First free the shadow buf and its associated xbuf, then free the
13413 	 * layer-private data area from the shadow buf. There is no need to
13414 	 * restore xb_private in the shadow xbuf.
13415 	 */
13416 	sd_shadow_buf_free(bp);
13417 	kmem_free(bsp, sizeof (struct sd_mapblocksize_info));
13418 
13419 	/*
13420 	 * Now update the local variables to point to the original buf, xbuf,
13421 	 * and layer-private area.
13422 	 */
13423 	bp = orig_bp;
13424 	xp = SD_GET_XBUF(bp);
13425 	ASSERT(xp != NULL);
13426 	ASSERT(xp == orig_xp);
13427 	bsp = xp->xb_private;
13428 	ASSERT(bsp != NULL);
13429 
13430 done:
13431 	/*
13432 	 * Restore xb_private to whatever it was set to by the next higher
13433 	 * layer in the chain, then free the layer-private data area.
13434 	 */
13435 	xp->xb_private = bsp->mbs_oprivate;
13436 	kmem_free(bsp, sizeof (struct sd_mapblocksize_info));
13437 
13438 exit:
13439 	SD_TRACE(SD_LOG_IO_RMMEDIA, SD_GET_UN(bp),
13440 	    "sd_mapblocksize_iodone: calling SD_NEXT_IODONE: buf:0x%p\n", bp);
13441 
13442 	SD_NEXT_IODONE(index, un, bp);
13443 }
13444 
13445 
13446 /*
13447  *    Function: sd_checksum_iostart
13448  *
13449  * Description: A stub function for a layer that's currently not used.
13450  *		For now just a placeholder.
13451  *
13452  *     Context: Kernel thread context
13453  */
13454 
13455 static void
13456 sd_checksum_iostart(int index, struct sd_lun *un, struct buf *bp)
13457 {
13458 	ASSERT(un != NULL);
13459 	ASSERT(bp != NULL);
13460 	ASSERT(!mutex_owned(SD_MUTEX(un)));
13461 	SD_NEXT_IOSTART(index, un, bp);
13462 }
13463 
13464 
13465 /*
13466  *    Function: sd_checksum_iodone
13467  *
13468  * Description: A stub function for a layer that's currently not used.
13469  *		For now just a placeholder.
13470  *
13471  *     Context: May be called under interrupt context
13472  */
13473 
13474 static void
13475 sd_checksum_iodone(int index, struct sd_lun *un, struct buf *bp)
13476 {
13477 	ASSERT(un != NULL);
13478 	ASSERT(bp != NULL);
13479 	ASSERT(!mutex_owned(SD_MUTEX(un)));
13480 	SD_NEXT_IODONE(index, un, bp);
13481 }
13482 
13483 
13484 /*
13485  *    Function: sd_checksum_uscsi_iostart
13486  *
13487  * Description: A stub function for a layer that's currently not used.
13488  *		For now just a placeholder.
13489  *
13490  *     Context: Kernel thread context
13491  */
13492 
13493 static void
13494 sd_checksum_uscsi_iostart(int index, struct sd_lun *un, struct buf *bp)
13495 {
13496 	ASSERT(un != NULL);
13497 	ASSERT(bp != NULL);
13498 	ASSERT(!mutex_owned(SD_MUTEX(un)));
13499 	SD_NEXT_IOSTART(index, un, bp);
13500 }
13501 
13502 
13503 /*
13504  *    Function: sd_checksum_uscsi_iodone
13505  *
13506  * Description: A stub function for a layer that's currently not used.
13507  *		For now just a placeholder.
13508  *
13509  *     Context: May be called under interrupt context
13510  */
13511 
13512 static void
13513 sd_checksum_uscsi_iodone(int index, struct sd_lun *un, struct buf *bp)
13514 {
13515 	ASSERT(un != NULL);
13516 	ASSERT(bp != NULL);
13517 	ASSERT(!mutex_owned(SD_MUTEX(un)));
13518 	SD_NEXT_IODONE(index, un, bp);
13519 }
13520 
13521 
13522 /*
13523  *    Function: sd_pm_iostart
13524  *
13525  * Description: iostart-side routine for power management.
13526  *
13527  *     Context: Kernel thread context
13528  */
13529 
13530 static void
13531 sd_pm_iostart(int index, struct sd_lun *un, struct buf *bp)
13532 {
13533 	ASSERT(un != NULL);
13534 	ASSERT(bp != NULL);
13535 	ASSERT(!mutex_owned(SD_MUTEX(un)));
13536 	ASSERT(!mutex_owned(&un->un_pm_mutex));
13537 
13538 	SD_TRACE(SD_LOG_IO_PM, un, "sd_pm_iostart: entry\n");
13539 
13540 	if (sd_pm_entry(un) != DDI_SUCCESS) {
13541 		/*
13542 		 * Set up to return the failed buf back up the 'iodone'
13543 		 * side of the calling chain.
13544 		 */
13545 		bioerror(bp, EIO);
13546 		bp->b_resid = bp->b_bcount;
13547 
13548 		SD_BEGIN_IODONE(index, un, bp);
13549 
13550 		SD_TRACE(SD_LOG_IO_PM, un, "sd_pm_iostart: exit\n");
13551 		return;
13552 	}
13553 
13554 	SD_NEXT_IOSTART(index, un, bp);
13555 
13556 	SD_TRACE(SD_LOG_IO_PM, un, "sd_pm_iostart: exit\n");
13557 }
13558 
13559 
13560 /*
13561  *    Function: sd_pm_iodone
13562  *
13563  * Description: iodone-side routine for power management.
13564  *
13565  *     Context: may be called from interrupt context
13566  */
13567 
13568 static void
13569 sd_pm_iodone(int index, struct sd_lun *un, struct buf *bp)
13570 {
13571 	ASSERT(un != NULL);
13572 	ASSERT(bp != NULL);
13573 	ASSERT(!mutex_owned(&un->un_pm_mutex));
13574 
13575 	SD_TRACE(SD_LOG_IO_PM, un, "sd_pm_iodone: entry\n");
13576 
13577 	/*
13578 	 * After attach the following flag is only read, so don't
13579 	 * take the penalty of acquiring a mutex for it.
13580 	 */
13581 	if (un->un_f_pm_is_enabled == TRUE) {
13582 		sd_pm_exit(un);
13583 	}
13584 
13585 	SD_NEXT_IODONE(index, un, bp);
13586 
13587 	SD_TRACE(SD_LOG_IO_PM, un, "sd_pm_iodone: exit\n");
13588 }
13589 
13590 
13591 /*
13592  *    Function: sd_core_iostart
13593  *
13594  * Description: Primary driver function for enqueuing buf(9S) structs from
13595  *		the system and initiating IO to the target device
13596  *
13597  *     Context: Kernel thread context. Can sleep.
13598  *
13599  * Assumptions:  - The given xp->xb_blkno is absolute
13600  *		   (ie, relative to the start of the device).
13601  *		 - The IO is to be done using the native blocksize of
13602  *		   the device, as specified in un->un_tgt_blocksize.
13603  */
13604 /* ARGSUSED */
13605 static void
13606 sd_core_iostart(int index, struct sd_lun *un, struct buf *bp)
13607 {
13608 	struct sd_xbuf *xp;
13609 
13610 	ASSERT(un != NULL);
13611 	ASSERT(bp != NULL);
13612 	ASSERT(!mutex_owned(SD_MUTEX(un)));
13613 	ASSERT(bp->b_resid == 0);
13614 
13615 	SD_TRACE(SD_LOG_IO_CORE, un, "sd_core_iostart: entry: bp:0x%p\n", bp);
13616 
13617 	xp = SD_GET_XBUF(bp);
13618 	ASSERT(xp != NULL);
13619 
13620 	mutex_enter(SD_MUTEX(un));
13621 
13622 	/*
13623 	 * If we are currently in the failfast state, fail any new IO
13624 	 * that has B_FAILFAST set, then return.
13625 	 */
13626 	if ((bp->b_flags & B_FAILFAST) &&
13627 	    (un->un_failfast_state == SD_FAILFAST_ACTIVE)) {
13628 		mutex_exit(SD_MUTEX(un));
13629 		bioerror(bp, EIO);
13630 		bp->b_resid = bp->b_bcount;
13631 		SD_BEGIN_IODONE(index, un, bp);
13632 		return;
13633 	}
13634 
13635 	if (SD_IS_DIRECT_PRIORITY(xp)) {
13636 		/*
13637 		 * Priority command -- transport it immediately.
13638 		 *
13639 		 * Note: We may want to assert that USCSI_DIAGNOSE is set,
13640 		 * because all direct priority commands should be associated
13641 		 * with error recovery actions which we don't want to retry.
13642 		 */
13643 		sd_start_cmds(un, bp);
13644 	} else {
13645 		/*
13646 		 * Normal command -- add it to the wait queue, then start
13647 		 * transporting commands from the wait queue.
13648 		 */
13649 		sd_add_buf_to_waitq(un, bp);
13650 		SD_UPDATE_KSTATS(un, kstat_waitq_enter, bp);
13651 		sd_start_cmds(un, NULL);
13652 	}
13653 
13654 	mutex_exit(SD_MUTEX(un));
13655 
13656 	SD_TRACE(SD_LOG_IO_CORE, un, "sd_core_iostart: exit: bp:0x%p\n", bp);
13657 }
13658 
13659 
13660 /*
13661  *    Function: sd_init_cdb_limits
13662  *
13663  * Description: This is to handle scsi_pkt initialization differences
13664  *		between the driver platforms.
13665  *
13666  *		Legacy behaviors:
13667  *
13668  *		If the block number or the sector count exceeds the
13669  *		capabilities of a Group 0 command, shift over to a
13670  *		Group 1 command. We don't blindly use Group 1
13671  *		commands because a) some drives (CDC Wren IVs) get a
13672  *		bit confused, and b) there is probably a fair amount
13673  *		of speed difference for a target to receive and decode
13674  *		a 10 byte command instead of a 6 byte command.
13675  *
13676  *		The xfer time difference of 6 vs 10 byte CDBs is
13677  *		still significant so this code is still worthwhile.
13678  *		10 byte CDBs are very inefficient with the fas HBA driver
13679  *		and older disks. Each CDB byte took 1 usec with some
13680  *		popular disks.
13681  *
13682  *     Context: Must be called at attach time
13683  */
13684 
13685 static void
13686 sd_init_cdb_limits(struct sd_lun *un)
13687 {
13688 	int hba_cdb_limit;
13689 
13690 	/*
13691 	 * Use CDB_GROUP1 commands for most devices except for
13692 	 * parallel SCSI fixed drives in which case we get better
13693 	 * performance using CDB_GROUP0 commands (where applicable).
13694 	 */
13695 	un->un_mincdb = SD_CDB_GROUP1;
13696 #if !defined(__fibre)
13697 	if (!un->un_f_is_fibre && !un->un_f_cfg_is_atapi && !ISROD(un) &&
13698 	    !un->un_f_has_removable_media) {
13699 		un->un_mincdb = SD_CDB_GROUP0;
13700 	}
13701 #endif
13702 
13703 	/*
13704 	 * Try to read the max-cdb-length supported by HBA.
13705 	 */
13706 	un->un_max_hba_cdb = scsi_ifgetcap(SD_ADDRESS(un), "max-cdb-length", 1);
13707 	if (0 >= un->un_max_hba_cdb) {
13708 		un->un_max_hba_cdb = CDB_GROUP4;
13709 		hba_cdb_limit = SD_CDB_GROUP4;
13710 	} else if (0 < un->un_max_hba_cdb &&
13711 	    un->un_max_hba_cdb < CDB_GROUP1) {
13712 		hba_cdb_limit = SD_CDB_GROUP0;
13713 	} else if (CDB_GROUP1 <= un->un_max_hba_cdb &&
13714 	    un->un_max_hba_cdb < CDB_GROUP5) {
13715 		hba_cdb_limit = SD_CDB_GROUP1;
13716 	} else if (CDB_GROUP5 <= un->un_max_hba_cdb &&
13717 	    un->un_max_hba_cdb < CDB_GROUP4) {
13718 		hba_cdb_limit = SD_CDB_GROUP5;
13719 	} else {
13720 		hba_cdb_limit = SD_CDB_GROUP4;
13721 	}
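	/*
	 * For example (assumed scsi_ifgetcap results): an HBA reporting a
	 * max-cdb-length of 12 selects SD_CDB_GROUP5 (12-byte CDBs), one
	 * reporting 16 or more selects SD_CDB_GROUP4 (16-byte CDBs), and
	 * one reporting a positive value below CDB_GROUP1 (10) is limited
	 * to 6-byte Group 0 CDBs.
	 */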
13722 
13723 	/*
13724 	 * Use CDB_GROUP5 commands for removable devices.  Use CDB_GROUP4
13725 	 * commands for fixed disks unless we are building for a 32 bit
13726 	 * kernel.
13727 	 */
13728 #ifdef _LP64
13729 	un->un_maxcdb = (un->un_f_has_removable_media) ? SD_CDB_GROUP5 :
13730 	    min(hba_cdb_limit, SD_CDB_GROUP4);
13731 #else
13732 	un->un_maxcdb = (un->un_f_has_removable_media) ? SD_CDB_GROUP5 :
13733 	    min(hba_cdb_limit, SD_CDB_GROUP1);
13734 #endif
13735 
13736 	un->un_status_len = (int)((un->un_f_arq_enabled == TRUE)
13737 	    ? sizeof (struct scsi_arq_status) : 1);
13738 	if (!ISCD(un))
13739 		un->un_cmd_timeout = (ushort_t)sd_io_time;
13740 	un->un_uscsi_timeout = ((ISCD(un)) ? 2 : 1) * un->un_cmd_timeout;
13741 }
13742 
13743 
13744 /*
13745  *    Function: sd_initpkt_for_buf
13746  *
13747  * Description: Allocate and initialize for transport a scsi_pkt struct,
13748  *		based upon the info specified in the given buf struct.
13749  *
13750  *		Assumes the xb_blkno in the request is absolute (ie,
13751  *		relative to the start of the device (NOT partition!).
13752  *		Also assumes that the request is using the native block
13753  *		size of the device (as returned by the READ CAPACITY
13754  *		command).
13755  *
13756  * Return Code: SD_PKT_ALLOC_SUCCESS
13757  *		SD_PKT_ALLOC_FAILURE
13758  *		SD_PKT_ALLOC_FAILURE_NO_DMA
13759  *		SD_PKT_ALLOC_FAILURE_CDB_TOO_SMALL
13760  *
13761  *     Context: Kernel thread and may be called from software interrupt context
13762  *		as part of a sdrunout callback. This function may not block or
13763  *		call routines that block
13764  */
13765 
13766 static int
13767 sd_initpkt_for_buf(struct buf *bp, struct scsi_pkt **pktpp)
13768 {
13769 	struct sd_xbuf	*xp;
13770 	struct scsi_pkt *pktp = NULL;
13771 	struct sd_lun	*un;
13772 	size_t		blockcount;
13773 	daddr_t		startblock;
13774 	int		rval;
13775 	int		cmd_flags;
13776 
13777 	ASSERT(bp != NULL);
13778 	ASSERT(pktpp != NULL);
13779 	xp = SD_GET_XBUF(bp);
13780 	ASSERT(xp != NULL);
13781 	un = SD_GET_UN(bp);
13782 	ASSERT(un != NULL);
13783 	ASSERT(mutex_owned(SD_MUTEX(un)));
13784 	ASSERT(bp->b_resid == 0);
13785 
13786 	SD_TRACE(SD_LOG_IO_CORE, un,
13787 	    "sd_initpkt_for_buf: entry: buf:0x%p\n", bp);
13788 
13789 	mutex_exit(SD_MUTEX(un));
13790 
13791 #if defined(__x86)	/* DMAFREE for x86 only */
13792 	if (xp->xb_pkt_flags & SD_XB_DMA_FREED) {
13793 		/*
13794 		 * Already have a scsi_pkt -- just need DMA resources.
13795 		 * We must recompute the CDB in case the mapping returns
13796 		 * a nonzero pkt_resid.
13797 		 * Note: if this is a portion of a PKT_DMA_PARTIAL transfer
13798 	 * that is being retried, the unmap/remap of the DMA resources
13799 		 * will result in the entire transfer starting over again
13800 		 * from the very first block.
13801 		 */
13802 		ASSERT(xp->xb_pktp != NULL);
13803 		pktp = xp->xb_pktp;
13804 	} else {
13805 		pktp = NULL;
13806 	}
13807 #endif /* __x86 */
13808 
13809 	startblock = xp->xb_blkno;	/* Absolute block num. */
13810 	blockcount = SD_BYTES2TGTBLOCKS(un, bp->b_bcount);
13811 
13812 	cmd_flags = un->un_pkt_flags | (xp->xb_pkt_flags & SD_XB_INITPKT_MASK);
13813 
13814 	/*
13815 	 * sd_setup_rw_pkt will determine the appropriate CDB group to use,
13816 	 * call scsi_init_pkt, and build the CDB.
13817 	 */
13818 	rval = sd_setup_rw_pkt(un, &pktp, bp,
13819 	    cmd_flags, sdrunout, (caddr_t)un,
13820 	    startblock, blockcount);
13821 
13822 	if (rval == 0) {
13823 		/*
13824 		 * Success.
13825 		 *
13826 		 * If partial DMA is being used and required for this transfer,
13827 		 * set it up here.
13828 		 */
13829 		if ((un->un_pkt_flags & PKT_DMA_PARTIAL) != 0 &&
13830 		    (pktp->pkt_resid != 0)) {
13831 
13832 			/*
13833 			 * Save the pkt_resid for the next portion of
13834 			 * the xfer
13835 			 */
13836 			xp->xb_dma_resid = pktp->pkt_resid;
13837 
13838 			/* rezero resid */
13839 			pktp->pkt_resid = 0;
13840 
13841 		} else {
13842 			xp->xb_dma_resid = 0;
13843 		}
13844 
13845 		pktp->pkt_flags = un->un_tagflags;
13846 		pktp->pkt_time  = un->un_cmd_timeout;
13847 		pktp->pkt_comp  = sdintr;
13848 
13849 		pktp->pkt_private = bp;
13850 		*pktpp = pktp;
13851 
13852 		SD_TRACE(SD_LOG_IO_CORE, un,
13853 		    "sd_initpkt_for_buf: exit: buf:0x%p\n", bp);
13854 
13855 #if defined(__x86)	/* DMAFREE for x86 only */
13856 		xp->xb_pkt_flags &= ~SD_XB_DMA_FREED;
13857 #endif
13858 
13859 		mutex_enter(SD_MUTEX(un));
13860 		return (SD_PKT_ALLOC_SUCCESS);
13861 
13862 	}
13863 
13864 	/*
13865 	 * SD_PKT_ALLOC_FAILURE is the only expected failure code
13866 	 * from sd_setup_rw_pkt.
13867 	 */
13868 	ASSERT(rval == SD_PKT_ALLOC_FAILURE);
13869 
13870 	if (rval == SD_PKT_ALLOC_FAILURE) {
13871 		*pktpp = NULL;
13872 		/*
13873 		 * Set the driver state to RWAIT to indicate the driver
13874 		 * is waiting on resource allocations. The driver will not
13875 		 * suspend, pm_suspend, or detach while the state is RWAIT.
13876 		 */
13877 		mutex_enter(SD_MUTEX(un));
13878 		New_state(un, SD_STATE_RWAIT);
13879 
13880 		SD_ERROR(SD_LOG_IO_CORE, un,
13881 		    "sd_initpkt_for_buf: No pktp. exit bp:0x%p\n", bp);
13882 
13883 		if ((bp->b_flags & B_ERROR) != 0) {
13884 			return (SD_PKT_ALLOC_FAILURE_NO_DMA);
13885 		}
13886 		return (SD_PKT_ALLOC_FAILURE);
13887 	} else {
13888 		/*
13889 		 * SD_PKT_ALLOC_FAILURE_CDB_TOO_SMALL
13890 		 *
13891 		 * This should never happen.  Maybe someone messed with the
13892 		 * kernel's minphys?
13893 		 */
13894 		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
13895 		    "Request rejected: too large for CDB: "
13896 		    "lba:0x%08lx  len:0x%08lx\n", startblock, blockcount);
13897 		SD_ERROR(SD_LOG_IO_CORE, un,
13898 		    "sd_initpkt_for_buf: No cp. exit bp:0x%p\n", bp);
13899 		mutex_enter(SD_MUTEX(un));
13900 		return (SD_PKT_ALLOC_FAILURE_CDB_TOO_SMALL);
13901 
13902 	}
13903 }
13904 
13905 
13906 /*
13907  *    Function: sd_destroypkt_for_buf
13908  *
13909  * Description: Free the scsi_pkt(9S) for the given bp (buf IO processing).
13910  *
13911  *     Context: Kernel thread or interrupt context
13912  */
13913 
13914 static void
13915 sd_destroypkt_for_buf(struct buf *bp)
13916 {
13917 	ASSERT(bp != NULL);
13918 	ASSERT(SD_GET_UN(bp) != NULL);
13919 
13920 	SD_TRACE(SD_LOG_IO_CORE, SD_GET_UN(bp),
13921 	    "sd_destroypkt_for_buf: entry: buf:0x%p\n", bp);
13922 
13923 	ASSERT(SD_GET_PKTP(bp) != NULL);
13924 	scsi_destroy_pkt(SD_GET_PKTP(bp));
13925 
13926 	SD_TRACE(SD_LOG_IO_CORE, SD_GET_UN(bp),
13927 	    "sd_destroypkt_for_buf: exit: buf:0x%p\n", bp);
13928 }
13929 
13930 /*
13931  *    Function: sd_setup_rw_pkt
13932  *
13933  * Description: Determines appropriate CDB group for the requested LBA
13934  *		and transfer length, calls scsi_init_pkt, and builds
13935  *		the CDB.  Do not use for partial DMA transfers except
13936  *		for the initial transfer since the CDB size must
13937  *		remain constant.
13938  *
13939  *     Context: Kernel thread and may be called from software interrupt
13940  *		context as part of a sdrunout callback. This function may not
13941  *		block or call routines that block
13942  */
13943 
13944 
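/*
 * Illustrative limits (standard SCSI CDB formats): a 6-byte Group 0 CDB
 * carries a 21-bit LBA and an 8-bit count, a 10-byte Group 1 CDB a
 * 32-bit LBA and a 16-bit count, and a 16-byte Group 4 CDB a 64-bit LBA
 * and a 32-bit count.  So a hypothetical read of 8 blocks at lba 0x1000
 * can use Group 0, the same read at lba 0x200000 must be promoted to
 * Group 1, and an lba above 0xFFFFFFFF requires Group 4 (READ16/WRITE16).
 */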
13945 int
13946 sd_setup_rw_pkt(struct sd_lun *un,
13947     struct scsi_pkt **pktpp, struct buf *bp, int flags,
13948     int (*callback)(caddr_t), caddr_t callback_arg,
13949     diskaddr_t lba, uint32_t blockcount)
13950 {
13951 	struct scsi_pkt *return_pktp;
13952 	union scsi_cdb *cdbp;
13953 	struct sd_cdbinfo *cp = NULL;
13954 	int i;
13955 
13956 	/*
13957 	 * See which size CDB to use, based upon the request.
13958 	 */
13959 	for (i = un->un_mincdb; i <= un->un_maxcdb; i++) {
13960 
13961 		/*
13962 		 * Check lba and block count against sd_cdbtab limits.
13963 		 * In the partial DMA case, we have to use the same size
13964 		 * CDB for all the transfers.  Check lba + blockcount
13965 		 * against the max LBA so we know that segment of the
13966 		 * transfer can use the CDB we select.
13967 		 */
13968 		if ((lba + blockcount - 1 <= sd_cdbtab[i].sc_maxlba) &&
13969 		    (blockcount <= sd_cdbtab[i].sc_maxlen)) {
13970 
13971 			/*
13972 			 * The command will fit into the CDB type
13973 			 * specified by sd_cdbtab[i].
13974 			 */
13975 			cp = sd_cdbtab + i;
13976 
13977 			/*
13978 			 * Call scsi_init_pkt so we can fill in the
13979 			 * CDB.
13980 			 */
13981 			return_pktp = scsi_init_pkt(SD_ADDRESS(un), *pktpp,
13982 			    bp, cp->sc_grpcode, un->un_status_len, 0,
13983 			    flags, callback, callback_arg);
13984 
13985 			if (return_pktp != NULL) {
13986 
13987 				/*
13988 				 * Return new value of pkt
13989 				 */
13990 				*pktpp = return_pktp;
13991 
13992 				/*
13993 				 * To be safe, zero the CDB, ensuring there is
13994 				 * no leftover data from a previous command.
13995 				 */
13996 				bzero(return_pktp->pkt_cdbp, cp->sc_grpcode);
13997 
13998 				/*
13999 				 * Handle partial DMA mapping
14000 				 */
14001 				if (return_pktp->pkt_resid != 0) {
14002 
14003 					/*
14004 					 * Not going to xfer as many blocks as
14005 					 * originally expected
14006 					 */
14007 					blockcount -=
14008 					    SD_BYTES2TGTBLOCKS(un,
14009 					    return_pktp->pkt_resid);
14010 				}
14011 
14012 				cdbp = (union scsi_cdb *)return_pktp->pkt_cdbp;
14013 
14014 				/*
14015 				 * Set command byte based on the CDB
14016 				 * type we matched.
14017 				 */
14018 				cdbp->scc_cmd = cp->sc_grpmask |
14019 				    ((bp->b_flags & B_READ) ?
14020 				    SCMD_READ : SCMD_WRITE);
14021 
14022 				SD_FILL_SCSI1_LUN(un, return_pktp);
14023 
14024 				/*
14025 				 * Fill in LBA and length
14026 				 */
14027 				ASSERT((cp->sc_grpcode == CDB_GROUP1) ||
14028 				    (cp->sc_grpcode == CDB_GROUP4) ||
14029 				    (cp->sc_grpcode == CDB_GROUP0) ||
14030 				    (cp->sc_grpcode == CDB_GROUP5));
14031 
14032 				if (cp->sc_grpcode == CDB_GROUP1) {
14033 					FORMG1ADDR(cdbp, lba);
14034 					FORMG1COUNT(cdbp, blockcount);
14035 					return (0);
14036 				} else if (cp->sc_grpcode == CDB_GROUP4) {
14037 					FORMG4LONGADDR(cdbp, lba);
14038 					FORMG4COUNT(cdbp, blockcount);
14039 					return (0);
14040 				} else if (cp->sc_grpcode == CDB_GROUP0) {
14041 					FORMG0ADDR(cdbp, lba);
14042 					FORMG0COUNT(cdbp, blockcount);
14043 					return (0);
14044 				} else if (cp->sc_grpcode == CDB_GROUP5) {
14045 					FORMG5ADDR(cdbp, lba);
14046 					FORMG5COUNT(cdbp, blockcount);
14047 					return (0);
14048 				}
14049 
14050 				/*
14051 				 * It should be impossible to not match one
14052 				 * of the CDB types above, so we should never
14053 				 * reach this point.  Set the CDB command byte
14054 				 * to test-unit-ready to avoid writing
14055 				 * to somewhere we don't intend.
14056 				 */
14057 				cdbp->scc_cmd = SCMD_TEST_UNIT_READY;
14058 				return (SD_PKT_ALLOC_FAILURE_CDB_TOO_SMALL);
14059 			} else {
14060 				/*
14061 				 * Couldn't get scsi_pkt
14062 				 */
14063 				return (SD_PKT_ALLOC_FAILURE);
14064 			}
14065 		}
14066 	}
14067 
14068 	/*
14069 	 * None of the available CDB types were suitable.  This really
14070 	 * should never happen:  on a 64 bit system we support
14071 	 * READ16/WRITE16 which will hold an entire 64 bit disk address
14072 	 * and on a 32 bit system we will refuse to bind to a device
14073 	 * larger than 2TB so addresses will never be larger than 32 bits.
14074 	 */
14075 	return (SD_PKT_ALLOC_FAILURE_CDB_TOO_SMALL);
14076 }
14077 
14078 /*
14079  *    Function: sd_setup_next_rw_pkt
14080  *
14081  * Description: Setup packet for partial DMA transfers, except for the
14082  *		initial transfer.  sd_setup_rw_pkt should be used for
14083  *		the initial transfer.
14084  *
14085  *     Context: Kernel thread and may be called from interrupt context.
14086  */
14087 
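/*
 * For example (hypothetical sizes): if the HBA can map only 256 KB of a
 * 1 MB transfer per DMA window, sd_setup_rw_pkt() builds the initial
 * CDB, and each subsequent window is set up here with the lba advanced
 * by the blocks already transferred and the blockcount reduced
 * accordingly, reusing the same CDB group throughout.
 */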
14088 int
14089 sd_setup_next_rw_pkt(struct sd_lun *un,
14090     struct scsi_pkt *pktp, struct buf *bp,
14091     diskaddr_t lba, uint32_t blockcount)
14092 {
14093 	uchar_t com;
14094 	union scsi_cdb *cdbp;
14095 	uchar_t cdb_group_id;
14096 
14097 	ASSERT(pktp != NULL);
14098 	ASSERT(pktp->pkt_cdbp != NULL);
14099 
14100 	cdbp = (union scsi_cdb *)pktp->pkt_cdbp;
14101 	com = cdbp->scc_cmd;
14102 	cdb_group_id = CDB_GROUPID(com);
14103 
14104 	ASSERT((cdb_group_id == CDB_GROUPID_0) ||
14105 	    (cdb_group_id == CDB_GROUPID_1) ||
14106 	    (cdb_group_id == CDB_GROUPID_4) ||
14107 	    (cdb_group_id == CDB_GROUPID_5));
14108 
14109 	/*
14110 	 * Move pkt to the next portion of the xfer.
14111 	 * func is NULL_FUNC so we do not have to release
14112 	 * the disk mutex here.
14113 	 */
14114 	if (scsi_init_pkt(SD_ADDRESS(un), pktp, bp, 0, 0, 0, 0,
14115 	    NULL_FUNC, NULL) == pktp) {
14116 		/* Success.  Handle partial DMA */
14117 		if (pktp->pkt_resid != 0) {
14118 			blockcount -=
14119 			    SD_BYTES2TGTBLOCKS(un, pktp->pkt_resid);
14120 		}
14121 
14122 		cdbp->scc_cmd = com;
14123 		SD_FILL_SCSI1_LUN(un, pktp);
14124 		if (cdb_group_id == CDB_GROUPID_1) {
14125 			FORMG1ADDR(cdbp, lba);
14126 			FORMG1COUNT(cdbp, blockcount);
14127 			return (0);
14128 		} else if (cdb_group_id == CDB_GROUPID_4) {
14129 			FORMG4LONGADDR(cdbp, lba);
14130 			FORMG4COUNT(cdbp, blockcount);
14131 			return (0);
14132 		} else if (cdb_group_id == CDB_GROUPID_0) {
14133 			FORMG0ADDR(cdbp, lba);
14134 			FORMG0COUNT(cdbp, blockcount);
14135 			return (0);
14136 		} else if (cdb_group_id == CDB_GROUPID_5) {
14137 			FORMG5ADDR(cdbp, lba);
14138 			FORMG5COUNT(cdbp, blockcount);
14139 			return (0);
14140 		}
14141 
14142 		/* Unreachable */
14143 		return (SD_PKT_ALLOC_FAILURE_CDB_TOO_SMALL);
14144 	}
14145 
14146 	/*
14147 	 * Error setting up next portion of cmd transfer.
14148 	 * Something is definitely very wrong and this
14149 	 * should not happen.
14150 	 */
14151 	return (SD_PKT_ALLOC_FAILURE);
14152 }
14153 
14154 /*
14155  *    Function: sd_initpkt_for_uscsi
14156  *
14157  * Description: Allocate and initialize for transport a scsi_pkt struct,
14158  *		based upon the info specified in the given uscsi_cmd struct.
14159  *
14160  * Return Code: SD_PKT_ALLOC_SUCCESS
14161  *		SD_PKT_ALLOC_FAILURE
14162  *		SD_PKT_ALLOC_FAILURE_NO_DMA
14163  *		SD_PKT_ALLOC_FAILURE_CDB_TOO_SMALL
14164  *
14165  *     Context: Kernel thread and may be called from software interrupt context
14166  *		as part of a sdrunout callback. This function may not block or
14167  *		call routines that block
14168  */
14169 
14170 static int
14171 sd_initpkt_for_uscsi(struct buf *bp, struct scsi_pkt **pktpp)
14172 {
14173 	struct uscsi_cmd *uscmd;
14174 	struct sd_xbuf	*xp;
14175 	struct scsi_pkt	*pktp;
14176 	struct sd_lun	*un;
14177 	uint32_t	flags = 0;
14178 
14179 	ASSERT(bp != NULL);
14180 	ASSERT(pktpp != NULL);
14181 	xp = SD_GET_XBUF(bp);
14182 	ASSERT(xp != NULL);
14183 	un = SD_GET_UN(bp);
14184 	ASSERT(un != NULL);
14185 	ASSERT(mutex_owned(SD_MUTEX(un)));
14186 
14187 	/* The pointer to the uscsi_cmd struct is expected in xb_pktinfo */
14188 	uscmd = (struct uscsi_cmd *)xp->xb_pktinfo;
14189 	ASSERT(uscmd != NULL);
14190 
14191 	SD_TRACE(SD_LOG_IO_CORE, un,
14192 	    "sd_initpkt_for_uscsi: entry: buf:0x%p\n", bp);
14193 
14194 	/*
14195 	 * Allocate the scsi_pkt for the command.
14196 	 *
14197 	 * Note: If PKT_DMA_PARTIAL flag is set, scsi_vhci binds a path
14198 	 *	 during scsi_init_pkt time and will continue to use the
14199 	 *	 same path as long as the same scsi_pkt is used without
14200 	 *	 intervening scsi_dmafree(). Since a uscsi command does
14201 	 *	 not call scsi_dmafree() before retrying a failed command,
14202 	 *	 it is necessary to make sure the PKT_DMA_PARTIAL flag is
14203 	 *	 NOT set, so that scsi_vhci can use another available path
14204 	 *	 for the retry. Besides, uscsi commands do not allow DMA
14205 	 *	 breakup, so there is no need to set the PKT_DMA_PARTIAL flag.
14206 	 *
14207 	 *	 More fundamentally, we can't support breaking up this DMA into
14208 	 *	 multiple windows on x86. There is, in general, no guarantee
14209 	 *	 that arbitrary SCSI commands are idempotent, which is required
14210 	 *	 if we want to use multiple windows for a given command.
14211 	 */
14212 	if (uscmd->uscsi_rqlen > SENSE_LENGTH) {
14213 		pktp = scsi_init_pkt(SD_ADDRESS(un), NULL,
14214 		    ((bp->b_bcount != 0) ? bp : NULL), uscmd->uscsi_cdblen,
14215 		    ((int)(uscmd->uscsi_rqlen) + sizeof (struct scsi_arq_status)
14216 		    - sizeof (struct scsi_extended_sense)), 0,
14217 		    (un->un_pkt_flags & ~PKT_DMA_PARTIAL) | PKT_XARQ,
14218 		    sdrunout, (caddr_t)un);
14219 	} else {
14220 		pktp = scsi_init_pkt(SD_ADDRESS(un), NULL,
14221 		    ((bp->b_bcount != 0) ? bp : NULL), uscmd->uscsi_cdblen,
14222 		    sizeof (struct scsi_arq_status), 0,
14223 		    (un->un_pkt_flags & ~PKT_DMA_PARTIAL),
14224 		    sdrunout, (caddr_t)un);
14225 	}
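	/*
	 * Illustrative sizing (assumed request): with a uscsi_rqlen of
	 * 0xff bytes (larger than SENSE_LENGTH), the PKT_XARQ path above
	 * requests a status area of 0xff plus the scsi_arq_status
	 * overhead minus the embedded scsi_extended_sense, so the HBA can
	 * return the full extended sense data.
	 */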
14226 
14227 	if (pktp == NULL) {
14228 		*pktpp = NULL;
14229 		/*
14230 		 * Set the driver state to RWAIT to indicate the driver
14231 		 * is waiting on resource allocations. The driver will not
14232 		 * suspend, pm_suspend, or detach while the state is RWAIT.
14233 		 */
14234 		New_state(un, SD_STATE_RWAIT);
14235 
14236 		SD_ERROR(SD_LOG_IO_CORE, un,
14237 		    "sd_initpkt_for_uscsi: No pktp. exit bp:0x%p\n", bp);
14238 
14239 		if ((bp->b_flags & B_ERROR) != 0) {
14240 			return (SD_PKT_ALLOC_FAILURE_NO_DMA);
14241 		}
14242 		return (SD_PKT_ALLOC_FAILURE);
14243 	}
14244 
14245 	/*
14246 	 * We do not do DMA breakup for USCSI commands, so return failure
14247 	 * here if all the needed DMA resources were not allocated.
14248 	 */
14249 	if ((un->un_pkt_flags & PKT_DMA_PARTIAL) &&
14250 	    (bp->b_bcount != 0) && (pktp->pkt_resid != 0)) {
14251 		scsi_destroy_pkt(pktp);
14252 		SD_ERROR(SD_LOG_IO_CORE, un, "sd_initpkt_for_uscsi: "
14253 		    "No partial DMA for USCSI. exit: buf:0x%p\n", bp);
14254 		return (SD_PKT_ALLOC_FAILURE_PKT_TOO_SMALL);
14255 	}
14256 
14257 	/* Init the cdb from the given uscsi struct */
14258 	(void) scsi_setup_cdb((union scsi_cdb *)pktp->pkt_cdbp,
14259 	    uscmd->uscsi_cdb[0], 0, 0, 0);
14260 
14261 	SD_FILL_SCSI1_LUN(un, pktp);
14262 
14263 	/*
14264 	 * Set up the optional USCSI flags. See the uscsi(4I) man page
14265 	 * for listing of the supported flags.
14266 	 */
14267 
14268 	if (uscmd->uscsi_flags & USCSI_SILENT) {
14269 		flags |= FLAG_SILENT;
14270 	}
14271 
14272 	if (uscmd->uscsi_flags & USCSI_DIAGNOSE) {
14273 		flags |= FLAG_DIAGNOSE;
14274 	}
14275 
14276 	if (uscmd->uscsi_flags & USCSI_ISOLATE) {
14277 		flags |= FLAG_ISOLATE;
14278 	}
14279 
14280 	if (un->un_f_is_fibre == FALSE) {
14281 		if (uscmd->uscsi_flags & USCSI_RENEGOT) {
14282 			flags |= FLAG_RENEGOTIATE_WIDE_SYNC;
14283 		}
14284 	}
14285 
14286 	/*
14287 	 * Set the pkt flags here so we save time later.
14288 	 * Note: These flags are NOT in the uscsi man page!!!
14289 	 */
14290 	if (uscmd->uscsi_flags & USCSI_HEAD) {
14291 		flags |= FLAG_HEAD;
14292 	}
14293 
14294 	if (uscmd->uscsi_flags & USCSI_NOINTR) {
14295 		flags |= FLAG_NOINTR;
14296 	}
14297 
14298 	/*
14299 	 * For tagged queueing, things get a bit complicated.
14300 	 * Check first for head of queue and last for ordered queue.
14301 	 * If neither head nor order, use the default driver tag flags.
14302 	 */
14303 	if ((uscmd->uscsi_flags & USCSI_NOTAG) == 0) {
14304 		if (uscmd->uscsi_flags & USCSI_HTAG) {
14305 			flags |= FLAG_HTAG;
14306 		} else if (uscmd->uscsi_flags & USCSI_OTAG) {
14307 			flags |= FLAG_OTAG;
14308 		} else {
14309 			flags |= un->un_tagflags & FLAG_TAGMASK;
14310 		}
14311 	}
14312 
14313 	if (uscmd->uscsi_flags & USCSI_NODISCON) {
14314 		flags = (flags & ~FLAG_TAGMASK) | FLAG_NODISCON;
14315 	}
14316 
14317 	pktp->pkt_flags = flags;
14318 
14319 	/* Transfer uscsi information to scsi_pkt */
14320 	(void) scsi_uscsi_pktinit(uscmd, pktp);
14321 
14322 	/* Copy the caller's CDB into the pkt... */
14323 	bcopy(uscmd->uscsi_cdb, pktp->pkt_cdbp, uscmd->uscsi_cdblen);
14324 
14325 	if (uscmd->uscsi_timeout == 0) {
14326 		pktp->pkt_time = un->un_uscsi_timeout;
14327 	} else {
14328 		pktp->pkt_time = uscmd->uscsi_timeout;
14329 	}
14330 
14331 	/* need it later to identify USCSI request in sdintr */
14332 	xp->xb_pkt_flags |= SD_XB_USCSICMD;
14333 
14334 	xp->xb_sense_resid = uscmd->uscsi_rqresid;
14335 
14336 	pktp->pkt_private = bp;
14337 	pktp->pkt_comp = sdintr;
14338 	*pktpp = pktp;
14339 
14340 	SD_TRACE(SD_LOG_IO_CORE, un,
14341 	    "sd_initpkt_for_uscsi: exit: buf:0x%p\n", bp);
14342 
14343 	return (SD_PKT_ALLOC_SUCCESS);
14344 }
14345 
14346 
14347 /*
14348  *    Function: sd_destroypkt_for_uscsi
14349  *
14350  * Description: Free the scsi_pkt(9S) struct for the given bp, for uscsi
14351  *		IOs.. Also saves relevant info into the associated uscsi_cmd
14352  *		struct.
14353  *
14354  *     Context: May be called under interrupt context
14355  */
14356 
14357 static void
14358 sd_destroypkt_for_uscsi(struct buf *bp)
14359 {
14360 	struct uscsi_cmd *uscmd;
14361 	struct sd_xbuf	*xp;
14362 	struct scsi_pkt	*pktp;
14363 	struct sd_lun	*un;
14364 	struct sd_uscsi_info *suip;
14365 
14366 	ASSERT(bp != NULL);
14367 	xp = SD_GET_XBUF(bp);
14368 	ASSERT(xp != NULL);
14369 	un = SD_GET_UN(bp);
14370 	ASSERT(un != NULL);
14371 	ASSERT(!mutex_owned(SD_MUTEX(un)));
14372 	pktp = SD_GET_PKTP(bp);
14373 	ASSERT(pktp != NULL);
14374 
14375 	SD_TRACE(SD_LOG_IO_CORE, un,
14376 	    "sd_destroypkt_for_uscsi: entry: buf:0x%p\n", bp);
14377 
14378 	/* The pointer to the uscsi_cmd struct is expected in xb_pktinfo */
14379 	uscmd = (struct uscsi_cmd *)xp->xb_pktinfo;
14380 	ASSERT(uscmd != NULL);
14381 
14382 	/* Save the status and the residual into the uscsi_cmd struct */
14383 	uscmd->uscsi_status = ((*(pktp)->pkt_scbp) & STATUS_MASK);
14384 	uscmd->uscsi_resid  = bp->b_resid;
14385 
14386 	/* Transfer scsi_pkt information to uscsi */
14387 	(void) scsi_uscsi_pktfini(pktp, uscmd);
14388 
14389 	/*
14390 	 * If enabled, copy any saved sense data into the area specified
14391 	 * by the uscsi command.
14392 	 */
14393 	if (((uscmd->uscsi_flags & USCSI_RQENABLE) != 0) &&
14394 	    (uscmd->uscsi_rqlen != 0) && (uscmd->uscsi_rqbuf != NULL)) {
14395 		/*
14396 		 * Note: uscmd->uscsi_rqbuf should always point to a buffer
14397 		 * at least SENSE_LENGTH bytes in size (see sd_send_scsi_cmd())
14398 		 */
14399 		uscmd->uscsi_rqstatus = xp->xb_sense_status;
14400 		uscmd->uscsi_rqresid  = xp->xb_sense_resid;
14401 		if (uscmd->uscsi_rqlen > SENSE_LENGTH) {
14402 			bcopy(xp->xb_sense_data, uscmd->uscsi_rqbuf,
14403 			    MAX_SENSE_LENGTH);
14404 		} else {
14405 			bcopy(xp->xb_sense_data, uscmd->uscsi_rqbuf,
14406 			    SENSE_LENGTH);
14407 		}
14408 	}
14409 	/*
14410 	 * The following assignments are for SCSI FMA.
14411 	 */
14412 	ASSERT(xp->xb_private != NULL);
14413 	suip = (struct sd_uscsi_info *)xp->xb_private;
14414 	suip->ui_pkt_reason = pktp->pkt_reason;
14415 	suip->ui_pkt_state = pktp->pkt_state;
14416 	suip->ui_pkt_statistics = pktp->pkt_statistics;
14417 	suip->ui_lba = (uint64_t)SD_GET_BLKNO(bp);
14418 
14419 	/* We are done with the scsi_pkt; free it now */
14420 	ASSERT(SD_GET_PKTP(bp) != NULL);
14421 	scsi_destroy_pkt(SD_GET_PKTP(bp));
14422 
14423 	SD_TRACE(SD_LOG_IO_CORE, un,
14424 	    "sd_destroypkt_for_uscsi: exit: buf:0x%p\n", bp);
14425 }
14426 
14427 
14428 /*
14429  *    Function: sd_bioclone_alloc
14430  *
14431  * Description: Allocate a buf(9S) and init it as per the given buf
14432  *		and the various arguments.  The associated sd_xbuf
14433  *		struct is (nearly) duplicated.  The struct buf *bp
14434  *		argument is saved in new_xp->xb_private.
14435  *
14436  *   Arguments: bp - ptr to the buf(9S) to be "shadowed"
14437  *		datalen - size of data area for the shadow bp
14438  *		blkno - starting LBA
14439  *		func - function pointer for b_iodone in the shadow buf. (May
14440  *			be NULL if none.)
14441  *
14442  * Return Code: Pointer to the allocated buf(9S) struct
14443  *
14444  *     Context: Can sleep.
14445  */
14446 
14447 static struct buf *
14448 sd_bioclone_alloc(struct buf *bp, size_t datalen, daddr_t blkno,
14449     int (*func)(struct buf *))
14450 {
14451 	struct	sd_lun	*un;
14452 	struct	sd_xbuf	*xp;
14453 	struct	sd_xbuf	*new_xp;
14454 	struct	buf	*new_bp;
14455 
14456 	ASSERT(bp != NULL);
14457 	xp = SD_GET_XBUF(bp);
14458 	ASSERT(xp != NULL);
14459 	un = SD_GET_UN(bp);
14460 	ASSERT(un != NULL);
14461 	ASSERT(!mutex_owned(SD_MUTEX(un)));
14462 
14463 	new_bp = bioclone(bp, 0, datalen, SD_GET_DEV(un), blkno, func,
14464 	    NULL, KM_SLEEP);
14465 
14466 	new_bp->b_lblkno	= blkno;
14467 
14468 	/*
14469 	 * Allocate an xbuf for the shadow bp and copy the contents of the
14470 	 * original xbuf into it.
14471 	 */
14472 	new_xp = kmem_alloc(sizeof (struct sd_xbuf), KM_SLEEP);
14473 	bcopy(xp, new_xp, sizeof (struct sd_xbuf));
14474 
14475 	/*
14476 	 * The given bp is automatically saved in the xb_private member
14477 	 * of the new xbuf.  Callers are allowed to depend on this.
14478 	 */
14479 	new_xp->xb_private = bp;
14480 
14481 	new_bp->b_private  = new_xp;
14482 
14483 	return (new_bp);
14484 }
14485 
14486 /*
14487  *    Function: sd_shadow_buf_alloc
14488  *
14489  * Description: Allocate a buf(9S) and init it as per the given buf
14490  *		and the various arguments.  The associated sd_xbuf
14491  *		struct is (nearly) duplicated.  The struct buf *bp
14492  *		argument is saved in new_xp->xb_private.
14493  *
14494  *   Arguments: bp - ptr to the buf(9S) to be "shadowed"
14495  *		datalen - size of data area for the shadow bp
14496  *		bflags - B_READ or B_WRITE (pseudo flag)
14497  *		blkno - starting LBA
14498  *		func - function pointer for b_iodone in the shadow buf. (May
14499  *			be NULL if none.)
14500  *
14501  * Return Code: Pointer to the allocated buf(9S) struct
14502  *
14503  *     Context: Can sleep.
14504  */
14505 
14506 static struct buf *
14507 sd_shadow_buf_alloc(struct buf *bp, size_t datalen, uint_t bflags,
14508     daddr_t blkno, int (*func)(struct buf *))
14509 {
14510 	struct	sd_lun	*un;
14511 	struct	sd_xbuf	*xp;
14512 	struct	sd_xbuf	*new_xp;
14513 	struct	buf	*new_bp;
14514 
14515 	ASSERT(bp != NULL);
14516 	xp = SD_GET_XBUF(bp);
14517 	ASSERT(xp != NULL);
14518 	un = SD_GET_UN(bp);
14519 	ASSERT(un != NULL);
14520 	ASSERT(!mutex_owned(SD_MUTEX(un)));
14521 
14522 	if (bp->b_flags & (B_PAGEIO | B_PHYS)) {
14523 		bp_mapin(bp);
14524 	}
14525 
14526 	bflags &= (B_READ | B_WRITE);
14527 #if defined(__x86)
14528 	new_bp = getrbuf(KM_SLEEP);
14529 	new_bp->b_un.b_addr = kmem_zalloc(datalen, KM_SLEEP);
14530 	new_bp->b_bcount = datalen;
14531 	new_bp->b_flags = bflags |
14532 	    (bp->b_flags & ~(B_PAGEIO | B_PHYS | B_REMAPPED | B_SHADOW));
14533 #else
14534 	new_bp = scsi_alloc_consistent_buf(SD_ADDRESS(un), NULL,
14535 	    datalen, bflags, SLEEP_FUNC, NULL);
14536 #endif
14537 	new_bp->av_forw	= NULL;
14538 	new_bp->av_back	= NULL;
14539 	new_bp->b_dev	= bp->b_dev;
14540 	new_bp->b_blkno	= blkno;
14541 	new_bp->b_iodone = func;
14542 	new_bp->b_edev	= bp->b_edev;
14543 	new_bp->b_resid	= 0;
14544 
14545 	/* We need to preserve the B_FAILFAST flag */
14546 	if (bp->b_flags & B_FAILFAST) {
14547 		new_bp->b_flags |= B_FAILFAST;
14548 	}
14549 
14550 	/*
14551 	 * Allocate an xbuf for the shadow bp and copy the contents of the
14552 	 * original xbuf into it.
14553 	 */
14554 	new_xp = kmem_alloc(sizeof (struct sd_xbuf), KM_SLEEP);
14555 	bcopy(xp, new_xp, sizeof (struct sd_xbuf));
14556 
14557 	/* Needed later to copy data between the shadow buf & original buf! */
14558 	new_xp->xb_pkt_flags |= PKT_CONSISTENT;
14559 
14560 	/*
14561 	 * The given bp is automatically saved in the xb_private member
14562 	 * of the new xbuf.  Callers are allowed to depend on this.
14563 	 */
14564 	new_xp->xb_private = bp;
14565 
14566 	new_bp->b_private  = new_xp;
14567 
14568 	return (new_bp);
14569 }
14570 
14571 /*
14572  *    Function: sd_bioclone_free
14573  *
14574  * Description: Deallocate a buf(9S) that was used for 'shadow' IO operations
14575  *		in the larger than partition operation.
14576  *
14577  *     Context: May be called under interrupt context
14578  */
14579 
14580 static void
14581 sd_bioclone_free(struct buf *bp)
14582 {
14583 	struct sd_xbuf	*xp;
14584 
14585 	ASSERT(bp != NULL);
14586 	xp = SD_GET_XBUF(bp);
14587 	ASSERT(xp != NULL);
14588 
14589 	/*
14590 	 * Call bp_mapout() before freeing the buf, in case a lower
14591 	 * layer or HBA had done a bp_mapin().  We must do this here
14592 	 * as we are the "originator" of the shadow buf.
14593 	 */
14594 	bp_mapout(bp);
14595 
14596 	/*
14597 	 * Null out b_iodone before freeing the bp, to ensure that the driver
14598 	 * never gets confused by a stale value in this field. (Just a little
14599 	 * extra defensiveness here.)
14600 	 */
14601 	bp->b_iodone = NULL;
14602 
14603 	freerbuf(bp);
14604 
14605 	kmem_free(xp, sizeof (struct sd_xbuf));
14606 }
14607 
14608 /*
14609  *    Function: sd_shadow_buf_free
14610  *
14611  * Description: Deallocate a buf(9S) that was used for 'shadow' IO operations.
14612  *
14613  *     Context: May be called under interrupt context
14614  */
14615 
14616 static void
14617 sd_shadow_buf_free(struct buf *bp)
14618 {
14619 	struct sd_xbuf	*xp;
14620 
14621 	ASSERT(bp != NULL);
14622 	xp = SD_GET_XBUF(bp);
14623 	ASSERT(xp != NULL);
14624 
14625 #if defined(__sparc)
14626 	/*
14627 	 * Call bp_mapout() before freeing the buf, in case a lower
14628 	 * layer or HBA had done a bp_mapin().  We must do this here
14629 	 * as we are the "originator" of the shadow buf.
14630 	 */
14631 	bp_mapout(bp);
14632 #endif
14633 
14634 	/*
14635 	 * Null out b_iodone before freeing the bp, to ensure that the driver
14636 	 * never gets confused by a stale value in this field. (Just a little
14637 	 * extra defensiveness here.)
14638 	 */
14639 	bp->b_iodone = NULL;
14640 
14641 #if defined(__x86)
14642 	kmem_free(bp->b_un.b_addr, bp->b_bcount);
14643 	freerbuf(bp);
14644 #else
14645 	scsi_free_consistent_buf(bp);
14646 #endif
14647 
14648 	kmem_free(xp, sizeof (struct sd_xbuf));
14649 }
14650 
14651 
14652 /*
14653  *    Function: sd_print_transport_rejected_message
14654  *
14655  * Description: This implements the ludicrously complex rules for printing
14656  *		a "transport rejected" message.  This is to address the
14657  *		specific problem of having a flood of this error message
14658  *		produced when a failover occurs.
14659  *
14660  *     Context: Any.
14661  */
14662 
14663 static void
14664 sd_print_transport_rejected_message(struct sd_lun *un, struct sd_xbuf *xp,
14665     int code)
14666 {
14667 	ASSERT(un != NULL);
14668 	ASSERT(mutex_owned(SD_MUTEX(un)));
14669 	ASSERT(xp != NULL);
14670 
14671 	/*
14672 	 * Print the "transport rejected" message under the following
14673 	 * conditions:
14674 	 *
14675 	 * - Whenever the SD_LOGMASK_DIAG bit of sd_level_mask is set
14676 	 * - The error code from scsi_transport() is NOT a TRAN_FATAL_ERROR.
14677 	 * - If the error code IS a TRAN_FATAL_ERROR, then the message is
14678 	 *   printed the FIRST time a TRAN_FATAL_ERROR is returned from
14679 	 *   scsi_transport(9F) (which indicates that the target might have
14680 	 *   gone off-line).  This uses the un->un_tran_fatal_count
14681 	 *   count, which is incremented whenever a TRAN_FATAL_ERROR is
14682 	 *   received, and reset to zero whenever a TRAN_ACCEPT is returned
14683 	 *   from scsi_transport().
14684 	 *
14685 	 * The FLAG_SILENT in the scsi_pkt must be CLEARED in ALL of
14686 	 * the preceding cases in order for the message to be printed.
14687 	 */
14688 	if (((xp->xb_pktp->pkt_flags & FLAG_SILENT) == 0) &&
14689 	    (SD_FM_LOG(un) == SD_FM_LOG_NSUP)) {
14690 		if ((sd_level_mask & SD_LOGMASK_DIAG) ||
14691 		    (code != TRAN_FATAL_ERROR) ||
14692 		    (un->un_tran_fatal_count == 1)) {
14693 			switch (code) {
14694 			case TRAN_BADPKT:
14695 				scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
14696 				    "transport rejected bad packet\n");
14697 				break;
14698 			case TRAN_FATAL_ERROR:
14699 				scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
14700 				    "transport rejected fatal error\n");
14701 				break;
14702 			default:
14703 				scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
14704 				    "transport rejected (%d)\n", code);
14705 				break;
14706 			}
14707 		}
14708 	}
14709 }
14710 
14711 
14712 /*
14713  *    Function: sd_add_buf_to_waitq
14714  *
14715  * Description: Add the given buf(9S) struct to the wait queue for the
14716  *		instance.  If sorting is enabled, then the buf is added
14717  *		to the queue via an elevator sort algorithm (a la
14718  *		disksort(9F)).  The SD_GET_BLKNO(bp) is used as the sort key.
14719  *		If sorting is not enabled, then the buf is just added
14720  *		to the end of the wait queue.
14721  *
14722  * Return Code: void
14723  *
14724  *     Context: Does not sleep/block, therefore technically can be called
14725  *		from any context.  However, if sorting is enabled then the
14726  *		execution time is indeterminate, and may take a long time if
14727  *		the wait queue grows large.
14728  */
14729 
14730 static void
14731 sd_add_buf_to_waitq(struct sd_lun *un, struct buf *bp)
14732 {
14733 	struct buf *ap;
14734 
14735 	ASSERT(bp != NULL);
14736 	ASSERT(un != NULL);
14737 	ASSERT(mutex_owned(SD_MUTEX(un)));
14738 
14739 	/* If the queue is empty, add the buf as the only entry & return. */
14740 	if (un->un_waitq_headp == NULL) {
14741 		ASSERT(un->un_waitq_tailp == NULL);
14742 		un->un_waitq_headp = un->un_waitq_tailp = bp;
14743 		bp->av_forw = NULL;
14744 		return;
14745 	}
14746 
14747 	ASSERT(un->un_waitq_tailp != NULL);
14748 
14749 	/*
14750 	 * If sorting is disabled, just add the buf to the tail end of
14751 	 * the wait queue and return.
14752 	 */
14753 	if (un->un_f_disksort_disabled || un->un_f_enable_rmw) {
14754 		un->un_waitq_tailp->av_forw = bp;
14755 		un->un_waitq_tailp = bp;
14756 		bp->av_forw = NULL;
14757 		return;
14758 	}
14759 
14760 	/*
14761 	 * Sort thru the list of requests currently on the wait queue
14762 	 * and add the new buf request at the appropriate position.
14763 	 *
14764 	 * The un->un_waitq_headp is an activity chain pointer on which
14765 	 * we keep two queues, sorted in ascending SD_GET_BLKNO() order. The
14766 	 * first queue holds those requests which are positioned after
14767 	 * the current SD_GET_BLKNO() (in the first request); the second holds
14768 	 * requests which came in after their SD_GET_BLKNO() number was passed.
14769 	 * Thus we implement a one-way scan, retracting after reaching
14770 	 * the end of the drive to the first request on the second
14771 	 * queue, at which time it becomes the first queue.
14772 	 * A one-way scan is natural because of the way UNIX read-ahead
14773 	 * blocks are allocated.
14774 	 *
14775 	 * If our block number is below that of the first (current)
14776 	 * request, the head has passed us: add ourselves to the second list.
14777 	 */
14778 	ap = un->un_waitq_headp;
14779 	if (SD_GET_BLKNO(bp) < SD_GET_BLKNO(ap)) {
14780 		while (ap->av_forw != NULL) {
14781 			/*
14782 			 * Look for an "inversion" in the (normally
14783 			 * ascending) block numbers. This indicates
14784 			 * the start of the second request list.
14785 			 */
14786 			if (SD_GET_BLKNO(ap->av_forw) < SD_GET_BLKNO(ap)) {
14787 				/*
14788 				 * Search the second request list for the
14789 				 * first request at a larger block number.
14790 				 * We go before that; however if there is
14791 				 * no such request, we go at the end.
14792 				 */
14793 				do {
14794 					if (SD_GET_BLKNO(bp) <
14795 					    SD_GET_BLKNO(ap->av_forw)) {
14796 						goto insert;
14797 					}
14798 					ap = ap->av_forw;
14799 				} while (ap->av_forw != NULL);
14800 				goto insert;		/* after last */
14801 			}
14802 			ap = ap->av_forw;
14803 		}
14804 
14805 		/*
14806 		 * No inversions... we will go after the last, and
14807 		 * be the first request in the second request list.
14808 		 */
14809 		goto insert;
14810 	}
14811 
14812 	/*
14813 	 * Request is at/after the current request...
14814 	 * sort in the first request list.
14815 	 */
14816 	while (ap->av_forw != NULL) {
14817 		/*
14818 		 * We want to go after the current request (1) if
14819 		 * there is an inversion after it (i.e. it is the end
14820 		 * of the first request list), or (2) if the next
14821 		 * request is a larger block no. than our request.
14822 		 */
14823 		if ((SD_GET_BLKNO(ap->av_forw) < SD_GET_BLKNO(ap)) ||
14824 		    (SD_GET_BLKNO(bp) < SD_GET_BLKNO(ap->av_forw))) {
14825 			goto insert;
14826 		}
14827 		ap = ap->av_forw;
14828 	}
14829 
14830 	/*
14831 	 * Neither a second list nor a larger request, therefore
14832 	 * we go at the end of the first list (which is the same
14833 	 * as the end of the whole shebang).
14834 	 */
14835 insert:
14836 	bp->av_forw = ap->av_forw;
14837 	ap->av_forw = bp;
14838 
14839 	/*
14840 	 * If we inserted onto the tail end of the waitq, make sure the
14841 	 * tail pointer is updated.
14842 	 */
14843 	if (ap == un->un_waitq_tailp) {
14844 		un->un_waitq_tailp = bp;
14845 	}
14846 }
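
/*
 * Editor's sketch (illustrative only, not driver code): the two-list
 * elevator insertion implemented above can be modeled in isolation on
 * a plain singly linked list keyed by block number.  All names below
 * are hypothetical; only the insertion logic mirrors
 * sd_add_buf_to_waitq().
 *
 *	struct req {
 *		struct req	*next;
 *		long		blkno;
 *	};
 *
 *	static void
 *	elevator_insert(struct req **headp, struct req *rp)
 *	{
 *		struct req *ap = *headp;
 *
 *		if (ap == NULL) {			// empty queue
 *			rp->next = NULL;
 *			*headp = rp;
 *			return;
 *		}
 *		if (rp->blkno < ap->blkno) {
 *			// The head has passed our block: skip to the
 *			// inversion that starts the second sweep, then
 *			// sort into that sweep (or append if none larger).
 *			while (ap->next != NULL &&
 *			    ap->next->blkno >= ap->blkno)
 *				ap = ap->next;
 *			while (ap->next != NULL &&
 *			    rp->blkno >= ap->next->blkno)
 *				ap = ap->next;
 *		} else {
 *			// Sort into the first (current) sweep; stop at an
 *			// inversion or at the first larger block number.
 *			while (ap->next != NULL &&
 *			    ap->next->blkno >= ap->blkno &&
 *			    rp->blkno >= ap->next->blkno)
 *				ap = ap->next;
 *		}
 *		rp->next = ap->next;		// insert after 'ap'
 *		ap->next = rp;
 *	}
 */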
14847 
14848 
14849 /*
14850  *    Function: sd_start_cmds
14851  *
14852  * Description: Remove and transport cmds from the driver queues.
14853  *
14854  *   Arguments: un - pointer to the unit (soft state) struct for the target.
14855  *
14856  *		immed_bp - ptr to a buf to be transported immediately. Only
14857  *		the immed_bp is transported; bufs on the waitq are not
14858  *		processed and the un_retry_bp is not checked.  If immed_bp is
14859  *		NULL, then normal queue processing is performed.
14860  *
14861  *     Context: May be called from kernel thread context, interrupt context,
14862  *		or runout callback context. This function may not block or
14863  *		call routines that block.
14864  */
14865 
14866 static void
14867 sd_start_cmds(struct sd_lun *un, struct buf *immed_bp)
14868 {
14869 	struct	sd_xbuf	*xp;
14870 	struct	buf	*bp;
14871 	void	(*statp)(kstat_io_t *);
14872 #if defined(__x86)	/* DMAFREE for x86 only */
14873 	void	(*saved_statp)(kstat_io_t *);
14874 #endif
14875 	int	rval;
14876 	struct sd_fm_internal *sfip = NULL;
14877 
14878 	ASSERT(un != NULL);
14879 	ASSERT(mutex_owned(SD_MUTEX(un)));
14880 	ASSERT(un->un_ncmds_in_transport >= 0);
14881 	ASSERT(un->un_throttle >= 0);
14882 
14883 	SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un, "sd_start_cmds: entry\n");
14884 
14885 	do {
14886 #if defined(__x86)	/* DMAFREE for x86 only */
14887 		saved_statp = NULL;
14888 #endif
14889 
14890 		/*
14891 		 * If we are syncing or dumping, fail the command to
14892 		 * avoid recursively calling back into scsi_transport().
14893 		 * The dump I/O itself uses a separate code path so this
14894 		 * only prevents non-dump I/O from being sent while dumping.
14895 		 * File system sync takes place before dumping begins.
14896 		 * During panic, filesystem I/O is allowed provided
14897 		 * un_in_callback is <= 1.  This is to prevent recursion
14898 		 * such as sd_start_cmds -> scsi_transport -> sdintr ->
14899 		 * sd_start_cmds and so on.  See panic.c for more information
14900 		 * about the states the system can be in during panic.
14901 		 */
14902 		if ((un->un_state == SD_STATE_DUMPING) ||
14903 		    (ddi_in_panic() && (un->un_in_callback > 1))) {
14904 			SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
14905 			    "sd_start_cmds: panicking\n");
14906 			goto exit;
14907 		}
14908 
14909 		if ((bp = immed_bp) != NULL) {
14910 			/*
14911 			 * We have a bp that must be transported immediately.
14912 			 * It's OK to transport the immed_bp here without doing
14913 			 * the throttle limit check because the immed_bp is
14914 			 * always used in a retry/recovery case. This means
14915 			 * that we know we are not at the throttle limit by
14916 			 * virtue of the fact that to get here we must have
14917 			 * already gotten a command back via sdintr(). This also
14918 			 * relies on (1) the command on un_retry_bp preventing
14919 			 * further commands from the waitq from being issued;
14920 			 * and (2) the code in sd_retry_command checking the
14921 			 * throttle limit before issuing a delayed or immediate
14922 			 * retry. This holds even if the throttle limit is
14923 			 * currently ratcheted down from its maximum value.
14924 			 */
14925 			statp = kstat_runq_enter;
14926 			if (bp == un->un_retry_bp) {
14927 				ASSERT((un->un_retry_statp == NULL) ||
14928 				    (un->un_retry_statp == kstat_waitq_enter) ||
14929 				    (un->un_retry_statp ==
14930 				    kstat_runq_back_to_waitq));
14931 				/*
14932 				 * If the waitq kstat was incremented when
14933 				 * sd_set_retry_bp() queued this bp for a retry,
14934 				 * then we must set up statp so that the waitq
14935 				 * count will get decremented correctly below.
14936 				 * Also we must clear un->un_retry_statp to
14937 				 * ensure that we do not act on a stale value
14938 				 * in this field.
14939 				 */
14940 				if ((un->un_retry_statp == kstat_waitq_enter) ||
14941 				    (un->un_retry_statp ==
14942 				    kstat_runq_back_to_waitq)) {
14943 					statp = kstat_waitq_to_runq;
14944 				}
14945 #if defined(__x86)	/* DMAFREE for x86 only */
14946 				saved_statp = un->un_retry_statp;
14947 #endif
14948 				un->un_retry_statp = NULL;
14949 
14950 				SD_TRACE(SD_LOG_IO | SD_LOG_ERROR, un,
14951 				    "sd_start_cmds: un:0x%p: GOT retry_bp:0x%p "
14952 				    "un_throttle:%d un_ncmds_in_transport:%d\n",
14953 				    un, un->un_retry_bp, un->un_throttle,
14954 				    un->un_ncmds_in_transport);
14955 			} else {
14956 				SD_TRACE(SD_LOG_IO_CORE, un, "sd_start_cmds: "
14957 				    "processing priority bp:0x%p\n", bp);
14958 			}
14959 
14960 		} else if ((bp = un->un_waitq_headp) != NULL) {
14961 			/*
14962 			 * A command on the waitq is ready to go, but do not
14963 			 * send it if:
14964 			 *
14965 			 * (1) the throttle limit has been reached, or
14966 			 * (2) a retry is pending, or
14967 			 * (3) a START_STOP_UNIT callback is pending, or
14968 			 * (4) a callback for a SD_PATH_DIRECT_PRIORITY
14969 			 *	command is pending.
14970 			 *
14971 			 * For all of these conditions, IO processing will
14972 			 * restart after the condition is cleared.
14973 			 */
14974 			if (un->un_ncmds_in_transport >= un->un_throttle) {
14975 				SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
14976 				    "sd_start_cmds: exiting, "
14977 				    "throttle limit reached!\n");
14978 				goto exit;
14979 			}
14980 			if (un->un_retry_bp != NULL) {
14981 				SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
14982 				    "sd_start_cmds: exiting, retry pending!\n");
14983 				goto exit;
14984 			}
14985 			if (un->un_startstop_timeid != NULL) {
14986 				SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
14987 				    "sd_start_cmds: exiting, "
14988 				    "START_STOP pending!\n");
14989 				goto exit;
14990 			}
14991 			if (un->un_direct_priority_timeid != NULL) {
14992 				SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
14993 				    "sd_start_cmds: exiting, "
14994 				    "SD_PATH_DIRECT_PRIORITY cmd. pending!\n");
14995 				goto exit;
14996 			}
14997 
14998 			/* Dequeue the command */
14999 			un->un_waitq_headp = bp->av_forw;
15000 			if (un->un_waitq_headp == NULL) {
15001 				un->un_waitq_tailp = NULL;
15002 			}
15003 			bp->av_forw = NULL;
15004 			statp = kstat_waitq_to_runq;
15005 			SD_TRACE(SD_LOG_IO_CORE, un,
15006 			    "sd_start_cmds: processing waitq bp:0x%p\n", bp);
15007 
15008 		} else {
15009 			/* No work to do so bail out now */
15010 			SD_TRACE(SD_LOG_IO_CORE, un,
15011 			    "sd_start_cmds: no more work, exiting!\n");
15012 			goto exit;
15013 		}
15014 
15015 		/*
15016 		 * Reset the state to normal. This is the mechanism by which
15017 		 * the state transitions from either SD_STATE_RWAIT or
15018 		 * SD_STATE_OFFLINE to SD_STATE_NORMAL.
15019 		 * If state is SD_STATE_PM_CHANGING then this command is
15020 		 * part of the device power control and the state must
15021 		 * not be put back to normal. Doing so would allow new
15022 		 * commands to proceed when they shouldn't, since the
15023 		 * device may be going off.
15024 		 */
15025 		if ((un->un_state != SD_STATE_SUSPENDED) &&
15026 		    (un->un_state != SD_STATE_PM_CHANGING)) {
15027 			New_state(un, SD_STATE_NORMAL);
15028 		}
15029 
15030 		xp = SD_GET_XBUF(bp);
15031 		ASSERT(xp != NULL);
15032 
15033 #if defined(__x86)	/* DMAFREE for x86 only */
15034 		/*
15035 		 * Allocate the scsi_pkt if we need one, or attach DMA
15036 		 * resources if we have a scsi_pkt that needs them. The
15037 		 * latter should only occur for commands that are being
15038 		 * retried.
15039 		 */
15040 		if ((xp->xb_pktp == NULL) ||
15041 		    ((xp->xb_pkt_flags & SD_XB_DMA_FREED) != 0)) {
15042 #else
15043 		if (xp->xb_pktp == NULL) {
15044 #endif
15045 			/*
15046 			 * There is no scsi_pkt allocated for this buf. Call
15047 			 * the initpkt function to allocate & init one.
15048 			 *
15049 			 * The scsi_init_pkt runout callback functionality is
15050 			 * implemented as follows:
15051 			 *
15052 			 * 1) The initpkt function always calls
15053 			 *    scsi_init_pkt(9F) with sdrunout specified as the
15054 			 *    callback routine.
15055 			 * 2) A successful packet allocation is initialized and
15056 			 *    the I/O is transported.
15057 			 * 3) The I/O associated with an allocation resource
15058 			 *    failure is left on its queue to be retried via
15059 			 *    runout or the next I/O.
15060 			 * 4) The I/O associated with a DMA error is removed
15061 			 *    from the queue and failed with EIO. Processing of
15062 			 *    the transport queues is also halted to be
15063 			 *    restarted via runout or the next I/O.
15064 			 * 5) The I/O associated with a CDB size or packet
15065 			 *    size error is removed from the queue and failed
15066 			 *    with EIO. Processing of the transport queues is
15067 			 *    continued.
15068 			 *
15069 			 * Note: there is no interface for canceling a runout
15070 			 * callback. To prevent the driver from detaching or
15071 			 * suspending while a runout is pending the driver
15072 			 * state is set to SD_STATE_RWAIT.
15073 			 *
15074 			 * Note: using the scsi_init_pkt callback facility can
15075 			 * result in an I/O request persisting at the head of
15076 			 * the list which cannot be satisfied even after
15077 			 * multiple retries. In the future the driver may
15078 			 * implement some kind of maximum runout count before
15079 			 * failing an I/O.
15080 			 *
15081 			 * Note: the use of funcp below may seem superfluous,
15082 			 * but it helps warlock figure out the correct
15083 			 * initpkt function calls (see [s]sd.wlcmd).
15084 			 */
15085 			struct scsi_pkt	*pktp;
15086 			int (*funcp)(struct buf *bp, struct scsi_pkt **pktp);
15087 
15088 			ASSERT(bp != un->un_rqs_bp);
15089 
15090 			funcp = sd_initpkt_map[xp->xb_chain_iostart];
15091 			switch ((*funcp)(bp, &pktp)) {
15092 			case  SD_PKT_ALLOC_SUCCESS:
15093 				xp->xb_pktp = pktp;
15094 				SD_TRACE(SD_LOG_IO_CORE, un,
15095 				    "sd_start_cmd: SD_PKT_ALLOC_SUCCESS 0x%p\n",
15096 				    pktp);
15097 				goto got_pkt;
15098 
15099 			case SD_PKT_ALLOC_FAILURE:
15100 				/*
15101 				 * Temporary (hopefully) resource depletion.
15102 				 * Since retries and RQS commands always have a
15103 				 * scsi_pkt allocated, these cases should never
15104 				 * get here. So the only cases this needs to
15105 				 * handle are a bp from the waitq (which we put
15106 				 * back onto the waitq for sdrunout), or a bp
15107 				 * sent as an immed_bp (which we just fail).
15108 				 */
15109 				SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
15110 				    "sd_start_cmds: SD_PKT_ALLOC_FAILURE\n");
15111 
15112 #if defined(__x86)	/* DMAFREE for x86 only */
15113 
15114 				if (bp == immed_bp) {
15115 					/*
15116 					 * If SD_XB_DMA_FREED is clear, then
15117 					 * this is a failure to allocate a
15118 					 * scsi_pkt, and we must fail the
15119 					 * command.
15120 					 */
15121 					if ((xp->xb_pkt_flags &
15122 					    SD_XB_DMA_FREED) == 0) {
15123 						break;
15124 					}
15125 
15126 					/*
15127 					 * If this immediate command is NOT our
15128 					 * un_retry_bp, then we must fail it.
15129 					 */
15130 					if (bp != un->un_retry_bp) {
15131 						break;
15132 					}
15133 
15134 					/*
15135 					 * We get here if this cmd is our
15136 					 * un_retry_bp that was DMAFREED, but
15137 					 * scsi_init_pkt() failed to reallocate
15138 					 * DMA resources when we attempted to
15139 					 * retry it. This can happen when an
15140 					 * mpxio failover is in progress, but
15141 					 * we don't want to just fail the
15142 					 * command in this case.
15143 					 *
15144 					 * Use timeout(9F) to restart it after
15145 					 * a 100ms delay.  We don't want to
15146 					 * let sdrunout() restart it, because
15147 					 * sdrunout() is just supposed to start
15148 					 * commands that are sitting on the
15149 					 * wait queue.  The un_retry_bp stays
15150 					 * set until the command completes, but
15151 					 * sdrunout can be called many times
15152 					 * before that happens.  Since sdrunout
15153 					 * cannot tell if the un_retry_bp is
15154 					 * already in the transport, it could
15155 					 * end up calling scsi_transport() for
15156 					 * the un_retry_bp multiple times.
15157 					 *
15158 					 * Also: don't schedule the callback
15159 					 * if some other callback is already
15160 					 * pending.
15161 					 */
15162 					if (un->un_retry_statp == NULL) {
15163 						/*
15164 						 * restore the kstat pointer to
15165 						 * keep kstat counts coherent
15166 						 * when we do retry the command.
15167 						 */
15168 						un->un_retry_statp =
15169 						    saved_statp;
15170 					}
15171 
15172 					if ((un->un_startstop_timeid == NULL) &&
15173 					    (un->un_retry_timeid == NULL) &&
15174 					    (un->un_direct_priority_timeid ==
15175 					    NULL)) {
15176 
15177 						un->un_retry_timeid =
15178 						    timeout(
15179 						    sd_start_retry_command,
15180 						    un, SD_RESTART_TIMEOUT);
15181 					}
15182 					goto exit;
15183 				}
15184 
15185 #else
15186 				if (bp == immed_bp) {
15187 					break;	/* Just fail the command */
15188 				}
15189 #endif
15190 
15191 				/* Add the buf back to the head of the waitq */
15192 				bp->av_forw = un->un_waitq_headp;
15193 				un->un_waitq_headp = bp;
15194 				if (un->un_waitq_tailp == NULL) {
15195 					un->un_waitq_tailp = bp;
15196 				}
15197 				goto exit;
15198 
15199 			case SD_PKT_ALLOC_FAILURE_NO_DMA:
15200 				/*
15201 				 * HBA DMA resource failure. Fail the command
15202 				 * and continue processing of the queues.
15203 				 */
15204 				SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
15205 				    "sd_start_cmds: "
15206 				    "SD_PKT_ALLOC_FAILURE_NO_DMA\n");
15207 				break;
15208 
15209 			case SD_PKT_ALLOC_FAILURE_PKT_TOO_SMALL:
15210 				/*
15211 				 * Note:x86: Partial DMA mapping not supported
15212 				 * for USCSI commands, and all the needed DMA
15213 				 * resources were not allocated.
15214 				 */
15215 				SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
15216 				    "sd_start_cmds: "
15217 				    "SD_PKT_ALLOC_FAILURE_PKT_TOO_SMALL\n");
15218 				break;
15219 
15220 			case SD_PKT_ALLOC_FAILURE_CDB_TOO_SMALL:
15221 				/*
15222 				 * Note:x86: Request cannot fit into CDB based
15223 				 * on lba and len.
15224 				 */
15225 				SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
15226 				    "sd_start_cmds: "
15227 				    "SD_PKT_ALLOC_FAILURE_CDB_TOO_SMALL\n");
15228 				break;
15229 
15230 			default:
15231 				/* Should NEVER get here! */
15232 				panic("scsi_initpkt error");
15233 				/*NOTREACHED*/
15234 			}
15235 
15236 			/*
15237 			 * Fatal error in allocating a scsi_pkt for this buf.
15238 			 * Update kstats & return the buf with an error code.
15239 			 * We must use sd_return_failed_command_no_restart() to
15240 			 * avoid a recursive call back into sd_start_cmds().
15241 			 * However this also means that we must keep processing
15242 			 * the waitq here in order to avoid stalling.
15243 			 */
15244 			if (statp == kstat_waitq_to_runq) {
15245 				SD_UPDATE_KSTATS(un, kstat_waitq_exit, bp);
15246 			}
15247 			sd_return_failed_command_no_restart(un, bp, EIO);
15248 			if (bp == immed_bp) {
15249 				/* immed_bp is gone by now, so clear this */
15250 				immed_bp = NULL;
15251 			}
15252 			continue;
15253 		}
15254 got_pkt:
15255 		if (bp == immed_bp) {
15256 			/* goto the head of the class.... */
15257 			xp->xb_pktp->pkt_flags |= FLAG_HEAD;
15258 		}
15259 
15260 		un->un_ncmds_in_transport++;
15261 		SD_UPDATE_KSTATS(un, statp, bp);
15262 
15263 		/*
15264 		 * Call scsi_transport() to send the command to the target.
15265 		 * According to SCSA architecture, we must drop the mutex here
15266 		 * before calling scsi_transport() in order to avoid deadlock.
15267 		 * Note that the scsi_pkt's completion routine can be executed
15268 		 * (from interrupt context) even before the call to
15269 		 * scsi_transport() returns.
15270 		 */
15271 		SD_TRACE(SD_LOG_IO_CORE, un,
15272 		    "sd_start_cmds: calling scsi_transport()\n");
15273 		DTRACE_PROBE1(scsi__transport__dispatch, struct buf *, bp);
15274 
15275 		mutex_exit(SD_MUTEX(un));
15276 		rval = scsi_transport(xp->xb_pktp);
15277 		mutex_enter(SD_MUTEX(un));
15278 
15279 		SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
15280 		    "sd_start_cmds: scsi_transport() returned %d\n", rval);
15281 
15282 		switch (rval) {
15283 		case TRAN_ACCEPT:
15284 			/* Clear this with every pkt accepted by the HBA */
15285 			un->un_tran_fatal_count = 0;
15286 			break;	/* Success; try the next cmd (if any) */
15287 
15288 		case TRAN_BUSY:
15289 			un->un_ncmds_in_transport--;
15290 			ASSERT(un->un_ncmds_in_transport >= 0);
15291 
15292 			/*
15293 			 * Don't retry request sense; the sense data
15294 			 * is lost when another request is sent.
15295 			 * Free up the rqs buf and retry
15296 			 * the original failed cmd.  Update kstat.
15297 			 */
15298 			if (bp == un->un_rqs_bp) {
15299 				SD_UPDATE_KSTATS(un, kstat_runq_exit, bp);
15300 				bp = sd_mark_rqs_idle(un, xp);
15301 				sd_retry_command(un, bp, SD_RETRIES_STANDARD,
15302 				    NULL, NULL, EIO, un->un_busy_timeout / 500,
15303 				    kstat_waitq_enter);
15304 				goto exit;
15305 			}
15306 
15307 #if defined(__x86)	/* DMAFREE for x86 only */
15308 			/*
15309 			 * Free the DMA resources for the scsi_pkt. This will
15310 			 * allow mpxio to select another path the next time
15311 			 * we call scsi_transport() with this scsi_pkt.
15312 			 * See sdintr() for the rationale behind this.
15313 			 */
15314 			if ((un->un_f_is_fibre == TRUE) &&
15315 			    ((xp->xb_pkt_flags & SD_XB_USCSICMD) == 0) &&
15316 			    ((xp->xb_pktp->pkt_flags & FLAG_SENSING) == 0)) {
15317 				scsi_dmafree(xp->xb_pktp);
15318 				xp->xb_pkt_flags |= SD_XB_DMA_FREED;
15319 			}
15320 #endif
15321 
15322 			if (SD_IS_DIRECT_PRIORITY(SD_GET_XBUF(bp))) {
15323 				/*
15324 				 * Commands that are SD_PATH_DIRECT_PRIORITY
15325 				 * are for error recovery situations. These do
15326 				 * not use the normal command waitq, so if they
15327 				 * get a TRAN_BUSY we cannot put them back onto
15328 				 * the waitq for later retry. One possible
15329 				 * problem is that there could already be some
15330 				 * other command on un_retry_bp that is waiting
15331 				 * for this one to complete, so we would be
15332 				 * deadlocked if we put this command back onto
15333 				 * the waitq for later retry (since un_retry_bp
15334 				 * must complete before the driver gets back to
15335 				 * commands on the waitq).
15336 				 *
15337 				 * To avoid deadlock we must schedule a callback
15338 				 * that will restart this command after a set
15339 				 * interval.  This should keep retrying for as
15340 				 * long as the underlying transport keeps
15341 				 * returning TRAN_BUSY (just like for other
15342 				 * commands).  Use the same timeout interval as
15343 				 * for the ordinary TRAN_BUSY retry.
15344 				 */
15345 				SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
15346 				    "sd_start_cmds: scsi_transport() returned "
15347 				    "TRAN_BUSY for DIRECT_PRIORITY cmd!\n");
15348 
15349 				SD_UPDATE_KSTATS(un, kstat_runq_exit, bp);
15350 				un->un_direct_priority_timeid =
15351 				    timeout(sd_start_direct_priority_command,
15352 				    bp, un->un_busy_timeout / 500);
15353 
15354 				goto exit;
15355 			}
15356 
15357 			/*
15358 			 * For TRAN_BUSY, we want to reduce the throttle value,
15359 			 * unless we are retrying a command.
15360 			 */
15361 			if (bp != un->un_retry_bp) {
15362 				sd_reduce_throttle(un, SD_THROTTLE_TRAN_BUSY);
15363 			}
15364 
15365 			/*
15366 			 * Set up the bp to be tried again 10 ms later.
15367 			 * Note:x86: Is there a timeout value in the sd_lun
15368 			 * for this condition?
15369 			 */
15370 			sd_set_retry_bp(un, bp, un->un_busy_timeout / 500,
15371 			    kstat_runq_back_to_waitq);
15372 			goto exit;
15373 
15374 		case TRAN_FATAL_ERROR:
15375 			un->un_tran_fatal_count++;
15376 			/* FALLTHRU */
15377 
15378 		case TRAN_BADPKT:
15379 		default:
15380 			un->un_ncmds_in_transport--;
15381 			ASSERT(un->un_ncmds_in_transport >= 0);
15382 
15383 			/*
15384 			 * If this is our REQUEST SENSE command with a
15385 			 * transport error, we must get back the pointers
15386 			 * to the original buf, and mark the REQUEST
15387 			 * SENSE command as "available".
15388 			 */
15389 			if (bp == un->un_rqs_bp) {
15390 				bp = sd_mark_rqs_idle(un, xp);
15391 				xp = SD_GET_XBUF(bp);
15392 			} else {
15393 				/*
15394 				 * Legacy behavior: do not update transport
15395 				 * error count for request sense commands.
15396 				 */
15397 				SD_UPDATE_ERRSTATS(un, sd_transerrs);
15398 			}
15399 
15400 			SD_UPDATE_KSTATS(un, kstat_runq_exit, bp);
15401 			sd_print_transport_rejected_message(un, xp, rval);
15402 
15403 			/*
15404 			 * This command will be terminated by the SD driver
15405 			 * due to a fatal transport error. We should post an
15406 			 * ereport.io.scsi.cmd.disk.tran with a driver-assessment
15407 			 * of "fail" for any command to indicate this
15408 			 * situation.
15409 			 */
15410 			if (xp->xb_ena > 0) {
15411 				ASSERT(un->un_fm_private != NULL);
15412 				sfip = un->un_fm_private;
15413 				sfip->fm_ssc.ssc_flags |= SSC_FLAGS_TRAN_ABORT;
15414 				sd_ssc_extract_info(&sfip->fm_ssc, un,
15415 				    xp->xb_pktp, bp, xp);
15416 				sd_ssc_post(&sfip->fm_ssc, SD_FM_DRV_FATAL);
15417 			}
15418 
15419 			/*
15420 			 * We must use sd_return_failed_command_no_restart() to
15421 			 * avoid a recursive call back into sd_start_cmds().
15422 			 * However this also means that we must keep processing
15423 			 * the waitq here in order to avoid stalling.
15424 			 */
15425 			sd_return_failed_command_no_restart(un, bp, EIO);
15426 
15427 			/*
15428 			 * Notify any threads waiting in sd_ddi_suspend() that
15429 			 * a command completion has occurred.
15430 			 */
15431 			if (un->un_state == SD_STATE_SUSPENDED) {
15432 				cv_broadcast(&un->un_disk_busy_cv);
15433 			}
15434 
15435 			if (bp == immed_bp) {
15436 				/* immed_bp is gone by now, so clear this */
15437 				immed_bp = NULL;
15438 			}
15439 			break;
15440 		}
15441 
15442 	} while (immed_bp == NULL);
15443 
15444 exit:
15445 	ASSERT(mutex_owned(SD_MUTEX(un)));
15446 	SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un, "sd_start_cmds: exit\n");
15447 }
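
/*
 * Editor's sketch (illustrative only, not driver code): the dispatch
 * contract at the heart of sd_start_cmds() is "drop the per-unit mutex
 * across scsi_transport(9F), re-acquire it, then act on the return
 * code".  A minimal model of that pattern, with hypothetical helpers
 * my_requeue() and my_fail(), might look like:
 *
 *	mutex_exit(SD_MUTEX(un));
 *	rval = scsi_transport(pktp);	// completion may fire before return
 *	mutex_enter(SD_MUTEX(un));
 *
 *	switch (rval) {
 *	case TRAN_ACCEPT:		// HBA now owns the packet
 *		break;
 *	case TRAN_BUSY:			// transient: back off and requeue
 *		my_requeue(un, bp);
 *		break;
 *	case TRAN_BADPKT:
 *	case TRAN_FATAL_ERROR:
 *	default:			// permanent: fail with EIO
 *		my_fail(un, bp, EIO);
 *		break;
 *	}
 */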
15448 
15449 
15450 /*
15451  *    Function: sd_return_command
15452  *
15453  * Description: Returns a command to its originator (with or without an
15454  *		error).  Also starts commands waiting to be transported
15455  *		to the target.
15456  *
15457  *     Context: May be called from interrupt, kernel, or timeout context
15458  */
15459 
15460 static void
15461 sd_return_command(struct sd_lun *un, struct buf *bp)
15462 {
15463 	struct sd_xbuf *xp;
15464 	struct scsi_pkt *pktp;
15465 	struct sd_fm_internal *sfip;
15466 
15467 	ASSERT(bp != NULL);
15468 	ASSERT(un != NULL);
15469 	ASSERT(mutex_owned(SD_MUTEX(un)));
15470 	ASSERT(bp != un->un_rqs_bp);
15471 	xp = SD_GET_XBUF(bp);
15472 	ASSERT(xp != NULL);
15473 
15474 	pktp = SD_GET_PKTP(bp);
15475 	sfip = (struct sd_fm_internal *)un->un_fm_private;
15476 	ASSERT(sfip != NULL);
15477 
15478 	SD_TRACE(SD_LOG_IO_CORE, un, "sd_return_command: entry\n");
15479 
15480 	/*
15481 	 * Note: check for the "sdrestart failed" case.
15482 	 */
15483 	if ((un->un_partial_dma_supported == 1) &&
15484 	    ((xp->xb_pkt_flags & SD_XB_USCSICMD) != SD_XB_USCSICMD) &&
15485 	    (geterror(bp) == 0) && (xp->xb_dma_resid != 0) &&
15486 	    (xp->xb_pktp->pkt_resid == 0)) {
15487 
15488 		if (sd_setup_next_xfer(un, bp, pktp, xp) != 0) {
15489 			/*
15490 			 * Successfully set up next portion of cmd
15491 			 * transfer, try sending it
15492 			 */
15493 			sd_retry_command(un, bp, SD_RETRIES_NOCHECK,
15494 			    NULL, NULL, 0, (clock_t)0, NULL);
15495 			sd_start_cmds(un, NULL);
15496 			return;	/* Note:x86: need a return here? */
15497 		}
15498 	}
15499 
15500 	/*
15501 	 * If this is the failfast bp, clear it from un_failfast_bp. This
15502 	 * can happen if, upon being retried, the failfast bp either
15503 	 * succeeded or encountered another error (possibly even a different
15504 	 * error than the one that precipitated the failfast state, but in
15505 	 * that case it would have had to exhaust retries as well). Regardless,
15506 	 * this should not occur whenever the instance is in the active
15507 	 * this should not occur while the instance is in the active
15508 	 */
15509 	if (bp == un->un_failfast_bp) {
15510 		ASSERT(un->un_failfast_state == SD_FAILFAST_INACTIVE);
15511 		un->un_failfast_bp = NULL;
15512 	}
15513 
15514 	/*
15515 	 * Clear the failfast state upon successful completion of ANY cmd.
15516 	 */
15517 	if (bp->b_error == 0) {
15518 		un->un_failfast_state = SD_FAILFAST_INACTIVE;
15519 		/*
15520 		 * If this command succeeded but had previously been retried,
15521 		 * we treat it as a recovered command and post an
15522 		 * ereport with a driver-assessment of "recovered".
15523 		 */
15524 		if (xp->xb_ena > 0) {
15525 			sd_ssc_extract_info(&sfip->fm_ssc, un, pktp, bp, xp);
15526 			sd_ssc_post(&sfip->fm_ssc, SD_FM_DRV_RECOVERY);
15527 		}
15528 	} else {
15529 		/*
15530 		 * If this is a failed non-USCSI command we will post an
15531 		 * ereport with driver-assessment set accordingly ("fail" or
15532 		 * "fatal").
15533 		 */
15534 		if (!(xp->xb_pkt_flags & SD_XB_USCSICMD)) {
15535 			sd_ssc_extract_info(&sfip->fm_ssc, un, pktp, bp, xp);
15536 			sd_ssc_post(&sfip->fm_ssc, SD_FM_DRV_FATAL);
15537 		}
15538 	}
15539 
15540 	/*
15541 	 * This is used if the command was retried one or more times. Show that
15542 	 * we are done with it, and allow processing of the waitq to resume.
15543 	 */
15544 	if (bp == un->un_retry_bp) {
15545 		SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
15546 		    "sd_return_command: un:0x%p: "
15547 		    "RETURNING retry_bp:0x%p\n", un, un->un_retry_bp);
15548 		un->un_retry_bp = NULL;
15549 		un->un_retry_statp = NULL;
15550 	}
15551 
15552 	SD_UPDATE_RDWR_STATS(un, bp);
15553 	SD_UPDATE_PARTITION_STATS(un, bp);
15554 
15555 	switch (un->un_state) {
15556 	case SD_STATE_SUSPENDED:
15557 		/*
15558 		 * Notify any threads waiting in sd_ddi_suspend() that
15559 		 * a command completion has occurred.
15560 		 */
15561 		cv_broadcast(&un->un_disk_busy_cv);
15562 		break;
15563 	default:
15564 		sd_start_cmds(un, NULL);
15565 		break;
15566 	}
15567 
15568 	/* Return this command up the iodone chain to its originator. */
15569 	mutex_exit(SD_MUTEX(un));
15570 
15571 	(*(sd_destroypkt_map[xp->xb_chain_iodone]))(bp);
15572 	xp->xb_pktp = NULL;
15573 
15574 	SD_BEGIN_IODONE(xp->xb_chain_iodone, un, bp);
15575 
15576 	ASSERT(!mutex_owned(SD_MUTEX(un)));
15577 	mutex_enter(SD_MUTEX(un));
15578 
15579 	SD_TRACE(SD_LOG_IO_CORE, un, "sd_return_command: exit\n");
15580 }
15581 
15582 
15583 /*
15584  *    Function: sd_return_failed_command
15585  *
15586  * Description: Command completion when an error occurred.
15587  *
15588  *     Context: May be called from interrupt context
15589  */
15590 
15591 static void
15592 sd_return_failed_command(struct sd_lun *un, struct buf *bp, int errcode)
15593 {
15594 	ASSERT(bp != NULL);
15595 	ASSERT(un != NULL);
15596 	ASSERT(mutex_owned(SD_MUTEX(un)));
15597 
15598 	SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
15599 	    "sd_return_failed_command: entry\n");
15600 
15601 	/*
15602 	 * b_resid could already be nonzero due to a partial data
15603 	 * transfer, so do not change it here.
15604 	 */
15605 	SD_BIOERROR(bp, errcode);
15606 
15607 	sd_return_command(un, bp);
15608 	SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
15609 	    "sd_return_failed_command: exit\n");
15610 }
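
/*
 * Editor's note (illustrative only, not driver code): "do not change
 * b_resid" above matters because, assuming SD_BIOERROR() follows
 * bioerror(9F) semantics, only b_error and the B_ERROR flag are set;
 * the residual byte count is reported independently.  A sketch with
 * hypothetical values:
 *
 *	bioerror(bp, EIO);	// sets bp->b_error = EIO and B_ERROR
 *	// bp->b_resid is untouched: if 8192 bytes were requested and
 *	// only 4096 transferred, b_resid remains 4096, so the caller
 *	// sees the partial transfer alongside the error.
 */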
15611 
15612 
15613 /*
15614  *    Function: sd_return_failed_command_no_restart
15615  *
15616  * Description: Same as sd_return_failed_command, but ensures that no
15617  *		call back into sd_start_cmds will be issued.
15618  *
15619  *     Context: May be called from interrupt context
15620  */
15621 
15622 static void
15623 sd_return_failed_command_no_restart(struct sd_lun *un, struct buf *bp,
15624     int errcode)
15625 {
15626 	struct sd_xbuf *xp;
15627 
15628 	ASSERT(bp != NULL);
15629 	ASSERT(un != NULL);
15630 	ASSERT(mutex_owned(SD_MUTEX(un)));
15631 	xp = SD_GET_XBUF(bp);
15632 	ASSERT(xp != NULL);
15633 	ASSERT(errcode != 0);
15634 
15635 	SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
15636 	    "sd_return_failed_command_no_restart: entry\n");
15637 
15638 	/*
15639 	 * b_resid could already be nonzero due to a partial data
15640 	 * transfer, so do not change it here.
15641 	 */
15642 	SD_BIOERROR(bp, errcode);
15643 
15644 	/*
15645 	 * If this is the failfast bp, clear it. This can happen if the
15646 	 * failfast bp encountered a fatal error when we attempted to
15647 	 * re-try it (such as a scsi_transport(9F) failure).  However
15648 	 * we should NOT be in an active failfast state if the failfast
15649 	 * bp is not NULL.
15650 	 */
15651 	if (bp == un->un_failfast_bp) {
15652 		ASSERT(un->un_failfast_state == SD_FAILFAST_INACTIVE);
15653 		un->un_failfast_bp = NULL;
15654 	}
15655 
15656 	if (bp == un->un_retry_bp) {
15657 		/*
15658 		 * This command was retried one or more times. Show that we are
15659 		 * done with it, and allow processing of the waitq to resume.
15660 		 */
15661 		SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
15662 		    "sd_return_failed_command_no_restart: "
15663 		    " un:0x%p: RETURNING retry_bp:0x%p\n", un, un->un_retry_bp);
15664 		un->un_retry_bp = NULL;
15665 		un->un_retry_statp = NULL;
15666 	}
15667 
15668 	SD_UPDATE_RDWR_STATS(un, bp);
15669 	SD_UPDATE_PARTITION_STATS(un, bp);
15670 
15671 	mutex_exit(SD_MUTEX(un));
15672 
15673 	if (xp->xb_pktp != NULL) {
15674 		(*(sd_destroypkt_map[xp->xb_chain_iodone]))(bp);
15675 		xp->xb_pktp = NULL;
15676 	}
15677 
15678 	SD_BEGIN_IODONE(xp->xb_chain_iodone, un, bp);
15679 
15680 	mutex_enter(SD_MUTEX(un));
15681 
15682 	SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
15683 	    "sd_return_failed_command_no_restart: exit\n");
15684 }
15685 
15686 
15687 /*
15688  *    Function: sd_retry_command
15689  *
15690  * Description: Queue up a command for retry, or (optionally) fail it
15691  *		if retry counts are exhausted.
15692  *
15693  *   Arguments: un - Pointer to the sd_lun struct for the target.
15694  *
15695  *		bp - Pointer to the buf for the command to be retried.
15696  *
15697  *		retry_check_flag - Flag to see which (if any) of the retry
15698  *		   counts should be decremented/checked. If the indicated
15699  *		   retry count is exhausted, then the command will not be
15700  *		   retried; it will be failed instead. This should use a
15701  *		   value equal to one of the following:
15702  *
15703  *			SD_RETRIES_NOCHECK
15704  *			SD_RETRIES_STANDARD
15705  *			SD_RETRIES_VICTIM
15706  *
15707  *		   Optionally may be bitwise-OR'ed with SD_RETRIES_ISOLATE
15708  *		   if the check should be made to see if FLAG_ISOLATE is set
15709  *		   in the pkt. If FLAG_ISOLATE is set, then the command is
15710  *		   not retried, it is simply failed.
15711  *
15712  *		user_funcp - Ptr to function to call before dispatching the
15713  *		   command. May be NULL if no action needs to be performed.
15714  *		   (Primarily intended for printing messages.)
15715  *
15716  *		user_arg - Optional argument to be passed along to
15717  *		   the user_funcp call.
15718  *
15719  *		failure_code - errno return code to set in the bp if the
15720  *		   command is going to be failed.
15721  *
15722  *		retry_delay - Retry delay interval in (clock_t) units. May
15723  *		   be zero, which indicates that the command should be retried
15724  *		   immediately (ie, without an intervening delay).
15725  *
15726  *		statp - Ptr to kstat function to be updated if the command
15727  *		   is queued for a delayed retry. May be NULL if no kstat
15728  *		   update is desired.
15729  *
15730  *     Context: May be called from interrupt context.
15731  */
15732 
15733 static void
15734 sd_retry_command(struct sd_lun *un, struct buf *bp, int retry_check_flag,
15735     void (*user_funcp)(struct sd_lun *un, struct buf *bp, void *argp, int code),
15736     void *user_arg, int failure_code, clock_t retry_delay,
15737     void (*statp)(kstat_io_t *))
15738 {
15739 	struct sd_xbuf	*xp;
15740 	struct scsi_pkt	*pktp;
15741 	struct sd_fm_internal *sfip;
15742 
15743 	ASSERT(un != NULL);
15744 	ASSERT(mutex_owned(SD_MUTEX(un)));
15745 	ASSERT(bp != NULL);
15746 	xp = SD_GET_XBUF(bp);
15747 	ASSERT(xp != NULL);
15748 	pktp = SD_GET_PKTP(bp);
15749 	ASSERT(pktp != NULL);
15750 
15751 	sfip = (struct sd_fm_internal *)un->un_fm_private;
15752 	ASSERT(sfip != NULL);
15753 
15754 	SD_TRACE(SD_LOG_IO | SD_LOG_ERROR, un,
15755 	    "sd_retry_command: entry: bp:0x%p xp:0x%p\n", bp, xp);
15756 
15757 	/*
15758 	 * If we are syncing or dumping, fail the command to avoid
15759 	 * recursively calling back into scsi_transport().
15760 	 */
15761 	if (ddi_in_panic()) {
15762 		goto fail_command_no_log;
15763 	}
15764 
15765 	/*
15766 	 * We should never be retrying a command with FLAG_DIAGNOSE set, so
15767 	 * log an error and fail the command.
15768 	 */
15769 	if ((pktp->pkt_flags & FLAG_DIAGNOSE) != 0) {
15770 		scsi_log(SD_DEVINFO(un), sd_label, CE_NOTE,
15771 		    "ERROR, retrying FLAG_DIAGNOSE command.\n");
15772 		sd_dump_memory(un, SD_LOG_IO, "CDB",
15773 		    (uchar_t *)pktp->pkt_cdbp, CDB_SIZE, SD_LOG_HEX);
15774 		sd_dump_memory(un, SD_LOG_IO, "Sense Data",
15775 		    (uchar_t *)xp->xb_sense_data, SENSE_LENGTH, SD_LOG_HEX);
15776 		goto fail_command;
15777 	}
15778 
15779 	/*
15780 	 * If we are suspended, then put the command onto the head of
15781 	 * the wait queue since we don't want to start more commands, and
15782 	 * clear the un_retry_bp. The next time we are resumed, we will
15783 	 * handle the commands in the wait queue.
15784 	 */
15785 	switch (un->un_state) {
15786 	case SD_STATE_SUSPENDED:
15787 	case SD_STATE_DUMPING:
15788 		bp->av_forw = un->un_waitq_headp;
15789 		un->un_waitq_headp = bp;
15790 		if (un->un_waitq_tailp == NULL) {
15791 			un->un_waitq_tailp = bp;
15792 		}
15793 		if (bp == un->un_retry_bp) {
15794 			un->un_retry_bp = NULL;
15795 			un->un_retry_statp = NULL;
15796 		}
15797 		SD_UPDATE_KSTATS(un, kstat_waitq_enter, bp);
15798 		SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un, "sd_retry_command: "
15799 		    "exiting; cmd bp:0x%p requeued for SUSPEND/DUMP\n", bp);
15800 		return;
15801 	default:
15802 		break;
15803 	}
15804 
15805 	/*
15806 	 * If the caller wants us to check FLAG_ISOLATE, then see if that
15807 	 * is set; if it is then we do not want to retry the command.
15808 	 * Normally, FLAG_ISOLATE is only used with USCSI cmds.
15809 	 */
15810 	if ((retry_check_flag & SD_RETRIES_ISOLATE) != 0) {
15811 		if ((pktp->pkt_flags & FLAG_ISOLATE) != 0) {
15812 			goto fail_command;
15813 		}
15814 	}
15815 
15816 
15817 	/*
15818 	 * If SD_RETRIES_FAILFAST is set, it indicates that either a
15819 	 * command timeout or a selection timeout has occurred. This means
15820 	 * that we were unable to establish any kind of communication with
15821 	 * the target, and subsequent retries and/or commands are likely
15822 	 * to encounter similar results and take a long time to complete.
15823 	 *
15824 	 * If this is a failfast error condition, we need to update the
15825 	 * failfast state, even if this bp does not have B_FAILFAST set.
15826 	 */
15827 	if (retry_check_flag & SD_RETRIES_FAILFAST) {
15828 		if (un->un_failfast_state == SD_FAILFAST_ACTIVE) {
15829 			ASSERT(un->un_failfast_bp == NULL);
15830 			/*
15831 			 * If we are already in the active failfast state, and
15832 			 * another failfast error condition has been detected,
15833 			 * then fail this command if it has B_FAILFAST set.
15834 			 * If B_FAILFAST is clear, then maintain the legacy
15835 			 * behavior of retrying heroically, even though this will
15836 			 * take a lot more time to fail the command.
15837 			 */
15838 			if (bp->b_flags & B_FAILFAST) {
15839 				goto fail_command;
15840 			}
15841 		} else {
15842 			/*
15843 			 * We're not in the active failfast state, but we
15844 			 * have a failfast error condition, so we must begin
15845 			 * transition to the next state. We do this regardless
15846 			 * of whether or not this bp has B_FAILFAST set.
15847 			 */
15848 			if (un->un_failfast_bp == NULL) {
15849 				/*
15850 				 * This is the first bp to meet a failfast
15851 				 * condition so save it on un_failfast_bp &
15852 				 * do normal retry processing. Do not enter
15853 				 * active failfast state yet. This marks
15854 				 * entry into the "failfast pending" state.
15855 				 */
15856 				un->un_failfast_bp = bp;
15857 
15858 			} else if (un->un_failfast_bp == bp) {
15859 				/*
15860 				 * This is the second time *this* bp has
15861 				 * encountered a failfast error condition,
15862 				 * so enter active failfast state & flush
15863 				 * queues as appropriate.
15864 				 */
15865 				un->un_failfast_state = SD_FAILFAST_ACTIVE;
15866 				un->un_failfast_bp = NULL;
15867 				sd_failfast_flushq(un);
15868 
15869 				/*
15870 				 * Fail this bp now if B_FAILFAST set;
15871 				 * otherwise continue with retries. (It would
15872 				 * be pretty ironic if this bp succeeded on a
15873 				 * subsequent retry after we just flushed all
15874 				 * the queues).
15875 				 */
15876 				if (bp->b_flags & B_FAILFAST) {
15877 					goto fail_command;
15878 				}
15879 
15880 #if !defined(lint) && !defined(__lint)
15881 			} else {
15882 				/*
15883 				 * If neither of the preceding conditionals
15884 				 * was true, it means that there is some
15885 				 * *other* bp that has met an initial failfast
15886 				 * condition and is currently either being
15887 				 * retried or is waiting to be retried. In
15888 				 * that case we should perform normal retry
15889 				 * processing on *this* bp, since there is a
15890 				 * chance that the current failfast condition
15891 				 * is transient and recoverable. If that does
15892 				 * not turn out to be the case, then retries
15893 				 * will be cleared when the wait queue is
15894 				 * flushed anyway.
15895 				 */
15896 #endif
15897 			}
15898 		}
15899 	} else {
15900 		/*
15901 		 * SD_RETRIES_FAILFAST is clear, which indicates that we
15902 		 * likely were able to at least establish some level of
15903 		 * communication with the target and subsequent commands
15904 		 * and/or retries are likely to get through to the target.
15905 		 * In this case we want to be aggressive about clearing
15906 		 * the failfast state. Note that this does not affect
15907 		 * the "failfast pending" condition.
15908 		 */
15909 		un->un_failfast_state = SD_FAILFAST_INACTIVE;
15910 	}
15911 
15912 
15913 	/*
15914 	 * Check the specified retry count to see if we can still do
15915 	 * any retries with this pkt before we should fail it.
15916 	 */
15917 	switch (retry_check_flag & SD_RETRIES_MASK) {
15918 	case SD_RETRIES_VICTIM:
15919 		/*
15920 		 * Check the victim retry count. If exhausted, then fall
15921 		 * thru & check against the standard retry count.
15922 		 */
15923 		if (xp->xb_victim_retry_count < un->un_victim_retry_count) {
15924 			/* Increment count & proceed with the retry */
15925 			xp->xb_victim_retry_count++;
15926 			break;
15927 		}
15928 		/* Victim retries exhausted, fall back to std. retries... */
15929 		/* FALLTHRU */
15930 
15931 	case SD_RETRIES_STANDARD:
15932 		if (xp->xb_retry_count >= un->un_retry_count) {
15933 			/* Retries exhausted, fail the command */
15934 			SD_TRACE(SD_LOG_IO_CORE, un,
15935 			    "sd_retry_command: retries exhausted!\n");
15936 			/*
15937 			 * update b_resid for failed SCMD_READ & SCMD_WRITE
15938 			 * commands with nonzero pkt_resid.
15939 			 */
15940 			if ((pktp->pkt_reason == CMD_CMPLT) &&
15941 			    (SD_GET_PKT_STATUS(pktp) == STATUS_GOOD) &&
15942 			    (pktp->pkt_resid != 0)) {
15943 				uchar_t op = SD_GET_PKT_OPCODE(pktp) & 0x1F;
15944 				if ((op == SCMD_READ) || (op == SCMD_WRITE)) {
15945 					SD_UPDATE_B_RESID(bp, pktp);
15946 				}
15947 			}
15948 			goto fail_command;
15949 		}
15950 		xp->xb_retry_count++;
15951 		SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
15952 		    "sd_retry_command: retry count:%d\n", xp->xb_retry_count);
15953 		break;
15954 
15955 	case SD_RETRIES_UA:
15956 		if (xp->xb_ua_retry_count >= sd_ua_retry_count) {
15957 			/* Retries exhausted, fail the command */
15958 			scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
15959 			    "Unit Attention retries exhausted. "
15960 			    "Check the target.\n");
15961 			goto fail_command;
15962 		}
15963 		xp->xb_ua_retry_count++;
15964 		SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
15965 		    "sd_retry_command: retry count:%d\n",
15966 		    xp->xb_ua_retry_count);
15967 		break;
15968 
15969 	case SD_RETRIES_BUSY:
15970 		if (xp->xb_retry_count >= un->un_busy_retry_count) {
15971 			/* Retries exhausted, fail the command */
15972 			SD_TRACE(SD_LOG_IO_CORE, un,
15973 			    "sd_retry_command: retries exhausted!\n");
15974 			goto fail_command;
15975 		}
15976 		xp->xb_retry_count++;
15977 		SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
15978 		    "sd_retry_command: retry count:%d\n", xp->xb_retry_count);
15979 		break;
15980 
15981 	case SD_RETRIES_NOCHECK:
15982 	default:
15983 		/* No retry count to check. Just proceed with the retry */
15984 		break;
15985 	}
15986 
15987 	xp->xb_pktp->pkt_flags |= FLAG_HEAD;
15988 
15989 	/*
15990 	 * If this is a non-USCSI command that encountered an error during
15991 	 * its last execution and is now being retried, we should post an
15992 	 * ereport with a driver-assessment of "retry".
15993 	 * For partial DMA, request sense and STATUS_QFULL there are no
15994 	 * hardware errors, so we bypass ereport posting.
15995 	 */
15996 	if (failure_code != 0) {
15997 		if (!(xp->xb_pkt_flags & SD_XB_USCSICMD)) {
15998 			sd_ssc_extract_info(&sfip->fm_ssc, un, pktp, bp, xp);
15999 			sd_ssc_post(&sfip->fm_ssc, SD_FM_DRV_RETRY);
16000 		}
16001 	}
16002 
16003 	/*
16004 	 * If we were given a zero timeout, we must attempt to retry the
16005 	 * command immediately (ie, without a delay).
16006 	 */
16007 	if (retry_delay == 0) {
16008 		/*
16009 		 * Check some limiting conditions to see if we can actually
16010 		 * do the immediate retry.  If we cannot, then we must
16011 		 * fall back to queueing up a delayed retry.
16012 		 */
16013 		if (un->un_ncmds_in_transport >= un->un_throttle) {
16014 			/*
16015 			 * We are at the throttle limit for the target,
16016 			 * fall back to delayed retry.
16017 			 */
16018 			retry_delay = un->un_busy_timeout;
16019 			statp = kstat_waitq_enter;
16020 			SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
16021 			    "sd_retry_command: immed. retry hit "
16022 			    "throttle!\n");
16023 		} else {
16024 			/*
16025 			 * We're clear to proceed with the immediate retry.
16026 			 * First call the user-provided function (if any)
16027 			 */
16028 			if (user_funcp != NULL) {
16029 				(*user_funcp)(un, bp, user_arg,
16030 				    SD_IMMEDIATE_RETRY_ISSUED);
16031 #ifdef __lock_lint
16032 				sd_print_incomplete_msg(un, bp, user_arg,
16033 				    SD_IMMEDIATE_RETRY_ISSUED);
16034 				sd_print_cmd_incomplete_msg(un, bp, user_arg,
16035 				    SD_IMMEDIATE_RETRY_ISSUED);
16036 				sd_print_sense_failed_msg(un, bp, user_arg,
16037 				    SD_IMMEDIATE_RETRY_ISSUED);
16038 #endif
16039 			}
16040 
16041 			SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
16042 			    "sd_retry_command: issuing immediate retry\n");
16043 
16044 			/*
16045 			 * Call sd_start_cmds() to transport the command to
16046 			 * the target.
16047 			 */
16048 			sd_start_cmds(un, bp);
16049 
16050 			SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
16051 			    "sd_retry_command exit\n");
16052 			return;
16053 		}
16054 	}
16055 
16056 	/*
16057 	 * Set up to retry the command after a delay.
16058 	 * First call the user-provided function (if any)
16059 	 */
16060 	if (user_funcp != NULL) {
16061 		(*user_funcp)(un, bp, user_arg, SD_DELAYED_RETRY_ISSUED);
16062 	}
16063 
16064 	sd_set_retry_bp(un, bp, retry_delay, statp);
16065 
16066 	SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un, "sd_retry_command: exit\n");
16067 	return;
16068 
16069 fail_command:
16070 
16071 	if (user_funcp != NULL) {
16072 		(*user_funcp)(un, bp, user_arg, SD_NO_RETRY_ISSUED);
16073 	}
16074 
16075 fail_command_no_log:
16076 
16077 	SD_INFO(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
16078 	    "sd_retry_command: returning failed command\n");
16079 
16080 	sd_return_failed_command(un, bp, failure_code);
16081 
16082 	SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un, "sd_retry_command: exit\n");
16083 }
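
/*
 * Editor's sketch (illustrative only, not driver code): the retry
 * accounting above reduces to a cascade in which an exhausted victim
 * budget falls through to the standard budget.  A stand-alone model
 * with hypothetical names:
 *
 *	typedef struct {
 *		int	retry_count;		// standard retries used
 *		int	victim_retry_count;	// victim retries used
 *	} retry_state_t;
 *
 *	// Returns 1 if another retry is permitted, 0 if exhausted.
 *	static int
 *	retry_permitted(retry_state_t *rs, int flag,
 *	    int std_limit, int victim_limit)
 *	{
 *		if (flag == SD_RETRIES_VICTIM &&
 *		    rs->victim_retry_count < victim_limit) {
 *			rs->victim_retry_count++;
 *			return (1);
 *		}
 *		// SD_RETRIES_VICTIM falls through to the standard check.
 *		if (rs->retry_count >= std_limit)
 *			return (0);		// fail the command instead
 *		rs->retry_count++;
 *		return (1);
 *	}
 */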
16084 
16085 
16086 /*
16087  *    Function: sd_set_retry_bp
16088  *
16089  * Description: Set up the given bp for retry.
16090  *
16091  *   Arguments: un - ptr to associated softstate
16092  *		bp - ptr to buf(9S) for the command
16093  *		retry_delay - time interval before issuing retry (may be 0)
16094  *		statp - optional pointer to kstat function
16095  *
16096  *     Context: May be called under interrupt context
16097  */
16098 
16099 static void
16100 sd_set_retry_bp(struct sd_lun *un, struct buf *bp, clock_t retry_delay,
16101     void (*statp)(kstat_io_t *))
16102 {
16103 	ASSERT(un != NULL);
16104 	ASSERT(mutex_owned(SD_MUTEX(un)));
16105 	ASSERT(bp != NULL);
16106 
16107 	SD_TRACE(SD_LOG_IO | SD_LOG_ERROR, un,
16108 	    "sd_set_retry_bp: entry: un:0x%p bp:0x%p\n", un, bp);
16109 
16110 	/*
16111 	 * Indicate that the command is being retried. This will not allow any
16112 	 * other commands on the wait queue to be transported to the target
16113 	 * until this command has been completed (success or failure). The
16114 	 * "retry command" is not transported to the target until the given
16115 	 * time delay expires, unless the user specified a 0 retry_delay.
16116 	 *
16117 	 * Note: the timeout(9F) callback routine is what actually calls
16118 	 * sd_start_cmds() to transport the command, with the exception of a
16119 	 * zero retry_delay. The only current implementor of a zero retry delay
16120 	 * is the case where a START_STOP_UNIT is sent to spin-up a device.
16121 	 */
16122 	if (un->un_retry_bp == NULL) {
16123 		ASSERT(un->un_retry_statp == NULL);
16124 		un->un_retry_bp = bp;
16125 
16126 		/*
16127 		 * If the user has not specified a delay the command should
16128 		 * be queued and no timeout should be scheduled.
16129 		 */
16130 		if (retry_delay == 0) {
16131 			/*
16132 			 * Save the kstat pointer that will be used in the
16133 			 * call to SD_UPDATE_KSTATS() below, so that
16134 			 * sd_start_cmds() can correctly decrement the waitq
16135 			 * count when it is time to transport this command.
16136 			 */
16137 			un->un_retry_statp = statp;
16138 			goto done;
16139 		}
16140 	}
16141 
16142 	if (un->un_retry_bp == bp) {
16143 		/*
16144 		 * Save the kstat pointer that will be used in the call to
16145 		 * SD_UPDATE_KSTATS() below, so that sd_start_cmds() can
16146 		 * correctly decrement the waitq count when it is time to
16147 		 * transport this command.
16148 		 */
16149 		un->un_retry_statp = statp;
16150 
16151 		/*
16152 		 * Schedule a timeout if:
16153 		 *   1) The user has specified a delay.
16154 		 *   2) There is not a START_STOP_UNIT callback pending.
16155 		 *
16156 		 * If no delay has been specified, then it is up to the caller
16157 		 * to ensure that IO processing continues without stalling.
16158 		 * Effectively, this means that the caller will issue the
16159 		 * required call to sd_start_cmds(). The START_STOP_UNIT
16160 		 * callback does this after the START STOP UNIT command has
16161 		 * completed. In either of these cases we should not schedule
16162 		 * a timeout callback here.  Also don't schedule the timeout if
16163 		 * an SD_PATH_DIRECT_PRIORITY command is waiting to restart.
16164 		 */
16165 		if ((retry_delay != 0) && (un->un_startstop_timeid == NULL) &&
16166 		    (un->un_direct_priority_timeid == NULL)) {
16167 			un->un_retry_timeid =
16168 			    timeout(sd_start_retry_command, un, retry_delay);
16169 			SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
16170 			    "sd_set_retry_bp: setting timeout: un: 0x%p"
16171 			    " bp:0x%p un_retry_timeid:0x%p\n",
16172 			    un, bp, un->un_retry_timeid);
16173 		}
16174 	} else {
16175 		/*
16176 		 * We only get in here if there is already another command
16177 		 * waiting to be retried.  In this case, we just put the
16178 		 * given command onto the wait queue, so it can be transported
16179 		 * after the current retry command has completed.
16180 		 *
16181 		 * Also we have to make sure that if the command at the head
16182 		 * of the wait queue is the un_failfast_bp, that we do not
16183 		 * of the wait queue is the un_failfast_bp, we do not put
16184 		 * ahead of it any other commands that are to be retried.
16185 		if ((un->un_failfast_bp != NULL) &&
16186 		    (un->un_failfast_bp == un->un_waitq_headp)) {
16187 			/*
16188 			 * Enqueue this command AFTER the first command on
16189 			 * the wait queue (which is also un_failfast_bp).
16190 			 */
16191 			bp->av_forw = un->un_waitq_headp->av_forw;
16192 			un->un_waitq_headp->av_forw = bp;
16193 			if (un->un_waitq_headp == un->un_waitq_tailp) {
16194 				un->un_waitq_tailp = bp;
16195 			}
16196 		} else {
16197 			/* Enqueue this command at the head of the waitq. */
16198 			bp->av_forw = un->un_waitq_headp;
16199 			un->un_waitq_headp = bp;
16200 			if (un->un_waitq_tailp == NULL) {
16201 				un->un_waitq_tailp = bp;
16202 			}
16203 		}
16204 
16205 		if (statp == NULL) {
16206 			statp = kstat_waitq_enter;
16207 		}
16208 		SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
16209 		    "sd_set_retry_bp: un:0x%p already delayed retry\n", un);
16210 	}
16211 
16212 done:
16213 	if (statp != NULL) {
16214 		SD_UPDATE_KSTATS(un, statp, bp);
16215 	}
16216 
16217 	SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
16218 	    "sd_set_retry_bp: exit un:0x%p\n", un);
16219 }
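
/*
 * Editor's sketch (illustrative only, not driver code): the deferred
 * retry above hangs off a single timeout(9F).  The general schedule/
 * cancel shape, with a hypothetical my_restart() callback, is:
 *
 *	// Schedule: keep the id so the callback can later be cancelled.
 *	un->un_retry_timeid = timeout(my_restart, un, retry_delay);
 *
 *	// Cancel (e.g. on detach), with SD_MUTEX(un) dropped, since
 *	// untimeout(9F) may wait for an executing callback:
 *	timeout_id_t tid = un->un_retry_timeid;
 *	un->un_retry_timeid = NULL;
 *	if (tid != NULL)
 *		(void) untimeout(tid);
 */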
16220 
16221 
16222 /*
16223  *    Function: sd_start_retry_command
16224  *
16225  * Description: Start the command that has been waiting on the target's
16226  *		retry queue.  Called from timeout(9F) context after the
16227  *		retry delay interval has expired.
16228  *
16229  *   Arguments: arg - pointer to associated softstate for the device.
16230  *
16231  *     Context: timeout(9F) thread context.  May not sleep.
16232  */
16233 
16234 static void
16235 sd_start_retry_command(void *arg)
16236 {
16237 	struct sd_lun *un = arg;
16238 
16239 	ASSERT(un != NULL);
16240 	ASSERT(!mutex_owned(SD_MUTEX(un)));
16241 
16242 	SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
16243 	    "sd_start_retry_command: entry\n");
16244 
16245 	mutex_enter(SD_MUTEX(un));
16246 
16247 	un->un_retry_timeid = NULL;
16248 
16249 	if (un->un_retry_bp != NULL) {
16250 		SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
16251 		    "sd_start_retry_command: un:0x%p STARTING bp:0x%p\n",
16252 		    un, un->un_retry_bp);
16253 		sd_start_cmds(un, un->un_retry_bp);
16254 	}
16255 
16256 	mutex_exit(SD_MUTEX(un));
16257 
16258 	SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
16259 	    "sd_start_retry_command: exit\n");
16260 }
16261 
16262 /*
16263  *    Function: sd_rmw_msg_print_handler
16264  *
16265  * Description: If RMW mode is enabled and the warning message has been
16266  *              triggered, print the I/O count over a fixed interval.
16267  *
16268  *   Arguments: arg - pointer to associated softstate for the device.
16269  *
16270  *     Context: timeout(9F) thread context. May not sleep.
16271  */
16272 static void
16273 sd_rmw_msg_print_handler(void *arg)
16274 {
16275 	struct sd_lun *un = arg;
16276 
16277 	ASSERT(un != NULL);
16278 	ASSERT(!mutex_owned(SD_MUTEX(un)));
16279 
16280 	SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
16281 	    "sd_rmw_msg_print_handler: entry\n");
16282 
16283 	mutex_enter(SD_MUTEX(un));
16284 
16285 	if (un->un_rmw_incre_count > 0) {
16286 		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
16287 		    "%"PRIu64" I/O requests were not aligned with the %d-byte "
16288 		    "disk sector size in the past %ld seconds. They were handled "
16289 		    "via Read Modify Write, severely degrading performance!\n",
16290 		    un->un_rmw_incre_count, un->un_tgt_blocksize,
16291 		    drv_hztousec(SD_RMW_MSG_PRINT_TIMEOUT) / 1000000);
16292 		un->un_rmw_incre_count = 0;
16293 		un->un_rmw_msg_timeid = timeout(sd_rmw_msg_print_handler,
16294 		    un, SD_RMW_MSG_PRINT_TIMEOUT);
16295 	} else {
16296 		un->un_rmw_msg_timeid = NULL;
16297 	}
16298 
16299 	mutex_exit(SD_MUTEX(un));
16300 
16301 	SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
16302 	    "sd_rmw_msg_print_handler: exit\n");
16303 }
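
/*
 * Editor's sketch (illustrative only, not driver code): the handler
 * above is an instance of the self-rearming timeout pattern --
 * re-schedule only while there is pending work, otherwise let the
 * timeout chain die.  Generic form with hypothetical names:
 *
 *	static void
 *	my_periodic(void *arg)
 *	{
 *		my_state_t *sp = arg;
 *
 *		mutex_enter(&sp->lock);
 *		if (sp->pending_work > 0) {
 *			sp->pending_work = 0;	// consume and re-arm
 *			sp->tid = timeout(my_periodic, sp, MY_INTERVAL);
 *		} else {
 *			sp->tid = NULL;		// chain ends here
 *		}
 *		mutex_exit(&sp->lock);
 *	}
 */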
16304 
16305 /*
16306  *    Function: sd_start_direct_priority_command
16307  *
16308  * Description: Used to re-start an SD_PATH_DIRECT_PRIORITY command that had
16309  *		received TRAN_BUSY when we called scsi_transport() to send it
16310  *		to the underlying HBA. This function is called from timeout(9F)
16311  *		context after the delay interval has expired.
16312  *
16313  *   Arguments: arg - pointer to associated buf(9S) to be restarted.
16314  *
16315  *     Context: timeout(9F) thread context.  May not sleep.
16316  */
16317 
16318 static void
16319 sd_start_direct_priority_command(void *arg)
16320 {
16321 	struct buf	*priority_bp = arg;
16322 	struct sd_lun	*un;
16323 
16324 	ASSERT(priority_bp != NULL);
16325 	un = SD_GET_UN(priority_bp);
16326 	ASSERT(un != NULL);
16327 	ASSERT(!mutex_owned(SD_MUTEX(un)));
16328 
16329 	SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
16330 	    "sd_start_direct_priority_command: entry\n");
16331 
16332 	mutex_enter(SD_MUTEX(un));
16333 	un->un_direct_priority_timeid = NULL;
16334 	sd_start_cmds(un, priority_bp);
16335 	mutex_exit(SD_MUTEX(un));
16336 
16337 	SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
16338 	    "sd_start_direct_priority_command: exit\n");
16339 }
16340 
16341 
16342 /*
16343  *    Function: sd_send_request_sense_command
16344  *
16345  * Description: Sends a REQUEST SENSE command to the target
16346  *
16347  *     Context: May be called from interrupt context.
16348  */
16349 
16350 static void
16351 sd_send_request_sense_command(struct sd_lun *un, struct buf *bp,
16352     struct scsi_pkt *pktp)
16353 {
16354 	ASSERT(bp != NULL);
16355 	ASSERT(un != NULL);
16356 	ASSERT(mutex_owned(SD_MUTEX(un)));
16357 
16358 	SD_TRACE(SD_LOG_IO | SD_LOG_ERROR, un, "sd_send_request_sense_command: "
16359 	    "entry: buf:0x%p\n", bp);
16360 
16361 	/*
16362 	 * If we are syncing or dumping, then fail the command to avoid a
16363 	 * recursive callback into scsi_transport(). Also fail the command
16364 	 * if we are suspended (legacy behavior).
16365 	 */
16366 	if (ddi_in_panic() || (un->un_state == SD_STATE_SUSPENDED) ||
16367 	    (un->un_state == SD_STATE_DUMPING)) {
16368 		sd_return_failed_command(un, bp, EIO);
16369 		SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
16370 		    "sd_send_request_sense_command: syncing/dumping, exit\n");
16371 		return;
16372 	}
16373 
16374 	/*
16375 	 * Retry the failed command and don't issue the request sense if:
16376 	 *    1) the sense buf is busy
16377 	 *    2) we have 1 or more outstanding commands on the target
16378 	 *    (the sense data will be cleared or invalidated anyway)
16379 	 *
16380 	 * Note: There could be an issue with not checking a retry limit here;
16381 	 * the problem is determining which retry limit to check.
16382 	 */
16383 	if ((un->un_sense_isbusy != 0) || (un->un_ncmds_in_transport > 0)) {
16384 		/* Don't retry if the command is flagged as non-retryable */
16385 		if ((pktp->pkt_flags & FLAG_DIAGNOSE) == 0) {
16386 			sd_retry_command(un, bp, SD_RETRIES_NOCHECK,
16387 			    NULL, NULL, 0, un->un_busy_timeout,
16388 			    kstat_waitq_enter);
16389 			SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
16390 			    "sd_send_request_sense_command: "
16391 			    "at full throttle, retrying exit\n");
16392 		} else {
16393 			sd_return_failed_command(un, bp, EIO);
16394 			SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
16395 			    "sd_send_request_sense_command: "
16396 			    "at full throttle, non-retryable exit\n");
16397 		}
16398 		return;
16399 	}
16400 
16401 	sd_mark_rqs_busy(un, bp);
16402 	sd_start_cmds(un, un->un_rqs_bp);
16403 
16404 	SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
16405 	    "sd_send_request_sense_command: exit\n");
16406 }
16407 
16408 
16409 /*
16410  *    Function: sd_mark_rqs_busy
16411  *
16412  * Description: Indicate that the request sense bp for this instance is
16413  *		in use.
16414  *
16415  *     Context: May be called under interrupt context
16416  */
16417 
16418 static void
16419 sd_mark_rqs_busy(struct sd_lun *un, struct buf *bp)
16420 {
16421 	struct sd_xbuf	*sense_xp;
16422 
16423 	ASSERT(un != NULL);
16424 	ASSERT(bp != NULL);
16425 	ASSERT(mutex_owned(SD_MUTEX(un)));
16426 	ASSERT(un->un_sense_isbusy == 0);
16427 
16428 	SD_TRACE(SD_LOG_IO_CORE, un, "sd_mark_rqs_busy: entry: "
16429 	    "buf:0x%p xp:0x%p un:0x%p\n", bp, SD_GET_XBUF(bp), un);
16430 
16431 	sense_xp = SD_GET_XBUF(un->un_rqs_bp);
16432 	ASSERT(sense_xp != NULL);
16433 
16434 	SD_INFO(SD_LOG_IO, un,
16435 	    "sd_mark_rqs_busy: entry: sense_xp:0x%p\n", sense_xp);
16436 
16437 	ASSERT(sense_xp->xb_pktp != NULL);
16438 	ASSERT((sense_xp->xb_pktp->pkt_flags & (FLAG_SENSING | FLAG_HEAD))
16439 	    == (FLAG_SENSING | FLAG_HEAD));
16440 
16441 	un->un_sense_isbusy = 1;
16442 	un->un_rqs_bp->b_resid = 0;
16443 	sense_xp->xb_pktp->pkt_resid  = 0;
16444 	sense_xp->xb_pktp->pkt_reason = 0;
16445 
16446 	/* So we can get back the bp at interrupt time! */
16447 	sense_xp->xb_sense_bp = bp;
16448 
16449 	bzero(un->un_rqs_bp->b_un.b_addr, SENSE_LENGTH);
16450 
16451 	/*
16452 	 * Mark this buf as awaiting sense data. (This is already set in
16453 	 * the pkt_flags for the RQS packet.)
16454 	 */
16455 	((SD_GET_XBUF(bp))->xb_pktp)->pkt_flags |= FLAG_SENSING;
16456 
16457 	/* Request sense down same path */
16458 	if (scsi_pkt_allocated_correctly((SD_GET_XBUF(bp))->xb_pktp) &&
16459 	    ((SD_GET_XBUF(bp))->xb_pktp)->pkt_path_instance)
16460 		sense_xp->xb_pktp->pkt_path_instance =
16461 		    ((SD_GET_XBUF(bp))->xb_pktp)->pkt_path_instance;
16462 
16463 	sense_xp->xb_retry_count = 0;
16464 	sense_xp->xb_victim_retry_count = 0;
16465 	sense_xp->xb_ua_retry_count = 0;
16466 	sense_xp->xb_nr_retry_count = 0;
16467 	sense_xp->xb_dma_resid  = 0;
16468 
16469 	/* Clean up the fields for auto-request sense */
16470 	sense_xp->xb_sense_status = 0;
16471 	sense_xp->xb_sense_state = 0;
16472 	sense_xp->xb_sense_resid = 0;
16473 	bzero(sense_xp->xb_sense_data, sizeof (sense_xp->xb_sense_data));
16474 
16475 	SD_TRACE(SD_LOG_IO_CORE, un, "sd_mark_rqs_busy: exit\n");
16476 }
16477 
16478 
16479 /*
16480  *    Function: sd_mark_rqs_idle
16481  *
16482  * Description: SD_MUTEX must be held continuously through this routine
16483  *		to prevent reuse of the rqs struct before the caller can
16484  *		complete its processing.
16485  *
16486  * Return Code: Pointer to the RQS buf
16487  *
16488  *     Context: May be called under interrupt context
16489  */
16490 
16491 static struct buf *
16492 sd_mark_rqs_idle(struct sd_lun *un, struct sd_xbuf *sense_xp)
16493 {
16494 	struct buf *bp;
16495 	ASSERT(un != NULL);
16496 	ASSERT(sense_xp != NULL);
16497 	ASSERT(mutex_owned(SD_MUTEX(un)));
16498 	ASSERT(un->un_sense_isbusy != 0);
16499 
16500 	un->un_sense_isbusy = 0;
16501 	bp = sense_xp->xb_sense_bp;
16502 	sense_xp->xb_sense_bp = NULL;
16503 
16504 	/* This pkt is no longer interested in getting sense data */
16505 	((SD_GET_XBUF(bp))->xb_pktp)->pkt_flags &= ~FLAG_SENSING;
16506 
16507 	return (bp);
16508 }
16509 
16510 
16511 
16512 /*
16513  *    Function: sd_alloc_rqs
16514  *
16515  * Description: Set up the unit to receive auto request sense data
16516  *
16517  * Return Code: DDI_SUCCESS or DDI_FAILURE
16518  *
16519  *     Context: Called under attach(9E) context
16520  */
16521 
16522 static int
16523 sd_alloc_rqs(struct scsi_device *devp, struct sd_lun *un)
16524 {
16525 	struct sd_xbuf *xp;
16526 
16527 	ASSERT(un != NULL);
16528 	ASSERT(!mutex_owned(SD_MUTEX(un)));
16529 	ASSERT(un->un_rqs_bp == NULL);
16530 	ASSERT(un->un_rqs_pktp == NULL);
16531 
16532 	/*
16533 	 * First allocate the required buf and scsi_pkt structs, then set up
16534 	 * the CDB in the scsi_pkt for a REQUEST SENSE command.
16535 	 */
16536 	un->un_rqs_bp = scsi_alloc_consistent_buf(&devp->sd_address, NULL,
16537 	    MAX_SENSE_LENGTH, B_READ, SLEEP_FUNC, NULL);
16538 	if (un->un_rqs_bp == NULL) {
16539 		return (DDI_FAILURE);
16540 	}
16541 
16542 	un->un_rqs_pktp = scsi_init_pkt(&devp->sd_address, NULL, un->un_rqs_bp,
16543 	    CDB_GROUP0, 1, 0, PKT_CONSISTENT, SLEEP_FUNC, NULL);
16544 
16545 	if (un->un_rqs_pktp == NULL) {
16546 		sd_free_rqs(un);
16547 		return (DDI_FAILURE);
16548 	}
16549 
16550 	/* Set up the CDB in the scsi_pkt for a REQUEST SENSE command. */
16551 	(void) scsi_setup_cdb((union scsi_cdb *)un->un_rqs_pktp->pkt_cdbp,
16552 	    SCMD_REQUEST_SENSE, 0, MAX_SENSE_LENGTH, 0);
16553 
16554 	SD_FILL_SCSI1_LUN(un, un->un_rqs_pktp);
16555 
16556 	/* Set up the other needed members in the ARQ scsi_pkt. */
16557 	un->un_rqs_pktp->pkt_comp   = sdintr;
16558 	un->un_rqs_pktp->pkt_time   = sd_io_time;
16559 	un->un_rqs_pktp->pkt_flags |=
16560 	    (FLAG_SENSING | FLAG_HEAD);	/* (1222170) */
16561 
16562 	/*
16563 	 * Allocate  & init the sd_xbuf struct for the RQS command. Do not
16564 	 * provide any intpkt, destroypkt routines as we take care of
16565 	 * scsi_pkt allocation/freeing here and in sd_free_rqs().
16566 	 */
16567 	xp = kmem_alloc(sizeof (struct sd_xbuf), KM_SLEEP);
16568 	sd_xbuf_init(un, un->un_rqs_bp, xp, SD_CHAIN_NULL, NULL);
16569 	xp->xb_pktp = un->un_rqs_pktp;
16570 	SD_INFO(SD_LOG_ATTACH_DETACH, un,
16571 	    "sd_alloc_rqs: un 0x%p, rqs  xp 0x%p,  pkt 0x%p,  buf 0x%p\n",
16572 	    un, xp, un->un_rqs_pktp, un->un_rqs_bp);
16573 
16574 	/*
16575 	 * Save the pointer to the request sense private bp so it can
16576 	 * be retrieved in sdintr.
16577 	 */
16578 	un->un_rqs_pktp->pkt_private = un->un_rqs_bp;
16579 	ASSERT(un->un_rqs_bp->b_private == xp);
16580 
16581 	/*
16582 	 * See if the HBA supports auto-request sense for the specified
16583 	 * target/lun. If it does, then try to enable it (if not already
16584 	 * enabled).
16585 	 *
16586 	 * Note: For some HBAs (ifp & sf), scsi_ifsetcap will always return
16587 	 * failure, while for other HBAs (pln) scsi_ifsetcap will always
16588 	 * return success.  However, in both of these cases ARQ is always
16589 	 * enabled and scsi_ifgetcap will always return true. The best approach
16590 	 * is to issue the scsi_ifgetcap() first, then try the scsi_ifsetcap().
16591 	 *
16592 	 * The third case is an HBA (adp) that always returns enabled on
16593 	 * scsi_ifgetcap even when ARQ is not enabled; there the best approach
16594 	 * is to issue a scsi_ifsetcap followed by a scsi_ifgetcap.
16595 	 * Note: this case exists to circumvent the Adaptec bug. (x86 only)
16596 	 */
16597 
16598 	if (un->un_f_is_fibre == TRUE) {
16599 		un->un_f_arq_enabled = TRUE;
16600 	} else {
16601 #if defined(__x86)
16602 		/*
16603 		 * Circumvent the Adaptec bug, remove this code when
16604 		 * the bug is fixed
16605 		 */
16606 		(void) scsi_ifsetcap(SD_ADDRESS(un), "auto-rqsense", 1, 1);
16607 #endif
16608 		switch (scsi_ifgetcap(SD_ADDRESS(un), "auto-rqsense", 1)) {
16609 		case 0:
16610 			SD_INFO(SD_LOG_ATTACH_DETACH, un,
16611 			    "sd_alloc_rqs: HBA supports ARQ\n");
16612 			/*
16613 			 * ARQ is supported by this HBA but currently is not
16614 			 * enabled. Attempt to enable it and if successful then
16615 			 * mark this instance as ARQ enabled.
16616 			 */
16617 			if (scsi_ifsetcap(SD_ADDRESS(un), "auto-rqsense", 1, 1)
16618 			    == 1) {
16619 				/* Successfully enabled ARQ in the HBA */
16620 				SD_INFO(SD_LOG_ATTACH_DETACH, un,
16621 				    "sd_alloc_rqs: ARQ enabled\n");
16622 				un->un_f_arq_enabled = TRUE;
16623 			} else {
16624 				/* Could not enable ARQ in the HBA */
16625 				SD_INFO(SD_LOG_ATTACH_DETACH, un,
16626 				    "sd_alloc_rqs: failed ARQ enable\n");
16627 				un->un_f_arq_enabled = FALSE;
16628 			}
16629 			break;
16630 		case 1:
16631 			/*
16632 			 * ARQ is supported by this HBA and is already enabled.
16633 			 * Just mark ARQ as enabled for this instance.
16634 			 */
16635 			SD_INFO(SD_LOG_ATTACH_DETACH, un,
16636 			    "sd_alloc_rqs: ARQ already enabled\n");
16637 			un->un_f_arq_enabled = TRUE;
16638 			break;
16639 		default:
16640 			/*
16641 			 * ARQ is not supported by this HBA; disable it for this
16642 			 * instance.
16643 			 */
16644 			SD_INFO(SD_LOG_ATTACH_DETACH, un,
16645 			    "sd_alloc_rqs: HBA does not support ARQ\n");
16646 			un->un_f_arq_enabled = FALSE;
16647 			break;
16648 		}
16649 	}
16650 
16651 	return (DDI_SUCCESS);
16652 }
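
/*
 * Illustrative sketch (not compiled): the "getcap first, then setcap"
 * ARQ negotiation described in the comment above, reduced to its core.
 * This is a hedged example of the pattern, not the driver's exact code
 * path; the x86-only pre-set for the Adaptec workaround is included.
 * The "example_" name is hypothetical.
 */
#if 0
static boolean_t
example_negotiate_arq(struct scsi_address *ap)
{
#if defined(__x86)
	/* Adaptec workaround: force a setcap before trusting getcap. */
	(void) scsi_ifsetcap(ap, "auto-rqsense", 1, 1);
#endif
	switch (scsi_ifgetcap(ap, "auto-rqsense", 1)) {
	case 1:
		return (B_TRUE);	/* already enabled */
	case 0:
		/* Supported but currently disabled: try to enable it. */
		return (scsi_ifsetcap(ap, "auto-rqsense", 1, 1) == 1);
	default:
		return (B_FALSE);	/* ARQ not supported */
	}
}
#endif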
16653 
16654 
16655 /*
16656  *    Function: sd_free_rqs
16657  *
16658  * Description: Cleanup for the per-instance RQS command.
16659  *
16660  *     Context: Kernel thread context
16661  */
16662 
16663 static void
16664 sd_free_rqs(struct sd_lun *un)
16665 {
16666 	ASSERT(un != NULL);
16667 
16668 	SD_TRACE(SD_LOG_IO_CORE, un, "sd_free_rqs: entry\n");
16669 
16670 	/*
16671 	 * If consistent memory is bound to a scsi_pkt, the pkt
16672 	 * has to be destroyed *before* freeing the consistent memory.
16673 	 * Don't change the sequence of these operations:
16674 	 * scsi_destroy_pkt() might otherwise access memory that was
16675 	 * already freed by scsi_free_consistent_buf(), which isn't allowed.
16676 	 */
16677 	if (un->un_rqs_pktp != NULL) {
16678 		scsi_destroy_pkt(un->un_rqs_pktp);
16679 		un->un_rqs_pktp = NULL;
16680 	}
16681 
16682 	if (un->un_rqs_bp != NULL) {
16683 		struct sd_xbuf *xp = SD_GET_XBUF(un->un_rqs_bp);
16684 		if (xp != NULL) {
16685 			kmem_free(xp, sizeof (struct sd_xbuf));
16686 		}
16687 		scsi_free_consistent_buf(un->un_rqs_bp);
16688 		un->un_rqs_bp = NULL;
16689 	}
16690 	SD_TRACE(SD_LOG_IO_CORE, un, "sd_free_rqs: exit\n");
16691 }
16692 
16693 
16694 
16695 /*
16696  *    Function: sd_reduce_throttle
16697  *
16698  * Description: Reduces the maximum # of outstanding commands on a
16699  *		target to the current number of outstanding commands.
16700  *		Queues a timeout(9F) callback to restore the limit
16701  *		after a specified interval has elapsed.
16702  *		Typically used when we get a TRAN_BUSY return code
16703  *		back from scsi_transport().
16704  *
16705  *   Arguments: un - ptr to the sd_lun softstate struct
16706  *		throttle_type: SD_THROTTLE_TRAN_BUSY or SD_THROTTLE_QFULL
16707  *
16708  *     Context: May be called from interrupt context
16709  */
16710 
16711 static void
16712 sd_reduce_throttle(struct sd_lun *un, int throttle_type)
16713 {
16714 	ASSERT(un != NULL);
16715 	ASSERT(mutex_owned(SD_MUTEX(un)));
16716 	ASSERT(un->un_ncmds_in_transport >= 0);
16717 
16718 	SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un, "sd_reduce_throttle: "
16719 	    "entry: un:0x%p un_throttle:%d un_ncmds_in_transport:%d\n",
16720 	    un, un->un_throttle, un->un_ncmds_in_transport);
16721 
16722 	if (un->un_throttle > 1) {
16723 		if (un->un_f_use_adaptive_throttle == TRUE) {
16724 			switch (throttle_type) {
16725 			case SD_THROTTLE_TRAN_BUSY:
16726 				if (un->un_busy_throttle == 0) {
16727 					un->un_busy_throttle = un->un_throttle;
16728 				}
16729 				break;
16730 			case SD_THROTTLE_QFULL:
16731 				un->un_busy_throttle = 0;
16732 				break;
16733 			default:
16734 				ASSERT(FALSE);
16735 			}
16736 
16737 			if (un->un_ncmds_in_transport > 0) {
16738 				un->un_throttle = un->un_ncmds_in_transport;
16739 			}
16740 
16741 		} else {
16742 			if (un->un_ncmds_in_transport == 0) {
16743 				un->un_throttle = 1;
16744 			} else {
16745 				un->un_throttle = un->un_ncmds_in_transport;
16746 			}
16747 		}
16748 	}
16749 
16750 	/* Reschedule the timeout if none is currently active */
16751 	if (un->un_reset_throttle_timeid == NULL) {
16752 		un->un_reset_throttle_timeid = timeout(sd_restore_throttle,
16753 		    un, SD_THROTTLE_RESET_INTERVAL);
16754 		SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
16755 		    "sd_reduce_throttle: timeout scheduled!\n");
16756 	}
16757 
16758 	SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un, "sd_reduce_throttle: "
16759 	    "exit: un:0x%p un_throttle:%d\n", un, un->un_throttle);
16760 }
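
/*
 * Illustrative sketch (not compiled): the throttle-reduction rule from
 * sd_reduce_throttle() above, isolated. With adaptive throttling a
 * TRAN_BUSY saves the current limit for later restoration while QFULL
 * discards it; in both modes the new limit becomes the number of
 * commands currently in transport (never below 1). Hypothetical
 * helper, shown only to make the rule concrete.
 */
#if 0
static int
example_reduced_throttle(int cur_throttle, int ncmds_in_transport,
    boolean_t adaptive)
{
	if (cur_throttle <= 1)
		return (cur_throttle);	/* already at the floor */
	if (adaptive)
		return (ncmds_in_transport > 0 ?
		    ncmds_in_transport : cur_throttle);
	return (ncmds_in_transport > 0 ? ncmds_in_transport : 1);
}
#endif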
16761 
16762 
16763 
16764 /*
16765  *    Function: sd_restore_throttle
16766  *
16767  * Description: Callback function for timeout(9F).  Resets the current
16768  *		value of un->un_throttle to its default.
16769  *
16770  *   Arguments: arg - pointer to associated softstate for the device.
16771  *
16772  *     Context: May be called from interrupt context
16773  */
16774 
16775 static void
16776 sd_restore_throttle(void *arg)
16777 {
16778 	struct sd_lun	*un = arg;
16779 
16780 	ASSERT(un != NULL);
16781 	ASSERT(!mutex_owned(SD_MUTEX(un)));
16782 
16783 	mutex_enter(SD_MUTEX(un));
16784 
16785 	SD_TRACE(SD_LOG_IO | SD_LOG_ERROR, un, "sd_restore_throttle: "
16786 	    "entry: un:0x%p un_throttle:%d\n", un, un->un_throttle);
16787 
16788 	un->un_reset_throttle_timeid = NULL;
16789 
16790 	if (un->un_f_use_adaptive_throttle == TRUE) {
16791 		/*
16792 		 * If un_busy_throttle is nonzero, then it contains the
16793 		 * value that un_throttle was when we got a TRAN_BUSY back
16794 		 * from scsi_transport(). We want to revert back to this
16795 		 * value.
16796 		 *
16797 		 * In the QFULL case, the throttle limit will incrementally
16798 		 * increase until it reaches max throttle.
16799 		 */
16800 		if (un->un_busy_throttle > 0) {
16801 			un->un_throttle = un->un_busy_throttle;
16802 			un->un_busy_throttle = 0;
16803 		} else {
16804 			/*
16805 			 * Increase the throttle by 10% to open the gate
16806 			 * slowly; schedule another restore if the saved
16807 			 * throttle has not yet been reached.
16808 			 */
16809 			short throttle;
16810 			if (sd_qfull_throttle_enable) {
16811 				throttle = un->un_throttle +
16812 				    max((un->un_throttle / 10), 1);
16813 				un->un_throttle =
16814 				    (throttle < un->un_saved_throttle) ?
16815 				    throttle : un->un_saved_throttle;
16816 				if (un->un_throttle < un->un_saved_throttle) {
16817 					un->un_reset_throttle_timeid =
16818 					    timeout(sd_restore_throttle,
16819 					    un,
16820 					    SD_QFULL_THROTTLE_RESET_INTERVAL);
16821 				}
16822 			}
16823 		}
16824 
16825 		/*
16826 		 * If un_throttle has fallen below the low-water mark, we
16827 		 * restore the maximum value here (and allow it to ratchet
16828 		 * down again if necessary).
16829 		 */
16830 		if (un->un_throttle < un->un_min_throttle) {
16831 			un->un_throttle = un->un_saved_throttle;
16832 		}
16833 	} else {
16834 		SD_TRACE(SD_LOG_IO | SD_LOG_ERROR, un, "sd_restore_throttle: "
16835 		    "restoring limit from 0x%x to 0x%x\n",
16836 		    un->un_throttle, un->un_saved_throttle);
16837 		un->un_throttle = un->un_saved_throttle;
16838 	}
16839 
16840 	SD_TRACE(SD_LOG_IO | SD_LOG_ERROR, un,
16841 	    "sd_restore_throttle: calling sd_start_cmds!\n");
16842 
16843 	sd_start_cmds(un, NULL);
16844 
16845 	SD_TRACE(SD_LOG_IO | SD_LOG_ERROR, un,
16846 	    "sd_restore_throttle: exit: un:0x%p un_throttle:%d\n",
16847 	    un, un->un_throttle);
16848 
16849 	mutex_exit(SD_MUTEX(un));
16850 
16851 	SD_TRACE(SD_LOG_IO | SD_LOG_ERROR, un, "sd_restore_throttle: exit\n");
16852 }
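
/*
 * Illustrative sketch (not compiled): the QFULL ramp used above. Each
 * interval the throttle grows by max(current / 10, 1) until the saved
 * value is reached, e.g. 8 -> 9 -> 10 -> 11 -> 12 -> ... -> saved.
 * Hypothetical helper, shown only to make the arithmetic concrete.
 */
#if 0
static short
example_ramp_throttle(short cur, short saved)
{
	short inc = cur / 10;

	if (inc < 1)
		inc = 1;
	return ((cur + inc < saved) ? cur + inc : saved);
}
#endif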
16853 
16854 /*
16855  *    Function: sdrunout
16856  *
16857  * Description: Callback routine for scsi_init_pkt when a resource allocation
16858  *		fails.
16859  *
16860  *   Arguments: arg - a pointer to the sd_lun unit struct for the particular
16861  *		soft state instance.
16862  *
16863  * Return Code: The scsi_init_pkt routine allows for the callback function to
16864  *		return a 0 indicating the callback should be rescheduled or a 1
16865  *		indicating not to reschedule. This routine always returns 1
16866  *		because the driver always provides a callback function to
16867  *		scsi_init_pkt. This results in a callback always being scheduled
16868  *		(via the scsi_init_pkt callback implementation) if a resource
16869  *		failure occurs.
16870  *
16871  *     Context: This callback function may not block or call routines that block
16872  *
16873  *        Note: Using the scsi_init_pkt callback facility can result in an I/O
16874  *		request persisting at the head of the list which cannot be
16875  *		satisfied even after multiple retries. In the future the driver
16876  *		may implement some type of maximum runout count before failing
16877  *		an I/O.
16878  */
16879 
16880 static int
16881 sdrunout(caddr_t arg)
16882 {
16883 	struct sd_lun	*un = (struct sd_lun *)arg;
16884 
16885 	ASSERT(un != NULL);
16886 	ASSERT(!mutex_owned(SD_MUTEX(un)));
16887 
16888 	SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un, "sdrunout: entry\n");
16889 
16890 	mutex_enter(SD_MUTEX(un));
16891 	sd_start_cmds(un, NULL);
16892 	mutex_exit(SD_MUTEX(un));
16893 	/*
16894 	 * This callback routine always returns 1 (i.e. do not reschedule)
16895 	 * because we always specify sdrunout as the callback handler for
16896 	 * scsi_init_pkt inside the call to sd_start_cmds.
16897 	 */
16898 	SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un, "sdrunout: exit\n");
16899 	return (1);
16900 }
16901 
16902 
16903 /*
16904  *    Function: sdintr
16905  *
16906  * Description: Completion callback routine for scsi_pkt(9S) structs
16907  *		sent to the HBA driver via scsi_transport(9F).
16908  *
16909  *     Context: Interrupt context
16910  */
16911 
16912 static void
16913 sdintr(struct scsi_pkt *pktp)
16914 {
16915 	struct buf	*bp;
16916 	struct sd_xbuf	*xp;
16917 	struct sd_lun	*un;
16918 	size_t		actual_len;
16919 	sd_ssc_t	*sscp;
16920 
16921 	ASSERT(pktp != NULL);
16922 	bp = (struct buf *)pktp->pkt_private;
16923 	ASSERT(bp != NULL);
16924 	xp = SD_GET_XBUF(bp);
16925 	ASSERT(xp != NULL);
16926 	ASSERT(xp->xb_pktp != NULL);
16927 	un = SD_GET_UN(bp);
16928 	ASSERT(un != NULL);
16929 	ASSERT(!mutex_owned(SD_MUTEX(un)));
16930 
16931 #ifdef SD_FAULT_INJECTION
16932 
16933 	SD_INFO(SD_LOG_IOERR, un, "sdintr: calling fault injection\n");
16934 	/* SD FaultInjection */
16935 	sd_faultinjection(pktp);
16936 
16937 #endif /* SD_FAULT_INJECTION */
16938 
16939 	SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un, "sdintr: entry: buf:0x%p,"
16940 	    " xp:0x%p, un:0x%p\n", bp, xp, un);
16941 
16942 	mutex_enter(SD_MUTEX(un));
16943 
16944 	ASSERT(un->un_fm_private != NULL);
16945 	sscp = &((struct sd_fm_internal *)(un->un_fm_private))->fm_ssc;
16946 	ASSERT(sscp != NULL);
16947 
16948 	/* Reduce the count of the #commands currently in transport */
16949 	un->un_ncmds_in_transport--;
16950 	ASSERT(un->un_ncmds_in_transport >= 0);
16951 
16952 	/* Increment counter to indicate that the callback routine is active */
16953 	un->un_in_callback++;
16954 
16955 	SD_UPDATE_KSTATS(un, kstat_runq_exit, bp);
16956 
16957 #ifdef	SDDEBUG
16958 	if (bp == un->un_retry_bp) {
16959 		SD_TRACE(SD_LOG_IO | SD_LOG_ERROR, un, "sdintr: "
16960 		    "un:0x%p: GOT retry_bp:0x%p un_ncmds_in_transport:%d\n",
16961 		    un, un->un_retry_bp, un->un_ncmds_in_transport);
16962 	}
16963 #endif
16964 
16965 	/*
16966 	 * If pkt_reason is CMD_DEV_GONE, fail the command, and update the media
16967 	 * state if needed.
16968 	 */
16969 	if (pktp->pkt_reason == CMD_DEV_GONE) {
16970 		/* Prevent multiple console messages for the same failure. */
16971 		if (un->un_last_pkt_reason != CMD_DEV_GONE) {
16972 			un->un_last_pkt_reason = CMD_DEV_GONE;
16973 			scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
16974 			    "Command failed to complete...Device is gone\n");
16975 		}
16976 		if (un->un_mediastate != DKIO_DEV_GONE) {
16977 			un->un_mediastate = DKIO_DEV_GONE;
16978 			cv_broadcast(&un->un_state_cv);
16979 		}
16980 		/*
16981 		 * If the command happens to be the REQUEST SENSE command,
16982 		 * free up the rqs buf and fail the original command.
16983 		 */
16984 		if (bp == un->un_rqs_bp) {
16985 			bp = sd_mark_rqs_idle(un, xp);
16986 		}
16987 		sd_return_failed_command(un, bp, EIO);
16988 		goto exit;
16989 	}
16990 
16991 	if (pktp->pkt_state & STATE_XARQ_DONE) {
16992 		SD_TRACE(SD_LOG_COMMON, un,
16993 		    "sdintr: extra sense data received. pkt=%p\n", pktp);
16994 	}
16995 
16996 	/*
16997 	 * First see if the pkt has auto-request sense data with it....
16998 	 * Look at the packet state first so we don't take a performance
16999 	 * hit looking at the arq enabled flag unless absolutely necessary.
17000 	 */
17001 	if ((pktp->pkt_state & STATE_ARQ_DONE) &&
17002 	    (un->un_f_arq_enabled == TRUE)) {
17003 		/*
17004 		 * The HBA did an auto request sense for this command so check
17005 		 * for FLAG_DIAGNOSE. If set this indicates a uscsi or internal
17006 		 * driver command that should not be retried.
17007 		 */
17008 		if ((pktp->pkt_flags & FLAG_DIAGNOSE) != 0) {
17009 			/*
17010 			 * Save the relevant sense info into the xp for the
17011 			 * original cmd.
17012 			 */
17013 			struct scsi_arq_status *asp;
17014 			asp = (struct scsi_arq_status *)(pktp->pkt_scbp);
17015 			xp->xb_sense_status =
17016 			    *((uchar_t *)(&(asp->sts_rqpkt_status)));
17017 			xp->xb_sense_state  = asp->sts_rqpkt_state;
17018 			xp->xb_sense_resid  = asp->sts_rqpkt_resid;
17019 			if (pktp->pkt_state & STATE_XARQ_DONE) {
17020 				actual_len = MAX_SENSE_LENGTH -
17021 				    xp->xb_sense_resid;
17022 				bcopy(&asp->sts_sensedata, xp->xb_sense_data,
17023 				    MAX_SENSE_LENGTH);
17024 			} else {
17025 				if (xp->xb_sense_resid > SENSE_LENGTH) {
17026 					actual_len = MAX_SENSE_LENGTH -
17027 					    xp->xb_sense_resid;
17028 				} else {
17029 					actual_len = SENSE_LENGTH -
17030 					    xp->xb_sense_resid;
17031 				}
17032 				if (xp->xb_pkt_flags & SD_XB_USCSICMD) {
17033 					if ((((struct uscsi_cmd *)
17034 					    (xp->xb_pktinfo))->uscsi_rqlen) >
17035 					    actual_len) {
17036 						xp->xb_sense_resid =
17037 						    (((struct uscsi_cmd *)
17038 						    (xp->xb_pktinfo))->
17039 						    uscsi_rqlen) - actual_len;
17040 					} else {
17041 						xp->xb_sense_resid = 0;
17042 					}
17043 				}
17044 				bcopy(&asp->sts_sensedata, xp->xb_sense_data,
17045 				    SENSE_LENGTH);
17046 			}
17047 
17048 			/* fail the command */
17049 			SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
17050 			    "sdintr: arq done and FLAG_DIAGNOSE set\n");
17051 			sd_return_failed_command(un, bp, EIO);
17052 			goto exit;
17053 		}
17054 
17055 #if (defined(__x86))	/* DMAFREE for x86 only */
17056 		/*
17057 		 * We want to either retry or fail this command, so free
17058 		 * the DMA resources here.  If we retry the command then
17059 		 * the DMA resources will be reallocated in sd_start_cmds().
17060 		 * Note that when PKT_DMA_PARTIAL is used, this reallocation
17061 		 * causes the *entire* transfer to start over again from the
17062 		 * beginning of the request, even for PARTIAL chunks that
17063 		 * have already transferred successfully.
17064 		 */
17065 		if ((un->un_f_is_fibre == TRUE) &&
17066 		    ((xp->xb_pkt_flags & SD_XB_USCSICMD) == 0) &&
17067 		    ((pktp->pkt_flags & FLAG_SENSING) == 0))  {
17068 			scsi_dmafree(pktp);
17069 			xp->xb_pkt_flags |= SD_XB_DMA_FREED;
17070 		}
17071 #endif
17072 
17073 		SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
17074 		    "sdintr: arq done, sd_handle_auto_request_sense\n");
17075 
17076 		sd_handle_auto_request_sense(un, bp, xp, pktp);
17077 		goto exit;
17078 	}
17079 
17080 	/* Next see if this is the REQUEST SENSE pkt for the instance */
17081 	if (pktp->pkt_flags & FLAG_SENSING)  {
17082 		/* This pktp is from the unit's REQUEST_SENSE command */
17083 		SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
17084 		    "sdintr: sd_handle_request_sense\n");
17085 		sd_handle_request_sense(un, bp, xp, pktp);
17086 		goto exit;
17087 	}
17088 
17089 	/*
17090 	 * Check to see if the command successfully completed as requested;
17091 	 * this is the most common case (and also the hot performance path).
17092 	 *
17093 	 * Requirements for successful completion are:
17094 	 * pkt_reason is CMD_CMPLT and packet status is status good.
17095 	 * In addition:
17096 	 * - A residual of zero indicates successful completion no matter what
17097 	 *   the command is.
17098 	 * - If the residual is not zero and the command is not a read or
17099 	 *   write, then it's still defined as successful completion. In other
17100 	 *   words, if the command is a read or write the residual must be
17101 	 *   zero for successful completion.
17102 	 * - If the residual is not zero and the command is a read or
17103 	 *   write, and it's a USCSICMD, then it's still defined as
17104 	 *   successful completion.
17105 	 */
17106 	if ((pktp->pkt_reason == CMD_CMPLT) &&
17107 	    (SD_GET_PKT_STATUS(pktp) == STATUS_GOOD)) {
17108 
17109 		/*
17110 		 * Since this command is returned with a good status, we
17111 		 * can reset the count for Sonoma failover.
17112 		 */
17113 		un->un_sonoma_failure_count = 0;
17114 
17115 		/*
17116 		 * Return all USCSI commands on good status
17117 		 */
17118 		if (pktp->pkt_resid == 0) {
17119 			SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
17120 			    "sdintr: returning command for resid == 0\n");
17121 		} else if (((SD_GET_PKT_OPCODE(pktp) & 0x1F) != SCMD_READ) &&
17122 		    ((SD_GET_PKT_OPCODE(pktp) & 0x1F) != SCMD_WRITE)) {
17123 			SD_UPDATE_B_RESID(bp, pktp);
17124 			SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
17125 			    "sdintr: returning command for resid != 0\n");
17126 		} else if (xp->xb_pkt_flags & SD_XB_USCSICMD) {
17127 			SD_UPDATE_B_RESID(bp, pktp);
17128 			SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
17129 			    "sdintr: returning uscsi command\n");
17130 		} else {
17131 			goto not_successful;
17132 		}
17133 		sd_return_command(un, bp);
17134 
17135 		/*
17136 		 * Decrement counter to indicate that the callback routine
17137 		 * is done.
17138 		 */
17139 		un->un_in_callback--;
17140 		ASSERT(un->un_in_callback >= 0);
17141 		mutex_exit(SD_MUTEX(un));
17142 
17143 		return;
17144 	}
17145 
17146 not_successful:
17147 
17148 #if (defined(__x86))	/* DMAFREE for x86 only */
17149 	/*
17150 	 * The following is based upon knowledge of the underlying transport
17151 	 * and its use of DMA resources.  This code should be removed when
17152 	 * PKT_DMA_PARTIAL support is taken out of the disk driver in favor
17153 	 * of the new PKT_CMD_BREAKUP protocol. See also sd_initpkt_for_buf()
17154 	 * and sd_start_cmds().
17155 	 *
17156 	 * Free any DMA resources associated with this command if there
17157 	 * is a chance it could be retried or enqueued for later retry.
17158 	 * If we keep the DMA binding then mpxio cannot reissue the
17159 	 * command on another path whenever a path failure occurs.
17160 	 *
17161 	 * Note that when PKT_DMA_PARTIAL is used, free/reallocation
17162 	 * causes the *entire* transfer to start over again from the
17163 	 * beginning of the request, even for PARTIAL chunks that
17164 	 * have already transferred successfully.
17165 	 *
17166 	 * This is only done for non-uscsi commands (and also skipped for the
17167 	 * driver's internal RQS command). Also just do this for Fibre Channel
17168 	 * devices as these are the only ones that support mpxio.
17169 	 */
17170 	if ((un->un_f_is_fibre == TRUE) &&
17171 	    ((xp->xb_pkt_flags & SD_XB_USCSICMD) == 0) &&
17172 	    ((pktp->pkt_flags & FLAG_SENSING) == 0))  {
17173 		scsi_dmafree(pktp);
17174 		xp->xb_pkt_flags |= SD_XB_DMA_FREED;
17175 	}
17176 #endif
17177 
17178 	/*
17179 	 * The command did not successfully complete as requested so check
17180 	 * for FLAG_DIAGNOSE. If set this indicates a uscsi or internal
17181 	 * driver command that should not be retried so just return. If
17182 	 * FLAG_DIAGNOSE is not set the error will be processed below.
17183 	 */
17184 	if ((pktp->pkt_flags & FLAG_DIAGNOSE) != 0) {
17185 		SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
17186 		    "sdintr: FLAG_DIAGNOSE: sd_return_failed_command\n");
17187 		/*
17188 		 * Issue a request sense if a check condition caused the error
17189 		 * (we handle the auto request sense case above), otherwise
17190 		 * just fail the command.
17191 		 */
17192 		if ((pktp->pkt_reason == CMD_CMPLT) &&
17193 		    (SD_GET_PKT_STATUS(pktp) == STATUS_CHECK)) {
17194 			sd_send_request_sense_command(un, bp, pktp);
17195 		} else {
17196 			sd_return_failed_command(un, bp, EIO);
17197 		}
17198 		goto exit;
17199 	}
17200 
17201 	/*
17202 	 * The command did not successfully complete as requested so process
17203 	 * the error, retry, and/or attempt recovery.
17204 	 */
17205 	switch (pktp->pkt_reason) {
17206 	case CMD_CMPLT:
17207 		switch (SD_GET_PKT_STATUS(pktp)) {
17208 		case STATUS_GOOD:
17209 			/*
17210 			 * The command completed successfully with a non-zero
17211 			 * residual
17212 			 */
17213 			SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
17214 			    "sdintr: STATUS_GOOD \n");
17215 			sd_pkt_status_good(un, bp, xp, pktp);
17216 			break;
17217 
17218 		case STATUS_CHECK:
17219 		case STATUS_TERMINATED:
17220 			SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
17221 			    "sdintr: STATUS_TERMINATED | STATUS_CHECK\n");
17222 			sd_pkt_status_check_condition(un, bp, xp, pktp);
17223 			break;
17224 
17225 		case STATUS_BUSY:
17226 			SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
17227 			    "sdintr: STATUS_BUSY\n");
17228 			sd_pkt_status_busy(un, bp, xp, pktp);
17229 			break;
17230 
17231 		case STATUS_RESERVATION_CONFLICT:
17232 			SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
17233 			    "sdintr: STATUS_RESERVATION_CONFLICT\n");
17234 			sd_pkt_status_reservation_conflict(un, bp, xp, pktp);
17235 			break;
17236 
17237 		case STATUS_QFULL:
17238 			SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
17239 			    "sdintr: STATUS_QFULL\n");
17240 			sd_pkt_status_qfull(un, bp, xp, pktp);
17241 			break;
17242 
17243 		case STATUS_MET:
17244 		case STATUS_INTERMEDIATE:
17245 		case STATUS_SCSI2:
17246 		case STATUS_INTERMEDIATE_MET:
17247 		case STATUS_ACA_ACTIVE:
17248 			scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
17249 			    "Unexpected SCSI status received: 0x%x\n",
17250 			    SD_GET_PKT_STATUS(pktp));
17251 			/*
17252 			 * Mark the ssc_flags when an invalid status code is
17253 			 * detected for a non-USCSI command.
17254 			 */
17255 			if (!(xp->xb_pkt_flags & SD_XB_USCSICMD)) {
17256 				sd_ssc_set_info(sscp, SSC_FLAGS_INVALID_STATUS,
17257 				    0, "stat-code");
17258 			}
17259 			sd_return_failed_command(un, bp, EIO);
17260 			break;
17261 
17262 		default:
17263 			scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
17264 			    "Invalid SCSI status received: 0x%x\n",
17265 			    SD_GET_PKT_STATUS(pktp));
17266 			if (!(xp->xb_pkt_flags & SD_XB_USCSICMD)) {
17267 				sd_ssc_set_info(sscp, SSC_FLAGS_INVALID_STATUS,
17268 				    0, "stat-code");
17269 			}
17270 			sd_return_failed_command(un, bp, EIO);
17271 			break;
17272 
17273 		}
17274 		break;
17275 
17276 	case CMD_INCOMPLETE:
17277 		SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
17278 		    "sdintr:  CMD_INCOMPLETE\n");
17279 		sd_pkt_reason_cmd_incomplete(un, bp, xp, pktp);
17280 		break;
17281 	case CMD_TRAN_ERR:
17282 		SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
17283 		    "sdintr: CMD_TRAN_ERR\n");
17284 		sd_pkt_reason_cmd_tran_err(un, bp, xp, pktp);
17285 		break;
17286 	case CMD_RESET:
17287 		SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
17288 		    "sdintr: CMD_RESET \n");
17289 		sd_pkt_reason_cmd_reset(un, bp, xp, pktp);
17290 		break;
17291 	case CMD_ABORTED:
17292 		SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
17293 		    "sdintr: CMD_ABORTED \n");
17294 		sd_pkt_reason_cmd_aborted(un, bp, xp, pktp);
17295 		break;
17296 	case CMD_TIMEOUT:
17297 		SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
17298 		    "sdintr: CMD_TIMEOUT\n");
17299 		sd_pkt_reason_cmd_timeout(un, bp, xp, pktp);
17300 		break;
17301 	case CMD_UNX_BUS_FREE:
17302 		SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
17303 		    "sdintr: CMD_UNX_BUS_FREE \n");
17304 		sd_pkt_reason_cmd_unx_bus_free(un, bp, xp, pktp);
17305 		break;
17306 	case CMD_TAG_REJECT:
17307 		SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
17308 		    "sdintr: CMD_TAG_REJECT\n");
17309 		sd_pkt_reason_cmd_tag_reject(un, bp, xp, pktp);
17310 		break;
17311 	default:
17312 		SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
17313 		    "sdintr: default\n");
17314 		/*
17315 		 * Mark the ssc_flags for detecting an invalid pkt_reason.
17316 		 */
17317 		if (!(xp->xb_pkt_flags & SD_XB_USCSICMD)) {
17318 			sd_ssc_set_info(sscp, SSC_FLAGS_INVALID_PKT_REASON,
17319 			    0, "pkt-reason");
17320 		}
17321 		sd_pkt_reason_default(un, bp, xp, pktp);
17322 		break;
17323 	}
17324 
17325 exit:
17326 	SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un, "sdintr: exit\n");
17327 
17328 	/* Decrement counter to indicate that the callback routine is done. */
17329 	un->un_in_callback--;
17330 	ASSERT(un->un_in_callback >= 0);
17331 
17332 	/*
17333 	 * At this point, the pkt has been dispatched, ie, it is either
17334 	 * being re-tried or has been returned to its caller and should
17335 	 * not be referenced.
17336 	 */
17337 
17338 	mutex_exit(SD_MUTEX(un));
17339 }
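
/*
 * Illustrative sketch (not compiled): the overall shape of a completion
 * callback like sdintr() above -- decrement the in-transport count
 * under the softstate mutex, dispatch on pkt_reason/status, and drop
 * the mutex only after the buf has been retried or returned. All
 * "example_" names are hypothetical and heavily reduced (see the
 * example_softstate sketch after sd_rmw_msg_print_handler()).
 */
#if 0
static void
example_pkt_comp(struct scsi_pkt *pktp)
{
	struct buf *bp = (struct buf *)pktp->pkt_private;
	struct example_softstate *sp = example_get_softstate(bp);

	mutex_enter(&sp->es_mutex);
	sp->es_ncmds_in_transport--;

	if ((pktp->pkt_reason == CMD_CMPLT) &&
	    ((*(pktp->pkt_scbp) & STATUS_MASK) == STATUS_GOOD)) {
		example_return_command(sp, bp);		/* hot path */
	} else {
		example_handle_error(sp, bp, pktp);	/* retry or fail */
	}
	mutex_exit(&sp->es_mutex);
}
#endif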
17340 
17341 
17342 /*
17343  *    Function: sd_print_incomplete_msg
17344  *
17345  * Description: Prints the error message for a CMD_INCOMPLETE error.
17346  *
17347  *   Arguments: un - ptr to associated softstate for the device.
17348  *		bp - ptr to the buf(9S) for the command.
17349  *		arg - message string ptr
17350  *		code - SD_DELAYED_RETRY_ISSUED, SD_IMMEDIATE_RETRY_ISSUED,
17351  *			or SD_NO_RETRY_ISSUED.
17352  *
17353  *     Context: May be called under interrupt context
17354  */
17355 
17356 static void
17357 sd_print_incomplete_msg(struct sd_lun *un, struct buf *bp, void *arg, int code)
17358 {
17359 	struct scsi_pkt	*pktp;
17360 	char	*msgp;
17361 	char	*cmdp = arg;
17362 
17363 	ASSERT(un != NULL);
17364 	ASSERT(mutex_owned(SD_MUTEX(un)));
17365 	ASSERT(bp != NULL);
17366 	ASSERT(arg != NULL);
17367 	pktp = SD_GET_PKTP(bp);
17368 	ASSERT(pktp != NULL);
17369 
17370 	switch (code) {
17371 	case SD_DELAYED_RETRY_ISSUED:
17372 	case SD_IMMEDIATE_RETRY_ISSUED:
17373 		msgp = "retrying";
17374 		break;
17375 	case SD_NO_RETRY_ISSUED:
17376 	default:
17377 		msgp = "giving up";
17378 		break;
17379 	}
17380 
17381 	if ((pktp->pkt_flags & FLAG_SILENT) == 0) {
17382 		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
17383 		    "incomplete %s - %s\n", cmdp, msgp);
17384 	}
17385 }
17386 
17387 
17388 
17389 /*
17390  *    Function: sd_pkt_status_good
17391  *
17392  * Description: Processing for a STATUS_GOOD code in pkt_status.
17393  *
17394  *     Context: May be called under interrupt context
17395  */
17396 
17397 static void
17398 sd_pkt_status_good(struct sd_lun *un, struct buf *bp,
17399     struct sd_xbuf *xp, struct scsi_pkt *pktp)
17400 {
17401 	char	*cmdp;
17402 
17403 	ASSERT(un != NULL);
17404 	ASSERT(mutex_owned(SD_MUTEX(un)));
17405 	ASSERT(bp != NULL);
17406 	ASSERT(xp != NULL);
17407 	ASSERT(pktp != NULL);
17408 	ASSERT(pktp->pkt_reason == CMD_CMPLT);
17409 	ASSERT(SD_GET_PKT_STATUS(pktp) == STATUS_GOOD);
17410 	ASSERT(pktp->pkt_resid != 0);
17411 
17412 	SD_TRACE(SD_LOG_IO_CORE, un, "sd_pkt_status_good: entry\n");
17413 
17414 	SD_UPDATE_ERRSTATS(un, sd_harderrs);
17415 	switch (SD_GET_PKT_OPCODE(pktp) & 0x1F) {
17416 	case SCMD_READ:
17417 		cmdp = "read";
17418 		break;
17419 	case SCMD_WRITE:
17420 		cmdp = "write";
17421 		break;
17422 	default:
17423 		SD_UPDATE_B_RESID(bp, pktp);
17424 		sd_return_command(un, bp);
17425 		SD_TRACE(SD_LOG_IO_CORE, un, "sd_pkt_status_good: exit\n");
17426 		return;
17427 	}
17428 
17429 	/*
17430 	 * See if we can retry the read/write, preferably immediately.
17431 	 * If retries are exhausted, then sd_retry_command() will update
17432 	 * the b_resid count.
17433 	 */
17434 	sd_retry_command(un, bp, SD_RETRIES_STANDARD, sd_print_incomplete_msg,
17435 	    cmdp, EIO, (clock_t)0, NULL);
17436 
17437 	SD_TRACE(SD_LOG_IO_CORE, un, "sd_pkt_status_good: exit\n");
17438 }
17439 
17440 
17441 
17442 
17443 
17444 /*
17445  *    Function: sd_handle_request_sense
17446  *
17447  * Description: Processing for non-auto Request Sense command.
17448  *
17449  *   Arguments: un - ptr to associated softstate
17450  *		sense_bp - ptr to buf(9S) for the RQS command
17451  *		sense_xp - ptr to the sd_xbuf for the RQS command
17452  *		sense_pktp - ptr to the scsi_pkt(9S) for the RQS command
17453  *
17454  *     Context: May be called under interrupt context
17455  */
17456 
17457 static void
17458 sd_handle_request_sense(struct sd_lun *un, struct buf *sense_bp,
17459     struct sd_xbuf *sense_xp, struct scsi_pkt *sense_pktp)
17460 {
17461 	struct buf	*cmd_bp;	/* buf for the original command */
17462 	struct sd_xbuf	*cmd_xp;	/* sd_xbuf for the original command */
17463 	struct scsi_pkt *cmd_pktp;	/* pkt for the original command */
17464 	size_t		actual_len;	/* actual sense data length */
17465 
17466 	ASSERT(un != NULL);
17467 	ASSERT(mutex_owned(SD_MUTEX(un)));
17468 	ASSERT(sense_bp != NULL);
17469 	ASSERT(sense_xp != NULL);
17470 	ASSERT(sense_pktp != NULL);
17471 
17472 	/*
17473 	 * Note the sense_bp, sense_xp, and sense_pktp here are for the
17474 	 * RQS command and not the original command.
17475 	 */
17476 	ASSERT(sense_pktp == un->un_rqs_pktp);
17477 	ASSERT(sense_bp   == un->un_rqs_bp);
17478 	ASSERT((sense_pktp->pkt_flags & (FLAG_SENSING | FLAG_HEAD)) ==
17479 	    (FLAG_SENSING | FLAG_HEAD));
17480 	ASSERT((((SD_GET_XBUF(sense_xp->xb_sense_bp))->xb_pktp->pkt_flags) &
17481 	    FLAG_SENSING) == FLAG_SENSING);
17482 
17483 	/* These are the bp, xp, and pktp for the original command */
17484 	cmd_bp = sense_xp->xb_sense_bp;
17485 	cmd_xp = SD_GET_XBUF(cmd_bp);
17486 	cmd_pktp = SD_GET_PKTP(cmd_bp);
17487 
17488 	if (sense_pktp->pkt_reason != CMD_CMPLT) {
17489 		/*
17490 		 * The REQUEST SENSE command failed.  Release the REQUEST
17491 		 * SENSE command for re-use, get back the bp for the original
17492 		 * command, and attempt to re-try the original command if
17493 		 * FLAG_DIAGNOSE is not set in the original packet.
17494 		 */
17495 		SD_UPDATE_ERRSTATS(un, sd_harderrs);
17496 		if ((cmd_pktp->pkt_flags & FLAG_DIAGNOSE) == 0) {
17497 			cmd_bp = sd_mark_rqs_idle(un, sense_xp);
17498 			sd_retry_command(un, cmd_bp, SD_RETRIES_STANDARD,
17499 			    NULL, NULL, EIO, (clock_t)0, NULL);
17500 			return;
17501 		}
17502 	}
17503 
17504 	/*
17505 	 * Save the relevant sense info into the xp for the original cmd.
17506 	 *
17507 	 * Note: if the request sense failed the state info will be zero
17508 	 * as set in sd_mark_rqs_busy()
17509 	 */
17510 	cmd_xp->xb_sense_status = *(sense_pktp->pkt_scbp);
17511 	cmd_xp->xb_sense_state  = sense_pktp->pkt_state;
17512 	actual_len = MAX_SENSE_LENGTH - sense_pktp->pkt_resid;
17513 	if ((cmd_xp->xb_pkt_flags & SD_XB_USCSICMD) &&
17514 	    (((struct uscsi_cmd *)cmd_xp->xb_pktinfo)->uscsi_rqlen >
17515 	    SENSE_LENGTH)) {
17516 		bcopy(sense_bp->b_un.b_addr, cmd_xp->xb_sense_data,
17517 		    MAX_SENSE_LENGTH);
17518 		cmd_xp->xb_sense_resid = sense_pktp->pkt_resid;
17519 	} else {
17520 		bcopy(sense_bp->b_un.b_addr, cmd_xp->xb_sense_data,
17521 		    SENSE_LENGTH);
17522 		if (actual_len < SENSE_LENGTH) {
17523 			cmd_xp->xb_sense_resid = SENSE_LENGTH - actual_len;
17524 		} else {
17525 			cmd_xp->xb_sense_resid = 0;
17526 		}
17527 	}
17528 
17529 	/*
17530 	 *  Free up the RQS command....
17531 	 *  NOTE:
17532 	 *	Must do this BEFORE calling sd_validate_sense_data!
17533 	 *	sd_validate_sense_data may return the original command in
17534 	 *	which case the pkt will be freed and the flags can no
17535 	 *	longer be touched.
17536 	 *	SD_MUTEX is held through this process until the command
17537 	 *	is dispatched based upon the sense data, so there are
17538 	 *	no race conditions.
17539 	 */
17540 	(void) sd_mark_rqs_idle(un, sense_xp);
17541 
17542 	/*
17543 	 * For a retryable command see if we have valid sense data, if so then
17544 	 * turn it over to sd_decode_sense() to figure out the right course of
17545 	 * action. Just fail a non-retryable command.
17546 	 */
17547 	if ((cmd_pktp->pkt_flags & FLAG_DIAGNOSE) == 0) {
17548 		if (sd_validate_sense_data(un, cmd_bp, cmd_xp, actual_len) ==
17549 		    SD_SENSE_DATA_IS_VALID) {
17550 			sd_decode_sense(un, cmd_bp, cmd_xp, cmd_pktp);
17551 		}
17552 	} else {
17553 		SD_DUMP_MEMORY(un, SD_LOG_IO_CORE, "Failed CDB",
17554 		    (uchar_t *)cmd_pktp->pkt_cdbp, CDB_SIZE, SD_LOG_HEX);
17555 		SD_DUMP_MEMORY(un, SD_LOG_IO_CORE, "Sense Data",
17556 		    (uchar_t *)cmd_xp->xb_sense_data, SENSE_LENGTH, SD_LOG_HEX);
17557 		sd_return_failed_command(un, cmd_bp, EIO);
17558 	}
17559 }
17560 
17561 
17562 
17563 
17564 /*
17565  *    Function: sd_handle_auto_request_sense
17566  *
17567  * Description: Processing for auto-request sense information.
17568  *
17569  *   Arguments: un - ptr to associated softstate
17570  *		bp - ptr to buf(9S) for the command
17571  *		xp - ptr to the sd_xbuf for the command
17572  *		pktp - ptr to the scsi_pkt(9S) for the command
17573  *
17574  *     Context: May be called under interrupt context
17575  */
17576 
17577 static void
17578 sd_handle_auto_request_sense(struct sd_lun *un, struct buf *bp,
17579     struct sd_xbuf *xp, struct scsi_pkt *pktp)
17580 {
17581 	struct scsi_arq_status *asp;
17582 	size_t actual_len;
17583 
17584 	ASSERT(un != NULL);
17585 	ASSERT(mutex_owned(SD_MUTEX(un)));
17586 	ASSERT(bp != NULL);
17587 	ASSERT(xp != NULL);
17588 	ASSERT(pktp != NULL);
17589 	ASSERT(pktp != un->un_rqs_pktp);
17590 	ASSERT(bp   != un->un_rqs_bp);
17591 
17592 	/*
17593 	 * For auto-request sense, we get a scsi_arq_status back from
17594 	 * the HBA, with the sense data in the sts_sensedata member.
17595 	 * The pkt_scbp of the packet points to this scsi_arq_status.
17596 	 */
17597 	asp = (struct scsi_arq_status *)(pktp->pkt_scbp);
17598 
17599 	if (asp->sts_rqpkt_reason != CMD_CMPLT) {
17600 		/*
17601 		 * The auto REQUEST SENSE failed; see if we can re-try
17602 		 * the original command.
17603 		 */
17604 		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
17605 		    "auto request sense failed (reason=%s)\n",
17606 		    scsi_rname(asp->sts_rqpkt_reason));
17607 
17608 		sd_reset_target(un, pktp);
17609 
17610 		sd_retry_command(un, bp, SD_RETRIES_STANDARD,
17611 		    NULL, NULL, EIO, (clock_t)0, NULL);
17612 		return;
17613 	}
17614 
17615 	/* Save the relevant sense info into the xp for the original cmd. */
17616 	xp->xb_sense_status = *((uchar_t *)(&(asp->sts_rqpkt_status)));
17617 	xp->xb_sense_state  = asp->sts_rqpkt_state;
17618 	xp->xb_sense_resid  = asp->sts_rqpkt_resid;
17619 	if (xp->xb_sense_state & STATE_XARQ_DONE) {
17620 		actual_len = MAX_SENSE_LENGTH - xp->xb_sense_resid;
17621 		bcopy(&asp->sts_sensedata, xp->xb_sense_data,
17622 		    MAX_SENSE_LENGTH);
17623 	} else {
17624 		if (xp->xb_sense_resid > SENSE_LENGTH) {
17625 			actual_len = MAX_SENSE_LENGTH - xp->xb_sense_resid;
17626 		} else {
17627 			actual_len = SENSE_LENGTH - xp->xb_sense_resid;
17628 		}
17629 		if (xp->xb_pkt_flags & SD_XB_USCSICMD) {
17630 			if ((((struct uscsi_cmd *)
17631 			    (xp->xb_pktinfo))->uscsi_rqlen) > actual_len) {
17632 				xp->xb_sense_resid = (((struct uscsi_cmd *)
17633 				    (xp->xb_pktinfo))->uscsi_rqlen) -
17634 				    actual_len;
17635 			} else {
17636 				xp->xb_sense_resid = 0;
17637 			}
17638 		}
17639 		bcopy(&asp->sts_sensedata, xp->xb_sense_data, SENSE_LENGTH);
17640 	}
17641 
17642 	/*
17643 	 * See if we have valid sense data, if so then turn it over to
17644 	 * sd_decode_sense() to figure out the right course of action.
17645 	 */
17646 	if (sd_validate_sense_data(un, bp, xp, actual_len) ==
17647 	    SD_SENSE_DATA_IS_VALID) {
17648 		sd_decode_sense(un, bp, xp, pktp);
17649 	}
17650 }
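
/*
 * Illustrative sketch (not compiled): the actual-sense-length rule used
 * by both sdintr() and sd_handle_auto_request_sense() above. When
 * extended sense was transferred (STATE_XARQ_DONE), or the residual
 * already exceeds SENSE_LENGTH, the residual counts against
 * MAX_SENSE_LENGTH; otherwise it counts against SENSE_LENGTH.
 * Hypothetical helper for clarity only.
 */
#if 0
static size_t
example_actual_sense_len(uint_t sense_state, size_t sense_resid)
{
	if (sense_state & STATE_XARQ_DONE)
		return (MAX_SENSE_LENGTH - sense_resid);
	if (sense_resid > SENSE_LENGTH)
		return (MAX_SENSE_LENGTH - sense_resid);
	return (SENSE_LENGTH - sense_resid);
}
#endif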
17651 
17652 
17653 /*
17654  *    Function: sd_print_sense_failed_msg
17655  *
17656  * Description: Print log message when RQS has failed.
17657  *
17658  *   Arguments: un - ptr to associated softstate
17659  *		bp - ptr to buf(9S) for the command
17660  *		arg - generic message string ptr
17661  *		code - SD_IMMEDIATE_RETRY_ISSUED, SD_DELAYED_RETRY_ISSUED,
17662  *			or SD_NO_RETRY_ISSUED
17663  *
17664  *     Context: May be called from interrupt context
17665  */
17666 
17667 static void
17668 sd_print_sense_failed_msg(struct sd_lun *un, struct buf *bp, void *arg,
17669     int code)
17670 {
17671 	char	*msgp = arg;
17672 
17673 	ASSERT(un != NULL);
17674 	ASSERT(mutex_owned(SD_MUTEX(un)));
17675 	ASSERT(bp != NULL);
17676 
17677 	if ((code == SD_NO_RETRY_ISSUED) && (msgp != NULL)) {
17678 		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN, "%s", msgp);
17679 	}
17680 }
17681 
17682 
17683 /*
17684  *    Function: sd_validate_sense_data
17685  *
17686  * Description: Check the given sense data for validity.
17687  *		If the sense data is not valid, the command will
17688  *		be either failed or retried!
17689  *
17690  * Return Code: SD_SENSE_DATA_IS_INVALID
17691  *		SD_SENSE_DATA_IS_VALID
17692  *
17693  *     Context: May be called from interrupt context
17694  */
17695 
17696 static int
17697 sd_validate_sense_data(struct sd_lun *un, struct buf *bp, struct sd_xbuf *xp,
17698     size_t actual_len)
17699 {
17700 	struct scsi_extended_sense *esp;
17701 	struct	scsi_pkt *pktp;
17702 	char	*msgp = NULL;
17703 	sd_ssc_t *sscp;
17704 
17705 	ASSERT(un != NULL);
17706 	ASSERT(mutex_owned(SD_MUTEX(un)));
17707 	ASSERT(bp != NULL);
17708 	ASSERT(bp != un->un_rqs_bp);
17709 	ASSERT(xp != NULL);
17710 	ASSERT(un->un_fm_private != NULL);
17711 
17712 	pktp = SD_GET_PKTP(bp);
17713 	ASSERT(pktp != NULL);
17714 
17715 	sscp = &((struct sd_fm_internal *)(un->un_fm_private))->fm_ssc;
17716 	ASSERT(sscp != NULL);
17717 
17718 	/*
17719 	 * Check the status of the RQS command (auto or manual).
17720 	 */
17721 	switch (xp->xb_sense_status & STATUS_MASK) {
17722 	case STATUS_GOOD:
17723 		break;
17724 
17725 	case STATUS_RESERVATION_CONFLICT:
17726 		sd_pkt_status_reservation_conflict(un, bp, xp, pktp);
17727 		return (SD_SENSE_DATA_IS_INVALID);
17728 
17729 	case STATUS_BUSY:
17730 		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
17731 		    "Busy Status on REQUEST SENSE\n");
17732 		sd_retry_command(un, bp, SD_RETRIES_BUSY, NULL,
17733 		    NULL, EIO, un->un_busy_timeout / 500, kstat_waitq_enter);
17734 		return (SD_SENSE_DATA_IS_INVALID);
17735 
17736 	case STATUS_QFULL:
17737 		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
17738 		    "QFULL Status on REQUEST SENSE\n");
17739 		sd_retry_command(un, bp, SD_RETRIES_STANDARD, NULL,
17740 		    NULL, EIO, un->un_busy_timeout / 500, kstat_waitq_enter);
17741 		return (SD_SENSE_DATA_IS_INVALID);
17742 
17743 	case STATUS_CHECK:
17744 	case STATUS_TERMINATED:
17745 		msgp = "Check Condition on REQUEST SENSE\n";
17746 		goto sense_failed;
17747 
17748 	default:
17749 		msgp = "Not STATUS_GOOD on REQUEST_SENSE\n";
17750 		goto sense_failed;
17751 	}
17752 
17753 	/*
17754 	 * See if we got the minimum required amount of sense data.
17755 	 * Note: We are assuming the returned sense data is SENSE_LENGTH bytes
17756 	 * or less.
17757 	 */
17758 	if (((xp->xb_sense_state & STATE_XFERRED_DATA) == 0) ||
17759 	    (actual_len == 0)) {
17760 		msgp = "Request Sense couldn't get sense data\n";
17761 		goto sense_failed;
17762 	}
17763 
17764 	if (actual_len < SUN_MIN_SENSE_LENGTH) {
17765 		msgp = "Not enough sense information\n";
17766 		/* Mark the ssc_flags for detecting invalid sense data */
17767 		if (!(xp->xb_pkt_flags & SD_XB_USCSICMD)) {
17768 			sd_ssc_set_info(sscp, SSC_FLAGS_INVALID_SENSE, 0,
17769 			    "sense-data");
17770 		}
17771 		goto sense_failed;
17772 	}
17773 
17774 	/*
17775 	 * We require the extended sense data
17776 	 */
17777 	esp = (struct scsi_extended_sense *)xp->xb_sense_data;
17778 	if (esp->es_class != CLASS_EXTENDED_SENSE) {
17779 		if ((pktp->pkt_flags & FLAG_SILENT) == 0) {
17780 			static char tmp[8];
17781 			static char buf[148];
17782 			char *p = (char *)(xp->xb_sense_data);
17783 			int i;
17784 
17785 			mutex_enter(&sd_sense_mutex);
17786 			(void) strcpy(buf, "undecodable sense information:");
17787 			for (i = 0; i < actual_len; i++) {
17788 				(void) sprintf(tmp, " 0x%x", *(p++) & 0xff);
17789 				(void) strcpy(&buf[strlen(buf)], tmp);
17790 			}
17791 			i = strlen(buf);
17792 			(void) strcpy(&buf[i], "-(assumed fatal)\n");
17793 
17794 			if (SD_FM_LOG(un) == SD_FM_LOG_NSUP) {
17795 				scsi_log(SD_DEVINFO(un), sd_label,
17796 				    CE_WARN, "%s", buf);
17797 			}
17798 			mutex_exit(&sd_sense_mutex);
17799 		}
17800 
17801 		/* Mark the ssc_flags for detecting invalid sense data */
17802 		if (!(xp->xb_pkt_flags & SD_XB_USCSICMD)) {
17803 			sd_ssc_set_info(sscp, SSC_FLAGS_INVALID_SENSE, 0,
17804 			    "sense-data");
17805 		}
17806 
17807 		/* Note: Legacy behavior, fail the command with no retry */
17808 		sd_return_failed_command(un, bp, EIO);
17809 		return (SD_SENSE_DATA_IS_INVALID);
17810 	}
17811 
17812 	/*
17813 	 * Check that es_code is valid (es_class concatenated with es_code
17814 	 * makes up the "response code" field; es_class will always be 7, so
17815 	 * make sure es_code is 0, 1, 2, 3 or 0xf). es_code indicates the
17816 	 * sense data format.
17817 	 */
17818 	if ((esp->es_code != CODE_FMT_FIXED_CURRENT) &&
17819 	    (esp->es_code != CODE_FMT_FIXED_DEFERRED) &&
17820 	    (esp->es_code != CODE_FMT_DESCR_CURRENT) &&
17821 	    (esp->es_code != CODE_FMT_DESCR_DEFERRED) &&
17822 	    (esp->es_code != CODE_FMT_VENDOR_SPECIFIC)) {
17823 		/* Mark the ssc_flags for detecting invalid sense data */
17824 		if (!(xp->xb_pkt_flags & SD_XB_USCSICMD)) {
17825 			sd_ssc_set_info(sscp, SSC_FLAGS_INVALID_SENSE, 0,
17826 			    "sense-data");
17827 		}
17828 		goto sense_failed;
17829 	}
17830 
17831 	return (SD_SENSE_DATA_IS_VALID);
17832 
17833 sense_failed:
17834 	/*
17835 	 * If the request sense failed (for whatever reason), attempt
17836 	 * to retry the original command.
17837 	 */
17838 #if defined(__x86)
17839 	/*
17840 	 * SD_RETRY_DELAY is conditionally compiled (#if fibre) in
17841 	 * sddef.h for the SPARC platform, while x86 uses one binary
17842 	 * for both SCSI and FC.
17843 	 * The SD_RETRY_DELAY value used here needs to be adjusted
17844 	 * whenever SD_RETRY_DELAY changes in sddef.h.
17845 	 */
17846 	sd_retry_command(un, bp, SD_RETRIES_STANDARD,
17847 	    sd_print_sense_failed_msg, msgp, EIO,
17848 	    un->un_f_is_fibre ? drv_usectohz(100000) : (clock_t)0, NULL);
17849 #else
17850 	sd_retry_command(un, bp, SD_RETRIES_STANDARD,
17851 	    sd_print_sense_failed_msg, msgp, EIO, SD_RETRY_DELAY, NULL);
17852 #endif
17853 
17854 	return (SD_SENSE_DATA_IS_INVALID);
17855 }
17856 
17857 /*
17858  *    Function: sd_decode_sense
17859  *
17860  * Description: Take recovery action(s) when SCSI Sense Data is received.
17861  *
17862  *     Context: Interrupt context.
17863  */
17864 
17865 static void
17866 sd_decode_sense(struct sd_lun *un, struct buf *bp, struct sd_xbuf *xp,
17867     struct scsi_pkt *pktp)
17868 {
17869 	uint8_t sense_key;
17870 
17871 	ASSERT(un != NULL);
17872 	ASSERT(mutex_owned(SD_MUTEX(un)));
17873 	ASSERT(bp != NULL);
17874 	ASSERT(bp != un->un_rqs_bp);
17875 	ASSERT(xp != NULL);
17876 	ASSERT(pktp != NULL);
17877 
17878 	sense_key = scsi_sense_key(xp->xb_sense_data);
17879 
17880 	switch (sense_key) {
17881 	case KEY_NO_SENSE:
17882 		sd_sense_key_no_sense(un, bp, xp, pktp);
17883 		break;
17884 	case KEY_RECOVERABLE_ERROR:
17885 		sd_sense_key_recoverable_error(un, xp->xb_sense_data,
17886 		    bp, xp, pktp);
17887 		break;
17888 	case KEY_NOT_READY:
17889 		sd_sense_key_not_ready(un, xp->xb_sense_data,
17890 		    bp, xp, pktp);
17891 		break;
17892 	case KEY_MEDIUM_ERROR:
17893 	case KEY_HARDWARE_ERROR:
17894 		sd_sense_key_medium_or_hardware_error(un,
17895 		    xp->xb_sense_data, bp, xp, pktp);
17896 		break;
17897 	case KEY_ILLEGAL_REQUEST:
17898 		sd_sense_key_illegal_request(un, bp, xp, pktp);
17899 		break;
17900 	case KEY_UNIT_ATTENTION:
17901 		sd_sense_key_unit_attention(un, xp->xb_sense_data,
17902 		    bp, xp, pktp);
17903 		break;
17904 	case KEY_WRITE_PROTECT:
17905 	case KEY_VOLUME_OVERFLOW:
17906 	case KEY_MISCOMPARE:
17907 		sd_sense_key_fail_command(un, bp, xp, pktp);
17908 		break;
17909 	case KEY_BLANK_CHECK:
17910 		sd_sense_key_blank_check(un, bp, xp, pktp);
17911 		break;
17912 	case KEY_ABORTED_COMMAND:
17913 		sd_sense_key_aborted_command(un, bp, xp, pktp);
17914 		break;
17915 	case KEY_VENDOR_UNIQUE:
17916 	case KEY_COPY_ABORTED:
17917 	case KEY_EQUAL:
17918 	case KEY_RESERVED:
17919 	default:
17920 		sd_sense_key_default(un, xp->xb_sense_data,
17921 		    bp, xp, pktp);
17922 		break;
17923 	}
17924 }
17925 
17926 
17927 /*
17928  *    Function: sd_dump_memory
17929  *
17930  * Description: Debug logging routine to print the contents of a
17931  *		user-provided buffer. The output is broken up into 256-byte
17932  *		segments due to a size constraint of the scsi_log
17933  *		implementation.
17934  *
17935  *   Arguments: un - ptr to softstate
17936  *		comp - component mask
17937  *		title - "title" string to preceed data when printed
17938  *		title - "title" string to precede data when printed
17939  *		len - size of data block to be printed
17940  *		fmt - SD_LOG_HEX (use 0x%02x format) or SD_LOG_CHAR (use %c)
17941  *
17942  *     Context: May be called from interrupt context
17943  */
17944 
17945 #define	SD_DUMP_MEMORY_BUF_SIZE	256
17946 
17947 static char *sd_dump_format_string[] = {
17948 		" 0x%02x",
17949 		" %c"
17950 };
17951 
17952 static void
17953 sd_dump_memory(struct sd_lun *un, uint_t comp, char *title, uchar_t *data,
17954     int len, int fmt)
17955 {
17956 	int	i, j;
17957 	int	avail_count;
17958 	int	start_offset;
17959 	int	end_offset;
17960 	size_t	entry_len;
17961 	char	*bufp;
17962 	char	*local_buf;
17963 	char	*format_string;
17964 
17965 	ASSERT((fmt == SD_LOG_HEX) || (fmt == SD_LOG_CHAR));
17966 
17967 	/*
17968 	 * In the debug version of the driver, this function is called from a
17969 	 * number of places which are NOPs in the release driver.
17970 	 * The debug driver therefore has additional methods of filtering
17971 	 * debug output.
17972 	 */
17973 #ifdef SDDEBUG
17974 	/*
17975 	 * In the debug version of the driver we can reduce the amount of debug
17976 	 * messages by setting sd_error_level to something other than
17977 	 * SCSI_ERR_ALL and clearing bits in sd_level_mask and
17978 	 * sd_component_mask.
17979 	 */
17980 	if (((sd_level_mask & (SD_LOGMASK_DUMP_MEM | SD_LOGMASK_DIAG)) == 0) ||
17981 	    (sd_error_level != SCSI_ERR_ALL)) {
17982 		return;
17983 	}
17984 	if (((sd_component_mask & comp) == 0) ||
17985 	    (sd_error_level != SCSI_ERR_ALL)) {
17986 		return;
17987 	}
17988 #else
17989 	if (sd_error_level != SCSI_ERR_ALL) {
17990 		return;
17991 	}
17992 #endif
17993 
17994 	local_buf = kmem_zalloc(SD_DUMP_MEMORY_BUF_SIZE, KM_SLEEP);
17995 	bufp = local_buf;
17996 	/*
17997 	 * Available length is the length of local_buf[], minus the
17998 	 * length of the title string, minus one for the ":", minus
17999 	 * one for the newline, minus one for the NULL terminator.
18000 	 * This gives the #bytes available for holding the printed
18001 	 * values from the given data buffer.
18002 	 */
18003 	if (fmt == SD_LOG_HEX) {
18004 		format_string = sd_dump_format_string[0];
18005 	} else /* SD_LOG_CHAR */ {
18006 		format_string = sd_dump_format_string[1];
18007 	}
18008 	/*
18009 	 * Available count is the number of elements from the given
18010 	 * data buffer that we can fit into the available length.
18011 	 * This is based upon the size of the format string used.
18012 	 * Make one entry and find it's size.
18013 	 * Make one entry and find its size.
18014 	(void) sprintf(bufp, format_string, data[0]);
18015 	entry_len = strlen(bufp);
18016 	avail_count = (SD_DUMP_MEMORY_BUF_SIZE - strlen(title) - 3) / entry_len;
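	/*
	 * Worked example (illustrative only, assuming a 10-character title
	 * such as "Sense Data"): with SD_LOG_HEX each entry prints as
	 * " 0x%02x", so entry_len == 5 and avail_count ==
	 * (256 - 10 - 3) / 5 == 48 data bytes per scsi_log() line.
	 */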
18017 
18018 	j = 0;
18019 	while (j < len) {
18020 		bufp = local_buf;
18021 		bzero(bufp, SD_DUMP_MEMORY_BUF_SIZE);
18022 		start_offset = j;
18023 
18024 		end_offset = start_offset + avail_count;
18025 
18026 		(void) sprintf(bufp, "%s:", title);
18027 		bufp += strlen(bufp);
18028 		for (i = start_offset; ((i < end_offset) && (j < len));
18029 		    i++, j++) {
18030 			(void) sprintf(bufp, format_string, data[i]);
18031 			bufp += entry_len;
18032 		}
18033 		(void) sprintf(bufp, "\n");
18034 
18035 		scsi_log(SD_DEVINFO(un), sd_label, CE_NOTE, "%s", local_buf);
18036 	}
18037 	kmem_free(local_buf, SD_DUMP_MEMORY_BUF_SIZE);
18038 }
18039 
18040 /*
18041  *    Function: sd_print_sense_msg
18042  *
18043  * Description: Log a message based upon the given sense data.
18044  *
18045  *   Arguments: un - ptr to associated softstate
18046  *		bp - ptr to buf(9S) for the command
18047  *		arg - ptr to associate sd_sense_info struct
18048  *		arg - ptr to associated sd_sense_info struct
18049  *			or SD_NO_RETRY_ISSUED
18050  *
18051  *     Context: May be called from interrupt context
18052  */
18053 
18054 static void
18055 sd_print_sense_msg(struct sd_lun *un, struct buf *bp, void *arg, int code)
18056 {
18057 	struct sd_xbuf	*xp;
18058 	struct scsi_pkt	*pktp;
18059 	uint8_t *sensep;
18060 	daddr_t request_blkno;
18061 	diskaddr_t err_blkno;
18062 	int severity;
18063 	int pfa_flag;
18064 	extern struct scsi_key_strings scsi_cmds[];
18065 
18066 	ASSERT(un != NULL);
18067 	ASSERT(mutex_owned(SD_MUTEX(un)));
18068 	ASSERT(bp != NULL);
18069 	xp = SD_GET_XBUF(bp);
18070 	ASSERT(xp != NULL);
18071 	pktp = SD_GET_PKTP(bp);
18072 	ASSERT(pktp != NULL);
18073 	ASSERT(arg != NULL);
18074 
18075 	severity = ((struct sd_sense_info *)(arg))->ssi_severity;
18076 	pfa_flag = ((struct sd_sense_info *)(arg))->ssi_pfa_flag;
18077 
18078 	if ((code == SD_DELAYED_RETRY_ISSUED) ||
18079 	    (code == SD_IMMEDIATE_RETRY_ISSUED)) {
18080 		severity = SCSI_ERR_RETRYABLE;
18081 	}
18082 
18083 	/* Use absolute block number for the request block number */
18084 	request_blkno = xp->xb_blkno;
18085 
18086 	/*
18087 	 * Now try to get the error block number from the sense data
18088 	 */
18089 	sensep = xp->xb_sense_data;
18090 
18091 	if (scsi_sense_info_uint64(sensep, SENSE_LENGTH,
18092 	    (uint64_t *)&err_blkno)) {
18093 		/*
18094 		 * We retrieved the error block number from the information
18095 		 * portion of the sense data.
18096 		 *
18097 		 * For USCSI commands we are better off using the error
18098 		 * block no. as the requested block no. (This is the best
18099 		 * we can estimate.)
18100 		 */
18101 		if ((SD_IS_BUFIO(xp) == FALSE) &&
18102 		    ((pktp->pkt_flags & FLAG_SILENT) == 0)) {
18103 			request_blkno = err_blkno;
18104 		}
18105 	} else {
18106 		/*
18107 		 * Without the es_valid bit set (for fixed format) or an
18108 		 * information descriptor (for descriptor format) we cannot
18109 		 * be certain of the error blkno, so just use the
18110 		 * request_blkno.
18111 		 */
18112 		err_blkno = (diskaddr_t)request_blkno;
18113 	}
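	/*
	 * Illustrative sketch (an assumption about the sense formats, not
	 * code that runs here): for fixed-format sense data with the VALID
	 * bit set, the information field that scsi_sense_info_uint64()
	 * decoded above occupies bytes 3-6, big-endian:
	 *
	 *	if (sensep[0] & 0x80) {
	 *		err_blkno = (sensep[3] << 24) | (sensep[4] << 16) |
	 *		    (sensep[5] << 8) | sensep[6];
	 *	}
	 *
	 * Descriptor-format sense instead carries a 64-bit value in an
	 * information descriptor (descriptor type 0x00).
	 */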
18114 
18115 	/*
18116 	 * The following will log the buffer contents for the release driver
18117 	 * if the SD_LOGMASK_DIAG bit of sd_level_mask is set, or the error
18118 	 * level is set to verbose.
18119 	 */
18120 	sd_dump_memory(un, SD_LOG_IO, "Failed CDB",
18121 	    (uchar_t *)pktp->pkt_cdbp, CDB_SIZE, SD_LOG_HEX);
18122 	sd_dump_memory(un, SD_LOG_IO, "Sense Data",
18123 	    (uchar_t *)sensep, SENSE_LENGTH, SD_LOG_HEX);
18124 
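	/*
	 * (Example of the resulting log output, with hypothetical bytes for
	 * a READ(10) that failed with a MEDIUM ERROR:
	 *
	 *	Failed CDB: 0x28 0x00 0x00 0x12 0xd6 0x87 0x00 0x00 0x10 0x00
	 *	Sense Data: 0xf0 0x00 0x03 0x00 0x12 0xd6 0x8a ...
	 * )
	 */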
18125 	if (pfa_flag == FALSE) {
18126 		/* This is normally only set for USCSI */
18127 		if ((pktp->pkt_flags & FLAG_SILENT) != 0) {
18128 			return;
18129 		}
18130 
18131 		if ((SD_IS_BUFIO(xp) == TRUE) &&
18132 		    (((sd_level_mask & SD_LOGMASK_DIAG) == 0) &&
18133 		    (severity < sd_error_level))) {
18134 			return;
18135 		}
18136 	}
18137 	/*
18138 	 * Check for Sonoma Failover and keep a count of how many failed I/O's
18139 	 * Check for Sonoma Failover and keep a count of how many I/Os have failed.
18140 	if ((SD_IS_LSI(un)) &&
18141 	    (scsi_sense_key(sensep) == KEY_ILLEGAL_REQUEST) &&
18142 	    (scsi_sense_asc(sensep) == 0x94) &&
18143 	    (scsi_sense_ascq(sensep) == 0x01)) {
18144 		un->un_sonoma_failure_count++;
18145 		if (un->un_sonoma_failure_count > 1) {
18146 			return;
18147 		}
18148 	}
18149 
18150 	if (SD_FM_LOG(un) == SD_FM_LOG_NSUP ||
18151 	    ((scsi_sense_key(sensep) == KEY_RECOVERABLE_ERROR) &&
18152 	    (pktp->pkt_resid == 0))) {
18153 		scsi_vu_errmsg(SD_SCSI_DEVP(un), pktp, sd_label, severity,
18154 		    request_blkno, err_blkno, scsi_cmds,
18155 		    (struct scsi_extended_sense *)sensep,
18156 		    un->un_additional_codes, NULL);
18157 	}
18158 }
18159 
18160 /*
18161  *    Function: sd_sense_key_no_sense
18162  *
18163  * Description: Recovery action when sense data was not received.
18164  *
18165  *     Context: May be called from interrupt context
18166  */
18167 
18168 static void
18169 sd_sense_key_no_sense(struct sd_lun *un, struct buf *bp, struct sd_xbuf *xp,
18170     struct scsi_pkt *pktp)
18171 {
18172 	struct sd_sense_info	si;
18173 
18174 	ASSERT(un != NULL);
18175 	ASSERT(mutex_owned(SD_MUTEX(un)));
18176 	ASSERT(bp != NULL);
18177 	ASSERT(xp != NULL);
18178 	ASSERT(pktp != NULL);
18179 
18180 	si.ssi_severity = SCSI_ERR_FATAL;
18181 	si.ssi_pfa_flag = FALSE;
18182 
18183 	SD_UPDATE_ERRSTATS(un, sd_softerrs);
18184 
18185 	sd_retry_command(un, bp, SD_RETRIES_STANDARD, sd_print_sense_msg,
18186 	    &si, EIO, (clock_t)0, NULL);
18187 }
18188 
18189 
18190 /*
18191  *    Function: sd_sense_key_recoverable_error
18192  *
18193  * Description: Recovery actions for a SCSI "Recovered Error" sense key.
18194  *
18195  *     Context: May be called from interrupt context
18196  */
18197 
18198 static void
18199 sd_sense_key_recoverable_error(struct sd_lun *un, uint8_t *sense_datap,
18200     struct buf *bp, struct sd_xbuf *xp, struct scsi_pkt *pktp)
18201 {
18202 	struct sd_sense_info	si;
18203 	uint8_t asc = scsi_sense_asc(sense_datap);
18204 	uint8_t ascq = scsi_sense_ascq(sense_datap);
18205 
18206 	ASSERT(un != NULL);
18207 	ASSERT(mutex_owned(SD_MUTEX(un)));
18208 	ASSERT(bp != NULL);
18209 	ASSERT(xp != NULL);
18210 	ASSERT(pktp != NULL);
18211 
18212 	/*
18213 	 * 0x00, 0x1D: ATA PASSTHROUGH INFORMATION AVAILABLE
18214 	 */
18215 	if (asc == 0x00 && ascq == 0x1D) {
18216 		sd_return_command(un, bp);
18217 		return;
18218 	}
18219 
18220 	/*
18221 	 * 0x5D: FAILURE PREDICTION THRESHOLD EXCEEDED
18222 	 */
18223 	if ((asc == 0x5D) && (sd_report_pfa != 0)) {
18224 		SD_UPDATE_ERRSTATS(un, sd_rq_pfa_err);
18225 		si.ssi_severity = SCSI_ERR_INFO;
18226 		si.ssi_pfa_flag = TRUE;
18227 	} else {
18228 		SD_UPDATE_ERRSTATS(un, sd_softerrs);
18229 		SD_UPDATE_ERRSTATS(un, sd_rq_recov_err);
18230 		si.ssi_severity = SCSI_ERR_RECOVERED;
18231 		si.ssi_pfa_flag = FALSE;
18232 	}
18233 
18234 	if (pktp->pkt_resid == 0) {
18235 		sd_print_sense_msg(un, bp, &si, SD_NO_RETRY_ISSUED);
18236 		sd_return_command(un, bp);
18237 		return;
18238 	}
18239 
18240 	sd_retry_command(un, bp, SD_RETRIES_STANDARD, sd_print_sense_msg,
18241 	    &si, EIO, (clock_t)0, NULL);
18242 }
18243 
18244 
18245 
18246 
18247 /*
18248  *    Function: sd_sense_key_not_ready
18249  *
18250  * Description: Recovery actions for a SCSI "Not Ready" sense key.
18251  *
18252  *     Context: May be called from interrupt context
18253  */
18254 
18255 static void
18256 sd_sense_key_not_ready(struct sd_lun *un, uint8_t *sense_datap, struct buf *bp,
18257     struct sd_xbuf *xp, struct scsi_pkt *pktp)
18258 {
18259 	struct sd_sense_info	si;
18260 	uint8_t asc = scsi_sense_asc(sense_datap);
18261 	uint8_t ascq = scsi_sense_ascq(sense_datap);
18262 
18263 	ASSERT(un != NULL);
18264 	ASSERT(mutex_owned(SD_MUTEX(un)));
18265 	ASSERT(bp != NULL);
18266 	ASSERT(xp != NULL);
18267 	ASSERT(pktp != NULL);
18268 
18269 	si.ssi_severity = SCSI_ERR_FATAL;
18270 	si.ssi_pfa_flag = FALSE;
18271 
18272 	/*
18273 	 * Update error stats after first NOT READY error. Disks may have
18274 	 * been powered down and may need to be restarted.  For CDROMs,
18275 	 * report NOT READY errors only if media is present.
18276 	 */
18277 	if ((ISCD(un) && (asc == 0x3A)) ||
18278 	    (xp->xb_nr_retry_count > 0)) {
18279 		SD_UPDATE_ERRSTATS(un, sd_harderrs);
18280 		SD_UPDATE_ERRSTATS(un, sd_rq_ntrdy_err);
18281 	}
18282 
18283 	/*
18284 	 * Just fail if the "not ready" retry limit has been reached.
18285 	 */
18286 	if (xp->xb_nr_retry_count >= un->un_notready_retry_count) {
18287 		/* Special check for error message printing for removables. */
18288 		if (un->un_f_has_removable_media && (asc == 0x04) &&
18289 		    (ascq >= 0x04)) {
18290 			si.ssi_severity = SCSI_ERR_ALL;
18291 		}
18292 		goto fail_command;
18293 	}
18294 
18295 	/*
18296 	 * Check the ASC and ASCQ in the sense data as needed, to determine
18297 	 * what to do.
18298 	 */
18299 	switch (asc) {
18300 	case 0x04:	/* LOGICAL UNIT NOT READY */
18301 		/*
18302 		 * disk drives that don't spin up result in a very long delay
18303 		 * Disk drives that don't spin up result in a very long delay
18304 		 * if the error level is set to verbose.
18305 		 */
18306 		if (sd_error_level < SCSI_ERR_RETRYABLE) {
18307 			scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
18308 			    "logical unit not ready, resetting disk\n");
18309 		}
18310 
18311 		/*
18312 		 * There are different requirements for CDROMs and disks for
18313 		 * the number of retries.  If a CD-ROM is reporting this, it is
18314 		 * probably reading the TOC and is in the process of getting
18315 		 * ready, so we should keep on trying for a long time to make
18316 		 * sure that all types of media are taken into account (for
18317 		 * some media the drive takes a long time to read TOC).  For
18318 		 * disks we do not want to retry this too many times as this
18319 		 * can cause a long hang in format when the drive refuses to
18320 		 * spin up (a very common failure).
18321 		 */
18322 		switch (ascq) {
18323 		case 0x00:  /* LUN NOT READY, CAUSE NOT REPORTABLE */
18324 			/*
18325 			 * Disk drives frequently refuse to spin up, which
18326 			 * results in a very long hang in format without
18327 			 * warning messages.
18328 			 *
18329 			 * Note: This code preserves the legacy behavior of
18330 			 * comparing xb_nr_retry_count against zero for fibre
18331 			 * channel targets instead of comparing against the
18332 			 * un_reset_retry_count value.  The reason for this
18333 			 * discrepancy has been so utterly lost beneath the
18334 			 * Sands of Time that even Indiana Jones could not
18335 			 * find it.
18336 			 */
18337 			if (un->un_f_is_fibre == TRUE) {
18338 				if (((sd_level_mask & SD_LOGMASK_DIAG) ||
18339 				    (xp->xb_nr_retry_count > 0)) &&
18340 				    (un->un_startstop_timeid == NULL)) {
18341 					scsi_log(SD_DEVINFO(un), sd_label,
18342 					    CE_WARN, "logical unit not ready, "
18343 					    "resetting disk\n");
18344 					sd_reset_target(un, pktp);
18345 				}
18346 			} else {
18347 				if (((sd_level_mask & SD_LOGMASK_DIAG) ||
18348 				    (xp->xb_nr_retry_count >
18349 				    un->un_reset_retry_count)) &&
18350 				    (un->un_startstop_timeid == NULL)) {
18351 					scsi_log(SD_DEVINFO(un), sd_label,
18352 					    CE_WARN, "logical unit not ready, "
18353 					    "resetting disk\n");
18354 					sd_reset_target(un, pktp);
18355 				}
18356 			}
18357 			break;
18358 
18359 		case 0x01:  /* LUN IS IN PROCESS OF BECOMING READY */
18360 			/*
18361 			 * If the target is in the process of becoming
18362 			 * ready, just proceed with the retry. This can
18363 			 * happen with CD-ROMs that take a long time to
18364 			 * read TOC after a power cycle or reset.
18365 			 */
18366 			goto do_retry;
18367 
18368 		case 0x02:  /* LUN NOT READY, INITITIALIZING CMD REQUIRED */
18369 		case 0x02:  /* LUN NOT READY, INITIALIZING CMD REQUIRED */
18370 
18371 		case 0x03:  /* LUN NOT READY, MANUAL INTERVENTION REQUIRED */
18372 			/*
18373 			 * Retries cannot help here so just fail right away.
18374 			 */
18375 			goto fail_command;
18376 
18377 		case 0x04:  /* LUN NOT READY, FORMAT IN PROGRESS */
18378 		case 0x05:  /* LUN NOT READY, REBUILD IN PROGRESS */
18379 		case 0x06:  /* LUN NOT READY, RECALCULATION IN PROGRESS */
18380 		case 0x07:  /* LUN NOT READY, OPERATION IN PROGRESS */
18381 		case 0x08:  /* LUN NOT READY, LONG WRITE IN PROGRESS */
18382 		default:    /* Possible future codes in SCSI spec? */
18383 			/*
18384 			 * For removable-media devices, do not retry if
18385 			 * ASCQ > 2 as these result mostly from USCSI commands
18386 			 * on MMC devices issued to check status of an
18387 			 * on MMC devices issued to check the status of an
18388 			 * ASCQ >= 4 do not print console messages as these
18389 			 * mainly represent a user-initiated operation
18390 			 * instead of a system failure.
18391 			 */
18392 			if (un->un_f_has_removable_media) {
18393 				si.ssi_severity = SCSI_ERR_ALL;
18394 				goto fail_command;
18395 			}
18396 			break;
18397 		}
18398 
18399 		/*
18400 		 * As part of our recovery attempt for the NOT READY
18401 		 * condition, we issue a START STOP UNIT command. However,
18402 		 * we want to wait for a short delay before attempting this
18403 		 * as there may still be more commands coming back from the
18404 		 * target with the check condition. To do this we use
18405 		 * timeout(9F) to call sd_start_stop_unit_callback() after
18406 		 * the delay interval expires. (sd_start_stop_unit_callback()
18407 		 * dispatches sd_start_stop_unit_task(), which will issue
18408 		 * the actual START STOP UNIT command.) The delay interval
18409 		 * is one-half of the delay that we will use to retry the
18410 		 * command that generated the NOT READY condition.
18411 		 *
18412 		 * Note that we could just dispatch sd_start_stop_unit_task()
18413 		 * from here and allow it to sleep for the delay interval,
18414 		 * but then we would be tying up the taskq thread
18415 		 * uncesessarily for the duration of the delay.
18416 		 * unnecessarily for the duration of the delay.
18417 		 * Do not issue the START STOP UNIT if the current command
18418 		 * is already a START STOP UNIT.
18419 		 */
18420 		if (pktp->pkt_cdbp[0] == SCMD_START_STOP) {
18421 			break;
18422 		}
18423 
18424 		/*
18425 		 * Do not schedule the timeout if one is already pending.
18426 		 */
18427 		if (un->un_startstop_timeid != NULL) {
18428 			SD_INFO(SD_LOG_ERROR, un,
18429 			    "sd_sense_key_not_ready: restart already issued to"
18430 			    " %s%d\n", ddi_driver_name(SD_DEVINFO(un)),
18431 			    ddi_get_instance(SD_DEVINFO(un)));
18432 			break;
18433 		}
18434 
18435 		/*
18436 		 * Schedule the START STOP UNIT command, then queue the command
18437 		 * for a retry.
18438 		 *
18439 		 * Note: A timeout is not scheduled for this retry because we
18440 		 * want the retry to be serial with the START_STOP_UNIT. The
18441 		 * retry will be started when the START_STOP_UNIT is completed
18442 		 * in sd_start_stop_unit_task.
18443 		 */
18444 		un->un_startstop_timeid = timeout(sd_start_stop_unit_callback,
18445 		    un, un->un_busy_timeout / 2);
18446 		xp->xb_nr_retry_count++;
18447 		sd_set_retry_bp(un, bp, 0, kstat_waitq_enter);
18448 		return;
18449 
18450 	case 0x05:	/* LOGICAL UNIT DOES NOT RESPOND TO SELECTION */
18451 		if (sd_error_level < SCSI_ERR_RETRYABLE) {
18452 			scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
18453 			    "unit does not respond to selection\n");
18454 		}
18455 		break;
18456 
18457 	case 0x3A:	/* MEDIUM NOT PRESENT */
18458 		if (sd_error_level >= SCSI_ERR_FATAL) {
18459 			scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
18460 			    "Caddy not inserted in drive\n");
18461 		}
18462 
18463 		sr_ejected(un);
18464 		un->un_mediastate = DKIO_EJECTED;
18465 		/* The state has changed, inform the media watch routines */
18466 		cv_broadcast(&un->un_state_cv);
18467 		/* Just fail if no media is present in the drive. */
18468 		goto fail_command;
18469 
18470 	default:
18471 		if (sd_error_level < SCSI_ERR_RETRYABLE) {
18472 			scsi_log(SD_DEVINFO(un), sd_label, CE_NOTE,
18473 			    "Unit not Ready. Additional sense code 0x%x\n",
18474 			    asc);
18475 		}
18476 		break;
18477 	}
18478 
18479 do_retry:
18480 
18481 	/*
18482 	 * Retry the command, as some targets may report NOT READY for
18483 	 * several seconds after being reset.
18484 	 */
18485 	xp->xb_nr_retry_count++;
18486 	si.ssi_severity = SCSI_ERR_RETRYABLE;
18487 	sd_retry_command(un, bp, SD_RETRIES_NOCHECK, sd_print_sense_msg,
18488 	    &si, EIO, un->un_busy_timeout, NULL);
18489 
18490 	return;
18491 
18492 fail_command:
18493 	sd_print_sense_msg(un, bp, &si, SD_NO_RETRY_ISSUED);
18494 	sd_return_failed_command(un, bp, EIO);
18495 }
18496 
18497 
18498 
18499 /*
18500  *    Function: sd_sense_key_medium_or_hardware_error
18501  *
18502  * Description: Recovery actions for a SCSI "Medium Error" or "Hardware Error"
18503  *		sense key.
18504  *
18505  *     Context: May be called from interrupt context
18506  */
18507 
18508 static void
18509 sd_sense_key_medium_or_hardware_error(struct sd_lun *un, uint8_t *sense_datap,
18510     struct buf *bp, struct sd_xbuf *xp, struct scsi_pkt *pktp)
18511 {
18512 	struct sd_sense_info	si;
18513 	uint8_t sense_key = scsi_sense_key(sense_datap);
18514 	uint8_t asc = scsi_sense_asc(sense_datap);
18515 
18516 	ASSERT(un != NULL);
18517 	ASSERT(mutex_owned(SD_MUTEX(un)));
18518 	ASSERT(bp != NULL);
18519 	ASSERT(xp != NULL);
18520 	ASSERT(pktp != NULL);
18521 
18522 	si.ssi_severity = SCSI_ERR_FATAL;
18523 	si.ssi_pfa_flag = FALSE;
18524 
18525 	if (sense_key == KEY_MEDIUM_ERROR) {
18526 		SD_UPDATE_ERRSTATS(un, sd_rq_media_err);
18527 	}
18528 
18529 	SD_UPDATE_ERRSTATS(un, sd_harderrs);
18530 
18531 	if ((un->un_reset_retry_count != 0) &&
18532 	    (xp->xb_retry_count == un->un_reset_retry_count)) {
18533 		mutex_exit(SD_MUTEX(un));
18534 		/* Do NOT do a RESET_ALL here: too intrusive. (4112858) */
18535 		if (un->un_f_allow_bus_device_reset == TRUE) {
18536 
18537 			boolean_t try_resetting_target = B_TRUE;
18538 
18539 			/*
18540 			 * We need to be able to handle specific ASC values when
18541 			 * we are handling a KEY_HARDWARE_ERROR. In particular,
18542 			 * taking the default action of resetting the target may
18543 			 * not be the appropriate way to attempt recovery.
18544 			 * Resetting a target because of a single LUN failure
18545 			 * victimizes all LUNs on that target.
18546 			 *
18547 			 * This is true for LSI arrays: if an LSI array
18548 			 * controller returns an ASC of 0x84 (LUN Dead), we
18549 			 * should trust it.
18550 			 */
18551 
18552 			if (sense_key == KEY_HARDWARE_ERROR) {
18553 				switch (asc) {
18554 				case 0x84:
18555 					if (SD_IS_LSI(un)) {
18556 						try_resetting_target = B_FALSE;
18557 					}
18558 					break;
18559 				default:
18560 					break;
18561 				}
18562 			}
18563 
18564 			if (try_resetting_target == B_TRUE) {
18565 				int reset_retval = 0;
18566 				if (un->un_f_lun_reset_enabled == TRUE) {
18567 					SD_TRACE(SD_LOG_IO_CORE, un,
18568 					    "sd_sense_key_medium_or_hardware_"
18569 					    "error: issuing RESET_LUN\n");
18570 					reset_retval =
18571 					    scsi_reset(SD_ADDRESS(un),
18572 					    RESET_LUN);
18573 				}
18574 				if (reset_retval == 0) {
18575 					SD_TRACE(SD_LOG_IO_CORE, un,
18576 					    "sd_sense_key_medium_or_hardware_"
18577 					    "error: issuing RESET_TARGET\n");
18578 					(void) scsi_reset(SD_ADDRESS(un),
18579 					    RESET_TARGET);
18580 				}
18581 			}
18582 		}
18583 		mutex_enter(SD_MUTEX(un));
18584 	}
18585 
18586 	/*
18587 	 * This really ought to be a fatal error, but we will retry anyway
18588 	 * as some drives report this as a spurious error.
18589 	 */
18590 	sd_retry_command(un, bp, SD_RETRIES_STANDARD, sd_print_sense_msg,
18591 	    &si, EIO, (clock_t)0, NULL);
18592 }
18593 
18594 
18595 
18596 /*
18597  *    Function: sd_sense_key_illegal_request
18598  *
18599  * Description: Recovery actions for a SCSI "Illegal Request" sense key.
18600  *
18601  *     Context: May be called from interrupt context
18602  */
18603 
18604 static void
18605 sd_sense_key_illegal_request(struct sd_lun *un, struct buf *bp,
18606     struct sd_xbuf *xp, struct scsi_pkt *pktp)
18607 {
18608 	struct sd_sense_info	si;
18609 
18610 	ASSERT(un != NULL);
18611 	ASSERT(mutex_owned(SD_MUTEX(un)));
18612 	ASSERT(bp != NULL);
18613 	ASSERT(xp != NULL);
18614 	ASSERT(pktp != NULL);
18615 
18616 	SD_UPDATE_ERRSTATS(un, sd_rq_illrq_err);
18617 
18618 	si.ssi_severity = SCSI_ERR_INFO;
18619 	si.ssi_pfa_flag = FALSE;
18620 
18621 	/* Pointless to retry if the target thinks it's an illegal request */
18622 	sd_print_sense_msg(un, bp, &si, SD_NO_RETRY_ISSUED);
18623 	sd_return_failed_command(un, bp, EIO);
18624 }
18625 
18626 
18627 
18628 
18629 /*
18630  *    Function: sd_sense_key_unit_attention
18631  *
18632  * Description: Recovery actions for a SCSI "Unit Attention" sense key.
18633  *
18634  *     Context: May be called from interrupt context
18635  */
18636 
18637 static void
18638 sd_sense_key_unit_attention(struct sd_lun *un, uint8_t *sense_datap,
18639     struct buf *bp, struct sd_xbuf *xp, struct scsi_pkt *pktp)
18640 {
18641 	/*
18642 	 * For UNIT ATTENTION we allow retries for one minute. Devices
18643 	 * like Sonoma can return UNIT ATTENTION for close to a minute
18644 	 * under certain conditions.
18645 	 */
18646 	int	retry_check_flag = SD_RETRIES_UA;
18647 	boolean_t	kstat_updated = B_FALSE;
18648 	struct	sd_sense_info		si;
18649 	uint8_t asc = scsi_sense_asc(sense_datap);
18650 	uint8_t	ascq = scsi_sense_ascq(sense_datap);
18651 
18652 	ASSERT(un != NULL);
18653 	ASSERT(mutex_owned(SD_MUTEX(un)));
18654 	ASSERT(bp != NULL);
18655 	ASSERT(xp != NULL);
18656 	ASSERT(pktp != NULL);
18657 
18658 	si.ssi_severity = SCSI_ERR_INFO;
18659 	si.ssi_pfa_flag = FALSE;
18660 
18661 
18662 	switch (asc) {
18663 	case 0x5D:  /* FAILURE PREDICTION THRESHOLD EXCEEDED */
18664 		if (sd_report_pfa != 0) {
18665 			SD_UPDATE_ERRSTATS(un, sd_rq_pfa_err);
18666 			si.ssi_pfa_flag = TRUE;
18667 			retry_check_flag = SD_RETRIES_STANDARD;
18668 			goto do_retry;
18669 		}
18670 
18671 		break;
18672 
18673 	case 0x29:  /* POWER ON, RESET, OR BUS DEVICE RESET OCCURRED */
18674 		if ((un->un_resvd_status & SD_RESERVE) == SD_RESERVE) {
18675 			un->un_resvd_status |=
18676 			    (SD_LOST_RESERVE | SD_WANT_RESERVE);
18677 		}
18678 #ifdef _LP64
18679 		if (un->un_blockcount + 1 > SD_GROUP1_MAX_ADDRESS) {
18680 			if (taskq_dispatch(sd_tq, sd_reenable_dsense_task,
18681 			    un, KM_NOSLEEP) == TASKQID_INVALID) {
18682 				/*
18683 				 * If we can't dispatch the task we'll just
18684 				 * live without descriptor sense.  We can
18685 				 * try again on the next "unit attention"
18686 				 */
18687 				SD_ERROR(SD_LOG_ERROR, un,
18688 				    "sd_sense_key_unit_attention: "
18689 				    "Could not dispatch "
18690 				    "sd_reenable_dsense_task\n");
18691 			}
18692 		}
18693 #endif /* _LP64 */
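		/*
		 * (Sketch of the rationale above, stated as an assumption: a
		 * reset may revert the device to fixed-format sense, whose
		 * information field is only 32 bits wide, and a Group 1
		 * (10-byte) CDB can address at most 0xFFFFFFFF blocks, so
		 * LUNs beyond that need descriptor-format sense to report
		 * 64-bit error LBAs; hence the re-enable attempt here.)
		 */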
18694 		/* FALLTHRU */
18695 
18696 	case 0x28: /* NOT READY TO READY CHANGE, MEDIUM MAY HAVE CHANGED */
18697 		if (!un->un_f_has_removable_media) {
18698 			break;
18699 		}
18700 
18701 		/*
18702 		 * When we get a unit attention from a removable-media device,
18703 		 * it may be in a state that will take a long time to recover
18704 		 * (e.g., from a reset).  Since we are executing in interrupt
18705 		 * context here, we cannot wait around for the device to come
18706 		 * back. So hand this command off to sd_media_change_task()
18707 		 * for deferred processing under taskq thread context. (Note
18708 		 * that the command still may be failed if a problem is
18709 		 * encountered at a later time.)
18710 		 */
18711 		if (taskq_dispatch(sd_tq, sd_media_change_task, pktp,
18712 		    KM_NOSLEEP) == TASKQID_INVALID) {
18713 			/*
18714 			 * Cannot dispatch the request so fail the command.
18715 			 */
18716 			SD_UPDATE_ERRSTATS(un, sd_harderrs);
18717 			SD_UPDATE_ERRSTATS(un, sd_rq_nodev_err);
18718 			si.ssi_severity = SCSI_ERR_FATAL;
18719 			sd_print_sense_msg(un, bp, &si, SD_NO_RETRY_ISSUED);
18720 			sd_return_failed_command(un, bp, EIO);
18721 		}
18722 
18723 		/*
18724 		 * If the dispatch of sd_media_change_task() failed, the kstats
18725 		 * have already been updated above. If the dispatch succeeded,
18726 		 * the kstats will be updated later if an error is encountered.
18727 		 * Either way, set the kstat_updated flag here.
18728 		 */
18729 		kstat_updated = B_TRUE;
18730 
18731 		/*
18732 		 * Either the command has been successfully dispatched to a
18733 		 * task Q for retrying, or the dispatch failed. In either case
18734 		 * do NOT retry again by calling sd_retry_command. This sets up
18735 		 * two retries of the same command and when one completes and
18736 		 * frees the resources the other will access freed memory,
18737 		 * a bad thing.
18738 		 */
18739 		return;
18740 
18741 	default:
18742 		break;
18743 	}
18744 
18745 	/*
18746 	 * ASC  ASCQ
18747 	 *  2A   09	Capacity data has changed
18748 	 *  2A   01	Mode parameters changed
18749 	 *  3F   0E	Reported luns data has changed
18750 	 * Arrays that support logical unit expansion should report
18751 	 * capacity changes (2Ah/09). "Mode parameters changed" and "Reported
18752 	 * luns data has changed" are treated as approximations of that condition.
18753 	 */
18754 	if (((asc == 0x2a) && (ascq == 0x09)) ||
18755 	    ((asc == 0x2a) && (ascq == 0x01)) ||
18756 	    ((asc == 0x3f) && (ascq == 0x0e))) {
18757 		if (taskq_dispatch(sd_tq, sd_target_change_task, un,
18758 		    KM_NOSLEEP) == TASKQID_INVALID) {
18759 			SD_ERROR(SD_LOG_ERROR, un,
18760 			    "sd_sense_key_unit_attention: "
18761 			    "Could not dispatch sd_target_change_task\n");
18762 		}
18763 	}
18764 
18765 	/*
18766 	 * Update kstat if we haven't done that.
18767 	 * Update the kstats if we haven't done so already.
18768 	if (!kstat_updated) {
18769 		SD_UPDATE_ERRSTATS(un, sd_harderrs);
18770 		SD_UPDATE_ERRSTATS(un, sd_rq_nodev_err);
18771 	}
18772 
18773 do_retry:
18774 	sd_retry_command(un, bp, retry_check_flag, sd_print_sense_msg, &si,
18775 	    EIO, SD_UA_RETRY_DELAY, NULL);
18776 }
18777 
18778 
18779 
18780 /*
18781  *    Function: sd_sense_key_fail_command
18782  *
18783  * Description: Use to fail a command when we don't like the sense key that
18784  *		was returned.
18785  * Description: Used to fail a command when we don't like the sense key that
18786  *     Context: May be called from interrupt context
18787  */
18788 
18789 static void
18790 sd_sense_key_fail_command(struct sd_lun *un, struct buf *bp, struct sd_xbuf *xp,
18791     struct scsi_pkt *pktp)
18792 {
18793 	struct sd_sense_info	si;
18794 
18795 	ASSERT(un != NULL);
18796 	ASSERT(mutex_owned(SD_MUTEX(un)));
18797 	ASSERT(bp != NULL);
18798 	ASSERT(xp != NULL);
18799 	ASSERT(pktp != NULL);
18800 
18801 	si.ssi_severity = SCSI_ERR_FATAL;
18802 	si.ssi_pfa_flag = FALSE;
18803 
18804 	sd_print_sense_msg(un, bp, &si, SD_NO_RETRY_ISSUED);
18805 	sd_return_failed_command(un, bp, EIO);
18806 }
18807 
18808 
18809 
18810 /*
18811  *    Function: sd_sense_key_blank_check
18812  *
18813  * Description: Recovery actions for a SCSI "Blank Check" sense key.
18814  *		Has no monetary connotation.
18815  *
18816  *     Context: May be called from interrupt context
18817  */
18818 
18819 static void
18820 sd_sense_key_blank_check(struct sd_lun *un, struct buf *bp, struct sd_xbuf *xp,
18821     struct scsi_pkt *pktp)
18822 {
18823 	struct sd_sense_info	si;
18824 
18825 	ASSERT(un != NULL);
18826 	ASSERT(mutex_owned(SD_MUTEX(un)));
18827 	ASSERT(bp != NULL);
18828 	ASSERT(xp != NULL);
18829 	ASSERT(pktp != NULL);
18830 
18831 	/*
18832 	 * Blank check is not fatal for removable devices; therefore,
18833 	 * it does not require a console message.
18834 	 */
18835 	si.ssi_severity = (un->un_f_has_removable_media) ? SCSI_ERR_ALL :
18836 	    SCSI_ERR_FATAL;
18837 	si.ssi_pfa_flag = FALSE;
18838 
18839 	sd_print_sense_msg(un, bp, &si, SD_NO_RETRY_ISSUED);
18840 	sd_return_failed_command(un, bp, EIO);
18841 }
18842 
18843 
18844 
18845 
18846 /*
18847  *    Function: sd_sense_key_aborted_command
18848  *
18849  * Description: Recovery actions for a SCSI "Aborted Command" sense key.
18850  *
18851  *     Context: May be called from interrupt context
18852  */
18853 
18854 static void
18855 sd_sense_key_aborted_command(struct sd_lun *un, struct buf *bp,
18856     struct sd_xbuf *xp, struct scsi_pkt *pktp)
18857 {
18858 	struct sd_sense_info	si;
18859 
18860 	ASSERT(un != NULL);
18861 	ASSERT(mutex_owned(SD_MUTEX(un)));
18862 	ASSERT(bp != NULL);
18863 	ASSERT(xp != NULL);
18864 	ASSERT(pktp != NULL);
18865 
18866 	si.ssi_severity = SCSI_ERR_FATAL;
18867 	si.ssi_pfa_flag = FALSE;
18868 
18869 	SD_UPDATE_ERRSTATS(un, sd_harderrs);
18870 
18871 	/*
18872 	 * This really ought to be a fatal error, but we will retry anyway
18873 	 * as some drives report this as a spurious error.
18874 	 */
18875 	sd_retry_command(un, bp, SD_RETRIES_STANDARD, sd_print_sense_msg,
18876 	    &si, EIO, drv_usectohz(100000), NULL);
18877 }
18878 
18879 
18880 
18881 /*
18882  *    Function: sd_sense_key_default
18883  *
18884  * Description: Default recovery action for several SCSI sense keys (basically
18885  *		attempts a retry).
18886  *
18887  *     Context: May be called from interrupt context
18888  */
18889 
18890 static void
18891 sd_sense_key_default(struct sd_lun *un, uint8_t *sense_datap, struct buf *bp,
18892     struct sd_xbuf *xp, struct scsi_pkt *pktp)
18893 {
18894 	struct sd_sense_info	si;
18895 	uint8_t sense_key = scsi_sense_key(sense_datap);
18896 
18897 	ASSERT(un != NULL);
18898 	ASSERT(mutex_owned(SD_MUTEX(un)));
18899 	ASSERT(bp != NULL);
18900 	ASSERT(xp != NULL);
18901 	ASSERT(pktp != NULL);
18902 
18903 	SD_UPDATE_ERRSTATS(un, sd_harderrs);
18904 
18905 	/*
18906 	 * Undecoded sense key.	Attempt retries and hope that will fix
18907 	 * Undecoded sense key.	Attempt retries and hope that they will fix
18908 	 */
18909 	if ((pktp->pkt_flags & FLAG_SILENT) == 0) {
18910 		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
18911 		    "Unhandled Sense Key '%s'\n", sense_keys[sense_key]);
18912 	}
18913 
18914 	si.ssi_severity = SCSI_ERR_FATAL;
18915 	si.ssi_pfa_flag = FALSE;
18916 
18917 	sd_retry_command(un, bp, SD_RETRIES_STANDARD, sd_print_sense_msg,
18918 	    &si, EIO, (clock_t)0, NULL);
18919 }
18920 
18921 
18922 
18923 /*
18924  *    Function: sd_print_retry_msg
18925  *
18926  * Description: Print a message indicating the retry action being taken.
18927  *
18928  *   Arguments: un - ptr to associated softstate
18929  *		bp - ptr to buf(9S) for the command
18930  *		arg - not used.
18931  *		flag - SD_IMMEDIATE_RETRY_ISSUED, SD_DELAYED_RETRY_ISSUED,
18932  *			or SD_NO_RETRY_ISSUED
18933  *
18934  *     Context: May be called from interrupt context
18935  */
18936 /* ARGSUSED */
18937 static void
18938 sd_print_retry_msg(struct sd_lun *un, struct buf *bp, void *arg, int flag)
18939 {
18940 	struct sd_xbuf	*xp;
18941 	struct scsi_pkt *pktp;
18942 	char *reasonp;
18943 	char *msgp;
18944 
18945 	ASSERT(un != NULL);
18946 	ASSERT(mutex_owned(SD_MUTEX(un)));
18947 	ASSERT(bp != NULL);
18948 	pktp = SD_GET_PKTP(bp);
18949 	ASSERT(pktp != NULL);
18950 	xp = SD_GET_XBUF(bp);
18951 	ASSERT(xp != NULL);
18952 
18953 	ASSERT(!mutex_owned(&un->un_pm_mutex));
18954 	mutex_enter(&un->un_pm_mutex);
18955 	if ((un->un_state == SD_STATE_SUSPENDED) ||
18956 	    (SD_DEVICE_IS_IN_LOW_POWER(un)) ||
18957 	    (pktp->pkt_flags & FLAG_SILENT)) {
18958 		mutex_exit(&un->un_pm_mutex);
18959 		goto update_pkt_reason;
18960 	}
18961 	mutex_exit(&un->un_pm_mutex);
18962 
18963 	/*
18964 	 * Suppress messages if they are all the same pkt_reason; with
18965 	 * TQ, many (up to 256) are returned with the same pkt_reason.
18966 	 * If we are in panic, then suppress the retry messages.
18967 	 */
18968 	switch (flag) {
18969 	case SD_NO_RETRY_ISSUED:
18970 		msgp = "giving up";
18971 		break;
18972 	case SD_IMMEDIATE_RETRY_ISSUED:
18973 	case SD_DELAYED_RETRY_ISSUED:
18974 		if (ddi_in_panic() || (un->un_state == SD_STATE_OFFLINE) ||
18975 		    ((pktp->pkt_reason == un->un_last_pkt_reason) &&
18976 		    (sd_error_level != SCSI_ERR_ALL))) {
18977 			return;
18978 		}
18979 		msgp = "retrying command";
18980 		break;
18981 	default:
18982 		goto update_pkt_reason;
18983 	}
18984 
18985 	reasonp = (((pktp->pkt_statistics & STAT_PERR) != 0) ? "parity error" :
18986 	    scsi_rname(pktp->pkt_reason));
18987 
18988 	if (SD_FM_LOG(un) == SD_FM_LOG_NSUP) {
18989 		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
18990 		    "SCSI transport failed: reason '%s': %s\n", reasonp, msgp);
18991 	}
18992 
18993 update_pkt_reason:
18994 	/*
18995 	 * Update un->un_last_pkt_reason with the value in pktp->pkt_reason.
18996 	 * This is to prevent multiple console messages for the same failure
18997 	 * condition.  Note that un->un_last_pkt_reason is NOT restored if &
18998 	 * when the command is retried successfully because there still may be
18999 	 * more commands coming back with the same value of pktp->pkt_reason.
19000 	 */
19001 	if ((pktp->pkt_reason != CMD_CMPLT) || (xp->xb_retry_count == 0)) {
19002 		un->un_last_pkt_reason = pktp->pkt_reason;
19003 	}
19004 }
19005 
19006 
19007 /*
19008  *    Function: sd_print_cmd_incomplete_msg
19009  *
19010  * Description: Message logging fn. for a SCSA "CMD_INCOMPLETE" pkt_reason.
19011  *
19012  *   Arguments: un - ptr to associated softstate
19013  *		bp - ptr to buf(9S) for the command
19014  *		arg - passed to sd_print_retry_msg()
19015  *		code - SD_IMMEDIATE_RETRY_ISSUED, SD_DELAYED_RETRY_ISSUED,
19016  *			or SD_NO_RETRY_ISSUED
19017  *
19018  *     Context: May be called from interrupt context
19019  */
19020 
19021 static void
19022 sd_print_cmd_incomplete_msg(struct sd_lun *un, struct buf *bp, void *arg,
19023     int code)
19024 {
19025 	dev_info_t	*dip;
19026 
19027 	ASSERT(un != NULL);
19028 	ASSERT(mutex_owned(SD_MUTEX(un)));
19029 	ASSERT(bp != NULL);
19030 
19031 	switch (code) {
19032 	case SD_NO_RETRY_ISSUED:
19033 		/* Command was failed. Someone turned off this target? */
19034 		if (un->un_state != SD_STATE_OFFLINE) {
19035 			/*
19036 			 * Suppress the message if we are detaching and the
19037 			 * device has been disconnected.  Note that
19038 			 * DEVI_IS_DEVICE_REMOVED is a consolidation-private
19039 			 * interface and not part of the DDI.
19040 			 */
19041 			dip = un->un_sd->sd_dev;
19042 			if (!(DEVI_IS_DETACHING(dip) &&
19043 			    DEVI_IS_DEVICE_REMOVED(dip))) {
19044 				scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
19045 				"disk not responding to selection\n");
19046 				    "disk not responding to selection\n");
19047 			New_state(un, SD_STATE_OFFLINE);
19048 		}
19049 		break;
19050 
19051 	case SD_DELAYED_RETRY_ISSUED:
19052 	case SD_IMMEDIATE_RETRY_ISSUED:
19053 	default:
19054 		/* Command was successfully queued for retry */
19055 		sd_print_retry_msg(un, bp, arg, code);
19056 		break;
19057 	}
19058 }
19059 
19060 
19061 /*
19062  *    Function: sd_pkt_reason_cmd_incomplete
19063  *
19064  * Description: Recovery actions for a SCSA "CMD_INCOMPLETE" pkt_reason.
19065  *
19066  *     Context: May be called from interrupt context
19067  */
19068 
19069 static void
19070 sd_pkt_reason_cmd_incomplete(struct sd_lun *un, struct buf *bp,
19071     struct sd_xbuf *xp, struct scsi_pkt *pktp)
19072 {
19073 	int flag = SD_RETRIES_STANDARD | SD_RETRIES_ISOLATE;
19074 
19075 	ASSERT(un != NULL);
19076 	ASSERT(mutex_owned(SD_MUTEX(un)));
19077 	ASSERT(bp != NULL);
19078 	ASSERT(xp != NULL);
19079 	ASSERT(pktp != NULL);
19080 
19081 	/* Do not do a reset if selection did not complete */
19082 	/* Note: Should this not just check the bit? */
19083 	if (pktp->pkt_state != STATE_GOT_BUS) {
19084 		SD_UPDATE_ERRSTATS(un, sd_transerrs);
19085 		sd_reset_target(un, pktp);
19086 	}
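	/*
	 * (Sketch of the bit test the note above alludes to; the semantics
	 * differ: the equality test resets whenever pkt_state is anything
	 * other than exactly STATE_GOT_BUS, while the bit test would reset
	 * only when the bus was never obtained at all:
	 *
	 *	if ((pktp->pkt_state & STATE_GOT_BUS) == 0) {
	 *		SD_UPDATE_ERRSTATS(un, sd_transerrs);
	 *		sd_reset_target(un, pktp);
	 *	}
	 * )
	 */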
19087 
19088 	/*
19089 	 * If the target was not successfully selected, then set
19090 	 * SD_RETRIES_FAILFAST to indicate that we lost communication
19091 	 * with the target, and further retries and/or commands are
19092 	 * likely to take a long time.
19093 	 */
19094 	if ((pktp->pkt_state & STATE_GOT_TARGET) == 0) {
19095 		flag |= SD_RETRIES_FAILFAST;
19096 	}
19097 
19098 	SD_UPDATE_RESERVATION_STATUS(un, pktp);
19099 
19100 	sd_retry_command(un, bp, flag,
19101 	    sd_print_cmd_incomplete_msg, NULL, EIO, SD_RESTART_TIMEOUT, NULL);
19102 }
19103 
19104 
19105 
19106 /*
19107  *    Function: sd_pkt_reason_cmd_tran_err
19108  *
19109  * Description: Recovery actions for a SCSA "CMD_TRAN_ERR" pkt_reason.
19110  *
19111  *     Context: May be called from interrupt context
19112  */
19113 
19114 static void
19115 sd_pkt_reason_cmd_tran_err(struct sd_lun *un, struct buf *bp,
19116     struct sd_xbuf *xp, struct scsi_pkt *pktp)
19117 {
19118 	ASSERT(un != NULL);
19119 	ASSERT(mutex_owned(SD_MUTEX(un)));
19120 	ASSERT(bp != NULL);
19121 	ASSERT(xp != NULL);
19122 	ASSERT(pktp != NULL);
19123 
19124 	/*
19125 	 * Do not reset if we got a parity error, or if
19126 	 * selection did not complete.
19127 	 */
19128 	SD_UPDATE_ERRSTATS(un, sd_harderrs);
19129 	/* Note: Should this not just check the bit for pkt_state? */
19130 	if (((pktp->pkt_statistics & STAT_PERR) == 0) &&
19131 	    (pktp->pkt_state != STATE_GOT_BUS)) {
19132 		SD_UPDATE_ERRSTATS(un, sd_transerrs);
19133 		sd_reset_target(un, pktp);
19134 	}
19135 
19136 	SD_UPDATE_RESERVATION_STATUS(un, pktp);
19137 
19138 	sd_retry_command(un, bp, (SD_RETRIES_STANDARD | SD_RETRIES_ISOLATE),
19139 	    sd_print_retry_msg, NULL, EIO, SD_RESTART_TIMEOUT, NULL);
19140 }
19141 
19142 
19143 
19144 /*
19145  *    Function: sd_pkt_reason_cmd_reset
19146  *
19147  * Description: Recovery actions for a SCSA "CMD_RESET" pkt_reason.
19148  *
19149  *     Context: May be called from interrupt context
19150  */
19151 
19152 static void
19153 sd_pkt_reason_cmd_reset(struct sd_lun *un, struct buf *bp, struct sd_xbuf *xp,
19154     struct scsi_pkt *pktp)
19155 {
19156 	ASSERT(un != NULL);
19157 	ASSERT(mutex_owned(SD_MUTEX(un)));
19158 	ASSERT(bp != NULL);
19159 	ASSERT(xp != NULL);
19160 	ASSERT(pktp != NULL);
19161 
19162 	/* The target may still be running the command, so try to reset. */
19163 	SD_UPDATE_ERRSTATS(un, sd_transerrs);
19164 	sd_reset_target(un, pktp);
19165 
19166 	SD_UPDATE_RESERVATION_STATUS(un, pktp);
19167 
19168 	/*
19169 	 * If pkt_reason is CMD_RESET, chances are that this pkt got
19170 	 * reset because another target on this bus caused it. The target
19171 	 * that caused it should get CMD_TIMEOUT with pkt_statistics
19172 	 * of STAT_TIMEOUT/STAT_DEV_RESET.
19173 	 */
19174 
19175 	sd_retry_command(un, bp, (SD_RETRIES_VICTIM | SD_RETRIES_ISOLATE),
19176 	    sd_print_retry_msg, NULL, EIO, SD_RESTART_TIMEOUT, NULL);
19177 }
19178 
19179 
19180 
19181 
19182 /*
19183  *    Function: sd_pkt_reason_cmd_aborted
19184  *
19185  * Description: Recovery actions for a SCSA "CMD_ABORTED" pkt_reason.
19186  *
19187  *     Context: May be called from interrupt context
19188  */
19189 
19190 static void
19191 sd_pkt_reason_cmd_aborted(struct sd_lun *un, struct buf *bp, struct sd_xbuf *xp,
19192     struct scsi_pkt *pktp)
19193 {
19194 	ASSERT(un != NULL);
19195 	ASSERT(mutex_owned(SD_MUTEX(un)));
19196 	ASSERT(bp != NULL);
19197 	ASSERT(xp != NULL);
19198 	ASSERT(pktp != NULL);
19199 
19200 	/* The target may still be running the command, so try to reset. */
19201 	SD_UPDATE_ERRSTATS(un, sd_transerrs);
19202 	sd_reset_target(un, pktp);
19203 
19204 	SD_UPDATE_RESERVATION_STATUS(un, pktp);
19205 
19206 	/*
19207 	 * If pkt_reason is CMD_ABORTED, chances are that this pkt got
19208 	 * aborted because another target on this bus caused it. The target
19209 	 * that caused it should get CMD_TIMEOUT with pkt_statistics
19210 	 * of STAT_TIMEOUT/STAT_DEV_RESET.
19211 	 */
19212 
19213 	sd_retry_command(un, bp, (SD_RETRIES_VICTIM | SD_RETRIES_ISOLATE),
19214 	    sd_print_retry_msg, NULL, EIO, SD_RESTART_TIMEOUT, NULL);
19215 }
19216 
19217 
19218 
19219 /*
19220  *    Function: sd_pkt_reason_cmd_timeout
19221  *
19222  * Description: Recovery actions for a SCSA "CMD_TIMEOUT" pkt_reason.
19223  *
19224  *     Context: May be called from interrupt context
19225  */
19226 
19227 static void
19228 sd_pkt_reason_cmd_timeout(struct sd_lun *un, struct buf *bp, struct sd_xbuf *xp,
19229     struct scsi_pkt *pktp)
19230 {
19231 	ASSERT(un != NULL);
19232 	ASSERT(mutex_owned(SD_MUTEX(un)));
19233 	ASSERT(bp != NULL);
19234 	ASSERT(xp != NULL);
19235 	ASSERT(pktp != NULL);
19236 
19237 
19238 	SD_UPDATE_ERRSTATS(un, sd_transerrs);
19239 	sd_reset_target(un, pktp);
19240 
19241 	SD_UPDATE_RESERVATION_STATUS(un, pktp);
19242 
19243 	/*
19244 	 * A command timeout indicates that we could not establish
19245 	 * communication with the target, so set SD_RETRIES_FAILFAST
19246 	 * as further retries/commands are likely to take a long time.
19247 	 */
19248 	sd_retry_command(un, bp,
19249 	    (SD_RETRIES_STANDARD | SD_RETRIES_ISOLATE | SD_RETRIES_FAILFAST),
19250 	    sd_print_retry_msg, NULL, EIO, SD_RESTART_TIMEOUT, NULL);
19251 }
19252 
19253 
19254 
19255 /*
19256  *    Function: sd_pkt_reason_cmd_unx_bus_free
19257  *
19258  * Description: Recovery actions for a SCSA "CMD_UNX_BUS_FREE" pkt_reason.
19259  *
19260  *     Context: May be called from interrupt context
19261  */
19262 
19263 static void
19264 sd_pkt_reason_cmd_unx_bus_free(struct sd_lun *un, struct buf *bp,
19265     struct sd_xbuf *xp, struct scsi_pkt *pktp)
19266 {
19267 	void (*funcp)(struct sd_lun *un, struct buf *bp, void *arg, int code);
19268 
19269 	ASSERT(un != NULL);
19270 	ASSERT(mutex_owned(SD_MUTEX(un)));
19271 	ASSERT(bp != NULL);
19272 	ASSERT(xp != NULL);
19273 	ASSERT(pktp != NULL);
19274 
19275 	SD_UPDATE_ERRSTATS(un, sd_harderrs);
19276 	SD_UPDATE_RESERVATION_STATUS(un, pktp);
19277 
19278 	funcp = ((pktp->pkt_statistics & STAT_PERR) == 0) ?
19279 	    sd_print_retry_msg : NULL;
19280 
19281 	sd_retry_command(un, bp, (SD_RETRIES_STANDARD | SD_RETRIES_ISOLATE),
19282 	    funcp, NULL, EIO, SD_RESTART_TIMEOUT, NULL);
19283 }
19284 
19285 
19286 /*
19287  *    Function: sd_pkt_reason_cmd_tag_reject
19288  *
19289  * Description: Recovery actions for a SCSA "CMD_TAG_REJECT" pkt_reason.
19290  *
19291  *     Context: May be called from interrupt context
19292  */
19293 
19294 static void
19295 sd_pkt_reason_cmd_tag_reject(struct sd_lun *un, struct buf *bp,
19296     struct sd_xbuf *xp, struct scsi_pkt *pktp)
19297 {
19298 	ASSERT(un != NULL);
19299 	ASSERT(mutex_owned(SD_MUTEX(un)));
19300 	ASSERT(bp != NULL);
19301 	ASSERT(xp != NULL);
19302 	ASSERT(pktp != NULL);
19303 
19304 	SD_UPDATE_ERRSTATS(un, sd_harderrs);
19305 	pktp->pkt_flags = 0;
19306 	un->un_tagflags = 0;
19307 	if (un->un_f_opt_queueing == TRUE) {
19308 		un->un_throttle = min(un->un_throttle, 3);
19309 	} else {
19310 		un->un_throttle = 1;
19311 	}
19312 	mutex_exit(SD_MUTEX(un));
19313 	(void) scsi_ifsetcap(SD_ADDRESS(un), "tagged-qing", 0, 1);
19314 	mutex_enter(SD_MUTEX(un));
19315 
19316 	SD_UPDATE_RESERVATION_STATUS(un, pktp);
19317 
19318 	/* Legacy behavior not to check retry counts here. */
19319 	sd_retry_command(un, bp, (SD_RETRIES_NOCHECK | SD_RETRIES_ISOLATE),
19320 	    sd_print_retry_msg, NULL, EIO, SD_RESTART_TIMEOUT, NULL);
19321 }
19322 
19323 
19324 /*
19325  *    Function: sd_pkt_reason_default
19326  *
19327  * Description: Default recovery actions for SCSA pkt_reason values that
19328  *		do not have more explicit recovery actions.
19329  *
19330  *     Context: May be called from interrupt context
19331  */
19332 
19333 static void
19334 sd_pkt_reason_default(struct sd_lun *un, struct buf *bp, struct sd_xbuf *xp,
19335     struct scsi_pkt *pktp)
19336 {
19337 	ASSERT(un != NULL);
19338 	ASSERT(mutex_owned(SD_MUTEX(un)));
19339 	ASSERT(bp != NULL);
19340 	ASSERT(xp != NULL);
19341 	ASSERT(pktp != NULL);
19342 
19343 	SD_UPDATE_ERRSTATS(un, sd_transerrs);
19344 	sd_reset_target(un, pktp);
19345 
19346 	SD_UPDATE_RESERVATION_STATUS(un, pktp);
19347 
19348 	sd_retry_command(un, bp, (SD_RETRIES_STANDARD | SD_RETRIES_ISOLATE),
19349 	    sd_print_retry_msg, NULL, EIO, SD_RESTART_TIMEOUT, NULL);
19350 }
19351 
19352 
19353 
19354 /*
19355  *    Function: sd_pkt_status_check_condition
19356  *
19357  * Description: Recovery actions for a "STATUS_CHECK" SCSI command status.
19358  *
19359  *     Context: May be called from interrupt context
19360  */
19361 
19362 static void
19363 sd_pkt_status_check_condition(struct sd_lun *un, struct buf *bp,
19364     struct sd_xbuf *xp, struct scsi_pkt *pktp)
19365 {
19366 	ASSERT(un != NULL);
19367 	ASSERT(mutex_owned(SD_MUTEX(un)));
19368 	ASSERT(bp != NULL);
19369 	ASSERT(xp != NULL);
19370 	ASSERT(pktp != NULL);
19371 
19372 	SD_TRACE(SD_LOG_IO, un, "sd_pkt_status_check_condition: "
19373 	    "entry: buf:0x%p xp:0x%p\n", bp, xp);
19374 
19375 	/*
19376 	 * If ARQ is NOT enabled, then issue a REQUEST SENSE command (the
19377 	 * command will be retried after the request sense). Otherwise, retry
19378 	 * the command. Note: we are issuing the request sense even though the
19379 	 * retry limit may have been reached for the failed command.
19380 	 */
19381 	if (un->un_f_arq_enabled == FALSE) {
19382 		SD_INFO(SD_LOG_IO_CORE, un, "sd_pkt_status_check_condition: "
19383 		    "no ARQ, sending request sense command\n");
19384 		sd_send_request_sense_command(un, bp, pktp);
19385 	} else {
19386 		SD_INFO(SD_LOG_IO_CORE, un, "sd_pkt_status_check_condition: "
19387 		    "ARQ, retrying request sense command\n");
19388 #if defined(__x86)
19389 		/*
19390 		 * The SD_RETRY_DELAY value needs to be adjusted here
19391 		 * whenever SD_RETRY_DELAY changes in sddef.h.
19392 		 */
19393 		sd_retry_command(un, bp, SD_RETRIES_STANDARD, NULL, NULL, EIO,
19394 		    un->un_f_is_fibre ? drv_usectohz(100000) : (clock_t)0,
19395 		    NULL);
19396 #else
19397 		sd_retry_command(un, bp, SD_RETRIES_STANDARD, NULL, NULL,
19398 		    EIO, SD_RETRY_DELAY, NULL);
19399 #endif
19400 	}
19401 
19402 	SD_TRACE(SD_LOG_IO_CORE, un, "sd_pkt_status_check_condition: exit\n");
19403 }
19404 
19405 
19406 /*
19407  *    Function: sd_pkt_status_busy
19408  *
19409  * Description: Recovery actions for a "STATUS_BUSY" SCSI command status.
19410  *
19411  *     Context: May be called from interrupt context
19412  */
19413 
19414 static void
19415 sd_pkt_status_busy(struct sd_lun *un, struct buf *bp, struct sd_xbuf *xp,
19416     struct scsi_pkt *pktp)
19417 {
19418 	ASSERT(un != NULL);
19419 	ASSERT(mutex_owned(SD_MUTEX(un)));
19420 	ASSERT(bp != NULL);
19421 	ASSERT(xp != NULL);
19422 	ASSERT(pktp != NULL);
19423 
19424 	SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
19425 	    "sd_pkt_status_busy: entry\n");
19426 
19427 	/* If retries are exhausted, just fail the command. */
19428 	if (xp->xb_retry_count >= un->un_busy_retry_count) {
19429 		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
19430 		    "device busy too long\n");
19431 		sd_return_failed_command(un, bp, EIO);
19432 		SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
19433 		    "sd_pkt_status_busy: exit\n");
19434 		return;
19435 	}
19436 	xp->xb_retry_count++;
19437 
19438 	/*
19439 	 * Try to reset the target. However, we do not want to perform
19440 	 * more than one reset if the device continues to fail. The reset
19441 	 * will be performed when the retry count reaches the reset
19442 	 * threshold.  This threshold should be set such that at least
19443 	 * one retry is issued before the reset is performed.
19444 	 */
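	/*
	 * (Worked example of the clamp below, with hypothetical tunings:
	 * if un_reset_retry_count is 0 or 1 the threshold becomes 2, so one
	 * plain BUSY retry still precedes the reset; if un_reset_retry_count
	 * is 5, retries 1-4 are plain retries and the reset escalation runs
	 * when xb_retry_count reaches 5.)
	 */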
19445 	if (xp->xb_retry_count ==
19446 	    ((un->un_reset_retry_count < 2) ? 2 : un->un_reset_retry_count)) {
19447 		int rval = 0;
19448 		mutex_exit(SD_MUTEX(un));
19449 		if (un->un_f_allow_bus_device_reset == TRUE) {
19450 			/*
19451 			 * First try to reset the LUN; if we cannot then
19452 			 * try to reset the target.
19453 			 */
19454 			if (un->un_f_lun_reset_enabled == TRUE) {
19455 				SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
19456 				    "sd_pkt_status_busy: RESET_LUN\n");
19457 				rval = scsi_reset(SD_ADDRESS(un), RESET_LUN);
19458 			}
19459 			if (rval == 0) {
19460 				SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
19461 				    "sd_pkt_status_busy: RESET_TARGET\n");
19462 				rval = scsi_reset(SD_ADDRESS(un), RESET_TARGET);
19463 			}
19464 		}
19465 		if (rval == 0) {
19466 			/*
19467 			 * If the RESET_LUN and/or RESET_TARGET failed,
19468 			 * try RESET_ALL
19469 			 */
19470 			SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
19471 			    "sd_pkt_status_busy: RESET_ALL\n");
19472 			rval = scsi_reset(SD_ADDRESS(un), RESET_ALL);
19473 		}
19474 		mutex_enter(SD_MUTEX(un));
19475 		if (rval == 0) {
19476 			/*
19477 			 * The RESET_LUN, RESET_TARGET, and/or RESET_ALL failed.
19478 			 * At this point we give up & fail the command.
19479 			 */
19480 			sd_return_failed_command(un, bp, EIO);
19481 			SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
19482 			    "sd_pkt_status_busy: exit (failed cmd)\n");
19483 			return;
19484 		}
19485 	}
19486 
19487 	/*
19488 	 * Retry the command. Be sure to specify SD_RETRIES_NOCHECK as
19489 	 * we have already checked the retry counts above.
19490 	 */
19491 	sd_retry_command(un, bp, SD_RETRIES_NOCHECK, NULL, NULL,
19492 	    EIO, un->un_busy_timeout, NULL);
19493 
19494 	SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
19495 	    "sd_pkt_status_busy: exit\n");
19496 }
19497 
19498 
19499 /*
19500  *    Function: sd_pkt_status_reservation_conflict
19501  *
19502  * Description: Recovery actions for a "STATUS_RESERVATION_CONFLICT" SCSI
19503  *		command status.
19504  *
19505  *     Context: May be called from interrupt context
19506  */
19507 
19508 static void
19509 sd_pkt_status_reservation_conflict(struct sd_lun *un, struct buf *bp,
19510     struct sd_xbuf *xp, struct scsi_pkt *pktp)
19511 {
19512 	ASSERT(un != NULL);
19513 	ASSERT(mutex_owned(SD_MUTEX(un)));
19514 	ASSERT(bp != NULL);
19515 	ASSERT(xp != NULL);
19516 	ASSERT(pktp != NULL);
19517 
19518 	/*
19519 	 * If the command was PERSISTENT_RESERVATION_[IN|OUT], the conflict
19520 	 * could be due to various reasons, such as incorrect keys, not being
19521 	 * registered, or not holding the reservation. So we return EACCES to the caller.
19522 	 */
19523 	if (un->un_reservation_type == SD_SCSI3_RESERVATION) {
19524 		int cmd = SD_GET_PKT_OPCODE(pktp);
19525 		if ((cmd == SCMD_PERSISTENT_RESERVE_IN) ||
19526 		    (cmd == SCMD_PERSISTENT_RESERVE_OUT)) {
19527 			sd_return_failed_command(un, bp, EACCES);
19528 			return;
19529 		}
19530 	}
19531 
19532 	un->un_resvd_status |= SD_RESERVATION_CONFLICT;
19533 
19534 	if ((un->un_resvd_status & SD_FAILFAST) != 0) {
19535 		if (sd_failfast_enable != 0) {
19536 			/* By definition, we must panic here.... */
19537 			sd_panic_for_res_conflict(un);
19538 			/*NOTREACHED*/
19539 		}
19540 		SD_ERROR(SD_LOG_IO, un,
19541 		    "sd_handle_resv_conflict: Disk Reserved\n");
19542 		sd_return_failed_command(un, bp, EACCES);
19543 		return;
19544 	}
19545 
19546 	/*
19547 	 * 1147670: retry only if sd_retry_on_reservation_conflict
19548 	 * property is set (default is 1). Retries will not succeed
19549 	 * on a disk reserved by another initiator. HA systems
19550 	 * may reset this via sd.conf to avoid these retries.
19551 	 *
19552 	 * Note: The legacy return code for this failure is EIO; however, EACCES
19553 	 * seems more appropriate for a reservation conflict.
19554 	 */
19555 	if (sd_retry_on_reservation_conflict == 0) {
19556 		SD_ERROR(SD_LOG_IO, un,
19557 		    "sd_handle_resv_conflict: Device Reserved\n");
19558 		sd_return_failed_command(un, bp, EIO);
19559 		return;
19560 	}
19561 
19562 	/*
19563 	 * Retry the command if we can.
19564 	 *
19565 	 * Note: The legacy return code for this failure is EIO; however, EACCES
19566 	 * seems more appropriate for a reservation conflict.
19567 	 */
19568 	sd_retry_command(un, bp, SD_RETRIES_STANDARD, NULL, NULL, EIO,
19569 	    (clock_t)2, NULL);
19570 }
19571 
19572 
19573 
19574 /*
19575  *    Function: sd_pkt_status_qfull
19576  *
19577  * Description: Handle a QUEUE FULL condition from the target.  This can
19578  *		occur if the HBA does not handle the queue full condition.
19579  *		(Basically this means third-party HBAs as Sun HBAs will
19580  *		(Basically this means third-party HBAs, as Sun HBAs will
19581  *		some commands already in the transport, then the queue full
19582  *		has occurred because the queue for this nexus is actually
19583  *		full. If there are no commands in the transport, then the
19584  *		queue full is resulting from some other initiator or lun
19585  *		consuming all the resources at the target.
19586  *
19587  *     Context: May be called from interrupt context
19588  */
19589 
19590 static void
19591 sd_pkt_status_qfull(struct sd_lun *un, struct buf *bp, struct sd_xbuf *xp,
19592     struct scsi_pkt *pktp)
19593 {
19594 	ASSERT(un != NULL);
19595 	ASSERT(mutex_owned(SD_MUTEX(un)));
19596 	ASSERT(bp != NULL);
19597 	ASSERT(xp != NULL);
19598 	ASSERT(pktp != NULL);
19599 
19600 	SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
19601 	    "sd_pkt_status_qfull: entry\n");
19602 
19603 	/*
19604 	 * Just lower the QFULL throttle and retry the command.  Note that
19605 	 * we do not limit the number of retries here.
19606 	 */
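	/*
	 * (Illustrative note, an assumption about behavior implemented
	 * elsewhere in this driver: sd_reduce_throttle() lowers un_throttle
	 * and arranges, via a timeout, for the throttle to be restored
	 * later, so a transient QUEUE FULL only temporarily limits the
	 * number of commands outstanding to this nexus.)
	 */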
19607 	sd_reduce_throttle(un, SD_THROTTLE_QFULL);
19608 	sd_retry_command(un, bp, SD_RETRIES_NOCHECK, NULL, NULL, 0,
19609 	    SD_RESTART_TIMEOUT, NULL);
19610 
19611 	SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
19612 	    "sd_pkt_status_qfull: exit\n");
19613 }
19614 
19615 
19616 /*
19617  *    Function: sd_reset_target
19618  *
19619  * Description: Issue a scsi_reset(9F), with either RESET_LUN,
19620  *		RESET_TARGET, or RESET_ALL.
19621  *
19622  *     Context: May be called under interrupt context.
19623  */
19624 
19625 static void
19626 sd_reset_target(struct sd_lun *un, struct scsi_pkt *pktp)
19627 {
19628 	int rval = 0;
19629 
19630 	ASSERT(un != NULL);
19631 	ASSERT(mutex_owned(SD_MUTEX(un)));
19632 	ASSERT(pktp != NULL);
19633 
19634 	SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un, "sd_reset_target: entry\n");
19635 
19636 	/*
19637 	 * No need to reset if the transport layer has already done so.
19638 	 */
19639 	if ((pktp->pkt_statistics &
19640 	    (STAT_BUS_RESET | STAT_DEV_RESET | STAT_ABORTED)) != 0) {
19641 		SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
19642 		    "sd_reset_target: no reset\n");
19643 		return;
19644 	}
19645 
19646 	mutex_exit(SD_MUTEX(un));
19647 
19648 	if (un->un_f_allow_bus_device_reset == TRUE) {
19649 		if (un->un_f_lun_reset_enabled == TRUE) {
19650 			SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
19651 			    "sd_reset_target: RESET_LUN\n");
19652 			rval = scsi_reset(SD_ADDRESS(un), RESET_LUN);
19653 		}
19654 		if (rval == 0) {
19655 			SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
19656 			    "sd_reset_target: RESET_TARGET\n");
19657 			rval = scsi_reset(SD_ADDRESS(un), RESET_TARGET);
19658 		}
19659 	}
19660 
19661 	if (rval == 0) {
19662 		SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
19663 		    "sd_reset_target: RESET_ALL\n");
19664 		(void) scsi_reset(SD_ADDRESS(un), RESET_ALL);
19665 	}
19666 
19667 	mutex_enter(SD_MUTEX(un));
19668 
19669 	SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un, "sd_reset_target: exit\n");
19670 }
19671 
19672 /*
19673  *    Function: sd_target_change_task
19674  *
19675  * Description: Handle dynamic target change
19676  *
19677  *     Context: Executes in a taskq() thread context
19678  */
19679 static void
19680 sd_target_change_task(void *arg)
19681 {
19682 	struct sd_lun		*un = arg;
19683 	uint64_t		capacity;
19684 	diskaddr_t		label_cap;
19685 	uint_t			lbasize;
19686 	sd_ssc_t		*ssc;
19687 
19688 	ASSERT(un != NULL);
19689 	ASSERT(!mutex_owned(SD_MUTEX(un)));
19690 
19691 	if ((un->un_f_blockcount_is_valid == FALSE) ||
19692 	    (un->un_f_tgt_blocksize_is_valid == FALSE)) {
19693 		return;
19694 	}
19695 
19696 	ssc = sd_ssc_init(un);
19697 
19698 	if (sd_send_scsi_READ_CAPACITY(ssc, &capacity,
19699 	    &lbasize, SD_PATH_DIRECT) != 0) {
19700 		SD_ERROR(SD_LOG_ERROR, un,
19701 		    "sd_target_change_task: fail to read capacity\n");
19702 		sd_ssc_assessment(ssc, SD_FMT_IGNORE);
19703 		goto task_exit;
19704 	}
19705 
19706 	mutex_enter(SD_MUTEX(un));
19707 	if (capacity <= un->un_blockcount) {
19708 		mutex_exit(SD_MUTEX(un));
19709 		goto task_exit;
19710 	}
19711 
19712 	sd_update_block_info(un, lbasize, capacity);
19713 	mutex_exit(SD_MUTEX(un));
19714 
19715 	/*
19716 	 * If lun is EFI labeled and lun capacity is greater than the
19717 	 * capacity contained in the label, log a sysevent.
19718 	 */
19719 	if (cmlb_efi_label_capacity(un->un_cmlbhandle, &label_cap,
19720 	    (void *)SD_PATH_DIRECT) == 0) {
19721 		mutex_enter(SD_MUTEX(un));
19722 		if (un->un_f_blockcount_is_valid &&
19723 		    un->un_blockcount > label_cap) {
19724 			mutex_exit(SD_MUTEX(un));
19725 			sd_log_lun_expansion_event(un, KM_SLEEP);
19726 		} else {
19727 			mutex_exit(SD_MUTEX(un));
19728 		}
19729 	}
19730 
19731 task_exit:
19732 	sd_ssc_fini(ssc);
19733 }
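
/*
 * A minimal sketch of how this task is expected to be scheduled (the
 * dispatching code lives elsewhere in this driver; shown here only for
 * context, by analogy with sd_start_stop_unit_callback() below):
 *
 *	(void) taskq_dispatch(sd_tq, sd_target_change_task, un, KM_NOSLEEP);
 */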
19734 
19735 
19736 /*
19737  *    Function: sd_log_dev_status_event
19738  *
19739  * Description: Log EC_dev_status sysevent
19740  *
19741  *     Context: Never called from interrupt context
19742  */
19743 static void
19744 sd_log_dev_status_event(struct sd_lun *un, char *esc, int km_flag)
19745 {
19746 	int err;
19747 	char			*path;
19748 	nvlist_t		*attr_list;
19749 	size_t			n;
19750 
19751 	/* Allocate and build sysevent attribute list */
19752 	err = nvlist_alloc(&attr_list, NV_UNIQUE_NAME_TYPE, km_flag);
19753 	if (err != 0) {
19754 		SD_ERROR(SD_LOG_ERROR, un,
19755 		    "sd_log_dev_status_event: fail to allocate space\n");
19756 		return;
19757 	}
19758 
19759 	path = kmem_alloc(MAXPATHLEN, km_flag);
19760 	if (path == NULL) {
19761 		nvlist_free(attr_list);
19762 		SD_ERROR(SD_LOG_ERROR, un,
19763 		    "sd_log_dev_status_event: fail to allocate space\n");
19764 		return;
19765 	}
19766 
19767 	n = snprintf(path, MAXPATHLEN, "/devices");
19768 	(void) ddi_pathname(SD_DEVINFO(un), path + n);
19769 	n = strlen(path);
19770 	n += snprintf(path + n, MAXPATHLEN - n, ":x");
19771 
19772 	/*
19773 	 * On receipt of this event, the ZFS sysevent module will scan
19774 	 * active zpools for child vdevs matching this physical path.
19775 	 * In order to catch both whole disk pools and those with an
19776 	 * EFI boot partition, generate separate sysevents for minor
19777 	 * node 'a' and 'b'.
19778 	 */
19779 	for (char c = 'a'; c < 'c'; c++) {
19780 		path[n - 1] = c;
19781 
19782 		err = nvlist_add_string(attr_list, DEV_PHYS_PATH, path);
19783 		if (err != 0) {
19784 			SD_ERROR(SD_LOG_ERROR, un,
19785 			    "sd_log_dev_status_event: fail to add attribute\n");
19786 			break;
19787 		}
19788 
19789 		err = ddi_log_sysevent(SD_DEVINFO(un), SUNW_VENDOR,
19790 		    EC_DEV_STATUS, esc, attr_list, NULL, km_flag);
19791 		if (err != DDI_SUCCESS) {
19792 			SD_ERROR(SD_LOG_ERROR, un,
19793 			    "sd_log_dev_status_event: fail to log sysevent\n");
19794 			break;
19795 		}
19796 	}
19797 
19798 	nvlist_free(attr_list);
19799 	kmem_free(path, MAXPATHLEN);
19800 }
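
/*
 * Example of the resulting attribute, for a hypothetical device whose
 * devinfo path is /pci@0,0/scsi@1/sd@0,0: the loop above emits two
 * sysevents whose DEV_PHYS_PATH values are
 *
 *	/devices/pci@0,0/scsi@1/sd@0,0:a
 *	/devices/pci@0,0/scsi@1/sd@0,0:b
 */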
19801 
19802 
19803 /*
19804  *    Function: sd_log_lun_expansion_event
19805  *
19806  * Description: Log lun expansion sysevent
19807  *
19808  *     Context: Never called from interrupt context
19809  */
19810 static void
19811 sd_log_lun_expansion_event(struct sd_lun *un, int km_flag)
19812 {
19813 	sd_log_dev_status_event(un, ESC_DEV_DLE, km_flag);
19814 }
19815 
19816 
19817 /*
19818  *    Function: sd_log_eject_request_event
19819  *
19820  * Description: Log eject request sysevent
19821  *
19822  *     Context: Never called from interrupt context
19823  */
19824 static void
19825 sd_log_eject_request_event(struct sd_lun *un, int km_flag)
19826 {
19827 	sd_log_dev_status_event(un, ESC_DEV_EJECT_REQUEST, km_flag);
19828 }
19829 
19830 
19831 /*
19832  *    Function: sd_media_change_task
19833  *
19834  * Description: Recovery action that waits for a CDROM to become available.
19835  *
19836  *     Context: Executes in a taskq() thread context
19837  */
19838 
19839 static void
19840 sd_media_change_task(void *arg)
19841 {
19842 	struct	scsi_pkt	*pktp = arg;
19843 	struct	sd_lun		*un;
19844 	struct	buf		*bp;
19845 	struct	sd_xbuf		*xp;
19846 	int	err		= 0;
19847 	int	retry_count	= 0;
19848 	int	retry_limit	= SD_UNIT_ATTENTION_RETRY / 10;
19849 	struct	sd_sense_info	si;
19850 
19851 	ASSERT(pktp != NULL);
19852 	bp = (struct buf *)pktp->pkt_private;
19853 	ASSERT(bp != NULL);
19854 	xp = SD_GET_XBUF(bp);
19855 	ASSERT(xp != NULL);
19856 	un = SD_GET_UN(bp);
19857 	ASSERT(un != NULL);
19858 	ASSERT(!mutex_owned(SD_MUTEX(un)));
19859 	ASSERT(un->un_f_monitor_media_state);
19860 
19861 	si.ssi_severity = SCSI_ERR_INFO;
19862 	si.ssi_pfa_flag = FALSE;
19863 
19864 	/*
19865 	 * When a reset is issued on a CDROM, it takes a long time to
19866 	 * recover. The first few attempts to read the capacity and other
19867 	 * things related to handling the unit attention fail (with an ASC
19868 	 * of 0x4 and an ASCQ of 0x1). In that case we want to perform
19869 	 * enough retries, while limiting the retries in other cases of
19870 	 * genuine failure, such as no media in the drive.
19871 	 */
19872 	while (retry_count++ < retry_limit) {
19873 		if ((err = sd_handle_mchange(un)) == 0) {
19874 			break;
19875 		}
19876 		if (err == EAGAIN) {
19877 			retry_limit = SD_UNIT_ATTENTION_RETRY;
19878 		}
19879 		/* Sleep for 0.5 sec. & try again */
19880 		delay(drv_usectohz(500000));
19881 	}
19882 
19883 	/*
19884 	 * Dispatch (retry or fail) the original command here,
19885 	 * along with appropriate console messages....
19886 	 *
19887 	 * Must grab the mutex before calling sd_retry_command,
19888 	 * sd_print_sense_msg and sd_return_failed_command.
19889 	 */
19890 	mutex_enter(SD_MUTEX(un));
19891 	if (err != SD_CMD_SUCCESS) {
19892 		SD_UPDATE_ERRSTATS(un, sd_harderrs);
19893 		SD_UPDATE_ERRSTATS(un, sd_rq_nodev_err);
19894 		si.ssi_severity = SCSI_ERR_FATAL;
19895 		sd_print_sense_msg(un, bp, &si, SD_NO_RETRY_ISSUED);
19896 		sd_return_failed_command(un, bp, EIO);
19897 	} else {
19898 		sd_retry_command(un, bp, SD_RETRIES_UA, sd_print_sense_msg,
19899 		    &si, EIO, (clock_t)0, NULL);
19900 	}
19901 	mutex_exit(SD_MUTEX(un));
19902 }
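
/*
 * Illustration of the retry policy above, assuming (hypothetically) that
 * SD_UNIT_ATTENTION_RETRY is 40: genuine failures (e.g. no media in the
 * drive) give up after 40 / 10 = 4 attempts (roughly 2 seconds at 0.5 sec
 * per retry), while an EAGAIN from sd_handle_mchange() raises the limit
 * to the full 40 attempts (roughly 20 seconds) to ride out the post-reset
 * unit attention recovery window.
 */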
19903 
19904 
19906 /*
19907  *    Function: sd_handle_mchange
19908  *
19909  * Description: Perform geometry validation and other recovery when the
19910  *		CDROM has been removed from the drive.
19911  *
19912  * Return Code: 0 for success
19913  *		errno-type return code of either sd_send_scsi_DOORLOCK() or
19914  *		sd_send_scsi_READ_CAPACITY()
19915  *
19916  *     Context: Executes in a taskq() thread context
19917  */
19918 
19919 static int
19920 sd_handle_mchange(struct sd_lun *un)
19921 {
19922 	uint64_t	capacity;
19923 	uint32_t	lbasize;
19924 	int		rval;
19925 	sd_ssc_t	*ssc;
19926 
19927 	ASSERT(!mutex_owned(SD_MUTEX(un)));
19928 	ASSERT(un->un_f_monitor_media_state);
19929 
19930 	ssc = sd_ssc_init(un);
19931 	rval = sd_send_scsi_READ_CAPACITY(ssc, &capacity, &lbasize,
19932 	    SD_PATH_DIRECT_PRIORITY);
19933 
19934 	if (rval != 0)
19935 		goto failed;
19936 
19937 	mutex_enter(SD_MUTEX(un));
19938 	sd_update_block_info(un, lbasize, capacity);
19939 
19940 	if (un->un_errstats != NULL) {
19941 		struct	sd_errstats *stp =
19942 		    (struct sd_errstats *)un->un_errstats->ks_data;
19943 		stp->sd_capacity.value.ui64 = (uint64_t)
19944 		    ((uint64_t)un->un_blockcount *
19945 		    (uint64_t)un->un_tgt_blocksize);
19946 	}
19947 
19948 	/*
19949 	 * Check if the media in the device is writable or not
19950 	 */
19951 	if (ISCD(un)) {
19952 		sd_check_for_writable_cd(ssc, SD_PATH_DIRECT_PRIORITY);
19953 	}
19954 
19955 	/*
19956 	 * Note: Maybe let the strategy/partitioning chain worry about getting
19957 	 * valid geometry.
19958 	 */
19959 	mutex_exit(SD_MUTEX(un));
19960 	cmlb_invalidate(un->un_cmlbhandle, (void *)SD_PATH_DIRECT_PRIORITY);
19961 
19963 	if (cmlb_validate(un->un_cmlbhandle, 0,
19964 	    (void *)SD_PATH_DIRECT_PRIORITY) != 0) {
19965 		sd_ssc_fini(ssc);
19966 		return (EIO);
19967 	} else {
19968 		if (un->un_f_pkstats_enabled) {
19969 			sd_set_pstats(un);
19970 			SD_TRACE(SD_LOG_IO_PARTITION, un,
19971 			    "sd_handle_mchange: un:0x%p pstats created and "
19972 			    "set\n", un);
19973 		}
19974 	}
19975 
19976 	/*
19977 	 * Try to lock the door
19978 	 */
19979 	rval = sd_send_scsi_DOORLOCK(ssc, SD_REMOVAL_PREVENT,
19980 	    SD_PATH_DIRECT_PRIORITY);
19981 failed:
19982 	if (rval != 0)
19983 		sd_ssc_assessment(ssc, SD_FMT_IGNORE);
19984 	sd_ssc_fini(ssc);
19985 	return (rval);
19986 }
19987 
19988 
19989 /*
19990  *    Function: sd_send_scsi_DOORLOCK
19991  *
19992  * Description: Issue the scsi DOOR LOCK command
19993  *
19994  *   Arguments: ssc   - ssc contains pointer to driver soft state (unit)
19995  *                      structure for this target.
19996  *		flag  - SD_REMOVAL_ALLOW
19997  *			SD_REMOVAL_PREVENT
19998  *		path_flag - SD_PATH_DIRECT to use the USCSI "direct" chain and
19999  *			the normal command waitq, or SD_PATH_DIRECT_PRIORITY
20000  *			to use the USCSI "direct" chain and bypass the normal
20001  *			command waitq. SD_PATH_DIRECT_PRIORITY is used when this
20002  *			command is issued as part of an error recovery action.
20003  *
20004  * Return Code: 0   - Success
20005  *		errno return code from sd_ssc_send()
20006  *
20007  *     Context: Can sleep.
20008  */
20009 
20010 static int
20011 sd_send_scsi_DOORLOCK(sd_ssc_t *ssc, int flag, int path_flag)
20012 {
20013 	struct scsi_extended_sense	sense_buf;
20014 	union scsi_cdb		cdb;
20015 	struct uscsi_cmd	ucmd_buf;
20016 	int			status;
20017 	struct sd_lun		*un;
20018 
20019 	ASSERT(ssc != NULL);
20020 	un = ssc->ssc_un;
20021 	ASSERT(un != NULL);
20022 	ASSERT(!mutex_owned(SD_MUTEX(un)));
20023 
20024 	SD_TRACE(SD_LOG_IO, un, "sd_send_scsi_DOORLOCK: entry: un:0x%p\n", un);
20025 
20026 	/* already determined doorlock is not supported, fake success */
20027 	if (un->un_f_doorlock_supported == FALSE) {
20028 		return (0);
20029 	}
20030 
20031 	/*
20032 	 * If we are ejecting and see an SD_REMOVAL_PREVENT
20033 	 * ignore the command so we can complete the eject
20034 	 * operation.
20035 	 */
20036 	if (flag == SD_REMOVAL_PREVENT) {
20037 		mutex_enter(SD_MUTEX(un));
20038 		if (un->un_f_ejecting == TRUE) {
20039 			mutex_exit(SD_MUTEX(un));
20040 			return (EAGAIN);
20041 		}
20042 		mutex_exit(SD_MUTEX(un));
20043 	}
20044 
20045 	bzero(&cdb, sizeof (cdb));
20046 	bzero(&ucmd_buf, sizeof (ucmd_buf));
20047 
20048 	cdb.scc_cmd = SCMD_DOORLOCK;
20049 	cdb.cdb_opaque[4] = (uchar_t)flag;
20050 
20051 	ucmd_buf.uscsi_cdb	= (char *)&cdb;
20052 	ucmd_buf.uscsi_cdblen	= CDB_GROUP0;
20053 	ucmd_buf.uscsi_bufaddr	= NULL;
20054 	ucmd_buf.uscsi_buflen	= 0;
20055 	ucmd_buf.uscsi_rqbuf	= (caddr_t)&sense_buf;
20056 	ucmd_buf.uscsi_rqlen	= sizeof (sense_buf);
20057 	ucmd_buf.uscsi_flags	= USCSI_RQENABLE | USCSI_SILENT;
20058 	ucmd_buf.uscsi_timeout	= 15;
20059 
20060 	SD_TRACE(SD_LOG_IO, un,
20061 	    "sd_send_scsi_DOORLOCK: returning sd_ssc_send\n");
20062 
20063 	status = sd_ssc_send(ssc, &ucmd_buf, FKIOCTL,
20064 	    UIO_SYSSPACE, path_flag);
20065 
20066 	if (status == 0)
20067 		sd_ssc_assessment(ssc, SD_FMT_STANDARD);
20068 
20069 	if ((status == EIO) && (ucmd_buf.uscsi_status == STATUS_CHECK) &&
20070 	    (ucmd_buf.uscsi_rqstatus == STATUS_GOOD) &&
20071 	    (scsi_sense_key((uint8_t *)&sense_buf) == KEY_ILLEGAL_REQUEST)) {
20072 		sd_ssc_assessment(ssc, SD_FMT_IGNORE);
20073 
20074 		/* fake success and skip subsequent doorlock commands */
20075 		un->un_f_doorlock_supported = FALSE;
20076 		return (0);
20077 	}
20078 
20079 	return (status);
20080 }
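
/*
 * A minimal usage sketch, mirroring the pattern used by
 * sd_handle_mchange() above; error assessment follows the usual
 * ssc conventions:
 *
 *	sd_ssc_t *ssc = sd_ssc_init(un);
 *	int rval = sd_send_scsi_DOORLOCK(ssc, SD_REMOVAL_PREVENT,
 *	    SD_PATH_DIRECT);
 *	if (rval != 0)
 *		sd_ssc_assessment(ssc, SD_FMT_IGNORE);
 *	sd_ssc_fini(ssc);
 */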
20081 
20082 /*
20083  *    Function: sd_send_scsi_READ_CAPACITY
20084  *
20085  * Description: This routine uses the scsi READ CAPACITY command to determine
20086  *		the device capacity in number of blocks and the device native
20087  *		block size. If this function returns a failure, then the
20088  *		values in *capp and *lbap are undefined.  If the capacity
20089  *		returned is 0xffffffff then the lun is too large for a
20090  *		normal READ CAPACITY command and the results of a
20091  *		READ CAPACITY 16 will be used instead.
20092  *
20093  *   Arguments: ssc   - ssc contains ptr to soft state struct for the target
20094  *		capp - ptr to unsigned 64-bit variable to receive the
20095  *			capacity value from the command.
20096  *		lbap - ptr to unsigned 32-bit variable to receive the
20097  *			block size value from the command
20098  *		path_flag - SD_PATH_DIRECT to use the USCSI "direct" chain and
20099  *			the normal command waitq, or SD_PATH_DIRECT_PRIORITY
20100  *			to use the USCSI "direct" chain and bypass the normal
20101  *			command waitq. SD_PATH_DIRECT_PRIORITY is used when this
20102  *			command is issued as part of an error recovery action.
20103  *
20104  * Return Code: 0   - Success
20105  *		EIO - IO error
20106  *		EACCES - Reservation conflict detected
20107  *		EAGAIN - Device is becoming ready
20108  *		errno return code from sd_ssc_send()
20109  *
20110  *     Context: Can sleep.  Blocks until command completes.
20111  */
20112 
20113 #define	SD_CAPACITY_SIZE	sizeof (struct scsi_capacity)
20114 
20115 static int
20116 sd_send_scsi_READ_CAPACITY(sd_ssc_t *ssc, uint64_t *capp, uint32_t *lbap,
20117     int path_flag)
20118 {
20119 	struct	scsi_extended_sense	sense_buf;
20120 	struct	uscsi_cmd	ucmd_buf;
20121 	union	scsi_cdb	cdb;
20122 	uint32_t		*capacity_buf;
20123 	uint64_t		capacity;
20124 	uint32_t		lbasize;
20125 	uint32_t		pbsize;
20126 	int			status;
20127 	struct sd_lun		*un;
20128 
20129 	ASSERT(ssc != NULL);
20130 
20131 	un = ssc->ssc_un;
20132 	ASSERT(un != NULL);
20133 	ASSERT(!mutex_owned(SD_MUTEX(un)));
20134 	ASSERT(capp != NULL);
20135 	ASSERT(lbap != NULL);
20136 
20137 	SD_TRACE(SD_LOG_IO, un,
20138 	    "sd_send_scsi_READ_CAPACITY: entry: un:0x%p\n", un);
20139 
20140 	/*
20141 	 * First send a READ_CAPACITY command to the target.
20142 	 * (This command is mandatory under SCSI-2.)
20143 	 *
20144 	 * Set up the CDB for the READ_CAPACITY command.  The Partial
20145 	 * Medium Indicator bit is cleared.  The address field must be
20146 	 * zero if the PMI bit is zero.
20147 	 */
20148 	bzero(&cdb, sizeof (cdb));
20149 	bzero(&ucmd_buf, sizeof (ucmd_buf));
20150 
20151 	capacity_buf = kmem_zalloc(SD_CAPACITY_SIZE, KM_SLEEP);
20152 
20153 	cdb.scc_cmd = SCMD_READ_CAPACITY;
20154 
20155 	ucmd_buf.uscsi_cdb	= (char *)&cdb;
20156 	ucmd_buf.uscsi_cdblen	= CDB_GROUP1;
20157 	ucmd_buf.uscsi_bufaddr	= (caddr_t)capacity_buf;
20158 	ucmd_buf.uscsi_buflen	= SD_CAPACITY_SIZE;
20159 	ucmd_buf.uscsi_rqbuf	= (caddr_t)&sense_buf;
20160 	ucmd_buf.uscsi_rqlen	= sizeof (sense_buf);
20161 	ucmd_buf.uscsi_flags	= USCSI_RQENABLE | USCSI_READ | USCSI_SILENT;
20162 	ucmd_buf.uscsi_timeout	= 60;
20163 
20164 	status = sd_ssc_send(ssc, &ucmd_buf, FKIOCTL,
20165 	    UIO_SYSSPACE, path_flag);
20166 
20167 	switch (status) {
20168 	case 0:
20169 		/* Return failure if we did not get valid capacity data. */
20170 		if (ucmd_buf.uscsi_resid != 0) {
20171 			sd_ssc_set_info(ssc, SSC_FLAGS_INVALID_DATA, -1,
20172 			    "sd_send_scsi_READ_CAPACITY received invalid "
20173 			    "capacity data");
20174 			kmem_free(capacity_buf, SD_CAPACITY_SIZE);
20175 			return (EIO);
20176 		}
20177 		/*
20178 		 * Read capacity and block size from the READ CAPACITY 10 data.
20179 		 * This data may be adjusted later due to device specific
20180 		 * issues.
20181 		 *
20182 		 * According to the SCSI spec, the READ CAPACITY 10
20183 		 * command returns the following:
20184 		 *
20185 		 *  bytes 0-3: Maximum logical block address available.
20186 		 *		(MSB in byte:0 & LSB in byte:3)
20187 		 *
20188 		 *  bytes 4-7: Block length in bytes
20189 		 *		(MSB in byte:4 & LSB in byte:7)
20190 		 *
20191 		 */
20192 		capacity = BE_32(capacity_buf[0]);
20193 		lbasize = BE_32(capacity_buf[1]);
20194 
20195 		/*
20196 		 * Done with capacity_buf
20197 		 */
20198 		kmem_free(capacity_buf, SD_CAPACITY_SIZE);
20199 
20200 		/*
20201 		 * if the reported capacity is set to all 0xf's, then
20202 		 * this disk is too large and requires SBC-2 commands.
20203 		 * Reissue the request using READ CAPACITY 16.
20204 		 */
20205 		if (capacity == 0xffffffff) {
20206 			sd_ssc_assessment(ssc, SD_FMT_IGNORE);
20207 			status = sd_send_scsi_READ_CAPACITY_16(ssc, &capacity,
20208 			    &lbasize, &pbsize, path_flag);
20209 			if (status != 0) {
20210 				return (status);
20211 			} else {
20212 				goto rc16_done;
20213 			}
20214 		}
20215 		break;	/* Success! */
20216 	case EIO:
20217 		switch (ucmd_buf.uscsi_status) {
20218 		case STATUS_RESERVATION_CONFLICT:
20219 			status = EACCES;
20220 			break;
20221 		case STATUS_CHECK:
20222 			/*
20223 			 * Check condition; look for ASC/ASCQ of 0x04/0x01
20224 			 * (LOGICAL UNIT IS IN PROCESS OF BECOMING READY)
20225 			 */
20226 			if ((ucmd_buf.uscsi_rqstatus == STATUS_GOOD) &&
20227 			    (scsi_sense_asc((uint8_t *)&sense_buf) == 0x04) &&
20228 			    (scsi_sense_ascq((uint8_t *)&sense_buf) == 0x01)) {
20229 				kmem_free(capacity_buf, SD_CAPACITY_SIZE);
20230 				return (EAGAIN);
20231 			}
20232 			break;
20233 		default:
20234 			break;
20235 		}
20236 		/* FALLTHRU */
20237 	default:
20238 		kmem_free(capacity_buf, SD_CAPACITY_SIZE);
20239 		return (status);
20240 	}
20241 
20242 	/*
20243 	 * Some ATAPI CD-ROM drives report inaccurate LBA size values
20244 	 * (2352 and 0 are common) so for these devices always force the value
20245 	 * to 2048 as required by the ATAPI specs.
20246 	 */
20247 	if ((un->un_f_cfg_is_atapi == TRUE) && (ISCD(un))) {
20248 		lbasize = 2048;
20249 	}
20250 
20251 	/*
20252 	 * Get the maximum LBA value from the READ CAPACITY data.
20253 	 * Here we assume that the Partial Medium Indicator (PMI) bit
20254 	 * was cleared when issuing the command. This means that the LBA
20255 	 * returned from the device is the LBA of the last logical block
20256 	 * on the logical unit.  The actual logical block count will be
20257 	 * this value plus one.
20258 	 */
20259 	capacity += 1;
20260 
20261 	/*
20262 	 * Currently, for removable media, the capacity is saved in terms
20263 	 * of un->un_sys_blocksize, so scale the capacity value to reflect this.
20264 	 */
20265 	if (un->un_f_has_removable_media)
20266 		capacity *= (lbasize / un->un_sys_blocksize);
20267 
20268 rc16_done:
20269 
20270 	/*
20271 	 * Copy the values from the READ CAPACITY command into the space
20272 	 * provided by the caller.
20273 	 */
20274 	*capp = capacity;
20275 	*lbap = lbasize;
20276 
20277 	SD_TRACE(SD_LOG_IO, un, "sd_send_scsi_READ_CAPACITY: "
20278 	    "capacity:0x%llx  lbasize:0x%x\n", capacity, lbasize);
20279 
20280 	/*
20281 	 * Both the lbasize and capacity from the device must be nonzero,
20282 	 * otherwise we assume that the values are not valid and return
20283 	 * failure to the caller. (4203735)
20284 	 */
20285 	if ((capacity == 0) || (lbasize == 0)) {
20286 		sd_ssc_set_info(ssc, SSC_FLAGS_INVALID_DATA, -1,
20287 		    "sd_send_scsi_READ_CAPACITY received invalid value "
20288 		    "capacity %llu lbasize %d", capacity, lbasize);
20289 		return (EIO);
20290 	}
20291 	sd_ssc_assessment(ssc, SD_FMT_STANDARD);
20292 	return (0);
20293 }
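
/*
 * Worked example of the arithmetic above: a READ CAPACITY 10 returning a
 * maximum LBA of 0x003FFFFF with a 512-byte block length yields
 * capacity = 0x00400000 blocks (2 GiB). For removable media with a
 * 2048-byte lbasize and a 512-byte un_sys_blocksize, the capacity is then
 * scaled by 2048 / 512 = 4 so that it is expressed in system blocks.
 */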
20294 
20295 /*
20296  *    Function: sd_send_scsi_READ_CAPACITY_16
20297  *
20298  * Description: This routine uses the scsi READ CAPACITY 16 command to
20299  *		determine the device capacity in number of blocks and the
20300  *		device native block size.  If this function returns a failure,
20301  *		then the values in *capp and *lbap are undefined.
20302  *		This routine should be called by sd_send_scsi_READ_CAPACITY,
20303  *              which will apply any device specific adjustments to capacity
20304  *              and lbasize. One exception is that it is also called by
20305  *              sd_get_media_info_ext, which has no need to adjust the
20306  *              capacity and lbasize.
20307  *
20308  *   Arguments: ssc   - ssc contains ptr to soft state struct for the target
20309  *		capp - ptr to unsigned 64-bit variable to receive the
20310  *			capacity value from the command.
20311  *		lbap - ptr to unsigned 32-bit variable to receive the
20312  *			block size value from the command
20313  *              psp  - ptr to unsigned 32-bit variable to receive the
20314  *                      physical block size value from the command
20315  *		path_flag - SD_PATH_DIRECT to use the USCSI "direct" chain and
20316  *			the normal command waitq, or SD_PATH_DIRECT_PRIORITY
20317  *			to use the USCSI "direct" chain and bypass the normal
20318  *			command waitq. SD_PATH_DIRECT_PRIORITY is used when
20319  *			this command is issued as part of an error recovery
20320  *			action.
20321  *
20322  * Return Code: 0   - Success
20323  *		EIO - IO error
20324  *		EACCES - Reservation conflict detected
20325  *		EAGAIN - Device is becoming ready
20326  *		errno return code from sd_ssc_send()
20327  *
20328  *     Context: Can sleep.  Blocks until command completes.
20329  */
20330 
20331 #define	SD_CAPACITY_16_SIZE	sizeof (struct scsi_capacity_16)
20332 
20333 static int
20334 sd_send_scsi_READ_CAPACITY_16(sd_ssc_t *ssc, uint64_t *capp, uint32_t *lbap,
20335     uint32_t *psp, int path_flag)
20336 {
20337 	struct	scsi_extended_sense	sense_buf;
20338 	struct	uscsi_cmd	ucmd_buf;
20339 	union	scsi_cdb	cdb;
20340 	uint64_t		*capacity16_buf;
20341 	uint64_t		capacity;
20342 	uint32_t		lbasize;
20343 	uint32_t		pbsize;
20344 	uint32_t		lbpb_exp;
20345 	int			status;
20346 	struct sd_lun		*un;
20347 
20348 	ASSERT(ssc != NULL);
20349 
20350 	un = ssc->ssc_un;
20351 	ASSERT(un != NULL);
20352 	ASSERT(!mutex_owned(SD_MUTEX(un)));
20353 	ASSERT(capp != NULL);
20354 	ASSERT(lbap != NULL);
20355 
20356 	SD_TRACE(SD_LOG_IO, un,
20357 	    "sd_send_scsi_READ_CAPACITY: entry: un:0x%p\n", un);
20358 
20359 	/*
20360 	 * First send a READ_CAPACITY_16 command to the target.
20361 	 *
20362 	 * Set up the CDB for the READ_CAPACITY_16 command.  The Partial
20363 	 * Medium Indicator bit is cleared.  The address field must be
20364 	 * zero if the PMI bit is zero.
20365 	 */
20366 	bzero(&cdb, sizeof (cdb));
20367 	bzero(&ucmd_buf, sizeof (ucmd_buf));
20368 
20369 	capacity16_buf = kmem_zalloc(SD_CAPACITY_16_SIZE, KM_SLEEP);
20370 
20371 	ucmd_buf.uscsi_cdb	= (char *)&cdb;
20372 	ucmd_buf.uscsi_cdblen	= CDB_GROUP4;
20373 	ucmd_buf.uscsi_bufaddr	= (caddr_t)capacity16_buf;
20374 	ucmd_buf.uscsi_buflen	= SD_CAPACITY_16_SIZE;
20375 	ucmd_buf.uscsi_rqbuf	= (caddr_t)&sense_buf;
20376 	ucmd_buf.uscsi_rqlen	= sizeof (sense_buf);
20377 	ucmd_buf.uscsi_flags	= USCSI_RQENABLE | USCSI_READ | USCSI_SILENT;
20378 	ucmd_buf.uscsi_timeout	= 60;
20379 
20380 	/*
20381 	 * Read Capacity (16) is a Service Action In command.  One
20382 	 * command byte (0x9E) is overloaded for multiple operations,
20383 	 * with the second CDB byte specifying the desired operation.
20384 	 */
20385 	cdb.scc_cmd = SCMD_SVC_ACTION_IN_G4;
20386 	cdb.cdb_opaque[1] = SSVC_ACTION_READ_CAPACITY_G4;
20387 
20388 	/*
20389 	 * Fill in allocation length field
20390 	 */
20391 	FORMG4COUNT(&cdb, ucmd_buf.uscsi_buflen);
20392 
20393 	status = sd_ssc_send(ssc, &ucmd_buf, FKIOCTL,
20394 	    UIO_SYSSPACE, path_flag);
20395 
20396 	switch (status) {
20397 	case 0:
20398 		/* Return failure if we did not get valid capacity data. */
20399 		if (ucmd_buf.uscsi_resid > 20) {
20400 			sd_ssc_set_info(ssc, SSC_FLAGS_INVALID_DATA, -1,
20401 			    "sd_send_scsi_READ_CAPACITY_16 received invalid "
20402 			    "capacity data");
20403 			kmem_free(capacity16_buf, SD_CAPACITY_16_SIZE);
20404 			return (EIO);
20405 		}
20406 
20407 		/*
20408 		 * Read capacity and block size from the READ CAPACITY 16 data.
20409 		 * This data may be adjusted later due to device specific
20410 		 * issues.
20411 		 *
20412 		 * According to the SCSI spec, the READ CAPACITY 16
20413 		 * command returns the following:
20414 		 *
20415 		 *  bytes 0-7: Maximum logical block address available.
20416 		 *		(MSB in byte:0 & LSB in byte:7)
20417 		 *
20418 		 *  bytes 8-11: Block length in bytes
20419 		 *		(MSB in byte:8 & LSB in byte:11)
20420 		 *
20421 		 *  byte 13: LOGICAL BLOCKS PER PHYSICAL BLOCK EXPONENT
20422 		 *
20423 		 *  byte 14:
20424 		 *	bit 7: Thin-Provisioning Enabled
20425 		 *	bit 6: Thin-Provisioning Read Zeros
20426 		 */
20427 		capacity = BE_64(capacity16_buf[0]);
20428 		lbasize = BE_32(*(uint32_t *)&capacity16_buf[1]);
20429 		lbpb_exp = (BE_64(capacity16_buf[1]) >> 16) & 0x0f;
20430 
20431 		un->un_thin_flags = 0;
20432 		if (((uint8_t *)capacity16_buf)[14] & (1 << 7))
20433 			un->un_thin_flags |= SD_THIN_PROV_ENABLED;
20434 		if (((uint8_t *)capacity16_buf)[14] & (1 << 6))
20435 			un->un_thin_flags |= SD_THIN_PROV_READ_ZEROS;
20436 
20437 		pbsize = lbasize << lbpb_exp;
20438 
20439 		/*
20440 		 * Done with capacity16_buf
20441 		 */
20442 		kmem_free(capacity16_buf, SD_CAPACITY_16_SIZE);
20443 
20444 		/*
20445 		 * if the reported capacity is set to all 0xf's, then
20446 		 * this disk is too large.  This could only happen with
20447 		 * a device that supports LBAs larger than 64 bits which
20448 		 * are not defined by any current T10 standards.
20449 		 */
20450 		if (capacity == 0xffffffffffffffff) {
20451 			sd_ssc_set_info(ssc, SSC_FLAGS_INVALID_DATA, -1,
20452 			    "disk is too large");
20453 			return (EIO);
20454 		}
20455 		break;	/* Success! */
20456 	case EIO:
20457 		switch (ucmd_buf.uscsi_status) {
20458 		case STATUS_RESERVATION_CONFLICT:
20459 			status = EACCES;
20460 			break;
20461 		case STATUS_CHECK:
20462 			/*
20463 			 * Check condition; look for ASC/ASCQ of 0x04/0x01
20464 			 * (LOGICAL UNIT IS IN PROCESS OF BECOMING READY)
20465 			 */
20466 			if ((ucmd_buf.uscsi_rqstatus == STATUS_GOOD) &&
20467 			    (scsi_sense_asc((uint8_t *)&sense_buf) == 0x04) &&
20468 			    (scsi_sense_ascq((uint8_t *)&sense_buf) == 0x01)) {
20469 				kmem_free(capacity16_buf, SD_CAPACITY_16_SIZE);
20470 				return (EAGAIN);
20471 			}
20472 			break;
20473 		default:
20474 			break;
20475 		}
20476 		/* FALLTHRU */
20477 	default:
20478 		kmem_free(capacity16_buf, SD_CAPACITY_16_SIZE);
20479 		return (status);
20480 	}
20481 
20482 	/*
20483 	 * Some ATAPI CD-ROM drives report inaccurate LBA size values
20484 	 * (2352 and 0 are common) so for these devices always force the value
20485 	 * to 2048 as required by the ATAPI specs.
20486 	 */
20487 	if ((un->un_f_cfg_is_atapi == TRUE) && (ISCD(un))) {
20488 		lbasize = 2048;
20489 	}
20490 
20491 	/*
20492 	 * Get the maximum LBA value from the READ CAPACITY 16 data.
20493 	 * Here we assume that the Partial Medium Indicator (PMI) bit
20494 	 * was cleared when issuing the command. This means that the LBA
20495 	 * returned from the device is the LBA of the last logical block
20496 	 * on the logical unit.  The actual logical block count will be
20497 	 * this value plus one.
20498 	 */
20499 	capacity += 1;
20500 
20501 	/*
20502 	 * Currently, for removable media, the capacity is saved in terms
20503 	 * of un->un_sys_blocksize, so scale the capacity value to reflect this.
20504 	 */
20505 	if (un->un_f_has_removable_media)
20506 		capacity *= (lbasize / un->un_sys_blocksize);
20507 
20508 	*capp = capacity;
20509 	*lbap = lbasize;
20510 	*psp = pbsize;
20511 
20512 	SD_TRACE(SD_LOG_IO, un, "sd_send_scsi_READ_CAPACITY_16: "
20513 	    "capacity:0x%llx  lbasize:0x%x, pbsize: 0x%x\n",
20514 	    capacity, lbasize, pbsize);
20515 
20516 	if ((capacity == 0) || (lbasize == 0) || (pbsize == 0)) {
20517 		sd_ssc_set_info(ssc, SSC_FLAGS_INVALID_DATA, -1,
20518 		    "sd_send_scsi_READ_CAPACITY_16 received invalid value "
20519 		    "capacity %llu lbasize %d pbsize %d", capacity, lbasize,
		    pbsize);
20520 		return (EIO);
20521 	}
20522 
20523 	sd_ssc_assessment(ssc, SD_FMT_STANDARD);
20524 	return (0);
20525 }
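
/*
 * Worked example of the physical block size derivation above: a 512e
 * drive reports lbasize = 512 and a LOGICAL BLOCKS PER PHYSICAL BLOCK
 * EXPONENT of 3, so pbsize = 512 << 3 = 4096 bytes.
 */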
20526 
20527 
20528 /*
20529  *    Function: sd_send_scsi_START_STOP_UNIT
20530  *
20531  * Description: Issue a scsi START STOP UNIT command to the target.
20532  *
20533  *   Arguments: ssc    - ssc contains pointer to driver soft state (unit)
20534  *                       structure for this target.
20535  *      pc_flag - SD_POWER_CONDITION
20536  *                SD_START_STOP
20537  *		flag  - SD_TARGET_START
20538  *			SD_TARGET_STOP
20539  *			SD_TARGET_EJECT
20540  *			SD_TARGET_CLOSE
20541  *		path_flag - SD_PATH_DIRECT to use the USCSI "direct" chain and
20542  *			the normal command waitq, or SD_PATH_DIRECT_PRIORITY
20543  *			to use the USCSI "direct" chain and bypass the normal
20544  *			command waitq. SD_PATH_DIRECT_PRIORITY is used when this
20545  *			command is issued as part of an error recovery action.
20546  *
20547  * Return Code: 0   - Success
20548  *		EIO - IO error
20549  *		EACCES - Reservation conflict detected
20550  *		ENXIO  - Not Ready, medium not present
20551  *		errno return code from sd_ssc_send()
20552  *
20553  *     Context: Can sleep.
20554  */
20555 
20556 static int
20557 sd_send_scsi_START_STOP_UNIT(sd_ssc_t *ssc, int pc_flag, int flag,
20558     int path_flag)
20559 {
20560 	struct	scsi_extended_sense	sense_buf;
20561 	union scsi_cdb		cdb;
20562 	struct uscsi_cmd	ucmd_buf;
20563 	int			status;
20564 	struct sd_lun		*un;
20565 
20566 	ASSERT(ssc != NULL);
20567 	un = ssc->ssc_un;
20568 	ASSERT(un != NULL);
20569 	ASSERT(!mutex_owned(SD_MUTEX(un)));
20570 
20571 	SD_TRACE(SD_LOG_IO, un,
20572 	    "sd_send_scsi_START_STOP_UNIT: entry: un:0x%p\n", un);
20573 
20574 	if (un->un_f_check_start_stop &&
20575 	    (pc_flag == SD_START_STOP) &&
20576 	    ((flag == SD_TARGET_START) || (flag == SD_TARGET_STOP)) &&
20577 	    (un->un_f_start_stop_supported != TRUE)) {
20578 		return (0);
20579 	}
20580 
20581 	/*
20582 	 * If we are performing an eject operation and
20583 	 * we receive any command other than SD_TARGET_EJECT
20584 	 * we should immediately return.
20585 	 */
20586 	if (flag != SD_TARGET_EJECT) {
20587 		mutex_enter(SD_MUTEX(un));
20588 		if (un->un_f_ejecting == TRUE) {
20589 			mutex_exit(SD_MUTEX(un));
20590 			return (EAGAIN);
20591 		}
20592 		mutex_exit(SD_MUTEX(un));
20593 	}
20594 
20595 	bzero(&cdb, sizeof (cdb));
20596 	bzero(&ucmd_buf, sizeof (ucmd_buf));
20597 	bzero(&sense_buf, sizeof (struct scsi_extended_sense));
20598 
20599 	cdb.scc_cmd = SCMD_START_STOP;
20600 	cdb.cdb_opaque[4] = (pc_flag == SD_POWER_CONDITION) ?
20601 	    (uchar_t)(flag << 4) : (uchar_t)flag;
20602 
20603 	ucmd_buf.uscsi_cdb	= (char *)&cdb;
20604 	ucmd_buf.uscsi_cdblen	= CDB_GROUP0;
20605 	ucmd_buf.uscsi_bufaddr	= NULL;
20606 	ucmd_buf.uscsi_buflen	= 0;
20607 	ucmd_buf.uscsi_rqbuf	= (caddr_t)&sense_buf;
20608 	ucmd_buf.uscsi_rqlen	= sizeof (struct scsi_extended_sense);
20609 	ucmd_buf.uscsi_flags	= USCSI_RQENABLE | USCSI_SILENT;
20610 	ucmd_buf.uscsi_timeout	= 200;
20611 
20612 	status = sd_ssc_send(ssc, &ucmd_buf, FKIOCTL,
20613 	    UIO_SYSSPACE, path_flag);
20614 
20615 	switch (status) {
20616 	case 0:
20617 		sd_ssc_assessment(ssc, SD_FMT_STANDARD);
20618 		break;	/* Success! */
20619 	case EIO:
20620 		switch (ucmd_buf.uscsi_status) {
20621 		case STATUS_RESERVATION_CONFLICT:
20622 			status = EACCES;
20623 			break;
20624 		case STATUS_CHECK:
20625 			if (ucmd_buf.uscsi_rqstatus == STATUS_GOOD) {
20626 				switch (scsi_sense_key(
20627 				    (uint8_t *)&sense_buf)) {
20628 				case KEY_ILLEGAL_REQUEST:
20629 					status = ENOTSUP;
20630 					break;
20631 				case KEY_NOT_READY:
20632 					if (scsi_sense_asc(
20633 					    (uint8_t *)&sense_buf)
20634 					    == 0x3A) {
20635 						status = ENXIO;
20636 					}
20637 					break;
20638 				default:
20639 					break;
20640 				}
20641 			}
20642 			break;
20643 		default:
20644 			break;
20645 		}
20646 		break;
20647 	default:
20648 		break;
20649 	}
20650 
20651 	SD_TRACE(SD_LOG_IO, un, "sd_send_scsi_START_STOP_UNIT: exit\n");
20652 
20653 	return (status);
20654 }
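
/*
 * Illustration of the CDB byte 4 encoding above: for SD_START_STOP the
 * flag value is placed in byte 4 unchanged, while for SD_POWER_CONDITION
 * the power condition code is shifted into the upper nibble of byte 4
 * (flag << 4), as the START STOP UNIT command defines.
 */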
20655 
20656 
20657 /*
20658  *    Function: sd_start_stop_unit_callback
20659  *
20660  * Description: timeout(9F) callback to begin recovery process for a
20661  *		device that has spun down.
20662  *
20663  *   Arguments: arg - pointer to associated softstate struct.
20664  *
20665  *     Context: Executes in a timeout(9F) thread context
20666  */
20667 
20668 static void
20669 sd_start_stop_unit_callback(void *arg)
20670 {
20671 	struct sd_lun	*un = arg;
20672 	ASSERT(un != NULL);
20673 	ASSERT(!mutex_owned(SD_MUTEX(un)));
20674 
20675 	SD_TRACE(SD_LOG_IO, un, "sd_start_stop_unit_callback: entry\n");
20676 
20677 	(void) taskq_dispatch(sd_tq, sd_start_stop_unit_task, un, KM_NOSLEEP);
20678 }
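
/*
 * A sketch of how this callback is expected to be armed (the actual
 * arming code lives elsewhere in this driver; the delay shown here is
 * a hypothetical placeholder):
 *
 *	un->un_startstop_timeid = timeout(sd_start_stop_unit_callback,
 *	    un, some_delay_in_ticks);
 */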
20679 
20680 
20681 /*
20682  *    Function: sd_start_stop_unit_task
20683  *
20684  * Description: Recovery procedure when a drive is spun down.
20685  *
20686  *   Arguments: arg - pointer to associated softstate struct.
20687  *
20688  *     Context: Executes in a taskq() thread context
20689  */
20690 
20691 static void
20692 sd_start_stop_unit_task(void *arg)
20693 {
20694 	struct sd_lun	*un = arg;
20695 	sd_ssc_t	*ssc;
20696 	int		power_level;
20697 	int		rval;
20698 
20699 	ASSERT(un != NULL);
20700 	ASSERT(!mutex_owned(SD_MUTEX(un)));
20701 
20702 	SD_TRACE(SD_LOG_IO, un, "sd_start_stop_unit_task: entry\n");
20703 
20704 	/*
20705 	 * Some unformatted drives report not ready error, no need to
20706 	 * restart if format has been initiated.
20707 	 */
20708 	mutex_enter(SD_MUTEX(un));
20709 	if (un->un_f_format_in_progress == TRUE) {
20710 		mutex_exit(SD_MUTEX(un));
20711 		return;
20712 	}
20713 	mutex_exit(SD_MUTEX(un));
20714 
20715 	ssc = sd_ssc_init(un);
20716 	/*
20717 	 * When a START STOP command is issued from here, it is part of a
20718 	 * failure recovery operation and must be issued before any other
20719 	 * commands, including any pending retries. Thus it must be sent
20720 	 * using SD_PATH_DIRECT_PRIORITY. It doesn't matter if the spin up
20721 	 * succeeds or not, we will start I/O after the attempt.
20722 	 * If power condition is supported and the current power level
20723 	 * is capable of performing I/O, we should set the power condition
20724 	 * to that level. Otherwise, set the power condition to ACTIVE.
20725 	 */
20726 	if (un->un_f_power_condition_supported) {
20727 		mutex_enter(SD_MUTEX(un));
20728 		ASSERT(SD_PM_IS_LEVEL_VALID(un, un->un_power_level));
20729 		power_level = sd_pwr_pc.ran_perf[un->un_power_level]
20730 		    > 0 ? un->un_power_level : SD_SPINDLE_ACTIVE;
20731 		mutex_exit(SD_MUTEX(un));
20732 		rval = sd_send_scsi_START_STOP_UNIT(ssc, SD_POWER_CONDITION,
20733 		    sd_pl2pc[power_level], SD_PATH_DIRECT_PRIORITY);
20734 	} else {
20735 		rval = sd_send_scsi_START_STOP_UNIT(ssc, SD_START_STOP,
20736 		    SD_TARGET_START, SD_PATH_DIRECT_PRIORITY);
20737 	}
20738 
20739 	if (rval != 0)
20740 		sd_ssc_assessment(ssc, SD_FMT_IGNORE);
20741 	sd_ssc_fini(ssc);
20742 	/*
20743 	 * The above call blocks until the START_STOP_UNIT command completes.
20744 	 * Now that it has completed, we must re-try the original IO that
20745 	 * received the NOT READY condition in the first place. There are
20746 	 * three possible conditions here:
20747 	 *
20748 	 *  (1) The original IO is on un_retry_bp.
20749 	 *  (2) The original IO is on the regular wait queue, and un_retry_bp
20750 	 *	is NULL.
20751 	 *  (3) The original IO is on the regular wait queue, and un_retry_bp
20752 	 *	points to some other, unrelated bp.
20753 	 *
20754 	 * For each case, we must call sd_start_cmds() with un_retry_bp
20755 	 * as the argument. If un_retry_bp is NULL, this will initiate
20756 	 * processing of the regular wait queue.  If un_retry_bp is not NULL,
20757 	 * then this will process the bp on un_retry_bp. That may or may not
20758 	 * be the original IO, but that does not matter: the important thing
20759 	 * is to keep the IO processing going at this point.
20760 	 *
20761 	 * Note: This is a very specific error recovery sequence associated
20762 	 * with a drive that is not spun up. We attempt a START_STOP_UNIT and
20763 	 * serialize the I/O with completion of the spin-up.
20764 	 */
20765 	mutex_enter(SD_MUTEX(un));
20766 	SD_TRACE(SD_LOG_IO_CORE | SD_LOG_ERROR, un,
20767 	    "sd_start_stop_unit_task: un:0x%p starting bp:0x%p\n",
20768 	    un, un->un_retry_bp);
20769 	un->un_startstop_timeid = NULL;	/* Timeout is no longer pending */
20770 	sd_start_cmds(un, un->un_retry_bp);
20771 	mutex_exit(SD_MUTEX(un));
20772 
20773 	SD_TRACE(SD_LOG_IO, un, "sd_start_stop_unit_task: exit\n");
20774 }
20775 
20776 
20777 /*
20778  *    Function: sd_send_scsi_INQUIRY
20779  *
20780  * Description: Issue the scsi INQUIRY command.
20781  *
20782  *   Arguments: ssc   - ssc contains pointer to driver soft state (unit)
20783  *                      structure for this target.
20784  *		bufaddr - buffer to receive the inquiry data
20785  *		buflen - length of bufaddr and the CDB allocation length
20786  *		evpd - EVPD bit for byte 1 of the CDB
20787  *		page_code - VPD page code for byte 2 of the CDB
20788  *		residp - if non-NULL, receives the residual byte count
20789  *
20790  * Return Code: 0   - Success
20791  *		errno return code from sd_ssc_send()
20792  *
20793  *     Context: Can sleep. Does not return until command is completed.
20794  */
20795 
20796 static int
20797 sd_send_scsi_INQUIRY(sd_ssc_t *ssc, uchar_t *bufaddr, size_t buflen,
20798     uchar_t evpd, uchar_t page_code, size_t *residp)
20799 {
20800 	union scsi_cdb		cdb;
20801 	struct uscsi_cmd	ucmd_buf;
20802 	int			status;
20803 	struct sd_lun		*un;
20804 
20805 	ASSERT(ssc != NULL);
20806 	un = ssc->ssc_un;
20807 	ASSERT(un != NULL);
20808 	ASSERT(!mutex_owned(SD_MUTEX(un)));
20809 	ASSERT(bufaddr != NULL);
20810 
20811 	SD_TRACE(SD_LOG_IO, un, "sd_send_scsi_INQUIRY: entry: un:0x%p\n", un);
20812 
20813 	bzero(&cdb, sizeof (cdb));
20814 	bzero(&ucmd_buf, sizeof (ucmd_buf));
20815 	bzero(bufaddr, buflen);
20816 
20817 	cdb.scc_cmd = SCMD_INQUIRY;
20818 	cdb.cdb_opaque[1] = evpd;
20819 	cdb.cdb_opaque[2] = page_code;
20820 	FORMG0COUNT(&cdb, buflen);
20821 
20822 	ucmd_buf.uscsi_cdb	= (char *)&cdb;
20823 	ucmd_buf.uscsi_cdblen	= CDB_GROUP0;
20824 	ucmd_buf.uscsi_bufaddr	= (caddr_t)bufaddr;
20825 	ucmd_buf.uscsi_buflen	= buflen;
20826 	ucmd_buf.uscsi_rqbuf	= NULL;
20827 	ucmd_buf.uscsi_rqlen	= 0;
20828 	ucmd_buf.uscsi_flags	= USCSI_READ | USCSI_SILENT;
20829 	ucmd_buf.uscsi_timeout	= 200;	/* Excessive legacy value */
20830 
20831 	status = sd_ssc_send(ssc, &ucmd_buf, FKIOCTL,
20832 	    UIO_SYSSPACE, SD_PATH_DIRECT);
20833 
20834 	/*
20835 	 * Only handle status == 0; the upper-level caller
20836 	 * will make a different assessment based on the context.
20837 	 */
20838 	if (status == 0)
20839 		sd_ssc_assessment(ssc, SD_FMT_STANDARD);
20840 
20841 	if ((status == 0) && (residp != NULL)) {
20842 		*residp = ucmd_buf.uscsi_resid;
20843 	}
20844 
20845 	SD_TRACE(SD_LOG_IO, un, "sd_send_scsi_INQUIRY: exit\n");
20846 
20847 	return (status);
20848 }
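
/*
 * A minimal usage sketch, fetching the standard SCSI Unit Serial Number
 * VPD page (page code 0x80, defined by SPC; the buffer size here is an
 * arbitrary illustrative choice):
 *
 *	uchar_t buf[0xFF];
 *	size_t resid;
 *	int rval = sd_send_scsi_INQUIRY(ssc, buf, sizeof (buf), 0x01,
 *	    0x80, &resid);
 */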
20849 
20850 
20851 /*
20852  *    Function: sd_send_scsi_TEST_UNIT_READY
20853  *
20854  * Description: Issue the scsi TEST UNIT READY command.
20855  *		This routine can be told to set the flag USCSI_DIAGNOSE to
20856  *		prevent retrying failed commands. Use this when the intent
20857  *		is either to check for device readiness, to clear a Unit
20858  *		Attention, or to clear any outstanding sense data.
20859  *		However under specific conditions the expected behavior
20860  *		is for retries to bring a device ready, so use the flag
20861  *		with caution.
20862  *
20863  *   Arguments: ssc   - ssc contains pointer to driver soft state (unit)
20864  *                      structure for this target.
20865  *		flag:   SD_CHECK_FOR_MEDIA: return ENXIO if no media present
20866  *			SD_DONT_RETRY_TUR: include uscsi flag USCSI_DIAGNOSE.
20867  *			0: don't check for media present, do retries on cmd.
20868  *
20869  * Return Code: 0   - Success
20870  *		EIO - IO error
20871  *		EACCES - Reservation conflict detected
20872  *		ENXIO  - Not Ready, medium not present
20873  *		errno return code from sd_ssc_send()
20874  *
20875  *     Context: Can sleep. Does not return until command is completed.
20876  */
20877 
20878 static int
20879 sd_send_scsi_TEST_UNIT_READY(sd_ssc_t *ssc, int flag)
20880 {
20881 	struct	scsi_extended_sense	sense_buf;
20882 	union scsi_cdb		cdb;
20883 	struct uscsi_cmd	ucmd_buf;
20884 	int			status;
20885 	struct sd_lun		*un;
20886 
20887 	ASSERT(ssc != NULL);
20888 	un = ssc->ssc_un;
20889 	ASSERT(un != NULL);
20890 	ASSERT(!mutex_owned(SD_MUTEX(un)));
20891 
20892 	SD_TRACE(SD_LOG_IO, un,
20893 	    "sd_send_scsi_TEST_UNIT_READY: entry: un:0x%p\n", un);
20894 
20895 	/*
20896 	 * Some Seagate elite1 TQ devices get hung with disconnect/reconnect
20897 	 * timeouts when they receive a TUR and the queue is not empty. Check
20898 	 * the configuration flag set during attach (indicating the drive has
20899 	 * this firmware bug) and un_ncmds_in_transport before issuing the
20900 	 * TUR. If there are pending commands, return success; this is a bit
20901 	 * arbitrary, but it is ok for non-removables (i.e. the eliteI disks)
20902 	 * and for non-clustering configurations.
20904 	 */
20905 	if (un->un_f_cfg_tur_check == TRUE) {
20906 		mutex_enter(SD_MUTEX(un));
20907 		if (un->un_ncmds_in_transport != 0) {
20908 			mutex_exit(SD_MUTEX(un));
20909 			return (0);
20910 		}
20911 		mutex_exit(SD_MUTEX(un));
20912 	}
20913 
20914 	bzero(&cdb, sizeof (cdb));
20915 	bzero(&ucmd_buf, sizeof (ucmd_buf));
20916 	bzero(&sense_buf, sizeof (struct scsi_extended_sense));
20917 
20918 	cdb.scc_cmd = SCMD_TEST_UNIT_READY;
20919 
20920 	ucmd_buf.uscsi_cdb	= (char *)&cdb;
20921 	ucmd_buf.uscsi_cdblen	= CDB_GROUP0;
20922 	ucmd_buf.uscsi_bufaddr	= NULL;
20923 	ucmd_buf.uscsi_buflen	= 0;
20924 	ucmd_buf.uscsi_rqbuf	= (caddr_t)&sense_buf;
20925 	ucmd_buf.uscsi_rqlen	= sizeof (struct scsi_extended_sense);
20926 	ucmd_buf.uscsi_flags	= USCSI_RQENABLE | USCSI_SILENT;
20927 
20928 	/* Use flag USCSI_DIAGNOSE to prevent retries if it fails. */
20929 	if ((flag & SD_DONT_RETRY_TUR) != 0) {
20930 		ucmd_buf.uscsi_flags |= USCSI_DIAGNOSE;
20931 	}
20932 	ucmd_buf.uscsi_timeout	= 60;
20933 
20934 	status = sd_ssc_send(ssc, &ucmd_buf, FKIOCTL,
20935 	    UIO_SYSSPACE, ((flag & SD_BYPASS_PM) ? SD_PATH_DIRECT :
20936 	    SD_PATH_STANDARD));
20937 
20938 	switch (status) {
20939 	case 0:
20940 		sd_ssc_assessment(ssc, SD_FMT_STANDARD);
20941 		break;	/* Success! */
20942 	case EIO:
20943 		switch (ucmd_buf.uscsi_status) {
20944 		case STATUS_RESERVATION_CONFLICT:
20945 			status = EACCES;
20946 			break;
20947 		case STATUS_CHECK:
20948 			if ((flag & SD_CHECK_FOR_MEDIA) == 0) {
20949 				break;
20950 			}
20951 			if ((ucmd_buf.uscsi_rqstatus == STATUS_GOOD) &&
20952 			    (scsi_sense_key((uint8_t *)&sense_buf) ==
20953 			    KEY_NOT_READY) &&
20954 			    (scsi_sense_asc((uint8_t *)&sense_buf) == 0x3A)) {
20955 				status = ENXIO;
20956 			}
20957 			break;
20958 		default:
20959 			break;
20960 		}
20961 		break;
20962 	default:
20963 		break;
20964 	}
20965 
20966 	SD_TRACE(SD_LOG_IO, un, "sd_send_scsi_TEST_UNIT_READY: exit\n");
20967 
20968 	return (status);
20969 }
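
/*
 * A minimal usage sketch: checking for media presence without allowing
 * the command to be retried (per the flag descriptions above):
 *
 *	status = sd_send_scsi_TEST_UNIT_READY(ssc,
 *	    SD_CHECK_FOR_MEDIA | SD_DONT_RETRY_TUR);
 *	if (status == ENXIO)
 *		the medium is not present
 */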
20970 
20971 /*
20972  *    Function: sd_send_scsi_PERSISTENT_RESERVE_IN
20973  *
20974  * Description: Issue the scsi PERSISTENT RESERVE IN command.
20975  *
20976  *   Arguments: ssc   - ssc contains pointer to driver soft state (unit)
20977  *                      structure for this target.
 *		usr_cmd - SD_READ_KEYS or SD_READ_RESV, selecting the
 *			service action placed in byte 1 of the CDB
 *		data_len - length of the parameter data buffer
 *		data_bufp - buffer for the returned parameter data; if
 *			NULL, a default MHIOC_RESV_KEY_SIZE buffer is
 *			allocated internally
 *
20979  * Return Code: 0   - Success
20980  *		EACCES
20981  *		ENOTSUP
20982  *		errno return code from sd_ssc_send()
20983  *
20984  *     Context: Can sleep. Does not return until command is completed.
20985  */
20986 
20987 static int
20988 sd_send_scsi_PERSISTENT_RESERVE_IN(sd_ssc_t *ssc, uchar_t usr_cmd,
20989     uint16_t data_len, uchar_t *data_bufp)
20990 {
20991 	struct scsi_extended_sense	sense_buf;
20992 	union scsi_cdb		cdb;
20993 	struct uscsi_cmd	ucmd_buf;
20994 	int			status;
20995 	int			no_caller_buf = FALSE;
20996 	struct sd_lun		*un;
20997 
20998 	ASSERT(ssc != NULL);
20999 	un = ssc->ssc_un;
21000 	ASSERT(un != NULL);
21001 	ASSERT(!mutex_owned(SD_MUTEX(un)));
21002 	ASSERT((usr_cmd == SD_READ_KEYS) || (usr_cmd == SD_READ_RESV));
21003 
21004 	SD_TRACE(SD_LOG_IO, un,
21005 	    "sd_send_scsi_PERSISTENT_RESERVE_IN: entry: un:0x%p\n", un);
21006 
21007 	bzero(&cdb, sizeof (cdb));
21008 	bzero(&ucmd_buf, sizeof (ucmd_buf));
21009 	bzero(&sense_buf, sizeof (struct scsi_extended_sense));
21010 	if (data_bufp == NULL) {
21011 		/* Allocate a default buf if the caller did not give one */
21012 		ASSERT(data_len == 0);
21013 		data_len  = MHIOC_RESV_KEY_SIZE;
21014 		data_bufp = kmem_zalloc(MHIOC_RESV_KEY_SIZE, KM_SLEEP);
21015 		no_caller_buf = TRUE;
21016 	}
21017 
21018 	cdb.scc_cmd = SCMD_PERSISTENT_RESERVE_IN;
21019 	cdb.cdb_opaque[1] = usr_cmd;
21020 	FORMG1COUNT(&cdb, data_len);
21021 
21022 	ucmd_buf.uscsi_cdb	= (char *)&cdb;
21023 	ucmd_buf.uscsi_cdblen	= CDB_GROUP1;
21024 	ucmd_buf.uscsi_bufaddr	= (caddr_t)data_bufp;
21025 	ucmd_buf.uscsi_buflen	= data_len;
21026 	ucmd_buf.uscsi_rqbuf	= (caddr_t)&sense_buf;
21027 	ucmd_buf.uscsi_rqlen	= sizeof (struct scsi_extended_sense);
21028 	ucmd_buf.uscsi_flags	= USCSI_RQENABLE | USCSI_READ | USCSI_SILENT;
21029 	ucmd_buf.uscsi_timeout	= 60;
21030 
21031 	status = sd_ssc_send(ssc, &ucmd_buf, FKIOCTL,
21032 	    UIO_SYSSPACE, SD_PATH_STANDARD);
21033 
21034 	switch (status) {
21035 	case 0:
21036 		sd_ssc_assessment(ssc, SD_FMT_STANDARD);
21037 
21038 		break;	/* Success! */
21039 	case EIO:
21040 		switch (ucmd_buf.uscsi_status) {
21041 		case STATUS_RESERVATION_CONFLICT:
21042 			status = EACCES;
21043 			break;
21044 		case STATUS_CHECK:
21045 			if ((ucmd_buf.uscsi_rqstatus == STATUS_GOOD) &&
21046 			    (scsi_sense_key((uint8_t *)&sense_buf) ==
21047 			    KEY_ILLEGAL_REQUEST)) {
21048 				status = ENOTSUP;
21049 			}
21050 			break;
21051 		default:
21052 			break;
21053 		}
21054 		break;
21055 	default:
21056 		break;
21057 	}
21058 
21059 	SD_TRACE(SD_LOG_IO, un, "sd_send_scsi_PERSISTENT_RESERVE_IN: exit\n");
21060 
21061 	if (no_caller_buf == TRUE) {
21062 		kmem_free(data_bufp, data_len);
21063 	}
21064 
21065 	return (status);
21066 }
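
/*
 * A minimal usage sketch: passing a NULL data_bufp (with data_len == 0)
 * lets the routine allocate a default MHIOC_RESV_KEY_SIZE buffer, which
 * is enough to probe whether the target supports the service action:
 *
 *	status = sd_send_scsi_PERSISTENT_RESERVE_IN(ssc, SD_READ_KEYS,
 *	    0, NULL);
 *	if (status == ENOTSUP)
 *		the target rejected the command as an illegal request
 */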
21067 
21068 
21069 /*
21070  *    Function: sd_send_scsi_PERSISTENT_RESERVE_OUT
21071  *
21072  * Description: This routine is the driver entry point for handling CD-ROM
21073  *		multi-host persistent reservation requests (MHIOCGRP_REGISTER,
21074  *		MHIOCGRP_RESERVE, MHIOCGRP_RELEASE, MHIOCGRP_PREEMPTANDABORT)
21075  *		by sending the SCSI-3 PROUT commands to the device.
21076  *
21077  *   Arguments: ssc  -  ssc contains un - pointer to soft state struct
21078  *                      for the target.
21079  *		usr_cmd SCSI-3 reservation facility command (one of
21080  *			SD_SCSI3_REGISTER, SD_SCSI3_RESERVE, SD_SCSI3_RELEASE,
21081  *			SD_SCSI3_PREEMPTANDABORT, SD_SCSI3_CLEAR)
21082  *		usr_bufp - user provided pointer to a register, reserve descriptor,
21083  *			or preempt and abort structure (mhioc_register_t,
21084  *                      mhioc_resv_desc_t, mhioc_preemptandabort_t)
21085  *
21086  * Return Code: 0   - Success
21087  *		EACCES
21088  *		ENOTSUP
21089  *		errno return code from sd_ssc_send()
21090  *
21091  *     Context: Can sleep. Does not return until command is completed.
21092  */
21093 
21094 static int
21095 sd_send_scsi_PERSISTENT_RESERVE_OUT(sd_ssc_t *ssc, uchar_t usr_cmd,
21096     uchar_t *usr_bufp)
21097 {
21098 	struct scsi_extended_sense	sense_buf;
21099 	union scsi_cdb		cdb;
21100 	struct uscsi_cmd	ucmd_buf;
21101 	int			status;
21102 	uchar_t			data_len = sizeof (sd_prout_t);
21103 	sd_prout_t		*prp;
21104 	struct sd_lun		*un;
21105 
21106 	ASSERT(ssc != NULL);
21107 	un = ssc->ssc_un;
21108 	ASSERT(un != NULL);
21109 	ASSERT(!mutex_owned(SD_MUTEX(un)));
21110 	ASSERT(data_len == 24);	/* required by scsi spec */
21111 
21112 	SD_TRACE(SD_LOG_IO, un,
21113 	    "sd_send_scsi_PERSISTENT_RESERVE_OUT: entry: un:0x%p\n", un);
21114 
21115 	if (usr_bufp == NULL) {
21116 		return (EINVAL);
21117 	}
21118 
21119 	bzero(&cdb, sizeof (cdb));
21120 	bzero(&ucmd_buf, sizeof (ucmd_buf));
21121 	bzero(&sense_buf, sizeof (struct scsi_extended_sense));
21122 	prp = kmem_zalloc(data_len, KM_SLEEP);
21123 
21124 	cdb.scc_cmd = SCMD_PERSISTENT_RESERVE_OUT;
21125 	cdb.cdb_opaque[1] = usr_cmd;
21126 	FORMG1COUNT(&cdb, data_len);
21127 
21128 	ucmd_buf.uscsi_cdb	= (char *)&cdb;
21129 	ucmd_buf.uscsi_cdblen	= CDB_GROUP1;
21130 	ucmd_buf.uscsi_bufaddr	= (caddr_t)prp;
21131 	ucmd_buf.uscsi_buflen	= data_len;
21132 	ucmd_buf.uscsi_rqbuf	= (caddr_t)&sense_buf;
21133 	ucmd_buf.uscsi_rqlen	= sizeof (struct scsi_extended_sense);
21134 	ucmd_buf.uscsi_flags	= USCSI_RQENABLE | USCSI_WRITE | USCSI_SILENT;
21135 	ucmd_buf.uscsi_timeout	= 60;
21136 
21137 	switch (usr_cmd) {
21138 	case SD_SCSI3_REGISTER: {
21139 		mhioc_register_t *ptr = (mhioc_register_t *)usr_bufp;
21140 
21141 		bcopy(ptr->oldkey.key, prp->res_key, MHIOC_RESV_KEY_SIZE);
21142 		bcopy(ptr->newkey.key, prp->service_key,
21143 		    MHIOC_RESV_KEY_SIZE);
21144 		prp->aptpl = ptr->aptpl;
21145 		break;
21146 	}
21147 	case SD_SCSI3_CLEAR: {
21148 		mhioc_resv_desc_t *ptr = (mhioc_resv_desc_t *)usr_bufp;
21149 
21150 		bcopy(ptr->key.key, prp->res_key, MHIOC_RESV_KEY_SIZE);
21151 		break;
21152 	}
21153 	case SD_SCSI3_RESERVE:
21154 	case SD_SCSI3_RELEASE: {
21155 		mhioc_resv_desc_t *ptr = (mhioc_resv_desc_t *)usr_bufp;
21156 
21157 		bcopy(ptr->key.key, prp->res_key, MHIOC_RESV_KEY_SIZE);
21158 		prp->scope_address = BE_32(ptr->scope_specific_addr);
21159 		cdb.cdb_opaque[2] = ptr->type;
21160 		break;
21161 	}
21162 	case SD_SCSI3_PREEMPTANDABORT: {
21163 		mhioc_preemptandabort_t *ptr =
21164 		    (mhioc_preemptandabort_t *)usr_bufp;
21165 
21166 		bcopy(ptr->resvdesc.key.key, prp->res_key, MHIOC_RESV_KEY_SIZE);
21167 		bcopy(ptr->victim_key.key, prp->service_key,
21168 		    MHIOC_RESV_KEY_SIZE);
21169 		prp->scope_address = BE_32(ptr->resvdesc.scope_specific_addr);
21170 		cdb.cdb_opaque[2] = ptr->resvdesc.type;
21171 		ucmd_buf.uscsi_flags |= USCSI_HEAD;
21172 		break;
21173 	}
21174 	case SD_SCSI3_REGISTERANDIGNOREKEY:
21175 	{
21176 		mhioc_registerandignorekey_t *ptr;
21177 		ptr = (mhioc_registerandignorekey_t *)usr_bufp;
21178 		bcopy(ptr->newkey.key,
21179 		    prp->service_key, MHIOC_RESV_KEY_SIZE);
21180 		prp->aptpl = ptr->aptpl;
21181 		break;
21182 	}
21183 	default:
21184 		ASSERT(FALSE);
21185 		break;
21186 	}
21187 
21188 	status = sd_ssc_send(ssc, &ucmd_buf, FKIOCTL,
21189 	    UIO_SYSSPACE, SD_PATH_STANDARD);
21190 
21191 	switch (status) {
21192 	case 0:
21193 		sd_ssc_assessment(ssc, SD_FMT_STANDARD);
21194 		break;	/* Success! */
21195 	case EIO:
21196 		switch (ucmd_buf.uscsi_status) {
21197 		case STATUS_RESERVATION_CONFLICT:
21198 			status = EACCES;
21199 			break;
21200 		case STATUS_CHECK:
21201 			if ((ucmd_buf.uscsi_rqstatus == STATUS_GOOD) &&
21202 			    (scsi_sense_key((uint8_t *)&sense_buf) ==
21203 			    KEY_ILLEGAL_REQUEST)) {
21204 				status = ENOTSUP;
21205 			}
21206 			break;
21207 		default:
21208 			break;
21209 		}
21210 		break;
21211 	default:
21212 		break;
21213 	}
21214 
21215 	kmem_free(prp, data_len);
21216 	SD_TRACE(SD_LOG_IO, un, "sd_send_scsi_PERSISTENT_RESERVE_OUT: exit\n");
21217 	return (status);
21218 }
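
/*
 * A minimal usage sketch for the SD_SCSI3_REGISTER case above (my_key is
 * a hypothetical caller-supplied key; values are illustrative only):
 *
 *	mhioc_register_t reg;
 *	bzero(&reg, sizeof (reg));
 *	bcopy(my_key, reg.newkey.key, MHIOC_RESV_KEY_SIZE);
 *	status = sd_send_scsi_PERSISTENT_RESERVE_OUT(ssc,
 *	    SD_SCSI3_REGISTER, (uchar_t *)&reg);
 */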
21219 
21220 
21221 /*
21222  *    Function: sd_send_scsi_SYNCHRONIZE_CACHE
21223  *
21224  * Description: Issues a scsi SYNCHRONIZE CACHE command to the target
21225  *
21226  *   Arguments: un - pointer to the target's soft state struct
21227  *              dkc - pointer to the callback structure
21228  *
21229  * Return Code: 0 - success
21230  *		errno-type error code
21231  *
21232  *     Context: kernel thread context only.
21233  *
21234  *  _______________________________________________________________
21235  * | dkc_flag &   | dkc_callback | DKIOCFLUSHWRITECACHE            |
21236  * |FLUSH_VOLATILE|              | operation                       |
21237  * |______________|______________|_________________________________|
21238  * | 0            | NULL         | Synchronous flush on both       |
21239  * |              |              | volatile and non-volatile cache |
21240  * |______________|______________|_________________________________|
21241  * | 1            | NULL         | Synchronous flush on volatile   |
21242  * |              |              | cache; disk drivers may suppress|
21243  * |              |              | flush if disk table indicates   |
21244  * |              |              | non-volatile cache              |
21245  * |______________|______________|_________________________________|
21246  * | 0            | !NULL        | Asynchronous flush on both      |
21247  * |              |              | volatile and non-volatile cache;|
21248  * |______________|______________|_________________________________|
21249  * | 1            | !NULL        | Asynchronous flush on volatile  |
21250  * |              |              | cache; disk drivers may suppress|
21251  * |              |              | flush if disk table indicates   |
21252  * |              |              | non-volatile cache              |
21253  * |______________|______________|_________________________________|
21254  *
21255  */
21256 
21257 static int
21258 sd_send_scsi_SYNCHRONIZE_CACHE(struct sd_lun *un, struct dk_callback *dkc)
21259 {
21260 	struct sd_uscsi_info	*uip;
21261 	struct uscsi_cmd	*uscmd;
21262 	union scsi_cdb		*cdb;
21263 	struct buf		*bp;
21264 	int			rval = 0;
21265 	int			is_async;
21266 
21267 	SD_TRACE(SD_LOG_IO, un,
21268 	    "sd_send_scsi_SYNCHRONIZE_CACHE: entry: un:0x%p\n", un);
21269 
21270 	ASSERT(un != NULL);
21271 	ASSERT(!mutex_owned(SD_MUTEX(un)));
21272 
21273 	if (dkc == NULL || dkc->dkc_callback == NULL) {
21274 		is_async = FALSE;
21275 	} else {
21276 		is_async = TRUE;
21277 	}
21278 
21279 	mutex_enter(SD_MUTEX(un));
21280 	/* check whether cache flush should be suppressed */
21281 	if (un->un_f_suppress_cache_flush == TRUE) {
21282 		mutex_exit(SD_MUTEX(un));
21283 		/*
21284 		 * suppress the cache flush if the device is told to do
21285 		 * so by sd.conf or disk table
21286 		 */
21287 		SD_TRACE(SD_LOG_IO, un, "sd_send_scsi_SYNCHRONIZE_CACHE: "
21288 		    "skip the cache flush since suppress_cache_flush is %d!\n",
21289 		    un->un_f_suppress_cache_flush);
21290 
21291 		if (is_async == TRUE) {
21292 			/* invoke callback for asynchronous flush */
21293 			(*dkc->dkc_callback)(dkc->dkc_cookie, 0);
21294 		}
21295 		return (rval);
21296 	}
21297 	mutex_exit(SD_MUTEX(un));
21298 
21299 	/*
21300 	 * check dkc_flag & FLUSH_VOLATILE so SYNC_NV bit can be
21301 	 * set properly
21302 	 */
21303 	cdb = kmem_zalloc(CDB_GROUP1, KM_SLEEP);
21304 	cdb->scc_cmd = SCMD_SYNCHRONIZE_CACHE;
21305 
21306 	mutex_enter(SD_MUTEX(un));
21307 	if (dkc != NULL && un->un_f_sync_nv_supported &&
21308 	    (dkc->dkc_flag & FLUSH_VOLATILE)) {
21309 		/*
21310 		 * if the device supports SYNC_NV bit, turn on
21311 		 * the SYNC_NV bit to only flush volatile cache
21312 		 */
21313 		cdb->cdb_un.tag |= SD_SYNC_NV_BIT;
21314 	}
21315 	mutex_exit(SD_MUTEX(un));
21316 
21317 	/*
21318 	 * First get some memory for the uscsi_cmd struct and cdb
21319 	 * and initialize for SYNCHRONIZE_CACHE cmd.
21320 	 */
21321 	uscmd = kmem_zalloc(sizeof (struct uscsi_cmd), KM_SLEEP);
21322 	uscmd->uscsi_cdblen = CDB_GROUP1;
21323 	uscmd->uscsi_cdb = (caddr_t)cdb;
21324 	uscmd->uscsi_bufaddr = NULL;
21325 	uscmd->uscsi_buflen = 0;
21326 	uscmd->uscsi_rqbuf = kmem_zalloc(SENSE_LENGTH, KM_SLEEP);
21327 	uscmd->uscsi_rqlen = SENSE_LENGTH;
21328 	uscmd->uscsi_rqresid = SENSE_LENGTH;
21329 	uscmd->uscsi_flags = USCSI_RQENABLE | USCSI_SILENT;
21330 	uscmd->uscsi_timeout = sd_io_time;
21331 
21332 	/*
21333 	 * Allocate an sd_uscsi_info struct and fill it with the info
21334 	 * needed by sd_initpkt_for_uscsi().  Then put the pointer into
21335 	 * b_private in the buf for sd_initpkt_for_uscsi().  Note that
21336 	 * since we allocate the buf here in this function, we do not
21337 	 * need to preserve the prior contents of b_private.
21338 	 * The sd_uscsi_info struct is also used by sd_uscsi_strategy()
21339 	 */
21340 	uip = kmem_zalloc(sizeof (struct sd_uscsi_info), KM_SLEEP);
21341 	uip->ui_flags = SD_PATH_DIRECT;
21342 	uip->ui_cmdp  = uscmd;
21343 
21344 	bp = getrbuf(KM_SLEEP);
21345 	bp->b_private = uip;
21346 
21347 	/*
21348 	 * Setup buffer to carry uscsi request.
21349 	 */
21350 	bp->b_flags  = B_BUSY;
21351 	bp->b_bcount = 0;
21352 	bp->b_blkno  = 0;
21353 
21354 	if (is_async == TRUE) {
21355 		bp->b_iodone = sd_send_scsi_SYNCHRONIZE_CACHE_biodone;
21356 		uip->ui_dkc = *dkc;
21357 	}
21358 
21359 	bp->b_edev = SD_GET_DEV(un);
21360 	bp->b_dev = cmpdev(bp->b_edev);	/* maybe unnecessary? */
21361 
21362 	/*
21363 	 * Unset un_f_sync_cache_required flag
21364 	 */
21365 	mutex_enter(SD_MUTEX(un));
21366 	un->un_f_sync_cache_required = FALSE;
21367 	mutex_exit(SD_MUTEX(un));
21368 
21369 	(void) sd_uscsi_strategy(bp);
21370 
21371 	/*
	 * If this is a synchronous request, wait for completion.
	 * If it is asynchronous, just return and let the b_iodone
	 * callback clean up.
	 * NOTE: On return, un_ncmds_in_driver will be decremented,
	 * but it was also incremented in sd_uscsi_strategy(), so
	 * we should be ok.
21378 	 */
21379 	if (is_async == FALSE) {
21380 		(void) biowait(bp);
21381 		rval = sd_send_scsi_SYNCHRONIZE_CACHE_biodone(bp);
21382 	}
21383 
21384 	return (rval);
21385 }
21386 
21387 
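/*
 * Completion handler for sd_send_scsi_SYNCHRONIZE_CACHE(): interpret the
 * command status, record unsupported features (SYNC_NV, or the command
 * itself) on ILLEGAL REQUEST sense data, invoke any registered async
 * callback and release the resources allocated by the issuing routine.
 */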
21388 static int
21389 sd_send_scsi_SYNCHRONIZE_CACHE_biodone(struct buf *bp)
21390 {
21391 	struct sd_uscsi_info *uip;
21392 	struct uscsi_cmd *uscmd;
21393 	uint8_t *sense_buf;
21394 	struct sd_lun *un;
21395 	int status;
21396 	union scsi_cdb *cdb;
21397 
21398 	uip = (struct sd_uscsi_info *)(bp->b_private);
21399 	ASSERT(uip != NULL);
21400 
21401 	uscmd = uip->ui_cmdp;
21402 	ASSERT(uscmd != NULL);
21403 
21404 	sense_buf = (uint8_t *)uscmd->uscsi_rqbuf;
21405 	ASSERT(sense_buf != NULL);
21406 
21407 	un = ddi_get_soft_state(sd_state, SD_GET_INSTANCE_FROM_BUF(bp));
21408 	ASSERT(un != NULL);
21409 
21410 	cdb = (union scsi_cdb *)uscmd->uscsi_cdb;
21411 
21412 	status = geterror(bp);
21413 	switch (status) {
21414 	case 0:
21415 		break;	/* Success! */
21416 	case EIO:
21417 		switch (uscmd->uscsi_status) {
21418 		case STATUS_RESERVATION_CONFLICT:
21419 			/* Ignore reservation conflict */
21420 			status = 0;
21421 			goto done;
21422 
21423 		case STATUS_CHECK:
21424 			if ((uscmd->uscsi_rqstatus == STATUS_GOOD) &&
21425 			    (scsi_sense_key(sense_buf) ==
21426 			    KEY_ILLEGAL_REQUEST)) {
21427 				/* Ignore Illegal Request error */
				if (cdb->cdb_un.tag & SD_SYNC_NV_BIT) {
21429 					mutex_enter(SD_MUTEX(un));
21430 					un->un_f_sync_nv_supported = FALSE;
21431 					mutex_exit(SD_MUTEX(un));
21432 					status = 0;
21433 					SD_TRACE(SD_LOG_IO, un,
					    "un_f_sync_nv_supported "
					    "is set to false.\n");
21436 					goto done;
21437 				}
21438 
21439 				mutex_enter(SD_MUTEX(un));
21440 				un->un_f_sync_cache_supported = FALSE;
21441 				mutex_exit(SD_MUTEX(un));
21442 				SD_TRACE(SD_LOG_IO, un,
				    "sd_send_scsi_SYNCHRONIZE_CACHE_biodone: "
				    "un_f_sync_cache_supported set to false "
				    "with asc = %x, ascq = %x\n",
21446 				    scsi_sense_asc(sense_buf),
21447 				    scsi_sense_ascq(sense_buf));
21448 				status = ENOTSUP;
21449 				goto done;
21450 			}
21451 			break;
21452 		default:
21453 			break;
21454 		}
21455 		/* FALLTHRU */
21456 	default:
21457 		/*
21458 		 * Turn on the un_f_sync_cache_required flag
21459 		 * since the SYNC CACHE command failed
21460 		 */
21461 		mutex_enter(SD_MUTEX(un));
21462 		un->un_f_sync_cache_required = TRUE;
21463 		mutex_exit(SD_MUTEX(un));
21464 
21465 		/*
21466 		 * Don't log an error message if this device
21467 		 * has removable media.
21468 		 */
21469 		if (!un->un_f_has_removable_media) {
21470 			scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
21471 			    "SYNCHRONIZE CACHE command failed (%d)\n", status);
21472 		}
21473 		break;
21474 	}
21475 
21476 done:
21477 	if (uip->ui_dkc.dkc_callback != NULL) {
21478 		(*uip->ui_dkc.dkc_callback)(uip->ui_dkc.dkc_cookie, status);
21479 	}
21480 
21481 	ASSERT((bp->b_flags & B_REMAPPED) == 0);
21482 	freerbuf(bp);
21483 	kmem_free(uip, sizeof (struct sd_uscsi_info));
21484 	kmem_free(uscmd->uscsi_rqbuf, SENSE_LENGTH);
21485 	kmem_free(uscmd->uscsi_cdb, (size_t)uscmd->uscsi_cdblen);
21486 	kmem_free(uscmd, sizeof (struct uscsi_cmd));
21487 
21488 	return (status);
21489 }
21490 
21491 /*
21492  * Issues a single SCSI UNMAP command with a prepared UNMAP parameter list.
21493  * Returns zero on success, or the non-zero command error code on failure.
21494  */
21495 static int
21496 sd_send_scsi_UNMAP_issue_one(sd_ssc_t *ssc, unmap_param_hdr_t *uph,
21497     uint64_t num_descr, uint64_t bytes)
21498 {
21499 	struct sd_lun		*un = ssc->ssc_un;
21500 	struct scsi_extended_sense	sense_buf;
21501 	union scsi_cdb		cdb;
21502 	struct uscsi_cmd	ucmd_buf;
21503 	int			status;
21504 	const uint64_t		param_size = sizeof (unmap_param_hdr_t) +
21505 	    num_descr * sizeof (unmap_blk_descr_t);
21506 
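	/*
	 * Sketch of the UNMAP parameter list being built (per SBC-3;
	 * offsets shown for orientation only):
	 *   bytes 0-1: UNMAP DATA LENGTH (param_size - 2, big-endian)
	 *   bytes 2-3: UNMAP BLOCK DESCRIPTOR DATA LENGTH (param_size - 8)
	 *   bytes 4-7: reserved
	 * followed by num_descr 16-byte block descriptors, each:
	 *   bytes 0-7:   UNMAP LBA (big-endian)
	 *   bytes 8-11:  NUMBER OF LOGICAL BLOCKS (big-endian)
	 *   bytes 12-15: reserved
	 */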
21507 	ASSERT3U(param_size - 2, <=, UINT16_MAX);
21508 	uph->uph_data_len = BE_16(param_size - 2);
21509 	uph->uph_descr_data_len = BE_16(param_size - 8);
21510 
21511 	bzero(&cdb, sizeof (cdb));
21512 	bzero(&ucmd_buf, sizeof (ucmd_buf));
21513 	bzero(&sense_buf, sizeof (struct scsi_extended_sense));
21514 
21515 	cdb.scc_cmd = SCMD_UNMAP;
21516 	FORMG1COUNT(&cdb, param_size);
21517 
21518 	ucmd_buf.uscsi_cdb	= (char *)&cdb;
21519 	ucmd_buf.uscsi_cdblen	= (uchar_t)CDB_GROUP1;
21520 	ucmd_buf.uscsi_bufaddr	= (caddr_t)uph;
21521 	ucmd_buf.uscsi_buflen	= param_size;
21522 	ucmd_buf.uscsi_rqbuf	= (caddr_t)&sense_buf;
21523 	ucmd_buf.uscsi_rqlen	= sizeof (struct scsi_extended_sense);
21524 	ucmd_buf.uscsi_flags	= USCSI_WRITE | USCSI_RQENABLE | USCSI_SILENT;
21525 	ucmd_buf.uscsi_timeout	= un->un_cmd_timeout;
21526 
21527 	status = sd_ssc_send(ssc, &ucmd_buf, FKIOCTL, UIO_SYSSPACE,
21528 	    SD_PATH_STANDARD);
21529 
21530 	switch (status) {
21531 	case 0:
21532 		sd_ssc_assessment(ssc, SD_FMT_STANDARD);
21533 
21534 		if (un->un_unmapstats) {
21535 			atomic_inc_64(&un->un_unmapstats->us_cmds.value.ui64);
21536 			atomic_add_64(&un->un_unmapstats->us_extents.value.ui64,
21537 			    num_descr);
21538 			atomic_add_64(&un->un_unmapstats->us_bytes.value.ui64,
21539 			    bytes);
21540 		}
21541 		break;	/* Success! */
21542 	case EIO:
21543 		if (un->un_unmapstats)
21544 			atomic_inc_64(&un->un_unmapstats->us_errs.value.ui64);
21545 		switch (ucmd_buf.uscsi_status) {
21546 		case STATUS_RESERVATION_CONFLICT:
21547 			status = EACCES;
21548 			break;
21549 		default:
21550 			break;
21551 		}
21552 		break;
21553 	default:
21554 		if (un->un_unmapstats)
21555 			atomic_inc_64(&un->un_unmapstats->us_errs.value.ui64);
21556 		break;
21557 	}
21558 
21559 	return (status);
21560 }
21561 
21562 /*
21563  * Returns a pointer to the i'th block descriptor inside an UNMAP param list.
21564  */
21565 static inline unmap_blk_descr_t *
21566 UNMAP_blk_descr_i(void *buf, size_t i)
21567 {
21568 	return ((unmap_blk_descr_t *)((uintptr_t)buf +
21569 	    sizeof (unmap_param_hdr_t) + (i * sizeof (unmap_blk_descr_t))));
21570 }
21571 
21572 /*
21573  * Takes the list of extents from sd_send_scsi_UNMAP, chops it up, prepares
21574  * UNMAP block descriptors and issues individual SCSI UNMAP commands. While
21575  * doing so we consult the block limits to determine at most how many
21576  * extents and LBAs we can UNMAP in one command.
 * If a command fails for whatever reason, extent list processing is aborted
21578  * and the failed command's status is returned. Otherwise returns 0 on
21579  * success.
21580  */
21581 static int
21582 sd_send_scsi_UNMAP_issue(dev_t dev, sd_ssc_t *ssc, const dkioc_free_list_t *dfl)
21583 {
21584 	struct sd_lun		*un = ssc->ssc_un;
21585 	unmap_param_hdr_t	*uph;
21586 	sd_blk_limits_t		*lim = &un->un_blk_lim;
21587 	int			rval = 0;
21588 	int			partition;
21589 	/* partition offset & length in system blocks */
21590 	diskaddr_t		part_off_sysblks = 0, part_len_sysblks = 0;
21591 	uint64_t		part_off, part_len;
21592 	uint64_t		descr_cnt_lim, byte_cnt_lim;
21593 	uint64_t		descr_issued = 0, bytes_issued = 0;
21594 
21595 	uph = kmem_zalloc(SD_UNMAP_PARAM_LIST_MAXSZ, KM_SLEEP);
21596 
21597 	partition = SDPART(dev);
21598 	rval = cmlb_partinfo(un->un_cmlbhandle, partition, &part_len_sysblks,
21599 	    &part_off_sysblks, NULL, NULL, (void *)SD_PATH_DIRECT);
21600 	if (rval != 0)
21601 		goto out;
21602 	part_off = SD_SYSBLOCKS2BYTES(part_off_sysblks);
21603 	part_len = SD_SYSBLOCKS2BYTES(part_len_sysblks);
21604 
21605 	ASSERT(un->un_blk_lim.lim_max_unmap_lba_cnt != 0);
21606 	ASSERT(un->un_blk_lim.lim_max_unmap_descr_cnt != 0);
	/* Per the spec, 0xffffffff is a special value, so compute maximums. */
21608 	byte_cnt_lim = lim->lim_max_unmap_lba_cnt < UINT32_MAX ?
21609 	    (uint64_t)lim->lim_max_unmap_lba_cnt * un->un_tgt_blocksize :
21610 	    UINT64_MAX;
21611 	descr_cnt_lim = MIN(lim->lim_max_unmap_descr_cnt, SD_UNMAP_MAX_DESCR);
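	/*
	 * Worked example (illustrative values only): with a 512-byte
	 * target block and lim_max_unmap_lba_cnt = 0x400000,
	 * byte_cnt_lim = 0x400000 * 512 = 2 GiB per command.
	 */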
21612 
21613 	if (dfl->dfl_offset >= part_len) {
21614 		rval = SET_ERROR(EINVAL);
21615 		goto out;
21616 	}
21617 
21618 	for (size_t i = 0; i < dfl->dfl_num_exts; i++) {
21619 		const dkioc_free_list_ext_t *ext = &dfl->dfl_exts[i];
21620 		uint64_t ext_start = ext->dfle_start;
21621 		uint64_t ext_length = ext->dfle_length;
21622 
21623 		while (ext_length > 0) {
21624 			unmap_blk_descr_t *ubd;
21625 			/* Respect device limit on LBA count per command */
21626 			uint64_t len = MIN(MIN(ext_length, byte_cnt_lim -
21627 			    bytes_issued), SD_TGTBLOCKS2BYTES(un, UINT32_MAX));
21628 
			/*
			 * Check partition limits; the "x + len < x"
			 * comparisons also catch 64-bit overflow.
			 */
21630 			if (ext_start >= part_len ||
21631 			    ext_start + len < ext_start ||
21632 			    dfl->dfl_offset + ext_start + len <
21633 			    dfl->dfl_offset ||
21634 			    dfl->dfl_offset + ext_start + len > part_len) {
21635 				rval = SET_ERROR(EINVAL);
21636 				goto out;
21637 			}
21638 
21639 			ASSERT3U(descr_issued, <, descr_cnt_lim);
21640 			ASSERT3U(bytes_issued, <, byte_cnt_lim);
21641 			ubd = UNMAP_blk_descr_i(uph, descr_issued);
21642 
21643 			/* adjust in-partition addresses to be device-global */
21644 			ubd->ubd_lba = BE_64(SD_BYTES2TGTBLOCKS(un,
21645 			    dfl->dfl_offset + ext_start + part_off));
21646 			ubd->ubd_lba_cnt = BE_32(SD_BYTES2TGTBLOCKS(un, len));
21647 
21648 			descr_issued++;
21649 			bytes_issued += len;
21650 
21651 			/* Issue command when device limits reached */
21652 			if (descr_issued == descr_cnt_lim ||
21653 			    bytes_issued == byte_cnt_lim) {
21654 				rval = sd_send_scsi_UNMAP_issue_one(ssc, uph,
21655 				    descr_issued, bytes_issued);
21656 				if (rval != 0)
21657 					goto out;
21658 				descr_issued = 0;
21659 				bytes_issued = 0;
21660 			}
21661 
21662 			ext_start += len;
21663 			ext_length -= len;
21664 		}
21665 	}
21666 
21667 	if (descr_issued > 0) {
21668 		/* issue last command */
21669 		rval = sd_send_scsi_UNMAP_issue_one(ssc, uph, descr_issued,
21670 		    bytes_issued);
21671 	}
21672 
21673 out:
21674 	kmem_free(uph, SD_UNMAP_PARAM_LIST_MAXSZ);
21675 	return (rval);
21676 }
21677 
21678 /*
21679  * Issues one or several UNMAP commands based on a list of extents to be
21680  * unmapped. The internal multi-command processing is hidden, as the exact
21681  * number of commands and extents per command is limited by both SCSI
21682  * command syntax and device limits (as expressed in the SCSI Block Limits
21683  * VPD page and un_blk_lim in struct sd_lun).
21684  * Returns zero on success, or the error code of the first failed SCSI UNMAP
21685  * command.
21686  */
21687 static int
21688 sd_send_scsi_UNMAP(dev_t dev, sd_ssc_t *ssc, dkioc_free_list_t *dfl, int flag)
21689 {
21690 	struct sd_lun		*un = ssc->ssc_un;
21691 	int			rval = 0;
21692 
21693 	ASSERT(!mutex_owned(SD_MUTEX(un)));
21694 	ASSERT(dfl != NULL);
21695 
21696 	/* Per spec, any of these conditions signals lack of UNMAP support. */
21697 	if (!(un->un_thin_flags & SD_THIN_PROV_ENABLED) ||
21698 	    un->un_blk_lim.lim_max_unmap_descr_cnt == 0 ||
21699 	    un->un_blk_lim.lim_max_unmap_lba_cnt == 0) {
21700 		return (SET_ERROR(ENOTSUP));
21701 	}
21702 
21703 	/* For userspace calls we must copy in. */
21704 	if (!(flag & FKIOCTL)) {
21705 		int err = dfl_copyin(dfl, &dfl, flag, KM_SLEEP);
21706 		if (err != 0)
21707 			return (err);
21708 	} else if (dfl->dfl_num_exts > DFL_COPYIN_MAX_EXTS) {
21709 		ASSERT3U(dfl->dfl_num_exts, <=, DFL_COPYIN_MAX_EXTS);
21710 		return (SET_ERROR(EINVAL));
21711 	}
21712 
21713 	rval = sd_send_scsi_UNMAP_issue(dev, ssc, dfl);
21714 
21715 	if (!(flag & FKIOCTL)) {
21716 		dfl_free(dfl);
21717 		dfl = NULL;
21718 	}
21719 
21720 	return (rval);
21721 }
21722 
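/*
 * A rough call-flow sketch (entry point assumed, not shown here): the
 * DKIOCFREE ioctl hands a dkioc_free_list_t to sd_send_scsi_UNMAP(),
 * which copies it in for userland callers; sd_send_scsi_UNMAP_issue()
 * then slices the extents to fit the device limits and issues each
 * slice via sd_send_scsi_UNMAP_issue_one().
 */
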
21723 /*
21724  *    Function: sd_send_scsi_GET_CONFIGURATION
21725  *
21726  * Description: Issues the get configuration command to the device.
 *		Called from sd_check_for_writable_cd & sd_get_media_info.
 *		The caller must ensure that buflen == SD_PROFILE_HEADER_LEN.
21729  *   Arguments: ssc
21730  *		ucmdbuf
21731  *		rqbuf
21732  *		rqbuflen
21733  *		bufaddr
21734  *		buflen
21735  *		path_flag
21736  *
21737  * Return Code: 0   - Success
21738  *		errno return code from sd_ssc_send()
21739  *
21740  *     Context: Can sleep. Does not return until command is completed.
21741  *
21742  */
21743 
21744 static int
21745 sd_send_scsi_GET_CONFIGURATION(sd_ssc_t *ssc, struct uscsi_cmd *ucmdbuf,
21746     uchar_t *rqbuf, uint_t rqbuflen, uchar_t *bufaddr, uint_t buflen,
21747     int path_flag)
21748 {
21749 	char	cdb[CDB_GROUP1];
21750 	int	status;
21751 	struct sd_lun	*un;
21752 
21753 	ASSERT(ssc != NULL);
21754 	un = ssc->ssc_un;
21755 	ASSERT(un != NULL);
21756 	ASSERT(!mutex_owned(SD_MUTEX(un)));
21757 	ASSERT(bufaddr != NULL);
21758 	ASSERT(ucmdbuf != NULL);
21759 	ASSERT(rqbuf != NULL);
21760 
21761 	SD_TRACE(SD_LOG_IO, un,
21762 	    "sd_send_scsi_GET_CONFIGURATION: entry: un:0x%p\n", un);
21763 
21764 	bzero(cdb, sizeof (cdb));
21765 	bzero(ucmdbuf, sizeof (struct uscsi_cmd));
21766 	bzero(rqbuf, rqbuflen);
21767 	bzero(bufaddr, buflen);
21768 
21769 	/*
21770 	 * Set up cdb field for the get configuration command.
21771 	 */
21772 	cdb[0] = SCMD_GET_CONFIGURATION;
21773 	cdb[1] = 0x02;  /* Requested Type */
21774 	cdb[8] = SD_PROFILE_HEADER_LEN;
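	/*
	 * Note (a reading of the code, MMC semantics assumed): RT = 0x02
	 * requests a single feature descriptor, and only CDB byte 8 (the
	 * low byte of the 16-bit allocation length) is set, capping the
	 * response at SD_PROFILE_HEADER_LEN bytes.
	 */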
21775 	ucmdbuf->uscsi_cdb = cdb;
21776 	ucmdbuf->uscsi_cdblen = CDB_GROUP1;
21777 	ucmdbuf->uscsi_bufaddr = (caddr_t)bufaddr;
21778 	ucmdbuf->uscsi_buflen = buflen;
21779 	ucmdbuf->uscsi_timeout = sd_io_time;
21780 	ucmdbuf->uscsi_rqbuf = (caddr_t)rqbuf;
21781 	ucmdbuf->uscsi_rqlen = rqbuflen;
21782 	ucmdbuf->uscsi_flags = USCSI_RQENABLE | USCSI_SILENT | USCSI_READ;
21783 
21784 	status = sd_ssc_send(ssc, ucmdbuf, FKIOCTL,
21785 	    UIO_SYSSPACE, path_flag);
21786 
21787 	switch (status) {
21788 	case 0:
21789 		sd_ssc_assessment(ssc, SD_FMT_STANDARD);
21790 		break;  /* Success! */
21791 	case EIO:
21792 		switch (ucmdbuf->uscsi_status) {
21793 		case STATUS_RESERVATION_CONFLICT:
21794 			status = EACCES;
21795 			break;
21796 		default:
21797 			break;
21798 		}
21799 		break;
21800 	default:
21801 		break;
21802 	}
21803 
21804 	if (status == 0) {
21805 		SD_DUMP_MEMORY(un, SD_LOG_IO,
21806 		    "sd_send_scsi_GET_CONFIGURATION: data",
21807 		    (uchar_t *)bufaddr, SD_PROFILE_HEADER_LEN, SD_LOG_HEX);
21808 	}
21809 
21810 	SD_TRACE(SD_LOG_IO, un,
21811 	    "sd_send_scsi_GET_CONFIGURATION: exit\n");
21812 
21813 	return (status);
21814 }
21815 
21816 /*
21817  *    Function: sd_send_scsi_feature_GET_CONFIGURATION
21818  *
21819  * Description: Issues the get configuration command to the device to
21820  *              retrieve a specific feature. Called from
21821  *		sd_check_for_writable_cd & sd_set_mmc_caps.
21822  *   Arguments: ssc
21823  *              ucmdbuf
21824  *              rqbuf
21825  *              rqbuflen
21826  *              bufaddr
21827  *              buflen
21828  *		feature
21829  *
21830  * Return Code: 0   - Success
21831  *              errno return code from sd_ssc_send()
21832  *
21833  *     Context: Can sleep. Does not return until command is completed.
21834  *
21835  */
21836 static int
21837 sd_send_scsi_feature_GET_CONFIGURATION(sd_ssc_t *ssc, struct uscsi_cmd *ucmdbuf,
21838     uchar_t *rqbuf, uint_t rqbuflen, uchar_t *bufaddr, uint_t buflen,
21839     char feature, int path_flag)
21840 {
21841 	char    cdb[CDB_GROUP1];
21842 	int	status;
21843 	struct sd_lun	*un;
21844 
21845 	ASSERT(ssc != NULL);
21846 	un = ssc->ssc_un;
21847 	ASSERT(un != NULL);
21848 	ASSERT(!mutex_owned(SD_MUTEX(un)));
21849 	ASSERT(bufaddr != NULL);
21850 	ASSERT(ucmdbuf != NULL);
21851 	ASSERT(rqbuf != NULL);
21852 
21853 	SD_TRACE(SD_LOG_IO, un,
21854 	    "sd_send_scsi_feature_GET_CONFIGURATION: entry: un:0x%p\n", un);
21855 
21856 	bzero(cdb, sizeof (cdb));
21857 	bzero(ucmdbuf, sizeof (struct uscsi_cmd));
21858 	bzero(rqbuf, rqbuflen);
21859 	bzero(bufaddr, buflen);
21860 
21861 	/*
21862 	 * Set up cdb field for the get configuration command.
21863 	 */
21864 	cdb[0] = SCMD_GET_CONFIGURATION;
21865 	cdb[1] = 0x02;  /* Requested Type */
21866 	cdb[3] = feature;
21867 	cdb[8] = buflen;
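	/*
	 * Only CDB byte 8 (the low byte of the 16-bit allocation length)
	 * is set here, so this path implicitly assumes buflen <= 255.
	 */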
21868 	ucmdbuf->uscsi_cdb = cdb;
21869 	ucmdbuf->uscsi_cdblen = CDB_GROUP1;
21870 	ucmdbuf->uscsi_bufaddr = (caddr_t)bufaddr;
21871 	ucmdbuf->uscsi_buflen = buflen;
21872 	ucmdbuf->uscsi_timeout = sd_io_time;
21873 	ucmdbuf->uscsi_rqbuf = (caddr_t)rqbuf;
21874 	ucmdbuf->uscsi_rqlen = rqbuflen;
21875 	ucmdbuf->uscsi_flags = USCSI_RQENABLE | USCSI_SILENT | USCSI_READ;
21876 
21877 	status = sd_ssc_send(ssc, ucmdbuf, FKIOCTL,
21878 	    UIO_SYSSPACE, path_flag);
21879 
21880 	switch (status) {
21881 	case 0:
21882 
21883 		break;  /* Success! */
21884 	case EIO:
21885 		switch (ucmdbuf->uscsi_status) {
21886 		case STATUS_RESERVATION_CONFLICT:
21887 			status = EACCES;
21888 			break;
21889 		default:
21890 			break;
21891 		}
21892 		break;
21893 	default:
21894 		break;
21895 	}
21896 
21897 	if (status == 0) {
21898 		SD_DUMP_MEMORY(un, SD_LOG_IO,
21899 		    "sd_send_scsi_feature_GET_CONFIGURATION: data",
21900 		    (uchar_t *)bufaddr, SD_PROFILE_HEADER_LEN, SD_LOG_HEX);
21901 	}
21902 
21903 	SD_TRACE(SD_LOG_IO, un,
21904 	    "sd_send_scsi_feature_GET_CONFIGURATION: exit\n");
21905 
21906 	return (status);
21907 }
21908 
21909 
21910 /*
21911  *    Function: sd_send_scsi_MODE_SENSE
21912  *
21913  * Description: Utility function for issuing a scsi MODE SENSE command.
21914  *		Note: This routine uses a consistent implementation for Group0,
21915  *		Group1, and Group2 commands across all platforms. ATAPI devices
 *		use Group 1 Read/Write commands and Group 2 Mode Sense/Select.
21917  *
21918  *   Arguments: ssc   - ssc contains pointer to driver soft state (unit)
21919  *                      structure for this target.
 *		cdbsize - size of CDB to be used (CDB_GROUP0 (6 byte) or
 *			  CDB_GROUP[1|2] (10 byte)).
21922  *		bufaddr - buffer for page data retrieved from the target.
21923  *		buflen - size of page to be retrieved.
21924  *		page_code - page code of data to be retrieved from the target.
21925  *		path_flag - SD_PATH_DIRECT to use the USCSI "direct" chain and
21926  *			the normal command waitq, or SD_PATH_DIRECT_PRIORITY
21927  *			to use the USCSI "direct" chain and bypass the normal
21928  *			command waitq.
21929  *
21930  * Return Code: 0   - Success
21931  *		errno return code from sd_ssc_send()
21932  *
21933  *     Context: Can sleep. Does not return until command is completed.
21934  */
21935 
21936 static int
21937 sd_send_scsi_MODE_SENSE(sd_ssc_t *ssc, int cdbsize, uchar_t *bufaddr,
21938     size_t buflen,  uchar_t page_code, int path_flag)
21939 {
21940 	struct	scsi_extended_sense	sense_buf;
21941 	union scsi_cdb		cdb;
21942 	struct uscsi_cmd	ucmd_buf;
21943 	int			status;
21944 	int			headlen;
21945 	struct sd_lun		*un;
21946 
21947 	ASSERT(ssc != NULL);
21948 	un = ssc->ssc_un;
21949 	ASSERT(un != NULL);
21950 	ASSERT(!mutex_owned(SD_MUTEX(un)));
21951 	ASSERT(bufaddr != NULL);
21952 	ASSERT((cdbsize == CDB_GROUP0) || (cdbsize == CDB_GROUP1) ||
21953 	    (cdbsize == CDB_GROUP2));
21954 
21955 	SD_TRACE(SD_LOG_IO, un,
21956 	    "sd_send_scsi_MODE_SENSE: entry: un:0x%p\n", un);
21957 
21958 	bzero(&cdb, sizeof (cdb));
21959 	bzero(&ucmd_buf, sizeof (ucmd_buf));
21960 	bzero(&sense_buf, sizeof (struct scsi_extended_sense));
21961 	bzero(bufaddr, buflen);
21962 
21963 	if (cdbsize == CDB_GROUP0) {
21964 		cdb.scc_cmd = SCMD_MODE_SENSE;
21965 		cdb.cdb_opaque[2] = page_code;
21966 		FORMG0COUNT(&cdb, buflen);
21967 		headlen = MODE_HEADER_LENGTH;
21968 	} else {
21969 		cdb.scc_cmd = SCMD_MODE_SENSE_G1;
21970 		cdb.cdb_opaque[2] = page_code;
21971 		FORMG1COUNT(&cdb, buflen);
21972 		headlen = MODE_HEADER_LENGTH_GRP2;
21973 	}
21974 
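	/*
	 * For orientation (per SPC): the 6-byte form carries a one-byte
	 * allocation length in CDB byte 4 (FORMG0COUNT) and returns a
	 * 4-byte mode parameter header, while the 10-byte form carries a
	 * 16-bit allocation length in bytes 7-8 (FORMG1COUNT) and returns
	 * an 8-byte header, hence the two headlen values above.
	 */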
21975 	ASSERT(headlen <= buflen);
21976 	SD_FILL_SCSI1_LUN_CDB(un, &cdb);
21977 
21978 	ucmd_buf.uscsi_cdb	= (char *)&cdb;
21979 	ucmd_buf.uscsi_cdblen	= (uchar_t)cdbsize;
21980 	ucmd_buf.uscsi_bufaddr	= (caddr_t)bufaddr;
21981 	ucmd_buf.uscsi_buflen	= buflen;
21982 	ucmd_buf.uscsi_rqbuf	= (caddr_t)&sense_buf;
21983 	ucmd_buf.uscsi_rqlen	= sizeof (struct scsi_extended_sense);
21984 	ucmd_buf.uscsi_flags	= USCSI_RQENABLE | USCSI_READ | USCSI_SILENT;
21985 	ucmd_buf.uscsi_timeout	= 60;
21986 
21987 	status = sd_ssc_send(ssc, &ucmd_buf, FKIOCTL,
21988 	    UIO_SYSSPACE, path_flag);
21989 
21990 	switch (status) {
21991 	case 0:
21992 		/*
		 * sr_check_wp() uses the 0x3f page code and checks the mode
		 * page header to determine if the target device is
		 * write-protected. But some USB devices return 0 bytes for
		 * the 0x3f page code. For this case, make sure that at
		 * least the mode page header is returned.
21998 		 */
		if (buflen - ucmd_buf.uscsi_resid < headlen) {
22000 			status = EIO;
22001 			sd_ssc_set_info(ssc, SSC_FLAGS_INVALID_DATA, -1,
22002 			    "mode page header is not returned");
22003 		}
22004 		break;	/* Success! */
22005 	case EIO:
22006 		switch (ucmd_buf.uscsi_status) {
22007 		case STATUS_RESERVATION_CONFLICT:
22008 			status = EACCES;
22009 			break;
22010 		default:
22011 			break;
22012 		}
22013 		break;
22014 	default:
22015 		break;
22016 	}
22017 
22018 	if (status == 0) {
22019 		SD_DUMP_MEMORY(un, SD_LOG_IO, "sd_send_scsi_MODE_SENSE: data",
22020 		    (uchar_t *)bufaddr, buflen, SD_LOG_HEX);
22021 	}
22022 	SD_TRACE(SD_LOG_IO, un, "sd_send_scsi_MODE_SENSE: exit\n");
22023 
22024 	return (status);
22025 }
22026 
22027 
22028 /*
22029  *    Function: sd_send_scsi_MODE_SELECT
22030  *
22031  * Description: Utility function for issuing a scsi MODE SELECT command.
22032  *		Note: This routine uses a consistent implementation for Group0,
22033  *		Group1, and Group2 commands across all platforms. ATAPI devices
 *		use Group 1 Read/Write commands and Group 2 Mode Sense/Select.
22035  *
22036  *   Arguments: ssc   - ssc contains pointer to driver soft state (unit)
22037  *                      structure for this target.
 *		cdbsize - size of CDB to be used (CDB_GROUP0 (6 byte) or
 *			  CDB_GROUP[1|2] (10 byte)).
 *		bufaddr - buffer containing the page data to be sent to
 *			  the target.
 *		buflen - size of the page to be transferred.
 *		save_page - boolean to determine if the SP bit should be set.
22043  *		path_flag - SD_PATH_DIRECT to use the USCSI "direct" chain and
22044  *			the normal command waitq, or SD_PATH_DIRECT_PRIORITY
22045  *			to use the USCSI "direct" chain and bypass the normal
22046  *			command waitq.
22047  *
22048  * Return Code: 0   - Success
22049  *		errno return code from sd_ssc_send()
22050  *
22051  *     Context: Can sleep. Does not return until command is completed.
22052  */
22053 
22054 static int
22055 sd_send_scsi_MODE_SELECT(sd_ssc_t *ssc, int cdbsize, uchar_t *bufaddr,
22056     size_t buflen,  uchar_t save_page, int path_flag)
22057 {
22058 	struct	scsi_extended_sense	sense_buf;
22059 	union scsi_cdb		cdb;
22060 	struct uscsi_cmd	ucmd_buf;
22061 	int			status;
22062 	struct sd_lun		*un;
22063 
22064 	ASSERT(ssc != NULL);
22065 	un = ssc->ssc_un;
22066 	ASSERT(un != NULL);
22067 	ASSERT(!mutex_owned(SD_MUTEX(un)));
22068 	ASSERT(bufaddr != NULL);
22069 	ASSERT((cdbsize == CDB_GROUP0) || (cdbsize == CDB_GROUP1) ||
22070 	    (cdbsize == CDB_GROUP2));
22071 
22072 	SD_TRACE(SD_LOG_IO, un,
22073 	    "sd_send_scsi_MODE_SELECT: entry: un:0x%p\n", un);
22074 
22075 	bzero(&cdb, sizeof (cdb));
22076 	bzero(&ucmd_buf, sizeof (ucmd_buf));
22077 	bzero(&sense_buf, sizeof (struct scsi_extended_sense));
22078 
22079 	/* Set the PF bit for many third party drives */
22080 	cdb.cdb_opaque[1] = 0x10;
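	/*
	 * PF (bit 4 of CDB byte 1) = 1 declares SPC page-format data,
	 * which many third-party drives require for MODE SELECT.
	 */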
22081 
22082 	/* Set the savepage(SP) bit if given */
22083 	if (save_page == SD_SAVE_PAGE) {
22084 		cdb.cdb_opaque[1] |= 0x01;
22085 	}
22086 
22087 	if (cdbsize == CDB_GROUP0) {
22088 		cdb.scc_cmd = SCMD_MODE_SELECT;
22089 		FORMG0COUNT(&cdb, buflen);
22090 	} else {
22091 		cdb.scc_cmd = SCMD_MODE_SELECT_G1;
22092 		FORMG1COUNT(&cdb, buflen);
22093 	}
22094 
22095 	SD_FILL_SCSI1_LUN_CDB(un, &cdb);
22096 
22097 	ucmd_buf.uscsi_cdb	= (char *)&cdb;
22098 	ucmd_buf.uscsi_cdblen	= (uchar_t)cdbsize;
22099 	ucmd_buf.uscsi_bufaddr	= (caddr_t)bufaddr;
22100 	ucmd_buf.uscsi_buflen	= buflen;
22101 	ucmd_buf.uscsi_rqbuf	= (caddr_t)&sense_buf;
22102 	ucmd_buf.uscsi_rqlen	= sizeof (struct scsi_extended_sense);
22103 	ucmd_buf.uscsi_flags	= USCSI_RQENABLE | USCSI_WRITE | USCSI_SILENT;
22104 	ucmd_buf.uscsi_timeout	= 60;
22105 
22106 	status = sd_ssc_send(ssc, &ucmd_buf, FKIOCTL,
22107 	    UIO_SYSSPACE, path_flag);
22108 
22109 	switch (status) {
22110 	case 0:
22111 		sd_ssc_assessment(ssc, SD_FMT_STANDARD);
22112 		break;	/* Success! */
22113 	case EIO:
22114 		switch (ucmd_buf.uscsi_status) {
22115 		case STATUS_RESERVATION_CONFLICT:
22116 			status = EACCES;
22117 			break;
22118 		default:
22119 			break;
22120 		}
22121 		break;
22122 	default:
22123 		break;
22124 	}
22125 
22126 	if (status == 0) {
22127 		SD_DUMP_MEMORY(un, SD_LOG_IO, "sd_send_scsi_MODE_SELECT: data",
22128 		    (uchar_t *)bufaddr, buflen, SD_LOG_HEX);
22129 	}
22130 	SD_TRACE(SD_LOG_IO, un, "sd_send_scsi_MODE_SELECT: exit\n");
22131 
22132 	return (status);
22133 }
22134 
22135 
22136 /*
22137  *    Function: sd_send_scsi_RDWR
22138  *
22139  * Description: Issue a scsi READ or WRITE command with the given parameters.
22140  *
22141  *   Arguments: ssc   - ssc contains pointer to driver soft state (unit)
22142  *                      structure for this target.
22143  *		cmd:	 SCMD_READ or SCMD_WRITE
22144  *		bufaddr: Address of caller's buffer to receive the RDWR data
 *		buflen:  Length of caller's buffer to receive the RDWR data.
 *		start_block: Block number for the start of the RDWR operation.
 *			 (Assumes target-native block size.)
22150  *		path_flag - SD_PATH_DIRECT to use the USCSI "direct" chain and
22151  *			the normal command waitq, or SD_PATH_DIRECT_PRIORITY
22152  *			to use the USCSI "direct" chain and bypass the normal
22153  *			command waitq.
22154  *
22155  * Return Code: 0   - Success
22156  *		errno return code from sd_ssc_send()
22157  *
22158  *     Context: Can sleep. Does not return until command is completed.
22159  */
22160 
22161 static int
22162 sd_send_scsi_RDWR(sd_ssc_t *ssc, uchar_t cmd, void *bufaddr,
22163     size_t buflen, daddr_t start_block, int path_flag)
22164 {
22165 	struct	scsi_extended_sense	sense_buf;
22166 	union scsi_cdb		cdb;
22167 	struct uscsi_cmd	ucmd_buf;
22168 	uint32_t		block_count;
22169 	int			status;
22170 	int			cdbsize;
22171 	uchar_t			flag;
22172 	struct sd_lun		*un;
22173 
22174 	ASSERT(ssc != NULL);
22175 	un = ssc->ssc_un;
22176 	ASSERT(un != NULL);
22177 	ASSERT(!mutex_owned(SD_MUTEX(un)));
22178 	ASSERT(bufaddr != NULL);
22179 	ASSERT((cmd == SCMD_READ) || (cmd == SCMD_WRITE));
22180 
22181 	SD_TRACE(SD_LOG_IO, un, "sd_send_scsi_RDWR: entry: un:0x%p\n", un);
22182 
22183 	if (un->un_f_tgt_blocksize_is_valid != TRUE) {
22184 		return (EINVAL);
22185 	}
22186 
22187 	mutex_enter(SD_MUTEX(un));
22188 	block_count = SD_BYTES2TGTBLOCKS(un, buflen);
22189 	mutex_exit(SD_MUTEX(un));
22190 
22191 	flag = (cmd == SCMD_READ) ? USCSI_READ : USCSI_WRITE;
22192 
22193 	SD_INFO(SD_LOG_IO, un, "sd_send_scsi_RDWR: "
22194 	    "bufaddr:0x%p buflen:0x%x start_block:0x%p block_count:0x%x\n",
22195 	    bufaddr, buflen, start_block, block_count);
22196 
22197 	bzero(&cdb, sizeof (cdb));
22198 	bzero(&ucmd_buf, sizeof (ucmd_buf));
22199 	bzero(&sense_buf, sizeof (struct scsi_extended_sense));
22200 
22201 	/* Compute CDB size to use */
22202 	if (start_block > 0xffffffff)
22203 		cdbsize = CDB_GROUP4;
22204 	else if ((start_block & 0xFFE00000) ||
22205 	    (un->un_f_cfg_is_atapi == TRUE))
22206 		cdbsize = CDB_GROUP1;
22207 	else
22208 		cdbsize = CDB_GROUP0;
22209 
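	/*
	 * Rationale sketch: a Group 0 (6-byte) CDB has only a 21-bit LBA
	 * field, so any start_block with bits 21-31 set (the 0xFFE00000
	 * mask) needs at least the 10-byte form, and LBAs wider than
	 * 32 bits need the 16-byte Group 4 form.
	 */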
22210 	switch (cdbsize) {
22211 	case CDB_GROUP0:	/* 6-byte CDBs */
22212 		cdb.scc_cmd = cmd;
22213 		FORMG0ADDR(&cdb, start_block);
22214 		FORMG0COUNT(&cdb, block_count);
22215 		break;
22216 	case CDB_GROUP1:	/* 10-byte CDBs */
22217 		cdb.scc_cmd = cmd | SCMD_GROUP1;
22218 		FORMG1ADDR(&cdb, start_block);
22219 		FORMG1COUNT(&cdb, block_count);
22220 		break;
22221 	case CDB_GROUP4:	/* 16-byte CDBs */
22222 		cdb.scc_cmd = cmd | SCMD_GROUP4;
22223 		FORMG4LONGADDR(&cdb, (uint64_t)start_block);
22224 		FORMG4COUNT(&cdb, block_count);
22225 		break;
22226 	case CDB_GROUP5:	/* 12-byte CDBs (currently unsupported) */
22227 	default:
22228 		/* All others reserved */
22229 		return (EINVAL);
22230 	}
22231 
22232 	/* Set LUN bit(s) in CDB if this is a SCSI-1 device */
22233 	SD_FILL_SCSI1_LUN_CDB(un, &cdb);
22234 
22235 	ucmd_buf.uscsi_cdb	= (char *)&cdb;
22236 	ucmd_buf.uscsi_cdblen	= (uchar_t)cdbsize;
22237 	ucmd_buf.uscsi_bufaddr	= bufaddr;
22238 	ucmd_buf.uscsi_buflen	= buflen;
22239 	ucmd_buf.uscsi_rqbuf	= (caddr_t)&sense_buf;
22240 	ucmd_buf.uscsi_rqlen	= sizeof (struct scsi_extended_sense);
22241 	ucmd_buf.uscsi_flags	= flag | USCSI_RQENABLE | USCSI_SILENT;
22242 	ucmd_buf.uscsi_timeout	= 60;
22243 	status = sd_ssc_send(ssc, &ucmd_buf, FKIOCTL,
22244 	    UIO_SYSSPACE, path_flag);
22245 
22246 	switch (status) {
22247 	case 0:
22248 		sd_ssc_assessment(ssc, SD_FMT_STANDARD);
22249 		break;	/* Success! */
22250 	case EIO:
22251 		switch (ucmd_buf.uscsi_status) {
22252 		case STATUS_RESERVATION_CONFLICT:
22253 			status = EACCES;
22254 			break;
22255 		default:
22256 			break;
22257 		}
22258 		break;
22259 	default:
22260 		break;
22261 	}
22262 
22263 	if (status == 0) {
22264 		SD_DUMP_MEMORY(un, SD_LOG_IO, "sd_send_scsi_RDWR: data",
22265 		    (uchar_t *)bufaddr, buflen, SD_LOG_HEX);
22266 	}
22267 
22268 	SD_TRACE(SD_LOG_IO, un, "sd_send_scsi_RDWR: exit\n");
22269 
22270 	return (status);
22271 }
22272 
22273 
22274 /*
22275  *    Function: sd_send_scsi_LOG_SENSE
22276  *
22277  * Description: Issue a scsi LOG_SENSE command with the given parameters.
22278  *
22279  *   Arguments: ssc   - ssc contains pointer to driver soft state (unit)
22280  *                      structure for this target.
22281  *
22282  * Return Code: 0   - Success
22283  *		errno return code from sd_ssc_send()
22284  *
22285  *     Context: Can sleep. Does not return until command is completed.
22286  */
22287 
22288 static int
22289 sd_send_scsi_LOG_SENSE(sd_ssc_t *ssc, uchar_t *bufaddr, uint16_t buflen,
22290     uchar_t page_code, uchar_t page_control, uint16_t param_ptr, int path_flag)
22291 {
22292 	struct scsi_extended_sense	sense_buf;
22293 	union scsi_cdb		cdb;
22294 	struct uscsi_cmd	ucmd_buf;
22295 	int			status;
22296 	struct sd_lun		*un;
22297 
22298 	ASSERT(ssc != NULL);
22299 	un = ssc->ssc_un;
22300 	ASSERT(un != NULL);
22301 	ASSERT(!mutex_owned(SD_MUTEX(un)));
22302 
22303 	SD_TRACE(SD_LOG_IO, un, "sd_send_scsi_LOG_SENSE: entry: un:0x%p\n", un);
22304 
22305 	bzero(&cdb, sizeof (cdb));
22306 	bzero(&ucmd_buf, sizeof (ucmd_buf));
22307 	bzero(&sense_buf, sizeof (struct scsi_extended_sense));
22308 
22309 	cdb.scc_cmd = SCMD_LOG_SENSE_G1;
22310 	cdb.cdb_opaque[2] = (page_control << 6) | page_code;
22311 	cdb.cdb_opaque[5] = (uchar_t)((param_ptr & 0xFF00) >> 8);
22312 	cdb.cdb_opaque[6] = (uchar_t)(param_ptr  & 0x00FF);
22313 	FORMG1COUNT(&cdb, buflen);
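	/*
	 * Example (illustrative values): page_control = 1 ("current
	 * cumulative") and page_code = 0x0e encode as
	 * cdb_opaque[2] = (1 << 6) | 0x0e = 0x4e.
	 */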
22314 
22315 	ucmd_buf.uscsi_cdb	= (char *)&cdb;
22316 	ucmd_buf.uscsi_cdblen	= CDB_GROUP1;
22317 	ucmd_buf.uscsi_bufaddr	= (caddr_t)bufaddr;
22318 	ucmd_buf.uscsi_buflen	= buflen;
22319 	ucmd_buf.uscsi_rqbuf	= (caddr_t)&sense_buf;
22320 	ucmd_buf.uscsi_rqlen	= sizeof (struct scsi_extended_sense);
22321 	ucmd_buf.uscsi_flags	= USCSI_RQENABLE | USCSI_READ | USCSI_SILENT;
22322 	ucmd_buf.uscsi_timeout	= 60;
22323 
22324 	status = sd_ssc_send(ssc, &ucmd_buf, FKIOCTL,
22325 	    UIO_SYSSPACE, path_flag);
22326 
22327 	switch (status) {
22328 	case 0:
22329 		break;
22330 	case EIO:
22331 		switch (ucmd_buf.uscsi_status) {
22332 		case STATUS_RESERVATION_CONFLICT:
22333 			status = EACCES;
22334 			break;
22335 		case STATUS_CHECK:
22336 			if ((ucmd_buf.uscsi_rqstatus == STATUS_GOOD) &&
22337 			    (scsi_sense_key((uint8_t *)&sense_buf) ==
22338 			    KEY_ILLEGAL_REQUEST) &&
22339 			    (scsi_sense_asc((uint8_t *)&sense_buf) == 0x24)) {
22340 				/*
22341 				 * ASC 0x24: INVALID FIELD IN CDB
22342 				 */
22343 				switch (page_code) {
22344 				case START_STOP_CYCLE_PAGE:
22345 					/*
22346 					 * The start stop cycle counter is
22347 					 * implemented as page 0x31 in earlier
22348 					 * generation disks. In new generation
22349 					 * disks the start stop cycle counter is
22350 					 * implemented as page 0xE. To properly
					 * implemented as page 0xE. To properly
					 * handle this case, if an attempt for
					 * log page 0xE is made and fails, we
					 * will try again using page 0x31.
22355 					 * Network storage BU committed to
22356 					 * maintain the page 0x31 for this
22357 					 * purpose and will not have any other
22358 					 * page implemented with page code 0x31
22359 					 * until all disks transition to the
22360 					 * standard page.
22361 					 */
22362 					mutex_enter(SD_MUTEX(un));
22363 					un->un_start_stop_cycle_page =
22364 					    START_STOP_CYCLE_VU_PAGE;
22365 					cdb.cdb_opaque[2] =
22366 					    (char)(page_control << 6) |
22367 					    un->un_start_stop_cycle_page;
22368 					mutex_exit(SD_MUTEX(un));
22369 					sd_ssc_assessment(ssc, SD_FMT_IGNORE);
22370 					status = sd_ssc_send(
22371 					    ssc, &ucmd_buf, FKIOCTL,
22372 					    UIO_SYSSPACE, path_flag);
22373 
22374 					break;
22375 				case TEMPERATURE_PAGE:
22376 					status = ENOTTY;
22377 					break;
22378 				default:
22379 					break;
22380 				}
22381 			}
22382 			break;
22383 		default:
22384 			break;
22385 		}
22386 		break;
22387 	default:
22388 		break;
22389 	}
22390 
22391 	if (status == 0) {
22392 		sd_ssc_assessment(ssc, SD_FMT_STANDARD);
22393 		SD_DUMP_MEMORY(un, SD_LOG_IO, "sd_send_scsi_LOG_SENSE: data",
22394 		    (uchar_t *)bufaddr, buflen, SD_LOG_HEX);
22395 	}
22396 
22397 	SD_TRACE(SD_LOG_IO, un, "sd_send_scsi_LOG_SENSE: exit\n");
22398 
22399 	return (status);
22400 }
22401 
22402 
22403 /*
22404  *    Function: sd_send_scsi_GET_EVENT_STATUS_NOTIFICATION
22405  *
22406  * Description: Issue the scsi GET EVENT STATUS NOTIFICATION command.
22407  *
22408  *   Arguments: ssc   - ssc contains pointer to driver soft state (unit)
22409  *                      structure for this target.
22410  *		bufaddr
22411  *		buflen
22412  *		class_req
22413  *
22414  * Return Code: 0   - Success
22415  *		errno return code from sd_ssc_send()
22416  *
22417  *     Context: Can sleep. Does not return until command is completed.
22418  */
22419 
22420 static int
22421 sd_send_scsi_GET_EVENT_STATUS_NOTIFICATION(sd_ssc_t *ssc, uchar_t *bufaddr,
22422     size_t buflen, uchar_t class_req)
22423 {
22424 	union scsi_cdb		cdb;
22425 	struct uscsi_cmd	ucmd_buf;
22426 	int			status;
22427 	struct sd_lun		*un;
22428 
22429 	ASSERT(ssc != NULL);
22430 	un = ssc->ssc_un;
22431 	ASSERT(un != NULL);
22432 	ASSERT(!mutex_owned(SD_MUTEX(un)));
22433 	ASSERT(bufaddr != NULL);
22434 
22435 	SD_TRACE(SD_LOG_IO, un,
22436 	    "sd_send_scsi_GET_EVENT_STATUS_NOTIFICATION: entry: un:0x%p\n", un);
22437 
22438 	bzero(&cdb, sizeof (cdb));
22439 	bzero(&ucmd_buf, sizeof (ucmd_buf));
22440 	bzero(bufaddr, buflen);
22441 
22442 	cdb.scc_cmd = SCMD_GET_EVENT_STATUS_NOTIFICATION;
22443 	cdb.cdb_opaque[1] = 1; /* polled */
22444 	cdb.cdb_opaque[4] = class_req;
22445 	FORMG1COUNT(&cdb, buflen);
22446 
22447 	ucmd_buf.uscsi_cdb	= (char *)&cdb;
22448 	ucmd_buf.uscsi_cdblen	= CDB_GROUP1;
22449 	ucmd_buf.uscsi_bufaddr	= (caddr_t)bufaddr;
22450 	ucmd_buf.uscsi_buflen	= buflen;
22451 	ucmd_buf.uscsi_rqbuf	= NULL;
22452 	ucmd_buf.uscsi_rqlen	= 0;
22453 	ucmd_buf.uscsi_flags	= USCSI_READ | USCSI_SILENT;
22454 	ucmd_buf.uscsi_timeout	= 60;
22455 
22456 	status = sd_ssc_send(ssc, &ucmd_buf, FKIOCTL,
22457 	    UIO_SYSSPACE, SD_PATH_DIRECT);
22458 
22459 	/*
	 * Only handle status == 0; the upper-level caller
	 * will make a different assessment based on the context.
22462 	 */
22463 	if (status == 0) {
22464 		sd_ssc_assessment(ssc, SD_FMT_STANDARD);
22465 
22466 		if (ucmd_buf.uscsi_resid != 0) {
22467 			status = EIO;
22468 		}
22469 	}
22470 
22471 	SD_TRACE(SD_LOG_IO, un,
22472 	    "sd_send_scsi_GET_EVENT_STATUS_NOTIFICATION: exit\n");
22473 
22474 	return (status);
22475 }
22476 
22477 
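/*
 * Validate a GET EVENT STATUS NOTIFICATION reply for a media-class
 * event: the header must advertise at least one event descriptor
 * (length >= 6), the "No Event Available" bit must be clear, the
 * notification class must be the media class, and the supported event
 * class bitmask must include it.
 */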
22478 static boolean_t
22479 sd_gesn_media_data_valid(uchar_t *data)
22480 {
22481 	uint16_t			len;
22482 
22483 	len = (data[1] << 8) | data[0];
22484 	return ((len >= 6) &&
22485 	    ((data[2] & SD_GESN_HEADER_NEA) == 0) &&
22486 	    ((data[2] & SD_GESN_HEADER_CLASS) == SD_GESN_MEDIA_CLASS) &&
22487 	    ((data[3] & (1 << SD_GESN_MEDIA_CLASS)) != 0));
22488 }
22489 
22490 
22491 /*
22492  *    Function: sdioctl
22493  *
22494  * Description: Driver's ioctl(9e) entry point function.
22495  *
22496  *   Arguments: dev     - device number
22497  *		cmd     - ioctl operation to be performed
22498  *		arg     - user argument, contains data to be set or reference
22499  *			  parameter for get
22500  *		flag    - bit flag, indicating open settings, 32/64 bit type
22501  *		cred_p  - user credential pointer
22502  *		rval_p  - calling process return value (OPT)
22503  *
22504  * Return Code: EINVAL
22505  *		ENOTTY
22506  *		ENXIO
22507  *		EIO
22508  *		EFAULT
22509  *		ENOTSUP
22510  *		EPERM
22511  *
22512  *     Context: Called from the device switch at normal priority.
22513  */
22514 
22515 static int
22516 sdioctl(dev_t dev, int cmd, intptr_t arg, int flag, cred_t *cred_p, int *rval_p)
22517 {
22518 	struct sd_lun	*un = NULL;
22519 	int		err = 0;
22520 	int		i = 0;
22521 	cred_t		*cr;
22522 	int		tmprval = EINVAL;
22523 	boolean_t	is_valid;
22524 	sd_ssc_t	*ssc;
22525 
22526 	/*
	 * All device accesses go through sdstrategy, where we check the
	 * suspend status.
22529 	 */
22530 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
22531 		return (ENXIO);
22532 	}
22533 
22534 	ASSERT(!mutex_owned(SD_MUTEX(un)));
22535 
22536 	/* Initialize sd_ssc_t for internal uscsi commands */
22537 	ssc = sd_ssc_init(un);
22538 
22539 	is_valid = SD_IS_VALID_LABEL(un);
22540 
22541 	/*
22542 	 * Moved this wait from sd_uscsi_strategy to here for
22543 	 * reasons of deadlock prevention. Internal driver commands,
	 * specifically those to change a device's power level, result
22545 	 * in a call to sd_uscsi_strategy.
22546 	 */
22547 	mutex_enter(SD_MUTEX(un));
22548 	while ((un->un_state == SD_STATE_SUSPENDED) ||
22549 	    (un->un_state == SD_STATE_PM_CHANGING)) {
22550 		cv_wait(&un->un_suspend_cv, SD_MUTEX(un));
22551 	}
22552 	/*
22553 	 * Twiddling the counter here protects commands from now
	 * through to the top of sd_uscsi_strategy. Without the
	 * counter increment, a power down, for example, could get in
22556 	 * after the above check for state is made and before
22557 	 * execution gets to the top of sd_uscsi_strategy.
22558 	 * That would cause problems.
22559 	 */
22560 	un->un_ncmds_in_driver++;
22561 
22562 	if (!is_valid &&
22563 	    (flag & (FNDELAY | FNONBLOCK))) {
22564 		switch (cmd) {
22565 		case DKIOCGGEOM:	/* SD_PATH_DIRECT */
22566 		case DKIOCGVTOC:
22567 		case DKIOCGEXTVTOC:
22568 		case DKIOCGAPART:
22569 		case DKIOCPARTINFO:
22570 		case DKIOCEXTPARTINFO:
22571 		case DKIOCSGEOM:
22572 		case DKIOCSAPART:
22573 		case DKIOCGETEFI:
22574 		case DKIOCPARTITION:
22575 		case DKIOCSVTOC:
22576 		case DKIOCSEXTVTOC:
22577 		case DKIOCSETEFI:
22578 		case DKIOCGMBOOT:
22579 		case DKIOCSMBOOT:
22580 		case DKIOCG_PHYGEOM:
22581 		case DKIOCG_VIRTGEOM:
22582 #if defined(__x86)
22583 		case DKIOCSETEXTPART:
22584 #endif
22585 			/* let cmlb handle it */
22586 			goto skip_ready_valid;
22587 
22588 		case CDROMPAUSE:
22589 		case CDROMRESUME:
22590 		case CDROMPLAYMSF:
22591 		case CDROMPLAYTRKIND:
22592 		case CDROMREADTOCHDR:
22593 		case CDROMREADTOCENTRY:
22594 		case CDROMSTOP:
22595 		case CDROMSTART:
22596 		case CDROMVOLCTRL:
22597 		case CDROMSUBCHNL:
22598 		case CDROMREADMODE2:
22599 		case CDROMREADMODE1:
22600 		case CDROMREADOFFSET:
22601 		case CDROMSBLKMODE:
22602 		case CDROMGBLKMODE:
22603 		case CDROMGDRVSPEED:
22604 		case CDROMSDRVSPEED:
22605 		case CDROMCDDA:
22606 		case CDROMCDXA:
22607 		case CDROMSUBCODE:
22608 			if (!ISCD(un)) {
22609 				un->un_ncmds_in_driver--;
22610 				ASSERT(un->un_ncmds_in_driver >= 0);
22611 				mutex_exit(SD_MUTEX(un));
22612 				err = ENOTTY;
22613 				goto done_without_assess;
22614 			}
22615 			break;
22616 		case FDEJECT:
22617 		case DKIOCEJECT:
22618 		case CDROMEJECT:
22619 			if (!un->un_f_eject_media_supported) {
22620 				un->un_ncmds_in_driver--;
22621 				ASSERT(un->un_ncmds_in_driver >= 0);
22622 				mutex_exit(SD_MUTEX(un));
22623 				err = ENOTTY;
22624 				goto done_without_assess;
22625 			}
22626 			break;
22627 		case DKIOCFLUSHWRITECACHE:
22628 			mutex_exit(SD_MUTEX(un));
22629 			err = sd_send_scsi_TEST_UNIT_READY(ssc, 0);
22630 			if (err != 0) {
22631 				mutex_enter(SD_MUTEX(un));
22632 				un->un_ncmds_in_driver--;
22633 				ASSERT(un->un_ncmds_in_driver >= 0);
22634 				mutex_exit(SD_MUTEX(un));
22635 				err = EIO;
22636 				goto done_quick_assess;
22637 			}
22638 			mutex_enter(SD_MUTEX(un));
22639 			/* FALLTHROUGH */
22640 		case DKIOCREMOVABLE:
22641 		case DKIOCHOTPLUGGABLE:
22642 		case DKIOCINFO:
22643 		case DKIOCGMEDIAINFO:
22644 		case DKIOCGMEDIAINFOEXT:
22645 		case DKIOCSOLIDSTATE:
22646 		case DKIOC_CANFREE:
22647 		case MHIOCENFAILFAST:
22648 		case MHIOCSTATUS:
22649 		case MHIOCTKOWN:
22650 		case MHIOCRELEASE:
22651 		case MHIOCGRP_INKEYS:
22652 		case MHIOCGRP_INRESV:
22653 		case MHIOCGRP_REGISTER:
22654 		case MHIOCGRP_CLEAR:
22655 		case MHIOCGRP_RESERVE:
22656 		case MHIOCGRP_PREEMPTANDABORT:
22657 		case MHIOCGRP_REGISTERANDIGNOREKEY:
22658 		case CDROMCLOSETRAY:
22659 		case USCSICMD:
22660 		case USCSIMAXXFER:
22661 			goto skip_ready_valid;
22662 		default:
22663 			break;
22664 		}
22665 
22666 		mutex_exit(SD_MUTEX(un));
22667 		err = sd_ready_and_valid(ssc, SDPART(dev));
22668 		mutex_enter(SD_MUTEX(un));
22669 
22670 		if (err != SD_READY_VALID) {
22671 			switch (cmd) {
22672 			case DKIOCSTATE:
22673 			case CDROMGDRVSPEED:
22674 			case CDROMSDRVSPEED:
22675 			case FDEJECT:	/* for eject command */
22676 			case DKIOCEJECT:
22677 			case CDROMEJECT:
22678 			case DKIOCREMOVABLE:
22679 			case DKIOCHOTPLUGGABLE:
22680 				break;
22681 			default:
22682 				if (un->un_f_has_removable_media) {
22683 					err = ENXIO;
22684 				} else {
					/*
					 * Do not map SD_RESERVED_BY_OTHERS
					 * to EIO.
					 */
22686 					if (err == SD_RESERVED_BY_OTHERS) {
22687 						err = EACCES;
22688 					} else {
22689 						err = EIO;
22690 					}
22691 				}
22692 				un->un_ncmds_in_driver--;
22693 				ASSERT(un->un_ncmds_in_driver >= 0);
22694 				mutex_exit(SD_MUTEX(un));
22695 
22696 				goto done_without_assess;
22697 			}
22698 		}
22699 	}
22700 
22701 skip_ready_valid:
22702 	mutex_exit(SD_MUTEX(un));
22703 
22704 	switch (cmd) {
22705 	case DKIOCINFO:
22706 		SD_TRACE(SD_LOG_IOCTL, un, "DKIOCINFO\n");
22707 		err = sd_dkio_ctrl_info(dev, (caddr_t)arg, flag);
22708 		break;
22709 
22710 	case DKIOCGMEDIAINFO:
22711 		SD_TRACE(SD_LOG_IOCTL, un, "DKIOCGMEDIAINFO\n");
22712 		err = sd_get_media_info(dev, (caddr_t)arg, flag);
22713 		break;
22714 
22715 	case DKIOCGMEDIAINFOEXT:
22716 		SD_TRACE(SD_LOG_IOCTL, un, "DKIOCGMEDIAINFOEXT\n");
22717 		err = sd_get_media_info_ext(dev, (caddr_t)arg, flag);
22718 		break;
22719 
22720 	case DKIOCGGEOM:
22721 	case DKIOCGVTOC:
22722 	case DKIOCGEXTVTOC:
22723 	case DKIOCGAPART:
22724 	case DKIOCPARTINFO:
22725 	case DKIOCEXTPARTINFO:
22726 	case DKIOCSGEOM:
22727 	case DKIOCSAPART:
22728 	case DKIOCGETEFI:
22729 	case DKIOCPARTITION:
22730 	case DKIOCSVTOC:
22731 	case DKIOCSEXTVTOC:
22732 	case DKIOCSETEFI:
22733 	case DKIOCGMBOOT:
22734 	case DKIOCSMBOOT:
22735 	case DKIOCG_PHYGEOM:
22736 	case DKIOCG_VIRTGEOM:
22737 #if defined(__x86)
22738 	case DKIOCSETEXTPART:
22739 #endif
22740 		SD_TRACE(SD_LOG_IOCTL, un, "DKIOC %d\n", cmd);
22741 
		/* A TEST UNIT READY (TUR) should spin the device up */
22743 
		if (un->un_f_has_removable_media)
			err = sd_send_scsi_TEST_UNIT_READY(ssc,
			    SD_CHECK_FOR_MEDIA);
		else
			err = sd_send_scsi_TEST_UNIT_READY(ssc, 0);
22750 
22751 		if (err != 0)
22752 			goto done_with_assess;
22753 
22754 		err = cmlb_ioctl(un->un_cmlbhandle, dev,
22755 		    cmd, arg, flag, cred_p, rval_p, (void *)SD_PATH_DIRECT);
22756 
22757 		if ((err == 0) &&
22758 		    ((cmd == DKIOCSETEFI) ||
22759 		    ((un->un_f_pkstats_enabled) &&
22760 		    (cmd == DKIOCSAPART || cmd == DKIOCSVTOC ||
22761 		    cmd == DKIOCSEXTVTOC)))) {
22762 
22763 			tmprval = cmlb_validate(un->un_cmlbhandle, CMLB_SILENT,
22764 			    (void *)SD_PATH_DIRECT);
22765 			if ((tmprval == 0) && un->un_f_pkstats_enabled) {
22766 				sd_set_pstats(un);
22767 				SD_TRACE(SD_LOG_IO_PARTITION, un,
22768 				    "sd_ioctl: un:0x%p pstats created and "
22769 				    "set\n", un);
22770 			}
22771 		}
22772 
22773 		if ((cmd == DKIOCSVTOC || cmd == DKIOCSEXTVTOC) ||
22774 		    ((cmd == DKIOCSETEFI) && (tmprval == 0))) {
22775 
22776 			mutex_enter(SD_MUTEX(un));
22777 			if (un->un_f_devid_supported &&
22778 			    (un->un_f_opt_fab_devid == TRUE)) {
22779 				if (un->un_devid == NULL) {
22780 					sd_register_devid(ssc, SD_DEVINFO(un),
22781 					    SD_TARGET_IS_UNRESERVED);
22782 				} else {
22783 					/*
22784 					 * The device id for this disk
22785 					 * has been fabricated. The
22786 					 * device id must be preserved
22787 					 * by writing it back out to
22788 					 * disk.
22789 					 */
22790 					if (sd_write_deviceid(ssc) != 0) {
22791 						ddi_devid_free(un->un_devid);
22792 						un->un_devid = NULL;
22793 					}
22794 				}
22795 			}
22796 			mutex_exit(SD_MUTEX(un));
22797 		}
22798 
22799 		break;
22800 
22801 	case DKIOCLOCK:
22802 		SD_TRACE(SD_LOG_IOCTL, un, "DKIOCLOCK\n");
22803 		err = sd_send_scsi_DOORLOCK(ssc, SD_REMOVAL_PREVENT,
22804 		    SD_PATH_STANDARD);
22805 		goto done_with_assess;
22806 
22807 	case DKIOCUNLOCK:
22808 		SD_TRACE(SD_LOG_IOCTL, un, "DKIOCUNLOCK\n");
22809 		err = sd_send_scsi_DOORLOCK(ssc, SD_REMOVAL_ALLOW,
22810 		    SD_PATH_STANDARD);
22811 		goto done_with_assess;
22812 
22813 	case DKIOCSTATE: {
22814 		enum dkio_state		state;
22815 		SD_TRACE(SD_LOG_IOCTL, un, "DKIOCSTATE\n");
22816 
22817 		if (ddi_copyin((void *)arg, &state, sizeof (int), flag) != 0) {
22818 			err = EFAULT;
22819 		} else {
22820 			err = sd_check_media(dev, state);
22821 			if (err == 0) {
22822 				if (ddi_copyout(&un->un_mediastate, (void *)arg,
22823 				    sizeof (int), flag) != 0)
22824 					err = EFAULT;
22825 			}
22826 		}
22827 		break;
22828 	}
22829 
22830 	case DKIOCREMOVABLE:
22831 		SD_TRACE(SD_LOG_IOCTL, un, "DKIOCREMOVABLE\n");
22832 		i = un->un_f_has_removable_media ? 1 : 0;
22833 		if (ddi_copyout(&i, (void *)arg, sizeof (int), flag) != 0) {
22834 			err = EFAULT;
22835 		} else {
22836 			err = 0;
22837 		}
22838 		break;
22839 
22840 	case DKIOCSOLIDSTATE:
22841 		SD_TRACE(SD_LOG_IOCTL, un, "DKIOCSOLIDSTATE\n");
22842 		i = un->un_f_is_solid_state ? 1 : 0;
22843 		if (ddi_copyout(&i, (void *)arg, sizeof (int), flag) != 0) {
22844 			err = EFAULT;
22845 		} else {
22846 			err = 0;
22847 		}
22848 		break;
22849 
22850 	case DKIOCHOTPLUGGABLE:
22851 		SD_TRACE(SD_LOG_IOCTL, un, "DKIOCHOTPLUGGABLE\n");
22852 		i = un->un_f_is_hotpluggable ? 1 : 0;
22853 		if (ddi_copyout(&i, (void *)arg, sizeof (int), flag) != 0) {
22854 			err = EFAULT;
22855 		} else {
22856 			err = 0;
22857 		}
22858 		break;
22859 
22860 	case DKIOCREADONLY:
22861 		SD_TRACE(SD_LOG_IOCTL, un, "DKIOCREADONLY\n");
22862 		i = 0;
22863 		if ((ISCD(un) && !un->un_f_mmc_writable_media) ||
22864 		    (sr_check_wp(dev) != 0)) {
22865 			i = 1;
22866 		}
22867 		if (ddi_copyout(&i, (void *)arg, sizeof (int), flag) != 0) {
22868 			err = EFAULT;
22869 		} else {
22870 			err = 0;
22871 		}
22872 		break;
22873 
22874 	case DKIOCGTEMPERATURE:
22875 		SD_TRACE(SD_LOG_IOCTL, un, "DKIOCGTEMPERATURE\n");
22876 		err = sd_dkio_get_temp(dev, (caddr_t)arg, flag);
22877 		break;
22878 
22879 	case MHIOCENFAILFAST:
22880 		SD_TRACE(SD_LOG_IOCTL, un, "MHIOCENFAILFAST\n");
22881 		if ((err = drv_priv(cred_p)) == 0) {
22882 			err = sd_mhdioc_failfast(dev, (caddr_t)arg, flag);
22883 		}
22884 		break;
22885 
22886 	case MHIOCTKOWN:
22887 		SD_TRACE(SD_LOG_IOCTL, un, "MHIOCTKOWN\n");
22888 		if ((err = drv_priv(cred_p)) == 0) {
22889 			err = sd_mhdioc_takeown(dev, (caddr_t)arg, flag);
22890 		}
22891 		break;
22892 
22893 	case MHIOCRELEASE:
22894 		SD_TRACE(SD_LOG_IOCTL, un, "MHIOCRELEASE\n");
22895 		if ((err = drv_priv(cred_p)) == 0) {
22896 			err = sd_mhdioc_release(dev);
22897 		}
22898 		break;
22899 
22900 	case MHIOCSTATUS:
22901 		SD_TRACE(SD_LOG_IOCTL, un, "MHIOCSTATUS\n");
22902 		if ((err = drv_priv(cred_p)) == 0) {
22903 			switch (sd_send_scsi_TEST_UNIT_READY(ssc, 0)) {
22904 			case 0:
22905 				err = 0;
22906 				break;
22907 			case EACCES:
22908 				*rval_p = 1;
22909 				err = 0;
22910 				sd_ssc_assessment(ssc, SD_FMT_IGNORE);
22911 				break;
22912 			default:
22913 				err = EIO;
22914 				goto done_with_assess;
22915 			}
22916 		}
22917 		break;
22918 
22919 	case MHIOCQRESERVE:
22920 		SD_TRACE(SD_LOG_IOCTL, un, "MHIOCQRESERVE\n");
22921 		if ((err = drv_priv(cred_p)) == 0) {
22922 			err = sd_reserve_release(dev, SD_RESERVE);
22923 		}
22924 		break;
22925 
22926 	case MHIOCREREGISTERDEVID:
22927 		SD_TRACE(SD_LOG_IOCTL, un, "MHIOCREREGISTERDEVID\n");
22928 		if (drv_priv(cred_p) == EPERM) {
22929 			err = EPERM;
22930 		} else if (!un->un_f_devid_supported) {
22931 			err = ENOTTY;
22932 		} else {
22933 			err = sd_mhdioc_register_devid(dev);
22934 		}
22935 		break;
22936 
22937 	case MHIOCGRP_INKEYS:
22938 		SD_TRACE(SD_LOG_IOCTL, un, "MHIOCGRP_INKEYS\n");
22939 		if (((err = drv_priv(cred_p)) != EPERM) &&
22940 		    arg != (intptr_t)NULL) {
22941 			if (un->un_reservation_type == SD_SCSI2_RESERVATION) {
22942 				err = ENOTSUP;
22943 			} else {
22944 				err = sd_mhdioc_inkeys(dev, (caddr_t)arg,
22945 				    flag);
22946 			}
22947 		}
22948 		break;
22949 
22950 	case MHIOCGRP_INRESV:
22951 		SD_TRACE(SD_LOG_IOCTL, un, "MHIOCGRP_INRESV\n");
22952 		if (((err = drv_priv(cred_p)) != EPERM) &&
22953 		    arg != (intptr_t)NULL) {
22954 			if (un->un_reservation_type == SD_SCSI2_RESERVATION) {
22955 				err = ENOTSUP;
22956 			} else {
22957 				err = sd_mhdioc_inresv(dev, (caddr_t)arg, flag);
22958 			}
22959 		}
22960 		break;
22961 
22962 	case MHIOCGRP_REGISTER:
22963 		SD_TRACE(SD_LOG_IOCTL, un, "MHIOCGRP_REGISTER\n");
22964 		if ((err = drv_priv(cred_p)) != EPERM) {
22965 			if (un->un_reservation_type == SD_SCSI2_RESERVATION) {
22966 				err = ENOTSUP;
22967 			} else if (arg != (intptr_t)NULL) {
22968 				mhioc_register_t reg;
22969 				if (ddi_copyin((void *)arg, &reg,
22970 				    sizeof (mhioc_register_t), flag) != 0) {
22971 					err = EFAULT;
22972 				} else {
22973 					err =
22974 					    sd_send_scsi_PERSISTENT_RESERVE_OUT(
22975 					    ssc, SD_SCSI3_REGISTER,
22976 					    (uchar_t *)&reg);
22977 					if (err != 0)
22978 						goto done_with_assess;
22979 				}
22980 			}
22981 		}
22982 		break;
22983 
22984 	case MHIOCGRP_CLEAR:
22985 		SD_TRACE(SD_LOG_IOCTL, un, "MHIOCGRP_CLEAR\n");
22986 		if ((err = drv_priv(cred_p)) != EPERM) {
22987 			if (un->un_reservation_type == SD_SCSI2_RESERVATION) {
22988 				err = ENOTSUP;
22989 			} else if (arg != (intptr_t)NULL) {
22990 				mhioc_register_t reg;
22991 				if (ddi_copyin((void *)arg, &reg,
22992 				    sizeof (mhioc_register_t), flag) != 0) {
22993 					err = EFAULT;
22994 				} else {
22995 					err =
22996 					    sd_send_scsi_PERSISTENT_RESERVE_OUT(
22997 					    ssc, SD_SCSI3_CLEAR,
22998 					    (uchar_t *)&reg);
22999 					if (err != 0)
23000 						goto done_with_assess;
23001 				}
23002 			}
23003 		}
23004 		break;
23005 
23006 	case MHIOCGRP_RESERVE:
23007 		SD_TRACE(SD_LOG_IOCTL, un, "MHIOCGRP_RESERVE\n");
23008 		if ((err = drv_priv(cred_p)) != EPERM) {
23009 			if (un->un_reservation_type == SD_SCSI2_RESERVATION) {
23010 				err = ENOTSUP;
23011 			} else if (arg != (intptr_t)NULL) {
23012 				mhioc_resv_desc_t resv_desc;
23013 				if (ddi_copyin((void *)arg, &resv_desc,
23014 				    sizeof (mhioc_resv_desc_t), flag) != 0) {
23015 					err = EFAULT;
23016 				} else {
23017 					err =
23018 					    sd_send_scsi_PERSISTENT_RESERVE_OUT(
23019 					    ssc, SD_SCSI3_RESERVE,
23020 					    (uchar_t *)&resv_desc);
23021 					if (err != 0)
23022 						goto done_with_assess;
23023 				}
23024 			}
23025 		}
23026 		break;
23027 
23028 	case MHIOCGRP_PREEMPTANDABORT:
23029 		SD_TRACE(SD_LOG_IOCTL, un, "MHIOCGRP_PREEMPTANDABORT\n");
23030 		if ((err = drv_priv(cred_p)) != EPERM) {
23031 			if (un->un_reservation_type == SD_SCSI2_RESERVATION) {
23032 				err = ENOTSUP;
23033 			} else if (arg != (intptr_t)NULL) {
23034 				mhioc_preemptandabort_t preempt_abort;
23035 				if (ddi_copyin((void *)arg, &preempt_abort,
23036 				    sizeof (mhioc_preemptandabort_t),
23037 				    flag) != 0) {
23038 					err = EFAULT;
23039 				} else {
23040 					err =
23041 					    sd_send_scsi_PERSISTENT_RESERVE_OUT(
23042 					    ssc, SD_SCSI3_PREEMPTANDABORT,
23043 					    (uchar_t *)&preempt_abort);
23044 					if (err != 0)
23045 						goto done_with_assess;
23046 				}
23047 			}
23048 		}
23049 		break;
23050 
23051 	case MHIOCGRP_REGISTERANDIGNOREKEY:
23052 		SD_TRACE(SD_LOG_IOCTL, un, "MHIOCGRP_REGISTERANDIGNOREKEY\n");
23053 		if ((err = drv_priv(cred_p)) != EPERM) {
23054 			if (un->un_reservation_type == SD_SCSI2_RESERVATION) {
23055 				err = ENOTSUP;
23056 			} else if (arg != (intptr_t)NULL) {
23057 				mhioc_registerandignorekey_t r_and_i;
23058 				if (ddi_copyin((void *)arg, (void *)&r_and_i,
23059 				    sizeof (mhioc_registerandignorekey_t),
23060 				    flag) != 0) {
23061 					err = EFAULT;
23062 				} else {
23063 					err =
23064 					    sd_send_scsi_PERSISTENT_RESERVE_OUT(
23065 					    ssc, SD_SCSI3_REGISTERANDIGNOREKEY,
23066 					    (uchar_t *)&r_and_i);
23067 					if (err != 0)
23068 						goto done_with_assess;
23069 				}
23070 			}
23071 		}
23072 		break;
23073 
23074 	case USCSICMD:
23075 		SD_TRACE(SD_LOG_IOCTL, un, "USCSICMD\n");
23076 		cr = ddi_get_cred();
23077 		if ((drv_priv(cred_p) != 0) && (drv_priv(cr) != 0)) {
23078 			err = EPERM;
23079 		} else {
23080 			enum uio_seg	uioseg;
23081 
23082 			uioseg = (flag & FKIOCTL) ? UIO_SYSSPACE :
23083 			    UIO_USERSPACE;
23084 			if (un->un_f_format_in_progress == TRUE) {
23085 				err = EAGAIN;
23086 				break;
23087 			}
23088 
23089 			err = sd_ssc_send(ssc,
23090 			    (struct uscsi_cmd *)arg,
23091 			    flag, uioseg, SD_PATH_STANDARD);
23092 			if (err != 0)
23093 				goto done_with_assess;
23094 			else
23095 				sd_ssc_assessment(ssc, SD_FMT_STANDARD);
23096 		}
23097 		break;
23098 
23099 	case USCSIMAXXFER:
23100 		SD_TRACE(SD_LOG_IOCTL, un, "USCSIMAXXFER\n");
23101 		cr = ddi_get_cred();
23102 		if ((drv_priv(cred_p) != 0) && (drv_priv(cr) != 0)) {
23103 			err = EPERM;
23104 		} else {
23105 			const uscsi_xfer_t xfer = un->un_max_xfer_size;
23106 
23107 			if (ddi_copyout(&xfer, (void *)arg, sizeof (xfer),
23108 			    flag) != 0) {
23109 				err = EFAULT;
23110 			} else {
23111 				err = 0;
23112 			}
23113 		}
23114 		break;
23115 
23116 	case CDROMPAUSE:
23117 	case CDROMRESUME:
23118 		SD_TRACE(SD_LOG_IOCTL, un, "PAUSE-RESUME\n");
23119 		if (!ISCD(un)) {
23120 			err = ENOTTY;
23121 		} else {
23122 			err = sr_pause_resume(dev, cmd);
23123 		}
23124 		break;
23125 
23126 	case CDROMPLAYMSF:
23127 		SD_TRACE(SD_LOG_IOCTL, un, "CDROMPLAYMSF\n");
23128 		if (!ISCD(un)) {
23129 			err = ENOTTY;
23130 		} else {
23131 			err = sr_play_msf(dev, (caddr_t)arg, flag);
23132 		}
23133 		break;
23134 
23135 	case CDROMPLAYTRKIND:
23136 		SD_TRACE(SD_LOG_IOCTL, un, "CDROMPLAYTRKIND\n");
23137 #if defined(__x86)
23138 		/*
23139 		 * not supported on ATAPI CD drives, use CDROMPLAYMSF instead
23140 		 */
23141 		if (!ISCD(un) || (un->un_f_cfg_is_atapi == TRUE)) {
23142 #else
23143 		if (!ISCD(un)) {
23144 #endif
23145 			err = ENOTTY;
23146 		} else {
23147 			err = sr_play_trkind(dev, (caddr_t)arg, flag);
23148 		}
23149 		break;
23150 
23151 	case CDROMREADTOCHDR:
23152 		SD_TRACE(SD_LOG_IOCTL, un, "CDROMREADTOCHDR\n");
23153 		if (!ISCD(un)) {
23154 			err = ENOTTY;
23155 		} else {
23156 			err = sr_read_tochdr(dev, (caddr_t)arg, flag);
23157 		}
23158 		break;
23159 
23160 	case CDROMREADTOCENTRY:
23161 		SD_TRACE(SD_LOG_IOCTL, un, "CDROMREADTOCENTRY\n");
23162 		if (!ISCD(un)) {
23163 			err = ENOTTY;
23164 		} else {
23165 			err = sr_read_tocentry(dev, (caddr_t)arg, flag);
23166 		}
23167 		break;
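
		/*
		 * Example (illustrative sketch, not driver code): reading
		 * the table of contents from user space; fd is assumed to
		 * be an open raw CD device descriptor.
		 *
		 *	struct cdrom_tochdr hdr;
		 *	struct cdrom_tocentry entry;
		 *
		 *	if (ioctl(fd, CDROMREADTOCHDR, &hdr) == 0) {
		 *		bzero(&entry, sizeof (entry));
		 *		entry.cdte_track = hdr.cdth_trk0;
		 *		entry.cdte_format = CDROM_MSF;
		 *		(void) ioctl(fd, CDROMREADTOCENTRY, &entry);
		 *	}
		 */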
23168 
23169 	case CDROMSTOP:
23170 		SD_TRACE(SD_LOG_IOCTL, un, "CDROMSTOP\n");
23171 		if (!ISCD(un)) {
23172 			err = ENOTTY;
23173 		} else {
23174 			err = sd_send_scsi_START_STOP_UNIT(ssc, SD_START_STOP,
23175 			    SD_TARGET_STOP, SD_PATH_STANDARD);
23176 			goto done_with_assess;
23177 		}
23178 		break;
23179 
23180 	case CDROMSTART:
23181 		SD_TRACE(SD_LOG_IOCTL, un, "CDROMSTART\n");
23182 		if (!ISCD(un)) {
23183 			err = ENOTTY;
23184 		} else {
23185 			err = sd_send_scsi_START_STOP_UNIT(ssc, SD_START_STOP,
23186 			    SD_TARGET_START, SD_PATH_STANDARD);
23187 			goto done_with_assess;
23188 		}
23189 		break;
23190 
23191 	case CDROMCLOSETRAY:
23192 		SD_TRACE(SD_LOG_IOCTL, un, "CDROMCLOSETRAY\n");
23193 		if (!ISCD(un)) {
23194 			err = ENOTTY;
23195 		} else {
23196 			err = sd_send_scsi_START_STOP_UNIT(ssc, SD_START_STOP,
23197 			    SD_TARGET_CLOSE, SD_PATH_STANDARD);
23198 			goto done_with_assess;
23199 		}
23200 		break;
23201 
23202 	case FDEJECT:	/* for eject command */
23203 	case DKIOCEJECT:
23204 	case CDROMEJECT:
23205 		SD_TRACE(SD_LOG_IOCTL, un, "EJECT\n");
23206 		if (!un->un_f_eject_media_supported) {
23207 			err = ENOTTY;
23208 		} else {
23209 			err = sr_eject(dev);
23210 		}
23211 		break;
23212 
23213 	case CDROMVOLCTRL:
23214 		SD_TRACE(SD_LOG_IOCTL, un, "CDROMVOLCTRL\n");
23215 		if (!ISCD(un)) {
23216 			err = ENOTTY;
23217 		} else {
23218 			err = sr_volume_ctrl(dev, (caddr_t)arg, flag);
23219 		}
23220 		break;
23221 
23222 	case CDROMSUBCHNL:
23223 		SD_TRACE(SD_LOG_IOCTL, un, "CDROMSUBCHNL\n");
23224 		if (!ISCD(un)) {
23225 			err = ENOTTY;
23226 		} else {
23227 			err = sr_read_subchannel(dev, (caddr_t)arg, flag);
23228 		}
23229 		break;
23230 
23231 	case CDROMREADMODE2:
23232 		SD_TRACE(SD_LOG_IOCTL, un, "CDROMREADMODE2\n");
23233 		if (!ISCD(un)) {
23234 			err = ENOTTY;
23235 		} else if (un->un_f_cfg_is_atapi == TRUE) {
23236 			/*
23237 			 * If the drive supports READ CD, use that instead of
23238 			 * switching the LBA size via a MODE SELECT
23239 			 * Block Descriptor
23240 			 */
23241 			err = sr_read_cd_mode2(dev, (caddr_t)arg, flag);
23242 		} else {
23243 			err = sr_read_mode2(dev, (caddr_t)arg, flag);
23244 		}
23245 		break;
23246 
23247 	case CDROMREADMODE1:
23248 		SD_TRACE(SD_LOG_IOCTL, un, "CDROMREADMODE1\n");
23249 		if (!ISCD(un)) {
23250 			err = ENOTTY;
23251 		} else {
23252 			err = sr_read_mode1(dev, (caddr_t)arg, flag);
23253 		}
23254 		break;
23255 
23256 	case CDROMREADOFFSET:
23257 		SD_TRACE(SD_LOG_IOCTL, un, "CDROMREADOFFSET\n");
23258 		if (!ISCD(un)) {
23259 			err = ENOTTY;
23260 		} else {
23261 			err = sr_read_sony_session_offset(dev, (caddr_t)arg,
23262 			    flag);
23263 		}
23264 		break;
23265 
23266 	case CDROMSBLKMODE:
23267 		SD_TRACE(SD_LOG_IOCTL, un, "CDROMSBLKMODE\n");
23268 		/*
23269 		 * There is no means of changing the block size on atapi
23270 		 * drives, so return ENOTTY if the drive type is atapi.
23271 		 */
23272 		if (!ISCD(un) || (un->un_f_cfg_is_atapi == TRUE)) {
23273 			err = ENOTTY;
23274 		} else if (un->un_f_mmc_cap == TRUE) {
23275 
23276 			/*
23277 			 * MMC Devices do not support changing the
23278 			 * logical block size
23279 			 *
23280 			 * Note: EINVAL is being returned instead of ENOTTY to
23281 			 * maintain consistency with the original mmc
23282 			 * driver update.
23283 			 */
23284 			err = EINVAL;
23285 		} else {
23286 			mutex_enter(SD_MUTEX(un));
23287 			if ((!(un->un_exclopen & (1<<SDPART(dev)))) ||
23288 			    (un->un_ncmds_in_transport > 0)) {
23289 				mutex_exit(SD_MUTEX(un));
23290 				err = EINVAL;
23291 			} else {
23292 				mutex_exit(SD_MUTEX(un));
23293 				err = sr_change_blkmode(dev, cmd, arg, flag);
23294 			}
23295 		}
23296 		break;
23297 
23298 	case CDROMGBLKMODE:
23299 		SD_TRACE(SD_LOG_IOCTL, un, "CDROMGBLKMODE\n");
23300 		if (!ISCD(un)) {
23301 			err = ENOTTY;
23302 		} else if ((un->un_f_cfg_is_atapi != FALSE) &&
23303 		    (un->un_f_blockcount_is_valid != FALSE)) {
23304 			/*
23305 			 * Drive is an ATAPI drive so return target block
23306 			 * size for ATAPI drives since we cannot change the
23307 			 * blocksize on ATAPI drives. Used primarily to detect
23308 			 * if an ATAPI cdrom is present.
23309 			 */
23310 			if (ddi_copyout(&un->un_tgt_blocksize, (void *)arg,
23311 			    sizeof (int), flag) != 0) {
23312 				err = EFAULT;
23313 			} else {
23314 				err = 0;
23315 			}
23316 
23317 		} else {
23318 			/*
23319 			 * Drive supports changing block sizes via a Mode
23320 			 * Select.
23321 			 */
23322 			err = sr_change_blkmode(dev, cmd, arg, flag);
23323 		}
23324 		break;
23325 
23326 	case CDROMGDRVSPEED:
23327 	case CDROMSDRVSPEED:
23328 		SD_TRACE(SD_LOG_IOCTL, un, "CDROMXDRVSPEED\n");
23329 		if (!ISCD(un)) {
23330 			err = ENOTTY;
23331 		} else if (un->un_f_mmc_cap == TRUE) {
23332 			/*
23333 			 * Note: In the future the driver implementation
23334 			 * for getting and setting cd speed should entail:
23335 			 * 1) If non-mmc try the Toshiba mode page
23336 			 *    (sr_change_speed)
23337 			 * 2) If mmc but no support for Real Time Streaming
23338 			 *    try the SET CD SPEED (0xBB) command
23339 			 *    (sr_atapi_change_speed)
23341 			 * 3) If mmc and support for Real Time Streaming
23342 			 *    try the GET PERFORMANCE and SET STREAMING
23343 			 *    commands (not yet implemented, 4380808)
23344 			 */
23345 			/*
23346 			 * As per recent MMC spec, CD-ROM speed is variable
23347 			 * and changes with LBA. Since there is no such
23348 			 * thing as drive speed now, fail this ioctl.
23349 			 *
23350 			 * Note: EINVAL is returned for consistency with the
23351 			 * original implementation, which included support for
23352 			 * getting the drive speed of mmc devices but not
23353 			 * setting the drive speed. Thus EINVAL would be
23354 			 * returned if a set request was made for an mmc
23355 			 * device. We no longer support get or set speed for
23356 			 * mmc, but we need to remain consistent with regard
23357 			 * to the error code returned.
23358 			 */
23359 			err = EINVAL;
23360 		} else if (un->un_f_cfg_is_atapi == TRUE) {
23361 			err = sr_atapi_change_speed(dev, cmd, arg, flag);
23362 		} else {
23363 			err = sr_change_speed(dev, cmd, arg, flag);
23364 		}
23365 		break;
23366 
23367 	case CDROMCDDA:
23368 		SD_TRACE(SD_LOG_IOCTL, un, "CDROMCDDA\n");
23369 		if (!ISCD(un)) {
23370 			err = ENOTTY;
23371 		} else {
23372 			err = sr_read_cdda(dev, (void *)arg, flag);
23373 		}
23374 		break;
23375 
23376 	case CDROMCDXA:
23377 		SD_TRACE(SD_LOG_IOCTL, un, "CDROMCDXA\n");
23378 		if (!ISCD(un)) {
23379 			err = ENOTTY;
23380 		} else {
23381 			err = sr_read_cdxa(dev, (caddr_t)arg, flag);
23382 		}
23383 		break;
23384 
23385 	case CDROMSUBCODE:
23386 		SD_TRACE(SD_LOG_IOCTL, un, "CDROMSUBCODE\n");
23387 		if (!ISCD(un)) {
23388 			err = ENOTTY;
23389 		} else {
23390 			err = sr_read_all_subcodes(dev, (caddr_t)arg, flag);
23391 		}
23392 		break;
23393 
23395 #ifdef SDDEBUG
23396 /* RESET/ABORTS testing ioctls */
23397 	case DKIOCRESET: {
23398 		int	reset_level;
23399 
23400 		if (ddi_copyin((void *)arg, &reset_level, sizeof (int), flag)) {
23401 			err = EFAULT;
23402 		} else {
23403 			SD_INFO(SD_LOG_IOCTL, un, "sdioctl: DKIOCRESET: "
23404 			    "reset_level = 0x%x\n", reset_level);
23405 			if (scsi_reset(SD_ADDRESS(un), reset_level)) {
23406 				err = 0;
23407 			} else {
23408 				err = EIO;
23409 			}
23410 		}
23411 		break;
23412 	}
23413 
23414 	case DKIOCABORT:
23415 		SD_INFO(SD_LOG_IOCTL, un, "sdioctl: DKIOCABORT:\n");
23416 		if (scsi_abort(SD_ADDRESS(un), NULL)) {
23417 			err = 0;
23418 		} else {
23419 			err = EIO;
23420 		}
23421 		break;
23422 #endif
23423 
23424 #ifdef SD_FAULT_INJECTION
23425 /* SDIOC FaultInjection testing ioctls */
23426 	case SDIOCSTART:
23427 	case SDIOCSTOP:
23428 	case SDIOCINSERTPKT:
23429 	case SDIOCINSERTXB:
23430 	case SDIOCINSERTUN:
23431 	case SDIOCINSERTARQ:
23432 	case SDIOCPUSH:
23433 	case SDIOCRETRIEVE:
23434 	case SDIOCRUN:
23435 		SD_INFO(SD_LOG_SDTEST, un, "sdioctl: "
23436 		    "SDIOC detected cmd:0x%X:\n", cmd);
23437 		/* call error generator */
23438 		sd_faultinjection_ioctl(cmd, arg, un);
23439 		err = 0;
23440 		break;
23441 
23442 #endif /* SD_FAULT_INJECTION */
23443 
23444 	case DKIOCFLUSHWRITECACHE:
23445 		{
23446 			struct dk_callback *dkc = (struct dk_callback *)arg;
23447 
23448 			mutex_enter(SD_MUTEX(un));
23449 			if (!un->un_f_sync_cache_supported ||
23450 			    !un->un_f_write_cache_enabled) {
23451 				err = un->un_f_sync_cache_supported ?
23452 				    0 : ENOTSUP;
23453 				mutex_exit(SD_MUTEX(un));
23454 				if ((flag & FKIOCTL) && dkc != NULL &&
23455 				    dkc->dkc_callback != NULL) {
23456 					(*dkc->dkc_callback)(dkc->dkc_cookie,
23457 					    err);
23458 					/*
23459 					 * Did callback and reported error.
23460 					 * Since we did a callback, ioctl
23461 					 * should return 0.
23462 					 */
23463 					err = 0;
23464 				}
23465 				break;
23466 			}
23467 			mutex_exit(SD_MUTEX(un));
23468 
23469 			if ((flag & FKIOCTL) && dkc != NULL &&
23470 			    dkc->dkc_callback != NULL) {
23471 				/* async SYNC CACHE request */
23472 				err = sd_send_scsi_SYNCHRONIZE_CACHE(un, dkc);
23473 			} else {
23474 				/* synchronous SYNC CACHE request */
23475 				err = sd_send_scsi_SYNCHRONIZE_CACHE(un, NULL);
23476 			}
23477 		}
23478 		break;
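
		/*
		 * Example (sketch): from user space the flush is always
		 * synchronous, since FKIOCTL cannot be set from outside the
		 * kernel; the asynchronous dk_callback form above is only
		 * reachable by in-kernel callers.
		 *
		 *	if (ioctl(fd, DKIOCFLUSHWRITECACHE, 0) != 0)
		 *		perror("DKIOCFLUSHWRITECACHE");
		 */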
23479 
23480 	case DKIOCFREE:
23481 		{
23482 			dkioc_free_list_t *dfl = (dkioc_free_list_t *)arg;
23483 
23484 			/* bad ioctls shouldn't panic */
23485 			if (dfl == NULL) {
23486 				/* check kernel callers strictly in debug */
23487 				ASSERT0(flag & FKIOCTL);
23488 				err = SET_ERROR(EINVAL);
23489 				break;
23490 			}
23491 			/* synchronous UNMAP request */
23492 			err = sd_send_scsi_UNMAP(dev, ssc, dfl, flag);
23493 		}
23494 		break;
23495 
23496 	case DKIOC_CANFREE:
23497 		SD_TRACE(SD_LOG_IOCTL, un, "DKIOC_CANFREE\n");
23498 		i = (un->un_thin_flags & SD_THIN_PROV_ENABLED) ? 1 : 0;
23499 		if (ddi_copyout(&i, (void *)arg, sizeof (int), flag) != 0) {
23500 			err = EFAULT;
23501 		} else {
23502 			err = 0;
23503 		}
23504 		break;
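
		/*
		 * Example (sketch; the extent values are illustrative and
		 * the dkioc_free_list_t layout is as defined in
		 * <sys/dkio.h>): probe DKIOC_CANFREE before issuing a
		 * single-extent DKIOCFREE.
		 *
		 *	dkioc_free_list_t dfl;
		 *	int canfree;
		 *
		 *	if (ioctl(fd, DKIOC_CANFREE, &canfree) == 0 &&
		 *	    canfree != 0) {
		 *		bzero(&dfl, sizeof (dfl));
		 *		dfl.dfl_num_exts = 1;
		 *		dfl.dfl_exts[0].dfle_start = 0;
		 *		dfl.dfl_exts[0].dfle_length = 1024 * 1024;
		 *		(void) ioctl(fd, DKIOCFREE, &dfl);
		 *	}
		 */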
23505 
23506 	case DKIOCGETWCE: {
23507 
23508 		int wce;
23509 
23510 		if ((err = sd_get_write_cache_enabled(ssc, &wce)) != 0) {
23511 			break;
23512 		}
23513 
23514 		if (ddi_copyout(&wce, (void *)arg, sizeof (wce), flag)) {
23515 			err = EFAULT;
23516 		}
23517 		break;
23518 	}
23519 
23520 	case DKIOCSETWCE: {
23521 
23522 		int wce, sync_supported;
23523 		int cur_wce = 0;
23524 
23525 		if (!un->un_f_cache_mode_changeable) {
23526 			err = EINVAL;
23527 			break;
23528 		}
23529 
23530 		if (ddi_copyin((void *)arg, &wce, sizeof (wce), flag)) {
23531 			err = EFAULT;
23532 			break;
23533 		}
23534 
23535 		/*
23536 		 * Synchronize multiple threads trying to enable
23537 		 * or disable the cache via the un_f_wcc_cv
23538 		 * condition variable.
23539 		 */
23540 		mutex_enter(SD_MUTEX(un));
23541 
23542 		/*
23543 		 * Don't allow the cache to be enabled if the
23544 		 * config file has it disabled.
23545 		 */
23546 		if (un->un_f_opt_disable_cache && wce) {
23547 			mutex_exit(SD_MUTEX(un));
23548 			err = EINVAL;
23549 			break;
23550 		}
23551 
23552 		/*
23553 		 * Wait for write cache change in progress
23554 		 * bit to be clear before proceeding.
23555 		 */
23556 		while (un->un_f_wcc_inprog)
23557 			cv_wait(&un->un_wcc_cv, SD_MUTEX(un));
23558 
23559 		un->un_f_wcc_inprog = 1;
23560 
23561 		mutex_exit(SD_MUTEX(un));
23562 
23563 		/*
23564 		 * Get the current write cache state
23565 		 */
23566 		if ((err = sd_get_write_cache_enabled(ssc, &cur_wce)) != 0) {
23567 			mutex_enter(SD_MUTEX(un));
23568 			un->un_f_wcc_inprog = 0;
23569 			cv_broadcast(&un->un_wcc_cv);
23570 			mutex_exit(SD_MUTEX(un));
23571 			break;
23572 		}
23573 
23574 		mutex_enter(SD_MUTEX(un));
23575 		un->un_f_write_cache_enabled = (cur_wce != 0);
23576 
23577 		if (un->un_f_write_cache_enabled && wce == 0) {
23578 			/*
23579 			 * Disable the write cache.  Don't clear
23580 			 * un_f_write_cache_enabled until after
23581 			 * the mode select and flush are complete.
23582 			 */
23583 			sync_supported = un->un_f_sync_cache_supported;
23584 
23585 			/*
23586 			 * If cache flush is suppressed, we assume that the
23587 			 * controller firmware will take care of managing the
23588 			 * write cache for us: no need to explicitly
23589 			 * disable it.
23590 			 */
23591 			if (!un->un_f_suppress_cache_flush) {
23592 				mutex_exit(SD_MUTEX(un));
23593 				if ((err = sd_cache_control(ssc,
23594 				    SD_CACHE_NOCHANGE,
23595 				    SD_CACHE_DISABLE)) == 0 &&
23596 				    sync_supported) {
23597 					err = sd_send_scsi_SYNCHRONIZE_CACHE(un,
23598 					    NULL);
23599 				}
23600 			} else {
23601 				mutex_exit(SD_MUTEX(un));
23602 			}
23603 
23604 			mutex_enter(SD_MUTEX(un));
23605 			if (err == 0) {
23606 				un->un_f_write_cache_enabled = 0;
23607 			}
23608 
23609 		} else if (!un->un_f_write_cache_enabled && wce != 0) {
23610 			/*
23611 			 * Set un_f_write_cache_enabled first, so there is
23612 			 * no window where the cache is enabled, but the
23613 			 * bit says it isn't.
23614 			 */
23615 			un->un_f_write_cache_enabled = 1;
23616 
23617 			/*
23618 			 * If cache flush is suppressed, we assume that the
23619 			 * controller firmware will take care of managing the
23620 			 * write cache for us: no need to explicitly
23621 			 * enable it.
23622 			 */
23623 			if (!un->un_f_suppress_cache_flush) {
23624 				mutex_exit(SD_MUTEX(un));
23625 				err = sd_cache_control(ssc, SD_CACHE_NOCHANGE,
23626 				    SD_CACHE_ENABLE);
23627 			} else {
23628 				mutex_exit(SD_MUTEX(un));
23629 			}
23630 
23631 			mutex_enter(SD_MUTEX(un));
23632 
23633 			if (err) {
23634 				un->un_f_write_cache_enabled = 0;
23635 			}
23636 		}
23637 
23638 		un->un_f_wcc_inprog = 0;
23639 		cv_broadcast(&un->un_wcc_cv);
23640 		mutex_exit(SD_MUTEX(un));
23641 		break;
23642 	}
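
	/*
	 * Example (sketch; fd is assumed to be an open raw device
	 * descriptor): query the write cache state with DKIOCGETWCE, then
	 * disable the cache with DKIOCSETWCE.
	 *
	 *	int wce;
	 *
	 *	if (ioctl(fd, DKIOCGETWCE, &wce) == 0 && wce != 0) {
	 *		wce = 0;
	 *		(void) ioctl(fd, DKIOCSETWCE, &wce);
	 *	}
	 */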
23643 
23644 	default:
23645 		err = ENOTTY;
23646 		break;
23647 	}
23648 	mutex_enter(SD_MUTEX(un));
23649 	un->un_ncmds_in_driver--;
23650 	ASSERT(un->un_ncmds_in_driver >= 0);
23651 	mutex_exit(SD_MUTEX(un));
23652 
23654 done_without_assess:
23655 	sd_ssc_fini(ssc);
23656 
23657 	SD_TRACE(SD_LOG_IOCTL, un, "sdioctl: exit: %d\n", err);
23658 	return (err);
23659 
23660 done_with_assess:
23661 	mutex_enter(SD_MUTEX(un));
23662 	un->un_ncmds_in_driver--;
23663 	ASSERT(un->un_ncmds_in_driver >= 0);
23664 	mutex_exit(SD_MUTEX(un));
23665 
23666 done_quick_assess:
23667 	if (err != 0)
23668 		sd_ssc_assessment(ssc, SD_FMT_IGNORE);
23669 	/* Uninitialize sd_ssc_t pointer */
23670 	sd_ssc_fini(ssc);
23671 
23672 	SD_TRACE(SD_LOG_IOCTL, un, "sdioctl: exit: %d\n", err);
23673 	return (err);
23674 }
23675 
23676 
23677 /*
23678  *    Function: sd_dkio_ctrl_info
23679  *
23680  * Description: This routine is the driver entry point for handling controller
23681  *		information ioctl requests (DKIOCINFO).
23682  *
23683  *   Arguments: dev  - the device number
23684  *		arg  - pointer to user provided dk_cinfo structure
23685  *		       specifying the controller type and attributes.
23686  *		flag - this argument is a pass through to ddi_copyxxx()
23687  *		       directly from the mode argument of ioctl().
23688  *
23689  * Return Code: 0
23690  *		EFAULT
23691  *		ENXIO
23692  */
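
/*
 * Example (illustrative sketch; the device path is a placeholder): a
 * user-space consumer retrieves the controller information with DKIOCINFO.
 *
 *	struct dk_cinfo cinfo;
 *	int fd = open("/dev/rdsk/c0t0d0s2", O_RDONLY);
 *
 *	if (fd >= 0 && ioctl(fd, DKIOCINFO, &cinfo) == 0) {
 *		(void) printf("ctype=%d maxxfer=%u blocks\n",
 *		    cinfo.dki_ctype, cinfo.dki_maxtransfer);
 *	}
 */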
23693 
23694 static int
23695 sd_dkio_ctrl_info(dev_t dev, caddr_t arg, int flag)
23696 {
23697 	struct sd_lun	*un = NULL;
23698 	struct dk_cinfo	*info;
23699 	dev_info_t	*pdip;
23700 	int		lun, tgt;
23701 
23702 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
23703 		return (ENXIO);
23704 	}
23705 
23706 	info = (struct dk_cinfo *)
23707 	    kmem_zalloc(sizeof (struct dk_cinfo), KM_SLEEP);
23708 
23709 	switch (un->un_ctype) {
23710 	case CTYPE_CDROM:
23711 		info->dki_ctype = DKC_CDROM;
23712 		break;
23713 	default:
23714 		info->dki_ctype = DKC_SCSI_CCS;
23715 		break;
23716 	}
23717 	pdip = ddi_get_parent(SD_DEVINFO(un));
23718 	info->dki_cnum = ddi_get_instance(pdip);
23719 	if (strlen(ddi_get_name(pdip)) < DK_DEVLEN) {
23720 		(void) strcpy(info->dki_cname, ddi_get_name(pdip));
23721 	} else {
23722 		(void) strncpy(info->dki_cname, ddi_node_name(pdip),
23723 		    DK_DEVLEN - 1);
23724 	}
23725 
23726 	lun = ddi_prop_get_int(DDI_DEV_T_ANY, SD_DEVINFO(un),
23727 	    DDI_PROP_DONTPASS, SCSI_ADDR_PROP_LUN, 0);
23728 	tgt = ddi_prop_get_int(DDI_DEV_T_ANY, SD_DEVINFO(un),
23729 	    DDI_PROP_DONTPASS, SCSI_ADDR_PROP_TARGET, 0);
23730 
23731 	/* Unit Information */
23732 	info->dki_unit = ddi_get_instance(SD_DEVINFO(un));
23733 	info->dki_slave = ((tgt << 3) | lun);
23734 	(void) strncpy(info->dki_dname, ddi_driver_name(SD_DEVINFO(un)),
23735 	    DK_DEVLEN - 1);
23736 	info->dki_flags = DKI_FMTVOL;
23737 	info->dki_partition = SDPART(dev);
23738 
23739 	/* Max Transfer size of this device in blocks */
23740 	info->dki_maxtransfer = un->un_max_xfer_size / un->un_sys_blocksize;
23741 	info->dki_addr = 0;
23742 	info->dki_space = 0;
23743 	info->dki_prio = 0;
23744 	info->dki_vec = 0;
23745 
23746 	if (ddi_copyout(info, arg, sizeof (struct dk_cinfo), flag) != 0) {
23747 		kmem_free(info, sizeof (struct dk_cinfo));
23748 		return (EFAULT);
23749 	} else {
23750 		kmem_free(info, sizeof (struct dk_cinfo));
23751 		return (0);
23752 	}
23753 }
23754 
23755 /*
23756  *    Function: sd_get_media_info_com
23757  *
23758  * Description: This routine returns the information required to populate
23759  *		the fields for the dk_minfo/dk_minfo_ext structures.
23760  *
23761  *   Arguments: dev		- the device number
23762  *		dki_media_type	- media_type
23763  *		dki_lbsize	- logical block size
23764  *		dki_capacity	- capacity in blocks
23765  *		dki_pbsize	- physical block size (if requested)
23766  *
23767  * Return Code: 0
23768  *		EACCES
23769  *		EFAULT
23770  *		ENXIO
23771  *		EIO
23772  */
23773 static int
23774 sd_get_media_info_com(dev_t dev, uint_t *dki_media_type, uint_t *dki_lbsize,
23775     diskaddr_t *dki_capacity, uint_t *dki_pbsize)
23776 {
23777 	struct sd_lun		*un = NULL;
23778 	struct uscsi_cmd	com;
23779 	struct scsi_inquiry	*sinq;
23780 	u_longlong_t		media_capacity;
23781 	uint64_t		capacity;
23782 	uint_t			lbasize;
23783 	uint_t			pbsize;
23784 	uchar_t			*out_data;
23785 	uchar_t			*rqbuf;
23786 	int			rval = 0;
23787 	int			rtn;
23788 	sd_ssc_t		*ssc;
23789 
23790 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL ||
23791 	    (un->un_state == SD_STATE_OFFLINE)) {
23792 		return (ENXIO);
23793 	}
23794 
23795 	SD_TRACE(SD_LOG_IOCTL_DKIO, un, "sd_get_media_info_com: entry\n");
23796 
23797 	out_data = kmem_zalloc(SD_PROFILE_HEADER_LEN, KM_SLEEP);
23798 	rqbuf = kmem_zalloc(SENSE_LENGTH, KM_SLEEP);
23799 	ssc = sd_ssc_init(un);
23800 
23801 	/* Issue a TUR to determine if the drive is ready with media present */
23802 	rval = sd_send_scsi_TEST_UNIT_READY(ssc, SD_CHECK_FOR_MEDIA);
23803 	if (rval == ENXIO) {
23804 		goto done;
23805 	} else if (rval != 0) {
23806 		sd_ssc_assessment(ssc, SD_FMT_IGNORE);
23807 	}
23808 
23809 	/* Now get configuration data */
23810 	if (ISCD(un)) {
23811 		*dki_media_type = DK_CDROM;
23812 
23813 		/* Allow SCMD_GET_CONFIGURATION to MMC devices only */
23814 		if (un->un_f_mmc_cap == TRUE) {
23815 			rtn = sd_send_scsi_GET_CONFIGURATION(ssc, &com, rqbuf,
23816 			    SENSE_LENGTH, out_data, SD_PROFILE_HEADER_LEN,
23817 			    SD_PATH_STANDARD);
23818 
23819 			if (rtn) {
23820 				/*
23821 				 * We ignore all failures for CD and need to
23822 				 * make the assessment before the processing
23823 				 * code to avoid missing the assessment for FMA.
23824 				 */
23825 				sd_ssc_assessment(ssc, SD_FMT_IGNORE);
23826 				/*
23827 				 * Failed for other than an illegal request
23828 				 * or command not supported
23829 				 */
23830 				if ((com.uscsi_status == STATUS_CHECK) &&
23831 				    (com.uscsi_rqstatus == STATUS_GOOD)) {
23832 					if ((rqbuf[2] != KEY_ILLEGAL_REQUEST) ||
23833 					    (rqbuf[12] != 0x20)) {
23834 						rval = EIO;
23835 						goto no_assessment;
23836 					}
23837 				}
23838 			} else {
23839 				/*
23840 				 * The GET CONFIGURATION command succeeded
23841 				 * so set the media type according to the
23842 				 * returned data
23843 				 */
23844 				*dki_media_type = out_data[6];
23845 				*dki_media_type <<= 8;
23846 				*dki_media_type |= out_data[7];
23847 			}
23848 		}
23849 	} else {
23850 		/*
23851 		 * The profile list is not available, so we attempt to identify
23852 		 * the media type based on the inquiry data
23853 		 */
23854 		sinq = un->un_sd->sd_inq;
23855 		if ((sinq->inq_dtype == DTYPE_DIRECT) ||
23856 		    (sinq->inq_dtype == DTYPE_OPTICAL)) {
23857 			/* This is a direct access device  or optical disk */
23858 			*dki_media_type = DK_FIXED_DISK;
23859 
23860 			if ((bcmp(sinq->inq_vid, "IOMEGA", 6) == 0) ||
23861 			    (bcmp(sinq->inq_vid, "iomega", 6) == 0)) {
23862 				if ((bcmp(sinq->inq_pid, "ZIP", 3) == 0)) {
23863 					*dki_media_type = DK_ZIP;
23864 				} else if (
23865 				    (bcmp(sinq->inq_pid, "jaz", 3) == 0)) {
23866 					*dki_media_type = DK_JAZ;
23867 				}
23868 			}
23869 		} else {
23870 			/*
23871 			 * Not a CD, direct access or optical disk so return
23872 			 * unknown media
23873 			 */
23874 			*dki_media_type = DK_UNKNOWN;
23875 		}
23876 	}
23877 
23878 	/*
23879 	 * Now read the capacity so we can provide the lbasize,
23880 	 * pbsize and capacity.
23881 	 */
23882 	if (dki_pbsize && un->un_f_descr_format_supported) {
23883 		rval = sd_send_scsi_READ_CAPACITY_16(ssc, &capacity, &lbasize,
23884 		    &pbsize, SD_PATH_DIRECT);
23885 
23886 		/*
23887 		 * Override the physical blocksize if the instance already
23888 		 * has a larger value.
23889 		 */
23890 		pbsize = MAX(pbsize, un->un_phy_blocksize);
23891 	}
23892 
23893 	if (dki_pbsize == NULL || rval != 0 ||
23894 	    !un->un_f_descr_format_supported) {
23895 		rval = sd_send_scsi_READ_CAPACITY(ssc, &capacity, &lbasize,
23896 		    SD_PATH_DIRECT);
23897 
23898 		switch (rval) {
23899 		case 0:
23900 			if (un->un_f_enable_rmw &&
23901 			    un->un_phy_blocksize != 0) {
23902 				pbsize = un->un_phy_blocksize;
23903 			} else {
23904 				pbsize = lbasize;
23905 			}
23906 			media_capacity = capacity;
23907 
23908 			/*
23909 			 * sd_send_scsi_READ_CAPACITY() reports capacity in
23910 			 * un->un_sys_blocksize chunks. So we need to convert
23911 			 * it into lbasize chunks.
23912 			 */
23913 			if (un->un_f_has_removable_media) {
23914 				media_capacity *= un->un_sys_blocksize;
23915 				media_capacity /= lbasize;
23916 			}
23917 			break;
23918 		case EACCES:
23919 			rval = EACCES;
23920 			goto done;
23921 		default:
23922 			rval = EIO;
23923 			goto done;
23924 		}
23925 	} else {
23926 		if (un->un_f_enable_rmw &&
23927 		    !ISP2(pbsize % DEV_BSIZE)) {
23928 			pbsize = SSD_SECSIZE;
23929 		} else if (!ISP2(lbasize % DEV_BSIZE) ||
23930 		    !ISP2(pbsize % DEV_BSIZE)) {
23931 			pbsize = lbasize = DEV_BSIZE;
23932 		}
23933 		media_capacity = capacity;
23934 	}
23935 
23936 	/*
23937 	 * If lun is expanded dynamically, update the un structure.
23938 	 */
23939 	mutex_enter(SD_MUTEX(un));
23940 	if ((un->un_f_blockcount_is_valid == TRUE) &&
23941 	    (un->un_f_tgt_blocksize_is_valid == TRUE) &&
23942 	    (capacity > un->un_blockcount)) {
23943 		un->un_f_expnevent = B_FALSE;
23944 		sd_update_block_info(un, lbasize, capacity);
23945 	}
23946 	mutex_exit(SD_MUTEX(un));
23947 
23948 	*dki_lbsize = lbasize;
23949 	*dki_capacity = media_capacity;
23950 	if (dki_pbsize)
23951 		*dki_pbsize = pbsize;
23952 
23953 done:
23954 	if (rval != 0) {
23955 		if (rval == EIO)
23956 			sd_ssc_assessment(ssc, SD_FMT_STATUS_CHECK);
23957 		else
23958 			sd_ssc_assessment(ssc, SD_FMT_IGNORE);
23959 	}
23960 no_assessment:
23961 	sd_ssc_fini(ssc);
23962 	kmem_free(out_data, SD_PROFILE_HEADER_LEN);
23963 	kmem_free(rqbuf, SENSE_LENGTH);
23964 	return (rval);
23965 }
23966 
23967 /*
23968  *    Function: sd_get_media_info
23969  *
23970  * Description: This routine is the driver entry point for handling ioctl
23971  *		requests for the media type or command set profile used by the
23972  *		drive to operate on the media (DKIOCGMEDIAINFO).
23973  *
23974  *   Arguments: dev	- the device number
23975  *		arg	- pointer to user provided dk_minfo structure
23976  *			  specifying the media type, logical block size and
23977  *			  drive capacity.
23978  *		flag	- this argument is a pass through to ddi_copyxxx()
23979  *			  directly from the mode argument of ioctl().
23980  *
23981  * Return Code: returns the value from sd_get_media_info_com
23982  */
23983 static int
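
/*
 * Example (sketch; fd is assumed to be an open raw device descriptor):
 * fetch the media type, logical block size and capacity.
 *
 *	struct dk_minfo minfo;
 *
 *	if (ioctl(fd, DKIOCGMEDIAINFO, &minfo) == 0) {
 *		(void) printf("type=%u lbsize=%u capacity=%llu\n",
 *		    minfo.dki_media_type, minfo.dki_lbsize,
 *		    (u_longlong_t)minfo.dki_capacity);
 *	}
 */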
23984 sd_get_media_info(dev_t dev, caddr_t arg, int flag)
23985 {
23986 	struct dk_minfo		mi;
23987 	int			rval;
23988 
23989 	rval = sd_get_media_info_com(dev, &mi.dki_media_type,
23990 	    &mi.dki_lbsize, &mi.dki_capacity, NULL);
23991 
23992 	if (rval)
23993 		return (rval);
23994 	if (ddi_copyout(&mi, arg, sizeof (struct dk_minfo), flag))
23995 		rval = EFAULT;
23996 	return (rval);
23997 }
23998 
23999 /*
24000  *    Function: sd_get_media_info_ext
24001  *
24002  * Description: This routine is the driver entry point for handling ioctl
24003  *		requests for the media type or command set profile used by the
24004  *		drive to operate on the media (DKIOCGMEDIAINFOEXT). The
24005  *		difference between this ioctl and DKIOCGMEDIAINFO is that
24006  *		the return value of this ioctl contains both the logical
24007  *		block size and the physical block size.
24008  *
24010  *   Arguments: dev	- the device number
24011  *		arg	- pointer to user provided dk_minfo_ext structure
24012  *			  specifying the media type, logical block size,
24013  *			  physical block size and disk capacity.
24014  *		flag	- this argument is a pass through to ddi_copyxxx()
24015  *			  directly from the mode argument of ioctl().
24016  *
24017  * Return Code: returns the value from sd_get_media_info_com
24018  */
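
/*
 * Example (sketch; fd is assumed to be an open raw device descriptor):
 * the extended variant additionally reports the physical block size.
 *
 *	struct dk_minfo_ext minfo_ext;
 *
 *	if (ioctl(fd, DKIOCGMEDIAINFOEXT, &minfo_ext) == 0) {
 *		(void) printf("lbsize=%u pbsize=%u\n",
 *		    minfo_ext.dki_lbsize, minfo_ext.dki_pbsize);
 *	}
 */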
24019 static int
24020 sd_get_media_info_ext(dev_t dev, caddr_t arg, int flag)
24021 {
24022 	struct dk_minfo_ext	mie;
24023 	int			rval = 0;
24024 	size_t			len;
24025 
24026 	rval = sd_get_media_info_com(dev, &mie.dki_media_type,
24027 	    &mie.dki_lbsize, &mie.dki_capacity, &mie.dki_pbsize);
24028 
24029 	if (rval)
24030 		return (rval);
24031 
24032 	switch (ddi_model_convert_from(flag & FMODELS)) {
24033 	case DDI_MODEL_ILP32:
24034 		len = sizeof (struct dk_minfo_ext32);
24035 		break;
24036 	default:
24037 		len = sizeof (struct dk_minfo_ext);
24038 		break;
24039 	}
24040 
24041 	if (ddi_copyout(&mie, arg, len, flag))
24042 		rval = EFAULT;
24043 	return (rval);
24045 }
24046 
24047 /*
24048  *    Function: sd_watch_request_submit
24049  *
24050  * Description: Call scsi_watch_request_submit or scsi_mmc_watch_request_submit
24051  *		depending on which is supported by device.
24052  */
24053 static opaque_t
24054 sd_watch_request_submit(struct sd_lun *un)
24055 {
24056 	dev_t			dev;
24057 
24058 	/* All submissions are unified to use same device number */
24059 	/* All submissions are unified to use the same device number */
24060 
24061 	if (un->un_f_mmc_cap && un->un_f_mmc_gesn_polling) {
24062 		return (scsi_mmc_watch_request_submit(SD_SCSI_DEVP(un),
24063 		    sd_check_media_time, SENSE_LENGTH, sd_media_watch_cb,
24064 		    (caddr_t)dev));
24065 	} else {
24066 		return (scsi_watch_request_submit(SD_SCSI_DEVP(un),
24067 		    sd_check_media_time, SENSE_LENGTH, sd_media_watch_cb,
24068 		    (caddr_t)dev));
24069 	}
24070 }
24071 
24072 
24073 /*
24074  *    Function: sd_check_media
24075  *
24076  * Description: This utility routine implements the functionality for the
24077  *		DKIOCSTATE ioctl. This ioctl blocks the user thread until the
24078  *		driver state changes from that specified by the user
24079  *		(inserted or ejected). For example, if the user specifies
24080  *		DKIO_EJECTED and the current media state is inserted, this
24081  *		routine will immediately return DKIO_INSERTED. However, if the
24082  *		current media state is not inserted the user thread will be
24083  *		blocked until the drive state changes. If DKIO_NONE is specified
24084  *		the user thread will block until a drive state change occurs.
24085  *
24086  *   Arguments: dev  - the device number
24087  *		state - the user-specified media state; this routine
24088  *			blocks while the drive state matches it.
24089  *
24090  * Return Code: ENXIO
24091  *		EIO
24092  *		EAGAIN
24093  *		EINTR
24094  */
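
/*
 * Example (sketch; fd is assumed to be an open raw device descriptor): a
 * simple media monitoring loop. The value passed in is the state to wait
 * against; the ioctl blocks until the drive state differs from it.
 *
 *	enum dkio_state state = DKIO_NONE;
 *
 *	for (;;) {
 *		if (ioctl(fd, DKIOCSTATE, &state) != 0)
 *			break;
 *		if (state == DKIO_INSERTED)
 *			(void) printf("media inserted\n");
 *		else if (state == DKIO_EJECTED)
 *			(void) printf("media ejected\n");
 *	}
 */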
24095 
24096 static int
24097 sd_check_media(dev_t dev, enum dkio_state state)
24098 {
24099 	struct sd_lun		*un = NULL;
24100 	enum dkio_state		prev_state;
24101 	opaque_t		token = NULL;
24102 	int			rval = 0;
24103 	sd_ssc_t		*ssc;
24104 
24105 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
24106 		return (ENXIO);
24107 	}
24108 
24109 	SD_TRACE(SD_LOG_COMMON, un, "sd_check_media: entry\n");
24110 
24111 	ssc = sd_ssc_init(un);
24112 
24113 	mutex_enter(SD_MUTEX(un));
24114 
24115 	SD_TRACE(SD_LOG_COMMON, un, "sd_check_media: "
24116 	    "state=%x, mediastate=%x\n", state, un->un_mediastate);
24117 
24118 	prev_state = un->un_mediastate;
24119 
24120 	/* is there anything to do? */
24121 	if (state == un->un_mediastate || un->un_mediastate == DKIO_NONE) {
24122 		/*
24123 		 * submit the request to the scsi_watch service;
24124 		 * scsi_media_watch_cb() does the real work
24125 		 */
24126 		mutex_exit(SD_MUTEX(un));
24127 
24128 		/*
24129 		 * This change handles the case where a scsi watch request is
24130 		 * added to a device that is powered down. To accomplish this
24131 		 * we power up the device before adding the scsi watch request,
24132 		 * since the scsi watch sends a TUR directly to the device
24133 		 * which the device cannot handle if it is powered down.
24134 		 */
24135 		if (sd_pm_entry(un) != DDI_SUCCESS) {
24136 			mutex_enter(SD_MUTEX(un));
24137 			goto done;
24138 		}
24139 
24140 		token = sd_watch_request_submit(un);
24141 
24142 		sd_pm_exit(un);
24143 
24144 		mutex_enter(SD_MUTEX(un));
24145 		if (token == NULL) {
24146 			rval = EAGAIN;
24147 			goto done;
24148 		}
24149 
24150 		/*
24151 		 * This is a special case IOCTL that doesn't return
24152 		 * until the media state changes. Routine sdpower
24153 		 * knows about and handles this so don't count it
24154 		 * as an active cmd in the driver, which would
24155 		 * keep the device busy to the pm framework.
24156 		 * If the count isn't decremented the device can't
24157 		 * be powered down.
24158 		 */
24159 		un->un_ncmds_in_driver--;
24160 		ASSERT(un->un_ncmds_in_driver >= 0);
24161 
24162 		/*
24163 		 * if a prior request had been made, this will be the same
24164 		 * token, as scsi_watch was designed that way.
24165 		 */
24166 		un->un_swr_token = token;
24167 		un->un_specified_mediastate = state;
24168 
24169 		/*
24170 		 * now wait for media change
24171 		 * we will not be signalled unless mediastate != state, but it is
24172 		 * still better to test for this condition, since there is a
24173 		 * 2 sec cv_broadcast delay when mediastate == DKIO_INSERTED
24174 		 */
24175 		SD_TRACE(SD_LOG_COMMON, un,
24176 		    "sd_check_media: waiting for media state change\n");
24177 		while (un->un_mediastate == state) {
24178 			if (cv_wait_sig(&un->un_state_cv, SD_MUTEX(un)) == 0) {
24179 				SD_TRACE(SD_LOG_COMMON, un,
24180 				    "sd_check_media: waiting for media state "
24181 				    "was interrupted\n");
24182 				un->un_ncmds_in_driver++;
24183 				rval = EINTR;
24184 				goto done;
24185 			}
24186 			SD_TRACE(SD_LOG_COMMON, un,
24187 			    "sd_check_media: received signal, state=%x\n",
24188 			    un->un_mediastate);
24189 		}
24190 		/*
24191 		 * Inc the counter to indicate the device once again
24192 		 * has an active outstanding cmd.
24193 		 */
24194 		un->un_ncmds_in_driver++;
24195 	}
24196 
24197 	/* invalidate geometry */
24198 	if (prev_state == DKIO_INSERTED && un->un_mediastate == DKIO_EJECTED) {
24199 		sr_ejected(un);
24200 	}
24201 
24202 	if (un->un_mediastate == DKIO_INSERTED && prev_state != DKIO_INSERTED) {
24203 		uint64_t	capacity;
24204 		uint_t		lbasize;
24205 
24206 		SD_TRACE(SD_LOG_COMMON, un, "sd_check_media: media inserted\n");
24207 		mutex_exit(SD_MUTEX(un));
24208 		/*
24209 		 * Since the following routines use SD_PATH_DIRECT, we must
24210 		 * call PM directly before the upcoming disk accesses. This
24211 		 * may cause the disk to be power/spin up.
24212 		 */
24213 
24214 		if (sd_pm_entry(un) == DDI_SUCCESS) {
24215 			rval = sd_send_scsi_READ_CAPACITY(ssc,
24216 			    &capacity, &lbasize, SD_PATH_DIRECT);
24217 			if (rval != 0) {
24218 				sd_pm_exit(un);
24219 				if (rval == EIO)
24220 					sd_ssc_assessment(ssc,
24221 					    SD_FMT_STATUS_CHECK);
24222 				else
24223 					sd_ssc_assessment(ssc, SD_FMT_IGNORE);
24224 				mutex_enter(SD_MUTEX(un));
24225 				goto done;
24226 			}
24227 		} else {
24228 			rval = EIO;
24229 			mutex_enter(SD_MUTEX(un));
24230 			goto done;
24231 		}
24232 		mutex_enter(SD_MUTEX(un));
24233 
24234 		sd_update_block_info(un, lbasize, capacity);
24235 
24236 		/*
24237 		 *  Check if the media in the device is writable or not
24238 		 */
24239 		if (ISCD(un)) {
24240 			sd_check_for_writable_cd(ssc, SD_PATH_DIRECT);
24241 		}
24242 
24243 		mutex_exit(SD_MUTEX(un));
24244 		cmlb_invalidate(un->un_cmlbhandle, (void *)SD_PATH_DIRECT);
24245 		if ((cmlb_validate(un->un_cmlbhandle, 0,
24246 		    (void *)SD_PATH_DIRECT) == 0) && un->un_f_pkstats_enabled) {
24247 			sd_set_pstats(un);
24248 			SD_TRACE(SD_LOG_IO_PARTITION, un,
24249 			    "sd_check_media: un:0x%p pstats created and "
24250 			    "set\n", un);
24251 		}
24252 
24253 		rval = sd_send_scsi_DOORLOCK(ssc, SD_REMOVAL_PREVENT,
24254 		    SD_PATH_DIRECT);
24255 
24256 		sd_pm_exit(un);
24257 
24258 		if (rval != 0) {
24259 			if (rval == EIO)
24260 				sd_ssc_assessment(ssc, SD_FMT_STATUS_CHECK);
24261 			else
24262 				sd_ssc_assessment(ssc, SD_FMT_IGNORE);
24263 		}
24264 
24265 		mutex_enter(SD_MUTEX(un));
24266 	}
24267 done:
24268 	sd_ssc_fini(ssc);
24269 	un->un_f_watcht_stopped = FALSE;
24270 	if (token != NULL && un->un_swr_token != NULL) {
24271 		/*
24272 		 * Use of this local token and the mutex ensures that we avoid
24273 		 * some race conditions associated with terminating the
24274 		 * scsi watch.
24275 		 */
24276 		token = un->un_swr_token;
24277 		mutex_exit(SD_MUTEX(un));
24278 		(void) scsi_watch_request_terminate(token,
24279 		    SCSI_WATCH_TERMINATE_WAIT);
24280 		if (scsi_watch_get_ref_count(token) == 0) {
24281 			mutex_enter(SD_MUTEX(un));
24282 			un->un_swr_token = (opaque_t)NULL;
24283 		} else {
24284 			mutex_enter(SD_MUTEX(un));
24285 		}
24286 	}
24287 
24288 	/*
24289 	 * Update the capacity kstat value, if there was no media
24290 	 * previously (capacity kstat is 0) and media has been inserted
24291 	 * (un_f_blockcount_is_valid == TRUE)
24292 	 */
24293 	if (un->un_errstats) {
24294 		struct sd_errstats	*stp = NULL;
24295 
24296 		stp = (struct sd_errstats *)un->un_errstats->ks_data;
24297 		if ((stp->sd_capacity.value.ui64 == 0) &&
24298 		    (un->un_f_blockcount_is_valid == TRUE)) {
24299 			stp->sd_capacity.value.ui64 =
24300 			    (uint64_t)((uint64_t)un->un_blockcount *
24301 			    un->un_sys_blocksize);
24302 		}
24303 	}
24304 	mutex_exit(SD_MUTEX(un));
24305 	SD_TRACE(SD_LOG_COMMON, un, "sd_check_media: done\n");
24306 	return (rval);
24307 }
24308 
24309 
24310 /*
24311  *    Function: sd_delayed_cv_broadcast
24312  *
24313  * Description: Delayed cv_broadcast to allow for target to recover from media
24314  *		insertion.
24315  *
24316  *   Arguments: arg - driver soft state (unit) structure
24317  */
24318 
24319 static void
24320 sd_delayed_cv_broadcast(void *arg)
24321 {
24322 	struct sd_lun *un = arg;
24323 
24324 	SD_TRACE(SD_LOG_COMMON, un, "sd_delayed_cv_broadcast\n");
24325 
24326 	mutex_enter(SD_MUTEX(un));
24327 	un->un_dcvb_timeid = NULL;
24328 	cv_broadcast(&un->un_state_cv);
24329 	mutex_exit(SD_MUTEX(un));
24330 }
24331 
24332 
24333 /*
24334  *    Function: sd_media_watch_cb
24335  *
24336  * Description: Callback routine used for support of the DKIOCSTATE ioctl. This
24337  *		routine processes the TUR sense data and updates the driver
24338  *		state if a transition has occurred. The user thread
24339  *		(sd_check_media) is then signalled.
24340  *
24341  *   Arguments: arg -   the device 'dev_t' is used for context to discriminate
24342  *			among multiple watches that share this callback function
24343  *		resultp - scsi watch facility result packet containing scsi
24344  *			  packet, status byte and sense data
24345  *
24346  * Return Code: 0 for success, -1 for failure
24347  */
24348 
24349 static int
24350 sd_media_watch_cb(caddr_t arg, struct scsi_watch_result *resultp)
24351 {
24352 	struct sd_lun			*un;
24353 	struct scsi_status		*statusp = resultp->statusp;
24354 	uint8_t				*sensep = (uint8_t *)resultp->sensep;
24355 	enum dkio_state			state = DKIO_NONE;
24356 	dev_t				dev = (dev_t)arg;
24357 	uchar_t				actual_sense_length;
24358 	uint8_t				skey, asc, ascq;
24359 
24360 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
24361 		return (-1);
24362 	}
24363 	actual_sense_length = resultp->actual_sense_length;
24364 
24365 	mutex_enter(SD_MUTEX(un));
24366 	SD_TRACE(SD_LOG_COMMON, un,
24367 	    "sd_media_watch_cb: status=%x, sensep=%p, len=%x\n",
24368 	    *((char *)statusp), (void *)sensep, actual_sense_length);
24369 
24370 	if (resultp->pkt->pkt_reason == CMD_DEV_GONE) {
24371 		un->un_mediastate = DKIO_DEV_GONE;
24372 		cv_broadcast(&un->un_state_cv);
24373 		mutex_exit(SD_MUTEX(un));
24374 
24375 		return (0);
24376 	}
24377 
24378 	if (un->un_f_mmc_cap && un->un_f_mmc_gesn_polling) {
24379 		if (sd_gesn_media_data_valid(resultp->mmc_data)) {
24380 			if ((resultp->mmc_data[5] &
24381 			    SD_GESN_MEDIA_EVENT_STATUS_PRESENT) != 0) {
24382 				state = DKIO_INSERTED;
24383 			} else {
24384 				state = DKIO_EJECTED;
24385 			}
24386 			if ((resultp->mmc_data[4] & SD_GESN_MEDIA_EVENT_CODE) ==
24387 			    SD_GESN_MEDIA_EVENT_EJECTREQUEST) {
24388 				sd_log_eject_request_event(un, KM_NOSLEEP);
24389 			}
24390 		}
24391 	} else if (sensep != NULL) {
24392 		/*
24393 		 * If there was a check condition then sensep points to valid
24394 		 * sense data. If status was not a check condition but a
24395 		 * reservation or busy status then the new state is DKIO_NONE.
24396 		 */
24397 		skey = scsi_sense_key(sensep);
24398 		asc = scsi_sense_asc(sensep);
24399 		ascq = scsi_sense_ascq(sensep);
24400 
24401 		SD_INFO(SD_LOG_COMMON, un,
24402 		    "sd_media_watch_cb: sense KEY=%x, ASC=%x, ASCQ=%x\n",
24403 		    skey, asc, ascq);
24404 		/* This routine only uses up to 13 bytes of sense data. */
24405 		if (actual_sense_length >= 13) {
24406 			if (skey == KEY_UNIT_ATTENTION) {
24407 				if (asc == 0x28) {
24408 					state = DKIO_INSERTED;
24409 				}
24410 			} else if (skey == KEY_NOT_READY) {
24411 				/*
24412 				 * Sense data of 02/06/00 means that the
24413 				 * drive could not read the media (No
24414 				 * reference position found). In this case
24415 				 * to prevent a hang on the DKIOCSTATE IOCTL
24416 				 * we set the media state to DKIO_INSERTED.
24417 				 */
24418 				if (asc == 0x06 && ascq == 0x00)
24419 					state = DKIO_INSERTED;
24420 
24421 				/*
24422 				 * Sense data of 02/04/02 means that the
24423 				 * host should send a start command.
24424 				 * Explicitly leave the media state as is
24425 				 * (inserted), since the media is present
24426 				 * and the host has stopped the device
24427 				 * for PM reasons. The next true
24428 				 * read/write to this media will bring
24429 				 * the device to the right state for
24430 				 * media access.
24431 				 */
24432 				if (asc == 0x3a) {
24433 					state = DKIO_EJECTED;
24434 				} else {
24435 					/*
24436 					 * If the drive is busy with an
24437 					 * operation or long write, keep the
24438 					 * media in an inserted state.
24439 					 */
24440 
24441 					if ((asc == 0x04) &&
24442 					    ((ascq == 0x02) ||
24443 					    (ascq == 0x07) ||
24444 					    (ascq == 0x08))) {
24445 						state = DKIO_INSERTED;
24446 					}
24447 				}
24448 			} else if (skey == KEY_NO_SENSE) {
24449 				if ((asc == 0x00) && (ascq == 0x00)) {
24450 					/*
24451 					 * Sense Data 00/00/00 does not provide
24452 					 * any information about the state of
24453 					 * the media. Ignore it.
24454 					 */
24455 					mutex_exit(SD_MUTEX(un));
24456 					return (0);
24457 				}
24458 			}
24459 		}
24460 	} else if ((*((char *)statusp) == STATUS_GOOD) &&
24461 	    (resultp->pkt->pkt_reason == CMD_CMPLT)) {
24462 		state = DKIO_INSERTED;
24463 	}
24464 
24465 	SD_TRACE(SD_LOG_COMMON, un,
24466 	    "sd_media_watch_cb: state=%x, specified=%x\n",
24467 	    state, un->un_specified_mediastate);
24468 
24469 	/*
24470 	 * now signal the waiting thread if this is *not* the specified state;
24471 	 * delay the signal if the state is DKIO_INSERTED to allow the target
24472 	 * to recover
24473 	 */
24474 	if (state != un->un_specified_mediastate) {
24475 		un->un_mediastate = state;
24476 		if (state == DKIO_INSERTED) {
24477 			/*
24478 			 * delay the signal to give the drive a chance
24479 			 * to do what it apparently needs to do
24480 			 */
24481 			SD_TRACE(SD_LOG_COMMON, un,
24482 			    "sd_media_watch_cb: delayed cv_broadcast\n");
24483 			if (un->un_dcvb_timeid == NULL) {
24484 				un->un_dcvb_timeid =
24485 				    timeout(sd_delayed_cv_broadcast, un,
24486 				    drv_usectohz((clock_t)MEDIA_ACCESS_DELAY));
24487 			}
24488 		} else {
24489 			SD_TRACE(SD_LOG_COMMON, un,
24490 			    "sd_media_watch_cb: immediate cv_broadcast\n");
24491 			cv_broadcast(&un->un_state_cv);
24492 		}
24493 	}
24494 	mutex_exit(SD_MUTEX(un));
24495 	return (0);
24496 }
24497 
24498 
24499 /*
24500  *    Function: sd_dkio_get_temp
24501  *
24502  * Description: This routine is the driver entry point for handling ioctl
24503  *		requests to get the disk temperature.
24504  *
24505  *   Arguments: dev  - the device number
24506  *		arg  - pointer to user provided dk_temperature structure.
24507  *		flag - this argument is a pass through to ddi_copyxxx()
24508  *		       directly from the mode argument of ioctl().
24509  *
24510  * Return Code: 0
24511  *		EFAULT
24512  *		ENXIO
24513  *		EAGAIN
24514  */
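
/*
 * Example (sketch; fd is assumed to be an open raw device descriptor):
 * read the current and reference temperatures. Setting DKT_BYPASS_PM in
 * dkt_flags avoids spinning up a powered-down drive; EAGAIN is returned
 * instead if the drive is in low power mode.
 *
 *	struct dk_temperature dkt;
 *
 *	bzero(&dkt, sizeof (dkt));
 *	dkt.dkt_flags = DKT_BYPASS_PM;
 *	if (ioctl(fd, DKIOCGTEMPERATURE, &dkt) == 0 &&
 *	    dkt.dkt_cur_temp != DKT_INVALID_TEMP) {
 *		(void) printf("current temperature: %d C\n",
 *		    dkt.dkt_cur_temp);
 *	}
 */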
24515 
24516 static int
24517 sd_dkio_get_temp(dev_t dev, caddr_t arg, int flag)
24518 {
24519 	struct sd_lun		*un = NULL;
24520 	struct dk_temperature	*dktemp = NULL;
24521 	uchar_t			*temperature_page;
24522 	int			rval = 0;
24523 	int			path_flag = SD_PATH_STANDARD;
24524 	sd_ssc_t		*ssc;
24525 
24526 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
24527 		return (ENXIO);
24528 	}
24529 
24530 	ssc = sd_ssc_init(un);
24531 	dktemp = kmem_zalloc(sizeof (struct dk_temperature), KM_SLEEP);
24532 
24533 	/* copyin the disk temp argument to get the user flags */
24534 	if (ddi_copyin((void *)arg, dktemp,
24535 	    sizeof (struct dk_temperature), flag) != 0) {
24536 		rval = EFAULT;
24537 		goto done;
24538 	}
24539 
24540 	/* Initialize the temperature to invalid. */
24541 	dktemp->dkt_cur_temp = (short)DKT_INVALID_TEMP;
24542 	dktemp->dkt_ref_temp = (short)DKT_INVALID_TEMP;
24543 
24544 	/*
24545 	 * Note: Investigate removing the "bypass pm" semantic.
24546 	 * Can we just bypass PM always?
24547 	 */
24548 	if (dktemp->dkt_flags & DKT_BYPASS_PM) {
24549 		path_flag = SD_PATH_DIRECT;
24550 		ASSERT(!mutex_owned(&un->un_pm_mutex));
24551 		mutex_enter(&un->un_pm_mutex);
24552 		if (SD_DEVICE_IS_IN_LOW_POWER(un)) {
24553 			/*
24554 			 * If DKT_BYPASS_PM is set, and the drive happens to be
24555 			 * in low power mode, we cannot wake it up; we need to
24556 			 * return EAGAIN.
24557 			 */
24558 			mutex_exit(&un->un_pm_mutex);
24559 			rval = EAGAIN;
24560 			goto done;
24561 		} else {
24562 			/*
24563 			 * Indicate to PM the device is busy. This is required
24564 			 * to avoid a race - i.e. the ioctl is issuing a
24565 			 * command and the pm framework brings down the device
24566 			 * to low power mode (possible power cut-off on some
24567 			 * platforms).
24568 			 */
24569 			mutex_exit(&un->un_pm_mutex);
24570 			if (sd_pm_entry(un) != DDI_SUCCESS) {
24571 				rval = EAGAIN;
24572 				goto done;
24573 			}
24574 		}
24575 	}
24576 
24577 	temperature_page = kmem_zalloc(TEMPERATURE_PAGE_SIZE, KM_SLEEP);
24578 
24579 	rval = sd_send_scsi_LOG_SENSE(ssc, temperature_page,
24580 	    TEMPERATURE_PAGE_SIZE, TEMPERATURE_PAGE, 1, 0, path_flag);
24581 	if (rval != 0)
24582 		goto done2;
24583 
24584 	/*
24585 	 * For the current temperature verify that the parameter length is 0x02
24586 	 * and the parameter code is 0x00
24587 	 */
24588 	if ((temperature_page[7] == 0x02) && (temperature_page[4] == 0x00) &&
24589 	    (temperature_page[5] == 0x00)) {
24590 		if (temperature_page[9] == 0xFF) {
24591 			dktemp->dkt_cur_temp = (short)DKT_INVALID_TEMP;
24592 		} else {
24593 			dktemp->dkt_cur_temp = (short)(temperature_page[9]);
24594 		}
24595 	}
24596 
24597 	/*
24598 	 * For the reference temperature verify that the parameter
24599 	 * length is 0x02 and the parameter code is 0x01
24600 	 */
24601 	if ((temperature_page[13] == 0x02) && (temperature_page[10] == 0x00) &&
24602 	    (temperature_page[11] == 0x01)) {
24603 		if (temperature_page[15] == 0xFF) {
24604 			dktemp->dkt_ref_temp = (short)DKT_INVALID_TEMP;
24605 		} else {
24606 			dktemp->dkt_ref_temp = (short)(temperature_page[15]);
24607 		}
24608 	}
24609 
24610 	/* Do the copyout regardless of the temperature commands status. */
24611 	if (ddi_copyout(dktemp, (void *)arg, sizeof (struct dk_temperature),
24612 	    flag) != 0) {
24613 		rval = EFAULT;
24614 		goto done1;
24615 	}
24616 
24617 done2:
24618 	if (rval != 0) {
24619 		if (rval == EIO)
24620 			sd_ssc_assessment(ssc, SD_FMT_STATUS_CHECK);
24621 		else
24622 			sd_ssc_assessment(ssc, SD_FMT_IGNORE);
24623 	}
24624 done1:
24625 	if (path_flag == SD_PATH_DIRECT) {
24626 		sd_pm_exit(un);
24627 	}
24628 
24629 	kmem_free(temperature_page, TEMPERATURE_PAGE_SIZE);
24630 done:
24631 	sd_ssc_fini(ssc);
24632 	if (dktemp != NULL) {
24633 		kmem_free(dktemp, sizeof (struct dk_temperature));
24634 	}
24635 
24636 	return (rval);
24637 }
24638 
24639 
24640 /*
24641  *    Function: sd_log_page_supported
24642  *
24643  * Description: This routine uses sd_send_scsi_LOG_SENSE to find the list of
24644  *		supported log pages.
24645  *
24646  *   Arguments: ssc   - ssc contains pointer to driver soft state (unit)
24647  *                      structure for this target.
24648  *		log_page - the code of the log page to look for.
24649  *
24650  * Return Code: -1 - on error (log sense is optional and may not be supported).
24651  *		0  - log page not found.
24652  *		1  - log page found.
24653  */
24654 
24655 static int
24656 sd_log_page_supported(sd_ssc_t *ssc, int log_page)
24657 {
24658 	uchar_t *log_page_data;
24659 	int	i;
24660 	int	match = 0;
24661 	int	log_size;
24662 	int	status = 0;
24663 	struct sd_lun	*un;
24664 
24665 	ASSERT(ssc != NULL);
24666 	un = ssc->ssc_un;
24667 	ASSERT(un != NULL);
24668 
24669 	log_page_data = kmem_zalloc(0xFF, KM_SLEEP);
24670 
24671 	status = sd_send_scsi_LOG_SENSE(ssc, log_page_data, 0xFF, 0, 0x01, 0,
24672 	    SD_PATH_DIRECT);
24673 
24674 	if (status != 0) {
24675 		if (status == EIO) {
24676 			/*
24677 			 * Some disks do not support log sense, so we
24678 			 * should ignore this kind of error (sense key is
24679 			 * 0x5 - illegal request).
24680 			 */
24681 			uint8_t *sensep;
24682 			int senlen;
24683 
24684 			sensep = (uint8_t *)ssc->ssc_uscsi_cmd->uscsi_rqbuf;
24685 			senlen = (int)(ssc->ssc_uscsi_cmd->uscsi_rqlen -
24686 			    ssc->ssc_uscsi_cmd->uscsi_rqresid);
24687 
24688 			if (senlen > 0 &&
24689 			    scsi_sense_key(sensep) == KEY_ILLEGAL_REQUEST) {
24690 				sd_ssc_assessment(ssc,
24691 				    SD_FMT_IGNORE_COMPROMISE);
24692 			} else {
24693 				sd_ssc_assessment(ssc, SD_FMT_STATUS_CHECK);
24694 			}
24695 		} else {
24696 			sd_ssc_assessment(ssc, SD_FMT_IGNORE);
24697 		}
24698 
24699 		SD_ERROR(SD_LOG_COMMON, un,
24700 		    "sd_log_page_supported: failed log page retrieval\n");
24701 		kmem_free(log_page_data, 0xFF);
24702 		return (-1);
24703 	}
24704 
24705 	log_size = log_page_data[3];
24706 
24707 	/*
24708 	 * The list of supported log pages start from the fourth byte. Check
24709 	 * The list of supported log pages starts at the fourth byte. Check
24710 	 */
24711 	for (i = 4; (i < (log_size + 4)) && !match; i++) {
24712 		if (log_page_data[i] == log_page) {
24713 			match++;
24714 		}
24715 	}
24716 	kmem_free(log_page_data, 0xFF);
24717 	return (match);
24718 }
24719 
24720 
24721 /*
24722  *    Function: sd_mhdioc_failfast
24723  *
24724  * Description: This routine is the driver entry point for handling ioctl
24725  *		requests to enable/disable the multihost failfast option.
24726  *		(MHIOCENFAILFAST)
24727  *
24728  *   Arguments: dev	- the device number
24729  *		arg	- user specified probing interval.
24730  *		flag	- this argument is a pass through to ddi_copyxxx()
24731  *			  directly from the mode argument of ioctl().
24732  *
24733  * Return Code: 0
24734  *		EFAULT
24735  *		ENXIO
24736  */
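
/*
 * Example (sketch; fd is assumed to be an open raw device descriptor):
 * enable failfast with a one second (1000 ms) probing interval, or
 * disable it by passing zero.
 *
 *	int mh_time = 1000;
 *
 *	if (ioctl(fd, MHIOCENFAILFAST, &mh_time) != 0)
 *		perror("MHIOCENFAILFAST");
 */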
24737 
24738 static int
24739 sd_mhdioc_failfast(dev_t dev, caddr_t arg, int flag)
24740 {
24741 	struct sd_lun	*un = NULL;
24742 	int		mh_time;
24743 	int		rval = 0;
24744 
24745 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
24746 		return (ENXIO);
24747 	}
24748 
24749 	if (ddi_copyin((void *)arg, &mh_time, sizeof (int), flag))
24750 		return (EFAULT);
24751 
24752 	if (mh_time) {
24753 		mutex_enter(SD_MUTEX(un));
24754 		un->un_resvd_status |= SD_FAILFAST;
24755 		mutex_exit(SD_MUTEX(un));
24756 		/*
24757 		 * If mh_time is INT_MAX, then this ioctl is being used for
24758 		 * SCSI-3 PGR purposes, and we don't need to spawn watch thread.
24759 		 */
24760 		if (mh_time != INT_MAX) {
24761 			rval = sd_check_mhd(dev, mh_time);
24762 		}
24763 	} else {
24764 		(void) sd_check_mhd(dev, 0);
24765 		mutex_enter(SD_MUTEX(un));
24766 		un->un_resvd_status &= ~SD_FAILFAST;
24767 		mutex_exit(SD_MUTEX(un));
24768 	}
24769 	return (rval);
24770 }
24771 
24772 
24773 /*
24774  *    Function: sd_mhdioc_takeown
24775  *
24776  * Description: This routine is the driver entry point for handling ioctl
24777  *		requests to forcefully acquire exclusive access rights to the
24778  *		multihost disk (MHIOCTKOWN).
24779  *
24780  *   Arguments: dev	- the device number
24781  *		arg	- user provided structure specifying the delay
24782  *			  parameters in milliseconds
24783  *		flag	- this argument is a pass through to ddi_copyxxx()
24784  *			  directly from the mode argument of ioctl().
24785  *
24786  * Return Code: 0
24787  *		EFAULT
24788  *		ENXIO
24789  */
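
/*
 * Example (sketch; fd is assumed to be an open raw device descriptor and
 * the delay value is illustrative): take ownership with an explicit
 * reinstate delay, or pass a NULL argument to accept the defaults.
 *
 *	struct mhioctkown tkown;
 *
 *	bzero(&tkown, sizeof (tkown));
 *	tkown.reinstate_resv_delay = 6000;
 *	if (ioctl(fd, MHIOCTKOWN, &tkown) != 0)
 *		perror("MHIOCTKOWN");
 */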
24790 
24791 static int
24792 sd_mhdioc_takeown(dev_t dev, caddr_t arg, int flag)
24793 {
24794 	struct sd_lun		*un = NULL;
24795 	struct mhioctkown	*tkown = NULL;
24796 	int			rval = 0;
24797 
24798 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
24799 		return (ENXIO);
24800 	}
24801 
24802 	if (arg != NULL) {
24803 		tkown = (struct mhioctkown *)
24804 		    kmem_zalloc(sizeof (struct mhioctkown), KM_SLEEP);
24805 		rval = ddi_copyin(arg, tkown, sizeof (struct mhioctkown), flag);
24806 		if (rval != 0) {
24807 			rval = EFAULT;
24808 			goto error;
24809 		}
24810 	}
24811 
24812 	rval = sd_take_ownership(dev, tkown);
24813 	mutex_enter(SD_MUTEX(un));
24814 	if (rval == 0) {
24815 		un->un_resvd_status |= SD_RESERVE;
24816 		if (tkown != NULL && tkown->reinstate_resv_delay != 0) {
24817 			sd_reinstate_resv_delay =
24818 			    tkown->reinstate_resv_delay * 1000;
24819 		} else {
24820 			sd_reinstate_resv_delay = SD_REINSTATE_RESV_DELAY;
24821 		}
24822 		/*
24823 		 * Give the scsi_watch routine interval set by
24824 		 * the MHIOCENFAILFAST ioctl precedence here.
24825 		 */
24826 		if ((un->un_resvd_status & SD_FAILFAST) == 0) {
24827 			mutex_exit(SD_MUTEX(un));
24828 			(void) sd_check_mhd(dev,
24829 			    sd_reinstate_resv_delay / 1000);
24830 			SD_TRACE(SD_LOG_IOCTL_MHD, un,
24831 			    "sd_mhdioc_takeown : %d\n",
24832 			    sd_reinstate_resv_delay);
24833 		} else {
24834 			mutex_exit(SD_MUTEX(un));
24835 		}
24836 		(void) scsi_reset_notify(SD_ADDRESS(un), SCSI_RESET_NOTIFY,
24837 		    sd_mhd_reset_notify_cb, (caddr_t)un);
24838 	} else {
24839 		un->un_resvd_status &= ~SD_RESERVE;
24840 		mutex_exit(SD_MUTEX(un));
24841 	}
24842 
24843 error:
24844 	if (tkown != NULL) {
24845 		kmem_free(tkown, sizeof (struct mhioctkown));
24846 	}
24847 	return (rval);
24848 }
24849 
24850 
24851 /*
24852  *    Function: sd_mhdioc_release
24853  *
24854  * Description: This routine is the driver entry point for handling ioctl
24855  *		requests to release exclusive access rights to the multihost
24856  *		disk (MHIOCRELEASE).
24857  *
24858  *   Arguments: dev	- the device number
24859  *
24860  * Return Code: 0
24861  *		ENXIO
24862  */
24863 
24864 static int
24865 sd_mhdioc_release(dev_t dev)
24866 {
24867 	struct sd_lun		*un = NULL;
24868 	timeout_id_t		resvd_timeid_save;
24869 	int			resvd_status_save;
24870 	int			rval = 0;
24871 
24872 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
24873 		return (ENXIO);
24874 	}
24875 
24876 	mutex_enter(SD_MUTEX(un));
24877 	resvd_status_save = un->un_resvd_status;
24878 	un->un_resvd_status &=
24879 	    ~(SD_RESERVE | SD_LOST_RESERVE | SD_WANT_RESERVE);
24880 	if (un->un_resvd_timeid) {
24881 		resvd_timeid_save = un->un_resvd_timeid;
24882 		un->un_resvd_timeid = NULL;
24883 		mutex_exit(SD_MUTEX(un));
24884 		(void) untimeout(resvd_timeid_save);
24885 	} else {
24886 		mutex_exit(SD_MUTEX(un));
24887 	}
24888 
24889 	/*
24890 	 * destroy any pending timeout thread that may be attempting to
24891 	 * reinstate reservation on this device.
24892 	 */
24893 	sd_rmv_resv_reclaim_req(dev);
24894 
24895 	if ((rval = sd_reserve_release(dev, SD_RELEASE)) == 0) {
24896 		mutex_enter(SD_MUTEX(un));
24897 		if ((un->un_mhd_token) &&
24898 		    ((un->un_resvd_status & SD_FAILFAST) == 0)) {
24899 			mutex_exit(SD_MUTEX(un));
24900 			(void) sd_check_mhd(dev, 0);
24901 		} else {
24902 			mutex_exit(SD_MUTEX(un));
24903 		}
24904 		(void) scsi_reset_notify(SD_ADDRESS(un), SCSI_RESET_CANCEL,
24905 		    sd_mhd_reset_notify_cb, (caddr_t)un);
24906 	} else {
24907 		/*
24908 		 * sd_mhd_watch_cb will restart the resvd recover timeout thread
24909 		 */
24910 		mutex_enter(SD_MUTEX(un));
24911 		un->un_resvd_status = resvd_status_save;
24912 		mutex_exit(SD_MUTEX(un));
24913 	}
24914 	return (rval);
24915 }
24916 
24917 
24918 /*
24919  *    Function: sd_mhdioc_register_devid
24920  *
24921  * Description: This routine is the driver entry point for handling ioctl
24922  *		requests to register the device id (MHIOCREREGISTERDEVID).
24923  *
24924  *		Note: The implementation for this ioctl has been updated to
24925  *		be consistent with the original PSARC case (1999/357)
24926  *		(4375899, 4241671, 4220005)
24927  *
24928  *   Arguments: dev	- the device number
24929  *
24930  * Return Code: 0
24931  *		ENXIO
24932  */
24933 
24934 static int
24935 sd_mhdioc_register_devid(dev_t dev)
24936 {
24937 	struct sd_lun	*un = NULL;
24938 	int		rval = 0;
24939 	sd_ssc_t	*ssc;
24940 
24941 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
24942 		return (ENXIO);
24943 	}
24944 
24945 	ASSERT(!mutex_owned(SD_MUTEX(un)));
24946 
24947 	mutex_enter(SD_MUTEX(un));
24948 
24949 	/* If a devid already exists, de-register it */
24950 	if (un->un_devid != NULL) {
24951 		ddi_devid_unregister(SD_DEVINFO(un));
24952 		/*
24953 		 * After unregistering the devid, free the devid memory.
24954 		 */
24955 		ddi_devid_free(un->un_devid);
24956 		un->un_devid = NULL;
24957 	}
24958 
24959 	/* Check for reservation conflict */
24960 	mutex_exit(SD_MUTEX(un));
24961 	ssc = sd_ssc_init(un);
24962 	rval = sd_send_scsi_TEST_UNIT_READY(ssc, 0);
24963 	mutex_enter(SD_MUTEX(un));
24964 
24965 	switch (rval) {
24966 	case 0:
24967 		sd_register_devid(ssc, SD_DEVINFO(un), SD_TARGET_IS_UNRESERVED);
24968 		break;
24969 	case EACCES:
24970 		break;
24971 	default:
24972 		rval = EIO;
24973 	}
24974 
24975 	mutex_exit(SD_MUTEX(un));
24976 	if (rval != 0) {
24977 		if (rval == EIO)
24978 			sd_ssc_assessment(ssc, SD_FMT_STATUS_CHECK);
24979 		else
24980 			sd_ssc_assessment(ssc, SD_FMT_IGNORE);
24981 	}
24982 	sd_ssc_fini(ssc);
24983 	return (rval);
24984 }
24985 
24986 
24987 /*
24988  *    Function: sd_mhdioc_inkeys
24989  *
24990  * Description: This routine is the driver entry point for handling ioctl
24991  *		requests to issue the SCSI-3 Persistent Reserve In (Read
24992  *		Keys) command to the device (MHIOCGRP_INKEYS).
24993  *
24994  *   Arguments: dev	- the device number
24995  *		arg	- user provided in_keys structure
24996  *		flag	- this argument is a pass through to ddi_copyxxx()
24997  *			  directly from the mode argument of ioctl().
24998  *
24999  * Return Code: code returned by sd_persistent_reservation_in_read_keys()
25000  *		ENXIO
25001  *		EFAULT
25002  */
25003 
25004 static int
25005 sd_mhdioc_inkeys(dev_t dev, caddr_t arg, int flag)
25006 {
25007 	struct sd_lun		*un;
25008 	mhioc_inkeys_t		inkeys;
25009 	int			rval = 0;
25010 
25011 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
25012 		return (ENXIO);
25013 	}
25014 
25015 #ifdef _MULTI_DATAMODEL
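      	/*
      	 * A 32-bit caller passes a struct mhioc_inkeys32, whose key list
      	 * field holds a 32-bit pointer; it is widened to a native pointer
      	 * before calling sd_persistent_reservation_in_read_keys(), and the
      	 * generation count is copied back out in the caller's data model.
      	 */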
25016 	switch (ddi_model_convert_from(flag & FMODELS)) {
25017 	case DDI_MODEL_ILP32: {
25018 		struct mhioc_inkeys32	inkeys32;
25019 
25020 		if (ddi_copyin(arg, &inkeys32,
25021 		    sizeof (struct mhioc_inkeys32), flag) != 0) {
25022 			return (EFAULT);
25023 		}
25024 		inkeys.li = (mhioc_key_list_t *)(uintptr_t)inkeys32.li;
25025 		if ((rval = sd_persistent_reservation_in_read_keys(un,
25026 		    &inkeys, flag)) != 0) {
25027 			return (rval);
25028 		}
25029 		inkeys32.generation = inkeys.generation;
25030 		if (ddi_copyout(&inkeys32, arg, sizeof (struct mhioc_inkeys32),
25031 		    flag) != 0) {
25032 			return (EFAULT);
25033 		}
25034 		break;
25035 	}
25036 	case DDI_MODEL_NONE:
25037 		if (ddi_copyin(arg, &inkeys, sizeof (mhioc_inkeys_t),
25038 		    flag) != 0) {
25039 			return (EFAULT);
25040 		}
25041 		if ((rval = sd_persistent_reservation_in_read_keys(un,
25042 		    &inkeys, flag)) != 0) {
25043 			return (rval);
25044 		}
25045 		if (ddi_copyout(&inkeys, arg, sizeof (mhioc_inkeys_t),
25046 		    flag) != 0) {
25047 			return (EFAULT);
25048 		}
25049 		break;
25050 	}
25051 
25052 #else /* ! _MULTI_DATAMODEL */
25053 
25054 	if (ddi_copyin(arg, &inkeys, sizeof (mhioc_inkeys_t), flag) != 0) {
25055 		return (EFAULT);
25056 	}
25057 	rval = sd_persistent_reservation_in_read_keys(un, &inkeys, flag);
25058 	if (rval != 0) {
25059 		return (rval);
25060 	}
25061 	if (ddi_copyout(&inkeys, arg, sizeof (mhioc_inkeys_t), flag) != 0) {
25062 		return (EFAULT);
25063 	}
25064 
25065 #endif /* _MULTI_DATAMODEL */
25066 
25067 	return (rval);
25068 }
25069 
25070 
25071 /*
25072  *    Function: sd_mhdioc_inresv
25073  *
25074  * Description: This routine is the driver entry point for handling ioctl
25075  *		requests to issue the SCSI-3 Persistent Reserve In (Read
25076  *		Reservations) command to the device (MHIOCGRP_INRESV).
25077  *
25078  *   Arguments: dev	- the device number
25079  *		arg	- user provided in_resv structure
25080  *		flag	- this argument is a pass through to ddi_copyxxx()
25081  *			  directly from the mode argument of ioctl().
25082  *
25083  * Return Code: code returned by sd_persistent_reservation_in_read_resv()
25084  *		ENXIO
25085  *		EFAULT
25086  */
25087 
25088 static int
25089 sd_mhdioc_inresv(dev_t dev, caddr_t arg, int flag)
25090 {
25091 	struct sd_lun		*un;
25092 	mhioc_inresvs_t		inresvs;
25093 	int			rval = 0;
25094 
25095 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
25096 		return (ENXIO);
25097 	}
25098 
25099 #ifdef _MULTI_DATAMODEL
25100 
25101 	switch (ddi_model_convert_from(flag & FMODELS)) {
25102 	case DDI_MODEL_ILP32: {
25103 		struct mhioc_inresvs32	inresvs32;
25104 
25105 		if (ddi_copyin(arg, &inresvs32,
25106 		    sizeof (struct mhioc_inresvs32), flag) != 0) {
25107 			return (EFAULT);
25108 		}
25109 		inresvs.li = (mhioc_resv_desc_list_t *)(uintptr_t)inresvs32.li;
25110 		if ((rval = sd_persistent_reservation_in_read_resv(un,
25111 		    &inresvs, flag)) != 0) {
25112 			return (rval);
25113 		}
25114 		inresvs32.generation = inresvs.generation;
25115 		if (ddi_copyout(&inresvs32, arg,
25116 		    sizeof (struct mhioc_inresvs32), flag) != 0) {
25117 			return (EFAULT);
25118 		}
25119 		break;
25120 	}
25121 	case DDI_MODEL_NONE:
25122 		if (ddi_copyin(arg, &inresvs,
25123 		    sizeof (mhioc_inresvs_t), flag) != 0) {
25124 			return (EFAULT);
25125 		}
25126 		if ((rval = sd_persistent_reservation_in_read_resv(un,
25127 		    &inresvs, flag)) != 0) {
25128 			return (rval);
25129 		}
25130 		if (ddi_copyout(&inresvs, arg,
25131 		    sizeof (mhioc_inresvs_t), flag) != 0) {
25132 			return (EFAULT);
25133 		}
25134 		break;
25135 	}
25136 
25137 #else /* ! _MULTI_DATAMODEL */
25138 
25139 	if (ddi_copyin(arg, &inresvs, sizeof (mhioc_inresvs_t), flag) != 0) {
25140 		return (EFAULT);
25141 	}
25142 	rval = sd_persistent_reservation_in_read_resv(un, &inresvs, flag);
25143 	if (rval != 0) {
25144 		return (rval);
25145 	}
25146 	if (ddi_copyout(&inresvs, arg, sizeof (mhioc_inresvs_t), flag) != 0) {
25147 		return (EFAULT);
25148 	}
25149 
25150 #endif /* _MULTI_DATAMODEL */
25151 
25152 	return (rval);
25153 }
25154 
25155 
25156 /*
25157  * The following routines support the clustering functionality described below
25158  * and implement lost reservation reclaim functionality.
25159  *
25160  * Clustering
25161  * ----------
25162  * The clustering code uses two different, independent forms of SCSI
25163  * reservation: traditional SCSI-2 Reserve/Release and the newer SCSI-3
25164  * Persistent Group Reservations. For any particular disk, it will use
25165  * either SCSI-2 or SCSI-3 PGR, but never both at the same time.
25166  *
25167  * SCSI-2
25168  * The cluster software takes ownership of a multi-hosted disk by issuing the
25169  * MHIOCTKOWN ioctl to the disk driver. It releases ownership by issuing the
25170  * MHIOCRELEASE ioctl.  Closely related is the MHIOCENFAILFAST ioctl -- a
25171  * cluster, just after taking ownership of the disk with the MHIOCTKOWN ioctl
25172  * then issues the MHIOCENFAILFAST ioctl.  This ioctl "enables failfast" in the
25173  * driver. The meaning of failfast is that if the driver (on this host) ever
25174  * encounters the scsi error return code RESERVATION_CONFLICT from the device,
25175  * it should immediately panic the host. The motivation for this ioctl is that
25176  * if this host does encounter reservation conflict, the underlying cause is
25177  * that some other host of the cluster has decided that this host is no longer
25178  * in the cluster and has seized control of the disks for itself. Since this
25179  * host is no longer in the cluster, it ought to panic itself. The
25180  * MHIOCENFAILFAST ioctl does two things:
25181  *	(a) it sets a flag that will cause any returned RESERVATION_CONFLICT
25182  *      error to panic the host
25183  *      (b) it sets up a periodic timer to test whether this host still has
25184  *      "access" (in that no other host has reserved the device):  if the
25185  *      periodic timer gets RESERVATION_CONFLICT, the host is panicked. The
25186  *      purpose of that periodic timer is to handle scenarios where the host is
25187  *      otherwise quiescent, temporarily doing no real i/o.
25188  * The MHIOCTKOWN ioctl will "break" a reservation that is held by another host,
25189  * by issuing a SCSI Bus Device Reset.  It will then issue a SCSI Reserve for
25190  * the device itself.
25191  *
25192  * SCSI-3 PGR
25193  * A direct semantic implementation of the SCSI-3 Persistent Reservation
25194  * facility is supported through the shared multihost disk ioctls
25195  * (MHIOCGRP_INKEYS, MHIOCGRP_INRESV, MHIOCGRP_REGISTER, MHIOCGRP_RESERVE,
25196  * MHIOCGRP_PREEMPTANDABORT, MHIOCGRP_CLEAR)
25197  *
25198  * Reservation Reclaim:
25199  * --------------------
25200  * To support the lost reservation reclaim operations this driver creates a
25201  * single thread to handle reinstating reservations on all devices that have
25202  * lost them. sd_resv_reclaim_requests are logged for all devices that have
25203  * lost reservations when the scsi watch facility calls back sd_mhd_watch_cb,
25204  * and the reservation reclaim thread loops through the requests to regain
25205  * the lost reservations.
25206  */
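
      /*
       * Illustrative sketch (not part of this driver): a cluster agent would
       * typically drive the SCSI-2 ioctls described above roughly as follows.
       * The device path and the two second failfast interval are hypothetical,
       * and error handling is omitted.
       *
       *	#include <sys/mhd.h>
       *	#include <fcntl.h>
       *	#include <strings.h>
       *	#include <unistd.h>
       *
       *	int fd = open("/dev/rdsk/c0t0d0s2", O_RDWR | O_NDELAY);
       *	struct mhioctkown tk;
       *	int ff = 2000;				(failfast probe, in msec)
       *
       *	bzero(&tk, sizeof (tk));		(zero means driver defaults)
       *	(void) ioctl(fd, MHIOCTKOWN, &tk);	(take ownership)
       *	(void) ioctl(fd, MHIOCENFAILFAST, &ff);	(enable failfast)
       *	... use the disk ...
       *	(void) ioctl(fd, MHIOCRELEASE, 0);	(release ownership)
       */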
25207 
25208 /*
25209  *    Function: sd_check_mhd()
25210  *
25211  * Description: This function sets up and submits a scsi watch request or
25212  *		terminates an existing watch request. This routine is used in
25213  *		support of reservation reclaim.
25214  *
25215  *   Arguments: dev    - the device 'dev_t' is used for context to discriminate
25216  *			 among multiple watches that share the callback function
25217  *		interval - the number of milliseconds specifying the watch
25218  *			   interval for issuing TEST UNIT READY commands. If
25219  *			   set to 0 the watch should be terminated. If the
25220  *			   interval is set to 0 and if the device is required
25221  *			   to hold reservation while disabling failfast, the
25222  *			   watch is restarted with an interval of
25223  *			   reinstate_resv_delay.
25224  *
25225  * Return Code: 0	   - Successful submit/terminate of scsi watch request
25226  *		ENXIO      - Indicates an invalid device was specified
25227  *		EAGAIN     - Unable to submit the scsi watch request
25228  */
25229 
25230 static int
25231 sd_check_mhd(dev_t dev, int interval)
25232 {
25233 	struct sd_lun	*un;
25234 	opaque_t	token;
25235 
25236 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
25237 		return (ENXIO);
25238 	}
25239 
25240 	/* is this a watch termination request? */
25241 	if (interval == 0) {
25242 		mutex_enter(SD_MUTEX(un));
25243 		/* if there is an existing watch task then terminate it */
25244 		if (un->un_mhd_token) {
25245 			token = un->un_mhd_token;
25246 			un->un_mhd_token = NULL;
25247 			mutex_exit(SD_MUTEX(un));
25248 			(void) scsi_watch_request_terminate(token,
25249 			    SCSI_WATCH_TERMINATE_ALL_WAIT);
25250 			mutex_enter(SD_MUTEX(un));
25251 		} else {
25252 			mutex_exit(SD_MUTEX(un));
25253 			/*
25254 			 * Note: If we return here we don't check for the
25255 			 * failfast case. This is the original legacy
25256 			 * implementation but perhaps we should be checking
25257 			 * the failfast case.
25258 			 */
25259 			return (0);
25260 		}
25261 		/*
25262 		 * If the device is required to hold reservation while
25263 		 * disabling failfast, we need to restart the scsi_watch
25264 		 * routine with an interval of reinstate_resv_delay.
25265 		 */
25266 		if (un->un_resvd_status & SD_RESERVE) {
25267 			interval = sd_reinstate_resv_delay / 1000;
25268 		} else {
25269 			/* no reservation to reinstate, so bail */
25270 			mutex_exit(SD_MUTEX(un));
25271 			return (0);
25272 		}
25273 		mutex_exit(SD_MUTEX(un));
25274 	}
25275 
25276 	/*
25277 	 * adjust minimum time interval to 1 second,
25278 	 * and convert from msecs to usecs
25279 	 */
25280 	if (interval > 0 && interval < 1000) {
25281 		interval = 1000;
25282 	}
25283 	interval *= 1000;
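
      	/*
      	 * For example, a caller-supplied interval of 2 msec is first
      	 * clamped to 1000 msec and then converted to 1000000 usec for
      	 * scsi_watch_request_submit() below.
      	 */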
25284 
25285 	/*
25286 	 * submit the request to the scsi_watch service
25287 	 */
25288 	token = scsi_watch_request_submit(SD_SCSI_DEVP(un), interval,
25289 	    SENSE_LENGTH, sd_mhd_watch_cb, (caddr_t)dev);
25290 	if (token == NULL) {
25291 		return (EAGAIN);
25292 	}
25293 
25294 	/*
25295 	 * save token for termination later on
25296 	 */
25297 	mutex_enter(SD_MUTEX(un));
25298 	un->un_mhd_token = token;
25299 	mutex_exit(SD_MUTEX(un));
25300 	return (0);
25301 }
25302 
25303 
25304 /*
25305  *    Function: sd_mhd_watch_cb()
25306  *
25307  * Description: This function is the call back function used by the scsi watch
25308  *		facility. The scsi watch facility sends the "Test Unit Ready"
25309  *		and processes the status. If applicable (i.e. a "Unit Attention"
25310  *		status and automatic "Request Sense" not used) the scsi watch
25311  *		facility will send a "Request Sense" and retrieve the sense data
25312  *		to be passed to this callback function. In either case,
25313  *		whether the "Request Sense" was automatic or submitted by the
25314  *		facility, this callback is passed the status and sense data.
25315  *
25316  *   Arguments: arg -   the device 'dev_t' is used for context to discriminate
25317  *			among multiple watches that share this callback function
25318  *		resultp - scsi watch facility result packet containing scsi
25319  *			  packet, status byte and sense data
25320  *
25321  * Return Code: 0 - continue the watch task
25322  *		non-zero - terminate the watch task
25323  */
25324 
25325 static int
25326 sd_mhd_watch_cb(caddr_t arg, struct scsi_watch_result *resultp)
25327 {
25328 	struct sd_lun			*un;
25329 	struct scsi_status		*statusp;
25330 	uint8_t				*sensep;
25331 	struct scsi_pkt			*pkt;
25332 	uchar_t				actual_sense_length;
25333 	dev_t				dev = (dev_t)arg;
25334 
25335 	ASSERT(resultp != NULL);
25336 	statusp			= resultp->statusp;
25337 	sensep			= (uint8_t *)resultp->sensep;
25338 	pkt			= resultp->pkt;
25339 	actual_sense_length	= resultp->actual_sense_length;
25340 
25341 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
25342 		return (ENXIO);
25343 	}
25344 
25345 	SD_TRACE(SD_LOG_IOCTL_MHD, un,
25346 	    "sd_mhd_watch_cb: reason '%s', status '%s'\n",
25347 	    scsi_rname(pkt->pkt_reason), sd_sname(*((unsigned char *)statusp)));
25348 
25349 	/* Begin processing of the status and/or sense data */
25350 	if (pkt->pkt_reason != CMD_CMPLT) {
25351 		/* Handle the incomplete packet */
25352 		sd_mhd_watch_incomplete(un, pkt);
25353 		return (0);
25354 	} else if (*((unsigned char *)statusp) != STATUS_GOOD) {
25355 		if (*((unsigned char *)statusp)
25356 		    == STATUS_RESERVATION_CONFLICT) {
25357 			/*
25358 			 * Handle a reservation conflict by panicking if
25359 			 * configured for failfast or by logging the conflict
25360 			 * and updating the reservation status
25361 			 */
25362 			mutex_enter(SD_MUTEX(un));
25363 			if ((un->un_resvd_status & SD_FAILFAST) &&
25364 			    (sd_failfast_enable)) {
25365 				sd_panic_for_res_conflict(un);
25366 				/*NOTREACHED*/
25367 			}
25368 			SD_INFO(SD_LOG_IOCTL_MHD, un,
25369 			    "sd_mhd_watch_cb: Reservation Conflict\n");
25370 			un->un_resvd_status |= SD_RESERVATION_CONFLICT;
25371 			mutex_exit(SD_MUTEX(un));
25372 		}
25373 	}
25374 
25375 	if (sensep != NULL) {
25376 		if (actual_sense_length >= (SENSE_LENGTH - 2)) {
25377 			mutex_enter(SD_MUTEX(un));
25378 			if ((scsi_sense_asc(sensep) ==
25379 			    SD_SCSI_RESET_SENSE_CODE) &&
25380 			    (un->un_resvd_status & SD_RESERVE)) {
25381 				/*
25382 				 * The additional sense code indicates a power
25383 				 * on or bus device reset has occurred; update
25384 				 * the reservation status.
25385 				 */
25386 				un->un_resvd_status |=
25387 				    (SD_LOST_RESERVE | SD_WANT_RESERVE);
25388 				SD_INFO(SD_LOG_IOCTL_MHD, un,
25389 				    "sd_mhd_watch_cb: Lost Reservation\n");
25390 			}
25391 		} else {
25392 			return (0);
25393 		}
25394 	} else {
25395 		mutex_enter(SD_MUTEX(un));
25396 	}
25397 
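      	/*
      	 * A summary of the flag interplay below: SD_RESERVE means this host
      	 * should hold the reservation; SD_LOST_RESERVE means a reset took it
      	 * away; SD_WANT_RESERVE, set when a reset arrived between probes,
      	 * causes any pending recovery timeout to be cancelled and restarted.
      	 * When SD_RESERVE and SD_LOST_RESERVE are both set, a timeout is
      	 * scheduled to run sd_mhd_resvd_recover().
      	 */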
25398 	if ((un->un_resvd_status & SD_RESERVE) &&
25399 	    (un->un_resvd_status & SD_LOST_RESERVE)) {
25400 		if (un->un_resvd_status & SD_WANT_RESERVE) {
25401 			/*
25402 			 * A reset occurred in between the last probe and this
25403 			 * one so if a timeout is pending cancel it.
25404 			 */
25405 			if (un->un_resvd_timeid) {
25406 				timeout_id_t temp_id = un->un_resvd_timeid;
25407 				un->un_resvd_timeid = NULL;
25408 				mutex_exit(SD_MUTEX(un));
25409 				(void) untimeout(temp_id);
25410 				mutex_enter(SD_MUTEX(un));
25411 			}
25412 			un->un_resvd_status &= ~SD_WANT_RESERVE;
25413 		}
25414 		if (un->un_resvd_timeid == NULL) {
25415 			/* Schedule a timeout to handle the lost reservation */
25416 			un->un_resvd_timeid = timeout(sd_mhd_resvd_recover,
25417 			    (void *)dev,
25418 			    drv_usectohz(sd_reinstate_resv_delay));
25419 		}
25420 	}
25421 	mutex_exit(SD_MUTEX(un));
25422 	return (0);
25423 }
25424 
25425 
25426 /*
25427  *    Function: sd_mhd_watch_incomplete()
25428  *
25429  * Description: This function is used to find out why a scsi pkt sent by the
25430  *		scsi watch facility was not completed. In some scenarios this
25431  *		routine simply returns; otherwise it resets the lun, target,
25432  *		or bus to see if the drive is still online.
25433  *
25434  *   Arguments: un  - driver soft state (unit) structure
25435  *		pkt - incomplete scsi pkt
25436  */
25437 
25438 static void
25439 sd_mhd_watch_incomplete(struct sd_lun *un, struct scsi_pkt *pkt)
25440 {
25441 	int	be_chatty;
25442 	int	perr;
25443 
25444 	ASSERT(pkt != NULL);
25445 	ASSERT(un != NULL);
25446 	be_chatty	= (!(pkt->pkt_flags & FLAG_SILENT));
25447 	perr		= (pkt->pkt_statistics & STAT_PERR);
25448 
25449 	mutex_enter(SD_MUTEX(un));
25450 	if (un->un_state == SD_STATE_DUMPING) {
25451 		mutex_exit(SD_MUTEX(un));
25452 		return;
25453 	}
25454 
25455 	switch (pkt->pkt_reason) {
25456 	case CMD_UNX_BUS_FREE:
25457 		/*
25458 		 * If we had a parity error that caused the target to drop BSY*,
25459 		 * don't be chatty about it.
25460 		 */
25461 		if (perr && be_chatty) {
25462 			be_chatty = 0;
25463 		}
25464 		break;
25465 	case CMD_TAG_REJECT:
25466 		/*
25467 		 * The SCSI-2 spec states that a tag reject will be sent by the
25468 		 * target if tagged queuing is not supported. A tag reject may
25469 		 * also be sent during certain initialization periods or to
25470 		 * control internal resources. For the latter case the target
25471 		 * may also return Queue Full.
25472 		 *
25473 		 * If this driver receives a tag reject from a target that is
25474 		 * going through an init period or controlling internal
25475 		 * resources, tagged queuing will be disabled. This is less
25476 		 * than optimal behavior, but the driver is unable to determine
25477 		 * the target state and assumes tagged queuing is not supported.
25478 		 */
25479 		pkt->pkt_flags = 0;
25480 		un->un_tagflags = 0;
25481 
25482 		if (un->un_f_opt_queueing == TRUE) {
25483 			un->un_throttle = min(un->un_throttle, 3);
25484 		} else {
25485 			un->un_throttle = 1;
25486 		}
25487 		mutex_exit(SD_MUTEX(un));
25488 		(void) scsi_ifsetcap(SD_ADDRESS(un), "tagged-qing", 0, 1);
25489 		mutex_enter(SD_MUTEX(un));
25490 		break;
25491 	case CMD_INCOMPLETE:
25492 		/*
25493 		 * The transport stopped with an abnormal state; fall through
25494 		 * and reset the target and/or bus, unless selection did not
25495 		 * complete (indicated by STATE_GOT_BUS), in which case we
25496 		 * don't want to go through a target/bus reset.
25497 		 */
25498 		if (pkt->pkt_state == STATE_GOT_BUS) {
25499 			break;
25500 		}
25501 		/*FALLTHROUGH*/
25502 
25503 	case CMD_TIMEOUT:
25504 	default:
25505 		/*
25506 		 * The lun may still be running the command, so a lun reset
25507 		 * should be attempted. If the lun reset fails or cannot be
25508 		 * issued, then try a target reset. Lastly, try a bus reset.
25509 		 */
25510 		if ((pkt->pkt_statistics &
25511 		    (STAT_BUS_RESET | STAT_DEV_RESET | STAT_ABORTED)) == 0) {
25512 			int reset_retval = 0;
25513 			mutex_exit(SD_MUTEX(un));
25514 			if (un->un_f_allow_bus_device_reset == TRUE) {
25515 				if (un->un_f_lun_reset_enabled == TRUE) {
25516 					reset_retval =
25517 					    scsi_reset(SD_ADDRESS(un),
25518 					    RESET_LUN);
25519 				}
25520 				if (reset_retval == 0) {
25521 					reset_retval =
25522 					    scsi_reset(SD_ADDRESS(un),
25523 					    RESET_TARGET);
25524 				}
25525 			}
25526 			if (reset_retval == 0) {
25527 				(void) scsi_reset(SD_ADDRESS(un), RESET_ALL);
25528 			}
25529 			mutex_enter(SD_MUTEX(un));
25530 		}
25531 		break;
25532 	}
25533 
25534 	/* A device/bus reset has occurred; update the reservation status. */
25535 	if ((pkt->pkt_reason == CMD_RESET) || (pkt->pkt_statistics &
25536 	    (STAT_BUS_RESET | STAT_DEV_RESET))) {
25537 		if ((un->un_resvd_status & SD_RESERVE) == SD_RESERVE) {
25538 			un->un_resvd_status |=
25539 			    (SD_LOST_RESERVE | SD_WANT_RESERVE);
25540 			SD_INFO(SD_LOG_IOCTL_MHD, un,
25541 			    "sd_mhd_watch_incomplete: Lost Reservation\n");
25542 		}
25543 	}
25544 
25545 	/*
25546 	 * The disk has been turned off; update the device state.
25547 	 *
25548 	 * Note: Should we be offlining the disk here?
25549 	 */
25550 	if (pkt->pkt_state == STATE_GOT_BUS) {
25551 		SD_INFO(SD_LOG_IOCTL_MHD, un, "sd_mhd_watch_incomplete: "
25552 		    "Disk not responding to selection\n");
25553 		if (un->un_state != SD_STATE_OFFLINE) {
25554 			New_state(un, SD_STATE_OFFLINE);
25555 		}
25556 	} else if (be_chatty) {
25557 		/*
25558 		 * suppress messages if they are all the same pkt reason;
25559 		 * with TQ, many (up to 256) are returned with the same
25560 		 * pkt_reason
25561 		 */
25562 		if (pkt->pkt_reason != un->un_last_pkt_reason) {
25563 			SD_ERROR(SD_LOG_IOCTL_MHD, un,
25564 			    "sd_mhd_watch_incomplete: "
25565 			    "SCSI transport failed: reason '%s'\n",
25566 			    scsi_rname(pkt->pkt_reason));
25567 		}
25568 	}
25569 	un->un_last_pkt_reason = pkt->pkt_reason;
25570 	mutex_exit(SD_MUTEX(un));
25571 }
25572 
25573 
25574 /*
25575  *    Function: sd_sname()
25576  *
25577  * Description: This is a simple little routine to return a string containing
25578  *		a printable description of the command status byte, for use
25579  *		in logging.
25580  *
25581  *   Arguments: status - the command status byte
25582  *
25583  * Return Code: char * - string containing status description.
25584  */
25585 
25586 static char *
25587 sd_sname(uchar_t status)
25588 {
25589 	switch (status & STATUS_MASK) {
25590 	case STATUS_GOOD:
25591 		return ("good status");
25592 	case STATUS_CHECK:
25593 		return ("check condition");
25594 	case STATUS_MET:
25595 		return ("condition met");
25596 	case STATUS_BUSY:
25597 		return ("busy");
25598 	case STATUS_INTERMEDIATE:
25599 		return ("intermediate");
25600 	case STATUS_INTERMEDIATE_MET:
25601 		return ("intermediate - condition met");
25602 	case STATUS_RESERVATION_CONFLICT:
25603 		return ("reservation conflict");
25604 	case STATUS_TERMINATED:
25605 		return ("command terminated");
25606 	case STATUS_QFULL:
25607 		return ("queue full");
25608 	default:
25609 		return ("<unknown status>");
25610 	}
25611 }
25612 
25613 
25614 /*
25615  *    Function: sd_mhd_resvd_recover()
25616  *
25617  * Description: This function adds a reservation entry to the
25618  *		sd_resv_reclaim_request list and signals the reservation
25619  *		reclaim thread that there is work pending. If the reservation
25620  *		reclaim thread has not been previously created this function
25621  *		will kick it off.
25622  *
25623  *   Arguments: arg -   the device 'dev_t' is used for context to discriminate
25624  *			among multiple watches that share this callback function
25625  *
25626  *     Context: This routine is called by timeout() and is run in interrupt
25627  *		context. It must not sleep or call other functions which may
25628  *		sleep.
25629  */
25630 
25631 static void
25632 sd_mhd_resvd_recover(void *arg)
25633 {
25634 	dev_t			dev = (dev_t)arg;
25635 	struct sd_lun		*un;
25636 	struct sd_thr_request	*sd_treq = NULL;
25637 	struct sd_thr_request	*sd_cur = NULL;
25638 	struct sd_thr_request	*sd_prev = NULL;
25639 	int			already_there = 0;
25640 
25641 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
25642 		return;
25643 	}
25644 
25645 	mutex_enter(SD_MUTEX(un));
25646 	un->un_resvd_timeid = NULL;
25647 	if (un->un_resvd_status & SD_WANT_RESERVE) {
25648 		/*
25649 		 * There was a reset so don't issue the reserve, allow the
25650 		 * sd_mhd_watch_cb callback function to notice this and
25651 		 * reschedule the timeout for reservation.
25652 		 */
25653 		mutex_exit(SD_MUTEX(un));
25654 		return;
25655 	}
25656 	mutex_exit(SD_MUTEX(un));
25657 
25658 	/*
25659 	 * Add this device to the sd_resv_reclaim_request list and the
25660 	 * sd_resv_reclaim_thread should take care of the rest.
25661 	 *
25662 	 * Note: We can't sleep in this context so if the memory allocation
25663 	 * fails allow the sd_mhd_watch_cb callback function to notice this and
25664 	 * reschedule the timeout for reservation.  (4378460)
25665 	 */
25666 	sd_treq = (struct sd_thr_request *)
25667 	    kmem_zalloc(sizeof (struct sd_thr_request), KM_NOSLEEP);
25668 	if (sd_treq == NULL) {
25669 		return;
25670 	}
25671 
25672 	sd_treq->sd_thr_req_next = NULL;
25673 	sd_treq->dev = dev;
25674 	mutex_enter(&sd_tr.srq_resv_reclaim_mutex);
25675 	if (sd_tr.srq_thr_req_head == NULL) {
25676 		sd_tr.srq_thr_req_head = sd_treq;
25677 	} else {
25678 		sd_cur = sd_prev = sd_tr.srq_thr_req_head;
25679 		for (; sd_cur != NULL; sd_cur = sd_cur->sd_thr_req_next) {
25680 			if (sd_cur->dev == dev) {
25681 				/*
25682 				 * already in Queue so don't log
25683 				 * another request for the device
25684 				 */
25685 				already_there = 1;
25686 				break;
25687 			}
25688 			sd_prev = sd_cur;
25689 		}
25690 		if (!already_there) {
25691 			SD_INFO(SD_LOG_IOCTL_MHD, un, "sd_mhd_resvd_recover: "
25692 			    "logging request for %lx\n", dev);
25693 			sd_prev->sd_thr_req_next = sd_treq;
25694 		} else {
25695 			kmem_free(sd_treq, sizeof (struct sd_thr_request));
25696 		}
25697 	}
25698 
25699 	/*
25700 	 * Create a kernel thread to do the reservation reclaim and free up
25701 	 * this (timeout) thread, since we cannot block in interrupt context
25702 	 * while the reclaim is performed.
25703 	 */
25704 	if (sd_tr.srq_resv_reclaim_thread == NULL)
25705 		sd_tr.srq_resv_reclaim_thread = thread_create(NULL, 0,
25706 		    sd_resv_reclaim_thread, NULL,
25707 		    0, &p0, TS_RUN, v.v_maxsyspri - 2);
25708 
25709 	/* Tell the reservation reclaim thread that it has work to do */
25710 	cv_signal(&sd_tr.srq_resv_reclaim_cv);
25711 	mutex_exit(&sd_tr.srq_resv_reclaim_mutex);
25712 }
25713 
25714 /*
25715  *    Function: sd_resv_reclaim_thread()
25716  *
25717  * Description: This function implements the reservation reclaim operations
25718  *
25719  *   Arguments: none. The thread services the global reclaim request list
25720  *		(sd_tr) until it is drained.
25721  */
25722 
25723 static void
25724 sd_resv_reclaim_thread(void)
25725 {
25726 	struct sd_lun		*un;
25727 	struct sd_thr_request	*sd_mhreq;
25728 
25729 	/* Wait for work */
25730 	mutex_enter(&sd_tr.srq_resv_reclaim_mutex);
25731 	if (sd_tr.srq_thr_req_head == NULL) {
25732 		cv_wait(&sd_tr.srq_resv_reclaim_cv,
25733 		    &sd_tr.srq_resv_reclaim_mutex);
25734 	}
25735 
25736 	/* Loop while we have work */
25737 	while ((sd_tr.srq_thr_cur_req = sd_tr.srq_thr_req_head) != NULL) {
25738 		un = ddi_get_soft_state(sd_state,
25739 		    SDUNIT(sd_tr.srq_thr_cur_req->dev));
25740 		if (un == NULL) {
25741 			/*
25742 			 * softstate structure is NULL so just
25743 			 * dequeue the request and continue
25744 			 */
25745 			sd_tr.srq_thr_req_head =
25746 			    sd_tr.srq_thr_cur_req->sd_thr_req_next;
25747 			kmem_free(sd_tr.srq_thr_cur_req,
25748 			    sizeof (struct sd_thr_request));
25749 			continue;
25750 		}
25751 
25752 		/* dequeue the request */
25753 		sd_mhreq = sd_tr.srq_thr_cur_req;
25754 		sd_tr.srq_thr_req_head =
25755 		    sd_tr.srq_thr_cur_req->sd_thr_req_next;
25756 		mutex_exit(&sd_tr.srq_resv_reclaim_mutex);
25757 
25758 		/*
25759 		 * Reclaim reservation only if SD_RESERVE is still set. There
25760 		 * may have been a call to MHIOCRELEASE before we got here.
25761 		 */
25762 		mutex_enter(SD_MUTEX(un));
25763 		if ((un->un_resvd_status & SD_RESERVE) == SD_RESERVE) {
25764 			/*
25765 			 * Note: The SD_LOST_RESERVE flag is cleared before
25766 			 * reclaiming the reservation. If it were cleared after
25767 			 * the call to sd_reserve_release, a reservation lost in
25768 			 * the window between completion of the reserve cmd and
25769 			 * the mutex_enter below might not be recognized.
25770 			 */
25771 			un->un_resvd_status &= ~SD_LOST_RESERVE;
25772 			mutex_exit(SD_MUTEX(un));
25773 
25774 			if (sd_reserve_release(sd_mhreq->dev,
25775 			    SD_RESERVE) == 0) {
25776 				mutex_enter(SD_MUTEX(un));
25777 				un->un_resvd_status |= SD_RESERVE;
25778 				mutex_exit(SD_MUTEX(un));
25779 				SD_INFO(SD_LOG_IOCTL_MHD, un,
25780 				    "sd_resv_reclaim_thread: "
25781 				    "Reservation Recovered\n");
25782 			} else {
25783 				mutex_enter(SD_MUTEX(un));
25784 				un->un_resvd_status |= SD_LOST_RESERVE;
25785 				mutex_exit(SD_MUTEX(un));
25786 				SD_INFO(SD_LOG_IOCTL_MHD, un,
25787 				    "sd_resv_reclaim_thread: Failed "
25788 				    "Reservation Recovery\n");
25789 			}
25790 		} else {
25791 			mutex_exit(SD_MUTEX(un));
25792 		}
25793 		mutex_enter(&sd_tr.srq_resv_reclaim_mutex);
25794 		ASSERT(sd_mhreq == sd_tr.srq_thr_cur_req);
25795 		kmem_free(sd_mhreq, sizeof (struct sd_thr_request));
25796 		sd_mhreq = sd_tr.srq_thr_cur_req = NULL;
25797 		/*
25798 		 * Wake up any thread waiting (on srq_inprocess_cv) for
25799 		 * this request to complete.
25800 		 */
25801 		cv_signal(&sd_tr.srq_inprocess_cv);
25802 		SD_TRACE(SD_LOG_IOCTL_MHD, un,
25803 		    "sd_resv_reclaim_thread: cv_signalling current request\n");
25804 	}
25805 
25806 	/*
25807 	 * Clean up the sd_tr structure now that this thread is about to exit.
25808 	 */
25809 	ASSERT(sd_tr.srq_thr_req_head == NULL);
25810 	ASSERT(sd_tr.srq_thr_cur_req == NULL);
25811 	sd_tr.srq_resv_reclaim_thread = NULL;
25812 	mutex_exit(&sd_tr.srq_resv_reclaim_mutex);
25813 	thread_exit();
25814 }
25815 
25816 
25817 /*
25818  *    Function: sd_rmv_resv_reclaim_req()
25819  *
25820  * Description: This function removes any pending reservation reclaim requests
25821  *		for the specified device.
25822  *
25823  *   Arguments: dev - the device 'dev_t'
25824  */
25825 
25826 static void
25827 sd_rmv_resv_reclaim_req(dev_t dev)
25828 {
25829 	struct sd_thr_request *sd_mhreq;
25830 	struct sd_thr_request *sd_prev;
25831 
25832 	/* Remove a reservation reclaim request from the list */
25833 	mutex_enter(&sd_tr.srq_resv_reclaim_mutex);
25834 	if (sd_tr.srq_thr_cur_req && sd_tr.srq_thr_cur_req->dev == dev) {
25835 		/*
25836 		 * We are attempting to reinstate reservation for
25837 		 * this device. We wait for sd_reserve_release()
25838 		 * to return before we return.
25839 		 */
25840 		cv_wait(&sd_tr.srq_inprocess_cv,
25841 		    &sd_tr.srq_resv_reclaim_mutex);
25842 	} else {
25843 		sd_prev = sd_mhreq = sd_tr.srq_thr_req_head;
25844 		if (sd_mhreq && sd_mhreq->dev == dev) {
25845 			sd_tr.srq_thr_req_head = sd_mhreq->sd_thr_req_next;
25846 			kmem_free(sd_mhreq, sizeof (struct sd_thr_request));
25847 			mutex_exit(&sd_tr.srq_resv_reclaim_mutex);
25848 			return;
25849 		}
25850 		for (; sd_mhreq != NULL; sd_mhreq = sd_mhreq->sd_thr_req_next) {
25851 			if (sd_mhreq->dev == dev) {
25852 				break;
25853 			}
25854 			sd_prev = sd_mhreq;
25855 		}
25856 		if (sd_mhreq != NULL) {
25857 			sd_prev->sd_thr_req_next = sd_mhreq->sd_thr_req_next;
25858 			kmem_free(sd_mhreq, sizeof (struct sd_thr_request));
25859 		}
25860 	}
25861 	mutex_exit(&sd_tr.srq_resv_reclaim_mutex);
25862 }
25863 
25864 
25865 /*
25866  *    Function: sd_mhd_reset_notify_cb()
25867  *
25868  * Description: This is a call back function for scsi_reset_notify. This
25869  *		function updates the softstate reserved status and logs the
25870  *		reset. The driver scsi watch facility callback function
25871  *		(sd_mhd_watch_cb) and reservation reclaim thread functionality
25872  *		will reclaim the reservation.
25873  *
25874  *   Arguments: arg  - driver soft state (unit) structure
25875  */
25876 
25877 static void
25878 sd_mhd_reset_notify_cb(caddr_t arg)
25879 {
25880 	struct sd_lun *un = (struct sd_lun *)arg;
25881 
25882 	mutex_enter(SD_MUTEX(un));
25883 	if ((un->un_resvd_status & SD_RESERVE) == SD_RESERVE) {
25884 		un->un_resvd_status |= (SD_LOST_RESERVE | SD_WANT_RESERVE);
25885 		SD_INFO(SD_LOG_IOCTL_MHD, un,
25886 		    "sd_mhd_reset_notify_cb: Lost Reservation\n");
25887 	}
25888 	mutex_exit(SD_MUTEX(un));
25889 }
25890 
25891 
25892 /*
25893  *    Function: sd_take_ownership()
25894  *
25895  * Description: This routine implements an algorithm to achieve a stable
25896  *		reservation on disks which don't implement priority reserve,
25897  *		and makes sure that other hosts' re-reservation attempts fail.
25898  *		The algorithm consists of a loop that keeps issuing RESERVE for
25899  *		some period of time (min_ownership_delay, default 6 seconds).
25900  *		During that loop, it looks to see if there has been a bus device
25901  *		reset or bus reset (both of which cause an existing reservation
25902  *		to be lost). If the reservation is lost, it reissues RESERVE until a
25903  *		period of min_ownership_delay with no resets has gone by, or
25904  *		until max_ownership_delay has expired. This loop ensures that
25905  *		the host really did manage to reserve the device, in spite of
25906  *		resets. The looping for min_ownership_delay (default six
25907  *		seconds) is important to early generation clustering products,
25908  *		Solstice HA 1.x and Sun Cluster 2.x. Those products use an
25909  *		MHIOCENFAILFAST periodic timer of two seconds. By having
25910  *		MHIOCTKOWN issue Reserves in a loop for six seconds, and having
25911  *		MHIOCENFAILFAST poll every two seconds, the idea is that by the
25912  *		time the MHIOCTKOWN ioctl returns, the other host (if any) will
25913  *		have already noticed, via the MHIOCENFAILFAST polling, that it
25914  *		no longer "owns" the disk and will have panicked itself.  Thus,
25915  *		the host issuing the MHIOCTKOWN is assured (with timing
25916  *		dependencies) that by the time it actually starts to use the
25917  *		disk for real work, the old owner is no longer accessing it.
25918  *
25919  *		min_ownership_delay is the minimum amount of time for which the
25920  *		disk must be reserved continuously devoid of resets before the
25921  *		MHIOCTKOWN ioctl will return success.
25922  *
25923  *		max_ownership_delay indicates the amount of time by which the
25924  *		take ownership should succeed or timeout with an error.
25925  *
25926  *   Arguments: dev - the device 'dev_t'
25927  *		*p  - struct containing timing info.
25928  *
25929  * Return Code: 0 for success or error code
25930  */
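
      /*
       * A worked example with the defaults below (a sketch; exact tick counts
       * depend on hz): min_ownership_delay is 6000000 usec (6 seconds) and
       * max_ownership_delay is 30000000 usec (30 seconds).  Each loop
       * iteration delays 500 msec, so stable ownership requires at least four
       * consecutive conflict-free RESERVEs (about 2 seconds) and a full 6
       * seconds with no reset; any reset or lost reservation restarts the 6
       * second window, and EACCES is returned once 30 seconds elapse without
       * achieving stability.
       */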
25931 
25932 static int
25933 sd_take_ownership(dev_t dev, struct mhioctkown *p)
25934 {
25935 	struct sd_lun	*un;
25936 	int		rval;
25937 	int		err;
25938 	int		reservation_count   = 0;
25939 	int		min_ownership_delay =  6000000; /* in usec */
25940 	int		max_ownership_delay = 30000000; /* in usec */
25941 	clock_t		start_time;	/* starting time of this algorithm */
25942 	clock_t		end_time;	/* time limit for giving up */
25943 	clock_t		ownership_time;	/* time limit for stable ownership */
25944 	clock_t		current_time;
25945 	clock_t		previous_current_time;
25946 
25947 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
25948 		return (ENXIO);
25949 	}
25950 
25951 	/*
25952 	 * Attempt a device reservation. A priority reservation is requested.
25953 	 */
25954 	if ((rval = sd_reserve_release(dev, SD_PRIORITY_RESERVE))
25955 	    != SD_SUCCESS) {
25956 		SD_ERROR(SD_LOG_IOCTL_MHD, un,
25957 		    "sd_take_ownership: return(1)=%d\n", rval);
25958 		return (rval);
25959 	}
25960 
25961 	/* Update the softstate reserved status to indicate the reservation */
25962 	mutex_enter(SD_MUTEX(un));
25963 	un->un_resvd_status |= SD_RESERVE;
25964 	un->un_resvd_status &=
25965 	    ~(SD_LOST_RESERVE | SD_WANT_RESERVE | SD_RESERVATION_CONFLICT);
25966 	mutex_exit(SD_MUTEX(un));
25967 
25968 	if (p != NULL) {
25969 		if (p->min_ownership_delay != 0) {
25970 			min_ownership_delay = p->min_ownership_delay * 1000;
25971 		}
25972 		if (p->max_ownership_delay != 0) {
25973 			max_ownership_delay = p->max_ownership_delay * 1000;
25974 		}
25975 	}
25976 	SD_INFO(SD_LOG_IOCTL_MHD, un,
25977 	    "sd_take_ownership: min, max delays: %d, %d\n",
25978 	    min_ownership_delay, max_ownership_delay);
25979 
25980 	start_time = ddi_get_lbolt();
25981 	current_time	= start_time;
25982 	ownership_time	= current_time + drv_usectohz(min_ownership_delay);
25983 	end_time	= start_time + drv_usectohz(max_ownership_delay);
25984 
25985 	while (current_time - end_time < 0) {
25986 		delay(drv_usectohz(500000));
25987 
25988 		if ((err = sd_reserve_release(dev, SD_RESERVE)) != 0) {
25989 			if ((sd_reserve_release(dev, SD_RESERVE)) != 0) {
25990 				mutex_enter(SD_MUTEX(un));
25991 				rval = (un->un_resvd_status &
25992 				    SD_RESERVATION_CONFLICT) ? EACCES : EIO;
25993 				mutex_exit(SD_MUTEX(un));
25994 				break;
25995 			}
25996 		}
25997 		previous_current_time = current_time;
25998 		current_time = ddi_get_lbolt();
25999 		mutex_enter(SD_MUTEX(un));
26000 		if (err || (un->un_resvd_status & SD_LOST_RESERVE)) {
26001 			ownership_time = ddi_get_lbolt() +
26002 			    drv_usectohz(min_ownership_delay);
26003 			reservation_count = 0;
26004 		} else {
26005 			reservation_count++;
26006 		}
26007 		un->un_resvd_status |= SD_RESERVE;
26008 		un->un_resvd_status &= ~(SD_LOST_RESERVE | SD_WANT_RESERVE);
26009 		mutex_exit(SD_MUTEX(un));
26010 
26011 		SD_INFO(SD_LOG_IOCTL_MHD, un,
26012 		    "sd_take_ownership: ticks for loop iteration=%ld, "
26013 		    "reservation=%s\n", (current_time - previous_current_time),
26014 		    reservation_count ? "ok" : "reclaimed");
26015 
26016 		if (current_time - ownership_time >= 0 &&
26017 		    reservation_count >= 4) {
26018 			rval = 0; /* Achieved a stable ownership */
26019 			break;
26020 		}
26021 		if (current_time - end_time >= 0) {
26022 			rval = EACCES; /* No ownership in max possible time */
26023 			break;
26024 		}
26025 	}
26026 	SD_TRACE(SD_LOG_IOCTL_MHD, un,
26027 	    "sd_take_ownership: return(2)=%d\n", rval);
26028 	return (rval);
26029 }
26030 
26031 
26032 /*
26033  *    Function: sd_reserve_release()
26034  *
26035  * Description: This function builds and sends scsi RESERVE, RELEASE, and
26036  *		PRIORITY RESERVE commands based on a user-specified command type
26037  *
26038  *   Arguments: dev - the device 'dev_t'
26039  *		cmd - user specified command type; one of SD_PRIORITY_RESERVE,
26040  *		      SD_RESERVE, SD_RELEASE
26041  *
26042  * Return Code: 0 or Error Code
26043  */
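
      /*
       * For reference (a sketch based on the SPC definitions in
       * <sys/scsi/generic/commands.h>): both commands built below are
       * six-byte Group 0 CDBs -- RESERVE(6) is opcode SCMD_RESERVE (0x16)
       * and RELEASE(6) is opcode SCMD_RELEASE (0x17) -- with bytes 1-4 and
       * the control byte left zero, which is exactly what the bzero() and
       * cdb[0] assignment in the function body produce.
       */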
26044 
26045 static int
26046 sd_reserve_release(dev_t dev, int cmd)
26047 {
26048 	struct uscsi_cmd	*com = NULL;
26049 	struct sd_lun		*un = NULL;
26050 	char			cdb[CDB_GROUP0];
26051 	int			rval;
26052 
26053 	ASSERT((cmd == SD_RELEASE) || (cmd == SD_RESERVE) ||
26054 	    (cmd == SD_PRIORITY_RESERVE));
26055 
26056 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
26057 		return (ENXIO);
26058 	}
26059 
26060 	/* instantiate and initialize the command and cdb */
26061 	com = kmem_zalloc(sizeof (*com), KM_SLEEP);
26062 	bzero(cdb, CDB_GROUP0);
26063 	com->uscsi_flags   = USCSI_SILENT;
26064 	com->uscsi_timeout = un->un_reserve_release_time;
26065 	com->uscsi_cdblen  = CDB_GROUP0;
26066 	com->uscsi_cdb	   = cdb;
26067 	if (cmd == SD_RELEASE) {
26068 		cdb[0] = SCMD_RELEASE;
26069 	} else {
26070 		cdb[0] = SCMD_RESERVE;
26071 	}
26072 
26073 	/* Send the command. */
26074 	rval = sd_send_scsi_cmd(dev, com, FKIOCTL, UIO_SYSSPACE,
26075 	    SD_PATH_STANDARD);
26076 
26077 	/*
26078 	 * "break" a reservation that is held by another host, by issuing a
26079 	 * reset if priority reserve is desired, and we could not get the
26080 	 * device.
26081 	 */
26082 	if ((cmd == SD_PRIORITY_RESERVE) &&
26083 	    (rval != 0) && (com->uscsi_status == STATUS_RESERVATION_CONFLICT)) {
26084 		/*
26085 		 * First try to reset the LUN. If we cannot, then try a target
26086 		 * reset, followed by a bus reset if the target reset fails.
26087 		 */
26088 		int reset_retval = 0;
26089 		if (un->un_f_lun_reset_enabled == TRUE) {
26090 			reset_retval = scsi_reset(SD_ADDRESS(un), RESET_LUN);
26091 		}
26092 		if (reset_retval == 0) {
26093 			/* The LUN reset either failed or was not issued */
26094 			reset_retval = scsi_reset(SD_ADDRESS(un), RESET_TARGET);
26095 		}
26096 		if ((reset_retval == 0) &&
26097 		    (scsi_reset(SD_ADDRESS(un), RESET_ALL) == 0)) {
26098 			rval = EIO;
26099 			kmem_free(com, sizeof (*com));
26100 			return (rval);
26101 		}
26102 
26103 		bzero(com, sizeof (struct uscsi_cmd));
26104 		com->uscsi_flags   = USCSI_SILENT;
26105 		com->uscsi_cdb	   = cdb;
26106 		com->uscsi_cdblen  = CDB_GROUP0;
26107 		com->uscsi_timeout = 5;
26108 
26109 		/*
26110 		 * Reissue the last reserve command, this time without request
26111 		 * sense.  Assume that it is just a regular reserve command.
26112 		 */
26113 		rval = sd_send_scsi_cmd(dev, com, FKIOCTL, UIO_SYSSPACE,
26114 		    SD_PATH_STANDARD);
26115 	}
26116 
26117 	/* Return an error if still getting a reservation conflict. */
26118 	if ((rval != 0) && (com->uscsi_status == STATUS_RESERVATION_CONFLICT)) {
26119 		rval = EACCES;
26120 	}
26121 
26122 	kmem_free(com, sizeof (*com));
26123 	return (rval);
26124 }
26125 
26126 
26127 #define	SD_NDUMP_RETRIES	12
26128 /*
26129  *	System Crash Dump routine
26130  */
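
      /*
       * A note on context (summarizing the constraints visible below):
       * sddump() is called in panic/dump or cpr context, runs effectively
       * single threaded, and cannot rely on interrupt service, so every
       * command is issued with FLAG_NOINTR and completed via polling
       * (sd_scsi_poll()/sd_ddi_scsi_poll()) rather than through the normal
       * interrupt-driven I/O path.
       */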
26131 
26132 static int
26133 sddump(dev_t dev, caddr_t addr, daddr_t blkno, int nblk)
26134 {
26135 	int		instance;
26136 	int		partition;
26137 	int		i;
26138 	int		err;
26139 	struct sd_lun	*un;
26140 	struct scsi_pkt *wr_pktp;
26141 	struct buf	*wr_bp;
26142 	struct buf	wr_buf;
26143 	daddr_t		tgt_byte_offset; /* rmw - byte offset for target */
26144 	daddr_t		tgt_blkno;	/* rmw - blkno for target */
26145 	size_t		tgt_byte_count; /* rmw -  # of bytes to xfer */
26146 	size_t		tgt_nblk; /* rmw -  # of tgt blks to xfer */
26147 	size_t		io_start_offset;
26148 	int		doing_rmw = FALSE;
26149 	int		rval;
26150 	ssize_t		dma_resid;
26151 	daddr_t		oblkno;
26152 	diskaddr_t	nblks = 0;
26153 	diskaddr_t	start_block;
26154 
26155 	instance = SDUNIT(dev);
26156 	if (((un = ddi_get_soft_state(sd_state, instance)) == NULL) ||
26157 	    !SD_IS_VALID_LABEL(un) || ISCD(un)) {
26158 		return (ENXIO);
26159 	}
26160 
26161 	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*un))
26162 
26163 	SD_TRACE(SD_LOG_DUMP, un, "sddump: entry\n");
26164 
26165 	partition = SDPART(dev);
26166 	SD_INFO(SD_LOG_DUMP, un, "sddump: partition = %d\n", partition);
26167 
26168 	if (!(NOT_DEVBSIZE(un))) {
26169 		int secmask = 0;
26170 		int blknomask = 0;
26171 
26172 		blknomask = (un->un_tgt_blocksize / DEV_BSIZE) - 1;
26173 		secmask = un->un_tgt_blocksize - 1;
26174 
26175 		if (blkno & blknomask) {
26176 			SD_TRACE(SD_LOG_DUMP, un,
26177 			    "sddump: dump start block not modulo %d\n",
26178 			    un->un_tgt_blocksize);
26179 			return (EINVAL);
26180 		}
26181 
26182 		if ((nblk * DEV_BSIZE) & secmask) {
26183 			SD_TRACE(SD_LOG_DUMP, un,
26184 			    "sddump: dump length not modulo %d\n",
26185 			    un->un_tgt_blocksize);
26186 			return (EINVAL);
26187 		}
26188 
26189 	}
26190 
26191 	/* Validate the blocks to dump against the partition size. */
26192 
26193 	(void) cmlb_partinfo(un->un_cmlbhandle, partition,
26194 	    &nblks, &start_block, NULL, NULL, (void *)SD_PATH_DIRECT);
26195 
26196 	if (NOT_DEVBSIZE(un)) {
26197 		if ((blkno + nblk) > nblks) {
26198 			SD_TRACE(SD_LOG_DUMP, un,
26199 			    "sddump: dump range larger than partition: "
26200 			    "blkno = 0x%x, nblk = 0x%x, dkl_nblk = 0x%x\n",
26201 			    blkno, nblk, nblks);
26202 			return (EINVAL);
26203 		}
26204 	} else {
26205 		if (((blkno / (un->un_tgt_blocksize / DEV_BSIZE)) +
26206 		    (nblk / (un->un_tgt_blocksize / DEV_BSIZE))) > nblks) {
26207 			SD_TRACE(SD_LOG_DUMP, un,
26208 			    "sddump: dump range larger than partition: "
26209 			    "blkno = 0x%x, nblk = 0x%x, dkl_nblk = 0x%x\n",
26210 			    blkno, nblk, nblks);
26211 			return (EINVAL);
26212 		}
26213 	}
26214 
26215 	mutex_enter(&un->un_pm_mutex);
26216 	if (SD_DEVICE_IS_IN_LOW_POWER(un)) {
26217 		struct scsi_pkt *start_pktp;
26218 
26219 		mutex_exit(&un->un_pm_mutex);
26220 
26221 		/*
26222 		 * Use the PM framework to power on the HBA first.
26223 		 */
26224 		(void) pm_raise_power(SD_DEVINFO(un), 0,
26225 		    SD_PM_STATE_ACTIVE(un));
26226 
26227 		/*
26228 		 * Dump no longer uses sdpower to power on a device; it is
26229 		 * done in-line here so it can be performed in polled mode.
26230 		 */
26231 
26232 		SD_INFO(SD_LOG_DUMP, un, "sddump: starting device\n");
26233 
26234 		start_pktp = scsi_init_pkt(SD_ADDRESS(un), NULL, NULL,
26235 		    CDB_GROUP0, un->un_status_len, 0, 0, NULL_FUNC, NULL);
26236 
26237 		if (start_pktp == NULL) {
26238 			/* We were not given a SCSI packet, fail. */
26239 			return (EIO);
26240 		}
26241 		bzero(start_pktp->pkt_cdbp, CDB_GROUP0);
26242 		start_pktp->pkt_cdbp[0] = SCMD_START_STOP;
26243 		start_pktp->pkt_cdbp[4] = SD_TARGET_START;
26244 		start_pktp->pkt_flags = FLAG_NOINTR;
26245 
26246 		mutex_enter(SD_MUTEX(un));
26247 		SD_FILL_SCSI1_LUN(un, start_pktp);
26248 		mutex_exit(SD_MUTEX(un));
26249 		/*
26250 		 * Scsi_poll returns 0 (success) if the command completes and
26251 		 * the status block is STATUS_GOOD.
26252 		 */
26253 		if (sd_scsi_poll(un, start_pktp) != 0) {
26254 			scsi_destroy_pkt(start_pktp);
26255 			return (EIO);
26256 		}
26257 		scsi_destroy_pkt(start_pktp);
26258 		(void) sd_pm_state_change(un, SD_PM_STATE_ACTIVE(un),
26259 		    SD_PM_STATE_CHANGE);
26260 	} else {
26261 		mutex_exit(&un->un_pm_mutex);
26262 	}
26263 
26264 	mutex_enter(SD_MUTEX(un));
26265 	un->un_throttle = 0;
26266 
26267 	/*
26268 	 * The first time through, reset the specific target device.
26269 	 * However, when cpr calls sddump we know that sd is in a
26270 	 * good state, so no bus reset is required.
26271 	 * Clear sense data via a Request Sense cmd.
26272 	 * In sddump we don't care about allow_bus_device_reset anymore.
26273 	 */
26274 
26275 	if ((un->un_state != SD_STATE_SUSPENDED) &&
26276 	    (un->un_state != SD_STATE_DUMPING)) {
26277 
26278 		New_state(un, SD_STATE_DUMPING);
26279 
26280 		if (un->un_f_is_fibre == FALSE) {
26281 			mutex_exit(SD_MUTEX(un));
26282 			/*
26283 			 * Attempt a bus reset for parallel scsi.
26284 			 *
26285 			 * Note: A bus reset is required because on some host
26286 			 * systems (e.g. the E420R) a bus device reset is
26287 			 * insufficient to reset the state of the target.
26288 			 *
26289 			 * Note: Don't issue the reset for fibre-channel,
26290 			 * because this tends to hang the bus (loop) for
26291 			 * too long while everyone is logging out and in
26292 			 * and the deadman timer for dumping will fire
26293 			 * before the dump is complete.
26294 			 */
26295 			if (scsi_reset(SD_ADDRESS(un), RESET_ALL) == 0) {
26296 				mutex_enter(SD_MUTEX(un));
26297 				Restore_state(un);
26298 				mutex_exit(SD_MUTEX(un));
26299 				return (EIO);
26300 			}
26301 
26302 			/* Delay to give the device some recovery time. */
26303 			drv_usecwait(10000);
26304 
26305 			if (sd_send_polled_RQS(un) == SD_FAILURE) {
26306 				SD_INFO(SD_LOG_DUMP, un,
26307 				    "sddump: sd_send_polled_RQS failed\n");
26308 			}
26309 			mutex_enter(SD_MUTEX(un));
26310 		}
26311 	}
26312 
26313 	/*
26314 	 * Convert the partition-relative block number to a
26315 	 * disk physical block number.
26316 	 */
26317 	if (NOT_DEVBSIZE(un)) {
26318 		blkno += start_block;
26319 	} else {
26320 		blkno = blkno / (un->un_tgt_blocksize / DEV_BSIZE);
26321 		blkno += start_block;
26322 	}
26323 
26324 	SD_INFO(SD_LOG_DUMP, un, "sddump: disk blkno = 0x%x\n", blkno);
26325 
26326 
26327 	/*
26328 	 * Check if the device has a non-512 block size.
26329 	 */
26330 	wr_bp = NULL;
26331 	if (NOT_DEVBSIZE(un)) {
26332 		tgt_byte_offset = blkno * un->un_sys_blocksize;
26333 		tgt_byte_count = nblk * un->un_sys_blocksize;
26334 		if ((tgt_byte_offset % un->un_tgt_blocksize) ||
26335 		    (tgt_byte_count % un->un_tgt_blocksize)) {
26336 			doing_rmw = TRUE;
26337 			/*
26338 			 * Calculate the block number and the number of blocks
26339 			 * in terms of the media block size.
26340 			 */
26341 			tgt_blkno = tgt_byte_offset / un->un_tgt_blocksize;
26342 			tgt_nblk =
26343 			    ((tgt_byte_offset + tgt_byte_count +
26344 			    (un->un_tgt_blocksize - 1)) /
26345 			    un->un_tgt_blocksize) - tgt_blkno;
26346 
26347 			/*
26348 			 * Invoke the routine which is going to do the read
26349 			 * part of the read-modify-write.
26350 			 * Note that this routine returns a pointer to
26351 			 * a valid bp in wr_bp.
26352 			 */
26353 			err = sddump_do_read_of_rmw(un, tgt_blkno, tgt_nblk,
26354 			    &wr_bp);
26355 			if (err) {
26356 				mutex_exit(SD_MUTEX(un));
26357 				return (err);
26358 			}
26359 			/*
26360 			 * The offset into the rmw buffer is calculated as:
26361 			 * (original block # * system block size) -
26362 			 * (target block # * target block size)
26363 			 */
26364 			io_start_offset =
26365 			    ((uint64_t)(blkno * un->un_sys_blocksize)) -
26366 			    ((uint64_t)(tgt_blkno * un->un_tgt_blocksize));
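      			/*
      			 * Example (hypothetical sizes): with a 512-byte
      			 * system block size and a 4096-byte target block
      			 * size, blkno = 9 gives a byte offset of 4608;
      			 * tgt_blkno = 4608 / 4096 = 1, so io_start_offset
      			 * is 4608 - 4096 = 512, i.e. the caller's data
      			 * lands 512 bytes into the rmw buffer.
      			 */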
26367 
26368 			ASSERT(io_start_offset < un->un_tgt_blocksize);
26369 			/*
26370 			 * Do the modify portion of read modify write.
26371 			 */
26372 			bcopy(addr, &wr_bp->b_un.b_addr[io_start_offset],
26373 			    (size_t)nblk * un->un_sys_blocksize);
26374 		} else {
26375 			doing_rmw = FALSE;
26376 			tgt_blkno = tgt_byte_offset / un->un_tgt_blocksize;
26377 			tgt_nblk = tgt_byte_count / un->un_tgt_blocksize;
26378 		}
26379 
26380 		/* Convert blkno and nblk to target blocks */
26381 		blkno = tgt_blkno;
26382 		nblk = tgt_nblk;
26383 	} else {
26384 		wr_bp = &wr_buf;
26385 		bzero(wr_bp, sizeof (struct buf));
26386 		wr_bp->b_flags		= B_BUSY;
26387 		wr_bp->b_un.b_addr	= addr;
26388 		wr_bp->b_bcount		= nblk << DEV_BSHIFT;
26389 		wr_bp->b_resid		= 0;
26390 	}
26391 
26392 	mutex_exit(SD_MUTEX(un));
26393 
26394 	/*
26395 	 * Obtain a SCSI packet for the write command.
26396 	 * It should be safe to call the allocator here without
26397 	 * worrying about being locked for DVMA mapping because
26398 	 * the address we're passed is already a DVMA mapping
26399 	 *
26400 	 * We are also not going to worry about semaphore ownership
26401 	 * in the dump buffer. Dumping is single threaded at present.
26402 	 */
26403 
26404 	wr_pktp = NULL;
26405 
26406 	dma_resid = wr_bp->b_bcount;
26407 	oblkno = blkno;
26408 
26409 	if (!(NOT_DEVBSIZE(un))) {
26410 		nblk = nblk / (un->un_tgt_blocksize / DEV_BSIZE);
26411 	}
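      	/*
      	 * Example (hypothetical sizes): with DEV_BSIZE = 512 and a
      	 * 4096-byte target block size, a request for nblk = 64 512-byte
      	 * blocks becomes 64 / (4096 / 512) = 8 target blocks.
      	 */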
26412 
26413 	while (dma_resid != 0) {
26414 
26415 	for (i = 0; i < SD_NDUMP_RETRIES; i++) {
26416 		wr_bp->b_flags &= ~B_ERROR;
26417 
26418 		if (un->un_partial_dma_supported == 1) {
26419 			blkno = oblkno +
26420 			    ((wr_bp->b_bcount - dma_resid) /
26421 			    un->un_tgt_blocksize);
26422 			nblk = dma_resid / un->un_tgt_blocksize;
26423 
26424 			if (wr_pktp) {
26425 				/*
26426 				 * Partial DMA transfers after initial transfer
26427 				 */
26428 				rval = sd_setup_next_rw_pkt(un, wr_pktp, wr_bp,
26429 				    blkno, nblk);
26430 			} else {
26431 				/* Initial transfer */
26432 				rval = sd_setup_rw_pkt(un, &wr_pktp, wr_bp,
26433 				    un->un_pkt_flags, NULL_FUNC, NULL,
26434 				    blkno, nblk);
26435 			}
26436 		} else {
26437 			rval = sd_setup_rw_pkt(un, &wr_pktp, wr_bp,
26438 			    0, NULL_FUNC, NULL, blkno, nblk);
26439 		}
26440 
26441 		if (rval == 0) {
26442 			/* We were given a SCSI packet, continue. */
26443 			break;
26444 		}
26445 
26446 		if (i == 0) {
26447 			if (wr_bp->b_flags & B_ERROR) {
26448 				scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
26449 				    "no resources for dumping; "
26450 				    "error code: 0x%x, retrying",
26451 				    geterror(wr_bp));
26452 			} else {
26453 				scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
26454 				    "no resources for dumping; retrying");
26455 			}
26456 		} else if (i != (SD_NDUMP_RETRIES - 1)) {
26457 			if (wr_bp->b_flags & B_ERROR) {
26458 				scsi_log(SD_DEVINFO(un), sd_label, CE_CONT,
26459 				    "no resources for dumping; error code: "
26460 				    "0x%x, retrying\n", geterror(wr_bp));
26461 			}
26462 		} else {
26463 			if (wr_bp->b_flags & B_ERROR) {
26464 				scsi_log(SD_DEVINFO(un), sd_label, CE_CONT,
26465 				    "no resources for dumping; "
26466 				    "error code: 0x%x, retries failed, "
26467 				    "giving up.\n", geterror(wr_bp));
26468 			} else {
26469 				scsi_log(SD_DEVINFO(un), sd_label, CE_CONT,
26470 				    "no resources for dumping; "
26471 				    "retries failed, giving up.\n");
26472 			}
26473 			mutex_enter(SD_MUTEX(un));
26474 			Restore_state(un);
26475 			if (NOT_DEVBSIZE(un) && (doing_rmw == TRUE)) {
26476 				mutex_exit(SD_MUTEX(un));
26477 				scsi_free_consistent_buf(wr_bp);
26478 			} else {
26479 				mutex_exit(SD_MUTEX(un));
26480 			}
26481 			return (EIO);
26482 		}
26483 		drv_usecwait(10000);
26484 	}
26485 
26486 	if (un->un_partial_dma_supported == 1) {
26487 		/*
26488 		 * save the resid from PARTIAL_DMA
26489 		 */
26490 		dma_resid = wr_pktp->pkt_resid;
26491 		if (dma_resid != 0)
26492 			nblk -= SD_BYTES2TGTBLOCKS(un, dma_resid);
26493 		wr_pktp->pkt_resid = 0;
26494 	} else {
26495 		dma_resid = 0;
26496 	}
26497 
26498 	/* SunBug 1222170 */
26499 	wr_pktp->pkt_flags = FLAG_NOINTR;
26500 
26501 	err = EIO;
26502 	for (i = 0; i < SD_NDUMP_RETRIES; i++) {
26503 
26504 		/*
26505 		 * Scsi_poll returns 0 (success) if the command completes and
26506 		 * the status block is STATUS_GOOD.  We should only check
26507 		 * errors if this condition is not true.  Even then we should
26508 		 * send our own request sense packet only if we have a check
26509 		 * condition and auto request sense has not been performed by
26510 		 * the hba.
26511 		 */
26512 		SD_TRACE(SD_LOG_DUMP, un, "sddump: sending write\n");
26513 
26514 		if ((sd_scsi_poll(un, wr_pktp) == 0) &&
26515 		    (wr_pktp->pkt_resid == 0)) {
26516 			err = SD_SUCCESS;
26517 			break;
26518 		}
26519 
26520 		/*
26521 		 * Check CMD_DEV_GONE 1st, give up if device is gone.
26522 		 */
26523 		if (wr_pktp->pkt_reason == CMD_DEV_GONE) {
26524 			scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
26525 			    "Error while dumping state...Device is gone\n");
26526 			break;
26527 		}
26528 
26529 		if (SD_GET_PKT_STATUS(wr_pktp) == STATUS_CHECK) {
26530 			SD_INFO(SD_LOG_DUMP, un,
26531 			    "sddump: write failed with CHECK, try # %d\n", i);
26532 			if (((wr_pktp->pkt_state & STATE_ARQ_DONE) == 0)) {
26533 				(void) sd_send_polled_RQS(un);
26534 			}
26535 
26536 			continue;
26537 		}
26538 
26539 		if (SD_GET_PKT_STATUS(wr_pktp) == STATUS_BUSY) {
26540 			int reset_retval = 0;
26541 
26542 			SD_INFO(SD_LOG_DUMP, un,
26543 			    "sddump: write failed with BUSY, try # %d\n", i);
26544 
26545 			if (un->un_f_lun_reset_enabled == TRUE) {
26546 				reset_retval = scsi_reset(SD_ADDRESS(un),
26547 				    RESET_LUN);
26548 			}
26549 			if (reset_retval == 0) {
26550 				(void) scsi_reset(SD_ADDRESS(un), RESET_TARGET);
26551 			}
26552 			(void) sd_send_polled_RQS(un);
26553 
26554 		} else {
26555 			SD_INFO(SD_LOG_DUMP, un,
26556 			    "sddump: write failed with 0x%x, try # %d\n",
26557 			    SD_GET_PKT_STATUS(wr_pktp), i);
26558 			mutex_enter(SD_MUTEX(un));
26559 			sd_reset_target(un, wr_pktp);
26560 			mutex_exit(SD_MUTEX(un));
26561 		}
26562 
26563 		/*
26564 		 * If we are not getting anywhere with lun/target resets,
26565 		 * let's reset the bus.
26566 		 */
26567 		if (i == SD_NDUMP_RETRIES / 2) {
26568 			(void) scsi_reset(SD_ADDRESS(un), RESET_ALL);
26569 			(void) sd_send_polled_RQS(un);
26570 		}
26571 	}
26572 	}
26573 
26574 	scsi_destroy_pkt(wr_pktp);
26575 	mutex_enter(SD_MUTEX(un));
26576 	if ((NOT_DEVBSIZE(un)) && (doing_rmw == TRUE)) {
26577 		mutex_exit(SD_MUTEX(un));
26578 		scsi_free_consistent_buf(wr_bp);
26579 	} else {
26580 		mutex_exit(SD_MUTEX(un));
26581 	}
26582 	SD_TRACE(SD_LOG_DUMP, un, "sddump: exit: err = %d\n", err);
26583 	return (err);
26584 }
26585 
26586 /*
26587  *    Function: sd_scsi_poll()
26588  *
26589  * Description: This is a wrapper for the scsi_poll call.
26590  *
26591  *   Arguments: sd_lun - The unit structure
26592  *              scsi_pkt - The scsi packet being sent to the device.
26593  *
26594  * Return Code: 0 - Command completed successfully with good status
26595  *             -1 - Command failed.  This could indicate a check condition
26596  *                  or other status value requiring recovery action.
26597  *
26598  * NOTE: This code is only called from sddump().
26599  */
26600 
26601 static int
26602 sd_scsi_poll(struct sd_lun *un, struct scsi_pkt *pktp)
26603 {
26604 	int status;
26605 
26606 	ASSERT(un != NULL);
26607 	ASSERT(!mutex_owned(SD_MUTEX(un)));
26608 	ASSERT(pktp != NULL);
26609 
26610 	status = SD_SUCCESS;
26611 
26612 	if (scsi_ifgetcap(&pktp->pkt_address, "tagged-qing", 1) == 1) {
26613 		pktp->pkt_flags |= un->un_tagflags;
26614 		pktp->pkt_flags &= ~FLAG_NODISCON;
26615 	}
26616 
26617 	status = sd_ddi_scsi_poll(pktp);
26618 	/*
26619 	 * Scsi_poll returns 0 (success) if the command completes and the
26620 	 * status block is STATUS_GOOD.  We should only check errors if this
26621 	 * condition is not true.  Even then we should send our own request
26622 	 * sense packet only if we have a check condition and auto
26623 	 * request sense has not been performed by the HBA.
26624 	 * Don't get RQS data if pkt_reason is CMD_DEV_GONE.
26625 	 */
26626 	if ((status != SD_SUCCESS) &&
26627 	    (SD_GET_PKT_STATUS(pktp) == STATUS_CHECK) &&
26628 	    (pktp->pkt_state & STATE_ARQ_DONE) == 0 &&
26629 	    (pktp->pkt_reason != CMD_DEV_GONE))
26630 		(void) sd_send_polled_RQS(un);
26631 
26632 	return (status);
26633 }
26634 
26635 /*
26636  *    Function: sd_send_polled_RQS()
26637  *
26638  * Description: This sends the request sense command to a device.
26639  *
26640  *   Arguments: un - The unit structure for the target
26641  *
26642  * Return Code: 0 - Command completed successfully with good status
26643  *             -1 - Command failed.
26644  *
26645  */
26646 
26647 static int
26648 sd_send_polled_RQS(struct sd_lun *un)
26649 {
26650 	int	ret_val;
26651 	struct	scsi_pkt	*rqs_pktp;
26652 	struct	buf		*rqs_bp;
26653 
26654 	ASSERT(un != NULL);
26655 	ASSERT(!mutex_owned(SD_MUTEX(un)));
26656 
26657 	ret_val = SD_SUCCESS;
26658 
26659 	rqs_pktp = un->un_rqs_pktp;
26660 	rqs_bp	 = un->un_rqs_bp;
26661 
26662 	mutex_enter(SD_MUTEX(un));
26663 
26664 	if (un->un_sense_isbusy) {
26665 		ret_val = SD_FAILURE;
26666 		mutex_exit(SD_MUTEX(un));
26667 		return (ret_val);
26668 	}
26669 
26670 	/*
26671 	 * If the request sense buffer (and packet) is not in use,
26672 	 * let's set the un_sense_isbusy and send our packet
26673 	 */
26674 	un->un_sense_isbusy = 1;
26675 	rqs_pktp->pkt_resid = 0;
26676 	rqs_pktp->pkt_reason = 0;
26677 	rqs_pktp->pkt_flags |= FLAG_NOINTR;
26678 	bzero(rqs_bp->b_un.b_addr, SENSE_LENGTH);
26679 
26680 	mutex_exit(SD_MUTEX(un));
26681 
26682 	SD_INFO(SD_LOG_COMMON, un, "sd_send_polled_RQS: req sense buf at"
26683 	    " 0x%p\n", rqs_bp->b_un.b_addr);
26684 
26685 	/*
26686 	 * Can't send this through sd_scsi_poll(): on a check condition it
26687 	 * calls back into this routine, so we would recurse.
26688 	 */
26689 	if ((ret_val = sd_ddi_scsi_poll(rqs_pktp)) != 0) {
26690 		SD_INFO(SD_LOG_COMMON, un,
26691 		    "sd_send_polled_RQS: RQS failed\n");
26692 	}
26693 
26694 	SD_DUMP_MEMORY(un, SD_LOG_COMMON, "sd_send_polled_RQS:",
26695 	    (uchar_t *)rqs_bp->b_un.b_addr, SENSE_LENGTH, SD_LOG_HEX);
26696 
26697 	mutex_enter(SD_MUTEX(un));
26698 	un->un_sense_isbusy = 0;
26699 	mutex_exit(SD_MUTEX(un));
26700 
26701 	return (ret_val);
26702 }
26703 
26704 /*
26705  * Defines needed for localized version of the scsi_poll routine.
26706  */
26707 #define	CSEC		10000			/* usecs */
26708 #define	SEC_TO_CSEC	(1000000 / CSEC)
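/*
 * A quick unit check: CSEC is one centisecond (10000 usec == 10 msec),
 * so SEC_TO_CSEC works out to 100 poll ticks per second.  With the
 * default SCSI_POLL_TIMEOUT of 60 seconds, the polling loop below runs
 * at most 60 * 100 = 6000 iterations of 10 msec each.
 */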
26709 
26710 /*
26711  *    Function: sd_ddi_scsi_poll()
26712  *
26713  * Description: Localized version of the scsi_poll routine.  The purpose is to
26714  *		send a scsi_pkt to a device as a polled command.  This version
26715  *		is to ensure more robust handling of transport errors.
26716  *		Specifically this routine cures not ready, coming ready
26717  *		Specifically, this routine handles the not-ready to ready
26718  *		transition during power-up or reset of Sonoma devices.  This
26719  *		can take up to 45 seconds for power-on and 20 seconds for
26720  *		reset of a Sonoma LUN.
26721  *   Arguments: scsi_pkt - The scsi_pkt being sent to a device
26722  *   Arguments: pkt - The scsi_pkt being sent to the device
26723  * Return Code: 0 - Command completed successfully with good status
26724  *             -1 - Command failed.
26725  *
26726  * NOTE: This code is almost identical to scsi_poll, however before 6668774 can
26727  * NOTE: This code is almost identical to scsi_poll; however, before 6668774
26728  * can be fixed (removing this code), we need to determine how to handle the
26729  * KEY_UNIT_ATTENTION condition below in contexts less limited than sddump().
26730  * NOTE: This code is only called off sddump().
26731  * NOTE: This code is only called from sddump().
26732 static int
26733 sd_ddi_scsi_poll(struct scsi_pkt *pkt)
26734 {
26735 	int			rval = -1;
26736 	int			savef;
26737 	long			savet;
26738 	void			(*savec)();
26739 	int			timeout;
26740 	int			busy_count;
26741 	int			poll_delay;
26742 	int			rc;
26743 	uint8_t			*sensep;
26744 	struct scsi_arq_status	*arqstat;
26745 	extern int		do_polled_io;
26746 
26747 	ASSERT(pkt->pkt_scbp);
26748 
26749 	/*
26750 	 * Save the caller's flags, completion callback and timeout so they
26751 	 * can be restored before returning.
26751 	 */
26752 	savef = pkt->pkt_flags;
26753 	savec = pkt->pkt_comp;
26754 	savet = pkt->pkt_time;
26755 
26756 	pkt->pkt_flags |= FLAG_NOINTR;
26757 
26758 	/*
26759 	 * XXX there is nothing in the SCSA spec that states that we should not
26760 	 * do a callback for polled cmds; however, removing this will break sd
26761 	 * and probably other target drivers
26762 	 */
26763 	pkt->pkt_comp = NULL;
26764 
26765 	/*
26766 	 * We don't like a polled command without a timeout;
26767 	 * 60 seconds seems long enough.
26768 	 */
26769 	if (pkt->pkt_time == 0)
26770 		pkt->pkt_time = SCSI_POLL_TIMEOUT;
26771 
26772 	/*
26773 	 * Send polled cmd.
26774 	 *
26775 	 * We do some error recovery for various errors.  Tran_busy,
26776 	 * queue full, and non-dispatched commands are retried every 10 msec,
26777 	 * as they are typically transient failures.  Busy status and Not
26778 	 * Ready are retried every second, as these conditions take a while
26779 	 * to change.
26780 	 */
26781 	timeout = pkt->pkt_time * SEC_TO_CSEC;
26782 
26783 	for (busy_count = 0; busy_count < timeout; busy_count++) {
26784 		/*
26785 		 * Initialize pkt status variables.
26786 		 */
26787 		*pkt->pkt_scbp = pkt->pkt_reason = pkt->pkt_state = 0;
26788 
26789 		if ((rc = scsi_transport(pkt)) != TRAN_ACCEPT) {
26790 			if (rc != TRAN_BUSY) {
26791 				/* Transport failed - give up. */
26792 				break;
26793 			} else {
26794 				/* Transport busy - try again. */
26795 				poll_delay = 1 * CSEC;		/* 10 msec. */
26796 			}
26797 		} else {
26798 			/*
26799 			 * Transport accepted - check pkt status.
26800 			 */
26801 			rc = (*pkt->pkt_scbp) & STATUS_MASK;
26802 			if ((pkt->pkt_reason == CMD_CMPLT) &&
26803 			    (rc == STATUS_CHECK) &&
26804 			    (pkt->pkt_state & STATE_ARQ_DONE)) {
26805 				arqstat =
26806 				    (struct scsi_arq_status *)(pkt->pkt_scbp);
26807 				sensep = (uint8_t *)&arqstat->sts_sensedata;
26808 			} else {
26809 				sensep = NULL;
26810 			}
26811 
26812 			if ((pkt->pkt_reason == CMD_CMPLT) &&
26813 			    (rc == STATUS_GOOD)) {
26814 				/* No error - we're done */
26815 				rval = 0;
26816 				break;
26817 
26818 			} else if (pkt->pkt_reason == CMD_DEV_GONE) {
26819 				/* Lost connection - give up */
26820 				break;
26821 
26822 			} else if ((pkt->pkt_reason == CMD_INCOMPLETE) &&
26823 			    (pkt->pkt_state == 0)) {
26824 				/* Pkt not dispatched - try again. */
26825 				poll_delay = 1 * CSEC;		/* 10 msec. */
26826 
26827 			} else if ((pkt->pkt_reason == CMD_CMPLT) &&
26828 			    (rc == STATUS_QFULL)) {
26829 				/* Queue full - try again. */
26830 				poll_delay = 1 * CSEC;		/* 10 msec. */
26831 
26832 			} else if ((pkt->pkt_reason == CMD_CMPLT) &&
26833 			    (rc == STATUS_BUSY)) {
26834 				/* Busy - try again. */
26835 				poll_delay = 100 * CSEC;	/* 1 sec. */
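				/*
				 * Each iteration of the loop normally
				 * accounts for one CSEC (10 msec).  This
				 * retry waits a full second, so credit the
				 * remaining SEC_TO_CSEC - 1 ticks to keep
				 * pkt_time as the bound on the total wait.
				 */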
26836 				busy_count += (SEC_TO_CSEC - 1);
26837 
26838 			} else if ((sensep != NULL) &&
26839 			    (scsi_sense_key(sensep) == KEY_UNIT_ATTENTION)) {
26840 				/*
26841 				 * Unit Attention - try again.
26842 				 * Pretend it took 1 sec.
26843 				 * NOTE: 'continue' avoids poll_delay
26844 				 */
26845 				busy_count += (SEC_TO_CSEC - 1);
26846 				continue;
26847 
26848 			} else if ((sensep != NULL) &&
26849 			    (scsi_sense_key(sensep) == KEY_NOT_READY) &&
26850 			    (scsi_sense_asc(sensep) == 0x04) &&
26851 			    (scsi_sense_ascq(sensep) == 0x01)) {
26852 				/*
26853 				 * Not ready -> ready - try again.
26854 				 * 04h/01h: LUN IS IN PROCESS OF BECOMING READY
26855 				 * ...same as STATUS_BUSY
26856 				 */
26857 				poll_delay = 100 * CSEC;	/* 1 sec. */
26858 				busy_count += (SEC_TO_CSEC - 1);
26859 
26860 			} else {
26861 				/* BAD status - give up. */
26862 				break;
26863 			}
26864 		}
26865 
26866 		if (((curthread->t_flag & T_INTR_THREAD) == 0) &&
26867 		    !do_polled_io) {
26868 			delay(drv_usectohz(poll_delay));
26869 		} else {
26870 			/* we busy wait during cpr_dump or interrupt threads */
26871 			drv_usecwait(poll_delay);
26872 		}
26873 	}
26874 
26875 	pkt->pkt_flags = savef;
26876 	pkt->pkt_comp = savec;
26877 	pkt->pkt_time = savet;
26878 
26879 	/* return on error */
26880 	if (rval)
26881 		return (rval);
26882 
26883 	/*
26884 	 * This is not a performance critical code path.
26885 	 *
26886 	 * As an accommodation for scsi_poll callers, to avoid ddi_dma_sync()
26887 	 * issues associated with looking at DMA memory prior to
26888 	 * scsi_pkt_destroy(), we scsi_sync_pkt() prior to return.
26889 	 */
26890 	scsi_sync_pkt(pkt);
26891 	return (0);
26892 }
26893 
26894 
26895 
26896 /*
26897  *    Function: sd_persistent_reservation_in_read_keys
26898  *
26899  * Description: This routine is the driver entry point for handling
26900  *		multi-host persistent reservation requests (MHIOCGRP_INKEYS)
26901  *		by sending the SCSI-3 PRIN commands to the device.
26902  *		Processes the read keys command response by copying the
26903  *		reservation key information into the user provided buffer.
26904  *		Support for the 32/64 bit _MULTI_DATAMODEL is implemented.
26905  *
26906  *   Arguments: un   -  Pointer to soft state struct for the target.
26907  *		usrp -	user provided pointer to multihost Persistent In Read
26908  *			Keys structure (mhioc_inkeys_t)
26909  *		flag -	this argument is a pass through to ddi_copyxxx()
26910  *			directly from the mode argument of ioctl().
26911  *
26912  * Return Code: 0   - Success
26913  *		EACCES
26914  *		ENOTSUP
26915  *		errno return code from sd_send_scsi_cmd()
26916  *
26917  *     Context: Can sleep. Does not return until command is completed.
26918  */
26919 
26920 static int
26921 sd_persistent_reservation_in_read_keys(struct sd_lun *un,
26922     mhioc_inkeys_t *usrp, int flag)
26923 {
26924 #ifdef _MULTI_DATAMODEL
26925 	struct mhioc_key_list32	li32;
26926 #endif
26927 	sd_prin_readkeys_t	*in;
26928 	mhioc_inkeys_t		*ptr;
26929 	mhioc_key_list_t	li;
26930 	uchar_t			*data_bufp = NULL;
26931 	int			data_len = 0;
26932 	int			rval = 0;
26933 	size_t			copysz = 0;
26934 	sd_ssc_t		*ssc;
26935 
26936 	if ((ptr = usrp) == NULL) {
26937 		return (EINVAL);
26938 	}
26939 	bzero(&li, sizeof (mhioc_key_list_t));
26940 
26941 	ssc = sd_ssc_init(un);
26942 
26943 	/*
26944 	 * Get the listsize from the user
26945 	 */
26946 #ifdef _MULTI_DATAMODEL
26947 	switch (ddi_model_convert_from(flag & FMODELS)) {
26948 	case DDI_MODEL_ILP32:
26949 		copysz = sizeof (struct mhioc_key_list32);
26950 		if (ddi_copyin(ptr->li, &li32, copysz, flag)) {
26951 			SD_ERROR(SD_LOG_IOCTL_MHD, un,
26952 			    "sd_persistent_reservation_in_read_keys: "
26953 			    "failed ddi_copyin: mhioc_key_list32_t\n");
26954 			rval = EFAULT;
26955 			goto done;
26956 		}
26957 		li.listsize = li32.listsize;
26958 		li.list = (mhioc_resv_key_t *)(uintptr_t)li32.list;
26959 		break;
26960 
26961 	case DDI_MODEL_NONE:
26962 		copysz = sizeof (mhioc_key_list_t);
26963 		if (ddi_copyin(ptr->li, &li, copysz, flag)) {
26964 			SD_ERROR(SD_LOG_IOCTL_MHD, un,
26965 			    "sd_persistent_reservation_in_read_keys: "
26966 			    "failed ddi_copyin: mhioc_key_list_t\n");
26967 			rval = EFAULT;
26968 			goto done;
26969 		}
26970 		break;
26971 	}
26972 
26973 #else /* ! _MULTI_DATAMODEL */
26974 	copysz = sizeof (mhioc_key_list_t);
26975 	if (ddi_copyin(ptr->li, &li, copysz, flag)) {
26976 		SD_ERROR(SD_LOG_IOCTL_MHD, un,
26977 		    "sd_persistent_reservation_in_read_keys: "
26978 		    "failed ddi_copyin: mhioc_key_list_t\n");
26979 		rval = EFAULT;
26980 		goto done;
26981 	}
26982 #endif
26983 
26984 	data_len  = li.listsize * MHIOC_RESV_KEY_SIZE;
26985 	data_len += (sizeof (sd_prin_readkeys_t) - sizeof (caddr_t));
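	/*
	 * The PRIN READ KEYS response is a fixed header (generation and
	 * length) followed by listsize keys of MHIOC_RESV_KEY_SIZE bytes
	 * each; the trailing member of sd_prin_readkeys_t is merely a
	 * placeholder for that variable-length list, hence its size is
	 * subtracted from the allocation.
	 */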
26986 	data_bufp = kmem_zalloc(data_len, KM_SLEEP);
26987 
26988 	rval = sd_send_scsi_PERSISTENT_RESERVE_IN(ssc, SD_READ_KEYS,
26989 	    data_len, data_bufp);
26990 	if (rval != 0) {
26991 		if (rval == EIO)
26992 			sd_ssc_assessment(ssc, SD_FMT_IGNORE_COMPROMISE);
26993 		else
26994 			sd_ssc_assessment(ssc, SD_FMT_IGNORE);
26995 		goto done;
26996 	}
26997 	in = (sd_prin_readkeys_t *)data_bufp;
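	/*
	 * The device returns the generation counter and the key-list byte
	 * length in big-endian form; dividing the latter by
	 * MHIOC_RESV_KEY_SIZE yields the number of registered keys.
	 */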
26998 	ptr->generation = BE_32(in->generation);
26999 	li.listlen = BE_32(in->len) / MHIOC_RESV_KEY_SIZE;
27000 
27001 	/*
27002 	 * Return the min(listsize, listlen) keys
27003 	 */
27004 #ifdef _MULTI_DATAMODEL
27005 
27006 	switch (ddi_model_convert_from(flag & FMODELS)) {
27007 	case DDI_MODEL_ILP32:
27008 		li32.listlen = li.listlen;
27009 		if (ddi_copyout(&li32, ptr->li, copysz, flag)) {
27010 			SD_ERROR(SD_LOG_IOCTL_MHD, un,
27011 			    "sd_persistent_reservation_in_read_keys: "
27012 			    "failed ddi_copyout: mhioc_key_list32_t\n");
27013 			rval = EFAULT;
27014 			goto done;
27015 		}
27016 		break;
27017 
27018 	case DDI_MODEL_NONE:
27019 		if (ddi_copyout(&li, ptr->li, copysz, flag)) {
27020 			SD_ERROR(SD_LOG_IOCTL_MHD, un,
27021 			    "sd_persistent_reservation_in_read_keys: "
27022 			    "failed ddi_copyout: mhioc_key_list_t\n");
27023 			rval = EFAULT;
27024 			goto done;
27025 		}
27026 		break;
27027 	}
27028 
27029 #else /* ! _MULTI_DATAMODEL */
27030 
27031 	if (ddi_copyout(&li, ptr->li, copysz, flag)) {
27032 		SD_ERROR(SD_LOG_IOCTL_MHD, un,
27033 		    "sd_persistent_reservation_in_read_keys: "
27034 		    "failed ddi_copyout: mhioc_key_list_t\n");
27035 		rval = EFAULT;
27036 		goto done;
27037 	}
27038 
27039 #endif /* _MULTI_DATAMODEL */
27040 
27041 	copysz = min(li.listlen * MHIOC_RESV_KEY_SIZE,
27042 	    li.listsize * MHIOC_RESV_KEY_SIZE);
27043 	if (ddi_copyout(&in->keylist, li.list, copysz, flag)) {
27044 		SD_ERROR(SD_LOG_IOCTL_MHD, un,
27045 		    "sd_persistent_reservation_in_read_keys: "
27046 		    "failed ddi_copyout: keylist\n");
27047 		rval = EFAULT;
27048 	}
27049 done:
27050 	sd_ssc_fini(ssc);
27051 	kmem_free(data_bufp, data_len);
27052 	return (rval);
27053 }
27054 
27055 
27056 /*
27057  *    Function: sd_persistent_reservation_in_read_resv
27058  *
27059  * Description: This routine is the driver entry point for handling
27060  *		multi-host persistent reservation requests (MHIOCGRP_INRESV)
27061  *		by sending the SCSI-3 PRIN commands to the device.
27062  *		Process the read persistent reservations command response by
27063  *		copying the reservation information into the user provided
27064  *		buffer. Support for the 32/64 bit _MULTI_DATAMODEL is implemented.
27065  *
27066  *   Arguments: un   -  Pointer to soft state struct for the target.
27067  *		usrp -	user provided pointer to multihost Persistent In Read
27068  *			Reservations structure (mhioc_inresvs_t)
27069  *		flag -	this argument is a pass through to ddi_copyxxx()
27070  *			directly from the mode argument of ioctl().
27071  *
27072  * Return Code: 0   - Success
27073  *		EACCES
27074  *		ENOTSUP
27075  *		errno return code from sd_send_scsi_cmd()
27076  *
27077  *     Context: Can sleep. Does not return until command is completed.
27078  */
27079 
27080 static int
27081 sd_persistent_reservation_in_read_resv(struct sd_lun *un,
27082     mhioc_inresvs_t *usrp, int flag)
27083 {
27084 #ifdef _MULTI_DATAMODEL
27085 	struct mhioc_resv_desc_list32 resvlist32;
27086 #endif
27087 	sd_prin_readresv_t	*in;
27088 	mhioc_inresvs_t		*ptr;
27089 	sd_readresv_desc_t	*readresv_ptr;
27090 	mhioc_resv_desc_list_t	resvlist;
27091 	mhioc_resv_desc_t	resvdesc;
27092 	uchar_t			*data_bufp = NULL;
27093 	int			data_len;
27094 	int			rval = 0;
27095 	int			i;
27096 	size_t			copysz = 0;
27097 	mhioc_resv_desc_t	*bufp;
27098 	sd_ssc_t		*ssc;
27099 
27100 	if ((ptr = usrp) == NULL) {
27101 		return (EINVAL);
27102 	}
27103 
27104 	ssc = sd_ssc_init(un);
27105 
27106 	/*
27107 	 * Get the listsize from the user
27108 	 */
27109 #ifdef _MULTI_DATAMODEL
27110 	switch (ddi_model_convert_from(flag & FMODELS)) {
27111 	case DDI_MODEL_ILP32:
27112 		copysz = sizeof (struct mhioc_resv_desc_list32);
27113 		if (ddi_copyin(ptr->li, &resvlist32, copysz, flag)) {
27114 			SD_ERROR(SD_LOG_IOCTL_MHD, un,
27115 			    "sd_persistent_reservation_in_read_resv: "
27116 			    "failed ddi_copyin: mhioc_resv_desc_list_t\n");
27117 			rval = EFAULT;
27118 			goto done;
27119 		}
27120 		resvlist.listsize = resvlist32.listsize;
27121 		resvlist.list = (mhioc_resv_desc_t *)(uintptr_t)resvlist32.list;
27122 		break;
27123 
27124 	case DDI_MODEL_NONE:
27125 		copysz = sizeof (mhioc_resv_desc_list_t);
27126 		if (ddi_copyin(ptr->li, &resvlist, copysz, flag)) {
27127 			SD_ERROR(SD_LOG_IOCTL_MHD, un,
27128 			    "sd_persistent_reservation_in_read_resv: "
27129 			    "failed ddi_copyin: mhioc_resv_desc_list_t\n");
27130 			rval = EFAULT;
27131 			goto done;
27132 		}
27133 		break;
27134 	}
27135 #else /* ! _MULTI_DATAMODEL */
27136 	copysz = sizeof (mhioc_resv_desc_list_t);
27137 	if (ddi_copyin(ptr->li, &resvlist, copysz, flag)) {
27138 		SD_ERROR(SD_LOG_IOCTL_MHD, un,
27139 		    "sd_persistent_reservation_in_read_resv: "
27140 		    "failed ddi_copyin: mhioc_resv_desc_list_t\n");
27141 		rval = EFAULT;
27142 		goto done;
27143 	}
27144 #endif /* ! _MULTI_DATAMODEL */
27145 
27146 	data_len  = resvlist.listsize * SCSI3_RESV_DESC_LEN;
27147 	data_len += (sizeof (sd_prin_readresv_t) - sizeof (caddr_t));
27148 	data_bufp = kmem_zalloc(data_len, KM_SLEEP);
27149 
27150 	rval = sd_send_scsi_PERSISTENT_RESERVE_IN(ssc, SD_READ_RESV,
27151 	    data_len, data_bufp);
27152 	if (rval != 0) {
27153 		if (rval == EIO)
27154 			sd_ssc_assessment(ssc, SD_FMT_IGNORE_COMPROMISE);
27155 		else
27156 			sd_ssc_assessment(ssc, SD_FMT_IGNORE);
27157 		goto done;
27158 	}
27159 	in = (sd_prin_readresv_t *)data_bufp;
27160 	ptr->generation = BE_32(in->generation);
27161 	resvlist.listlen = BE_32(in->len) / SCSI3_RESV_DESC_LEN;
27162 
27163 	/*
27164 	 * Return the min(listsize, listlen) reservation descriptors
27165 	 */
27166 #ifdef _MULTI_DATAMODEL
27167 
27168 	switch (ddi_model_convert_from(flag & FMODELS)) {
27169 	case DDI_MODEL_ILP32:
27170 		resvlist32.listlen = resvlist.listlen;
27171 		if (ddi_copyout(&resvlist32, ptr->li, copysz, flag)) {
27172 			SD_ERROR(SD_LOG_IOCTL_MHD, un,
27173 			    "sd_persistent_reservation_in_read_resv: "
27174 			    "failed ddi_copyout: mhioc_resv_desc_list_t\n");
27175 			rval = EFAULT;
27176 			goto done;
27177 		}
27178 		break;
27179 
27180 	case DDI_MODEL_NONE:
27181 		if (ddi_copyout(&resvlist, ptr->li, copysz, flag)) {
27182 			SD_ERROR(SD_LOG_IOCTL_MHD, un,
27183 			    "sd_persistent_reservation_in_read_resv: "
27184 			    "failed ddi_copyout: mhioc_resv_desc_list_t\n");
27185 			rval = EFAULT;
27186 			goto done;
27187 		}
27188 		break;
27189 	}
27190 
27191 #else /* ! _MULTI_DATAMODEL */
27192 
27193 	if (ddi_copyout(&resvlist, ptr->li, copysz, flag)) {
27194 		SD_ERROR(SD_LOG_IOCTL_MHD, un,
27195 		    "sd_persistent_reservation_in_read_resv: "
27196 		    "failed ddi_copyout: mhioc_resv_desc_list_t\n");
27197 		rval = EFAULT;
27198 		goto done;
27199 	}
27200 
27201 #endif /* ! _MULTI_DATAMODEL */
27202 
27203 	readresv_ptr = (sd_readresv_desc_t *)&in->readresv_desc;
27204 	bufp = resvlist.list;
27205 	copysz = sizeof (mhioc_resv_desc_t);
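	/*
	 * Walk the returned descriptors, converting each to the user-visible
	 * mhioc_resv_desc_t: the reservation key is copied as-is while the
	 * scope-specific address is byte-swapped from big-endian.
	 */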
27206 	for (i = 0; i < min(resvlist.listlen, resvlist.listsize);
27207 	    i++, readresv_ptr++, bufp++) {
27208 
27209 		bcopy(&readresv_ptr->resvkey, &resvdesc.key,
27210 		    MHIOC_RESV_KEY_SIZE);
27211 		resvdesc.type  = readresv_ptr->type;
27212 		resvdesc.scope = readresv_ptr->scope;
27213 		resvdesc.scope_specific_addr =
27214 		    BE_32(readresv_ptr->scope_specific_addr);
27215 
27216 		if (ddi_copyout(&resvdesc, bufp, copysz, flag)) {
27217 			SD_ERROR(SD_LOG_IOCTL_MHD, un,
27218 			    "sd_persistent_reservation_in_read_resv: "
27219 			    "failed ddi_copyout: resvlist\n");
27220 			rval = EFAULT;
27221 			goto done;
27222 		}
27223 	}
27224 done:
27225 	sd_ssc_fini(ssc);
27226 	/* Free data_bufp only if it was actually allocated. */
27227 	if (data_bufp) {
27228 		kmem_free(data_bufp, data_len);
27229 	}
27230 	return (rval);
27231 }
27232 
27233 
27234 /*
27235  *    Function: sr_change_blkmode()
27236  *
27237  * Description: This routine is the driver entry point for handling CD-ROM
27238  *		block mode ioctl requests. Support for returning and changing
27239  *		the current block size in use by the device is implemented. The
27240  *		LBA size is changed via a MODE SELECT Block Descriptor.
27241  *
27242  *		This routine issues a mode sense with an allocation length of
27243  *		12 bytes for the mode page header and a single block descriptor.
27244  *
27245  *   Arguments: dev - the device 'dev_t'
27246  *		cmd - the request type; one of CDROMGBLKMODE (get) or
27247  *		      CDROMSBLKMODE (set)
27248  *		data - current block size or requested block size
27249  *		flag - this argument is a pass through to ddi_copyxxx() directly
27250  *		       from the mode argument of ioctl().
27251  *
27252  * Return Code: the code returned by sd_send_scsi_cmd()
27253  *		EINVAL if invalid arguments are provided
27254  *		EFAULT if ddi_copyxxx() fails
27255  *		ENXIO if ddi_get_soft_state() fails
27256  *		EIO if invalid mode sense block descriptor length
27257  *
27258  */
27259 
27260 static int
27261 sr_change_blkmode(dev_t dev, int cmd, intptr_t data, int flag)
27262 {
27263 	struct sd_lun			*un = NULL;
27264 	struct mode_header		*sense_mhp, *select_mhp;
27265 	struct block_descriptor		*sense_desc, *select_desc;
27266 	int				current_bsize;
27267 	int				rval = EINVAL;
27268 	uchar_t				*sense = NULL;
27269 	uchar_t				*select = NULL;
27270 	sd_ssc_t			*ssc;
27271 
27272 	ASSERT((cmd == CDROMGBLKMODE) || (cmd == CDROMSBLKMODE));
27273 
27274 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
27275 		return (ENXIO);
27276 	}
27277 
27278 	/*
27279 	 * The block length is changed via the Mode Select block descriptor, the
27280 	 * "Read/Write Error Recovery" mode page (0x1) contents are not actually
27281 	 * required as part of this routine. Therefore the mode sense allocation
27282 	 * length is specified to be the length of a mode page header and a
27283 	 * block descriptor.
27284 	 */
27285 	sense = kmem_zalloc(BUFLEN_CHG_BLK_MODE, KM_SLEEP);
27286 
27287 	ssc = sd_ssc_init(un);
27288 	rval = sd_send_scsi_MODE_SENSE(ssc, CDB_GROUP0, sense,
27289 	    BUFLEN_CHG_BLK_MODE, MODEPAGE_ERR_RECOV, SD_PATH_STANDARD);
27290 	sd_ssc_fini(ssc);
27291 	if (rval != 0) {
27292 		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
27293 		    "sr_change_blkmode: Mode Sense Failed\n");
27294 		kmem_free(sense, BUFLEN_CHG_BLK_MODE);
27295 		return (rval);
27296 	}
27297 
27298 	/* Check the block descriptor len to handle only 1 block descriptor */
27299 	sense_mhp = (struct mode_header *)sense;
27300 	if ((sense_mhp->bdesc_length == 0) ||
27301 	    (sense_mhp->bdesc_length > MODE_BLK_DESC_LENGTH)) {
27302 		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
27303 		    "sr_change_blkmode: Mode Sense returned invalid block"
27304 		    " descriptor length\n");
27305 		kmem_free(sense, BUFLEN_CHG_BLK_MODE);
27306 		return (EIO);
27307 	}
27308 	sense_desc = (struct block_descriptor *)(sense + MODE_HEADER_LENGTH);
27309 	current_bsize = ((sense_desc->blksize_hi << 16) |
27310 	    (sense_desc->blksize_mid << 8) | sense_desc->blksize_lo);
27311 
27312 	/* Process command */
27313 	switch (cmd) {
27314 	case CDROMGBLKMODE:
27315 		/* Return the block size obtained during the mode sense */
27316 		if (ddi_copyout(&current_bsize, (void *)data,
27317 		    sizeof (int), flag) != 0)
27318 			rval = EFAULT;
27319 		break;
27320 	case CDROMSBLKMODE:
27321 		/* Validate the requested block size */
27322 		switch (data) {
27323 		case CDROM_BLK_512:
27324 		case CDROM_BLK_1024:
27325 		case CDROM_BLK_2048:
27326 		case CDROM_BLK_2056:
27327 		case CDROM_BLK_2336:
27328 		case CDROM_BLK_2340:
27329 		case CDROM_BLK_2352:
27330 		case CDROM_BLK_2368:
27331 		case CDROM_BLK_2448:
27332 		case CDROM_BLK_2646:
27333 		case CDROM_BLK_2647:
27334 			break;
27335 		default:
27336 			scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
27337 			    "sr_change_blkmode: "
27338 			    "Block Size '%ld' Not Supported\n", data);
27339 			kmem_free(sense, BUFLEN_CHG_BLK_MODE);
27340 			return (EINVAL);
27341 		}
27342 
27343 		/*
27344 		 * The current block size matches the requested block size so
27345 		 * there is no need to send the mode select to change the size
27346 		 */
27347 		if (current_bsize == data) {
27348 			break;
27349 		}
27350 
27351 		/* Build the select data for the requested block size */
27352 		select = kmem_zalloc(BUFLEN_CHG_BLK_MODE, KM_SLEEP);
27353 		select_mhp = (struct mode_header *)select;
27354 		select_desc =
27355 		    (struct block_descriptor *)(select + MODE_HEADER_LENGTH);
27356 		/*
27357 		 * The LBA size is changed via the block descriptor, so the
27358 		 * descriptor is built according to the user data
27359 		 */
27360 		select_mhp->bdesc_length = MODE_BLK_DESC_LENGTH;
27361 		select_desc->blksize_hi  = (char)(((data) & 0x00ff0000) >> 16);
27362 		select_desc->blksize_mid = (char)(((data) & 0x0000ff00) >> 8);
27363 		select_desc->blksize_lo  = (char)((data) & 0x000000ff);
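		/*
		 * For example, a requested size of 2048 (0x000800) encodes
		 * as blksize_hi 0x00, blksize_mid 0x08, blksize_lo 0x00.
		 */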
27364 
27365 		/* Send the mode select for the requested block size */
27366 		ssc = sd_ssc_init(un);
27367 		rval = sd_send_scsi_MODE_SELECT(ssc, CDB_GROUP0,
27368 		    select, BUFLEN_CHG_BLK_MODE, SD_DONTSAVE_PAGE,
27369 		    SD_PATH_STANDARD);
27370 		sd_ssc_fini(ssc);
27371 		if (rval != 0) {
27372 			scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
27373 			    "sr_change_blkmode: Mode Select Failed\n");
27374 			/*
27375 			 * The mode select failed for the requested block size,
27376 			 * so reset the data for the original block size and
27377 			 * send it to the target. The error is indicated by the
27378 			 * return value for the failed mode select.
27379 			 */
27380 			select_desc->blksize_hi  = sense_desc->blksize_hi;
27381 			select_desc->blksize_mid = sense_desc->blksize_mid;
27382 			select_desc->blksize_lo  = sense_desc->blksize_lo;
27383 			ssc = sd_ssc_init(un);
27384 			(void) sd_send_scsi_MODE_SELECT(ssc, CDB_GROUP0,
27385 			    select, BUFLEN_CHG_BLK_MODE, SD_DONTSAVE_PAGE,
27386 			    SD_PATH_STANDARD);
27387 			sd_ssc_fini(ssc);
27388 		} else {
27389 			ASSERT(!mutex_owned(SD_MUTEX(un)));
27390 			mutex_enter(SD_MUTEX(un));
27391 			sd_update_block_info(un, (uint32_t)data, 0);
27392 			mutex_exit(SD_MUTEX(un));
27393 		}
27394 		break;
27395 	default:
27396 		/* should not reach here, but check anyway */
27397 		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
27398 		    "sr_change_blkmode: Command '%x' Not Supported\n", cmd);
27399 		rval = EINVAL;
27400 		break;
27401 	}
27402 
27403 	if (select) {
27404 		kmem_free(select, BUFLEN_CHG_BLK_MODE);
27405 	}
27406 	if (sense) {
27407 		kmem_free(sense, BUFLEN_CHG_BLK_MODE);
27408 	}
27409 	return (rval);
27410 }
27411 
27412 
27413 /*
27414  * Note: The following sr_change_speed() and sr_atapi_change_speed() routines
27415  * implement driver support for getting and setting the CD speed. The command
27416  * set used will be based on the device type. If the device has not been
27417  * identified as MMC the Toshiba vendor specific mode page will be used. If
27418  * the device is MMC but does not support the Real Time Streaming feature,
27419  * the SET CD SPEED command will be used to set the speed and mode page 0x2A
27420  * will be used to read it back.
27421  */
27422 
27423 /*
27424  *    Function: sr_change_speed()
27425  *
27426  * Description: This routine is the driver entry point for handling CD-ROM
27427  *		drive speed ioctl requests for devices supporting the Toshiba
27428  *		vendor specific drive speed mode page. Support for returning
27429  *		and changing the current drive speed in use by the device is
27430  *		implemented.
27431  *
27432  *   Arguments: dev - the device 'dev_t'
27433  *		cmd - the request type; one of CDROMGDRVSPEED (get) or
27434  *		      CDROMSDRVSPEED (set)
27435  *		data - current drive speed or requested drive speed
27436  *		flag - this argument is a pass through to ddi_copyxxx() directly
27437  *		       from the mode argument of ioctl().
27438  *
27439  * Return Code: the code returned by sd_send_scsi_cmd()
27440  *		EINVAL if invalid arguments are provided
27441  *		EFAULT if ddi_copyxxx() fails
27442  *		ENXIO if ddi_get_soft_state() fails
27443  *		EIO if invalid mode sense block descriptor length
27444  */
27445 
27446 static int
27447 sr_change_speed(dev_t dev, int cmd, intptr_t data, int flag)
27448 {
27449 	struct sd_lun			*un = NULL;
27450 	struct mode_header		*sense_mhp, *select_mhp;
27451 	struct mode_speed		*sense_page, *select_page;
27452 	int				current_speed;
27453 	int				rval = EINVAL;
27454 	int				bd_len;
27455 	uchar_t				*sense = NULL;
27456 	uchar_t				*select = NULL;
27457 	sd_ssc_t			*ssc;
27458 
27459 	ASSERT((cmd == CDROMGDRVSPEED) || (cmd == CDROMSDRVSPEED));
27460 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
27461 		return (ENXIO);
27462 	}
27463 
27464 	/*
27465 	 * Note: The drive speed is being modified here according to a Toshiba
27466 	 * vendor specific mode page (0x31).
27467 	 */
27468 	sense = kmem_zalloc(BUFLEN_MODE_CDROM_SPEED, KM_SLEEP);
27469 
27470 	ssc = sd_ssc_init(un);
27471 	rval = sd_send_scsi_MODE_SENSE(ssc, CDB_GROUP0, sense,
27472 	    BUFLEN_MODE_CDROM_SPEED, CDROM_MODE_SPEED,
27473 	    SD_PATH_STANDARD);
27474 	sd_ssc_fini(ssc);
27475 	if (rval != 0) {
27476 		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
27477 		    "sr_change_speed: Mode Sense Failed\n");
27478 		kmem_free(sense, BUFLEN_MODE_CDROM_SPEED);
27479 		return (rval);
27480 	}
27481 	sense_mhp  = (struct mode_header *)sense;
27482 
27483 	/* Check the block descriptor len to handle only 1 block descriptor */
27484 	bd_len = sense_mhp->bdesc_length;
27485 	if (bd_len > MODE_BLK_DESC_LENGTH) {
27486 		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
27487 		    "sr_change_speed: Mode Sense returned invalid block "
27488 		    "descriptor length\n");
27489 		kmem_free(sense, BUFLEN_MODE_CDROM_SPEED);
27490 		return (EIO);
27491 	}
27492 
27493 	sense_page = (struct mode_speed *)
27494 	    (sense + MODE_HEADER_LENGTH + sense_mhp->bdesc_length);
27495 	current_speed = sense_page->speed;
27496 
27497 	/* Process command */
27498 	switch (cmd) {
27499 	case CDROMGDRVSPEED:
27500 		/* Return the drive speed obtained during the mode sense */
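		/*
		 * The Toshiba vendor mode page reports code 0x2 for a 12x
		 * drive; map it to CDROM_TWELVE_SPEED (the inverse of the
		 * mapping in the set path below).
		 */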
27501 		if (current_speed == 0x2) {
27502 			current_speed = CDROM_TWELVE_SPEED;
27503 		}
27504 		if (ddi_copyout(&current_speed, (void *)data,
27505 		    sizeof (int), flag) != 0) {
27506 			rval = EFAULT;
27507 		}
27508 		break;
27509 	case CDROMSDRVSPEED:
27510 		/* Validate the requested drive speed */
27511 		switch ((uchar_t)data) {
27512 		case CDROM_TWELVE_SPEED:
27513 			data = 0x2;
27514 			/*FALLTHROUGH*/
27515 		case CDROM_NORMAL_SPEED:
27516 		case CDROM_DOUBLE_SPEED:
27517 		case CDROM_QUAD_SPEED:
27518 		case CDROM_MAXIMUM_SPEED:
27519 			break;
27520 		default:
27521 			scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
27522 			    "sr_change_speed: "
27523 			    "Drive Speed '%d' Not Supported\n", (uchar_t)data);
27524 			kmem_free(sense, BUFLEN_MODE_CDROM_SPEED);
27525 			return (EINVAL);
27526 		}
27527 
27528 		/*
27529 		 * The current drive speed matches the requested drive speed so
27530 		 * there is no need to send the mode select to change the speed
27531 		 */
27532 		if (current_speed == data) {
27533 			break;
27534 		}
27535 
27536 		/* Build the select data for the requested drive speed */
27537 		select = kmem_zalloc(BUFLEN_MODE_CDROM_SPEED, KM_SLEEP);
27538 		select_mhp = (struct mode_header *)select;
27539 		select_mhp->bdesc_length = 0;
27540 		select_page =
27541 		    (struct mode_speed *)(select + MODE_HEADER_LENGTH);
27544 		select_page->mode_page.code = CDROM_MODE_SPEED;
27545 		select_page->mode_page.length = 2;
27546 		select_page->speed = (uchar_t)data;
27547 
27548 		/* Send the mode select for the requested drive speed */
27549 		ssc = sd_ssc_init(un);
27550 		rval = sd_send_scsi_MODE_SELECT(ssc, CDB_GROUP0, select,
27551 		    MODEPAGE_CDROM_SPEED_LEN + MODE_HEADER_LENGTH,
27552 		    SD_DONTSAVE_PAGE, SD_PATH_STANDARD);
27553 		sd_ssc_fini(ssc);
27554 		if (rval != 0) {
27555 			/*
27556 			 * The mode select failed for the requested drive speed,
27557 			 * so reset the data for the original drive speed and
27558 			 * send it to the target. The error is indicated by the
27559 			 * return value for the failed mode select.
27560 			 */
27561 			scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
27562 			    "sr_change_speed: Mode Select Failed\n");
27563 			select_page->speed = sense_page->speed;
27564 			ssc = sd_ssc_init(un);
27565 			(void) sd_send_scsi_MODE_SELECT(ssc, CDB_GROUP0, select,
27566 			    MODEPAGE_CDROM_SPEED_LEN + MODE_HEADER_LENGTH,
27567 			    SD_DONTSAVE_PAGE, SD_PATH_STANDARD);
27568 			sd_ssc_fini(ssc);
27569 		}
27570 		break;
27571 	default:
27572 		/* should not reach here, but check anyway */
27573 		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
27574 		    "sr_change_speed: Command '%x' Not Supported\n", cmd);
27575 		rval = EINVAL;
27576 		break;
27577 	}
27578 
27579 	if (select) {
27580 		kmem_free(select, BUFLEN_MODE_CDROM_SPEED);
27581 	}
27582 	if (sense) {
27583 		kmem_free(sense, BUFLEN_MODE_CDROM_SPEED);
27584 	}
27585 
27586 	return (rval);
27587 }
27588 
27589 
27590 /*
27591  *    Function: sr_atapi_change_speed()
27592  *
27593  * Description: This routine is the driver entry point for handling CD-ROM
27594  *		drive speed ioctl requests for MMC devices that do not support
27595  *		the Real Time Streaming feature (0x107).
27596  *
27597  *		Note: This routine will use the SET SPEED command which may not
27598  *		be supported by all devices.
27599  *
27600  *   Arguments: dev- the device 'dev_t'
27601  *		cmd- the request type; one of CDROMGDRVSPEED (get) or
27602  *		     CDROMSDRVSPEED (set)
27603  *		data- current drive speed or requested drive speed
27604  *		flag- this argument is a pass through to ddi_copyxxx() directly
27605  *		      from the mode argument of ioctl().
27606  *
27607  * Return Code: the code returned by sd_send_scsi_cmd()
27608  *		EINVAL if invalid arguments are provided
27609  *		EFAULT if ddi_copyxxx() fails
27610  *		ENXIO if ddi_get_soft_state() fails
27611  *		EIO if invalid mode sense block descriptor length
27612  */
27613 
27614 static int
27615 sr_atapi_change_speed(dev_t dev, int cmd, intptr_t data, int flag)
27616 {
27617 	struct sd_lun			*un;
27618 	struct uscsi_cmd		*com = NULL;
27619 	struct mode_header_grp2		*sense_mhp;
27620 	uchar_t				*sense_page;
27621 	uchar_t				*sense = NULL;
27622 	char				cdb[CDB_GROUP5];
27623 	int				bd_len;
27624 	int				current_speed = 0;
27625 	int				max_speed = 0;
27626 	int				rval;
27627 	sd_ssc_t			*ssc;
27628 
27629 	ASSERT((cmd == CDROMGDRVSPEED) || (cmd == CDROMSDRVSPEED));
27630 
27631 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
27632 		return (ENXIO);
27633 	}
27634 
27635 	sense = kmem_zalloc(BUFLEN_MODE_CDROM_CAP, KM_SLEEP);
27636 
27637 	ssc = sd_ssc_init(un);
27638 	rval = sd_send_scsi_MODE_SENSE(ssc, CDB_GROUP1, sense,
27639 	    BUFLEN_MODE_CDROM_CAP, MODEPAGE_CDROM_CAP,
27640 	    SD_PATH_STANDARD);
27641 	sd_ssc_fini(ssc);
27642 	if (rval != 0) {
27643 		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
27644 		    "sr_atapi_change_speed: Mode Sense Failed\n");
27645 		kmem_free(sense, BUFLEN_MODE_CDROM_CAP);
27646 		return (rval);
27647 	}
27648 
27649 	/* Check the block descriptor len to handle only 1 block descriptor */
27650 	sense_mhp = (struct mode_header_grp2 *)sense;
27651 	bd_len = (sense_mhp->bdesc_length_hi << 8) | sense_mhp->bdesc_length_lo;
27652 	if (bd_len > MODE_BLK_DESC_LENGTH) {
27653 		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
27654 		    "sr_atapi_change_speed: Mode Sense returned invalid "
27655 		    "block descriptor length\n");
27656 		kmem_free(sense, BUFLEN_MODE_CDROM_CAP);
27657 		return (EIO);
27658 	}
27659 
27660 	/* Calculate the current and maximum drive speeds */
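	/*
	 * Per the MMC CD capabilities page (0x2A), bytes 8-9 hold the
	 * maximum read speed and bytes 14-15 the current read speed, both
	 * as big-endian kB/s values.
	 */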
27661 	sense_page = (uchar_t *)(sense + MODE_HEADER_LENGTH_GRP2 + bd_len);
27662 	current_speed = (sense_page[14] << 8) | sense_page[15];
27663 	max_speed = (sense_page[8] << 8) | sense_page[9];
27664 
27665 	/* Process the command */
27666 	switch (cmd) {
27667 	case CDROMGDRVSPEED:
27668 		current_speed /= SD_SPEED_1X;
27669 		if (ddi_copyout(&current_speed, (void *)data,
27670 		    sizeof (int), flag) != 0)
27671 			rval = EFAULT;
27672 		break;
27673 	case CDROMSDRVSPEED:
27674 		/* Convert the speed code to KB/sec */
27675 		switch ((uchar_t)data) {
27676 		case CDROM_NORMAL_SPEED:
27677 			current_speed = SD_SPEED_1X;
27678 			break;
27679 		case CDROM_DOUBLE_SPEED:
27680 			current_speed = 2 * SD_SPEED_1X;
27681 			break;
27682 		case CDROM_QUAD_SPEED:
27683 			current_speed = 4 * SD_SPEED_1X;
27684 			break;
27685 		case CDROM_TWELVE_SPEED:
27686 			current_speed = 12 * SD_SPEED_1X;
27687 			break;
27688 		case CDROM_MAXIMUM_SPEED:
27689 			current_speed = 0xffff;
27690 			break;
27691 		default:
27692 			scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
27693 			    "sr_atapi_change_speed: invalid drive speed %d\n",
27694 			    (uchar_t)data);
27695 			kmem_free(sense, BUFLEN_MODE_CDROM_CAP);
27696 			return (EINVAL);
27697 		}
27698 
27699 		/* Check the request against the drive's max speed. */
27700 		if (current_speed != 0xffff) {
27701 			if (current_speed > max_speed) {
27702 				kmem_free(sense, BUFLEN_MODE_CDROM_CAP);
27703 				return (EINVAL);
27704 			}
27705 		}
27706 
27707 		/*
27708 		 * Build and send the SET SPEED command
27709 		 *
27710 		 * Note: The SET SPEED (0xBB) command used in this routine is
27711 		 * obsolete per the SCSI MMC spec but still supported in the
27712 		 * MT FUJI vendor spec. Most equipment is adhering to MT FUJI,
27713 		 * therefore the command is still implemented in this routine.
27714 		 */
27715 		bzero(cdb, sizeof (cdb));
27716 		cdb[0] = (char)SCMD_SET_CDROM_SPEED;
27717 		cdb[2] = (uchar_t)(current_speed >> 8);
27718 		cdb[3] = (uchar_t)current_speed;
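		/* Bytes 2-3 carry the requested read speed in kB/s. */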
27719 		com = kmem_zalloc(sizeof (*com), KM_SLEEP);
27720 		com->uscsi_cdb	   = (caddr_t)cdb;
27721 		com->uscsi_cdblen  = CDB_GROUP5;
27722 		com->uscsi_bufaddr = NULL;
27723 		com->uscsi_buflen  = 0;
27724 		com->uscsi_flags   = USCSI_DIAGNOSE | USCSI_SILENT;
27725 		rval = sd_send_scsi_cmd(dev, com, FKIOCTL, 0, SD_PATH_STANDARD);
27726 		break;
27727 	default:
27728 		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
27729 		    "sr_atapi_change_speed: Command '%x' Not Supported\n", cmd);
27730 		rval = EINVAL;
27731 	}
27732 
27733 	if (sense) {
27734 		kmem_free(sense, BUFLEN_MODE_CDROM_CAP);
27735 	}
27736 	if (com) {
27737 		kmem_free(com, sizeof (*com));
27738 	}
27739 	return (rval);
27740 }
27741 
27742 
27743 /*
27744  *    Function: sr_pause_resume()
27745  *
27746  * Description: This routine is the driver entry point for handling CD-ROM
27747  *		pause/resume ioctl requests. This only affects the audio play
27748  *		operation.
27749  *
27750  *   Arguments: dev - the device 'dev_t'
27751  *		cmd - the request type; one of CDROMPAUSE or CDROMRESUME, used
27752  *		      for setting the resume bit of the cdb.
27753  *
27754  * Return Code: the code returned by sd_send_scsi_cmd()
27755  *		EINVAL if invalid mode specified
27756  *
27757  */
27758 
27759 static int
27760 sr_pause_resume(dev_t dev, int cmd)
27761 {
27762 	struct sd_lun		*un;
27763 	struct uscsi_cmd	*com;
27764 	char			cdb[CDB_GROUP1];
27765 	int			rval;
27766 
27767 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
27768 		return (ENXIO);
27769 	}
27770 
27771 	com = kmem_zalloc(sizeof (*com), KM_SLEEP);
27772 	bzero(cdb, CDB_GROUP1);
27773 	cdb[0] = SCMD_PAUSE_RESUME;
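	/* Byte 8 bit 0 is the Resume bit: 1 resumes play, 0 pauses it. */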
27774 	switch (cmd) {
27775 	case CDROMRESUME:
27776 		cdb[8] = 1;
27777 		break;
27778 	case CDROMPAUSE:
27779 		cdb[8] = 0;
27780 		break;
27781 	default:
27782 		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN, "sr_pause_resume:"
27783 		    " Command '%x' Not Supported\n", cmd);
27784 		rval = EINVAL;
27785 		goto done;
27786 	}
27787 
27788 	com->uscsi_cdb    = cdb;
27789 	com->uscsi_cdblen = CDB_GROUP1;
27790 	com->uscsi_flags  = USCSI_DIAGNOSE | USCSI_SILENT;
27791 
27792 	rval = sd_send_scsi_cmd(dev, com, FKIOCTL, UIO_SYSSPACE,
27793 	    SD_PATH_STANDARD);
27794 
27795 done:
27796 	kmem_free(com, sizeof (*com));
27797 	return (rval);
27798 }
27799 
27800 
27801 /*
27802  *    Function: sr_play_msf()
27803  *
27804  * Description: This routine is the driver entry point for handling CD-ROM
27805  *		ioctl requests to output the audio signals at the specified
27806  *		starting address and continue the audio play until the specified
27807  *		ending address (CDROMPLAYMSF). The address is in Minute Second
27808  *		Frame (MSF) format.
27809  *
27810  *   Arguments: dev	- the device 'dev_t'
27811  *		data	- pointer to user provided audio msf structure,
27812  *		          specifying start/end addresses.
27813  *		flag	- this argument is a pass through to ddi_copyxxx()
27814  *		          directly from the mode argument of ioctl().
27815  *
27816  * Return Code: the code returned by sd_send_scsi_cmd()
27817  *		EFAULT if ddi_copyxxx() fails
27818  *		ENXIO if ddi_get_soft_state() fails
27819  *		EINVAL if data pointer is NULL
27820  */
27821 
27822 static int
27823 sr_play_msf(dev_t dev, caddr_t data, int flag)
27824 {
27825 	struct sd_lun		*un;
27826 	struct uscsi_cmd	*com;
27827 	struct cdrom_msf	msf_struct;
27828 	struct cdrom_msf	*msf = &msf_struct;
27829 	char			cdb[CDB_GROUP1];
27830 	int			rval;
27831 
27832 	if (data == NULL) {
27833 		return (EINVAL);
27834 	}
27835 
27836 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
27837 		return (ENXIO);
27838 	}
27839 
27840 	if (ddi_copyin(data, msf, sizeof (struct cdrom_msf), flag)) {
27841 		return (EFAULT);
27842 	}
27843 
27844 	com = kmem_zalloc(sizeof (*com), KM_SLEEP);
27845 	bzero(cdb, CDB_GROUP1);
27846 	cdb[0] = SCMD_PLAYAUDIO_MSF;
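	/*
	 * Some legacy drives expect the MSF address bytes in BCD rather
	 * than plain binary; un_f_cfg_playmsf_bcd selects the encoding.
	 */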
27847 	if (un->un_f_cfg_playmsf_bcd == TRUE) {
27848 		cdb[3] = BYTE_TO_BCD(msf->cdmsf_min0);
27849 		cdb[4] = BYTE_TO_BCD(msf->cdmsf_sec0);
27850 		cdb[5] = BYTE_TO_BCD(msf->cdmsf_frame0);
27851 		cdb[6] = BYTE_TO_BCD(msf->cdmsf_min1);
27852 		cdb[7] = BYTE_TO_BCD(msf->cdmsf_sec1);
27853 		cdb[8] = BYTE_TO_BCD(msf->cdmsf_frame1);
27854 	} else {
27855 		cdb[3] = msf->cdmsf_min0;
27856 		cdb[4] = msf->cdmsf_sec0;
27857 		cdb[5] = msf->cdmsf_frame0;
27858 		cdb[6] = msf->cdmsf_min1;
27859 		cdb[7] = msf->cdmsf_sec1;
27860 		cdb[8] = msf->cdmsf_frame1;
27861 	}
27862 	com->uscsi_cdb    = cdb;
27863 	com->uscsi_cdblen = CDB_GROUP1;
27864 	com->uscsi_flags  = USCSI_DIAGNOSE | USCSI_SILENT;
27865 	rval = sd_send_scsi_cmd(dev, com, FKIOCTL, UIO_SYSSPACE,
27866 	    SD_PATH_STANDARD);
27867 	kmem_free(com, sizeof (*com));
27868 	return (rval);
27869 }
27870 
27871 
27872 /*
27873  *    Function: sr_play_trkind()
27874  *
27875  * Description: This routine is the driver entry point for handling CD-ROM
27876  *		ioctl requests to output the audio signals at the specified
27877  *		starting address and continue the audio play until the specified
27878  *		ending address (CDROMPLAYTRKIND). The address is in Track Index
27879  *		format.
27880  *
27881  *   Arguments: dev	- the device 'dev_t'
27882  *		data	- pointer to user provided audio track/index structure,
27883  *		          specifying start/end addresses.
27884  *		flag	- this argument is a pass through to ddi_copyxxx()
27885  *		          directly from the mode argument of ioctl().
27886  *
27887  * Return Code: the code returned by sd_send_scsi_cmd()
27888  *		EFAULT if ddi_copyxxx() fails
27889  *		ENXIO if ddi_get_soft_state() fails
27890  *		EINVAL if data pointer is NULL
27891  */
27892 
27893 static int
27894 sr_play_trkind(dev_t dev, caddr_t data, int flag)
27895 {
27896 	struct cdrom_ti		ti_struct;
27897 	struct cdrom_ti		*ti = &ti_struct;
27898 	struct uscsi_cmd	*com = NULL;
27899 	char			cdb[CDB_GROUP1];
27900 	int			rval;
27901 
27902 	if (data == NULL) {
27903 		return (EINVAL);
27904 	}
27905 
27906 	if (ddi_copyin(data, ti, sizeof (struct cdrom_ti), flag)) {
27907 		return (EFAULT);
27908 	}
27909 
27910 	com = kmem_zalloc(sizeof (*com), KM_SLEEP);
27911 	bzero(cdb, CDB_GROUP1);
27912 	cdb[0] = SCMD_PLAYAUDIO_TI;
27913 	cdb[4] = ti->cdti_trk0;
27914 	cdb[5] = ti->cdti_ind0;
27915 	cdb[7] = ti->cdti_trk1;
27916 	cdb[8] = ti->cdti_ind1;
27917 	com->uscsi_cdb    = cdb;
27918 	com->uscsi_cdblen = CDB_GROUP1;
27919 	com->uscsi_flags  = USCSI_DIAGNOSE | USCSI_SILENT;
27920 	rval = sd_send_scsi_cmd(dev, com, FKIOCTL, UIO_SYSSPACE,
27921 	    SD_PATH_STANDARD);
27922 	kmem_free(com, sizeof (*com));
27923 	return (rval);
27924 }
27925 
27926 
27927 /*
27928  *    Function: sr_read_all_subcodes()
27929  *
27930  * Description: This routine is the driver entry point for handling CD-ROM
27931  *		ioctl requests to return raw subcode data while the target is
27932  *		playing audio (CDROMSUBCODE).
27933  *
27934  *   Arguments: dev	- the device 'dev_t'
27935  *		data	- pointer to user provided cdrom subcode structure,
27936  *		          specifying the transfer length and address.
27937  *		flag	- this argument is a pass through to ddi_copyxxx()
27938  *		          directly from the mode argument of ioctl().
27939  *
27940  * Return Code: the code returned by sd_send_scsi_cmd()
27941  *		EFAULT if ddi_copyxxx() fails
27942  *		ENXIO if ddi_get_soft_state() fails
27943  *		EINVAL if data pointer is NULL
27944  */
27945 
27946 static int
27947 sr_read_all_subcodes(dev_t dev, caddr_t data, int flag)
27948 {
27949 	struct sd_lun		*un = NULL;
27950 	struct uscsi_cmd	*com = NULL;
27951 	struct cdrom_subcode	*subcode = NULL;
27952 	int			rval;
27953 	size_t			buflen;
27954 	char			cdb[CDB_GROUP5];
27955 
27956 #ifdef _MULTI_DATAMODEL
27957 	/* To support ILP32 applications in an LP64 world */
27958 	struct cdrom_subcode32		cdrom_subcode32;
27959 	struct cdrom_subcode32		*cdsc32 = &cdrom_subcode32;
27960 #endif
27961 	if (data == NULL) {
27962 		return (EINVAL);
27963 	}
27964 
27965 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
27966 		return (ENXIO);
27967 	}
27968 
27969 	subcode = kmem_zalloc(sizeof (struct cdrom_subcode), KM_SLEEP);
27970 
27971 #ifdef _MULTI_DATAMODEL
27972 	switch (ddi_model_convert_from(flag & FMODELS)) {
27973 	case DDI_MODEL_ILP32:
27974 		if (ddi_copyin(data, cdsc32, sizeof (*cdsc32), flag)) {
27975 			scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
27976 			    "sr_read_all_subcodes: ddi_copyin Failed\n");
27977 			kmem_free(subcode, sizeof (struct cdrom_subcode));
27978 			return (EFAULT);
27979 		}
27980 		/* Convert the ILP32 uscsi data from the application to LP64 */
27981 		/* Convert the ILP32 cdrom_subcode from the application to LP64 */
27982 		break;
27983 	case DDI_MODEL_NONE:
27984 		if (ddi_copyin(data, subcode,
27985 		    sizeof (struct cdrom_subcode), flag)) {
27986 			scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
27987 			    "sr_read_all_subcodes: ddi_copyin Failed\n");
27988 			kmem_free(subcode, sizeof (struct cdrom_subcode));
27989 			return (EFAULT);
27990 		}
27991 		break;
27992 	}
27993 #else /* ! _MULTI_DATAMODEL */
27994 	if (ddi_copyin(data, subcode, sizeof (struct cdrom_subcode), flag)) {
27995 		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
27996 		    "sr_read_all_subcodes: ddi_copyin Failed\n");
27997 		kmem_free(subcode, sizeof (struct cdrom_subcode));
27998 		return (EFAULT);
27999 	}
28000 #endif /* _MULTI_DATAMODEL */
28001 
28002 	/*
28003 	 * MMC-2 allows at most 3 bytes (24 bits) for the transfer length,
28004 	 * so reject a requested length that does not fit.
28005 	 */
28006 	if ((subcode->cdsc_length & 0xFF000000) != 0) {
28007 		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
28008 		    "sr_read_all_subcodes: "
28009 		    "cdrom transfer length too large: %d (limit %d)\n",
28010 		    subcode->cdsc_length, 0xFFFFFF);
28011 		kmem_free(subcode, sizeof (struct cdrom_subcode));
28012 		return (EINVAL);
28013 	}
28014 
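	/*
	 * Each requested block contributes CDROM_BLK_SUBCODE (96) bytes of
	 * raw subcode data to the transfer.
	 */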
28015 	buflen = CDROM_BLK_SUBCODE * subcode->cdsc_length;
28016 	com = kmem_zalloc(sizeof (*com), KM_SLEEP);
28017 	bzero(cdb, CDB_GROUP5);
28018 
28019 	if (un->un_f_mmc_cap == TRUE) {
28020 		cdb[0] = (char)SCMD_READ_CD;
28021 		cdb[2] = (char)0xff;
28022 		cdb[3] = (char)0xff;
28023 		cdb[4] = (char)0xff;
28024 		cdb[5] = (char)0xff;
28025 		cdb[6] = (((subcode->cdsc_length) & 0x00ff0000) >> 16);
28026 		cdb[7] = (((subcode->cdsc_length) & 0x0000ff00) >> 8);
28027 		cdb[8] = ((subcode->cdsc_length) & 0x000000ff);
28028 		cdb[10] = 1;
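		/* Byte 10: sub-channel selection 001b = raw P-W subcode. */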
28029 	} else {
28030 		/*
28031 		 * Note: A vendor specific command (0xDF) is being used here to
28032 		 * request a read of all subcodes.
28033 		 */
28034 		cdb[0] = (char)SCMD_READ_ALL_SUBCODES;
28035 		cdb[6] = (((subcode->cdsc_length) & 0xff000000) >> 24);
28036 		cdb[7] = (((subcode->cdsc_length) & 0x00ff0000) >> 16);
28037 		cdb[8] = (((subcode->cdsc_length) & 0x0000ff00) >> 8);
28038 		cdb[9] = ((subcode->cdsc_length) & 0x000000ff);
28039 	}
28040 	com->uscsi_cdb	   = cdb;
28041 	com->uscsi_cdblen  = CDB_GROUP5;
28042 	com->uscsi_bufaddr = (caddr_t)subcode->cdsc_addr;
28043 	com->uscsi_buflen  = buflen;
28044 	com->uscsi_flags   = USCSI_DIAGNOSE | USCSI_SILENT | USCSI_READ;
28045 	rval = sd_send_scsi_cmd(dev, com, FKIOCTL, UIO_USERSPACE,
28046 	    SD_PATH_STANDARD);
28047 	kmem_free(subcode, sizeof (struct cdrom_subcode));
28048 	kmem_free(com, sizeof (*com));
28049 	return (rval);
28050 }
28051 
28052 
28053 /*
28054  *    Function: sr_read_subchannel()
28055  *
28056  * Description: This routine is the driver entry point for handling CD-ROM
28057  *		ioctl requests to return the Q sub-channel data of the CD
28058  *		current position block. (CDROMSUBCHNL) The data includes the
28059  *		current position block (CDROMSUBCHNL). The data includes the
28060  *		track number, index number, absolute CD-ROM address (LBA or MSF
28061  *		format per the user), track relative CD-ROM address (LBA or MSF
28062  *
28063  *   Arguments: dev	- the device 'dev_t'
28064  *		data	- pointer to user provided cdrom sub-channel structure
28065  *		flag	- this argument is a pass through to ddi_copyxxx()
28066  *		          directly from the mode argument of ioctl().
28067  *
28068  * Return Code: the code returned by sd_send_scsi_cmd()
28069  *		EFAULT if ddi_copyxxx() fails
28070  *		ENXIO if ddi_get_soft_state() fails
28071  *		EINVAL if data pointer is NULL
28072  */
28073 
28074 static int
28075 sr_read_subchannel(dev_t dev, caddr_t data, int flag)
28076 {
28077 	struct sd_lun		*un;
28078 	struct uscsi_cmd	*com;
28079 	struct cdrom_subchnl	subchannel;
28080 	struct cdrom_subchnl	*subchnl = &subchannel;
28081 	char			cdb[CDB_GROUP1];
28082 	caddr_t			buffer;
28083 	int			rval;
28084 
28085 	if (data == NULL) {
28086 		return (EINVAL);
28087 	}
28088 
28089 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL ||
28090 	    (un->un_state == SD_STATE_OFFLINE)) {
28091 		return (ENXIO);
28092 	}
28093 
28094 	if (ddi_copyin(data, subchnl, sizeof (struct cdrom_subchnl), flag)) {
28095 		return (EFAULT);
28096 	}
28097 
28098 	buffer = kmem_zalloc((size_t)16, KM_SLEEP);
28099 	bzero(cdb, CDB_GROUP1);
28100 	cdb[0] = SCMD_READ_SUBCHANNEL;
28101 	/* Set the MSF bit based on the user requested address format */
28102 	cdb[1] = (subchnl->cdsc_format & CDROM_LBA) ? 0 : 0x02;
28103 	/*
28104 	 * Set the Q bit in byte 2 to indicate that Q sub-channel data should
28105 	 * be returned.
28106 	 */
28107 	cdb[2] = 0x40;
28108 	/*
28109 	 * Set byte 3 to specify the return data format. A value of 0x01
28110 	 * indicates that the CD-ROM current position should be returned.
28111 	 */
28112 	cdb[3] = 0x01;
28113 	cdb[8] = 0x10;
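	/* Bytes 7-8: allocation length (16), matching uscsi_buflen below. */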
28114 	com = kmem_zalloc(sizeof (*com), KM_SLEEP);
28115 	com->uscsi_cdb	   = cdb;
28116 	com->uscsi_cdblen  = CDB_GROUP1;
28117 	com->uscsi_bufaddr = buffer;
28118 	com->uscsi_buflen  = 16;
28119 	com->uscsi_flags   = USCSI_DIAGNOSE | USCSI_SILENT | USCSI_READ;
28120 	rval = sd_send_scsi_cmd(dev, com, FKIOCTL, UIO_SYSSPACE,
28121 	    SD_PATH_STANDARD);
28122 	if (rval != 0) {
28123 		kmem_free(buffer, 16);
28124 		kmem_free(com, sizeof (*com));
28125 		return (rval);
28126 	}
28127 
28128 	/* Process the returned Q sub-channel data */
28129 	subchnl->cdsc_audiostatus = buffer[1];
28130 	subchnl->cdsc_adr	= (buffer[5] & 0xF0) >> 4;
28131 	subchnl->cdsc_ctrl	= (buffer[5] & 0x0F);
28132 	subchnl->cdsc_trk	= buffer[6];
28133 	subchnl->cdsc_ind	= buffer[7];
28134 	if (subchnl->cdsc_format & CDROM_LBA) {
28135 		subchnl->cdsc_absaddr.lba =
28136 		    ((uchar_t)buffer[8] << 24) + ((uchar_t)buffer[9] << 16) +
28137 		    ((uchar_t)buffer[10] << 8) + ((uchar_t)buffer[11]);
28138 		subchnl->cdsc_reladdr.lba =
28139 		    ((uchar_t)buffer[12] << 24) + ((uchar_t)buffer[13] << 16) +
28140 		    ((uchar_t)buffer[14] << 8) + ((uchar_t)buffer[15]);
28141 	} else if (un->un_f_cfg_readsub_bcd == TRUE) {
28142 		subchnl->cdsc_absaddr.msf.minute = BCD_TO_BYTE(buffer[9]);
28143 		subchnl->cdsc_absaddr.msf.second = BCD_TO_BYTE(buffer[10]);
28144 		subchnl->cdsc_absaddr.msf.frame  = BCD_TO_BYTE(buffer[11]);
28145 		subchnl->cdsc_reladdr.msf.minute = BCD_TO_BYTE(buffer[13]);
28146 		subchnl->cdsc_reladdr.msf.second = BCD_TO_BYTE(buffer[14]);
28147 		subchnl->cdsc_reladdr.msf.frame  = BCD_TO_BYTE(buffer[15]);
28148 	} else {
28149 		subchnl->cdsc_absaddr.msf.minute = buffer[9];
28150 		subchnl->cdsc_absaddr.msf.second = buffer[10];
28151 		subchnl->cdsc_absaddr.msf.frame  = buffer[11];
28152 		subchnl->cdsc_reladdr.msf.minute = buffer[13];
28153 		subchnl->cdsc_reladdr.msf.second = buffer[14];
28154 		subchnl->cdsc_reladdr.msf.frame  = buffer[15];
28155 	}
28156 	kmem_free(buffer, 16);
28157 	kmem_free(com, sizeof (*com));
28158 	if (ddi_copyout(subchnl, data, sizeof (struct cdrom_subchnl), flag)
28159 	    != 0) {
28160 		return (EFAULT);
28161 	}
28162 	return (rval);
28163 }
28164 
28165 
28166 /*
28167  *    Function: sr_read_tocentry()
28168  *
28169  * Description: This routine is the driver entry point for handling CD-ROM
28170  *		ioctl requests to read from the Table of Contents (TOC)
28171  *		(CDROMREADTOCENTRY). This routine provides the ADR and CTRL
28172  *		fields, the starting address (LBA or MSF format per the user)
28173  *		and the data mode if the user specified track is a data track.
28174  *
28175  *		Note: The READ HEADER (0x44) command used in this routine is
28176  *		obsolete per the SCSI MMC spec but still supported in the
28177  *		MT FUJI vendor spec. Most equipment is adhering to MT FUJI,
28178  *		therefore the command is still implemented in this routine.
28179  *
28180  *   Arguments: dev	- the device 'dev_t'
28181  *		data	- pointer to user provided toc entry structure,
28182  *			  specifying the track # and the address format
28183  *			  (LBA or MSF).
28184  *		flag	- this argument is a pass through to ddi_copyxxx()
28185  *		          directly from the mode argument of ioctl().
28186  *
28187  * Return Code: the code returned by sd_send_scsi_cmd()
28188  *		EFAULT if ddi_copyxxx() fails
28189  *		ENXIO if ddi_get_soft_state() fails
28190  *		EINVAL if data pointer is NULL
28191  */
28192 
28193 static int
28194 sr_read_tocentry(dev_t dev, caddr_t data, int flag)
28195 {
28196 	struct sd_lun		*un = NULL;
28197 	struct uscsi_cmd	*com;
28198 	struct cdrom_tocentry	toc_entry;
28199 	struct cdrom_tocentry	*entry = &toc_entry;
28200 	caddr_t			buffer;
28201 	int			rval;
28202 	char			cdb[CDB_GROUP1];
28203 
28204 	if (data == NULL) {
28205 		return (EINVAL);
28206 	}
28207 
28208 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL ||
28209 	    (un->un_state == SD_STATE_OFFLINE)) {
28210 		return (ENXIO);
28211 	}
28212 
28213 	if (ddi_copyin(data, entry, sizeof (struct cdrom_tocentry), flag)) {
28214 		return (EFAULT);
28215 	}
28216 
28217 	/* Validate the requested track and address format */
28218 	if (!(entry->cdte_format & (CDROM_LBA | CDROM_MSF))) {
28219 		return (EINVAL);
28220 	}
28221 
28222 	if (entry->cdte_track == 0) {
28223 		return (EINVAL);
28224 	}
28225 
28226 	buffer = kmem_zalloc((size_t)12, KM_SLEEP);
28227 	com = kmem_zalloc(sizeof (*com), KM_SLEEP);
28228 	bzero(cdb, CDB_GROUP1);
28229 
28230 	cdb[0] = SCMD_READ_TOC;
28231 	/* Set the MSF bit based on the user requested address format  */
28232 	cdb[1] = ((entry->cdte_format & CDROM_LBA) ? 0 : 2);
28233 	if (un->un_f_cfg_read_toc_trk_bcd == TRUE) {
28234 		cdb[6] = BYTE_TO_BCD(entry->cdte_track);
28235 	} else {
28236 		cdb[6] = entry->cdte_track;
28237 	}
28238 
28239 	/*
28240 	 * Bytes 7 & 8 are the 12 byte allocation length for a single entry.
28241 	 * (4 byte TOC response header + 8 byte track descriptor)
28242 	 */
28243 	cdb[8] = 12;
28244 	com->uscsi_cdb	   = cdb;
28245 	com->uscsi_cdblen  = CDB_GROUP1;
28246 	com->uscsi_bufaddr = buffer;
28247 	com->uscsi_buflen  = 0x0C;
28248 	com->uscsi_flags   = (USCSI_DIAGNOSE | USCSI_SILENT | USCSI_READ);
28249 	rval = sd_send_scsi_cmd(dev, com, FKIOCTL, UIO_SYSSPACE,
28250 	    SD_PATH_STANDARD);
28251 	if (rval != 0) {
28252 		kmem_free(buffer, 12);
28253 		kmem_free(com, sizeof (*com));
28254 		return (rval);
28255 	}
28256 
28257 	/* Process the toc entry */
28258 	entry->cdte_adr		= (buffer[5] & 0xF0) >> 4;
28259 	entry->cdte_ctrl	= (buffer[5] & 0x0F);
28260 	if (entry->cdte_format & CDROM_LBA) {
28261 		entry->cdte_addr.lba =
28262 		    ((uchar_t)buffer[8] << 24) + ((uchar_t)buffer[9] << 16) +
28263 		    ((uchar_t)buffer[10] << 8) + ((uchar_t)buffer[11]);
28264 	} else if (un->un_f_cfg_read_toc_addr_bcd == TRUE) {
28265 		entry->cdte_addr.msf.minute	= BCD_TO_BYTE(buffer[9]);
28266 		entry->cdte_addr.msf.second	= BCD_TO_BYTE(buffer[10]);
28267 		entry->cdte_addr.msf.frame	= BCD_TO_BYTE(buffer[11]);
28268 		/*
28269 		 * Send a READ TOC command using the LBA address format to get
28270 		 * the LBA for the track requested so it can be used in the
28271 		 * READ HEADER request
28272 		 *
28273 		 * Note: The MSF bit of the READ HEADER command specifies the
28274 		 * output format. The block address specified in that command
28275 		 * must be in LBA format.
28276 		 */
28277 		cdb[1] = 0;
28278 		rval = sd_send_scsi_cmd(dev, com, FKIOCTL, UIO_SYSSPACE,
28279 		    SD_PATH_STANDARD);
28280 		if (rval != 0) {
28281 			kmem_free(buffer, 12);
28282 			kmem_free(com, sizeof (*com));
28283 			return (rval);
28284 		}
28285 	} else {
28286 		entry->cdte_addr.msf.minute	= buffer[9];
28287 		entry->cdte_addr.msf.second	= buffer[10];
28288 		entry->cdte_addr.msf.frame	= buffer[11];
28289 		/*
28290 		 * Send a READ TOC command using the LBA address format to get
28291 		 * the LBA for the track requested so it can be used in the
28292 		 * READ HEADER request
28293 		 *
28294 		 * Note: The MSF bit of the READ HEADER command specifies the
28295 		 * output format. The block address specified in that command
28296 		 * must be in LBA format.
28297 		 */
28298 		cdb[1] = 0;
28299 		rval = sd_send_scsi_cmd(dev, com, FKIOCTL, UIO_SYSSPACE,
28300 		    SD_PATH_STANDARD);
28301 		if (rval != 0) {
28302 			kmem_free(buffer, 12);
28303 			kmem_free(com, sizeof (*com));
28304 			return (rval);
28305 		}
28306 	}
28307 
28308 	/*
28309 	 * Build and send the READ HEADER command to determine the data mode of
28310 	 * the user specified track.
28311 	 */
28312 	if ((entry->cdte_ctrl & CDROM_DATA_TRACK) &&
28313 	    (entry->cdte_track != CDROM_LEADOUT)) {
28314 		bzero(cdb, CDB_GROUP1);
28315 		cdb[0] = SCMD_READ_HEADER;
28316 		cdb[2] = buffer[8];
28317 		cdb[3] = buffer[9];
28318 		cdb[4] = buffer[10];
28319 		cdb[5] = buffer[11];
28320 		cdb[8] = 0x08;
28321 		com->uscsi_buflen = 0x08;
28322 		rval = sd_send_scsi_cmd(dev, com, FKIOCTL, UIO_SYSSPACE,
28323 		    SD_PATH_STANDARD);
28324 		if (rval == 0) {
28325 			entry->cdte_datamode = buffer[0];
28326 		} else {
28327 			/*
28328 			 * READ HEADER command failed, since this is
28329 			 * The READ HEADER command failed. Since it is
28330 			 * obsolete in one spec, it's better to return
28331 			 * -1 for an invalid track so that we can still
28332 			 */
28333 			entry->cdte_datamode = (uchar_t)-1;
28334 		}
28335 	} else {
28336 		entry->cdte_datamode = (uchar_t)-1;
28337 	}
28338 
28339 	kmem_free(buffer, 12);
28340 	kmem_free(com, sizeof (*com));
28341 	if (ddi_copyout(entry, data, sizeof (struct cdrom_tocentry), flag) != 0)
28342 		return (EFAULT);
28343 
28344 	return (rval);
28345 }
28346 
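/*
 * Illustrative sketch (not part of the driver): a userland consumer of the
 * CDROMREADTOCENTRY ioctl implemented above. The file descriptor 'fd' is an
 * assumed open CD-ROM device node; on success the ADR/CTRL fields, the MSF
 * address, and the data mode (or (uchar_t)-1) come back in 'te'.
 *
 *	struct cdrom_tocentry te = { 0 };
 *
 *	te.cdte_track = 1;
 *	te.cdte_format = CDROM_MSF;
 *	if (ioctl(fd, CDROMREADTOCENTRY, &te) == 0) {
 *		(void) printf("track 1 starts at %d:%d.%d\n",
 *		    te.cdte_addr.msf.minute, te.cdte_addr.msf.second,
 *		    te.cdte_addr.msf.frame);
 *	}
 */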
28347 
28348 /*
28349  *    Function: sr_read_tochdr()
28350  *
28351  * Description: This routine is the driver entry point for handling CD-ROM
28352  *		ioctl requests to read the Table of Contents (TOC) header
28353  *		(CDROMREADTOHDR). The TOC header consists of the disk starting
28354  *		(CDROMREADTOCHDR). The TOC header consists of the disk starting
28355  *		and ending track numbers.
28356  *   Arguments: dev	- the device 'dev_t'
28357  *		data	- pointer to user provided toc header structure,
28358  *			  specifying the starting and ending track numbers.
28359  *		flag	- this argument is a pass through to ddi_copyxxx()
28360  *			  directly from the mode argument of ioctl().
28361  *
28362  * Return Code: the code returned by sd_send_scsi_cmd()
28363  *		EFAULT if ddi_copyxxx() fails
28364  *		ENXIO if ddi_get_soft_state() fails
28365  *		EINVAL if data pointer is NULL
28366  */
28367 
28368 static int
28369 sr_read_tochdr(dev_t dev, caddr_t data, int flag)
28370 {
28371 	struct sd_lun		*un;
28372 	struct uscsi_cmd	*com;
28373 	struct cdrom_tochdr	toc_header;
28374 	struct cdrom_tochdr	*hdr = &toc_header;
28375 	char			cdb[CDB_GROUP1];
28376 	int			rval;
28377 	caddr_t			buffer;
28378 
28379 	if (data == NULL) {
28380 		return (EINVAL);
28381 	}
28382 
28383 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL ||
28384 	    (un->un_state == SD_STATE_OFFLINE)) {
28385 		return (ENXIO);
28386 	}
28387 
28388 	buffer = kmem_zalloc(4, KM_SLEEP);
28389 	bzero(cdb, CDB_GROUP1);
28390 	cdb[0] = SCMD_READ_TOC;
28391 	/*
28392 	 * Specifying a track number of 0x00 in the READ TOC command indicates
28393 	 * that the TOC header should be returned
28394 	 */
28395 	cdb[6] = 0x00;
28396 	/*
28397 	 * Bytes 7 & 8 are the 4 byte allocation length for TOC header.
28398 	 * (2 byte data len + 1 byte starting track # + 1 byte ending track #)
28399 	 */
28400 	cdb[8] = 0x04;
28401 	com = kmem_zalloc(sizeof (*com), KM_SLEEP);
28402 	com->uscsi_cdb	   = cdb;
28403 	com->uscsi_cdblen  = CDB_GROUP1;
28404 	com->uscsi_bufaddr = buffer;
28405 	com->uscsi_buflen  = 0x04;
28406 	com->uscsi_timeout = 300;
28407 	com->uscsi_flags   = USCSI_DIAGNOSE | USCSI_SILENT | USCSI_READ;
28408 
28409 	rval = sd_send_scsi_cmd(dev, com, FKIOCTL, UIO_SYSSPACE,
28410 	    SD_PATH_STANDARD);
28411 	if (un->un_f_cfg_read_toc_trk_bcd == TRUE) {
28412 		hdr->cdth_trk0 = BCD_TO_BYTE(buffer[2]);
28413 		hdr->cdth_trk1 = BCD_TO_BYTE(buffer[3]);
28414 	} else {
28415 		hdr->cdth_trk0 = buffer[2];
28416 		hdr->cdth_trk1 = buffer[3];
28417 	}
28418 	kmem_free(buffer, 4);
28419 	kmem_free(com, sizeof (*com));
28420 	if (ddi_copyout(hdr, data, sizeof (struct cdrom_tochdr), flag) != 0) {
28421 		return (EFAULT);
28422 	}
28423 	return (rval);
28424 }
28425 
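/*
 * Illustrative sketch (not part of the driver): CDROMREADTOCHDR pairs
 * naturally with CDROMREADTOCENTRY, using the returned first and last track
 * numbers to bound a scan of the disc. 'fd' is an assumed open device node.
 *
 *	struct cdrom_tochdr th;
 *	struct cdrom_tocentry te;
 *	int trk;
 *
 *	if (ioctl(fd, CDROMREADTOCHDR, &th) == 0) {
 *		for (trk = th.cdth_trk0; trk <= th.cdth_trk1; trk++) {
 *			te.cdte_track = (uchar_t)trk;
 *			te.cdte_format = CDROM_LBA;
 *			(void) ioctl(fd, CDROMREADTOCENTRY, &te);
 *		}
 *	}
 */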
28426 
28427 /*
28428  * Note: The following sr_read_mode1(), sr_read_cd_mode2(), sr_read_mode2(),
28429  * sr_read_cdda(), sr_read_cdxa(), routines implement driver support for
28430  * handling CDROMREAD ioctl requests for mode 1 user data, mode 2 user data,
28431  * digital audio and extended architecture digital audio. These modes are
28432  * defined in the IEC908 (Red Book), ISO10149 (Yellow Book), and the SCSI3
28433  * MMC specs.
28434  *
28435  * In addition to support for the various data formats these routines also
28436  * include support for devices that implement only the direct access READ
28437  * commands (0x08, 0x28), devices that implement the READ_CD commands
28438  * (0xBE, 0xD4), and devices that implement the vendor unique READ CDDA and
28439  * READ CDXA commands (0xD8, 0xDB)
28440  */
28441 
28442 /*
28443  *    Function: sr_read_mode1()
28444  *
28445  * Description: This routine is the driver entry point for handling CD-ROM
28446  *		ioctl read mode1 requests (CDROMREADMODE1).
28447  *
28448  *   Arguments: dev	- the device 'dev_t'
28449  *		data	- pointer to user provided cd read structure specifying
28450  *			  the lba buffer address and length.
28451  *		flag	- this argument is a pass through to ddi_copyxxx()
28452  *			  directly from the mode argument of ioctl().
28453  *
28454  * Return Code: the code returned by sd_send_scsi_cmd()
28455  *		EFAULT if ddi_copyxxx() fails
28456  *		ENXIO if ddi_get_soft_state() fails
28457  *		EINVAL if data pointer is NULL
28458  */
28459 
28460 static int
28461 sr_read_mode1(dev_t dev, caddr_t data, int flag)
28462 {
28463 	struct sd_lun		*un;
28464 	struct cdrom_read	mode1_struct;
28465 	struct cdrom_read	*mode1 = &mode1_struct;
28466 	int			rval;
28467 	sd_ssc_t		*ssc;
28468 
28469 #ifdef _MULTI_DATAMODEL
28470 	/* To support ILP32 applications in an LP64 world */
28471 	struct cdrom_read32	cdrom_read32;
28472 	struct cdrom_read32	*cdrd32 = &cdrom_read32;
28473 #endif /* _MULTI_DATAMODEL */
28474 
28475 	if (data == NULL) {
28476 		return (EINVAL);
28477 	}
28478 
28479 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL ||
28480 	    (un->un_state == SD_STATE_OFFLINE)) {
28481 		return (ENXIO);
28482 	}
28483 
28484 	SD_TRACE(SD_LOG_ATTACH_DETACH, un,
28485 	    "sd_read_mode1: entry: un:0x%p\n", un);
28486 
28487 #ifdef _MULTI_DATAMODEL
28488 	switch (ddi_model_convert_from(flag & FMODELS)) {
28489 	case DDI_MODEL_ILP32:
28490 		if (ddi_copyin(data, cdrd32, sizeof (*cdrd32), flag) != 0) {
28491 			return (EFAULT);
28492 		}
28493 		/* Convert the ILP32 uscsi data from the application to LP64 */
28494 		cdrom_read32tocdrom_read(cdrd32, mode1);
28495 		break;
28496 	case DDI_MODEL_NONE:
28497 		if (ddi_copyin(data, mode1, sizeof (struct cdrom_read), flag)) {
28498 			return (EFAULT);
28499 		}
28500 	}
28501 #else /* ! _MULTI_DATAMODEL */
28502 	if (ddi_copyin(data, mode1, sizeof (struct cdrom_read), flag)) {
28503 		return (EFAULT);
28504 	}
28505 #endif /* _MULTI_DATAMODEL */
28506 
28507 	ssc = sd_ssc_init(un);
28508 	rval = sd_send_scsi_READ(ssc, mode1->cdread_bufaddr,
28509 	    mode1->cdread_buflen, mode1->cdread_lba, SD_PATH_STANDARD);
28510 	sd_ssc_fini(ssc);
28511 
28512 	SD_TRACE(SD_LOG_ATTACH_DETACH, un,
28513 	    "sd_read_mode1: exit: un:0x%p\n", un);
28514 
28515 	return (rval);
28516 }
28517 
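/*
 * Illustrative sketch (not part of the driver): issuing CDROMREADMODE1 from
 * userland. 'fd', 'lba', and 'buf' (a buffer of at least 2048 bytes, one
 * mode 1 sector) are assumed.
 *
 *	struct cdrom_read cr;
 *
 *	cr.cdread_lba = lba;
 *	cr.cdread_bufaddr = buf;
 *	cr.cdread_buflen = 2048;
 *	if (ioctl(fd, CDROMREADMODE1, &cr) != 0)
 *		perror("CDROMREADMODE1");
 */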
28518 
28519 /*
28520  *    Function: sr_read_cd_mode2()
28521  *
28522  * Description: This routine is the driver entry point for handling CD-ROM
28523  *		ioctl read mode2 requests (CDROMREADMODE2) for devices that
28524  *		support the READ CD (0xBE) command or the 1st generation
28525  *		READ CD (0xD4) command.
28526  *
28527  *   Arguments: dev	- the device 'dev_t'
28528  *		data	- pointer to user provided cd read structure specifying
28529  *			  the lba buffer address and length.
28530  *		flag	- this argument is a pass through to ddi_copyxxx()
28531  *			  directly from the mode argument of ioctl().
28532  *
28533  * Return Code: the code returned by sd_send_scsi_cmd()
28534  *		EFAULT if ddi_copyxxx() fails
28535  *		ENXIO if ddi_get_soft_state() fails
28536  *		EINVAL if data pointer is NULL
28537  */
28538 
28539 static int
28540 sr_read_cd_mode2(dev_t dev, caddr_t data, int flag)
28541 {
28542 	struct sd_lun		*un;
28543 	struct uscsi_cmd	*com;
28544 	struct cdrom_read	mode2_struct;
28545 	struct cdrom_read	*mode2 = &mode2_struct;
28546 	uchar_t			cdb[CDB_GROUP5];
28547 	int			nblocks;
28548 	int			rval;
28549 #ifdef _MULTI_DATAMODEL
28550 	/*  To support ILP32 applications in an LP64 world */
28551 	struct cdrom_read32	cdrom_read32;
28552 	struct cdrom_read32	*cdrd32 = &cdrom_read32;
28553 #endif /* _MULTI_DATAMODEL */
28554 
28555 	if (data == NULL) {
28556 		return (EINVAL);
28557 	}
28558 
28559 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL ||
28560 	    (un->un_state == SD_STATE_OFFLINE)) {
28561 		return (ENXIO);
28562 	}
28563 
28564 #ifdef _MULTI_DATAMODEL
28565 	switch (ddi_model_convert_from(flag & FMODELS)) {
28566 	case DDI_MODEL_ILP32:
28567 		if (ddi_copyin(data, cdrd32, sizeof (*cdrd32), flag) != 0) {
28568 			return (EFAULT);
28569 		}
28570 		/* Convert the ILP32 uscsi data from the application to LP64 */
28571 		cdrom_read32tocdrom_read(cdrd32, mode2);
28572 		break;
28573 	case DDI_MODEL_NONE:
28574 		if (ddi_copyin(data, mode2, sizeof (*mode2), flag) != 0) {
28575 			return (EFAULT);
28576 		}
28577 		break;
28578 	}
28579 
28580 #else /* ! _MULTI_DATAMODEL */
28581 	if (ddi_copyin(data, mode2, sizeof (*mode2), flag) != 0) {
28582 		return (EFAULT);
28583 	}
28584 #endif /* _MULTI_DATAMODEL */
28585 
28586 	bzero(cdb, sizeof (cdb));
28587 	if (un->un_f_cfg_read_cd_xd4 == TRUE) {
28588 		/* Read command supported by 1st generation atapi drives */
28589 		cdb[0] = SCMD_READ_CDD4;
28590 	} else {
28591 		/* Universal CD Access Command */
28592 		cdb[0] = SCMD_READ_CD;
28593 	}
28594 
28595 	/*
28596 	 * Set expected sector type to: 2336 byte, Mode 2 Yellow Book
28597 	 */
28598 	cdb[1] = CDROM_SECTOR_TYPE_MODE2;
28599 
28600 	/* set the start address */
28601 	cdb[2] = (uchar_t)((mode2->cdread_lba >> 24) & 0XFF);
28602 	cdb[3] = (uchar_t)((mode2->cdread_lba >> 16) & 0XFF);
28603 	cdb[4] = (uchar_t)((mode2->cdread_lba >> 8) & 0xFF);
28604 	cdb[5] = (uchar_t)(mode2->cdread_lba & 0xFF);
28605 
28606 	/* set the transfer length */
28607 	nblocks = mode2->cdread_buflen / 2336;
28608 	cdb[6] = (uchar_t)(nblocks >> 16);
28609 	cdb[7] = (uchar_t)(nblocks >> 8);
28610 	cdb[8] = (uchar_t)nblocks;
28611 
28612 	/* set the filter bits */
28613 	cdb[9] = CDROM_READ_CD_USERDATA;
28614 
28615 	com = kmem_zalloc(sizeof (*com), KM_SLEEP);
28616 	com->uscsi_cdb = (caddr_t)cdb;
28617 	com->uscsi_cdblen = sizeof (cdb);
28618 	com->uscsi_bufaddr = mode2->cdread_bufaddr;
28619 	com->uscsi_buflen = mode2->cdread_buflen;
28620 	com->uscsi_flags = USCSI_DIAGNOSE | USCSI_SILENT | USCSI_READ;
28621 
28622 	rval = sd_send_scsi_cmd(dev, com, FKIOCTL, UIO_USERSPACE,
28623 	    SD_PATH_STANDARD);
28624 	kmem_free(com, sizeof (*com));
28625 	return (rval);
28626 }
28627 
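/*
 * Illustrative sketch (not part of the driver): the big-endian CDB packing
 * performed above, factored into a hypothetical helper. In the Group 5
 * READ CD CDB the starting LBA occupies bytes 2-5 and the transfer length,
 * in blocks, occupies bytes 6-8.
 *
 *	static void
 *	cdb_pack_read_cd(uchar_t *cdb, uint32_t lba, uint32_t nblocks)
 *	{
 *		cdb[2] = (uchar_t)((lba >> 24) & 0xFF);
 *		cdb[3] = (uchar_t)((lba >> 16) & 0xFF);
 *		cdb[4] = (uchar_t)((lba >> 8) & 0xFF);
 *		cdb[5] = (uchar_t)(lba & 0xFF);
 *		cdb[6] = (uchar_t)((nblocks >> 16) & 0xFF);
 *		cdb[7] = (uchar_t)((nblocks >> 8) & 0xFF);
 *		cdb[8] = (uchar_t)(nblocks & 0xFF);
 *	}
 */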
28628 
28629 /*
28630  *    Function: sr_read_mode2()
28631  *
28632  * Description: This routine is the driver entry point for handling CD-ROM
28633  *		ioctl read mode2 requests (CDROMREADMODE2) for devices that
28634  *		do not support the READ CD (0xBE) command.
28635  *
28636  *   Arguments: dev	- the device 'dev_t'
28637  *		data	- pointer to user provided cd read structure specifying
28638  *			  the lba buffer address and length.
28639  *		flag	- this argument is a pass through to ddi_copyxxx()
28640  *			  directly from the mode argument of ioctl().
28641  *
28642  * Return Code: the code returned by sd_send_scsi_cmd()
28643  *		EFAULT if ddi_copyxxx() fails
28644  *		ENXIO if ddi_get_soft_state() fails
28645  *		EINVAL if data pointer is NULL
28646  *		EIO if fail to reset block size
28647  *		EAGAIN if commands are in progress in the driver
28648  */
28649 
28650 static int
28651 sr_read_mode2(dev_t dev, caddr_t data, int flag)
28652 {
28653 	struct sd_lun		*un;
28654 	struct cdrom_read	mode2_struct;
28655 	struct cdrom_read	*mode2 = &mode2_struct;
28656 	int			rval;
28657 	uint32_t		restore_blksize;
28658 	struct uscsi_cmd	*com;
28659 	uchar_t			cdb[CDB_GROUP0];
28660 	int			nblocks;
28661 
28662 #ifdef _MULTI_DATAMODEL
28663 	/* To support ILP32 applications in an LP64 world */
28664 	struct cdrom_read32	cdrom_read32;
28665 	struct cdrom_read32	*cdrd32 = &cdrom_read32;
28666 #endif /* _MULTI_DATAMODEL */
28667 
28668 	if (data == NULL) {
28669 		return (EINVAL);
28670 	}
28671 
28672 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL ||
28673 	    (un->un_state == SD_STATE_OFFLINE)) {
28674 		return (ENXIO);
28675 	}
28676 
28677 	/*
28678 	 * Because this routine will update the device and driver block size
28679 	 * being used we want to make sure there are no commands in progress.
28680 	 * If commands are in progress the user will have to try again.
28681 	 *
28682 	 * We check for 1 instead of 0 because we increment un_ncmds_in_driver
28683 	 * in sdioctl to protect commands from sdioctl through to the top of
28684 	 * sd_uscsi_strategy. See sdioctl for details.
28685 	 */
28686 	mutex_enter(SD_MUTEX(un));
28687 	if (un->un_ncmds_in_driver != 1) {
28688 		mutex_exit(SD_MUTEX(un));
28689 		return (EAGAIN);
28690 	}
28691 	mutex_exit(SD_MUTEX(un));
28692 
28693 	SD_TRACE(SD_LOG_ATTACH_DETACH, un,
28694 	    "sd_read_mode2: entry: un:0x%p\n", un);
28695 
28696 #ifdef _MULTI_DATAMODEL
28697 	switch (ddi_model_convert_from(flag & FMODELS)) {
28698 	case DDI_MODEL_ILP32:
28699 		if (ddi_copyin(data, cdrd32, sizeof (*cdrd32), flag) != 0) {
28700 			return (EFAULT);
28701 		}
28702 		/* Convert the ILP32 uscsi data from the application to LP64 */
28703 		cdrom_read32tocdrom_read(cdrd32, mode2);
28704 		break;
28705 	case DDI_MODEL_NONE:
28706 		if (ddi_copyin(data, mode2, sizeof (*mode2), flag) != 0) {
28707 			return (EFAULT);
28708 		}
28709 		break;
28710 	}
28711 #else /* ! _MULTI_DATAMODEL */
28712 	if (ddi_copyin(data, mode2, sizeof (*mode2), flag)) {
28713 		return (EFAULT);
28714 	}
28715 #endif /* _MULTI_DATAMODEL */
28716 
28717 	/* Store the current target block size for restoration later */
28718 	restore_blksize = un->un_tgt_blocksize;
28719 
28720 	/* Change the device and soft state target block size to 2336 */
28721 	if (sr_sector_mode(dev, SD_MODE2_BLKSIZE) != 0) {
28722 		rval = EIO;
28723 		goto done;
28724 	}
28725 
28726 
28727 	bzero(cdb, sizeof (cdb));
28728 
28729 	/* set READ operation */
28730 	cdb[0] = SCMD_READ;
28731 
28732 	/* adjust lba for 2kbyte blocks from 512 byte blocks */
28733 	mode2->cdread_lba >>= 2;
28734 
28735 	/* set the start address */
28736 	cdb[1] = (uchar_t)((mode2->cdread_lba >> 16) & 0X1F);
28737 	cdb[2] = (uchar_t)((mode2->cdread_lba >> 8) & 0xFF);
28738 	cdb[3] = (uchar_t)(mode2->cdread_lba & 0xFF);
28739 
28740 	/* set the transfer length */
28741 	nblocks = mode2->cdread_buflen / 2336;
28742 	cdb[4] = (uchar_t)nblocks & 0xFF;
28743 
28744 	/* build command */
28745 	com = kmem_zalloc(sizeof (*com), KM_SLEEP);
28746 	com->uscsi_cdb = (caddr_t)cdb;
28747 	com->uscsi_cdblen = sizeof (cdb);
28748 	com->uscsi_bufaddr = mode2->cdread_bufaddr;
28749 	com->uscsi_buflen = mode2->cdread_buflen;
28750 	com->uscsi_flags = USCSI_DIAGNOSE | USCSI_SILENT | USCSI_READ;
28751 
28752 	/*
28753 	 * Issue SCSI command with user space address for read buffer.
28754 	 *
28755 	 * This sends the command through main channel in the driver.
28756 	 *
28757 	 * Since this is accessed via an IOCTL call, we go through the
28758 	 * standard path, so that if the device was powered down, then
28759 	 * it would be 'awakened' to handle the command.
28760 	 */
28761 	rval = sd_send_scsi_cmd(dev, com, FKIOCTL, UIO_USERSPACE,
28762 	    SD_PATH_STANDARD);
28763 
28764 	kmem_free(com, sizeof (*com));
28765 
28766 	/* Restore the device and soft state target block size */
28767 	if (sr_sector_mode(dev, restore_blksize) != 0) {
28768 		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
28769 		    "can't do switch back to mode 1\n");
28770 		/*
28771 		 * If sd_send_scsi_READ succeeded we still need to report
28772 		 * an error because we failed to reset the block size
28773 		 */
28774 		if (rval == 0) {
28775 			rval = EIO;
28776 		}
28777 	}
28778 
28779 done:
28780 	SD_TRACE(SD_LOG_ATTACH_DETACH, un,
28781 	    "sd_read_mode2: exit: un:0x%p\n", un);
28782 
28783 	return (rval);
28784 }
28785 
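/*
 * Illustrative sketch (not part of the driver): because sr_read_mode2()
 * temporarily changes the device block size, it returns EAGAIN whenever any
 * other command is in flight. A userland caller (with 'fd' and a filled-in
 * 'cr' as for mode 1) might simply retry:
 *
 *	int rc;
 *
 *	do {
 *		rc = ioctl(fd, CDROMREADMODE2, &cr);
 *	} while (rc == -1 && errno == EAGAIN);
 */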
28786 
28787 /*
28788  *    Function: sr_sector_mode()
28789  *
28790  * Description: This utility function is used by sr_read_mode2 to set the target
28791  *		block size based on the user specified size. This is a legacy
28792  *		implementation based upon a vendor specific mode page (0x81).
28793  *
28794  *   Arguments: dev	- the device 'dev_t'
28795  *		blksize	- flag indicating if the block size is being set to
28796  *			  2336 or 512.
28797  *
28798  * Return Code: the code returned by sd_send_scsi_cmd()
28799  *		ENXIO if ddi_get_soft_state() fails
28802  */
28803 
28804 static int
28805 sr_sector_mode(dev_t dev, uint32_t blksize)
28806 {
28807 	struct sd_lun	*un;
28808 	uchar_t		*sense;
28809 	uchar_t		*select;
28810 	int		rval;
28811 	sd_ssc_t	*ssc;
28812 
28813 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL ||
28814 	    (un->un_state == SD_STATE_OFFLINE)) {
28815 		return (ENXIO);
28816 	}
28817 
28818 	sense = kmem_zalloc(20, KM_SLEEP);
28819 
28820 	/* Note: This is a vendor specific mode page (0x81) */
28821 	ssc = sd_ssc_init(un);
28822 	rval = sd_send_scsi_MODE_SENSE(ssc, CDB_GROUP0, sense, 20, 0x81,
28823 	    SD_PATH_STANDARD);
28824 	sd_ssc_fini(ssc);
28825 	if (rval != 0) {
28826 		SD_ERROR(SD_LOG_IOCTL_RMMEDIA, un,
28827 		    "sr_sector_mode: Mode Sense failed\n");
28828 		kmem_free(sense, 20);
28829 		return (rval);
28830 	}
28831 	select = kmem_zalloc(20, KM_SLEEP);
28832 	select[3] = 0x08;
28833 	select[10] = ((blksize >> 8) & 0xff);
28834 	select[11] = (blksize & 0xff);
28835 	select[12] = 0x01;
28836 	select[13] = 0x06;
28837 	select[14] = sense[14];
28838 	select[15] = sense[15];
28839 	if (blksize == SD_MODE2_BLKSIZE) {
28840 		select[14] |= 0x01;
28841 	}
28842 
28843 	ssc = sd_ssc_init(un);
28844 	rval = sd_send_scsi_MODE_SELECT(ssc, CDB_GROUP0, select, 20,
28845 	    SD_DONTSAVE_PAGE, SD_PATH_STANDARD);
28846 	sd_ssc_fini(ssc);
28847 	if (rval != 0) {
28848 		SD_ERROR(SD_LOG_IOCTL_RMMEDIA, un,
28849 		    "sr_sector_mode: Mode Select failed\n");
28850 	} else {
28851 		/*
28852 		 * Only update the softstate block size if we successfully
28853 		 * changed the device block mode.
28854 		 */
28855 		mutex_enter(SD_MUTEX(un));
28856 		sd_update_block_info(un, blksize, 0);
28857 		mutex_exit(SD_MUTEX(un));
28858 	}
28859 	kmem_free(sense, 20);
28860 	kmem_free(select, 20);
28861 	return (rval);
28862 }
28863 
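/*
 * Illustrative sketch (not part of the driver): the 20-byte MODE SELECT
 * parameter list built above, restated as a hypothetical helper. Bytes 0-3
 * are the mode parameter header (byte 3 = block descriptor length), bytes
 * 4-11 the block descriptor (bytes 10-11 = low bytes of the block length),
 * and byte 12 onward the vendor specific page (code 0x01, length 0x06).
 *
 *	static void
 *	build_sector_mode_select(uchar_t *select, const uchar_t *sense,
 *	    uint32_t blksize)
 *	{
 *		bzero(select, 20);
 *		select[3] = 0x08;
 *		select[10] = (blksize >> 8) & 0xff;
 *		select[11] = blksize & 0xff;
 *		select[12] = 0x01;
 *		select[13] = 0x06;
 *		select[14] = sense[14];
 *		select[15] = sense[15];
 *		if (blksize == SD_MODE2_BLKSIZE)
 *			select[14] |= 0x01;
 *	}
 */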
28864 
28865 /*
28866  *    Function: sr_read_cdda()
28867  *
28868  * Description: This routine is the driver entry point for handling CD-ROM
28869  *		ioctl requests to return CD-DA or subcode data (CDROMCDDA). If
28870  *		the target supports CDDA, these requests are handled via a
28871  *		vendor specific command (0xD8). If the target does not support
28872  *		CDDA, these requests are handled via the READ CD command (0xBE).
28873  *
28874  *   Arguments: dev	- the device 'dev_t'
28875  *		data	- pointer to user provided CD-DA structure specifying
28876  *			  the track starting address, transfer length, and
28877  *			  subcode options.
28878  *		flag	- this argument is a pass through to ddi_copyxxx()
28879  *			  directly from the mode argument of ioctl().
28880  *
28881  * Return Code: the code returned by sd_send_scsi_cmd()
28882  *		EFAULT if ddi_copyxxx() fails
28883  *		ENXIO if ddi_get_soft_state() fails
28884  *		EINVAL if invalid arguments are provided
28885  *		ENOTTY
28886  */
28887 
28888 static int
28889 sr_read_cdda(dev_t dev, caddr_t data, int flag)
28890 {
28891 	struct sd_lun			*un;
28892 	struct uscsi_cmd		*com;
28893 	struct cdrom_cdda		*cdda;
28894 	int				rval;
28895 	size_t				buflen;
28896 	char				cdb[CDB_GROUP5];
28897 
28898 #ifdef _MULTI_DATAMODEL
28899 	/* To support ILP32 applications in an LP64 world */
28900 	struct cdrom_cdda32	cdrom_cdda32;
28901 	struct cdrom_cdda32	*cdda32 = &cdrom_cdda32;
28902 #endif /* _MULTI_DATAMODEL */
28903 
28904 	if (data == NULL) {
28905 		return (EINVAL);
28906 	}
28907 
28908 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
28909 		return (ENXIO);
28910 	}
28911 
28912 	cdda = kmem_zalloc(sizeof (struct cdrom_cdda), KM_SLEEP);
28913 
28914 #ifdef _MULTI_DATAMODEL
28915 	switch (ddi_model_convert_from(flag & FMODELS)) {
28916 	case DDI_MODEL_ILP32:
28917 		if (ddi_copyin(data, cdda32, sizeof (*cdda32), flag)) {
28918 			scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
28919 			    "sr_read_cdda: ddi_copyin Failed\n");
28920 			kmem_free(cdda, sizeof (struct cdrom_cdda));
28921 			return (EFAULT);
28922 		}
28923 		/* Convert the ILP32 uscsi data from the application to LP64 */
28924 		cdrom_cdda32tocdrom_cdda(cdda32, cdda);
28925 		break;
28926 	case DDI_MODEL_NONE:
28927 		if (ddi_copyin(data, cdda, sizeof (struct cdrom_cdda), flag)) {
28928 			scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
28929 			    "sr_read_cdda: ddi_copyin Failed\n");
28930 			kmem_free(cdda, sizeof (struct cdrom_cdda));
28931 			return (EFAULT);
28932 		}
28933 		break;
28934 	}
28935 #else /* ! _MULTI_DATAMODEL */
28936 	if (ddi_copyin(data, cdda, sizeof (struct cdrom_cdda), flag)) {
28937 		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
28938 		    "sr_read_cdda: ddi_copyin Failed\n");
28939 		kmem_free(cdda, sizeof (struct cdrom_cdda));
28940 		return (EFAULT);
28941 	}
28942 #endif /* _MULTI_DATAMODEL */
28943 
28944 	/*
28945 	 * Since MMC-2 expects max 3 bytes for length, check if the
28946 	 * length input is greater than 3 bytes
28947 	 */
28948 	if ((cdda->cdda_length & 0xFF000000) != 0) {
28949 		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN, "sr_read_cdda: "
28950 		    "cdrom transfer length too large: %d (limit %d)\n",
28951 		    cdda->cdda_length, 0xFFFFFF);
28952 		kmem_free(cdda, sizeof (struct cdrom_cdda));
28953 		return (EINVAL);
28954 	}
28955 
28956 	switch (cdda->cdda_subcode) {
28957 	case CDROM_DA_NO_SUBCODE:
28958 		buflen = CDROM_BLK_2352 * cdda->cdda_length;
28959 		break;
28960 	case CDROM_DA_SUBQ:
28961 		buflen = CDROM_BLK_2368 * cdda->cdda_length;
28962 		break;
28963 	case CDROM_DA_ALL_SUBCODE:
28964 		buflen = CDROM_BLK_2448 * cdda->cdda_length;
28965 		break;
28966 	case CDROM_DA_SUBCODE_ONLY:
28967 		buflen = CDROM_BLK_SUBCODE * cdda->cdda_length;
28968 		break;
28969 	default:
28970 		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
28971 		    "sr_read_cdda: Subcode '0x%x' Not Supported\n",
28972 		    cdda->cdda_subcode);
28973 		kmem_free(cdda, sizeof (struct cdrom_cdda));
28974 		return (EINVAL);
28975 	}
28976 
28977 	/* Build and send the command */
28978 	com = kmem_zalloc(sizeof (*com), KM_SLEEP);
28979 	bzero(cdb, CDB_GROUP5);
28980 
28981 	if (un->un_f_cfg_cdda == TRUE) {
28982 		cdb[0] = (char)SCMD_READ_CD;
28983 		cdb[1] = 0x04;
28984 		cdb[2] = (((cdda->cdda_addr) & 0xff000000) >> 24);
28985 		cdb[3] = (((cdda->cdda_addr) & 0x00ff0000) >> 16);
28986 		cdb[4] = (((cdda->cdda_addr) & 0x0000ff00) >> 8);
28987 		cdb[5] = ((cdda->cdda_addr) & 0x000000ff);
28988 		cdb[6] = (((cdda->cdda_length) & 0x00ff0000) >> 16);
28989 		cdb[7] = (((cdda->cdda_length) & 0x0000ff00) >> 8);
28990 		cdb[8] = ((cdda->cdda_length) & 0x000000ff);
28991 		cdb[9] = 0x10;
28992 		switch (cdda->cdda_subcode) {
28993 		case CDROM_DA_NO_SUBCODE :
28994 			cdb[10] = 0x0;
28995 			break;
28996 		case CDROM_DA_SUBQ :
28997 			cdb[10] = 0x2;
28998 			break;
28999 		case CDROM_DA_ALL_SUBCODE :
29000 			cdb[10] = 0x1;
29001 			break;
29002 		case CDROM_DA_SUBCODE_ONLY :
29003 			/* FALLTHROUGH */
29004 		default :
29005 			kmem_free(cdda, sizeof (struct cdrom_cdda));
29006 			kmem_free(com, sizeof (*com));
29007 			return (ENOTTY);
29008 		}
29009 	} else {
29010 		cdb[0] = (char)SCMD_READ_CDDA;
29011 		cdb[2] = (((cdda->cdda_addr) & 0xff000000) >> 24);
29012 		cdb[3] = (((cdda->cdda_addr) & 0x00ff0000) >> 16);
29013 		cdb[4] = (((cdda->cdda_addr) & 0x0000ff00) >> 8);
29014 		cdb[5] = ((cdda->cdda_addr) & 0x000000ff);
29015 		cdb[6] = (((cdda->cdda_length) & 0xff000000) >> 24);
29016 		cdb[7] = (((cdda->cdda_length) & 0x00ff0000) >> 16);
29017 		cdb[8] = (((cdda->cdda_length) & 0x0000ff00) >> 8);
29018 		cdb[9] = ((cdda->cdda_length) & 0x000000ff);
29019 		cdb[10] = cdda->cdda_subcode;
29020 	}
29021 
29022 	com->uscsi_cdb = cdb;
29023 	com->uscsi_cdblen = CDB_GROUP5;
29024 	com->uscsi_bufaddr = (caddr_t)cdda->cdda_data;
29025 	com->uscsi_buflen = buflen;
29026 	com->uscsi_flags = USCSI_DIAGNOSE | USCSI_SILENT | USCSI_READ;
29027 
29028 	rval = sd_send_scsi_cmd(dev, com, FKIOCTL, UIO_USERSPACE,
29029 	    SD_PATH_STANDARD);
29030 
29031 	kmem_free(cdda, sizeof (struct cdrom_cdda));
29032 	kmem_free(com, sizeof (*com));
29033 	return (rval);
29034 }
29035 
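/*
 * Illustrative sketch (not part of the driver): a CDROMCDDA caller must size
 * cdda_data to match the subcode selection, mirroring the buflen computation
 * above. 'fd', 'lba', and 'nblks' are assumed.
 *
 *	struct cdrom_cdda cd;
 *
 *	cd.cdda_addr = lba;
 *	cd.cdda_length = nblks;
 *	cd.cdda_subcode = CDROM_DA_NO_SUBCODE;
 *	cd.cdda_data = malloc((size_t)nblks * CDROM_BLK_2352);
 *	if (cd.cdda_data != NULL)
 *		(void) ioctl(fd, CDROMCDDA, &cd);
 */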
29036 
29037 /*
29038  *    Function: sr_read_cdxa()
29039  *
29040  * Description: This routine is the driver entry point for handling CD-ROM
29041  *		ioctl requests to return CD-XA (Extended Architecture) data.
29042  *		(CDROMCDXA).
29043  *
29044  *   Arguments: dev	- the device 'dev_t'
29045  *		data	- pointer to user provided CD-XA structure specifying
29046  *			  the data starting address, transfer length, and format
29047  *		flag	- this argument is a pass through to ddi_copyxxx()
29048  *			  directly from the mode argument of ioctl().
29049  *
29050  * Return Code: the code returned by sd_send_scsi_cmd()
29051  *		EFAULT if ddi_copyxxx() fails
29052  *		ENXIO if ddi_get_soft_state() fails
29053  *		EINVAL if data pointer is NULL
29054  */
29055 
29056 static int
29057 sr_read_cdxa(dev_t dev, caddr_t data, int flag)
29058 {
29059 	struct sd_lun		*un;
29060 	struct uscsi_cmd	*com;
29061 	struct cdrom_cdxa	*cdxa;
29062 	int			rval;
29063 	size_t			buflen;
29064 	char			cdb[CDB_GROUP5];
29065 	uchar_t			read_flags;
29066 
29067 #ifdef _MULTI_DATAMODEL
29068 	/* To support ILP32 applications in an LP64 world */
29069 	struct cdrom_cdxa32		cdrom_cdxa32;
29070 	struct cdrom_cdxa32		*cdxa32 = &cdrom_cdxa32;
29071 #endif /* _MULTI_DATAMODEL */
29072 
29073 	if (data == NULL) {
29074 		return (EINVAL);
29075 	}
29076 
29077 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
29078 		return (ENXIO);
29079 	}
29080 
29081 	cdxa = kmem_zalloc(sizeof (struct cdrom_cdxa), KM_SLEEP);
29082 
29083 #ifdef _MULTI_DATAMODEL
29084 	switch (ddi_model_convert_from(flag & FMODELS)) {
29085 	case DDI_MODEL_ILP32:
29086 		if (ddi_copyin(data, cdxa32, sizeof (*cdxa32), flag)) {
29087 			kmem_free(cdxa, sizeof (struct cdrom_cdxa));
29088 			return (EFAULT);
29089 		}
29090 		/*
29091 		 * Convert the ILP32 uscsi data from the
29092 		 * application to LP64 for internal use.
29093 		 */
29094 		cdrom_cdxa32tocdrom_cdxa(cdxa32, cdxa);
29095 		break;
29096 	case DDI_MODEL_NONE:
29097 		if (ddi_copyin(data, cdxa, sizeof (struct cdrom_cdxa), flag)) {
29098 			kmem_free(cdxa, sizeof (struct cdrom_cdxa));
29099 			return (EFAULT);
29100 		}
29101 		break;
29102 	}
29103 #else /* ! _MULTI_DATAMODEL */
29104 	if (ddi_copyin(data, cdxa, sizeof (struct cdrom_cdxa), flag)) {
29105 		kmem_free(cdxa, sizeof (struct cdrom_cdxa));
29106 		return (EFAULT);
29107 	}
29108 #endif /* _MULTI_DATAMODEL */
29109 
29110 	/*
29111 	 * Since MMC-2 expects max 3 bytes for length, check if the
29112 	 * length input is greater than 3 bytes
29113 	 */
29114 	if ((cdxa->cdxa_length & 0xFF000000) != 0) {
29115 		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN, "sr_read_cdxa: "
29116 		    "cdrom transfer length too large: %d (limit %d)\n",
29117 		    cdxa->cdxa_length, 0xFFFFFF);
29118 		kmem_free(cdxa, sizeof (struct cdrom_cdxa));
29119 		return (EINVAL);
29120 	}
29121 
29122 	switch (cdxa->cdxa_format) {
29123 	case CDROM_XA_DATA:
29124 		buflen = CDROM_BLK_2048 * cdxa->cdxa_length;
29125 		read_flags = 0x10;
29126 		break;
29127 	case CDROM_XA_SECTOR_DATA:
29128 		buflen = CDROM_BLK_2352 * cdxa->cdxa_length;
29129 		read_flags = 0xf8;
29130 		break;
29131 	case CDROM_XA_DATA_W_ERROR:
29132 		buflen = CDROM_BLK_2646 * cdxa->cdxa_length;
29133 		read_flags = 0xfc;
29134 		break;
29135 	default:
29136 		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
29137 		    "sr_read_cdxa: Format '0x%x' Not Supported\n",
29138 		    cdxa->cdxa_format);
29139 		kmem_free(cdxa, sizeof (struct cdrom_cdxa));
29140 		return (EINVAL);
29141 	}
29142 
29143 	com = kmem_zalloc(sizeof (*com), KM_SLEEP);
29144 	bzero(cdb, CDB_GROUP5);
29145 	if (un->un_f_mmc_cap == TRUE) {
29146 		cdb[0] = (char)SCMD_READ_CD;
29147 		cdb[2] = (((cdxa->cdxa_addr) & 0xff000000) >> 24);
29148 		cdb[3] = (((cdxa->cdxa_addr) & 0x00ff0000) >> 16);
29149 		cdb[4] = (((cdxa->cdxa_addr) & 0x0000ff00) >> 8);
29150 		cdb[5] = ((cdxa->cdxa_addr) & 0x000000ff);
29151 		cdb[6] = (((cdxa->cdxa_length) & 0x00ff0000) >> 16);
29152 		cdb[7] = (((cdxa->cdxa_length) & 0x0000ff00) >> 8);
29153 		cdb[8] = ((cdxa->cdxa_length) & 0x000000ff);
29154 		cdb[9] = (char)read_flags;
29155 	} else {
29156 		/*
29157 		 * Note: A vendor specific command (0xDB) is being used here to
29158 		 * request a read of all subcodes.
29159 		 */
29160 		cdb[0] = (char)SCMD_READ_CDXA;
29161 		cdb[2] = (((cdxa->cdxa_addr) & 0xff000000) >> 24);
29162 		cdb[3] = (((cdxa->cdxa_addr) & 0x00ff0000) >> 16);
29163 		cdb[4] = (((cdxa->cdxa_addr) & 0x0000ff00) >> 8);
29164 		cdb[5] = ((cdxa->cdxa_addr) & 0x000000ff);
29165 		cdb[6] = (((cdxa->cdxa_length) & 0xff000000) >> 24);
29166 		cdb[7] = (((cdxa->cdxa_length) & 0x00ff0000) >> 16);
29167 		cdb[8] = (((cdxa->cdxa_length) & 0x0000ff00) >> 8);
29168 		cdb[9] = ((cdxa->cdxa_length) & 0x000000ff);
29169 		cdb[10] = cdxa->cdxa_format;
29170 	}
29171 	com->uscsi_cdb	   = cdb;
29172 	com->uscsi_cdblen  = CDB_GROUP5;
29173 	com->uscsi_bufaddr = (caddr_t)cdxa->cdxa_data;
29174 	com->uscsi_buflen  = buflen;
29175 	com->uscsi_flags   = USCSI_DIAGNOSE | USCSI_SILENT | USCSI_READ;
29176 	rval = sd_send_scsi_cmd(dev, com, FKIOCTL, UIO_USERSPACE,
29177 	    SD_PATH_STANDARD);
29178 	kmem_free(cdxa, sizeof (struct cdrom_cdxa));
29179 	kmem_free(com, sizeof (*com));
29180 	return (rval);
29181 }
29182 
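/*
 * Illustrative sketch (not part of the driver): per-format buffer sizing for
 * CDROMCDXA, mirroring the switch above in a hypothetical helper.
 *
 *	static size_t
 *	cdxa_bufsize(uchar_t format, uint32_t nblks)
 *	{
 *		switch (format) {
 *		case CDROM_XA_DATA:
 *			return ((size_t)CDROM_BLK_2048 * nblks);
 *		case CDROM_XA_SECTOR_DATA:
 *			return ((size_t)CDROM_BLK_2352 * nblks);
 *		case CDROM_XA_DATA_W_ERROR:
 *			return ((size_t)CDROM_BLK_2646 * nblks);
 *		default:
 *			return (0);
 *		}
 *	}
 */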
29183 
29184 /*
29185  *    Function: sr_eject()
29186  *
29187  * Description: This routine is the driver entry point for handling CD-ROM
29188  *		eject ioctl requests (FDEJECT, DKIOCEJECT, CDROMEJECT)
29189  *
29190  *   Arguments: dev	- the device 'dev_t'
29191  *
29192  * Return Code: the code returned by sd_send_scsi_cmd()
29193  */
29194 
29195 static int
29196 sr_eject(dev_t dev)
29197 {
29198 	struct sd_lun	*un;
29199 	int		rval;
29200 	sd_ssc_t	*ssc;
29201 
29202 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL ||
29203 	    (un->un_state == SD_STATE_OFFLINE)) {
29204 		return (ENXIO);
29205 	}
29206 
29207 	/*
29208 	 * To prevent race conditions with the eject
29209 	 * command, keep track of an eject command as
29210 	 * it progresses. If we are already handling
29211 	 * an eject command in the driver for the given
29212 	 * unit and another request to eject is received
29213 	 * immediately return EAGAIN so we don't lose
29214 	 * the command if the current eject command fails.
29215 	 */
29216 	mutex_enter(SD_MUTEX(un));
29217 	if (un->un_f_ejecting == TRUE) {
29218 		mutex_exit(SD_MUTEX(un));
29219 		return (EAGAIN);
29220 	}
29221 	un->un_f_ejecting = TRUE;
29222 	mutex_exit(SD_MUTEX(un));
29223 
29224 	ssc = sd_ssc_init(un);
29225 	rval = sd_send_scsi_DOORLOCK(ssc, SD_REMOVAL_ALLOW,
29226 	    SD_PATH_STANDARD);
29227 	sd_ssc_fini(ssc);
29228 
29229 	if (rval != 0) {
29230 		mutex_enter(SD_MUTEX(un));
29231 		un->un_f_ejecting = FALSE;
29232 		mutex_exit(SD_MUTEX(un));
29233 		return (rval);
29234 	}
29235 
29236 	ssc = sd_ssc_init(un);
29237 	rval = sd_send_scsi_START_STOP_UNIT(ssc, SD_START_STOP,
29238 	    SD_TARGET_EJECT, SD_PATH_STANDARD);
29239 	sd_ssc_fini(ssc);
29240 
29241 	if (rval == 0) {
29242 		mutex_enter(SD_MUTEX(un));
29243 		sr_ejected(un);
29244 		un->un_mediastate = DKIO_EJECTED;
29245 		un->un_f_ejecting = FALSE;
29246 		cv_broadcast(&un->un_state_cv);
29247 		mutex_exit(SD_MUTEX(un));
29248 	} else {
29249 		mutex_enter(SD_MUTEX(un));
29250 		un->un_f_ejecting = FALSE;
29251 		mutex_exit(SD_MUTEX(un));
29252 	}
29253 	return (rval);
29254 }
29255 
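/*
 * Illustrative sketch (not part of the driver): ejecting media from
 * userland. Since sr_eject() returns EAGAIN while another eject is already
 * in progress, a caller may retry. 'fd' is assumed.
 *
 *	while (ioctl(fd, CDROMEJECT, 0) == -1 && errno == EAGAIN)
 *		(void) sleep(1);
 */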
29256 
29257 /*
29258  *    Function: sr_ejected()
29259  *
29260  * Description: This routine updates the soft state structure to invalidate the
29261  *		geometry information after the media has been ejected or a
29262  *		media eject has been detected.
29263  *
29264  *   Arguments: un - driver soft state (unit) structure
29265  */
29266 
29267 static void
29268 sr_ejected(struct sd_lun *un)
29269 {
29270 	struct sd_errstats *stp;
29271 
29272 	ASSERT(un != NULL);
29273 	ASSERT(mutex_owned(SD_MUTEX(un)));
29274 
29275 	un->un_f_blockcount_is_valid	= FALSE;
29276 	un->un_f_tgt_blocksize_is_valid	= FALSE;
29277 	mutex_exit(SD_MUTEX(un));
29278 	cmlb_invalidate(un->un_cmlbhandle, (void *)SD_PATH_DIRECT_PRIORITY);
29279 	mutex_enter(SD_MUTEX(un));
29280 
29281 	if (un->un_errstats != NULL) {
29282 		stp = (struct sd_errstats *)un->un_errstats->ks_data;
29283 		stp->sd_capacity.value.ui64 = 0;
29284 	}
29285 }
29286 
29287 
29288 /*
29289  *    Function: sr_check_wp()
29290  *
29291  * Description: This routine checks the write protection of removable
29292  *      media disks and hotpluggable devices via the write protect bit of
29293  *      the mode page header device specific field. Some devices choke
29294  *      on an unsupported mode page. To work around this issue, this
29295  *      routine uses the 0x3f mode page (request all pages) for all
29296  *      device types.
29297  *
29298  *   Arguments: dev             - the device 'dev_t'
29299  *
29300  * Return Code: int indicating if the device is write protected (1) or not (0)
29301  *
29302  *     Context: Kernel thread.
29303  *
29304  */
29305 
29306 static int
29307 sr_check_wp(dev_t dev)
29308 {
29309 	struct sd_lun	*un;
29310 	uchar_t		device_specific;
29311 	uchar_t		*sense;
29312 	int		hdrlen;
29313 	int		rval = FALSE;
29314 	int		status;
29315 	sd_ssc_t	*ssc;
29316 
29317 	/*
29318 	 * Note: The return codes for this routine should be reworked to
29319 	 * properly handle the case of a NULL softstate.
29320 	 */
29321 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL) {
29322 		return (FALSE);
29323 	}
29324 
29325 	if (un->un_f_cfg_is_atapi == TRUE) {
29326 		/*
29327 		 * The mode page contents are not required; set the allocation
29328 		 * length for the mode page header only
29329 		 */
29330 		hdrlen = MODE_HEADER_LENGTH_GRP2;
29331 		sense = kmem_zalloc(hdrlen, KM_SLEEP);
29332 		ssc = sd_ssc_init(un);
29333 		status = sd_send_scsi_MODE_SENSE(ssc, CDB_GROUP1, sense, hdrlen,
29334 		    MODEPAGE_ALLPAGES, SD_PATH_STANDARD);
29335 		sd_ssc_fini(ssc);
29336 		if (status != 0)
29337 			goto err_exit;
29338 		device_specific =
29339 		    ((struct mode_header_grp2 *)sense)->device_specific;
29340 	} else {
29341 		hdrlen = MODE_HEADER_LENGTH;
29342 		sense = kmem_zalloc(hdrlen, KM_SLEEP);
29343 		ssc = sd_ssc_init(un);
29344 		status = sd_send_scsi_MODE_SENSE(ssc, CDB_GROUP0, sense, hdrlen,
29345 		    MODEPAGE_ALLPAGES, SD_PATH_STANDARD);
29346 		sd_ssc_fini(ssc);
29347 		if (status != 0)
29348 			goto err_exit;
29349 		device_specific =
29350 		    ((struct mode_header *)sense)->device_specific;
29351 	}
29352 
29353 
29354 	/*
29355 	 * Check the write protect bit of the mode page header device
29356 	 * specific field. If the mode sense above failed, rval remains
29357 	 * FALSE: not all disks understand that query, and such devices
29358 	 * are assumed not to be write protected.
29358 	 */
29359 	if (device_specific & WRITE_PROTECT) {
29360 		rval = TRUE;
29361 	}
29362 
29363 err_exit:
29364 	kmem_free(sense, hdrlen);
29365 	return (rval);
29366 }
29367 
29368 /*
29369  *    Function: sr_volume_ctrl()
29370  *
29371  * Description: This routine is the driver entry point for handling CD-ROM
29372  *		audio output volume ioctl requests. (CDROMVOLCTRL)
29373  *
29374  *   Arguments: dev	- the device 'dev_t'
29375  *		data	- pointer to user audio volume control structure
29376  *		flag	- this argument is a pass through to ddi_copyxxx()
29377  *			  directly from the mode argument of ioctl().
29378  *
29379  * Return Code: the code returned by sd_send_scsi_cmd()
29380  *		EFAULT if ddi_copyxxx() fails
29381  *		ENXIO if ddi_get_soft_state() fails
29382  *		EINVAL if data pointer is NULL
29383  *
29384  */
29385 
29386 static int
29387 sr_volume_ctrl(dev_t dev, caddr_t data, int flag)
29388 {
29389 	struct sd_lun		*un;
29390 	struct cdrom_volctrl    volume;
29391 	struct cdrom_volctrl    *vol = &volume;
29392 	uchar_t			*sense_page;
29393 	uchar_t			*select_page;
29394 	uchar_t			*sense;
29395 	uchar_t			*select;
29396 	int			sense_buflen;
29397 	int			select_buflen;
29398 	int			rval;
29399 	sd_ssc_t		*ssc;
29400 
29401 	if (data == NULL) {
29402 		return (EINVAL);
29403 	}
29404 
29405 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL ||
29406 	    (un->un_state == SD_STATE_OFFLINE)) {
29407 		return (ENXIO);
29408 	}
29409 
29410 	if (ddi_copyin(data, vol, sizeof (struct cdrom_volctrl), flag)) {
29411 		return (EFAULT);
29412 	}
29413 
29414 	if ((un->un_f_cfg_is_atapi == TRUE) || (un->un_f_mmc_cap == TRUE)) {
29415 		struct mode_header_grp2		*sense_mhp;
29416 		struct mode_header_grp2		*select_mhp;
29417 		int				bd_len;
29418 
29419 		sense_buflen = MODE_PARAM_LENGTH_GRP2 + MODEPAGE_AUDIO_CTRL_LEN;
29420 		select_buflen = MODE_HEADER_LENGTH_GRP2 +
29421 		    MODEPAGE_AUDIO_CTRL_LEN;
29422 		sense  = kmem_zalloc(sense_buflen, KM_SLEEP);
29423 		select = kmem_zalloc(select_buflen, KM_SLEEP);
29424 		ssc = sd_ssc_init(un);
29425 		rval = sd_send_scsi_MODE_SENSE(ssc, CDB_GROUP1, sense,
29426 		    sense_buflen, MODEPAGE_AUDIO_CTRL,
29427 		    SD_PATH_STANDARD);
29428 		sd_ssc_fini(ssc);
29429 
29430 		if (rval != 0) {
29431 			SD_ERROR(SD_LOG_IOCTL_RMMEDIA, un,
29432 			    "sr_volume_ctrl: Mode Sense Failed\n");
29433 			kmem_free(sense, sense_buflen);
29434 			kmem_free(select, select_buflen);
29435 			return (rval);
29436 		}
29437 		sense_mhp = (struct mode_header_grp2 *)sense;
29438 		select_mhp = (struct mode_header_grp2 *)select;
29439 		bd_len = (sense_mhp->bdesc_length_hi << 8) |
29440 		    sense_mhp->bdesc_length_lo;
29441 		if (bd_len > MODE_BLK_DESC_LENGTH) {
29442 			scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
29443 			    "sr_volume_ctrl: Mode Sense returned invalid "
29444 			    "block descriptor length\n");
29445 			kmem_free(sense, sense_buflen);
29446 			kmem_free(select, select_buflen);
29447 			return (EIO);
29448 		}
29449 		sense_page = (uchar_t *)
29450 		    (sense + MODE_HEADER_LENGTH_GRP2 + bd_len);
29451 		select_page = (uchar_t *)(select + MODE_HEADER_LENGTH_GRP2);
29452 		select_mhp->length_msb = 0;
29453 		select_mhp->length_lsb = 0;
29454 		select_mhp->bdesc_length_hi = 0;
29455 		select_mhp->bdesc_length_lo = 0;
29456 	} else {
29457 		struct mode_header		*sense_mhp, *select_mhp;
29458 
29459 		sense_buflen = MODE_PARAM_LENGTH + MODEPAGE_AUDIO_CTRL_LEN;
29460 		select_buflen = MODE_HEADER_LENGTH + MODEPAGE_AUDIO_CTRL_LEN;
29461 		sense  = kmem_zalloc(sense_buflen, KM_SLEEP);
29462 		select = kmem_zalloc(select_buflen, KM_SLEEP);
29463 		ssc = sd_ssc_init(un);
29464 		rval = sd_send_scsi_MODE_SENSE(ssc, CDB_GROUP0, sense,
29465 		    sense_buflen, MODEPAGE_AUDIO_CTRL,
29466 		    SD_PATH_STANDARD);
29467 		sd_ssc_fini(ssc);
29468 
29469 		if (rval != 0) {
29470 			scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
29471 			    "sr_volume_ctrl: Mode Sense Failed\n");
29472 			kmem_free(sense, sense_buflen);
29473 			kmem_free(select, select_buflen);
29474 			return (rval);
29475 		}
29476 		sense_mhp  = (struct mode_header *)sense;
29477 		select_mhp = (struct mode_header *)select;
29478 		if (sense_mhp->bdesc_length > MODE_BLK_DESC_LENGTH) {
29479 			scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
29480 			    "sr_volume_ctrl: Mode Sense returned invalid "
29481 			    "block descriptor length\n");
29482 			kmem_free(sense, sense_buflen);
29483 			kmem_free(select, select_buflen);
29484 			return (EIO);
29485 		}
29486 		sense_page = (uchar_t *)
29487 		    (sense + MODE_HEADER_LENGTH + sense_mhp->bdesc_length);
29488 		select_page = (uchar_t *)(select + MODE_HEADER_LENGTH);
29489 		select_mhp->length = 0;
29490 		select_mhp->bdesc_length = 0;
29491 	}
29492 	/*
29493 	 * Note: An audio control data structure could be created and overlaid
29494 	 * on the following in place of the array indexing method implemented.
29495 	 */
29496 
29497 	/* Build the select data for the user volume data */
29498 	select_page[0] = MODEPAGE_AUDIO_CTRL;
29499 	select_page[1] = 0xE;
29500 	/* Set the immediate bit */
29501 	select_page[2] = 0x04;
29502 	/* Zero out reserved fields */
29503 	select_page[3] = 0x00;
29504 	select_page[4] = 0x00;
29505 	/* Return sense data for fields not to be modified */
29506 	select_page[5] = sense_page[5];
29507 	select_page[6] = sense_page[6];
29508 	select_page[7] = sense_page[7];
29509 	/* Set the user specified volume levels for channel 0 and 1 */
29510 	select_page[8] = 0x01;
29511 	select_page[9] = vol->channel0;
29512 	select_page[10] = 0x02;
29513 	select_page[11] = vol->channel1;
29514 	/* Channel 2 and 3 are currently unsupported so return the sense data */
29515 	/* Channels 2 and 3 are currently unsupported, so return the sense data */
29516 	select_page[13] = sense_page[13];
29517 	select_page[14] = sense_page[14];
29518 	select_page[15] = sense_page[15];
29519 
29520 	ssc = sd_ssc_init(un);
29521 	if ((un->un_f_cfg_is_atapi == TRUE) || (un->un_f_mmc_cap == TRUE)) {
29522 		rval = sd_send_scsi_MODE_SELECT(ssc, CDB_GROUP1, select,
29523 		    select_buflen, SD_DONTSAVE_PAGE, SD_PATH_STANDARD);
29524 	} else {
29525 		rval = sd_send_scsi_MODE_SELECT(ssc, CDB_GROUP0, select,
29526 		    select_buflen, SD_DONTSAVE_PAGE, SD_PATH_STANDARD);
29527 	}
29528 	sd_ssc_fini(ssc);
29529 
29530 	kmem_free(sense, sense_buflen);
29531 	kmem_free(select, select_buflen);
29532 	return (rval);
29533 }
29534 
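/*
 * Illustrative sketch (not part of the driver): setting both supported
 * output channels to full volume via CDROMVOLCTRL; channels 2 and 3 are
 * ignored by the driver, as noted above. 'fd' is assumed.
 *
 *	struct cdrom_volctrl v;
 *
 *	v.channel0 = 0xFF;
 *	v.channel1 = 0xFF;
 *	v.channel2 = 0;
 *	v.channel3 = 0;
 *	(void) ioctl(fd, CDROMVOLCTRL, &v);
 */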
29535 
29536 /*
29537  *    Function: sr_read_sony_session_offset()
29538  *
29539  * Description: This routine is the driver entry point for handling CD-ROM
29540  *		ioctl requests for session offset information. (CDROMREADOFFSET)
29541  *		The address of the first track in the last session of a
29542  *		multi-session CD-ROM is returned
29543  *
29544  *		Note: This routine uses a vendor specific key value in the
29545  *		command control field without implementing any vendor check here
29546  *		or in the ioctl routine.
29547  *
29548  *   Arguments: dev	- the device 'dev_t'
29549  *		data	- pointer to an int to hold the requested address
29550  *		flag	- this argument is a pass through to ddi_copyxxx()
29551  *			  directly from the mode argument of ioctl().
29552  *
29553  * Return Code: the code returned by sd_send_scsi_cmd()
29554  *		EFAULT if ddi_copyxxx() fails
29555  *		ENXIO if ddi_get_soft_state() fails
29556  *		EINVAL if data pointer is NULL
29557  */
29558 
29559 static int
29560 sr_read_sony_session_offset(dev_t dev, caddr_t data, int flag)
29561 {
29562 	struct sd_lun		*un;
29563 	struct uscsi_cmd	*com;
29564 	caddr_t			buffer;
29565 	char			cdb[CDB_GROUP1];
29566 	int			session_offset = 0;
29567 	int			rval;
29568 
29569 	if (data == NULL) {
29570 		return (EINVAL);
29571 	}
29572 
29573 	if ((un = ddi_get_soft_state(sd_state, SDUNIT(dev))) == NULL ||
29574 	    (un->un_state == SD_STATE_OFFLINE)) {
29575 		return (ENXIO);
29576 	}
29577 
29578 	buffer = kmem_zalloc((size_t)SONY_SESSION_OFFSET_LEN, KM_SLEEP);
29579 	bzero(cdb, CDB_GROUP1);
29580 	cdb[0] = SCMD_READ_TOC;
29581 	/*
29582 	 * Bytes 7 & 8 are the 12 byte allocation length for a single entry.
29583 	 * (4 byte TOC response header + 8 byte response data)
29584 	 */
29585 	cdb[8] = SONY_SESSION_OFFSET_LEN;
29586 	/* Byte 9 is the control byte. A vendor specific value is used */
29587 	cdb[9] = SONY_SESSION_OFFSET_KEY;
29588 	com = kmem_zalloc(sizeof (*com), KM_SLEEP);
29589 	com->uscsi_cdb = cdb;
29590 	com->uscsi_cdblen = CDB_GROUP1;
29591 	com->uscsi_bufaddr = buffer;
29592 	com->uscsi_buflen = SONY_SESSION_OFFSET_LEN;
29593 	com->uscsi_flags = USCSI_DIAGNOSE | USCSI_SILENT | USCSI_READ;
29594 
29595 	rval = sd_send_scsi_cmd(dev, com, FKIOCTL, UIO_SYSSPACE,
29596 	    SD_PATH_STANDARD);
29597 	if (rval != 0) {
29598 		kmem_free(buffer, SONY_SESSION_OFFSET_LEN);
29599 		kmem_free(com, sizeof (*com));
29600 		return (rval);
29601 	}
29602 	if (buffer[1] == SONY_SESSION_OFFSET_VALID) {
29603 		session_offset =
29604 		    ((uchar_t)buffer[8] << 24) + ((uchar_t)buffer[9] << 16) +
29605 		    ((uchar_t)buffer[10] << 8) + ((uchar_t)buffer[11]);
29606 		/*
29607 		 * The offset is returned in units of the current lbasize
29608 		 * blocks. Convert to 2K blocks before returning it to the user.
29609 		 */
29610 		if (un->un_tgt_blocksize == CDROM_BLK_512) {
29611 			session_offset >>= 2;
29612 		} else if (un->un_tgt_blocksize == CDROM_BLK_1024) {
29613 			session_offset >>= 1;
29614 		}
29615 	}
29616 
29617 	if (ddi_copyout(&session_offset, data, sizeof (int), flag) != 0) {
29618 		rval = EFAULT;
29619 	}
29620 
29621 	kmem_free(buffer, SONY_SESSION_OFFSET_LEN);
29622 	kmem_free(com, sizeof (*com));
29623 	return (rval);
29624 }
29625 
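/*
 * The block size conversion above is plain arithmetic: the drive reports the
 * session offset in units of the current target block size, while the caller
 * expects 2K blocks. With 512-byte blocks, four make one 2K block (shift
 * right by 2); with 1024-byte blocks, two make one (shift right by 1). For
 * example, a reported offset of 11400 on a 512-byte-block disc is returned
 * as 11400 >> 2 == 2850.
 */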
29626 
29627 /*
29628  *    Function: sd_wm_cache_constructor()
29629  *
29630  * Description: Cache Constructor for the wmap cache for the read/modify/write
29631  *		devices.
29632  *
29633  *   Arguments: wm      - A pointer to the sd_w_map to be initialized.
29634  *		un	- sd_lun structure for the device.
29635  *		flag	- the km flags passed to constructor
29636  *
29637  * Return Code: 0 on success.
29638  *		-1 on failure.
29639  */
29640 
29641 /*ARGSUSED*/
29642 static int
29643 sd_wm_cache_constructor(void *wm, void *un, int flags)
29644 {
29645 	bzero(wm, sizeof (struct sd_w_map));
29646 	cv_init(&((struct sd_w_map *)wm)->wm_avail, NULL, CV_DRIVER, NULL);
29647 	return (0);
29648 }
29649 
29650 
29651 /*
29652  *    Function: sd_wm_cache_destructor()
29653  *
29654  * Description: Cache destructor for the wmap cache for the read/modify/write
29655  *		devices.
29656  *
29657  *   Arguments: wm      - A pointer to the sd_w_map to be initialized.
29658  *		un	- sd_lun structure for the device.
29659  */
29660 /*ARGSUSED*/
29661 static void
29662 sd_wm_cache_destructor(void *wm, void *un)
29663 {
29664 	cv_destroy(&((struct sd_w_map *)wm)->wm_avail);
29665 }
29666 
29667 
29668 /*
29669  *    Function: sd_range_lock()
29670  *
29671  * Description: Lock the range of blocks specified as parameter to ensure
29672  *		that a read-modify-write is atomic and no other i/o writes
29673  *		to the same location. The range is specified in terms
29674  *		of start and end blocks. Block numbers are the actual
29675  *		media block numbers, not system block numbers.
29676  *
29677  *   Arguments: un	- sd_lun structure for the device.
29678  *		startb - The starting block number
29679  *		endb - The end block number
29680  *		typ - type of i/o - simple/read_modify_write
29681  *
29682  * Return Code: wm  - pointer to the wmap structure.
29683  *
29684  *     Context: This routine can sleep.
29685  */
29686 
29687 static struct sd_w_map *
29688 sd_range_lock(struct sd_lun *un, daddr_t startb, daddr_t endb, ushort_t typ)
29689 {
29690 	struct sd_w_map *wmp = NULL;
29691 	struct sd_w_map *sl_wmp = NULL;
29692 	struct sd_w_map *tmp_wmp;
29693 	wm_state state = SD_WM_CHK_LIST;
29694 
29695 
29696 	ASSERT(un != NULL);
29697 	ASSERT(!mutex_owned(SD_MUTEX(un)));
29698 
29699 	mutex_enter(SD_MUTEX(un));
29700 
29701 	while (state != SD_WM_DONE) {
29702 
29703 		switch (state) {
29704 		case SD_WM_CHK_LIST:
29705 			/*
29706 			 * This is the starting state. Check the wmap list
29707 			 * to see if the range is currently available.
29708 			 */
29709 			if (!(typ & SD_WTYPE_RMW) && !(un->un_rmw_count)) {
29710 				/*
29711 				 * If this is a simple write and no rmw
29712 				 * i/o is pending then try to lock the
29713 				 * range as the range should be available.
29714 				 */
29715 				state = SD_WM_LOCK_RANGE;
29716 			} else {
29717 				tmp_wmp = sd_get_range(un, startb, endb);
29718 				if (tmp_wmp != NULL) {
29719 					if ((wmp != NULL) && ONLIST(un, wmp)) {
29720 						/*
29721 						 * Should not keep onlist wmps
29722 						 * while waiting; this macro
29723 						 * also sets wmp = NULL.
29724 						 */
29725 						FREE_ONLIST_WMAP(un, wmp);
29726 					}
29727 					/*
29728 					 * sl_wmp is the wmap on which the
29729 					 * wait is done. Since tmp_wmp points
29730 					 * to the in-use wmap, set sl_wmp to
29731 					 * tmp_wmp and change the state to wait.
29732 					 */
29733 					sl_wmp = tmp_wmp;
29734 					state = SD_WM_WAIT_MAP;
29735 				} else {
29736 					state = SD_WM_LOCK_RANGE;
29737 				}
29738 
29739 			}
29740 			break;
29741 
29742 		case SD_WM_LOCK_RANGE:
29743 			ASSERT(un->un_wm_cache);
29744 			/*
29745 			 * The range needs to be locked; try to get a wmap.
29746 			 * First attempt with KM_NOSLEEP, as we want to avoid
29747 			 * sleeping if possible: we would have to release the
29748 			 * sd mutex if we had to sleep.
29749 			 */
29750 			if (wmp == NULL)
29751 				wmp = kmem_cache_alloc(un->un_wm_cache,
29752 				    KM_NOSLEEP);
29753 			if (wmp == NULL) {
29754 				mutex_exit(SD_MUTEX(un));
29755 				_NOTE(DATA_READABLE_WITHOUT_LOCK
29756 				    (sd_lun::un_wm_cache))
29757 				wmp = kmem_cache_alloc(un->un_wm_cache,
29758 				    KM_SLEEP);
29759 				mutex_enter(SD_MUTEX(un));
29760 				/*
29761 				 * we released the mutex so recheck and go to
29762 				 * check list state.
29763 				 */
29764 				state = SD_WM_CHK_LIST;
29765 			} else {
29766 				/*
29767 				 * We exit the state machine since we have
29768 				 * the wmap. Do the housekeeping first:
29769 				 * place the wmap on the wmap list if it is
29770 				 * not on it already, then set the state to done.
29771 				 */
29772 				wmp->wm_start = startb;
29773 				wmp->wm_end = endb;
29774 				wmp->wm_flags = typ | SD_WM_BUSY;
29775 				if (typ & SD_WTYPE_RMW) {
29776 					un->un_rmw_count++;
29777 				}
29778 				/*
29779 				 * If not already on the list then link
29780 				 */
29781 				if (!ONLIST(un, wmp)) {
29782 					wmp->wm_next = un->un_wm;
29783 					wmp->wm_prev = NULL;
29784 					if (wmp->wm_next)
29785 						wmp->wm_next->wm_prev = wmp;
29786 					un->un_wm = wmp;
29787 				}
29788 				state = SD_WM_DONE;
29789 			}
29790 			break;
29791 
29792 		case SD_WM_WAIT_MAP:
29793 			ASSERT(sl_wmp->wm_flags & SD_WM_BUSY);
29794 			/*
29795 			 * Wait is done on sl_wmp, which is set in the
29796 			 * check_list state.
29797 			 */
29798 			sl_wmp->wm_wanted_count++;
29799 			cv_wait(&sl_wmp->wm_avail, SD_MUTEX(un));
29800 			sl_wmp->wm_wanted_count--;
29801 			/*
29802 			 * We can reuse the memory from the completed sl_wmp
29803 			 * lock range for our new lock, but only if no one is
29804 			 * waiting for it.
29805 			 */
29806 			ASSERT(!(sl_wmp->wm_flags & SD_WM_BUSY));
29807 			if (sl_wmp->wm_wanted_count == 0) {
29808 				if (wmp != NULL) {
29809 					CHK_N_FREEWMP(un, wmp);
29810 				}
29811 				wmp = sl_wmp;
29812 			}
29813 			sl_wmp = NULL;
29814 			/*
29815 			 * After waking up, need to recheck for availability of
29816 			 * range.
29817 			 */
29818 			state = SD_WM_CHK_LIST;
29819 			break;
29820 
29821 		default:
29822 			panic("sd_range_lock: "
29823 			    "Unknown state %d in sd_range_lock", state);
29824 			/*NOTREACHED*/
29825 		} /* switch(state) */
29826 
29827 	} /* while(state != SD_WM_DONE) */
29828 
29829 	mutex_exit(SD_MUTEX(un));
29830 
29831 	ASSERT(wmp != NULL);
29832 
29833 	return (wmp);
29834 }
29835 
29836 
29837 /*
29838  *    Function: sd_get_range()
29839  *
29840  * Description: Find whether there is any I/O overlapping this one.
29841  *		Returns the write map of the first such I/O, NULL otherwise.
29842  *
29843  *   Arguments: un	- sd_lun structure for the device.
29844  *		startb - The starting block number
29845  *		endb - The end block number
29846  *
29847  * Return Code: wm  - pointer to the wmap structure.
29848  */
29849 
29850 static struct sd_w_map *
29851 sd_get_range(struct sd_lun *un, daddr_t startb, daddr_t endb)
29852 {
29853 	struct sd_w_map *wmp;
29854 
29855 	ASSERT(un != NULL);
29856 
29857 	for (wmp = un->un_wm; wmp != NULL; wmp = wmp->wm_next) {
29858 		if (!(wmp->wm_flags & SD_WM_BUSY)) {
29859 			continue;
29860 		}
29861 		if ((startb >= wmp->wm_start) && (startb <= wmp->wm_end)) {
29862 			break;
29863 		}
29864 		if ((endb >= wmp->wm_start) && (endb <= wmp->wm_end)) {
29865 			break;
29866 		}
29867 	}
29868 
29869 	return (wmp);
29870 }
29871 
29872 
29873 /*
29874  *    Function: sd_free_inlist_wmap()
29875  *
29876  * Description: Unlink and free a write map struct.
29877  *
29878  *   Arguments: un      - sd_lun structure for the device.
29879  *		wmp	- sd_w_map which needs to be unlinked.
29880  */
29881 
29882 static void
29883 sd_free_inlist_wmap(struct sd_lun *un, struct sd_w_map *wmp)
29884 {
29885 	ASSERT(un != NULL);
29886 
29887 	if (un->un_wm == wmp) {
29888 		un->un_wm = wmp->wm_next;
29889 	} else {
29890 		wmp->wm_prev->wm_next = wmp->wm_next;
29891 	}
29892 
29893 	if (wmp->wm_next) {
29894 		wmp->wm_next->wm_prev = wmp->wm_prev;
29895 	}
29896 
29897 	wmp->wm_next = wmp->wm_prev = NULL;
29898 
29899 	kmem_cache_free(un->un_wm_cache, wmp);
29900 }
29901 
29902 
29903 /*
29904  *    Function: sd_range_unlock()
29905  *
29906  * Description: Unlock the range locked by wm.
29907  *		Free write map if nobody else is waiting on it.
29908  *
29909  *   Arguments: un      - sd_lun structure for the device.
29910  *              wm      - sd_w_map which needs to be unlocked.
29911  */
29912 
29913 static void
29914 sd_range_unlock(struct sd_lun *un, struct sd_w_map *wm)
29915 {
29916 	ASSERT(un != NULL);
29917 	ASSERT(wm != NULL);
29918 	ASSERT(!mutex_owned(SD_MUTEX(un)));
29919 
29920 	mutex_enter(SD_MUTEX(un));
29921 
29922 	if (wm->wm_flags & SD_WTYPE_RMW) {
29923 		un->un_rmw_count--;
29924 	}
29925 
29926 	if (wm->wm_wanted_count) {
29927 		wm->wm_flags = 0;
29928 		/*
29929 		 * Broadcast that the wmap is available now.
29930 		 */
29931 		cv_broadcast(&wm->wm_avail);
29932 	} else {
29933 		/*
29934 		 * If no one is waiting on the map, it should be freed.
29935 		 */
29936 		sd_free_inlist_wmap(un, wm);
29937 	}
29938 
29939 	mutex_exit(SD_MUTEX(un));
29940 }
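
/*
 * The pairing between sd_range_lock() and sd_range_unlock() above is a
 * standard condition-variable handshake: a waiter bumps wm_wanted_count
 * and blocks in cv_wait() on wm_avail under SD_MUTEX, while the
 * unlocker either wakes all waiters with cv_broadcast() or, when nobody
 * is waiting, frees the map. A minimal sketch of the two sides
 * (illustrative only, not compiled into the driver):
 *
 *	Waiter (cf. the SD_WM_WAIT_MAP state in sd_range_lock()):
 *		mutex_enter(SD_MUTEX(un));
 *		wm->wm_wanted_count++;
 *		cv_wait(&wm->wm_avail, SD_MUTEX(un));
 *		wm->wm_wanted_count--;
 *		mutex_exit(SD_MUTEX(un));
 *
 *	Unlocker (cf. sd_range_unlock() above):
 *		mutex_enter(SD_MUTEX(un));
 *		if (wm->wm_wanted_count) {
 *			wm->wm_flags = 0;
 *			cv_broadcast(&wm->wm_avail);
 *		} else {
 *			sd_free_inlist_wmap(un, wm);
 *		}
 *		mutex_exit(SD_MUTEX(un));
 *
 * After waking, sd_range_lock() re-runs its SD_WM_CHK_LIST state rather
 * than looping on cv_wait() directly; that re-check serves the same
 * purpose as the conventional while-loop around cv_wait().
 */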
29941 
29942 
29943 /*
29944  *    Function: sd_read_modify_write_task
29945  *
29946  * Description: Called from a taskq thread to initiate the write phase of
29947  *		a read-modify-write request.  This is used for targets where
29948  *		un->un_sys_blocksize != un->un_tgt_blocksize.
29949  *
29950  *   Arguments: arg - a pointer to the buf(9S) struct for the write command.
29951  *
29952  *     Context: Called under taskq thread context.
29953  */
29954 
29955 static void
29956 sd_read_modify_write_task(void *arg)
29957 {
29958 	struct sd_mapblocksize_info	*bsp;
29959 	struct buf	*bp;
29960 	struct sd_xbuf	*xp;
29961 	struct sd_lun	*un;
29962 
29963 	bp = arg;	/* The bp is given in arg */
29964 	ASSERT(bp != NULL);
29965 
29966 	/* Get the pointer to the layer-private data struct */
29967 	xp = SD_GET_XBUF(bp);
29968 	ASSERT(xp != NULL);
29969 	bsp = xp->xb_private;
29970 	ASSERT(bsp != NULL);
29971 
29972 	un = SD_GET_UN(bp);
29973 	ASSERT(un != NULL);
29974 	ASSERT(!mutex_owned(SD_MUTEX(un)));
29975 
29976 	SD_TRACE(SD_LOG_IO_RMMEDIA, un,
29977 	    "sd_read_modify_write_task: entry: buf:0x%p\n", bp);
29978 
29979 	/*
29980 	 * This is the write phase of a read-modify-write request. It is
29981 	 * called in taskq thread context because the read portion of the
29982 	 * rmw request completed in interrupt context. The write request
29983 	 * must be sent from here down the iostart
29984 	 * chain as if it were being sent from sd_mapblocksize_iostart(), so
29985 	 * we use the layer index saved in the layer-private data area.
29986 	 */
29987 	SD_NEXT_IOSTART(bsp->mbs_layer_index, un, bp);
29988 
29989 	SD_TRACE(SD_LOG_IO_RMMEDIA, un,
29990 	    "sd_read_modify_write_task: exit: buf:0x%p\n", bp);
29991 }
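
/*
 * For context: this function runs via taskq_dispatch(9F). A hedged
 * sketch of how the read-completion path would hand the buf to a
 * taskq (the taskq name here is illustrative; see the rmw completion
 * code for the actual dispatch site):
 *
 *	if (taskq_dispatch(sd_wmr_tq, sd_read_modify_write_task, bp,
 *	    TQ_NOSLEEP) == TASKQID_INVALID) {
 *		(handle dispatch failure, e.g. fail the buf)
 *	}
 *
 * taskq_dispatch() queues the (function, argument) pair and returns
 * immediately; a taskq worker thread later invokes
 * sd_read_modify_write_task(bp) in kernel thread context, outside the
 * interrupt that completed the read phase.
 */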
29992 
29993 
29994 /*
29995  *    Function: sddump_do_read_of_rmw()
29996  *
29997  * Description: This routine is called from sddump(). If sddump() is called
29998  *		with an I/O that is not aligned on a device blocksize boundary,
29999  *		then the write has to be converted to a read-modify-write.
30000  *		Do the read part here in order to keep sddump() simple.
30001  *		Note that sd_mutex is held across the call to this
30002  *		routine.
30003  *
30004  *   Arguments: un	- sd_lun
30005  *		blkno	- block number in terms of media block size.
30006  *		nblk	- number of blocks.
30007  *		bpp	- pointer to pointer to the buf structure. On return
30008  *			from this function, *bpp points to the valid buffer
30009  *			to which the write has to be done.
30010  *
30011  * Return Code: 0 for success or errno-type return code
30012  */
30013 
30014 static int
30015 sddump_do_read_of_rmw(struct sd_lun *un, uint64_t blkno, uint64_t nblk,
30016     struct buf **bpp)
30017 {
30018 	int err;
30019 	int i;
30020 	int rval;
30021 	struct buf *bp;
30022 	struct scsi_pkt *pkt = NULL;
30023 	uint32_t target_blocksize;
30024 
30025 	ASSERT(un != NULL);
30026 	ASSERT(mutex_owned(SD_MUTEX(un)));
30027 
30028 	target_blocksize = un->un_tgt_blocksize;
30029 
30030 	mutex_exit(SD_MUTEX(un));
30031 
30032 	bp = scsi_alloc_consistent_buf(SD_ADDRESS(un), (struct buf *)NULL,
30033 	    (size_t)(nblk * target_blocksize), B_READ, NULL_FUNC, NULL);
30034 	if (bp == NULL) {
30035 		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
30036 		    "no resources for dumping; giving up");
30037 		err = ENOMEM;
30038 		goto done;
30039 	}
30040 
30041 	rval = sd_setup_rw_pkt(un, &pkt, bp, 0, NULL_FUNC, NULL,
30042 	    blkno, nblk);
30043 	if (rval != 0) {
30044 		scsi_free_consistent_buf(bp);
30045 		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
30046 		    "no resources for dumping; giving up");
30047 		err = ENOMEM;
30048 		goto done;
30049 	}
30050 
30051 	pkt->pkt_flags |= FLAG_NOINTR;
30052 
30053 	err = EIO;
30054 	for (i = 0; i < SD_NDUMP_RETRIES; i++) {
30055 
30056 		/*
30057 		 * sd_scsi_poll() returns 0 (success) if the command completes and
30058 		 * the status block is STATUS_GOOD.  We should only check
30059 		 * errors if this condition is not true.  Even then we should
30060 		 * send our own request sense packet only if we have a check
30061 		 * condition and auto request sense has not been performed by
30062 		 * the HBA.
30063 		 */
30064 		SD_TRACE(SD_LOG_DUMP, un, "sddump: sending read\n");
30065 
30066 		if ((sd_scsi_poll(un, pkt) == 0) && (pkt->pkt_resid == 0)) {
30067 			err = 0;
30068 			break;
30069 		}
30070 
30071 		/*
30072 		 * Check for CMD_DEV_GONE first; if the device is gone,
30073 		 * give up, since there is no need to read RQS data.
30074 		 */
30075 		if (pkt->pkt_reason == CMD_DEV_GONE) {
30076 			scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
30077 			    "Error while dumping state with rmw..."
30078 			    "Device is gone\n");
30079 			break;
30080 		}
30081 
30082 		if (SD_GET_PKT_STATUS(pkt) == STATUS_CHECK) {
30083 			SD_INFO(SD_LOG_DUMP, un,
30084 			    "sddump: read failed with CHECK, try # %d\n", i);
30085 			if (((pkt->pkt_state & STATE_ARQ_DONE) == 0)) {
30086 				(void) sd_send_polled_RQS(un);
30087 			}
30088 
30089 			continue;
30090 		}
30091 
30092 		if (SD_GET_PKT_STATUS(pkt) == STATUS_BUSY) {
30093 			int reset_retval = 0;
30094 
30095 			SD_INFO(SD_LOG_DUMP, un,
30096 			    "sddump: read failed with BUSY, try # %d\n", i);
30097 
30098 			if (un->un_f_lun_reset_enabled == TRUE) {
30099 				reset_retval = scsi_reset(SD_ADDRESS(un),
30100 				    RESET_LUN);
30101 			}
30102 			if (reset_retval == 0) {
30103 				(void) scsi_reset(SD_ADDRESS(un), RESET_TARGET);
30104 			}
30105 			(void) sd_send_polled_RQS(un);
30106 
30107 		} else {
30108 			SD_INFO(SD_LOG_DUMP, un,
30109 			    "sddump: read failed with 0x%x, try # %d\n",
30110 			    SD_GET_PKT_STATUS(pkt), i);
30111 			mutex_enter(SD_MUTEX(un));
30112 			sd_reset_target(un, pkt);
30113 			mutex_exit(SD_MUTEX(un));
30114 		}
30115 
30116 		/*
30117 		 * If we are not getting anywhere with lun/target resets,
30118 		 * let's reset the bus.
30119 		 */
30120 		if (i > SD_NDUMP_RETRIES / 2) {
30121 			(void) scsi_reset(SD_ADDRESS(un), RESET_ALL);
30122 			(void) sd_send_polled_RQS(un);
30123 		}
30124 
30125 	}
30126 	scsi_destroy_pkt(pkt);
30127 
30128 	if (err != 0) {
30129 		scsi_free_consistent_buf(bp);
30130 		*bpp = NULL;
30131 	} else {
30132 		*bpp = bp;
30133 	}
30134 
30135 done:
30136 	mutex_enter(SD_MUTEX(un));
30137 	return (err);
30138 }
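
/*
 * A hedged sketch of how the dump path consumes this helper (variable
 * names simplified; the real logic lives in sddump()): when the dump
 * I/O is not aligned to the target blocksize, the covering target
 * blocks are read first, the caller's bytes are overlaid at the proper
 * offset, and the merged, fully aligned buffer is then written out:
 *
 *	struct buf *wbp;
 *
 *	if (sddump_do_read_of_rmw(un, tgt_blkno, tgt_nblk, &wbp) != 0)
 *		return (EIO);
 *	bcopy(addr, wbp->b_un.b_addr + byte_offset, nbytes);
 *	(then write wbp's full, aligned range back with a polled WRITE)
 *
 * Keeping the read phase here, in one place, is what lets sddump()
 * itself stay simple, as the description above notes.
 */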
30139 
30140 
30141 /*
30142  *    Function: sd_failfast_flushq
30143  *
30144  * Description: Take all bp's on the wait queue that have B_FAILFAST set
30145  *		in b_flags and move them onto the failfast queue, then kick
30146  *		off a thread to return all bp's on the failfast queue to
30147  *		their owners with an error set.
30148  *
30149  *   Arguments: un - pointer to the soft state struct for the instance.
30150  *
30151  *     Context: may execute in interrupt context.
30152  */
30153 
30154 static void
30155 sd_failfast_flushq(struct sd_lun *un)
30156 {
30157 	struct buf *bp;
30158 	struct buf *next_waitq_bp;
30159 	struct buf *prev_waitq_bp = NULL;
30160 
30161 	ASSERT(un != NULL);
30162 	ASSERT(mutex_owned(SD_MUTEX(un)));
30163 	ASSERT(un->un_failfast_state == SD_FAILFAST_ACTIVE);
30164 	ASSERT(un->un_failfast_bp == NULL);
30165 
30166 	SD_TRACE(SD_LOG_IO_FAILFAST, un,
30167 	    "sd_failfast_flushq: entry: un:0x%p\n", un);
30168 
30169 	/*
30170 	 * Check if we should flush all bufs when entering failfast state, or
30171 	 * just those with B_FAILFAST set.
30172 	 */
30173 	if (sd_failfast_flushctl & SD_FAILFAST_FLUSH_ALL_BUFS) {
30174 		/*
30175 		 * Move *all* bp's on the wait queue to the failfast flush
30176 		 * queue, including those that do NOT have B_FAILFAST set.
30177 		 */
30178 		if (un->un_failfast_headp == NULL) {
30179 			ASSERT(un->un_failfast_tailp == NULL);
30180 			un->un_failfast_headp = un->un_waitq_headp;
30181 		} else {
30182 			ASSERT(un->un_failfast_tailp != NULL);
30183 			un->un_failfast_tailp->av_forw = un->un_waitq_headp;
30184 		}
30185 
30186 		un->un_failfast_tailp = un->un_waitq_tailp;
30187 
30188 		/* update kstat for each bp moved out of the waitq */
30189 		for (bp = un->un_waitq_headp; bp != NULL; bp = bp->av_forw) {
30190 			SD_UPDATE_KSTATS(un, kstat_waitq_exit, bp);
30191 		}
30192 
30193 		/* empty the waitq */
30194 		un->un_waitq_headp = un->un_waitq_tailp = NULL;
30195 
30196 	} else {
30197 		/*
30198 		 * Go thru the wait queue, pick off all entries with
30199 		 * B_FAILFAST set, and move these onto the failfast queue.
30200 		 */
30201 		for (bp = un->un_waitq_headp; bp != NULL; bp = next_waitq_bp) {
30202 			/*
30203 			 * Save the pointer to the next bp on the wait queue,
30204 			 * so we get to it on the next iteration of this loop.
30205 			 */
30206 			next_waitq_bp = bp->av_forw;
30207 
30208 			/*
30209 			 * If this bp from the wait queue does NOT have
30210 			 * B_FAILFAST set, just move on to the next element
30211 			 * in the wait queue. Note, this is the only place
30212 			 * where it is correct to set prev_waitq_bp.
30213 			 */
30214 			if ((bp->b_flags & B_FAILFAST) == 0) {
30215 				prev_waitq_bp = bp;
30216 				continue;
30217 			}
30218 
30219 			/*
30220 			 * Remove the bp from the wait queue.
30221 			 */
30222 			if (bp == un->un_waitq_headp) {
30223 				/* The bp is the first element of the waitq. */
30224 				un->un_waitq_headp = next_waitq_bp;
30225 				if (un->un_waitq_headp == NULL) {
30226 					/* The wait queue is now empty */
30227 					un->un_waitq_tailp = NULL;
30228 				}
30229 			} else {
30230 				/*
30231 				 * The bp is either somewhere in the middle
30232 				 * or at the end of the wait queue.
30233 				 */
30234 				ASSERT(un->un_waitq_headp != NULL);
30235 				ASSERT(prev_waitq_bp != NULL);
30236 				ASSERT((prev_waitq_bp->b_flags & B_FAILFAST)
30237 				    == 0);
30238 				if (bp == un->un_waitq_tailp) {
30239 					/* bp is the last entry on the waitq. */
30240 					ASSERT(next_waitq_bp == NULL);
30241 					un->un_waitq_tailp = prev_waitq_bp;
30242 				}
30243 				prev_waitq_bp->av_forw = next_waitq_bp;
30244 			}
30245 			bp->av_forw = NULL;
30246 
30247 			/*
30248 			 * update kstat since the bp is moved out of
30249 			 * the waitq
30250 			 */
30251 			SD_UPDATE_KSTATS(un, kstat_waitq_exit, bp);
30252 
30253 			/*
30254 			 * Now put the bp onto the failfast queue.
30255 			 */
30256 			if (un->un_failfast_headp == NULL) {
30257 				/* failfast queue is currently empty */
30258 				ASSERT(un->un_failfast_tailp == NULL);
30259 				un->un_failfast_headp =
30260 				    un->un_failfast_tailp = bp;
30261 			} else {
30262 				/* Add the bp to the end of the failfast q */
30263 				ASSERT(un->un_failfast_tailp != NULL);
30264 				ASSERT(un->un_failfast_tailp->b_flags &
30265 				    B_FAILFAST);
30266 				un->un_failfast_tailp->av_forw = bp;
30267 				un->un_failfast_tailp = bp;
30268 			}
30269 		}
30270 	}
30271 
30272 	/*
30273 	 * Now return all bp's on the failfast queue to their owners.
30274 	 */
30275 	while ((bp = un->un_failfast_headp) != NULL) {
30276 
30277 		un->un_failfast_headp = bp->av_forw;
30278 		if (un->un_failfast_headp == NULL) {
30279 			un->un_failfast_tailp = NULL;
30280 		}
30281 
30282 		/*
30283 		 * We want to return the bp with a failure error code, but
30284 		 * we do not want a call to sd_start_cmds() to occur here,
30285 		 * so use sd_return_failed_command_no_restart() instead of
30286 		 * sd_return_failed_command().
30287 		 */
30288 		sd_return_failed_command_no_restart(un, bp, EIO);
30289 	}
30290 
30291 	/* Flush the xbuf queues if required. */
30292 	if (sd_failfast_flushctl & SD_FAILFAST_FLUSH_ALL_QUEUES) {
30293 		ddi_xbuf_flushq(un->un_xbuf_attr, sd_failfast_flushq_callback);
30294 	}
30295 
30296 	SD_TRACE(SD_LOG_IO_FAILFAST, un,
30297 	    "sd_failfast_flushq: exit: un:0x%p\n", un);
30298 }
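
/*
 * Implementation note: the "flush all" arm above moves the entire wait
 * queue in O(1) by splicing head/tail pointers instead of unlinking
 * each buf individually. The generic pattern for an av_forw-linked
 * queue looks like this (sketch only):
 *
 *	if (dst_headp == NULL)
 *		dst_headp = src_headp;
 *	else
 *		dst_tailp->av_forw = src_headp;
 *	dst_tailp = src_tailp;
 *	src_headp = src_tailp = NULL;
 *
 * The per-buf kstat_waitq_exit() updates are the only remaining O(n)
 * walk; the selective (B_FAILFAST-only) arm necessarily stays O(n).
 */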
30299 
30300 
30301 /*
30302  *    Function: sd_failfast_flushq_callback
30303  *
30304  * Description: Return TRUE if the given bp meets the criteria for failfast
30305  *		flushing. Used with ddi_xbuf_flushq(9F).
30306  *
30307  *   Arguments: bp - ptr to buf struct to be examined.
30308  *
30309  *     Context: Any
30310  */
30311 
30312 static int
30313 sd_failfast_flushq_callback(struct buf *bp)
30314 {
30315 	/*
30316 	 * Return TRUE if (1) we want to flush ALL bufs when the failfast
30317 	 * state is entered; OR (2) the given bp has B_FAILFAST set.
30318 	 */
30319 	return (((sd_failfast_flushctl & SD_FAILFAST_FLUSH_ALL_BUFS) ||
30320 	    (bp->b_flags & B_FAILFAST)) ? TRUE : FALSE);
30321 }
30322 
30323 
30324 
30325 /*
30326  * Function: sd_setup_next_xfer
30327  *
30328  * Description: Prepare next I/O operation using DMA_PARTIAL
30329  *
30330  */
30331 
30332 static int
30333 sd_setup_next_xfer(struct sd_lun *un, struct buf *bp,
30334     struct scsi_pkt *pkt, struct sd_xbuf *xp)
30335 {
30336 	ssize_t	num_blks_not_xfered;
30337 	daddr_t	strt_blk_num;
30338 	ssize_t	bytes_not_xfered;
30339 	int	rval;
30340 
30341 	ASSERT(pkt->pkt_resid == 0);
30342 
30343 	/*
30344 	 * Calculate next block number and amount to be transferred.
30345 	 *
30346 	 * How much data NOT transfered to the HBA yet.
30347 	 * How much data has NOT been transferred to the HBA yet.
30348 	bytes_not_xfered = xp->xb_dma_resid;
30349 
30350 	/*
30351 	 * Figure how many blocks have NOT been transferred to the HBA yet.
30352 	 */
30353 	num_blks_not_xfered = SD_BYTES2TGTBLOCKS(un, bytes_not_xfered);
30354 
30355 	/*
30356 	 * Set the starting block number to the end of what WAS transferred.
30357 	 */
30358 	strt_blk_num = xp->xb_blkno +
30359 	    SD_BYTES2TGTBLOCKS(un, bp->b_bcount - bytes_not_xfered);
30360 
30361 	/*
30362 	 * Move pkt to the next portion of the xfer.  sd_setup_next_rw_pkt
30363 	 * will call scsi_init_pkt() with NULL_FUNC so we do not have to release
30364 	 * the disk mutex here.
30365 	 */
30366 	rval = sd_setup_next_rw_pkt(un, pkt, bp,
30367 	    strt_blk_num, num_blks_not_xfered);
30368 
30369 	if (rval == 0) {
30370 
30371 		/*
30372 		 * Success.
30373 		 *
30374 		 * Adjust things if there are still more blocks to be
30375 		 * transfered.
30376 	 * transferred.
30377 		xp->xb_dma_resid = pkt->pkt_resid;
30378 		pkt->pkt_resid = 0;
30379 
30380 		return (1);
30381 	}
30382 
30383 	/*
30384 	 * There is really only one possible failure return from
30385 	 * sd_setup_next_rw_pkt(), which occurs when scsi_init_pkt()
30386 	 * returns NULL.
30387 	 */
30388 	ASSERT(rval == SD_PKT_ALLOC_FAILURE);
30389 
30390 	bp->b_resid = bp->b_bcount;
30391 	bp->b_flags |= B_ERROR;
30392 
30393 	scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
30394 	    "Error setting up next portion of DMA transfer\n");
30395 
30396 	return (0);
30397 }
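
/*
 * Worked example of the arithmetic above, with hypothetical numbers:
 * assume un_tgt_blocksize = 512, b_bcount = 1048576 (2048 blocks), and
 * the HBA left xb_dma_resid = 262144 after the first partial transfer.
 * Then:
 *
 *	bytes_not_xfered    = 262144
 *	num_blks_not_xfered = SD_BYTES2TGTBLOCKS(un, 262144)
 *	                    = 512 blocks
 *	strt_blk_num        = xb_blkno +
 *	    SD_BYTES2TGTBLOCKS(un, 1048576 - 262144)
 *	                    = xb_blkno + 1536
 *
 * so the next packet resumes exactly where the HBA stopped, covering
 * the remaining 512 target blocks.
 */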
30398 
30399 /*
30400  *    Function: sd_panic_for_res_conflict
30401  *
30402  * Description: Call panic with a string formatted with "Reservation Conflict"
30403  *		and a human readable identifier indicating the SD instance
30404  *		that experienced the reservation conflict.
30405  *
30406  *   Arguments: un - pointer to the soft state struct for the instance.
30407  *
30408  *     Context: may execute in interrupt context.
30409  */
30410 
30411 #define	SD_RESV_CONFLICT_FMT_LEN 40
30412 void
30413 sd_panic_for_res_conflict(struct sd_lun *un)
30414 {
30415 	char panic_str[SD_RESV_CONFLICT_FMT_LEN + MAXPATHLEN];
30416 	char path_str[MAXPATHLEN];
30417 
30418 	(void) snprintf(panic_str, sizeof (panic_str),
30419 	    "Reservation Conflict\nDisk: %s",
30420 	    ddi_pathname(SD_DEVINFO(un), path_str));
30421 
30422 	panic(panic_str);
30423 }
30424 
30425 /*
30426  * Note: The following sd_faultinjection_ioctl() routines implement
30427  * driver support for fault injection, used for error analysis by
30428  * provoking faults in multiple layers of the driver.
30429  *
30430  */
30431 
30432 #ifdef SD_FAULT_INJECTION
30433 static uint_t   sd_fault_injection_on = 0;
30434 
30435 /*
30436  *    Function: sd_faultinjection_ioctl()
30437  *
30438  * Description: This routine is the driver entry point for handling
30439  *              faultinjection ioctls to inject errors into the
30440  *              layer model
30441  *
30442  *   Arguments: cmd	- the ioctl cmd received
30443  *		arg	- the ioctl argument, used to pass data in and out
30444  */
30445 
30446 static void
30447 sd_faultinjection_ioctl(int cmd, intptr_t arg,  struct sd_lun *un)
30448 {
30449 	uint_t i = 0;
30450 	uint_t rval;
30451 
30452 	SD_TRACE(SD_LOG_IOERR, un, "sd_faultinjection_ioctl: entry\n");
30453 
30454 	mutex_enter(SD_MUTEX(un));
30455 
30456 	switch (cmd) {
30457 	case SDIOCRUN:
30458 		/* Allow pushed faults to be injected */
30459 		SD_INFO(SD_LOG_SDTEST, un,
30460 		    "sd_faultinjection_ioctl: Injecting Fault Run\n");
30461 
30462 		sd_fault_injection_on = 1;
30463 
30464 		SD_INFO(SD_LOG_IOERR, un,
30465 		    "sd_faultinjection_ioctl: run finished\n");
30466 		break;
30467 
30468 	case SDIOCSTART:
30469 		/* Start Injection Session */
30470 		SD_INFO(SD_LOG_SDTEST, un,
30471 		    "sd_faultinjection_ioctl: Injecting Fault Start\n");
30472 
30473 		sd_fault_injection_on = 0;
30474 		un->sd_injection_mask = 0xFFFFFFFF;
30475 		for (i = 0; i < SD_FI_MAX_ERROR; i++) {
30476 			un->sd_fi_fifo_pkt[i] = NULL;
30477 			un->sd_fi_fifo_xb[i] = NULL;
30478 			un->sd_fi_fifo_un[i] = NULL;
30479 			un->sd_fi_fifo_arq[i] = NULL;
30480 		}
30481 		un->sd_fi_fifo_start = 0;
30482 		un->sd_fi_fifo_end = 0;
30483 
30484 		mutex_enter(&(un->un_fi_mutex));
30485 		un->sd_fi_log[0] = '\0';
30486 		un->sd_fi_buf_len = 0;
30487 		mutex_exit(&(un->un_fi_mutex));
30488 
30489 		SD_INFO(SD_LOG_IOERR, un,
30490 		    "sd_faultinjection_ioctl: start finished\n");
30491 		break;
30492 
30493 	case SDIOCSTOP:
30494 		/* Stop Injection Session */
30495 		SD_INFO(SD_LOG_SDTEST, un,
30496 		    "sd_faultinjection_ioctl: Injecting Fault Stop\n");
30497 		sd_fault_injection_on = 0;
30498 		un->sd_injection_mask = 0x0;
30499 
30500 		/* Empty stray or unused structs from the fifo */
30501 		for (i = 0; i < SD_FI_MAX_ERROR; i++) {
30502 			if (un->sd_fi_fifo_pkt[i] != NULL) {
30503 				kmem_free(un->sd_fi_fifo_pkt[i],
30504 				    sizeof (struct sd_fi_pkt));
30505 			}
30506 			if (un->sd_fi_fifo_xb[i] != NULL) {
30507 				kmem_free(un->sd_fi_fifo_xb[i],
30508 				    sizeof (struct sd_fi_xb));
30509 			}
30510 			if (un->sd_fi_fifo_un[i] != NULL) {
30511 				kmem_free(un->sd_fi_fifo_un[i],
30512 				    sizeof (struct sd_fi_un));
30513 			}
30514 			if (un->sd_fi_fifo_arq[i] != NULL) {
30515 				kmem_free(un->sd_fi_fifo_arq[i],
30516 				    sizeof (struct sd_fi_arq));
30517 			}
30518 			un->sd_fi_fifo_pkt[i] = NULL;
30519 			un->sd_fi_fifo_un[i] = NULL;
30520 			un->sd_fi_fifo_xb[i] = NULL;
30521 			un->sd_fi_fifo_arq[i] = NULL;
30522 		}
30523 		un->sd_fi_fifo_start = 0;
30524 		un->sd_fi_fifo_end = 0;
30525 
30526 		SD_INFO(SD_LOG_IOERR, un,
30527 		    "sd_faultinjection_ioctl: stop finished\n");
30528 		break;
30529 
30530 	case SDIOCINSERTPKT:
30531 		/* Store a packet struct to be pushed onto the fifo */
30532 		SD_INFO(SD_LOG_SDTEST, un,
30533 		    "sd_faultinjection_ioctl: Injecting Fault Insert Pkt\n");
30534 
30535 		i = un->sd_fi_fifo_end % SD_FI_MAX_ERROR;
30536 
30537 		sd_fault_injection_on = 0;
30538 
30539 		/* No more than SD_FI_MAX_ERROR entries allowed in the queue */
30540 		if (un->sd_fi_fifo_pkt[i] != NULL) {
30541 			kmem_free(un->sd_fi_fifo_pkt[i],
30542 			    sizeof (struct sd_fi_pkt));
30543 		}
30544 		if (arg != (uintptr_t)NULL) {
30545 			un->sd_fi_fifo_pkt[i] =
30546 			    kmem_alloc(sizeof (struct sd_fi_pkt), KM_NOSLEEP);
30547 			if (un->sd_fi_fifo_pkt[i] == NULL) {
30548 				/* Alloc failed; don't store anything */
30549 				break;
30550 			}
30551 			rval = ddi_copyin((void *)arg, un->sd_fi_fifo_pkt[i],
30552 			    sizeof (struct sd_fi_pkt), 0);
30553 			if (rval == -1) {
30554 				kmem_free(un->sd_fi_fifo_pkt[i],
30555 				    sizeof (struct sd_fi_pkt));
30556 				un->sd_fi_fifo_pkt[i] = NULL;
30557 			}
30558 		} else {
30559 			SD_INFO(SD_LOG_IOERR, un,
30560 			    "sd_faultinjection_ioctl: pkt null\n");
30561 		}
30562 		break;
30563 
30564 	case SDIOCINSERTXB:
30565 		/* Store an xb struct to be pushed onto the fifo */
30566 		SD_INFO(SD_LOG_SDTEST, un,
30567 		    "sd_faultinjection_ioctl: Injecting Fault Insert XB\n");
30568 
30569 		i = un->sd_fi_fifo_end % SD_FI_MAX_ERROR;
30570 
30571 		sd_fault_injection_on = 0;
30572 
30573 		if (un->sd_fi_fifo_xb[i] != NULL) {
30574 			kmem_free(un->sd_fi_fifo_xb[i],
30575 			    sizeof (struct sd_fi_xb));
30576 			un->sd_fi_fifo_xb[i] = NULL;
30577 		}
30578 		if (arg != (uintptr_t)NULL) {
30579 			un->sd_fi_fifo_xb[i] =
30580 			    kmem_alloc(sizeof (struct sd_fi_xb), KM_NOSLEEP);
30581 			if (un->sd_fi_fifo_xb[i] == NULL) {
30582 				/* Alloc failed; don't store anything */
30583 				break;
30584 			}
30585 			rval = ddi_copyin((void *)arg, un->sd_fi_fifo_xb[i],
30586 			    sizeof (struct sd_fi_xb), 0);
30587 
30588 			if (rval == -1) {
30589 				kmem_free(un->sd_fi_fifo_xb[i],
30590 				    sizeof (struct sd_fi_xb));
30591 				un->sd_fi_fifo_xb[i] = NULL;
30592 			}
30593 		} else {
30594 			SD_INFO(SD_LOG_IOERR, un,
30595 			    "sd_faultinjection_ioctl: xb null\n");
30596 		}
30597 		break;
30598 
30599 	case SDIOCINSERTUN:
30600 		/* Store a un struct to be pushed onto the fifo */
30601 		SD_INFO(SD_LOG_SDTEST, un,
30602 		    "sd_faultinjection_ioctl: Injecting Fault Insert UN\n");
30603 
30604 		i = un->sd_fi_fifo_end % SD_FI_MAX_ERROR;
30605 
30606 		sd_fault_injection_on = 0;
30607 
30608 		if (un->sd_fi_fifo_un[i] != NULL) {
30609 			kmem_free(un->sd_fi_fifo_un[i],
30610 			    sizeof (struct sd_fi_un));
30611 			un->sd_fi_fifo_un[i] = NULL;
30612 		}
30613 		if (arg != (uintptr_t)NULL) {
30614 			un->sd_fi_fifo_un[i] =
30615 			    kmem_alloc(sizeof (struct sd_fi_un), KM_NOSLEEP);
30616 			if (un->sd_fi_fifo_un[i] == NULL) {
30617 				/* Alloc failed; don't store anything */
30618 				break;
30619 			}
30620 			rval = ddi_copyin((void *)arg, un->sd_fi_fifo_un[i],
30621 			    sizeof (struct sd_fi_un), 0);
30622 			if (rval == -1) {
30623 				kmem_free(un->sd_fi_fifo_un[i],
30624 				    sizeof (struct sd_fi_un));
30625 				un->sd_fi_fifo_un[i] = NULL;
30626 			}
30627 
30628 		} else {
30629 			SD_INFO(SD_LOG_IOERR, un,
30630 			    "sd_faultinjection_ioctl: un null\n");
30631 		}
30632 
30633 		break;
30634 
30635 	case SDIOCINSERTARQ:
30636 		/* Store an arq struct to be pushed onto the fifo */
30637 		SD_INFO(SD_LOG_SDTEST, un,
30638 		    "sd_faultinjection_ioctl: Injecting Fault Insert ARQ\n");
30639 		i = un->sd_fi_fifo_end % SD_FI_MAX_ERROR;
30640 
30641 		sd_fault_injection_on = 0;
30642 
30643 		if (un->sd_fi_fifo_arq[i] != NULL) {
30644 			kmem_free(un->sd_fi_fifo_arq[i],
30645 			    sizeof (struct sd_fi_arq));
30646 			un->sd_fi_fifo_arq[i] = NULL;
30647 		}
30648 		if (arg != (uintptr_t)NULL) {
30649 			un->sd_fi_fifo_arq[i] =
30650 			    kmem_alloc(sizeof (struct sd_fi_arq), KM_NOSLEEP);
30651 			if (un->sd_fi_fifo_arq[i] == NULL) {
30652 				/* Alloc failed; don't store anything */
30653 				break;
30654 			}
30655 			rval = ddi_copyin((void *)arg, un->sd_fi_fifo_arq[i],
30656 			    sizeof (struct sd_fi_arq), 0);
30657 			if (rval == -1) {
30658 				kmem_free(un->sd_fi_fifo_arq[i],
30659 				    sizeof (struct sd_fi_arq));
30660 				un->sd_fi_fifo_arq[i] = NULL;
30661 			}
30662 
30663 		} else {
30664 			SD_INFO(SD_LOG_IOERR, un,
30665 			    "sd_faultinjection_ioctl: arq null\n");
30666 		}
30667 
30668 		break;
30669 
30670 	case SDIOCPUSH:
30671 		/* Push stored xb, pkt, un, and arq onto fifo */
30672 		sd_fault_injection_on = 0;
30673 
30674 		if (arg != (uintptr_t)NULL) {
30675 			rval = ddi_copyin((void *)arg, &i, sizeof (uint_t), 0);
30676 			if (rval != -1 &&
30677 			    un->sd_fi_fifo_end + i < SD_FI_MAX_ERROR) {
30678 				un->sd_fi_fifo_end += i;
30679 			}
30680 		} else {
30681 			SD_INFO(SD_LOG_IOERR, un,
30682 			    "sd_faultinjection_ioctl: push arg null\n");
30683 			if (un->sd_fi_fifo_end + i < SD_FI_MAX_ERROR) {
30684 				un->sd_fi_fifo_end++;
30685 			}
30686 		}
30687 		SD_INFO(SD_LOG_IOERR, un,
30688 		    "sd_faultinjection_ioctl: push to end=%d\n",
30689 		    un->sd_fi_fifo_end);
30690 		break;
30691 
30692 	case SDIOCRETRIEVE:
30693 		/* Return buffer of log from Injection session */
30694 		SD_INFO(SD_LOG_SDTEST, un,
30695 		    "sd_faultinjection_ioctl: Injecting Fault Retrieve");
30696 
30697 		sd_fault_injection_on = 0;
30698 
30699 		mutex_enter(&(un->un_fi_mutex));
30700 		rval = ddi_copyout(un->sd_fi_log, (void *)arg,
30701 		    un->sd_fi_buf_len+1, 0);
30702 		mutex_exit(&(un->un_fi_mutex));
30703 
30704 		if (rval == -1) {
30705 			/*
30706 			 * arg is possibly invalid, so set
30707 			 * it to NULL for the return
30708 			 */
30709 			arg = (uintptr_t)NULL;
30710 		}
30711 		break;
30712 	}
30713 
30714 	mutex_exit(SD_MUTEX(un));
30715 	SD_TRACE(SD_LOG_IOERR, un, "sd_faultinjection_ioctl: exit\n");
30716 }
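
/*
 * A hedged sketch of driving these ioctls from a hypothetical userland
 * test harness (fd is an open descriptor on the sd device node). The
 * session protocol implied by the switch above is: SDIOCSTART, one or
 * more SDIOCINSERT* calls, SDIOCPUSH, SDIOCRUN, then SDIOCRETRIEVE:
 *
 *	struct sd_fi_pkt fi_pkt;
 *	uint_t count = 1;
 *	char log[SD_FI_MAX_BUF];
 *
 *	(fill fi_pkt with the fault to inject, e.g. a pkt_reason)
 *	(void) ioctl(fd, SDIOCSTART, NULL);
 *	(void) ioctl(fd, SDIOCINSERTPKT, &fi_pkt);
 *	(void) ioctl(fd, SDIOCPUSH, &count);
 *	(void) ioctl(fd, SDIOCRUN, NULL);
 *	(issue I/O against the device to trigger the injection)
 *	(void) ioctl(fd, SDIOCRETRIEVE, log);
 *
 * Each pushed set is consumed by sd_faultinjection() when the next
 * command completes, and the session log is returned by SDIOCRETRIEVE.
 */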
30717 
30718 
30719 /*
30720  *    Function: sd_injection_log()
30721  *
30722  * Description: This routine adds buf to the existing injection log
30723  *              for retrieval via sd_faultinjection_ioctl(), for use in fault
30724  *              detection and recovery
30725  *
30726  *   Arguments: buf - the string to add to the log
30727  */
30728 
30729 static void
30730 sd_injection_log(char *buf, struct sd_lun *un)
30731 {
30732 	uint_t len;
30733 
30734 	ASSERT(un != NULL);
30735 	ASSERT(buf != NULL);
30736 
30737 	mutex_enter(&(un->un_fi_mutex));
30738 
30739 	len = min(strlen(buf), 255);
30740 	/* Add logged value to Injection log to be returned later */
30741 	if (len + un->sd_fi_buf_len < SD_FI_MAX_BUF) {
30742 		uint_t	offset = strlen((char *)un->sd_fi_log);
30743 		char *destp = (char *)un->sd_fi_log + offset;
30744 		int i;
30745 		for (i = 0; i < len; i++) {
30746 			*destp++ = *buf++;
30747 		}
30748 		un->sd_fi_buf_len += len;
30749 		un->sd_fi_log[un->sd_fi_buf_len] = '\0';
30750 	}
30751 
30752 	mutex_exit(&(un->un_fi_mutex));
30753 }
30754 
30755 
30756 /*
30757  *    Function: sd_faultinjection()
30758  *
30759  * Description: This routine takes the pkt and changes its
30760  *		content based on the error injection scenario.
30761  *
30762  *   Arguments: pktp	- packet to be changed
30763  */
30764 
30765 static void
30766 sd_faultinjection(struct scsi_pkt *pktp)
30767 {
30768 	uint_t i;
30769 	struct sd_fi_pkt *fi_pkt;
30770 	struct sd_fi_xb *fi_xb;
30771 	struct sd_fi_un *fi_un;
30772 	struct sd_fi_arq *fi_arq;
30773 	struct buf *bp;
30774 	struct sd_xbuf *xb;
30775 	struct sd_lun *un;
30776 
30777 	ASSERT(pktp != NULL);
30778 
30779 	/* pull bp, xb, and un from pktp */
30780 	bp = (struct buf *)pktp->pkt_private;
30781 	xb = SD_GET_XBUF(bp);
30782 	un = SD_GET_UN(bp);
30783 
30784 	ASSERT(un != NULL);
30785 
30786 	mutex_enter(SD_MUTEX(un));
30787 
30788 	SD_TRACE(SD_LOG_SDTEST, un,
30789 	    "sd_faultinjection: entry Injection from sdintr\n");
30790 
30791 	/* if injection is off return */
30792 	if (sd_fault_injection_on == 0 ||
30793 	    un->sd_fi_fifo_start == un->sd_fi_fifo_end) {
30794 		mutex_exit(SD_MUTEX(un));
30795 		return;
30796 	}
30797 
30798 	SD_INFO(SD_LOG_SDTEST, un,
30799 	    "sd_faultinjection: is working for copying\n");
30800 
30801 	/* take next set off fifo */
30802 	i = un->sd_fi_fifo_start % SD_FI_MAX_ERROR;
30803 
30804 	fi_pkt = un->sd_fi_fifo_pkt[i];
30805 	fi_xb = un->sd_fi_fifo_xb[i];
30806 	fi_un = un->sd_fi_fifo_un[i];
30807 	fi_arq = un->sd_fi_fifo_arq[i];
30808 
30809 
30810 	/* set variables accordingly */
30811 	/* set pkt if it was on fifo */
30812 	if (fi_pkt != NULL) {
30813 		SD_CONDSET(pktp, pkt, pkt_flags, "pkt_flags");
30814 		SD_CONDSET(*pktp, pkt, pkt_scbp, "pkt_scbp");
30815 		if (fi_pkt->pkt_cdbp != 0xff)
30816 			SD_CONDSET(*pktp, pkt, pkt_cdbp, "pkt_cdbp");
30817 		SD_CONDSET(pktp, pkt, pkt_state, "pkt_state");
30818 		SD_CONDSET(pktp, pkt, pkt_statistics, "pkt_statistics");
30819 		SD_CONDSET(pktp, pkt, pkt_reason, "pkt_reason");
30820 
30821 	}
30822 	/* set xb if it was on fifo */
30823 	if (fi_xb != NULL) {
30824 		SD_CONDSET(xb, xb, xb_blkno, "xb_blkno");
30825 		SD_CONDSET(xb, xb, xb_dma_resid, "xb_dma_resid");
30826 		if (fi_xb->xb_retry_count != 0)
30827 			SD_CONDSET(xb, xb, xb_retry_count, "xb_retry_count");
30828 		SD_CONDSET(xb, xb, xb_victim_retry_count,
30829 		    "xb_victim_retry_count");
30830 		SD_CONDSET(xb, xb, xb_sense_status, "xb_sense_status");
30831 		SD_CONDSET(xb, xb, xb_sense_state, "xb_sense_state");
30832 		SD_CONDSET(xb, xb, xb_sense_resid, "xb_sense_resid");
30833 
30834 		/* copy in block data from sense */
30835 		/*
30836 		 * if (fi_xb->xb_sense_data[0] != -1) {
30837 		 *	bcopy(fi_xb->xb_sense_data, xb->xb_sense_data,
30838 		 *	SENSE_LENGTH);
30839 		 * }
30840 		 */
30841 		bcopy(fi_xb->xb_sense_data, xb->xb_sense_data, SENSE_LENGTH);
30842 
30843 		/* copy in extended sense codes */
30844 		SD_CONDSET(((struct scsi_extended_sense *)xb->xb_sense_data),
30845 		    xb, es_code, "es_code");
30846 		SD_CONDSET(((struct scsi_extended_sense *)xb->xb_sense_data),
30847 		    xb, es_key, "es_key");
30848 		SD_CONDSET(((struct scsi_extended_sense *)xb->xb_sense_data),
30849 		    xb, es_add_code, "es_add_code");
30850 		SD_CONDSET(((struct scsi_extended_sense *)xb->xb_sense_data),
30851 		    xb, es_qual_code, "es_qual_code");
30852 		struct scsi_extended_sense *esp;
30853 		esp = (struct scsi_extended_sense *)xb->xb_sense_data;
30854 		esp->es_class = CLASS_EXTENDED_SENSE;
30855 	}
30856 
30857 	/* set un if it was on fifo */
30858 	if (fi_un != NULL) {
30859 		SD_CONDSET(un->un_sd->sd_inq, un, inq_rmb, "inq_rmb");
30860 		SD_CONDSET(un, un, un_ctype, "un_ctype");
30861 		SD_CONDSET(un, un, un_reset_retry_count,
30862 		    "un_reset_retry_count");
30863 		SD_CONDSET(un, un, un_reservation_type, "un_reservation_type");
30864 		SD_CONDSET(un, un, un_resvd_status, "un_resvd_status");
30865 		SD_CONDSET(un, un, un_f_arq_enabled, "un_f_arq_enabled");
30866 		SD_CONDSET(un, un, un_f_allow_bus_device_reset,
30867 		    "un_f_allow_bus_device_reset");
30868 		SD_CONDSET(un, un, un_f_opt_queueing, "un_f_opt_queueing");
30869 
30870 	}
30871 
30872 	/* copy in auto request sense if it was on fifo */
30873 	if (fi_arq != NULL) {
30874 		bcopy(fi_arq, pktp->pkt_scbp, sizeof (struct sd_fi_arq));
30875 	}
30876 
30877 	/* free structs */
30878 	if (un->sd_fi_fifo_pkt[i] != NULL) {
30879 		kmem_free(un->sd_fi_fifo_pkt[i], sizeof (struct sd_fi_pkt));
30880 	}
30881 	if (un->sd_fi_fifo_xb[i] != NULL) {
30882 		kmem_free(un->sd_fi_fifo_xb[i], sizeof (struct sd_fi_xb));
30883 	}
30884 	if (un->sd_fi_fifo_un[i] != NULL) {
30885 		kmem_free(un->sd_fi_fifo_un[i], sizeof (struct sd_fi_un));
30886 	}
30887 	if (un->sd_fi_fifo_arq[i] != NULL) {
30888 		kmem_free(un->sd_fi_fifo_arq[i], sizeof (struct sd_fi_arq));
30889 	}
30890 
30891 	/*
30892 	 * kmem_free() does not guarantee that the freed pointer
30893 	 * is set to NULL. Since we use these pointers to determine
30894 	 * whether we set values or not, make sure they are always
30895 	 * NULL after the free.
30896 	 */
30897 	un->sd_fi_fifo_pkt[i] = NULL;
30898 	un->sd_fi_fifo_un[i] = NULL;
30899 	un->sd_fi_fifo_xb[i] = NULL;
30900 	un->sd_fi_fifo_arq[i] = NULL;
30901 
30902 	un->sd_fi_fifo_start++;
30903 
30904 	mutex_exit(SD_MUTEX(un));
30905 
30906 	SD_INFO(SD_LOG_SDTEST, un, "sd_faultinjection: exit\n");
30907 }
30908 
30909 #endif /* SD_FAULT_INJECTION */
30910 
30911 /*
30912  * This routine is invoked from sd_unit_attach(). Before it is called, the
30913  * properties in the conf file, including the "hotpluggable" property,
30914  * should already have been processed.
30915  *
30916  * The sd driver distinguishes 3 different types of devices: removable media,
30917  * non-removable media, and hotpluggable. Below the differences are defined:
30918  *
30919  * 1. Device ID
30920  *
30921  *     The device ID of a device is used to identify this device. Refer to
30922  *     ddi_devid_register(9F).
30923  *
30924  *     For a non-removable media disk device which can provide 0x80 or 0x83
30925  *     VPD page (refer to INQUIRY command of SCSI SPC specification), a unique
30926  *     device ID is created to identify this device. For other non-removable
30927  *     media devices, a default device ID is created only if this device has
30928  *     at least 2 alter cylinders. Otherwise, this device has no devid.
30929  *     at least 2 alternate cylinders. Otherwise, this device has no devid.
30930  *     -------------------------------------------------------
30931  *     removable media   hotpluggable  | Can Have Device ID
30932  *     -------------------------------------------------------
30933  *         false             false     |     Yes
30934  *         false             true      |     Yes
30935  *         true                x       |     No
30936  *     ------------------------------------------------------
30937  *
30938  *
30939  * 2. SCSI group 4 commands
30940  *
30941  *     In the SCSI specs, only some commands in the group 4 command set
30942  *     can use 8-byte addresses and can thus access storage beyond 2TB.
30943  *     Other commands have no such capability. Without group 4 support,
30944  *     it is impossible to make full use of the space on a disk with a
30945  *     capacity larger than 2TB.
30946  *
30947  *     -----------------------------------------------
30948  *     removable media   hotpluggable   LP64  |  Group
30949  *     -----------------------------------------------
30950  *           false          false       false |   1
30951  *           false          false       true  |   4
30952  *           false          true        false |   1
30953  *           false          true        true  |   4
30954  *           true             x           x   |   5
30955  *     -----------------------------------------------
30956  *
30957  *
30958  * 3. Check for VTOC Label
30959  *
30960  *     If a direct-access disk has no EFI label, sd will check if it has a
30961  *     valid VTOC label. Now, sd also does that check for removable media
30962  *     and hotpluggable devices.
30963  *
30964  *     --------------------------------------------------------------
30965  *     Direct-Access   removable media    hotpluggable |  Check Label
30966  *     -------------------------------------------------------------
30967  *         false          false           false        |   No
30968  *         false          false           true         |   No
30969  *         false          true            false        |   Yes
30970  *         false          true            true         |   Yes
30971  *         true            x                x          |   Yes
30972  *     --------------------------------------------------------------
30973  *
30974  *
30975  * 4. Building default VTOC label
30976  *
30977  *     As section 3 says, sd checks whether certain kinds of devices have a
30978  *     VTOC label. If those devices have no valid VTOC label, sd(4D) will
30979  *     attempt to create a default VTOC for them. Currently sd creates a
30980  *     default VTOC label for all devices on the x86 platform (VTOC_16), but
30981  *     only for removable media devices on SPARC (VTOC_8).
30982  *
30983  *     -----------------------------------------------------------
30984  *       removable media hotpluggable platform   |   Default Label
30985  *     -----------------------------------------------------------
30986  *             false          false    sparc     |     No
30987  *             false          true      x86      |     Yes
30988  *             false          true     sparc     |     Yes
30989  *             true             x        x       |     Yes
30990  *     ----------------------------------------------------------
30991  *
30992  *
30993  * 5. Supported blocksizes of target devices
30994  *
30995  *     sd supports a non-512-byte blocksize for removable media devices only.
30996  *     For other devices, only the 512-byte blocksize is supported. This may
30997  *     be changed in the near future because some RAID devices require a
30998  *     non-512-byte blocksize.
30999  *
31000  *     -----------------------------------------------------------
31001  *     removable media    hotpluggable    | non-512-byte blocksize
31002  *     -----------------------------------------------------------
31003  *           false          false         |   No
31004  *           false          true          |   No
31005  *           true             x           |   Yes
31006  *     -----------------------------------------------------------
31007  *
31008  *
31009  * 6. Automatic mount & unmount
31010  *
31011  *     The sd(4D) driver provides the DKIOCREMOVABLE ioctl, which queries
31012  *     whether a device is a removable media device. It returns 1 for
31013  *     removable media devices, and 0 for others.
31014  *
31015  *     The automatic mounting subsystem should distinguish between the types
31016  *     of devices and apply automounting policies to each.
31017  *
31018  *
31019  * 7. fdisk partition management
31020  *
31021  *     Fdisk is the traditional partitioning method on the x86 platform.
31022  *     The sd(4D) driver supports fdisk partitions only on x86. On SPARC,
31023  *     sd doesn't support fdisk partitions at all. Note: pcfs(4FS) can
31024  *     recognize fdisk partitions on both x86 and SPARC platforms.
31025  *
31026  *     -----------------------------------------------------------
31027  *       platform   removable media  USB/1394  |  fdisk supported
31028  *     -----------------------------------------------------------
31029  *        x86         X               X        |       true
31030  *     ------------------------------------------------------------
31031  *        sparc       X               X        |       false
31032  *     ------------------------------------------------------------
31033  *
31034  *
31035  * 8. MBOOT/MBR
31036  *
31037  *     Although sd(4D) doesn't support fdisk on SPARC, it does support
31038  *     reading/writing the mboot for removable media devices on that platform.
31039  *
31040  *     -----------------------------------------------------------
31041  *       platform   removable media  USB/1394  |  mboot supported
31042  *     -----------------------------------------------------------
31043  *        x86         X               X        |       true
31044  *     ------------------------------------------------------------
31045  *        sparc      false           false     |       false
31046  *        sparc      false           true      |       true
31047  *        sparc      true            false     |       true
31048  *        sparc      true            true      |       true
31049  *     ------------------------------------------------------------
31050  *
31051  *
31052  * 9.  error handling during opening device
31053  *
31054  *     If opening a disk device fails, an errno is returned. For some kinds
31055  *     of errors, a different errno is returned depending on whether the
31056  *     device is a removable media device. This brings USB/1394 hard disks
31057  *     in line with expected hard disk behavior. It is not expected that
31058  *     this breaks any application.
31059  *
31060  *     ------------------------------------------------------
31061  *       removable media    hotpluggable   |  errno
31062  *     ------------------------------------------------------
31063  *             false          false        |   EIO
31064  *             false          true         |   EIO
31065  *             true             x          |   ENXIO
31066  *     ------------------------------------------------------
31067  *
31068  *
31069  * 10. ioctls: DKIOCEJECT, CDROMEJECT
31070  *
31071  *     These IOCTLs are applicable only to removable media devices.
31072  *
31073  *     -----------------------------------------------------------
31074  *       removable media    hotpluggable   |DKIOCEJECT, CDROMEJECT
31075  *     -----------------------------------------------------------
31076  *             false          false        |     No
31077  *             false          true         |     No
31078  *             true            x           |     Yes
31079  *     -----------------------------------------------------------
31080  *
31081  *
31082  * 11. Kstats for partitions
31083  *
31084  *     sd creates partition kstats for non-removable media devices. USB and
31085  *     Firewire hard disks now have partition kstats.
31086  *
31087  *      ------------------------------------------------------
31088  *       removable media    hotpluggable   |   kstat
31089  *      ------------------------------------------------------
31090  *             false          false        |    Yes
31091  *             false          true         |    Yes
31092  *             true             x          |    No
31093  *       ------------------------------------------------------
31094  *
31095  *
31096  * 12. Removable media & hotpluggable properties
31097  *
31098  *     The sd driver creates a "removable-media" property for removable media
31099  *     devices. A parent nexus driver creates a "hotpluggable" property if
31100  *     it supports hotplugging.
31101  *
31102  *     ---------------------------------------------------------------------
31103  *     removable media   hotpluggable |  "removable-media"   "hotpluggable"
31104  *     ---------------------------------------------------------------------
31105  *       false            false       |    No                   No
31106  *       false            true        |    No                   Yes
31107  *       true             false       |    Yes                  No
31108  *       true             true        |    Yes                  Yes
31109  *     ---------------------------------------------------------------------
31110  *
31111  *
31112  * 13. Power Management
31113  *
31114  *     sd only power manages removable media devices or devices that support
31115  *     LOG_SENSE or have a "pm-capable" property (PSARC/2002/250).
31116  *
31117  *     A parent nexus that supports hotplugging can also set "pm-capable"
31118  *     if the disk can be power managed.
31119  *
31120  *     ------------------------------------------------------------
31121  *       removable media hotpluggable pm-capable  |   power manage
31122  *     ------------------------------------------------------------
31123  *             false          false     false     |     No
31124  *             false          false     true      |     Yes
31125  *             false          true      false     |     No
31126  *             false          true      true      |     Yes
31127  *             true             x        x        |     Yes
31128  *     ------------------------------------------------------------
31129  *
31130  *      USB and Firewire hard disks can now be power managed independently
31131  *      of the framebuffer.
31132  *
31133  *
31134  * 14. Support for USB disks with capacity larger than 1TB
31135  *
31136  *     Currently, sd doesn't permit a fixed disk device with capacity
31137  *     larger than 1TB to be used in a 32-bit operating system environment.
31138  *     However, sd doesn't apply this restriction to removable media devices.
31139  *     Instead, it assumes that removable media devices cannot have a capacity
31140  *     larger than 1TB. Therefore, using those devices on a 32-bit system is
31141  *     partially supported, which can cause some unexpected results.
31142  *
31143  *     ---------------------------------------------------------------------
31144  *       removable media    USB/1394 | Capacity > 1TB |   Used in 32-bit env
31145  *     ---------------------------------------------------------------------
31146  *             false          false  |   true         |     no
31147  *             false          true   |   true         |     no
31148  *             true           false  |   true         |     Yes
31149  *             true           true   |   true         |     Yes
31150  *     ---------------------------------------------------------------------
31151  *
31152  *
31153  * 15. Check write-protection at open time
31154  *
31155  *     When a removable media device is opened for writing without the NDELAY
31156  *     flag, sd will check whether the device is writable. If a write-protected
31157  *     device is opened without the NDELAY flag, the open will abort.
31158  *
31159  *     ------------------------------------------------------------
31160  *       removable media    USB/1394   |   WP Check
31161  *     ------------------------------------------------------------
31162  *             false          false    |     No
31163  *             false          true     |     No
31164  *             true           false    |     Yes
31165  *             true           true     |     Yes
31166  *     ------------------------------------------------------------
31167  *
31168  *
31169  * 16. syslog when corrupted VTOC is encountered
31170  *
31171  *      Currently, if an invalid VTOC is encountered, sd prints a syslog
31172  *      message only for fixed SCSI disks.
31173  *     ------------------------------------------------------------
31174  *       removable media    USB/1394   |   print syslog
31175  *     ------------------------------------------------------------
31176  *             false          false    |     Yes
31177  *             false          true     |     No
31178  *             true           false    |     No
31179  *             true           true     |     No
31180  *     ------------------------------------------------------------
31181  */
31182 static void
31183 sd_set_unit_attributes(struct sd_lun *un, dev_info_t *devi)
31184 {
31185 	int	pm_cap;
31186 
31187 	ASSERT(un->un_sd);
31188 	ASSERT(un->un_sd->sd_inq);
31189 
31190 	/*
31191 	 * Enable SYNC CACHE support for all devices.
31192 	 */
31193 	un->un_f_sync_cache_supported = TRUE;
31194 
31195 	/*
31196 	 * Set the sync cache required flag to false.
31197 	 * This ensures that no SYNC CACHE command is
31198 	 * sent when there have been no writes.
31199 	 */
31200 	un->un_f_sync_cache_required = FALSE;
31201 
31202 	if (un->un_sd->sd_inq->inq_rmb) {
31203 		/*
31204 		 * The media of this device is removable, and for this kind
31205 		 * of device it is possible to change the medium after the
31206 		 * device has been opened. Thus we should support this operation.
31207 		 */
31208 		un->un_f_has_removable_media = TRUE;
31209 
31210 		/*
31211 		 * support non-512-byte blocksize of removable media devices
31212 		 */
31213 		un->un_f_non_devbsize_supported = TRUE;
31214 
31215 		/*
31216 		 * Assume that all removable media devices support DOOR_LOCK
31217 		 */
31218 		un->un_f_doorlock_supported = TRUE;
31219 
31220 		/*
31221 		 * For a removable media device, it is possible to be opened
31222 		 * with NDELAY flag when there is no media in drive, in this
31223 		 * case we don't care if device is writable. But if without
31224 		 * NDELAY flag, we need to check if media is write-protected.
31225 		 */
31226 		un->un_f_chk_wp_open = TRUE;
31227 
31228 		/*
31229 		 * Need to start a SCSI watch thread to monitor the media state;
31230 		 * when media is inserted or ejected, notify syseventd.
31231 		 */
31232 		un->un_f_monitor_media_state = TRUE;
31233 
31234 		/*
31235 		 * Some devices don't support the START_STOP_UNIT command.
31236 		 * Therefore, we'd better check if a device supports it
31237 		 * before sending it.
31238 		 */
31239 		un->un_f_check_start_stop = TRUE;
31240 
31241 		/*
31242 		 * support eject media ioctl:
31243 		 *		FDEJECT, DKIOCEJECT, CDROMEJECT
31244 		 */
31245 		un->un_f_eject_media_supported = TRUE;
31246 
31247 		/*
31248 		 * Because many removable-media devices don't support
31249 		 * LOG_SENSE, we cannot use this command to check whether
31250 		 * a removable media device supports power management.
31251 		 * We assume that they support power-management via
31252 		 * START_STOP_UNIT command and can be spun up and down
31253 		 * without limitations.
31254 		 */
31255 		un->un_f_pm_supported = TRUE;
31256 
31257 		/*
31258 		 * Need to create a zero length (Boolean) property
31259 		 * removable-media for the removable media devices.
31260 		 * Note that the return value of the property call is not
31261 		 * checked: if the property cannot be created, we do not
31262 		 * want the attach to fail altogether. This is consistent
31263 		 * with other property creation in attach.
31264 		 */
31265 		(void) ddi_prop_create(DDI_DEV_T_NONE, devi,
31266 		    DDI_PROP_CANSLEEP, "removable-media", NULL, 0);
31267 
31268 	} else {
31269 		/*
31270 		 * create device ID for device
31271 		 */
31272 		un->un_f_devid_supported = TRUE;
31273 
31274 		/*
31275 		 * Spin up non-removable-media devices once it is attached
31276 		 */
31277 		un->un_f_attach_spinup = TRUE;
31278 
31279 		/*
31280 		 * According to the SCSI specification, sense data comes in
31281 		 * two formats: fixed format and descriptor format. At present,
31282 		 * we don't support descriptor format sense data for removable
31283 		 * media.
31284 		 */
31285 		if (SD_INQUIRY(un)->inq_dtype == DTYPE_DIRECT) {
31286 			un->un_f_descr_format_supported = TRUE;
31287 		}
31288 
31289 		/*
31290 		 * kstats are created only for non-removable media devices.
31291 		 *
31292 		 * Set this in sd.conf to 0 in order to disable kstats.  The
31293 		 * default is 1, so they are enabled by default.
31294 		 */
31295 		un->un_f_pkstats_enabled = (ddi_prop_get_int(DDI_DEV_T_ANY,
31296 		    SD_DEVINFO(un), DDI_PROP_DONTPASS,
31297 		    "enable-partition-kstats", 1));
31298 
31299 		/*
31300 		 * Check if HBA has set the "pm-capable" property.
31301 		 * If "pm-capable" exists and is non-zero then we can
31302 		 * power manage the device without checking the start/stop
31303 		 * cycle count log sense page.
31304 		 *
31305 		 * If "pm-capable" exists and is set to be false (0),
31306 		 * then we should not power manage the device.
31307 		 *
31308 		 * If "pm-capable" doesn't exist then pm_cap will
31309 		 * be set to SD_PM_CAPABLE_UNDEFINED (-1).  In this case,
31310 		 * sd will check the start/stop cycle count log sense page
31311 		 * and power manage the device if the cycle count limit has
31312 		 * not been exceeded.
31313 		 */
31314 		pm_cap = ddi_prop_get_int(DDI_DEV_T_ANY, devi,
31315 		    DDI_PROP_DONTPASS, "pm-capable", SD_PM_CAPABLE_UNDEFINED);
31316 		if (SD_PM_CAPABLE_IS_UNDEFINED(pm_cap)) {
31317 			un->un_f_log_sense_supported = TRUE;
31318 			if (!un->un_f_power_condition_disabled &&
31319 			    SD_INQUIRY(un)->inq_ansi == 6) {
31320 				un->un_f_power_condition_supported = TRUE;
31321 			}
31322 		} else {
31323 			/*
31324 			 * pm-capable property exists.
31325 			 *
31326 			 * Convert "TRUE" values for pm_cap to
31327 			 * SD_PM_CAPABLE_IS_TRUE to make it easier to check
31328 			 * later. "TRUE" values are any values defined in
31329 			 * inquiry.h.
31330 			 */
31331 			if (SD_PM_CAPABLE_IS_FALSE(pm_cap)) {
31332 				un->un_f_log_sense_supported = FALSE;
31333 			} else {
31334 				/* SD_PM_CAPABLE_IS_TRUE case */
31335 				un->un_f_pm_supported = TRUE;
31336 				if (!un->un_f_power_condition_disabled &&
31337 				    SD_PM_CAPABLE_IS_SPC_4(pm_cap)) {
31338 					un->un_f_power_condition_supported =
31339 					    TRUE;
31340 				}
31341 				if (SD_PM_CAP_LOG_SUPPORTED(pm_cap)) {
31342 					un->un_f_log_sense_supported = TRUE;
31343 					un->un_f_pm_log_sense_smart =
31344 					    SD_PM_CAP_SMART_LOG(pm_cap);
31345 				}
31346 			}
31347 
31348 			SD_INFO(SD_LOG_ATTACH_DETACH, un,
31349 			    "sd_unit_attach: un:0x%p pm-capable "
31350 			    "property set to %d.\n", un, un->un_f_pm_supported);
31351 		}
31352 	}
31353 
31354 	if (un->un_f_is_hotpluggable) {
31355 
31356 		/*
31357 		 * Have to watch hotpluggable devices as well, since
31358 		 * that's the only way for userland applications to
31359 		 * detect hot removal while the device is busy/mounted.
31360 		 */
31361 		un->un_f_monitor_media_state = TRUE;
31362 
31363 		un->un_f_check_start_stop = TRUE;
31364 
31365 	}
31366 }
31367 
31368 /*
31369  * sd_tg_rdwr:
31370  * Provides rdwr access for cmlb via sd_tgops. The start_block is
31371  * in sys block size, req_length in bytes.
31372  * in system block units, reqlength in bytes.
31373  */
31374 static int
31375 sd_tg_rdwr(dev_info_t *devi, uchar_t cmd, void *bufaddr,
31376     diskaddr_t start_block, size_t reqlength, void *tg_cookie)
31377 {
31378 	struct sd_lun *un;
31379 	int path_flag = (int)(uintptr_t)tg_cookie;
31380 	char *dkl = NULL;
31381 	diskaddr_t real_addr = start_block;
31382 	diskaddr_t first_byte, end_block;
31383 
31384 	size_t	buffer_size = reqlength;
31385 	int rval = 0;
31386 	diskaddr_t	cap;
31387 	uint32_t	lbasize;
31388 	sd_ssc_t	*ssc;
31389 
31390 	un = ddi_get_soft_state(sd_state, ddi_get_instance(devi));
31391 	if (un == NULL)
31392 		return (ENXIO);
31393 
31394 	if (cmd != TG_READ && cmd != TG_WRITE)
31395 		return (EINVAL);
31396 
31397 	ssc = sd_ssc_init(un);
31398 	mutex_enter(SD_MUTEX(un));
31399 	if (un->un_f_tgt_blocksize_is_valid == FALSE) {
31400 		mutex_exit(SD_MUTEX(un));
31401 		rval = sd_send_scsi_READ_CAPACITY(ssc, (uint64_t *)&cap,
31402 		    &lbasize, path_flag);
31403 		if (rval != 0)
31404 			goto done1;
31405 		mutex_enter(SD_MUTEX(un));
31406 		sd_update_block_info(un, lbasize, cap);
31407 		if ((un->un_f_tgt_blocksize_is_valid == FALSE)) {
31408 			mutex_exit(SD_MUTEX(un));
31409 			rval = EIO;
31410 			goto done;
31411 		}
31412 	}
31413 
31414 	if (NOT_DEVBSIZE(un)) {
31415 		/*
31416 		 * sys_blocksize != tgt_blocksize, need to re-adjust
31417 		 * blkno and save the index to the beginning of dk_label
31418 		 */
31419 		first_byte  = SD_SYSBLOCKS2BYTES(start_block);
31420 		real_addr = first_byte / un->un_tgt_blocksize;
31421 
31422 		end_block = (first_byte + reqlength +
31423 		    un->un_tgt_blocksize - 1) / un->un_tgt_blocksize;
31424 
31425 		/* round up buffer size to multiple of target block size */
31426 		buffer_size = (end_block - real_addr) * un->un_tgt_blocksize;
31427 
31428 		SD_TRACE(SD_LOG_IO_PARTITION, un, "sd_tg_rdwr",
31429 		    "label_addr: 0x%x allocation size: 0x%x\n",
31430 		    real_addr, buffer_size);
31431 
31432 		if (((first_byte % un->un_tgt_blocksize) != 0) ||
31433 		    (reqlength % un->un_tgt_blocksize) != 0)
31434 			/* the request is not aligned */
31435 			dkl = kmem_zalloc(buffer_size, KM_SLEEP);
31436 	}
31437 
31438 	/*
31439 	 * The MMC standard allows READ CAPACITY to be
31440 	 * inaccurate by a bounded amount (in the interest of
31441 	 * response latency).  As a result, failed READs are
31442 	 * commonplace (due to the reading of metadata and not
31443 	 * data). Depending on the per-Vendor/drive Sense data,
31444 	 * the failed READ can cause many (unnecessary) retries.
31445 	 */
31446 
31447 	if (ISCD(un) && (cmd == TG_READ) &&
31448 	    (un->un_f_blockcount_is_valid == TRUE) &&
31449 	    ((start_block == (un->un_blockcount - 1)) ||
31450 	    (start_block == (un->un_blockcount - 2)))) {
31451 		path_flag = SD_PATH_DIRECT_PRIORITY;
31452 	}
31453 
31454 	mutex_exit(SD_MUTEX(un));
31455 	if (cmd == TG_READ) {
31456 		rval = sd_send_scsi_READ(ssc, (dkl != NULL) ? dkl : bufaddr,
31457 		    buffer_size, real_addr, path_flag);
31458 		if (dkl != NULL)
31459 			bcopy(dkl + SD_TGTBYTEOFFSET(un, start_block,
31460 			    real_addr), bufaddr, reqlength);
31461 	} else {
31462 		if (dkl) {
31463 			rval = sd_send_scsi_READ(ssc, dkl, buffer_size,
31464 			    real_addr, path_flag);
31465 			if (rval) {
31466 				goto done1;
31467 			}
31468 			bcopy(bufaddr, dkl + SD_TGTBYTEOFFSET(un, start_block,
31469 			    real_addr), reqlength);
31470 		}
31471 		rval = sd_send_scsi_WRITE(ssc, (dkl != NULL) ? dkl : bufaddr,
31472 		    buffer_size, real_addr, path_flag);
31473 	}
31474 
31475 done1:
31476 	if (dkl != NULL)
31477 		kmem_free(dkl, buffer_size);
31478 
31479 	if (rval != 0) {
31480 		if (rval == EIO)
31481 			sd_ssc_assessment(ssc, SD_FMT_STATUS_CHECK);
31482 		else
31483 			sd_ssc_assessment(ssc, SD_FMT_IGNORE);
31484 	}
31485 done:
31486 	sd_ssc_fini(ssc);
31487 	return (rval);
31488 }
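
/*
 * Worked example of the re-alignment math above, with hypothetical
 * numbers: assume un_tgt_blocksize = 4096 with the 512-byte system
 * blocksize, and a request of start_block = 3, reqlength = 1024. Then:
 *
 *	first_byte  = 3 * 512 = 1536
 *	real_addr   = 1536 / 4096 = 0
 *	end_block   = (1536 + 1024 + 4095) / 4096 = 1
 *	buffer_size = (1 - 0) * 4096 = 4096
 *
 * Since 1536 is not a multiple of 4096, the request is unaligned: a
 * one-block bounce buffer (dkl) is allocated, the target block is read
 * into it, the caller's 1024 bytes are copied in at byte offset 1536
 * (for TG_WRITE), and the whole aligned block is then written back.
 */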
31489 
31490 
31491 static int
31492 sd_tg_getinfo(dev_info_t *devi, int cmd, void *arg, void *tg_cookie)
31493 {
31495 	struct sd_lun *un;
31496 	diskaddr_t	cap;
31497 	uint32_t	lbasize;
31498 	int		path_flag = (int)(uintptr_t)tg_cookie;
31499 	int		ret = 0;
31500 
31501 	un = ddi_get_soft_state(sd_state, ddi_get_instance(devi));
31502 	if (un == NULL)
31503 		return (ENXIO);
31504 
31505 	switch (cmd) {
31506 	case TG_GETPHYGEOM:
31507 	case TG_GETVIRTGEOM:
31508 	case TG_GETCAPACITY:
31509 	case TG_GETBLOCKSIZE:
31510 		mutex_enter(SD_MUTEX(un));
31511 
31512 		if ((un->un_f_blockcount_is_valid == TRUE) &&
31513 		    (un->un_f_tgt_blocksize_is_valid == TRUE)) {
31514 			cap = un->un_blockcount;
31515 			lbasize = un->un_tgt_blocksize;
31516 			mutex_exit(SD_MUTEX(un));
31517 		} else {
31518 			sd_ssc_t	*ssc;
31519 			mutex_exit(SD_MUTEX(un));
31520 			ssc = sd_ssc_init(un);
31521 			ret = sd_send_scsi_READ_CAPACITY(ssc, (uint64_t *)&cap,
31522 			    &lbasize, path_flag);
31523 			if (ret != 0) {
31524 				if (ret == EIO)
31525 					sd_ssc_assessment(ssc,
31526 					    SD_FMT_STATUS_CHECK);
31527 				else
31528 					sd_ssc_assessment(ssc,
31529 					    SD_FMT_IGNORE);
31530 				sd_ssc_fini(ssc);
31531 				return (ret);
31532 			}
31533 			sd_ssc_fini(ssc);
31534 			mutex_enter(SD_MUTEX(un));
31535 			sd_update_block_info(un, lbasize, cap);
31536 			if ((un->un_f_blockcount_is_valid == FALSE) ||
31537 			    (un->un_f_tgt_blocksize_is_valid == FALSE)) {
31538 				mutex_exit(SD_MUTEX(un));
31539 				return (EIO);
31540 			}
31541 			mutex_exit(SD_MUTEX(un));
31542 		}
31543 
31544 		if (cmd == TG_GETCAPACITY) {
31545 			*(diskaddr_t *)arg = cap;
31546 			return (0);
31547 		}
31548 
31549 		if (cmd == TG_GETBLOCKSIZE) {
31550 			*(uint32_t *)arg = lbasize;
31551 			return (0);
31552 		}
31553 
31554 		if (cmd == TG_GETPHYGEOM)
31555 			ret = sd_get_physical_geometry(un, (cmlb_geom_t *)arg,
31556 			    cap, lbasize, path_flag);
31557 		else
31558 			/* TG_GETVIRTGEOM */
31559 			ret = sd_get_virtual_geometry(un,
31560 			    (cmlb_geom_t *)arg, cap, lbasize);
31561 
31562 		return (ret);
31563 
31564 	case TG_GETATTR:
31565 		mutex_enter(SD_MUTEX(un));
31566 		((tg_attribute_t *)arg)->media_is_writable =
31567 		    un->un_f_mmc_writable_media;
31568 		((tg_attribute_t *)arg)->media_is_solid_state =
31569 		    un->un_f_is_solid_state;
31570 		((tg_attribute_t *)arg)->media_is_rotational =
31571 		    un->un_f_is_rotational;
31572 		mutex_exit(SD_MUTEX(un));
31573 		return (0);
31574 	default:
31575 		return (ENOTTY);
31577 	}
31578 }
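
/*
 * Editorial sketch: sd_tg_rdwr() and sd_tg_getinfo() are not called
 * directly; cmlb invokes them through the tg_ops vector that sd
 * registers at attach time, along the lines of (see the actual
 * definitions earlier in this file):
 *
 *	static cmlb_tg_ops_t sd_tgops = {
 *		TG_DK_OPS_VERSION_1,
 *		sd_tg_rdwr,
 *		sd_tg_getinfo
 *	};
 *
 * The tg_cookie argument to each entry point carries an SD_PATH_*
 * flag cast to a pointer, which is why both functions recover it
 * with (int)(uintptr_t)tg_cookie.
 */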
31579 
31580 /*
31581  *    Function: sd_ssc_ereport_post
31582  *
31583  * Description: Called when the SD driver needs to post an ereport.
31584  *
31585  *    Context: Kernel thread or interrupt context.
31586  */
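
/*
 * Overview (editorial, derived from the branches below): the ereport
 * class posted is
 *
 *	SD_FM_DRV_RECOVERY assessment	cmd.disk.recovered
 *	SSC_FLAGS_INVALID_* set		cmd.disk.dev.uderr
 *	pkt-reason != CMD_CMPLT or
 *	    SSC_FLAGS_TRAN_ABORT	cmd.disk.tran
 *	sense key KEY_MEDIUM_ERROR	cmd.disk.dev.rqs.merr
 *	sense data, any other key	cmd.disk.dev.rqs.derr
 *	no sense, bad stat-code		cmd.disk.dev.serr
 *
 * Nothing is posted for STATUS_GOOD with no sense data, nor for a
 * recovered-error sense with asc/ascq 0x00/0x1d (ATA PASS-THROUGH
 * INFORMATION AVAILABLE).
 */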
31587 
31588 #define	DEVID_IF_KNOWN(d) "devid", DATA_TYPE_STRING, (d) ? (d) : "unknown"
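
/*
 * For illustration, DEVID_IF_KNOWN(devid) expands to the name/value
 * triple
 *
 *	"devid", DATA_TYPE_STRING, (devid) ? (devid) : "unknown"
 *
 * so every ereport posted below carries a "devid" string member even
 * when no devid has been established.
 */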
31589 
31590 static void
31591 sd_ssc_ereport_post(sd_ssc_t *ssc, enum sd_driver_assessment drv_assess)
31592 {
31593 	int uscsi_path_instance = 0;
31594 	uchar_t	uscsi_pkt_reason;
31595 	uint32_t uscsi_pkt_state;
31596 	uint32_t uscsi_pkt_statistics;
31597 	uint64_t uscsi_ena;
31598 	uchar_t op_code;
31599 	uint8_t *sensep;
31600 	union scsi_cdb *cdbp;
31601 	uint_t cdblen = 0;
31602 	uint_t senlen = 0;
31603 	struct sd_lun *un;
31604 	dev_info_t *dip;
31605 	char *devid;
31606 	int ssc_invalid_flags = SSC_FLAGS_INVALID_PKT_REASON |
31607 	    SSC_FLAGS_INVALID_STATUS |
31608 	    SSC_FLAGS_INVALID_SENSE |
31609 	    SSC_FLAGS_INVALID_DATA;
31610 	char assessment[16];
31611 
31612 	ASSERT(ssc != NULL);
31613 	ASSERT(ssc->ssc_uscsi_cmd != NULL);
31614 	ASSERT(ssc->ssc_uscsi_info != NULL);
31615 
31616 	un = ssc->ssc_un;
31617 	ASSERT(un != NULL);
31618 
31619 	dip = un->un_sd->sd_dev;
31620 
31621 	/*
31622 	 * Get the devid:
31623 	 *	devid will only be passed to non-transport error reports.
31624 	 */
31625 	devid = DEVI(dip)->devi_devid_str;
31626 
31627 	/*
31628 	 * If we are panicking, suspended or dumping, the command will
31629 	 * not be executed, so there is nothing to post.
31630 	 */
31631 	if (ddi_in_panic() || (un->un_state == SD_STATE_SUSPENDED) ||
31632 	    (un->un_state == SD_STATE_DUMPING))
31633 		return;
31634 
31635 	uscsi_pkt_reason = ssc->ssc_uscsi_info->ui_pkt_reason;
31636 	uscsi_path_instance = ssc->ssc_uscsi_cmd->uscsi_path_instance;
31637 	uscsi_pkt_state = ssc->ssc_uscsi_info->ui_pkt_state;
31638 	uscsi_pkt_statistics = ssc->ssc_uscsi_info->ui_pkt_statistics;
31639 	uscsi_ena = ssc->ssc_uscsi_info->ui_ena;
31640 
31641 	sensep = (uint8_t *)ssc->ssc_uscsi_cmd->uscsi_rqbuf;
31642 	cdbp = (union scsi_cdb *)ssc->ssc_uscsi_cmd->uscsi_cdb;
31643 
31644 	/* In rare cases, e.g. DOORLOCK, the cdb could be NULL */
31645 	if (cdbp == NULL) {
31646 		scsi_log(SD_DEVINFO(un), sd_label, CE_WARN,
31647 		    "sd_ssc_ereport_post: empty cdb\n");
31648 		return;
31649 	}
31650 
31651 	op_code = cdbp->scc_cmd;
31652 
31653 	cdblen = (int)ssc->ssc_uscsi_cmd->uscsi_cdblen;
31654 	senlen = (int)(ssc->ssc_uscsi_cmd->uscsi_rqlen -
31655 	    ssc->ssc_uscsi_cmd->uscsi_rqresid);
31656 
31657 	if (senlen > 0)
31658 		ASSERT(sensep != NULL);
31659 
31660 	/*
31661 	 * Map drv_assess to the corresponding assessment string.
31662 	 * SD_FM_DRV_FATAL will be mapped to "fail" or "fatal" depending
31663 	 * on the sense key returned.
31664 	 */
31665 	switch (drv_assess) {
31666 	case SD_FM_DRV_RECOVERY:
31667 		(void) sprintf(assessment, "%s", "recovered");
31668 		break;
31669 	case SD_FM_DRV_RETRY:
31670 		(void) sprintf(assessment, "%s", "retry");
31671 		break;
31672 	case SD_FM_DRV_NOTICE:
31673 		(void) sprintf(assessment, "%s", "info");
31674 		break;
31675 	case SD_FM_DRV_FATAL:
31676 	default:
31677 		(void) sprintf(assessment, "%s", "unknown");
31678 	}
31679 	/*
31680 	 * If drv_assess == SD_FM_DRV_RECOVERY, this should be a recovered
31681 	 * command, we will post ereport.io.scsi.cmd.disk.recovered.
31682 	 * driver-assessment will always be "recovered" here.
31683 	 */
31684 	if (drv_assess == SD_FM_DRV_RECOVERY) {
31685 		scsi_fm_ereport_post(un->un_sd, uscsi_path_instance, NULL,
31686 		    "cmd.disk.recovered", uscsi_ena, devid, NULL,
31687 		    DDI_NOSLEEP, NULL,
31688 		    FM_VERSION, DATA_TYPE_UINT8, FM_EREPORT_VERS0,
31689 		    DEVID_IF_KNOWN(devid),
31690 		    "driver-assessment", DATA_TYPE_STRING, assessment,
31691 		    "op-code", DATA_TYPE_UINT8, op_code,
31692 		    "cdb", DATA_TYPE_UINT8_ARRAY,
31693 		    cdblen, ssc->ssc_uscsi_cmd->uscsi_cdb,
31694 		    "pkt-reason", DATA_TYPE_UINT8, uscsi_pkt_reason,
31695 		    "pkt-state", DATA_TYPE_UINT32, uscsi_pkt_state,
31696 		    "pkt-stats", DATA_TYPE_UINT32, uscsi_pkt_statistics,
31697 		    NULL);
31698 		return;
31699 	}
31700 
31701 	/*
31702 	 * If there is unexpected or undecodable data, we should post
31703 	 * ereport.io.scsi.cmd.disk.dev.uderr.
31704 	 * driver-assessment will be set based on parameter drv_assess.
31705 	 * SSC_FLAGS_INVALID_SENSE - invalid sense data sent back.
31706 	 * SSC_FLAGS_INVALID_PKT_REASON - invalid pkt-reason encountered.
31707 	 * SSC_FLAGS_INVALID_STATUS - invalid stat-code encountered.
31708 	 * SSC_FLAGS_INVALID_DATA - invalid data sent back.
31709 	 */
31710 	if (ssc->ssc_flags & ssc_invalid_flags) {
31711 		if (ssc->ssc_flags & SSC_FLAGS_INVALID_SENSE) {
31712 			scsi_fm_ereport_post(un->un_sd, uscsi_path_instance,
31713 			    NULL, "cmd.disk.dev.uderr", uscsi_ena, devid,
31714 			    NULL, DDI_NOSLEEP, NULL,
31715 			    FM_VERSION, DATA_TYPE_UINT8, FM_EREPORT_VERS0,
31716 			    DEVID_IF_KNOWN(devid),
31717 			    "driver-assessment", DATA_TYPE_STRING,
31718 			    drv_assess == SD_FM_DRV_FATAL ?
31719 			    "fail" : assessment,
31720 			    "op-code", DATA_TYPE_UINT8, op_code,
31721 			    "cdb", DATA_TYPE_UINT8_ARRAY,
31722 			    cdblen, ssc->ssc_uscsi_cmd->uscsi_cdb,
31723 			    "pkt-reason", DATA_TYPE_UINT8, uscsi_pkt_reason,
31724 			    "pkt-state", DATA_TYPE_UINT32, uscsi_pkt_state,
31725 			    "pkt-stats", DATA_TYPE_UINT32,
31726 			    uscsi_pkt_statistics,
31727 			    "stat-code", DATA_TYPE_UINT8,
31728 			    ssc->ssc_uscsi_cmd->uscsi_status,
31729 			    "un-decode-info", DATA_TYPE_STRING,
31730 			    ssc->ssc_info,
31731 			    "un-decode-value", DATA_TYPE_UINT8_ARRAY,
31732 			    senlen, sensep,
31733 			    NULL);
31734 		} else {
31735 			/*
31736 			 * For other type of invalid data, the
31737 			 * un-decode-value field would be empty because the
31738 			 * un-decodable content could be seen from upper
31739 			 * level payload or inside un-decode-info.
31740 			 */
31741 			scsi_fm_ereport_post(un->un_sd, uscsi_path_instance,
31742 			    NULL,
31743 			    "cmd.disk.dev.uderr", uscsi_ena, devid,
31744 			    NULL, DDI_NOSLEEP, NULL,
31745 			    FM_VERSION, DATA_TYPE_UINT8, FM_EREPORT_VERS0,
31746 			    DEVID_IF_KNOWN(devid),
31747 			    "driver-assessment", DATA_TYPE_STRING,
31748 			    drv_assess == SD_FM_DRV_FATAL ?
31749 			    "fail" : assessment,
31750 			    "op-code", DATA_TYPE_UINT8, op_code,
31751 			    "cdb", DATA_TYPE_UINT8_ARRAY,
31752 			    cdblen, ssc->ssc_uscsi_cmd->uscsi_cdb,
31753 			    "pkt-reason", DATA_TYPE_UINT8, uscsi_pkt_reason,
31754 			    "pkt-state", DATA_TYPE_UINT32, uscsi_pkt_state,
31755 			    "pkt-stats", DATA_TYPE_UINT32,
31756 			    uscsi_pkt_statistics,
31757 			    "stat-code", DATA_TYPE_UINT8,
31758 			    ssc->ssc_uscsi_cmd->uscsi_status,
31759 			    "un-decode-info", DATA_TYPE_STRING,
31760 			    ssc->ssc_info,
31761 			    "un-decode-value", DATA_TYPE_UINT8_ARRAY,
31762 			    0, NULL,
31763 			    NULL);
31764 		}
31765 		ssc->ssc_flags &= ~ssc_invalid_flags;
31766 		return;
31767 	}
31768 
31769 	if (uscsi_pkt_reason != CMD_CMPLT ||
31770 	    (ssc->ssc_flags & SSC_FLAGS_TRAN_ABORT)) {
31771 		/*
31772 		 * Either pkt-reason != CMD_CMPLT, or SSC_FLAGS_TRAN_ABORT
31773 		 * was set inside sd_start_cmds due to an error (bad packet
31774 		 * or fatal transport error), so treat it as a transport
31775 		 * error and post ereport.io.scsi.cmd.disk.tran.
31776 		 * driver-assessment will be set based on drv_assess.
31777 		 * devid is passed as NULL because this is a transport
31778 		 * error.
31779 		 */
31780 		if (ssc->ssc_flags & SSC_FLAGS_TRAN_ABORT)
31781 			ssc->ssc_flags &= ~SSC_FLAGS_TRAN_ABORT;
31782 
31783 		scsi_fm_ereport_post(un->un_sd, uscsi_path_instance, NULL,
31784 		    "cmd.disk.tran", uscsi_ena, NULL, NULL, DDI_NOSLEEP, NULL,
31785 		    FM_VERSION, DATA_TYPE_UINT8, FM_EREPORT_VERS0,
31786 		    DEVID_IF_KNOWN(devid),
31787 		    "driver-assessment", DATA_TYPE_STRING,
31788 		    drv_assess == SD_FM_DRV_FATAL ? "fail" : assessment,
31789 		    "op-code", DATA_TYPE_UINT8, op_code,
31790 		    "cdb", DATA_TYPE_UINT8_ARRAY,
31791 		    cdblen, ssc->ssc_uscsi_cmd->uscsi_cdb,
31792 		    "pkt-reason", DATA_TYPE_UINT8, uscsi_pkt_reason,
31793 		    "pkt-state", DATA_TYPE_UINT32, uscsi_pkt_state,
31794 		    "pkt-stats", DATA_TYPE_UINT32, uscsi_pkt_statistics,
31795 		    NULL);
31796 	} else {
31797 		/*
31798 		 * If we got here, we have a completed command, and we need
31799 		 * to further investigate the sense data to see what kind
31800 		 * of ereport we should post.
31801 		 * No ereport is needed if sense-key is KEY_RECOVERABLE_ERROR
31802 		 * and asc/ascq is "ATA PASS-THROUGH INFORMATION AVAILABLE".
31803 		 * Post ereport.io.scsi.cmd.disk.dev.rqs.merr if sense-key is
31804 		 * KEY_MEDIUM_ERROR.
31805 		 * Post ereport.io.scsi.cmd.disk.dev.rqs.derr otherwise.
31806 		 * driver-assessment will be set based on the parameter
31807 		 * drv_assess.
31808 		 */
31809 		if (senlen > 0) {
31810 			/*
31811 			 * Here we have sense data available.
31812 			 */
31813 			uint8_t sense_key = scsi_sense_key(sensep);
31814 			uint8_t sense_asc = scsi_sense_asc(sensep);
31815 			uint8_t sense_ascq = scsi_sense_ascq(sensep);
31816 
31817 			if (sense_key == KEY_RECOVERABLE_ERROR &&
31818 			    sense_asc == 0x00 && sense_ascq == 0x1d)
31819 				return;
31820 
31821 			if (sense_key == KEY_MEDIUM_ERROR) {
31822 				/*
31823 				 * driver-assessment should be "fatal" if
31824 				 * drv_assess is SD_FM_DRV_FATAL.
31825 				 */
31826 				scsi_fm_ereport_post(un->un_sd,
31827 				    uscsi_path_instance, NULL,
31828 				    "cmd.disk.dev.rqs.merr",
31829 				    uscsi_ena, devid, NULL, DDI_NOSLEEP, NULL,
31830 				    FM_VERSION, DATA_TYPE_UINT8,
31831 				    FM_EREPORT_VERS0,
31832 				    DEVID_IF_KNOWN(devid),
31833 				    "driver-assessment",
31834 				    DATA_TYPE_STRING,
31835 				    drv_assess == SD_FM_DRV_FATAL ?
31836 				    "fatal" : assessment,
31837 				    "op-code",
31838 				    DATA_TYPE_UINT8, op_code,
31839 				    "cdb",
31840 				    DATA_TYPE_UINT8_ARRAY, cdblen,
31841 				    ssc->ssc_uscsi_cmd->uscsi_cdb,
31842 				    "pkt-reason",
31843 				    DATA_TYPE_UINT8, uscsi_pkt_reason,
31844 				    "pkt-state",
31845 				    DATA_TYPE_UINT32, uscsi_pkt_state,
31846 				    "pkt-stats",
31847 				    DATA_TYPE_UINT32,
31848 				    uscsi_pkt_statistics,
31849 				    "stat-code",
31850 				    DATA_TYPE_UINT8,
31851 				    ssc->ssc_uscsi_cmd->uscsi_status,
31852 				    "key",
31853 				    DATA_TYPE_UINT8,
31854 				    scsi_sense_key(sensep),
31855 				    "asc",
31856 				    DATA_TYPE_UINT8,
31857 				    scsi_sense_asc(sensep),
31858 				    "ascq",
31859 				    DATA_TYPE_UINT8,
31860 				    scsi_sense_ascq(sensep),
31861 				    "sense-data",
31862 				    DATA_TYPE_UINT8_ARRAY,
31863 				    senlen, sensep,
31864 				    "lba",
31865 				    DATA_TYPE_UINT64,
31866 				    ssc->ssc_uscsi_info->ui_lba,
31867 				    NULL);
31868 			} else {
31869 				/*
31870 				 * If sense-key == 0x4 (hardware
31871 				 * error), driver-assessment should
31872 				 * be "fatal" if drv_assess is
31873 				 * SD_FM_DRV_FATAL.
31874 				 */
31875 				scsi_fm_ereport_post(un->un_sd,
31876 				    uscsi_path_instance, NULL,
31877 				    "cmd.disk.dev.rqs.derr",
31878 				    uscsi_ena, devid,
31879 				    NULL, DDI_NOSLEEP, NULL,
31880 				    FM_VERSION,
31881 				    DATA_TYPE_UINT8, FM_EREPORT_VERS0,
31882 				    DEVID_IF_KNOWN(devid),
31883 				    "driver-assessment",
31884 				    DATA_TYPE_STRING,
31885 				    drv_assess == SD_FM_DRV_FATAL ?
31886 				    (sense_key == 0x4 ?
31887 				    "fatal" : "fail") : assessment,
31888 				    "op-code",
31889 				    DATA_TYPE_UINT8, op_code,
31890 				    "cdb",
31891 				    DATA_TYPE_UINT8_ARRAY, cdblen,
31892 				    ssc->ssc_uscsi_cmd->uscsi_cdb,
31893 				    "pkt-reason",
31894 				    DATA_TYPE_UINT8, uscsi_pkt_reason,
31895 				    "pkt-state",
31896 				    DATA_TYPE_UINT32, uscsi_pkt_state,
31897 				    "pkt-stats",
31898 				    DATA_TYPE_UINT32,
31899 				    uscsi_pkt_statistics,
31900 				    "stat-code",
31901 				    DATA_TYPE_UINT8,
31902 				    ssc->ssc_uscsi_cmd->uscsi_status,
31903 				    "key",
31904 				    DATA_TYPE_UINT8,
31905 				    scsi_sense_key(sensep),
31906 				    "asc",
31907 				    DATA_TYPE_UINT8,
31908 				    scsi_sense_asc(sensep),
31909 				    "ascq",
31910 				    DATA_TYPE_UINT8,
31911 				    scsi_sense_ascq(sensep),
31912 				    "sense-data",
31913 				    DATA_TYPE_UINT8_ARRAY,
31914 				    senlen, sensep,
31915 				    NULL);
31916 			}
31917 		} else {
31918 			/*
31919 			 * For stat_code == STATUS_GOOD, this is not a
31920 			 * hardware error.
31921 			 */
31922 			if (ssc->ssc_uscsi_cmd->uscsi_status == STATUS_GOOD)
31923 				return;
31924 
31925 			/*
31926 			 * Post ereport.io.scsi.cmd.disk.dev.serr if we got a
31927 			 * bad stat-code but sense data is unavailable.
31928 			 * driver-assessment will be set based on parameter
31929 			 * drv_assess.
31930 			 */
31931 			scsi_fm_ereport_post(un->un_sd, uscsi_path_instance,
31932 			    NULL,
31933 			    "cmd.disk.dev.serr", uscsi_ena,
31934 			    devid, NULL, DDI_NOSLEEP, NULL,
31935 			    FM_VERSION, DATA_TYPE_UINT8, FM_EREPORT_VERS0,
31936 			    DEVID_IF_KNOWN(devid),
31937 			    "driver-assessment", DATA_TYPE_STRING,
31938 			    drv_assess == SD_FM_DRV_FATAL ? "fail" : assessment,
31939 			    "op-code", DATA_TYPE_UINT8, op_code,
31940 			    "cdb",
31941 			    DATA_TYPE_UINT8_ARRAY,
31942 			    cdblen, ssc->ssc_uscsi_cmd->uscsi_cdb,
31943 			    "pkt-reason",
31944 			    DATA_TYPE_UINT8, uscsi_pkt_reason,
31945 			    "pkt-state",
31946 			    DATA_TYPE_UINT32, uscsi_pkt_state,
31947 			    "pkt-stats",
31948 			    DATA_TYPE_UINT32, uscsi_pkt_statistics,
31949 			    "stat-code",
31950 			    DATA_TYPE_UINT8,
31951 			    ssc->ssc_uscsi_cmd->uscsi_status,
31952 			    NULL);
31953 		}
31954 	}
31955 }
31956 
31957 /*
31958  *     Function: sd_ssc_extract_info
31959  *
31960  * Description: Extract available information to help generate an ereport.
31961  *
31962  *     Context: Kernel thread or interrupt context.
31963  */
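
/*
 * A minimal sketch of the intended calling pattern (editorial; the
 * real call sites are in the command completion paths elsewhere in
 * this file, and sd_ssc_post() is assumed to be the usual consumer):
 *
 *	ASSERT(mutex_owned(SD_MUTEX(un)));
 *	sd_ssc_extract_info(ssc, un, pktp, bp, xp);
 *	...
 *	sd_ssc_post(ssc, SD_FM_DRV_RETRY);
 *
 * The uscsi/pkt state captured here is exactly what
 * sd_ssc_ereport_post() above consumes.
 */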
31964 static void
31965 sd_ssc_extract_info(sd_ssc_t *ssc, struct sd_lun *un, struct scsi_pkt *pktp,
31966     struct buf *bp, struct sd_xbuf *xp)
31967 {
31968 	size_t senlen = 0;
31969 	union scsi_cdb *cdbp;
31970 	int path_instance;
31971 	/*
31972 	 * Need scsi_cdb_size array to determine the cdb length.
31973 	 */
31974 	extern uchar_t	scsi_cdb_size[];
31975 
31976 	ASSERT(un != NULL);
31977 	ASSERT(pktp != NULL);
31978 	ASSERT(bp != NULL);
31979 	ASSERT(xp != NULL);
31980 	ASSERT(ssc != NULL);
31981 	ASSERT(mutex_owned(SD_MUTEX(un)));
31982 
31983 	/*
31984 	 * Transfer the cdb buffer pointer here.
31985 	 */
31986 	cdbp = (union scsi_cdb *)pktp->pkt_cdbp;
31987 
31988 	ssc->ssc_uscsi_cmd->uscsi_cdblen = scsi_cdb_size[GETGROUP(cdbp)];
31989 	ssc->ssc_uscsi_cmd->uscsi_cdb = (caddr_t)cdbp;
31990 
31991 	/*
31992 	 * Transfer the sense data buffer pointer if sense data is
31993 	 * available; calculate the sense data length first.
31994 	 */
31995 	if ((xp->xb_sense_state & STATE_XARQ_DONE) ||
31996 	    (xp->xb_sense_state & STATE_ARQ_DONE)) {
31997 		/*
31998 		 * ARQ case: auto request sense data is present.
31999 		 */
32000 		if (xp->xb_sense_state & STATE_XARQ_DONE) {
32001 			senlen = MAX_SENSE_LENGTH - xp->xb_sense_resid;
32002 		} else {
32003 			senlen = SENSE_LENGTH;
32004 		}
32005 	} else {
32006 		/*
32007 		 * Non-ARQ case: sense is valid only if it was transferred.
32008 		 */
32009 		if (SD_GET_PKT_STATUS(pktp) == STATUS_CHECK &&
32010 		    (xp->xb_sense_state & STATE_XFERRED_DATA)) {
32011 			senlen = SENSE_LENGTH - xp->xb_sense_resid;
32012 		}
32014 	}
32015 
32016 	ssc->ssc_uscsi_cmd->uscsi_rqlen = (senlen & 0xff);
32017 	ssc->ssc_uscsi_cmd->uscsi_rqresid = 0;
32018 	ssc->ssc_uscsi_cmd->uscsi_rqbuf = (caddr_t)xp->xb_sense_data;
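
	/*
	 * Note (editorial): uscsi_rqlen is set to senlen and
	 * uscsi_rqresid to 0 so that sd_ssc_ereport_post(), which
	 * computes uscsi_rqlen - uscsi_rqresid, recovers exactly the
	 * sense length derived above.
	 */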
32019 
32020 	ssc->ssc_uscsi_cmd->uscsi_status = ((*(pktp)->pkt_scbp) & STATUS_MASK);
32021 
32022 	/*
32023 	 * Only transfer path_instance when scsi_pkt was properly allocated.
32024 	 */
32025 	path_instance = pktp->pkt_path_instance;
32026 	if (scsi_pkt_allocated_correctly(pktp) && path_instance)
32027 		ssc->ssc_uscsi_cmd->uscsi_path_instance = path_instance;
32028 	else
32029 		ssc->ssc_uscsi_cmd->uscsi_path_instance = 0;
32030 
32031 	/*
32032 	 * Copy in the other fields we may need when posting ereport.
32033 	 * Copy in the other fields we may need when posting an ereport.
32034 	ssc->ssc_uscsi_info->ui_pkt_reason = pktp->pkt_reason;
32035 	ssc->ssc_uscsi_info->ui_pkt_state = pktp->pkt_state;
32036 	ssc->ssc_uscsi_info->ui_pkt_statistics = pktp->pkt_statistics;
32037 	ssc->ssc_uscsi_info->ui_lba = (uint64_t)SD_GET_BLKNO(bp);
32038 
32039 	/*
32040 	 * We will not create an ENA for a successful command, lest it
32041 	 * be recognized as recovered.
32042 	 */
32043 	if ((pktp->pkt_reason == CMD_CMPLT) &&
32044 	    (ssc->ssc_uscsi_cmd->uscsi_status == STATUS_GOOD) &&
32045 	    (senlen == 0)) {
32046 		return;
32047 	}
32048 
32049 	/*
32050 	 * To associate ereports of a single command execution flow, we
32051 	 * need a shared ena for a specific command.
32052 	 */
32053 	if (xp->xb_ena == 0)
32054 		xp->xb_ena = fm_ena_generate(0, FM_ENA_FMT1);
32055 	ssc->ssc_uscsi_info->ui_ena = xp->xb_ena;
32056 }
32057 
32058 
32059 /*
32060  *     Function: sd_check_bdc_vpd
32061  *
32062  * Description: Query the optional INQUIRY VPD page 0xb1. If the device
32063  *              supports VPD page 0xb1, sd examines the MEDIUM ROTATION
32064  *              RATE.
32065  *
32066  *		Set the following based on RPM value:
32067  *		= 0	device is not solid state, non-rotational
32068  *		= 1	device is solid state, non-rotational
32069  *		> 1	device is not solid state, rotational
32070  *
32071  *     Context: Kernel thread or interrupt context.
32072  */
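
/*
 * For reference (editorial, per SBC-3), the bytes of the Block
 * Device Characteristics VPD page examined below are:
 *
 *	inqb1[1]	page code (0xb1)
 *	inqb1[3]	page length (low byte)
 *	inqb1[4..5]	MEDIUM ROTATION RATE, big-endian:
 *			0x0000	rate not reported
 *			0x0001	non-rotating medium (solid state)
 *			other	nominal rotation rate in RPM
 *
 * The code below only distinguishes 0x0000 from 0x0001, which is why
 * it tests inqb1[4] == 0 before examining inqb1[5].
 */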
32073 
32074 static void
32075 sd_check_bdc_vpd(sd_ssc_t *ssc)
32076 {
32077 	int		rval		= 0;
32078 	uchar_t		*inqb1		= NULL;
32079 	size_t		inqb1_len	= MAX_INQUIRY_SIZE;
32080 	size_t		inqb1_resid	= 0;
32081 	struct sd_lun	*un;
32082 
32083 	ASSERT(ssc != NULL);
32084 	un = ssc->ssc_un;
32085 	ASSERT(un != NULL);
32086 	ASSERT(!mutex_owned(SD_MUTEX(un)));
32087 
32088 	mutex_enter(SD_MUTEX(un));
32089 	un->un_f_is_rotational = TRUE;
32090 	un->un_f_is_solid_state = FALSE;
32091 
32092 	if (ISCD(un)) {
32093 		mutex_exit(SD_MUTEX(un));
32094 		return;
32095 	}
32096 
32097 	if (sd_check_vpd_page_support(ssc) == 0 &&
32098 	    (un->un_vpd_page_mask & SD_VPD_DEV_CHARACTER_PG)) {
32099 		mutex_exit(SD_MUTEX(un));
32100 		/* collect page b1 data */
32101 		inqb1 = kmem_zalloc(inqb1_len, KM_SLEEP);
32102 
32103 		rval = sd_send_scsi_INQUIRY(ssc, inqb1, inqb1_len,
32104 		    0x01, 0xB1, &inqb1_resid);
32105 
32106 		if (rval == 0 && (inqb1_len - inqb1_resid > 5)) {
32107 			SD_TRACE(SD_LOG_COMMON, un,
32108 			    "sd_check_bdc_vpd: successfully got VPD page %x, "
32109 			    "PAGE LENGTH: %x BYTE 4: %x BYTE 5: %x",
32110 			    inqb1[1], inqb1[3], inqb1[4], inqb1[5]);
32113 
32114 			mutex_enter(SD_MUTEX(un));
32115 			/*
32116 			 * Check the MEDIUM ROTATION RATE.
32117 			 */
32118 			if (inqb1[4] == 0) {
32119 				if (inqb1[5] == 0) {
32120 					un->un_f_is_rotational = FALSE;
32121 				} else if (inqb1[5] == 1) {
32122 					un->un_f_is_rotational = FALSE;
32123 					un->un_f_is_solid_state = TRUE;
32124 					/*
32125 					 * Solid state drives don't need
32126 					 * disksort.
32127 					 */
32128 					un->un_f_disksort_disabled = TRUE;
32129 				}
32130 			}
32131 			mutex_exit(SD_MUTEX(un));
32132 		} else if (rval != 0) {
32133 			sd_ssc_assessment(ssc, SD_FMT_IGNORE);
32134 		}
32135 
32136 		kmem_free(inqb1, inqb1_len);
32137 	} else {
32138 		mutex_exit(SD_MUTEX(un));
32139 	}
32140 }
32141 
32142 /*
32143  *	Function: sd_check_emulation_mode
32144  *
32145  *   Description: Check whether the SSD is operating in emulation
32146  *		  mode by issuing READ_CAPACITY_16 to see whether
32147  *		  the physical block size of the drive is available.
32148  *
32149  *	 Context: Kernel thread or interrupt context.
32150  */
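
/*
 * Editorial sketch of the case this catches: a flash device listed
 * in sd_flash_dev_table that emulates small logical blocks on top of
 * larger physical sectors.  un_phy_blocksize is forced to
 * SSD_SECSIZE, and if that differs from un_tgt_blocksize on a solid
 * state device, un_f_enable_rmw is set so that sub-physical-sector
 * writes are handled via read-modify-write.
 */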
32151 
32152 static void
32153 sd_check_emulation_mode(sd_ssc_t *ssc)
32154 {
32155 	int		rval = 0;
32156 	uint64_t	capacity;
32157 	uint_t		lbasize;
32158 	uint_t		pbsize;
32159 	int		i;
32160 	int		devid_len;
32161 	struct sd_lun	*un;
32162 
32163 	ASSERT(ssc != NULL);
32164 	un = ssc->ssc_un;
32165 	ASSERT(un != NULL);
32166 	ASSERT(!mutex_owned(SD_MUTEX(un)));
32167 
32168 	mutex_enter(SD_MUTEX(un));
32169 	if (ISCD(un)) {
32170 		mutex_exit(SD_MUTEX(un));
32171 		return;
32172 	}
32173 
32174 	if (un->un_f_descr_format_supported) {
32175 		mutex_exit(SD_MUTEX(un));
32176 		rval = sd_send_scsi_READ_CAPACITY_16(ssc, &capacity, &lbasize,
32177 		    &pbsize, SD_PATH_DIRECT);
32178 		mutex_enter(SD_MUTEX(un));
32179 
32180 		if (rval != 0) {
32181 			un->un_phy_blocksize = DEV_BSIZE;
32182 		} else {
32183 			if (pbsize < DEV_BSIZE || !ISP2(pbsize)) {
32184 				un->un_phy_blocksize = DEV_BSIZE;
32185 			} else if (pbsize > un->un_phy_blocksize) {
32186 				/*
32187 				 * Don't reset the physical blocksize
32188 				 * unless we've detected a larger value.
32189 				 */
32190 				un->un_phy_blocksize = pbsize;
32191 			}
32192 		}
32193 	}
32194 
32195 	for (i = 0; i < sd_flash_dev_table_size; i++) {
32196 		devid_len = (int)strlen(sd_flash_dev_table[i]);
32197 		if (sd_sdconf_id_match(un, sd_flash_dev_table[i], devid_len)
32198 		    == SD_SUCCESS) {
32199 			un->un_phy_blocksize = SSD_SECSIZE;
32200 			if (un->un_f_is_solid_state &&
32201 			    un->un_phy_blocksize != un->un_tgt_blocksize)
32202 				un->un_f_enable_rmw = TRUE;
32203 		}
32204 	}
32205 
32206 	mutex_exit(SD_MUTEX(un));
32207 }
32208