xref: /freebsd/sys/dev/nvme/nvme.h (revision 8a0a413e)
1 /*-
2  * Copyright (C) 2012-2013 Intel Corporation
3  * All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  * 1. Redistributions of source code must retain the above copyright
9  *    notice, this list of conditions and the following disclaimer.
10  * 2. Redistributions in binary form must reproduce the above copyright
11  *    notice, this list of conditions and the following disclaimer in the
12  *    documentation and/or other materials provided with the distribution.
13  *
14  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
15  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
17  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
18  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
20  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
22  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
23  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
24  * SUCH DAMAGE.
25  *
26  * $FreeBSD$
27  */
28 
29 #ifndef __NVME_H__
30 #define __NVME_H__
31 
32 #ifdef _KERNEL
33 #include <sys/types.h>
34 #endif
35 
36 #include <sys/param.h>
37 
/*
 * ioctls accepted by nvme controller and namespace device nodes;
 * 'n' is this driver's ioctl group.
 */
#define	NVME_PASSTHROUGH_CMD		_IOWR('n', 0, struct nvme_pt_command)
#define	NVME_RESET_CONTROLLER		_IO('n', 1)

/* In-driver performance tests, parameterized by struct nvme_io_test. */
#define	NVME_IO_TEST			_IOWR('n', 100, struct nvme_io_test)
#define	NVME_BIO_TEST			_IOWR('n', 101, struct nvme_io_test)
43 
44 /*
45  * Macros to deal with NVME revisions, as defined VS register
46  */
47 #define NVME_REV(x, y)			(((x) << 16) | ((y) << 8))
48 #define NVME_MAJOR(r)			(((r) >> 16) & 0xffff)
49 #define NVME_MINOR(r)			(((r) >> 8) & 0xff)
50 
51 /*
52  * Use to mark a command to apply to all namespaces, or to retrieve global
53  *  log pages.
54  */
55 #define NVME_GLOBAL_NAMESPACE_TAG	((uint32_t)0xFFFFFFFF)
56 
57 /* Cap nvme to 1MB transfers driver explodes with larger sizes */
58 #define NVME_MAX_XFER_SIZE		(MAXPHYS < (1<<20) ? MAXPHYS : (1<<20))
59 
/* Low 32 bits of the controller CAP (Capabilities) register. */
union cap_lo_register {
	uint32_t	raw;
	struct {
		/** maximum queue entries supported */
		uint32_t mqes		: 16;

		/** contiguous queues required */
		uint32_t cqr		: 1;

		/** arbitration mechanism supported */
		uint32_t ams		: 2;

		uint32_t reserved1	: 5;

		/** timeout */
		uint32_t to		: 8;
	} bits __packed;
} __packed;

_Static_assert(sizeof(union cap_lo_register) == 4, "bad size for cap_lo_register");
80 
/* High 32 bits of the controller CAP (Capabilities) register. */
union cap_hi_register {
	uint32_t	raw;
	struct {
		/** doorbell stride */
		uint32_t dstrd		: 4;

		uint32_t reserved3	: 1;

		/** command sets supported */
		uint32_t css_nvm	: 1;

		uint32_t css_reserved	: 3;
		uint32_t reserved2	: 7;

		/** memory page size minimum */
		uint32_t mpsmin		: 4;

		/** memory page size maximum */
		uint32_t mpsmax		: 4;

		uint32_t reserved1	: 8;
	} bits __packed;
} __packed;

_Static_assert(sizeof(union cap_hi_register) == 4, "bad size of cap_hi_register");
106 
/* CC (Controller Configuration) register layout. */
union cc_register {
	uint32_t	raw;
	struct {
		/** enable */
		uint32_t en		: 1;

		uint32_t reserved1	: 3;

		/** i/o command set selected */
		uint32_t css		: 3;

		/** memory page size */
		uint32_t mps		: 4;

		/** arbitration mechanism selected */
		uint32_t ams		: 3;

		/** shutdown notification (see enum shn_value) */
		uint32_t shn		: 2;

		/** i/o submission queue entry size */
		uint32_t iosqes		: 4;

		/** i/o completion queue entry size */
		uint32_t iocqes		: 4;

		uint32_t reserved2	: 8;
	} bits __packed;
} __packed;

_Static_assert(sizeof(union cc_register) == 4, "bad size for cc_register");
138 
/* Values written to cc.shn to request a controller shutdown. */
enum shn_value {
	NVME_SHN_NORMAL		= 0x1,
	NVME_SHN_ABRUPT		= 0x2,
};
143 
/* CSTS (Controller Status) register layout. */
union csts_register {
	uint32_t	raw;
	struct {
		/** ready */
		uint32_t rdy		: 1;

		/** controller fatal status */
		uint32_t cfs		: 1;

		/** shutdown status (see enum shst_value) */
		uint32_t shst		: 2;

		uint32_t reserved1	: 28;
	} bits __packed;
} __packed;

_Static_assert(sizeof(union csts_register) == 4, "bad size for csts_register");
161 
/* Values reported in csts.shst while a shutdown is in progress. */
enum shst_value {
	NVME_SHST_NORMAL	= 0x0,
	NVME_SHST_OCCURRING	= 0x1,
	NVME_SHST_COMPLETE	= 0x2,
};
167 
168 union aqa_register {
169 	uint32_t	raw;
170 	struct {
171 		/** admin submission queue size */
172 		uint32_t asqs		: 12;
173 
174 		uint32_t reserved1	: 4;
175 
176 		/** admin completion queue size */
177 		uint32_t acqs		: 12;
178 
179 		uint32_t reserved2	: 4;
180 	} bits __packed;
181 } __packed;
182 
183 _Static_assert(sizeof(union aqa_register) == 4, "bad size for aqa_resgister");
184 
/*
 * Memory-mapped register file of an NVMe controller (BAR0).  reserved3
 * pads the fixed registers out to offset 0x1000, where the doorbell
 * array begins (hence the 0x1008 size assertion: one doorbell pair is
 * declared; real stride/count are controller-dependent).
 */
struct nvme_registers
{
	/** controller capabilities */
	union cap_lo_register	cap_lo;
	union cap_hi_register	cap_hi;

	uint32_t		vs;	/* version */
	uint32_t		intms;	/* interrupt mask set */
	uint32_t		intmc;	/* interrupt mask clear */

	/** controller configuration */
	union cc_register	cc;

	uint32_t		reserved1;

	/** controller status */
	union csts_register	csts;

	uint32_t		reserved2;

	/** admin queue attributes */
	union aqa_register	aqa;

	uint64_t		asq;	/* admin submission queue base addr */
	uint64_t		acq;	/* admin completion queue base addr */
	uint32_t		reserved3[0x3f2];

	struct {
	    uint32_t		sq_tdbl; /* submission queue tail doorbell */
	    uint32_t		cq_hdbl; /* completion queue head doorbell */
	} doorbell[1] __packed;
} __packed;

_Static_assert(sizeof(struct nvme_registers) == 0x1008, "bad size for nvme_registers");
219 
/* Submission queue entry: 16 dwords (64 bytes). */
struct nvme_command
{
	/* dword 0 */
	uint16_t opc	:  8;	/* opcode */
	uint16_t fuse	:  2;	/* fused operation */
	uint16_t rsvd1	:  6;
	uint16_t cid;		/* command identifier */

	/* dword 1 */
	uint32_t nsid;		/* namespace identifier */

	/* dword 2-3 */
	uint32_t rsvd2;
	uint32_t rsvd3;

	/* dword 4-5 */
	uint64_t mptr;		/* metadata pointer */

	/* dword 6-7 */
	uint64_t prp1;		/* prp entry 1 */

	/* dword 8-9 */
	uint64_t prp2;		/* prp entry 2 */

	/* dword 10-15 */
	uint32_t cdw10;		/* command-specific */
	uint32_t cdw11;		/* command-specific */
	uint32_t cdw12;		/* command-specific */
	uint32_t cdw13;		/* command-specific */
	uint32_t cdw14;		/* command-specific */
	uint32_t cdw15;		/* command-specific */
} __packed;

_Static_assert(sizeof(struct nvme_command) == 16 * 4, "bad size for nvme_command");
254 
/* Status field of a completion queue entry (upper half of dword 3). */
struct nvme_status {

	uint16_t p	:  1;	/* phase tag */
	uint16_t sc	:  8;	/* status code */
	uint16_t sct	:  3;	/* status code type */
	uint16_t rsvd2	:  2;
	uint16_t m	:  1;	/* more */
	uint16_t dnr	:  1;	/* do not retry */
} __packed;

_Static_assert(sizeof(struct nvme_status) == 2, "bad size for nvme_status");
266 
/* Completion queue entry: 4 dwords (16 bytes). */
struct nvme_completion {

	/* dword 0 */
	uint32_t		cdw0;	/* command-specific */

	/* dword 1 */
	uint32_t		rsvd1;

	/* dword 2 */
	uint16_t		sqhd;	/* submission queue head pointer */
	uint16_t		sqid;	/* submission queue identifier */

	/* dword 3 */
	uint16_t		cid;	/* command identifier */
	struct nvme_status	status;
} __packed;

_Static_assert(sizeof(struct nvme_completion) == 4 * 4, "bad size for nvme_completion");
285 
/* One entry of the range list passed to DATASET_MANAGEMENT (TRIM). */
struct nvme_dsm_range {

	uint32_t attributes;
	uint32_t length;
	uint64_t starting_lba;
} __packed;

/* Fix typo in the diagnostic message ("nvme_dsm_ranage"). */
_Static_assert(sizeof(struct nvme_dsm_range) == 16, "bad size for nvme_dsm_range");
294 
/* status code types (sct field of struct nvme_status) */
enum nvme_status_code_type {
	NVME_SCT_GENERIC		= 0x0,
	NVME_SCT_COMMAND_SPECIFIC	= 0x1,
	NVME_SCT_MEDIA_ERROR		= 0x2,
	/* 0x3-0x6 - reserved */
	NVME_SCT_VENDOR_SPECIFIC	= 0x7,
};
303 
/* generic command status codes (sc values when sct == NVME_SCT_GENERIC) */
enum nvme_generic_command_status_code {
	NVME_SC_SUCCESS				= 0x00,
	NVME_SC_INVALID_OPCODE			= 0x01,
	NVME_SC_INVALID_FIELD			= 0x02,
	NVME_SC_COMMAND_ID_CONFLICT		= 0x03,
	NVME_SC_DATA_TRANSFER_ERROR		= 0x04,
	NVME_SC_ABORTED_POWER_LOSS		= 0x05,
	NVME_SC_INTERNAL_DEVICE_ERROR		= 0x06,
	NVME_SC_ABORTED_BY_REQUEST		= 0x07,
	NVME_SC_ABORTED_SQ_DELETION		= 0x08,
	NVME_SC_ABORTED_FAILED_FUSED		= 0x09,
	NVME_SC_ABORTED_MISSING_FUSED		= 0x0a,
	NVME_SC_INVALID_NAMESPACE_OR_FORMAT	= 0x0b,
	NVME_SC_COMMAND_SEQUENCE_ERROR		= 0x0c,

	/* 0x80 and up are I/O-command-set specific */
	NVME_SC_LBA_OUT_OF_RANGE		= 0x80,
	NVME_SC_CAPACITY_EXCEEDED		= 0x81,
	NVME_SC_NAMESPACE_NOT_READY		= 0x82,
};
324 
/* command specific status codes (sc values when sct == NVME_SCT_COMMAND_SPECIFIC) */
enum nvme_command_specific_status_code {
	NVME_SC_COMPLETION_QUEUE_INVALID	= 0x00,
	NVME_SC_INVALID_QUEUE_IDENTIFIER	= 0x01,
	NVME_SC_MAXIMUM_QUEUE_SIZE_EXCEEDED	= 0x02,
	NVME_SC_ABORT_COMMAND_LIMIT_EXCEEDED	= 0x03,
	/* 0x04 - reserved */
	NVME_SC_ASYNC_EVENT_REQUEST_LIMIT_EXCEEDED = 0x05,
	NVME_SC_INVALID_FIRMWARE_SLOT		= 0x06,
	NVME_SC_INVALID_FIRMWARE_IMAGE		= 0x07,
	NVME_SC_INVALID_INTERRUPT_VECTOR	= 0x08,
	NVME_SC_INVALID_LOG_PAGE		= 0x09,
	NVME_SC_INVALID_FORMAT			= 0x0a,
	NVME_SC_FIRMWARE_REQUIRES_RESET		= 0x0b,

	/* 0x80 and up are I/O-command-set specific */
	NVME_SC_CONFLICTING_ATTRIBUTES		= 0x80,
	NVME_SC_INVALID_PROTECTION_INFO		= 0x81,
	NVME_SC_ATTEMPTED_WRITE_TO_RO_PAGE	= 0x82,
};
344 
/* media error status codes (sc values when sct == NVME_SCT_MEDIA_ERROR) */
enum nvme_media_error_status_code {
	NVME_SC_WRITE_FAULTS			= 0x80,
	NVME_SC_UNRECOVERED_READ_ERROR		= 0x81,
	NVME_SC_GUARD_CHECK_ERROR		= 0x82,
	NVME_SC_APPLICATION_TAG_CHECK_ERROR	= 0x83,
	NVME_SC_REFERENCE_TAG_CHECK_ERROR	= 0x84,
	NVME_SC_COMPARE_FAILURE			= 0x85,
	NVME_SC_ACCESS_DENIED			= 0x86,
};
355 
/* admin opcodes (opc field for commands on the admin queue) */
enum nvme_admin_opcode {
	NVME_OPC_DELETE_IO_SQ			= 0x00,
	NVME_OPC_CREATE_IO_SQ			= 0x01,
	NVME_OPC_GET_LOG_PAGE			= 0x02,
	/* 0x03 - reserved */
	NVME_OPC_DELETE_IO_CQ			= 0x04,
	NVME_OPC_CREATE_IO_CQ			= 0x05,
	NVME_OPC_IDENTIFY			= 0x06,
	/* 0x07 - reserved */
	NVME_OPC_ABORT				= 0x08,
	NVME_OPC_SET_FEATURES			= 0x09,
	NVME_OPC_GET_FEATURES			= 0x0a,
	/* 0x0b - reserved */
	NVME_OPC_ASYNC_EVENT_REQUEST		= 0x0c,
	NVME_OPC_NAMESPACE_MANAGEMENT		= 0x0d,
	/* 0x0e-0x0f - reserved */
	NVME_OPC_FIRMWARE_ACTIVATE		= 0x10,
	NVME_OPC_FIRMWARE_IMAGE_DOWNLOAD	= 0x11,
	NVME_OPC_NAMESPACE_ATTACHMENT		= 0x15,

	/* 0x80 and up are I/O-command-set specific */
	NVME_OPC_FORMAT_NVM			= 0x80,
	NVME_OPC_SECURITY_SEND			= 0x81,
	NVME_OPC_SECURITY_RECEIVE		= 0x82,
};
381 
/* nvme nvm opcodes (opc field for commands on I/O queues) */
enum nvme_nvm_opcode {
	NVME_OPC_FLUSH				= 0x00,
	NVME_OPC_WRITE				= 0x01,
	NVME_OPC_READ				= 0x02,
	/* 0x03 - reserved */
	NVME_OPC_WRITE_UNCORRECTABLE		= 0x04,
	NVME_OPC_COMPARE			= 0x05,
	/* 0x06-0x07 - reserved */
	NVME_OPC_DATASET_MANAGEMENT		= 0x09,
};
393 
/* Feature identifiers for SET_FEATURES / GET_FEATURES (cdw10). */
enum nvme_feature {
	/* 0x00 - reserved */
	NVME_FEAT_ARBITRATION			= 0x01,
	NVME_FEAT_POWER_MANAGEMENT		= 0x02,
	NVME_FEAT_LBA_RANGE_TYPE		= 0x03,
	NVME_FEAT_TEMPERATURE_THRESHOLD		= 0x04,
	NVME_FEAT_ERROR_RECOVERY		= 0x05,
	NVME_FEAT_VOLATILE_WRITE_CACHE		= 0x06,
	NVME_FEAT_NUMBER_OF_QUEUES		= 0x07,
	NVME_FEAT_INTERRUPT_COALESCING		= 0x08,
	NVME_FEAT_INTERRUPT_VECTOR_CONFIGURATION = 0x09,
	NVME_FEAT_WRITE_ATOMICITY		= 0x0A,
	NVME_FEAT_ASYNC_EVENT_CONFIGURATION	= 0x0B,
	NVME_FEAT_AUTONOMOUS_POWER_STATE_TRANSITION = 0x0C,
	NVME_FEAT_HOST_MEMORY_BUFFER		= 0x0D,
	NVME_FEAT_TIMESTAMP			= 0x0E,
	NVME_FEAT_KEEP_ALIVE_TIMER		= 0x0F,
	NVME_FEAT_HOST_CONTROLLED_THERMAL_MGMT	= 0x10,
	NVME_FEAT_NON_OP_POWER_STATE_CONFIG	= 0x11,
	/* 0x12-0x77 - reserved */
	/* 0x78-0x7f - NVMe Management Interface */
	NVME_FEAT_SOFTWARE_PROGRESS_MARKER	= 0x80,
	/* 0x81-0xBF - command set specific (reserved) */
	/* 0xC0-0xFF - vendor specific */
};
419 
/* Bit flags for cdw11 of a DATASET_MANAGEMENT command. */
enum nvme_dsm_attribute {
	NVME_DSM_ATTR_INTEGRAL_READ		= 0x1,
	NVME_DSM_ATTR_INTEGRAL_WRITE		= 0x2,
	NVME_DSM_ATTR_DEALLOCATE		= 0x4,
};
425 
/* Activate-action values for the FIRMWARE_ACTIVATE command. */
enum nvme_activate_action {
	NVME_AA_REPLACE_NO_ACTIVATE		= 0x0,
	NVME_AA_REPLACE_ACTIVATE		= 0x1,
	NVME_AA_ACTIVATE			= 0x2,
};
431 
/* One power-state descriptor from the Identify Controller data (32 bytes). */
struct nvme_power_state {
	uint16_t	mp;			/* Maximum Power */
	uint8_t		ps_rsvd1;
	uint8_t		mps      : 1;		/* Max Power Scale */
	uint8_t		nops     : 1;		/* Non-Operational State */
	uint8_t		ps_rsvd2 : 6;
	uint32_t	enlat;			/* Entry Latency */
	uint32_t	exlat;			/* Exit Latency */
	uint8_t		rrt      : 5;		/* Relative Read Throughput */
	uint8_t		ps_rsvd3 : 3;
	uint8_t		rrl      : 5;		/* Relative Read Latency */
	uint8_t		ps_rsvd4 : 3;
	uint8_t		rwt      : 5;		/* Relative Write Throughput */
	uint8_t		ps_rsvd5 : 3;
	uint8_t		rwl      : 5;		/* Relative Write Latency */
	uint8_t		ps_rsvd6 : 3;
	uint16_t	idlp;			/* Idle Power */
	uint8_t		ps_rsvd7 : 6;
	uint8_t		ips      : 2;		/* Idle Power Scale */
	uint8_t		ps_rsvd8;
	uint16_t	actp;			/* Active Power */
	uint8_t		apw      : 3;		/* Active Power Workload */
	uint8_t		ps_rsvd9 : 3;
	uint8_t		aps      : 2;		/* Active Power Scale */
	uint8_t		ps_rsvd10[9];
} __packed;

_Static_assert(sizeof(struct nvme_power_state) == 32, "bad size for nvme_power_state");
461 
/* Fixed widths of the identify-controller string fields (not NUL-terminated). */
#define NVME_SERIAL_NUMBER_LENGTH	20
#define NVME_MODEL_NUMBER_LENGTH	40
#define NVME_FIRMWARE_REVISION_LENGTH	8
465 
/* Identify Controller data structure (CNS 01h), 4096 bytes. */
struct nvme_controller_data {

	/* bytes 0-255: controller capabilities and features */

	/** pci vendor id */
	uint16_t		vid;

	/** pci subsystem vendor id */
	uint16_t		ssvid;

	/** serial number */
	uint8_t			sn[NVME_SERIAL_NUMBER_LENGTH];

	/** model number */
	uint8_t			mn[NVME_MODEL_NUMBER_LENGTH];

	/** firmware revision */
	uint8_t			fr[NVME_FIRMWARE_REVISION_LENGTH];

	/** recommended arbitration burst */
	uint8_t			rab;

	/** ieee oui identifier */
	uint8_t			ieee[3];

	/** multi-interface capabilities */
	uint8_t			mic;

	/** maximum data transfer size */
	uint8_t			mdts;

	/** Controller ID */
	uint16_t		ctrlr_id;

	/** Version */
	uint32_t		ver;

	/** RTD3 Resume Latency */
	uint32_t		rtd3r;

	/** RTD3 Enter Latency */
	uint32_t		rtd3e;

	/** Optional Asynchronous Events Supported */
	uint32_t		oaes;	/* bitfield really */

	/** Controller Attributes */
	uint32_t		ctratt;	/* bitfield really */

	uint8_t			reserved1[12];

	/** FRU Globally Unique Identifier */
	uint8_t			fguid[16];

	uint8_t			reserved2[128];

	/* bytes 256-511: admin command set attributes */

	/** optional admin command support */
	struct {
		/* supports security send/receive commands */
		uint16_t	security  : 1;

		/* supports format nvm command */
		uint16_t	format    : 1;

		/* supports firmware activate/download commands */
		uint16_t	firmware  : 1;

		/* supports namespace management commands */
		uint16_t	nsmgmt	  : 1;

		uint16_t	oacs_rsvd : 12;
	} __packed oacs;

	/** abort command limit */
	uint8_t			acl;

	/** asynchronous event request limit */
	uint8_t			aerl;

	/** firmware updates */
	struct {
		/* first slot is read-only */
		uint8_t		slot1_ro  : 1;

		/* number of firmware slots */
		uint8_t		num_slots : 3;

		uint8_t		frmw_rsvd : 4;
	} __packed frmw;

	/** log page attributes */
	struct {
		/* per namespace smart/health log page */
		uint8_t		ns_smart : 1;

		uint8_t		lpa_rsvd : 7;
	} __packed lpa;

	/** error log page entries */
	uint8_t			elpe;

	/** number of power states supported */
	uint8_t			npss;

	/** admin vendor specific command configuration */
	struct {
		/* admin vendor specific commands use spec format */
		uint8_t		spec_format : 1;

		uint8_t		avscc_rsvd  : 7;
	} __packed avscc;

	/** Autonomous Power State Transition Attributes */
	struct {
		/* Autonomous Power State Transitions supported */
		uint8_t		apst_supp : 1;

		uint8_t		apsta_rsvd : 7;
	} __packed apsta;

	/** Warning Composite Temperature Threshold */
	uint16_t		wctemp;

	/** Critical Composite Temperature Threshold */
	uint16_t		cctemp;

	/** Maximum Time for Firmware Activation */
	uint16_t		mtfa;

	/** Host Memory Buffer Preferred Size */
	uint32_t		hmpre;

	/** Host Memory Buffer Minimum Size */
	uint32_t		hmmin;

	/** Name space capabilities  */
	struct {
		/* if nsmgmt, report tnvmcap and unvmcap */
		uint8_t    tnvmcap[16];		/* total NVM capacity, 128-bit */
		uint8_t    unvmcap[16];		/* unallocated NVM capacity, 128-bit */
	} __packed untncap;

	/** Replay Protected Memory Block Support */
	uint32_t		rpmbs; /* Really a bitfield */

	/** Extended Device Self-test Time */
	uint16_t		edstt;

	/** Device Self-test Options */
	uint8_t			dsto; /* Really a bitfield */

	/** Firmware Update Granularity */
	uint8_t			fwug;

	/** Keep Alive Support */
	uint16_t		kas;

	/** Host Controlled Thermal Management Attributes */
	uint16_t		hctma; /* Really a bitfield */

	/** Minimum Thermal Management Temperature */
	uint16_t		mntmt;

	/** Maximum Thermal Management Temperature */
	uint16_t		mxtmt;

	/** Sanitize Capabilities */
	uint32_t		sanicap; /* Really a bitfield */

	uint8_t reserved3[180];
	/* bytes 512-703: nvm command set attributes */

	/** submission queue entry size */
	struct {
		uint8_t		min : 4;
		uint8_t		max : 4;
	} __packed sqes;

	/** completion queue entry size */
	struct {
		uint8_t		min : 4;
		uint8_t		max : 4;
	} __packed cqes;

	/** Maximum Outstanding Commands */
	uint16_t		maxcmd;

	/** number of namespaces */
	uint32_t		nn;

	/** optional nvm command support */
	struct {
		uint16_t	compare : 1;
		uint16_t	write_unc : 1;
		uint16_t	dsm: 1;
		uint16_t	reserved: 13;
	} __packed oncs;

	/** fused operation support */
	uint16_t		fuses;

	/** format nvm attributes */
	uint8_t			fna;

	/** volatile write cache */
	struct {
		uint8_t		present : 1;
		uint8_t		reserved : 7;
	} __packed vwc;

	/* TODO: flesh out remaining nvm command set attributes */
	uint8_t			reserved5[178];

	/* bytes 704-2047: i/o command set attributes */
	uint8_t			reserved6[1344];

	/* bytes 2048-3071: power state descriptors */
	struct nvme_power_state power_state[32];

	/* bytes 3072-4095: vendor specific */
	uint8_t			vs[1024];
} __packed __aligned(4);

_Static_assert(sizeof(struct nvme_controller_data) == 4096, "bad size for nvme_controller_data");
692 
693 struct nvme_namespace_data {
694 
695 	/** namespace size */
696 	uint64_t		nsze;
697 
698 	/** namespace capacity */
699 	uint64_t		ncap;
700 
701 	/** namespace utilization */
702 	uint64_t		nuse;
703 
704 	/** namespace features */
705 	struct {
706 		/** thin provisioning */
707 		uint8_t		thin_prov : 1;
708 		uint8_t		reserved1 : 7;
709 	} __packed nsfeat;
710 
711 	/** number of lba formats */
712 	uint8_t			nlbaf;
713 
714 	/** formatted lba size */
715 	struct {
716 		uint8_t		format    : 4;
717 		uint8_t		extended  : 1;
718 		uint8_t		reserved2 : 3;
719 	} __packed flbas;
720 
721 	/** metadata capabilities */
722 	struct {
723 		/* metadata can be transferred as part of data prp list */
724 		uint8_t		extended  : 1;
725 
726 		/* metadata can be transferred with separate metadata pointer */
727 		uint8_t		pointer   : 1;
728 
729 		uint8_t		reserved3 : 6;
730 	} __packed mc;
731 
732 	/** end-to-end data protection capabilities */
733 	struct {
734 		/* protection information type 1 */
735 		uint8_t		pit1     : 1;
736 
737 		/* protection information type 2 */
738 		uint8_t		pit2     : 1;
739 
740 		/* protection information type 3 */
741 		uint8_t		pit3     : 1;
742 
743 		/* first eight bytes of metadata */
744 		uint8_t		md_start : 1;
745 
746 		/* last eight bytes of metadata */
747 		uint8_t		md_end   : 1;
748 	} __packed dpc;
749 
750 	/** end-to-end data protection type settings */
751 	struct {
752 		/* protection information type */
753 		uint8_t		pit       : 3;
754 
755 		/* 1 == protection info transferred at start of metadata */
756 		/* 0 == protection info transferred at end of metadata */
757 		uint8_t		md_start  : 1;
758 
759 		uint8_t		reserved4 : 4;
760 	} __packed dps;
761 
762 	uint8_t			reserved5[98];
763 
764 	/** lba format support */
765 	struct {
766 		/** metadata size */
767 		uint32_t	ms	  : 16;
768 
769 		/** lba data size */
770 		uint32_t	lbads	  : 8;
771 
772 		/** relative performance */
773 		uint32_t	rp	  : 2;
774 
775 		uint32_t	reserved6 : 6;
776 	} __packed lbaf[16];
777 
778 	uint8_t			reserved6[192];
779 
780 	uint8_t			vendor_specific[3712];
781 } __packed __aligned(4);
782 
783 _Static_assert(sizeof(struct nvme_namespace_data) == 4096, "bad size for nvme_namepsace_data");
784 
/* Log page identifiers for GET_LOG_PAGE (cdw10 bits 7:0). */
enum nvme_log_page {

	/* 0x00 - reserved */
	NVME_LOG_ERROR			= 0x01,
	NVME_LOG_HEALTH_INFORMATION	= 0x02,
	NVME_LOG_FIRMWARE_SLOT		= 0x03,
	NVME_LOG_CHANGED_NAMESPACE	= 0x04,
	NVME_LOG_COMMAND_EFFECT		= 0x05,
	/* 0x06-0x7F - reserved */
	/* 0x80-0xBF - I/O command set specific */
	NVME_LOG_RES_NOTIFICATION	= 0x80,
	/* 0xC0-0xFF - vendor specific */

	/*
	 * The following are Intel Specific log pages, but they seem
	 * to be widely implemented.
	 */
	INTEL_LOG_READ_LAT_LOG		= 0xc1,
	INTEL_LOG_WRITE_LAT_LOG		= 0xc2,
	INTEL_LOG_TEMP_STATS		= 0xc5,
	INTEL_LOG_ADD_SMART		= 0xca,
	INTEL_LOG_DRIVE_MKT_NAME	= 0xdd,

	/*
	 * HGST log page, with lots of sub pages.  Note this deliberately
	 * shares value 0xc1 with INTEL_LOG_READ_LAT_LOG (vendor-specific
	 * space overlaps between vendors).
	 */
	HGST_INFO_LOG			= 0xc1,
};
813 
/* One 64-byte entry of the Error Information log page (NVME_LOG_ERROR). */
struct nvme_error_information_entry {

	uint64_t		error_count;
	uint16_t		sqid;
	uint16_t		cid;
	struct nvme_status	status;
	uint16_t		error_location;
	uint64_t		lba;
	uint32_t		nsid;
	uint8_t			vendor_specific;
	uint8_t			reserved[35];
} __packed __aligned(4);

_Static_assert(sizeof(struct nvme_error_information_entry) == 64, "bad size for nvme_error_information_entry");
828 
/* Critical-warning byte at the head of the health information log page. */
union nvme_critical_warning_state {

	uint8_t		raw;

	struct {
		uint8_t	available_spare		: 1;
		uint8_t	temperature		: 1;
		uint8_t	device_reliability	: 1;
		uint8_t	read_only		: 1;
		uint8_t	volatile_memory_backup	: 1;
		uint8_t	reserved		: 3;
	} __packed bits;
} __packed;

_Static_assert(sizeof(union nvme_critical_warning_state) == 1, "bad size for nvme_critical_warning_state");
844 
/* SMART / Health Information log page (NVME_LOG_HEALTH_INFORMATION), 512 bytes. */
struct nvme_health_information_page {

	union nvme_critical_warning_state	critical_warning;

	uint16_t		temperature;
	uint8_t			available_spare;
	uint8_t			available_spare_threshold;
	uint8_t			percentage_used;

	uint8_t			reserved[26];

	/*
	 * Note that the following are 128-bit values, but are
	 *  defined as an array of 2 64-bit values.
	 */
	/* Data Units Read is always in 512-byte units. */
	uint64_t		data_units_read[2];
	/* Data Units Written is always in 512-byte units. */
	uint64_t		data_units_written[2];
	/* For NVM command set, this includes Compare commands. */
	uint64_t		host_read_commands[2];
	uint64_t		host_write_commands[2];
	/* Controller Busy Time is reported in minutes. */
	uint64_t		controller_busy_time[2];
	uint64_t		power_cycles[2];
	uint64_t		power_on_hours[2];
	uint64_t		unsafe_shutdowns[2];
	uint64_t		media_errors[2];
	uint64_t		num_error_info_log_entries[2];
	uint32_t		warning_temp_time;
	uint32_t		error_temp_time;
	uint16_t		temp_sensor[8];

	uint8_t			reserved2[296];
} __packed __aligned(4);

_Static_assert(sizeof(struct nvme_health_information_page) == 512, "bad size for nvme_health_information_page");
882 
/* Firmware Slot Information log page (NVME_LOG_FIRMWARE_SLOT), 512 bytes. */
struct nvme_firmware_page {

	struct {
		uint8_t	slot		: 3; /* slot for current FW */
		uint8_t	reserved	: 5;
	} __packed afi;

	uint8_t			reserved[7];
	uint64_t		revision[7]; /* revisions for 7 slots */
	uint8_t			reserved2[448];
} __packed __aligned(4);

_Static_assert(sizeof(struct nvme_firmware_page) == 512, "bad size for nvme_firmware_page");
896 
/* Payload of the vendor temperature statistics log (INTEL_LOG_TEMP_STATS). */
struct intel_log_temp_stats
{
	uint64_t	current;
	uint64_t	overtemp_flag_last;
	uint64_t	overtemp_flag_life;
	uint64_t	max_temp;
	uint64_t	min_temp;
	uint64_t	_rsvd[5];
	uint64_t	max_oper_temp;
	uint64_t	min_oper_temp;
	uint64_t	est_offset;
} __packed __aligned(4);

_Static_assert(sizeof(struct intel_log_temp_stats) == 13 * 8, "bad size for intel_log_temp_stats");
911 
#define NVME_TEST_MAX_THREADS	128

/* Argument/result buffer for the NVME_IO_TEST / NVME_BIO_TEST ioctls. */
struct nvme_io_test {

	enum nvme_nvm_opcode	opc;		/* NVME_OPC_READ or NVME_OPC_WRITE */
	uint32_t		size;		/* per-I/O transfer size */
	uint32_t		time;	/* in seconds */
	uint32_t		num_threads;
	uint32_t		flags;		/* enum nvme_io_test_flags */
	uint64_t		io_completed[NVME_TEST_MAX_THREADS]; /* out: per-thread count */
};
923 
/* Flag bits for nvme_io_test.flags. */
enum nvme_io_test_flags {

	/*
	 * Specifies whether dev_refthread/dev_relthread should be
	 *  called during NVME_BIO_TEST.  Ignored for other test
	 *  types.
	 */
	NVME_TEST_FLAG_REFTHREAD =	0x1,
};
933 
/* Argument buffer for the NVME_PASSTHROUGH_CMD ioctl. */
struct nvme_pt_command {

	/*
	 * cmd is used to specify a passthrough command to a controller or
	 *  namespace.
	 *
	 * The following fields from cmd may be specified by the caller:
	 *	* opc  (opcode)
	 *	* nsid (namespace id) - for admin commands only
	 *	* cdw10-cdw15
	 *
	 * Remaining fields must be set to 0 by the caller.
	 */
	struct nvme_command	cmd;

	/*
	 * cpl returns completion status for the passthrough command
	 *  specified by cmd.
	 *
	 * The following fields will be filled out by the driver, for
	 *  consumption by the caller:
	 *	* cdw0
	 *	* status (except for phase)
	 *
	 * Remaining fields will be set to 0 by the driver.
	 */
	struct nvme_completion	cpl;

	/* buf is the data buffer associated with this passthrough command. */
	void *			buf;

	/*
	 * len is the length of the data buffer associated with this
	 *  passthrough command.
	 */
	uint32_t		len;

	/*
	 * is_read = 1 if the passthrough command will read data into the
	 *  supplied buffer from the controller.
	 *
	 * is_read = 0 if the passthrough command will write data from the
	 *  supplied buffer to the controller.
	 */
	uint32_t		is_read;

	/*
	 * driver_lock is used by the driver only.  It must be set to 0
	 *  by the caller.
	 */
	struct mtx *		driver_lock;
};
986 
/*
 * A command failed iff its status code or status code type is nonzero.
 * cpl is evaluated twice, but only read.
 */
#define nvme_completion_is_error(cpl)					\
	((cpl)->status.sc != 0 || (cpl)->status.sct != 0)

/*
 * Copy an identify-data string (sn/mn/fr) into dst; implementation lives
 * in the driver (not visible in this header).
 */
void	nvme_strvis(uint8_t *dst, const uint8_t *src, int dstlen, int srclen);
991 
992 #ifdef _KERNEL
993 
struct bio;

struct nvme_namespace;
struct nvme_controller;
struct nvme_consumer;

/* Per-request completion callback: (cb_arg, completion). */
typedef void (*nvme_cb_fn_t)(void *, const struct nvme_completion *);

/* Consumer registration callbacks (see nvme_register_consumer below). */
typedef void *(*nvme_cons_ns_fn_t)(struct nvme_namespace *, void *);
typedef void *(*nvme_cons_ctrlr_fn_t)(struct nvme_controller *);
typedef void (*nvme_cons_async_fn_t)(void *, const struct nvme_completion *,
				     uint32_t, void *, uint32_t);
typedef void (*nvme_cons_fail_fn_t)(void *);
1007 
/* Bit flags returned by nvme_ns_get_flags(). */
enum nvme_namespace_flags {
	NVME_NS_DEALLOCATE_SUPPORTED	= 0x1,
	NVME_NS_FLUSH_SUPPORTED		= 0x2,
};
1012 
/* Submit a passthrough command (see struct nvme_pt_command); 0 on success. */
int	nvme_ctrlr_passthrough_cmd(struct nvme_controller *ctrlr,
				   struct nvme_pt_command *pt,
				   uint32_t nsid, int is_user_buffer,
				   int is_admin_cmd);
1017 
/* Admin functions -- asynchronous; cb_fn(cb_arg, cpl) runs on completion. */
void	nvme_ctrlr_cmd_set_feature(struct nvme_controller *ctrlr,
				   uint8_t feature, uint32_t cdw11,
				   void *payload, uint32_t payload_size,
				   nvme_cb_fn_t cb_fn, void *cb_arg);
void	nvme_ctrlr_cmd_get_feature(struct nvme_controller *ctrlr,
				   uint8_t feature, uint32_t cdw11,
				   void *payload, uint32_t payload_size,
				   nvme_cb_fn_t cb_fn, void *cb_arg);
void	nvme_ctrlr_cmd_get_log_page(struct nvme_controller *ctrlr,
				    uint8_t log_page, uint32_t nsid,
				    void *payload, uint32_t payload_size,
				    nvme_cb_fn_t cb_fn, void *cb_arg);
1031 
/* NVM I/O functions -- return 0 on successful submission, errno otherwise. */
int	nvme_ns_cmd_write(struct nvme_namespace *ns, void *payload,
			  uint64_t lba, uint32_t lba_count, nvme_cb_fn_t cb_fn,
			  void *cb_arg);
int	nvme_ns_cmd_write_bio(struct nvme_namespace *ns, struct bio *bp,
			      nvme_cb_fn_t cb_fn, void *cb_arg);
int	nvme_ns_cmd_read(struct nvme_namespace *ns, void *payload,
			 uint64_t lba, uint32_t lba_count, nvme_cb_fn_t cb_fn,
			 void *cb_arg);
int	nvme_ns_cmd_read_bio(struct nvme_namespace *ns, struct bio *bp,
			      nvme_cb_fn_t cb_fn, void *cb_arg);
int	nvme_ns_cmd_deallocate(struct nvme_namespace *ns, void *payload,
			       uint8_t num_ranges, nvme_cb_fn_t cb_fn,
			       void *cb_arg);
int	nvme_ns_cmd_flush(struct nvme_namespace *ns, nvme_cb_fn_t cb_fn,
			  void *cb_arg);
int	nvme_ns_dump(struct nvme_namespace *ns, void *virt, off_t offset,
		     size_t len);
1050 
/* Registration functions -- NULL callbacks are permitted to be uninterested. */
struct nvme_consumer *	nvme_register_consumer(nvme_cons_ns_fn_t    ns_fn,
					       nvme_cons_ctrlr_fn_t ctrlr_fn,
					       nvme_cons_async_fn_t async_fn,
					       nvme_cons_fail_fn_t  fail_fn);
void		nvme_unregister_consumer(struct nvme_consumer *consumer);
1057 
/* Controller helper functions (accessors; returned data owned by driver). */
device_t	nvme_ctrlr_get_device(struct nvme_controller *ctrlr);
const struct nvme_controller_data *
		nvme_ctrlr_get_data(struct nvme_controller *ctrlr);
1062 
/* Namespace helper functions (accessors; returned data owned by driver). */
uint32_t	nvme_ns_get_max_io_xfer_size(struct nvme_namespace *ns);
uint32_t	nvme_ns_get_sector_size(struct nvme_namespace *ns);
uint64_t	nvme_ns_get_num_sectors(struct nvme_namespace *ns);
uint64_t	nvme_ns_get_size(struct nvme_namespace *ns);
uint32_t	nvme_ns_get_flags(struct nvme_namespace *ns);
const char *	nvme_ns_get_serial_number(struct nvme_namespace *ns);
const char *	nvme_ns_get_model_number(struct nvme_namespace *ns);
const struct nvme_namespace_data *
		nvme_ns_get_data(struct nvme_namespace *ns);
uint32_t	nvme_ns_get_stripesize(struct nvme_namespace *ns);

int	nvme_ns_bio_process(struct nvme_namespace *ns, struct bio *bp,
			    nvme_cb_fn_t cb_fn);
1077 
1078 /*
1079  * Command building helper functions -- shared with CAM
1080  * These functions assume allocator zeros out cmd structure
1081  * CAM's xpt_get_ccb and the request allocator for nvme both
1082  * do zero'd allocations.
1083  */
1084 static inline
1085 void	nvme_ns_flush_cmd(struct nvme_command *cmd, uint32_t nsid)
1086 {
1087 
1088 	cmd->opc = NVME_OPC_FLUSH;
1089 	cmd->nsid = nsid;
1090 }
1091 
1092 static inline
1093 void	nvme_ns_rw_cmd(struct nvme_command *cmd, uint32_t rwcmd, uint32_t nsid,
1094     uint64_t lba, uint32_t count)
1095 {
1096 	cmd->opc = rwcmd;
1097 	cmd->nsid = nsid;
1098 	cmd->cdw10 = lba & 0xffffffffu;
1099 	cmd->cdw11 = lba >> 32;
1100 	cmd->cdw12 = count-1;
1101 }
1102 
/* Convenience wrapper: build a WRITE via nvme_ns_rw_cmd(). */
static inline
void	nvme_ns_write_cmd(struct nvme_command *cmd, uint32_t nsid,
    uint64_t lba, uint32_t count)
{
	nvme_ns_rw_cmd(cmd, NVME_OPC_WRITE, nsid, lba, count);
}
1109 
/* Convenience wrapper: build a READ via nvme_ns_rw_cmd(). */
static inline
void	nvme_ns_read_cmd(struct nvme_command *cmd, uint32_t nsid,
    uint64_t lba, uint32_t count)
{
	nvme_ns_rw_cmd(cmd, NVME_OPC_READ, nsid, lba, count);
}
1116 
/*
 * Build a DATASET_MANAGEMENT (deallocate/TRIM) command covering num_ranges
 * entries of the range list supplied via the data buffer; cmd must be
 * pre-zeroed.
 */
static inline
void	nvme_ns_trim_cmd(struct nvme_command *cmd, uint32_t nsid,
    uint32_t num_ranges)
{
	cmd->opc = NVME_OPC_DATASET_MANAGEMENT;
	cmd->nsid = nsid;
	/* Number of ranges, as a 0's based value. */
	cmd->cdw10 = num_ranges - 1;
	cmd->cdw11 = NVME_DSM_ATTR_DEALLOCATE;
}
1126 
/* Driver tunable; defined and interpreted in the nvme driver proper. */
extern int nvme_use_nvd;
1128 
1129 #endif /* _KERNEL */
1130 
1131 #endif /* __NVME_H__ */
1132