xref: /freebsd/sys/dev/nvme/nvme.c (revision 148a8da8)
1 /*-
2  * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
3  *
4  * Copyright (C) 2012-2014 Intel Corporation
5  * All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 1. Redistributions of source code must retain the above copyright
11  *    notice, this list of conditions and the following disclaimer.
12  * 2. Redistributions in binary form must reproduce the above copyright
13  *    notice, this list of conditions and the following disclaimer in the
14  *    documentation and/or other materials provided with the distribution.
15  *
16  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
17  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
20  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
22  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
23  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
24  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
25  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
26  * SUCH DAMAGE.
27  */
28 
29 #include <sys/cdefs.h>
30 __FBSDID("$FreeBSD$");
31 
32 #include <sys/param.h>
33 #include <sys/bus.h>
34 #include <sys/conf.h>
35 #include <sys/module.h>
36 
37 #include <vm/uma.h>
38 
39 #include <dev/pci/pcireg.h>
40 #include <dev/pci/pcivar.h>
41 
42 #include "nvme_private.h"
43 
44 struct nvme_consumer {
45 	uint32_t		id;
46 	nvme_cons_ns_fn_t	ns_fn;
47 	nvme_cons_ctrlr_fn_t	ctrlr_fn;
48 	nvme_cons_async_fn_t	async_fn;
49 	nvme_cons_fail_fn_t	fail_fn;
50 };
51 
52 struct nvme_consumer nvme_consumer[NVME_MAX_CONSUMERS];
53 #define	INVALID_CONSUMER_ID	0xFFFF
54 
55 uma_zone_t	nvme_request_zone;
56 int32_t		nvme_retry_count;
57 
58 MALLOC_DEFINE(M_NVME, "nvme", "nvme(4) memory allocations");
59 
60 static int    nvme_probe(device_t);
61 static int    nvme_attach(device_t);
62 static int    nvme_detach(device_t);
63 static int    nvme_shutdown(device_t);
64 
65 static devclass_t nvme_devclass;
66 
67 static device_method_t nvme_pci_methods[] = {
68 	/* Device interface */
69 	DEVMETHOD(device_probe,     nvme_probe),
70 	DEVMETHOD(device_attach,    nvme_attach),
71 	DEVMETHOD(device_detach,    nvme_detach),
72 	DEVMETHOD(device_shutdown,  nvme_shutdown),
73 	{ 0, 0 }
74 };
75 
76 static driver_t nvme_pci_driver = {
77 	"nvme",
78 	nvme_pci_methods,
79 	sizeof(struct nvme_controller),
80 };
81 
82 DRIVER_MODULE(nvme, pci, nvme_pci_driver, nvme_devclass, NULL, NULL);
83 MODULE_VERSION(nvme, 1);
84 MODULE_DEPEND(nvme, cam, 1, 1, 1);
85 
86 static struct _pcsid
87 {
88 	uint32_t	devid;
89 	int		match_subdevice;
90 	uint16_t	subdevice;
91 	const char	*desc;
92 	uint32_t	quirks;
93 } pci_ids[] = {
94 	{ 0x01118086,		0, 0, "NVMe Controller"  },
95 	{ IDT32_PCI_ID,		0, 0, "IDT NVMe Controller (32 channel)"  },
96 	{ IDT8_PCI_ID,		0, 0, "IDT NVMe Controller (8 channel)" },
97 	{ 0x09538086,		1, 0x3702, "DC P3700 SSD" },
98 	{ 0x09538086,		1, 0x3703, "DC P3700 SSD [2.5\" SFF]" },
99 	{ 0x09538086,		1, 0x3704, "DC P3500 SSD [Add-in Card]" },
100 	{ 0x09538086,		1, 0x3705, "DC P3500 SSD [2.5\" SFF]" },
101 	{ 0x09538086,		1, 0x3709, "DC P3600 SSD [Add-in Card]" },
102 	{ 0x09538086,		1, 0x370a, "DC P3600 SSD [2.5\" SFF]" },
103 	{ 0x00031c58,		0, 0, "HGST SN100",	QUIRK_DELAY_B4_CHK_RDY },
104 	{ 0x00231c58,		0, 0, "WDC SN200",	QUIRK_DELAY_B4_CHK_RDY },
105 	{ 0x05401c5f,		0, 0, "Memblaze Pblaze4", QUIRK_DELAY_B4_CHK_RDY },
106 	{ 0xa821144d,		0, 0, "Samsung PM1725", QUIRK_DELAY_B4_CHK_RDY },
107 	{ 0xa822144d,		0, 0, "Samsung PM1725a", QUIRK_DELAY_B4_CHK_RDY },
108 	{ 0x01161179,		0, 0, "Toshiba XG5", QUIRK_DISABLE_TIMEOUT },
109 	{ 0x00000000,		0, 0, NULL  }
110 };
111 
112 static int
113 nvme_match(uint32_t devid, uint16_t subdevice, struct _pcsid *ep)
114 {
115 	if (devid != ep->devid)
116 		return 0;
117 
118 	if (!ep->match_subdevice)
119 		return 1;
120 
121 	if (subdevice == ep->subdevice)
122 		return 1;
123 	else
124 		return 0;
125 }
126 
127 static int
128 nvme_probe (device_t device)
129 {
130 	struct _pcsid	*ep;
131 	uint32_t	devid;
132 	uint16_t	subdevice;
133 
134 	devid = pci_get_devid(device);
135 	subdevice = pci_get_subdevice(device);
136 	ep = pci_ids;
137 
138 	while (ep->devid) {
139 		if (nvme_match(devid, subdevice, ep))
140 			break;
141 		++ep;
142 	}
143 
144 	if (ep->desc) {
145 		device_set_desc(device, ep->desc);
146 		return (BUS_PROBE_DEFAULT);
147 	}
148 
149 #if defined(PCIS_STORAGE_NVM)
150 	if (pci_get_class(device)    == PCIC_STORAGE &&
151 	    pci_get_subclass(device) == PCIS_STORAGE_NVM &&
152 	    pci_get_progif(device)   == PCIP_STORAGE_NVM_ENTERPRISE_NVMHCI_1_0) {
153 		device_set_desc(device, "Generic NVMe Device");
154 		return (BUS_PROBE_GENERIC);
155 	}
156 #endif
157 
158 	return (ENXIO);
159 }
160 
161 static void
162 nvme_init(void)
163 {
164 	uint32_t	i;
165 
166 	nvme_request_zone = uma_zcreate("nvme_request",
167 	    sizeof(struct nvme_request), NULL, NULL, NULL, NULL, 0, 0);
168 
169 	for (i = 0; i < NVME_MAX_CONSUMERS; i++)
170 		nvme_consumer[i].id = INVALID_CONSUMER_ID;
171 }
172 
173 SYSINIT(nvme_register, SI_SUB_DRIVERS, SI_ORDER_SECOND, nvme_init, NULL);
174 
175 static void
176 nvme_uninit(void)
177 {
178 	uma_zdestroy(nvme_request_zone);
179 }
180 
181 SYSUNINIT(nvme_unregister, SI_SUB_DRIVERS, SI_ORDER_SECOND, nvme_uninit, NULL);
182 
183 static int
184 nvme_shutdown(device_t dev)
185 {
186 	struct nvme_controller	*ctrlr;
187 
188 	ctrlr = DEVICE2SOFTC(dev);
189 	nvme_ctrlr_shutdown(ctrlr);
190 
191 	return (0);
192 }
193 
194 void
195 nvme_dump_command(struct nvme_command *cmd)
196 {
197 
198 	printf(
199 "opc:%x f:%x cid:%x nsid:%x r2:%x r3:%x mptr:%jx prp1:%jx prp2:%jx cdw:%x %x %x %x %x %x\n",
200 	    cmd->opc, cmd->fuse, cmd->cid, le32toh(cmd->nsid),
201 	    cmd->rsvd2, cmd->rsvd3,
202 	    (uintmax_t)le64toh(cmd->mptr), (uintmax_t)le64toh(cmd->prp1), (uintmax_t)le64toh(cmd->prp2),
203 	    le32toh(cmd->cdw10), le32toh(cmd->cdw11), le32toh(cmd->cdw12),
204 	    le32toh(cmd->cdw13), le32toh(cmd->cdw14), le32toh(cmd->cdw15));
205 }
206 
207 void
208 nvme_dump_completion(struct nvme_completion *cpl)
209 {
210 	uint8_t p, sc, sct, m, dnr;
211 	uint16_t status;
212 
213 	status = le16toh(cpl->status);
214 
215 	p = NVME_STATUS_GET_P(status);
216 	sc = NVME_STATUS_GET_SC(status);
217 	sct = NVME_STATUS_GET_SCT(status);
218 	m = NVME_STATUS_GET_M(status);
219 	dnr = NVME_STATUS_GET_DNR(status);
220 
221 	printf("cdw0:%08x sqhd:%04x sqid:%04x "
222 	    "cid:%04x p:%x sc:%02x sct:%x m:%x dnr:%x\n",
223 	    le32toh(cpl->cdw0), le16toh(cpl->sqhd), le16toh(cpl->sqid),
224 	    cpl->cid, p, sc, sct, m, dnr);
225 }
226 
227 static int
228 nvme_attach(device_t dev)
229 {
230 	struct nvme_controller	*ctrlr = DEVICE2SOFTC(dev);
231 	int			status;
232 	struct _pcsid		*ep;
233 	uint32_t		devid;
234 	uint16_t		subdevice;
235 
236 	devid = pci_get_devid(dev);
237 	subdevice = pci_get_subdevice(dev);
238 	ep = pci_ids;
239 	while (ep->devid) {
240 		if (nvme_match(devid, subdevice, ep))
241 			break;
242 		++ep;
243 	}
244 	ctrlr->quirks = ep->quirks;
245 
246 	status = nvme_ctrlr_construct(ctrlr, dev);
247 
248 	if (status != 0) {
249 		nvme_ctrlr_destruct(ctrlr, dev);
250 		return (status);
251 	}
252 
253 	/*
254 	 * Some drives do not implement the completion timeout feature
255 	 * correctly. There's a WAR from the manufacturer to just disable it.
256 	 * The driver wouldn't respond correctly to a timeout anyway.
257 	 */
258 	if (ep->quirks & QUIRK_DISABLE_TIMEOUT) {
259 		int ptr;
260 		uint16_t devctl2;
261 
262 		status = pci_find_cap(dev, PCIY_EXPRESS, &ptr);
263 		if (status) {
264 			device_printf(dev, "Can't locate PCIe capability?");
265 			return (status);
266 		}
267 		devctl2 = pci_read_config(dev, ptr + PCIER_DEVICE_CTL2, sizeof(devctl2));
268 		devctl2 |= PCIEM_CTL2_COMP_TIMO_DISABLE;
269 		pci_write_config(dev, ptr + PCIER_DEVICE_CTL2, devctl2, sizeof(devctl2));
270 	}
271 
272 	/*
273 	 * Enable busmastering so the completion status messages can
274 	 * be busmastered back to the host.
275 	 */
276 	pci_enable_busmaster(dev);
277 
278 	/*
279 	 * Reset controller twice to ensure we do a transition from cc.en==1
280 	 *  to cc.en==0.  This is because we don't really know what status
281 	 *  the controller was left in when boot handed off to OS.
282 	 */
283 	status = nvme_ctrlr_hw_reset(ctrlr);
284 	if (status != 0) {
285 		nvme_ctrlr_destruct(ctrlr, dev);
286 		return (status);
287 	}
288 
289 	status = nvme_ctrlr_hw_reset(ctrlr);
290 	if (status != 0) {
291 		nvme_ctrlr_destruct(ctrlr, dev);
292 		return (status);
293 	}
294 
295 	ctrlr->config_hook.ich_func = nvme_ctrlr_start_config_hook;
296 	ctrlr->config_hook.ich_arg = ctrlr;
297 
298 	config_intrhook_establish(&ctrlr->config_hook);
299 
300 	return (0);
301 }
302 
303 static int
304 nvme_detach (device_t dev)
305 {
306 	struct nvme_controller	*ctrlr = DEVICE2SOFTC(dev);
307 
308 	nvme_ctrlr_destruct(ctrlr, dev);
309 	pci_disable_busmaster(dev);
310 	return (0);
311 }
312 
313 static void
314 nvme_notify(struct nvme_consumer *cons,
315 	    struct nvme_controller *ctrlr)
316 {
317 	struct nvme_namespace	*ns;
318 	void			*ctrlr_cookie;
319 	int			cmpset, ns_idx;
320 
321 	/*
322 	 * The consumer may register itself after the nvme devices
323 	 *  have registered with the kernel, but before the
324 	 *  driver has completed initialization.  In that case,
325 	 *  return here, and when initialization completes, the
326 	 *  controller will make sure the consumer gets notified.
327 	 */
328 	if (!ctrlr->is_initialized)
329 		return;
330 
331 	cmpset = atomic_cmpset_32(&ctrlr->notification_sent, 0, 1);
332 
333 	if (cmpset == 0)
334 		return;
335 
336 	if (cons->ctrlr_fn != NULL)
337 		ctrlr_cookie = (*cons->ctrlr_fn)(ctrlr);
338 	else
339 		ctrlr_cookie = NULL;
340 	ctrlr->cons_cookie[cons->id] = ctrlr_cookie;
341 	if (ctrlr->is_failed) {
342 		if (cons->fail_fn != NULL)
343 			(*cons->fail_fn)(ctrlr_cookie);
344 		/*
345 		 * Do not notify consumers about the namespaces of a
346 		 *  failed controller.
347 		 */
348 		return;
349 	}
350 	for (ns_idx = 0; ns_idx < min(ctrlr->cdata.nn, NVME_MAX_NAMESPACES); ns_idx++) {
351 		ns = &ctrlr->ns[ns_idx];
352 		if (ns->data.nsze == 0)
353 			continue;
354 		if (cons->ns_fn != NULL)
355 			ns->cons_cookie[cons->id] =
356 			    (*cons->ns_fn)(ns, ctrlr_cookie);
357 	}
358 }
359 
360 void
361 nvme_notify_new_controller(struct nvme_controller *ctrlr)
362 {
363 	int i;
364 
365 	for (i = 0; i < NVME_MAX_CONSUMERS; i++) {
366 		if (nvme_consumer[i].id != INVALID_CONSUMER_ID) {
367 			nvme_notify(&nvme_consumer[i], ctrlr);
368 		}
369 	}
370 }
371 
372 static void
373 nvme_notify_new_consumer(struct nvme_consumer *cons)
374 {
375 	device_t		*devlist;
376 	struct nvme_controller	*ctrlr;
377 	int			dev_idx, devcount;
378 
379 	if (devclass_get_devices(nvme_devclass, &devlist, &devcount))
380 		return;
381 
382 	for (dev_idx = 0; dev_idx < devcount; dev_idx++) {
383 		ctrlr = DEVICE2SOFTC(devlist[dev_idx]);
384 		nvme_notify(cons, ctrlr);
385 	}
386 
387 	free(devlist, M_TEMP);
388 }
389 
390 void
391 nvme_notify_async_consumers(struct nvme_controller *ctrlr,
392 			    const struct nvme_completion *async_cpl,
393 			    uint32_t log_page_id, void *log_page_buffer,
394 			    uint32_t log_page_size)
395 {
396 	struct nvme_consumer	*cons;
397 	uint32_t		i;
398 
399 	for (i = 0; i < NVME_MAX_CONSUMERS; i++) {
400 		cons = &nvme_consumer[i];
401 		if (cons->id != INVALID_CONSUMER_ID && cons->async_fn != NULL)
402 			(*cons->async_fn)(ctrlr->cons_cookie[i], async_cpl,
403 			    log_page_id, log_page_buffer, log_page_size);
404 	}
405 }
406 
407 void
408 nvme_notify_fail_consumers(struct nvme_controller *ctrlr)
409 {
410 	struct nvme_consumer	*cons;
411 	uint32_t		i;
412 
413 	/*
414 	 * This controller failed during initialization (i.e. IDENTIFY
415 	 *  command failed or timed out).  Do not notify any nvme
416 	 *  consumers of the failure here, since the consumer does not
417 	 *  even know about the controller yet.
418 	 */
419 	if (!ctrlr->is_initialized)
420 		return;
421 
422 	for (i = 0; i < NVME_MAX_CONSUMERS; i++) {
423 		cons = &nvme_consumer[i];
424 		if (cons->id != INVALID_CONSUMER_ID && cons->fail_fn != NULL)
425 			cons->fail_fn(ctrlr->cons_cookie[i]);
426 	}
427 }
428 
429 void
430 nvme_notify_ns(struct nvme_controller *ctrlr, int nsid)
431 {
432 	struct nvme_consumer	*cons;
433 	struct nvme_namespace	*ns = &ctrlr->ns[nsid - 1];
434 	uint32_t		i;
435 
436 	if (!ctrlr->is_initialized)
437 		return;
438 
439 	for (i = 0; i < NVME_MAX_CONSUMERS; i++) {
440 		cons = &nvme_consumer[i];
441 		if (cons->id != INVALID_CONSUMER_ID && cons->ns_fn != NULL)
442 			ns->cons_cookie[cons->id] =
443 			    (*cons->ns_fn)(ns, ctrlr->cons_cookie[cons->id]);
444 	}
445 }
446 
447 struct nvme_consumer *
448 nvme_register_consumer(nvme_cons_ns_fn_t ns_fn, nvme_cons_ctrlr_fn_t ctrlr_fn,
449 		       nvme_cons_async_fn_t async_fn,
450 		       nvme_cons_fail_fn_t fail_fn)
451 {
452 	int i;
453 
454 	/*
455 	 * TODO: add locking around consumer registration.  Not an issue
456 	 *  right now since we only have one nvme consumer - nvd(4).
457 	 */
458 	for (i = 0; i < NVME_MAX_CONSUMERS; i++)
459 		if (nvme_consumer[i].id == INVALID_CONSUMER_ID) {
460 			nvme_consumer[i].id = i;
461 			nvme_consumer[i].ns_fn = ns_fn;
462 			nvme_consumer[i].ctrlr_fn = ctrlr_fn;
463 			nvme_consumer[i].async_fn = async_fn;
464 			nvme_consumer[i].fail_fn = fail_fn;
465 
466 			nvme_notify_new_consumer(&nvme_consumer[i]);
467 			return (&nvme_consumer[i]);
468 		}
469 
470 	printf("nvme(4): consumer not registered - no slots available\n");
471 	return (NULL);
472 }
473 
474 void
475 nvme_unregister_consumer(struct nvme_consumer *consumer)
476 {
477 
478 	consumer->id = INVALID_CONSUMER_ID;
479 }
480 
481 void
482 nvme_completion_poll_cb(void *arg, const struct nvme_completion *cpl)
483 {
484 	struct nvme_completion_poll_status	*status = arg;
485 
486 	/*
487 	 * Copy status into the argument passed by the caller, so that
488 	 *  the caller can check the status to determine if the
489 	 *  the request passed or failed.
490 	 */
491 	memcpy(&status->cpl, cpl, sizeof(*cpl));
492 	atomic_store_rel_int(&status->done, 1);
493 }
494