/*
 * Copyright (c) 2016 The DragonFly Project.  All rights reserved.
 *
 * This code is derived from software contributed to The DragonFly Project
 * by Matthew Dillon <dillon@backplane.com>
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in
 *    the documentation and/or other materials provided with the
 *    distribution.
 * 3. Neither the name of The DragonFly Project nor the names of its
 *    contributors may be used to endorse or promote products derived
 *    from this software without specific, prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE
 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */
/*
 * Administration thread
 *
 * - Handles resetting, features, iteration of namespaces, and disk
 *   attachments.  Most admin operations are serialized by the admin thread.
 *
 * - Ioctls, as well as any BIOs which require more sophisticated
 *   processing, are handed to this thread as well.
 *
 * - Can freeze/resume other queues for various purposes.
 */

#include "nvme.h"

static void nvme_admin_thread(void *arg);
static int nvme_admin_state_identify_ctlr(nvme_softc_t *sc);
static int nvme_admin_state_make_queues(nvme_softc_t *sc);
static int nvme_admin_state_identify_ns(nvme_softc_t *sc);
static int nvme_admin_state_operating(nvme_softc_t *sc);
static int nvme_admin_state_failed(nvme_softc_t *sc);
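
/*
 * The admin thread runs a simple state machine via sc->admin_func:
 *
 *	identify_ctlr -> make_queues -> identify_ns -> operating
 *	                          \-> failed (on queue setup error)
 *
 * Each state routine returns non-zero to request another immediate pass
 * and zero to let the thread sleep until it is signalled or until the
 * one-second poll tick expires.
 */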

/*
 * Start the admin thread and block until it says it is running.
 */
int
nvme_start_admin_thread(nvme_softc_t *sc)
{
	int error, intr_flags;

	lockinit(&sc->admin_lk, "admlk", 0, 0);
	lockinit(&sc->ioctl_lk, "nvioc", 0, 0);
	sc->admin_signal = 0;

	intr_flags = INTR_MPSAFE;
	if (sc->nirqs == 1) {
		/* This interrupt processes data CQs too */
		intr_flags |= INTR_HIFREQ;
	}

	error = bus_setup_intr(sc->dev, sc->irq[0], intr_flags,
			       nvme_intr, &sc->comqueues[0],
			       &sc->irq_handle[0], NULL);
	if (error) {
		device_printf(sc->dev, "unable to install interrupt\n");
		return error;
	}
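	/*
	 * Create the admin thread and wait for it to set ADMIN_SIG_RUNNING
	 * before returning to the caller.
	 */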
	lockmgr(&sc->admin_lk, LK_EXCLUSIVE);
	kthread_create(nvme_admin_thread, sc, &sc->admintd, "nvme_admin");
	while ((sc->admin_signal & ADMIN_SIG_RUNNING) == 0)
		lksleep(&sc->admin_signal, &sc->admin_lk, 0, "nvwbeg", 0);
	lockmgr(&sc->admin_lk, LK_RELEASE);

	return 0;
}

/*
 * Stop the admin thread and block until it says it is done.
 */
void
nvme_stop_admin_thread(nvme_softc_t *sc)
{
	uint32_t i;

	atomic_set_int(&sc->admin_signal, ADMIN_SIG_STOP);

	/*
	 * We have to wait for the admin thread to finish its probe
	 * before shutting it down.  Break out if the admin thread
	 * never managed to even start.
	 */
	lockmgr(&sc->admin_lk, LK_EXCLUSIVE);
	while ((sc->admin_signal & ADMIN_SIG_PROBED) == 0) {
		if ((sc->admin_signal & ADMIN_SIG_RUNNING) == 0)
			break;
		lksleep(&sc->admin_signal, &sc->admin_lk, 0, "nvwend", 0);
	}
	lockmgr(&sc->admin_lk, LK_RELEASE);

	/*
	 * Disconnect our disks while the admin thread is still running so
	 * its completion-queue polling still works even if interrupts are
	 * broken.  Otherwise we could deadlock in the devfs core.
	 */
	for (i = 0; i < NVME_MAX_NAMESPACES; ++i) {
		nvme_softns_t *nsc;

		if ((nsc = sc->nscary[i]) != NULL) {
			nvme_disk_detach(nsc);

			kfree(nsc, M_NVME);
			sc->nscary[i] = NULL;
		}
	}

	/*
	 * Ask the admin thread to shut down.
	 */
	lockmgr(&sc->admin_lk, LK_EXCLUSIVE);
	wakeup(&sc->admin_signal);
	while (sc->admin_signal & ADMIN_SIG_RUNNING)
		lksleep(&sc->admin_signal, &sc->admin_lk, 0, "nvwend", 0);
	lockmgr(&sc->admin_lk, LK_RELEASE);
	if (sc->irq_handle[0]) {
		bus_teardown_intr(sc->dev, sc->irq[0], sc->irq_handle[0]);
		sc->irq_handle[0] = NULL;
	}
	lockuninit(&sc->ioctl_lk);
	lockuninit(&sc->admin_lk);

	/*
	 * The thread might still be running on another cpu.  Give it time
	 * to actually exit before returning, in case the caller is about
	 * to unload the module; otherwise this delay is not needed.
	 */
	nvme_os_sleep(1);
}

static
void
nvme_admin_thread(void *arg)
{
	nvme_softc_t *sc = arg;
	uint32_t i;

	lockmgr(&sc->admin_lk, LK_EXCLUSIVE);
	atomic_set_int(&sc->admin_signal, ADMIN_SIG_RUNNING);
	wakeup(&sc->admin_signal);

	sc->admin_func = nvme_admin_state_identify_ctlr;

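	/*
	 * Main loop.  The admin lock is held for the life of the thread;
	 * lksleep() drops and reacquires it atomically while waiting.
	 * Each pass polls every configured completion queue, handles any
	 * deferred disk requeues, and runs the current state function.
	 */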
	while ((sc->admin_signal & ADMIN_SIG_STOP) == 0) {
		for (i = 0; i <= sc->niocomqs; ++i) {
			nvme_comqueue_t *comq = &sc->comqueues[i];

			if (comq->nqe == 0)	/* not configured */
				continue;

			lockmgr(&comq->lk, LK_EXCLUSIVE);
			nvme_poll_completions(comq, &comq->lk);
			lockmgr(&comq->lk, LK_RELEASE);
		}
		if (sc->admin_signal & ADMIN_SIG_REQUEUE) {
			atomic_clear_int(&sc->admin_signal, ADMIN_SIG_REQUEUE);
			nvme_disk_requeues(sc);
		}
		if (sc->admin_func(sc) == 0 &&
		    (sc->admin_signal & ADMIN_SIG_RUN_MASK) == 0) {
			lksleep(&sc->admin_signal, &sc->admin_lk, 0,
				"nvidle", hz);
		}
	}

	/*
	 * Cleanup state.
	 *
	 * Note that we actually issue delete queue commands here.  The NVME
	 * spec says that for a normal shutdown the I/O queues should be
	 * deleted prior to issuing the shutdown in the CONFIG register.
	 */
	for (i = 1; i <= sc->niosubqs; ++i) {
		nvme_delete_subqueue(sc, i);
		nvme_free_subqueue(sc, i);
	}
	for (i = 1; i <= sc->niocomqs; ++i) {
		nvme_delete_comqueue(sc, i);
		nvme_free_comqueue(sc, i);
	}

	/*
	 * Signal that we are done.
	 */
	atomic_clear_int(&sc->admin_signal, ADMIN_SIG_RUNNING);
	wakeup(&sc->admin_signal);
	lockmgr(&sc->admin_lk, LK_RELEASE);
}

/*
 * Identify the controller
 */
static
int
nvme_admin_state_identify_ctlr(nvme_softc_t *sc)
{
	nvme_request_t *req;
	nvme_ident_ctlr_data_t *rp;
	int status;
	uint64_t mempgsize;
	char serial[20+16];
	char model[40+16];

	/*
	 * Identify Controller
	 */
	mempgsize = NVME_CAP_MEMPG_MIN_GET(sc->cap);

	req = nvme_get_admin_request(sc, NVME_OP_IDENTIFY);
	req->cmd.identify.cns = NVME_CNS_CTLR;
	req->cmd.identify.cntid = 0;
	bzero(req->info, sizeof(*req->info));
	nvme_submit_request(req);
	status = nvme_wait_request(req);
	/* XXX handle status */

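	/*
	 * The identify data was DMA'd into the request's info block; copy
	 * it into the softc before nvme_put_request() releases the
	 * request.
	 */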
	sc->idctlr = req->info->idctlr;
	nvme_put_request(req);

	rp = &sc->idctlr;

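	/*
	 * The serial and model fields are fixed-size ASCII and are not
	 * guaranteed to be NUL-terminated, so copy them into oversized,
	 * zeroed buffers before cleaning them up for display.
	 */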
	KKASSERT(sizeof(sc->idctlr.serialno) == 20);
	KKASSERT(sizeof(sc->idctlr.modelno) == 40);
	bzero(serial, sizeof(serial));
	bzero(model, sizeof(model));
	bcopy(rp->serialno, serial, sizeof(rp->serialno));
	bcopy(rp->modelno, model, sizeof(rp->modelno));
	string_cleanup(serial, 0);
	string_cleanup(model, 0);

	device_printf(sc->dev, "Model %s BaseSerial %s nscount=%d\n",
		      model, serial, rp->ns_count);

	sc->admin_func = nvme_admin_state_make_queues;

	return 1;
}

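/*
 * Map a 1-based vector/queue index onto the range 1..ncomqs, skipping
 * completion queue 0 (the admin completion queue).
 */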
#define COMQFIXUP(msix, ncomqs)	((((msix) - 1) % (ncomqs)) + 1)

/*
 * Request and create the I/O queues.  Figure out CPU mapping optimizations.
 */
static
int
nvme_admin_state_make_queues(nvme_softc_t *sc)
{
	nvme_request_t *req;
	uint16_t niosubqs, subq_err_idx;
	uint16_t niocomqs, comq_err_idx;
	uint32_t i;
	uint16_t qno;
	int status;
	int error;

	/*
	 * Calculate how many I/O queues (not including the admin queue)
	 * we want to have, up to 65535.  dw0 in the response returns the
	 * number of queues the controller gives us.  Submission and
	 * Completion queues are specified separately.
	 *
	 * This driver runs optimally with two submission queues (read and
	 * write) and one completion queue per cpu,
	 *
	 * +1 for dumps			XXX future
	 * +1 for async events		XXX future
	 *
	 * NOTE: Set one less than the #define because we use 1...N for I/O
	 *	 queues (queue 0 is used for the admin queue).  Easier this
	 *	 way.
	 */
	req = nvme_get_admin_request(sc, NVME_OP_SET_FEATURES);

	niosubqs = ncpus * 2 + 0;
	niocomqs = ncpus + 0;
	if (niosubqs >= NVME_MAX_QUEUES)
		niosubqs = NVME_MAX_QUEUES - 1;
	if (niocomqs >= NVME_MAX_QUEUES)
		niocomqs = NVME_MAX_QUEUES - 1;

	/*
	 * If there are insufficient MSI-X vectors or we use a normal
	 * interrupt, the completion queues are going to wind up being
	 * polled by a single admin interrupt.  Limit the number of
	 * completion queues in this case to something reasonable.
	 */
	if (sc->nirqs == 1 && niocomqs > 4) {
		niocomqs = 4;
		device_printf(sc->dev, "no MSI-X support, limit comqs to %d\n",
			      niocomqs);
	}

	device_printf(sc->dev, "Request %u/%u queues, ", niosubqs, niocomqs);

	req->cmd.setfeat.flags = NVME_FID_NUMQUEUES;
	req->cmd.setfeat.numqs.nsqr = niosubqs - 1;	/* 0's based 0=1 */
	req->cmd.setfeat.numqs.ncqr = niocomqs - 1;	/* 0's based 0=1 */

	nvme_submit_request(req);

	/*
	 * Get response and set our operations mode.  Limit the returned
	 * queue counts to no more than we requested (some chipsets may
	 * return more than the requested number of queues while others
	 * will not).
	 */
	status = nvme_wait_request(req);
	/* XXX handle status */

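	/*
	 * dw0 is 0's based: the low 16 bits return the number of
	 * submission queues allocated and the high 16 bits the number of
	 * completion queues allocated.
	 */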
	if (status == 0) {
		sc->niosubqs = 1 + (req->res.setfeat.dw0 & 0xFFFFU);
		sc->niocomqs = 1 + ((req->res.setfeat.dw0 >> 16) & 0xFFFFU);
		if (sc->niosubqs > niosubqs)
			sc->niosubqs = niosubqs;
		if (sc->niocomqs > niocomqs)
			sc->niocomqs = niocomqs;
	} else {
		sc->niosubqs = 0;
		sc->niocomqs = 0;
	}
	kprintf("Returns %u/%u queues, ", sc->niosubqs, sc->niocomqs);

	nvme_put_request(req);

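	/*
	 * Select a cpu-to-queue map based on how many queues we were
	 * actually given, from best to worst: two subqs + one comq per
	 * cpu, one subq/comq per cpu, queues distributed round-robin
	 * across cpus, a simple read/write split, and finally a single
	 * shared queue.  On a queue creation failure we come back here
	 * and retry with fewer queues.
	 */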
tryagain:
	sc->dumpqno = 0;
	sc->eventqno = 0;

	if (sc->niosubqs >= ncpus * 2 + 0 && sc->niocomqs >= ncpus + 0) {
		/*
		 * If we got all the queues we wanted do a full-bore setup of
		 * qmap[cpu][type].
		 *
		 * Remember that subq 0 / comq 0 is the admin queue.
		 */
		kprintf("optimal map\n");
		qno = 1;
		for (i = 0; i < ncpus; ++i) {
			int cpuqno = COMQFIXUP(sc->cputovect[i], ncpus);

			KKASSERT(cpuqno != 0);
			sc->qmap[i][0] = qno + 0;
			sc->qmap[i][1] = qno + 1;
			sc->subqueues[qno + 0].comqid = cpuqno;
			sc->subqueues[qno + 1].comqid = cpuqno;
			qno += 2;
		}
		sc->niosubqs = ncpus * 2 + 0;
		sc->niocomqs = ncpus + 0;
	} else if (sc->niosubqs >= ncpus && sc->niocomqs >= ncpus) {
		/*
		 * We have enough to give each cpu its own submission
		 * and completion queue.
		 *
		 * leave dumpqno and eventqno set to the admin queue.
		 */
		kprintf("nominal map 1:1 cpu\n");
		for (i = 0; i < ncpus; ++i) {
			qno = sc->cputovect[i];
			KKASSERT(qno != 0);
			sc->qmap[i][0] = qno;
			sc->qmap[i][1] = qno;
			sc->subqueues[qno].comqid = COMQFIXUP(qno, ncpus);
		}
		sc->niosubqs = ncpus;
		sc->niocomqs = ncpus;
	} else if (sc->niosubqs >= 2 && sc->niocomqs >= 2) {
		/*
		 * Prioritize distributing the available queues across
		 * cpus; do not separate read and write.
		 *
		 * leave dumpqno and eventqno set to the admin queue.
		 */
		kprintf("rw-sep map (%d, %d)\n", sc->niosubqs, sc->niocomqs);
		for (i = 0; i < ncpus; ++i) {
			int cpuqno = COMQFIXUP(sc->cputovect[i], sc->niocomqs);
			int qno = COMQFIXUP((i + 1), sc->niosubqs);

			KKASSERT(qno != 0);
			sc->qmap[i][0] = qno;		/* read */
			sc->qmap[i][1] = qno;		/* write */
			sc->subqueues[qno].comqid = cpuqno;
			/* do not increment qno */
		}
#if 0
		sc->niosubqs = 2;
		sc->niocomqs = 2;
#endif
	} else if (sc->niosubqs >= 2) {
		/*
		 * We have enough to have separate read and write queues.
		 */
		kprintf("basic map\n");
		qno = 1;
		for (i = 0; i < ncpus; ++i) {
			int cpuqno = COMQFIXUP(sc->cputovect[i], 1);

			KKASSERT(qno != 0);
			sc->qmap[i][0] = qno + 0;	/* read */
			sc->qmap[i][1] = qno + 1;	/* write */
			if (i <= 0)
				sc->subqueues[qno + 0].comqid = cpuqno;
			if (i <= 1)
				sc->subqueues[qno + 1].comqid = cpuqno;
		}
		sc->niosubqs = 2;
		sc->niocomqs = 1;
	} else {
		/*
		 * Minimal configuration, all cpus and I/O types use the
		 * same queue.  Sad day.
		 */
		kprintf("minimal map\n");
		sc->dumpqno = 0;
		sc->eventqno = 0;
		for (i = 0; i < ncpus; ++i) {
			sc->qmap[i][0] = 1;
			sc->qmap[i][1] = 1;
		}
		sc->subqueues[1].comqid = 1;
		sc->niosubqs = 1;
		sc->niocomqs = 1;
	}

	/*
	 * Create all I/O submission and completion queues.  The I/O
	 * queues are numbered 1 through niosubqs / niocomqs, inclusive.
	 *
	 * NOTE: Completion queues must be created before submission queues.
	 *	 That is, the completion queue specified when creating a
	 *	 submission queue must already exist.
	 */
	error = 0;
	for (i = 1; i <= sc->niocomqs; ++i) {
		error += nvme_alloc_comqueue(sc, i);
		if (error) {
			device_printf(sc->dev, "Unable to alloc comq %d/%d\n",
				      i, sc->niocomqs);
			break;
		}
		error += nvme_create_comqueue(sc, i);
		if (error) {
			device_printf(sc->dev, "Unable to create comq %d/%d\n",
				      i, sc->niocomqs);
			++i;	/* also delete this one below */
			break;
		}
	}
	comq_err_idx = i;

	for (i = 1; i <= sc->niosubqs; ++i) {
		error += nvme_alloc_subqueue(sc, i);
		if (error) {
			device_printf(sc->dev, "Unable to alloc subq %d/%d\n",
				      i, sc->niosubqs);
			break;
		}
		error += nvme_create_subqueue(sc, i);
		if (error) {
			device_printf(sc->dev, "Unable to create subq %d/%d\n",
				      i, sc->niosubqs);
			++i;	/* also delete this one below */
			break;
		}
	}
	subq_err_idx = i;

	/*
	 * If we were unable to allocate and create the number of queues
	 * the device told us it could handle, tear down what we did
	 * manage to create and retry with a smaller configuration.
	 */
	if (error) {
		device_printf(sc->dev, "Failed to initialize device!\n");
		for (i = subq_err_idx - 1; i >= 1; --i) {
			nvme_delete_subqueue(sc, i);
			nvme_free_subqueue(sc, i);
		}
		for (i = comq_err_idx - 1; i >= 1; --i) {
			nvme_delete_comqueue(sc, i);
			nvme_free_comqueue(sc, i);
		}
		sc->admin_func = nvme_admin_state_failed;
		if (sc->niosubqs > 1 || sc->niocomqs > 1) {
			int trywith = 1;

			device_printf(sc->dev,
				      "Retrying with fewer queues (%d/%d) "
				      "just in case the device lied to us\n",
				      trywith, trywith);
			if (sc->niosubqs > trywith)
				sc->niosubqs = trywith;
			if (sc->niocomqs > trywith)
				sc->niocomqs = trywith;
			goto tryagain;
		}
	} else {
		sc->admin_func = nvme_admin_state_identify_ns;
	}

	/*
	 * Disable interrupt coalescing.  It is basically worthless because
	 * setting the threshold has no effect when time is set to 0, and the
	 * smallest time that can be set is 1 (== 100uS), which is too long.
	 * Sequential performance is destroyed (on e.g. the Intel 750).
	 * So kill it.
	 */
	req = nvme_get_admin_request(sc, NVME_OP_SET_FEATURES);
	device_printf(sc->dev, "Interrupt Coalesce: disabled\n");

	req->cmd.setfeat.flags = NVME_FID_INTCOALESCE;
	req->cmd.setfeat.intcoal.thr = 0;
	req->cmd.setfeat.intcoal.time = 0;

	nvme_submit_request(req);
	status = nvme_wait_request(req);
	if (status) {
		device_printf(sc->dev,
			      "Interrupt coalesce failed status=%d\n",
			      status);
	}
	nvme_put_request(req);

	return 1;
}

/*
 * Identify available namespaces, iterate, and attach to disks.
 */
static
int
nvme_admin_state_identify_ns(nvme_softc_t *sc)
{
	nvme_request_t *req;
	nvme_ident_ns_list_t *rp;
	int status;
	int i;
	int j;

	if (bootverbose) {
		if (sc->idctlr.admin_cap & NVME_ADMIN_NSMANAGE)
			device_printf(sc->dev,
				      "Namespace management supported\n");
		else
			device_printf(sc->dev,
				      "Namespace management not supported\n");
	}
#if 0
	/*
	 * Identify Controllers		TODO TODO TODO
	 */
	if (sc->idctlr.admin_cap & NVME_ADMIN_NSMANAGE) {
		req = nvme_get_admin_request(sc, NVME_OP_IDENTIFY);
		req->cmd.identify.cns = NVME_CNS_ANY_CTLR_LIST;
		req->cmd.identify.cntid = 0;
		bzero(req->info, sizeof(*req->info));
		nvme_submit_request(req);
		status = nvme_wait_request(req);
		kprintf("nsquery status %08x\n", status);

#if 0
		for (i = 0; i < req->info->ctlrlist.idcount; ++i) {
			kprintf("CTLR %04x\n", req->info->ctlrlist.ctlrids[i]);
		}
#endif
		nvme_put_request(req);
	}
#endif

	rp = kmalloc(sizeof(*rp), M_NVME, M_WAITOK | M_ZERO);
	if (sc->idctlr.admin_cap & NVME_ADMIN_NSMANAGE) {
		/*
		 * Namespace management supported, query active namespaces.
		 */
		req = nvme_get_admin_request(sc, NVME_OP_IDENTIFY);
		req->cmd.identify.cns = NVME_CNS_ACT_NSLIST;
		req->cmd.identify.cntid = 0;
		bzero(req->info, sizeof(*req->info));
		nvme_submit_request(req);
		status = nvme_wait_request(req);
		kprintf("nsquery status %08x\n", status);
		/* XXX handle status */

		cpu_lfence();
		*rp = req->info->nslist;
		nvme_put_request(req);
	} else {
		/*
		 * Namespace management not supported, assume nsids 1..N
		 * (limited to the first 1024 namespaces).
		 */
		for (i = 1; i <= (int)sc->idctlr.ns_count && i <= 1024; ++i)
			rp->nsids[i-1] = i;
	}

	/*
	 * Identify each Namespace
	 */
	for (i = 0; i < 1024; ++i) {
		nvme_softns_t *nsc;
		nvme_lba_fmt_data_t *lbafmt;

		if (rp->nsids[i] == 0)
			continue;
		req = nvme_get_admin_request(sc, NVME_OP_IDENTIFY);
		req->cmd.identify.cns = NVME_CNS_ACT_NS;
		req->cmd.identify.cntid = 0;
		req->cmd.identify.head.nsid = rp->nsids[i];
		bzero(req->info, sizeof(*req->info));
		nvme_submit_request(req);
		status = nvme_wait_request(req);
		if (status != 0) {
			kprintf("NS FAILED %08x\n", status);
			continue;
		}

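		/*
		 * Find the existing nscary[] slot for this nsid, or pick a
		 * free slot for a new namespace (preferring index i, else
		 * scanning down from the top of the array).
		 */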
		for (j = 0; j < NVME_MAX_NAMESPACES; ++j) {
			if (sc->nscary[j] &&
			    sc->nscary[j]->nsid == rp->nsids[i])
				break;
		}
		if (j == NVME_MAX_NAMESPACES) {
			j = i;
			if (sc->nscary[j] != NULL) {
				for (j = NVME_MAX_NAMESPACES - 1; j >= 0; --j) {
					if (sc->nscary[j] == NULL)
						break;
				}
			}
		}
		if (j < 0) {
			device_printf(sc->dev, "not enough room in nscary for "
					       "namespace %08x\n", rp->nsids[i]);
			nvme_put_request(req);
			continue;
		}
		nsc = sc->nscary[j];
		if (nsc == NULL) {
			nsc = kmalloc(sizeof(*nsc), M_NVME, M_WAITOK | M_ZERO);
			nsc->unit = nvme_alloc_disk_unit();
			sc->nscary[j] = nsc;
		}
		if (sc->nscmax <= j)
			sc->nscmax = j + 1;
		nsc->sc = sc;
		nsc->nsid = rp->nsids[i];
		nsc->state = NVME_NSC_STATE_UNATTACHED;
		nsc->idns = req->info->idns;
		bioq_init(&nsc->bioq);
		lockinit(&nsc->lk, "nvnsc", 0, 0);

		nvme_put_request(req);

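		/*
		 * The formatted LBA format index selects which lba_fmt[]
		 * entry applies; sect_size is log2 of the sector size in
		 * bytes.
		 */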
		j = NVME_FLBAS_SEL_GET(nsc->idns.flbas);
		lbafmt = &nsc->idns.lba_fmt[j];
		nsc->blksize = 1 << lbafmt->sect_size;

		/*
		 * Attach the namespace
		 */
		nvme_disk_attach(nsc);
	}
	kfree(rp, M_NVME);

	sc->admin_func = nvme_admin_state_operating;
	return 1;
}

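/*
 * Normal operating state.  Signal that the initial probe is complete so
 * that nvme_stop_admin_thread() can proceed if a detach is pending.
 */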
static
int
nvme_admin_state_operating(nvme_softc_t *sc)
{
	if ((sc->admin_signal & ADMIN_SIG_PROBED) == 0) {
		atomic_set_int(&sc->admin_signal, ADMIN_SIG_PROBED);
		wakeup(&sc->admin_signal);
	}

	return 0;
}

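/*
 * Failed state.  Queue setup did not succeed; still signal that the
 * probe is complete so a pending detach is not blocked.
 */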
static
int
nvme_admin_state_failed(nvme_softc_t *sc)
{
	if ((sc->admin_signal & ADMIN_SIG_PROBED) == 0) {
		atomic_set_int(&sc->admin_signal, ADMIN_SIG_PROBED);
		wakeup(&sc->admin_signal);
	}

	return 0;
}