xref: /freebsd/sys/dev/nvmf/nvmf_transport.c (revision aa1a8ff2)
1 /*-
2  * SPDX-License-Identifier: BSD-2-Clause
3  *
4  * Copyright (c) 2022-2024 Chelsio Communications, Inc.
5  * Written by: John Baldwin <jhb@FreeBSD.org>
6  */
7 
8 #include <sys/param.h>
9 #include <sys/kernel.h>
10 #include <sys/limits.h>
11 #include <sys/lock.h>
12 #include <sys/malloc.h>
13 #include <sys/mbuf.h>
14 #include <sys/module.h>
15 #include <sys/refcount.h>
16 #include <sys/sysctl.h>
17 #include <sys/sx.h>
18 #include <dev/nvme/nvme.h>
19 #include <dev/nvmf/nvmf.h>
20 #include <dev/nvmf/nvmf_transport.h>
21 #include <dev/nvmf/nvmf_transport_internal.h>
22 
23 /* Transport-independent support for fabrics queue pairs and commands. */
24 
/*
 * One registered transport implementation.  Instances are created by the
 * MOD_LOAD case of nvmf_transport_module_handler() and linked into the
 * nvmf_transports[] list selected by the transport's trtype.
 */
struct nvmf_transport {
	/* Operations vector handed in by the transport module. */
	struct nvmf_transport_ops *nt_ops;

	/*
	 * Count of queue pairs currently allocated from this transport:
	 * raised in nvmf_allocate_qpair(), dropped in nvmf_free_qpair().
	 * Module unload sleeps until it reaches zero.
	 */
	volatile u_int nt_active_qpairs;
	/* Linkage within the per-trtype transport list. */
	SLIST_ENTRY(nvmf_transport) nt_link;
};
31 
/*
 * nvmf_transports[nvmf_trtype] is sorted by priority: the MOD_LOAD case of
 * nvmf_transport_module_handler() inserts each transport ahead of the first
 * entry with a strictly lower priority.
 */
static SLIST_HEAD(, nvmf_transport) nvmf_transports[NVMF_TRTYPE_TCP + 1];
/* Held shared to walk nvmf_transports[] and exclusive to modify it. */
static struct sx nvmf_transports_lock;

/* Malloc type used for struct nvmf_transport allocations. */
static MALLOC_DEFINE(M_NVMF_TRANSPORT, "nvmf_xport",
    "NVMe over Fabrics transport");

/* Declares the kern.nvmf sysctl node. */
SYSCTL_NODE(_kern, OID_AUTO, nvmf, CTLFLAG_RD | CTLFLAG_MPSAFE, 0,
    "NVMe over Fabrics");
41 
42 static bool
43 nvmf_supported_trtype(enum nvmf_trtype trtype)
44 {
45 	return (trtype < nitems(nvmf_transports));
46 }
47 
48 struct nvmf_qpair *
49 nvmf_allocate_qpair(enum nvmf_trtype trtype, bool controller,
50     const struct nvmf_handoff_qpair_params *params,
51     nvmf_qpair_error_t *error_cb, void *error_cb_arg,
52     nvmf_capsule_receive_t *receive_cb, void *receive_cb_arg)
53 {
54 	struct nvmf_transport *nt;
55 	struct nvmf_qpair *qp;
56 
57 	if (!nvmf_supported_trtype(trtype))
58 		return (NULL);
59 
60 	sx_slock(&nvmf_transports_lock);
61 	SLIST_FOREACH(nt, &nvmf_transports[trtype], nt_link) {
62 		qp = nt->nt_ops->allocate_qpair(controller, params);
63 		if (qp != NULL) {
64 			refcount_acquire(&nt->nt_active_qpairs);
65 			break;
66 		}
67 	}
68 	sx_sunlock(&nvmf_transports_lock);
69 	if (qp == NULL)
70 		return (NULL);
71 
72 	qp->nq_transport = nt;
73 	qp->nq_ops = nt->nt_ops;
74 	qp->nq_controller = controller;
75 	qp->nq_error = error_cb;
76 	qp->nq_error_arg = error_cb_arg;
77 	qp->nq_receive = receive_cb;
78 	qp->nq_receive_arg = receive_cb_arg;
79 	qp->nq_admin = params->admin;
80 	return (qp);
81 }
82 
83 void
84 nvmf_free_qpair(struct nvmf_qpair *qp)
85 {
86 	struct nvmf_transport *nt;
87 
88 	nt = qp->nq_transport;
89 	qp->nq_ops->free_qpair(qp);
90 	if (refcount_release(&nt->nt_active_qpairs))
91 		wakeup(nt);
92 }
93 
94 struct nvmf_capsule *
95 nvmf_allocate_command(struct nvmf_qpair *qp, const void *sqe, int how)
96 {
97 	struct nvmf_capsule *nc;
98 
99 	KASSERT(how == M_WAITOK || how == M_NOWAIT,
100 	    ("%s: invalid how", __func__));
101 	nc = qp->nq_ops->allocate_capsule(qp, how);
102 	if (nc == NULL)
103 		return (NULL);
104 
105 	nc->nc_qpair = qp;
106 	nc->nc_qe_len = sizeof(struct nvme_command);
107 	memcpy(&nc->nc_sqe, sqe, nc->nc_qe_len);
108 
109 	/* 4.2 of NVMe base spec: Fabrics always uses SGL. */
110 	nc->nc_sqe.fuse &= ~NVMEM(NVME_CMD_PSDT);
111 	nc->nc_sqe.fuse |= NVMEF(NVME_CMD_PSDT, NVME_PSDT_SGL);
112 	return (nc);
113 }
114 
115 struct nvmf_capsule *
116 nvmf_allocate_response(struct nvmf_qpair *qp, const void *cqe, int how)
117 {
118 	struct nvmf_capsule *nc;
119 
120 	KASSERT(how == M_WAITOK || how == M_NOWAIT,
121 	    ("%s: invalid how", __func__));
122 	nc = qp->nq_ops->allocate_capsule(qp, how);
123 	if (nc == NULL)
124 		return (NULL);
125 
126 	nc->nc_qpair = qp;
127 	nc->nc_qe_len = sizeof(struct nvme_completion);
128 	memcpy(&nc->nc_cqe, cqe, nc->nc_qe_len);
129 	return (nc);
130 }
131 
132 int
133 nvmf_capsule_append_data(struct nvmf_capsule *nc, struct memdesc *mem,
134     size_t len, bool send, nvmf_io_complete_t *complete_cb,
135     void *cb_arg)
136 {
137 	if (nc->nc_data.io_len != 0)
138 		return (EBUSY);
139 
140 	nc->nc_send_data = send;
141 	nc->nc_data.io_mem = *mem;
142 	nc->nc_data.io_len = len;
143 	nc->nc_data.io_complete = complete_cb;
144 	nc->nc_data.io_complete_arg = cb_arg;
145 	return (0);
146 }
147 
148 void
149 nvmf_free_capsule(struct nvmf_capsule *nc)
150 {
151 	nc->nc_qpair->nq_ops->free_capsule(nc);
152 }
153 
154 int
155 nvmf_transmit_capsule(struct nvmf_capsule *nc)
156 {
157 	return (nc->nc_qpair->nq_ops->transmit_capsule(nc));
158 }
159 
160 void
161 nvmf_abort_capsule_data(struct nvmf_capsule *nc, int error)
162 {
163 	if (nc->nc_data.io_len != 0)
164 		nvmf_complete_io_request(&nc->nc_data, 0, error);
165 }
166 
167 void *
168 nvmf_capsule_sqe(struct nvmf_capsule *nc)
169 {
170 	KASSERT(nc->nc_qe_len == sizeof(struct nvme_command),
171 	    ("%s: capsule %p is not a command capsule", __func__, nc));
172 	return (&nc->nc_sqe);
173 }
174 
175 void *
176 nvmf_capsule_cqe(struct nvmf_capsule *nc)
177 {
178 	KASSERT(nc->nc_qe_len == sizeof(struct nvme_completion),
179 	    ("%s: capsule %p is not a response capsule", __func__, nc));
180 	return (&nc->nc_cqe);
181 }
182 
183 uint8_t
184 nvmf_validate_command_capsule(struct nvmf_capsule *nc)
185 {
186 	KASSERT(nc->nc_qe_len == sizeof(struct nvme_command),
187 	    ("%s: capsule %p is not a command capsule", __func__, nc));
188 
189 	if (NVMEV(NVME_CMD_PSDT, nc->nc_sqe.fuse) != NVME_PSDT_SGL)
190 		return (NVME_SC_INVALID_FIELD);
191 
192 	return (nc->nc_qpair->nq_ops->validate_command_capsule(nc));
193 }
194 
195 size_t
196 nvmf_capsule_data_len(const struct nvmf_capsule *nc)
197 {
198 	return (nc->nc_qpair->nq_ops->capsule_data_len(nc));
199 }
200 
201 int
202 nvmf_receive_controller_data(struct nvmf_capsule *nc, uint32_t data_offset,
203     struct memdesc *mem, size_t len, nvmf_io_complete_t *complete_cb,
204     void *cb_arg)
205 {
206 	struct nvmf_io_request io;
207 
208 	io.io_mem = *mem;
209 	io.io_len = len;
210 	io.io_complete = complete_cb;
211 	io.io_complete_arg = cb_arg;
212 	return (nc->nc_qpair->nq_ops->receive_controller_data(nc, data_offset,
213 	    &io));
214 }
215 
216 u_int
217 nvmf_send_controller_data(struct nvmf_capsule *nc, uint32_t data_offset,
218     struct mbuf *m, size_t len)
219 {
220 	MPASS(m_length(m, NULL) == len);
221 	return (nc->nc_qpair->nq_ops->send_controller_data(nc, data_offset, m,
222 	    len));
223 }
224 
225 int
226 nvmf_transport_module_handler(struct module *mod, int what, void *arg)
227 {
228 	struct nvmf_transport_ops *ops = arg;
229 	struct nvmf_transport *nt, *nt2, *prev;
230 	int error;
231 
232 	switch (what) {
233 	case MOD_LOAD:
234 		if (!nvmf_supported_trtype(ops->trtype)) {
235 			printf("NVMF: Unsupported transport %u", ops->trtype);
236 			return (EINVAL);
237 		}
238 
239 		nt = malloc(sizeof(*nt), M_NVMF_TRANSPORT, M_WAITOK | M_ZERO);
240 		nt->nt_ops = arg;
241 
242 		sx_xlock(&nvmf_transports_lock);
243 		if (SLIST_EMPTY(&nvmf_transports[ops->trtype])) {
244 			SLIST_INSERT_HEAD(&nvmf_transports[ops->trtype], nt,
245 			    nt_link);
246 		} else {
247 			prev = NULL;
248 			SLIST_FOREACH(nt2, &nvmf_transports[ops->trtype],
249 			    nt_link) {
250 				if (ops->priority > nt2->nt_ops->priority)
251 					break;
252 				prev = nt2;
253 			}
254 			if (prev == NULL)
255 				SLIST_INSERT_HEAD(&nvmf_transports[ops->trtype],
256 				    nt, nt_link);
257 			else
258 				SLIST_INSERT_AFTER(prev, nt, nt_link);
259 		}
260 		sx_xunlock(&nvmf_transports_lock);
261 		return (0);
262 
263 	case MOD_QUIESCE:
264 		if (!nvmf_supported_trtype(ops->trtype))
265 			return (0);
266 
267 		sx_slock(&nvmf_transports_lock);
268 		SLIST_FOREACH(nt, &nvmf_transports[ops->trtype], nt_link) {
269 			if (nt->nt_ops == ops)
270 				break;
271 		}
272 		if (nt == NULL) {
273 			sx_sunlock(&nvmf_transports_lock);
274 			return (0);
275 		}
276 		if (nt->nt_active_qpairs != 0) {
277 			sx_sunlock(&nvmf_transports_lock);
278 			return (EBUSY);
279 		}
280 		sx_sunlock(&nvmf_transports_lock);
281 		return (0);
282 
283 	case MOD_UNLOAD:
284 		if (!nvmf_supported_trtype(ops->trtype))
285 			return (0);
286 
287 		sx_xlock(&nvmf_transports_lock);
288 		prev = NULL;
289 		SLIST_FOREACH(nt, &nvmf_transports[ops->trtype], nt_link) {
290 			if (nt->nt_ops == ops)
291 				break;
292 			prev = nt;
293 		}
294 		if (nt == NULL) {
295 			KASSERT(nt->nt_active_qpairs == 0,
296 			    ("unregistered transport has connections"));
297 			sx_xunlock(&nvmf_transports_lock);
298 			return (0);
299 		}
300 
301 		if (prev == NULL)
302 			SLIST_REMOVE_HEAD(&nvmf_transports[ops->trtype],
303 			    nt_link);
304 		else
305 			SLIST_REMOVE_AFTER(prev, nt_link);
306 
307 		error = 0;
308 		while (nt->nt_active_qpairs != 0 && error == 0)
309 			error = sx_sleep(nt, &nvmf_transports_lock, PCATCH,
310 			    "nftunld", 0);
311 		sx_xunlock(&nvmf_transports_lock);
312 		if (error != 0)
313 			return (error);
314 		free(nt, M_NVMF_TRANSPORT);
315 		return (0);
316 
317 	default:
318 		return (EOPNOTSUPP);
319 	}
320 }
321 
322 static int
323 nvmf_transport_modevent(module_t mod __unused, int what, void *arg __unused)
324 {
325 	switch (what) {
326 	case MOD_LOAD:
327 		for (u_int i = 0; i < nitems(nvmf_transports); i++)
328 			SLIST_INIT(&nvmf_transports[i]);
329 		sx_init(&nvmf_transports_lock, "nvmf transports");
330 		return (0);
331 	default:
332 		return (EOPNOTSUPP);
333 	}
334 }
335 
336 static moduledata_t nvmf_transport_mod = {
337 	"nvmf_transport",
338 	nvmf_transport_modevent,
339 	0
340 };
341 
342 DECLARE_MODULE(nvmf_transport, nvmf_transport_mod, SI_SUB_DRIVERS,
343     SI_ORDER_FIRST);
344 MODULE_VERSION(nvmf_transport, 1);
345