/*-
 * Copyright (C) 2013 Intel Corporation
 * Copyright (C) 2015 EMC Corporation
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include <sys/param.h>
#include <sys/kernel.h>
#include <sys/systm.h>
#include <sys/bitset.h>
#include <sys/bus.h>
#include <sys/ktr.h>
#include <sys/limits.h>
#include <sys/lock.h>
#include <sys/malloc.h>
#include <sys/module.h>
#include <sys/mutex.h>
#include <sys/queue.h>
#include <sys/socket.h>
#include <sys/sockio.h>
#include <sys/sysctl.h>
#include <sys/taskqueue.h>

#include <net/if.h>
#include <net/if_media.h>
#include <net/if_types.h>
#include <net/if_var.h>
#include <net/bpf.h>
#include <net/ethernet.h>

#include <vm/vm.h>
#include <vm/pmap.h>

#include <machine/bus.h>
#include <machine/cpufunc.h>
#include <machine/pmap.h>

#include <netinet/in.h>
#include <netinet/ip.h>

#include "../ntb_hw/ntb_hw.h"

/*
 * The Non-Transparent Bridge (NTB) is a device on some Intel processors that
 * allows you to connect two systems using a PCI-e link.
 *
 * This module contains a protocol for sending and receiving messages, and
 * exposes that protocol through a simulated ethernet device called ntb.
 *
 * NOTE: Much of the code in this module is shared with Linux. Any patches may
 * be picked up and redistributed in Linux with a dual GPL/BSD license.
 */
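
/*
 * Illustrative client usage of the transport API below: a sketch that follows
 * what ntb_setup_interface() and ntb_start() actually do, not additional
 * driver code:
 *
 *	qp = ntb_transport_create_queue(ifp, ntb, &handlers);
 *	ntb_transport_link_up(qp);
 *	...
 *	rc = ntb_transport_tx_enqueue(qp, m, m, m_length(m, NULL));
 *
 * Transmit completions are delivered through the tx_handler callback and
 * received frames through rx_handler; EAGAIN from the enqueue means the
 * transmit ring is currently full.
 */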

#define QP_SETSIZE	64
BITSET_DEFINE(_qpset, QP_SETSIZE);
#define test_bit(pos, addr)	BIT_ISSET(QP_SETSIZE, (pos), (addr))
#define set_bit(pos, addr)	BIT_SET(QP_SETSIZE, (pos), (addr))
#define clear_bit(pos, addr)	BIT_CLR(QP_SETSIZE, (pos), (addr))
#define ffs_bit(addr)		BIT_FFS(QP_SETSIZE, (addr))
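/*
 * The test_bit/set_bit/clear_bit/ffs_bit wrappers above mirror the Linux
 * bitmap API so that the code shared with the Linux driver stays close to its
 * original form.  Note that ffs_bit(), like ffs(3), is one-based: it returns
 * 0 when no bit is set, so callers decrement the result to get a bit index.
 */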

#define KTR_NTB KTR_SPARE3

#define NTB_TRANSPORT_VERSION	4
#define NTB_RX_MAX_PKTS		64
#define NTB_RXQ_SIZE		300

enum ntb_link_event {
	NTB_LINK_DOWN = 0,
	NTB_LINK_UP,
};

static SYSCTL_NODE(_hw, OID_AUTO, if_ntb, CTLFLAG_RW, 0, "if_ntb");

static unsigned g_if_ntb_debug_level;
SYSCTL_UINT(_hw_if_ntb, OID_AUTO, debug_level, CTLFLAG_RWTUN,
    &g_if_ntb_debug_level, 0, "if_ntb log level -- higher is more verbose");
#define ntb_printf(lvl, ...) do {			\
	if ((lvl) <= g_if_ntb_debug_level) {		\
		if_printf(nt->ifp, __VA_ARGS__);	\
	}						\
} while (0)

static unsigned transport_mtu = IP_MAXPACKET + ETHER_HDR_LEN + ETHER_CRC_LEN;

static uint64_t max_mw_size;
SYSCTL_UQUAD(_hw_if_ntb, OID_AUTO, max_mw_size, CTLFLAG_RDTUN, &max_mw_size, 0,
    "If enabled (non-zero), limit the size of large memory windows. "
    "Both sides of the NTB MUST set the same value here.");

static unsigned max_num_clients;
SYSCTL_UINT(_hw_if_ntb, OID_AUTO, max_num_clients, CTLFLAG_RDTUN,
    &max_num_clients, 0, "Maximum number of NTB transport clients.  "
    "0 (default) - use all available NTB memory windows; "
    "positive integer N - Limit to N memory windows.");

static unsigned enable_xeon_watchdog;
SYSCTL_UINT(_hw_if_ntb, OID_AUTO, enable_xeon_watchdog, CTLFLAG_RDTUN,
    &enable_xeon_watchdog, 0, "If non-zero, write a register every second to "
    "keep a watchdog from tearing down the NTB link");

STAILQ_HEAD(ntb_queue_list, ntb_queue_entry);

typedef uint32_t ntb_q_idx_t;

struct ntb_queue_entry {
	/* ntb_queue list reference */
	STAILQ_ENTRY(ntb_queue_entry) entry;

	/* info on data to be transferred */
	void		*cb_data;
	void		*buf;
	uint32_t	len;
	uint32_t	flags;

	struct ntb_transport_qp		*qp;
	struct ntb_payload_header	*x_hdr;
	ntb_q_idx_t	index;
};

struct ntb_rx_info {
	ntb_q_idx_t	entry;
};

struct ntb_transport_qp {
	struct ntb_transport_ctx	*transport;
	struct ntb_softc	*ntb;

	void			*cb_data;

	bool			client_ready;
	volatile bool		link_is_up;
	uint8_t			qp_num;	/* Only 64 QPs are allowed.  0-63 */

	struct ntb_rx_info	*rx_info;
	struct ntb_rx_info	*remote_rx_info;

	void (*tx_handler)(struct ntb_transport_qp *qp, void *qp_data,
	    void *data, int len);
	struct ntb_queue_list	tx_free_q;
	struct mtx		ntb_tx_free_q_lock;
	caddr_t			tx_mw;
	bus_addr_t		tx_mw_phys;
	ntb_q_idx_t		tx_index;
	ntb_q_idx_t		tx_max_entry;
	uint64_t		tx_max_frame;

	void (*rx_handler)(struct ntb_transport_qp *qp, void *qp_data,
	    void *data, int len);
	struct ntb_queue_list	rx_post_q;
	struct ntb_queue_list	rx_pend_q;
	/* ntb_rx_q_lock: synchronize access to rx_XXXX_q */
	struct mtx		ntb_rx_q_lock;
	struct task		rx_completion_task;
	struct task		rxc_db_work;
	caddr_t			rx_buff;
	ntb_q_idx_t		rx_index;
	ntb_q_idx_t		rx_max_entry;
	uint64_t		rx_max_frame;

	void (*event_handler)(void *data, enum ntb_link_event status);
	struct callout		link_work;
	struct callout		queue_full;
	struct callout		rx_full;

	uint64_t		last_rx_no_buf;

	/* Stats */
	uint64_t		rx_bytes;
	uint64_t		rx_pkts;
	uint64_t		rx_ring_empty;
	uint64_t		rx_err_no_buf;
	uint64_t		rx_err_oflow;
	uint64_t		rx_err_ver;
	uint64_t		tx_bytes;
	uint64_t		tx_pkts;
	uint64_t		tx_ring_full;
	uint64_t		tx_err_no_buf;
};

struct ntb_queue_handlers {
	void (*rx_handler)(struct ntb_transport_qp *qp, void *qp_data,
	    void *data, int len);
	void (*tx_handler)(struct ntb_transport_qp *qp, void *qp_data,
	    void *data, int len);
	void (*event_handler)(void *data, enum ntb_link_event status);
};

struct ntb_transport_mw {
	vm_paddr_t	phys_addr;
	size_t		phys_size;
	size_t		xlat_align;
	size_t		xlat_align_size;
	bus_addr_t	addr_limit;
	/* Tx buff is off vbase / phys_addr */
	caddr_t		vbase;
	size_t		xlat_size;
	size_t		buff_size;
	/* Rx buff is off virt_addr / dma_addr */
	caddr_t		virt_addr;
	bus_addr_t	dma_addr;
};

struct ntb_transport_ctx {
	struct ntb_softc	*ntb;
	struct ifnet		*ifp;
	struct ntb_transport_mw	mw_vec[NTB_MAX_NUM_MW];
	struct ntb_transport_qp	*qp_vec;
	struct _qpset		qp_bitmap;
	struct _qpset		qp_bitmap_free;
	unsigned		mw_count;
	unsigned		qp_count;
	volatile bool		link_is_up;
	struct callout		link_work;
	struct callout		link_watchdog;
	struct task		link_cleanup;
	uint64_t		bufsize;
	u_char			eaddr[ETHER_ADDR_LEN];
	struct mtx		tx_lock;
	struct mtx		rx_lock;

	/* The hardcoded single queue pair in ntb_setup_interface() */
	struct ntb_transport_qp *qp;
};

static struct ntb_transport_ctx net_softc;

enum {
	IF_NTB_DESC_DONE_FLAG = 1 << 0,
	IF_NTB_LINK_DOWN_FLAG = 1 << 1,
};

struct ntb_payload_header {
	ntb_q_idx_t ver;
	uint32_t len;
	uint32_t flags;
};

enum {
	/*
	 * The order of this enum is part of the if_ntb remote protocol.  Do
	 * not reorder without bumping the protocol version (and it is
	 * probably best to keep the protocol in lock-step with the Linux NTB
	 * driver).
	 */
	IF_NTB_VERSION = 0,
	IF_NTB_QP_LINKS,
	IF_NTB_NUM_QPS,
	IF_NTB_NUM_MWS,
	/*
	 * N.B.: transport_link_work assumes MW1 enums = MW0 + 2.
	 */
	IF_NTB_MW0_SZ_HIGH,
	IF_NTB_MW0_SZ_LOW,
	IF_NTB_MW1_SZ_HIGH,
	IF_NTB_MW1_SZ_LOW,
	IF_NTB_MAX_SPAD,

	/*
	 * Some NTB-using hardware has a watchdog to work around NTB hangs; if
	 * a register or doorbell isn't written every few seconds, the link is
	 * torn down.  Write an otherwise unused register every few seconds to
	 * work around this watchdog.
	 */
	IF_NTB_WATCHDOG_SPAD = 15
};
CTASSERT(IF_NTB_WATCHDOG_SPAD < XEON_SPAD_COUNT &&
    IF_NTB_WATCHDOG_SPAD < ATOM_SPAD_COUNT);

#define QP_TO_MW(nt, qp)	((qp) % nt->mw_count)
#define NTB_QP_DEF_NUM_ENTRIES	100
#define NTB_LINK_DOWN_TIMEOUT	10

static int ntb_handle_module_events(struct module *m, int what, void *arg);
static int ntb_setup_interface(void);
static int ntb_teardown_interface(void);
static void ntb_net_init(void *arg);
static int ntb_ioctl(struct ifnet *ifp, u_long command, caddr_t data);
static void ntb_start(struct ifnet *ifp);
static void ntb_net_tx_handler(struct ntb_transport_qp *qp, void *qp_data,
    void *data, int len);
static void ntb_net_rx_handler(struct ntb_transport_qp *qp, void *qp_data,
    void *data, int len);
static void ntb_net_event_handler(void *data, enum ntb_link_event status);
static int ntb_transport_probe(struct ntb_softc *ntb);
static void ntb_transport_free(struct ntb_transport_ctx *);
static void ntb_transport_init_queue(struct ntb_transport_ctx *nt,
    unsigned int qp_num);
static void ntb_transport_free_queue(struct ntb_transport_qp *qp);
static struct ntb_transport_qp *ntb_transport_create_queue(void *data,
    struct ntb_softc *ntb, const struct ntb_queue_handlers *handlers);
static void ntb_transport_link_up(struct ntb_transport_qp *qp);
static int ntb_transport_tx_enqueue(struct ntb_transport_qp *qp, void *cb,
    void *data, unsigned int len);
static int ntb_process_tx(struct ntb_transport_qp *qp,
    struct ntb_queue_entry *entry);
static void ntb_memcpy_tx(struct ntb_transport_qp *qp,
    struct ntb_queue_entry *entry, void *offset);
static void ntb_qp_full(void *arg);
static void ntb_transport_rxc_db(void *arg, int pending);
static int ntb_process_rxc(struct ntb_transport_qp *qp);
static void ntb_memcpy_rx(struct ntb_transport_qp *qp,
    struct ntb_queue_entry *entry, void *offset);
static inline void ntb_rx_copy_callback(struct ntb_transport_qp *qp,
    void *data);
static void ntb_complete_rxc(void *arg, int pending);
static void ntb_transport_doorbell_callback(void *data, uint32_t vector);
static void ntb_transport_event_callback(void *data);
static void ntb_transport_link_work(void *arg);
static int ntb_set_mw(struct ntb_transport_ctx *, int num_mw, size_t size);
static void ntb_free_mw(struct ntb_transport_ctx *nt, int num_mw);
static int ntb_transport_setup_qp_mw(struct ntb_transport_ctx *nt,
    unsigned int qp_num);
static void ntb_qp_link_work(void *arg);
static void ntb_transport_link_cleanup(struct ntb_transport_ctx *nt);
static void ntb_transport_link_cleanup_work(void *, int);
static void ntb_qp_link_down(struct ntb_transport_qp *qp);
static void ntb_qp_link_down_reset(struct ntb_transport_qp *qp);
static void ntb_qp_link_cleanup(struct ntb_transport_qp *qp);
static void ntb_transport_link_down(struct ntb_transport_qp *qp);
static void ntb_send_link_down(struct ntb_transport_qp *qp);
static void ntb_list_add(struct mtx *lock, struct ntb_queue_entry *entry,
    struct ntb_queue_list *list);
static struct ntb_queue_entry *ntb_list_rm(struct mtx *lock,
    struct ntb_queue_list *list);
static struct ntb_queue_entry *ntb_list_mv(struct mtx *lock,
    struct ntb_queue_list *from, struct ntb_queue_list *to);
static void create_random_local_eui48(u_char *eaddr);
static unsigned int ntb_transport_max_size(struct ntb_transport_qp *qp);
static void xeon_link_watchdog_hb(void *);

static const struct ntb_ctx_ops ntb_transport_ops = {
	.link_event = ntb_transport_event_callback,
	.db_event = ntb_transport_doorbell_callback,
};

MALLOC_DEFINE(M_NTB_IF, "if_ntb", "ntb network driver");

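/*
 * Linux-style MMIO write helper for the code shared with the Linux driver; it
 * is used below for stores that land in the memory window BAR, such as
 * payload headers and the peer's rx_info index.
 */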
static inline void
iowrite32(uint32_t val, void *addr)
{

	bus_space_write_4(X86_BUS_SPACE_MEM, 0 /* HACK */, (uintptr_t)addr,
	    val);
}

/* Module setup and teardown */
static int
ntb_handle_module_events(struct module *m, int what, void *arg)
{
	int err = 0;

	switch (what) {
	case MOD_LOAD:
		err = ntb_setup_interface();
		break;
	case MOD_UNLOAD:
		err = ntb_teardown_interface();
		break;
	default:
		err = EOPNOTSUPP;
		break;
	}
	return (err);
}

static moduledata_t if_ntb_mod = {
	"if_ntb",
	ntb_handle_module_events,
	NULL
};

DECLARE_MODULE(if_ntb, if_ntb_mod, SI_SUB_KLD, SI_ORDER_ANY);
MODULE_DEPEND(if_ntb, ntb_hw, 1, 1, 1);

static int
ntb_setup_interface(void)
{
	struct ifnet *ifp;
	struct ntb_queue_handlers handlers = { ntb_net_rx_handler,
	    ntb_net_tx_handler, ntb_net_event_handler };
	int rc;

	net_softc.ntb = devclass_get_softc(devclass_find("ntb_hw"), 0);
	if (net_softc.ntb == NULL) {
		printf("ntb: Cannot find devclass\n");
		return (ENXIO);
	}

	ifp = net_softc.ifp = if_alloc(IFT_ETHER);
	if (ifp == NULL) {
		ntb_transport_free(&net_softc);
		printf("ntb: Cannot allocate ifnet structure\n");
		return (ENOMEM);
	}
	if_initname(ifp, "ntb", 0);

	rc = ntb_transport_probe(net_softc.ntb);
	if (rc != 0) {
		printf("ntb: Cannot init transport: %d\n", rc);
		if_free(net_softc.ifp);
		return (rc);
	}

	net_softc.qp = ntb_transport_create_queue(ifp, net_softc.ntb,
	    &handlers);
	ifp->if_init = ntb_net_init;
	ifp->if_softc = &net_softc;
	ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX;
	ifp->if_ioctl = ntb_ioctl;
	ifp->if_start = ntb_start;
	IFQ_SET_MAXLEN(&ifp->if_snd, IFQ_MAXLEN);
	ifp->if_snd.ifq_drv_maxlen = IFQ_MAXLEN;
	IFQ_SET_READY(&ifp->if_snd);
	create_random_local_eui48(net_softc.eaddr);
	ether_ifattach(ifp, net_softc.eaddr);
	ifp->if_capabilities = IFCAP_HWCSUM | IFCAP_JUMBO_MTU;
	ifp->if_capenable = ifp->if_capabilities;
	ifp->if_mtu = ntb_transport_max_size(net_softc.qp) - ETHER_HDR_LEN -
	    ETHER_CRC_LEN;

	ntb_transport_link_up(net_softc.qp);
	net_softc.bufsize = ntb_transport_max_size(net_softc.qp) +
	    sizeof(struct ether_header);
	return (0);
}

static int
ntb_teardown_interface(void)
{

	if (net_softc.qp != NULL) {
		ntb_transport_link_down(net_softc.qp);

		ntb_transport_free_queue(net_softc.qp);
		ntb_transport_free(&net_softc);
	}

	if (net_softc.ifp != NULL) {
		ether_ifdetach(net_softc.ifp);
		if_free(net_softc.ifp);
		net_softc.ifp = NULL;
	}

	return (0);
}

/* Network device interface */

static void
ntb_net_init(void *arg)
{
	struct ntb_transport_ctx *ntb_softc = arg;
	struct ifnet *ifp = ntb_softc->ifp;

	ifp->if_drv_flags |= IFF_DRV_RUNNING;
	ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;
	ifp->if_flags |= IFF_UP;
	if_link_state_change(ifp, LINK_STATE_UP);
}

static int
ntb_ioctl(struct ifnet *ifp, u_long command, caddr_t data)
{
	struct ntb_transport_ctx *nt = ifp->if_softc;
	struct ifreq *ifr = (struct ifreq *)data;
	int error = 0;

	switch (command) {
	case SIOCSIFMTU:
	    {
		if (ifr->ifr_mtu > ntb_transport_max_size(nt->qp) -
		    ETHER_HDR_LEN - ETHER_CRC_LEN) {
			error = EINVAL;
			break;
		}

		ifp->if_mtu = ifr->ifr_mtu;
		break;
	    }
	default:
		error = ether_ioctl(ifp, command, data);
		break;
	}

	return (error);
}

static void
ntb_start(struct ifnet *ifp)
{
	struct mbuf *m_head;
	struct ntb_transport_ctx *nt = ifp->if_softc;
	int rc;

	mtx_lock(&nt->tx_lock);
	ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;
	CTR0(KTR_NTB, "TX: ntb_start");
	while (!IFQ_DRV_IS_EMPTY(&ifp->if_snd)) {
		IFQ_DRV_DEQUEUE(&ifp->if_snd, m_head);
		CTR1(KTR_NTB, "TX: start mbuf %p", m_head);
		rc = ntb_transport_tx_enqueue(nt->qp, m_head, m_head,
		    m_length(m_head, NULL));
		if (rc != 0) {
			CTR1(KTR_NTB,
			    "TX: could not tx mbuf %p. Returning to snd q",
			    m_head);
			if (rc == EAGAIN) {
				ifp->if_drv_flags |= IFF_DRV_OACTIVE;
				IFQ_DRV_PREPEND(&ifp->if_snd, m_head);
				callout_reset(&nt->qp->queue_full, hz / 1000,
				    ntb_qp_full, ifp);
			}
			break;
		}
	}
	mtx_unlock(&nt->tx_lock);
}

/* Network Device Callbacks */
static void
ntb_net_tx_handler(struct ntb_transport_qp *qp, void *qp_data, void *data,
    int len)
{

	m_freem(data);
	CTR1(KTR_NTB, "TX: tx_handler freeing mbuf %p", data);
}

static void
ntb_net_rx_handler(struct ntb_transport_qp *qp, void *qp_data, void *data,
    int len)
{
	struct mbuf *m = data;
	struct ifnet *ifp = qp_data;

	CTR0(KTR_NTB, "RX: rx handler");
	(*ifp->if_input)(ifp, m);
}

static void
ntb_net_event_handler(void *data, enum ntb_link_event status)
{
	struct ifnet *ifp;

	ifp = data;
	(void)ifp;

	/* XXX The Linux driver munges the carrier status here. */

	switch (status) {
	case NTB_LINK_DOWN:
		break;
	case NTB_LINK_UP:
		break;
	default:
		panic("Bogus ntb_link_event %u\n", status);
	}
}

/* Transport Init and teardown */

static void
xeon_link_watchdog_hb(void *arg)
{
	struct ntb_transport_ctx *nt;

	nt = arg;
	ntb_spad_write(nt->ntb, IF_NTB_WATCHDOG_SPAD, 0);
	callout_reset(&nt->link_watchdog, 1 * hz, xeon_link_watchdog_hb, nt);
}

static int
ntb_transport_probe(struct ntb_softc *ntb)
{
	struct ntb_transport_ctx *nt = &net_softc;
	struct ntb_transport_mw *mw;
	uint64_t qp_bitmap;
	int rc;
	unsigned i;

	nt->mw_count = ntb_mw_count(ntb);
	for (i = 0; i < nt->mw_count; i++) {
		mw = &nt->mw_vec[i];

		rc = ntb_mw_get_range(ntb, i, &mw->phys_addr, &mw->vbase,
		    &mw->phys_size, &mw->xlat_align, &mw->xlat_align_size,
		    &mw->addr_limit);
		if (rc != 0)
			goto err;

		mw->buff_size = 0;
		mw->xlat_size = 0;
		mw->virt_addr = NULL;
		mw->dma_addr = 0;
	}

	qp_bitmap = ntb_db_valid_mask(ntb);
	nt->qp_count = flsll(qp_bitmap);
	KASSERT(nt->qp_count != 0, ("bogus db bitmap"));
	nt->qp_count -= 1;
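	/*
	 * Use one fewer queue pair than there are doorbell bits; the highest
	 * order bit is left for purposes other than QP doorbells (e.g., link
	 * signaling on some hardware), hence the decrement above.
	 */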

	if (max_num_clients != 0 && max_num_clients < nt->qp_count)
		nt->qp_count = max_num_clients;
	else if (nt->mw_count < nt->qp_count)
		nt->qp_count = nt->mw_count;
	KASSERT(nt->qp_count <= QP_SETSIZE, ("invalid qp_count"));

	mtx_init(&nt->tx_lock, "ntb transport tx", NULL, MTX_DEF);
	mtx_init(&nt->rx_lock, "ntb transport rx", NULL, MTX_DEF);

	nt->qp_vec = malloc(nt->qp_count * sizeof(*nt->qp_vec), M_NTB_IF,
	    M_WAITOK | M_ZERO);

	for (i = 0; i < nt->qp_count; i++) {
		set_bit(i, &nt->qp_bitmap);
		set_bit(i, &nt->qp_bitmap_free);
		ntb_transport_init_queue(nt, i);
	}

	callout_init(&nt->link_work, 0);
	callout_init(&nt->link_watchdog, 0);
	TASK_INIT(&nt->link_cleanup, 0, ntb_transport_link_cleanup_work, nt);

	rc = ntb_set_ctx(ntb, nt, &ntb_transport_ops);
	if (rc != 0)
		goto err;

	nt->link_is_up = false;
	ntb_link_enable(ntb, NTB_SPEED_AUTO, NTB_WIDTH_AUTO);
	ntb_link_event(ntb);

	callout_reset(&nt->link_work, 0, ntb_transport_link_work, nt);
	if (enable_xeon_watchdog != 0)
		callout_reset(&nt->link_watchdog, 0, xeon_link_watchdog_hb, nt);
	return (0);

err:
	free(nt->qp_vec, M_NTB_IF);
	nt->qp_vec = NULL;
	return (rc);
}

static void
ntb_transport_free(struct ntb_transport_ctx *nt)
{
	struct ntb_softc *ntb = nt->ntb;
	struct _qpset qp_bitmap_alloc;
	uint8_t i;

	ntb_transport_link_cleanup(nt);
	taskqueue_drain(taskqueue_swi, &nt->link_cleanup);
	callout_drain(&nt->link_work);
	callout_drain(&nt->link_watchdog);

	BIT_COPY(QP_SETSIZE, &nt->qp_bitmap, &qp_bitmap_alloc);
	BIT_NAND(QP_SETSIZE, &qp_bitmap_alloc, &nt->qp_bitmap_free);

	/* Free any QPs that are still allocated */
	for (i = 0; i < nt->qp_count; i++)
		if (test_bit(i, &qp_bitmap_alloc))
			ntb_transport_free_queue(&nt->qp_vec[i]);

	ntb_link_disable(ntb);
	ntb_clear_ctx(ntb);

	for (i = 0; i < nt->mw_count; i++)
		ntb_free_mw(nt, i);

	free(nt->qp_vec, M_NTB_IF);
}

static void
ntb_transport_init_queue(struct ntb_transport_ctx *nt, unsigned int qp_num)
{
	struct ntb_transport_mw *mw;
	struct ntb_transport_qp *qp;
	vm_paddr_t mw_base;
	uint64_t mw_size, qp_offset;
	size_t tx_size;
	unsigned num_qps_mw, mw_num, mw_count;

	mw_count = nt->mw_count;
	mw_num = QP_TO_MW(nt, qp_num);
	mw = &nt->mw_vec[mw_num];

	qp = &nt->qp_vec[qp_num];
	qp->qp_num = qp_num;
	qp->transport = nt;
	qp->ntb = nt->ntb;
	qp->client_ready = false;
	qp->event_handler = NULL;
	ntb_qp_link_down_reset(qp);

	if (nt->qp_count % mw_count && mw_num + 1 < nt->qp_count / mw_count)
		num_qps_mw = nt->qp_count / mw_count + 1;
	else
		num_qps_mw = nt->qp_count / mw_count;

	mw_base = mw->phys_addr;
	mw_size = mw->phys_size;

	tx_size = mw_size / num_qps_mw;
	qp_offset = tx_size * (qp_num / mw_count);
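	/*
	 * QPs are striped round-robin across the memory windows (QP_TO_MW()
	 * is a modulo), so qp_num / mw_count above is this QP's slot within
	 * its window, and each window is split into num_qps_mw equal regions.
	 */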

	qp->tx_mw = mw->vbase + qp_offset;
	KASSERT(qp->tx_mw != NULL, ("uh oh?"));

	/* XXX Assumes that a vm_paddr_t is equivalent to bus_addr_t */
	qp->tx_mw_phys = mw_base + qp_offset;
	KASSERT(qp->tx_mw_phys != 0, ("uh oh?"));

	tx_size -= sizeof(struct ntb_rx_info);
	qp->rx_info = (void *)(qp->tx_mw + tx_size);

	/* Due to house-keeping, there must be at least 2 buffs */
	qp->tx_max_frame = qmin(tx_size / 2,
	    transport_mtu + sizeof(struct ntb_payload_header));
	qp->tx_max_entry = tx_size / qp->tx_max_frame;

	callout_init(&qp->link_work, 0);
	callout_init(&qp->queue_full, 1);
	callout_init(&qp->rx_full, 1);

	mtx_init(&qp->ntb_rx_q_lock, "ntb rx q", NULL, MTX_SPIN);
	mtx_init(&qp->ntb_tx_free_q_lock, "ntb tx free q", NULL, MTX_SPIN);
	TASK_INIT(&qp->rx_completion_task, 0, ntb_complete_rxc, qp);
	TASK_INIT(&qp->rxc_db_work, 0, ntb_transport_rxc_db, qp);

	STAILQ_INIT(&qp->rx_post_q);
	STAILQ_INIT(&qp->rx_pend_q);
	STAILQ_INIT(&qp->tx_free_q);

	callout_reset(&qp->link_work, 0, ntb_qp_link_work, qp);
}

static void
ntb_transport_free_queue(struct ntb_transport_qp *qp)
{
	struct ntb_queue_entry *entry;

	if (qp == NULL)
		return;

	callout_drain(&qp->link_work);

	ntb_db_set_mask(qp->ntb, 1ull << qp->qp_num);
	taskqueue_drain(taskqueue_swi, &qp->rxc_db_work);
	taskqueue_drain(taskqueue_swi, &qp->rx_completion_task);

	qp->cb_data = NULL;
	qp->rx_handler = NULL;
	qp->tx_handler = NULL;
	qp->event_handler = NULL;

	while ((entry = ntb_list_rm(&qp->ntb_rx_q_lock, &qp->rx_pend_q)))
		free(entry, M_NTB_IF);

	while ((entry = ntb_list_rm(&qp->ntb_rx_q_lock, &qp->rx_post_q)))
		free(entry, M_NTB_IF);

	while ((entry = ntb_list_rm(&qp->ntb_tx_free_q_lock, &qp->tx_free_q)))
		free(entry, M_NTB_IF);

	set_bit(qp->qp_num, &qp->transport->qp_bitmap_free);
}

/**
 * ntb_transport_create_queue - Create a new NTB transport layer queue
 * @data: pointer passed back to the callbacks (the ifnet, in this driver)
 * @ntb: NTB hardware context
 * @handlers: receive, transmit, and event callback functions
 *
 * Create a new NTB transport layer queue and provide the queue with a callback
 * routine for both transmit and receive.  The receive callback routine will be
 * used to pass up data when the transport has received it on the queue.  The
 * transmit callback routine will be called when the transport has completed
 * the transmission of the data on the queue and the data is ready to be freed.
 *
 * RETURNS: pointer to newly created ntb_queue, NULL on error.
 */
static struct ntb_transport_qp *
ntb_transport_create_queue(void *data, struct ntb_softc *ntb,
    const struct ntb_queue_handlers *handlers)
{
	struct ntb_queue_entry *entry;
	struct ntb_transport_qp *qp;
	struct ntb_transport_ctx *nt;
	unsigned int free_queue;
	int i;

	nt = ntb_get_ctx(ntb, NULL);
	KASSERT(nt != NULL, ("bogus"));

	free_queue = ffs_bit(&nt->qp_bitmap);
	if (free_queue == 0)
		return (NULL);

	/* decrement free_queue to make it zero based */
	free_queue--;

	qp = &nt->qp_vec[free_queue];
	clear_bit(qp->qp_num, &nt->qp_bitmap_free);
	qp->cb_data = data;
	qp->rx_handler = handlers->rx_handler;
	qp->tx_handler = handlers->tx_handler;
	qp->event_handler = handlers->event_handler;

	for (i = 0; i < NTB_QP_DEF_NUM_ENTRIES; i++) {
		entry = malloc(sizeof(*entry), M_NTB_IF, M_WAITOK | M_ZERO);
		entry->cb_data = nt->ifp;
		entry->buf = NULL;
		entry->len = transport_mtu;
		ntb_list_add(&qp->ntb_rx_q_lock, entry, &qp->rx_pend_q);
	}

	for (i = 0; i < NTB_QP_DEF_NUM_ENTRIES; i++) {
		entry = malloc(sizeof(*entry), M_NTB_IF, M_WAITOK | M_ZERO);
		ntb_list_add(&qp->ntb_tx_free_q_lock, entry, &qp->tx_free_q);
	}

	ntb_db_clear(ntb, 1ull << qp->qp_num);
	ntb_db_clear_mask(ntb, 1ull << qp->qp_num);
	return (qp);
}

/**
 * ntb_transport_link_up - Notify NTB transport of client readiness to use queue
 * @qp: NTB transport layer queue to be enabled
 *
 * Notify NTB transport layer of client readiness to use queue
 */
static void
ntb_transport_link_up(struct ntb_transport_qp *qp)
{
	struct ntb_transport_ctx *nt;

	if (qp == NULL)
		return;

	qp->client_ready = true;

	nt = qp->transport;
	ntb_printf(2, "qp client ready\n");

	if (qp->transport->link_is_up)
		callout_reset(&qp->link_work, 0, ntb_qp_link_work, qp);
}

/* Transport Tx */

/**
 * ntb_transport_tx_enqueue - Enqueue a new NTB queue entry
 * @qp: NTB transport layer queue the entry is to be enqueued on
 * @cb: per buffer pointer for callback function to use
 * @data: pointer to data buffer that will be sent
 * @len: length of the data buffer
 *
 * Enqueue a new transmit buffer onto the transport queue from which an NTB
 * payload will be transmitted.  This assumes that a lock is being held to
 * serialize access to the qp.
 *
 * RETURNS: An appropriate ERRNO error value on error, or zero for success.
 */
static int
ntb_transport_tx_enqueue(struct ntb_transport_qp *qp, void *cb, void *data,
    unsigned int len)
{
	struct ntb_queue_entry *entry;
	int rc;

	if (qp == NULL || !qp->link_is_up || len == 0) {
		CTR0(KTR_NTB, "TX: link not up");
		return (EINVAL);
	}

	entry = ntb_list_rm(&qp->ntb_tx_free_q_lock, &qp->tx_free_q);
	if (entry == NULL) {
		CTR0(KTR_NTB, "TX: could not get entry from tx_free_q");
		qp->tx_err_no_buf++;
		return (EBUSY);
	}
	CTR1(KTR_NTB, "TX: got entry %p from tx_free_q", entry);

	entry->cb_data = cb;
	entry->buf = data;
	entry->len = len;
	entry->flags = 0;

	rc = ntb_process_tx(qp, entry);
	if (rc != 0) {
		ntb_list_add(&qp->ntb_tx_free_q_lock, entry, &qp->tx_free_q);
		CTR1(KTR_NTB,
		    "TX: process_tx failed. Returning entry %p to tx_free_q",
		    entry);
	}
	return (rc);
}

static int
ntb_process_tx(struct ntb_transport_qp *qp, struct ntb_queue_entry *entry)
{
	void *offset;

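	/*
	 * The transmit side is a simple ring: tx_index is the local producer
	 * slot, and remote_rx_info->entry is the peer's consumer index,
	 * published by the peer as it completes receives.  The producer
	 * catching up to the consumer means the ring is full.
	 */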
	offset = qp->tx_mw + qp->tx_max_frame * qp->tx_index;
	CTR3(KTR_NTB,
	    "TX: process_tx: tx_pkts=%lu, tx_index=%u, remote entry=%u",
	    qp->tx_pkts, qp->tx_index, qp->remote_rx_info->entry);
	if (qp->tx_index == qp->remote_rx_info->entry) {
		CTR0(KTR_NTB, "TX: ring full");
		qp->tx_ring_full++;
		return (EAGAIN);
	}

	if (entry->len > qp->tx_max_frame - sizeof(struct ntb_payload_header)) {
		if (qp->tx_handler != NULL)
			qp->tx_handler(qp, qp->cb_data, entry->buf,
			    EIO);
		else
			m_freem(entry->buf);

		entry->buf = NULL;
		ntb_list_add(&qp->ntb_tx_free_q_lock, entry, &qp->tx_free_q);
		CTR1(KTR_NTB,
		    "TX: frame too big. returning entry %p to tx_free_q",
		    entry);
		return (0);
	}
	CTR2(KTR_NTB, "TX: copying entry %p to offset %p", entry, offset);
	ntb_memcpy_tx(qp, entry, offset);

	qp->tx_index++;
	qp->tx_index %= qp->tx_max_entry;

	qp->tx_pkts++;

	return (0);
}

static void
ntb_memcpy_tx(struct ntb_transport_qp *qp, struct ntb_queue_entry *entry,
    void *offset)
{
	struct ntb_payload_header *hdr;

	/* This piece is from Linux' ntb_async_tx() */
	hdr = (struct ntb_payload_header *)((char *)offset + qp->tx_max_frame -
	    sizeof(struct ntb_payload_header));
	entry->x_hdr = hdr;
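	/*
	 * The payload header occupies the tail of each frame slot.  The
	 * receiver polls hdr->flags, so the payload copy (ordered by the
	 * wmb() below) must be visible before DESC_DONE is set.
	 */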
	iowrite32(entry->len, &hdr->len);
	iowrite32(qp->tx_pkts, &hdr->ver);

	/* This piece is ntb_memcpy_tx() */
	CTR2(KTR_NTB, "TX: copying %d bytes to offset %p", entry->len, offset);
	if (entry->buf != NULL) {
		m_copydata((struct mbuf *)entry->buf, 0, entry->len, offset);

		/*
		 * Ensure that the data is fully copied before setting the
		 * flags
		 */
		wmb();
	}

	/* The rest is ntb_tx_copy_callback() */
	iowrite32(entry->flags | IF_NTB_DESC_DONE_FLAG, &hdr->flags);
	CTR1(KTR_NTB, "TX: hdr %p set DESC_DONE", hdr);

	ntb_peer_db_set(qp->ntb, 1ull << qp->qp_num);

	/*
	 * The entry length can only be zero if the packet is intended to be a
	 * "link down" or similar.  Since no payload is being sent in these
	 * cases, there is nothing to add to the completion queue.
	 */
	if (entry->len > 0) {
		qp->tx_bytes += entry->len;

		if (qp->tx_handler)
			qp->tx_handler(qp, qp->cb_data, entry->buf,
			    entry->len);
		else
			m_freem(entry->buf);
		entry->buf = NULL;
	}

	CTR3(KTR_NTB,
	    "TX: entry %p sent. hdr->ver = %u, hdr->flags = 0x%x, Returning "
	    "to tx_free_q", entry, hdr->ver, hdr->flags);
	ntb_list_add(&qp->ntb_tx_free_q_lock, entry, &qp->tx_free_q);
}

static void
ntb_qp_full(void *arg)
{

	CTR0(KTR_NTB, "TX: qp_full callout");
	ntb_start(arg);
}

/* Transport Rx */
static void
ntb_transport_rxc_db(void *arg, int pending __unused)
{
	struct ntb_transport_qp *qp = arg;
	ntb_q_idx_t i;
	int rc;

	/*
	 * Limit the number of packets processed in a single interrupt to
	 * provide fairness to others
	 */
	CTR0(KTR_NTB, "RX: transport_rx");
	mtx_lock(&qp->transport->rx_lock);
	for (i = 0; i < qp->rx_max_entry; i++) {
		rc = ntb_process_rxc(qp);
		if (rc != 0) {
			CTR0(KTR_NTB, "RX: process_rxc failed");
			break;
		}
	}
	mtx_unlock(&qp->transport->rx_lock);

	if (i == qp->rx_max_entry)
		taskqueue_enqueue(taskqueue_swi, &qp->rxc_db_work);
	else if ((ntb_db_read(qp->ntb) & (1ull << qp->qp_num)) != 0) {
		/* If db is set, clear it and read it back to commit clear. */
		ntb_db_clear(qp->ntb, 1ull << qp->qp_num);
		(void)ntb_db_read(qp->ntb);

		/*
		 * An interrupt may have arrived between finishing
		 * ntb_process_rxc and clearing the doorbell bit: there might
		 * be some more work to do.
		 */
		taskqueue_enqueue(taskqueue_swi, &qp->rxc_db_work);
	}
}

static int
ntb_process_rxc(struct ntb_transport_qp *qp)
{
	struct ntb_payload_header *hdr;
	struct ntb_queue_entry *entry;
	caddr_t offset;

	offset = qp->rx_buff + qp->rx_max_frame * qp->rx_index;
	hdr = (void *)(offset + qp->rx_max_frame -
	    sizeof(struct ntb_payload_header));

	CTR1(KTR_NTB, "RX: process_rxc rx_index = %u", qp->rx_index);
	if ((hdr->flags & IF_NTB_DESC_DONE_FLAG) == 0) {
		CTR0(KTR_NTB, "RX: hdr not done");
		qp->rx_ring_empty++;
		return (EAGAIN);
	}

	if ((hdr->flags & IF_NTB_LINK_DOWN_FLAG) != 0) {
		CTR0(KTR_NTB, "RX: link down");
		ntb_qp_link_down(qp);
		hdr->flags = 0;
		return (EAGAIN);
	}

	if (hdr->ver != (uint32_t)qp->rx_pkts) {
		CTR2(KTR_NTB, "RX: ver != rx_pkts (%x != %lx). "
		    "Returning entry to rx_pend_q", hdr->ver, qp->rx_pkts);
		qp->rx_err_ver++;
		return (EIO);
	}

	entry = ntb_list_mv(&qp->ntb_rx_q_lock, &qp->rx_pend_q, &qp->rx_post_q);
	if (entry == NULL) {
		qp->rx_err_no_buf++;
		CTR0(KTR_NTB, "RX: No entries in rx_pend_q");
		return (EAGAIN);
	}
	callout_stop(&qp->rx_full);
	CTR1(KTR_NTB, "RX: rx entry %p from rx_pend_q", entry);

	entry->x_hdr = hdr;
	entry->index = qp->rx_index;

	if (hdr->len > entry->len) {
		CTR2(KTR_NTB, "RX: len too long. Wanted %ju got %ju",
		    (uintmax_t)hdr->len, (uintmax_t)entry->len);
		qp->rx_err_oflow++;

		entry->len = -EIO;
		entry->flags |= IF_NTB_DESC_DONE_FLAG;

		taskqueue_enqueue(taskqueue_swi, &qp->rx_completion_task);
	} else {
		qp->rx_bytes += hdr->len;
		qp->rx_pkts++;

		CTR1(KTR_NTB, "RX: received %ld rx_pkts", qp->rx_pkts);

		entry->len = hdr->len;

		ntb_memcpy_rx(qp, entry, offset);
	}

	qp->rx_index++;
	qp->rx_index %= qp->rx_max_entry;
	return (0);
}

static void
ntb_memcpy_rx(struct ntb_transport_qp *qp, struct ntb_queue_entry *entry,
    void *offset)
{
	struct ifnet *ifp = entry->cb_data;
	unsigned int len = entry->len;
	struct mbuf *m;

	CTR2(KTR_NTB, "RX: copying %d bytes from offset %p", len, offset);
	m = m_devget(offset, len, 0, ifp, NULL);
	m->m_pkthdr.csum_flags = CSUM_IP_CHECKED | CSUM_IP_VALID;

	entry->buf = (void *)m;

	/* Ensure that the data is globally visible before clearing the flag */
	wmb();

	CTR2(KTR_NTB, "RX: copied entry %p to mbuf %p.", entry, m);
	ntb_rx_copy_callback(qp, entry);
}

static inline void
ntb_rx_copy_callback(struct ntb_transport_qp *qp, void *data)
{
	struct ntb_queue_entry *entry;

	entry = data;
	entry->flags |= IF_NTB_DESC_DONE_FLAG;
	taskqueue_enqueue(taskqueue_swi, &qp->rx_completion_task);
}

static void
ntb_complete_rxc(void *arg, int pending)
{
	struct ntb_transport_qp *qp = arg;
	struct ntb_queue_entry *entry;
	struct mbuf *m;
	unsigned len;

	CTR0(KTR_NTB, "RX: rx_completion_task");

	mtx_lock_spin(&qp->ntb_rx_q_lock);

	while (!STAILQ_EMPTY(&qp->rx_post_q)) {
		entry = STAILQ_FIRST(&qp->rx_post_q);
		if ((entry->flags & IF_NTB_DESC_DONE_FLAG) == 0)
			break;

		entry->x_hdr->flags = 0;
		iowrite32(entry->index, &qp->rx_info->entry);

		STAILQ_REMOVE_HEAD(&qp->rx_post_q, entry);

		len = entry->len;
		m = entry->buf;

		/*
		 * Re-initialize queue_entry for reuse; rx_handler takes
		 * ownership of the mbuf.
		 */
		entry->buf = NULL;
		entry->len = transport_mtu;
		entry->cb_data = qp->transport->ifp;

		STAILQ_INSERT_TAIL(&qp->rx_pend_q, entry, entry);

		mtx_unlock_spin(&qp->ntb_rx_q_lock);

		CTR2(KTR_NTB, "RX: completing entry %p, mbuf %p", entry, m);
		if (qp->rx_handler != NULL && qp->client_ready)
			qp->rx_handler(qp, qp->cb_data, m, len);
		else
			m_freem(m);

		mtx_lock_spin(&qp->ntb_rx_q_lock);
	}

	mtx_unlock_spin(&qp->ntb_rx_q_lock);
}

static void
ntb_transport_doorbell_callback(void *data, uint32_t vector)
{
	struct ntb_transport_ctx *nt = data;
	struct ntb_transport_qp *qp;
	struct _qpset db_bits;
	uint64_t vec_mask;
	unsigned qp_num;

	BIT_COPY(QP_SETSIZE, &nt->qp_bitmap, &db_bits);
	BIT_NAND(QP_SETSIZE, &db_bits, &nt->qp_bitmap_free);

	vec_mask = ntb_db_vector_mask(nt->ntb, vector);
	while (vec_mask != 0) {
		qp_num = ffsll(vec_mask) - 1;

		if (test_bit(qp_num, &db_bits)) {
			qp = &nt->qp_vec[qp_num];
			taskqueue_enqueue(taskqueue_swi, &qp->rxc_db_work);
		}

		vec_mask &= ~(1ull << qp_num);
	}
}

/* Link Event handler */
static void
ntb_transport_event_callback(void *data)
{
	struct ntb_transport_ctx *nt = data;

	if (ntb_link_is_up(nt->ntb, NULL, NULL)) {
		ntb_printf(1, "HW link up\n");
		callout_reset(&nt->link_work, 0, ntb_transport_link_work, nt);
	} else {
		ntb_printf(1, "HW link down\n");
		taskqueue_enqueue(taskqueue_swi, &nt->link_cleanup);
	}
}

/* Link bring up */
static void
ntb_transport_link_work(void *arg)
{
	struct ntb_transport_ctx *nt = arg;
	struct ntb_softc *ntb = nt->ntb;
	struct ntb_transport_qp *qp;
	uint64_t val64, size;
	uint32_t val;
	unsigned i;
	int rc;

	/* send the local info, in the opposite order of the way we read it */
	for (i = 0; i < nt->mw_count; i++) {
		size = nt->mw_vec[i].phys_size;

		if (max_mw_size != 0 && size > max_mw_size)
			size = max_mw_size;

		ntb_peer_spad_write(ntb, IF_NTB_MW0_SZ_HIGH + (i * 2),
		    size >> 32);
		ntb_peer_spad_write(ntb, IF_NTB_MW0_SZ_LOW + (i * 2), size);
	}

	ntb_peer_spad_write(ntb, IF_NTB_NUM_MWS, nt->mw_count);

	ntb_peer_spad_write(ntb, IF_NTB_NUM_QPS, nt->qp_count);

	ntb_peer_spad_write(ntb, IF_NTB_VERSION, NTB_TRANSPORT_VERSION);
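	/*
	 * VERSION is written last here but read first below: once the peer
	 * sees a matching version, the MW sizes and QP/MW counts written
	 * above are already valid.
	 */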

	/* Query the remote side for its info */
	val = 0;
	ntb_spad_read(ntb, IF_NTB_VERSION, &val);
	if (val != NTB_TRANSPORT_VERSION)
		goto out;

	ntb_spad_read(ntb, IF_NTB_NUM_QPS, &val);
	if (val != nt->qp_count)
		goto out;

	ntb_spad_read(ntb, IF_NTB_NUM_MWS, &val);
	if (val != nt->mw_count)
		goto out;

	for (i = 0; i < nt->mw_count; i++) {
		ntb_spad_read(ntb, IF_NTB_MW0_SZ_HIGH + (i * 2), &val);
		val64 = (uint64_t)val << 32;

		ntb_spad_read(ntb, IF_NTB_MW0_SZ_LOW + (i * 2), &val);
		val64 |= val;

		rc = ntb_set_mw(nt, i, val64);
		if (rc != 0)
			goto free_mws;
	}

	nt->link_is_up = true;
	ntb_printf(1, "transport link up\n");

	for (i = 0; i < nt->qp_count; i++) {
		qp = &nt->qp_vec[i];

		ntb_transport_setup_qp_mw(nt, i);

		if (qp->client_ready)
			callout_reset(&qp->link_work, 0, ntb_qp_link_work, qp);
	}

	return;

free_mws:
	for (i = 0; i < nt->mw_count; i++)
		ntb_free_mw(nt, i);
out:
	if (ntb_link_is_up(ntb, NULL, NULL))
		callout_reset(&nt->link_work,
		    NTB_LINK_DOWN_TIMEOUT * hz / 1000, ntb_transport_link_work, nt);
}

static int
ntb_set_mw(struct ntb_transport_ctx *nt, int num_mw, size_t size)
{
	struct ntb_transport_mw *mw = &nt->mw_vec[num_mw];
	size_t xlat_size, buff_size;
	int rc;

	if (size == 0)
		return (EINVAL);

	xlat_size = roundup(size, mw->xlat_align_size);
	buff_size = xlat_size;

	/* No need to re-setup */
	if (mw->xlat_size == xlat_size)
		return (0);

	if (mw->buff_size != 0)
		ntb_free_mw(nt, num_mw);

	/* Alloc memory for receiving data.  Must be aligned */
	mw->xlat_size = xlat_size;
	mw->buff_size = buff_size;

	mw->virt_addr = contigmalloc(mw->buff_size, M_NTB_IF, M_ZERO, 0,
	    mw->addr_limit, mw->xlat_align, 0);
	if (mw->virt_addr == NULL) {
		ntb_printf(0, "Unable to allocate MW buffer of size %zu/%zu\n",
		    mw->buff_size, mw->xlat_size);
		mw->xlat_size = 0;
		mw->buff_size = 0;
		return (ENOMEM);
	}
	/* TODO: replace with bus_space_* functions */
	mw->dma_addr = vtophys(mw->virt_addr);

	/*
	 * Ensure that the allocation from contigmalloc is aligned as
	 * requested.  XXX: This may not be needed -- brought in for parity
	 * with the Linux driver.
	 */
	if (mw->dma_addr % mw->xlat_align != 0) {
		ntb_printf(0,
		    "DMA memory 0x%jx not aligned to required alignment 0x%zx\n",
		    (uintmax_t)mw->dma_addr, mw->xlat_align);
		ntb_free_mw(nt, num_mw);
		return (ENOMEM);
	}

	/* Notify HW the memory location of the receive buffer */
	rc = ntb_mw_set_trans(nt->ntb, num_mw, mw->dma_addr, mw->xlat_size);
	if (rc) {
		ntb_printf(0, "Unable to set mw%d translation\n", num_mw);
		ntb_free_mw(nt, num_mw);
		return (rc);
	}

	return (0);
}

static void
ntb_free_mw(struct ntb_transport_ctx *nt, int num_mw)
{
	struct ntb_transport_mw *mw = &nt->mw_vec[num_mw];

	if (mw->virt_addr == NULL)
		return;

	ntb_mw_clear_trans(nt->ntb, num_mw);
	contigfree(mw->virt_addr, mw->xlat_size, M_NTB_IF);
	mw->xlat_size = 0;
	mw->buff_size = 0;
	mw->virt_addr = NULL;
}

static int
ntb_transport_setup_qp_mw(struct ntb_transport_ctx *nt, unsigned int qp_num)
{
	struct ntb_transport_qp *qp = &nt->qp_vec[qp_num];
	struct ntb_transport_mw *mw;
	void *offset;
	ntb_q_idx_t i;
	size_t rx_size;
	unsigned num_qps_mw, mw_num, mw_count;

	mw_count = nt->mw_count;
	mw_num = QP_TO_MW(nt, qp_num);
	mw = &nt->mw_vec[mw_num];

	if (mw->virt_addr == NULL)
		return (ENOMEM);

	if (nt->qp_count % mw_count && mw_num + 1 < nt->qp_count / mw_count)
		num_qps_mw = nt->qp_count / mw_count + 1;
	else
		num_qps_mw = nt->qp_count / mw_count;

	rx_size = mw->xlat_size / num_qps_mw;
	qp->rx_buff = mw->virt_addr + rx_size * (qp_num / mw_count);
	rx_size -= sizeof(struct ntb_rx_info);

	qp->remote_rx_info = (void *)(qp->rx_buff + rx_size);

	/* Due to house-keeping, there must be at least 2 buffs */
	qp->rx_max_frame = qmin(rx_size / 2,
	    transport_mtu + sizeof(struct ntb_payload_header));
	qp->rx_max_entry = rx_size / qp->rx_max_frame;
	qp->rx_index = 0;

	qp->remote_rx_info->entry = qp->rx_max_entry - 1;
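	/*
	 * remote_rx_info lives at the tail of the local receive buffer and is
	 * updated by the peer (its rx_info writes land here through the
	 * memory window).  Seed the consumer index one slot behind the
	 * producer so the whole ring is initially writable; ntb_process_tx()
	 * treats tx_index == remote entry as ring-full.
	 */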

	/* Set up the hdr offsets with 0s */
	for (i = 0; i < qp->rx_max_entry; i++) {
		offset = (void *)(qp->rx_buff + qp->rx_max_frame * (i + 1) -
		    sizeof(struct ntb_payload_header));
		memset(offset, 0, sizeof(struct ntb_payload_header));
	}

	qp->rx_pkts = 0;
	qp->tx_pkts = 0;
	qp->tx_index = 0;

	return (0);
}

static void
ntb_qp_link_work(void *arg)
{
	struct ntb_transport_qp *qp = arg;
	struct ntb_softc *ntb = qp->ntb;
	struct ntb_transport_ctx *nt = qp->transport;
	uint32_t val, dummy;

	ntb_spad_read(ntb, IF_NTB_QP_LINKS, &val);

	ntb_peer_spad_write(ntb, IF_NTB_QP_LINKS, val | (1ull << qp->qp_num));

	/* query remote spad for qp ready bits */
	ntb_peer_spad_read(ntb, IF_NTB_QP_LINKS, &dummy);

	/* See if the remote side is up */
	if ((val & (1ull << qp->qp_num)) != 0) {
		ntb_printf(2, "qp link up\n");
		qp->link_is_up = true;

		if (qp->event_handler != NULL)
			qp->event_handler(qp->cb_data, NTB_LINK_UP);

		taskqueue_enqueue(taskqueue_swi, &qp->rxc_db_work);
	} else if (nt->link_is_up)
		callout_reset(&qp->link_work,
		    NTB_LINK_DOWN_TIMEOUT * hz / 1000, ntb_qp_link_work, qp);
}

/* Link down event */
static void
ntb_transport_link_cleanup(struct ntb_transport_ctx *nt)
{
	struct ntb_transport_qp *qp;
	struct _qpset qp_bitmap_alloc;
	unsigned i;

	BIT_COPY(QP_SETSIZE, &nt->qp_bitmap, &qp_bitmap_alloc);
	BIT_NAND(QP_SETSIZE, &qp_bitmap_alloc, &nt->qp_bitmap_free);

	/* Pass along the info to any clients */
	for (i = 0; i < nt->qp_count; i++)
		if (test_bit(i, &qp_bitmap_alloc)) {
			qp = &nt->qp_vec[i];
			ntb_qp_link_cleanup(qp);
			callout_drain(&qp->link_work);
		}

	if (!nt->link_is_up)
		callout_drain(&nt->link_work);

	/*
	 * The scratchpad registers keep their values if the remote side
	 * goes down; blast them now to give them a sane value the next
	 * time they are accessed.
	 */
	for (i = 0; i < IF_NTB_MAX_SPAD; i++)
		ntb_spad_write(nt->ntb, i, 0);
}

static void
ntb_transport_link_cleanup_work(void *arg, int pending __unused)
{

	ntb_transport_link_cleanup(arg);
}

static void
ntb_qp_link_down(struct ntb_transport_qp *qp)
{

	ntb_qp_link_cleanup(qp);
}

static void
ntb_qp_link_down_reset(struct ntb_transport_qp *qp)
{

	qp->link_is_up = false;

	qp->tx_index = qp->rx_index = 0;
	qp->tx_bytes = qp->rx_bytes = 0;
	qp->tx_pkts = qp->rx_pkts = 0;

	qp->rx_ring_empty = 0;
	qp->tx_ring_full = 0;

	qp->rx_err_no_buf = qp->tx_err_no_buf = 0;
	qp->rx_err_oflow = qp->rx_err_ver = 0;
}

static void
ntb_qp_link_cleanup(struct ntb_transport_qp *qp)
{
	struct ntb_transport_ctx *nt = qp->transport;

	callout_drain(&qp->link_work);
	ntb_qp_link_down_reset(qp);

	if (qp->event_handler != NULL)
		qp->event_handler(qp->cb_data, NTB_LINK_DOWN);

	if (nt->link_is_up)
		callout_reset(&qp->link_work,
		    NTB_LINK_DOWN_TIMEOUT * hz / 1000, ntb_qp_link_work, qp);
}

/* Link commanded down */
/**
 * ntb_transport_link_down - Notify NTB transport to no longer enqueue data
 * @qp: NTB transport layer queue to be disabled
 *
 * Notify NTB transport layer of client's desire to no longer receive data on
 * transport queue specified.  It is the client's responsibility to ensure all
 * entries on queue are purged or otherwise handled appropriately.
 */
static void
ntb_transport_link_down(struct ntb_transport_qp *qp)
{
	uint32_t val;

	if (qp == NULL)
		return;

	qp->client_ready = false;

	ntb_spad_read(qp->ntb, IF_NTB_QP_LINKS, &val);

	ntb_peer_spad_write(qp->ntb, IF_NTB_QP_LINKS,
	   val & ~(1 << qp->qp_num));

	if (qp->link_is_up)
		ntb_send_link_down(qp);
	else
		callout_drain(&qp->link_work);
}

static void
ntb_send_link_down(struct ntb_transport_qp *qp)
{
	struct ntb_queue_entry *entry;
	int i, rc;

	if (!qp->link_is_up)
		return;

	for (i = 0; i < NTB_LINK_DOWN_TIMEOUT; i++) {
		entry = ntb_list_rm(&qp->ntb_tx_free_q_lock, &qp->tx_free_q);
		if (entry != NULL)
			break;
		pause("NTB Wait for link down", hz / 10);
	}

	if (entry == NULL)
		return;

	entry->cb_data = NULL;
	entry->buf = NULL;
	entry->len = 0;
	entry->flags = IF_NTB_LINK_DOWN_FLAG;

	mtx_lock(&qp->transport->tx_lock);
	rc = ntb_process_tx(qp, entry);
	if (rc != 0)
		printf("ntb: Failed to send link down\n");
	mtx_unlock(&qp->transport->tx_lock);

	ntb_qp_link_down_reset(qp);
}

/* List Management */

static void
ntb_list_add(struct mtx *lock, struct ntb_queue_entry *entry,
    struct ntb_queue_list *list)
{

	mtx_lock_spin(lock);
	STAILQ_INSERT_TAIL(list, entry, entry);
	mtx_unlock_spin(lock);
}

static struct ntb_queue_entry *
ntb_list_rm(struct mtx *lock, struct ntb_queue_list *list)
{
	struct ntb_queue_entry *entry;

	mtx_lock_spin(lock);
	if (STAILQ_EMPTY(list)) {
		entry = NULL;
		goto out;
	}
	entry = STAILQ_FIRST(list);
	STAILQ_REMOVE_HEAD(list, entry);
out:
	mtx_unlock_spin(lock);

	return (entry);
}

static struct ntb_queue_entry *
ntb_list_mv(struct mtx *lock, struct ntb_queue_list *from,
    struct ntb_queue_list *to)
{
	struct ntb_queue_entry *entry;

	mtx_lock_spin(lock);
	if (STAILQ_EMPTY(from)) {
		entry = NULL;
		goto out;
	}
	entry = STAILQ_FIRST(from);
	STAILQ_REMOVE_HEAD(from, entry);
	STAILQ_INSERT_TAIL(to, entry, entry);

out:
	mtx_unlock_spin(lock);
	return (entry);
}

/* Helper functions */
/* TODO: This too should really be part of the kernel */
#define EUI48_MULTICAST			(1 << 0)
#define EUI48_LOCALLY_ADMINISTERED	(1 << 1)
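/*
 * EUI48_MULTICAST and EUI48_LOCALLY_ADMINISTERED above are the two low-order
 * bits of the first octet of an EUI-48: bit 0 marks a multicast address, bit
 * 1 a locally administered one.  The address generated below is therefore
 * unicast and locally administered.
 */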
static void
create_random_local_eui48(u_char *eaddr)
{
	static uint8_t counter = 0;
	uint32_t seed = ticks;

	eaddr[0] = EUI48_LOCALLY_ADMINISTERED;
	memcpy(&eaddr[1], &seed, sizeof(uint32_t));
	eaddr[5] = counter++;
}

/**
 * ntb_transport_max_size - Query the max payload size of a qp
 * @qp: NTB transport layer queue to be queried
 *
 * Query the maximum payload size permissible on the given qp
 *
 * RETURNS: the max payload size of a qp
 */
static unsigned int
ntb_transport_max_size(struct ntb_transport_qp *qp)
{

	if (qp == NULL)
		return (0);

	return (qp->tx_max_frame - sizeof(struct ntb_payload_header));
}