/*	$OpenBSD: vnet.c,v 1.63 2020/12/12 11:48:52 jan Exp $	*/
/*
 * Copyright (c) 2009, 2015 Mark Kettenis
 *
 * Permission to use, copy, modify, and distribute this software for any
 * purpose with or without fee is hereby granted, provided that the above
 * copyright notice and this permission notice appear in all copies.
 *
 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
 */

#include "bpfilter.h"

#include <sys/param.h>
#include <sys/atomic.h>
#include <sys/device.h>
#include <sys/malloc.h>
#include <sys/pool.h>
#include <sys/mbuf.h>
#include <sys/socket.h>
#include <sys/sockio.h>
#include <sys/systm.h>
#include <sys/timeout.h>

#include <machine/autoconf.h>
#include <machine/hypervisor.h>
#include <machine/openfirm.h>

#include <net/if.h>
#include <net/if_media.h>

#include <netinet/in.h>
#include <netinet/if_ether.h>

#if NBPFILTER > 0
#include <net/bpf.h>
#endif

#include <uvm/uvm_extern.h>

#include <sparc64/dev/cbusvar.h>
#include <sparc64/dev/ldcvar.h>
#include <sparc64/dev/viovar.h>

#ifdef VNET_DEBUG
#define DPRINTF(x)	printf x
#else
#define DPRINTF(x)
#endif

#define VNET_TX_ENTRIES		32
#define VNET_RX_ENTRIES		32

struct vnet_attr_info {
	struct vio_msg_tag	tag;
	uint8_t			xfer_mode;
	uint8_t			addr_type;
	uint16_t		ack_freq;
	uint32_t		_reserved1;
	uint64_t		addr;
	uint64_t		mtu;
	uint64_t		_reserved2[3];
};

/* Address types. */
#define VNET_ADDR_ETHERMAC	0x01

/* Sub-Type envelopes. */
#define VNET_MCAST_INFO		0x0101

#define VNET_NUM_MCAST		7

struct vnet_mcast_info {
	struct vio_msg_tag	tag;
	uint8_t			set;
	uint8_t			count;
	uint8_t			mcast_addr[VNET_NUM_MCAST][ETHER_ADDR_LEN];
	uint32_t		_reserved;
};

struct vnet_desc {
	struct vio_dring_hdr	hdr;
	uint32_t		nbytes;
	uint32_t		ncookies;
	struct ldc_cookie	cookie[2];
};

struct vnet_desc_msg {
	struct vio_msg_tag	tag;
	uint64_t		seq_no;
	uint64_t		desc_handle;
	uint32_t		nbytes;
	uint32_t		ncookies;
	struct ldc_cookie	cookie[1];
};

struct vnet_dring {
	bus_dmamap_t		vd_map;
	bus_dma_segment_t	vd_seg;
	struct vnet_desc	*vd_desc;
	int			vd_nentries;
};

struct vnet_dring *vnet_dring_alloc(bus_dma_tag_t, int);
void	vnet_dring_free(bus_dma_tag_t, struct vnet_dring *);

/*
 * For now, we only support vNet 1.0.
 */
#define VNET_MAJOR	1
#define VNET_MINOR	0

/*
 * The vNet protocol wants the IP header to be 64-bit aligned, so
 * define our own variant of ETHER_ALIGN.
 */
#define VNET_ETHER_ALIGN	6

struct vnet_soft_desc {
	int		vsd_map_idx;
	caddr_t		vsd_buf;
};

struct vnet_softc {
	struct device	sc_dv;
	bus_space_tag_t	sc_bustag;
	bus_dma_tag_t	sc_dmatag;

	uint64_t	sc_tx_ino;
	uint64_t	sc_rx_ino;
	void		*sc_tx_ih;
	void		*sc_rx_ih;

	struct ldc_conn	sc_lc;

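	/*
	 * Handshake progress.  Each VIO_SND_* bit records that we sent
	 * the corresponding handshake message, VIO_ACK_* that the peer
	 * acknowledged ours, and VIO_RCV_* that we received (and acked)
	 * the peer's copy; a phase is complete once both its RCV and
	 * ACK bits are set.
	 */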
	uint16_t	sc_vio_state;
#define VIO_SND_VER_INFO	0x0001
#define VIO_ACK_VER_INFO	0x0002
#define VIO_RCV_VER_INFO	0x0004
#define VIO_SND_ATTR_INFO	0x0008
#define VIO_ACK_ATTR_INFO	0x0010
#define VIO_RCV_ATTR_INFO	0x0020
#define VIO_SND_DRING_REG	0x0040
#define VIO_ACK_DRING_REG	0x0080
#define VIO_RCV_DRING_REG	0x0100
#define VIO_SND_RDX		0x0200
#define VIO_ACK_RDX		0x0400
#define VIO_RCV_RDX		0x0800

	struct timeout	sc_handshake_to;

	uint8_t		sc_xfer_mode;

	uint32_t	sc_local_sid;
	uint64_t	sc_dring_ident;
	uint64_t	sc_seq_no;

	u_int		sc_tx_prod;
	u_int		sc_tx_cons;

	u_int		sc_peer_state;

	struct ldc_map	*sc_lm;
	struct vnet_dring *sc_vd;
	struct vnet_soft_desc *sc_vsd;
#define VNET_NUM_SOFT_DESC	128

	size_t		sc_peer_desc_size;
	struct ldc_cookie sc_peer_dring_cookie;
	int		sc_peer_dring_nentries;

	struct pool	sc_pool;

	struct arpcom	sc_ac;
	struct ifmedia	sc_media;
};

int	vnet_match(struct device *, void *, void *);
void	vnet_attach(struct device *, struct device *, void *);

struct cfattach vnet_ca = {
	sizeof(struct vnet_softc), vnet_match, vnet_attach
};

struct cfdriver vnet_cd = {
	NULL, "vnet", DV_IFNET
};

int	vnet_tx_intr(void *);
int	vnet_rx_intr(void *);
void	vnet_handshake(void *);

void	vio_rx_data(struct ldc_conn *, struct ldc_pkt *);
void	vnet_rx_vio_ctrl(struct vnet_softc *, struct vio_msg *);
void	vnet_rx_vio_ver_info(struct vnet_softc *, struct vio_msg_tag *);
void	vnet_rx_vio_attr_info(struct vnet_softc *, struct vio_msg_tag *);
void	vnet_rx_vio_dring_reg(struct vnet_softc *, struct vio_msg_tag *);
void	vnet_rx_vio_rdx(struct vnet_softc *sc, struct vio_msg_tag *);
void	vnet_rx_vio_data(struct vnet_softc *sc, struct vio_msg *);
void	vnet_rx_vio_desc_data(struct vnet_softc *sc, struct vio_msg_tag *);
void	vnet_rx_vio_dring_data(struct vnet_softc *sc, struct vio_msg_tag *);

void	vnet_ldc_reset(struct ldc_conn *);
void	vnet_ldc_start(struct ldc_conn *);

void	vnet_sendmsg(struct vnet_softc *, void *, size_t);
void	vnet_send_ver_info(struct vnet_softc *, uint16_t, uint16_t);
void	vnet_send_attr_info(struct vnet_softc *);
void	vnet_send_dring_reg(struct vnet_softc *);
void	vio_send_rdx(struct vnet_softc *);
void	vnet_send_dring_data(struct vnet_softc *, uint32_t);

void	vnet_start(struct ifnet *);
void	vnet_start_desc(struct ifnet *);
int	vnet_ioctl(struct ifnet *, u_long, caddr_t);
void	vnet_watchdog(struct ifnet *);

int	vnet_media_change(struct ifnet *);
void	vnet_media_status(struct ifnet *, struct ifmediareq *);

void	vnet_link_state(struct vnet_softc *sc);

void	vnet_setmulti(struct vnet_softc *, int);

void	vnet_init(struct ifnet *);
void	vnet_stop(struct ifnet *);

int
vnet_match(struct device *parent, void *match, void *aux)
{
	struct cbus_attach_args *ca = aux;

	if (strcmp(ca->ca_name, "network") == 0)
		return (1);

	return (0);
}

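/*
 * Attach a vnet instance: hook up the LDC TX/RX interrupts, allocate
 * the LDC queues, fetch the MAC address from the firmware's
 * "local-mac-address" property and register the Ethernet interface.
 */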
void
vnet_attach(struct device *parent, struct device *self, void *aux)
{
	struct vnet_softc *sc = (struct vnet_softc *)self;
	struct cbus_attach_args *ca = aux;
	struct ldc_conn *lc;
	struct ifnet *ifp;

	sc->sc_bustag = ca->ca_bustag;
	sc->sc_dmatag = ca->ca_dmatag;
	sc->sc_tx_ino = ca->ca_tx_ino;
	sc->sc_rx_ino = ca->ca_rx_ino;

	printf(": ivec 0x%llx, 0x%llx", sc->sc_tx_ino, sc->sc_rx_ino);

	/*
	 * Un-configure queues before registering interrupt handlers,
	 * such that we don't get any stale LDC packets or events.
	 */
	hv_ldc_tx_qconf(ca->ca_id, 0, 0);
	hv_ldc_rx_qconf(ca->ca_id, 0, 0);

	sc->sc_tx_ih = bus_intr_establish(ca->ca_bustag, sc->sc_tx_ino,
	    IPL_NET, BUS_INTR_ESTABLISH_MPSAFE, vnet_tx_intr,
	    sc, sc->sc_dv.dv_xname);
	sc->sc_rx_ih = bus_intr_establish(ca->ca_bustag, sc->sc_rx_ino,
	    IPL_NET, BUS_INTR_ESTABLISH_MPSAFE, vnet_rx_intr,
	    sc, sc->sc_dv.dv_xname);
	if (sc->sc_tx_ih == NULL || sc->sc_rx_ih == NULL) {
		printf(", can't establish interrupt\n");
		return;
	}

	lc = &sc->sc_lc;
	lc->lc_id = ca->ca_id;
	lc->lc_sc = sc;
	lc->lc_reset = vnet_ldc_reset;
	lc->lc_start = vnet_ldc_start;
	lc->lc_rx_data = vio_rx_data;

	timeout_set(&sc->sc_handshake_to, vnet_handshake, sc);
	sc->sc_peer_state = VIO_DP_STOPPED;

	lc->lc_txq = ldc_queue_alloc(sc->sc_dmatag, VNET_TX_ENTRIES);
	if (lc->lc_txq == NULL) {
		printf(", can't allocate tx queue\n");
		return;
	}

	lc->lc_rxq = ldc_queue_alloc(sc->sc_dmatag, VNET_RX_ENTRIES);
	if (lc->lc_rxq == NULL) {
		printf(", can't allocate rx queue\n");
		goto free_txqueue;
	}

	if (OF_getprop(ca->ca_node, "local-mac-address",
	    sc->sc_ac.ac_enaddr, ETHER_ADDR_LEN) > 0)
		printf(", address %s", ether_sprintf(sc->sc_ac.ac_enaddr));

	/*
	 * Each interface gets its own pool.
	 */
	pool_init(&sc->sc_pool, 2048, 0, IPL_NET, 0, sc->sc_dv.dv_xname, NULL);

	ifp = &sc->sc_ac.ac_if;
	ifp->if_softc = sc;
	ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
	ifp->if_link_state = LINK_STATE_DOWN;
	ifp->if_ioctl = vnet_ioctl;
	ifp->if_start = vnet_start;
	ifp->if_watchdog = vnet_watchdog;
	strlcpy(ifp->if_xname, sc->sc_dv.dv_xname, IFNAMSIZ);
	ifq_set_maxlen(&ifp->if_snd, 31); /* XXX */

	ifmedia_init(&sc->sc_media, 0, vnet_media_change, vnet_media_status);
	ifmedia_add(&sc->sc_media, IFM_ETHER | IFM_AUTO, 0, NULL);
	ifmedia_set(&sc->sc_media, IFM_ETHER | IFM_AUTO);

	if_attach(ifp);
	ether_ifattach(ifp);

	printf("\n");
	return;

free_txqueue:
	ldc_queue_free(sc->sc_dmatag, lc->lc_txq);
}

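/*
 * TX interrupt: we only track LDC channel state changes here;
 * completed transmissions are reaped when the peer acks our
 * DRING_DATA (or DESC_DATA) messages.
 */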
int
vnet_tx_intr(void *arg)
{
	struct vnet_softc *sc = arg;
	struct ldc_conn *lc = &sc->sc_lc;
	uint64_t tx_head, tx_tail, tx_state;

	hv_ldc_tx_get_state(lc->lc_id, &tx_head, &tx_tail, &tx_state);
	if (tx_state != lc->lc_tx_state) {
		switch (tx_state) {
		case LDC_CHANNEL_DOWN:
			DPRINTF(("%s: Tx link down\n", __func__));
			break;
		case LDC_CHANNEL_UP:
			DPRINTF(("%s: Tx link up\n", __func__));
			break;
		case LDC_CHANNEL_RESET:
			DPRINTF(("%s: Tx link reset\n", __func__));
			break;
		}
		lc->lc_tx_state = tx_state;
	}

	return (1);
}

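/*
 * RX interrupt: handle LDC channel state transitions (resetting the
 * connection and rescheduling the handshake as needed), then process
 * a single packet from the receive queue and advance the queue head.
 */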
int
vnet_rx_intr(void *arg)
{
	struct vnet_softc *sc = arg;
	struct ldc_conn *lc = &sc->sc_lc;
	uint64_t rx_head, rx_tail, rx_state;
	struct ldc_pkt *lp;
	int err;

	err = hv_ldc_rx_get_state(lc->lc_id, &rx_head, &rx_tail, &rx_state);
	if (err == H_EINVAL)
		return (0);
	if (err != H_EOK) {
		printf("hv_ldc_rx_get_state %d\n", err);
		return (0);
	}

	if (rx_state != lc->lc_rx_state) {
		switch (rx_state) {
		case LDC_CHANNEL_DOWN:
			DPRINTF(("%s: Rx link down\n", __func__));
			lc->lc_tx_seqid = 0;
			lc->lc_state = 0;
			lc->lc_reset(lc);
			if (rx_head == rx_tail)
				break;
			/* Discard and ack pending I/O. */
			DPRINTF(("setting rx qhead to %lld\n", rx_tail));
			err = hv_ldc_rx_set_qhead(lc->lc_id, rx_tail);
			if (err == H_EOK)
				break;
			printf("%s: hv_ldc_rx_set_qhead %d\n", __func__, err);
			break;
		case LDC_CHANNEL_UP:
			DPRINTF(("%s: Rx link up\n", __func__));
			timeout_add_msec(&sc->sc_handshake_to, 500);
			break;
		case LDC_CHANNEL_RESET:
			DPRINTF(("%s: Rx link reset\n", __func__));
			lc->lc_tx_seqid = 0;
			lc->lc_state = 0;
			lc->lc_reset(lc);
			timeout_add_msec(&sc->sc_handshake_to, 500);
			if (rx_head == rx_tail)
				break;
			/* Discard and ack pending I/O. */
			DPRINTF(("setting rx qhead to %lld\n", rx_tail));
			err = hv_ldc_rx_set_qhead(lc->lc_id, rx_tail);
			if (err == H_EOK)
				break;
			printf("%s: hv_ldc_rx_set_qhead %d\n", __func__, err);
			break;
		}
		lc->lc_rx_state = rx_state;
		return (1);
	}

	if (rx_head == rx_tail)
		return (0);

	lp = (struct ldc_pkt *)(lc->lc_rxq->lq_va + rx_head);
	switch (lp->type) {
	case LDC_CTRL:
		ldc_rx_ctrl(lc, lp);
		break;

	case LDC_DATA:
		ldc_rx_data(lc, lp);
		break;

	default:
		DPRINTF(("0x%02x/0x%02x/0x%02x\n", lp->type, lp->stype,
		    lp->ctrl));
		ldc_reset(lc);
		break;
	}

	if (lc->lc_state == 0)
		return (1);

	rx_head += sizeof(*lp);
	rx_head &= ((lc->lc_rxq->lq_nentries * sizeof(*lp)) - 1);
	err = hv_ldc_rx_set_qhead(lc->lc_id, rx_head);
	if (err != H_EOK)
		printf("%s: hv_ldc_rx_set_qhead %d\n", __func__, err);

	return (1);
}

void
vnet_handshake(void *arg)
{
	struct vnet_softc *sc = arg;

	ldc_send_vers(&sc->sc_lc);
}

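/*
 * An LDC data packet carries a VIO message; dispatch on the VIO
 * message type.  Continuation fragments are skipped, since all the
 * messages we care about fit in a single LDC packet.
 */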
void
vio_rx_data(struct ldc_conn *lc, struct ldc_pkt *lp)
{
	struct vio_msg *vm = (struct vio_msg *)lp;

	switch (vm->type) {
	case VIO_TYPE_CTRL:
		if ((lp->env & LDC_FRAG_START) == 0 &&
		    (lp->env & LDC_FRAG_STOP) == 0)
			return;
		vnet_rx_vio_ctrl(lc->lc_sc, vm);
		break;

	case VIO_TYPE_DATA:
		if ((lp->env & LDC_FRAG_START) == 0)
			return;
		vnet_rx_vio_data(lc->lc_sc, vm);
		break;

	default:
		DPRINTF(("Unhandled packet type 0x%02x\n", vm->type));
		ldc_reset(lc);
		break;
	}
}

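/*
 * Dispatch a VIO control message on its sub-type envelope: these are
 * the handshake messages (VER_INFO, ATTR_INFO, DRING_REG and RDX).
 */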
void
vnet_rx_vio_ctrl(struct vnet_softc *sc, struct vio_msg *vm)
{
	struct vio_msg_tag *tag = (struct vio_msg_tag *)&vm->type;

	switch (tag->stype_env) {
	case VIO_VER_INFO:
		vnet_rx_vio_ver_info(sc, tag);
		break;
	case VIO_ATTR_INFO:
		vnet_rx_vio_attr_info(sc, tag);
		break;
	case VIO_DRING_REG:
		vnet_rx_vio_dring_reg(sc, tag);
		break;
	case VIO_RDX:
		vnet_rx_vio_rdx(sc, tag);
		break;
	default:
		DPRINTF(("CTRL/0x%02x/0x%04x\n", tag->stype, tag->stype_env));
		break;
	}
}

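/*
 * Version negotiation, the first handshake phase.  NACK peers that
 * are not network devices or want a major version other than ours;
 * otherwise ACK.  Once our own VER_INFO has been acked as well, move
 * on to the attribute exchange.
 */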
void
vnet_rx_vio_ver_info(struct vnet_softc *sc, struct vio_msg_tag *tag)
{
	struct vio_ver_info *vi = (struct vio_ver_info *)tag;

	switch (vi->tag.stype) {
	case VIO_SUBTYPE_INFO:
		DPRINTF(("CTRL/INFO/VER_INFO\n"));

		/* Make sure we're talking to a virtual network device. */
		if (vi->dev_class != VDEV_NETWORK &&
		    vi->dev_class != VDEV_NETWORK_SWITCH) {
			/* Huh, we're not talking to a network device? */
			printf("Not a network device\n");
			vi->tag.stype = VIO_SUBTYPE_NACK;
			vnet_sendmsg(sc, vi, sizeof(*vi));
			return;
		}

		if (vi->major != VNET_MAJOR) {
			vi->tag.stype = VIO_SUBTYPE_NACK;
			vi->major = VNET_MAJOR;
			vi->minor = VNET_MINOR;
			vnet_sendmsg(sc, vi, sizeof(*vi));
			return;
		}

		vi->tag.stype = VIO_SUBTYPE_ACK;
		vi->tag.sid = sc->sc_local_sid;
		vi->minor = VNET_MINOR;
		vnet_sendmsg(sc, vi, sizeof(*vi));
		sc->sc_vio_state |= VIO_RCV_VER_INFO;
		break;

	case VIO_SUBTYPE_ACK:
		DPRINTF(("CTRL/ACK/VER_INFO\n"));
		if (!ISSET(sc->sc_vio_state, VIO_SND_VER_INFO)) {
			ldc_reset(&sc->sc_lc);
			break;
		}
		sc->sc_vio_state |= VIO_ACK_VER_INFO;
		break;

	default:
		DPRINTF(("CTRL/0x%02x/VER_INFO\n", vi->tag.stype));
		break;
	}

	if (ISSET(sc->sc_vio_state, VIO_RCV_VER_INFO) &&
	    ISSET(sc->sc_vio_state, VIO_ACK_VER_INFO))
		vnet_send_attr_info(sc);
}

void
vnet_rx_vio_attr_info(struct vnet_softc *sc, struct vio_msg_tag *tag)
{
	struct vnet_attr_info *ai = (struct vnet_attr_info *)tag;

	switch (ai->tag.stype) {
	case VIO_SUBTYPE_INFO:
		DPRINTF(("CTRL/INFO/ATTR_INFO\n"));
		sc->sc_xfer_mode = ai->xfer_mode;

		ai->tag.stype = VIO_SUBTYPE_ACK;
		ai->tag.sid = sc->sc_local_sid;
		vnet_sendmsg(sc, ai, sizeof(*ai));
		sc->sc_vio_state |= VIO_RCV_ATTR_INFO;
		break;

	case VIO_SUBTYPE_ACK:
		DPRINTF(("CTRL/ACK/ATTR_INFO\n"));
		if (!ISSET(sc->sc_vio_state, VIO_SND_ATTR_INFO)) {
			ldc_reset(&sc->sc_lc);
			break;
		}
		sc->sc_vio_state |= VIO_ACK_ATTR_INFO;
		break;

	default:
		DPRINTF(("CTRL/0x%02x/ATTR_INFO\n", ai->tag.stype));
		break;
	}

	if (ISSET(sc->sc_vio_state, VIO_RCV_ATTR_INFO) &&
	    ISSET(sc->sc_vio_state, VIO_ACK_ATTR_INFO)) {
		if (sc->sc_xfer_mode == VIO_DRING_MODE)
			vnet_send_dring_reg(sc);
		else
			vio_send_rdx(sc);
	}
}

void
vnet_rx_vio_dring_reg(struct vnet_softc *sc, struct vio_msg_tag *tag)
{
	struct vio_dring_reg *dr = (struct vio_dring_reg *)tag;

	switch (dr->tag.stype) {
	case VIO_SUBTYPE_INFO:
		DPRINTF(("CTRL/INFO/DRING_REG\n"));

		sc->sc_peer_dring_nentries = dr->num_descriptors;
		sc->sc_peer_desc_size = dr->descriptor_size;
		sc->sc_peer_dring_cookie = dr->cookie[0];

		dr->tag.stype = VIO_SUBTYPE_ACK;
		dr->tag.sid = sc->sc_local_sid;
		vnet_sendmsg(sc, dr, sizeof(*dr));
		sc->sc_vio_state |= VIO_RCV_DRING_REG;
		break;

	case VIO_SUBTYPE_ACK:
		DPRINTF(("CTRL/ACK/DRING_REG\n"));
		if (!ISSET(sc->sc_vio_state, VIO_SND_DRING_REG)) {
			ldc_reset(&sc->sc_lc);
			break;
		}

		sc->sc_dring_ident = dr->dring_ident;
		sc->sc_seq_no = 1;

		sc->sc_vio_state |= VIO_ACK_DRING_REG;
		break;

	default:
		DPRINTF(("CTRL/0x%02x/DRING_REG\n", dr->tag.stype));
		break;
	}

	if (ISSET(sc->sc_vio_state, VIO_RCV_DRING_REG) &&
	    ISSET(sc->sc_vio_state, VIO_ACK_DRING_REG))
		vio_send_rdx(sc);
}

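/*
 * RDX ("ready to exchange data") completes the handshake.  Once both
 * sides have exchanged and acked it, mark the link up, program the
 * multicast filter and kick the transmit queue.
 */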
void
vnet_rx_vio_rdx(struct vnet_softc *sc, struct vio_msg_tag *tag)
{
	struct ifnet *ifp = &sc->sc_ac.ac_if;

	switch (tag->stype) {
	case VIO_SUBTYPE_INFO:
		DPRINTF(("CTRL/INFO/RDX\n"));

		tag->stype = VIO_SUBTYPE_ACK;
		tag->sid = sc->sc_local_sid;
		vnet_sendmsg(sc, tag, sizeof(*tag));
		sc->sc_vio_state |= VIO_RCV_RDX;
		break;

	case VIO_SUBTYPE_ACK:
		DPRINTF(("CTRL/ACK/RDX\n"));
		if (!ISSET(sc->sc_vio_state, VIO_SND_RDX)) {
			ldc_reset(&sc->sc_lc);
			break;
		}
		sc->sc_vio_state |= VIO_ACK_RDX;
		break;

	default:
		DPRINTF(("CTRL/0x%02x/RDX (VIO)\n", tag->stype));
		break;
	}

	if (ISSET(sc->sc_vio_state, VIO_RCV_RDX) &&
	    ISSET(sc->sc_vio_state, VIO_ACK_RDX)) {
		/* Link is up! */
		vnet_link_state(sc);

		/* Configure multicast now that we can. */
		vnet_setmulti(sc, 1);

		KERNEL_LOCK();
		ifq_clr_oactive(&ifp->if_snd);
		vnet_start(ifp);
		KERNEL_UNLOCK();
	}
}

void
vnet_rx_vio_data(struct vnet_softc *sc, struct vio_msg *vm)
{
	struct vio_msg_tag *tag = (struct vio_msg_tag *)&vm->type;

	if (!ISSET(sc->sc_vio_state, VIO_RCV_RDX) ||
	    !ISSET(sc->sc_vio_state, VIO_ACK_RDX)) {
		DPRINTF(("Spurious DATA/0x%02x/0x%04x\n", tag->stype,
		    tag->stype_env));
		return;
	}

	switch (tag->stype_env) {
	case VIO_DESC_DATA:
		vnet_rx_vio_desc_data(sc, tag);
		break;

	case VIO_DRING_DATA:
		vnet_rx_vio_dring_data(sc, tag);
		break;

	default:
		DPRINTF(("DATA/0x%02x/0x%04x\n", tag->stype, tag->stype_env));
		break;
	}
}

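/*
 * Descriptor-mode data.  On INFO, copy the frame out of the peer's
 * buffer into a fresh pool buffer with hv_ldc_copy() and feed it to
 * the stack, then ack the descriptor.  On ACK, tear down the mapping
 * for the corresponding transmit buffer and retire it.
 */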
void
vnet_rx_vio_desc_data(struct vnet_softc *sc, struct vio_msg_tag *tag)
{
	struct vnet_desc_msg *dm = (struct vnet_desc_msg *)tag;
	struct ldc_conn *lc = &sc->sc_lc;
	struct ldc_map *map = sc->sc_lm;
	struct ifnet *ifp = &sc->sc_ac.ac_if;
	struct mbuf_list ml = MBUF_LIST_INITIALIZER();
	struct mbuf *m;
	caddr_t buf;
	paddr_t pa;
	psize_t nbytes;
	u_int cons;
	int err;

	switch (tag->stype) {
	case VIO_SUBTYPE_INFO:
		buf = pool_get(&sc->sc_pool, PR_NOWAIT|PR_ZERO);
		if (buf == NULL) {
			ifp->if_ierrors++;
			goto skip;
		}
		nbytes = roundup(dm->nbytes, 8);

		if (dm->nbytes > (ETHER_MAX_LEN - ETHER_CRC_LEN)) {
			pool_put(&sc->sc_pool, buf);
			ifp->if_ierrors++;
			goto skip;
		}

		pmap_extract(pmap_kernel(), (vaddr_t)buf, &pa);
		err = hv_ldc_copy(lc->lc_id, LDC_COPY_IN,
		    dm->cookie[0].addr, pa, nbytes, &nbytes);
		if (err != H_EOK) {
			pool_put(&sc->sc_pool, buf);
			ifp->if_ierrors++;
			goto skip;
		}

		/* Stupid OBP doesn't align properly. */
		m = m_devget(buf, dm->nbytes, ETHER_ALIGN);
		pool_put(&sc->sc_pool, buf);
		if (m == NULL) {
			ifp->if_ierrors++;
			goto skip;
		}

		/* Pass it on. */
		ml_enqueue(&ml, m);
		if_input(ifp, &ml);

	skip:
		dm->tag.stype = VIO_SUBTYPE_ACK;
		dm->tag.sid = sc->sc_local_sid;
		vnet_sendmsg(sc, dm, sizeof(*dm));
		break;

	case VIO_SUBTYPE_ACK:
		DPRINTF(("DATA/ACK/DESC_DATA\n"));

		if (dm->desc_handle != sc->sc_tx_cons) {
			printf("out of order\n");
			return;
		}

		cons = sc->sc_tx_cons & (sc->sc_vd->vd_nentries - 1);

		map->lm_slot[sc->sc_vsd[cons].vsd_map_idx].entry = 0;
		atomic_dec_int(&map->lm_count);

		pool_put(&sc->sc_pool, sc->sc_vsd[cons].vsd_buf);
		sc->sc_vsd[cons].vsd_buf = NULL;

		sc->sc_tx_cons++;
		break;

	case VIO_SUBTYPE_NACK:
		DPRINTF(("DATA/NACK/DESC_DATA\n"));
		break;

	default:
		DPRINTF(("DATA/0x%02x/DESC_DATA\n", tag->stype));
		break;
	}
}

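/*
 * Dring-mode data.  On INFO, walk the peer's descriptor ring from
 * start_idx, copying each ready frame into an mbuf cluster and
 * marking the descriptor done, then ack (or nack) the range.  On
 * ACK, retire our own completed transmit descriptors and restart
 * the transmit queue if it was throttled.
 */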
void
vnet_rx_vio_dring_data(struct vnet_softc *sc, struct vio_msg_tag *tag)
{
	struct vio_dring_msg *dm = (struct vio_dring_msg *)tag;
	struct ldc_conn *lc = &sc->sc_lc;
	struct ifnet *ifp = &sc->sc_ac.ac_if;
	struct mbuf *m;
	paddr_t pa;
	psize_t nbytes;
	int err;

	switch (tag->stype) {
	case VIO_SUBTYPE_INFO:
	{
		struct vnet_desc desc;
		uint64_t cookie;
		paddr_t desc_pa;
		int idx, ack_end_idx = -1;
		struct mbuf_list ml = MBUF_LIST_INITIALIZER();

		idx = dm->start_idx;
		for (;;) {
			cookie = sc->sc_peer_dring_cookie.addr;
			cookie += idx * sc->sc_peer_desc_size;
			nbytes = sc->sc_peer_desc_size;
			pmap_extract(pmap_kernel(), (vaddr_t)&desc, &desc_pa);
			err = hv_ldc_copy(lc->lc_id, LDC_COPY_IN, cookie,
			    desc_pa, nbytes, &nbytes);
			if (err != H_EOK) {
				printf("hv_ldc_copy_in %d\n", err);
				break;
			}

			if (desc.hdr.dstate != VIO_DESC_READY)
				break;

			if (desc.nbytes > (ETHER_MAX_LEN - ETHER_CRC_LEN)) {
				ifp->if_ierrors++;
				goto skip;
			}

			m = MCLGETL(NULL, M_DONTWAIT, desc.nbytes);
			if (!m)
				break;
			m->m_len = m->m_pkthdr.len = desc.nbytes;
			nbytes = roundup(desc.nbytes + VNET_ETHER_ALIGN, 8);

			pmap_extract(pmap_kernel(), (vaddr_t)m->m_data, &pa);
			err = hv_ldc_copy(lc->lc_id, LDC_COPY_IN,
			    desc.cookie[0].addr, pa, nbytes, &nbytes);
			if (err != H_EOK) {
				m_freem(m);
				goto skip;
			}
			m->m_data += VNET_ETHER_ALIGN;

			ml_enqueue(&ml, m);

		skip:
			desc.hdr.dstate = VIO_DESC_DONE;
			nbytes = sc->sc_peer_desc_size;
			err = hv_ldc_copy(lc->lc_id, LDC_COPY_OUT, cookie,
			    desc_pa, nbytes, &nbytes);
			if (err != H_EOK)
				printf("hv_ldc_copy_out %d\n", err);

			ack_end_idx = idx;
			if (++idx == sc->sc_peer_dring_nentries)
				idx = 0;
		}

		if_input(ifp, &ml);

		if (ack_end_idx == -1) {
			dm->tag.stype = VIO_SUBTYPE_NACK;
		} else {
			dm->tag.stype = VIO_SUBTYPE_ACK;
			dm->end_idx = ack_end_idx;
		}
		dm->tag.sid = sc->sc_local_sid;
		dm->proc_state = VIO_DP_STOPPED;
		vnet_sendmsg(sc, dm, sizeof(*dm));
		break;
	}

	case VIO_SUBTYPE_ACK:
	{
		struct ldc_map *map = sc->sc_lm;
		u_int cons, count;

		sc->sc_peer_state = dm->proc_state;

		cons = sc->sc_tx_cons & (sc->sc_vd->vd_nentries - 1);
		while (sc->sc_vd->vd_desc[cons].hdr.dstate == VIO_DESC_DONE) {
			map->lm_slot[sc->sc_vsd[cons].vsd_map_idx].entry = 0;
			atomic_dec_int(&map->lm_count);

			pool_put(&sc->sc_pool, sc->sc_vsd[cons].vsd_buf);
			sc->sc_vsd[cons].vsd_buf = NULL;

			sc->sc_vd->vd_desc[cons].hdr.dstate = VIO_DESC_FREE;
			sc->sc_tx_cons++;
			cons = sc->sc_tx_cons & (sc->sc_vd->vd_nentries - 1);
		}

		count = sc->sc_tx_prod - sc->sc_tx_cons;
		if (count > 0 && sc->sc_peer_state != VIO_DP_ACTIVE)
			vnet_send_dring_data(sc, cons);

		KERNEL_LOCK();
		if (count < (sc->sc_vd->vd_nentries - 1))
			ifq_clr_oactive(&ifp->if_snd);
		if (count == 0)
			ifp->if_timer = 0;

		vnet_start(ifp);
		KERNEL_UNLOCK();
		break;
	}

	case VIO_SUBTYPE_NACK:
		DPRINTF(("DATA/NACK/DRING_DATA\n"));
		sc->sc_peer_state = VIO_DP_STOPPED;
		break;

	default:
		DPRINTF(("DATA/0x%02x/DRING_DATA\n", tag->stype));
		break;
	}
}

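/*
 * The LDC channel went away: forget all handshake progress, mark the
 * link down and release every transmit buffer still in flight.
 */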
void
vnet_ldc_reset(struct ldc_conn *lc)
{
	struct vnet_softc *sc = lc->lc_sc;
	int i;

	timeout_del(&sc->sc_handshake_to);
	sc->sc_tx_prod = sc->sc_tx_cons = 0;
	sc->sc_peer_state = VIO_DP_STOPPED;
	sc->sc_vio_state = 0;
	vnet_link_state(sc);

	sc->sc_lm->lm_next = 1;
	sc->sc_lm->lm_count = 1;
	for (i = 1; i < sc->sc_lm->lm_nentries; i++)
		sc->sc_lm->lm_slot[i].entry = 0;

	for (i = 0; i < sc->sc_vd->vd_nentries; i++) {
		if (sc->sc_vsd[i].vsd_buf) {
			pool_put(&sc->sc_pool, sc->sc_vsd[i].vsd_buf);
			sc->sc_vsd[i].vsd_buf = NULL;
		}
		sc->sc_vd->vd_desc[i].hdr.dstate = VIO_DESC_FREE;
	}
}

void
vnet_ldc_start(struct ldc_conn *lc)
{
	struct vnet_softc *sc = lc->lc_sc;

	timeout_del(&sc->sc_handshake_to);
	vnet_send_ver_info(sc, VNET_MAJOR, VNET_MINOR);
}

void
vnet_sendmsg(struct vnet_softc *sc, void *msg, size_t len)
{
	struct ldc_conn *lc = &sc->sc_lc;
	int err;

	err = ldc_send_unreliable(lc, msg, len);
	if (err)
		printf("%s: ldc_send_unreliable: %d\n", __func__, err);
}

void
vnet_send_ver_info(struct vnet_softc *sc, uint16_t major, uint16_t minor)
{
	struct vio_ver_info vi;

	bzero(&vi, sizeof(vi));
	vi.tag.type = VIO_TYPE_CTRL;
	vi.tag.stype = VIO_SUBTYPE_INFO;
	vi.tag.stype_env = VIO_VER_INFO;
	vi.tag.sid = sc->sc_local_sid;
	vi.major = major;
	vi.minor = minor;
	vi.dev_class = VDEV_NETWORK;
	vnet_sendmsg(sc, &vi, sizeof(vi));

	sc->sc_vio_state |= VIO_SND_VER_INFO;
}

void
vnet_send_attr_info(struct vnet_softc *sc)
{
	struct vnet_attr_info ai;
	int i;

	bzero(&ai, sizeof(ai));
	ai.tag.type = VIO_TYPE_CTRL;
	ai.tag.stype = VIO_SUBTYPE_INFO;
	ai.tag.stype_env = VIO_ATTR_INFO;
	ai.tag.sid = sc->sc_local_sid;
	ai.xfer_mode = VIO_DRING_MODE;
	ai.addr_type = VNET_ADDR_ETHERMAC;
	ai.ack_freq = 0;
	ai.addr = 0;
	for (i = 0; i < ETHER_ADDR_LEN; i++) {
		ai.addr <<= 8;
		ai.addr |= sc->sc_ac.ac_enaddr[i];
	}
	ai.mtu = ETHER_MAX_LEN - ETHER_CRC_LEN;
	vnet_sendmsg(sc, &ai, sizeof(ai));

	sc->sc_vio_state |= VIO_SND_ATTR_INFO;
}

void
vnet_send_dring_reg(struct vnet_softc *sc)
{
	struct vio_dring_reg dr;

	bzero(&dr, sizeof(dr));
	dr.tag.type = VIO_TYPE_CTRL;
	dr.tag.stype = VIO_SUBTYPE_INFO;
	dr.tag.stype_env = VIO_DRING_REG;
	dr.tag.sid = sc->sc_local_sid;
	dr.dring_ident = 0;
	dr.num_descriptors = sc->sc_vd->vd_nentries;
	dr.descriptor_size = sizeof(struct vnet_desc);
	dr.options = VIO_TX_RING;
	dr.ncookies = 1;
	dr.cookie[0].addr = 0;
	dr.cookie[0].size = PAGE_SIZE;
	vnet_sendmsg(sc, &dr, sizeof(dr));

	sc->sc_vio_state |= VIO_SND_DRING_REG;
}

void
vio_send_rdx(struct vnet_softc *sc)
{
	struct vio_msg_tag tag;

	tag.type = VIO_TYPE_CTRL;
	tag.stype = VIO_SUBTYPE_INFO;
	tag.stype_env = VIO_RDX;
	tag.sid = sc->sc_local_sid;
	vnet_sendmsg(sc, &tag, sizeof(tag));

	sc->sc_vio_state |= VIO_SND_RDX;
}

void
vnet_send_dring_data(struct vnet_softc *sc, uint32_t start_idx)
{
	struct vio_dring_msg dm;
	u_int peer_state;

	peer_state = atomic_swap_uint(&sc->sc_peer_state, VIO_DP_ACTIVE);
	if (peer_state == VIO_DP_ACTIVE)
		return;

	bzero(&dm, sizeof(dm));
	dm.tag.type = VIO_TYPE_DATA;
	dm.tag.stype = VIO_SUBTYPE_INFO;
	dm.tag.stype_env = VIO_DRING_DATA;
	dm.tag.sid = sc->sc_local_sid;
	dm.seq_no = sc->sc_seq_no++;
	dm.dring_ident = sc->sc_dring_ident;
	dm.start_idx = start_idx;
	dm.end_idx = -1;
	vnet_sendmsg(sc, &dm, sizeof(dm));
}

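/*
 * Transmit start routine for dring mode.  Frames are copied into
 * 2048-byte pool buffers at an offset of VNET_ETHER_ALIGN, entered
 * into the LDC map table and published in our descriptor ring; the
 * peer is then notified with a DRING_DATA message.
 */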
void
vnet_start(struct ifnet *ifp)
{
	struct vnet_softc *sc = ifp->if_softc;
	struct ldc_conn *lc = &sc->sc_lc;
	struct ldc_map *map = sc->sc_lm;
	struct mbuf *m;
	paddr_t pa;
	caddr_t buf;
	uint64_t tx_head, tx_tail, tx_state;
	u_int start, prod, count;
	int err;

	if (!(ifp->if_flags & IFF_RUNNING) || ifq_is_oactive(&ifp->if_snd))
		return;

	if (ifq_empty(&ifp->if_snd))
		return;

	/*
	 * We cannot transmit packets until a VIO connection has been
	 * established.
	 */
	if (!ISSET(sc->sc_vio_state, VIO_RCV_RDX) ||
	    !ISSET(sc->sc_vio_state, VIO_ACK_RDX))
		return;

	/*
	 * Make sure there is room in the LDC transmit queue to send a
	 * DRING_DATA message.
	 */
	err = hv_ldc_tx_get_state(lc->lc_id, &tx_head, &tx_tail, &tx_state);
	if (err != H_EOK)
		return;
	tx_tail += sizeof(struct ldc_pkt);
	tx_tail &= ((lc->lc_txq->lq_nentries * sizeof(struct ldc_pkt)) - 1);
	if (tx_tail == tx_head) {
		ifq_set_oactive(&ifp->if_snd);
		return;
	}

	if (sc->sc_xfer_mode == VIO_DESC_MODE) {
		vnet_start_desc(ifp);
		return;
	}

	start = prod = sc->sc_tx_prod & (sc->sc_vd->vd_nentries - 1);
	while (sc->sc_vd->vd_desc[prod].hdr.dstate == VIO_DESC_FREE) {
		count = sc->sc_tx_prod - sc->sc_tx_cons;
		if (count >= (sc->sc_vd->vd_nentries - 1) ||
		    map->lm_count >= map->lm_nentries) {
			ifq_set_oactive(&ifp->if_snd);
			break;
		}

		buf = pool_get(&sc->sc_pool, PR_NOWAIT|PR_ZERO);
		if (buf == NULL) {
			ifq_set_oactive(&ifp->if_snd);
			break;
		}

		m = ifq_dequeue(&ifp->if_snd);
		if (m == NULL) {
			pool_put(&sc->sc_pool, buf);
			break;
		}

		m_copydata(m, 0, m->m_pkthdr.len, buf + VNET_ETHER_ALIGN);

#if NBPFILTER > 0
		/*
		 * If BPF is listening on this interface, let it see the
		 * packet before we commit it to the wire.
		 */
		if (ifp->if_bpf)
			bpf_mtap(ifp->if_bpf, m, BPF_DIRECTION_OUT);
#endif

		pmap_extract(pmap_kernel(), (vaddr_t)buf, &pa);
		KASSERT((pa & ~PAGE_MASK) == (pa & LDC_MTE_RA_MASK));
		while (map->lm_slot[map->lm_next].entry != 0) {
			map->lm_next++;
			map->lm_next &= (map->lm_nentries - 1);
		}
		map->lm_slot[map->lm_next].entry = (pa & LDC_MTE_RA_MASK);
		map->lm_slot[map->lm_next].entry |= LDC_MTE_CPR;
		atomic_inc_int(&map->lm_count);

		sc->sc_vd->vd_desc[prod].nbytes = max(m->m_pkthdr.len, 60);
		sc->sc_vd->vd_desc[prod].ncookies = 1;
		sc->sc_vd->vd_desc[prod].cookie[0].addr =
		    map->lm_next << PAGE_SHIFT | (pa & PAGE_MASK);
		sc->sc_vd->vd_desc[prod].cookie[0].size = 2048;
		membar_producer();
		sc->sc_vd->vd_desc[prod].hdr.dstate = VIO_DESC_READY;

		sc->sc_vsd[prod].vsd_map_idx = map->lm_next;
		sc->sc_vsd[prod].vsd_buf = buf;

		sc->sc_tx_prod++;
		prod = sc->sc_tx_prod & (sc->sc_vd->vd_nentries - 1);

		m_freem(m);
	}

	membar_producer();

	if (start != prod && sc->sc_peer_state != VIO_DP_ACTIVE) {
		vnet_send_dring_data(sc, start);
		ifp->if_timer = 5;
	}
}

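/*
 * Transmit path for descriptor mode: instead of publishing ring
 * entries, each frame is announced to the peer with an individual
 * DESC_DATA message carrying its LDC cookie.
 */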
void
vnet_start_desc(struct ifnet *ifp)
{
	struct vnet_softc *sc = ifp->if_softc;
	struct ldc_map *map = sc->sc_lm;
	struct vnet_desc_msg dm;
	struct mbuf *m;
	paddr_t pa;
	caddr_t buf;
	u_int prod, count;

	for (;;) {
		count = sc->sc_tx_prod - sc->sc_tx_cons;
		if (count >= (sc->sc_vd->vd_nentries - 1) ||
		    map->lm_count >= map->lm_nentries) {
			ifq_set_oactive(&ifp->if_snd);
			return;
		}

		buf = pool_get(&sc->sc_pool, PR_NOWAIT|PR_ZERO);
		if (buf == NULL) {
			ifq_set_oactive(&ifp->if_snd);
			return;
		}

		m = ifq_dequeue(&ifp->if_snd);
		if (m == NULL) {
			pool_put(&sc->sc_pool, buf);
			return;
		}

		m_copydata(m, 0, m->m_pkthdr.len, buf);

#if NBPFILTER > 0
		/*
		 * If BPF is listening on this interface, let it see the
		 * packet before we commit it to the wire.
		 */
		if (ifp->if_bpf)
			bpf_mtap(ifp->if_bpf, m, BPF_DIRECTION_OUT);
#endif

		pmap_extract(pmap_kernel(), (vaddr_t)buf, &pa);
		KASSERT((pa & ~PAGE_MASK) == (pa & LDC_MTE_RA_MASK));
		while (map->lm_slot[map->lm_next].entry != 0) {
			map->lm_next++;
			map->lm_next &= (map->lm_nentries - 1);
		}
		map->lm_slot[map->lm_next].entry = (pa & LDC_MTE_RA_MASK);
		map->lm_slot[map->lm_next].entry |= LDC_MTE_CPR;
		atomic_inc_int(&map->lm_count);

		prod = sc->sc_tx_prod & (sc->sc_vd->vd_nentries - 1);
		sc->sc_vsd[prod].vsd_map_idx = map->lm_next;
		sc->sc_vsd[prod].vsd_buf = buf;

		bzero(&dm, sizeof(dm));
		dm.tag.type = VIO_TYPE_DATA;
		dm.tag.stype = VIO_SUBTYPE_INFO;
		dm.tag.stype_env = VIO_DESC_DATA;
		dm.tag.sid = sc->sc_local_sid;
		dm.seq_no = sc->sc_seq_no++;
		dm.desc_handle = sc->sc_tx_prod;
		dm.nbytes = max(m->m_pkthdr.len, 60);
		dm.ncookies = 1;
		dm.cookie[0].addr =
			map->lm_next << PAGE_SHIFT | (pa & PAGE_MASK);
		dm.cookie[0].size = 2048;
		vnet_sendmsg(sc, &dm, sizeof(dm));

		sc->sc_tx_prod++;
		sc->sc_tx_prod &= (sc->sc_vd->vd_nentries - 1);

		m_freem(m);
	}
}

int
vnet_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
{
	struct vnet_softc *sc = ifp->if_softc;
	struct ifreq *ifr = (struct ifreq *)data;
	int s, error = 0;

	s = splnet();

	switch (cmd) {
	case SIOCSIFADDR:
		ifp->if_flags |= IFF_UP;
		/* FALLTHROUGH */
	case SIOCSIFFLAGS:
		if (ifp->if_flags & IFF_UP) {
			if ((ifp->if_flags & IFF_RUNNING) == 0)
				vnet_init(ifp);
		} else {
			if (ifp->if_flags & IFF_RUNNING)
				vnet_stop(ifp);
		}
		break;

	case SIOCGIFMEDIA:
	case SIOCSIFMEDIA:
		error = ifmedia_ioctl(ifp, ifr, &sc->sc_media, cmd);
		break;

	case SIOCADDMULTI:
	case SIOCDELMULTI:
		/*
		 * XXX Removing all multicast addresses and then adding
		 * most of them back is rather wasteful.
		 */
		vnet_setmulti(sc, 0);
		error = ether_ioctl(ifp, &sc->sc_ac, cmd, data);
		vnet_setmulti(sc, 1);
		if (error == ENETRESET)
			error = 0;
		break;

	default:
		error = ether_ioctl(ifp, &sc->sc_ac, cmd, data);
	}

	splx(s);
	return (error);
}

void
vnet_watchdog(struct ifnet *ifp)
{
	struct vnet_softc *sc = ifp->if_softc;

	printf("%s: watchdog timeout\n", sc->sc_dv.dv_xname);
}

int
vnet_media_change(struct ifnet *ifp)
{
	return (0);
}

void
vnet_media_status(struct ifnet *ifp, struct ifmediareq *imr)
{
	imr->ifm_active = IFM_ETHER | IFM_AUTO;
	imr->ifm_status = IFM_AVALID;

	if (LINK_STATE_IS_UP(ifp->if_link_state) &&
	    ifp->if_flags & IFF_UP)
		imr->ifm_status |= IFM_ACTIVE;
}

void
vnet_link_state(struct vnet_softc *sc)
{
	struct ifnet *ifp = &sc->sc_ac.ac_if;
	int link_state = LINK_STATE_DOWN;

	KERNEL_LOCK();
	if (ISSET(sc->sc_vio_state, VIO_RCV_RDX) &&
	    ISSET(sc->sc_vio_state, VIO_ACK_RDX))
		link_state = LINK_STATE_FULL_DUPLEX;
	if (ifp->if_link_state != link_state) {
		ifp->if_link_state = link_state;
		if_link_state_change(ifp);
	}
	KERNEL_UNLOCK();
}

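/*
 * Program the peer's multicast filter.  Addresses are shipped in
 * VNET_MCAST_INFO control messages, at most VNET_NUM_MCAST per
 * message; `set' selects between adding and removing them.
 */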
void
vnet_setmulti(struct vnet_softc *sc, int set)
{
	struct arpcom *ac = &sc->sc_ac;
	struct ether_multi *enm;
	struct ether_multistep step;
	struct vnet_mcast_info mi;
	int count = 0;

	if (!ISSET(sc->sc_vio_state, VIO_RCV_RDX) ||
	    !ISSET(sc->sc_vio_state, VIO_ACK_RDX))
		return;

	bzero(&mi, sizeof(mi));
	mi.tag.type = VIO_TYPE_CTRL;
	mi.tag.stype = VIO_SUBTYPE_INFO;
	mi.tag.stype_env = VNET_MCAST_INFO;
	mi.tag.sid = sc->sc_local_sid;
	mi.set = set ? 1 : 0;
	KERNEL_LOCK();
	ETHER_FIRST_MULTI(step, ac, enm);
	while (enm != NULL) {
		/* XXX What about multicast ranges? */
		bcopy(enm->enm_addrlo, mi.mcast_addr[count], ETHER_ADDR_LEN);
		ETHER_NEXT_MULTI(step, enm);

		count++;
		if (count < VNET_NUM_MCAST)
			continue;

		mi.count = VNET_NUM_MCAST;
		vnet_sendmsg(sc, &mi, sizeof(mi));
		count = 0;
	}

	if (count > 0) {
		mi.count = count;
		vnet_sendmsg(sc, &mi, sizeof(mi));
	}
	KERNEL_UNLOCK();
}

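/*
 * Bring the interface up: allocate the LDC map table and our transmit
 * descriptor ring, hand the map table and queues to the hypervisor,
 * enable the channel interrupts and kick off the LDC handshake.
 */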
void
vnet_init(struct ifnet *ifp)
{
	struct vnet_softc *sc = ifp->if_softc;
	struct ldc_conn *lc = &sc->sc_lc;
	int err;

	sc->sc_lm = ldc_map_alloc(sc->sc_dmatag, 2048);
	if (sc->sc_lm == NULL)
		return;

	err = hv_ldc_set_map_table(lc->lc_id,
	    sc->sc_lm->lm_map->dm_segs[0].ds_addr, sc->sc_lm->lm_nentries);
	if (err != H_EOK) {
		printf("hv_ldc_set_map_table %d\n", err);
		return;
	}

	sc->sc_vd = vnet_dring_alloc(sc->sc_dmatag, VNET_NUM_SOFT_DESC);
	if (sc->sc_vd == NULL)
		return;
	sc->sc_vsd = malloc(VNET_NUM_SOFT_DESC * sizeof(*sc->sc_vsd), M_DEVBUF,
	    M_NOWAIT|M_ZERO);
	if (sc->sc_vsd == NULL)
		return;

	sc->sc_lm->lm_slot[0].entry = sc->sc_vd->vd_map->dm_segs[0].ds_addr;
	sc->sc_lm->lm_slot[0].entry &= LDC_MTE_RA_MASK;
	sc->sc_lm->lm_slot[0].entry |= LDC_MTE_CPR | LDC_MTE_CPW;
	sc->sc_lm->lm_next = 1;
	sc->sc_lm->lm_count = 1;

	err = hv_ldc_tx_qconf(lc->lc_id,
	    lc->lc_txq->lq_map->dm_segs[0].ds_addr, lc->lc_txq->lq_nentries);
	if (err != H_EOK)
		printf("hv_ldc_tx_qconf %d\n", err);

	err = hv_ldc_rx_qconf(lc->lc_id,
	    lc->lc_rxq->lq_map->dm_segs[0].ds_addr, lc->lc_rxq->lq_nentries);
	if (err != H_EOK)
		printf("hv_ldc_rx_qconf %d\n", err);

	cbus_intr_setenabled(sc->sc_bustag, sc->sc_tx_ino, INTR_ENABLED);
	cbus_intr_setenabled(sc->sc_bustag, sc->sc_rx_ino, INTR_ENABLED);

	ldc_send_vers(lc);

	ifp->if_flags |= IFF_RUNNING;
}

void
vnet_stop(struct ifnet *ifp)
{
	struct vnet_softc *sc = ifp->if_softc;
	struct ldc_conn *lc = &sc->sc_lc;

	ifp->if_flags &= ~IFF_RUNNING;
	ifq_clr_oactive(&ifp->if_snd);
	ifp->if_timer = 0;

	cbus_intr_setenabled(sc->sc_bustag, sc->sc_tx_ino, INTR_DISABLED);
	cbus_intr_setenabled(sc->sc_bustag, sc->sc_rx_ino, INTR_DISABLED);

	intr_barrier(sc->sc_tx_ih);
	intr_barrier(sc->sc_rx_ih);

	hv_ldc_tx_qconf(lc->lc_id, 0, 0);
	hv_ldc_rx_qconf(lc->lc_id, 0, 0);
	lc->lc_tx_seqid = 0;
	lc->lc_state = 0;
	lc->lc_tx_state = lc->lc_rx_state = LDC_CHANNEL_DOWN;
	vnet_ldc_reset(lc);

	free(sc->sc_vsd, M_DEVBUF, VNET_NUM_SOFT_DESC * sizeof(*sc->sc_vsd));

	vnet_dring_free(sc->sc_dmatag, sc->sc_vd);

	hv_ldc_set_map_table(lc->lc_id, 0, 0);
	ldc_map_free(sc->sc_dmatag, sc->sc_lm);
}

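/*
 * Allocate a page-aligned, DMA-able descriptor ring and mark every
 * descriptor free.  The usual create/alloc/map/load sequence is
 * unwound in reverse on failure.
 */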
struct vnet_dring *
vnet_dring_alloc(bus_dma_tag_t t, int nentries)
{
	struct vnet_dring *vd;
	bus_size_t size;
	caddr_t va;
	int nsegs;
	int i;

	vd = malloc(sizeof(struct vnet_dring), M_DEVBUF, M_NOWAIT);
	if (vd == NULL)
		return (NULL);

	size = roundup(nentries * sizeof(struct vnet_desc), PAGE_SIZE);

	if (bus_dmamap_create(t, size, 1, size, 0,
	    BUS_DMA_NOWAIT | BUS_DMA_ALLOCNOW, &vd->vd_map) != 0)
		goto free_vd;

	if (bus_dmamem_alloc(t, size, PAGE_SIZE, 0, &vd->vd_seg, 1,
	    &nsegs, BUS_DMA_NOWAIT) != 0)
		goto destroy;

	if (bus_dmamem_map(t, &vd->vd_seg, 1, size, &va,
	    BUS_DMA_NOWAIT) != 0)
		goto free;

	if (bus_dmamap_load(t, vd->vd_map, va, size, NULL,
	    BUS_DMA_NOWAIT) != 0)
		goto unmap;

	vd->vd_desc = (struct vnet_desc *)va;
	vd->vd_nentries = nentries;
	bzero(vd->vd_desc, nentries * sizeof(struct vnet_desc));
	for (i = 0; i < vd->vd_nentries; i++)
		vd->vd_desc[i].hdr.dstate = VIO_DESC_FREE;
	return (vd);

unmap:
	bus_dmamem_unmap(t, va, size);
free:
	bus_dmamem_free(t, &vd->vd_seg, 1);
destroy:
	bus_dmamap_destroy(t, vd->vd_map);
free_vd:
	free(vd, M_DEVBUF, sizeof(struct vnet_dring));

	return (NULL);
}

void
vnet_dring_free(bus_dma_tag_t t, struct vnet_dring *vd)
{
	bus_size_t size;

	size = vd->vd_nentries * sizeof(struct vnet_desc);
	size = roundup(size, PAGE_SIZE);

	bus_dmamap_unload(t, vd->vd_map);
	bus_dmamem_unmap(t, (caddr_t)vd->vd_desc, size);
	bus_dmamem_free(t, &vd->vd_seg, 1);
	bus_dmamap_destroy(t, vd->vd_map);
	free(vd, M_DEVBUF, sizeof(struct vnet_dring));
}