/*	$OpenBSD: vnet.c,v 1.25 2010/04/15 19:47:32 kettenis Exp $	*/
/*
 * Copyright (c) 2009 Mark Kettenis
 *
 * Permission to use, copy, modify, and distribute this software for any
 * purpose with or without fee is hereby granted, provided that the above
 * copyright notice and this permission notice appear in all copies.
 *
 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
 */

#include "bpfilter.h"

#include <sys/param.h>
#include <sys/device.h>
#include <sys/malloc.h>
#include <sys/mbuf.h>
#include <sys/socket.h>
#include <sys/sockio.h>
#include <sys/systm.h>

#include <machine/autoconf.h>
#include <machine/hypervisor.h>
#include <machine/openfirm.h>

#include <net/if.h>
#include <net/if_dl.h>
#include <net/if_media.h>
#include <net/if_types.h>

#include <netinet/in.h>
#include <netinet/if_ether.h>

#if NBPFILTER > 0
#include <net/bpf.h>
#endif

#include <uvm/uvm.h>

#include <sparc64/dev/cbusvar.h>
#include <sparc64/dev/ldcvar.h>
#include <sparc64/dev/viovar.h>

/* XXX the following declaration should be elsewhere */
extern void myetheraddr(u_char *);

#ifdef VNET_DEBUG
#define DPRINTF(x)	printf x
#else
#define DPRINTF(x)
#endif

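/* Size of the LDC transmit and receive queues, in queue entries. */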
#define VNET_TX_ENTRIES		32
#define VNET_RX_ENTRIES		32

struct vnet_attr_info {
	struct vio_msg_tag	tag;
	uint8_t			xfer_mode;
	uint8_t			addr_type;
	uint16_t		ack_freq;
	uint32_t		_reserved1;
	uint64_t		addr;
	uint64_t		mtu;
	uint64_t		_reserved2[3];
};

/* Address types. */
#define VNET_ADDR_ETHERMAC	0x01

/* Sub-Type envelopes. */
#define VNET_MCAST_INFO		0x0101

#define VNET_NUM_MCAST		7

struct vnet_mcast_info {
	struct vio_msg_tag	tag;
	uint8_t			set;
	uint8_t			count;
	uint8_t			mcast_addr[VNET_NUM_MCAST][ETHER_ADDR_LEN];
	uint32_t		_reserved;
};

struct vnet_desc {
	struct vio_dring_hdr	hdr;
	uint32_t		nbytes;
	uint32_t		ncookies;
	struct ldc_cookie	cookie[2];
};

struct vnet_desc_msg {
	struct vio_msg_tag	tag;
	uint64_t		seq_no;
	uint64_t		desc_handle;
	uint32_t		nbytes;
	uint32_t		ncookies;
	struct ldc_cookie	cookie[1];
};

struct vnet_dring {
	bus_dmamap_t		vd_map;
	bus_dma_segment_t	vd_seg;
	struct vnet_desc	*vd_desc;
	int			vd_nentries;
};

struct vnet_dring *vnet_dring_alloc(bus_dma_tag_t, int);
void	vnet_dring_free(bus_dma_tag_t, struct vnet_dring *);

/*
 * For now, we only support vNet 1.0.
 */
#define VNET_MAJOR	1
#define VNET_MINOR	0

/*
 * The vNet protocol wants the IP header to be 64-bit aligned, so
 * define our own variant of ETHER_ALIGN.
 */
#define VNET_ETHER_ALIGN	6

struct vnet_soft_desc {
	int		vsd_map_idx;
	caddr_t		vsd_buf;
};

struct vnet_softc {
	struct device	sc_dv;
	bus_space_tag_t	sc_bustag;
	bus_dma_tag_t	sc_dmatag;

	uint64_t	sc_tx_sysino;
	uint64_t	sc_rx_sysino;
	void		*sc_tx_ih;
	void		*sc_rx_ih;

	struct ldc_conn	sc_lc;

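	/*
	 * VIO handshake state.  The connection comes up in three
	 * steps (VER_INFO, ATTR_INFO, DRING_REG) followed by an RDX
	 * exchange; for each message we track whether we sent our
	 * copy, received the peer's copy, and had ours acked.
	 */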
	uint16_t	sc_vio_state;
#define VIO_SND_VER_INFO	0x0001
#define VIO_ACK_VER_INFO	0x0002
#define VIO_RCV_VER_INFO	0x0004
#define VIO_SND_ATTR_INFO	0x0008
#define VIO_ACK_ATTR_INFO	0x0010
#define VIO_RCV_ATTR_INFO	0x0020
#define VIO_SND_DRING_REG	0x0040
#define VIO_ACK_DRING_REG	0x0080
#define VIO_RCV_DRING_REG	0x0100
#define VIO_SND_RDX		0x0200
#define VIO_ACK_RDX		0x0400
#define VIO_RCV_RDX		0x0800

	uint8_t		sc_xfer_mode;

	uint32_t	sc_local_sid;
	uint64_t	sc_dring_ident;
	uint64_t	sc_seq_no;

	int		sc_tx_cnt;
	int		sc_tx_prod;
	int		sc_tx_cons;

	uint8_t		sc_peer_state;

	struct ldc_map	*sc_lm;
	struct vnet_dring *sc_vd;
	struct vnet_soft_desc *sc_vsd;

	size_t		sc_peer_desc_size;
	struct ldc_cookie sc_peer_dring_cookie;
	int		sc_peer_dring_nentries;

	struct pool	sc_pool;

	struct arpcom	sc_ac;
	struct ifmedia	sc_media;
};

int	vnet_match(struct device *, void *, void *);
void	vnet_attach(struct device *, struct device *, void *);

struct cfattach vnet_ca = {
	sizeof(struct vnet_softc), vnet_match, vnet_attach
};

struct cfdriver vnet_cd = {
	NULL, "vnet", DV_IFNET
};

int	vnet_tx_intr(void *);
int	vnet_rx_intr(void *);

void	vio_rx_data(struct ldc_conn *, struct ldc_pkt *);
void	vnet_rx_vio_ctrl(struct vnet_softc *, struct vio_msg *);
void	vnet_rx_vio_ver_info(struct vnet_softc *, struct vio_msg_tag *);
void	vnet_rx_vio_attr_info(struct vnet_softc *, struct vio_msg_tag *);
void	vnet_rx_vio_dring_reg(struct vnet_softc *, struct vio_msg_tag *);
void	vnet_rx_vio_rdx(struct vnet_softc *sc, struct vio_msg_tag *);
void	vnet_rx_vio_data(struct vnet_softc *sc, struct vio_msg *);
void	vnet_rx_vio_desc_data(struct vnet_softc *sc, struct vio_msg_tag *);
void	vnet_rx_vio_dring_data(struct vnet_softc *sc, struct vio_msg_tag *);

void	vnet_ldc_reset(struct ldc_conn *);
void	vnet_ldc_start(struct ldc_conn *);

void	vio_sendmsg(struct vnet_softc *, void *, size_t);
void	vnet_send_ver_info(struct vnet_softc *, uint16_t, uint16_t);
void	vnet_send_attr_info(struct vnet_softc *);
void	vnet_send_dring_reg(struct vnet_softc *);
void	vio_send_rdx(struct vnet_softc *);
void	vnet_send_dring_data(struct vnet_softc *, uint32_t);

void	vnet_start(struct ifnet *);
void	vnet_start_desc(struct ifnet *);
int	vnet_ioctl(struct ifnet *, u_long, caddr_t);
void	vnet_watchdog(struct ifnet *);

int	vnet_media_change(struct ifnet *);
void	vnet_media_status(struct ifnet *, struct ifmediareq *);

void	vnet_link_state(struct vnet_softc *sc);

void	vnet_setmulti(struct vnet_softc *, int);

void	vnet_init(struct ifnet *);
void	vnet_stop(struct ifnet *);

int
vnet_match(struct device *parent, void *match, void *aux)
{
	struct cbus_attach_args *ca = aux;

	if (strcmp(ca->ca_name, "network") == 0)
		return (1);

	return (0);
}

void
vnet_attach(struct device *parent, struct device *self, void *aux)
{
	struct vnet_softc *sc = (struct vnet_softc *)self;
	struct cbus_attach_args *ca = aux;
	struct ldc_conn *lc;
	struct ifnet *ifp;

	sc->sc_bustag = ca->ca_bustag;
	sc->sc_dmatag = ca->ca_dmatag;

	if (OF_getprop(ca->ca_node, "local-mac-address", sc->sc_ac.ac_enaddr,
	    ETHER_ADDR_LEN) <= 0)
		myetheraddr(sc->sc_ac.ac_enaddr);

	if (cbus_intr_map(ca->ca_node, ca->ca_tx_ino, &sc->sc_tx_sysino) ||
	    cbus_intr_map(ca->ca_node, ca->ca_rx_ino, &sc->sc_rx_sysino)) {
		printf(": can't map interrupt\n");
		return;
	}
	printf(": ivec 0x%lx, 0x%lx", sc->sc_tx_sysino, sc->sc_rx_sysino);

	/*
	 * Un-configure queues before registering interrupt handlers,
	 * such that we don't get any stale LDC packets or events.
	 */
	hv_ldc_tx_qconf(ca->ca_id, 0, 0);
	hv_ldc_rx_qconf(ca->ca_id, 0, 0);

	sc->sc_tx_ih = bus_intr_establish(ca->ca_bustag, sc->sc_tx_sysino,
	    IPL_NET, 0, vnet_tx_intr, sc, sc->sc_dv.dv_xname);
	sc->sc_rx_ih = bus_intr_establish(ca->ca_bustag, sc->sc_rx_sysino,
	    IPL_NET, 0, vnet_rx_intr, sc, sc->sc_dv.dv_xname);
	if (sc->sc_tx_ih == NULL || sc->sc_rx_ih == NULL) {
		printf(", can't establish interrupt\n");
		return;
	}

	/*
	 * Disable interrupts while we have no queues allocated.
	 * Otherwise we may end up with an interrupt storm as soon as
	 * our peer places a packet in their transmit queue.
	 */
	cbus_intr_setenabled(sc->sc_tx_sysino, INTR_DISABLED);
	cbus_intr_setenabled(sc->sc_rx_sysino, INTR_DISABLED);

	lc = &sc->sc_lc;
	lc->lc_id = ca->ca_id;
	lc->lc_sc = sc;
	lc->lc_reset = vnet_ldc_reset;
	lc->lc_start = vnet_ldc_start;
	lc->lc_rx_data = vio_rx_data;

	lc->lc_txq = ldc_queue_alloc(sc->sc_dmatag, VNET_TX_ENTRIES);
	if (lc->lc_txq == NULL) {
		printf(", can't allocate tx queue\n");
		return;
	}

	lc->lc_rxq = ldc_queue_alloc(sc->sc_dmatag, VNET_RX_ENTRIES);
	if (lc->lc_rxq == NULL) {
		printf(", can't allocate rx queue\n");
		goto free_txqueue;
	}

	/*
	 * Each interface gets its own pool.
	 */
	pool_init(&sc->sc_pool, 2048, 0, 0, 0, sc->sc_dv.dv_xname, NULL);

	ifp = &sc->sc_ac.ac_if;
	ifp->if_softc = sc;
	ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
	ifp->if_ioctl = vnet_ioctl;
	ifp->if_start = vnet_start;
	ifp->if_watchdog = vnet_watchdog;
	strlcpy(ifp->if_xname, sc->sc_dv.dv_xname, IFNAMSIZ);
	IFQ_SET_MAXLEN(&ifp->if_snd, 31); /* XXX */
	IFQ_SET_READY(&ifp->if_snd);

	ifmedia_init(&sc->sc_media, 0, vnet_media_change, vnet_media_status);
	ifmedia_add(&sc->sc_media, IFM_ETHER | IFM_AUTO, 0, NULL);
	ifmedia_set(&sc->sc_media, IFM_ETHER | IFM_AUTO);

	if_attach(ifp);
	ether_ifattach(ifp);

	printf(", address %s\n", ether_sprintf(sc->sc_ac.ac_enaddr));
	return;

free_txqueue:
	ldc_queue_free(sc->sc_dmatag, lc->lc_txq);
}

int
vnet_tx_intr(void *arg)
{
	struct vnet_softc *sc = arg;
	struct ldc_conn *lc = &sc->sc_lc;
	uint64_t tx_head, tx_tail, tx_state;

	hv_ldc_tx_get_state(lc->lc_id, &tx_head, &tx_tail, &tx_state);
	if (tx_state != lc->lc_tx_state) {
		switch (tx_state) {
		case LDC_CHANNEL_DOWN:
			DPRINTF(("Tx link down\n"));
			break;
		case LDC_CHANNEL_UP:
			DPRINTF(("Tx link up\n"));
			break;
		case LDC_CHANNEL_RESET:
			DPRINTF(("Tx link reset\n"));
			break;
		}
		lc->lc_tx_state = tx_state;
	}

	return (1);
}

int
vnet_rx_intr(void *arg)
{
	struct vnet_softc *sc = arg;
	struct ldc_conn *lc = &sc->sc_lc;
	uint64_t rx_head, rx_tail, rx_state;
	struct ldc_pkt *lp;
	int err;

	err = hv_ldc_rx_get_state(lc->lc_id, &rx_head, &rx_tail, &rx_state);
	if (err == H_EINVAL)
		return (0);
	if (err != H_EOK) {
		printf("hv_ldc_rx_get_state %d\n", err);
		return (0);
	}

	if (rx_state != lc->lc_rx_state) {
		switch (rx_state) {
		case LDC_CHANNEL_DOWN:
			DPRINTF(("Rx link down\n"));
			lc->lc_tx_seqid = 0;
			lc->lc_state = 0;
			lc->lc_reset(lc);
			break;
		case LDC_CHANNEL_UP:
			DPRINTF(("Rx link up\n"));
			break;
		case LDC_CHANNEL_RESET:
			DPRINTF(("Rx link reset\n"));
			lc->lc_tx_seqid = 0;
			lc->lc_state = 0;
			lc->lc_reset(lc);
			break;
		}
		lc->lc_rx_state = rx_state;
		return (1);
	}

	lp = (struct ldc_pkt *)(lc->lc_rxq->lq_va + rx_head);
	switch (lp->type) {
	case LDC_CTRL:
		ldc_rx_ctrl(lc, lp);
		break;

	case LDC_DATA:
		ldc_rx_data(lc, lp);
		break;

	default:
		DPRINTF(("0x%02x/0x%02x/0x%02x\n", lp->type, lp->stype,
		    lp->ctrl));
		ldc_reset(lc);
		break;
	}

	if (lc->lc_state == 0)
		return (1);

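	/*
	 * Advance the queue head.  The queue size is a power of two,
	 * so masking implements the wrap-around.
	 */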
	rx_head += sizeof(*lp);
	rx_head &= ((lc->lc_rxq->lq_nentries * sizeof(*lp)) - 1);
	err = hv_ldc_rx_set_qhead(lc->lc_id, rx_head);
	if (err != H_EOK)
		printf("%s: hv_ldc_rx_set_qhead %d\n", __func__, err);

	return (1);
}

void
vio_rx_data(struct ldc_conn *lc, struct ldc_pkt *lp)
{
	struct vio_msg *vm = (struct vio_msg *)lp;

	switch (vm->type) {
	case VIO_TYPE_CTRL:
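		/*
		 * Ignore packets that neither start nor end a
		 * message, i.e. middle fragments.
		 */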
		if ((lp->env & LDC_FRAG_START) == 0 &&
		    (lp->env & LDC_FRAG_STOP) == 0)
			return;
		vnet_rx_vio_ctrl(lc->lc_sc, vm);
		break;

	case VIO_TYPE_DATA:
		if ((lp->env & LDC_FRAG_START) == 0)
			return;
		vnet_rx_vio_data(lc->lc_sc, vm);
		break;

	default:
		DPRINTF(("Unhandled packet type 0x%02x\n", vm->type));
		ldc_reset(lc);
		break;
	}
}

void
vnet_rx_vio_ctrl(struct vnet_softc *sc, struct vio_msg *vm)
{
	struct vio_msg_tag *tag = (struct vio_msg_tag *)&vm->type;

	switch (tag->stype_env) {
	case VIO_VER_INFO:
		vnet_rx_vio_ver_info(sc, tag);
		break;
	case VIO_ATTR_INFO:
		vnet_rx_vio_attr_info(sc, tag);
		break;
	case VIO_DRING_REG:
		vnet_rx_vio_dring_reg(sc, tag);
		break;
	case VIO_RDX:
		vnet_rx_vio_rdx(sc, tag);
		break;
	default:
		DPRINTF(("CTRL/0x%02x/0x%04x\n", tag->stype, tag->stype_env));
		break;
	}
}

void
vnet_rx_vio_ver_info(struct vnet_softc *sc, struct vio_msg_tag *tag)
{
	struct vio_ver_info *vi = (struct vio_ver_info *)tag;

	switch (vi->tag.stype) {
	case VIO_SUBTYPE_INFO:
		DPRINTF(("CTRL/INFO/VER_INFO\n"));

		/* Make sure we're talking to a virtual network device. */
		if (vi->dev_class != VDEV_NETWORK &&
		    vi->dev_class != VDEV_NETWORK_SWITCH) {
			/* Huh, we're not talking to a network device? */
			printf("Not a network device\n");
			vi->tag.stype = VIO_SUBTYPE_NACK;
			vio_sendmsg(sc, vi, sizeof(*vi));
			return;
		}

		if (vi->major != VNET_MAJOR) {
			vi->tag.stype = VIO_SUBTYPE_NACK;
			vi->major = VNET_MAJOR;
			vi->minor = VNET_MINOR;
			vio_sendmsg(sc, vi, sizeof(*vi));
			return;
		}

		vi->tag.stype = VIO_SUBTYPE_ACK;
		vi->tag.sid = sc->sc_local_sid;
		vi->minor = VNET_MINOR;
		vio_sendmsg(sc, vi, sizeof(*vi));
		sc->sc_vio_state |= VIO_RCV_VER_INFO;
		break;

	case VIO_SUBTYPE_ACK:
		DPRINTF(("CTRL/ACK/VER_INFO\n"));
		if (!ISSET(sc->sc_vio_state, VIO_SND_VER_INFO)) {
			ldc_reset(&sc->sc_lc);
			break;
		}
		sc->sc_vio_state |= VIO_ACK_VER_INFO;
		break;

	default:
		DPRINTF(("CTRL/0x%02x/VER_INFO\n", vi->tag.stype));
		break;
	}

	if (ISSET(sc->sc_vio_state, VIO_RCV_VER_INFO) &&
	    ISSET(sc->sc_vio_state, VIO_ACK_VER_INFO))
		vnet_send_attr_info(sc);
}

void
vnet_rx_vio_attr_info(struct vnet_softc *sc, struct vio_msg_tag *tag)
{
	struct vnet_attr_info *ai = (struct vnet_attr_info *)tag;

	switch (ai->tag.stype) {
	case VIO_SUBTYPE_INFO:
		DPRINTF(("CTRL/INFO/ATTR_INFO\n"));
		sc->sc_xfer_mode = ai->xfer_mode;

		ai->tag.stype = VIO_SUBTYPE_ACK;
		ai->tag.sid = sc->sc_local_sid;
		vio_sendmsg(sc, ai, sizeof(*ai));
		sc->sc_vio_state |= VIO_RCV_ATTR_INFO;
		break;

	case VIO_SUBTYPE_ACK:
		DPRINTF(("CTRL/ACK/ATTR_INFO\n"));
		if (!ISSET(sc->sc_vio_state, VIO_SND_ATTR_INFO)) {
			ldc_reset(&sc->sc_lc);
			break;
		}
		sc->sc_vio_state |= VIO_ACK_ATTR_INFO;
		break;

	default:
		DPRINTF(("CTRL/0x%02x/ATTR_INFO\n", ai->tag.stype));
		break;
	}

	if (ISSET(sc->sc_vio_state, VIO_RCV_ATTR_INFO) &&
	    ISSET(sc->sc_vio_state, VIO_ACK_ATTR_INFO)) {
		if (sc->sc_xfer_mode == VIO_DRING_MODE)
			vnet_send_dring_reg(sc);
		else
			vio_send_rdx(sc);
	}
}

void
vnet_rx_vio_dring_reg(struct vnet_softc *sc, struct vio_msg_tag *tag)
{
	struct vio_dring_reg *dr = (struct vio_dring_reg *)tag;

	switch (dr->tag.stype) {
	case VIO_SUBTYPE_INFO:
		DPRINTF(("CTRL/INFO/DRING_REG\n"));

		sc->sc_peer_dring_nentries = dr->num_descriptors;
		sc->sc_peer_desc_size = dr->descriptor_size;
		sc->sc_peer_dring_cookie = dr->cookie[0];

		dr->tag.stype = VIO_SUBTYPE_ACK;
		dr->tag.sid = sc->sc_local_sid;
		vio_sendmsg(sc, dr, sizeof(*dr));
		sc->sc_vio_state |= VIO_RCV_DRING_REG;
		break;

	case VIO_SUBTYPE_ACK:
		DPRINTF(("CTRL/ACK/DRING_REG\n"));
		if (!ISSET(sc->sc_vio_state, VIO_SND_DRING_REG)) {
			ldc_reset(&sc->sc_lc);
			break;
		}

		sc->sc_dring_ident = dr->dring_ident;
		sc->sc_seq_no = 1;

		sc->sc_vio_state |= VIO_ACK_DRING_REG;
		break;

	default:
		DPRINTF(("CTRL/0x%02x/DRING_REG\n", dr->tag.stype));
		break;
	}

	if (ISSET(sc->sc_vio_state, VIO_RCV_DRING_REG) &&
	    ISSET(sc->sc_vio_state, VIO_ACK_DRING_REG))
		vio_send_rdx(sc);
}

void
vnet_rx_vio_rdx(struct vnet_softc *sc, struct vio_msg_tag *tag)
{
	struct ifnet *ifp = &sc->sc_ac.ac_if;

	switch (tag->stype) {
	case VIO_SUBTYPE_INFO:
		DPRINTF(("CTRL/INFO/RDX\n"));

		tag->stype = VIO_SUBTYPE_ACK;
		tag->sid = sc->sc_local_sid;
		vio_sendmsg(sc, tag, sizeof(*tag));
		sc->sc_vio_state |= VIO_RCV_RDX;
		break;

	case VIO_SUBTYPE_ACK:
		DPRINTF(("CTRL/ACK/RDX\n"));
		if (!ISSET(sc->sc_vio_state, VIO_SND_RDX)) {
			ldc_reset(&sc->sc_lc);
			break;
		}
		sc->sc_vio_state |= VIO_ACK_RDX;
		break;

	default:
		DPRINTF(("CTRL/0x%02x/RDX (VIO)\n", tag->stype));
		break;
	}

	if (ISSET(sc->sc_vio_state, VIO_RCV_RDX) &&
	    ISSET(sc->sc_vio_state, VIO_ACK_RDX)) {
		/* Link is up! */
		vnet_link_state(sc);

		/* Configure multicast now that we can. */
		vnet_setmulti(sc, 1);
		vnet_start(ifp);
	}
}

void
vnet_rx_vio_data(struct vnet_softc *sc, struct vio_msg *vm)
{
	struct vio_msg_tag *tag = (struct vio_msg_tag *)&vm->type;

	if (!ISSET(sc->sc_vio_state, VIO_RCV_RDX) ||
	    !ISSET(sc->sc_vio_state, VIO_ACK_RDX)) {
		DPRINTF(("Spurious DATA/0x%02x/0x%04x\n", tag->stype,
		    tag->stype_env));
		return;
	}

	switch (tag->stype_env) {
	case VIO_DESC_DATA:
		vnet_rx_vio_desc_data(sc, tag);
		break;

	case VIO_DRING_DATA:
		vnet_rx_vio_dring_data(sc, tag);
		break;

	default:
		DPRINTF(("DATA/0x%02x/0x%04x\n", tag->stype, tag->stype_env));
		break;
	}
}

void
vnet_rx_vio_desc_data(struct vnet_softc *sc, struct vio_msg_tag *tag)
{
	struct vnet_desc_msg *dm = (struct vnet_desc_msg *)tag;
	struct ldc_conn *lc = &sc->sc_lc;
	struct ldc_map *map = sc->sc_lm;
	struct ifnet *ifp = &sc->sc_ac.ac_if;
	struct mbuf *m;
	caddr_t buf;
	paddr_t pa;
	psize_t nbytes;
	int err;

	switch (tag->stype) {
	case VIO_SUBTYPE_INFO:
		buf = pool_get(&sc->sc_pool, PR_NOWAIT|PR_ZERO);
		if (buf == NULL) {
			ifp->if_ierrors++;
			goto skip;
		}
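		/* LDC copies work in 8-byte units, so round up. */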
		nbytes = roundup(dm->nbytes, 8);

		pmap_extract(pmap_kernel(), (vaddr_t)buf, &pa);
		err = hv_ldc_copy(lc->lc_id, LDC_COPY_IN,
		    dm->cookie[0].addr, pa, nbytes, &nbytes);
		if (err != H_EOK) {
			pool_put(&sc->sc_pool, buf);
			ifp->if_ierrors++;
			goto skip;
		}

		/* Stupid OBP doesn't align properly. */
		m = m_devget(buf, dm->nbytes, ETHER_ALIGN, ifp, NULL);
		pool_put(&sc->sc_pool, buf);
		if (m == NULL) {
			ifp->if_ierrors++;
			goto skip;
		}

		ifp->if_ipackets++;

#if NBPFILTER > 0
		if (ifp->if_bpf)
			bpf_mtap(ifp->if_bpf, m, BPF_DIRECTION_IN);
#endif /* NBPFILTER > 0 */

		/* Pass it on. */
		ether_input_mbuf(ifp, m);

	skip:
		dm->tag.stype = VIO_SUBTYPE_ACK;
		dm->tag.sid = sc->sc_local_sid;
		vio_sendmsg(sc, dm, sizeof(*dm));
		break;

	case VIO_SUBTYPE_ACK:
		DPRINTF(("DATA/ACK/DESC_DATA\n"));

		if (dm->desc_handle != sc->sc_tx_cons) {
			printf("out of order\n");
			return;
		}

		map->lm_slot[sc->sc_vsd[sc->sc_tx_cons].vsd_map_idx].entry = 0;
		map->lm_count--;

		pool_put(&sc->sc_pool, sc->sc_vsd[sc->sc_tx_cons].vsd_buf);

		sc->sc_tx_cons++;
		sc->sc_tx_cons &= (sc->sc_vd->vd_nentries - 1);
		sc->sc_tx_cnt--;
		break;

	case VIO_SUBTYPE_NACK:
		DPRINTF(("DATA/NACK/DESC_DATA\n"));
		break;

	default:
		DPRINTF(("DATA/0x%02x/DESC_DATA\n", tag->stype));
		break;
	}
}

void
vnet_rx_vio_dring_data(struct vnet_softc *sc, struct vio_msg_tag *tag)
{
	struct vio_dring_msg *dm = (struct vio_dring_msg *)tag;
	struct ldc_conn *lc = &sc->sc_lc;
	struct ifnet *ifp = &sc->sc_ac.ac_if;
	struct mbuf *m;
	paddr_t pa;
	psize_t nbytes;
	int err;

	switch (tag->stype) {
	case VIO_SUBTYPE_INFO:
	{
		struct vnet_desc desc;
		uint64_t cookie;
		paddr_t desc_pa;
		int idx, ack_end_idx = -1;

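		/*
		 * Walk the peer's descriptor ring starting at
		 * start_idx, receive the packet described by each
		 * READY descriptor and mark the descriptor DONE.
		 */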
		idx = dm->start_idx;
		for (;;) {
			cookie = sc->sc_peer_dring_cookie.addr;
			cookie += idx * sc->sc_peer_desc_size;
			nbytes = sc->sc_peer_desc_size;
			pmap_extract(pmap_kernel(), (vaddr_t)&desc, &desc_pa);
			err = hv_ldc_copy(lc->lc_id, LDC_COPY_IN, cookie,
			    desc_pa, nbytes, &nbytes);
			if (err != H_EOK) {
				printf("hv_ldc_copy_in %d\n", err);
				break;
			}

			if (desc.hdr.dstate != VIO_DESC_READY)
				break;

			m = MCLGETI(NULL, M_DONTWAIT, &sc->sc_ac.ac_if,
			    MCLBYTES);
			if (!m)
				break;
			ifp->if_ipackets++;
			m->m_pkthdr.rcvif = ifp;
			m->m_len = m->m_pkthdr.len = desc.nbytes;
			nbytes = roundup(desc.nbytes + VNET_ETHER_ALIGN, 8);

			pmap_extract(pmap_kernel(), (vaddr_t)m->m_data, &pa);
			err = hv_ldc_copy(lc->lc_id, LDC_COPY_IN,
			    desc.cookie[0].addr, pa, nbytes, &nbytes);
			if (err != H_EOK) {
				m_freem(m);
				goto skip;
			}
			m->m_data += VNET_ETHER_ALIGN;

#if NBPFILTER > 0
			if (ifp->if_bpf)
				bpf_mtap(ifp->if_bpf, m, BPF_DIRECTION_IN);
#endif /* NBPFILTER > 0 */

			/* Pass it on. */
			ether_input_mbuf(ifp, m);

		skip:
			desc.hdr.dstate = VIO_DESC_DONE;
			nbytes = sc->sc_peer_desc_size;
			err = hv_ldc_copy(lc->lc_id, LDC_COPY_OUT, cookie,
			    desc_pa, nbytes, &nbytes);
			if (err != H_EOK)
				printf("hv_ldc_copy_out %d\n", err);

			ack_end_idx = idx;
			if (++idx == sc->sc_peer_dring_nentries)
				idx = 0;
		}

		if (ack_end_idx == -1) {
			dm->tag.stype = VIO_SUBTYPE_NACK;
		} else {
			dm->tag.stype = VIO_SUBTYPE_ACK;
			dm->end_idx = ack_end_idx;
		}
		dm->tag.sid = sc->sc_local_sid;
		dm->proc_state = VIO_DP_STOPPED;
		vio_sendmsg(sc, dm, sizeof(*dm));
		break;
	}

	case VIO_SUBTYPE_ACK:
	{
		struct ldc_map *map = sc->sc_lm;
		int cons;

		sc->sc_peer_state = dm->proc_state;

		cons = sc->sc_tx_cons;
		while (sc->sc_vd->vd_desc[cons].hdr.dstate == VIO_DESC_DONE) {
			map->lm_slot[sc->sc_vsd[cons].vsd_map_idx].entry = 0;
			map->lm_count--;

			pool_put(&sc->sc_pool, sc->sc_vsd[cons].vsd_buf);

			sc->sc_vd->vd_desc[cons++].hdr.dstate = VIO_DESC_FREE;
			cons &= (sc->sc_vd->vd_nentries - 1);
			sc->sc_tx_cnt--;
		}
		sc->sc_tx_cons = cons;

		if (sc->sc_tx_cnt > 0 && sc->sc_peer_state != VIO_DP_ACTIVE)
			vnet_send_dring_data(sc, sc->sc_tx_cons);

		if (sc->sc_tx_cnt < sc->sc_vd->vd_nentries)
			ifp->if_flags &= ~IFF_OACTIVE;
		if (sc->sc_tx_cnt == 0)
			ifp->if_timer = 0;

		vnet_start(ifp);
		break;
	}

	case VIO_SUBTYPE_NACK:
		DPRINTF(("DATA/NACK/DRING_DATA\n"));
		break;

	default:
		DPRINTF(("DATA/0x%02x/DRING_DATA\n", tag->stype));
		break;
	}
}

void
vnet_ldc_reset(struct ldc_conn *lc)
{
	struct vnet_softc *sc = lc->lc_sc;

	sc->sc_tx_cnt = sc->sc_tx_prod = sc->sc_tx_cons = 0;
	sc->sc_vio_state = 0;
	vnet_link_state(sc);
}

void
vnet_ldc_start(struct ldc_conn *lc)
{
	struct vnet_softc *sc = lc->lc_sc;

	vnet_send_ver_info(sc, VNET_MAJOR, VNET_MINOR);
}

void
vio_sendmsg(struct vnet_softc *sc, void *msg, size_t len)
{
	struct ldc_conn *lc = &sc->sc_lc;
	struct ldc_pkt *lp;
	uint64_t tx_head, tx_tail, tx_state;
	int err;

	err = hv_ldc_tx_get_state(lc->lc_id, &tx_head, &tx_tail, &tx_state);
	if (err != H_EOK)
		return;

	lp = (struct ldc_pkt *)(lc->lc_txq->lq_va + tx_tail);
	bzero(lp, sizeof(struct ldc_pkt));
	lp->type = LDC_DATA;
	lp->stype = LDC_INFO;
	KASSERT((len & ~LDC_LEN_MASK) == 0);
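	/* We never fragment messages; mark both start and stop. */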
	lp->env = len | LDC_FRAG_STOP | LDC_FRAG_START;
	lp->seqid = lc->lc_tx_seqid++;
	bcopy(msg, &lp->major, len);

	tx_tail += sizeof(*lp);
	tx_tail &= ((lc->lc_txq->lq_nentries * sizeof(*lp)) - 1);
	err = hv_ldc_tx_set_qtail(lc->lc_id, tx_tail);
	if (err != H_EOK)
		printf("%s: hv_ldc_tx_set_qtail: %d\n", __func__, err);
}

void
vnet_send_ver_info(struct vnet_softc *sc, uint16_t major, uint16_t minor)
{
	struct vio_ver_info vi;

	bzero(&vi, sizeof(vi));
	vi.tag.type = VIO_TYPE_CTRL;
	vi.tag.stype = VIO_SUBTYPE_INFO;
	vi.tag.stype_env = VIO_VER_INFO;
	vi.tag.sid = sc->sc_local_sid;
	vi.major = major;
	vi.minor = minor;
	vi.dev_class = VDEV_NETWORK;
	vio_sendmsg(sc, &vi, sizeof(vi));

	sc->sc_vio_state |= VIO_SND_VER_INFO;
}

void
vnet_send_attr_info(struct vnet_softc *sc)
{
	struct vnet_attr_info ai;
	int i;

	bzero(&ai, sizeof(ai));
	ai.tag.type = VIO_TYPE_CTRL;
	ai.tag.stype = VIO_SUBTYPE_INFO;
	ai.tag.stype_env = VIO_ATTR_INFO;
	ai.tag.sid = sc->sc_local_sid;
	ai.xfer_mode = VIO_DRING_MODE;
	ai.addr_type = VNET_ADDR_ETHERMAC;
	ai.ack_freq = 0;
	ai.addr = 0;
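	/* Pack the MAC address into the low 48 bits, MSB first. */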
	for (i = 0; i < ETHER_ADDR_LEN; i++) {
		ai.addr <<= 8;
		ai.addr |= sc->sc_ac.ac_enaddr[i];
	}
	ai.mtu = ETHER_MAX_LEN - ETHER_CRC_LEN;
	vio_sendmsg(sc, &ai, sizeof(ai));

	sc->sc_vio_state |= VIO_SND_ATTR_INFO;
}

void
vnet_send_dring_reg(struct vnet_softc *sc)
{
	struct vio_dring_reg dr;

	bzero(&dr, sizeof(dr));
	dr.tag.type = VIO_TYPE_CTRL;
	dr.tag.stype = VIO_SUBTYPE_INFO;
	dr.tag.stype_env = VIO_DRING_REG;
	dr.tag.sid = sc->sc_local_sid;
	dr.dring_ident = 0;
	dr.num_descriptors = sc->sc_vd->vd_nentries;
	dr.descriptor_size = sizeof(struct vnet_desc);
	dr.options = VIO_TX_RING;
	dr.ncookies = 1;
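	/*
	 * The ring is mapped through entry 0 of our map table (set
	 * up in vnet_init()), so its cookie address is 0.
	 */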
	dr.cookie[0].addr = 0;
	dr.cookie[0].size = PAGE_SIZE;
	vio_sendmsg(sc, &dr, sizeof(dr));

	sc->sc_vio_state |= VIO_SND_DRING_REG;
}

void
vio_send_rdx(struct vnet_softc *sc)
{
	struct vio_msg_tag tag;

	tag.type = VIO_TYPE_CTRL;
	tag.stype = VIO_SUBTYPE_INFO;
	tag.stype_env = VIO_RDX;
	tag.sid = sc->sc_local_sid;
	vio_sendmsg(sc, &tag, sizeof(tag));

	sc->sc_vio_state |= VIO_SND_RDX;
}

void
vnet_send_dring_data(struct vnet_softc *sc, uint32_t start_idx)
{
	struct vio_dring_msg dm;

	bzero(&dm, sizeof(dm));
	dm.tag.type = VIO_TYPE_DATA;
	dm.tag.stype = VIO_SUBTYPE_INFO;
	dm.tag.stype_env = VIO_DRING_DATA;
	dm.tag.sid = sc->sc_local_sid;
	dm.seq_no = sc->sc_seq_no++;
	dm.dring_ident = sc->sc_dring_ident;
	dm.start_idx = start_idx;
	dm.end_idx = -1;
	vio_sendmsg(sc, &dm, sizeof(dm));

	sc->sc_peer_state = VIO_DP_ACTIVE;
}

void
vnet_start(struct ifnet *ifp)
{
	struct vnet_softc *sc = ifp->if_softc;
	struct ldc_conn *lc = &sc->sc_lc;
	struct ldc_map *map = sc->sc_lm;
	struct mbuf *m;
	paddr_t pa;
	caddr_t buf;
	uint64_t tx_head, tx_tail, tx_state;
	int err, desc;

	if ((ifp->if_flags & (IFF_RUNNING | IFF_OACTIVE)) != IFF_RUNNING)
		return;

	if (IFQ_IS_EMPTY(&ifp->if_snd))
		return;

	/*
	 * We cannot transmit packets until a VIO connection has been
	 * established.
	 */
	if (!ISSET(sc->sc_vio_state, VIO_RCV_RDX) ||
	    !ISSET(sc->sc_vio_state, VIO_ACK_RDX))
		return;

	/*
	 * Make sure there is room in the LDC transmit queue to send a
	 * DRING_DATA message.
	 */
	err = hv_ldc_tx_get_state(lc->lc_id, &tx_head, &tx_tail, &tx_state);
	if (err != H_EOK)
		return;
	tx_tail += sizeof(struct ldc_pkt);
	tx_tail &= ((lc->lc_txq->lq_nentries * sizeof(struct ldc_pkt)) - 1);
	if (tx_tail == tx_head) {
		ifp->if_flags |= IFF_OACTIVE;
		return;
	}

	if (sc->sc_xfer_mode == VIO_DESC_MODE) {
		vnet_start_desc(ifp);
		return;
	}

	desc = sc->sc_tx_prod;
	while (sc->sc_vd->vd_desc[desc].hdr.dstate == VIO_DESC_FREE) {
		IFQ_POLL(&ifp->if_snd, m);
		if (m == NULL)
			break;

		if (sc->sc_tx_cnt >= sc->sc_vd->vd_nentries ||
		    map->lm_count >= map->lm_nentries) {
			ifp->if_flags |= IFF_OACTIVE;
			break;
		}

		buf = pool_get(&sc->sc_pool, PR_NOWAIT|PR_ZERO);
		if (buf == NULL) {
			ifp->if_flags |= IFF_OACTIVE;
			break;
		}
		m_copydata(m, 0, m->m_pkthdr.len, buf + VNET_ETHER_ALIGN);
		IFQ_DEQUEUE(&ifp->if_snd, m);

#if NBPFILTER > 0
		/*
		 * If BPF is listening on this interface, let it see the
		 * packet before we commit it to the wire.
		 */
		if (ifp->if_bpf)
			bpf_mtap(ifp->if_bpf, m, BPF_DIRECTION_OUT);
#endif

		pmap_extract(pmap_kernel(), (vaddr_t)buf, &pa);
		KASSERT((pa & ~PAGE_MASK) == (pa & LDC_MTE_RA_MASK));
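		/*
		 * Claim a free map table entry for the transmit
		 * buffer.  The LDC cookie address encodes the map
		 * table index in the high bits and the offset within
		 * the page in the low bits.
		 */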
		while (map->lm_slot[map->lm_next].entry != 0) {
			map->lm_next++;
			map->lm_next &= (map->lm_nentries - 1);
		}
		map->lm_slot[map->lm_next].entry = (pa & LDC_MTE_RA_MASK);
		map->lm_slot[map->lm_next].entry |= LDC_MTE_CPR;
		map->lm_count++;

		sc->sc_vd->vd_desc[desc].nbytes = max(m->m_pkthdr.len, 60);
		sc->sc_vd->vd_desc[desc].ncookies = 1;
		sc->sc_vd->vd_desc[desc].cookie[0].addr =
		    map->lm_next << PAGE_SHIFT | (pa & PAGE_MASK);
		sc->sc_vd->vd_desc[desc].cookie[0].size = 2048;
		membar(Sync);
		sc->sc_vd->vd_desc[desc].hdr.dstate = VIO_DESC_READY;

		sc->sc_vsd[desc].vsd_map_idx = map->lm_next;
		sc->sc_vsd[desc].vsd_buf = buf;

		desc++;
		desc &= (sc->sc_vd->vd_nentries - 1);
		sc->sc_tx_cnt++;

		m_freem(m);
	}

	if (sc->sc_tx_cnt > 0 && sc->sc_peer_state != VIO_DP_ACTIVE) {
		vnet_send_dring_data(sc, sc->sc_tx_prod);
		ifp->if_timer = 5;
	}

	sc->sc_tx_prod = desc;
}

void
vnet_start_desc(struct ifnet *ifp)
{
	struct vnet_softc *sc = ifp->if_softc;
	struct ldc_map *map = sc->sc_lm;
	struct vnet_desc_msg dm;
	struct mbuf *m;
	paddr_t pa;
	caddr_t buf;

	for (;;) {
		IFQ_POLL(&ifp->if_snd, m);
		if (m == NULL)
			break;

		if (sc->sc_tx_cnt >= sc->sc_vd->vd_nentries ||
		    map->lm_count >= map->lm_nentries) {
			ifp->if_flags |= IFF_OACTIVE;
			return;
		}

		buf = pool_get(&sc->sc_pool, PR_NOWAIT|PR_ZERO);
		if (buf == NULL) {
			ifp->if_flags |= IFF_OACTIVE;
			return;
		}
		m_copydata(m, 0, m->m_pkthdr.len, buf);
		IFQ_DEQUEUE(&ifp->if_snd, m);

#if NBPFILTER > 0
		/*
		 * If BPF is listening on this interface, let it see the
		 * packet before we commit it to the wire.
		 */
		if (ifp->if_bpf)
			bpf_mtap(ifp->if_bpf, m, BPF_DIRECTION_OUT);
#endif

		pmap_extract(pmap_kernel(), (vaddr_t)buf, &pa);
		KASSERT((pa & ~PAGE_MASK) == (pa & LDC_MTE_RA_MASK));
		while (map->lm_slot[map->lm_next].entry != 0) {
			map->lm_next++;
			map->lm_next &= (map->lm_nentries - 1);
		}
		map->lm_slot[map->lm_next].entry = (pa & LDC_MTE_RA_MASK);
		map->lm_slot[map->lm_next].entry |= LDC_MTE_CPR;
		map->lm_count++;

		sc->sc_vsd[sc->sc_tx_prod].vsd_map_idx = map->lm_next;
		sc->sc_vsd[sc->sc_tx_prod].vsd_buf = buf;

		bzero(&dm, sizeof(dm));
		dm.tag.type = VIO_TYPE_DATA;
		dm.tag.stype = VIO_SUBTYPE_INFO;
		dm.tag.stype_env = VIO_DESC_DATA;
		dm.tag.sid = sc->sc_local_sid;
		dm.seq_no = sc->sc_seq_no++;
		dm.desc_handle = sc->sc_tx_prod;
		dm.nbytes = max(m->m_pkthdr.len, 60);
		dm.ncookies = 1;
		dm.cookie[0].addr =
			map->lm_next << PAGE_SHIFT | (pa & PAGE_MASK);
		dm.cookie[0].size = 2048;
		vio_sendmsg(sc, &dm, sizeof(dm));

		sc->sc_tx_prod++;
		sc->sc_tx_prod &= (sc->sc_vd->vd_nentries - 1);
		sc->sc_tx_cnt++;

		m_freem(m);
	}
}

int
vnet_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
{
	struct vnet_softc *sc = ifp->if_softc;
	struct ifaddr *ifa = (struct ifaddr *)data;
	struct ifreq *ifr = (struct ifreq *)data;
	int s, error = 0;

	s = splnet();

	switch (cmd) {
	case SIOCSIFADDR:
		ifp->if_flags |= IFF_UP;
#ifdef INET
		if (ifa->ifa_addr->sa_family == AF_INET)
			arp_ifinit(&sc->sc_ac, ifa);
#endif
		/* FALLTHROUGH */
	case SIOCSIFFLAGS:
		if (ifp->if_flags & IFF_UP) {
			if ((ifp->if_flags & IFF_RUNNING) == 0)
				vnet_init(ifp);
		} else {
			if (ifp->if_flags & IFF_RUNNING)
				vnet_stop(ifp);
		}
		break;

	case SIOCGIFMEDIA:
	case SIOCSIFMEDIA:
		error = ifmedia_ioctl(ifp, ifr, &sc->sc_media, cmd);
		break;

	case SIOCADDMULTI:
	case SIOCDELMULTI:
		/*
		 * XXX Removing all multicast addresses and adding
		 * most of them back is somewhat wasteful.
		 */
		vnet_setmulti(sc, 0);
		error = ether_ioctl(ifp, &sc->sc_ac, cmd, data);
		vnet_setmulti(sc, 1);
		if (error == ENETRESET)
			error = 0;
		break;

	default:
		error = ether_ioctl(ifp, &sc->sc_ac, cmd, data);
	}

	splx(s);
	return (error);
}

void
vnet_watchdog(struct ifnet *ifp)
{
	struct vnet_softc *sc = ifp->if_softc;

	printf("%s: watchdog timeout\n", sc->sc_dv.dv_xname);
}

int
vnet_media_change(struct ifnet *ifp)
{
	return (0);
}

void
vnet_media_status(struct ifnet *ifp, struct ifmediareq *imr)
{
	imr->ifm_active = IFM_ETHER | IFM_AUTO;
	imr->ifm_status = IFM_AVALID;

	if (LINK_STATE_IS_UP(ifp->if_link_state) &&
	    ifp->if_flags & IFF_UP)
		imr->ifm_status |= IFM_ACTIVE;
}

void
vnet_link_state(struct vnet_softc *sc)
{
	struct ifnet *ifp = &sc->sc_ac.ac_if;
	int link_state = LINK_STATE_DOWN;

	if (ISSET(sc->sc_vio_state, VIO_RCV_RDX) &&
	    ISSET(sc->sc_vio_state, VIO_ACK_RDX))
		link_state = LINK_STATE_FULL_DUPLEX;
	if (ifp->if_link_state != link_state) {
		ifp->if_link_state = link_state;
		if_link_state_change(ifp);
	}
}

void
vnet_setmulti(struct vnet_softc *sc, int set)
{
	struct arpcom *ac = &sc->sc_ac;
	struct ether_multi *enm;
	struct ether_multistep step;
	struct vnet_mcast_info mi;
	int count = 0;

	if (!ISSET(sc->sc_vio_state, VIO_RCV_RDX) ||
	    !ISSET(sc->sc_vio_state, VIO_ACK_RDX))
		return;

	bzero(&mi, sizeof(mi));
	mi.tag.type = VIO_TYPE_CTRL;
	mi.tag.stype = VIO_SUBTYPE_INFO;
	mi.tag.stype_env = VNET_MCAST_INFO;
	mi.tag.sid = sc->sc_local_sid;
	mi.set = set ? 1 : 0;
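	/* Send the addresses in batches of up to VNET_NUM_MCAST. */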
	ETHER_FIRST_MULTI(step, ac, enm);
	while (enm != NULL) {
		/* XXX What about multicast ranges? */
		bcopy(enm->enm_addrlo, mi.mcast_addr[count], ETHER_ADDR_LEN);
		ETHER_NEXT_MULTI(step, enm);

		count++;
		if (count < VNET_NUM_MCAST)
			continue;

		mi.count = VNET_NUM_MCAST;
		vio_sendmsg(sc, &mi, sizeof(mi));
		count = 0;
	}

	if (count > 0) {
		mi.count = count;
		vio_sendmsg(sc, &mi, sizeof(mi));
	}
}

void
vnet_init(struct ifnet *ifp)
{
	struct vnet_softc *sc = ifp->if_softc;
	struct ldc_conn *lc = &sc->sc_lc;
	int err;

	sc->sc_lm = ldc_map_alloc(sc->sc_dmatag, 2048);
	if (sc->sc_lm == NULL)
		return;

	err = hv_ldc_set_map_table(lc->lc_id,
	    sc->sc_lm->lm_map->dm_segs[0].ds_addr, sc->sc_lm->lm_nentries);
	if (err != H_EOK) {
		printf("hv_ldc_set_map_table %d\n", err);
		return;
	}

	sc->sc_vd = vnet_dring_alloc(sc->sc_dmatag, 128);
	if (sc->sc_vd == NULL)
		return;
	sc->sc_vsd = malloc(128 * sizeof(*sc->sc_vsd), M_DEVBUF, M_NOWAIT);
	if (sc->sc_vsd == NULL)
		return;

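	/*
	 * Reserve map table entry 0 for the descriptor ring and
	 * grant the peer both read and write access to it.
	 */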
	sc->sc_lm->lm_slot[0].entry = sc->sc_vd->vd_map->dm_segs[0].ds_addr;
	sc->sc_lm->lm_slot[0].entry &= LDC_MTE_RA_MASK;
	sc->sc_lm->lm_slot[0].entry |= LDC_MTE_CPR | LDC_MTE_CPW;
	sc->sc_lm->lm_next = 1;
	sc->sc_lm->lm_count = 1;

	err = hv_ldc_tx_qconf(lc->lc_id,
	    lc->lc_txq->lq_map->dm_segs[0].ds_addr, lc->lc_txq->lq_nentries);
	if (err != H_EOK)
		printf("hv_ldc_tx_qconf %d\n", err);

	err = hv_ldc_rx_qconf(lc->lc_id,
	    lc->lc_rxq->lq_map->dm_segs[0].ds_addr, lc->lc_rxq->lq_nentries);
	if (err != H_EOK)
		printf("hv_ldc_rx_qconf %d\n", err);

	cbus_intr_setenabled(sc->sc_tx_sysino, INTR_ENABLED);
	cbus_intr_setenabled(sc->sc_rx_sysino, INTR_ENABLED);

	ldc_send_vers(lc);

	ifp->if_flags |= IFF_RUNNING;
	ifp->if_flags &= ~IFF_OACTIVE;
}

void
vnet_stop(struct ifnet *ifp)
{
	struct vnet_softc *sc = ifp->if_softc;
	struct ldc_conn *lc = &sc->sc_lc;

	ifp->if_flags &= ~(IFF_RUNNING | IFF_OACTIVE);
	ifp->if_timer = 0;

	cbus_intr_setenabled(sc->sc_tx_sysino, INTR_DISABLED);
	cbus_intr_setenabled(sc->sc_rx_sysino, INTR_DISABLED);

	hv_ldc_tx_qconf(lc->lc_id, 0, 0);
	hv_ldc_rx_qconf(lc->lc_id, 0, 0);
	lc->lc_tx_state = lc->lc_rx_state = LDC_CHANNEL_DOWN;

	vnet_dring_free(sc->sc_dmatag, sc->sc_vd);

	hv_ldc_set_map_table(lc->lc_id, 0, 0);
	ldc_map_free(sc->sc_dmatag, sc->sc_lm);
}

struct vnet_dring *
vnet_dring_alloc(bus_dma_tag_t t, int nentries)
{
	struct vnet_dring *vd;
	bus_size_t size;
	caddr_t va;
	int nsegs;
	int i;

	vd = malloc(sizeof(struct vnet_dring), M_DEVBUF, M_NOWAIT);
	if (vd == NULL)
		return (NULL);

	size = roundup(nentries * sizeof(struct vnet_desc), PAGE_SIZE);

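	/*
	 * The usual bus_dma dance: create a map, allocate memory,
	 * map it into the kernel and load it into the map.
	 */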
	if (bus_dmamap_create(t, size, 1, size, 0,
	    BUS_DMA_NOWAIT | BUS_DMA_ALLOCNOW, &vd->vd_map) != 0)
		return (NULL);

	if (bus_dmamem_alloc(t, size, PAGE_SIZE, 0, &vd->vd_seg, 1,
	    &nsegs, BUS_DMA_NOWAIT) != 0)
		goto destroy;

	if (bus_dmamem_map(t, &vd->vd_seg, 1, size, &va,
	    BUS_DMA_NOWAIT) != 0)
		goto free;

	if (bus_dmamap_load(t, vd->vd_map, va, size, NULL,
	    BUS_DMA_NOWAIT) != 0)
		goto unmap;

	vd->vd_desc = (struct vnet_desc *)va;
	vd->vd_nentries = nentries;
	bzero(vd->vd_desc, nentries * sizeof(struct vnet_desc));
	for (i = 0; i < vd->vd_nentries; i++)
		vd->vd_desc[i].hdr.dstate = VIO_DESC_FREE;
	return (vd);

unmap:
	bus_dmamem_unmap(t, va, size);
free:
	bus_dmamem_free(t, &vd->vd_seg, 1);
destroy:
	bus_dmamap_destroy(t, vd->vd_map);

	return (NULL);
}

void
vnet_dring_free(bus_dma_tag_t t, struct vnet_dring *vd)
{
	bus_size_t size;

	size = vd->vd_nentries * sizeof(struct vnet_desc);
	size = roundup(size, PAGE_SIZE);

	bus_dmamap_unload(t, vd->vd_map);
	bus_dmamem_unmap(t, (caddr_t)vd->vd_desc, size);
	bus_dmamem_free(t, &vd->vd_seg, 1);
	bus_dmamap_destroy(t, vd->vd_map);
	free(vd, M_DEVBUF);
}