xref: /freebsd/sys/net/if_epair.c (revision 3dd5760a)
1 /*-
2  * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
3  *
4  * Copyright (c) 2008 The FreeBSD Foundation
5  * All rights reserved.
6  * Copyright (c) 2009-2021 Bjoern A. Zeeb <bz@FreeBSD.org>
7  *
8  * This software was developed by CK Software GmbH under sponsorship
9  * from the FreeBSD Foundation.
10  *
11  * Redistribution and use in source and binary forms, with or without
12  * modification, are permitted provided that the following conditions
13  * are met:
14  * 1. Redistributions of source code must retain the above copyright
15  * notice, this list of conditions and the following disclaimer.
16  * 2. Redistributions in binary form must reproduce the above copyright
17  * notice, this list of conditions and the following disclaimer in the
18  * documentation and/or other materials provided with the distribution.
19  *
20  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
21  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23  * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
24  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
26  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
27  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
28  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
29  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
30  * SUCH DAMAGE.
31  */
32 
33 /*
34  * A pair of virtual back-to-back connected ethernet like interfaces
35  * (``two interfaces with a virtual cross-over cable'').
36  *
37  * This is mostly intended to be used to provide connectivity between
38  * different virtual network stack instances.
39  */
40 
41 #include <sys/cdefs.h>
42 __FBSDID("$FreeBSD$");
43 
44 #include <sys/param.h>
45 #include <sys/hash.h>
46 #include <sys/jail.h>
47 #include <sys/kernel.h>
48 #include <sys/libkern.h>
49 #include <sys/malloc.h>
50 #include <sys/mbuf.h>
51 #include <sys/module.h>
52 #include <sys/proc.h>
53 #include <sys/queue.h>
54 #include <sys/smp.h>
55 #include <sys/socket.h>
56 #include <sys/sockio.h>
57 #include <sys/sysctl.h>
58 #include <sys/types.h>
59 #include <sys/buf_ring.h>
60 #include <sys/bus.h>
61 #include <sys/interrupt.h>
62 
63 #include <net/bpf.h>
64 #include <net/ethernet.h>
65 #include <net/if.h>
66 #include <net/if_var.h>
67 #include <net/if_clone.h>
68 #include <net/if_media.h>
69 #include <net/if_var.h>
70 #include <net/if_types.h>
71 #include <net/netisr.h>
72 #include <net/vnet.h>
73 
/*
 * Cloner callbacks.  They are defined further down and handed to
 * if_clone_advanced() in vnet_epair_init().
 */
static int epair_clone_match(struct if_clone *, const char *);
static int epair_clone_create(struct if_clone *, char *, size_t, caddr_t);
static int epair_clone_destroy(struct if_clone *, struct ifnet *);

/* Base name; used for the cloner, the malloc type and the swi handler. */
static const char epairname[] = "epair";
/* Size passed to buf_ring_alloc() for each of the receive rings. */
#define	RXRSIZE	4096	/* Probably overkill by 4-8x. */

static MALLOC_DEFINE(M_EPAIR, epairname,
    "Pair of virtual cross-over connected Ethernet-like interfaces");

/* Per-VNET cloner handle, set up in vnet_epair_init(). */
VNET_DEFINE_STATIC(struct if_clone *, epair_cloner);
#define	V_epair_cloner	VNET(epair_cloner)

/*
 * Counter feeding both the swi slot selection and the MAC address hash
 * in epair_clone_create(); read and updated under EPAIR_LOCK().
 */
static unsigned int next_index = 0;
/*
 * Wrappers around the single global mutex (declared below).  Besides
 * next_index it is also held while walking or modifying swi_sc[] and
 * while publishing entries in swi_cookie[].
 */
#define	EPAIR_LOCK_INIT()		mtx_init(&epair_n_index_mtx, "epairidx", \
					    NULL, MTX_DEF)
#define	EPAIR_LOCK_DESTROY()		mtx_destroy(&epair_n_index_mtx)
#define	EPAIR_LOCK()			mtx_lock(&epair_n_index_mtx)
#define	EPAIR_UNLOCK()			mtx_unlock(&epair_n_index_mtx)

/*
 * Software interrupt state, indexed by the slot picked in
 * epair_clone_create(): the swi(9) cookie of the slot's handler (created
 * on first use) and the list of softcs that handler services.
 */
static void				*swi_cookie[MAXCPU];	/* swi(9). */
static STAILQ_HEAD(, epair_softc)	swi_sc[MAXCPU];

static struct mtx epair_n_index_mtx;
/*
 * Per-interface state.  Every epair consists of two of these, cross-linked
 * through ifp/oifp.
 */
struct epair_softc {
	struct ifnet	*ifp;		/* This ifp. */
	struct ifnet	*oifp;		/* other ifp of pair. */
	/* Copy of swi_cookie[cpuidx]; cleared in epair_clone_destroy(). */
	void		*swi_cookie;	/* swi(9). */
	/*
	 * Two receive rings.  epair_menq() enqueues onto rxring[ridx];
	 * epair_sintr() flips ridx and then drains the ring that was
	 * current before the flip.
	 */
	struct buf_ring	*rxring[2];
	volatile int	ridx;		/* 0 || 1 */
	struct ifmedia	media;		/* Media config (fake). */
	uint32_t	cpuidx;		/* Index into swi_sc[]/swi_cookie[]. */
	STAILQ_ENTRY(epair_softc) entry;	/* Linkage on swi_sc[cpuidx]. */
};
108 
109 static void
110 epair_clear_mbuf(struct mbuf *m)
111 {
112 	/* Remove any CSUM_SND_TAG as ether_input will barf. */
113 	if (m->m_pkthdr.csum_flags & CSUM_SND_TAG) {
114 		m_snd_tag_rele(m->m_pkthdr.snd_tag);
115 		m->m_pkthdr.snd_tag = NULL;
116 		m->m_pkthdr.csum_flags &= ~CSUM_SND_TAG;
117 	}
118 
119 	m_tag_delete_nonpersistent(m);
120 }
121 
122 static void
123 epair_if_input(struct epair_softc *sc, int ridx)
124 {
125 	struct epoch_tracker et;
126 	struct ifnet *ifp;
127 	struct mbuf *m;
128 
129 	ifp = sc->ifp;
130 	NET_EPOCH_ENTER(et);
131 	do {
132 		m = buf_ring_dequeue_sc(sc->rxring[ridx]);
133 		if (m == NULL)
134 			break;
135 
136 		MPASS((m->m_pkthdr.csum_flags & CSUM_SND_TAG) == 0);
137 		(*ifp->if_input)(ifp, m);
138 
139 	} while (1);
140 	NET_EPOCH_EXIT(et);
141 }
142 
143 static void
144 epair_sintr(struct epair_softc *sc)
145 {
146 	int ridx, nidx;
147 
148 	if_ref(sc->ifp);
149 	do {
150 		ridx = sc->ridx;
151 		nidx = (ridx == 0) ? 1 : 0;
152 	} while (!atomic_cmpset_int(&sc->ridx, ridx, nidx));
153 	epair_if_input(sc, ridx);
154 
155 	if_rele(sc->ifp);
156 }
157 
158 static void
159 epair_intr(void *arg)
160 {
161 	struct epair_softc *sc;
162 	uint32_t cpuidx;
163 
164 	cpuidx = (uintptr_t)arg;
165 	/* If this is a problem, this is a read-mostly situation. */
166 	EPAIR_LOCK();
167 	STAILQ_FOREACH(sc, &swi_sc[cpuidx], entry) {
168 		/* Do this lockless. */
169 		if (buf_ring_empty(sc->rxring[sc->ridx]))
170 			continue;
171 		epair_sintr(sc);
172 	}
173 	EPAIR_UNLOCK();
174 
175 	return;
176 }
177 
178 static int
179 epair_menq(struct mbuf *m, struct epair_softc *osc)
180 {
181 	struct ifnet *ifp, *oifp;
182 	int len, ret;
183 	int ridx;
184 	short mflags;
185 	bool was_empty;
186 
187 	/*
188 	 * I know this looks weird. We pass the "other sc" as we need that one
189 	 * and can get both ifps from it as well.
190 	 */
191 	oifp = osc->ifp;
192 	ifp = osc->oifp;
193 
194 	M_ASSERTPKTHDR(m);
195 	epair_clear_mbuf(m);
196 	if_setrcvif(m, oifp);
197 	M_SETFIB(m, oifp->if_fib);
198 
199 	/* Save values as once the mbuf is queued, it's not ours anymore. */
200 	len = m->m_pkthdr.len;
201 	mflags = m->m_flags;
202 
203 	MPASS(m->m_nextpkt == NULL);
204 	MPASS((m->m_pkthdr.csum_flags & CSUM_SND_TAG) == 0);
205 
206 	ridx = atomic_load_int(&osc->ridx);
207 	was_empty = buf_ring_empty(osc->rxring[ridx]);
208 	ret = buf_ring_enqueue(osc->rxring[ridx], m);
209 	if (ret != 0) {
210 		/* Ring is full. */
211 		m_freem(m);
212 		return (0);
213 	}
214 
215 	if_inc_counter(ifp, IFCOUNTER_OPACKETS, 1);
216 	/*
217 	 * IFQ_HANDOFF_ADJ/ip_handoff() update statistics,
218 	 * but as we bypass all this we have to duplicate
219 	 * the logic another time.
220 	 */
221 	if_inc_counter(ifp, IFCOUNTER_OBYTES, len);
222 	if (mflags & (M_BCAST|M_MCAST))
223 		if_inc_counter(ifp, IFCOUNTER_OMCASTS, 1);
224 	/* Someone else received the packet. */
225 	if_inc_counter(oifp, IFCOUNTER_IPACKETS, 1);
226 
227 	/* Kick the interrupt handler for the first packet. */
228 	if (was_empty && osc->swi_cookie != NULL)
229 		swi_sched(osc->swi_cookie, 0);
230 
231 	return (0);
232 }
233 
234 static void
235 epair_start(struct ifnet *ifp)
236 {
237 	struct mbuf *m;
238 	struct epair_softc *sc;
239 	struct ifnet *oifp;
240 
241 	/*
242 	 * We get packets here from ether_output via if_handoff()
243 	 * and need to put them into the input queue of the oifp
244 	 * and will put the packet into the receive-queue (rxq) of the
245 	 * other interface (oifp) of our pair.
246 	 */
247 	sc = ifp->if_softc;
248 	oifp = sc->oifp;
249 	sc = oifp->if_softc;
250 	for (;;) {
251 		IFQ_DEQUEUE(&ifp->if_snd, m);
252 		if (m == NULL)
253 			break;
254 		M_ASSERTPKTHDR(m);
255 		BPF_MTAP(ifp, m);
256 
257 		/* In case either interface is not usable drop the packet. */
258 		if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0 ||
259 		    (ifp->if_flags & IFF_UP) == 0 ||
260 		    (oifp->if_drv_flags & IFF_DRV_RUNNING) == 0 ||
261 		    (oifp->if_flags & IFF_UP) == 0) {
262 			m_freem(m);
263 			continue;
264 		}
265 
266 		(void) epair_menq(m, sc);
267 	}
268 }
269 
270 static int
271 epair_transmit(struct ifnet *ifp, struct mbuf *m)
272 {
273 	struct epair_softc *sc;
274 	struct ifnet *oifp;
275 	int error, len;
276 	short mflags;
277 
278 	if (m == NULL)
279 		return (0);
280 	M_ASSERTPKTHDR(m);
281 
282 	/*
283 	 * We are not going to use the interface en/dequeue mechanism
284 	 * on the TX side. We are called from ether_output_frame()
285 	 * and will put the packet into the receive-queue (rxq) of the
286 	 * other interface (oifp) of our pair.
287 	 */
288 	if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) {
289 		m_freem(m);
290 		if_inc_counter(ifp, IFCOUNTER_OERRORS, 1);
291 		return (ENXIO);
292 	}
293 	if ((ifp->if_flags & IFF_UP) == 0) {
294 		m_freem(m);
295 		if_inc_counter(ifp, IFCOUNTER_OERRORS, 1);
296 		return (ENETDOWN);
297 	}
298 
299 	BPF_MTAP(ifp, m);
300 
301 	/*
302 	 * In case the outgoing interface is not usable,
303 	 * drop the packet.
304 	 */
305 	sc = ifp->if_softc;
306 	oifp = sc->oifp;
307 	if ((oifp->if_drv_flags & IFF_DRV_RUNNING) == 0 ||
308 	    (oifp->if_flags & IFF_UP) == 0) {
309 		if_inc_counter(ifp, IFCOUNTER_OERRORS, 1);
310 		m_freem(m);
311 		return (0);
312 	}
313 	len = m->m_pkthdr.len;
314 	mflags = m->m_flags;
315 
316 #ifdef ALTQ
317 	/* Support ALTQ via the classic if_start() path. */
318 	IF_LOCK(&ifp->if_snd);
319 	if (ALTQ_IS_ENABLED(&ifp->if_snd)) {
320 		ALTQ_ENQUEUE(&ifp->if_snd, m, NULL, error);
321 		if (error)
322 			if_inc_counter(ifp, IFCOUNTER_OQDROPS, 1);
323 		IF_UNLOCK(&ifp->if_snd);
324 		if (!error) {
325 			if_inc_counter(ifp, IFCOUNTER_OBYTES, len);
326 			if (mflags & (M_BCAST|M_MCAST))
327 				if_inc_counter(ifp, IFCOUNTER_OMCASTS, 1);
328 			epair_start(ifp);
329 		}
330 		return (error);
331 	}
332 	IF_UNLOCK(&ifp->if_snd);
333 #endif
334 
335 	error = epair_menq(m, oifp->if_softc);
336 	return (error);
337 }
338 
339 static int
340 epair_media_change(struct ifnet *ifp __unused)
341 {
342 
343 	/* Do nothing. */
344 	return (0);
345 }
346 
347 static void
348 epair_media_status(struct ifnet *ifp __unused, struct ifmediareq *imr)
349 {
350 
351 	imr->ifm_status = IFM_AVALID | IFM_ACTIVE;
352 	imr->ifm_active = IFM_ETHER | IFM_10G_T | IFM_FDX;
353 }
354 
355 static int
356 epair_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
357 {
358 	struct epair_softc *sc;
359 	struct ifreq *ifr;
360 	int error;
361 
362 	ifr = (struct ifreq *)data;
363 	switch (cmd) {
364 	case SIOCSIFFLAGS:
365 	case SIOCADDMULTI:
366 	case SIOCDELMULTI:
367 		error = 0;
368 		break;
369 
370 	case SIOCSIFMEDIA:
371 	case SIOCGIFMEDIA:
372 		sc = ifp->if_softc;
373 		error = ifmedia_ioctl(ifp, ifr, &sc->media, cmd);
374 		break;
375 
376 	case SIOCSIFMTU:
377 		/* We basically allow all kinds of MTUs. */
378 		ifp->if_mtu = ifr->ifr_mtu;
379 		error = 0;
380 		break;
381 
382 	default:
383 		/* Let the common ethernet handler process this. */
384 		error = ether_ioctl(ifp, cmd, data);
385 		break;
386 	}
387 
388 	return (error);
389 }
390 
391 static void
392 epair_init(void *dummy __unused)
393 {
394 }
395 
396 /*
397  * Interface cloning functions.
398  * We use our private ones so that we can create/destroy our secondary
399  * device along with the primary one.
400  */
401 static int
402 epair_clone_match(struct if_clone *ifc, const char *name)
403 {
404 	const char *cp;
405 
406 	/*
407 	 * Our base name is epair.
408 	 * Our interfaces will be named epair<n>[ab].
409 	 * So accept anything of the following list:
410 	 * - epair
411 	 * - epair<n>
412 	 * but not the epair<n>[ab] versions.
413 	 */
414 	if (strncmp(epairname, name, sizeof(epairname)-1) != 0)
415 		return (0);
416 
417 	for (cp = name + sizeof(epairname) - 1; *cp != '\0'; cp++) {
418 		if (*cp < '0' || *cp > '9')
419 			return (0);
420 	}
421 
422 	return (1);
423 }
424 
425 static void
426 epair_clone_add(struct if_clone *ifc, struct epair_softc *scb)
427 {
428 	struct ifnet *ifp;
429 	uint8_t eaddr[ETHER_ADDR_LEN];	/* 00:00:00:00:00:00 */
430 
431 	ifp = scb->ifp;
432 	/* Copy epairNa etheraddr and change the last byte. */
433 	memcpy(eaddr, scb->oifp->if_hw_addr, ETHER_ADDR_LEN);
434 	eaddr[5] = 0x0b;
435 	ether_ifattach(ifp, eaddr);
436 
437 	if_clone_addif(ifc, ifp);
438 }
439 
440 static int
441 epair_clone_create(struct if_clone *ifc, char *name, size_t len, caddr_t params)
442 {
443 	struct epair_softc *sca, *scb;
444 	struct ifnet *ifp;
445 	char *dp;
446 	int error, unit, wildcard;
447 	uint64_t hostid;
448 	uint32_t key[3];
449 	uint32_t hash;
450 	uint8_t eaddr[ETHER_ADDR_LEN];	/* 00:00:00:00:00:00 */
451 
452 	/* Try to see if a special unit was requested. */
453 	error = ifc_name2unit(name, &unit);
454 	if (error != 0)
455 		return (error);
456 	wildcard = (unit < 0);
457 
458 	error = ifc_alloc_unit(ifc, &unit);
459 	if (error != 0)
460 		return (error);
461 
462 	/*
463 	 * If no unit had been given, we need to adjust the ifName.
464 	 * Also make sure there is space for our extra [ab] suffix.
465 	 */
466 	for (dp = name; *dp != '\0'; dp++);
467 	if (wildcard) {
468 		error = snprintf(dp, len - (dp - name), "%d", unit);
469 		if (error > len - (dp - name) - 1) {
470 			/* ifName too long. */
471 			ifc_free_unit(ifc, unit);
472 			return (ENOSPC);
473 		}
474 		dp += error;
475 	}
476 	if (len - (dp - name) - 1 < 1) {
477 		/* No space left for our [ab] suffix. */
478 		ifc_free_unit(ifc, unit);
479 		return (ENOSPC);
480 	}
481 	*dp = 'b';
482 	/* Must not change dp so we can replace 'a' by 'b' later. */
483 	*(dp+1) = '\0';
484 
485 	/* Check if 'a' and 'b' interfaces already exist. */
486 	if (ifunit(name) != NULL)
487 		return (EEXIST);
488 	*dp = 'a';
489 	if (ifunit(name) != NULL)
490 		return (EEXIST);
491 
492 	/* Allocate memory for both [ab] interfaces */
493 	sca = malloc(sizeof(struct epair_softc), M_EPAIR, M_WAITOK | M_ZERO);
494 	sca->ifp = if_alloc(IFT_ETHER);
495 	if (sca->ifp == NULL) {
496 		free(sca, M_EPAIR);
497 		ifc_free_unit(ifc, unit);
498 		return (ENOSPC);
499 	}
500 	sca->rxring[0] = buf_ring_alloc(RXRSIZE, M_EPAIR, M_WAITOK,NULL);
501 	sca->rxring[1] = buf_ring_alloc(RXRSIZE, M_EPAIR, M_WAITOK, NULL);
502 
503 	scb = malloc(sizeof(struct epair_softc), M_EPAIR, M_WAITOK | M_ZERO);
504 	scb->ifp = if_alloc(IFT_ETHER);
505 	if (scb->ifp == NULL) {
506 		free(scb, M_EPAIR);
507 		if_free(sca->ifp);
508 		free(sca, M_EPAIR);
509 		ifc_free_unit(ifc, unit);
510 		return (ENOSPC);
511 	}
512 	scb->rxring[0] = buf_ring_alloc(RXRSIZE, M_EPAIR, M_WAITOK, NULL);
513 	scb->rxring[1] = buf_ring_alloc(RXRSIZE, M_EPAIR, M_WAITOK, NULL);
514 
515 	/*
516 	 * Cross-reference the interfaces so we will be able to free both.
517 	 */
518 	sca->oifp = scb->ifp;
519 	scb->oifp = sca->ifp;
520 
521 	EPAIR_LOCK();
522 #ifdef SMP
523 	/* Get an approximate distribution. */
524 	hash = next_index % mp_ncpus;
525 #else
526 	hash = 0;
527 #endif
528 	if (swi_cookie[hash] == NULL) {
529 		void *cookie;
530 
531 		EPAIR_UNLOCK();
532 		error = swi_add(NULL, epairname,
533 		    epair_intr, (void *)(uintptr_t)hash,
534 		    SWI_NET, INTR_MPSAFE, &cookie);
535 		if (error) {
536 			buf_ring_free(scb->rxring[0], M_EPAIR);
537 			buf_ring_free(scb->rxring[1], M_EPAIR);
538 			if_free(scb->ifp);
539 			free(scb, M_EPAIR);
540 			buf_ring_free(sca->rxring[0], M_EPAIR);
541 			buf_ring_free(sca->rxring[1], M_EPAIR);
542 			if_free(sca->ifp);
543 			free(sca, M_EPAIR);
544 			ifc_free_unit(ifc, unit);
545 			return (ENOSPC);
546 		}
547 		EPAIR_LOCK();
548 		/* Recheck under lock even though a race is very unlikely. */
549 		if (swi_cookie[hash] == NULL) {
550 			swi_cookie[hash] = cookie;
551 		} else {
552 			EPAIR_UNLOCK();
553 			(void) swi_remove(cookie);
554 			EPAIR_LOCK();
555 		}
556 	}
557 	sca->cpuidx = hash;
558 	STAILQ_INSERT_TAIL(&swi_sc[hash], sca, entry);
559 	sca->swi_cookie = swi_cookie[hash];
560 	scb->cpuidx = hash;
561 	STAILQ_INSERT_TAIL(&swi_sc[hash], scb, entry);
562 	scb->swi_cookie = swi_cookie[hash];
563 	EPAIR_UNLOCK();
564 
565 	/* Initialise pseudo media types. */
566 	ifmedia_init(&sca->media, 0, epair_media_change, epair_media_status);
567 	ifmedia_add(&sca->media, IFM_ETHER | IFM_10G_T, 0, NULL);
568 	ifmedia_set(&sca->media, IFM_ETHER | IFM_10G_T);
569 	ifmedia_init(&scb->media, 0, epair_media_change, epair_media_status);
570 	ifmedia_add(&scb->media, IFM_ETHER | IFM_10G_T, 0, NULL);
571 	ifmedia_set(&scb->media, IFM_ETHER | IFM_10G_T);
572 
573 	/* Finish initialization of interface <n>a. */
574 	ifp = sca->ifp;
575 	ifp->if_softc = sca;
576 	strlcpy(ifp->if_xname, name, IFNAMSIZ);
577 	ifp->if_dname = epairname;
578 	ifp->if_dunit = unit;
579 	ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
580 	ifp->if_flags |= IFF_KNOWSEPOCH;
581 	ifp->if_capabilities = IFCAP_VLAN_MTU;
582 	ifp->if_capenable = IFCAP_VLAN_MTU;
583 	ifp->if_start = epair_start;
584 	ifp->if_ioctl = epair_ioctl;
585 	ifp->if_init  = epair_init;
586 	if_setsendqlen(ifp, ifqmaxlen);
587 	if_setsendqready(ifp);
588 
589 	/*
590 	 * Calculate the etheraddr hashing the hostid and the
591 	 * interface index. The result would be hopefully unique.
592 	 * Note that the "a" component of an epair instance may get moved
593 	 * to a different VNET after creation. In that case its index
594 	 * will be freed and the index can get reused by new epair instance.
595 	 * Make sure we do not create same etheraddr again.
596 	 */
597 	getcredhostid(curthread->td_ucred, (unsigned long *)&hostid);
598 	if (hostid == 0)
599 		arc4rand(&hostid, sizeof(hostid), 0);
600 
601 	EPAIR_LOCK();
602 	if (ifp->if_index > next_index)
603 		next_index = ifp->if_index;
604 	else
605 		next_index++;
606 
607 	key[0] = (uint32_t)next_index;
608 	EPAIR_UNLOCK();
609 	key[1] = (uint32_t)(hostid & 0xffffffff);
610 	key[2] = (uint32_t)((hostid >> 32) & 0xfffffffff);
611 	hash = jenkins_hash32(key, 3, 0);
612 
613 	eaddr[0] = 0x02;
614 	memcpy(&eaddr[1], &hash, 4);
615 	eaddr[5] = 0x0a;
616 	ether_ifattach(ifp, eaddr);
617 	ifp->if_baudrate = IF_Gbps(10);	/* arbitrary maximum */
618 	ifp->if_transmit = epair_transmit;
619 
620 	/* Swap the name and finish initialization of interface <n>b. */
621 	*dp = 'b';
622 
623 	ifp = scb->ifp;
624 	ifp->if_softc = scb;
625 	strlcpy(ifp->if_xname, name, IFNAMSIZ);
626 	ifp->if_dname = epairname;
627 	ifp->if_dunit = unit;
628 	ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
629 	ifp->if_capabilities = IFCAP_VLAN_MTU;
630 	ifp->if_capenable = IFCAP_VLAN_MTU;
631 	ifp->if_start = epair_start;
632 	ifp->if_ioctl = epair_ioctl;
633 	ifp->if_init  = epair_init;
634 	if_setsendqlen(ifp, ifqmaxlen);
635 	if_setsendqready(ifp);
636 	/* We need to play some tricks here for the second interface. */
637 	strlcpy(name, epairname, len);
638 
639 	/* Correctly set the name for the cloner list. */
640 	strlcpy(name, scb->ifp->if_xname, len);
641 	epair_clone_add(ifc, scb);
642 
643 	ifp->if_baudrate = IF_Gbps(10);	/* arbitrary maximum */
644 	ifp->if_transmit = epair_transmit;
645 
646 	/*
647 	 * Restore name to <n>a as the ifp for this will go into the
648 	 * cloner list for the initial call.
649 	 */
650 	strlcpy(name, sca->ifp->if_xname, len);
651 
652 	/* Tell the world, that we are ready to rock. */
653 	sca->ifp->if_drv_flags |= IFF_DRV_RUNNING;
654 	if_link_state_change(sca->ifp, LINK_STATE_UP);
655 	scb->ifp->if_drv_flags |= IFF_DRV_RUNNING;
656 	if_link_state_change(scb->ifp, LINK_STATE_UP);
657 
658 	return (0);
659 }
660 
661 static void
662 epair_drain_rings(struct epair_softc *sc)
663 {
664 	int ridx;
665 	struct mbuf *m;
666 
667 	for (ridx = 0; ridx < 2; ridx++) {
668 		do {
669 			m = buf_ring_dequeue_sc(sc->rxring[ridx]);
670 			if (m == NULL)
671 				break;
672 			m_freem(m);
673 		} while (1);
674 	}
675 }
676 
677 static int
678 epair_clone_destroy(struct if_clone *ifc, struct ifnet *ifp)
679 {
680 	struct ifnet *oifp;
681 	struct epair_softc *sca, *scb;
682 	int unit, error;
683 
684 	/*
685 	 * In case we called into if_clone_destroyif() ourselves
686 	 * again to remove the second interface, the softc will be
687 	 * NULL. In that case so not do anything but return success.
688 	 */
689 	if (ifp->if_softc == NULL)
690 		return (0);
691 
692 	unit = ifp->if_dunit;
693 	sca = ifp->if_softc;
694 	oifp = sca->oifp;
695 	scb = oifp->if_softc;
696 
697 	/* Frist get the interfaces down and detached. */
698 	if_link_state_change(ifp, LINK_STATE_DOWN);
699 	ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
700 	if_link_state_change(oifp, LINK_STATE_DOWN);
701 	oifp->if_drv_flags &= ~IFF_DRV_RUNNING;
702 
703 	ether_ifdetach(ifp);
704 	ether_ifdetach(oifp);
705 
706 	/* Second stop interrupt handler. */
707 	EPAIR_LOCK();
708 	STAILQ_REMOVE(&swi_sc[sca->cpuidx], sca, epair_softc, entry);
709 	STAILQ_REMOVE(&swi_sc[scb->cpuidx], scb, epair_softc, entry);
710 	EPAIR_UNLOCK();
711 	sca->swi_cookie = NULL;
712 	scb->swi_cookie = NULL;
713 
714 	/* Third free any queued packets and all the resources. */
715 	CURVNET_SET_QUIET(oifp->if_vnet);
716 	epair_drain_rings(scb);
717 	oifp->if_softc = NULL;
718 	error = if_clone_destroyif(ifc, oifp);
719 	if (error)
720 		panic("%s: if_clone_destroyif() for our 2nd iface failed: %d",
721 		    __func__, error);
722 	if_free(oifp);
723 	ifmedia_removeall(&scb->media);
724 	buf_ring_free(scb->rxring[0], M_EPAIR);
725 	buf_ring_free(scb->rxring[1], M_EPAIR);
726 	free(scb, M_EPAIR);
727 	CURVNET_RESTORE();
728 
729 	epair_drain_rings(sca);
730 	if_free(ifp);
731 	ifmedia_removeall(&sca->media);
732 	buf_ring_free(sca->rxring[0], M_EPAIR);
733 	buf_ring_free(sca->rxring[1], M_EPAIR);
734 	free(sca, M_EPAIR);
735 
736 	/* Last free the cloner unit. */
737 	ifc_free_unit(ifc, unit);
738 
739 	return (0);
740 }
741 
742 static void
743 vnet_epair_init(const void *unused __unused)
744 {
745 
746 	V_epair_cloner = if_clone_advanced(epairname, 0,
747 	    epair_clone_match, epair_clone_create, epair_clone_destroy);
748 }
749 VNET_SYSINIT(vnet_epair_init, SI_SUB_PSEUDO, SI_ORDER_ANY,
750     vnet_epair_init, NULL);
751 
752 static void
753 vnet_epair_uninit(const void *unused __unused)
754 {
755 
756 	if_clone_detach(V_epair_cloner);
757 }
758 VNET_SYSUNINIT(vnet_epair_uninit, SI_SUB_INIT_IF, SI_ORDER_ANY,
759     vnet_epair_uninit, NULL);
760 
761 static int
762 epair_modevent(module_t mod, int type, void *data)
763 {
764 	int i;
765 
766 	switch (type) {
767 	case MOD_LOAD:
768 		for (i = 0; i < MAXCPU; i++) {
769 			swi_cookie[i] = NULL;
770 			STAILQ_INIT(&swi_sc[i]);
771 		}
772 		EPAIR_LOCK_INIT();
773 		if (bootverbose)
774 			printf("%s: %s initialized.\n", __func__, epairname);
775 		break;
776 	case MOD_UNLOAD:
777 		EPAIR_LOCK();
778 		for (i = 0; i < MAXCPU; i++) {
779 			if (!STAILQ_EMPTY(&swi_sc[i])) {
780 				printf("%s: swi_sc[%d] active\n", __func__, i);
781 				EPAIR_UNLOCK();
782 				return (EBUSY);
783 			}
784 		}
785 		EPAIR_UNLOCK();
786 		for (i = 0; i < MAXCPU; i++)
787 			if (swi_cookie[i] != NULL)
788 				(void) swi_remove(swi_cookie[i]);
789 		EPAIR_LOCK_DESTROY();
790 		if (bootverbose)
791 			printf("%s: %s unloaded.\n", __func__, epairname);
792 		break;
793 	default:
794 		return (EOPNOTSUPP);
795 	}
796 	return (0);
797 }
798 
799 static moduledata_t epair_mod = {
800 	"if_epair",
801 	epair_modevent,
802 	0
803 };
804 
805 DECLARE_MODULE(if_epair, epair_mod, SI_SUB_PSEUDO, SI_ORDER_MIDDLE);
806 MODULE_VERSION(if_epair, 3);
807