net/lwip/ifdev.c

/* LWIP service - ifdev.c - network interface devices */

#include "lwip.h"
#include "mcast.h"
#include "ifaddr.h"
#include "rtsock.h"
#include "route.h"
#include "bpfdev.h"

#include <net/if_media.h>

/*
 * The highest possible interface index number, plus one.  We currently let
 * lwIP choose the interface index.  lwIP will generate a number between 1 and
 * 255 inclusive.  For efficiency, we use an array to look up an interface
 * device object by its index.  Thus, this array must be large enough to be
 * indexed by the largest possible index number generated by lwIP.  lwIP uses
 * an unsigned 8-bit field to store the index number.
 */
#define MAX_IFDEV	(UINT8_MAX + 1)

/*
 * The table is indexed directly by the interface index.  Slot zero is never
 * used, because lwIP never generates interface index zero.
 */
static struct ifdev *ifdev_table[MAX_IFDEV];	/* index-based lookup table */

static TAILQ_HEAD(, ifdev) ifdev_list;		/* list of active interfaces */

/* The first registered loopback interface, used to loop back packets. */
static struct ifdev *ifdev_loopback;		/* loopback interface */

/*
 * The maximum number of virtual interface types--that is, interface types for
 * which interfaces may be created and destroyed dynamically.  The BSDs call
 * these "clones".  There should be enough slots for all types, which are
 * registered by their respective modules through ifdev_register().  Increase
 * as necessary; ifdev_register() panics when the slots run out.
 */
#define MAX_VTYPE	4

/*
 * The registry of virtual interface types.  Slots are filled in registration
 * order; only the first 'ifdev_vtypes' slots are in use.
 */
static struct {
	const char *ifvt_name;	/* interface name without digits (e.g. "lo") */
	size_t ifvt_namelen;	/* length of the name, excluding null term. */
	int (*ifvt_create)(const char *);	/* ifdev create function */
} ifdev_vtype[MAX_VTYPE];

static unsigned int ifdev_vtypes;	/* number of in-use vtype slots */

/* Both ifdev_add() and ifdev_set_mtu() enforce this lower bound. */
#define IFDEV_MIN_MTU	1280	/* minimum interface MTU, required by IPv6 */

/*
 * Set up the network interface devices module: start with an empty interface
 * list, an empty index lookup table, and an empty registry of virtual
 * interface types.  Because the registry is reset here, this function must be
 * called before any virtual interface type is registered.
 */
void
ifdev_init(void)
{

	TAILQ_INIT(&ifdev_list);

	memset(ifdev_table, 0, sizeof(ifdev_table));

	ifdev_vtypes = 0;
	memset(ifdev_vtype, 0, sizeof(ifdev_vtype));
}

/*
 * Check all active interfaces to see if any tasks need to be performed.  This
 * function is called as part of each message loop iteration.
 */
void
ifdev_poll(void)
{
	struct ifdev *ifdev;

	/*
	 * Call the polling function of the active interfaces.  Note that
	 * interfaces may not remove themselves as a result of polling!
	 */
	TAILQ_FOREACH(ifdev, &ifdev_list, ifdev_next) {
		if (ifdev->ifdev_ops->iop_poll != NULL)
			ifdev->ifdev_ops->iop_poll(ifdev);
	}
}

/*
 * Handle an incoming packet on an interface.  This function assumes ownership
 * of the packet buffers: the caller must no longer refer to it afterward.  For
 * packets looped back for a non-loopback interface, 'ifdev' is the loopback
 * interface and 'netif' is the original (non-loopback) interface's netif.  For
 * other packets, 'ifdev' is the actual interface and 'netif' is NULL.  The
 * packet is passed to BPF devices only if 'to_bpf' is set.
 */
void
ifdev_input(struct ifdev * ifdev, struct pbuf * pbuf, struct netif * netif,
	int to_bpf)
{
	struct bpfdev_link *bpfl;
	err_t err;

	/*
	 * Looped-back packets are captured on the loopback device, not on the
	 * original interface.  Similarly, we account the traffic to the
	 * loopback interface.  This is a policy decision (inspired by NetBSD's
	 * behavior) and may be changed later.
	 */
	if (to_bpf) {
		TAILQ_FOREACH(bpfl, &ifdev->ifdev_bpf, bpfl_next)
			bpfdev_input(bpfl, pbuf);
	}

	ifdev->ifdev_data.ifi_ipackets++;
	ifdev->ifdev_data.ifi_ibytes += pbuf->tot_len;

	if (pbuf->flags & PBUF_FLAG_LLMCAST)
		ifdev->ifdev_data.ifi_imcasts++;

	/*
	 * For looped-back packets, we must bypass the regular netif input
	 * function (as that one is for link-layer packet handling) and instead
	 * pass it directly to the IP-layer packet handling function of lwIP.
	 */
	if (netif != NULL)
		err = ip_input(pbuf, netif);
	else
		err = ifdev->ifdev_netif.input(pbuf, &ifdev->ifdev_netif);

	if (err != ERR_OK)
		pbuf_free(pbuf);
}

/*
 * Send a packet out on an interface.  Return ERR_OK if the packet was
 * transmitted, or another lwIP ERR_ code on failure.  The caller remains
 * responsible for freeing the packet buffers in both cases.  For a packet
 * that is looped back for a non-loopback interface (its destination being a
 * local address), 'ifdev' is the loopback interface and 'netif' is the netif
 * of the original interface.  Otherwise, 'ifdev' is the packet's source
 * interface and 'netif' is NULL.  Attached BPF devices get to see the packet
 * only if 'to_bpf' is set.  A set 'hdrcmplt' flag means that the source
 * address in the data link header has been filled in already; if the flag is
 * clear, the device's own source address must be filled in, if applicable.
 */
err_t
ifdev_output(struct ifdev * ifdev, struct pbuf * pbuf, struct netif * netif,
	int to_bpf, int hdrcmplt)
{
	struct bpfdev_link *link;

	/*
	 * Packets for an interface that is down, or that has its link down,
	 * are dropped before BPF or the interface module get to see them.
	 */
	if (!(ifdev_is_up(ifdev) && ifdev_is_link_up(ifdev)))
		return ERR_IF;	/* this should translate to ENETDOWN */

	/*
	 * Only BPF-generated packets may arrive with an incomplete link-layer
	 * header.  Fill in the source address before handing the packet back
	 * to BPF, so that BPF sees the completed version.
	 */
	if (!hdrcmplt) {
		if (ifdev->ifdev_ops->iop_hdrcmplt != NULL)
			ifdev->ifdev_ops->iop_hdrcmplt(ifdev, pbuf);
	}

	/*
	 * Like in ifdev_input(), BPF capturing and statistics use the
	 * loopback interface even for packets from a non-loopback device.
	 */
	if (to_bpf) {
		for (link = TAILQ_FIRST(&ifdev->ifdev_bpf); link != NULL;
		    link = TAILQ_NEXT(link, bpfl_next))
			bpfdev_output(link, pbuf);
	}

	ifdev->ifdev_data.ifi_obytes += pbuf->tot_len;
	ifdev->ifdev_data.ifi_opackets++;

	/*
	 * TODO: the multicast count is imprecise, as it relies on the pbuf
	 * flag that we set explicitly ourselves.  We do so for UDP/RAW packets
	 * only, and not for (e.g.) ND6 multicast traffic.  Setting the flag
	 * ourselves is needed anyway, to support MSG_MCAST and MSG_BCAST on
	 * loopback interfaces, but additional checks should be added here,
	 * for example on the destination ethernet address.
	 */
	if ((pbuf->flags & PBUF_FLAG_LLMCAST) != 0)
		ifdev->ifdev_data.ifi_omcasts++;

	return ifdev->ifdev_ops->iop_output(ifdev, pbuf, netif);
}

/*
 * lwIP callback to transmit an IPv4 packet on an interface.  Normally, the
 * packet is handed to the interface's link processor (e.g., etharp).  There
 * are three exceptions.  First, route information may demand that the packet
 * be rejected or blackholed.  Second, a packet destined to the interface's own
 * address is sent over a loopback interface instead.  Third, a multicast
 * packet that is to be looped back has a copy sent over a loopback interface
 * as well.  The last two exceptions do not apply to loopback interfaces
 * themselves.
 */
static err_t
ifdev_output_v4(struct netif * netif, struct pbuf * pbuf,
	const ip4_addr_t * ipaddr)
{
	struct ifdev *ifdev = netif_get_ifdev(netif);
	struct ifdev *target;
	struct netif *orig_netif;
	err_t err;

	assert(ifdev_loopback != NULL);

	/* Reject and blackhole routes end the transmission right here. */
	if (!route_output_v4(ifdev, ipaddr, &err))
		return err;

	/* By default, send on the given interface, without looping back. */
	target = ifdev;
	orig_netif = NULL;

	if (!ifdev_is_loopback(ifdev)) {
		/* Loop back a copy of the multicast packet if so requested. */
		if (pbuf->flags & PBUF_FLAG_MCASTLOOP)
			(void)ifdev_output(ifdev_loopback, pbuf, netif,
			    FALSE /*to_bpf*/, TRUE /*hdrcmplt*/);

		/* Packets to the interface's own address are diverted. */
		if (ifdev->ifdev_v4set &&
		    ip4_addr_cmp(netif_ip4_addr(&ifdev->ifdev_netif),
		    ipaddr)) {
			target = ifdev_loopback;
			orig_netif = netif;
		}
	}

	if (target->ifdev_ops->iop_output_v4 == NULL)
		return ifdev_output(target, pbuf, orig_netif,
		    TRUE /*to_bpf*/, TRUE /*hdrcmplt*/);

	return target->ifdev_ops->iop_output_v4(ifdev_get_netif(target), pbuf,
	    ipaddr);
}

/*
 * lwIP callback to transmit an IPv6 packet on an interface.  This function
 * mirrors ifdev_output_v4(): route-based rejection comes first, then multicast
 * loopback, and finally diversion to a loopback interface.  For IPv6, packets
 * are diverted if the destination matches any of the interface's addresses or
 * is an interface-local multicast address.
 */
static err_t
ifdev_output_v6(struct netif * netif, struct pbuf * pbuf,
	const ip6_addr_t * ipaddr)
{
	struct ifdev *ifdev = netif_get_ifdev(netif);
	struct ifdev *target;
	struct netif *orig_netif;
	err_t err;

	assert(ifdev_loopback != NULL);

	/* Reject and blackhole routes end the transmission right here. */
	if (!route_output_v6(ifdev, ipaddr, &err))
		return err;

	/* By default, send on the given interface, without looping back. */
	target = ifdev;
	orig_netif = NULL;

	if (!ifdev_is_loopback(ifdev)) {
		/* Loop back a copy of the multicast packet if so requested. */
		if (pbuf->flags & PBUF_FLAG_MCASTLOOP)
			(void)ifdev_output(ifdev_loopback, pbuf, netif,
			    FALSE /*to_bpf*/, TRUE /*hdrcmplt*/);

		/* Packets to a local address are diverted. */
		if (netif_get_ip6_addr_match(&ifdev->ifdev_netif,
		    ipaddr) != -1 || ip6_addr_ismulticast_iflocal(ipaddr)) {
			target = ifdev_loopback;
			orig_netif = netif;
		}
	}

	if (target->ifdev_ops->iop_output_v6 == NULL)
		return ifdev_output(target, pbuf, orig_netif,
		    TRUE /*to_bpf*/, TRUE /*hdrcmplt*/);

	return target->ifdev_ops->iop_output_v6(ifdev_get_netif(target), pbuf,
	    ipaddr);
}

/*
 * lwIP status callback, invoked whenever certain status changes are made on
 * the netif, whether by lwIP itself or by us.  The callback is used to detect
 * lwIP-initiated state changes on local IPv6 addresses; the address module
 * keeps shadow state to tell those apart from changes we initiated ourselves.
 *
 * lwIP also offers an extended netif callback mechanism, which we may adopt
 * one day.  As netif state changes are rare and checking what changed is
 * cheap, there is no immediate need to do so.
 */
static void
ifdev_status_callback(struct netif * netif)
{

	ifaddr_v6_check(netif_get_ifdev(netif));
}

/*
 * Initialization callback for the netif structure of a new interface.  Only
 * the generic fields are set up here; the interface module's own init hook
 * takes care of the rest, and its result is returned.
 */
static err_t
ifdev_init_netif(struct netif * netif)
{
	struct ifdev *dev;

	dev = netif_get_ifdev(netif);

	assert(dev != NULL);

	netif->mtu = dev->ifdev_data.ifi_mtu;
	netif->hwaddr_len = dev->ifdev_data.ifi_addrlen;

	netif->output_ip6 = ifdev_output_v6;
	netif->output = ifdev_output_v4;

	netif_set_status_callback(netif, ifdev_status_callback);

	return dev->ifdev_ops->iop_init(dev, netif);
}

/*
 * Look up an interface device by interface index.  Return the interface
 * device, or NULL if the index is out of range or not in use.  Index zero is
 * never in use, so NULL is always returned for it.
 */
struct ifdev *
ifdev_get_by_index(uint32_t ifindex)
{

	return (ifindex < __arraycount(ifdev_table)) ?
	    ifdev_table[ifindex] : NULL;
}

/*
 * Find an interface device by its name.  Return a pointer to the interface
 * device if found, or NULL otherwise.
 */
struct ifdev *
ifdev_find_by_name(const char * name)
{
	struct ifdev *ifdev;

	TAILQ_FOREACH(ifdev, &ifdev_list, ifdev_next) {
		if (!strcmp(ifdev->ifdev_name, name))
			return ifdev;
	}

	return NULL;
}

/*
 * Iterate over the interface devices.  If 'last' is NULL, return the first
 * interface device.  Otherwise, 'last' must be a previously returned interface
 * device, and its successor is returned.  Return NULL when there are no (more)
 * interface devices.
 */
struct ifdev *
ifdev_enum(struct ifdev * last)
{

	return (last != NULL) ? TAILQ_NEXT(last, ifdev_next) :
	    TAILQ_FIRST(&ifdev_list);
}

/*
 * Add the given BPF device link to the end of the interface's list of BPF
 * listeners.  From then on, the BPF device is offered the packets passing
 * through ifdev_input() and ifdev_output() for which 'to_bpf' is set.
 */
void
ifdev_attach_bpf(struct ifdev * ifdev, struct bpfdev_link * bpfl)
{

	TAILQ_INSERT_TAIL(&ifdev->ifdev_bpf, bpfl, bpfl_next);
}

/*
 * Take the given BPF device link off the interface's list of BPF listeners.
 * The link must have been added with ifdev_attach_bpf() before.
 */
void
ifdev_detach_bpf(struct ifdev * ifdev, struct bpfdev_link * bpfl)
{

	TAILQ_REMOVE(&ifdev->ifdev_bpf, bpfl, bpfl_next);
}

/*
 * Record that the caller wants the interface to be in promiscuous mode.  Any
 * number of parties may do so.  Each party may call this function once, and
 * must call ifdev_clear_promisc() later.  As long as at least one party is
 * interested, the interface is kept in promiscuous mode, if possible.  Return
 * TRUE on success, or FALSE on failure.
 */
int
ifdev_set_promisc(struct ifdev * ifdev)
{

	/*
	 * Counter overflow is unlikely, but checking for it retains the
	 * ability to fail this call for other reasons in the future, with
	 * BPF handling that case properly.
	 */
	if (ifdev->ifdev_promisc == UINT_MAX)
		return FALSE;

	ifdev->ifdev_promisc++;

	/* Only the first interested party triggers the mode switch. */
	if (ifdev->ifdev_promisc > 1)
		return TRUE;

	ifdev_update_ifflags(ifdev, ifdev->ifdev_ifflags | IFF_PROMISC);

	if (ifdev->ifdev_ops->iop_set_promisc != NULL)
		ifdev->ifdev_ops->iop_set_promisc(ifdev, TRUE);

	return TRUE;
}

/*
 * Record that a party that called ifdev_set_promisc() earlier is no longer
 * interested in promiscuous mode.  When no interested parties are left, the
 * interface is taken out of promiscuous mode.
 */
void
ifdev_clear_promisc(struct ifdev * ifdev)
{

	assert(ifdev->ifdev_promisc > 0);

	ifdev->ifdev_promisc--;

	/* Other parties may still need promiscuous mode. */
	if (ifdev->ifdev_promisc != 0)
		return;

	if (ifdev->ifdev_ops->iop_set_promisc != NULL)
		ifdev->ifdev_ops->iop_set_promisc(ifdev, FALSE);

	ifdev_update_ifflags(ifdev, ifdev->ifdev_ifflags & ~IFF_PROMISC);
}

/*
 * Set NetBSD-style interface flags (IFF_) for an interface.
 */
int
ifdev_set_ifflags(struct ifdev * ifdev, unsigned int ifflags)
{
	int r;

	/* Check and update only the subset of flags that may be changed. */
	ifflags &= ~(IFF_CANTCHANGE | IFF_LOOPBACK);

	/*
	 * Important: the callback function may call ifdev_update_ifflags()
	 * itself immediately, to update read-only flags such as IFF_RUNNING
	 * based on read-write flags such as IFF_UP.  So as to make that work..
	 *
	 * 1) this function MUST succeed if the callback function succeeds;
	 * 2) this function MUST NOT make assumptions about the ifdev_ifflags
	 *    field across the callback invocation.
	 *
	 * Conversely, the callback function should be aware that the flags
	 * field will still be updated with the flags.  In this model, it is
	 * not possible for the callback function to silently change any of the
	 * given flags.  If that is ever necessary, API changes are needed.
	 */
	if ((r = ifdev->ifdev_ops->iop_set_ifflags(ifdev, ifflags)) != OK)
		return r;

	/*
	 * On success, merge the updated subset with the subset that may not be
	 * changed.
	 */
	ifflags |= ifdev->ifdev_ifflags & (IFF_CANTCHANGE | IFF_LOOPBACK);

	ifdev_update_ifflags(ifdev, ifflags);

	return OK;
}

/*
 * Replace the NetBSD-style interface flags (IFF_) of an interface with
 * exactly the given set, without any input checks, and carry out whatever
 * follows from particular flags having changed.
 */
void
ifdev_update_ifflags(struct ifdev * ifdev, unsigned int ifflags)
{
	struct netif *netif;
	int want_up, now_up;

	/*
	 * Store the new flags before anything else, so that the routing
	 * messages generated below (for example) show the new value.
	 */
	ifdev->ifdev_ifflags = ifflags;

	/* Currently, IFF_UP is the only flag that requires further action. */
	netif = ifdev_get_netif(ifdev);

	want_up = ((ifflags & IFF_UP) != 0);
	now_up = (netif_is_up(netif) != 0);

	if (want_up == now_up)
		return;

	if (!want_up) {
		netif_set_down(netif);

		rtsock_msg_ifinfo(ifdev);

		return;
	}

	netif_set_up(netif);

	rtsock_msg_ifinfo(ifdev);

	/*
	 * The interface coming up may satisfy the last condition for
	 * assigning a link-local IPv6 address.
	 */
	ifaddr_v6_set_linklocal(ifdev);

	/* If the link is up as well, address states may need a reset. */
	if (netif_is_link_up(netif))
		ifaddr_v6_set_up(ifdev);
}

/*
 * Obtain the NetBSD-style interface capabilities (IFCAP_) of an interface.
 * The supported capabilities are stored in 'ifcap' and the enabled ones in
 * 'ifena'.  Both are zero if the interface module has no capabilities hook.
 */
void
ifdev_get_ifcap(struct ifdev * ifdev, uint64_t * ifcap, uint64_t * ifena)
{

	*ifena = 0;
	*ifcap = 0;

	if (ifdev->ifdev_ops->iop_get_ifcap == NULL)
		return;

	ifdev->ifdev_ops->iop_get_ifcap(ifdev, ifcap, ifena);
}

/*
 * Change the set of enabled NetBSD-style interface capabilities (IFCAP_) of
 * an interface.  Return the result of the interface module's hook, or EINVAL
 * if the interface module has no such hook.
 */
int
ifdev_set_ifcap(struct ifdev * ifdev, uint64_t ifena)
{

	if (ifdev->ifdev_ops->iop_set_ifcap == NULL)
		return EINVAL;

	return ifdev->ifdev_ops->iop_set_ifcap(ifdev, ifena);
}

/*
 * Obtain the NetBSD-style media type (IFM_) of an interface.  On success,
 * return OK, with the current media type selection stored in 'ifcurrent' and
 * the driver-reported active media type stored in 'ifactive'.  Return ENOTTY
 * if the interface module has no media hook.
 */
int
ifdev_get_ifmedia(struct ifdev * ifdev, int * ifcurrent, int * ifactive)
{

	if (ifdev->ifdev_ops->iop_get_ifmedia != NULL) {
		ifdev->ifdev_ops->iop_get_ifmedia(ifdev, ifcurrent, ifactive);

		return OK;
	}

	return ENOTTY;
}

/*
 * Change the NetBSD-style media type (IFM_) of an interface.  Return ENOTTY
 * if the interface module has no hook for this, EINVAL if the given media
 * type is negative, and the result of the interface module's hook otherwise.
 */
int
ifdev_set_ifmedia(struct ifdev * ifdev, int ifmedia)
{

	/* The lack of a hook takes precedence over a bad argument. */
	if (ifdev->ifdev_ops->iop_set_ifmedia != NULL) {
		if (ifmedia >= 0)
			return ifdev->ifdev_ops->iop_set_ifmedia(ifdev,
			    ifmedia);

		return EINVAL;
	}

	return ENOTTY;
}

/*
 * Change the Maximum Transmission Unit of an interface.  Return OK on
 * success.  Return ENOTTY if the interface module does not support changing
 * the MTU, or EINVAL if the given value is out of range or refused by the
 * interface module.
 */
int
ifdev_set_mtu(struct ifdev * ifdev, unsigned int mtu)
{

	if (ifdev->ifdev_ops->iop_set_mtu == NULL)
		return ENOTTY;

	/* Check the generic bounds before consulting the interface module. */
	if (mtu < IFDEV_MIN_MTU || mtu > UINT16_MAX)
		return EINVAL;

	if (!ifdev->ifdev_ops->iop_set_mtu(ifdev, mtu))
		return EINVAL;

	/* Keep our own copy and the lwIP copy in sync. */
	ifdev->ifdev_netif.mtu = mtu;
	ifdev->ifdev_data.ifi_mtu = mtu;

	return OK;
}

/*
 * Change the IPv6 Neighbor Discovery flags (ND6_IFF_) of an interface to the
 * given set.  Return OK on success, or EINVAL if the set contains a flag that
 * is unknown or may not be set.
 */
int
ifdev_set_nd6flags(struct ifdev * ifdev, uint32_t nd6flags)
{
	const uint32_t known = ND6_IFF_PERFORMNUD | ND6_IFF_ACCEPT_RTADV |
	    ND6_IFF_IFDISABLED | ND6_IFF_OVERRIDE_RTADV |
	    ND6_IFF_AUTO_LINKLOCAL;

	/* For now, any flag that we do not know about is refused. */
	if ((nd6flags & ~known) != 0)
		return EINVAL;

	/*
	 * NetBSD and lwIP do not match well in this area, so while we have to
	 * support ND6 flags, we can do so only poorly.  Per flag, the current
	 * situation is this:
	 *
	 * - ND6_IFF_PERFORMNUD: on by default, as lwIP always performs NUD;
	 *   it can be changed, for dhcpcd(8), but changes have no effect.
	 * - ND6_IFF_ACCEPT_RTADV: can be set, for dhcpcd(8), but has no
	 *   effect; lwIP always processes router advertisements, yet never
	 *   autoconfigures addresses, so the flag is meaningless for us.
	 * - ND6_IFF_IFDISABLED: unsupported, and can only be cleared; TODO:
	 *   we could probably detect link-local address collisions, and set
	 *   this flag (disabling the interface while set) upon detection.
	 * - ND6_IFF_OVERRIDE_RTADV: as for ND6_IFF_ACCEPT_RTADV.
	 * - ND6_IFF_AUTO_LINKLOCAL: supported, although not initialized from
	 *   the corresponding sysctl(7) flag; see ifaddr for the reasons.
	 */
	if ((nd6flags & ND6_IFF_IFDISABLED) != 0)
		return EINVAL;

	ifdev->ifdev_nd6flags = nd6flags;

	return OK;
}

/*
 * Report an update to the interface's active hardware address that is *not*
 * the result of a user action.  If the 'is_factory' flag is set, the address
 * is the factory (driver-given) address.  This function is for use by
 * interface modules, to update the internal state to their current external
 * state.  The actual work is done by ifaddr_dl_update().
 */
void
ifdev_update_hwaddr(struct ifdev * ifdev, const uint8_t * hwaddr,
	int is_factory)
{

	/*
	 * This is a plain call rather than a 'return' statement: ISO C does
	 * not allow a return statement with an expression in a function that
	 * returns void.
	 */
	ifaddr_dl_update(ifdev, hwaddr, is_factory);
}

/*
 * Insert a new interface device into the list of interface devices, at a
 * location determined by policy.  The device's name must have been set
 * already, as the name determines the location.
 */
static void
ifdev_insert(struct ifdev * ifdev)
{
	struct ifdev *ifdev2;
	const char *p;
	unsigned int unit, unit2;
	size_t namelen;
	int found;

	/*
	 * While NetBSD can set up all interfaces in the order it wants them to
	 * appear in, we do not have such luxury: network device drivers come
	 * up and report to us in no particular predefined order, and we have
	 * no way to know how many and which will appear.  The result is that
	 * we always have to create the loopback device first, something that
	 * is explicitly said to be bad in NetBSD.  Instead, we create an
	 * illusion of a reasonable order by performing insertion sort on the
	 * interface list, using (for now) these rules, ordered by priority:
	 *
	 * 1. same-named devices are sorted by their unit number;
	 * 2. loopback interfaces are inserted after all other interfaces;
	 * 3. new devices are added at the end of their type category.
	 *
	 * In the future, other forms of real-vs-virtual sorting may be added.
	 */

	/*
	 * First check for same-named devices (#1).  To that end, split the
	 * name of the new device into its prefix, which consists of everything
	 * up to the first digit, and its unit number.
	 */
	for (p = ifdev->ifdev_name; *p != '\0' && (*p < '0' || *p > '9'); p++);

	namelen = (size_t)(p - ifdev->ifdev_name);

	for (unit = 0; *p >= '0' && *p <= '9'; p++)
		unit = unit * 10 + *p - '0';

	/*
	 * Find the run of devices with the same prefix.  Stop at the first
	 * such device with a higher unit number, or else at the first device
	 * that follows the run.  If the run extends to the end of the list,
	 * the loop ends with 'ifdev2' set to NULL.
	 */
	found = FALSE;
	TAILQ_FOREACH(ifdev2, &ifdev_list, ifdev_next) {
		if (!strncmp(ifdev->ifdev_name, ifdev2->ifdev_name, namelen) &&
		    *(p = &ifdev2->ifdev_name[namelen]) >= '0' && *p <= '9') {
			for (unit2 = 0; *p >= '0' && *p <= '9'; p++)
				unit2 = unit2 * 10 + *p - '0';

			/* Interface names are supposed to be unique. */
			assert(unit != unit2);

			found = TRUE;
			if (unit2 > unit)
				break;
		} else if (found)
			break;
	}

	if (found) {
		if (ifdev2 != NULL)
			TAILQ_INSERT_BEFORE(ifdev2, ifdev, ifdev_next);
		else
			TAILQ_INSERT_TAIL(&ifdev_list, ifdev, ifdev_next);

		return;
	}

	/*
	 * No same-named device found.  Is this a loopback interface?  If not,
	 * insert before the first loopback device, if any.
	 */
	if (!ifdev_is_loopback(ifdev)) {
		TAILQ_FOREACH(ifdev2, &ifdev_list, ifdev_next) {
			if (ifdev_is_loopback(ifdev2)) {
				TAILQ_INSERT_BEFORE(ifdev2, ifdev, ifdev_next);

				return;
			}
		}
	}

	/*
	 * The given device is a loopback device, or there was no loopback
	 * device in the list, possibly because it was empty.  Add to the tail.
	 */
	TAILQ_INSERT_TAIL(&ifdev_list, ifdev, ifdev_next);
}

/*
 * Add and initialize an interface device.  The caller provides the storage
 * for the interface device object in 'ifdev'.  The given name, initial
 * interface flags (IFF_), interface type, link-layer header length, hardware
 * address length, data link type, MTU, ND6 flags, and operations table are
 * stored in the object.  The interface is then inserted into the list of
 * interfaces, registered with lwIP, entered into the index lookup table, and
 * announced on routing sockets.  This function panics if lwIP refuses the new
 * netif or assigns an unusable interface index.
 */
void
ifdev_add(struct ifdev * ifdev, const char * name, unsigned int ifflags,
	unsigned int iftype, size_t hdrlen, size_t addrlen, unsigned int dlt,
	unsigned int mtu, uint32_t nd6flags, const struct ifdev_ops * iop)
{
	unsigned int ifindex;
	ip4_addr_t ip4addr_any, ip4addr_none;

	/*
	 * Since the call to netif_add() may end up invoking some of our
	 * callbacks (the add-multicast-address ones in particular), make sure
	 * that everything else is set up first.  We cannot set up the index
	 * mapping until netif_add() returns, but this is currently no problem.
	 */
	strlcpy(ifdev->ifdev_name, name, sizeof(ifdev->ifdev_name));
	ifdev->ifdev_ifflags = 0; /* will be updated below */
	ifdev->ifdev_dlt = dlt;
	ifdev->ifdev_nd6flags = nd6flags;
	ifdev->ifdev_ops = iop;

	memset(&ifdev->ifdev_data, 0, sizeof(ifdev->ifdev_data));

	/* The hardware address must fit in the netif, the MTU in 16 bits. */
	assert(addrlen <= NETIF_MAX_HWADDR_LEN);
	assert(mtu >= IFDEV_MIN_MTU && mtu <= UINT16_MAX);

	ifdev->ifdev_data.ifi_type = iftype;
	ifdev->ifdev_data.ifi_hdrlen = hdrlen;
	ifdev->ifdev_data.ifi_addrlen = addrlen;
	ifdev->ifdev_data.ifi_link_state = LINK_STATE_UNKNOWN;
	ifdev->ifdev_data.ifi_mtu = mtu;

	TAILQ_INIT(&ifdev->ifdev_bpf);

	ifaddr_init(ifdev);

	/*
	 * We have to assign an IPv4 address at netif addition time, but we may
	 * not have one yet, so pass in an "any" address for now.  Hopefully
	 * lwIP will not mistake this for a real IPv4 address if we happen to
	 * enable the interface with only an IPv6 address later on.
	 */
	ip4_addr_set_any(&ip4addr_any);
	ip4_addr_set_u32(&ip4addr_none, PP_HTONL(INADDR_NONE));

	/*
	 * Insert the new interface device into a sensible place in the current
	 * list of interfaces.
	 *
	 * NOTE(review): the ifdev_ifflags field is still zero at this point.
	 * If ifdev_is_loopback() tests IFF_LOOPBACK in that field, then
	 * ifdev_insert() cannot recognize the new device as a loopback device
	 * yet -- confirm that the resulting list order is as intended.
	 */
	ifdev_insert(ifdev);

	/*
	 * netif_add() can fail only as a result of the initialization callback
	 * failing, which is something that should never happen in our case.
	 */
	if (netif_add(&ifdev->ifdev_netif, &ip4addr_any, &ip4addr_none,
	    &ip4addr_any, ifdev, ifdev_init_netif, iop->iop_input) == NULL)
		panic("unable to add netif");

	/*
	 * Set up the index mapping.  Since interface index zero is never
	 * generated, table slot zero is always NULL.  We could shift all
	 * elements by one to save four bytes, but there's no real point.
	 */
	ifindex = netif_get_index(&ifdev->ifdev_netif);

	if (ifindex == 0 || ifindex >= __arraycount(ifdev_table))
		panic("invalid lwIP-generated interface index %u", ifindex);

	ifdev_table[ifindex] = ifdev;

	/*
	 * Set the initial interface flags.  Use the regular procedure for this
	 * just in case the interface module is crazy enough to set the
	 * interface up right away (which is never a good idea but still).
	 */
	ifdev_update_ifflags(ifdev, ifflags);

	/*
	 * If this is the first loopback interface to be registered, save it as
	 * the loopback interface that we will use to loop back self-destined
	 * packets on other interfaces.  Do this after setting the interface
	 * flags, since those are what we use to perform this loopback check.
	 */
	if (ifdev_loopback == NULL && ifdev_is_loopback(ifdev))
		ifdev_loopback = ifdev;

	/* Finally, announce the new interface. */
	rtsock_msg_ifannounce(ifdev, TRUE /*arrival*/);
}

/*
 * Remove an interface device.  Return OK on success, or a negative error code
 * on failure.  Only loopback interfaces may be refused for removal: EPERM is
 * returned for the loopback interface that is used to loop back packets for
 * other interfaces.  The storage of the interface device object itself is not
 * released here.
 */
int
ifdev_remove(struct ifdev * ifdev)
{
	struct bpfdev_link *bpfl;

	/*
	 * If this is the loopback interface used to loop back packets for
	 * other interfaces (typically lo0), we cannot afford to get rid of it.
	 */
	if (ifdev == ifdev_loopback)
		return EPERM;

	/*
	 * Take down the interface for the purpose of sending a routing
	 * message.  NetBSD sends a RTM_IFINFO even if the interface was down
	 * already, and so we do not check whether IFF_UP was set at all here.
	 */
	ifdev_update_ifflags(ifdev, ifdev->ifdev_ifflags & ~IFF_UP);

	/*
	 * Report all associated addresses as deleted.  It is not necessary to
	 * actually delete the addresses, nor is that even possible in all
	 * cases.  In particular, the active hardware address cannot be
	 * deleted.  Since the active hardware address is used in all address
	 * change announcements, delete it at the very end.
	 */
	ifaddr_v4_clear(ifdev);
	ifaddr_v6_clear(ifdev);
	ifaddr_dl_clear(ifdev);

	/*
	 * Delete all remaining routes associated with the interface.  These
	 * are reported as well.  We do this after clearing the addresses so as
	 * not to confuse the route deletion part of clearing addresses.
	 */
	route_clear(ifdev);

	/* Finally, announce the interface itself as gone. */
	rtsock_msg_ifannounce(ifdev, FALSE /*arrival*/);

	/*
	 * Free up all per-socket multicast membership structures associated to
	 * the interface.  There is no need to leave the multicast groups.
	 */
	mcast_clear(ifdev);

	/*
	 * Also tell attached BPF devices that the interface is now gone.  Do
	 * not bother to reset the list.
	 */
	TAILQ_FOREACH(bpfl, &ifdev->ifdev_bpf, bpfl_next)
		bpfdev_detach(bpfl);

	/* Then perform the actual interface removal. */
	netif_remove(&ifdev->ifdev_netif);

	TAILQ_REMOVE(&ifdev_list, ifdev, ifdev_next);

	/* Release the index table slot that ifdev_add() filled in. */
	assert(ifdev_table[ifdev_get_index(ifdev)] == ifdev);
	ifdev_table[ifdev_get_index(ifdev)] = NULL;

	return OK;
}

/*
 * Return the loopback interface that is used to loop back packets for other
 * interfaces.  A loopback interface must have been added before this function
 * is called.
 */
struct ifdev *
ifdev_get_loopback(void)
{
	struct ifdev *loopback = ifdev_loopback;

	assert(loopback != NULL);

	return loopback;
}

/*
 * Report a new link state for the given interface, as one of NetBSD's
 * LINK_STATE_ values: 'unknown', 'up', or 'down'.  The new link state is
 * applied to the associated lwIP netif, and reported on monitoring routing
 * sockets if it makes a difference to the netif.  This function is for use by
 * interface modules, to update the internal state to their current external
 * state.
 */
void
ifdev_update_link(struct ifdev * ifdev, int iflink)
{
	struct netif *netif;
	int link_up;

	ifdev->ifdev_data.ifi_link_state = iflink;

	netif = ifdev_get_netif(ifdev);

	/*
	 * The netif has no notion of an 'unknown' link state.  We treat it
	 * like 'up', in that we simply try to send and receive packets.
	 * Consequently, only transitions from and to 'down' matter.
	 */
	link_up = (iflink != LINK_STATE_DOWN);

	if (netif_is_link_up(netif) == link_up)
		return;

	if (!link_up)
		netif_set_link_down(netif);
	else {
		netif_set_link_up(netif);

		/* If the interface is up too, reset address states. */
		if (ifdev_is_up(ifdev))
			ifaddr_v6_set_up(ifdev);
	}

	rtsock_msg_ifinfo(ifdev);
}

/*
 * Register a virtual interface type.  The given name is the name prefix
 * (without unit number) of interfaces of this type.  The given function is
 * called whenever creation of a virtual interface of this type is requested.
 * The name string is not copied, so it must remain valid.  This function
 * panics if all slots are in use.
 */
void
ifdev_register(const char * name, int (* create)(const char *))
{
	unsigned int slot = ifdev_vtypes;

	if (slot == __arraycount(ifdev_vtype))
		panic("too few slots for all virtual interface types");

	ifdev_vtype[slot].ifvt_create = create;
	ifdev_vtype[slot].ifvt_namelen = strlen(name);
	ifdev_vtype[slot].ifvt_name = name;

	ifdev_vtypes = slot + 1;
}

/*
 * Verify that the given name is a valid interface name that can be used for
 * creating a new interface.  In particular, check that the given name is a
 * valid interface name, consisting of an alphabetic string (the interface type
 * or driver name) followed by a number string (the unit or instance number).
 * Furthermore, make sure that the name does not already exist.  Finally, see
 * if the name prefix is reserved for a virtual interface type.  If the given
 * 'vtype_slot' pointer is not NULL, the prefix must be, and the virtual type
 * slot number is returned in 'vtype_slot' on success.  If 'vtype_slot' is
 * NULL, the name must not have a virtual interface prefix, and an error is
 * returned if it is.  Since vtype slot numbers are meaningless outside of this
 * module, external callers must always pass in NULL.  This function returns OK
 * on succes or a negative error code on error.
 */
int
ifdev_check_name(const char * name, unsigned int * vtype_slot)
{
	const char *p;
	size_t namelen;
	unsigned int slot;

	/*
	 * First see if the name is valid at all.  TODO: decide if we want to
	 * allow uppercase letters, dashes, and/or underscores.
	 */
	for (p = name; *p >= 'a' && *p <= 'z'; p++);

	if (p == name || *p == '\0')
		return EINVAL;

	namelen = (size_t)(p - name);

	for (; *p >= '0' && *p <= '9'; p++);

	if (*p != '\0')
		return EINVAL;

	/* Then make sure that it does not already exist. */
	if (ifdev_find_by_name(name) != NULL)
		return EEXIST;

	/* See if there is a matching virtual interface type for the name. */
	for (slot = 0; slot < ifdev_vtypes; slot++) {
		if (ifdev_vtype[slot].ifvt_namelen == namelen &&
		    !strncmp(ifdev_vtype[slot].ifvt_name, name, namelen))
			break;
	}

	/* The interpretation of the result depends on 'vtype_slot'. */
	if (vtype_slot != NULL) {
		if (slot == ifdev_vtypes)
			return EINVAL;

		*vtype_slot = slot;
	} else if (slot != ifdev_vtypes)
		return EINVAL;

	return OK;
}

/*
 * Create a new virtual interface.  The virtual interface type is based on the
 * given name (without unit number).  Return OK if the virtual interface has
 * been successfully created, or a negative error code otherwise: the error
 * code from ifdev_check_name() if the name is not acceptable (EEXIST if an
 * interface with this name exists already, EINVAL if the name is malformed or
 * does not match a virtual interface type), or else the result of the creation
 * function of the virtual interface type.  This function is used both for the
 * SIOCIFCREATE ioctl and internally.
 */
int
ifdev_create(const char * name)
{
	unsigned int slot;
	int r;

	/*
	 * Verify that the given name is an acceptable interface name.  Pass on
	 * the actual error code, so that the caller can tell a name that is in
	 * use (EEXIST) apart from a name that is not valid (EINVAL).
	 */
	if ((r = ifdev_check_name(name, &slot)) != OK)
		return r;

	/* Let the virtual interface implementation handle the rest. */
	return ifdev_vtype[slot].ifvt_create(name);
}

/*
 * Destroy an interface, if its interface module supports that.  Return the
 * result of the interface module's destroy hook, or EINVAL if the interface
 * module has no such hook.
 */
int
ifdev_destroy(struct ifdev * ifdev)
{

	if (ifdev->ifdev_ops->iop_destroy != NULL)
		return ifdev->ifdev_ops->iop_destroy(ifdev);

	return EINVAL;
}

/*
 * Enumerate the registered virtual interface types.  Given a zero-based
 * number, return the null-terminated name prefix of the virtual interface
 * type with that number, or NULL if the number is out of range.
 */
const char *
ifdev_enum_vtypes(unsigned int num)
{

	return (num < ifdev_vtypes) ? ifdev_vtype[num].ifvt_name : NULL;
}