xref: /dragonfly/sys/netinet/ip_carp.c (revision 2b3f93ea)
1 /*
2  * Copyright (c) 2002 Michael Shalayeff. All rights reserved.
3  * Copyright (c) 2003 Ryan McBride. All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  * 1. Redistributions of source code must retain the above copyright
9  *    notice, this list of conditions and the following disclaimer.
10  * 2. Redistributions in binary form must reproduce the above copyright
11  *    notice, this list of conditions and the following disclaimer in the
12  *    documentation and/or other materials provided with the distribution.
13  *
14  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
15  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
16  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
17  * IN NO EVENT SHALL THE AUTHOR OR HIS RELATIVES BE LIABLE FOR ANY DIRECT,
18  * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
19  * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
20  * SERVICES; LOSS OF MIND, USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
22  * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
23  * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
24  * THE POSSIBILITY OF SUCH DAMAGE.
25  */
26 /*
27  * $FreeBSD: src/sys/netinet/ip_carp.c,v 1.48 2007/02/02 09:39:09 glebius Exp $
28  */
29 
30 #include "opt_carp.h"
31 #include "opt_inet.h"
32 #include "opt_inet6.h"
33 
34 #include <sys/param.h>
35 #include <sys/systm.h>
36 #include <sys/kernel.h>
37 #include <sys/in_cksum.h>
38 #include <sys/limits.h>
39 #include <sys/malloc.h>
40 #include <sys/mbuf.h>
41 #include <sys/msgport2.h>
42 #include <sys/time.h>
43 #include <sys/proc.h>
44 #include <sys/caps.h>
45 #include <sys/sockio.h>
46 #include <sys/socket.h>
47 #include <sys/sysctl.h>
48 #include <sys/syslog.h>
49 #include <sys/thread.h>
50 
51 #include <machine/stdarg.h>
52 #include <crypto/sha1.h>
53 
54 #include <net/bpf.h>
55 #include <net/ethernet.h>
56 #include <net/if.h>
57 #include <net/if_dl.h>
58 #include <net/if_types.h>
59 #include <net/route.h>
60 #include <net/if_clone.h>
61 #include <net/if_var.h>
62 #include <net/ifq_var.h>
63 #include <net/netmsg2.h>
64 #include <net/netisr2.h>
65 
66 #ifdef INET
67 #include <netinet/in.h>
68 #include <netinet/in_var.h>
69 #include <netinet/in_systm.h>
70 #include <netinet/ip.h>
71 #include <netinet/ip_var.h>
72 #include <netinet/if_ether.h>
73 #endif
74 
75 #ifdef INET6
76 #include <netinet/icmp6.h>
77 #include <netinet/ip6.h>
78 #include <netinet6/ip6_var.h>
79 #include <netinet6/scope6_var.h>
80 #include <netinet6/nd6.h>
81 #endif
82 
83 #include <netinet/ip_carp.h>
84 
85 /*
86  * Note about carp's MP safe approach:
87  *
88  * Brief: carp_softc (softc), carp_softc_container (scc)
89  *
90  * - All configuration operations, e.g. ioctl, add/delete inet addresses,
91  *   are serialized by netisr0, not by carp's serializer
92  *
93  * - Backing interface's if_carp and carp_softc's relationship:
94  *
95  *                +---------+
96  *     if_carp -->| carp_if |
97  *                +---------+
98  *                     |
99  *                     |
100  *                     V      +---------+
101  *                  +-----+   |         |
102  *                  | scc |-->|  softc  |
103  *                  +-----+   |         |
104  *                     |      +---------+
105  *                     |
106  *                     V      +---------+
107  *                  +-----+   |         |
108  *                  | scc |-->|  softc  |
109  *                  +-----+   |         |
110  *                            +---------+
111  *
112  * - if_carp creation, modification and deletion all happen in netisr0,
113  *   as stated previously.  Since if_carp is accessed by multiple netisrs,
114  *   the modification to if_carp is conducted in the following way:
115  *
116  *   Adding carp_softc:
117  *
118  *   1) Duplicate the old carp_if to new carp_if (ncif), and insert the
119  *      to-be-added carp_softc to the new carp_if (ncif):
120  *
121  *        if_carp                     ncif
122  *           |                         |
123  *           V                         V
124  *      +---------+               +---------+
125  *      | carp_if |               | carp_if |
126  *      +---------+               +---------+
127  *           |                         |
128  *           |                         |
129  *           V        +-------+        V
130  *        +-----+     |       |     +-----+
131  *        | scc |---->| softc |<----| scc |
132  *        +-----+     |       |     +-----+
133  *           |        +-------+        |
134  *           |                         |
135  *           V        +-------+        V
136  *        +-----+     |       |     +-----+
137  *        | scc |---->| softc |<----| scc |
138  *        +-----+     |       |     +-----+
139  *                    +-------+        |
140  *                                     |
141  *                    +-------+        V
142  *                    |       |     +-----+
143  *                    | softc |<----| scc |
144  *                    |       |     +-----+
145  *                    +-------+
146  *
147  *   2) Save if_carp into ocif, then switch if_carp to ncif:
148  *
149  *          ocif                    if_carp
150  *           |                         |
151  *           V                         V
152  *      +---------+               +---------+
153  *      | carp_if |               | carp_if |
154  *      +---------+               +---------+
155  *           |                         |
156  *           |                         |
157  *           V        +-------+        V
158  *        +-----+     |       |     +-----+
159  *        | scc |---->| softc |<----| scc |
160  *        +-----+     |       |     +-----+
161  *           |        +-------+        |
162  *           |                         |
163  *           V        +-------+        V
164  *        +-----+     |       |     +-----+
165  *        | scc |---->| softc |<----| scc |
166  *        +-----+     |       |     +-----+
167  *                    +-------+        |
168  *                                     |
169  *                    +-------+        V
170  *                    |       |     +-----+
171  *                    | softc |<----| scc |
172  *                    |       |     +-----+
173  *                    +-------+
174  *
175  *   3) Run netmsg_service_sync(), which will make sure that
176  *      ocif is no longer accessed (all network operations
177  *      happen only in network threads).
178  *   4) Free ocif -- only carp_if and scc are freed.
179  *
180  *
181  *   Removing carp_softc:
182  *
183  *   1) Duplicate the old carp_if to new carp_if (ncif); the to-be-deleted
184  *      carp_softc will not be duplicated.
185  *
186  *        if_carp                     ncif
187  *           |                         |
188  *           V                         V
189  *      +---------+               +---------+
190  *      | carp_if |               | carp_if |
191  *      +---------+               +---------+
192  *           |                         |
193  *           |                         |
194  *           V        +-------+        V
195  *        +-----+     |       |     +-----+
196  *        | scc |---->| softc |<----| scc |
197  *        +-----+     |       |     +-----+
198  *           |        +-------+        |
199  *           |                         |
200  *           V        +-------+        |
201  *        +-----+     |       |        |
202  *        | scc |---->| softc |        |
203  *        +-----+     |       |        |
204  *           |        +-------+        |
205  *           |                         |
206  *           V        +-------+        V
207  *        +-----+     |       |     +-----+
208  *        | scc |---->| softc |<----| scc |
209  *        +-----+     |       |     +-----+
210  *                    +-------+
211  *
212  *   2) Save if_carp into ocif, then switch if_carp to ncif:
213  *
214  *          ocif                    if_carp
215  *           |                         |
216  *           V                         V
217  *      +---------+               +---------+
218  *      | carp_if |               | carp_if |
219  *      +---------+               +---------+
220  *           |                         |
221  *           |                         |
222  *           V        +-------+        V
223  *        +-----+     |       |     +-----+
224  *        | scc |---->| softc |<----| scc |
225  *        +-----+     |       |     +-----+
226  *           |        +-------+        |
227  *           |                         |
228  *           V        +-------+        |
229  *        +-----+     |       |        |
230  *        | scc |---->| softc |        |
231  *        +-----+     |       |        |
232  *           |        +-------+        |
233  *           |                         |
234  *           V        +-------+        V
235  *        +-----+     |       |     +-----+
236  *        | scc |---->| softc |<----| scc |
237  *        +-----+     |       |     +-----+
238  *                    +-------+
239  *
240  *   3) Run netmsg_service_sync(), which will make sure that
241  *      ocif is no longer accessed (all network operations
242  *      happen only in network threads).
243  *   4) Free ocif -- only carp_if and scc are freed.
244  *
245  * - if_carp accessing:
246  *   The accessing code should cache the if_carp in a local temporary
247  *   variable and access that temporary variable along the code path
248  *   instead of re-reading if_carp later on.
249  */
250 
251 #define	CARP_IFNAME		"carp"
252 #define CARP_IS_RUNNING(ifp)	\
253 	(((ifp)->if_flags & (IFF_UP | IFF_RUNNING)) == (IFF_UP | IFF_RUNNING))
254 
255 struct carp_softc;
256 
257 struct carp_vhaddr {
258 	uint32_t		vha_flags;	/* CARP_VHAF_ */
259 	struct in_ifaddr	*vha_ia;	/* carp address */
260 	struct in_ifaddr	*vha_iaback;	/* backing address */
261 	TAILQ_ENTRY(carp_vhaddr) vha_link;
262 };
263 TAILQ_HEAD(carp_vhaddr_list, carp_vhaddr);
264 
265 struct netmsg_carp {
266 	struct netmsg_base	base;
267 	struct ifnet		*nc_carpdev;
268 	struct carp_softc	*nc_softc;
269 	void			*nc_data;
270 	size_t			nc_datalen;
271 };
272 
273 struct carp_softc {
274 	struct arpcom		 arpcom;
275 	struct ifnet		*sc_carpdev;	/* parent interface */
276 	struct carp_vhaddr_list	 sc_vha_list;	/* virtual addr list */
277 
278 	const struct in_ifaddr	*sc_ia;		/* primary iface address v4 */
279 	struct ip_moptions 	 sc_imo;
280 
281 #ifdef INET6
282 	struct in6_ifaddr 	*sc_ia6;	/* primary iface address v6 */
283 	struct ip6_moptions 	 sc_im6o;
284 #endif /* INET6 */
285 
286 	enum { INIT = 0, BACKUP, MASTER }
287 				 sc_state;
288 	boolean_t		 sc_dead;
289 
290 	int			 sc_suppress;
291 
292 	int			 sc_sendad_errors;
293 #define	CARP_SENDAD_MAX_ERRORS	3
294 	int			 sc_sendad_success;
295 #define	CARP_SENDAD_MIN_SUCCESS 3
296 
297 	int			 sc_vhid;
298 	int			 sc_advskew;
299 	int			 sc_naddrs;	/* actually used IPv4 vha */
300 	int			 sc_naddrs6;
301 	int			 sc_advbase;	/* seconds */
302 	int			 sc_init_counter;
303 	uint64_t		 sc_counter;
304 
305 	/* authentication */
306 #define CARP_HMAC_PAD	64
307 	unsigned char		 sc_key[CARP_KEY_LEN];
308 	unsigned char		 sc_pad[CARP_HMAC_PAD];
309 	SHA1_CTX		 sc_sha1;
310 
311 	struct callout		 sc_ad_tmo;	/* advertisement timeout */
312 	struct netmsg_carp	 sc_ad_msg;	/* adv timeout netmsg */
313 	struct callout		 sc_md_tmo;	/* ip4 master down timeout */
314 	struct callout 		 sc_md6_tmo;	/* ip6 master down timeout */
315 	struct netmsg_carp	 sc_md_msg;	/* master down timeout netmsg */
316 
317 	LIST_ENTRY(carp_softc)	 sc_next;	/* Interface clue */
318 };
319 
320 #define sc_if	arpcom.ac_if
321 
322 struct carp_softc_container {
323 	TAILQ_ENTRY(carp_softc_container) scc_link;
324 	struct carp_softc	*scc_softc;
325 };
326 TAILQ_HEAD(carp_if, carp_softc_container);
327 
328 SYSCTL_DECL(_net_inet_carp);
329 
330 static int carp_opts[CARPCTL_MAXID] = { 0, 1, 0, 1, 0, 0, 1 }; /* XXX for now */
331 SYSCTL_INT(_net_inet_carp, CARPCTL_ALLOW, allow, CTLFLAG_RW,
332     &carp_opts[CARPCTL_ALLOW], 0, "Accept incoming CARP packets");
333 SYSCTL_INT(_net_inet_carp, CARPCTL_PREEMPT, preempt, CTLFLAG_RW,
334     &carp_opts[CARPCTL_PREEMPT], 0, "high-priority backup preemption mode");
335 SYSCTL_INT(_net_inet_carp, CARPCTL_LOG, log, CTLFLAG_RW,
336     &carp_opts[CARPCTL_LOG], 0, "log bad carp packets");
337 SYSCTL_INT(_net_inet_carp, CARPCTL_ARPBALANCE, arpbalance, CTLFLAG_RW,
338     &carp_opts[CARPCTL_ARPBALANCE], 0, "balance arp responses");
339 SYSCTL_INT(_net_inet_carp, CARPCTL_SETROUTE, setroute, CTLFLAG_RW,
340     &carp_opts[CARPCTL_SETROUTE], 0, "set route");
341 
342 static int carp_suppress_preempt = 0;
343 SYSCTL_INT(_net_inet_carp, OID_AUTO, suppress_preempt, CTLFLAG_RD,
344     &carp_suppress_preempt, 0, "Preemption is suppressed");
345 
346 static int carp_prio_ad = 1;
347 SYSCTL_INT(_net_inet_carp, OID_AUTO, prio_ad, CTLFLAG_RD,
348     &carp_prio_ad, 0, "Prioritize advertisement packet");
349 
350 static struct carpstats carpstats;
351 SYSCTL_STRUCT(_net_inet_carp, CARPCTL_STATS, stats, CTLFLAG_RW,
352     &carpstats, carpstats,
353     "CARP statistics (struct carpstats, netinet/ip_carp.h)");
354 
355 #define	CARP_LOG(...)	do {				\
356 	if (carp_opts[CARPCTL_LOG] > 0)			\
357 		log(LOG_INFO, __VA_ARGS__);		\
358 } while (0)
359 
360 #define	CARP_DEBUG(...)	do {				\
361 	if (carp_opts[CARPCTL_LOG] > 1)			\
362 		log(LOG_DEBUG, __VA_ARGS__);		\
363 } while (0)
364 
365 static struct lwkt_token carp_listtok = LWKT_TOKEN_INITIALIZER(carp_list_token);
366 
367 static void	carp_hmac_prepare(struct carp_softc *);
368 static void	carp_hmac_generate(struct carp_softc *, uint32_t *,
369 		    unsigned char *);
370 static int	carp_hmac_verify(struct carp_softc *, uint32_t *,
371 		    unsigned char *);
372 static void	carp_setroute(struct carp_softc *, int);
373 static void	carp_proto_input_c(struct carp_softc *, struct mbuf *,
374 		    struct carp_header *, sa_family_t);
375 static int 	carp_clone_create(struct if_clone *, int, caddr_t, caddr_t);
376 static int 	carp_clone_destroy(struct ifnet *);
377 static void	carp_detach(struct carp_softc *, boolean_t, boolean_t);
378 static void	carp_prepare_ad(struct carp_softc *, struct carp_header *);
379 static void	carp_send_ad_all(void);
380 static void	carp_send_ad_timeout(void *);
381 static void	carp_send_ad(struct carp_softc *);
382 static void	carp_send_arp(struct carp_softc *);
383 static void	carp_master_down_timeout(void *);
384 static void	carp_master_down(struct carp_softc *);
385 static void	carp_setrun(struct carp_softc *, sa_family_t);
386 static void	carp_set_state(struct carp_softc *, int);
387 static struct ifnet *carp_forus(struct carp_if *, const uint8_t *);
388 
389 static void	carp_init(void *);
390 static int	carp_ioctl(struct ifnet *, u_long, caddr_t, struct ucred *);
391 static int	carp_output(struct ifnet *, struct mbuf *, struct sockaddr *,
392 		    struct rtentry *);
393 static void	carp_start(struct ifnet *, struct ifaltq_subque *);
394 
395 static void	carp_multicast_cleanup(struct carp_softc *);
396 static void	carp_add_addr(struct carp_softc *, struct ifaddr *);
397 static void	carp_del_addr(struct carp_softc *, struct ifaddr *);
398 static void	carp_config_addr(struct carp_softc *, struct ifaddr *);
399 static void	carp_link_addrs(struct carp_softc *, struct ifnet *,
400 		    struct ifaddr *);
401 static void	carp_unlink_addrs(struct carp_softc *, struct ifnet *,
402 		    struct ifaddr *);
403 static void	carp_update_addrs(struct carp_softc *, struct ifaddr *);
404 
405 static int	carp_config_vhaddr(struct carp_softc *, struct carp_vhaddr *,
406 		    struct in_ifaddr *);
407 static int	carp_activate_vhaddr(struct carp_softc *, struct carp_vhaddr *,
408 		    struct ifnet *, struct in_ifaddr *, int);
409 static void	carp_deactivate_vhaddr(struct carp_softc *,
410 		    struct carp_vhaddr *, boolean_t);
411 static int	carp_addroute_vhaddr(struct carp_softc *, struct carp_vhaddr *);
412 static void	carp_delroute_vhaddr(struct carp_softc *, struct carp_vhaddr *,
413 		    boolean_t);
414 
415 #ifdef foo
416 static void	carp_sc_state(struct carp_softc *);
417 #endif
418 #ifdef INET6
419 static void	carp_send_na(struct carp_softc *);
420 #ifdef notyet
421 static int	carp_set_addr6(struct carp_softc *, struct sockaddr_in6 *);
422 static int	carp_del_addr6(struct carp_softc *, struct sockaddr_in6 *);
423 #endif
424 static void	carp_multicast6_cleanup(struct carp_softc *);
425 #endif
426 static void	carp_stop(struct carp_softc *, boolean_t);
427 static void	carp_suspend(struct carp_softc *, boolean_t);
428 static void	carp_ioctl_stop(struct carp_softc *);
429 static int	carp_ioctl_setvh(struct carp_softc *, void *, struct ucred *);
430 static void	carp_ioctl_ifcap(struct carp_softc *, int);
431 static int	carp_ioctl_getvh(struct carp_softc *, void *, struct ucred *);
432 static int	carp_ioctl_getdevname(struct carp_softc *, struct ifdrv *);
433 static int	carp_ioctl_getvhaddr(struct carp_softc *, struct ifdrv *);
434 
435 static struct carp_if *carp_if_remove(struct carp_if *, struct carp_softc *);
436 static struct carp_if *carp_if_insert(struct carp_if *, struct carp_softc *);
437 static void	carp_if_free(struct carp_if *);
438 
439 static void	carp_ifaddr(void *, struct ifnet *, enum ifaddr_event,
440 			    struct ifaddr *);
441 static void	carp_ifdetach(void *, struct ifnet *);
442 
443 static void	carp_ifdetach_dispatch(netmsg_t);
444 static void	carp_clone_destroy_dispatch(netmsg_t);
445 static void	carp_init_dispatch(netmsg_t);
446 static void	carp_ioctl_stop_dispatch(netmsg_t);
447 static void	carp_ioctl_setvh_dispatch(netmsg_t);
448 static void	carp_ioctl_ifcap_dispatch(netmsg_t);
449 static void	carp_ioctl_getvh_dispatch(netmsg_t);
450 static void	carp_ioctl_getdevname_dispatch(netmsg_t);
451 static void	carp_ioctl_getvhaddr_dispatch(netmsg_t);
452 static void	carp_send_ad_timeout_dispatch(netmsg_t);
453 static void	carp_master_down_timeout_dispatch(netmsg_t);
454 
455 static MALLOC_DEFINE(M_CARP, "CARP", "CARP interfaces");
456 
457 static LIST_HEAD(, carp_softc) carpif_list;
458 
459 static struct if_clone carp_cloner =
460 IF_CLONE_INITIALIZER(CARP_IFNAME, carp_clone_create, carp_clone_destroy,
461 		     0, IF_MAXUNIT);
462 
463 static const uint8_t	carp_etheraddr[ETHER_ADDR_LEN] =
464 	{ 0, 0, 0x5e, 0, 1, 0 };
465 
466 static eventhandler_tag carp_ifdetach_event;
467 static eventhandler_tag carp_ifaddr_event;
468 
469 static __inline void
carp_insert_vhaddr(struct carp_softc * sc,struct carp_vhaddr * vha_new)470 carp_insert_vhaddr(struct carp_softc *sc, struct carp_vhaddr *vha_new)
471 {
472 	struct carp_vhaddr *vha;
473 	u_long new_addr, addr;
474 
475 	KKASSERT((vha_new->vha_flags & CARP_VHAF_ONLIST) == 0);
476 
477 	/*
478 	 * Virtual address list is sorted; smaller one first
479 	 */
480 	new_addr = ntohl(vha_new->vha_ia->ia_addr.sin_addr.s_addr);
481 
482 	TAILQ_FOREACH(vha, &sc->sc_vha_list, vha_link) {
483 		addr = ntohl(vha->vha_ia->ia_addr.sin_addr.s_addr);
484 
485 		if (addr > new_addr)
486 			break;
487 	}
488 	if (vha == NULL)
489 		TAILQ_INSERT_TAIL(&sc->sc_vha_list, vha_new, vha_link);
490 	else
491 		TAILQ_INSERT_BEFORE(vha, vha_new, vha_link);
492 	vha_new->vha_flags |= CARP_VHAF_ONLIST;
493 }
494 
495 static __inline void
carp_remove_vhaddr(struct carp_softc * sc,struct carp_vhaddr * vha)496 carp_remove_vhaddr(struct carp_softc *sc, struct carp_vhaddr *vha)
497 {
498 	KKASSERT(vha->vha_flags & CARP_VHAF_ONLIST);
499 	vha->vha_flags &= ~CARP_VHAF_ONLIST;
500 	TAILQ_REMOVE(&sc->sc_vha_list, vha, vha_link);
501 }
502 
503 static void
carp_hmac_prepare(struct carp_softc * sc)504 carp_hmac_prepare(struct carp_softc *sc)
505 {
506 	uint8_t version = CARP_VERSION, type = CARP_ADVERTISEMENT;
507 	uint8_t vhid = sc->sc_vhid & 0xff;
508 	int i;
509 #ifdef INET6
510 	struct ifaddr_container *ifac;
511 	struct in6_addr in6;
512 #endif
513 #ifdef INET
514 	struct carp_vhaddr *vha;
515 #endif
516 
517 	/* XXX: possible race here */
518 
519 	/* compute ipad from key */
520 	bzero(sc->sc_pad, sizeof(sc->sc_pad));
521 	bcopy(sc->sc_key, sc->sc_pad, sizeof(sc->sc_key));
522 	for (i = 0; i < sizeof(sc->sc_pad); i++)
523 		sc->sc_pad[i] ^= 0x36;
524 
525 	/* precompute first part of inner hash */
526 	SHA1Init(&sc->sc_sha1);
527 	SHA1Update(&sc->sc_sha1, sc->sc_pad, sizeof(sc->sc_pad));
528 	SHA1Update(&sc->sc_sha1, (void *)&version, sizeof(version));
529 	SHA1Update(&sc->sc_sha1, (void *)&type, sizeof(type));
530 	SHA1Update(&sc->sc_sha1, (void *)&vhid, sizeof(vhid));
531 #ifdef INET
532 	TAILQ_FOREACH(vha, &sc->sc_vha_list, vha_link) {
533 		SHA1Update(&sc->sc_sha1,
534 		    (const uint8_t *)&vha->vha_ia->ia_addr.sin_addr,
535 		    sizeof(struct in_addr));
536 	}
537 #endif /* INET */
538 #ifdef INET6
539 	TAILQ_FOREACH(ifac, &sc->sc_if.if_addrheads[mycpuid], ifa_link) {
540 		struct ifaddr *ifa = ifac->ifa;
541 
542 		if (ifa->ifa_addr->sa_family == AF_INET6) {
543 			in6 = ifatoia6(ifa)->ia_addr.sin6_addr;
544 			in6_clearscope(&in6);
545 			SHA1Update(&sc->sc_sha1, (void *)&in6, sizeof(in6));
546 		}
547 	}
548 #endif /* INET6 */
549 
550 	/* convert ipad to opad */
551 	for (i = 0; i < sizeof(sc->sc_pad); i++)
552 		sc->sc_pad[i] ^= 0x36 ^ 0x5c;
553 }
554 
555 static void
carp_hmac_generate(struct carp_softc * sc,uint32_t counter[2],unsigned char md[20])556 carp_hmac_generate(struct carp_softc *sc, uint32_t counter[2],
557     unsigned char md[20])
558 {
559 	SHA1_CTX sha1ctx;
560 
561 	/* fetch first half of inner hash */
562 	bcopy(&sc->sc_sha1, &sha1ctx, sizeof(sha1ctx));
563 
564 	SHA1Update(&sha1ctx, (void *)counter, sizeof(sc->sc_counter));
565 	SHA1Final(md, &sha1ctx);
566 
567 	/* outer hash */
568 	SHA1Init(&sha1ctx);
569 	SHA1Update(&sha1ctx, sc->sc_pad, sizeof(sc->sc_pad));
570 	SHA1Update(&sha1ctx, md, 20);
571 	SHA1Final(md, &sha1ctx);
572 }
573 
574 static int
carp_hmac_verify(struct carp_softc * sc,uint32_t counter[2],unsigned char md[20])575 carp_hmac_verify(struct carp_softc *sc, uint32_t counter[2],
576     unsigned char md[20])
577 {
578 	unsigned char md2[20];
579 
580 	carp_hmac_generate(sc, counter, md2);
581 	return (bcmp(md, md2, sizeof(md2)));
582 }
583 
584 static void
carp_setroute(struct carp_softc * sc,int cmd)585 carp_setroute(struct carp_softc *sc, int cmd)
586 {
587 #ifdef INET6
588 	struct ifaddr_container *ifac;
589 #endif
590 	struct carp_vhaddr *vha;
591 
592 	KKASSERT(cmd == RTM_DELETE || cmd == RTM_ADD);
593 
594 	TAILQ_FOREACH(vha, &sc->sc_vha_list, vha_link) {
595 		if (vha->vha_iaback == NULL)
596 			continue;
597 		if (cmd == RTM_DELETE)
598 			carp_delroute_vhaddr(sc, vha, FALSE);
599 		else
600 			carp_addroute_vhaddr(sc, vha);
601 	}
602 
603 #ifdef INET6
604 	TAILQ_FOREACH(ifac, &sc->sc_if.if_addrheads[mycpuid], ifa_link) {
605 		struct ifaddr *ifa = ifac->ifa;
606 
607 		if (ifa->ifa_addr->sa_family == AF_INET6) {
608 			if (cmd == RTM_ADD)
609 				in6_ifaddloop(ifa);
610 			else
611 				in6_ifremloop(ifa);
612 		}
613 	}
614 #endif /* INET6 */
615 }
616 
617 static int
carp_clone_create(struct if_clone * ifc,int unit,caddr_t params __unused,caddr_t data __unused)618 carp_clone_create(struct if_clone *ifc, int unit,
619 		  caddr_t params __unused, caddr_t data __unused)
620 {
621 	struct carp_softc *sc;
622 	struct ifnet *ifp;
623 
624 	sc = kmalloc(sizeof(*sc), M_CARP, M_WAITOK | M_ZERO);
625 	ifp = &sc->sc_if;
626 
627 	sc->sc_suppress = 0;
628 	sc->sc_advbase = CARP_DFLTINTV;
629 	sc->sc_vhid = -1;	/* required setting */
630 	sc->sc_advskew = 0;
631 	sc->sc_init_counter = 1;
632 	sc->sc_naddrs = 0;
633 	sc->sc_naddrs6 = 0;
634 
635 	TAILQ_INIT(&sc->sc_vha_list);
636 
637 #ifdef INET6
638 	sc->sc_im6o.im6o_multicast_hlim = CARP_DFLTTL;
639 #endif
640 
641 	callout_init_mp(&sc->sc_ad_tmo);
642 	netmsg_init(&sc->sc_ad_msg.base, NULL, &netisr_adone_rport,
643 	    MSGF_DROPABLE | MSGF_PRIORITY, carp_send_ad_timeout_dispatch);
644 	sc->sc_ad_msg.nc_softc = sc;
645 
646 	callout_init_mp(&sc->sc_md_tmo);
647 	callout_init_mp(&sc->sc_md6_tmo);
648 	netmsg_init(&sc->sc_md_msg.base, NULL, &netisr_adone_rport,
649 	    MSGF_DROPABLE | MSGF_PRIORITY, carp_master_down_timeout_dispatch);
650 	sc->sc_md_msg.nc_softc = sc;
651 
652 	if_initname(ifp, CARP_IFNAME, unit);
653 	ifp->if_softc = sc;
654 	ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
655 	ifp->if_init = carp_init;
656 	ifp->if_ioctl = carp_ioctl;
657 	ifp->if_start = carp_start;
658 
659 	ifp->if_capabilities = IFCAP_TXCSUM | IFCAP_TSO;
660 	ifp->if_capenable = ifp->if_capabilities;
661 	/*
662 	 * Leave if_hwassist as it is; if_hwassist will be
663 	 * setup when this carp interface has parent.
664 	 */
665 
666 	ifq_set_maxlen(&ifp->if_snd, ifqmaxlen);
667 	ifq_set_ready(&ifp->if_snd);
668 
669 	ether_ifattach(ifp, carp_etheraddr, NULL);
670 
671 	ifp->if_type = IFT_CARP;
672 	ifp->if_output = carp_output;
673 
674 	lwkt_gettoken(&carp_listtok);
675 	LIST_INSERT_HEAD(&carpif_list, sc, sc_next);
676 	lwkt_reltoken(&carp_listtok);
677 
678 	return (0);
679 }
680 
681 static void
carp_clone_destroy_dispatch(netmsg_t msg)682 carp_clone_destroy_dispatch(netmsg_t msg)
683 {
684 	struct netmsg_carp *cmsg = (struct netmsg_carp *)msg;
685 	struct carp_softc *sc = cmsg->nc_softc;
686 
687 	sc->sc_dead = TRUE;
688 	carp_detach(sc, TRUE, FALSE);
689 
690 	callout_cancel(&sc->sc_ad_tmo);
691 	callout_cancel(&sc->sc_md_tmo);
692 	callout_cancel(&sc->sc_md6_tmo);
693 
694 	crit_enter();
695 	lwkt_dropmsg(&sc->sc_ad_msg.base.lmsg);
696 	lwkt_dropmsg(&sc->sc_md_msg.base.lmsg);
697 	crit_exit();
698 
699 	lwkt_replymsg(&cmsg->base.lmsg, 0);
700 }
701 
702 static int
carp_clone_destroy(struct ifnet * ifp)703 carp_clone_destroy(struct ifnet *ifp)
704 {
705 	struct carp_softc *sc = ifp->if_softc;
706 	struct netmsg_carp cmsg;
707 
708 	bzero(&cmsg, sizeof(cmsg));
709 	netmsg_init(&cmsg.base, NULL, &curthread->td_msgport, 0,
710 	    carp_clone_destroy_dispatch);
711 	cmsg.nc_softc = sc;
712 
713 	lwkt_domsg(netisr_cpuport(0), &cmsg.base.lmsg, 0);
714 
715 	lwkt_gettoken(&carp_listtok);
716 	LIST_REMOVE(sc, sc_next);
717 	lwkt_reltoken(&carp_listtok);
718 
719 	bpfdetach(ifp);
720 	if_detach(ifp);
721 
722 	KASSERT(sc->sc_naddrs == 0, ("certain inet address is still active"));
723 	kfree(sc, M_CARP);
724 
725 	return 0;
726 }
727 
728 static struct carp_if *
carp_if_remove(struct carp_if * ocif,struct carp_softc * sc)729 carp_if_remove(struct carp_if *ocif, struct carp_softc *sc)
730 {
731 	struct carp_softc_container *oscc, *scc;
732 	struct carp_if *cif;
733 	int count = 0;
734 #ifdef INVARIANTS
735 	int found = 0;
736 #endif
737 
738 	TAILQ_FOREACH(oscc, ocif, scc_link) {
739 		++count;
740 #ifdef INVARIANTS
741 		if (oscc->scc_softc == sc)
742 			found = 1;
743 #endif
744 	}
745 	KASSERT(found, ("%s carp_softc is not on carp_if", __func__));
746 
747 	if (count == 1) {
748 		/* Last one is going to be unlinked */
749 		return NULL;
750 	}
751 
752 	cif = kmalloc(sizeof(*cif), M_CARP, M_WAITOK | M_ZERO);
753 	TAILQ_INIT(cif);
754 
755 	TAILQ_FOREACH(oscc, ocif, scc_link) {
756 		if (oscc->scc_softc == sc)
757 			continue;
758 
759 		scc = kmalloc(sizeof(*scc), M_CARP, M_WAITOK | M_ZERO);
760 		scc->scc_softc = oscc->scc_softc;
761 		TAILQ_INSERT_TAIL(cif, scc, scc_link);
762 	}
763 
764 	return cif;
765 }
766 
767 static struct carp_if *
carp_if_insert(struct carp_if * ocif,struct carp_softc * sc)768 carp_if_insert(struct carp_if *ocif, struct carp_softc *sc)
769 {
770 	struct carp_softc_container *oscc;
771 	int onlist;
772 
773 	onlist = 0;
774 	if (ocif != NULL) {
775 		TAILQ_FOREACH(oscc, ocif, scc_link) {
776 			if (oscc->scc_softc == sc)
777 				onlist = 1;
778 		}
779 	}
780 
781 #ifdef INVARIANTS
782 	if (sc->sc_carpdev != NULL) {
783 		KASSERT(onlist, ("%s is not on %s carp list",
784 		    sc->sc_if.if_xname, sc->sc_carpdev->if_xname));
785 	} else {
786 		KASSERT(!onlist, ("%s is already on carp list",
787 		    sc->sc_if.if_xname));
788 	}
789 #endif
790 
791 	if (!onlist) {
792 		struct carp_if *cif;
793 		struct carp_softc_container *new_scc, *scc;
794 		int inserted = 0;
795 
796 		cif = kmalloc(sizeof(*cif), M_CARP, M_WAITOK | M_ZERO);
797 		TAILQ_INIT(cif);
798 
799 		new_scc = kmalloc(sizeof(*new_scc), M_CARP, M_WAITOK | M_ZERO);
800 		new_scc->scc_softc = sc;
801 
802 		if (ocif != NULL) {
803 			TAILQ_FOREACH(oscc, ocif, scc_link) {
804 				if (!inserted &&
805 				    oscc->scc_softc->sc_vhid > sc->sc_vhid) {
806 					TAILQ_INSERT_TAIL(cif, new_scc,
807 					    scc_link);
808 					inserted = 1;
809 				}
810 
811 				scc = kmalloc(sizeof(*scc), M_CARP,
812 				    M_WAITOK | M_ZERO);
813 				scc->scc_softc = oscc->scc_softc;
814 				TAILQ_INSERT_TAIL(cif, scc, scc_link);
815 			}
816 		}
817 		if (!inserted)
818 			TAILQ_INSERT_TAIL(cif, new_scc, scc_link);
819 
820 		return cif;
821 	} else {
822 		return ocif;
823 	}
824 }
825 
826 static void
carp_if_free(struct carp_if * cif)827 carp_if_free(struct carp_if *cif)
828 {
829 	struct carp_softc_container *scc;
830 
831 	while ((scc = TAILQ_FIRST(cif)) != NULL) {
832 		TAILQ_REMOVE(cif, scc, scc_link);
833 		kfree(scc, M_CARP);
834 	}
835 	kfree(cif, M_CARP);
836 }
837 
838 static void
carp_detach(struct carp_softc * sc,boolean_t detach,boolean_t del_iaback)839 carp_detach(struct carp_softc *sc, boolean_t detach, boolean_t del_iaback)
840 {
841 	carp_suspend(sc, detach);
842 
843 	carp_multicast_cleanup(sc);
844 #ifdef INET6
845 	carp_multicast6_cleanup(sc);
846 #endif
847 
848 	if (!sc->sc_dead && detach) {
849 		struct carp_vhaddr *vha;
850 
851 		TAILQ_FOREACH(vha, &sc->sc_vha_list, vha_link)
852 			carp_deactivate_vhaddr(sc, vha, del_iaback);
853 		KKASSERT(sc->sc_naddrs == 0);
854 	}
855 
856 	if (sc->sc_carpdev != NULL) {
857 		struct ifnet *ifp = sc->sc_carpdev;
858 		struct carp_if *ocif = ifp->if_carp;
859 
860 		ifp->if_carp = carp_if_remove(ocif, sc);
861 		KASSERT(ifp->if_carp != ocif,
862 		    ("%s carp_if_remove failed", __func__));
863 
864 		sc->sc_carpdev = NULL;
865 		sc->sc_ia = NULL;
866 		sc->arpcom.ac_if.if_hwassist = 0;
867 
868 		/*
869 		 * Make sure that all protocol threads see the
870 		 * sc_carpdev and if_carp changes
871 		 */
872 		netmsg_service_sync();
873 
874 		if (ifp->if_carp == NULL) {
875 			/*
876 			 * No more carp interfaces using
877 			 * ifp as the backing interface,
878 			 * move it out of promiscous mode.
879 			 */
880 			ifpromisc(ifp, 0);
881 		}
882 
883 		/*
884 		 * The old carp list could be safely free now,
885 		 * since no one can access it.
886 		 */
887 		carp_if_free(ocif);
888 	}
889 }
890 
891 static void
carp_ifdetach_dispatch(netmsg_t msg)892 carp_ifdetach_dispatch(netmsg_t msg)
893 {
894 	struct netmsg_carp *cmsg = (struct netmsg_carp *)msg;
895 	struct ifnet *ifp = cmsg->nc_carpdev;
896 
897 	while (ifp->if_carp) {
898 		struct carp_softc_container *scc;
899 
900 		scc = TAILQ_FIRST((struct carp_if *)(ifp->if_carp));
901 		carp_detach(scc->scc_softc, TRUE, TRUE);
902 	}
903 	lwkt_replymsg(&cmsg->base.lmsg, 0);
904 }
905 
906 /* Detach an interface from the carp. */
907 static void
carp_ifdetach(void * arg __unused,struct ifnet * ifp)908 carp_ifdetach(void *arg __unused, struct ifnet *ifp)
909 {
910 	struct netmsg_carp cmsg;
911 
912 	ASSERT_IFNET_NOT_SERIALIZED_ALL(ifp);
913 
914 	bzero(&cmsg, sizeof(cmsg));
915 	netmsg_init(&cmsg.base, NULL, &curthread->td_msgport, 0,
916 	    carp_ifdetach_dispatch);
917 	cmsg.nc_carpdev = ifp;
918 
919 	lwkt_domsg(netisr_cpuport(0), &cmsg.base.lmsg, 0);
920 }
921 
922 /*
923  * process input packet.
924  * we have rearranged checks order compared to the rfc,
925  * but it seems more efficient this way or not possible otherwise.
926  */
927 int
carp_proto_input(struct mbuf ** mp,int * offp,int proto)928 carp_proto_input(struct mbuf **mp, int *offp, int proto)
929 {
930 	struct mbuf *m = *mp;
931 	struct ip *ip = mtod(m, struct ip *);
932 	struct ifnet *ifp = m->m_pkthdr.rcvif;
933 	struct carp_header *ch;
934 	struct carp_softc *sc;
935 	int len, iphlen;
936 
937 	iphlen = *offp;
938 	*mp = NULL;
939 
940 	carpstats.carps_ipackets++;
941 
942 	if (!carp_opts[CARPCTL_ALLOW]) {
943 		m_freem(m);
944 		goto back;
945 	}
946 
947 	/* Check if received on a valid carp interface */
948 	if (ifp->if_type != IFT_CARP) {
949 		carpstats.carps_badif++;
950 		CARP_LOG("carp_proto_input: packet received on non-carp "
951 		    "interface: %s\n", ifp->if_xname);
952 		m_freem(m);
953 		goto back;
954 	}
955 
956 	if (!CARP_IS_RUNNING(ifp)) {
957 		carpstats.carps_badif++;
958 		CARP_LOG("carp_proto_input: packet received on stopped carp "
959 		    "interface: %s\n", ifp->if_xname);
960 		m_freem(m);
961 		goto back;
962 	}
963 
964 	sc = ifp->if_softc;
965 	if (sc->sc_carpdev == NULL) {
966 		carpstats.carps_badif++;
967 		CARP_LOG("carp_proto_input: packet received on defunc carp "
968 		    "interface: %s\n", ifp->if_xname);
969 		m_freem(m);
970 		goto back;
971 	}
972 
973 	if (!IN_MULTICAST(ntohl(ip->ip_dst.s_addr))) {
974 		carpstats.carps_badif++;
975 		CARP_LOG("carp_proto_input: non-mcast packet on "
976 		    "interface: %s\n", ifp->if_xname);
977 		m_freem(m);
978 		goto back;
979 	}
980 
981 	/* Verify that the IP TTL is CARP_DFLTTL. */
982 	if (ip->ip_ttl != CARP_DFLTTL) {
983 		carpstats.carps_badttl++;
984 		CARP_LOG("carp_proto_input: received ttl %d != %d on %s\n",
985 		    ip->ip_ttl, CARP_DFLTTL, ifp->if_xname);
986 		m_freem(m);
987 		goto back;
988 	}
989 
990 	/* Minimal CARP packet size */
991 	len = iphlen + sizeof(*ch);
992 
993 	/*
994 	 * Verify that the received packet length is
995 	 * not less than the CARP header
996 	 */
997 	if (m->m_pkthdr.len < len) {
998 		carpstats.carps_badlen++;
999 		CARP_LOG("packet too short %d on %s\n", m->m_pkthdr.len,
1000 		    ifp->if_xname);
1001 		m_freem(m);
1002 		goto back;
1003 	}
1004 
1005 	/* Make sure that CARP header is contiguous */
1006 	if (len > m->m_len) {
1007 		m = m_pullup(m, len);
1008 		if (m == NULL) {
1009 			carpstats.carps_hdrops++;
1010 			CARP_LOG("carp_proto_input: m_pullup failed\n");
1011 			goto back;
1012 		}
1013 		ip = mtod(m, struct ip *);
1014 	}
1015 	ch = (struct carp_header *)((uint8_t *)ip + iphlen);
1016 
1017 	/* Verify the CARP checksum */
1018 	if (in_cksum_skip(m, len, iphlen)) {
1019 		carpstats.carps_badsum++;
1020 		CARP_LOG("carp_proto_input: checksum failed on %s\n",
1021 		    ifp->if_xname);
1022 		m_freem(m);
1023 		goto back;
1024 	}
1025 	carp_proto_input_c(sc, m, ch, AF_INET);
1026 back:
1027 	return(IPPROTO_DONE);
1028 }
1029 
#ifdef INET6
/*
 * Process an incoming IPv6 CARP packet.
 *
 * Validates the receiving interface, the hop limit, the presence of a
 * complete CARP header at offset *offp and the CARP checksum, then
 * hands the packet to carp_proto_input_c().  Always returns
 * IPPROTO_DONE.
 */
int
carp6_proto_input(struct mbuf **mp, int *offp, int proto)
{
	struct mbuf *m = *mp;
	struct ip6_hdr *ip6 = mtod(m, struct ip6_hdr *);
	struct ifnet *ifp = m->m_pkthdr.rcvif;
	struct carp_header *ch;
	struct carp_softc *sc;
	u_int len;

	carpstats.carps_ipackets6++;

	/* CARP processing administratively disabled */
	if (!carp_opts[CARPCTL_ALLOW]) {
		m_freem(m);
		goto back;
	}

	/* check if received on a valid carp interface */
	if (ifp->if_type != IFT_CARP) {
		carpstats.carps_badif++;
		CARP_LOG("carp6_proto_input: packet received on non-carp "
		    "interface: %s\n", ifp->if_xname);
		m_freem(m);
		goto back;
	}

	if (!CARP_IS_RUNNING(ifp)) {
		carpstats.carps_badif++;
		/*
		 * This message used to be prefixed with the name of the
		 * IPv4 handler; report the function that actually
		 * dropped the packet.
		 */
		CARP_LOG("carp6_proto_input: packet received on stopped carp "
		    "interface: %s\n", ifp->if_xname);
		m_freem(m);
		goto back;
	}

	/* The carp interface must still have a backing device */
	sc = ifp->if_softc;
	if (sc->sc_carpdev == NULL) {
		carpstats.carps_badif++;
		CARP_LOG("carp6_proto_input: packet received on defunc-carp "
		    "interface: %s\n", ifp->if_xname);
		m_freem(m);
		goto back;
	}

	/* verify that the IP TTL is 255 */
	if (ip6->ip6_hlim != CARP_DFLTTL) {
		carpstats.carps_badttl++;
		CARP_LOG("carp6_proto_input: received ttl %d != 255 on %s\n",
		    ip6->ip6_hlim, ifp->if_xname);
		m_freem(m);
		goto back;
	}

	/*
	 * verify that we have a complete carp packet
	 *
	 * The length is sampled first, since it is only used for the
	 * log message below.
	 * NOTE(review): the failure path does not free m; presumably
	 * IP6_EXTHDR_GET has already released it -- confirm.
	 */
	len = m->m_len;
	IP6_EXTHDR_GET(ch, struct carp_header *, m, *offp, sizeof(*ch));
	if (ch == NULL) {
		carpstats.carps_badlen++;
		CARP_LOG("carp6_proto_input: packet size %u too small\n", len);
		goto back;
	}

	/* verify the CARP checksum */
	if (in_cksum_range(m, 0, *offp, sizeof(*ch))) {
		carpstats.carps_badsum++;
		CARP_LOG("carp6_proto_input: checksum failed, on %s\n",
		    ifp->if_xname);
		m_freem(m);
		goto back;
	}

	/* Common processing takes over ownership of the mbuf */
	carp_proto_input_c(sc, m, ch, AF_INET6);
back:
	return (IPPROTO_DONE);
}
#endif /* INET6 */
1106 
1107 static void
carp_proto_input_c(struct carp_softc * sc,struct mbuf * m,struct carp_header * ch,sa_family_t af)1108 carp_proto_input_c(struct carp_softc *sc, struct mbuf *m,
1109     struct carp_header *ch, sa_family_t af)
1110 {
1111 	struct ifnet *cifp;
1112 	uint64_t tmp_counter;
1113 	struct timeval sc_tv, ch_tv;
1114 
1115 	if (sc->sc_vhid != ch->carp_vhid) {
1116 		/*
1117 		 * CARP uses multicast, however, multicast packets
1118 		 * are tapped to all CARP interfaces on the physical
1119 		 * interface receiving the CARP packets, so we don't
1120 		 * update any stats here.
1121 		 */
1122 		m_freem(m);
1123 		return;
1124 	}
1125 	cifp = &sc->sc_if;
1126 
1127 	/* verify the CARP version. */
1128 	if (ch->carp_version != CARP_VERSION) {
1129 		carpstats.carps_badver++;
1130 		CARP_LOG("%s; invalid version %d\n", cifp->if_xname,
1131 			 ch->carp_version);
1132 		m_freem(m);
1133 		return;
1134 	}
1135 
1136 	/* verify the hash */
1137 	if (carp_hmac_verify(sc, ch->carp_counter, ch->carp_md)) {
1138 		carpstats.carps_badauth++;
1139 		CARP_LOG("%s: incorrect hash\n", cifp->if_xname);
1140 		m_freem(m);
1141 		return;
1142 	}
1143 
1144 	tmp_counter = ntohl(ch->carp_counter[0]);
1145 	tmp_counter = tmp_counter<<32;
1146 	tmp_counter += ntohl(ch->carp_counter[1]);
1147 
1148 	/* XXX Replay protection goes here */
1149 
1150 	sc->sc_init_counter = 0;
1151 	sc->sc_counter = tmp_counter;
1152 
1153 	sc_tv.tv_sec = sc->sc_advbase;
1154 	if (carp_suppress_preempt && sc->sc_advskew <  240)
1155 		sc_tv.tv_usec = 240 * 1000000 / 256;
1156 	else
1157 		sc_tv.tv_usec = sc->sc_advskew * 1000000 / 256;
1158 	ch_tv.tv_sec = ch->carp_advbase;
1159 	ch_tv.tv_usec = ch->carp_advskew * 1000000 / 256;
1160 
1161 	switch (sc->sc_state) {
1162 	case INIT:
1163 		break;
1164 
1165 	case MASTER:
1166 		/*
1167 		 * If we receive an advertisement from a master who's going to
1168 		 * be more frequent than us, go into BACKUP state.
1169 		 */
1170 		if (timevalcmp(&sc_tv, &ch_tv, >) ||
1171 		    timevalcmp(&sc_tv, &ch_tv, ==)) {
1172 			callout_stop(&sc->sc_ad_tmo);
1173 			CARP_DEBUG("%s: MASTER -> BACKUP "
1174 			   "(more frequent advertisement received)\n",
1175 			   cifp->if_xname);
1176 			carp_set_state(sc, BACKUP);
1177 			carp_setrun(sc, 0);
1178 			if (carp_opts[CARPCTL_SETROUTE])
1179 				carp_setroute(sc, RTM_DELETE);
1180 		}
1181 		break;
1182 
1183 	case BACKUP:
1184 		/*
1185 		 * If we're pre-empting masters who advertise slower than us,
1186 		 * and this one claims to be slower, treat him as down.
1187 		 */
1188 		if (carp_opts[CARPCTL_PREEMPT] &&
1189 		    timevalcmp(&sc_tv, &ch_tv, <)) {
1190 			CARP_DEBUG("%s: BACKUP -> MASTER "
1191 			    "(preempting a slower master)\n", cifp->if_xname);
1192 			carp_master_down(sc);
1193 			break;
1194 		}
1195 
1196 		/*
1197 		 *  If the master is going to advertise at such a low frequency
1198 		 *  that he's guaranteed to time out, we'd might as well just
1199 		 *  treat him as timed out now.
1200 		 */
1201 		sc_tv.tv_sec = sc->sc_advbase * 3;
1202 		if (timevalcmp(&sc_tv, &ch_tv, <)) {
1203 			CARP_DEBUG("%s: BACKUP -> MASTER (master timed out)\n",
1204 				   cifp->if_xname);
1205 			carp_master_down(sc);
1206 			break;
1207 		}
1208 
1209 		/*
1210 		 * Otherwise, we reset the counter and wait for the next
1211 		 * advertisement.
1212 		 */
1213 		carp_setrun(sc, af);
1214 		break;
1215 	}
1216 	m_freem(m);
1217 }
1218 
1219 struct mbuf *
carp_input(void * v,struct mbuf * m)1220 carp_input(void *v, struct mbuf *m)
1221 {
1222 	struct carp_if *cif = v;
1223 	struct ether_header *eh;
1224 	struct carp_softc_container *scc;
1225 	struct ifnet *ifp;
1226 
1227 	eh = mtod(m, struct ether_header *);
1228 
1229 	ifp = carp_forus(cif, eh->ether_dhost);
1230 	if (ifp != NULL) {
1231 		ether_reinput_oncpu(ifp, m, REINPUT_RUNBPF);
1232 		return NULL;
1233 	}
1234 
1235 	if ((m->m_flags & (M_BCAST | M_MCAST)) == 0)
1236 		return m;
1237 
1238 	/*
1239 	 * XXX Should really check the list of multicast addresses
1240 	 * for each CARP interface _before_ copying.
1241 	 */
1242 	TAILQ_FOREACH(scc, cif, scc_link) {
1243 		struct carp_softc *sc = scc->scc_softc;
1244 		struct mbuf *m0;
1245 
1246 		if ((sc->sc_if.if_flags & IFF_UP) == 0)
1247 			continue;
1248 
1249 		m0 = m_dup(m, M_NOWAIT);
1250 		if (m0 == NULL)
1251 			continue;
1252 
1253 		ether_reinput_oncpu(&sc->sc_if, m0, REINPUT_RUNBPF);
1254 	}
1255 	return m;
1256 }
1257 
1258 static void
carp_prepare_ad(struct carp_softc * sc,struct carp_header * ch)1259 carp_prepare_ad(struct carp_softc *sc, struct carp_header *ch)
1260 {
1261 	if (sc->sc_init_counter) {
1262 		/* this could also be seconds since unix epoch */
1263 		sc->sc_counter = karc4random();
1264 		sc->sc_counter = sc->sc_counter << 32;
1265 		sc->sc_counter += karc4random();
1266 	} else {
1267 		sc->sc_counter++;
1268 	}
1269 
1270 	ch->carp_counter[0] = htonl((sc->sc_counter >> 32) & 0xffffffff);
1271 	ch->carp_counter[1] = htonl(sc->sc_counter & 0xffffffff);
1272 
1273 	carp_hmac_generate(sc, ch->carp_counter, ch->carp_md);
1274 }
1275 
1276 static void
carp_send_ad_all(void)1277 carp_send_ad_all(void)
1278 {
1279 	struct carp_softc *sc;
1280 
1281 	LIST_FOREACH(sc, &carpif_list, sc_next) {
1282 		if (sc->sc_carpdev == NULL)
1283 			continue;
1284 
1285 		if (CARP_IS_RUNNING(&sc->sc_if) && sc->sc_state == MASTER)
1286 			carp_send_ad(sc);
1287 	}
1288 }
1289 
1290 static void
carp_send_ad_timeout(void * xsc)1291 carp_send_ad_timeout(void *xsc)
1292 {
1293 	struct carp_softc *sc = xsc;
1294 	struct netmsg_carp *cmsg = &sc->sc_ad_msg;
1295 
1296 	KASSERT(mycpuid == 0, ("%s not on cpu0 but on cpu%d",
1297 	    __func__, mycpuid));
1298 
1299 	crit_enter();
1300 	if (cmsg->base.lmsg.ms_flags & MSGF_DONE)
1301 		lwkt_sendmsg_oncpu(netisr_cpuport(0), &cmsg->base.lmsg);
1302 	crit_exit();
1303 }
1304 
1305 static void
carp_send_ad_timeout_dispatch(netmsg_t msg)1306 carp_send_ad_timeout_dispatch(netmsg_t msg)
1307 {
1308 	struct netmsg_carp *cmsg = (struct netmsg_carp *)msg;
1309 	struct carp_softc *sc = cmsg->nc_softc;
1310 
1311 	/* Reply ASAP */
1312 	crit_enter();
1313 	lwkt_replymsg(&cmsg->base.lmsg, 0);
1314 	crit_exit();
1315 
1316 	carp_send_ad(sc);
1317 }
1318 
1319 static void
carp_send_ad(struct carp_softc * sc)1320 carp_send_ad(struct carp_softc *sc)
1321 {
1322 	struct ifnet *cifp = &sc->sc_if;
1323 	struct carp_header ch;
1324 	struct timeval tv;
1325 	struct carp_header *ch_ptr;
1326 	struct mbuf *m;
1327 	int len, advbase, advskew;
1328 
1329 	if (!CARP_IS_RUNNING(cifp)) {
1330 		/* Bow out */
1331 		advbase = 255;
1332 		advskew = 255;
1333 	} else {
1334 		advbase = sc->sc_advbase;
1335 		if (!carp_suppress_preempt || sc->sc_advskew > 240)
1336 			advskew = sc->sc_advskew;
1337 		else
1338 			advskew = 240;
1339 		tv.tv_sec = advbase;
1340 		tv.tv_usec = advskew * 1000000 / 256;
1341 	}
1342 
1343 	ch.carp_version = CARP_VERSION;
1344 	ch.carp_type = CARP_ADVERTISEMENT;
1345 	ch.carp_vhid = sc->sc_vhid;
1346 	ch.carp_advbase = advbase;
1347 	ch.carp_advskew = advskew;
1348 	ch.carp_authlen = 7;	/* XXX DEFINE */
1349 	ch.carp_pad1 = 0;	/* must be zero */
1350 	ch.carp_cksum = 0;
1351 
1352 #ifdef INET
1353 	if (sc->sc_ia != NULL) {
1354 		struct ip *ip;
1355 
1356 		MGETHDR(m, M_NOWAIT, MT_HEADER);
1357 		if (m == NULL) {
1358 			IFNET_STAT_INC(cifp, oerrors, 1);
1359 			carpstats.carps_onomem++;
1360 			/* XXX maybe less ? */
1361 			if (advbase != 255 || advskew != 255)
1362 				callout_reset(&sc->sc_ad_tmo, tvtohz_high(&tv),
1363 				    carp_send_ad_timeout, sc);
1364 			return;
1365 		}
1366 		len = sizeof(*ip) + sizeof(ch);
1367 		m->m_pkthdr.len = len;
1368 		m->m_pkthdr.rcvif = NULL;
1369 		m->m_len = len;
1370 		MH_ALIGN(m, m->m_len);
1371 		m->m_flags |= M_MCAST;
1372 		if (carp_prio_ad)
1373 			m->m_flags |= M_PRIO;
1374 		ip = mtod(m, struct ip *);
1375 		ip->ip_v = IPVERSION;
1376 		ip->ip_hl = sizeof(*ip) >> 2;
1377 		ip->ip_tos = IPTOS_LOWDELAY;
1378 		ip->ip_len = htons(len);
1379 		ip->ip_id = ip_newid();
1380 		ip->ip_off = htons(IP_DF);
1381 		ip->ip_ttl = CARP_DFLTTL;
1382 		ip->ip_p = IPPROTO_CARP;
1383 		ip->ip_sum = 0;
1384 		ip->ip_src = sc->sc_ia->ia_addr.sin_addr;
1385 		ip->ip_dst.s_addr = htonl(INADDR_CARP_GROUP);
1386 
1387 		ch_ptr = (struct carp_header *)(&ip[1]);
1388 		bcopy(&ch, ch_ptr, sizeof(ch));
1389 		carp_prepare_ad(sc, ch_ptr);
1390 		ch_ptr->carp_cksum = in_cksum_skip(m, len, sizeof(*ip));
1391 
1392 		getmicrotime(&cifp->if_lastchange);
1393 		IFNET_STAT_INC(cifp, opackets, 1);
1394 		IFNET_STAT_INC(cifp, obytes, len);
1395 		carpstats.carps_opackets++;
1396 
1397 		if (ip_output(m, NULL, NULL, IP_RAWOUTPUT, &sc->sc_imo, NULL)) {
1398 			IFNET_STAT_INC(cifp, oerrors, 1);
1399 			if (sc->sc_sendad_errors < INT_MAX)
1400 				sc->sc_sendad_errors++;
1401 			if (sc->sc_sendad_errors == CARP_SENDAD_MAX_ERRORS) {
1402 				carp_suppress_preempt++;
1403 				if (carp_suppress_preempt == 1) {
1404 					carp_send_ad_all();
1405 				}
1406 			}
1407 			sc->sc_sendad_success = 0;
1408 		} else {
1409 			if (sc->sc_sendad_errors >= CARP_SENDAD_MAX_ERRORS) {
1410 				if (++sc->sc_sendad_success >=
1411 				    CARP_SENDAD_MIN_SUCCESS) {
1412 					carp_suppress_preempt--;
1413 					sc->sc_sendad_errors = 0;
1414 				}
1415 			} else {
1416 				sc->sc_sendad_errors = 0;
1417 			}
1418 		}
1419 	}
1420 #endif /* INET */
1421 #ifdef INET6
1422 	if (sc->sc_ia6) {
1423 		struct ip6_hdr *ip6;
1424 
1425 		MGETHDR(m, M_NOWAIT, MT_HEADER);
1426 		if (m == NULL) {
1427 			IFNET_STAT_INC(cifp, oerrors, 1);
1428 			carpstats.carps_onomem++;
1429 			/* XXX maybe less ? */
1430 			if (advbase != 255 || advskew != 255)
1431 				callout_reset(&sc->sc_ad_tmo, tvtohz_high(&tv),
1432 				    carp_send_ad_timeout, sc);
1433 			return;
1434 		}
1435 		len = sizeof(*ip6) + sizeof(ch);
1436 		m->m_pkthdr.len = len;
1437 		m->m_pkthdr.rcvif = NULL;
1438 		m->m_len = len;
1439 		MH_ALIGN(m, m->m_len);
1440 		m->m_flags |= M_MCAST;
1441 		ip6 = mtod(m, struct ip6_hdr *);
1442 		bzero(ip6, sizeof(*ip6));
1443 		ip6->ip6_vfc |= IPV6_VERSION;
1444 		ip6->ip6_hlim = CARP_DFLTTL;
1445 		ip6->ip6_nxt = IPPROTO_CARP;
1446 		bcopy(&sc->sc_ia6->ia_addr.sin6_addr, &ip6->ip6_src,
1447 		    sizeof(struct in6_addr));
1448 		/* set the multicast destination */
1449 
1450 		ip6->ip6_dst.s6_addr16[0] = htons(0xff02);
1451 		ip6->ip6_dst.s6_addr8[15] = 0x12;
1452 		if (in6_setscope(&ip6->ip6_dst, sc->sc_carpdev, NULL) != 0) {
1453 			IFNET_STAT_INC(cifp, oerrors, 1);
1454 			m_freem(m);
1455 			CARP_LOG("%s: in6_setscope failed\n", __func__);
1456 			return;
1457 		}
1458 
1459 		ch_ptr = (struct carp_header *)(&ip6[1]);
1460 		bcopy(&ch, ch_ptr, sizeof(ch));
1461 		carp_prepare_ad(sc, ch_ptr);
1462 		ch_ptr->carp_cksum = in_cksum_skip(m, len, sizeof(*ip6));
1463 
1464 		getmicrotime(&cifp->if_lastchange);
1465 		IFNET_STAT_INC(cifp, opackets, 1);
1466 		IFNET_STAT_INC(cifp, obytes, len);
1467 		carpstats.carps_opackets6++;
1468 
1469 		if (ip6_output(m, NULL, NULL, 0, &sc->sc_im6o, NULL, NULL)) {
1470 			IFNET_STAT_INC(cifp, oerrors, 1);
1471 			if (sc->sc_sendad_errors < INT_MAX)
1472 				sc->sc_sendad_errors++;
1473 			if (sc->sc_sendad_errors == CARP_SENDAD_MAX_ERRORS) {
1474 				carp_suppress_preempt++;
1475 				if (carp_suppress_preempt == 1) {
1476 					carp_send_ad_all();
1477 				}
1478 			}
1479 			sc->sc_sendad_success = 0;
1480 		} else {
1481 			if (sc->sc_sendad_errors >= CARP_SENDAD_MAX_ERRORS) {
1482 				if (++sc->sc_sendad_success >=
1483 				    CARP_SENDAD_MIN_SUCCESS) {
1484 					carp_suppress_preempt--;
1485 					sc->sc_sendad_errors = 0;
1486 				}
1487 			} else {
1488 				sc->sc_sendad_errors = 0;
1489 			}
1490 		}
1491 	}
1492 #endif /* INET6 */
1493 
1494 	if (advbase != 255 || advskew != 255)
1495 		callout_reset(&sc->sc_ad_tmo, tvtohz_high(&tv),
1496 		    carp_send_ad_timeout, sc);
1497 }
1498 
1499 /*
1500  * Broadcast a gratuitous ARP request containing
1501  * the virtual router MAC address for each IP address
1502  * associated with the virtual router.
1503  */
1504 static void
carp_send_arp(struct carp_softc * sc)1505 carp_send_arp(struct carp_softc *sc)
1506 {
1507 	const struct carp_vhaddr *vha;
1508 
1509 	TAILQ_FOREACH(vha, &sc->sc_vha_list, vha_link) {
1510 		if (vha->vha_iaback == NULL)
1511 			continue;
1512 		arp_gratuitous(&sc->sc_if, &vha->vha_ia->ia_ifa);
1513 	}
1514 }
1515 
#ifdef INET6
/*
 * Send a neighbor advertisement (override flag set) to the link-local
 * all-nodes group, via the backing device, for every IPv6 address
 * configured on the carp interface.
 */
static void
carp_send_na(struct carp_softc *sc)
{
	static struct in6_addr mcast = IN6ADDR_LINKLOCAL_ALLNODES_INIT;
	struct ifaddr_container *ifac;

	TAILQ_FOREACH(ifac, &sc->sc_if.if_addrheads[mycpuid], ifa_link) {
		struct ifaddr *ifa = ifac->ifa;
		struct in6_addr *addr6;

		/* Only IPv6 addresses are advertised */
		if (ifa->ifa_addr->sa_family == AF_INET6) {
			addr6 = &ifatoia6(ifa)->ia_addr.sin6_addr;
			nd6_na_output(sc->sc_carpdev, &mcast, addr6,
			    ND_NA_FLAG_OVERRIDE, 1, NULL);
			DELAY(1000);	/* XXX */
		}
	}
}
#endif /* INET6 */
1537 
1538 #ifdef notyet
1539 static __inline const struct carp_vhaddr *
carp_find_addr(const struct carp_softc * sc,const struct in_addr * addr)1540 carp_find_addr(const struct carp_softc *sc, const struct in_addr *addr)
1541 {
1542 	struct carp_vhaddr *vha;
1543 
1544 	TAILQ_FOREACH(vha, &sc->sc_vha_list, vha_link) {
1545 		if (vha->vha_iaback == NULL)
1546 			continue;
1547 
1548 		if (vha->vha_ia->ia_addr.sin_addr.s_addr == addr->s_addr)
1549 			return vha;
1550 	}
1551 	return NULL;
1552 }
1553 
1554 static int
carp_iamatch_balance(const struct carp_if * cif,const struct in_addr * itaddr,const struct in_addr * isaddr,uint8_t ** enaddr)1555 carp_iamatch_balance(const struct carp_if *cif, const struct in_addr *itaddr,
1556 		     const struct in_addr *isaddr, uint8_t **enaddr)
1557 {
1558 	const struct carp_softc *vh;
1559 	int index, count = 0;
1560 
1561 	/*
1562 	 * XXX proof of concept implementation.
1563 	 * We use the source ip to decide which virtual host should
1564 	 * handle the request. If we're master of that virtual host,
1565 	 * then we respond, otherwise, just drop the arp packet on
1566 	 * the floor.
1567 	 */
1568 
1569 	TAILQ_FOREACH(vh, &cif->vhif_vrs, sc_list) {
1570 		if (!CARP_IS_RUNNING(&vh->sc_if))
1571 			continue;
1572 
1573 		if (carp_find_addr(vh, itaddr) != NULL)
1574 			count++;
1575 	}
1576 	if (count == 0)
1577 		return 0;
1578 
1579 	/* this should be a hash, like pf_hash() */
1580 	index = ntohl(isaddr->s_addr) % count;
1581 	count = 0;
1582 
1583 	TAILQ_FOREACH(vh, &cif->vhif_vrs, sc_list) {
1584 		if (!CARP_IS_RUNNING(&vh->sc_if))
1585 			continue;
1586 
1587 		if (carp_find_addr(vh, itaddr) == NULL)
1588 			continue;
1589 
1590 		if (count == index) {
1591 			if (vh->sc_state == MASTER) {
1592 				*enaddr = IF_LLADDR(&vh->sc_if);
1593 				return 1;
1594 			} else {
1595 				return 0;
1596 			}
1597 		}
1598 		count++;
1599 	}
1600 	return 0;
1601 }
1602 #endif
1603 
1604 int
carp_iamatch(const struct in_ifaddr * ia)1605 carp_iamatch(const struct in_ifaddr *ia)
1606 {
1607 	const struct carp_softc *sc = ia->ia_ifp->if_softc;
1608 
1609 	ASSERT_NETISR0;
1610 
1611 #ifdef notyet
1612 	if (carp_opts[CARPCTL_ARPBALANCE])
1613 		return carp_iamatch_balance(cif, itaddr, isaddr, enaddr);
1614 #endif
1615 
1616 	if (!CARP_IS_RUNNING(&sc->sc_if) || sc->sc_state != MASTER)
1617 		return 0;
1618 
1619 	return 1;
1620 }
1621 
1622 #ifdef INET6
1623 struct ifaddr *
carp_iamatch6(void * v,struct in6_addr * taddr)1624 carp_iamatch6(void *v, struct in6_addr *taddr)
1625 {
1626 #ifdef foo
1627 	struct carp_if *cif = v;
1628 	struct carp_softc *vh;
1629 
1630 	TAILQ_FOREACH(vh, &cif->vhif_vrs, sc_list) {
1631 		struct ifaddr_container *ifac;
1632 
1633 		TAILQ_FOREACH(ifac, &vh->sc_if.if_addrheads[mycpuid],
1634 			      ifa_link) {
1635 			struct ifaddr *ifa = ifac->ifa;
1636 
1637 			if (IN6_ARE_ADDR_EQUAL(taddr,
1638 			    &ifatoia6(ifa)->ia_addr.sin6_addr) &&
1639 			    CARP_IS_RUNNING(&vh->sc_if) &&
1640 			    vh->sc_state == MASTER) {
1641 				return (ifa);
1642 			}
1643 		}
1644 	}
1645 #endif
1646 	return (NULL);
1647 }
1648 
1649 void *
carp_macmatch6(void * v,struct mbuf * m,const struct in6_addr * taddr)1650 carp_macmatch6(void *v, struct mbuf *m, const struct in6_addr *taddr)
1651 {
1652 #ifdef foo
1653 	struct m_tag *mtag;
1654 	struct carp_if *cif = v;
1655 	struct carp_softc *sc;
1656 
1657 	TAILQ_FOREACH(sc, &cif->vhif_vrs, sc_list) {
1658 		struct ifaddr_container *ifac;
1659 
1660 		TAILQ_FOREACH(ifac, &sc->sc_if.if_addrheads[mycpuid],
1661 			      ifa_link) {
1662 			struct ifaddr *ifa = ifac->ifa;
1663 
1664 			if (IN6_ARE_ADDR_EQUAL(taddr,
1665 			    &ifatoia6(ifa)->ia_addr.sin6_addr) &&
1666 			    CARP_IS_RUNNING(&sc->sc_if)) {
1667 				struct ifnet *ifp = &sc->sc_if;
1668 
1669 				mtag = m_tag_get(PACKET_TAG_CARP,
1670 				    sizeof(struct ifnet *), M_NOWAIT);
1671 				if (mtag == NULL) {
1672 					/* better a bit than nothing */
1673 					return (IF_LLADDR(ifp));
1674 				}
1675 				bcopy(&ifp, (caddr_t)(mtag + 1),
1676 				    sizeof(struct ifnet *));
1677 				m_tag_prepend(m, mtag);
1678 
1679 				return (IF_LLADDR(ifp));
1680 			}
1681 		}
1682 	}
1683 #endif
1684 	return (NULL);
1685 }
1686 #endif
1687 
1688 static struct ifnet *
carp_forus(struct carp_if * cif,const uint8_t * dhost)1689 carp_forus(struct carp_if *cif, const uint8_t *dhost)
1690 {
1691 	struct carp_softc_container *scc;
1692 
1693 	if (memcmp(dhost, carp_etheraddr, ETHER_ADDR_LEN - 1) != 0)
1694 		return NULL;
1695 
1696 	TAILQ_FOREACH(scc, cif, scc_link) {
1697 		struct carp_softc *sc = scc->scc_softc;
1698 		struct ifnet *ifp = &sc->sc_if;
1699 
1700 		if (CARP_IS_RUNNING(ifp) && sc->sc_state == MASTER &&
1701 		    !bcmp(dhost, IF_LLADDR(ifp), ETHER_ADDR_LEN))
1702 			return ifp;
1703 	}
1704 	return NULL;
1705 }
1706 
1707 static void
carp_master_down_timeout(void * xsc)1708 carp_master_down_timeout(void *xsc)
1709 {
1710 	struct carp_softc *sc = xsc;
1711 	struct netmsg_carp *cmsg = &sc->sc_md_msg;
1712 
1713 	KASSERT(mycpuid == 0, ("%s not on cpu0 but on cpu%d",
1714 	    __func__, mycpuid));
1715 
1716 	crit_enter();
1717 	if (cmsg->base.lmsg.ms_flags & MSGF_DONE)
1718 		lwkt_sendmsg_oncpu(netisr_cpuport(0), &cmsg->base.lmsg);
1719 	crit_exit();
1720 }
1721 
1722 static void
carp_master_down_timeout_dispatch(netmsg_t msg)1723 carp_master_down_timeout_dispatch(netmsg_t msg)
1724 {
1725 	struct netmsg_carp *cmsg = (struct netmsg_carp *)msg;
1726 	struct carp_softc *sc = cmsg->nc_softc;
1727 
1728 	/* Reply ASAP */
1729 	crit_enter();
1730 	lwkt_replymsg(&cmsg->base.lmsg, 0);
1731 	crit_exit();
1732 
1733 	CARP_DEBUG("%s: BACKUP -> MASTER (master timed out)\n",
1734 		   sc->sc_if.if_xname);
1735 	carp_master_down(sc);
1736 }
1737 
1738 static void
carp_master_down(struct carp_softc * sc)1739 carp_master_down(struct carp_softc *sc)
1740 {
1741 	switch (sc->sc_state) {
1742 	case INIT:
1743 		kprintf("%s: master_down event in INIT state\n",
1744 			sc->sc_if.if_xname);
1745 		break;
1746 
1747 	case MASTER:
1748 		break;
1749 
1750 	case BACKUP:
1751 		carp_set_state(sc, MASTER);
1752 		carp_send_ad(sc);
1753 		carp_send_arp(sc);
1754 #ifdef INET6
1755 		carp_send_na(sc);
1756 #endif /* INET6 */
1757 		carp_setrun(sc, 0);
1758 		if (carp_opts[CARPCTL_SETROUTE])
1759 			carp_setroute(sc, RTM_ADD);
1760 		break;
1761 	}
1762 }
1763 
1764 /*
1765  * When in backup state, af indicates whether to reset the master down timer
1766  * for v4 or v6. If it's set to zero, reset the ones which are already pending.
1767  */
1768 static void
carp_setrun(struct carp_softc * sc,sa_family_t af)1769 carp_setrun(struct carp_softc *sc, sa_family_t af)
1770 {
1771 	struct ifnet *cifp = &sc->sc_if;
1772 	struct timeval tv;
1773 
1774 	if (sc->sc_carpdev == NULL) {
1775 		carp_set_state(sc, INIT);
1776 		return;
1777 	}
1778 
1779 	if ((cifp->if_flags & IFF_RUNNING) && sc->sc_vhid > 0 &&
1780 	    (sc->sc_naddrs || sc->sc_naddrs6)) {
1781 		/* Nothing */
1782 	} else {
1783 		if (carp_opts[CARPCTL_SETROUTE])
1784 			carp_setroute(sc, RTM_DELETE);
1785 		return;
1786 	}
1787 
1788 	switch (sc->sc_state) {
1789 	case INIT:
1790 		if (carp_opts[CARPCTL_PREEMPT] && !carp_suppress_preempt) {
1791 			carp_send_ad(sc);
1792 			carp_send_arp(sc);
1793 #ifdef INET6
1794 			carp_send_na(sc);
1795 #endif /* INET6 */
1796 			CARP_DEBUG("%s: INIT -> MASTER (preempting)\n",
1797 				   cifp->if_xname);
1798 			carp_set_state(sc, MASTER);
1799 			if (carp_opts[CARPCTL_SETROUTE])
1800 				carp_setroute(sc, RTM_ADD);
1801 		} else {
1802 			CARP_DEBUG("%s: INIT -> BACKUP\n", cifp->if_xname);
1803 			carp_set_state(sc, BACKUP);
1804 			if (carp_opts[CARPCTL_SETROUTE])
1805 				carp_setroute(sc, RTM_DELETE);
1806 			carp_setrun(sc, 0);
1807 		}
1808 		break;
1809 
1810 	case BACKUP:
1811 		callout_stop(&sc->sc_ad_tmo);
1812 		tv.tv_sec = 3 * sc->sc_advbase;
1813 		tv.tv_usec = sc->sc_advskew * 1000000 / 256;
1814 		switch (af) {
1815 #ifdef INET
1816 		case AF_INET:
1817 			callout_reset(&sc->sc_md_tmo, tvtohz_high(&tv),
1818 			    carp_master_down_timeout, sc);
1819 			break;
1820 #endif /* INET */
1821 #ifdef INET6
1822 		case AF_INET6:
1823 			callout_reset(&sc->sc_md6_tmo, tvtohz_high(&tv),
1824 			    carp_master_down_timeout, sc);
1825 			break;
1826 #endif /* INET6 */
1827 		default:
1828 			if (sc->sc_naddrs)
1829 				callout_reset(&sc->sc_md_tmo, tvtohz_high(&tv),
1830 				    carp_master_down_timeout, sc);
1831 			if (sc->sc_naddrs6)
1832 				callout_reset(&sc->sc_md6_tmo, tvtohz_high(&tv),
1833 				    carp_master_down_timeout, sc);
1834 			break;
1835 		}
1836 		break;
1837 
1838 	case MASTER:
1839 		tv.tv_sec = sc->sc_advbase;
1840 		tv.tv_usec = sc->sc_advskew * 1000000 / 256;
1841 		callout_reset(&sc->sc_ad_tmo, tvtohz_high(&tv),
1842 		    carp_send_ad_timeout, sc);
1843 		break;
1844 	}
1845 }
1846 
1847 static void
carp_multicast_cleanup(struct carp_softc * sc)1848 carp_multicast_cleanup(struct carp_softc *sc)
1849 {
1850 	struct ip_moptions *imo = &sc->sc_imo;
1851 
1852 	if (imo->imo_num_memberships == 0)
1853 		return;
1854 	KKASSERT(imo->imo_num_memberships == 1);
1855 
1856 	in_delmulti(imo->imo_membership[0]);
1857 	imo->imo_membership[0] = NULL;
1858 	imo->imo_num_memberships = 0;
1859 	imo->imo_multicast_ifp = NULL;
1860 }
1861 
#ifdef INET6
/*
 * Leave every IPv6 multicast group recorded in sc_im6o and clear
 * the multicast interface.
 */
static void
carp_multicast6_cleanup(struct carp_softc *sc)
{
	struct ip6_moptions *mopts6 = &sc->sc_im6o;
	struct in6_multi_mship *mship;

	/* Unlink each membership before leaving its group */
	while ((mship = LIST_FIRST(&mopts6->im6o_memberships)) != NULL) {
		LIST_REMOVE(mship, i6mm_chain);
		in6_leavegroup(mship);
	}
	mopts6->im6o_multicast_ifp = NULL;
}
#endif
1878 
1879 static void
carp_ioctl_getvhaddr_dispatch(netmsg_t msg)1880 carp_ioctl_getvhaddr_dispatch(netmsg_t msg)
1881 {
1882 	struct netmsg_carp *cmsg = (struct netmsg_carp *)msg;
1883 	struct carp_softc *sc = cmsg->nc_softc;
1884 	const struct carp_vhaddr *vha;
1885 	struct ifcarpvhaddr *carpa, *carpa0;
1886 	int count, len, error = 0;
1887 
1888 	count = 0;
1889 	TAILQ_FOREACH(vha, &sc->sc_vha_list, vha_link)
1890 		++count;
1891 
1892 	if (cmsg->nc_datalen == 0) {
1893 		cmsg->nc_datalen = count * sizeof(*carpa);
1894 		goto back;
1895 	} else if (count == 0 || cmsg->nc_datalen < sizeof(*carpa)) {
1896 		cmsg->nc_datalen = 0;
1897 		goto back;
1898 	}
1899 	len = min(cmsg->nc_datalen, sizeof(*carpa) * count);
1900 	KKASSERT(len >= sizeof(*carpa));
1901 
1902 	carpa0 = carpa = kmalloc(len, M_TEMP, M_WAITOK | M_NULLOK | M_ZERO);
1903 	if (carpa == NULL) {
1904 		error = ENOMEM;
1905 		goto back;
1906 	}
1907 
1908 	count = 0;
1909 	TAILQ_FOREACH(vha, &sc->sc_vha_list, vha_link) {
1910 		if (len < sizeof(*carpa))
1911 			break;
1912 
1913 		carpa->carpa_flags = vha->vha_flags;
1914 		carpa->carpa_addr.sin_family = AF_INET;
1915 		carpa->carpa_addr.sin_addr = vha->vha_ia->ia_addr.sin_addr;
1916 
1917 		carpa->carpa_baddr.sin_family = AF_INET;
1918 		if (vha->vha_iaback == NULL) {
1919 			carpa->carpa_baddr.sin_addr.s_addr = INADDR_ANY;
1920 		} else {
1921 			carpa->carpa_baddr.sin_addr =
1922 			vha->vha_iaback->ia_addr.sin_addr;
1923 		}
1924 
1925 		++carpa;
1926 		++count;
1927 		len -= sizeof(*carpa);
1928 	}
1929 	cmsg->nc_datalen = sizeof(*carpa) * count;
1930 	KKASSERT(cmsg->nc_datalen > 0);
1931 
1932 	cmsg->nc_data = carpa0;
1933 
1934 back:
1935 	lwkt_replymsg(&cmsg->base.lmsg, error);
1936 }
1937 
1938 static int
carp_ioctl_getvhaddr(struct carp_softc * sc,struct ifdrv * ifd)1939 carp_ioctl_getvhaddr(struct carp_softc *sc, struct ifdrv *ifd)
1940 {
1941 	struct ifnet *ifp = &sc->arpcom.ac_if;
1942 	struct netmsg_carp cmsg;
1943 	int error;
1944 
1945 	ASSERT_IFNET_SERIALIZED_ALL(ifp);
1946 	ifnet_deserialize_all(ifp);
1947 
1948 	bzero(&cmsg, sizeof(cmsg));
1949 	netmsg_init(&cmsg.base, NULL, &curthread->td_msgport, 0,
1950 	    carp_ioctl_getvhaddr_dispatch);
1951 	cmsg.nc_softc = sc;
1952 	cmsg.nc_datalen = ifd->ifd_len;
1953 
1954 	error = lwkt_domsg(netisr_cpuport(0), &cmsg.base.lmsg, 0);
1955 
1956 	if (!error) {
1957 		if (cmsg.nc_data != NULL) {
1958 			error = copyout(cmsg.nc_data, ifd->ifd_data,
1959 			    cmsg.nc_datalen);
1960 			kfree(cmsg.nc_data, M_TEMP);
1961 		}
1962 		ifd->ifd_len = cmsg.nc_datalen;
1963 	} else {
1964 		KASSERT(cmsg.nc_data == NULL,
1965 		    ("%s temp vhaddr is alloc upon error", __func__));
1966 	}
1967 
1968 	ifnet_serialize_all(ifp);
1969 	return error;
1970 }
1971 
1972 static int
carp_config_vhaddr(struct carp_softc * sc,struct carp_vhaddr * vha,struct in_ifaddr * ia_del)1973 carp_config_vhaddr(struct carp_softc *sc, struct carp_vhaddr *vha,
1974     struct in_ifaddr *ia_del)
1975 {
1976 	struct ifnet *ifp;
1977 	struct in_ifaddr *ia_if;
1978 	const struct in_ifaddr *ia_vha;
1979 	struct in_ifaddr_container *iac;
1980 	int own, ia_match_carpdev;
1981 
1982 	KKASSERT(vha->vha_ia != NULL);
1983 	ia_vha = vha->vha_ia;
1984 
1985 	ia_if = NULL;
1986 	own = 0;
1987 	ia_match_carpdev = 0;
1988 	TAILQ_FOREACH(iac, &in_ifaddrheads[mycpuid], ia_link) {
1989 		struct in_ifaddr *ia = iac->ia;
1990 
1991 		if (ia == ia_del)
1992 			continue;
1993 
1994 		if (ia->ia_ifp->if_type == IFT_CARP)
1995 			continue;
1996 
1997 		if ((ia->ia_ifp->if_flags & IFF_UP) == 0)
1998 			continue;
1999 
2000 		/* and, yeah, we need a multicast-capable iface too */
2001 		if ((ia->ia_ifp->if_flags & IFF_MULTICAST) == 0)
2002 			continue;
2003 
2004 		if (ia_vha->ia_subnetmask == ia->ia_subnetmask &&
2005 		    ia_vha->ia_subnet == ia->ia_subnet) {
2006 			if (ia_vha->ia_addr.sin_addr.s_addr ==
2007 			    ia->ia_addr.sin_addr.s_addr)
2008 				own = 1;
2009 			if (ia_if == NULL) {
2010 				ia_if = ia;
2011 			} else if (sc->sc_carpdev != NULL &&
2012 			    sc->sc_carpdev == ia->ia_ifp) {
2013 				ia_if = ia;
2014 				if (ia_if->ia_flags & IFA_ROUTE) {
2015 					/*
2016 					 * Address with prefix route
2017 					 * is prefered
2018 					 */
2019 					break;
2020 				}
2021 				ia_match_carpdev = 1;
2022 			} else if (!ia_match_carpdev) {
2023 				if (ia->ia_flags & IFA_ROUTE) {
2024 					/*
2025 					 * Address with prefix route
2026 					 * is prefered over others.
2027 					 */
2028 					ia_if = ia;
2029 				}
2030 			}
2031 		}
2032 	}
2033 
2034 	carp_deactivate_vhaddr(sc, vha, FALSE);
2035 	if (!ia_if)
2036 		return ENOENT;
2037 
2038 	ifp = ia_if->ia_ifp;
2039 
2040 	/* XXX Don't allow parent iface to be changed */
2041 	if (sc->sc_carpdev != NULL && sc->sc_carpdev != ifp)
2042 		return EEXIST;
2043 
2044 	return carp_activate_vhaddr(sc, vha, ifp, ia_if, own);
2045 }
2046 
2047 static void
carp_add_addr(struct carp_softc * sc,struct ifaddr * carp_ifa)2048 carp_add_addr(struct carp_softc *sc, struct ifaddr *carp_ifa)
2049 {
2050 	struct carp_vhaddr *vha_new;
2051 	struct in_ifaddr *carp_ia;
2052 #ifdef INVARIANTS
2053 	struct carp_vhaddr *vha;
2054 #endif
2055 
2056 	KKASSERT(carp_ifa->ifa_addr->sa_family == AF_INET);
2057 	carp_ia = ifatoia(carp_ifa);
2058 
2059 #ifdef INVARIANTS
2060 	TAILQ_FOREACH(vha, &sc->sc_vha_list, vha_link)
2061 		KKASSERT(vha->vha_ia != NULL && vha->vha_ia != carp_ia);
2062 #endif
2063 
2064 	vha_new = kmalloc(sizeof(*vha_new), M_CARP, M_WAITOK | M_ZERO);
2065 	vha_new->vha_ia = carp_ia;
2066 	carp_insert_vhaddr(sc, vha_new);
2067 
2068 	if (carp_config_vhaddr(sc, vha_new, NULL) != 0) {
2069 		/*
2070 		 * If the above configuration fails, it may only mean
2071 		 * that the new address is problematic.  However, the
2072 		 * carp(4) interface may already have several working
2073 		 * addresses.  Since the expected behaviour of
2074 		 * SIOC[AS]IFADDR is to put the NIC into working state,
2075 		 * we try starting the state machine manually here with
2076 		 * the hope that the carp(4)'s previously working
2077 		 * addresses still could be brought up.
2078 		 */
2079 		carp_hmac_prepare(sc);
2080 		carp_set_state(sc, INIT);
2081 		carp_setrun(sc, 0);
2082 	}
2083 }
2084 
2085 static void
carp_del_addr(struct carp_softc * sc,struct ifaddr * carp_ifa)2086 carp_del_addr(struct carp_softc *sc, struct ifaddr *carp_ifa)
2087 {
2088 	struct carp_vhaddr *vha;
2089 	struct in_ifaddr *carp_ia;
2090 
2091 	KKASSERT(carp_ifa->ifa_addr->sa_family == AF_INET);
2092 	carp_ia = ifatoia(carp_ifa);
2093 
2094 	TAILQ_FOREACH(vha, &sc->sc_vha_list, vha_link) {
2095 		KKASSERT(vha->vha_ia != NULL);
2096 		if (vha->vha_ia == carp_ia)
2097 			break;
2098 	}
2099 	KASSERT(vha != NULL, ("no corresponding vhaddr %p", carp_ifa));
2100 
2101 	/*
2102 	 * Remove the vhaddr from the list before deactivating
2103 	 * the vhaddr, so that the HMAC could be correctly
2104 	 * updated in carp_deactivate_vhaddr()
2105 	 */
2106 	carp_remove_vhaddr(sc, vha);
2107 
2108 	carp_deactivate_vhaddr(sc, vha, FALSE);
2109 	kfree(vha, M_CARP);
2110 }
2111 
2112 static void
carp_config_addr(struct carp_softc * sc,struct ifaddr * carp_ifa)2113 carp_config_addr(struct carp_softc *sc, struct ifaddr *carp_ifa)
2114 {
2115 	struct carp_vhaddr *vha;
2116 	struct in_ifaddr *carp_ia;
2117 
2118 	KKASSERT(carp_ifa->ifa_addr->sa_family == AF_INET);
2119 	carp_ia = ifatoia(carp_ifa);
2120 
2121 	TAILQ_FOREACH(vha, &sc->sc_vha_list, vha_link) {
2122 		KKASSERT(vha->vha_ia != NULL);
2123 		if (vha->vha_ia == carp_ia)
2124 			break;
2125 	}
2126 	KASSERT(vha != NULL, ("no corresponding vhaddr %p", carp_ifa));
2127 
2128 	/* Remove then reinsert, to keep the vhaddr list sorted */
2129 	carp_remove_vhaddr(sc, vha);
2130 	carp_insert_vhaddr(sc, vha);
2131 
2132 	if (carp_config_vhaddr(sc, vha, NULL) != 0) {
2133 		/* See the comment in carp_add_addr() */
2134 		carp_hmac_prepare(sc);
2135 		carp_set_state(sc, INIT);
2136 		carp_setrun(sc, 0);
2137 	}
2138 }
2139 
2140 #ifdef notyet
2141 
2142 #ifdef INET6
2143 static int
carp_set_addr6(struct carp_softc * sc,struct sockaddr_in6 * sin6)2144 carp_set_addr6(struct carp_softc *sc, struct sockaddr_in6 *sin6)
2145 {
2146 	struct ifnet *ifp;
2147 	struct carp_if *cif;
2148 	struct in6_ifaddr *ia, *ia_if;
2149 	struct ip6_moptions *im6o = &sc->sc_im6o;
2150 	struct in6_multi_mship *imm;
2151 	struct in6_addr in6;
2152 	int own, error;
2153 
2154 	if (IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr)) {
2155 		carp_setrun(sc, 0);
2156 		return (0);
2157 	}
2158 
2159 	/* we have to do it by hands to check we won't match on us */
2160 	ia_if = NULL; own = 0;
2161 	for (ia = in6_ifaddr; ia; ia = ia->ia_next) {
2162 		int i;
2163 
2164 		for (i = 0; i < 4; i++) {
2165 			if ((sin6->sin6_addr.s6_addr32[i] &
2166 			    ia->ia_prefixmask.sin6_addr.s6_addr32[i]) !=
2167 			    (ia->ia_addr.sin6_addr.s6_addr32[i] &
2168 			    ia->ia_prefixmask.sin6_addr.s6_addr32[i]))
2169 				break;
2170 		}
2171 		/* and, yeah, we need a multicast-capable iface too */
2172 		if (ia->ia_ifp != &sc->sc_if &&
2173 		    (ia->ia_ifp->if_flags & IFF_MULTICAST) &&
2174 		    (i == 4)) {
2175 			if (!ia_if)
2176 				ia_if = ia;
2177 			if (IN6_ARE_ADDR_EQUAL(&sin6->sin6_addr,
2178 			    &ia->ia_addr.sin6_addr))
2179 				own++;
2180 		}
2181 	}
2182 
2183 	if (!ia_if)
2184 		return (EADDRNOTAVAIL);
2185 	ia = ia_if;
2186 	ifp = ia->ia_ifp;
2187 
2188 	if (ifp == NULL || (ifp->if_flags & IFF_MULTICAST) == 0 ||
2189 	    (im6o->im6o_multicast_ifp && im6o->im6o_multicast_ifp != ifp))
2190 		return (EADDRNOTAVAIL);
2191 
2192 	if (!sc->sc_naddrs6) {
2193 		im6o->im6o_multicast_ifp = ifp;
2194 
2195 		/* join CARP multicast address */
2196 		bzero(&in6, sizeof(in6));
2197 		in6.s6_addr16[0] = htons(0xff02);
2198 		in6.s6_addr8[15] = 0x12;
2199 		if (in6_setscope(&in6, ifp, NULL) != 0)
2200 			goto cleanup;
2201 		if ((imm = in6_joingroup(ifp, &in6, &error)) == NULL)
2202 			goto cleanup;
2203 		LIST_INSERT_HEAD(&im6o->im6o_memberships, imm, i6mm_chain);
2204 
2205 		/* join solicited multicast address */
2206 		bzero(&in6, sizeof(in6));
2207 		in6.s6_addr16[0] = htons(0xff02);
2208 		in6.s6_addr32[1] = 0;
2209 		in6.s6_addr32[2] = htonl(1);
2210 		in6.s6_addr32[3] = sin6->sin6_addr.s6_addr32[3];
2211 		in6.s6_addr8[12] = 0xff;
2212 		if (in6_setscope(&in6, ifp, NULL) != 0)
2213 			goto cleanup;
2214 		if ((imm = in6_joingroup(ifp, &in6, &error)) == NULL)
2215 			goto cleanup;
2216 		LIST_INSERT_HEAD(&im6o->im6o_memberships, imm, i6mm_chain);
2217 	}
2218 
2219 #ifdef foo
2220 	if (!ifp->if_carp) {
2221 		cif = kmalloc(sizeof(*cif), M_CARP, M_WAITOK | M_ZERO);
2222 
2223 		if ((error = ifpromisc(ifp, 1))) {
2224 			kfree(cif, M_CARP);
2225 			goto cleanup;
2226 		}
2227 
2228 		TAILQ_INIT(&cif->vhif_vrs);
2229 		ifp->if_carp = cif;
2230 	} else {
2231 		struct carp_softc *vr;
2232 
2233 		cif = ifp->if_carp;
2234 		TAILQ_FOREACH(vr, &cif->vhif_vrs, sc_list) {
2235 			if (vr != sc && vr->sc_vhid == sc->sc_vhid) {
2236 				error = EINVAL;
2237 				goto cleanup;
2238 			}
2239 		}
2240 	}
2241 #endif
2242 	sc->sc_ia6 = ia;
2243 	sc->sc_carpdev = ifp;
2244 
2245 #ifdef foo
2246 	{ /* XXX prevent endless loop if already in queue */
2247 	struct carp_softc *vr, *after = NULL;
2248 	int myself = 0;
2249 	cif = ifp->if_carp;
2250 
2251 	TAILQ_FOREACH(vr, &cif->vhif_vrs, sc_list) {
2252 		if (vr == sc)
2253 			myself = 1;
2254 		if (vr->sc_vhid < sc->sc_vhid)
2255 			after = vr;
2256 	}
2257 
2258 	if (!myself) {
2259 		/* We're trying to keep things in order */
2260 		if (after == NULL)
2261 			TAILQ_INSERT_TAIL(&cif->vhif_vrs, sc, sc_list);
2262 		else
2263 			TAILQ_INSERT_AFTER(&cif->vhif_vrs, after, sc, sc_list);
2264 	}
2265 	}
2266 #endif
2267 
2268 	sc->sc_naddrs6++;
2269 	if (own)
2270 		sc->sc_advskew = 0;
2271 	carp_sc_state(sc);
2272 	carp_setrun(sc, 0);
2273 
2274 	return (0);
2275 
2276 cleanup:
2277 	/* clean up multicast memberships */
2278 	if (!sc->sc_naddrs6) {
2279 		while (!LIST_EMPTY(&im6o->im6o_memberships)) {
2280 			imm = LIST_FIRST(&im6o->im6o_memberships);
2281 			LIST_REMOVE(imm, i6mm_chain);
2282 			in6_leavegroup(imm);
2283 		}
2284 	}
2285 	return (error);
2286 }
2287 
2288 static int
carp_del_addr6(struct carp_softc * sc,struct sockaddr_in6 * sin6)2289 carp_del_addr6(struct carp_softc *sc, struct sockaddr_in6 *sin6)
2290 {
2291 	int error = 0;
2292 
2293 	if (!--sc->sc_naddrs6) {
2294 		struct carp_if *cif = sc->sc_carpdev->if_carp;
2295 		struct ip6_moptions *im6o = &sc->sc_im6o;
2296 
2297 		callout_stop(&sc->sc_ad_tmo);
2298 		sc->sc_vhid = -1;
2299 		while (!LIST_EMPTY(&im6o->im6o_memberships)) {
2300 			struct in6_multi_mship *imm =
2301 			    LIST_FIRST(&im6o->im6o_memberships);
2302 
2303 			LIST_REMOVE(imm, i6mm_chain);
2304 			in6_leavegroup(imm);
2305 		}
2306 		im6o->im6o_multicast_ifp = NULL;
2307 #ifdef foo
2308 		TAILQ_REMOVE(&cif->vhif_vrs, sc, sc_list);
2309 		if (TAILQ_EMPTY(&cif->vhif_vrs)) {
2310 			sc->sc_carpdev->if_carp = NULL;
2311 			kfree(cif, M_IFADDR);
2312 		}
2313 #endif
2314 	}
2315 	return (error);
2316 }
2317 #endif /* INET6 */
2318 
2319 #endif
2320 
2321 static int
carp_ioctl(struct ifnet * ifp,u_long cmd,caddr_t addr,struct ucred * cr)2322 carp_ioctl(struct ifnet *ifp, u_long cmd, caddr_t addr, struct ucred *cr)
2323 {
2324 	struct carp_softc *sc = ifp->if_softc;
2325 	struct ifreq *ifr = (struct ifreq *)addr;
2326 	struct ifdrv *ifd = (struct ifdrv *)addr;
2327 	int error = 0;
2328 
2329 	ASSERT_IFNET_SERIALIZED_ALL(ifp);
2330 
2331 	switch (cmd) {
2332 	case SIOCSIFFLAGS:
2333 		if (ifp->if_flags & IFF_UP) {
2334 			if ((ifp->if_flags & IFF_RUNNING) == 0)
2335 				carp_init(sc);
2336 		} else if (ifp->if_flags & IFF_RUNNING) {
2337 			carp_ioctl_stop(sc);
2338 		}
2339 		break;
2340 
2341 	case SIOCSIFCAP:
2342 		carp_ioctl_ifcap(sc, ifr->ifr_reqcap);
2343 		break;
2344 
2345 	case SIOCSVH:
2346 		error = carp_ioctl_setvh(sc, ifr->ifr_data, cr);
2347 		break;
2348 
2349 	case SIOCGVH:
2350 		error = carp_ioctl_getvh(sc, ifr->ifr_data, cr);
2351 		break;
2352 
2353 	case SIOCGDRVSPEC:
2354 		switch (ifd->ifd_cmd) {
2355 		case CARPGDEVNAME:
2356 			error = carp_ioctl_getdevname(sc, ifd);
2357 			break;
2358 
2359 		case CARPGVHADDR:
2360 			error = carp_ioctl_getvhaddr(sc, ifd);
2361 			break;
2362 
2363 		default:
2364 			error = EINVAL;
2365 			break;
2366 		}
2367 		break;
2368 
2369 	default:
2370 		error = ether_ioctl(ifp, cmd, addr);
2371 		break;
2372 	}
2373 
2374 	return error;
2375 }
2376 
2377 static void
carp_ioctl_stop_dispatch(netmsg_t msg)2378 carp_ioctl_stop_dispatch(netmsg_t msg)
2379 {
2380 	struct netmsg_carp *cmsg = (struct netmsg_carp *)msg;
2381 	struct carp_softc *sc = cmsg->nc_softc;
2382 
2383 	carp_stop(sc, FALSE);
2384 	lwkt_replymsg(&cmsg->base.lmsg, 0);
2385 }
2386 
2387 static void
carp_ioctl_stop(struct carp_softc * sc)2388 carp_ioctl_stop(struct carp_softc *sc)
2389 {
2390 	struct ifnet *ifp = &sc->arpcom.ac_if;
2391 	struct netmsg_carp cmsg;
2392 
2393 	ASSERT_IFNET_SERIALIZED_ALL(ifp);
2394 
2395 	ifnet_deserialize_all(ifp);
2396 
2397 	bzero(&cmsg, sizeof(cmsg));
2398 	netmsg_init(&cmsg.base, NULL, &curthread->td_msgport, 0,
2399 	    carp_ioctl_stop_dispatch);
2400 	cmsg.nc_softc = sc;
2401 
2402 	lwkt_domsg(netisr_cpuport(0), &cmsg.base.lmsg, 0);
2403 
2404 	ifnet_serialize_all(ifp);
2405 }
2406 
2407 static void
carp_ioctl_setvh_dispatch(netmsg_t msg)2408 carp_ioctl_setvh_dispatch(netmsg_t msg)
2409 {
2410 	struct netmsg_carp *cmsg = (struct netmsg_carp *)msg;
2411 	struct carp_softc *sc = cmsg->nc_softc;
2412 	struct ifnet *ifp = &sc->arpcom.ac_if;
2413 	const struct carpreq *carpr = cmsg->nc_data;
2414 	int error;
2415 
2416 	error = 1;
2417 	if ((ifp->if_flags & IFF_RUNNING) &&
2418 	    sc->sc_state != INIT && carpr->carpr_state != sc->sc_state) {
2419 		switch (carpr->carpr_state) {
2420 		case BACKUP:
2421 			callout_stop(&sc->sc_ad_tmo);
2422 			carp_set_state(sc, BACKUP);
2423 			carp_setrun(sc, 0);
2424 			if (carp_opts[CARPCTL_SETROUTE])
2425 				carp_setroute(sc, RTM_DELETE);
2426 			break;
2427 
2428 		case MASTER:
2429 			carp_master_down(sc);
2430 			break;
2431 
2432 		default:
2433 			break;
2434 		}
2435 	}
2436 	if (carpr->carpr_vhid > 0) {
2437 		if (carpr->carpr_vhid > 255) {
2438 			error = EINVAL;
2439 			goto back;
2440 		}
2441 		if (sc->sc_carpdev) {
2442 			struct carp_if *cif = sc->sc_carpdev->if_carp;
2443 			struct carp_softc_container *scc;
2444 
2445 			TAILQ_FOREACH(scc, cif, scc_link) {
2446 				struct carp_softc *vr = scc->scc_softc;
2447 
2448 				if (vr != sc &&
2449 				    vr->sc_vhid == carpr->carpr_vhid) {
2450 					error = EEXIST;
2451 					goto back;
2452 				}
2453 			}
2454 		}
2455 		sc->sc_vhid = carpr->carpr_vhid;
2456 
2457 		IF_LLADDR(ifp)[5] = sc->sc_vhid;
2458 		bcopy(IF_LLADDR(ifp), sc->arpcom.ac_enaddr,
2459 		    ETHER_ADDR_LEN);
2460 
2461 		error--;
2462 	}
2463 	if (carpr->carpr_advbase > 0 || carpr->carpr_advskew > 0) {
2464 		if (carpr->carpr_advskew >= 255) {
2465 			error = EINVAL;
2466 			goto back;
2467 		}
2468 		if (carpr->carpr_advbase > 255) {
2469 			error = EINVAL;
2470 			goto back;
2471 		}
2472 		sc->sc_advbase = carpr->carpr_advbase;
2473 		sc->sc_advskew = carpr->carpr_advskew;
2474 		error--;
2475 	}
2476 	bcopy(carpr->carpr_key, sc->sc_key, sizeof(sc->sc_key));
2477 	if (error > 0) {
2478 		error = EINVAL;
2479 	} else {
2480 		error = 0;
2481 		carp_setrun(sc, 0);
2482 	}
2483 back:
2484 	carp_hmac_prepare(sc);
2485 
2486 	lwkt_replymsg(&cmsg->base.lmsg, error);
2487 }
2488 
2489 static int
carp_ioctl_setvh(struct carp_softc * sc,void * udata,struct ucred * cr)2490 carp_ioctl_setvh(struct carp_softc *sc, void *udata, struct ucred *cr)
2491 {
2492 	struct ifnet *ifp = &sc->arpcom.ac_if;
2493 	struct netmsg_carp cmsg;
2494 	struct carpreq carpr;
2495 	int error;
2496 
2497 	ASSERT_IFNET_SERIALIZED_ALL(ifp);
2498 	ifnet_deserialize_all(ifp);
2499 
2500 	error = caps_priv_check(cr, SYSCAP_RESTRICTEDROOT |
2501 				    __SYSCAP_NULLCRED);
2502 	if (error)
2503 		goto back;
2504 
2505 	error = copyin(udata, &carpr, sizeof(carpr));
2506 	if (error)
2507 		goto back;
2508 
2509 	bzero(&cmsg, sizeof(cmsg));
2510 	netmsg_init(&cmsg.base, NULL, &curthread->td_msgport, 0,
2511 	    carp_ioctl_setvh_dispatch);
2512 	cmsg.nc_softc = sc;
2513 	cmsg.nc_data = &carpr;
2514 
2515 	error = lwkt_domsg(netisr_cpuport(0), &cmsg.base.lmsg, 0);
2516 
2517 back:
2518 	ifnet_serialize_all(ifp);
2519 	return error;
2520 }
2521 
2522 static void
carp_ioctl_ifcap_dispatch(netmsg_t msg)2523 carp_ioctl_ifcap_dispatch(netmsg_t msg)
2524 {
2525 	struct netmsg_carp *cmsg = (struct netmsg_carp *)msg;
2526 	struct carp_softc *sc = cmsg->nc_softc;
2527 	struct ifnet *ifp = &sc->arpcom.ac_if;
2528 	int reqcap = *((const int *)(cmsg->nc_data));
2529 	int mask;
2530 
2531 	mask = reqcap ^ ifp->if_capenable;
2532 	if (mask & IFCAP_TXCSUM) {
2533 		ifp->if_capenable ^= IFCAP_TXCSUM;
2534 		if ((ifp->if_capenable & IFCAP_TXCSUM) &&
2535 		    sc->sc_carpdev != NULL) {
2536 			ifp->if_hwassist |=
2537 			    (sc->sc_carpdev->if_hwassist &
2538 			     (CSUM_IP | CSUM_UDP | CSUM_TCP));
2539 		} else {
2540 			ifp->if_hwassist &= ~(CSUM_IP | CSUM_UDP | CSUM_TCP);
2541 		}
2542 	}
2543 	if (mask & IFCAP_TSO) {
2544 		ifp->if_capenable ^= IFCAP_TSO;
2545 		if ((ifp->if_capenable & IFCAP_TSO) &&
2546 		    sc->sc_carpdev != NULL) {
2547 			ifp->if_hwassist |=
2548 			    (sc->sc_carpdev->if_hwassist & CSUM_TSO);
2549 		} else {
2550 			ifp->if_hwassist &= ~CSUM_TSO;
2551 		}
2552 	}
2553 
2554 	lwkt_replymsg(&cmsg->base.lmsg, 0);
2555 }
2556 
2557 static void
carp_ioctl_ifcap(struct carp_softc * sc,int reqcap)2558 carp_ioctl_ifcap(struct carp_softc *sc, int reqcap)
2559 {
2560 	struct ifnet *ifp = &sc->arpcom.ac_if;
2561 	struct netmsg_carp cmsg;
2562 
2563 	ASSERT_IFNET_SERIALIZED_ALL(ifp);
2564 	ifnet_deserialize_all(ifp);
2565 
2566 	bzero(&cmsg, sizeof(cmsg));
2567 	netmsg_init(&cmsg.base, NULL, &curthread->td_msgport, 0,
2568 	    carp_ioctl_ifcap_dispatch);
2569 	cmsg.nc_softc = sc;
2570 	cmsg.nc_data = &reqcap;
2571 
2572 	lwkt_domsg(netisr_cpuport(0), &cmsg.base.lmsg, 0);
2573 
2574 	ifnet_serialize_all(ifp);
2575 }
2576 
2577 static void
carp_ioctl_getvh_dispatch(netmsg_t msg)2578 carp_ioctl_getvh_dispatch(netmsg_t msg)
2579 {
2580 	struct netmsg_carp *cmsg = (struct netmsg_carp *)msg;
2581 	struct carp_softc *sc = cmsg->nc_softc;
2582 	struct carpreq *carpr = cmsg->nc_data;
2583 
2584 	carpr->carpr_state = sc->sc_state;
2585 	carpr->carpr_vhid = sc->sc_vhid;
2586 	carpr->carpr_advbase = sc->sc_advbase;
2587 	carpr->carpr_advskew = sc->sc_advskew;
2588 	bcopy(sc->sc_key, carpr->carpr_key, sizeof(carpr->carpr_key));
2589 
2590 	lwkt_replymsg(&cmsg->base.lmsg, 0);
2591 }
2592 
2593 static int
carp_ioctl_getvh(struct carp_softc * sc,void * udata,struct ucred * cr)2594 carp_ioctl_getvh(struct carp_softc *sc, void *udata, struct ucred *cr)
2595 {
2596 	struct ifnet *ifp = &sc->arpcom.ac_if;
2597 	struct netmsg_carp cmsg;
2598 	struct carpreq carpr;
2599 	int error;
2600 
2601 	ASSERT_IFNET_SERIALIZED_ALL(ifp);
2602 	ifnet_deserialize_all(ifp);
2603 
2604 	bzero(&cmsg, sizeof(cmsg));
2605 	netmsg_init(&cmsg.base, NULL, &curthread->td_msgport, 0,
2606 	    carp_ioctl_getvh_dispatch);
2607 	cmsg.nc_softc = sc;
2608 	cmsg.nc_data = &carpr;
2609 
2610 	lwkt_domsg(netisr_cpuport(0), &cmsg.base.lmsg, 0);
2611 
2612 	error = caps_priv_check(cr, SYSCAP_RESTRICTEDROOT |
2613 				    __SYSCAP_NULLCRED);
2614 	if (error)
2615 		bzero(carpr.carpr_key, sizeof(carpr.carpr_key));
2616 
2617 	error = copyout(&carpr, udata, sizeof(carpr));
2618 
2619 	ifnet_serialize_all(ifp);
2620 	return error;
2621 }
2622 
2623 static void
carp_ioctl_getdevname_dispatch(netmsg_t msg)2624 carp_ioctl_getdevname_dispatch(netmsg_t msg)
2625 {
2626 	struct netmsg_carp *cmsg = (struct netmsg_carp *)msg;
2627 	struct carp_softc *sc = cmsg->nc_softc;
2628 	char *devname = cmsg->nc_data;
2629 
2630 	bzero(devname, IFNAMSIZ);
2631 	if (sc->sc_carpdev != NULL)
2632 		strlcpy(devname, sc->sc_carpdev->if_xname, IFNAMSIZ);
2633 
2634 	lwkt_replymsg(&cmsg->base.lmsg, 0);
2635 }
2636 
2637 static int
carp_ioctl_getdevname(struct carp_softc * sc,struct ifdrv * ifd)2638 carp_ioctl_getdevname(struct carp_softc *sc, struct ifdrv *ifd)
2639 {
2640 	struct ifnet *ifp = &sc->arpcom.ac_if;
2641 	struct netmsg_carp cmsg;
2642 	char devname[IFNAMSIZ];
2643 	int error;
2644 
2645 	ASSERT_IFNET_SERIALIZED_ALL(ifp);
2646 
2647 	if (ifd->ifd_len != sizeof(devname))
2648 		return EINVAL;
2649 
2650 	ifnet_deserialize_all(ifp);
2651 
2652 	bzero(&cmsg, sizeof(cmsg));
2653 	netmsg_init(&cmsg.base, NULL, &curthread->td_msgport, 0,
2654 	    carp_ioctl_getdevname_dispatch);
2655 	cmsg.nc_softc = sc;
2656 	cmsg.nc_data = devname;
2657 
2658 	lwkt_domsg(netisr_cpuport(0), &cmsg.base.lmsg, 0);
2659 
2660 	error = copyout(devname, ifd->ifd_data, sizeof(devname));
2661 
2662 	ifnet_serialize_all(ifp);
2663 	return error;
2664 }
2665 
2666 static void
carp_init_dispatch(netmsg_t msg)2667 carp_init_dispatch(netmsg_t msg)
2668 {
2669 	struct netmsg_carp *cmsg = (struct netmsg_carp *)msg;
2670 	struct carp_softc *sc = cmsg->nc_softc;
2671 
2672 	sc->sc_if.if_flags |= IFF_RUNNING;
2673 	carp_hmac_prepare(sc);
2674 	carp_set_state(sc, INIT);
2675 	carp_setrun(sc, 0);
2676 
2677 	lwkt_replymsg(&cmsg->base.lmsg, 0);
2678 }
2679 
2680 static void
carp_init(void * xsc)2681 carp_init(void *xsc)
2682 {
2683 	struct carp_softc *sc = xsc;
2684 	struct ifnet *ifp = &sc->arpcom.ac_if;
2685 	struct netmsg_carp cmsg;
2686 
2687 	ASSERT_IFNET_SERIALIZED_ALL(ifp);
2688 
2689 	ifnet_deserialize_all(ifp);
2690 
2691 	bzero(&cmsg, sizeof(cmsg));
2692 	netmsg_init(&cmsg.base, NULL, &curthread->td_msgport, 0,
2693 	    carp_init_dispatch);
2694 	cmsg.nc_softc = sc;
2695 
2696 	lwkt_domsg(netisr_cpuport(0), &cmsg.base.lmsg, 0);
2697 
2698 	ifnet_serialize_all(ifp);
2699 }
2700 
2701 static int
carp_output(struct ifnet * ifp,struct mbuf * m,struct sockaddr * dst,struct rtentry * rt)2702 carp_output(struct ifnet *ifp, struct mbuf *m, struct sockaddr *dst,
2703     struct rtentry *rt)
2704 {
2705 	struct carp_softc *sc = ifp->if_softc;
2706 	struct ifnet *carpdev;
2707 	int error = 0;
2708 
2709 	carpdev = sc->sc_carpdev;
2710 	if (carpdev != NULL) {
2711 		if (m->m_flags & M_MCAST)
2712 			IFNET_STAT_INC(ifp, omcasts, 1);
2713 		IFNET_STAT_INC(ifp, obytes, m->m_pkthdr.len + ETHER_HDR_LEN);
2714 		IFNET_STAT_INC(ifp, opackets, 1);
2715 
2716 		/*
2717 		 * NOTE:
2718 		 * CARP's ifp is passed to backing device's
2719 		 * if_output method.
2720 		 */
2721 		carpdev->if_output(ifp, m, dst, rt);
2722 	} else {
2723 		IFNET_STAT_INC(ifp, oerrors, 1);
2724 		m_freem(m);
2725 		error = ENETUNREACH;
2726 	}
2727 	return error;
2728 }
2729 
2730 /*
2731  * Start output on carp interface. This function should never be called.
2732  */
2733 static void
carp_start(struct ifnet * ifp,struct ifaltq_subque * ifsq __unused)2734 carp_start(struct ifnet *ifp, struct ifaltq_subque *ifsq __unused)
2735 {
2736 	panic("%s: start called", ifp->if_xname);
2737 }
2738 
2739 static void
carp_set_state(struct carp_softc * sc,int state)2740 carp_set_state(struct carp_softc *sc, int state)
2741 {
2742 	struct ifnet *cifp = &sc->sc_if;
2743 
2744 	if (sc->sc_state == state)
2745 		return;
2746 	sc->sc_state = state;
2747 
2748 	switch (sc->sc_state) {
2749 	case BACKUP:
2750 		cifp->if_link_state = LINK_STATE_DOWN;
2751 		break;
2752 
2753 	case MASTER:
2754 		cifp->if_link_state = LINK_STATE_UP;
2755 		break;
2756 
2757 	default:
2758 		cifp->if_link_state = LINK_STATE_UNKNOWN;
2759 		break;
2760 	}
2761 	rt_ifmsg(cifp);
2762 }
2763 
2764 void
carp_group_demote_adj(struct ifnet * ifp,int adj)2765 carp_group_demote_adj(struct ifnet *ifp, int adj)
2766 {
2767 	struct ifg_list	*ifgl;
2768 	int *dm;
2769 
2770 	TAILQ_FOREACH(ifgl, &ifp->if_groups, ifgl_next) {
2771 		if (!strcmp(ifgl->ifgl_group->ifg_group, IFG_ALL))
2772 			continue;
2773 		dm = &ifgl->ifgl_group->ifg_carp_demoted;
2774 
2775 		if (*dm + adj >= 0)
2776 			*dm += adj;
2777 		else
2778 			*dm = 0;
2779 
2780 		if (adj > 0 && *dm == 1)
2781 			carp_send_ad_all();
2782 		CARP_LOG("%s demoted group %s to %d", ifp->if_xname,
2783                     ifgl->ifgl_group->ifg_group, *dm);
2784 	}
2785 }
2786 
2787 #ifdef foo
2788 void
carp_carpdev_state(void * v)2789 carp_carpdev_state(void *v)
2790 {
2791 	struct carp_if *cif = v;
2792 	struct carp_softc *sc;
2793 
2794 	TAILQ_FOREACH(sc, &cif->vhif_vrs, sc_list)
2795 		carp_sc_state(sc);
2796 }
2797 
2798 static void
carp_sc_state(struct carp_softc * sc)2799 carp_sc_state(struct carp_softc *sc)
2800 {
2801 	if (!(sc->sc_carpdev->if_flags & IFF_UP)) {
2802 		callout_stop(&sc->sc_ad_tmo);
2803 		callout_stop(&sc->sc_md_tmo);
2804 		callout_stop(&sc->sc_md6_tmo);
2805 		carp_set_state(sc, INIT);
2806 		carp_setrun(sc, 0);
2807 		if (!sc->sc_suppress) {
2808 			carp_suppress_preempt++;
2809 			if (carp_suppress_preempt == 1)
2810 				carp_send_ad_all();
2811 		}
2812 		sc->sc_suppress = 1;
2813 	} else {
2814 		carp_set_state(sc, INIT);
2815 		carp_setrun(sc, 0);
2816 		if (sc->sc_suppress)
2817 			carp_suppress_preempt--;
2818 		sc->sc_suppress = 0;
2819 	}
2820 }
2821 #endif
2822 
2823 static void
carp_stop(struct carp_softc * sc,boolean_t detach)2824 carp_stop(struct carp_softc *sc, boolean_t detach)
2825 {
2826 	sc->sc_if.if_flags &= ~IFF_RUNNING;
2827 
2828 	callout_stop(&sc->sc_ad_tmo);
2829 	callout_stop(&sc->sc_md_tmo);
2830 	callout_stop(&sc->sc_md6_tmo);
2831 
2832 	if (!detach && sc->sc_state == MASTER)
2833 		carp_send_ad(sc);
2834 
2835 	if (sc->sc_suppress)
2836 		carp_suppress_preempt--;
2837 	sc->sc_suppress = 0;
2838 
2839 	if (sc->sc_sendad_errors >= CARP_SENDAD_MAX_ERRORS)
2840 		carp_suppress_preempt--;
2841 	sc->sc_sendad_errors = 0;
2842 	sc->sc_sendad_success = 0;
2843 
2844 	carp_set_state(sc, INIT);
2845 	carp_setrun(sc, 0);
2846 }
2847 
2848 static void
carp_suspend(struct carp_softc * sc,boolean_t detach)2849 carp_suspend(struct carp_softc *sc, boolean_t detach)
2850 {
2851 	struct ifnet *cifp = &sc->sc_if;
2852 
2853 	carp_stop(sc, detach);
2854 
2855 	/* Retain the running state, if we are not dead yet */
2856 	if (!sc->sc_dead && (cifp->if_flags & IFF_UP))
2857 		cifp->if_flags |= IFF_RUNNING;
2858 }
2859 
2860 static int
carp_activate_vhaddr(struct carp_softc * sc,struct carp_vhaddr * vha,struct ifnet * ifp,struct in_ifaddr * ia_if,int own)2861 carp_activate_vhaddr(struct carp_softc *sc, struct carp_vhaddr *vha,
2862     struct ifnet *ifp, struct in_ifaddr *ia_if, int own)
2863 {
2864 	struct ip_moptions *imo = &sc->sc_imo;
2865 	struct carp_if *ocif = ifp->if_carp;
2866 	int error;
2867 
2868 	KKASSERT(vha->vha_ia != NULL);
2869 
2870 	KASSERT(ia_if != NULL, ("NULL backing address"));
2871 	KASSERT(vha->vha_iaback == NULL, ("%p is already activated", vha));
2872 	KASSERT((vha->vha_flags & CARP_VHAF_OWNER) == 0,
2873 		("inactive vhaddr %p is the address owner", vha));
2874 
2875 	KASSERT(sc->sc_carpdev == NULL || sc->sc_carpdev == ifp,
2876 		("%s is already on %s", sc->sc_if.if_xname,
2877 		 sc->sc_carpdev->if_xname));
2878 
2879 	if (ocif == NULL) {
2880 		KASSERT(sc->sc_carpdev == NULL,
2881 			("%s is already on %s", sc->sc_if.if_xname,
2882 			 sc->sc_carpdev->if_xname));
2883 
2884 		error = ifpromisc(ifp, 1);
2885 		if (error)
2886 			return error;
2887 	} else {
2888 		struct carp_softc_container *scc;
2889 
2890 		TAILQ_FOREACH(scc, ocif, scc_link) {
2891 			struct carp_softc *vr = scc->scc_softc;
2892 
2893 			if (vr != sc && vr->sc_vhid == sc->sc_vhid)
2894 				return EINVAL;
2895 		}
2896 	}
2897 
2898 	ifp->if_carp = carp_if_insert(ocif, sc);
2899 	KASSERT(ifp->if_carp != NULL, ("%s carp_if_insert failed", __func__));
2900 
2901 	sc->sc_ia = ia_if;
2902 	sc->sc_carpdev = ifp;
2903 	sc->arpcom.ac_if.if_hwassist = 0;
2904 	if (sc->arpcom.ac_if.if_capenable & IFCAP_TXCSUM) {
2905 		sc->arpcom.ac_if.if_hwassist |=
2906 		    (ifp->if_hwassist & (CSUM_IP | CSUM_UDP | CSUM_TCP));
2907 	}
2908 	if (sc->arpcom.ac_if.if_capenable & IFCAP_TSO)
2909 		sc->arpcom.ac_if.if_hwassist |= (ifp->if_hwassist & CSUM_TSO);
2910 
2911 	/*
2912 	 * Make sure that all protocol threads see the sc_carpdev and
2913 	 * if_carp changes
2914 	 */
2915 	netmsg_service_sync();
2916 
2917 	if (ocif != NULL && ifp->if_carp != ocif) {
2918 		/*
2919 		 * The old carp list could be safely free now,
2920 		 * since no one can access it.
2921 		 */
2922 		carp_if_free(ocif);
2923 	}
2924 
2925 	vha->vha_iaback = ia_if;
2926 	sc->sc_naddrs++;
2927 
2928 	if (own) {
2929 		vha->vha_flags |= CARP_VHAF_OWNER;
2930 
2931 		/* XXX save user configured advskew? */
2932 		sc->sc_advskew = 0;
2933 	}
2934 
2935 	carp_addroute_vhaddr(sc, vha);
2936 
2937 	/*
2938 	 * Join the multicast group only after the backing interface
2939 	 * has been hooked with the CARP interface.
2940 	 */
2941 	KASSERT(imo->imo_multicast_ifp == NULL ||
2942 		imo->imo_multicast_ifp == &sc->sc_if,
2943 		("%s didn't leave mcast group on %s",
2944 		 sc->sc_if.if_xname, imo->imo_multicast_ifp->if_xname));
2945 
2946 	if (imo->imo_num_memberships == 0) {
2947 		struct in_addr addr;
2948 
2949 		addr.s_addr = htonl(INADDR_CARP_GROUP);
2950 		imo->imo_membership[0] = in_addmulti(&addr, &sc->sc_if);
2951 		if (imo->imo_membership[0] == NULL) {
2952 			carp_deactivate_vhaddr(sc, vha, FALSE);
2953 			return ENOBUFS;
2954 		}
2955 
2956 		imo->imo_num_memberships++;
2957 		imo->imo_multicast_ifp = &sc->sc_if;
2958 		imo->imo_multicast_ttl = CARP_DFLTTL;
2959 		imo->imo_multicast_loop = 0;
2960 	}
2961 
2962 	carp_hmac_prepare(sc);
2963 	carp_set_state(sc, INIT);
2964 	carp_setrun(sc, 0);
2965 	return 0;
2966 }
2967 
2968 static void
carp_deactivate_vhaddr(struct carp_softc * sc,struct carp_vhaddr * vha,boolean_t del_iaback)2969 carp_deactivate_vhaddr(struct carp_softc *sc, struct carp_vhaddr *vha,
2970     boolean_t del_iaback)
2971 {
2972 	KKASSERT(vha->vha_ia != NULL);
2973 
2974 	carp_hmac_prepare(sc);
2975 
2976 	if (vha->vha_iaback == NULL) {
2977 		KASSERT((vha->vha_flags & CARP_VHAF_OWNER) == 0,
2978 			("inactive vhaddr %p is the address owner", vha));
2979 		return;
2980 	}
2981 
2982 	vha->vha_flags &= ~CARP_VHAF_OWNER;
2983 	carp_delroute_vhaddr(sc, vha, del_iaback);
2984 
2985 	KKASSERT(sc->sc_naddrs > 0);
2986 	vha->vha_iaback = NULL;
2987 	sc->sc_naddrs--;
2988 	if (!sc->sc_naddrs) {
2989 		if (sc->sc_naddrs6) {
2990 			carp_multicast_cleanup(sc);
2991 			sc->sc_ia = NULL;
2992 		} else {
2993 			carp_detach(sc, FALSE, del_iaback);
2994 		}
2995 	}
2996 }
2997 
2998 static void
carp_link_addrs(struct carp_softc * sc,struct ifnet * ifp,struct ifaddr * ifa_if)2999 carp_link_addrs(struct carp_softc *sc, struct ifnet *ifp, struct ifaddr *ifa_if)
3000 {
3001 	struct carp_vhaddr *vha;
3002 	struct in_ifaddr *ia_if;
3003 
3004 	KKASSERT(ifa_if->ifa_addr->sa_family == AF_INET);
3005 	ia_if = ifatoia(ifa_if);
3006 
3007 	/*
3008 	 * Test each inactive vhaddr against the newly added address.
3009 	 * If the newly added address could be the backing address,
3010 	 * then activate the matching vhaddr.
3011 	 */
3012 	TAILQ_FOREACH(vha, &sc->sc_vha_list, vha_link) {
3013 		const struct in_ifaddr *ia;
3014 		int own;
3015 
3016 		if (vha->vha_iaback != NULL)
3017 			continue;
3018 
3019 		ia = vha->vha_ia;
3020 		if (ia->ia_subnetmask != ia_if->ia_subnetmask ||
3021 		    ia->ia_subnet != ia_if->ia_subnet)
3022 			continue;
3023 
3024 		own = 0;
3025 		if (ia->ia_addr.sin_addr.s_addr ==
3026 		    ia_if->ia_addr.sin_addr.s_addr)
3027 			own = 1;
3028 
3029 		carp_activate_vhaddr(sc, vha, ifp, ia_if, own);
3030 	}
3031 }
3032 
3033 static void
carp_unlink_addrs(struct carp_softc * sc,struct ifnet * ifp,struct ifaddr * ifa_if)3034 carp_unlink_addrs(struct carp_softc *sc, struct ifnet *ifp,
3035 		  struct ifaddr *ifa_if)
3036 {
3037 	struct carp_vhaddr *vha;
3038 	struct in_ifaddr *ia_if;
3039 
3040 	KKASSERT(ifa_if->ifa_addr->sa_family == AF_INET);
3041 	ia_if = ifatoia(ifa_if);
3042 
3043 	/*
3044 	 * Ad src address is deleted; set it to NULL.
3045 	 * Following loop will try pick up a new ad src address
3046 	 * if one of the vhaddr could retain its backing address.
3047 	 */
3048 	if (sc->sc_ia == ia_if)
3049 		sc->sc_ia = NULL;
3050 
3051 	/*
3052 	 * Test each active vhaddr against the deleted address.
3053 	 * If the deleted address is vhaddr address's backing
3054 	 * address, then deactivate the vhaddr.
3055 	 */
3056 	TAILQ_FOREACH(vha, &sc->sc_vha_list, vha_link) {
3057 		if (vha->vha_iaback == NULL)
3058 			continue;
3059 
3060 		if (vha->vha_iaback == ia_if)
3061 			carp_deactivate_vhaddr(sc, vha, TRUE);
3062 		else if (sc->sc_ia == NULL)
3063 			sc->sc_ia = vha->vha_iaback;
3064 	}
3065 }
3066 
3067 static void
carp_update_addrs(struct carp_softc * sc,struct ifaddr * ifa_del)3068 carp_update_addrs(struct carp_softc *sc, struct ifaddr *ifa_del)
3069 {
3070 	struct carp_vhaddr *vha;
3071 
3072 	KKASSERT(sc->sc_carpdev == NULL);
3073 
3074 	TAILQ_FOREACH(vha, &sc->sc_vha_list, vha_link)
3075 		carp_config_vhaddr(sc, vha, ifatoia(ifa_del));
3076 }
3077 
3078 static void
carp_ifaddr(void * arg __unused,struct ifnet * ifp,enum ifaddr_event event,struct ifaddr * ifa)3079 carp_ifaddr(void *arg __unused, struct ifnet *ifp,
3080 	    enum ifaddr_event event, struct ifaddr *ifa)
3081 {
3082 	struct carp_softc *sc;
3083 
3084 	if (ifa->ifa_addr->sa_family != AF_INET)
3085 		return;
3086 
3087 	ASSERT_NETISR0;
3088 
3089 	if (ifp->if_type == IFT_CARP) {
3090 		/*
3091 		 * Address is changed on carp(4) interface
3092 		 */
3093 		switch (event) {
3094 		case IFADDR_EVENT_ADD:
3095 			carp_add_addr(ifp->if_softc, ifa);
3096 			break;
3097 
3098 		case IFADDR_EVENT_CHANGE:
3099 			carp_config_addr(ifp->if_softc, ifa);
3100 			break;
3101 
3102 		case IFADDR_EVENT_DELETE:
3103 			carp_del_addr(ifp->if_softc, ifa);
3104 			break;
3105 		}
3106 		return;
3107 	}
3108 
3109 	/*
3110 	 * Address is changed on non-carp(4) interface
3111 	 */
3112 	if ((ifp->if_flags & IFF_MULTICAST) == 0)
3113 		return;
3114 
3115 	LIST_FOREACH(sc, &carpif_list, sc_next) {
3116 		if (sc->sc_carpdev != NULL && sc->sc_carpdev != ifp) {
3117 			/* Not the parent iface; skip */
3118 			continue;
3119 		}
3120 
3121 		switch (event) {
3122 		case IFADDR_EVENT_ADD:
3123 			carp_link_addrs(sc, ifp, ifa);
3124 			break;
3125 
3126 		case IFADDR_EVENT_DELETE:
3127 			if (sc->sc_carpdev != NULL) {
3128 				carp_unlink_addrs(sc, ifp, ifa);
3129 				if (sc->sc_carpdev == NULL) {
3130 					/*
3131 					 * We no longer have the parent
3132 					 * interface, however, certain
3133 					 * virtual addresses, which are
3134 					 * not used because they can't
3135 					 * match the previous parent
3136 					 * interface's addresses, may now
3137 					 * match different interface's
3138 					 * addresses.
3139 					 */
3140 					carp_update_addrs(sc, ifa);
3141 				}
3142 			} else {
3143 				/*
3144 				 * The carp(4) interface didn't have a
3145 				 * parent iface, so it is not possible
3146 				 * that it will contain any address to
3147 				 * be unlinked.
3148 				 */
3149 			}
3150 			break;
3151 
3152 		case IFADDR_EVENT_CHANGE:
3153 			if (sc->sc_carpdev == NULL) {
3154 				/*
3155 				 * The carp(4) interface didn't have a
3156 				 * parent iface, so it is not possible
3157 				 * that it will contain any address to
3158 				 * be updated.
3159 				 */
3160 				carp_link_addrs(sc, ifp, ifa);
3161 			} else {
3162 				/*
3163 				 * First try breaking tie with the old
3164 				 * address.  Then see whether we could
3165 				 * link certain vhaddr to the new address.
3166 				 * If that fails, i.e. carpdev is NULL,
3167 				 * we try a global update.
3168 				 *
3169 				 * NOTE: The above order is critical.
3170 				 */
3171 				carp_unlink_addrs(sc, ifp, ifa);
3172 				carp_link_addrs(sc, ifp, ifa);
3173 				if (sc->sc_carpdev == NULL) {
3174 					/*
3175 					 * See the comment in the above
3176 					 * IFADDR_EVENT_DELETE block.
3177 					 */
3178 					carp_update_addrs(sc, NULL);
3179 				}
3180 			}
3181 			break;
3182 		}
3183 	}
3184 }
3185 
3186 void
carp_proto_ctlinput(netmsg_t msg)3187 carp_proto_ctlinput(netmsg_t msg)
3188 {
3189 	int cmd = msg->ctlinput.nm_cmd;
3190 	struct sockaddr *sa = msg->ctlinput.nm_arg;
3191 	struct in_ifaddr_container *iac;
3192 
3193 	/* We only process PRC_IFDOWN and PRC_IFUP commands */
3194 	if (cmd != PRC_IFDOWN && cmd != PRC_IFUP)
3195 		goto done;
3196 
3197 	TAILQ_FOREACH(iac, &in_ifaddrheads[mycpuid], ia_link) {
3198 		struct in_ifaddr *ia = iac->ia;
3199 		struct ifnet *ifp = ia->ia_ifp;
3200 
3201 		if (ifp->if_type == IFT_CARP)
3202 			continue;
3203 
3204 		if (ia->ia_ifa.ifa_addr == sa) {
3205 			if (cmd == PRC_IFDOWN) {
3206 				carp_ifaddr(NULL, ifp, IFADDR_EVENT_DELETE,
3207 				    &ia->ia_ifa);
3208 			} else if (cmd == PRC_IFUP) {
3209 				carp_ifaddr(NULL, ifp, IFADDR_EVENT_ADD,
3210 				    &ia->ia_ifa);
3211 			}
3212 			break;
3213 		}
3214 	}
3215 done:
3216 	lwkt_replymsg(&msg->lmsg, 0);
3217 }
3218 
3219 struct ifnet *
carp_parent(struct ifnet * cifp)3220 carp_parent(struct ifnet *cifp)
3221 {
3222 	struct carp_softc *sc;
3223 
3224 	KKASSERT(cifp->if_type == IFT_CARP);
3225 	sc = cifp->if_softc;
3226 
3227 	return sc->sc_carpdev;
3228 }
3229 
/*
 * Route flags for the in_ifaddr 'x': RTF_HOST if its interface is a
 * loopback or point-to-point interface, 0 otherwise.
 */
#define rtinitflags(x) \
	((((x)->ia_ifp->if_flags & (IFF_LOOPBACK | IFF_POINTOPOINT)) != 0) \
		 ? RTF_HOST : 0)
3233 
3234 static int
carp_addroute_vhaddr(struct carp_softc * sc,struct carp_vhaddr * vha)3235 carp_addroute_vhaddr(struct carp_softc *sc, struct carp_vhaddr *vha)
3236 {
3237 	struct in_ifaddr *ia, *iaback;
3238 
3239 	if (sc->sc_state != MASTER)
3240 		return 0;
3241 
3242 	ia = vha->vha_ia;
3243 	KKASSERT(ia != NULL);
3244 
3245 	iaback = vha->vha_iaback;
3246 	KKASSERT(iaback != NULL);
3247 
3248 	return rtchange(&iaback->ia_ifa, &ia->ia_ifa);
3249 }
3250 
3251 static void
carp_delroute_vhaddr(struct carp_softc * sc,struct carp_vhaddr * vha,boolean_t del_iaback)3252 carp_delroute_vhaddr(struct carp_softc *sc, struct carp_vhaddr *vha,
3253     boolean_t del_iaback)
3254 {
3255 	struct in_ifaddr *ia, *iaback;
3256 
3257 	ia = vha->vha_ia;
3258 	KKASSERT(ia != NULL);
3259 
3260 	iaback = vha->vha_iaback;
3261 	KKASSERT(iaback != NULL);
3262 
3263 	if (!del_iaback && (iaback->ia_ifp->if_flags & IFF_UP)) {
3264 		rtchange(&ia->ia_ifa, &iaback->ia_ifa);
3265 		return;
3266 	}
3267 
3268 	rtinit(&ia->ia_ifa, RTM_DELETE, rtinitflags(ia));
3269 	in_ifadown_force(&ia->ia_ifa, 1);
3270 	ia->ia_flags &= ~IFA_ROUTE;
3271 }
3272 
3273 static int
carp_modevent(module_t mod,int type,void * data)3274 carp_modevent(module_t mod, int type, void *data)
3275 {
3276 	switch (type) {
3277 	case MOD_LOAD:
3278 		LIST_INIT(&carpif_list);
3279 		carp_ifdetach_event =
3280 		EVENTHANDLER_REGISTER(ifnet_detach_event, carp_ifdetach, NULL,
3281 				      EVENTHANDLER_PRI_ANY);
3282 		carp_ifaddr_event =
3283 		EVENTHANDLER_REGISTER(ifaddr_event, carp_ifaddr, NULL,
3284 				      EVENTHANDLER_PRI_FIRST);
3285 		if_clone_attach(&carp_cloner);
3286 		break;
3287 
3288 	case MOD_UNLOAD:
3289 		EVENTHANDLER_DEREGISTER(ifnet_detach_event,
3290 					carp_ifdetach_event);
3291 		EVENTHANDLER_DEREGISTER(ifaddr_event,
3292 					carp_ifaddr_event);
3293 		if_clone_detach(&carp_cloner);
3294 		break;
3295 
3296 	default:
3297 		return (EINVAL);
3298 	}
3299 	return (0);
3300 }
3301 
3302 static moduledata_t carp_mod = {
3303 	"carp",
3304 	carp_modevent,
3305 	0
3306 };
3307 DECLARE_MODULE(carp, carp_mod, SI_SUB_PSEUDO, SI_ORDER_ANY);
3308