xref: /dragonfly/sys/net/pf/if_pfsync.c (revision ad9f8794)
1 /*	$OpenBSD: if_pfsync.c,v 1.98 2008/06/29 08:42:15 mcbride Exp $	*/
2 
3 /*
4  * Copyright (c) 2002 Michael Shalayeff
5  * All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 1. Redistributions of source code must retain the above copyright
11  *    notice, this list of conditions and the following disclaimer.
12  * 2. Redistributions in binary form must reproduce the above copyright
13  *    notice, this list of conditions and the following disclaimer in the
14  *    documentation and/or other materials provided with the distribution.
15  *
16  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
17  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
18  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
19  * IN NO EVENT SHALL THE AUTHOR OR HIS RELATIVES BE LIABLE FOR ANY DIRECT,
20  * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
21  * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
22  * SERVICES; LOSS OF MIND, USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
23  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
24  * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
25  * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
26  * THE POSSIBILITY OF SUCH DAMAGE.
27  */
28 
29 #include "opt_inet.h"
30 #include "opt_inet6.h"
31 #include "opt_carp.h"
32 
33 #include <sys/param.h>
34 #include <sys/endian.h>
35 #include <sys/proc.h>
36 #include <sys/priv.h>
37 #include <sys/systm.h>
38 #include <sys/time.h>
39 #include <sys/mbuf.h>
40 #include <sys/socket.h>
41 #include <sys/kernel.h>
42 #include <sys/malloc.h>
43 #include <sys/module.h>
44 #include <sys/sockio.h>
45 #include <sys/thread2.h>
46 #include <vm/vm_zone.h>
47 
48 #include <machine/inttypes.h>
49 
50 #include <net/if.h>
51 #include <net/if_types.h>
52 #include <net/route.h>
53 #include <net/bpf.h>
54 #include <netinet/in.h>
55 #include <netinet/if_ether.h>
56 #include <netinet/ip_carp.h>
57 #include <netinet/tcp.h>
58 #include <netinet/tcp_seq.h>
59 
60 #ifdef	INET
61 #include <netinet/in_systm.h>
62 #include <netinet/in_var.h>
63 #include <netinet/ip.h>
64 #include <netinet/ip_var.h>
65 #endif
66 
67 #ifdef INET6
68 #include <netinet6/nd6.h>
69 #endif /* INET6 */
70 
71 #include <net/pf/pfvar.h>
72 #include <net/pf/if_pfsync.h>
73 
74 #define	PFSYNCNAME	"pfsync"
75 
76 #define PFSYNC_MINMTU	\
77     (sizeof(struct pfsync_header) + sizeof(struct pf_state))
78 
79 #ifdef PFSYNCDEBUG
80 #define DPRINTF(x)    do { if (pfsyncdebug) kprintf x ; } while (0)
81 int pfsyncdebug;
82 #else
83 #define DPRINTF(x)
84 #endif
85 
86 struct pfsync_softc	*pfsyncif = NULL;
87 struct pfsyncstats	 pfsyncstats;
88 
89 void	pfsyncattach(int);
90 static int	pfsync_clone_destroy(struct ifnet *);
91 static int	pfsync_clone_create(struct if_clone *, int, caddr_t);
92 void	pfsync_setmtu(struct pfsync_softc *, int);
93 int	pfsync_alloc_scrub_memory(struct pfsync_state_peer *,
94 	    struct pf_state_peer *);
95 int	pfsyncoutput(struct ifnet *, struct mbuf *, struct sockaddr *,
96 	    struct rtentry *);
97 int	pfsyncioctl(struct ifnet *, u_long, caddr_t, struct ucred *);
98 void	pfsyncstart(struct ifnet *);
99 
100 struct mbuf *pfsync_get_mbuf(struct pfsync_softc *, u_int8_t, void **);
101 int	pfsync_request_update(struct pfsync_state_upd *, struct in_addr *);
102 int	pfsync_sendout(struct pfsync_softc *);
103 int	pfsync_sendout_mbuf(struct pfsync_softc *, struct mbuf *);
104 void	pfsync_timeout(void *);
105 void	pfsync_send_bus(struct pfsync_softc *, u_int8_t);
106 void	pfsync_bulk_update(void *);
107 void	pfsync_bulkfail(void *);
108 
109 static MALLOC_DEFINE(M_PFSYNC, PFSYNCNAME, "Packet Filter State Sync. Interface");
110 static LIST_HEAD(pfsync_list, pfsync_softc) pfsync_list;
111 
112 int	pfsync_sync_ok;
113 
114 struct if_clone	pfsync_cloner =
115     IF_CLONE_INITIALIZER("pfsync", pfsync_clone_create, pfsync_clone_destroy, 1 ,1);
116 
/*
 * Module attach hook: register the pfsync interface cloner so that
 * "pfsync%d" instances can be created on demand (e.g. via ifconfig).
 * The npfsync count argument is ignored; instances are created lazily
 * by the cloner.
 */
void
pfsyncattach(int npfsync)
{
	if_clone_attach(&pfsync_cloner);
}
/*
 * Clone-create handler: allocate and initialize a pfsync softc plus its
 * ifnet, hook up the interface methods, arm (but do not start) the
 * callouts, attach the interface, and link the softc onto the global
 * pfsync_list.  Returns 0 (allocation uses M_WAITOK and cannot fail).
 */
static int
pfsync_clone_create(struct if_clone *ifc, int unit, caddr_t param __unused)
{
	struct pfsync_softc *sc;
	struct ifnet *ifp;

	lwkt_gettoken(&pf_token);

	/* M_WAITOK|M_ZERO: cannot fail, softc starts fully zeroed. */
	MALLOC(sc, struct pfsync_softc *, sizeof(*sc), M_PFSYNC,
	    M_WAITOK|M_ZERO);
	pfsync_sync_ok = 1;
	sc->sc_mbuf = NULL;
	sc->sc_mbuf_net = NULL;
	sc->sc_mbuf_tdb = NULL;
	sc->sc_statep.s = NULL;
	sc->sc_statep_net.s = NULL;
	sc->sc_statep_tdb.t = NULL;
	sc->sc_maxupdates = 128;
	/* Default peer is the pfsync multicast group. */
	sc->sc_sync_peer.s_addr =htonl(INADDR_PFSYNC_GROUP);
	sc->sc_sendaddr.s_addr = htonl(INADDR_PFSYNC_GROUP);
	sc->sc_ureq_received = 0;
	sc->sc_ureq_sent = 0;
	sc->sc_bulk_send_next = NULL;
	sc->sc_bulk_terminator = NULL;
	sc->sc_imo.imo_max_memberships = IP_MAX_MEMBERSHIPS;
	lwkt_reltoken(&pf_token);
	ifp = &sc->sc_if;
	/*
	 * NOTE(review): the ksnprintf is redundant -- if_initname() below
	 * also sets if_xname from ifc_name/unit.  Confirm and drop one.
	 */
	ksnprintf(ifp->if_xname, sizeof ifp->if_xname, "pfsync%d", unit);
	if_initname(ifp, ifc->ifc_name, unit);
	ifp->if_ioctl = pfsyncioctl;
	ifp->if_output = pfsyncoutput;
	ifp->if_start = pfsyncstart;
	ifp->if_type = IFT_PFSYNC;
	ifp->if_snd.ifq_maxlen = ifqmaxlen;
	ifp->if_hdrlen = PFSYNC_HDRLEN;
	ifp->if_baudrate = IF_Mbps(100);
	ifp->if_softc = sc;
	pfsync_setmtu(sc, MCLBYTES);
	callout_init(&sc->sc_tmo);
	/* callout_init(&sc->sc_tdb_tmo); XXX we don't support tdb (yet) */
	callout_init(&sc->sc_bulk_tmo);
	callout_init(&sc->sc_bulkfail_tmo);
	if_attach(ifp, NULL);

	LIST_INSERT_HEAD(&pfsync_list, sc, sc_next);


#if NCARP > 0
	if_addgroup(ifp, "carp");
#endif

#if NBPFILTER > 0
	bpfattach(&sc->sc_if, DLT_PFSYNC, PFSYNC_HDRLEN);
#endif
	/*
	 * NOTE(review): this acquire immediately followed by release is a
	 * no-op as written -- presumably a leftover from a larger critical
	 * section; confirm whether it can be removed.
	 */
	lwkt_gettoken(&pf_token);

	lwkt_reltoken(&pf_token);
	return (0);
}
181 
/*
 * Clone-destroy handler: tear down a pfsync interface created by
 * pfsync_clone_create().  Stops the callouts, undoes any carp demotion
 * left over from a failed bulk update, detaches from bpf and the
 * network stack, unlinks the softc from pfsync_list and frees it.
 */
static int
pfsync_clone_destroy(struct ifnet *ifp)
{
	/*
	 * NOTE(review): this acquire/release pair is a no-op as written;
	 * possibly intended to serialize with pf_token holders before
	 * teardown starts -- confirm whether it can be removed.
	 */
	lwkt_gettoken(&pf_token);
	lwkt_reltoken(&pf_token);

	struct pfsync_softc *sc = ifp->if_softc;
	callout_stop(&sc->sc_tmo);
	/* callout_stop(&sc->sc_tdb_tmo); XXX we don't support tdb (yet) */
	callout_stop(&sc->sc_bulk_tmo);
	callout_stop(&sc->sc_bulkfail_tmo);
#if NCARP > 0
	/* Undo the demotion applied while a bulk update was outstanding. */
	if (!pfsync_sync_ok)
		carp_group_demote_adj(&sc->sc_if, -1);
#endif
#if NBPFILTER > 0
	bpfdetach(ifp);
#endif
	if_detach(ifp);
	/* Hold pf_token while unlinking from the global list. */
	lwkt_gettoken(&pf_token);
	LIST_REMOVE(sc, sc_next);
	kfree(sc, M_PFSYNC);
	lwkt_reltoken(&pf_token);


	return 0;
}
209 
/*
 * Start output on the pfsync interface.  pfsync builds and transmits
 * its packets internally (it never uses the interface send queue), so
 * anything that ends up queued here is accounted as dropped and the
 * queue is purged.
 */
void
pfsyncstart(struct ifnet *ifp)
{
	crit_enter();
	IF_DROP(&ifp->if_snd);
	IF_DRAIN(&ifp->if_snd);
	crit_exit();
}
221 
222 int
223 pfsync_alloc_scrub_memory(struct pfsync_state_peer *s,
224     struct pf_state_peer *d)
225 {
226 	if (s->scrub.scrub_flag && d->scrub == NULL) {
227 		d->scrub = pool_get(&pf_state_scrub_pl, PR_NOWAIT | PR_ZERO);
228 		if (d->scrub == NULL)
229 			return (ENOMEM);
230 	}
231 
232 	return (0);
233 }
234 
235 void
236 pfsync_state_export(struct pfsync_state *sp, struct pf_state *st)
237 {
238 	bzero(sp, sizeof(struct pfsync_state));
239 
240 	/* copy from state key */
241 	sp->key[PF_SK_WIRE].addr[0] = st->key[PF_SK_WIRE]->addr[0];
242 	sp->key[PF_SK_WIRE].addr[1] = st->key[PF_SK_WIRE]->addr[1];
243 	sp->key[PF_SK_WIRE].port[0] = st->key[PF_SK_WIRE]->port[0];
244 	sp->key[PF_SK_WIRE].port[1] = st->key[PF_SK_WIRE]->port[1];
245 	sp->key[PF_SK_STACK].addr[0] = st->key[PF_SK_STACK]->addr[0];
246 	sp->key[PF_SK_STACK].addr[1] = st->key[PF_SK_STACK]->addr[1];
247 	sp->key[PF_SK_STACK].port[0] = st->key[PF_SK_STACK]->port[0];
248 	sp->key[PF_SK_STACK].port[1] = st->key[PF_SK_STACK]->port[1];
249 	sp->proto = st->key[PF_SK_WIRE]->proto;
250 	sp->af = st->key[PF_SK_WIRE]->af;
251 
252 	/* copy from state */
253 	strlcpy(sp->ifname, st->kif->pfik_name, sizeof(sp->ifname));
254 	bcopy(&st->rt_addr, &sp->rt_addr, sizeof(sp->rt_addr));
255 	sp->creation = htonl(time_second - st->creation);
256 	sp->expire = pf_state_expires(st);
257 	if (sp->expire <= time_second)
258 		sp->expire = htonl(0);
259 	else
260 		sp->expire = htonl(sp->expire - time_second);
261 
262 	sp->direction = st->direction;
263 	sp->log = st->log;
264 	sp->timeout = st->timeout;
265 	sp->state_flags = st->state_flags;
266 	if (st->src_node)
267 		sp->sync_flags |= PFSYNC_FLAG_SRCNODE;
268 	if (st->nat_src_node)
269 		sp->sync_flags |= PFSYNC_FLAG_NATSRCNODE;
270 
271 	bcopy(&st->id, &sp->id, sizeof(sp->id));
272 	sp->creatorid = st->creatorid;
273 	pf_state_peer_hton(&st->src, &sp->src);
274 	pf_state_peer_hton(&st->dst, &sp->dst);
275 
276 	if (st->rule.ptr == NULL)
277 		sp->rule = htonl(-1);
278 	else
279 		sp->rule = htonl(st->rule.ptr->nr);
280 	if (st->anchor.ptr == NULL)
281 		sp->anchor = htonl(-1);
282 	else
283 		sp->anchor = htonl(st->anchor.ptr->nr);
284 	if (st->nat_rule.ptr == NULL)
285 		sp->nat_rule = htonl(-1);
286 	else
287 		sp->nat_rule = htonl(st->nat_rule.ptr->nr);
288 
289 	pf_state_counter_hton(st->packets[0], sp->packets[0]);
290 	pf_state_counter_hton(st->packets[1], sp->packets[1]);
291 	pf_state_counter_hton(st->bytes[0], sp->bytes[0]);
292 	pf_state_counter_hton(st->bytes[1], sp->bytes[1]);
293 
294 }
295 
296 int
297 pfsync_state_import(struct pfsync_state *sp, u_int8_t flags)
298 {
299 	struct pf_state	*st = NULL;
300 	struct pf_state_key *skw = NULL, *sks = NULL;
301 	struct pf_rule *r = NULL;
302 	struct pfi_kif	*kif;
303 	int pool_flags;
304 	int error;
305 
306 	if (sp->creatorid == 0 && pf_status.debug >= PF_DEBUG_MISC) {
307 		kprintf("pfsync_insert_net_state: invalid creator id:"
308 		    " %08x\n", ntohl(sp->creatorid));
309 		return (EINVAL);
310 	}
311 
312 	if ((kif = pfi_kif_get(sp->ifname)) == NULL) {
313 		if (pf_status.debug >= PF_DEBUG_MISC)
314 			kprintf("pfsync_insert_net_state: "
315 			    "unknown interface: %s\n", sp->ifname);
316 		if (flags & PFSYNC_SI_IOCTL)
317 			return (EINVAL);
318 		return (0);	/* skip this state */
319 	}
320 
321 	/*
322 	 * If the ruleset checksums match or the state is coming from the ioctl,
323 	 * it's safe to associate the state with the rule of that number.
324 	 */
325 	if (sp->rule != htonl(-1) && sp->anchor == htonl(-1) &&
326 	    (flags & (PFSYNC_SI_IOCTL | PFSYNC_SI_CKSUM)) && ntohl(sp->rule) <
327 	    pf_main_ruleset.rules[PF_RULESET_FILTER].active.rcount)
328 		r = pf_main_ruleset.rules[
329 		    PF_RULESET_FILTER].active.ptr_array[ntohl(sp->rule)];
330 	else
331 		r = &pf_default_rule;
332 
333 	if ((r->max_states && r->states_cur >= r->max_states))
334 		goto cleanup;
335 
336 	if (flags & PFSYNC_SI_IOCTL)
337 		pool_flags = PR_WAITOK | PR_LIMITFAIL | PR_ZERO;
338 	else
339 		pool_flags = PR_LIMITFAIL | PR_ZERO;
340 
341 	if ((st = pool_get(&pf_state_pl, pool_flags)) == NULL)
342 		goto cleanup;
343 
344 	if ((skw = pf_alloc_state_key(pool_flags)) == NULL)
345 		goto cleanup;
346 
347 	if (PF_ANEQ(&sp->key[PF_SK_WIRE].addr[0],
348 	    &sp->key[PF_SK_STACK].addr[0], sp->af) ||
349 	    PF_ANEQ(&sp->key[PF_SK_WIRE].addr[1],
350 	    &sp->key[PF_SK_STACK].addr[1], sp->af) ||
351 	    sp->key[PF_SK_WIRE].port[0] != sp->key[PF_SK_STACK].port[0] ||
352 	    sp->key[PF_SK_WIRE].port[1] != sp->key[PF_SK_STACK].port[1]) {
353 		if ((sks = pf_alloc_state_key(pool_flags)) == NULL)
354 			goto cleanup;
355 	} else
356 		sks = skw;
357 
358 	/* allocate memory for scrub info */
359 	if (pfsync_alloc_scrub_memory(&sp->src, &st->src) ||
360 	    pfsync_alloc_scrub_memory(&sp->dst, &st->dst))
361 		goto cleanup;
362 
363 	/* copy to state key(s) */
364 	skw->addr[0] = sp->key[PF_SK_WIRE].addr[0];
365 	skw->addr[1] = sp->key[PF_SK_WIRE].addr[1];
366 	skw->port[0] = sp->key[PF_SK_WIRE].port[0];
367 	skw->port[1] = sp->key[PF_SK_WIRE].port[1];
368 	skw->proto = sp->proto;
369 	skw->af = sp->af;
370 	if (sks != skw) {
371 		sks->addr[0] = sp->key[PF_SK_STACK].addr[0];
372 		sks->addr[1] = sp->key[PF_SK_STACK].addr[1];
373 		sks->port[0] = sp->key[PF_SK_STACK].port[0];
374 		sks->port[1] = sp->key[PF_SK_STACK].port[1];
375 		sks->proto = sp->proto;
376 		sks->af = sp->af;
377 	}
378 
379 	/* copy to state */
380 	bcopy(&sp->rt_addr, &st->rt_addr, sizeof(st->rt_addr));
381 	st->creation = time_second - ntohl(sp->creation);
382 	st->expire = time_second;
383 	if (sp->expire) {
384 		/* XXX No adaptive scaling. */
385 		st->expire -= r->timeout[sp->timeout] - ntohl(sp->expire);
386 	}
387 
388 	st->expire = ntohl(sp->expire) + time_second;
389 	st->direction = sp->direction;
390 	st->log = sp->log;
391 	st->timeout = sp->timeout;
392 	st->state_flags = sp->state_flags;
393 	if (!(flags & PFSYNC_SI_IOCTL))
394 		st->sync_flags = PFSTATE_FROMSYNC;
395 
396 	bcopy(sp->id, &st->id, sizeof(st->id));
397 	st->creatorid = sp->creatorid;
398 	pf_state_peer_ntoh(&sp->src, &st->src);
399 	pf_state_peer_ntoh(&sp->dst, &st->dst);
400 
401 	st->rule.ptr = r;
402 	st->nat_rule.ptr = NULL;
403 	st->anchor.ptr = NULL;
404 	st->rt_kif = NULL;
405 
406 	st->pfsync_time = 0;
407 
408 
409 	/* XXX when we have nat_rule/anchors, use STATE_INC_COUNTERS */
410 	r->states_cur++;
411 	r->states_tot++;
412 
413 	if ((error = pf_state_insert(kif, skw, sks, st)) != 0) {
414 		/* XXX when we have nat_rule/anchors, use STATE_DEC_COUNTERS */
415 		r->states_cur--;
416 		goto cleanup_state;
417 	}
418 
419 	return (0);
420 
421  cleanup:
422 	error = ENOMEM;
423 	if (skw == sks)
424 		sks = NULL;
425 	if (skw != NULL)
426 		pool_put(&pf_state_key_pl, skw);
427 	if (sks != NULL)
428 		pool_put(&pf_state_key_pl, sks);
429 
430  cleanup_state:	/* pf_state_insert frees the state keys */
431 	if (st) {
432 		if (st->dst.scrub)
433 			pool_put(&pf_state_scrub_pl, st->dst.scrub);
434 		if (st->src.scrub)
435 			pool_put(&pf_state_scrub_pl, st->src.scrub);
436 		pool_put(&pf_state_pl, st);
437 	}
438 	return (error);
439 }
440 
/*
 * pfsync_input() - process one received pfsync packet.
 *
 * Validates the encapsulating IP header (sync interface, TTL,
 * length, protocol version), then dispatches on the pfsync action
 * code: state clear/insert/update/delete (both full and compressed
 * forms), bulk update requests and bulk update status messages.
 * The mbuf chain is consumed: freed at "done" on the normal path.
 * NOTE(review): the early "return" paths after m_pulldown() rely on
 * m_pulldown freeing the chain on failure -- confirm against the
 * KAME/DragonFly m_pulldown contract.
 */
void
pfsync_input(struct mbuf *m, ...)
{
	struct ip *ip = mtod(m, struct ip *);
	struct pfsync_header *ph;
	struct pfsync_softc *sc = pfsyncif;
	struct pf_state *st;
	struct pf_state_key *sk;
	struct pf_state_item *si;
	struct pf_state_cmp id_key;
	struct pfsync_state *sp;
	struct pfsync_state_upd *up;
	struct pfsync_state_del *dp;
	struct pfsync_state_clr *cp;
	struct pfsync_state_upd_req *rup;
	struct pfsync_state_bus *bus;
#ifdef IPSEC
	struct pfsync_tdb *pt;
#endif
	struct in_addr src;
	struct mbuf *mp;
	int iplen, action, error, i, count, offp, sfail, stale = 0;
	u_int8_t flags = 0;

	/* This function is not yet called from anywhere */
	/* Still we assume for safety that pf_token must be held */
	ASSERT_LWKT_TOKEN_HELD(&pf_token);

	pfsyncstats.pfsyncs_ipackets++;

	/* verify that we have a sync interface configured */
	if (!sc || !sc->sc_sync_ifp || !pf_status.running)
		goto done;

	/* verify that the packet came in on the right interface */
	if (sc->sc_sync_ifp != m->m_pkthdr.rcvif) {
		pfsyncstats.pfsyncs_badif++;
		goto done;
	}

	/* verify that the IP TTL is 255.  */
	if (ip->ip_ttl != PFSYNC_DFLTTL) {
		pfsyncstats.pfsyncs_badttl++;
		goto done;
	}

	iplen = ip->ip_hl << 2;

	if (m->m_pkthdr.len < iplen + sizeof(*ph)) {
		pfsyncstats.pfsyncs_hdrops++;
		goto done;
	}

	/* Make the pfsync header contiguous before dereferencing it. */
	if (iplen + sizeof(*ph) > m->m_len) {
		if ((m = m_pullup(m, iplen + sizeof(*ph))) == NULL) {
			pfsyncstats.pfsyncs_hdrops++;
			goto done;
		}
		ip = mtod(m, struct ip *);
	}
	ph = (struct pfsync_header *)((char *)ip + iplen);

	/* verify the version */
	if (ph->version != PFSYNC_VERSION) {
		pfsyncstats.pfsyncs_badver++;
		goto done;
	}

	action = ph->action;
	count = ph->count;

	/* make sure it's a valid action code */
	if (action >= PFSYNC_ACT_MAX) {
		pfsyncstats.pfsyncs_badact++;
		goto done;
	}

	/* Cheaper to grab this now than having to mess with mbufs later */
	src = ip->ip_src;

	/* Matching ruleset checksum means the peer's rule numbers map 1:1. */
	if (!bcmp(&ph->pf_chksum, &pf_status.pf_chksum, PF_MD5_DIGEST_LENGTH))
		flags |= PFSYNC_SI_CKSUM;

	switch (action) {
	case PFSYNC_ACT_CLR: {
		/*
		 * Clear all states from a given creator id, optionally
		 * restricted to a named interface.
		 */
		struct pf_state *nexts;
		struct pf_state_key *nextsk;
		struct pfi_kif *kif;
		u_int32_t creatorid;
		if ((mp = m_pulldown(m, iplen + sizeof(*ph),
		    sizeof(*cp), &offp)) == NULL) {
			pfsyncstats.pfsyncs_badlen++;
			return;
		}
		cp = (struct pfsync_state_clr *)(mp->m_data + offp);
		creatorid = cp->creatorid;

		crit_enter();
		if (cp->ifname[0] == '\0') {
			/* No interface given: walk the whole id tree. */
			for (st = RB_MIN(pf_state_tree_id, &tree_id);
			    st; st = nexts) {
				nexts = RB_NEXT(pf_state_tree_id, &tree_id, st);
				if (st->creatorid == creatorid) {
					st->sync_flags |= PFSTATE_FROMSYNC;
					pf_unlink_state(st);
				}
			}
		} else {
			if ((kif = pfi_kif_get(cp->ifname)) == NULL) {
				crit_exit();
				/* NOTE(review): m not freed here -- leak? */
				return;
			}
			/* XXX correct? */
			for (sk = RB_MIN(pf_state_tree,
			    &pf_statetbl); sk; sk = nextsk) {
				nextsk = RB_NEXT(pf_state_tree,
				    &pf_statetbl, sk);
				TAILQ_FOREACH(si, &sk->states, entry) {
					if (si->s->creatorid == creatorid) {
						si->s->sync_flags |=
						    PFSTATE_FROMSYNC;
						pf_unlink_state(si->s);
					}
				}
			}
		}
		crit_exit();

		break;
	}
	case PFSYNC_ACT_INS:
		/* Insert full states sent by the peer. */
		if ((mp = m_pulldown(m, iplen + sizeof(*ph),
		    count * sizeof(*sp), &offp)) == NULL) {
			pfsyncstats.pfsyncs_badlen++;
			return;
		}

		crit_enter();
		for (i = 0, sp = (struct pfsync_state *)(mp->m_data + offp);
		    i < count; i++, sp++) {
			/* check for invalid values */
			if (sp->timeout >= PFTM_MAX ||
			    sp->src.state > PF_TCPS_PROXY_DST ||
			    sp->dst.state > PF_TCPS_PROXY_DST ||
			    sp->direction > PF_OUT ||
			    (sp->af != AF_INET && sp->af != AF_INET6)) {
				if (pf_status.debug >= PF_DEBUG_MISC)
					kprintf("pfsync_insert: PFSYNC_ACT_INS: "
					    "invalid value\n");
				pfsyncstats.pfsyncs_badval++;
				continue;
			}

			if ((error = pfsync_state_import(sp, flags))) {
				/* Out of memory: give up on the whole packet. */
				if (error == ENOMEM) {
					crit_exit();
					goto done;
				}
				continue;
			}
		}
		crit_exit();
		break;
	case PFSYNC_ACT_UPD:
		/* Apply full-state updates; insert if we don't know them. */
		if ((mp = m_pulldown(m, iplen + sizeof(*ph),
		    count * sizeof(*sp), &offp)) == NULL) {
			pfsyncstats.pfsyncs_badlen++;
			return;
		}

		crit_enter();
		for (i = 0, sp = (struct pfsync_state *)(mp->m_data + offp);
		    i < count; i++, sp++) {
			/*
			 * NOTE(review): this inner "flags" shadows the
			 * function-level flags (checksum bit) on purpose?
			 * Confirm -- the import below sees only
			 * PFSYNC_FLAG_STALE here.
			 */
			int flags = PFSYNC_FLAG_STALE;

			/* check for invalid values */
			if (sp->timeout >= PFTM_MAX ||
			    sp->src.state > PF_TCPS_PROXY_DST ||
			    sp->dst.state > PF_TCPS_PROXY_DST) {
				if (pf_status.debug >= PF_DEBUG_MISC)
					kprintf("pfsync_insert: PFSYNC_ACT_UPD: "
					    "invalid value\n");
				pfsyncstats.pfsyncs_badval++;
				continue;
			}

			bcopy(sp->id, &id_key.id, sizeof(id_key.id));
			id_key.creatorid = sp->creatorid;

			st = pf_find_state_byid(&id_key);
			if (st == NULL) {
				/* insert the update */
				if (pfsync_state_import(sp, flags))
					pfsyncstats.pfsyncs_badstate++;
				continue;
			}
			sk = st->key[PF_SK_WIRE];	/* XXX right one? */
			sfail = 0;
			if (sk->proto == IPPROTO_TCP) {
				/*
				 * The state should never go backwards except
				 * for syn-proxy states.  Neither should the
				 * sequence window slide backwards.
				 */
				if (st->src.state > sp->src.state &&
				    (st->src.state < PF_TCPS_PROXY_SRC ||
				    sp->src.state >= PF_TCPS_PROXY_SRC))
					sfail = 1;
				else if (SEQ_GT(st->src.seqlo,
				    ntohl(sp->src.seqlo)))
					sfail = 3;
				else if (st->dst.state > sp->dst.state) {
					/* There might still be useful
					 * information about the src state here,
					 * so import that part of the update,
					 * then "fail" so we send the updated
					 * state back to the peer who is missing
					 * our what we know. */
					pf_state_peer_ntoh(&sp->src, &st->src);
					/* XXX do anything with timeouts? */
					sfail = 7;
					flags = 0;
				} else if (st->dst.state >= TCPS_SYN_SENT &&
				    SEQ_GT(st->dst.seqlo, ntohl(sp->dst.seqlo)))
					sfail = 4;
			} else {
				/*
				 * Non-TCP protocol state machine always go
				 * forwards
				 */
				if (st->src.state > sp->src.state)
					sfail = 5;
				else if (st->dst.state > sp->dst.state)
					sfail = 6;
			}
			if (sfail) {
				if (pf_status.debug >= PF_DEBUG_MISC)
					kprintf("pfsync: %s stale update "
					    "(%d) id: %016jx "
					    "creatorid: %08x\n",
					    (sfail < 7 ?  "ignoring"
					     : "partial"), sfail,
					    (uintmax_t)be64toh(st->id),
					    ntohl(st->creatorid));
				pfsyncstats.pfsyncs_stale++;

				if (!(sp->sync_flags & PFSTATE_STALE)) {
					/* we have a better state, send it */
					if (sc->sc_mbuf != NULL && !stale)
						pfsync_sendout(sc);
					stale++;
					if (!st->sync_flags)
						pfsync_pack_state(
						    PFSYNC_ACT_UPD, st, flags);
				}
				continue;
			}
			/* Update passed the staleness checks: apply it. */
			pfsync_alloc_scrub_memory(&sp->dst, &st->dst);
			pf_state_peer_ntoh(&sp->src, &st->src);
			pf_state_peer_ntoh(&sp->dst, &st->dst);
			st->expire = ntohl(sp->expire) + time_second;
			st->timeout = sp->timeout;
		}
		if (stale && sc->sc_mbuf != NULL)
			pfsync_sendout(sc);
		crit_exit();
		break;
	/*
	 * It's not strictly necessary for us to support the "uncompressed"
	 * delete action, but it's relatively simple and maintains consistency.
	 */
	case PFSYNC_ACT_DEL:
		if ((mp = m_pulldown(m, iplen + sizeof(*ph),
		    count * sizeof(*sp), &offp)) == NULL) {
			pfsyncstats.pfsyncs_badlen++;
			return;
		}

		crit_enter();
		for (i = 0, sp = (struct pfsync_state *)(mp->m_data + offp);
		    i < count; i++, sp++) {
			bcopy(sp->id, &id_key.id, sizeof(id_key.id));
			id_key.creatorid = sp->creatorid;

			st = pf_find_state_byid(&id_key);
			if (st == NULL) {
				pfsyncstats.pfsyncs_badstate++;
				continue;
			}
			st->sync_flags |= PFSTATE_FROMSYNC;
			pf_unlink_state(st);
		}
		crit_exit();
		break;
	case PFSYNC_ACT_UPD_C: {
		/* Compressed updates; unknown states are requested back. */
		int update_requested = 0;

		if ((mp = m_pulldown(m, iplen + sizeof(*ph),
		    count * sizeof(*up), &offp)) == NULL) {
			pfsyncstats.pfsyncs_badlen++;
			return;
		}

		crit_enter();
		for (i = 0, up = (struct pfsync_state_upd *)(mp->m_data + offp);
		    i < count; i++, up++) {
			/* check for invalid values */
			if (up->timeout >= PFTM_MAX ||
			    up->src.state > PF_TCPS_PROXY_DST ||
			    up->dst.state > PF_TCPS_PROXY_DST) {
				if (pf_status.debug >= PF_DEBUG_MISC)
					kprintf("pfsync_insert: "
					    "PFSYNC_ACT_UPD_C: "
					    "invalid value\n");
				pfsyncstats.pfsyncs_badval++;
				continue;
			}

			bcopy(up->id, &id_key.id, sizeof(id_key.id));
			id_key.creatorid = up->creatorid;

			st = pf_find_state_byid(&id_key);
			if (st == NULL) {
				/* We don't have this state. Ask for it. */
				error = pfsync_request_update(up, &src);
				if (error == ENOMEM) {
					crit_exit();
					goto done;
				}
				update_requested = 1;
				pfsyncstats.pfsyncs_badstate++;
				continue;
			}
			sk = st->key[PF_SK_WIRE]; /* XXX right one? */
			sfail = 0;
			if (sk->proto == IPPROTO_TCP) {
				/*
				 * The state should never go backwards except
				 * for syn-proxy states.  Neither should the
				 * sequence window slide backwards.
				 */
				if (st->src.state > up->src.state &&
				    (st->src.state < PF_TCPS_PROXY_SRC ||
				    up->src.state >= PF_TCPS_PROXY_SRC))
					sfail = 1;
				else if (st->dst.state > up->dst.state)
					sfail = 2;
				else if (SEQ_GT(st->src.seqlo,
				    ntohl(up->src.seqlo)))
					sfail = 3;
				else if (st->dst.state >= TCPS_SYN_SENT &&
				    SEQ_GT(st->dst.seqlo, ntohl(up->dst.seqlo)))
					sfail = 4;
			} else {
				/*
				 * Non-TCP protocol state machine always go
				 * forwards
				 */
				if (st->src.state > up->src.state)
					sfail = 5;
				else if (st->dst.state > up->dst.state)
					sfail = 6;
			}
			if (sfail) {
				if (pf_status.debug >= PF_DEBUG_MISC)
					kprintf("pfsync: ignoring stale update "
					    "(%d) id: %016" PRIx64 " "
					    "creatorid: %08x\n", sfail,
					    be64toh(st->id),
					    ntohl(st->creatorid));
				pfsyncstats.pfsyncs_stale++;

				/* we have a better state, send it out */
				if ((!stale || update_requested) &&
				    sc->sc_mbuf != NULL) {
					pfsync_sendout(sc);
					update_requested = 0;
				}
				stale++;
				if (!st->sync_flags)
					pfsync_pack_state(PFSYNC_ACT_UPD, st,
					    PFSYNC_FLAG_STALE);
				continue;
			}
			pfsync_alloc_scrub_memory(&up->dst, &st->dst);
			pf_state_peer_ntoh(&up->src, &st->src);
			pf_state_peer_ntoh(&up->dst, &st->dst);
			st->expire = ntohl(up->expire) + time_second;
			st->timeout = up->timeout;
		}
		if ((update_requested || stale) && sc->sc_mbuf)
			pfsync_sendout(sc);
		crit_exit();
		break;
	}
	case PFSYNC_ACT_DEL_C:
		/* Compressed deletes: id + creatorid only. */
		if ((mp = m_pulldown(m, iplen + sizeof(*ph),
		    count * sizeof(*dp), &offp)) == NULL) {
			pfsyncstats.pfsyncs_badlen++;
			return;
		}

		crit_enter();
		for (i = 0, dp = (struct pfsync_state_del *)(mp->m_data + offp);
		    i < count; i++, dp++) {
			bcopy(dp->id, &id_key.id, sizeof(id_key.id));
			id_key.creatorid = dp->creatorid;

			st = pf_find_state_byid(&id_key);
			if (st == NULL) {
				pfsyncstats.pfsyncs_badstate++;
				continue;
			}
			st->sync_flags |= PFSTATE_FROMSYNC;
			pf_unlink_state(st);
		}
		crit_exit();
		break;
	case PFSYNC_ACT_INS_F:
	case PFSYNC_ACT_DEL_F:
		/* not implemented */
		break;
	case PFSYNC_ACT_UREQ:
		/* Peer requests updates; id 0/0 means full bulk update. */
		if ((mp = m_pulldown(m, iplen + sizeof(*ph),
		    count * sizeof(*rup), &offp)) == NULL) {
			pfsyncstats.pfsyncs_badlen++;
			return;
		}

		crit_enter();
		if (sc->sc_mbuf != NULL)
			pfsync_sendout(sc);
		for (i = 0,
		    rup = (struct pfsync_state_upd_req *)(mp->m_data + offp);
		    i < count; i++, rup++) {
			bcopy(rup->id, &id_key.id, sizeof(id_key.id));
			id_key.creatorid = rup->creatorid;

			if (id_key.id == 0 && id_key.creatorid == 0) {
				sc->sc_ureq_received = mycpu->gd_time_seconds;
				if (sc->sc_bulk_send_next == NULL)
					sc->sc_bulk_send_next =
					    TAILQ_FIRST(&state_list);
				sc->sc_bulk_terminator = sc->sc_bulk_send_next;
				if (pf_status.debug >= PF_DEBUG_MISC)
					kprintf("pfsync: received "
					    "bulk update request\n");
				pfsync_send_bus(sc, PFSYNC_BUS_START);
				lwkt_reltoken(&pf_token);
				/*
				 * NOTE(review): callout_init() here looks
				 * like it should be a callout_reset() to
				 * actually schedule pfsync_bulk_update --
				 * as written no timer is armed; confirm.
				 */
				callout_init(&sc->sc_bulk_tmo);
				lwkt_gettoken(&pf_token);
			} else {
				st = pf_find_state_byid(&id_key);
				if (st == NULL) {
					pfsyncstats.pfsyncs_badstate++;
					continue;
				}
				if (!st->sync_flags)
					pfsync_pack_state(PFSYNC_ACT_UPD,
					    st, 0);
			}
		}
		if (sc->sc_mbuf != NULL)
			pfsync_sendout(sc);
		crit_exit();
		break;
	case PFSYNC_ACT_BUS:
		/* If we're not waiting for a bulk update, who cares. */
		if (sc->sc_ureq_sent == 0)
			break;

		if ((mp = m_pulldown(m, iplen + sizeof(*ph),
		    sizeof(*bus), &offp)) == NULL) {
			pfsyncstats.pfsyncs_badlen++;
			return;
		}
		bus = (struct pfsync_state_bus *)(mp->m_data + offp);
		switch (bus->status) {
		case PFSYNC_BUS_START:
			/* Arm the failure timer for the expected duration. */
			lwkt_reltoken(&pf_token);
			callout_reset(&sc->sc_bulkfail_tmo,
			    pf_pool_limits[PF_LIMIT_STATES].limit /
			    (PFSYNC_BULKPACKETS * sc->sc_maxcount),
			    pfsync_bulkfail, LIST_FIRST(&pfsync_list));
			lwkt_gettoken(&pf_token);
			if (pf_status.debug >= PF_DEBUG_MISC)
				kprintf("pfsync: received bulk "
				    "update start\n");
			break;
		case PFSYNC_BUS_END:
			/* Accept only if the bulk update began after our request. */
			if (mycpu->gd_time_seconds - ntohl(bus->endtime) >=
			    sc->sc_ureq_sent) {
				/* that's it, we're happy */
				sc->sc_ureq_sent = 0;
				sc->sc_bulk_tries = 0;
				lwkt_reltoken(&pf_token);
				callout_stop(&sc->sc_bulkfail_tmo);
				lwkt_gettoken(&pf_token);
#if NCARP > 0
				if (!pfsync_sync_ok) {
					lwkt_reltoken(&pf_token);
					carp_group_demote_adj(&sc->sc_if, -1);
					lwkt_gettoken(&pf_token);
				}
#endif
				pfsync_sync_ok = 1;
				if (pf_status.debug >= PF_DEBUG_MISC)
					kprintf("pfsync: received valid "
					    "bulk update end\n");
			} else {
				if (pf_status.debug >= PF_DEBUG_MISC)
					kprintf("pfsync: received invalid "
					    "bulk update end: bad timestamp\n");
			}
			break;
		}
		break;
#ifdef IPSEC
	case PFSYNC_ACT_TDB_UPD:
		if ((mp = m_pulldown(m, iplen + sizeof(*ph),
		    count * sizeof(*pt), &offp)) == NULL) {
			pfsyncstats.pfsyncs_badlen++;
			return;
		}
		crit_enter();
		for (i = 0, pt = (struct pfsync_tdb *)(mp->m_data + offp);
		    i < count; i++, pt++)
			pfsync_update_net_tdb(pt);
		crit_exit();
		break;
#endif
	}

done:
	if (m)
		m_freem(m);
}
978 
/*
 * if_output handler for pfsync interfaces.  pfsync builds and sends
 * its packets internally, so anything handed to if_output is simply
 * discarded; success is reported to keep callers quiet.
 */
int
pfsyncoutput(struct ifnet *ifp, struct mbuf *m, struct sockaddr *dst,
	struct rtentry *rt)
{
	m_freem(m);
	return (0);
}
986 
/* ARGSUSED */
/*
 * Ioctl handler for a pfsync interface.  Besides the generic address,
 * flags and MTU ioctls, this implements SIOCGETPFSYNC/SIOCSETPFSYNC:
 * reading and (root-only) configuring the sync interface, the sync
 * peer address and the maximum number of merged updates per state.
 * Runs with pf_token held; the token is released around copyin/copyout
 * and callout operations, and before every return.
 */
int
pfsyncioctl(struct ifnet *ifp, u_long cmd, caddr_t data, struct ucred *cr)
{
	struct pfsync_softc *sc = ifp->if_softc;
	struct ifreq *ifr = (struct ifreq *)data;
	struct ip_moptions *imo = &sc->sc_imo;
	struct pfsyncreq pfsyncr;
	struct ifnet    *sifp;
	int error;

	lwkt_gettoken(&pf_token);

	switch (cmd) {
	case SIOCSIFADDR:
	case SIOCAIFADDR:
	case SIOCSIFDSTADDR:
	case SIOCSIFFLAGS:
		/* Mirror IFF_UP into IFF_RUNNING; nothing else to set up. */
		if (ifp->if_flags & IFF_UP)
			ifp->if_flags |= IFF_RUNNING;
		else
			ifp->if_flags &= ~IFF_RUNNING;
		break;
	case SIOCSIFMTU:
		if (ifr->ifr_mtu < PFSYNC_MINMTU) {
			lwkt_reltoken(&pf_token);
			return (EINVAL);
		}
		if (ifr->ifr_mtu > MCLBYTES)
			ifr->ifr_mtu = MCLBYTES;
		crit_enter();
		/* Flush first when shrinking so the queued packet still fits. */
		if (ifr->ifr_mtu < ifp->if_mtu)
			pfsync_sendout(sc);
		pfsync_setmtu(sc, ifr->ifr_mtu);
		crit_exit();
		break;
	case SIOCGETPFSYNC:
		/* Report the current sync device, peer and maxupdates. */
		bzero(&pfsyncr, sizeof(pfsyncr));
		if (sc->sc_sync_ifp)
			strlcpy(pfsyncr.pfsyncr_syncdev,
			    sc->sc_sync_ifp->if_xname, IFNAMSIZ);
		pfsyncr.pfsyncr_syncpeer = sc->sc_sync_peer;
		pfsyncr.pfsyncr_maxupdates = sc->sc_maxupdates;
		/* Drop the token across the (possibly faulting) copyout. */
		lwkt_reltoken(&pf_token);
		if ((error = copyout(&pfsyncr, ifr->ifr_data, sizeof(pfsyncr))))
			return (error);
		lwkt_gettoken(&pf_token);
		break;
	case SIOCSETPFSYNC:
		if ((error = priv_check_cred(cr, PRIV_ROOT, NULL_CRED_OKAY)) != 0) {
			lwkt_reltoken(&pf_token);
			return (error);
		}
		if ((error = copyin(ifr->ifr_data, &pfsyncr, sizeof(pfsyncr)))) {
			lwkt_reltoken(&pf_token);
			return (error);
		}

		/* An unset peer address means the pfsync multicast group. */
		if (pfsyncr.pfsyncr_syncpeer.s_addr == 0)
			sc->sc_sync_peer.s_addr = INADDR_PFSYNC_GROUP;
		else
			sc->sc_sync_peer.s_addr =
			    pfsyncr.pfsyncr_syncpeer.s_addr;

		if (pfsyncr.pfsyncr_maxupdates > 255) {
			lwkt_reltoken(&pf_token);
			return (EINVAL);
		}
		sc->sc_maxupdates = pfsyncr.pfsyncr_maxupdates;

		if (pfsyncr.pfsyncr_syncdev[0] == 0) {
			/* Sync device cleared: tear down network syncing. */
			sc->sc_sync_ifp = NULL;
			if (sc->sc_mbuf_net != NULL) {
				/* Don't keep stale pfsync packets around. */
				crit_enter();
				m_freem(sc->sc_mbuf_net);
				sc->sc_mbuf_net = NULL;
				sc->sc_statep_net.s = NULL;
				crit_exit();
			}
			if (imo->imo_num_memberships > 0) {
				in_delmulti(imo->imo_membership[--imo->imo_num_memberships]);
				imo->imo_multicast_ifp = NULL;
			}
			break;
		}

		if ((sifp = ifunit(pfsyncr.pfsyncr_syncdev)) == NULL) {
			lwkt_reltoken(&pf_token);
			return (EINVAL);
		}

		crit_enter();
		/* Flush queued data that might not fit the new device's MTU. */
		if (sifp->if_mtu < sc->sc_if.if_mtu ||
		    (sc->sc_sync_ifp != NULL &&
		    sifp->if_mtu < sc->sc_sync_ifp->if_mtu) ||
		    sifp->if_mtu < MCLBYTES - sizeof(struct ip))
			pfsync_sendout(sc);
		sc->sc_sync_ifp = sifp;

		pfsync_setmtu(sc, sc->sc_if.if_mtu);

		/* Leave any previous multicast membership. */
		if (imo->imo_num_memberships > 0) {
			in_delmulti(imo->imo_membership[--imo->imo_num_memberships]);
			imo->imo_multicast_ifp = NULL;
		}

		if (sc->sc_sync_ifp &&
		    sc->sc_sync_peer.s_addr == INADDR_PFSYNC_GROUP) {
			/* Multicast peer: join the pfsync group on the device. */
			struct in_addr addr;

			if (!(sc->sc_sync_ifp->if_flags & IFF_MULTICAST)) {
				sc->sc_sync_ifp = NULL;
				lwkt_reltoken(&pf_token);
				crit_exit();
				return (EADDRNOTAVAIL);
			}

			addr.s_addr = INADDR_PFSYNC_GROUP;

			if ((imo->imo_membership[0] =
			    in_addmulti(&addr, sc->sc_sync_ifp)) == NULL) {
				sc->sc_sync_ifp = NULL;
				lwkt_reltoken(&pf_token);
				crit_exit();
				return (ENOBUFS);
			}
			imo->imo_num_memberships++;
			imo->imo_multicast_ifp = sc->sc_sync_ifp;
			imo->imo_multicast_ttl = PFSYNC_DFLTTL;
			imo->imo_multicast_loop = 0;
		}

		if (sc->sc_sync_ifp ||
		    sc->sc_sendaddr.s_addr != INADDR_PFSYNC_GROUP) {
			/* Request a full state table update. */
			sc->sc_ureq_sent = mycpu->gd_time_seconds;
#if NCARP > 0
			if (pfsync_sync_ok)
				carp_group_demote_adj(&sc->sc_if, 1);
#endif
			pfsync_sync_ok = 0;
			if (pf_status.debug >= PF_DEBUG_MISC)
				kprintf("pfsync: requesting bulk update\n");
			/* Arm the bulk-failure timer before asking peers. */
			lwkt_reltoken(&pf_token);
			callout_reset(&sc->sc_bulkfail_tmo, 5 * hz,
			    pfsync_bulkfail, LIST_FIRST(&pfsync_list));
			lwkt_gettoken(&pf_token);
			error = pfsync_request_update(NULL, NULL);
			if (error == ENOMEM) {
				lwkt_reltoken(&pf_token);
				crit_exit();
				return (ENOMEM);
			}
			pfsync_sendout(sc);
		}
		crit_exit();

		break;

	default:
		lwkt_reltoken(&pf_token);
		return (ENOTTY);
	}

	lwkt_reltoken(&pf_token);
	return (0);
}
1155 
1156 void
1157 pfsync_setmtu(struct pfsync_softc *sc, int mtu_req)
1158 {
1159 	int mtu;
1160 
1161 	if (sc->sc_sync_ifp && sc->sc_sync_ifp->if_mtu < mtu_req)
1162 		mtu = sc->sc_sync_ifp->if_mtu;
1163 	else
1164 		mtu = mtu_req;
1165 
1166 	sc->sc_maxcount = (mtu - sizeof(struct pfsync_header)) /
1167 	    sizeof(struct pfsync_state);
1168 	if (sc->sc_maxcount > 254)
1169 	    sc->sc_maxcount = 254;
1170 	sc->sc_if.if_mtu = sizeof(struct pfsync_header) +
1171 	    sc->sc_maxcount * sizeof(struct pfsync_state);
1172 }
1173 
/*
 * Allocate an mbuf (cluster-backed when needed) pre-loaded with a
 * pfsync header for `action', sized so that sc_maxcount records of the
 * action's payload type fit behind it.  On success *sp points at the
 * first payload byte and the sc_tmo flush timer is (re)armed; NULL is
 * returned on allocation failure.  Called with pf_token held; the
 * token is dropped around callout_reset().
 */
struct mbuf *
pfsync_get_mbuf(struct pfsync_softc *sc, u_int8_t action, void **sp)
{
	struct pfsync_header *h;
	struct mbuf *m;
	int len;

	ASSERT_LWKT_TOKEN_HELD(&pf_token);

	MGETHDR(m, M_WAITOK, MT_DATA);
	if (m == NULL) {
		sc->sc_if.if_oerrors++;
		return (NULL);
	}

	/* Worst-case packet length for this action's record type. */
	switch (action) {
	case PFSYNC_ACT_CLR:
		len = sizeof(struct pfsync_header) +
		    sizeof(struct pfsync_state_clr);
		break;
	case PFSYNC_ACT_UPD_C:
		len = (sc->sc_maxcount * sizeof(struct pfsync_state_upd)) +
		    sizeof(struct pfsync_header);
		break;
	case PFSYNC_ACT_DEL_C:
		len = (sc->sc_maxcount * sizeof(struct pfsync_state_del)) +
		    sizeof(struct pfsync_header);
		break;
	case PFSYNC_ACT_UREQ:
		len = (sc->sc_maxcount * sizeof(struct pfsync_state_upd_req)) +
		    sizeof(struct pfsync_header);
		break;
	case PFSYNC_ACT_BUS:
		len = sizeof(struct pfsync_header) +
		    sizeof(struct pfsync_state_bus);
		break;
	case PFSYNC_ACT_TDB_UPD:
		len = (sc->sc_maxcount * sizeof(struct pfsync_tdb)) +
		    sizeof(struct pfsync_header);
		break;
	default:
		/* Full (uncompressed) state records. */
		len = (sc->sc_maxcount * sizeof(struct pfsync_state)) +
		    sizeof(struct pfsync_header);
		break;
	}

	if (len > MHLEN) {
		/* Won't fit in a plain header mbuf; grab a cluster. */
		MCLGET(m, M_WAITOK);
		if ((m->m_flags & M_EXT) == 0) {
			m_free(m);
			sc->sc_if.if_oerrors++;
			return (NULL);
		}
		/* Place data at the end of the cluster, long-aligned. */
		m->m_data += (MCLBYTES - len) &~ (sizeof(long) - 1);
	} else
		MH_ALIGN(m, len);

	m->m_pkthdr.rcvif = NULL;
	/* Packet starts with just the header; records are appended later. */
	m->m_pkthdr.len = m->m_len = sizeof(struct pfsync_header);
	h = mtod(m, struct pfsync_header *);
	h->version = PFSYNC_VERSION;
	h->af = 0;
	h->count = 0;
	h->action = action;

	*sp = (void *)((char *)h + PFSYNC_HDRLEN);
	/* Ensure a partially filled packet eventually gets flushed. */
	lwkt_reltoken(&pf_token);
	callout_reset(&sc->sc_tmo, hz, pfsync_timeout,
	    LIST_FIRST(&pfsync_list));
	lwkt_gettoken(&pf_token);
	return (m);
}
1246 
1247 int
1248 pfsync_pack_state(u_int8_t action, struct pf_state *st, int flags)
1249 {
1250 	struct ifnet *ifp = NULL;
1251 	struct pfsync_softc *sc = pfsyncif;
1252 	struct pfsync_header *h, *h_net;
1253 	struct pfsync_state *sp = NULL;
1254 	struct pfsync_state_upd *up = NULL;
1255 	struct pfsync_state_del *dp = NULL;
1256 	int ret = 0;
1257 	u_int8_t i = 255, newaction = 0;
1258 
1259 	if (sc == NULL)
1260 		return (0);
1261 	ifp = &sc->sc_if;
1262 
1263 	/*
1264 	 * If a packet falls in the forest and there's nobody around to
1265 	 * hear, does it make a sound?
1266 	 */
1267 	if (ifp->if_bpf == NULL && sc->sc_sync_ifp == NULL &&
1268 	    sc->sc_sync_peer.s_addr == INADDR_PFSYNC_GROUP) {
1269 		/* Don't leave any stale pfsync packets hanging around. */
1270 		if (sc->sc_mbuf != NULL) {
1271 			m_freem(sc->sc_mbuf);
1272 			sc->sc_mbuf = NULL;
1273 			sc->sc_statep.s = NULL;
1274 		}
1275 		return (0);
1276 	}
1277 
1278 	if (action >= PFSYNC_ACT_MAX)
1279 		return (EINVAL);
1280 
1281 	crit_enter();
1282 	if (sc->sc_mbuf == NULL) {
1283 		if ((sc->sc_mbuf = pfsync_get_mbuf(sc, action,
1284 		    (void *)&sc->sc_statep.s)) == NULL) {
1285 			crit_exit();
1286 			return (ENOMEM);
1287 		}
1288 		h = mtod(sc->sc_mbuf, struct pfsync_header *);
1289 	} else {
1290 		h = mtod(sc->sc_mbuf, struct pfsync_header *);
1291 		if (h->action != action) {
1292 			pfsync_sendout(sc);
1293 			if ((sc->sc_mbuf = pfsync_get_mbuf(sc, action,
1294 			    (void *)&sc->sc_statep.s)) == NULL) {
1295 				crit_exit();
1296 				return (ENOMEM);
1297 			}
1298 			h = mtod(sc->sc_mbuf, struct pfsync_header *);
1299 		} else {
1300 			/*
1301 			 * If it's an update, look in the packet to see if
1302 			 * we already have an update for the state.
1303 			 */
1304 			if (action == PFSYNC_ACT_UPD && sc->sc_maxupdates) {
1305 				struct pfsync_state *usp =
1306 				    (void *)((char *)h + PFSYNC_HDRLEN);
1307 
1308 				for (i = 0; i < h->count; i++) {
1309 					if (!memcmp(usp->id, &st->id,
1310 					    PFSYNC_ID_LEN) &&
1311 					    usp->creatorid == st->creatorid) {
1312 						sp = usp;
1313 						sp->updates++;
1314 						break;
1315 					}
1316 					usp++;
1317 				}
1318 			}
1319 		}
1320 	}
1321 
1322 	st->pfsync_time = mycpu->gd_time_seconds;;
1323 
1324 	if (sp == NULL) {
1325 		/* not a "duplicate" update */
1326 		i = 255;
1327 		sp = sc->sc_statep.s++;
1328 		sc->sc_mbuf->m_pkthdr.len =
1329 		    sc->sc_mbuf->m_len += sizeof(struct pfsync_state);
1330 		h->count++;
1331 		bzero(sp, sizeof(*sp));
1332 
1333 		pfsync_state_export(sp, st);
1334 
1335 		if (flags & PFSYNC_FLAG_STALE)
1336 			sp->sync_flags |= PFSTATE_STALE;
1337 	} else {
1338 		pf_state_peer_hton(&st->src, &sp->src);
1339 		pf_state_peer_hton(&st->dst, &sp->dst);
1340 
1341 		if (st->expire <= time_second)
1342 			sp->expire = htonl(0);
1343 		else
1344 			sp->expire = htonl(st->expire - time_second);
1345 	}
1346 
1347 	/* do we need to build "compressed" actions for network transfer? */
1348 	if (sc->sc_sync_ifp && flags & PFSYNC_FLAG_COMPRESS) {
1349 		switch (action) {
1350 		case PFSYNC_ACT_UPD:
1351 			newaction = PFSYNC_ACT_UPD_C;
1352 			break;
1353 		case PFSYNC_ACT_DEL:
1354 			newaction = PFSYNC_ACT_DEL_C;
1355 			break;
1356 		default:
1357 			/* by default we just send the uncompressed states */
1358 			break;
1359 		}
1360 	}
1361 
1362 	if (newaction) {
1363 		if (sc->sc_mbuf_net == NULL) {
1364 			if ((sc->sc_mbuf_net = pfsync_get_mbuf(sc, newaction,
1365 			    (void *)&sc->sc_statep_net.s)) == NULL) {
1366 				crit_exit();
1367 				return (ENOMEM);
1368 			}
1369 		}
1370 		h_net = mtod(sc->sc_mbuf_net, struct pfsync_header *);
1371 
1372 		switch (newaction) {
1373 		case PFSYNC_ACT_UPD_C:
1374 			if (i != 255) {
1375 				up = (void *)((char *)h_net +
1376 				    PFSYNC_HDRLEN + (i * sizeof(*up)));
1377 				up->updates++;
1378 			} else {
1379 				h_net->count++;
1380 				sc->sc_mbuf_net->m_pkthdr.len =
1381 				    sc->sc_mbuf_net->m_len += sizeof(*up);
1382 				up = sc->sc_statep_net.u++;
1383 
1384 				bzero(up, sizeof(*up));
1385 				bcopy(&st->id, up->id, sizeof(up->id));
1386 				up->creatorid = st->creatorid;
1387 			}
1388 			up->timeout = st->timeout;
1389 			up->expire = sp->expire;
1390 			up->src = sp->src;
1391 			up->dst = sp->dst;
1392 			break;
1393 		case PFSYNC_ACT_DEL_C:
1394 			sc->sc_mbuf_net->m_pkthdr.len =
1395 			    sc->sc_mbuf_net->m_len += sizeof(*dp);
1396 			dp = sc->sc_statep_net.d++;
1397 			h_net->count++;
1398 
1399 			bzero(dp, sizeof(*dp));
1400 			bcopy(&st->id, dp->id, sizeof(dp->id));
1401 			dp->creatorid = st->creatorid;
1402 			break;
1403 		}
1404 	}
1405 
1406 	if (h->count == sc->sc_maxcount ||
1407 	    (sc->sc_maxupdates && (sp->updates >= sc->sc_maxupdates)))
1408 		ret = pfsync_sendout(sc);
1409 
1410 	crit_exit();
1411 	return (ret);
1412 }
1413 
1414 int
1415 pfsync_request_update(struct pfsync_state_upd *up, struct in_addr *src)
1416 {
1417 	struct ifnet *ifp = NULL;
1418 	struct pfsync_header *h;
1419 	struct pfsync_softc *sc = pfsyncif;
1420 	struct pfsync_state_upd_req *rup;
1421 	int ret = 0;
1422 
1423 	if (sc == NULL)
1424 		return (0);
1425 
1426 	ifp = &sc->sc_if;
1427 	if (sc->sc_mbuf == NULL) {
1428 		if ((sc->sc_mbuf = pfsync_get_mbuf(sc, PFSYNC_ACT_UREQ,
1429 		    (void *)&sc->sc_statep.s)) == NULL)
1430 			return (ENOMEM);
1431 		h = mtod(sc->sc_mbuf, struct pfsync_header *);
1432 	} else {
1433 		h = mtod(sc->sc_mbuf, struct pfsync_header *);
1434 		if (h->action != PFSYNC_ACT_UREQ) {
1435 			pfsync_sendout(sc);
1436 			if ((sc->sc_mbuf = pfsync_get_mbuf(sc, PFSYNC_ACT_UREQ,
1437 			    (void *)&sc->sc_statep.s)) == NULL)
1438 				return (ENOMEM);
1439 			h = mtod(sc->sc_mbuf, struct pfsync_header *);
1440 		}
1441 	}
1442 
1443 	if (src != NULL)
1444 		sc->sc_sendaddr = *src;
1445 	sc->sc_mbuf->m_pkthdr.len = sc->sc_mbuf->m_len += sizeof(*rup);
1446 	h->count++;
1447 	rup = sc->sc_statep.r++;
1448 	bzero(rup, sizeof(*rup));
1449 	if (up != NULL) {
1450 		bcopy(up->id, rup->id, sizeof(rup->id));
1451 		rup->creatorid = up->creatorid;
1452 	}
1453 
1454 	if (h->count == sc->sc_maxcount)
1455 		ret = pfsync_sendout(sc);
1456 
1457 	return (ret);
1458 }
1459 
1460 int
1461 pfsync_clear_states(u_int32_t creatorid, char *ifname)
1462 {
1463 	struct ifnet *ifp = NULL;
1464 	struct pfsync_softc *sc = pfsyncif;
1465 	struct pfsync_state_clr *cp;
1466 	int ret;
1467 
1468 	if (sc == NULL)
1469 		return (0);
1470 
1471 	ifp = &sc->sc_if;
1472 	crit_enter();
1473 	if (sc->sc_mbuf != NULL)
1474 		pfsync_sendout(sc);
1475 	if ((sc->sc_mbuf = pfsync_get_mbuf(sc, PFSYNC_ACT_CLR,
1476 	    (void *)&sc->sc_statep.c)) == NULL) {
1477 		crit_exit();
1478 		return (ENOMEM);
1479 	}
1480 	sc->sc_mbuf->m_pkthdr.len = sc->sc_mbuf->m_len += sizeof(*cp);
1481 	cp = sc->sc_statep.c;
1482 	cp->creatorid = creatorid;
1483 	if (ifname != NULL)
1484 		strlcpy(cp->ifname, ifname, IFNAMSIZ);
1485 
1486 	ret = (pfsync_sendout(sc));
1487 	crit_exit();
1488 	return (ret);
1489 }
1490 
/*
 * Flush timer callback (sc_tmo): transmit whatever partially filled
 * pfsync packet is currently queued.
 */
void
pfsync_timeout(void *v)
{
	struct pfsync_softc *sc = v;

	crit_enter();
	pfsync_sendout(sc);
	crit_exit();
}
1500 
1501 void
1502 pfsync_send_bus(struct pfsync_softc *sc, u_int8_t status)
1503 {
1504 	struct pfsync_state_bus *bus;
1505 
1506 	if (sc->sc_mbuf != NULL)
1507 		pfsync_sendout(sc);
1508 
1509 	if (pfsync_sync_ok &&
1510 	    (sc->sc_mbuf = pfsync_get_mbuf(sc, PFSYNC_ACT_BUS,
1511 	    (void *)&sc->sc_statep.b)) != NULL) {
1512 		sc->sc_mbuf->m_pkthdr.len = sc->sc_mbuf->m_len += sizeof(*bus);
1513 		bus = sc->sc_statep.b;
1514 		bus->creatorid = pf_status.hostid;
1515 		bus->status = status;
1516 		bus->endtime = htonl(mycpu->gd_time_seconds - sc->sc_ureq_received);
1517 		pfsync_sendout(sc);
1518 	}
1519 }
1520 
1521 void
1522 pfsync_bulk_update(void *v)
1523 {
1524 	struct pfsync_softc *sc = v;
1525 	int i = 0;
1526 	struct pf_state *state;
1527 
1528 	ASSERT_LWKT_TOKEN_HELD(&pf_token);
1529 
1530 	crit_enter();
1531 	if (sc->sc_mbuf != NULL)
1532 		pfsync_sendout(sc);
1533 
1534 	/*
1535 	 * Grab at most PFSYNC_BULKPACKETS worth of states which have not
1536 	 * been sent since the latest request was made.
1537 	 */
1538 	state = sc->sc_bulk_send_next;
1539 	if (state)
1540 		do {
1541 			/* send state update if syncable and not already sent */
1542 			if (!state->sync_flags
1543 			    && state->timeout < PFTM_MAX
1544 			    && state->pfsync_time <= sc->sc_ureq_received) {
1545 				pfsync_pack_state(PFSYNC_ACT_UPD, state, 0);
1546 				i++;
1547 			}
1548 
1549 			/* figure next state to send */
1550 			state = TAILQ_NEXT(state, entry_list);
1551 
1552 			/* wrap to start of list if we hit the end */
1553 			if (!state)
1554 				state = TAILQ_FIRST(&state_list);
1555 		} while (i < sc->sc_maxcount * PFSYNC_BULKPACKETS &&
1556 		    state != sc->sc_bulk_terminator);
1557 
1558 	if (!state || state == sc->sc_bulk_terminator) {
1559 		/* we're done */
1560 		pfsync_send_bus(sc, PFSYNC_BUS_END);
1561 		sc->sc_ureq_received = 0;
1562 		sc->sc_bulk_send_next = NULL;
1563 		sc->sc_bulk_terminator = NULL;
1564 		lwkt_reltoken(&pf_token);
1565 		callout_stop(&sc->sc_bulk_tmo);
1566 		lwkt_gettoken(&pf_token);
1567 		if (pf_status.debug >= PF_DEBUG_MISC)
1568 			kprintf("pfsync: bulk update complete\n");
1569 	} else {
1570 		/* look again for more in a bit */
1571 		lwkt_reltoken(&pf_token);
1572 		callout_reset(&sc->sc_bulk_tmo, 1, pfsync_timeout,
1573 			    LIST_FIRST(&pfsync_list));
1574 		lwkt_gettoken(&pf_token);
1575 		sc->sc_bulk_send_next = state;
1576 	}
1577 	if (sc->sc_mbuf != NULL)
1578 		pfsync_sendout(sc);
1579 	crit_exit();
1580 }
1581 
/*
 * Bulk update failure timer (sc_bulkfail_tmo): fires when a requested
 * bulk update has not completed in time.  Re-issues the request up to
 * PFSYNC_MAX_BULKTRIES times; after that it gives up and declares this
 * host in sync anyway so carp preemption is re-enabled.
 */
void
pfsync_bulkfail(void *v)
{
	struct pfsync_softc *sc = v;
	int error;

	ASSERT_LWKT_TOKEN_HELD(&pf_token);

	if (sc->sc_bulk_tries++ < PFSYNC_MAX_BULKTRIES) {
		/* Try again in a bit */
		lwkt_reltoken(&pf_token);
		callout_reset(&sc->sc_bulkfail_tmo, 5 * hz, pfsync_bulkfail,
		    LIST_FIRST(&pfsync_list));
		lwkt_gettoken(&pf_token);
		crit_enter();
		error = pfsync_request_update(NULL, NULL);
		if (error == ENOMEM) {
			if (pf_status.debug >= PF_DEBUG_MISC)
				kprintf("pfsync: cannot allocate mbufs for "
				    "bulk update\n");
		} else
			pfsync_sendout(sc);
		crit_exit();
	} else {
		/* Pretend like the transfer was ok */
		sc->sc_ureq_sent = 0;
		sc->sc_bulk_tries = 0;
#if NCARP > 0
		if (!pfsync_sync_ok)
			carp_group_demote_adj(&sc->sc_if, -1);
#endif
		pfsync_sync_ok = 1;
		if (pf_status.debug >= PF_DEBUG_MISC)
			kprintf("pfsync: failed to receive "
			    "bulk update status\n");
		lwkt_reltoken(&pf_token);
		callout_stop(&sc->sc_bulkfail_tmo);
		lwkt_gettoken(&pf_token);
	}
}
1622 
/* This must be called in splnet() */
/*
 * Transmit the queued pfsync packet: stop the flush timer, tap the
 * full-state packet to bpf listeners, then hand it to
 * pfsync_sendout_mbuf().  When a "compressed" packet (sc_mbuf_net)
 * exists it replaces the full packet on the wire - the full copy is
 * then only seen by bpf.  Returns 0 or an errno from the send path.
 * Called with pf_token held; the token is dropped around callout_stop
 * and bpf_mtap.
 */
int
pfsync_sendout(struct pfsync_softc *sc)
{
#if NBPFILTER > 0
	struct ifnet *ifp = &sc->sc_if;
#endif
	struct mbuf *m;

	ASSERT_LWKT_TOKEN_HELD(&pf_token);

	lwkt_reltoken(&pf_token);
	callout_stop(&sc->sc_tmo);
	lwkt_gettoken(&pf_token);

	if (sc->sc_mbuf == NULL)
		return (0);
	/* Detach the pending packet before we can block. */
	m = sc->sc_mbuf;
	sc->sc_mbuf = NULL;
	sc->sc_statep.s = NULL;

#if NBPFILTER > 0
	if (ifp->if_bpf) {
		lwkt_reltoken(&pf_token);
		bpf_mtap(ifp->if_bpf, m, BPF_DIRECTION_OUT);
		lwkt_gettoken(&pf_token);
	}
#endif

	/* Prefer the compressed packet for the wire, if one was built. */
	if (sc->sc_mbuf_net) {
		m_freem(m);
		m = sc->sc_mbuf_net;
		sc->sc_mbuf_net = NULL;
		sc->sc_statep_net.s = NULL;
	}

	return pfsync_sendout_mbuf(sc, m);
}
1661 
/*
 * Prepend an IPv4 header and transmit `m' to the configured sync peer
 * (or the pfsync multicast group) via ip_output().  The packet is
 * simply dropped when no sync interface or unicast peer is configured.
 * Always returns 0; transmit failures are only counted in
 * pfsyncstats.
 */
int
pfsync_sendout_mbuf(struct pfsync_softc *sc, struct mbuf *m)
{
	struct sockaddr sa;
	struct ip *ip;

	if (sc->sc_sync_ifp ||
	    sc->sc_sync_peer.s_addr != INADDR_PFSYNC_GROUP) {
		M_PREPEND(m, sizeof(struct ip), M_WAITOK);
		if (m == NULL) {
			pfsyncstats.pfsyncs_onomem++;
			return (0);
		}
		ip = mtod(m, struct ip *);
		ip->ip_v = IPVERSION;
		ip->ip_hl = sizeof(*ip) >> 2;
		ip->ip_tos = IPTOS_LOWDELAY;
		/*
		 * NOTE(review): confirm the byte order of ip_len/ip_off
		 * matches what this platform's ip_output() expects here.
		 */
		ip->ip_len = htons(m->m_pkthdr.len);
		ip->ip_id = htons(ip_randomid());
		ip->ip_off = htons(IP_DF);
		ip->ip_ttl = PFSYNC_DFLTTL;
		ip->ip_p = IPPROTO_PFSYNC;
		ip->ip_sum = 0;

		bzero(&sa, sizeof(sa));
		ip->ip_src.s_addr = INADDR_ANY;

		if (sc->sc_sendaddr.s_addr == INADDR_PFSYNC_GROUP)
			m->m_flags |= M_MCAST;
		ip->ip_dst = sc->sc_sendaddr;
		/* Reset any one-shot destination override for next time. */
		sc->sc_sendaddr.s_addr = sc->sc_sync_peer.s_addr;

		pfsyncstats.pfsyncs_opackets++;

		if (ip_output(m, NULL, NULL, IP_RAWOUTPUT, &sc->sc_imo, NULL))
			pfsyncstats.pfsyncs_oerrors++;
	} else
		m_freem(m);

	return (0);
}
1703 
/*
 * Module load/unload handler: attaches the pfsync interface cloner on
 * load and detaches it (destroying every existing pfsync interface) on
 * unload.  pf_token is dropped around the cloner attach/detach calls,
 * which may block.
 */
static int
pfsync_modevent(module_t mod, int type, void *data)
{
	int error = 0;

	struct pfsync_softc	*pfs_if, *tmp;

	lwkt_gettoken(&pf_token);

	switch (type) {
	case MOD_LOAD:
		LIST_INIT(&pfsync_list);
		lwkt_reltoken(&pf_token);
		if_clone_attach(&pfsync_cloner);
		lwkt_gettoken(&pf_token);
		/* Override the function pointer for pf_ioctl.c */
		break;

	case MOD_UNLOAD:
		lwkt_reltoken(&pf_token);
		if_clone_detach(&pfsync_cloner);
		lwkt_gettoken(&pf_token);
		/* Destroy any interfaces that survived the cloner detach. */
		LIST_FOREACH_MUTABLE(pfs_if, &pfsync_list, sc_next, tmp) {
			pfsync_clone_destroy(&pfs_if->sc_if);
		}
		break;

	default:
		error = EINVAL;
		break;
	}

	lwkt_reltoken(&pf_token);
	return error;
}
1739 
/*
 * Kernel module glue: register the pfsync pseudo-device at
 * SI_SUB_PSEUDO time and export its module version.
 */
static moduledata_t pfsync_mod = {
	"pfsync",
	pfsync_modevent,
	0
};

#define PFSYNC_MODVER 44

DECLARE_MODULE(pfsync, pfsync_mod, SI_SUB_PSEUDO, SI_ORDER_ANY);
MODULE_VERSION(pfsync, PFSYNC_MODVER);
1750 
1751 
1752 
1753