xref: /openbsd/sys/net/if_pfsync.c (revision 17df1aa7)
1 /*	$OpenBSD: if_pfsync.c,v 1.146 2010/05/12 08:11:11 claudio Exp $	*/
2 
3 /*
4  * Copyright (c) 2002 Michael Shalayeff
5  * All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 1. Redistributions of source code must retain the above copyright
11  *    notice, this list of conditions and the following disclaimer.
12  * 2. Redistributions in binary form must reproduce the above copyright
13  *    notice, this list of conditions and the following disclaimer in the
14  *    documentation and/or other materials provided with the distribution.
15  *
16  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
17  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
18  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
19  * IN NO EVENT SHALL THE AUTHOR OR HIS RELATIVES BE LIABLE FOR ANY DIRECT,
20  * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
21  * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
22  * SERVICES; LOSS OF MIND, USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
23  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
24  * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
25  * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
26  * THE POSSIBILITY OF SUCH DAMAGE.
27  */
28 
29 /*
30  * Copyright (c) 2009 David Gwynne <dlg@openbsd.org>
31  *
32  * Permission to use, copy, modify, and distribute this software for any
33  * purpose with or without fee is hereby granted, provided that the above
34  * copyright notice and this permission notice appear in all copies.
35  *
36  * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
37  * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
38  * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
39  * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
40  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
41  * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
42  * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
43  */
44 
45 #include <sys/param.h>
46 #include <sys/proc.h>
47 #include <sys/systm.h>
48 #include <sys/time.h>
49 #include <sys/mbuf.h>
50 #include <sys/socket.h>
51 #include <sys/ioctl.h>
52 #include <sys/timeout.h>
53 #include <sys/kernel.h>
54 #include <sys/sysctl.h>
55 #include <sys/pool.h>
56 #include <sys/syslog.h>
57 
58 #include <net/if.h>
59 #include <net/if_types.h>
60 #include <net/route.h>
61 #include <net/bpf.h>
62 #include <net/netisr.h>
63 #include <netinet/in.h>
64 #include <netinet/if_ether.h>
65 #include <netinet/tcp.h>
66 #include <netinet/tcp_seq.h>
67 
68 #ifdef	INET
69 #include <netinet/in_systm.h>
70 #include <netinet/in_var.h>
71 #include <netinet/ip.h>
72 #include <netinet/ip_var.h>
73 #endif
74 
75 #ifdef INET6
76 #include <netinet6/nd6.h>
77 #endif /* INET6 */
78 
79 #include "carp.h"
80 #if NCARP > 0
81 #include <netinet/ip_carp.h>
82 #endif
83 
84 #define PF_DEBUGNAME	"pfsync: "
85 #include <net/pfvar.h>
86 #include <net/if_pfsync.h>
87 
88 #include "bpfilter.h"
89 #include "pfsync.h"
90 
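/*
 * a pfsync packet is an ip header, a pfsync_header, and a series of
 * subheader+message runs.  PFSYNC_MINPKT is the size of an empty
 * packet; sc_len is reset to it after every transmission.
 */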
91 #define PFSYNC_MINPKT ( \
92 	sizeof(struct ip) + \
93 	sizeof(struct pfsync_header))
94 
95 struct pfsync_pkt {
96 	struct ip *ip;
97 	struct in_addr src;
98 	u_int8_t flags;
99 };
100 
101 int	pfsync_upd_tcp(struct pf_state *, struct pfsync_state_peer *,
102 	    struct pfsync_state_peer *);
103 
104 int	pfsync_in_clr(struct pfsync_pkt *, caddr_t, int, int);
105 int	pfsync_in_iack(struct pfsync_pkt *, caddr_t, int, int);
106 int	pfsync_in_upd_c(struct pfsync_pkt *, caddr_t, int, int);
107 int	pfsync_in_ureq(struct pfsync_pkt *, caddr_t, int, int);
108 int	pfsync_in_del(struct pfsync_pkt *, caddr_t, int, int);
109 int	pfsync_in_del_c(struct pfsync_pkt *, caddr_t, int, int);
110 int	pfsync_in_bus(struct pfsync_pkt *, caddr_t, int, int);
111 int	pfsync_in_tdb(struct pfsync_pkt *, caddr_t, int, int);
112 int	pfsync_in_ins(struct pfsync_pkt *, caddr_t, int, int);
113 int	pfsync_in_upd(struct pfsync_pkt *, caddr_t, int, int);
114 int	pfsync_in_eof(struct pfsync_pkt *, caddr_t, int, int);
115 
116 int	pfsync_in_error(struct pfsync_pkt *, caddr_t, int, int);
117 
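/*
 * pfsync_input() indexes this table by subheader action, so entries
 * must stay in PFSYNC_ACT_* order.  len is the minimum message size
 * accepted for the action.
 */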
118 struct {
119 	int	(*in)(struct pfsync_pkt *, caddr_t, int, int);
120 	size_t	len;
121 } pfsync_acts[] = {
122 	/* PFSYNC_ACT_CLR */
123 	{ pfsync_in_clr,	sizeof(struct pfsync_clr) },
124 	/* PFSYNC_ACT_OINS */
125 	{ pfsync_in_error,	0 },
126 	/* PFSYNC_ACT_INS_ACK */
127 	{ pfsync_in_iack,	sizeof(struct pfsync_ins_ack) },
128 	/* PFSYNC_ACT_OUPD */
129 	{ pfsync_in_error,	0 },
130 	/* PFSYNC_ACT_UPD_C */
131 	{ pfsync_in_upd_c,	sizeof(struct pfsync_upd_c) },
132 	/* PFSYNC_ACT_UPD_REQ */
133 	{ pfsync_in_ureq,	sizeof(struct pfsync_upd_req) },
134 	/* PFSYNC_ACT_DEL */
135 	{ pfsync_in_del,	sizeof(struct pfsync_state) },
136 	/* PFSYNC_ACT_DEL_C */
137 	{ pfsync_in_del_c,	sizeof(struct pfsync_del_c) },
138 	/* PFSYNC_ACT_INS_F */
139 	{ pfsync_in_error,	0 },
140 	/* PFSYNC_ACT_DEL_F */
141 	{ pfsync_in_error,	0 },
142 	/* PFSYNC_ACT_BUS */
143 	{ pfsync_in_bus,	sizeof(struct pfsync_bus) },
144 	/* PFSYNC_ACT_TDB */
145 	{ pfsync_in_tdb,	sizeof(struct pfsync_tdb) },
146 	/* PFSYNC_ACT_EOF */
147 	{ pfsync_in_eof,	0 },
148 	/* PFSYNC_ACT_INS */
149 	{ pfsync_in_ins,	sizeof(struct pfsync_state) },
150 	/* PFSYNC_ACT_UPD */
151 	{ pfsync_in_upd,	sizeof(struct pfsync_state) }
152 };
153 
154 struct pfsync_q {
155 	void		(*write)(struct pf_state *, void *);
156 	size_t		len;
157 	u_int8_t	action;
158 };
159 
160 /* we have one of these for every PFSYNC_S_ */
161 void	pfsync_out_state(struct pf_state *, void *);
162 void	pfsync_out_iack(struct pf_state *, void *);
163 void	pfsync_out_upd_c(struct pf_state *, void *);
164 void	pfsync_out_del(struct pf_state *, void *);
165 
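/* the order of the entries here must match the PFSYNC_S_* queue indexes */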
166 struct pfsync_q pfsync_qs[] = {
167 	{ pfsync_out_iack,  sizeof(struct pfsync_ins_ack), PFSYNC_ACT_INS_ACK },
168 	{ pfsync_out_upd_c, sizeof(struct pfsync_upd_c),   PFSYNC_ACT_UPD_C },
169 	{ pfsync_out_del,   sizeof(struct pfsync_del_c),   PFSYNC_ACT_DEL_C },
170 	{ pfsync_out_state, sizeof(struct pfsync_state),   PFSYNC_ACT_INS },
171 	{ pfsync_out_state, sizeof(struct pfsync_state),   PFSYNC_ACT_UPD }
172 };
173 
174 void	pfsync_q_ins(struct pf_state *, int);
175 void	pfsync_q_del(struct pf_state *);
176 
177 struct pfsync_upd_req_item {
178 	TAILQ_ENTRY(pfsync_upd_req_item)	ur_entry;
179 	struct pfsync_upd_req			ur_msg;
180 };
181 TAILQ_HEAD(pfsync_upd_reqs, pfsync_upd_req_item);
182 
183 struct pfsync_deferral {
184 	TAILQ_ENTRY(pfsync_deferral)		 pd_entry;
185 	struct pf_state				*pd_st;
186 	struct mbuf				*pd_m;
187 	struct timeout				 pd_tmo;
188 };
189 TAILQ_HEAD(pfsync_deferrals, pfsync_deferral);
190 
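/* sc_pool backs both update requests and deferrals, so fit the bigger one */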
191 #define PFSYNC_PLSIZE	MAX(sizeof(struct pfsync_upd_req_item), \
192 			    sizeof(struct pfsync_deferral))
193 
194 void	pfsync_out_tdb(struct tdb *, void *);
195 
196 struct pfsync_softc {
197 	struct ifnet		 sc_if;
198 	struct ifnet		*sc_sync_if;
199 
200 	struct pool		 sc_pool;
201 
202 	struct ip_moptions	 sc_imo;
203 
204 	struct in_addr		 sc_sync_peer;
205 	u_int8_t		 sc_maxupdates;
206 
207 	struct ip		 sc_template;
208 
209 	struct pf_state_queue	 sc_qs[PFSYNC_S_COUNT];
210 	size_t			 sc_len;
211 
212 	struct pfsync_upd_reqs	 sc_upd_req_list;
213 
214 	int			 sc_defer;
215 	struct pfsync_deferrals	 sc_deferrals;
216 	u_int			 sc_deferred;
217 
218 	void			*sc_plus;
219 	size_t			 sc_pluslen;
220 
221 	u_int32_t		 sc_ureq_sent;
222 	int			 sc_bulk_tries;
223 	struct timeout		 sc_bulkfail_tmo;
224 
225 	u_int32_t		 sc_ureq_received;
226 	struct pf_state		*sc_bulk_next;
227 	struct pf_state		*sc_bulk_last;
228 	struct timeout		 sc_bulk_tmo;
229 
230 	TAILQ_HEAD(, tdb)	 sc_tdb_q;
231 
232 	struct timeout		 sc_tmo;
233 };
234 
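/* only one instance can exist: pfsync_clone_create() rejects unit != 0 */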
235 struct pfsync_softc	*pfsyncif = NULL;
236 struct pfsyncstats	 pfsyncstats;
237 
238 void	pfsyncattach(int);
239 int	pfsync_clone_create(struct if_clone *, int);
240 int	pfsync_clone_destroy(struct ifnet *);
241 int	pfsync_alloc_scrub_memory(struct pfsync_state_peer *,
242 	    struct pf_state_peer *);
243 void	pfsync_update_net_tdb(struct pfsync_tdb *);
244 int	pfsyncoutput(struct ifnet *, struct mbuf *, struct sockaddr *,
245 	    struct rtentry *);
246 int	pfsyncioctl(struct ifnet *, u_long, caddr_t);
247 void	pfsyncstart(struct ifnet *);
248 
249 struct mbuf *pfsync_if_dequeue(struct ifnet *);
250 
251 void	pfsync_deferred(struct pf_state *, int);
252 void	pfsync_undefer(struct pfsync_deferral *, int);
253 void	pfsync_defer_tmo(void *);
254 
255 void	pfsync_request_full_update(struct pfsync_softc *);
256 void	pfsync_request_update(u_int32_t, u_int64_t);
257 void	pfsync_update_state_req(struct pf_state *);
258 
259 void	pfsync_drop(struct pfsync_softc *);
260 void	pfsync_sendout(void);
261 void	pfsync_send_plus(void *, size_t);
262 void	pfsync_timeout(void *);
263 void	pfsync_tdb_timeout(void *);
264 
265 void	pfsync_bulk_start(void);
266 void	pfsync_bulk_status(u_int8_t);
267 void	pfsync_bulk_update(void *);
268 void	pfsync_bulk_fail(void *);
269 
270 #define PFSYNC_MAX_BULKTRIES	12
271 int	pfsync_sync_ok;
272 
273 struct if_clone	pfsync_cloner =
274     IF_CLONE_INITIALIZER("pfsync", pfsync_clone_create, pfsync_clone_destroy);
275 
276 void
277 pfsyncattach(int npfsync)
278 {
279 	if_clone_attach(&pfsync_cloner);
280 }
281 int
282 pfsync_clone_create(struct if_clone *ifc, int unit)
283 {
284 	struct pfsync_softc *sc;
285 	struct ifnet *ifp;
286 	int q;
287 
288 	if (unit != 0)
289 		return (EINVAL);
290 
291 	pfsync_sync_ok = 1;
292 
293 	sc = malloc(sizeof(*pfsyncif), M_DEVBUF, M_NOWAIT | M_ZERO);
294 	if (sc == NULL)
295 		return (ENOMEM);
296 
297 	for (q = 0; q < PFSYNC_S_COUNT; q++)
298 		TAILQ_INIT(&sc->sc_qs[q]);
299 
300 	pool_init(&sc->sc_pool, PFSYNC_PLSIZE, 0, 0, 0, "pfsync", NULL);
301 	TAILQ_INIT(&sc->sc_upd_req_list);
302 	TAILQ_INIT(&sc->sc_deferrals);
303 	sc->sc_deferred = 0;
304 
305 	TAILQ_INIT(&sc->sc_tdb_q);
306 
307 	sc->sc_len = PFSYNC_MINPKT;
308 	sc->sc_maxupdates = 128;
309 
310 	sc->sc_imo.imo_membership = (struct in_multi **)malloc(
311 	    (sizeof(struct in_multi *) * IP_MIN_MEMBERSHIPS), M_IPMOPTS,
312 	    M_WAITOK | M_ZERO);
313 	sc->sc_imo.imo_max_memberships = IP_MIN_MEMBERSHIPS;
314 
315 	ifp = &sc->sc_if;
316 	snprintf(ifp->if_xname, sizeof ifp->if_xname, "pfsync%d", unit);
317 	ifp->if_softc = sc;
318 	ifp->if_ioctl = pfsyncioctl;
319 	ifp->if_output = pfsyncoutput;
320 	ifp->if_start = pfsyncstart;
321 	ifp->if_type = IFT_PFSYNC;
322 	ifp->if_snd.ifq_maxlen = ifqmaxlen;
323 	ifp->if_hdrlen = sizeof(struct pfsync_header);
324 	ifp->if_mtu = 1500; /* XXX */
325 	ifp->if_hardmtu = MCLBYTES; /* XXX */
326 	timeout_set(&sc->sc_tmo, pfsync_timeout, sc);
327 	timeout_set(&sc->sc_bulk_tmo, pfsync_bulk_update, sc);
328 	timeout_set(&sc->sc_bulkfail_tmo, pfsync_bulk_fail, sc);
329 
330 	if_attach(ifp);
331 	if_alloc_sadl(ifp);
332 
333 #if NCARP > 0
334 	if_addgroup(ifp, "carp");
335 #endif
336 
337 #if NBPFILTER > 0
338 	bpfattach(&sc->sc_if.if_bpf, ifp, DLT_PFSYNC, PFSYNC_HDRLEN);
339 #endif
340 
341 	pfsyncif = sc;
342 
343 	return (0);
344 }
345 
346 int
347 pfsync_clone_destroy(struct ifnet *ifp)
348 {
349 	struct pfsync_softc *sc = ifp->if_softc;
350 	int s;
351 
352 	timeout_del(&sc->sc_bulk_tmo);
353 	timeout_del(&sc->sc_tmo);
354 #if NCARP > 0
355 	if (!pfsync_sync_ok)
356 		carp_group_demote_adj(&sc->sc_if, -1, "pfsync destroy");
357 #endif
358 #if NBPFILTER > 0
359 	bpfdetach(ifp);
360 #endif
361 	if_detach(ifp);
362 
363 	pfsync_drop(sc);
364 
365 	s = splsoftnet();
366 	while (sc->sc_deferred > 0)
367 		pfsync_undefer(TAILQ_FIRST(&sc->sc_deferrals), 0);
368 	splx(s);
369 
370 	pool_destroy(&sc->sc_pool);
371 	free(sc->sc_imo.imo_membership, M_IPMOPTS);
372 	free(sc, M_DEVBUF);
373 
374 	pfsyncif = NULL;
375 
376 	return (0);
377 }
378 
379 struct mbuf *
380 pfsync_if_dequeue(struct ifnet *ifp)
381 {
382 	struct mbuf *m;
383 
384 	IF_DEQUEUE(&ifp->if_snd, m);
385 
386 	return (m);
387 }
388 
389 /*
390  * Start output on the pfsync interface: pfsync_sendout() builds and
391  * transmits packets itself, so anything queued on if_snd is dropped.
 */
392 void
393 pfsyncstart(struct ifnet *ifp)
394 {
395 	struct mbuf *m;
396 	int s;
397 
398 	s = splnet();
399 	while ((m = pfsync_if_dequeue(ifp)) != NULL) {
400 		IF_DROP(&ifp->if_snd);
401 		m_freem(m);
402 	}
403 	splx(s);
404 }
405 
406 int
407 pfsync_alloc_scrub_memory(struct pfsync_state_peer *s,
408     struct pf_state_peer *d)
409 {
410 	if (s->scrub.scrub_flag && d->scrub == NULL) {
411 		d->scrub = pool_get(&pf_state_scrub_pl, PR_NOWAIT | PR_ZERO);
412 		if (d->scrub == NULL)
413 			return (ENOMEM);
414 	}
415 
416 	return (0);
417 }
418 
419 void
420 pfsync_state_export(struct pfsync_state *sp, struct pf_state *st)
421 {
422 	bzero(sp, sizeof(struct pfsync_state));
423 
424 	/* copy from state key */
425 	sp->key[PF_SK_WIRE].addr[0] = st->key[PF_SK_WIRE]->addr[0];
426 	sp->key[PF_SK_WIRE].addr[1] = st->key[PF_SK_WIRE]->addr[1];
427 	sp->key[PF_SK_WIRE].port[0] = st->key[PF_SK_WIRE]->port[0];
428 	sp->key[PF_SK_WIRE].port[1] = st->key[PF_SK_WIRE]->port[1];
429 	sp->key[PF_SK_WIRE].rdomain = htons(st->key[PF_SK_WIRE]->rdomain);
430 	sp->key[PF_SK_STACK].addr[0] = st->key[PF_SK_STACK]->addr[0];
431 	sp->key[PF_SK_STACK].addr[1] = st->key[PF_SK_STACK]->addr[1];
432 	sp->key[PF_SK_STACK].port[0] = st->key[PF_SK_STACK]->port[0];
433 	sp->key[PF_SK_STACK].port[1] = st->key[PF_SK_STACK]->port[1];
434 	sp->key[PF_SK_STACK].rdomain = htons(st->key[PF_SK_STACK]->rdomain);
435 	sp->rtableid[PF_SK_WIRE] = htonl(st->rtableid[PF_SK_WIRE]);
436 	sp->rtableid[PF_SK_STACK] = htonl(st->rtableid[PF_SK_STACK]);
437 	sp->proto = st->key[PF_SK_WIRE]->proto;
438 	sp->af = st->key[PF_SK_WIRE]->af;
439 
440 	/* copy from state */
441 	strlcpy(sp->ifname, st->kif->pfik_name, sizeof(sp->ifname));
442 	bcopy(&st->rt_addr, &sp->rt_addr, sizeof(sp->rt_addr));
443 	sp->creation = htonl(time_second - st->creation);
444 	sp->expire = pf_state_expires(st);
445 	if (sp->expire <= time_second)
446 		sp->expire = htonl(0);
447 	else
448 		sp->expire = htonl(sp->expire - time_second);
449 
450 	sp->direction = st->direction;
451 	sp->log = st->log;
452 	sp->timeout = st->timeout;
453 	sp->state_flags = st->state_flags;
454 	if (!SLIST_EMPTY(&st->src_nodes))
455 		sp->sync_flags |= PFSYNC_FLAG_SRCNODE;
456 
457 	bcopy(&st->id, &sp->id, sizeof(sp->id));
458 	sp->creatorid = st->creatorid;
459 	pf_state_peer_hton(&st->src, &sp->src);
460 	pf_state_peer_hton(&st->dst, &sp->dst);
461 
462 	if (st->rule.ptr == NULL)
463 		sp->rule = htonl(-1);
464 	else
465 		sp->rule = htonl(st->rule.ptr->nr);
466 	if (st->anchor.ptr == NULL)
467 		sp->anchor = htonl(-1);
468 	else
469 		sp->anchor = htonl(st->anchor.ptr->nr);
470 	sp->nat_rule = htonl(-1);	/* left for compat, nat_rule is gone */
471 
472 	pf_state_counter_hton(st->packets[0], sp->packets[0]);
473 	pf_state_counter_hton(st->packets[1], sp->packets[1]);
474 	pf_state_counter_hton(st->bytes[0], sp->bytes[0]);
475 	pf_state_counter_hton(st->bytes[1], sp->bytes[1]);
476 
477 	sp->max_mss = htons(st->max_mss);
478 	sp->min_ttl = st->min_ttl;
479 	sp->set_tos = st->set_tos;
480 }
481 
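/*
 * create a local state from a peer's pfsync_state.  on the wire path
 * (ie, no PFSYNC_SI_IOCTL) the new state is inserted with
 * PFSTATE_NOSYNC set so the insertion is not echoed back to the peer.
 */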
482 int
483 pfsync_state_import(struct pfsync_state *sp, u_int8_t flags)
484 {
485 	struct pf_state	*st = NULL;
486 	struct pf_state_key *skw = NULL, *sks = NULL;
487 	struct pf_rule *r = NULL;
488 	struct pfi_kif	*kif;
489 	int pool_flags;
490 	int error;
491 
492 	if (sp->creatorid == 0) {
493 		DPFPRINTF(LOG_NOTICE, "pfsync_state_import: "
494 		    "invalid creator id: %08x", ntohl(sp->creatorid));
495 		return (EINVAL);
496 	}
497 
498 	if ((kif = pfi_kif_get(sp->ifname)) == NULL) {
499 		DPFPRINTF(LOG_NOTICE, "pfsync_state_import: "
500 		    "unknown interface: %s", sp->ifname);
501 		if (flags & PFSYNC_SI_IOCTL)
502 			return (EINVAL);
503 		return (0);	/* skip this state */
504 	}
505 
506 	/*
507 	 * If the ruleset checksums match or the state is coming from the ioctl,
508 	 * it's safe to associate the state with the rule of that number.
509 	 */
510 	if (sp->rule != htonl(-1) && sp->anchor == htonl(-1) &&
511 	    (flags & (PFSYNC_SI_IOCTL | PFSYNC_SI_CKSUM)) && ntohl(sp->rule) <
512 	    pf_main_ruleset.rules.active.rcount)
513 		r = pf_main_ruleset.rules.active.ptr_array[ntohl(sp->rule)];
514 	else
515 		r = &pf_default_rule;
516 
517 	if (r->max_states && r->states_cur >= r->max_states)
518 		goto cleanup;
519 
520 	if (flags & PFSYNC_SI_IOCTL)
521 		pool_flags = PR_WAITOK | PR_LIMITFAIL | PR_ZERO;
522 	else
523 		pool_flags = PR_LIMITFAIL | PR_ZERO;
524 
525 	if ((st = pool_get(&pf_state_pl, pool_flags)) == NULL)
526 		goto cleanup;
527 
528 	if ((skw = pf_alloc_state_key(pool_flags)) == NULL)
529 		goto cleanup;
530 
531 	if (PF_ANEQ(&sp->key[PF_SK_WIRE].addr[0],
532 	    &sp->key[PF_SK_STACK].addr[0], sp->af) ||
533 	    PF_ANEQ(&sp->key[PF_SK_WIRE].addr[1],
534 	    &sp->key[PF_SK_STACK].addr[1], sp->af) ||
535 	    sp->key[PF_SK_WIRE].port[0] != sp->key[PF_SK_STACK].port[0] ||
536 	    sp->key[PF_SK_WIRE].port[1] != sp->key[PF_SK_STACK].port[1] ||
537 	    sp->key[PF_SK_WIRE].rdomain != sp->key[PF_SK_STACK].rdomain) {
538 		if ((sks = pf_alloc_state_key(pool_flags)) == NULL)
539 			goto cleanup;
540 	} else
541 		sks = skw;
542 
543 	/* allocate memory for scrub info */
544 	if (pfsync_alloc_scrub_memory(&sp->src, &st->src) ||
545 	    pfsync_alloc_scrub_memory(&sp->dst, &st->dst))
546 		goto cleanup;
547 
548 	/* copy to state key(s) */
549 	skw->addr[0] = sp->key[PF_SK_WIRE].addr[0];
550 	skw->addr[1] = sp->key[PF_SK_WIRE].addr[1];
551 	skw->port[0] = sp->key[PF_SK_WIRE].port[0];
552 	skw->port[1] = sp->key[PF_SK_WIRE].port[1];
553 	skw->rdomain = ntohs(sp->key[PF_SK_WIRE].rdomain);
554 	skw->proto = sp->proto;
555 	skw->af = sp->af;
556 	if (sks != skw) {
557 		sks->addr[0] = sp->key[PF_SK_STACK].addr[0];
558 		sks->addr[1] = sp->key[PF_SK_STACK].addr[1];
559 		sks->port[0] = sp->key[PF_SK_STACK].port[0];
560 		sks->port[1] = sp->key[PF_SK_STACK].port[1];
561 		sks->rdomain = ntohs(sp->key[PF_SK_STACK].rdomain);
562 		sks->proto = sp->proto;
563 		sks->af = sp->af;
564 	}
565 	st->rtableid[PF_SK_WIRE] = ntohl(sp->rtableid[PF_SK_WIRE]);
566 	st->rtableid[PF_SK_STACK] = ntohl(sp->rtableid[PF_SK_STACK]);
567 
568 	/* copy to state */
569 	bcopy(&sp->rt_addr, &st->rt_addr, sizeof(st->rt_addr));
570 	st->creation = time_second - ntohl(sp->creation);
571 	st->expire = time_second;
572 	if (sp->expire) {
573 		/* XXX No adaptive scaling. */
574 		st->expire -= r->timeout[sp->timeout] - ntohl(sp->expire);
575 	}
576 
578 	st->direction = sp->direction;
579 	st->log = sp->log;
580 	st->timeout = sp->timeout;
581 	st->state_flags = sp->state_flags;
582 	st->max_mss = ntohs(sp->max_mss);
583 	st->min_ttl = sp->min_ttl;
584 	st->set_tos = sp->set_tos;
585 
586 	bcopy(sp->id, &st->id, sizeof(st->id));
587 	st->creatorid = sp->creatorid;
588 	pf_state_peer_ntoh(&sp->src, &st->src);
589 	pf_state_peer_ntoh(&sp->dst, &st->dst);
590 
591 	st->rule.ptr = r;
592 	st->anchor.ptr = NULL;
593 	st->rt_kif = NULL;
594 
595 	st->pfsync_time = time_uptime;
596 	st->sync_state = PFSYNC_S_NONE;
597 
598 	/* XXX when we have anchors, use STATE_INC_COUNTERS */
599 	r->states_cur++;
600 	r->states_tot++;
601 
602 	if (!ISSET(flags, PFSYNC_SI_IOCTL))
603 		SET(st->state_flags, PFSTATE_NOSYNC);
604 
605 	if (pf_state_insert(kif, skw, sks, st) != 0) {
606 		/* XXX when we have anchors, use STATE_DEC_COUNTERS */
607 		r->states_cur--;
608 		error = EEXIST;
609 		goto cleanup_state;
610 	}
611 
612 	if (!ISSET(flags, PFSYNC_SI_IOCTL)) {
613 		CLR(st->state_flags, PFSTATE_NOSYNC);
614 		if (ISSET(st->state_flags, PFSTATE_ACK)) {
615 			pfsync_q_ins(st, PFSYNC_S_IACK);
616 			schednetisr(NETISR_PFSYNC);
617 		}
618 	}
619 	CLR(st->state_flags, PFSTATE_ACK);
620 
621 	return (0);
622 
623  cleanup:
624 	error = ENOMEM;
625 	if (skw == sks)
626 		sks = NULL;
627 	if (skw != NULL)
628 		pool_put(&pf_state_key_pl, skw);
629 	if (sks != NULL)
630 		pool_put(&pf_state_key_pl, sks);
631 
632  cleanup_state:	/* pf_state_insert frees the state keys */
633 	if (st) {
634 		if (st->dst.scrub)
635 			pool_put(&pf_state_scrub_pl, st->dst.scrub);
636 		if (st->src.scrub)
637 			pool_put(&pf_state_scrub_pl, st->src.scrub);
638 		pool_put(&pf_state_pl, st);
639 	}
640 	return (error);
641 }
642 
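/*
 * handle an incoming pfsync packet: sanity check the ip and pfsync
 * headers, then walk the subheaders and feed each run of messages to
 * its pfsync_acts[] handler.  a nonzero return from a handler (eg,
 * eof) ends the walk.
 */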
643 void
644 pfsync_input(struct mbuf *m, ...)
645 {
646 	struct pfsync_softc *sc = pfsyncif;
647 	struct pfsync_pkt pkt;
648 	struct ip *ip = mtod(m, struct ip *);
649 	struct mbuf *mp;
650 	struct pfsync_header *ph;
651 	struct pfsync_subheader subh;
652 
653 	int offset, offp, len, count, mlen;
654 
655 	pfsyncstats.pfsyncs_ipackets++;
656 
657 	/* verify that we have a sync interface configured */
658 	if (sc == NULL || !ISSET(sc->sc_if.if_flags, IFF_RUNNING) ||
659 	    sc->sc_sync_if == NULL || !pf_status.running)
660 		goto done;
661 
662 	/* verify that the packet came in on the right interface */
663 	if (sc->sc_sync_if != m->m_pkthdr.rcvif) {
664 		pfsyncstats.pfsyncs_badif++;
665 		goto done;
666 	}
667 
668 	sc->sc_if.if_ipackets++;
669 	sc->sc_if.if_ibytes += m->m_pkthdr.len;
670 
671 	/* verify that the IP TTL is 255. */
672 	if (ip->ip_ttl != PFSYNC_DFLTTL) {
673 		pfsyncstats.pfsyncs_badttl++;
674 		goto done;
675 	}
676 
677 	offset = ip->ip_hl << 2;
678 	if (m->m_pkthdr.len < offset + sizeof(*ph)) {
679 		pfsyncstats.pfsyncs_hdrops++;
680 		goto done;
681 	}
682 
683 	if (offset + sizeof(*ph) > m->m_len) {
684 		if (m_pullup(m, offset + sizeof(*ph)) == NULL) {
685 			pfsyncstats.pfsyncs_hdrops++;
686 			return;
687 		}
688 		ip = mtod(m, struct ip *);
689 	}
690 	ph = (struct pfsync_header *)((char *)ip + offset);
691 
692 	/* verify the version */
693 	if (ph->version != PFSYNC_VERSION) {
694 		pfsyncstats.pfsyncs_badver++;
695 		goto done;
696 	}
697 	len = ntohs(ph->len) + offset;
698 	if (m->m_pkthdr.len < len) {
699 		pfsyncstats.pfsyncs_badlen++;
700 		goto done;
701 	}
702 
703 	/* Cheaper to grab this now than having to mess with mbufs later */
704 	pkt.ip = ip;
705 	pkt.src = ip->ip_src;
706 	pkt.flags = 0;
707 
708 	if (!bcmp(&ph->pfcksum, &pf_status.pf_chksum, PF_MD5_DIGEST_LENGTH))
709 		pkt.flags |= PFSYNC_SI_CKSUM;
710 
711 	offset += sizeof(*ph);
712 	while (offset <= len - sizeof(subh)) {
713 		m_copydata(m, offset, sizeof(subh), (caddr_t)&subh);
714 		offset += sizeof(subh);
715 
716 		mlen = subh.len << 2;
717 		count = ntohs(subh.count);
718 
719 		if (subh.action >= PFSYNC_ACT_MAX ||
720 		    subh.action >= nitems(pfsync_acts) ||
721 		    mlen < pfsync_acts[subh.action].len) {
722 			/*
723 			 * subheaders are always followed by at least one
724 			 * message, so if the peer is new enough to tell us
725 			 * how big its messages are then we know enough to
726 			 * skip them.
727 			 */
728 			if (count > 0 && mlen > 0) {
729 				offset += count * mlen;
730 				continue;
731 			}
732 			pfsyncstats.pfsyncs_badact++;
733 			goto done;
734 		}
735 
736 		mp = m_pulldown(m, offset, mlen * count, &offp);
737 		if (mp == NULL) {
738 			pfsyncstats.pfsyncs_badlen++;
739 			return;
740 		}
741 
742 		if (pfsync_acts[subh.action].in(&pkt, mp->m_data + offp,
743 		    mlen, count) != 0)
744 			goto done;
745 
746 		offset += mlen * count;
747 	}
748 
749 done:
750 	m_freem(m);
751 }
752 
753 int
754 pfsync_in_clr(struct pfsync_pkt *pkt, caddr_t buf, int len, int count)
755 {
756 	struct pfsync_clr *clr;
757 	int i;
758 
759 	struct pf_state *st, *nexts;
760 	struct pf_state_key *sk, *nextsk;
761 	struct pf_state_item *si;
762 	u_int32_t creatorid;
763 	int s;
764 
765 	s = splsoftnet();
766 	for (i = 0; i < count; i++) {
767 		clr = (struct pfsync_clr *)(buf + len * i);
768 		creatorid = clr->creatorid;
769 
770 		if (clr->ifname[0] == '\0') {
771 			for (st = RB_MIN(pf_state_tree_id, &tree_id);
772 			    st; st = nexts) {
773 				nexts = RB_NEXT(pf_state_tree_id, &tree_id, st);
774 				if (st->creatorid == creatorid) {
775 					SET(st->state_flags, PFSTATE_NOSYNC);
776 					pf_unlink_state(st);
777 				}
778 			}
779 		} else {
780 			if (pfi_kif_get(clr->ifname) == NULL)
781 				continue;
782 
783 			/* XXX correct? */
784 			for (sk = RB_MIN(pf_state_tree, &pf_statetbl);
785 			    sk; sk = nextsk) {
786 				nextsk = RB_NEXT(pf_state_tree,
787 				    &pf_statetbl, sk);
788 				TAILQ_FOREACH(si, &sk->states, entry) {
789 					if (si->s->creatorid == creatorid) {
790 						SET(si->s->state_flags,
791 						    PFSTATE_NOSYNC);
792 						pf_unlink_state(si->s);
793 					}
794 				}
795 			}
796 		}
797 	}
798 	splx(s);
799 
800 	return (0);
801 }
802 
803 int
804 pfsync_in_ins(struct pfsync_pkt *pkt, caddr_t buf, int len, int count)
805 {
806 	struct pfsync_state *sp;
807 	int i;
808 
809 	int s;
810 
811 	s = splsoftnet();
812 	for (i = 0; i < count; i++) {
813 		sp = (struct pfsync_state *)(buf + len * i);
814 
815 		/* check for invalid values */
816 		if (sp->timeout >= PFTM_MAX ||
817 		    sp->src.state > PF_TCPS_PROXY_DST ||
818 		    sp->dst.state > PF_TCPS_PROXY_DST ||
819 		    sp->direction > PF_OUT ||
820 		    (sp->af != AF_INET && sp->af != AF_INET6)) {
821 			DPFPRINTF(LOG_NOTICE,
822 			    "pfsync_input: PFSYNC5_ACT_INS: invalid value");
823 			pfsyncstats.pfsyncs_badval++;
824 			continue;
825 		}
826 
827 		if (pfsync_state_import(sp, pkt->flags) == ENOMEM) {
828 			/* drop out, but process the rest of the actions */
829 			break;
830 		}
831 	}
832 	splx(s);
833 
834 	return (0);
835 }
836 
837 int
838 pfsync_in_iack(struct pfsync_pkt *pkt, caddr_t buf, int len, int count)
839 {
840 	struct pfsync_ins_ack *ia;
841 	struct pf_state_cmp id_key;
842 	struct pf_state *st;
843 	int i;
844 	int s;
845 
846 	s = splsoftnet();
847 	for (i = 0; i < count; i++) {
848 		ia = (struct pfsync_ins_ack *)(buf + len * i);
849 
850 		bcopy(&ia->id, &id_key.id, sizeof(id_key.id));
851 		id_key.creatorid = ia->creatorid;
852 
853 		st = pf_find_state_byid(&id_key);
854 		if (st == NULL)
855 			continue;
856 
857 		if (ISSET(st->state_flags, PFSTATE_ACK))
858 			pfsync_deferred(st, 0);
859 	}
860 	splx(s);
861 
862 	return (0);
863 }
864 
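/*
 * merge a peer's view of a tcp state into ours.  returns the number of
 * peers (0-2) for which our local copy is fresher than the update;
 * callers treat a nonzero return as "the sender is stale" and push our
 * version back out.
 */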
865 int
866 pfsync_upd_tcp(struct pf_state *st, struct pfsync_state_peer *src,
867     struct pfsync_state_peer *dst)
868 {
869 	int sync = 0;
870 
871 	/*
872 	 * The state should never go backwards except
873 	 * for syn-proxy states.  Neither should the
874 	 * sequence window slide backwards.
875 	 */
876 	if ((st->src.state > src->state &&
877 	    (st->src.state < PF_TCPS_PROXY_SRC ||
878 	    src->state >= PF_TCPS_PROXY_SRC)) ||
879 
880 	    (st->src.state == src->state &&
881 	    SEQ_GT(st->src.seqlo, ntohl(src->seqlo))))
882 		sync++;
883 	else
884 		pf_state_peer_ntoh(src, &st->src);
885 
886 	if ((st->dst.state > dst->state) ||
887 
888 	    (st->dst.state >= TCPS_SYN_SENT &&
889 	    SEQ_GT(st->dst.seqlo, ntohl(dst->seqlo))))
890 		sync++;
891 	else
892 		pf_state_peer_ntoh(dst, &st->dst);
893 
894 	return (sync);
895 }
896 
897 int
898 pfsync_in_upd(struct pfsync_pkt *pkt, caddr_t buf, int len, int count)
899 {
900 	struct pfsync_state *sp;
901 	struct pf_state_cmp id_key;
902 	struct pf_state *st;
903 	int sync;
904 
905 	int i;
906 	int s;
907 
909 	s = splsoftnet();
910 	for (i = 0; i < count; i++) {
911 		sp = (struct pfsync_state *)(buf + len * i);
912 
913 		/* check for invalid values */
914 		if (sp->timeout >= PFTM_MAX ||
915 		    sp->src.state > PF_TCPS_PROXY_DST ||
916 		    sp->dst.state > PF_TCPS_PROXY_DST) {
917 			DPFPRINTF(LOG_NOTICE,
918 			    "pfsync_input: PFSYNC_ACT_UPD: invalid value");
919 			pfsyncstats.pfsyncs_badval++;
920 			continue;
921 		}
922 
923 		bcopy(sp->id, &id_key.id, sizeof(id_key.id));
924 		id_key.creatorid = sp->creatorid;
925 
926 		st = pf_find_state_byid(&id_key);
927 		if (st == NULL) {
928 			/* insert the update */
929 			if (pfsync_state_import(sp, 0))
930 				pfsyncstats.pfsyncs_badstate++;
931 			continue;
932 		}
933 
934 		if (ISSET(st->state_flags, PFSTATE_ACK))
935 			pfsync_deferred(st, 1);
936 
958 		if (st->key[PF_SK_WIRE]->proto == IPPROTO_TCP)
959 			sync = pfsync_upd_tcp(st, &sp->src, &sp->dst);
960 		else {
961 			sync = 0;
962 
963 			/*
964 			 * Non-TCP protocol state machines always go
965 			 * forward.
966 			 */
967 			if (st->src.state > sp->src.state)
968 				sync++;
969 			else
970 				pf_state_peer_ntoh(&sp->src, &st->src);
971 
972 			if (st->dst.state > sp->dst.state)
973 				sync++;
974 			else
975 				pf_state_peer_ntoh(&sp->dst, &st->dst);
976 		}
977 
978 		if (sync < 2) {
979 			pfsync_alloc_scrub_memory(&sp->dst, &st->dst);
980 			pf_state_peer_ntoh(&sp->dst, &st->dst);
981 			st->expire = ntohl(sp->expire) + time_second;
982 			st->timeout = sp->timeout;
983 		}
984 		st->pfsync_time = time_uptime;
985 
986 		if (sync) {
987 			pfsyncstats.pfsyncs_stale++;
988 
989 			pfsync_update_state(st);
990 			schednetisr(NETISR_PFSYNC);
991 		}
992 	}
993 	splx(s);
994 
995 	return (0);
996 }
997 
998 int
999 pfsync_in_upd_c(struct pfsync_pkt *pkt, caddr_t buf, int len, int count)
1000 {
1001 	struct pfsync_upd_c *up;
1002 	struct pf_state_cmp id_key;
1003 	struct pf_state *st;
1004 
1005 	int sync;
1006 
1007 	int i;
1008 	int s;
1009 
1010 	s = splsoftnet();
1011 	for (i = 0; i < count; i++) {
1012 		up = (struct pfsync_upd_c *)(buf + len * i);
1013 
1014 		/* check for invalid values */
1015 		if (up->timeout >= PFTM_MAX ||
1016 		    up->src.state > PF_TCPS_PROXY_DST ||
1017 		    up->dst.state > PF_TCPS_PROXY_DST) {
1018 			DPFPRINTF(LOG_NOTICE,
1019 			    "pfsync_input: PFSYNC_ACT_UPD_C: invalid value");
1020 			pfsyncstats.pfsyncs_badval++;
1021 			continue;
1022 		}
1023 
1024 		bcopy(&up->id, &id_key.id, sizeof(id_key.id));
1025 		id_key.creatorid = up->creatorid;
1026 
1027 		st = pf_find_state_byid(&id_key);
1028 		if (st == NULL) {
1029 			/* We don't have this state. Ask for it. */
1030 			pfsync_request_update(id_key.creatorid, id_key.id);
1031 			continue;
1032 		}
1033 
1034 		if (ISSET(st->state_flags, PFSTATE_ACK))
1035 			pfsync_deferred(st, 1);
1036 
1037 		if (st->key[PF_SK_WIRE]->proto == IPPROTO_TCP)
1038 			sync = pfsync_upd_tcp(st, &up->src, &up->dst);
1039 		else {
1040 			sync = 0;
1041 			/*
1042 			 * Non-TCP protocol state machines always go
1043 			 * forward.
1044 			 */
1045 			if (st->src.state > up->src.state)
1046 				sync++;
1047 			else
1048 				pf_state_peer_ntoh(&up->src, &st->src);
1049 
1050 			if (st->dst.state > up->dst.state)
1051 				sync++;
1052 			else
1053 				pf_state_peer_ntoh(&up->dst, &st->dst);
1054 		}
1055 		if (sync < 2) {
1056 			pfsync_alloc_scrub_memory(&up->dst, &st->dst);
1057 			pf_state_peer_ntoh(&up->dst, &st->dst);
1058 			st->expire = ntohl(up->expire) + time_second;
1059 			st->timeout = up->timeout;
1060 		}
1061 		st->pfsync_time = time_uptime;
1062 
1063 		if (sync) {
1064 			pfsyncstats.pfsyncs_stale++;
1065 
1066 			pfsync_update_state(st);
1067 			schednetisr(NETISR_PFSYNC);
1068 		}
1069 	}
1070 	splx(s);
1071 
1072 	return (0);
1073 }
1074 
1075 int
1076 pfsync_in_ureq(struct pfsync_pkt *pkt, caddr_t buf, int len, int count)
1077 {
1078 	struct pfsync_upd_req *ur;
1079 	int i;
1080 
1081 	struct pf_state_cmp id_key;
1082 	struct pf_state *st;
1083 
1084 	for (i = 0; i < count; i++) {
1085 		ur = (struct pfsync_upd_req *)(buf + len * i);
1086 
1087 		bcopy(&ur->id, &id_key.id, sizeof(id_key.id));
1088 		id_key.creatorid = ur->creatorid;
1089 
1090 		if (id_key.id == 0 && id_key.creatorid == 0)
1091 			pfsync_bulk_start();
1092 		else {
1093 			st = pf_find_state_byid(&id_key);
1094 			if (st == NULL) {
1095 				pfsyncstats.pfsyncs_badstate++;
1096 				continue;
1097 			}
1098 			if (ISSET(st->state_flags, PFSTATE_NOSYNC))
1099 				continue;
1100 
1101 			pfsync_update_state_req(st);
1102 		}
1103 	}
1104 
1105 	return (0);
1106 }
1107 
1108 int
1109 pfsync_in_del(struct pfsync_pkt *pkt, caddr_t buf, int len, int count)
1110 {
1111 	struct pfsync_state *sp;
1112 	struct pf_state_cmp id_key;
1113 	struct pf_state *st;
1114 	int i;
1115 	int s;
1116 
1117 	s = splsoftnet();
1118 	for (i = 0; i < count; i++) {
1119 		sp = (struct pfsync_state *)(buf + len * i);
1120 
1121 		bcopy(sp->id, &id_key.id, sizeof(id_key.id));
1122 		id_key.creatorid = sp->creatorid;
1123 
1124 		st = pf_find_state_byid(&id_key);
1125 		if (st == NULL) {
1126 			pfsyncstats.pfsyncs_badstate++;
1127 			continue;
1128 		}
1129 		SET(st->state_flags, PFSTATE_NOSYNC);
1130 		pf_unlink_state(st);
1131 	}
1132 	splx(s);
1133 
1134 	return (0);
1135 }
1136 
1137 int
1138 pfsync_in_del_c(struct pfsync_pkt *pkt, caddr_t buf, int len, int count)
1139 {
1140 	struct pfsync_del_c *sp;
1141 	struct pf_state_cmp id_key;
1142 	struct pf_state *st;
1143 	int i;
1144 	int s;
1145 
1146 	s = splsoftnet();
1147 	for (i = 0; i < count; i++) {
1148 		sp = (struct pfsync_del_c *)(buf + len * i);
1149 
1150 		bcopy(&sp->id, &id_key.id, sizeof(id_key.id));
1151 		id_key.creatorid = sp->creatorid;
1152 
1153 		st = pf_find_state_byid(&id_key);
1154 		if (st == NULL) {
1155 			pfsyncstats.pfsyncs_badstate++;
1156 			continue;
1157 		}
1158 
1159 		SET(st->state_flags, PFSTATE_NOSYNC);
1160 		pf_unlink_state(st);
1161 	}
1162 	splx(s);
1163 
1164 	return (0);
1165 }
1166 
1167 int
1168 pfsync_in_bus(struct pfsync_pkt *pkt, caddr_t buf, int len, int count)
1169 {
1170 	struct pfsync_softc *sc = pfsyncif;
1171 	struct pfsync_bus *bus;
1172 
1173 	/* If we're not waiting for a bulk update, who cares. */
1174 	if (sc->sc_ureq_sent == 0)
1175 		return (0);
1176 
1177 	bus = (struct pfsync_bus *)buf;
1178 
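	/*
	 * the bulk-fail timeout below is 4 seconds of slack plus roughly
	 * one tick for every full packet of states the peer may have to
	 * send (ie, the state limit divided by the number of states that
	 * fit in an mtu-sized packet).
	 */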
1179 	switch (bus->status) {
1180 	case PFSYNC_BUS_START:
1181 		timeout_add(&sc->sc_bulkfail_tmo, 4 * hz +
1182 		    pf_pool_limits[PF_LIMIT_STATES].limit /
1183 		    ((sc->sc_if.if_mtu - PFSYNC_MINPKT) /
1184 		    sizeof(struct pfsync_state)));
1185 		DPFPRINTF(LOG_INFO, "received bulk update start");
1186 		break;
1187 
1188 	case PFSYNC_BUS_END:
1189 		if (time_uptime - ntohl(bus->endtime) >=
1190 		    sc->sc_ureq_sent) {
1191 			/* that's it, we're happy */
1192 			sc->sc_ureq_sent = 0;
1193 			sc->sc_bulk_tries = 0;
1194 			timeout_del(&sc->sc_bulkfail_tmo);
1195 #if NCARP > 0
1196 			if (!pfsync_sync_ok)
1197 				carp_group_demote_adj(&sc->sc_if, -1,
1198 				    "pfsync bulk done");
1199 #endif
1200 			pfsync_sync_ok = 1;
1201 			DPFPRINTF(LOG_INFO, "received valid bulk update end");
1202 		} else {
1203 			DPFPRINTF(LOG_WARNING, "received invalid "
1204 			    "bulk update end: bad timestamp");
1205 		}
1206 		break;
1207 	}
1208 
1209 	return (0);
1210 }
1211 
1212 int
1213 pfsync_in_tdb(struct pfsync_pkt *pkt, caddr_t buf, int len, int count)
1214 {
1215 #if defined(IPSEC)
1216 	struct pfsync_tdb *tp;
1217 	int i;
1218 	int s;
1219 
1220 	s = splsoftnet();
1221 	for (i = 0; i < count; i++) {
1222 		tp = (struct pfsync_tdb *)(buf + len * i);
1223 		pfsync_update_net_tdb(tp);
	}
1224 	splx(s);
1225 #endif
1226 
1227 	return (0);
1228 }
1229 
1230 #if defined(IPSEC)
1231 /* Update an in-kernel tdb. Silently fail if no tdb is found. */
1232 void
1233 pfsync_update_net_tdb(struct pfsync_tdb *pt)
1234 {
1235 	struct tdb		*tdb;
1236 	int			 s;
1237 
1238 	/* check for invalid values */
1239 	if (ntohl(pt->spi) <= SPI_RESERVED_MAX ||
1240 	    (pt->dst.sa.sa_family != AF_INET &&
1241 	     pt->dst.sa.sa_family != AF_INET6))
1242 		goto bad;
1243 
1244 	s = spltdb();
1245 	tdb = gettdb(pt->spi, &pt->dst, pt->sproto);
1246 	if (tdb) {
1247 		pt->rpl = ntohl(pt->rpl);
1248 		pt->cur_bytes = betoh64(pt->cur_bytes);
1249 
1250 		/* Neither replay nor byte counter should ever decrease. */
1251 		if (pt->rpl < tdb->tdb_rpl ||
1252 		    pt->cur_bytes < tdb->tdb_cur_bytes) {
1253 			splx(s);
1254 			goto bad;
1255 		}
1256 
1257 		tdb->tdb_rpl = pt->rpl;
1258 		tdb->tdb_cur_bytes = pt->cur_bytes;
1259 	}
1260 	splx(s);
1261 	return;
1262 
1263  bad:
1264 	DPFPRINTF(LOG_WARNING, "pfsync_insert: PFSYNC_ACT_TDB_UPD: "
1265 	    "invalid value");
1266 	pfsyncstats.pfsyncs_badstate++;
1267 	return;
1268 }
1269 #endif
1270 
1272 int
1273 pfsync_in_eof(struct pfsync_pkt *pkt, caddr_t buf, int len, int count)
1274 {
1275 	if (len > 0 || count > 0)
1276 		pfsyncstats.pfsyncs_badact++;
1277 
1278 	/* we're done. let the caller return */
1279 	return (1);
1280 }
1281 
1282 int
1283 pfsync_in_error(struct pfsync_pkt *pkt, caddr_t buf, int len, int count)
1284 {
1285 	pfsyncstats.pfsyncs_badact++;
1286 	return (-1);
1287 }
1288 
1289 int
1290 pfsyncoutput(struct ifnet *ifp, struct mbuf *m, struct sockaddr *dst,
1291 	struct rtentry *rt)
1292 {
1293 	m_freem(m);
1294 	return (0);
1295 }
1296 
1297 /* ARGSUSED */
1298 int
1299 pfsyncioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
1300 {
1301 	struct proc *p = curproc;
1302 	struct pfsync_softc *sc = ifp->if_softc;
1303 	struct ifreq *ifr = (struct ifreq *)data;
1304 	struct ip_moptions *imo = &sc->sc_imo;
1305 	struct pfsyncreq pfsyncr;
1306 	struct ifnet    *sifp;
1307 	struct ip *ip;
1308 	int s, error;
1309 
1310 	switch (cmd) {
1311 #if 0
1312 	case SIOCSIFADDR:
1313 	case SIOCAIFADDR:
1314 	case SIOCSIFDSTADDR:
1315 #endif
1316 	case SIOCSIFFLAGS:
1317 		s = splnet();
1318 		if (ifp->if_flags & IFF_UP) {
1319 			ifp->if_flags |= IFF_RUNNING;
1320 			pfsync_request_full_update(sc);
1321 		} else {
1322 			ifp->if_flags &= ~IFF_RUNNING;
1323 
1324 			/* drop everything */
1325 			timeout_del(&sc->sc_tmo);
1326 			pfsync_drop(sc);
1327 
1328 			/* cancel bulk update */
1329 			timeout_del(&sc->sc_bulk_tmo);
1330 			sc->sc_bulk_next = NULL;
1331 			sc->sc_bulk_last = NULL;
1332 		}
1333 		splx(s);
1334 		break;
1335 	case SIOCSIFMTU:
1336 		if (ifr->ifr_mtu <= PFSYNC_MINPKT)
1337 			return (EINVAL);
1338 		if (ifr->ifr_mtu > MCLBYTES) /* XXX could be bigger */
1339 			ifr->ifr_mtu = MCLBYTES;
1340 		s = splnet();
1341 		if (ifr->ifr_mtu < ifp->if_mtu)
1342 			pfsync_sendout();
1343 		ifp->if_mtu = ifr->ifr_mtu;
1344 		splx(s);
1345 		break;
1346 	case SIOCGETPFSYNC:
1347 		bzero(&pfsyncr, sizeof(pfsyncr));
1348 		if (sc->sc_sync_if) {
1349 			strlcpy(pfsyncr.pfsyncr_syncdev,
1350 			    sc->sc_sync_if->if_xname, IFNAMSIZ);
1351 		}
1352 		pfsyncr.pfsyncr_syncpeer = sc->sc_sync_peer;
1353 		pfsyncr.pfsyncr_maxupdates = sc->sc_maxupdates;
1354 		pfsyncr.pfsyncr_defer = sc->sc_defer;
1355 		return (copyout(&pfsyncr, ifr->ifr_data, sizeof(pfsyncr)));
1356 
1357 	case SIOCSETPFSYNC:
1358 		if ((error = suser(p, p->p_acflag)) != 0)
1359 			return (error);
1360 		if ((error = copyin(ifr->ifr_data, &pfsyncr, sizeof(pfsyncr))))
1361 			return (error);
1362 
1363 		s = splnet();
1364 
1365 		if (pfsyncr.pfsyncr_syncpeer.s_addr == 0)
1366 			sc->sc_sync_peer.s_addr = INADDR_PFSYNC_GROUP;
1367 		else
1368 			sc->sc_sync_peer.s_addr =
1369 			    pfsyncr.pfsyncr_syncpeer.s_addr;
1370 
1371 		if (pfsyncr.pfsyncr_maxupdates > 255) {
1372 			splx(s);
1373 			return (EINVAL);
1374 		}
1375 		sc->sc_maxupdates = pfsyncr.pfsyncr_maxupdates;
1376 
1377 		sc->sc_defer = pfsyncr.pfsyncr_defer;
1378 
1379 		if (pfsyncr.pfsyncr_syncdev[0] == 0) {
1380 			sc->sc_sync_if = NULL;
1381 			if (imo->imo_num_memberships > 0) {
1382 				in_delmulti(imo->imo_membership[
1383 				    --imo->imo_num_memberships]);
1384 				imo->imo_multicast_ifp = NULL;
1385 			}
1386 			splx(s);
1387 			break;
1388 		}
1389 
1390 		if ((sifp = ifunit(pfsyncr.pfsyncr_syncdev)) == NULL) {
1391 			splx(s);
1392 			return (EINVAL);
1393 		}
1394 
1395 		if (sifp->if_mtu < sc->sc_if.if_mtu ||
1396 		    (sc->sc_sync_if != NULL &&
1397 		    sifp->if_mtu < sc->sc_sync_if->if_mtu) ||
1398 		    sifp->if_mtu < MCLBYTES - sizeof(struct ip))
1399 			pfsync_sendout();
1400 		sc->sc_sync_if = sifp;
1401 
1402 		if (imo->imo_num_memberships > 0) {
1403 			in_delmulti(imo->imo_membership[--imo->imo_num_memberships]);
1404 			imo->imo_multicast_ifp = NULL;
1405 		}
1406 
1407 		if (sc->sc_sync_if &&
1408 		    sc->sc_sync_peer.s_addr == INADDR_PFSYNC_GROUP) {
1409 			struct in_addr addr;
1410 
1411 			if (!(sc->sc_sync_if->if_flags & IFF_MULTICAST)) {
1412 				sc->sc_sync_if = NULL;
1413 				splx(s);
1414 				return (EADDRNOTAVAIL);
1415 			}
1416 
1417 			addr.s_addr = INADDR_PFSYNC_GROUP;
1418 
1419 			if ((imo->imo_membership[0] =
1420 			    in_addmulti(&addr, sc->sc_sync_if)) == NULL) {
1421 				sc->sc_sync_if = NULL;
1422 				splx(s);
1423 				return (ENOBUFS);
1424 			}
1425 			imo->imo_num_memberships++;
1426 			imo->imo_multicast_ifp = sc->sc_sync_if;
1427 			imo->imo_multicast_ttl = PFSYNC_DFLTTL;
1428 			imo->imo_multicast_loop = 0;
1429 		}
1430 
1431 		ip = &sc->sc_template;
1432 		bzero(ip, sizeof(*ip));
1433 		ip->ip_v = IPVERSION;
1434 		ip->ip_hl = sizeof(sc->sc_template) >> 2;
1435 		ip->ip_tos = IPTOS_LOWDELAY;
1436 		/* len and id are set later */
1437 		ip->ip_off = htons(IP_DF);
1438 		ip->ip_ttl = PFSYNC_DFLTTL;
1439 		ip->ip_p = IPPROTO_PFSYNC;
1440 		ip->ip_src.s_addr = INADDR_ANY;
1441 		ip->ip_dst.s_addr = sc->sc_sync_peer.s_addr;
1442 
1443 		pfsync_request_full_update(sc);
1444 		splx(s);
1445 
1446 		break;
1447 
1448 	default:
1449 		return (ENOTTY);
1450 	}
1451 
1452 	return (0);
1453 }
1454 
1455 void
1456 pfsync_out_state(struct pf_state *st, void *buf)
1457 {
1458 	struct pfsync_state *sp = buf;
1459 
1460 	pfsync_state_export(sp, st);
1461 }
1462 
1463 void
1464 pfsync_out_iack(struct pf_state *st, void *buf)
1465 {
1466 	struct pfsync_ins_ack *iack = buf;
1467 
1468 	iack->id = st->id;
1469 	iack->creatorid = st->creatorid;
1470 }
1471 
1472 void
1473 pfsync_out_upd_c(struct pf_state *st, void *buf)
1474 {
1475 	struct pfsync_upd_c *up = buf;
1476 
1477 	bzero(up, sizeof(*up));
1478 	up->id = st->id;
1479 	pf_state_peer_hton(&st->src, &up->src);
1480 	pf_state_peer_hton(&st->dst, &up->dst);
1481 	up->creatorid = st->creatorid;
1482 
1483 	up->expire = pf_state_expires(st);
1484 	if (up->expire <= time_second)
1485 		up->expire = htonl(0);
1486 	else
1487 		up->expire = htonl(up->expire - time_second);
1488 	up->timeout = st->timeout;
1489 }
1490 
1491 void
1492 pfsync_out_del(struct pf_state *st, void *buf)
1493 {
1494 	struct pfsync_del_c *dp = buf;
1495 
1496 	dp->id = st->id;
1497 	dp->creatorid = st->creatorid;
1498 
1499 	SET(st->state_flags, PFSTATE_NOSYNC);
1500 }
1501 
1502 void
1503 pfsync_drop(struct pfsync_softc *sc)
1504 {
1505 	struct pf_state *st;
1506 	struct pfsync_upd_req_item *ur;
1507 	struct tdb *t;
1508 	int q;
1509 
1510 	for (q = 0; q < PFSYNC_S_COUNT; q++) {
1511 		if (TAILQ_EMPTY(&sc->sc_qs[q]))
1512 			continue;
1513 
1514 		TAILQ_FOREACH(st, &sc->sc_qs[q], sync_list) {
1515 #ifdef PFSYNC_DEBUG
1516 			KASSERT(st->sync_state == q);
1517 #endif
1518 			st->sync_state = PFSYNC_S_NONE;
1519 		}
1520 		TAILQ_INIT(&sc->sc_qs[q]);
1521 	}
1522 
1523 	while ((ur = TAILQ_FIRST(&sc->sc_upd_req_list)) != NULL) {
1524 		TAILQ_REMOVE(&sc->sc_upd_req_list, ur, ur_entry);
1525 		pool_put(&sc->sc_pool, ur);
1526 	}
1527 
1528 	sc->sc_plus = NULL;
1529 
1530 	if (!TAILQ_EMPTY(&sc->sc_tdb_q)) {
1531 		TAILQ_FOREACH(t, &sc->sc_tdb_q, tdb_sync_entry)
1532 			CLR(t->tdb_flags, TDBF_PFSYNC);
1533 
1534 		TAILQ_INIT(&sc->sc_tdb_q);
1535 	}
1536 
1537 	sc->sc_len = PFSYNC_MINPKT;
1538 }
1539 
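/*
 * build and transmit a single pfsync packet out of everything that is
 * currently queued: pending update requests, the "plus" region, queued
 * tdbs, and the per-PFSYNC_S_* state queues, in that order.
 */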
1540 void
1541 pfsync_sendout(void)
1542 {
1543 	struct pfsync_softc *sc = pfsyncif;
1544 #if NBPFILTER > 0
1545 	struct ifnet *ifp = &sc->sc_if;
1546 #endif
1547 	struct mbuf *m;
1548 	struct ip *ip;
1549 	struct pfsync_header *ph;
1550 	struct pfsync_subheader *subh;
1551 	struct pf_state *st;
1552 	struct pfsync_upd_req_item *ur;
1553 	struct tdb *t;
1554 
1555 	int offset;
1556 	int q, count = 0;
1557 
1558 	if (sc == NULL || sc->sc_len == PFSYNC_MINPKT)
1559 		return;
1560 
1561 	if (!ISSET(sc->sc_if.if_flags, IFF_RUNNING) ||
1562 #if NBPFILTER > 0
1563 	    (ifp->if_bpf == NULL && sc->sc_sync_if == NULL)) {
1564 #else
1565 	    sc->sc_sync_if == NULL) {
1566 #endif
1567 		pfsync_drop(sc);
1568 		return;
1569 	}
1570 
1571 	MGETHDR(m, M_DONTWAIT, MT_DATA);
1572 	if (m == NULL) {
1573 		sc->sc_if.if_oerrors++;
1574 		pfsyncstats.pfsyncs_onomem++;
1575 		pfsync_drop(sc);
1576 		return;
1577 	}
1578 
1579 	if (max_linkhdr + sc->sc_len > MHLEN) {
1580 		MCLGETI(m, M_DONTWAIT, NULL, max_linkhdr + sc->sc_len);
1581 		if (!ISSET(m->m_flags, M_EXT)) {
1582 			m_free(m);
1583 			sc->sc_if.if_oerrors++;
1584 			pfsyncstats.pfsyncs_onomem++;
1585 			pfsync_drop(sc);
1586 			return;
1587 		}
1588 	}
1589 	m->m_data += max_linkhdr;
1590 	m->m_len = m->m_pkthdr.len = sc->sc_len;
1591 
1592 	/* build the ip header */
1593 	ip = (struct ip *)m->m_data;
1594 	bcopy(&sc->sc_template, ip, sizeof(*ip));
1595 	offset = sizeof(*ip);
1596 
1597 	ip->ip_len = htons(m->m_pkthdr.len);
1598 	ip->ip_id = htons(ip_randomid());
1599 
1600 	/* build the pfsync header */
1601 	ph = (struct pfsync_header *)(m->m_data + offset);
1602 	bzero(ph, sizeof(*ph));
1603 	offset += sizeof(*ph);
1604 
1605 	ph->version = PFSYNC_VERSION;
1606 	ph->len = htons(sc->sc_len - sizeof(*ip));
1607 	bcopy(pf_status.pf_chksum, ph->pfcksum, PF_MD5_DIGEST_LENGTH);
1608 
1609 	if (!TAILQ_EMPTY(&sc->sc_upd_req_list)) {
1610 		subh = (struct pfsync_subheader *)(m->m_data + offset);
1611 		offset += sizeof(*subh);
1612 
1613 		count = 0;
1614 		while ((ur = TAILQ_FIRST(&sc->sc_upd_req_list)) != NULL) {
1615 			TAILQ_REMOVE(&sc->sc_upd_req_list, ur, ur_entry);
1616 
1617 			bcopy(&ur->ur_msg, m->m_data + offset,
1618 			    sizeof(ur->ur_msg));
1619 			offset += sizeof(ur->ur_msg);
1620 
1621 			pool_put(&sc->sc_pool, ur);
1622 
1623 			count++;
1624 		}
1625 
1626 		bzero(subh, sizeof(*subh));
1627 		subh->len = sizeof(ur->ur_msg) >> 2;
1628 		subh->action = PFSYNC_ACT_UPD_REQ;
1629 		subh->count = htons(count);
1630 	}
1631 
1632 	/* has someone built a custom region for us to add? */
1633 	if (sc->sc_plus != NULL) {
1634 		bcopy(sc->sc_plus, m->m_data + offset, sc->sc_pluslen);
1635 		offset += sc->sc_pluslen;
1636 
1637 		sc->sc_plus = NULL;
1638 	}
1639 
1640 	if (!TAILQ_EMPTY(&sc->sc_tdb_q)) {
1641 		subh = (struct pfsync_subheader *)(m->m_data + offset);
1642 		offset += sizeof(*subh);
1643 
1644 		count = 0;
1645 		TAILQ_FOREACH(t, &sc->sc_tdb_q, tdb_sync_entry) {
1646 			pfsync_out_tdb(t, m->m_data + offset);
1647 			offset += sizeof(struct pfsync_tdb);
1648 			CLR(t->tdb_flags, TDBF_PFSYNC);
1649 
1650 			count++;
1651 		}
1652 		TAILQ_INIT(&sc->sc_tdb_q);
1653 
1654 		bzero(subh, sizeof(*subh));
1655 		subh->action = PFSYNC_ACT_TDB;
1656 		subh->len = sizeof(struct pfsync_tdb) >> 2;
1657 		subh->count = htons(count);
1658 	}
1659 
1660 	/* walk the queues */
1661 	for (q = 0; q < PFSYNC_S_COUNT; q++) {
1662 		if (TAILQ_EMPTY(&sc->sc_qs[q]))
1663 			continue;
1664 
1665 		subh = (struct pfsync_subheader *)(m->m_data + offset);
1666 		offset += sizeof(*subh);
1667 
1668 		count = 0;
1669 		TAILQ_FOREACH(st, &sc->sc_qs[q], sync_list) {
1670 #ifdef PFSYNC_DEBUG
1671 			KASSERT(st->sync_state == q);
1672 #endif
1673 			pfsync_qs[q].write(st, m->m_data + offset);
1674 			offset += pfsync_qs[q].len;
1675 
1676 			st->sync_state = PFSYNC_S_NONE;
1677 			count++;
1678 		}
1679 		TAILQ_INIT(&sc->sc_qs[q]);
1680 
1681 		bzero(subh, sizeof(*subh));
1682 		subh->action = pfsync_qs[q].action;
1683 		subh->len = pfsync_qs[q].len >> 2;
1684 		subh->count = htons(count);
1685 	}
1686 
1687 	/* we're done, let's put it on the wire */
1688 #if NBPFILTER > 0
1689 	if (ifp->if_bpf) {
1690 		m->m_data += sizeof(*ip);
1691 		m->m_len = m->m_pkthdr.len = sc->sc_len - sizeof(*ip);
1692 		bpf_mtap(ifp->if_bpf, m, BPF_DIRECTION_OUT);
1693 		m->m_data -= sizeof(*ip);
1694 		m->m_len = m->m_pkthdr.len = sc->sc_len;
1695 	}
1696 
1697 	if (sc->sc_sync_if == NULL) {
1698 		sc->sc_len = PFSYNC_MINPKT;
1699 		m_freem(m);
1700 		return;
1701 	}
1702 #endif
1703 
1704 	sc->sc_if.if_opackets++;
1705 	sc->sc_if.if_obytes += m->m_pkthdr.len;
1706 
1707 	if (ip_output(m, NULL, NULL, IP_RAWOUTPUT, &sc->sc_imo, NULL) == 0)
1708 		pfsyncstats.pfsyncs_opackets++;
1709 	else
1710 		pfsyncstats.pfsyncs_oerrors++;
1711 
1712 	/* start again */
1713 	sc->sc_len = PFSYNC_MINPKT;
1714 }
1715 
1716 void
1717 pfsync_insert_state(struct pf_state *st)
1718 {
1719 	struct pfsync_softc *sc = pfsyncif;
1720 
1721 	splsoftassert(IPL_SOFTNET);
1722 
1723 	if (ISSET(st->rule.ptr->rule_flag, PFRULE_NOSYNC) ||
1724 	    st->key[PF_SK_WIRE]->proto == IPPROTO_PFSYNC) {
1725 		SET(st->state_flags, PFSTATE_NOSYNC);
1726 		return;
1727 	}
1728 
1729 	if (sc == NULL || !ISSET(sc->sc_if.if_flags, IFF_RUNNING) ||
1730 	    ISSET(st->state_flags, PFSTATE_NOSYNC))
1731 		return;
1732 
1733 #ifdef PFSYNC_DEBUG
1734 	KASSERT(st->sync_state == PFSYNC_S_NONE);
1735 #endif
1736 
1737 	if (sc->sc_len == PFSYNC_MINPKT)
1738 		timeout_add_sec(&sc->sc_tmo, 1);
1739 
1740 	pfsync_q_ins(st, PFSYNC_S_INS);
1741 
1742 	st->sync_updates = 0;
1743 }
1744 
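/*
 * deferral: when sc_defer is set, the packet that created a state is
 * held back until the peer acks the insertion (see pfsync_deferred())
 * or the timeout below (in ticks) fires, so the peer can learn the
 * state before more traffic for it arrives.
 */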
1745 int defer = 10;
1746 
1747 int
1748 pfsync_defer(struct pf_state *st, struct mbuf *m)
1749 {
1750 	struct pfsync_softc *sc = pfsyncif;
1751 	struct pfsync_deferral *pd;
1752 
1753 	splsoftassert(IPL_SOFTNET);
1754 
1755 	if (!sc->sc_defer || m->m_flags & (M_BCAST|M_MCAST))
1756 		return (0);
1757 
1758 	if (sc->sc_deferred >= 128)
1759 		pfsync_undefer(TAILQ_FIRST(&sc->sc_deferrals), 0);
1760 
1761 	pd = pool_get(&sc->sc_pool, PR_NOWAIT);
1762 	if (pd == NULL)
1763 		return (0);
1764 
1765 	m->m_pkthdr.pf.flags |= PF_TAG_GENERATED;
1766 	SET(st->state_flags, PFSTATE_ACK);
1767 
1768 	pd->pd_st = st;
1769 	pd->pd_m = m;
1770 
1771 	sc->sc_deferred++;
1772 	TAILQ_INSERT_TAIL(&sc->sc_deferrals, pd, pd_entry);
1773 
1774 	timeout_set(&pd->pd_tmo, pfsync_defer_tmo, pd);
1775 	timeout_add(&pd->pd_tmo, defer);
1776 
1777 	schednetisr(NETISR_PFSYNC);
1778 
1779 	return (1);
1780 }
1781 
1782 void
1783 pfsync_undefer(struct pfsync_deferral *pd, int drop)
1784 {
1785 	struct pfsync_softc *sc = pfsyncif;
1786 
1787 	splsoftassert(IPL_SOFTNET);
1788 
1789 	timeout_del(&pd->pd_tmo); /* bah */
1790 	TAILQ_REMOVE(&sc->sc_deferrals, pd, pd_entry);
1791 	sc->sc_deferred--;
1792 
1793 	CLR(pd->pd_st->state_flags, PFSTATE_ACK);
1794 	if (drop)
1795 		m_freem(pd->pd_m);
1796 	else {
1797 		ip_output(pd->pd_m, (void *)NULL, (void *)NULL, 0,
1798 		    (void *)NULL, (void *)NULL);
1799 	}
1800 
1801 	pool_put(&sc->sc_pool, pd);
1802 }
1803 
1804 void
1805 pfsync_defer_tmo(void *arg)
1806 {
1807 	int s;
1808 
1809 	s = splsoftnet();
1810 	pfsync_undefer(arg, 0);
1811 	splx(s);
1812 }
1813 
1814 void
1815 pfsync_deferred(struct pf_state *st, int drop)
1816 {
1817 	struct pfsync_softc *sc = pfsyncif;
1818 	struct pfsync_deferral *pd;
1819 
1820 	splsoftassert(IPL_SOFTNET);
1821 
1822 	TAILQ_FOREACH(pd, &sc->sc_deferrals, pd_entry) {
1823 		if (pd->pd_st == st) {
1824 			pfsync_undefer(pd, drop);
1825 			return;
1826 		}
1827 	}
1828 
1829 	panic("pfsync_deferred: unable to find deferred state");
1830 }
1831 
1832 void
1833 pfsync_update_state(struct pf_state *st)
1834 {
1835 	struct pfsync_softc *sc = pfsyncif;
1836 	int sync = 0;
1837 
1838 	splsoftassert(IPL_SOFTNET);
1839 
1840 	if (sc == NULL || !ISSET(sc->sc_if.if_flags, IFF_RUNNING))
1841 		return;
1842 
1843 	if (ISSET(st->state_flags, PFSTATE_ACK))
1844 		pfsync_deferred(st, 0);
1845 	if (ISSET(st->state_flags, PFSTATE_NOSYNC)) {
1846 		if (st->sync_state != PFSYNC_S_NONE)
1847 			pfsync_q_del(st);
1848 		return;
1849 	}
1850 
1851 	if (sc->sc_len == PFSYNC_MINPKT)
1852 		timeout_add_sec(&sc->sc_tmo, 1);
1853 
1854 	switch (st->sync_state) {
1855 	case PFSYNC_S_UPD_C:
1856 	case PFSYNC_S_UPD:
1857 	case PFSYNC_S_INS:
1858 		/* we're already handling it */
1859 
1860 		if (st->key[PF_SK_WIRE]->proto == IPPROTO_TCP) {
1861 			st->sync_updates++;
1862 			if (st->sync_updates >= sc->sc_maxupdates)
1863 				sync = 1;
1864 		}
1865 		break;
1866 
1867 	case PFSYNC_S_IACK:
1868 		pfsync_q_del(st);
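		/* FALLTHROUGH */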
1869 	case PFSYNC_S_NONE:
1870 		pfsync_q_ins(st, PFSYNC_S_UPD_C);
1871 		st->sync_updates = 0;
1872 		break;
1873 
1874 	default:
1875 		panic("pfsync_update_state: unexpected sync state %d",
1876 		    st->sync_state);
1877 	}
1878 
1879 	if (sync || (time_uptime - st->pfsync_time) < 2)
1880 		schednetisr(NETISR_PFSYNC);
1881 }
1882 
1883 void
1884 pfsync_request_full_update(struct pfsync_softc *sc)
1885 {
1886 	if (sc->sc_sync_if && ISSET(sc->sc_if.if_flags, IFF_RUNNING)) {
1887 		/* Request a full state table update. */
1888 		sc->sc_ureq_sent = time_uptime;
1889 #if NCARP > 0
1890 		if (pfsync_sync_ok)
1891 			carp_group_demote_adj(&sc->sc_if, 1,
1892 			    "pfsync bulk start");
1893 #endif
1894 		pfsync_sync_ok = 0;
1895 		DPFPRINTF(LOG_INFO, "requesting bulk update");
1896 		timeout_add(&sc->sc_bulkfail_tmo, 4 * hz +
1897 		    pf_pool_limits[PF_LIMIT_STATES].limit /
1898 		    ((sc->sc_if.if_mtu - PFSYNC_MINPKT) /
1899 		    sizeof(struct pfsync_state)));
1900 		pfsync_request_update(0, 0);
1901 	}
1902 }
1903 
1904 void
1905 pfsync_request_update(u_int32_t creatorid, u_int64_t id)
1906 {
1907 	struct pfsync_softc *sc = pfsyncif;
1908 	struct pfsync_upd_req_item *item;
1909 	size_t nlen = sizeof(struct pfsync_upd_req);
1910 
1911 	/*
1912 	 * this code does nothing to prevent multiple update requests for the
1913 	 * same state being generated.
1914 	 */
1915 
1916 	item = pool_get(&sc->sc_pool, PR_NOWAIT);
1917 	if (item == NULL) {
1918 		/* XXX stats */
1919 		return;
1920 	}
1921 
1922 	item->ur_msg.id = id;
1923 	item->ur_msg.creatorid = creatorid;
1924 
1925 	if (TAILQ_EMPTY(&sc->sc_upd_req_list))
1926 		nlen += sizeof(struct pfsync_subheader);
1927 
1928 	if (sc->sc_len + nlen > sc->sc_if.if_mtu) {
1929 		pfsync_sendout();
1930 
1931 		nlen = sizeof(struct pfsync_subheader) +
1932 		    sizeof(struct pfsync_upd_req);
1933 	}
1934 
1935 	TAILQ_INSERT_TAIL(&sc->sc_upd_req_list, item, ur_entry);
1936 	sc->sc_len += nlen;
1937 
1938 	schednetisr(NETISR_PFSYNC);
1939 }
1940 
1941 void
1942 pfsync_update_state_req(struct pf_state *st)
1943 {
1944 	struct pfsync_softc *sc = pfsyncif;
1945 
1946 	if (sc == NULL)
1947 		panic("pfsync_update_state_req: nonexistent instance");
1948 
1949 	if (ISSET(st->state_flags, PFSTATE_NOSYNC)) {
1950 		if (st->sync_state != PFSYNC_S_NONE)
1951 			pfsync_q_del(st);
1952 		return;
1953 	}
1954 
1955 	switch (st->sync_state) {
1956 	case PFSYNC_S_UPD_C:
1957 	case PFSYNC_S_IACK:
1958 		pfsync_q_del(st);
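		/* FALLTHROUGH */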
1959 	case PFSYNC_S_NONE:
1960 		pfsync_q_ins(st, PFSYNC_S_UPD);
1961 		schednetisr(NETISR_PFSYNC);
1962 		return;
1963 
1964 	case PFSYNC_S_INS:
1965 	case PFSYNC_S_UPD:
1966 	case PFSYNC_S_DEL:
1967 		/* we're already handling it */
1968 		return;
1969 
1970 	default:
1971 		panic("pfsync_update_state_req: unexpected sync state %d",
1972 		    st->sync_state);
1973 	}
1974 }
1975 
1976 void
1977 pfsync_delete_state(struct pf_state *st)
1978 {
1979 	struct pfsync_softc *sc = pfsyncif;
1980 
1981 	splsoftassert(IPL_SOFTNET);
1982 
1983 	if (sc == NULL || !ISSET(sc->sc_if.if_flags, IFF_RUNNING))
1984 		return;
1985 
1986 	if (ISSET(st->state_flags, PFSTATE_ACK))
1987 		pfsync_deferred(st, 1);
1988 	if (ISSET(st->state_flags, PFSTATE_NOSYNC)) {
1989 		if (st->sync_state != PFSYNC_S_NONE)
1990 			pfsync_q_del(st);
1991 		return;
1992 	}
1993 
1994 	if (sc->sc_len == PFSYNC_MINPKT)
1995 		timeout_add_sec(&sc->sc_tmo, 1);
1996 
1997 	switch (st->sync_state) {
1998 	case PFSYNC_S_INS:
1999 		/* we never got to tell the world so just forget about it */
2000 		pfsync_q_del(st);
2001 		return;
2002 
2003 	case PFSYNC_S_UPD_C:
2004 	case PFSYNC_S_UPD:
2005 	case PFSYNC_S_IACK:
2006 		pfsync_q_del(st);
2007 		/* FALLTHROUGH to putting it on the del list */
2008 
2009 	case PFSYNC_S_NONE:
2010 		pfsync_q_ins(st, PFSYNC_S_DEL);
2011 		return;
2012 
2013 	default:
2014 		panic("pfsync_delete_state: unexpected sync state %d",
2015 		    st->sync_state);
2016 	}
2017 }
2018 
void
pfsync_clear_states(u_int32_t creatorid, const char *ifname)
{
	struct pfsync_softc *sc = pfsyncif;
	struct {
		struct pfsync_subheader subh;
		struct pfsync_clr clr;
	} __packed r;

	splsoftassert(IPL_SOFTNET);

	if (sc == NULL || !ISSET(sc->sc_if.if_flags, IFF_RUNNING))
		return;

	bzero(&r, sizeof(r));

	r.subh.action = PFSYNC_ACT_CLR;
	r.subh.len = sizeof(struct pfsync_clr) >> 2;	/* in 32-bit words */
	r.subh.count = htons(1);

	strlcpy(r.clr.ifname, ifname, sizeof(r.clr.ifname));
	r.clr.creatorid = creatorid;

	pfsync_send_plus(&r, sizeof(r));
}

void
pfsync_q_ins(struct pf_state *st, int q)
{
	struct pfsync_softc *sc = pfsyncif;
	size_t nlen = pfsync_qs[q].len;

	KASSERT(st->sync_state == PFSYNC_S_NONE);

#if 1 || defined(PFSYNC_DEBUG)
	if (sc->sc_len < PFSYNC_MINPKT)
		panic("pfsync pkt len is too low %zu", sc->sc_len);
#endif
	if (TAILQ_EMPTY(&sc->sc_qs[q]))
		nlen += sizeof(struct pfsync_subheader);

	if (sc->sc_len + nlen > sc->sc_if.if_mtu) {
		pfsync_sendout();

		nlen = sizeof(struct pfsync_subheader) + pfsync_qs[q].len;
	}

	sc->sc_len += nlen;
	TAILQ_INSERT_TAIL(&sc->sc_qs[q], st, sync_list);
	st->sync_state = q;
}

void
pfsync_q_del(struct pf_state *st)
{
	struct pfsync_softc *sc = pfsyncif;
	int q = st->sync_state;

	KASSERT(st->sync_state != PFSYNC_S_NONE);

	sc->sc_len -= pfsync_qs[q].len;
	TAILQ_REMOVE(&sc->sc_qs[q], st, sync_list);
	st->sync_state = PFSYNC_S_NONE;

	/* the last entry leaving a queue releases its subheader too */
	if (TAILQ_EMPTY(&sc->sc_qs[q]))
		sc->sc_len -= sizeof(struct pfsync_subheader);
}

void
pfsync_update_tdb(struct tdb *t, int output)
{
	struct pfsync_softc *sc = pfsyncif;
	size_t nlen = sizeof(struct pfsync_tdb);

	if (sc == NULL)
		return;

	if (!ISSET(t->tdb_flags, TDBF_PFSYNC)) {
		if (TAILQ_EMPTY(&sc->sc_tdb_q))
			nlen += sizeof(struct pfsync_subheader);

		if (sc->sc_len + nlen > sc->sc_if.if_mtu) {
			pfsync_sendout();

			nlen = sizeof(struct pfsync_subheader) +
			    sizeof(struct pfsync_tdb);
		}

		sc->sc_len += nlen;
		TAILQ_INSERT_TAIL(&sc->sc_tdb_q, t, tdb_sync_entry);
		SET(t->tdb_flags, TDBF_PFSYNC);
		t->tdb_updates = 0;
	} else {
		if (++t->tdb_updates >= sc->sc_maxupdates)
			schednetisr(NETISR_PFSYNC);
	}

	if (output)
		SET(t->tdb_flags, TDBF_PFSYNC_RPL);
	else
		CLR(t->tdb_flags, TDBF_PFSYNC_RPL);
}

void
pfsync_delete_tdb(struct tdb *t)
{
	struct pfsync_softc *sc = pfsyncif;

	if (sc == NULL || !ISSET(t->tdb_flags, TDBF_PFSYNC))
		return;

	sc->sc_len -= sizeof(struct pfsync_tdb);
	TAILQ_REMOVE(&sc->sc_tdb_q, t, tdb_sync_entry);
	CLR(t->tdb_flags, TDBF_PFSYNC);

	if (TAILQ_EMPTY(&sc->sc_tdb_q))
		sc->sc_len -= sizeof(struct pfsync_subheader);
}

void
pfsync_out_tdb(struct tdb *t, void *buf)
{
	struct pfsync_tdb *ut = buf;

	bzero(ut, sizeof(*ut));
	ut->spi = t->tdb_spi;
	bcopy(&t->tdb_dst, &ut->dst, sizeof(ut->dst));
	/*
	 * When a failover happens, the master's rpl is probably above
	 * what we see here (we may be up to a second late), so
	 * increase it a bit for outbound tdbs to manage most such
	 * situations.
	 *
	 * For now, just add an offset that is likely to be larger
	 * than the number of packets we can see in one second. The RFC
	 * just says the next packet must have a higher seq value.
	 *
	 * XXX What is a good algorithm for this? We could use
	 * a rate-determined increase, but to know it, we would have
	 * to extend struct tdb.
	 * XXX ut->rpl can wrap over MAXINT, but if so the real tdb
	 * will soon be replaced anyway. For now, just don't handle
	 * this edge case.
	 */
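	/*
	 * Illustrative arithmetic (numbers not from this file): at
	 * 10000 packets per second, a one-second replication lag
	 * advances the master's replay counter by ~10000, so a fixed
	 * bump of 16384 keeps the restored counter ahead on links up
	 * to roughly that rate; faster links would want a larger or
	 * rate-derived offset.
	 */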
#define RPL_INCR 16384
	ut->rpl = htonl(t->tdb_rpl + (ISSET(t->tdb_flags, TDBF_PFSYNC_RPL) ?
	    RPL_INCR : 0));
	ut->cur_bytes = htobe64(t->tdb_cur_bytes);
	ut->sproto = t->tdb_sproto;
}

void
pfsync_bulk_start(void)
{
	struct pfsync_softc *sc = pfsyncif;

	sc->sc_ureq_received = time_uptime;

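	/*
	 * Remember the starting point: pfsync_bulk_update() walks the
	 * state list circularly and stops when it comes back around to
	 * sc_bulk_last.
	 */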
	if (sc->sc_bulk_next == NULL)
		sc->sc_bulk_next = TAILQ_FIRST(&state_list);
	sc->sc_bulk_last = sc->sc_bulk_next;

	DPFPRINTF(LOG_INFO, "received bulk update request");

	pfsync_bulk_status(PFSYNC_BUS_START);
	timeout_add(&sc->sc_bulk_tmo, 0);
}

void
pfsync_bulk_update(void *arg)
{
	struct pfsync_softc *sc = arg;
	struct pf_state *st;
	int i = 0;
	int s;

	s = splsoftnet();

	st = sc->sc_bulk_next;

	while (st != sc->sc_bulk_last) {
		if (st->sync_state == PFSYNC_S_NONE &&
		    st->timeout < PFTM_MAX &&
		    st->pfsync_time <= sc->sc_ureq_received) {
			pfsync_update_state_req(st);
			i++;
		}

		st = TAILQ_NEXT(st, entry_list);
		if (st == NULL)
			st = TAILQ_FIRST(&state_list);

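		/*
		 * Once at least one update has been queued and the
		 * update queue has drained again (i.e. a full packet
		 * went out), record our position and resume on the
		 * next tick instead of walking the whole state table
		 * in one pass.
		 */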
		if (i > 0 && TAILQ_EMPTY(&sc->sc_qs[PFSYNC_S_UPD])) {
			sc->sc_bulk_next = st;
			timeout_add(&sc->sc_bulk_tmo, 1);
			goto out;
		}
	}

	/* we're done */
	sc->sc_bulk_next = NULL;
	sc->sc_bulk_last = NULL;
	pfsync_bulk_status(PFSYNC_BUS_END);

out:
	splx(s);
}

void
pfsync_bulk_status(u_int8_t status)
{
	struct {
		struct pfsync_subheader subh;
		struct pfsync_bus bus;
	} __packed r;

	struct pfsync_softc *sc = pfsyncif;

	bzero(&r, sizeof(r));

	r.subh.action = PFSYNC_ACT_BUS;
	r.subh.len = sizeof(struct pfsync_bus) >> 2;	/* in 32-bit words */
	r.subh.count = htons(1);

	r.bus.creatorid = pf_status.hostid;
	r.bus.endtime = htonl(time_uptime - sc->sc_ureq_received);
	r.bus.status = status;

	pfsync_send_plus(&r, sizeof(r));
}

void
pfsync_bulk_fail(void *arg)
{
	struct pfsync_softc *sc = arg;

	if (sc->sc_bulk_tries++ < PFSYNC_MAX_BULKTRIES) {
		/* Try again */
		timeout_add_sec(&sc->sc_bulkfail_tmo, 5);
		pfsync_request_update(0, 0);
	} else {
		/* Pretend the transfer was ok */
		sc->sc_ureq_sent = 0;
		sc->sc_bulk_tries = 0;
#if NCARP > 0
		if (!pfsync_sync_ok)
			carp_group_demote_adj(&sc->sc_if, -1,
			    "pfsync bulk fail");
#endif
		pfsync_sync_ok = 1;
		DPFPRINTF(LOG_ERR, "failed to receive bulk update");
	}
}

void
pfsync_send_plus(void *plus, size_t pluslen)
{
	struct pfsync_softc *sc = pfsyncif;

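	/*
	 * The caller's buffer is only referenced until the immediate
	 * pfsync_sendout() below, so it may live on the caller's stack
	 * (as in pfsync_clear_states() and pfsync_bulk_status()).
	 */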
	if (sc->sc_len + pluslen > sc->sc_if.if_mtu)
		pfsync_sendout();

	sc->sc_plus = plus;
	sc->sc_len += (sc->sc_pluslen = pluslen);

	pfsync_sendout();
}

int
pfsync_up(void)
{
	struct pfsync_softc *sc = pfsyncif;

	if (sc == NULL || !ISSET(sc->sc_if.if_flags, IFF_RUNNING))
		return (0);

	return (1);
}

int
pfsync_state_in_use(struct pf_state *st)
{
	struct pfsync_softc *sc = pfsyncif;

	if (sc == NULL)
		return (0);

	if (st->sync_state != PFSYNC_S_NONE)
		return (1);

	/* while a bulk send is in progress any state may be visited */
	if (sc->sc_bulk_next == NULL && sc->sc_bulk_last == NULL)
		return (0);

	return (1);
}

void
pfsync_timeout(void *arg)
{
	int s;

	s = splsoftnet();
	pfsync_sendout();
	splx(s);
}

/* this is a softnet/netisr handler */
void
pfsyncintr(void)
{
	pfsync_sendout();
}

int
pfsync_sysctl(int *name, u_int namelen, void *oldp, size_t *oldlenp, void *newp,
    size_t newlen)
{
	/* All sysctl names at this level are terminal. */
	if (namelen != 1)
		return (ENOTDIR);

	switch (name[0]) {
	case PFSYNCCTL_STATS:
		if (newp != NULL)
			return (EPERM);
		return (sysctl_struct(oldp, oldlenp, newp, newlen,
		    &pfsyncstats, sizeof(pfsyncstats)));
	default:
		return (ENOPROTOOPT);
	}
}