1 /* $OpenBSD: if_pfsync.c,v 1.326 2024/05/24 06:38:41 sashan Exp $ */
2
3 /*
4 * Copyright (c) 2002 Michael Shalayeff
5 * All rights reserved.
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 * 1. Redistributions of source code must retain the above copyright
11 * notice, this list of conditions and the following disclaimer.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in the
14 * documentation and/or other materials provided with the distribution.
15 *
16 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
17 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
18 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
19 * IN NO EVENT SHALL THE AUTHOR OR HIS RELATIVES BE LIABLE FOR ANY DIRECT,
20 * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
21 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
22 * SERVICES; LOSS OF MIND, USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
23 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
24 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
25 * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
26 * THE POSSIBILITY OF SUCH DAMAGE.
27 */
28
29 /*
30 * Copyright (c) 2009, 2022, 2023 David Gwynne <dlg@openbsd.org>
31 *
32 * Permission to use, copy, modify, and distribute this software for any
33 * purpose with or without fee is hereby granted, provided that the above
34 * copyright notice and this permission notice appear in all copies.
35 *
36 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
37 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
38 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
39 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
40 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
41 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
42 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
43 */
44
45 #include "bpfilter.h"
46 #include "pfsync.h"
47 #include "kstat.h"
48
49 #include <sys/param.h>
50 #include <sys/systm.h>
51 #include <sys/time.h>
52 #include <sys/malloc.h>
53 #include <sys/mbuf.h>
54 #include <sys/socket.h>
55 #include <sys/ioctl.h>
56 #include <sys/timeout.h>
57 #include <sys/kernel.h>
58 #include <sys/sysctl.h>
59 #include <sys/pool.h>
60 #include <sys/syslog.h>
61 #include <sys/tree.h>
62 #include <sys/smr.h>
63 #include <sys/percpu.h>
64 #include <sys/refcnt.h>
65 #include <sys/kstat.h>
66 #include <sys/stdarg.h>
67
68 #include <net/if.h>
69 #include <net/if_types.h>
70 #include <net/bpf.h>
71 #include <net/netisr.h>
72 #include <net/route.h>
73
74 #include <netinet/in.h>
75 #include <netinet/if_ether.h>
76 #include <netinet/ip.h>
77 #include <netinet/in_var.h>
78 #include <netinet/ip_var.h>
79 #include <netinet/ip_ipsp.h>
80 #include <netinet/ip_icmp.h>
81 #include <netinet/icmp6.h>
82 #include <netinet/tcp.h>
83 #include <netinet/tcp_seq.h>
84 #include <netinet/tcp_fsm.h>
85 #include <netinet/udp.h>
86
87 #ifdef INET6
88 #include <netinet6/in6_var.h>
89 #include <netinet/ip6.h>
90 #include <netinet6/ip6_var.h>
91 #include <netinet6/nd6.h>
92 #endif /* INET6 */
93
94 #include "carp.h"
95 #if NCARP > 0
96 #include <netinet/ip_carp.h>
97 #endif
98
99 #include <net/pfvar.h>
100 #include <net/pfvar_priv.h>
101 #include <net/if_pfsync.h>
102
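/*
 * the smallest possible pfsync packet: an IP header plus the pfsync
 * header, with no subheaders or state data after it. each slice
 * starts its pending-packet length (s_len) at this value.
 */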
103 #define PFSYNC_MINPKT ( \
104 sizeof(struct ip) + \
105 sizeof(struct pfsync_header))
106
107 struct pfsync_softc;
108
109 struct pfsync_deferral {
110 TAILQ_ENTRY(pfsync_deferral) pd_entry;
111 struct pf_state *pd_st;
112 struct mbuf *pd_m;
113 uint64_t pd_deadline;
114 };
115 TAILQ_HEAD(pfsync_deferrals, pfsync_deferral);
116
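/*
 * deferred packets are held for at most PFSYNC_DEFER_NSEC (20ms), and
 * at most PFSYNC_DEFER_LIMIT deferrals are queued per slice before the
 * deferral task is kicked early. bulk sends are paced at one packet
 * every PFSYNC_BULK_SND_IVAL_MS milliseconds.
 */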
117 #define PFSYNC_DEFER_NSEC 20000000ULL
118 #define PFSYNC_DEFER_LIMIT 128
119 #define PFSYNC_BULK_SND_IVAL_MS 20
120
121 static struct pool pfsync_deferrals_pool;
122
123 enum pfsync_bulk_req_state {
124 PFSYNC_BREQ_S_NONE,
125 PFSYNC_BREQ_S_START,
126 PFSYNC_BREQ_S_SENT,
127 PFSYNC_BREQ_S_BULK,
128 PFSYNC_BREQ_S_DONE,
129 };
130
131 static const char *pfsync_bulk_req_state_names[] = {
132 [PFSYNC_BREQ_S_NONE] = "none",
133 [PFSYNC_BREQ_S_START] = "start",
134 [PFSYNC_BREQ_S_SENT] = "sent",
135 [PFSYNC_BREQ_S_BULK] = "bulk",
136 [PFSYNC_BREQ_S_DONE] = "done",
137 };
138
139 enum pfsync_bulk_req_event {
140 PFSYNC_BREQ_EVT_UP,
141 PFSYNC_BREQ_EVT_DOWN,
142 PFSYNC_BREQ_EVT_TMO,
143 PFSYNC_BREQ_EVT_LINK,
144 PFSYNC_BREQ_EVT_BUS_START,
145 PFSYNC_BREQ_EVT_BUS_END,
146 };
147
148 static const char *pfsync_bulk_req_event_names[] = {
149 [PFSYNC_BREQ_EVT_UP] = "up",
150 [PFSYNC_BREQ_EVT_DOWN] = "down",
151 [PFSYNC_BREQ_EVT_TMO] = "timeout",
152 [PFSYNC_BREQ_EVT_LINK] = "link",
153 [PFSYNC_BREQ_EVT_BUS_START] = "bus-start",
154 [PFSYNC_BREQ_EVT_BUS_END] = "bus-end",
155 };
156
157 struct pfsync_slice {
158 struct pfsync_softc *s_pfsync;
159 struct mutex s_mtx;
160
161 struct pf_state_queue s_qs[PFSYNC_S_COUNT];
162 TAILQ_HEAD(, tdb) s_tdb_q;
163 size_t s_len;
164 struct mbuf_list s_ml;
165
166 struct taskq *s_softnet;
167 struct task s_task;
168 struct timeout s_tmo;
169
170 struct mbuf_queue s_sendq;
171 struct task s_send;
172
173 struct pfsync_deferrals s_deferrals;
174 unsigned int s_deferred;
175 struct task s_deferrals_task;
176 struct timeout s_deferrals_tmo;
177
178 uint64_t s_stat_locks;
179 uint64_t s_stat_contended;
180 uint64_t s_stat_write_nop;
181 uint64_t s_stat_task_add;
182 uint64_t s_stat_task_run;
183 uint64_t s_stat_enqueue;
184 uint64_t s_stat_dequeue;
185
186 uint64_t s_stat_defer_add;
187 uint64_t s_stat_defer_ack;
188 uint64_t s_stat_defer_run;
189 uint64_t s_stat_defer_overlimit;
190
191 struct kstat *s_kstat;
192 } __aligned(CACHELINESIZE);
193
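/*
 * state sync work is spread over PFSYNC_NSLICES (2) slices to reduce
 * lock contention; pfsync_slice_enter() picks a slice by hashing the
 * state key.
 */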
194 #define PFSYNC_SLICE_BITS 1
195 #define PFSYNC_NSLICES (1 << PFSYNC_SLICE_BITS)
196
197 struct pfsync_softc {
198 struct ifnet sc_if;
199 unsigned int sc_dead;
200 unsigned int sc_up;
201 struct refcnt sc_refs;
202
203 /* config */
204 struct in_addr sc_syncpeer;
205 unsigned int sc_maxupdates;
206 unsigned int sc_defer;
207
208 /* operation */
209 unsigned int sc_sync_ifidx;
210 unsigned int sc_sync_if_down;
211 void *sc_inm;
212 struct task sc_ltask;
213 struct task sc_dtask;
214 struct ip sc_template;
215
216 struct pfsync_slice sc_slices[PFSYNC_NSLICES];
217
218 struct {
219 struct rwlock req_lock;
220 struct timeout req_tmo;
221 enum pfsync_bulk_req_state req_state;
222 unsigned int req_tries;
223 unsigned int req_demoted;
224 } sc_bulk_req;
225
226 struct {
227 struct rwlock snd_lock;
228 struct timeout snd_tmo;
229 time_t snd_requested;
230
231 struct pf_state *snd_next;
232 struct pf_state *snd_tail;
233 unsigned int snd_again;
234 } sc_bulk_snd;
235 };
236
237 static struct pfsync_softc *pfsyncif = NULL;
238 static struct cpumem *pfsynccounters;
239
240 static inline void
241 pfsyncstat_inc(enum pfsync_counters c)
242 {
243 counters_inc(pfsynccounters, c);
244 }
245
246 static int pfsync_clone_create(struct if_clone *, int);
247 static int pfsync_clone_destroy(struct ifnet *);
248
249 static int pfsync_output(struct ifnet *, struct mbuf *, struct sockaddr *,
250 struct rtentry *);
251 static void pfsync_start(struct ifqueue *);
252
253 static int pfsync_ioctl(struct ifnet *, u_long, caddr_t);
254 static int pfsync_up(struct pfsync_softc *);
255 static int pfsync_down(struct pfsync_softc *);
256
257 static int pfsync_set_mtu(struct pfsync_softc *, unsigned int);
258 static int pfsync_set_parent(struct pfsync_softc *,
259 const struct if_parent *);
260 static int pfsync_get_parent(struct pfsync_softc *, struct if_parent *);
261 static int pfsync_del_parent(struct pfsync_softc *);
262
263 static int pfsync_get_ioc(struct pfsync_softc *, struct ifreq *);
264 static int pfsync_set_ioc(struct pfsync_softc *, struct ifreq *);
265
266 static void pfsync_syncif_link(void *);
267 static void pfsync_syncif_detach(void *);
268
269 static void pfsync_sendout(struct pfsync_softc *, struct mbuf *);
270 static void pfsync_slice_drop(struct pfsync_softc *, struct pfsync_slice *);
271
272 static void pfsync_slice_tmo(void *);
273 static void pfsync_slice_task(void *);
274 static void pfsync_slice_sendq(void *);
275
276 static void pfsync_deferrals_tmo(void *);
277 static void pfsync_deferrals_task(void *);
278 static void pfsync_defer_output(struct pfsync_deferral *);
279
280 static void pfsync_bulk_req_evt(struct pfsync_softc *,
281 enum pfsync_bulk_req_event);
282 static void pfsync_bulk_req_tmo(void *);
283
284 static void pfsync_bulk_snd_tmo(void *);
285
286 #if NKSTAT > 0
287 struct pfsync_kstat_data {
288 struct kstat_kv pd_locks;
289 struct kstat_kv pd_contended;
290 struct kstat_kv pd_write_nop;
291 struct kstat_kv pd_task_add;
292 struct kstat_kv pd_task_run;
293 struct kstat_kv pd_enqueue;
294 struct kstat_kv pd_dequeue;
295 struct kstat_kv pd_qdrop;
296
297 struct kstat_kv pd_defer_len;
298 struct kstat_kv pd_defer_add;
299 struct kstat_kv pd_defer_ack;
300 struct kstat_kv pd_defer_run;
301 struct kstat_kv pd_defer_overlimit;
302 };
303
304 static const struct pfsync_kstat_data pfsync_kstat_tpl = {
305 KSTAT_KV_INITIALIZER("locks", KSTAT_KV_T_COUNTER64),
306 KSTAT_KV_INITIALIZER("contended", KSTAT_KV_T_COUNTER64),
307 KSTAT_KV_INITIALIZER("write-nops", KSTAT_KV_T_COUNTER64),
308 KSTAT_KV_INITIALIZER("send-sched", KSTAT_KV_T_COUNTER64),
309 KSTAT_KV_INITIALIZER("send-run", KSTAT_KV_T_COUNTER64),
310 KSTAT_KV_INITIALIZER("enqueues", KSTAT_KV_T_COUNTER64),
311 KSTAT_KV_INITIALIZER("dequeues", KSTAT_KV_T_COUNTER64),
312 KSTAT_KV_UNIT_INITIALIZER("qdrops",
313 KSTAT_KV_T_COUNTER32, KSTAT_KV_U_PACKETS),
314
315 KSTAT_KV_UNIT_INITIALIZER("defer-len",
316 KSTAT_KV_T_COUNTER32, KSTAT_KV_U_PACKETS),
317 KSTAT_KV_INITIALIZER("defer-add", KSTAT_KV_T_COUNTER64),
318 KSTAT_KV_INITIALIZER("defer-ack", KSTAT_KV_T_COUNTER64),
319 KSTAT_KV_INITIALIZER("defer-run", KSTAT_KV_T_COUNTER64),
320 KSTAT_KV_INITIALIZER("defer-over", KSTAT_KV_T_COUNTER64),
321 };
322
323 static int
324 pfsync_kstat_copy(struct kstat *ks, void *dst)
325 {
326 struct pfsync_slice *s = ks->ks_softc;
327 struct pfsync_kstat_data *pd = dst;
328
329 *pd = pfsync_kstat_tpl;
330 kstat_kv_u64(&pd->pd_locks) = s->s_stat_locks;
331 kstat_kv_u64(&pd->pd_contended) = s->s_stat_contended;
332 kstat_kv_u64(&pd->pd_write_nop) = s->s_stat_write_nop;
333 kstat_kv_u64(&pd->pd_task_add) = s->s_stat_task_add;
334 kstat_kv_u64(&pd->pd_task_run) = s->s_stat_task_run;
335 kstat_kv_u64(&pd->pd_enqueue) = s->s_stat_enqueue;
336 kstat_kv_u64(&pd->pd_dequeue) = s->s_stat_dequeue;
337 kstat_kv_u32(&pd->pd_qdrop) = mq_drops(&s->s_sendq);
338
339 kstat_kv_u32(&pd->pd_defer_len) = s->s_deferred;
340 kstat_kv_u64(&pd->pd_defer_add) = s->s_stat_defer_add;
341 kstat_kv_u64(&pd->pd_defer_ack) = s->s_stat_defer_ack;
342 kstat_kv_u64(&pd->pd_defer_run) = s->s_stat_defer_run;
343 kstat_kv_u64(&pd->pd_defer_overlimit) = s->s_stat_defer_overlimit;
344
345 return (0);
346 }
347 #endif /* NKSTAT > 0 */
348
349 #define PFSYNC_MAX_BULKTRIES 12
350
351 struct if_clone pfsync_cloner =
352 IF_CLONE_INITIALIZER("pfsync", pfsync_clone_create, pfsync_clone_destroy);
353
354 void
355 pfsyncattach(int npfsync)
356 {
357 pfsynccounters = counters_alloc(pfsyncs_ncounters);
358 if_clone_attach(&pfsync_cloner);
359 }
360
361 static int
362 pfsync_clone_create(struct if_clone *ifc, int unit)
363 {
364 struct pfsync_softc *sc;
365 struct ifnet *ifp;
366 size_t i, q;
367
368 if (unit != 0)
369 return (ENXIO);
370
371 if (pfsync_deferrals_pool.pr_size == 0) {
372 pool_init(&pfsync_deferrals_pool,
373 sizeof(struct pfsync_deferral), 0,
374 IPL_MPFLOOR, 0, "pfdefer", NULL);
375 /* pool_cache_init(&pfsync_deferrals_pool); */
376 }
377
378 sc = malloc(sizeof(*sc), M_DEVBUF, M_WAITOK|M_ZERO|M_CANFAIL);
379 if (sc == NULL)
380 return (ENOMEM);
381
382 /* sc_refs is "owned" by IFF_RUNNING */
383
384 sc->sc_syncpeer.s_addr = INADDR_PFSYNC_GROUP;
385 sc->sc_maxupdates = 128;
386 sc->sc_defer = 0;
387
388 task_set(&sc->sc_ltask, pfsync_syncif_link, sc);
389 task_set(&sc->sc_dtask, pfsync_syncif_detach, sc);
390
391 rw_init(&sc->sc_bulk_req.req_lock, "pfsyncbreq");
392 /* need process context to take net lock to call ip_output */
393 timeout_set_proc(&sc->sc_bulk_req.req_tmo, pfsync_bulk_req_tmo, sc);
394
395 rw_init(&sc->sc_bulk_snd.snd_lock, "pfsyncbsnd");
396 /* need process context to take net lock to call ip_output */
397 timeout_set_proc(&sc->sc_bulk_snd.snd_tmo, pfsync_bulk_snd_tmo, sc);
398
399 ifp = &sc->sc_if;
400 snprintf(ifp->if_xname, sizeof ifp->if_xname, "%s%d",
401 ifc->ifc_name, unit);
402 ifp->if_softc = sc;
403 ifp->if_ioctl = pfsync_ioctl;
404 ifp->if_output = pfsync_output;
405 ifp->if_qstart = pfsync_start;
406 ifp->if_type = IFT_PFSYNC;
407 ifp->if_hdrlen = sizeof(struct pfsync_header);
408 ifp->if_mtu = ETHERMTU;
409 ifp->if_xflags = IFXF_CLONED | IFXF_MPSAFE;
410
411 for (i = 0; i < nitems(sc->sc_slices); i++) {
412 struct pfsync_slice *s = &sc->sc_slices[i];
413
414 s->s_pfsync = sc;
415
416 mtx_init_flags(&s->s_mtx, IPL_SOFTNET, "pfslice", 0);
417 s->s_softnet = net_tq(i);
418 timeout_set(&s->s_tmo, pfsync_slice_tmo, s);
419 task_set(&s->s_task, pfsync_slice_task, s);
420
421 mq_init(&s->s_sendq, 16, IPL_SOFTNET);
422 task_set(&s->s_send, pfsync_slice_sendq, s);
423
424 s->s_len = PFSYNC_MINPKT;
425 ml_init(&s->s_ml);
426
427 for (q = 0; q < nitems(s->s_qs); q++)
428 TAILQ_INIT(&s->s_qs[q]);
429 TAILQ_INIT(&s->s_tdb_q);
430
431 /* stupid NET_LOCK */
432 timeout_set(&s->s_deferrals_tmo, pfsync_deferrals_tmo, s);
433 task_set(&s->s_deferrals_task, pfsync_deferrals_task, s);
434 TAILQ_INIT(&s->s_deferrals);
435
436 #if NKSTAT > 0
437 s->s_kstat = kstat_create(ifp->if_xname, 0, "pfsync-slice", i,
438 KSTAT_T_KV, 0);
439
440 kstat_set_mutex(s->s_kstat, &s->s_mtx);
441 s->s_kstat->ks_softc = s;
442 s->s_kstat->ks_datalen = sizeof(pfsync_kstat_tpl);
443 s->s_kstat->ks_copy = pfsync_kstat_copy;
444 kstat_install(s->s_kstat);
445 #endif
446 }
447
448 if_counters_alloc(ifp);
449 if_attach(ifp);
450 if_alloc_sadl(ifp);
451
452 #if NCARP > 0
453 if_addgroup(ifp, "carp");
454 #endif
455
456 #if NBPFILTER > 0
457 bpfattach(&sc->sc_if.if_bpf, ifp, DLT_PFSYNC, PFSYNC_HDRLEN);
458 #endif
459
460 return (0);
461 }
462
463 static int
464 pfsync_clone_destroy(struct ifnet *ifp)
465 {
466 struct pfsync_softc *sc = ifp->if_softc;
467 #if NKSTAT > 0
468 size_t i;
469 #endif
470
471 NET_LOCK();
472 sc->sc_dead = 1;
473
474 if (ISSET(ifp->if_flags, IFF_RUNNING))
475 pfsync_down(sc);
476 NET_UNLOCK();
477
478 if_detach(ifp);
479
480 #if NKSTAT > 0
481 for (i = 0; i < nitems(sc->sc_slices); i++) {
482 struct pfsync_slice *s = &sc->sc_slices[i];
483
484 kstat_destroy(s->s_kstat);
485 }
486 #endif
487
488 free(sc, M_DEVBUF, sizeof(*sc));
489
490 return (0);
491 }
492
493 static void
494 pfsync_dprintf(struct pfsync_softc *sc, const char *fmt, ...)
495 {
496 struct ifnet *ifp = &sc->sc_if;
497 va_list ap;
498
499 if (!ISSET(ifp->if_flags, IFF_DEBUG))
500 return;
501
502 printf("%s: ", ifp->if_xname);
503 va_start(ap, fmt);
504 vprintf(fmt, ap);
505 va_end(ap);
506 printf("\n");
507 }
508
509 static void
510 pfsync_syncif_link(void *arg)
511 {
512 struct pfsync_softc *sc = arg;
513 struct ifnet *ifp0;
514 unsigned int sync_if_down = 1;
515
516 ifp0 = if_get(sc->sc_sync_ifidx);
517 if (ifp0 != NULL && LINK_STATE_IS_UP(ifp0->if_link_state)) {
518 pfsync_bulk_req_evt(sc, PFSYNC_BREQ_EVT_LINK);
519 sync_if_down = 0;
520 }
521 if_put(ifp0);
522
523 #if NCARP > 0
524 if (sc->sc_sync_if_down != sync_if_down) {
525 carp_group_demote_adj(&sc->sc_if,
526 sync_if_down ? 1 : -1, "pfsync link");
527 }
528 #endif
529
530 sc->sc_sync_if_down = sync_if_down;
531 }
532
533 static void
534 pfsync_syncif_detach(void *arg)
535 {
536 struct pfsync_softc *sc = arg;
537 struct ifnet *ifp = &sc->sc_if;
538
539 if (ISSET(ifp->if_flags, IFF_RUNNING)) {
540 pfsync_down(sc);
541 if_down(ifp);
542 }
543
544 sc->sc_sync_ifidx = 0;
545 }
546
547 static int
548 pfsync_output(struct ifnet *ifp, struct mbuf *m, struct sockaddr *dst,
549 struct rtentry *rt)
550 {
551 m_freem(m); /* drop packet */
552 return (EAFNOSUPPORT);
553 }
554
555 static int
556 pfsync_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
557 {
558 struct pfsync_softc *sc = ifp->if_softc;
559 struct ifreq *ifr = (struct ifreq *)data;
560 int error = ENOTTY;
561
562 switch (cmd) {
563 case SIOCSIFADDR:
564 error = EOPNOTSUPP;
565 break;
566
567 case SIOCSIFFLAGS:
568 if (ISSET(ifp->if_flags, IFF_UP)) {
569 if (!ISSET(ifp->if_flags, IFF_RUNNING))
570 error = pfsync_up(sc);
571 else
572 error = ENETRESET;
573 } else {
574 if (ISSET(ifp->if_flags, IFF_RUNNING))
575 error = pfsync_down(sc);
576 }
577 break;
578
579 case SIOCSIFMTU:
580 error = pfsync_set_mtu(sc, ifr->ifr_mtu);
581 break;
582
583 case SIOCSIFPARENT:
584 error = pfsync_set_parent(sc, (struct if_parent *)data);
585 break;
586 case SIOCGIFPARENT:
587 error = pfsync_get_parent(sc, (struct if_parent *)data);
588 break;
589 case SIOCDIFPARENT:
590 error = pfsync_del_parent(sc);
591 break;
592
593 case SIOCSETPFSYNC:
594 error = pfsync_set_ioc(sc, ifr);
595 break;
596 case SIOCGETPFSYNC:
597 error = pfsync_get_ioc(sc, ifr);
598 break;
599
600 default:
601 break;
602 }
603
604 if (error == ENETRESET)
605 error = 0;
606
607 return (error);
608 }
609
610 static int
611 pfsync_set_mtu(struct pfsync_softc *sc, unsigned int mtu)
612 {
613 struct ifnet *ifp = &sc->sc_if;
614 struct ifnet *ifp0;
615 int error = 0;
616
617 ifp0 = if_get(sc->sc_sync_ifidx);
618 if (ifp0 == NULL)
619 return (EINVAL);
620
621 if (mtu <= PFSYNC_MINPKT || mtu > ifp0->if_mtu) {
622 error = EINVAL;
623 goto put;
624 }
625
626 /* commit */
627 ifp->if_mtu = mtu;
628
629 put:
630 if_put(ifp0);
631 return (error);
632 }
633
634 static int
635 pfsync_set_parent(struct pfsync_softc *sc, const struct if_parent *p)
636 {
637 struct ifnet *ifp = &sc->sc_if;
638 struct ifnet *ifp0;
639 int error = 0;
640
641 ifp0 = if_unit(p->ifp_parent);
642 if (ifp0 == NULL)
643 return (ENXIO);
644
645 if (ifp0->if_index == sc->sc_sync_ifidx)
646 goto put;
647
648 if (ISSET(ifp->if_flags, IFF_RUNNING)) {
649 error = EBUSY;
650 goto put;
651 }
652
653 /* commit */
654 sc->sc_sync_ifidx = ifp0->if_index;
655
656 put:
657 if_put(ifp0);
658 return (error);
659 }
660
661 static int
662 pfsync_get_parent(struct pfsync_softc *sc, struct if_parent *p)
663 {
664 struct ifnet *ifp0;
665 int error = 0;
666
667 ifp0 = if_get(sc->sc_sync_ifidx);
668 if (ifp0 == NULL)
669 error = EADDRNOTAVAIL;
670 else
671 strlcpy(p->ifp_parent, ifp0->if_xname, sizeof(p->ifp_parent));
672 if_put(ifp0);
673
674 return (error);
675 }
676
677 static int
678 pfsync_del_parent(struct pfsync_softc *sc)
679 {
680 struct ifnet *ifp = &sc->sc_if;
681
682 if (ISSET(ifp->if_flags, IFF_RUNNING))
683 return (EBUSY);
684
685 /* commit */
686 sc->sc_sync_ifidx = 0;
687
688 return (0);
689 }
690
691 static int
692 pfsync_get_ioc(struct pfsync_softc *sc, struct ifreq *ifr)
693 {
694 struct pfsyncreq pfsyncr;
695 struct ifnet *ifp0;
696
697 memset(&pfsyncr, 0, sizeof(pfsyncr));
698
699 ifp0 = if_get(sc->sc_sync_ifidx);
700 if (ifp0 != NULL) {
701 strlcpy(pfsyncr.pfsyncr_syncdev, ifp0->if_xname,
702 sizeof(pfsyncr.pfsyncr_syncdev));
703 }
704 if_put(ifp0);
705
706 pfsyncr.pfsyncr_syncpeer = sc->sc_syncpeer;
707 pfsyncr.pfsyncr_maxupdates = sc->sc_maxupdates;
708 pfsyncr.pfsyncr_defer = sc->sc_defer;
709
710 return (copyout(&pfsyncr, ifr->ifr_data, sizeof(pfsyncr)));
711 }
712
713 static int
714 pfsync_set_ioc(struct pfsync_softc *sc, struct ifreq *ifr)
715 {
716 struct ifnet *ifp = &sc->sc_if;
717 struct pfsyncreq pfsyncr;
718 unsigned int sync_ifidx = sc->sc_sync_ifidx;
719 int wantdown = 0;
720 int error;
721
722 error = suser(curproc);
723 if (error != 0)
724 return (error);
725
726 error = copyin(ifr->ifr_data, &pfsyncr, sizeof(pfsyncr));
727 if (error != 0)
728 return (error);
729
730 if (pfsyncr.pfsyncr_maxupdates > 255)
731 return (EINVAL);
732
733 if (pfsyncr.pfsyncr_syncdev[0] != '\0') { /* set */
734 struct ifnet *ifp0 = if_unit(pfsyncr.pfsyncr_syncdev);
735 if (ifp0 == NULL)
736 return (ENXIO);
737
738 if (ifp0->if_index != sync_ifidx)
739 wantdown = 1;
740
741 sync_ifidx = ifp0->if_index;
742 if_put(ifp0);
743 } else { /* del */
744 wantdown = 1;
745 sync_ifidx = 0;
746 }
747
748 if (pfsyncr.pfsyncr_syncpeer.s_addr == INADDR_ANY)
749 pfsyncr.pfsyncr_syncpeer.s_addr = INADDR_PFSYNC_GROUP;
750 if (pfsyncr.pfsyncr_syncpeer.s_addr != sc->sc_syncpeer.s_addr)
751 wantdown = 1;
752
753 if (wantdown && ISSET(ifp->if_flags, IFF_RUNNING))
754 return (EBUSY);
755
756 /* commit */
757 sc->sc_sync_ifidx = sync_ifidx;
758 sc->sc_syncpeer = pfsyncr.pfsyncr_syncpeer;
759 sc->sc_maxupdates = pfsyncr.pfsyncr_maxupdates;
760 sc->sc_defer = pfsyncr.pfsyncr_defer;
761
762 return (0);
763 }
764
765 static int
766 pfsync_up(struct pfsync_softc *sc)
767 {
768 struct ifnet *ifp = &sc->sc_if;
769 struct ifnet *ifp0;
770 void *inm = NULL;
771 int error = 0;
772 struct ip *ip;
773
774 NET_ASSERT_LOCKED();
775 KASSERT(!ISSET(ifp->if_flags, IFF_RUNNING));
776
777 if (sc->sc_dead)
778 return (ENXIO);
779
780 /*
781 * coordinate with pfsync_down(). if sc_up is still up and
782 * we're here then something else is tearing pfsync down.
783 */
784 if (sc->sc_up)
785 return (EBUSY);
786
787 if (sc->sc_syncpeer.s_addr == INADDR_ANY ||
788 sc->sc_syncpeer.s_addr == INADDR_BROADCAST)
789 return (EDESTADDRREQ);
790
791 ifp0 = if_get(sc->sc_sync_ifidx);
792 if (ifp0 == NULL)
793 return (ENXIO);
794
795 if (IN_MULTICAST(sc->sc_syncpeer.s_addr)) {
796 if (!ISSET(ifp0->if_flags, IFF_MULTICAST)) {
797 error = ENODEV;
798 goto put;
799 }
800 inm = in_addmulti(&sc->sc_syncpeer, ifp0);
801 if (inm == NULL) {
802 error = ECONNABORTED;
803 goto put;
804 }
805 }
806
807 sc->sc_up = 1;
808
809 ip = &sc->sc_template;
810 memset(ip, 0, sizeof(*ip));
811 ip->ip_v = IPVERSION;
812 ip->ip_hl = sizeof(*ip) >> 2;
813 ip->ip_tos = IPTOS_LOWDELAY;
814 /* len and id are set later */
815 ip->ip_off = htons(IP_DF);
816 ip->ip_ttl = PFSYNC_DFLTTL;
817 ip->ip_p = IPPROTO_PFSYNC;
818 ip->ip_src.s_addr = INADDR_ANY;
819 ip->ip_dst.s_addr = sc->sc_syncpeer.s_addr;
820
821 /* commit */
822 refcnt_init(&sc->sc_refs); /* IFF_RUNNING kind of owns this */
823
824 #if NCARP > 0
825 sc->sc_sync_if_down = 1;
826 carp_group_demote_adj(&sc->sc_if, 1, "pfsync up");
827 #endif
828
829 if_linkstatehook_add(ifp0, &sc->sc_ltask);
830 if_detachhook_add(ifp0, &sc->sc_dtask);
831
832 sc->sc_inm = inm;
833 SET(ifp->if_flags, IFF_RUNNING);
834
835 pfsync_bulk_req_evt(sc, PFSYNC_BREQ_EVT_UP);
836
837 refcnt_take(&sc->sc_refs); /* give one to SMR */
838 SMR_PTR_SET_LOCKED(&pfsyncif, sc);
839
840 pfsync_syncif_link(sc); /* try and push the bulk req state forward */
841
842 put:
843 if_put(ifp0);
844 return (error);
845 }
846
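/*
 * pfsync_encap prepends the IP and pfsync headers from sc_template
 * onto a packet that already carries the subheaders and payload,
 * filling in the lengths and a fresh IP id.
 */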
847 static struct mbuf *
848 pfsync_encap(struct pfsync_softc *sc, struct mbuf *m)
849 {
850 struct {
851 struct ip ip;
852 struct pfsync_header ph;
853 } __packed __aligned(4) *h;
854 unsigned int mlen = m->m_pkthdr.len;
855
856 m = m_prepend(m, sizeof(*h), M_DONTWAIT);
857 if (m == NULL)
858 return (NULL);
859
860 h = mtod(m, void *);
861 memset(h, 0, sizeof(*h));
862
863 mlen += sizeof(h->ph);
864 h->ph.version = PFSYNC_VERSION;
865 h->ph.len = htons(mlen);
866 /* h->ph.pfcksum */
867
868 mlen += sizeof(h->ip);
869 h->ip = sc->sc_template;
870 h->ip.ip_len = htons(mlen);
871 h->ip.ip_id = htons(ip_randomid());
872
873 return (m);
874 }
875
876 static void
877 pfsync_bulk_req_send(struct pfsync_softc *sc)
878 {
879 struct {
880 struct pfsync_subheader subh;
881 struct pfsync_upd_req ur;
882 } __packed __aligned(4) *h;
883 unsigned mlen = max_linkhdr +
884 sizeof(struct ip) + sizeof(struct pfsync_header) + sizeof(*h);
885 struct mbuf *m;
886
887 m = m_gethdr(M_DONTWAIT, MT_DATA);
888 if (m == NULL)
889 goto fail;
890
891 if (mlen > MHLEN) {
892 MCLGETL(m, M_DONTWAIT, mlen);
893 if (!ISSET(m->m_flags, M_EXT))
894 goto drop;
895 }
896
897 m_align(m, sizeof(*h));
898 m->m_len = m->m_pkthdr.len = sizeof(*h);
899
900 h = mtod(m, void *);
901 memset(h, 0, sizeof(*h));
902
903 h->subh.action = PFSYNC_ACT_UPD_REQ;
904 h->subh.len = sizeof(h->ur) >> 2;
905 h->subh.count = htons(1);
906
907 h->ur.id = htobe64(0);
908 h->ur.creatorid = htobe32(0);
909
910 m = pfsync_encap(sc, m);
911 if (m == NULL)
912 goto fail;
913
914 pfsync_sendout(sc, m);
915 return;
916
917 drop:
918 m_freem(m);
919 fail:
920 printf("%s: unable to request bulk update\n", sc->sc_if.if_xname);
921 }
922
923 static void
924 pfsync_bulk_req_nstate(struct pfsync_softc *sc,
925 enum pfsync_bulk_req_state nstate, int seconds)
926 {
927 sc->sc_bulk_req.req_state = nstate;
928 if (seconds > 0)
929 timeout_add_sec(&sc->sc_bulk_req.req_tmo, seconds);
930 else
931 timeout_del(&sc->sc_bulk_req.req_tmo);
932 }
933
934 static void
935 pfsync_bulk_req_invstate(struct pfsync_softc *sc,
936 enum pfsync_bulk_req_event evt)
937 {
938 panic("%s: unexpected event %s in state %s", sc->sc_if.if_xname,
939 pfsync_bulk_req_event_names[evt],
940 pfsync_bulk_req_state_names[sc->sc_bulk_req.req_state]);
941 }
942
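/*
 * the bulk receive timeout is scaled to how long a full bulk send
 * should take: the number of packets needed to carry every possible
 * state, at one packet per PFSYNC_BULK_SND_IVAL_MS, times 4 for slack.
 * e.g. (illustrative numbers only) a limit of 100000 states at roughly
 * 4 states per 1500 byte packet is 25000 packets, or about 500 seconds
 * at 50 packets/sec, giving a timeout of around 2000 seconds.
 */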
943 static void
944 pfsync_bulk_req_nstate_bulk(struct pfsync_softc *sc)
945 {
946 /* calculate the number of packets we expect */
947 int t = pf_pool_limits[PF_LIMIT_STATES].limit /
948 ((sc->sc_if.if_mtu - PFSYNC_MINPKT) /
949 sizeof(struct pfsync_state));
950
951 /* turn it into seconds */
952 t /= 1000 / PFSYNC_BULK_SND_IVAL_MS;
953
954 if (t == 0)
955 t = 1;
956
957 pfsync_bulk_req_nstate(sc, PFSYNC_BREQ_S_BULK, t * 4);
958 }
959
960 static inline void
961 pfsync_bulk_req_nstate_done(struct pfsync_softc *sc)
962 {
963 pfsync_bulk_req_nstate(sc, PFSYNC_BREQ_S_DONE, 0);
964
965 KASSERT(sc->sc_bulk_req.req_demoted == 1);
966 sc->sc_bulk_req.req_demoted = 0;
967
968 #if NCARP > 0
969 carp_group_demote_adj(&sc->sc_if, -32, "pfsync done");
970 #endif
971 }
972
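/*
 * bulk request state machine: UP moves NONE->START (demoting carp),
 * LINK sends the update request and moves START->SENT, a BUS start
 * message from the peer moves to BULK, and the BUS end message (or
 * running out of retries on timeout) moves to DONE and lifts the
 * demotion. DOWN resets to NONE from any state.
 */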
973 static void
974 pfsync_bulk_req_evt(struct pfsync_softc *sc, enum pfsync_bulk_req_event evt)
975 {
976 struct ifnet *ifp = &sc->sc_if;
977
978 rw_enter_write(&sc->sc_bulk_req.req_lock);
979 pfsync_dprintf(sc, "%s state %s evt %s", __func__,
980 pfsync_bulk_req_state_names[sc->sc_bulk_req.req_state],
981 pfsync_bulk_req_event_names[evt]);
982
983 if (evt == PFSYNC_BREQ_EVT_DOWN) {
984 /* unconditionally move down */
985 sc->sc_bulk_req.req_tries = 0;
986 pfsync_bulk_req_nstate(sc, PFSYNC_BREQ_S_NONE, 0);
987
988 if (sc->sc_bulk_req.req_demoted) {
989 sc->sc_bulk_req.req_demoted = 0;
990 #if NCARP > 0
991 carp_group_demote_adj(&sc->sc_if, -32,
992 "pfsync down");
993 #endif
994 }
995 } else switch (sc->sc_bulk_req.req_state) {
996 case PFSYNC_BREQ_S_NONE:
997 switch (evt) {
998 case PFSYNC_BREQ_EVT_UP:
999 KASSERT(sc->sc_bulk_req.req_demoted == 0);
1000 sc->sc_bulk_req.req_demoted = 1;
1001 #if NCARP > 0
1002 carp_group_demote_adj(&sc->sc_if, 32,
1003 "pfsync start");
1004 #endif
1005 pfsync_bulk_req_nstate(sc, PFSYNC_BREQ_S_START, 30);
1006 break;
1007 default:
1008 pfsync_bulk_req_invstate(sc, evt);
1009 }
1010
1011 break;
1012
1013 case PFSYNC_BREQ_S_START:
1014 switch (evt) {
1015 case PFSYNC_BREQ_EVT_LINK:
1016 pfsync_bulk_req_send(sc);
1017 pfsync_bulk_req_nstate(sc, PFSYNC_BREQ_S_SENT, 2);
1018 break;
1019 case PFSYNC_BREQ_EVT_TMO:
1020 pfsync_dprintf(sc, "timeout waiting for link");
1021 pfsync_bulk_req_nstate_done(sc);
1022 break;
1023 case PFSYNC_BREQ_EVT_BUS_START:
1024 pfsync_bulk_req_nstate_bulk(sc);
1025 break;
1026 case PFSYNC_BREQ_EVT_BUS_END:
1027 /* ignore this */
1028 break;
1029 default:
1030 pfsync_bulk_req_invstate(sc, evt);
1031 }
1032 break;
1033
1034 case PFSYNC_BREQ_S_SENT:
1035 switch (evt) {
1036 case PFSYNC_BREQ_EVT_BUS_START:
1037 pfsync_bulk_req_nstate_bulk(sc);
1038 break;
1039 case PFSYNC_BREQ_EVT_BUS_END:
1040 case PFSYNC_BREQ_EVT_LINK:
1041 /* ignore this */
1042 break;
1043 case PFSYNC_BREQ_EVT_TMO:
1044 if (++sc->sc_bulk_req.req_tries <
1045 PFSYNC_MAX_BULKTRIES) {
1046 pfsync_bulk_req_send(sc);
1047 pfsync_bulk_req_nstate(sc,
1048 PFSYNC_BREQ_S_SENT, 2);
1049 break;
1050 }
1051
1052 pfsync_dprintf(sc,
1053 "timeout waiting for bulk transfer start");
1054 pfsync_bulk_req_nstate_done(sc);
1055 break;
1056 default:
1057 pfsync_bulk_req_invstate(sc, evt);
1058 }
1059 break;
1060
1061 case PFSYNC_BREQ_S_BULK:
1062 switch (evt) {
1063 case PFSYNC_BREQ_EVT_BUS_START:
1064 case PFSYNC_BREQ_EVT_LINK:
1065 /* ignore this */
1066 break;
1067 case PFSYNC_BREQ_EVT_BUS_END:
1068 pfsync_bulk_req_nstate_done(sc);
1069 break;
1070 case PFSYNC_BREQ_EVT_TMO:
1071 if (++sc->sc_bulk_req.req_tries <
1072 PFSYNC_MAX_BULKTRIES) {
1073 pfsync_bulk_req_send(sc);
1074 pfsync_bulk_req_nstate(sc,
1075 PFSYNC_BREQ_S_SENT, 2);
break;
1076 }
1077
1078 pfsync_dprintf(sc,
1079 "timeout waiting for bulk transfer end");
1080 pfsync_bulk_req_nstate_done(sc);
1081 break;
1082 default:
1083 pfsync_bulk_req_invstate(sc, evt);
1084 }
1085 break;
1086
1087 case PFSYNC_BREQ_S_DONE: /* pfsync is up and running */
1088 switch (evt) {
1089 case PFSYNC_BREQ_EVT_BUS_START:
1090 case PFSYNC_BREQ_EVT_BUS_END:
1091 case PFSYNC_BREQ_EVT_LINK:
1092 /* nops */
1093 break;
1094 default:
1095 pfsync_bulk_req_invstate(sc, evt);
1096 }
1097 break;
1098
1099 default:
1100 panic("%s: unknown event %d", ifp->if_xname, evt);
1101 /* NOTREACHED */
1102 }
1103 rw_exit_write(&sc->sc_bulk_req.req_lock);
1104 }
1105
1106 static void
1107 pfsync_bulk_req_tmo(void *arg)
1108 {
1109 struct pfsync_softc *sc = arg;
1110
1111 NET_LOCK();
1112 pfsync_bulk_req_evt(sc, PFSYNC_BREQ_EVT_TMO);
1113 NET_UNLOCK();
1114 }
1115
1116 static int
1117 pfsync_down(struct pfsync_softc *sc)
1118 {
1119 struct ifnet *ifp = &sc->sc_if;
1120 struct ifnet *ifp0;
1121 struct smr_entry smr;
1122 size_t i;
1123 void *inm = NULL;
1124 unsigned int sndbar = 0;
1125 struct pfsync_deferrals pds = TAILQ_HEAD_INITIALIZER(pds);
1126 struct pfsync_deferral *pd;
1127
1128 NET_ASSERT_LOCKED();
1129 KASSERT(ISSET(ifp->if_flags, IFF_RUNNING));
1130
1131 /*
1132 * tearing down pfsync involves waiting for pfsync to stop
1133 * running in various contexts including softnet taskqs.
1134 * this thread cannot hold netlock while waiting for a
1135 * barrier in softnet because softnet might be waiting for
1136 * the netlock. sc->sc_up is used to coordinate with
1137 * pfsync_up.
1138 */
1139
1140 CLR(ifp->if_flags, IFF_RUNNING);
1141
1142 ifp0 = if_get(sc->sc_sync_ifidx);
1143 if (ifp0 != NULL) {
1144 if_linkstatehook_del(ifp0, &sc->sc_ltask);
1145 if_detachhook_del(ifp0, &sc->sc_dtask);
1146 }
1147 if_put(ifp0);
1148
1149 #if NCARP > 0
1150 if (sc->sc_sync_if_down)
1151 carp_group_demote_adj(&sc->sc_if, -1, "pfsync down");
1152 #endif
1153
1154 NET_UNLOCK();
1155
1156 KASSERTMSG(SMR_PTR_GET_LOCKED(&pfsyncif) == sc,
1157 "pfsyncif %p != sc %p", pfsyncif, sc);
1158 SMR_PTR_SET_LOCKED(&pfsyncif, NULL);
1159 smr_init(&smr);
1160 smr_call(&smr, (void (*)(void *))refcnt_rele_wake, &sc->sc_refs);
1161
1162 /* stop pf producing work before cleaning up the timeouts and tasks */
1163 refcnt_finalize(&sc->sc_refs, "pfsyncfini");
1164
1165 pfsync_bulk_req_evt(sc, PFSYNC_BREQ_EVT_DOWN);
1166
1167 rw_enter_read(&pf_state_list.pfs_rwl);
1168 rw_enter_write(&sc->sc_bulk_snd.snd_lock);
1169 if (sc->sc_bulk_snd.snd_tail != NULL) {
1170 sndbar = !timeout_del(&sc->sc_bulk_snd.snd_tmo);
1171
1172 sc->sc_bulk_snd.snd_again = 0;
1173 sc->sc_bulk_snd.snd_next = NULL;
1174 sc->sc_bulk_snd.snd_tail = NULL;
1175 }
1176 rw_exit_write(&sc->sc_bulk_snd.snd_lock);
1177 rw_exit_read(&pf_state_list.pfs_rwl);
1178
1179 /*
1180 * do a single barrier for all the timeouts. because the
1181 * timeouts in each slice are configured the same way, the
1182 * barrier for one will work for all of them.
1183 */
1184 for (i = 0; i < nitems(sc->sc_slices); i++) {
1185 struct pfsync_slice *s = &sc->sc_slices[i];
1186
1187 timeout_del(&s->s_tmo);
1188 task_del(s->s_softnet, &s->s_task);
1189 task_del(s->s_softnet, &s->s_send);
1190
1191 timeout_del(&s->s_deferrals_tmo);
1192 task_del(s->s_softnet, &s->s_deferrals_task);
1193 }
1194 timeout_barrier(&sc->sc_slices[0].s_tmo);
1195 timeout_barrier(&sc->sc_bulk_req.req_tmo); /* XXX proc */
1196 if (sndbar) {
1197 /* technically the preceding barrier does the same job */
1198 timeout_barrier(&sc->sc_bulk_snd.snd_tmo);
1199 }
1200 net_tq_barriers("pfsyncbar");
1201
1202 /* pfsync is no longer running */
1203
1204 if (sc->sc_inm != NULL) {
1205 inm = sc->sc_inm;
1206 sc->sc_inm = NULL;
1207 }
1208
1209 for (i = 0; i < nitems(sc->sc_slices); i++) {
1210 struct pfsync_slice *s = &sc->sc_slices[i];
1211 struct pf_state *st;
1212
1213 pfsync_slice_drop(sc, s);
1214 mq_purge(&s->s_sendq);
1215
1216 while ((pd = TAILQ_FIRST(&s->s_deferrals)) != NULL) {
1217 TAILQ_REMOVE(&s->s_deferrals, pd, pd_entry);
1218
1219 st = pd->pd_st;
1220 st->sync_defer = NULL;
1221
1222 TAILQ_INSERT_TAIL(&pds, pd, pd_entry);
1223 }
1224 s->s_deferred = 0;
1225 }
1226
1227 NET_LOCK();
1228 sc->sc_up = 0;
1229
1230 if (inm != NULL)
1231 in_delmulti(inm);
1232
1233 while ((pd = TAILQ_FIRST(&pds)) != NULL) {
1234 TAILQ_REMOVE(&pds, pd, pd_entry);
1235
1236 pfsync_defer_output(pd);
1237 }
1238
1239 return (0);
1240 }
1241
1242 int
1243 pfsync_is_up(void)
1244 {
1245 int rv;
1246
1247 smr_read_enter();
1248 rv = SMR_PTR_GET(&pfsyncif) != NULL;
1249 smr_read_leave();
1250
1251 return (rv);
1252 }
1253
1254 static void
1255 pfsync_start(struct ifqueue *ifq)
1256 {
1257 ifq_purge(ifq);
1258 }
1259
1260 struct pfsync_q {
1261 void (*write)(struct pf_state *, void *);
1262 size_t len;
1263 u_int8_t action;
1264 };
1265
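/*
 * map a state to its slice by hashing the state key and take the
 * slice mutex. mtx_enter_try is attempted first purely so contended
 * acquisitions can be counted.
 */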
1266 static struct pfsync_slice *
1267 pfsync_slice_enter(struct pfsync_softc *sc, const struct pf_state *st)
1268 {
1269 unsigned int idx = st->key[0]->hash % nitems(sc->sc_slices);
1270 struct pfsync_slice *s = &sc->sc_slices[idx];
1271
1272 if (!mtx_enter_try(&s->s_mtx)) {
1273 mtx_enter(&s->s_mtx);
1274 s->s_stat_contended++;
1275 }
1276 s->s_stat_locks++;
1277
1278 return (s);
1279 }
1280
1281 static void
1282 pfsync_slice_leave(struct pfsync_softc *sc, struct pfsync_slice *s)
1283 {
1284 mtx_leave(&s->s_mtx);
1285 }
1286
1287 /* we have one of these for every PFSYNC_S_ */
1288 static void pfsync_out_state(struct pf_state *, void *);
1289 static void pfsync_out_iack(struct pf_state *, void *);
1290 static void pfsync_out_upd_c(struct pf_state *, void *);
1291 static void pfsync_out_del(struct pf_state *, void *);
1292 #if defined(IPSEC)
1293 static void pfsync_out_tdb(struct tdb *, void *);
1294 #endif
1295
1296 static const struct pfsync_q pfsync_qs[] = {
1297 { pfsync_out_iack, sizeof(struct pfsync_ins_ack), PFSYNC_ACT_INS_ACK },
1298 { pfsync_out_upd_c, sizeof(struct pfsync_upd_c), PFSYNC_ACT_UPD_C },
1299 { pfsync_out_del, sizeof(struct pfsync_del_c), PFSYNC_ACT_DEL_C },
1300 { pfsync_out_state, sizeof(struct pfsync_state), PFSYNC_ACT_INS },
1301 { pfsync_out_state, sizeof(struct pfsync_state), PFSYNC_ACT_UPD }
1302 };
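/*
 * note: the order of pfsync_qs entries lines up with the PFSYNC_S_*
 * queue indexes, since st->sync_state is used to index both s_qs[]
 * and this table.
 */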
1303
1304 static void
1305 pfsync_out_state(struct pf_state *st, void *buf)
1306 {
1307 struct pfsync_state *sp = buf;
1308
1309 mtx_enter(&st->mtx);
1310 pf_state_export(sp, st);
1311 mtx_leave(&st->mtx);
1312 }
1313
1314 static void
1315 pfsync_out_iack(struct pf_state *st, void *buf)
1316 {
1317 struct pfsync_ins_ack *iack = buf;
1318
1319 iack->id = st->id;
1320 iack->creatorid = st->creatorid;
1321 }
1322
1323 static void
1324 pfsync_out_upd_c(struct pf_state *st, void *buf)
1325 {
1326 struct pfsync_upd_c *up = buf;
1327
1328 memset(up, 0, sizeof(*up));
1329 up->id = st->id;
1330 up->creatorid = st->creatorid;
1331
1332 mtx_enter(&st->mtx);
1333 pf_state_peer_hton(&st->src, &up->src);
1334 pf_state_peer_hton(&st->dst, &up->dst);
1335 up->timeout = st->timeout;
1336 mtx_leave(&st->mtx);
1337 }
1338
1339 static void
1340 pfsync_out_del(struct pf_state *st, void *buf)
1341 {
1342 struct pfsync_del_c *dp = buf;
1343
1344 dp->id = st->id;
1345 dp->creatorid = st->creatorid;
1346
1347 st->sync_state = PFSYNC_S_DEAD;
1348 }
1349
1350 #if defined(IPSEC)
1351 static inline void
1352 pfsync_tdb_enter(struct tdb *tdb)
1353 {
1354 mtx_enter(&tdb->tdb_mtx);
1355 }
1356
1357 static inline void
1358 pfsync_tdb_leave(struct tdb *tdb)
1359 {
1360 unsigned int snapped = ISSET(tdb->tdb_flags, TDBF_PFSYNC_SNAPPED);
1361 mtx_leave(&tdb->tdb_mtx);
1362 if (snapped)
1363 wakeup_one(&tdb->tdb_updates);
1364 }
1365 #endif /* defined(IPSEC) */
1366
1367 static void
1368 pfsync_slice_drop(struct pfsync_softc *sc, struct pfsync_slice *s)
1369 {
1370 struct pf_state *st;
1371 int q;
1372 #if defined(IPSEC)
1373 struct tdb *tdb;
1374 #endif
1375
1376 for (q = 0; q < nitems(s->s_qs); q++) {
1377 if (TAILQ_EMPTY(&s->s_qs[q]))
1378 continue;
1379
1380 while ((st = TAILQ_FIRST(&s->s_qs[q])) != NULL) {
1381 TAILQ_REMOVE(&s->s_qs[q], st, sync_list);
1382 #ifdef PFSYNC_DEBUG
1383 KASSERT(st->sync_state == q);
1384 #endif
1385 st->sync_state = PFSYNC_S_NONE;
1386 pf_state_unref(st);
1387 }
1388 }
1389
1390 #if defined(IPSEC)
1391 while ((tdb = TAILQ_FIRST(&s->s_tdb_q)) != NULL) {
1392 TAILQ_REMOVE(&s->s_tdb_q, tdb, tdb_sync_entry);
1393
1394 pfsync_tdb_enter(tdb);
1395 KASSERT(ISSET(tdb->tdb_flags, TDBF_PFSYNC));
1396 CLR(tdb->tdb_flags, TDBF_PFSYNC);
1397 pfsync_tdb_leave(tdb);
1398 }
1399 #endif /* defined(IPSEC) */
1400
1401 timeout_del(&s->s_tmo);
1402 s->s_len = PFSYNC_MINPKT;
1403 }
1404
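/*
 * pfsync_slice_write drains the slice queues into a single mbuf: the
 * ip header, the pfsync header, then one subheader per non-empty queue
 * followed by its entries. it returns NULL if there was nothing to
 * send or an mbuf could not be allocated.
 */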
1405 static struct mbuf *
1406 pfsync_slice_write(struct pfsync_slice *s)
1407 {
1408 struct pfsync_softc *sc = s->s_pfsync;
1409 struct mbuf *m;
1410
1411 struct ip *ip;
1412 struct pfsync_header *ph;
1413 struct pfsync_subheader *subh;
1414
1415 unsigned int mlen = max_linkhdr + s->s_len;
1416 unsigned int q, count;
1417 caddr_t ptr;
1418 size_t off;
1419
1420 MUTEX_ASSERT_LOCKED(&s->s_mtx);
1421 if (s->s_len == PFSYNC_MINPKT) {
1422 s->s_stat_write_nop++;
1423 return (NULL);
1424 }
1425
1426 task_del(s->s_softnet, &s->s_task);
1427
1428 m = m_gethdr(M_DONTWAIT, MT_DATA);
1429 if (m == NULL)
1430 goto drop;
1431
1432 if (mlen > MHLEN) {
1433 MCLGETL(m, M_DONTWAIT, mlen);
1434 if (!ISSET(m->m_flags, M_EXT))
1435 goto drop;
1436 }
1437
1438 m_align(m, s->s_len);
1439 m->m_len = m->m_pkthdr.len = s->s_len;
1440
1441 ptr = mtod(m, caddr_t);
1442 off = 0;
1443
1444 ip = (struct ip *)(ptr + off);
1445 off += sizeof(*ip);
1446 *ip = sc->sc_template;
1447 ip->ip_len = htons(m->m_pkthdr.len);
1448 ip->ip_id = htons(ip_randomid());
1449
1450 ph = (struct pfsync_header *)(ptr + off);
1451 off += sizeof(*ph);
1452 memset(ph, 0, sizeof(*ph));
1453 ph->version = PFSYNC_VERSION;
1454 ph->len = htons(m->m_pkthdr.len - sizeof(*ip));
1455
1456 for (q = 0; q < nitems(s->s_qs); q++) {
1457 struct pf_state_queue *psq = &s->s_qs[q];
1458 struct pf_state *st;
1459
1460 if (TAILQ_EMPTY(psq))
1461 continue;
1462
1463 subh = (struct pfsync_subheader *)(ptr + off);
1464 off += sizeof(*subh);
1465
1466 count = 0;
1467 while ((st = TAILQ_FIRST(psq)) != NULL) {
1468 TAILQ_REMOVE(psq, st, sync_list);
1469 count++;
1470
1471 KASSERT(st->sync_state == q);
1472 /* the write handler below may override this */
1473 st->sync_state = PFSYNC_S_NONE;
1474
1475 pfsync_qs[q].write(st, ptr + off);
1476 off += pfsync_qs[q].len;
1477
1478 pf_state_unref(st);
1479 }
1480
1481 subh->action = pfsync_qs[q].action;
1482 subh->len = pfsync_qs[q].len >> 2;
1483 subh->count = htons(count);
1484 }
1485
1486 #if defined(IPSEC)
1487 if (!TAILQ_EMPTY(&s->s_tdb_q)) {
1488 struct tdb *tdb;
1489
1490 subh = (struct pfsync_subheader *)(ptr + off);
1491 off += sizeof(*subh);
1492
1493 count = 0;
1494 while ((tdb = TAILQ_FIRST(&s->s_tdb_q)) != NULL) {
1495 TAILQ_REMOVE(&s->s_tdb_q, tdb, tdb_sync_entry);
1496 count++;
1497
1498 pfsync_tdb_enter(tdb);
1499 KASSERT(ISSET(tdb->tdb_flags, TDBF_PFSYNC));
1500
1501 /* get a consistent view of the counters */
1502 pfsync_out_tdb(tdb, ptr + off);
1503
1504 CLR(tdb->tdb_flags, TDBF_PFSYNC);
1505 pfsync_tdb_leave(tdb);
1506
1507 off += sizeof(struct pfsync_tdb);
1508 }
1509
1510 subh->action = PFSYNC_ACT_TDB;
1511 subh->len = sizeof(struct pfsync_tdb) >> 2;
1512 subh->count = htons(count);
1513 }
1514 #endif
1515
1516 timeout_del(&s->s_tmo);
1517 s->s_len = PFSYNC_MINPKT;
1518
1519 return (m);
1520 drop:
1521 m_freem(m);
1522 pfsyncstat_inc(pfsyncs_onomem);
1523 pfsync_slice_drop(sc, s);
1524 return (NULL);
1525 }
1526
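/*
 * transmit an already encapsulated pfsync packet directly with
 * ip_output() on the sync interface; the pfsync interface itself
 * never queues packets (pfsync_start just purges its ifq).
 */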
1527 static void
1528 pfsync_sendout(struct pfsync_softc *sc, struct mbuf *m)
1529 {
1530 struct ip_moptions imo;
1531 unsigned int len = m->m_pkthdr.len;
1532 #if NBPFILTER > 0
1533 caddr_t if_bpf = sc->sc_if.if_bpf;
1534 if (if_bpf)
1535 bpf_mtap(if_bpf, m, BPF_DIRECTION_OUT);
1536 #endif
1537
1538 imo.imo_ifidx = sc->sc_sync_ifidx;
1539 imo.imo_ttl = PFSYNC_DFLTTL;
1540 imo.imo_loop = 0;
1541 m->m_pkthdr.ph_rtableid = sc->sc_if.if_rdomain;
1542
1543 if (ip_output(m, NULL, NULL, IP_RAWOUTPUT, &imo, NULL, 0) == 0) {
1544 counters_pkt(sc->sc_if.if_counters, ifc_opackets,
1545 ifc_obytes, len);
1546 pfsyncstat_inc(pfsyncs_opackets);
1547 } else {
1548 counters_inc(sc->sc_if.if_counters, ifc_oerrors);
1549 pfsyncstat_inc(pfsyncs_oerrors);
1550 }
1551 }
1552
1553 static void
1554 pfsync_slice_tmo(void *arg)
1555 {
1556 struct pfsync_slice *s = arg;
1557
1558 task_add(s->s_softnet, &s->s_task);
1559 }
1560
1561 static void
1562 pfsync_slice_sched(struct pfsync_slice *s)
1563 {
1564 s->s_stat_task_add++;
1565 task_add(s->s_softnet, &s->s_task);
1566 }
1567
1568 static void
1569 pfsync_slice_task(void *arg)
1570 {
1571 struct pfsync_slice *s = arg;
1572 struct mbuf *m;
1573
1574 mtx_enter(&s->s_mtx);
1575 s->s_stat_task_run++;
1576
1577 m = pfsync_slice_write(s);
1578 mtx_leave(&s->s_mtx);
1579 if (m != NULL) {
1580 NET_LOCK();
1581 pfsync_sendout(s->s_pfsync, m);
1582 NET_UNLOCK();
1583 }
1584 }
1585
1586 static void
1587 pfsync_slice_sendq(void *arg)
1588 {
1589 struct pfsync_slice *s = arg;
1590 struct mbuf_list ml;
1591 struct mbuf *m;
1592
1593 mq_delist(&s->s_sendq, &ml);
1594 if (ml_empty(&ml))
1595 return;
1596
1597 mtx_enter(&s->s_mtx);
1598 s->s_stat_dequeue++;
1599 mtx_leave(&s->s_mtx);
1600
1601 NET_LOCK();
1602 while ((m = ml_dequeue(&ml)) != NULL)
1603 pfsync_sendout(s->s_pfsync, m);
1604 NET_UNLOCK();
1605 }
1606
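/*
 * queue a state onto one of the slice queues. the length accounting
 * includes a subheader when a queue goes from empty to non-empty, and
 * if the pending packet would exceed the interface mtu the current
 * contents are written out and handed to the send task first.
 */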
1607 static void
1608 pfsync_q_ins(struct pfsync_slice *s, struct pf_state *st, unsigned int q)
1609 {
1610 size_t nlen = pfsync_qs[q].len;
1611 struct mbuf *m = NULL;
1612
1613 MUTEX_ASSERT_LOCKED(&s->s_mtx);
1614 KASSERT(st->sync_state == PFSYNC_S_NONE);
1615 KASSERT(s->s_len >= PFSYNC_MINPKT);
1616
1617 if (TAILQ_EMPTY(&s->s_qs[q]))
1618 nlen += sizeof(struct pfsync_subheader);
1619
1620 if (s->s_len + nlen > s->s_pfsync->sc_if.if_mtu) {
1621 m = pfsync_slice_write(s);
1622 if (m != NULL) {
1623 s->s_stat_enqueue++;
1624 if (mq_enqueue(&s->s_sendq, m) == 0)
1625 task_add(s->s_softnet, &s->s_send);
1626 }
1627
1628 nlen = sizeof(struct pfsync_subheader) + pfsync_qs[q].len;
1629 }
1630
1631 s->s_len += nlen;
1632 pf_state_ref(st);
1633 TAILQ_INSERT_TAIL(&s->s_qs[q], st, sync_list);
1634 st->sync_state = q;
1635
1636 if (!timeout_pending(&s->s_tmo))
1637 timeout_add_sec(&s->s_tmo, 1);
1638 }
1639
1640 static void
1641 pfsync_q_del(struct pfsync_slice *s, struct pf_state *st)
1642 {
1643 unsigned int q = st->sync_state;
1644
1645 MUTEX_ASSERT_LOCKED(&s->s_mtx);
1646 KASSERT(st->sync_state < PFSYNC_S_NONE);
1647
1648 st->sync_state = PFSYNC_S_NONE;
1649 TAILQ_REMOVE(&s->s_qs[q], st, sync_list);
1650 pf_state_unref(st);
1651 s->s_len -= pfsync_qs[q].len;
1652
1653 if (TAILQ_EMPTY(&s->s_qs[q]))
1654 s->s_len -= sizeof(struct pfsync_subheader);
1655 }
1656
1657 /*
1658 * the pfsync hooks that pf calls
1659 */
1660
1661 void
1662 pfsync_init_state(struct pf_state *st, const struct pf_state_key *skw,
1663 const struct pf_state_key *sks, int flags)
1664 {
1665 /* this is called before pf_state_insert */
1666
1667 if (skw->proto == IPPROTO_PFSYNC)
1668 SET(st->state_flags, PFSTATE_NOSYNC);
1669
1670 if (ISSET(st->state_flags, PFSTATE_NOSYNC)) {
1671 st->sync_state = PFSYNC_S_DEAD;
1672 return;
1673 }
1674
1675 if (ISSET(flags, PFSYNC_SI_IOCTL)) {
1676 /* all good */
1677 return;
1678 }
1679
1680 /* state came off the wire */
1681 if (ISSET(flags, PFSYNC_SI_PFSYNC)) {
1682 if (ISSET(st->state_flags, PFSTATE_ACK)) {
1683 CLR(st->state_flags, PFSTATE_ACK);
1684
1685 /* peer wants an iack, not an insert */
1686 st->sync_state = PFSYNC_S_SYNC;
1687 } else
1688 st->sync_state = PFSYNC_S_PFSYNC;
1689 }
1690 }
1691
1692 void
1693 pfsync_insert_state(struct pf_state *st)
1694 {
1695 struct pfsync_softc *sc;
1696
1697 MUTEX_ASSERT_UNLOCKED(&st->mtx);
1698
1699 if (ISSET(st->state_flags, PFSTATE_NOSYNC) ||
1700 st->sync_state == PFSYNC_S_DEAD)
1701 return;
1702
1703 smr_read_enter();
1704 sc = SMR_PTR_GET(&pfsyncif);
1705 if (sc != NULL) {
1706 struct pfsync_slice *s = pfsync_slice_enter(sc, st);
1707
1708 switch (st->sync_state) {
1709 case PFSYNC_S_UPD_C:
1710 /* we must have lost a race after insert */
1711 pfsync_q_del(s, st);
1712 /* FALLTHROUGH */
1713 case PFSYNC_S_NONE:
1714 pfsync_q_ins(s, st, PFSYNC_S_INS);
1715 break;
1716 case PFSYNC_S_SYNC:
1717 st->sync_state = PFSYNC_S_NONE; /* gross */
1718 pfsync_q_ins(s, st, PFSYNC_S_IACK);
1719 pfsync_slice_sched(s); /* the peer is waiting */
1720 break;
1721 case PFSYNC_S_PFSYNC:
1722 /* state was just inserted by pfsync */
1723 st->sync_state = PFSYNC_S_NONE;
1724 break;
1725 default:
1726 panic("%s: state %p unexpected sync_state %d",
1727 __func__, st, st->sync_state);
1728 /* NOTREACHED */
1729 }
1730
1731 pfsync_slice_leave(sc, s);
1732 }
1733 smr_read_leave();
1734 }
1735
1736 void
1737 pfsync_update_state(struct pf_state *st)
1738 {
1739 struct pfsync_softc *sc;
1740
1741 MUTEX_ASSERT_UNLOCKED(&st->mtx);
1742
1743 if (ISSET(st->state_flags, PFSTATE_NOSYNC) ||
1744 st->sync_state == PFSYNC_S_DEAD)
1745 return;
1746
1747 smr_read_enter();
1748 sc = SMR_PTR_GET(&pfsyncif);
1749 if (sc != NULL) {
1750 struct pfsync_slice *s = pfsync_slice_enter(sc, st);
1751 int sync = 0;
1752
1753 switch (st->sync_state) {
1754 case PFSYNC_S_UPD_C:
1755 case PFSYNC_S_UPD:
1756 /* we're already handling it */
1757 if (st->key[PF_SK_WIRE]->proto == IPPROTO_TCP) {
1758 st->sync_updates++;
1759 if (st->sync_updates >= sc->sc_maxupdates)
1760 sync = 1;
1761 }
1762 /* FALLTHROUGH */
1763 case PFSYNC_S_INS:
1764 case PFSYNC_S_DEL:
1765 case PFSYNC_S_DEAD:
1766 break;
1767
1768 case PFSYNC_S_IACK:
1769 pfsync_q_del(s, st);
1770 /* FALLTHROUGH */
1771 case PFSYNC_S_NONE:
1772 pfsync_q_ins(s, st, PFSYNC_S_UPD_C);
1773 st->sync_updates = 0;
1774 break;
1775 default:
1776 panic("%s: state %p unexpected sync_state %d",
1777 __func__, st, st->sync_state);
1778 /* NOTREACHED */
1779 }
1780
1781 if (!sync && (getuptime() - st->pfsync_time) < 2)
1782 sync = 1;
1783
1784 if (sync)
1785 pfsync_slice_sched(s);
1786 pfsync_slice_leave(sc, s);
1787 }
1788 smr_read_leave();
1789 }
1790
1791 void
1792 pfsync_delete_state(struct pf_state *st)
1793 {
1794 struct pfsync_softc *sc;
1795
1796 MUTEX_ASSERT_UNLOCKED(&st->mtx);
1797
1798 if (ISSET(st->state_flags, PFSTATE_NOSYNC) ||
1799 st->sync_state == PFSYNC_S_DEAD)
1800 return;
1801
1802 smr_read_enter();
1803 sc = SMR_PTR_GET(&pfsyncif);
1804 if (sc != NULL) {
1805 struct pfsync_slice *s = pfsync_slice_enter(sc, st);
1806
1807 switch (st->sync_state) {
1808 case PFSYNC_S_INS:
1809 /* let's pretend this never happened */
1810 pfsync_q_del(s, st);
1811 break;
1812
1813 case PFSYNC_S_UPD_C:
1814 case PFSYNC_S_UPD:
1815 case PFSYNC_S_IACK:
1816 pfsync_q_del(s, st);
1817 /* FALLTHROUGH */
1818 case PFSYNC_S_NONE:
1819 pfsync_q_ins(s, st, PFSYNC_S_DEL);
1820 st->sync_updates = 0;
1821 break;
1822 case PFSYNC_S_DEL:
1823 case PFSYNC_S_DEAD:
1824 /* XXX we should count this */
1825 break;
1826 default:
1827 panic("%s: state %p unexpected sync_state %d",
1828 __func__, st, st->sync_state);
1829 /* NOTREACHED */
1830 }
1831
1832 pfsync_slice_leave(sc, s);
1833 }
1834 smr_read_leave();
1835 }
1836
1837 struct pfsync_subh_clr {
1838 struct pfsync_subheader subh;
1839 struct pfsync_clr clr;
1840 } __packed __aligned(4);
1841
1842 void
1843 pfsync_clear_states(u_int32_t creatorid, const char *ifname)
1844 {
1845 struct pfsync_softc *sc;
1846 struct pfsync_subh_clr *h;
1847 struct mbuf *m;
1848 unsigned int hlen, mlen;
1849
1850 smr_read_enter();
1851 sc = SMR_PTR_GET(&pfsyncif);
1852 if (sc != NULL)
1853 refcnt_take(&sc->sc_refs);
1854 smr_read_leave();
1855
1856 if (sc == NULL)
1857 return;
1858
1859 hlen = sizeof(sc->sc_template) +
1860 sizeof(struct pfsync_header) +
1861 sizeof(*h);
1862
1863 mlen = max_linkhdr + hlen;
1864
1865 m = m_gethdr(M_DONTWAIT, MT_DATA);
1866 if (m == NULL) {
1867 /* count error */
1868 goto leave;
1869 }
1870
1871 if (mlen > MHLEN) {
1872 MCLGETL(m, M_DONTWAIT, mlen);
1873 if (!ISSET(m->m_flags, M_EXT)) {
1874 m_freem(m);
1875 goto leave;
1876 }
1877 }
1878
1879 m_align(m, sizeof(*h));
1880 h = mtod(m, struct pfsync_subh_clr *);
1881
1882 h->subh.action = PFSYNC_ACT_CLR;
1883 h->subh.len = sizeof(h->clr) >> 2;
1884 h->subh.count = htons(1);
1885
1886 strlcpy(h->clr.ifname, ifname, sizeof(h->clr.ifname));
1887 h->clr.creatorid = creatorid;
1888
1889 m->m_pkthdr.len = m->m_len = sizeof(*h);
1890 m = pfsync_encap(sc, m);
1891 if (m == NULL)
1892 goto leave;
1893
1894 pfsync_sendout(sc, m);
1895 leave:
1896 refcnt_rele_wake(&sc->sc_refs);
1897 }
1898
1899 int
1900 pfsync_state_in_use(struct pf_state *st)
1901 {
1902 struct pfsync_softc *sc;
1903 int rv = 0;
1904
1905 smr_read_enter();
1906 sc = SMR_PTR_GET(&pfsyncif);
1907 if (sc != NULL) {
1908 /*
1909 * pfsync bulk sends run inside
1910 * rw_enter_read(&pf_state_list.pfs_rwl), and this
1911 * code (pfsync_state_in_use) is only called from the
1912 * purge code inside
1913 * rw_enter_write(&pf_state_list.pfs_rwl). therefore,
1914 * those two sections are exclusive so we can safely
1915 * look at the bulk send pointers.
1916 */
1917 /* rw_assert_wrlock(&pf_state_list.pfs_rwl); */
1918 if (sc->sc_bulk_snd.snd_next == st ||
1919 sc->sc_bulk_snd.snd_tail == st)
1920 rv = 1;
1921 }
1922 smr_read_leave();
1923
1924 return (rv);
1925 }
1926
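/*
 * pfsync_defer holds on to the packet that created a state so the
 * peer can learn about (and ack) the state before the packet is
 * released, up to the PFSYNC_DEFER_NSEC deadline. returns 1 if the
 * packet was taken.
 */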
1927 int
1928 pfsync_defer(struct pf_state *st, struct mbuf *m)
1929 {
1930 struct pfsync_softc *sc;
1931 struct pfsync_slice *s;
1932 struct pfsync_deferral *pd;
1933 int sched = 0;
1934 int rv = 0;
1935
1936 if (ISSET(st->state_flags, PFSTATE_NOSYNC) ||
1937 ISSET(m->m_flags, M_BCAST|M_MCAST))
1938 return (0);
1939
1940 smr_read_enter();
1941 sc = SMR_PTR_GET(&pfsyncif);
1942 if (sc == NULL || !sc->sc_defer)
1943 goto leave;
1944
1945 pd = pool_get(&pfsync_deferrals_pool, M_NOWAIT);
1946 if (pd == NULL) {
1947 goto leave;
1948 }
1949
1950 s = pfsync_slice_enter(sc, st);
1951 s->s_stat_defer_add++;
1952
1953 pd->pd_st = pf_state_ref(st);
1954 pd->pd_m = m;
1955 pd->pd_deadline = getnsecuptime() + PFSYNC_DEFER_NSEC;
1956
1957 m->m_pkthdr.pf.flags |= PF_TAG_GENERATED;
1958 st->sync_defer = pd;
1959
1960 sched = s->s_deferred++;
1961 TAILQ_INSERT_TAIL(&s->s_deferrals, pd, pd_entry);
1962
1963 if (sched == 0)
1964 timeout_add_nsec(&s->s_deferrals_tmo, PFSYNC_DEFER_NSEC);
1965 else if (sched >= PFSYNC_DEFER_LIMIT) {
1966 s->s_stat_defer_overlimit++;
1967 timeout_del(&s->s_deferrals_tmo);
1968 task_add(s->s_softnet, &s->s_deferrals_task);
1969 }
1970
1971 pfsync_slice_sched(s);
1972 pfsync_slice_leave(sc, s);
1973 rv = 1;
1974 leave:
1975 smr_read_leave();
1976
1977 return (rv);
1978 }
1979
1980 static void
1981 pfsync_deferred(struct pfsync_softc *sc, struct pf_state *st)
1982 {
1983 struct pfsync_slice *s;
1984 struct pfsync_deferral *pd;
1985
1986 s = pfsync_slice_enter(sc, st);
1987
1988 pd = st->sync_defer;
1989 if (pd != NULL) {
1990 s->s_stat_defer_ack++;
1991
1992 TAILQ_REMOVE(&s->s_deferrals, pd, pd_entry);
1993 s->s_deferred--;
1994
1995 st = pd->pd_st;
1996 st->sync_defer = NULL;
1997 }
1998 pfsync_slice_leave(sc, s);
1999
2000 if (pd != NULL)
2001 pfsync_defer_output(pd);
2002 }
2003
2004 static void
2005 pfsync_deferrals_tmo(void *arg)
2006 {
2007 struct pfsync_slice *s = arg;
2008
2009 if (READ_ONCE(s->s_deferred) > 0)
2010 task_add(s->s_softnet, &s->s_deferrals_task);
2011 }
2012
2013 static void
2014 pfsync_deferrals_task(void *arg)
2015 {
2016 struct pfsync_slice *s = arg;
2017 struct pfsync_deferral *pd;
2018 struct pf_state *st;
2019 uint64_t now, nsec = 0;
2020 struct pfsync_deferrals pds = TAILQ_HEAD_INITIALIZER(pds);
2021
2022 now = getnsecuptime();
2023
2024 mtx_enter(&s->s_mtx);
2025 s->s_stat_defer_run++; /* maybe move this into the loop */
2026 for (;;) {
2027 pd = TAILQ_FIRST(&s->s_deferrals);
2028 if (pd == NULL)
2029 break;
2030
2031 if (s->s_deferred < PFSYNC_DEFER_LIMIT &&
2032 now < pd->pd_deadline) {
2033 nsec = pd->pd_deadline - now;
2034 break;
2035 }
2036
2037 TAILQ_REMOVE(&s->s_deferrals, pd, pd_entry);
2038 s->s_deferred--;
2039
2040 /*
2041 * detach the pd from the state. the pd still refers
2042 * to the state though.
2043 */
2044 st = pd->pd_st;
2045 st->sync_defer = NULL;
2046
2047 TAILQ_INSERT_TAIL(&pds, pd, pd_entry);
2048 }
2049 mtx_leave(&s->s_mtx);
2050
2051 if (nsec > 0) {
2052 /* we were looking at a pd, but it wasn't old enough */
2053 timeout_add_nsec(&s->s_deferrals_tmo, nsec);
2054 }
2055
2056 if (TAILQ_EMPTY(&pds))
2057 return;
2058
2059 NET_LOCK();
2060 while ((pd = TAILQ_FIRST(&pds)) != NULL) {
2061 TAILQ_REMOVE(&pds, pd, pd_entry);
2062
2063 pfsync_defer_output(pd);
2064 }
2065 NET_UNLOCK();
2066 }
2067
2068 static void
2069 pfsync_defer_output(struct pfsync_deferral *pd)
2070 {
2071 struct pf_pdesc pdesc;
2072 struct pf_state *st = pd->pd_st;
2073
2074 if (st->rt == PF_ROUTETO) {
2075 if (pf_setup_pdesc(&pdesc, st->key[PF_SK_WIRE]->af,
2076 st->direction, NULL, pd->pd_m, NULL) != PF_PASS)
2077 return;
2078 switch (st->key[PF_SK_WIRE]->af) {
2079 case AF_INET:
2080 pf_route(&pdesc, st);
2081 break;
2082 #ifdef INET6
2083 case AF_INET6:
2084 pf_route6(&pdesc, st);
2085 break;
2086 #endif /* INET6 */
2087 default:
2088 unhandled_af(st->key[PF_SK_WIRE]->af);
2089 }
2090 pd->pd_m = pdesc.m;
2091 } else {
2092 switch (st->key[PF_SK_WIRE]->af) {
2093 case AF_INET:
2094 ip_output(pd->pd_m, NULL, NULL, 0, NULL, NULL, 0);
2095 break;
2096 #ifdef INET6
2097 case AF_INET6:
2098 ip6_output(pd->pd_m, NULL, NULL, 0, NULL, NULL);
2099 break;
2100 #endif /* INET6 */
2101 default:
2102 unhandled_af(st->key[PF_SK_WIRE]->af);
2103 }
2104
2105 pd->pd_m = NULL;
2106 }
2107
2108 pf_state_unref(st);
2109 m_freem(pd->pd_m);
2110 pool_put(&pfsync_deferrals_pool, pd);
2111 }
2112
2113 struct pfsync_subh_bus {
2114 struct pfsync_subheader subh;
2115 struct pfsync_bus bus;
2116 } __packed __aligned(4);
2117
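/*
 * Subheader lengths travel in units of 32-bit words: pfsync_bulk_snd_bus()
 * below stores sizeof(h->bus) >> 2 and pfsync_input() widens it back
 * with subh->len << 2. As a rough sketch, the bytes appended to the
 * packet are:
 *
 *	struct pfsync_subheader { action = PFSYNC_ACT_BUS, len, count = 1 }
 *	struct pfsync_bus       { creatorid, endtime, status }
 */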
2118 static unsigned int
2119 pfsync_bulk_snd_bus(struct pfsync_softc *sc,
2120 struct mbuf *m, const unsigned int space,
2121 uint32_t endtime, uint8_t status)
2122 {
2123 struct pfsync_subh_bus *h;
2124 unsigned int nlen;
2125
2126 nlen = m->m_len + sizeof(*h);
2127 if (space < nlen)
2128 return (0);
2129
2130 h = (struct pfsync_subh_bus *)(mtod(m, caddr_t) + m->m_len);
2131 memset(h, 0, sizeof(*h));
2132
2133 h->subh.action = PFSYNC_ACT_BUS;
2134 h->subh.len = sizeof(h->bus) >> 2;
2135 h->subh.count = htons(1);
2136
2137 h->bus.creatorid = pf_status.hostid;
2138 h->bus.endtime = htonl(endtime);
2139 h->bus.status = status;
2140
2141 m->m_len = nlen;
2142
2143 return (1);
2144 }
2145
2146 static unsigned int
2147 pfsync_bulk_snd_states(struct pfsync_softc *sc,
2148 struct mbuf *m, const unsigned int space, unsigned int len)
2149 {
2150 struct pf_state *st;
2151 struct pfsync_state *sp;
2152 unsigned int nlen;
2153 unsigned int count = 0;
2154
2155 st = sc->sc_bulk_snd.snd_next;
2156
2157 for (;;) {
2158 nlen = len + sizeof(*sp);
2159 sp = (struct pfsync_state *)(mtod(m, caddr_t) + len);
2160 if (space < nlen)
2161 break;
2162
2163 mtx_enter(&st->mtx);
2164 pf_state_export(sp, st);
2165 mtx_leave(&st->mtx);
2166
2167 /* commit */
2168 count++;
2169 m->m_len = len = nlen;
2170
2171 if (st == sc->sc_bulk_snd.snd_tail) {
2172 if (pfsync_bulk_snd_bus(sc, m, space,
2173 0, PFSYNC_BUS_END) == 0) {
2174 /* couldn't fit the BUS */
2175 st = NULL;
2176 break;
2177 }
2178
2179 /* this BUS is done */
2180 pfsync_dprintf(sc, "bulk send done (%s)", __func__);
2181 sc->sc_bulk_snd.snd_again = 0; /* XXX */
2182 sc->sc_bulk_snd.snd_next = NULL;
2183 sc->sc_bulk_snd.snd_tail = NULL;
2184 return (count);
2185 }
2186
2187 st = TAILQ_NEXT(st, entry_list);
2188 }
2189
2190 /* there's still work to do */
2191 sc->sc_bulk_snd.snd_next = st;
2192 timeout_add_msec(&sc->sc_bulk_snd.snd_tmo, PFSYNC_BULK_SND_IVAL_MS);
2193
2194 return (count);
2195 }
2196
2197 static unsigned int
2198 pfsync_bulk_snd_sub(struct pfsync_softc *sc,
2199 struct mbuf *m, const unsigned int space)
2200 {
2201 struct pfsync_subheader *subh;
2202 unsigned int count;
2203 unsigned int len, nlen;
2204
2205 len = m->m_len;
2206 nlen = len + sizeof(*subh);
2207 if (nlen > space)
2208 return (0);
2209
2210 subh = (struct pfsync_subheader *)(mtod(m, caddr_t) + len);
2211
2212 /*
2213 * pfsync_bulk_snd_states only updates m->m_len after
2214 * it has filled in a state at or past the offset we gave it.
2215 */
2216 count = pfsync_bulk_snd_states(sc, m, space, nlen);
2217 if (count == 0)
2218 return (0);
2219
2220 subh->action = PFSYNC_ACT_UPD;
2221 subh->len = sizeof(struct pfsync_state) >> 2;
2222 subh->count = htons(count);
2223
2224 return (count);
2225 }
2226
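/*
 * A bulk send is triggered by an update request with id and creatorid
 * both zero (see pfsync_in_ureq). pfsync_bulk_snd_start() snapshots the
 * head and tail of pf_state_list, opens the run with a PFSYNC_BUS_START
 * message, and packs in as many state updates as fit in one packet.
 * snd_next remembers where the walk stopped so pfsync_bulk_snd_tmo()
 * can resume it every PFSYNC_BULK_SND_IVAL_MS until the tail is reached
 * and a PFSYNC_BUS_END message is sent.
 */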
2227 static void
2228 pfsync_bulk_snd_start(struct pfsync_softc *sc)
2229 {
2230 const unsigned int space = sc->sc_if.if_mtu -
2231 (sizeof(struct ip) + sizeof(struct pfsync_header));
2232 struct mbuf *m;
2233
2234 rw_enter_read(&pf_state_list.pfs_rwl);
2235
2236 rw_enter_write(&sc->sc_bulk_snd.snd_lock);
2237 if (sc->sc_bulk_snd.snd_next != NULL) {
2238 sc->sc_bulk_snd.snd_again = 1;
2239 goto leave;
2240 }
2241
2242 mtx_enter(&pf_state_list.pfs_mtx);
2243 sc->sc_bulk_snd.snd_next = TAILQ_FIRST(&pf_state_list.pfs_list);
2244 sc->sc_bulk_snd.snd_tail = TAILQ_LAST(&pf_state_list.pfs_list,
2245 pf_state_queue);
2246 mtx_leave(&pf_state_list.pfs_mtx);
2247
2248 m = m_gethdr(M_DONTWAIT, MT_DATA);
2249 if (m == NULL)
2250 goto leave;
2251
2252 MCLGETL(m, M_DONTWAIT, max_linkhdr + sc->sc_if.if_mtu);
2253 if (!ISSET(m->m_flags, M_EXT)) {
2254 /* some error++ */
2255 m_freem(m); /* drop */
2256 goto leave;
2257 }
2258
2259 m_align(m, space);
2260 m->m_len = 0;
2261
2262 if (sc->sc_bulk_snd.snd_tail == NULL) {
2263 pfsync_dprintf(sc, "bulk send empty (%s)", __func__);
2264
2265 /* list is empty */
2266 if (pfsync_bulk_snd_bus(sc, m, space, 0, PFSYNC_BUS_END) == 0)
2267 panic("%s: mtu is too low", __func__);
2268 goto encap;
2269 }
2270
2271 pfsync_dprintf(sc, "bulk send start (%s)", __func__);
2272
2273 /* start a bulk update. */
2274 if (pfsync_bulk_snd_bus(sc, m, space, 0, PFSYNC_BUS_START) == 0)
2275 panic("%s: mtu is too low", __func__);
2276
2277 /* fill it up with state updates. */
2278 pfsync_bulk_snd_sub(sc, m, space);
2279
2280 encap:
2281 m->m_pkthdr.len = m->m_len;
2282 m = pfsync_encap(sc, m);
2283 if (m == NULL)
2284 goto leave;
2285
2286 pfsync_sendout(sc, m);
2287
2288 leave:
2289 rw_exit_write(&sc->sc_bulk_snd.snd_lock);
2290
2291 rw_exit_read(&pf_state_list.pfs_rwl);
2292 }
2293
2294 static void
2295 pfsync_bulk_snd_tmo(void *arg)
2296 {
2297 struct pfsync_softc *sc = arg;
2298 const unsigned int space = sc->sc_if.if_mtu -
2299 (sizeof(struct ip) + sizeof(struct pfsync_header));
2300 struct mbuf *m;
2301
2302 m = m_gethdr(M_DONTWAIT, MT_DATA);
2303 if (m == NULL) {
2304 /* some error++ */
2305 /* retry later */
2306 timeout_add_msec(&sc->sc_bulk_snd.snd_tmo,
2307 PFSYNC_BULK_SND_IVAL_MS);
2308 return;
2309 }
2310
2311 MCLGETL(m, M_DONTWAIT, max_linkhdr + sc->sc_if.if_mtu);
2312 if (!ISSET(m->m_flags, M_EXT)) {
2313 /* some error++ */
2314 m_freem(m);
2315 /* retry later */
2316 timeout_add_msec(&sc->sc_bulk_snd.snd_tmo,
2317 PFSYNC_BULK_SND_IVAL_MS);
2318 return;
2319 }
2320
2321 m_align(m, space);
2322 m->m_len = 0;
2323
2324 rw_enter_read(&pf_state_list.pfs_rwl);
2325 rw_enter_write(&sc->sc_bulk_snd.snd_lock);
2326
2327 if (sc->sc_bulk_snd.snd_next == NULL) {
2328 /* there was no space in the previous packet for a BUS END */
2329
2330 if (pfsync_bulk_snd_bus(sc, m, space, 0, PFSYNC_BUS_END) == 0)
2331 panic("%s: mtu is too low", __func__);
2332
2333 /* this bulk is done */
2334 pfsync_dprintf(sc, "bulk send done (%s)", __func__);
2335 sc->sc_bulk_snd.snd_again = 0; /* XXX */
2336 sc->sc_bulk_snd.snd_tail = NULL;
2337 } else {
2338 pfsync_dprintf(sc, "bulk send again (%s)", __func__);
2339
2340 /* fill it up with state updates. */
2341 pfsync_bulk_snd_sub(sc, m, space);
2342 }
2343
2344 m->m_pkthdr.len = m->m_len;
2345 m = pfsync_encap(sc, m);
2346
2347 rw_exit_write(&sc->sc_bulk_snd.snd_lock);
2348 rw_exit_read(&pf_state_list.pfs_rwl);
2349
2350 if (m != NULL) {
2351 NET_LOCK();
2352 pfsync_sendout(sc, m);
2353 NET_UNLOCK();
2354 }
2355 }
2356
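/*
 * Queue a full state update in response to a peer's update request.
 * Compressed or ack-pending queue entries are replaced with a full
 * PFSYNC_S_UPD so the peer gets the complete state, not just a delta.
 */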
2357 static void
2358 pfsync_update_state_req(struct pfsync_softc *sc, struct pf_state *st)
2359 {
2360 struct pfsync_slice *s = pfsync_slice_enter(sc, st);
2361
2362 switch (st->sync_state) {
2363 case PFSYNC_S_UPD_C:
2364 case PFSYNC_S_IACK:
2365 pfsync_q_del(s, st);
2366 /* FALLTHROUGH */
2367 case PFSYNC_S_NONE:
2368 pfsync_q_ins(s, st, PFSYNC_S_UPD);
2369 break;
2370
2371 case PFSYNC_S_INS:
2372 case PFSYNC_S_UPD:
2373 case PFSYNC_S_DEL:
2374 /* we're already handling it */
2375 break;
2376 default:
2377 panic("%s: state %p unexpected sync_state %d",
2378 __func__, st, st->sync_state);
2379 }
2380
2381 pfsync_slice_sched(s);
2382 pfsync_slice_leave(sc, s);
2383 }
2384
2385 #if defined(IPSEC)
2386 static void
2387 pfsync_out_tdb(struct tdb *tdb, void *buf)
2388 {
2389 struct pfsync_tdb *ut = buf;
2390
2391 memset(ut, 0, sizeof(*ut));
2392 ut->spi = tdb->tdb_spi;
2393 memcpy(&ut->dst, &tdb->tdb_dst, sizeof(ut->dst));
2394 /*
2395 * When a failover happens, the master's rpl is probably above
2396 * what we see here (we may be up to a second late), so
2397 * increase it a bit for outbound tdbs to manage most such
2398 * situations.
2399 *
2400 * For now, just add an offset that is likely to be larger
2401 * than the number of packets we can see in one second. The RFC
2402 * just says the next packet must have a higher seq value.
2403 *
2404 * XXX What is a good algorithm for this? We could use
2405 * a rate-determined increase, but to know it, we would have
2406 * to extend struct tdb.
2407 * XXX pt->rpl can wrap over MAXINT, but if so the real tdb
2408 * will soon be replaced anyway. For now, just don't handle
2409 * this edge case.
2410 */
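	/*
	 * For illustration: with TDBF_PFSYNC_RPL set, a local tdb_rpl of
	 * 1000 is exported as 1000 + RPL_INCR = 17384.
	 */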
2411 #define RPL_INCR 16384
2412 ut->rpl = htobe64(tdb->tdb_rpl +
2413 (ISSET(tdb->tdb_flags, TDBF_PFSYNC_RPL) ? RPL_INCR : 0));
2414 ut->cur_bytes = htobe64(tdb->tdb_cur_bytes);
2415 ut->sproto = tdb->tdb_sproto;
2416 ut->rdomain = htons(tdb->tdb_rdomain);
2417 }
2418
2419 static struct pfsync_slice *
2420 pfsync_slice_enter_tdb(struct pfsync_softc *sc, const struct tdb *t)
2421 {
2422 /*
2423 * just use the first slice for all ipsec (for now) until
2424 * it's more obvious what property (eg, spi) we can distribute
2425 * tdbs over slices with.
2426 */
2427 struct pfsync_slice *s = &sc->sc_slices[0];
2428
2429 if (!mtx_enter_try(&s->s_mtx)) {
2430 mtx_enter(&s->s_mtx);
2431 s->s_stat_contended++;
2432 }
2433 s->s_stat_locks++;
2434
2435 return (s);
2436 }
2437
2438 static void
2439 pfsync_tdb_ins(struct pfsync_slice *s, struct tdb *tdb)
2440 {
2441 size_t nlen = sizeof(struct pfsync_tdb);
2442 struct mbuf *m = NULL;
2443
2444 KASSERT(s->s_len >= PFSYNC_MINPKT);
2445
2446 MUTEX_ASSERT_LOCKED(&s->s_mtx);
2447 MUTEX_ASSERT_UNLOCKED(&tdb->tdb_mtx);
2448
2449 if (TAILQ_EMPTY(&s->s_tdb_q))
2450 nlen += sizeof(struct pfsync_subheader);
2451
2452 if (s->s_len + nlen > s->s_pfsync->sc_if.if_mtu) {
2453 m = pfsync_slice_write(s);
2454 if (m != NULL) {
2455 s->s_stat_enqueue++;
2456 if (mq_enqueue(&s->s_sendq, m) == 0)
2457 task_add(s->s_softnet, &s->s_send);
2458 }
2459
2460 nlen = sizeof(struct pfsync_subheader) +
2461 sizeof(struct pfsync_tdb);
2462 }
2463
2464 s->s_len += nlen;
2465 TAILQ_INSERT_TAIL(&s->s_tdb_q, tdb, tdb_sync_entry);
2466 tdb->tdb_updates = 0;
2467
2468 if (!timeout_pending(&s->s_tmo))
2469 timeout_add_sec(&s->s_tmo, 1);
2470 }
2471
2472 static void
2473 pfsync_tdb_del(struct pfsync_slice *s, struct tdb *tdb)
2474 {
2475 MUTEX_ASSERT_LOCKED(&s->s_mtx);
2476 MUTEX_ASSERT_UNLOCKED(&tdb->tdb_mtx);
2477
2478 TAILQ_REMOVE(&s->s_tdb_q, tdb, tdb_sync_entry);
2479
2480 s->s_len -= sizeof(struct pfsync_tdb);
2481 if (TAILQ_EMPTY(&s->s_tdb_q))
2482 s->s_len -= sizeof(struct pfsync_subheader);
2483 }
2484
2485 /*
2486 * the reference that pfsync has to a tdb is accounted for by the
2487 * TDBF_PFSYNC flag, not by tdb_ref/tdb_unref. tdb_delete_tdb() is
2488 * called after all other references to a tdb are dropped (with
2489 * tdb_unref) as part of the tdb_free().
2490 *
2491 * tdb_free() needs to wait for pfsync to let go of the tdb though,
2492 * which would be best handled by a reference count, but tdb_free
2493 * needs the NET_LOCK which pfsync is already fighting with. instead
2494 * use the TDBF_PFSYNC_SNAPPED flag to coordinate the pfsync write/drop
2495 * with tdb_free.
2496 */
2497
2498 void
2499 pfsync_update_tdb(struct tdb *tdb, int output)
2500 {
2501 struct pfsync_softc *sc;
2502
2503 MUTEX_ASSERT_UNLOCKED(&tdb->tdb_mtx);
2504
2505 smr_read_enter();
2506 sc = SMR_PTR_GET(&pfsyncif);
2507 if (sc != NULL) {
2508 struct pfsync_slice *s = pfsync_slice_enter_tdb(sc, tdb);
2509
2510 /* TDBF_PFSYNC is only changed while the slice mtx is held */
2511 if (!ISSET(tdb->tdb_flags, TDBF_PFSYNC)) {
2512 mtx_enter(&tdb->tdb_mtx);
2513 SET(tdb->tdb_flags, TDBF_PFSYNC);
2514 mtx_leave(&tdb->tdb_mtx);
2515
2516 pfsync_tdb_ins(s, tdb);
2517 } else if (++tdb->tdb_updates >= sc->sc_maxupdates)
2518 pfsync_slice_sched(s);
2519
2520 /* XXX no sync timestamp on tdbs to check */
2521
2522 pfsync_slice_leave(sc, s);
2523 }
2524 smr_read_leave();
2525 }
2526
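/*
 * The shutdown handshake, roughly: pfsync_delete_tdb() below marks the
 * tdb TDBF_PFSYNC_SNAPPED and then sleeps on tdb_updates until
 * TDBF_PFSYNC has been cleared, either by the code in this function or
 * by the pfsync write/drop path once it is finished with the tdb.
 */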
2527 void
2528 pfsync_delete_tdb(struct tdb *tdb)
2529 {
2530 struct pfsync_softc *sc;
2531
2532 MUTEX_ASSERT_UNLOCKED(&tdb->tdb_mtx);
2533
2534 smr_read_enter();
2535 sc = SMR_PTR_GET(&pfsyncif);
2536 if (sc != NULL) {
2537 struct pfsync_slice *s = pfsync_slice_enter_tdb(sc, tdb);
2538
2539 /* TDBF_PFSYNC is only changed while the slice mtx is held */
2540 if (ISSET(tdb->tdb_flags, TDBF_PFSYNC)) {
2541 pfsync_tdb_del(s, tdb);
2542
2543 mtx_enter(&tdb->tdb_mtx);
2544 CLR(tdb->tdb_flags, TDBF_PFSYNC);
2545 mtx_leave(&tdb->tdb_mtx);
2546 }
2547
2548 pfsync_slice_leave(sc, s);
2549 }
2550 smr_read_leave();
2551
2552 /*
2553 * handle pfsync_slice_drop being called from pfsync_down,
2554 * in which case the smr/slice access above won't work.
2555 */
2556
2557 mtx_enter(&tdb->tdb_mtx);
2558 SET(tdb->tdb_flags, TDBF_PFSYNC_SNAPPED); /* like a thanos snap */
2559 while (ISSET(tdb->tdb_flags, TDBF_PFSYNC)) {
2560 msleep_nsec(&tdb->tdb_updates, &tdb->tdb_mtx, PWAIT,
2561 "tdbfree", INFSLP);
2562 }
2563 mtx_leave(&tdb->tdb_mtx);
2564 }
2565 #endif /* defined(IPSEC) */
2566
2567 struct pfsync_act {
2568 void (*in)(struct pfsync_softc *, const caddr_t,
2569 unsigned int, unsigned int);
2570 size_t len;
2571 };
2572
2573 static void pfsync_in_clr(struct pfsync_softc *,
2574 const caddr_t, unsigned int, unsigned int);
2575 static void pfsync_in_iack(struct pfsync_softc *,
2576 const caddr_t, unsigned int, unsigned int);
2577 static void pfsync_in_upd_c(struct pfsync_softc *,
2578 const caddr_t, unsigned int, unsigned int);
2579 static void pfsync_in_ureq(struct pfsync_softc *,
2580 const caddr_t, unsigned int, unsigned int);
2581 static void pfsync_in_del(struct pfsync_softc *,
2582 const caddr_t, unsigned int, unsigned int);
2583 static void pfsync_in_del_c(struct pfsync_softc *,
2584 const caddr_t, unsigned int, unsigned int);
2585 static void pfsync_in_bus(struct pfsync_softc *,
2586 const caddr_t, unsigned int, unsigned int);
2587 static void pfsync_in_tdb(struct pfsync_softc *,
2588 const caddr_t, unsigned int, unsigned int);
2589 static void pfsync_in_ins(struct pfsync_softc *,
2590 const caddr_t, unsigned int, unsigned int);
2591 static void pfsync_in_upd(struct pfsync_softc *,
2592 const caddr_t, unsigned int, unsigned int);
2593
2594 static const struct pfsync_act pfsync_acts[] = {
2595 [PFSYNC_ACT_CLR] =
2596 { pfsync_in_clr, sizeof(struct pfsync_clr) },
2597 [PFSYNC_ACT_INS_ACK] =
2598 { pfsync_in_iack, sizeof(struct pfsync_ins_ack) },
2599 [PFSYNC_ACT_UPD_C] =
2600 { pfsync_in_upd_c, sizeof(struct pfsync_upd_c) },
2601 [PFSYNC_ACT_UPD_REQ] =
2602 { pfsync_in_ureq, sizeof(struct pfsync_upd_req) },
2603 [PFSYNC_ACT_DEL] =
2604 { pfsync_in_del, sizeof(struct pfsync_state) },
2605 [PFSYNC_ACT_DEL_C] =
2606 { pfsync_in_del_c, sizeof(struct pfsync_del_c) },
2607 [PFSYNC_ACT_BUS] =
2608 { pfsync_in_bus, sizeof(struct pfsync_bus) },
2609 [PFSYNC_ACT_INS] =
2610 { pfsync_in_ins, sizeof(struct pfsync_state) },
2611 [PFSYNC_ACT_UPD] =
2612 { pfsync_in_upd, sizeof(struct pfsync_state) },
2613 [PFSYNC_ACT_TDB] =
2614 { pfsync_in_tdb, sizeof(struct pfsync_tdb) },
2615 };
2616
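/*
 * Incoming subheaders are dispatched through pfsync_acts[] by action
 * number. Actions we don't know (or don't care about) still carry
 * their message size in subh->len, so pfsync_input() can fall back to
 * pfsync_in_skip() and m_adj() past mlen * count bytes instead of
 * dropping the whole packet.
 */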
2617 static void
2618 pfsync_in_skip(struct pfsync_softc *sc,
2619 const caddr_t buf, unsigned int mlen, unsigned int count)
2620 {
2621 /* nop */
2622 }
2623
2624 static struct mbuf *
2625 pfsync_input(struct mbuf *m, uint8_t ttl, unsigned int hlen)
2626 {
2627 struct pfsync_softc *sc;
2628 struct pfsync_header *ph;
2629 struct pfsync_subheader *subh;
2630 unsigned int len;
2631 void (*in)(struct pfsync_softc *,
2632 const caddr_t, unsigned int, unsigned int);
2633
2634 pfsyncstat_inc(pfsyncs_ipackets);
2635
2636 if (!pf_status.running)
2637 return (m);
2638
2639 /*
2640 * pfsyncif is only set if it is up and running correctly.
2641 */
2642 smr_read_enter();
2643 sc = SMR_PTR_GET(&pfsyncif);
2644 if (sc == NULL)
2645 goto leave;
2646
2647 if (sc->sc_sync_ifidx != m->m_pkthdr.ph_ifidx) {
2648 pfsyncstat_inc(pfsyncs_badif);
2649 goto leave;
2650 }
2651
2652 /* verify that the IP TTL is 255. */
2653 if (ttl != PFSYNC_DFLTTL) {
2654 pfsyncstat_inc(pfsyncs_badttl);
2655 goto leave;
2656 }
2657
2658 m_adj(m, hlen);
2659
2660 if (m->m_pkthdr.len < sizeof(*ph)) {
2661 pfsyncstat_inc(pfsyncs_hdrops);
2662 goto leave;
2663 }
2664 if (m->m_len < sizeof(*ph)) {
2665 m = m_pullup(m, sizeof(*ph));
2666 if (m == NULL)
2667 goto leave;
2668 }
2669
2670 ph = mtod(m, struct pfsync_header *);
2671 if (ph->version != PFSYNC_VERSION) {
2672 pfsyncstat_inc(pfsyncs_badver);
2673 goto leave;
2674 }
2675
2676 len = ntohs(ph->len);
2677 if (m->m_pkthdr.len < len) {
2678 pfsyncstat_inc(pfsyncs_badlen);
2679 goto leave;
2680 }
2681 if (m->m_pkthdr.len > len)
2682 m->m_pkthdr.len = len;
2683
2684 /* ok, it's serious now */
2685 refcnt_take(&sc->sc_refs);
2686 smr_read_leave();
2687
2688 counters_pkt(sc->sc_if.if_counters, ifc_ipackets, ifc_ibytes, len);
2689
2690 m_adj(m, sizeof(*ph));
2691
2692 while (m->m_pkthdr.len >= sizeof(*subh)) {
2693 unsigned int action, mlen, count;
2694
2695 if (m->m_len < sizeof(*subh)) {
2696 m = m_pullup(m, sizeof(*subh));
2697 if (m == NULL)
2698 goto rele;
2699 }
2700 subh = mtod(m, struct pfsync_subheader *);
2701
2702 action = subh->action;
2703 mlen = subh->len << 2;
2704 count = ntohs(subh->count);
2705
2706 if (action >= PFSYNC_ACT_MAX ||
2707 action >= nitems(pfsync_acts) ||
2708 mlen < pfsync_acts[subh->action].len) {
2709 /*
2710 * subheaders are always followed by at least one
2711 * message, so if the peer is new
2712 * enough to tell us how big its messages are then we
2713 * know enough to skip them.
2714 */
2715 if (count == 0 || mlen == 0) {
2716 pfsyncstat_inc(pfsyncs_badact);
2717 goto rele;
2718 }
2719
2720 in = pfsync_in_skip;
2721 } else {
2722 in = pfsync_acts[action].in;
2723 if (in == NULL)
2724 in = pfsync_in_skip;
2725 }
2726
2727 m_adj(m, sizeof(*subh));
2728 len = mlen * count;
2729 if (len > m->m_pkthdr.len) {
2730 pfsyncstat_inc(pfsyncs_badlen);
2731 goto rele;
2732 }
2733 if (m->m_len < len) {
2734 m = m_pullup(m, len);
2735 if (m == NULL)
2736 goto rele;
2737 }
2738
2739 (*in)(sc, mtod(m, caddr_t), mlen, count);
2740 m_adj(m, len);
2741 }
2742
2743 rele:
2744 refcnt_rele_wake(&sc->sc_refs);
2745 return (m);
2746
2747 leave:
2748 smr_read_leave();
2749 return (m);
2750 }
2751
2752 static void
2753 pfsync_in_clr(struct pfsync_softc *sc,
2754 const caddr_t buf, unsigned int mlen, unsigned int count)
2755 {
2756 const struct pfsync_clr *clr;
2757 struct pf_state *head, *tail, *st, *next;
2758 struct pfi_kif *kif;
2759 uint32_t creatorid;
2760 unsigned int i;
2761
2762 rw_enter_read(&pf_state_list.pfs_rwl);
2763
2764 /* get a view of the state list */
2765 mtx_enter(&pf_state_list.pfs_mtx);
2766 head = TAILQ_FIRST(&pf_state_list.pfs_list);
2767 tail = TAILQ_LAST(&pf_state_list.pfs_list, pf_state_queue);
2768 mtx_leave(&pf_state_list.pfs_mtx);
2769
2770 PF_LOCK();
2771 for (i = 0; i < count; i++) {
2772 clr = (struct pfsync_clr *)(buf + i * mlen);
2773
2774 creatorid = clr->creatorid;
2775 if (clr->ifname[0] == '\0')
2776 kif = NULL;
2777 else {
2778 kif = pfi_kif_find(clr->ifname);
2779 if (kif == NULL)
2780 continue;
2781 }
2782
2783 st = NULL;
2784 next = head;
2785
2786 PF_STATE_ENTER_WRITE();
2787 while (st != tail) {
2788 st = next;
2789 next = TAILQ_NEXT(st, entry_list);
2790
2791 if (creatorid != st->creatorid)
2792 continue;
2793 if (kif != NULL && kif != st->kif)
2794 continue;
2795
2796 mtx_enter(&st->mtx);
2797 SET(st->state_flags, PFSTATE_NOSYNC);
2798 mtx_leave(&st->mtx);
2799 pf_remove_state(st);
2800 }
2801 PF_STATE_EXIT_WRITE();
2802 }
2803 PF_UNLOCK();
2804
2805 rw_exit_read(&pf_state_list.pfs_rwl);
2806 }
2807
2808 static void
2809 pfsync_in_ins(struct pfsync_softc *sc,
2810 const caddr_t buf, unsigned int mlen, unsigned int count)
2811 {
2812 const struct pfsync_state *sp;
2813 sa_family_t af1, af2;
2814 unsigned int i;
2815
2816 PF_LOCK();
2817 for (i = 0; i < count; i++) {
2818 sp = (struct pfsync_state *)(buf + mlen * i);
2819 af1 = sp->key[0].af;
2820 af2 = sp->key[1].af;
2821
2822 /* check for invalid values */
2823 if (sp->timeout >= PFTM_MAX ||
2824 sp->src.state > PF_TCPS_PROXY_DST ||
2825 sp->dst.state > PF_TCPS_PROXY_DST ||
2826 sp->direction > PF_OUT ||
2827 (((af1 || af2) &&
2828 ((af1 != AF_INET && af1 != AF_INET6) ||
2829 (af2 != AF_INET && af2 != AF_INET6))) ||
2830 (sp->af != AF_INET && sp->af != AF_INET6))) {
2831 pfsyncstat_inc(pfsyncs_badval);
2832 continue;
2833 }
2834
2835 if (pf_state_import(sp, PFSYNC_SI_PFSYNC) == ENOMEM) {
2836 /* drop out, but process the rest of the actions */
2837 break;
2838 }
2839 }
2840 PF_UNLOCK();
2841 }
2842
2843 static void
2844 pfsync_in_iack(struct pfsync_softc *sc,
2845 const caddr_t buf, unsigned int mlen, unsigned int count)
2846 {
2847 const struct pfsync_ins_ack *ia;
2848 struct pf_state_cmp id_key;
2849 struct pf_state *st;
2850 unsigned int i;
2851
2852 for (i = 0; i < count; i++) {
2853 ia = (struct pfsync_ins_ack *)(buf + mlen * i);
2854
2855 id_key.id = ia->id;
2856 id_key.creatorid = ia->creatorid;
2857
2858 PF_STATE_ENTER_READ();
2859 st = pf_find_state_byid(&id_key);
2860 pf_state_ref(st);
2861 PF_STATE_EXIT_READ();
2862 if (st == NULL)
2863 continue;
2864
2865 if (READ_ONCE(st->sync_defer) != NULL)
2866 pfsync_deferred(sc, st);
2867
2868 pf_state_unref(st);
2869 }
2870 }
2871
2872 static int
2873 pfsync_upd_tcp(struct pf_state *st, const struct pfsync_state_peer *src,
2874 const struct pfsync_state_peer *dst)
2875 {
2876 int sync = 0;
2877
2878 /*
2879 * The state should never go backwards except
2880 * for syn-proxy states. Neither should the
2881 * sequence window slide backwards.
2882 */
2883 if ((st->src.state > src->state &&
2884 (st->src.state < PF_TCPS_PROXY_SRC ||
2885 src->state >= PF_TCPS_PROXY_SRC)) ||
2886
2887 (st->src.state == src->state &&
2888 SEQ_GT(st->src.seqlo, ntohl(src->seqlo))))
2889 sync++;
2890 else
2891 pf_state_peer_ntoh(src, &st->src);
2892
2893 if ((st->dst.state > dst->state) ||
2894
2895 (st->dst.state == dst->state &&
2896 SEQ_GT(st->dst.seqlo, ntohl(dst->seqlo))))
2897 sync++;
2898 else
2899 pf_state_peer_ntoh(dst, &st->dst);
2900
2901 return (sync);
2902 }
2903
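/*
 * pfsync_upd_tcp() returns the number of peers (src/dst) for which our
 * local copy is already ahead of the update. pfsync_in_updates() only
 * adopts the peer's timeout and expiry when fewer than two halves are
 * stale, and any stale half causes us to push our own, newer copy back
 * out (counted as pfsyncs_stale).
 */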
2904 static void
2905 pfsync_in_updates(struct pfsync_softc *sc, struct pf_state *st,
2906 const struct pfsync_state_peer *src, const struct pfsync_state_peer *dst,
2907 uint8_t timeout)
2908 {
2909 struct pf_state_scrub *sscrub = NULL;
2910 struct pf_state_scrub *dscrub = NULL;
2911 int sync;
2912
2913 if (src->scrub.scrub_flag && st->src.scrub == NULL) {
2914 sscrub = pf_state_scrub_get();
2915 if (sscrub == NULL) {
2916 /* inc error? */
2917 goto out;
2918 }
2919 }
2920 if (dst->scrub.scrub_flag && st->dst.scrub == NULL) {
2921 dscrub = pf_state_scrub_get();
2922 if (dscrub == NULL) {
2923 /* inc error? */
2924 goto out;
2925 }
2926 }
2927
2928 if (READ_ONCE(st->sync_defer) != NULL)
2929 pfsync_deferred(sc, st);
2930
2931 mtx_enter(&st->mtx);
2932
2933 /* attach the scrub memory if needed */
2934 if (sscrub != NULL && st->src.scrub == NULL) {
2935 st->src.scrub = sscrub;
2936 sscrub = NULL;
2937 }
2938 if (dscrub != NULL && st->dst.scrub == NULL) {
2939 st->dst.scrub = dscrub;
2940 dscrub = NULL;
2941 }
2942
2943 if (st->key[PF_SK_WIRE]->proto == IPPROTO_TCP)
2944 sync = pfsync_upd_tcp(st, src, dst);
2945 else {
2946 sync = 0;
2947
2948 /*
2949 * Non-TCP protocol state machines always go
2950 * forward
2951 */
2952 if (st->src.state > src->state)
2953 sync++;
2954 else
2955 pf_state_peer_ntoh(src, &st->src);
2956
2957 if (st->dst.state > dst->state)
2958 sync++;
2959 else
2960 pf_state_peer_ntoh(dst, &st->dst);
2961 }
2962
2963 st->pfsync_time = getuptime();
2964 if (sync < 2) {
2965 st->expire = st->pfsync_time;
2966 st->timeout = timeout;
2967 }
2968
2969 mtx_leave(&st->mtx);
2970
2971 if (sync) {
2972 pfsyncstat_inc(pfsyncs_stale);
2973 pfsync_update_state(st);
2974 }
2975
2976 out:
2977 if (sscrub != NULL)
2978 pf_state_scrub_put(sscrub);
2979 if (dscrub != NULL)
2980 pf_state_scrub_put(dscrub);
2981 }
2982
2983
2984 static void
2985 pfsync_in_upd(struct pfsync_softc *sc,
2986 const caddr_t buf, unsigned int mlen, unsigned int count)
2987 {
2988 const struct pfsync_state *sp;
2989 struct pf_state_cmp id_key;
2990 struct pf_state *st;
2991 int error;
2992 unsigned int i;
2993
2994 for (i = 0; i < count; i++) {
2995 sp = (struct pfsync_state *)(buf + mlen * i);
2996
2997 /* check for invalid values */
2998 if (sp->timeout >= PFTM_MAX ||
2999 sp->src.state > PF_TCPS_PROXY_DST ||
3000 sp->dst.state > PF_TCPS_PROXY_DST) {
3001 pfsyncstat_inc(pfsyncs_badval);
3002 continue;
3003 }
3004
3005 id_key.id = sp->id;
3006 id_key.creatorid = sp->creatorid;
3007
3008 PF_STATE_ENTER_READ();
3009 st = pf_find_state_byid(&id_key);
3010 pf_state_ref(st);
3011 PF_STATE_EXIT_READ();
3012 if (st == NULL) {
3013 /* insert the update */
3014 PF_LOCK();
3015 error = pf_state_import(sp, PFSYNC_SI_PFSYNC);
3016 if (error)
3017 pfsyncstat_inc(pfsyncs_badstate);
3018 PF_UNLOCK();
3019 continue;
3020 }
3021
3022 pfsync_in_updates(sc, st, &sp->src, &sp->dst, sp->timeout);
3023
3024 pf_state_unref(st);
3025 }
3026 }
3027
3028 static struct mbuf *
3029 pfsync_upd_req_init(struct pfsync_softc *sc, unsigned int count)
3030 {
3031 struct mbuf *m;
3032 unsigned int mlen;
3033
3034 m = m_gethdr(M_DONTWAIT, MT_DATA);
3035 if (m == NULL) {
3036 pfsyncstat_inc(pfsyncs_onomem);
3037 return (NULL);
3038 }
3039
3040 mlen = max_linkhdr + sizeof(sc->sc_template) +
3041 sizeof(struct pfsync_header) +
3042 sizeof(struct pfsync_subheader) +
3043 sizeof(struct pfsync_upd_req) * count;
3044
3045 if (mlen > MHLEN) {
3046 MCLGETL(m, M_DONTWAIT, mlen);
3047 if (!ISSET(m->m_flags, M_EXT)) {
3048 m_freem(m);
3049 return (NULL);
3050 }
3051 }
3052
3053 m_align(m, 0);
3054 m->m_len = 0;
3055
3056 return (m);
3057 }
3058
3059 static void
3060 pfsync_in_upd_c(struct pfsync_softc *sc,
3061 const caddr_t buf, unsigned int mlen, unsigned int count)
3062 {
3063 const struct pfsync_upd_c *up;
3064 struct pf_state_cmp id_key;
3065 struct pf_state *st;
3066 unsigned int i;
3067 struct mbuf *m = NULL;
3068 unsigned int rcount = 0;
3069
3070 for (i = 0; i < count; i++) {
3071 up = (struct pfsync_upd_c *)(buf + mlen * i);
3072
3073 /* check for invalid values */
3074 if (up->timeout >= PFTM_MAX ||
3075 up->src.state > PF_TCPS_PROXY_DST ||
3076 up->dst.state > PF_TCPS_PROXY_DST) {
3077 pfsyncstat_inc(pfsyncs_badval);
3078 continue;
3079 }
3080
3081 id_key.id = up->id;
3082 id_key.creatorid = up->creatorid;
3083
3084 PF_STATE_ENTER_READ();
3085 st = pf_find_state_byid(&id_key);
3086 pf_state_ref(st);
3087 PF_STATE_EXIT_READ();
3088 if (st == NULL) {
3089 /* We don't have this state. Ask for it. */
3090 struct pfsync_upd_req *ur;
3091
3092 if (m == NULL) {
3093 m = pfsync_upd_req_init(sc, count);
3094 if (m == NULL) {
3095 pfsyncstat_inc(pfsyncs_onomem);
3096 continue;
3097 }
3098 }
3099
3100 m = m_prepend(m, sizeof(*ur), M_DONTWAIT);
3101 if (m == NULL) {
3102 pfsyncstat_inc(pfsyncs_onomem);
3103 continue;
3104 }
3105
3106 ur = mtod(m, struct pfsync_upd_req *);
3107 ur->id = up->id;
3108 ur->creatorid = up->creatorid;
3109 rcount++;
3110
3111 continue;
3112 }
3113
3114 pfsync_in_updates(sc, st, &up->src, &up->dst, up->timeout);
3115
3116 pf_state_unref(st);
3117 }
3118
3119 if (m != NULL) {
3120 struct pfsync_subheader *subh;
3121
3122 m = m_prepend(m, sizeof(*subh), M_DONTWAIT);
3123 if (m == NULL) {
3124 pfsyncstat_inc(pfsyncs_onomem);
3125 return;
3126 }
3127
3128 subh = mtod(m, struct pfsync_subheader *);
3129 subh->action = PFSYNC_ACT_UPD_REQ;
3130 subh->len = sizeof(struct pfsync_upd_req) >> 2;
3131 subh->count = htons(rcount);
3132
3133 m = pfsync_encap(sc, m);
3134 if (m == NULL) {
3135 pfsyncstat_inc(pfsyncs_onomem);
3136 return;
3137 }
3138
3139 pfsync_sendout(sc, m);
3140 }
3141 }
3142
3143 static void
3144 pfsync_in_ureq(struct pfsync_softc *sc,
3145 const caddr_t buf, unsigned int mlen, unsigned int count)
3146 {
3147 const struct pfsync_upd_req *ur;
3148 struct pf_state_cmp id_key;
3149 struct pf_state *st;
3150 unsigned int i;
3151
3152 for (i = 0; i < count; i++) {
3153 ur = (struct pfsync_upd_req *)(buf + mlen * i);
3154
3155 id_key.id = ur->id;
3156 id_key.creatorid = ur->creatorid;
3157
3158 if (id_key.id == 0 && id_key.creatorid == 0) {
3159 pfsync_bulk_snd_start(sc);
3160 continue;
3161 }
3162
3163 PF_STATE_ENTER_READ();
3164 st = pf_find_state_byid(&id_key);
3165 if (st != NULL && st->timeout < PFTM_MAX &&
3166 !ISSET(st->state_flags, PFSTATE_NOSYNC))
3167 pf_state_ref(st);
3168 else
3169 st = NULL;
3170 PF_STATE_EXIT_READ();
3171 if (st == NULL) {
3172 pfsyncstat_inc(pfsyncs_badstate);
3173 continue;
3174 }
3175
3176 pfsync_update_state_req(sc, st);
3177
3178 pf_state_unref(st);
3179 }
3180 }
3181
3182 static void
3183 pfsync_in_del(struct pfsync_softc *sc,
3184 const caddr_t buf, unsigned int mlen, unsigned int count)
3185 {
3186 const struct pfsync_state *sp;
3187 struct pf_state_cmp id_key;
3188 struct pf_state *st;
3189 unsigned int i;
3190
3191 PF_LOCK();
3192 PF_STATE_ENTER_WRITE();
3193 for (i = 0; i < count; i++) {
3194 sp = (struct pfsync_state *)(buf + mlen * i);
3195
3196 id_key.id = sp->id;
3197 id_key.creatorid = sp->creatorid;
3198
3199 st = pf_find_state_byid(&id_key);
3200 if (st == NULL) {
3201 pfsyncstat_inc(pfsyncs_badstate);
3202 continue;
3203 }
3204
3205 mtx_enter(&st->mtx);
3206 SET(st->state_flags, PFSTATE_NOSYNC);
3207 mtx_leave(&st->mtx);
3208 pf_remove_state(st);
3209 }
3210 PF_STATE_EXIT_WRITE();
3211 PF_UNLOCK();
3212 }
3213
3214 static void
3215 pfsync_in_del_c(struct pfsync_softc *sc,
3216 const caddr_t buf, unsigned int mlen, unsigned int count)
3217 {
3218 const struct pfsync_del_c *sp;
3219 struct pf_state_cmp id_key;
3220 struct pf_state *st;
3221 unsigned int i;
3222
3223 PF_LOCK();
3224 PF_STATE_ENTER_WRITE();
3225 for (i = 0; i < count; i++) {
3226 sp = (struct pfsync_del_c *)(buf + mlen * i);
3227
3228 id_key.id = sp->id;
3229 id_key.creatorid = sp->creatorid;
3230
3231 st = pf_find_state_byid(&id_key);
3232 if (st == NULL) {
3233 pfsyncstat_inc(pfsyncs_badstate);
3234 continue;
3235 }
3236
3237 mtx_enter(&st->mtx);
3238 SET(st->state_flags, PFSTATE_NOSYNC);
3239 mtx_leave(&st->mtx);
3240 pf_remove_state(st);
3241 }
3242 PF_STATE_EXIT_WRITE();
3243 PF_UNLOCK();
3244 }
3245
3246 static void
3247 pfsync_in_bus(struct pfsync_softc *sc,
3248 const caddr_t buf, unsigned int len, unsigned int count)
3249 {
3250 const struct pfsync_bus *bus = (struct pfsync_bus *)buf;
3251
3252 switch (bus->status) {
3253 case PFSYNC_BUS_START:
3254 pfsync_bulk_req_evt(sc, PFSYNC_BREQ_EVT_BUS_START);
3255 break;
3256
3257 case PFSYNC_BUS_END:
3258 pfsync_bulk_req_evt(sc, PFSYNC_BREQ_EVT_BUS_END);
3259 break;
3260 }
3261 }
3262
3263 #if defined(IPSEC)
3264 /* Update an in-kernel tdb. Silently fail if no tdb is found. */
3265 static void
3266 pfsync_update_net_tdb(const struct pfsync_tdb *pt)
3267 {
3268 struct tdb *tdb;
3269
3270 NET_ASSERT_LOCKED();
3271
3272 /* check for invalid values */
3273 if (ntohl(pt->spi) <= SPI_RESERVED_MAX ||
3274 (pt->dst.sa.sa_family != AF_INET &&
3275 pt->dst.sa.sa_family != AF_INET6))
3276 goto bad;
3277
3278 tdb = gettdb(ntohs(pt->rdomain), pt->spi,
3279 (union sockaddr_union *)&pt->dst, pt->sproto);
3280 if (tdb) {
3281 uint64_t rpl = betoh64(pt->rpl);
3282 uint64_t cur_bytes = betoh64(pt->cur_bytes);
3283
3284 /* Neither replay nor byte counter should ever decrease. */
3285 mtx_enter(&tdb->tdb_mtx);
3286 if (rpl >= tdb->tdb_rpl &&
3287 cur_bytes >= tdb->tdb_cur_bytes) {
3288 tdb->tdb_rpl = rpl;
3289 tdb->tdb_cur_bytes = cur_bytes;
3290 }
3291 mtx_leave(&tdb->tdb_mtx);
3292
3293 tdb_unref(tdb);
3294 }
3295 return;
3296
3297 bad:
3298 DPFPRINTF(LOG_WARNING, "pfsync_insert: PFSYNC_ACT_TDB_UPD: "
3299 "invalid value");
3300 pfsyncstat_inc(pfsyncs_badstate);
3301 return;
3302 }
3303 #endif
3304
3305 static void
3306 pfsync_in_tdb(struct pfsync_softc *sc,
3307 const caddr_t buf, unsigned int len, unsigned int count)
3308 {
3309 #if defined(IPSEC)
3310 const struct pfsync_tdb *tp;
3311 unsigned int i;
3312
3313 for (i = 0; i < count; i++) {
3314 tp = (const struct pfsync_tdb *)(buf + len * i);
3315 pfsync_update_net_tdb(tp);
3316 }
3317 #endif
3318 }
3319
3320 int
3321 pfsync_input4(struct mbuf **mp, int *offp, int proto, int af)
3322 {
3323 struct mbuf *m = *mp;
3324 struct ip *ip;
3325
3326 ip = mtod(m, struct ip *);
3327
3328 m = pfsync_input(m, ip->ip_ttl, ip->ip_hl << 2);
3329
3330 m_freem(m);
3331 *mp = NULL;
3332
3333 return (IPPROTO_DONE);
3334 }
3335
3336 int
3337 pfsync_sysctl_pfsyncstat(void *oldp, size_t *oldlenp, void *newp)
3338 {
3339 struct pfsyncstats pfsyncstat;
3340
3341 CTASSERT(sizeof(pfsyncstat) == (pfsyncs_ncounters * sizeof(uint64_t)));
3342 memset(&pfsyncstat, 0, sizeof pfsyncstat);
3343 counters_read(pfsynccounters, (uint64_t *)&pfsyncstat,
3344 pfsyncs_ncounters, NULL);
3345 return (sysctl_rdstruct(oldp, oldlenp, newp,
3346 &pfsyncstat, sizeof(pfsyncstat)));
3347 }
3348
3349 int
3350 pfsync_sysctl(int *name, u_int namelen, void *oldp, size_t *oldlenp,
3351 void *newp, size_t newlen)
3352 {
3353 /* All sysctl names at this level are terminal. */
3354 if (namelen != 1)
3355 return (ENOTDIR);
3356
3357 switch (name[0]) {
3358 case PFSYNCCTL_STATS:
3359 return (pfsync_sysctl_pfsyncstat(oldp, oldlenp, newp));
3360 default:
3361 return (ENOPROTOOPT);
3362 }
3363 }
3364
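/*
 * A minimal userland sketch for reading these counters, assuming the
 * net.inet MIB path that netstat(1) uses for pfsync statistics:
 *
 *	#include <sys/types.h>
 *	#include <sys/socket.h>
 *	#include <sys/sysctl.h>
 *	#include <netinet/in.h>
 *	#include <net/if.h>
 *	#include <net/if_pfsync.h>
 *	#include <err.h>
 *
 *	struct pfsyncstats ps;
 *	size_t len = sizeof(ps);
 *	int mib[] = { CTL_NET, PF_INET, IPPROTO_PFSYNC, PFSYNCCTL_STATS };
 *
 *	if (sysctl(mib, 4, &ps, &len, NULL, 0) == -1)
 *		err(1, "sysctl pfsync stats");
 */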