1 /* $OpenBSD: if_pfsync.c,v 1.267 2019/11/07 11:46:42 dlg Exp $ */ 2 3 /* 4 * Copyright (c) 2002 Michael Shalayeff 5 * All rights reserved. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 1. Redistributions of source code must retain the above copyright 11 * notice, this list of conditions and the following disclaimer. 12 * 2. Redistributions in binary form must reproduce the above copyright 13 * notice, this list of conditions and the following disclaimer in the 14 * documentation and/or other materials provided with the distribution. 15 * 16 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR 17 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 18 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 19 * IN NO EVENT SHALL THE AUTHOR OR HIS RELATIVES BE LIABLE FOR ANY DIRECT, 20 * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 21 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 22 * SERVICES; LOSS OF MIND, USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 23 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, 24 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING 25 * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF 26 * THE POSSIBILITY OF SUCH DAMAGE. 27 */ 28 29 /* 30 * Copyright (c) 2009 David Gwynne <dlg@openbsd.org> 31 * 32 * Permission to use, copy, modify, and distribute this software for any 33 * purpose with or without fee is hereby granted, provided that the above 34 * copyright notice and this permission notice appear in all copies. 35 * 36 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES 37 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF 38 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR 39 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES 40 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN 41 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF 42 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 43 */ 44 45 #include <sys/param.h> 46 #include <sys/systm.h> 47 #include <sys/time.h> 48 #include <sys/malloc.h> 49 #include <sys/mbuf.h> 50 #include <sys/socket.h> 51 #include <sys/ioctl.h> 52 #include <sys/timeout.h> 53 #include <sys/kernel.h> 54 #include <sys/sysctl.h> 55 #include <sys/pool.h> 56 #include <sys/syslog.h> 57 58 #include <net/if.h> 59 #include <net/if_types.h> 60 #include <net/bpf.h> 61 #include <net/netisr.h> 62 63 #include <netinet/in.h> 64 #include <netinet/if_ether.h> 65 #include <netinet/ip.h> 66 #include <netinet/in_var.h> 67 #include <netinet/ip_var.h> 68 #include <netinet/ip_ipsp.h> 69 #include <netinet/ip_icmp.h> 70 #include <netinet/icmp6.h> 71 #include <netinet/tcp.h> 72 #include <netinet/tcp_seq.h> 73 #include <netinet/tcp_fsm.h> 74 #include <netinet/udp.h> 75 76 #ifdef INET6 77 #include <netinet6/in6_var.h> 78 #include <netinet/ip6.h> 79 #include <netinet6/ip6_var.h> 80 #include <netinet6/nd6.h> 81 #endif /* INET6 */ 82 83 #include "carp.h" 84 #if NCARP > 0 85 #include <netinet/ip_carp.h> 86 #endif 87 88 #define PF_DEBUGNAME "pfsync: " 89 #include <net/pfvar.h> 90 #include <net/pfvar_priv.h> 91 #include <net/if_pfsync.h> 92 93 #include "bpfilter.h" 94 #include "pfsync.h" 95 96 #define PFSYNC_MINPKT ( \ 97 sizeof(struct ip) + \ 98 sizeof(struct pfsync_header)) 99 100 int pfsync_upd_tcp(struct pf_state *, struct pfsync_state_peer *, 101 struct pfsync_state_peer *); 102 103 int pfsync_in_clr(caddr_t, int, int, int); 104 int pfsync_in_iack(caddr_t, int, int, int); 105 int pfsync_in_upd_c(caddr_t, int, int, int); 106 int pfsync_in_ureq(caddr_t, int, int, int); 107 int pfsync_in_del(caddr_t, int, int, int); 108 int pfsync_in_del_c(caddr_t, int, int, int); 109 int pfsync_in_bus(caddr_t, int, int, int); 110 int pfsync_in_tdb(caddr_t, int, int, int); 111 int pfsync_in_ins(caddr_t, int, int, int); 112 int pfsync_in_upd(caddr_t, int, int, int); 113 int pfsync_in_eof(caddr_t, int, int, int); 114 115 int pfsync_in_error(caddr_t, int, int, int); 116 117 void pfsync_update_state_locked(struct pf_state *); 118 119 struct { 120 int (*in)(caddr_t, int, int, int); 121 size_t len; 122 } pfsync_acts[] = { 123 /* PFSYNC_ACT_CLR */ 124 { pfsync_in_clr, sizeof(struct pfsync_clr) }, 125 /* PFSYNC_ACT_OINS */ 126 { pfsync_in_error, 0 }, 127 /* PFSYNC_ACT_INS_ACK */ 128 { pfsync_in_iack, sizeof(struct pfsync_ins_ack) }, 129 /* PFSYNC_ACT_OUPD */ 130 { pfsync_in_error, 0 }, 131 /* PFSYNC_ACT_UPD_C */ 132 { pfsync_in_upd_c, sizeof(struct pfsync_upd_c) }, 133 /* PFSYNC_ACT_UPD_REQ */ 134 { pfsync_in_ureq, sizeof(struct pfsync_upd_req) }, 135 /* PFSYNC_ACT_DEL */ 136 { pfsync_in_del, sizeof(struct pfsync_state) }, 137 /* PFSYNC_ACT_DEL_C */ 138 { pfsync_in_del_c, sizeof(struct pfsync_del_c) }, 139 /* PFSYNC_ACT_INS_F */ 140 { pfsync_in_error, 0 }, 141 /* PFSYNC_ACT_DEL_F */ 142 { pfsync_in_error, 0 }, 143 /* PFSYNC_ACT_BUS */ 144 { pfsync_in_bus, sizeof(struct pfsync_bus) }, 145 /* PFSYNC_ACT_OTDB */ 146 { pfsync_in_error, 0 }, 147 /* PFSYNC_ACT_EOF */ 148 { pfsync_in_error, 0 }, 149 /* PFSYNC_ACT_INS */ 150 { pfsync_in_ins, sizeof(struct pfsync_state) }, 151 /* PFSYNC_ACT_UPD */ 152 { pfsync_in_upd, sizeof(struct pfsync_state) }, 153 /* PFSYNC_ACT_TDB */ 154 { pfsync_in_tdb, sizeof(struct pfsync_tdb) }, 155 }; 156 157 struct pfsync_q { 158 void (*write)(struct pf_state *, void *); 159 size_t len; 160 u_int8_t action; 161 }; 162 163 /* we have one of these for every PFSYNC_S_ */ 164 void pfsync_out_state(struct pf_state *, void *); 165 void pfsync_out_iack(struct pf_state *, void *); 166 void pfsync_out_upd_c(struct pf_state *, void *); 167 void pfsync_out_del(struct pf_state *, void *); 168 169 struct pfsync_q pfsync_qs[] = { 170 { pfsync_out_iack, sizeof(struct pfsync_ins_ack), PFSYNC_ACT_INS_ACK }, 171 { pfsync_out_upd_c, sizeof(struct pfsync_upd_c), PFSYNC_ACT_UPD_C }, 172 { pfsync_out_del, sizeof(struct pfsync_del_c), PFSYNC_ACT_DEL_C }, 173 { pfsync_out_state, sizeof(struct pfsync_state), PFSYNC_ACT_INS }, 174 { pfsync_out_state, sizeof(struct pfsync_state), PFSYNC_ACT_UPD } 175 }; 176 177 void pfsync_q_ins(struct pf_state *, int); 178 void pfsync_q_del(struct pf_state *); 179 180 struct pfsync_upd_req_item { 181 TAILQ_ENTRY(pfsync_upd_req_item) ur_entry; 182 struct pfsync_upd_req ur_msg; 183 }; 184 TAILQ_HEAD(pfsync_upd_reqs, pfsync_upd_req_item); 185 186 struct pfsync_deferral { 187 TAILQ_ENTRY(pfsync_deferral) pd_entry; 188 struct pf_state *pd_st; 189 struct mbuf *pd_m; 190 struct timeout pd_tmo; 191 }; 192 TAILQ_HEAD(pfsync_deferrals, pfsync_deferral); 193 194 #define PFSYNC_PLSIZE MAX(sizeof(struct pfsync_upd_req_item), \ 195 sizeof(struct pfsync_deferral)) 196 197 void pfsync_out_tdb(struct tdb *, void *); 198 199 struct pfsync_softc { 200 struct ifnet sc_if; 201 struct ifnet *sc_sync_if; 202 203 struct pool sc_pool; 204 205 struct ip_moptions sc_imo; 206 207 struct in_addr sc_sync_peer; 208 u_int8_t sc_maxupdates; 209 210 struct ip sc_template; 211 212 struct pf_state_queue sc_qs[PFSYNC_S_COUNT]; 213 size_t sc_len; 214 215 struct pfsync_upd_reqs sc_upd_req_list; 216 217 int sc_initial_bulk; 218 int sc_link_demoted; 219 220 int sc_defer; 221 struct pfsync_deferrals sc_deferrals; 222 u_int sc_deferred; 223 224 void *sc_plus; 225 size_t sc_pluslen; 226 227 u_int32_t sc_ureq_sent; 228 int sc_bulk_tries; 229 struct timeout sc_bulkfail_tmo; 230 231 u_int32_t sc_ureq_received; 232 struct pf_state *sc_bulk_next; 233 struct pf_state *sc_bulk_last; 234 struct timeout sc_bulk_tmo; 235 236 TAILQ_HEAD(, tdb) sc_tdb_q; 237 238 struct task sc_ltask; 239 struct task sc_dtask; 240 241 struct timeout sc_tmo; 242 }; 243 244 struct pfsync_softc *pfsyncif = NULL; 245 struct cpumem *pfsynccounters; 246 247 void pfsyncattach(int); 248 int pfsync_clone_create(struct if_clone *, int); 249 int pfsync_clone_destroy(struct ifnet *); 250 int pfsync_alloc_scrub_memory(struct pfsync_state_peer *, 251 struct pf_state_peer *); 252 void pfsync_update_net_tdb(struct pfsync_tdb *); 253 int pfsyncoutput(struct ifnet *, struct mbuf *, struct sockaddr *, 254 struct rtentry *); 255 int pfsyncioctl(struct ifnet *, u_long, caddr_t); 256 void pfsyncstart(struct ifnet *); 257 void pfsync_syncdev_state(void *); 258 void pfsync_ifdetach(void *); 259 260 void pfsync_deferred(struct pf_state *, int); 261 void pfsync_undefer(struct pfsync_deferral *, int); 262 void pfsync_defer_tmo(void *); 263 264 void pfsync_cancel_full_update(struct pfsync_softc *); 265 void pfsync_request_full_update(struct pfsync_softc *); 266 void pfsync_request_update(u_int32_t, u_int64_t); 267 void pfsync_update_state_req(struct pf_state *); 268 269 void pfsync_drop(struct pfsync_softc *); 270 void pfsync_sendout(void); 271 void pfsync_send_plus(void *, size_t); 272 void pfsync_timeout(void *); 273 void pfsync_tdb_timeout(void *); 274 275 void pfsync_bulk_start(void); 276 void pfsync_bulk_status(u_int8_t); 277 void pfsync_bulk_update(void *); 278 void pfsync_bulk_fail(void *); 279 #ifdef WITH_PF_LOCK 280 void pfsync_send_dispatch(void *); 281 void pfsync_send_pkt(struct mbuf *); 282 283 static struct mbuf_queue pfsync_mq; 284 static struct task pfsync_task = 285 TASK_INITIALIZER(pfsync_send_dispatch, &pfsync_mq); 286 #endif /* WITH_PF_LOCK */ 287 288 #define PFSYNC_MAX_BULKTRIES 12 289 int pfsync_sync_ok; 290 291 struct if_clone pfsync_cloner = 292 IF_CLONE_INITIALIZER("pfsync", pfsync_clone_create, pfsync_clone_destroy); 293 294 void 295 pfsyncattach(int npfsync) 296 { 297 if_clone_attach(&pfsync_cloner); 298 pfsynccounters = counters_alloc(pfsyncs_ncounters); 299 #ifdef WITH_PF_LOCK 300 mq_init(&pfsync_mq, 4096, IPL_SOFTNET); 301 #endif /* WITH_PF_LOCK */ 302 } 303 304 int 305 pfsync_clone_create(struct if_clone *ifc, int unit) 306 { 307 struct pfsync_softc *sc; 308 struct ifnet *ifp; 309 int q; 310 311 if (unit != 0) 312 return (EINVAL); 313 314 pfsync_sync_ok = 1; 315 316 sc = malloc(sizeof(*pfsyncif), M_DEVBUF, M_WAITOK|M_ZERO); 317 for (q = 0; q < PFSYNC_S_COUNT; q++) 318 TAILQ_INIT(&sc->sc_qs[q]); 319 320 pool_init(&sc->sc_pool, PFSYNC_PLSIZE, 0, IPL_SOFTNET, 0, "pfsync", 321 NULL); 322 TAILQ_INIT(&sc->sc_upd_req_list); 323 TAILQ_INIT(&sc->sc_deferrals); 324 task_set(&sc->sc_ltask, pfsync_syncdev_state, sc); 325 task_set(&sc->sc_dtask, pfsync_ifdetach, sc); 326 sc->sc_deferred = 0; 327 328 TAILQ_INIT(&sc->sc_tdb_q); 329 330 sc->sc_len = PFSYNC_MINPKT; 331 sc->sc_maxupdates = 128; 332 333 sc->sc_imo.imo_membership = mallocarray(IP_MIN_MEMBERSHIPS, 334 sizeof(struct in_multi *), M_IPMOPTS, M_WAITOK|M_ZERO); 335 sc->sc_imo.imo_max_memberships = IP_MIN_MEMBERSHIPS; 336 337 ifp = &sc->sc_if; 338 snprintf(ifp->if_xname, sizeof ifp->if_xname, "pfsync%d", unit); 339 ifp->if_softc = sc; 340 ifp->if_ioctl = pfsyncioctl; 341 ifp->if_output = pfsyncoutput; 342 ifp->if_start = pfsyncstart; 343 ifp->if_type = IFT_PFSYNC; 344 IFQ_SET_MAXLEN(&ifp->if_snd, IFQ_MAXLEN); 345 ifp->if_hdrlen = sizeof(struct pfsync_header); 346 ifp->if_mtu = ETHERMTU; 347 ifp->if_xflags = IFXF_CLONED; 348 timeout_set_proc(&sc->sc_tmo, pfsync_timeout, NULL); 349 timeout_set_proc(&sc->sc_bulk_tmo, pfsync_bulk_update, NULL); 350 timeout_set_proc(&sc->sc_bulkfail_tmo, pfsync_bulk_fail, NULL); 351 352 if_attach(ifp); 353 if_alloc_sadl(ifp); 354 355 #if NCARP > 0 356 if_addgroup(ifp, "carp"); 357 #endif 358 359 #if NBPFILTER > 0 360 bpfattach(&sc->sc_if.if_bpf, ifp, DLT_PFSYNC, PFSYNC_HDRLEN); 361 #endif 362 363 pfsyncif = sc; 364 365 return (0); 366 } 367 368 int 369 pfsync_clone_destroy(struct ifnet *ifp) 370 { 371 struct pfsync_softc *sc = ifp->if_softc; 372 struct pfsync_deferral *pd; 373 374 NET_LOCK(); 375 376 #if NCARP > 0 377 if (!pfsync_sync_ok) 378 carp_group_demote_adj(&sc->sc_if, -1, "pfsync destroy"); 379 if (sc->sc_link_demoted) 380 carp_group_demote_adj(&sc->sc_if, -1, "pfsync destroy"); 381 #endif 382 if (sc->sc_sync_if) { 383 if_linkstatehook_del(sc->sc_sync_if, &sc->sc_ltask); 384 if_detachhook_del(sc->sc_sync_if, &sc->sc_dtask); 385 } 386 387 /* XXXSMP breaks atomicity */ 388 NET_UNLOCK(); 389 if_detach(ifp); 390 NET_LOCK(); 391 392 pfsync_drop(sc); 393 394 while (sc->sc_deferred > 0) { 395 pd = TAILQ_FIRST(&sc->sc_deferrals); 396 timeout_del(&pd->pd_tmo); 397 pfsync_undefer(pd, 0); 398 } 399 400 pfsyncif = NULL; 401 timeout_del(&sc->sc_bulkfail_tmo); 402 timeout_del(&sc->sc_bulk_tmo); 403 timeout_del(&sc->sc_tmo); 404 405 NET_UNLOCK(); 406 407 pool_destroy(&sc->sc_pool); 408 free(sc->sc_imo.imo_membership, M_IPMOPTS, 409 sc->sc_imo.imo_max_memberships * sizeof(struct in_multi *)); 410 free(sc, M_DEVBUF, sizeof(*sc)); 411 412 return (0); 413 } 414 415 /* 416 * Start output on the pfsync interface. 417 */ 418 void 419 pfsyncstart(struct ifnet *ifp) 420 { 421 IFQ_PURGE(&ifp->if_snd); 422 } 423 424 void 425 pfsync_syncdev_state(void *arg) 426 { 427 struct pfsync_softc *sc = arg; 428 429 if (!sc->sc_sync_if || !(sc->sc_if.if_flags & IFF_UP)) 430 return; 431 432 if (sc->sc_sync_if->if_link_state == LINK_STATE_DOWN) { 433 sc->sc_if.if_flags &= ~IFF_RUNNING; 434 if (!sc->sc_link_demoted) { 435 #if NCARP > 0 436 carp_group_demote_adj(&sc->sc_if, 1, 437 "pfsync link state down"); 438 #endif 439 sc->sc_link_demoted = 1; 440 } 441 442 /* drop everything */ 443 timeout_del(&sc->sc_tmo); 444 pfsync_drop(sc); 445 446 pfsync_cancel_full_update(sc); 447 } else if (sc->sc_link_demoted) { 448 sc->sc_if.if_flags |= IFF_RUNNING; 449 450 pfsync_request_full_update(sc); 451 } 452 } 453 454 void 455 pfsync_ifdetach(void *arg) 456 { 457 struct pfsync_softc *sc = arg; 458 459 if_linkstatehook_del(sc->sc_sync_if, &sc->sc_ltask); 460 if_detachhook_del(sc->sc_sync_if, &sc->sc_dtask); 461 462 sc->sc_sync_if = NULL; 463 } 464 465 int 466 pfsync_alloc_scrub_memory(struct pfsync_state_peer *s, 467 struct pf_state_peer *d) 468 { 469 if (s->scrub.scrub_flag && d->scrub == NULL) { 470 d->scrub = pool_get(&pf_state_scrub_pl, PR_NOWAIT | PR_ZERO); 471 if (d->scrub == NULL) 472 return (ENOMEM); 473 } 474 475 return (0); 476 } 477 478 void 479 pfsync_state_export(struct pfsync_state *sp, struct pf_state *st) 480 { 481 pf_state_export(sp, st); 482 } 483 484 int 485 pfsync_state_import(struct pfsync_state *sp, int flags) 486 { 487 struct pf_state *st = NULL; 488 struct pf_state_key *skw = NULL, *sks = NULL; 489 struct pf_rule *r = NULL; 490 struct pfi_kif *kif; 491 int pool_flags; 492 int error; 493 494 if (sp->creatorid == 0) { 495 DPFPRINTF(LOG_NOTICE, "pfsync_state_import: " 496 "invalid creator id: %08x", ntohl(sp->creatorid)); 497 return (EINVAL); 498 } 499 500 if ((kif = pfi_kif_get(sp->ifname)) == NULL) { 501 DPFPRINTF(LOG_NOTICE, "pfsync_state_import: " 502 "unknown interface: %s", sp->ifname); 503 if (flags & PFSYNC_SI_IOCTL) 504 return (EINVAL); 505 return (0); /* skip this state */ 506 } 507 508 if (sp->af == 0) 509 return (0); /* skip this state */ 510 511 /* 512 * If the ruleset checksums match or the state is coming from the ioctl, 513 * it's safe to associate the state with the rule of that number. 514 */ 515 if (sp->rule != htonl(-1) && sp->anchor == htonl(-1) && 516 (flags & (PFSYNC_SI_IOCTL | PFSYNC_SI_CKSUM)) && ntohl(sp->rule) < 517 pf_main_ruleset.rules.active.rcount) 518 r = pf_main_ruleset.rules.active.ptr_array[ntohl(sp->rule)]; 519 else 520 r = &pf_default_rule; 521 522 if ((r->max_states && r->states_cur >= r->max_states)) 523 goto cleanup; 524 525 if (flags & PFSYNC_SI_IOCTL) 526 pool_flags = PR_WAITOK | PR_LIMITFAIL | PR_ZERO; 527 else 528 pool_flags = PR_NOWAIT | PR_LIMITFAIL | PR_ZERO; 529 530 if ((st = pool_get(&pf_state_pl, pool_flags)) == NULL) 531 goto cleanup; 532 533 if ((skw = pf_alloc_state_key(pool_flags)) == NULL) 534 goto cleanup; 535 536 if ((sp->key[PF_SK_WIRE].af && 537 (sp->key[PF_SK_WIRE].af != sp->key[PF_SK_STACK].af)) || 538 PF_ANEQ(&sp->key[PF_SK_WIRE].addr[0], 539 &sp->key[PF_SK_STACK].addr[0], sp->af) || 540 PF_ANEQ(&sp->key[PF_SK_WIRE].addr[1], 541 &sp->key[PF_SK_STACK].addr[1], sp->af) || 542 sp->key[PF_SK_WIRE].port[0] != sp->key[PF_SK_STACK].port[0] || 543 sp->key[PF_SK_WIRE].port[1] != sp->key[PF_SK_STACK].port[1] || 544 sp->key[PF_SK_WIRE].rdomain != sp->key[PF_SK_STACK].rdomain) { 545 if ((sks = pf_alloc_state_key(pool_flags)) == NULL) 546 goto cleanup; 547 } else 548 sks = skw; 549 550 /* allocate memory for scrub info */ 551 if (pfsync_alloc_scrub_memory(&sp->src, &st->src) || 552 pfsync_alloc_scrub_memory(&sp->dst, &st->dst)) 553 goto cleanup; 554 555 /* copy to state key(s) */ 556 skw->addr[0] = sp->key[PF_SK_WIRE].addr[0]; 557 skw->addr[1] = sp->key[PF_SK_WIRE].addr[1]; 558 skw->port[0] = sp->key[PF_SK_WIRE].port[0]; 559 skw->port[1] = sp->key[PF_SK_WIRE].port[1]; 560 skw->rdomain = ntohs(sp->key[PF_SK_WIRE].rdomain); 561 PF_REF_INIT(skw->refcnt); 562 skw->proto = sp->proto; 563 if (!(skw->af = sp->key[PF_SK_WIRE].af)) 564 skw->af = sp->af; 565 if (sks != skw) { 566 sks->addr[0] = sp->key[PF_SK_STACK].addr[0]; 567 sks->addr[1] = sp->key[PF_SK_STACK].addr[1]; 568 sks->port[0] = sp->key[PF_SK_STACK].port[0]; 569 sks->port[1] = sp->key[PF_SK_STACK].port[1]; 570 sks->rdomain = ntohs(sp->key[PF_SK_STACK].rdomain); 571 PF_REF_INIT(sks->refcnt); 572 if (!(sks->af = sp->key[PF_SK_STACK].af)) 573 sks->af = sp->af; 574 if (sks->af != skw->af) { 575 switch (sp->proto) { 576 case IPPROTO_ICMP: 577 sks->proto = IPPROTO_ICMPV6; 578 break; 579 case IPPROTO_ICMPV6: 580 sks->proto = IPPROTO_ICMP; 581 break; 582 default: 583 sks->proto = sp->proto; 584 } 585 } else 586 sks->proto = sp->proto; 587 } 588 st->rtableid[PF_SK_WIRE] = ntohl(sp->rtableid[PF_SK_WIRE]); 589 st->rtableid[PF_SK_STACK] = ntohl(sp->rtableid[PF_SK_STACK]); 590 591 /* copy to state */ 592 bcopy(&sp->rt_addr, &st->rt_addr, sizeof(st->rt_addr)); 593 st->creation = time_uptime - ntohl(sp->creation); 594 st->expire = time_uptime; 595 if (ntohl(sp->expire)) { 596 u_int32_t timeout; 597 598 timeout = r->timeout[sp->timeout]; 599 if (!timeout) 600 timeout = pf_default_rule.timeout[sp->timeout]; 601 602 /* sp->expire may have been adaptively scaled by export. */ 603 st->expire -= timeout - ntohl(sp->expire); 604 } 605 606 st->direction = sp->direction; 607 st->log = sp->log; 608 st->timeout = sp->timeout; 609 st->state_flags = ntohs(sp->state_flags); 610 st->max_mss = ntohs(sp->max_mss); 611 st->min_ttl = sp->min_ttl; 612 st->set_tos = sp->set_tos; 613 st->set_prio[0] = sp->set_prio[0]; 614 st->set_prio[1] = sp->set_prio[1]; 615 616 st->id = sp->id; 617 st->creatorid = sp->creatorid; 618 pf_state_peer_ntoh(&sp->src, &st->src); 619 pf_state_peer_ntoh(&sp->dst, &st->dst); 620 621 st->rule.ptr = r; 622 st->anchor.ptr = NULL; 623 st->rt_kif = NULL; 624 625 st->pfsync_time = time_uptime; 626 st->sync_state = PFSYNC_S_NONE; 627 628 refcnt_init(&st->refcnt); 629 630 /* XXX when we have anchors, use STATE_INC_COUNTERS */ 631 r->states_cur++; 632 r->states_tot++; 633 634 if (!ISSET(flags, PFSYNC_SI_IOCTL)) 635 SET(st->state_flags, PFSTATE_NOSYNC); 636 637 /* 638 * We just set PFSTATE_NOSYNC bit, which prevents 639 * pfsync_insert_state() to insert state to pfsync. 640 */ 641 if (pf_state_insert(kif, &skw, &sks, st) != 0) { 642 /* XXX when we have anchors, use STATE_DEC_COUNTERS */ 643 r->states_cur--; 644 error = EEXIST; 645 goto cleanup_state; 646 } 647 648 if (!ISSET(flags, PFSYNC_SI_IOCTL)) { 649 CLR(st->state_flags, PFSTATE_NOSYNC); 650 if (ISSET(st->state_flags, PFSTATE_ACK)) { 651 pfsync_q_ins(st, PFSYNC_S_IACK); 652 schednetisr(NETISR_PFSYNC); 653 } 654 } 655 CLR(st->state_flags, PFSTATE_ACK); 656 657 return (0); 658 659 cleanup: 660 error = ENOMEM; 661 if (skw == sks) 662 sks = NULL; 663 if (skw != NULL) 664 pool_put(&pf_state_key_pl, skw); 665 if (sks != NULL) 666 pool_put(&pf_state_key_pl, sks); 667 668 cleanup_state: /* pf_state_insert frees the state keys */ 669 if (st) { 670 if (st->dst.scrub) 671 pool_put(&pf_state_scrub_pl, st->dst.scrub); 672 if (st->src.scrub) 673 pool_put(&pf_state_scrub_pl, st->src.scrub); 674 pool_put(&pf_state_pl, st); 675 } 676 return (error); 677 } 678 679 int 680 pfsync_input(struct mbuf **mp, int *offp, int proto, int af) 681 { 682 struct mbuf *n, *m = *mp; 683 struct pfsync_softc *sc = pfsyncif; 684 struct ip *ip = mtod(m, struct ip *); 685 struct pfsync_header *ph; 686 struct pfsync_subheader subh; 687 int offset, noff, len, count, mlen, flags = 0; 688 int e; 689 690 NET_ASSERT_LOCKED(); 691 692 pfsyncstat_inc(pfsyncs_ipackets); 693 694 /* verify that we have a sync interface configured */ 695 if (sc == NULL || !ISSET(sc->sc_if.if_flags, IFF_RUNNING) || 696 sc->sc_sync_if == NULL || !pf_status.running) 697 goto done; 698 699 /* verify that the packet came in on the right interface */ 700 if (sc->sc_sync_if->if_index != m->m_pkthdr.ph_ifidx) { 701 pfsyncstat_inc(pfsyncs_badif); 702 goto done; 703 } 704 705 sc->sc_if.if_ipackets++; 706 sc->sc_if.if_ibytes += m->m_pkthdr.len; 707 708 /* verify that the IP TTL is 255. */ 709 if (ip->ip_ttl != PFSYNC_DFLTTL) { 710 pfsyncstat_inc(pfsyncs_badttl); 711 goto done; 712 } 713 714 offset = ip->ip_hl << 2; 715 n = m_pulldown(m, offset, sizeof(*ph), &noff); 716 if (n == NULL) { 717 pfsyncstat_inc(pfsyncs_hdrops); 718 return IPPROTO_DONE; 719 } 720 ph = (struct pfsync_header *)(n->m_data + noff); 721 722 /* verify the version */ 723 if (ph->version != PFSYNC_VERSION) { 724 pfsyncstat_inc(pfsyncs_badver); 725 goto done; 726 } 727 len = ntohs(ph->len) + offset; 728 if (m->m_pkthdr.len < len) { 729 pfsyncstat_inc(pfsyncs_badlen); 730 goto done; 731 } 732 733 if (!bcmp(&ph->pfcksum, &pf_status.pf_chksum, PF_MD5_DIGEST_LENGTH)) 734 flags = PFSYNC_SI_CKSUM; 735 736 offset += sizeof(*ph); 737 while (offset <= len - sizeof(subh)) { 738 m_copydata(m, offset, sizeof(subh), (caddr_t)&subh); 739 offset += sizeof(subh); 740 741 mlen = subh.len << 2; 742 count = ntohs(subh.count); 743 744 if (subh.action >= PFSYNC_ACT_MAX || 745 subh.action >= nitems(pfsync_acts) || 746 mlen < pfsync_acts[subh.action].len) { 747 /* 748 * subheaders are always followed by at least one 749 * message, so if the peer is new 750 * enough to tell us how big its messages are then we 751 * know enough to skip them. 752 */ 753 if (count > 0 && mlen > 0) { 754 offset += count * mlen; 755 continue; 756 } 757 pfsyncstat_inc(pfsyncs_badact); 758 goto done; 759 } 760 761 n = m_pulldown(m, offset, mlen * count, &noff); 762 if (n == NULL) { 763 pfsyncstat_inc(pfsyncs_badlen); 764 return IPPROTO_DONE; 765 } 766 767 PF_LOCK(); 768 e = pfsync_acts[subh.action].in(n->m_data + noff, mlen, count, 769 flags); 770 PF_UNLOCK(); 771 if (e != 0) 772 goto done; 773 774 offset += mlen * count; 775 } 776 777 done: 778 m_freem(m); 779 return IPPROTO_DONE; 780 } 781 782 int 783 pfsync_in_clr(caddr_t buf, int len, int count, int flags) 784 { 785 struct pfsync_clr *clr; 786 struct pf_state *st, *nexts; 787 struct pfi_kif *kif; 788 u_int32_t creatorid; 789 int i; 790 791 for (i = 0; i < count; i++) { 792 clr = (struct pfsync_clr *)buf + len * i; 793 kif = NULL; 794 creatorid = clr->creatorid; 795 if (strlen(clr->ifname) && 796 (kif = pfi_kif_find(clr->ifname)) == NULL) 797 continue; 798 799 for (st = RB_MIN(pf_state_tree_id, &tree_id); st; st = nexts) { 800 nexts = RB_NEXT(pf_state_tree_id, &tree_id, st); 801 if (st->creatorid == creatorid && 802 ((kif && st->kif == kif) || !kif)) { 803 SET(st->state_flags, PFSTATE_NOSYNC); 804 pf_remove_state(st); 805 } 806 } 807 } 808 809 return (0); 810 } 811 812 int 813 pfsync_in_ins(caddr_t buf, int len, int count, int flags) 814 { 815 struct pfsync_state *sp; 816 sa_family_t af1, af2; 817 int i; 818 819 for (i = 0; i < count; i++) { 820 sp = (struct pfsync_state *)(buf + len * i); 821 af1 = sp->key[0].af; 822 af2 = sp->key[1].af; 823 824 /* check for invalid values */ 825 if (sp->timeout >= PFTM_MAX || 826 sp->src.state > PF_TCPS_PROXY_DST || 827 sp->dst.state > PF_TCPS_PROXY_DST || 828 sp->direction > PF_OUT || 829 (((af1 || af2) && 830 ((af1 != AF_INET && af1 != AF_INET6) || 831 (af2 != AF_INET && af2 != AF_INET6))) || 832 (sp->af != AF_INET && sp->af != AF_INET6))) { 833 DPFPRINTF(LOG_NOTICE, 834 "pfsync_input: PFSYNC5_ACT_INS: invalid value"); 835 pfsyncstat_inc(pfsyncs_badval); 836 continue; 837 } 838 839 if (pfsync_state_import(sp, flags) == ENOMEM) { 840 /* drop out, but process the rest of the actions */ 841 break; 842 } 843 } 844 845 return (0); 846 } 847 848 int 849 pfsync_in_iack(caddr_t buf, int len, int count, int flags) 850 { 851 struct pfsync_ins_ack *ia; 852 struct pf_state_cmp id_key; 853 struct pf_state *st; 854 int i; 855 856 for (i = 0; i < count; i++) { 857 ia = (struct pfsync_ins_ack *)(buf + len * i); 858 859 id_key.id = ia->id; 860 id_key.creatorid = ia->creatorid; 861 862 st = pf_find_state_byid(&id_key); 863 if (st == NULL) 864 continue; 865 866 if (ISSET(st->state_flags, PFSTATE_ACK)) 867 pfsync_deferred(st, 0); 868 } 869 870 return (0); 871 } 872 873 int 874 pfsync_upd_tcp(struct pf_state *st, struct pfsync_state_peer *src, 875 struct pfsync_state_peer *dst) 876 { 877 int sync = 0; 878 879 /* 880 * The state should never go backwards except 881 * for syn-proxy states. Neither should the 882 * sequence window slide backwards. 883 */ 884 if ((st->src.state > src->state && 885 (st->src.state < PF_TCPS_PROXY_SRC || 886 src->state >= PF_TCPS_PROXY_SRC)) || 887 888 (st->src.state == src->state && 889 SEQ_GT(st->src.seqlo, ntohl(src->seqlo)))) 890 sync++; 891 else 892 pf_state_peer_ntoh(src, &st->src); 893 894 if ((st->dst.state > dst->state) || 895 896 (st->dst.state >= TCPS_SYN_SENT && 897 SEQ_GT(st->dst.seqlo, ntohl(dst->seqlo)))) 898 sync++; 899 else 900 pf_state_peer_ntoh(dst, &st->dst); 901 902 return (sync); 903 } 904 905 int 906 pfsync_in_upd(caddr_t buf, int len, int count, int flags) 907 { 908 struct pfsync_state *sp; 909 struct pf_state_cmp id_key; 910 struct pf_state *st; 911 int sync; 912 913 int i; 914 915 for (i = 0; i < count; i++) { 916 sp = (struct pfsync_state *)(buf + len * i); 917 918 /* check for invalid values */ 919 if (sp->timeout >= PFTM_MAX || 920 sp->src.state > PF_TCPS_PROXY_DST || 921 sp->dst.state > PF_TCPS_PROXY_DST) { 922 DPFPRINTF(LOG_NOTICE, 923 "pfsync_input: PFSYNC_ACT_UPD: invalid value"); 924 pfsyncstat_inc(pfsyncs_badval); 925 continue; 926 } 927 928 id_key.id = sp->id; 929 id_key.creatorid = sp->creatorid; 930 931 st = pf_find_state_byid(&id_key); 932 if (st == NULL) { 933 /* insert the update */ 934 if (pfsync_state_import(sp, flags)) 935 pfsyncstat_inc(pfsyncs_badstate); 936 continue; 937 } 938 939 if (ISSET(st->state_flags, PFSTATE_ACK)) 940 pfsync_deferred(st, 1); 941 942 if (st->key[PF_SK_WIRE]->proto == IPPROTO_TCP) 943 sync = pfsync_upd_tcp(st, &sp->src, &sp->dst); 944 else { 945 sync = 0; 946 947 /* 948 * Non-TCP protocol state machine always go 949 * forwards 950 */ 951 if (st->src.state > sp->src.state) 952 sync++; 953 else 954 pf_state_peer_ntoh(&sp->src, &st->src); 955 956 if (st->dst.state > sp->dst.state) 957 sync++; 958 else 959 pf_state_peer_ntoh(&sp->dst, &st->dst); 960 } 961 962 if (sync < 2) { 963 pfsync_alloc_scrub_memory(&sp->dst, &st->dst); 964 pf_state_peer_ntoh(&sp->dst, &st->dst); 965 st->expire = time_uptime; 966 st->timeout = sp->timeout; 967 } 968 st->pfsync_time = time_uptime; 969 970 if (sync) { 971 pfsyncstat_inc(pfsyncs_stale); 972 973 pfsync_update_state_locked(st); 974 schednetisr(NETISR_PFSYNC); 975 } 976 } 977 978 return (0); 979 } 980 981 int 982 pfsync_in_upd_c(caddr_t buf, int len, int count, int flags) 983 { 984 struct pfsync_upd_c *up; 985 struct pf_state_cmp id_key; 986 struct pf_state *st; 987 988 int sync; 989 990 int i; 991 992 for (i = 0; i < count; i++) { 993 up = (struct pfsync_upd_c *)(buf + len * i); 994 995 /* check for invalid values */ 996 if (up->timeout >= PFTM_MAX || 997 up->src.state > PF_TCPS_PROXY_DST || 998 up->dst.state > PF_TCPS_PROXY_DST) { 999 DPFPRINTF(LOG_NOTICE, 1000 "pfsync_input: PFSYNC_ACT_UPD_C: invalid value"); 1001 pfsyncstat_inc(pfsyncs_badval); 1002 continue; 1003 } 1004 1005 id_key.id = up->id; 1006 id_key.creatorid = up->creatorid; 1007 1008 st = pf_find_state_byid(&id_key); 1009 if (st == NULL) { 1010 /* We don't have this state. Ask for it. */ 1011 pfsync_request_update(id_key.creatorid, id_key.id); 1012 continue; 1013 } 1014 1015 if (ISSET(st->state_flags, PFSTATE_ACK)) 1016 pfsync_deferred(st, 1); 1017 1018 if (st->key[PF_SK_WIRE]->proto == IPPROTO_TCP) 1019 sync = pfsync_upd_tcp(st, &up->src, &up->dst); 1020 else { 1021 sync = 0; 1022 /* 1023 * Non-TCP protocol state machine always go 1024 * forwards 1025 */ 1026 if (st->src.state > up->src.state) 1027 sync++; 1028 else 1029 pf_state_peer_ntoh(&up->src, &st->src); 1030 1031 if (st->dst.state > up->dst.state) 1032 sync++; 1033 else 1034 pf_state_peer_ntoh(&up->dst, &st->dst); 1035 } 1036 if (sync < 2) { 1037 pfsync_alloc_scrub_memory(&up->dst, &st->dst); 1038 pf_state_peer_ntoh(&up->dst, &st->dst); 1039 st->expire = time_uptime; 1040 st->timeout = up->timeout; 1041 } 1042 st->pfsync_time = time_uptime; 1043 1044 if (sync) { 1045 pfsyncstat_inc(pfsyncs_stale); 1046 1047 pfsync_update_state_locked(st); 1048 schednetisr(NETISR_PFSYNC); 1049 } 1050 } 1051 1052 return (0); 1053 } 1054 1055 int 1056 pfsync_in_ureq(caddr_t buf, int len, int count, int flags) 1057 { 1058 struct pfsync_upd_req *ur; 1059 int i; 1060 1061 struct pf_state_cmp id_key; 1062 struct pf_state *st; 1063 1064 for (i = 0; i < count; i++) { 1065 ur = (struct pfsync_upd_req *)(buf + len * i); 1066 1067 id_key.id = ur->id; 1068 id_key.creatorid = ur->creatorid; 1069 1070 if (id_key.id == 0 && id_key.creatorid == 0) 1071 pfsync_bulk_start(); 1072 else { 1073 st = pf_find_state_byid(&id_key); 1074 if (st == NULL) { 1075 pfsyncstat_inc(pfsyncs_badstate); 1076 continue; 1077 } 1078 if (ISSET(st->state_flags, PFSTATE_NOSYNC)) 1079 continue; 1080 1081 pfsync_update_state_req(st); 1082 } 1083 } 1084 1085 return (0); 1086 } 1087 1088 int 1089 pfsync_in_del(caddr_t buf, int len, int count, int flags) 1090 { 1091 struct pfsync_state *sp; 1092 struct pf_state_cmp id_key; 1093 struct pf_state *st; 1094 int i; 1095 1096 for (i = 0; i < count; i++) { 1097 sp = (struct pfsync_state *)(buf + len * i); 1098 1099 id_key.id = sp->id; 1100 id_key.creatorid = sp->creatorid; 1101 1102 st = pf_find_state_byid(&id_key); 1103 if (st == NULL) { 1104 pfsyncstat_inc(pfsyncs_badstate); 1105 continue; 1106 } 1107 SET(st->state_flags, PFSTATE_NOSYNC); 1108 pf_remove_state(st); 1109 } 1110 1111 return (0); 1112 } 1113 1114 int 1115 pfsync_in_del_c(caddr_t buf, int len, int count, int flags) 1116 { 1117 struct pfsync_del_c *sp; 1118 struct pf_state_cmp id_key; 1119 struct pf_state *st; 1120 int i; 1121 1122 for (i = 0; i < count; i++) { 1123 sp = (struct pfsync_del_c *)(buf + len * i); 1124 1125 id_key.id = sp->id; 1126 id_key.creatorid = sp->creatorid; 1127 1128 st = pf_find_state_byid(&id_key); 1129 if (st == NULL) { 1130 pfsyncstat_inc(pfsyncs_badstate); 1131 continue; 1132 } 1133 1134 SET(st->state_flags, PFSTATE_NOSYNC); 1135 pf_remove_state(st); 1136 } 1137 1138 return (0); 1139 } 1140 1141 int 1142 pfsync_in_bus(caddr_t buf, int len, int count, int flags) 1143 { 1144 struct pfsync_softc *sc = pfsyncif; 1145 struct pfsync_bus *bus; 1146 1147 /* If we're not waiting for a bulk update, who cares. */ 1148 if (sc->sc_ureq_sent == 0) 1149 return (0); 1150 1151 bus = (struct pfsync_bus *)buf; 1152 1153 switch (bus->status) { 1154 case PFSYNC_BUS_START: 1155 timeout_add(&sc->sc_bulkfail_tmo, 4 * hz + 1156 pf_pool_limits[PF_LIMIT_STATES].limit / 1157 ((sc->sc_if.if_mtu - PFSYNC_MINPKT) / 1158 sizeof(struct pfsync_state))); 1159 DPFPRINTF(LOG_INFO, "received bulk update start"); 1160 break; 1161 1162 case PFSYNC_BUS_END: 1163 if (time_uptime - ntohl(bus->endtime) >= 1164 sc->sc_ureq_sent) { 1165 /* that's it, we're happy */ 1166 sc->sc_ureq_sent = 0; 1167 sc->sc_bulk_tries = 0; 1168 timeout_del(&sc->sc_bulkfail_tmo); 1169 #if NCARP > 0 1170 if (!pfsync_sync_ok) 1171 carp_group_demote_adj(&sc->sc_if, -1, 1172 sc->sc_link_demoted ? 1173 "pfsync link state up" : 1174 "pfsync bulk done"); 1175 if (sc->sc_initial_bulk) { 1176 carp_group_demote_adj(&sc->sc_if, -32, 1177 "pfsync init"); 1178 sc->sc_initial_bulk = 0; 1179 } 1180 #endif 1181 pfsync_sync_ok = 1; 1182 sc->sc_link_demoted = 0; 1183 DPFPRINTF(LOG_INFO, "received valid bulk update end"); 1184 } else { 1185 DPFPRINTF(LOG_WARNING, "received invalid " 1186 "bulk update end: bad timestamp"); 1187 } 1188 break; 1189 } 1190 1191 return (0); 1192 } 1193 1194 int 1195 pfsync_in_tdb(caddr_t buf, int len, int count, int flags) 1196 { 1197 #if defined(IPSEC) 1198 struct pfsync_tdb *tp; 1199 int i; 1200 1201 for (i = 0; i < count; i++) { 1202 tp = (struct pfsync_tdb *)(buf + len * i); 1203 pfsync_update_net_tdb(tp); 1204 } 1205 #endif 1206 1207 return (0); 1208 } 1209 1210 #if defined(IPSEC) 1211 /* Update an in-kernel tdb. Silently fail if no tdb is found. */ 1212 void 1213 pfsync_update_net_tdb(struct pfsync_tdb *pt) 1214 { 1215 struct tdb *tdb; 1216 1217 NET_ASSERT_LOCKED(); 1218 1219 /* check for invalid values */ 1220 if (ntohl(pt->spi) <= SPI_RESERVED_MAX || 1221 (pt->dst.sa.sa_family != AF_INET && 1222 pt->dst.sa.sa_family != AF_INET6)) 1223 goto bad; 1224 1225 tdb = gettdb(ntohs(pt->rdomain), pt->spi, 1226 (union sockaddr_union *)&pt->dst, pt->sproto); 1227 if (tdb) { 1228 pt->rpl = betoh64(pt->rpl); 1229 pt->cur_bytes = betoh64(pt->cur_bytes); 1230 1231 /* Neither replay nor byte counter should ever decrease. */ 1232 if (pt->rpl < tdb->tdb_rpl || 1233 pt->cur_bytes < tdb->tdb_cur_bytes) { 1234 goto bad; 1235 } 1236 1237 tdb->tdb_rpl = pt->rpl; 1238 tdb->tdb_cur_bytes = pt->cur_bytes; 1239 } 1240 return; 1241 1242 bad: 1243 DPFPRINTF(LOG_WARNING, "pfsync_insert: PFSYNC_ACT_TDB_UPD: " 1244 "invalid value"); 1245 pfsyncstat_inc(pfsyncs_badstate); 1246 return; 1247 } 1248 #endif 1249 1250 1251 int 1252 pfsync_in_eof(caddr_t buf, int len, int count, int flags) 1253 { 1254 if (len > 0 || count > 0) 1255 pfsyncstat_inc(pfsyncs_badact); 1256 1257 /* we're done. let the caller return */ 1258 return (1); 1259 } 1260 1261 int 1262 pfsync_in_error(caddr_t buf, int len, int count, int flags) 1263 { 1264 pfsyncstat_inc(pfsyncs_badact); 1265 return (-1); 1266 } 1267 1268 int 1269 pfsyncoutput(struct ifnet *ifp, struct mbuf *m, struct sockaddr *dst, 1270 struct rtentry *rt) 1271 { 1272 m_freem(m); /* drop packet */ 1273 return (EAFNOSUPPORT); 1274 } 1275 1276 int 1277 pfsyncioctl(struct ifnet *ifp, u_long cmd, caddr_t data) 1278 { 1279 struct proc *p = curproc; 1280 struct pfsync_softc *sc = ifp->if_softc; 1281 struct ifreq *ifr = (struct ifreq *)data; 1282 struct ip_moptions *imo = &sc->sc_imo; 1283 struct pfsyncreq pfsyncr; 1284 struct ifnet *sifp; 1285 struct ip *ip; 1286 int error; 1287 1288 switch (cmd) { 1289 case SIOCSIFFLAGS: 1290 if ((ifp->if_flags & IFF_RUNNING) == 0 && 1291 (ifp->if_flags & IFF_UP)) { 1292 ifp->if_flags |= IFF_RUNNING; 1293 1294 #if NCARP > 0 1295 sc->sc_initial_bulk = 1; 1296 carp_group_demote_adj(&sc->sc_if, 32, "pfsync init"); 1297 #endif 1298 1299 pfsync_request_full_update(sc); 1300 } 1301 if ((ifp->if_flags & IFF_RUNNING) && 1302 (ifp->if_flags & IFF_UP) == 0) { 1303 ifp->if_flags &= ~IFF_RUNNING; 1304 1305 /* drop everything */ 1306 timeout_del(&sc->sc_tmo); 1307 pfsync_drop(sc); 1308 1309 pfsync_cancel_full_update(sc); 1310 } 1311 break; 1312 case SIOCSIFMTU: 1313 if (!sc->sc_sync_if || 1314 ifr->ifr_mtu <= PFSYNC_MINPKT || 1315 ifr->ifr_mtu > sc->sc_sync_if->if_mtu) 1316 return (EINVAL); 1317 if (ifr->ifr_mtu < ifp->if_mtu) 1318 pfsync_sendout(); 1319 ifp->if_mtu = ifr->ifr_mtu; 1320 break; 1321 case SIOCGETPFSYNC: 1322 bzero(&pfsyncr, sizeof(pfsyncr)); 1323 if (sc->sc_sync_if) { 1324 strlcpy(pfsyncr.pfsyncr_syncdev, 1325 sc->sc_sync_if->if_xname, IFNAMSIZ); 1326 } 1327 pfsyncr.pfsyncr_syncpeer = sc->sc_sync_peer; 1328 pfsyncr.pfsyncr_maxupdates = sc->sc_maxupdates; 1329 pfsyncr.pfsyncr_defer = sc->sc_defer; 1330 return (copyout(&pfsyncr, ifr->ifr_data, sizeof(pfsyncr))); 1331 1332 case SIOCSETPFSYNC: 1333 if ((error = suser(p)) != 0) 1334 return (error); 1335 if ((error = copyin(ifr->ifr_data, &pfsyncr, sizeof(pfsyncr)))) 1336 return (error); 1337 1338 if (pfsyncr.pfsyncr_syncpeer.s_addr == 0) 1339 sc->sc_sync_peer.s_addr = INADDR_PFSYNC_GROUP; 1340 else 1341 sc->sc_sync_peer.s_addr = 1342 pfsyncr.pfsyncr_syncpeer.s_addr; 1343 1344 if (pfsyncr.pfsyncr_maxupdates > 255) 1345 return (EINVAL); 1346 sc->sc_maxupdates = pfsyncr.pfsyncr_maxupdates; 1347 1348 sc->sc_defer = pfsyncr.pfsyncr_defer; 1349 1350 if (pfsyncr.pfsyncr_syncdev[0] == 0) { 1351 if (sc->sc_sync_if) { 1352 if_linkstatehook_del(sc->sc_sync_if, 1353 &sc->sc_ltask); 1354 if_detachhook_del(sc->sc_sync_if, 1355 &sc->sc_dtask); 1356 } 1357 sc->sc_sync_if = NULL; 1358 if (imo->imo_num_memberships > 0) { 1359 in_delmulti(imo->imo_membership[ 1360 --imo->imo_num_memberships]); 1361 imo->imo_ifidx = 0; 1362 } 1363 break; 1364 } 1365 1366 if ((sifp = ifunit(pfsyncr.pfsyncr_syncdev)) == NULL) 1367 return (EINVAL); 1368 1369 if (sifp->if_mtu < sc->sc_if.if_mtu || 1370 (sc->sc_sync_if != NULL && 1371 sifp->if_mtu < sc->sc_sync_if->if_mtu) || 1372 sifp->if_mtu < MCLBYTES - sizeof(struct ip)) 1373 pfsync_sendout(); 1374 1375 if (sc->sc_sync_if) { 1376 if_linkstatehook_del(sc->sc_sync_if, &sc->sc_ltask); 1377 if_detachhook_del(sc->sc_sync_if, &sc->sc_dtask); 1378 } 1379 sc->sc_sync_if = sifp; 1380 1381 if (imo->imo_num_memberships > 0) { 1382 in_delmulti(imo->imo_membership[--imo->imo_num_memberships]); 1383 imo->imo_ifidx = 0; 1384 } 1385 1386 if (sc->sc_sync_if && 1387 sc->sc_sync_peer.s_addr == INADDR_PFSYNC_GROUP) { 1388 struct in_addr addr; 1389 1390 if (!(sc->sc_sync_if->if_flags & IFF_MULTICAST)) { 1391 sc->sc_sync_if = NULL; 1392 return (EADDRNOTAVAIL); 1393 } 1394 1395 addr.s_addr = INADDR_PFSYNC_GROUP; 1396 1397 if ((imo->imo_membership[0] = 1398 in_addmulti(&addr, sc->sc_sync_if)) == NULL) { 1399 sc->sc_sync_if = NULL; 1400 return (ENOBUFS); 1401 } 1402 imo->imo_num_memberships++; 1403 imo->imo_ifidx = sc->sc_sync_if->if_index; 1404 imo->imo_ttl = PFSYNC_DFLTTL; 1405 imo->imo_loop = 0; 1406 } 1407 1408 ip = &sc->sc_template; 1409 bzero(ip, sizeof(*ip)); 1410 ip->ip_v = IPVERSION; 1411 ip->ip_hl = sizeof(sc->sc_template) >> 2; 1412 ip->ip_tos = IPTOS_LOWDELAY; 1413 /* len and id are set later */ 1414 ip->ip_off = htons(IP_DF); 1415 ip->ip_ttl = PFSYNC_DFLTTL; 1416 ip->ip_p = IPPROTO_PFSYNC; 1417 ip->ip_src.s_addr = INADDR_ANY; 1418 ip->ip_dst.s_addr = sc->sc_sync_peer.s_addr; 1419 1420 if_linkstatehook_add(sc->sc_sync_if, &sc->sc_ltask); 1421 if_detachhook_add(sc->sc_sync_if, &sc->sc_dtask); 1422 1423 pfsync_request_full_update(sc); 1424 1425 break; 1426 1427 default: 1428 return (ENOTTY); 1429 } 1430 1431 return (0); 1432 } 1433 1434 void 1435 pfsync_out_state(struct pf_state *st, void *buf) 1436 { 1437 struct pfsync_state *sp = buf; 1438 1439 pfsync_state_export(sp, st); 1440 } 1441 1442 void 1443 pfsync_out_iack(struct pf_state *st, void *buf) 1444 { 1445 struct pfsync_ins_ack *iack = buf; 1446 1447 iack->id = st->id; 1448 iack->creatorid = st->creatorid; 1449 } 1450 1451 void 1452 pfsync_out_upd_c(struct pf_state *st, void *buf) 1453 { 1454 struct pfsync_upd_c *up = buf; 1455 1456 bzero(up, sizeof(*up)); 1457 up->id = st->id; 1458 pf_state_peer_hton(&st->src, &up->src); 1459 pf_state_peer_hton(&st->dst, &up->dst); 1460 up->creatorid = st->creatorid; 1461 up->timeout = st->timeout; 1462 } 1463 1464 void 1465 pfsync_out_del(struct pf_state *st, void *buf) 1466 { 1467 struct pfsync_del_c *dp = buf; 1468 1469 dp->id = st->id; 1470 dp->creatorid = st->creatorid; 1471 1472 SET(st->state_flags, PFSTATE_NOSYNC); 1473 } 1474 1475 void 1476 pfsync_drop(struct pfsync_softc *sc) 1477 { 1478 struct pf_state *st; 1479 struct pfsync_upd_req_item *ur; 1480 struct tdb *t; 1481 int q; 1482 1483 for (q = 0; q < PFSYNC_S_COUNT; q++) { 1484 if (TAILQ_EMPTY(&sc->sc_qs[q])) 1485 continue; 1486 1487 while ((st = TAILQ_FIRST(&sc->sc_qs[q])) != NULL) { 1488 TAILQ_REMOVE(&sc->sc_qs[q], st, sync_list); 1489 #ifdef PFSYNC_DEBUG 1490 KASSERT(st->sync_state == q); 1491 #endif 1492 st->sync_state = PFSYNC_S_NONE; 1493 pf_state_unref(st); 1494 } 1495 } 1496 1497 while ((ur = TAILQ_FIRST(&sc->sc_upd_req_list)) != NULL) { 1498 TAILQ_REMOVE(&sc->sc_upd_req_list, ur, ur_entry); 1499 pool_put(&sc->sc_pool, ur); 1500 } 1501 1502 sc->sc_plus = NULL; 1503 1504 while ((t = TAILQ_FIRST(&sc->sc_tdb_q)) != NULL) { 1505 TAILQ_REMOVE(&sc->sc_tdb_q, t, tdb_sync_entry); 1506 CLR(t->tdb_flags, TDBF_PFSYNC); 1507 } 1508 1509 sc->sc_len = PFSYNC_MINPKT; 1510 } 1511 1512 #ifdef WITH_PF_LOCK 1513 void 1514 pfsync_send_dispatch(void *xmq) 1515 { 1516 struct mbuf_queue *mq = xmq; 1517 struct pfsync_softc *sc; 1518 struct mbuf *m; 1519 struct mbuf_list ml; 1520 int error; 1521 1522 mq_delist(mq, &ml); 1523 if (ml_empty(&ml)) 1524 return; 1525 1526 NET_RLOCK(); 1527 sc = pfsyncif; 1528 if (sc == NULL) { 1529 ml_purge(&ml); 1530 goto done; 1531 } 1532 1533 while ((m = ml_dequeue(&ml)) != NULL) { 1534 if ((error = ip_output(m, NULL, NULL, IP_RAWOUTPUT, 1535 &sc->sc_imo, NULL, 0)) == 0) 1536 pfsyncstat_inc(pfsyncs_opackets); 1537 else { 1538 DPFPRINTF(LOG_DEBUG, 1539 "ip_output() @ %s failed (%d)\n", __func__, error); 1540 pfsyncstat_inc(pfsyncs_oerrors); 1541 } 1542 } 1543 done: 1544 NET_RUNLOCK(); 1545 } 1546 1547 void 1548 pfsync_send_pkt(struct mbuf *m) 1549 { 1550 if (mq_enqueue(&pfsync_mq, m) != 0) { 1551 pfsyncstat_inc(pfsyncs_oerrors); 1552 DPFPRINTF(LOG_DEBUG, "mq_enqueue() @ %s failed, queue full\n", 1553 __func__); 1554 } else 1555 task_add(net_tq(0), &pfsync_task); 1556 } 1557 #endif /* WITH_PF_LOCK */ 1558 1559 void 1560 pfsync_sendout(void) 1561 { 1562 struct pfsync_softc *sc = pfsyncif; 1563 #if NBPFILTER > 0 1564 struct ifnet *ifp = &sc->sc_if; 1565 #endif 1566 struct mbuf *m; 1567 struct ip *ip; 1568 struct pfsync_header *ph; 1569 struct pfsync_subheader *subh; 1570 struct pf_state *st; 1571 struct pfsync_upd_req_item *ur; 1572 struct tdb *t; 1573 1574 int offset; 1575 int q, count = 0; 1576 1577 PF_ASSERT_LOCKED(); 1578 1579 if (sc == NULL || sc->sc_len == PFSYNC_MINPKT) 1580 return; 1581 1582 if (!ISSET(sc->sc_if.if_flags, IFF_RUNNING) || 1583 #if NBPFILTER > 0 1584 (ifp->if_bpf == NULL && sc->sc_sync_if == NULL)) { 1585 #else 1586 sc->sc_sync_if == NULL) { 1587 #endif 1588 pfsync_drop(sc); 1589 return; 1590 } 1591 1592 MGETHDR(m, M_DONTWAIT, MT_DATA); 1593 if (m == NULL) { 1594 sc->sc_if.if_oerrors++; 1595 pfsyncstat_inc(pfsyncs_onomem); 1596 pfsync_drop(sc); 1597 return; 1598 } 1599 1600 if (max_linkhdr + sc->sc_len > MHLEN) { 1601 MCLGETI(m, M_DONTWAIT, NULL, max_linkhdr + sc->sc_len); 1602 if (!ISSET(m->m_flags, M_EXT)) { 1603 m_free(m); 1604 sc->sc_if.if_oerrors++; 1605 pfsyncstat_inc(pfsyncs_onomem); 1606 pfsync_drop(sc); 1607 return; 1608 } 1609 } 1610 m->m_data += max_linkhdr; 1611 m->m_len = m->m_pkthdr.len = sc->sc_len; 1612 1613 /* build the ip header */ 1614 ip = mtod(m, struct ip *); 1615 bcopy(&sc->sc_template, ip, sizeof(*ip)); 1616 offset = sizeof(*ip); 1617 1618 ip->ip_len = htons(m->m_pkthdr.len); 1619 ip->ip_id = htons(ip_randomid()); 1620 1621 /* build the pfsync header */ 1622 ph = (struct pfsync_header *)(m->m_data + offset); 1623 bzero(ph, sizeof(*ph)); 1624 offset += sizeof(*ph); 1625 1626 ph->version = PFSYNC_VERSION; 1627 ph->len = htons(sc->sc_len - sizeof(*ip)); 1628 bcopy(pf_status.pf_chksum, ph->pfcksum, PF_MD5_DIGEST_LENGTH); 1629 1630 if (!TAILQ_EMPTY(&sc->sc_upd_req_list)) { 1631 subh = (struct pfsync_subheader *)(m->m_data + offset); 1632 offset += sizeof(*subh); 1633 1634 count = 0; 1635 while ((ur = TAILQ_FIRST(&sc->sc_upd_req_list)) != NULL) { 1636 TAILQ_REMOVE(&sc->sc_upd_req_list, ur, ur_entry); 1637 1638 bcopy(&ur->ur_msg, m->m_data + offset, 1639 sizeof(ur->ur_msg)); 1640 offset += sizeof(ur->ur_msg); 1641 1642 pool_put(&sc->sc_pool, ur); 1643 1644 count++; 1645 } 1646 1647 bzero(subh, sizeof(*subh)); 1648 subh->len = sizeof(ur->ur_msg) >> 2; 1649 subh->action = PFSYNC_ACT_UPD_REQ; 1650 subh->count = htons(count); 1651 } 1652 1653 /* has someone built a custom region for us to add? */ 1654 if (sc->sc_plus != NULL) { 1655 bcopy(sc->sc_plus, m->m_data + offset, sc->sc_pluslen); 1656 offset += sc->sc_pluslen; 1657 1658 sc->sc_plus = NULL; 1659 } 1660 1661 if (!TAILQ_EMPTY(&sc->sc_tdb_q)) { 1662 subh = (struct pfsync_subheader *)(m->m_data + offset); 1663 offset += sizeof(*subh); 1664 1665 count = 0; 1666 while ((t = TAILQ_FIRST(&sc->sc_tdb_q)) != NULL) { 1667 TAILQ_REMOVE(&sc->sc_tdb_q, t, tdb_sync_entry); 1668 pfsync_out_tdb(t, m->m_data + offset); 1669 offset += sizeof(struct pfsync_tdb); 1670 CLR(t->tdb_flags, TDBF_PFSYNC); 1671 count++; 1672 } 1673 1674 bzero(subh, sizeof(*subh)); 1675 subh->action = PFSYNC_ACT_TDB; 1676 subh->len = sizeof(struct pfsync_tdb) >> 2; 1677 subh->count = htons(count); 1678 } 1679 1680 /* walk the queues */ 1681 for (q = 0; q < PFSYNC_S_COUNT; q++) { 1682 if (TAILQ_EMPTY(&sc->sc_qs[q])) 1683 continue; 1684 1685 subh = (struct pfsync_subheader *)(m->m_data + offset); 1686 offset += sizeof(*subh); 1687 1688 count = 0; 1689 while ((st = TAILQ_FIRST(&sc->sc_qs[q])) != NULL) { 1690 TAILQ_REMOVE(&sc->sc_qs[q], st, sync_list); 1691 st->sync_state = PFSYNC_S_NONE; 1692 #ifdef PFSYNC_DEBUG 1693 KASSERT(st->sync_state == q); 1694 #endif 1695 pfsync_qs[q].write(st, m->m_data + offset); 1696 offset += pfsync_qs[q].len; 1697 1698 pf_state_unref(st); 1699 count++; 1700 } 1701 1702 bzero(subh, sizeof(*subh)); 1703 subh->action = pfsync_qs[q].action; 1704 subh->len = pfsync_qs[q].len >> 2; 1705 subh->count = htons(count); 1706 } 1707 1708 /* we're done, let's put it on the wire */ 1709 #if NBPFILTER > 0 1710 if (ifp->if_bpf) { 1711 m->m_data += sizeof(*ip); 1712 m->m_len = m->m_pkthdr.len = sc->sc_len - sizeof(*ip); 1713 bpf_mtap(ifp->if_bpf, m, BPF_DIRECTION_OUT); 1714 m->m_data -= sizeof(*ip); 1715 m->m_len = m->m_pkthdr.len = sc->sc_len; 1716 } 1717 1718 if (sc->sc_sync_if == NULL) { 1719 sc->sc_len = PFSYNC_MINPKT; 1720 m_freem(m); 1721 return; 1722 } 1723 #endif 1724 1725 /* start again */ 1726 sc->sc_len = PFSYNC_MINPKT; 1727 1728 sc->sc_if.if_opackets++; 1729 sc->sc_if.if_obytes += m->m_pkthdr.len; 1730 1731 m->m_pkthdr.ph_rtableid = sc->sc_if.if_rdomain; 1732 1733 #ifdef WITH_PF_LOCK 1734 pfsync_send_pkt(m); 1735 #else /* !WITH_PF_LOCK */ 1736 if (ip_output(m, NULL, NULL, IP_RAWOUTPUT, &sc->sc_imo, NULL, 0) == 0) 1737 pfsyncstat_inc(pfsyncs_opackets); 1738 else 1739 pfsyncstat_inc(pfsyncs_oerrors); 1740 #endif /* WITH_PF_LOCK */ 1741 } 1742 1743 void 1744 pfsync_insert_state(struct pf_state *st) 1745 { 1746 struct pfsync_softc *sc = pfsyncif; 1747 1748 NET_ASSERT_LOCKED(); 1749 1750 if (ISSET(st->rule.ptr->rule_flag, PFRULE_NOSYNC) || 1751 st->key[PF_SK_WIRE]->proto == IPPROTO_PFSYNC) { 1752 SET(st->state_flags, PFSTATE_NOSYNC); 1753 return; 1754 } 1755 1756 if (sc == NULL || !ISSET(sc->sc_if.if_flags, IFF_RUNNING) || 1757 ISSET(st->state_flags, PFSTATE_NOSYNC)) 1758 return; 1759 1760 #ifdef PFSYNC_DEBUG 1761 KASSERT(st->sync_state == PFSYNC_S_NONE); 1762 #endif 1763 1764 if (sc->sc_len == PFSYNC_MINPKT) 1765 timeout_add_sec(&sc->sc_tmo, 1); 1766 1767 pfsync_q_ins(st, PFSYNC_S_INS); 1768 1769 st->sync_updates = 0; 1770 } 1771 1772 int 1773 pfsync_defer(struct pf_state *st, struct mbuf *m) 1774 { 1775 struct pfsync_softc *sc = pfsyncif; 1776 struct pfsync_deferral *pd; 1777 1778 NET_ASSERT_LOCKED(); 1779 1780 if (!sc->sc_defer || 1781 ISSET(st->state_flags, PFSTATE_NOSYNC) || 1782 m->m_flags & (M_BCAST|M_MCAST)) 1783 return (0); 1784 1785 if (sc->sc_deferred >= 128) { 1786 pd = TAILQ_FIRST(&sc->sc_deferrals); 1787 if (timeout_del(&pd->pd_tmo)) 1788 pfsync_undefer(pd, 0); 1789 } 1790 1791 pd = pool_get(&sc->sc_pool, M_NOWAIT); 1792 if (pd == NULL) 1793 return (0); 1794 1795 m->m_pkthdr.pf.flags |= PF_TAG_GENERATED; 1796 SET(st->state_flags, PFSTATE_ACK); 1797 1798 pd->pd_st = pf_state_ref(st); 1799 pd->pd_m = m; 1800 1801 sc->sc_deferred++; 1802 TAILQ_INSERT_TAIL(&sc->sc_deferrals, pd, pd_entry); 1803 1804 timeout_set_proc(&pd->pd_tmo, pfsync_defer_tmo, pd); 1805 timeout_add_msec(&pd->pd_tmo, 20); 1806 1807 schednetisr(NETISR_PFSYNC); 1808 1809 return (1); 1810 } 1811 1812 void 1813 pfsync_undefer(struct pfsync_deferral *pd, int drop) 1814 { 1815 struct pfsync_softc *sc = pfsyncif; 1816 struct pf_pdesc pdesc; 1817 1818 NET_ASSERT_LOCKED(); 1819 1820 if (sc == NULL) 1821 return; 1822 1823 TAILQ_REMOVE(&sc->sc_deferrals, pd, pd_entry); 1824 sc->sc_deferred--; 1825 1826 CLR(pd->pd_st->state_flags, PFSTATE_ACK); 1827 if (drop) 1828 m_freem(pd->pd_m); 1829 else { 1830 if (pd->pd_st->rule.ptr->rt == PF_ROUTETO) { 1831 if (pf_setup_pdesc(&pdesc, 1832 pd->pd_st->key[PF_SK_WIRE]->af, 1833 pd->pd_st->direction, pd->pd_st->rt_kif, 1834 pd->pd_m, NULL) != PF_PASS) { 1835 m_freem(pd->pd_m); 1836 goto out; 1837 } 1838 switch (pd->pd_st->key[PF_SK_WIRE]->af) { 1839 case AF_INET: 1840 pf_route(&pdesc, 1841 pd->pd_st->rule.ptr, pd->pd_st); 1842 break; 1843 #ifdef INET6 1844 case AF_INET6: 1845 pf_route6(&pdesc, 1846 pd->pd_st->rule.ptr, pd->pd_st); 1847 break; 1848 #endif /* INET6 */ 1849 default: 1850 unhandled_af(pd->pd_st->key[PF_SK_WIRE]->af); 1851 } 1852 pd->pd_m = pdesc.m; 1853 } else { 1854 switch (pd->pd_st->key[PF_SK_WIRE]->af) { 1855 case AF_INET: 1856 ip_output(pd->pd_m, NULL, NULL, 0, NULL, NULL, 1857 0); 1858 break; 1859 #ifdef INET6 1860 case AF_INET6: 1861 ip6_output(pd->pd_m, NULL, NULL, 0, 1862 NULL, NULL); 1863 break; 1864 #endif /* INET6 */ 1865 default: 1866 unhandled_af(pd->pd_st->key[PF_SK_WIRE]->af); 1867 } 1868 } 1869 } 1870 out: 1871 pf_state_unref(pd->pd_st); 1872 pool_put(&sc->sc_pool, pd); 1873 } 1874 1875 void 1876 pfsync_defer_tmo(void *arg) 1877 { 1878 NET_LOCK(); 1879 pfsync_undefer(arg, 0); 1880 NET_UNLOCK(); 1881 } 1882 1883 void 1884 pfsync_deferred(struct pf_state *st, int drop) 1885 { 1886 struct pfsync_softc *sc = pfsyncif; 1887 struct pfsync_deferral *pd; 1888 1889 NET_ASSERT_LOCKED(); 1890 1891 TAILQ_FOREACH(pd, &sc->sc_deferrals, pd_entry) { 1892 if (pd->pd_st == st) { 1893 if (timeout_del(&pd->pd_tmo)) 1894 pfsync_undefer(pd, drop); 1895 return; 1896 } 1897 } 1898 1899 panic("pfsync_deferred: unable to find deferred state"); 1900 } 1901 1902 void 1903 pfsync_update_state_locked(struct pf_state *st) 1904 { 1905 struct pfsync_softc *sc = pfsyncif; 1906 int sync = 0; 1907 1908 NET_ASSERT_LOCKED(); 1909 PF_ASSERT_LOCKED(); 1910 1911 if (sc == NULL || !ISSET(sc->sc_if.if_flags, IFF_RUNNING)) 1912 return; 1913 1914 if (ISSET(st->state_flags, PFSTATE_ACK)) 1915 pfsync_deferred(st, 0); 1916 if (ISSET(st->state_flags, PFSTATE_NOSYNC)) { 1917 if (st->sync_state != PFSYNC_S_NONE) 1918 pfsync_q_del(st); 1919 return; 1920 } 1921 1922 if (sc->sc_len == PFSYNC_MINPKT) 1923 timeout_add_sec(&sc->sc_tmo, 1); 1924 1925 switch (st->sync_state) { 1926 case PFSYNC_S_UPD_C: 1927 case PFSYNC_S_UPD: 1928 case PFSYNC_S_INS: 1929 /* we're already handling it */ 1930 1931 if (st->key[PF_SK_WIRE]->proto == IPPROTO_TCP) { 1932 st->sync_updates++; 1933 if (st->sync_updates >= sc->sc_maxupdates) 1934 sync = 1; 1935 } 1936 break; 1937 1938 case PFSYNC_S_IACK: 1939 pfsync_q_del(st); 1940 case PFSYNC_S_NONE: 1941 pfsync_q_ins(st, PFSYNC_S_UPD_C); 1942 st->sync_updates = 0; 1943 break; 1944 1945 default: 1946 panic("pfsync_update_state: unexpected sync state %d", 1947 st->sync_state); 1948 } 1949 1950 if (sync || (time_uptime - st->pfsync_time) < 2) 1951 schednetisr(NETISR_PFSYNC); 1952 } 1953 1954 void 1955 pfsync_update_state(struct pf_state *st, int *have_pf_lock) 1956 { 1957 struct pfsync_softc *sc = pfsyncif; 1958 1959 if (sc == NULL || !ISSET(sc->sc_if.if_flags, IFF_RUNNING)) 1960 return; 1961 1962 if (*have_pf_lock == 0) { 1963 PF_LOCK(); 1964 *have_pf_lock = 1; 1965 } 1966 1967 pfsync_update_state_locked(st); 1968 } 1969 1970 void 1971 pfsync_cancel_full_update(struct pfsync_softc *sc) 1972 { 1973 if (timeout_pending(&sc->sc_bulkfail_tmo) || 1974 timeout_pending(&sc->sc_bulk_tmo)) { 1975 #if NCARP > 0 1976 if (!pfsync_sync_ok) 1977 carp_group_demote_adj(&sc->sc_if, -1, 1978 "pfsync bulk cancelled"); 1979 if (sc->sc_initial_bulk) { 1980 carp_group_demote_adj(&sc->sc_if, -32, 1981 "pfsync init"); 1982 sc->sc_initial_bulk = 0; 1983 } 1984 #endif 1985 pfsync_sync_ok = 1; 1986 DPFPRINTF(LOG_INFO, "cancelling bulk update"); 1987 } 1988 timeout_del(&sc->sc_bulkfail_tmo); 1989 timeout_del(&sc->sc_bulk_tmo); 1990 sc->sc_bulk_next = NULL; 1991 sc->sc_bulk_last = NULL; 1992 sc->sc_ureq_sent = 0; 1993 sc->sc_bulk_tries = 0; 1994 } 1995 1996 void 1997 pfsync_request_full_update(struct pfsync_softc *sc) 1998 { 1999 if (sc->sc_sync_if && ISSET(sc->sc_if.if_flags, IFF_RUNNING)) { 2000 /* Request a full state table update. */ 2001 sc->sc_ureq_sent = time_uptime; 2002 #if NCARP > 0 2003 if (!sc->sc_link_demoted && pfsync_sync_ok) 2004 carp_group_demote_adj(&sc->sc_if, 1, 2005 "pfsync bulk start"); 2006 #endif 2007 pfsync_sync_ok = 0; 2008 DPFPRINTF(LOG_INFO, "requesting bulk update"); 2009 timeout_add(&sc->sc_bulkfail_tmo, 4 * hz + 2010 pf_pool_limits[PF_LIMIT_STATES].limit / 2011 ((sc->sc_if.if_mtu - PFSYNC_MINPKT) / 2012 sizeof(struct pfsync_state))); 2013 pfsync_request_update(0, 0); 2014 } 2015 } 2016 2017 void 2018 pfsync_request_update(u_int32_t creatorid, u_int64_t id) 2019 { 2020 struct pfsync_softc *sc = pfsyncif; 2021 struct pfsync_upd_req_item *item; 2022 size_t nlen = sizeof(struct pfsync_upd_req); 2023 2024 /* 2025 * this code does nothing to prevent multiple update requests for the 2026 * same state being generated. 2027 */ 2028 2029 item = pool_get(&sc->sc_pool, PR_NOWAIT); 2030 if (item == NULL) { 2031 /* XXX stats */ 2032 return; 2033 } 2034 2035 item->ur_msg.id = id; 2036 item->ur_msg.creatorid = creatorid; 2037 2038 if (TAILQ_EMPTY(&sc->sc_upd_req_list)) 2039 nlen += sizeof(struct pfsync_subheader); 2040 2041 if (sc->sc_len + nlen > sc->sc_if.if_mtu) { 2042 pfsync_sendout(); 2043 2044 nlen = sizeof(struct pfsync_subheader) + 2045 sizeof(struct pfsync_upd_req); 2046 } 2047 2048 TAILQ_INSERT_TAIL(&sc->sc_upd_req_list, item, ur_entry); 2049 sc->sc_len += nlen; 2050 2051 schednetisr(NETISR_PFSYNC); 2052 } 2053 2054 void 2055 pfsync_update_state_req(struct pf_state *st) 2056 { 2057 struct pfsync_softc *sc = pfsyncif; 2058 2059 if (sc == NULL) 2060 panic("pfsync_update_state_req: nonexistent instance"); 2061 2062 if (ISSET(st->state_flags, PFSTATE_NOSYNC)) { 2063 if (st->sync_state != PFSYNC_S_NONE) 2064 pfsync_q_del(st); 2065 return; 2066 } 2067 2068 switch (st->sync_state) { 2069 case PFSYNC_S_UPD_C: 2070 case PFSYNC_S_IACK: 2071 pfsync_q_del(st); 2072 case PFSYNC_S_NONE: 2073 pfsync_q_ins(st, PFSYNC_S_UPD); 2074 schednetisr(NETISR_PFSYNC); 2075 return; 2076 2077 case PFSYNC_S_INS: 2078 case PFSYNC_S_UPD: 2079 case PFSYNC_S_DEL: 2080 /* we're already handling it */ 2081 return; 2082 2083 default: 2084 panic("pfsync_update_state_req: unexpected sync state %d", 2085 st->sync_state); 2086 } 2087 } 2088 2089 void 2090 pfsync_delete_state(struct pf_state *st) 2091 { 2092 struct pfsync_softc *sc = pfsyncif; 2093 2094 NET_ASSERT_LOCKED(); 2095 2096 if (sc == NULL || !ISSET(sc->sc_if.if_flags, IFF_RUNNING)) 2097 return; 2098 2099 if (ISSET(st->state_flags, PFSTATE_ACK)) 2100 pfsync_deferred(st, 1); 2101 if (ISSET(st->state_flags, PFSTATE_NOSYNC)) { 2102 if (st->sync_state != PFSYNC_S_NONE) 2103 pfsync_q_del(st); 2104 return; 2105 } 2106 2107 if (sc->sc_len == PFSYNC_MINPKT) 2108 timeout_add_sec(&sc->sc_tmo, 1); 2109 2110 switch (st->sync_state) { 2111 case PFSYNC_S_INS: 2112 /* we never got to tell the world so just forget about it */ 2113 pfsync_q_del(st); 2114 return; 2115 2116 case PFSYNC_S_UPD_C: 2117 case PFSYNC_S_UPD: 2118 case PFSYNC_S_IACK: 2119 pfsync_q_del(st); 2120 /* 2121 * FALLTHROUGH to putting it on the del list 2122 * Note on refence count bookeeping: 2123 * pfsync_q_del() drops reference for queue 2124 * ownership. But the st entry survives, because 2125 * our caller still holds a reference. 2126 */ 2127 2128 case PFSYNC_S_NONE: 2129 /* 2130 * We either fall through here, or there is no reference to 2131 * st owned by pfsync queues at this point. 2132 * 2133 * Calling pfsync_q_ins() puts st to del queue. The pfsync_q_ins() 2134 * grabs a reference for delete queue. 2135 */ 2136 pfsync_q_ins(st, PFSYNC_S_DEL); 2137 return; 2138 2139 default: 2140 panic("pfsync_delete_state: unexpected sync state %d", 2141 st->sync_state); 2142 } 2143 } 2144 2145 void 2146 pfsync_clear_states(u_int32_t creatorid, const char *ifname) 2147 { 2148 struct pfsync_softc *sc = pfsyncif; 2149 struct { 2150 struct pfsync_subheader subh; 2151 struct pfsync_clr clr; 2152 } __packed r; 2153 2154 NET_ASSERT_LOCKED(); 2155 2156 if (sc == NULL || !ISSET(sc->sc_if.if_flags, IFF_RUNNING)) 2157 return; 2158 2159 bzero(&r, sizeof(r)); 2160 2161 r.subh.action = PFSYNC_ACT_CLR; 2162 r.subh.len = sizeof(struct pfsync_clr) >> 2; 2163 r.subh.count = htons(1); 2164 2165 strlcpy(r.clr.ifname, ifname, sizeof(r.clr.ifname)); 2166 r.clr.creatorid = creatorid; 2167 2168 pfsync_send_plus(&r, sizeof(r)); 2169 } 2170 2171 void 2172 pfsync_q_ins(struct pf_state *st, int q) 2173 { 2174 struct pfsync_softc *sc = pfsyncif; 2175 size_t nlen = pfsync_qs[q].len; 2176 2177 KASSERT(st->sync_state == PFSYNC_S_NONE); 2178 2179 #if defined(PFSYNC_DEBUG) 2180 if (sc->sc_len < PFSYNC_MINPKT) 2181 panic("pfsync pkt len is too low %d", sc->sc_len); 2182 #endif 2183 if (TAILQ_EMPTY(&sc->sc_qs[q])) 2184 nlen += sizeof(struct pfsync_subheader); 2185 2186 if (sc->sc_len + nlen > sc->sc_if.if_mtu) { 2187 pfsync_sendout(); 2188 2189 nlen = sizeof(struct pfsync_subheader) + pfsync_qs[q].len; 2190 } 2191 2192 sc->sc_len += nlen; 2193 pf_state_ref(st); 2194 TAILQ_INSERT_TAIL(&sc->sc_qs[q], st, sync_list); 2195 st->sync_state = q; 2196 } 2197 2198 void 2199 pfsync_q_del(struct pf_state *st) 2200 { 2201 struct pfsync_softc *sc = pfsyncif; 2202 int q = st->sync_state; 2203 2204 KASSERT(st->sync_state != PFSYNC_S_NONE); 2205 2206 sc->sc_len -= pfsync_qs[q].len; 2207 TAILQ_REMOVE(&sc->sc_qs[q], st, sync_list); 2208 st->sync_state = PFSYNC_S_NONE; 2209 pf_state_unref(st); 2210 2211 if (TAILQ_EMPTY(&sc->sc_qs[q])) 2212 sc->sc_len -= sizeof(struct pfsync_subheader); 2213 } 2214 2215 void 2216 pfsync_update_tdb(struct tdb *t, int output) 2217 { 2218 struct pfsync_softc *sc = pfsyncif; 2219 size_t nlen = sizeof(struct pfsync_tdb); 2220 2221 if (sc == NULL) 2222 return; 2223 2224 if (!ISSET(t->tdb_flags, TDBF_PFSYNC)) { 2225 if (TAILQ_EMPTY(&sc->sc_tdb_q)) 2226 nlen += sizeof(struct pfsync_subheader); 2227 2228 if (sc->sc_len + nlen > sc->sc_if.if_mtu) { 2229 pfsync_sendout(); 2230 2231 nlen = sizeof(struct pfsync_subheader) + 2232 sizeof(struct pfsync_tdb); 2233 } 2234 2235 sc->sc_len += nlen; 2236 TAILQ_INSERT_TAIL(&sc->sc_tdb_q, t, tdb_sync_entry); 2237 SET(t->tdb_flags, TDBF_PFSYNC); 2238 t->tdb_updates = 0; 2239 } else { 2240 if (++t->tdb_updates >= sc->sc_maxupdates) 2241 schednetisr(NETISR_PFSYNC); 2242 } 2243 2244 if (output) 2245 SET(t->tdb_flags, TDBF_PFSYNC_RPL); 2246 else 2247 CLR(t->tdb_flags, TDBF_PFSYNC_RPL); 2248 } 2249 2250 void 2251 pfsync_delete_tdb(struct tdb *t) 2252 { 2253 struct pfsync_softc *sc = pfsyncif; 2254 2255 if (sc == NULL || !ISSET(t->tdb_flags, TDBF_PFSYNC)) 2256 return; 2257 2258 sc->sc_len -= sizeof(struct pfsync_tdb); 2259 TAILQ_REMOVE(&sc->sc_tdb_q, t, tdb_sync_entry); 2260 CLR(t->tdb_flags, TDBF_PFSYNC); 2261 2262 if (TAILQ_EMPTY(&sc->sc_tdb_q)) 2263 sc->sc_len -= sizeof(struct pfsync_subheader); 2264 } 2265 2266 void 2267 pfsync_out_tdb(struct tdb *t, void *buf) 2268 { 2269 struct pfsync_tdb *ut = buf; 2270 2271 bzero(ut, sizeof(*ut)); 2272 ut->spi = t->tdb_spi; 2273 bcopy(&t->tdb_dst, &ut->dst, sizeof(ut->dst)); 2274 /* 2275 * When a failover happens, the master's rpl is probably above 2276 * what we see here (we may be up to a second late), so 2277 * increase it a bit for outbound tdbs to manage most such 2278 * situations. 2279 * 2280 * For now, just add an offset that is likely to be larger 2281 * than the number of packets we can see in one second. The RFC 2282 * just says the next packet must have a higher seq value. 2283 * 2284 * XXX What is a good algorithm for this? We could use 2285 * a rate-determined increase, but to know it, we would have 2286 * to extend struct tdb. 2287 * XXX pt->rpl can wrap over MAXINT, but if so the real tdb 2288 * will soon be replaced anyway. For now, just don't handle 2289 * this edge case. 2290 */ 2291 #define RPL_INCR 16384 2292 ut->rpl = htobe64(t->tdb_rpl + (ISSET(t->tdb_flags, TDBF_PFSYNC_RPL) ? 2293 RPL_INCR : 0)); 2294 ut->cur_bytes = htobe64(t->tdb_cur_bytes); 2295 ut->sproto = t->tdb_sproto; 2296 ut->rdomain = htons(t->tdb_rdomain); 2297 } 2298 2299 void 2300 pfsync_bulk_start(void) 2301 { 2302 struct pfsync_softc *sc = pfsyncif; 2303 2304 DPFPRINTF(LOG_INFO, "received bulk update request"); 2305 2306 if (TAILQ_EMPTY(&state_list)) 2307 pfsync_bulk_status(PFSYNC_BUS_END); 2308 else { 2309 sc->sc_ureq_received = time_uptime; 2310 2311 if (sc->sc_bulk_next == NULL) 2312 sc->sc_bulk_next = TAILQ_FIRST(&state_list); 2313 sc->sc_bulk_last = sc->sc_bulk_next; 2314 2315 pfsync_bulk_status(PFSYNC_BUS_START); 2316 timeout_add(&sc->sc_bulk_tmo, 0); 2317 } 2318 } 2319 2320 void 2321 pfsync_bulk_update(void *arg) 2322 { 2323 struct pfsync_softc *sc; 2324 struct pf_state *st; 2325 int i = 0; 2326 2327 NET_LOCK(); 2328 sc = pfsyncif; 2329 if (sc == NULL) 2330 goto out; 2331 st = sc->sc_bulk_next; 2332 2333 for (;;) { 2334 if (st->sync_state == PFSYNC_S_NONE && 2335 st->timeout < PFTM_MAX && 2336 st->pfsync_time <= sc->sc_ureq_received) { 2337 pfsync_update_state_req(st); 2338 i++; 2339 } 2340 2341 st = TAILQ_NEXT(st, entry_list); 2342 if (st == NULL) 2343 st = TAILQ_FIRST(&state_list); 2344 2345 if (st == sc->sc_bulk_last) { 2346 /* we're done */ 2347 sc->sc_bulk_next = NULL; 2348 sc->sc_bulk_last = NULL; 2349 pfsync_bulk_status(PFSYNC_BUS_END); 2350 break; 2351 } 2352 2353 if (i > 1 && (sc->sc_if.if_mtu - sc->sc_len) < 2354 sizeof(struct pfsync_state)) { 2355 /* we've filled a packet */ 2356 sc->sc_bulk_next = st; 2357 timeout_add(&sc->sc_bulk_tmo, 1); 2358 break; 2359 } 2360 } 2361 out: 2362 NET_UNLOCK(); 2363 } 2364 2365 void 2366 pfsync_bulk_status(u_int8_t status) 2367 { 2368 struct { 2369 struct pfsync_subheader subh; 2370 struct pfsync_bus bus; 2371 } __packed r; 2372 2373 struct pfsync_softc *sc = pfsyncif; 2374 2375 bzero(&r, sizeof(r)); 2376 2377 r.subh.action = PFSYNC_ACT_BUS; 2378 r.subh.len = sizeof(struct pfsync_bus) >> 2; 2379 r.subh.count = htons(1); 2380 2381 r.bus.creatorid = pf_status.hostid; 2382 r.bus.endtime = htonl(time_uptime - sc->sc_ureq_received); 2383 r.bus.status = status; 2384 2385 pfsync_send_plus(&r, sizeof(r)); 2386 } 2387 2388 void 2389 pfsync_bulk_fail(void *arg) 2390 { 2391 struct pfsync_softc *sc; 2392 2393 NET_LOCK(); 2394 sc = pfsyncif; 2395 if (sc == NULL) 2396 goto out; 2397 if (sc->sc_bulk_tries++ < PFSYNC_MAX_BULKTRIES) { 2398 /* Try again */ 2399 timeout_add_sec(&sc->sc_bulkfail_tmo, 5); 2400 pfsync_request_update(0, 0); 2401 } else { 2402 /* Pretend like the transfer was ok */ 2403 sc->sc_ureq_sent = 0; 2404 sc->sc_bulk_tries = 0; 2405 #if NCARP > 0 2406 if (!pfsync_sync_ok) 2407 carp_group_demote_adj(&sc->sc_if, -1, 2408 sc->sc_link_demoted ? 2409 "pfsync link state up" : 2410 "pfsync bulk fail"); 2411 if (sc->sc_initial_bulk) { 2412 carp_group_demote_adj(&sc->sc_if, -32, 2413 "pfsync init"); 2414 sc->sc_initial_bulk = 0; 2415 } 2416 #endif 2417 pfsync_sync_ok = 1; 2418 sc->sc_link_demoted = 0; 2419 DPFPRINTF(LOG_ERR, "failed to receive bulk update"); 2420 } 2421 out: 2422 NET_UNLOCK(); 2423 } 2424 2425 void 2426 pfsync_send_plus(void *plus, size_t pluslen) 2427 { 2428 struct pfsync_softc *sc = pfsyncif; 2429 2430 if (sc->sc_len + pluslen > sc->sc_if.if_mtu) 2431 pfsync_sendout(); 2432 2433 sc->sc_plus = plus; 2434 sc->sc_len += (sc->sc_pluslen = pluslen); 2435 2436 pfsync_sendout(); 2437 } 2438 2439 int 2440 pfsync_up(void) 2441 { 2442 struct pfsync_softc *sc = pfsyncif; 2443 2444 if (sc == NULL || !ISSET(sc->sc_if.if_flags, IFF_RUNNING)) 2445 return (0); 2446 2447 return (1); 2448 } 2449 2450 int 2451 pfsync_state_in_use(struct pf_state *st) 2452 { 2453 struct pfsync_softc *sc = pfsyncif; 2454 2455 if (sc == NULL) 2456 return (0); 2457 2458 if (st->sync_state != PFSYNC_S_NONE || 2459 st == sc->sc_bulk_next || 2460 st == sc->sc_bulk_last) 2461 return (1); 2462 2463 return (0); 2464 } 2465 2466 void 2467 pfsync_timeout(void *arg) 2468 { 2469 NET_LOCK(); 2470 PF_LOCK(); 2471 pfsync_sendout(); 2472 PF_UNLOCK(); 2473 NET_UNLOCK(); 2474 } 2475 2476 /* this is a softnet/netisr handler */ 2477 void 2478 pfsyncintr(void) 2479 { 2480 PF_LOCK(); 2481 pfsync_sendout(); 2482 PF_UNLOCK(); 2483 } 2484 2485 int 2486 pfsync_sysctl_pfsyncstat(void *oldp, size_t *oldlenp, void *newp) 2487 { 2488 struct pfsyncstats pfsyncstat; 2489 2490 CTASSERT(sizeof(pfsyncstat) == (pfsyncs_ncounters * sizeof(uint64_t))); 2491 memset(&pfsyncstat, 0, sizeof pfsyncstat); 2492 counters_read(pfsynccounters, (uint64_t *)&pfsyncstat, 2493 pfsyncs_ncounters); 2494 return (sysctl_rdstruct(oldp, oldlenp, newp, 2495 &pfsyncstat, sizeof(pfsyncstat))); 2496 } 2497 2498 int 2499 pfsync_sysctl(int *name, u_int namelen, void *oldp, size_t *oldlenp, void *newp, 2500 size_t newlen) 2501 { 2502 /* All sysctl names at this level are terminal. */ 2503 if (namelen != 1) 2504 return (ENOTDIR); 2505 2506 switch (name[0]) { 2507 case PFSYNCCTL_STATS: 2508 return (pfsync_sysctl_pfsyncstat(oldp, oldlenp, newp)); 2509 default: 2510 return (ENOPROTOOPT); 2511 } 2512 } 2513