1 /* $OpenBSD: if_pfsync.c,v 1.98 2008/06/29 08:42:15 mcbride Exp $ */ 2 3 /* 4 * Copyright (c) 2002 Michael Shalayeff 5 * All rights reserved. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 1. Redistributions of source code must retain the above copyright 11 * notice, this list of conditions and the following disclaimer. 12 * 2. Redistributions in binary form must reproduce the above copyright 13 * notice, this list of conditions and the following disclaimer in the 14 * documentation and/or other materials provided with the distribution. 15 * 16 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR 17 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 18 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 19 * IN NO EVENT SHALL THE AUTHOR OR HIS RELATIVES BE LIABLE FOR ANY DIRECT, 20 * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 21 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 22 * SERVICES; LOSS OF MIND, USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 23 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, 24 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING 25 * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF 26 * THE POSSIBILITY OF SUCH DAMAGE. 
 */

#include "opt_inet.h"
#include "opt_inet6.h"
#include "opt_carp.h"

#include <sys/param.h>
#include <sys/endian.h>
#include <sys/proc.h>
#include <sys/priv.h>
#include <sys/systm.h>
#include <sys/time.h>
#include <sys/mbuf.h>
#include <sys/socket.h>
#include <sys/kernel.h>
#include <sys/malloc.h>
#include <sys/module.h>
#include <sys/sockio.h>
#include <sys/thread2.h>
#include <vm/vm_zone.h>

#include <machine/inttypes.h>

#include <net/if.h>
#include <net/if_types.h>
#include <net/route.h>
#include <net/bpf.h>
#include <netinet/in.h>
#include <netinet/if_ether.h>
#include <netinet/ip_carp.h>
#include <netinet/tcp.h>
#include <netinet/tcp_seq.h>

#ifdef INET
#include <netinet/in_systm.h>
#include <netinet/in_var.h>
#include <netinet/ip.h>
#include <netinet/ip_var.h>
#endif

#ifdef INET6
#include <netinet6/nd6.h>
#endif /* INET6 */

#include <net/pf/pfvar.h>
#include <net/pf/if_pfsync.h>

#define PFSYNCNAME	"pfsync"

/* Smallest usable MTU: one header plus one full state record. */
#define PFSYNC_MINMTU	\
    (sizeof(struct pfsync_header) + sizeof(struct pf_state))

#ifdef PFSYNCDEBUG
#define DPRINTF(x)    do { if (pfsyncdebug) kprintf x ; } while (0)
int pfsyncdebug;
#else
#define DPRINTF(x)
#endif

/* Singleton softc pointer used by pfsync_input()/pfsync_pack_state(). */
struct pfsync_softc	*pfsyncif = NULL;
struct pfsyncstats	 pfsyncstats;

void	pfsyncattach(int);
static int	pfsync_clone_destroy(struct ifnet *);
static int	pfsync_clone_create(struct if_clone *, int, caddr_t);
void	pfsync_setmtu(struct pfsync_softc *, int);
int	pfsync_alloc_scrub_memory(struct pfsync_state_peer *,
	    struct pf_state_peer *);
int	pfsyncoutput(struct ifnet *, struct mbuf *, struct sockaddr *,
	    struct rtentry *);
int	pfsyncioctl(struct ifnet *, u_long, caddr_t, struct ucred *);
void	pfsyncstart(struct ifnet *);

struct mbuf *pfsync_get_mbuf(struct pfsync_softc *, u_int8_t, void **);
int	pfsync_request_update(struct pfsync_state_upd *, struct in_addr *);
int	pfsync_sendout(struct pfsync_softc *);
int	pfsync_sendout_mbuf(struct pfsync_softc *, struct mbuf *);
void	pfsync_timeout(void *);
void	pfsync_send_bus(struct pfsync_softc *, u_int8_t);
void	pfsync_bulk_update(void *);
void	pfsync_bulkfail(void *);

static MALLOC_DEFINE(M_PFSYNC, PFSYNCNAME, "Packet Filter State Sync. Interface");
static LIST_HEAD(pfsync_list, pfsync_softc) pfsync_list;

/* Nonzero once a bulk update has completed (or none is pending). */
int	pfsync_sync_ok;

struct if_clone	pfsync_cloner =
    IF_CLONE_INITIALIZER("pfsync", pfsync_clone_create, pfsync_clone_destroy, 1, 1);

/*
 * Module attach hook: register the "pfsync" interface cloner so that
 * units can be created/destroyed with ifconfig.
 */
void
pfsyncattach(int npfsync)
{
	if_clone_attach(&pfsync_cloner);
}

/*
 * Clone-create handler: allocate and initialize a pfsync softc and its
 * ifnet, defaulting to the pfsync multicast sync group and 128 compressed
 * updates per state, then attach the interface (plus carp/bpf hooks) and
 * link the softc onto pfsync_list.  Returns 0.
 */
static int
pfsync_clone_create(struct if_clone *ifc, int unit, caddr_t param __unused)
{
	struct pfsync_softc *sc;
	struct ifnet *ifp;

	lwkt_gettoken(&pf_token);

	MALLOC(sc, struct pfsync_softc *, sizeof(*sc), M_PFSYNC,
	    M_WAITOK|M_ZERO);
	pfsync_sync_ok = 1;
	sc->sc_mbuf = NULL;
	sc->sc_mbuf_net = NULL;
	sc->sc_mbuf_tdb = NULL;
	sc->sc_statep.s = NULL;
	sc->sc_statep_net.s = NULL;
	sc->sc_statep_tdb.t = NULL;
	sc->sc_maxupdates = 128;
	/* Default peer is the pfsync multicast group (network byte order). */
	sc->sc_sync_peer.s_addr = htonl(INADDR_PFSYNC_GROUP);
	sc->sc_sendaddr.s_addr = htonl(INADDR_PFSYNC_GROUP);
	sc->sc_ureq_received = 0;
	sc->sc_ureq_sent = 0;
	sc->sc_bulk_send_next = NULL;
	sc->sc_bulk_terminator = NULL;
	sc->sc_imo.imo_max_memberships = IP_MAX_MEMBERSHIPS;
	lwkt_reltoken(&pf_token);
	ifp = &sc->sc_if;
	/*
	 * NOTE(review): if_initname() below also formats if_xname from
	 * ifc_name/unit, which makes this ksnprintf() redundant — confirm
	 * before removing.
	 */
	ksnprintf(ifp->if_xname, sizeof ifp->if_xname, "pfsync%d", unit);
	if_initname(ifp, ifc->ifc_name, unit);
	ifp->if_ioctl = pfsyncioctl;
	ifp->if_output = pfsyncoutput;
	ifp->if_start = pfsyncstart;
	ifp->if_type = IFT_PFSYNC;
	ifp->if_snd.ifq_maxlen = ifqmaxlen;
	ifp->if_hdrlen = PFSYNC_HDRLEN;
	ifp->if_baudrate = IF_Mbps(100);
	ifp->if_softc = sc;
	pfsync_setmtu(sc, MCLBYTES);
	callout_init(&sc->sc_tmo);
	/* callout_init(&sc->sc_tdb_tmo); XXX we don't support tdb (yet) */
	callout_init(&sc->sc_bulk_tmo);
	callout_init(&sc->sc_bulkfail_tmo);
	if_attach(ifp, NULL);

	LIST_INSERT_HEAD(&pfsync_list, sc, sc_next);

#if NCARP > 0
	if_addgroup(ifp, "carp");
#endif

#if NBPFILTER > 0
	bpfattach(&sc->sc_if, DLT_PFSYNC, PFSYNC_HDRLEN);
#endif
	/*
	 * NOTE(review): this acquire/release pair is a no-op — presumably
	 * left over from porting the OpenBSD locking scheme.
	 */
	lwkt_gettoken(&pf_token);

	lwkt_reltoken(&pf_token);
	return (0);
}

/*
 * Clone-destroy handler: stop all pending callouts, undo the carp group
 * demotion if a bulk update was still outstanding, detach bpf and the
 * ifnet, then unlink and free the softc.  Always returns 0.
 */
static int
pfsync_clone_destroy(struct ifnet *ifp)
{
	/* NOTE(review): this acquire/release pair is a no-op — see above. */
	lwkt_gettoken(&pf_token);
	lwkt_reltoken(&pf_token);

	struct pfsync_softc *sc = ifp->if_softc;
	callout_stop(&sc->sc_tmo);
	/* callout_stop(&sc->sc_tdb_tmo); XXX we don't support tdb (yet) */
	callout_stop(&sc->sc_bulk_tmo);
	callout_stop(&sc->sc_bulkfail_tmo);
#if NCARP > 0
	if (!pfsync_sync_ok)
		carp_group_demote_adj(&sc->sc_if, -1);
#endif
#if NBPFILTER > 0
	bpfdetach(ifp);
#endif
	if_detach(ifp);
	lwkt_gettoken(&pf_token);
	LIST_REMOVE(sc, sc_next);
	kfree(sc, M_PFSYNC);
	lwkt_reltoken(&pf_token);

	return 0;
}

/*
 * Start output on the pfsync interface.
212 */ 213 void 214 pfsyncstart(struct ifnet *ifp) 215 { 216 crit_enter(); 217 IF_DROP(&ifp->if_snd); 218 IF_DRAIN(&ifp->if_snd); 219 crit_exit(); 220 } 221 222 int 223 pfsync_alloc_scrub_memory(struct pfsync_state_peer *s, 224 struct pf_state_peer *d) 225 { 226 if (s->scrub.scrub_flag && d->scrub == NULL) { 227 d->scrub = pool_get(&pf_state_scrub_pl, PR_NOWAIT | PR_ZERO); 228 if (d->scrub == NULL) 229 return (ENOMEM); 230 } 231 232 return (0); 233 } 234 235 void 236 pfsync_state_export(struct pfsync_state *sp, struct pf_state *st) 237 { 238 bzero(sp, sizeof(struct pfsync_state)); 239 240 /* copy from state key */ 241 sp->key[PF_SK_WIRE].addr[0] = st->key[PF_SK_WIRE]->addr[0]; 242 sp->key[PF_SK_WIRE].addr[1] = st->key[PF_SK_WIRE]->addr[1]; 243 sp->key[PF_SK_WIRE].port[0] = st->key[PF_SK_WIRE]->port[0]; 244 sp->key[PF_SK_WIRE].port[1] = st->key[PF_SK_WIRE]->port[1]; 245 sp->key[PF_SK_STACK].addr[0] = st->key[PF_SK_STACK]->addr[0]; 246 sp->key[PF_SK_STACK].addr[1] = st->key[PF_SK_STACK]->addr[1]; 247 sp->key[PF_SK_STACK].port[0] = st->key[PF_SK_STACK]->port[0]; 248 sp->key[PF_SK_STACK].port[1] = st->key[PF_SK_STACK]->port[1]; 249 sp->proto = st->key[PF_SK_WIRE]->proto; 250 sp->af = st->key[PF_SK_WIRE]->af; 251 252 /* copy from state */ 253 strlcpy(sp->ifname, st->kif->pfik_name, sizeof(sp->ifname)); 254 bcopy(&st->rt_addr, &sp->rt_addr, sizeof(sp->rt_addr)); 255 sp->creation = htonl(time_second - st->creation); 256 sp->expire = pf_state_expires(st); 257 if (sp->expire <= time_second) 258 sp->expire = htonl(0); 259 else 260 sp->expire = htonl(sp->expire - time_second); 261 262 sp->direction = st->direction; 263 sp->log = st->log; 264 sp->timeout = st->timeout; 265 sp->state_flags = st->state_flags; 266 if (st->src_node) 267 sp->sync_flags |= PFSYNC_FLAG_SRCNODE; 268 if (st->nat_src_node) 269 sp->sync_flags |= PFSYNC_FLAG_NATSRCNODE; 270 271 bcopy(&st->id, &sp->id, sizeof(sp->id)); 272 sp->creatorid = st->creatorid; 273 pf_state_peer_hton(&st->src, &sp->src); 274 
pf_state_peer_hton(&st->dst, &sp->dst); 275 276 if (st->rule.ptr == NULL) 277 sp->rule = htonl(-1); 278 else 279 sp->rule = htonl(st->rule.ptr->nr); 280 if (st->anchor.ptr == NULL) 281 sp->anchor = htonl(-1); 282 else 283 sp->anchor = htonl(st->anchor.ptr->nr); 284 if (st->nat_rule.ptr == NULL) 285 sp->nat_rule = htonl(-1); 286 else 287 sp->nat_rule = htonl(st->nat_rule.ptr->nr); 288 289 pf_state_counter_hton(st->packets[0], sp->packets[0]); 290 pf_state_counter_hton(st->packets[1], sp->packets[1]); 291 pf_state_counter_hton(st->bytes[0], sp->bytes[0]); 292 pf_state_counter_hton(st->bytes[1], sp->bytes[1]); 293 294 } 295 296 int 297 pfsync_state_import(struct pfsync_state *sp, u_int8_t flags) 298 { 299 struct pf_state *st = NULL; 300 struct pf_state_key *skw = NULL, *sks = NULL; 301 struct pf_rule *r = NULL; 302 struct pfi_kif *kif; 303 int pool_flags; 304 int error; 305 306 if (sp->creatorid == 0 && pf_status.debug >= PF_DEBUG_MISC) { 307 kprintf("pfsync_insert_net_state: invalid creator id:" 308 " %08x\n", ntohl(sp->creatorid)); 309 return (EINVAL); 310 } 311 312 if ((kif = pfi_kif_get(sp->ifname)) == NULL) { 313 if (pf_status.debug >= PF_DEBUG_MISC) 314 kprintf("pfsync_insert_net_state: " 315 "unknown interface: %s\n", sp->ifname); 316 if (flags & PFSYNC_SI_IOCTL) 317 return (EINVAL); 318 return (0); /* skip this state */ 319 } 320 321 /* 322 * If the ruleset checksums match or the state is coming from the ioctl, 323 * it's safe to associate the state with the rule of that number. 
324 */ 325 if (sp->rule != htonl(-1) && sp->anchor == htonl(-1) && 326 (flags & (PFSYNC_SI_IOCTL | PFSYNC_SI_CKSUM)) && ntohl(sp->rule) < 327 pf_main_ruleset.rules[PF_RULESET_FILTER].active.rcount) 328 r = pf_main_ruleset.rules[ 329 PF_RULESET_FILTER].active.ptr_array[ntohl(sp->rule)]; 330 else 331 r = &pf_default_rule; 332 333 if ((r->max_states && r->states_cur >= r->max_states)) 334 goto cleanup; 335 336 if (flags & PFSYNC_SI_IOCTL) 337 pool_flags = PR_WAITOK | PR_LIMITFAIL | PR_ZERO; 338 else 339 pool_flags = PR_LIMITFAIL | PR_ZERO; 340 341 if ((st = pool_get(&pf_state_pl, pool_flags)) == NULL) 342 goto cleanup; 343 344 if ((skw = pf_alloc_state_key(pool_flags)) == NULL) 345 goto cleanup; 346 347 if (PF_ANEQ(&sp->key[PF_SK_WIRE].addr[0], 348 &sp->key[PF_SK_STACK].addr[0], sp->af) || 349 PF_ANEQ(&sp->key[PF_SK_WIRE].addr[1], 350 &sp->key[PF_SK_STACK].addr[1], sp->af) || 351 sp->key[PF_SK_WIRE].port[0] != sp->key[PF_SK_STACK].port[0] || 352 sp->key[PF_SK_WIRE].port[1] != sp->key[PF_SK_STACK].port[1]) { 353 if ((sks = pf_alloc_state_key(pool_flags)) == NULL) 354 goto cleanup; 355 } else 356 sks = skw; 357 358 /* allocate memory for scrub info */ 359 if (pfsync_alloc_scrub_memory(&sp->src, &st->src) || 360 pfsync_alloc_scrub_memory(&sp->dst, &st->dst)) 361 goto cleanup; 362 363 /* copy to state key(s) */ 364 skw->addr[0] = sp->key[PF_SK_WIRE].addr[0]; 365 skw->addr[1] = sp->key[PF_SK_WIRE].addr[1]; 366 skw->port[0] = sp->key[PF_SK_WIRE].port[0]; 367 skw->port[1] = sp->key[PF_SK_WIRE].port[1]; 368 skw->proto = sp->proto; 369 skw->af = sp->af; 370 if (sks != skw) { 371 sks->addr[0] = sp->key[PF_SK_STACK].addr[0]; 372 sks->addr[1] = sp->key[PF_SK_STACK].addr[1]; 373 sks->port[0] = sp->key[PF_SK_STACK].port[0]; 374 sks->port[1] = sp->key[PF_SK_STACK].port[1]; 375 sks->proto = sp->proto; 376 sks->af = sp->af; 377 } 378 379 /* copy to state */ 380 bcopy(&sp->rt_addr, &st->rt_addr, sizeof(st->rt_addr)); 381 st->creation = time_second - ntohl(sp->creation); 382 st->expire 
= time_second; 383 if (sp->expire) { 384 /* XXX No adaptive scaling. */ 385 st->expire -= r->timeout[sp->timeout] - ntohl(sp->expire); 386 } 387 388 st->expire = ntohl(sp->expire) + time_second; 389 st->direction = sp->direction; 390 st->log = sp->log; 391 st->timeout = sp->timeout; 392 st->state_flags = sp->state_flags; 393 if (!(flags & PFSYNC_SI_IOCTL)) 394 st->sync_flags = PFSTATE_FROMSYNC; 395 396 bcopy(sp->id, &st->id, sizeof(st->id)); 397 st->creatorid = sp->creatorid; 398 pf_state_peer_ntoh(&sp->src, &st->src); 399 pf_state_peer_ntoh(&sp->dst, &st->dst); 400 401 st->rule.ptr = r; 402 st->nat_rule.ptr = NULL; 403 st->anchor.ptr = NULL; 404 st->rt_kif = NULL; 405 406 st->pfsync_time = 0; 407 408 409 /* XXX when we have nat_rule/anchors, use STATE_INC_COUNTERS */ 410 r->states_cur++; 411 r->states_tot++; 412 413 if ((error = pf_state_insert(kif, skw, sks, st)) != 0) { 414 /* XXX when we have nat_rule/anchors, use STATE_DEC_COUNTERS */ 415 r->states_cur--; 416 goto cleanup_state; 417 } 418 419 return (0); 420 421 cleanup: 422 error = ENOMEM; 423 if (skw == sks) 424 sks = NULL; 425 if (skw != NULL) 426 pool_put(&pf_state_key_pl, skw); 427 if (sks != NULL) 428 pool_put(&pf_state_key_pl, sks); 429 430 cleanup_state: /* pf_state_insert frees the state keys */ 431 if (st) { 432 if (st->dst.scrub) 433 pool_put(&pf_state_scrub_pl, st->dst.scrub); 434 if (st->src.scrub) 435 pool_put(&pf_state_scrub_pl, st->src.scrub); 436 pool_put(&pf_state_pl, st); 437 } 438 return (error); 439 } 440 441 void 442 pfsync_input(struct mbuf *m, ...) 
443 { 444 struct ip *ip = mtod(m, struct ip *); 445 struct pfsync_header *ph; 446 struct pfsync_softc *sc = pfsyncif; 447 struct pf_state *st; 448 struct pf_state_key *sk; 449 struct pf_state_item *si; 450 struct pf_state_cmp id_key; 451 struct pfsync_state *sp; 452 struct pfsync_state_upd *up; 453 struct pfsync_state_del *dp; 454 struct pfsync_state_clr *cp; 455 struct pfsync_state_upd_req *rup; 456 struct pfsync_state_bus *bus; 457 #ifdef IPSEC 458 struct pfsync_tdb *pt; 459 #endif 460 struct in_addr src; 461 struct mbuf *mp; 462 int iplen, action, error, i, count, offp, sfail, stale = 0; 463 u_int8_t flags = 0; 464 465 /* This function is not yet called from anywhere */ 466 /* Still we assume for safety that pf_token must be held */ 467 ASSERT_LWKT_TOKEN_HELD(&pf_token); 468 469 pfsyncstats.pfsyncs_ipackets++; 470 471 /* verify that we have a sync interface configured */ 472 if (!sc || !sc->sc_sync_ifp || !pf_status.running) 473 goto done; 474 475 /* verify that the packet came in on the right interface */ 476 if (sc->sc_sync_ifp != m->m_pkthdr.rcvif) { 477 pfsyncstats.pfsyncs_badif++; 478 goto done; 479 } 480 481 /* verify that the IP TTL is 255. 
*/ 482 if (ip->ip_ttl != PFSYNC_DFLTTL) { 483 pfsyncstats.pfsyncs_badttl++; 484 goto done; 485 } 486 487 iplen = ip->ip_hl << 2; 488 489 if (m->m_pkthdr.len < iplen + sizeof(*ph)) { 490 pfsyncstats.pfsyncs_hdrops++; 491 goto done; 492 } 493 494 if (iplen + sizeof(*ph) > m->m_len) { 495 if ((m = m_pullup(m, iplen + sizeof(*ph))) == NULL) { 496 pfsyncstats.pfsyncs_hdrops++; 497 goto done; 498 } 499 ip = mtod(m, struct ip *); 500 } 501 ph = (struct pfsync_header *)((char *)ip + iplen); 502 503 /* verify the version */ 504 if (ph->version != PFSYNC_VERSION) { 505 pfsyncstats.pfsyncs_badver++; 506 goto done; 507 } 508 509 action = ph->action; 510 count = ph->count; 511 512 /* make sure it's a valid action code */ 513 if (action >= PFSYNC_ACT_MAX) { 514 pfsyncstats.pfsyncs_badact++; 515 goto done; 516 } 517 518 /* Cheaper to grab this now than having to mess with mbufs later */ 519 src = ip->ip_src; 520 521 if (!bcmp(&ph->pf_chksum, &pf_status.pf_chksum, PF_MD5_DIGEST_LENGTH)) 522 flags |= PFSYNC_SI_CKSUM; 523 524 switch (action) { 525 case PFSYNC_ACT_CLR: { 526 struct pf_state *nexts; 527 struct pf_state_key *nextsk; 528 struct pfi_kif *kif; 529 u_int32_t creatorid; 530 if ((mp = m_pulldown(m, iplen + sizeof(*ph), 531 sizeof(*cp), &offp)) == NULL) { 532 pfsyncstats.pfsyncs_badlen++; 533 return; 534 } 535 cp = (struct pfsync_state_clr *)(mp->m_data + offp); 536 creatorid = cp->creatorid; 537 538 crit_enter(); 539 if (cp->ifname[0] == '\0') { 540 for (st = RB_MIN(pf_state_tree_id, &tree_id); 541 st; st = nexts) { 542 nexts = RB_NEXT(pf_state_tree_id, &tree_id, st); 543 if (st->creatorid == creatorid) { 544 st->sync_flags |= PFSTATE_FROMSYNC; 545 pf_unlink_state(st); 546 } 547 } 548 } else { 549 if ((kif = pfi_kif_get(cp->ifname)) == NULL) { 550 crit_exit(); 551 return; 552 } 553 /* XXX correct? 
*/ 554 for (sk = RB_MIN(pf_state_tree, 555 &pf_statetbl); sk; sk = nextsk) { 556 nextsk = RB_NEXT(pf_state_tree, 557 &pf_statetbl, sk); 558 TAILQ_FOREACH(si, &sk->states, entry) { 559 if (si->s->creatorid == creatorid) { 560 si->s->sync_flags |= 561 PFSTATE_FROMSYNC; 562 pf_unlink_state(si->s); 563 } 564 } 565 } 566 } 567 crit_exit(); 568 569 break; 570 } 571 case PFSYNC_ACT_INS: 572 if ((mp = m_pulldown(m, iplen + sizeof(*ph), 573 count * sizeof(*sp), &offp)) == NULL) { 574 pfsyncstats.pfsyncs_badlen++; 575 return; 576 } 577 578 crit_enter(); 579 for (i = 0, sp = (struct pfsync_state *)(mp->m_data + offp); 580 i < count; i++, sp++) { 581 /* check for invalid values */ 582 if (sp->timeout >= PFTM_MAX || 583 sp->src.state > PF_TCPS_PROXY_DST || 584 sp->dst.state > PF_TCPS_PROXY_DST || 585 sp->direction > PF_OUT || 586 (sp->af != AF_INET && sp->af != AF_INET6)) { 587 if (pf_status.debug >= PF_DEBUG_MISC) 588 kprintf("pfsync_insert: PFSYNC_ACT_INS: " 589 "invalid value\n"); 590 pfsyncstats.pfsyncs_badval++; 591 continue; 592 } 593 594 if ((error = pfsync_state_import(sp, flags))) { 595 if (error == ENOMEM) { 596 crit_exit(); 597 goto done; 598 } 599 continue; 600 } 601 } 602 crit_exit(); 603 break; 604 case PFSYNC_ACT_UPD: 605 if ((mp = m_pulldown(m, iplen + sizeof(*ph), 606 count * sizeof(*sp), &offp)) == NULL) { 607 pfsyncstats.pfsyncs_badlen++; 608 return; 609 } 610 611 crit_enter(); 612 for (i = 0, sp = (struct pfsync_state *)(mp->m_data + offp); 613 i < count; i++, sp++) { 614 int flags = PFSYNC_FLAG_STALE; 615 616 /* check for invalid values */ 617 if (sp->timeout >= PFTM_MAX || 618 sp->src.state > PF_TCPS_PROXY_DST || 619 sp->dst.state > PF_TCPS_PROXY_DST) { 620 if (pf_status.debug >= PF_DEBUG_MISC) 621 kprintf("pfsync_insert: PFSYNC_ACT_UPD: " 622 "invalid value\n"); 623 pfsyncstats.pfsyncs_badval++; 624 continue; 625 } 626 627 bcopy(sp->id, &id_key.id, sizeof(id_key.id)); 628 id_key.creatorid = sp->creatorid; 629 630 st = pf_find_state_byid(&id_key); 631 if 
(st == NULL) { 632 /* insert the update */ 633 if (pfsync_state_import(sp, flags)) 634 pfsyncstats.pfsyncs_badstate++; 635 continue; 636 } 637 sk = st->key[PF_SK_WIRE]; /* XXX right one? */ 638 sfail = 0; 639 if (sk->proto == IPPROTO_TCP) { 640 /* 641 * The state should never go backwards except 642 * for syn-proxy states. Neither should the 643 * sequence window slide backwards. 644 */ 645 if (st->src.state > sp->src.state && 646 (st->src.state < PF_TCPS_PROXY_SRC || 647 sp->src.state >= PF_TCPS_PROXY_SRC)) 648 sfail = 1; 649 else if (SEQ_GT(st->src.seqlo, 650 ntohl(sp->src.seqlo))) 651 sfail = 3; 652 else if (st->dst.state > sp->dst.state) { 653 /* There might still be useful 654 * information about the src state here, 655 * so import that part of the update, 656 * then "fail" so we send the updated 657 * state back to the peer who is missing 658 * our what we know. */ 659 pf_state_peer_ntoh(&sp->src, &st->src); 660 /* XXX do anything with timeouts? */ 661 sfail = 7; 662 flags = 0; 663 } else if (st->dst.state >= TCPS_SYN_SENT && 664 SEQ_GT(st->dst.seqlo, ntohl(sp->dst.seqlo))) 665 sfail = 4; 666 } else { 667 /* 668 * Non-TCP protocol state machine always go 669 * forwards 670 */ 671 if (st->src.state > sp->src.state) 672 sfail = 5; 673 else if (st->dst.state > sp->dst.state) 674 sfail = 6; 675 } 676 if (sfail) { 677 if (pf_status.debug >= PF_DEBUG_MISC) 678 kprintf("pfsync: %s stale update " 679 "(%d) id: %016jx " 680 "creatorid: %08x\n", 681 (sfail < 7 ? 
"ignoring" 682 : "partial"), sfail, 683 (uintmax_t)be64toh(st->id), 684 ntohl(st->creatorid)); 685 pfsyncstats.pfsyncs_stale++; 686 687 if (!(sp->sync_flags & PFSTATE_STALE)) { 688 /* we have a better state, send it */ 689 if (sc->sc_mbuf != NULL && !stale) 690 pfsync_sendout(sc); 691 stale++; 692 if (!st->sync_flags) 693 pfsync_pack_state( 694 PFSYNC_ACT_UPD, st, flags); 695 } 696 continue; 697 } 698 pfsync_alloc_scrub_memory(&sp->dst, &st->dst); 699 pf_state_peer_ntoh(&sp->src, &st->src); 700 pf_state_peer_ntoh(&sp->dst, &st->dst); 701 st->expire = ntohl(sp->expire) + time_second; 702 st->timeout = sp->timeout; 703 } 704 if (stale && sc->sc_mbuf != NULL) 705 pfsync_sendout(sc); 706 crit_exit(); 707 break; 708 /* 709 * It's not strictly necessary for us to support the "uncompressed" 710 * delete action, but it's relatively simple and maintains consistency. 711 */ 712 case PFSYNC_ACT_DEL: 713 if ((mp = m_pulldown(m, iplen + sizeof(*ph), 714 count * sizeof(*sp), &offp)) == NULL) { 715 pfsyncstats.pfsyncs_badlen++; 716 return; 717 } 718 719 crit_enter(); 720 for (i = 0, sp = (struct pfsync_state *)(mp->m_data + offp); 721 i < count; i++, sp++) { 722 bcopy(sp->id, &id_key.id, sizeof(id_key.id)); 723 id_key.creatorid = sp->creatorid; 724 725 st = pf_find_state_byid(&id_key); 726 if (st == NULL) { 727 pfsyncstats.pfsyncs_badstate++; 728 continue; 729 } 730 st->sync_flags |= PFSTATE_FROMSYNC; 731 pf_unlink_state(st); 732 } 733 crit_exit(); 734 break; 735 case PFSYNC_ACT_UPD_C: { 736 int update_requested = 0; 737 738 if ((mp = m_pulldown(m, iplen + sizeof(*ph), 739 count * sizeof(*up), &offp)) == NULL) { 740 pfsyncstats.pfsyncs_badlen++; 741 return; 742 } 743 744 crit_enter(); 745 for (i = 0, up = (struct pfsync_state_upd *)(mp->m_data + offp); 746 i < count; i++, up++) { 747 /* check for invalid values */ 748 if (up->timeout >= PFTM_MAX || 749 up->src.state > PF_TCPS_PROXY_DST || 750 up->dst.state > PF_TCPS_PROXY_DST) { 751 if (pf_status.debug >= PF_DEBUG_MISC) 752 
kprintf("pfsync_insert: " 753 "PFSYNC_ACT_UPD_C: " 754 "invalid value\n"); 755 pfsyncstats.pfsyncs_badval++; 756 continue; 757 } 758 759 bcopy(up->id, &id_key.id, sizeof(id_key.id)); 760 id_key.creatorid = up->creatorid; 761 762 st = pf_find_state_byid(&id_key); 763 if (st == NULL) { 764 /* We don't have this state. Ask for it. */ 765 error = pfsync_request_update(up, &src); 766 if (error == ENOMEM) { 767 crit_exit(); 768 goto done; 769 } 770 update_requested = 1; 771 pfsyncstats.pfsyncs_badstate++; 772 continue; 773 } 774 sk = st->key[PF_SK_WIRE]; /* XXX right one? */ 775 sfail = 0; 776 if (sk->proto == IPPROTO_TCP) { 777 /* 778 * The state should never go backwards except 779 * for syn-proxy states. Neither should the 780 * sequence window slide backwards. 781 */ 782 if (st->src.state > up->src.state && 783 (st->src.state < PF_TCPS_PROXY_SRC || 784 up->src.state >= PF_TCPS_PROXY_SRC)) 785 sfail = 1; 786 else if (st->dst.state > up->dst.state) 787 sfail = 2; 788 else if (SEQ_GT(st->src.seqlo, 789 ntohl(up->src.seqlo))) 790 sfail = 3; 791 else if (st->dst.state >= TCPS_SYN_SENT && 792 SEQ_GT(st->dst.seqlo, ntohl(up->dst.seqlo))) 793 sfail = 4; 794 } else { 795 /* 796 * Non-TCP protocol state machine always go 797 * forwards 798 */ 799 if (st->src.state > up->src.state) 800 sfail = 5; 801 else if (st->dst.state > up->dst.state) 802 sfail = 6; 803 } 804 if (sfail) { 805 if (pf_status.debug >= PF_DEBUG_MISC) 806 kprintf("pfsync: ignoring stale update " 807 "(%d) id: %016" PRIx64 " " 808 "creatorid: %08x\n", sfail, 809 be64toh(st->id), 810 ntohl(st->creatorid)); 811 pfsyncstats.pfsyncs_stale++; 812 813 /* we have a better state, send it out */ 814 if ((!stale || update_requested) && 815 sc->sc_mbuf != NULL) { 816 pfsync_sendout(sc); 817 update_requested = 0; 818 } 819 stale++; 820 if (!st->sync_flags) 821 pfsync_pack_state(PFSYNC_ACT_UPD, st, 822 PFSYNC_FLAG_STALE); 823 continue; 824 } 825 pfsync_alloc_scrub_memory(&up->dst, &st->dst); 826 pf_state_peer_ntoh(&up->src, 
&st->src); 827 pf_state_peer_ntoh(&up->dst, &st->dst); 828 st->expire = ntohl(up->expire) + time_second; 829 st->timeout = up->timeout; 830 } 831 if ((update_requested || stale) && sc->sc_mbuf) 832 pfsync_sendout(sc); 833 crit_exit(); 834 break; 835 } 836 case PFSYNC_ACT_DEL_C: 837 if ((mp = m_pulldown(m, iplen + sizeof(*ph), 838 count * sizeof(*dp), &offp)) == NULL) { 839 pfsyncstats.pfsyncs_badlen++; 840 return; 841 } 842 843 crit_enter(); 844 for (i = 0, dp = (struct pfsync_state_del *)(mp->m_data + offp); 845 i < count; i++, dp++) { 846 bcopy(dp->id, &id_key.id, sizeof(id_key.id)); 847 id_key.creatorid = dp->creatorid; 848 849 st = pf_find_state_byid(&id_key); 850 if (st == NULL) { 851 pfsyncstats.pfsyncs_badstate++; 852 continue; 853 } 854 st->sync_flags |= PFSTATE_FROMSYNC; 855 pf_unlink_state(st); 856 } 857 crit_exit(); 858 break; 859 case PFSYNC_ACT_INS_F: 860 case PFSYNC_ACT_DEL_F: 861 /* not implemented */ 862 break; 863 case PFSYNC_ACT_UREQ: 864 if ((mp = m_pulldown(m, iplen + sizeof(*ph), 865 count * sizeof(*rup), &offp)) == NULL) { 866 pfsyncstats.pfsyncs_badlen++; 867 return; 868 } 869 870 crit_enter(); 871 if (sc->sc_mbuf != NULL) 872 pfsync_sendout(sc); 873 for (i = 0, 874 rup = (struct pfsync_state_upd_req *)(mp->m_data + offp); 875 i < count; i++, rup++) { 876 bcopy(rup->id, &id_key.id, sizeof(id_key.id)); 877 id_key.creatorid = rup->creatorid; 878 879 if (id_key.id == 0 && id_key.creatorid == 0) { 880 sc->sc_ureq_received = mycpu->gd_time_seconds; 881 if (sc->sc_bulk_send_next == NULL) 882 sc->sc_bulk_send_next = 883 TAILQ_FIRST(&state_list); 884 sc->sc_bulk_terminator = sc->sc_bulk_send_next; 885 if (pf_status.debug >= PF_DEBUG_MISC) 886 kprintf("pfsync: received " 887 "bulk update request\n"); 888 pfsync_send_bus(sc, PFSYNC_BUS_START); 889 lwkt_reltoken(&pf_token); 890 callout_init(&sc->sc_bulk_tmo); 891 lwkt_gettoken(&pf_token); 892 } else { 893 st = pf_find_state_byid(&id_key); 894 if (st == NULL) { 895 pfsyncstats.pfsyncs_badstate++; 896 
continue; 897 } 898 if (!st->sync_flags) 899 pfsync_pack_state(PFSYNC_ACT_UPD, 900 st, 0); 901 } 902 } 903 if (sc->sc_mbuf != NULL) 904 pfsync_sendout(sc); 905 crit_exit(); 906 break; 907 case PFSYNC_ACT_BUS: 908 /* If we're not waiting for a bulk update, who cares. */ 909 if (sc->sc_ureq_sent == 0) 910 break; 911 912 if ((mp = m_pulldown(m, iplen + sizeof(*ph), 913 sizeof(*bus), &offp)) == NULL) { 914 pfsyncstats.pfsyncs_badlen++; 915 return; 916 } 917 bus = (struct pfsync_state_bus *)(mp->m_data + offp); 918 switch (bus->status) { 919 case PFSYNC_BUS_START: 920 lwkt_reltoken(&pf_token); 921 callout_reset(&sc->sc_bulkfail_tmo, 922 pf_pool_limits[PF_LIMIT_STATES].limit / 923 (PFSYNC_BULKPACKETS * sc->sc_maxcount), 924 pfsync_bulkfail, LIST_FIRST(&pfsync_list)); 925 lwkt_gettoken(&pf_token); 926 if (pf_status.debug >= PF_DEBUG_MISC) 927 kprintf("pfsync: received bulk " 928 "update start\n"); 929 break; 930 case PFSYNC_BUS_END: 931 if (mycpu->gd_time_seconds - ntohl(bus->endtime) >= 932 sc->sc_ureq_sent) { 933 /* that's it, we're happy */ 934 sc->sc_ureq_sent = 0; 935 sc->sc_bulk_tries = 0; 936 lwkt_reltoken(&pf_token); 937 callout_stop(&sc->sc_bulkfail_tmo); 938 lwkt_gettoken(&pf_token); 939 #if NCARP > 0 940 if (!pfsync_sync_ok) { 941 lwkt_reltoken(&pf_token); 942 carp_group_demote_adj(&sc->sc_if, -1); 943 lwkt_gettoken(&pf_token); 944 } 945 #endif 946 pfsync_sync_ok = 1; 947 if (pf_status.debug >= PF_DEBUG_MISC) 948 kprintf("pfsync: received valid " 949 "bulk update end\n"); 950 } else { 951 if (pf_status.debug >= PF_DEBUG_MISC) 952 kprintf("pfsync: received invalid " 953 "bulk update end: bad timestamp\n"); 954 } 955 break; 956 } 957 break; 958 #ifdef IPSEC 959 case PFSYNC_ACT_TDB_UPD: 960 if ((mp = m_pulldown(m, iplen + sizeof(*ph), 961 count * sizeof(*pt), &offp)) == NULL) { 962 pfsyncstats.pfsyncs_badlen++; 963 return; 964 } 965 crit_enter(); 966 for (i = 0, pt = (struct pfsync_tdb *)(mp->m_data + offp); 967 i < count; i++, pt++) 968 pfsync_update_net_tdb(pt); 
		crit_exit();
		break;
#endif
	}

done:
	if (m)
		m_freem(m);
}

/*
 * if_output handler: pfsync does not transmit via the generic output
 * path; any packet handed to it is freed and success is returned.
 */
int
pfsyncoutput(struct ifnet *ifp, struct mbuf *m, struct sockaddr *dst,
	struct rtentry *rt)
{
	m_freem(m);
	return (0);
}

/*
 * Interface ioctl handler.  Supports the generic interface ioctls plus
 * SIOCGETPFSYNC/SIOCSETPFSYNC for reading and configuring the sync
 * device, sync peer, and maximum compressed-update count.  Configuring
 * a sync device joins the pfsync multicast group (unless a unicast peer
 * is set) and kicks off a bulk state-table request.
 */
/* ARGSUSED */
int
pfsyncioctl(struct ifnet *ifp, u_long cmd, caddr_t data, struct ucred *cr)
{
	struct pfsync_softc *sc = ifp->if_softc;
	struct ifreq *ifr = (struct ifreq *)data;
	struct ip_moptions *imo = &sc->sc_imo;
	struct pfsyncreq pfsyncr;
	struct ifnet	*sifp;
	int error;

	lwkt_gettoken(&pf_token);

	switch (cmd) {
	case SIOCSIFADDR:
	case SIOCAIFADDR:
	case SIOCSIFDSTADDR:
	case SIOCSIFFLAGS:
		if (ifp->if_flags & IFF_UP)
			ifp->if_flags |= IFF_RUNNING;
		else
			ifp->if_flags &= ~IFF_RUNNING;
		break;
	case SIOCSIFMTU:
		if (ifr->ifr_mtu < PFSYNC_MINMTU) {
			lwkt_reltoken(&pf_token);
			return (EINVAL);
		}
		if (ifr->ifr_mtu > MCLBYTES)
			ifr->ifr_mtu = MCLBYTES;
		crit_enter();
		/* flush any pending packet before shrinking the MTU */
		if (ifr->ifr_mtu < ifp->if_mtu)
			pfsync_sendout(sc);
		pfsync_setmtu(sc, ifr->ifr_mtu);
		crit_exit();
		break;
	case SIOCGETPFSYNC:
		bzero(&pfsyncr, sizeof(pfsyncr));
		if (sc->sc_sync_ifp)
			strlcpy(pfsyncr.pfsyncr_syncdev,
			    sc->sc_sync_ifp->if_xname, IFNAMSIZ);
		pfsyncr.pfsyncr_syncpeer = sc->sc_sync_peer;
		pfsyncr.pfsyncr_maxupdates = sc->sc_maxupdates;
		/* drop the token around the copyout (may fault/sleep) */
		lwkt_reltoken(&pf_token);
		if ((error = copyout(&pfsyncr, ifr->ifr_data, sizeof(pfsyncr))))
			return (error);
		lwkt_gettoken(&pf_token);
		break;
	case SIOCSETPFSYNC:
		if ((error = priv_check_cred(cr, PRIV_ROOT, NULL_CRED_OKAY)) != 0) {
			lwkt_reltoken(&pf_token);
			return (error);
		}
		if ((error = copyin(ifr->ifr_data, &pfsyncr, sizeof(pfsyncr)))) {
			lwkt_reltoken(&pf_token);
			return (error);
		}

		/*
		 * NOTE(review): INADDR_PFSYNC_GROUP is used here without
		 * htonl(), while pfsync_clone_create() stores
		 * htonl(INADDR_PFSYNC_GROUP) — one of these byte orders is
		 * presumably wrong; verify against the comparisons below.
		 */
		if (pfsyncr.pfsyncr_syncpeer.s_addr == 0)
			sc->sc_sync_peer.s_addr = INADDR_PFSYNC_GROUP;
		else
			sc->sc_sync_peer.s_addr =
			    pfsyncr.pfsyncr_syncpeer.s_addr;

		if (pfsyncr.pfsyncr_maxupdates > 255) {
			lwkt_reltoken(&pf_token);
			return (EINVAL);
		}
		sc->sc_maxupdates = pfsyncr.pfsyncr_maxupdates;

		if (pfsyncr.pfsyncr_syncdev[0] == 0) {
			/* empty device name: tear down the sync interface */
			sc->sc_sync_ifp = NULL;
			if (sc->sc_mbuf_net != NULL) {
				/* Don't keep stale pfsync packets around. */
				crit_enter();
				m_freem(sc->sc_mbuf_net);
				sc->sc_mbuf_net = NULL;
				sc->sc_statep_net.s = NULL;
				crit_exit();
			}
			if (imo->imo_num_memberships > 0) {
				in_delmulti(imo->imo_membership[--imo->imo_num_memberships]);
				imo->imo_multicast_ifp = NULL;
			}
			break;
		}

		if ((sifp = ifunit(pfsyncr.pfsyncr_syncdev)) == NULL) {
			lwkt_reltoken(&pf_token);
			return (EINVAL);
		}

		crit_enter();
		/* flush pending output if the new device has a smaller MTU */
		if (sifp->if_mtu < sc->sc_if.if_mtu ||
		    (sc->sc_sync_ifp != NULL &&
		    sifp->if_mtu < sc->sc_sync_ifp->if_mtu) ||
		    sifp->if_mtu < MCLBYTES - sizeof(struct ip))
			pfsync_sendout(sc);
		sc->sc_sync_ifp = sifp;

		pfsync_setmtu(sc, sc->sc_if.if_mtu);

		/* leave any previous multicast membership */
		if (imo->imo_num_memberships > 0) {
			in_delmulti(imo->imo_membership[--imo->imo_num_memberships]);
			imo->imo_multicast_ifp = NULL;
		}

		/* join the pfsync group unless a unicast peer was given */
		if (sc->sc_sync_ifp &&
		    sc->sc_sync_peer.s_addr == INADDR_PFSYNC_GROUP) {
			struct in_addr addr;

			if (!(sc->sc_sync_ifp->if_flags & IFF_MULTICAST)) {
				sc->sc_sync_ifp = NULL;
				lwkt_reltoken(&pf_token);
				crit_exit();
				return (EADDRNOTAVAIL);
			}

			addr.s_addr = INADDR_PFSYNC_GROUP;

			if ((imo->imo_membership[0] =
			    in_addmulti(&addr, sc->sc_sync_ifp)) == NULL) {
				sc->sc_sync_ifp = NULL;
				lwkt_reltoken(&pf_token);
				crit_exit();
				return (ENOBUFS);
			}
			imo->imo_num_memberships++;
			imo->imo_multicast_ifp = sc->sc_sync_ifp;
			imo->imo_multicast_ttl = PFSYNC_DFLTTL;
imo->imo_multicast_loop = 0; 1118 } 1119 1120 if (sc->sc_sync_ifp || 1121 sc->sc_sendaddr.s_addr != INADDR_PFSYNC_GROUP) { 1122 /* Request a full state table update. */ 1123 sc->sc_ureq_sent = mycpu->gd_time_seconds; 1124 #if NCARP > 0 1125 if (pfsync_sync_ok) 1126 carp_group_demote_adj(&sc->sc_if, 1); 1127 #endif 1128 pfsync_sync_ok = 0; 1129 if (pf_status.debug >= PF_DEBUG_MISC) 1130 kprintf("pfsync: requesting bulk update\n"); 1131 lwkt_reltoken(&pf_token); 1132 callout_reset(&sc->sc_bulkfail_tmo, 5 * hz, 1133 pfsync_bulkfail, LIST_FIRST(&pfsync_list)); 1134 lwkt_gettoken(&pf_token); 1135 error = pfsync_request_update(NULL, NULL); 1136 if (error == ENOMEM) { 1137 lwkt_reltoken(&pf_token); 1138 crit_exit(); 1139 return (ENOMEM); 1140 } 1141 pfsync_sendout(sc); 1142 } 1143 crit_exit(); 1144 1145 break; 1146 1147 default: 1148 lwkt_reltoken(&pf_token); 1149 return (ENOTTY); 1150 } 1151 1152 lwkt_reltoken(&pf_token); 1153 return (0); 1154 } 1155 1156 void 1157 pfsync_setmtu(struct pfsync_softc *sc, int mtu_req) 1158 { 1159 int mtu; 1160 1161 if (sc->sc_sync_ifp && sc->sc_sync_ifp->if_mtu < mtu_req) 1162 mtu = sc->sc_sync_ifp->if_mtu; 1163 else 1164 mtu = mtu_req; 1165 1166 sc->sc_maxcount = (mtu - sizeof(struct pfsync_header)) / 1167 sizeof(struct pfsync_state); 1168 if (sc->sc_maxcount > 254) 1169 sc->sc_maxcount = 254; 1170 sc->sc_if.if_mtu = sizeof(struct pfsync_header) + 1171 sc->sc_maxcount * sizeof(struct pfsync_state); 1172 } 1173 1174 struct mbuf * 1175 pfsync_get_mbuf(struct pfsync_softc *sc, u_int8_t action, void **sp) 1176 { 1177 struct pfsync_header *h; 1178 struct mbuf *m; 1179 int len; 1180 1181 ASSERT_LWKT_TOKEN_HELD(&pf_token); 1182 1183 MGETHDR(m, M_WAITOK, MT_DATA); 1184 if (m == NULL) { 1185 sc->sc_if.if_oerrors++; 1186 return (NULL); 1187 } 1188 1189 switch (action) { 1190 case PFSYNC_ACT_CLR: 1191 len = sizeof(struct pfsync_header) + 1192 sizeof(struct pfsync_state_clr); 1193 break; 1194 case PFSYNC_ACT_UPD_C: 1195 len = (sc->sc_maxcount * 
sizeof(struct pfsync_state_upd)) + 1196 sizeof(struct pfsync_header); 1197 break; 1198 case PFSYNC_ACT_DEL_C: 1199 len = (sc->sc_maxcount * sizeof(struct pfsync_state_del)) + 1200 sizeof(struct pfsync_header); 1201 break; 1202 case PFSYNC_ACT_UREQ: 1203 len = (sc->sc_maxcount * sizeof(struct pfsync_state_upd_req)) + 1204 sizeof(struct pfsync_header); 1205 break; 1206 case PFSYNC_ACT_BUS: 1207 len = sizeof(struct pfsync_header) + 1208 sizeof(struct pfsync_state_bus); 1209 break; 1210 case PFSYNC_ACT_TDB_UPD: 1211 len = (sc->sc_maxcount * sizeof(struct pfsync_tdb)) + 1212 sizeof(struct pfsync_header); 1213 break; 1214 default: 1215 len = (sc->sc_maxcount * sizeof(struct pfsync_state)) + 1216 sizeof(struct pfsync_header); 1217 break; 1218 } 1219 1220 if (len > MHLEN) { 1221 MCLGET(m, M_WAITOK); 1222 if ((m->m_flags & M_EXT) == 0) { 1223 m_free(m); 1224 sc->sc_if.if_oerrors++; 1225 return (NULL); 1226 } 1227 m->m_data += (MCLBYTES - len) &~ (sizeof(long) - 1); 1228 } else 1229 MH_ALIGN(m, len); 1230 1231 m->m_pkthdr.rcvif = NULL; 1232 m->m_pkthdr.len = m->m_len = sizeof(struct pfsync_header); 1233 h = mtod(m, struct pfsync_header *); 1234 h->version = PFSYNC_VERSION; 1235 h->af = 0; 1236 h->count = 0; 1237 h->action = action; 1238 1239 *sp = (void *)((char *)h + PFSYNC_HDRLEN); 1240 lwkt_reltoken(&pf_token); 1241 callout_reset(&sc->sc_tmo, hz, pfsync_timeout, 1242 LIST_FIRST(&pfsync_list)); 1243 lwkt_gettoken(&pf_token); 1244 return (m); 1245 } 1246 1247 int 1248 pfsync_pack_state(u_int8_t action, struct pf_state *st, int flags) 1249 { 1250 struct ifnet *ifp = NULL; 1251 struct pfsync_softc *sc = pfsyncif; 1252 struct pfsync_header *h, *h_net; 1253 struct pfsync_state *sp = NULL; 1254 struct pfsync_state_upd *up = NULL; 1255 struct pfsync_state_del *dp = NULL; 1256 int ret = 0; 1257 u_int8_t i = 255, newaction = 0; 1258 1259 if (sc == NULL) 1260 return (0); 1261 ifp = &sc->sc_if; 1262 1263 /* 1264 * If a packet falls in the forest and there's nobody around to 1265 * 
hear, does it make a sound? 1266 */ 1267 if (ifp->if_bpf == NULL && sc->sc_sync_ifp == NULL && 1268 sc->sc_sync_peer.s_addr == INADDR_PFSYNC_GROUP) { 1269 /* Don't leave any stale pfsync packets hanging around. */ 1270 if (sc->sc_mbuf != NULL) { 1271 m_freem(sc->sc_mbuf); 1272 sc->sc_mbuf = NULL; 1273 sc->sc_statep.s = NULL; 1274 } 1275 return (0); 1276 } 1277 1278 if (action >= PFSYNC_ACT_MAX) 1279 return (EINVAL); 1280 1281 crit_enter(); 1282 if (sc->sc_mbuf == NULL) { 1283 if ((sc->sc_mbuf = pfsync_get_mbuf(sc, action, 1284 (void *)&sc->sc_statep.s)) == NULL) { 1285 crit_exit(); 1286 return (ENOMEM); 1287 } 1288 h = mtod(sc->sc_mbuf, struct pfsync_header *); 1289 } else { 1290 h = mtod(sc->sc_mbuf, struct pfsync_header *); 1291 if (h->action != action) { 1292 pfsync_sendout(sc); 1293 if ((sc->sc_mbuf = pfsync_get_mbuf(sc, action, 1294 (void *)&sc->sc_statep.s)) == NULL) { 1295 crit_exit(); 1296 return (ENOMEM); 1297 } 1298 h = mtod(sc->sc_mbuf, struct pfsync_header *); 1299 } else { 1300 /* 1301 * If it's an update, look in the packet to see if 1302 * we already have an update for the state. 
1303 */ 1304 if (action == PFSYNC_ACT_UPD && sc->sc_maxupdates) { 1305 struct pfsync_state *usp = 1306 (void *)((char *)h + PFSYNC_HDRLEN); 1307 1308 for (i = 0; i < h->count; i++) { 1309 if (!memcmp(usp->id, &st->id, 1310 PFSYNC_ID_LEN) && 1311 usp->creatorid == st->creatorid) { 1312 sp = usp; 1313 sp->updates++; 1314 break; 1315 } 1316 usp++; 1317 } 1318 } 1319 } 1320 } 1321 1322 st->pfsync_time = mycpu->gd_time_seconds;; 1323 1324 if (sp == NULL) { 1325 /* not a "duplicate" update */ 1326 i = 255; 1327 sp = sc->sc_statep.s++; 1328 sc->sc_mbuf->m_pkthdr.len = 1329 sc->sc_mbuf->m_len += sizeof(struct pfsync_state); 1330 h->count++; 1331 bzero(sp, sizeof(*sp)); 1332 1333 pfsync_state_export(sp, st); 1334 1335 if (flags & PFSYNC_FLAG_STALE) 1336 sp->sync_flags |= PFSTATE_STALE; 1337 } else { 1338 pf_state_peer_hton(&st->src, &sp->src); 1339 pf_state_peer_hton(&st->dst, &sp->dst); 1340 1341 if (st->expire <= time_second) 1342 sp->expire = htonl(0); 1343 else 1344 sp->expire = htonl(st->expire - time_second); 1345 } 1346 1347 /* do we need to build "compressed" actions for network transfer? 
*/ 1348 if (sc->sc_sync_ifp && flags & PFSYNC_FLAG_COMPRESS) { 1349 switch (action) { 1350 case PFSYNC_ACT_UPD: 1351 newaction = PFSYNC_ACT_UPD_C; 1352 break; 1353 case PFSYNC_ACT_DEL: 1354 newaction = PFSYNC_ACT_DEL_C; 1355 break; 1356 default: 1357 /* by default we just send the uncompressed states */ 1358 break; 1359 } 1360 } 1361 1362 if (newaction) { 1363 if (sc->sc_mbuf_net == NULL) { 1364 if ((sc->sc_mbuf_net = pfsync_get_mbuf(sc, newaction, 1365 (void *)&sc->sc_statep_net.s)) == NULL) { 1366 crit_exit(); 1367 return (ENOMEM); 1368 } 1369 } 1370 h_net = mtod(sc->sc_mbuf_net, struct pfsync_header *); 1371 1372 switch (newaction) { 1373 case PFSYNC_ACT_UPD_C: 1374 if (i != 255) { 1375 up = (void *)((char *)h_net + 1376 PFSYNC_HDRLEN + (i * sizeof(*up))); 1377 up->updates++; 1378 } else { 1379 h_net->count++; 1380 sc->sc_mbuf_net->m_pkthdr.len = 1381 sc->sc_mbuf_net->m_len += sizeof(*up); 1382 up = sc->sc_statep_net.u++; 1383 1384 bzero(up, sizeof(*up)); 1385 bcopy(&st->id, up->id, sizeof(up->id)); 1386 up->creatorid = st->creatorid; 1387 } 1388 up->timeout = st->timeout; 1389 up->expire = sp->expire; 1390 up->src = sp->src; 1391 up->dst = sp->dst; 1392 break; 1393 case PFSYNC_ACT_DEL_C: 1394 sc->sc_mbuf_net->m_pkthdr.len = 1395 sc->sc_mbuf_net->m_len += sizeof(*dp); 1396 dp = sc->sc_statep_net.d++; 1397 h_net->count++; 1398 1399 bzero(dp, sizeof(*dp)); 1400 bcopy(&st->id, dp->id, sizeof(dp->id)); 1401 dp->creatorid = st->creatorid; 1402 break; 1403 } 1404 } 1405 1406 if (h->count == sc->sc_maxcount || 1407 (sc->sc_maxupdates && (sp->updates >= sc->sc_maxupdates))) 1408 ret = pfsync_sendout(sc); 1409 1410 crit_exit(); 1411 return (ret); 1412 } 1413 1414 int 1415 pfsync_request_update(struct pfsync_state_upd *up, struct in_addr *src) 1416 { 1417 struct ifnet *ifp = NULL; 1418 struct pfsync_header *h; 1419 struct pfsync_softc *sc = pfsyncif; 1420 struct pfsync_state_upd_req *rup; 1421 int ret = 0; 1422 1423 if (sc == NULL) 1424 return (0); 1425 1426 ifp = 
	    &sc->sc_if;
	if (sc->sc_mbuf == NULL) {
		if ((sc->sc_mbuf = pfsync_get_mbuf(sc, PFSYNC_ACT_UREQ,
		    (void *)&sc->sc_statep.s)) == NULL)
			return (ENOMEM);
		h = mtod(sc->sc_mbuf, struct pfsync_header *);
	} else {
		h = mtod(sc->sc_mbuf, struct pfsync_header *);
		if (h->action != PFSYNC_ACT_UREQ) {
			/* Pending packet has another action type: flush it
			 * and start a fresh UREQ packet. */
			pfsync_sendout(sc);
			if ((sc->sc_mbuf = pfsync_get_mbuf(sc, PFSYNC_ACT_UREQ,
			    (void *)&sc->sc_statep.s)) == NULL)
				return (ENOMEM);
			h = mtod(sc->sc_mbuf, struct pfsync_header *);
		}
	}

	if (src != NULL)
		sc->sc_sendaddr = *src;
	sc->sc_mbuf->m_pkthdr.len = sc->sc_mbuf->m_len += sizeof(*rup);
	h->count++;
	rup = sc->sc_statep.r++;
	bzero(rup, sizeof(*rup));
	/* up == NULL leaves a zeroed (wildcard) request record. */
	if (up != NULL) {
		bcopy(up->id, rup->id, sizeof(rup->id));
		rup->creatorid = up->creatorid;
	}

	if (h->count == sc->sc_maxcount)
		ret = pfsync_sendout(sc);

	return (ret);
}

/*
 * Queue and immediately send a "clear states" message for the given
 * creator id, optionally restricted to states on the named interface.
 * Returns 0 or ENOMEM.
 */
int
pfsync_clear_states(u_int32_t creatorid, char *ifname)
{
	struct ifnet *ifp = NULL;
	struct pfsync_softc *sc = pfsyncif;
	struct pfsync_state_clr *cp;
	int ret;

	if (sc == NULL)
		return (0);

	ifp = &sc->sc_if;
	crit_enter();
	/* Flush any pending packet so CLR goes out in its own packet. */
	if (sc->sc_mbuf != NULL)
		pfsync_sendout(sc);
	if ((sc->sc_mbuf = pfsync_get_mbuf(sc, PFSYNC_ACT_CLR,
	    (void *)&sc->sc_statep.c)) == NULL) {
		crit_exit();
		return (ENOMEM);
	}
	sc->sc_mbuf->m_pkthdr.len = sc->sc_mbuf->m_len += sizeof(*cp);
	cp = sc->sc_statep.c;
	cp->creatorid = creatorid;
	if (ifname != NULL)
		strlcpy(cp->ifname, ifname, IFNAMSIZ);

	ret = (pfsync_sendout(sc));
	crit_exit();
	return (ret);
}

/*
 * Flush timeout: transmit whatever has been queued on the softc.
 * Armed by pfsync_get_mbuf() each time a packet is started.
 */
void
pfsync_timeout(void *v)
{
	struct pfsync_softc *sc = v;

	crit_enter();
	pfsync_sendout(sc);
	crit_exit();
}

/*
 * Send a bulk-update status (BUS) message — e.g. start/end of a bulk
 * transfer — stamped with our hostid and the elapsed time since the
 * update request was received.  Only sent while pfsync_sync_ok.
 */
void
pfsync_send_bus(struct pfsync_softc *sc, u_int8_t status)
{
	struct pfsync_state_bus *bus;

	if (sc->sc_mbuf != NULL)
		pfsync_sendout(sc);

	if (pfsync_sync_ok &&
	    (sc->sc_mbuf = pfsync_get_mbuf(sc, PFSYNC_ACT_BUS,
	    (void *)&sc->sc_statep.b)) != NULL) {
		sc->sc_mbuf->m_pkthdr.len = sc->sc_mbuf->m_len += sizeof(*bus);
		bus = sc->sc_statep.b;
		bus->creatorid = pf_status.hostid;
		bus->status = status;
		bus->endtime = htonl(mycpu->gd_time_seconds - sc->sc_ureq_received);
		pfsync_sendout(sc);
	}
}

/*
 * Bulk-update worker: walk the global state list from where the last
 * pass stopped, packing up to sc_maxcount * PFSYNC_BULKPACKETS states
 * that have not been sent since the request arrived.  Reschedules
 * itself until the walk wraps back to the terminator, then announces
 * completion with a BUS_END message.
 */
void
pfsync_bulk_update(void *v)
{
	struct pfsync_softc *sc = v;
	int i = 0;
	struct pf_state *state;

	ASSERT_LWKT_TOKEN_HELD(&pf_token);

	crit_enter();
	if (sc->sc_mbuf != NULL)
		pfsync_sendout(sc);

	/*
	 * Grab at most PFSYNC_BULKPACKETS worth of states which have not
	 * been sent since the latest request was made.
	 */
	state = sc->sc_bulk_send_next;
	if (state)
		do {
			/* send state update if syncable and not already sent */
			if (!state->sync_flags
			    && state->timeout < PFTM_MAX
			    && state->pfsync_time <= sc->sc_ureq_received) {
				pfsync_pack_state(PFSYNC_ACT_UPD, state, 0);
				i++;
			}

			/* figure next state to send */
			state = TAILQ_NEXT(state, entry_list);

			/* wrap to start of list if we hit the end */
			if (!state)
				state = TAILQ_FIRST(&state_list);
		} while (i < sc->sc_maxcount * PFSYNC_BULKPACKETS &&
		    state != sc->sc_bulk_terminator);

	if (!state || state == sc->sc_bulk_terminator) {
		/* we're done */
		pfsync_send_bus(sc, PFSYNC_BUS_END);
		sc->sc_ureq_received = 0;
		sc->sc_bulk_send_next = NULL;
		sc->sc_bulk_terminator = NULL;
		/* callout ops may block; drop pf_token across them. */
		lwkt_reltoken(&pf_token);
		callout_stop(&sc->sc_bulk_tmo);
		lwkt_gettoken(&pf_token);
		if (pf_status.debug >= PF_DEBUG_MISC)
			kprintf("pfsync: bulk update complete\n");
	} else {
		/* look again for more in a bit */
		lwkt_reltoken(&pf_token);
		callout_reset(&sc->sc_bulk_tmo, 1, pfsync_timeout,
		    LIST_FIRST(&pfsync_list));
		lwkt_gettoken(&pf_token);
		sc->sc_bulk_send_next = state;
	}
	if (sc->sc_mbuf != NULL)
		pfsync_sendout(sc);
	crit_exit();
}

/*
 * Bulk-update failure timer: re-request the bulk update up to
 * PFSYNC_MAX_BULKTRIES times, then give up and pretend the transfer
 * succeeded (restoring CARP demotion and pfsync_sync_ok) so the box
 * does not stay demoted forever.
 */
void
pfsync_bulkfail(void *v)
{
	struct pfsync_softc *sc = v;
	int error;

	ASSERT_LWKT_TOKEN_HELD(&pf_token);

	if (sc->sc_bulk_tries++ < PFSYNC_MAX_BULKTRIES) {
		/* Try again in a bit */
		lwkt_reltoken(&pf_token);
		callout_reset(&sc->sc_bulkfail_tmo, 5 * hz, pfsync_bulkfail,
		    LIST_FIRST(&pfsync_list));
		lwkt_gettoken(&pf_token);
		crit_enter();
		error = pfsync_request_update(NULL, NULL);
		if (error == ENOMEM) {
			if (pf_status.debug >= PF_DEBUG_MISC)
				kprintf("pfsync: cannot allocate mbufs for "
				    "bulk update\n");
		} else
			pfsync_sendout(sc);
		crit_exit();
	} else {
		/* Pretend like the transfer was ok */
		sc->sc_ureq_sent = 0;
		sc->sc_bulk_tries = 0;
#if NCARP > 0
		if (!pfsync_sync_ok)
			carp_group_demote_adj(&sc->sc_if, -1);
#endif
		pfsync_sync_ok = 1;
		if (pf_status.debug >= PF_DEBUG_MISC)
			kprintf("pfsync: failed to receive "
			    "bulk update status\n");
		lwkt_reltoken(&pf_token);
		callout_stop(&sc->sc_bulkfail_tmo);
		lwkt_gettoken(&pf_token);
	}
}

/*
 * Detach the queued packet from the softc, hand it to BPF listeners,
 * and pass it (or the pending compressed packet, which replaces it if
 * one exists) to pfsync_sendout_mbuf() for transmission.
 */
/* This must be called in splnet() */
int
pfsync_sendout(struct pfsync_softc *sc)
{
#if NBPFILTER > 0
	struct ifnet *ifp = &sc->sc_if;
#endif
	struct mbuf *m;

	ASSERT_LWKT_TOKEN_HELD(&pf_token);

	lwkt_reltoken(&pf_token);
	callout_stop(&sc->sc_tmo);
	lwkt_gettoken(&pf_token);

	if (sc->sc_mbuf == NULL)
		return (0);
	m = sc->sc_mbuf;
	sc->sc_mbuf = NULL;
	sc->sc_statep.s = NULL;

#if NBPFILTER > 0
	if (ifp->if_bpf) {
		lwkt_reltoken(&pf_token);
		bpf_mtap(ifp->if_bpf, m, BPF_DIRECTION_OUT);
		lwkt_gettoken(&pf_token);
	}
#endif

	/* Prefer the compressed wire-format packet for the network; the
	 * uncompressed one has already been handed to BPF above. */
	if (sc->sc_mbuf_net) {
		m_freem(m);
		m = sc->sc_mbuf_net;
		sc->sc_mbuf_net = NULL;
		sc->sc_statep_net.s = NULL;
	}

	return pfsync_sendout_mbuf(sc, m);
}

/*
 * Prepend an IP header and transmit the pfsync packet via ip_output()
 * to the configured peer (or the pfsync multicast group).  If no sync
 * interface or peer is configured the packet is simply dropped.
 * Always returns 0; transmit errors are counted in pfsyncstats.
 */
int
pfsync_sendout_mbuf(struct pfsync_softc *sc, struct mbuf *m)
{
	struct sockaddr sa;
	struct ip *ip;

	if (sc->sc_sync_ifp ||
	    sc->sc_sync_peer.s_addr != INADDR_PFSYNC_GROUP) {
		M_PREPEND(m, sizeof(struct ip), M_WAITOK);
		if (m == NULL) {
			pfsyncstats.pfsyncs_onomem++;
			return (0);
		}
		ip = mtod(m, struct ip *);
		ip->ip_v = IPVERSION;
		ip->ip_hl = sizeof(*ip) >> 2;
		ip->ip_tos = IPTOS_LOWDELAY;
		ip->ip_len = htons(m->m_pkthdr.len);
		ip->ip_id = htons(ip_randomid());
		ip->ip_off = htons(IP_DF);
		ip->ip_ttl = PFSYNC_DFLTTL;
		ip->ip_p = IPPROTO_PFSYNC;
		ip->ip_sum = 0;

		/* NOTE(review): 'sa' is zeroed here but never used. */
		bzero(&sa, sizeof(sa));
		ip->ip_src.s_addr = INADDR_ANY;

		if (sc->sc_sendaddr.s_addr == INADDR_PFSYNC_GROUP)
			m->m_flags |= M_MCAST;
		ip->ip_dst = sc->sc_sendaddr;
		/* Reset the destination for the next packet; a UREQ may
		 * have pointed sc_sendaddr at a specific requester. */
		sc->sc_sendaddr.s_addr = sc->sc_sync_peer.s_addr;

		pfsyncstats.pfsyncs_opackets++;

		if (ip_output(m, NULL, NULL, IP_RAWOUTPUT, &sc->sc_imo, NULL))
			pfsyncstats.pfsyncs_oerrors++;
	} else
		m_freem(m);

	return (0);
}

/*
 * Module load/unload handler: attaches the pfsync interface cloner on
 * load and, on unload, detaches it and destroys every remaining clone.
 */
static int
pfsync_modevent(module_t mod, int type, void *data)
{
	int error = 0;

	struct pfsync_softc *pfs_if, *tmp;

	lwkt_gettoken(&pf_token);

	switch (type) {
	case MOD_LOAD:
		LIST_INIT(&pfsync_list);
		lwkt_reltoken(&pf_token);
		if_clone_attach(&pfsync_cloner);
		lwkt_gettoken(&pf_token);
		/* Override the function pointer for pf_ioctl.c */
		break;

	case MOD_UNLOAD:
		lwkt_reltoken(&pf_token);
		if_clone_detach(&pfsync_cloner);
		lwkt_gettoken(&pf_token);
		LIST_FOREACH_MUTABLE(pfs_if, &pfsync_list, sc_next, tmp) {
			pfsync_clone_destroy(&pfs_if->sc_if);
		}
		break;

	default:
		error = EINVAL;
		break;
	}

	lwkt_reltoken(&pf_token);
	return error;
}

static moduledata_t pfsync_mod = {
	"pfsync",
	pfsync_modevent,
	0
};

#define PFSYNC_MODVER 44

DECLARE_MODULE(pfsync, pfsync_mod, SI_SUB_PSEUDO, SI_ORDER_ANY);
MODULE_VERSION(pfsync, PFSYNC_MODVER);