1 /* 2 * Copyright (c) 2002 Michael Shalayeff 3 * All rights reserved. 4 * 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted provided that the following conditions 7 * are met: 8 * 1. Redistributions of source code must retain the above copyright 9 * notice, this list of conditions and the following disclaimer. 10 * 2. Redistributions in binary form must reproduce the above copyright 11 * notice, this list of conditions and the following disclaimer in the 12 * documentation and/or other materials provided with the distribution. 13 * 14 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR 15 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 16 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 17 * IN NO EVENT SHALL THE AUTHOR OR HIS RELATIVES BE LIABLE FOR ANY DIRECT, 18 * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 19 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 20 * SERVICES; LOSS OF MIND, USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 21 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, 22 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING 23 * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF 24 * THE POSSIBILITY OF SUCH DAMAGE. 25 * 26 * $OpenBSD: if_pfsync.c,v 1.98 2008/06/29 08:42:15 mcbride Exp $ 27 */ 28 29 #include "opt_inet.h" 30 #include "opt_inet6.h" 31 #include "opt_carp.h" 32 33 #include <sys/param.h> 34 #include <sys/endian.h> 35 #include <sys/proc.h> 36 #include <sys/priv.h> 37 #include <sys/systm.h> 38 #include <sys/time.h> 39 #include <sys/mbuf.h> 40 #include <sys/socket.h> 41 #include <sys/kernel.h> 42 #include <sys/malloc.h> 43 #include <sys/module.h> 44 #include <sys/msgport2.h> 45 #include <sys/sockio.h> 46 #include <sys/thread2.h> 47 48 #include <machine/inttypes.h> 49 50 #include <net/if.h> 51 #include <net/if_types.h> 52 #include <net/ifq_var.h> 53 #include <net/route.h> 54 #include <net/bpf.h> 55 #include <net/netisr2.h> 56 #include <net/netmsg2.h> 57 #include <netinet/in.h> 58 #include <netinet/if_ether.h> 59 #include <netinet/ip_carp.h> 60 #include <netinet/tcp.h> 61 #include <netinet/tcp_seq.h> 62 63 #ifdef INET 64 #include <netinet/in_systm.h> 65 #include <netinet/in_var.h> 66 #include <netinet/ip.h> 67 #include <netinet/ip_var.h> 68 #endif 69 70 #ifdef INET6 71 #include <netinet6/nd6.h> 72 #endif /* INET6 */ 73 74 #include <net/pf/pfvar.h> 75 #include <net/pf/if_pfsync.h> 76 77 #define PFSYNCNAME "pfsync" 78 79 #define PFSYNC_MINMTU \ 80 (sizeof(struct pfsync_header) + sizeof(struct pf_state)) 81 82 #ifdef PFSYNCDEBUG 83 #define DPRINTF(x) do { if (pfsyncdebug) kprintf x ; } while (0) 84 int pfsyncdebug; 85 #else 86 #define DPRINTF(x) 87 #endif 88 89 struct pfsync_softc *pfsyncif = NULL; 90 struct pfsyncstats pfsyncstats; 91 92 void pfsyncattach(int); 93 static int pfsync_clone_destroy(struct ifnet *); 94 static int pfsync_clone_create(struct if_clone *, int, caddr_t); 95 void pfsync_setmtu(struct pfsync_softc *, int); 96 int pfsync_alloc_scrub_memory(struct pfsync_state_peer *, 97 struct pf_state_peer *); 98 int pfsyncoutput(struct ifnet *, struct mbuf *, struct sockaddr *, 99 struct rtentry *); 100 int pfsyncioctl(struct ifnet *, u_long, caddr_t, struct ucred *); 101 void pfsyncstart(struct ifnet *, struct ifaltq_subque *); 102 103 struct mbuf *pfsync_get_mbuf(struct pfsync_softc *, u_int8_t, void **); 104 int pfsync_request_update(struct pfsync_state_upd *, struct in_addr *); 105 int pfsync_sendout(struct pfsync_softc *); 106 int pfsync_sendout_mbuf(struct pfsync_softc *, struct mbuf *); 107 void pfsync_timeout(void *); 108 void pfsync_send_bus(struct pfsync_softc *, u_int8_t); 109 void pfsync_bulk_update(void *); 110 void pfsync_bulkfail(void *); 111 112 static struct in_multi *pfsync_in_addmulti(struct ifnet *); 113 static void pfsync_in_delmulti(struct in_multi *); 114 115 static MALLOC_DEFINE(M_PFSYNC, PFSYNCNAME, "Packet Filter State Sync. Interface"); 116 static LIST_HEAD(pfsync_list, pfsync_softc) pfsync_list; 117 118 int pfsync_sync_ok; 119 120 struct if_clone pfsync_cloner = 121 IF_CLONE_INITIALIZER("pfsync", pfsync_clone_create, pfsync_clone_destroy, 1 ,1); 122 123 void 124 pfsyncattach(int npfsync) 125 { 126 if_clone_attach(&pfsync_cloner); 127 } 128 static int 129 pfsync_clone_create(struct if_clone *ifc, int unit, caddr_t param __unused) 130 { 131 struct pfsync_softc *sc; 132 struct ifnet *ifp; 133 134 lwkt_gettoken(&pf_token); 135 136 sc = kmalloc(sizeof(*sc), M_PFSYNC, M_WAITOK | M_ZERO); 137 pfsync_sync_ok = 1; 138 sc->sc_mbuf = NULL; 139 sc->sc_mbuf_net = NULL; 140 sc->sc_mbuf_tdb = NULL; 141 sc->sc_statep.s = NULL; 142 sc->sc_statep_net.s = NULL; 143 sc->sc_statep_tdb.t = NULL; 144 sc->sc_maxupdates = 128; 145 sc->sc_sync_peer.s_addr =htonl(INADDR_PFSYNC_GROUP); 146 sc->sc_sendaddr.s_addr = htonl(INADDR_PFSYNC_GROUP); 147 sc->sc_ureq_received = 0; 148 sc->sc_ureq_sent = 0; 149 sc->sc_bulk_send_next = NULL; 150 sc->sc_bulk_terminator = NULL; 151 sc->sc_bulk_send_cpu = 0; 152 sc->sc_bulk_terminator_cpu = 0; 153 sc->sc_imo.imo_max_memberships = IP_MAX_MEMBERSHIPS; 154 lwkt_reltoken(&pf_token); 155 ifp = &sc->sc_if; 156 ksnprintf(ifp->if_xname, sizeof ifp->if_xname, "pfsync%d", unit); 157 if_initname(ifp, ifc->ifc_name, unit); 158 ifp->if_ioctl = pfsyncioctl; 159 ifp->if_output = pfsyncoutput; 160 ifp->if_start = pfsyncstart; 161 ifp->if_type = IFT_PFSYNC; 162 ifq_set_maxlen(&ifp->if_snd, ifqmaxlen); 163 ifp->if_hdrlen = PFSYNC_HDRLEN; 164 ifp->if_baudrate = IF_Mbps(100); 165 ifp->if_softc = sc; 166 pfsync_setmtu(sc, MCLBYTES); 167 callout_init(&sc->sc_tmo); 168 /* callout_init(&sc->sc_tdb_tmo); XXX we don't support tdb (yet) */ 169 callout_init(&sc->sc_bulk_tmo); 170 callout_init(&sc->sc_bulkfail_tmo); 171 if_attach(ifp, NULL); 172 173 LIST_INSERT_HEAD(&pfsync_list, sc, sc_next); 174 175 176 #if NCARP > 0 177 if_addgroup(ifp, "carp"); 178 #endif 179 180 #if NBPFILTER > 0 181 bpfattach(&sc->sc_if, DLT_PFSYNC, PFSYNC_HDRLEN); 182 #endif 183 lwkt_gettoken(&pf_token); 184 185 lwkt_reltoken(&pf_token); 186 return (0); 187 } 188 189 static int 190 pfsync_clone_destroy(struct ifnet *ifp) 191 { 192 struct netmsg_base msg; 193 194 lwkt_gettoken(&pf_token); 195 lwkt_reltoken(&pf_token); 196 197 struct pfsync_softc *sc = ifp->if_softc; 198 callout_stop(&sc->sc_tmo); 199 /* callout_stop(&sc->sc_tdb_tmo); XXX we don't support tdb (yet) */ 200 callout_stop(&sc->sc_bulk_tmo); 201 callout_stop(&sc->sc_bulkfail_tmo); 202 #if NCARP > 0 203 if (!pfsync_sync_ok) 204 carp_group_demote_adj(&sc->sc_if, -1); 205 #endif 206 207 /* Unpend async sendouts. */ 208 netmsg_init(&msg, NULL, &curthread->td_msgport, 0, netmsg_sync_handler); 209 netisr_domsg(&msg, 0); 210 211 #if NBPFILTER > 0 212 bpfdetach(ifp); 213 #endif 214 if_detach(ifp); 215 lwkt_gettoken(&pf_token); 216 LIST_REMOVE(sc, sc_next); 217 kfree(sc, M_PFSYNC); 218 lwkt_reltoken(&pf_token); 219 220 221 return 0; 222 } 223 224 /* 225 * Start output on the pfsync interface. 226 */ 227 void 228 pfsyncstart(struct ifnet *ifp, struct ifaltq_subque *ifsq) 229 { 230 ASSERT_ALTQ_SQ_DEFAULT(ifp, ifsq); 231 ifsq_purge(ifsq); 232 } 233 234 int 235 pfsync_alloc_scrub_memory(struct pfsync_state_peer *s, 236 struct pf_state_peer *d) 237 { 238 if (s->scrub.scrub_flag && d->scrub == NULL) { 239 d->scrub = kmalloc(sizeof(struct pf_state_scrub), M_PFSYNC, M_NOWAIT|M_ZERO); 240 241 if (d->scrub == NULL) 242 return (ENOMEM); 243 } 244 245 return (0); 246 } 247 248 void 249 pfsync_state_export(struct pfsync_state *sp, struct pf_state *st) 250 { 251 bzero(sp, sizeof(struct pfsync_state)); 252 253 /* copy from state key */ 254 sp->key[PF_SK_WIRE].addr[0] = st->key[PF_SK_WIRE]->addr[0]; 255 sp->key[PF_SK_WIRE].addr[1] = st->key[PF_SK_WIRE]->addr[1]; 256 sp->key[PF_SK_WIRE].port[0] = st->key[PF_SK_WIRE]->port[0]; 257 sp->key[PF_SK_WIRE].port[1] = st->key[PF_SK_WIRE]->port[1]; 258 sp->key[PF_SK_STACK].addr[0] = st->key[PF_SK_STACK]->addr[0]; 259 sp->key[PF_SK_STACK].addr[1] = st->key[PF_SK_STACK]->addr[1]; 260 sp->key[PF_SK_STACK].port[0] = st->key[PF_SK_STACK]->port[0]; 261 sp->key[PF_SK_STACK].port[1] = st->key[PF_SK_STACK]->port[1]; 262 sp->proto = st->key[PF_SK_WIRE]->proto; 263 sp->af = st->key[PF_SK_WIRE]->af; 264 265 /* copy from state */ 266 strlcpy(sp->ifname, st->kif->pfik_name, sizeof(sp->ifname)); 267 bcopy(&st->rt_addr, &sp->rt_addr, sizeof(sp->rt_addr)); 268 sp->creation = htonl(time_second - st->creation); 269 sp->expire = pf_state_expires(st); 270 if (sp->expire <= time_second) 271 sp->expire = htonl(0); 272 else 273 sp->expire = htonl(sp->expire - time_second); 274 275 sp->direction = st->direction; 276 sp->log = st->log; 277 sp->cpuid = st->cpuid; 278 sp->pickup_mode = st->pickup_mode; 279 sp->timeout = st->timeout; 280 sp->state_flags = st->state_flags; 281 if (st->src_node) 282 sp->sync_flags |= PFSYNC_FLAG_SRCNODE; 283 if (st->nat_src_node) 284 sp->sync_flags |= PFSYNC_FLAG_NATSRCNODE; 285 286 bcopy(&st->id, &sp->id, sizeof(sp->id)); 287 sp->creatorid = st->creatorid; 288 pf_state_peer_hton(&st->src, &sp->src); 289 pf_state_peer_hton(&st->dst, &sp->dst); 290 291 if (st->rule.ptr == NULL) 292 sp->rule = htonl(-1); 293 else 294 sp->rule = htonl(st->rule.ptr->nr); 295 if (st->anchor.ptr == NULL) 296 sp->anchor = htonl(-1); 297 else 298 sp->anchor = htonl(st->anchor.ptr->nr); 299 if (st->nat_rule.ptr == NULL) 300 sp->nat_rule = htonl(-1); 301 else 302 sp->nat_rule = htonl(st->nat_rule.ptr->nr); 303 304 pf_state_counter_hton(st->packets[0], sp->packets[0]); 305 pf_state_counter_hton(st->packets[1], sp->packets[1]); 306 pf_state_counter_hton(st->bytes[0], sp->bytes[0]); 307 pf_state_counter_hton(st->bytes[1], sp->bytes[1]); 308 309 } 310 311 int 312 pfsync_state_import(struct pfsync_state *sp, u_int8_t flags) 313 { 314 struct pf_state *st = NULL; 315 struct pf_state_key *skw = NULL, *sks = NULL; 316 struct pf_rule *r = NULL; 317 struct pfi_kif *kif; 318 int pool_flags; 319 int error; 320 321 if (sp->creatorid == 0 && pf_status.debug >= PF_DEBUG_MISC) { 322 kprintf("pfsync_insert_net_state: invalid creator id:" 323 " %08x\n", ntohl(sp->creatorid)); 324 return (EINVAL); 325 } 326 327 if ((kif = pfi_kif_get(sp->ifname)) == NULL) { 328 if (pf_status.debug >= PF_DEBUG_MISC) 329 kprintf("pfsync_insert_net_state: " 330 "unknown interface: %s\n", sp->ifname); 331 if (flags & PFSYNC_SI_IOCTL) 332 return (EINVAL); 333 return (0); /* skip this state */ 334 } 335 336 /* 337 * If the ruleset checksums match or the state is coming from the ioctl, 338 * it's safe to associate the state with the rule of that number. 339 */ 340 if (sp->rule != htonl(-1) && sp->anchor == htonl(-1) && 341 (flags & (PFSYNC_SI_IOCTL | PFSYNC_SI_CKSUM)) && ntohl(sp->rule) < 342 pf_main_ruleset.rules[PF_RULESET_FILTER].active.rcount) 343 r = pf_main_ruleset.rules[ 344 PF_RULESET_FILTER].active.ptr_array[ntohl(sp->rule)]; 345 else 346 r = &pf_default_rule; 347 348 if ((r->max_states && r->states_cur >= r->max_states)) 349 goto cleanup; 350 351 if (flags & PFSYNC_SI_IOCTL) 352 pool_flags = M_WAITOK | M_NULLOK | M_ZERO; 353 else 354 pool_flags = M_WAITOK | M_ZERO; 355 356 if ((st = kmalloc(sizeof(struct pf_state), M_PFSYNC, pool_flags)) == NULL) 357 goto cleanup; 358 lockinit(&st->lk, "pfstlk", 0, 0); 359 360 if ((skw = pf_alloc_state_key(pool_flags)) == NULL) 361 goto cleanup; 362 363 if (PF_ANEQ(&sp->key[PF_SK_WIRE].addr[0], 364 &sp->key[PF_SK_STACK].addr[0], sp->af) || 365 PF_ANEQ(&sp->key[PF_SK_WIRE].addr[1], 366 &sp->key[PF_SK_STACK].addr[1], sp->af) || 367 sp->key[PF_SK_WIRE].port[0] != sp->key[PF_SK_STACK].port[0] || 368 sp->key[PF_SK_WIRE].port[1] != sp->key[PF_SK_STACK].port[1]) { 369 if ((sks = pf_alloc_state_key(pool_flags)) == NULL) 370 goto cleanup; 371 } else 372 sks = skw; 373 374 /* allocate memory for scrub info */ 375 if (pfsync_alloc_scrub_memory(&sp->src, &st->src) || 376 pfsync_alloc_scrub_memory(&sp->dst, &st->dst)) 377 goto cleanup; 378 379 /* copy to state key(s) */ 380 skw->addr[0] = sp->key[PF_SK_WIRE].addr[0]; 381 skw->addr[1] = sp->key[PF_SK_WIRE].addr[1]; 382 skw->port[0] = sp->key[PF_SK_WIRE].port[0]; 383 skw->port[1] = sp->key[PF_SK_WIRE].port[1]; 384 skw->proto = sp->proto; 385 skw->af = sp->af; 386 if (sks != skw) { 387 sks->addr[0] = sp->key[PF_SK_STACK].addr[0]; 388 sks->addr[1] = sp->key[PF_SK_STACK].addr[1]; 389 sks->port[0] = sp->key[PF_SK_STACK].port[0]; 390 sks->port[1] = sp->key[PF_SK_STACK].port[1]; 391 sks->proto = sp->proto; 392 sks->af = sp->af; 393 } 394 395 /* copy to state */ 396 bcopy(&sp->rt_addr, &st->rt_addr, sizeof(st->rt_addr)); 397 st->creation = time_second - ntohl(sp->creation); 398 st->expire = time_second; 399 if (sp->expire) { 400 /* XXX No adaptive scaling. */ 401 st->expire -= r->timeout[sp->timeout] - ntohl(sp->expire); 402 } 403 404 st->expire = ntohl(sp->expire) + time_second; 405 st->direction = sp->direction; 406 st->log = sp->log; 407 st->timeout = sp->timeout; 408 st->state_flags = sp->state_flags; 409 if (!(flags & PFSYNC_SI_IOCTL)) 410 st->sync_flags = PFSTATE_FROMSYNC; 411 412 bcopy(sp->id, &st->id, sizeof(st->id)); 413 st->creatorid = sp->creatorid; 414 pf_state_peer_ntoh(&sp->src, &st->src); 415 pf_state_peer_ntoh(&sp->dst, &st->dst); 416 417 st->rule.ptr = r; 418 st->nat_rule.ptr = NULL; 419 st->anchor.ptr = NULL; 420 st->rt_kif = NULL; 421 422 st->pfsync_time = 0; 423 424 425 /* XXX when we have nat_rule/anchors, use STATE_INC_COUNTERS */ 426 r->states_cur++; 427 r->states_tot++; 428 429 if ((error = pf_state_insert(kif, skw, sks, st)) != 0) { 430 /* XXX when we have nat_rule/anchors, use STATE_DEC_COUNTERS */ 431 r->states_cur--; 432 goto cleanup_state; 433 } 434 435 return (0); 436 437 cleanup: 438 error = ENOMEM; 439 if (skw == sks) 440 sks = NULL; 441 if (skw != NULL) 442 kfree(skw, M_PFSYNC); 443 if (sks != NULL) 444 kfree(sks, M_PFSYNC); 445 446 cleanup_state: /* pf_state_insert frees the state keys */ 447 if (st) { 448 if (st->dst.scrub) 449 kfree(st->dst.scrub, M_PFSYNC); 450 if (st->src.scrub) 451 kfree(st->src.scrub, M_PFSYNC); 452 kfree(st, M_PFSYNC); 453 } 454 return (error); 455 } 456 457 void 458 pfsync_input(struct mbuf *m, ...) 459 { 460 struct ip *ip = mtod(m, struct ip *); 461 struct pfsync_header *ph; 462 struct pfsync_softc *sc = pfsyncif; 463 struct pf_state *st; 464 struct pf_state_key *sk; 465 struct pf_state_item *si; 466 struct pf_state_cmp id_key; 467 struct pfsync_state *sp; 468 struct pfsync_state_upd *up; 469 struct pfsync_state_del *dp; 470 struct pfsync_state_clr *cp; 471 struct pfsync_state_upd_req *rup; 472 struct pfsync_state_bus *bus; 473 struct in_addr src; 474 struct mbuf *mp; 475 int iplen, action, error, i, count, offp, sfail, stale = 0; 476 u_int8_t flags = 0; 477 478 /* This function is not yet called from anywhere */ 479 /* Still we assume for safety that pf_token must be held */ 480 ASSERT_LWKT_TOKEN_HELD(&pf_token); 481 482 pfsyncstats.pfsyncs_ipackets++; 483 484 /* verify that we have a sync interface configured */ 485 if (!sc || !sc->sc_sync_ifp || !pf_status.running) 486 goto done; 487 488 /* verify that the packet came in on the right interface */ 489 if (sc->sc_sync_ifp != m->m_pkthdr.rcvif) { 490 pfsyncstats.pfsyncs_badif++; 491 goto done; 492 } 493 494 /* verify that the IP TTL is 255. */ 495 if (ip->ip_ttl != PFSYNC_DFLTTL) { 496 pfsyncstats.pfsyncs_badttl++; 497 goto done; 498 } 499 500 iplen = ip->ip_hl << 2; 501 502 if (m->m_pkthdr.len < iplen + sizeof(*ph)) { 503 pfsyncstats.pfsyncs_hdrops++; 504 goto done; 505 } 506 507 if (iplen + sizeof(*ph) > m->m_len) { 508 if ((m = m_pullup(m, iplen + sizeof(*ph))) == NULL) { 509 pfsyncstats.pfsyncs_hdrops++; 510 goto done; 511 } 512 ip = mtod(m, struct ip *); 513 } 514 ph = (struct pfsync_header *)((char *)ip + iplen); 515 516 /* verify the version */ 517 if (ph->version != PFSYNC_VERSION) { 518 pfsyncstats.pfsyncs_badver++; 519 goto done; 520 } 521 522 action = ph->action; 523 count = ph->count; 524 525 /* make sure it's a valid action code */ 526 if (action >= PFSYNC_ACT_MAX) { 527 pfsyncstats.pfsyncs_badact++; 528 goto done; 529 } 530 531 /* Cheaper to grab this now than having to mess with mbufs later */ 532 src = ip->ip_src; 533 534 if (!bcmp(&ph->pf_chksum, &pf_status.pf_chksum, PF_MD5_DIGEST_LENGTH)) 535 flags |= PFSYNC_SI_CKSUM; 536 537 switch (action) { 538 case PFSYNC_ACT_CLR: { 539 struct pf_state *nexts; 540 struct pf_state_key *nextsk; 541 struct pfi_kif *kif; 542 globaldata_t save_gd = mycpu; 543 int nn; 544 545 u_int32_t creatorid; 546 if ((mp = m_pulldown(m, iplen + sizeof(*ph), 547 sizeof(*cp), &offp)) == NULL) { 548 pfsyncstats.pfsyncs_badlen++; 549 return; 550 } 551 cp = (struct pfsync_state_clr *)(mp->m_data + offp); 552 creatorid = cp->creatorid; 553 554 crit_enter(); 555 if (cp->ifname[0] == '\0') { 556 lwkt_gettoken(&pf_token); 557 for (nn = 0; nn < ncpus; ++nn) { 558 lwkt_setcpu_self(globaldata_find(nn)); 559 for (st = RB_MIN(pf_state_tree_id, 560 &tree_id[nn]); 561 st; st = nexts) { 562 nexts = RB_NEXT(pf_state_tree_id, 563 &tree_id[n], st); 564 if (st->creatorid == creatorid) { 565 st->sync_flags |= 566 PFSTATE_FROMSYNC; 567 pf_unlink_state(st); 568 } 569 } 570 } 571 lwkt_setcpu_self(save_gd); 572 lwkt_reltoken(&pf_token); 573 } else { 574 if ((kif = pfi_kif_get(cp->ifname)) == NULL) { 575 crit_exit(); 576 return; 577 } 578 /* XXX correct? */ 579 lwkt_gettoken(&pf_token); 580 for (nn = 0; nn < ncpus; ++nn) { 581 lwkt_setcpu_self(globaldata_find(nn)); 582 for (sk = RB_MIN(pf_state_tree, 583 &pf_statetbl[nn]); 584 sk; 585 sk = nextsk) { 586 nextsk = RB_NEXT(pf_state_tree, 587 &pf_statetbl[n], sk); 588 TAILQ_FOREACH(si, &sk->states, entry) { 589 if (si->s->creatorid == 590 creatorid) { 591 si->s->sync_flags |= 592 PFSTATE_FROMSYNC; 593 pf_unlink_state(si->s); 594 } 595 } 596 } 597 } 598 lwkt_setcpu_self(save_gd); 599 lwkt_reltoken(&pf_token); 600 } 601 crit_exit(); 602 603 break; 604 } 605 case PFSYNC_ACT_INS: 606 if ((mp = m_pulldown(m, iplen + sizeof(*ph), 607 count * sizeof(*sp), &offp)) == NULL) { 608 pfsyncstats.pfsyncs_badlen++; 609 return; 610 } 611 612 crit_enter(); 613 for (i = 0, sp = (struct pfsync_state *)(mp->m_data + offp); 614 i < count; i++, sp++) { 615 /* check for invalid values */ 616 if (sp->timeout >= PFTM_MAX || 617 sp->src.state > PF_TCPS_PROXY_DST || 618 sp->dst.state > PF_TCPS_PROXY_DST || 619 sp->direction > PF_OUT || 620 (sp->af != AF_INET && sp->af != AF_INET6)) { 621 if (pf_status.debug >= PF_DEBUG_MISC) 622 kprintf("pfsync_insert: PFSYNC_ACT_INS: " 623 "invalid value\n"); 624 pfsyncstats.pfsyncs_badval++; 625 continue; 626 } 627 628 if ((error = pfsync_state_import(sp, flags))) { 629 if (error == ENOMEM) { 630 crit_exit(); 631 goto done; 632 } 633 } 634 } 635 crit_exit(); 636 break; 637 case PFSYNC_ACT_UPD: 638 if ((mp = m_pulldown(m, iplen + sizeof(*ph), 639 count * sizeof(*sp), &offp)) == NULL) { 640 pfsyncstats.pfsyncs_badlen++; 641 return; 642 } 643 644 crit_enter(); 645 for (i = 0, sp = (struct pfsync_state *)(mp->m_data + offp); 646 i < count; i++, sp++) { 647 int flags = PFSYNC_FLAG_STALE; 648 649 /* check for invalid values */ 650 if (sp->timeout >= PFTM_MAX || 651 sp->src.state > PF_TCPS_PROXY_DST || 652 sp->dst.state > PF_TCPS_PROXY_DST) { 653 if (pf_status.debug >= PF_DEBUG_MISC) 654 kprintf("pfsync_insert: PFSYNC_ACT_UPD: " 655 "invalid value\n"); 656 pfsyncstats.pfsyncs_badval++; 657 continue; 658 } 659 660 bcopy(sp->id, &id_key.id, sizeof(id_key.id)); 661 id_key.creatorid = sp->creatorid; 662 663 st = pf_find_state_byid(&id_key); 664 if (st == NULL) { 665 /* insert the update */ 666 if (pfsync_state_import(sp, flags)) 667 pfsyncstats.pfsyncs_badstate++; 668 continue; 669 } 670 sk = st->key[PF_SK_WIRE]; /* XXX right one? */ 671 sfail = 0; 672 if (sk->proto == IPPROTO_TCP) { 673 /* 674 * The state should never go backwards except 675 * for syn-proxy states. Neither should the 676 * sequence window slide backwards. 677 */ 678 if (st->src.state > sp->src.state && 679 (st->src.state < PF_TCPS_PROXY_SRC || 680 sp->src.state >= PF_TCPS_PROXY_SRC)) 681 sfail = 1; 682 else if (SEQ_GT(st->src.seqlo, 683 ntohl(sp->src.seqlo))) 684 sfail = 3; 685 else if (st->dst.state > sp->dst.state) { 686 /* There might still be useful 687 * information about the src state here, 688 * so import that part of the update, 689 * then "fail" so we send the updated 690 * state back to the peer who is missing 691 * our what we know. */ 692 pf_state_peer_ntoh(&sp->src, &st->src); 693 /* XXX do anything with timeouts? */ 694 sfail = 7; 695 flags = 0; 696 } else if (st->dst.state >= TCPS_SYN_SENT && 697 SEQ_GT(st->dst.seqlo, ntohl(sp->dst.seqlo))) 698 sfail = 4; 699 } else { 700 /* 701 * Non-TCP protocol state machine always go 702 * forwards 703 */ 704 if (st->src.state > sp->src.state) 705 sfail = 5; 706 else if (st->dst.state > sp->dst.state) 707 sfail = 6; 708 } 709 if (sfail) { 710 if (pf_status.debug >= PF_DEBUG_MISC) 711 kprintf("pfsync: %s stale update " 712 "(%d) id: %016jx " 713 "creatorid: %08x\n", 714 (sfail < 7 ? "ignoring" 715 : "partial"), sfail, 716 (uintmax_t)be64toh(st->id), 717 ntohl(st->creatorid)); 718 pfsyncstats.pfsyncs_stale++; 719 720 if (!(sp->sync_flags & PFSTATE_STALE)) { 721 /* we have a better state, send it */ 722 if (sc->sc_mbuf != NULL && !stale) 723 pfsync_sendout(sc); 724 stale++; 725 if (!st->sync_flags) 726 pfsync_pack_state( 727 PFSYNC_ACT_UPD, st, flags); 728 } 729 continue; 730 } 731 pfsync_alloc_scrub_memory(&sp->dst, &st->dst); 732 pf_state_peer_ntoh(&sp->src, &st->src); 733 pf_state_peer_ntoh(&sp->dst, &st->dst); 734 st->expire = ntohl(sp->expire) + time_second; 735 st->timeout = sp->timeout; 736 } 737 if (stale && sc->sc_mbuf != NULL) 738 pfsync_sendout(sc); 739 crit_exit(); 740 break; 741 /* 742 * It's not strictly necessary for us to support the "uncompressed" 743 * delete action, but it's relatively simple and maintains consistency. 744 */ 745 case PFSYNC_ACT_DEL: 746 if ((mp = m_pulldown(m, iplen + sizeof(*ph), 747 count * sizeof(*sp), &offp)) == NULL) { 748 pfsyncstats.pfsyncs_badlen++; 749 return; 750 } 751 752 crit_enter(); 753 for (i = 0, sp = (struct pfsync_state *)(mp->m_data + offp); 754 i < count; i++, sp++) { 755 bcopy(sp->id, &id_key.id, sizeof(id_key.id)); 756 id_key.creatorid = sp->creatorid; 757 758 st = pf_find_state_byid(&id_key); 759 if (st == NULL) { 760 pfsyncstats.pfsyncs_badstate++; 761 continue; 762 } 763 st->sync_flags |= PFSTATE_FROMSYNC; 764 pf_unlink_state(st); 765 } 766 crit_exit(); 767 break; 768 case PFSYNC_ACT_UPD_C: { 769 int update_requested = 0; 770 771 if ((mp = m_pulldown(m, iplen + sizeof(*ph), 772 count * sizeof(*up), &offp)) == NULL) { 773 pfsyncstats.pfsyncs_badlen++; 774 return; 775 } 776 777 crit_enter(); 778 for (i = 0, up = (struct pfsync_state_upd *)(mp->m_data + offp); 779 i < count; i++, up++) { 780 /* check for invalid values */ 781 if (up->timeout >= PFTM_MAX || 782 up->src.state > PF_TCPS_PROXY_DST || 783 up->dst.state > PF_TCPS_PROXY_DST) { 784 if (pf_status.debug >= PF_DEBUG_MISC) 785 kprintf("pfsync_insert: " 786 "PFSYNC_ACT_UPD_C: " 787 "invalid value\n"); 788 pfsyncstats.pfsyncs_badval++; 789 continue; 790 } 791 792 bcopy(up->id, &id_key.id, sizeof(id_key.id)); 793 id_key.creatorid = up->creatorid; 794 795 st = pf_find_state_byid(&id_key); 796 if (st == NULL) { 797 /* We don't have this state. Ask for it. */ 798 error = pfsync_request_update(up, &src); 799 if (error == ENOMEM) { 800 crit_exit(); 801 goto done; 802 } 803 update_requested = 1; 804 pfsyncstats.pfsyncs_badstate++; 805 continue; 806 } 807 sk = st->key[PF_SK_WIRE]; /* XXX right one? */ 808 sfail = 0; 809 if (sk->proto == IPPROTO_TCP) { 810 /* 811 * The state should never go backwards except 812 * for syn-proxy states. Neither should the 813 * sequence window slide backwards. 814 */ 815 if (st->src.state > up->src.state && 816 (st->src.state < PF_TCPS_PROXY_SRC || 817 up->src.state >= PF_TCPS_PROXY_SRC)) 818 sfail = 1; 819 else if (st->dst.state > up->dst.state) 820 sfail = 2; 821 else if (SEQ_GT(st->src.seqlo, 822 ntohl(up->src.seqlo))) 823 sfail = 3; 824 else if (st->dst.state >= TCPS_SYN_SENT && 825 SEQ_GT(st->dst.seqlo, ntohl(up->dst.seqlo))) 826 sfail = 4; 827 } else { 828 /* 829 * Non-TCP protocol state machine always go 830 * forwards 831 */ 832 if (st->src.state > up->src.state) 833 sfail = 5; 834 else if (st->dst.state > up->dst.state) 835 sfail = 6; 836 } 837 if (sfail) { 838 if (pf_status.debug >= PF_DEBUG_MISC) 839 kprintf("pfsync: ignoring stale update " 840 "(%d) id: %016" PRIx64 " " 841 "creatorid: %08x\n", sfail, 842 be64toh(st->id), 843 ntohl(st->creatorid)); 844 pfsyncstats.pfsyncs_stale++; 845 846 /* we have a better state, send it out */ 847 if ((!stale || update_requested) && 848 sc->sc_mbuf != NULL) { 849 pfsync_sendout(sc); 850 update_requested = 0; 851 } 852 stale++; 853 if (!st->sync_flags) 854 pfsync_pack_state(PFSYNC_ACT_UPD, st, 855 PFSYNC_FLAG_STALE); 856 continue; 857 } 858 pfsync_alloc_scrub_memory(&up->dst, &st->dst); 859 pf_state_peer_ntoh(&up->src, &st->src); 860 pf_state_peer_ntoh(&up->dst, &st->dst); 861 st->expire = ntohl(up->expire) + time_second; 862 st->timeout = up->timeout; 863 } 864 if ((update_requested || stale) && sc->sc_mbuf) 865 pfsync_sendout(sc); 866 crit_exit(); 867 break; 868 } 869 case PFSYNC_ACT_DEL_C: 870 if ((mp = m_pulldown(m, iplen + sizeof(*ph), 871 count * sizeof(*dp), &offp)) == NULL) { 872 pfsyncstats.pfsyncs_badlen++; 873 return; 874 } 875 876 crit_enter(); 877 for (i = 0, dp = (struct pfsync_state_del *)(mp->m_data + offp); 878 i < count; i++, dp++) { 879 bcopy(dp->id, &id_key.id, sizeof(id_key.id)); 880 id_key.creatorid = dp->creatorid; 881 882 st = pf_find_state_byid(&id_key); 883 if (st == NULL) { 884 pfsyncstats.pfsyncs_badstate++; 885 continue; 886 } 887 st->sync_flags |= PFSTATE_FROMSYNC; 888 pf_unlink_state(st); 889 } 890 crit_exit(); 891 break; 892 case PFSYNC_ACT_INS_F: 893 case PFSYNC_ACT_DEL_F: 894 /* not implemented */ 895 break; 896 case PFSYNC_ACT_UREQ: 897 if ((mp = m_pulldown(m, iplen + sizeof(*ph), 898 count * sizeof(*rup), &offp)) == NULL) { 899 pfsyncstats.pfsyncs_badlen++; 900 return; 901 } 902 903 crit_enter(); 904 if (sc->sc_mbuf != NULL) 905 pfsync_sendout(sc); 906 for (i = 0, 907 rup = (struct pfsync_state_upd_req *)(mp->m_data + offp); 908 i < count; i++, rup++) { 909 bcopy(rup->id, &id_key.id, sizeof(id_key.id)); 910 id_key.creatorid = rup->creatorid; 911 912 if (id_key.id == 0 && id_key.creatorid == 0) { 913 sc->sc_ureq_received = mycpu->gd_time_seconds; 914 if (sc->sc_bulk_send_next == NULL) { 915 if (++sc->sc_bulk_send_cpu >= ncpus) 916 sc->sc_bulk_send_cpu = 0; 917 sc->sc_bulk_send_next = 918 TAILQ_FIRST(&state_list[sc->sc_bulk_send_cpu]); 919 } 920 sc->sc_bulk_terminator = 921 sc->sc_bulk_send_next; 922 sc->sc_bulk_terminator_cpu = 923 sc->sc_bulk_send_cpu; 924 if (pf_status.debug >= PF_DEBUG_MISC) 925 kprintf("pfsync: received " 926 "bulk update request\n"); 927 pfsync_send_bus(sc, PFSYNC_BUS_START); 928 lwkt_reltoken(&pf_token); 929 callout_init(&sc->sc_bulk_tmo); 930 lwkt_gettoken(&pf_token); 931 } else { 932 st = pf_find_state_byid(&id_key); 933 if (st == NULL) { 934 pfsyncstats.pfsyncs_badstate++; 935 continue; 936 } 937 if (!st->sync_flags) 938 pfsync_pack_state(PFSYNC_ACT_UPD, 939 st, 0); 940 } 941 } 942 if (sc->sc_mbuf != NULL) 943 pfsync_sendout(sc); 944 crit_exit(); 945 break; 946 case PFSYNC_ACT_BUS: 947 /* If we're not waiting for a bulk update, who cares. */ 948 if (sc->sc_ureq_sent == 0) 949 break; 950 951 if ((mp = m_pulldown(m, iplen + sizeof(*ph), 952 sizeof(*bus), &offp)) == NULL) { 953 pfsyncstats.pfsyncs_badlen++; 954 return; 955 } 956 bus = (struct pfsync_state_bus *)(mp->m_data + offp); 957 switch (bus->status) { 958 case PFSYNC_BUS_START: 959 lwkt_reltoken(&pf_token); 960 callout_reset(&sc->sc_bulkfail_tmo, 961 pf_pool_limits[PF_LIMIT_STATES].limit / 962 (PFSYNC_BULKPACKETS * sc->sc_maxcount), 963 pfsync_bulkfail, LIST_FIRST(&pfsync_list)); 964 lwkt_gettoken(&pf_token); 965 if (pf_status.debug >= PF_DEBUG_MISC) 966 kprintf("pfsync: received bulk " 967 "update start\n"); 968 break; 969 case PFSYNC_BUS_END: 970 if (mycpu->gd_time_seconds - ntohl(bus->endtime) >= 971 sc->sc_ureq_sent) { 972 /* that's it, we're happy */ 973 sc->sc_ureq_sent = 0; 974 sc->sc_bulk_tries = 0; 975 lwkt_reltoken(&pf_token); 976 callout_stop(&sc->sc_bulkfail_tmo); 977 lwkt_gettoken(&pf_token); 978 #if NCARP > 0 979 if (!pfsync_sync_ok) { 980 lwkt_reltoken(&pf_token); 981 carp_group_demote_adj(&sc->sc_if, -1); 982 lwkt_gettoken(&pf_token); 983 } 984 #endif 985 pfsync_sync_ok = 1; 986 if (pf_status.debug >= PF_DEBUG_MISC) 987 kprintf("pfsync: received valid " 988 "bulk update end\n"); 989 } else { 990 if (pf_status.debug >= PF_DEBUG_MISC) 991 kprintf("pfsync: received invalid " 992 "bulk update end: bad timestamp\n"); 993 } 994 break; 995 } 996 break; 997 } 998 999 done: 1000 if (m) 1001 m_freem(m); 1002 } 1003 1004 int 1005 pfsyncoutput(struct ifnet *ifp, struct mbuf *m, struct sockaddr *dst, 1006 struct rtentry *rt) 1007 { 1008 m_freem(m); 1009 return (0); 1010 } 1011 1012 /* ARGSUSED */ 1013 int 1014 pfsyncioctl(struct ifnet *ifp, u_long cmd, caddr_t data, struct ucred *cr) 1015 { 1016 struct pfsync_softc *sc = ifp->if_softc; 1017 struct ifreq *ifr = (struct ifreq *)data; 1018 struct ip_moptions *imo = &sc->sc_imo; 1019 struct pfsyncreq pfsyncr; 1020 struct ifnet *sifp; 1021 int error; 1022 1023 lwkt_gettoken(&pf_token); 1024 1025 switch (cmd) { 1026 case SIOCSIFADDR: 1027 case SIOCAIFADDR: 1028 case SIOCSIFDSTADDR: 1029 case SIOCSIFFLAGS: 1030 if (ifp->if_flags & IFF_UP) 1031 ifp->if_flags |= IFF_RUNNING; 1032 else 1033 ifp->if_flags &= ~IFF_RUNNING; 1034 break; 1035 case SIOCSIFMTU: 1036 if (ifr->ifr_mtu < PFSYNC_MINMTU) { 1037 lwkt_reltoken(&pf_token); 1038 return (EINVAL); 1039 } 1040 if (ifr->ifr_mtu > MCLBYTES) 1041 ifr->ifr_mtu = MCLBYTES; 1042 crit_enter(); 1043 if (ifr->ifr_mtu < ifp->if_mtu) 1044 pfsync_sendout(sc); 1045 pfsync_setmtu(sc, ifr->ifr_mtu); 1046 crit_exit(); 1047 break; 1048 case SIOCGETPFSYNC: 1049 bzero(&pfsyncr, sizeof(pfsyncr)); 1050 if (sc->sc_sync_ifp) 1051 strlcpy(pfsyncr.pfsyncr_syncdev, 1052 sc->sc_sync_ifp->if_xname, IFNAMSIZ); 1053 pfsyncr.pfsyncr_syncpeer = sc->sc_sync_peer; 1054 pfsyncr.pfsyncr_maxupdates = sc->sc_maxupdates; 1055 lwkt_reltoken(&pf_token); 1056 if ((error = copyout(&pfsyncr, ifr->ifr_data, sizeof(pfsyncr)))) 1057 return (error); 1058 lwkt_gettoken(&pf_token); 1059 break; 1060 case SIOCSETPFSYNC: 1061 if ((error = priv_check_cred(cr, PRIV_ROOT, NULL_CRED_OKAY)) != 0) { 1062 lwkt_reltoken(&pf_token); 1063 return (error); 1064 } 1065 if ((error = copyin(ifr->ifr_data, &pfsyncr, sizeof(pfsyncr)))) { 1066 lwkt_reltoken(&pf_token); 1067 return (error); 1068 } 1069 1070 if (pfsyncr.pfsyncr_syncpeer.s_addr == 0) 1071 sc->sc_sync_peer.s_addr = INADDR_PFSYNC_GROUP; 1072 else 1073 sc->sc_sync_peer.s_addr = 1074 pfsyncr.pfsyncr_syncpeer.s_addr; 1075 1076 if (pfsyncr.pfsyncr_maxupdates > 255) { 1077 lwkt_reltoken(&pf_token); 1078 return (EINVAL); 1079 } 1080 sc->sc_maxupdates = pfsyncr.pfsyncr_maxupdates; 1081 1082 if (pfsyncr.pfsyncr_syncdev[0] == 0) { 1083 sc->sc_sync_ifp = NULL; 1084 if (sc->sc_mbuf_net != NULL) { 1085 /* Don't keep stale pfsync packets around. */ 1086 crit_enter(); 1087 m_freem(sc->sc_mbuf_net); 1088 sc->sc_mbuf_net = NULL; 1089 sc->sc_statep_net.s = NULL; 1090 crit_exit(); 1091 } 1092 if (imo->imo_num_memberships > 0) { 1093 pfsync_in_delmulti(imo->imo_membership[--imo->imo_num_memberships]); 1094 imo->imo_multicast_ifp = NULL; 1095 } 1096 break; 1097 } 1098 1099 /* 1100 * XXX not that MPSAFE; pfsync needs serious rework 1101 */ 1102 ifnet_deserialize_all(ifp); 1103 ifnet_lock(); 1104 sifp = ifunit(pfsyncr.pfsyncr_syncdev); 1105 ifnet_unlock(); 1106 ifnet_serialize_all(ifp); 1107 1108 if (sifp == NULL) { 1109 lwkt_reltoken(&pf_token); 1110 return (EINVAL); 1111 } 1112 1113 crit_enter(); 1114 if (sifp->if_mtu < sc->sc_if.if_mtu || 1115 (sc->sc_sync_ifp != NULL && 1116 sifp->if_mtu < sc->sc_sync_ifp->if_mtu) || 1117 sifp->if_mtu < MCLBYTES - sizeof(struct ip)) 1118 pfsync_sendout(sc); 1119 sc->sc_sync_ifp = sifp; 1120 1121 pfsync_setmtu(sc, sc->sc_if.if_mtu); 1122 1123 if (imo->imo_num_memberships > 0) { 1124 pfsync_in_delmulti(imo->imo_membership[--imo->imo_num_memberships]); 1125 imo->imo_multicast_ifp = NULL; 1126 } 1127 1128 if (sc->sc_sync_ifp && 1129 sc->sc_sync_peer.s_addr == INADDR_PFSYNC_GROUP) { 1130 if (!(sc->sc_sync_ifp->if_flags & IFF_MULTICAST)) { 1131 sc->sc_sync_ifp = NULL; 1132 lwkt_reltoken(&pf_token); 1133 crit_exit(); 1134 return (EADDRNOTAVAIL); 1135 } 1136 1137 if ((imo->imo_membership[0] = 1138 pfsync_in_addmulti(sc->sc_sync_ifp)) == NULL) { 1139 sc->sc_sync_ifp = NULL; 1140 lwkt_reltoken(&pf_token); 1141 crit_exit(); 1142 return (ENOBUFS); 1143 } 1144 imo->imo_num_memberships++; 1145 imo->imo_multicast_ifp = sc->sc_sync_ifp; 1146 imo->imo_multicast_ttl = PFSYNC_DFLTTL; 1147 imo->imo_multicast_loop = 0; 1148 } 1149 1150 if (sc->sc_sync_ifp || 1151 sc->sc_sendaddr.s_addr != INADDR_PFSYNC_GROUP) { 1152 /* Request a full state table update. */ 1153 sc->sc_ureq_sent = mycpu->gd_time_seconds; 1154 #if NCARP > 0 1155 if (pfsync_sync_ok) 1156 carp_group_demote_adj(&sc->sc_if, 1); 1157 #endif 1158 pfsync_sync_ok = 0; 1159 if (pf_status.debug >= PF_DEBUG_MISC) 1160 kprintf("pfsync: requesting bulk update\n"); 1161 lwkt_reltoken(&pf_token); 1162 callout_reset(&sc->sc_bulkfail_tmo, 5 * hz, 1163 pfsync_bulkfail, LIST_FIRST(&pfsync_list)); 1164 lwkt_gettoken(&pf_token); 1165 error = pfsync_request_update(NULL, NULL); 1166 if (error == ENOMEM) { 1167 lwkt_reltoken(&pf_token); 1168 crit_exit(); 1169 return (ENOMEM); 1170 } 1171 pfsync_sendout(sc); 1172 } 1173 crit_exit(); 1174 1175 break; 1176 1177 default: 1178 lwkt_reltoken(&pf_token); 1179 return (ENOTTY); 1180 } 1181 1182 lwkt_reltoken(&pf_token); 1183 return (0); 1184 } 1185 1186 void 1187 pfsync_setmtu(struct pfsync_softc *sc, int mtu_req) 1188 { 1189 int mtu; 1190 1191 if (sc->sc_sync_ifp && sc->sc_sync_ifp->if_mtu < mtu_req) 1192 mtu = sc->sc_sync_ifp->if_mtu; 1193 else 1194 mtu = mtu_req; 1195 1196 sc->sc_maxcount = (mtu - sizeof(struct pfsync_header)) / 1197 sizeof(struct pfsync_state); 1198 if (sc->sc_maxcount > 254) 1199 sc->sc_maxcount = 254; 1200 sc->sc_if.if_mtu = sizeof(struct pfsync_header) + 1201 sc->sc_maxcount * sizeof(struct pfsync_state); 1202 } 1203 1204 struct mbuf * 1205 pfsync_get_mbuf(struct pfsync_softc *sc, u_int8_t action, void **sp) 1206 { 1207 struct pfsync_header *h; 1208 struct mbuf *m; 1209 int len; 1210 1211 ASSERT_LWKT_TOKEN_HELD(&pf_token); 1212 1213 MGETHDR(m, M_WAITOK, MT_DATA); 1214 if (m == NULL) { 1215 IFNET_STAT_INC(&sc->sc_if, oerrors, 1); 1216 return (NULL); 1217 } 1218 1219 switch (action) { 1220 case PFSYNC_ACT_CLR: 1221 len = sizeof(struct pfsync_header) + 1222 sizeof(struct pfsync_state_clr); 1223 break; 1224 case PFSYNC_ACT_UPD_C: 1225 len = (sc->sc_maxcount * sizeof(struct pfsync_state_upd)) + 1226 sizeof(struct pfsync_header); 1227 break; 1228 case PFSYNC_ACT_DEL_C: 1229 len = (sc->sc_maxcount * sizeof(struct pfsync_state_del)) + 1230 sizeof(struct pfsync_header); 1231 break; 1232 case PFSYNC_ACT_UREQ: 1233 len = (sc->sc_maxcount * sizeof(struct pfsync_state_upd_req)) + 1234 sizeof(struct pfsync_header); 1235 break; 1236 case PFSYNC_ACT_BUS: 1237 len = sizeof(struct pfsync_header) + 1238 sizeof(struct pfsync_state_bus); 1239 break; 1240 case PFSYNC_ACT_TDB_UPD: 1241 len = (sc->sc_maxcount * sizeof(struct pfsync_tdb)) + 1242 sizeof(struct pfsync_header); 1243 break; 1244 default: 1245 len = (sc->sc_maxcount * sizeof(struct pfsync_state)) + 1246 sizeof(struct pfsync_header); 1247 break; 1248 } 1249 1250 if (len > MHLEN) { 1251 MCLGET(m, M_WAITOK); 1252 if ((m->m_flags & M_EXT) == 0) { 1253 m_free(m); 1254 IFNET_STAT_INC(&sc->sc_if, oerrors, 1); 1255 return (NULL); 1256 } 1257 m->m_data += (MCLBYTES - len) &~ (sizeof(long) - 1); 1258 } else 1259 MH_ALIGN(m, len); 1260 1261 m->m_pkthdr.rcvif = NULL; 1262 m->m_pkthdr.len = m->m_len = sizeof(struct pfsync_header); 1263 h = mtod(m, struct pfsync_header *); 1264 h->version = PFSYNC_VERSION; 1265 h->af = 0; 1266 h->count = 0; 1267 h->action = action; 1268 1269 *sp = (void *)((char *)h + PFSYNC_HDRLEN); 1270 lwkt_reltoken(&pf_token); 1271 callout_reset(&sc->sc_tmo, hz, pfsync_timeout, 1272 LIST_FIRST(&pfsync_list)); 1273 lwkt_gettoken(&pf_token); 1274 return (m); 1275 } 1276 1277 int 1278 pfsync_pack_state(u_int8_t action, struct pf_state *st, int flags) 1279 { 1280 struct ifnet *ifp = NULL; 1281 struct pfsync_softc *sc = pfsyncif; 1282 struct pfsync_header *h, *h_net; 1283 struct pfsync_state *sp = NULL; 1284 struct pfsync_state_upd *up = NULL; 1285 struct pfsync_state_del *dp = NULL; 1286 int ret = 0; 1287 u_int8_t i = 255, newaction = 0; 1288 1289 if (sc == NULL) 1290 return (0); 1291 ifp = &sc->sc_if; 1292 1293 /* 1294 * If a packet falls in the forest and there's nobody around to 1295 * hear, does it make a sound? 1296 */ 1297 if (ifp->if_bpf == NULL && sc->sc_sync_ifp == NULL && 1298 sc->sc_sync_peer.s_addr == INADDR_PFSYNC_GROUP) { 1299 /* Don't leave any stale pfsync packets hanging around. */ 1300 if (sc->sc_mbuf != NULL) { 1301 m_freem(sc->sc_mbuf); 1302 sc->sc_mbuf = NULL; 1303 sc->sc_statep.s = NULL; 1304 } 1305 return (0); 1306 } 1307 1308 if (action >= PFSYNC_ACT_MAX) 1309 return (EINVAL); 1310 1311 crit_enter(); 1312 if (sc->sc_mbuf == NULL) { 1313 if ((sc->sc_mbuf = pfsync_get_mbuf(sc, action, 1314 (void *)&sc->sc_statep.s)) == NULL) { 1315 crit_exit(); 1316 return (ENOMEM); 1317 } 1318 h = mtod(sc->sc_mbuf, struct pfsync_header *); 1319 } else { 1320 h = mtod(sc->sc_mbuf, struct pfsync_header *); 1321 if (h->action != action) { 1322 pfsync_sendout(sc); 1323 if ((sc->sc_mbuf = pfsync_get_mbuf(sc, action, 1324 (void *)&sc->sc_statep.s)) == NULL) { 1325 crit_exit(); 1326 return (ENOMEM); 1327 } 1328 h = mtod(sc->sc_mbuf, struct pfsync_header *); 1329 } else { 1330 /* 1331 * If it's an update, look in the packet to see if 1332 * we already have an update for the state. 1333 */ 1334 if (action == PFSYNC_ACT_UPD && sc->sc_maxupdates) { 1335 struct pfsync_state *usp = 1336 (void *)((char *)h + PFSYNC_HDRLEN); 1337 1338 for (i = 0; i < h->count; i++) { 1339 if (!memcmp(usp->id, &st->id, 1340 PFSYNC_ID_LEN) && 1341 usp->creatorid == st->creatorid) { 1342 sp = usp; 1343 sp->updates++; 1344 break; 1345 } 1346 usp++; 1347 } 1348 } 1349 } 1350 } 1351 1352 st->pfsync_time = mycpu->gd_time_seconds; 1353 1354 if (sp == NULL) { 1355 /* not a "duplicate" update */ 1356 i = 255; 1357 sp = sc->sc_statep.s++; 1358 sc->sc_mbuf->m_pkthdr.len = 1359 sc->sc_mbuf->m_len += sizeof(struct pfsync_state); 1360 h->count++; 1361 bzero(sp, sizeof(*sp)); 1362 1363 pfsync_state_export(sp, st); 1364 1365 if (flags & PFSYNC_FLAG_STALE) 1366 sp->sync_flags |= PFSTATE_STALE; 1367 } else { 1368 pf_state_peer_hton(&st->src, &sp->src); 1369 pf_state_peer_hton(&st->dst, &sp->dst); 1370 1371 if (st->expire <= time_second) 1372 sp->expire = htonl(0); 1373 else 1374 sp->expire = htonl(st->expire - time_second); 1375 } 1376 1377 /* do we need to build "compressed" actions for network transfer? */ 1378 if (sc->sc_sync_ifp && flags & PFSYNC_FLAG_COMPRESS) { 1379 switch (action) { 1380 case PFSYNC_ACT_UPD: 1381 newaction = PFSYNC_ACT_UPD_C; 1382 break; 1383 case PFSYNC_ACT_DEL: 1384 newaction = PFSYNC_ACT_DEL_C; 1385 break; 1386 default: 1387 /* by default we just send the uncompressed states */ 1388 break; 1389 } 1390 } 1391 1392 if (newaction) { 1393 if (sc->sc_mbuf_net == NULL) { 1394 if ((sc->sc_mbuf_net = pfsync_get_mbuf(sc, newaction, 1395 (void *)&sc->sc_statep_net.s)) == NULL) { 1396 crit_exit(); 1397 return (ENOMEM); 1398 } 1399 } 1400 h_net = mtod(sc->sc_mbuf_net, struct pfsync_header *); 1401 1402 switch (newaction) { 1403 case PFSYNC_ACT_UPD_C: 1404 if (i != 255) { 1405 up = (void *)((char *)h_net + 1406 PFSYNC_HDRLEN + (i * sizeof(*up))); 1407 up->updates++; 1408 } else { 1409 h_net->count++; 1410 sc->sc_mbuf_net->m_pkthdr.len = 1411 sc->sc_mbuf_net->m_len += sizeof(*up); 1412 up = sc->sc_statep_net.u++; 1413 1414 bzero(up, sizeof(*up)); 1415 bcopy(&st->id, up->id, sizeof(up->id)); 1416 up->creatorid = st->creatorid; 1417 } 1418 up->timeout = st->timeout; 1419 up->expire = sp->expire; 1420 up->src = sp->src; 1421 up->dst = sp->dst; 1422 break; 1423 case PFSYNC_ACT_DEL_C: 1424 sc->sc_mbuf_net->m_pkthdr.len = 1425 sc->sc_mbuf_net->m_len += sizeof(*dp); 1426 dp = sc->sc_statep_net.d++; 1427 h_net->count++; 1428 1429 bzero(dp, sizeof(*dp)); 1430 bcopy(&st->id, dp->id, sizeof(dp->id)); 1431 dp->creatorid = st->creatorid; 1432 break; 1433 } 1434 } 1435 1436 if (h->count == sc->sc_maxcount || 1437 (sc->sc_maxupdates && (sp->updates >= sc->sc_maxupdates))) 1438 ret = pfsync_sendout(sc); 1439 1440 crit_exit(); 1441 return (ret); 1442 } 1443 1444 int 1445 pfsync_request_update(struct pfsync_state_upd *up, struct in_addr *src) 1446 { 1447 struct pfsync_header *h; 1448 struct pfsync_softc *sc = pfsyncif; 1449 struct pfsync_state_upd_req *rup; 1450 int ret = 0; 1451 1452 if (sc == NULL) 1453 return (0); 1454 1455 if (sc->sc_mbuf == NULL) { 1456 if ((sc->sc_mbuf = pfsync_get_mbuf(sc, PFSYNC_ACT_UREQ, 1457 (void *)&sc->sc_statep.s)) == NULL) 1458 return (ENOMEM); 1459 h = mtod(sc->sc_mbuf, struct pfsync_header *); 1460 } else { 1461 h = mtod(sc->sc_mbuf, struct pfsync_header *); 1462 if (h->action != PFSYNC_ACT_UREQ) { 1463 pfsync_sendout(sc); 1464 if ((sc->sc_mbuf = pfsync_get_mbuf(sc, PFSYNC_ACT_UREQ, 1465 (void *)&sc->sc_statep.s)) == NULL) 1466 return (ENOMEM); 1467 h = mtod(sc->sc_mbuf, struct pfsync_header *); 1468 } 1469 } 1470 1471 if (src != NULL) 1472 sc->sc_sendaddr = *src; 1473 sc->sc_mbuf->m_pkthdr.len = sc->sc_mbuf->m_len += sizeof(*rup); 1474 h->count++; 1475 rup = sc->sc_statep.r++; 1476 bzero(rup, sizeof(*rup)); 1477 if (up != NULL) { 1478 bcopy(up->id, rup->id, sizeof(rup->id)); 1479 rup->creatorid = up->creatorid; 1480 } 1481 1482 if (h->count == sc->sc_maxcount) 1483 ret = pfsync_sendout(sc); 1484 1485 return (ret); 1486 } 1487 1488 int 1489 pfsync_clear_states(u_int32_t creatorid, char *ifname) 1490 { 1491 struct pfsync_softc *sc = pfsyncif; 1492 struct pfsync_state_clr *cp; 1493 int ret; 1494 1495 if (sc == NULL) 1496 return (0); 1497 1498 crit_enter(); 1499 if (sc->sc_mbuf != NULL) 1500 pfsync_sendout(sc); 1501 if ((sc->sc_mbuf = pfsync_get_mbuf(sc, PFSYNC_ACT_CLR, 1502 (void *)&sc->sc_statep.c)) == NULL) { 1503 crit_exit(); 1504 return (ENOMEM); 1505 } 1506 sc->sc_mbuf->m_pkthdr.len = sc->sc_mbuf->m_len += sizeof(*cp); 1507 cp = sc->sc_statep.c; 1508 cp->creatorid = creatorid; 1509 if (ifname != NULL) 1510 strlcpy(cp->ifname, ifname, IFNAMSIZ); 1511 1512 ret = (pfsync_sendout(sc)); 1513 crit_exit(); 1514 return (ret); 1515 } 1516 1517 void 1518 pfsync_timeout(void *v) 1519 { 1520 struct pfsync_softc *sc = v; 1521 1522 crit_enter(); 1523 pfsync_sendout(sc); 1524 crit_exit(); 1525 } 1526 1527 void 1528 pfsync_send_bus(struct pfsync_softc *sc, u_int8_t status) 1529 { 1530 struct pfsync_state_bus *bus; 1531 1532 if (sc->sc_mbuf != NULL) 1533 pfsync_sendout(sc); 1534 1535 if (pfsync_sync_ok && 1536 (sc->sc_mbuf = pfsync_get_mbuf(sc, PFSYNC_ACT_BUS, 1537 (void *)&sc->sc_statep.b)) != NULL) { 1538 sc->sc_mbuf->m_pkthdr.len = sc->sc_mbuf->m_len += sizeof(*bus); 1539 bus = sc->sc_statep.b; 1540 bus->creatorid = pf_status.hostid; 1541 bus->status = status; 1542 bus->endtime = htonl(mycpu->gd_time_seconds - sc->sc_ureq_received); 1543 pfsync_sendout(sc); 1544 } 1545 } 1546 1547 void 1548 pfsync_bulk_update(void *v) 1549 { 1550 struct pfsync_softc *sc = v; 1551 int i = 0; 1552 int cpu; 1553 struct pf_state *state; 1554 1555 ASSERT_LWKT_TOKEN_HELD(&pf_token); 1556 1557 crit_enter(); 1558 if (sc->sc_mbuf != NULL) 1559 pfsync_sendout(sc); 1560 1561 /* 1562 * Grab at most PFSYNC_BULKPACKETS worth of states which have not 1563 * been sent since the latest request was made. 1564 */ 1565 state = sc->sc_bulk_send_next; 1566 cpu = sc->sc_bulk_send_cpu; 1567 if (state) 1568 do { 1569 /* send state update if syncable and not already sent */ 1570 if (!state->sync_flags 1571 && state->timeout < PFTM_MAX 1572 && state->pfsync_time <= sc->sc_ureq_received) { 1573 pfsync_pack_state(PFSYNC_ACT_UPD, state, 0); 1574 i++; 1575 } 1576 1577 /* figure next state to send */ 1578 state = TAILQ_NEXT(state, entry_list); 1579 1580 /* wrap to start of list if we hit the end */ 1581 if (state == NULL) { 1582 if (++cpu >= ncpus) 1583 cpu = 0; 1584 state = TAILQ_FIRST(&state_list[cpu]); 1585 } 1586 } while (i < sc->sc_maxcount * PFSYNC_BULKPACKETS && 1587 cpu != sc->sc_bulk_terminator_cpu && 1588 state != sc->sc_bulk_terminator); 1589 1590 if (state == NULL || (cpu == sc->sc_bulk_terminator_cpu && 1591 state == sc->sc_bulk_terminator)) { 1592 /* we're done */ 1593 pfsync_send_bus(sc, PFSYNC_BUS_END); 1594 sc->sc_ureq_received = 0; 1595 sc->sc_bulk_send_next = NULL; 1596 sc->sc_bulk_terminator = NULL; 1597 sc->sc_bulk_send_cpu = 0; 1598 sc->sc_bulk_terminator_cpu = 0; 1599 lwkt_reltoken(&pf_token); 1600 callout_stop(&sc->sc_bulk_tmo); 1601 lwkt_gettoken(&pf_token); 1602 if (pf_status.debug >= PF_DEBUG_MISC) 1603 kprintf("pfsync: bulk update complete\n"); 1604 } else { 1605 /* look again for more in a bit */ 1606 lwkt_reltoken(&pf_token); 1607 callout_reset(&sc->sc_bulk_tmo, 1, pfsync_timeout, 1608 LIST_FIRST(&pfsync_list)); 1609 lwkt_gettoken(&pf_token); 1610 sc->sc_bulk_send_next = state; 1611 sc->sc_bulk_send_cpu = cpu; 1612 } 1613 if (sc->sc_mbuf != NULL) 1614 pfsync_sendout(sc); 1615 crit_exit(); 1616 } 1617 1618 void 1619 pfsync_bulkfail(void *v) 1620 { 1621 struct pfsync_softc *sc = v; 1622 int error; 1623 1624 ASSERT_LWKT_TOKEN_HELD(&pf_token); 1625 1626 if (sc->sc_bulk_tries++ < PFSYNC_MAX_BULKTRIES) { 1627 /* Try again in a bit */ 1628 lwkt_reltoken(&pf_token); 1629 callout_reset(&sc->sc_bulkfail_tmo, 5 * hz, pfsync_bulkfail, 1630 LIST_FIRST(&pfsync_list)); 1631 lwkt_gettoken(&pf_token); 1632 crit_enter(); 1633 error = pfsync_request_update(NULL, NULL); 1634 if (error == ENOMEM) { 1635 if (pf_status.debug >= PF_DEBUG_MISC) 1636 kprintf("pfsync: cannot allocate mbufs for " 1637 "bulk update\n"); 1638 } else 1639 pfsync_sendout(sc); 1640 crit_exit(); 1641 } else { 1642 /* Pretend like the transfer was ok */ 1643 sc->sc_ureq_sent = 0; 1644 sc->sc_bulk_tries = 0; 1645 #if NCARP > 0 1646 if (!pfsync_sync_ok) 1647 carp_group_demote_adj(&sc->sc_if, -1); 1648 #endif 1649 pfsync_sync_ok = 1; 1650 if (pf_status.debug >= PF_DEBUG_MISC) 1651 kprintf("pfsync: failed to receive " 1652 "bulk update status\n"); 1653 lwkt_reltoken(&pf_token); 1654 callout_stop(&sc->sc_bulkfail_tmo); 1655 lwkt_gettoken(&pf_token); 1656 } 1657 } 1658 1659 static void 1660 pfsync_sendout_handler(netmsg_t nmsg) 1661 { 1662 struct netmsg_genpkt *msg = (struct netmsg_genpkt *)nmsg; 1663 1664 pfsync_sendout_mbuf(msg->arg1, msg->m); 1665 } 1666 1667 int 1668 pfsync_sendout(struct pfsync_softc *sc) 1669 { 1670 #if NBPFILTER > 0 1671 struct ifnet *ifp = &sc->sc_if; 1672 #endif 1673 struct mbuf *m; 1674 struct netmsg_genpkt *msg; 1675 1676 ASSERT_LWKT_TOKEN_HELD(&pf_token); 1677 1678 lwkt_reltoken(&pf_token); 1679 callout_stop(&sc->sc_tmo); 1680 lwkt_gettoken(&pf_token); 1681 1682 if (sc->sc_mbuf == NULL) 1683 return (0); 1684 m = sc->sc_mbuf; 1685 sc->sc_mbuf = NULL; 1686 sc->sc_statep.s = NULL; 1687 1688 #if NBPFILTER > 0 1689 if (ifp->if_bpf) { 1690 bpf_gettoken(); 1691 if (ifp->if_bpf) 1692 bpf_mtap(ifp->if_bpf, m, BPF_DIRECTION_OUT); 1693 bpf_reltoken(); 1694 } 1695 #endif 1696 1697 if (sc->sc_mbuf_net) { 1698 m_freem(m); 1699 m = sc->sc_mbuf_net; 1700 sc->sc_mbuf_net = NULL; 1701 sc->sc_statep_net.s = NULL; 1702 } 1703 1704 msg = &m->m_hdr.mh_genmsg; 1705 netmsg_init(&msg->base, NULL, &netisr_apanic_rport, 0, 1706 pfsync_sendout_handler); 1707 msg->m = m; 1708 msg->arg1 = sc; 1709 netisr_sendmsg(&msg->base, 0); 1710 1711 return (0); 1712 } 1713 1714 int 1715 pfsync_sendout_mbuf(struct pfsync_softc *sc, struct mbuf *m) 1716 { 1717 struct sockaddr sa; 1718 struct ip *ip; 1719 1720 if (sc->sc_sync_ifp || 1721 sc->sc_sync_peer.s_addr != INADDR_PFSYNC_GROUP) { 1722 M_PREPEND(m, sizeof(struct ip), M_WAITOK); 1723 if (m == NULL) { 1724 pfsyncstats.pfsyncs_onomem++; 1725 return (0); 1726 } 1727 ip = mtod(m, struct ip *); 1728 ip->ip_v = IPVERSION; 1729 ip->ip_hl = sizeof(*ip) >> 2; 1730 ip->ip_tos = IPTOS_LOWDELAY; 1731 ip->ip_len = htons(m->m_pkthdr.len); 1732 ip->ip_id = htons(ip_randomid()); 1733 ip->ip_off = htons(IP_DF); 1734 ip->ip_ttl = PFSYNC_DFLTTL; 1735 ip->ip_p = IPPROTO_PFSYNC; 1736 ip->ip_sum = 0; 1737 1738 bzero(&sa, sizeof(sa)); 1739 ip->ip_src.s_addr = INADDR_ANY; 1740 1741 if (sc->sc_sendaddr.s_addr == INADDR_PFSYNC_GROUP) 1742 m->m_flags |= M_MCAST; 1743 ip->ip_dst = sc->sc_sendaddr; 1744 sc->sc_sendaddr.s_addr = sc->sc_sync_peer.s_addr; 1745 1746 pfsyncstats.pfsyncs_opackets++; 1747 1748 if (ip_output(m, NULL, NULL, IP_RAWOUTPUT, &sc->sc_imo, NULL)) 1749 pfsyncstats.pfsyncs_oerrors++; 1750 } else 1751 m_freem(m); 1752 1753 return (0); 1754 } 1755 1756 static int 1757 pfsync_modevent(module_t mod, int type, void *data) 1758 { 1759 int error = 0; 1760 1761 struct pfsync_softc *pfs_if, *tmp; 1762 1763 lwkt_gettoken(&pf_token); 1764 1765 switch (type) { 1766 case MOD_LOAD: 1767 LIST_INIT(&pfsync_list); 1768 lwkt_reltoken(&pf_token); 1769 if_clone_attach(&pfsync_cloner); 1770 lwkt_gettoken(&pf_token); 1771 /* Override the function pointer for pf_ioctl.c */ 1772 break; 1773 1774 case MOD_UNLOAD: 1775 lwkt_reltoken(&pf_token); 1776 if_clone_detach(&pfsync_cloner); 1777 lwkt_gettoken(&pf_token); 1778 LIST_FOREACH_MUTABLE(pfs_if, &pfsync_list, sc_next, tmp) { 1779 pfsync_clone_destroy(&pfs_if->sc_if); 1780 } 1781 break; 1782 1783 default: 1784 error = EINVAL; 1785 break; 1786 } 1787 1788 lwkt_reltoken(&pf_token); 1789 return error; 1790 } 1791 1792 static moduledata_t pfsync_mod = { 1793 "pfsync", 1794 pfsync_modevent, 1795 0 1796 }; 1797 1798 #define PFSYNC_MODVER 44 1799 1800 DECLARE_MODULE(pfsync, pfsync_mod, SI_SUB_PSEUDO, SI_ORDER_ANY); 1801 MODULE_VERSION(pfsync, PFSYNC_MODVER); 1802 1803 static void 1804 pfsync_in_addmulti_dispatch(netmsg_t nmsg) 1805 { 1806 struct lwkt_msg *lmsg = &nmsg->lmsg; 1807 struct ifnet *ifp = lmsg->u.ms_resultp; 1808 struct in_addr addr; 1809 1810 addr.s_addr = INADDR_PFSYNC_GROUP; 1811 lmsg->u.ms_resultp = in_addmulti(&addr, ifp); 1812 1813 lwkt_replymsg(lmsg, 0); 1814 } 1815 1816 static struct in_multi * 1817 pfsync_in_addmulti(struct ifnet *ifp) 1818 { 1819 struct netmsg_base nmsg; 1820 struct lwkt_msg *lmsg = &nmsg.lmsg; 1821 1822 netmsg_init(&nmsg, NULL, &curthread->td_msgport, 0, 1823 pfsync_in_addmulti_dispatch); 1824 lmsg->u.ms_resultp = ifp; 1825 1826 lwkt_domsg(netisr_cpuport(0), lmsg, 0); 1827 return lmsg->u.ms_resultp; 1828 } 1829 1830 static void 1831 pfsync_in_delmulti_dispatch(netmsg_t nmsg) 1832 { 1833 struct lwkt_msg *lmsg = &nmsg->lmsg; 1834 1835 in_delmulti(lmsg->u.ms_resultp); 1836 lwkt_replymsg(lmsg, 0); 1837 } 1838 1839 static void 1840 pfsync_in_delmulti(struct in_multi *inm) 1841 { 1842 struct netmsg_base nmsg; 1843 struct lwkt_msg *lmsg = &nmsg.lmsg; 1844 1845 netmsg_init(&nmsg, NULL, &curthread->td_msgport, 0, 1846 pfsync_in_delmulti_dispatch); 1847 lmsg->u.ms_resultp = inm; 1848 1849 lwkt_domsg(netisr_cpuport(0), lmsg, 0); 1850 } 1851