1 /* 2 * Copyright (c) 2002 Michael Shalayeff 3 * All rights reserved. 4 * 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted provided that the following conditions 7 * are met: 8 * 1. Redistributions of source code must retain the above copyright 9 * notice, this list of conditions and the following disclaimer. 10 * 2. Redistributions in binary form must reproduce the above copyright 11 * notice, this list of conditions and the following disclaimer in the 12 * documentation and/or other materials provided with the distribution. 13 * 14 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR 15 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 16 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 17 * IN NO EVENT SHALL THE AUTHOR OR HIS RELATIVES BE LIABLE FOR ANY DIRECT, 18 * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 19 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 20 * SERVICES; LOSS OF MIND, USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 21 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, 22 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING 23 * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF 24 * THE POSSIBILITY OF SUCH DAMAGE. 25 * 26 * $OpenBSD: if_pfsync.c,v 1.98 2008/06/29 08:42:15 mcbride Exp $ 27 */ 28 29 #include "opt_inet.h" 30 #include "opt_inet6.h" 31 #include "opt_carp.h" 32 33 #include <sys/param.h> 34 #include <sys/endian.h> 35 #include <sys/proc.h> 36 #include <sys/priv.h> 37 #include <sys/systm.h> 38 #include <sys/time.h> 39 #include <sys/mbuf.h> 40 #include <sys/socket.h> 41 #include <sys/kernel.h> 42 #include <sys/malloc.h> 43 #include <sys/module.h> 44 #include <sys/msgport2.h> 45 #include <sys/sockio.h> 46 #include <sys/thread2.h> 47 48 #include <machine/inttypes.h> 49 50 #include <net/if.h> 51 #include <net/if_types.h> 52 #include <net/ifq_var.h> 53 #include <net/route.h> 54 #include <net/bpf.h> 55 #include <net/netisr2.h> 56 #include <net/netmsg2.h> 57 #include <netinet/in.h> 58 #include <netinet/if_ether.h> 59 #include <netinet/ip_carp.h> 60 #include <netinet/tcp.h> 61 #include <netinet/tcp_seq.h> 62 63 #ifdef INET 64 #include <netinet/in_systm.h> 65 #include <netinet/in_var.h> 66 #include <netinet/ip.h> 67 #include <netinet/ip_var.h> 68 #endif 69 70 #ifdef INET6 71 #include <netinet6/nd6.h> 72 #endif /* INET6 */ 73 74 #include <net/pf/pfvar.h> 75 #include <net/pf/if_pfsync.h> 76 77 #define PFSYNCNAME "pfsync" 78 79 #define PFSYNC_MINMTU \ 80 (sizeof(struct pfsync_header) + sizeof(struct pf_state)) 81 82 #ifdef PFSYNCDEBUG 83 #define DPRINTF(x) do { if (pfsyncdebug) kprintf x ; } while (0) 84 int pfsyncdebug; 85 #else 86 #define DPRINTF(x) 87 #endif 88 89 struct pfsync_softc *pfsyncif = NULL; 90 struct pfsyncstats pfsyncstats; 91 92 void pfsyncattach(int); 93 static int pfsync_clone_destroy(struct ifnet *); 94 static int pfsync_clone_create(struct if_clone *, int, caddr_t); 95 void pfsync_setmtu(struct pfsync_softc *, int); 96 int pfsync_alloc_scrub_memory(struct pfsync_state_peer *, 97 struct pf_state_peer *); 98 int pfsyncoutput(struct ifnet *, struct mbuf *, struct sockaddr *, 99 struct rtentry *); 100 int pfsyncioctl(struct ifnet *, u_long, caddr_t, struct ucred *); 101 void pfsyncstart(struct ifnet *, struct ifaltq_subque *); 102 103 struct mbuf *pfsync_get_mbuf(struct pfsync_softc *, u_int8_t, void **); 104 int pfsync_request_update(struct pfsync_state_upd *, struct in_addr *); 105 int pfsync_sendout(struct pfsync_softc *); 106 int pfsync_sendout_mbuf(struct pfsync_softc *, struct mbuf *); 107 void pfsync_timeout(void *); 108 void pfsync_send_bus(struct pfsync_softc *, u_int8_t); 109 void pfsync_bulk_update(void *); 110 void pfsync_bulkfail(void *); 111 112 static struct in_multi *pfsync_in_addmulti(struct ifnet *); 113 static void pfsync_in_delmulti(struct in_multi *); 114 115 static MALLOC_DEFINE(M_PFSYNC, PFSYNCNAME, "Packet Filter State Sync. Interface"); 116 static LIST_HEAD(pfsync_list, pfsync_softc) pfsync_list; 117 118 int pfsync_sync_ok; 119 120 struct if_clone pfsync_cloner = 121 IF_CLONE_INITIALIZER("pfsync", pfsync_clone_create, pfsync_clone_destroy, 1 ,1); 122 123 void 124 pfsyncattach(int npfsync) 125 { 126 if_clone_attach(&pfsync_cloner); 127 } 128 static int 129 pfsync_clone_create(struct if_clone *ifc, int unit, caddr_t param __unused) 130 { 131 struct pfsync_softc *sc; 132 struct ifnet *ifp; 133 134 lwkt_gettoken(&pf_token); 135 136 sc = kmalloc(sizeof(*sc), M_PFSYNC, M_WAITOK | M_ZERO); 137 pfsync_sync_ok = 1; 138 sc->sc_mbuf = NULL; 139 sc->sc_mbuf_net = NULL; 140 sc->sc_mbuf_tdb = NULL; 141 sc->sc_statep.s = NULL; 142 sc->sc_statep_net.s = NULL; 143 sc->sc_statep_tdb.t = NULL; 144 sc->sc_maxupdates = 128; 145 sc->sc_sync_peer.s_addr =htonl(INADDR_PFSYNC_GROUP); 146 sc->sc_sendaddr.s_addr = htonl(INADDR_PFSYNC_GROUP); 147 sc->sc_ureq_received = 0; 148 sc->sc_ureq_sent = 0; 149 sc->sc_bulk_send_next = NULL; 150 sc->sc_bulk_terminator = NULL; 151 sc->sc_bulk_send_cpu = 0; 152 sc->sc_bulk_terminator_cpu = 0; 153 sc->sc_imo.imo_max_memberships = IP_MAX_MEMBERSHIPS; 154 lwkt_reltoken(&pf_token); 155 ifp = &sc->sc_if; 156 ksnprintf(ifp->if_xname, sizeof ifp->if_xname, "pfsync%d", unit); 157 if_initname(ifp, ifc->ifc_name, unit); 158 ifp->if_ioctl = pfsyncioctl; 159 ifp->if_output = pfsyncoutput; 160 ifp->if_start = pfsyncstart; 161 ifp->if_type = IFT_PFSYNC; 162 ifq_set_maxlen(&ifp->if_snd, ifqmaxlen); 163 ifp->if_hdrlen = PFSYNC_HDRLEN; 164 ifp->if_baudrate = IF_Mbps(100); 165 ifp->if_softc = sc; 166 pfsync_setmtu(sc, MCLBYTES); 167 callout_init(&sc->sc_tmo); 168 /* callout_init(&sc->sc_tdb_tmo); XXX we don't support tdb (yet) */ 169 callout_init(&sc->sc_bulk_tmo); 170 callout_init(&sc->sc_bulkfail_tmo); 171 if_attach(ifp, NULL); 172 173 LIST_INSERT_HEAD(&pfsync_list, sc, sc_next); 174 175 176 #if NCARP > 0 177 if_addgroup(ifp, "carp"); 178 #endif 179 180 #if NBPFILTER > 0 181 bpfattach(&sc->sc_if, DLT_PFSYNC, PFSYNC_HDRLEN); 182 #endif 183 lwkt_gettoken(&pf_token); 184 185 lwkt_reltoken(&pf_token); 186 return (0); 187 } 188 189 static int 190 pfsync_clone_destroy(struct ifnet *ifp) 191 { 192 lwkt_gettoken(&pf_token); 193 lwkt_reltoken(&pf_token); 194 195 struct pfsync_softc *sc = ifp->if_softc; 196 callout_stop(&sc->sc_tmo); 197 /* callout_stop(&sc->sc_tdb_tmo); XXX we don't support tdb (yet) */ 198 callout_stop(&sc->sc_bulk_tmo); 199 callout_stop(&sc->sc_bulkfail_tmo); 200 #if NCARP > 0 201 if (!pfsync_sync_ok) 202 carp_group_demote_adj(&sc->sc_if, -1); 203 #endif 204 #if NBPFILTER > 0 205 bpfdetach(ifp); 206 #endif 207 if_detach(ifp); 208 lwkt_gettoken(&pf_token); 209 LIST_REMOVE(sc, sc_next); 210 kfree(sc, M_PFSYNC); 211 lwkt_reltoken(&pf_token); 212 213 214 return 0; 215 } 216 217 /* 218 * Start output on the pfsync interface. 219 */ 220 void 221 pfsyncstart(struct ifnet *ifp, struct ifaltq_subque *ifsq) 222 { 223 ASSERT_ALTQ_SQ_DEFAULT(ifp, ifsq); 224 ifsq_purge(ifsq); 225 } 226 227 int 228 pfsync_alloc_scrub_memory(struct pfsync_state_peer *s, 229 struct pf_state_peer *d) 230 { 231 if (s->scrub.scrub_flag && d->scrub == NULL) { 232 d->scrub = kmalloc(sizeof(struct pf_state_scrub), M_PFSYNC, M_NOWAIT|M_ZERO); 233 234 if (d->scrub == NULL) 235 return (ENOMEM); 236 } 237 238 return (0); 239 } 240 241 void 242 pfsync_state_export(struct pfsync_state *sp, struct pf_state *st) 243 { 244 bzero(sp, sizeof(struct pfsync_state)); 245 246 /* copy from state key */ 247 sp->key[PF_SK_WIRE].addr[0] = st->key[PF_SK_WIRE]->addr[0]; 248 sp->key[PF_SK_WIRE].addr[1] = st->key[PF_SK_WIRE]->addr[1]; 249 sp->key[PF_SK_WIRE].port[0] = st->key[PF_SK_WIRE]->port[0]; 250 sp->key[PF_SK_WIRE].port[1] = st->key[PF_SK_WIRE]->port[1]; 251 sp->key[PF_SK_STACK].addr[0] = st->key[PF_SK_STACK]->addr[0]; 252 sp->key[PF_SK_STACK].addr[1] = st->key[PF_SK_STACK]->addr[1]; 253 sp->key[PF_SK_STACK].port[0] = st->key[PF_SK_STACK]->port[0]; 254 sp->key[PF_SK_STACK].port[1] = st->key[PF_SK_STACK]->port[1]; 255 sp->proto = st->key[PF_SK_WIRE]->proto; 256 sp->af = st->key[PF_SK_WIRE]->af; 257 258 /* copy from state */ 259 strlcpy(sp->ifname, st->kif->pfik_name, sizeof(sp->ifname)); 260 bcopy(&st->rt_addr, &sp->rt_addr, sizeof(sp->rt_addr)); 261 sp->creation = htonl(time_second - st->creation); 262 sp->expire = pf_state_expires(st); 263 if (sp->expire <= time_second) 264 sp->expire = htonl(0); 265 else 266 sp->expire = htonl(sp->expire - time_second); 267 268 sp->direction = st->direction; 269 sp->log = st->log; 270 sp->timeout = st->timeout; 271 sp->state_flags = st->state_flags; 272 if (st->src_node) 273 sp->sync_flags |= PFSYNC_FLAG_SRCNODE; 274 if (st->nat_src_node) 275 sp->sync_flags |= PFSYNC_FLAG_NATSRCNODE; 276 277 bcopy(&st->id, &sp->id, sizeof(sp->id)); 278 sp->creatorid = st->creatorid; 279 pf_state_peer_hton(&st->src, &sp->src); 280 pf_state_peer_hton(&st->dst, &sp->dst); 281 282 if (st->rule.ptr == NULL) 283 sp->rule = htonl(-1); 284 else 285 sp->rule = htonl(st->rule.ptr->nr); 286 if (st->anchor.ptr == NULL) 287 sp->anchor = htonl(-1); 288 else 289 sp->anchor = htonl(st->anchor.ptr->nr); 290 if (st->nat_rule.ptr == NULL) 291 sp->nat_rule = htonl(-1); 292 else 293 sp->nat_rule = htonl(st->nat_rule.ptr->nr); 294 295 pf_state_counter_hton(st->packets[0], sp->packets[0]); 296 pf_state_counter_hton(st->packets[1], sp->packets[1]); 297 pf_state_counter_hton(st->bytes[0], sp->bytes[0]); 298 pf_state_counter_hton(st->bytes[1], sp->bytes[1]); 299 300 } 301 302 int 303 pfsync_state_import(struct pfsync_state *sp, u_int8_t flags) 304 { 305 struct pf_state *st = NULL; 306 struct pf_state_key *skw = NULL, *sks = NULL; 307 struct pf_rule *r = NULL; 308 struct pfi_kif *kif; 309 int pool_flags; 310 int error; 311 312 if (sp->creatorid == 0 && pf_status.debug >= PF_DEBUG_MISC) { 313 kprintf("pfsync_insert_net_state: invalid creator id:" 314 " %08x\n", ntohl(sp->creatorid)); 315 return (EINVAL); 316 } 317 318 if ((kif = pfi_kif_get(sp->ifname)) == NULL) { 319 if (pf_status.debug >= PF_DEBUG_MISC) 320 kprintf("pfsync_insert_net_state: " 321 "unknown interface: %s\n", sp->ifname); 322 if (flags & PFSYNC_SI_IOCTL) 323 return (EINVAL); 324 return (0); /* skip this state */ 325 } 326 327 /* 328 * If the ruleset checksums match or the state is coming from the ioctl, 329 * it's safe to associate the state with the rule of that number. 330 */ 331 if (sp->rule != htonl(-1) && sp->anchor == htonl(-1) && 332 (flags & (PFSYNC_SI_IOCTL | PFSYNC_SI_CKSUM)) && ntohl(sp->rule) < 333 pf_main_ruleset.rules[PF_RULESET_FILTER].active.rcount) 334 r = pf_main_ruleset.rules[ 335 PF_RULESET_FILTER].active.ptr_array[ntohl(sp->rule)]; 336 else 337 r = &pf_default_rule; 338 339 if ((r->max_states && r->states_cur >= r->max_states)) 340 goto cleanup; 341 342 if (flags & PFSYNC_SI_IOCTL) 343 pool_flags = M_WAITOK | M_NULLOK | M_ZERO; 344 else 345 pool_flags = M_WAITOK | M_ZERO; 346 347 if ((st = kmalloc(sizeof(struct pf_state), M_PFSYNC, pool_flags)) == NULL) 348 goto cleanup; 349 lockinit(&st->lk, "pfstlk", 0, 0); 350 351 if ((skw = pf_alloc_state_key(pool_flags)) == NULL) 352 goto cleanup; 353 354 if (PF_ANEQ(&sp->key[PF_SK_WIRE].addr[0], 355 &sp->key[PF_SK_STACK].addr[0], sp->af) || 356 PF_ANEQ(&sp->key[PF_SK_WIRE].addr[1], 357 &sp->key[PF_SK_STACK].addr[1], sp->af) || 358 sp->key[PF_SK_WIRE].port[0] != sp->key[PF_SK_STACK].port[0] || 359 sp->key[PF_SK_WIRE].port[1] != sp->key[PF_SK_STACK].port[1]) { 360 if ((sks = pf_alloc_state_key(pool_flags)) == NULL) 361 goto cleanup; 362 } else 363 sks = skw; 364 365 /* allocate memory for scrub info */ 366 if (pfsync_alloc_scrub_memory(&sp->src, &st->src) || 367 pfsync_alloc_scrub_memory(&sp->dst, &st->dst)) 368 goto cleanup; 369 370 /* copy to state key(s) */ 371 skw->addr[0] = sp->key[PF_SK_WIRE].addr[0]; 372 skw->addr[1] = sp->key[PF_SK_WIRE].addr[1]; 373 skw->port[0] = sp->key[PF_SK_WIRE].port[0]; 374 skw->port[1] = sp->key[PF_SK_WIRE].port[1]; 375 skw->proto = sp->proto; 376 skw->af = sp->af; 377 if (sks != skw) { 378 sks->addr[0] = sp->key[PF_SK_STACK].addr[0]; 379 sks->addr[1] = sp->key[PF_SK_STACK].addr[1]; 380 sks->port[0] = sp->key[PF_SK_STACK].port[0]; 381 sks->port[1] = sp->key[PF_SK_STACK].port[1]; 382 sks->proto = sp->proto; 383 sks->af = sp->af; 384 } 385 386 /* copy to state */ 387 bcopy(&sp->rt_addr, &st->rt_addr, sizeof(st->rt_addr)); 388 st->creation = time_second - ntohl(sp->creation); 389 st->expire = time_second; 390 if (sp->expire) { 391 /* XXX No adaptive scaling. */ 392 st->expire -= r->timeout[sp->timeout] - ntohl(sp->expire); 393 } 394 395 st->expire = ntohl(sp->expire) + time_second; 396 st->direction = sp->direction; 397 st->log = sp->log; 398 st->timeout = sp->timeout; 399 st->state_flags = sp->state_flags; 400 if (!(flags & PFSYNC_SI_IOCTL)) 401 st->sync_flags = PFSTATE_FROMSYNC; 402 403 bcopy(sp->id, &st->id, sizeof(st->id)); 404 st->creatorid = sp->creatorid; 405 pf_state_peer_ntoh(&sp->src, &st->src); 406 pf_state_peer_ntoh(&sp->dst, &st->dst); 407 408 st->rule.ptr = r; 409 st->nat_rule.ptr = NULL; 410 st->anchor.ptr = NULL; 411 st->rt_kif = NULL; 412 413 st->pfsync_time = 0; 414 415 416 /* XXX when we have nat_rule/anchors, use STATE_INC_COUNTERS */ 417 r->states_cur++; 418 r->states_tot++; 419 420 if ((error = pf_state_insert(kif, skw, sks, st)) != 0) { 421 /* XXX when we have nat_rule/anchors, use STATE_DEC_COUNTERS */ 422 r->states_cur--; 423 goto cleanup_state; 424 } 425 426 return (0); 427 428 cleanup: 429 error = ENOMEM; 430 if (skw == sks) 431 sks = NULL; 432 if (skw != NULL) 433 kfree(skw, M_PFSYNC); 434 if (sks != NULL) 435 kfree(sks, M_PFSYNC); 436 437 cleanup_state: /* pf_state_insert frees the state keys */ 438 if (st) { 439 if (st->dst.scrub) 440 kfree(st->dst.scrub, M_PFSYNC); 441 if (st->src.scrub) 442 kfree(st->src.scrub, M_PFSYNC); 443 kfree(st, M_PFSYNC); 444 } 445 return (error); 446 } 447 448 void 449 pfsync_input(struct mbuf *m, ...) 450 { 451 struct ip *ip = mtod(m, struct ip *); 452 struct pfsync_header *ph; 453 struct pfsync_softc *sc = pfsyncif; 454 struct pf_state *st; 455 struct pf_state_key *sk; 456 struct pf_state_item *si; 457 struct pf_state_cmp id_key; 458 struct pfsync_state *sp; 459 struct pfsync_state_upd *up; 460 struct pfsync_state_del *dp; 461 struct pfsync_state_clr *cp; 462 struct pfsync_state_upd_req *rup; 463 struct pfsync_state_bus *bus; 464 #ifdef IPSEC 465 struct pfsync_tdb *pt; 466 #endif 467 struct in_addr src; 468 struct mbuf *mp; 469 int iplen, action, error, i, count, offp, sfail, stale = 0; 470 u_int8_t flags = 0; 471 472 /* This function is not yet called from anywhere */ 473 /* Still we assume for safety that pf_token must be held */ 474 ASSERT_LWKT_TOKEN_HELD(&pf_token); 475 476 pfsyncstats.pfsyncs_ipackets++; 477 478 /* verify that we have a sync interface configured */ 479 if (!sc || !sc->sc_sync_ifp || !pf_status.running) 480 goto done; 481 482 /* verify that the packet came in on the right interface */ 483 if (sc->sc_sync_ifp != m->m_pkthdr.rcvif) { 484 pfsyncstats.pfsyncs_badif++; 485 goto done; 486 } 487 488 /* verify that the IP TTL is 255. */ 489 if (ip->ip_ttl != PFSYNC_DFLTTL) { 490 pfsyncstats.pfsyncs_badttl++; 491 goto done; 492 } 493 494 iplen = ip->ip_hl << 2; 495 496 if (m->m_pkthdr.len < iplen + sizeof(*ph)) { 497 pfsyncstats.pfsyncs_hdrops++; 498 goto done; 499 } 500 501 if (iplen + sizeof(*ph) > m->m_len) { 502 if ((m = m_pullup(m, iplen + sizeof(*ph))) == NULL) { 503 pfsyncstats.pfsyncs_hdrops++; 504 goto done; 505 } 506 ip = mtod(m, struct ip *); 507 } 508 ph = (struct pfsync_header *)((char *)ip + iplen); 509 510 /* verify the version */ 511 if (ph->version != PFSYNC_VERSION) { 512 pfsyncstats.pfsyncs_badver++; 513 goto done; 514 } 515 516 action = ph->action; 517 count = ph->count; 518 519 /* make sure it's a valid action code */ 520 if (action >= PFSYNC_ACT_MAX) { 521 pfsyncstats.pfsyncs_badact++; 522 goto done; 523 } 524 525 /* Cheaper to grab this now than having to mess with mbufs later */ 526 src = ip->ip_src; 527 528 if (!bcmp(&ph->pf_chksum, &pf_status.pf_chksum, PF_MD5_DIGEST_LENGTH)) 529 flags |= PFSYNC_SI_CKSUM; 530 531 switch (action) { 532 case PFSYNC_ACT_CLR: { 533 struct pf_state *nexts; 534 struct pf_state_key *nextsk; 535 struct pfi_kif *kif; 536 globaldata_t save_gd = mycpu; 537 int nn; 538 539 u_int32_t creatorid; 540 if ((mp = m_pulldown(m, iplen + sizeof(*ph), 541 sizeof(*cp), &offp)) == NULL) { 542 pfsyncstats.pfsyncs_badlen++; 543 return; 544 } 545 cp = (struct pfsync_state_clr *)(mp->m_data + offp); 546 creatorid = cp->creatorid; 547 548 crit_enter(); 549 if (cp->ifname[0] == '\0') { 550 lwkt_gettoken(&pf_token); 551 for (nn = 0; nn < ncpus; ++nn) { 552 lwkt_setcpu_self(globaldata_find(nn)); 553 for (st = RB_MIN(pf_state_tree_id, 554 &tree_id[nn]); 555 st; st = nexts) { 556 nexts = RB_NEXT(pf_state_tree_id, 557 &tree_id[n], st); 558 if (st->creatorid == creatorid) { 559 st->sync_flags |= 560 PFSTATE_FROMSYNC; 561 pf_unlink_state(st); 562 } 563 } 564 } 565 lwkt_setcpu_self(save_gd); 566 lwkt_reltoken(&pf_token); 567 } else { 568 if ((kif = pfi_kif_get(cp->ifname)) == NULL) { 569 crit_exit(); 570 return; 571 } 572 /* XXX correct? */ 573 lwkt_gettoken(&pf_token); 574 for (nn = 0; nn < ncpus; ++nn) { 575 lwkt_setcpu_self(globaldata_find(nn)); 576 for (sk = RB_MIN(pf_state_tree, 577 &pf_statetbl[nn]); 578 sk; 579 sk = nextsk) { 580 nextsk = RB_NEXT(pf_state_tree, 581 &pf_statetbl[n], sk); 582 TAILQ_FOREACH(si, &sk->states, entry) { 583 if (si->s->creatorid == 584 creatorid) { 585 si->s->sync_flags |= 586 PFSTATE_FROMSYNC; 587 pf_unlink_state(si->s); 588 } 589 } 590 } 591 } 592 lwkt_setcpu_self(save_gd); 593 lwkt_reltoken(&pf_token); 594 } 595 crit_exit(); 596 597 break; 598 } 599 case PFSYNC_ACT_INS: 600 if ((mp = m_pulldown(m, iplen + sizeof(*ph), 601 count * sizeof(*sp), &offp)) == NULL) { 602 pfsyncstats.pfsyncs_badlen++; 603 return; 604 } 605 606 crit_enter(); 607 for (i = 0, sp = (struct pfsync_state *)(mp->m_data + offp); 608 i < count; i++, sp++) { 609 /* check for invalid values */ 610 if (sp->timeout >= PFTM_MAX || 611 sp->src.state > PF_TCPS_PROXY_DST || 612 sp->dst.state > PF_TCPS_PROXY_DST || 613 sp->direction > PF_OUT || 614 (sp->af != AF_INET && sp->af != AF_INET6)) { 615 if (pf_status.debug >= PF_DEBUG_MISC) 616 kprintf("pfsync_insert: PFSYNC_ACT_INS: " 617 "invalid value\n"); 618 pfsyncstats.pfsyncs_badval++; 619 continue; 620 } 621 622 if ((error = pfsync_state_import(sp, flags))) { 623 if (error == ENOMEM) { 624 crit_exit(); 625 goto done; 626 } 627 } 628 } 629 crit_exit(); 630 break; 631 case PFSYNC_ACT_UPD: 632 if ((mp = m_pulldown(m, iplen + sizeof(*ph), 633 count * sizeof(*sp), &offp)) == NULL) { 634 pfsyncstats.pfsyncs_badlen++; 635 return; 636 } 637 638 crit_enter(); 639 for (i = 0, sp = (struct pfsync_state *)(mp->m_data + offp); 640 i < count; i++, sp++) { 641 int flags = PFSYNC_FLAG_STALE; 642 643 /* check for invalid values */ 644 if (sp->timeout >= PFTM_MAX || 645 sp->src.state > PF_TCPS_PROXY_DST || 646 sp->dst.state > PF_TCPS_PROXY_DST) { 647 if (pf_status.debug >= PF_DEBUG_MISC) 648 kprintf("pfsync_insert: PFSYNC_ACT_UPD: " 649 "invalid value\n"); 650 pfsyncstats.pfsyncs_badval++; 651 continue; 652 } 653 654 bcopy(sp->id, &id_key.id, sizeof(id_key.id)); 655 id_key.creatorid = sp->creatorid; 656 657 st = pf_find_state_byid(&id_key); 658 if (st == NULL) { 659 /* insert the update */ 660 if (pfsync_state_import(sp, flags)) 661 pfsyncstats.pfsyncs_badstate++; 662 continue; 663 } 664 sk = st->key[PF_SK_WIRE]; /* XXX right one? */ 665 sfail = 0; 666 if (sk->proto == IPPROTO_TCP) { 667 /* 668 * The state should never go backwards except 669 * for syn-proxy states. Neither should the 670 * sequence window slide backwards. 671 */ 672 if (st->src.state > sp->src.state && 673 (st->src.state < PF_TCPS_PROXY_SRC || 674 sp->src.state >= PF_TCPS_PROXY_SRC)) 675 sfail = 1; 676 else if (SEQ_GT(st->src.seqlo, 677 ntohl(sp->src.seqlo))) 678 sfail = 3; 679 else if (st->dst.state > sp->dst.state) { 680 /* There might still be useful 681 * information about the src state here, 682 * so import that part of the update, 683 * then "fail" so we send the updated 684 * state back to the peer who is missing 685 * our what we know. */ 686 pf_state_peer_ntoh(&sp->src, &st->src); 687 /* XXX do anything with timeouts? */ 688 sfail = 7; 689 flags = 0; 690 } else if (st->dst.state >= TCPS_SYN_SENT && 691 SEQ_GT(st->dst.seqlo, ntohl(sp->dst.seqlo))) 692 sfail = 4; 693 } else { 694 /* 695 * Non-TCP protocol state machine always go 696 * forwards 697 */ 698 if (st->src.state > sp->src.state) 699 sfail = 5; 700 else if (st->dst.state > sp->dst.state) 701 sfail = 6; 702 } 703 if (sfail) { 704 if (pf_status.debug >= PF_DEBUG_MISC) 705 kprintf("pfsync: %s stale update " 706 "(%d) id: %016jx " 707 "creatorid: %08x\n", 708 (sfail < 7 ? "ignoring" 709 : "partial"), sfail, 710 (uintmax_t)be64toh(st->id), 711 ntohl(st->creatorid)); 712 pfsyncstats.pfsyncs_stale++; 713 714 if (!(sp->sync_flags & PFSTATE_STALE)) { 715 /* we have a better state, send it */ 716 if (sc->sc_mbuf != NULL && !stale) 717 pfsync_sendout(sc); 718 stale++; 719 if (!st->sync_flags) 720 pfsync_pack_state( 721 PFSYNC_ACT_UPD, st, flags); 722 } 723 continue; 724 } 725 pfsync_alloc_scrub_memory(&sp->dst, &st->dst); 726 pf_state_peer_ntoh(&sp->src, &st->src); 727 pf_state_peer_ntoh(&sp->dst, &st->dst); 728 st->expire = ntohl(sp->expire) + time_second; 729 st->timeout = sp->timeout; 730 } 731 if (stale && sc->sc_mbuf != NULL) 732 pfsync_sendout(sc); 733 crit_exit(); 734 break; 735 /* 736 * It's not strictly necessary for us to support the "uncompressed" 737 * delete action, but it's relatively simple and maintains consistency. 738 */ 739 case PFSYNC_ACT_DEL: 740 if ((mp = m_pulldown(m, iplen + sizeof(*ph), 741 count * sizeof(*sp), &offp)) == NULL) { 742 pfsyncstats.pfsyncs_badlen++; 743 return; 744 } 745 746 crit_enter(); 747 for (i = 0, sp = (struct pfsync_state *)(mp->m_data + offp); 748 i < count; i++, sp++) { 749 bcopy(sp->id, &id_key.id, sizeof(id_key.id)); 750 id_key.creatorid = sp->creatorid; 751 752 st = pf_find_state_byid(&id_key); 753 if (st == NULL) { 754 pfsyncstats.pfsyncs_badstate++; 755 continue; 756 } 757 st->sync_flags |= PFSTATE_FROMSYNC; 758 pf_unlink_state(st); 759 } 760 crit_exit(); 761 break; 762 case PFSYNC_ACT_UPD_C: { 763 int update_requested = 0; 764 765 if ((mp = m_pulldown(m, iplen + sizeof(*ph), 766 count * sizeof(*up), &offp)) == NULL) { 767 pfsyncstats.pfsyncs_badlen++; 768 return; 769 } 770 771 crit_enter(); 772 for (i = 0, up = (struct pfsync_state_upd *)(mp->m_data + offp); 773 i < count; i++, up++) { 774 /* check for invalid values */ 775 if (up->timeout >= PFTM_MAX || 776 up->src.state > PF_TCPS_PROXY_DST || 777 up->dst.state > PF_TCPS_PROXY_DST) { 778 if (pf_status.debug >= PF_DEBUG_MISC) 779 kprintf("pfsync_insert: " 780 "PFSYNC_ACT_UPD_C: " 781 "invalid value\n"); 782 pfsyncstats.pfsyncs_badval++; 783 continue; 784 } 785 786 bcopy(up->id, &id_key.id, sizeof(id_key.id)); 787 id_key.creatorid = up->creatorid; 788 789 st = pf_find_state_byid(&id_key); 790 if (st == NULL) { 791 /* We don't have this state. Ask for it. */ 792 error = pfsync_request_update(up, &src); 793 if (error == ENOMEM) { 794 crit_exit(); 795 goto done; 796 } 797 update_requested = 1; 798 pfsyncstats.pfsyncs_badstate++; 799 continue; 800 } 801 sk = st->key[PF_SK_WIRE]; /* XXX right one? */ 802 sfail = 0; 803 if (sk->proto == IPPROTO_TCP) { 804 /* 805 * The state should never go backwards except 806 * for syn-proxy states. Neither should the 807 * sequence window slide backwards. 808 */ 809 if (st->src.state > up->src.state && 810 (st->src.state < PF_TCPS_PROXY_SRC || 811 up->src.state >= PF_TCPS_PROXY_SRC)) 812 sfail = 1; 813 else if (st->dst.state > up->dst.state) 814 sfail = 2; 815 else if (SEQ_GT(st->src.seqlo, 816 ntohl(up->src.seqlo))) 817 sfail = 3; 818 else if (st->dst.state >= TCPS_SYN_SENT && 819 SEQ_GT(st->dst.seqlo, ntohl(up->dst.seqlo))) 820 sfail = 4; 821 } else { 822 /* 823 * Non-TCP protocol state machine always go 824 * forwards 825 */ 826 if (st->src.state > up->src.state) 827 sfail = 5; 828 else if (st->dst.state > up->dst.state) 829 sfail = 6; 830 } 831 if (sfail) { 832 if (pf_status.debug >= PF_DEBUG_MISC) 833 kprintf("pfsync: ignoring stale update " 834 "(%d) id: %016" PRIx64 " " 835 "creatorid: %08x\n", sfail, 836 be64toh(st->id), 837 ntohl(st->creatorid)); 838 pfsyncstats.pfsyncs_stale++; 839 840 /* we have a better state, send it out */ 841 if ((!stale || update_requested) && 842 sc->sc_mbuf != NULL) { 843 pfsync_sendout(sc); 844 update_requested = 0; 845 } 846 stale++; 847 if (!st->sync_flags) 848 pfsync_pack_state(PFSYNC_ACT_UPD, st, 849 PFSYNC_FLAG_STALE); 850 continue; 851 } 852 pfsync_alloc_scrub_memory(&up->dst, &st->dst); 853 pf_state_peer_ntoh(&up->src, &st->src); 854 pf_state_peer_ntoh(&up->dst, &st->dst); 855 st->expire = ntohl(up->expire) + time_second; 856 st->timeout = up->timeout; 857 } 858 if ((update_requested || stale) && sc->sc_mbuf) 859 pfsync_sendout(sc); 860 crit_exit(); 861 break; 862 } 863 case PFSYNC_ACT_DEL_C: 864 if ((mp = m_pulldown(m, iplen + sizeof(*ph), 865 count * sizeof(*dp), &offp)) == NULL) { 866 pfsyncstats.pfsyncs_badlen++; 867 return; 868 } 869 870 crit_enter(); 871 for (i = 0, dp = (struct pfsync_state_del *)(mp->m_data + offp); 872 i < count; i++, dp++) { 873 bcopy(dp->id, &id_key.id, sizeof(id_key.id)); 874 id_key.creatorid = dp->creatorid; 875 876 st = pf_find_state_byid(&id_key); 877 if (st == NULL) { 878 pfsyncstats.pfsyncs_badstate++; 879 continue; 880 } 881 st->sync_flags |= PFSTATE_FROMSYNC; 882 pf_unlink_state(st); 883 } 884 crit_exit(); 885 break; 886 case PFSYNC_ACT_INS_F: 887 case PFSYNC_ACT_DEL_F: 888 /* not implemented */ 889 break; 890 case PFSYNC_ACT_UREQ: 891 if ((mp = m_pulldown(m, iplen + sizeof(*ph), 892 count * sizeof(*rup), &offp)) == NULL) { 893 pfsyncstats.pfsyncs_badlen++; 894 return; 895 } 896 897 crit_enter(); 898 if (sc->sc_mbuf != NULL) 899 pfsync_sendout(sc); 900 for (i = 0, 901 rup = (struct pfsync_state_upd_req *)(mp->m_data + offp); 902 i < count; i++, rup++) { 903 bcopy(rup->id, &id_key.id, sizeof(id_key.id)); 904 id_key.creatorid = rup->creatorid; 905 906 if (id_key.id == 0 && id_key.creatorid == 0) { 907 sc->sc_ureq_received = mycpu->gd_time_seconds; 908 if (sc->sc_bulk_send_next == NULL) { 909 if (++sc->sc_bulk_send_cpu >= ncpus) 910 sc->sc_bulk_send_cpu = 0; 911 sc->sc_bulk_send_next = 912 TAILQ_FIRST(&state_list[sc->sc_bulk_send_cpu]); 913 } 914 sc->sc_bulk_terminator = 915 sc->sc_bulk_send_next; 916 sc->sc_bulk_terminator_cpu = 917 sc->sc_bulk_send_cpu; 918 if (pf_status.debug >= PF_DEBUG_MISC) 919 kprintf("pfsync: received " 920 "bulk update request\n"); 921 pfsync_send_bus(sc, PFSYNC_BUS_START); 922 lwkt_reltoken(&pf_token); 923 callout_init(&sc->sc_bulk_tmo); 924 lwkt_gettoken(&pf_token); 925 } else { 926 st = pf_find_state_byid(&id_key); 927 if (st == NULL) { 928 pfsyncstats.pfsyncs_badstate++; 929 continue; 930 } 931 if (!st->sync_flags) 932 pfsync_pack_state(PFSYNC_ACT_UPD, 933 st, 0); 934 } 935 } 936 if (sc->sc_mbuf != NULL) 937 pfsync_sendout(sc); 938 crit_exit(); 939 break; 940 case PFSYNC_ACT_BUS: 941 /* If we're not waiting for a bulk update, who cares. */ 942 if (sc->sc_ureq_sent == 0) 943 break; 944 945 if ((mp = m_pulldown(m, iplen + sizeof(*ph), 946 sizeof(*bus), &offp)) == NULL) { 947 pfsyncstats.pfsyncs_badlen++; 948 return; 949 } 950 bus = (struct pfsync_state_bus *)(mp->m_data + offp); 951 switch (bus->status) { 952 case PFSYNC_BUS_START: 953 lwkt_reltoken(&pf_token); 954 callout_reset(&sc->sc_bulkfail_tmo, 955 pf_pool_limits[PF_LIMIT_STATES].limit / 956 (PFSYNC_BULKPACKETS * sc->sc_maxcount), 957 pfsync_bulkfail, LIST_FIRST(&pfsync_list)); 958 lwkt_gettoken(&pf_token); 959 if (pf_status.debug >= PF_DEBUG_MISC) 960 kprintf("pfsync: received bulk " 961 "update start\n"); 962 break; 963 case PFSYNC_BUS_END: 964 if (mycpu->gd_time_seconds - ntohl(bus->endtime) >= 965 sc->sc_ureq_sent) { 966 /* that's it, we're happy */ 967 sc->sc_ureq_sent = 0; 968 sc->sc_bulk_tries = 0; 969 lwkt_reltoken(&pf_token); 970 callout_stop(&sc->sc_bulkfail_tmo); 971 lwkt_gettoken(&pf_token); 972 #if NCARP > 0 973 if (!pfsync_sync_ok) { 974 lwkt_reltoken(&pf_token); 975 carp_group_demote_adj(&sc->sc_if, -1); 976 lwkt_gettoken(&pf_token); 977 } 978 #endif 979 pfsync_sync_ok = 1; 980 if (pf_status.debug >= PF_DEBUG_MISC) 981 kprintf("pfsync: received valid " 982 "bulk update end\n"); 983 } else { 984 if (pf_status.debug >= PF_DEBUG_MISC) 985 kprintf("pfsync: received invalid " 986 "bulk update end: bad timestamp\n"); 987 } 988 break; 989 } 990 break; 991 #ifdef IPSEC 992 case PFSYNC_ACT_TDB_UPD: 993 if ((mp = m_pulldown(m, iplen + sizeof(*ph), 994 count * sizeof(*pt), &offp)) == NULL) { 995 pfsyncstats.pfsyncs_badlen++; 996 return; 997 } 998 crit_enter(); 999 for (i = 0, pt = (struct pfsync_tdb *)(mp->m_data + offp); 1000 i < count; i++, pt++) 1001 pfsync_update_net_tdb(pt); 1002 crit_exit(); 1003 break; 1004 #endif 1005 } 1006 1007 done: 1008 if (m) 1009 m_freem(m); 1010 } 1011 1012 int 1013 pfsyncoutput(struct ifnet *ifp, struct mbuf *m, struct sockaddr *dst, 1014 struct rtentry *rt) 1015 { 1016 m_freem(m); 1017 return (0); 1018 } 1019 1020 /* ARGSUSED */ 1021 int 1022 pfsyncioctl(struct ifnet *ifp, u_long cmd, caddr_t data, struct ucred *cr) 1023 { 1024 struct pfsync_softc *sc = ifp->if_softc; 1025 struct ifreq *ifr = (struct ifreq *)data; 1026 struct ip_moptions *imo = &sc->sc_imo; 1027 struct pfsyncreq pfsyncr; 1028 struct ifnet *sifp; 1029 int error; 1030 1031 lwkt_gettoken(&pf_token); 1032 1033 switch (cmd) { 1034 case SIOCSIFADDR: 1035 case SIOCAIFADDR: 1036 case SIOCSIFDSTADDR: 1037 case SIOCSIFFLAGS: 1038 if (ifp->if_flags & IFF_UP) 1039 ifp->if_flags |= IFF_RUNNING; 1040 else 1041 ifp->if_flags &= ~IFF_RUNNING; 1042 break; 1043 case SIOCSIFMTU: 1044 if (ifr->ifr_mtu < PFSYNC_MINMTU) { 1045 lwkt_reltoken(&pf_token); 1046 return (EINVAL); 1047 } 1048 if (ifr->ifr_mtu > MCLBYTES) 1049 ifr->ifr_mtu = MCLBYTES; 1050 crit_enter(); 1051 if (ifr->ifr_mtu < ifp->if_mtu) 1052 pfsync_sendout(sc); 1053 pfsync_setmtu(sc, ifr->ifr_mtu); 1054 crit_exit(); 1055 break; 1056 case SIOCGETPFSYNC: 1057 bzero(&pfsyncr, sizeof(pfsyncr)); 1058 if (sc->sc_sync_ifp) 1059 strlcpy(pfsyncr.pfsyncr_syncdev, 1060 sc->sc_sync_ifp->if_xname, IFNAMSIZ); 1061 pfsyncr.pfsyncr_syncpeer = sc->sc_sync_peer; 1062 pfsyncr.pfsyncr_maxupdates = sc->sc_maxupdates; 1063 lwkt_reltoken(&pf_token); 1064 if ((error = copyout(&pfsyncr, ifr->ifr_data, sizeof(pfsyncr)))) 1065 return (error); 1066 lwkt_gettoken(&pf_token); 1067 break; 1068 case SIOCSETPFSYNC: 1069 if ((error = priv_check_cred(cr, PRIV_ROOT, NULL_CRED_OKAY)) != 0) { 1070 lwkt_reltoken(&pf_token); 1071 return (error); 1072 } 1073 if ((error = copyin(ifr->ifr_data, &pfsyncr, sizeof(pfsyncr)))) { 1074 lwkt_reltoken(&pf_token); 1075 return (error); 1076 } 1077 1078 if (pfsyncr.pfsyncr_syncpeer.s_addr == 0) 1079 sc->sc_sync_peer.s_addr = INADDR_PFSYNC_GROUP; 1080 else 1081 sc->sc_sync_peer.s_addr = 1082 pfsyncr.pfsyncr_syncpeer.s_addr; 1083 1084 if (pfsyncr.pfsyncr_maxupdates > 255) { 1085 lwkt_reltoken(&pf_token); 1086 return (EINVAL); 1087 } 1088 sc->sc_maxupdates = pfsyncr.pfsyncr_maxupdates; 1089 1090 if (pfsyncr.pfsyncr_syncdev[0] == 0) { 1091 sc->sc_sync_ifp = NULL; 1092 if (sc->sc_mbuf_net != NULL) { 1093 /* Don't keep stale pfsync packets around. */ 1094 crit_enter(); 1095 m_freem(sc->sc_mbuf_net); 1096 sc->sc_mbuf_net = NULL; 1097 sc->sc_statep_net.s = NULL; 1098 crit_exit(); 1099 } 1100 if (imo->imo_num_memberships > 0) { 1101 pfsync_in_delmulti(imo->imo_membership[--imo->imo_num_memberships]); 1102 imo->imo_multicast_ifp = NULL; 1103 } 1104 break; 1105 } 1106 1107 if ((sifp = ifunit(pfsyncr.pfsyncr_syncdev)) == NULL) { 1108 lwkt_reltoken(&pf_token); 1109 return (EINVAL); 1110 } 1111 1112 crit_enter(); 1113 if (sifp->if_mtu < sc->sc_if.if_mtu || 1114 (sc->sc_sync_ifp != NULL && 1115 sifp->if_mtu < sc->sc_sync_ifp->if_mtu) || 1116 sifp->if_mtu < MCLBYTES - sizeof(struct ip)) 1117 pfsync_sendout(sc); 1118 sc->sc_sync_ifp = sifp; 1119 1120 pfsync_setmtu(sc, sc->sc_if.if_mtu); 1121 1122 if (imo->imo_num_memberships > 0) { 1123 pfsync_in_delmulti(imo->imo_membership[--imo->imo_num_memberships]); 1124 imo->imo_multicast_ifp = NULL; 1125 } 1126 1127 if (sc->sc_sync_ifp && 1128 sc->sc_sync_peer.s_addr == INADDR_PFSYNC_GROUP) { 1129 if (!(sc->sc_sync_ifp->if_flags & IFF_MULTICAST)) { 1130 sc->sc_sync_ifp = NULL; 1131 lwkt_reltoken(&pf_token); 1132 crit_exit(); 1133 return (EADDRNOTAVAIL); 1134 } 1135 1136 if ((imo->imo_membership[0] = 1137 pfsync_in_addmulti(sc->sc_sync_ifp)) == NULL) { 1138 sc->sc_sync_ifp = NULL; 1139 lwkt_reltoken(&pf_token); 1140 crit_exit(); 1141 return (ENOBUFS); 1142 } 1143 imo->imo_num_memberships++; 1144 imo->imo_multicast_ifp = sc->sc_sync_ifp; 1145 imo->imo_multicast_ttl = PFSYNC_DFLTTL; 1146 imo->imo_multicast_loop = 0; 1147 } 1148 1149 if (sc->sc_sync_ifp || 1150 sc->sc_sendaddr.s_addr != INADDR_PFSYNC_GROUP) { 1151 /* Request a full state table update. */ 1152 sc->sc_ureq_sent = mycpu->gd_time_seconds; 1153 #if NCARP > 0 1154 if (pfsync_sync_ok) 1155 carp_group_demote_adj(&sc->sc_if, 1); 1156 #endif 1157 pfsync_sync_ok = 0; 1158 if (pf_status.debug >= PF_DEBUG_MISC) 1159 kprintf("pfsync: requesting bulk update\n"); 1160 lwkt_reltoken(&pf_token); 1161 callout_reset(&sc->sc_bulkfail_tmo, 5 * hz, 1162 pfsync_bulkfail, LIST_FIRST(&pfsync_list)); 1163 lwkt_gettoken(&pf_token); 1164 error = pfsync_request_update(NULL, NULL); 1165 if (error == ENOMEM) { 1166 lwkt_reltoken(&pf_token); 1167 crit_exit(); 1168 return (ENOMEM); 1169 } 1170 pfsync_sendout(sc); 1171 } 1172 crit_exit(); 1173 1174 break; 1175 1176 default: 1177 lwkt_reltoken(&pf_token); 1178 return (ENOTTY); 1179 } 1180 1181 lwkt_reltoken(&pf_token); 1182 return (0); 1183 } 1184 1185 void 1186 pfsync_setmtu(struct pfsync_softc *sc, int mtu_req) 1187 { 1188 int mtu; 1189 1190 if (sc->sc_sync_ifp && sc->sc_sync_ifp->if_mtu < mtu_req) 1191 mtu = sc->sc_sync_ifp->if_mtu; 1192 else 1193 mtu = mtu_req; 1194 1195 sc->sc_maxcount = (mtu - sizeof(struct pfsync_header)) / 1196 sizeof(struct pfsync_state); 1197 if (sc->sc_maxcount > 254) 1198 sc->sc_maxcount = 254; 1199 sc->sc_if.if_mtu = sizeof(struct pfsync_header) + 1200 sc->sc_maxcount * sizeof(struct pfsync_state); 1201 } 1202 1203 struct mbuf * 1204 pfsync_get_mbuf(struct pfsync_softc *sc, u_int8_t action, void **sp) 1205 { 1206 struct pfsync_header *h; 1207 struct mbuf *m; 1208 int len; 1209 1210 ASSERT_LWKT_TOKEN_HELD(&pf_token); 1211 1212 MGETHDR(m, M_WAITOK, MT_DATA); 1213 if (m == NULL) { 1214 IFNET_STAT_INC(&sc->sc_if, oerrors, 1); 1215 return (NULL); 1216 } 1217 1218 switch (action) { 1219 case PFSYNC_ACT_CLR: 1220 len = sizeof(struct pfsync_header) + 1221 sizeof(struct pfsync_state_clr); 1222 break; 1223 case PFSYNC_ACT_UPD_C: 1224 len = (sc->sc_maxcount * sizeof(struct pfsync_state_upd)) + 1225 sizeof(struct pfsync_header); 1226 break; 1227 case PFSYNC_ACT_DEL_C: 1228 len = (sc->sc_maxcount * sizeof(struct pfsync_state_del)) + 1229 sizeof(struct pfsync_header); 1230 break; 1231 case PFSYNC_ACT_UREQ: 1232 len = (sc->sc_maxcount * sizeof(struct pfsync_state_upd_req)) + 1233 sizeof(struct pfsync_header); 1234 break; 1235 case PFSYNC_ACT_BUS: 1236 len = sizeof(struct pfsync_header) + 1237 sizeof(struct pfsync_state_bus); 1238 break; 1239 case PFSYNC_ACT_TDB_UPD: 1240 len = (sc->sc_maxcount * sizeof(struct pfsync_tdb)) + 1241 sizeof(struct pfsync_header); 1242 break; 1243 default: 1244 len = (sc->sc_maxcount * sizeof(struct pfsync_state)) + 1245 sizeof(struct pfsync_header); 1246 break; 1247 } 1248 1249 if (len > MHLEN) { 1250 MCLGET(m, M_WAITOK); 1251 if ((m->m_flags & M_EXT) == 0) { 1252 m_free(m); 1253 IFNET_STAT_INC(&sc->sc_if, oerrors, 1); 1254 return (NULL); 1255 } 1256 m->m_data += (MCLBYTES - len) &~ (sizeof(long) - 1); 1257 } else 1258 MH_ALIGN(m, len); 1259 1260 m->m_pkthdr.rcvif = NULL; 1261 m->m_pkthdr.len = m->m_len = sizeof(struct pfsync_header); 1262 h = mtod(m, struct pfsync_header *); 1263 h->version = PFSYNC_VERSION; 1264 h->af = 0; 1265 h->count = 0; 1266 h->action = action; 1267 1268 *sp = (void *)((char *)h + PFSYNC_HDRLEN); 1269 lwkt_reltoken(&pf_token); 1270 callout_reset(&sc->sc_tmo, hz, pfsync_timeout, 1271 LIST_FIRST(&pfsync_list)); 1272 lwkt_gettoken(&pf_token); 1273 return (m); 1274 } 1275 1276 int 1277 pfsync_pack_state(u_int8_t action, struct pf_state *st, int flags) 1278 { 1279 struct ifnet *ifp = NULL; 1280 struct pfsync_softc *sc = pfsyncif; 1281 struct pfsync_header *h, *h_net; 1282 struct pfsync_state *sp = NULL; 1283 struct pfsync_state_upd *up = NULL; 1284 struct pfsync_state_del *dp = NULL; 1285 int ret = 0; 1286 u_int8_t i = 255, newaction = 0; 1287 1288 if (sc == NULL) 1289 return (0); 1290 ifp = &sc->sc_if; 1291 1292 /* 1293 * If a packet falls in the forest and there's nobody around to 1294 * hear, does it make a sound? 1295 */ 1296 if (ifp->if_bpf == NULL && sc->sc_sync_ifp == NULL && 1297 sc->sc_sync_peer.s_addr == INADDR_PFSYNC_GROUP) { 1298 /* Don't leave any stale pfsync packets hanging around. */ 1299 if (sc->sc_mbuf != NULL) { 1300 m_freem(sc->sc_mbuf); 1301 sc->sc_mbuf = NULL; 1302 sc->sc_statep.s = NULL; 1303 } 1304 return (0); 1305 } 1306 1307 if (action >= PFSYNC_ACT_MAX) 1308 return (EINVAL); 1309 1310 crit_enter(); 1311 if (sc->sc_mbuf == NULL) { 1312 if ((sc->sc_mbuf = pfsync_get_mbuf(sc, action, 1313 (void *)&sc->sc_statep.s)) == NULL) { 1314 crit_exit(); 1315 return (ENOMEM); 1316 } 1317 h = mtod(sc->sc_mbuf, struct pfsync_header *); 1318 } else { 1319 h = mtod(sc->sc_mbuf, struct pfsync_header *); 1320 if (h->action != action) { 1321 pfsync_sendout(sc); 1322 if ((sc->sc_mbuf = pfsync_get_mbuf(sc, action, 1323 (void *)&sc->sc_statep.s)) == NULL) { 1324 crit_exit(); 1325 return (ENOMEM); 1326 } 1327 h = mtod(sc->sc_mbuf, struct pfsync_header *); 1328 } else { 1329 /* 1330 * If it's an update, look in the packet to see if 1331 * we already have an update for the state. 1332 */ 1333 if (action == PFSYNC_ACT_UPD && sc->sc_maxupdates) { 1334 struct pfsync_state *usp = 1335 (void *)((char *)h + PFSYNC_HDRLEN); 1336 1337 for (i = 0; i < h->count; i++) { 1338 if (!memcmp(usp->id, &st->id, 1339 PFSYNC_ID_LEN) && 1340 usp->creatorid == st->creatorid) { 1341 sp = usp; 1342 sp->updates++; 1343 break; 1344 } 1345 usp++; 1346 } 1347 } 1348 } 1349 } 1350 1351 st->pfsync_time = mycpu->gd_time_seconds; 1352 1353 if (sp == NULL) { 1354 /* not a "duplicate" update */ 1355 i = 255; 1356 sp = sc->sc_statep.s++; 1357 sc->sc_mbuf->m_pkthdr.len = 1358 sc->sc_mbuf->m_len += sizeof(struct pfsync_state); 1359 h->count++; 1360 bzero(sp, sizeof(*sp)); 1361 1362 pfsync_state_export(sp, st); 1363 1364 if (flags & PFSYNC_FLAG_STALE) 1365 sp->sync_flags |= PFSTATE_STALE; 1366 } else { 1367 pf_state_peer_hton(&st->src, &sp->src); 1368 pf_state_peer_hton(&st->dst, &sp->dst); 1369 1370 if (st->expire <= time_second) 1371 sp->expire = htonl(0); 1372 else 1373 sp->expire = htonl(st->expire - time_second); 1374 } 1375 1376 /* do we need to build "compressed" actions for network transfer? */ 1377 if (sc->sc_sync_ifp && flags & PFSYNC_FLAG_COMPRESS) { 1378 switch (action) { 1379 case PFSYNC_ACT_UPD: 1380 newaction = PFSYNC_ACT_UPD_C; 1381 break; 1382 case PFSYNC_ACT_DEL: 1383 newaction = PFSYNC_ACT_DEL_C; 1384 break; 1385 default: 1386 /* by default we just send the uncompressed states */ 1387 break; 1388 } 1389 } 1390 1391 if (newaction) { 1392 if (sc->sc_mbuf_net == NULL) { 1393 if ((sc->sc_mbuf_net = pfsync_get_mbuf(sc, newaction, 1394 (void *)&sc->sc_statep_net.s)) == NULL) { 1395 crit_exit(); 1396 return (ENOMEM); 1397 } 1398 } 1399 h_net = mtod(sc->sc_mbuf_net, struct pfsync_header *); 1400 1401 switch (newaction) { 1402 case PFSYNC_ACT_UPD_C: 1403 if (i != 255) { 1404 up = (void *)((char *)h_net + 1405 PFSYNC_HDRLEN + (i * sizeof(*up))); 1406 up->updates++; 1407 } else { 1408 h_net->count++; 1409 sc->sc_mbuf_net->m_pkthdr.len = 1410 sc->sc_mbuf_net->m_len += sizeof(*up); 1411 up = sc->sc_statep_net.u++; 1412 1413 bzero(up, sizeof(*up)); 1414 bcopy(&st->id, up->id, sizeof(up->id)); 1415 up->creatorid = st->creatorid; 1416 } 1417 up->timeout = st->timeout; 1418 up->expire = sp->expire; 1419 up->src = sp->src; 1420 up->dst = sp->dst; 1421 break; 1422 case PFSYNC_ACT_DEL_C: 1423 sc->sc_mbuf_net->m_pkthdr.len = 1424 sc->sc_mbuf_net->m_len += sizeof(*dp); 1425 dp = sc->sc_statep_net.d++; 1426 h_net->count++; 1427 1428 bzero(dp, sizeof(*dp)); 1429 bcopy(&st->id, dp->id, sizeof(dp->id)); 1430 dp->creatorid = st->creatorid; 1431 break; 1432 } 1433 } 1434 1435 if (h->count == sc->sc_maxcount || 1436 (sc->sc_maxupdates && (sp->updates >= sc->sc_maxupdates))) 1437 ret = pfsync_sendout(sc); 1438 1439 crit_exit(); 1440 return (ret); 1441 } 1442 1443 int 1444 pfsync_request_update(struct pfsync_state_upd *up, struct in_addr *src) 1445 { 1446 struct pfsync_header *h; 1447 struct pfsync_softc *sc = pfsyncif; 1448 struct pfsync_state_upd_req *rup; 1449 int ret = 0; 1450 1451 if (sc == NULL) 1452 return (0); 1453 1454 if (sc->sc_mbuf == NULL) { 1455 if ((sc->sc_mbuf = pfsync_get_mbuf(sc, PFSYNC_ACT_UREQ, 1456 (void *)&sc->sc_statep.s)) == NULL) 1457 return (ENOMEM); 1458 h = mtod(sc->sc_mbuf, struct pfsync_header *); 1459 } else { 1460 h = mtod(sc->sc_mbuf, struct pfsync_header *); 1461 if (h->action != PFSYNC_ACT_UREQ) { 1462 pfsync_sendout(sc); 1463 if ((sc->sc_mbuf = pfsync_get_mbuf(sc, PFSYNC_ACT_UREQ, 1464 (void *)&sc->sc_statep.s)) == NULL) 1465 return (ENOMEM); 1466 h = mtod(sc->sc_mbuf, struct pfsync_header *); 1467 } 1468 } 1469 1470 if (src != NULL) 1471 sc->sc_sendaddr = *src; 1472 sc->sc_mbuf->m_pkthdr.len = sc->sc_mbuf->m_len += sizeof(*rup); 1473 h->count++; 1474 rup = sc->sc_statep.r++; 1475 bzero(rup, sizeof(*rup)); 1476 if (up != NULL) { 1477 bcopy(up->id, rup->id, sizeof(rup->id)); 1478 rup->creatorid = up->creatorid; 1479 } 1480 1481 if (h->count == sc->sc_maxcount) 1482 ret = pfsync_sendout(sc); 1483 1484 return (ret); 1485 } 1486 1487 int 1488 pfsync_clear_states(u_int32_t creatorid, char *ifname) 1489 { 1490 struct pfsync_softc *sc = pfsyncif; 1491 struct pfsync_state_clr *cp; 1492 int ret; 1493 1494 if (sc == NULL) 1495 return (0); 1496 1497 crit_enter(); 1498 if (sc->sc_mbuf != NULL) 1499 pfsync_sendout(sc); 1500 if ((sc->sc_mbuf = pfsync_get_mbuf(sc, PFSYNC_ACT_CLR, 1501 (void *)&sc->sc_statep.c)) == NULL) { 1502 crit_exit(); 1503 return (ENOMEM); 1504 } 1505 sc->sc_mbuf->m_pkthdr.len = sc->sc_mbuf->m_len += sizeof(*cp); 1506 cp = sc->sc_statep.c; 1507 cp->creatorid = creatorid; 1508 if (ifname != NULL) 1509 strlcpy(cp->ifname, ifname, IFNAMSIZ); 1510 1511 ret = (pfsync_sendout(sc)); 1512 crit_exit(); 1513 return (ret); 1514 } 1515 1516 void 1517 pfsync_timeout(void *v) 1518 { 1519 struct pfsync_softc *sc = v; 1520 1521 crit_enter(); 1522 pfsync_sendout(sc); 1523 crit_exit(); 1524 } 1525 1526 void 1527 pfsync_send_bus(struct pfsync_softc *sc, u_int8_t status) 1528 { 1529 struct pfsync_state_bus *bus; 1530 1531 if (sc->sc_mbuf != NULL) 1532 pfsync_sendout(sc); 1533 1534 if (pfsync_sync_ok && 1535 (sc->sc_mbuf = pfsync_get_mbuf(sc, PFSYNC_ACT_BUS, 1536 (void *)&sc->sc_statep.b)) != NULL) { 1537 sc->sc_mbuf->m_pkthdr.len = sc->sc_mbuf->m_len += sizeof(*bus); 1538 bus = sc->sc_statep.b; 1539 bus->creatorid = pf_status.hostid; 1540 bus->status = status; 1541 bus->endtime = htonl(mycpu->gd_time_seconds - sc->sc_ureq_received); 1542 pfsync_sendout(sc); 1543 } 1544 } 1545 1546 void 1547 pfsync_bulk_update(void *v) 1548 { 1549 struct pfsync_softc *sc = v; 1550 int i = 0; 1551 int cpu; 1552 struct pf_state *state; 1553 1554 ASSERT_LWKT_TOKEN_HELD(&pf_token); 1555 1556 crit_enter(); 1557 if (sc->sc_mbuf != NULL) 1558 pfsync_sendout(sc); 1559 1560 /* 1561 * Grab at most PFSYNC_BULKPACKETS worth of states which have not 1562 * been sent since the latest request was made. 1563 */ 1564 state = sc->sc_bulk_send_next; 1565 cpu = sc->sc_bulk_send_cpu; 1566 if (state) 1567 do { 1568 /* send state update if syncable and not already sent */ 1569 if (!state->sync_flags 1570 && state->timeout < PFTM_MAX 1571 && state->pfsync_time <= sc->sc_ureq_received) { 1572 pfsync_pack_state(PFSYNC_ACT_UPD, state, 0); 1573 i++; 1574 } 1575 1576 /* figure next state to send */ 1577 state = TAILQ_NEXT(state, entry_list); 1578 1579 /* wrap to start of list if we hit the end */ 1580 if (state == NULL) { 1581 if (++cpu >= ncpus) 1582 cpu = 0; 1583 state = TAILQ_FIRST(&state_list[cpu]); 1584 } 1585 } while (i < sc->sc_maxcount * PFSYNC_BULKPACKETS && 1586 cpu != sc->sc_bulk_terminator_cpu && 1587 state != sc->sc_bulk_terminator); 1588 1589 if (state == NULL || (cpu == sc->sc_bulk_terminator_cpu && 1590 state == sc->sc_bulk_terminator)) { 1591 /* we're done */ 1592 pfsync_send_bus(sc, PFSYNC_BUS_END); 1593 sc->sc_ureq_received = 0; 1594 sc->sc_bulk_send_next = NULL; 1595 sc->sc_bulk_terminator = NULL; 1596 sc->sc_bulk_send_cpu = 0; 1597 sc->sc_bulk_terminator_cpu = 0; 1598 lwkt_reltoken(&pf_token); 1599 callout_stop(&sc->sc_bulk_tmo); 1600 lwkt_gettoken(&pf_token); 1601 if (pf_status.debug >= PF_DEBUG_MISC) 1602 kprintf("pfsync: bulk update complete\n"); 1603 } else { 1604 /* look again for more in a bit */ 1605 lwkt_reltoken(&pf_token); 1606 callout_reset(&sc->sc_bulk_tmo, 1, pfsync_timeout, 1607 LIST_FIRST(&pfsync_list)); 1608 lwkt_gettoken(&pf_token); 1609 sc->sc_bulk_send_next = state; 1610 sc->sc_bulk_send_cpu = cpu; 1611 } 1612 if (sc->sc_mbuf != NULL) 1613 pfsync_sendout(sc); 1614 crit_exit(); 1615 } 1616 1617 void 1618 pfsync_bulkfail(void *v) 1619 { 1620 struct pfsync_softc *sc = v; 1621 int error; 1622 1623 ASSERT_LWKT_TOKEN_HELD(&pf_token); 1624 1625 if (sc->sc_bulk_tries++ < PFSYNC_MAX_BULKTRIES) { 1626 /* Try again in a bit */ 1627 lwkt_reltoken(&pf_token); 1628 callout_reset(&sc->sc_bulkfail_tmo, 5 * hz, pfsync_bulkfail, 1629 LIST_FIRST(&pfsync_list)); 1630 lwkt_gettoken(&pf_token); 1631 crit_enter(); 1632 error = pfsync_request_update(NULL, NULL); 1633 if (error == ENOMEM) { 1634 if (pf_status.debug >= PF_DEBUG_MISC) 1635 kprintf("pfsync: cannot allocate mbufs for " 1636 "bulk update\n"); 1637 } else 1638 pfsync_sendout(sc); 1639 crit_exit(); 1640 } else { 1641 /* Pretend like the transfer was ok */ 1642 sc->sc_ureq_sent = 0; 1643 sc->sc_bulk_tries = 0; 1644 #if NCARP > 0 1645 if (!pfsync_sync_ok) 1646 carp_group_demote_adj(&sc->sc_if, -1); 1647 #endif 1648 pfsync_sync_ok = 1; 1649 if (pf_status.debug >= PF_DEBUG_MISC) 1650 kprintf("pfsync: failed to receive " 1651 "bulk update status\n"); 1652 lwkt_reltoken(&pf_token); 1653 callout_stop(&sc->sc_bulkfail_tmo); 1654 lwkt_gettoken(&pf_token); 1655 } 1656 } 1657 1658 /* This must be called in splnet() */ 1659 int 1660 pfsync_sendout(struct pfsync_softc *sc) 1661 { 1662 #if NBPFILTER > 0 1663 struct ifnet *ifp = &sc->sc_if; 1664 #endif 1665 struct mbuf *m; 1666 1667 ASSERT_LWKT_TOKEN_HELD(&pf_token); 1668 1669 lwkt_reltoken(&pf_token); 1670 callout_stop(&sc->sc_tmo); 1671 lwkt_gettoken(&pf_token); 1672 1673 if (sc->sc_mbuf == NULL) 1674 return (0); 1675 m = sc->sc_mbuf; 1676 sc->sc_mbuf = NULL; 1677 sc->sc_statep.s = NULL; 1678 1679 #if NBPFILTER > 0 1680 if (ifp->if_bpf) { 1681 bpf_gettoken(); 1682 if (ifp->if_bpf) 1683 bpf_mtap(ifp->if_bpf, m, BPF_DIRECTION_OUT); 1684 bpf_reltoken(); 1685 } 1686 #endif 1687 1688 if (sc->sc_mbuf_net) { 1689 m_freem(m); 1690 m = sc->sc_mbuf_net; 1691 sc->sc_mbuf_net = NULL; 1692 sc->sc_statep_net.s = NULL; 1693 } 1694 1695 return pfsync_sendout_mbuf(sc, m); 1696 } 1697 1698 int 1699 pfsync_sendout_mbuf(struct pfsync_softc *sc, struct mbuf *m) 1700 { 1701 struct sockaddr sa; 1702 struct ip *ip; 1703 1704 if (sc->sc_sync_ifp || 1705 sc->sc_sync_peer.s_addr != INADDR_PFSYNC_GROUP) { 1706 M_PREPEND(m, sizeof(struct ip), M_WAITOK); 1707 if (m == NULL) { 1708 pfsyncstats.pfsyncs_onomem++; 1709 return (0); 1710 } 1711 ip = mtod(m, struct ip *); 1712 ip->ip_v = IPVERSION; 1713 ip->ip_hl = sizeof(*ip) >> 2; 1714 ip->ip_tos = IPTOS_LOWDELAY; 1715 ip->ip_len = htons(m->m_pkthdr.len); 1716 ip->ip_id = htons(ip_randomid()); 1717 ip->ip_off = htons(IP_DF); 1718 ip->ip_ttl = PFSYNC_DFLTTL; 1719 ip->ip_p = IPPROTO_PFSYNC; 1720 ip->ip_sum = 0; 1721 1722 bzero(&sa, sizeof(sa)); 1723 ip->ip_src.s_addr = INADDR_ANY; 1724 1725 if (sc->sc_sendaddr.s_addr == INADDR_PFSYNC_GROUP) 1726 m->m_flags |= M_MCAST; 1727 ip->ip_dst = sc->sc_sendaddr; 1728 sc->sc_sendaddr.s_addr = sc->sc_sync_peer.s_addr; 1729 1730 pfsyncstats.pfsyncs_opackets++; 1731 1732 if (ip_output(m, NULL, NULL, IP_RAWOUTPUT, &sc->sc_imo, NULL)) 1733 pfsyncstats.pfsyncs_oerrors++; 1734 } else 1735 m_freem(m); 1736 1737 return (0); 1738 } 1739 1740 static int 1741 pfsync_modevent(module_t mod, int type, void *data) 1742 { 1743 int error = 0; 1744 1745 struct pfsync_softc *pfs_if, *tmp; 1746 1747 lwkt_gettoken(&pf_token); 1748 1749 switch (type) { 1750 case MOD_LOAD: 1751 LIST_INIT(&pfsync_list); 1752 lwkt_reltoken(&pf_token); 1753 if_clone_attach(&pfsync_cloner); 1754 lwkt_gettoken(&pf_token); 1755 /* Override the function pointer for pf_ioctl.c */ 1756 break; 1757 1758 case MOD_UNLOAD: 1759 lwkt_reltoken(&pf_token); 1760 if_clone_detach(&pfsync_cloner); 1761 lwkt_gettoken(&pf_token); 1762 LIST_FOREACH_MUTABLE(pfs_if, &pfsync_list, sc_next, tmp) { 1763 pfsync_clone_destroy(&pfs_if->sc_if); 1764 } 1765 break; 1766 1767 default: 1768 error = EINVAL; 1769 break; 1770 } 1771 1772 lwkt_reltoken(&pf_token); 1773 return error; 1774 } 1775 1776 static moduledata_t pfsync_mod = { 1777 "pfsync", 1778 pfsync_modevent, 1779 0 1780 }; 1781 1782 #define PFSYNC_MODVER 44 1783 1784 DECLARE_MODULE(pfsync, pfsync_mod, SI_SUB_PSEUDO, SI_ORDER_ANY); 1785 MODULE_VERSION(pfsync, PFSYNC_MODVER); 1786 1787 static void 1788 pfsync_in_addmulti_dispatch(netmsg_t nmsg) 1789 { 1790 struct lwkt_msg *lmsg = &nmsg->lmsg; 1791 struct ifnet *ifp = lmsg->u.ms_resultp; 1792 struct in_addr addr; 1793 1794 addr.s_addr = INADDR_PFSYNC_GROUP; 1795 lmsg->u.ms_resultp = in_addmulti(&addr, ifp); 1796 1797 lwkt_replymsg(lmsg, 0); 1798 } 1799 1800 static struct in_multi * 1801 pfsync_in_addmulti(struct ifnet *ifp) 1802 { 1803 struct netmsg_base nmsg; 1804 struct lwkt_msg *lmsg = &nmsg.lmsg; 1805 1806 netmsg_init(&nmsg, NULL, &curthread->td_msgport, 0, 1807 pfsync_in_addmulti_dispatch); 1808 lmsg->u.ms_resultp = ifp; 1809 1810 lwkt_domsg(netisr_cpuport(0), lmsg, 0); 1811 return lmsg->u.ms_resultp; 1812 } 1813 1814 static void 1815 pfsync_in_delmulti_dispatch(netmsg_t nmsg) 1816 { 1817 struct lwkt_msg *lmsg = &nmsg->lmsg; 1818 1819 in_delmulti(lmsg->u.ms_resultp); 1820 lwkt_replymsg(lmsg, 0); 1821 } 1822 1823 static void 1824 pfsync_in_delmulti(struct in_multi *inm) 1825 { 1826 struct netmsg_base nmsg; 1827 struct lwkt_msg *lmsg = &nmsg.lmsg; 1828 1829 netmsg_init(&nmsg, NULL, &curthread->td_msgport, 0, 1830 pfsync_in_delmulti_dispatch); 1831 lmsg->u.ms_resultp = inm; 1832 1833 lwkt_domsg(netisr_cpuport(0), lmsg, 0); 1834 } 1835