1 /* 2 * Copyright (c) 2002 Michael Shalayeff 3 * All rights reserved. 4 * 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted provided that the following conditions 7 * are met: 8 * 1. Redistributions of source code must retain the above copyright 9 * notice, this list of conditions and the following disclaimer. 10 * 2. Redistributions in binary form must reproduce the above copyright 11 * notice, this list of conditions and the following disclaimer in the 12 * documentation and/or other materials provided with the distribution. 13 * 14 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR 15 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 16 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 17 * IN NO EVENT SHALL THE AUTHOR OR HIS RELATIVES BE LIABLE FOR ANY DIRECT, 18 * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 19 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 20 * SERVICES; LOSS OF MIND, USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 21 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, 22 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING 23 * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF 24 * THE POSSIBILITY OF SUCH DAMAGE. 25 * 26 * $OpenBSD: if_pfsync.c,v 1.98 2008/06/29 08:42:15 mcbride Exp $ 27 */ 28 29 #include "opt_inet.h" 30 #include "opt_inet6.h" 31 #include "opt_carp.h" 32 #include "use_bpf.h" 33 34 #include <sys/param.h> 35 #include <sys/endian.h> 36 #include <sys/proc.h> 37 #include <sys/caps.h> 38 #include <sys/systm.h> 39 #include <sys/time.h> 40 #include <sys/mbuf.h> 41 #include <sys/socket.h> 42 #include <sys/kernel.h> 43 #include <sys/malloc.h> 44 #include <sys/module.h> 45 #include <sys/msgport2.h> 46 #include <sys/sockio.h> 47 #include <sys/thread2.h> 48 49 #include <machine/inttypes.h> 50 51 #include <net/if.h> 52 #include <net/if_types.h> 53 #include <net/ifq_var.h> 54 #include <net/route.h> 55 #include <net/bpf.h> 56 #include <net/netisr2.h> 57 #include <net/netmsg2.h> 58 #include <netinet/in.h> 59 #include <netinet/if_ether.h> 60 #include <netinet/ip_carp.h> 61 #include <netinet/tcp.h> 62 #include <netinet/tcp_seq.h> 63 64 #ifdef INET 65 #include <netinet/in_systm.h> 66 #include <netinet/in_var.h> 67 #include <netinet/ip.h> 68 #include <netinet/ip_var.h> 69 #endif 70 71 #ifdef INET6 72 #include <netinet6/nd6.h> 73 #endif /* INET6 */ 74 75 #include <net/pf/pfvar.h> 76 #include <net/pf/if_pfsync.h> 77 78 #define PFSYNCNAME "pfsync" 79 80 #define PFSYNC_MINMTU \ 81 (sizeof(struct pfsync_header) + sizeof(struct pf_state)) 82 83 #ifdef PFSYNCDEBUG 84 #define DPRINTF(x) do { if (pfsyncdebug) kprintf x ; } while (0) 85 int pfsyncdebug; 86 #else 87 #define DPRINTF(x) 88 #endif 89 90 struct pfsync_softc *pfsyncif = NULL; 91 struct pfsyncstats pfsyncstats; 92 93 void pfsyncattach(int); 94 static int pfsync_clone_destroy(struct ifnet *); 95 static int pfsync_clone_create(struct if_clone *, int, caddr_t, caddr_t); 96 void pfsync_setmtu(struct pfsync_softc *, int); 97 int pfsync_alloc_scrub_memory(struct pfsync_state_peer *, 98 struct pf_state_peer *); 99 int pfsyncoutput(struct ifnet *, struct mbuf *, struct sockaddr *, 100 struct rtentry *); 101 int pfsyncioctl(struct ifnet *, u_long, caddr_t, struct ucred *); 102 void pfsyncstart(struct ifnet *, struct ifaltq_subque *); 103 104 struct mbuf *pfsync_get_mbuf(struct pfsync_softc *, u_int8_t, void **); 105 int pfsync_request_update(struct pfsync_state_upd *, struct in_addr *); 106 int pfsync_sendout(struct pfsync_softc *); 107 int pfsync_sendout_mbuf(struct pfsync_softc *, struct mbuf *); 108 void pfsync_timeout(void *); 109 void pfsync_send_bus(struct pfsync_softc *, u_int8_t); 110 void pfsync_bulk_update(void *); 111 void pfsync_bulkfail(void *); 112 113 static struct in_multi *pfsync_in_addmulti(struct ifnet *); 114 static void pfsync_in_delmulti(struct in_multi *); 115 116 static MALLOC_DEFINE(M_PFSYNC, PFSYNCNAME, "Packet Filter State Sync. Interface"); 117 static LIST_HEAD(pfsync_list, pfsync_softc) pfsync_list; 118 119 int pfsync_sync_ok; 120 121 struct if_clone pfsync_cloner = 122 IF_CLONE_INITIALIZER("pfsync", pfsync_clone_create, pfsync_clone_destroy, 1 ,1); 123 124 125 void 126 pfsyncattach(int npfsync) 127 { 128 if_clone_attach(&pfsync_cloner); 129 } 130 131 static int 132 pfsync_clone_create(struct if_clone *ifc, int unit, 133 caddr_t params __unused, caddr_t data __unused) 134 { 135 struct pfsync_softc *sc; 136 struct ifnet *ifp; 137 138 lwkt_gettoken(&pf_token); 139 140 pfsync_sync_ok = 1; 141 142 sc = kmalloc(sizeof(*sc), M_PFSYNC, M_WAITOK | M_ZERO); 143 sc->sc_mbuf = NULL; 144 sc->sc_mbuf_net = NULL; 145 sc->sc_mbuf_tdb = NULL; 146 sc->sc_statep.s = NULL; 147 sc->sc_statep_net.s = NULL; 148 sc->sc_statep_tdb.t = NULL; 149 sc->sc_maxupdates = 128; 150 sc->sc_sync_peer.s_addr =htonl(INADDR_PFSYNC_GROUP); 151 sc->sc_sendaddr.s_addr = htonl(INADDR_PFSYNC_GROUP); 152 sc->sc_ureq_received = 0; 153 sc->sc_ureq_sent = 0; 154 sc->sc_bulk_send_next = NULL; 155 sc->sc_bulk_terminator = NULL; 156 sc->sc_bulk_send_cpu = 0; 157 sc->sc_bulk_terminator_cpu = 0; 158 sc->sc_imo.imo_max_memberships = IP_MAX_MEMBERSHIPS; 159 lwkt_reltoken(&pf_token); 160 161 ifp = &sc->sc_if; 162 if_initname(ifp, ifc->ifc_name, unit); 163 ifp->if_ioctl = pfsyncioctl; 164 ifp->if_output = pfsyncoutput; 165 ifp->if_start = pfsyncstart; 166 ifp->if_type = IFT_PFSYNC; 167 ifq_set_maxlen(&ifp->if_snd, ifqmaxlen); 168 ifp->if_hdrlen = PFSYNC_HDRLEN; 169 ifp->if_baudrate = IF_Mbps(100); 170 ifp->if_softc = sc; 171 172 pfsync_setmtu(sc, MCLBYTES); 173 callout_init(&sc->sc_tmo); 174 /* callout_init(&sc->sc_tdb_tmo); XXX we don't support tdb (yet) */ 175 callout_init(&sc->sc_bulk_tmo); 176 callout_init(&sc->sc_bulkfail_tmo); 177 178 if_attach(ifp, NULL); 179 #if NBPF > 0 180 bpfattach(&sc->sc_if, DLT_PFSYNC, PFSYNC_HDRLEN); 181 #endif 182 183 #ifdef CARP 184 if_addgroup(ifp, "carp"); 185 #endif 186 187 lwkt_gettoken(&pf_token); 188 LIST_INSERT_HEAD(&pfsync_list, sc, sc_next); 189 lwkt_reltoken(&pf_token); 190 191 return (0); 192 } 193 194 static int 195 pfsync_clone_destroy(struct ifnet *ifp) 196 { 197 struct netmsg_base msg; 198 199 lwkt_gettoken(&pf_token); 200 lwkt_reltoken(&pf_token); 201 202 struct pfsync_softc *sc = ifp->if_softc; 203 callout_stop(&sc->sc_tmo); 204 /* callout_stop(&sc->sc_tdb_tmo); XXX we don't support tdb (yet) */ 205 callout_stop(&sc->sc_bulk_tmo); 206 callout_stop(&sc->sc_bulkfail_tmo); 207 #ifdef CARP 208 if (!pfsync_sync_ok) 209 carp_group_demote_adj(&sc->sc_if, -1); 210 #endif 211 212 /* Unpend async sendouts. */ 213 netmsg_init(&msg, NULL, &curthread->td_msgport, 0, netmsg_sync_handler); 214 netisr_domsg(&msg, 0); 215 216 #if NBPF > 0 217 bpfdetach(ifp); 218 #endif 219 if_detach(ifp); 220 lwkt_gettoken(&pf_token); 221 LIST_REMOVE(sc, sc_next); 222 kfree(sc, M_PFSYNC); 223 lwkt_reltoken(&pf_token); 224 225 return 0; 226 } 227 228 /* 229 * Start output on the pfsync interface. 230 */ 231 void 232 pfsyncstart(struct ifnet *ifp, struct ifaltq_subque *ifsq) 233 { 234 ASSERT_ALTQ_SQ_DEFAULT(ifp, ifsq); 235 ifsq_purge(ifsq); 236 } 237 238 int 239 pfsync_alloc_scrub_memory(struct pfsync_state_peer *s, 240 struct pf_state_peer *d) 241 { 242 if (s->scrub.scrub_flag && d->scrub == NULL) { 243 d->scrub = kmalloc(sizeof(struct pf_state_scrub), M_PFSYNC, M_NOWAIT|M_ZERO); 244 245 if (d->scrub == NULL) 246 return (ENOMEM); 247 } 248 249 return (0); 250 } 251 252 void 253 pfsync_state_export(struct pfsync_state *sp, struct pf_state *st) 254 { 255 bzero(sp, sizeof(struct pfsync_state)); 256 257 /* copy from state key */ 258 sp->key[PF_SK_WIRE].addr[0] = st->key[PF_SK_WIRE]->addr[0]; 259 sp->key[PF_SK_WIRE].addr[1] = st->key[PF_SK_WIRE]->addr[1]; 260 sp->key[PF_SK_WIRE].port[0] = st->key[PF_SK_WIRE]->port[0]; 261 sp->key[PF_SK_WIRE].port[1] = st->key[PF_SK_WIRE]->port[1]; 262 sp->key[PF_SK_STACK].addr[0] = st->key[PF_SK_STACK]->addr[0]; 263 sp->key[PF_SK_STACK].addr[1] = st->key[PF_SK_STACK]->addr[1]; 264 sp->key[PF_SK_STACK].port[0] = st->key[PF_SK_STACK]->port[0]; 265 sp->key[PF_SK_STACK].port[1] = st->key[PF_SK_STACK]->port[1]; 266 sp->proto = st->key[PF_SK_WIRE]->proto; 267 sp->af = st->key[PF_SK_WIRE]->af; 268 269 /* copy from state */ 270 strlcpy(sp->ifname, st->kif->pfik_name, sizeof(sp->ifname)); 271 bcopy(&st->rt_addr, &sp->rt_addr, sizeof(sp->rt_addr)); 272 sp->creation = htonl(time_second - st->creation); 273 sp->expire = pf_state_expires(st); 274 if (sp->expire <= time_second) 275 sp->expire = htonl(0); 276 else 277 sp->expire = htonl(sp->expire - time_second); 278 279 sp->direction = st->direction; 280 sp->log = st->log; 281 sp->cpuid = st->cpuid; 282 sp->pickup_mode = st->pickup_mode; 283 sp->timeout = st->timeout; 284 sp->state_flags = st->state_flags; 285 if (st->src_node) 286 sp->sync_flags |= PFSYNC_FLAG_SRCNODE; 287 if (st->nat_src_node) 288 sp->sync_flags |= PFSYNC_FLAG_NATSRCNODE; 289 290 bcopy(&st->id, &sp->id, sizeof(sp->id)); 291 sp->creatorid = st->creatorid; 292 pf_state_peer_hton(&st->src, &sp->src); 293 pf_state_peer_hton(&st->dst, &sp->dst); 294 295 if (st->rule.ptr == NULL) 296 sp->rule = htonl(-1); 297 else 298 sp->rule = htonl(st->rule.ptr->nr); 299 if (st->anchor.ptr == NULL) 300 sp->anchor = htonl(-1); 301 else 302 sp->anchor = htonl(st->anchor.ptr->nr); 303 if (st->nat_rule.ptr == NULL) 304 sp->nat_rule = htonl(-1); 305 else 306 sp->nat_rule = htonl(st->nat_rule.ptr->nr); 307 308 pf_state_counter_hton(st->packets[0], sp->packets[0]); 309 pf_state_counter_hton(st->packets[1], sp->packets[1]); 310 pf_state_counter_hton(st->bytes[0], sp->bytes[0]); 311 pf_state_counter_hton(st->bytes[1], sp->bytes[1]); 312 313 } 314 315 int 316 pfsync_state_import(struct pfsync_state *sp, u_int8_t flags) 317 { 318 struct pf_state *st = NULL; 319 struct pf_state_key *skw = NULL, *sks = NULL; 320 struct pf_rule *r = NULL; 321 struct pfi_kif *kif; 322 int pool_flags; 323 int error; 324 325 if (sp->creatorid == 0 && pf_status.debug >= PF_DEBUG_MISC) { 326 kprintf("pfsync_insert_net_state: invalid creator id:" 327 " %08x\n", ntohl(sp->creatorid)); 328 return (EINVAL); 329 } 330 331 if ((kif = pfi_kif_get(sp->ifname)) == NULL) { 332 if (pf_status.debug >= PF_DEBUG_MISC) 333 kprintf("pfsync_insert_net_state: " 334 "unknown interface: %s\n", sp->ifname); 335 if (flags & PFSYNC_SI_IOCTL) 336 return (EINVAL); 337 return (0); /* skip this state */ 338 } 339 340 /* 341 * If the ruleset checksums match or the state is coming from the ioctl, 342 * it's safe to associate the state with the rule of that number. 343 */ 344 if (sp->rule != htonl(-1) && sp->anchor == htonl(-1) && 345 (flags & (PFSYNC_SI_IOCTL | PFSYNC_SI_CKSUM)) && ntohl(sp->rule) < 346 pf_main_ruleset.rules[PF_RULESET_FILTER].active.rcount) 347 r = pf_main_ruleset.rules[ 348 PF_RULESET_FILTER].active.ptr_array[ntohl(sp->rule)]; 349 else 350 r = &pf_default_rule; 351 352 if ((r->max_states && r->states_cur >= r->max_states)) 353 goto cleanup; 354 355 if (flags & PFSYNC_SI_IOCTL) 356 pool_flags = M_WAITOK | M_NULLOK | M_ZERO; 357 else 358 pool_flags = M_WAITOK | M_ZERO; 359 360 if ((st = kmalloc(sizeof(struct pf_state), M_PFSYNC, pool_flags)) == NULL) 361 goto cleanup; 362 lockinit(&st->lk, "pfstlk", 0, 0); 363 364 if ((skw = pf_alloc_state_key(pool_flags)) == NULL) 365 goto cleanup; 366 367 if (PF_ANEQ(&sp->key[PF_SK_WIRE].addr[0], 368 &sp->key[PF_SK_STACK].addr[0], sp->af) || 369 PF_ANEQ(&sp->key[PF_SK_WIRE].addr[1], 370 &sp->key[PF_SK_STACK].addr[1], sp->af) || 371 sp->key[PF_SK_WIRE].port[0] != sp->key[PF_SK_STACK].port[0] || 372 sp->key[PF_SK_WIRE].port[1] != sp->key[PF_SK_STACK].port[1]) { 373 if ((sks = pf_alloc_state_key(pool_flags)) == NULL) 374 goto cleanup; 375 } else 376 sks = skw; 377 378 /* allocate memory for scrub info */ 379 if (pfsync_alloc_scrub_memory(&sp->src, &st->src) || 380 pfsync_alloc_scrub_memory(&sp->dst, &st->dst)) 381 goto cleanup; 382 383 /* copy to state key(s) */ 384 skw->addr[0] = sp->key[PF_SK_WIRE].addr[0]; 385 skw->addr[1] = sp->key[PF_SK_WIRE].addr[1]; 386 skw->port[0] = sp->key[PF_SK_WIRE].port[0]; 387 skw->port[1] = sp->key[PF_SK_WIRE].port[1]; 388 skw->proto = sp->proto; 389 skw->af = sp->af; 390 if (sks != skw) { 391 sks->addr[0] = sp->key[PF_SK_STACK].addr[0]; 392 sks->addr[1] = sp->key[PF_SK_STACK].addr[1]; 393 sks->port[0] = sp->key[PF_SK_STACK].port[0]; 394 sks->port[1] = sp->key[PF_SK_STACK].port[1]; 395 sks->proto = sp->proto; 396 sks->af = sp->af; 397 } 398 399 /* copy to state */ 400 bcopy(&sp->rt_addr, &st->rt_addr, sizeof(st->rt_addr)); 401 st->creation = time_second - ntohl(sp->creation); 402 st->expire = time_second; 403 if (sp->expire) { 404 /* XXX No adaptive scaling. */ 405 st->expire -= r->timeout[sp->timeout] - ntohl(sp->expire); 406 } 407 408 st->expire = ntohl(sp->expire) + time_second; 409 st->direction = sp->direction; 410 st->log = sp->log; 411 st->timeout = sp->timeout; 412 st->state_flags = sp->state_flags; 413 if (!(flags & PFSYNC_SI_IOCTL)) 414 st->sync_flags = PFSTATE_FROMSYNC; 415 416 bcopy(sp->id, &st->id, sizeof(st->id)); 417 st->creatorid = sp->creatorid; 418 pf_state_peer_ntoh(&sp->src, &st->src); 419 pf_state_peer_ntoh(&sp->dst, &st->dst); 420 421 st->rule.ptr = r; 422 st->nat_rule.ptr = NULL; 423 st->anchor.ptr = NULL; 424 st->rt_kif = NULL; 425 426 st->pfsync_time = 0; 427 428 429 /* XXX when we have nat_rule/anchors, use STATE_INC_COUNTERS */ 430 r->states_cur++; 431 r->states_tot++; 432 433 if ((error = pf_state_insert(kif, skw, sks, st)) != 0) { 434 /* XXX when we have nat_rule/anchors, use STATE_DEC_COUNTERS */ 435 r->states_cur--; 436 goto cleanup_state; 437 } 438 439 return (0); 440 441 cleanup: 442 error = ENOMEM; 443 if (skw == sks) 444 sks = NULL; 445 if (skw != NULL) 446 kfree(skw, M_PFSYNC); 447 if (sks != NULL) 448 kfree(sks, M_PFSYNC); 449 450 cleanup_state: /* pf_state_insert frees the state keys */ 451 if (st) { 452 if (st->dst.scrub) 453 kfree(st->dst.scrub, M_PFSYNC); 454 if (st->src.scrub) 455 kfree(st->src.scrub, M_PFSYNC); 456 kfree(st, M_PFSYNC); 457 } 458 return (error); 459 } 460 461 void 462 pfsync_input(struct mbuf *m, ...) 463 { 464 struct ip *ip = mtod(m, struct ip *); 465 struct pfsync_header *ph; 466 struct pfsync_softc *sc = pfsyncif; 467 struct pf_state *st; 468 struct pf_state_key *sk; 469 struct pf_state_item *si; 470 struct pf_state_cmp id_key; 471 struct pfsync_state *sp; 472 struct pfsync_state_upd *up; 473 struct pfsync_state_del *dp; 474 struct pfsync_state_clr *cp; 475 struct pfsync_state_upd_req *rup; 476 struct pfsync_state_bus *bus; 477 struct in_addr src; 478 struct mbuf *mp; 479 int iplen, action, error, i, count, offp, sfail, stale = 0; 480 u_int8_t flags = 0; 481 482 /* This function is not yet called from anywhere */ 483 /* Still we assume for safety that pf_token must be held */ 484 ASSERT_LWKT_TOKEN_HELD(&pf_token); 485 486 pfsyncstats.pfsyncs_ipackets++; 487 488 /* verify that we have a sync interface configured */ 489 if (!sc || !sc->sc_sync_ifp || !pf_status.running) 490 goto done; 491 492 /* verify that the packet came in on the right interface */ 493 if (sc->sc_sync_ifp != m->m_pkthdr.rcvif) { 494 pfsyncstats.pfsyncs_badif++; 495 goto done; 496 } 497 498 /* verify that the IP TTL is 255. */ 499 if (ip->ip_ttl != PFSYNC_DFLTTL) { 500 pfsyncstats.pfsyncs_badttl++; 501 goto done; 502 } 503 504 iplen = ip->ip_hl << 2; 505 506 if (m->m_pkthdr.len < iplen + sizeof(*ph)) { 507 pfsyncstats.pfsyncs_hdrops++; 508 goto done; 509 } 510 511 if (iplen + sizeof(*ph) > m->m_len) { 512 if ((m = m_pullup(m, iplen + sizeof(*ph))) == NULL) { 513 pfsyncstats.pfsyncs_hdrops++; 514 goto done; 515 } 516 ip = mtod(m, struct ip *); 517 } 518 ph = (struct pfsync_header *)((char *)ip + iplen); 519 520 /* verify the version */ 521 if (ph->version != PFSYNC_VERSION) { 522 pfsyncstats.pfsyncs_badver++; 523 goto done; 524 } 525 526 action = ph->action; 527 count = ph->count; 528 529 /* make sure it's a valid action code */ 530 if (action >= PFSYNC_ACT_MAX) { 531 pfsyncstats.pfsyncs_badact++; 532 goto done; 533 } 534 535 /* Cheaper to grab this now than having to mess with mbufs later */ 536 src = ip->ip_src; 537 538 if (!bcmp(&ph->pf_chksum, &pf_status.pf_chksum, PF_MD5_DIGEST_LENGTH)) 539 flags |= PFSYNC_SI_CKSUM; 540 541 switch (action) { 542 case PFSYNC_ACT_CLR: { 543 struct pf_state *nexts; 544 struct pf_state_key *nextsk; 545 struct pfi_kif *kif; 546 globaldata_t save_gd = mycpu; 547 int nn; 548 549 u_int32_t creatorid; 550 if ((mp = m_pulldown(m, iplen + sizeof(*ph), 551 sizeof(*cp), &offp)) == NULL) { 552 pfsyncstats.pfsyncs_badlen++; 553 return; 554 } 555 cp = (struct pfsync_state_clr *)(mp->m_data + offp); 556 creatorid = cp->creatorid; 557 558 crit_enter(); 559 if (cp->ifname[0] == '\0') { 560 lwkt_gettoken(&pf_token); 561 for (nn = 0; nn < ncpus; ++nn) { 562 lwkt_setcpu_self(globaldata_find(nn)); 563 for (st = RB_MIN(pf_state_tree_id, 564 &tree_id[nn]); 565 st; st = nexts) { 566 nexts = RB_NEXT(pf_state_tree_id, 567 &tree_id[n], st); 568 if (st->creatorid == creatorid) { 569 st->sync_flags |= 570 PFSTATE_FROMSYNC; 571 pf_unlink_state(st); 572 } 573 } 574 } 575 lwkt_setcpu_self(save_gd); 576 lwkt_reltoken(&pf_token); 577 } else { 578 if ((kif = pfi_kif_get(cp->ifname)) == NULL) { 579 crit_exit(); 580 return; 581 } 582 /* XXX correct? */ 583 lwkt_gettoken(&pf_token); 584 for (nn = 0; nn < ncpus; ++nn) { 585 lwkt_setcpu_self(globaldata_find(nn)); 586 for (sk = RB_MIN(pf_state_tree, 587 &pf_statetbl[nn]); 588 sk; 589 sk = nextsk) { 590 nextsk = RB_NEXT(pf_state_tree, 591 &pf_statetbl[n], sk); 592 TAILQ_FOREACH(si, &sk->states, entry) { 593 if (si->s->creatorid == 594 creatorid) { 595 si->s->sync_flags |= 596 PFSTATE_FROMSYNC; 597 pf_unlink_state(si->s); 598 } 599 } 600 } 601 } 602 lwkt_setcpu_self(save_gd); 603 lwkt_reltoken(&pf_token); 604 } 605 crit_exit(); 606 607 break; 608 } 609 case PFSYNC_ACT_INS: 610 if ((mp = m_pulldown(m, iplen + sizeof(*ph), 611 count * sizeof(*sp), &offp)) == NULL) { 612 pfsyncstats.pfsyncs_badlen++; 613 return; 614 } 615 616 crit_enter(); 617 for (i = 0, sp = (struct pfsync_state *)(mp->m_data + offp); 618 i < count; i++, sp++) { 619 /* check for invalid values */ 620 if (sp->timeout >= PFTM_MAX || 621 sp->src.state > PF_TCPS_PROXY_DST || 622 sp->dst.state > PF_TCPS_PROXY_DST || 623 sp->direction > PF_OUT || 624 (sp->af != AF_INET && sp->af != AF_INET6)) { 625 if (pf_status.debug >= PF_DEBUG_MISC) 626 kprintf("pfsync_insert: PFSYNC_ACT_INS: " 627 "invalid value\n"); 628 pfsyncstats.pfsyncs_badval++; 629 continue; 630 } 631 632 if ((error = pfsync_state_import(sp, flags))) { 633 if (error == ENOMEM) { 634 crit_exit(); 635 goto done; 636 } 637 } 638 } 639 crit_exit(); 640 break; 641 case PFSYNC_ACT_UPD: 642 if ((mp = m_pulldown(m, iplen + sizeof(*ph), 643 count * sizeof(*sp), &offp)) == NULL) { 644 pfsyncstats.pfsyncs_badlen++; 645 return; 646 } 647 648 crit_enter(); 649 for (i = 0, sp = (struct pfsync_state *)(mp->m_data + offp); 650 i < count; i++, sp++) { 651 int flags = PFSYNC_FLAG_STALE; 652 653 /* check for invalid values */ 654 if (sp->timeout >= PFTM_MAX || 655 sp->src.state > PF_TCPS_PROXY_DST || 656 sp->dst.state > PF_TCPS_PROXY_DST) { 657 if (pf_status.debug >= PF_DEBUG_MISC) 658 kprintf("pfsync_insert: PFSYNC_ACT_UPD: " 659 "invalid value\n"); 660 pfsyncstats.pfsyncs_badval++; 661 continue; 662 } 663 664 bcopy(sp->id, &id_key.id, sizeof(id_key.id)); 665 id_key.creatorid = sp->creatorid; 666 667 st = pf_find_state_byid(&id_key); 668 if (st == NULL) { 669 /* insert the update */ 670 if (pfsync_state_import(sp, flags)) 671 pfsyncstats.pfsyncs_badstate++; 672 continue; 673 } 674 sk = st->key[PF_SK_WIRE]; /* XXX right one? */ 675 sfail = 0; 676 if (sk->proto == IPPROTO_TCP) { 677 /* 678 * The state should never go backwards except 679 * for syn-proxy states. Neither should the 680 * sequence window slide backwards. 681 */ 682 if (st->src.state > sp->src.state && 683 (st->src.state < PF_TCPS_PROXY_SRC || 684 sp->src.state >= PF_TCPS_PROXY_SRC)) 685 sfail = 1; 686 else if (SEQ_GT(st->src.seqlo, 687 ntohl(sp->src.seqlo))) 688 sfail = 3; 689 else if (st->dst.state > sp->dst.state) { 690 /* There might still be useful 691 * information about the src state here, 692 * so import that part of the update, 693 * then "fail" so we send the updated 694 * state back to the peer who is missing 695 * our what we know. */ 696 pf_state_peer_ntoh(&sp->src, &st->src); 697 /* XXX do anything with timeouts? */ 698 sfail = 7; 699 flags = 0; 700 } else if (st->dst.state >= TCPS_SYN_SENT && 701 SEQ_GT(st->dst.seqlo, ntohl(sp->dst.seqlo))) 702 sfail = 4; 703 } else { 704 /* 705 * Non-TCP protocol state machine always go 706 * forwards 707 */ 708 if (st->src.state > sp->src.state) 709 sfail = 5; 710 else if (st->dst.state > sp->dst.state) 711 sfail = 6; 712 } 713 if (sfail) { 714 if (pf_status.debug >= PF_DEBUG_MISC) 715 kprintf("pfsync: %s stale update " 716 "(%d) id: %016jx " 717 "creatorid: %08x\n", 718 (sfail < 7 ? "ignoring" 719 : "partial"), sfail, 720 (uintmax_t)be64toh(st->id), 721 ntohl(st->creatorid)); 722 pfsyncstats.pfsyncs_stale++; 723 724 if (!(sp->sync_flags & PFSTATE_STALE)) { 725 /* we have a better state, send it */ 726 if (sc->sc_mbuf != NULL && !stale) 727 pfsync_sendout(sc); 728 stale++; 729 if (!st->sync_flags) 730 pfsync_pack_state( 731 PFSYNC_ACT_UPD, st, flags); 732 } 733 continue; 734 } 735 pfsync_alloc_scrub_memory(&sp->dst, &st->dst); 736 pf_state_peer_ntoh(&sp->src, &st->src); 737 pf_state_peer_ntoh(&sp->dst, &st->dst); 738 st->expire = ntohl(sp->expire) + time_second; 739 st->timeout = sp->timeout; 740 } 741 if (stale && sc->sc_mbuf != NULL) 742 pfsync_sendout(sc); 743 crit_exit(); 744 break; 745 /* 746 * It's not strictly necessary for us to support the "uncompressed" 747 * delete action, but it's relatively simple and maintains consistency. 748 */ 749 case PFSYNC_ACT_DEL: 750 if ((mp = m_pulldown(m, iplen + sizeof(*ph), 751 count * sizeof(*sp), &offp)) == NULL) { 752 pfsyncstats.pfsyncs_badlen++; 753 return; 754 } 755 756 crit_enter(); 757 for (i = 0, sp = (struct pfsync_state *)(mp->m_data + offp); 758 i < count; i++, sp++) { 759 bcopy(sp->id, &id_key.id, sizeof(id_key.id)); 760 id_key.creatorid = sp->creatorid; 761 762 st = pf_find_state_byid(&id_key); 763 if (st == NULL) { 764 pfsyncstats.pfsyncs_badstate++; 765 continue; 766 } 767 st->sync_flags |= PFSTATE_FROMSYNC; 768 pf_unlink_state(st); 769 } 770 crit_exit(); 771 break; 772 case PFSYNC_ACT_UPD_C: { 773 int update_requested = 0; 774 775 if ((mp = m_pulldown(m, iplen + sizeof(*ph), 776 count * sizeof(*up), &offp)) == NULL) { 777 pfsyncstats.pfsyncs_badlen++; 778 return; 779 } 780 781 crit_enter(); 782 for (i = 0, up = (struct pfsync_state_upd *)(mp->m_data + offp); 783 i < count; i++, up++) { 784 /* check for invalid values */ 785 if (up->timeout >= PFTM_MAX || 786 up->src.state > PF_TCPS_PROXY_DST || 787 up->dst.state > PF_TCPS_PROXY_DST) { 788 if (pf_status.debug >= PF_DEBUG_MISC) 789 kprintf("pfsync_insert: " 790 "PFSYNC_ACT_UPD_C: " 791 "invalid value\n"); 792 pfsyncstats.pfsyncs_badval++; 793 continue; 794 } 795 796 bcopy(up->id, &id_key.id, sizeof(id_key.id)); 797 id_key.creatorid = up->creatorid; 798 799 st = pf_find_state_byid(&id_key); 800 if (st == NULL) { 801 /* We don't have this state. Ask for it. */ 802 error = pfsync_request_update(up, &src); 803 if (error == ENOMEM) { 804 crit_exit(); 805 goto done; 806 } 807 update_requested = 1; 808 pfsyncstats.pfsyncs_badstate++; 809 continue; 810 } 811 sk = st->key[PF_SK_WIRE]; /* XXX right one? */ 812 sfail = 0; 813 if (sk->proto == IPPROTO_TCP) { 814 /* 815 * The state should never go backwards except 816 * for syn-proxy states. Neither should the 817 * sequence window slide backwards. 818 */ 819 if (st->src.state > up->src.state && 820 (st->src.state < PF_TCPS_PROXY_SRC || 821 up->src.state >= PF_TCPS_PROXY_SRC)) 822 sfail = 1; 823 else if (st->dst.state > up->dst.state) 824 sfail = 2; 825 else if (SEQ_GT(st->src.seqlo, 826 ntohl(up->src.seqlo))) 827 sfail = 3; 828 else if (st->dst.state >= TCPS_SYN_SENT && 829 SEQ_GT(st->dst.seqlo, ntohl(up->dst.seqlo))) 830 sfail = 4; 831 } else { 832 /* 833 * Non-TCP protocol state machine always go 834 * forwards 835 */ 836 if (st->src.state > up->src.state) 837 sfail = 5; 838 else if (st->dst.state > up->dst.state) 839 sfail = 6; 840 } 841 if (sfail) { 842 if (pf_status.debug >= PF_DEBUG_MISC) 843 kprintf("pfsync: ignoring stale update " 844 "(%d) id: %016" PRIx64 " " 845 "creatorid: %08x\n", sfail, 846 be64toh(st->id), 847 ntohl(st->creatorid)); 848 pfsyncstats.pfsyncs_stale++; 849 850 /* we have a better state, send it out */ 851 if ((!stale || update_requested) && 852 sc->sc_mbuf != NULL) { 853 pfsync_sendout(sc); 854 update_requested = 0; 855 } 856 stale++; 857 if (!st->sync_flags) 858 pfsync_pack_state(PFSYNC_ACT_UPD, st, 859 PFSYNC_FLAG_STALE); 860 continue; 861 } 862 pfsync_alloc_scrub_memory(&up->dst, &st->dst); 863 pf_state_peer_ntoh(&up->src, &st->src); 864 pf_state_peer_ntoh(&up->dst, &st->dst); 865 st->expire = ntohl(up->expire) + time_second; 866 st->timeout = up->timeout; 867 } 868 if ((update_requested || stale) && sc->sc_mbuf) 869 pfsync_sendout(sc); 870 crit_exit(); 871 break; 872 } 873 case PFSYNC_ACT_DEL_C: 874 if ((mp = m_pulldown(m, iplen + sizeof(*ph), 875 count * sizeof(*dp), &offp)) == NULL) { 876 pfsyncstats.pfsyncs_badlen++; 877 return; 878 } 879 880 crit_enter(); 881 for (i = 0, dp = (struct pfsync_state_del *)(mp->m_data + offp); 882 i < count; i++, dp++) { 883 bcopy(dp->id, &id_key.id, sizeof(id_key.id)); 884 id_key.creatorid = dp->creatorid; 885 886 st = pf_find_state_byid(&id_key); 887 if (st == NULL) { 888 pfsyncstats.pfsyncs_badstate++; 889 continue; 890 } 891 st->sync_flags |= PFSTATE_FROMSYNC; 892 pf_unlink_state(st); 893 } 894 crit_exit(); 895 break; 896 case PFSYNC_ACT_INS_F: 897 case PFSYNC_ACT_DEL_F: 898 /* not implemented */ 899 break; 900 case PFSYNC_ACT_UREQ: 901 if ((mp = m_pulldown(m, iplen + sizeof(*ph), 902 count * sizeof(*rup), &offp)) == NULL) { 903 pfsyncstats.pfsyncs_badlen++; 904 return; 905 } 906 907 crit_enter(); 908 if (sc->sc_mbuf != NULL) 909 pfsync_sendout(sc); 910 for (i = 0, 911 rup = (struct pfsync_state_upd_req *)(mp->m_data + offp); 912 i < count; i++, rup++) { 913 bcopy(rup->id, &id_key.id, sizeof(id_key.id)); 914 id_key.creatorid = rup->creatorid; 915 916 if (id_key.id == 0 && id_key.creatorid == 0) { 917 sc->sc_ureq_received = mycpu->gd_time_seconds; 918 if (sc->sc_bulk_send_next == NULL) { 919 if (++sc->sc_bulk_send_cpu >= ncpus) 920 sc->sc_bulk_send_cpu = 0; 921 sc->sc_bulk_send_next = 922 TAILQ_FIRST(&state_list[sc->sc_bulk_send_cpu]); 923 } 924 sc->sc_bulk_terminator = 925 sc->sc_bulk_send_next; 926 sc->sc_bulk_terminator_cpu = 927 sc->sc_bulk_send_cpu; 928 if (pf_status.debug >= PF_DEBUG_MISC) 929 kprintf("pfsync: received " 930 "bulk update request\n"); 931 pfsync_send_bus(sc, PFSYNC_BUS_START); 932 lwkt_reltoken(&pf_token); 933 callout_init(&sc->sc_bulk_tmo); 934 lwkt_gettoken(&pf_token); 935 } else { 936 st = pf_find_state_byid(&id_key); 937 if (st == NULL) { 938 pfsyncstats.pfsyncs_badstate++; 939 continue; 940 } 941 if (!st->sync_flags) 942 pfsync_pack_state(PFSYNC_ACT_UPD, 943 st, 0); 944 } 945 } 946 if (sc->sc_mbuf != NULL) 947 pfsync_sendout(sc); 948 crit_exit(); 949 break; 950 case PFSYNC_ACT_BUS: 951 /* If we're not waiting for a bulk update, who cares. */ 952 if (sc->sc_ureq_sent == 0) 953 break; 954 955 if ((mp = m_pulldown(m, iplen + sizeof(*ph), 956 sizeof(*bus), &offp)) == NULL) { 957 pfsyncstats.pfsyncs_badlen++; 958 return; 959 } 960 bus = (struct pfsync_state_bus *)(mp->m_data + offp); 961 switch (bus->status) { 962 case PFSYNC_BUS_START: 963 lwkt_reltoken(&pf_token); 964 callout_reset(&sc->sc_bulkfail_tmo, 965 pf_pool_limits[PF_LIMIT_STATES].limit / 966 (PFSYNC_BULKPACKETS * sc->sc_maxcount), 967 pfsync_bulkfail, LIST_FIRST(&pfsync_list)); 968 lwkt_gettoken(&pf_token); 969 if (pf_status.debug >= PF_DEBUG_MISC) 970 kprintf("pfsync: received bulk " 971 "update start\n"); 972 break; 973 case PFSYNC_BUS_END: 974 if (mycpu->gd_time_seconds - ntohl(bus->endtime) >= 975 sc->sc_ureq_sent) { 976 /* that's it, we're happy */ 977 sc->sc_ureq_sent = 0; 978 sc->sc_bulk_tries = 0; 979 lwkt_reltoken(&pf_token); 980 callout_stop(&sc->sc_bulkfail_tmo); 981 lwkt_gettoken(&pf_token); 982 #ifdef CARP 983 if (!pfsync_sync_ok) { 984 lwkt_reltoken(&pf_token); 985 carp_group_demote_adj(&sc->sc_if, -1); 986 lwkt_gettoken(&pf_token); 987 } 988 #endif 989 pfsync_sync_ok = 1; 990 if (pf_status.debug >= PF_DEBUG_MISC) 991 kprintf("pfsync: received valid " 992 "bulk update end\n"); 993 } else { 994 if (pf_status.debug >= PF_DEBUG_MISC) 995 kprintf("pfsync: received invalid " 996 "bulk update end: bad timestamp\n"); 997 } 998 break; 999 } 1000 break; 1001 } 1002 1003 done: 1004 if (m) 1005 m_freem(m); 1006 } 1007 1008 int 1009 pfsyncoutput(struct ifnet *ifp, struct mbuf *m, struct sockaddr *dst, 1010 struct rtentry *rt) 1011 { 1012 m_freem(m); 1013 return (0); 1014 } 1015 1016 /* ARGSUSED */ 1017 int 1018 pfsyncioctl(struct ifnet *ifp, u_long cmd, caddr_t data, struct ucred *cr) 1019 { 1020 struct pfsync_softc *sc = ifp->if_softc; 1021 struct ifreq *ifr = (struct ifreq *)data; 1022 struct ip_moptions *imo = &sc->sc_imo; 1023 struct pfsyncreq pfsyncr; 1024 struct ifnet *sifp; 1025 int error; 1026 1027 lwkt_gettoken(&pf_token); 1028 1029 switch (cmd) { 1030 case SIOCSIFADDR: 1031 case SIOCAIFADDR: 1032 case SIOCSIFDSTADDR: 1033 case SIOCSIFFLAGS: 1034 if (ifp->if_flags & IFF_UP) 1035 ifp->if_flags |= IFF_RUNNING; 1036 else 1037 ifp->if_flags &= ~IFF_RUNNING; 1038 break; 1039 case SIOCSIFMTU: 1040 if (ifr->ifr_mtu < PFSYNC_MINMTU) { 1041 lwkt_reltoken(&pf_token); 1042 return (EINVAL); 1043 } 1044 if (ifr->ifr_mtu > MCLBYTES) 1045 ifr->ifr_mtu = MCLBYTES; 1046 crit_enter(); 1047 if (ifr->ifr_mtu < ifp->if_mtu) 1048 pfsync_sendout(sc); 1049 pfsync_setmtu(sc, ifr->ifr_mtu); 1050 crit_exit(); 1051 break; 1052 case SIOCGETPFSYNC: 1053 bzero(&pfsyncr, sizeof(pfsyncr)); 1054 if (sc->sc_sync_ifp) 1055 strlcpy(pfsyncr.pfsyncr_syncdev, 1056 sc->sc_sync_ifp->if_xname, IFNAMSIZ); 1057 pfsyncr.pfsyncr_syncpeer = sc->sc_sync_peer; 1058 pfsyncr.pfsyncr_maxupdates = sc->sc_maxupdates; 1059 lwkt_reltoken(&pf_token); 1060 if ((error = copyout(&pfsyncr, ifr->ifr_data, sizeof(pfsyncr)))) 1061 return (error); 1062 lwkt_gettoken(&pf_token); 1063 break; 1064 case SIOCSETPFSYNC: 1065 error = caps_priv_check(cr, SYSCAP_RESTRICTEDROOT | 1066 __SYSCAP_NULLCRED); 1067 if (error) { 1068 lwkt_reltoken(&pf_token); 1069 return (error); 1070 } 1071 if ((error = copyin(ifr->ifr_data, &pfsyncr, sizeof(pfsyncr)))) { 1072 lwkt_reltoken(&pf_token); 1073 return (error); 1074 } 1075 1076 if (pfsyncr.pfsyncr_syncpeer.s_addr == 0) 1077 sc->sc_sync_peer.s_addr = INADDR_PFSYNC_GROUP; 1078 else 1079 sc->sc_sync_peer.s_addr = 1080 pfsyncr.pfsyncr_syncpeer.s_addr; 1081 1082 if (pfsyncr.pfsyncr_maxupdates > 255) { 1083 lwkt_reltoken(&pf_token); 1084 return (EINVAL); 1085 } 1086 sc->sc_maxupdates = pfsyncr.pfsyncr_maxupdates; 1087 1088 if (pfsyncr.pfsyncr_syncdev[0] == 0) { 1089 sc->sc_sync_ifp = NULL; 1090 if (sc->sc_mbuf_net != NULL) { 1091 /* Don't keep stale pfsync packets around. */ 1092 crit_enter(); 1093 m_freem(sc->sc_mbuf_net); 1094 sc->sc_mbuf_net = NULL; 1095 sc->sc_statep_net.s = NULL; 1096 crit_exit(); 1097 } 1098 if (imo->imo_num_memberships > 0) { 1099 pfsync_in_delmulti(imo->imo_membership[--imo->imo_num_memberships]); 1100 imo->imo_multicast_ifp = NULL; 1101 } 1102 break; 1103 } 1104 1105 /* 1106 * XXX not that MPSAFE; pfsync needs serious rework 1107 */ 1108 ifnet_deserialize_all(ifp); 1109 ifnet_lock(); 1110 sifp = ifunit(pfsyncr.pfsyncr_syncdev); 1111 ifnet_unlock(); 1112 ifnet_serialize_all(ifp); 1113 1114 if (sifp == NULL) { 1115 lwkt_reltoken(&pf_token); 1116 return (EINVAL); 1117 } 1118 1119 crit_enter(); 1120 if (sifp->if_mtu < sc->sc_if.if_mtu || 1121 (sc->sc_sync_ifp != NULL && 1122 sifp->if_mtu < sc->sc_sync_ifp->if_mtu) || 1123 sifp->if_mtu < MCLBYTES - sizeof(struct ip)) 1124 pfsync_sendout(sc); 1125 sc->sc_sync_ifp = sifp; 1126 1127 pfsync_setmtu(sc, sc->sc_if.if_mtu); 1128 1129 if (imo->imo_num_memberships > 0) { 1130 pfsync_in_delmulti(imo->imo_membership[--imo->imo_num_memberships]); 1131 imo->imo_multicast_ifp = NULL; 1132 } 1133 1134 if (sc->sc_sync_ifp && 1135 sc->sc_sync_peer.s_addr == INADDR_PFSYNC_GROUP) { 1136 if (!(sc->sc_sync_ifp->if_flags & IFF_MULTICAST)) { 1137 sc->sc_sync_ifp = NULL; 1138 lwkt_reltoken(&pf_token); 1139 crit_exit(); 1140 return (EADDRNOTAVAIL); 1141 } 1142 1143 if ((imo->imo_membership[0] = 1144 pfsync_in_addmulti(sc->sc_sync_ifp)) == NULL) { 1145 sc->sc_sync_ifp = NULL; 1146 lwkt_reltoken(&pf_token); 1147 crit_exit(); 1148 return (ENOBUFS); 1149 } 1150 imo->imo_num_memberships++; 1151 imo->imo_multicast_ifp = sc->sc_sync_ifp; 1152 imo->imo_multicast_ttl = PFSYNC_DFLTTL; 1153 imo->imo_multicast_loop = 0; 1154 } 1155 1156 if (sc->sc_sync_ifp || 1157 sc->sc_sendaddr.s_addr != INADDR_PFSYNC_GROUP) { 1158 /* Request a full state table update. */ 1159 sc->sc_ureq_sent = mycpu->gd_time_seconds; 1160 #ifdef CARP 1161 if (pfsync_sync_ok) 1162 carp_group_demote_adj(&sc->sc_if, 1); 1163 #endif 1164 pfsync_sync_ok = 0; 1165 if (pf_status.debug >= PF_DEBUG_MISC) 1166 kprintf("pfsync: requesting bulk update\n"); 1167 lwkt_reltoken(&pf_token); 1168 callout_reset(&sc->sc_bulkfail_tmo, 5 * hz, 1169 pfsync_bulkfail, LIST_FIRST(&pfsync_list)); 1170 lwkt_gettoken(&pf_token); 1171 error = pfsync_request_update(NULL, NULL); 1172 if (error == ENOMEM) { 1173 lwkt_reltoken(&pf_token); 1174 crit_exit(); 1175 return (ENOMEM); 1176 } 1177 pfsync_sendout(sc); 1178 } 1179 crit_exit(); 1180 1181 break; 1182 1183 default: 1184 lwkt_reltoken(&pf_token); 1185 return (ENOTTY); 1186 } 1187 1188 lwkt_reltoken(&pf_token); 1189 return (0); 1190 } 1191 1192 void 1193 pfsync_setmtu(struct pfsync_softc *sc, int mtu_req) 1194 { 1195 int mtu; 1196 1197 if (sc->sc_sync_ifp && sc->sc_sync_ifp->if_mtu < mtu_req) 1198 mtu = sc->sc_sync_ifp->if_mtu; 1199 else 1200 mtu = mtu_req; 1201 1202 sc->sc_maxcount = (mtu - sizeof(struct pfsync_header)) / 1203 sizeof(struct pfsync_state); 1204 if (sc->sc_maxcount > 254) 1205 sc->sc_maxcount = 254; 1206 sc->sc_if.if_mtu = sizeof(struct pfsync_header) + 1207 sc->sc_maxcount * sizeof(struct pfsync_state); 1208 } 1209 1210 struct mbuf * 1211 pfsync_get_mbuf(struct pfsync_softc *sc, u_int8_t action, void **sp) 1212 { 1213 struct pfsync_header *h; 1214 struct mbuf *m; 1215 int len; 1216 1217 ASSERT_LWKT_TOKEN_HELD(&pf_token); 1218 1219 MGETHDR(m, M_WAITOK, MT_DATA); 1220 if (m == NULL) { 1221 IFNET_STAT_INC(&sc->sc_if, oerrors, 1); 1222 return (NULL); 1223 } 1224 1225 switch (action) { 1226 case PFSYNC_ACT_CLR: 1227 len = sizeof(struct pfsync_header) + 1228 sizeof(struct pfsync_state_clr); 1229 break; 1230 case PFSYNC_ACT_UPD_C: 1231 len = (sc->sc_maxcount * sizeof(struct pfsync_state_upd)) + 1232 sizeof(struct pfsync_header); 1233 break; 1234 case PFSYNC_ACT_DEL_C: 1235 len = (sc->sc_maxcount * sizeof(struct pfsync_state_del)) + 1236 sizeof(struct pfsync_header); 1237 break; 1238 case PFSYNC_ACT_UREQ: 1239 len = (sc->sc_maxcount * sizeof(struct pfsync_state_upd_req)) + 1240 sizeof(struct pfsync_header); 1241 break; 1242 case PFSYNC_ACT_BUS: 1243 len = sizeof(struct pfsync_header) + 1244 sizeof(struct pfsync_state_bus); 1245 break; 1246 case PFSYNC_ACT_TDB_UPD: 1247 len = (sc->sc_maxcount * sizeof(struct pfsync_tdb)) + 1248 sizeof(struct pfsync_header); 1249 break; 1250 default: 1251 len = (sc->sc_maxcount * sizeof(struct pfsync_state)) + 1252 sizeof(struct pfsync_header); 1253 break; 1254 } 1255 1256 if (len > MHLEN) { 1257 MCLGET(m, M_WAITOK); 1258 if ((m->m_flags & M_EXT) == 0) { 1259 m_free(m); 1260 IFNET_STAT_INC(&sc->sc_if, oerrors, 1); 1261 return (NULL); 1262 } 1263 m->m_data += rounddown2(MCLBYTES - len, sizeof(long)); 1264 } else 1265 MH_ALIGN(m, len); 1266 1267 m->m_pkthdr.rcvif = NULL; 1268 m->m_pkthdr.len = m->m_len = sizeof(struct pfsync_header); 1269 h = mtod(m, struct pfsync_header *); 1270 h->version = PFSYNC_VERSION; 1271 h->af = 0; 1272 h->count = 0; 1273 h->action = action; 1274 1275 *sp = (void *)((char *)h + PFSYNC_HDRLEN); 1276 lwkt_reltoken(&pf_token); 1277 callout_reset(&sc->sc_tmo, hz, pfsync_timeout, 1278 LIST_FIRST(&pfsync_list)); 1279 lwkt_gettoken(&pf_token); 1280 return (m); 1281 } 1282 1283 int 1284 pfsync_pack_state(u_int8_t action, struct pf_state *st, int flags) 1285 { 1286 struct ifnet *ifp = NULL; 1287 struct pfsync_softc *sc = pfsyncif; 1288 struct pfsync_header *h, *h_net; 1289 struct pfsync_state *sp = NULL; 1290 struct pfsync_state_upd *up = NULL; 1291 struct pfsync_state_del *dp = NULL; 1292 int ret = 0; 1293 u_int8_t i = 255, newaction = 0; 1294 1295 if (sc == NULL) 1296 return (0); 1297 ifp = &sc->sc_if; 1298 1299 /* 1300 * If a packet falls in the forest and there's nobody around to 1301 * hear, does it make a sound? 1302 */ 1303 if (ifp->if_bpf == NULL && sc->sc_sync_ifp == NULL && 1304 sc->sc_sync_peer.s_addr == INADDR_PFSYNC_GROUP) { 1305 /* Don't leave any stale pfsync packets hanging around. */ 1306 if (sc->sc_mbuf != NULL) { 1307 m_freem(sc->sc_mbuf); 1308 sc->sc_mbuf = NULL; 1309 sc->sc_statep.s = NULL; 1310 } 1311 return (0); 1312 } 1313 1314 if (action >= PFSYNC_ACT_MAX) 1315 return (EINVAL); 1316 1317 crit_enter(); 1318 if (sc->sc_mbuf == NULL) { 1319 if ((sc->sc_mbuf = pfsync_get_mbuf(sc, action, 1320 (void *)&sc->sc_statep.s)) == NULL) { 1321 crit_exit(); 1322 return (ENOMEM); 1323 } 1324 h = mtod(sc->sc_mbuf, struct pfsync_header *); 1325 } else { 1326 h = mtod(sc->sc_mbuf, struct pfsync_header *); 1327 if (h->action != action) { 1328 pfsync_sendout(sc); 1329 if ((sc->sc_mbuf = pfsync_get_mbuf(sc, action, 1330 (void *)&sc->sc_statep.s)) == NULL) { 1331 crit_exit(); 1332 return (ENOMEM); 1333 } 1334 h = mtod(sc->sc_mbuf, struct pfsync_header *); 1335 } else { 1336 /* 1337 * If it's an update, look in the packet to see if 1338 * we already have an update for the state. 1339 */ 1340 if (action == PFSYNC_ACT_UPD && sc->sc_maxupdates) { 1341 struct pfsync_state *usp = 1342 (void *)((char *)h + PFSYNC_HDRLEN); 1343 1344 for (i = 0; i < h->count; i++) { 1345 if (!memcmp(usp->id, &st->id, 1346 PFSYNC_ID_LEN) && 1347 usp->creatorid == st->creatorid) { 1348 sp = usp; 1349 sp->updates++; 1350 break; 1351 } 1352 usp++; 1353 } 1354 } 1355 } 1356 } 1357 1358 st->pfsync_time = mycpu->gd_time_seconds; 1359 1360 if (sp == NULL) { 1361 /* not a "duplicate" update */ 1362 i = 255; 1363 sp = sc->sc_statep.s++; 1364 sc->sc_mbuf->m_pkthdr.len = 1365 sc->sc_mbuf->m_len += sizeof(struct pfsync_state); 1366 h->count++; 1367 bzero(sp, sizeof(*sp)); 1368 1369 pfsync_state_export(sp, st); 1370 1371 if (flags & PFSYNC_FLAG_STALE) 1372 sp->sync_flags |= PFSTATE_STALE; 1373 } else { 1374 pf_state_peer_hton(&st->src, &sp->src); 1375 pf_state_peer_hton(&st->dst, &sp->dst); 1376 1377 if (st->expire <= time_second) 1378 sp->expire = htonl(0); 1379 else 1380 sp->expire = htonl(st->expire - time_second); 1381 } 1382 1383 /* do we need to build "compressed" actions for network transfer? */ 1384 if (sc->sc_sync_ifp && flags & PFSYNC_FLAG_COMPRESS) { 1385 switch (action) { 1386 case PFSYNC_ACT_UPD: 1387 newaction = PFSYNC_ACT_UPD_C; 1388 break; 1389 case PFSYNC_ACT_DEL: 1390 newaction = PFSYNC_ACT_DEL_C; 1391 break; 1392 default: 1393 /* by default we just send the uncompressed states */ 1394 break; 1395 } 1396 } 1397 1398 if (newaction) { 1399 if (sc->sc_mbuf_net == NULL) { 1400 if ((sc->sc_mbuf_net = pfsync_get_mbuf(sc, newaction, 1401 (void *)&sc->sc_statep_net.s)) == NULL) { 1402 crit_exit(); 1403 return (ENOMEM); 1404 } 1405 } 1406 h_net = mtod(sc->sc_mbuf_net, struct pfsync_header *); 1407 1408 switch (newaction) { 1409 case PFSYNC_ACT_UPD_C: 1410 if (i != 255) { 1411 up = (void *)((char *)h_net + 1412 PFSYNC_HDRLEN + (i * sizeof(*up))); 1413 up->updates++; 1414 } else { 1415 h_net->count++; 1416 sc->sc_mbuf_net->m_pkthdr.len = 1417 sc->sc_mbuf_net->m_len += sizeof(*up); 1418 up = sc->sc_statep_net.u++; 1419 1420 bzero(up, sizeof(*up)); 1421 bcopy(&st->id, up->id, sizeof(up->id)); 1422 up->creatorid = st->creatorid; 1423 } 1424 up->timeout = st->timeout; 1425 up->expire = sp->expire; 1426 up->src = sp->src; 1427 up->dst = sp->dst; 1428 break; 1429 case PFSYNC_ACT_DEL_C: 1430 sc->sc_mbuf_net->m_pkthdr.len = 1431 sc->sc_mbuf_net->m_len += sizeof(*dp); 1432 dp = sc->sc_statep_net.d++; 1433 h_net->count++; 1434 1435 bzero(dp, sizeof(*dp)); 1436 bcopy(&st->id, dp->id, sizeof(dp->id)); 1437 dp->creatorid = st->creatorid; 1438 break; 1439 } 1440 } 1441 1442 if (h->count == sc->sc_maxcount || 1443 (sc->sc_maxupdates && (sp->updates >= sc->sc_maxupdates))) 1444 ret = pfsync_sendout(sc); 1445 1446 crit_exit(); 1447 return (ret); 1448 } 1449 1450 int 1451 pfsync_request_update(struct pfsync_state_upd *up, struct in_addr *src) 1452 { 1453 struct pfsync_header *h; 1454 struct pfsync_softc *sc = pfsyncif; 1455 struct pfsync_state_upd_req *rup; 1456 int ret = 0; 1457 1458 if (sc == NULL) 1459 return (0); 1460 1461 if (sc->sc_mbuf == NULL) { 1462 if ((sc->sc_mbuf = pfsync_get_mbuf(sc, PFSYNC_ACT_UREQ, 1463 (void *)&sc->sc_statep.s)) == NULL) 1464 return (ENOMEM); 1465 h = mtod(sc->sc_mbuf, struct pfsync_header *); 1466 } else { 1467 h = mtod(sc->sc_mbuf, struct pfsync_header *); 1468 if (h->action != PFSYNC_ACT_UREQ) { 1469 pfsync_sendout(sc); 1470 if ((sc->sc_mbuf = pfsync_get_mbuf(sc, PFSYNC_ACT_UREQ, 1471 (void *)&sc->sc_statep.s)) == NULL) 1472 return (ENOMEM); 1473 h = mtod(sc->sc_mbuf, struct pfsync_header *); 1474 } 1475 } 1476 1477 if (src != NULL) 1478 sc->sc_sendaddr = *src; 1479 sc->sc_mbuf->m_pkthdr.len = sc->sc_mbuf->m_len += sizeof(*rup); 1480 h->count++; 1481 rup = sc->sc_statep.r++; 1482 bzero(rup, sizeof(*rup)); 1483 if (up != NULL) { 1484 bcopy(up->id, rup->id, sizeof(rup->id)); 1485 rup->creatorid = up->creatorid; 1486 } 1487 1488 if (h->count == sc->sc_maxcount) 1489 ret = pfsync_sendout(sc); 1490 1491 return (ret); 1492 } 1493 1494 int 1495 pfsync_clear_states(u_int32_t creatorid, char *ifname) 1496 { 1497 struct pfsync_softc *sc = pfsyncif; 1498 struct pfsync_state_clr *cp; 1499 int ret; 1500 1501 if (sc == NULL) 1502 return (0); 1503 1504 crit_enter(); 1505 if (sc->sc_mbuf != NULL) 1506 pfsync_sendout(sc); 1507 if ((sc->sc_mbuf = pfsync_get_mbuf(sc, PFSYNC_ACT_CLR, 1508 (void *)&sc->sc_statep.c)) == NULL) { 1509 crit_exit(); 1510 return (ENOMEM); 1511 } 1512 sc->sc_mbuf->m_pkthdr.len = sc->sc_mbuf->m_len += sizeof(*cp); 1513 cp = sc->sc_statep.c; 1514 cp->creatorid = creatorid; 1515 if (ifname != NULL) 1516 strlcpy(cp->ifname, ifname, IFNAMSIZ); 1517 1518 ret = (pfsync_sendout(sc)); 1519 crit_exit(); 1520 return (ret); 1521 } 1522 1523 void 1524 pfsync_timeout(void *v) 1525 { 1526 struct pfsync_softc *sc = v; 1527 1528 crit_enter(); 1529 pfsync_sendout(sc); 1530 crit_exit(); 1531 } 1532 1533 void 1534 pfsync_send_bus(struct pfsync_softc *sc, u_int8_t status) 1535 { 1536 struct pfsync_state_bus *bus; 1537 1538 if (sc->sc_mbuf != NULL) 1539 pfsync_sendout(sc); 1540 1541 if (pfsync_sync_ok && 1542 (sc->sc_mbuf = pfsync_get_mbuf(sc, PFSYNC_ACT_BUS, 1543 (void *)&sc->sc_statep.b)) != NULL) { 1544 sc->sc_mbuf->m_pkthdr.len = sc->sc_mbuf->m_len += sizeof(*bus); 1545 bus = sc->sc_statep.b; 1546 bus->creatorid = pf_status.hostid; 1547 bus->status = status; 1548 bus->endtime = htonl(mycpu->gd_time_seconds - sc->sc_ureq_received); 1549 pfsync_sendout(sc); 1550 } 1551 } 1552 1553 void 1554 pfsync_bulk_update(void *v) 1555 { 1556 struct pfsync_softc *sc = v; 1557 int i = 0; 1558 int cpu; 1559 struct pf_state *state; 1560 1561 ASSERT_LWKT_TOKEN_HELD(&pf_token); 1562 1563 crit_enter(); 1564 if (sc->sc_mbuf != NULL) 1565 pfsync_sendout(sc); 1566 1567 /* 1568 * Grab at most PFSYNC_BULKPACKETS worth of states which have not 1569 * been sent since the latest request was made. 1570 */ 1571 state = sc->sc_bulk_send_next; 1572 cpu = sc->sc_bulk_send_cpu; 1573 if (state) 1574 do { 1575 /* send state update if syncable and not already sent */ 1576 if (!state->sync_flags 1577 && state->timeout < PFTM_MAX 1578 && state->pfsync_time <= sc->sc_ureq_received) { 1579 pfsync_pack_state(PFSYNC_ACT_UPD, state, 0); 1580 i++; 1581 } 1582 1583 /* figure next state to send */ 1584 state = TAILQ_NEXT(state, entry_list); 1585 1586 /* wrap to start of list if we hit the end */ 1587 if (state == NULL) { 1588 if (++cpu >= ncpus) 1589 cpu = 0; 1590 state = TAILQ_FIRST(&state_list[cpu]); 1591 } 1592 } while (i < sc->sc_maxcount * PFSYNC_BULKPACKETS && 1593 cpu != sc->sc_bulk_terminator_cpu && 1594 state != sc->sc_bulk_terminator); 1595 1596 if (state == NULL || (cpu == sc->sc_bulk_terminator_cpu && 1597 state == sc->sc_bulk_terminator)) { 1598 /* we're done */ 1599 pfsync_send_bus(sc, PFSYNC_BUS_END); 1600 sc->sc_ureq_received = 0; 1601 sc->sc_bulk_send_next = NULL; 1602 sc->sc_bulk_terminator = NULL; 1603 sc->sc_bulk_send_cpu = 0; 1604 sc->sc_bulk_terminator_cpu = 0; 1605 lwkt_reltoken(&pf_token); 1606 callout_stop(&sc->sc_bulk_tmo); 1607 lwkt_gettoken(&pf_token); 1608 if (pf_status.debug >= PF_DEBUG_MISC) 1609 kprintf("pfsync: bulk update complete\n"); 1610 } else { 1611 /* look again for more in a bit */ 1612 lwkt_reltoken(&pf_token); 1613 callout_reset(&sc->sc_bulk_tmo, 1, pfsync_timeout, 1614 LIST_FIRST(&pfsync_list)); 1615 lwkt_gettoken(&pf_token); 1616 sc->sc_bulk_send_next = state; 1617 sc->sc_bulk_send_cpu = cpu; 1618 } 1619 if (sc->sc_mbuf != NULL) 1620 pfsync_sendout(sc); 1621 crit_exit(); 1622 } 1623 1624 void 1625 pfsync_bulkfail(void *v) 1626 { 1627 struct pfsync_softc *sc = v; 1628 int error; 1629 1630 ASSERT_LWKT_TOKEN_HELD(&pf_token); 1631 1632 if (sc->sc_bulk_tries++ < PFSYNC_MAX_BULKTRIES) { 1633 /* Try again in a bit */ 1634 lwkt_reltoken(&pf_token); 1635 callout_reset(&sc->sc_bulkfail_tmo, 5 * hz, pfsync_bulkfail, 1636 LIST_FIRST(&pfsync_list)); 1637 lwkt_gettoken(&pf_token); 1638 crit_enter(); 1639 error = pfsync_request_update(NULL, NULL); 1640 if (error == ENOMEM) { 1641 if (pf_status.debug >= PF_DEBUG_MISC) 1642 kprintf("pfsync: cannot allocate mbufs for " 1643 "bulk update\n"); 1644 } else 1645 pfsync_sendout(sc); 1646 crit_exit(); 1647 } else { 1648 /* Pretend like the transfer was ok */ 1649 sc->sc_ureq_sent = 0; 1650 sc->sc_bulk_tries = 0; 1651 #ifdef CARP 1652 if (!pfsync_sync_ok) 1653 carp_group_demote_adj(&sc->sc_if, -1); 1654 #endif 1655 pfsync_sync_ok = 1; 1656 if (pf_status.debug >= PF_DEBUG_MISC) 1657 kprintf("pfsync: failed to receive " 1658 "bulk update status\n"); 1659 lwkt_reltoken(&pf_token); 1660 callout_stop(&sc->sc_bulkfail_tmo); 1661 lwkt_gettoken(&pf_token); 1662 } 1663 } 1664 1665 static void 1666 pfsync_sendout_handler(netmsg_t nmsg) 1667 { 1668 struct netmsg_genpkt *msg = (struct netmsg_genpkt *)nmsg; 1669 1670 pfsync_sendout_mbuf(msg->arg1, msg->m); 1671 } 1672 1673 int 1674 pfsync_sendout(struct pfsync_softc *sc) 1675 { 1676 #if NBPF > 0 1677 struct ifnet *ifp = &sc->sc_if; 1678 #endif 1679 struct mbuf *m; 1680 struct netmsg_genpkt *msg; 1681 1682 ASSERT_LWKT_TOKEN_HELD(&pf_token); 1683 1684 lwkt_reltoken(&pf_token); 1685 callout_stop(&sc->sc_tmo); 1686 lwkt_gettoken(&pf_token); 1687 1688 if (sc->sc_mbuf == NULL) 1689 return (0); 1690 m = sc->sc_mbuf; 1691 sc->sc_mbuf = NULL; 1692 sc->sc_statep.s = NULL; 1693 1694 #if NBPF > 0 1695 if (ifp->if_bpf) { 1696 bpf_gettoken(); 1697 if (ifp->if_bpf) 1698 bpf_mtap(ifp->if_bpf, m); 1699 bpf_reltoken(); 1700 } 1701 #endif 1702 1703 if (sc->sc_mbuf_net) { 1704 m_freem(m); 1705 m = sc->sc_mbuf_net; 1706 sc->sc_mbuf_net = NULL; 1707 sc->sc_statep_net.s = NULL; 1708 } 1709 1710 msg = &m->m_hdr.mh_genmsg; 1711 netmsg_init(&msg->base, NULL, &netisr_apanic_rport, 0, 1712 pfsync_sendout_handler); 1713 msg->m = m; 1714 msg->arg1 = sc; 1715 netisr_sendmsg(&msg->base, 0); 1716 1717 return (0); 1718 } 1719 1720 int 1721 pfsync_sendout_mbuf(struct pfsync_softc *sc, struct mbuf *m) 1722 { 1723 struct sockaddr sa; 1724 struct ip *ip; 1725 1726 if (sc->sc_sync_ifp || 1727 sc->sc_sync_peer.s_addr != INADDR_PFSYNC_GROUP) { 1728 M_PREPEND(m, sizeof(struct ip), M_WAITOK); 1729 if (m == NULL) { 1730 pfsyncstats.pfsyncs_onomem++; 1731 return (0); 1732 } 1733 ip = mtod(m, struct ip *); 1734 ip->ip_v = IPVERSION; 1735 ip->ip_hl = sizeof(*ip) >> 2; 1736 ip->ip_tos = IPTOS_LOWDELAY; 1737 ip->ip_len = htons(m->m_pkthdr.len); 1738 ip->ip_id = htons(ip_randomid()); 1739 ip->ip_off = htons(IP_DF); 1740 ip->ip_ttl = PFSYNC_DFLTTL; 1741 ip->ip_p = IPPROTO_PFSYNC; 1742 ip->ip_sum = 0; 1743 1744 bzero(&sa, sizeof(sa)); 1745 ip->ip_src.s_addr = INADDR_ANY; 1746 1747 if (sc->sc_sendaddr.s_addr == INADDR_PFSYNC_GROUP) 1748 m->m_flags |= M_MCAST; 1749 ip->ip_dst = sc->sc_sendaddr; 1750 sc->sc_sendaddr.s_addr = sc->sc_sync_peer.s_addr; 1751 1752 pfsyncstats.pfsyncs_opackets++; 1753 1754 if (ip_output(m, NULL, NULL, IP_RAWOUTPUT, &sc->sc_imo, NULL)) 1755 pfsyncstats.pfsyncs_oerrors++; 1756 } else 1757 m_freem(m); 1758 1759 return (0); 1760 } 1761 1762 static int 1763 pfsync_modevent(module_t mod, int type, void *data) 1764 { 1765 int error = 0; 1766 1767 struct pfsync_softc *pfs_if, *tmp; 1768 1769 lwkt_gettoken(&pf_token); 1770 1771 switch (type) { 1772 case MOD_LOAD: 1773 LIST_INIT(&pfsync_list); 1774 lwkt_reltoken(&pf_token); 1775 if_clone_attach(&pfsync_cloner); 1776 lwkt_gettoken(&pf_token); 1777 /* Override the function pointer for pf_ioctl.c */ 1778 break; 1779 1780 case MOD_UNLOAD: 1781 lwkt_reltoken(&pf_token); 1782 if_clone_detach(&pfsync_cloner); 1783 lwkt_gettoken(&pf_token); 1784 LIST_FOREACH_MUTABLE(pfs_if, &pfsync_list, sc_next, tmp) { 1785 pfsync_clone_destroy(&pfs_if->sc_if); 1786 } 1787 break; 1788 1789 default: 1790 error = EINVAL; 1791 break; 1792 } 1793 1794 lwkt_reltoken(&pf_token); 1795 return error; 1796 } 1797 1798 static moduledata_t pfsync_mod = { 1799 "pfsync", 1800 pfsync_modevent, 1801 0 1802 }; 1803 1804 #define PFSYNC_MODVER 44 1805 1806 DECLARE_MODULE(pfsync, pfsync_mod, SI_SUB_PSEUDO, SI_ORDER_ANY); 1807 MODULE_VERSION(pfsync, PFSYNC_MODVER); 1808 MODULE_DEPEND(pfsync, pf, PF_MODVER, PF_MODVER, PF_MODVER); 1809 1810 static void 1811 pfsync_in_addmulti_dispatch(netmsg_t nmsg) 1812 { 1813 struct lwkt_msg *lmsg = &nmsg->lmsg; 1814 struct ifnet *ifp = lmsg->u.ms_resultp; 1815 struct in_addr addr; 1816 1817 addr.s_addr = INADDR_PFSYNC_GROUP; 1818 lmsg->u.ms_resultp = in_addmulti(&addr, ifp); 1819 1820 lwkt_replymsg(lmsg, 0); 1821 } 1822 1823 static struct in_multi * 1824 pfsync_in_addmulti(struct ifnet *ifp) 1825 { 1826 struct netmsg_base nmsg; 1827 struct lwkt_msg *lmsg = &nmsg.lmsg; 1828 1829 netmsg_init(&nmsg, NULL, &curthread->td_msgport, 0, 1830 pfsync_in_addmulti_dispatch); 1831 lmsg->u.ms_resultp = ifp; 1832 1833 lwkt_domsg(netisr_cpuport(0), lmsg, 0); 1834 return lmsg->u.ms_resultp; 1835 } 1836 1837 static void 1838 pfsync_in_delmulti_dispatch(netmsg_t nmsg) 1839 { 1840 struct lwkt_msg *lmsg = &nmsg->lmsg; 1841 1842 in_delmulti(lmsg->u.ms_resultp); 1843 lwkt_replymsg(lmsg, 0); 1844 } 1845 1846 static void 1847 pfsync_in_delmulti(struct in_multi *inm) 1848 { 1849 struct netmsg_base nmsg; 1850 struct lwkt_msg *lmsg = &nmsg.lmsg; 1851 1852 netmsg_init(&nmsg, NULL, &curthread->td_msgport, 0, 1853 pfsync_in_delmulti_dispatch); 1854 lmsg->u.ms_resultp = inm; 1855 1856 lwkt_domsg(netisr_cpuport(0), lmsg, 0); 1857 } 1858