1 /* 2 * Copyright (c) 2002 Michael Shalayeff 3 * All rights reserved. 4 * 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted provided that the following conditions 7 * are met: 8 * 1. Redistributions of source code must retain the above copyright 9 * notice, this list of conditions and the following disclaimer. 10 * 2. Redistributions in binary form must reproduce the above copyright 11 * notice, this list of conditions and the following disclaimer in the 12 * documentation and/or other materials provided with the distribution. 13 * 14 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR 15 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 16 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 17 * IN NO EVENT SHALL THE AUTHOR OR HIS RELATIVES BE LIABLE FOR ANY DIRECT, 18 * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 19 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 20 * SERVICES; LOSS OF MIND, USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 21 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, 22 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING 23 * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF 24 * THE POSSIBILITY OF SUCH DAMAGE. 25 * 26 * $OpenBSD: if_pfsync.c,v 1.98 2008/06/29 08:42:15 mcbride Exp $ 27 */ 28 29 #include "opt_inet.h" 30 #include "opt_inet6.h" 31 #include "opt_carp.h" 32 #include "use_bpf.h" 33 34 #include <sys/param.h> 35 #include <sys/endian.h> 36 #include <sys/proc.h> 37 #include <sys/caps.h> 38 #include <sys/systm.h> 39 #include <sys/time.h> 40 #include <sys/mbuf.h> 41 #include <sys/socket.h> 42 #include <sys/kernel.h> 43 #include <sys/malloc.h> 44 #include <sys/module.h> 45 #include <sys/msgport2.h> 46 #include <sys/sockio.h> 47 #include <sys/thread2.h> 48 49 #include <machine/inttypes.h> 50 51 #include <net/if.h> 52 #include <net/if_types.h> 53 #include <net/ifq_var.h> 54 #include <net/route.h> 55 #include <net/bpf.h> 56 #include <net/netisr2.h> 57 #include <net/netmsg2.h> 58 #include <netinet/in.h> 59 #include <netinet/if_ether.h> 60 #include <netinet/ip_carp.h> 61 #include <netinet/tcp.h> 62 #include <netinet/tcp_seq.h> 63 64 #ifdef INET 65 #include <netinet/in_systm.h> 66 #include <netinet/in_var.h> 67 #include <netinet/ip.h> 68 #include <netinet/ip_var.h> 69 #endif 70 71 #ifdef INET6 72 #include <netinet6/nd6.h> 73 #endif /* INET6 */ 74 75 #include <net/pf/pfvar.h> 76 #include <net/pf/if_pfsync.h> 77 78 #define PFSYNCNAME "pfsync" 79 80 #define PFSYNC_MINMTU \ 81 (sizeof(struct pfsync_header) + sizeof(struct pf_state)) 82 83 #ifdef PFSYNCDEBUG 84 #define DPRINTF(x) do { if (pfsyncdebug) kprintf x ; } while (0) 85 int pfsyncdebug; 86 #else 87 #define DPRINTF(x) 88 #endif 89 90 struct pfsync_softc *pfsyncif = NULL; 91 struct pfsyncstats pfsyncstats; 92 93 void pfsyncattach(int); 94 static int pfsync_clone_destroy(struct ifnet *); 95 static int pfsync_clone_create(struct if_clone *, int, caddr_t, caddr_t); 96 void pfsync_setmtu(struct pfsync_softc *, int); 97 int pfsync_alloc_scrub_memory(struct pfsync_state_peer *, 98 struct pf_state_peer *); 99 int pfsyncoutput(struct ifnet *, struct mbuf *, struct sockaddr *, 100 struct rtentry *); 101 int pfsyncioctl(struct ifnet *, u_long, caddr_t, struct ucred *); 102 void pfsyncstart(struct ifnet *, struct ifaltq_subque *); 103 104 struct mbuf *pfsync_get_mbuf(struct pfsync_softc *, u_int8_t, void **); 105 int pfsync_request_update(struct pfsync_state_upd *, struct in_addr *); 106 int pfsync_sendout(struct pfsync_softc *); 107 int pfsync_sendout_mbuf(struct pfsync_softc *, struct mbuf *); 108 void pfsync_timeout(void *); 109 void pfsync_send_bus(struct pfsync_softc *, u_int8_t); 110 void pfsync_bulk_update(void *); 111 void pfsync_bulkfail(void *); 112 113 static struct in_multi *pfsync_in_addmulti(struct ifnet *); 114 static void pfsync_in_delmulti(struct in_multi *); 115 116 static MALLOC_DEFINE(M_PFSYNC, PFSYNCNAME, "Packet Filter State Sync. Interface"); 117 static LIST_HEAD(pfsync_list, pfsync_softc) pfsync_list; 118 119 int pfsync_sync_ok; 120 121 struct if_clone pfsync_cloner = 122 IF_CLONE_INITIALIZER("pfsync", pfsync_clone_create, pfsync_clone_destroy, 1 ,1); 123 124 125 void 126 pfsyncattach(int npfsync) 127 { 128 if_clone_attach(&pfsync_cloner); 129 } 130 131 static int 132 pfsync_clone_create(struct if_clone *ifc, int unit, 133 caddr_t params __unused, caddr_t data __unused) 134 { 135 struct pfsync_softc *sc; 136 struct ifnet *ifp; 137 138 lwkt_gettoken(&pf_token); 139 140 pfsync_sync_ok = 1; 141 142 sc = kmalloc(sizeof(*sc), M_PFSYNC, M_WAITOK | M_ZERO); 143 sc->sc_mbuf = NULL; 144 sc->sc_mbuf_net = NULL; 145 sc->sc_mbuf_tdb = NULL; 146 sc->sc_statep.s = NULL; 147 sc->sc_statep_net.s = NULL; 148 sc->sc_statep_tdb.t = NULL; 149 sc->sc_maxupdates = 128; 150 sc->sc_sync_peer.s_addr =htonl(INADDR_PFSYNC_GROUP); 151 sc->sc_sendaddr.s_addr = htonl(INADDR_PFSYNC_GROUP); 152 sc->sc_ureq_received = 0; 153 sc->sc_ureq_sent = 0; 154 sc->sc_bulk_send_next = NULL; 155 sc->sc_bulk_terminator = NULL; 156 sc->sc_bulk_send_cpu = 0; 157 sc->sc_bulk_terminator_cpu = 0; 158 sc->sc_imo.imo_max_memberships = IP_MAX_MEMBERSHIPS; 159 lwkt_reltoken(&pf_token); 160 161 ifp = &sc->sc_if; 162 if_initname(ifp, ifc->ifc_name, unit); 163 ifp->if_ioctl = pfsyncioctl; 164 ifp->if_output = pfsyncoutput; 165 ifp->if_start = pfsyncstart; 166 ifp->if_type = IFT_PFSYNC; 167 ifq_set_maxlen(&ifp->if_snd, ifqmaxlen); 168 ifp->if_hdrlen = PFSYNC_HDRLEN; 169 ifp->if_baudrate = IF_Mbps(100); 170 ifp->if_softc = sc; 171 172 pfsync_setmtu(sc, MCLBYTES); 173 callout_init(&sc->sc_tmo); 174 /* callout_init(&sc->sc_tdb_tmo); XXX we don't support tdb (yet) */ 175 callout_init(&sc->sc_bulk_tmo); 176 callout_init(&sc->sc_bulkfail_tmo); 177 178 if_attach(ifp, NULL); 179 #if NBPF > 0 180 bpfattach(&sc->sc_if, DLT_PFSYNC, PFSYNC_HDRLEN); 181 #endif 182 183 #ifdef CARP 184 if_addgroup(ifp, "carp"); 185 #endif 186 187 lwkt_gettoken(&pf_token); 188 LIST_INSERT_HEAD(&pfsync_list, sc, sc_next); 189 lwkt_reltoken(&pf_token); 190 191 return (0); 192 } 193 194 static int 195 pfsync_clone_destroy(struct ifnet *ifp) 196 { 197 struct netmsg_base msg; 198 199 lwkt_gettoken(&pf_token); 200 lwkt_reltoken(&pf_token); 201 202 struct pfsync_softc *sc = ifp->if_softc; 203 callout_stop(&sc->sc_tmo); 204 /* callout_stop(&sc->sc_tdb_tmo); XXX we don't support tdb (yet) */ 205 callout_stop(&sc->sc_bulk_tmo); 206 callout_stop(&sc->sc_bulkfail_tmo); 207 #ifdef CARP 208 if (!pfsync_sync_ok) 209 carp_group_demote_adj(&sc->sc_if, -1); 210 #endif 211 212 /* Unpend async sendouts. */ 213 netmsg_init(&msg, NULL, &curthread->td_msgport, 0, netmsg_sync_handler); 214 netisr_domsg(&msg, 0); 215 216 #if NBPF > 0 217 bpfdetach(ifp); 218 #endif 219 if_detach(ifp); 220 lwkt_gettoken(&pf_token); 221 LIST_REMOVE(sc, sc_next); 222 kfree(sc, M_PFSYNC); 223 lwkt_reltoken(&pf_token); 224 225 return 0; 226 } 227 228 /* 229 * Start output on the pfsync interface. 230 */ 231 void 232 pfsyncstart(struct ifnet *ifp, struct ifaltq_subque *ifsq) 233 { 234 ASSERT_ALTQ_SQ_DEFAULT(ifp, ifsq); 235 ifsq_purge(ifsq); 236 } 237 238 int 239 pfsync_alloc_scrub_memory(struct pfsync_state_peer *s, 240 struct pf_state_peer *d) 241 { 242 if (s->scrub.scrub_flag && d->scrub == NULL) { 243 d->scrub = kmalloc(sizeof(struct pf_state_scrub), M_PFSYNC, M_NOWAIT|M_ZERO); 244 245 if (d->scrub == NULL) 246 return (ENOMEM); 247 } 248 249 return (0); 250 } 251 252 void 253 pfsync_state_export(struct pfsync_state *sp, struct pf_state *st) 254 { 255 bzero(sp, sizeof(struct pfsync_state)); 256 257 /* copy from state key */ 258 sp->key[PF_SK_WIRE].addr[0] = st->key[PF_SK_WIRE]->addr[0]; 259 sp->key[PF_SK_WIRE].addr[1] = st->key[PF_SK_WIRE]->addr[1]; 260 sp->key[PF_SK_WIRE].port[0] = st->key[PF_SK_WIRE]->port[0]; 261 sp->key[PF_SK_WIRE].port[1] = st->key[PF_SK_WIRE]->port[1]; 262 sp->key[PF_SK_STACK].addr[0] = st->key[PF_SK_STACK]->addr[0]; 263 sp->key[PF_SK_STACK].addr[1] = st->key[PF_SK_STACK]->addr[1]; 264 sp->key[PF_SK_STACK].port[0] = st->key[PF_SK_STACK]->port[0]; 265 sp->key[PF_SK_STACK].port[1] = st->key[PF_SK_STACK]->port[1]; 266 sp->proto = st->key[PF_SK_WIRE]->proto; 267 sp->af = st->key[PF_SK_WIRE]->af; 268 269 /* copy from state */ 270 strlcpy(sp->ifname, st->kif->pfik_name, sizeof(sp->ifname)); 271 bcopy(&st->rt_addr, &sp->rt_addr, sizeof(sp->rt_addr)); 272 sp->creation = htonl(time_second - st->creation); 273 sp->expire = pf_state_expires(st); 274 if (sp->expire <= time_second) 275 sp->expire = htonl(0); 276 else 277 sp->expire = htonl(sp->expire - time_second); 278 279 sp->direction = st->direction; 280 sp->log = st->log; 281 sp->cpuid = st->cpuid; 282 sp->pickup_mode = st->pickup_mode; 283 sp->timeout = st->timeout; 284 sp->state_flags = st->state_flags; 285 if (st->src_node) 286 sp->sync_flags |= PFSYNC_FLAG_SRCNODE; 287 if (st->nat_src_node) 288 sp->sync_flags |= PFSYNC_FLAG_NATSRCNODE; 289 290 bcopy(&st->id, &sp->id, sizeof(sp->id)); 291 sp->creatorid = st->creatorid; 292 pf_state_peer_hton(&st->src, &sp->src); 293 pf_state_peer_hton(&st->dst, &sp->dst); 294 295 if (st->rule.ptr == NULL) 296 sp->rule = htonl(-1); 297 else 298 sp->rule = htonl(st->rule.ptr->nr); 299 if (st->anchor.ptr == NULL) 300 sp->anchor = htonl(-1); 301 else 302 sp->anchor = htonl(st->anchor.ptr->nr); 303 if (st->nat_rule.ptr == NULL) 304 sp->nat_rule = htonl(-1); 305 else 306 sp->nat_rule = htonl(st->nat_rule.ptr->nr); 307 308 pf_state_counter_hton(st->packets[0], sp->packets[0]); 309 pf_state_counter_hton(st->packets[1], sp->packets[1]); 310 pf_state_counter_hton(st->bytes[0], sp->bytes[0]); 311 pf_state_counter_hton(st->bytes[1], sp->bytes[1]); 312 313 } 314 315 int 316 pfsync_state_import(struct pfsync_state *sp, u_int8_t flags) 317 { 318 struct pf_state *st = NULL; 319 struct pf_state_key *skw = NULL, *sks = NULL; 320 struct pf_rule *r = NULL; 321 struct pfi_kif *kif; 322 int pool_flags; 323 int error; 324 325 if (sp->creatorid == 0 && pf_status.debug >= PF_DEBUG_MISC) { 326 kprintf("pfsync_insert_net_state: invalid creator id:" 327 " %08x\n", ntohl(sp->creatorid)); 328 return (EINVAL); 329 } 330 331 if ((kif = pfi_kif_get(sp->ifname)) == NULL) { 332 if (pf_status.debug >= PF_DEBUG_MISC) 333 kprintf("pfsync_insert_net_state: " 334 "unknown interface: %s\n", sp->ifname); 335 if (flags & PFSYNC_SI_IOCTL) 336 return (EINVAL); 337 return (0); /* skip this state */ 338 } 339 340 /* 341 * If the ruleset checksums match or the state is coming from the ioctl, 342 * it's safe to associate the state with the rule of that number. 343 */ 344 if (sp->rule != htonl(-1) && sp->anchor == htonl(-1) && 345 (flags & (PFSYNC_SI_IOCTL | PFSYNC_SI_CKSUM)) && ntohl(sp->rule) < 346 pf_main_ruleset.rules[PF_RULESET_FILTER].active.rcount) 347 r = pf_main_ruleset.rules[ 348 PF_RULESET_FILTER].active.ptr_array[ntohl(sp->rule)]; 349 else 350 r = &pf_default_rule; 351 352 if ((r->max_states && r->states_cur >= r->max_states)) 353 goto cleanup; 354 355 if (flags & PFSYNC_SI_IOCTL) 356 pool_flags = M_WAITOK | M_NULLOK | M_ZERO; 357 else 358 pool_flags = M_WAITOK | M_ZERO; 359 360 if ((st = kmalloc(sizeof(struct pf_state), M_PFSYNC, pool_flags)) == NULL) 361 goto cleanup; 362 lockinit(&st->lk, "pfstlk", 0, 0); 363 364 if ((skw = pf_alloc_state_key(pool_flags)) == NULL) 365 goto cleanup; 366 367 if (PF_ANEQ(&sp->key[PF_SK_WIRE].addr[0], 368 &sp->key[PF_SK_STACK].addr[0], sp->af) || 369 PF_ANEQ(&sp->key[PF_SK_WIRE].addr[1], 370 &sp->key[PF_SK_STACK].addr[1], sp->af) || 371 sp->key[PF_SK_WIRE].port[0] != sp->key[PF_SK_STACK].port[0] || 372 sp->key[PF_SK_WIRE].port[1] != sp->key[PF_SK_STACK].port[1]) { 373 if ((sks = pf_alloc_state_key(pool_flags)) == NULL) 374 goto cleanup; 375 } else 376 sks = skw; 377 378 /* allocate memory for scrub info */ 379 if (pfsync_alloc_scrub_memory(&sp->src, &st->src) || 380 pfsync_alloc_scrub_memory(&sp->dst, &st->dst)) 381 goto cleanup; 382 383 /* copy to state key(s) */ 384 skw->addr[0] = sp->key[PF_SK_WIRE].addr[0]; 385 skw->addr[1] = sp->key[PF_SK_WIRE].addr[1]; 386 skw->port[0] = sp->key[PF_SK_WIRE].port[0]; 387 skw->port[1] = sp->key[PF_SK_WIRE].port[1]; 388 skw->proto = sp->proto; 389 skw->af = sp->af; 390 if (sks != skw) { 391 sks->addr[0] = sp->key[PF_SK_STACK].addr[0]; 392 sks->addr[1] = sp->key[PF_SK_STACK].addr[1]; 393 sks->port[0] = sp->key[PF_SK_STACK].port[0]; 394 sks->port[1] = sp->key[PF_SK_STACK].port[1]; 395 sks->proto = sp->proto; 396 sks->af = sp->af; 397 } 398 399 /* copy to state */ 400 bcopy(&sp->rt_addr, &st->rt_addr, sizeof(st->rt_addr)); 401 st->creation = time_second - ntohl(sp->creation); 402 st->expire = time_second; 403 if (sp->expire) { 404 /* XXX No adaptive scaling. */ 405 st->expire -= r->timeout[sp->timeout] - ntohl(sp->expire); 406 } 407 408 st->expire = ntohl(sp->expire) + time_second; 409 st->direction = sp->direction; 410 st->log = sp->log; 411 st->timeout = sp->timeout; 412 st->state_flags = sp->state_flags; 413 if (!(flags & PFSYNC_SI_IOCTL)) 414 st->sync_flags = PFSTATE_FROMSYNC; 415 416 bcopy(sp->id, &st->id, sizeof(st->id)); 417 st->creatorid = sp->creatorid; 418 pf_state_peer_ntoh(&sp->src, &st->src); 419 pf_state_peer_ntoh(&sp->dst, &st->dst); 420 421 st->rule.ptr = r; 422 st->nat_rule.ptr = NULL; 423 st->anchor.ptr = NULL; 424 st->rt_kif = NULL; 425 426 st->pfsync_time = 0; 427 428 429 /* XXX when we have nat_rule/anchors, use STATE_INC_COUNTERS */ 430 r->states_cur++; 431 r->states_tot++; 432 433 if ((error = pf_state_insert(kif, skw, sks, st)) != 0) { 434 /* XXX when we have nat_rule/anchors, use STATE_DEC_COUNTERS */ 435 r->states_cur--; 436 goto cleanup_state; 437 } 438 439 return (0); 440 441 cleanup: 442 error = ENOMEM; 443 if (skw == sks) 444 sks = NULL; 445 if (skw != NULL) 446 kfree(skw, M_PFSYNC); 447 if (sks != NULL) 448 kfree(sks, M_PFSYNC); 449 450 cleanup_state: /* pf_state_insert frees the state keys */ 451 if (st) { 452 if (st->dst.scrub) 453 kfree(st->dst.scrub, M_PFSYNC); 454 if (st->src.scrub) 455 kfree(st->src.scrub, M_PFSYNC); 456 kfree(st, M_PFSYNC); 457 } 458 return (error); 459 } 460 461 void 462 pfsync_input(struct mbuf *m, ...) 463 { 464 struct ip *ip = mtod(m, struct ip *); 465 struct pfsync_header *ph; 466 struct pfsync_softc *sc = pfsyncif; 467 struct pf_state *st; 468 struct pf_state_key *sk; 469 struct pf_state_item *si; 470 struct pf_state_cmp id_key; 471 struct pfsync_state *sp; 472 struct pfsync_state_upd *up; 473 struct pfsync_state_del *dp; 474 struct pfsync_state_clr *cp; 475 struct pfsync_state_upd_req *rup; 476 struct pfsync_state_bus *bus; 477 struct in_addr src; 478 struct mbuf *mp; 479 int iplen, action, error, i, count, offp, sfail, stale = 0; 480 u_int8_t flags = 0; 481 482 /* This function is not yet called from anywhere */ 483 /* Still we assume for safety that pf_token must be held */ 484 ASSERT_LWKT_TOKEN_HELD(&pf_token); 485 486 pfsyncstats.pfsyncs_ipackets++; 487 488 /* verify that we have a sync interface configured */ 489 if (!sc || !sc->sc_sync_ifp || !pf_status.running) 490 goto done; 491 492 /* verify that the packet came in on the right interface */ 493 if (sc->sc_sync_ifp != m->m_pkthdr.rcvif) { 494 pfsyncstats.pfsyncs_badif++; 495 goto done; 496 } 497 498 /* verify that the IP TTL is 255. */ 499 if (ip->ip_ttl != PFSYNC_DFLTTL) { 500 pfsyncstats.pfsyncs_badttl++; 501 goto done; 502 } 503 504 iplen = ip->ip_hl << 2; 505 506 if (m->m_pkthdr.len < iplen + sizeof(*ph)) { 507 pfsyncstats.pfsyncs_hdrops++; 508 goto done; 509 } 510 511 if (iplen + sizeof(*ph) > m->m_len) { 512 if ((m = m_pullup(m, iplen + sizeof(*ph))) == NULL) { 513 pfsyncstats.pfsyncs_hdrops++; 514 goto done; 515 } 516 ip = mtod(m, struct ip *); 517 } 518 ph = (struct pfsync_header *)((char *)ip + iplen); 519 520 /* verify the version */ 521 if (ph->version != PFSYNC_VERSION) { 522 pfsyncstats.pfsyncs_badver++; 523 goto done; 524 } 525 526 action = ph->action; 527 count = ph->count; 528 529 /* make sure it's a valid action code */ 530 if (action >= PFSYNC_ACT_MAX) { 531 pfsyncstats.pfsyncs_badact++; 532 goto done; 533 } 534 535 /* Cheaper to grab this now than having to mess with mbufs later */ 536 src = ip->ip_src; 537 538 if (!bcmp(&ph->pf_chksum, &pf_status.pf_chksum, PF_MD5_DIGEST_LENGTH)) 539 flags |= PFSYNC_SI_CKSUM; 540 541 switch (action) { 542 case PFSYNC_ACT_CLR: { 543 struct pf_state *nexts; 544 struct pf_state_key *nextsk; 545 struct pfi_kif *kif; 546 globaldata_t save_gd = mycpu; 547 int nn; 548 549 u_int32_t creatorid; 550 if ((mp = m_pulldown(m, iplen + sizeof(*ph), 551 sizeof(*cp), &offp)) == NULL) { 552 pfsyncstats.pfsyncs_badlen++; 553 return; 554 } 555 cp = (struct pfsync_state_clr *)(mp->m_data + offp); 556 creatorid = cp->creatorid; 557 558 crit_enter(); 559 if (cp->ifname[0] == '\0') { 560 lwkt_gettoken(&pf_token); 561 for (nn = 0; nn < ncpus; ++nn) { 562 lwkt_setcpu_self(globaldata_find(nn)); 563 for (st = RB_MIN(pf_state_tree_id, 564 &tree_id[nn]); 565 st; st = nexts) { 566 nexts = RB_NEXT(pf_state_tree_id, 567 &tree_id[n], st); 568 if (st->creatorid == creatorid) { 569 st->sync_flags |= 570 PFSTATE_FROMSYNC; 571 pf_unlink_state(st); 572 } 573 } 574 } 575 lwkt_setcpu_self(save_gd); 576 lwkt_reltoken(&pf_token); 577 } else { 578 if ((kif = pfi_kif_get(cp->ifname)) == NULL) { 579 crit_exit(); 580 return; 581 } 582 /* XXX correct? */ 583 lwkt_gettoken(&pf_token); 584 for (nn = 0; nn < ncpus; ++nn) { 585 lwkt_setcpu_self(globaldata_find(nn)); 586 for (sk = RB_MIN(pf_state_tree, 587 &pf_statetbl[nn]); 588 sk; 589 sk = nextsk) { 590 nextsk = RB_NEXT(pf_state_tree, 591 &pf_statetbl[n], sk); 592 TAILQ_FOREACH(si, &sk->states, entry) { 593 if (si->s->creatorid == 594 creatorid) { 595 si->s->sync_flags |= 596 PFSTATE_FROMSYNC; 597 pf_unlink_state(si->s); 598 } 599 } 600 } 601 } 602 lwkt_setcpu_self(save_gd); 603 lwkt_reltoken(&pf_token); 604 } 605 crit_exit(); 606 607 break; 608 } 609 case PFSYNC_ACT_INS: 610 if ((mp = m_pulldown(m, iplen + sizeof(*ph), 611 count * sizeof(*sp), &offp)) == NULL) { 612 pfsyncstats.pfsyncs_badlen++; 613 return; 614 } 615 616 crit_enter(); 617 for (i = 0, sp = (struct pfsync_state *)(mp->m_data + offp); 618 i < count; i++, sp++) { 619 /* check for invalid values */ 620 if (sp->timeout >= PFTM_MAX || 621 sp->src.state > PF_TCPS_PROXY_DST || 622 sp->dst.state > PF_TCPS_PROXY_DST || 623 sp->direction > PF_OUT || 624 (sp->af != AF_INET && sp->af != AF_INET6)) { 625 if (pf_status.debug >= PF_DEBUG_MISC) 626 kprintf("pfsync_insert: PFSYNC_ACT_INS: " 627 "invalid value\n"); 628 pfsyncstats.pfsyncs_badval++; 629 continue; 630 } 631 632 if ((error = pfsync_state_import(sp, flags))) { 633 if (error == ENOMEM) { 634 crit_exit(); 635 goto done; 636 } 637 } 638 } 639 crit_exit(); 640 break; 641 case PFSYNC_ACT_UPD: 642 if ((mp = m_pulldown(m, iplen + sizeof(*ph), 643 count * sizeof(*sp), &offp)) == NULL) { 644 pfsyncstats.pfsyncs_badlen++; 645 return; 646 } 647 648 crit_enter(); 649 for (i = 0, sp = (struct pfsync_state *)(mp->m_data + offp); 650 i < count; i++, sp++) { 651 int flags = PFSYNC_FLAG_STALE; 652 653 /* check for invalid values */ 654 if (sp->timeout >= PFTM_MAX || 655 sp->src.state > PF_TCPS_PROXY_DST || 656 sp->dst.state > PF_TCPS_PROXY_DST) { 657 if (pf_status.debug >= PF_DEBUG_MISC) 658 kprintf("pfsync_insert: PFSYNC_ACT_UPD: " 659 "invalid value\n"); 660 pfsyncstats.pfsyncs_badval++; 661 continue; 662 } 663 664 bcopy(sp->id, &id_key.id, sizeof(id_key.id)); 665 id_key.creatorid = sp->creatorid; 666 667 st = pf_find_state_byid(&id_key); 668 if (st == NULL) { 669 /* insert the update */ 670 if (pfsync_state_import(sp, flags)) 671 pfsyncstats.pfsyncs_badstate++; 672 continue; 673 } 674 sk = st->key[PF_SK_WIRE]; /* XXX right one? */ 675 sfail = 0; 676 if (sk->proto == IPPROTO_TCP) { 677 /* 678 * The state should never go backwards except 679 * for syn-proxy states. Neither should the 680 * sequence window slide backwards. 681 */ 682 if (st->src.state > sp->src.state && 683 (st->src.state < PF_TCPS_PROXY_SRC || 684 sp->src.state >= PF_TCPS_PROXY_SRC)) 685 sfail = 1; 686 else if (SEQ_GT(st->src.seqlo, 687 ntohl(sp->src.seqlo))) 688 sfail = 3; 689 else if (st->dst.state > sp->dst.state) { 690 /* There might still be useful 691 * information about the src state here, 692 * so import that part of the update, 693 * then "fail" so we send the updated 694 * state back to the peer who is missing 695 * our what we know. */ 696 pf_state_peer_ntoh(&sp->src, &st->src); 697 /* XXX do anything with timeouts? */ 698 sfail = 7; 699 flags = 0; 700 } else if (st->dst.state >= TCPS_SYN_SENT && 701 SEQ_GT(st->dst.seqlo, ntohl(sp->dst.seqlo))) 702 sfail = 4; 703 } else { 704 /* 705 * Non-TCP protocol state machine always go 706 * forwards 707 */ 708 if (st->src.state > sp->src.state) 709 sfail = 5; 710 else if (st->dst.state > sp->dst.state) 711 sfail = 6; 712 } 713 if (sfail) { 714 if (pf_status.debug >= PF_DEBUG_MISC) 715 kprintf("pfsync: %s stale update " 716 "(%d) id: %016jx " 717 "creatorid: %08x\n", 718 (sfail < 7 ? "ignoring" 719 : "partial"), sfail, 720 (uintmax_t)be64toh(st->id), 721 ntohl(st->creatorid)); 722 pfsyncstats.pfsyncs_stale++; 723 724 if (!(sp->sync_flags & PFSTATE_STALE)) { 725 /* we have a better state, send it */ 726 if (sc->sc_mbuf != NULL && !stale) 727 pfsync_sendout(sc); 728 stale++; 729 if (!st->sync_flags) 730 pfsync_pack_state( 731 PFSYNC_ACT_UPD, st, flags); 732 } 733 continue; 734 } 735 pfsync_alloc_scrub_memory(&sp->dst, &st->dst); 736 pf_state_peer_ntoh(&sp->src, &st->src); 737 pf_state_peer_ntoh(&sp->dst, &st->dst); 738 st->expire = ntohl(sp->expire) + time_second; 739 st->timeout = sp->timeout; 740 } 741 if (stale && sc->sc_mbuf != NULL) 742 pfsync_sendout(sc); 743 crit_exit(); 744 break; 745 /* 746 * It's not strictly necessary for us to support the "uncompressed" 747 * delete action, but it's relatively simple and maintains consistency. 748 */ 749 case PFSYNC_ACT_DEL: 750 if ((mp = m_pulldown(m, iplen + sizeof(*ph), 751 count * sizeof(*sp), &offp)) == NULL) { 752 pfsyncstats.pfsyncs_badlen++; 753 return; 754 } 755 756 crit_enter(); 757 for (i = 0, sp = (struct pfsync_state *)(mp->m_data + offp); 758 i < count; i++, sp++) { 759 bcopy(sp->id, &id_key.id, sizeof(id_key.id)); 760 id_key.creatorid = sp->creatorid; 761 762 st = pf_find_state_byid(&id_key); 763 if (st == NULL) { 764 pfsyncstats.pfsyncs_badstate++; 765 continue; 766 } 767 st->sync_flags |= PFSTATE_FROMSYNC; 768 pf_unlink_state(st); 769 } 770 crit_exit(); 771 break; 772 case PFSYNC_ACT_UPD_C: { 773 int update_requested = 0; 774 775 if ((mp = m_pulldown(m, iplen + sizeof(*ph), 776 count * sizeof(*up), &offp)) == NULL) { 777 pfsyncstats.pfsyncs_badlen++; 778 return; 779 } 780 781 crit_enter(); 782 for (i = 0, up = (struct pfsync_state_upd *)(mp->m_data + offp); 783 i < count; i++, up++) { 784 /* check for invalid values */ 785 if (up->timeout >= PFTM_MAX || 786 up->src.state > PF_TCPS_PROXY_DST || 787 up->dst.state > PF_TCPS_PROXY_DST) { 788 if (pf_status.debug >= PF_DEBUG_MISC) 789 kprintf("pfsync_insert: " 790 "PFSYNC_ACT_UPD_C: " 791 "invalid value\n"); 792 pfsyncstats.pfsyncs_badval++; 793 continue; 794 } 795 796 bcopy(up->id, &id_key.id, sizeof(id_key.id)); 797 id_key.creatorid = up->creatorid; 798 799 st = pf_find_state_byid(&id_key); 800 if (st == NULL) { 801 /* We don't have this state. Ask for it. */ 802 error = pfsync_request_update(up, &src); 803 if (error == ENOMEM) { 804 crit_exit(); 805 goto done; 806 } 807 update_requested = 1; 808 pfsyncstats.pfsyncs_badstate++; 809 continue; 810 } 811 sk = st->key[PF_SK_WIRE]; /* XXX right one? */ 812 sfail = 0; 813 if (sk->proto == IPPROTO_TCP) { 814 /* 815 * The state should never go backwards except 816 * for syn-proxy states. Neither should the 817 * sequence window slide backwards. 818 */ 819 if (st->src.state > up->src.state && 820 (st->src.state < PF_TCPS_PROXY_SRC || 821 up->src.state >= PF_TCPS_PROXY_SRC)) 822 sfail = 1; 823 else if (st->dst.state > up->dst.state) 824 sfail = 2; 825 else if (SEQ_GT(st->src.seqlo, 826 ntohl(up->src.seqlo))) 827 sfail = 3; 828 else if (st->dst.state >= TCPS_SYN_SENT && 829 SEQ_GT(st->dst.seqlo, ntohl(up->dst.seqlo))) 830 sfail = 4; 831 } else { 832 /* 833 * Non-TCP protocol state machine always go 834 * forwards 835 */ 836 if (st->src.state > up->src.state) 837 sfail = 5; 838 else if (st->dst.state > up->dst.state) 839 sfail = 6; 840 } 841 if (sfail) { 842 if (pf_status.debug >= PF_DEBUG_MISC) 843 kprintf("pfsync: ignoring stale update " 844 "(%d) id: %016" PRIx64 " " 845 "creatorid: %08x\n", sfail, 846 be64toh(st->id), 847 ntohl(st->creatorid)); 848 pfsyncstats.pfsyncs_stale++; 849 850 /* we have a better state, send it out */ 851 if ((!stale || update_requested) && 852 sc->sc_mbuf != NULL) { 853 pfsync_sendout(sc); 854 update_requested = 0; 855 } 856 stale++; 857 if (!st->sync_flags) 858 pfsync_pack_state(PFSYNC_ACT_UPD, st, 859 PFSYNC_FLAG_STALE); 860 continue; 861 } 862 pfsync_alloc_scrub_memory(&up->dst, &st->dst); 863 pf_state_peer_ntoh(&up->src, &st->src); 864 pf_state_peer_ntoh(&up->dst, &st->dst); 865 st->expire = ntohl(up->expire) + time_second; 866 st->timeout = up->timeout; 867 } 868 if ((update_requested || stale) && sc->sc_mbuf) 869 pfsync_sendout(sc); 870 crit_exit(); 871 break; 872 } 873 case PFSYNC_ACT_DEL_C: 874 if ((mp = m_pulldown(m, iplen + sizeof(*ph), 875 count * sizeof(*dp), &offp)) == NULL) { 876 pfsyncstats.pfsyncs_badlen++; 877 return; 878 } 879 880 crit_enter(); 881 for (i = 0, dp = (struct pfsync_state_del *)(mp->m_data + offp); 882 i < count; i++, dp++) { 883 bcopy(dp->id, &id_key.id, sizeof(id_key.id)); 884 id_key.creatorid = dp->creatorid; 885 886 st = pf_find_state_byid(&id_key); 887 if (st == NULL) { 888 pfsyncstats.pfsyncs_badstate++; 889 continue; 890 } 891 st->sync_flags |= PFSTATE_FROMSYNC; 892 pf_unlink_state(st); 893 } 894 crit_exit(); 895 break; 896 case PFSYNC_ACT_INS_F: 897 case PFSYNC_ACT_DEL_F: 898 /* not implemented */ 899 break; 900 case PFSYNC_ACT_UREQ: 901 if ((mp = m_pulldown(m, iplen + sizeof(*ph), 902 count * sizeof(*rup), &offp)) == NULL) { 903 pfsyncstats.pfsyncs_badlen++; 904 return; 905 } 906 907 crit_enter(); 908 if (sc->sc_mbuf != NULL) 909 pfsync_sendout(sc); 910 for (i = 0, 911 rup = (struct pfsync_state_upd_req *)(mp->m_data + offp); 912 i < count; i++, rup++) { 913 bcopy(rup->id, &id_key.id, sizeof(id_key.id)); 914 id_key.creatorid = rup->creatorid; 915 916 if (id_key.id == 0 && id_key.creatorid == 0) { 917 sc->sc_ureq_received = mycpu->gd_time_seconds; 918 if (sc->sc_bulk_send_next == NULL) { 919 if (++sc->sc_bulk_send_cpu >= ncpus) 920 sc->sc_bulk_send_cpu = 0; 921 sc->sc_bulk_send_next = 922 TAILQ_FIRST(&state_list[sc->sc_bulk_send_cpu]); 923 } 924 sc->sc_bulk_terminator = 925 sc->sc_bulk_send_next; 926 sc->sc_bulk_terminator_cpu = 927 sc->sc_bulk_send_cpu; 928 if (pf_status.debug >= PF_DEBUG_MISC) 929 kprintf("pfsync: received " 930 "bulk update request\n"); 931 pfsync_send_bus(sc, PFSYNC_BUS_START); 932 lwkt_reltoken(&pf_token); 933 callout_init(&sc->sc_bulk_tmo); 934 lwkt_gettoken(&pf_token); 935 } else { 936 st = pf_find_state_byid(&id_key); 937 if (st == NULL) { 938 pfsyncstats.pfsyncs_badstate++; 939 continue; 940 } 941 if (!st->sync_flags) 942 pfsync_pack_state(PFSYNC_ACT_UPD, 943 st, 0); 944 } 945 } 946 if (sc->sc_mbuf != NULL) 947 pfsync_sendout(sc); 948 crit_exit(); 949 break; 950 case PFSYNC_ACT_BUS: 951 /* If we're not waiting for a bulk update, who cares. */ 952 if (sc->sc_ureq_sent == 0) 953 break; 954 955 if ((mp = m_pulldown(m, iplen + sizeof(*ph), 956 sizeof(*bus), &offp)) == NULL) { 957 pfsyncstats.pfsyncs_badlen++; 958 return; 959 } 960 bus = (struct pfsync_state_bus *)(mp->m_data + offp); 961 switch (bus->status) { 962 case PFSYNC_BUS_START: 963 lwkt_reltoken(&pf_token); 964 callout_reset(&sc->sc_bulkfail_tmo, 965 pf_pool_limits[PF_LIMIT_STATES].limit / 966 (PFSYNC_BULKPACKETS * sc->sc_maxcount), 967 pfsync_bulkfail, LIST_FIRST(&pfsync_list)); 968 lwkt_gettoken(&pf_token); 969 if (pf_status.debug >= PF_DEBUG_MISC) 970 kprintf("pfsync: received bulk " 971 "update start\n"); 972 break; 973 case PFSYNC_BUS_END: 974 if (mycpu->gd_time_seconds - ntohl(bus->endtime) >= 975 sc->sc_ureq_sent) { 976 /* that's it, we're happy */ 977 sc->sc_ureq_sent = 0; 978 sc->sc_bulk_tries = 0; 979 lwkt_reltoken(&pf_token); 980 callout_stop(&sc->sc_bulkfail_tmo); 981 lwkt_gettoken(&pf_token); 982 #ifdef CARP 983 if (!pfsync_sync_ok) { 984 lwkt_reltoken(&pf_token); 985 carp_group_demote_adj(&sc->sc_if, -1); 986 lwkt_gettoken(&pf_token); 987 } 988 #endif 989 pfsync_sync_ok = 1; 990 if (pf_status.debug >= PF_DEBUG_MISC) 991 kprintf("pfsync: received valid " 992 "bulk update end\n"); 993 } else { 994 if (pf_status.debug >= PF_DEBUG_MISC) 995 kprintf("pfsync: received invalid " 996 "bulk update end: bad timestamp\n"); 997 } 998 break; 999 } 1000 break; 1001 } 1002 1003 done: 1004 if (m) 1005 m_freem(m); 1006 } 1007 1008 int 1009 pfsyncoutput(struct ifnet *ifp, struct mbuf *m, struct sockaddr *dst, 1010 struct rtentry *rt) 1011 { 1012 m_freem(m); 1013 return (0); 1014 } 1015 1016 /* ARGSUSED */ 1017 int 1018 pfsyncioctl(struct ifnet *ifp, u_long cmd, caddr_t data, struct ucred *cr) 1019 { 1020 struct pfsync_softc *sc = ifp->if_softc; 1021 struct ifreq *ifr = (struct ifreq *)data; 1022 struct ip_moptions *imo = &sc->sc_imo; 1023 struct pfsyncreq pfsyncr; 1024 struct ifnet *sifp; 1025 int error; 1026 1027 lwkt_gettoken(&pf_token); 1028 1029 switch (cmd) { 1030 case SIOCSIFADDR: 1031 case SIOCAIFADDR: 1032 case SIOCSIFDSTADDR: 1033 case SIOCSIFFLAGS: 1034 if (ifp->if_flags & IFF_UP) 1035 ifp->if_flags |= IFF_RUNNING; 1036 else 1037 ifp->if_flags &= ~IFF_RUNNING; 1038 break; 1039 case SIOCSIFMTU: 1040 if (ifr->ifr_mtu < PFSYNC_MINMTU) { 1041 lwkt_reltoken(&pf_token); 1042 return (EINVAL); 1043 } 1044 if (ifr->ifr_mtu > MCLBYTES) 1045 ifr->ifr_mtu = MCLBYTES; 1046 crit_enter(); 1047 if (ifr->ifr_mtu < ifp->if_mtu) 1048 pfsync_sendout(sc); 1049 pfsync_setmtu(sc, ifr->ifr_mtu); 1050 crit_exit(); 1051 break; 1052 case SIOCGETPFSYNC: 1053 bzero(&pfsyncr, sizeof(pfsyncr)); 1054 if (sc->sc_sync_ifp) 1055 strlcpy(pfsyncr.pfsyncr_syncdev, 1056 sc->sc_sync_ifp->if_xname, IFNAMSIZ); 1057 pfsyncr.pfsyncr_syncpeer = sc->sc_sync_peer; 1058 pfsyncr.pfsyncr_maxupdates = sc->sc_maxupdates; 1059 lwkt_reltoken(&pf_token); 1060 if ((error = copyout(&pfsyncr, ifr->ifr_data, sizeof(pfsyncr)))) 1061 return (error); 1062 lwkt_gettoken(&pf_token); 1063 break; 1064 case SIOCSETPFSYNC: 1065 error = caps_priv_check(cr, SYSCAP_RESTRICTEDROOT | 1066 __SYSCAP_NULLCRED); 1067 if (error) { 1068 lwkt_reltoken(&pf_token); 1069 return (error); 1070 } 1071 if ((error = copyin(ifr->ifr_data, &pfsyncr, sizeof(pfsyncr)))) { 1072 lwkt_reltoken(&pf_token); 1073 return (error); 1074 } 1075 1076 if (pfsyncr.pfsyncr_syncpeer.s_addr == 0) 1077 sc->sc_sync_peer.s_addr = INADDR_PFSYNC_GROUP; 1078 else 1079 sc->sc_sync_peer.s_addr = 1080 pfsyncr.pfsyncr_syncpeer.s_addr; 1081 1082 if (pfsyncr.pfsyncr_maxupdates > 255) { 1083 lwkt_reltoken(&pf_token); 1084 return (EINVAL); 1085 } 1086 sc->sc_maxupdates = pfsyncr.pfsyncr_maxupdates; 1087 1088 if (pfsyncr.pfsyncr_syncdev[0] == 0) { 1089 sc->sc_sync_ifp = NULL; 1090 if (sc->sc_mbuf_net != NULL) { 1091 /* Don't keep stale pfsync packets around. */ 1092 crit_enter(); 1093 m_freem(sc->sc_mbuf_net); 1094 sc->sc_mbuf_net = NULL; 1095 sc->sc_statep_net.s = NULL; 1096 crit_exit(); 1097 } 1098 if (imo->imo_num_memberships > 0) { 1099 pfsync_in_delmulti(imo->imo_membership[--imo->imo_num_memberships]); 1100 imo->imo_multicast_ifp = NULL; 1101 } 1102 break; 1103 } 1104 1105 /* 1106 * XXX not that MPSAFE; pfsync needs serious rework 1107 */ 1108 ifnet_deserialize_all(ifp); 1109 ifnet_lock(); 1110 sifp = ifunit(pfsyncr.pfsyncr_syncdev); 1111 ifnet_unlock(); 1112 ifnet_serialize_all(ifp); 1113 1114 if (sifp == NULL) { 1115 lwkt_reltoken(&pf_token); 1116 return (EINVAL); 1117 } 1118 1119 crit_enter(); 1120 if (sifp->if_mtu < sc->sc_if.if_mtu || 1121 (sc->sc_sync_ifp != NULL && 1122 sifp->if_mtu < sc->sc_sync_ifp->if_mtu) || 1123 sifp->if_mtu < MCLBYTES - sizeof(struct ip)) 1124 pfsync_sendout(sc); 1125 sc->sc_sync_ifp = sifp; 1126 1127 pfsync_setmtu(sc, sc->sc_if.if_mtu); 1128 1129 if (imo->imo_num_memberships > 0) { 1130 pfsync_in_delmulti(imo->imo_membership[--imo->imo_num_memberships]); 1131 imo->imo_multicast_ifp = NULL; 1132 } 1133 1134 if (sc->sc_sync_ifp && 1135 sc->sc_sync_peer.s_addr == INADDR_PFSYNC_GROUP) { 1136 if (!(sc->sc_sync_ifp->if_flags & IFF_MULTICAST)) { 1137 sc->sc_sync_ifp = NULL; 1138 lwkt_reltoken(&pf_token); 1139 crit_exit(); 1140 return (EADDRNOTAVAIL); 1141 } 1142 1143 if ((imo->imo_membership[0] = 1144 pfsync_in_addmulti(sc->sc_sync_ifp)) == NULL) { 1145 sc->sc_sync_ifp = NULL; 1146 lwkt_reltoken(&pf_token); 1147 crit_exit(); 1148 return (ENOBUFS); 1149 } 1150 imo->imo_num_memberships++; 1151 imo->imo_multicast_ifp = sc->sc_sync_ifp; 1152 imo->imo_multicast_ttl = PFSYNC_DFLTTL; 1153 imo->imo_multicast_loop = 0; 1154 } 1155 1156 if (sc->sc_sync_ifp || 1157 sc->sc_sendaddr.s_addr != INADDR_PFSYNC_GROUP) { 1158 /* Request a full state table update. */ 1159 sc->sc_ureq_sent = mycpu->gd_time_seconds; 1160 #ifdef CARP 1161 if (pfsync_sync_ok) 1162 carp_group_demote_adj(&sc->sc_if, 1); 1163 #endif 1164 pfsync_sync_ok = 0; 1165 if (pf_status.debug >= PF_DEBUG_MISC) 1166 kprintf("pfsync: requesting bulk update\n"); 1167 lwkt_reltoken(&pf_token); 1168 callout_reset(&sc->sc_bulkfail_tmo, 5 * hz, 1169 pfsync_bulkfail, LIST_FIRST(&pfsync_list)); 1170 lwkt_gettoken(&pf_token); 1171 error = pfsync_request_update(NULL, NULL); 1172 if (error == ENOMEM) { 1173 lwkt_reltoken(&pf_token); 1174 crit_exit(); 1175 return (ENOMEM); 1176 } 1177 pfsync_sendout(sc); 1178 } 1179 crit_exit(); 1180 1181 break; 1182 1183 default: 1184 lwkt_reltoken(&pf_token); 1185 return (ENOTTY); 1186 } 1187 1188 lwkt_reltoken(&pf_token); 1189 return (0); 1190 } 1191 1192 void 1193 pfsync_setmtu(struct pfsync_softc *sc, int mtu_req) 1194 { 1195 int mtu; 1196 1197 if (sc->sc_sync_ifp && sc->sc_sync_ifp->if_mtu < mtu_req) 1198 mtu = sc->sc_sync_ifp->if_mtu; 1199 else 1200 mtu = mtu_req; 1201 1202 sc->sc_maxcount = (mtu - sizeof(struct pfsync_header)) / 1203 sizeof(struct pfsync_state); 1204 if (sc->sc_maxcount > 254) 1205 sc->sc_maxcount = 254; 1206 sc->sc_if.if_mtu = sizeof(struct pfsync_header) + 1207 sc->sc_maxcount * sizeof(struct pfsync_state); 1208 } 1209 1210 struct mbuf * 1211 pfsync_get_mbuf(struct pfsync_softc *sc, u_int8_t action, void **sp) 1212 { 1213 struct pfsync_header *h; 1214 struct mbuf *m; 1215 int len; 1216 1217 ASSERT_LWKT_TOKEN_HELD(&pf_token); 1218 1219 MGETHDR(m, M_WAITOK, MT_DATA); 1220 1221 switch (action) { 1222 case PFSYNC_ACT_CLR: 1223 len = sizeof(struct pfsync_header) + 1224 sizeof(struct pfsync_state_clr); 1225 break; 1226 case PFSYNC_ACT_UPD_C: 1227 len = (sc->sc_maxcount * sizeof(struct pfsync_state_upd)) + 1228 sizeof(struct pfsync_header); 1229 break; 1230 case PFSYNC_ACT_DEL_C: 1231 len = (sc->sc_maxcount * sizeof(struct pfsync_state_del)) + 1232 sizeof(struct pfsync_header); 1233 break; 1234 case PFSYNC_ACT_UREQ: 1235 len = (sc->sc_maxcount * sizeof(struct pfsync_state_upd_req)) + 1236 sizeof(struct pfsync_header); 1237 break; 1238 case PFSYNC_ACT_BUS: 1239 len = sizeof(struct pfsync_header) + 1240 sizeof(struct pfsync_state_bus); 1241 break; 1242 case PFSYNC_ACT_TDB_UPD: 1243 len = (sc->sc_maxcount * sizeof(struct pfsync_tdb)) + 1244 sizeof(struct pfsync_header); 1245 break; 1246 default: 1247 len = (sc->sc_maxcount * sizeof(struct pfsync_state)) + 1248 sizeof(struct pfsync_header); 1249 break; 1250 } 1251 1252 if (len > MHLEN) { 1253 MCLGET(m, M_WAITOK); 1254 m->m_data += rounddown2(MCLBYTES - len, sizeof(long)); 1255 } else 1256 MH_ALIGN(m, len); 1257 1258 m->m_pkthdr.rcvif = NULL; 1259 m->m_pkthdr.len = m->m_len = sizeof(struct pfsync_header); 1260 h = mtod(m, struct pfsync_header *); 1261 h->version = PFSYNC_VERSION; 1262 h->af = 0; 1263 h->count = 0; 1264 h->action = action; 1265 1266 *sp = (void *)((char *)h + PFSYNC_HDRLEN); 1267 lwkt_reltoken(&pf_token); 1268 callout_reset(&sc->sc_tmo, hz, pfsync_timeout, 1269 LIST_FIRST(&pfsync_list)); 1270 lwkt_gettoken(&pf_token); 1271 return (m); 1272 } 1273 1274 int 1275 pfsync_pack_state(u_int8_t action, struct pf_state *st, int flags) 1276 { 1277 struct ifnet *ifp = NULL; 1278 struct pfsync_softc *sc = pfsyncif; 1279 struct pfsync_header *h, *h_net; 1280 struct pfsync_state *sp = NULL; 1281 struct pfsync_state_upd *up = NULL; 1282 struct pfsync_state_del *dp = NULL; 1283 int ret = 0; 1284 u_int8_t i = 255, newaction = 0; 1285 1286 if (sc == NULL) 1287 return (0); 1288 ifp = &sc->sc_if; 1289 1290 /* 1291 * If a packet falls in the forest and there's nobody around to 1292 * hear, does it make a sound? 1293 */ 1294 if (ifp->if_bpf == NULL && sc->sc_sync_ifp == NULL && 1295 sc->sc_sync_peer.s_addr == INADDR_PFSYNC_GROUP) { 1296 /* Don't leave any stale pfsync packets hanging around. */ 1297 if (sc->sc_mbuf != NULL) { 1298 m_freem(sc->sc_mbuf); 1299 sc->sc_mbuf = NULL; 1300 sc->sc_statep.s = NULL; 1301 } 1302 return (0); 1303 } 1304 1305 if (action >= PFSYNC_ACT_MAX) 1306 return (EINVAL); 1307 1308 crit_enter(); 1309 if (sc->sc_mbuf == NULL) { 1310 if ((sc->sc_mbuf = pfsync_get_mbuf(sc, action, 1311 (void *)&sc->sc_statep.s)) == NULL) { 1312 crit_exit(); 1313 return (ENOMEM); 1314 } 1315 h = mtod(sc->sc_mbuf, struct pfsync_header *); 1316 } else { 1317 h = mtod(sc->sc_mbuf, struct pfsync_header *); 1318 if (h->action != action) { 1319 pfsync_sendout(sc); 1320 if ((sc->sc_mbuf = pfsync_get_mbuf(sc, action, 1321 (void *)&sc->sc_statep.s)) == NULL) { 1322 crit_exit(); 1323 return (ENOMEM); 1324 } 1325 h = mtod(sc->sc_mbuf, struct pfsync_header *); 1326 } else { 1327 /* 1328 * If it's an update, look in the packet to see if 1329 * we already have an update for the state. 1330 */ 1331 if (action == PFSYNC_ACT_UPD && sc->sc_maxupdates) { 1332 struct pfsync_state *usp = 1333 (void *)((char *)h + PFSYNC_HDRLEN); 1334 1335 for (i = 0; i < h->count; i++) { 1336 if (!memcmp(usp->id, &st->id, 1337 PFSYNC_ID_LEN) && 1338 usp->creatorid == st->creatorid) { 1339 sp = usp; 1340 sp->updates++; 1341 break; 1342 } 1343 usp++; 1344 } 1345 } 1346 } 1347 } 1348 1349 st->pfsync_time = mycpu->gd_time_seconds; 1350 1351 if (sp == NULL) { 1352 /* not a "duplicate" update */ 1353 i = 255; 1354 sp = sc->sc_statep.s++; 1355 sc->sc_mbuf->m_pkthdr.len = 1356 sc->sc_mbuf->m_len += sizeof(struct pfsync_state); 1357 h->count++; 1358 bzero(sp, sizeof(*sp)); 1359 1360 pfsync_state_export(sp, st); 1361 1362 if (flags & PFSYNC_FLAG_STALE) 1363 sp->sync_flags |= PFSTATE_STALE; 1364 } else { 1365 pf_state_peer_hton(&st->src, &sp->src); 1366 pf_state_peer_hton(&st->dst, &sp->dst); 1367 1368 if (st->expire <= time_second) 1369 sp->expire = htonl(0); 1370 else 1371 sp->expire = htonl(st->expire - time_second); 1372 } 1373 1374 /* do we need to build "compressed" actions for network transfer? */ 1375 if (sc->sc_sync_ifp && flags & PFSYNC_FLAG_COMPRESS) { 1376 switch (action) { 1377 case PFSYNC_ACT_UPD: 1378 newaction = PFSYNC_ACT_UPD_C; 1379 break; 1380 case PFSYNC_ACT_DEL: 1381 newaction = PFSYNC_ACT_DEL_C; 1382 break; 1383 default: 1384 /* by default we just send the uncompressed states */ 1385 break; 1386 } 1387 } 1388 1389 if (newaction) { 1390 if (sc->sc_mbuf_net == NULL) { 1391 if ((sc->sc_mbuf_net = pfsync_get_mbuf(sc, newaction, 1392 (void *)&sc->sc_statep_net.s)) == NULL) { 1393 crit_exit(); 1394 return (ENOMEM); 1395 } 1396 } 1397 h_net = mtod(sc->sc_mbuf_net, struct pfsync_header *); 1398 1399 switch (newaction) { 1400 case PFSYNC_ACT_UPD_C: 1401 if (i != 255) { 1402 up = (void *)((char *)h_net + 1403 PFSYNC_HDRLEN + (i * sizeof(*up))); 1404 up->updates++; 1405 } else { 1406 h_net->count++; 1407 sc->sc_mbuf_net->m_pkthdr.len = 1408 sc->sc_mbuf_net->m_len += sizeof(*up); 1409 up = sc->sc_statep_net.u++; 1410 1411 bzero(up, sizeof(*up)); 1412 bcopy(&st->id, up->id, sizeof(up->id)); 1413 up->creatorid = st->creatorid; 1414 } 1415 up->timeout = st->timeout; 1416 up->expire = sp->expire; 1417 up->src = sp->src; 1418 up->dst = sp->dst; 1419 break; 1420 case PFSYNC_ACT_DEL_C: 1421 sc->sc_mbuf_net->m_pkthdr.len = 1422 sc->sc_mbuf_net->m_len += sizeof(*dp); 1423 dp = sc->sc_statep_net.d++; 1424 h_net->count++; 1425 1426 bzero(dp, sizeof(*dp)); 1427 bcopy(&st->id, dp->id, sizeof(dp->id)); 1428 dp->creatorid = st->creatorid; 1429 break; 1430 } 1431 } 1432 1433 if (h->count == sc->sc_maxcount || 1434 (sc->sc_maxupdates && (sp->updates >= sc->sc_maxupdates))) 1435 ret = pfsync_sendout(sc); 1436 1437 crit_exit(); 1438 return (ret); 1439 } 1440 1441 int 1442 pfsync_request_update(struct pfsync_state_upd *up, struct in_addr *src) 1443 { 1444 struct pfsync_header *h; 1445 struct pfsync_softc *sc = pfsyncif; 1446 struct pfsync_state_upd_req *rup; 1447 int ret = 0; 1448 1449 if (sc == NULL) 1450 return (0); 1451 1452 if (sc->sc_mbuf == NULL) { 1453 if ((sc->sc_mbuf = pfsync_get_mbuf(sc, PFSYNC_ACT_UREQ, 1454 (void *)&sc->sc_statep.s)) == NULL) 1455 return (ENOMEM); 1456 h = mtod(sc->sc_mbuf, struct pfsync_header *); 1457 } else { 1458 h = mtod(sc->sc_mbuf, struct pfsync_header *); 1459 if (h->action != PFSYNC_ACT_UREQ) { 1460 pfsync_sendout(sc); 1461 if ((sc->sc_mbuf = pfsync_get_mbuf(sc, PFSYNC_ACT_UREQ, 1462 (void *)&sc->sc_statep.s)) == NULL) 1463 return (ENOMEM); 1464 h = mtod(sc->sc_mbuf, struct pfsync_header *); 1465 } 1466 } 1467 1468 if (src != NULL) 1469 sc->sc_sendaddr = *src; 1470 sc->sc_mbuf->m_pkthdr.len = sc->sc_mbuf->m_len += sizeof(*rup); 1471 h->count++; 1472 rup = sc->sc_statep.r++; 1473 bzero(rup, sizeof(*rup)); 1474 if (up != NULL) { 1475 bcopy(up->id, rup->id, sizeof(rup->id)); 1476 rup->creatorid = up->creatorid; 1477 } 1478 1479 if (h->count == sc->sc_maxcount) 1480 ret = pfsync_sendout(sc); 1481 1482 return (ret); 1483 } 1484 1485 int 1486 pfsync_clear_states(u_int32_t creatorid, char *ifname) 1487 { 1488 struct pfsync_softc *sc = pfsyncif; 1489 struct pfsync_state_clr *cp; 1490 int ret; 1491 1492 if (sc == NULL) 1493 return (0); 1494 1495 crit_enter(); 1496 if (sc->sc_mbuf != NULL) 1497 pfsync_sendout(sc); 1498 if ((sc->sc_mbuf = pfsync_get_mbuf(sc, PFSYNC_ACT_CLR, 1499 (void *)&sc->sc_statep.c)) == NULL) { 1500 crit_exit(); 1501 return (ENOMEM); 1502 } 1503 sc->sc_mbuf->m_pkthdr.len = sc->sc_mbuf->m_len += sizeof(*cp); 1504 cp = sc->sc_statep.c; 1505 cp->creatorid = creatorid; 1506 if (ifname != NULL) 1507 strlcpy(cp->ifname, ifname, IFNAMSIZ); 1508 1509 ret = (pfsync_sendout(sc)); 1510 crit_exit(); 1511 return (ret); 1512 } 1513 1514 void 1515 pfsync_timeout(void *v) 1516 { 1517 struct pfsync_softc *sc = v; 1518 1519 crit_enter(); 1520 pfsync_sendout(sc); 1521 crit_exit(); 1522 } 1523 1524 void 1525 pfsync_send_bus(struct pfsync_softc *sc, u_int8_t status) 1526 { 1527 struct pfsync_state_bus *bus; 1528 1529 if (sc->sc_mbuf != NULL) 1530 pfsync_sendout(sc); 1531 1532 if (pfsync_sync_ok && 1533 (sc->sc_mbuf = pfsync_get_mbuf(sc, PFSYNC_ACT_BUS, 1534 (void *)&sc->sc_statep.b)) != NULL) { 1535 sc->sc_mbuf->m_pkthdr.len = sc->sc_mbuf->m_len += sizeof(*bus); 1536 bus = sc->sc_statep.b; 1537 bus->creatorid = pf_status.hostid; 1538 bus->status = status; 1539 bus->endtime = htonl(mycpu->gd_time_seconds - sc->sc_ureq_received); 1540 pfsync_sendout(sc); 1541 } 1542 } 1543 1544 void 1545 pfsync_bulk_update(void *v) 1546 { 1547 struct pfsync_softc *sc = v; 1548 int i = 0; 1549 int cpu; 1550 struct pf_state *state; 1551 1552 ASSERT_LWKT_TOKEN_HELD(&pf_token); 1553 1554 crit_enter(); 1555 if (sc->sc_mbuf != NULL) 1556 pfsync_sendout(sc); 1557 1558 /* 1559 * Grab at most PFSYNC_BULKPACKETS worth of states which have not 1560 * been sent since the latest request was made. 1561 */ 1562 state = sc->sc_bulk_send_next; 1563 cpu = sc->sc_bulk_send_cpu; 1564 if (state) 1565 do { 1566 /* send state update if syncable and not already sent */ 1567 if (!state->sync_flags 1568 && state->timeout < PFTM_MAX 1569 && state->pfsync_time <= sc->sc_ureq_received) { 1570 pfsync_pack_state(PFSYNC_ACT_UPD, state, 0); 1571 i++; 1572 } 1573 1574 /* figure next state to send */ 1575 state = TAILQ_NEXT(state, entry_list); 1576 1577 /* wrap to start of list if we hit the end */ 1578 if (state == NULL) { 1579 if (++cpu >= ncpus) 1580 cpu = 0; 1581 state = TAILQ_FIRST(&state_list[cpu]); 1582 } 1583 } while (i < sc->sc_maxcount * PFSYNC_BULKPACKETS && 1584 cpu != sc->sc_bulk_terminator_cpu && 1585 state != sc->sc_bulk_terminator); 1586 1587 if (state == NULL || (cpu == sc->sc_bulk_terminator_cpu && 1588 state == sc->sc_bulk_terminator)) { 1589 /* we're done */ 1590 pfsync_send_bus(sc, PFSYNC_BUS_END); 1591 sc->sc_ureq_received = 0; 1592 sc->sc_bulk_send_next = NULL; 1593 sc->sc_bulk_terminator = NULL; 1594 sc->sc_bulk_send_cpu = 0; 1595 sc->sc_bulk_terminator_cpu = 0; 1596 lwkt_reltoken(&pf_token); 1597 callout_stop(&sc->sc_bulk_tmo); 1598 lwkt_gettoken(&pf_token); 1599 if (pf_status.debug >= PF_DEBUG_MISC) 1600 kprintf("pfsync: bulk update complete\n"); 1601 } else { 1602 /* look again for more in a bit */ 1603 lwkt_reltoken(&pf_token); 1604 callout_reset(&sc->sc_bulk_tmo, 1, pfsync_timeout, 1605 LIST_FIRST(&pfsync_list)); 1606 lwkt_gettoken(&pf_token); 1607 sc->sc_bulk_send_next = state; 1608 sc->sc_bulk_send_cpu = cpu; 1609 } 1610 if (sc->sc_mbuf != NULL) 1611 pfsync_sendout(sc); 1612 crit_exit(); 1613 } 1614 1615 void 1616 pfsync_bulkfail(void *v) 1617 { 1618 struct pfsync_softc *sc = v; 1619 int error; 1620 1621 ASSERT_LWKT_TOKEN_HELD(&pf_token); 1622 1623 if (sc->sc_bulk_tries++ < PFSYNC_MAX_BULKTRIES) { 1624 /* Try again in a bit */ 1625 lwkt_reltoken(&pf_token); 1626 callout_reset(&sc->sc_bulkfail_tmo, 5 * hz, pfsync_bulkfail, 1627 LIST_FIRST(&pfsync_list)); 1628 lwkt_gettoken(&pf_token); 1629 crit_enter(); 1630 error = pfsync_request_update(NULL, NULL); 1631 if (error == ENOMEM) { 1632 if (pf_status.debug >= PF_DEBUG_MISC) 1633 kprintf("pfsync: cannot allocate mbufs for " 1634 "bulk update\n"); 1635 } else 1636 pfsync_sendout(sc); 1637 crit_exit(); 1638 } else { 1639 /* Pretend like the transfer was ok */ 1640 sc->sc_ureq_sent = 0; 1641 sc->sc_bulk_tries = 0; 1642 #ifdef CARP 1643 if (!pfsync_sync_ok) 1644 carp_group_demote_adj(&sc->sc_if, -1); 1645 #endif 1646 pfsync_sync_ok = 1; 1647 if (pf_status.debug >= PF_DEBUG_MISC) 1648 kprintf("pfsync: failed to receive " 1649 "bulk update status\n"); 1650 lwkt_reltoken(&pf_token); 1651 callout_stop(&sc->sc_bulkfail_tmo); 1652 lwkt_gettoken(&pf_token); 1653 } 1654 } 1655 1656 static void 1657 pfsync_sendout_handler(netmsg_t nmsg) 1658 { 1659 struct netmsg_genpkt *msg = (struct netmsg_genpkt *)nmsg; 1660 1661 pfsync_sendout_mbuf(msg->arg1, msg->m); 1662 } 1663 1664 int 1665 pfsync_sendout(struct pfsync_softc *sc) 1666 { 1667 #if NBPF > 0 1668 struct ifnet *ifp = &sc->sc_if; 1669 #endif 1670 struct mbuf *m; 1671 struct netmsg_genpkt *msg; 1672 1673 ASSERT_LWKT_TOKEN_HELD(&pf_token); 1674 1675 lwkt_reltoken(&pf_token); 1676 callout_stop(&sc->sc_tmo); 1677 lwkt_gettoken(&pf_token); 1678 1679 if (sc->sc_mbuf == NULL) 1680 return (0); 1681 m = sc->sc_mbuf; 1682 sc->sc_mbuf = NULL; 1683 sc->sc_statep.s = NULL; 1684 1685 #if NBPF > 0 1686 if (ifp->if_bpf) { 1687 bpf_gettoken(); 1688 if (ifp->if_bpf) 1689 bpf_mtap(ifp->if_bpf, m); 1690 bpf_reltoken(); 1691 } 1692 #endif 1693 1694 if (sc->sc_mbuf_net) { 1695 m_freem(m); 1696 m = sc->sc_mbuf_net; 1697 sc->sc_mbuf_net = NULL; 1698 sc->sc_statep_net.s = NULL; 1699 } 1700 1701 msg = &m->m_hdr.mh_genmsg; 1702 netmsg_init(&msg->base, NULL, &netisr_apanic_rport, 0, 1703 pfsync_sendout_handler); 1704 msg->m = m; 1705 msg->arg1 = sc; 1706 netisr_sendmsg(&msg->base, 0); 1707 1708 return (0); 1709 } 1710 1711 int 1712 pfsync_sendout_mbuf(struct pfsync_softc *sc, struct mbuf *m) 1713 { 1714 struct sockaddr sa; 1715 struct ip *ip; 1716 1717 if (sc->sc_sync_ifp || 1718 sc->sc_sync_peer.s_addr != INADDR_PFSYNC_GROUP) { 1719 M_PREPEND(m, sizeof(struct ip), M_WAITOK); 1720 ip = mtod(m, struct ip *); 1721 ip->ip_v = IPVERSION; 1722 ip->ip_hl = sizeof(*ip) >> 2; 1723 ip->ip_tos = IPTOS_LOWDELAY; 1724 ip->ip_len = htons(m->m_pkthdr.len); 1725 ip->ip_id = htons(ip_randomid()); 1726 ip->ip_off = htons(IP_DF); 1727 ip->ip_ttl = PFSYNC_DFLTTL; 1728 ip->ip_p = IPPROTO_PFSYNC; 1729 ip->ip_sum = 0; 1730 1731 bzero(&sa, sizeof(sa)); 1732 ip->ip_src.s_addr = INADDR_ANY; 1733 1734 if (sc->sc_sendaddr.s_addr == INADDR_PFSYNC_GROUP) 1735 m->m_flags |= M_MCAST; 1736 ip->ip_dst = sc->sc_sendaddr; 1737 sc->sc_sendaddr.s_addr = sc->sc_sync_peer.s_addr; 1738 1739 pfsyncstats.pfsyncs_opackets++; 1740 1741 if (ip_output(m, NULL, NULL, IP_RAWOUTPUT, &sc->sc_imo, NULL)) 1742 pfsyncstats.pfsyncs_oerrors++; 1743 } else 1744 m_freem(m); 1745 1746 return (0); 1747 } 1748 1749 static int 1750 pfsync_modevent(module_t mod, int type, void *data) 1751 { 1752 int error = 0; 1753 1754 struct pfsync_softc *pfs_if, *tmp; 1755 1756 lwkt_gettoken(&pf_token); 1757 1758 switch (type) { 1759 case MOD_LOAD: 1760 LIST_INIT(&pfsync_list); 1761 lwkt_reltoken(&pf_token); 1762 if_clone_attach(&pfsync_cloner); 1763 lwkt_gettoken(&pf_token); 1764 /* Override the function pointer for pf_ioctl.c */ 1765 break; 1766 1767 case MOD_UNLOAD: 1768 lwkt_reltoken(&pf_token); 1769 if_clone_detach(&pfsync_cloner); 1770 lwkt_gettoken(&pf_token); 1771 LIST_FOREACH_MUTABLE(pfs_if, &pfsync_list, sc_next, tmp) { 1772 pfsync_clone_destroy(&pfs_if->sc_if); 1773 } 1774 break; 1775 1776 default: 1777 error = EINVAL; 1778 break; 1779 } 1780 1781 lwkt_reltoken(&pf_token); 1782 return error; 1783 } 1784 1785 static moduledata_t pfsync_mod = { 1786 "pfsync", 1787 pfsync_modevent, 1788 0 1789 }; 1790 1791 #define PFSYNC_MODVER 44 1792 1793 DECLARE_MODULE(pfsync, pfsync_mod, SI_SUB_PSEUDO, SI_ORDER_ANY); 1794 MODULE_VERSION(pfsync, PFSYNC_MODVER); 1795 MODULE_DEPEND(pfsync, pf, PF_MODVER, PF_MODVER, PF_MODVER); 1796 1797 static void 1798 pfsync_in_addmulti_dispatch(netmsg_t nmsg) 1799 { 1800 struct lwkt_msg *lmsg = &nmsg->lmsg; 1801 struct ifnet *ifp = lmsg->u.ms_resultp; 1802 struct in_addr addr; 1803 1804 addr.s_addr = INADDR_PFSYNC_GROUP; 1805 lmsg->u.ms_resultp = in_addmulti(&addr, ifp); 1806 1807 lwkt_replymsg(lmsg, 0); 1808 } 1809 1810 static struct in_multi * 1811 pfsync_in_addmulti(struct ifnet *ifp) 1812 { 1813 struct netmsg_base nmsg; 1814 struct lwkt_msg *lmsg = &nmsg.lmsg; 1815 1816 netmsg_init(&nmsg, NULL, &curthread->td_msgport, 0, 1817 pfsync_in_addmulti_dispatch); 1818 lmsg->u.ms_resultp = ifp; 1819 1820 lwkt_domsg(netisr_cpuport(0), lmsg, 0); 1821 return lmsg->u.ms_resultp; 1822 } 1823 1824 static void 1825 pfsync_in_delmulti_dispatch(netmsg_t nmsg) 1826 { 1827 struct lwkt_msg *lmsg = &nmsg->lmsg; 1828 1829 in_delmulti(lmsg->u.ms_resultp); 1830 lwkt_replymsg(lmsg, 0); 1831 } 1832 1833 static void 1834 pfsync_in_delmulti(struct in_multi *inm) 1835 { 1836 struct netmsg_base nmsg; 1837 struct lwkt_msg *lmsg = &nmsg.lmsg; 1838 1839 netmsg_init(&nmsg, NULL, &curthread->td_msgport, 0, 1840 pfsync_in_delmulti_dispatch); 1841 lmsg->u.ms_resultp = inm; 1842 1843 lwkt_domsg(netisr_cpuport(0), lmsg, 0); 1844 } 1845