1 /* 2 * Copyright (c) 2002 Michael Shalayeff 3 * All rights reserved. 4 * 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted provided that the following conditions 7 * are met: 8 * 1. Redistributions of source code must retain the above copyright 9 * notice, this list of conditions and the following disclaimer. 10 * 2. Redistributions in binary form must reproduce the above copyright 11 * notice, this list of conditions and the following disclaimer in the 12 * documentation and/or other materials provided with the distribution. 13 * 14 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR 15 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 16 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 17 * IN NO EVENT SHALL THE AUTHOR OR HIS RELATIVES BE LIABLE FOR ANY DIRECT, 18 * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 19 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 20 * SERVICES; LOSS OF MIND, USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 21 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, 22 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING 23 * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF 24 * THE POSSIBILITY OF SUCH DAMAGE. 25 * 26 * $OpenBSD: if_pfsync.c,v 1.98 2008/06/29 08:42:15 mcbride Exp $ 27 */ 28 29 #include "opt_inet.h" 30 #include "opt_inet6.h" 31 #include "opt_carp.h" 32 #include "use_bpf.h" 33 34 #include <sys/param.h> 35 #include <sys/endian.h> 36 #include <sys/proc.h> 37 #include <sys/priv.h> 38 #include <sys/systm.h> 39 #include <sys/time.h> 40 #include <sys/mbuf.h> 41 #include <sys/socket.h> 42 #include <sys/kernel.h> 43 #include <sys/malloc.h> 44 #include <sys/module.h> 45 #include <sys/msgport2.h> 46 #include <sys/sockio.h> 47 #include <sys/thread2.h> 48 49 #include <machine/inttypes.h> 50 51 #include <net/if.h> 52 #include <net/if_types.h> 53 #include <net/ifq_var.h> 54 #include <net/route.h> 55 #include <net/bpf.h> 56 #include <net/netisr2.h> 57 #include <net/netmsg2.h> 58 #include <netinet/in.h> 59 #include <netinet/if_ether.h> 60 #include <netinet/ip_carp.h> 61 #include <netinet/tcp.h> 62 #include <netinet/tcp_seq.h> 63 64 #ifdef INET 65 #include <netinet/in_systm.h> 66 #include <netinet/in_var.h> 67 #include <netinet/ip.h> 68 #include <netinet/ip_var.h> 69 #endif 70 71 #ifdef INET6 72 #include <netinet6/nd6.h> 73 #endif /* INET6 */ 74 75 #include <net/pf/pfvar.h> 76 #include <net/pf/if_pfsync.h> 77 78 #define PFSYNCNAME "pfsync" 79 80 #define PFSYNC_MINMTU \ 81 (sizeof(struct pfsync_header) + sizeof(struct pf_state)) 82 83 #ifdef PFSYNCDEBUG 84 #define DPRINTF(x) do { if (pfsyncdebug) kprintf x ; } while (0) 85 int pfsyncdebug; 86 #else 87 #define DPRINTF(x) 88 #endif 89 90 struct pfsync_softc *pfsyncif = NULL; 91 struct pfsyncstats pfsyncstats; 92 93 void pfsyncattach(int); 94 static int pfsync_clone_destroy(struct ifnet *); 95 static int pfsync_clone_create(struct if_clone *, int, caddr_t); 96 void pfsync_setmtu(struct pfsync_softc *, int); 97 int pfsync_alloc_scrub_memory(struct pfsync_state_peer *, 98 struct pf_state_peer *); 99 int pfsyncoutput(struct ifnet *, struct mbuf *, struct sockaddr *, 100 struct rtentry *); 101 int pfsyncioctl(struct ifnet *, u_long, caddr_t, struct ucred *); 102 void pfsyncstart(struct ifnet *, struct ifaltq_subque *); 103 104 struct mbuf *pfsync_get_mbuf(struct pfsync_softc *, u_int8_t, void **); 105 int pfsync_request_update(struct pfsync_state_upd *, struct in_addr *); 106 int pfsync_sendout(struct pfsync_softc *); 107 int pfsync_sendout_mbuf(struct pfsync_softc *, struct mbuf *); 108 void pfsync_timeout(void *); 109 void pfsync_send_bus(struct pfsync_softc *, u_int8_t); 110 void pfsync_bulk_update(void *); 111 void pfsync_bulkfail(void *); 112 113 static struct in_multi *pfsync_in_addmulti(struct ifnet *); 114 static void pfsync_in_delmulti(struct in_multi *); 115 116 static MALLOC_DEFINE(M_PFSYNC, PFSYNCNAME, "Packet Filter State Sync. Interface"); 117 static LIST_HEAD(pfsync_list, pfsync_softc) pfsync_list; 118 119 int pfsync_sync_ok; 120 121 struct if_clone pfsync_cloner = 122 IF_CLONE_INITIALIZER("pfsync", pfsync_clone_create, pfsync_clone_destroy, 1 ,1); 123 124 void 125 pfsyncattach(int npfsync) 126 { 127 if_clone_attach(&pfsync_cloner); 128 } 129 static int 130 pfsync_clone_create(struct if_clone *ifc, int unit, caddr_t param __unused) 131 { 132 struct pfsync_softc *sc; 133 struct ifnet *ifp; 134 135 lwkt_gettoken(&pf_token); 136 137 pfsync_sync_ok = 1; 138 139 sc = kmalloc(sizeof(*sc), M_PFSYNC, M_WAITOK | M_ZERO); 140 sc->sc_mbuf = NULL; 141 sc->sc_mbuf_net = NULL; 142 sc->sc_mbuf_tdb = NULL; 143 sc->sc_statep.s = NULL; 144 sc->sc_statep_net.s = NULL; 145 sc->sc_statep_tdb.t = NULL; 146 sc->sc_maxupdates = 128; 147 sc->sc_sync_peer.s_addr =htonl(INADDR_PFSYNC_GROUP); 148 sc->sc_sendaddr.s_addr = htonl(INADDR_PFSYNC_GROUP); 149 sc->sc_ureq_received = 0; 150 sc->sc_ureq_sent = 0; 151 sc->sc_bulk_send_next = NULL; 152 sc->sc_bulk_terminator = NULL; 153 sc->sc_bulk_send_cpu = 0; 154 sc->sc_bulk_terminator_cpu = 0; 155 sc->sc_imo.imo_max_memberships = IP_MAX_MEMBERSHIPS; 156 lwkt_reltoken(&pf_token); 157 158 ifp = &sc->sc_if; 159 if_initname(ifp, ifc->ifc_name, unit); 160 ifp->if_ioctl = pfsyncioctl; 161 ifp->if_output = pfsyncoutput; 162 ifp->if_start = pfsyncstart; 163 ifp->if_type = IFT_PFSYNC; 164 ifq_set_maxlen(&ifp->if_snd, ifqmaxlen); 165 ifp->if_hdrlen = PFSYNC_HDRLEN; 166 ifp->if_baudrate = IF_Mbps(100); 167 ifp->if_softc = sc; 168 169 pfsync_setmtu(sc, MCLBYTES); 170 callout_init(&sc->sc_tmo); 171 /* callout_init(&sc->sc_tdb_tmo); XXX we don't support tdb (yet) */ 172 callout_init(&sc->sc_bulk_tmo); 173 callout_init(&sc->sc_bulkfail_tmo); 174 175 if_attach(ifp, NULL); 176 #if NBPF > 0 177 bpfattach(&sc->sc_if, DLT_PFSYNC, PFSYNC_HDRLEN); 178 #endif 179 180 #ifdef CARP 181 if_addgroup(ifp, "carp"); 182 #endif 183 184 lwkt_gettoken(&pf_token); 185 LIST_INSERT_HEAD(&pfsync_list, sc, sc_next); 186 lwkt_reltoken(&pf_token); 187 188 return (0); 189 } 190 191 static int 192 pfsync_clone_destroy(struct ifnet *ifp) 193 { 194 struct netmsg_base msg; 195 196 lwkt_gettoken(&pf_token); 197 lwkt_reltoken(&pf_token); 198 199 struct pfsync_softc *sc = ifp->if_softc; 200 callout_stop(&sc->sc_tmo); 201 /* callout_stop(&sc->sc_tdb_tmo); XXX we don't support tdb (yet) */ 202 callout_stop(&sc->sc_bulk_tmo); 203 callout_stop(&sc->sc_bulkfail_tmo); 204 #ifdef CARP 205 if (!pfsync_sync_ok) 206 carp_group_demote_adj(&sc->sc_if, -1); 207 #endif 208 209 /* Unpend async sendouts. */ 210 netmsg_init(&msg, NULL, &curthread->td_msgport, 0, netmsg_sync_handler); 211 netisr_domsg(&msg, 0); 212 213 #if NBPF > 0 214 bpfdetach(ifp); 215 #endif 216 if_detach(ifp); 217 lwkt_gettoken(&pf_token); 218 LIST_REMOVE(sc, sc_next); 219 kfree(sc, M_PFSYNC); 220 lwkt_reltoken(&pf_token); 221 222 return 0; 223 } 224 225 /* 226 * Start output on the pfsync interface. 227 */ 228 void 229 pfsyncstart(struct ifnet *ifp, struct ifaltq_subque *ifsq) 230 { 231 ASSERT_ALTQ_SQ_DEFAULT(ifp, ifsq); 232 ifsq_purge(ifsq); 233 } 234 235 int 236 pfsync_alloc_scrub_memory(struct pfsync_state_peer *s, 237 struct pf_state_peer *d) 238 { 239 if (s->scrub.scrub_flag && d->scrub == NULL) { 240 d->scrub = kmalloc(sizeof(struct pf_state_scrub), M_PFSYNC, M_NOWAIT|M_ZERO); 241 242 if (d->scrub == NULL) 243 return (ENOMEM); 244 } 245 246 return (0); 247 } 248 249 void 250 pfsync_state_export(struct pfsync_state *sp, struct pf_state *st) 251 { 252 bzero(sp, sizeof(struct pfsync_state)); 253 254 /* copy from state key */ 255 sp->key[PF_SK_WIRE].addr[0] = st->key[PF_SK_WIRE]->addr[0]; 256 sp->key[PF_SK_WIRE].addr[1] = st->key[PF_SK_WIRE]->addr[1]; 257 sp->key[PF_SK_WIRE].port[0] = st->key[PF_SK_WIRE]->port[0]; 258 sp->key[PF_SK_WIRE].port[1] = st->key[PF_SK_WIRE]->port[1]; 259 sp->key[PF_SK_STACK].addr[0] = st->key[PF_SK_STACK]->addr[0]; 260 sp->key[PF_SK_STACK].addr[1] = st->key[PF_SK_STACK]->addr[1]; 261 sp->key[PF_SK_STACK].port[0] = st->key[PF_SK_STACK]->port[0]; 262 sp->key[PF_SK_STACK].port[1] = st->key[PF_SK_STACK]->port[1]; 263 sp->proto = st->key[PF_SK_WIRE]->proto; 264 sp->af = st->key[PF_SK_WIRE]->af; 265 266 /* copy from state */ 267 strlcpy(sp->ifname, st->kif->pfik_name, sizeof(sp->ifname)); 268 bcopy(&st->rt_addr, &sp->rt_addr, sizeof(sp->rt_addr)); 269 sp->creation = htonl(time_second - st->creation); 270 sp->expire = pf_state_expires(st); 271 if (sp->expire <= time_second) 272 sp->expire = htonl(0); 273 else 274 sp->expire = htonl(sp->expire - time_second); 275 276 sp->direction = st->direction; 277 sp->log = st->log; 278 sp->cpuid = st->cpuid; 279 sp->pickup_mode = st->pickup_mode; 280 sp->timeout = st->timeout; 281 sp->state_flags = st->state_flags; 282 if (st->src_node) 283 sp->sync_flags |= PFSYNC_FLAG_SRCNODE; 284 if (st->nat_src_node) 285 sp->sync_flags |= PFSYNC_FLAG_NATSRCNODE; 286 287 bcopy(&st->id, &sp->id, sizeof(sp->id)); 288 sp->creatorid = st->creatorid; 289 pf_state_peer_hton(&st->src, &sp->src); 290 pf_state_peer_hton(&st->dst, &sp->dst); 291 292 if (st->rule.ptr == NULL) 293 sp->rule = htonl(-1); 294 else 295 sp->rule = htonl(st->rule.ptr->nr); 296 if (st->anchor.ptr == NULL) 297 sp->anchor = htonl(-1); 298 else 299 sp->anchor = htonl(st->anchor.ptr->nr); 300 if (st->nat_rule.ptr == NULL) 301 sp->nat_rule = htonl(-1); 302 else 303 sp->nat_rule = htonl(st->nat_rule.ptr->nr); 304 305 pf_state_counter_hton(st->packets[0], sp->packets[0]); 306 pf_state_counter_hton(st->packets[1], sp->packets[1]); 307 pf_state_counter_hton(st->bytes[0], sp->bytes[0]); 308 pf_state_counter_hton(st->bytes[1], sp->bytes[1]); 309 310 } 311 312 int 313 pfsync_state_import(struct pfsync_state *sp, u_int8_t flags) 314 { 315 struct pf_state *st = NULL; 316 struct pf_state_key *skw = NULL, *sks = NULL; 317 struct pf_rule *r = NULL; 318 struct pfi_kif *kif; 319 int pool_flags; 320 int error; 321 322 if (sp->creatorid == 0 && pf_status.debug >= PF_DEBUG_MISC) { 323 kprintf("pfsync_insert_net_state: invalid creator id:" 324 " %08x\n", ntohl(sp->creatorid)); 325 return (EINVAL); 326 } 327 328 if ((kif = pfi_kif_get(sp->ifname)) == NULL) { 329 if (pf_status.debug >= PF_DEBUG_MISC) 330 kprintf("pfsync_insert_net_state: " 331 "unknown interface: %s\n", sp->ifname); 332 if (flags & PFSYNC_SI_IOCTL) 333 return (EINVAL); 334 return (0); /* skip this state */ 335 } 336 337 /* 338 * If the ruleset checksums match or the state is coming from the ioctl, 339 * it's safe to associate the state with the rule of that number. 340 */ 341 if (sp->rule != htonl(-1) && sp->anchor == htonl(-1) && 342 (flags & (PFSYNC_SI_IOCTL | PFSYNC_SI_CKSUM)) && ntohl(sp->rule) < 343 pf_main_ruleset.rules[PF_RULESET_FILTER].active.rcount) 344 r = pf_main_ruleset.rules[ 345 PF_RULESET_FILTER].active.ptr_array[ntohl(sp->rule)]; 346 else 347 r = &pf_default_rule; 348 349 if ((r->max_states && r->states_cur >= r->max_states)) 350 goto cleanup; 351 352 if (flags & PFSYNC_SI_IOCTL) 353 pool_flags = M_WAITOK | M_NULLOK | M_ZERO; 354 else 355 pool_flags = M_WAITOK | M_ZERO; 356 357 if ((st = kmalloc(sizeof(struct pf_state), M_PFSYNC, pool_flags)) == NULL) 358 goto cleanup; 359 lockinit(&st->lk, "pfstlk", 0, 0); 360 361 if ((skw = pf_alloc_state_key(pool_flags)) == NULL) 362 goto cleanup; 363 364 if (PF_ANEQ(&sp->key[PF_SK_WIRE].addr[0], 365 &sp->key[PF_SK_STACK].addr[0], sp->af) || 366 PF_ANEQ(&sp->key[PF_SK_WIRE].addr[1], 367 &sp->key[PF_SK_STACK].addr[1], sp->af) || 368 sp->key[PF_SK_WIRE].port[0] != sp->key[PF_SK_STACK].port[0] || 369 sp->key[PF_SK_WIRE].port[1] != sp->key[PF_SK_STACK].port[1]) { 370 if ((sks = pf_alloc_state_key(pool_flags)) == NULL) 371 goto cleanup; 372 } else 373 sks = skw; 374 375 /* allocate memory for scrub info */ 376 if (pfsync_alloc_scrub_memory(&sp->src, &st->src) || 377 pfsync_alloc_scrub_memory(&sp->dst, &st->dst)) 378 goto cleanup; 379 380 /* copy to state key(s) */ 381 skw->addr[0] = sp->key[PF_SK_WIRE].addr[0]; 382 skw->addr[1] = sp->key[PF_SK_WIRE].addr[1]; 383 skw->port[0] = sp->key[PF_SK_WIRE].port[0]; 384 skw->port[1] = sp->key[PF_SK_WIRE].port[1]; 385 skw->proto = sp->proto; 386 skw->af = sp->af; 387 if (sks != skw) { 388 sks->addr[0] = sp->key[PF_SK_STACK].addr[0]; 389 sks->addr[1] = sp->key[PF_SK_STACK].addr[1]; 390 sks->port[0] = sp->key[PF_SK_STACK].port[0]; 391 sks->port[1] = sp->key[PF_SK_STACK].port[1]; 392 sks->proto = sp->proto; 393 sks->af = sp->af; 394 } 395 396 /* copy to state */ 397 bcopy(&sp->rt_addr, &st->rt_addr, sizeof(st->rt_addr)); 398 st->creation = time_second - ntohl(sp->creation); 399 st->expire = time_second; 400 if (sp->expire) { 401 /* XXX No adaptive scaling. */ 402 st->expire -= r->timeout[sp->timeout] - ntohl(sp->expire); 403 } 404 405 st->expire = ntohl(sp->expire) + time_second; 406 st->direction = sp->direction; 407 st->log = sp->log; 408 st->timeout = sp->timeout; 409 st->state_flags = sp->state_flags; 410 if (!(flags & PFSYNC_SI_IOCTL)) 411 st->sync_flags = PFSTATE_FROMSYNC; 412 413 bcopy(sp->id, &st->id, sizeof(st->id)); 414 st->creatorid = sp->creatorid; 415 pf_state_peer_ntoh(&sp->src, &st->src); 416 pf_state_peer_ntoh(&sp->dst, &st->dst); 417 418 st->rule.ptr = r; 419 st->nat_rule.ptr = NULL; 420 st->anchor.ptr = NULL; 421 st->rt_kif = NULL; 422 423 st->pfsync_time = 0; 424 425 426 /* XXX when we have nat_rule/anchors, use STATE_INC_COUNTERS */ 427 r->states_cur++; 428 r->states_tot++; 429 430 if ((error = pf_state_insert(kif, skw, sks, st)) != 0) { 431 /* XXX when we have nat_rule/anchors, use STATE_DEC_COUNTERS */ 432 r->states_cur--; 433 goto cleanup_state; 434 } 435 436 return (0); 437 438 cleanup: 439 error = ENOMEM; 440 if (skw == sks) 441 sks = NULL; 442 if (skw != NULL) 443 kfree(skw, M_PFSYNC); 444 if (sks != NULL) 445 kfree(sks, M_PFSYNC); 446 447 cleanup_state: /* pf_state_insert frees the state keys */ 448 if (st) { 449 if (st->dst.scrub) 450 kfree(st->dst.scrub, M_PFSYNC); 451 if (st->src.scrub) 452 kfree(st->src.scrub, M_PFSYNC); 453 kfree(st, M_PFSYNC); 454 } 455 return (error); 456 } 457 458 void 459 pfsync_input(struct mbuf *m, ...) 460 { 461 struct ip *ip = mtod(m, struct ip *); 462 struct pfsync_header *ph; 463 struct pfsync_softc *sc = pfsyncif; 464 struct pf_state *st; 465 struct pf_state_key *sk; 466 struct pf_state_item *si; 467 struct pf_state_cmp id_key; 468 struct pfsync_state *sp; 469 struct pfsync_state_upd *up; 470 struct pfsync_state_del *dp; 471 struct pfsync_state_clr *cp; 472 struct pfsync_state_upd_req *rup; 473 struct pfsync_state_bus *bus; 474 struct in_addr src; 475 struct mbuf *mp; 476 int iplen, action, error, i, count, offp, sfail, stale = 0; 477 u_int8_t flags = 0; 478 479 /* This function is not yet called from anywhere */ 480 /* Still we assume for safety that pf_token must be held */ 481 ASSERT_LWKT_TOKEN_HELD(&pf_token); 482 483 pfsyncstats.pfsyncs_ipackets++; 484 485 /* verify that we have a sync interface configured */ 486 if (!sc || !sc->sc_sync_ifp || !pf_status.running) 487 goto done; 488 489 /* verify that the packet came in on the right interface */ 490 if (sc->sc_sync_ifp != m->m_pkthdr.rcvif) { 491 pfsyncstats.pfsyncs_badif++; 492 goto done; 493 } 494 495 /* verify that the IP TTL is 255. */ 496 if (ip->ip_ttl != PFSYNC_DFLTTL) { 497 pfsyncstats.pfsyncs_badttl++; 498 goto done; 499 } 500 501 iplen = ip->ip_hl << 2; 502 503 if (m->m_pkthdr.len < iplen + sizeof(*ph)) { 504 pfsyncstats.pfsyncs_hdrops++; 505 goto done; 506 } 507 508 if (iplen + sizeof(*ph) > m->m_len) { 509 if ((m = m_pullup(m, iplen + sizeof(*ph))) == NULL) { 510 pfsyncstats.pfsyncs_hdrops++; 511 goto done; 512 } 513 ip = mtod(m, struct ip *); 514 } 515 ph = (struct pfsync_header *)((char *)ip + iplen); 516 517 /* verify the version */ 518 if (ph->version != PFSYNC_VERSION) { 519 pfsyncstats.pfsyncs_badver++; 520 goto done; 521 } 522 523 action = ph->action; 524 count = ph->count; 525 526 /* make sure it's a valid action code */ 527 if (action >= PFSYNC_ACT_MAX) { 528 pfsyncstats.pfsyncs_badact++; 529 goto done; 530 } 531 532 /* Cheaper to grab this now than having to mess with mbufs later */ 533 src = ip->ip_src; 534 535 if (!bcmp(&ph->pf_chksum, &pf_status.pf_chksum, PF_MD5_DIGEST_LENGTH)) 536 flags |= PFSYNC_SI_CKSUM; 537 538 switch (action) { 539 case PFSYNC_ACT_CLR: { 540 struct pf_state *nexts; 541 struct pf_state_key *nextsk; 542 struct pfi_kif *kif; 543 globaldata_t save_gd = mycpu; 544 int nn; 545 546 u_int32_t creatorid; 547 if ((mp = m_pulldown(m, iplen + sizeof(*ph), 548 sizeof(*cp), &offp)) == NULL) { 549 pfsyncstats.pfsyncs_badlen++; 550 return; 551 } 552 cp = (struct pfsync_state_clr *)(mp->m_data + offp); 553 creatorid = cp->creatorid; 554 555 crit_enter(); 556 if (cp->ifname[0] == '\0') { 557 lwkt_gettoken(&pf_token); 558 for (nn = 0; nn < ncpus; ++nn) { 559 lwkt_setcpu_self(globaldata_find(nn)); 560 for (st = RB_MIN(pf_state_tree_id, 561 &tree_id[nn]); 562 st; st = nexts) { 563 nexts = RB_NEXT(pf_state_tree_id, 564 &tree_id[n], st); 565 if (st->creatorid == creatorid) { 566 st->sync_flags |= 567 PFSTATE_FROMSYNC; 568 pf_unlink_state(st); 569 } 570 } 571 } 572 lwkt_setcpu_self(save_gd); 573 lwkt_reltoken(&pf_token); 574 } else { 575 if ((kif = pfi_kif_get(cp->ifname)) == NULL) { 576 crit_exit(); 577 return; 578 } 579 /* XXX correct? */ 580 lwkt_gettoken(&pf_token); 581 for (nn = 0; nn < ncpus; ++nn) { 582 lwkt_setcpu_self(globaldata_find(nn)); 583 for (sk = RB_MIN(pf_state_tree, 584 &pf_statetbl[nn]); 585 sk; 586 sk = nextsk) { 587 nextsk = RB_NEXT(pf_state_tree, 588 &pf_statetbl[n], sk); 589 TAILQ_FOREACH(si, &sk->states, entry) { 590 if (si->s->creatorid == 591 creatorid) { 592 si->s->sync_flags |= 593 PFSTATE_FROMSYNC; 594 pf_unlink_state(si->s); 595 } 596 } 597 } 598 } 599 lwkt_setcpu_self(save_gd); 600 lwkt_reltoken(&pf_token); 601 } 602 crit_exit(); 603 604 break; 605 } 606 case PFSYNC_ACT_INS: 607 if ((mp = m_pulldown(m, iplen + sizeof(*ph), 608 count * sizeof(*sp), &offp)) == NULL) { 609 pfsyncstats.pfsyncs_badlen++; 610 return; 611 } 612 613 crit_enter(); 614 for (i = 0, sp = (struct pfsync_state *)(mp->m_data + offp); 615 i < count; i++, sp++) { 616 /* check for invalid values */ 617 if (sp->timeout >= PFTM_MAX || 618 sp->src.state > PF_TCPS_PROXY_DST || 619 sp->dst.state > PF_TCPS_PROXY_DST || 620 sp->direction > PF_OUT || 621 (sp->af != AF_INET && sp->af != AF_INET6)) { 622 if (pf_status.debug >= PF_DEBUG_MISC) 623 kprintf("pfsync_insert: PFSYNC_ACT_INS: " 624 "invalid value\n"); 625 pfsyncstats.pfsyncs_badval++; 626 continue; 627 } 628 629 if ((error = pfsync_state_import(sp, flags))) { 630 if (error == ENOMEM) { 631 crit_exit(); 632 goto done; 633 } 634 } 635 } 636 crit_exit(); 637 break; 638 case PFSYNC_ACT_UPD: 639 if ((mp = m_pulldown(m, iplen + sizeof(*ph), 640 count * sizeof(*sp), &offp)) == NULL) { 641 pfsyncstats.pfsyncs_badlen++; 642 return; 643 } 644 645 crit_enter(); 646 for (i = 0, sp = (struct pfsync_state *)(mp->m_data + offp); 647 i < count; i++, sp++) { 648 int flags = PFSYNC_FLAG_STALE; 649 650 /* check for invalid values */ 651 if (sp->timeout >= PFTM_MAX || 652 sp->src.state > PF_TCPS_PROXY_DST || 653 sp->dst.state > PF_TCPS_PROXY_DST) { 654 if (pf_status.debug >= PF_DEBUG_MISC) 655 kprintf("pfsync_insert: PFSYNC_ACT_UPD: " 656 "invalid value\n"); 657 pfsyncstats.pfsyncs_badval++; 658 continue; 659 } 660 661 bcopy(sp->id, &id_key.id, sizeof(id_key.id)); 662 id_key.creatorid = sp->creatorid; 663 664 st = pf_find_state_byid(&id_key); 665 if (st == NULL) { 666 /* insert the update */ 667 if (pfsync_state_import(sp, flags)) 668 pfsyncstats.pfsyncs_badstate++; 669 continue; 670 } 671 sk = st->key[PF_SK_WIRE]; /* XXX right one? */ 672 sfail = 0; 673 if (sk->proto == IPPROTO_TCP) { 674 /* 675 * The state should never go backwards except 676 * for syn-proxy states. Neither should the 677 * sequence window slide backwards. 678 */ 679 if (st->src.state > sp->src.state && 680 (st->src.state < PF_TCPS_PROXY_SRC || 681 sp->src.state >= PF_TCPS_PROXY_SRC)) 682 sfail = 1; 683 else if (SEQ_GT(st->src.seqlo, 684 ntohl(sp->src.seqlo))) 685 sfail = 3; 686 else if (st->dst.state > sp->dst.state) { 687 /* There might still be useful 688 * information about the src state here, 689 * so import that part of the update, 690 * then "fail" so we send the updated 691 * state back to the peer who is missing 692 * our what we know. */ 693 pf_state_peer_ntoh(&sp->src, &st->src); 694 /* XXX do anything with timeouts? */ 695 sfail = 7; 696 flags = 0; 697 } else if (st->dst.state >= TCPS_SYN_SENT && 698 SEQ_GT(st->dst.seqlo, ntohl(sp->dst.seqlo))) 699 sfail = 4; 700 } else { 701 /* 702 * Non-TCP protocol state machine always go 703 * forwards 704 */ 705 if (st->src.state > sp->src.state) 706 sfail = 5; 707 else if (st->dst.state > sp->dst.state) 708 sfail = 6; 709 } 710 if (sfail) { 711 if (pf_status.debug >= PF_DEBUG_MISC) 712 kprintf("pfsync: %s stale update " 713 "(%d) id: %016jx " 714 "creatorid: %08x\n", 715 (sfail < 7 ? "ignoring" 716 : "partial"), sfail, 717 (uintmax_t)be64toh(st->id), 718 ntohl(st->creatorid)); 719 pfsyncstats.pfsyncs_stale++; 720 721 if (!(sp->sync_flags & PFSTATE_STALE)) { 722 /* we have a better state, send it */ 723 if (sc->sc_mbuf != NULL && !stale) 724 pfsync_sendout(sc); 725 stale++; 726 if (!st->sync_flags) 727 pfsync_pack_state( 728 PFSYNC_ACT_UPD, st, flags); 729 } 730 continue; 731 } 732 pfsync_alloc_scrub_memory(&sp->dst, &st->dst); 733 pf_state_peer_ntoh(&sp->src, &st->src); 734 pf_state_peer_ntoh(&sp->dst, &st->dst); 735 st->expire = ntohl(sp->expire) + time_second; 736 st->timeout = sp->timeout; 737 } 738 if (stale && sc->sc_mbuf != NULL) 739 pfsync_sendout(sc); 740 crit_exit(); 741 break; 742 /* 743 * It's not strictly necessary for us to support the "uncompressed" 744 * delete action, but it's relatively simple and maintains consistency. 745 */ 746 case PFSYNC_ACT_DEL: 747 if ((mp = m_pulldown(m, iplen + sizeof(*ph), 748 count * sizeof(*sp), &offp)) == NULL) { 749 pfsyncstats.pfsyncs_badlen++; 750 return; 751 } 752 753 crit_enter(); 754 for (i = 0, sp = (struct pfsync_state *)(mp->m_data + offp); 755 i < count; i++, sp++) { 756 bcopy(sp->id, &id_key.id, sizeof(id_key.id)); 757 id_key.creatorid = sp->creatorid; 758 759 st = pf_find_state_byid(&id_key); 760 if (st == NULL) { 761 pfsyncstats.pfsyncs_badstate++; 762 continue; 763 } 764 st->sync_flags |= PFSTATE_FROMSYNC; 765 pf_unlink_state(st); 766 } 767 crit_exit(); 768 break; 769 case PFSYNC_ACT_UPD_C: { 770 int update_requested = 0; 771 772 if ((mp = m_pulldown(m, iplen + sizeof(*ph), 773 count * sizeof(*up), &offp)) == NULL) { 774 pfsyncstats.pfsyncs_badlen++; 775 return; 776 } 777 778 crit_enter(); 779 for (i = 0, up = (struct pfsync_state_upd *)(mp->m_data + offp); 780 i < count; i++, up++) { 781 /* check for invalid values */ 782 if (up->timeout >= PFTM_MAX || 783 up->src.state > PF_TCPS_PROXY_DST || 784 up->dst.state > PF_TCPS_PROXY_DST) { 785 if (pf_status.debug >= PF_DEBUG_MISC) 786 kprintf("pfsync_insert: " 787 "PFSYNC_ACT_UPD_C: " 788 "invalid value\n"); 789 pfsyncstats.pfsyncs_badval++; 790 continue; 791 } 792 793 bcopy(up->id, &id_key.id, sizeof(id_key.id)); 794 id_key.creatorid = up->creatorid; 795 796 st = pf_find_state_byid(&id_key); 797 if (st == NULL) { 798 /* We don't have this state. Ask for it. */ 799 error = pfsync_request_update(up, &src); 800 if (error == ENOMEM) { 801 crit_exit(); 802 goto done; 803 } 804 update_requested = 1; 805 pfsyncstats.pfsyncs_badstate++; 806 continue; 807 } 808 sk = st->key[PF_SK_WIRE]; /* XXX right one? */ 809 sfail = 0; 810 if (sk->proto == IPPROTO_TCP) { 811 /* 812 * The state should never go backwards except 813 * for syn-proxy states. Neither should the 814 * sequence window slide backwards. 815 */ 816 if (st->src.state > up->src.state && 817 (st->src.state < PF_TCPS_PROXY_SRC || 818 up->src.state >= PF_TCPS_PROXY_SRC)) 819 sfail = 1; 820 else if (st->dst.state > up->dst.state) 821 sfail = 2; 822 else if (SEQ_GT(st->src.seqlo, 823 ntohl(up->src.seqlo))) 824 sfail = 3; 825 else if (st->dst.state >= TCPS_SYN_SENT && 826 SEQ_GT(st->dst.seqlo, ntohl(up->dst.seqlo))) 827 sfail = 4; 828 } else { 829 /* 830 * Non-TCP protocol state machine always go 831 * forwards 832 */ 833 if (st->src.state > up->src.state) 834 sfail = 5; 835 else if (st->dst.state > up->dst.state) 836 sfail = 6; 837 } 838 if (sfail) { 839 if (pf_status.debug >= PF_DEBUG_MISC) 840 kprintf("pfsync: ignoring stale update " 841 "(%d) id: %016" PRIx64 " " 842 "creatorid: %08x\n", sfail, 843 be64toh(st->id), 844 ntohl(st->creatorid)); 845 pfsyncstats.pfsyncs_stale++; 846 847 /* we have a better state, send it out */ 848 if ((!stale || update_requested) && 849 sc->sc_mbuf != NULL) { 850 pfsync_sendout(sc); 851 update_requested = 0; 852 } 853 stale++; 854 if (!st->sync_flags) 855 pfsync_pack_state(PFSYNC_ACT_UPD, st, 856 PFSYNC_FLAG_STALE); 857 continue; 858 } 859 pfsync_alloc_scrub_memory(&up->dst, &st->dst); 860 pf_state_peer_ntoh(&up->src, &st->src); 861 pf_state_peer_ntoh(&up->dst, &st->dst); 862 st->expire = ntohl(up->expire) + time_second; 863 st->timeout = up->timeout; 864 } 865 if ((update_requested || stale) && sc->sc_mbuf) 866 pfsync_sendout(sc); 867 crit_exit(); 868 break; 869 } 870 case PFSYNC_ACT_DEL_C: 871 if ((mp = m_pulldown(m, iplen + sizeof(*ph), 872 count * sizeof(*dp), &offp)) == NULL) { 873 pfsyncstats.pfsyncs_badlen++; 874 return; 875 } 876 877 crit_enter(); 878 for (i = 0, dp = (struct pfsync_state_del *)(mp->m_data + offp); 879 i < count; i++, dp++) { 880 bcopy(dp->id, &id_key.id, sizeof(id_key.id)); 881 id_key.creatorid = dp->creatorid; 882 883 st = pf_find_state_byid(&id_key); 884 if (st == NULL) { 885 pfsyncstats.pfsyncs_badstate++; 886 continue; 887 } 888 st->sync_flags |= PFSTATE_FROMSYNC; 889 pf_unlink_state(st); 890 } 891 crit_exit(); 892 break; 893 case PFSYNC_ACT_INS_F: 894 case PFSYNC_ACT_DEL_F: 895 /* not implemented */ 896 break; 897 case PFSYNC_ACT_UREQ: 898 if ((mp = m_pulldown(m, iplen + sizeof(*ph), 899 count * sizeof(*rup), &offp)) == NULL) { 900 pfsyncstats.pfsyncs_badlen++; 901 return; 902 } 903 904 crit_enter(); 905 if (sc->sc_mbuf != NULL) 906 pfsync_sendout(sc); 907 for (i = 0, 908 rup = (struct pfsync_state_upd_req *)(mp->m_data + offp); 909 i < count; i++, rup++) { 910 bcopy(rup->id, &id_key.id, sizeof(id_key.id)); 911 id_key.creatorid = rup->creatorid; 912 913 if (id_key.id == 0 && id_key.creatorid == 0) { 914 sc->sc_ureq_received = mycpu->gd_time_seconds; 915 if (sc->sc_bulk_send_next == NULL) { 916 if (++sc->sc_bulk_send_cpu >= ncpus) 917 sc->sc_bulk_send_cpu = 0; 918 sc->sc_bulk_send_next = 919 TAILQ_FIRST(&state_list[sc->sc_bulk_send_cpu]); 920 } 921 sc->sc_bulk_terminator = 922 sc->sc_bulk_send_next; 923 sc->sc_bulk_terminator_cpu = 924 sc->sc_bulk_send_cpu; 925 if (pf_status.debug >= PF_DEBUG_MISC) 926 kprintf("pfsync: received " 927 "bulk update request\n"); 928 pfsync_send_bus(sc, PFSYNC_BUS_START); 929 lwkt_reltoken(&pf_token); 930 callout_init(&sc->sc_bulk_tmo); 931 lwkt_gettoken(&pf_token); 932 } else { 933 st = pf_find_state_byid(&id_key); 934 if (st == NULL) { 935 pfsyncstats.pfsyncs_badstate++; 936 continue; 937 } 938 if (!st->sync_flags) 939 pfsync_pack_state(PFSYNC_ACT_UPD, 940 st, 0); 941 } 942 } 943 if (sc->sc_mbuf != NULL) 944 pfsync_sendout(sc); 945 crit_exit(); 946 break; 947 case PFSYNC_ACT_BUS: 948 /* If we're not waiting for a bulk update, who cares. */ 949 if (sc->sc_ureq_sent == 0) 950 break; 951 952 if ((mp = m_pulldown(m, iplen + sizeof(*ph), 953 sizeof(*bus), &offp)) == NULL) { 954 pfsyncstats.pfsyncs_badlen++; 955 return; 956 } 957 bus = (struct pfsync_state_bus *)(mp->m_data + offp); 958 switch (bus->status) { 959 case PFSYNC_BUS_START: 960 lwkt_reltoken(&pf_token); 961 callout_reset(&sc->sc_bulkfail_tmo, 962 pf_pool_limits[PF_LIMIT_STATES].limit / 963 (PFSYNC_BULKPACKETS * sc->sc_maxcount), 964 pfsync_bulkfail, LIST_FIRST(&pfsync_list)); 965 lwkt_gettoken(&pf_token); 966 if (pf_status.debug >= PF_DEBUG_MISC) 967 kprintf("pfsync: received bulk " 968 "update start\n"); 969 break; 970 case PFSYNC_BUS_END: 971 if (mycpu->gd_time_seconds - ntohl(bus->endtime) >= 972 sc->sc_ureq_sent) { 973 /* that's it, we're happy */ 974 sc->sc_ureq_sent = 0; 975 sc->sc_bulk_tries = 0; 976 lwkt_reltoken(&pf_token); 977 callout_stop(&sc->sc_bulkfail_tmo); 978 lwkt_gettoken(&pf_token); 979 #ifdef CARP 980 if (!pfsync_sync_ok) { 981 lwkt_reltoken(&pf_token); 982 carp_group_demote_adj(&sc->sc_if, -1); 983 lwkt_gettoken(&pf_token); 984 } 985 #endif 986 pfsync_sync_ok = 1; 987 if (pf_status.debug >= PF_DEBUG_MISC) 988 kprintf("pfsync: received valid " 989 "bulk update end\n"); 990 } else { 991 if (pf_status.debug >= PF_DEBUG_MISC) 992 kprintf("pfsync: received invalid " 993 "bulk update end: bad timestamp\n"); 994 } 995 break; 996 } 997 break; 998 } 999 1000 done: 1001 if (m) 1002 m_freem(m); 1003 } 1004 1005 int 1006 pfsyncoutput(struct ifnet *ifp, struct mbuf *m, struct sockaddr *dst, 1007 struct rtentry *rt) 1008 { 1009 m_freem(m); 1010 return (0); 1011 } 1012 1013 /* ARGSUSED */ 1014 int 1015 pfsyncioctl(struct ifnet *ifp, u_long cmd, caddr_t data, struct ucred *cr) 1016 { 1017 struct pfsync_softc *sc = ifp->if_softc; 1018 struct ifreq *ifr = (struct ifreq *)data; 1019 struct ip_moptions *imo = &sc->sc_imo; 1020 struct pfsyncreq pfsyncr; 1021 struct ifnet *sifp; 1022 int error; 1023 1024 lwkt_gettoken(&pf_token); 1025 1026 switch (cmd) { 1027 case SIOCSIFADDR: 1028 case SIOCAIFADDR: 1029 case SIOCSIFDSTADDR: 1030 case SIOCSIFFLAGS: 1031 if (ifp->if_flags & IFF_UP) 1032 ifp->if_flags |= IFF_RUNNING; 1033 else 1034 ifp->if_flags &= ~IFF_RUNNING; 1035 break; 1036 case SIOCSIFMTU: 1037 if (ifr->ifr_mtu < PFSYNC_MINMTU) { 1038 lwkt_reltoken(&pf_token); 1039 return (EINVAL); 1040 } 1041 if (ifr->ifr_mtu > MCLBYTES) 1042 ifr->ifr_mtu = MCLBYTES; 1043 crit_enter(); 1044 if (ifr->ifr_mtu < ifp->if_mtu) 1045 pfsync_sendout(sc); 1046 pfsync_setmtu(sc, ifr->ifr_mtu); 1047 crit_exit(); 1048 break; 1049 case SIOCGETPFSYNC: 1050 bzero(&pfsyncr, sizeof(pfsyncr)); 1051 if (sc->sc_sync_ifp) 1052 strlcpy(pfsyncr.pfsyncr_syncdev, 1053 sc->sc_sync_ifp->if_xname, IFNAMSIZ); 1054 pfsyncr.pfsyncr_syncpeer = sc->sc_sync_peer; 1055 pfsyncr.pfsyncr_maxupdates = sc->sc_maxupdates; 1056 lwkt_reltoken(&pf_token); 1057 if ((error = copyout(&pfsyncr, ifr->ifr_data, sizeof(pfsyncr)))) 1058 return (error); 1059 lwkt_gettoken(&pf_token); 1060 break; 1061 case SIOCSETPFSYNC: 1062 if ((error = priv_check_cred(cr, PRIV_ROOT, NULL_CRED_OKAY)) != 0) { 1063 lwkt_reltoken(&pf_token); 1064 return (error); 1065 } 1066 if ((error = copyin(ifr->ifr_data, &pfsyncr, sizeof(pfsyncr)))) { 1067 lwkt_reltoken(&pf_token); 1068 return (error); 1069 } 1070 1071 if (pfsyncr.pfsyncr_syncpeer.s_addr == 0) 1072 sc->sc_sync_peer.s_addr = INADDR_PFSYNC_GROUP; 1073 else 1074 sc->sc_sync_peer.s_addr = 1075 pfsyncr.pfsyncr_syncpeer.s_addr; 1076 1077 if (pfsyncr.pfsyncr_maxupdates > 255) { 1078 lwkt_reltoken(&pf_token); 1079 return (EINVAL); 1080 } 1081 sc->sc_maxupdates = pfsyncr.pfsyncr_maxupdates; 1082 1083 if (pfsyncr.pfsyncr_syncdev[0] == 0) { 1084 sc->sc_sync_ifp = NULL; 1085 if (sc->sc_mbuf_net != NULL) { 1086 /* Don't keep stale pfsync packets around. */ 1087 crit_enter(); 1088 m_freem(sc->sc_mbuf_net); 1089 sc->sc_mbuf_net = NULL; 1090 sc->sc_statep_net.s = NULL; 1091 crit_exit(); 1092 } 1093 if (imo->imo_num_memberships > 0) { 1094 pfsync_in_delmulti(imo->imo_membership[--imo->imo_num_memberships]); 1095 imo->imo_multicast_ifp = NULL; 1096 } 1097 break; 1098 } 1099 1100 /* 1101 * XXX not that MPSAFE; pfsync needs serious rework 1102 */ 1103 ifnet_deserialize_all(ifp); 1104 ifnet_lock(); 1105 sifp = ifunit(pfsyncr.pfsyncr_syncdev); 1106 ifnet_unlock(); 1107 ifnet_serialize_all(ifp); 1108 1109 if (sifp == NULL) { 1110 lwkt_reltoken(&pf_token); 1111 return (EINVAL); 1112 } 1113 1114 crit_enter(); 1115 if (sifp->if_mtu < sc->sc_if.if_mtu || 1116 (sc->sc_sync_ifp != NULL && 1117 sifp->if_mtu < sc->sc_sync_ifp->if_mtu) || 1118 sifp->if_mtu < MCLBYTES - sizeof(struct ip)) 1119 pfsync_sendout(sc); 1120 sc->sc_sync_ifp = sifp; 1121 1122 pfsync_setmtu(sc, sc->sc_if.if_mtu); 1123 1124 if (imo->imo_num_memberships > 0) { 1125 pfsync_in_delmulti(imo->imo_membership[--imo->imo_num_memberships]); 1126 imo->imo_multicast_ifp = NULL; 1127 } 1128 1129 if (sc->sc_sync_ifp && 1130 sc->sc_sync_peer.s_addr == INADDR_PFSYNC_GROUP) { 1131 if (!(sc->sc_sync_ifp->if_flags & IFF_MULTICAST)) { 1132 sc->sc_sync_ifp = NULL; 1133 lwkt_reltoken(&pf_token); 1134 crit_exit(); 1135 return (EADDRNOTAVAIL); 1136 } 1137 1138 if ((imo->imo_membership[0] = 1139 pfsync_in_addmulti(sc->sc_sync_ifp)) == NULL) { 1140 sc->sc_sync_ifp = NULL; 1141 lwkt_reltoken(&pf_token); 1142 crit_exit(); 1143 return (ENOBUFS); 1144 } 1145 imo->imo_num_memberships++; 1146 imo->imo_multicast_ifp = sc->sc_sync_ifp; 1147 imo->imo_multicast_ttl = PFSYNC_DFLTTL; 1148 imo->imo_multicast_loop = 0; 1149 } 1150 1151 if (sc->sc_sync_ifp || 1152 sc->sc_sendaddr.s_addr != INADDR_PFSYNC_GROUP) { 1153 /* Request a full state table update. */ 1154 sc->sc_ureq_sent = mycpu->gd_time_seconds; 1155 #ifdef CARP 1156 if (pfsync_sync_ok) 1157 carp_group_demote_adj(&sc->sc_if, 1); 1158 #endif 1159 pfsync_sync_ok = 0; 1160 if (pf_status.debug >= PF_DEBUG_MISC) 1161 kprintf("pfsync: requesting bulk update\n"); 1162 lwkt_reltoken(&pf_token); 1163 callout_reset(&sc->sc_bulkfail_tmo, 5 * hz, 1164 pfsync_bulkfail, LIST_FIRST(&pfsync_list)); 1165 lwkt_gettoken(&pf_token); 1166 error = pfsync_request_update(NULL, NULL); 1167 if (error == ENOMEM) { 1168 lwkt_reltoken(&pf_token); 1169 crit_exit(); 1170 return (ENOMEM); 1171 } 1172 pfsync_sendout(sc); 1173 } 1174 crit_exit(); 1175 1176 break; 1177 1178 default: 1179 lwkt_reltoken(&pf_token); 1180 return (ENOTTY); 1181 } 1182 1183 lwkt_reltoken(&pf_token); 1184 return (0); 1185 } 1186 1187 void 1188 pfsync_setmtu(struct pfsync_softc *sc, int mtu_req) 1189 { 1190 int mtu; 1191 1192 if (sc->sc_sync_ifp && sc->sc_sync_ifp->if_mtu < mtu_req) 1193 mtu = sc->sc_sync_ifp->if_mtu; 1194 else 1195 mtu = mtu_req; 1196 1197 sc->sc_maxcount = (mtu - sizeof(struct pfsync_header)) / 1198 sizeof(struct pfsync_state); 1199 if (sc->sc_maxcount > 254) 1200 sc->sc_maxcount = 254; 1201 sc->sc_if.if_mtu = sizeof(struct pfsync_header) + 1202 sc->sc_maxcount * sizeof(struct pfsync_state); 1203 } 1204 1205 struct mbuf * 1206 pfsync_get_mbuf(struct pfsync_softc *sc, u_int8_t action, void **sp) 1207 { 1208 struct pfsync_header *h; 1209 struct mbuf *m; 1210 int len; 1211 1212 ASSERT_LWKT_TOKEN_HELD(&pf_token); 1213 1214 MGETHDR(m, M_WAITOK, MT_DATA); 1215 if (m == NULL) { 1216 IFNET_STAT_INC(&sc->sc_if, oerrors, 1); 1217 return (NULL); 1218 } 1219 1220 switch (action) { 1221 case PFSYNC_ACT_CLR: 1222 len = sizeof(struct pfsync_header) + 1223 sizeof(struct pfsync_state_clr); 1224 break; 1225 case PFSYNC_ACT_UPD_C: 1226 len = (sc->sc_maxcount * sizeof(struct pfsync_state_upd)) + 1227 sizeof(struct pfsync_header); 1228 break; 1229 case PFSYNC_ACT_DEL_C: 1230 len = (sc->sc_maxcount * sizeof(struct pfsync_state_del)) + 1231 sizeof(struct pfsync_header); 1232 break; 1233 case PFSYNC_ACT_UREQ: 1234 len = (sc->sc_maxcount * sizeof(struct pfsync_state_upd_req)) + 1235 sizeof(struct pfsync_header); 1236 break; 1237 case PFSYNC_ACT_BUS: 1238 len = sizeof(struct pfsync_header) + 1239 sizeof(struct pfsync_state_bus); 1240 break; 1241 case PFSYNC_ACT_TDB_UPD: 1242 len = (sc->sc_maxcount * sizeof(struct pfsync_tdb)) + 1243 sizeof(struct pfsync_header); 1244 break; 1245 default: 1246 len = (sc->sc_maxcount * sizeof(struct pfsync_state)) + 1247 sizeof(struct pfsync_header); 1248 break; 1249 } 1250 1251 if (len > MHLEN) { 1252 MCLGET(m, M_WAITOK); 1253 if ((m->m_flags & M_EXT) == 0) { 1254 m_free(m); 1255 IFNET_STAT_INC(&sc->sc_if, oerrors, 1); 1256 return (NULL); 1257 } 1258 m->m_data += (MCLBYTES - len) &~ (sizeof(long) - 1); 1259 } else 1260 MH_ALIGN(m, len); 1261 1262 m->m_pkthdr.rcvif = NULL; 1263 m->m_pkthdr.len = m->m_len = sizeof(struct pfsync_header); 1264 h = mtod(m, struct pfsync_header *); 1265 h->version = PFSYNC_VERSION; 1266 h->af = 0; 1267 h->count = 0; 1268 h->action = action; 1269 1270 *sp = (void *)((char *)h + PFSYNC_HDRLEN); 1271 lwkt_reltoken(&pf_token); 1272 callout_reset(&sc->sc_tmo, hz, pfsync_timeout, 1273 LIST_FIRST(&pfsync_list)); 1274 lwkt_gettoken(&pf_token); 1275 return (m); 1276 } 1277 1278 int 1279 pfsync_pack_state(u_int8_t action, struct pf_state *st, int flags) 1280 { 1281 struct ifnet *ifp = NULL; 1282 struct pfsync_softc *sc = pfsyncif; 1283 struct pfsync_header *h, *h_net; 1284 struct pfsync_state *sp = NULL; 1285 struct pfsync_state_upd *up = NULL; 1286 struct pfsync_state_del *dp = NULL; 1287 int ret = 0; 1288 u_int8_t i = 255, newaction = 0; 1289 1290 if (sc == NULL) 1291 return (0); 1292 ifp = &sc->sc_if; 1293 1294 /* 1295 * If a packet falls in the forest and there's nobody around to 1296 * hear, does it make a sound? 1297 */ 1298 if (ifp->if_bpf == NULL && sc->sc_sync_ifp == NULL && 1299 sc->sc_sync_peer.s_addr == INADDR_PFSYNC_GROUP) { 1300 /* Don't leave any stale pfsync packets hanging around. */ 1301 if (sc->sc_mbuf != NULL) { 1302 m_freem(sc->sc_mbuf); 1303 sc->sc_mbuf = NULL; 1304 sc->sc_statep.s = NULL; 1305 } 1306 return (0); 1307 } 1308 1309 if (action >= PFSYNC_ACT_MAX) 1310 return (EINVAL); 1311 1312 crit_enter(); 1313 if (sc->sc_mbuf == NULL) { 1314 if ((sc->sc_mbuf = pfsync_get_mbuf(sc, action, 1315 (void *)&sc->sc_statep.s)) == NULL) { 1316 crit_exit(); 1317 return (ENOMEM); 1318 } 1319 h = mtod(sc->sc_mbuf, struct pfsync_header *); 1320 } else { 1321 h = mtod(sc->sc_mbuf, struct pfsync_header *); 1322 if (h->action != action) { 1323 pfsync_sendout(sc); 1324 if ((sc->sc_mbuf = pfsync_get_mbuf(sc, action, 1325 (void *)&sc->sc_statep.s)) == NULL) { 1326 crit_exit(); 1327 return (ENOMEM); 1328 } 1329 h = mtod(sc->sc_mbuf, struct pfsync_header *); 1330 } else { 1331 /* 1332 * If it's an update, look in the packet to see if 1333 * we already have an update for the state. 1334 */ 1335 if (action == PFSYNC_ACT_UPD && sc->sc_maxupdates) { 1336 struct pfsync_state *usp = 1337 (void *)((char *)h + PFSYNC_HDRLEN); 1338 1339 for (i = 0; i < h->count; i++) { 1340 if (!memcmp(usp->id, &st->id, 1341 PFSYNC_ID_LEN) && 1342 usp->creatorid == st->creatorid) { 1343 sp = usp; 1344 sp->updates++; 1345 break; 1346 } 1347 usp++; 1348 } 1349 } 1350 } 1351 } 1352 1353 st->pfsync_time = mycpu->gd_time_seconds; 1354 1355 if (sp == NULL) { 1356 /* not a "duplicate" update */ 1357 i = 255; 1358 sp = sc->sc_statep.s++; 1359 sc->sc_mbuf->m_pkthdr.len = 1360 sc->sc_mbuf->m_len += sizeof(struct pfsync_state); 1361 h->count++; 1362 bzero(sp, sizeof(*sp)); 1363 1364 pfsync_state_export(sp, st); 1365 1366 if (flags & PFSYNC_FLAG_STALE) 1367 sp->sync_flags |= PFSTATE_STALE; 1368 } else { 1369 pf_state_peer_hton(&st->src, &sp->src); 1370 pf_state_peer_hton(&st->dst, &sp->dst); 1371 1372 if (st->expire <= time_second) 1373 sp->expire = htonl(0); 1374 else 1375 sp->expire = htonl(st->expire - time_second); 1376 } 1377 1378 /* do we need to build "compressed" actions for network transfer? */ 1379 if (sc->sc_sync_ifp && flags & PFSYNC_FLAG_COMPRESS) { 1380 switch (action) { 1381 case PFSYNC_ACT_UPD: 1382 newaction = PFSYNC_ACT_UPD_C; 1383 break; 1384 case PFSYNC_ACT_DEL: 1385 newaction = PFSYNC_ACT_DEL_C; 1386 break; 1387 default: 1388 /* by default we just send the uncompressed states */ 1389 break; 1390 } 1391 } 1392 1393 if (newaction) { 1394 if (sc->sc_mbuf_net == NULL) { 1395 if ((sc->sc_mbuf_net = pfsync_get_mbuf(sc, newaction, 1396 (void *)&sc->sc_statep_net.s)) == NULL) { 1397 crit_exit(); 1398 return (ENOMEM); 1399 } 1400 } 1401 h_net = mtod(sc->sc_mbuf_net, struct pfsync_header *); 1402 1403 switch (newaction) { 1404 case PFSYNC_ACT_UPD_C: 1405 if (i != 255) { 1406 up = (void *)((char *)h_net + 1407 PFSYNC_HDRLEN + (i * sizeof(*up))); 1408 up->updates++; 1409 } else { 1410 h_net->count++; 1411 sc->sc_mbuf_net->m_pkthdr.len = 1412 sc->sc_mbuf_net->m_len += sizeof(*up); 1413 up = sc->sc_statep_net.u++; 1414 1415 bzero(up, sizeof(*up)); 1416 bcopy(&st->id, up->id, sizeof(up->id)); 1417 up->creatorid = st->creatorid; 1418 } 1419 up->timeout = st->timeout; 1420 up->expire = sp->expire; 1421 up->src = sp->src; 1422 up->dst = sp->dst; 1423 break; 1424 case PFSYNC_ACT_DEL_C: 1425 sc->sc_mbuf_net->m_pkthdr.len = 1426 sc->sc_mbuf_net->m_len += sizeof(*dp); 1427 dp = sc->sc_statep_net.d++; 1428 h_net->count++; 1429 1430 bzero(dp, sizeof(*dp)); 1431 bcopy(&st->id, dp->id, sizeof(dp->id)); 1432 dp->creatorid = st->creatorid; 1433 break; 1434 } 1435 } 1436 1437 if (h->count == sc->sc_maxcount || 1438 (sc->sc_maxupdates && (sp->updates >= sc->sc_maxupdates))) 1439 ret = pfsync_sendout(sc); 1440 1441 crit_exit(); 1442 return (ret); 1443 } 1444 1445 int 1446 pfsync_request_update(struct pfsync_state_upd *up, struct in_addr *src) 1447 { 1448 struct pfsync_header *h; 1449 struct pfsync_softc *sc = pfsyncif; 1450 struct pfsync_state_upd_req *rup; 1451 int ret = 0; 1452 1453 if (sc == NULL) 1454 return (0); 1455 1456 if (sc->sc_mbuf == NULL) { 1457 if ((sc->sc_mbuf = pfsync_get_mbuf(sc, PFSYNC_ACT_UREQ, 1458 (void *)&sc->sc_statep.s)) == NULL) 1459 return (ENOMEM); 1460 h = mtod(sc->sc_mbuf, struct pfsync_header *); 1461 } else { 1462 h = mtod(sc->sc_mbuf, struct pfsync_header *); 1463 if (h->action != PFSYNC_ACT_UREQ) { 1464 pfsync_sendout(sc); 1465 if ((sc->sc_mbuf = pfsync_get_mbuf(sc, PFSYNC_ACT_UREQ, 1466 (void *)&sc->sc_statep.s)) == NULL) 1467 return (ENOMEM); 1468 h = mtod(sc->sc_mbuf, struct pfsync_header *); 1469 } 1470 } 1471 1472 if (src != NULL) 1473 sc->sc_sendaddr = *src; 1474 sc->sc_mbuf->m_pkthdr.len = sc->sc_mbuf->m_len += sizeof(*rup); 1475 h->count++; 1476 rup = sc->sc_statep.r++; 1477 bzero(rup, sizeof(*rup)); 1478 if (up != NULL) { 1479 bcopy(up->id, rup->id, sizeof(rup->id)); 1480 rup->creatorid = up->creatorid; 1481 } 1482 1483 if (h->count == sc->sc_maxcount) 1484 ret = pfsync_sendout(sc); 1485 1486 return (ret); 1487 } 1488 1489 int 1490 pfsync_clear_states(u_int32_t creatorid, char *ifname) 1491 { 1492 struct pfsync_softc *sc = pfsyncif; 1493 struct pfsync_state_clr *cp; 1494 int ret; 1495 1496 if (sc == NULL) 1497 return (0); 1498 1499 crit_enter(); 1500 if (sc->sc_mbuf != NULL) 1501 pfsync_sendout(sc); 1502 if ((sc->sc_mbuf = pfsync_get_mbuf(sc, PFSYNC_ACT_CLR, 1503 (void *)&sc->sc_statep.c)) == NULL) { 1504 crit_exit(); 1505 return (ENOMEM); 1506 } 1507 sc->sc_mbuf->m_pkthdr.len = sc->sc_mbuf->m_len += sizeof(*cp); 1508 cp = sc->sc_statep.c; 1509 cp->creatorid = creatorid; 1510 if (ifname != NULL) 1511 strlcpy(cp->ifname, ifname, IFNAMSIZ); 1512 1513 ret = (pfsync_sendout(sc)); 1514 crit_exit(); 1515 return (ret); 1516 } 1517 1518 void 1519 pfsync_timeout(void *v) 1520 { 1521 struct pfsync_softc *sc = v; 1522 1523 crit_enter(); 1524 pfsync_sendout(sc); 1525 crit_exit(); 1526 } 1527 1528 void 1529 pfsync_send_bus(struct pfsync_softc *sc, u_int8_t status) 1530 { 1531 struct pfsync_state_bus *bus; 1532 1533 if (sc->sc_mbuf != NULL) 1534 pfsync_sendout(sc); 1535 1536 if (pfsync_sync_ok && 1537 (sc->sc_mbuf = pfsync_get_mbuf(sc, PFSYNC_ACT_BUS, 1538 (void *)&sc->sc_statep.b)) != NULL) { 1539 sc->sc_mbuf->m_pkthdr.len = sc->sc_mbuf->m_len += sizeof(*bus); 1540 bus = sc->sc_statep.b; 1541 bus->creatorid = pf_status.hostid; 1542 bus->status = status; 1543 bus->endtime = htonl(mycpu->gd_time_seconds - sc->sc_ureq_received); 1544 pfsync_sendout(sc); 1545 } 1546 } 1547 1548 void 1549 pfsync_bulk_update(void *v) 1550 { 1551 struct pfsync_softc *sc = v; 1552 int i = 0; 1553 int cpu; 1554 struct pf_state *state; 1555 1556 ASSERT_LWKT_TOKEN_HELD(&pf_token); 1557 1558 crit_enter(); 1559 if (sc->sc_mbuf != NULL) 1560 pfsync_sendout(sc); 1561 1562 /* 1563 * Grab at most PFSYNC_BULKPACKETS worth of states which have not 1564 * been sent since the latest request was made. 1565 */ 1566 state = sc->sc_bulk_send_next; 1567 cpu = sc->sc_bulk_send_cpu; 1568 if (state) 1569 do { 1570 /* send state update if syncable and not already sent */ 1571 if (!state->sync_flags 1572 && state->timeout < PFTM_MAX 1573 && state->pfsync_time <= sc->sc_ureq_received) { 1574 pfsync_pack_state(PFSYNC_ACT_UPD, state, 0); 1575 i++; 1576 } 1577 1578 /* figure next state to send */ 1579 state = TAILQ_NEXT(state, entry_list); 1580 1581 /* wrap to start of list if we hit the end */ 1582 if (state == NULL) { 1583 if (++cpu >= ncpus) 1584 cpu = 0; 1585 state = TAILQ_FIRST(&state_list[cpu]); 1586 } 1587 } while (i < sc->sc_maxcount * PFSYNC_BULKPACKETS && 1588 cpu != sc->sc_bulk_terminator_cpu && 1589 state != sc->sc_bulk_terminator); 1590 1591 if (state == NULL || (cpu == sc->sc_bulk_terminator_cpu && 1592 state == sc->sc_bulk_terminator)) { 1593 /* we're done */ 1594 pfsync_send_bus(sc, PFSYNC_BUS_END); 1595 sc->sc_ureq_received = 0; 1596 sc->sc_bulk_send_next = NULL; 1597 sc->sc_bulk_terminator = NULL; 1598 sc->sc_bulk_send_cpu = 0; 1599 sc->sc_bulk_terminator_cpu = 0; 1600 lwkt_reltoken(&pf_token); 1601 callout_stop(&sc->sc_bulk_tmo); 1602 lwkt_gettoken(&pf_token); 1603 if (pf_status.debug >= PF_DEBUG_MISC) 1604 kprintf("pfsync: bulk update complete\n"); 1605 } else { 1606 /* look again for more in a bit */ 1607 lwkt_reltoken(&pf_token); 1608 callout_reset(&sc->sc_bulk_tmo, 1, pfsync_timeout, 1609 LIST_FIRST(&pfsync_list)); 1610 lwkt_gettoken(&pf_token); 1611 sc->sc_bulk_send_next = state; 1612 sc->sc_bulk_send_cpu = cpu; 1613 } 1614 if (sc->sc_mbuf != NULL) 1615 pfsync_sendout(sc); 1616 crit_exit(); 1617 } 1618 1619 void 1620 pfsync_bulkfail(void *v) 1621 { 1622 struct pfsync_softc *sc = v; 1623 int error; 1624 1625 ASSERT_LWKT_TOKEN_HELD(&pf_token); 1626 1627 if (sc->sc_bulk_tries++ < PFSYNC_MAX_BULKTRIES) { 1628 /* Try again in a bit */ 1629 lwkt_reltoken(&pf_token); 1630 callout_reset(&sc->sc_bulkfail_tmo, 5 * hz, pfsync_bulkfail, 1631 LIST_FIRST(&pfsync_list)); 1632 lwkt_gettoken(&pf_token); 1633 crit_enter(); 1634 error = pfsync_request_update(NULL, NULL); 1635 if (error == ENOMEM) { 1636 if (pf_status.debug >= PF_DEBUG_MISC) 1637 kprintf("pfsync: cannot allocate mbufs for " 1638 "bulk update\n"); 1639 } else 1640 pfsync_sendout(sc); 1641 crit_exit(); 1642 } else { 1643 /* Pretend like the transfer was ok */ 1644 sc->sc_ureq_sent = 0; 1645 sc->sc_bulk_tries = 0; 1646 #ifdef CARP 1647 if (!pfsync_sync_ok) 1648 carp_group_demote_adj(&sc->sc_if, -1); 1649 #endif 1650 pfsync_sync_ok = 1; 1651 if (pf_status.debug >= PF_DEBUG_MISC) 1652 kprintf("pfsync: failed to receive " 1653 "bulk update status\n"); 1654 lwkt_reltoken(&pf_token); 1655 callout_stop(&sc->sc_bulkfail_tmo); 1656 lwkt_gettoken(&pf_token); 1657 } 1658 } 1659 1660 static void 1661 pfsync_sendout_handler(netmsg_t nmsg) 1662 { 1663 struct netmsg_genpkt *msg = (struct netmsg_genpkt *)nmsg; 1664 1665 pfsync_sendout_mbuf(msg->arg1, msg->m); 1666 } 1667 1668 int 1669 pfsync_sendout(struct pfsync_softc *sc) 1670 { 1671 #if NBPF > 0 1672 struct ifnet *ifp = &sc->sc_if; 1673 #endif 1674 struct mbuf *m; 1675 struct netmsg_genpkt *msg; 1676 1677 ASSERT_LWKT_TOKEN_HELD(&pf_token); 1678 1679 lwkt_reltoken(&pf_token); 1680 callout_stop(&sc->sc_tmo); 1681 lwkt_gettoken(&pf_token); 1682 1683 if (sc->sc_mbuf == NULL) 1684 return (0); 1685 m = sc->sc_mbuf; 1686 sc->sc_mbuf = NULL; 1687 sc->sc_statep.s = NULL; 1688 1689 #if NBPF > 0 1690 if (ifp->if_bpf) { 1691 bpf_gettoken(); 1692 if (ifp->if_bpf) 1693 bpf_mtap(ifp->if_bpf, m); 1694 bpf_reltoken(); 1695 } 1696 #endif 1697 1698 if (sc->sc_mbuf_net) { 1699 m_freem(m); 1700 m = sc->sc_mbuf_net; 1701 sc->sc_mbuf_net = NULL; 1702 sc->sc_statep_net.s = NULL; 1703 } 1704 1705 msg = &m->m_hdr.mh_genmsg; 1706 netmsg_init(&msg->base, NULL, &netisr_apanic_rport, 0, 1707 pfsync_sendout_handler); 1708 msg->m = m; 1709 msg->arg1 = sc; 1710 netisr_sendmsg(&msg->base, 0); 1711 1712 return (0); 1713 } 1714 1715 int 1716 pfsync_sendout_mbuf(struct pfsync_softc *sc, struct mbuf *m) 1717 { 1718 struct sockaddr sa; 1719 struct ip *ip; 1720 1721 if (sc->sc_sync_ifp || 1722 sc->sc_sync_peer.s_addr != INADDR_PFSYNC_GROUP) { 1723 M_PREPEND(m, sizeof(struct ip), M_WAITOK); 1724 if (m == NULL) { 1725 pfsyncstats.pfsyncs_onomem++; 1726 return (0); 1727 } 1728 ip = mtod(m, struct ip *); 1729 ip->ip_v = IPVERSION; 1730 ip->ip_hl = sizeof(*ip) >> 2; 1731 ip->ip_tos = IPTOS_LOWDELAY; 1732 ip->ip_len = htons(m->m_pkthdr.len); 1733 ip->ip_id = htons(ip_randomid()); 1734 ip->ip_off = htons(IP_DF); 1735 ip->ip_ttl = PFSYNC_DFLTTL; 1736 ip->ip_p = IPPROTO_PFSYNC; 1737 ip->ip_sum = 0; 1738 1739 bzero(&sa, sizeof(sa)); 1740 ip->ip_src.s_addr = INADDR_ANY; 1741 1742 if (sc->sc_sendaddr.s_addr == INADDR_PFSYNC_GROUP) 1743 m->m_flags |= M_MCAST; 1744 ip->ip_dst = sc->sc_sendaddr; 1745 sc->sc_sendaddr.s_addr = sc->sc_sync_peer.s_addr; 1746 1747 pfsyncstats.pfsyncs_opackets++; 1748 1749 if (ip_output(m, NULL, NULL, IP_RAWOUTPUT, &sc->sc_imo, NULL)) 1750 pfsyncstats.pfsyncs_oerrors++; 1751 } else 1752 m_freem(m); 1753 1754 return (0); 1755 } 1756 1757 static int 1758 pfsync_modevent(module_t mod, int type, void *data) 1759 { 1760 int error = 0; 1761 1762 struct pfsync_softc *pfs_if, *tmp; 1763 1764 lwkt_gettoken(&pf_token); 1765 1766 switch (type) { 1767 case MOD_LOAD: 1768 LIST_INIT(&pfsync_list); 1769 lwkt_reltoken(&pf_token); 1770 if_clone_attach(&pfsync_cloner); 1771 lwkt_gettoken(&pf_token); 1772 /* Override the function pointer for pf_ioctl.c */ 1773 break; 1774 1775 case MOD_UNLOAD: 1776 lwkt_reltoken(&pf_token); 1777 if_clone_detach(&pfsync_cloner); 1778 lwkt_gettoken(&pf_token); 1779 LIST_FOREACH_MUTABLE(pfs_if, &pfsync_list, sc_next, tmp) { 1780 pfsync_clone_destroy(&pfs_if->sc_if); 1781 } 1782 break; 1783 1784 default: 1785 error = EINVAL; 1786 break; 1787 } 1788 1789 lwkt_reltoken(&pf_token); 1790 return error; 1791 } 1792 1793 static moduledata_t pfsync_mod = { 1794 "pfsync", 1795 pfsync_modevent, 1796 0 1797 }; 1798 1799 #define PFSYNC_MODVER 44 1800 1801 DECLARE_MODULE(pfsync, pfsync_mod, SI_SUB_PSEUDO, SI_ORDER_ANY); 1802 MODULE_VERSION(pfsync, PFSYNC_MODVER); 1803 MODULE_DEPEND(pfsync, pf, PF_MODVER, PF_MODVER, PF_MODVER); 1804 1805 static void 1806 pfsync_in_addmulti_dispatch(netmsg_t nmsg) 1807 { 1808 struct lwkt_msg *lmsg = &nmsg->lmsg; 1809 struct ifnet *ifp = lmsg->u.ms_resultp; 1810 struct in_addr addr; 1811 1812 addr.s_addr = INADDR_PFSYNC_GROUP; 1813 lmsg->u.ms_resultp = in_addmulti(&addr, ifp); 1814 1815 lwkt_replymsg(lmsg, 0); 1816 } 1817 1818 static struct in_multi * 1819 pfsync_in_addmulti(struct ifnet *ifp) 1820 { 1821 struct netmsg_base nmsg; 1822 struct lwkt_msg *lmsg = &nmsg.lmsg; 1823 1824 netmsg_init(&nmsg, NULL, &curthread->td_msgport, 0, 1825 pfsync_in_addmulti_dispatch); 1826 lmsg->u.ms_resultp = ifp; 1827 1828 lwkt_domsg(netisr_cpuport(0), lmsg, 0); 1829 return lmsg->u.ms_resultp; 1830 } 1831 1832 static void 1833 pfsync_in_delmulti_dispatch(netmsg_t nmsg) 1834 { 1835 struct lwkt_msg *lmsg = &nmsg->lmsg; 1836 1837 in_delmulti(lmsg->u.ms_resultp); 1838 lwkt_replymsg(lmsg, 0); 1839 } 1840 1841 static void 1842 pfsync_in_delmulti(struct in_multi *inm) 1843 { 1844 struct netmsg_base nmsg; 1845 struct lwkt_msg *lmsg = &nmsg.lmsg; 1846 1847 netmsg_init(&nmsg, NULL, &curthread->td_msgport, 0, 1848 pfsync_in_delmulti_dispatch); 1849 lmsg->u.ms_resultp = inm; 1850 1851 lwkt_domsg(netisr_cpuport(0), lmsg, 0); 1852 } 1853