1 /* $OpenBSD: if_pfsync.c,v 1.98 2008/06/29 08:42:15 mcbride Exp $ */ 2 3 /* 4 * Copyright (c) 2002 Michael Shalayeff 5 * All rights reserved. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 1. Redistributions of source code must retain the above copyright 11 * notice, this list of conditions and the following disclaimer. 12 * 2. Redistributions in binary form must reproduce the above copyright 13 * notice, this list of conditions and the following disclaimer in the 14 * documentation and/or other materials provided with the distribution. 15 * 16 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR 17 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 18 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 19 * IN NO EVENT SHALL THE AUTHOR OR HIS RELATIVES BE LIABLE FOR ANY DIRECT, 20 * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 21 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 22 * SERVICES; LOSS OF MIND, USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 23 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, 24 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING 25 * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF 26 * THE POSSIBILITY OF SUCH DAMAGE. 27 */ 28 29 #include "opt_inet.h" 30 #include "opt_inet6.h" 31 #include "opt_carp.h" 32 33 #include <sys/param.h> 34 #include <sys/endian.h> 35 #include <sys/proc.h> 36 #include <sys/priv.h> 37 #include <sys/systm.h> 38 #include <sys/time.h> 39 #include <sys/mbuf.h> 40 #include <sys/socket.h> 41 #include <sys/kernel.h> 42 #include <sys/malloc.h> 43 #include <sys/module.h> 44 #include <sys/msgport2.h> 45 #include <sys/sockio.h> 46 #include <sys/thread2.h> 47 48 #include <machine/inttypes.h> 49 50 #include <net/if.h> 51 #include <net/if_types.h> 52 #include <net/ifq_var.h> 53 #include <net/route.h> 54 #include <net/bpf.h> 55 #include <net/netisr2.h> 56 #include <net/netmsg2.h> 57 #include <netinet/in.h> 58 #include <netinet/if_ether.h> 59 #include <netinet/ip_carp.h> 60 #include <netinet/tcp.h> 61 #include <netinet/tcp_seq.h> 62 63 #ifdef INET 64 #include <netinet/in_systm.h> 65 #include <netinet/in_var.h> 66 #include <netinet/ip.h> 67 #include <netinet/ip_var.h> 68 #endif 69 70 #ifdef INET6 71 #include <netinet6/nd6.h> 72 #endif /* INET6 */ 73 74 #include <net/pf/pfvar.h> 75 #include <net/pf/if_pfsync.h> 76 77 #define PFSYNCNAME "pfsync" 78 79 #define PFSYNC_MINMTU \ 80 (sizeof(struct pfsync_header) + sizeof(struct pf_state)) 81 82 #ifdef PFSYNCDEBUG 83 #define DPRINTF(x) do { if (pfsyncdebug) kprintf x ; } while (0) 84 int pfsyncdebug; 85 #else 86 #define DPRINTF(x) 87 #endif 88 89 struct pfsync_softc *pfsyncif = NULL; 90 struct pfsyncstats pfsyncstats; 91 92 void pfsyncattach(int); 93 static int pfsync_clone_destroy(struct ifnet *); 94 static int pfsync_clone_create(struct if_clone *, int, caddr_t); 95 void pfsync_setmtu(struct pfsync_softc *, int); 96 int pfsync_alloc_scrub_memory(struct pfsync_state_peer *, 97 struct pf_state_peer *); 98 int pfsyncoutput(struct ifnet *, struct mbuf *, struct sockaddr *, 99 struct rtentry *); 100 int pfsyncioctl(struct ifnet *, u_long, caddr_t, struct ucred *); 101 void pfsyncstart(struct ifnet *, struct ifaltq_subque *); 102 103 struct mbuf *pfsync_get_mbuf(struct pfsync_softc *, u_int8_t, void **); 104 int pfsync_request_update(struct pfsync_state_upd *, struct in_addr *); 105 int pfsync_sendout(struct pfsync_softc *); 106 int pfsync_sendout_mbuf(struct pfsync_softc *, struct mbuf *); 107 void pfsync_timeout(void *); 108 void pfsync_send_bus(struct pfsync_softc *, u_int8_t); 109 void pfsync_bulk_update(void *); 110 void pfsync_bulkfail(void *); 111 112 static struct in_multi *pfsync_in_addmulti(struct ifnet *); 113 static void pfsync_in_delmulti(struct in_multi *); 114 115 static MALLOC_DEFINE(M_PFSYNC, PFSYNCNAME, "Packet Filter State Sync. Interface"); 116 static LIST_HEAD(pfsync_list, pfsync_softc) pfsync_list; 117 118 int pfsync_sync_ok; 119 120 struct if_clone pfsync_cloner = 121 IF_CLONE_INITIALIZER("pfsync", pfsync_clone_create, pfsync_clone_destroy, 1 ,1); 122 123 void 124 pfsyncattach(int npfsync) 125 { 126 if_clone_attach(&pfsync_cloner); 127 } 128 static int 129 pfsync_clone_create(struct if_clone *ifc, int unit, caddr_t param __unused) 130 { 131 struct pfsync_softc *sc; 132 struct ifnet *ifp; 133 134 lwkt_gettoken(&pf_token); 135 136 sc = kmalloc(sizeof(*sc), M_PFSYNC, M_WAITOK | M_ZERO); 137 pfsync_sync_ok = 1; 138 sc->sc_mbuf = NULL; 139 sc->sc_mbuf_net = NULL; 140 sc->sc_mbuf_tdb = NULL; 141 sc->sc_statep.s = NULL; 142 sc->sc_statep_net.s = NULL; 143 sc->sc_statep_tdb.t = NULL; 144 sc->sc_maxupdates = 128; 145 sc->sc_sync_peer.s_addr =htonl(INADDR_PFSYNC_GROUP); 146 sc->sc_sendaddr.s_addr = htonl(INADDR_PFSYNC_GROUP); 147 sc->sc_ureq_received = 0; 148 sc->sc_ureq_sent = 0; 149 sc->sc_bulk_send_next = NULL; 150 sc->sc_bulk_terminator = NULL; 151 sc->sc_imo.imo_max_memberships = IP_MAX_MEMBERSHIPS; 152 lwkt_reltoken(&pf_token); 153 ifp = &sc->sc_if; 154 ksnprintf(ifp->if_xname, sizeof ifp->if_xname, "pfsync%d", unit); 155 if_initname(ifp, ifc->ifc_name, unit); 156 ifp->if_ioctl = pfsyncioctl; 157 ifp->if_output = pfsyncoutput; 158 ifp->if_start = pfsyncstart; 159 ifp->if_type = IFT_PFSYNC; 160 ifq_set_maxlen(&ifp->if_snd, ifqmaxlen); 161 ifp->if_hdrlen = PFSYNC_HDRLEN; 162 ifp->if_baudrate = IF_Mbps(100); 163 ifp->if_softc = sc; 164 pfsync_setmtu(sc, MCLBYTES); 165 callout_init(&sc->sc_tmo); 166 /* callout_init(&sc->sc_tdb_tmo); XXX we don't support tdb (yet) */ 167 callout_init(&sc->sc_bulk_tmo); 168 callout_init(&sc->sc_bulkfail_tmo); 169 if_attach(ifp, NULL); 170 171 LIST_INSERT_HEAD(&pfsync_list, sc, sc_next); 172 173 174 #if NCARP > 0 175 if_addgroup(ifp, "carp"); 176 #endif 177 178 #if NBPFILTER > 0 179 bpfattach(&sc->sc_if, DLT_PFSYNC, PFSYNC_HDRLEN); 180 #endif 181 lwkt_gettoken(&pf_token); 182 183 lwkt_reltoken(&pf_token); 184 return (0); 185 } 186 187 static int 188 pfsync_clone_destroy(struct ifnet *ifp) 189 { 190 lwkt_gettoken(&pf_token); 191 lwkt_reltoken(&pf_token); 192 193 struct pfsync_softc *sc = ifp->if_softc; 194 callout_stop(&sc->sc_tmo); 195 /* callout_stop(&sc->sc_tdb_tmo); XXX we don't support tdb (yet) */ 196 callout_stop(&sc->sc_bulk_tmo); 197 callout_stop(&sc->sc_bulkfail_tmo); 198 #if NCARP > 0 199 if (!pfsync_sync_ok) 200 carp_group_demote_adj(&sc->sc_if, -1); 201 #endif 202 #if NBPFILTER > 0 203 bpfdetach(ifp); 204 #endif 205 if_detach(ifp); 206 lwkt_gettoken(&pf_token); 207 LIST_REMOVE(sc, sc_next); 208 kfree(sc, M_PFSYNC); 209 lwkt_reltoken(&pf_token); 210 211 212 return 0; 213 } 214 215 /* 216 * Start output on the pfsync interface. 217 */ 218 void 219 pfsyncstart(struct ifnet *ifp, struct ifaltq_subque *ifsq) 220 { 221 ASSERT_ALTQ_SQ_DEFAULT(ifp, ifsq); 222 ifsq_purge(ifsq); 223 } 224 225 int 226 pfsync_alloc_scrub_memory(struct pfsync_state_peer *s, 227 struct pf_state_peer *d) 228 { 229 if (s->scrub.scrub_flag && d->scrub == NULL) { 230 d->scrub = kmalloc(sizeof(struct pf_state_scrub), M_PFSYNC, M_NOWAIT|M_ZERO); 231 232 if (d->scrub == NULL) 233 return (ENOMEM); 234 } 235 236 return (0); 237 } 238 239 void 240 pfsync_state_export(struct pfsync_state *sp, struct pf_state *st) 241 { 242 bzero(sp, sizeof(struct pfsync_state)); 243 244 /* copy from state key */ 245 sp->key[PF_SK_WIRE].addr[0] = st->key[PF_SK_WIRE]->addr[0]; 246 sp->key[PF_SK_WIRE].addr[1] = st->key[PF_SK_WIRE]->addr[1]; 247 sp->key[PF_SK_WIRE].port[0] = st->key[PF_SK_WIRE]->port[0]; 248 sp->key[PF_SK_WIRE].port[1] = st->key[PF_SK_WIRE]->port[1]; 249 sp->key[PF_SK_STACK].addr[0] = st->key[PF_SK_STACK]->addr[0]; 250 sp->key[PF_SK_STACK].addr[1] = st->key[PF_SK_STACK]->addr[1]; 251 sp->key[PF_SK_STACK].port[0] = st->key[PF_SK_STACK]->port[0]; 252 sp->key[PF_SK_STACK].port[1] = st->key[PF_SK_STACK]->port[1]; 253 sp->proto = st->key[PF_SK_WIRE]->proto; 254 sp->af = st->key[PF_SK_WIRE]->af; 255 256 /* copy from state */ 257 strlcpy(sp->ifname, st->kif->pfik_name, sizeof(sp->ifname)); 258 bcopy(&st->rt_addr, &sp->rt_addr, sizeof(sp->rt_addr)); 259 sp->creation = htonl(time_second - st->creation); 260 sp->expire = pf_state_expires(st); 261 if (sp->expire <= time_second) 262 sp->expire = htonl(0); 263 else 264 sp->expire = htonl(sp->expire - time_second); 265 266 sp->direction = st->direction; 267 sp->log = st->log; 268 sp->timeout = st->timeout; 269 sp->state_flags = st->state_flags; 270 if (st->src_node) 271 sp->sync_flags |= PFSYNC_FLAG_SRCNODE; 272 if (st->nat_src_node) 273 sp->sync_flags |= PFSYNC_FLAG_NATSRCNODE; 274 275 bcopy(&st->id, &sp->id, sizeof(sp->id)); 276 sp->creatorid = st->creatorid; 277 pf_state_peer_hton(&st->src, &sp->src); 278 pf_state_peer_hton(&st->dst, &sp->dst); 279 280 if (st->rule.ptr == NULL) 281 sp->rule = htonl(-1); 282 else 283 sp->rule = htonl(st->rule.ptr->nr); 284 if (st->anchor.ptr == NULL) 285 sp->anchor = htonl(-1); 286 else 287 sp->anchor = htonl(st->anchor.ptr->nr); 288 if (st->nat_rule.ptr == NULL) 289 sp->nat_rule = htonl(-1); 290 else 291 sp->nat_rule = htonl(st->nat_rule.ptr->nr); 292 293 pf_state_counter_hton(st->packets[0], sp->packets[0]); 294 pf_state_counter_hton(st->packets[1], sp->packets[1]); 295 pf_state_counter_hton(st->bytes[0], sp->bytes[0]); 296 pf_state_counter_hton(st->bytes[1], sp->bytes[1]); 297 298 } 299 300 int 301 pfsync_state_import(struct pfsync_state *sp, u_int8_t flags) 302 { 303 struct pf_state *st = NULL; 304 struct pf_state_key *skw = NULL, *sks = NULL; 305 struct pf_rule *r = NULL; 306 struct pfi_kif *kif; 307 int pool_flags; 308 int error; 309 310 if (sp->creatorid == 0 && pf_status.debug >= PF_DEBUG_MISC) { 311 kprintf("pfsync_insert_net_state: invalid creator id:" 312 " %08x\n", ntohl(sp->creatorid)); 313 return (EINVAL); 314 } 315 316 if ((kif = pfi_kif_get(sp->ifname)) == NULL) { 317 if (pf_status.debug >= PF_DEBUG_MISC) 318 kprintf("pfsync_insert_net_state: " 319 "unknown interface: %s\n", sp->ifname); 320 if (flags & PFSYNC_SI_IOCTL) 321 return (EINVAL); 322 return (0); /* skip this state */ 323 } 324 325 /* 326 * If the ruleset checksums match or the state is coming from the ioctl, 327 * it's safe to associate the state with the rule of that number. 328 */ 329 if (sp->rule != htonl(-1) && sp->anchor == htonl(-1) && 330 (flags & (PFSYNC_SI_IOCTL | PFSYNC_SI_CKSUM)) && ntohl(sp->rule) < 331 pf_main_ruleset.rules[PF_RULESET_FILTER].active.rcount) 332 r = pf_main_ruleset.rules[ 333 PF_RULESET_FILTER].active.ptr_array[ntohl(sp->rule)]; 334 else 335 r = &pf_default_rule; 336 337 if ((r->max_states && r->states_cur >= r->max_states)) 338 goto cleanup; 339 340 if (flags & PFSYNC_SI_IOCTL) 341 pool_flags = M_WAITOK | M_NULLOK | M_ZERO; 342 else 343 pool_flags = M_WAITOK | M_ZERO; 344 345 if ((st = kmalloc(sizeof(struct pf_state), M_PFSYNC, pool_flags)) == NULL) 346 goto cleanup; 347 348 if ((skw = pf_alloc_state_key(pool_flags)) == NULL) 349 goto cleanup; 350 351 if (PF_ANEQ(&sp->key[PF_SK_WIRE].addr[0], 352 &sp->key[PF_SK_STACK].addr[0], sp->af) || 353 PF_ANEQ(&sp->key[PF_SK_WIRE].addr[1], 354 &sp->key[PF_SK_STACK].addr[1], sp->af) || 355 sp->key[PF_SK_WIRE].port[0] != sp->key[PF_SK_STACK].port[0] || 356 sp->key[PF_SK_WIRE].port[1] != sp->key[PF_SK_STACK].port[1]) { 357 if ((sks = pf_alloc_state_key(pool_flags)) == NULL) 358 goto cleanup; 359 } else 360 sks = skw; 361 362 /* allocate memory for scrub info */ 363 if (pfsync_alloc_scrub_memory(&sp->src, &st->src) || 364 pfsync_alloc_scrub_memory(&sp->dst, &st->dst)) 365 goto cleanup; 366 367 /* copy to state key(s) */ 368 skw->addr[0] = sp->key[PF_SK_WIRE].addr[0]; 369 skw->addr[1] = sp->key[PF_SK_WIRE].addr[1]; 370 skw->port[0] = sp->key[PF_SK_WIRE].port[0]; 371 skw->port[1] = sp->key[PF_SK_WIRE].port[1]; 372 skw->proto = sp->proto; 373 skw->af = sp->af; 374 if (sks != skw) { 375 sks->addr[0] = sp->key[PF_SK_STACK].addr[0]; 376 sks->addr[1] = sp->key[PF_SK_STACK].addr[1]; 377 sks->port[0] = sp->key[PF_SK_STACK].port[0]; 378 sks->port[1] = sp->key[PF_SK_STACK].port[1]; 379 sks->proto = sp->proto; 380 sks->af = sp->af; 381 } 382 383 /* copy to state */ 384 bcopy(&sp->rt_addr, &st->rt_addr, sizeof(st->rt_addr)); 385 st->creation = time_second - ntohl(sp->creation); 386 st->expire = time_second; 387 if (sp->expire) { 388 /* XXX No adaptive scaling. */ 389 st->expire -= r->timeout[sp->timeout] - ntohl(sp->expire); 390 } 391 392 st->expire = ntohl(sp->expire) + time_second; 393 st->direction = sp->direction; 394 st->log = sp->log; 395 st->timeout = sp->timeout; 396 st->state_flags = sp->state_flags; 397 if (!(flags & PFSYNC_SI_IOCTL)) 398 st->sync_flags = PFSTATE_FROMSYNC; 399 400 bcopy(sp->id, &st->id, sizeof(st->id)); 401 st->creatorid = sp->creatorid; 402 pf_state_peer_ntoh(&sp->src, &st->src); 403 pf_state_peer_ntoh(&sp->dst, &st->dst); 404 405 st->rule.ptr = r; 406 st->nat_rule.ptr = NULL; 407 st->anchor.ptr = NULL; 408 st->rt_kif = NULL; 409 410 st->pfsync_time = 0; 411 412 413 /* XXX when we have nat_rule/anchors, use STATE_INC_COUNTERS */ 414 r->states_cur++; 415 r->states_tot++; 416 417 if ((error = pf_state_insert(kif, skw, sks, st)) != 0) { 418 /* XXX when we have nat_rule/anchors, use STATE_DEC_COUNTERS */ 419 r->states_cur--; 420 goto cleanup_state; 421 } 422 423 return (0); 424 425 cleanup: 426 error = ENOMEM; 427 if (skw == sks) 428 sks = NULL; 429 if (skw != NULL) 430 kfree(skw, M_PFSYNC); 431 if (sks != NULL) 432 kfree(sks, M_PFSYNC); 433 434 cleanup_state: /* pf_state_insert frees the state keys */ 435 if (st) { 436 if (st->dst.scrub) 437 kfree(st->dst.scrub, M_PFSYNC); 438 if (st->src.scrub) 439 kfree(st->src.scrub, M_PFSYNC); 440 kfree(st, M_PFSYNC); 441 } 442 return (error); 443 } 444 445 void 446 pfsync_input(struct mbuf *m, ...) 447 { 448 struct ip *ip = mtod(m, struct ip *); 449 struct pfsync_header *ph; 450 struct pfsync_softc *sc = pfsyncif; 451 struct pf_state *st; 452 struct pf_state_key *sk; 453 struct pf_state_item *si; 454 struct pf_state_cmp id_key; 455 struct pfsync_state *sp; 456 struct pfsync_state_upd *up; 457 struct pfsync_state_del *dp; 458 struct pfsync_state_clr *cp; 459 struct pfsync_state_upd_req *rup; 460 struct pfsync_state_bus *bus; 461 #ifdef IPSEC 462 struct pfsync_tdb *pt; 463 #endif 464 struct in_addr src; 465 struct mbuf *mp; 466 int iplen, action, error, i, count, offp, sfail, stale = 0; 467 u_int8_t flags = 0; 468 469 /* This function is not yet called from anywhere */ 470 /* Still we assume for safety that pf_token must be held */ 471 ASSERT_LWKT_TOKEN_HELD(&pf_token); 472 473 pfsyncstats.pfsyncs_ipackets++; 474 475 /* verify that we have a sync interface configured */ 476 if (!sc || !sc->sc_sync_ifp || !pf_status.running) 477 goto done; 478 479 /* verify that the packet came in on the right interface */ 480 if (sc->sc_sync_ifp != m->m_pkthdr.rcvif) { 481 pfsyncstats.pfsyncs_badif++; 482 goto done; 483 } 484 485 /* verify that the IP TTL is 255. */ 486 if (ip->ip_ttl != PFSYNC_DFLTTL) { 487 pfsyncstats.pfsyncs_badttl++; 488 goto done; 489 } 490 491 iplen = ip->ip_hl << 2; 492 493 if (m->m_pkthdr.len < iplen + sizeof(*ph)) { 494 pfsyncstats.pfsyncs_hdrops++; 495 goto done; 496 } 497 498 if (iplen + sizeof(*ph) > m->m_len) { 499 if ((m = m_pullup(m, iplen + sizeof(*ph))) == NULL) { 500 pfsyncstats.pfsyncs_hdrops++; 501 goto done; 502 } 503 ip = mtod(m, struct ip *); 504 } 505 ph = (struct pfsync_header *)((char *)ip + iplen); 506 507 /* verify the version */ 508 if (ph->version != PFSYNC_VERSION) { 509 pfsyncstats.pfsyncs_badver++; 510 goto done; 511 } 512 513 action = ph->action; 514 count = ph->count; 515 516 /* make sure it's a valid action code */ 517 if (action >= PFSYNC_ACT_MAX) { 518 pfsyncstats.pfsyncs_badact++; 519 goto done; 520 } 521 522 /* Cheaper to grab this now than having to mess with mbufs later */ 523 src = ip->ip_src; 524 525 if (!bcmp(&ph->pf_chksum, &pf_status.pf_chksum, PF_MD5_DIGEST_LENGTH)) 526 flags |= PFSYNC_SI_CKSUM; 527 528 switch (action) { 529 case PFSYNC_ACT_CLR: { 530 struct pf_state *nexts; 531 struct pf_state_key *nextsk; 532 struct pfi_kif *kif; 533 u_int32_t creatorid; 534 if ((mp = m_pulldown(m, iplen + sizeof(*ph), 535 sizeof(*cp), &offp)) == NULL) { 536 pfsyncstats.pfsyncs_badlen++; 537 return; 538 } 539 cp = (struct pfsync_state_clr *)(mp->m_data + offp); 540 creatorid = cp->creatorid; 541 542 crit_enter(); 543 if (cp->ifname[0] == '\0') { 544 for (st = RB_MIN(pf_state_tree_id, &tree_id); 545 st; st = nexts) { 546 nexts = RB_NEXT(pf_state_tree_id, &tree_id, st); 547 if (st->creatorid == creatorid) { 548 st->sync_flags |= PFSTATE_FROMSYNC; 549 pf_unlink_state(st); 550 } 551 } 552 } else { 553 if ((kif = pfi_kif_get(cp->ifname)) == NULL) { 554 crit_exit(); 555 return; 556 } 557 /* XXX correct? */ 558 for (sk = RB_MIN(pf_state_tree, 559 &pf_statetbl); sk; sk = nextsk) { 560 nextsk = RB_NEXT(pf_state_tree, 561 &pf_statetbl, sk); 562 TAILQ_FOREACH(si, &sk->states, entry) { 563 if (si->s->creatorid == creatorid) { 564 si->s->sync_flags |= 565 PFSTATE_FROMSYNC; 566 pf_unlink_state(si->s); 567 } 568 } 569 } 570 } 571 crit_exit(); 572 573 break; 574 } 575 case PFSYNC_ACT_INS: 576 if ((mp = m_pulldown(m, iplen + sizeof(*ph), 577 count * sizeof(*sp), &offp)) == NULL) { 578 pfsyncstats.pfsyncs_badlen++; 579 return; 580 } 581 582 crit_enter(); 583 for (i = 0, sp = (struct pfsync_state *)(mp->m_data + offp); 584 i < count; i++, sp++) { 585 /* check for invalid values */ 586 if (sp->timeout >= PFTM_MAX || 587 sp->src.state > PF_TCPS_PROXY_DST || 588 sp->dst.state > PF_TCPS_PROXY_DST || 589 sp->direction > PF_OUT || 590 (sp->af != AF_INET && sp->af != AF_INET6)) { 591 if (pf_status.debug >= PF_DEBUG_MISC) 592 kprintf("pfsync_insert: PFSYNC_ACT_INS: " 593 "invalid value\n"); 594 pfsyncstats.pfsyncs_badval++; 595 continue; 596 } 597 598 if ((error = pfsync_state_import(sp, flags))) { 599 if (error == ENOMEM) { 600 crit_exit(); 601 goto done; 602 } 603 continue; 604 } 605 } 606 crit_exit(); 607 break; 608 case PFSYNC_ACT_UPD: 609 if ((mp = m_pulldown(m, iplen + sizeof(*ph), 610 count * sizeof(*sp), &offp)) == NULL) { 611 pfsyncstats.pfsyncs_badlen++; 612 return; 613 } 614 615 crit_enter(); 616 for (i = 0, sp = (struct pfsync_state *)(mp->m_data + offp); 617 i < count; i++, sp++) { 618 int flags = PFSYNC_FLAG_STALE; 619 620 /* check for invalid values */ 621 if (sp->timeout >= PFTM_MAX || 622 sp->src.state > PF_TCPS_PROXY_DST || 623 sp->dst.state > PF_TCPS_PROXY_DST) { 624 if (pf_status.debug >= PF_DEBUG_MISC) 625 kprintf("pfsync_insert: PFSYNC_ACT_UPD: " 626 "invalid value\n"); 627 pfsyncstats.pfsyncs_badval++; 628 continue; 629 } 630 631 bcopy(sp->id, &id_key.id, sizeof(id_key.id)); 632 id_key.creatorid = sp->creatorid; 633 634 st = pf_find_state_byid(&id_key); 635 if (st == NULL) { 636 /* insert the update */ 637 if (pfsync_state_import(sp, flags)) 638 pfsyncstats.pfsyncs_badstate++; 639 continue; 640 } 641 sk = st->key[PF_SK_WIRE]; /* XXX right one? */ 642 sfail = 0; 643 if (sk->proto == IPPROTO_TCP) { 644 /* 645 * The state should never go backwards except 646 * for syn-proxy states. Neither should the 647 * sequence window slide backwards. 648 */ 649 if (st->src.state > sp->src.state && 650 (st->src.state < PF_TCPS_PROXY_SRC || 651 sp->src.state >= PF_TCPS_PROXY_SRC)) 652 sfail = 1; 653 else if (SEQ_GT(st->src.seqlo, 654 ntohl(sp->src.seqlo))) 655 sfail = 3; 656 else if (st->dst.state > sp->dst.state) { 657 /* There might still be useful 658 * information about the src state here, 659 * so import that part of the update, 660 * then "fail" so we send the updated 661 * state back to the peer who is missing 662 * our what we know. */ 663 pf_state_peer_ntoh(&sp->src, &st->src); 664 /* XXX do anything with timeouts? */ 665 sfail = 7; 666 flags = 0; 667 } else if (st->dst.state >= TCPS_SYN_SENT && 668 SEQ_GT(st->dst.seqlo, ntohl(sp->dst.seqlo))) 669 sfail = 4; 670 } else { 671 /* 672 * Non-TCP protocol state machine always go 673 * forwards 674 */ 675 if (st->src.state > sp->src.state) 676 sfail = 5; 677 else if (st->dst.state > sp->dst.state) 678 sfail = 6; 679 } 680 if (sfail) { 681 if (pf_status.debug >= PF_DEBUG_MISC) 682 kprintf("pfsync: %s stale update " 683 "(%d) id: %016jx " 684 "creatorid: %08x\n", 685 (sfail < 7 ? "ignoring" 686 : "partial"), sfail, 687 (uintmax_t)be64toh(st->id), 688 ntohl(st->creatorid)); 689 pfsyncstats.pfsyncs_stale++; 690 691 if (!(sp->sync_flags & PFSTATE_STALE)) { 692 /* we have a better state, send it */ 693 if (sc->sc_mbuf != NULL && !stale) 694 pfsync_sendout(sc); 695 stale++; 696 if (!st->sync_flags) 697 pfsync_pack_state( 698 PFSYNC_ACT_UPD, st, flags); 699 } 700 continue; 701 } 702 pfsync_alloc_scrub_memory(&sp->dst, &st->dst); 703 pf_state_peer_ntoh(&sp->src, &st->src); 704 pf_state_peer_ntoh(&sp->dst, &st->dst); 705 st->expire = ntohl(sp->expire) + time_second; 706 st->timeout = sp->timeout; 707 } 708 if (stale && sc->sc_mbuf != NULL) 709 pfsync_sendout(sc); 710 crit_exit(); 711 break; 712 /* 713 * It's not strictly necessary for us to support the "uncompressed" 714 * delete action, but it's relatively simple and maintains consistency. 715 */ 716 case PFSYNC_ACT_DEL: 717 if ((mp = m_pulldown(m, iplen + sizeof(*ph), 718 count * sizeof(*sp), &offp)) == NULL) { 719 pfsyncstats.pfsyncs_badlen++; 720 return; 721 } 722 723 crit_enter(); 724 for (i = 0, sp = (struct pfsync_state *)(mp->m_data + offp); 725 i < count; i++, sp++) { 726 bcopy(sp->id, &id_key.id, sizeof(id_key.id)); 727 id_key.creatorid = sp->creatorid; 728 729 st = pf_find_state_byid(&id_key); 730 if (st == NULL) { 731 pfsyncstats.pfsyncs_badstate++; 732 continue; 733 } 734 st->sync_flags |= PFSTATE_FROMSYNC; 735 pf_unlink_state(st); 736 } 737 crit_exit(); 738 break; 739 case PFSYNC_ACT_UPD_C: { 740 int update_requested = 0; 741 742 if ((mp = m_pulldown(m, iplen + sizeof(*ph), 743 count * sizeof(*up), &offp)) == NULL) { 744 pfsyncstats.pfsyncs_badlen++; 745 return; 746 } 747 748 crit_enter(); 749 for (i = 0, up = (struct pfsync_state_upd *)(mp->m_data + offp); 750 i < count; i++, up++) { 751 /* check for invalid values */ 752 if (up->timeout >= PFTM_MAX || 753 up->src.state > PF_TCPS_PROXY_DST || 754 up->dst.state > PF_TCPS_PROXY_DST) { 755 if (pf_status.debug >= PF_DEBUG_MISC) 756 kprintf("pfsync_insert: " 757 "PFSYNC_ACT_UPD_C: " 758 "invalid value\n"); 759 pfsyncstats.pfsyncs_badval++; 760 continue; 761 } 762 763 bcopy(up->id, &id_key.id, sizeof(id_key.id)); 764 id_key.creatorid = up->creatorid; 765 766 st = pf_find_state_byid(&id_key); 767 if (st == NULL) { 768 /* We don't have this state. Ask for it. */ 769 error = pfsync_request_update(up, &src); 770 if (error == ENOMEM) { 771 crit_exit(); 772 goto done; 773 } 774 update_requested = 1; 775 pfsyncstats.pfsyncs_badstate++; 776 continue; 777 } 778 sk = st->key[PF_SK_WIRE]; /* XXX right one? */ 779 sfail = 0; 780 if (sk->proto == IPPROTO_TCP) { 781 /* 782 * The state should never go backwards except 783 * for syn-proxy states. Neither should the 784 * sequence window slide backwards. 785 */ 786 if (st->src.state > up->src.state && 787 (st->src.state < PF_TCPS_PROXY_SRC || 788 up->src.state >= PF_TCPS_PROXY_SRC)) 789 sfail = 1; 790 else if (st->dst.state > up->dst.state) 791 sfail = 2; 792 else if (SEQ_GT(st->src.seqlo, 793 ntohl(up->src.seqlo))) 794 sfail = 3; 795 else if (st->dst.state >= TCPS_SYN_SENT && 796 SEQ_GT(st->dst.seqlo, ntohl(up->dst.seqlo))) 797 sfail = 4; 798 } else { 799 /* 800 * Non-TCP protocol state machine always go 801 * forwards 802 */ 803 if (st->src.state > up->src.state) 804 sfail = 5; 805 else if (st->dst.state > up->dst.state) 806 sfail = 6; 807 } 808 if (sfail) { 809 if (pf_status.debug >= PF_DEBUG_MISC) 810 kprintf("pfsync: ignoring stale update " 811 "(%d) id: %016" PRIx64 " " 812 "creatorid: %08x\n", sfail, 813 be64toh(st->id), 814 ntohl(st->creatorid)); 815 pfsyncstats.pfsyncs_stale++; 816 817 /* we have a better state, send it out */ 818 if ((!stale || update_requested) && 819 sc->sc_mbuf != NULL) { 820 pfsync_sendout(sc); 821 update_requested = 0; 822 } 823 stale++; 824 if (!st->sync_flags) 825 pfsync_pack_state(PFSYNC_ACT_UPD, st, 826 PFSYNC_FLAG_STALE); 827 continue; 828 } 829 pfsync_alloc_scrub_memory(&up->dst, &st->dst); 830 pf_state_peer_ntoh(&up->src, &st->src); 831 pf_state_peer_ntoh(&up->dst, &st->dst); 832 st->expire = ntohl(up->expire) + time_second; 833 st->timeout = up->timeout; 834 } 835 if ((update_requested || stale) && sc->sc_mbuf) 836 pfsync_sendout(sc); 837 crit_exit(); 838 break; 839 } 840 case PFSYNC_ACT_DEL_C: 841 if ((mp = m_pulldown(m, iplen + sizeof(*ph), 842 count * sizeof(*dp), &offp)) == NULL) { 843 pfsyncstats.pfsyncs_badlen++; 844 return; 845 } 846 847 crit_enter(); 848 for (i = 0, dp = (struct pfsync_state_del *)(mp->m_data + offp); 849 i < count; i++, dp++) { 850 bcopy(dp->id, &id_key.id, sizeof(id_key.id)); 851 id_key.creatorid = dp->creatorid; 852 853 st = pf_find_state_byid(&id_key); 854 if (st == NULL) { 855 pfsyncstats.pfsyncs_badstate++; 856 continue; 857 } 858 st->sync_flags |= PFSTATE_FROMSYNC; 859 pf_unlink_state(st); 860 } 861 crit_exit(); 862 break; 863 case PFSYNC_ACT_INS_F: 864 case PFSYNC_ACT_DEL_F: 865 /* not implemented */ 866 break; 867 case PFSYNC_ACT_UREQ: 868 if ((mp = m_pulldown(m, iplen + sizeof(*ph), 869 count * sizeof(*rup), &offp)) == NULL) { 870 pfsyncstats.pfsyncs_badlen++; 871 return; 872 } 873 874 crit_enter(); 875 if (sc->sc_mbuf != NULL) 876 pfsync_sendout(sc); 877 for (i = 0, 878 rup = (struct pfsync_state_upd_req *)(mp->m_data + offp); 879 i < count; i++, rup++) { 880 bcopy(rup->id, &id_key.id, sizeof(id_key.id)); 881 id_key.creatorid = rup->creatorid; 882 883 if (id_key.id == 0 && id_key.creatorid == 0) { 884 sc->sc_ureq_received = mycpu->gd_time_seconds; 885 if (sc->sc_bulk_send_next == NULL) 886 sc->sc_bulk_send_next = 887 TAILQ_FIRST(&state_list); 888 sc->sc_bulk_terminator = sc->sc_bulk_send_next; 889 if (pf_status.debug >= PF_DEBUG_MISC) 890 kprintf("pfsync: received " 891 "bulk update request\n"); 892 pfsync_send_bus(sc, PFSYNC_BUS_START); 893 lwkt_reltoken(&pf_token); 894 callout_init(&sc->sc_bulk_tmo); 895 lwkt_gettoken(&pf_token); 896 } else { 897 st = pf_find_state_byid(&id_key); 898 if (st == NULL) { 899 pfsyncstats.pfsyncs_badstate++; 900 continue; 901 } 902 if (!st->sync_flags) 903 pfsync_pack_state(PFSYNC_ACT_UPD, 904 st, 0); 905 } 906 } 907 if (sc->sc_mbuf != NULL) 908 pfsync_sendout(sc); 909 crit_exit(); 910 break; 911 case PFSYNC_ACT_BUS: 912 /* If we're not waiting for a bulk update, who cares. */ 913 if (sc->sc_ureq_sent == 0) 914 break; 915 916 if ((mp = m_pulldown(m, iplen + sizeof(*ph), 917 sizeof(*bus), &offp)) == NULL) { 918 pfsyncstats.pfsyncs_badlen++; 919 return; 920 } 921 bus = (struct pfsync_state_bus *)(mp->m_data + offp); 922 switch (bus->status) { 923 case PFSYNC_BUS_START: 924 lwkt_reltoken(&pf_token); 925 callout_reset(&sc->sc_bulkfail_tmo, 926 pf_pool_limits[PF_LIMIT_STATES].limit / 927 (PFSYNC_BULKPACKETS * sc->sc_maxcount), 928 pfsync_bulkfail, LIST_FIRST(&pfsync_list)); 929 lwkt_gettoken(&pf_token); 930 if (pf_status.debug >= PF_DEBUG_MISC) 931 kprintf("pfsync: received bulk " 932 "update start\n"); 933 break; 934 case PFSYNC_BUS_END: 935 if (mycpu->gd_time_seconds - ntohl(bus->endtime) >= 936 sc->sc_ureq_sent) { 937 /* that's it, we're happy */ 938 sc->sc_ureq_sent = 0; 939 sc->sc_bulk_tries = 0; 940 lwkt_reltoken(&pf_token); 941 callout_stop(&sc->sc_bulkfail_tmo); 942 lwkt_gettoken(&pf_token); 943 #if NCARP > 0 944 if (!pfsync_sync_ok) { 945 lwkt_reltoken(&pf_token); 946 carp_group_demote_adj(&sc->sc_if, -1); 947 lwkt_gettoken(&pf_token); 948 } 949 #endif 950 pfsync_sync_ok = 1; 951 if (pf_status.debug >= PF_DEBUG_MISC) 952 kprintf("pfsync: received valid " 953 "bulk update end\n"); 954 } else { 955 if (pf_status.debug >= PF_DEBUG_MISC) 956 kprintf("pfsync: received invalid " 957 "bulk update end: bad timestamp\n"); 958 } 959 break; 960 } 961 break; 962 #ifdef IPSEC 963 case PFSYNC_ACT_TDB_UPD: 964 if ((mp = m_pulldown(m, iplen + sizeof(*ph), 965 count * sizeof(*pt), &offp)) == NULL) { 966 pfsyncstats.pfsyncs_badlen++; 967 return; 968 } 969 crit_enter(); 970 for (i = 0, pt = (struct pfsync_tdb *)(mp->m_data + offp); 971 i < count; i++, pt++) 972 pfsync_update_net_tdb(pt); 973 crit_exit(); 974 break; 975 #endif 976 } 977 978 done: 979 if (m) 980 m_freem(m); 981 } 982 983 int 984 pfsyncoutput(struct ifnet *ifp, struct mbuf *m, struct sockaddr *dst, 985 struct rtentry *rt) 986 { 987 m_freem(m); 988 return (0); 989 } 990 991 /* ARGSUSED */ 992 int 993 pfsyncioctl(struct ifnet *ifp, u_long cmd, caddr_t data, struct ucred *cr) 994 { 995 struct pfsync_softc *sc = ifp->if_softc; 996 struct ifreq *ifr = (struct ifreq *)data; 997 struct ip_moptions *imo = &sc->sc_imo; 998 struct pfsyncreq pfsyncr; 999 struct ifnet *sifp; 1000 int error; 1001 1002 lwkt_gettoken(&pf_token); 1003 1004 switch (cmd) { 1005 case SIOCSIFADDR: 1006 case SIOCAIFADDR: 1007 case SIOCSIFDSTADDR: 1008 case SIOCSIFFLAGS: 1009 if (ifp->if_flags & IFF_UP) 1010 ifp->if_flags |= IFF_RUNNING; 1011 else 1012 ifp->if_flags &= ~IFF_RUNNING; 1013 break; 1014 case SIOCSIFMTU: 1015 if (ifr->ifr_mtu < PFSYNC_MINMTU) { 1016 lwkt_reltoken(&pf_token); 1017 return (EINVAL); 1018 } 1019 if (ifr->ifr_mtu > MCLBYTES) 1020 ifr->ifr_mtu = MCLBYTES; 1021 crit_enter(); 1022 if (ifr->ifr_mtu < ifp->if_mtu) 1023 pfsync_sendout(sc); 1024 pfsync_setmtu(sc, ifr->ifr_mtu); 1025 crit_exit(); 1026 break; 1027 case SIOCGETPFSYNC: 1028 bzero(&pfsyncr, sizeof(pfsyncr)); 1029 if (sc->sc_sync_ifp) 1030 strlcpy(pfsyncr.pfsyncr_syncdev, 1031 sc->sc_sync_ifp->if_xname, IFNAMSIZ); 1032 pfsyncr.pfsyncr_syncpeer = sc->sc_sync_peer; 1033 pfsyncr.pfsyncr_maxupdates = sc->sc_maxupdates; 1034 lwkt_reltoken(&pf_token); 1035 if ((error = copyout(&pfsyncr, ifr->ifr_data, sizeof(pfsyncr)))) 1036 return (error); 1037 lwkt_gettoken(&pf_token); 1038 break; 1039 case SIOCSETPFSYNC: 1040 if ((error = priv_check_cred(cr, PRIV_ROOT, NULL_CRED_OKAY)) != 0) { 1041 lwkt_reltoken(&pf_token); 1042 return (error); 1043 } 1044 if ((error = copyin(ifr->ifr_data, &pfsyncr, sizeof(pfsyncr)))) { 1045 lwkt_reltoken(&pf_token); 1046 return (error); 1047 } 1048 1049 if (pfsyncr.pfsyncr_syncpeer.s_addr == 0) 1050 sc->sc_sync_peer.s_addr = INADDR_PFSYNC_GROUP; 1051 else 1052 sc->sc_sync_peer.s_addr = 1053 pfsyncr.pfsyncr_syncpeer.s_addr; 1054 1055 if (pfsyncr.pfsyncr_maxupdates > 255) { 1056 lwkt_reltoken(&pf_token); 1057 return (EINVAL); 1058 } 1059 sc->sc_maxupdates = pfsyncr.pfsyncr_maxupdates; 1060 1061 if (pfsyncr.pfsyncr_syncdev[0] == 0) { 1062 sc->sc_sync_ifp = NULL; 1063 if (sc->sc_mbuf_net != NULL) { 1064 /* Don't keep stale pfsync packets around. */ 1065 crit_enter(); 1066 m_freem(sc->sc_mbuf_net); 1067 sc->sc_mbuf_net = NULL; 1068 sc->sc_statep_net.s = NULL; 1069 crit_exit(); 1070 } 1071 if (imo->imo_num_memberships > 0) { 1072 pfsync_in_delmulti(imo->imo_membership[--imo->imo_num_memberships]); 1073 imo->imo_multicast_ifp = NULL; 1074 } 1075 break; 1076 } 1077 1078 if ((sifp = ifunit(pfsyncr.pfsyncr_syncdev)) == NULL) { 1079 lwkt_reltoken(&pf_token); 1080 return (EINVAL); 1081 } 1082 1083 crit_enter(); 1084 if (sifp->if_mtu < sc->sc_if.if_mtu || 1085 (sc->sc_sync_ifp != NULL && 1086 sifp->if_mtu < sc->sc_sync_ifp->if_mtu) || 1087 sifp->if_mtu < MCLBYTES - sizeof(struct ip)) 1088 pfsync_sendout(sc); 1089 sc->sc_sync_ifp = sifp; 1090 1091 pfsync_setmtu(sc, sc->sc_if.if_mtu); 1092 1093 if (imo->imo_num_memberships > 0) { 1094 pfsync_in_delmulti(imo->imo_membership[--imo->imo_num_memberships]); 1095 imo->imo_multicast_ifp = NULL; 1096 } 1097 1098 if (sc->sc_sync_ifp && 1099 sc->sc_sync_peer.s_addr == INADDR_PFSYNC_GROUP) { 1100 if (!(sc->sc_sync_ifp->if_flags & IFF_MULTICAST)) { 1101 sc->sc_sync_ifp = NULL; 1102 lwkt_reltoken(&pf_token); 1103 crit_exit(); 1104 return (EADDRNOTAVAIL); 1105 } 1106 1107 if ((imo->imo_membership[0] = 1108 pfsync_in_addmulti(sc->sc_sync_ifp)) == NULL) { 1109 sc->sc_sync_ifp = NULL; 1110 lwkt_reltoken(&pf_token); 1111 crit_exit(); 1112 return (ENOBUFS); 1113 } 1114 imo->imo_num_memberships++; 1115 imo->imo_multicast_ifp = sc->sc_sync_ifp; 1116 imo->imo_multicast_ttl = PFSYNC_DFLTTL; 1117 imo->imo_multicast_loop = 0; 1118 } 1119 1120 if (sc->sc_sync_ifp || 1121 sc->sc_sendaddr.s_addr != INADDR_PFSYNC_GROUP) { 1122 /* Request a full state table update. */ 1123 sc->sc_ureq_sent = mycpu->gd_time_seconds; 1124 #if NCARP > 0 1125 if (pfsync_sync_ok) 1126 carp_group_demote_adj(&sc->sc_if, 1); 1127 #endif 1128 pfsync_sync_ok = 0; 1129 if (pf_status.debug >= PF_DEBUG_MISC) 1130 kprintf("pfsync: requesting bulk update\n"); 1131 lwkt_reltoken(&pf_token); 1132 callout_reset(&sc->sc_bulkfail_tmo, 5 * hz, 1133 pfsync_bulkfail, LIST_FIRST(&pfsync_list)); 1134 lwkt_gettoken(&pf_token); 1135 error = pfsync_request_update(NULL, NULL); 1136 if (error == ENOMEM) { 1137 lwkt_reltoken(&pf_token); 1138 crit_exit(); 1139 return (ENOMEM); 1140 } 1141 pfsync_sendout(sc); 1142 } 1143 crit_exit(); 1144 1145 break; 1146 1147 default: 1148 lwkt_reltoken(&pf_token); 1149 return (ENOTTY); 1150 } 1151 1152 lwkt_reltoken(&pf_token); 1153 return (0); 1154 } 1155 1156 void 1157 pfsync_setmtu(struct pfsync_softc *sc, int mtu_req) 1158 { 1159 int mtu; 1160 1161 if (sc->sc_sync_ifp && sc->sc_sync_ifp->if_mtu < mtu_req) 1162 mtu = sc->sc_sync_ifp->if_mtu; 1163 else 1164 mtu = mtu_req; 1165 1166 sc->sc_maxcount = (mtu - sizeof(struct pfsync_header)) / 1167 sizeof(struct pfsync_state); 1168 if (sc->sc_maxcount > 254) 1169 sc->sc_maxcount = 254; 1170 sc->sc_if.if_mtu = sizeof(struct pfsync_header) + 1171 sc->sc_maxcount * sizeof(struct pfsync_state); 1172 } 1173 1174 struct mbuf * 1175 pfsync_get_mbuf(struct pfsync_softc *sc, u_int8_t action, void **sp) 1176 { 1177 struct pfsync_header *h; 1178 struct mbuf *m; 1179 int len; 1180 1181 ASSERT_LWKT_TOKEN_HELD(&pf_token); 1182 1183 MGETHDR(m, M_WAITOK, MT_DATA); 1184 if (m == NULL) { 1185 IFNET_STAT_INC(&sc->sc_if, oerrors, 1); 1186 return (NULL); 1187 } 1188 1189 switch (action) { 1190 case PFSYNC_ACT_CLR: 1191 len = sizeof(struct pfsync_header) + 1192 sizeof(struct pfsync_state_clr); 1193 break; 1194 case PFSYNC_ACT_UPD_C: 1195 len = (sc->sc_maxcount * sizeof(struct pfsync_state_upd)) + 1196 sizeof(struct pfsync_header); 1197 break; 1198 case PFSYNC_ACT_DEL_C: 1199 len = (sc->sc_maxcount * sizeof(struct pfsync_state_del)) + 1200 sizeof(struct pfsync_header); 1201 break; 1202 case PFSYNC_ACT_UREQ: 1203 len = (sc->sc_maxcount * sizeof(struct pfsync_state_upd_req)) + 1204 sizeof(struct pfsync_header); 1205 break; 1206 case PFSYNC_ACT_BUS: 1207 len = sizeof(struct pfsync_header) + 1208 sizeof(struct pfsync_state_bus); 1209 break; 1210 case PFSYNC_ACT_TDB_UPD: 1211 len = (sc->sc_maxcount * sizeof(struct pfsync_tdb)) + 1212 sizeof(struct pfsync_header); 1213 break; 1214 default: 1215 len = (sc->sc_maxcount * sizeof(struct pfsync_state)) + 1216 sizeof(struct pfsync_header); 1217 break; 1218 } 1219 1220 if (len > MHLEN) { 1221 MCLGET(m, M_WAITOK); 1222 if ((m->m_flags & M_EXT) == 0) { 1223 m_free(m); 1224 IFNET_STAT_INC(&sc->sc_if, oerrors, 1); 1225 return (NULL); 1226 } 1227 m->m_data += (MCLBYTES - len) &~ (sizeof(long) - 1); 1228 } else 1229 MH_ALIGN(m, len); 1230 1231 m->m_pkthdr.rcvif = NULL; 1232 m->m_pkthdr.len = m->m_len = sizeof(struct pfsync_header); 1233 h = mtod(m, struct pfsync_header *); 1234 h->version = PFSYNC_VERSION; 1235 h->af = 0; 1236 h->count = 0; 1237 h->action = action; 1238 1239 *sp = (void *)((char *)h + PFSYNC_HDRLEN); 1240 lwkt_reltoken(&pf_token); 1241 callout_reset(&sc->sc_tmo, hz, pfsync_timeout, 1242 LIST_FIRST(&pfsync_list)); 1243 lwkt_gettoken(&pf_token); 1244 return (m); 1245 } 1246 1247 int 1248 pfsync_pack_state(u_int8_t action, struct pf_state *st, int flags) 1249 { 1250 struct ifnet *ifp = NULL; 1251 struct pfsync_softc *sc = pfsyncif; 1252 struct pfsync_header *h, *h_net; 1253 struct pfsync_state *sp = NULL; 1254 struct pfsync_state_upd *up = NULL; 1255 struct pfsync_state_del *dp = NULL; 1256 int ret = 0; 1257 u_int8_t i = 255, newaction = 0; 1258 1259 if (sc == NULL) 1260 return (0); 1261 ifp = &sc->sc_if; 1262 1263 /* 1264 * If a packet falls in the forest and there's nobody around to 1265 * hear, does it make a sound? 1266 */ 1267 if (ifp->if_bpf == NULL && sc->sc_sync_ifp == NULL && 1268 sc->sc_sync_peer.s_addr == INADDR_PFSYNC_GROUP) { 1269 /* Don't leave any stale pfsync packets hanging around. */ 1270 if (sc->sc_mbuf != NULL) { 1271 m_freem(sc->sc_mbuf); 1272 sc->sc_mbuf = NULL; 1273 sc->sc_statep.s = NULL; 1274 } 1275 return (0); 1276 } 1277 1278 if (action >= PFSYNC_ACT_MAX) 1279 return (EINVAL); 1280 1281 crit_enter(); 1282 if (sc->sc_mbuf == NULL) { 1283 if ((sc->sc_mbuf = pfsync_get_mbuf(sc, action, 1284 (void *)&sc->sc_statep.s)) == NULL) { 1285 crit_exit(); 1286 return (ENOMEM); 1287 } 1288 h = mtod(sc->sc_mbuf, struct pfsync_header *); 1289 } else { 1290 h = mtod(sc->sc_mbuf, struct pfsync_header *); 1291 if (h->action != action) { 1292 pfsync_sendout(sc); 1293 if ((sc->sc_mbuf = pfsync_get_mbuf(sc, action, 1294 (void *)&sc->sc_statep.s)) == NULL) { 1295 crit_exit(); 1296 return (ENOMEM); 1297 } 1298 h = mtod(sc->sc_mbuf, struct pfsync_header *); 1299 } else { 1300 /* 1301 * If it's an update, look in the packet to see if 1302 * we already have an update for the state. 1303 */ 1304 if (action == PFSYNC_ACT_UPD && sc->sc_maxupdates) { 1305 struct pfsync_state *usp = 1306 (void *)((char *)h + PFSYNC_HDRLEN); 1307 1308 for (i = 0; i < h->count; i++) { 1309 if (!memcmp(usp->id, &st->id, 1310 PFSYNC_ID_LEN) && 1311 usp->creatorid == st->creatorid) { 1312 sp = usp; 1313 sp->updates++; 1314 break; 1315 } 1316 usp++; 1317 } 1318 } 1319 } 1320 } 1321 1322 st->pfsync_time = mycpu->gd_time_seconds; 1323 1324 if (sp == NULL) { 1325 /* not a "duplicate" update */ 1326 i = 255; 1327 sp = sc->sc_statep.s++; 1328 sc->sc_mbuf->m_pkthdr.len = 1329 sc->sc_mbuf->m_len += sizeof(struct pfsync_state); 1330 h->count++; 1331 bzero(sp, sizeof(*sp)); 1332 1333 pfsync_state_export(sp, st); 1334 1335 if (flags & PFSYNC_FLAG_STALE) 1336 sp->sync_flags |= PFSTATE_STALE; 1337 } else { 1338 pf_state_peer_hton(&st->src, &sp->src); 1339 pf_state_peer_hton(&st->dst, &sp->dst); 1340 1341 if (st->expire <= time_second) 1342 sp->expire = htonl(0); 1343 else 1344 sp->expire = htonl(st->expire - time_second); 1345 } 1346 1347 /* do we need to build "compressed" actions for network transfer? */ 1348 if (sc->sc_sync_ifp && flags & PFSYNC_FLAG_COMPRESS) { 1349 switch (action) { 1350 case PFSYNC_ACT_UPD: 1351 newaction = PFSYNC_ACT_UPD_C; 1352 break; 1353 case PFSYNC_ACT_DEL: 1354 newaction = PFSYNC_ACT_DEL_C; 1355 break; 1356 default: 1357 /* by default we just send the uncompressed states */ 1358 break; 1359 } 1360 } 1361 1362 if (newaction) { 1363 if (sc->sc_mbuf_net == NULL) { 1364 if ((sc->sc_mbuf_net = pfsync_get_mbuf(sc, newaction, 1365 (void *)&sc->sc_statep_net.s)) == NULL) { 1366 crit_exit(); 1367 return (ENOMEM); 1368 } 1369 } 1370 h_net = mtod(sc->sc_mbuf_net, struct pfsync_header *); 1371 1372 switch (newaction) { 1373 case PFSYNC_ACT_UPD_C: 1374 if (i != 255) { 1375 up = (void *)((char *)h_net + 1376 PFSYNC_HDRLEN + (i * sizeof(*up))); 1377 up->updates++; 1378 } else { 1379 h_net->count++; 1380 sc->sc_mbuf_net->m_pkthdr.len = 1381 sc->sc_mbuf_net->m_len += sizeof(*up); 1382 up = sc->sc_statep_net.u++; 1383 1384 bzero(up, sizeof(*up)); 1385 bcopy(&st->id, up->id, sizeof(up->id)); 1386 up->creatorid = st->creatorid; 1387 } 1388 up->timeout = st->timeout; 1389 up->expire = sp->expire; 1390 up->src = sp->src; 1391 up->dst = sp->dst; 1392 break; 1393 case PFSYNC_ACT_DEL_C: 1394 sc->sc_mbuf_net->m_pkthdr.len = 1395 sc->sc_mbuf_net->m_len += sizeof(*dp); 1396 dp = sc->sc_statep_net.d++; 1397 h_net->count++; 1398 1399 bzero(dp, sizeof(*dp)); 1400 bcopy(&st->id, dp->id, sizeof(dp->id)); 1401 dp->creatorid = st->creatorid; 1402 break; 1403 } 1404 } 1405 1406 if (h->count == sc->sc_maxcount || 1407 (sc->sc_maxupdates && (sp->updates >= sc->sc_maxupdates))) 1408 ret = pfsync_sendout(sc); 1409 1410 crit_exit(); 1411 return (ret); 1412 } 1413 1414 int 1415 pfsync_request_update(struct pfsync_state_upd *up, struct in_addr *src) 1416 { 1417 struct pfsync_header *h; 1418 struct pfsync_softc *sc = pfsyncif; 1419 struct pfsync_state_upd_req *rup; 1420 int ret = 0; 1421 1422 if (sc == NULL) 1423 return (0); 1424 1425 if (sc->sc_mbuf == NULL) { 1426 if ((sc->sc_mbuf = pfsync_get_mbuf(sc, PFSYNC_ACT_UREQ, 1427 (void *)&sc->sc_statep.s)) == NULL) 1428 return (ENOMEM); 1429 h = mtod(sc->sc_mbuf, struct pfsync_header *); 1430 } else { 1431 h = mtod(sc->sc_mbuf, struct pfsync_header *); 1432 if (h->action != PFSYNC_ACT_UREQ) { 1433 pfsync_sendout(sc); 1434 if ((sc->sc_mbuf = pfsync_get_mbuf(sc, PFSYNC_ACT_UREQ, 1435 (void *)&sc->sc_statep.s)) == NULL) 1436 return (ENOMEM); 1437 h = mtod(sc->sc_mbuf, struct pfsync_header *); 1438 } 1439 } 1440 1441 if (src != NULL) 1442 sc->sc_sendaddr = *src; 1443 sc->sc_mbuf->m_pkthdr.len = sc->sc_mbuf->m_len += sizeof(*rup); 1444 h->count++; 1445 rup = sc->sc_statep.r++; 1446 bzero(rup, sizeof(*rup)); 1447 if (up != NULL) { 1448 bcopy(up->id, rup->id, sizeof(rup->id)); 1449 rup->creatorid = up->creatorid; 1450 } 1451 1452 if (h->count == sc->sc_maxcount) 1453 ret = pfsync_sendout(sc); 1454 1455 return (ret); 1456 } 1457 1458 int 1459 pfsync_clear_states(u_int32_t creatorid, char *ifname) 1460 { 1461 struct pfsync_softc *sc = pfsyncif; 1462 struct pfsync_state_clr *cp; 1463 int ret; 1464 1465 if (sc == NULL) 1466 return (0); 1467 1468 crit_enter(); 1469 if (sc->sc_mbuf != NULL) 1470 pfsync_sendout(sc); 1471 if ((sc->sc_mbuf = pfsync_get_mbuf(sc, PFSYNC_ACT_CLR, 1472 (void *)&sc->sc_statep.c)) == NULL) { 1473 crit_exit(); 1474 return (ENOMEM); 1475 } 1476 sc->sc_mbuf->m_pkthdr.len = sc->sc_mbuf->m_len += sizeof(*cp); 1477 cp = sc->sc_statep.c; 1478 cp->creatorid = creatorid; 1479 if (ifname != NULL) 1480 strlcpy(cp->ifname, ifname, IFNAMSIZ); 1481 1482 ret = (pfsync_sendout(sc)); 1483 crit_exit(); 1484 return (ret); 1485 } 1486 1487 void 1488 pfsync_timeout(void *v) 1489 { 1490 struct pfsync_softc *sc = v; 1491 1492 crit_enter(); 1493 pfsync_sendout(sc); 1494 crit_exit(); 1495 } 1496 1497 void 1498 pfsync_send_bus(struct pfsync_softc *sc, u_int8_t status) 1499 { 1500 struct pfsync_state_bus *bus; 1501 1502 if (sc->sc_mbuf != NULL) 1503 pfsync_sendout(sc); 1504 1505 if (pfsync_sync_ok && 1506 (sc->sc_mbuf = pfsync_get_mbuf(sc, PFSYNC_ACT_BUS, 1507 (void *)&sc->sc_statep.b)) != NULL) { 1508 sc->sc_mbuf->m_pkthdr.len = sc->sc_mbuf->m_len += sizeof(*bus); 1509 bus = sc->sc_statep.b; 1510 bus->creatorid = pf_status.hostid; 1511 bus->status = status; 1512 bus->endtime = htonl(mycpu->gd_time_seconds - sc->sc_ureq_received); 1513 pfsync_sendout(sc); 1514 } 1515 } 1516 1517 void 1518 pfsync_bulk_update(void *v) 1519 { 1520 struct pfsync_softc *sc = v; 1521 int i = 0; 1522 struct pf_state *state; 1523 1524 ASSERT_LWKT_TOKEN_HELD(&pf_token); 1525 1526 crit_enter(); 1527 if (sc->sc_mbuf != NULL) 1528 pfsync_sendout(sc); 1529 1530 /* 1531 * Grab at most PFSYNC_BULKPACKETS worth of states which have not 1532 * been sent since the latest request was made. 1533 */ 1534 state = sc->sc_bulk_send_next; 1535 if (state) 1536 do { 1537 /* send state update if syncable and not already sent */ 1538 if (!state->sync_flags 1539 && state->timeout < PFTM_MAX 1540 && state->pfsync_time <= sc->sc_ureq_received) { 1541 pfsync_pack_state(PFSYNC_ACT_UPD, state, 0); 1542 i++; 1543 } 1544 1545 /* figure next state to send */ 1546 state = TAILQ_NEXT(state, entry_list); 1547 1548 /* wrap to start of list if we hit the end */ 1549 if (!state) 1550 state = TAILQ_FIRST(&state_list); 1551 } while (i < sc->sc_maxcount * PFSYNC_BULKPACKETS && 1552 state != sc->sc_bulk_terminator); 1553 1554 if (!state || state == sc->sc_bulk_terminator) { 1555 /* we're done */ 1556 pfsync_send_bus(sc, PFSYNC_BUS_END); 1557 sc->sc_ureq_received = 0; 1558 sc->sc_bulk_send_next = NULL; 1559 sc->sc_bulk_terminator = NULL; 1560 lwkt_reltoken(&pf_token); 1561 callout_stop(&sc->sc_bulk_tmo); 1562 lwkt_gettoken(&pf_token); 1563 if (pf_status.debug >= PF_DEBUG_MISC) 1564 kprintf("pfsync: bulk update complete\n"); 1565 } else { 1566 /* look again for more in a bit */ 1567 lwkt_reltoken(&pf_token); 1568 callout_reset(&sc->sc_bulk_tmo, 1, pfsync_timeout, 1569 LIST_FIRST(&pfsync_list)); 1570 lwkt_gettoken(&pf_token); 1571 sc->sc_bulk_send_next = state; 1572 } 1573 if (sc->sc_mbuf != NULL) 1574 pfsync_sendout(sc); 1575 crit_exit(); 1576 } 1577 1578 void 1579 pfsync_bulkfail(void *v) 1580 { 1581 struct pfsync_softc *sc = v; 1582 int error; 1583 1584 ASSERT_LWKT_TOKEN_HELD(&pf_token); 1585 1586 if (sc->sc_bulk_tries++ < PFSYNC_MAX_BULKTRIES) { 1587 /* Try again in a bit */ 1588 lwkt_reltoken(&pf_token); 1589 callout_reset(&sc->sc_bulkfail_tmo, 5 * hz, pfsync_bulkfail, 1590 LIST_FIRST(&pfsync_list)); 1591 lwkt_gettoken(&pf_token); 1592 crit_enter(); 1593 error = pfsync_request_update(NULL, NULL); 1594 if (error == ENOMEM) { 1595 if (pf_status.debug >= PF_DEBUG_MISC) 1596 kprintf("pfsync: cannot allocate mbufs for " 1597 "bulk update\n"); 1598 } else 1599 pfsync_sendout(sc); 1600 crit_exit(); 1601 } else { 1602 /* Pretend like the transfer was ok */ 1603 sc->sc_ureq_sent = 0; 1604 sc->sc_bulk_tries = 0; 1605 #if NCARP > 0 1606 if (!pfsync_sync_ok) 1607 carp_group_demote_adj(&sc->sc_if, -1); 1608 #endif 1609 pfsync_sync_ok = 1; 1610 if (pf_status.debug >= PF_DEBUG_MISC) 1611 kprintf("pfsync: failed to receive " 1612 "bulk update status\n"); 1613 lwkt_reltoken(&pf_token); 1614 callout_stop(&sc->sc_bulkfail_tmo); 1615 lwkt_gettoken(&pf_token); 1616 } 1617 } 1618 1619 /* This must be called in splnet() */ 1620 int 1621 pfsync_sendout(struct pfsync_softc *sc) 1622 { 1623 #if NBPFILTER > 0 1624 struct ifnet *ifp = &sc->sc_if; 1625 #endif 1626 struct mbuf *m; 1627 1628 ASSERT_LWKT_TOKEN_HELD(&pf_token); 1629 1630 lwkt_reltoken(&pf_token); 1631 callout_stop(&sc->sc_tmo); 1632 lwkt_gettoken(&pf_token); 1633 1634 if (sc->sc_mbuf == NULL) 1635 return (0); 1636 m = sc->sc_mbuf; 1637 sc->sc_mbuf = NULL; 1638 sc->sc_statep.s = NULL; 1639 1640 #if NBPFILTER > 0 1641 if (ifp->if_bpf) { 1642 bpf_gettoken(); 1643 if (ifp->if_bpf) 1644 bpf_mtap(ifp->if_bpf, m, BPF_DIRECTION_OUT); 1645 bpf_reltoken(); 1646 } 1647 #endif 1648 1649 if (sc->sc_mbuf_net) { 1650 m_freem(m); 1651 m = sc->sc_mbuf_net; 1652 sc->sc_mbuf_net = NULL; 1653 sc->sc_statep_net.s = NULL; 1654 } 1655 1656 return pfsync_sendout_mbuf(sc, m); 1657 } 1658 1659 int 1660 pfsync_sendout_mbuf(struct pfsync_softc *sc, struct mbuf *m) 1661 { 1662 struct sockaddr sa; 1663 struct ip *ip; 1664 1665 if (sc->sc_sync_ifp || 1666 sc->sc_sync_peer.s_addr != INADDR_PFSYNC_GROUP) { 1667 M_PREPEND(m, sizeof(struct ip), M_WAITOK); 1668 if (m == NULL) { 1669 pfsyncstats.pfsyncs_onomem++; 1670 return (0); 1671 } 1672 ip = mtod(m, struct ip *); 1673 ip->ip_v = IPVERSION; 1674 ip->ip_hl = sizeof(*ip) >> 2; 1675 ip->ip_tos = IPTOS_LOWDELAY; 1676 ip->ip_len = htons(m->m_pkthdr.len); 1677 ip->ip_id = htons(ip_randomid()); 1678 ip->ip_off = htons(IP_DF); 1679 ip->ip_ttl = PFSYNC_DFLTTL; 1680 ip->ip_p = IPPROTO_PFSYNC; 1681 ip->ip_sum = 0; 1682 1683 bzero(&sa, sizeof(sa)); 1684 ip->ip_src.s_addr = INADDR_ANY; 1685 1686 if (sc->sc_sendaddr.s_addr == INADDR_PFSYNC_GROUP) 1687 m->m_flags |= M_MCAST; 1688 ip->ip_dst = sc->sc_sendaddr; 1689 sc->sc_sendaddr.s_addr = sc->sc_sync_peer.s_addr; 1690 1691 pfsyncstats.pfsyncs_opackets++; 1692 1693 if (ip_output(m, NULL, NULL, IP_RAWOUTPUT, &sc->sc_imo, NULL)) 1694 pfsyncstats.pfsyncs_oerrors++; 1695 } else 1696 m_freem(m); 1697 1698 return (0); 1699 } 1700 1701 static int 1702 pfsync_modevent(module_t mod, int type, void *data) 1703 { 1704 int error = 0; 1705 1706 struct pfsync_softc *pfs_if, *tmp; 1707 1708 lwkt_gettoken(&pf_token); 1709 1710 switch (type) { 1711 case MOD_LOAD: 1712 LIST_INIT(&pfsync_list); 1713 lwkt_reltoken(&pf_token); 1714 if_clone_attach(&pfsync_cloner); 1715 lwkt_gettoken(&pf_token); 1716 /* Override the function pointer for pf_ioctl.c */ 1717 break; 1718 1719 case MOD_UNLOAD: 1720 lwkt_reltoken(&pf_token); 1721 if_clone_detach(&pfsync_cloner); 1722 lwkt_gettoken(&pf_token); 1723 LIST_FOREACH_MUTABLE(pfs_if, &pfsync_list, sc_next, tmp) { 1724 pfsync_clone_destroy(&pfs_if->sc_if); 1725 } 1726 break; 1727 1728 default: 1729 error = EINVAL; 1730 break; 1731 } 1732 1733 lwkt_reltoken(&pf_token); 1734 return error; 1735 } 1736 1737 static moduledata_t pfsync_mod = { 1738 "pfsync", 1739 pfsync_modevent, 1740 0 1741 }; 1742 1743 #define PFSYNC_MODVER 44 1744 1745 DECLARE_MODULE(pfsync, pfsync_mod, SI_SUB_PSEUDO, SI_ORDER_ANY); 1746 MODULE_VERSION(pfsync, PFSYNC_MODVER); 1747 1748 static void 1749 pfsync_in_addmulti_dispatch(netmsg_t nmsg) 1750 { 1751 struct lwkt_msg *lmsg = &nmsg->lmsg; 1752 struct ifnet *ifp = lmsg->u.ms_resultp; 1753 struct in_addr addr; 1754 1755 addr.s_addr = INADDR_PFSYNC_GROUP; 1756 lmsg->u.ms_resultp = in_addmulti(&addr, ifp); 1757 1758 lwkt_replymsg(lmsg, 0); 1759 } 1760 1761 static struct in_multi * 1762 pfsync_in_addmulti(struct ifnet *ifp) 1763 { 1764 struct netmsg_base nmsg; 1765 struct lwkt_msg *lmsg = &nmsg.lmsg; 1766 1767 netmsg_init(&nmsg, NULL, &curthread->td_msgport, 0, 1768 pfsync_in_addmulti_dispatch); 1769 lmsg->u.ms_resultp = ifp; 1770 1771 lwkt_domsg(netisr_cpuport(0), lmsg, 0); 1772 return lmsg->u.ms_resultp; 1773 } 1774 1775 static void 1776 pfsync_in_delmulti_dispatch(netmsg_t nmsg) 1777 { 1778 struct lwkt_msg *lmsg = &nmsg->lmsg; 1779 1780 in_delmulti(lmsg->u.ms_resultp); 1781 lwkt_replymsg(lmsg, 0); 1782 } 1783 1784 static void 1785 pfsync_in_delmulti(struct in_multi *inm) 1786 { 1787 struct netmsg_base nmsg; 1788 struct lwkt_msg *lmsg = &nmsg.lmsg; 1789 1790 netmsg_init(&nmsg, NULL, &curthread->td_msgport, 0, 1791 pfsync_in_delmulti_dispatch); 1792 lmsg->u.ms_resultp = inm; 1793 1794 lwkt_domsg(netisr_cpuport(0), lmsg, 0); 1795 } 1796