/*	$OpenBSD: if_pfsync.c,v 1.146 2010/05/12 08:11:11 claudio Exp $	*/

/*
 * Copyright (c) 2002 Michael Shalayeff
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR OR HIS RELATIVES BE LIABLE FOR ANY DIRECT,
 * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
 * SERVICES; LOSS OF MIND, USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
 * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
 * THE POSSIBILITY OF SUCH DAMAGE.
 */

/*
 * Copyright (c) 2009 David Gwynne <dlg@openbsd.org>
 *
 * Permission to use, copy, modify, and distribute this software for any
 * purpose with or without fee is hereby granted, provided that the above
 * copyright notice and this permission notice appear in all copies.
 *
 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
 */
#include <sys/param.h>
#include <sys/proc.h>
#include <sys/systm.h>
#include <sys/time.h>
#include <sys/mbuf.h>
#include <sys/socket.h>
#include <sys/ioctl.h>
#include <sys/timeout.h>
#include <sys/kernel.h>
#include <sys/sysctl.h>
#include <sys/pool.h>
#include <sys/syslog.h>

#include <net/if.h>
#include <net/if_types.h>
#include <net/route.h>
#include <net/bpf.h>
#include <net/netisr.h>
#include <netinet/in.h>
#include <netinet/if_ether.h>
#include <netinet/tcp.h>
#include <netinet/tcp_seq.h>

#ifdef INET
#include <netinet/in_systm.h>
#include <netinet/in_var.h>
#include <netinet/ip.h>
#include <netinet/ip_var.h>
#endif

#ifdef INET6
#include <netinet6/nd6.h>
#endif /* INET6 */

#include "carp.h"
#if NCARP > 0
#include <netinet/ip_carp.h>
#endif

#define PF_DEBUGNAME	"pfsync: "
#include <net/pfvar.h>
#include <net/if_pfsync.h>

#include "bpfilter.h"
#include "pfsync.h"

#define PFSYNC_MINPKT ( \
	sizeof(struct ip) + \
	sizeof(struct pfsync_header))

struct pfsync_pkt {
	struct ip *ip;
	struct in_addr src;
	u_int8_t flags;
};

int	pfsync_upd_tcp(struct pf_state *, struct pfsync_state_peer *,
	    struct pfsync_state_peer *);

int	pfsync_in_clr(struct pfsync_pkt *, caddr_t, int, int);
int	pfsync_in_iack(struct pfsync_pkt *, caddr_t, int, int);
int	pfsync_in_upd_c(struct pfsync_pkt *, caddr_t, int, int);
int	pfsync_in_ureq(struct pfsync_pkt *, caddr_t, int, int);
int	pfsync_in_del(struct pfsync_pkt *, caddr_t, int, int);
int	pfsync_in_del_c(struct pfsync_pkt *, caddr_t, int, int);
int	pfsync_in_bus(struct pfsync_pkt *, caddr_t, int, int);
int	pfsync_in_tdb(struct pfsync_pkt *, caddr_t, int, int);
int	pfsync_in_ins(struct pfsync_pkt *, caddr_t, int, int);
int	pfsync_in_upd(struct pfsync_pkt *, caddr_t, int, int);
int	pfsync_in_eof(struct pfsync_pkt *, caddr_t, int, int);

int	pfsync_in_error(struct pfsync_pkt *, caddr_t, int, int);

struct {
	int	(*in)(struct pfsync_pkt *, caddr_t, int, int);
	size_t	len;
} pfsync_acts[] = {
	/* PFSYNC_ACT_CLR */
	{ pfsync_in_clr, sizeof(struct pfsync_clr) },
	/* PFSYNC_ACT_OINS */
	{ pfsync_in_error, 0 },
	/* PFSYNC_ACT_INS_ACK */
	{ pfsync_in_iack, sizeof(struct pfsync_ins_ack) },
	/* PFSYNC_ACT_OUPD */
	{ pfsync_in_error, 0 },
	/* PFSYNC_ACT_UPD_C */
	{ pfsync_in_upd_c, sizeof(struct pfsync_upd_c) },
	/* PFSYNC_ACT_UPD_REQ */
	{ pfsync_in_ureq, sizeof(struct pfsync_upd_req) },
	/* PFSYNC_ACT_DEL */
	{ pfsync_in_del, sizeof(struct pfsync_state) },
	/* PFSYNC_ACT_DEL_C */
	{ pfsync_in_del_c, sizeof(struct pfsync_del_c) },
	/* PFSYNC_ACT_INS_F */
	{ pfsync_in_error, 0 },
	/* PFSYNC_ACT_DEL_F */
	{ pfsync_in_error, 0 },
	/* PFSYNC_ACT_BUS */
	{ pfsync_in_bus, sizeof(struct pfsync_bus) },
	/* PFSYNC_ACT_TDB */
	{ pfsync_in_tdb, sizeof(struct pfsync_tdb) },
	/* PFSYNC_ACT_EOF */
	{ pfsync_in_error, 0 },
	/* PFSYNC_ACT_INS */
	{ pfsync_in_ins, sizeof(struct pfsync_state) },
	/* PFSYNC_ACT_UPD */
	{ pfsync_in_upd, sizeof(struct pfsync_state) }
};
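/*
 * Output side: a pfsync_q describes one of the local queues of states
 * waiting to be sent (one per PFSYNC_S_* value): a writer that
 * serializes a pf_state into its wire format, the per-message length,
 * and the action code for the subheader.
 */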
struct pfsync_q {
	void		(*write)(struct pf_state *, void *);
	size_t		len;
	u_int8_t	action;
};

/* we have one of these for every PFSYNC_S_ */
void	pfsync_out_state(struct pf_state *, void *);
void	pfsync_out_iack(struct pf_state *, void *);
void	pfsync_out_upd_c(struct pf_state *, void *);
void	pfsync_out_del(struct pf_state *, void *);

struct pfsync_q pfsync_qs[] = {
	{ pfsync_out_iack, sizeof(struct pfsync_ins_ack), PFSYNC_ACT_INS_ACK },
	{ pfsync_out_upd_c, sizeof(struct pfsync_upd_c), PFSYNC_ACT_UPD_C },
	{ pfsync_out_del, sizeof(struct pfsync_del_c), PFSYNC_ACT_DEL_C },
	{ pfsync_out_state, sizeof(struct pfsync_state), PFSYNC_ACT_INS },
	{ pfsync_out_state, sizeof(struct pfsync_state), PFSYNC_ACT_UPD }
};

void	pfsync_q_ins(struct pf_state *, int);
void	pfsync_q_del(struct pf_state *);

struct pfsync_upd_req_item {
	TAILQ_ENTRY(pfsync_upd_req_item)	ur_entry;
	struct pfsync_upd_req			ur_msg;
};
TAILQ_HEAD(pfsync_upd_reqs, pfsync_upd_req_item);

struct pfsync_deferral {
	TAILQ_ENTRY(pfsync_deferral)		 pd_entry;
	struct pf_state				*pd_st;
	struct mbuf				*pd_m;
	struct timeout				 pd_tmo;
};
TAILQ_HEAD(pfsync_deferrals, pfsync_deferral);

#define PFSYNC_PLSIZE	MAX(sizeof(struct pfsync_upd_req_item), \
	    sizeof(struct pfsync_deferral))

void	pfsync_out_tdb(struct tdb *, void *);

struct pfsync_softc {
	struct ifnet		 sc_if;
	struct ifnet		*sc_sync_if;

	struct pool		 sc_pool;

	struct ip_moptions	 sc_imo;

	struct in_addr		 sc_sync_peer;
	u_int8_t		 sc_maxupdates;

	struct ip		 sc_template;

	struct pf_state_queue	 sc_qs[PFSYNC_S_COUNT];
	size_t			 sc_len;

	struct pfsync_upd_reqs	 sc_upd_req_list;

	int			 sc_defer;
	struct pfsync_deferrals	 sc_deferrals;
	u_int			 sc_deferred;

	void			*sc_plus;
	size_t			 sc_pluslen;

	u_int32_t		 sc_ureq_sent;
	int			 sc_bulk_tries;
	struct timeout		 sc_bulkfail_tmo;

	u_int32_t		 sc_ureq_received;
	struct pf_state		*sc_bulk_next;
	struct pf_state		*sc_bulk_last;
	struct timeout		 sc_bulk_tmo;

	TAILQ_HEAD(, tdb)	 sc_tdb_q;

	struct timeout		 sc_tmo;
};

struct pfsync_softc	*pfsyncif = NULL;
struct pfsyncstats	 pfsyncstats;

void	pfsyncattach(int);
int	pfsync_clone_create(struct if_clone *, int);
int	pfsync_clone_destroy(struct ifnet *);
int	pfsync_alloc_scrub_memory(struct pfsync_state_peer *,
	    struct pf_state_peer *);
void	pfsync_update_net_tdb(struct pfsync_tdb *);
int	pfsyncoutput(struct ifnet *, struct mbuf *, struct sockaddr *,
	    struct rtentry *);
int	pfsyncioctl(struct ifnet *, u_long, caddr_t);
void	pfsyncstart(struct ifnet *);

struct mbuf *pfsync_if_dequeue(struct ifnet *);

void	pfsync_deferred(struct pf_state *, int);
void	pfsync_undefer(struct pfsync_deferral *, int);
void	pfsync_defer_tmo(void *);

void	pfsync_request_full_update(struct pfsync_softc *);
void	pfsync_request_update(u_int32_t, u_int64_t);
void	pfsync_update_state_req(struct pf_state *);

void	pfsync_drop(struct pfsync_softc *);
void	pfsync_sendout(void);
void	pfsync_send_plus(void *, size_t);
void	pfsync_timeout(void *);
void	pfsync_tdb_timeout(void *);

void	pfsync_bulk_start(void);
void	pfsync_bulk_status(u_int8_t);
void	pfsync_bulk_update(void *);
void	pfsync_bulk_fail(void *);

#define PFSYNC_MAX_BULKTRIES	12
int	pfsync_sync_ok;

struct if_clone	pfsync_cloner =
    IF_CLONE_INITIALIZER("pfsync", pfsync_clone_create, pfsync_clone_destroy);

void
pfsyncattach(int npfsync)
{
	if_clone_attach(&pfsync_cloner);
}
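/*
 * Create the pfsync clone interface. Only a single instance (pfsync0)
 * is supported, so any other unit number is rejected.
 */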
int
pfsync_clone_create(struct if_clone *ifc, int unit)
{
	struct pfsync_softc *sc;
	struct ifnet *ifp;
	int q;

	if (unit != 0)
		return (EINVAL);

	pfsync_sync_ok = 1;

	sc = malloc(sizeof(*pfsyncif), M_DEVBUF, M_NOWAIT | M_ZERO);
	if (sc == NULL)
		return (ENOMEM);

	for (q = 0; q < PFSYNC_S_COUNT; q++)
		TAILQ_INIT(&sc->sc_qs[q]);

	pool_init(&sc->sc_pool, PFSYNC_PLSIZE, 0, 0, 0, "pfsync", NULL);
	TAILQ_INIT(&sc->sc_upd_req_list);
	TAILQ_INIT(&sc->sc_deferrals);
	sc->sc_deferred = 0;

	TAILQ_INIT(&sc->sc_tdb_q);

	sc->sc_len = PFSYNC_MINPKT;
	sc->sc_maxupdates = 128;

	sc->sc_imo.imo_membership = (struct in_multi **)malloc(
	    (sizeof(struct in_multi *) * IP_MIN_MEMBERSHIPS), M_IPMOPTS,
	    M_WAITOK | M_ZERO);
	sc->sc_imo.imo_max_memberships = IP_MIN_MEMBERSHIPS;

	ifp = &sc->sc_if;
	snprintf(ifp->if_xname, sizeof ifp->if_xname, "pfsync%d", unit);
	ifp->if_softc = sc;
	ifp->if_ioctl = pfsyncioctl;
	ifp->if_output = pfsyncoutput;
	ifp->if_start = pfsyncstart;
	ifp->if_type = IFT_PFSYNC;
	ifp->if_snd.ifq_maxlen = ifqmaxlen;
	ifp->if_hdrlen = sizeof(struct pfsync_header);
	ifp->if_mtu = 1500; /* XXX */
	ifp->if_hardmtu = MCLBYTES; /* XXX */
	timeout_set(&sc->sc_tmo, pfsync_timeout, sc);
	timeout_set(&sc->sc_bulk_tmo, pfsync_bulk_update, sc);
	timeout_set(&sc->sc_bulkfail_tmo, pfsync_bulk_fail, sc);

	if_attach(ifp);
	if_alloc_sadl(ifp);

#if NCARP > 0
	if_addgroup(ifp, "carp");
#endif

#if NBPFILTER > 0
	bpfattach(&sc->sc_if.if_bpf, ifp, DLT_PFSYNC, PFSYNC_HDRLEN);
#endif

	pfsyncif = sc;

	return (0);
}

int
pfsync_clone_destroy(struct ifnet *ifp)
{
	struct pfsync_softc *sc = ifp->if_softc;
	int s;

	timeout_del(&sc->sc_bulk_tmo);
	timeout_del(&sc->sc_tmo);
#if NCARP > 0
	if (!pfsync_sync_ok)
		carp_group_demote_adj(&sc->sc_if, -1, "pfsync destroy");
#endif
#if NBPFILTER > 0
	bpfdetach(ifp);
#endif
	if_detach(ifp);

	pfsync_drop(sc);

	s = splsoftnet();
	while (sc->sc_deferred > 0)
		pfsync_undefer(TAILQ_FIRST(&sc->sc_deferrals), 0);
	splx(s);

	pool_destroy(&sc->sc_pool);
	free(sc->sc_imo.imo_membership, M_IPMOPTS);
	free(sc, M_DEVBUF);

	pfsyncif = NULL;

	return (0);
}
struct mbuf *
pfsync_if_dequeue(struct ifnet *ifp)
{
	struct mbuf *m;

	IF_DEQUEUE(&ifp->if_snd, m);

	return (m);
}

/*
 * Start output on the pfsync interface.
 */
void
pfsyncstart(struct ifnet *ifp)
{
	struct mbuf *m;
	int s;

	s = splnet();
	while ((m = pfsync_if_dequeue(ifp)) != NULL) {
		IF_DROP(&ifp->if_snd);
		m_freem(m);
	}
	splx(s);
}

int
pfsync_alloc_scrub_memory(struct pfsync_state_peer *s,
    struct pf_state_peer *d)
{
	if (s->scrub.scrub_flag && d->scrub == NULL) {
		d->scrub = pool_get(&pf_state_scrub_pl, PR_NOWAIT | PR_ZERO);
		if (d->scrub == NULL)
			return (ENOMEM);
	}

	return (0);
}

void
pfsync_state_export(struct pfsync_state *sp, struct pf_state *st)
{
	bzero(sp, sizeof(struct pfsync_state));

	/* copy from state key */
	sp->key[PF_SK_WIRE].addr[0] = st->key[PF_SK_WIRE]->addr[0];
	sp->key[PF_SK_WIRE].addr[1] = st->key[PF_SK_WIRE]->addr[1];
	sp->key[PF_SK_WIRE].port[0] = st->key[PF_SK_WIRE]->port[0];
	sp->key[PF_SK_WIRE].port[1] = st->key[PF_SK_WIRE]->port[1];
	sp->key[PF_SK_WIRE].rdomain = htons(st->key[PF_SK_WIRE]->rdomain);
	sp->key[PF_SK_STACK].addr[0] = st->key[PF_SK_STACK]->addr[0];
	sp->key[PF_SK_STACK].addr[1] = st->key[PF_SK_STACK]->addr[1];
	sp->key[PF_SK_STACK].port[0] = st->key[PF_SK_STACK]->port[0];
	sp->key[PF_SK_STACK].port[1] = st->key[PF_SK_STACK]->port[1];
	sp->key[PF_SK_STACK].rdomain = htons(st->key[PF_SK_STACK]->rdomain);
	sp->rtableid[PF_SK_WIRE] = htonl(st->rtableid[PF_SK_WIRE]);
	sp->rtableid[PF_SK_STACK] = htonl(st->rtableid[PF_SK_STACK]);
	sp->proto = st->key[PF_SK_WIRE]->proto;
	sp->af = st->key[PF_SK_WIRE]->af;

	/* copy from state */
	strlcpy(sp->ifname, st->kif->pfik_name, sizeof(sp->ifname));
	bcopy(&st->rt_addr, &sp->rt_addr, sizeof(sp->rt_addr));
	sp->creation = htonl(time_second - st->creation);
	sp->expire = pf_state_expires(st);
	if (sp->expire <= time_second)
		sp->expire = htonl(0);
	else
		sp->expire = htonl(sp->expire - time_second);

	sp->direction = st->direction;
	sp->log = st->log;
	sp->timeout = st->timeout;
	sp->state_flags = st->state_flags;
	if (!SLIST_EMPTY(&st->src_nodes))
		sp->sync_flags |= PFSYNC_FLAG_SRCNODE;

	bcopy(&st->id, &sp->id, sizeof(sp->id));
	sp->creatorid = st->creatorid;
	pf_state_peer_hton(&st->src, &sp->src);
	pf_state_peer_hton(&st->dst, &sp->dst);

	if (st->rule.ptr == NULL)
		sp->rule = htonl(-1);
	else
		sp->rule = htonl(st->rule.ptr->nr);
	if (st->anchor.ptr == NULL)
		sp->anchor = htonl(-1);
	else
		sp->anchor = htonl(st->anchor.ptr->nr);
	sp->nat_rule = htonl(-1);	/* left for compat, nat_rule is gone */

	pf_state_counter_hton(st->packets[0], sp->packets[0]);
	pf_state_counter_hton(st->packets[1], sp->packets[1]);
	pf_state_counter_hton(st->bytes[0], sp->bytes[0]);
	pf_state_counter_hton(st->bytes[1], sp->bytes[1]);

	sp->max_mss = htons(st->max_mss);
	sp->min_ttl = st->min_ttl;
	sp->set_tos = st->set_tos;
}
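/*
 * Translate a wire-format pfsync_state into a local pf_state and
 * insert it into the state table. Returns 0 or an errno; ENOMEM tells
 * the caller to stop processing further insertions from this packet.
 */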
int
pfsync_state_import(struct pfsync_state *sp, u_int8_t flags)
{
	struct pf_state *st = NULL;
	struct pf_state_key *skw = NULL, *sks = NULL;
	struct pf_rule *r = NULL;
	struct pfi_kif *kif;
	int pool_flags;
	int error;

	if (sp->creatorid == 0) {
		DPFPRINTF(LOG_NOTICE, "pfsync_state_import: "
		    "invalid creator id: %08x", ntohl(sp->creatorid));
		return (EINVAL);
	}

	if ((kif = pfi_kif_get(sp->ifname)) == NULL) {
		DPFPRINTF(LOG_NOTICE, "pfsync_state_import: "
		    "unknown interface: %s", sp->ifname);
		if (flags & PFSYNC_SI_IOCTL)
			return (EINVAL);
		return (0);	/* skip this state */
	}

	/*
	 * If the ruleset checksums match or the state is coming from the
	 * ioctl, it's safe to associate the state with the rule of that
	 * number.
	 */
	if (sp->rule != htonl(-1) && sp->anchor == htonl(-1) &&
	    (flags & (PFSYNC_SI_IOCTL | PFSYNC_SI_CKSUM)) && ntohl(sp->rule) <
	    pf_main_ruleset.rules.active.rcount)
		r = pf_main_ruleset.rules.active.ptr_array[ntohl(sp->rule)];
	else
		r = &pf_default_rule;

	if ((r->max_states && r->states_cur >= r->max_states))
		goto cleanup;

	if (flags & PFSYNC_SI_IOCTL)
		pool_flags = PR_WAITOK | PR_LIMITFAIL | PR_ZERO;
	else
		pool_flags = PR_LIMITFAIL | PR_ZERO;

	if ((st = pool_get(&pf_state_pl, pool_flags)) == NULL)
		goto cleanup;

	if ((skw = pf_alloc_state_key(pool_flags)) == NULL)
		goto cleanup;

	if (PF_ANEQ(&sp->key[PF_SK_WIRE].addr[0],
	    &sp->key[PF_SK_STACK].addr[0], sp->af) ||
	    PF_ANEQ(&sp->key[PF_SK_WIRE].addr[1],
	    &sp->key[PF_SK_STACK].addr[1], sp->af) ||
	    sp->key[PF_SK_WIRE].port[0] != sp->key[PF_SK_STACK].port[0] ||
	    sp->key[PF_SK_WIRE].port[1] != sp->key[PF_SK_STACK].port[1] ||
	    sp->key[PF_SK_WIRE].rdomain != sp->key[PF_SK_STACK].rdomain) {
		if ((sks = pf_alloc_state_key(pool_flags)) == NULL)
			goto cleanup;
	} else
		sks = skw;

	/* allocate memory for scrub info */
	if (pfsync_alloc_scrub_memory(&sp->src, &st->src) ||
	    pfsync_alloc_scrub_memory(&sp->dst, &st->dst))
		goto cleanup;

	/* copy to state key(s) */
	skw->addr[0] = sp->key[PF_SK_WIRE].addr[0];
	skw->addr[1] = sp->key[PF_SK_WIRE].addr[1];
	skw->port[0] = sp->key[PF_SK_WIRE].port[0];
	skw->port[1] = sp->key[PF_SK_WIRE].port[1];
	skw->rdomain = ntohs(sp->key[PF_SK_WIRE].rdomain);
	skw->proto = sp->proto;
	skw->af = sp->af;
	if (sks != skw) {
		sks->addr[0] = sp->key[PF_SK_STACK].addr[0];
		sks->addr[1] = sp->key[PF_SK_STACK].addr[1];
		sks->port[0] = sp->key[PF_SK_STACK].port[0];
		sks->port[1] = sp->key[PF_SK_STACK].port[1];
		sks->rdomain = ntohs(sp->key[PF_SK_STACK].rdomain);
		sks->proto = sp->proto;
		sks->af = sp->af;
	}
	st->rtableid[PF_SK_WIRE] = ntohl(sp->rtableid[PF_SK_WIRE]);
	st->rtableid[PF_SK_STACK] = ntohl(sp->rtableid[PF_SK_STACK]);

	/* copy to state */
	bcopy(&sp->rt_addr, &st->rt_addr, sizeof(st->rt_addr));
	st->creation = time_second - ntohl(sp->creation);
	st->expire = time_second;
	if (sp->expire) {
		/* XXX No adaptive scaling. */
		st->expire -= r->timeout[sp->timeout] - ntohl(sp->expire);
	}
	st->direction = sp->direction;
	st->log = sp->log;
	st->timeout = sp->timeout;
	st->state_flags = sp->state_flags;
	st->max_mss = ntohs(sp->max_mss);
	st->min_ttl = sp->min_ttl;
	st->set_tos = sp->set_tos;

	bcopy(sp->id, &st->id, sizeof(st->id));
	st->creatorid = sp->creatorid;
	pf_state_peer_ntoh(&sp->src, &st->src);
	pf_state_peer_ntoh(&sp->dst, &st->dst);

	st->rule.ptr = r;
	st->anchor.ptr = NULL;
	st->rt_kif = NULL;

	st->pfsync_time = time_uptime;
	st->sync_state = PFSYNC_S_NONE;

	/* XXX when we have anchors, use STATE_INC_COUNTERS */
	r->states_cur++;
	r->states_tot++;

	if (!ISSET(flags, PFSYNC_SI_IOCTL))
		SET(st->state_flags, PFSTATE_NOSYNC);

	if (pf_state_insert(kif, skw, sks, st) != 0) {
		/* XXX when we have anchors, use STATE_DEC_COUNTERS */
		r->states_cur--;
		error = EEXIST;
		goto cleanup_state;
	}

	if (!ISSET(flags, PFSYNC_SI_IOCTL)) {
		CLR(st->state_flags, PFSTATE_NOSYNC);
		if (ISSET(st->state_flags, PFSTATE_ACK)) {
			pfsync_q_ins(st, PFSYNC_S_IACK);
			schednetisr(NETISR_PFSYNC);
		}
	}
	CLR(st->state_flags, PFSTATE_ACK);

	return (0);

cleanup:
	error = ENOMEM;
	if (skw == sks)
		sks = NULL;
	if (skw != NULL)
		pool_put(&pf_state_key_pl, skw);
	if (sks != NULL)
		pool_put(&pf_state_key_pl, sks);

cleanup_state:	/* pf_state_insert frees the state keys */
	if (st) {
		if (st->dst.scrub)
			pool_put(&pf_state_scrub_pl, st->dst.scrub);
		if (st->src.scrub)
			pool_put(&pf_state_scrub_pl, st->src.scrub);
		pool_put(&pf_state_pl, st);
	}
	return (error);
}
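/*
 * Input path: validate the IP and pfsync headers, then walk the
 * subheaders and dispatch each batch of messages via pfsync_acts.
 */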
void
pfsync_input(struct mbuf *m, ...)
{
	struct pfsync_softc *sc = pfsyncif;
	struct pfsync_pkt pkt;
	struct ip *ip = mtod(m, struct ip *);
	struct mbuf *mp;
	struct pfsync_header *ph;
	struct pfsync_subheader subh;

	int offset, offp, len, count, mlen;

	pfsyncstats.pfsyncs_ipackets++;

	/* verify that we have a sync interface configured */
	if (sc == NULL || !ISSET(sc->sc_if.if_flags, IFF_RUNNING) ||
	    sc->sc_sync_if == NULL || !pf_status.running)
		goto done;

	/* verify that the packet came in on the right interface */
	if (sc->sc_sync_if != m->m_pkthdr.rcvif) {
		pfsyncstats.pfsyncs_badif++;
		goto done;
	}

	sc->sc_if.if_ipackets++;
	sc->sc_if.if_ibytes += m->m_pkthdr.len;

	/* verify that the IP TTL is 255. */
	if (ip->ip_ttl != PFSYNC_DFLTTL) {
		pfsyncstats.pfsyncs_badttl++;
		goto done;
	}

	offset = ip->ip_hl << 2;
	if (m->m_pkthdr.len < offset + sizeof(*ph)) {
		pfsyncstats.pfsyncs_hdrops++;
		goto done;
	}

	if (offset + sizeof(*ph) > m->m_len) {
		if (m_pullup(m, offset + sizeof(*ph)) == NULL) {
			pfsyncstats.pfsyncs_hdrops++;
			return;
		}
		ip = mtod(m, struct ip *);
	}
	ph = (struct pfsync_header *)((char *)ip + offset);

	/* verify the version */
	if (ph->version != PFSYNC_VERSION) {
		pfsyncstats.pfsyncs_badver++;
		goto done;
	}
	len = ntohs(ph->len) + offset;
	if (m->m_pkthdr.len < len) {
		pfsyncstats.pfsyncs_badlen++;
		goto done;
	}

	/* Cheaper to grab this now than having to mess with mbufs later */
	pkt.ip = ip;
	pkt.src = ip->ip_src;
	pkt.flags = 0;

	if (!bcmp(&ph->pfcksum, &pf_status.pf_chksum, PF_MD5_DIGEST_LENGTH))
		pkt.flags |= PFSYNC_SI_CKSUM;

	offset += sizeof(*ph);
	while (offset <= len - sizeof(subh)) {
		m_copydata(m, offset, sizeof(subh), (caddr_t)&subh);
		offset += sizeof(subh);

		mlen = subh.len << 2;
		count = ntohs(subh.count);

		if (subh.action >= PFSYNC_ACT_MAX ||
		    subh.action >= nitems(pfsync_acts) ||
		    mlen < pfsync_acts[subh.action].len) {
			/*
			 * subheaders are always followed by at least one
			 * message, so if the peer is new enough to tell us
			 * how big its messages are then we know enough to
			 * skip them.
			 */
			if (count > 0 && mlen > 0) {
				offset += count * mlen;
				continue;
			}
			pfsyncstats.pfsyncs_badact++;
			goto done;
		}

		mp = m_pulldown(m, offset, mlen * count, &offp);
		if (mp == NULL) {
			pfsyncstats.pfsyncs_badlen++;
			return;
		}

		if (pfsync_acts[subh.action].in(&pkt, mp->m_data + offp,
		    mlen, count) != 0)
			goto done;

		offset += mlen * count;
	}

done:
	m_freem(m);
}
/*
 * PFSYNC_ACT_CLR: unlink all states matching the given creator id,
 * optionally restricted to states on a named interface.
 */
int
pfsync_in_clr(struct pfsync_pkt *pkt, caddr_t buf, int len, int count)
{
	struct pfsync_clr *clr;
	int i;

	struct pf_state *st, *nexts;
	struct pf_state_key *sk, *nextsk;
	struct pf_state_item *si;
	u_int32_t creatorid;
	int s;

	s = splsoftnet();
	for (i = 0; i < count; i++) {
		clr = (struct pfsync_clr *)(buf + len * i);
		creatorid = clr->creatorid;

		if (clr->ifname[0] == '\0') {
			for (st = RB_MIN(pf_state_tree_id, &tree_id);
			    st; st = nexts) {
				nexts = RB_NEXT(pf_state_tree_id, &tree_id, st);
				if (st->creatorid == creatorid) {
					SET(st->state_flags, PFSTATE_NOSYNC);
					pf_unlink_state(st);
				}
			}
		} else {
			if (pfi_kif_get(clr->ifname) == NULL)
				continue;

			/* XXX correct? */
			for (sk = RB_MIN(pf_state_tree, &pf_statetbl);
			    sk; sk = nextsk) {
				nextsk = RB_NEXT(pf_state_tree,
				    &pf_statetbl, sk);
				TAILQ_FOREACH(si, &sk->states, entry) {
					if (si->s->creatorid == creatorid) {
						SET(si->s->state_flags,
						    PFSTATE_NOSYNC);
						pf_unlink_state(si->s);
					}
				}
			}
		}
	}
	splx(s);

	return (0);
}

int
pfsync_in_ins(struct pfsync_pkt *pkt, caddr_t buf, int len, int count)
{
	struct pfsync_state *sp;
	int i;

	int s;

	s = splsoftnet();
	for (i = 0; i < count; i++) {
		sp = (struct pfsync_state *)(buf + len * i);

		/* check for invalid values */
		if (sp->timeout >= PFTM_MAX ||
		    sp->src.state > PF_TCPS_PROXY_DST ||
		    sp->dst.state > PF_TCPS_PROXY_DST ||
		    sp->direction > PF_OUT ||
		    (sp->af != AF_INET && sp->af != AF_INET6)) {
			DPFPRINTF(LOG_NOTICE,
			    "pfsync_input: PFSYNC5_ACT_INS: invalid value");
			pfsyncstats.pfsyncs_badval++;
			continue;
		}

		if (pfsync_state_import(sp, pkt->flags) == ENOMEM) {
			/* drop out, but process the rest of the actions */
			break;
		}
	}
	splx(s);

	return (0);
}

int
pfsync_in_iack(struct pfsync_pkt *pkt, caddr_t buf, int len, int count)
{
	struct pfsync_ins_ack *ia;
	struct pf_state_cmp id_key;
	struct pf_state *st;
	int i;
	int s;

	s = splsoftnet();
	for (i = 0; i < count; i++) {
		ia = (struct pfsync_ins_ack *)(buf + len * i);

		bcopy(&ia->id, &id_key.id, sizeof(id_key.id));
		id_key.creatorid = ia->creatorid;

		st = pf_find_state_byid(&id_key);
		if (st == NULL)
			continue;

		if (ISSET(st->state_flags, PFSTATE_ACK))
			pfsync_deferred(st, 0);
	}
	splx(s);

	return (0);
}

int
pfsync_upd_tcp(struct pf_state *st, struct pfsync_state_peer *src,
    struct pfsync_state_peer *dst)
{
	int sync = 0;

	/*
	 * The state should never go backwards except
	 * for syn-proxy states.  Neither should the
	 * sequence window slide backwards.
	 */
	if ((st->src.state > src->state &&
	    (st->src.state < PF_TCPS_PROXY_SRC ||
	    src->state >= PF_TCPS_PROXY_SRC)) ||

	    (st->src.state == src->state &&
	    SEQ_GT(st->src.seqlo, ntohl(src->seqlo))))
		sync++;
	else
		pf_state_peer_ntoh(src, &st->src);

	if ((st->dst.state > dst->state) ||

	    (st->dst.state >= TCPS_SYN_SENT &&
	    SEQ_GT(st->dst.seqlo, ntohl(dst->seqlo))))
		sync++;
	else
		pf_state_peer_ntoh(dst, &st->dst);

	return (sync);
}
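/*
 * PFSYNC_ACT_UPD: merge full state updates into existing states,
 * importing them first if we don't have the state yet.
 */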
int
pfsync_in_upd(struct pfsync_pkt *pkt, caddr_t buf, int len, int count)
{
	struct pfsync_state *sp;
	struct pf_state_cmp id_key;
	struct pf_state *st;
	int sync;

	int i;
	int s;

	s = splsoftnet();
	for (i = 0; i < count; i++) {
		sp = (struct pfsync_state *)(buf + len * i);

		/* check for invalid values */
		if (sp->timeout >= PFTM_MAX ||
		    sp->src.state > PF_TCPS_PROXY_DST ||
		    sp->dst.state > PF_TCPS_PROXY_DST) {
			DPFPRINTF(LOG_NOTICE,
			    "pfsync_input: PFSYNC_ACT_UPD: invalid value");
			pfsyncstats.pfsyncs_badval++;
			continue;
		}

		bcopy(sp->id, &id_key.id, sizeof(id_key.id));
		id_key.creatorid = sp->creatorid;

		st = pf_find_state_byid(&id_key);
		if (st == NULL) {
			/* insert the update */
			if (pfsync_state_import(sp, 0))
				pfsyncstats.pfsyncs_badstate++;
			continue;
		}

		if (ISSET(st->state_flags, PFSTATE_ACK))
			pfsync_deferred(st, 1);

		if (st->key[PF_SK_WIRE]->proto == IPPROTO_TCP)
			sync = pfsync_upd_tcp(st, &sp->src, &sp->dst);
		else {
			sync = 0;

			/*
			 * Non-TCP protocol state machines always go
			 * forwards.
			 */
			if (st->src.state > sp->src.state)
				sync++;
			else
				pf_state_peer_ntoh(&sp->src, &st->src);

			if (st->dst.state > sp->dst.state)
				sync++;
			else
				pf_state_peer_ntoh(&sp->dst, &st->dst);
		}

		if (sync < 2) {
			pfsync_alloc_scrub_memory(&sp->dst, &st->dst);
			pf_state_peer_ntoh(&sp->dst, &st->dst);
			st->expire = ntohl(sp->expire) + time_second;
			st->timeout = sp->timeout;
		}
		st->pfsync_time = time_uptime;

		if (sync) {
			pfsyncstats.pfsyncs_stale++;

			pfsync_update_state(st);
			schednetisr(NETISR_PFSYNC);
		}
	}
	splx(s);

	return (0);
}
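/*
 * PFSYNC_ACT_UPD_C: compressed updates carry only the state id and
 * peer data; if the state is unknown, ask the peer for a full copy.
 */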
int
pfsync_in_upd_c(struct pfsync_pkt *pkt, caddr_t buf, int len, int count)
{
	struct pfsync_upd_c *up;
	struct pf_state_cmp id_key;
	struct pf_state *st;

	int sync;

	int i;
	int s;

	s = splsoftnet();
	for (i = 0; i < count; i++) {
		up = (struct pfsync_upd_c *)(buf + len * i);

		/* check for invalid values */
		if (up->timeout >= PFTM_MAX ||
		    up->src.state > PF_TCPS_PROXY_DST ||
		    up->dst.state > PF_TCPS_PROXY_DST) {
			DPFPRINTF(LOG_NOTICE,
			    "pfsync_input: PFSYNC_ACT_UPD_C: invalid value");
			pfsyncstats.pfsyncs_badval++;
			continue;
		}

		bcopy(&up->id, &id_key.id, sizeof(id_key.id));
		id_key.creatorid = up->creatorid;

		st = pf_find_state_byid(&id_key);
		if (st == NULL) {
			/* We don't have this state. Ask for it. */
			pfsync_request_update(id_key.creatorid, id_key.id);
			continue;
		}

		if (ISSET(st->state_flags, PFSTATE_ACK))
			pfsync_deferred(st, 1);

		if (st->key[PF_SK_WIRE]->proto == IPPROTO_TCP)
			sync = pfsync_upd_tcp(st, &up->src, &up->dst);
		else {
			sync = 0;
			/*
			 * Non-TCP protocol state machines always go
			 * forwards.
			 */
			if (st->src.state > up->src.state)
				sync++;
			else
				pf_state_peer_ntoh(&up->src, &st->src);

			if (st->dst.state > up->dst.state)
				sync++;
			else
				pf_state_peer_ntoh(&up->dst, &st->dst);
		}
		if (sync < 2) {
			pfsync_alloc_scrub_memory(&up->dst, &st->dst);
			pf_state_peer_ntoh(&up->dst, &st->dst);
			st->expire = ntohl(up->expire) + time_second;
			st->timeout = up->timeout;
		}
		st->pfsync_time = time_uptime;

		if (sync) {
			pfsyncstats.pfsyncs_stale++;

			pfsync_update_state(st);
			schednetisr(NETISR_PFSYNC);
		}
	}
	splx(s);

	return (0);
}

int
pfsync_in_ureq(struct pfsync_pkt *pkt, caddr_t buf, int len, int count)
{
	struct pfsync_upd_req *ur;
	int i;

	struct pf_state_cmp id_key;
	struct pf_state *st;

	for (i = 0; i < count; i++) {
		ur = (struct pfsync_upd_req *)(buf + len * i);

		bcopy(&ur->id, &id_key.id, sizeof(id_key.id));
		id_key.creatorid = ur->creatorid;

		if (id_key.id == 0 && id_key.creatorid == 0)
			pfsync_bulk_start();
		else {
			st = pf_find_state_byid(&id_key);
			if (st == NULL) {
				pfsyncstats.pfsyncs_badstate++;
				continue;
			}
			if (ISSET(st->state_flags, PFSTATE_NOSYNC))
				continue;

			pfsync_update_state_req(st);
		}
	}

	return (0);
}

int
pfsync_in_del(struct pfsync_pkt *pkt, caddr_t buf, int len, int count)
{
	struct pfsync_state *sp;
	struct pf_state_cmp id_key;
	struct pf_state *st;
	int i;
	int s;

	s = splsoftnet();
	for (i = 0; i < count; i++) {
		sp = (struct pfsync_state *)(buf + len * i);

		bcopy(sp->id, &id_key.id, sizeof(id_key.id));
		id_key.creatorid = sp->creatorid;

		st = pf_find_state_byid(&id_key);
		if (st == NULL) {
			pfsyncstats.pfsyncs_badstate++;
			continue;
		}
		SET(st->state_flags, PFSTATE_NOSYNC);
		pf_unlink_state(st);
	}
	splx(s);

	return (0);
}

int
pfsync_in_del_c(struct pfsync_pkt *pkt, caddr_t buf, int len, int count)
{
	struct pfsync_del_c *sp;
	struct pf_state_cmp id_key;
	struct pf_state *st;
	int i;
	int s;

	s = splsoftnet();
	for (i = 0; i < count; i++) {
		sp = (struct pfsync_del_c *)(buf + len * i);

		bcopy(&sp->id, &id_key.id, sizeof(id_key.id));
		id_key.creatorid = sp->creatorid;

		st = pf_find_state_byid(&id_key);
		if (st == NULL) {
			pfsyncstats.pfsyncs_badstate++;
			continue;
		}

		SET(st->state_flags, PFSTATE_NOSYNC);
		pf_unlink_state(st);
	}
	splx(s);

	return (0);
}
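/*
 * PFSYNC_ACT_BUS: bulk update status messages bracketing a bulk
 * transfer requested via pfsync_request_full_update().
 */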
int
pfsync_in_bus(struct pfsync_pkt *pkt, caddr_t buf, int len, int count)
{
	struct pfsync_softc *sc = pfsyncif;
	struct pfsync_bus *bus;

	/* If we're not waiting for a bulk update, who cares. */
	if (sc->sc_ureq_sent == 0)
		return (0);

	bus = (struct pfsync_bus *)buf;

	switch (bus->status) {
	case PFSYNC_BUS_START:
		timeout_add(&sc->sc_bulkfail_tmo, 4 * hz +
		    pf_pool_limits[PF_LIMIT_STATES].limit /
		    ((sc->sc_if.if_mtu - PFSYNC_MINPKT) /
		    sizeof(struct pfsync_state)));
		DPFPRINTF(LOG_INFO, "received bulk update start");
		break;

	case PFSYNC_BUS_END:
		if (time_uptime - ntohl(bus->endtime) >=
		    sc->sc_ureq_sent) {
			/* that's it, we're happy */
			sc->sc_ureq_sent = 0;
			sc->sc_bulk_tries = 0;
			timeout_del(&sc->sc_bulkfail_tmo);
#if NCARP > 0
			if (!pfsync_sync_ok)
				carp_group_demote_adj(&sc->sc_if, -1,
				    "pfsync bulk done");
#endif
			pfsync_sync_ok = 1;
			DPFPRINTF(LOG_INFO, "received valid bulk update end");
		} else {
			DPFPRINTF(LOG_WARNING, "received invalid "
			    "bulk update end: bad timestamp");
		}
		break;
	}

	return (0);
}

int
pfsync_in_tdb(struct pfsync_pkt *pkt, caddr_t buf, int len, int count)
{
#if defined(IPSEC)
	struct pfsync_tdb *tp;
	int i;
	int s;

	s = splsoftnet();
	for (i = 0; i < count; i++) {
		tp = (struct pfsync_tdb *)(buf + len * i);
		pfsync_update_net_tdb(tp);
	}
	splx(s);
#endif

	return (0);
}

#if defined(IPSEC)
/* Update an in-kernel tdb. Silently fail if no tdb is found. */
void
pfsync_update_net_tdb(struct pfsync_tdb *pt)
{
	struct tdb *tdb;
	int s;

	/* check for invalid values */
	if (ntohl(pt->spi) <= SPI_RESERVED_MAX ||
	    (pt->dst.sa.sa_family != AF_INET &&
	    pt->dst.sa.sa_family != AF_INET6))
		goto bad;

	s = spltdb();
	tdb = gettdb(pt->spi, &pt->dst, pt->sproto);
	if (tdb) {
		pt->rpl = ntohl(pt->rpl);
		pt->cur_bytes = betoh64(pt->cur_bytes);

		/* Neither replay nor byte counter should ever decrease. */
		if (pt->rpl < tdb->tdb_rpl ||
		    pt->cur_bytes < tdb->tdb_cur_bytes) {
			splx(s);
			goto bad;
		}

		tdb->tdb_rpl = pt->rpl;
		tdb->tdb_cur_bytes = pt->cur_bytes;
	}
	splx(s);
	return;

bad:
	DPFPRINTF(LOG_WARNING, "pfsync_insert: PFSYNC_ACT_TDB_UPD: "
	    "invalid value");
	pfsyncstats.pfsyncs_badstate++;
	return;
}
#endif

int
pfsync_in_eof(struct pfsync_pkt *pkt, caddr_t buf, int len, int count)
{
	if (len > 0 || count > 0)
		pfsyncstats.pfsyncs_badact++;

	/* we're done. let the caller return */
	return (1);
}

int
pfsync_in_error(struct pfsync_pkt *pkt, caddr_t buf, int len, int count)
{
	pfsyncstats.pfsyncs_badact++;
	return (-1);
}

int
pfsyncoutput(struct ifnet *ifp, struct mbuf *m, struct sockaddr *dst,
    struct rtentry *rt)
{
	m_freem(m);
	return (0);
}
/* ARGSUSED */
int
pfsyncioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
{
	struct proc *p = curproc;
	struct pfsync_softc *sc = ifp->if_softc;
	struct ifreq *ifr = (struct ifreq *)data;
	struct ip_moptions *imo = &sc->sc_imo;
	struct pfsyncreq pfsyncr;
	struct ifnet *sifp;
	struct ip *ip;
	int s, error;

	switch (cmd) {
#if 0
	case SIOCSIFADDR:
	case SIOCAIFADDR:
	case SIOCSIFDSTADDR:
#endif
	case SIOCSIFFLAGS:
		s = splnet();
		if (ifp->if_flags & IFF_UP) {
			ifp->if_flags |= IFF_RUNNING;
			pfsync_request_full_update(sc);
		} else {
			ifp->if_flags &= ~IFF_RUNNING;

			/* drop everything */
			timeout_del(&sc->sc_tmo);
			pfsync_drop(sc);

			/* cancel bulk update */
			timeout_del(&sc->sc_bulk_tmo);
			sc->sc_bulk_next = NULL;
			sc->sc_bulk_last = NULL;
		}
		splx(s);
		break;
	case SIOCSIFMTU:
		s = splnet();
		if (ifr->ifr_mtu <= PFSYNC_MINPKT)
			return (EINVAL);
		if (ifr->ifr_mtu > MCLBYTES) /* XXX could be bigger */
			ifr->ifr_mtu = MCLBYTES;
		if (ifr->ifr_mtu < ifp->if_mtu)
			pfsync_sendout();
		ifp->if_mtu = ifr->ifr_mtu;
		splx(s);
		break;
	case SIOCGETPFSYNC:
		bzero(&pfsyncr, sizeof(pfsyncr));
		if (sc->sc_sync_if) {
			strlcpy(pfsyncr.pfsyncr_syncdev,
			    sc->sc_sync_if->if_xname, IFNAMSIZ);
		}
		pfsyncr.pfsyncr_syncpeer = sc->sc_sync_peer;
		pfsyncr.pfsyncr_maxupdates = sc->sc_maxupdates;
		pfsyncr.pfsyncr_defer = sc->sc_defer;
		return (copyout(&pfsyncr, ifr->ifr_data, sizeof(pfsyncr)));

	case SIOCSETPFSYNC:
		if ((error = suser(p, p->p_acflag)) != 0)
			return (error);
		if ((error = copyin(ifr->ifr_data, &pfsyncr, sizeof(pfsyncr))))
			return (error);

		s = splnet();

		if (pfsyncr.pfsyncr_syncpeer.s_addr == 0)
			sc->sc_sync_peer.s_addr = INADDR_PFSYNC_GROUP;
		else
			sc->sc_sync_peer.s_addr =
			    pfsyncr.pfsyncr_syncpeer.s_addr;

		if (pfsyncr.pfsyncr_maxupdates > 255) {
			splx(s);
			return (EINVAL);
		}
		sc->sc_maxupdates = pfsyncr.pfsyncr_maxupdates;

		sc->sc_defer = pfsyncr.pfsyncr_defer;

		if (pfsyncr.pfsyncr_syncdev[0] == 0) {
			sc->sc_sync_if = NULL;
			if (imo->imo_num_memberships > 0) {
				in_delmulti(imo->imo_membership[
				    --imo->imo_num_memberships]);
				imo->imo_multicast_ifp = NULL;
			}
			splx(s);
			break;
		}

		if ((sifp = ifunit(pfsyncr.pfsyncr_syncdev)) == NULL) {
			splx(s);
			return (EINVAL);
		}

		if (sifp->if_mtu < sc->sc_if.if_mtu ||
		    (sc->sc_sync_if != NULL &&
		    sifp->if_mtu < sc->sc_sync_if->if_mtu) ||
		    sifp->if_mtu < MCLBYTES - sizeof(struct ip))
			pfsync_sendout();
		sc->sc_sync_if = sifp;

		if (imo->imo_num_memberships > 0) {
			in_delmulti(imo->imo_membership[
			    --imo->imo_num_memberships]);
			imo->imo_multicast_ifp = NULL;
		}

		if (sc->sc_sync_if &&
		    sc->sc_sync_peer.s_addr == INADDR_PFSYNC_GROUP) {
			struct in_addr addr;

			if (!(sc->sc_sync_if->if_flags & IFF_MULTICAST)) {
				sc->sc_sync_if = NULL;
				splx(s);
				return (EADDRNOTAVAIL);
			}

			addr.s_addr = INADDR_PFSYNC_GROUP;

			if ((imo->imo_membership[0] =
			    in_addmulti(&addr, sc->sc_sync_if)) == NULL) {
				sc->sc_sync_if = NULL;
				splx(s);
				return (ENOBUFS);
			}
			imo->imo_num_memberships++;
			imo->imo_multicast_ifp = sc->sc_sync_if;
			imo->imo_multicast_ttl = PFSYNC_DFLTTL;
			imo->imo_multicast_loop = 0;
		}

		ip = &sc->sc_template;
		bzero(ip, sizeof(*ip));
		ip->ip_v = IPVERSION;
		ip->ip_hl = sizeof(sc->sc_template) >> 2;
		ip->ip_tos = IPTOS_LOWDELAY;
		/* len and id are set later */
		ip->ip_off = htons(IP_DF);
		ip->ip_ttl = PFSYNC_DFLTTL;
		ip->ip_p = IPPROTO_PFSYNC;
		ip->ip_src.s_addr = INADDR_ANY;
		ip->ip_dst.s_addr = sc->sc_sync_peer.s_addr;

		pfsync_request_full_update(sc);
		splx(s);

		break;

	default:
		return (ENOTTY);
	}

	return (0);
}
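/*
 * Writers for the local output queues, called from pfsync_sendout()
 * via the pfsync_qs table to serialize queued states.
 */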
void
pfsync_out_state(struct pf_state *st, void *buf)
{
	struct pfsync_state *sp = buf;

	pfsync_state_export(sp, st);
}

void
pfsync_out_iack(struct pf_state *st, void *buf)
{
	struct pfsync_ins_ack *iack = buf;

	iack->id = st->id;
	iack->creatorid = st->creatorid;
}

void
pfsync_out_upd_c(struct pf_state *st, void *buf)
{
	struct pfsync_upd_c *up = buf;

	bzero(up, sizeof(*up));
	up->id = st->id;
	pf_state_peer_hton(&st->src, &up->src);
	pf_state_peer_hton(&st->dst, &up->dst);
	up->creatorid = st->creatorid;

	up->expire = pf_state_expires(st);
	if (up->expire <= time_second)
		up->expire = htonl(0);
	else
		up->expire = htonl(up->expire - time_second);
	up->timeout = st->timeout;
}

void
pfsync_out_del(struct pf_state *st, void *buf)
{
	struct pfsync_del_c *dp = buf;

	dp->id = st->id;
	dp->creatorid = st->creatorid;

	SET(st->state_flags, PFSTATE_NOSYNC);
}

void
pfsync_drop(struct pfsync_softc *sc)
{
	struct pf_state *st;
	struct pfsync_upd_req_item *ur;
	struct tdb *t;
	int q;

	for (q = 0; q < PFSYNC_S_COUNT; q++) {
		if (TAILQ_EMPTY(&sc->sc_qs[q]))
			continue;

		TAILQ_FOREACH(st, &sc->sc_qs[q], sync_list) {
#ifdef PFSYNC_DEBUG
			KASSERT(st->sync_state == q);
#endif
			st->sync_state = PFSYNC_S_NONE;
		}
		TAILQ_INIT(&sc->sc_qs[q]);
	}

	while ((ur = TAILQ_FIRST(&sc->sc_upd_req_list)) != NULL) {
		TAILQ_REMOVE(&sc->sc_upd_req_list, ur, ur_entry);
		pool_put(&sc->sc_pool, ur);
	}

	sc->sc_plus = NULL;

	if (!TAILQ_EMPTY(&sc->sc_tdb_q)) {
		TAILQ_FOREACH(t, &sc->sc_tdb_q, tdb_sync_entry)
			CLR(t->tdb_flags, TDBF_PFSYNC);

		TAILQ_INIT(&sc->sc_tdb_q);
	}

	sc->sc_len = PFSYNC_MINPKT;
}
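/*
 * Serialize everything that has been queued (update requests, the
 * "plus" region, tdbs and the per-action state queues) into a single
 * packet and hand it to ip_output().
 */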
void
pfsync_sendout(void)
{
	struct pfsync_softc *sc = pfsyncif;
#if NBPFILTER > 0
	struct ifnet *ifp = &sc->sc_if;
#endif
	struct mbuf *m;
	struct ip *ip;
	struct pfsync_header *ph;
	struct pfsync_subheader *subh;
	struct pf_state *st;
	struct pfsync_upd_req_item *ur;
	struct tdb *t;

	int offset;
	int q, count = 0;

	if (sc == NULL || sc->sc_len == PFSYNC_MINPKT)
		return;

	if (!ISSET(sc->sc_if.if_flags, IFF_RUNNING) ||
#if NBPFILTER > 0
	    (ifp->if_bpf == NULL && sc->sc_sync_if == NULL)) {
#else
	    sc->sc_sync_if == NULL) {
#endif
		pfsync_drop(sc);
		return;
	}

	MGETHDR(m, M_DONTWAIT, MT_DATA);
	if (m == NULL) {
		sc->sc_if.if_oerrors++;
		pfsyncstats.pfsyncs_onomem++;
		pfsync_drop(sc);
		return;
	}

	if (max_linkhdr + sc->sc_len > MHLEN) {
		MCLGETI(m, M_DONTWAIT, NULL, max_linkhdr + sc->sc_len);
		if (!ISSET(m->m_flags, M_EXT)) {
			m_free(m);
			sc->sc_if.if_oerrors++;
			pfsyncstats.pfsyncs_onomem++;
			pfsync_drop(sc);
			return;
		}
	}
	m->m_data += max_linkhdr;
	m->m_len = m->m_pkthdr.len = sc->sc_len;

	/* build the ip header */
	ip = (struct ip *)m->m_data;
	bcopy(&sc->sc_template, ip, sizeof(*ip));
	offset = sizeof(*ip);

	ip->ip_len = htons(m->m_pkthdr.len);
	ip->ip_id = htons(ip_randomid());

	/* build the pfsync header */
	ph = (struct pfsync_header *)(m->m_data + offset);
	bzero(ph, sizeof(*ph));
	offset += sizeof(*ph);

	ph->version = PFSYNC_VERSION;
	ph->len = htons(sc->sc_len - sizeof(*ip));
	bcopy(pf_status.pf_chksum, ph->pfcksum, PF_MD5_DIGEST_LENGTH);

	if (!TAILQ_EMPTY(&sc->sc_upd_req_list)) {
		subh = (struct pfsync_subheader *)(m->m_data + offset);
		offset += sizeof(*subh);

		count = 0;
		while ((ur = TAILQ_FIRST(&sc->sc_upd_req_list)) != NULL) {
			TAILQ_REMOVE(&sc->sc_upd_req_list, ur, ur_entry);

			bcopy(&ur->ur_msg, m->m_data + offset,
			    sizeof(ur->ur_msg));
			offset += sizeof(ur->ur_msg);

			pool_put(&sc->sc_pool, ur);

			count++;
		}

		bzero(subh, sizeof(*subh));
		subh->len = sizeof(ur->ur_msg) >> 2;
		subh->action = PFSYNC_ACT_UPD_REQ;
		subh->count = htons(count);
	}

	/* has someone built a custom region for us to add? */
	if (sc->sc_plus != NULL) {
		bcopy(sc->sc_plus, m->m_data + offset, sc->sc_pluslen);
		offset += sc->sc_pluslen;

		sc->sc_plus = NULL;
	}

	if (!TAILQ_EMPTY(&sc->sc_tdb_q)) {
		subh = (struct pfsync_subheader *)(m->m_data + offset);
		offset += sizeof(*subh);

		count = 0;
		TAILQ_FOREACH(t, &sc->sc_tdb_q, tdb_sync_entry) {
			pfsync_out_tdb(t, m->m_data + offset);
			offset += sizeof(struct pfsync_tdb);
			CLR(t->tdb_flags, TDBF_PFSYNC);

			count++;
		}
		TAILQ_INIT(&sc->sc_tdb_q);

		bzero(subh, sizeof(*subh));
		subh->action = PFSYNC_ACT_TDB;
		subh->len = sizeof(struct pfsync_tdb) >> 2;
		subh->count = htons(count);
	}

	/* walk the queues */
	for (q = 0; q < PFSYNC_S_COUNT; q++) {
		if (TAILQ_EMPTY(&sc->sc_qs[q]))
			continue;

		subh = (struct pfsync_subheader *)(m->m_data + offset);
		offset += sizeof(*subh);

		count = 0;
		TAILQ_FOREACH(st, &sc->sc_qs[q], sync_list) {
#ifdef PFSYNC_DEBUG
			KASSERT(st->sync_state == q);
#endif
			pfsync_qs[q].write(st, m->m_data + offset);
			offset += pfsync_qs[q].len;

			st->sync_state = PFSYNC_S_NONE;
			count++;
		}
		TAILQ_INIT(&sc->sc_qs[q]);

		bzero(subh, sizeof(*subh));
		subh->action = pfsync_qs[q].action;
		subh->len = pfsync_qs[q].len >> 2;
		subh->count = htons(count);
	}

	/* we're done, let's put it on the wire */
#if NBPFILTER > 0
	if (ifp->if_bpf) {
		m->m_data += sizeof(*ip);
		m->m_len = m->m_pkthdr.len = sc->sc_len - sizeof(*ip);
		bpf_mtap(ifp->if_bpf, m, BPF_DIRECTION_OUT);
		m->m_data -= sizeof(*ip);
		m->m_len = m->m_pkthdr.len = sc->sc_len;
	}

	if (sc->sc_sync_if == NULL) {
		sc->sc_len = PFSYNC_MINPKT;
		m_freem(m);
		return;
	}
#endif

	sc->sc_if.if_opackets++;
	sc->sc_if.if_obytes += m->m_pkthdr.len;

	if (ip_output(m, NULL, NULL, IP_RAWOUTPUT, &sc->sc_imo, NULL) == 0)
		pfsyncstats.pfsyncs_opackets++;
	else
		pfsyncstats.pfsyncs_oerrors++;

	/* start again */
	sc->sc_len = PFSYNC_MINPKT;
}
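/*
 * Called by pf when a new state is created: queue it for an insert
 * message unless the rule or the state itself opted out of syncing.
 */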
void
pfsync_insert_state(struct pf_state *st)
{
	struct pfsync_softc *sc = pfsyncif;

	splsoftassert(IPL_SOFTNET);

	if (ISSET(st->rule.ptr->rule_flag, PFRULE_NOSYNC) ||
	    st->key[PF_SK_WIRE]->proto == IPPROTO_PFSYNC) {
		SET(st->state_flags, PFSTATE_NOSYNC);
		return;
	}

	if (sc == NULL || !ISSET(sc->sc_if.if_flags, IFF_RUNNING) ||
	    ISSET(st->state_flags, PFSTATE_NOSYNC))
		return;

#ifdef PFSYNC_DEBUG
	KASSERT(st->sync_state == PFSYNC_S_NONE);
#endif

	if (sc->sc_len == PFSYNC_MINPKT)
		timeout_add_sec(&sc->sc_tmo, 1);

	pfsync_q_ins(st, PFSYNC_S_INS);

	st->sync_updates = 0;
}
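/*
 * Packet deferral: when sc_defer is set, the packet that created a
 * state is held back until the peer acknowledges the insert or the
 * timeout (the defer variable below, in ticks) fires, so the peer
 * sees the state before any reply traffic arrives.
 */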
int defer = 10;

int
pfsync_defer(struct pf_state *st, struct mbuf *m)
{
	struct pfsync_softc *sc = pfsyncif;
	struct pfsync_deferral *pd;

	splsoftassert(IPL_SOFTNET);

	if (!sc->sc_defer || m->m_flags & (M_BCAST|M_MCAST))
		return (0);

	if (sc->sc_deferred >= 128)
		pfsync_undefer(TAILQ_FIRST(&sc->sc_deferrals), 0);

	pd = pool_get(&sc->sc_pool, PR_NOWAIT);
	if (pd == NULL)
		return (0);

	m->m_pkthdr.pf.flags |= PF_TAG_GENERATED;
	SET(st->state_flags, PFSTATE_ACK);

	pd->pd_st = st;
	pd->pd_m = m;

	sc->sc_deferred++;
	TAILQ_INSERT_TAIL(&sc->sc_deferrals, pd, pd_entry);

	timeout_set(&pd->pd_tmo, pfsync_defer_tmo, pd);
	timeout_add(&pd->pd_tmo, defer);

	schednetisr(NETISR_PFSYNC);

	return (1);
}

void
pfsync_undefer(struct pfsync_deferral *pd, int drop)
{
	struct pfsync_softc *sc = pfsyncif;

	splsoftassert(IPL_SOFTNET);

	timeout_del(&pd->pd_tmo); /* bah */
	TAILQ_REMOVE(&sc->sc_deferrals, pd, pd_entry);
	sc->sc_deferred--;

	CLR(pd->pd_st->state_flags, PFSTATE_ACK);
	if (drop)
		m_freem(pd->pd_m);
	else {
		ip_output(pd->pd_m, (void *)NULL, (void *)NULL, 0,
		    (void *)NULL, (void *)NULL);
	}

	pool_put(&sc->sc_pool, pd);
}

void
pfsync_defer_tmo(void *arg)
{
	int s;

	s = splsoftnet();
	pfsync_undefer(arg, 0);
	splx(s);
}

void
pfsync_deferred(struct pf_state *st, int drop)
{
	struct pfsync_softc *sc = pfsyncif;
	struct pfsync_deferral *pd;

	splsoftassert(IPL_SOFTNET);

	TAILQ_FOREACH(pd, &sc->sc_deferrals, pd_entry) {
		if (pd->pd_st == st) {
			pfsync_undefer(pd, drop);
			return;
		}
	}

	panic("pfsync_deferred: unable to find deferred state");
}

void
pfsync_update_state(struct pf_state *st)
{
	struct pfsync_softc *sc = pfsyncif;
	int sync = 0;

	splsoftassert(IPL_SOFTNET);

	if (sc == NULL || !ISSET(sc->sc_if.if_flags, IFF_RUNNING))
		return;

	if (ISSET(st->state_flags, PFSTATE_ACK))
		pfsync_deferred(st, 0);
	if (ISSET(st->state_flags, PFSTATE_NOSYNC)) {
		if (st->sync_state != PFSYNC_S_NONE)
			pfsync_q_del(st);
		return;
	}

	if (sc->sc_len == PFSYNC_MINPKT)
		timeout_add_sec(&sc->sc_tmo, 1);

	switch (st->sync_state) {
	case PFSYNC_S_UPD_C:
	case PFSYNC_S_UPD:
	case PFSYNC_S_INS:
		/* we're already handling it */

		if (st->key[PF_SK_WIRE]->proto == IPPROTO_TCP) {
			st->sync_updates++;
			if (st->sync_updates >= sc->sc_maxupdates)
				sync = 1;
		}
		break;

	case PFSYNC_S_IACK:
		pfsync_q_del(st);
		/* FALLTHROUGH */
	case PFSYNC_S_NONE:
		pfsync_q_ins(st, PFSYNC_S_UPD_C);
		st->sync_updates = 0;
		break;

	default:
		panic("pfsync_update_state: unexpected sync state %d",
		    st->sync_state);
	}

	if (sync || (time_uptime - st->pfsync_time) < 2)
		schednetisr(NETISR_PFSYNC);
}

void
pfsync_request_full_update(struct pfsync_softc *sc)
{
	if (sc->sc_sync_if && ISSET(sc->sc_if.if_flags, IFF_RUNNING)) {
		/* Request a full state table update. */
		sc->sc_ureq_sent = time_uptime;
#if NCARP > 0
		if (pfsync_sync_ok)
			carp_group_demote_adj(&sc->sc_if, 1,
			    "pfsync bulk start");
#endif
		pfsync_sync_ok = 0;
		DPFPRINTF(LOG_INFO, "requesting bulk update");
		timeout_add(&sc->sc_bulkfail_tmo, 4 * hz +
		    pf_pool_limits[PF_LIMIT_STATES].limit /
		    ((sc->sc_if.if_mtu - PFSYNC_MINPKT) /
		    sizeof(struct pfsync_state)));
		pfsync_request_update(0, 0);
	}
}
void
pfsync_request_update(u_int32_t creatorid, u_int64_t id)
{
	struct pfsync_softc *sc = pfsyncif;
	struct pfsync_upd_req_item *item;
	size_t nlen = sizeof(struct pfsync_upd_req);

	/*
	 * this code does nothing to prevent multiple update requests for the
	 * same state being generated.
	 */

	item = pool_get(&sc->sc_pool, PR_NOWAIT);
	if (item == NULL) {
		/* XXX stats */
		return;
	}

	item->ur_msg.id = id;
	item->ur_msg.creatorid = creatorid;

	if (TAILQ_EMPTY(&sc->sc_upd_req_list))
		nlen += sizeof(struct pfsync_subheader);

	if (sc->sc_len + nlen > sc->sc_if.if_mtu) {
		pfsync_sendout();

		nlen = sizeof(struct pfsync_subheader) +
		    sizeof(struct pfsync_upd_req);
	}

	TAILQ_INSERT_TAIL(&sc->sc_upd_req_list, item, ur_entry);
	sc->sc_len += nlen;

	schednetisr(NETISR_PFSYNC);
}

void
pfsync_update_state_req(struct pf_state *st)
{
	struct pfsync_softc *sc = pfsyncif;

	if (sc == NULL)
		panic("pfsync_update_state_req: nonexistent instance");

	if (ISSET(st->state_flags, PFSTATE_NOSYNC)) {
		if (st->sync_state != PFSYNC_S_NONE)
			pfsync_q_del(st);
		return;
	}

	switch (st->sync_state) {
	case PFSYNC_S_UPD_C:
	case PFSYNC_S_IACK:
		pfsync_q_del(st);
		/* FALLTHROUGH */
	case PFSYNC_S_NONE:
		pfsync_q_ins(st, PFSYNC_S_UPD);
		schednetisr(NETISR_PFSYNC);
		return;

	case PFSYNC_S_INS:
	case PFSYNC_S_UPD:
	case PFSYNC_S_DEL:
		/* we're already handling it */
		return;

	default:
		panic("pfsync_update_state_req: unexpected sync state %d",
		    st->sync_state);
	}
}

void
pfsync_delete_state(struct pf_state *st)
{
	struct pfsync_softc *sc = pfsyncif;

	splsoftassert(IPL_SOFTNET);

	if (sc == NULL || !ISSET(sc->sc_if.if_flags, IFF_RUNNING))
		return;

	if (ISSET(st->state_flags, PFSTATE_ACK))
		pfsync_deferred(st, 1);
	if (ISSET(st->state_flags, PFSTATE_NOSYNC)) {
		if (st->sync_state != PFSYNC_S_NONE)
			pfsync_q_del(st);
		return;
	}

	if (sc->sc_len == PFSYNC_MINPKT)
		timeout_add_sec(&sc->sc_tmo, 1);

	switch (st->sync_state) {
	case PFSYNC_S_INS:
		/* we never got to tell the world so just forget about it */
		pfsync_q_del(st);
		return;

	case PFSYNC_S_UPD_C:
	case PFSYNC_S_UPD:
	case PFSYNC_S_IACK:
		pfsync_q_del(st);
		/* FALLTHROUGH to putting it on the del list */

	case PFSYNC_S_NONE:
		pfsync_q_ins(st, PFSYNC_S_DEL);
		return;

	default:
		panic("pfsync_delete_state: unexpected sync state %d",
		    st->sync_state);
	}
}

void
pfsync_clear_states(u_int32_t creatorid, const char *ifname)
{
	struct pfsync_softc *sc = pfsyncif;
	struct {
		struct pfsync_subheader subh;
		struct pfsync_clr clr;
	} __packed r;

	splsoftassert(IPL_SOFTNET);

	if (sc == NULL || !ISSET(sc->sc_if.if_flags, IFF_RUNNING))
		return;

	bzero(&r, sizeof(r));

	r.subh.action = PFSYNC_ACT_CLR;
	r.subh.len = sizeof(struct pfsync_clr) >> 2;
	r.subh.count = htons(1);

	strlcpy(r.clr.ifname, ifname, sizeof(r.clr.ifname));
	r.clr.creatorid = creatorid;

	pfsync_send_plus(&r, sizeof(r));
}
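/*
 * Queue management for the per-action state queues. sc_len tracks the
 * size of the packet being built and includes one subheader for each
 * queue that becomes non-empty.
 */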
void
pfsync_q_ins(struct pf_state *st, int q)
{
	struct pfsync_softc *sc = pfsyncif;
	size_t nlen = pfsync_qs[q].len;

	KASSERT(st->sync_state == PFSYNC_S_NONE);

#if 1 || defined(PFSYNC_DEBUG)
	if (sc->sc_len < PFSYNC_MINPKT)
		panic("pfsync pkt len is too low %d", sc->sc_len);
#endif
	if (TAILQ_EMPTY(&sc->sc_qs[q]))
		nlen += sizeof(struct pfsync_subheader);

	if (sc->sc_len + nlen > sc->sc_if.if_mtu) {
		pfsync_sendout();

		nlen = sizeof(struct pfsync_subheader) + pfsync_qs[q].len;
	}

	sc->sc_len += nlen;
	TAILQ_INSERT_TAIL(&sc->sc_qs[q], st, sync_list);
	st->sync_state = q;
}

void
pfsync_q_del(struct pf_state *st)
{
	struct pfsync_softc *sc = pfsyncif;
	int q = st->sync_state;

	KASSERT(st->sync_state != PFSYNC_S_NONE);

	sc->sc_len -= pfsync_qs[q].len;
	TAILQ_REMOVE(&sc->sc_qs[q], st, sync_list);
	st->sync_state = PFSYNC_S_NONE;

	if (TAILQ_EMPTY(&sc->sc_qs[q]))
		sc->sc_len -= sizeof(struct pfsync_subheader);
}

void
pfsync_update_tdb(struct tdb *t, int output)
{
	struct pfsync_softc *sc = pfsyncif;
	size_t nlen = sizeof(struct pfsync_tdb);

	if (sc == NULL)
		return;

	if (!ISSET(t->tdb_flags, TDBF_PFSYNC)) {
		if (TAILQ_EMPTY(&sc->sc_tdb_q))
			nlen += sizeof(struct pfsync_subheader);

		if (sc->sc_len + nlen > sc->sc_if.if_mtu) {
			pfsync_sendout();

			nlen = sizeof(struct pfsync_subheader) +
			    sizeof(struct pfsync_tdb);
		}

		sc->sc_len += nlen;
		TAILQ_INSERT_TAIL(&sc->sc_tdb_q, t, tdb_sync_entry);
		SET(t->tdb_flags, TDBF_PFSYNC);
		t->tdb_updates = 0;
	} else {
		if (++t->tdb_updates >= sc->sc_maxupdates)
			schednetisr(NETISR_PFSYNC);
	}

	if (output)
		SET(t->tdb_flags, TDBF_PFSYNC_RPL);
	else
		CLR(t->tdb_flags, TDBF_PFSYNC_RPL);
}

void
pfsync_delete_tdb(struct tdb *t)
{
	struct pfsync_softc *sc = pfsyncif;

	if (sc == NULL || !ISSET(t->tdb_flags, TDBF_PFSYNC))
		return;

	sc->sc_len -= sizeof(struct pfsync_tdb);
	TAILQ_REMOVE(&sc->sc_tdb_q, t, tdb_sync_entry);
	CLR(t->tdb_flags, TDBF_PFSYNC);

	if (TAILQ_EMPTY(&sc->sc_tdb_q))
		sc->sc_len -= sizeof(struct pfsync_subheader);
}
void
pfsync_out_tdb(struct tdb *t, void *buf)
{
	struct pfsync_tdb *ut = buf;

	bzero(ut, sizeof(*ut));
	ut->spi = t->tdb_spi;
	bcopy(&t->tdb_dst, &ut->dst, sizeof(ut->dst));
	/*
	 * When a failover happens, the master's rpl is probably above
	 * what we see here (we may be up to a second late), so
	 * increase it a bit for outbound tdbs to manage most such
	 * situations.
	 *
	 * For now, just add an offset that is likely to be larger
	 * than the number of packets we can see in one second. The RFC
	 * just says the next packet must have a higher seq value.
	 *
	 * XXX What is a good algorithm for this? We could use
	 * a rate-determined increase, but to know it, we would have
	 * to extend struct tdb.
	 * XXX pt->rpl can wrap over MAXINT, but if so the real tdb
	 * will soon be replaced anyway. For now, just don't handle
	 * this edge case.
	 */
#define RPL_INCR 16384
	ut->rpl = htonl(t->tdb_rpl + (ISSET(t->tdb_flags, TDBF_PFSYNC_RPL) ?
	    RPL_INCR : 0));
	ut->cur_bytes = htobe64(t->tdb_cur_bytes);
	ut->sproto = t->tdb_sproto;
}
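/*
 * Bulk update machinery: pfsync_bulk_start() answers a peer's request
 * for the whole state table, pfsync_bulk_update() walks the state
 * list in chunks from a timeout, and pfsync_bulk_fail() retries up to
 * PFSYNC_MAX_BULKTRIES times before giving up.
 */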
void
pfsync_bulk_start(void)
{
	struct pfsync_softc *sc = pfsyncif;

	sc->sc_ureq_received = time_uptime;

	if (sc->sc_bulk_next == NULL)
		sc->sc_bulk_next = TAILQ_FIRST(&state_list);
	sc->sc_bulk_last = sc->sc_bulk_next;

	DPFPRINTF(LOG_INFO, "received bulk update request");

	pfsync_bulk_status(PFSYNC_BUS_START);
	timeout_add(&sc->sc_bulk_tmo, 0);
}

void
pfsync_bulk_update(void *arg)
{
	struct pfsync_softc *sc = arg;
	struct pf_state *st;
	int i = 0;
	int s;

	s = splsoftnet();

	st = sc->sc_bulk_next;

	while (st != sc->sc_bulk_last) {
		if (st->sync_state == PFSYNC_S_NONE &&
		    st->timeout < PFTM_MAX &&
		    st->pfsync_time <= sc->sc_ureq_received) {
			pfsync_update_state_req(st);
			i++;
		}

		st = TAILQ_NEXT(st, entry_list);
		if (st == NULL)
			st = TAILQ_FIRST(&state_list);

		if (i > 0 && TAILQ_EMPTY(&sc->sc_qs[PFSYNC_S_UPD])) {
			sc->sc_bulk_next = st;
			timeout_add(&sc->sc_bulk_tmo, 1);
			goto out;
		}
	}

	/* we're done */
	sc->sc_bulk_next = NULL;
	sc->sc_bulk_last = NULL;
	pfsync_bulk_status(PFSYNC_BUS_END);

out:
	splx(s);
}

void
pfsync_bulk_status(u_int8_t status)
{
	struct {
		struct pfsync_subheader subh;
		struct pfsync_bus bus;
	} __packed r;

	struct pfsync_softc *sc = pfsyncif;

	bzero(&r, sizeof(r));

	r.subh.action = PFSYNC_ACT_BUS;
	r.subh.len = sizeof(struct pfsync_bus) >> 2;
	r.subh.count = htons(1);

	r.bus.creatorid = pf_status.hostid;
	r.bus.endtime = htonl(time_uptime - sc->sc_ureq_received);
	r.bus.status = status;

	pfsync_send_plus(&r, sizeof(r));
}

void
pfsync_bulk_fail(void *arg)
{
	struct pfsync_softc *sc = arg;

	if (sc->sc_bulk_tries++ < PFSYNC_MAX_BULKTRIES) {
		/* Try again */
		timeout_add_sec(&sc->sc_bulkfail_tmo, 5);
		pfsync_request_update(0, 0);
	} else {
		/* Pretend like the transfer was ok */
		sc->sc_ureq_sent = 0;
		sc->sc_bulk_tries = 0;
#if NCARP > 0
		if (!pfsync_sync_ok)
			carp_group_demote_adj(&sc->sc_if, -1,
			    "pfsync bulk fail");
#endif
		pfsync_sync_ok = 1;
		DPFPRINTF(LOG_ERR, "failed to receive bulk update");
	}
}

void
pfsync_send_plus(void *plus, size_t pluslen)
{
	struct pfsync_softc *sc = pfsyncif;

	if (sc->sc_len + pluslen > sc->sc_if.if_mtu)
		pfsync_sendout();

	sc->sc_plus = plus;
	sc->sc_len += (sc->sc_pluslen = pluslen);

	pfsync_sendout();
}

int
pfsync_up(void)
{
	struct pfsync_softc *sc = pfsyncif;

	if (sc == NULL || !ISSET(sc->sc_if.if_flags, IFF_RUNNING))
		return (0);

	return (1);
}

int
pfsync_state_in_use(struct pf_state *st)
{
	struct pfsync_softc *sc = pfsyncif;

	if (sc == NULL)
		return (0);

	if (st->sync_state != PFSYNC_S_NONE)
		return (1);

	if (sc->sc_bulk_next == NULL && sc->sc_bulk_last == NULL)
		return (0);

	return (1);
}

void
pfsync_timeout(void *arg)
{
	int s;

	s = splsoftnet();
	pfsync_sendout();
	splx(s);
}

/* this is a softnet/netisr handler */
void
pfsyncintr(void)
{
	pfsync_sendout();
}

int
pfsync_sysctl(int *name, u_int namelen, void *oldp, size_t *oldlenp, void *newp,
    size_t newlen)
{
	/* All sysctl names at this level are terminal. */
	if (namelen != 1)
		return (ENOTDIR);

	switch (name[0]) {
	case PFSYNCCTL_STATS:
		if (newp != NULL)
			return (EPERM);
		return (sysctl_struct(oldp, oldlenp, newp, newlen,
		    &pfsyncstats, sizeof(pfsyncstats)));
	default:
		return (ENOPROTOOPT);
	}
}