1 /*- 2 * SPDX-License-Identifier: (BSD-2-Clause-FreeBSD AND ISC) 3 * 4 * Copyright (c) 2002 Michael Shalayeff 5 * Copyright (c) 2012 Gleb Smirnoff <glebius@FreeBSD.org> 6 * All rights reserved. 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions 10 * are met: 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in the 15 * documentation and/or other materials provided with the distribution. 16 * 17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR 18 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 19 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 20 * IN NO EVENT SHALL THE AUTHOR OR HIS RELATIVES BE LIABLE FOR ANY DIRECT, 21 * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 22 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 23 * SERVICES; LOSS OF MIND, USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 24 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, 25 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING 26 * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF 27 * THE POSSIBILITY OF SUCH DAMAGE. 28 */ 29 30 /*- 31 * Copyright (c) 2009 David Gwynne <dlg@openbsd.org> 32 * 33 * Permission to use, copy, modify, and distribute this software for any 34 * purpose with or without fee is hereby granted, provided that the above 35 * copyright notice and this permission notice appear in all copies. 36 * 37 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES 38 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF 39 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR 40 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES 41 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN 42 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF 43 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 44 */ 45 46 /* 47 * $OpenBSD: if_pfsync.c,v 1.110 2009/02/24 05:39:19 dlg Exp $ 48 * 49 * Revisions picked from OpenBSD after revision 1.110 import: 50 * 1.119 - don't m_copydata() beyond the len of mbuf in pfsync_input() 51 * 1.118, 1.124, 1.148, 1.149, 1.151, 1.171 - fixes to bulk updates 52 * 1.120, 1.175 - use monotonic time_uptime 53 * 1.122 - reduce number of updates for non-TCP sessions 54 * 1.125, 1.127 - rewrite merge or stale processing 55 * 1.128 - cleanups 56 * 1.146 - bzero() mbuf before sparsely filling it with data 57 * 1.170 - SIOCSIFMTU checks 58 * 1.126, 1.142 - deferred packets processing 59 * 1.173 - correct expire time processing 60 */ 61 62 #include <sys/cdefs.h> 63 __FBSDID("$FreeBSD$"); 64 65 #include "opt_inet.h" 66 #include "opt_inet6.h" 67 #include "opt_pf.h" 68 69 #include <sys/param.h> 70 #include <sys/bus.h> 71 #include <sys/endian.h> 72 #include <sys/interrupt.h> 73 #include <sys/kernel.h> 74 #include <sys/lock.h> 75 #include <sys/mbuf.h> 76 #include <sys/module.h> 77 #include <sys/mutex.h> 78 #include <sys/priv.h> 79 #include <sys/protosw.h> 80 #include <sys/socket.h> 81 #include <sys/sockio.h> 82 #include <sys/sysctl.h> 83 #include <sys/syslog.h> 84 85 #include <net/bpf.h> 86 #include <net/if.h> 87 #include <net/if_var.h> 88 #include <net/if_clone.h> 89 #include <net/if_types.h> 90 #include <net/vnet.h> 91 #include <net/pfvar.h> 92 #include <net/if_pfsync.h> 93 94 #include <netinet/if_ether.h> 95 #include <netinet/in.h> 96 #include <netinet/in_var.h> 97 #include <netinet/ip.h> 98 #include <netinet/ip_carp.h> 99 #include <netinet/ip_var.h> 100 #include <netinet/tcp.h> 101 #include <netinet/tcp_fsm.h> 102 #include <netinet/tcp_seq.h> 103 104 #define PFSYNC_MINPKT ( \ 105 sizeof(struct ip) + \ 106 sizeof(struct pfsync_header) + \ 107 sizeof(struct pfsync_subheader) ) 108 109 struct pfsync_pkt { 110 struct ip *ip; 111 struct in_addr src; 112 u_int8_t flags; 113 }; 114 115 static int pfsync_upd_tcp(struct pf_state *, struct pfsync_state_peer *, 116 struct pfsync_state_peer *); 117 static int pfsync_in_clr(struct pfsync_pkt *, struct mbuf *, int, int); 118 static int pfsync_in_ins(struct pfsync_pkt *, struct mbuf *, int, int); 119 static int pfsync_in_iack(struct pfsync_pkt *, struct mbuf *, int, int); 120 static int pfsync_in_upd(struct pfsync_pkt *, struct mbuf *, int, int); 121 static int pfsync_in_upd_c(struct pfsync_pkt *, struct mbuf *, int, int); 122 static int pfsync_in_ureq(struct pfsync_pkt *, struct mbuf *, int, int); 123 static int pfsync_in_del(struct pfsync_pkt *, struct mbuf *, int, int); 124 static int pfsync_in_del_c(struct pfsync_pkt *, struct mbuf *, int, int); 125 static int pfsync_in_bus(struct pfsync_pkt *, struct mbuf *, int, int); 126 static int pfsync_in_tdb(struct pfsync_pkt *, struct mbuf *, int, int); 127 static int pfsync_in_eof(struct pfsync_pkt *, struct mbuf *, int, int); 128 static int pfsync_in_error(struct pfsync_pkt *, struct mbuf *, int, int); 129 130 static int (*pfsync_acts[])(struct pfsync_pkt *, struct mbuf *, int, int) = { 131 pfsync_in_clr, /* PFSYNC_ACT_CLR */ 132 pfsync_in_ins, /* PFSYNC_ACT_INS */ 133 pfsync_in_iack, /* PFSYNC_ACT_INS_ACK */ 134 pfsync_in_upd, /* PFSYNC_ACT_UPD */ 135 pfsync_in_upd_c, /* PFSYNC_ACT_UPD_C */ 136 pfsync_in_ureq, /* PFSYNC_ACT_UPD_REQ */ 137 pfsync_in_del, /* PFSYNC_ACT_DEL */ 138 pfsync_in_del_c, /* PFSYNC_ACT_DEL_C */ 139 pfsync_in_error, /* PFSYNC_ACT_INS_F */ 140 pfsync_in_error, /* PFSYNC_ACT_DEL_F */ 141 pfsync_in_bus, /* PFSYNC_ACT_BUS */ 142 pfsync_in_tdb, /* PFSYNC_ACT_TDB */ 143 pfsync_in_eof /* PFSYNC_ACT_EOF */ 144 }; 145 146 struct pfsync_q { 147 void (*write)(struct pf_state *, void *); 148 size_t len; 149 u_int8_t action; 150 }; 151 152 /* we have one of these for every PFSYNC_S_ */ 153 static void pfsync_out_state(struct pf_state *, void *); 154 static void pfsync_out_iack(struct pf_state *, void *); 155 static void pfsync_out_upd_c(struct pf_state *, void *); 156 static void pfsync_out_del(struct pf_state *, void *); 157 158 static struct pfsync_q pfsync_qs[] = { 159 { pfsync_out_state, sizeof(struct pfsync_state), PFSYNC_ACT_INS }, 160 { pfsync_out_iack, sizeof(struct pfsync_ins_ack), PFSYNC_ACT_INS_ACK }, 161 { pfsync_out_state, sizeof(struct pfsync_state), PFSYNC_ACT_UPD }, 162 { pfsync_out_upd_c, sizeof(struct pfsync_upd_c), PFSYNC_ACT_UPD_C }, 163 { pfsync_out_del, sizeof(struct pfsync_del_c), PFSYNC_ACT_DEL_C } 164 }; 165 166 static void pfsync_q_ins(struct pf_state *, int, bool); 167 static void pfsync_q_del(struct pf_state *, bool); 168 169 static void pfsync_update_state(struct pf_state *); 170 171 struct pfsync_upd_req_item { 172 TAILQ_ENTRY(pfsync_upd_req_item) ur_entry; 173 struct pfsync_upd_req ur_msg; 174 }; 175 176 struct pfsync_deferral { 177 struct pfsync_softc *pd_sc; 178 TAILQ_ENTRY(pfsync_deferral) pd_entry; 179 u_int pd_refs; 180 struct callout pd_tmo; 181 182 struct pf_state *pd_st; 183 struct mbuf *pd_m; 184 }; 185 186 struct pfsync_softc { 187 /* Configuration */ 188 struct ifnet *sc_ifp; 189 struct ifnet *sc_sync_if; 190 struct ip_moptions sc_imo; 191 struct in_addr sc_sync_peer; 192 uint32_t sc_flags; 193 #define PFSYNCF_OK 0x00000001 194 #define PFSYNCF_DEFER 0x00000002 195 #define PFSYNCF_PUSH 0x00000004 196 uint8_t sc_maxupdates; 197 struct ip sc_template; 198 struct callout sc_tmo; 199 struct mtx sc_mtx; 200 201 /* Queued data */ 202 size_t sc_len; 203 TAILQ_HEAD(, pf_state) sc_qs[PFSYNC_S_COUNT]; 204 TAILQ_HEAD(, pfsync_upd_req_item) sc_upd_req_list; 205 TAILQ_HEAD(, pfsync_deferral) sc_deferrals; 206 u_int sc_deferred; 207 void *sc_plus; 208 size_t sc_pluslen; 209 210 /* Bulk update info */ 211 struct mtx sc_bulk_mtx; 212 uint32_t sc_ureq_sent; 213 int sc_bulk_tries; 214 uint32_t sc_ureq_received; 215 int sc_bulk_hashid; 216 uint64_t sc_bulk_stateid; 217 uint32_t sc_bulk_creatorid; 218 struct callout sc_bulk_tmo; 219 struct callout sc_bulkfail_tmo; 220 }; 221 222 #define PFSYNC_LOCK(sc) mtx_lock(&(sc)->sc_mtx) 223 #define PFSYNC_UNLOCK(sc) mtx_unlock(&(sc)->sc_mtx) 224 #define PFSYNC_LOCK_ASSERT(sc) mtx_assert(&(sc)->sc_mtx, MA_OWNED) 225 226 #define PFSYNC_BLOCK(sc) mtx_lock(&(sc)->sc_bulk_mtx) 227 #define PFSYNC_BUNLOCK(sc) mtx_unlock(&(sc)->sc_bulk_mtx) 228 #define PFSYNC_BLOCK_ASSERT(sc) mtx_assert(&(sc)->sc_bulk_mtx, MA_OWNED) 229 230 static const char pfsyncname[] = "pfsync"; 231 static MALLOC_DEFINE(M_PFSYNC, pfsyncname, "pfsync(4) data"); 232 static VNET_DEFINE(struct pfsync_softc *, pfsyncif) = NULL; 233 #define V_pfsyncif VNET(pfsyncif) 234 static VNET_DEFINE(void *, pfsync_swi_cookie) = NULL; 235 #define V_pfsync_swi_cookie VNET(pfsync_swi_cookie) 236 static VNET_DEFINE(struct pfsyncstats, pfsyncstats); 237 #define V_pfsyncstats VNET(pfsyncstats) 238 static VNET_DEFINE(int, pfsync_carp_adj) = CARP_MAXSKEW; 239 #define V_pfsync_carp_adj VNET(pfsync_carp_adj) 240 241 static void pfsync_timeout(void *); 242 static void pfsync_push(struct pfsync_softc *); 243 static void pfsyncintr(void *); 244 static int pfsync_multicast_setup(struct pfsync_softc *, struct ifnet *, 245 void *); 246 static void pfsync_multicast_cleanup(struct pfsync_softc *); 247 static void pfsync_pointers_init(void); 248 static void pfsync_pointers_uninit(void); 249 static int pfsync_init(void); 250 static void pfsync_uninit(void); 251 252 SYSCTL_NODE(_net, OID_AUTO, pfsync, CTLFLAG_RW, 0, "PFSYNC"); 253 SYSCTL_STRUCT(_net_pfsync, OID_AUTO, stats, CTLFLAG_VNET | CTLFLAG_RW, 254 &VNET_NAME(pfsyncstats), pfsyncstats, 255 "PFSYNC statistics (struct pfsyncstats, net/if_pfsync.h)"); 256 SYSCTL_INT(_net_pfsync, OID_AUTO, carp_demotion_factor, CTLFLAG_RW, 257 &VNET_NAME(pfsync_carp_adj), 0, "pfsync's CARP demotion factor adjustment"); 258 259 static int pfsync_clone_create(struct if_clone *, int, caddr_t); 260 static void pfsync_clone_destroy(struct ifnet *); 261 static int pfsync_alloc_scrub_memory(struct pfsync_state_peer *, 262 struct pf_state_peer *); 263 static int pfsyncoutput(struct ifnet *, struct mbuf *, 264 const struct sockaddr *, struct route *); 265 static int pfsyncioctl(struct ifnet *, u_long, caddr_t); 266 267 static int pfsync_defer(struct pf_state *, struct mbuf *); 268 static void pfsync_undefer(struct pfsync_deferral *, int); 269 static void pfsync_undefer_state(struct pf_state *, int); 270 static void pfsync_defer_tmo(void *); 271 272 static void pfsync_request_update(u_int32_t, u_int64_t); 273 static void pfsync_update_state_req(struct pf_state *); 274 275 static void pfsync_drop(struct pfsync_softc *); 276 static void pfsync_sendout(int); 277 static void pfsync_send_plus(void *, size_t); 278 279 static void pfsync_bulk_start(void); 280 static void pfsync_bulk_status(u_int8_t); 281 static void pfsync_bulk_update(void *); 282 static void pfsync_bulk_fail(void *); 283 284 #ifdef IPSEC 285 static void pfsync_update_net_tdb(struct pfsync_tdb *); 286 #endif 287 288 #define PFSYNC_MAX_BULKTRIES 12 289 290 VNET_DEFINE(struct if_clone *, pfsync_cloner); 291 #define V_pfsync_cloner VNET(pfsync_cloner) 292 293 static int 294 pfsync_clone_create(struct if_clone *ifc, int unit, caddr_t param) 295 { 296 struct pfsync_softc *sc; 297 struct ifnet *ifp; 298 int q; 299 300 if (unit != 0) 301 return (EINVAL); 302 303 sc = malloc(sizeof(struct pfsync_softc), M_PFSYNC, M_WAITOK | M_ZERO); 304 sc->sc_flags |= PFSYNCF_OK; 305 306 for (q = 0; q < PFSYNC_S_COUNT; q++) 307 TAILQ_INIT(&sc->sc_qs[q]); 308 309 TAILQ_INIT(&sc->sc_upd_req_list); 310 TAILQ_INIT(&sc->sc_deferrals); 311 312 sc->sc_len = PFSYNC_MINPKT; 313 sc->sc_maxupdates = 128; 314 315 ifp = sc->sc_ifp = if_alloc(IFT_PFSYNC); 316 if (ifp == NULL) { 317 free(sc, M_PFSYNC); 318 return (ENOSPC); 319 } 320 if_initname(ifp, pfsyncname, unit); 321 ifp->if_softc = sc; 322 ifp->if_ioctl = pfsyncioctl; 323 ifp->if_output = pfsyncoutput; 324 ifp->if_type = IFT_PFSYNC; 325 ifp->if_snd.ifq_maxlen = ifqmaxlen; 326 ifp->if_hdrlen = sizeof(struct pfsync_header); 327 ifp->if_mtu = ETHERMTU; 328 mtx_init(&sc->sc_mtx, pfsyncname, NULL, MTX_DEF); 329 mtx_init(&sc->sc_bulk_mtx, "pfsync bulk", NULL, MTX_DEF); 330 callout_init(&sc->sc_tmo, 1); 331 callout_init_mtx(&sc->sc_bulk_tmo, &sc->sc_bulk_mtx, 0); 332 callout_init_mtx(&sc->sc_bulkfail_tmo, &sc->sc_bulk_mtx, 0); 333 334 if_attach(ifp); 335 336 bpfattach(ifp, DLT_PFSYNC, PFSYNC_HDRLEN); 337 338 V_pfsyncif = sc; 339 340 return (0); 341 } 342 343 static void 344 pfsync_clone_destroy(struct ifnet *ifp) 345 { 346 struct pfsync_softc *sc = ifp->if_softc; 347 348 /* 349 * At this stage, everything should have already been 350 * cleared by pfsync_uninit(), and we have only to 351 * drain callouts. 352 */ 353 while (sc->sc_deferred > 0) { 354 struct pfsync_deferral *pd = TAILQ_FIRST(&sc->sc_deferrals); 355 356 TAILQ_REMOVE(&sc->sc_deferrals, pd, pd_entry); 357 sc->sc_deferred--; 358 if (callout_stop(&pd->pd_tmo) > 0) { 359 pf_release_state(pd->pd_st); 360 m_freem(pd->pd_m); 361 free(pd, M_PFSYNC); 362 } else { 363 pd->pd_refs++; 364 callout_drain(&pd->pd_tmo); 365 free(pd, M_PFSYNC); 366 } 367 } 368 369 callout_drain(&sc->sc_tmo); 370 callout_drain(&sc->sc_bulkfail_tmo); 371 callout_drain(&sc->sc_bulk_tmo); 372 373 if (!(sc->sc_flags & PFSYNCF_OK) && carp_demote_adj_p) 374 (*carp_demote_adj_p)(-V_pfsync_carp_adj, "pfsync destroy"); 375 bpfdetach(ifp); 376 if_detach(ifp); 377 378 pfsync_drop(sc); 379 380 if_free(ifp); 381 if (sc->sc_imo.imo_membership) 382 pfsync_multicast_cleanup(sc); 383 mtx_destroy(&sc->sc_mtx); 384 mtx_destroy(&sc->sc_bulk_mtx); 385 free(sc, M_PFSYNC); 386 387 V_pfsyncif = NULL; 388 } 389 390 static int 391 pfsync_alloc_scrub_memory(struct pfsync_state_peer *s, 392 struct pf_state_peer *d) 393 { 394 if (s->scrub.scrub_flag && d->scrub == NULL) { 395 d->scrub = uma_zalloc(V_pf_state_scrub_z, M_NOWAIT | M_ZERO); 396 if (d->scrub == NULL) 397 return (ENOMEM); 398 } 399 400 return (0); 401 } 402 403 404 static int 405 pfsync_state_import(struct pfsync_state *sp, u_int8_t flags) 406 { 407 struct pfsync_softc *sc = V_pfsyncif; 408 #ifndef __NO_STRICT_ALIGNMENT 409 struct pfsync_state_key key[2]; 410 #endif 411 struct pfsync_state_key *kw, *ks; 412 struct pf_state *st = NULL; 413 struct pf_state_key *skw = NULL, *sks = NULL; 414 struct pf_rule *r = NULL; 415 struct pfi_kif *kif; 416 int error; 417 418 PF_RULES_RASSERT(); 419 420 if (sp->creatorid == 0) { 421 if (V_pf_status.debug >= PF_DEBUG_MISC) 422 printf("%s: invalid creator id: %08x\n", __func__, 423 ntohl(sp->creatorid)); 424 return (EINVAL); 425 } 426 427 if ((kif = pfi_kif_find(sp->ifname)) == NULL) { 428 if (V_pf_status.debug >= PF_DEBUG_MISC) 429 printf("%s: unknown interface: %s\n", __func__, 430 sp->ifname); 431 if (flags & PFSYNC_SI_IOCTL) 432 return (EINVAL); 433 return (0); /* skip this state */ 434 } 435 436 /* 437 * If the ruleset checksums match or the state is coming from the ioctl, 438 * it's safe to associate the state with the rule of that number. 439 */ 440 if (sp->rule != htonl(-1) && sp->anchor == htonl(-1) && 441 (flags & (PFSYNC_SI_IOCTL | PFSYNC_SI_CKSUM)) && ntohl(sp->rule) < 442 pf_main_ruleset.rules[PF_RULESET_FILTER].active.rcount) 443 r = pf_main_ruleset.rules[ 444 PF_RULESET_FILTER].active.ptr_array[ntohl(sp->rule)]; 445 else 446 r = &V_pf_default_rule; 447 448 if ((r->max_states && 449 counter_u64_fetch(r->states_cur) >= r->max_states)) 450 goto cleanup; 451 452 /* 453 * XXXGL: consider M_WAITOK in ioctl path after. 454 */ 455 if ((st = uma_zalloc(V_pf_state_z, M_NOWAIT | M_ZERO)) == NULL) 456 goto cleanup; 457 458 if ((skw = uma_zalloc(V_pf_state_key_z, M_NOWAIT)) == NULL) 459 goto cleanup; 460 461 #ifndef __NO_STRICT_ALIGNMENT 462 bcopy(&sp->key, key, sizeof(struct pfsync_state_key) * 2); 463 kw = &key[PF_SK_WIRE]; 464 ks = &key[PF_SK_STACK]; 465 #else 466 kw = &sp->key[PF_SK_WIRE]; 467 ks = &sp->key[PF_SK_STACK]; 468 #endif 469 470 if (PF_ANEQ(&kw->addr[0], &ks->addr[0], sp->af) || 471 PF_ANEQ(&kw->addr[1], &ks->addr[1], sp->af) || 472 kw->port[0] != ks->port[0] || 473 kw->port[1] != ks->port[1]) { 474 sks = uma_zalloc(V_pf_state_key_z, M_NOWAIT); 475 if (sks == NULL) 476 goto cleanup; 477 } else 478 sks = skw; 479 480 /* allocate memory for scrub info */ 481 if (pfsync_alloc_scrub_memory(&sp->src, &st->src) || 482 pfsync_alloc_scrub_memory(&sp->dst, &st->dst)) 483 goto cleanup; 484 485 /* Copy to state key(s). */ 486 skw->addr[0] = kw->addr[0]; 487 skw->addr[1] = kw->addr[1]; 488 skw->port[0] = kw->port[0]; 489 skw->port[1] = kw->port[1]; 490 skw->proto = sp->proto; 491 skw->af = sp->af; 492 if (sks != skw) { 493 sks->addr[0] = ks->addr[0]; 494 sks->addr[1] = ks->addr[1]; 495 sks->port[0] = ks->port[0]; 496 sks->port[1] = ks->port[1]; 497 sks->proto = sp->proto; 498 sks->af = sp->af; 499 } 500 501 /* copy to state */ 502 bcopy(&sp->rt_addr, &st->rt_addr, sizeof(st->rt_addr)); 503 st->creation = time_uptime - ntohl(sp->creation); 504 st->expire = time_uptime; 505 if (sp->expire) { 506 uint32_t timeout; 507 508 timeout = r->timeout[sp->timeout]; 509 if (!timeout) 510 timeout = V_pf_default_rule.timeout[sp->timeout]; 511 512 /* sp->expire may have been adaptively scaled by export. */ 513 st->expire -= timeout - ntohl(sp->expire); 514 } 515 516 st->direction = sp->direction; 517 st->log = sp->log; 518 st->timeout = sp->timeout; 519 st->state_flags = sp->state_flags; 520 521 st->id = sp->id; 522 st->creatorid = sp->creatorid; 523 pf_state_peer_ntoh(&sp->src, &st->src); 524 pf_state_peer_ntoh(&sp->dst, &st->dst); 525 526 st->rule.ptr = r; 527 st->nat_rule.ptr = NULL; 528 st->anchor.ptr = NULL; 529 st->rt_kif = NULL; 530 531 st->pfsync_time = time_uptime; 532 st->sync_state = PFSYNC_S_NONE; 533 534 if (!(flags & PFSYNC_SI_IOCTL)) 535 st->state_flags |= PFSTATE_NOSYNC; 536 537 if ((error = pf_state_insert(kif, skw, sks, st)) != 0) 538 goto cleanup_state; 539 540 /* XXX when we have nat_rule/anchors, use STATE_INC_COUNTERS */ 541 counter_u64_add(r->states_cur, 1); 542 counter_u64_add(r->states_tot, 1); 543 544 if (!(flags & PFSYNC_SI_IOCTL)) { 545 st->state_flags &= ~PFSTATE_NOSYNC; 546 if (st->state_flags & PFSTATE_ACK) { 547 pfsync_q_ins(st, PFSYNC_S_IACK, true); 548 pfsync_push(sc); 549 } 550 } 551 st->state_flags &= ~PFSTATE_ACK; 552 PF_STATE_UNLOCK(st); 553 554 return (0); 555 556 cleanup: 557 error = ENOMEM; 558 if (skw == sks) 559 sks = NULL; 560 if (skw != NULL) 561 uma_zfree(V_pf_state_key_z, skw); 562 if (sks != NULL) 563 uma_zfree(V_pf_state_key_z, sks); 564 565 cleanup_state: /* pf_state_insert() frees the state keys. */ 566 if (st) { 567 if (st->dst.scrub) 568 uma_zfree(V_pf_state_scrub_z, st->dst.scrub); 569 if (st->src.scrub) 570 uma_zfree(V_pf_state_scrub_z, st->src.scrub); 571 uma_zfree(V_pf_state_z, st); 572 } 573 return (error); 574 } 575 576 static int 577 pfsync_input(struct mbuf **mp, int *offp __unused, int proto __unused) 578 { 579 struct pfsync_softc *sc = V_pfsyncif; 580 struct pfsync_pkt pkt; 581 struct mbuf *m = *mp; 582 struct ip *ip = mtod(m, struct ip *); 583 struct pfsync_header *ph; 584 struct pfsync_subheader subh; 585 586 int offset, len; 587 int rv; 588 uint16_t count; 589 590 *mp = NULL; 591 V_pfsyncstats.pfsyncs_ipackets++; 592 593 /* Verify that we have a sync interface configured. */ 594 if (!sc || !sc->sc_sync_if || !V_pf_status.running || 595 (sc->sc_ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) 596 goto done; 597 598 /* verify that the packet came in on the right interface */ 599 if (sc->sc_sync_if != m->m_pkthdr.rcvif) { 600 V_pfsyncstats.pfsyncs_badif++; 601 goto done; 602 } 603 604 if_inc_counter(sc->sc_ifp, IFCOUNTER_IPACKETS, 1); 605 if_inc_counter(sc->sc_ifp, IFCOUNTER_IBYTES, m->m_pkthdr.len); 606 /* verify that the IP TTL is 255. */ 607 if (ip->ip_ttl != PFSYNC_DFLTTL) { 608 V_pfsyncstats.pfsyncs_badttl++; 609 goto done; 610 } 611 612 offset = ip->ip_hl << 2; 613 if (m->m_pkthdr.len < offset + sizeof(*ph)) { 614 V_pfsyncstats.pfsyncs_hdrops++; 615 goto done; 616 } 617 618 if (offset + sizeof(*ph) > m->m_len) { 619 if (m_pullup(m, offset + sizeof(*ph)) == NULL) { 620 V_pfsyncstats.pfsyncs_hdrops++; 621 return (IPPROTO_DONE); 622 } 623 ip = mtod(m, struct ip *); 624 } 625 ph = (struct pfsync_header *)((char *)ip + offset); 626 627 /* verify the version */ 628 if (ph->version != PFSYNC_VERSION) { 629 V_pfsyncstats.pfsyncs_badver++; 630 goto done; 631 } 632 633 len = ntohs(ph->len) + offset; 634 if (m->m_pkthdr.len < len) { 635 V_pfsyncstats.pfsyncs_badlen++; 636 goto done; 637 } 638 639 /* Cheaper to grab this now than having to mess with mbufs later */ 640 pkt.ip = ip; 641 pkt.src = ip->ip_src; 642 pkt.flags = 0; 643 644 /* 645 * Trusting pf_chksum during packet processing, as well as seeking 646 * in interface name tree, require holding PF_RULES_RLOCK(). 647 */ 648 PF_RULES_RLOCK(); 649 if (!bcmp(&ph->pfcksum, &V_pf_status.pf_chksum, PF_MD5_DIGEST_LENGTH)) 650 pkt.flags |= PFSYNC_SI_CKSUM; 651 652 offset += sizeof(*ph); 653 while (offset <= len - sizeof(subh)) { 654 m_copydata(m, offset, sizeof(subh), (caddr_t)&subh); 655 offset += sizeof(subh); 656 657 if (subh.action >= PFSYNC_ACT_MAX) { 658 V_pfsyncstats.pfsyncs_badact++; 659 PF_RULES_RUNLOCK(); 660 goto done; 661 } 662 663 count = ntohs(subh.count); 664 V_pfsyncstats.pfsyncs_iacts[subh.action] += count; 665 rv = (*pfsync_acts[subh.action])(&pkt, m, offset, count); 666 if (rv == -1) { 667 PF_RULES_RUNLOCK(); 668 return (IPPROTO_DONE); 669 } 670 671 offset += rv; 672 } 673 PF_RULES_RUNLOCK(); 674 675 done: 676 m_freem(m); 677 return (IPPROTO_DONE); 678 } 679 680 static int 681 pfsync_in_clr(struct pfsync_pkt *pkt, struct mbuf *m, int offset, int count) 682 { 683 struct pfsync_clr *clr; 684 struct mbuf *mp; 685 int len = sizeof(*clr) * count; 686 int i, offp; 687 u_int32_t creatorid; 688 689 mp = m_pulldown(m, offset, len, &offp); 690 if (mp == NULL) { 691 V_pfsyncstats.pfsyncs_badlen++; 692 return (-1); 693 } 694 clr = (struct pfsync_clr *)(mp->m_data + offp); 695 696 for (i = 0; i < count; i++) { 697 creatorid = clr[i].creatorid; 698 699 if (clr[i].ifname[0] != '\0' && 700 pfi_kif_find(clr[i].ifname) == NULL) 701 continue; 702 703 for (int i = 0; i <= pf_hashmask; i++) { 704 struct pf_idhash *ih = &V_pf_idhash[i]; 705 struct pf_state *s; 706 relock: 707 PF_HASHROW_LOCK(ih); 708 LIST_FOREACH(s, &ih->states, entry) { 709 if (s->creatorid == creatorid) { 710 s->state_flags |= PFSTATE_NOSYNC; 711 pf_unlink_state(s, PF_ENTER_LOCKED); 712 goto relock; 713 } 714 } 715 PF_HASHROW_UNLOCK(ih); 716 } 717 } 718 719 return (len); 720 } 721 722 static int 723 pfsync_in_ins(struct pfsync_pkt *pkt, struct mbuf *m, int offset, int count) 724 { 725 struct mbuf *mp; 726 struct pfsync_state *sa, *sp; 727 int len = sizeof(*sp) * count; 728 int i, offp; 729 730 mp = m_pulldown(m, offset, len, &offp); 731 if (mp == NULL) { 732 V_pfsyncstats.pfsyncs_badlen++; 733 return (-1); 734 } 735 sa = (struct pfsync_state *)(mp->m_data + offp); 736 737 for (i = 0; i < count; i++) { 738 sp = &sa[i]; 739 740 /* Check for invalid values. */ 741 if (sp->timeout >= PFTM_MAX || 742 sp->src.state > PF_TCPS_PROXY_DST || 743 sp->dst.state > PF_TCPS_PROXY_DST || 744 sp->direction > PF_OUT || 745 (sp->af != AF_INET && sp->af != AF_INET6)) { 746 if (V_pf_status.debug >= PF_DEBUG_MISC) 747 printf("%s: invalid value\n", __func__); 748 V_pfsyncstats.pfsyncs_badval++; 749 continue; 750 } 751 752 if (pfsync_state_import(sp, pkt->flags) == ENOMEM) 753 /* Drop out, but process the rest of the actions. */ 754 break; 755 } 756 757 return (len); 758 } 759 760 static int 761 pfsync_in_iack(struct pfsync_pkt *pkt, struct mbuf *m, int offset, int count) 762 { 763 struct pfsync_ins_ack *ia, *iaa; 764 struct pf_state *st; 765 766 struct mbuf *mp; 767 int len = count * sizeof(*ia); 768 int offp, i; 769 770 mp = m_pulldown(m, offset, len, &offp); 771 if (mp == NULL) { 772 V_pfsyncstats.pfsyncs_badlen++; 773 return (-1); 774 } 775 iaa = (struct pfsync_ins_ack *)(mp->m_data + offp); 776 777 for (i = 0; i < count; i++) { 778 ia = &iaa[i]; 779 780 st = pf_find_state_byid(ia->id, ia->creatorid); 781 if (st == NULL) 782 continue; 783 784 if (st->state_flags & PFSTATE_ACK) { 785 PFSYNC_LOCK(V_pfsyncif); 786 pfsync_undefer_state(st, 0); 787 PFSYNC_UNLOCK(V_pfsyncif); 788 } 789 PF_STATE_UNLOCK(st); 790 } 791 /* 792 * XXX this is not yet implemented, but we know the size of the 793 * message so we can skip it. 794 */ 795 796 return (count * sizeof(struct pfsync_ins_ack)); 797 } 798 799 static int 800 pfsync_upd_tcp(struct pf_state *st, struct pfsync_state_peer *src, 801 struct pfsync_state_peer *dst) 802 { 803 int sync = 0; 804 805 PF_STATE_LOCK_ASSERT(st); 806 807 /* 808 * The state should never go backwards except 809 * for syn-proxy states. Neither should the 810 * sequence window slide backwards. 811 */ 812 if ((st->src.state > src->state && 813 (st->src.state < PF_TCPS_PROXY_SRC || 814 src->state >= PF_TCPS_PROXY_SRC)) || 815 816 (st->src.state == src->state && 817 SEQ_GT(st->src.seqlo, ntohl(src->seqlo)))) 818 sync++; 819 else 820 pf_state_peer_ntoh(src, &st->src); 821 822 if ((st->dst.state > dst->state) || 823 824 (st->dst.state >= TCPS_SYN_SENT && 825 SEQ_GT(st->dst.seqlo, ntohl(dst->seqlo)))) 826 sync++; 827 else 828 pf_state_peer_ntoh(dst, &st->dst); 829 830 return (sync); 831 } 832 833 static int 834 pfsync_in_upd(struct pfsync_pkt *pkt, struct mbuf *m, int offset, int count) 835 { 836 struct pfsync_softc *sc = V_pfsyncif; 837 struct pfsync_state *sa, *sp; 838 struct pf_state *st; 839 int sync; 840 841 struct mbuf *mp; 842 int len = count * sizeof(*sp); 843 int offp, i; 844 845 mp = m_pulldown(m, offset, len, &offp); 846 if (mp == NULL) { 847 V_pfsyncstats.pfsyncs_badlen++; 848 return (-1); 849 } 850 sa = (struct pfsync_state *)(mp->m_data + offp); 851 852 for (i = 0; i < count; i++) { 853 sp = &sa[i]; 854 855 /* check for invalid values */ 856 if (sp->timeout >= PFTM_MAX || 857 sp->src.state > PF_TCPS_PROXY_DST || 858 sp->dst.state > PF_TCPS_PROXY_DST) { 859 if (V_pf_status.debug >= PF_DEBUG_MISC) { 860 printf("pfsync_input: PFSYNC_ACT_UPD: " 861 "invalid value\n"); 862 } 863 V_pfsyncstats.pfsyncs_badval++; 864 continue; 865 } 866 867 st = pf_find_state_byid(sp->id, sp->creatorid); 868 if (st == NULL) { 869 /* insert the update */ 870 if (pfsync_state_import(sp, 0)) 871 V_pfsyncstats.pfsyncs_badstate++; 872 continue; 873 } 874 875 if (st->state_flags & PFSTATE_ACK) { 876 PFSYNC_LOCK(sc); 877 pfsync_undefer_state(st, 1); 878 PFSYNC_UNLOCK(sc); 879 } 880 881 if (st->key[PF_SK_WIRE]->proto == IPPROTO_TCP) 882 sync = pfsync_upd_tcp(st, &sp->src, &sp->dst); 883 else { 884 sync = 0; 885 886 /* 887 * Non-TCP protocol state machine always go 888 * forwards 889 */ 890 if (st->src.state > sp->src.state) 891 sync++; 892 else 893 pf_state_peer_ntoh(&sp->src, &st->src); 894 if (st->dst.state > sp->dst.state) 895 sync++; 896 else 897 pf_state_peer_ntoh(&sp->dst, &st->dst); 898 } 899 if (sync < 2) { 900 pfsync_alloc_scrub_memory(&sp->dst, &st->dst); 901 pf_state_peer_ntoh(&sp->dst, &st->dst); 902 st->expire = time_uptime; 903 st->timeout = sp->timeout; 904 } 905 st->pfsync_time = time_uptime; 906 907 if (sync) { 908 V_pfsyncstats.pfsyncs_stale++; 909 910 pfsync_update_state(st); 911 PF_STATE_UNLOCK(st); 912 PFSYNC_LOCK(sc); 913 pfsync_push(sc); 914 PFSYNC_UNLOCK(sc); 915 continue; 916 } 917 PF_STATE_UNLOCK(st); 918 } 919 920 return (len); 921 } 922 923 static int 924 pfsync_in_upd_c(struct pfsync_pkt *pkt, struct mbuf *m, int offset, int count) 925 { 926 struct pfsync_softc *sc = V_pfsyncif; 927 struct pfsync_upd_c *ua, *up; 928 struct pf_state *st; 929 int len = count * sizeof(*up); 930 int sync; 931 struct mbuf *mp; 932 int offp, i; 933 934 mp = m_pulldown(m, offset, len, &offp); 935 if (mp == NULL) { 936 V_pfsyncstats.pfsyncs_badlen++; 937 return (-1); 938 } 939 ua = (struct pfsync_upd_c *)(mp->m_data + offp); 940 941 for (i = 0; i < count; i++) { 942 up = &ua[i]; 943 944 /* check for invalid values */ 945 if (up->timeout >= PFTM_MAX || 946 up->src.state > PF_TCPS_PROXY_DST || 947 up->dst.state > PF_TCPS_PROXY_DST) { 948 if (V_pf_status.debug >= PF_DEBUG_MISC) { 949 printf("pfsync_input: " 950 "PFSYNC_ACT_UPD_C: " 951 "invalid value\n"); 952 } 953 V_pfsyncstats.pfsyncs_badval++; 954 continue; 955 } 956 957 st = pf_find_state_byid(up->id, up->creatorid); 958 if (st == NULL) { 959 /* We don't have this state. Ask for it. */ 960 PFSYNC_LOCK(sc); 961 pfsync_request_update(up->creatorid, up->id); 962 PFSYNC_UNLOCK(sc); 963 continue; 964 } 965 966 if (st->state_flags & PFSTATE_ACK) { 967 PFSYNC_LOCK(sc); 968 pfsync_undefer_state(st, 1); 969 PFSYNC_UNLOCK(sc); 970 } 971 972 if (st->key[PF_SK_WIRE]->proto == IPPROTO_TCP) 973 sync = pfsync_upd_tcp(st, &up->src, &up->dst); 974 else { 975 sync = 0; 976 977 /* 978 * Non-TCP protocol state machine always go 979 * forwards 980 */ 981 if (st->src.state > up->src.state) 982 sync++; 983 else 984 pf_state_peer_ntoh(&up->src, &st->src); 985 if (st->dst.state > up->dst.state) 986 sync++; 987 else 988 pf_state_peer_ntoh(&up->dst, &st->dst); 989 } 990 if (sync < 2) { 991 pfsync_alloc_scrub_memory(&up->dst, &st->dst); 992 pf_state_peer_ntoh(&up->dst, &st->dst); 993 st->expire = time_uptime; 994 st->timeout = up->timeout; 995 } 996 st->pfsync_time = time_uptime; 997 998 if (sync) { 999 V_pfsyncstats.pfsyncs_stale++; 1000 1001 pfsync_update_state(st); 1002 PF_STATE_UNLOCK(st); 1003 PFSYNC_LOCK(sc); 1004 pfsync_push(sc); 1005 PFSYNC_UNLOCK(sc); 1006 continue; 1007 } 1008 PF_STATE_UNLOCK(st); 1009 } 1010 1011 return (len); 1012 } 1013 1014 static int 1015 pfsync_in_ureq(struct pfsync_pkt *pkt, struct mbuf *m, int offset, int count) 1016 { 1017 struct pfsync_upd_req *ur, *ura; 1018 struct mbuf *mp; 1019 int len = count * sizeof(*ur); 1020 int i, offp; 1021 1022 struct pf_state *st; 1023 1024 mp = m_pulldown(m, offset, len, &offp); 1025 if (mp == NULL) { 1026 V_pfsyncstats.pfsyncs_badlen++; 1027 return (-1); 1028 } 1029 ura = (struct pfsync_upd_req *)(mp->m_data + offp); 1030 1031 for (i = 0; i < count; i++) { 1032 ur = &ura[i]; 1033 1034 if (ur->id == 0 && ur->creatorid == 0) 1035 pfsync_bulk_start(); 1036 else { 1037 st = pf_find_state_byid(ur->id, ur->creatorid); 1038 if (st == NULL) { 1039 V_pfsyncstats.pfsyncs_badstate++; 1040 continue; 1041 } 1042 if (st->state_flags & PFSTATE_NOSYNC) { 1043 PF_STATE_UNLOCK(st); 1044 continue; 1045 } 1046 1047 pfsync_update_state_req(st); 1048 PF_STATE_UNLOCK(st); 1049 } 1050 } 1051 1052 return (len); 1053 } 1054 1055 static int 1056 pfsync_in_del(struct pfsync_pkt *pkt, struct mbuf *m, int offset, int count) 1057 { 1058 struct mbuf *mp; 1059 struct pfsync_state *sa, *sp; 1060 struct pf_state *st; 1061 int len = count * sizeof(*sp); 1062 int offp, i; 1063 1064 mp = m_pulldown(m, offset, len, &offp); 1065 if (mp == NULL) { 1066 V_pfsyncstats.pfsyncs_badlen++; 1067 return (-1); 1068 } 1069 sa = (struct pfsync_state *)(mp->m_data + offp); 1070 1071 for (i = 0; i < count; i++) { 1072 sp = &sa[i]; 1073 1074 st = pf_find_state_byid(sp->id, sp->creatorid); 1075 if (st == NULL) { 1076 V_pfsyncstats.pfsyncs_badstate++; 1077 continue; 1078 } 1079 st->state_flags |= PFSTATE_NOSYNC; 1080 pf_unlink_state(st, PF_ENTER_LOCKED); 1081 } 1082 1083 return (len); 1084 } 1085 1086 static int 1087 pfsync_in_del_c(struct pfsync_pkt *pkt, struct mbuf *m, int offset, int count) 1088 { 1089 struct mbuf *mp; 1090 struct pfsync_del_c *sa, *sp; 1091 struct pf_state *st; 1092 int len = count * sizeof(*sp); 1093 int offp, i; 1094 1095 mp = m_pulldown(m, offset, len, &offp); 1096 if (mp == NULL) { 1097 V_pfsyncstats.pfsyncs_badlen++; 1098 return (-1); 1099 } 1100 sa = (struct pfsync_del_c *)(mp->m_data + offp); 1101 1102 for (i = 0; i < count; i++) { 1103 sp = &sa[i]; 1104 1105 st = pf_find_state_byid(sp->id, sp->creatorid); 1106 if (st == NULL) { 1107 V_pfsyncstats.pfsyncs_badstate++; 1108 continue; 1109 } 1110 1111 st->state_flags |= PFSTATE_NOSYNC; 1112 pf_unlink_state(st, PF_ENTER_LOCKED); 1113 } 1114 1115 return (len); 1116 } 1117 1118 static int 1119 pfsync_in_bus(struct pfsync_pkt *pkt, struct mbuf *m, int offset, int count) 1120 { 1121 struct pfsync_softc *sc = V_pfsyncif; 1122 struct pfsync_bus *bus; 1123 struct mbuf *mp; 1124 int len = count * sizeof(*bus); 1125 int offp; 1126 1127 PFSYNC_BLOCK(sc); 1128 1129 /* If we're not waiting for a bulk update, who cares. */ 1130 if (sc->sc_ureq_sent == 0) { 1131 PFSYNC_BUNLOCK(sc); 1132 return (len); 1133 } 1134 1135 mp = m_pulldown(m, offset, len, &offp); 1136 if (mp == NULL) { 1137 PFSYNC_BUNLOCK(sc); 1138 V_pfsyncstats.pfsyncs_badlen++; 1139 return (-1); 1140 } 1141 bus = (struct pfsync_bus *)(mp->m_data + offp); 1142 1143 switch (bus->status) { 1144 case PFSYNC_BUS_START: 1145 callout_reset(&sc->sc_bulkfail_tmo, 4 * hz + 1146 V_pf_limits[PF_LIMIT_STATES].limit / 1147 ((sc->sc_ifp->if_mtu - PFSYNC_MINPKT) / 1148 sizeof(struct pfsync_state)), 1149 pfsync_bulk_fail, sc); 1150 if (V_pf_status.debug >= PF_DEBUG_MISC) 1151 printf("pfsync: received bulk update start\n"); 1152 break; 1153 1154 case PFSYNC_BUS_END: 1155 if (time_uptime - ntohl(bus->endtime) >= 1156 sc->sc_ureq_sent) { 1157 /* that's it, we're happy */ 1158 sc->sc_ureq_sent = 0; 1159 sc->sc_bulk_tries = 0; 1160 callout_stop(&sc->sc_bulkfail_tmo); 1161 if (!(sc->sc_flags & PFSYNCF_OK) && carp_demote_adj_p) 1162 (*carp_demote_adj_p)(-V_pfsync_carp_adj, 1163 "pfsync bulk done"); 1164 sc->sc_flags |= PFSYNCF_OK; 1165 if (V_pf_status.debug >= PF_DEBUG_MISC) 1166 printf("pfsync: received valid " 1167 "bulk update end\n"); 1168 } else { 1169 if (V_pf_status.debug >= PF_DEBUG_MISC) 1170 printf("pfsync: received invalid " 1171 "bulk update end: bad timestamp\n"); 1172 } 1173 break; 1174 } 1175 PFSYNC_BUNLOCK(sc); 1176 1177 return (len); 1178 } 1179 1180 static int 1181 pfsync_in_tdb(struct pfsync_pkt *pkt, struct mbuf *m, int offset, int count) 1182 { 1183 int len = count * sizeof(struct pfsync_tdb); 1184 1185 #if defined(IPSEC) 1186 struct pfsync_tdb *tp; 1187 struct mbuf *mp; 1188 int offp; 1189 int i; 1190 int s; 1191 1192 mp = m_pulldown(m, offset, len, &offp); 1193 if (mp == NULL) { 1194 V_pfsyncstats.pfsyncs_badlen++; 1195 return (-1); 1196 } 1197 tp = (struct pfsync_tdb *)(mp->m_data + offp); 1198 1199 for (i = 0; i < count; i++) 1200 pfsync_update_net_tdb(&tp[i]); 1201 #endif 1202 1203 return (len); 1204 } 1205 1206 #if defined(IPSEC) 1207 /* Update an in-kernel tdb. Silently fail if no tdb is found. */ 1208 static void 1209 pfsync_update_net_tdb(struct pfsync_tdb *pt) 1210 { 1211 struct tdb *tdb; 1212 int s; 1213 1214 /* check for invalid values */ 1215 if (ntohl(pt->spi) <= SPI_RESERVED_MAX || 1216 (pt->dst.sa.sa_family != AF_INET && 1217 pt->dst.sa.sa_family != AF_INET6)) 1218 goto bad; 1219 1220 tdb = gettdb(pt->spi, &pt->dst, pt->sproto); 1221 if (tdb) { 1222 pt->rpl = ntohl(pt->rpl); 1223 pt->cur_bytes = (unsigned long long)be64toh(pt->cur_bytes); 1224 1225 /* Neither replay nor byte counter should ever decrease. */ 1226 if (pt->rpl < tdb->tdb_rpl || 1227 pt->cur_bytes < tdb->tdb_cur_bytes) { 1228 goto bad; 1229 } 1230 1231 tdb->tdb_rpl = pt->rpl; 1232 tdb->tdb_cur_bytes = pt->cur_bytes; 1233 } 1234 return; 1235 1236 bad: 1237 if (V_pf_status.debug >= PF_DEBUG_MISC) 1238 printf("pfsync_insert: PFSYNC_ACT_TDB_UPD: " 1239 "invalid value\n"); 1240 V_pfsyncstats.pfsyncs_badstate++; 1241 return; 1242 } 1243 #endif 1244 1245 1246 static int 1247 pfsync_in_eof(struct pfsync_pkt *pkt, struct mbuf *m, int offset, int count) 1248 { 1249 /* check if we are at the right place in the packet */ 1250 if (offset != m->m_pkthdr.len) 1251 V_pfsyncstats.pfsyncs_badlen++; 1252 1253 /* we're done. free and let the caller return */ 1254 m_freem(m); 1255 return (-1); 1256 } 1257 1258 static int 1259 pfsync_in_error(struct pfsync_pkt *pkt, struct mbuf *m, int offset, int count) 1260 { 1261 V_pfsyncstats.pfsyncs_badact++; 1262 1263 m_freem(m); 1264 return (-1); 1265 } 1266 1267 static int 1268 pfsyncoutput(struct ifnet *ifp, struct mbuf *m, const struct sockaddr *dst, 1269 struct route *rt) 1270 { 1271 m_freem(m); 1272 return (0); 1273 } 1274 1275 /* ARGSUSED */ 1276 static int 1277 pfsyncioctl(struct ifnet *ifp, u_long cmd, caddr_t data) 1278 { 1279 struct pfsync_softc *sc = ifp->if_softc; 1280 struct ifreq *ifr = (struct ifreq *)data; 1281 struct pfsyncreq pfsyncr; 1282 int error; 1283 1284 switch (cmd) { 1285 case SIOCSIFFLAGS: 1286 PFSYNC_LOCK(sc); 1287 if (ifp->if_flags & IFF_UP) { 1288 ifp->if_drv_flags |= IFF_DRV_RUNNING; 1289 PFSYNC_UNLOCK(sc); 1290 pfsync_pointers_init(); 1291 } else { 1292 ifp->if_drv_flags &= ~IFF_DRV_RUNNING; 1293 PFSYNC_UNLOCK(sc); 1294 pfsync_pointers_uninit(); 1295 } 1296 break; 1297 case SIOCSIFMTU: 1298 if (!sc->sc_sync_if || 1299 ifr->ifr_mtu <= PFSYNC_MINPKT || 1300 ifr->ifr_mtu > sc->sc_sync_if->if_mtu) 1301 return (EINVAL); 1302 if (ifr->ifr_mtu < ifp->if_mtu) { 1303 PFSYNC_LOCK(sc); 1304 if (sc->sc_len > PFSYNC_MINPKT) 1305 pfsync_sendout(1); 1306 PFSYNC_UNLOCK(sc); 1307 } 1308 ifp->if_mtu = ifr->ifr_mtu; 1309 break; 1310 case SIOCGETPFSYNC: 1311 bzero(&pfsyncr, sizeof(pfsyncr)); 1312 PFSYNC_LOCK(sc); 1313 if (sc->sc_sync_if) { 1314 strlcpy(pfsyncr.pfsyncr_syncdev, 1315 sc->sc_sync_if->if_xname, IFNAMSIZ); 1316 } 1317 pfsyncr.pfsyncr_syncpeer = sc->sc_sync_peer; 1318 pfsyncr.pfsyncr_maxupdates = sc->sc_maxupdates; 1319 pfsyncr.pfsyncr_defer = (PFSYNCF_DEFER == 1320 (sc->sc_flags & PFSYNCF_DEFER)); 1321 PFSYNC_UNLOCK(sc); 1322 return (copyout(&pfsyncr, ifr_data_get_ptr(ifr), 1323 sizeof(pfsyncr))); 1324 1325 case SIOCSETPFSYNC: 1326 { 1327 struct ip_moptions *imo = &sc->sc_imo; 1328 struct ifnet *sifp; 1329 struct ip *ip; 1330 void *mship = NULL; 1331 1332 if ((error = priv_check(curthread, PRIV_NETINET_PF)) != 0) 1333 return (error); 1334 if ((error = copyin(ifr_data_get_ptr(ifr), &pfsyncr, 1335 sizeof(pfsyncr)))) 1336 return (error); 1337 1338 if (pfsyncr.pfsyncr_maxupdates > 255) 1339 return (EINVAL); 1340 1341 if (pfsyncr.pfsyncr_syncdev[0] == 0) 1342 sifp = NULL; 1343 else if ((sifp = ifunit_ref(pfsyncr.pfsyncr_syncdev)) == NULL) 1344 return (EINVAL); 1345 1346 if (sifp != NULL && ( 1347 pfsyncr.pfsyncr_syncpeer.s_addr == 0 || 1348 pfsyncr.pfsyncr_syncpeer.s_addr == 1349 htonl(INADDR_PFSYNC_GROUP))) 1350 mship = malloc((sizeof(struct in_multi *) * 1351 IP_MIN_MEMBERSHIPS), M_PFSYNC, M_WAITOK | M_ZERO); 1352 1353 PFSYNC_LOCK(sc); 1354 if (pfsyncr.pfsyncr_syncpeer.s_addr == 0) 1355 sc->sc_sync_peer.s_addr = htonl(INADDR_PFSYNC_GROUP); 1356 else 1357 sc->sc_sync_peer.s_addr = 1358 pfsyncr.pfsyncr_syncpeer.s_addr; 1359 1360 sc->sc_maxupdates = pfsyncr.pfsyncr_maxupdates; 1361 if (pfsyncr.pfsyncr_defer) { 1362 sc->sc_flags |= PFSYNCF_DEFER; 1363 pfsync_defer_ptr = pfsync_defer; 1364 } else { 1365 sc->sc_flags &= ~PFSYNCF_DEFER; 1366 pfsync_defer_ptr = NULL; 1367 } 1368 1369 if (sifp == NULL) { 1370 if (sc->sc_sync_if) 1371 if_rele(sc->sc_sync_if); 1372 sc->sc_sync_if = NULL; 1373 if (imo->imo_membership) 1374 pfsync_multicast_cleanup(sc); 1375 PFSYNC_UNLOCK(sc); 1376 break; 1377 } 1378 1379 if (sc->sc_len > PFSYNC_MINPKT && 1380 (sifp->if_mtu < sc->sc_ifp->if_mtu || 1381 (sc->sc_sync_if != NULL && 1382 sifp->if_mtu < sc->sc_sync_if->if_mtu) || 1383 sifp->if_mtu < MCLBYTES - sizeof(struct ip))) 1384 pfsync_sendout(1); 1385 1386 if (imo->imo_membership) 1387 pfsync_multicast_cleanup(sc); 1388 1389 if (sc->sc_sync_peer.s_addr == htonl(INADDR_PFSYNC_GROUP)) { 1390 error = pfsync_multicast_setup(sc, sifp, mship); 1391 if (error) { 1392 if_rele(sifp); 1393 free(mship, M_PFSYNC); 1394 return (error); 1395 } 1396 } 1397 if (sc->sc_sync_if) 1398 if_rele(sc->sc_sync_if); 1399 sc->sc_sync_if = sifp; 1400 1401 ip = &sc->sc_template; 1402 bzero(ip, sizeof(*ip)); 1403 ip->ip_v = IPVERSION; 1404 ip->ip_hl = sizeof(sc->sc_template) >> 2; 1405 ip->ip_tos = IPTOS_LOWDELAY; 1406 /* len and id are set later. */ 1407 ip->ip_off = htons(IP_DF); 1408 ip->ip_ttl = PFSYNC_DFLTTL; 1409 ip->ip_p = IPPROTO_PFSYNC; 1410 ip->ip_src.s_addr = INADDR_ANY; 1411 ip->ip_dst.s_addr = sc->sc_sync_peer.s_addr; 1412 1413 /* Request a full state table update. */ 1414 if ((sc->sc_flags & PFSYNCF_OK) && carp_demote_adj_p) 1415 (*carp_demote_adj_p)(V_pfsync_carp_adj, 1416 "pfsync bulk start"); 1417 sc->sc_flags &= ~PFSYNCF_OK; 1418 if (V_pf_status.debug >= PF_DEBUG_MISC) 1419 printf("pfsync: requesting bulk update\n"); 1420 pfsync_request_update(0, 0); 1421 PFSYNC_UNLOCK(sc); 1422 PFSYNC_BLOCK(sc); 1423 sc->sc_ureq_sent = time_uptime; 1424 callout_reset(&sc->sc_bulkfail_tmo, 5 * hz, pfsync_bulk_fail, 1425 sc); 1426 PFSYNC_BUNLOCK(sc); 1427 1428 break; 1429 } 1430 default: 1431 return (ENOTTY); 1432 } 1433 1434 return (0); 1435 } 1436 1437 static void 1438 pfsync_out_state(struct pf_state *st, void *buf) 1439 { 1440 struct pfsync_state *sp = buf; 1441 1442 pfsync_state_export(sp, st); 1443 } 1444 1445 static void 1446 pfsync_out_iack(struct pf_state *st, void *buf) 1447 { 1448 struct pfsync_ins_ack *iack = buf; 1449 1450 iack->id = st->id; 1451 iack->creatorid = st->creatorid; 1452 } 1453 1454 static void 1455 pfsync_out_upd_c(struct pf_state *st, void *buf) 1456 { 1457 struct pfsync_upd_c *up = buf; 1458 1459 bzero(up, sizeof(*up)); 1460 up->id = st->id; 1461 pf_state_peer_hton(&st->src, &up->src); 1462 pf_state_peer_hton(&st->dst, &up->dst); 1463 up->creatorid = st->creatorid; 1464 up->timeout = st->timeout; 1465 } 1466 1467 static void 1468 pfsync_out_del(struct pf_state *st, void *buf) 1469 { 1470 struct pfsync_del_c *dp = buf; 1471 1472 dp->id = st->id; 1473 dp->creatorid = st->creatorid; 1474 st->state_flags |= PFSTATE_NOSYNC; 1475 } 1476 1477 static void 1478 pfsync_drop(struct pfsync_softc *sc) 1479 { 1480 struct pf_state *st, *next; 1481 struct pfsync_upd_req_item *ur; 1482 int q; 1483 1484 for (q = 0; q < PFSYNC_S_COUNT; q++) { 1485 if (TAILQ_EMPTY(&sc->sc_qs[q])) 1486 continue; 1487 1488 TAILQ_FOREACH_SAFE(st, &sc->sc_qs[q], sync_list, next) { 1489 KASSERT(st->sync_state == q, 1490 ("%s: st->sync_state == q", 1491 __func__)); 1492 st->sync_state = PFSYNC_S_NONE; 1493 pf_release_state(st); 1494 } 1495 TAILQ_INIT(&sc->sc_qs[q]); 1496 } 1497 1498 while ((ur = TAILQ_FIRST(&sc->sc_upd_req_list)) != NULL) { 1499 TAILQ_REMOVE(&sc->sc_upd_req_list, ur, ur_entry); 1500 free(ur, M_PFSYNC); 1501 } 1502 1503 sc->sc_plus = NULL; 1504 sc->sc_len = PFSYNC_MINPKT; 1505 } 1506 1507 static void 1508 pfsync_sendout(int schedswi) 1509 { 1510 struct pfsync_softc *sc = V_pfsyncif; 1511 struct ifnet *ifp = sc->sc_ifp; 1512 struct mbuf *m; 1513 struct ip *ip; 1514 struct pfsync_header *ph; 1515 struct pfsync_subheader *subh; 1516 struct pf_state *st, *st_next; 1517 struct pfsync_upd_req_item *ur; 1518 int offset; 1519 int q, count = 0; 1520 1521 KASSERT(sc != NULL, ("%s: null sc", __func__)); 1522 KASSERT(sc->sc_len > PFSYNC_MINPKT, 1523 ("%s: sc_len %zu", __func__, sc->sc_len)); 1524 PFSYNC_LOCK_ASSERT(sc); 1525 1526 if (ifp->if_bpf == NULL && sc->sc_sync_if == NULL) { 1527 pfsync_drop(sc); 1528 return; 1529 } 1530 1531 m = m_get2(max_linkhdr + sc->sc_len, M_NOWAIT, MT_DATA, M_PKTHDR); 1532 if (m == NULL) { 1533 if_inc_counter(sc->sc_ifp, IFCOUNTER_OERRORS, 1); 1534 V_pfsyncstats.pfsyncs_onomem++; 1535 return; 1536 } 1537 m->m_data += max_linkhdr; 1538 m->m_len = m->m_pkthdr.len = sc->sc_len; 1539 1540 /* build the ip header */ 1541 ip = (struct ip *)m->m_data; 1542 bcopy(&sc->sc_template, ip, sizeof(*ip)); 1543 offset = sizeof(*ip); 1544 1545 ip->ip_len = htons(m->m_pkthdr.len); 1546 ip_fillid(ip); 1547 1548 /* build the pfsync header */ 1549 ph = (struct pfsync_header *)(m->m_data + offset); 1550 bzero(ph, sizeof(*ph)); 1551 offset += sizeof(*ph); 1552 1553 ph->version = PFSYNC_VERSION; 1554 ph->len = htons(sc->sc_len - sizeof(*ip)); 1555 bcopy(V_pf_status.pf_chksum, ph->pfcksum, PF_MD5_DIGEST_LENGTH); 1556 1557 /* walk the queues */ 1558 for (q = 0; q < PFSYNC_S_COUNT; q++) { 1559 if (TAILQ_EMPTY(&sc->sc_qs[q])) 1560 continue; 1561 1562 subh = (struct pfsync_subheader *)(m->m_data + offset); 1563 offset += sizeof(*subh); 1564 1565 count = 0; 1566 TAILQ_FOREACH_SAFE(st, &sc->sc_qs[q], sync_list, st_next) { 1567 KASSERT(st->sync_state == q, 1568 ("%s: st->sync_state == q", 1569 __func__)); 1570 /* 1571 * XXXGL: some of write methods do unlocked reads 1572 * of state data :( 1573 */ 1574 pfsync_qs[q].write(st, m->m_data + offset); 1575 offset += pfsync_qs[q].len; 1576 st->sync_state = PFSYNC_S_NONE; 1577 pf_release_state(st); 1578 count++; 1579 } 1580 TAILQ_INIT(&sc->sc_qs[q]); 1581 1582 bzero(subh, sizeof(*subh)); 1583 subh->action = pfsync_qs[q].action; 1584 subh->count = htons(count); 1585 V_pfsyncstats.pfsyncs_oacts[pfsync_qs[q].action] += count; 1586 } 1587 1588 if (!TAILQ_EMPTY(&sc->sc_upd_req_list)) { 1589 subh = (struct pfsync_subheader *)(m->m_data + offset); 1590 offset += sizeof(*subh); 1591 1592 count = 0; 1593 while ((ur = TAILQ_FIRST(&sc->sc_upd_req_list)) != NULL) { 1594 TAILQ_REMOVE(&sc->sc_upd_req_list, ur, ur_entry); 1595 1596 bcopy(&ur->ur_msg, m->m_data + offset, 1597 sizeof(ur->ur_msg)); 1598 offset += sizeof(ur->ur_msg); 1599 free(ur, M_PFSYNC); 1600 count++; 1601 } 1602 1603 bzero(subh, sizeof(*subh)); 1604 subh->action = PFSYNC_ACT_UPD_REQ; 1605 subh->count = htons(count); 1606 V_pfsyncstats.pfsyncs_oacts[PFSYNC_ACT_UPD_REQ] += count; 1607 } 1608 1609 /* has someone built a custom region for us to add? */ 1610 if (sc->sc_plus != NULL) { 1611 bcopy(sc->sc_plus, m->m_data + offset, sc->sc_pluslen); 1612 offset += sc->sc_pluslen; 1613 1614 sc->sc_plus = NULL; 1615 } 1616 1617 subh = (struct pfsync_subheader *)(m->m_data + offset); 1618 offset += sizeof(*subh); 1619 1620 bzero(subh, sizeof(*subh)); 1621 subh->action = PFSYNC_ACT_EOF; 1622 subh->count = htons(1); 1623 V_pfsyncstats.pfsyncs_oacts[PFSYNC_ACT_EOF]++; 1624 1625 /* we're done, let's put it on the wire */ 1626 if (ifp->if_bpf) { 1627 m->m_data += sizeof(*ip); 1628 m->m_len = m->m_pkthdr.len = sc->sc_len - sizeof(*ip); 1629 BPF_MTAP(ifp, m); 1630 m->m_data -= sizeof(*ip); 1631 m->m_len = m->m_pkthdr.len = sc->sc_len; 1632 } 1633 1634 if (sc->sc_sync_if == NULL) { 1635 sc->sc_len = PFSYNC_MINPKT; 1636 m_freem(m); 1637 return; 1638 } 1639 1640 if_inc_counter(sc->sc_ifp, IFCOUNTER_OPACKETS, 1); 1641 if_inc_counter(sc->sc_ifp, IFCOUNTER_OBYTES, m->m_pkthdr.len); 1642 sc->sc_len = PFSYNC_MINPKT; 1643 1644 if (!_IF_QFULL(&sc->sc_ifp->if_snd)) 1645 _IF_ENQUEUE(&sc->sc_ifp->if_snd, m); 1646 else { 1647 m_freem(m); 1648 if_inc_counter(sc->sc_ifp, IFCOUNTER_OQDROPS, 1); 1649 } 1650 if (schedswi) 1651 swi_sched(V_pfsync_swi_cookie, 0); 1652 } 1653 1654 static void 1655 pfsync_insert_state(struct pf_state *st) 1656 { 1657 struct pfsync_softc *sc = V_pfsyncif; 1658 1659 if (st->state_flags & PFSTATE_NOSYNC) 1660 return; 1661 1662 if ((st->rule.ptr->rule_flag & PFRULE_NOSYNC) || 1663 st->key[PF_SK_WIRE]->proto == IPPROTO_PFSYNC) { 1664 st->state_flags |= PFSTATE_NOSYNC; 1665 return; 1666 } 1667 1668 KASSERT(st->sync_state == PFSYNC_S_NONE, 1669 ("%s: st->sync_state %u", __func__, st->sync_state)); 1670 1671 PFSYNC_LOCK(sc); 1672 if (sc->sc_len == PFSYNC_MINPKT) 1673 callout_reset(&sc->sc_tmo, 1 * hz, pfsync_timeout, V_pfsyncif); 1674 1675 pfsync_q_ins(st, PFSYNC_S_INS, true); 1676 PFSYNC_UNLOCK(sc); 1677 1678 st->sync_updates = 0; 1679 } 1680 1681 static int 1682 pfsync_defer(struct pf_state *st, struct mbuf *m) 1683 { 1684 struct pfsync_softc *sc = V_pfsyncif; 1685 struct pfsync_deferral *pd; 1686 1687 if (m->m_flags & (M_BCAST|M_MCAST)) 1688 return (0); 1689 1690 PFSYNC_LOCK(sc); 1691 1692 if (sc == NULL || !(sc->sc_ifp->if_flags & IFF_DRV_RUNNING) || 1693 !(sc->sc_flags & PFSYNCF_DEFER)) { 1694 PFSYNC_UNLOCK(sc); 1695 return (0); 1696 } 1697 1698 if (sc->sc_deferred >= 128) 1699 pfsync_undefer(TAILQ_FIRST(&sc->sc_deferrals), 0); 1700 1701 pd = malloc(sizeof(*pd), M_PFSYNC, M_NOWAIT); 1702 if (pd == NULL) 1703 return (0); 1704 sc->sc_deferred++; 1705 1706 m->m_flags |= M_SKIP_FIREWALL; 1707 st->state_flags |= PFSTATE_ACK; 1708 1709 pd->pd_sc = sc; 1710 pd->pd_refs = 0; 1711 pd->pd_st = st; 1712 pf_ref_state(st); 1713 pd->pd_m = m; 1714 1715 TAILQ_INSERT_TAIL(&sc->sc_deferrals, pd, pd_entry); 1716 callout_init_mtx(&pd->pd_tmo, &sc->sc_mtx, CALLOUT_RETURNUNLOCKED); 1717 callout_reset(&pd->pd_tmo, 10, pfsync_defer_tmo, pd); 1718 1719 pfsync_push(sc); 1720 1721 return (1); 1722 } 1723 1724 static void 1725 pfsync_undefer(struct pfsync_deferral *pd, int drop) 1726 { 1727 struct pfsync_softc *sc = pd->pd_sc; 1728 struct mbuf *m = pd->pd_m; 1729 struct pf_state *st = pd->pd_st; 1730 1731 PFSYNC_LOCK_ASSERT(sc); 1732 1733 TAILQ_REMOVE(&sc->sc_deferrals, pd, pd_entry); 1734 sc->sc_deferred--; 1735 pd->pd_st->state_flags &= ~PFSTATE_ACK; /* XXX: locking! */ 1736 free(pd, M_PFSYNC); 1737 pf_release_state(st); 1738 1739 if (drop) 1740 m_freem(m); 1741 else { 1742 _IF_ENQUEUE(&sc->sc_ifp->if_snd, m); 1743 pfsync_push(sc); 1744 } 1745 } 1746 1747 static void 1748 pfsync_defer_tmo(void *arg) 1749 { 1750 struct pfsync_deferral *pd = arg; 1751 struct pfsync_softc *sc = pd->pd_sc; 1752 struct mbuf *m = pd->pd_m; 1753 struct pf_state *st = pd->pd_st; 1754 1755 PFSYNC_LOCK_ASSERT(sc); 1756 1757 CURVNET_SET(m->m_pkthdr.rcvif->if_vnet); 1758 1759 TAILQ_REMOVE(&sc->sc_deferrals, pd, pd_entry); 1760 sc->sc_deferred--; 1761 pd->pd_st->state_flags &= ~PFSTATE_ACK; /* XXX: locking! */ 1762 if (pd->pd_refs == 0) 1763 free(pd, M_PFSYNC); 1764 PFSYNC_UNLOCK(sc); 1765 1766 ip_output(m, NULL, NULL, 0, NULL, NULL); 1767 1768 pf_release_state(st); 1769 1770 CURVNET_RESTORE(); 1771 } 1772 1773 static void 1774 pfsync_undefer_state(struct pf_state *st, int drop) 1775 { 1776 struct pfsync_softc *sc = V_pfsyncif; 1777 struct pfsync_deferral *pd; 1778 1779 PFSYNC_LOCK_ASSERT(sc); 1780 1781 TAILQ_FOREACH(pd, &sc->sc_deferrals, pd_entry) { 1782 if (pd->pd_st == st) { 1783 if (callout_stop(&pd->pd_tmo) > 0) 1784 pfsync_undefer(pd, drop); 1785 return; 1786 } 1787 } 1788 1789 panic("%s: unable to find deferred state", __func__); 1790 } 1791 1792 static void 1793 pfsync_update_state(struct pf_state *st) 1794 { 1795 struct pfsync_softc *sc = V_pfsyncif; 1796 bool sync = false, ref = true; 1797 1798 PF_STATE_LOCK_ASSERT(st); 1799 PFSYNC_LOCK(sc); 1800 1801 if (st->state_flags & PFSTATE_ACK) 1802 pfsync_undefer_state(st, 0); 1803 if (st->state_flags & PFSTATE_NOSYNC) { 1804 if (st->sync_state != PFSYNC_S_NONE) 1805 pfsync_q_del(st, true); 1806 PFSYNC_UNLOCK(sc); 1807 return; 1808 } 1809 1810 if (sc->sc_len == PFSYNC_MINPKT) 1811 callout_reset(&sc->sc_tmo, 1 * hz, pfsync_timeout, V_pfsyncif); 1812 1813 switch (st->sync_state) { 1814 case PFSYNC_S_UPD_C: 1815 case PFSYNC_S_UPD: 1816 case PFSYNC_S_INS: 1817 /* we're already handling it */ 1818 1819 if (st->key[PF_SK_WIRE]->proto == IPPROTO_TCP) { 1820 st->sync_updates++; 1821 if (st->sync_updates >= sc->sc_maxupdates) 1822 sync = true; 1823 } 1824 break; 1825 1826 case PFSYNC_S_IACK: 1827 pfsync_q_del(st, false); 1828 ref = false; 1829 /* FALLTHROUGH */ 1830 1831 case PFSYNC_S_NONE: 1832 pfsync_q_ins(st, PFSYNC_S_UPD_C, ref); 1833 st->sync_updates = 0; 1834 break; 1835 1836 default: 1837 panic("%s: unexpected sync state %d", __func__, st->sync_state); 1838 } 1839 1840 if (sync || (time_uptime - st->pfsync_time) < 2) 1841 pfsync_push(sc); 1842 1843 PFSYNC_UNLOCK(sc); 1844 } 1845 1846 static void 1847 pfsync_request_update(u_int32_t creatorid, u_int64_t id) 1848 { 1849 struct pfsync_softc *sc = V_pfsyncif; 1850 struct pfsync_upd_req_item *item; 1851 size_t nlen = sizeof(struct pfsync_upd_req); 1852 1853 PFSYNC_LOCK_ASSERT(sc); 1854 1855 /* 1856 * This code does a bit to prevent multiple update requests for the 1857 * same state being generated. It searches current subheader queue, 1858 * but it doesn't lookup into queue of already packed datagrams. 1859 */ 1860 TAILQ_FOREACH(item, &sc->sc_upd_req_list, ur_entry) 1861 if (item->ur_msg.id == id && 1862 item->ur_msg.creatorid == creatorid) 1863 return; 1864 1865 item = malloc(sizeof(*item), M_PFSYNC, M_NOWAIT); 1866 if (item == NULL) 1867 return; /* XXX stats */ 1868 1869 item->ur_msg.id = id; 1870 item->ur_msg.creatorid = creatorid; 1871 1872 if (TAILQ_EMPTY(&sc->sc_upd_req_list)) 1873 nlen += sizeof(struct pfsync_subheader); 1874 1875 if (sc->sc_len + nlen > sc->sc_ifp->if_mtu) { 1876 pfsync_sendout(1); 1877 1878 nlen = sizeof(struct pfsync_subheader) + 1879 sizeof(struct pfsync_upd_req); 1880 } 1881 1882 TAILQ_INSERT_TAIL(&sc->sc_upd_req_list, item, ur_entry); 1883 sc->sc_len += nlen; 1884 } 1885 1886 static void 1887 pfsync_update_state_req(struct pf_state *st) 1888 { 1889 struct pfsync_softc *sc = V_pfsyncif; 1890 bool ref = true; 1891 1892 PF_STATE_LOCK_ASSERT(st); 1893 PFSYNC_LOCK(sc); 1894 1895 if (st->state_flags & PFSTATE_NOSYNC) { 1896 if (st->sync_state != PFSYNC_S_NONE) 1897 pfsync_q_del(st, true); 1898 PFSYNC_UNLOCK(sc); 1899 return; 1900 } 1901 1902 switch (st->sync_state) { 1903 case PFSYNC_S_UPD_C: 1904 case PFSYNC_S_IACK: 1905 pfsync_q_del(st, false); 1906 ref = false; 1907 /* FALLTHROUGH */ 1908 1909 case PFSYNC_S_NONE: 1910 pfsync_q_ins(st, PFSYNC_S_UPD, ref); 1911 pfsync_push(sc); 1912 break; 1913 1914 case PFSYNC_S_INS: 1915 case PFSYNC_S_UPD: 1916 case PFSYNC_S_DEL: 1917 /* we're already handling it */ 1918 break; 1919 1920 default: 1921 panic("%s: unexpected sync state %d", __func__, st->sync_state); 1922 } 1923 1924 PFSYNC_UNLOCK(sc); 1925 } 1926 1927 static void 1928 pfsync_delete_state(struct pf_state *st) 1929 { 1930 struct pfsync_softc *sc = V_pfsyncif; 1931 bool ref = true; 1932 1933 PFSYNC_LOCK(sc); 1934 if (st->state_flags & PFSTATE_ACK) 1935 pfsync_undefer_state(st, 1); 1936 if (st->state_flags & PFSTATE_NOSYNC) { 1937 if (st->sync_state != PFSYNC_S_NONE) 1938 pfsync_q_del(st, true); 1939 PFSYNC_UNLOCK(sc); 1940 return; 1941 } 1942 1943 if (sc->sc_len == PFSYNC_MINPKT) 1944 callout_reset(&sc->sc_tmo, 1 * hz, pfsync_timeout, V_pfsyncif); 1945 1946 switch (st->sync_state) { 1947 case PFSYNC_S_INS: 1948 /* We never got to tell the world so just forget about it. */ 1949 pfsync_q_del(st, true); 1950 break; 1951 1952 case PFSYNC_S_UPD_C: 1953 case PFSYNC_S_UPD: 1954 case PFSYNC_S_IACK: 1955 pfsync_q_del(st, false); 1956 ref = false; 1957 /* FALLTHROUGH */ 1958 1959 case PFSYNC_S_NONE: 1960 pfsync_q_ins(st, PFSYNC_S_DEL, ref); 1961 break; 1962 1963 default: 1964 panic("%s: unexpected sync state %d", __func__, st->sync_state); 1965 } 1966 1967 PFSYNC_UNLOCK(sc); 1968 } 1969 1970 static void 1971 pfsync_clear_states(u_int32_t creatorid, const char *ifname) 1972 { 1973 struct pfsync_softc *sc = V_pfsyncif; 1974 struct { 1975 struct pfsync_subheader subh; 1976 struct pfsync_clr clr; 1977 } __packed r; 1978 1979 bzero(&r, sizeof(r)); 1980 1981 r.subh.action = PFSYNC_ACT_CLR; 1982 r.subh.count = htons(1); 1983 V_pfsyncstats.pfsyncs_oacts[PFSYNC_ACT_CLR]++; 1984 1985 strlcpy(r.clr.ifname, ifname, sizeof(r.clr.ifname)); 1986 r.clr.creatorid = creatorid; 1987 1988 PFSYNC_LOCK(sc); 1989 pfsync_send_plus(&r, sizeof(r)); 1990 PFSYNC_UNLOCK(sc); 1991 } 1992 1993 static void 1994 pfsync_q_ins(struct pf_state *st, int q, bool ref) 1995 { 1996 struct pfsync_softc *sc = V_pfsyncif; 1997 size_t nlen = pfsync_qs[q].len; 1998 1999 PFSYNC_LOCK_ASSERT(sc); 2000 2001 KASSERT(st->sync_state == PFSYNC_S_NONE, 2002 ("%s: st->sync_state %u", __func__, st->sync_state)); 2003 KASSERT(sc->sc_len >= PFSYNC_MINPKT, ("pfsync pkt len is too low %zu", 2004 sc->sc_len)); 2005 2006 if (TAILQ_EMPTY(&sc->sc_qs[q])) 2007 nlen += sizeof(struct pfsync_subheader); 2008 2009 if (sc->sc_len + nlen > sc->sc_ifp->if_mtu) { 2010 pfsync_sendout(1); 2011 2012 nlen = sizeof(struct pfsync_subheader) + pfsync_qs[q].len; 2013 } 2014 2015 sc->sc_len += nlen; 2016 TAILQ_INSERT_TAIL(&sc->sc_qs[q], st, sync_list); 2017 st->sync_state = q; 2018 if (ref) 2019 pf_ref_state(st); 2020 } 2021 2022 static void 2023 pfsync_q_del(struct pf_state *st, bool unref) 2024 { 2025 struct pfsync_softc *sc = V_pfsyncif; 2026 int q = st->sync_state; 2027 2028 PFSYNC_LOCK_ASSERT(sc); 2029 KASSERT(st->sync_state != PFSYNC_S_NONE, 2030 ("%s: st->sync_state != PFSYNC_S_NONE", __func__)); 2031 2032 sc->sc_len -= pfsync_qs[q].len; 2033 TAILQ_REMOVE(&sc->sc_qs[q], st, sync_list); 2034 st->sync_state = PFSYNC_S_NONE; 2035 if (unref) 2036 pf_release_state(st); 2037 2038 if (TAILQ_EMPTY(&sc->sc_qs[q])) 2039 sc->sc_len -= sizeof(struct pfsync_subheader); 2040 } 2041 2042 static void 2043 pfsync_bulk_start(void) 2044 { 2045 struct pfsync_softc *sc = V_pfsyncif; 2046 2047 if (V_pf_status.debug >= PF_DEBUG_MISC) 2048 printf("pfsync: received bulk update request\n"); 2049 2050 PFSYNC_BLOCK(sc); 2051 2052 sc->sc_ureq_received = time_uptime; 2053 sc->sc_bulk_hashid = 0; 2054 sc->sc_bulk_stateid = 0; 2055 pfsync_bulk_status(PFSYNC_BUS_START); 2056 callout_reset(&sc->sc_bulk_tmo, 1, pfsync_bulk_update, sc); 2057 PFSYNC_BUNLOCK(sc); 2058 } 2059 2060 static void 2061 pfsync_bulk_update(void *arg) 2062 { 2063 struct pfsync_softc *sc = arg; 2064 struct pf_state *s; 2065 int i, sent = 0; 2066 2067 PFSYNC_BLOCK_ASSERT(sc); 2068 CURVNET_SET(sc->sc_ifp->if_vnet); 2069 2070 /* 2071 * Start with last state from previous invocation. 2072 * It may had gone, in this case start from the 2073 * hash slot. 2074 */ 2075 s = pf_find_state_byid(sc->sc_bulk_stateid, sc->sc_bulk_creatorid); 2076 2077 if (s != NULL) 2078 i = PF_IDHASH(s); 2079 else 2080 i = sc->sc_bulk_hashid; 2081 2082 for (; i <= pf_hashmask; i++) { 2083 struct pf_idhash *ih = &V_pf_idhash[i]; 2084 2085 if (s != NULL) 2086 PF_HASHROW_ASSERT(ih); 2087 else { 2088 PF_HASHROW_LOCK(ih); 2089 s = LIST_FIRST(&ih->states); 2090 } 2091 2092 for (; s; s = LIST_NEXT(s, entry)) { 2093 2094 if (sent > 1 && (sc->sc_ifp->if_mtu - sc->sc_len) < 2095 sizeof(struct pfsync_state)) { 2096 /* We've filled a packet. */ 2097 sc->sc_bulk_hashid = i; 2098 sc->sc_bulk_stateid = s->id; 2099 sc->sc_bulk_creatorid = s->creatorid; 2100 PF_HASHROW_UNLOCK(ih); 2101 callout_reset(&sc->sc_bulk_tmo, 1, 2102 pfsync_bulk_update, sc); 2103 goto full; 2104 } 2105 2106 if (s->sync_state == PFSYNC_S_NONE && 2107 s->timeout < PFTM_MAX && 2108 s->pfsync_time <= sc->sc_ureq_received) { 2109 pfsync_update_state_req(s); 2110 sent++; 2111 } 2112 } 2113 PF_HASHROW_UNLOCK(ih); 2114 } 2115 2116 /* We're done. */ 2117 pfsync_bulk_status(PFSYNC_BUS_END); 2118 2119 full: 2120 CURVNET_RESTORE(); 2121 } 2122 2123 static void 2124 pfsync_bulk_status(u_int8_t status) 2125 { 2126 struct { 2127 struct pfsync_subheader subh; 2128 struct pfsync_bus bus; 2129 } __packed r; 2130 2131 struct pfsync_softc *sc = V_pfsyncif; 2132 2133 bzero(&r, sizeof(r)); 2134 2135 r.subh.action = PFSYNC_ACT_BUS; 2136 r.subh.count = htons(1); 2137 V_pfsyncstats.pfsyncs_oacts[PFSYNC_ACT_BUS]++; 2138 2139 r.bus.creatorid = V_pf_status.hostid; 2140 r.bus.endtime = htonl(time_uptime - sc->sc_ureq_received); 2141 r.bus.status = status; 2142 2143 PFSYNC_LOCK(sc); 2144 pfsync_send_plus(&r, sizeof(r)); 2145 PFSYNC_UNLOCK(sc); 2146 } 2147 2148 static void 2149 pfsync_bulk_fail(void *arg) 2150 { 2151 struct pfsync_softc *sc = arg; 2152 2153 CURVNET_SET(sc->sc_ifp->if_vnet); 2154 2155 PFSYNC_BLOCK_ASSERT(sc); 2156 2157 if (sc->sc_bulk_tries++ < PFSYNC_MAX_BULKTRIES) { 2158 /* Try again */ 2159 callout_reset(&sc->sc_bulkfail_tmo, 5 * hz, 2160 pfsync_bulk_fail, V_pfsyncif); 2161 PFSYNC_LOCK(sc); 2162 pfsync_request_update(0, 0); 2163 PFSYNC_UNLOCK(sc); 2164 } else { 2165 /* Pretend like the transfer was ok. */ 2166 sc->sc_ureq_sent = 0; 2167 sc->sc_bulk_tries = 0; 2168 PFSYNC_LOCK(sc); 2169 if (!(sc->sc_flags & PFSYNCF_OK) && carp_demote_adj_p) 2170 (*carp_demote_adj_p)(-V_pfsync_carp_adj, 2171 "pfsync bulk fail"); 2172 sc->sc_flags |= PFSYNCF_OK; 2173 PFSYNC_UNLOCK(sc); 2174 if (V_pf_status.debug >= PF_DEBUG_MISC) 2175 printf("pfsync: failed to receive bulk update\n"); 2176 } 2177 2178 CURVNET_RESTORE(); 2179 } 2180 2181 static void 2182 pfsync_send_plus(void *plus, size_t pluslen) 2183 { 2184 struct pfsync_softc *sc = V_pfsyncif; 2185 2186 PFSYNC_LOCK_ASSERT(sc); 2187 2188 if (sc->sc_len + pluslen > sc->sc_ifp->if_mtu) 2189 pfsync_sendout(1); 2190 2191 sc->sc_plus = plus; 2192 sc->sc_len += (sc->sc_pluslen = pluslen); 2193 2194 pfsync_sendout(1); 2195 } 2196 2197 static void 2198 pfsync_timeout(void *arg) 2199 { 2200 struct pfsync_softc *sc = arg; 2201 2202 CURVNET_SET(sc->sc_ifp->if_vnet); 2203 PFSYNC_LOCK(sc); 2204 pfsync_push(sc); 2205 PFSYNC_UNLOCK(sc); 2206 CURVNET_RESTORE(); 2207 } 2208 2209 static void 2210 pfsync_push(struct pfsync_softc *sc) 2211 { 2212 2213 PFSYNC_LOCK_ASSERT(sc); 2214 2215 sc->sc_flags |= PFSYNCF_PUSH; 2216 swi_sched(V_pfsync_swi_cookie, 0); 2217 } 2218 2219 static void 2220 pfsyncintr(void *arg) 2221 { 2222 struct pfsync_softc *sc = arg; 2223 struct mbuf *m, *n; 2224 2225 CURVNET_SET(sc->sc_ifp->if_vnet); 2226 2227 PFSYNC_LOCK(sc); 2228 if ((sc->sc_flags & PFSYNCF_PUSH) && sc->sc_len > PFSYNC_MINPKT) { 2229 pfsync_sendout(0); 2230 sc->sc_flags &= ~PFSYNCF_PUSH; 2231 } 2232 _IF_DEQUEUE_ALL(&sc->sc_ifp->if_snd, m); 2233 PFSYNC_UNLOCK(sc); 2234 2235 for (; m != NULL; m = n) { 2236 2237 n = m->m_nextpkt; 2238 m->m_nextpkt = NULL; 2239 2240 /* 2241 * We distinguish between a deferral packet and our 2242 * own pfsync packet based on M_SKIP_FIREWALL 2243 * flag. This is XXX. 2244 */ 2245 if (m->m_flags & M_SKIP_FIREWALL) 2246 ip_output(m, NULL, NULL, 0, NULL, NULL); 2247 else if (ip_output(m, NULL, NULL, IP_RAWOUTPUT, &sc->sc_imo, 2248 NULL) == 0) 2249 V_pfsyncstats.pfsyncs_opackets++; 2250 else 2251 V_pfsyncstats.pfsyncs_oerrors++; 2252 } 2253 CURVNET_RESTORE(); 2254 } 2255 2256 static int 2257 pfsync_multicast_setup(struct pfsync_softc *sc, struct ifnet *ifp, void *mship) 2258 { 2259 struct ip_moptions *imo = &sc->sc_imo; 2260 int error; 2261 2262 if (!(ifp->if_flags & IFF_MULTICAST)) 2263 return (EADDRNOTAVAIL); 2264 2265 imo->imo_membership = (struct in_multi **)mship; 2266 imo->imo_max_memberships = IP_MIN_MEMBERSHIPS; 2267 imo->imo_multicast_vif = -1; 2268 2269 if ((error = in_joingroup(ifp, &sc->sc_sync_peer, NULL, 2270 &imo->imo_membership[0])) != 0) { 2271 imo->imo_membership = NULL; 2272 return (error); 2273 } 2274 imo->imo_num_memberships++; 2275 imo->imo_multicast_ifp = ifp; 2276 imo->imo_multicast_ttl = PFSYNC_DFLTTL; 2277 imo->imo_multicast_loop = 0; 2278 2279 return (0); 2280 } 2281 2282 static void 2283 pfsync_multicast_cleanup(struct pfsync_softc *sc) 2284 { 2285 struct ip_moptions *imo = &sc->sc_imo; 2286 2287 in_leavegroup(imo->imo_membership[0], NULL); 2288 free(imo->imo_membership, M_PFSYNC); 2289 imo->imo_membership = NULL; 2290 imo->imo_multicast_ifp = NULL; 2291 } 2292 2293 #ifdef INET 2294 extern struct domain inetdomain; 2295 static struct protosw in_pfsync_protosw = { 2296 .pr_type = SOCK_RAW, 2297 .pr_domain = &inetdomain, 2298 .pr_protocol = IPPROTO_PFSYNC, 2299 .pr_flags = PR_ATOMIC|PR_ADDR, 2300 .pr_input = pfsync_input, 2301 .pr_output = rip_output, 2302 .pr_ctloutput = rip_ctloutput, 2303 .pr_usrreqs = &rip_usrreqs 2304 }; 2305 #endif 2306 2307 static void 2308 pfsync_pointers_init() 2309 { 2310 2311 PF_RULES_WLOCK(); 2312 pfsync_state_import_ptr = pfsync_state_import; 2313 pfsync_insert_state_ptr = pfsync_insert_state; 2314 pfsync_update_state_ptr = pfsync_update_state; 2315 pfsync_delete_state_ptr = pfsync_delete_state; 2316 pfsync_clear_states_ptr = pfsync_clear_states; 2317 pfsync_defer_ptr = pfsync_defer; 2318 PF_RULES_WUNLOCK(); 2319 } 2320 2321 static void 2322 pfsync_pointers_uninit() 2323 { 2324 2325 PF_RULES_WLOCK(); 2326 pfsync_state_import_ptr = NULL; 2327 pfsync_insert_state_ptr = NULL; 2328 pfsync_update_state_ptr = NULL; 2329 pfsync_delete_state_ptr = NULL; 2330 pfsync_clear_states_ptr = NULL; 2331 pfsync_defer_ptr = NULL; 2332 PF_RULES_WUNLOCK(); 2333 } 2334 2335 static void 2336 vnet_pfsync_init(const void *unused __unused) 2337 { 2338 int error; 2339 2340 V_pfsync_cloner = if_clone_simple(pfsyncname, 2341 pfsync_clone_create, pfsync_clone_destroy, 1); 2342 error = swi_add(NULL, pfsyncname, pfsyncintr, V_pfsyncif, 2343 SWI_NET, INTR_MPSAFE, &V_pfsync_swi_cookie); 2344 if (error) { 2345 if_clone_detach(V_pfsync_cloner); 2346 log(LOG_INFO, "swi_add() failed in %s\n", __func__); 2347 } 2348 } 2349 VNET_SYSINIT(vnet_pfsync_init, SI_SUB_PROTO_FIREWALL, SI_ORDER_ANY, 2350 vnet_pfsync_init, NULL); 2351 2352 static void 2353 vnet_pfsync_uninit(const void *unused __unused) 2354 { 2355 2356 if_clone_detach(V_pfsync_cloner); 2357 swi_remove(V_pfsync_swi_cookie); 2358 } 2359 /* 2360 * Detach after pf is gone; otherwise we might touch pfsync memory 2361 * from within pf after freeing pfsync. 2362 */ 2363 VNET_SYSUNINIT(vnet_pfsync_uninit, SI_SUB_INIT_IF, SI_ORDER_SECOND, 2364 vnet_pfsync_uninit, NULL); 2365 2366 static int 2367 pfsync_init() 2368 { 2369 #ifdef INET 2370 int error; 2371 2372 error = pf_proto_register(PF_INET, &in_pfsync_protosw); 2373 if (error) 2374 return (error); 2375 error = ipproto_register(IPPROTO_PFSYNC); 2376 if (error) { 2377 pf_proto_unregister(PF_INET, IPPROTO_PFSYNC, SOCK_RAW); 2378 return (error); 2379 } 2380 #endif 2381 pfsync_pointers_init(); 2382 2383 return (0); 2384 } 2385 2386 static void 2387 pfsync_uninit() 2388 { 2389 2390 pfsync_pointers_uninit(); 2391 2392 #ifdef INET 2393 ipproto_unregister(IPPROTO_PFSYNC); 2394 pf_proto_unregister(PF_INET, IPPROTO_PFSYNC, SOCK_RAW); 2395 #endif 2396 } 2397 2398 static int 2399 pfsync_modevent(module_t mod, int type, void *data) 2400 { 2401 int error = 0; 2402 2403 switch (type) { 2404 case MOD_LOAD: 2405 error = pfsync_init(); 2406 break; 2407 case MOD_QUIESCE: 2408 /* 2409 * Module should not be unloaded due to race conditions. 2410 */ 2411 error = EBUSY; 2412 break; 2413 case MOD_UNLOAD: 2414 pfsync_uninit(); 2415 break; 2416 default: 2417 error = EINVAL; 2418 break; 2419 } 2420 2421 return (error); 2422 } 2423 2424 static moduledata_t pfsync_mod = { 2425 pfsyncname, 2426 pfsync_modevent, 2427 0 2428 }; 2429 2430 #define PFSYNC_MODVER 1 2431 2432 /* Stay on FIREWALL as we depend on pf being initialized and on inetdomain. */ 2433 DECLARE_MODULE(pfsync, pfsync_mod, SI_SUB_PROTO_FIREWALL, SI_ORDER_ANY); 2434 MODULE_VERSION(pfsync, PFSYNC_MODVER); 2435 MODULE_DEPEND(pfsync, pf, PF_MODVER, PF_MODVER, PF_MODVER); 2436