1 /* $OpenBSD: pf_lb.c,v 1.74 2023/05/10 22:42:51 sashan Exp $ */ 2 3 /* 4 * Copyright (c) 2001 Daniel Hartmeier 5 * Copyright (c) 2002 - 2008 Henning Brauer 6 * All rights reserved. 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions 10 * are met: 11 * 12 * - Redistributions of source code must retain the above copyright 13 * notice, this list of conditions and the following disclaimer. 14 * - Redistributions in binary form must reproduce the above 15 * copyright notice, this list of conditions and the following 16 * disclaimer in the documentation and/or other materials provided 17 * with the distribution. 18 * 19 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 20 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 21 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS 22 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE 23 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, 24 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, 25 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 26 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 27 * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 28 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN 29 * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 30 * POSSIBILITY OF SUCH DAMAGE. 31 * 32 * Effort sponsored in part by the Defense Advanced Research Projects 33 * Agency (DARPA) and Air Force Research Laboratory, Air Force 34 * Materiel Command, USAF, under agreement number F30602-01-2-0537. 35 * 36 */ 37 38 #include "bpfilter.h" 39 #include "pflog.h" 40 #include "pfsync.h" 41 #include "pflow.h" 42 43 #include <sys/param.h> 44 #include <sys/systm.h> 45 #include <sys/mbuf.h> 46 #include <sys/filio.h> 47 #include <sys/socket.h> 48 #include <sys/socketvar.h> 49 #include <sys/kernel.h> 50 #include <sys/time.h> 51 #include <sys/pool.h> 52 #include <sys/rwlock.h> 53 #include <sys/syslog.h> 54 #include <sys/stdint.h> 55 56 #include <crypto/siphash.h> 57 58 #include <net/if.h> 59 #include <net/bpf.h> 60 #include <net/route.h> 61 62 #include <netinet/in.h> 63 #include <netinet/ip.h> 64 #include <netinet/in_pcb.h> 65 #include <netinet/ip_var.h> 66 #include <netinet/ip_icmp.h> 67 #include <netinet/icmp_var.h> 68 #include <netinet/tcp.h> 69 #include <netinet/tcp_seq.h> 70 #include <netinet/tcp_timer.h> 71 #include <netinet/udp.h> 72 #include <netinet/udp_var.h> 73 #include <netinet/if_ether.h> 74 75 #ifdef INET6 76 #include <netinet/ip6.h> 77 #include <netinet/icmp6.h> 78 #endif /* INET6 */ 79 80 #include <net/pfvar.h> 81 #include <net/pfvar_priv.h> 82 83 #if NPFLOG > 0 84 #include <net/if_pflog.h> 85 #endif /* NPFLOG > 0 */ 86 87 #if NPFLOW > 0 88 #include <net/if_pflow.h> 89 #endif /* NPFLOW > 0 */ 90 91 #if NPFSYNC > 0 92 #include <net/if_pfsync.h> 93 #endif /* NPFSYNC > 0 */ 94 95 u_int64_t pf_hash(struct pf_addr *, struct pf_addr *, 96 struct pf_poolhashkey *, sa_family_t); 97 int pf_get_sport(struct pf_pdesc *, struct pf_rule *, 98 struct pf_addr *, u_int16_t *, u_int16_t, 99 u_int16_t, struct pf_src_node **); 100 int pf_map_addr_states_increase(sa_family_t, 101 struct pf_pool *, struct pf_addr *); 102 int pf_get_transaddr_af(struct pf_rule *, 103 struct pf_pdesc *, struct pf_src_node **); 104 int pf_map_addr_sticky(sa_family_t, struct pf_rule *, 105 struct pf_addr *, struct pf_addr *, 106 struct pf_src_node **, struct pf_pool *, 107 enum pf_sn_types); 108 109 u_int64_t 110 pf_hash(struct pf_addr *inaddr, struct pf_addr *hash, 111 struct pf_poolhashkey *key, sa_family_t af) 112 { 113 uint64_t res = 0; 114 #ifdef INET6 115 union { 116 uint64_t hash64; 117 uint32_t hash32[2]; 118 } h; 119 #endif /* INET6 */ 120 121 switch (af) { 122 case AF_INET: 123 res = SipHash24((SIPHASH_KEY *)key, 124 &inaddr->addr32[0], sizeof(inaddr->addr32[0])); 125 hash->addr32[0] = res; 126 break; 127 #ifdef INET6 128 case AF_INET6: 129 res = SipHash24((SIPHASH_KEY *)key, &inaddr->addr32[0], 130 4 * sizeof(inaddr->addr32[0])); 131 h.hash64 = res; 132 hash->addr32[0] = h.hash32[0]; 133 hash->addr32[1] = h.hash32[1]; 134 /* 135 * siphash isn't big enough, but flipping it around is 136 * good enough here. 137 */ 138 hash->addr32[2] = ~h.hash32[1]; 139 hash->addr32[3] = ~h.hash32[0]; 140 break; 141 #endif /* INET6 */ 142 default: 143 unhandled_af(af); 144 } 145 return (res); 146 } 147 148 int 149 pf_get_sport(struct pf_pdesc *pd, struct pf_rule *r, 150 struct pf_addr *naddr, u_int16_t *nport, u_int16_t low, u_int16_t high, 151 struct pf_src_node **sn) 152 { 153 struct pf_state_key_cmp key; 154 struct pf_addr init_addr; 155 u_int16_t cut; 156 int dir = (pd->dir == PF_IN) ? PF_OUT : PF_IN; 157 int sidx = pd->sidx; 158 int didx = pd->didx; 159 160 memset(&init_addr, 0, sizeof(init_addr)); 161 if (pf_map_addr(pd->naf, r, &pd->nsaddr, naddr, &init_addr, sn, &r->nat, 162 PF_SN_NAT)) 163 return (1); 164 165 if (pd->proto == IPPROTO_ICMP) { 166 if (pd->ndport == htons(ICMP_ECHO)) { 167 low = 1; 168 high = 65535; 169 } else 170 return (0); /* Don't try to modify non-echo ICMP */ 171 } 172 #ifdef INET6 173 if (pd->proto == IPPROTO_ICMPV6) { 174 if (pd->ndport == htons(ICMP6_ECHO_REQUEST)) { 175 low = 1; 176 high = 65535; 177 } else 178 return (0); /* Don't try to modify non-echo ICMP */ 179 } 180 #endif /* INET6 */ 181 182 do { 183 key.af = pd->naf; 184 key.proto = pd->proto; 185 key.rdomain = pd->rdomain; 186 pf_addrcpy(&key.addr[didx], &pd->ndaddr, key.af); 187 pf_addrcpy(&key.addr[sidx], naddr, key.af); 188 key.port[didx] = pd->ndport; 189 190 /* 191 * port search; start random, step; 192 * similar 2 portloop in in_pcbbind 193 */ 194 if (!(pd->proto == IPPROTO_TCP || pd->proto == IPPROTO_UDP || 195 pd->proto == IPPROTO_ICMP || pd->proto == IPPROTO_ICMPV6)) { 196 /* XXX bug: icmp states dont use the id on both 197 * XXX sides (traceroute -I through nat) */ 198 key.port[sidx] = pd->nsport; 199 key.hash = pf_pkt_hash(key.af, key.proto, &key.addr[0], 200 &key.addr[1], key.port[0], key.port[1]); 201 if (pf_find_state_all(&key, dir, NULL) == NULL) { 202 *nport = pd->nsport; 203 return (0); 204 } 205 } else if (low == 0 && high == 0) { 206 key.port[sidx] = pd->nsport; 207 key.hash = pf_pkt_hash(key.af, key.proto, &key.addr[0], 208 &key.addr[1], key.port[0], key.port[1]); 209 if (pf_find_state_all(&key, dir, NULL) == NULL) { 210 *nport = pd->nsport; 211 return (0); 212 } 213 } else if (low == high) { 214 key.port[sidx] = htons(low); 215 key.hash = pf_pkt_hash(key.af, key.proto, &key.addr[0], 216 &key.addr[1], key.port[0], key.port[1]); 217 if (pf_find_state_all(&key, dir, NULL) == NULL) { 218 *nport = htons(low); 219 return (0); 220 } 221 } else { 222 u_int32_t tmp; 223 224 if (low > high) { 225 tmp = low; 226 low = high; 227 high = tmp; 228 } 229 /* low < high */ 230 cut = arc4random_uniform(1 + high - low) + low; 231 /* low <= cut <= high */ 232 for (tmp = cut; tmp <= high && tmp <= 0xffff; ++tmp) { 233 key.port[sidx] = htons(tmp); 234 key.hash = pf_pkt_hash(key.af, key.proto, 235 &key.addr[0], &key.addr[1], key.port[0], 236 key.port[1]); 237 if (pf_find_state_all(&key, dir, NULL) == 238 NULL && !in_baddynamic(tmp, pd->proto)) { 239 *nport = htons(tmp); 240 return (0); 241 } 242 } 243 tmp = cut; 244 for (tmp -= 1; tmp >= low && tmp <= 0xffff; --tmp) { 245 key.port[sidx] = htons(tmp); 246 key.hash = pf_pkt_hash(key.af, key.proto, 247 &key.addr[0], &key.addr[1], key.port[0], 248 key.port[1]); 249 if (pf_find_state_all(&key, dir, NULL) == 250 NULL && !in_baddynamic(tmp, pd->proto)) { 251 *nport = htons(tmp); 252 return (0); 253 } 254 } 255 } 256 257 switch (r->nat.opts & PF_POOL_TYPEMASK) { 258 case PF_POOL_RANDOM: 259 case PF_POOL_ROUNDROBIN: 260 case PF_POOL_LEASTSTATES: 261 /* 262 * pick a different source address since we're out 263 * of free port choices for the current one. 264 */ 265 if (pf_map_addr(pd->naf, r, &pd->nsaddr, naddr, 266 &init_addr, sn, &r->nat, PF_SN_NAT)) 267 return (1); 268 break; 269 case PF_POOL_NONE: 270 case PF_POOL_SRCHASH: 271 case PF_POOL_BITMASK: 272 default: 273 return (1); 274 } 275 } while (! PF_AEQ(&init_addr, naddr, pd->naf) ); 276 return (1); /* none available */ 277 } 278 279 int 280 pf_map_addr_sticky(sa_family_t af, struct pf_rule *r, struct pf_addr *saddr, 281 struct pf_addr *naddr, struct pf_src_node **sns, struct pf_pool *rpool, 282 enum pf_sn_types type) 283 { 284 struct pf_addr *raddr, *rmask, *cached; 285 struct pf_state *s; 286 struct pf_src_node k; 287 int valid; 288 289 k.af = af; 290 k.type = type; 291 pf_addrcpy(&k.addr, saddr, af); 292 k.rule.ptr = r; 293 pf_status.scounters[SCNT_SRC_NODE_SEARCH]++; 294 sns[type] = RB_FIND(pf_src_tree, &tree_src_tracking, &k); 295 if (sns[type] == NULL) 296 return (-1); 297 298 /* check if the cached entry is still valid */ 299 cached = &(sns[type])->raddr; 300 valid = 0; 301 if (PF_AZERO(cached, af)) { 302 valid = 1; 303 } else if (rpool->addr.type == PF_ADDR_DYNIFTL) { 304 if (pfr_kentry_byaddr(rpool->addr.p.dyn->pfid_kt, cached, 305 af, 0)) 306 valid = 1; 307 } else if (rpool->addr.type == PF_ADDR_TABLE) { 308 if (pfr_kentry_byaddr(rpool->addr.p.tbl, cached, af, 0)) 309 valid = 1; 310 } else if (rpool->addr.type != PF_ADDR_NOROUTE) { 311 raddr = &rpool->addr.v.a.addr; 312 rmask = &rpool->addr.v.a.mask; 313 valid = pf_match_addr(0, raddr, rmask, cached, af); 314 } 315 if (!valid) { 316 if (pf_status.debug >= LOG_DEBUG) { 317 log(LOG_DEBUG, "pf: pf_map_addr: " 318 "stale src tracking (%u) ", type); 319 pf_print_host(&k.addr, 0, af); 320 addlog(" to "); 321 pf_print_host(cached, 0, af); 322 addlog("\n"); 323 } 324 if (sns[type]->states != 0) { 325 /* XXX expensive */ 326 RBT_FOREACH(s, pf_state_tree_id, &tree_id) 327 pf_state_rm_src_node(s, sns[type]); 328 } 329 sns[type]->expire = 1; 330 pf_remove_src_node(sns[type]); 331 sns[type] = NULL; 332 return (-1); 333 } 334 335 336 if (!PF_AZERO(cached, af)) { 337 pf_addrcpy(naddr, cached, af); 338 if ((rpool->opts & PF_POOL_TYPEMASK) == PF_POOL_LEASTSTATES && 339 pf_map_addr_states_increase(af, rpool, cached) == -1) 340 return (-1); 341 } 342 if (pf_status.debug >= LOG_DEBUG) { 343 log(LOG_DEBUG, "pf: pf_map_addr: " 344 "src tracking (%u) maps ", type); 345 pf_print_host(&k.addr, 0, af); 346 addlog(" to "); 347 pf_print_host(naddr, 0, af); 348 addlog("\n"); 349 } 350 351 if (sns[type]->kif != NULL) 352 rpool->kif = sns[type]->kif; 353 354 return (0); 355 } 356 357 uint32_t 358 pf_rand_addr(uint32_t mask) 359 { 360 uint32_t addr; 361 362 mask = ~ntohl(mask); 363 addr = arc4random_uniform(mask + 1); 364 365 return (htonl(addr)); 366 } 367 368 int 369 pf_map_addr(sa_family_t af, struct pf_rule *r, struct pf_addr *saddr, 370 struct pf_addr *naddr, struct pf_addr *init_addr, struct pf_src_node **sns, 371 struct pf_pool *rpool, enum pf_sn_types type) 372 { 373 struct pf_addr hash; 374 struct pf_addr faddr; 375 struct pf_addr *raddr = &rpool->addr.v.a.addr; 376 struct pf_addr *rmask = &rpool->addr.v.a.mask; 377 struct pfr_ktable *kt; 378 struct pfi_kif *kif; 379 u_int64_t states; 380 u_int16_t weight; 381 u_int64_t load; 382 u_int64_t cload; 383 u_int64_t hashidx; 384 int cnt; 385 386 if (sns[type] == NULL && rpool->opts & PF_POOL_STICKYADDR && 387 (rpool->opts & PF_POOL_TYPEMASK) != PF_POOL_NONE && 388 pf_map_addr_sticky(af, r, saddr, naddr, sns, rpool, type) == 0) 389 return (0); 390 391 if (rpool->addr.type == PF_ADDR_NOROUTE) 392 return (1); 393 if (rpool->addr.type == PF_ADDR_DYNIFTL) { 394 switch (af) { 395 case AF_INET: 396 if (rpool->addr.p.dyn->pfid_acnt4 < 1 && 397 !PF_POOL_DYNTYPE(rpool->opts)) 398 return (1); 399 raddr = &rpool->addr.p.dyn->pfid_addr4; 400 rmask = &rpool->addr.p.dyn->pfid_mask4; 401 break; 402 #ifdef INET6 403 case AF_INET6: 404 if (rpool->addr.p.dyn->pfid_acnt6 < 1 && 405 !PF_POOL_DYNTYPE(rpool->opts)) 406 return (1); 407 raddr = &rpool->addr.p.dyn->pfid_addr6; 408 rmask = &rpool->addr.p.dyn->pfid_mask6; 409 break; 410 #endif /* INET6 */ 411 default: 412 unhandled_af(af); 413 } 414 } else if (rpool->addr.type == PF_ADDR_TABLE) { 415 if (!PF_POOL_DYNTYPE(rpool->opts)) 416 return (1); /* unsupported */ 417 } else { 418 raddr = &rpool->addr.v.a.addr; 419 rmask = &rpool->addr.v.a.mask; 420 } 421 422 switch (rpool->opts & PF_POOL_TYPEMASK) { 423 case PF_POOL_NONE: 424 pf_addrcpy(naddr, raddr, af); 425 break; 426 case PF_POOL_BITMASK: 427 pf_poolmask(naddr, raddr, rmask, saddr, af); 428 break; 429 case PF_POOL_RANDOM: 430 if (rpool->addr.type == PF_ADDR_TABLE || 431 rpool->addr.type == PF_ADDR_DYNIFTL) { 432 if (rpool->addr.type == PF_ADDR_TABLE) 433 kt = rpool->addr.p.tbl; 434 else 435 kt = rpool->addr.p.dyn->pfid_kt; 436 kt = pfr_ktable_select_active(kt); 437 if (kt == NULL) 438 return (1); 439 440 cnt = kt->pfrkt_cnt; 441 if (cnt == 0) 442 rpool->tblidx = 0; 443 else 444 rpool->tblidx = (int)arc4random_uniform(cnt); 445 memset(&rpool->counter, 0, sizeof(rpool->counter)); 446 if (pfr_pool_get(rpool, &raddr, &rmask, af)) 447 return (1); 448 pf_addrcpy(naddr, &rpool->counter, af); 449 } else if (init_addr != NULL && PF_AZERO(init_addr, af)) { 450 switch (af) { 451 case AF_INET: 452 rpool->counter.addr32[0] = pf_rand_addr( 453 rmask->addr32[0]); 454 break; 455 #ifdef INET6 456 case AF_INET6: 457 if (rmask->addr32[3] != 0xffffffff) 458 rpool->counter.addr32[3] = pf_rand_addr( 459 rmask->addr32[3]); 460 else 461 break; 462 if (rmask->addr32[2] != 0xffffffff) 463 rpool->counter.addr32[2] = pf_rand_addr( 464 rmask->addr32[2]); 465 else 466 break; 467 if (rmask->addr32[1] != 0xffffffff) 468 rpool->counter.addr32[1] = pf_rand_addr( 469 rmask->addr32[1]); 470 else 471 break; 472 if (rmask->addr32[0] != 0xffffffff) 473 rpool->counter.addr32[0] = pf_rand_addr( 474 rmask->addr32[0]); 475 break; 476 #endif /* INET6 */ 477 default: 478 unhandled_af(af); 479 } 480 pf_poolmask(naddr, raddr, rmask, &rpool->counter, af); 481 pf_addrcpy(init_addr, naddr, af); 482 483 } else { 484 pf_addr_inc(&rpool->counter, af); 485 pf_poolmask(naddr, raddr, rmask, &rpool->counter, af); 486 } 487 break; 488 case PF_POOL_SRCHASH: 489 hashidx = pf_hash(saddr, &hash, &rpool->key, af); 490 491 if (rpool->addr.type == PF_ADDR_TABLE || 492 rpool->addr.type == PF_ADDR_DYNIFTL) { 493 if (rpool->addr.type == PF_ADDR_TABLE) 494 kt = rpool->addr.p.tbl; 495 else 496 kt = rpool->addr.p.dyn->pfid_kt; 497 kt = pfr_ktable_select_active(kt); 498 if (kt == NULL) 499 return (1); 500 501 cnt = kt->pfrkt_cnt; 502 if (cnt == 0) 503 rpool->tblidx = 0; 504 else 505 rpool->tblidx = (int)(hashidx % cnt); 506 memset(&rpool->counter, 0, sizeof(rpool->counter)); 507 if (pfr_pool_get(rpool, &raddr, &rmask, af)) 508 return (1); 509 pf_addrcpy(naddr, &rpool->counter, af); 510 } else { 511 pf_poolmask(naddr, raddr, rmask, &hash, af); 512 } 513 break; 514 case PF_POOL_ROUNDROBIN: 515 if (rpool->addr.type == PF_ADDR_TABLE || 516 rpool->addr.type == PF_ADDR_DYNIFTL) { 517 if (pfr_pool_get(rpool, &raddr, &rmask, af)) { 518 /* 519 * reset counter in case its value 520 * has been removed from the pool. 521 */ 522 memset(&rpool->counter, 0, 523 sizeof(rpool->counter)); 524 if (pfr_pool_get(rpool, &raddr, &rmask, af)) 525 return (1); 526 } 527 } else if (PF_AZERO(&rpool->counter, af)) { 528 /* 529 * fall back to POOL_NONE if there is a single host 530 * address in pool. 531 */ 532 if (af == AF_INET && 533 rmask->addr32[0] == INADDR_BROADCAST) { 534 pf_addrcpy(naddr, raddr, af); 535 break; 536 } 537 #ifdef INET6 538 if (af == AF_INET6 && 539 IN6_ARE_ADDR_EQUAL(&rmask->v6, &in6mask128)) { 540 pf_addrcpy(naddr, raddr, af); 541 break; 542 } 543 #endif 544 } else if (pf_match_addr(0, raddr, rmask, &rpool->counter, af)) 545 return (1); 546 547 /* iterate over table if it contains entries which are weighted */ 548 if ((rpool->addr.type == PF_ADDR_TABLE && 549 rpool->addr.p.tbl->pfrkt_refcntcost > 0) || 550 (rpool->addr.type == PF_ADDR_DYNIFTL && 551 rpool->addr.p.dyn->pfid_kt->pfrkt_refcntcost > 0)) { 552 do { 553 if (rpool->addr.type == PF_ADDR_TABLE || 554 rpool->addr.type == PF_ADDR_DYNIFTL) { 555 if (pfr_pool_get(rpool, 556 &raddr, &rmask, af)) 557 return (1); 558 } else { 559 log(LOG_ERR, "pf: pf_map_addr: " 560 "weighted RR failure"); 561 return (1); 562 } 563 if (rpool->weight >= rpool->curweight) 564 break; 565 pf_addr_inc(&rpool->counter, af); 566 } while (1); 567 568 weight = rpool->weight; 569 } 570 571 pf_poolmask(naddr, raddr, rmask, &rpool->counter, af); 572 if (init_addr != NULL && PF_AZERO(init_addr, af)) 573 pf_addrcpy(init_addr, &rpool->counter, af); 574 pf_addr_inc(&rpool->counter, af); 575 break; 576 case PF_POOL_LEASTSTATES: 577 /* retrieve an address first */ 578 if (rpool->addr.type == PF_ADDR_TABLE || 579 rpool->addr.type == PF_ADDR_DYNIFTL) { 580 if (pfr_pool_get(rpool, &raddr, &rmask, af)) { 581 /* see PF_POOL_ROUNDROBIN */ 582 memset(&rpool->counter, 0, 583 sizeof(rpool->counter)); 584 if (pfr_pool_get(rpool, &raddr, &rmask, af)) 585 return (1); 586 } 587 } else if (pf_match_addr(0, raddr, rmask, &rpool->counter, af)) 588 return (1); 589 590 states = rpool->states; 591 weight = rpool->weight; 592 kif = rpool->kif; 593 594 if ((rpool->addr.type == PF_ADDR_TABLE && 595 rpool->addr.p.tbl->pfrkt_refcntcost > 0) || 596 (rpool->addr.type == PF_ADDR_DYNIFTL && 597 rpool->addr.p.dyn->pfid_kt->pfrkt_refcntcost > 0)) 598 load = ((UINT16_MAX * rpool->states) / rpool->weight); 599 else 600 load = states; 601 602 pf_addrcpy(&faddr, &rpool->counter, af); 603 604 pf_addrcpy(naddr, &rpool->counter, af); 605 if (init_addr != NULL && PF_AZERO(init_addr, af)) 606 pf_addrcpy(init_addr, naddr, af); 607 608 /* 609 * iterate *once* over whole table and find destination with 610 * least connection 611 */ 612 do { 613 pf_addr_inc(&rpool->counter, af); 614 if (rpool->addr.type == PF_ADDR_TABLE || 615 rpool->addr.type == PF_ADDR_DYNIFTL) { 616 if (pfr_pool_get(rpool, &raddr, &rmask, af)) 617 return (1); 618 } else if (pf_match_addr(0, raddr, rmask, 619 &rpool->counter, af)) 620 return (1); 621 622 if ((rpool->addr.type == PF_ADDR_TABLE && 623 rpool->addr.p.tbl->pfrkt_refcntcost > 0) || 624 (rpool->addr.type == PF_ADDR_DYNIFTL && 625 rpool->addr.p.dyn->pfid_kt->pfrkt_refcntcost > 0)) 626 cload = ((UINT16_MAX * rpool->states) 627 / rpool->weight); 628 else 629 cload = rpool->states; 630 631 /* find lc minimum */ 632 if (cload < load) { 633 states = rpool->states; 634 weight = rpool->weight; 635 kif = rpool->kif; 636 load = cload; 637 638 pf_addrcpy(naddr, &rpool->counter, af); 639 if (init_addr != NULL && 640 PF_AZERO(init_addr, af)) 641 pf_addrcpy(init_addr, naddr, af); 642 } 643 } while (pf_match_addr(1, &faddr, rmask, &rpool->counter, af) && 644 (states > 0)); 645 646 if (pf_map_addr_states_increase(af, rpool, naddr) == -1) 647 return (1); 648 /* revert the kif which was set by pfr_pool_get() */ 649 rpool->kif = kif; 650 break; 651 } 652 653 if (rpool->opts & PF_POOL_STICKYADDR) { 654 if (sns[type] != NULL) { 655 pf_remove_src_node(sns[type]); 656 sns[type] = NULL; 657 } 658 if (pf_insert_src_node(&sns[type], r, type, af, saddr, naddr, 659 rpool->kif)) 660 return (1); 661 } 662 663 if (pf_status.debug >= LOG_INFO && 664 (rpool->opts & PF_POOL_TYPEMASK) != PF_POOL_NONE) { 665 log(LOG_INFO, "pf: pf_map_addr: selected address "); 666 pf_print_host(naddr, 0, af); 667 if ((rpool->opts & PF_POOL_TYPEMASK) == 668 PF_POOL_LEASTSTATES) 669 addlog(" with state count %llu", states); 670 if ((rpool->addr.type == PF_ADDR_TABLE && 671 rpool->addr.p.tbl->pfrkt_refcntcost > 0) || 672 (rpool->addr.type == PF_ADDR_DYNIFTL && 673 rpool->addr.p.dyn->pfid_kt->pfrkt_refcntcost > 0)) 674 addlog(" with weight %u", weight); 675 addlog("\n"); 676 } 677 678 return (0); 679 } 680 681 int 682 pf_map_addr_states_increase(sa_family_t af, struct pf_pool *rpool, 683 struct pf_addr *naddr) 684 { 685 if (rpool->addr.type == PF_ADDR_TABLE) { 686 if (pfr_states_increase(rpool->addr.p.tbl, 687 naddr, af) == -1) { 688 if (pf_status.debug >= LOG_DEBUG) { 689 log(LOG_DEBUG, 690 "pf: pf_map_addr_states_increase: " 691 "selected address "); 692 pf_print_host(naddr, 0, af); 693 addlog(". Failed to increase count!\n"); 694 } 695 return (-1); 696 } 697 } else if (rpool->addr.type == PF_ADDR_DYNIFTL) { 698 if (pfr_states_increase(rpool->addr.p.dyn->pfid_kt, 699 naddr, af) == -1) { 700 if (pf_status.debug >= LOG_DEBUG) { 701 log(LOG_DEBUG, 702 "pf: pf_map_addr_states_increase: " 703 "selected address "); 704 pf_print_host(naddr, 0, af); 705 addlog(". Failed to increase count!\n"); 706 } 707 return (-1); 708 } 709 } 710 return (0); 711 } 712 713 int 714 pf_get_transaddr(struct pf_rule *r, struct pf_pdesc *pd, 715 struct pf_src_node **sns, struct pf_rule **nr) 716 { 717 struct pf_addr naddr; 718 u_int16_t nport; 719 720 #ifdef INET6 721 if (pd->af != pd->naf) 722 return (pf_get_transaddr_af(r, pd, sns)); 723 #endif /* INET6 */ 724 725 if (r->nat.addr.type != PF_ADDR_NONE) { 726 /* XXX is this right? what if rtable is changed at the same 727 * XXX time? where do I need to figure out the sport? */ 728 nport = 0; 729 if (pf_get_sport(pd, r, &naddr, &nport, 730 r->nat.proxy_port[0], r->nat.proxy_port[1], sns)) { 731 DPFPRINTF(LOG_NOTICE, 732 "pf: NAT proxy port allocation (%u-%u) failed", 733 r->nat.proxy_port[0], 734 r->nat.proxy_port[1]); 735 return (-1); 736 } 737 *nr = r; 738 pf_addrcpy(&pd->nsaddr, &naddr, pd->af); 739 pd->nsport = nport; 740 } 741 if (r->rdr.addr.type != PF_ADDR_NONE) { 742 if (pf_map_addr(pd->af, r, &pd->nsaddr, &naddr, NULL, sns, 743 &r->rdr, PF_SN_RDR)) 744 return (-1); 745 if ((r->rdr.opts & PF_POOL_TYPEMASK) == PF_POOL_BITMASK) 746 pf_poolmask(&naddr, &naddr, &r->rdr.addr.v.a.mask, 747 &pd->ndaddr, pd->af); 748 749 nport = 0; 750 if (r->rdr.proxy_port[1]) { 751 u_int32_t tmp_nport; 752 u_int16_t div; 753 754 div = r->rdr.proxy_port[1] - r->rdr.proxy_port[0] + 1; 755 div = (div == 0) ? 1 : div; 756 757 tmp_nport = ((ntohs(pd->ndport) - ntohs(r->dst.port[0])) % div) + 758 r->rdr.proxy_port[0]; 759 760 /* wrap around if necessary */ 761 if (tmp_nport > 65535) 762 tmp_nport -= 65535; 763 nport = htons((u_int16_t)tmp_nport); 764 } else if (r->rdr.proxy_port[0]) 765 nport = htons(r->rdr.proxy_port[0]); 766 *nr = r; 767 pf_addrcpy(&pd->ndaddr, &naddr, pd->af); 768 if (nport) 769 pd->ndport = nport; 770 } 771 772 return (0); 773 } 774 775 #ifdef INET6 776 int 777 pf_get_transaddr_af(struct pf_rule *r, struct pf_pdesc *pd, 778 struct pf_src_node **sns) 779 { 780 struct pf_addr ndaddr, nsaddr, naddr; 781 u_int16_t nport; 782 int prefixlen = 96; 783 784 if (pf_status.debug >= LOG_INFO) { 785 log(LOG_INFO, "pf: af-to %s %s, ", 786 pd->naf == AF_INET ? "inet" : "inet6", 787 r->rdr.addr.type == PF_ADDR_NONE ? "nat" : "rdr"); 788 pf_print_host(&pd->nsaddr, pd->nsport, pd->af); 789 addlog(" -> "); 790 pf_print_host(&pd->ndaddr, pd->ndport, pd->af); 791 addlog("\n"); 792 } 793 794 if (r->nat.addr.type == PF_ADDR_NONE) 795 panic("pf_get_transaddr_af: no nat pool for source address"); 796 797 /* get source address and port */ 798 nport = 0; 799 if (pf_get_sport(pd, r, &nsaddr, &nport, 800 r->nat.proxy_port[0], r->nat.proxy_port[1], sns)) { 801 DPFPRINTF(LOG_NOTICE, 802 "pf: af-to NAT proxy port allocation (%u-%u) failed", 803 r->nat.proxy_port[0], 804 r->nat.proxy_port[1]); 805 return (-1); 806 } 807 pd->nsport = nport; 808 809 if (pd->proto == IPPROTO_ICMPV6 && pd->naf == AF_INET) { 810 if (pd->dir == PF_IN) { 811 pd->ndport = ntohs(pd->ndport); 812 if (pd->ndport == ICMP6_ECHO_REQUEST) 813 pd->ndport = ICMP_ECHO; 814 else if (pd->ndport == ICMP6_ECHO_REPLY) 815 pd->ndport = ICMP_ECHOREPLY; 816 pd->ndport = htons(pd->ndport); 817 } else { 818 pd->nsport = ntohs(pd->nsport); 819 if (pd->nsport == ICMP6_ECHO_REQUEST) 820 pd->nsport = ICMP_ECHO; 821 else if (pd->nsport == ICMP6_ECHO_REPLY) 822 pd->nsport = ICMP_ECHOREPLY; 823 pd->nsport = htons(pd->nsport); 824 } 825 } else if (pd->proto == IPPROTO_ICMP && pd->naf == AF_INET6) { 826 if (pd->dir == PF_IN) { 827 pd->ndport = ntohs(pd->ndport); 828 if (pd->ndport == ICMP_ECHO) 829 pd->ndport = ICMP6_ECHO_REQUEST; 830 else if (pd->ndport == ICMP_ECHOREPLY) 831 pd->ndport = ICMP6_ECHO_REPLY; 832 pd->ndport = htons(pd->ndport); 833 } else { 834 pd->nsport = ntohs(pd->nsport); 835 if (pd->nsport == ICMP_ECHO) 836 pd->nsport = ICMP6_ECHO_REQUEST; 837 else if (pd->nsport == ICMP_ECHOREPLY) 838 pd->nsport = ICMP6_ECHO_REPLY; 839 pd->nsport = htons(pd->nsport); 840 } 841 } 842 843 /* get the destination address and port */ 844 if (r->rdr.addr.type != PF_ADDR_NONE) { 845 if (pf_map_addr(pd->naf, r, &nsaddr, &naddr, NULL, sns, 846 &r->rdr, PF_SN_RDR)) 847 return (-1); 848 if (r->rdr.proxy_port[0]) 849 pd->ndport = htons(r->rdr.proxy_port[0]); 850 851 if (pd->naf == AF_INET) { 852 /* The prefix is the IPv4 rdr address */ 853 prefixlen = in_mask2len((struct in_addr *) 854 &r->rdr.addr.v.a.mask); 855 inet_nat46(pd->naf, &pd->ndaddr, 856 &ndaddr, &naddr, prefixlen); 857 } else { 858 /* The prefix is the IPv6 rdr address */ 859 prefixlen = 860 in6_mask2len((struct in6_addr *) 861 &r->rdr.addr.v.a.mask, NULL); 862 inet_nat64(pd->naf, &pd->ndaddr, 863 &ndaddr, &naddr, prefixlen); 864 } 865 } else { 866 if (pd->naf == AF_INET) { 867 /* The prefix is the IPv6 dst address */ 868 prefixlen = 869 in6_mask2len((struct in6_addr *) 870 &r->dst.addr.v.a.mask, NULL); 871 if (prefixlen < 32) 872 prefixlen = 96; 873 inet_nat64(pd->naf, &pd->ndaddr, 874 &ndaddr, &pd->ndaddr, prefixlen); 875 } else { 876 /* 877 * The prefix is the IPv6 nat address 878 * (that was stored in pd->nsaddr) 879 */ 880 prefixlen = in6_mask2len((struct in6_addr *) 881 &r->nat.addr.v.a.mask, NULL); 882 if (prefixlen > 96) 883 prefixlen = 96; 884 inet_nat64(pd->naf, &pd->ndaddr, 885 &ndaddr, &nsaddr, prefixlen); 886 } 887 } 888 889 pf_addrcpy(&pd->nsaddr, &nsaddr, pd->naf); 890 pf_addrcpy(&pd->ndaddr, &ndaddr, pd->naf); 891 892 if (pf_status.debug >= LOG_INFO) { 893 log(LOG_INFO, "pf: af-to %s %s done, prefixlen %d, ", 894 pd->naf == AF_INET ? "inet" : "inet6", 895 r->rdr.addr.type == PF_ADDR_NONE ? "nat" : "rdr", 896 prefixlen); 897 pf_print_host(&pd->nsaddr, pd->nsport, pd->naf); 898 addlog(" -> "); 899 pf_print_host(&pd->ndaddr, pd->ndport, pd->naf); 900 addlog("\n"); 901 } 902 903 return (0); 904 } 905 #endif /* INET6 */ 906 907 int 908 pf_postprocess_addr(struct pf_state *cur) 909 { 910 struct pf_rule *nr; 911 struct pf_state_key *sks; 912 struct pf_pool rpool; 913 struct pf_addr lookup_addr; 914 int slbcount = -1; 915 916 nr = cur->natrule.ptr; 917 918 if (nr == NULL) 919 return (0); 920 921 /* decrease counter */ 922 923 sks = cur->key[PF_SK_STACK]; 924 925 /* check for outgoing or ingoing balancing */ 926 if (nr->rt == PF_ROUTETO) 927 lookup_addr = cur->rt_addr; 928 else if (sks != NULL) 929 lookup_addr = sks->addr[1]; 930 else { 931 if (pf_status.debug >= LOG_DEBUG) { 932 log(LOG_DEBUG, "pf: %s: unable to obtain address", 933 __func__); 934 } 935 return (1); 936 } 937 938 /* check for appropriate pool */ 939 if (nr->rdr.addr.type != PF_ADDR_NONE) 940 rpool = nr->rdr; 941 else if (nr->nat.addr.type != PF_ADDR_NONE) 942 rpool = nr->nat; 943 else if (nr->route.addr.type != PF_ADDR_NONE) 944 rpool = nr->route; 945 else 946 return (0); 947 948 if (((rpool.opts & PF_POOL_TYPEMASK) != PF_POOL_LEASTSTATES)) 949 return (0); 950 951 if (rpool.addr.type == PF_ADDR_TABLE) { 952 if ((slbcount = pfr_states_decrease( 953 rpool.addr.p.tbl, 954 &lookup_addr, sks->af)) == -1) { 955 if (pf_status.debug >= LOG_DEBUG) { 956 log(LOG_DEBUG, "pf: %s: selected address ", 957 __func__); 958 pf_print_host(&lookup_addr, 959 sks->port[0], sks->af); 960 addlog(". Failed to " 961 "decrease count!\n"); 962 } 963 return (1); 964 } 965 } else if (rpool.addr.type == PF_ADDR_DYNIFTL) { 966 if ((slbcount = pfr_states_decrease( 967 rpool.addr.p.dyn->pfid_kt, 968 &lookup_addr, sks->af)) == -1) { 969 if (pf_status.debug >= LOG_DEBUG) { 970 log(LOG_DEBUG, "pf: %s: selected address ", 971 __func__); 972 pf_print_host(&lookup_addr, 973 sks->port[0], sks->af); 974 addlog(". Failed to " 975 "decrease count!\n"); 976 } 977 return (1); 978 } 979 } 980 if (slbcount > -1) { 981 if (pf_status.debug >= LOG_INFO) { 982 log(LOG_INFO, "pf: %s: selected address ", __func__); 983 pf_print_host(&lookup_addr, sks->port[0], 984 sks->af); 985 addlog(" decreased state count to %u\n", 986 slbcount); 987 } 988 } 989 return (0); 990 } 991