1 /* $OpenBSD: pf_lb.c,v 1.7 2009/09/07 08:27:45 sthen Exp $ */ 2 3 /* 4 * Copyright (c) 2001 Daniel Hartmeier 5 * Copyright (c) 2002 - 2008 Henning Brauer 6 * All rights reserved. 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions 10 * are met: 11 * 12 * - Redistributions of source code must retain the above copyright 13 * notice, this list of conditions and the following disclaimer. 14 * - Redistributions in binary form must reproduce the above 15 * copyright notice, this list of conditions and the following 16 * disclaimer in the documentation and/or other materials provided 17 * with the distribution. 18 * 19 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 20 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 21 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS 22 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE 23 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, 24 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, 25 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 26 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 27 * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 28 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN 29 * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 30 * POSSIBILITY OF SUCH DAMAGE. 31 * 32 * Effort sponsored in part by the Defense Advanced Research Projects 33 * Agency (DARPA) and Air Force Research Laboratory, Air Force 34 * Materiel Command, USAF, under agreement number F30602-01-2-0537. 35 * 36 */ 37 38 #include "bpfilter.h" 39 #include "pflog.h" 40 #include "pfsync.h" 41 #include "pflow.h" 42 43 #include <sys/param.h> 44 #include <sys/systm.h> 45 #include <sys/mbuf.h> 46 #include <sys/filio.h> 47 #include <sys/socket.h> 48 #include <sys/socketvar.h> 49 #include <sys/kernel.h> 50 #include <sys/time.h> 51 #include <sys/pool.h> 52 #include <sys/proc.h> 53 #include <sys/rwlock.h> 54 55 #include <crypto/md5.h> 56 57 #include <net/if.h> 58 #include <net/if_types.h> 59 #include <net/bpf.h> 60 #include <net/route.h> 61 #include <net/radix_mpath.h> 62 63 #include <netinet/in.h> 64 #include <netinet/in_var.h> 65 #include <netinet/in_systm.h> 66 #include <netinet/ip.h> 67 #include <netinet/ip_var.h> 68 #include <netinet/tcp.h> 69 #include <netinet/tcp_seq.h> 70 #include <netinet/udp.h> 71 #include <netinet/ip_icmp.h> 72 #include <netinet/in_pcb.h> 73 #include <netinet/tcp_timer.h> 74 #include <netinet/tcp_var.h> 75 #include <netinet/udp_var.h> 76 #include <netinet/icmp_var.h> 77 #include <netinet/if_ether.h> 78 79 #include <dev/rndvar.h> 80 #include <net/pfvar.h> 81 #include <net/if_pflog.h> 82 #include <net/if_pflow.h> 83 84 #if NPFSYNC > 0 85 #include <net/if_pfsync.h> 86 #endif /* NPFSYNC > 0 */ 87 88 #ifdef INET6 89 #include <netinet/ip6.h> 90 #include <netinet/in_pcb.h> 91 #include <netinet/icmp6.h> 92 #include <netinet6/nd6.h> 93 #endif /* INET6 */ 94 95 96 #define DPFPRINTF(n, x) if (pf_status.debug >= (n)) printf x 97 98 /* 99 * Global variables 100 */ 101 102 void pf_hash(struct pf_addr *, struct pf_addr *, 103 struct pf_poolhashkey *, sa_family_t); 104 int pf_get_sport(sa_family_t, u_int8_t, struct pf_rule *, 105 struct pf_addr *, struct pf_addr *, u_int16_t, 106 struct pf_addr *, u_int16_t *, u_int16_t, u_int16_t, 107 struct pf_src_node **); 108 109 #define mix(a,b,c) \ 110 do { \ 111 a -= b; a -= c; a ^= (c >> 13); \ 112 b -= c; b -= a; b ^= (a << 8); \ 113 c -= a; c -= b; c ^= (b >> 13); \ 114 a -= b; a -= c; a ^= (c >> 12); \ 115 b -= c; b -= a; b ^= (a << 16); \ 116 c -= a; c -= b; c ^= (b >> 5); \ 117 a -= b; a -= c; a ^= (c >> 3); \ 118 b -= c; b -= a; b ^= (a << 10); \ 119 c -= a; c -= b; c ^= (b >> 15); \ 120 } while (0) 121 122 /* 123 * hash function based on bridge_hash in if_bridge.c 124 */ 125 void 126 pf_hash(struct pf_addr *inaddr, struct pf_addr *hash, 127 struct pf_poolhashkey *key, sa_family_t af) 128 { 129 u_int32_t a = 0x9e3779b9, b = 0x9e3779b9, c = key->key32[0]; 130 131 switch (af) { 132 #ifdef INET 133 case AF_INET: 134 a += inaddr->addr32[0]; 135 b += key->key32[1]; 136 mix(a, b, c); 137 hash->addr32[0] = c + key->key32[2]; 138 break; 139 #endif /* INET */ 140 #ifdef INET6 141 case AF_INET6: 142 a += inaddr->addr32[0]; 143 b += inaddr->addr32[2]; 144 mix(a, b, c); 145 hash->addr32[0] = c; 146 a += inaddr->addr32[1]; 147 b += inaddr->addr32[3]; 148 c += key->key32[1]; 149 mix(a, b, c); 150 hash->addr32[1] = c; 151 a += inaddr->addr32[2]; 152 b += inaddr->addr32[1]; 153 c += key->key32[2]; 154 mix(a, b, c); 155 hash->addr32[2] = c; 156 a += inaddr->addr32[3]; 157 b += inaddr->addr32[0]; 158 c += key->key32[3]; 159 mix(a, b, c); 160 hash->addr32[3] = c; 161 break; 162 #endif /* INET6 */ 163 } 164 } 165 166 int 167 pf_get_sport(sa_family_t af, u_int8_t proto, struct pf_rule *r, 168 struct pf_addr *saddr, struct pf_addr *daddr, u_int16_t dport, 169 struct pf_addr *naddr, u_int16_t *nport, u_int16_t low, u_int16_t high, 170 struct pf_src_node **sn) 171 { 172 struct pf_state_key_cmp key; 173 struct pf_addr init_addr; 174 u_int16_t cut; 175 176 bzero(&init_addr, sizeof(init_addr)); 177 if (pf_map_addr(af, r, saddr, naddr, &init_addr, sn, &r->nat)) 178 return (1); 179 180 if (proto == IPPROTO_ICMP || proto == IPPROTO_ICMPV6) { 181 if (dport == htons(ICMP6_ECHO_REQUEST) || 182 dport == htons(ICMP_ECHO)) { 183 low = 1; 184 high = 65535; 185 } else 186 return (0); /* Don't try to modify non-echo ICMP */ 187 } 188 189 do { 190 key.af = af; 191 key.proto = proto; 192 PF_ACPY(&key.addr[1], daddr, key.af); 193 PF_ACPY(&key.addr[0], naddr, key.af); 194 key.port[1] = dport; 195 196 /* 197 * port search; start random, step; 198 * similar 2 portloop in in_pcbbind 199 */ 200 if (!(proto == IPPROTO_TCP || proto == IPPROTO_UDP || 201 proto == IPPROTO_ICMP)) { 202 /* XXX bug icmp states dont use the id on both sides */ 203 key.port[0] = dport; 204 if (pf_find_state_all(&key, PF_IN, NULL) == NULL) 205 return (0); 206 } else if (low == 0 && high == 0) { 207 key.port[0] = *nport; 208 if (pf_find_state_all(&key, PF_IN, NULL) == NULL) 209 return (0); 210 } else if (low == high) { 211 key.port[0] = htons(low); 212 if (pf_find_state_all(&key, PF_IN, NULL) == NULL) { 213 *nport = htons(low); 214 return (0); 215 } 216 } else { 217 u_int16_t tmp; 218 219 if (low > high) { 220 tmp = low; 221 low = high; 222 high = tmp; 223 } 224 /* low < high */ 225 cut = arc4random_uniform(1 + high - low) + low; 226 /* low <= cut <= high */ 227 for (tmp = cut; tmp <= high; ++(tmp)) { 228 key.port[0] = htons(tmp); 229 if (pf_find_state_all(&key, PF_IN, NULL) == 230 NULL && !in_baddynamic(tmp, proto)) { 231 *nport = htons(tmp); 232 return (0); 233 } 234 } 235 for (tmp = cut - 1; tmp >= low; --(tmp)) { 236 key.port[0] = htons(tmp); 237 if (pf_find_state_all(&key, PF_IN, NULL) == 238 NULL && !in_baddynamic(tmp, proto)) { 239 *nport = htons(tmp); 240 return (0); 241 } 242 } 243 } 244 245 switch (r->nat.opts & PF_POOL_TYPEMASK) { 246 case PF_POOL_RANDOM: 247 case PF_POOL_ROUNDROBIN: 248 if (pf_map_addr(af, r, saddr, naddr, &init_addr, sn, 249 &r->nat)) 250 return (1); 251 break; 252 case PF_POOL_NONE: 253 case PF_POOL_SRCHASH: 254 case PF_POOL_BITMASK: 255 default: 256 return (1); 257 } 258 } while (! PF_AEQ(&init_addr, naddr, af) ); 259 return (1); /* none available */ 260 } 261 262 int 263 pf_map_addr(sa_family_t af, struct pf_rule *r, struct pf_addr *saddr, 264 struct pf_addr *naddr, struct pf_addr *init_addr, struct pf_src_node **sn, 265 struct pf_pool *rpool) 266 { 267 unsigned char hash[16]; 268 struct pf_addr *raddr = &rpool->cur->addr.v.a.addr; 269 struct pf_addr *rmask = &rpool->cur->addr.v.a.mask; 270 struct pf_pooladdr *acur = rpool->cur; 271 struct pf_src_node k; 272 273 if (*sn == NULL && rpool->opts & PF_POOL_STICKYADDR && 274 (rpool->opts & PF_POOL_TYPEMASK) != PF_POOL_NONE) { 275 k.af = af; 276 PF_ACPY(&k.addr, saddr, af); 277 if (r->rule_flag & PFRULE_RULESRCTRACK || 278 rpool->opts & PF_POOL_STICKYADDR) 279 k.rule.ptr = r; 280 else 281 k.rule.ptr = NULL; 282 pf_status.scounters[SCNT_SRC_NODE_SEARCH]++; 283 *sn = RB_FIND(pf_src_tree, &tree_src_tracking, &k); 284 if (*sn != NULL && !PF_AZERO(&(*sn)->raddr, af)) { 285 PF_ACPY(naddr, &(*sn)->raddr, af); 286 if (pf_status.debug >= PF_DEBUG_MISC) { 287 printf("pf_map_addr: src tracking maps "); 288 pf_print_host(&k.addr, 0, af); 289 printf(" to "); 290 pf_print_host(naddr, 0, af); 291 printf("\n"); 292 } 293 return (0); 294 } 295 } 296 297 if (rpool->cur->addr.type == PF_ADDR_NOROUTE) 298 return (1); 299 if (rpool->cur->addr.type == PF_ADDR_DYNIFTL) { 300 switch (af) { 301 #ifdef INET 302 case AF_INET: 303 if (rpool->cur->addr.p.dyn->pfid_acnt4 < 1 && 304 (rpool->opts & PF_POOL_TYPEMASK) != 305 PF_POOL_ROUNDROBIN) 306 return (1); 307 raddr = &rpool->cur->addr.p.dyn->pfid_addr4; 308 rmask = &rpool->cur->addr.p.dyn->pfid_mask4; 309 break; 310 #endif /* INET */ 311 #ifdef INET6 312 case AF_INET6: 313 if (rpool->cur->addr.p.dyn->pfid_acnt6 < 1 && 314 (rpool->opts & PF_POOL_TYPEMASK) != 315 PF_POOL_ROUNDROBIN) 316 return (1); 317 raddr = &rpool->cur->addr.p.dyn->pfid_addr6; 318 rmask = &rpool->cur->addr.p.dyn->pfid_mask6; 319 break; 320 #endif /* INET6 */ 321 } 322 } else if (rpool->cur->addr.type == PF_ADDR_TABLE) { 323 if ((rpool->opts & PF_POOL_TYPEMASK) != PF_POOL_ROUNDROBIN) 324 return (1); /* unsupported */ 325 } else { 326 raddr = &rpool->cur->addr.v.a.addr; 327 rmask = &rpool->cur->addr.v.a.mask; 328 } 329 330 switch (rpool->opts & PF_POOL_TYPEMASK) { 331 case PF_POOL_NONE: 332 PF_ACPY(naddr, raddr, af); 333 break; 334 case PF_POOL_BITMASK: 335 PF_POOLMASK(naddr, raddr, rmask, saddr, af); 336 break; 337 case PF_POOL_RANDOM: 338 if (init_addr != NULL && PF_AZERO(init_addr, af)) { 339 switch (af) { 340 #ifdef INET 341 case AF_INET: 342 rpool->counter.addr32[0] = htonl(arc4random()); 343 break; 344 #endif /* INET */ 345 #ifdef INET6 346 case AF_INET6: 347 if (rmask->addr32[3] != 0xffffffff) 348 rpool->counter.addr32[3] = 349 htonl(arc4random()); 350 else 351 break; 352 if (rmask->addr32[2] != 0xffffffff) 353 rpool->counter.addr32[2] = 354 htonl(arc4random()); 355 else 356 break; 357 if (rmask->addr32[1] != 0xffffffff) 358 rpool->counter.addr32[1] = 359 htonl(arc4random()); 360 else 361 break; 362 if (rmask->addr32[0] != 0xffffffff) 363 rpool->counter.addr32[0] = 364 htonl(arc4random()); 365 break; 366 #endif /* INET6 */ 367 } 368 PF_POOLMASK(naddr, raddr, rmask, &rpool->counter, af); 369 PF_ACPY(init_addr, naddr, af); 370 371 } else { 372 PF_AINC(&rpool->counter, af); 373 PF_POOLMASK(naddr, raddr, rmask, &rpool->counter, af); 374 } 375 break; 376 case PF_POOL_SRCHASH: 377 pf_hash(saddr, (struct pf_addr *)&hash, &rpool->key, af); 378 PF_POOLMASK(naddr, raddr, rmask, (struct pf_addr *)&hash, af); 379 break; 380 case PF_POOL_ROUNDROBIN: 381 if (rpool->cur->addr.type == PF_ADDR_TABLE) { 382 if (!pfr_pool_get(rpool->cur->addr.p.tbl, 383 &rpool->tblidx, &rpool->counter, 384 &raddr, &rmask, af)) 385 goto get_addr; 386 } else if (rpool->cur->addr.type == PF_ADDR_DYNIFTL) { 387 if (!pfr_pool_get(rpool->cur->addr.p.dyn->pfid_kt, 388 &rpool->tblidx, &rpool->counter, 389 &raddr, &rmask, af)) 390 goto get_addr; 391 } else if (pf_match_addr(0, raddr, rmask, &rpool->counter, af)) 392 goto get_addr; 393 394 try_next: 395 if ((rpool->cur = TAILQ_NEXT(rpool->cur, entries)) == NULL) 396 rpool->cur = TAILQ_FIRST(&rpool->list); 397 if (rpool->cur->addr.type == PF_ADDR_TABLE) { 398 rpool->tblidx = -1; 399 if (pfr_pool_get(rpool->cur->addr.p.tbl, 400 &rpool->tblidx, &rpool->counter, 401 &raddr, &rmask, af)) { 402 /* table contains no address of type 'af' */ 403 if (rpool->cur != acur) 404 goto try_next; 405 return (1); 406 } 407 } else if (rpool->cur->addr.type == PF_ADDR_DYNIFTL) { 408 rpool->tblidx = -1; 409 if (pfr_pool_get(rpool->cur->addr.p.dyn->pfid_kt, 410 &rpool->tblidx, &rpool->counter, 411 &raddr, &rmask, af)) { 412 /* table contains no address of type 'af' */ 413 if (rpool->cur != acur) 414 goto try_next; 415 return (1); 416 } 417 } else { 418 raddr = &rpool->cur->addr.v.a.addr; 419 rmask = &rpool->cur->addr.v.a.mask; 420 PF_ACPY(&rpool->counter, raddr, af); 421 } 422 423 get_addr: 424 PF_ACPY(naddr, &rpool->counter, af); 425 if (init_addr != NULL && PF_AZERO(init_addr, af)) 426 PF_ACPY(init_addr, naddr, af); 427 PF_AINC(&rpool->counter, af); 428 break; 429 } 430 if (*sn != NULL) 431 PF_ACPY(&(*sn)->raddr, naddr, af); 432 433 if (pf_status.debug >= PF_DEBUG_NOISY && 434 (rpool->opts & PF_POOL_TYPEMASK) != PF_POOL_NONE) { 435 printf("pf_map_addr: selected address "); 436 pf_print_host(naddr, 0, af); 437 printf("\n"); 438 } 439 440 return (0); 441 } 442 443 int 444 pf_get_transaddr(struct pf_rule *r, struct pf_pdesc *pd, struct pf_addr *saddr, 445 u_int16_t *sport, struct pf_addr *daddr, u_int16_t *dport) 446 { 447 struct pf_addr naddr; 448 u_int16_t nport = 0; 449 450 struct pf_src_node srcnode, *sn = &srcnode; 451 452 if (!TAILQ_EMPTY(&r->nat.list)) { 453 if (pf_get_sport(pd->af, pd->proto, r, saddr, 454 daddr, *dport, &naddr, &nport, r->nat.proxy_port[0], 455 r->nat.proxy_port[1], &sn)) { 456 DPFPRINTF(PF_DEBUG_MISC, 457 ("pf: NAT proxy port allocation " 458 "(%u-%u) failed\n", 459 r->nat.proxy_port[0], 460 r->nat.proxy_port[1])); 461 return (-1); 462 } 463 PF_ACPY(saddr, &naddr, pd->af); 464 if (nport) 465 *sport = nport; 466 } 467 if (!TAILQ_EMPTY(&r->rdr.list)) { 468 if (pf_map_addr(pd->af, r, saddr, &naddr, NULL, &sn, &r->rdr)) 469 return (-1); 470 if ((r->rdr.opts & PF_POOL_TYPEMASK) == PF_POOL_BITMASK) 471 PF_POOLMASK(&naddr, &naddr, &r->rdr.cur->addr.v.a.mask, 472 daddr, pd->af); 473 474 if (r->rdr.proxy_port[1]) { 475 u_int32_t tmp_nport; 476 477 tmp_nport = ((ntohs(*dport) - 478 ntohs(r->dst.port[0])) % 479 (r->rdr.proxy_port[1] - 480 r->rdr.proxy_port[0] + 1)) + 481 r->rdr.proxy_port[0]; 482 483 /* wrap around if necessary */ 484 if (tmp_nport > 65535) 485 tmp_nport -= 65535; 486 nport = htons((u_int16_t)tmp_nport); 487 } else if (r->rdr.proxy_port[0]) 488 nport = htons(r->rdr.proxy_port[0]); 489 490 PF_ACPY(daddr, &naddr, pd->af); 491 if (nport) 492 *dport = nport; 493 } 494 495 return (0); 496 } 497 498