1 /* $OpenBSD: pf_lb.c,v 1.74 2023/05/10 22:42:51 sashan Exp $ */
2
3 /*
4 * Copyright (c) 2001 Daniel Hartmeier
5 * Copyright (c) 2002 - 2008 Henning Brauer
6 * All rights reserved.
7 *
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
10 * are met:
11 *
12 * - Redistributions of source code must retain the above copyright
13 * notice, this list of conditions and the following disclaimer.
14 * - Redistributions in binary form must reproduce the above
15 * copyright notice, this list of conditions and the following
16 * disclaimer in the documentation and/or other materials provided
17 * with the distribution.
18 *
19 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
20 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
21 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
22 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
23 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
24 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
25 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
26 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
27 * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
28 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
29 * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
30 * POSSIBILITY OF SUCH DAMAGE.
31 *
32 * Effort sponsored in part by the Defense Advanced Research Projects
33 * Agency (DARPA) and Air Force Research Laboratory, Air Force
34 * Materiel Command, USAF, under agreement number F30602-01-2-0537.
35 *
36 */
37
38 #include "bpfilter.h"
39 #include "pflog.h"
40 #include "pfsync.h"
41 #include "pflow.h"
42
43 #include <sys/param.h>
44 #include <sys/systm.h>
45 #include <sys/mbuf.h>
46 #include <sys/filio.h>
47 #include <sys/socket.h>
48 #include <sys/socketvar.h>
49 #include <sys/kernel.h>
50 #include <sys/time.h>
51 #include <sys/pool.h>
52 #include <sys/rwlock.h>
53 #include <sys/syslog.h>
54 #include <sys/stdint.h>
55
56 #include <crypto/siphash.h>
57
58 #include <net/if.h>
59 #include <net/bpf.h>
60 #include <net/route.h>
61
62 #include <netinet/in.h>
63 #include <netinet/ip.h>
64 #include <netinet/in_pcb.h>
65 #include <netinet/ip_var.h>
66 #include <netinet/ip_icmp.h>
67 #include <netinet/icmp_var.h>
68 #include <netinet/tcp.h>
69 #include <netinet/tcp_seq.h>
70 #include <netinet/tcp_timer.h>
71 #include <netinet/udp.h>
72 #include <netinet/udp_var.h>
73 #include <netinet/if_ether.h>
74
75 #ifdef INET6
76 #include <netinet/ip6.h>
77 #include <netinet/icmp6.h>
78 #endif /* INET6 */
79
80 #include <net/pfvar.h>
81 #include <net/pfvar_priv.h>
82
83 #if NPFLOG > 0
84 #include <net/if_pflog.h>
85 #endif /* NPFLOG > 0 */
86
87 #if NPFLOW > 0
88 #include <net/if_pflow.h>
89 #endif /* NPFLOW > 0 */
90
91 #if NPFSYNC > 0
92 #include <net/if_pfsync.h>
93 #endif /* NPFSYNC > 0 */
94
95 u_int64_t pf_hash(struct pf_addr *, struct pf_addr *,
96 struct pf_poolhashkey *, sa_family_t);
97 int pf_get_sport(struct pf_pdesc *, struct pf_rule *,
98 struct pf_addr *, u_int16_t *, u_int16_t,
99 u_int16_t, struct pf_src_node **);
100 int pf_map_addr_states_increase(sa_family_t,
101 struct pf_pool *, struct pf_addr *);
102 int pf_get_transaddr_af(struct pf_rule *,
103 struct pf_pdesc *, struct pf_src_node **);
104 int pf_map_addr_sticky(sa_family_t, struct pf_rule *,
105 struct pf_addr *, struct pf_addr *,
106 struct pf_src_node **, struct pf_pool *,
107 enum pf_sn_types);
108
109 u_int64_t
pf_hash(struct pf_addr * inaddr,struct pf_addr * hash,struct pf_poolhashkey * key,sa_family_t af)110 pf_hash(struct pf_addr *inaddr, struct pf_addr *hash,
111 struct pf_poolhashkey *key, sa_family_t af)
112 {
113 uint64_t res = 0;
114 #ifdef INET6
115 union {
116 uint64_t hash64;
117 uint32_t hash32[2];
118 } h;
119 #endif /* INET6 */
120
121 switch (af) {
122 case AF_INET:
123 res = SipHash24((SIPHASH_KEY *)key,
124 &inaddr->addr32[0], sizeof(inaddr->addr32[0]));
125 hash->addr32[0] = res;
126 break;
127 #ifdef INET6
128 case AF_INET6:
129 res = SipHash24((SIPHASH_KEY *)key, &inaddr->addr32[0],
130 4 * sizeof(inaddr->addr32[0]));
131 h.hash64 = res;
132 hash->addr32[0] = h.hash32[0];
133 hash->addr32[1] = h.hash32[1];
134 /*
135 * siphash isn't big enough, but flipping it around is
136 * good enough here.
137 */
138 hash->addr32[2] = ~h.hash32[1];
139 hash->addr32[3] = ~h.hash32[0];
140 break;
141 #endif /* INET6 */
142 default:
143 unhandled_af(af);
144 }
145 return (res);
146 }
147
148 int
pf_get_sport(struct pf_pdesc * pd,struct pf_rule * r,struct pf_addr * naddr,u_int16_t * nport,u_int16_t low,u_int16_t high,struct pf_src_node ** sn)149 pf_get_sport(struct pf_pdesc *pd, struct pf_rule *r,
150 struct pf_addr *naddr, u_int16_t *nport, u_int16_t low, u_int16_t high,
151 struct pf_src_node **sn)
152 {
153 struct pf_state_key_cmp key;
154 struct pf_addr init_addr;
155 u_int16_t cut;
156 int dir = (pd->dir == PF_IN) ? PF_OUT : PF_IN;
157 int sidx = pd->sidx;
158 int didx = pd->didx;
159
160 memset(&init_addr, 0, sizeof(init_addr));
161 if (pf_map_addr(pd->naf, r, &pd->nsaddr, naddr, &init_addr, sn, &r->nat,
162 PF_SN_NAT))
163 return (1);
164
165 if (pd->proto == IPPROTO_ICMP) {
166 if (pd->ndport == htons(ICMP_ECHO)) {
167 low = 1;
168 high = 65535;
169 } else
170 return (0); /* Don't try to modify non-echo ICMP */
171 }
172 #ifdef INET6
173 if (pd->proto == IPPROTO_ICMPV6) {
174 if (pd->ndport == htons(ICMP6_ECHO_REQUEST)) {
175 low = 1;
176 high = 65535;
177 } else
178 return (0); /* Don't try to modify non-echo ICMP */
179 }
180 #endif /* INET6 */
181
182 do {
183 key.af = pd->naf;
184 key.proto = pd->proto;
185 key.rdomain = pd->rdomain;
186 pf_addrcpy(&key.addr[didx], &pd->ndaddr, key.af);
187 pf_addrcpy(&key.addr[sidx], naddr, key.af);
188 key.port[didx] = pd->ndport;
189
190 /*
191 * port search; start random, step;
192 * similar 2 portloop in in_pcbbind
193 */
194 if (!(pd->proto == IPPROTO_TCP || pd->proto == IPPROTO_UDP ||
195 pd->proto == IPPROTO_ICMP || pd->proto == IPPROTO_ICMPV6)) {
196 /* XXX bug: icmp states dont use the id on both
197 * XXX sides (traceroute -I through nat) */
198 key.port[sidx] = pd->nsport;
199 key.hash = pf_pkt_hash(key.af, key.proto, &key.addr[0],
200 &key.addr[1], key.port[0], key.port[1]);
201 if (pf_find_state_all(&key, dir, NULL) == NULL) {
202 *nport = pd->nsport;
203 return (0);
204 }
205 } else if (low == 0 && high == 0) {
206 key.port[sidx] = pd->nsport;
207 key.hash = pf_pkt_hash(key.af, key.proto, &key.addr[0],
208 &key.addr[1], key.port[0], key.port[1]);
209 if (pf_find_state_all(&key, dir, NULL) == NULL) {
210 *nport = pd->nsport;
211 return (0);
212 }
213 } else if (low == high) {
214 key.port[sidx] = htons(low);
215 key.hash = pf_pkt_hash(key.af, key.proto, &key.addr[0],
216 &key.addr[1], key.port[0], key.port[1]);
217 if (pf_find_state_all(&key, dir, NULL) == NULL) {
218 *nport = htons(low);
219 return (0);
220 }
221 } else {
222 u_int32_t tmp;
223
224 if (low > high) {
225 tmp = low;
226 low = high;
227 high = tmp;
228 }
229 /* low < high */
230 cut = arc4random_uniform(1 + high - low) + low;
231 /* low <= cut <= high */
232 for (tmp = cut; tmp <= high && tmp <= 0xffff; ++tmp) {
233 key.port[sidx] = htons(tmp);
234 key.hash = pf_pkt_hash(key.af, key.proto,
235 &key.addr[0], &key.addr[1], key.port[0],
236 key.port[1]);
237 if (pf_find_state_all(&key, dir, NULL) ==
238 NULL && !in_baddynamic(tmp, pd->proto)) {
239 *nport = htons(tmp);
240 return (0);
241 }
242 }
243 tmp = cut;
244 for (tmp -= 1; tmp >= low && tmp <= 0xffff; --tmp) {
245 key.port[sidx] = htons(tmp);
246 key.hash = pf_pkt_hash(key.af, key.proto,
247 &key.addr[0], &key.addr[1], key.port[0],
248 key.port[1]);
249 if (pf_find_state_all(&key, dir, NULL) ==
250 NULL && !in_baddynamic(tmp, pd->proto)) {
251 *nport = htons(tmp);
252 return (0);
253 }
254 }
255 }
256
257 switch (r->nat.opts & PF_POOL_TYPEMASK) {
258 case PF_POOL_RANDOM:
259 case PF_POOL_ROUNDROBIN:
260 case PF_POOL_LEASTSTATES:
261 /*
262 * pick a different source address since we're out
263 * of free port choices for the current one.
264 */
265 if (pf_map_addr(pd->naf, r, &pd->nsaddr, naddr,
266 &init_addr, sn, &r->nat, PF_SN_NAT))
267 return (1);
268 break;
269 case PF_POOL_NONE:
270 case PF_POOL_SRCHASH:
271 case PF_POOL_BITMASK:
272 default:
273 return (1);
274 }
275 } while (! PF_AEQ(&init_addr, naddr, pd->naf) );
276 return (1); /* none available */
277 }
278
279 int
pf_map_addr_sticky(sa_family_t af,struct pf_rule * r,struct pf_addr * saddr,struct pf_addr * naddr,struct pf_src_node ** sns,struct pf_pool * rpool,enum pf_sn_types type)280 pf_map_addr_sticky(sa_family_t af, struct pf_rule *r, struct pf_addr *saddr,
281 struct pf_addr *naddr, struct pf_src_node **sns, struct pf_pool *rpool,
282 enum pf_sn_types type)
283 {
284 struct pf_addr *raddr, *rmask, *cached;
285 struct pf_state *s;
286 struct pf_src_node k;
287 int valid;
288
289 k.af = af;
290 k.type = type;
291 pf_addrcpy(&k.addr, saddr, af);
292 k.rule.ptr = r;
293 pf_status.scounters[SCNT_SRC_NODE_SEARCH]++;
294 sns[type] = RB_FIND(pf_src_tree, &tree_src_tracking, &k);
295 if (sns[type] == NULL)
296 return (-1);
297
298 /* check if the cached entry is still valid */
299 cached = &(sns[type])->raddr;
300 valid = 0;
301 if (PF_AZERO(cached, af)) {
302 valid = 1;
303 } else if (rpool->addr.type == PF_ADDR_DYNIFTL) {
304 if (pfr_kentry_byaddr(rpool->addr.p.dyn->pfid_kt, cached,
305 af, 0))
306 valid = 1;
307 } else if (rpool->addr.type == PF_ADDR_TABLE) {
308 if (pfr_kentry_byaddr(rpool->addr.p.tbl, cached, af, 0))
309 valid = 1;
310 } else if (rpool->addr.type != PF_ADDR_NOROUTE) {
311 raddr = &rpool->addr.v.a.addr;
312 rmask = &rpool->addr.v.a.mask;
313 valid = pf_match_addr(0, raddr, rmask, cached, af);
314 }
315 if (!valid) {
316 if (pf_status.debug >= LOG_DEBUG) {
317 log(LOG_DEBUG, "pf: pf_map_addr: "
318 "stale src tracking (%u) ", type);
319 pf_print_host(&k.addr, 0, af);
320 addlog(" to ");
321 pf_print_host(cached, 0, af);
322 addlog("\n");
323 }
324 if (sns[type]->states != 0) {
325 /* XXX expensive */
326 RBT_FOREACH(s, pf_state_tree_id, &tree_id)
327 pf_state_rm_src_node(s, sns[type]);
328 }
329 sns[type]->expire = 1;
330 pf_remove_src_node(sns[type]);
331 sns[type] = NULL;
332 return (-1);
333 }
334
335
336 if (!PF_AZERO(cached, af)) {
337 pf_addrcpy(naddr, cached, af);
338 if ((rpool->opts & PF_POOL_TYPEMASK) == PF_POOL_LEASTSTATES &&
339 pf_map_addr_states_increase(af, rpool, cached) == -1)
340 return (-1);
341 }
342 if (pf_status.debug >= LOG_DEBUG) {
343 log(LOG_DEBUG, "pf: pf_map_addr: "
344 "src tracking (%u) maps ", type);
345 pf_print_host(&k.addr, 0, af);
346 addlog(" to ");
347 pf_print_host(naddr, 0, af);
348 addlog("\n");
349 }
350
351 if (sns[type]->kif != NULL)
352 rpool->kif = sns[type]->kif;
353
354 return (0);
355 }
356
357 uint32_t
pf_rand_addr(uint32_t mask)358 pf_rand_addr(uint32_t mask)
359 {
360 uint32_t addr;
361
362 mask = ~ntohl(mask);
363 addr = arc4random_uniform(mask + 1);
364
365 return (htonl(addr));
366 }
367
368 int
pf_map_addr(sa_family_t af,struct pf_rule * r,struct pf_addr * saddr,struct pf_addr * naddr,struct pf_addr * init_addr,struct pf_src_node ** sns,struct pf_pool * rpool,enum pf_sn_types type)369 pf_map_addr(sa_family_t af, struct pf_rule *r, struct pf_addr *saddr,
370 struct pf_addr *naddr, struct pf_addr *init_addr, struct pf_src_node **sns,
371 struct pf_pool *rpool, enum pf_sn_types type)
372 {
373 struct pf_addr hash;
374 struct pf_addr faddr;
375 struct pf_addr *raddr = &rpool->addr.v.a.addr;
376 struct pf_addr *rmask = &rpool->addr.v.a.mask;
377 struct pfr_ktable *kt;
378 struct pfi_kif *kif;
379 u_int64_t states;
380 u_int16_t weight;
381 u_int64_t load;
382 u_int64_t cload;
383 u_int64_t hashidx;
384 int cnt;
385
386 if (sns[type] == NULL && rpool->opts & PF_POOL_STICKYADDR &&
387 (rpool->opts & PF_POOL_TYPEMASK) != PF_POOL_NONE &&
388 pf_map_addr_sticky(af, r, saddr, naddr, sns, rpool, type) == 0)
389 return (0);
390
391 if (rpool->addr.type == PF_ADDR_NOROUTE)
392 return (1);
393 if (rpool->addr.type == PF_ADDR_DYNIFTL) {
394 switch (af) {
395 case AF_INET:
396 if (rpool->addr.p.dyn->pfid_acnt4 < 1 &&
397 !PF_POOL_DYNTYPE(rpool->opts))
398 return (1);
399 raddr = &rpool->addr.p.dyn->pfid_addr4;
400 rmask = &rpool->addr.p.dyn->pfid_mask4;
401 break;
402 #ifdef INET6
403 case AF_INET6:
404 if (rpool->addr.p.dyn->pfid_acnt6 < 1 &&
405 !PF_POOL_DYNTYPE(rpool->opts))
406 return (1);
407 raddr = &rpool->addr.p.dyn->pfid_addr6;
408 rmask = &rpool->addr.p.dyn->pfid_mask6;
409 break;
410 #endif /* INET6 */
411 default:
412 unhandled_af(af);
413 }
414 } else if (rpool->addr.type == PF_ADDR_TABLE) {
415 if (!PF_POOL_DYNTYPE(rpool->opts))
416 return (1); /* unsupported */
417 } else {
418 raddr = &rpool->addr.v.a.addr;
419 rmask = &rpool->addr.v.a.mask;
420 }
421
422 switch (rpool->opts & PF_POOL_TYPEMASK) {
423 case PF_POOL_NONE:
424 pf_addrcpy(naddr, raddr, af);
425 break;
426 case PF_POOL_BITMASK:
427 pf_poolmask(naddr, raddr, rmask, saddr, af);
428 break;
429 case PF_POOL_RANDOM:
430 if (rpool->addr.type == PF_ADDR_TABLE ||
431 rpool->addr.type == PF_ADDR_DYNIFTL) {
432 if (rpool->addr.type == PF_ADDR_TABLE)
433 kt = rpool->addr.p.tbl;
434 else
435 kt = rpool->addr.p.dyn->pfid_kt;
436 kt = pfr_ktable_select_active(kt);
437 if (kt == NULL)
438 return (1);
439
440 cnt = kt->pfrkt_cnt;
441 if (cnt == 0)
442 rpool->tblidx = 0;
443 else
444 rpool->tblidx = (int)arc4random_uniform(cnt);
445 memset(&rpool->counter, 0, sizeof(rpool->counter));
446 if (pfr_pool_get(rpool, &raddr, &rmask, af))
447 return (1);
448 pf_addrcpy(naddr, &rpool->counter, af);
449 } else if (init_addr != NULL && PF_AZERO(init_addr, af)) {
450 switch (af) {
451 case AF_INET:
452 rpool->counter.addr32[0] = pf_rand_addr(
453 rmask->addr32[0]);
454 break;
455 #ifdef INET6
456 case AF_INET6:
457 if (rmask->addr32[3] != 0xffffffff)
458 rpool->counter.addr32[3] = pf_rand_addr(
459 rmask->addr32[3]);
460 else
461 break;
462 if (rmask->addr32[2] != 0xffffffff)
463 rpool->counter.addr32[2] = pf_rand_addr(
464 rmask->addr32[2]);
465 else
466 break;
467 if (rmask->addr32[1] != 0xffffffff)
468 rpool->counter.addr32[1] = pf_rand_addr(
469 rmask->addr32[1]);
470 else
471 break;
472 if (rmask->addr32[0] != 0xffffffff)
473 rpool->counter.addr32[0] = pf_rand_addr(
474 rmask->addr32[0]);
475 break;
476 #endif /* INET6 */
477 default:
478 unhandled_af(af);
479 }
480 pf_poolmask(naddr, raddr, rmask, &rpool->counter, af);
481 pf_addrcpy(init_addr, naddr, af);
482
483 } else {
484 pf_addr_inc(&rpool->counter, af);
485 pf_poolmask(naddr, raddr, rmask, &rpool->counter, af);
486 }
487 break;
488 case PF_POOL_SRCHASH:
489 hashidx = pf_hash(saddr, &hash, &rpool->key, af);
490
491 if (rpool->addr.type == PF_ADDR_TABLE ||
492 rpool->addr.type == PF_ADDR_DYNIFTL) {
493 if (rpool->addr.type == PF_ADDR_TABLE)
494 kt = rpool->addr.p.tbl;
495 else
496 kt = rpool->addr.p.dyn->pfid_kt;
497 kt = pfr_ktable_select_active(kt);
498 if (kt == NULL)
499 return (1);
500
501 cnt = kt->pfrkt_cnt;
502 if (cnt == 0)
503 rpool->tblidx = 0;
504 else
505 rpool->tblidx = (int)(hashidx % cnt);
506 memset(&rpool->counter, 0, sizeof(rpool->counter));
507 if (pfr_pool_get(rpool, &raddr, &rmask, af))
508 return (1);
509 pf_addrcpy(naddr, &rpool->counter, af);
510 } else {
511 pf_poolmask(naddr, raddr, rmask, &hash, af);
512 }
513 break;
514 case PF_POOL_ROUNDROBIN:
515 if (rpool->addr.type == PF_ADDR_TABLE ||
516 rpool->addr.type == PF_ADDR_DYNIFTL) {
517 if (pfr_pool_get(rpool, &raddr, &rmask, af)) {
518 /*
519 * reset counter in case its value
520 * has been removed from the pool.
521 */
522 memset(&rpool->counter, 0,
523 sizeof(rpool->counter));
524 if (pfr_pool_get(rpool, &raddr, &rmask, af))
525 return (1);
526 }
527 } else if (PF_AZERO(&rpool->counter, af)) {
528 /*
529 * fall back to POOL_NONE if there is a single host
530 * address in pool.
531 */
532 if (af == AF_INET &&
533 rmask->addr32[0] == INADDR_BROADCAST) {
534 pf_addrcpy(naddr, raddr, af);
535 break;
536 }
537 #ifdef INET6
538 if (af == AF_INET6 &&
539 IN6_ARE_ADDR_EQUAL(&rmask->v6, &in6mask128)) {
540 pf_addrcpy(naddr, raddr, af);
541 break;
542 }
543 #endif
544 } else if (pf_match_addr(0, raddr, rmask, &rpool->counter, af))
545 return (1);
546
547 /* iterate over table if it contains entries which are weighted */
548 if ((rpool->addr.type == PF_ADDR_TABLE &&
549 rpool->addr.p.tbl->pfrkt_refcntcost > 0) ||
550 (rpool->addr.type == PF_ADDR_DYNIFTL &&
551 rpool->addr.p.dyn->pfid_kt->pfrkt_refcntcost > 0)) {
552 do {
553 if (rpool->addr.type == PF_ADDR_TABLE ||
554 rpool->addr.type == PF_ADDR_DYNIFTL) {
555 if (pfr_pool_get(rpool,
556 &raddr, &rmask, af))
557 return (1);
558 } else {
559 log(LOG_ERR, "pf: pf_map_addr: "
560 "weighted RR failure");
561 return (1);
562 }
563 if (rpool->weight >= rpool->curweight)
564 break;
565 pf_addr_inc(&rpool->counter, af);
566 } while (1);
567
568 weight = rpool->weight;
569 }
570
571 pf_poolmask(naddr, raddr, rmask, &rpool->counter, af);
572 if (init_addr != NULL && PF_AZERO(init_addr, af))
573 pf_addrcpy(init_addr, &rpool->counter, af);
574 pf_addr_inc(&rpool->counter, af);
575 break;
576 case PF_POOL_LEASTSTATES:
577 /* retrieve an address first */
578 if (rpool->addr.type == PF_ADDR_TABLE ||
579 rpool->addr.type == PF_ADDR_DYNIFTL) {
580 if (pfr_pool_get(rpool, &raddr, &rmask, af)) {
581 /* see PF_POOL_ROUNDROBIN */
582 memset(&rpool->counter, 0,
583 sizeof(rpool->counter));
584 if (pfr_pool_get(rpool, &raddr, &rmask, af))
585 return (1);
586 }
587 } else if (pf_match_addr(0, raddr, rmask, &rpool->counter, af))
588 return (1);
589
590 states = rpool->states;
591 weight = rpool->weight;
592 kif = rpool->kif;
593
594 if ((rpool->addr.type == PF_ADDR_TABLE &&
595 rpool->addr.p.tbl->pfrkt_refcntcost > 0) ||
596 (rpool->addr.type == PF_ADDR_DYNIFTL &&
597 rpool->addr.p.dyn->pfid_kt->pfrkt_refcntcost > 0))
598 load = ((UINT16_MAX * rpool->states) / rpool->weight);
599 else
600 load = states;
601
602 pf_addrcpy(&faddr, &rpool->counter, af);
603
604 pf_addrcpy(naddr, &rpool->counter, af);
605 if (init_addr != NULL && PF_AZERO(init_addr, af))
606 pf_addrcpy(init_addr, naddr, af);
607
608 /*
609 * iterate *once* over whole table and find destination with
610 * least connection
611 */
612 do {
613 pf_addr_inc(&rpool->counter, af);
614 if (rpool->addr.type == PF_ADDR_TABLE ||
615 rpool->addr.type == PF_ADDR_DYNIFTL) {
616 if (pfr_pool_get(rpool, &raddr, &rmask, af))
617 return (1);
618 } else if (pf_match_addr(0, raddr, rmask,
619 &rpool->counter, af))
620 return (1);
621
622 if ((rpool->addr.type == PF_ADDR_TABLE &&
623 rpool->addr.p.tbl->pfrkt_refcntcost > 0) ||
624 (rpool->addr.type == PF_ADDR_DYNIFTL &&
625 rpool->addr.p.dyn->pfid_kt->pfrkt_refcntcost > 0))
626 cload = ((UINT16_MAX * rpool->states)
627 / rpool->weight);
628 else
629 cload = rpool->states;
630
631 /* find lc minimum */
632 if (cload < load) {
633 states = rpool->states;
634 weight = rpool->weight;
635 kif = rpool->kif;
636 load = cload;
637
638 pf_addrcpy(naddr, &rpool->counter, af);
639 if (init_addr != NULL &&
640 PF_AZERO(init_addr, af))
641 pf_addrcpy(init_addr, naddr, af);
642 }
643 } while (pf_match_addr(1, &faddr, rmask, &rpool->counter, af) &&
644 (states > 0));
645
646 if (pf_map_addr_states_increase(af, rpool, naddr) == -1)
647 return (1);
648 /* revert the kif which was set by pfr_pool_get() */
649 rpool->kif = kif;
650 break;
651 }
652
653 if (rpool->opts & PF_POOL_STICKYADDR) {
654 if (sns[type] != NULL) {
655 pf_remove_src_node(sns[type]);
656 sns[type] = NULL;
657 }
658 if (pf_insert_src_node(&sns[type], r, type, af, saddr, naddr,
659 rpool->kif))
660 return (1);
661 }
662
663 if (pf_status.debug >= LOG_INFO &&
664 (rpool->opts & PF_POOL_TYPEMASK) != PF_POOL_NONE) {
665 log(LOG_INFO, "pf: pf_map_addr: selected address ");
666 pf_print_host(naddr, 0, af);
667 if ((rpool->opts & PF_POOL_TYPEMASK) ==
668 PF_POOL_LEASTSTATES)
669 addlog(" with state count %llu", states);
670 if ((rpool->addr.type == PF_ADDR_TABLE &&
671 rpool->addr.p.tbl->pfrkt_refcntcost > 0) ||
672 (rpool->addr.type == PF_ADDR_DYNIFTL &&
673 rpool->addr.p.dyn->pfid_kt->pfrkt_refcntcost > 0))
674 addlog(" with weight %u", weight);
675 addlog("\n");
676 }
677
678 return (0);
679 }
680
681 int
pf_map_addr_states_increase(sa_family_t af,struct pf_pool * rpool,struct pf_addr * naddr)682 pf_map_addr_states_increase(sa_family_t af, struct pf_pool *rpool,
683 struct pf_addr *naddr)
684 {
685 if (rpool->addr.type == PF_ADDR_TABLE) {
686 if (pfr_states_increase(rpool->addr.p.tbl,
687 naddr, af) == -1) {
688 if (pf_status.debug >= LOG_DEBUG) {
689 log(LOG_DEBUG,
690 "pf: pf_map_addr_states_increase: "
691 "selected address ");
692 pf_print_host(naddr, 0, af);
693 addlog(". Failed to increase count!\n");
694 }
695 return (-1);
696 }
697 } else if (rpool->addr.type == PF_ADDR_DYNIFTL) {
698 if (pfr_states_increase(rpool->addr.p.dyn->pfid_kt,
699 naddr, af) == -1) {
700 if (pf_status.debug >= LOG_DEBUG) {
701 log(LOG_DEBUG,
702 "pf: pf_map_addr_states_increase: "
703 "selected address ");
704 pf_print_host(naddr, 0, af);
705 addlog(". Failed to increase count!\n");
706 }
707 return (-1);
708 }
709 }
710 return (0);
711 }
712
713 int
pf_get_transaddr(struct pf_rule * r,struct pf_pdesc * pd,struct pf_src_node ** sns,struct pf_rule ** nr)714 pf_get_transaddr(struct pf_rule *r, struct pf_pdesc *pd,
715 struct pf_src_node **sns, struct pf_rule **nr)
716 {
717 struct pf_addr naddr;
718 u_int16_t nport;
719
720 #ifdef INET6
721 if (pd->af != pd->naf)
722 return (pf_get_transaddr_af(r, pd, sns));
723 #endif /* INET6 */
724
725 if (r->nat.addr.type != PF_ADDR_NONE) {
726 /* XXX is this right? what if rtable is changed at the same
727 * XXX time? where do I need to figure out the sport? */
728 nport = 0;
729 if (pf_get_sport(pd, r, &naddr, &nport,
730 r->nat.proxy_port[0], r->nat.proxy_port[1], sns)) {
731 DPFPRINTF(LOG_NOTICE,
732 "pf: NAT proxy port allocation (%u-%u) failed",
733 r->nat.proxy_port[0],
734 r->nat.proxy_port[1]);
735 return (-1);
736 }
737 *nr = r;
738 pf_addrcpy(&pd->nsaddr, &naddr, pd->af);
739 pd->nsport = nport;
740 }
741 if (r->rdr.addr.type != PF_ADDR_NONE) {
742 if (pf_map_addr(pd->af, r, &pd->nsaddr, &naddr, NULL, sns,
743 &r->rdr, PF_SN_RDR))
744 return (-1);
745 if ((r->rdr.opts & PF_POOL_TYPEMASK) == PF_POOL_BITMASK)
746 pf_poolmask(&naddr, &naddr, &r->rdr.addr.v.a.mask,
747 &pd->ndaddr, pd->af);
748
749 nport = 0;
750 if (r->rdr.proxy_port[1]) {
751 u_int32_t tmp_nport;
752 u_int16_t div;
753
754 div = r->rdr.proxy_port[1] - r->rdr.proxy_port[0] + 1;
755 div = (div == 0) ? 1 : div;
756
757 tmp_nport = ((ntohs(pd->ndport) - ntohs(r->dst.port[0])) % div) +
758 r->rdr.proxy_port[0];
759
760 /* wrap around if necessary */
761 if (tmp_nport > 65535)
762 tmp_nport -= 65535;
763 nport = htons((u_int16_t)tmp_nport);
764 } else if (r->rdr.proxy_port[0])
765 nport = htons(r->rdr.proxy_port[0]);
766 *nr = r;
767 pf_addrcpy(&pd->ndaddr, &naddr, pd->af);
768 if (nport)
769 pd->ndport = nport;
770 }
771
772 return (0);
773 }
774
775 #ifdef INET6
776 int
pf_get_transaddr_af(struct pf_rule * r,struct pf_pdesc * pd,struct pf_src_node ** sns)777 pf_get_transaddr_af(struct pf_rule *r, struct pf_pdesc *pd,
778 struct pf_src_node **sns)
779 {
780 struct pf_addr ndaddr, nsaddr, naddr;
781 u_int16_t nport;
782 int prefixlen = 96;
783
784 if (pf_status.debug >= LOG_INFO) {
785 log(LOG_INFO, "pf: af-to %s %s, ",
786 pd->naf == AF_INET ? "inet" : "inet6",
787 r->rdr.addr.type == PF_ADDR_NONE ? "nat" : "rdr");
788 pf_print_host(&pd->nsaddr, pd->nsport, pd->af);
789 addlog(" -> ");
790 pf_print_host(&pd->ndaddr, pd->ndport, pd->af);
791 addlog("\n");
792 }
793
794 if (r->nat.addr.type == PF_ADDR_NONE)
795 panic("pf_get_transaddr_af: no nat pool for source address");
796
797 /* get source address and port */
798 nport = 0;
799 if (pf_get_sport(pd, r, &nsaddr, &nport,
800 r->nat.proxy_port[0], r->nat.proxy_port[1], sns)) {
801 DPFPRINTF(LOG_NOTICE,
802 "pf: af-to NAT proxy port allocation (%u-%u) failed",
803 r->nat.proxy_port[0],
804 r->nat.proxy_port[1]);
805 return (-1);
806 }
807 pd->nsport = nport;
808
809 if (pd->proto == IPPROTO_ICMPV6 && pd->naf == AF_INET) {
810 if (pd->dir == PF_IN) {
811 pd->ndport = ntohs(pd->ndport);
812 if (pd->ndport == ICMP6_ECHO_REQUEST)
813 pd->ndport = ICMP_ECHO;
814 else if (pd->ndport == ICMP6_ECHO_REPLY)
815 pd->ndport = ICMP_ECHOREPLY;
816 pd->ndport = htons(pd->ndport);
817 } else {
818 pd->nsport = ntohs(pd->nsport);
819 if (pd->nsport == ICMP6_ECHO_REQUEST)
820 pd->nsport = ICMP_ECHO;
821 else if (pd->nsport == ICMP6_ECHO_REPLY)
822 pd->nsport = ICMP_ECHOREPLY;
823 pd->nsport = htons(pd->nsport);
824 }
825 } else if (pd->proto == IPPROTO_ICMP && pd->naf == AF_INET6) {
826 if (pd->dir == PF_IN) {
827 pd->ndport = ntohs(pd->ndport);
828 if (pd->ndport == ICMP_ECHO)
829 pd->ndport = ICMP6_ECHO_REQUEST;
830 else if (pd->ndport == ICMP_ECHOREPLY)
831 pd->ndport = ICMP6_ECHO_REPLY;
832 pd->ndport = htons(pd->ndport);
833 } else {
834 pd->nsport = ntohs(pd->nsport);
835 if (pd->nsport == ICMP_ECHO)
836 pd->nsport = ICMP6_ECHO_REQUEST;
837 else if (pd->nsport == ICMP_ECHOREPLY)
838 pd->nsport = ICMP6_ECHO_REPLY;
839 pd->nsport = htons(pd->nsport);
840 }
841 }
842
843 /* get the destination address and port */
844 if (r->rdr.addr.type != PF_ADDR_NONE) {
845 if (pf_map_addr(pd->naf, r, &nsaddr, &naddr, NULL, sns,
846 &r->rdr, PF_SN_RDR))
847 return (-1);
848 if (r->rdr.proxy_port[0])
849 pd->ndport = htons(r->rdr.proxy_port[0]);
850
851 if (pd->naf == AF_INET) {
852 /* The prefix is the IPv4 rdr address */
853 prefixlen = in_mask2len((struct in_addr *)
854 &r->rdr.addr.v.a.mask);
855 inet_nat46(pd->naf, &pd->ndaddr,
856 &ndaddr, &naddr, prefixlen);
857 } else {
858 /* The prefix is the IPv6 rdr address */
859 prefixlen =
860 in6_mask2len((struct in6_addr *)
861 &r->rdr.addr.v.a.mask, NULL);
862 inet_nat64(pd->naf, &pd->ndaddr,
863 &ndaddr, &naddr, prefixlen);
864 }
865 } else {
866 if (pd->naf == AF_INET) {
867 /* The prefix is the IPv6 dst address */
868 prefixlen =
869 in6_mask2len((struct in6_addr *)
870 &r->dst.addr.v.a.mask, NULL);
871 if (prefixlen < 32)
872 prefixlen = 96;
873 inet_nat64(pd->naf, &pd->ndaddr,
874 &ndaddr, &pd->ndaddr, prefixlen);
875 } else {
876 /*
877 * The prefix is the IPv6 nat address
878 * (that was stored in pd->nsaddr)
879 */
880 prefixlen = in6_mask2len((struct in6_addr *)
881 &r->nat.addr.v.a.mask, NULL);
882 if (prefixlen > 96)
883 prefixlen = 96;
884 inet_nat64(pd->naf, &pd->ndaddr,
885 &ndaddr, &nsaddr, prefixlen);
886 }
887 }
888
889 pf_addrcpy(&pd->nsaddr, &nsaddr, pd->naf);
890 pf_addrcpy(&pd->ndaddr, &ndaddr, pd->naf);
891
892 if (pf_status.debug >= LOG_INFO) {
893 log(LOG_INFO, "pf: af-to %s %s done, prefixlen %d, ",
894 pd->naf == AF_INET ? "inet" : "inet6",
895 r->rdr.addr.type == PF_ADDR_NONE ? "nat" : "rdr",
896 prefixlen);
897 pf_print_host(&pd->nsaddr, pd->nsport, pd->naf);
898 addlog(" -> ");
899 pf_print_host(&pd->ndaddr, pd->ndport, pd->naf);
900 addlog("\n");
901 }
902
903 return (0);
904 }
905 #endif /* INET6 */
906
907 int
pf_postprocess_addr(struct pf_state * cur)908 pf_postprocess_addr(struct pf_state *cur)
909 {
910 struct pf_rule *nr;
911 struct pf_state_key *sks;
912 struct pf_pool rpool;
913 struct pf_addr lookup_addr;
914 int slbcount = -1;
915
916 nr = cur->natrule.ptr;
917
918 if (nr == NULL)
919 return (0);
920
921 /* decrease counter */
922
923 sks = cur->key[PF_SK_STACK];
924
925 /* check for outgoing or ingoing balancing */
926 if (nr->rt == PF_ROUTETO)
927 lookup_addr = cur->rt_addr;
928 else if (sks != NULL)
929 lookup_addr = sks->addr[1];
930 else {
931 if (pf_status.debug >= LOG_DEBUG) {
932 log(LOG_DEBUG, "pf: %s: unable to obtain address",
933 __func__);
934 }
935 return (1);
936 }
937
938 /* check for appropriate pool */
939 if (nr->rdr.addr.type != PF_ADDR_NONE)
940 rpool = nr->rdr;
941 else if (nr->nat.addr.type != PF_ADDR_NONE)
942 rpool = nr->nat;
943 else if (nr->route.addr.type != PF_ADDR_NONE)
944 rpool = nr->route;
945 else
946 return (0);
947
948 if (((rpool.opts & PF_POOL_TYPEMASK) != PF_POOL_LEASTSTATES))
949 return (0);
950
951 if (rpool.addr.type == PF_ADDR_TABLE) {
952 if ((slbcount = pfr_states_decrease(
953 rpool.addr.p.tbl,
954 &lookup_addr, sks->af)) == -1) {
955 if (pf_status.debug >= LOG_DEBUG) {
956 log(LOG_DEBUG, "pf: %s: selected address ",
957 __func__);
958 pf_print_host(&lookup_addr,
959 sks->port[0], sks->af);
960 addlog(". Failed to "
961 "decrease count!\n");
962 }
963 return (1);
964 }
965 } else if (rpool.addr.type == PF_ADDR_DYNIFTL) {
966 if ((slbcount = pfr_states_decrease(
967 rpool.addr.p.dyn->pfid_kt,
968 &lookup_addr, sks->af)) == -1) {
969 if (pf_status.debug >= LOG_DEBUG) {
970 log(LOG_DEBUG, "pf: %s: selected address ",
971 __func__);
972 pf_print_host(&lookup_addr,
973 sks->port[0], sks->af);
974 addlog(". Failed to "
975 "decrease count!\n");
976 }
977 return (1);
978 }
979 }
980 if (slbcount > -1) {
981 if (pf_status.debug >= LOG_INFO) {
982 log(LOG_INFO, "pf: %s: selected address ", __func__);
983 pf_print_host(&lookup_addr, sks->port[0],
984 sks->af);
985 addlog(" decreased state count to %u\n",
986 slbcount);
987 }
988 }
989 return (0);
990 }
991