xref: /openbsd/sys/net/pf_lb.c (revision 905646f0)
1 /*	$OpenBSD: pf_lb.c,v 1.67 2020/07/29 02:32:13 yasuoka Exp $ */
2 
3 /*
4  * Copyright (c) 2001 Daniel Hartmeier
5  * Copyright (c) 2002 - 2008 Henning Brauer
6  * All rights reserved.
7  *
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions
10  * are met:
11  *
12  *    - Redistributions of source code must retain the above copyright
13  *      notice, this list of conditions and the following disclaimer.
14  *    - Redistributions in binary form must reproduce the above
15  *      copyright notice, this list of conditions and the following
16  *      disclaimer in the documentation and/or other materials provided
17  *      with the distribution.
18  *
19  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
20  * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
21  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
22  * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
23  * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
24  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
25  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
26  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
27  * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
28  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
29  * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
30  * POSSIBILITY OF SUCH DAMAGE.
31  *
32  * Effort sponsored in part by the Defense Advanced Research Projects
33  * Agency (DARPA) and Air Force Research Laboratory, Air Force
34  * Materiel Command, USAF, under agreement number F30602-01-2-0537.
35  *
36  */
37 
38 #include "bpfilter.h"
39 #include "pflog.h"
40 #include "pfsync.h"
41 #include "pflow.h"
42 
43 #include <sys/param.h>
44 #include <sys/systm.h>
45 #include <sys/mbuf.h>
46 #include <sys/filio.h>
47 #include <sys/socket.h>
48 #include <sys/socketvar.h>
49 #include <sys/kernel.h>
50 #include <sys/time.h>
51 #include <sys/pool.h>
52 #include <sys/rwlock.h>
53 #include <sys/syslog.h>
54 #include <sys/stdint.h>
55 
56 #include <crypto/siphash.h>
57 
58 #include <net/if.h>
59 #include <net/bpf.h>
60 #include <net/route.h>
61 
62 #include <netinet/in.h>
63 #include <netinet/ip.h>
64 #include <netinet/in_pcb.h>
65 #include <netinet/ip_var.h>
66 #include <netinet/ip_icmp.h>
67 #include <netinet/icmp_var.h>
68 #include <netinet/tcp.h>
69 #include <netinet/tcp_seq.h>
70 #include <netinet/tcp_timer.h>
71 #include <netinet/udp.h>
72 #include <netinet/udp_var.h>
73 #include <netinet/if_ether.h>
74 
75 #ifdef INET6
76 #include <netinet/ip6.h>
77 #include <netinet/icmp6.h>
78 #endif /* INET6 */
79 
80 #include <net/pfvar.h>
81 #include <net/pfvar_priv.h>
82 
83 #if NPFLOG > 0
84 #include <net/if_pflog.h>
85 #endif	/* NPFLOG > 0 */
86 
87 #if NPFLOW > 0
88 #include <net/if_pflow.h>
89 #endif	/* NPFLOW > 0 */
90 
91 #if NPFSYNC > 0
92 #include <net/if_pfsync.h>
93 #endif /* NPFSYNC > 0 */
94 
/*
 * Forward declarations for the address and port selection routines
 * defined in this file, listed in definition order.
 */
u_int64_t	pf_hash(struct pf_addr *, struct pf_addr *,
		    struct pf_poolhashkey *, sa_family_t);
int		pf_get_sport(struct pf_pdesc *, struct pf_rule *,
		    struct pf_addr *, u_int16_t *, u_int16_t, u_int16_t,
		    struct pf_src_node **);
int		pf_map_addr_sticky(sa_family_t, struct pf_rule *,
		    struct pf_addr *, struct pf_addr *,
		    struct pf_src_node **, struct pf_pool *,
		    enum pf_sn_types);
int		pf_map_addr_states_increase(sa_family_t, struct pf_pool *,
		    struct pf_addr *);
int		pf_get_transaddr_af(struct pf_rule *, struct pf_pdesc *,
		    struct pf_src_node **);
108 
109 u_int64_t
110 pf_hash(struct pf_addr *inaddr, struct pf_addr *hash,
111     struct pf_poolhashkey *key, sa_family_t af)
112 {
113 	uint64_t res = 0;
114 #ifdef INET6
115 	union {
116 		uint64_t hash64;
117 		uint32_t hash32[2];
118 	} h;
119 #endif	/* INET6 */
120 
121 	switch (af) {
122 	case AF_INET:
123 		res = SipHash24((SIPHASH_KEY *)key,
124 		    &inaddr->addr32[0], sizeof(inaddr->addr32[0]));
125 		hash->addr32[0] = res;
126 		break;
127 #ifdef INET6
128 	case AF_INET6:
129 		res = SipHash24((SIPHASH_KEY *)key, &inaddr->addr32[0],
130 		    4 * sizeof(inaddr->addr32[0]));
131 		h.hash64 = res;
132 		hash->addr32[0] = h.hash32[0];
133 		hash->addr32[1] = h.hash32[1];
134 		/*
135 		 * siphash isn't big enough, but flipping it around is
136 		 * good enough here.
137 		 */
138 		hash->addr32[2] = ~h.hash32[1];
139 		hash->addr32[3] = ~h.hash32[0];
140 		break;
141 #endif /* INET6 */
142 	default:
143 		unhandled_af(af);
144 	}
145 	return (res);
146 }
147 
148 int
149 pf_get_sport(struct pf_pdesc *pd, struct pf_rule *r,
150     struct pf_addr *naddr, u_int16_t *nport, u_int16_t low, u_int16_t high,
151     struct pf_src_node **sn)
152 {
153 	struct pf_state_key_cmp	key;
154 	struct pf_addr		init_addr;
155 	u_int16_t		cut;
156 	int			dir = (pd->dir == PF_IN) ? PF_OUT : PF_IN;
157 	int			sidx = pd->sidx;
158 	int			didx = pd->didx;
159 
160 	memset(&init_addr, 0, sizeof(init_addr));
161 	if (pf_map_addr(pd->naf, r, &pd->nsaddr, naddr, &init_addr, sn, &r->nat,
162 	    PF_SN_NAT))
163 		return (1);
164 
165 	if (pd->proto == IPPROTO_ICMP) {
166 		if (pd->ndport == htons(ICMP_ECHO)) {
167 			low = 1;
168 			high = 65535;
169 		} else
170 			return (0);	/* Don't try to modify non-echo ICMP */
171 	}
172 #ifdef INET6
173 	if (pd->proto == IPPROTO_ICMPV6) {
174 		if (pd->ndport == htons(ICMP6_ECHO_REQUEST)) {
175 			low = 1;
176 			high = 65535;
177 		} else
178 			return (0);	/* Don't try to modify non-echo ICMP */
179 	}
180 #endif /* INET6 */
181 
182 	do {
183 		key.af = pd->naf;
184 		key.proto = pd->proto;
185 		key.rdomain = pd->rdomain;
186 		pf_addrcpy(&key.addr[didx], &pd->ndaddr, key.af);
187 		pf_addrcpy(&key.addr[sidx], naddr, key.af);
188 		key.port[didx] = pd->ndport;
189 
190 		/*
191 		 * port search; start random, step;
192 		 * similar 2 portloop in in_pcbbind
193 		 */
194 		if (!(pd->proto == IPPROTO_TCP || pd->proto == IPPROTO_UDP ||
195 		    pd->proto == IPPROTO_ICMP || pd->proto == IPPROTO_ICMPV6)) {
196 			/* XXX bug: icmp states dont use the id on both
197 			 * XXX sides (traceroute -I through nat) */
198 			key.port[sidx] = pd->nsport;
199 			if (pf_find_state_all(&key, dir, NULL) == NULL) {
200 				*nport = pd->nsport;
201 				return (0);
202 			}
203 		} else if (low == 0 && high == 0) {
204 			key.port[sidx] = pd->nsport;
205 			if (pf_find_state_all(&key, dir, NULL) == NULL) {
206 				*nport = pd->nsport;
207 				return (0);
208 			}
209 		} else if (low == high) {
210 			key.port[sidx] = htons(low);
211 			if (pf_find_state_all(&key, dir, NULL) == NULL) {
212 				*nport = htons(low);
213 				return (0);
214 			}
215 		} else {
216 			u_int32_t tmp;
217 
218 			if (low > high) {
219 				tmp = low;
220 				low = high;
221 				high = tmp;
222 			}
223 			/* low < high */
224 			cut = arc4random_uniform(1 + high - low) + low;
225 			/* low <= cut <= high */
226 			for (tmp = cut; tmp <= high && tmp <= 0xffff; ++tmp) {
227 				key.port[sidx] = htons(tmp);
228 				if (pf_find_state_all(&key, dir, NULL) ==
229 				    NULL && !in_baddynamic(tmp, pd->proto)) {
230 					*nport = htons(tmp);
231 					return (0);
232 				}
233 			}
234 			tmp = cut;
235 			for (tmp -= 1; tmp >= low && tmp <= 0xffff; --tmp) {
236 				key.port[sidx] = htons(tmp);
237 				if (pf_find_state_all(&key, dir, NULL) ==
238 				    NULL && !in_baddynamic(tmp, pd->proto)) {
239 					*nport = htons(tmp);
240 					return (0);
241 				}
242 			}
243 		}
244 
245 		switch (r->nat.opts & PF_POOL_TYPEMASK) {
246 		case PF_POOL_RANDOM:
247 		case PF_POOL_ROUNDROBIN:
248 		case PF_POOL_LEASTSTATES:
249 			/*
250 			 * pick a different source address since we're out
251 			 * of free port choices for the current one.
252 			 */
253 			if (pf_map_addr(pd->naf, r, &pd->nsaddr, naddr,
254 			    &init_addr, sn, &r->nat, PF_SN_NAT))
255 				return (1);
256 			break;
257 		case PF_POOL_NONE:
258 		case PF_POOL_SRCHASH:
259 		case PF_POOL_BITMASK:
260 		default:
261 			return (1);
262 		}
263 	} while (! PF_AEQ(&init_addr, naddr, pd->naf) );
264 	return (1);					/* none available */
265 }
266 
267 int
268 pf_map_addr_sticky(sa_family_t af, struct pf_rule *r, struct pf_addr *saddr,
269     struct pf_addr *naddr, struct pf_src_node **sns, struct pf_pool *rpool,
270     enum pf_sn_types type)
271 {
272 	struct pf_addr		*raddr, *rmask, *cached;
273 	struct pf_state		*s;
274 	struct pf_src_node	 k;
275 	int			 valid;
276 
277 	k.af = af;
278 	k.type = type;
279 	pf_addrcpy(&k.addr, saddr, af);
280 	k.rule.ptr = r;
281 	pf_status.scounters[SCNT_SRC_NODE_SEARCH]++;
282 	sns[type] = RB_FIND(pf_src_tree, &tree_src_tracking, &k);
283 	if (sns[type] == NULL)
284 		return (-1);
285 
286 	/* check if the cached entry is still valid */
287 	cached = &(sns[type])->raddr;
288 	valid = 0;
289 	if (PF_AZERO(cached, af)) {
290 		valid = 1;
291 	} else if (rpool->addr.type == PF_ADDR_DYNIFTL) {
292 		if (pfr_kentry_byaddr(rpool->addr.p.dyn->pfid_kt, cached,
293 		    af, 0))
294 			valid = 1;
295 	} else if (rpool->addr.type == PF_ADDR_TABLE) {
296 		if (pfr_kentry_byaddr(rpool->addr.p.tbl, cached, af, 0))
297 			valid = 1;
298 	} else if (rpool->addr.type != PF_ADDR_NOROUTE) {
299 		raddr = &rpool->addr.v.a.addr;
300 		rmask = &rpool->addr.v.a.mask;
301 		valid = pf_match_addr(0, raddr, rmask, cached, af);
302 	}
303 	if (!valid) {
304 		if (pf_status.debug >= LOG_DEBUG) {
305 			log(LOG_DEBUG, "pf: pf_map_addr: "
306 			    "stale src tracking (%u) ", type);
307 			pf_print_host(&k.addr, 0, af);
308 			addlog(" to ");
309 			pf_print_host(cached, 0, af);
310 			addlog("\n");
311 		}
312 		if (sns[type]->states != 0) {
313 			/* XXX expensive */
314 			RB_FOREACH(s, pf_state_tree_id,
315 			   &tree_id)
316 				pf_state_rm_src_node(s,
317 				    sns[type]);
318 		}
319 		sns[type]->expire = 1;
320 		pf_remove_src_node(sns[type]);
321 		sns[type] = NULL;
322 		return (-1);
323 	}
324 
325 
326 	if (!PF_AZERO(cached, af)) {
327 		pf_addrcpy(naddr, cached, af);
328 		if ((rpool->opts & PF_POOL_TYPEMASK) == PF_POOL_LEASTSTATES &&
329 		    pf_map_addr_states_increase(af, rpool, cached) == -1)
330 			return (-1);
331 	}
332 	if (pf_status.debug >= LOG_DEBUG) {
333 		log(LOG_DEBUG, "pf: pf_map_addr: "
334 		    "src tracking (%u) maps ", type);
335 		pf_print_host(&k.addr, 0, af);
336 		addlog(" to ");
337 		pf_print_host(naddr, 0, af);
338 		addlog("\n");
339 	}
340 
341 	if (sns[type]->kif != NULL)
342 		rpool->kif = sns[type]->kif;
343 
344 	return (0);
345 }
346 
347 int
348 pf_map_addr(sa_family_t af, struct pf_rule *r, struct pf_addr *saddr,
349     struct pf_addr *naddr, struct pf_addr *init_addr, struct pf_src_node **sns,
350     struct pf_pool *rpool, enum pf_sn_types type)
351 {
352 	unsigned char		 hash[16];
353 	struct pf_addr		 faddr;
354 	struct pf_addr		*raddr = &rpool->addr.v.a.addr;
355 	struct pf_addr		*rmask = &rpool->addr.v.a.mask;
356 	struct pfr_ktable	*kt;
357 	struct pfi_kif		*kif;
358 	u_int64_t		 states;
359 	u_int16_t		 weight;
360 	u_int64_t		 load;
361 	u_int64_t		 cload;
362 	u_int64_t		 hashidx;
363 	int			 cnt;
364 
365 	if (sns[type] == NULL && rpool->opts & PF_POOL_STICKYADDR &&
366 	    (rpool->opts & PF_POOL_TYPEMASK) != PF_POOL_NONE &&
367 	    pf_map_addr_sticky(af, r, saddr, naddr, sns, rpool, type) == 0)
368 		return (0);
369 
370 	if (rpool->addr.type == PF_ADDR_NOROUTE)
371 		return (1);
372 	if (rpool->addr.type == PF_ADDR_DYNIFTL) {
373 		switch (af) {
374 		case AF_INET:
375 			if (rpool->addr.p.dyn->pfid_acnt4 < 1 &&
376 			    !PF_POOL_DYNTYPE(rpool->opts))
377 				return (1);
378 			raddr = &rpool->addr.p.dyn->pfid_addr4;
379 			rmask = &rpool->addr.p.dyn->pfid_mask4;
380 			break;
381 #ifdef INET6
382 		case AF_INET6:
383 			if (rpool->addr.p.dyn->pfid_acnt6 < 1 &&
384 			    !PF_POOL_DYNTYPE(rpool->opts))
385 				return (1);
386 			raddr = &rpool->addr.p.dyn->pfid_addr6;
387 			rmask = &rpool->addr.p.dyn->pfid_mask6;
388 			break;
389 #endif /* INET6 */
390 		default:
391 			unhandled_af(af);
392 		}
393 	} else if (rpool->addr.type == PF_ADDR_TABLE) {
394 		if (!PF_POOL_DYNTYPE(rpool->opts))
395 			return (1); /* unsupported */
396 	} else {
397 		raddr = &rpool->addr.v.a.addr;
398 		rmask = &rpool->addr.v.a.mask;
399 	}
400 
401 	switch (rpool->opts & PF_POOL_TYPEMASK) {
402 	case PF_POOL_NONE:
403 		pf_addrcpy(naddr, raddr, af);
404 		break;
405 	case PF_POOL_BITMASK:
406 		pf_poolmask(naddr, raddr, rmask, saddr, af);
407 		break;
408 	case PF_POOL_RANDOM:
409 		if (rpool->addr.type == PF_ADDR_TABLE ||
410 		    rpool->addr.type == PF_ADDR_DYNIFTL) {
411 			if (rpool->addr.type == PF_ADDR_TABLE)
412 				kt = rpool->addr.p.tbl;
413 			else
414 				kt = rpool->addr.p.dyn->pfid_kt;
415 			kt = pfr_ktable_select_active(kt);
416 			if (kt == NULL)
417 				return (1);
418 
419 			cnt = kt->pfrkt_cnt;
420 			if (cnt == 0)
421 				rpool->tblidx = 0;
422 			else
423 				rpool->tblidx = (int)arc4random_uniform(cnt);
424 			memset(&rpool->counter, 0, sizeof(rpool->counter));
425 			if (pfr_pool_get(rpool, &raddr, &rmask, af))
426 				return (1);
427 			pf_addrcpy(naddr, &rpool->counter, af);
428 		} else if (init_addr != NULL && PF_AZERO(init_addr, af)) {
429 			switch (af) {
430 			case AF_INET:
431 				rpool->counter.addr32[0] = arc4random();
432 				break;
433 #ifdef INET6
434 			case AF_INET6:
435 				if (rmask->addr32[3] != 0xffffffff)
436 					rpool->counter.addr32[3] = arc4random();
437 				else
438 					break;
439 				if (rmask->addr32[2] != 0xffffffff)
440 					rpool->counter.addr32[2] = arc4random();
441 				else
442 					break;
443 				if (rmask->addr32[1] != 0xffffffff)
444 					rpool->counter.addr32[1] = arc4random();
445 				else
446 					break;
447 				if (rmask->addr32[0] != 0xffffffff)
448 					rpool->counter.addr32[0] = arc4random();
449 				break;
450 #endif /* INET6 */
451 			default:
452 				unhandled_af(af);
453 			}
454 			pf_poolmask(naddr, raddr, rmask, &rpool->counter, af);
455 			pf_addrcpy(init_addr, naddr, af);
456 
457 		} else {
458 			pf_addr_inc(&rpool->counter, af);
459 			pf_poolmask(naddr, raddr, rmask, &rpool->counter, af);
460 		}
461 		break;
462 	case PF_POOL_SRCHASH:
463 		hashidx =
464 		    pf_hash(saddr, (struct pf_addr *)&hash, &rpool->key, af);
465 
466 		if (rpool->addr.type == PF_ADDR_TABLE ||
467 		    rpool->addr.type == PF_ADDR_DYNIFTL) {
468 			if (rpool->addr.type == PF_ADDR_TABLE)
469 				kt = rpool->addr.p.tbl;
470 			else
471 				kt = rpool->addr.p.dyn->pfid_kt;
472 			kt = pfr_ktable_select_active(kt);
473 			if (kt == NULL)
474 				return (1);
475 
476 			cnt = kt->pfrkt_cnt;
477 			if (cnt == 0)
478 				rpool->tblidx = 0;
479 			else
480 				rpool->tblidx = (int)(hashidx % cnt);
481 			memset(&rpool->counter, 0, sizeof(rpool->counter));
482 			if (pfr_pool_get(rpool, &raddr, &rmask, af))
483 				return (1);
484 			pf_addrcpy(naddr, &rpool->counter, af);
485 		} else {
486 			pf_poolmask(naddr, raddr, rmask,
487 			    (struct pf_addr *)&hash, af);
488 		}
489 		break;
490 	case PF_POOL_ROUNDROBIN:
491 		if (rpool->addr.type == PF_ADDR_TABLE ||
492 		    rpool->addr.type == PF_ADDR_DYNIFTL) {
493 			if (pfr_pool_get(rpool, &raddr, &rmask, af)) {
494 				/*
495 				 * reset counter in case its value
496 				 * has been removed from the pool.
497 				 */
498 				memset(&rpool->counter, 0,
499 				    sizeof(rpool->counter));
500 				if (pfr_pool_get(rpool, &raddr, &rmask, af))
501 					return (1);
502 			}
503 		} else if (pf_match_addr(0, raddr, rmask, &rpool->counter, af))
504 			return (1);
505 
506 		/* iterate over table if it contains entries which are weighted */
507 		if ((rpool->addr.type == PF_ADDR_TABLE &&
508 		    rpool->addr.p.tbl->pfrkt_refcntcost > 0) ||
509 		    (rpool->addr.type == PF_ADDR_DYNIFTL &&
510 		    rpool->addr.p.dyn->pfid_kt->pfrkt_refcntcost > 0)) {
511 			do {
512 				if (rpool->addr.type == PF_ADDR_TABLE ||
513 				    rpool->addr.type == PF_ADDR_DYNIFTL) {
514 					if (pfr_pool_get(rpool,
515 					    &raddr, &rmask, af))
516 						return (1);
517 				} else {
518 					log(LOG_ERR, "pf: pf_map_addr: "
519 					    "weighted RR failure");
520 					return (1);
521 				}
522 				if (rpool->weight >= rpool->curweight)
523 					break;
524 				pf_addr_inc(&rpool->counter, af);
525 			} while (1);
526 
527 			weight = rpool->weight;
528 		}
529 
530 		pf_addrcpy(naddr, &rpool->counter, af);
531 		if (init_addr != NULL && PF_AZERO(init_addr, af))
532 			pf_addrcpy(init_addr, naddr, af);
533 		pf_addr_inc(&rpool->counter, af);
534 		break;
535 	case PF_POOL_LEASTSTATES:
536 		/* retrieve an address first */
537 		if (rpool->addr.type == PF_ADDR_TABLE ||
538 		    rpool->addr.type == PF_ADDR_DYNIFTL) {
539 			if (pfr_pool_get(rpool, &raddr, &rmask, af)) {
540 				/* see PF_POOL_ROUNDROBIN */
541 				memset(&rpool->counter, 0,
542 				    sizeof(rpool->counter));
543 				if (pfr_pool_get(rpool, &raddr, &rmask, af))
544 					return (1);
545 			}
546 		} else if (pf_match_addr(0, raddr, rmask, &rpool->counter, af))
547 			return (1);
548 
549 		states = rpool->states;
550 		weight = rpool->weight;
551 		kif = rpool->kif;
552 
553 		if ((rpool->addr.type == PF_ADDR_TABLE &&
554 		    rpool->addr.p.tbl->pfrkt_refcntcost > 0) ||
555 		    (rpool->addr.type == PF_ADDR_DYNIFTL &&
556 		    rpool->addr.p.dyn->pfid_kt->pfrkt_refcntcost > 0))
557 			load = ((UINT16_MAX * rpool->states) / rpool->weight);
558 		else
559 			load = states;
560 
561 		pf_addrcpy(&faddr, &rpool->counter, af);
562 
563 		pf_addrcpy(naddr, &rpool->counter, af);
564 		if (init_addr != NULL && PF_AZERO(init_addr, af))
565 			pf_addrcpy(init_addr, naddr, af);
566 
567 		/*
568 		 * iterate *once* over whole table and find destination with
569 		 * least connection
570 		 */
571 		do  {
572 			pf_addr_inc(&rpool->counter, af);
573 			if (rpool->addr.type == PF_ADDR_TABLE ||
574 			    rpool->addr.type == PF_ADDR_DYNIFTL) {
575 				if (pfr_pool_get(rpool, &raddr, &rmask, af))
576 					return (1);
577 			} else if (pf_match_addr(0, raddr, rmask,
578 			    &rpool->counter, af))
579 				return (1);
580 
581 			if ((rpool->addr.type == PF_ADDR_TABLE &&
582 			    rpool->addr.p.tbl->pfrkt_refcntcost > 0) ||
583 			    (rpool->addr.type == PF_ADDR_DYNIFTL &&
584 			    rpool->addr.p.dyn->pfid_kt->pfrkt_refcntcost > 0))
585 				cload = ((UINT16_MAX * rpool->states)
586 					/ rpool->weight);
587 			else
588 				cload = rpool->states;
589 
590 			/* find lc minimum */
591 			if (cload < load) {
592 				states = rpool->states;
593 				weight = rpool->weight;
594 				kif = rpool->kif;
595 				load = cload;
596 
597 				pf_addrcpy(naddr, &rpool->counter, af);
598 				if (init_addr != NULL &&
599 				    PF_AZERO(init_addr, af))
600 				    pf_addrcpy(init_addr, naddr, af);
601 			}
602 		} while (pf_match_addr(1, &faddr, rmask, &rpool->counter, af) &&
603 		    (states > 0));
604 
605 		if (pf_map_addr_states_increase(af, rpool, naddr) == -1)
606 			return (1);
607 		/* revert the kif which was set by pfr_pool_get() */
608 		rpool->kif = kif;
609 		break;
610 	}
611 
612 	if (rpool->opts & PF_POOL_STICKYADDR) {
613 		if (sns[type] != NULL) {
614 			pf_remove_src_node(sns[type]);
615 			sns[type] = NULL;
616 		}
617 		if (pf_insert_src_node(&sns[type], r, type, af, saddr, naddr,
618 		    rpool->kif))
619 			return (1);
620 	}
621 
622 	if (pf_status.debug >= LOG_INFO &&
623 	    (rpool->opts & PF_POOL_TYPEMASK) != PF_POOL_NONE) {
624 		log(LOG_INFO, "pf: pf_map_addr: selected address ");
625 		pf_print_host(naddr, 0, af);
626 		if ((rpool->opts & PF_POOL_TYPEMASK) ==
627 		    PF_POOL_LEASTSTATES)
628 			addlog(" with state count %llu", states);
629 		if ((rpool->addr.type == PF_ADDR_TABLE &&
630 		    rpool->addr.p.tbl->pfrkt_refcntcost > 0) ||
631 		    (rpool->addr.type == PF_ADDR_DYNIFTL &&
632 		    rpool->addr.p.dyn->pfid_kt->pfrkt_refcntcost > 0))
633 			addlog(" with weight %u", weight);
634 		addlog("\n");
635 	}
636 
637 	return (0);
638 }
639 
640 int
641 pf_map_addr_states_increase(sa_family_t af, struct pf_pool *rpool,
642     struct pf_addr *naddr)
643 {
644 	if (rpool->addr.type == PF_ADDR_TABLE) {
645 		if (pfr_states_increase(rpool->addr.p.tbl,
646 		    naddr, af) == -1) {
647 			if (pf_status.debug >= LOG_DEBUG) {
648 				log(LOG_DEBUG,
649 				    "pf: pf_map_addr_states_increase: "
650 				    "selected address ");
651 				pf_print_host(naddr, 0, af);
652 				addlog(". Failed to increase count!\n");
653 			}
654 			return (-1);
655 		}
656 	} else if (rpool->addr.type == PF_ADDR_DYNIFTL) {
657 		if (pfr_states_increase(rpool->addr.p.dyn->pfid_kt,
658 		    naddr, af) == -1) {
659 			if (pf_status.debug >= LOG_DEBUG) {
660 				log(LOG_DEBUG,
661 				    "pf: pf_map_addr_states_increase: "
662 				    "selected address ");
663 				pf_print_host(naddr, 0, af);
664 				addlog(". Failed to increase count!\n");
665 			}
666 			return (-1);
667 		}
668 	}
669 	return (0);
670 }
671 
672 int
673 pf_get_transaddr(struct pf_rule *r, struct pf_pdesc *pd,
674     struct pf_src_node **sns, struct pf_rule **nr)
675 {
676 	struct pf_addr	naddr;
677 	u_int16_t	nport;
678 
679 #ifdef INET6
680 	if (pd->af != pd->naf)
681 		return (pf_get_transaddr_af(r, pd, sns));
682 #endif /* INET6 */
683 
684 	if (r->nat.addr.type != PF_ADDR_NONE) {
685 		/* XXX is this right? what if rtable is changed at the same
686 		 * XXX time? where do I need to figure out the sport? */
687 		nport = 0;
688 		if (pf_get_sport(pd, r, &naddr, &nport,
689 		    r->nat.proxy_port[0], r->nat.proxy_port[1], sns)) {
690 			DPFPRINTF(LOG_NOTICE,
691 			    "pf: NAT proxy port allocation (%u-%u) failed",
692 			    r->nat.proxy_port[0],
693 			    r->nat.proxy_port[1]);
694 			return (-1);
695 		}
696 		*nr = r;
697 		pf_addrcpy(&pd->nsaddr, &naddr, pd->af);
698 		pd->nsport = nport;
699 	}
700 	if (r->rdr.addr.type != PF_ADDR_NONE) {
701 		if (pf_map_addr(pd->af, r, &pd->nsaddr, &naddr, NULL, sns,
702 		    &r->rdr, PF_SN_RDR))
703 			return (-1);
704 		if ((r->rdr.opts & PF_POOL_TYPEMASK) == PF_POOL_BITMASK)
705 			pf_poolmask(&naddr, &naddr,  &r->rdr.addr.v.a.mask,
706 			    &pd->ndaddr, pd->af);
707 
708 		nport = 0;
709 		if (r->rdr.proxy_port[1]) {
710 			u_int32_t	tmp_nport;
711 
712 			tmp_nport = ((ntohs(pd->ndport) -
713 			    ntohs(r->dst.port[0])) %
714 			    (r->rdr.proxy_port[1] -
715 			    r->rdr.proxy_port[0] + 1)) +
716 			    r->rdr.proxy_port[0];
717 
718 			/* wrap around if necessary */
719 			if (tmp_nport > 65535)
720 				tmp_nport -= 65535;
721 			nport = htons((u_int16_t)tmp_nport);
722 		} else if (r->rdr.proxy_port[0])
723 			nport = htons(r->rdr.proxy_port[0]);
724 		*nr = r;
725 		pf_addrcpy(&pd->ndaddr, &naddr, pd->af);
726 		if (nport)
727 			pd->ndport = nport;
728 	}
729 
730 	return (0);
731 }
732 
733 #ifdef INET6
734 int
735 pf_get_transaddr_af(struct pf_rule *r, struct pf_pdesc *pd,
736     struct pf_src_node **sns)
737 {
738 	struct pf_addr	ndaddr, nsaddr, naddr;
739 	u_int16_t	nport;
740 	int		prefixlen = 96;
741 
742 	if (pf_status.debug >= LOG_INFO) {
743 		log(LOG_INFO, "pf: af-to %s %s, ",
744 		    pd->naf == AF_INET ? "inet" : "inet6",
745 		    r->rdr.addr.type == PF_ADDR_NONE ? "nat" : "rdr");
746 		pf_print_host(&pd->nsaddr, pd->nsport, pd->af);
747 		addlog(" -> ");
748 		pf_print_host(&pd->ndaddr, pd->ndport, pd->af);
749 		addlog("\n");
750 	}
751 
752 	if (r->nat.addr.type == PF_ADDR_NONE)
753 		panic("pf_get_transaddr_af: no nat pool for source address");
754 
755 	/* get source address and port */
756 	nport = 0;
757 	if (pf_get_sport(pd, r, &nsaddr, &nport,
758 	    r->nat.proxy_port[0], r->nat.proxy_port[1], sns)) {
759 		DPFPRINTF(LOG_NOTICE,
760 		    "pf: af-to NAT proxy port allocation (%u-%u) failed",
761 		    r->nat.proxy_port[0],
762 		    r->nat.proxy_port[1]);
763 		return (-1);
764 	}
765 	pd->nsport = nport;
766 
767 	if (pd->proto == IPPROTO_ICMPV6 && pd->naf == AF_INET) {
768 		if (pd->dir == PF_IN) {
769 			pd->ndport = ntohs(pd->ndport);
770 			if (pd->ndport == ICMP6_ECHO_REQUEST)
771 				pd->ndport = ICMP_ECHO;
772 			else if (pd->ndport == ICMP6_ECHO_REPLY)
773 				pd->ndport = ICMP_ECHOREPLY;
774 			pd->ndport = htons(pd->ndport);
775 		} else {
776 			pd->nsport = ntohs(pd->nsport);
777 			if (pd->nsport == ICMP6_ECHO_REQUEST)
778 				pd->nsport = ICMP_ECHO;
779 			else if (pd->nsport == ICMP6_ECHO_REPLY)
780 				pd->nsport = ICMP_ECHOREPLY;
781 			pd->nsport = htons(pd->nsport);
782 		}
783 	} else if (pd->proto == IPPROTO_ICMP && pd->naf == AF_INET6) {
784 		if (pd->dir == PF_IN) {
785 			pd->ndport = ntohs(pd->ndport);
786 			if (pd->ndport == ICMP_ECHO)
787 				pd->ndport = ICMP6_ECHO_REQUEST;
788 			else if (pd->ndport == ICMP_ECHOREPLY)
789 				pd->ndport = ICMP6_ECHO_REPLY;
790 			pd->ndport = htons(pd->ndport);
791 		} else {
792 			pd->nsport = ntohs(pd->nsport);
793 			if (pd->nsport == ICMP_ECHO)
794 				pd->nsport = ICMP6_ECHO_REQUEST;
795 			else if (pd->nsport == ICMP_ECHOREPLY)
796 				pd->nsport = ICMP6_ECHO_REPLY;
797 			pd->nsport = htons(pd->nsport);
798 		}
799 	}
800 
801 	/* get the destination address and port */
802 	if (r->rdr.addr.type != PF_ADDR_NONE) {
803 		if (pf_map_addr(pd->naf, r, &nsaddr, &naddr, NULL, sns,
804 		    &r->rdr, PF_SN_RDR))
805 			return (-1);
806 		if (r->rdr.proxy_port[0])
807 			pd->ndport = htons(r->rdr.proxy_port[0]);
808 
809 		if (pd->naf == AF_INET) {
810 			/* The prefix is the IPv4 rdr address */
811 			prefixlen = in_mask2len((struct in_addr *)
812 			    &r->rdr.addr.v.a.mask);
813 			inet_nat46(pd->naf, &pd->ndaddr,
814 			    &ndaddr, &naddr, prefixlen);
815 		} else {
816 			/* The prefix is the IPv6 rdr address */
817 			prefixlen =
818 			    in6_mask2len((struct in6_addr *)
819 			    &r->rdr.addr.v.a.mask, NULL);
820 			inet_nat64(pd->naf, &pd->ndaddr,
821 			    &ndaddr, &naddr, prefixlen);
822 		}
823 	} else {
824 		if (pd->naf == AF_INET) {
825 			/* The prefix is the IPv6 dst address */
826 			prefixlen =
827 			    in6_mask2len((struct in6_addr *)
828 			    &r->dst.addr.v.a.mask, NULL);
829 			if (prefixlen < 32)
830 				prefixlen = 96;
831 			inet_nat64(pd->naf, &pd->ndaddr,
832 			    &ndaddr, &pd->ndaddr, prefixlen);
833 		} else {
834 			/*
835 			 * The prefix is the IPv6 nat address
836 			 * (that was stored in pd->nsaddr)
837 			 */
838 			prefixlen = in6_mask2len((struct in6_addr *)
839 			    &r->nat.addr.v.a.mask, NULL);
840 			if (prefixlen > 96)
841 				prefixlen = 96;
842 			inet_nat64(pd->naf, &pd->ndaddr,
843 			    &ndaddr, &nsaddr, prefixlen);
844 		}
845 	}
846 
847 	pf_addrcpy(&pd->nsaddr, &nsaddr, pd->naf);
848 	pf_addrcpy(&pd->ndaddr, &ndaddr, pd->naf);
849 
850 	if (pf_status.debug >= LOG_INFO) {
851 		log(LOG_INFO, "pf: af-to %s %s done, prefixlen %d, ",
852 		    pd->naf == AF_INET ? "inet" : "inet6",
853 		    r->rdr.addr.type == PF_ADDR_NONE ? "nat" : "rdr",
854 		    prefixlen);
855 		pf_print_host(&pd->nsaddr, pd->nsport, pd->naf);
856 		addlog(" -> ");
857 		pf_print_host(&pd->ndaddr, pd->ndport, pd->naf);
858 		addlog("\n");
859 	}
860 
861 	return (0);
862 }
863 #endif /* INET6 */
864 
865 int
866 pf_postprocess_addr(struct pf_state *cur)
867 {
868 	struct pf_rule		*nr;
869 	struct pf_state_key	*sks;
870 	struct pf_pool		 rpool;
871 	struct pf_addr		 lookup_addr;
872 	int			 slbcount = -1;
873 
874 	nr = cur->natrule.ptr;
875 
876 	if (nr == NULL)
877 		return (0);
878 
879 	/* decrease counter */
880 
881 	sks = cur->key[PF_SK_STACK];
882 
883 	/* check for outgoing or ingoing balancing */
884 	if (nr->rt == PF_ROUTETO)
885 		lookup_addr = cur->rt_addr;
886 	else if (sks != NULL)
887 		lookup_addr = sks->addr[1];
888 	else {
889 		if (pf_status.debug >= LOG_DEBUG) {
890 			log(LOG_DEBUG, "pf: %s: unable to obtain address",
891 			    __func__);
892 		}
893 		return (1);
894 	}
895 
896 	/* check for appropriate pool */
897 	if (nr->rdr.addr.type != PF_ADDR_NONE)
898 		rpool = nr->rdr;
899 	else if (nr->nat.addr.type != PF_ADDR_NONE)
900 		rpool = nr->nat;
901 	else if (nr->route.addr.type != PF_ADDR_NONE)
902 		rpool = nr->route;
903 	else
904 		return (0);
905 
906 	if (((rpool.opts & PF_POOL_TYPEMASK) != PF_POOL_LEASTSTATES))
907 		return (0);
908 
909 	if (rpool.addr.type == PF_ADDR_TABLE) {
910 		if ((slbcount = pfr_states_decrease(
911 		    rpool.addr.p.tbl,
912 		    &lookup_addr, sks->af)) == -1) {
913 			if (pf_status.debug >= LOG_DEBUG) {
914 				log(LOG_DEBUG, "pf: %s: selected address ",
915 				    __func__);
916 				pf_print_host(&lookup_addr,
917 				    sks->port[0], sks->af);
918 				addlog(". Failed to "
919 				    "decrease count!\n");
920 			}
921 			return (1);
922 		}
923 	} else if (rpool.addr.type == PF_ADDR_DYNIFTL) {
924 		if ((slbcount = pfr_states_decrease(
925 		    rpool.addr.p.dyn->pfid_kt,
926 		    &lookup_addr, sks->af)) == -1) {
927 			if (pf_status.debug >= LOG_DEBUG) {
928 				log(LOG_DEBUG, "pf: %s: selected address ",
929 				    __func__);
930 				pf_print_host(&lookup_addr,
931 				    sks->port[0], sks->af);
932 				addlog(". Failed to "
933 				    "decrease count!\n");
934 			}
935 			return (1);
936 		}
937 	}
938 	if (slbcount > -1) {
939 		if (pf_status.debug >= LOG_INFO) {
940 			log(LOG_INFO, "pf: %s: selected address ", __func__);
941 			pf_print_host(&lookup_addr, sks->port[0],
942 			    sks->af);
943 			addlog(" decreased state count to %u\n",
944 			    slbcount);
945 		}
946 	}
947 	return (0);
948 }
949