xref: /openbsd/sys/net/pf_lb.c (revision 274d7c50)
1 /*	$OpenBSD: pf_lb.c,v 1.64 2019/07/02 09:04:53 yasuoka Exp $ */
2 
3 /*
4  * Copyright (c) 2001 Daniel Hartmeier
5  * Copyright (c) 2002 - 2008 Henning Brauer
6  * All rights reserved.
7  *
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions
10  * are met:
11  *
12  *    - Redistributions of source code must retain the above copyright
13  *      notice, this list of conditions and the following disclaimer.
14  *    - Redistributions in binary form must reproduce the above
15  *      copyright notice, this list of conditions and the following
16  *      disclaimer in the documentation and/or other materials provided
17  *      with the distribution.
18  *
19  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
20  * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
21  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
22  * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
23  * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
24  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
25  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
26  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
27  * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
28  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
29  * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
30  * POSSIBILITY OF SUCH DAMAGE.
31  *
32  * Effort sponsored in part by the Defense Advanced Research Projects
33  * Agency (DARPA) and Air Force Research Laboratory, Air Force
34  * Materiel Command, USAF, under agreement number F30602-01-2-0537.
35  *
36  */
37 
38 #include "bpfilter.h"
39 #include "pflog.h"
40 #include "pfsync.h"
41 #include "pflow.h"
42 
43 #include <sys/param.h>
44 #include <sys/systm.h>
45 #include <sys/mbuf.h>
46 #include <sys/filio.h>
47 #include <sys/socket.h>
48 #include <sys/socketvar.h>
49 #include <sys/kernel.h>
50 #include <sys/time.h>
51 #include <sys/pool.h>
52 #include <sys/rwlock.h>
53 #include <sys/syslog.h>
54 #include <sys/stdint.h>
55 
56 #include <crypto/siphash.h>
57 
58 #include <net/if.h>
59 #include <net/bpf.h>
60 #include <net/route.h>
61 
62 #include <netinet/in.h>
63 #include <netinet/ip.h>
64 #include <netinet/in_pcb.h>
65 #include <netinet/ip_var.h>
66 #include <netinet/ip_icmp.h>
67 #include <netinet/icmp_var.h>
68 #include <netinet/tcp.h>
69 #include <netinet/tcp_seq.h>
70 #include <netinet/tcp_timer.h>
71 #include <netinet/udp.h>
72 #include <netinet/udp_var.h>
73 #include <netinet/if_ether.h>
74 
75 #ifdef INET6
76 #include <netinet/ip6.h>
77 #include <netinet/icmp6.h>
78 #endif /* INET6 */
79 
80 #include <net/pfvar.h>
81 #include <net/pfvar_priv.h>
82 
83 #if NPFLOG > 0
84 #include <net/if_pflog.h>
85 #endif	/* NPFLOG > 0 */
86 
87 #if NPFLOW > 0
88 #include <net/if_pflow.h>
89 #endif	/* NPFLOW > 0 */
90 
91 #if NPFSYNC > 0
92 #include <net/if_pfsync.h>
93 #endif /* NPFSYNC > 0 */
94 
95 u_int64_t		 pf_hash(struct pf_addr *, struct pf_addr *,
96 			    struct pf_poolhashkey *, sa_family_t);
97 int			 pf_get_sport(struct pf_pdesc *, struct pf_rule *,
98 			    struct pf_addr *, u_int16_t *, u_int16_t,
99 			    u_int16_t, struct pf_src_node **);
100 int			 pf_get_transaddr_af(struct pf_rule *,
101 			    struct pf_pdesc *, struct pf_src_node **);
102 int			 pf_map_addr_sticky(sa_family_t, struct pf_rule *,
103 			    struct pf_addr *, struct pf_addr *,
104 			    struct pf_src_node **, struct pf_pool *,
105 			    enum pf_sn_types);
106 
107 u_int64_t
108 pf_hash(struct pf_addr *inaddr, struct pf_addr *hash,
109     struct pf_poolhashkey *key, sa_family_t af)
110 {
111 	uint64_t res = 0;
112 #ifdef INET6
113 	union {
114 		uint64_t hash64;
115 		uint32_t hash32[2];
116 	} h;
117 #endif	/* INET6 */
118 
119 	switch (af) {
120 	case AF_INET:
121 		res = SipHash24((SIPHASH_KEY *)key,
122 		    &inaddr->addr32[0], sizeof(inaddr->addr32[0]));
123 		hash->addr32[0] = res;
124 		break;
125 #ifdef INET6
126 	case AF_INET6:
127 		res = SipHash24((SIPHASH_KEY *)key, &inaddr->addr32[0],
128 		    4 * sizeof(inaddr->addr32[0]));
129 		h.hash64 = res;
130 		hash->addr32[0] = h.hash32[0];
131 		hash->addr32[1] = h.hash32[1];
132 		/*
133 		 * siphash isn't big enough, but flipping it around is
134 		 * good enough here.
135 		 */
136 		hash->addr32[2] = ~h.hash32[1];
137 		hash->addr32[3] = ~h.hash32[0];
138 		break;
139 #endif /* INET6 */
140 	default:
141 		unhandled_af(af);
142 	}
143 	return (res);
144 }
145 
146 int
147 pf_get_sport(struct pf_pdesc *pd, struct pf_rule *r,
148     struct pf_addr *naddr, u_int16_t *nport, u_int16_t low, u_int16_t high,
149     struct pf_src_node **sn)
150 {
151 	struct pf_state_key_cmp	key;
152 	struct pf_addr		init_addr;
153 	u_int16_t		cut;
154 	int			dir = (pd->dir == PF_IN) ? PF_OUT : PF_IN;
155 	int			sidx = pd->sidx;
156 	int			didx = pd->didx;
157 
158 	memset(&init_addr, 0, sizeof(init_addr));
159 	if (pf_map_addr(pd->naf, r, &pd->nsaddr, naddr, &init_addr, sn, &r->nat,
160 	    PF_SN_NAT))
161 		return (1);
162 
163 	if (pd->proto == IPPROTO_ICMP) {
164 		if (pd->ndport == htons(ICMP_ECHO)) {
165 			low = 1;
166 			high = 65535;
167 		} else
168 			return (0);	/* Don't try to modify non-echo ICMP */
169 	}
170 #ifdef INET6
171 	if (pd->proto == IPPROTO_ICMPV6) {
172 		if (pd->ndport == htons(ICMP6_ECHO_REQUEST)) {
173 			low = 1;
174 			high = 65535;
175 		} else
176 			return (0);	/* Don't try to modify non-echo ICMP */
177 	}
178 #endif /* INET6 */
179 
180 	do {
181 		key.af = pd->naf;
182 		key.proto = pd->proto;
183 		key.rdomain = pd->rdomain;
184 		pf_addrcpy(&key.addr[didx], &pd->ndaddr, key.af);
185 		pf_addrcpy(&key.addr[sidx], naddr, key.af);
186 		key.port[didx] = pd->ndport;
187 
188 		/*
189 		 * port search; start random, step;
190 		 * similar 2 portloop in in_pcbbind
191 		 */
192 		if (!(pd->proto == IPPROTO_TCP || pd->proto == IPPROTO_UDP ||
193 		    pd->proto == IPPROTO_ICMP || pd->proto == IPPROTO_ICMPV6)) {
194 			/* XXX bug: icmp states dont use the id on both
195 			 * XXX sides (traceroute -I through nat) */
196 			key.port[sidx] = pd->nsport;
197 			if (pf_find_state_all(&key, dir, NULL) == NULL) {
198 				*nport = pd->nsport;
199 				return (0);
200 			}
201 		} else if (low == 0 && high == 0) {
202 			key.port[sidx] = pd->nsport;
203 			if (pf_find_state_all(&key, dir, NULL) == NULL) {
204 				*nport = pd->nsport;
205 				return (0);
206 			}
207 		} else if (low == high) {
208 			key.port[sidx] = htons(low);
209 			if (pf_find_state_all(&key, dir, NULL) == NULL) {
210 				*nport = htons(low);
211 				return (0);
212 			}
213 		} else {
214 			u_int32_t tmp;
215 
216 			if (low > high) {
217 				tmp = low;
218 				low = high;
219 				high = tmp;
220 			}
221 			/* low < high */
222 			cut = arc4random_uniform(1 + high - low) + low;
223 			/* low <= cut <= high */
224 			for (tmp = cut; tmp <= high && tmp <= 0xffff; ++tmp) {
225 				key.port[sidx] = htons(tmp);
226 				if (pf_find_state_all(&key, dir, NULL) ==
227 				    NULL && !in_baddynamic(tmp, pd->proto)) {
228 					*nport = htons(tmp);
229 					return (0);
230 				}
231 			}
232 			tmp = cut;
233 			for (tmp -= 1; tmp >= low && tmp <= 0xffff; --tmp) {
234 				key.port[sidx] = htons(tmp);
235 				if (pf_find_state_all(&key, dir, NULL) ==
236 				    NULL && !in_baddynamic(tmp, pd->proto)) {
237 					*nport = htons(tmp);
238 					return (0);
239 				}
240 			}
241 		}
242 
243 		switch (r->nat.opts & PF_POOL_TYPEMASK) {
244 		case PF_POOL_RANDOM:
245 		case PF_POOL_ROUNDROBIN:
246 		case PF_POOL_LEASTSTATES:
247 			/*
248 			 * pick a different source address since we're out
249 			 * of free port choices for the current one.
250 			 */
251 			if (pf_map_addr(pd->naf, r, &pd->nsaddr, naddr,
252 			    &init_addr, sn, &r->nat, PF_SN_NAT))
253 				return (1);
254 			break;
255 		case PF_POOL_NONE:
256 		case PF_POOL_SRCHASH:
257 		case PF_POOL_BITMASK:
258 		default:
259 			return (1);
260 		}
261 	} while (! PF_AEQ(&init_addr, naddr, pd->naf) );
262 	return (1);					/* none available */
263 }
264 
265 int
266 pf_map_addr_sticky(sa_family_t af, struct pf_rule *r, struct pf_addr *saddr,
267     struct pf_addr *naddr, struct pf_src_node **sns, struct pf_pool *rpool,
268     enum pf_sn_types type)
269 {
270 	struct pf_addr		*raddr, *rmask, *cached;
271 	struct pf_state		*s;
272 	struct pf_src_node	 k;
273 	int			 valid;
274 
275 	k.af = af;
276 	k.type = type;
277 	pf_addrcpy(&k.addr, saddr, af);
278 	k.rule.ptr = r;
279 	pf_status.scounters[SCNT_SRC_NODE_SEARCH]++;
280 	sns[type] = RB_FIND(pf_src_tree, &tree_src_tracking, &k);
281 	if (sns[type] == NULL)
282 		return (-1);
283 
284 	/* check if the cached entry is still valid */
285 	cached = &(sns[type])->raddr;
286 	valid = 0;
287 	if (PF_AZERO(cached, af)) {
288 		valid = 1;
289 	} else if (rpool->addr.type == PF_ADDR_DYNIFTL) {
290 		if (pfr_kentry_byaddr(rpool->addr.p.dyn->pfid_kt, cached,
291 		    af, 0))
292 			valid = 1;
293 	} else if (rpool->addr.type == PF_ADDR_TABLE) {
294 		if (pfr_kentry_byaddr(rpool->addr.p.tbl, cached, af, 0))
295 			valid = 1;
296 	} else if (rpool->addr.type != PF_ADDR_NOROUTE) {
297 		raddr = &rpool->addr.v.a.addr;
298 		rmask = &rpool->addr.v.a.mask;
299 		valid = pf_match_addr(0, raddr, rmask, cached, af);
300 	}
301 	if (!valid) {
302 		if (pf_status.debug >= LOG_DEBUG) {
303 			log(LOG_DEBUG, "pf: pf_map_addr: "
304 			    "stale src tracking (%u) ", type);
305 			pf_print_host(&k.addr, 0, af);
306 			addlog(" to ");
307 			pf_print_host(cached, 0, af);
308 			addlog("\n");
309 		}
310 		if (sns[type]->states != 0) {
311 			/* XXX expensive */
312 			RB_FOREACH(s, pf_state_tree_id,
313 			   &tree_id)
314 				pf_state_rm_src_node(s,
315 				    sns[type]);
316 		}
317 		sns[type]->expire = 1;
318 		pf_remove_src_node(sns[type]);
319 		sns[type] = NULL;
320 		return (-1);
321 	}
322 	if (!PF_AZERO(cached, af))
323 		pf_addrcpy(naddr, cached, af);
324 	if (pf_status.debug >= LOG_DEBUG) {
325 		log(LOG_DEBUG, "pf: pf_map_addr: "
326 		    "src tracking (%u) maps ", type);
327 		pf_print_host(&k.addr, 0, af);
328 		addlog(" to ");
329 		pf_print_host(naddr, 0, af);
330 		addlog("\n");
331 	}
332 
333 	if (sns[type]->kif != NULL)
334 		rpool->kif = sns[type]->kif;
335 
336 	return (0);
337 }
338 
339 int
340 pf_map_addr(sa_family_t af, struct pf_rule *r, struct pf_addr *saddr,
341     struct pf_addr *naddr, struct pf_addr *init_addr, struct pf_src_node **sns,
342     struct pf_pool *rpool, enum pf_sn_types type)
343 {
344 	unsigned char		 hash[16];
345 	struct pf_addr		 faddr;
346 	struct pf_addr		*raddr = &rpool->addr.v.a.addr;
347 	struct pf_addr		*rmask = &rpool->addr.v.a.mask;
348 	u_int64_t		 states;
349 	u_int16_t		 weight;
350 	u_int64_t		 load;
351 	u_int64_t		 cload;
352 	u_int64_t		 hashidx;
353 	int			 cnt;
354 
355 	if (sns[type] == NULL && rpool->opts & PF_POOL_STICKYADDR &&
356 	    (rpool->opts & PF_POOL_TYPEMASK) != PF_POOL_NONE &&
357 	    pf_map_addr_sticky(af, r, saddr, naddr, sns, rpool, type) == 0)
358 		return (0);
359 
360 	if (rpool->addr.type == PF_ADDR_NOROUTE)
361 		return (1);
362 	if (rpool->addr.type == PF_ADDR_DYNIFTL) {
363 		switch (af) {
364 		case AF_INET:
365 			if (rpool->addr.p.dyn->pfid_acnt4 < 1 &&
366 			    !PF_POOL_DYNTYPE(rpool->opts))
367 				return (1);
368 			raddr = &rpool->addr.p.dyn->pfid_addr4;
369 			rmask = &rpool->addr.p.dyn->pfid_mask4;
370 			break;
371 #ifdef INET6
372 		case AF_INET6:
373 			if (rpool->addr.p.dyn->pfid_acnt6 < 1 &&
374 			    !PF_POOL_DYNTYPE(rpool->opts))
375 				return (1);
376 			raddr = &rpool->addr.p.dyn->pfid_addr6;
377 			rmask = &rpool->addr.p.dyn->pfid_mask6;
378 			break;
379 #endif /* INET6 */
380 		default:
381 			unhandled_af(af);
382 		}
383 	} else if (rpool->addr.type == PF_ADDR_TABLE) {
384 		if (!PF_POOL_DYNTYPE(rpool->opts))
385 			return (1); /* unsupported */
386 	} else {
387 		raddr = &rpool->addr.v.a.addr;
388 		rmask = &rpool->addr.v.a.mask;
389 	}
390 
391 	switch (rpool->opts & PF_POOL_TYPEMASK) {
392 	case PF_POOL_NONE:
393 		pf_addrcpy(naddr, raddr, af);
394 		break;
395 	case PF_POOL_BITMASK:
396 		pf_poolmask(naddr, raddr, rmask, saddr, af);
397 		break;
398 	case PF_POOL_RANDOM:
399 		if (rpool->addr.type == PF_ADDR_TABLE) {
400 			cnt = rpool->addr.p.tbl->pfrkt_cnt;
401 			if (cnt == 0)
402 				rpool->tblidx = 0;
403 			else
404 				rpool->tblidx = (int)arc4random_uniform(cnt);
405 			memset(&rpool->counter, 0, sizeof(rpool->counter));
406 			if (pfr_pool_get(rpool, &raddr, &rmask, af))
407 				return (1);
408 			pf_addrcpy(naddr, &rpool->counter, af);
409 		} else if (rpool->addr.type == PF_ADDR_DYNIFTL) {
410 			cnt = rpool->addr.p.dyn->pfid_kt->pfrkt_cnt;
411 			if (cnt == 0)
412 				rpool->tblidx = 0;
413 			else
414 				rpool->tblidx = (int)arc4random_uniform(cnt);
415 			memset(&rpool->counter, 0, sizeof(rpool->counter));
416 			if (pfr_pool_get(rpool, &raddr, &rmask, af))
417 				return (1);
418 			pf_addrcpy(naddr, &rpool->counter, af);
419 		} else if (init_addr != NULL && PF_AZERO(init_addr, af)) {
420 			switch (af) {
421 			case AF_INET:
422 				rpool->counter.addr32[0] = arc4random();
423 				break;
424 #ifdef INET6
425 			case AF_INET6:
426 				if (rmask->addr32[3] != 0xffffffff)
427 					rpool->counter.addr32[3] = arc4random();
428 				else
429 					break;
430 				if (rmask->addr32[2] != 0xffffffff)
431 					rpool->counter.addr32[2] = arc4random();
432 				else
433 					break;
434 				if (rmask->addr32[1] != 0xffffffff)
435 					rpool->counter.addr32[1] = arc4random();
436 				else
437 					break;
438 				if (rmask->addr32[0] != 0xffffffff)
439 					rpool->counter.addr32[0] = arc4random();
440 				break;
441 #endif /* INET6 */
442 			default:
443 				unhandled_af(af);
444 			}
445 			pf_poolmask(naddr, raddr, rmask, &rpool->counter, af);
446 			pf_addrcpy(init_addr, naddr, af);
447 
448 		} else {
449 			pf_addr_inc(&rpool->counter, af);
450 			pf_poolmask(naddr, raddr, rmask, &rpool->counter, af);
451 		}
452 		break;
453 	case PF_POOL_SRCHASH:
454 		hashidx =
455 		    pf_hash(saddr, (struct pf_addr *)&hash, &rpool->key, af);
456 		if (rpool->addr.type == PF_ADDR_TABLE) {
457 			cnt = rpool->addr.p.tbl->pfrkt_cnt;
458 			if (cnt == 0)
459 				rpool->tblidx = 0;
460 			else
461 				rpool->tblidx = (int)(hashidx % cnt);
462 			memset(&rpool->counter, 0, sizeof(rpool->counter));
463 			if (pfr_pool_get(rpool, &raddr, &rmask, af))
464 				return (1);
465 			pf_addrcpy(naddr, &rpool->counter, af);
466 		} else if (rpool->addr.type == PF_ADDR_DYNIFTL) {
467 			cnt = rpool->addr.p.dyn->pfid_kt->pfrkt_cnt;
468 			if (cnt == 0)
469 				rpool->tblidx = 0;
470 			else
471 				rpool->tblidx = (int)(hashidx % cnt);
472 			memset(&rpool->counter, 0, sizeof(rpool->counter));
473 			if (pfr_pool_get(rpool, &raddr, &rmask, af))
474 				return (1);
475 			pf_addrcpy(naddr, &rpool->counter, af);
476 		} else {
477 			pf_poolmask(naddr, raddr, rmask,
478 			    (struct pf_addr *)&hash, af);
479 		}
480 		break;
481 	case PF_POOL_ROUNDROBIN:
482 		if (rpool->addr.type == PF_ADDR_TABLE ||
483 		    rpool->addr.type == PF_ADDR_DYNIFTL) {
484 			if (pfr_pool_get(rpool, &raddr, &rmask, af)) {
485 				/*
486 				 * reset counter in case its value
487 				 * has been removed from the pool.
488 				 */
489 				memset(&rpool->counter, 0,
490 				    sizeof(rpool->counter));
491 				if (pfr_pool_get(rpool, &raddr, &rmask, af))
492 					return (1);
493 			}
494 		} else if (pf_match_addr(0, raddr, rmask, &rpool->counter, af))
495 			return (1);
496 
497 		/* iterate over table if it contains entries which are weighted */
498 		if ((rpool->addr.type == PF_ADDR_TABLE &&
499 		    rpool->addr.p.tbl->pfrkt_refcntcost > 0) ||
500 		    (rpool->addr.type == PF_ADDR_DYNIFTL &&
501 		    rpool->addr.p.dyn->pfid_kt->pfrkt_refcntcost > 0)) {
502 			do {
503 				if (rpool->addr.type == PF_ADDR_TABLE ||
504 				    rpool->addr.type == PF_ADDR_DYNIFTL) {
505 					if (pfr_pool_get(rpool,
506 					    &raddr, &rmask, af))
507 						return (1);
508 				} else {
509 					log(LOG_ERR, "pf: pf_map_addr: "
510 					    "weighted RR failure");
511 					return (1);
512 				}
513 				if (rpool->weight >= rpool->curweight)
514 					break;
515 				pf_addr_inc(&rpool->counter, af);
516 			} while (1);
517 
518 			weight = rpool->weight;
519 		}
520 
521 		pf_addrcpy(naddr, &rpool->counter, af);
522 		if (init_addr != NULL && PF_AZERO(init_addr, af))
523 			pf_addrcpy(init_addr, naddr, af);
524 		pf_addr_inc(&rpool->counter, af);
525 		break;
526 	case PF_POOL_LEASTSTATES:
527 		/* retrieve an address first */
528 		if (rpool->addr.type == PF_ADDR_TABLE ||
529 		    rpool->addr.type == PF_ADDR_DYNIFTL) {
530 			if (pfr_pool_get(rpool, &raddr, &rmask, af)) {
531 				/* see PF_POOL_ROUNDROBIN */
532 				memset(&rpool->counter, 0,
533 				    sizeof(rpool->counter));
534 				if (pfr_pool_get(rpool, &raddr, &rmask, af))
535 					return (1);
536 			}
537 		} else if (pf_match_addr(0, raddr, rmask, &rpool->counter, af))
538 			return (1);
539 
540 		states = rpool->states;
541 		weight = rpool->weight;
542 
543 		if ((rpool->addr.type == PF_ADDR_TABLE &&
544 		    rpool->addr.p.tbl->pfrkt_refcntcost > 0) ||
545 		    (rpool->addr.type == PF_ADDR_DYNIFTL &&
546 		    rpool->addr.p.dyn->pfid_kt->pfrkt_refcntcost > 0))
547 			load = ((UINT16_MAX * rpool->states) / rpool->weight);
548 		else
549 			load = states;
550 
551 		pf_addrcpy(&faddr, &rpool->counter, af);
552 
553 		pf_addrcpy(naddr, &rpool->counter, af);
554 		if (init_addr != NULL && PF_AZERO(init_addr, af))
555 			pf_addrcpy(init_addr, naddr, af);
556 
557 		/*
558 		 * iterate *once* over whole table and find destination with
559 		 * least connection
560 		 */
561 		do  {
562 			pf_addr_inc(&rpool->counter, af);
563 			if (rpool->addr.type == PF_ADDR_TABLE ||
564 			    rpool->addr.type == PF_ADDR_DYNIFTL) {
565 				if (pfr_pool_get(rpool, &raddr, &rmask, af))
566 					return (1);
567 			} else if (pf_match_addr(0, raddr, rmask,
568 			    &rpool->counter, af))
569 				return (1);
570 
571 			if ((rpool->addr.type == PF_ADDR_TABLE &&
572 			    rpool->addr.p.tbl->pfrkt_refcntcost > 0) ||
573 			    (rpool->addr.type == PF_ADDR_DYNIFTL &&
574 			    rpool->addr.p.dyn->pfid_kt->pfrkt_refcntcost > 0))
575 				cload = ((UINT16_MAX * rpool->states)
576 					/ rpool->weight);
577 			else
578 				cload = rpool->states;
579 
580 			/* find lc minimum */
581 			if (cload < load) {
582 				states = rpool->states;
583 				weight = rpool->weight;
584 				load = cload;
585 
586 				pf_addrcpy(naddr, &rpool->counter, af);
587 				if (init_addr != NULL &&
588 				    PF_AZERO(init_addr, af))
589 				    pf_addrcpy(init_addr, naddr, af);
590 			}
591 		} while (pf_match_addr(1, &faddr, rmask, &rpool->counter, af) &&
592 		    (states > 0));
593 
594 		if (rpool->addr.type == PF_ADDR_TABLE) {
595 			if (pfr_states_increase(rpool->addr.p.tbl,
596 			    naddr, af) == -1) {
597 				if (pf_status.debug >= LOG_DEBUG) {
598 					log(LOG_DEBUG,"pf: pf_map_addr: "
599 					    "selected address ");
600 					pf_print_host(naddr, 0, af);
601 					addlog(". Failed to increase count!\n");
602 				}
603 				return (1);
604 			}
605 		} else if (rpool->addr.type == PF_ADDR_DYNIFTL) {
606 			if (pfr_states_increase(rpool->addr.p.dyn->pfid_kt,
607 			    naddr, af) == -1) {
608 				if (pf_status.debug >= LOG_DEBUG) {
609 					log(LOG_DEBUG, "pf: pf_map_addr: "
610 					    "selected address ");
611 					pf_print_host(naddr, 0, af);
612 					addlog(". Failed to increase count!\n");
613 				}
614 				return (1);
615 			}
616 		}
617 		break;
618 	}
619 
620 	if (rpool->opts & PF_POOL_STICKYADDR) {
621 		if (sns[type] != NULL) {
622 			pf_remove_src_node(sns[type]);
623 			sns[type] = NULL;
624 		}
625 		if (pf_insert_src_node(&sns[type], r, type, af, saddr, naddr,
626 		    rpool->kif))
627 			return (1);
628 	}
629 
630 	if (pf_status.debug >= LOG_INFO &&
631 	    (rpool->opts & PF_POOL_TYPEMASK) != PF_POOL_NONE) {
632 		log(LOG_INFO, "pf: pf_map_addr: selected address ");
633 		pf_print_host(naddr, 0, af);
634 		if ((rpool->opts & PF_POOL_TYPEMASK) ==
635 		    PF_POOL_LEASTSTATES)
636 			addlog(" with state count %llu", states);
637 		if ((rpool->addr.type == PF_ADDR_TABLE &&
638 		    rpool->addr.p.tbl->pfrkt_refcntcost > 0) ||
639 		    (rpool->addr.type == PF_ADDR_DYNIFTL &&
640 		    rpool->addr.p.dyn->pfid_kt->pfrkt_refcntcost > 0))
641 			addlog(" with weight %u", weight);
642 		addlog("\n");
643 	}
644 
645 	return (0);
646 }
647 
648 int
649 pf_get_transaddr(struct pf_rule *r, struct pf_pdesc *pd,
650     struct pf_src_node **sns, struct pf_rule **nr)
651 {
652 	struct pf_addr	naddr;
653 	u_int16_t	nport;
654 
655 #ifdef INET6
656 	if (pd->af != pd->naf)
657 		return (pf_get_transaddr_af(r, pd, sns));
658 #endif /* INET6 */
659 
660 	if (r->nat.addr.type != PF_ADDR_NONE) {
661 		/* XXX is this right? what if rtable is changed at the same
662 		 * XXX time? where do I need to figure out the sport? */
663 		nport = 0;
664 		if (pf_get_sport(pd, r, &naddr, &nport,
665 		    r->nat.proxy_port[0], r->nat.proxy_port[1], sns)) {
666 			DPFPRINTF(LOG_NOTICE,
667 			    "pf: NAT proxy port allocation (%u-%u) failed",
668 			    r->nat.proxy_port[0],
669 			    r->nat.proxy_port[1]);
670 			return (-1);
671 		}
672 		*nr = r;
673 		pf_addrcpy(&pd->nsaddr, &naddr, pd->af);
674 		pd->nsport = nport;
675 	}
676 	if (r->rdr.addr.type != PF_ADDR_NONE) {
677 		if (pf_map_addr(pd->af, r, &pd->nsaddr, &naddr, NULL, sns,
678 		    &r->rdr, PF_SN_RDR))
679 			return (-1);
680 		if ((r->rdr.opts & PF_POOL_TYPEMASK) == PF_POOL_BITMASK)
681 			pf_poolmask(&naddr, &naddr,  &r->rdr.addr.v.a.mask,
682 			    &pd->ndaddr, pd->af);
683 
684 		nport = 0;
685 		if (r->rdr.proxy_port[1]) {
686 			u_int32_t	tmp_nport;
687 
688 			tmp_nport = ((ntohs(pd->ndport) -
689 			    ntohs(r->dst.port[0])) %
690 			    (r->rdr.proxy_port[1] -
691 			    r->rdr.proxy_port[0] + 1)) +
692 			    r->rdr.proxy_port[0];
693 
694 			/* wrap around if necessary */
695 			if (tmp_nport > 65535)
696 				tmp_nport -= 65535;
697 			nport = htons((u_int16_t)tmp_nport);
698 		} else if (r->rdr.proxy_port[0])
699 			nport = htons(r->rdr.proxy_port[0]);
700 		*nr = r;
701 		pf_addrcpy(&pd->ndaddr, &naddr, pd->af);
702 		if (nport)
703 			pd->ndport = nport;
704 	}
705 
706 	return (0);
707 }
708 
709 #ifdef INET6
710 int
711 pf_get_transaddr_af(struct pf_rule *r, struct pf_pdesc *pd,
712     struct pf_src_node **sns)
713 {
714 	struct pf_addr	ndaddr, nsaddr, naddr;
715 	u_int16_t	nport;
716 	int		prefixlen = 96;
717 
718 	if (pf_status.debug >= LOG_INFO) {
719 		log(LOG_INFO, "pf: af-to %s %s, ",
720 		    pd->naf == AF_INET ? "inet" : "inet6",
721 		    r->rdr.addr.type == PF_ADDR_NONE ? "nat" : "rdr");
722 		pf_print_host(&pd->nsaddr, pd->nsport, pd->af);
723 		addlog(" -> ");
724 		pf_print_host(&pd->ndaddr, pd->ndport, pd->af);
725 		addlog("\n");
726 	}
727 
728 	if (r->nat.addr.type == PF_ADDR_NONE)
729 		panic("pf_get_transaddr_af: no nat pool for source address");
730 
731 	/* get source address and port */
732 	nport = 0;
733 	if (pf_get_sport(pd, r, &nsaddr, &nport,
734 	    r->nat.proxy_port[0], r->nat.proxy_port[1], sns)) {
735 		DPFPRINTF(LOG_NOTICE,
736 		    "pf: af-to NAT proxy port allocation (%u-%u) failed",
737 		    r->nat.proxy_port[0],
738 		    r->nat.proxy_port[1]);
739 		return (-1);
740 	}
741 	pd->nsport = nport;
742 
743 	if (pd->proto == IPPROTO_ICMPV6 && pd->naf == AF_INET) {
744 		if (pd->dir == PF_IN) {
745 			pd->ndport = ntohs(pd->ndport);
746 			if (pd->ndport == ICMP6_ECHO_REQUEST)
747 				pd->ndport = ICMP_ECHO;
748 			else if (pd->ndport == ICMP6_ECHO_REPLY)
749 				pd->ndport = ICMP_ECHOREPLY;
750 			pd->ndport = htons(pd->ndport);
751 		} else {
752 			pd->nsport = ntohs(pd->nsport);
753 			if (pd->nsport == ICMP6_ECHO_REQUEST)
754 				pd->nsport = ICMP_ECHO;
755 			else if (pd->nsport == ICMP6_ECHO_REPLY)
756 				pd->nsport = ICMP_ECHOREPLY;
757 			pd->nsport = htons(pd->nsport);
758 		}
759 	} else if (pd->proto == IPPROTO_ICMP && pd->naf == AF_INET6) {
760 		if (pd->dir == PF_IN) {
761 			pd->ndport = ntohs(pd->ndport);
762 			if (pd->ndport == ICMP_ECHO)
763 				pd->ndport = ICMP6_ECHO_REQUEST;
764 			else if (pd->ndport == ICMP_ECHOREPLY)
765 				pd->ndport = ICMP6_ECHO_REPLY;
766 			pd->ndport = htons(pd->ndport);
767 		} else {
768 			pd->nsport = ntohs(pd->nsport);
769 			if (pd->nsport == ICMP_ECHO)
770 				pd->nsport = ICMP6_ECHO_REQUEST;
771 			else if (pd->nsport == ICMP_ECHOREPLY)
772 				pd->nsport = ICMP6_ECHO_REPLY;
773 			pd->nsport = htons(pd->nsport);
774 		}
775 	}
776 
777 	/* get the destination address and port */
778 	if (r->rdr.addr.type != PF_ADDR_NONE) {
779 		if (pf_map_addr(pd->naf, r, &nsaddr, &naddr, NULL, sns,
780 		    &r->rdr, PF_SN_RDR))
781 			return (-1);
782 		if (r->rdr.proxy_port[0])
783 			pd->ndport = htons(r->rdr.proxy_port[0]);
784 
785 		if (pd->naf == AF_INET) {
786 			/* The prefix is the IPv4 rdr address */
787 			prefixlen = in_mask2len((struct in_addr *)
788 			    &r->rdr.addr.v.a.mask);
789 			inet_nat46(pd->naf, &pd->ndaddr,
790 			    &ndaddr, &naddr, prefixlen);
791 		} else {
792 			/* The prefix is the IPv6 rdr address */
793 			prefixlen =
794 			    in6_mask2len((struct in6_addr *)
795 			    &r->rdr.addr.v.a.mask, NULL);
796 			inet_nat64(pd->naf, &pd->ndaddr,
797 			    &ndaddr, &naddr, prefixlen);
798 		}
799 	} else {
800 		if (pd->naf == AF_INET) {
801 			/* The prefix is the IPv6 dst address */
802 			prefixlen =
803 			    in6_mask2len((struct in6_addr *)
804 			    &r->dst.addr.v.a.mask, NULL);
805 			if (prefixlen < 32)
806 				prefixlen = 96;
807 			inet_nat64(pd->naf, &pd->ndaddr,
808 			    &ndaddr, &pd->ndaddr, prefixlen);
809 		} else {
810 			/*
811 			 * The prefix is the IPv6 nat address
812 			 * (that was stored in pd->nsaddr)
813 			 */
814 			prefixlen = in6_mask2len((struct in6_addr *)
815 			    &r->nat.addr.v.a.mask, NULL);
816 			if (prefixlen > 96)
817 				prefixlen = 96;
818 			inet_nat64(pd->naf, &pd->ndaddr,
819 			    &ndaddr, &nsaddr, prefixlen);
820 		}
821 	}
822 
823 	pf_addrcpy(&pd->nsaddr, &nsaddr, pd->naf);
824 	pf_addrcpy(&pd->ndaddr, &ndaddr, pd->naf);
825 
826 	if (pf_status.debug >= LOG_INFO) {
827 		log(LOG_INFO, "pf: af-to %s %s done, prefixlen %d, ",
828 		    pd->naf == AF_INET ? "inet" : "inet6",
829 		    r->rdr.addr.type == PF_ADDR_NONE ? "nat" : "rdr",
830 		    prefixlen);
831 		pf_print_host(&pd->nsaddr, pd->nsport, pd->naf);
832 		addlog(" -> ");
833 		pf_print_host(&pd->ndaddr, pd->ndport, pd->naf);
834 		addlog("\n");
835 	}
836 
837 	return (0);
838 }
839 #endif /* INET6 */
840 
841 int
842 pf_postprocess_addr(struct pf_state *cur)
843 {
844 	struct pf_rule		*nr;
845 	struct pf_state_key	*sks;
846 	struct pf_pool		 rpool;
847 	struct pf_addr		 lookup_addr;
848 	int			 slbcount = -1;
849 
850 	nr = cur->natrule.ptr;
851 
852 	if (nr == NULL)
853 		return (0);
854 
855 	/* decrease counter */
856 
857 	sks = cur->key[PF_SK_STACK];
858 
859 	/* check for outgoing or ingoing balancing */
860 	if (nr->rt == PF_ROUTETO)
861 		lookup_addr = cur->rt_addr;
862 	else if (sks != NULL)
863 		lookup_addr = sks->addr[1];
864 	else {
865 		if (pf_status.debug >= LOG_DEBUG) {
866 			log(LOG_DEBUG, "pf: %s: unable to obtain address",
867 			    __func__);
868 		}
869 		return (1);
870 	}
871 
872 	/* check for appropriate pool */
873 	if (nr->rdr.addr.type != PF_ADDR_NONE)
874 		rpool = nr->rdr;
875 	else if (nr->nat.addr.type != PF_ADDR_NONE)
876 		rpool = nr->nat;
877 	else if (nr->route.addr.type != PF_ADDR_NONE)
878 		rpool = nr->route;
879 	else
880 		return (0);
881 
882 	if (((rpool.opts & PF_POOL_TYPEMASK) != PF_POOL_LEASTSTATES))
883 		return (0);
884 
885 	if (rpool.addr.type == PF_ADDR_TABLE) {
886 		if ((slbcount = pfr_states_decrease(
887 		    rpool.addr.p.tbl,
888 		    &lookup_addr, sks->af)) == -1) {
889 			if (pf_status.debug >= LOG_DEBUG) {
890 				log(LOG_DEBUG, "pf: %s: selected address ",
891 				    __func__);
892 				pf_print_host(&lookup_addr,
893 				    sks->port[0], sks->af);
894 				addlog(". Failed to "
895 				    "decrease count!\n");
896 			}
897 			return (1);
898 		}
899 	} else if (rpool.addr.type == PF_ADDR_DYNIFTL) {
900 		if ((slbcount = pfr_states_decrease(
901 		    rpool.addr.p.dyn->pfid_kt,
902 		    &lookup_addr, sks->af)) == -1) {
903 			if (pf_status.debug >= LOG_DEBUG) {
904 				log(LOG_DEBUG, "pf: %s: selected address ",
905 				    __func__);
906 				pf_print_host(&lookup_addr,
907 				    sks->port[0], sks->af);
908 				addlog(". Failed to "
909 				    "decrease count!\n");
910 			}
911 			return (1);
912 		}
913 	}
914 	if (slbcount > -1) {
915 		if (pf_status.debug >= LOG_INFO) {
916 			log(LOG_INFO, "pf: %s: selected address ", __func__);
917 			pf_print_host(&lookup_addr, sks->port[0],
918 			    sks->af);
919 			addlog(" decreased state count to %u\n",
920 			    slbcount);
921 		}
922 	}
923 	return (0);
924 }
925