xref: /freebsd/sys/netpfil/pf/pf_lb.c (revision 7d381d0a)
1 /*-
2  * SPDX-License-Identifier: BSD-2-Clause
3  *
4  * Copyright (c) 2001 Daniel Hartmeier
5  * Copyright (c) 2002 - 2008 Henning Brauer
6  * All rights reserved.
7  *
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions
10  * are met:
11  *
12  *    - Redistributions of source code must retain the above copyright
13  *      notice, this list of conditions and the following disclaimer.
14  *    - Redistributions in binary form must reproduce the above
15  *      copyright notice, this list of conditions and the following
16  *      disclaimer in the documentation and/or other materials provided
17  *      with the distribution.
18  *
19  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
20  * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
21  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
22  * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
23  * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
24  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
25  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
26  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
27  * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
28  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
29  * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
30  * POSSIBILITY OF SUCH DAMAGE.
31  *
32  * Effort sponsored in part by the Defense Advanced Research Projects
33  * Agency (DARPA) and Air Force Research Laboratory, Air Force
34  * Materiel Command, USAF, under agreement number F30602-01-2-0537.
35  *
36  *	$OpenBSD: pf_lb.c,v 1.2 2009/02/12 02:13:15 sthen Exp $
37  */
38 
39 #include <sys/cdefs.h>
40 #include "opt_pf.h"
41 #include "opt_inet.h"
42 #include "opt_inet6.h"
43 
44 #include <sys/param.h>
45 #include <sys/lock.h>
46 #include <sys/mbuf.h>
47 #include <sys/socket.h>
48 #include <sys/sysctl.h>
49 
50 #include <net/if.h>
51 #include <net/vnet.h>
52 #include <net/pfvar.h>
53 #include <net/if_pflog.h>
54 
/*
 * Limit the amount of work we do to find a free source port for redirects that
 * introduce a state conflict.  This value bounds the total number of candidate
 * ports probed by the PF_RDR case of pf_get_translation(); the default is 16.
 */
#define	V_pf_rdr_srcport_rewrite_tries	VNET(pf_rdr_srcport_rewrite_tries)
VNET_DEFINE_STATIC(int, pf_rdr_srcport_rewrite_tries) = 16;
61 
/*
 * Print a debug message when the configured debug level is at least n.
 * x is a parenthesized printf() argument list, e.g.
 * DPFPRINTF(PF_DEBUG_MISC, ("fmt %u\n", value));
 *
 * The body is wrapped in do { } while (0) so that the macro behaves as a
 * single statement and cannot capture an "else" that follows it.
 */
#define DPFPRINTF(n, x)					\
	do {						\
		if (V_pf_status.debug >= (n))		\
			printf x;			\
	} while (0)
63 
64 static void		 pf_hash(struct pf_addr *, struct pf_addr *,
65 			    struct pf_poolhashkey *, sa_family_t);
66 static struct pf_krule	*pf_match_translation(struct pf_pdesc *, struct mbuf *,
67 			    int, struct pfi_kkif *,
68 			    struct pf_addr *, u_int16_t, struct pf_addr *,
69 			    uint16_t, int, struct pf_kanchor_stackframe *);
70 static int pf_get_sport(sa_family_t, uint8_t, struct pf_krule *,
71     struct pf_addr *, uint16_t, struct pf_addr *, uint16_t, struct pf_addr *,
72     uint16_t *, uint16_t, uint16_t, struct pf_ksrc_node **,
73     struct pf_udp_mapping **);
74 static bool		 pf_islinklocal(const sa_family_t, const struct pf_addr *);
75 
/*
 * Three-word mixing step used by pf_hash().  a, b and c are updated in
 * place; each argument is evaluated many times, so callers must pass plain
 * lvalues without side effects.
 */
#define mix(a,b,c) \
	do {					\
		(a) -= (b);			\
		(a) -= (c);			\
		(a) ^= ((c) >> 13);		\
		(b) -= (c);			\
		(b) -= (a);			\
		(b) ^= ((a) << 8);		\
		(c) -= (a);			\
		(c) -= (b);			\
		(c) ^= ((b) >> 13);		\
		(a) -= (b);			\
		(a) -= (c);			\
		(a) ^= ((c) >> 12);		\
		(b) -= (c);			\
		(b) -= (a);			\
		(b) ^= ((a) << 16);		\
		(c) -= (a);			\
		(c) -= (b);			\
		(c) ^= ((b) >> 5);		\
		(a) -= (b);			\
		(a) -= (c);			\
		(a) ^= ((c) >> 3);		\
		(b) -= (c);			\
		(b) -= (a);			\
		(b) ^= ((a) << 10);		\
		(c) -= (a);			\
		(c) -= (b);			\
		(c) ^= ((b) >> 15);		\
	} while (0)
88 
89 /*
90  * hash function based on bridge_hash in if_bridge.c
91  */
92 static void
pf_hash(struct pf_addr * inaddr,struct pf_addr * hash,struct pf_poolhashkey * key,sa_family_t af)93 pf_hash(struct pf_addr *inaddr, struct pf_addr *hash,
94     struct pf_poolhashkey *key, sa_family_t af)
95 {
96 	u_int32_t	a = 0x9e3779b9, b = 0x9e3779b9, c = key->key32[0];
97 
98 	switch (af) {
99 #ifdef INET
100 	case AF_INET:
101 		a += inaddr->addr32[0];
102 		b += key->key32[1];
103 		mix(a, b, c);
104 		hash->addr32[0] = c + key->key32[2];
105 		break;
106 #endif /* INET */
107 #ifdef INET6
108 	case AF_INET6:
109 		a += inaddr->addr32[0];
110 		b += inaddr->addr32[2];
111 		mix(a, b, c);
112 		hash->addr32[0] = c;
113 		a += inaddr->addr32[1];
114 		b += inaddr->addr32[3];
115 		c += key->key32[1];
116 		mix(a, b, c);
117 		hash->addr32[1] = c;
118 		a += inaddr->addr32[2];
119 		b += inaddr->addr32[1];
120 		c += key->key32[2];
121 		mix(a, b, c);
122 		hash->addr32[2] = c;
123 		a += inaddr->addr32[3];
124 		b += inaddr->addr32[0];
125 		c += key->key32[3];
126 		mix(a, b, c);
127 		hash->addr32[3] = c;
128 		break;
129 #endif /* INET6 */
130 	}
131 }
132 
133 static struct pf_krule *
pf_match_translation(struct pf_pdesc * pd,struct mbuf * m,int off,struct pfi_kkif * kif,struct pf_addr * saddr,u_int16_t sport,struct pf_addr * daddr,uint16_t dport,int rs_num,struct pf_kanchor_stackframe * anchor_stack)134 pf_match_translation(struct pf_pdesc *pd, struct mbuf *m, int off,
135     struct pfi_kkif *kif, struct pf_addr *saddr, u_int16_t sport,
136     struct pf_addr *daddr, uint16_t dport, int rs_num,
137     struct pf_kanchor_stackframe *anchor_stack)
138 {
139 	struct pf_krule		*r, *rm = NULL;
140 	struct pf_kruleset	*ruleset = NULL;
141 	int			 tag = -1;
142 	int			 rtableid = -1;
143 	int			 asd = 0;
144 
145 	r = TAILQ_FIRST(pf_main_ruleset.rules[rs_num].active.ptr);
146 	while (r != NULL) {
147 		struct pf_rule_addr	*src = NULL, *dst = NULL;
148 		struct pf_addr_wrap	*xdst = NULL;
149 
150 		if (r->action == PF_BINAT && pd->dir == PF_IN) {
151 			src = &r->dst;
152 			if (r->rpool.cur != NULL)
153 				xdst = &r->rpool.cur->addr;
154 		} else {
155 			src = &r->src;
156 			dst = &r->dst;
157 		}
158 
159 		pf_counter_u64_add(&r->evaluations, 1);
160 		if (pfi_kkif_match(r->kif, kif) == r->ifnot)
161 			r = r->skip[PF_SKIP_IFP].ptr;
162 		else if (r->direction && r->direction != pd->dir)
163 			r = r->skip[PF_SKIP_DIR].ptr;
164 		else if (r->af && r->af != pd->af)
165 			r = r->skip[PF_SKIP_AF].ptr;
166 		else if (r->proto && r->proto != pd->proto)
167 			r = r->skip[PF_SKIP_PROTO].ptr;
168 		else if (PF_MISMATCHAW(&src->addr, saddr, pd->af,
169 		    src->neg, kif, M_GETFIB(m)))
170 			r = r->skip[src == &r->src ? PF_SKIP_SRC_ADDR :
171 			    PF_SKIP_DST_ADDR].ptr;
172 		else if (src->port_op && !pf_match_port(src->port_op,
173 		    src->port[0], src->port[1], sport))
174 			r = r->skip[src == &r->src ? PF_SKIP_SRC_PORT :
175 			    PF_SKIP_DST_PORT].ptr;
176 		else if (dst != NULL &&
177 		    PF_MISMATCHAW(&dst->addr, daddr, pd->af, dst->neg, NULL,
178 		    M_GETFIB(m)))
179 			r = r->skip[PF_SKIP_DST_ADDR].ptr;
180 		else if (xdst != NULL && PF_MISMATCHAW(xdst, daddr, pd->af,
181 		    0, NULL, M_GETFIB(m)))
182 			r = TAILQ_NEXT(r, entries);
183 		else if (dst != NULL && dst->port_op &&
184 		    !pf_match_port(dst->port_op, dst->port[0],
185 		    dst->port[1], dport))
186 			r = r->skip[PF_SKIP_DST_PORT].ptr;
187 		else if (r->match_tag && !pf_match_tag(m, r, &tag,
188 		    pd->pf_mtag ? pd->pf_mtag->tag : 0))
189 			r = TAILQ_NEXT(r, entries);
190 		else if (r->os_fingerprint != PF_OSFP_ANY && (pd->proto !=
191 		    IPPROTO_TCP || !pf_osfp_match(pf_osfp_fingerprint(pd, m,
192 		    off, &pd->hdr.tcp), r->os_fingerprint)))
193 			r = TAILQ_NEXT(r, entries);
194 		else {
195 			if (r->tag)
196 				tag = r->tag;
197 			if (r->rtableid >= 0)
198 				rtableid = r->rtableid;
199 			if (r->anchor == NULL) {
200 				rm = r;
201 				if (rm->action == PF_NONAT ||
202 				    rm->action == PF_NORDR ||
203 				    rm->action == PF_NOBINAT) {
204 					rm = NULL;
205 				}
206 				break;
207 			} else
208 				pf_step_into_anchor(anchor_stack, &asd,
209 				    &ruleset, rs_num, &r, NULL, NULL);
210 		}
211 		if (r == NULL)
212 			pf_step_out_of_anchor(anchor_stack, &asd, &ruleset,
213 			    rs_num, &r, NULL, NULL);
214 	}
215 
216 	if (tag > 0 && pf_tag_packet(m, pd, tag))
217 		return (NULL);
218 	if (rtableid >= 0)
219 		M_SETFIB(m, rtableid);
220 
221 	return (rm);
222 }
223 
224 static int
pf_get_sport(sa_family_t af,u_int8_t proto,struct pf_krule * r,struct pf_addr * saddr,uint16_t sport,struct pf_addr * daddr,uint16_t dport,struct pf_addr * naddr,uint16_t * nport,uint16_t low,uint16_t high,struct pf_ksrc_node ** sn,struct pf_udp_mapping ** udp_mapping)225 pf_get_sport(sa_family_t af, u_int8_t proto, struct pf_krule *r,
226     struct pf_addr *saddr, uint16_t sport, struct pf_addr *daddr,
227     uint16_t dport, struct pf_addr *naddr, uint16_t *nport, uint16_t low,
228     uint16_t high, struct pf_ksrc_node **sn,
229     struct pf_udp_mapping **udp_mapping)
230 {
231 	struct pf_state_key_cmp	key;
232 	struct pf_addr		init_addr;
233 	struct pf_srchash	*sh = NULL;
234 
235 	bzero(&init_addr, sizeof(init_addr));
236 
237 	MPASS(*udp_mapping == NULL);
238 
239 	/*
240 	 * If we are UDP and have an existing mapping we can get source port
241 	 * from the mapping. In this case we have to look up the src_node as
242 	 * pf_map_addr would.
243 	 */
244 	if (proto == IPPROTO_UDP && (r->rpool.opts & PF_POOL_ENDPI)) {
245 		struct pf_udp_endpoint_cmp udp_source;
246 
247 		bzero(&udp_source, sizeof(udp_source));
248 		udp_source.af = af;
249 		PF_ACPY(&udp_source.addr, saddr, af);
250 		udp_source.port = sport;
251 		*udp_mapping = pf_udp_mapping_find(&udp_source);
252 		if (*udp_mapping) {
253 			PF_ACPY(naddr, &(*udp_mapping)->endpoints[1].addr, af);
254 			*nport = (*udp_mapping)->endpoints[1].port;
255 			/* Try to find a src_node as per pf_map_addr(). */
256 			if (*sn == NULL && r->rpool.opts & PF_POOL_STICKYADDR &&
257 			    (r->rpool.opts & PF_POOL_TYPEMASK) != PF_POOL_NONE)
258 				*sn = pf_find_src_node(saddr, r, af, &sh, 0);
259 			return (0);
260 		} else {
261 			*udp_mapping = pf_udp_mapping_create(af, saddr, sport, &init_addr, 0);
262 			if (*udp_mapping == NULL)
263 				return (1);
264 		}
265 	}
266 
267 	if (pf_map_addr(af, r, saddr, naddr, NULL, &init_addr, sn))
268 		goto failed;
269 
270 	if (proto == IPPROTO_ICMP) {
271 		if (*nport == htons(ICMP_ECHO)) {
272 			low = 1;
273 			high = 65535;
274 		} else
275 			return (0);	/* Don't try to modify non-echo ICMP */
276 	}
277 #ifdef INET6
278 	if (proto == IPPROTO_ICMPV6) {
279 		if (*nport == htons(ICMP6_ECHO_REQUEST)) {
280 			low = 1;
281 			high = 65535;
282 		} else
283 			return (0);	/* Don't try to modify non-echo ICMP */
284 	}
285 #endif /* INET6 */
286 
287 	bzero(&key, sizeof(key));
288 	key.af = af;
289 	key.proto = proto;
290 	key.port[0] = dport;
291 	PF_ACPY(&key.addr[0], daddr, key.af);
292 
293 	do {
294 		PF_ACPY(&key.addr[1], naddr, key.af);
295 		if (*udp_mapping)
296 			PF_ACPY(&(*udp_mapping)->endpoints[1].addr, naddr, af);
297 
298 		/*
299 		 * port search; start random, step;
300 		 * similar 2 portloop in in_pcbbind
301 		 */
302 		if (proto == IPPROTO_SCTP) {
303 			key.port[1] = sport;
304 			if (!pf_find_state_all_exists(&key, PF_IN)) {
305 				*nport = sport;
306 				return (0);
307 			} else {
308 				return (1); /* Fail mapping. */
309 			}
310 		} else if (!(proto == IPPROTO_TCP || proto == IPPROTO_UDP ||
311 		    proto == IPPROTO_ICMP) || (low == 0 && high == 0)) {
312 			/*
313 			 * XXX bug: icmp states don't use the id on both sides.
314 			 * (traceroute -I through nat)
315 			 */
316 			key.port[1] = sport;
317 			if (!pf_find_state_all_exists(&key, PF_IN)) {
318 				*nport = sport;
319 				return (0);
320 			}
321 		} else if (low == high) {
322 			key.port[1] = htons(low);
323 			if (!pf_find_state_all_exists(&key, PF_IN)) {
324 				if (*udp_mapping != NULL) {
325 					(*udp_mapping)->endpoints[1].port = htons(low);
326 					if (pf_udp_mapping_insert(*udp_mapping) == 0) {
327 						*nport = htons(low);
328 						return (0);
329 					}
330 				} else {
331 					*nport = htons(low);
332 					return (0);
333 				}
334 			}
335 		} else {
336 			uint32_t tmp;
337 			uint16_t cut;
338 
339 			if (low > high) {
340 				tmp = low;
341 				low = high;
342 				high = tmp;
343 			}
344 			/* low < high */
345 			cut = arc4random() % (1 + high - low) + low;
346 			/* low <= cut <= high */
347 			for (tmp = cut; tmp <= high && tmp <= 0xffff; ++tmp) {
348 				if (*udp_mapping != NULL) {
349 					(*udp_mapping)->endpoints[1].port = htons(tmp);
350 					if (pf_udp_mapping_insert(*udp_mapping) == 0) {
351 						*nport = htons(tmp);
352 						return (0);
353 					}
354 				} else {
355 					key.port[1] = htons(tmp);
356 					if (!pf_find_state_all_exists(&key, PF_IN)) {
357 						*nport = htons(tmp);
358 						return (0);
359 					}
360 				}
361 			}
362 			tmp = cut;
363 			for (tmp -= 1; tmp >= low && tmp <= 0xffff; --tmp) {
364 				if (proto == IPPROTO_UDP &&
365 				    (r->rpool.opts & PF_POOL_ENDPI)) {
366 					(*udp_mapping)->endpoints[1].port = htons(tmp);
367 					if (pf_udp_mapping_insert(*udp_mapping) == 0) {
368 						*nport = htons(tmp);
369 						return (0);
370 					}
371 				} else {
372 					key.port[1] = htons(tmp);
373 					if (!pf_find_state_all_exists(&key, PF_IN)) {
374 						*nport = htons(tmp);
375 						return (0);
376 					}
377 				}
378 			}
379 		}
380 
381 		switch (r->rpool.opts & PF_POOL_TYPEMASK) {
382 		case PF_POOL_RANDOM:
383 		case PF_POOL_ROUNDROBIN:
384 			/*
385 			 * pick a different source address since we're out
386 			 * of free port choices for the current one.
387 			 */
388 			if (pf_map_addr(af, r, saddr, naddr, NULL, &init_addr, sn))
389 				return (1);
390 			break;
391 		case PF_POOL_NONE:
392 		case PF_POOL_SRCHASH:
393 		case PF_POOL_BITMASK:
394 		default:
395 			return (1);
396 		}
397 	} while (! PF_AEQ(&init_addr, naddr, af) );
398 
399 failed:
400 	uma_zfree(V_pf_udp_mapping_z, *udp_mapping);
401 	*udp_mapping = NULL;
402 	return (1);					/* none available */
403 }
404 
405 static bool
pf_islinklocal(const sa_family_t af,const struct pf_addr * addr)406 pf_islinklocal(const sa_family_t af, const struct pf_addr *addr)
407 {
408 	if (af == AF_INET6 && IN6_IS_ADDR_LINKLOCAL(&addr->v6))
409 		return (true);
410 	return (false);
411 }
412 
413 static int
pf_get_mape_sport(sa_family_t af,u_int8_t proto,struct pf_krule * r,struct pf_addr * saddr,uint16_t sport,struct pf_addr * daddr,uint16_t dport,struct pf_addr * naddr,uint16_t * nport,struct pf_ksrc_node ** sn,struct pf_udp_mapping ** udp_mapping)414 pf_get_mape_sport(sa_family_t af, u_int8_t proto, struct pf_krule *r,
415     struct pf_addr *saddr, uint16_t sport, struct pf_addr *daddr,
416     uint16_t dport, struct pf_addr *naddr, uint16_t *nport,
417     struct pf_ksrc_node **sn, struct pf_udp_mapping **udp_mapping)
418 {
419 	uint16_t psmask, low, highmask;
420 	uint16_t i, ahigh, cut;
421 	int ashift, psidshift;
422 
423 	ashift = 16 - r->rpool.mape.offset;
424 	psidshift = ashift - r->rpool.mape.psidlen;
425 	psmask = r->rpool.mape.psid & ((1U << r->rpool.mape.psidlen) - 1);
426 	psmask = psmask << psidshift;
427 	highmask = (1U << psidshift) - 1;
428 
429 	ahigh = (1U << r->rpool.mape.offset) - 1;
430 	cut = arc4random() & ahigh;
431 	if (cut == 0)
432 		cut = 1;
433 
434 	for (i = cut; i <= ahigh; i++) {
435 		low = (i << ashift) | psmask;
436 		if (!pf_get_sport(af, proto, r, saddr, sport, daddr, dport,
437 		    naddr, nport, low, low | highmask, sn, udp_mapping))
438 			return (0);
439 	}
440 	for (i = cut - 1; i > 0; i--) {
441 		low = (i << ashift) | psmask;
442 		if (!pf_get_sport(af, proto, r, saddr, sport, daddr, dport,
443 		    naddr, nport, low, low | highmask, sn, udp_mapping))
444 			return (0);
445 	}
446 	return (1);
447 }
448 
449 u_short
pf_map_addr(sa_family_t af,struct pf_krule * r,struct pf_addr * saddr,struct pf_addr * naddr,struct pfi_kkif ** nkif,struct pf_addr * init_addr,struct pf_ksrc_node ** sn)450 pf_map_addr(sa_family_t af, struct pf_krule *r, struct pf_addr *saddr,
451     struct pf_addr *naddr, struct pfi_kkif **nkif, struct pf_addr *init_addr,
452     struct pf_ksrc_node **sn)
453 {
454 	u_short			 reason = PFRES_MATCH;
455 	struct pf_kpool		*rpool = &r->rpool;
456 	struct pf_addr		*raddr = NULL, *rmask = NULL;
457 	struct pf_srchash	*sh = NULL;
458 
459 	/* Try to find a src_node if none was given and this
460 	   is a sticky-address rule. */
461 	if (*sn == NULL && r->rpool.opts & PF_POOL_STICKYADDR &&
462 	    (r->rpool.opts & PF_POOL_TYPEMASK) != PF_POOL_NONE)
463 		*sn = pf_find_src_node(saddr, r, af, &sh, false);
464 
465 	/* If a src_node was found or explicitly given and it has a non-zero
466 	   route address, use this address. A zeroed address is found if the
467 	   src node was created just a moment ago in pf_create_state and it
468 	   needs to be filled in with routing decision calculated here. */
469 	if (*sn != NULL && !PF_AZERO(&(*sn)->raddr, af)) {
470 		/* If the supplied address is the same as the current one we've
471 		 * been asked before, so tell the caller that there's no other
472 		 * address to be had. */
473 		if (PF_AEQ(naddr, &(*sn)->raddr, af)) {
474 			reason = PFRES_MAPFAILED;
475 			goto done;
476 		}
477 
478 		PF_ACPY(naddr, &(*sn)->raddr, af);
479 		if (nkif)
480 			*nkif = (*sn)->rkif;
481 		if (V_pf_status.debug >= PF_DEBUG_NOISY) {
482 			printf("pf_map_addr: src tracking maps ");
483 			pf_print_host(saddr, 0, af);
484 			printf(" to ");
485 			pf_print_host(naddr, 0, af);
486 			if (nkif)
487 				printf("@%s", (*nkif)->pfik_name);
488 			printf("\n");
489 		}
490 		goto done;
491 	}
492 
493 	mtx_lock(&rpool->mtx);
494 	/* Find the route using chosen algorithm. Store the found route
495 	   in src_node if it was given or found. */
496 	if (rpool->cur->addr.type == PF_ADDR_NOROUTE) {
497 		reason = PFRES_MAPFAILED;
498 		goto done_pool_mtx;
499 	}
500 	if (rpool->cur->addr.type == PF_ADDR_DYNIFTL) {
501 		switch (af) {
502 #ifdef INET
503 		case AF_INET:
504 			if (rpool->cur->addr.p.dyn->pfid_acnt4 < 1 &&
505 			    (rpool->opts & PF_POOL_TYPEMASK) !=
506 			    PF_POOL_ROUNDROBIN) {
507 				reason = PFRES_MAPFAILED;
508 				goto done_pool_mtx;
509 			}
510 			raddr = &rpool->cur->addr.p.dyn->pfid_addr4;
511 			rmask = &rpool->cur->addr.p.dyn->pfid_mask4;
512 			break;
513 #endif /* INET */
514 #ifdef INET6
515 		case AF_INET6:
516 			if (rpool->cur->addr.p.dyn->pfid_acnt6 < 1 &&
517 			    (rpool->opts & PF_POOL_TYPEMASK) !=
518 			    PF_POOL_ROUNDROBIN) {
519 				reason = PFRES_MAPFAILED;
520 				goto done_pool_mtx;
521 			}
522 			raddr = &rpool->cur->addr.p.dyn->pfid_addr6;
523 			rmask = &rpool->cur->addr.p.dyn->pfid_mask6;
524 			break;
525 #endif /* INET6 */
526 		}
527 	} else if (rpool->cur->addr.type == PF_ADDR_TABLE) {
528 		if ((rpool->opts & PF_POOL_TYPEMASK) != PF_POOL_ROUNDROBIN) {
529 			reason = PFRES_MAPFAILED;
530 			goto done_pool_mtx; /* unsupported */
531 		}
532 	} else {
533 		raddr = &rpool->cur->addr.v.a.addr;
534 		rmask = &rpool->cur->addr.v.a.mask;
535 	}
536 
537 	switch (rpool->opts & PF_POOL_TYPEMASK) {
538 	case PF_POOL_NONE:
539 		PF_ACPY(naddr, raddr, af);
540 		break;
541 	case PF_POOL_BITMASK:
542 		PF_POOLMASK(naddr, raddr, rmask, saddr, af);
543 		break;
544 	case PF_POOL_RANDOM:
545 		if (init_addr != NULL && PF_AZERO(init_addr, af)) {
546 			switch (af) {
547 #ifdef INET
548 			case AF_INET:
549 				rpool->counter.addr32[0] = htonl(arc4random());
550 				break;
551 #endif /* INET */
552 #ifdef INET6
553 			case AF_INET6:
554 				if (rmask->addr32[3] != 0xffffffff)
555 					rpool->counter.addr32[3] =
556 					    htonl(arc4random());
557 				else
558 					break;
559 				if (rmask->addr32[2] != 0xffffffff)
560 					rpool->counter.addr32[2] =
561 					    htonl(arc4random());
562 				else
563 					break;
564 				if (rmask->addr32[1] != 0xffffffff)
565 					rpool->counter.addr32[1] =
566 					    htonl(arc4random());
567 				else
568 					break;
569 				if (rmask->addr32[0] != 0xffffffff)
570 					rpool->counter.addr32[0] =
571 					    htonl(arc4random());
572 				break;
573 #endif /* INET6 */
574 			}
575 			PF_POOLMASK(naddr, raddr, rmask, &rpool->counter, af);
576 			PF_ACPY(init_addr, naddr, af);
577 
578 		} else {
579 			PF_AINC(&rpool->counter, af);
580 			PF_POOLMASK(naddr, raddr, rmask, &rpool->counter, af);
581 		}
582 		break;
583 	case PF_POOL_SRCHASH:
584 	    {
585 		unsigned char hash[16];
586 
587 		pf_hash(saddr, (struct pf_addr *)&hash, &rpool->key, af);
588 		PF_POOLMASK(naddr, raddr, rmask, (struct pf_addr *)&hash, af);
589 		break;
590 	    }
591 	case PF_POOL_ROUNDROBIN:
592 	    {
593 		struct pf_kpooladdr *acur = rpool->cur;
594 
595 		if (rpool->cur->addr.type == PF_ADDR_TABLE) {
596 			if (!pfr_pool_get(rpool->cur->addr.p.tbl,
597 			    &rpool->tblidx, &rpool->counter, af, NULL))
598 				goto get_addr;
599 		} else if (rpool->cur->addr.type == PF_ADDR_DYNIFTL) {
600 			if (!pfr_pool_get(rpool->cur->addr.p.dyn->pfid_kt,
601 			    &rpool->tblidx, &rpool->counter, af, pf_islinklocal))
602 				goto get_addr;
603 		} else if (pf_match_addr(0, raddr, rmask, &rpool->counter, af))
604 			goto get_addr;
605 
606 	try_next:
607 		if (TAILQ_NEXT(rpool->cur, entries) == NULL)
608 			rpool->cur = TAILQ_FIRST(&rpool->list);
609 		else
610 			rpool->cur = TAILQ_NEXT(rpool->cur, entries);
611 		if (rpool->cur->addr.type == PF_ADDR_TABLE) {
612 			rpool->tblidx = -1;
613 			if (pfr_pool_get(rpool->cur->addr.p.tbl,
614 			    &rpool->tblidx, &rpool->counter, af, NULL)) {
615 				/* table contains no address of type 'af' */
616 				if (rpool->cur != acur)
617 					goto try_next;
618 				reason = PFRES_MAPFAILED;
619 				goto done_pool_mtx;
620 			}
621 		} else if (rpool->cur->addr.type == PF_ADDR_DYNIFTL) {
622 			rpool->tblidx = -1;
623 			if (pfr_pool_get(rpool->cur->addr.p.dyn->pfid_kt,
624 			    &rpool->tblidx, &rpool->counter, af, pf_islinklocal)) {
625 				/* table contains no address of type 'af' */
626 				if (rpool->cur != acur)
627 					goto try_next;
628 				reason = PFRES_MAPFAILED;
629 				goto done_pool_mtx;
630 			}
631 		} else {
632 			raddr = &rpool->cur->addr.v.a.addr;
633 			rmask = &rpool->cur->addr.v.a.mask;
634 			PF_ACPY(&rpool->counter, raddr, af);
635 		}
636 
637 	get_addr:
638 		PF_ACPY(naddr, &rpool->counter, af);
639 		if (init_addr != NULL && PF_AZERO(init_addr, af))
640 			PF_ACPY(init_addr, naddr, af);
641 		PF_AINC(&rpool->counter, af);
642 		break;
643 	    }
644 	}
645 
646 	if (nkif)
647 		*nkif = rpool->cur->kif;
648 
649 	if (*sn != NULL) {
650 		PF_ACPY(&(*sn)->raddr, naddr, af);
651 		if (nkif)
652 			(*sn)->rkif = *nkif;
653 	}
654 
655 	if (V_pf_status.debug >= PF_DEBUG_NOISY &&
656 	    (rpool->opts & PF_POOL_TYPEMASK) != PF_POOL_NONE) {
657 		printf("pf_map_addr: selected address ");
658 		pf_print_host(naddr, 0, af);
659 		if (nkif)
660 			printf("@%s", (*nkif)->pfik_name);
661 		printf("\n");
662 	}
663 
664 done_pool_mtx:
665 	mtx_unlock(&rpool->mtx);
666 
667 done:
668 	if (reason) {
669 		counter_u64_add(V_pf_status.counters[reason], 1);
670 	}
671 
672 	return (reason);
673 }
674 
675 u_short
pf_get_translation(struct pf_pdesc * pd,struct mbuf * m,int off,struct pfi_kkif * kif,struct pf_ksrc_node ** sn,struct pf_state_key ** skp,struct pf_state_key ** nkp,struct pf_addr * saddr,struct pf_addr * daddr,uint16_t sport,uint16_t dport,struct pf_kanchor_stackframe * anchor_stack,struct pf_krule ** rp,struct pf_udp_mapping ** udp_mapping)676 pf_get_translation(struct pf_pdesc *pd, struct mbuf *m, int off,
677     struct pfi_kkif *kif, struct pf_ksrc_node **sn,
678     struct pf_state_key **skp, struct pf_state_key **nkp,
679     struct pf_addr *saddr, struct pf_addr *daddr,
680     uint16_t sport, uint16_t dport, struct pf_kanchor_stackframe *anchor_stack,
681     struct pf_krule **rp,
682     struct pf_udp_mapping **udp_mapping)
683 {
684 	struct pf_krule	*r = NULL;
685 	struct pf_addr	*naddr;
686 	uint16_t	*nportp;
687 	uint16_t	 low, high;
688 	u_short		 reason;
689 
690 	PF_RULES_RASSERT();
691 	KASSERT(*skp == NULL, ("*skp not NULL"));
692 	KASSERT(*nkp == NULL, ("*nkp not NULL"));
693 
694 	*rp = NULL;
695 
696 	if (pd->dir == PF_OUT) {
697 		r = pf_match_translation(pd, m, off, kif, saddr,
698 		    sport, daddr, dport, PF_RULESET_BINAT, anchor_stack);
699 		if (r == NULL)
700 			r = pf_match_translation(pd, m, off, kif,
701 			    saddr, sport, daddr, dport, PF_RULESET_NAT,
702 			    anchor_stack);
703 	} else {
704 		r = pf_match_translation(pd, m, off, kif, saddr,
705 		    sport, daddr, dport, PF_RULESET_RDR, anchor_stack);
706 		if (r == NULL)
707 			r = pf_match_translation(pd, m, off, kif,
708 			    saddr, sport, daddr, dport, PF_RULESET_BINAT,
709 			    anchor_stack);
710 	}
711 
712 	if (r == NULL)
713 		return (PFRES_MAX);
714 
715 	switch (r->action) {
716 	case PF_NONAT:
717 	case PF_NOBINAT:
718 	case PF_NORDR:
719 		return (PFRES_MAX);
720 	}
721 
722 	*skp = pf_state_key_setup(pd, m, off, saddr, daddr, sport, dport);
723 	if (*skp == NULL)
724 		return (PFRES_MEMORY);
725 	*nkp = pf_state_key_clone(*skp);
726 	if (*nkp == NULL) {
727 		uma_zfree(V_pf_state_key_z, *skp);
728 		*skp = NULL;
729 		return (PFRES_MEMORY);
730 	}
731 
732 	naddr = &(*nkp)->addr[1];
733 	nportp = &(*nkp)->port[1];
734 
735 	switch (r->action) {
736 	case PF_NAT:
737 		if (pd->proto == IPPROTO_ICMP) {
738 			low = 1;
739 			high = 65535;
740 		} else {
741 			low  = r->rpool.proxy_port[0];
742 			high = r->rpool.proxy_port[1];
743 		}
744 		if (r->rpool.mape.offset > 0) {
745 			if (pf_get_mape_sport(pd->af, pd->proto, r, saddr,
746 			    sport, daddr, dport, naddr, nportp, sn, udp_mapping)) {
747 				DPFPRINTF(PF_DEBUG_MISC,
748 				    ("pf: MAP-E port allocation (%u/%u/%u)"
749 				    " failed\n",
750 				    r->rpool.mape.offset,
751 				    r->rpool.mape.psidlen,
752 				    r->rpool.mape.psid));
753 				reason = PFRES_MAPFAILED;
754 				goto notrans;
755 			}
756 		} else if (pf_get_sport(pd->af, pd->proto, r, saddr, sport,
757 		    daddr, dport, naddr, nportp, low, high, sn, udp_mapping)) {
758 			DPFPRINTF(PF_DEBUG_MISC,
759 			    ("pf: NAT proxy port allocation (%u-%u) failed\n",
760 			    r->rpool.proxy_port[0], r->rpool.proxy_port[1]));
761 			reason = PFRES_MAPFAILED;
762 			goto notrans;
763 		}
764 		break;
765 	case PF_BINAT:
766 		switch (pd->dir) {
767 		case PF_OUT:
768 			if (r->rpool.cur->addr.type == PF_ADDR_DYNIFTL){
769 				switch (pd->af) {
770 #ifdef INET
771 				case AF_INET:
772 					if (r->rpool.cur->addr.p.dyn->
773 					    pfid_acnt4 < 1) {
774 						reason = PFRES_MAPFAILED;
775 						goto notrans;
776 					}
777 					PF_POOLMASK(naddr,
778 					    &r->rpool.cur->addr.p.dyn->
779 					    pfid_addr4,
780 					    &r->rpool.cur->addr.p.dyn->
781 					    pfid_mask4, saddr, AF_INET);
782 					break;
783 #endif /* INET */
784 #ifdef INET6
785 				case AF_INET6:
786 					if (r->rpool.cur->addr.p.dyn->
787 					    pfid_acnt6 < 1) {
788 						reason = PFRES_MAPFAILED;
789 						goto notrans;
790 					}
791 					PF_POOLMASK(naddr,
792 					    &r->rpool.cur->addr.p.dyn->
793 					    pfid_addr6,
794 					    &r->rpool.cur->addr.p.dyn->
795 					    pfid_mask6, saddr, AF_INET6);
796 					break;
797 #endif /* INET6 */
798 				}
799 			} else
800 				PF_POOLMASK(naddr,
801 				    &r->rpool.cur->addr.v.a.addr,
802 				    &r->rpool.cur->addr.v.a.mask, saddr,
803 				    pd->af);
804 			break;
805 		case PF_IN:
806 			if (r->src.addr.type == PF_ADDR_DYNIFTL) {
807 				switch (pd->af) {
808 #ifdef INET
809 				case AF_INET:
810 					if (r->src.addr.p.dyn->pfid_acnt4 < 1) {
811 						reason = PFRES_MAPFAILED;
812 						goto notrans;
813 					}
814 					PF_POOLMASK(naddr,
815 					    &r->src.addr.p.dyn->pfid_addr4,
816 					    &r->src.addr.p.dyn->pfid_mask4,
817 					    daddr, AF_INET);
818 					break;
819 #endif /* INET */
820 #ifdef INET6
821 				case AF_INET6:
822 					if (r->src.addr.p.dyn->pfid_acnt6 < 1) {
823 						reason = PFRES_MAPFAILED;
824 						goto notrans;
825 					}
826 					PF_POOLMASK(naddr,
827 					    &r->src.addr.p.dyn->pfid_addr6,
828 					    &r->src.addr.p.dyn->pfid_mask6,
829 					    daddr, AF_INET6);
830 					break;
831 #endif /* INET6 */
832 				}
833 			} else
834 				PF_POOLMASK(naddr, &r->src.addr.v.a.addr,
835 				    &r->src.addr.v.a.mask, daddr, pd->af);
836 			break;
837 		}
838 		break;
839 	case PF_RDR: {
840 		struct pf_state_key_cmp key;
841 		int tries;
842 		uint16_t cut, low, high, nport;
843 
844 		reason = pf_map_addr(pd->af, r, saddr, naddr, NULL, NULL, sn);
845 		if (reason != 0)
846 			goto notrans;
847 		if ((r->rpool.opts & PF_POOL_TYPEMASK) == PF_POOL_BITMASK)
848 			PF_POOLMASK(naddr, naddr, &r->rpool.cur->addr.v.a.mask,
849 			    daddr, pd->af);
850 
851 		/* Do not change SCTP ports. */
852 		if (pd->proto == IPPROTO_SCTP)
853 			break;
854 
855 		if (r->rpool.proxy_port[1]) {
856 			uint32_t	tmp_nport;
857 
858 			tmp_nport = ((ntohs(dport) - ntohs(r->dst.port[0])) %
859 			    (r->rpool.proxy_port[1] - r->rpool.proxy_port[0] +
860 			    1)) + r->rpool.proxy_port[0];
861 
862 			/* Wrap around if necessary. */
863 			if (tmp_nport > 65535)
864 				tmp_nport -= 65535;
865 			nport = htons((uint16_t)tmp_nport);
866 		} else if (r->rpool.proxy_port[0])
867 			nport = htons(r->rpool.proxy_port[0]);
868 		else
869 			nport = dport;
870 
871 		/*
872 		 * Update the destination port.
873 		 */
874 		*nportp = nport;
875 
876 		/*
877 		 * Do we have a source port conflict in the stack state?  Try to
878 		 * modulate the source port if so.  Note that this is racy since
879 		 * the state lookup may not find any matches here but will once
880 		 * pf_create_state() actually instantiates the state.
881 		 */
882 		bzero(&key, sizeof(key));
883 		key.af = pd->af;
884 		key.proto = pd->proto;
885 		key.port[0] = sport;
886 		PF_ACPY(&key.addr[0], saddr, key.af);
887 		key.port[1] = nport;
888 		PF_ACPY(&key.addr[1], naddr, key.af);
889 
890 		if (!pf_find_state_all_exists(&key, PF_OUT))
891 			break;
892 
893 		tries = 0;
894 
895 		low = 50001;	/* XXX-MJ PF_NAT_PROXY_PORT_LOW/HIGH */
896 		high = 65535;
897 		cut = arc4random() % (1 + high - low) + low;
898 		for (uint32_t tmp = cut;
899 		    tmp <= high && tmp <= UINT16_MAX &&
900 		    tries < V_pf_rdr_srcport_rewrite_tries;
901 		    tmp++, tries++) {
902 			key.port[0] = htons(tmp);
903 			if (!pf_find_state_all_exists(&key, PF_OUT)) {
904 				/* Update the source port. */
905 				(*nkp)->port[0] = htons(tmp);
906 				goto out;
907 			}
908 		}
909 		for (uint32_t tmp = cut - 1;
910 		    tmp >= low && tries < V_pf_rdr_srcport_rewrite_tries;
911 		    tmp--, tries++) {
912 			key.port[0] = htons(tmp);
913 			if (!pf_find_state_all_exists(&key, PF_OUT)) {
914 				/* Update the source port. */
915 				(*nkp)->port[0] = htons(tmp);
916 				goto out;
917 			}
918 		}
919 
920 		/*
921 		 * We failed to find a match.  Push on ahead anyway, let
922 		 * pf_state_insert() be the arbiter of whether the state
923 		 * conflict is tolerable.  In particular, with TCP connections
924 		 * the state may be reused if the TCP state is terminal.
925 		 */
926 		DPFPRINTF(PF_DEBUG_MISC,
927 		    ("pf: RDR source port allocation failed\n"));
928 		break;
929 
930 out:
931 		DPFPRINTF(PF_DEBUG_MISC,
932 		    ("pf: RDR source port allocation %u->%u\n",
933 		    ntohs(sport), ntohs((*nkp)->port[0])));
934 		break;
935 	}
936 	default:
937 		panic("%s: unknown action %u", __func__, r->action);
938 	}
939 
940 	/* Return success only if translation really happened. */
941 	if (bcmp(*skp, *nkp, sizeof(struct pf_state_key_cmp))) {
942 		*rp = r;
943 		return (PFRES_MATCH);
944 	}
945 
946 	reason = PFRES_MAX;
947 notrans:
948 	uma_zfree(V_pf_state_key_z, *nkp);
949 	uma_zfree(V_pf_state_key_z, *skp);
950 	*skp = *nkp = NULL;
951 	*sn = NULL;
952 
953 	return (reason);
954 }
955