xref: /openbsd/sys/net/pf_lb.c (revision 404b540a)
1 /*	$OpenBSD: pf_lb.c,v 1.7 2009/09/07 08:27:45 sthen Exp $ */
2 
3 /*
4  * Copyright (c) 2001 Daniel Hartmeier
5  * Copyright (c) 2002 - 2008 Henning Brauer
6  * All rights reserved.
7  *
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions
10  * are met:
11  *
12  *    - Redistributions of source code must retain the above copyright
13  *      notice, this list of conditions and the following disclaimer.
14  *    - Redistributions in binary form must reproduce the above
15  *      copyright notice, this list of conditions and the following
16  *      disclaimer in the documentation and/or other materials provided
17  *      with the distribution.
18  *
19  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
20  * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
21  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
22  * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
23  * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
24  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
25  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
26  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
27  * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
28  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
29  * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
30  * POSSIBILITY OF SUCH DAMAGE.
31  *
32  * Effort sponsored in part by the Defense Advanced Research Projects
33  * Agency (DARPA) and Air Force Research Laboratory, Air Force
34  * Materiel Command, USAF, under agreement number F30602-01-2-0537.
35  *
36  */
37 
38 #include "bpfilter.h"
39 #include "pflog.h"
40 #include "pfsync.h"
41 #include "pflow.h"
42 
43 #include <sys/param.h>
44 #include <sys/systm.h>
45 #include <sys/mbuf.h>
46 #include <sys/filio.h>
47 #include <sys/socket.h>
48 #include <sys/socketvar.h>
49 #include <sys/kernel.h>
50 #include <sys/time.h>
51 #include <sys/pool.h>
52 #include <sys/proc.h>
53 #include <sys/rwlock.h>
54 
55 #include <crypto/md5.h>
56 
57 #include <net/if.h>
58 #include <net/if_types.h>
59 #include <net/bpf.h>
60 #include <net/route.h>
61 #include <net/radix_mpath.h>
62 
63 #include <netinet/in.h>
64 #include <netinet/in_var.h>
65 #include <netinet/in_systm.h>
66 #include <netinet/ip.h>
67 #include <netinet/ip_var.h>
68 #include <netinet/tcp.h>
69 #include <netinet/tcp_seq.h>
70 #include <netinet/udp.h>
71 #include <netinet/ip_icmp.h>
72 #include <netinet/in_pcb.h>
73 #include <netinet/tcp_timer.h>
74 #include <netinet/tcp_var.h>
75 #include <netinet/udp_var.h>
76 #include <netinet/icmp_var.h>
77 #include <netinet/if_ether.h>
78 
79 #include <dev/rndvar.h>
80 #include <net/pfvar.h>
81 #include <net/if_pflog.h>
82 #include <net/if_pflow.h>
83 
84 #if NPFSYNC > 0
85 #include <net/if_pfsync.h>
86 #endif /* NPFSYNC > 0 */
87 
88 #ifdef INET6
89 #include <netinet/ip6.h>
90 #include <netinet/in_pcb.h>
91 #include <netinet/icmp6.h>
92 #include <netinet6/nd6.h>
93 #endif /* INET6 */
94 
95 
/*
 * Print a debug message when the configured pf debug level is at least n.
 * x is a complete, parenthesized printf argument list, e.g.
 *	DPFPRINTF(PF_DEBUG_MISC, ("value %d\n", v));
 * The do/while wrapper makes the expansion a single statement so that a
 * following "else" cannot attach to the macro's internal "if".
 */
#define DPFPRINTF(n, x)							\
	do {								\
		if (pf_status.debug >= (n))				\
			printf x;					\
	} while (0)
97 
/*
 * Function prototypes
 */
101 
102 void			 pf_hash(struct pf_addr *, struct pf_addr *,
103 			    struct pf_poolhashkey *, sa_family_t);
104 int			 pf_get_sport(sa_family_t, u_int8_t, struct pf_rule *,
105 			    struct pf_addr *, struct pf_addr *, u_int16_t,
106 			    struct pf_addr *, u_int16_t *, u_int16_t, u_int16_t,
107 			    struct pf_src_node **);
108 
/*
 * Mix three unsigned words in place over nine subtract/xor/shift steps.
 * Each argument is both read and assigned several times, so all three must
 * be side-effect-free lvalues.
 */
#define mix(a, b, c)							\
	do {								\
		(a) = ((a) - (b) - (c)) ^ ((c) >> 13);			\
		(b) = ((b) - (c) - (a)) ^ ((a) << 8);			\
		(c) = ((c) - (a) - (b)) ^ ((b) >> 13);			\
		(a) = ((a) - (b) - (c)) ^ ((c) >> 12);			\
		(b) = ((b) - (c) - (a)) ^ ((a) << 16);			\
		(c) = ((c) - (a) - (b)) ^ ((b) >> 5);			\
		(a) = ((a) - (b) - (c)) ^ ((c) >> 3);			\
		(b) = ((b) - (c) - (a)) ^ ((a) << 10);			\
		(c) = ((c) - (a) - (b)) ^ ((b) >> 15);			\
	} while (0)
121 
122 /*
123  * hash function based on bridge_hash in if_bridge.c
124  */
125 void
126 pf_hash(struct pf_addr *inaddr, struct pf_addr *hash,
127     struct pf_poolhashkey *key, sa_family_t af)
128 {
129 	u_int32_t	a = 0x9e3779b9, b = 0x9e3779b9, c = key->key32[0];
130 
131 	switch (af) {
132 #ifdef INET
133 	case AF_INET:
134 		a += inaddr->addr32[0];
135 		b += key->key32[1];
136 		mix(a, b, c);
137 		hash->addr32[0] = c + key->key32[2];
138 		break;
139 #endif /* INET */
140 #ifdef INET6
141 	case AF_INET6:
142 		a += inaddr->addr32[0];
143 		b += inaddr->addr32[2];
144 		mix(a, b, c);
145 		hash->addr32[0] = c;
146 		a += inaddr->addr32[1];
147 		b += inaddr->addr32[3];
148 		c += key->key32[1];
149 		mix(a, b, c);
150 		hash->addr32[1] = c;
151 		a += inaddr->addr32[2];
152 		b += inaddr->addr32[1];
153 		c += key->key32[2];
154 		mix(a, b, c);
155 		hash->addr32[2] = c;
156 		a += inaddr->addr32[3];
157 		b += inaddr->addr32[0];
158 		c += key->key32[3];
159 		mix(a, b, c);
160 		hash->addr32[3] = c;
161 		break;
162 #endif /* INET6 */
163 	}
164 }
165 
166 int
167 pf_get_sport(sa_family_t af, u_int8_t proto, struct pf_rule *r,
168     struct pf_addr *saddr, struct pf_addr *daddr, u_int16_t dport,
169     struct pf_addr *naddr, u_int16_t *nport, u_int16_t low, u_int16_t high,
170     struct pf_src_node **sn)
171 {
172 	struct pf_state_key_cmp	key;
173 	struct pf_addr		init_addr;
174 	u_int16_t		cut;
175 
176 	bzero(&init_addr, sizeof(init_addr));
177 	if (pf_map_addr(af, r, saddr, naddr, &init_addr, sn, &r->nat))
178 		return (1);
179 
180 	if (proto == IPPROTO_ICMP || proto == IPPROTO_ICMPV6) {
181 		if (dport == htons(ICMP6_ECHO_REQUEST) ||
182 		    dport == htons(ICMP_ECHO)) {
183 			low = 1;
184 			high = 65535;
185 		} else
186 			return (0);	/* Don't try to modify non-echo ICMP */
187 	}
188 
189 	do {
190 		key.af = af;
191 		key.proto = proto;
192 		PF_ACPY(&key.addr[1], daddr, key.af);
193 		PF_ACPY(&key.addr[0], naddr, key.af);
194 		key.port[1] = dport;
195 
196 		/*
197 		 * port search; start random, step;
198 		 * similar 2 portloop in in_pcbbind
199 		 */
200 		if (!(proto == IPPROTO_TCP || proto == IPPROTO_UDP ||
201 		    proto == IPPROTO_ICMP)) {
202 			/* XXX bug icmp states dont use the id on both sides */
203 			key.port[0] = dport;
204 			if (pf_find_state_all(&key, PF_IN, NULL) == NULL)
205 				return (0);
206 		} else if (low == 0 && high == 0) {
207 			key.port[0] = *nport;
208 			if (pf_find_state_all(&key, PF_IN, NULL) == NULL)
209 				return (0);
210 		} else if (low == high) {
211 			key.port[0] = htons(low);
212 			if (pf_find_state_all(&key, PF_IN, NULL) == NULL) {
213 				*nport = htons(low);
214 				return (0);
215 			}
216 		} else {
217 			u_int16_t tmp;
218 
219 			if (low > high) {
220 				tmp = low;
221 				low = high;
222 				high = tmp;
223 			}
224 			/* low < high */
225 			cut = arc4random_uniform(1 + high - low) + low;
226 			/* low <= cut <= high */
227 			for (tmp = cut; tmp <= high; ++(tmp)) {
228 				key.port[0] = htons(tmp);
229 				if (pf_find_state_all(&key, PF_IN, NULL) ==
230 				    NULL && !in_baddynamic(tmp, proto)) {
231 					*nport = htons(tmp);
232 					return (0);
233 				}
234 			}
235 			for (tmp = cut - 1; tmp >= low; --(tmp)) {
236 				key.port[0] = htons(tmp);
237 				if (pf_find_state_all(&key, PF_IN, NULL) ==
238 				    NULL && !in_baddynamic(tmp, proto)) {
239 					*nport = htons(tmp);
240 					return (0);
241 				}
242 			}
243 		}
244 
245 		switch (r->nat.opts & PF_POOL_TYPEMASK) {
246 		case PF_POOL_RANDOM:
247 		case PF_POOL_ROUNDROBIN:
248 			if (pf_map_addr(af, r, saddr, naddr, &init_addr, sn,
249 			    &r->nat))
250 				return (1);
251 			break;
252 		case PF_POOL_NONE:
253 		case PF_POOL_SRCHASH:
254 		case PF_POOL_BITMASK:
255 		default:
256 			return (1);
257 		}
258 	} while (! PF_AEQ(&init_addr, naddr, af) );
259 	return (1);					/* none available */
260 }
261 
262 int
263 pf_map_addr(sa_family_t af, struct pf_rule *r, struct pf_addr *saddr,
264     struct pf_addr *naddr, struct pf_addr *init_addr, struct pf_src_node **sn,
265     struct pf_pool *rpool)
266 {
267 	unsigned char		 hash[16];
268 	struct pf_addr		*raddr = &rpool->cur->addr.v.a.addr;
269 	struct pf_addr		*rmask = &rpool->cur->addr.v.a.mask;
270 	struct pf_pooladdr	*acur = rpool->cur;
271 	struct pf_src_node	 k;
272 
273 	if (*sn == NULL && rpool->opts & PF_POOL_STICKYADDR &&
274 	    (rpool->opts & PF_POOL_TYPEMASK) != PF_POOL_NONE) {
275 		k.af = af;
276 		PF_ACPY(&k.addr, saddr, af);
277 		if (r->rule_flag & PFRULE_RULESRCTRACK ||
278 		    rpool->opts & PF_POOL_STICKYADDR)
279 			k.rule.ptr = r;
280 		else
281 			k.rule.ptr = NULL;
282 		pf_status.scounters[SCNT_SRC_NODE_SEARCH]++;
283 		*sn = RB_FIND(pf_src_tree, &tree_src_tracking, &k);
284 		if (*sn != NULL && !PF_AZERO(&(*sn)->raddr, af)) {
285 			PF_ACPY(naddr, &(*sn)->raddr, af);
286 			if (pf_status.debug >= PF_DEBUG_MISC) {
287 				printf("pf_map_addr: src tracking maps ");
288 				pf_print_host(&k.addr, 0, af);
289 				printf(" to ");
290 				pf_print_host(naddr, 0, af);
291 				printf("\n");
292 			}
293 			return (0);
294 		}
295 	}
296 
297 	if (rpool->cur->addr.type == PF_ADDR_NOROUTE)
298 		return (1);
299 	if (rpool->cur->addr.type == PF_ADDR_DYNIFTL) {
300 		switch (af) {
301 #ifdef INET
302 		case AF_INET:
303 			if (rpool->cur->addr.p.dyn->pfid_acnt4 < 1 &&
304 			    (rpool->opts & PF_POOL_TYPEMASK) !=
305 			    PF_POOL_ROUNDROBIN)
306 				return (1);
307 			 raddr = &rpool->cur->addr.p.dyn->pfid_addr4;
308 			 rmask = &rpool->cur->addr.p.dyn->pfid_mask4;
309 			break;
310 #endif /* INET */
311 #ifdef INET6
312 		case AF_INET6:
313 			if (rpool->cur->addr.p.dyn->pfid_acnt6 < 1 &&
314 			    (rpool->opts & PF_POOL_TYPEMASK) !=
315 			    PF_POOL_ROUNDROBIN)
316 				return (1);
317 			raddr = &rpool->cur->addr.p.dyn->pfid_addr6;
318 			rmask = &rpool->cur->addr.p.dyn->pfid_mask6;
319 			break;
320 #endif /* INET6 */
321 		}
322 	} else if (rpool->cur->addr.type == PF_ADDR_TABLE) {
323 		if ((rpool->opts & PF_POOL_TYPEMASK) != PF_POOL_ROUNDROBIN)
324 			return (1); /* unsupported */
325 	} else {
326 		raddr = &rpool->cur->addr.v.a.addr;
327 		rmask = &rpool->cur->addr.v.a.mask;
328 	}
329 
330 	switch (rpool->opts & PF_POOL_TYPEMASK) {
331 	case PF_POOL_NONE:
332 		PF_ACPY(naddr, raddr, af);
333 		break;
334 	case PF_POOL_BITMASK:
335 		PF_POOLMASK(naddr, raddr, rmask, saddr, af);
336 		break;
337 	case PF_POOL_RANDOM:
338 		if (init_addr != NULL && PF_AZERO(init_addr, af)) {
339 			switch (af) {
340 #ifdef INET
341 			case AF_INET:
342 				rpool->counter.addr32[0] = htonl(arc4random());
343 				break;
344 #endif /* INET */
345 #ifdef INET6
346 			case AF_INET6:
347 				if (rmask->addr32[3] != 0xffffffff)
348 					rpool->counter.addr32[3] =
349 					    htonl(arc4random());
350 				else
351 					break;
352 				if (rmask->addr32[2] != 0xffffffff)
353 					rpool->counter.addr32[2] =
354 					    htonl(arc4random());
355 				else
356 					break;
357 				if (rmask->addr32[1] != 0xffffffff)
358 					rpool->counter.addr32[1] =
359 					    htonl(arc4random());
360 				else
361 					break;
362 				if (rmask->addr32[0] != 0xffffffff)
363 					rpool->counter.addr32[0] =
364 					    htonl(arc4random());
365 				break;
366 #endif /* INET6 */
367 			}
368 			PF_POOLMASK(naddr, raddr, rmask, &rpool->counter, af);
369 			PF_ACPY(init_addr, naddr, af);
370 
371 		} else {
372 			PF_AINC(&rpool->counter, af);
373 			PF_POOLMASK(naddr, raddr, rmask, &rpool->counter, af);
374 		}
375 		break;
376 	case PF_POOL_SRCHASH:
377 		pf_hash(saddr, (struct pf_addr *)&hash, &rpool->key, af);
378 		PF_POOLMASK(naddr, raddr, rmask, (struct pf_addr *)&hash, af);
379 		break;
380 	case PF_POOL_ROUNDROBIN:
381 		if (rpool->cur->addr.type == PF_ADDR_TABLE) {
382 			if (!pfr_pool_get(rpool->cur->addr.p.tbl,
383 			    &rpool->tblidx, &rpool->counter,
384 			    &raddr, &rmask, af))
385 				goto get_addr;
386 		} else if (rpool->cur->addr.type == PF_ADDR_DYNIFTL) {
387 			if (!pfr_pool_get(rpool->cur->addr.p.dyn->pfid_kt,
388 			    &rpool->tblidx, &rpool->counter,
389 			    &raddr, &rmask, af))
390 				goto get_addr;
391 		} else if (pf_match_addr(0, raddr, rmask, &rpool->counter, af))
392 			goto get_addr;
393 
394 	try_next:
395 		if ((rpool->cur = TAILQ_NEXT(rpool->cur, entries)) == NULL)
396 			rpool->cur = TAILQ_FIRST(&rpool->list);
397 		if (rpool->cur->addr.type == PF_ADDR_TABLE) {
398 			rpool->tblidx = -1;
399 			if (pfr_pool_get(rpool->cur->addr.p.tbl,
400 			    &rpool->tblidx, &rpool->counter,
401 			    &raddr, &rmask, af)) {
402 				/* table contains no address of type 'af' */
403 				if (rpool->cur != acur)
404 					goto try_next;
405 				return (1);
406 			}
407 		} else if (rpool->cur->addr.type == PF_ADDR_DYNIFTL) {
408 			rpool->tblidx = -1;
409 			if (pfr_pool_get(rpool->cur->addr.p.dyn->pfid_kt,
410 			    &rpool->tblidx, &rpool->counter,
411 			    &raddr, &rmask, af)) {
412 				/* table contains no address of type 'af' */
413 				if (rpool->cur != acur)
414 					goto try_next;
415 				return (1);
416 			}
417 		} else {
418 			raddr = &rpool->cur->addr.v.a.addr;
419 			rmask = &rpool->cur->addr.v.a.mask;
420 			PF_ACPY(&rpool->counter, raddr, af);
421 		}
422 
423 	get_addr:
424 		PF_ACPY(naddr, &rpool->counter, af);
425 		if (init_addr != NULL && PF_AZERO(init_addr, af))
426 			PF_ACPY(init_addr, naddr, af);
427 		PF_AINC(&rpool->counter, af);
428 		break;
429 	}
430 	if (*sn != NULL)
431 		PF_ACPY(&(*sn)->raddr, naddr, af);
432 
433 	if (pf_status.debug >= PF_DEBUG_NOISY &&
434 	    (rpool->opts & PF_POOL_TYPEMASK) != PF_POOL_NONE) {
435 		printf("pf_map_addr: selected address ");
436 		pf_print_host(naddr, 0, af);
437 		printf("\n");
438 	}
439 
440 	return (0);
441 }
442 
443 int
444 pf_get_transaddr(struct pf_rule *r, struct pf_pdesc *pd, struct pf_addr *saddr,
445     u_int16_t *sport, struct pf_addr *daddr, u_int16_t *dport)
446 {
447 	struct pf_addr	naddr;
448 	u_int16_t	nport = 0;
449 
450 	struct pf_src_node srcnode, *sn = &srcnode;
451 
452 	if (!TAILQ_EMPTY(&r->nat.list)) {
453 		if (pf_get_sport(pd->af, pd->proto, r, saddr,
454 		    daddr, *dport, &naddr, &nport, r->nat.proxy_port[0],
455 		    r->nat.proxy_port[1], &sn)) {
456 			DPFPRINTF(PF_DEBUG_MISC,
457 			    ("pf: NAT proxy port allocation "
458 			    "(%u-%u) failed\n",
459 			    r->nat.proxy_port[0],
460 			    r->nat.proxy_port[1]));
461 			return (-1);
462 		}
463 		PF_ACPY(saddr, &naddr, pd->af);
464 		if (nport)
465 			*sport = nport;
466 	}
467 	if (!TAILQ_EMPTY(&r->rdr.list)) {
468 		if (pf_map_addr(pd->af, r, saddr, &naddr, NULL, &sn, &r->rdr))
469 			return (-1);
470 		if ((r->rdr.opts & PF_POOL_TYPEMASK) == PF_POOL_BITMASK)
471 			PF_POOLMASK(&naddr, &naddr,  &r->rdr.cur->addr.v.a.mask,
472 			    daddr, pd->af);
473 
474 			if (r->rdr.proxy_port[1]) {
475 				u_int32_t	tmp_nport;
476 
477 				tmp_nport = ((ntohs(*dport) -
478 				    ntohs(r->dst.port[0])) %
479 				    (r->rdr.proxy_port[1] -
480 				    r->rdr.proxy_port[0] + 1)) +
481 				    r->rdr.proxy_port[0];
482 
483 				/* wrap around if necessary */
484 				if (tmp_nport > 65535)
485 					tmp_nport -= 65535;
486 				nport = htons((u_int16_t)tmp_nport);
487 			} else if (r->rdr.proxy_port[0])
488 				nport = htons(r->rdr.proxy_port[0]);
489 
490 		PF_ACPY(daddr, &naddr, pd->af);
491 		if (nport)
492 			*dport = nport;
493 	}
494 
495 	return (0);
496 }
497 
498