1 /* $NetBSD: ip_state.c,v 1.12 2020/04/18 17:02:00 christos Exp $ */
2
3 /*
4 * Copyright (C) 2012 by Darren Reed.
5 *
6 * See the IPFILTER.LICENCE file for details on licencing.
7 *
8 * Id: ip_state.c,v 1.1.1.2 2012/07/22 13:45:37 darrenr Exp
9 */
10 #if defined(KERNEL) || defined(_KERNEL)
11 # undef KERNEL
12 # undef _KERNEL
13 # define KERNEL 1
14 # define _KERNEL 1
15 #endif
16 #include <sys/errno.h>
17 #include <sys/types.h>
18 #include <sys/param.h>
19 #include <sys/file.h>
20 #if defined(_KERNEL) && defined(__FreeBSD_version) && \
21 (__FreeBSD_version >= 400000) && !defined(KLD_MODULE)
22 #include "opt_inet6.h"
23 #endif
24 #if !defined(_KERNEL) && !defined(__KERNEL__)
25 # include <stdio.h>
26 # include <stdlib.h>
27 # include <string.h>
28 # define _KERNEL
29 # ifdef __OpenBSD__
30 struct file;
31 # endif
32 # include <sys/uio.h>
33 # undef _KERNEL
34 #endif
35 #if defined(_KERNEL) && (__FreeBSD_version >= 220000)
36 # include <sys/filio.h>
37 # include <sys/fcntl.h>
38 #else
39 # include <sys/ioctl.h>
40 #endif
41 #include <sys/time.h>
42 #if !defined(linux)
43 # include <sys/protosw.h>
44 #endif
45 #include <sys/socket.h>
46 #if defined(_KERNEL)
47 # include <sys/systm.h>
48 # if !defined(__SVR4) && !defined(__svr4__)
49 # include <sys/mbuf.h>
50 # endif
51 #endif
52 #if defined(__SVR4) || defined(__svr4__)
53 # include <sys/filio.h>
54 # include <sys/byteorder.h>
55 # ifdef _KERNEL
56 # include <sys/dditypes.h>
57 # endif
58 # include <sys/stream.h>
59 # include <sys/kmem.h>
60 #endif
61
62 #include <net/if.h>
63 #ifdef sun
64 # include <net/af.h>
65 #endif
66 #include <netinet/in.h>
67 #include <netinet/in_systm.h>
68 #include <netinet/ip.h>
69 #include <netinet/tcp.h>
70 #if !defined(__hpux) && !defined(linux)
71 # include <netinet/tcp_fsm.h>
72 #endif
73 #include <netinet/udp.h>
74 #include <netinet/ip_icmp.h>
75 #if !defined(_KERNEL)
76 # include "ipf.h"
77 #endif
78 #include "netinet/ip_compat.h"
79 #include "netinet/ip_fil.h"
80 #include "netinet/ip_nat.h"
81 #include "netinet/ip_frag.h"
82 #include "netinet/ip_state.h"
83 #include "netinet/ip_proxy.h"
84 #include "netinet/ip_lookup.h"
85 #include "netinet/ip_dstlist.h"
86 #include "netinet/ip_sync.h"
87 #ifdef USE_INET6
88 #include <netinet/icmp6.h>
89 #endif
90 #if FREEBSD_GE_REV(300000)
91 # include <sys/malloc.h>
92 # if defined(_KERNEL) && !defined(IPFILTER_LKM)
93 # include <sys/libkern.h>
94 # include <sys/systm.h>
95 # endif
96 #endif
97 /* END OF INCLUDES */
98
99
100 #if !defined(lint)
101 #if defined(__NetBSD__)
102 #include <sys/cdefs.h>
103 __KERNEL_RCSID(0, "$NetBSD: ip_state.c,v 1.12 2020/04/18 17:02:00 christos Exp $");
104 #else
105 static const char sccsid[] = "@(#)ip_state.c 1.8 6/5/96 (C) 1993-2000 Darren Reed";
106 static const char rcsid[] = "@(#)Id: ip_state.c,v 1.1.1.2 2012/07/22 13:45:37 darrenr Exp";
107 #endif
108 #endif
109
110
111 static const ipftuneable_t ipf_state_tuneables[] = {
112 { { (void *)offsetof(ipf_state_softc_t, ipf_state_max) },
113 "state_max", 1, 0x7fffffff,
114 stsizeof(ipf_state_softc_t, ipf_state_max),
115 0, NULL, NULL },
116 { { (void *)offsetof(ipf_state_softc_t, ipf_state_size) },
117 "state_size", 1, 0x7fffffff,
118 stsizeof(ipf_state_softc_t, ipf_state_size),
119 0, NULL, ipf_state_rehash },
120 { { (void *)offsetof(ipf_state_softc_t, ipf_state_lock) },
121 "state_lock", 0, 1,
122 stsizeof(ipf_state_softc_t, ipf_state_lock),
123 IPFT_RDONLY, NULL, NULL },
124 { { (void *)offsetof(ipf_state_softc_t, ipf_state_maxbucket) },
125 "state_maxbucket", 1, 0x7fffffff,
126 stsizeof(ipf_state_softc_t, ipf_state_maxbucket),
127 0, NULL, NULL },
128 { { (void *)offsetof(ipf_state_softc_t, ipf_state_logging) },
129 "state_logging",0, 1,
130 stsizeof(ipf_state_softc_t, ipf_state_logging),
131 0, NULL, NULL },
132 { { (void *)offsetof(ipf_state_softc_t, ipf_state_wm_high) },
133 "state_wm_high",2, 100,
134 stsizeof(ipf_state_softc_t, ipf_state_wm_high),
135 0, NULL, NULL },
136 { { (void *)offsetof(ipf_state_softc_t, ipf_state_wm_low) },
137 "state_wm_low", 1, 99,
138 stsizeof(ipf_state_softc_t, ipf_state_wm_low),
139 0, NULL, NULL },
140 { { (void *)offsetof(ipf_state_softc_t, ipf_state_wm_freq) },
141 "state_wm_freq",2, 999999,
142 stsizeof(ipf_state_softc_t, ipf_state_wm_freq),
143 0, NULL, NULL },
144 { { NULL },
145 NULL, 0, 0,
146 0,
147 0, NULL, NULL }
148 };
149
150 #define SINCL(x) ATOMIC_INCL(softs->x)
151 #define SBUMP(x) (softs->x)++
152 #define SBUMPD(x, y) do { (softs->x.y)++; DT(y); } while (0)
153 #define SBUMPDX(x, y, z)do { (softs->x.y)++; DT(z); } while (0)
154
155 #ifdef USE_INET6
156 static ipstate_t *ipf_checkicmp6matchingstate(fr_info_t *);
157 #endif
158 static int ipf_allowstateicmp(fr_info_t *, ipstate_t *, i6addr_t *);
159 static ipstate_t *ipf_matchsrcdst(fr_info_t *, ipstate_t *, i6addr_t *,
160 i6addr_t *, tcphdr_t *, u_32_t);
161 static ipstate_t *ipf_checkicmpmatchingstate(fr_info_t *);
162 static int ipf_state_flush_entry(ipf_main_softc_t *, void *);
163 static ips_stat_t *ipf_state_stats(ipf_main_softc_t *);
164 static int ipf_state_del(ipf_main_softc_t *, ipstate_t *, int);
165 static int ipf_state_remove(ipf_main_softc_t *, void *);
166 static int ipf_state_match(ipstate_t *is1, ipstate_t *is2);
167 static int ipf_state_matchaddresses(ipstate_t *is1, ipstate_t *is2);
168 static int ipf_state_matchipv4addrs(ipstate_t *is1, ipstate_t *is2);
169 static int ipf_state_matchipv6addrs(ipstate_t *is1, ipstate_t *is2);
170 static int ipf_state_matchisps(ipstate_t *is1, ipstate_t *is2);
171 static int ipf_state_matchports(udpinfo_t *is1, udpinfo_t *is2);
172 static int ipf_state_matcharray(ipstate_t *, int *, u_long);
173 static void ipf_ipsmove(ipf_state_softc_t *, ipstate_t *, u_int);
174 static int ipf_state_tcp(ipf_main_softc_t *, ipf_state_softc_t *,
175 fr_info_t *, tcphdr_t *, ipstate_t *);
176 static int ipf_tcpoptions(ipf_state_softc_t *, fr_info_t *,
177 tcphdr_t *, tcpdata_t *);
178 static ipstate_t *ipf_state_clone(fr_info_t *, tcphdr_t *, ipstate_t *);
179 static void ipf_fixinisn(fr_info_t *, ipstate_t *);
180 static void ipf_fixoutisn(fr_info_t *, ipstate_t *);
181 static void ipf_checknewisn(fr_info_t *, ipstate_t *);
182 static int ipf_state_iter(ipf_main_softc_t *, ipftoken_t *,
183 ipfgeniter_t *, ipfobj_t *);
184 static int ipf_state_gettable(ipf_main_softc_t *, ipf_state_softc_t *, char *);
185 static int ipf_state_tcpinwindow(struct fr_info *, struct tcpdata *,
186 struct tcpdata *, tcphdr_t *, int);
187
188 static int ipf_state_getent(ipf_main_softc_t *, ipf_state_softc_t *, void *);
189 static int ipf_state_putent(ipf_main_softc_t *, ipf_state_softc_t *, void *);
190
191 #define ONE_DAY IPF_TTLVAL(1 * 86400) /* 1 day */
192 #define FIVE_DAYS (5 * ONE_DAY)
193 #define DOUBLE_HASH(x) (((x) + softs->ipf_state_seed[(x) % \
194 softs->ipf_state_size]) % softs->ipf_state_size)
195
196
/* ------------------------------------------------------------------------ */
/* Function:    ipf_state_main_load                                         */
/* Returns:     int - always 0 (success)                                    */
/* Parameters:  Nil                                                         */
/*                                                                          */
/* Load-time hook for the state code.  It has nothing to do and is kept     */
/* only so that the load sequence reads the same for every component.       */
/* ------------------------------------------------------------------------ */
int
ipf_state_main_load(void)
{
	return (0);
}
210
211
/* ------------------------------------------------------------------------ */
/* Function:    ipf_state_main_unload                                       */
/* Returns:     int - always 0 (success)                                    */
/* Parameters:  Nil                                                         */
/*                                                                          */
/* Unload-time hook for the state code.  It has nothing to do and is kept   */
/* only so that the unload sequence reads the same for every component.     */
/* ------------------------------------------------------------------------ */
int
ipf_state_main_unload(void)
{
	return (0);
}
225
226
227 /* ------------------------------------------------------------------------ */
228 /* Function: ipf_state_soft_create */
229 /* Returns: void * - NULL = failure, else pointer to soft context */
230 /* Parameters: softc(I) - pointer to soft context main structure */
231 /* */
232 /* Create a new state soft context structure and populate it with the list */
233 /* of tunables and other default settings. */
234 /* ------------------------------------------------------------------------ */
235 void *
ipf_state_soft_create(ipf_main_softc_t * softc)236 ipf_state_soft_create(ipf_main_softc_t *softc)
237 {
238 ipf_state_softc_t *softs;
239
240 KMALLOC(softs, ipf_state_softc_t *);
241 if (softs == NULL)
242 return NULL;
243
244 bzero((char *)softs, sizeof(*softs));
245
246 softs->ipf_state_tune = ipf_tune_array_copy(softs,
247 sizeof(ipf_state_tuneables),
248 ipf_state_tuneables);
249 if (softs->ipf_state_tune == NULL) {
250 ipf_state_soft_destroy(softc, softs);
251 return NULL;
252 }
253 if (ipf_tune_array_link(softc, softs->ipf_state_tune) == -1) {
254 ipf_state_soft_destroy(softc, softs);
255 return NULL;
256 }
257
258 #ifdef IPFILTER_LOG
259 softs->ipf_state_logging = 1;
260 #else
261 softs->ipf_state_logging = 0;
262 #endif
263 softs->ipf_state_size = IPSTATE_SIZE,
264 softs->ipf_state_maxbucket = 0;
265 softs->ipf_state_wm_freq = IPF_TTLVAL(10);
266 softs->ipf_state_max = IPSTATE_MAX;
267 softs->ipf_state_wm_last = 0;
268 softs->ipf_state_wm_high = 99;
269 softs->ipf_state_wm_low = 90;
270 softs->ipf_state_inited = 0;
271 softs->ipf_state_lock = 0;
272 softs->ipf_state_doflush = 0;
273
274 return softs;
275 }
276
277
278 /* ------------------------------------------------------------------------ */
279 /* Function: ipf_state_soft_destroy */
280 /* Returns: Nil */
281 /* Parameters: softc(I) - pointer to soft context main structure */
282 /* arg(I) - pointer to local context to use */
283 /* */
284 /* Undo only what we did in soft create: unlink and free the tunables and */
285 /* free the soft context structure itself. */
286 /* ------------------------------------------------------------------------ */
287 void
ipf_state_soft_destroy(ipf_main_softc_t * softc,void * arg)288 ipf_state_soft_destroy(ipf_main_softc_t *softc, void *arg)
289 {
290 ipf_state_softc_t *softs = arg;
291
292 if (softs->ipf_state_tune != NULL) {
293 ipf_tune_array_unlink(softc, softs->ipf_state_tune);
294 KFREES(softs->ipf_state_tune, sizeof(ipf_state_tuneables));
295 softs->ipf_state_tune = NULL;
296 }
297
298 KFREE(softs);
299 }
300
301 static void *
ipf_state_seed_alloc(u_int state_size,u_int state_max)302 ipf_state_seed_alloc(u_int state_size, u_int state_max)
303 {
304 u_int i;
305 u_long *state_seed;
306 KMALLOCS(state_seed, u_long *, state_size * sizeof(*state_seed));
307 if (state_seed == NULL)
308 return NULL;
309
310 for (i = 0; i < state_size; i++) {
311 /*
312 * XXX - ipf_state_seed[X] should be a random number of sorts.
313 */
314 #if !defined(NEED_LOCAL_RAND) && defined(_KERNEL)
315 state_seed[i] = cprng_fast32();
316 #else
317 state_seed[i] = ((u_long)state_seed + i) * state_size;
318 state_seed[i] ^= 0xa5a55a5a;
319 state_seed[i] *= (u_long)state_seed;
320 state_seed[i] ^= 0x5a5aa5a5;
321 state_seed[i] *= state_max;
322 #endif
323 }
324 return state_seed;
325 }
326
327
328 /* ------------------------------------------------------------------------ */
329 /* Function: ipf_state_soft_init */
330 /* Returns: int - 0 == success, -1 == failure */
331 /* Parameters: softc(I) - pointer to soft context main structure */
332 /* arg(I) - pointer to local context to use */
333 /* */
334 /* Initialise the state soft context structure so it is ready for use. */
335 /* This involves: */
336 /* - allocating a hash table and zero'ing it out */
337 /* - building a secondary table of seeds for double hashing to make it more */
338 /* difficult to attempt to attack the hash table itself (for DoS) */
339 /* - initialise all of the timeout queues, including a table for TCP, some */
340 /* pairs of query/response for UDP and other IP protocols (typically the */
341 /* reply queue has a shorter timeout than the query) */
342 /* ------------------------------------------------------------------------ */
343 int
ipf_state_soft_init(ipf_main_softc_t * softc,void * arg)344 ipf_state_soft_init(ipf_main_softc_t *softc, void *arg)
345 {
346 ipf_state_softc_t *softs = arg;
347 int i;
348
349 KMALLOCS(softs->ipf_state_table,
350 ipstate_t **, softs->ipf_state_size * sizeof(ipstate_t *));
351 if (softs->ipf_state_table == NULL)
352 return -1;
353
354 bzero((char *)softs->ipf_state_table,
355 softs->ipf_state_size * sizeof(ipstate_t *));
356
357 softs->ipf_state_seed = ipf_state_seed_alloc(softs->ipf_state_size,
358 softs->ipf_state_max);
359 if (softs->ipf_state_seed == NULL)
360 return -2;
361
362 KMALLOCS(softs->ipf_state_stats.iss_bucketlen, u_int *,
363 softs->ipf_state_size * sizeof(u_int));
364 if (softs->ipf_state_stats.iss_bucketlen == NULL)
365 return -3;
366
367 bzero((char *)softs->ipf_state_stats.iss_bucketlen,
368 softs->ipf_state_size * sizeof(u_int));
369
370 if (softs->ipf_state_maxbucket == 0) {
371 for (i = softs->ipf_state_size; i > 0; i >>= 1)
372 softs->ipf_state_maxbucket++;
373 softs->ipf_state_maxbucket *= 2;
374 }
375
376 ipf_sttab_init(softc, softs->ipf_state_tcptq);
377 softs->ipf_state_stats.iss_tcptab = softs->ipf_state_tcptq;
378 softs->ipf_state_tcptq[IPF_TCP_NSTATES - 1].ifq_next =
379 &softs->ipf_state_udptq;
380
381 IPFTQ_INIT(&softs->ipf_state_udptq, softc->ipf_udptimeout,
382 "ipftq udp tab");
383 softs->ipf_state_udptq.ifq_next = &softs->ipf_state_udpacktq;
384
385 IPFTQ_INIT(&softs->ipf_state_udpacktq, softc->ipf_udpacktimeout,
386 "ipftq udpack tab");
387 softs->ipf_state_udpacktq.ifq_next = &softs->ipf_state_icmptq;
388
389 IPFTQ_INIT(&softs->ipf_state_icmptq, softc->ipf_icmptimeout,
390 "ipftq icmp tab");
391 softs->ipf_state_icmptq.ifq_next = &softs->ipf_state_icmpacktq;
392
393 IPFTQ_INIT(&softs->ipf_state_icmpacktq, softc->ipf_icmpacktimeout,
394 "ipftq icmpack tab");
395 softs->ipf_state_icmpacktq.ifq_next = &softs->ipf_state_iptq;
396
397 IPFTQ_INIT(&softs->ipf_state_iptq, softc->ipf_iptimeout,
398 "ipftq iptimeout tab");
399 softs->ipf_state_iptq.ifq_next = &softs->ipf_state_pending;
400
401 IPFTQ_INIT(&softs->ipf_state_pending, IPF_HZ_DIVIDE, "ipftq pending");
402 softs->ipf_state_pending.ifq_next = &softs->ipf_state_deletetq;
403
404 IPFTQ_INIT(&softs->ipf_state_deletetq, 1, "ipftq delete");
405 softs->ipf_state_deletetq.ifq_next = NULL;
406
407 MUTEX_INIT(&softs->ipf_stinsert, "ipf state insert mutex");
408
409
410 softs->ipf_state_wm_last = softc->ipf_ticks;
411 softs->ipf_state_inited = 1;
412
413 return 0;
414 }
415
416
417 /* ------------------------------------------------------------------------ */
418 /* Function: ipf_state_soft_fini */
419 /* Returns: int - 0 = success, -1 = failure */
420 /* Parameters: softc(I) - pointer to soft context main structure */
421 /* arg(I) - pointer to local context to use */
422 /* */
423 /* Release and destroy any resources acquired or initialised so that */
424 /* IPFilter can be unloaded or re-initialised. */
425 /* ------------------------------------------------------------------------ */
426 int
ipf_state_soft_fini(ipf_main_softc_t * softc,void * arg)427 ipf_state_soft_fini(ipf_main_softc_t *softc, void *arg)
428 {
429 ipf_state_softc_t *softs = arg;
430 ipftq_t *ifq, *ifqnext;
431 ipstate_t *is;
432
433 while ((is = softs->ipf_state_list) != NULL)
434 ipf_state_del(softc, is, ISL_UNLOAD);
435
436 /*
437 * Proxy timeout queues are not cleaned here because although they
438 * exist on the state list, appr_unload is called after
439 * ipf_state_unload and the proxies actually are responsible for them
440 * being created. Should the proxy timeouts have their own list?
441 * There's no real justification as this is the only complication.
442 */
443 for (ifq = softs->ipf_state_usertq; ifq != NULL; ifq = ifqnext) {
444 ifqnext = ifq->ifq_next;
445
446 if (ipf_deletetimeoutqueue(ifq) == 0)
447 ipf_freetimeoutqueue(softc, ifq);
448 }
449
450 softs->ipf_state_stats.iss_inuse = 0;
451 softs->ipf_state_stats.iss_active = 0;
452
453 if (softs->ipf_state_inited == 1) {
454 softs->ipf_state_inited = 0;
455 ipf_sttab_destroy(softs->ipf_state_tcptq);
456 MUTEX_DESTROY(&softs->ipf_state_udptq.ifq_lock);
457 MUTEX_DESTROY(&softs->ipf_state_icmptq.ifq_lock);
458 MUTEX_DESTROY(&softs->ipf_state_udpacktq.ifq_lock);
459 MUTEX_DESTROY(&softs->ipf_state_icmpacktq.ifq_lock);
460 MUTEX_DESTROY(&softs->ipf_state_iptq.ifq_lock);
461 MUTEX_DESTROY(&softs->ipf_state_deletetq.ifq_lock);
462 MUTEX_DESTROY(&softs->ipf_state_pending.ifq_lock);
463 MUTEX_DESTROY(&softs->ipf_stinsert);
464 }
465
466 if (softs->ipf_state_table != NULL) {
467 KFREES(softs->ipf_state_table,
468 softs->ipf_state_size * sizeof(*softs->ipf_state_table));
469 softs->ipf_state_table = NULL;
470 }
471
472 if (softs->ipf_state_seed != NULL) {
473 KFREES(softs->ipf_state_seed,
474 softs->ipf_state_size * sizeof(*softs->ipf_state_seed));
475 softs->ipf_state_seed = NULL;
476 }
477
478 if (softs->ipf_state_stats.iss_bucketlen != NULL) {
479 KFREES(softs->ipf_state_stats.iss_bucketlen,
480 softs->ipf_state_size * sizeof(u_int));
481 softs->ipf_state_stats.iss_bucketlen = NULL;
482 }
483
484 return 0;
485 }
486
487
488 /* ------------------------------------------------------------------------ */
489 /* Function: ipf_state_set_lock */
490 /* Returns: Nil */
491 /* Parameters: arg(I) - pointer to local context to use */
492 /* tmp(I) - new value for lock */
493 /* */
494 /* Stub function that allows for external manipulation of ipf_state_lock */
495 /* ------------------------------------------------------------------------ */
496 void
ipf_state_setlock(void * arg,int tmp)497 ipf_state_setlock(void *arg, int tmp)
498 {
499 ipf_state_softc_t *softs = arg;
500
501 softs->ipf_state_lock = tmp;
502 }
503
504
505 /* ------------------------------------------------------------------------ */
506 /* Function: ipf_state_stats */
507 /* Returns: ips_state_t* - pointer to state stats structure */
508 /* Parameters: softc(I) - pointer to soft context main structure */
509 /* */
510 /* Put all the current numbers and pointers into a single struct and return */
511 /* a pointer to it. */
512 /* ------------------------------------------------------------------------ */
513 static ips_stat_t *
ipf_state_stats(ipf_main_softc_t * softc)514 ipf_state_stats(ipf_main_softc_t *softc)
515 {
516 ipf_state_softc_t *softs = softc->ipf_state_soft;
517 ips_stat_t *issp = &softs->ipf_state_stats;
518
519 issp->iss_state_size = softs->ipf_state_size;
520 issp->iss_state_max = softs->ipf_state_max;
521 issp->iss_table = softs->ipf_state_table;
522 issp->iss_list = softs->ipf_state_list;
523 issp->iss_ticks = softc->ipf_ticks;
524
525 #ifdef IPFILTER_LOGGING
526 issp->iss_log_ok = ipf_log_logok(softc, IPF_LOGSTATE);
527 issp->iss_log_fail = ipf_log_failures(softc, IPF_LOGSTATE);
528 #else
529 issp->iss_log_ok = 0;
530 issp->iss_log_fail = 0;
531 #endif
532 return issp;
533 }
534
535 /* ------------------------------------------------------------------------ */
536 /* Function: ipf_state_remove */
537 /* Returns: int - 0 == success, != 0 == failure */
538 /* Parameters: softc(I) - pointer to soft context main structure */
539 /* data(I) - pointer to state structure to delete from table */
540 /* */
541 /* Search for a state structure that matches the one passed, according to */
542 /* the IP addresses and other protocol specific information. */
543 /* ------------------------------------------------------------------------ */
544 static int
ipf_state_remove(ipf_main_softc_t * softc,void * data)545 ipf_state_remove(ipf_main_softc_t *softc, void *data)
546 {
547 ipf_state_softc_t *softs = softc->ipf_state_soft;
548 ipstate_t *sp, st;
549 int error;
550
551 sp = &st;
552 error = ipf_inobj(softc, data, NULL, &st, IPFOBJ_IPSTATE);
553 if (error)
554 return EFAULT;
555
556 WRITE_ENTER(&softc->ipf_state);
557 for (sp = softs->ipf_state_list; sp; sp = sp->is_next)
558 if ((sp->is_p == st.is_p) && (sp->is_v == st.is_v) &&
559 !bcmp((void *)&sp->is_src, (void *)&st.is_src,
560 sizeof(st.is_src)) &&
561 !bcmp((void *)&sp->is_dst, (void *)&st.is_src,
562 sizeof(st.is_dst)) &&
563 !bcmp((void *)&sp->is_ps, (void *)&st.is_ps,
564 sizeof(st.is_ps))) {
565 ipf_state_del(softc, sp, ISL_REMOVE);
566 RWLOCK_EXIT(&softc->ipf_state);
567 return 0;
568 }
569 RWLOCK_EXIT(&softc->ipf_state);
570
571 IPFERROR(100001);
572 return ESRCH;
573 }
574
575
576 /* ------------------------------------------------------------------------ */
577 /* Function: ipf_state_ioctl */
578 /* Returns: int - 0 == success, != 0 == failure */
579 /* Parameters: softc(I) - pointer to soft context main structure */
580 /* data(I) - pointer to ioctl data */
581 /* cmd(I) - ioctl command integer */
582 /* mode(I) - file mode bits used with open */
583 /* uid(I) - uid of process making the ioctl call */
584 /* ctx(I) - pointer specific to context of the call */
585 /* */
586 /* Processes an ioctl call made to operate on the IP Filter state device. */
587 /* ------------------------------------------------------------------------ */
588 int
ipf_state_ioctl(ipf_main_softc_t * softc,void * data,ioctlcmd_t cmd,int mode,int uid,void * ctx)589 ipf_state_ioctl(ipf_main_softc_t *softc, void *data, ioctlcmd_t cmd, int mode,
590 int uid, void *ctx)
591 {
592 ipf_state_softc_t *softs = softc->ipf_state_soft;
593 int arg, ret, error = 0;
594 SPL_INT(s);
595
596 switch (cmd)
597 {
598 /*
599 * Delete an entry from the state table.
600 */
601 case SIOCDELST :
602 error = ipf_state_remove(softc, data);
603 break;
604
605 /*
606 * Flush the state table
607 */
608 case SIOCIPFFL :
609 error = BCOPYIN(data, &arg, sizeof(arg));
610 if (error != 0) {
611 IPFERROR(100002);
612 error = EFAULT;
613
614 } else {
615 WRITE_ENTER(&softc->ipf_state);
616 ret = ipf_state_flush(softc, arg, 4);
617 RWLOCK_EXIT(&softc->ipf_state);
618
619 error = BCOPYOUT(&ret, data, sizeof(ret));
620 if (error != 0) {
621 IPFERROR(100003);
622 error = EFAULT;
623 }
624 }
625 break;
626
627 #ifdef USE_INET6
628 case SIOCIPFL6 :
629 error = BCOPYIN(data, &arg, sizeof(arg));
630 if (error != 0) {
631 IPFERROR(100004);
632 error = EFAULT;
633
634 } else {
635 WRITE_ENTER(&softc->ipf_state);
636 ret = ipf_state_flush(softc, arg, 6);
637 RWLOCK_EXIT(&softc->ipf_state);
638
639 error = BCOPYOUT(&ret, data, sizeof(ret));
640 if (error != 0) {
641 IPFERROR(100005);
642 error = EFAULT;
643 }
644 }
645 break;
646 #endif
647
648 case SIOCMATCHFLUSH :
649 WRITE_ENTER(&softc->ipf_state);
650 error = ipf_state_matchflush(softc, data);
651 RWLOCK_EXIT(&softc->ipf_state);
652 break;
653
654 #ifdef IPFILTER_LOG
655 /*
656 * Flush the state log.
657 */
658 case SIOCIPFFB :
659 if (!(mode & FWRITE)) {
660 IPFERROR(100008);
661 error = EPERM;
662 } else {
663 int tmp;
664
665 tmp = ipf_log_clear(softc, IPL_LOGSTATE);
666 error = BCOPYOUT(&tmp, data, sizeof(tmp));
667 if (error != 0) {
668 IPFERROR(100009);
669 error = EFAULT;
670 }
671 }
672 break;
673
674 /*
675 * Turn logging of state information on/off.
676 */
677 case SIOCSETLG :
678 if (!(mode & FWRITE)) {
679 IPFERROR(100010);
680 error = EPERM;
681 } else {
682 error = BCOPYIN(data, &softs->ipf_state_logging,
683 sizeof(softs->ipf_state_logging));
684 if (error != 0) {
685 IPFERROR(100011);
686 error = EFAULT;
687 }
688 }
689 break;
690
691 /*
692 * Return the current state of logging.
693 */
694 case SIOCGETLG :
695 error = BCOPYOUT(&softs->ipf_state_logging, data,
696 sizeof(softs->ipf_state_logging));
697 if (error != 0) {
698 IPFERROR(100012);
699 error = EFAULT;
700 }
701 break;
702
703 /*
704 * Return the number of bytes currently waiting to be read.
705 */
706 case FIONREAD :
707 arg = ipf_log_bytesused(softc, IPL_LOGSTATE);
708 error = BCOPYOUT(&arg, data, sizeof(arg));
709 if (error != 0) {
710 IPFERROR(100013);
711 error = EFAULT;
712 }
713 break;
714 #endif
715
716 /*
717 * Get the current state statistics.
718 */
719 case SIOCGETFS :
720 error = ipf_outobj(softc, data, ipf_state_stats(softc),
721 IPFOBJ_STATESTAT);
722 break;
723
724 /*
725 * Lock/Unlock the state table. (Locking prevents any changes, which
726 * means no packets match).
727 */
728 case SIOCSTLCK :
729 if (!(mode & FWRITE)) {
730 IPFERROR(100014);
731 error = EPERM;
732 } else {
733 error = ipf_lock(data, &softs->ipf_state_lock);
734 }
735 break;
736
737 /*
738 * Add an entry to the current state table.
739 */
740 case SIOCSTPUT :
741 if (!softs->ipf_state_lock || !(mode &FWRITE)) {
742 IPFERROR(100015);
743 error = EACCES;
744 break;
745 }
746 error = ipf_state_putent(softc, softs, data);
747 break;
748
749 /*
750 * Get a state table entry.
751 */
752 case SIOCSTGET :
753 if (!softs->ipf_state_lock) {
754 IPFERROR(100016);
755 error = EACCES;
756 break;
757 }
758 error = ipf_state_getent(softc, softs, data);
759 break;
760
761 /*
762 * Return a copy of the hash table bucket lengths
763 */
764 case SIOCSTAT1 :
765 error = BCOPYOUT(softs->ipf_state_stats.iss_bucketlen, data,
766 softs->ipf_state_size * sizeof(u_int));
767 if (error != 0) {
768 IPFERROR(100017);
769 error = EFAULT;
770 }
771 break;
772
773 case SIOCGENITER :
774 {
775 ipftoken_t *token;
776 ipfgeniter_t iter;
777 ipfobj_t obj;
778
779 error = ipf_inobj(softc, data, &obj, &iter, IPFOBJ_GENITER);
780 if (error != 0)
781 break;
782
783 SPL_SCHED(s);
784 token = ipf_token_find(softc, IPFGENITER_STATE, uid, ctx);
785 if (token != NULL) {
786 error = ipf_state_iter(softc, token, &iter, &obj);
787 WRITE_ENTER(&softc->ipf_tokens);
788 ipf_token_deref(softc, token);
789 RWLOCK_EXIT(&softc->ipf_tokens);
790 } else {
791 IPFERROR(100018);
792 error = ESRCH;
793 }
794 SPL_X(s);
795 break;
796 }
797
798 case SIOCGTABL :
799 error = ipf_state_gettable(softc, softs, data);
800 break;
801
802 case SIOCIPFDELTOK :
803 error = BCOPYIN(data, &arg, sizeof(arg));
804 if (error != 0) {
805 IPFERROR(100019);
806 error = EFAULT;
807 } else {
808 SPL_SCHED(s);
809 error = ipf_token_del(softc, arg, uid, ctx);
810 SPL_X(s);
811 }
812 break;
813
814 case SIOCGTQTAB :
815 error = ipf_outobj(softc, data, softs->ipf_state_tcptq,
816 IPFOBJ_STATETQTAB);
817 break;
818
819 default :
820 IPFERROR(100020);
821 error = EINVAL;
822 break;
823 }
824 return error;
825 }
826
827
828 /* ------------------------------------------------------------------------ */
829 /* Function: ipf_state_getent */
830 /* Returns: int - 0 == success, != 0 == failure */
831 /* Parameters: softc(I) - pointer to soft context main structure */
832 /* softs(I) - pointer to state context structure */
833 /* data(I) - pointer to state structure to retrieve from table*/
834 /* */
835 /* Copy out state information from the kernel to a user space process. If */
836 /* there is a filter rule associated with the state entry, copy that out */
837 /* as well. The entry to copy out is taken from the value of "ips_next" in */
838 /* the struct passed in and if not null and not found in the list of current*/
839 /* state entries, the retrieval fails. */
840 /* ------------------------------------------------------------------------ */
841 static int
ipf_state_getent(ipf_main_softc_t * softc,ipf_state_softc_t * softs,void * data)842 ipf_state_getent(ipf_main_softc_t *softc, ipf_state_softc_t *softs, void *data)
843 {
844 ipstate_t *is, *isn;
845 ipstate_save_t ips;
846 int error;
847
848 error = ipf_inobj(softc, data, NULL, &ips, IPFOBJ_STATESAVE);
849 if (error)
850 return EFAULT;
851
852 READ_ENTER(&softc->ipf_state);
853 isn = ips.ips_next;
854 if (isn == NULL) {
855 isn = softs->ipf_state_list;
856 if (isn == NULL) {
857 if (ips.ips_next == NULL) {
858 RWLOCK_EXIT(&softc->ipf_state);
859 IPFERROR(100021);
860 return ENOENT;
861 }
862 return 0;
863 }
864 } else {
865 /*
866 * Make sure the pointer we're copying from exists in the
867 * current list of entries. Security precaution to prevent
868 * copying of random kernel data.
869 */
870 for (is = softs->ipf_state_list; is; is = is->is_next)
871 if (is == isn)
872 break;
873 if (!is) {
874 RWLOCK_EXIT(&softc->ipf_state);
875 IPFERROR(100022);
876 return ESRCH;
877 }
878 }
879 ips.ips_next = isn->is_next;
880 bcopy((char *)isn, (char *)&ips.ips_is, sizeof(ips.ips_is));
881 ips.ips_rule = isn->is_rule;
882 if (isn->is_rule != NULL)
883 bcopy((char *)isn->is_rule, (char *)&ips.ips_fr,
884 sizeof(ips.ips_fr));
885 RWLOCK_EXIT(&softc->ipf_state);
886 error = ipf_outobj(softc, data, &ips, IPFOBJ_STATESAVE);
887 return error;
888 }
889
890
891 /* ------------------------------------------------------------------------ */
892 /* Function: ipf_state_putent */
893 /* Returns: int - 0 == success, != 0 == failure */
894 /* Parameters: softc(I) - pointer to soft context main structure */
895 /* softs(I) - pointer to state context structure */
896 /* data(I) - pointer to state information struct */
897 /* */
898 /* This function implements the SIOCSTPUT ioctl: insert a state entry into */
899 /* the state table. If the state info. includes a pointer to a filter rule */
900 /* then also add in an orphaned rule (will not show up in any "ipfstat -io" */
901 /* output. */
902 /* ------------------------------------------------------------------------ */
903 int
ipf_state_putent(ipf_main_softc_t * softc,ipf_state_softc_t * softs,void * data)904 ipf_state_putent(ipf_main_softc_t *softc, ipf_state_softc_t *softs, void *data)
905 {
906 ipstate_t *is, *isn;
907 ipstate_save_t ips;
908 int error, i;
909 frentry_t *fr;
910 char *name;
911
912 error = ipf_inobj(softc, data, NULL, &ips, IPFOBJ_STATESAVE);
913 if (error != 0)
914 return error;
915
916 KMALLOC(isn, ipstate_t *);
917 if (isn == NULL) {
918 IPFERROR(100023);
919 return ENOMEM;
920 }
921
922 bcopy((char *)&ips.ips_is, (char *)isn, sizeof(*isn));
923 bzero((char *)isn, offsetof(struct ipstate, is_pkts));
924 isn->is_sti.tqe_pnext = NULL;
925 isn->is_sti.tqe_next = NULL;
926 isn->is_sti.tqe_ifq = NULL;
927 isn->is_sti.tqe_parent = isn;
928 isn->is_ifp[0] = NULL;
929 isn->is_ifp[1] = NULL;
930 isn->is_ifp[2] = NULL;
931 isn->is_ifp[3] = NULL;
932 isn->is_sync = NULL;
933 fr = ips.ips_rule;
934
935 if (fr == NULL) {
936 int inserr;
937
938 READ_ENTER(&softc->ipf_state);
939 inserr = ipf_state_insert(softc, isn, 0);
940 MUTEX_EXIT(&isn->is_lock);
941 RWLOCK_EXIT(&softc->ipf_state);
942
943 return inserr;
944 }
945
946 if (isn->is_flags & SI_NEWFR) {
947 KMALLOC(fr, frentry_t *);
948 if (fr == NULL) {
949 KFREE(isn);
950 IPFERROR(100024);
951 return ENOMEM;
952 }
953 bcopy((char *)&ips.ips_fr, (char *)fr, sizeof(*fr));
954 isn->is_rule = fr;
955 ips.ips_is.is_rule = fr;
956 MUTEX_NUKE(&fr->fr_lock);
957 MUTEX_INIT(&fr->fr_lock, "state filter rule lock");
958
959 /*
960 * Look up all the interface names in the rule.
961 */
962 for (i = 0; i < 4; i++) {
963 if (fr->fr_ifnames[i] == -1) {
964 fr->fr_ifas[i] = NULL;
965 continue;
966 }
967 name = fr->fr_names + fr->fr_ifnames[i];
968 fr->fr_ifas[i] = ipf_resolvenic(softc, name,
969 fr->fr_family);
970 }
971
972 for (i = 0; i < 4; i++) {
973 name = isn->is_ifname[i];
974 isn->is_ifp[i] = ipf_resolvenic(softc, name,
975 isn->is_v);
976 }
977
978 fr->fr_ref = 0;
979 fr->fr_dsize = 0;
980 fr->fr_data = NULL;
981 fr->fr_type = FR_T_NONE;
982
983 (void) ipf_resolvedest(softc, fr->fr_names, &fr->fr_tifs[0],
984 fr->fr_family);
985 (void) ipf_resolvedest(softc, fr->fr_names, &fr->fr_tifs[1],
986 fr->fr_family);
987 (void) ipf_resolvedest(softc, fr->fr_names, &fr->fr_dif,
988 fr->fr_family);
989
990 /*
991 * send a copy back to userland of what we ended up
992 * to allow for verification.
993 */
994 error = ipf_outobj(softc, data, &ips, IPFOBJ_STATESAVE);
995 if (error != 0) {
996 KFREE(isn);
997 MUTEX_DESTROY(&fr->fr_lock);
998 KFREE(fr);
999 IPFERROR(100025);
1000 return EFAULT;
1001 }
1002 READ_ENTER(&softc->ipf_state);
1003 error = ipf_state_insert(softc, isn, 0);
1004 MUTEX_EXIT(&isn->is_lock);
1005 RWLOCK_EXIT(&softc->ipf_state);
1006
1007 } else {
1008 READ_ENTER(&softc->ipf_state);
1009 for (is = softs->ipf_state_list; is; is = is->is_next)
1010 if (is->is_rule == fr) {
1011 error = ipf_state_insert(softc, isn, 0);
1012 MUTEX_EXIT(&isn->is_lock);
1013 break;
1014 }
1015
1016 if (is == NULL) {
1017 KFREE(isn);
1018 isn = NULL;
1019 }
1020 RWLOCK_EXIT(&softc->ipf_state);
1021
1022 if (isn == NULL) {
1023 IPFERROR(100033);
1024 error = ESRCH;
1025 }
1026 }
1027
1028 return error;
1029 }
1030
1031
1032 /* ------------------------------------------------------------------------ */
1033 /* Function: ipf_state_insert */
1034 /* Returns: int - 0 == success, -1 == failure */
1035 /* Parameters: softc(I) - pointer to soft context main structure */
1036 /* Parameters: is(I) - pointer to state structure */
1037 /* rev(I) - flag indicating direction of packet */
1038 /* */
1039 /* Inserts a state structure into the hash table (for lookups) and the list */
1040 /* of state entries (for enumeration). Resolves all of the interface names */
1041 /* to pointers and adjusts running stats for the hash table as appropriate. */
1042 /* */
1043 /* This function can fail if the filter rule has had a population policy of */
1044 /* IP addresses used with stateful filtering assigned to it. */
1045 /* */
1046 /* Locking: it is assumed that some kind of lock on ipf_state is held. */
1047 /* Exits with is_lock initialised and held - *EVEN IF ERROR*. */
1048 /* ------------------------------------------------------------------------ */
1049 int
ipf_state_insert(ipf_main_softc_t * softc,ipstate_t * is,int rev)1050 ipf_state_insert(ipf_main_softc_t *softc, ipstate_t *is, int rev)
1051 {
1052 ipf_state_softc_t *softs = softc->ipf_state_soft;
1053 frentry_t *fr;
1054 u_int hv;
1055 int i;
1056
1057 /*
1058 * Look up all the interface names in the state entry.
1059 */
1060 for (i = 0; i < 4; i++) {
1061 if (is->is_ifp[i] != NULL)
1062 continue;
1063 is->is_ifp[i] = ipf_resolvenic(softc, is->is_ifname[i],
1064 is->is_v);
1065 }
1066
1067 /*
1068 * If we could trust is_hv, then the modulus would not be needed,
1069 * but when running with IPFILTER_SYNC, this stops bad values.
1070 */
1071 hv = is->is_hv % softs->ipf_state_size;
1072 /* TRACE is, hv */
1073 is->is_hv = hv;
1074
1075 /*
1076 * We need to get both of these locks...the first because it is
1077 * possible that once the insert is complete another packet might
1078 * come along, match the entry and want to update it.
1079 */
1080 MUTEX_INIT(&is->is_lock, "ipf state entry");
1081 MUTEX_ENTER(&is->is_lock);
1082 MUTEX_ENTER(&softs->ipf_stinsert);
1083
1084 fr = is->is_rule;
1085 if (fr != NULL) {
1086 if ((fr->fr_srctrack.ht_max_nodes != 0) &&
1087 (ipf_ht_node_add(softc, &fr->fr_srctrack,
1088 is->is_family, &is->is_src) == -1)) {
1089 SBUMPD(ipf_state_stats, iss_max_track);
1090 MUTEX_EXIT(&softs->ipf_stinsert);
1091 return -1;
1092 }
1093
1094 MUTEX_ENTER(&fr->fr_lock);
1095 fr->fr_ref++;
1096 MUTEX_EXIT(&fr->fr_lock);
1097 fr->fr_statecnt++;
1098 }
1099
1100 if (is->is_flags & (SI_WILDP|SI_WILDA)) {
1101 DT(iss_wild_plus_one);
1102 SINCL(ipf_state_stats.iss_wild);
1103 }
1104
1105 SBUMP(ipf_state_stats.iss_proto[is->is_p]);
1106 SBUMP(ipf_state_stats.iss_active_proto[is->is_p]);
1107
1108 /*
1109 * add into list table.
1110 */
1111 if (softs->ipf_state_list != NULL)
1112 softs->ipf_state_list->is_pnext = &is->is_next;
1113 is->is_pnext = &softs->ipf_state_list;
1114 is->is_next = softs->ipf_state_list;
1115 softs->ipf_state_list = is;
1116
1117 if (softs->ipf_state_table[hv] != NULL)
1118 softs->ipf_state_table[hv]->is_phnext = &is->is_hnext;
1119 else
1120 softs->ipf_state_stats.iss_inuse++;
1121 is->is_phnext = softs->ipf_state_table + hv;
1122 is->is_hnext = softs->ipf_state_table[hv];
1123 softs->ipf_state_table[hv] = is;
1124 softs->ipf_state_stats.iss_bucketlen[hv]++;
1125 softs->ipf_state_stats.iss_active++;
1126 MUTEX_EXIT(&softs->ipf_stinsert);
1127
1128 ipf_state_setqueue(softc, is, rev);
1129
1130 return 0;
1131 }
1132
1133
1134 /* ------------------------------------------------------------------------ */
1135 /* Function: ipf_state_matchipv4addrs */
1136 /* Returns: int - 2 addresses match (strong match), 1 reverse match, */
1137 /* 0 no match */
1138 /* Parameters: is1, is2 pointers to states we are checking */
1139 /* */
1140 /* Function matches IPv4 addresses it returns strong match for ICMP proto */
1141 /* even there is only reverse match */
1142 /* ------------------------------------------------------------------------ */
1143 static int
ipf_state_matchipv4addrs(ipstate_t * is1,ipstate_t * is2)1144 ipf_state_matchipv4addrs(ipstate_t *is1, ipstate_t *is2)
1145 {
1146 int rv;
1147
1148 if (is1->is_saddr == is2->is_saddr && is1->is_daddr == is2->is_daddr)
1149 rv = 2;
1150 else if (is1->is_saddr == is2->is_daddr &&
1151 is1->is_daddr == is2->is_saddr) {
1152 /* force strong match for ICMP protocol */
1153 rv = (is1->is_p == IPPROTO_ICMP) ? 2 : 1;
1154 }
1155 else
1156 rv = 0;
1157
1158 return (rv);
1159 }
1160
1161
1162 /* ------------------------------------------------------------------------ */
1163 /* Function: ipf_state_matchipv6addrs */
1164 /* Returns: int - 2 addresses match (strong match), 1 reverse match, */
1165 /* 0 no match */
1166 /* Parameters: is1, is2 pointers to states we are checking */
1167 /* */
1168 /* Function matches IPv6 addresses it returns strong match for ICMP proto */
1169 /* even there is only reverse match */
1170 /* ------------------------------------------------------------------------ */
1171 static int
ipf_state_matchipv6addrs(ipstate_t * is1,ipstate_t * is2)1172 ipf_state_matchipv6addrs(ipstate_t *is1, ipstate_t *is2)
1173 {
1174 int rv;
1175
1176 if (IP6_EQ(&is1->is_src, &is2->is_src) &&
1177 IP6_EQ(&is1->is_dst, &is2->is_dst))
1178 rv = 2;
1179 else if (IP6_EQ(&is1->is_src, &is2->is_dst) &&
1180 IP6_EQ(&is1->is_dst, &is2->is_src)) {
1181 /* force strong match for ICMPv6 protocol */
1182 rv = (is1->is_p == IPPROTO_ICMPV6) ? 2 : 1;
1183 }
1184 else
1185 rv = 0;
1186
1187 return (rv);
1188 }
1189
1190
1191 /* ------------------------------------------------------------------------ */
1192 /* Function: ipf_state_matchaddresses */
1193 /* Returns: int - 2 addresses match, 1 reverse match, zero no match */
1194 /* Parameters: is1, is2 pointers to states we are checking */
1195 /* */
1196 /* function retruns true if two pairs of addresses belong to single */
1197 /* connection. suppose there are two endpoints: */
1198 /* endpoint1 1.1.1.1 */
1199 /* endpoint2 1.1.1.2 */
1200 /* */
1201 /* the state is established by packet flying from .1 to .2 so we see: */
1202 /* is1->src = 1.1.1.1 */
1203 /* is1->dst = 1.1.1.2 */
1204 /* now endpoint 1.1.1.2 sends answer */
1205 /* retreives is1 record created by first packat and compares it with is2 */
1206 /* temporal record, is2 is initialized as follows: */
1207 /* is2->src = 1.1.1.2 */
1208 /* is2->dst = 1.1.1.1 */
1209 /* in this case 1 will be returned */
1210 /* */
1211 /* the ipf_matchaddresses() assumes those two records to be same. of course */
1212 /* the ipf_matchaddresses() also assume records are same in case you pass */
1213 /* identical arguments (i.e. ipf_matchaddress(is1, is1) would return 2 */
1214 /* ------------------------------------------------------------------------ */
1215 static int
ipf_state_matchaddresses(ipstate_t * is1,ipstate_t * is2)1216 ipf_state_matchaddresses(ipstate_t *is1, ipstate_t *is2)
1217 {
1218 int rv;
1219
1220 if (is1->is_v == 4) {
1221 rv = ipf_state_matchipv4addrs(is1, is2);
1222 }
1223 else {
1224 rv = ipf_state_matchipv6addrs(is1, is2);
1225 }
1226
1227 return (rv);
1228 }
1229
1230
1231 /* ------------------------------------------------------------------------ */
1232 /* Function: ipf_matchports */
1233 /* Returns: int - 2 match, 1 rverse match, 0 no match */
1234 /* Parameters: ppairs1, ppairs - src, dst ports we want to match */
1235 /* */
1236 /* performs the same match for isps members as for addresses */
1237 /* ------------------------------------------------------------------------ */
1238 static int
ipf_state_matchports(udpinfo_t * ppairs1,udpinfo_t * ppairs2)1239 ipf_state_matchports(udpinfo_t *ppairs1, udpinfo_t *ppairs2)
1240 {
1241 int rv;
1242
1243 if (ppairs1->us_sport == ppairs2->us_sport &&
1244 ppairs1->us_dport == ppairs2->us_dport)
1245 rv = 2;
1246 else if (ppairs1->us_sport == ppairs2->us_dport &&
1247 ppairs1->us_dport == ppairs2->us_sport)
1248 rv = 1;
1249 else
1250 rv = 0;
1251
1252 return (rv);
1253 }
1254
1255
1256 /* ------------------------------------------------------------------------ */
1257 /* Function: ipf_matchisps */
1258 /* Returns: int - nonzero if isps members match, 0 nomatch */
1259 /* Parameters: is1, is2 - states we want to match */
1260 /* */
1261 /* performs the same match for isps members as for addresses */
1262 /* ------------------------------------------------------------------------ */
1263 static int
ipf_state_matchisps(ipstate_t * is1,ipstate_t * is2)1264 ipf_state_matchisps(ipstate_t *is1, ipstate_t *is2)
1265 {
1266 int rv;
1267
1268 if (is1->is_p == is2->is_p) {
1269 switch (is1->is_p)
1270 {
1271 case IPPROTO_TCP :
1272 case IPPROTO_UDP :
1273 case IPPROTO_GRE :
1274 /* greinfo_t can be also interprted as port pair */
1275 rv = ipf_state_matchports(&is1->is_ps.is_us,
1276 &is2->is_ps.is_us);
1277 break;
1278
1279 case IPPROTO_ICMP :
1280 case IPPROTO_ICMPV6 :
1281 /* force strong match for ICMP datagram. */
1282 if (bcmp(&is1->is_ps, &is2->is_ps,
1283 sizeof(icmpinfo_t)) == 0) {
1284 rv = 2;
1285 } else {
1286 rv = 0;
1287 }
1288 break;
1289
1290 default:
1291 rv = 0;
1292 }
1293 } else {
1294 rv = 0;
1295 }
1296
1297 return (rv);
1298 }
1299
1300
1301 /* ------------------------------------------------------------------------ */
1302 /* Function: ipf_state_match */
1303 /* Returns: int - nonzero match, zero no match */
1304 /* Parameters: is1, is2 - states we want to match */
1305 /* */
1306 /* ------------------------------------------------------------------------ */
1307 static int
ipf_state_match(ipstate_t * is1,ipstate_t * is2)1308 ipf_state_match(ipstate_t *is1, ipstate_t *is2)
1309 {
1310 int rv;
1311 int amatch;
1312 int pomatch;
1313
1314 if (bcmp(&is1->is_pass, &is2->is_pass,
1315 offsetof(struct ipstate, is_authmsk) -
1316 offsetof(struct ipstate, is_pass)) == 0) {
1317
1318 pomatch = ipf_state_matchisps(is1, is2);
1319 amatch = ipf_state_matchaddresses(is1, is2);
1320 rv = (amatch != 0) && (amatch == pomatch);
1321 } else {
1322 rv = 0;
1323 }
1324
1325 return (rv);
1326 }
1327
1328 /* ------------------------------------------------------------------------ */
1329 /* Function: ipf_state_add */
1330 /* Returns: ipstate_t - 0 = success */
1331 /* Parameters: softc(I) - pointer to soft context main structure */
1332 /* fin(I) - pointer to packet information */
1333 /* stsave(O) - pointer to place to save pointer to created */
1334 /* state structure. */
1335 /* flags(I) - flags to use when creating the structure */
1336 /* */
1337 /* Creates a new IP state structure from the packet information collected. */
1338 /* Inserts it into the state table and appends to the bottom of the active */
1339 /* list. If the capacity of the table has reached the maximum allowed then */
1340 /* the call will fail and a flush is scheduled for the next timeout call. */
1341 /* */
1342 /* NOTE: The use of stsave to point to nat_state will result in memory */
1343 /* corruption. It should only be used to point to objects that will */
1344 /* either outlive this (not expired) or will deref the ip_state_t */
1345 /* when they are deleted. */
1346 /* ------------------------------------------------------------------------ */
1347 int
ipf_state_add(ipf_main_softc_t * softc,fr_info_t * fin,ipstate_t ** stsave,u_int flags)1348 ipf_state_add(ipf_main_softc_t *softc, fr_info_t *fin, ipstate_t **stsave,
1349 u_int flags)
1350 {
1351 ipf_state_softc_t *softs = softc->ipf_state_soft;
1352 ipstate_t *is, ips;
1353 struct icmp *ic;
1354 u_int pass, hv;
1355 frentry_t *fr;
1356 tcphdr_t *tcp;
1357 frdest_t *fdp;
1358 int out;
1359
1360 /*
1361 * If a packet that was created locally is trying to go out but we
1362 * do not match here because of this lock, it is likely that
1363 * the policy will block it and return network unreachable back up
1364 * the stack. To mitigate this error, EAGAIN is returned instead,
1365 * telling the IP stack to try sending this packet again later.
1366 */
1367 if (softs->ipf_state_lock) {
1368 SBUMPD(ipf_state_stats, iss_add_locked);
1369 fin->fin_error = EAGAIN;
1370 return -1;
1371 }
1372
1373 if (fin->fin_flx & (FI_SHORT|FI_STATE|FI_FRAGBODY|FI_BAD)) {
1374 SBUMPD(ipf_state_stats, iss_add_bad);
1375 return -1;
1376 }
1377
1378 if ((fin->fin_flx & FI_OOW) && !(fin->fin_tcpf & TH_SYN)) {
1379 SBUMPD(ipf_state_stats, iss_add_oow);
1380 return -1;
1381 }
1382
1383 if ((softs->ipf_state_stats.iss_active * 100 / softs->ipf_state_max) >
1384 softs->ipf_state_wm_high) {
1385 softs->ipf_state_doflush = 1;
1386 }
1387
1388 /*
1389 * If a "keep state" rule has reached the maximum number of references
1390 * to it, then schedule an automatic flush in case we can clear out
1391 * some "dead old wood". Note that because the lock isn't held on
1392 * fr it is possible that we could overflow. The cost of overflowing
1393 * is being ignored here as the number by which it can overflow is
1394 * a product of the number of simultaneous threads that could be
1395 * executing in here, so a limit of 100 won't result in 200, but could
1396 * result in 101 or 102.
1397 */
1398 fr = fin->fin_fr;
1399 if (fr != NULL) {
1400 if ((softs->ipf_state_stats.iss_active >=
1401 softs->ipf_state_max) && (fr->fr_statemax == 0)) {
1402 SBUMPD(ipf_state_stats, iss_max);
1403 return 1;
1404 }
1405 if ((fr->fr_statemax != 0) &&
1406 (fr->fr_statecnt >= fr->fr_statemax)) {
1407 SBUMPD(ipf_state_stats, iss_max_ref);
1408 return 2;
1409 }
1410 }
1411
1412 is = &ips;
1413 if (fr == NULL) {
1414 pass = softc->ipf_flags;
1415 is->is_tag = FR_NOLOGTAG;
1416 } else {
1417 pass = fr->fr_flags;
1418 }
1419
1420 ic = NULL;
1421 tcp = NULL;
1422 out = fin->fin_out;
1423 bzero((char *)is, sizeof(*is));
1424 is->is_die = 1 + softc->ipf_ticks;
1425 /*
1426 * We want to check everything that is a property of this packet,
1427 * but we don't (automatically) care about its fragment status as
1428 * this may change.
1429 */
1430 is->is_pass = pass;
1431 is->is_v = fin->fin_v;
1432 is->is_sec = fin->fin_secmsk;
1433 is->is_secmsk = 0xffff;
1434 is->is_auth = fin->fin_auth;
1435 is->is_authmsk = 0xffff;
1436 is->is_family = fin->fin_family;
1437 is->is_opt[0] = fin->fin_optmsk;
1438 is->is_optmsk[0] = 0xffffffff;
1439 if (is->is_v == 6) {
1440 is->is_opt[0] &= ~0x8;
1441 is->is_optmsk[0] &= ~0x8;
1442 }
1443
1444 /*
1445 * Copy and calculate...
1446 */
1447 hv = (is->is_p = fin->fin_fi.fi_p);
1448 is->is_src = fin->fin_fi.fi_src;
1449 hv += is->is_saddr;
1450 is->is_dst = fin->fin_fi.fi_dst;
1451 hv += is->is_daddr;
1452 #ifdef USE_INET6
1453 if (fin->fin_v == 6) {
1454 /*
1455 * For ICMPv6, we check to see if the destination address is
1456 * a multicast address. If it is, do not include it in the
1457 * calculation of the hash because the correct reply will come
1458 * back from a real address, not a multicast address.
1459 */
1460 if ((is->is_p == IPPROTO_ICMPV6) &&
1461 IN6_IS_ADDR_MULTICAST(&is->is_dst.in6)) {
1462 /*
1463 * So you can do keep state with neighbour discovery.
1464 *
1465 * Here we could use the address from the neighbour
1466 * solicit message to put in the state structure and
1467 * we could use that without a wildcard flag too...
1468 */
1469 flags |= SI_W_DADDR;
1470 hv -= is->is_daddr;
1471 } else {
1472 hv += is->is_dst.i6[1];
1473 hv += is->is_dst.i6[2];
1474 hv += is->is_dst.i6[3];
1475 }
1476 hv += is->is_src.i6[1];
1477 hv += is->is_src.i6[2];
1478 hv += is->is_src.i6[3];
1479 }
1480 #endif
1481 if ((fin->fin_v == 4) &&
1482 (fin->fin_flx & (FI_MULTICAST|FI_BROADCAST|FI_MBCAST))) {
1483 flags |= SI_W_DADDR;
1484 hv -= is->is_daddr;
1485 }
1486
1487 switch (is->is_p)
1488 {
1489 #ifdef USE_INET6
1490 case IPPROTO_ICMPV6 :
1491 ic = fin->fin_dp;
1492
1493 switch (ic->icmp_type)
1494 {
1495 case ICMP6_ECHO_REQUEST :
1496 hv += (is->is_icmp.ici_id = ic->icmp_id);
1497 /*FALLTHROUGH*/
1498 case ICMP6_MEMBERSHIP_QUERY :
1499 case ND_ROUTER_SOLICIT :
1500 case ND_NEIGHBOR_SOLICIT :
1501 case ICMP6_NI_QUERY :
1502 is->is_icmp.ici_type = ic->icmp_type;
1503 break;
1504 default :
1505 SBUMPD(ipf_state_stats, iss_icmp6_notquery);
1506 return -2;
1507 }
1508 break;
1509 #endif
1510 case IPPROTO_ICMP :
1511 ic = fin->fin_dp;
1512
1513 switch (ic->icmp_type)
1514 {
1515 case ICMP_ECHO :
1516 case ICMP_TSTAMP :
1517 case ICMP_IREQ :
1518 case ICMP_MASKREQ :
1519 is->is_icmp.ici_type = ic->icmp_type;
1520 hv += (is->is_icmp.ici_id = ic->icmp_id);
1521 break;
1522 default :
1523 SBUMPD(ipf_state_stats, iss_icmp_notquery);
1524 return -3;
1525 }
1526 break;
1527
1528 #if 0
1529 case IPPROTO_GRE :
1530 gre = fin->fin_dp;
1531
1532 is->is_gre.gs_flags = gre->gr_flags;
1533 is->is_gre.gs_ptype = gre->gr_ptype;
1534 if (GRE_REV(is->is_gre.gs_flags) == 1) {
1535 is->is_call[0] = fin->fin_data[0];
1536 is->is_call[1] = fin->fin_data[1];
1537 }
1538 break;
1539 #endif
1540
1541 case IPPROTO_TCP :
1542 tcp = fin->fin_dp;
1543
1544 if (tcp->th_flags & TH_RST) {
1545 SBUMPD(ipf_state_stats, iss_tcp_rstadd);
1546 return -4;
1547 }
1548
1549 /* TRACE is, flags, hv */
1550
1551 /*
1552 * The endian of the ports doesn't matter, but the ack and
1553 * sequence numbers do as we do mathematics on them later.
1554 */
1555 is->is_sport = htons(fin->fin_data[0]);
1556 is->is_dport = htons(fin->fin_data[1]);
1557 if ((flags & (SI_W_DPORT|SI_W_SPORT)) == 0) {
1558 hv += is->is_sport;
1559 hv += is->is_dport;
1560 }
1561
1562 /* TRACE is, flags, hv */
1563
1564 /*
1565 * If this is a real packet then initialise fields in the
1566 * state information structure from the TCP header information.
1567 */
1568
1569 is->is_maxdwin = 1;
1570 is->is_maxswin = ntohs(tcp->th_win);
1571 if (is->is_maxswin == 0)
1572 is->is_maxswin = 1;
1573
1574 if ((fin->fin_flx & FI_IGNORE) == 0) {
1575 is->is_send = ntohl(tcp->th_seq) + fin->fin_dlen -
1576 (TCP_OFF(tcp) << 2) +
1577 ((tcp->th_flags & TH_SYN) ? 1 : 0) +
1578 ((tcp->th_flags & TH_FIN) ? 1 : 0);
1579 is->is_maxsend = is->is_send;
1580
1581 /*
1582 * Window scale option is only present in
1583 * SYN/SYN-ACK packet.
1584 */
1585 if ((tcp->th_flags & ~(TH_FIN|TH_ACK|TH_ECNALL)) ==
1586 TH_SYN &&
1587 (TCP_OFF(tcp) > (sizeof(tcphdr_t) >> 2))) {
1588 if (ipf_tcpoptions(softs, fin, tcp,
1589 &is->is_tcp.ts_data[0]) == -1)
1590 fin->fin_flx |= FI_BAD;
1591 }
1592
1593 if ((fin->fin_out != 0) && (pass & FR_NEWISN) != 0) {
1594 ipf_checknewisn(fin, is);
1595 ipf_fixoutisn(fin, is);
1596 }
1597
1598 if ((tcp->th_flags & TH_OPENING) == TH_SYN)
1599 flags |= IS_TCPFSM;
1600 else {
1601 is->is_maxdwin = is->is_maxswin * 2;
1602 is->is_dend = ntohl(tcp->th_ack);
1603 is->is_maxdend = ntohl(tcp->th_ack);
1604 is->is_maxdwin *= 2;
1605 }
1606 }
1607
1608 /*
1609 * If we're creating state for a starting connection, start
1610 * the timer on it as we'll never see an error if it fails
1611 * to connect.
1612 */
1613 break;
1614
1615 case IPPROTO_UDP :
1616 tcp = fin->fin_dp;
1617
1618 is->is_sport = htons(fin->fin_data[0]);
1619 is->is_dport = htons(fin->fin_data[1]);
1620 if ((flags & (SI_W_DPORT|SI_W_SPORT)) == 0) {
1621 hv += tcp->th_dport;
1622 hv += tcp->th_sport;
1623 }
1624 break;
1625
1626 default :
1627 break;
1628 }
1629 hv = DOUBLE_HASH(hv);
1630 is->is_hv = hv;
1631
1632 /*
1633 * Look for identical state.
1634 */
1635 for (is = softs->ipf_state_table[hv % softs->ipf_state_size];
1636 is != NULL; is = is->is_hnext) {
1637 if (ipf_state_match(&ips, is) == 1)
1638 break;
1639 }
1640 if (is != NULL) {
1641 SBUMPD(ipf_state_stats, iss_add_dup);
1642 return 3;
1643 }
1644
1645 if (softs->ipf_state_stats.iss_bucketlen[hv] >=
1646 softs->ipf_state_maxbucket) {
1647 SBUMPD(ipf_state_stats, iss_bucket_full);
1648 return 4;
1649 }
1650
1651 /*
1652 * No existing state; create new
1653 */
1654 KMALLOC(is, ipstate_t *);
1655 if (is == NULL) {
1656 SBUMPD(ipf_state_stats, iss_nomem);
1657 return 5;
1658 }
1659 bcopy((char *)&ips, (char *)is, sizeof(*is));
1660 is->is_flags = flags & IS_INHERITED;
1661 is->is_rulen = fin->fin_rule;
1662 is->is_rule = fr;
1663
1664 /*
1665 * Do not do the modulus here, it is done in ipf_state_insert().
1666 */
1667 if (fr != NULL) {
1668 ipftq_t *tq;
1669
1670 (void) strncpy(is->is_group, FR_NAME(fr, fr_group),
1671 FR_GROUPLEN);
1672 if (fr->fr_age[0] != 0) {
1673 tq = ipf_addtimeoutqueue(softc,
1674 &softs->ipf_state_usertq,
1675 fr->fr_age[0]);
1676 is->is_tqehead[0] = tq;
1677 is->is_sti.tqe_flags |= TQE_RULEBASED;
1678 }
1679 if (fr->fr_age[1] != 0) {
1680 tq = ipf_addtimeoutqueue(softc,
1681 &softs->ipf_state_usertq,
1682 fr->fr_age[1]);
1683 is->is_tqehead[1] = tq;
1684 is->is_sti.tqe_flags |= TQE_RULEBASED;
1685 }
1686
1687 is->is_tag = fr->fr_logtag;
1688 }
1689
1690 /*
1691 * It may seem strange to set is_ref to 2, but if stsave is not NULL
1692 * then a copy of the pointer is being stored somewhere else and in
1693 * the end, it will expect to be able to do something with it.
1694 */
1695 is->is_me = stsave;
1696 if (stsave != NULL) {
1697 *stsave = is;
1698 is->is_ref = 2;
1699 } else {
1700 is->is_ref = 1;
1701 }
1702 is->is_pkts[0] = 0, is->is_bytes[0] = 0;
1703 is->is_pkts[1] = 0, is->is_bytes[1] = 0;
1704 is->is_pkts[2] = 0, is->is_bytes[2] = 0;
1705 is->is_pkts[3] = 0, is->is_bytes[3] = 0;
1706 if ((fin->fin_flx & FI_IGNORE) == 0) {
1707 is->is_pkts[out] = 1;
1708 fin->fin_pktnum = 1;
1709 is->is_bytes[out] = fin->fin_plen;
1710 is->is_flx[out][0] = fin->fin_flx & FI_CMP;
1711 is->is_flx[out][0] &= ~FI_OOW;
1712 }
1713
1714 if (pass & FR_STLOOSE)
1715 is->is_flags |= IS_LOOSE;
1716
1717 if (pass & FR_STSTRICT)
1718 is->is_flags |= IS_STRICT;
1719
1720 if (pass & FR_STATESYNC)
1721 is->is_flags |= IS_STATESYNC;
1722
1723 if (pass & FR_LOGFIRST)
1724 is->is_pass &= ~(FR_LOGFIRST|FR_LOG);
1725
1726 READ_ENTER(&softc->ipf_state);
1727
1728 if (ipf_state_insert(softc, is, fin->fin_rev) == -1) {
1729 RWLOCK_EXIT(&softc->ipf_state);
1730 /*
1731 * This is a bit more manual than it should be but
1732 * ipf_state_del cannot be called.
1733 */
1734 MUTEX_EXIT(&is->is_lock);
1735 MUTEX_DESTROY(&is->is_lock);
1736 if (is->is_tqehead[0] != NULL) {
1737 if (ipf_deletetimeoutqueue(is->is_tqehead[0]) == 0)
1738 ipf_freetimeoutqueue(softc, is->is_tqehead[0]);
1739 is->is_tqehead[0] = NULL;
1740 }
1741 if (is->is_tqehead[1] != NULL) {
1742 if (ipf_deletetimeoutqueue(is->is_tqehead[1]) == 0)
1743 ipf_freetimeoutqueue(softc, is->is_tqehead[1]);
1744 is->is_tqehead[1] = NULL;
1745 }
1746 KFREE(is);
1747 return -1;
1748 }
1749
1750 /*
1751 * Filling in the interface name is after the insert so that an
1752 * event (such as add/delete) of an interface that is referenced
1753 * by this rule will see this state entry.
1754 */
1755 if (fr != NULL) {
1756 /*
1757 * The name '-' is special for network interfaces and causes
1758 * a NULL name to be present, always, allowing packets to
1759 * match it, regardless of their interface.
1760 */
1761 if ((fin->fin_ifp == NULL) ||
1762 (fr->fr_ifnames[out << 1] != -1 &&
1763 fr->fr_names[fr->fr_ifnames[out << 1] + 0] == '-' &&
1764 fr->fr_names[fr->fr_ifnames[out << 1] + 1] == '\0')) {
1765 is->is_ifp[out << 1] = fr->fr_ifas[0];
1766 strncpy(is->is_ifname[out << 1],
1767 fr->fr_names + fr->fr_ifnames[0],
1768 sizeof(fr->fr_ifnames[0]));
1769 } else {
1770 is->is_ifp[out << 1] = fin->fin_ifp;
1771 COPYIFNAME(fin->fin_v, fin->fin_ifp,
1772 is->is_ifname[out << 1]);
1773 }
1774
1775 is->is_ifp[(out << 1) + 1] = fr->fr_ifas[1];
1776 if (fr->fr_ifnames[1] != -1) {
1777 strncpy(is->is_ifname[(out << 1) + 1],
1778 fr->fr_names + fr->fr_ifnames[1],
1779 sizeof(fr->fr_ifnames[1]));
1780 }
1781
1782 is->is_ifp[(1 - out) << 1] = fr->fr_ifas[2];
1783 if (fr->fr_ifnames[2] != -1) {
1784 strncpy(is->is_ifname[((1 - out) << 1)],
1785 fr->fr_names + fr->fr_ifnames[2],
1786 sizeof(fr->fr_ifnames[2]));
1787 }
1788
1789 is->is_ifp[((1 - out) << 1) + 1] = fr->fr_ifas[3];
1790 if (fr->fr_ifnames[3] != -1) {
1791 strncpy(is->is_ifname[((1 - out) << 1) + 1],
1792 fr->fr_names + fr->fr_ifnames[3],
1793 sizeof(fr->fr_ifnames[3]));
1794 }
1795 } else {
1796 if (fin->fin_ifp != NULL) {
1797 is->is_ifp[out << 1] = fin->fin_ifp;
1798 COPYIFNAME(fin->fin_v, fin->fin_ifp,
1799 is->is_ifname[out << 1]);
1800 }
1801 }
1802
1803 if (fin->fin_p == IPPROTO_TCP) {
1804 /*
1805 * If we're creating state for a starting connection, start the
1806 * timer on it as we'll never see an error if it fails to
1807 * connect.
1808 */
1809 (void) ipf_tcp_age(&is->is_sti, fin, softs->ipf_state_tcptq,
1810 is->is_flags, 2);
1811 }
1812 MUTEX_EXIT(&is->is_lock);
1813 if ((is->is_flags & IS_STATESYNC) && ((is->is_flags & SI_CLONE) == 0))
1814 is->is_sync = ipf_sync_new(softc, SMC_STATE, fin, is);
1815 if (softs->ipf_state_logging)
1816 ipf_state_log(softc, is, ISL_NEW);
1817
1818 RWLOCK_EXIT(&softc->ipf_state);
1819
1820 fin->fin_flx |= FI_STATE;
1821 if (fin->fin_flx & FI_FRAG)
1822 (void) ipf_frag_new(softc, fin, pass);
1823
1824 fdp = &fr->fr_tifs[0];
1825 if (fdp->fd_type == FRD_DSTLIST) {
1826 ipf_dstlist_select_node(fin, fdp->fd_ptr, NULL,
1827 &is->is_tifs[0]);
1828 } else {
1829 bcopy(fdp, &is->is_tifs[0], sizeof(*fdp));
1830 }
1831
1832 fdp = &fr->fr_tifs[1];
1833 if (fdp->fd_type == FRD_DSTLIST) {
1834 ipf_dstlist_select_node(fin, fdp->fd_ptr, NULL,
1835 &is->is_tifs[1]);
1836 } else {
1837 bcopy(fdp, &is->is_tifs[1], sizeof(*fdp));
1838 }
1839 fin->fin_tif = &is->is_tifs[fin->fin_rev];
1840
1841 fdp = &fr->fr_dif;
1842 if (fdp->fd_type == FRD_DSTLIST) {
1843 ipf_dstlist_select_node(fin, fdp->fd_ptr, NULL,
1844 &is->is_dif);
1845 } else {
1846 bcopy(fdp, &is->is_dif, sizeof(*fdp));
1847 }
1848 fin->fin_dif = &is->is_dif;
1849
1850 return 0;
1851 }
1852
1853
1854 /* ------------------------------------------------------------------------ */
1855 /* Function: ipf_tcpoptions */
1856 /* Returns: int - 1 == packet matches state entry, 0 == it does not, */
1857 /* -1 == packet has bad TCP options data */
1858 /* Parameters: softs(I) - pointer to state context structure */
1859 /* fin(I) - pointer to packet information */
1860 /* tcp(I) - pointer to TCP packet header */
1861 /* td(I) - pointer to TCP data held as part of the state */
1862 /* */
1863 /* Look after the TCP header for any options and deal with those that are */
1864 /* present. Record details about those that we recogise. */
1865 /* ------------------------------------------------------------------------ */
1866 static int
ipf_tcpoptions(ipf_state_softc_t * softs,fr_info_t * fin,tcphdr_t * tcp,tcpdata_t * td)1867 ipf_tcpoptions(ipf_state_softc_t *softs, fr_info_t *fin, tcphdr_t *tcp,
1868 tcpdata_t *td)
1869 {
1870 int off, mlen, ol, i, len, retval;
1871 char buf[64], *s, opt;
1872 mb_t *m = NULL;
1873
1874 len = (TCP_OFF(tcp) << 2);
1875 if (fin->fin_dlen < len) {
1876 SBUMPD(ipf_state_stats, iss_tcp_toosmall);
1877 return 0;
1878 }
1879 len -= sizeof(*tcp);
1880
1881 off = fin->fin_plen - fin->fin_dlen + sizeof(*tcp) + fin->fin_ipoff;
1882
1883 m = fin->fin_m;
1884 mlen = MSGDSIZE(m) - off;
1885 if (len > mlen) {
1886 len = mlen;
1887 retval = 0;
1888 } else {
1889 retval = 1;
1890 }
1891
1892 COPYDATA(m, off, len, buf);
1893
1894 for (s = buf; len > 0; ) {
1895 opt = *s;
1896 if (opt == TCPOPT_EOL)
1897 break;
1898 else if (opt == TCPOPT_NOP)
1899 ol = 1;
1900 else {
1901 if (len < 2)
1902 break;
1903 ol = (int)*(s + 1);
1904 if (ol < 2 || ol > len)
1905 break;
1906
1907 /*
1908 * Extract the TCP options we are interested in out of
1909 * the header and store them in the tcpdata struct.
1910 */
1911 switch (opt)
1912 {
1913 case TCPOPT_WINDOW :
1914 if (ol == TCPOLEN_WINDOW) {
1915 i = (int)*(s + 2);
1916 if (i > TCP_WSCALE_MAX)
1917 i = TCP_WSCALE_MAX;
1918 else if (i < 0)
1919 i = 0;
1920 td->td_winscale = i;
1921 td->td_winflags |= TCP_WSCALE_SEEN|
1922 TCP_WSCALE_FIRST;
1923 } else
1924 retval = -1;
1925 break;
1926 case TCPOPT_MAXSEG :
1927 /*
1928 * So, if we wanted to set the TCP MAXSEG,
1929 * it should be done here...
1930 */
1931 if (ol == TCPOLEN_MAXSEG) {
1932 i = (int)*(s + 2);
1933 i <<= 8;
1934 i += (int)*(s + 3);
1935 td->td_maxseg = i;
1936 } else
1937 retval = -1;
1938 break;
1939 case TCPOPT_SACK_PERMITTED :
1940 if (ol == TCPOLEN_SACK_PERMITTED)
1941 td->td_winflags |= TCP_SACK_PERMIT;
1942 else
1943 retval = -1;
1944 break;
1945 }
1946 }
1947 len -= ol;
1948 s += ol;
1949 }
1950 if (retval == -1) {
1951 SBUMPD(ipf_state_stats, iss_tcp_badopt);
1952 }
1953 return retval;
1954 }
1955
1956
1957 /* ------------------------------------------------------------------------ */
1958 /* Function: ipf_state_tcp */
1959 /* Returns: int - 1 == packet matches state entry, 0 == it does not */
1960 /* Parameters: softc(I) - pointer to soft context main structure */
1961 /* softs(I) - pointer to state context structure */
1962 /* fin(I) - pointer to packet information */
1963 /* tcp(I) - pointer to TCP packet header */
1964 /* is(I) - pointer to master state structure */
1965 /* */
1966 /* Check to see if a packet with TCP headers fits within the TCP window. */
1967 /* Change timeout depending on whether new packet is a SYN-ACK returning */
1968 /* for a SYN or a RST or FIN which indicate time to close up shop. */
1969 /* ------------------------------------------------------------------------ */
1970 static int
ipf_state_tcp(ipf_main_softc_t * softc,ipf_state_softc_t * softs,fr_info_t * fin,tcphdr_t * tcp,ipstate_t * is)1971 ipf_state_tcp(ipf_main_softc_t *softc, ipf_state_softc_t *softs, fr_info_t *fin,
1972 tcphdr_t *tcp, ipstate_t *is)
1973 {
1974 tcpdata_t *fdata, *tdata;
1975 int source, ret, flags;
1976
1977 source = !fin->fin_rev;
1978 if (((is->is_flags & IS_TCPFSM) != 0) && (source == 1) &&
1979 (ntohs(is->is_sport) != fin->fin_data[0]))
1980 source = 0;
1981 fdata = &is->is_tcp.ts_data[!source];
1982 tdata = &is->is_tcp.ts_data[source];
1983
1984 MUTEX_ENTER(&is->is_lock);
1985
1986 /*
1987 * If a SYN packet is received for a connection that is on the way out
1988 * but hasn't yet departed then advance this session along the way.
1989 */
1990 if ((tcp->th_flags & TH_OPENING) == TH_SYN) {
1991 if ((is->is_state[0] > IPF_TCPS_ESTABLISHED) &&
1992 (is->is_state[1] > IPF_TCPS_ESTABLISHED)) {
1993 is->is_state[!source] = IPF_TCPS_CLOSED;
1994 ipf_movequeue(softc->ipf_ticks, &is->is_sti,
1995 is->is_sti.tqe_ifq,
1996 &softs->ipf_state_deletetq);
1997 MUTEX_EXIT(&is->is_lock);
1998 DT1(iss_tcp_closing, ipstate_t *, is);
1999 SBUMP(ipf_state_stats.iss_tcp_closing);
2000 return 0;
2001 }
2002 }
2003
2004 if (is->is_flags & IS_LOOSE)
2005 ret = 1;
2006 else
2007 ret = ipf_state_tcpinwindow(fin, fdata, tdata, tcp,
2008 is->is_flags);
2009 if (ret > 0) {
2010 /*
2011 * Nearing end of connection, start timeout.
2012 */
2013 ret = ipf_tcp_age(&is->is_sti, fin, softs->ipf_state_tcptq,
2014 is->is_flags, ret);
2015 if (ret == 0) {
2016 MUTEX_EXIT(&is->is_lock);
2017 DT2(iss_tcp_fsm, fr_info_t *, fin, ipstate_t *, is);
2018 SBUMP(ipf_state_stats.iss_tcp_fsm);
2019 return 0;
2020 }
2021
2022 if (softs->ipf_state_logging > 4)
2023 ipf_state_log(softc, is, ISL_STATECHANGE);
2024
2025 /*
2026 * set s0's as appropriate. Use syn-ack packet as it
2027 * contains both pieces of required information.
2028 */
2029 /*
2030 * Window scale option is only present in SYN/SYN-ACK packet.
2031 * Compare with ~TH_FIN to mask out T/TCP setups.
2032 */
2033 flags = tcp->th_flags & ~(TH_FIN|TH_ECNALL);
2034 if (flags == (TH_SYN|TH_ACK)) {
2035 is->is_s0[source] = ntohl(tcp->th_ack);
2036 is->is_s0[!source] = ntohl(tcp->th_seq) + 1;
2037 if ((TCP_OFF(tcp) > (sizeof(tcphdr_t) >> 2))) {
2038 if (ipf_tcpoptions(softs, fin, tcp,
2039 fdata) == -1)
2040 fin->fin_flx |= FI_BAD;
2041 }
2042 if ((fin->fin_out != 0) && (is->is_pass & FR_NEWISN))
2043 ipf_checknewisn(fin, is);
2044 } else if (flags == TH_SYN) {
2045 is->is_s0[source] = ntohl(tcp->th_seq) + 1;
2046 if ((TCP_OFF(tcp) > (sizeof(tcphdr_t) >> 2))) {
2047 if (ipf_tcpoptions(softs, fin, tcp,
2048 fdata) == -1)
2049 fin->fin_flx |= FI_BAD;
2050 }
2051
2052 if ((fin->fin_out != 0) && (is->is_pass & FR_NEWISN))
2053 ipf_checknewisn(fin, is);
2054
2055 }
2056 ret = 1;
2057 } else {
2058 DT2(iss_tcp_oow, fr_info_t *, fin, ipstate_t *, is);
2059 SBUMP(ipf_state_stats.iss_tcp_oow);
2060 ret = 0;
2061 }
2062 MUTEX_EXIT(&is->is_lock);
2063 return ret;
2064 }
2065
2066
2067 /* ------------------------------------------------------------------------ */
2068 /* Function: ipf_checknewisn */
2069 /* Returns: Nil */
2070 /* Parameters: fin(I) - pointer to packet information */
2071 /* is(I) - pointer to master state structure */
2072 /* */
2073 /* Check to see if this TCP connection is expecting and needs a new */
2074 /* sequence number for a particular direction of the connection. */
2075 /* */
2076 /* NOTE: This does not actually change the sequence numbers, only gets new */
2077 /* one ready. */
2078 /* ------------------------------------------------------------------------ */
2079 static void
ipf_checknewisn(fr_info_t * fin,ipstate_t * is)2080 ipf_checknewisn(fr_info_t *fin, ipstate_t *is)
2081 {
2082 u_32_t sumd, old, new;
2083 tcphdr_t *tcp;
2084 int i;
2085
2086 i = fin->fin_rev;
2087 tcp = fin->fin_dp;
2088
2089 if (((i == 0) && !(is->is_flags & IS_ISNSYN)) ||
2090 ((i == 1) && !(is->is_flags & IS_ISNACK))) {
2091 old = ntohl(tcp->th_seq);
2092 new = ipf_newisn(fin);
2093 is->is_isninc[i] = new - old;
2094 CALC_SUMD(old, new, sumd);
2095 is->is_sumd[i] = (sumd & 0xffff) + (sumd >> 16);
2096
2097 is->is_flags |= ((i == 0) ? IS_ISNSYN : IS_ISNACK);
2098 }
2099 }
2100
2101
2102 /* ------------------------------------------------------------------------ */
2103 /* Function: ipf_state_tcpinwindow */
2104 /* Returns: int - 1 == packet inside TCP "window", 0 == not inside. */
2105 /* Parameters: fin(I) - pointer to packet information */
2106 /* fdata(I) - pointer to tcp state informatio (forward) */
2107 /* tdata(I) - pointer to tcp state informatio (reverse) */
2108 /* tcp(I) - pointer to TCP packet header */
2109 /* */
2110 /* Given a packet has matched addresses and ports, check to see if it is */
2111 /* within the TCP data window. In a show of generosity, allow packets that */
2112 /* are within the window space behind the current sequence # as well. */
2113 /* ------------------------------------------------------------------------ */
2114 static int
ipf_state_tcpinwindow(fr_info_t * fin,tcpdata_t * fdata,tcpdata_t * tdata,tcphdr_t * tcp,int flags)2115 ipf_state_tcpinwindow(fr_info_t *fin, tcpdata_t *fdata, tcpdata_t *tdata,
2116 tcphdr_t *tcp, int flags)
2117 {
2118 ipf_main_softc_t *softc = fin->fin_main_soft;
2119 ipf_state_softc_t *softs = softc->ipf_state_soft;
2120 tcp_seq seq, ack, end;
2121 int ackskew, tcpflags;
2122 u_32_t win, maxwin;
2123 int dsize, inseq;
2124
2125 /*
2126 * Find difference between last checked packet and this packet.
2127 */
2128 tcpflags = tcp->th_flags;
2129 seq = ntohl(tcp->th_seq);
2130 ack = ntohl(tcp->th_ack);
2131 if (tcpflags & TH_SYN)
2132 win = ntohs(tcp->th_win);
2133 else
2134 win = ntohs(tcp->th_win) << fdata->td_winscale;
2135
2136 /*
2137 * A window of 0 produces undesirable behaviour from this function.
2138 */
2139 if (win == 0)
2140 win = 1;
2141
2142 dsize = fin->fin_dlen - (TCP_OFF(tcp) << 2) +
2143 ((tcpflags & TH_SYN) ? 1 : 0) + ((tcpflags & TH_FIN) ? 1 : 0);
2144
2145 /*
2146 * if window scaling is present, the scaling is only allowed
2147 * for windows not in the first SYN packet. In that packet the
2148 * window is 65535 to specify the largest window possible
2149 * for receivers not implementing the window scale option.
2150 * Currently, we do not assume TTCP here. That means that
2151 * if we see a second packet from a host (after the initial
2152 * SYN), we can assume that the receiver of the SYN did
2153 * already send back the SYN/ACK (and thus that we know if
2154 * the receiver also does window scaling)
2155 */
2156 if (!(tcpflags & TH_SYN) && (fdata->td_winflags & TCP_WSCALE_FIRST)) {
2157 fdata->td_winflags &= ~TCP_WSCALE_FIRST;
2158 fdata->td_maxwin = win;
2159 }
2160
2161 end = seq + dsize;
2162
2163 if ((fdata->td_end == 0) &&
2164 (!(flags & IS_TCPFSM) ||
2165 ((tcpflags & TH_OPENING) == TH_OPENING))) {
2166 /*
2167 * Must be a (outgoing) SYN-ACK in reply to a SYN.
2168 */
2169 fdata->td_end = end - 1;
2170 fdata->td_maxwin = 1;
2171 fdata->td_maxend = end + win;
2172 }
2173
2174 if (!(tcpflags & TH_ACK)) { /* Pretend an ack was sent */
2175 ack = tdata->td_end;
2176 } else if (((tcpflags & (TH_ACK|TH_RST)) == (TH_ACK|TH_RST)) &&
2177 (ack == 0)) {
2178 /* gross hack to get around certain broken tcp stacks */
2179 ack = tdata->td_end;
2180 }
2181
2182 maxwin = tdata->td_maxwin;
2183 ackskew = tdata->td_end - ack;
2184
2185 /*
2186 * Strict sequencing only allows in-order delivery.
2187 */
2188 if ((flags & IS_STRICT) != 0) {
2189 if (seq != fdata->td_end) {
2190 DT2(iss_tcp_struct, tcpdata_t *, fdata, int, seq);
2191 SBUMP(ipf_state_stats.iss_tcp_strict);
2192 fin->fin_flx |= FI_OOW;
2193 return 0;
2194 }
2195 }
2196
2197 #define SEQ_GE(a,b) ((int)((a) - (b)) >= 0)
2198 #define SEQ_GT(a,b) ((int)((a) - (b)) > 0)
2199 inseq = 0;
2200 if ((SEQ_GE(fdata->td_maxend, end)) &&
2201 (SEQ_GE(seq, fdata->td_end - maxwin)) &&
2202 /* XXX what about big packets */
2203 #define MAXACKWINDOW 66000
2204 (-ackskew <= (MAXACKWINDOW)) &&
2205 ( ackskew <= (MAXACKWINDOW << fdata->td_winscale))) {
2206 inseq = 1;
2207 /*
2208 * Microsoft Windows will send the next packet to the right of the
2209 * window if SACK is in use.
2210 */
2211 } else if ((seq == fdata->td_maxend) && (ackskew == 0) &&
2212 (fdata->td_winflags & TCP_SACK_PERMIT) &&
2213 (tdata->td_winflags & TCP_SACK_PERMIT)) {
2214 DT2(iss_sinsack, tcpdata_t *, fdata, int, seq);
2215 SBUMP(ipf_state_stats.iss_winsack);
2216 inseq = 1;
2217 /*
2218 * Sometimes a TCP RST will be generated with only the ACK field
2219 * set to non-zero.
2220 */
2221 } else if ((seq == 0) && (tcpflags == (TH_RST|TH_ACK)) &&
2222 (ackskew >= -1) && (ackskew <= 1)) {
2223 inseq = 1;
2224 } else if (!(flags & IS_TCPFSM)) {
2225 #if 0
2226 int i;
2227
2228 i = (fin->fin_rev << 1) + fin->fin_out;
2229
2230 if (is_pkts[i]0 == 0) {
2231 /*
2232 * Picking up a connection in the middle, the "next"
2233 * packet seen from a direction that is new should be
2234 * accepted, even if it appears out of sequence.
2235 */
2236 inseq = 1;
2237 } else
2238 #endif
2239 if (!(fdata->td_winflags &
2240 (TCP_WSCALE_SEEN|TCP_WSCALE_FIRST))) {
2241 /*
2242 * No TCPFSM and no window scaling, so make some
2243 * extra guesses.
2244 */
2245 if ((seq == fdata->td_maxend) && (ackskew == 0))
2246 inseq = 1;
2247 else if (SEQ_GE(seq + maxwin, fdata->td_end - maxwin))
2248 inseq = 1;
2249 }
2250 }
2251
2252 /* TRACE(inseq, fdata, tdata, seq, end, ack, ackskew, win, maxwin) */
2253
2254 if (inseq) {
2255 /* if ackskew < 0 then this should be due to fragmented
2256 * packets. There is no way to know the length of the
2257 * total packet in advance.
2258 * We do know the total length from the fragment cache though.
2259 * Note however that there might be more sessions with
2260 * exactly the same source and destination parameters in the
2261 * state cache (and source and destination is the only stuff
2262 * that is saved in the fragment cache). Note further that
2263 * some TCP connections in the state cache are hashed with
2264 * sport and dport as well which makes it not worthwhile to
2265 * look for them.
2266 * Thus, when ackskew is negative but still seems to belong
2267 * to this session, we bump up the destinations end value.
2268 */
2269 if (ackskew < 0)
2270 tdata->td_end = ack;
2271
2272 /* update max window seen */
2273 if (fdata->td_maxwin < win)
2274 fdata->td_maxwin = win;
2275 if (SEQ_GT(end, fdata->td_end))
2276 fdata->td_end = end;
2277 if (SEQ_GE(ack + win, tdata->td_maxend))
2278 tdata->td_maxend = ack + win;
2279 return 1;
2280 }
2281 SBUMP(ipf_state_stats.iss_oow);
2282 fin->fin_flx |= FI_OOW;
2283 return 0;
2284 }
2285
2286
2287 /* ------------------------------------------------------------------------ */
2288 /* Function: ipf_state_clone */
2289 /* Returns: ipstate_t* - NULL == cloning failed, */
2290 /* else pointer to new state structure */
2291 /* Parameters: fin(I) - pointer to packet information */
2292 /* tcp(I) - pointer to TCP/UDP header */
2293 /* is(I) - pointer to master state structure */
2294 /* */
2295 /* Create a "duplcate" state table entry from the master. */
2296 /* ------------------------------------------------------------------------ */
2297 static ipstate_t *
ipf_state_clone(fr_info_t * fin,tcphdr_t * tcp,ipstate_t * is)2298 ipf_state_clone(fr_info_t *fin, tcphdr_t *tcp, ipstate_t *is)
2299 {
2300 ipf_main_softc_t *softc = fin->fin_main_soft;
2301 ipf_state_softc_t *softs = softc->ipf_state_soft;
2302 ipstate_t *clone;
2303 u_32_t send;
2304
2305 if (softs->ipf_state_stats.iss_active == softs->ipf_state_max) {
2306 SBUMPD(ipf_state_stats, iss_max);
2307 softs->ipf_state_doflush = 1;
2308 return NULL;
2309 }
2310 KMALLOC(clone, ipstate_t *);
2311 if (clone == NULL) {
2312 SBUMPD(ipf_state_stats, iss_clone_nomem);
2313 return NULL;
2314 }
2315 bcopy((char *)is, (char *)clone, sizeof(*clone));
2316
2317 MUTEX_NUKE(&clone->is_lock);
2318 /*
2319 * It has not yet been placed on any timeout queue, so make sure
2320 * all of that data is zero'd out.
2321 */
2322 clone->is_sti.tqe_pnext = NULL;
2323 clone->is_sti.tqe_next = NULL;
2324 clone->is_sti.tqe_ifq = NULL;
2325 clone->is_sti.tqe_parent = clone;
2326
2327 clone->is_die = ONE_DAY + softc->ipf_ticks;
2328 clone->is_state[0] = 0;
2329 clone->is_state[1] = 0;
2330 send = ntohl(tcp->th_seq) + fin->fin_dlen - (TCP_OFF(tcp) << 2) +
2331 ((tcp->th_flags & TH_SYN) ? 1 : 0) +
2332 ((tcp->th_flags & TH_FIN) ? 1 : 0);
2333
2334 if (fin->fin_rev == 1) {
2335 clone->is_dend = send;
2336 clone->is_maxdend = send;
2337 clone->is_send = 0;
2338 clone->is_maxswin = 1;
2339 clone->is_maxdwin = ntohs(tcp->th_win);
2340 if (clone->is_maxdwin == 0)
2341 clone->is_maxdwin = 1;
2342 } else {
2343 clone->is_send = send;
2344 clone->is_maxsend = send;
2345 clone->is_dend = 0;
2346 clone->is_maxdwin = 1;
2347 clone->is_maxswin = ntohs(tcp->th_win);
2348 if (clone->is_maxswin == 0)
2349 clone->is_maxswin = 1;
2350 }
2351
2352 clone->is_flags &= ~SI_CLONE;
2353 clone->is_flags |= SI_CLONED;
2354 if (ipf_state_insert(softc, clone, fin->fin_rev) == -1) {
2355 KFREE(clone);
2356 return NULL;
2357 }
2358
2359 clone->is_ref = 1;
2360 if (clone->is_p == IPPROTO_TCP) {
2361 (void) ipf_tcp_age(&clone->is_sti, fin, softs->ipf_state_tcptq,
2362 clone->is_flags, 2);
2363 }
2364 MUTEX_EXIT(&clone->is_lock);
2365 if (is->is_flags & IS_STATESYNC)
2366 clone->is_sync = ipf_sync_new(softc, SMC_STATE, fin, clone);
2367 DT2(iss_clone, ipstate_t *, is, ipstate_t *, clone);
2368 SBUMP(ipf_state_stats.iss_cloned);
2369 return clone;
2370 }
2371
2372
2373 /* ------------------------------------------------------------------------ */
2374 /* Function: ipf_matchsrcdst */
2375 /* Returns: Nil */
2376 /* Parameters: fin(I) - pointer to packet information */
2377 /* is(I) - pointer to state structure */
2378 /* src(I) - pointer to source address */
2379 /* dst(I) - pointer to destination address */
2380 /* tcp(I) - pointer to TCP/UDP header */
2381 /* cmask(I) - mask of FI_* bits to check */
2382 /* */
2383 /* Match a state table entry against an IP packet. The logic below is that */
2384 /* ret gets set to one if the match succeeds, else remains 0. If it is */
2385 /* still 0 after the test. no match. */
2386 /* ------------------------------------------------------------------------ */
2387 static ipstate_t *
ipf_matchsrcdst(fr_info_t * fin,ipstate_t * is,i6addr_t * src,i6addr_t * dst,tcphdr_t * tcp,u_32_t cmask)2388 ipf_matchsrcdst(fr_info_t *fin, ipstate_t *is, i6addr_t *src, i6addr_t *dst,
2389 tcphdr_t *tcp, u_32_t cmask)
2390 {
2391 ipf_main_softc_t *softc = fin->fin_main_soft;
2392 ipf_state_softc_t *softs = softc->ipf_state_soft;
2393 int ret = 0, rev, out, flags, flx = 0, idx;
2394 u_short sp, dp;
2395 u_32_t cflx;
2396 void *ifp;
2397
2398 /*
2399 * If a connection is about to be deleted, no packets
2400 * are allowed to match it.
2401 */
2402 if (is->is_sti.tqe_ifq == &softs->ipf_state_deletetq)
2403 return NULL;
2404
2405 rev = IP6_NEQ(&is->is_dst, dst);
2406 ifp = fin->fin_ifp;
2407 out = fin->fin_out;
2408 flags = is->is_flags;
2409 sp = 0;
2410 dp = 0;
2411
2412 if (tcp != NULL) {
2413 sp = htons(fin->fin_sport);
2414 dp = htons(fin->fin_dport);
2415 }
2416 if (!rev) {
2417 if (tcp != NULL) {
2418 if (!(flags & SI_W_SPORT) && (sp != is->is_sport))
2419 rev = 1;
2420 else if (!(flags & SI_W_DPORT) && (dp != is->is_dport))
2421 rev = 1;
2422 }
2423 }
2424
2425 idx = (out << 1) + rev;
2426
2427 /*
2428 * If the interface for this 'direction' is set, make sure it matches.
2429 * An interface name that is not set matches any, as does a name of *.
2430 */
2431 if ((is->is_ifp[idx] == ifp) || (is->is_ifp[idx] == NULL &&
2432 (*is->is_ifname[idx] == '\0' || *is->is_ifname[idx] == '-' ||
2433 *is->is_ifname[idx] == '*')))
2434 ret = 1;
2435
2436 if (ret == 0) {
2437 DT2(iss_lookup_badifp, fr_info_t *, fin, ipstate_t *, is);
2438 SBUMP(ipf_state_stats.iss_lookup_badifp);
2439 /* TRACE is, out, rev, idx */
2440 return NULL;
2441 }
2442 ret = 0;
2443
2444 /*
2445 * Match addresses and ports.
2446 */
2447 if (rev == 0) {
2448 if ((IP6_EQ(&is->is_dst, dst) || (flags & SI_W_DADDR)) &&
2449 (IP6_EQ(&is->is_src, src) || (flags & SI_W_SADDR))) {
2450 if (tcp) {
2451 if ((sp == is->is_sport || flags & SI_W_SPORT)
2452 &&
2453 (dp == is->is_dport || flags & SI_W_DPORT))
2454 ret = 1;
2455 } else {
2456 ret = 1;
2457 }
2458 }
2459 } else {
2460 if ((IP6_EQ(&is->is_dst, src) || (flags & SI_W_DADDR)) &&
2461 (IP6_EQ(&is->is_src, dst) || (flags & SI_W_SADDR))) {
2462 if (tcp) {
2463 if ((dp == is->is_sport || flags & SI_W_SPORT)
2464 &&
2465 (sp == is->is_dport || flags & SI_W_DPORT))
2466 ret = 1;
2467 } else {
2468 ret = 1;
2469 }
2470 }
2471 }
2472
2473 if (ret == 0) {
2474 SBUMP(ipf_state_stats.iss_lookup_badport);
2475 DT2(iss_lookup_badport, fr_info_t *, fin, ipstate_t *, is);
2476 /* TRACE rev, is, sp, dp, src, dst */
2477 return NULL;
2478 }
2479
2480 /*
2481 * Whether or not this should be here, is questionable, but the aim
2482 * is to get this out of the main line.
2483 */
2484 if (tcp == NULL)
2485 flags = is->is_flags & ~(SI_WILDP|SI_NEWFR|SI_CLONE|SI_CLONED);
2486
2487 /*
2488 * Only one of the source or destination address can be flaged as a
2489 * wildcard. Fill in the missing address, if set.
2490 * For IPv6, if the address being copied in is multicast, then
2491 * don't reset the wild flag - multicast causes it to be set in the
2492 * first place!
2493 */
2494 if ((flags & (SI_W_SADDR|SI_W_DADDR))) {
2495 fr_ip_t *fi = &fin->fin_fi;
2496
2497 if ((flags & SI_W_SADDR) != 0) {
2498 if (rev == 0) {
2499 is->is_src = fi->fi_src;
2500 is->is_flags &= ~SI_W_SADDR;
2501 } else {
2502 if (!(fin->fin_flx & (FI_MULTICAST|FI_MBCAST))){
2503 is->is_src = fi->fi_dst;
2504 is->is_flags &= ~SI_W_SADDR;
2505 }
2506 }
2507 } else if ((flags & SI_W_DADDR) != 0) {
2508 if (rev == 0) {
2509 if (!(fin->fin_flx & (FI_MULTICAST|FI_MBCAST))){
2510 is->is_dst = fi->fi_dst;
2511 is->is_flags &= ~SI_W_DADDR;
2512 }
2513 } else {
2514 is->is_dst = fi->fi_src;
2515 is->is_flags &= ~SI_W_DADDR;
2516 }
2517 }
2518 if ((is->is_flags & (SI_WILDA|SI_WILDP)) == 0) {
2519 ATOMIC_DECL(softs->ipf_state_stats.iss_wild);
2520 }
2521 }
2522
2523 flx = fin->fin_flx & cmask;
2524 cflx = is->is_flx[out][rev];
2525
2526 /*
2527 * Match up any flags set from IP options.
2528 */
2529 if ((cflx && (flx != (cflx & cmask))) ||
2530 ((fin->fin_optmsk & is->is_optmsk[rev]) != is->is_opt[rev]) ||
2531 ((fin->fin_secmsk & is->is_secmsk) != is->is_sec) ||
2532 ((fin->fin_auth & is->is_authmsk) != is->is_auth)) {
2533 SBUMPD(ipf_state_stats, iss_miss_mask);
2534 return NULL;
2535 }
2536
2537 if ((fin->fin_flx & FI_IGNORE) != 0) {
2538 fin->fin_rev = rev;
2539 return is;
2540 }
2541
2542 /*
2543 * Only one of the source or destination port can be flagged as a
2544 * wildcard. When filling it in, fill in a copy of the matched entry
2545 * if it has the cloning flag set.
2546 */
2547 if ((flags & (SI_W_SPORT|SI_W_DPORT))) {
2548 if ((flags & SI_CLONE) != 0) {
2549 ipstate_t *clone;
2550
2551 clone = ipf_state_clone(fin, tcp, is);
2552 if (clone == NULL)
2553 return NULL;
2554 is = clone;
2555 } else {
2556 ATOMIC_DECL(softs->ipf_state_stats.iss_wild);
2557 }
2558
2559 if ((flags & SI_W_SPORT) != 0) {
2560 if (rev == 0) {
2561 is->is_sport = sp;
2562 is->is_send = ntohl(tcp->th_seq);
2563 } else {
2564 is->is_sport = dp;
2565 is->is_send = ntohl(tcp->th_ack);
2566 }
2567 is->is_maxsend = is->is_send + 1;
2568 } else if ((flags & SI_W_DPORT) != 0) {
2569 if (rev == 0) {
2570 is->is_dport = dp;
2571 is->is_dend = ntohl(tcp->th_ack);
2572 } else {
2573 is->is_dport = sp;
2574 is->is_dend = ntohl(tcp->th_seq);
2575 }
2576 is->is_maxdend = is->is_dend + 1;
2577 }
2578 is->is_flags &= ~(SI_W_SPORT|SI_W_DPORT);
2579 if ((flags & SI_CLONED) && softs->ipf_state_logging)
2580 ipf_state_log(softc, is, ISL_CLONE);
2581 }
2582
2583 ret = -1;
2584
2585 if (is->is_flx[out][rev] == 0) {
2586 is->is_flx[out][rev] = flx;
2587 if (rev == 1 && is->is_optmsk[1] == 0) {
2588 is->is_opt[1] = fin->fin_optmsk;
2589 is->is_optmsk[1] = 0xffffffff;
2590 if (is->is_v == 6) {
2591 is->is_opt[1] &= ~0x8;
2592 is->is_optmsk[1] &= ~0x8;
2593 }
2594 }
2595 }
2596
2597 /*
2598 * Check if the interface name for this "direction" is set and if not,
2599 * fill it in.
2600 */
2601 if (is->is_ifp[idx] == NULL &&
2602 (*is->is_ifname[idx] == '\0' || *is->is_ifname[idx] == '*')) {
2603 is->is_ifp[idx] = ifp;
2604 COPYIFNAME(fin->fin_v, ifp, is->is_ifname[idx]);
2605 }
2606 fin->fin_rev = rev;
2607 return is;
2608 }
2609
2610
2611 /* ------------------------------------------------------------------------ */
2612 /* Function: ipf_checkicmpmatchingstate */
2613 /* Returns: Nil */
2614 /* Parameters: fin(I) - pointer to packet information */
2615 /* */
2616 /* If we've got an ICMP error message, using the information stored in the */
2617 /* ICMP packet, look for a matching state table entry. */
2618 /* */
2619 /* If we return NULL then no lock on ipf_state is held. */
2620 /* If we return non-null then a read-lock on ipf_state is held. */
2621 /* ------------------------------------------------------------------------ */
2622 static ipstate_t *
ipf_checkicmpmatchingstate(fr_info_t * fin)2623 ipf_checkicmpmatchingstate(fr_info_t *fin)
2624 {
2625 ipf_main_softc_t *softc = fin->fin_main_soft;
2626 ipf_state_softc_t *softs = softc->ipf_state_soft;
2627 ipstate_t *is, **isp;
2628 i6addr_t dst, src;
2629 struct icmp *ic;
2630 u_short savelen;
2631 icmphdr_t *icmp;
2632 fr_info_t ofin;
2633 tcphdr_t *tcp;
2634 int len;
2635 u_char pr;
2636 ip_t *oip;
2637 u_int hv;
2638
2639 /*
2640 * Does it at least have the return (basic) IP header ?
2641 * Is it an actual recognised ICMP error type?
2642 * Only a basic IP header (no options) should be with
2643 * an ICMP error header.
2644 */
2645 if ((fin->fin_v != 4) || (fin->fin_hlen != sizeof(ip_t)) ||
2646 (fin->fin_plen < ICMPERR_MINPKTLEN) ||
2647 !(fin->fin_flx & FI_ICMPERR)) {
2648 SBUMPD(ipf_state_stats, iss_icmp_bad);
2649 return NULL;
2650 }
2651 ic = fin->fin_dp;
2652
2653 oip = (ip_t *)((char *)ic + ICMPERR_ICMPHLEN);
2654 /*
2655 * Check if the at least the old IP header (with options) and
2656 * 8 bytes of payload is present.
2657 */
2658 if (fin->fin_plen < ICMPERR_MAXPKTLEN + ((IP_HL(oip) - 5) << 2)) {
2659 SBUMPDX(ipf_state_stats, iss_icmp_short, iss_icmp_short_1);
2660 return NULL;
2661 }
2662
2663 /*
2664 * Sanity Checks.
2665 */
2666 len = fin->fin_dlen - ICMPERR_ICMPHLEN;
2667 if ((len <= 0) || ((IP_HL(oip) << 2) > len)) {
2668 DT2(iss_icmp_len, fr_info_t *, fin, struct ip*, oip);
2669 SBUMPDX(ipf_state_stats, iss_icmp_short, iss_icmp_short_1);
2670 return NULL;
2671 }
2672
2673 /*
2674 * Is the buffer big enough for all of it ? It's the size of the IP
2675 * header claimed in the encapsulated part which is of concern. It
2676 * may be too big to be in this buffer but not so big that it's
2677 * outside the ICMP packet, leading to TCP deref's causing problems.
2678 * This is possible because we don't know how big oip_hl is when we
2679 * do the pullup early in ipf_check() and thus can't guarantee it is
2680 * all here now.
2681 */
2682 #ifdef _KERNEL
2683 {
2684 mb_t *m;
2685
2686 m = fin->fin_m;
2687 # if defined(MENTAT)
2688 if ((char *)oip + len > (char *)m->b_wptr) {
2689 SBUMPDX(ipf_state_stats, iss_icmp_short, iss_icmp_short_2);
2690 return NULL;
2691 }
2692 # else
2693 if ((char *)oip + len > (char *)fin->fin_ip + m->m_len) {
2694 SBUMPDX(ipf_state_stats, iss_icmp_short, iss_icmp_short_3);
2695 return NULL;
2696 }
2697 # endif
2698 }
2699 #endif
2700
2701 bcopy((char *)fin, (char *)&ofin, sizeof(*fin));
2702
2703 /*
2704 * in the IPv4 case we must zero the i6addr union otherwise
2705 * the IP6_EQ and IP6_NEQ macros produce the wrong results because
2706 * of the 'junk' in the unused part of the union
2707 */
2708 bzero((char *)&src, sizeof(src));
2709 bzero((char *)&dst, sizeof(dst));
2710
2711 /*
2712 * we make an fin entry to be able to feed it to
2713 * matchsrcdst note that not all fields are encessary
2714 * but this is the cleanest way. Note further we fill
2715 * in fin_mp such that if someone uses it we'll get
2716 * a kernel panic. ipf_matchsrcdst does not use this.
2717 *
2718 * watch out here, as ip is in host order and oip in network
2719 * order. Any change we make must be undone afterwards, like
2720 * oip->ip_len.
2721 */
2722 savelen = oip->ip_len;
2723 oip->ip_len = htons(len);
2724
2725 ofin.fin_flx = FI_NOCKSUM;
2726 ofin.fin_v = 4;
2727 ofin.fin_ip = oip;
2728 ofin.fin_m = NULL; /* if dereferenced, panic XXX */
2729 ofin.fin_mp = NULL; /* if dereferenced, panic XXX */
2730 (void) ipf_makefrip(IP_HL(oip) << 2, oip, &ofin);
2731 ofin.fin_ifp = fin->fin_ifp;
2732 ofin.fin_out = !fin->fin_out;
2733
2734 hv = (pr = oip->ip_p);
2735 src.in4 = oip->ip_src;
2736 hv += src.in4.s_addr;
2737 dst.in4 = oip->ip_dst;
2738 hv += dst.in4.s_addr;
2739
2740 /*
2741 * Reset the short and bad flag here because in ipf_matchsrcdst()
2742 * the flags for the current packet (fin_flx) are compared against
2743 * those for the existing session.
2744 */
2745 ofin.fin_flx &= ~(FI_BAD|FI_SHORT);
2746
2747 /*
2748 * Put old values of ip_len back as we don't know
2749 * if we have to forward the packet or process it again.
2750 */
2751 oip->ip_len = savelen;
2752
2753 switch (oip->ip_p)
2754 {
2755 case IPPROTO_ICMP :
2756 /*
2757 * an ICMP error can only be generated as a result of an
2758 * ICMP query, not as the response on an ICMP error
2759 *
2760 * XXX theoretically ICMP_ECHOREP and the other reply's are
2761 * ICMP query's as well, but adding them here seems strange XXX
2762 */
2763 if ((ofin.fin_flx & FI_ICMPERR) != 0) {
2764 DT1(iss_icmp_icmperr, fr_info_t *, &ofin);
2765 SBUMP(ipf_state_stats.iss_icmp_icmperr);
2766 return NULL;
2767 }
2768
2769 /*
2770 * perform a lookup of the ICMP packet in the state table
2771 */
2772 icmp = (icmphdr_t *)((char *)oip + (IP_HL(oip) << 2));
2773 hv += icmp->icmp_id;
2774 hv = DOUBLE_HASH(hv);
2775
2776 READ_ENTER(&softc->ipf_state);
2777 for (isp = &softs->ipf_state_table[hv];
2778 ((is = *isp) != NULL); ) {
2779 isp = &is->is_hnext;
2780 if ((is->is_p != pr) || (is->is_v != 4))
2781 continue;
2782 if (is->is_pass & FR_NOICMPERR)
2783 continue;
2784
2785 is = ipf_matchsrcdst(&ofin, is, &src, &dst,
2786 NULL, FI_ICMPCMP);
2787 if ((is != NULL) && !ipf_allowstateicmp(fin, is, &src))
2788 return is;
2789 }
2790 RWLOCK_EXIT(&softc->ipf_state);
2791 SBUMPDX(ipf_state_stats, iss_icmp_miss, iss_icmp_miss_1);
2792 return NULL;
2793 case IPPROTO_TCP :
2794 case IPPROTO_UDP :
2795 break;
2796 default :
2797 SBUMPDX(ipf_state_stats, iss_icmp_miss, iss_icmp_miss_2);
2798 return NULL;
2799 }
2800
2801 tcp = (tcphdr_t *)((char *)oip + (IP_HL(oip) << 2));
2802
2803 hv += tcp->th_dport;;
2804 hv += tcp->th_sport;;
2805 hv = DOUBLE_HASH(hv);
2806
2807 READ_ENTER(&softc->ipf_state);
2808 for (isp = &softs->ipf_state_table[hv]; ((is = *isp) != NULL); ) {
2809 isp = &is->is_hnext;
2810 /*
2811 * Only allow this icmp though if the
2812 * encapsulated packet was allowed through the
2813 * other way around. Note that the minimal amount
2814 * of info present does not allow for checking against
2815 * tcp internals such as seq and ack numbers. Only the
2816 * ports are known to be present and can be even if the
2817 * short flag is set.
2818 */
2819 if ((is->is_p == pr) && (is->is_v == 4) &&
2820 (is = ipf_matchsrcdst(&ofin, is, &src, &dst,
2821 tcp, FI_ICMPCMP))) {
2822 if (ipf_allowstateicmp(fin, is, &src) == 0)
2823 return is;
2824 }
2825 }
2826 RWLOCK_EXIT(&softc->ipf_state);
2827 SBUMPDX(ipf_state_stats, iss_icmp_miss, iss_icmp_miss_3);
2828 return NULL;
2829 }
2830
2831
2832 /* ------------------------------------------------------------------------ */
2833 /* Function: ipf_allowstateicmp */
2834 /* Returns: int - 1 = packet denied, 0 = packet allowed */
2835 /* Parameters: fin(I) - pointer to packet information */
2836 /* is(I) - pointer to state table entry */
2837 /* src(I) - source address to check permission for */
2838 /* */
2839 /* For an ICMP packet that has so far matched a state table entry, check if */
2840 /* there are any further refinements that might mean we want to block this */
2841 /* packet. This code isn't specific to either IPv4 or IPv6. */
2842 /* ------------------------------------------------------------------------ */
2843 static int
ipf_allowstateicmp(fr_info_t * fin,ipstate_t * is,i6addr_t * src)2844 ipf_allowstateicmp(fr_info_t *fin, ipstate_t *is, i6addr_t *src)
2845 {
2846 ipf_main_softc_t *softc = fin->fin_main_soft;
2847 ipf_state_softc_t *softs = softc->ipf_state_soft;
2848 frentry_t *savefr;
2849 frentry_t *fr;
2850 u_32_t ipass;
2851 int backward;
2852 int oi;
2853 int i;
2854
2855 fr = is->is_rule;
2856 if (fr != NULL && fr->fr_icmpgrp != NULL) {
2857 savefr = fin->fin_fr;
2858 fin->fin_fr = fr->fr_icmpgrp->fg_start;
2859
2860 ipass = ipf_scanlist(fin, softc->ipf_pass);
2861 fin->fin_fr = savefr;
2862 if (FR_ISBLOCK(ipass)) {
2863 SBUMPD(ipf_state_stats, iss_icmp_headblock);
2864 return 1;
2865 }
2866 }
2867
2868 /*
2869 * i : the index of this packet (the icmp unreachable)
2870 * oi : the index of the original packet found in the
2871 * icmp header (i.e. the packet causing this icmp)
2872 * backward : original packet was backward compared to
2873 * the state
2874 */
2875 backward = IP6_NEQ(&is->is_src, src);
2876 fin->fin_rev = !backward;
2877 i = (!backward << 1) + fin->fin_out;
2878 oi = (backward << 1) + !fin->fin_out;
2879
2880 if (is->is_pass & FR_NOICMPERR) {
2881 SBUMPD(ipf_state_stats, iss_icmp_banned);
2882 return 1;
2883 }
2884 if (is->is_icmppkts[i] > is->is_pkts[oi]) {
2885 SBUMPD(ipf_state_stats, iss_icmp_toomany);
2886 return 1;
2887 }
2888
2889 DT2(iss_icmp_hits, fr_info_t *, fin, ipstate_t *, is);
2890 SBUMP(ipf_state_stats.iss_icmp_hits);
2891 is->is_icmppkts[i]++;
2892
2893 /*
2894 * we deliberately do not touch the timeouts
2895 * for the accompanying state table entry.
2896 * It remains to be seen if that is correct. XXX
2897 */
2898 return 0;
2899 }
2900
2901
2902 /* ------------------------------------------------------------------------ */
2903 /* Function: ipf_ipsmove */
2904 /* Returns: Nil */
2905 /* Parameters: is(I) - pointer to state table entry */
2906 /* hv(I) - new hash value for state table entry */
2907 /* Write Locks: ipf_state */
2908 /* */
2909 /* Move a state entry from one position in the hash table to another. */
2910 /* ------------------------------------------------------------------------ */
2911 static void
ipf_ipsmove(ipf_state_softc_t * softs,ipstate_t * is,u_int hv)2912 ipf_ipsmove(ipf_state_softc_t *softs, ipstate_t *is, u_int hv)
2913 {
2914 ipstate_t **isp;
2915 u_int hvm;
2916
2917 hvm = is->is_hv;
2918
2919 /* TRACE is, is_hv, hvm */
2920
2921 /*
2922 * Remove the hash from the old location...
2923 */
2924 isp = is->is_phnext;
2925 if (is->is_hnext)
2926 is->is_hnext->is_phnext = isp;
2927 *isp = is->is_hnext;
2928 if (softs->ipf_state_table[hvm] == NULL)
2929 softs->ipf_state_stats.iss_inuse--;
2930 softs->ipf_state_stats.iss_bucketlen[hvm]--;
2931
2932 /*
2933 * ...and put the hash in the new one.
2934 */
2935 hvm = DOUBLE_HASH(hv);
2936 is->is_hv = hvm;
2937
2938 /* TRACE is, hv, is_hv, hvm */
2939
2940 isp = &softs->ipf_state_table[hvm];
2941 if (*isp)
2942 (*isp)->is_phnext = &is->is_hnext;
2943 else
2944 softs->ipf_state_stats.iss_inuse++;
2945 softs->ipf_state_stats.iss_bucketlen[hvm]++;
2946 is->is_phnext = isp;
2947 is->is_hnext = *isp;
2948 *isp = is;
2949 }
2950
2951
2952 /* ------------------------------------------------------------------------ */
2953 /* Function: ipf_state_lookup */
2954 /* Returns: ipstate_t* - NULL == no matching state found, */
2955 /* else pointer to state information is returned */
2956 /* Parameters: fin(I) - pointer to packet information */
2957 /* tcp(I) - pointer to TCP/UDP header. */
2958 /* ifqp(O) - pointer for storing tailq timeout */
2959 /* */
2960 /* Search the state table for a matching entry to the packet described by */
2961 /* the contents of *fin. For certain protocols, when a match is found the */
/* timeout queue is also selected and stored in ifqp if it is non-NULL.     */
2963 /* */
2964 /* If we return NULL then no lock on ipf_state is held. */
2965 /* If we return non-null then a read-lock on ipf_state is held. */
2966 /* ------------------------------------------------------------------------ */
2967 ipstate_t *
ipf_state_lookup(fr_info_t * fin,tcphdr_t * tcp,ipftq_t ** ifqp)2968 ipf_state_lookup(fr_info_t *fin, tcphdr_t *tcp, ipftq_t **ifqp)
2969 {
/*
 * Build a hash from the packet described by fin, walk the corresponding
 * bucket of softs->ipf_state_table and return the first entry accepted by
 * ipf_matchsrcdst() plus the per-protocol checks below.  When an entry is
 * found and both ifq and ifqp are non-NULL, *ifqp receives the timeout
 * queue chosen for it.  A NULL result bumps iss_lookup_miss.
 */
2970 ipf_main_softc_t *softc = fin->fin_main_soft;
2971 ipf_state_softc_t *softs = softc->ipf_state_soft;
2972 u_int hv, hvm, pr, v, tryagain;
2973 ipstate_t *is, **isp;
2974 u_short dport, sport;
2975 i6addr_t src, dst;
2976 struct icmp *ic;
2977 ipftq_t *ifq;
2978 int oow;
2979
2980 is = NULL;
2981 ifq = NULL;
/* The tcp argument is overwritten: the transport header always comes from fin_dp. */
2982 tcp = fin->fin_dp;
2983 ic = (struct icmp *)tcp;
/* Seed the hash with the protocol number and the in4 view of both addresses. */
2984 hv = (pr = fin->fin_fi.fi_p);
2985 src = fin->fin_fi.fi_src;
2986 dst = fin->fin_fi.fi_dst;
2987 hv += src.in4.s_addr;
2988 hv += dst.in4.s_addr;
2989
2990 v = fin->fin_fi.fi_v;
2991 #ifdef USE_INET6
2992 if (v == 6) {
/* Words 1..3 of the IPv6 source address complete the in4 part added above. */
2993 hv += fin->fin_fi.fi_src.i6[1];
2994 hv += fin->fin_fi.fi_src.i6[2];
2995 hv += fin->fin_fi.fi_src.i6[3];
2996
/*
 * ICMPv6 to a multicast destination: the destination is left out of the
 * hash altogether, so the in4 part added earlier is taken back out.
 */
2997 if ((fin->fin_p == IPPROTO_ICMPV6) &&
2998 IN6_IS_ADDR_MULTICAST(&fin->fin_fi.fi_dst.in6)) {
2999 hv -= dst.in4.s_addr;
3000 } else {
3001 hv += fin->fin_fi.fi_dst.i6[1];
3002 hv += fin->fin_fi.fi_dst.i6[2];
3003 hv += fin->fin_fi.fi_dst.i6[3];
3004 }
3005 }
3006 #endif
/*
 * IPv4 multicast/broadcast: one address is removed from the hash again,
 * the source for inbound packets and the destination for outbound ones.
 */
3007 if ((v == 4) &&
3008 (fin->fin_flx & (FI_MULTICAST|FI_BROADCAST|FI_MBCAST))) {
3009 if (fin->fin_out == 0) {
3010 hv -= src.in4.s_addr;
3011 } else {
3012 hv -= dst.in4.s_addr;
3013 }
3014 }
3015
3016 /* TRACE fin_saddr, fin_daddr, hv */
3017
3018 /*
3019 * Search the hash table for matching packet header info.
3020 */
3021 switch (pr)
3022 {
3023 #ifdef USE_INET6
3024 case IPPROTO_ICMPV6 :
3025 tryagain = 0;
3026 if (v == 6) {
/* Only echo request/reply contribute the ICMP id to the hash. */
3027 if ((ic->icmp_type == ICMP6_ECHO_REQUEST) ||
3028 (ic->icmp_type == ICMP6_ECHO_REPLY)) {
3029 hv += ic->icmp_id;
3030 }
3031 }
/*
 * The first pass runs under the read lock.  The wildcard retry further
 * down re-enters at icmp6again holding the write lock instead.
 */
3032 READ_ENTER(&softc->ipf_state);
3033 icmp6again:
3034 hvm = DOUBLE_HASH(hv);
3035 for (isp = &softs->ipf_state_table[hvm];
3036 ((is = *isp) != NULL); ) {
/* isp is advanced first because is may be replaced (or set NULL) below. */
3037 isp = &is->is_hnext;
3038 if ((is->is_p != pr) || (is->is_v != v))
3039 continue;
3040 is = ipf_matchsrcdst(fin, is, &src, &dst, NULL, FI_CMP);
3041 if (is != NULL &&
3042 ipf_matchicmpqueryreply(v, &is->is_icmp,
3043 ic, fin->fin_rev)) {
3044 if (fin->fin_rev)
3045 ifq = &softs->ipf_state_icmpacktq;
3046 else
3047 ifq = &softs->ipf_state_icmptq;
3048 break;
3049 }
3050 }
3051
3052 if (is != NULL) {
/*
 * Matched on the retry pass: put the four source address words back into
 * hv, move the entry with ipf_ipsmove() and downgrade the write lock.
 * NOTE(review): when SI_W_DADDR is set no MUTEX_DOWNGRADE is done, so the
 * write lock appears to still be held on return -- confirm callers cope.
 */
3053 if ((tryagain != 0) && !(is->is_flags & SI_W_DADDR)) {
3054 hv += fin->fin_fi.fi_src.i6[0];
3055 hv += fin->fin_fi.fi_src.i6[1];
3056 hv += fin->fin_fi.fi_src.i6[2];
3057 hv += fin->fin_fi.fi_src.i6[3];
3058 ipf_ipsmove(softs, is, hv);
3059 MUTEX_DOWNGRADE(&softc->ipf_state);
3060 }
3061 break;
3062 }
3063 RWLOCK_EXIT(&softc->ipf_state);
3064
3065 /*
3066 * No matching icmp state entry. Perhaps this is a
3067 * response to another state entry.
3068 *
3069 * XXX With some ICMP6 packets, the "other" address is already
3070 * in the packet, after the ICMP6 header, and this could be
3071 * used in place of the multicast address. However, taking
3072 * advantage of this requires some significant code changes
3073 * to handle the specific types where that is the case.
3074 */
/* Retry exactly once with the source address removed from the hash. */
3075 if ((softs->ipf_state_stats.iss_wild != 0) &&
3076 ((fin->fin_flx & FI_NOWILD) == 0) &&
3077 (v == 6) && (tryagain == 0)) {
3078 hv -= fin->fin_fi.fi_src.i6[0];
3079 hv -= fin->fin_fi.fi_src.i6[1];
3080 hv -= fin->fin_fi.fi_src.i6[2];
3081 hv -= fin->fin_fi.fi_src.i6[3];
3082 tryagain = 1;
3083 WRITE_ENTER(&softc->ipf_state);
3084 goto icmp6again;
3085 }
3086
/*
 * The ipf_state lock was released above.  A hit from the ICMPv6 error
 * matcher is returned directly, skipping the queue selection at the end.
 * NOTE(review): the lock state on a non-NULL return from
 * ipf_checkicmp6matchingstate() is not visible here -- confirm.
 */
3087 is = ipf_checkicmp6matchingstate(fin);
3088 if (is != NULL)
3089 return is;
3090 break;
3091 #endif
3092
3093 case IPPROTO_ICMP :
3094 if (v == 4) {
3095 hv += ic->icmp_id;
3096 }
/* Unlike the other cases, hv itself is reduced to the bucket index here. */
3097 hv = DOUBLE_HASH(hv);
3098 READ_ENTER(&softc->ipf_state);
3099 for (isp = &softs->ipf_state_table[hv];
3100 ((is = *isp) != NULL); ) {
3101 isp = &is->is_hnext;
3102 if ((is->is_p != pr) || (is->is_v != v))
3103 continue;
3104 is = ipf_matchsrcdst(fin, is, &src, &dst, NULL, FI_CMP);
3105 if ((is != NULL) &&
3106 (ic->icmp_id == is->is_icmp.ici_id) &&
3107 ipf_matchicmpqueryreply(v, &is->is_icmp,
3108 ic, fin->fin_rev)) {
3109 if (fin->fin_rev)
3110 ifq = &softs->ipf_state_icmpacktq;
3111 else
3112 ifq = &softs->ipf_state_icmptq;
3113 break;
3114 }
3115 }
/* The lock is kept only when an entry is being returned. */
3116 if (is == NULL) {
3117 RWLOCK_EXIT(&softc->ipf_state);
3118 }
3119 break;
3120
3121 case IPPROTO_TCP :
3122 case IPPROTO_UDP :
/* Clearing the local ifqp means no queue is reported back for TCP/UDP. */
3123 ifqp = NULL;
/* Fold both port values (fin_data[0] and fin_data[1]) into the hash. */
3124 sport = htons(fin->fin_data[0]);
3125 hv += sport;
3126 dport = htons(fin->fin_data[1]);
3127 hv += dport;
3128 oow = 0;
3129 tryagain = 0;
3130 READ_ENTER(&softc->ipf_state);
3131 retry_tcpudp:
3132 hvm = DOUBLE_HASH(hv);
3133
3134 /* TRACE hv, hvm */
3135
3136 for (isp = &softs->ipf_state_table[hvm];
3137 ((is = *isp) != NULL); ) {
3138 isp = &is->is_hnext;
3139 if ((is->is_p != pr) || (is->is_v != v))
3140 continue;
/*
 * FI_OOW is cleared for each candidate.  When ipf_state_tcp() rejects the
 * packet, whatever FI_OOW bit is then set in fin_flx is collected in oow
 * and the search goes on with the next entry.
 */
3141 fin->fin_flx &= ~FI_OOW;
3142 is = ipf_matchsrcdst(fin, is, &src, &dst, tcp, FI_CMP);
3143 if (is != NULL) {
3144 if (pr == IPPROTO_TCP) {
3145 if (!ipf_state_tcp(softc, softs, fin,
3146 tcp, is)) {
3147 oow |= fin->fin_flx & FI_OOW;
3148 continue;
3149 }
3150 }
3151 break;
3152 }
3153 }
3154 if (is != NULL) {
/*
 * Found on a retry pass (write lock held).  Unless the entry carries a
 * clone/wildcard flag, the ports are added to hv again, the entry is moved
 * with ipf_ipsmove() and the lock is downgraded.
 * NOTE(review): with one of those flags set the write lock is not
 * downgraded before returning -- confirm this is intended.
 */
3155 if (tryagain &&
3156 !(is->is_flags & (SI_CLONE|SI_WILDP|SI_WILDA))) {
3157 hv += dport;
3158 hv += sport;
3159 ipf_ipsmove(softs, is, hv);
3160 MUTEX_DOWNGRADE(&softc->ipf_state);
3161 }
3162 break;
3163 }
3164 RWLOCK_EXIT(&softc->ipf_state);
3165
/*
 * Wildcard retries, only when wildcard entries exist and FI_NOWILD is not
 * set.  Retry 1 hashes without the ports.  Retry 2 restarts the hash from
 * the protocol, one address (picked by direction) and both ports.
 */
3166 if ((softs->ipf_state_stats.iss_wild != 0) &&
3167 ((fin->fin_flx & FI_NOWILD) == 0)) {
3168 if (tryagain == 0) {
3169 hv -= dport;
3170 hv -= sport;
3171 } else if (tryagain == 1) {
3172 hv = fin->fin_fi.fi_p;
3173 /*
3174 * If we try to pretend this is a reply to a
3175 * multicast/broadcast packet then we need to
3176 * exclude part of the address from the hash
3177 * calculation.
3178 */
3179 if (fin->fin_out == 0) {
3180 hv += src.in4.s_addr;
3181 } else {
3182 hv += dst.in4.s_addr;
3183 }
3184 hv += dport;
3185 hv += sport;
3186 }
3187 tryagain++;
3188 if (tryagain <= 2) {
3189 WRITE_ENTER(&softc->ipf_state);
3190 goto retry_tcpudp;
3191 }
3192 }
/* Every pass missed: hand back any FI_OOW indication gathered on the way. */
3193 fin->fin_flx |= oow;
3194 break;
3195
3196 #if 0
3197 case IPPROTO_GRE :
3198 gre = fin->fin_dp;
3199 if (GRE_REV(gre->gr_flags) == 1) {
3200 hv += gre->gr_call;
3201 }
3202 /* FALLTHROUGH */
3203 #endif
3204 default :
/* As for TCP/UDP, ifqp is cleared, so the ifq picked below is not passed back. */
3205 ifqp = NULL;
3206 hvm = DOUBLE_HASH(hv);
3207 READ_ENTER(&softc->ipf_state);
3208 for (isp = &softs->ipf_state_table[hvm];
3209 ((is = *isp) != NULL); ) {
3210 isp = &is->is_hnext;
3211 if ((is->is_p != pr) || (is->is_v != v))
3212 continue;
3213 is = ipf_matchsrcdst(fin, is, &src, &dst, NULL, FI_CMP);
3214 if (is != NULL) {
3215 ifq = &softs->ipf_state_iptq;
3216 break;
3217 }
3218 }
3219 if (is == NULL) {
3220 RWLOCK_EXIT(&softc->ipf_state);
3221 }
3222 break;
3223 }
3224
3225 if (is != NULL) {
/* A rule-based queue for this direction replaces the protocol default. */
3226 if (((is->is_sti.tqe_flags & TQE_RULEBASED) != 0) &&
3227 (is->is_tqehead[fin->fin_rev] != NULL))
3228 ifq = is->is_tqehead[fin->fin_rev];
3229 if (ifq != NULL && ifqp != NULL)
3230 *ifqp = ifq;
3231 } else {
3232 SBUMP(ipf_state_stats.iss_lookup_miss);
3233 }
3234 return is;
3235 }
3236
3237
3238 /* ------------------------------------------------------------------------ */
3239 /* Function: ipf_state_check */
3240 /* Returns: frentry_t* - NULL == search failed, */
3241 /* else pointer to rule for matching state */
3242 /* Parameters: fin(I) - pointer to packet information */
3243 /* passp(O) - pointer to filtering result flags; only written on a hit */
3244 /* */
3245 /* Check if a packet is associated with an entry in the state table. */
/* On a hit the entry's packet/byte counters are updated, fin is annotated */
/* (rule, group, FI_STATE, interface pointers) and the entry's pass flags */
/* are stored through passp. Note that the rule pointer of a matching */
/* entry (is_rule) may itself be NULL, in which case NULL is returned even */
/* though *passp has been set. */
3246 /* ------------------------------------------------------------------------ */
3247 frentry_t *
ipf_state_check(fr_info_t * fin,u_32_t * passp)3248 ipf_state_check(fr_info_t *fin, u_32_t *passp)
3249 {
3250 ipf_main_softc_t *softc = fin->fin_main_soft;
3251 ipf_state_softc_t *softs = softc->ipf_state_soft;
3252 ipftqent_t *tqe;
3253 ipstate_t *is;
3254 frentry_t *fr;
3255 tcphdr_t *tcp;
3256 ipftq_t *ifq;
3257 u_int pass;
3258 int inout;
3259
/* Bail out when ipf_state_lock is set or no state entries exist at all. */
3260 if (softs->ipf_state_lock || (softs->ipf_state_list == NULL))
3261 return NULL;
3262
/* Short packets, fragment bodies and packets flagged bad are never matched. */
3263 if (fin->fin_flx & (FI_SHORT|FI_FRAGBODY|FI_BAD)) {
3264 SBUMPD(ipf_state_stats, iss_check_bad);
3265 return NULL;
3266 }
3267
/*
 * Only TCP/UDP and ICMP(v6) get a transport header pointer here; note that
 * ipf_state_lookup() reloads its own copy from fin_dp regardless.
 */
3268 if ((fin->fin_flx & FI_TCPUDP) ||
3269 (fin->fin_fi.fi_p == IPPROTO_ICMP)
3270 #ifdef USE_INET6
3271 || (fin->fin_fi.fi_p == IPPROTO_ICMPV6)
3272 #endif
3273 )
3274 tcp = fin->fin_dp;
3275 else
3276 tcp = NULL;
3277
3278 ifq = NULL;
3279 /*
3280 * Search the hash table for matching packet header info.
3281 */
3282 is = ipf_state_lookup(fin, tcp, &ifq);
3283
3284 switch (fin->fin_p)
3285 {
3286 #ifdef USE_INET6
3287 case IPPROTO_ICMPV6 :
3288 if (is != NULL)
3289 break;
/*
 * ipf_state_lookup() already tries ipf_checkicmp6matchingstate() on a
 * miss; it is tried once more here.
 */
3290 if (fin->fin_v == 6) {
3291 is = ipf_checkicmp6matchingstate(fin);
3292 }
3293 break;
3294 #endif
3295 case IPPROTO_ICMP :
3296 if (is != NULL)
3297 break;
3298 /*
3299 * No matching icmp state entry. Perhaps this is a
3300 * response to another state entry.
3301 */
3302 is = ipf_checkicmpmatchingstate(fin);
3303 break;
3304
3305 case IPPROTO_TCP :
3306 if (is == NULL)
3307 break;
3308
/* FR_NEWISN: rewrite the ack (inbound) or sequence (outbound) number. */
3309 if (is->is_pass & FR_NEWISN) {
3310 if (fin->fin_out == 0)
3311 ipf_fixinisn(fin, is);
3312 else if (fin->fin_out == 1)
3313 ipf_fixoutisn(fin, is);
3314 }
3315 break;
/*
 * Reached for UDP and for every protocol not listed above; the UDP
 * timeout queues are selected in all of those cases.
 */
3316 default :
3317 if (fin->fin_rev)
3318 ifq = &softs->ipf_state_udpacktq;
3319 else
3320 ifq = &softs->ipf_state_udptq;
3321 break;
3322 }
3323 if (is == NULL) {
3324 SBUMP(ipf_state_stats.iss_check_miss);
3325 return NULL;
3326 }
3327
/*
 * From here on an entry was found.  Every return path below releases
 * softc->ipf_state with RWLOCK_EXIT().
 */
3328 fr = is->is_rule;
3329 if (fr != NULL) {
/*
 * Inbound packets are rejected when the rule's NAT tag is non-zero and the
 * packet either has no tag or ipf_matchtag() reports a mismatch.
 */
3330 if ((fin->fin_out == 0) && (fr->fr_nattag.ipt_num[0] != 0)) {
3331 if (fin->fin_nattag == NULL) {
3332 RWLOCK_EXIT(&softc->ipf_state);
3333 SBUMPD(ipf_state_stats, iss_check_notag);
3334 return NULL;
3335 }
3336 if (ipf_matchtag(&fr->fr_nattag, fin->fin_nattag)!=0) {
3337 RWLOCK_EXIT(&softc->ipf_state);
3338 SBUMPD(ipf_state_stats, iss_check_nattag);
3339 return NULL;
3340 }
3341 }
/*
 * NOTE(review): strncpy() leaves fin_group unterminated when the group
 * name is FR_GROUPLEN bytes or longer -- confirm that readers of
 * fin_group cope with that.
 */
3342 (void) strncpy(fin->fin_group, FR_NAME(fr, fr_group),
3343 FR_GROUPLEN);
3344 fin->fin_icode = fr->fr_icode;
3345 }
3346
3347 fin->fin_rule = is->is_rulen;
3348 fin->fin_fr = fr;
3349
3350 /*
3351 * If this packet is a fragment and the rule says to track fragments,
3352 * then create a new fragment cache entry.
3353 */
3354 if (fin->fin_flx & FI_FRAG && FR_ISPASS(is->is_pass) &&
3355 is->is_pass & FR_KEEPFRAG)
3356 (void) ipf_frag_new(softc, fin, is->is_pass);
3357
3358 /*
3359 * For TCP packets, ifq == NULL. For all others, check if this new
3360 * queue is different to the last one it was on and move it if so.
3361 */
3362 tqe = &is->is_sti;
3363 if ((tqe->tqe_flags & TQE_RULEBASED) != 0)
3364 ifq = is->is_tqehead[fin->fin_rev];
3365
/* The queue move and the counters are updated under the entry's own mutex. */
3366 MUTEX_ENTER(&is->is_lock);
3367
3368 if (ifq != NULL)
3369 ipf_movequeue(softc->ipf_ticks, tqe, tqe->tqe_ifq, ifq);
3370
/* Counter slot: bit 1 is the reverse-direction flag, bit 0 is fin_out. */
3371 inout = (fin->fin_rev << 1) + fin->fin_out;
3372 is->is_pkts[inout]++;
3373 is->is_bytes[inout] += fin->fin_plen;
3374 fin->fin_pktnum = is->is_pkts[inout] + is->is_icmppkts[inout];
3375
3376 MUTEX_EXIT(&is->is_lock);
3377
3378 pass = is->is_pass;
3379
3380 if (is->is_flags & IS_STATESYNC)
3381 ipf_sync_update(softc, SMC_STATE, fin, is->is_sync);
3382
3383 RWLOCK_EXIT(&softc->ipf_state);
3384
3385 SBUMP(ipf_state_stats.iss_hits);
3386
/*
 * The ipf_state lock is no longer held at this point; the two assignments
 * below store addresses of members of the state entry into fin.
 */
3387 fin->fin_dif = &is->is_dif;
3388 fin->fin_tif = &is->is_tifs[fin->fin_rev];
3389 fin->fin_flx |= FI_STATE;
/* With FR_LOGFIRST set, both it and FR_LOG are stripped from the result. */
3390 if ((pass & FR_LOGFIRST) != 0)
3391 pass &= ~(FR_LOGFIRST|FR_LOG);
3392 *passp = pass;
3393 return fr;
3394 }
3395
3396
3397 /* ------------------------------------------------------------------------ */
3398 /* Function: ipf_fixoutisn */
3399 /* Returns: Nil */
3400 /* Parameters: fin(I) - pointer to packet information */
3401 /* is(I) - pointer to master state structure */
3402 /* */
3403 /* Called only for outbound packets, adjusts the sequence number and the */
3404 /* TCP checksum to match that change. */
3405 /* ------------------------------------------------------------------------ */
3406 static void
ipf_fixoutisn(fr_info_t * fin,ipstate_t * is)3407 ipf_fixoutisn(fr_info_t *fin, ipstate_t *is)
3408 {
3409 tcphdr_t *tcp;
3410 int rev;
3411 u_32_t seq;
3412
3413 tcp = fin->fin_dp;
3414 rev = fin->fin_rev;
3415 if ((is->is_flags & IS_ISNSYN) != 0) {
3416 if ((rev == 0) && (fin->fin_cksum < FI_CK_L4PART)) {
3417 seq = ntohl(tcp->th_seq);
3418 seq += is->is_isninc[0];
3419 tcp->th_seq = htonl(seq);
3420 ipf_fix_outcksum(0, &tcp->th_sum, is->is_sumd[0], 0);
3421 }
3422 }
3423 if ((is->is_flags & IS_ISNACK) != 0) {
3424 if ((rev == 1) && (fin->fin_cksum < FI_CK_L4PART)) {
3425 seq = ntohl(tcp->th_seq);
3426 seq += is->is_isninc[1];
3427 tcp->th_seq = htonl(seq);
3428 ipf_fix_outcksum(0, &tcp->th_sum, is->is_sumd[1], 0);
3429 }
3430 }
3431 }
3432
3433
3434 /* ------------------------------------------------------------------------ */
3435 /* Function: ipf_fixinisn */
3436 /* Returns: Nil */
3437 /* Parameters: fin(I) - pointer to packet information */
3438 /* is(I) - pointer to master state structure */
3439 /* */
3440 /* Called only for inbound packets, adjusts the acknowledge number and the */
3441 /* TCP checksum to match that change. */
3442 /* ------------------------------------------------------------------------ */
3443 static void
ipf_fixinisn(fr_info_t * fin,ipstate_t * is)3444 ipf_fixinisn(fr_info_t *fin, ipstate_t *is)
3445 {
3446 tcphdr_t *tcp;
3447 int rev;
3448 u_32_t ack;
3449
3450 tcp = fin->fin_dp;
3451 rev = fin->fin_rev;
3452 if ((is->is_flags & IS_ISNSYN) != 0) {
3453 if ((rev == 1) && (fin->fin_cksum < FI_CK_L4PART)) {
3454 ack = ntohl(tcp->th_ack);
3455 ack -= is->is_isninc[0];
3456 tcp->th_ack = htonl(ack);
3457 ipf_fix_incksum(0, &tcp->th_sum, is->is_sumd[0], 0);
3458 }
3459 }
3460 if ((is->is_flags & IS_ISNACK) != 0) {
3461 if ((rev == 0) && (fin->fin_cksum < FI_CK_L4PART)) {
3462 ack = ntohl(tcp->th_ack);
3463 ack -= is->is_isninc[1];
3464 tcp->th_ack = htonl(ack);
3465 ipf_fix_incksum(0, &tcp->th_sum, is->is_sumd[1], 0);
3466 }
3467 }
3468 }
3469
3470
3471 /* ------------------------------------------------------------------------ */
3472 /* Function: ipf_state_sync */
3473 /* Returns: Nil */
3474 /* Parameters: softc(I) - pointer to soft context main structure */
3475 /* ifp(I) - pointer to interface */
3476 /* */
3477 /* Walk through all state entries and if an interface pointer match is */
3478 /* found then look it up again, based on its name in case the pointer has */
3479 /* changed since last time. */
3480 /* */
3481 /* If ifp is passed in as being non-null then we are only doing updates for */
3482 /* existing, matching, uses of it. */
3483 /* ------------------------------------------------------------------------ */
3484 void
ipf_state_sync(ipf_main_softc_t * softc,void * ifp)3485 ipf_state_sync(ipf_main_softc_t *softc, void *ifp)
3486 {
3487 ipf_state_softc_t *softs = softc->ipf_state_soft;
3488 ipstate_t *is;
3489 int i;
3490
3491 if (softc->ipf_running <= 0)
3492 return;
3493
3494 WRITE_ENTER(&softc->ipf_state);
3495
3496 if (softc->ipf_running <= 0) {
3497 RWLOCK_EXIT(&softc->ipf_state);
3498 return;
3499 }
3500
3501 for (is = softs->ipf_state_list; is; is = is->is_next) {
3502 /*
3503 * Look up all the interface names in the state entry.
3504 */
3505 for (i = 0; i < 4; i++) {
3506 if (ifp == NULL || ifp == is->is_ifp[i])
3507 is->is_ifp[i] = ipf_resolvenic(softc,
3508 is->is_ifname[i],
3509 is->is_v);
3510 }
3511 }
3512 RWLOCK_EXIT(&softc->ipf_state);
3513 }
3514
3515
3516 /* ------------------------------------------------------------------------ */
3517 /* Function: ipf_state_del */
3518 /* Returns: int - 0 = deleted, else reference count on active struct */
3519 /* Parameters: softc(I) - pointer to soft context main structure */
3520 /* is(I) - pointer to state structure to delete */
3521 /* why(I) - if not 0, log reason why it was deleted */
3522 /* Write Locks: ipf_state */
3523 /* */
3524 /* Deletes a state entry from the enumerated list as well as the hash table */
3525 /* and timeout queue lists. Make adjustments to hash table statistics and */
3526 /* global counters as required. */
/* If references remain after this call's decrements, the entry is only */
/* taken off the hash table and its timeout queue; it stays on the */
/* enumerated list and is not freed until a later call drops the count to 0. */
3527 /* ------------------------------------------------------------------------ */
3528 static int
ipf_state_del(ipf_main_softc_t * softc,ipstate_t * is,int why)3529 ipf_state_del(ipf_main_softc_t *softc, ipstate_t *is, int why)
3530 {
3531 ipf_state_softc_t *softs = softc->ipf_state_soft;
/* orphan stays 1 when the entry was already off the hash table on entry. */
3532 int orphan = 1;
3533 frentry_t *fr;
3534
3535 /*
3536 * Since we want to delete this, remove it from the state table,
3537 * where it can be found & used, first.
3538 */
3539 if (is->is_phnext != NULL) {
3540 *is->is_phnext = is->is_hnext;
3541 if (is->is_hnext != NULL)
3542 is->is_hnext->is_phnext = is->is_phnext;
/* The bucket became empty, so one fewer bucket is in use. */
3543 if (softs->ipf_state_table[is->is_hv] == NULL)
3544 softs->ipf_state_stats.iss_inuse--;
3545 softs->ipf_state_stats.iss_bucketlen[is->is_hv]--;
3546
3547 is->is_phnext = NULL;
3548 is->is_hnext = NULL;
3549 orphan = 0;
3550 }
3551
3552 /*
3553 * Because ipf_state_stats.iss_wild is a count of entries in the state
3554 * table that have wildcard flags set, only decrement it once
3555 * and do it here.
3556 */
3557 if (is->is_flags & (SI_WILDP|SI_WILDA)) {
3558 if (!(is->is_flags & SI_CLONED)) {
3559 ATOMIC_DECL(softs->ipf_state_stats.iss_wild);
3560 }
/* Clearing the flags keeps a second call from decrementing iss_wild again. */
3561 is->is_flags &= ~(SI_WILDP|SI_WILDA);
3562 }
3563
3564 /*
3565 * Next, remove it from the timeout queue it is in.
3566 */
3567 if (is->is_sti.tqe_ifq != NULL)
3568 ipf_deletequeueentry(&is->is_sti);
3569
3570 /*
3571 * If it is still in use by something else, do not go any further,
3572 * but note that at this point it is now an orphan. How can this
3573 * be? ipf_state_flush() calls ipf_state_del() directly because it wants
3574 * to empty the table out and if something has a hold on a state
3575 * entry (such as ipfstat), it'll do the deref path that'll bring
3576 * us back here to do the real delete & free.
3577 */
3578 MUTEX_ENTER(&is->is_lock);
/* Clear the back-pointer reached through is_me and drop its reference. */
3579 if (is->is_me != NULL) {
3580 *is->is_me = NULL;
3581 is->is_me = NULL;
3582 is->is_ref--;
3583 }
/* Drop one more reference; anything left over means it is still in use. */
3584 is->is_ref--;
3585 if (is->is_ref > 0) {
3586 int refs;
3587
3588 refs = is->is_ref;
3589 MUTEX_EXIT(&is->is_lock);
/* Count it as newly orphaned only if this call took it off the hash table. */
3590 if (!orphan)
3591 softs->ipf_state_stats.iss_orphan++;
3592 return refs;
3593 }
3594
3595 fr = is->is_rule;
3596 is->is_rule = NULL;
3597 if (fr != NULL) {
/* Source tracking is active for the rule when ht_max_nodes is non-zero. */
3598 if (fr->fr_srctrack.ht_max_nodes != 0) {
3599 (void) ipf_ht_node_del(&fr->fr_srctrack,
3600 is->is_family, &is->is_src);
3601 }
3602 }
3603
3604 ASSERT(is->is_ref == 0);
3605 MUTEX_EXIT(&is->is_lock);
3606
/*
 * Per-direction timeout queues attached to the entry: each is passed to
 * ipf_deletetimeoutqueue() and freed when that call returns 0.
 */
3607 if (is->is_tqehead[0] != NULL) {
3608 if (ipf_deletetimeoutqueue(is->is_tqehead[0]) == 0)
3609 ipf_freetimeoutqueue(softc, is->is_tqehead[0]);
3610 }
3611 if (is->is_tqehead[1] != NULL) {
3612 if (ipf_deletetimeoutqueue(is->is_tqehead[1]) == 0)
3613 ipf_freetimeoutqueue(softc, is->is_tqehead[1]);
3614 }
3615
3616 if (is->is_sync)
3617 ipf_sync_del_state(softc->ipf_sync_soft, is->is_sync);
3618
3619 /*
3620 * Now remove it from the linked list of known states
3621 */
3622 if (is->is_pnext != NULL) {
3623 *is->is_pnext = is->is_next;
3624
3625 if (is->is_next != NULL)
3626 is->is_next->is_pnext = is->is_pnext;
3627
3628 is->is_pnext = NULL;
3629 is->is_next = NULL;
3630 }
3631
3632 if (softs->ipf_state_logging != 0 && why != 0)
3633 ipf_state_log(softc, is, why);
3634
/* Statistics: TCP removals count as iss_fin, all others as iss_expire. */
3635 if (is->is_p == IPPROTO_TCP)
3636 softs->ipf_state_stats.iss_fin++;
3637 else
3638 softs->ipf_state_stats.iss_expire++;
/* It was already off the hash table when this call started. */
3639 if (orphan)
3640 softs->ipf_state_stats.iss_orphan--;
3641
/* Give back the rule's state count and the reference held on the rule. */
3642 if (fr != NULL) {
3643 fr->fr_statecnt--;
3644 (void) ipf_derefrule(softc, &fr);
3645 }
3646
3647 softs->ipf_state_stats.iss_active_proto[is->is_p]--;
3648
3649 MUTEX_DESTROY(&is->is_lock);
3650 KFREE(is);
3651 softs->ipf_state_stats.iss_active--;
3652
3653 return 0;
3654 }
3655
3656
3657 /* ------------------------------------------------------------------------ */
3658 /* Function: ipf_state_expire */
3659 /* Returns: Nil */
3660 /* Parameters: softc(I) - pointer to soft context main structure */
3661 /* */
3662 /* Slowly expire held state for thingslike UDP and ICMP. The algorithm */
3663 /* used here is to keep the queue sorted with the oldest things at the top */
3664 /* and the youngest at the bottom. So if the top one doesn't need to be */
3665 /* expired then neither will any under it. */
3666 /* ------------------------------------------------------------------------ */
3667 void
ipf_state_expire(ipf_main_softc_t * softc)3668 ipf_state_expire(ipf_main_softc_t *softc)
3669 {
3670 ipf_state_softc_t *softs = softc->ipf_state_soft;
3671 ipftq_t *ifq, *ifqnext;
3672 ipftqent_t *tqe, *tqn;
3673 ipstate_t *is;
3674 SPL_INT(s);
3675
3676 SPL_NET(s);
3677 WRITE_ENTER(&softc->ipf_state);
3678 for (ifq = softs->ipf_state_tcptq; ifq != NULL; ifq = ifq->ifq_next)
3679 for (tqn = ifq->ifq_head; ((tqe = tqn) != NULL); ) {
3680 if (tqe->tqe_die > softc->ipf_ticks)
3681 break;
3682 tqn = tqe->tqe_next;
3683 is = tqe->tqe_parent;
3684 ipf_state_del(softc, is, ISL_EXPIRE);
3685 }
3686
3687 for (ifq = softs->ipf_state_usertq; ifq != NULL; ifq = ifqnext) {
3688 ifqnext = ifq->ifq_next;
3689
3690 for (tqn = ifq->ifq_head; ((tqe = tqn) != NULL); ) {
3691 if (tqe->tqe_die > softc->ipf_ticks)
3692 break;
3693 tqn = tqe->tqe_next;
3694 is = tqe->tqe_parent;
3695 ipf_state_del(softc, is, ISL_EXPIRE);
3696 }
3697 }
3698
3699 for (ifq = softs->ipf_state_usertq; ifq != NULL; ifq = ifqnext) {
3700 ifqnext = ifq->ifq_next;
3701
3702 if (((ifq->ifq_flags & IFQF_DELETE) != 0) &&
3703 (ifq->ifq_ref == 0)) {
3704 ipf_freetimeoutqueue(softc, ifq);
3705 }
3706 }
3707
3708 if (softs->ipf_state_doflush) {
3709 (void) ipf_state_flush(softc, 2, 0);
3710 softs->ipf_state_doflush = 0;
3711 softs->ipf_state_wm_last = softc->ipf_ticks;
3712 }
3713
3714 RWLOCK_EXIT(&softc->ipf_state);
3715 SPL_X(s);
3716 }
3717
3718
3719 /* ------------------------------------------------------------------------ */
3720 /* Function: ipf_state_flush */
3721 /* Returns: int - 0 == success, -1 == failure */
3722 /* Parameters: softc(I) - pointer to soft context main structure */
3723 /* which(I) - which flush action to perform */
3724 /* proto(I) - which protocol to flush (0 == ALL) */
3725 /* Write Locks: ipf_state */
3726 /* */
3727 /* Flush state tables. Three actions currently defined: */
3728 /* which == 0 : flush all state table entries */
3729 /* which == 1 : flush TCP connections which have started to close but are */
3730 /* stuck for some reason. */
3731 /* which == 2 : flush TCP connections which have been idle for a long time, */
3732 /* starting at > 4 days idle and working back in successive half-*/
3733 /* days to at most 12 hours old. If this fails to free enough */
3734 /* slots then work backwards in half hour slots to 30 minutes. */
3735 /* If that too fails, then work backwards in 30 second intervals */
3736 /* for the last 30 minutes to at worst 30 seconds idle. */
3737 /* ------------------------------------------------------------------------ */
3738 int
ipf_state_flush(ipf_main_softc_t * softc,int which,int proto)3739 ipf_state_flush(ipf_main_softc_t *softc, int which, int proto)
3740 {
3741 ipf_state_softc_t *softs = softc->ipf_state_soft;
3742 ipftqent_t *tqe, *tqn;
3743 ipstate_t *is, **isp;
3744 ipftq_t *ifq;
3745 int removed;
3746 SPL_INT(s);
3747
3748 removed = 0;
3749
3750 SPL_NET(s);
3751
3752 switch (which)
3753 {
3754 case 0 :
3755 SBUMP(ipf_state_stats.iss_flush_all);
3756 /*
3757 * Style 0 flush removes everything...
3758 */
3759 for (isp = &softs->ipf_state_list; ((is = *isp) != NULL); ) {
3760 if ((proto != 0) && (is->is_v != proto)) {
3761 isp = &is->is_next;
3762 continue;
3763 }
3764 if (ipf_state_del(softc, is, ISL_FLUSH) == 0)
3765 removed++;
3766 else
3767 isp = &is->is_next;
3768 }
3769 break;
3770
3771 case 1 :
3772 SBUMP(ipf_state_stats.iss_flush_closing);
3773 /*
3774 * Since we're only interested in things that are closing,
3775 * we can start with the appropriate timeout queue.
3776 */
3777 for (ifq = softs->ipf_state_tcptq + IPF_TCPS_CLOSE_WAIT;
3778 ifq != NULL; ifq = ifq->ifq_next) {
3779
3780 for (tqn = ifq->ifq_head; ((tqe = tqn) != NULL); ) {
3781 tqn = tqe->tqe_next;
3782 is = tqe->tqe_parent;
3783 if (is->is_p != IPPROTO_TCP)
3784 break;
3785 if (ipf_state_del(softc, is, ISL_FLUSH) == 0)
3786 removed++;
3787 }
3788 }
3789
3790 /*
3791 * Also need to look through the user defined queues.
3792 */
3793 for (ifq = softs->ipf_state_usertq; ifq != NULL;
3794 ifq = ifq->ifq_next) {
3795 for (tqn = ifq->ifq_head; ((tqe = tqn) != NULL); ) {
3796 tqn = tqe->tqe_next;
3797 is = tqe->tqe_parent;
3798 if (is->is_p != IPPROTO_TCP)
3799 continue;
3800
3801 if ((is->is_state[0] > IPF_TCPS_ESTABLISHED) &&
3802 (is->is_state[1] > IPF_TCPS_ESTABLISHED)) {
3803 if (ipf_state_del(softc, is,
3804 ISL_FLUSH) == 0)
3805 removed++;
3806 }
3807 }
3808 }
3809 break;
3810
3811 case 2 :
3812 break;
3813
3814 /*
3815 * Args 5-11 correspond to flushing those particular states
3816 * for TCP connections.
3817 */
3818 case IPF_TCPS_CLOSE_WAIT :
3819 case IPF_TCPS_FIN_WAIT_1 :
3820 case IPF_TCPS_CLOSING :
3821 case IPF_TCPS_LAST_ACK :
3822 case IPF_TCPS_FIN_WAIT_2 :
3823 case IPF_TCPS_TIME_WAIT :
3824 case IPF_TCPS_CLOSED :
3825 SBUMP(ipf_state_stats.iss_flush_queue);
3826 tqn = softs->ipf_state_tcptq[which].ifq_head;
3827 while (tqn != NULL) {
3828 tqe = tqn;
3829 tqn = tqe->tqe_next;
3830 is = tqe->tqe_parent;
3831 if (ipf_state_del(softc, is, ISL_FLUSH) == 0)
3832 removed++;
3833 }
3834 break;
3835
3836 default :
3837 if (which < 30)
3838 break;
3839
3840 SBUMP(ipf_state_stats.iss_flush_state);
3841 /*
3842 * Take a large arbitrary number to mean the number of seconds
3843 * for which which consider to be the maximum value we'll allow
3844 * the expiration to be.
3845 */
3846 which = IPF_TTLVAL(which);
3847 for (isp = &softs->ipf_state_list; ((is = *isp) != NULL); ) {
3848 if ((proto == 0) || (is->is_v == proto)) {
3849 if (softc->ipf_ticks - is->is_touched > which) {
3850 if (ipf_state_del(softc, is,
3851 ISL_FLUSH) == 0) {
3852 removed++;
3853 continue;
3854 }
3855 }
3856 }
3857 isp = &is->is_next;
3858 }
3859 break;
3860 }
3861
3862 if (which != 2) {
3863 SPL_X(s);
3864 return removed;
3865 }
3866
3867 SBUMP(ipf_state_stats.iss_flush_timeout);
3868 /*
3869 * Asked to remove inactive entries because the table is full, try
3870 * again, 3 times, if first attempt failed with a different criteria
3871 * each time. The order tried in must be in decreasing age.
3872 * Another alternative is to implement random drop and drop N entries
3873 * at random until N have been freed up.
3874 */
3875 if (softc->ipf_ticks - softs->ipf_state_wm_last >
3876 softs->ipf_state_wm_freq) {
3877 removed = ipf_queueflush(softc, ipf_state_flush_entry,
3878 softs->ipf_state_tcptq,
3879 softs->ipf_state_usertq,
3880 &softs->ipf_state_stats.iss_active,
3881 softs->ipf_state_size,
3882 softs->ipf_state_wm_low);
3883 softs->ipf_state_wm_last = softc->ipf_ticks;
3884 }
3885
3886 SPL_X(s);
3887 return removed;
3888 }
3889
3890
3891 /* ------------------------------------------------------------------------ */
3892 /* Function: ipf_state_flush_entry */
3893 /* Returns: int - 0 = entry deleted, else not deleted */
3894 /* Parameters: softc(I) - pointer to soft context main structure */
3895 /* entry(I) - pointer to state structure to delete */
3896 /* Write Locks: ipf_state */
3897 /* */
3898 /* This function is a stepping stone between ipf_queueflush() and */
3899 /* ipf_state_del(). It is used so we can provide a uniform interface via */
3900 /* the ipf_queueflush() function. */
3901 /* ------------------------------------------------------------------------ */
3902 static int
ipf_state_flush_entry(ipf_main_softc_t * softc,void * entry)3903 ipf_state_flush_entry(ipf_main_softc_t *softc, void *entry)
3904 {
3905 return ipf_state_del(softc, entry, ISL_FLUSH);
3906 }
3907
3908
3909 /* ------------------------------------------------------------------------ */
3910 /* Function: ipf_tcp_age */
3911 /* Returns: int - 1 == state transition made, 0 == no change (rejected) */
3912 /* Parameters: tqe(I) - pointer to timeout queue information */
3913 /* fin(I) - pointer to packet information */
3914 /* tqtab(I) - TCP timeout queue table this is in */
3915 /* flags(I) - flags from state/NAT entry */
3916 /* ok(I) - can we advance state */
3917 /* */
3918 /* Rewritten by Arjan de Vet <Arjan.deVet@adv.iae.nl>, 2000-07-29: */
3919 /* */
3920 /* - (try to) base state transitions on real evidence only, */
3921 /* i.e. packets that are sent and have been received by ipfilter; */
3922 /* diagram 18.12 of TCP/IP volume 1 by W. Richard Stevens was used. */
3923 /* */
3924 /* - deal with half-closed connections correctly; */
3925 /* */
3926 /* - store the state of the source in state[0] such that ipfstat */
3927 /* displays the state as source/dest instead of dest/source; the calls */
3928 /* to ipf_tcp_age have been changed accordingly. */
3929 /* */
3930 /* Internal Parameters: */
3931 /* */
3932 /* state[0] = state of source (host that initiated connection) */
3933 /* state[1] = state of dest (host that accepted the connection) */
3934 /* */
3935 /* dir == 0 : a packet from source to dest */
3936 /* dir == 1 : a packet from dest to source */
3937 /* */
3938 /* A typical procession for a connection is as follows: */
3939 /* */
3940 /* +--------------+-------------------+ */
3941 /* | Side '0' | Side '1' | */
3942 /* +--------------+-------------------+ */
3943 /* | 0 -> 1 (SYN) | | */
3944 /* | | 0 -> 2 (SYN-ACK) | */
3945 /* | 1 -> 3 (ACK) | | */
3946 /* | | 2 -> 4 (ACK-PUSH) | */
3947 /* | 3 -> 4 (ACK) | | */
3948 /* | ... | ... | */
3949 /* | | 4 -> 6 (FIN-ACK) | */
3950 /* | 4 -> 5 (ACK) | | */
3951 /* | | 6 -> 6 (ACK-PUSH) | */
3952 /* | 5 -> 5 (ACK) | | */
3953 /* | 5 -> 8 (FIN) | | */
3954 /* | | 6 -> 10 (ACK) | */
3955 /* +--------------+-------------------+ */
3956 /* */
3957 /* Locking: it is assumed that the parent of the tqe structure is locked. */
3958 /* ------------------------------------------------------------------------ */
3959 int
ipf_tcp_age(ipftqent_t * tqe,fr_info_t * fin,ipftq_t * tqtab,int flags,int ok)3960 ipf_tcp_age(ipftqent_t *tqe, fr_info_t *fin, ipftq_t *tqtab, int flags, int ok)
3961 {
3962 ipf_main_softc_t *softc = fin->fin_main_soft;
3963 int dlen, ostate, nstate, rval, dir;
3964 u_char tcpflags;
3965 tcphdr_t *tcp;
3966
3967 tcp = fin->fin_dp;
3968
3969 rval = 0;
3970 dir = fin->fin_rev;
3971 tcpflags = tcp->th_flags;
3972 dlen = fin->fin_dlen - (TCP_OFF(tcp) << 2);
3973 ostate = tqe->tqe_state[1 - dir];
3974 nstate = tqe->tqe_state[dir];
3975
3976 if (tcpflags & TH_RST) {
3977 if (!(tcpflags & TH_PUSH) && !dlen)
3978 nstate = IPF_TCPS_CLOSED;
3979 else
3980 nstate = IPF_TCPS_CLOSE_WAIT;
3981
3982 if (ostate <= IPF_TCPS_ESTABLISHED) {
3983 tqe->tqe_state[1 - dir] = IPF_TCPS_CLOSE_WAIT;
3984 }
3985 rval = 1;
3986 } else {
3987 switch (nstate)
3988 {
3989 case IPF_TCPS_LISTEN: /* 0 */
3990 if ((tcpflags & TH_OPENING) == TH_OPENING) {
3991 /*
3992 * 'dir' received an S and sends SA in
3993 * response, LISTEN -> SYN_RECEIVED
3994 */
3995 nstate = IPF_TCPS_SYN_RECEIVED;
3996 rval = 1;
3997 } else if ((tcpflags & TH_OPENING) == TH_SYN) {
3998 /* 'dir' sent S, LISTEN -> SYN_SENT */
3999 nstate = IPF_TCPS_SYN_SENT;
4000 rval = 1;
4001 }
4002 /*
4003 * the next piece of code makes it possible to get
4004 * already established connections into the state table
4005 * after a restart or reload of the filter rules; this
4006 * does not work when a strict 'flags S keep state' is
4007 * used for tcp connections of course
4008 */
4009 if (((flags & IS_TCPFSM) == 0) &&
4010 ((tcpflags & TH_ACKMASK) == TH_ACK)) {
4011 /*
4012 * we saw an A, guess 'dir' is in ESTABLISHED
4013 * mode
4014 */
4015 switch (ostate)
4016 {
4017 case IPF_TCPS_LISTEN :
4018 case IPF_TCPS_SYN_RECEIVED :
4019 nstate = IPF_TCPS_HALF_ESTAB;
4020 rval = 1;
4021 break;
4022 case IPF_TCPS_HALF_ESTAB :
4023 case IPF_TCPS_ESTABLISHED :
4024 nstate = IPF_TCPS_ESTABLISHED;
4025 rval = 1;
4026 break;
4027 default :
4028 break;
4029 }
4030 }
4031 /*
4032 * TODO: besides regular ACK packets we can have other
4033 * packets as well; it is yet to be determined how we
4034 * should initialize the states in those cases
4035 */
4036 break;
4037
4038 case IPF_TCPS_SYN_SENT: /* 1 */
4039 if ((tcpflags & ~(TH_ECN|TH_CWR)) == TH_SYN) {
4040 /*
4041 * A retransmitted SYN packet. We do not reset
4042 * the timeout here to ipf_tcptimeout because a
4043 * connection connect timeout does not renew
4044 * after every packet that is sent. We need to
4045 * set rval so as to indicate the packet has
4046 * passed the check for its flags being valid
4047 * in the TCP FSM. Setting rval to 2 has the
4048 * result of not resetting the timeout.
4049 */
4050 rval = 2;
4051 } else if ((tcpflags & (TH_SYN|TH_FIN|TH_ACK)) ==
4052 TH_ACK) {
4053 /*
4054 * we see an A from 'dir' which is in SYN_SENT
4055 * state: 'dir' sent an A in response to an SA
4056 * which it received, SYN_SENT -> ESTABLISHED
4057 */
4058 nstate = IPF_TCPS_ESTABLISHED;
4059 rval = 1;
4060 } else if (tcpflags & TH_FIN) {
4061 /*
4062 * we see an F from 'dir' which is in SYN_SENT
4063 * state and wants to close its side of the
4064 * connection; SYN_SENT -> FIN_WAIT_1
4065 */
4066 nstate = IPF_TCPS_FIN_WAIT_1;
4067 rval = 1;
4068 } else if ((tcpflags & TH_OPENING) == TH_OPENING) {
4069 /*
4070 * we see an SA from 'dir' which is already in
4071 * SYN_SENT state, this means we have a
4072 * simultaneous open; SYN_SENT -> SYN_RECEIVED
4073 */
4074 nstate = IPF_TCPS_SYN_RECEIVED;
4075 rval = 1;
4076 }
4077 break;
4078
4079 case IPF_TCPS_SYN_RECEIVED: /* 2 */
4080 if ((tcpflags & (TH_SYN|TH_FIN|TH_ACK)) == TH_ACK) {
4081 /*
4082 * we see an A from 'dir' which was in
4083 * SYN_RECEIVED state so it must now be in
4084 * established state, SYN_RECEIVED ->
4085 * ESTABLISHED
4086 */
4087 nstate = IPF_TCPS_ESTABLISHED;
4088 rval = 1;
4089 } else if ((tcpflags & ~(TH_ECN|TH_CWR)) ==
4090 TH_OPENING) {
4091 /*
4092 * We see an SA from 'dir' which is already in
4093 * SYN_RECEIVED state.
4094 */
4095 rval = 2;
4096 } else if (tcpflags & TH_FIN) {
4097 /*
4098 * we see an F from 'dir' which is in
4099 * SYN_RECEIVED state and wants to close its
4100 * side of the connection; SYN_RECEIVED ->
4101 * FIN_WAIT_1
4102 */
4103 nstate = IPF_TCPS_FIN_WAIT_1;
4104 rval = 1;
4105 }
4106 break;
4107
4108 case IPF_TCPS_HALF_ESTAB: /* 3 */
4109 if (tcpflags & TH_FIN) {
4110 nstate = IPF_TCPS_FIN_WAIT_1;
4111 rval = 1;
4112 } else if ((tcpflags & TH_ACKMASK) == TH_ACK) {
4113 /*
4114 * If we've picked up a connection in mid
4115 * flight, we could be looking at a follow on
4116 * packet from the same direction as the one
4117 * that created this state. Recognise it but
4118 * do not advance the entire connection's
4119 * state.
4120 */
4121 switch (ostate)
4122 {
4123 case IPF_TCPS_LISTEN :
4124 case IPF_TCPS_SYN_SENT :
4125 case IPF_TCPS_SYN_RECEIVED :
4126 rval = 1;
4127 break;
4128 case IPF_TCPS_HALF_ESTAB :
4129 case IPF_TCPS_ESTABLISHED :
4130 nstate = IPF_TCPS_ESTABLISHED;
4131 rval = 1;
4132 break;
4133 default :
4134 break;
4135 }
4136 }
4137 break;
4138
4139 case IPF_TCPS_ESTABLISHED: /* 4 */
4140 rval = 1;
4141 if (tcpflags & TH_FIN) {
4142 /*
4143 * 'dir' closed its side of the connection;
4144 * this gives us a half-closed connection;
4145 * ESTABLISHED -> FIN_WAIT_1
4146 */
4147 if (ostate == IPF_TCPS_FIN_WAIT_1) {
4148 nstate = IPF_TCPS_CLOSING;
4149 } else {
4150 nstate = IPF_TCPS_FIN_WAIT_1;
4151 }
4152 } else if (tcpflags & TH_ACK) {
4153 /*
4154 * an ACK, should we exclude other flags here?
4155 */
4156 if (ostate == IPF_TCPS_FIN_WAIT_1) {
4157 /*
4158 * We know the other side did an active
4159 * close, so we are ACKing the recvd
4160 * FIN packet (does the window matching
4161 * code guarantee this?) and go into
4162 * CLOSE_WAIT state; this gives us a
4163 * half-closed connection
4164 */
4165 nstate = IPF_TCPS_CLOSE_WAIT;
4166 } else if (ostate < IPF_TCPS_CLOSE_WAIT) {
4167 /*
4168 * still a fully established
4169 * connection reset timeout
4170 */
4171 nstate = IPF_TCPS_ESTABLISHED;
4172 }
4173 }
4174 break;
4175
4176 case IPF_TCPS_CLOSE_WAIT: /* 5 */
4177 rval = 1;
4178 if (tcpflags & TH_FIN) {
4179 /*
4180 * application closed and 'dir' sent a FIN,
4181 * we're now going into LAST_ACK state
4182 */
4183 nstate = IPF_TCPS_LAST_ACK;
4184 } else {
4185 /*
4186 * we remain in CLOSE_WAIT because the other
4187 * side has closed already and we did not
4188 * close our side yet; reset timeout
4189 */
4190 nstate = IPF_TCPS_CLOSE_WAIT;
4191 }
4192 break;
4193
4194 case IPF_TCPS_FIN_WAIT_1: /* 6 */
4195 rval = 1;
4196 if ((tcpflags & TH_ACK) &&
4197 ostate > IPF_TCPS_CLOSE_WAIT) {
4198 /*
4199 * if the other side is not active anymore
4200 * it has sent us a FIN packet that we are
4201 * ack'ing now with an ACK; this means both
4202 * sides have now closed the connection and
4203 * we go into TIME_WAIT
4204 */
4205 /*
4206 * XXX: how do we know we really are ACKing
4207 * the FIN packet here? does the window code
4208 * guarantee that?
4209 */
4210 nstate = IPF_TCPS_LAST_ACK;
4211 } else {
4212 /*
4213 * we closed our side of the connection
4214 * already but the other side is still active
4215 * (ESTABLISHED/CLOSE_WAIT); continue with
4216 * this half-closed connection
4217 */
4218 nstate = IPF_TCPS_FIN_WAIT_1;
4219 }
4220 break;
4221
4222 case IPF_TCPS_CLOSING: /* 7 */
4223 if ((tcpflags & (TH_FIN|TH_ACK)) == TH_ACK) {
4224 nstate = IPF_TCPS_TIME_WAIT;
4225 }
4226 rval = 1;
4227 break;
4228
4229 case IPF_TCPS_LAST_ACK: /* 8 */
4230 if (tcpflags & TH_ACK) {
4231 rval = 1;
4232 }
4233 /*
4234 * we cannot detect when we go out of LAST_ACK state
4235 * to CLOSED because that is based on the reception
4236 * of ACK packets; ipfilter can only detect that a
4237 * packet has been sent by a host
4238 */
4239 break;
4240
4241 case IPF_TCPS_FIN_WAIT_2: /* 9 */
4242 /* NOT USED */
4243 break;
4244
4245 case IPF_TCPS_TIME_WAIT: /* 10 */
4246 /* we're in 2MSL timeout now */
4247 if (ostate == IPF_TCPS_LAST_ACK) {
4248 nstate = IPF_TCPS_CLOSED;
4249 rval = 1;
4250 } else {
4251 rval = 2;
4252 }
4253 break;
4254
4255 case IPF_TCPS_CLOSED: /* 11 */
4256 rval = 2;
4257 break;
4258
4259 default :
4260 #if !defined(_KERNEL)
4261 abort();
4262 #endif
4263 break;
4264 }
4265 }
4266
4267 /*
4268 * If rval == 2 then do not update the queue position, but treat the
4269 * packet as being ok.
4270 */
4271 if (rval == 2)
4272 rval = 1;
4273 else if (rval == 1) {
4274 if (ok)
4275 tqe->tqe_state[dir] = nstate;
4276 if ((tqe->tqe_flags & TQE_RULEBASED) == 0)
4277 ipf_movequeue(softc->ipf_ticks, tqe, tqe->tqe_ifq,
4278 tqtab + nstate);
4279 }
4280
4281 return rval;
4282 }
4283
4284
4285 /* ------------------------------------------------------------------------ */
4286 /* Function: ipf_state_log */
4287 /* Returns: Nil */
4288 /* Parameters: softc(I) - pointer to soft context main structure */
4289 /* is(I) - pointer to state structure */
4290 /* type(I) - type of log entry to create */
4291 /* */
4292 /* Creates a state table log entry using the state structure and type info. */
4293 /* passed in. Log packet/byte counts, source/destination address and other */
4294 /* protocol specific information. */
4295 /* ------------------------------------------------------------------------ */
4296 void
ipf_state_log(ipf_main_softc_t * softc,struct ipstate * is,u_int type)4297 ipf_state_log(ipf_main_softc_t *softc, struct ipstate *is, u_int type)
4298 {
4299 #ifdef IPFILTER_LOG
4300 struct ipslog ipsl;
4301 size_t sizes[1];
4302 void *items[1];
4303 int types[1];
4304
4305 /*
4306 * Copy information out of the ipstate_t structure and into the
4307 * structure used for logging.
4308 */
4309 ipsl.isl_type = type;
4310 ipsl.isl_pkts[0] = is->is_pkts[0] + is->is_icmppkts[0];
4311 ipsl.isl_bytes[0] = is->is_bytes[0];
4312 ipsl.isl_pkts[1] = is->is_pkts[1] + is->is_icmppkts[1];
4313 ipsl.isl_bytes[1] = is->is_bytes[1];
4314 ipsl.isl_pkts[2] = is->is_pkts[2] + is->is_icmppkts[2];
4315 ipsl.isl_bytes[2] = is->is_bytes[2];
4316 ipsl.isl_pkts[3] = is->is_pkts[3] + is->is_icmppkts[3];
4317 ipsl.isl_bytes[3] = is->is_bytes[3];
4318 ipsl.isl_src = is->is_src;
4319 ipsl.isl_dst = is->is_dst;
4320 ipsl.isl_p = is->is_p;
4321 ipsl.isl_v = is->is_v;
4322 ipsl.isl_flags = is->is_flags;
4323 ipsl.isl_tag = is->is_tag;
4324 ipsl.isl_rulen = is->is_rulen;
4325 (void) strncpy(ipsl.isl_group, is->is_group, FR_GROUPLEN);
4326
4327 if (ipsl.isl_p == IPPROTO_TCP || ipsl.isl_p == IPPROTO_UDP) {
4328 ipsl.isl_sport = is->is_sport;
4329 ipsl.isl_dport = is->is_dport;
4330 if (ipsl.isl_p == IPPROTO_TCP) {
4331 ipsl.isl_state[0] = is->is_state[0];
4332 ipsl.isl_state[1] = is->is_state[1];
4333 }
4334 } else if (ipsl.isl_p == IPPROTO_ICMP) {
4335 ipsl.isl_itype = is->is_icmp.ici_type;
4336 } else if (ipsl.isl_p == IPPROTO_ICMPV6) {
4337 ipsl.isl_itype = is->is_icmp.ici_type;
4338 } else {
4339 ipsl.isl_ps.isl_filler[0] = 0;
4340 ipsl.isl_ps.isl_filler[1] = 0;
4341 }
4342
4343 items[0] = &ipsl;
4344 sizes[0] = sizeof(ipsl);
4345 types[0] = 0;
4346
4347 (void) ipf_log_items(softc, IPL_LOGSTATE, NULL, items, sizes, types, 1);
4348 #endif
4349 }
4350
4351
4352 #ifdef USE_INET6
/* ------------------------------------------------------------------------ */
/* Function:    ipf_checkicmp6matchingstate                                 */
/* Returns:     ipstate_t* - NULL == no match found,                        */
/*                           else pointer to matching state entry           */
/* Parameters:  fin(I) - pointer to packet information                      */
/* Locks:       NULL == no locks, else Read Lock on ipf_state               */
/*                                                                          */
/* If we've got an ICMPv6 error message, using the information stored in    */
/* the ICMPv6 packet, look for a matching state table entry.                */
/*                                                                          */
/* A temporary fr_info_t (ofin) is built that describes the IPv6 packet     */
/* embedded in the ICMPv6 error, a hash is computed from that embedded      */
/* packet and the corresponding hash bucket is walked.  When a match is     */
/* returned, the read lock on ipf_state taken here is NOT released - the    */
/* function returns with it still held.  On every NULL return no lock is    */
/* held.                                                                    */
/* ------------------------------------------------------------------------ */
static ipstate_t *
ipf_checkicmp6matchingstate(fr_info_t *fin)
{
	ipf_main_softc_t *softc = fin->fin_main_soft;
	ipf_state_softc_t *softs = softc->ipf_state_soft;
	struct icmp6_hdr *ic6, *oic;
	ipstate_t *is, **isp;
	u_short sport, dport;
	i6addr_t dst, src;
	u_short savelen;
	icmpinfo_t *ic;
	fr_info_t ofin;
	tcphdr_t *tcp;
	ip6_t *oip6;
	u_char pr;
	u_int hv;

	/*
	 * Does it at least have the return (basic) IP header ?
	 * Is it an actual recognised ICMP error type?
	 * Only a basic IP header (no options) should be with
	 * an ICMP error header.
	 */
	if ((fin->fin_v != 6) || (fin->fin_plen < ICMP6ERR_MINPKTLEN) ||
	    !(fin->fin_flx & FI_ICMPERR)) {
		SBUMPD(ipf_state_stats, iss_icmp_bad);
		return NULL;
	}

	ic6 = fin->fin_dp;

	/*
	 * The embedded (offending) IPv6 header follows the ICMP error header.
	 *
	 * NOTE(review): the check below compares the length of the whole
	 * packet (fin_plen), not the number of bytes that remain after the
	 * ICMPv6 header, against sizeof(*oip6) - confirm this is sufficient
	 * to guarantee that oip6 is entirely present.
	 */
	oip6 = (ip6_t *)((char *)ic6 + ICMPERR_ICMPHLEN);
	if (fin->fin_plen < sizeof(*oip6)) {
		SBUMPD(ipf_state_stats, iss_icmp_short);
		return NULL;
	}

	/*
	 * Start ofin as a copy of fin and then override what differs for the
	 * embedded packet: the direction is reversed and the mbuf pointers
	 * are cleared.  (fin_ifp was already copied by the bcopy.)
	 */
	bcopy((char *)fin, (char *)&ofin, sizeof(*fin));
	ofin.fin_v = 6;
	ofin.fin_ifp = fin->fin_ifp;
	ofin.fin_out = !fin->fin_out;
	ofin.fin_m = NULL;	/* if dereferenced, panic XXX */
	ofin.fin_mp = NULL;	/* if dereferenced, panic XXX */

	/*
	 * We make a fin entry to be able to feed it to
	 * matchsrcdst. Note that not all fields are necessary
	 * but this is the cleanest way. Note further we fill
	 * in fin_mp such that if someone uses it we'll get
	 * a kernel panic. ipf_matchsrcdst does not use this.
	 *
	 * watch out here, as ip is in host order and oip6 in network
	 * order. Any change we make must be undone afterwards.
	 */
	/*
	 * ip6_plen of the embedded header is temporarily replaced for the
	 * call to ipf_makefrip() and restored from savelen straight after.
	 */
	savelen = oip6->ip6_plen;
	oip6->ip6_plen = htons(fin->fin_dlen - ICMPERR_ICMPHLEN);
	ofin.fin_flx = FI_NOCKSUM;
	ofin.fin_ip = (ip_t *)oip6;
	(void) ipf_makefrip(sizeof(*oip6), (ip_t *)oip6, &ofin);
	ofin.fin_flx &= ~(FI_BAD|FI_SHORT);
	oip6->ip6_plen = savelen;
	pr = ofin.fin_p;

	/*
	 * an ICMP error can never generate an ICMP error in response.
	 */
	if (ofin.fin_flx & FI_ICMPERR) {
		DT1(iss_icmp6_icmperr, fr_info_t *, &ofin);
		SBUMP(ipf_state_stats.iss_icmp6_icmperr);
		return NULL;
	}

	/*
	 * Embedded packet is ICMPv6: every path through this block returns,
	 * so the code following it only ever deals with other protocols.
	 */
	if (oip6->ip6_nxt == IPPROTO_ICMPV6) {
		oic = ofin.fin_dp;
		/*
		 * an ICMP error can only be generated as a result of an
		 * ICMP query, not as the response on an ICMP error
		 *
		 * XXX theoretically ICMP_ECHOREP and the other reply's are
		 * ICMP query's as well, but adding them here seems strange XXX
		 */
		if (!(oic->icmp6_type & ICMP6_INFOMSG_MASK)) {
			DT1(iss_icmp6_notinfo, fr_info_t *, &ofin);
			SBUMP(ipf_state_stats.iss_icmp6_notinfo);
			return NULL;
		}

		/*
		 * perform a lookup of the ICMP packet in the state table
		 */
		/*
		 * NOTE(review): only the in4.s_addr member of each address
		 * is folded into the hash here, whereas the non-ICMPv6 path
		 * below adds all four i6[] words - presumably this has to
		 * mirror how ICMPv6 state entries are hashed on insertion;
		 * verify against the code that adds state.
		 */
		hv = (pr = oip6->ip6_nxt);
		src.in6 = oip6->ip6_src;
		hv += src.in4.s_addr;
		dst.in6 = oip6->ip6_dst;
		hv += dst.in4.s_addr;
		hv += oic->icmp6_id;
		hv += oic->icmp6_seq;
		hv = DOUBLE_HASH(hv);

		READ_ENTER(&softc->ipf_state);
		for (isp = &softs->ipf_state_table[hv];
		     ((is = *isp) != NULL); ) {
			ic = &is->is_icmp;
			/*
			 * Advance the walk pointer before 'is' is possibly
			 * overwritten by the result of ipf_matchsrcdst() in
			 * the condition below.
			 */
			isp = &is->is_hnext;
			if ((is->is_p == pr) &&
			    !(is->is_pass & FR_NOICMPERR) &&
			    (oic->icmp6_id == ic->ici_id) &&
			    (oic->icmp6_seq == ic->ici_seq) &&
			    (is = ipf_matchsrcdst(&ofin, is, &src,
						 &dst, NULL, FI_ICMPCMP))) {
				/*
				 * in the state table ICMP query's are stored
				 * with the type of the corresponding ICMP
				 * response. Correct here
				 */
				if (((ic->ici_type == ICMP6_ECHO_REPLY) &&
				     (oic->icmp6_type == ICMP6_ECHO_REQUEST)) ||
				     (ic->ici_type - 1 == oic->icmp6_type )) {
					/*
					 * Match: return with the read lock
					 * on ipf_state still held.
					 */
					if (!ipf_allowstateicmp(fin, is, &src))
						return is;
				}
			}
		}
		RWLOCK_EXIT(&softc->ipf_state);
		SBUMPD(ipf_state_stats, iss_icmp6_miss);
		return NULL;
	}

	/*
	 * Any other embedded protocol: hash on protocol, the complete source
	 * and destination addresses and (below) protocol specific fields.
	 */
	hv = (pr = oip6->ip6_nxt);
	src.in6 = oip6->ip6_src;
	hv += src.i6[0];
	hv += src.i6[1];
	hv += src.i6[2];
	hv += src.i6[3];
	dst.in6 = oip6->ip6_dst;
	hv += dst.i6[0];
	hv += dst.i6[1];
	hv += dst.i6[2];
	hv += dst.i6[3];

	tcp = NULL;

	switch (oip6->ip6_nxt)
	{
	case IPPROTO_TCP :
	case IPPROTO_UDP :
		/*
		 * The transport header is taken to start directly after the
		 * embedded IPv6 header; the ports are read through a
		 * tcphdr_t for both TCP and UDP.
		 */
		tcp = (tcphdr_t *)(oip6 + 1);
		dport = tcp->th_dport;
		sport = tcp->th_sport;
		hv += dport;
		hv += sport;
		break;

	case IPPROTO_ICMPV6 :
		/*
		 * Not reachable as written: an embedded ICMPv6 packet has
		 * already been handled (and returned from) above.
		 */
		oic = (struct icmp6_hdr *)(oip6 + 1);
		hv += oic->icmp6_id;
		hv += oic->icmp6_seq;
		break;

	default :
		break;
	}

	hv = DOUBLE_HASH(hv);

	READ_ENTER(&softc->ipf_state);
	for (isp = &softs->ipf_state_table[hv]; ((is = *isp) != NULL); ) {
		/* Step the walk pointer first; 'is' is reassigned below. */
		isp = &is->is_hnext;
		/*
		 * Only allow this icmp though if the
		 * encapsulated packet was allowed through the
		 * other way around. Note that the minimal amount
		 * of info present does not allow for checking against
		 * tcp internals such as seq and ack numbers.
		 */
		if ((is->is_p != pr) || (is->is_v != 6) ||
		    (is->is_pass & FR_NOICMPERR))
			continue;
		is = ipf_matchsrcdst(&ofin, is, &src, &dst, tcp, FI_ICMPCMP);
		/* Match: return with the read lock on ipf_state still held. */
		if ((is != NULL) && (ipf_allowstateicmp(fin, is, &src) == 0))
			return is;
	}
	RWLOCK_EXIT(&softc->ipf_state);
	SBUMPD(ipf_state_stats, iss_icmp_miss);
	return NULL;
}
4549 #endif
4550
4551
4552 /* ------------------------------------------------------------------------ */
4553 /* Function: ipf_sttab_init */
4554 /* Returns: Nil */
4555 /* Parameters: softc(I) - pointer to soft context main structure */
4556 /* tqp(I) - pointer to an array of timeout queues for TCP */
4557 /* */
4558 /* Initialise the array of timeout queues for TCP. */
4559 /* ------------------------------------------------------------------------ */
4560 void
ipf_sttab_init(ipf_main_softc_t * softc,ipftq_t * tqp)4561 ipf_sttab_init(ipf_main_softc_t *softc, ipftq_t *tqp)
4562 {
4563 int i;
4564
4565 for (i = IPF_TCP_NSTATES - 1; i >= 0; i--) {
4566 IPFTQ_INIT(&tqp[i], 0, "ipftq tcp tab");
4567 tqp[i].ifq_next = tqp + i + 1;
4568 }
4569 tqp[IPF_TCP_NSTATES - 1].ifq_next = NULL;
4570 tqp[IPF_TCPS_CLOSED].ifq_ttl = softc->ipf_tcpclosed;
4571 tqp[IPF_TCPS_LISTEN].ifq_ttl = softc->ipf_tcptimeout;
4572 tqp[IPF_TCPS_SYN_SENT].ifq_ttl = softc->ipf_tcpsynsent;
4573 tqp[IPF_TCPS_SYN_RECEIVED].ifq_ttl = softc->ipf_tcpsynrecv;
4574 tqp[IPF_TCPS_ESTABLISHED].ifq_ttl = softc->ipf_tcpidletimeout;
4575 tqp[IPF_TCPS_CLOSE_WAIT].ifq_ttl = softc->ipf_tcphalfclosed;
4576 tqp[IPF_TCPS_FIN_WAIT_1].ifq_ttl = softc->ipf_tcphalfclosed;
4577 tqp[IPF_TCPS_CLOSING].ifq_ttl = softc->ipf_tcptimeout;
4578 tqp[IPF_TCPS_LAST_ACK].ifq_ttl = softc->ipf_tcplastack;
4579 tqp[IPF_TCPS_FIN_WAIT_2].ifq_ttl = softc->ipf_tcpclosewait;
4580 tqp[IPF_TCPS_TIME_WAIT].ifq_ttl = softc->ipf_tcptimewait;
4581 tqp[IPF_TCPS_HALF_ESTAB].ifq_ttl = softc->ipf_tcptimeout;
4582 }
4583
4584
4585 /* ------------------------------------------------------------------------ */
4586 /* Function: ipf_sttab_destroy */
4587 /* Returns: Nil */
4588 /* Parameters: tqp(I) - pointer to an array of timeout queues for TCP */
4589 /* */
4590 /* Do whatever is necessary to "destroy" each of the entries in the array */
4591 /* of timeout queues for TCP. */
4592 /* ------------------------------------------------------------------------ */
4593 void
ipf_sttab_destroy(ipftq_t * tqp)4594 ipf_sttab_destroy(ipftq_t *tqp)
4595 {
4596 int i;
4597
4598 for (i = IPF_TCP_NSTATES - 1; i >= 0; i--)
4599 MUTEX_DESTROY(&tqp[i].ifq_lock);
4600 }
4601
4602
4603 /* ------------------------------------------------------------------------ */
4604 /* Function: ipf_state_deref */
4605 /* Returns: Nil */
4606 /* Parameters: softc(I) - pointer to soft context main structure */
4607 /* isp(I) - pointer to pointer to state table entry */
4608 /* */
4609 /* Decrement the reference counter for this state table entry and free it */
4610 /* if there are no more things using it. */
4611 /* */
4612 /* This function is only called when cleaning up after increasing is_ref by */
4613 /* one earlier in the 'code path' so if is_ref is 1 when entering, we do */
4614 /* have an orphan, otherwise not. However there is a possible race between */
4615 /* the entry being deleted via flushing with an ioctl call (that calls the */
4616 /* delete function directly) and the tail end of packet processing so we */
4617 /* need to grab is_lock before doing the check to synchronise the two code */
4618 /* paths. */
4619 /* */
4620 /* When operating in userland (ipftest), we have no timers to clear a state */
4621 /* entry. Therefore, we make a few simple tests before deleting an entry */
4622 /* outright. We compare states on each side looking for a combination of */
4623 /* TIME_WAIT (should really be FIN_WAIT_2?) and LAST_ACK. Then we factor */
4624 /* in packet direction with the interface list to make sure we don't */
4625 /* prematurely delete an entry on a final inbound packet that's we're also */
4626 /* supposed to route elsewhere. */
4627 /* */
4628 /* Internal parameters: */
4629 /* state[0] = state of source (host that initiated connection) */
4630 /* state[1] = state of dest (host that accepted the connection) */
4631 /* */
4632 /* dir == 0 : a packet from source to dest */
4633 /* dir == 1 : a packet from dest to source */
4634 /* ------------------------------------------------------------------------ */
4635 void
ipf_state_deref(ipf_main_softc_t * softc,ipstate_t ** isp)4636 ipf_state_deref(ipf_main_softc_t *softc, ipstate_t **isp)
4637 {
4638 ipstate_t *is = *isp;
4639
4640 is = *isp;
4641 *isp = NULL;
4642
4643 MUTEX_ENTER(&is->is_lock);
4644 if (is->is_ref > 1) {
4645 is->is_ref--;
4646 MUTEX_EXIT(&is->is_lock);
4647 #ifndef _KERNEL
4648 if ((is->is_sti.tqe_state[0] > IPF_TCPS_ESTABLISHED) ||
4649 (is->is_sti.tqe_state[1] > IPF_TCPS_ESTABLISHED)) {
4650 ipf_state_del(softc, is, ISL_EXPIRE);
4651 }
4652 #endif
4653 return;
4654 }
4655 MUTEX_EXIT(&is->is_lock);
4656
4657 WRITE_ENTER(&softc->ipf_state);
4658 ipf_state_del(softc, is, ISL_ORPHAN);
4659 RWLOCK_EXIT(&softc->ipf_state);
4660 }
4661
4662
4663 /* ------------------------------------------------------------------------ */
4664 /* Function: ipf_state_setqueue */
4665 /* Returns: Nil */
4666 /* Parameters: softc(I) - pointer to soft context main structure */
4667 /* is(I) - pointer to state structure */
4668 /* rev(I) - forward(0) or reverse(1) direction */
4669 /* Locks: ipf_state (read or write) */
4670 /* */
4671 /* Put the state entry on its default queue entry, using rev as a helped in */
4672 /* determining which queue it should be placed on. */
4673 /* ------------------------------------------------------------------------ */
4674 void
ipf_state_setqueue(ipf_main_softc_t * softc,ipstate_t * is,int rev)4675 ipf_state_setqueue(ipf_main_softc_t *softc, ipstate_t *is, int rev)
4676 {
4677 ipf_state_softc_t *softs = softc->ipf_state_soft;
4678 ipftq_t *oifq, *nifq;
4679
4680 if ((is->is_sti.tqe_flags & TQE_RULEBASED) != 0)
4681 nifq = is->is_tqehead[rev];
4682 else
4683 nifq = NULL;
4684
4685 if (nifq == NULL) {
4686 switch (is->is_p)
4687 {
4688 #ifdef USE_INET6
4689 case IPPROTO_ICMPV6 :
4690 if (rev == 1)
4691 nifq = &softs->ipf_state_icmpacktq;
4692 else
4693 nifq = &softs->ipf_state_icmptq;
4694 break;
4695 #endif
4696 case IPPROTO_ICMP :
4697 if (rev == 1)
4698 nifq = &softs->ipf_state_icmpacktq;
4699 else
4700 nifq = &softs->ipf_state_icmptq;
4701 break;
4702 case IPPROTO_TCP :
4703 nifq = softs->ipf_state_tcptq + is->is_state[rev];
4704 break;
4705
4706 case IPPROTO_UDP :
4707 if (rev == 1)
4708 nifq = &softs->ipf_state_udpacktq;
4709 else
4710 nifq = &softs->ipf_state_udptq;
4711 break;
4712
4713 default :
4714 nifq = &softs->ipf_state_iptq;
4715 break;
4716 }
4717 }
4718
4719 oifq = is->is_sti.tqe_ifq;
4720 /*
4721 * If it's currently on a timeout queue, move it from one queue to
4722 * another, else put it on the end of the newly determined queue.
4723 */
4724 if (oifq != NULL)
4725 ipf_movequeue(softc->ipf_ticks, &is->is_sti, oifq, nifq);
4726 else
4727 ipf_queueappend(softc->ipf_ticks, &is->is_sti, nifq, is);
4728 return;
4729 }
4730
4731
4732 /* ------------------------------------------------------------------------ */
4733 /* Function: ipf_state_iter */
4734 /* Returns: int - 0 == success, else error */
4735 /* Parameters: softc(I) - pointer to main soft context */
4736 /* token(I) - pointer to ipftoken structure */
4737 /* itp(I) - pointer to ipfgeniter structure */
4738 /* obj(I) - pointer to data description structure */
4739 /* */
4740 /* This function handles the SIOCGENITER ioctl for the state tables and */
4741 /* walks through the list of entries in the state table list (softs->ipf_state_list.) */
4742 /* ------------------------------------------------------------------------ */
4743 static int
ipf_state_iter(ipf_main_softc_t * softc,ipftoken_t * token,ipfgeniter_t * itp,ipfobj_t * obj)4744 ipf_state_iter(ipf_main_softc_t *softc, ipftoken_t *token, ipfgeniter_t *itp,
4745 ipfobj_t *obj)
4746 {
4747 ipf_state_softc_t *softs = softc->ipf_state_soft;
4748 ipstate_t *is, *next, zero;
4749 int error;
4750
4751 if (itp->igi_data == NULL) {
4752 IPFERROR(100026);
4753 return EFAULT;
4754 }
4755
4756 if (itp->igi_nitems < 1) {
4757 IPFERROR(100027);
4758 return ENOSPC;
4759 }
4760
4761 if (itp->igi_type != IPFGENITER_STATE) {
4762 IPFERROR(100028);
4763 return EINVAL;
4764 }
4765
4766 is = token->ipt_data;
4767 if (is == (void *)-1) {
4768 IPFERROR(100029);
4769 return ESRCH;
4770 }
4771
4772 error = 0;
4773 obj->ipfo_type = IPFOBJ_IPSTATE;
4774 obj->ipfo_size = sizeof(ipstate_t);
4775
4776 READ_ENTER(&softc->ipf_state);
4777
4778 is = token->ipt_data;
4779 if (is == NULL) {
4780 next = softs->ipf_state_list;
4781 } else {
4782 next = is->is_next;
4783 }
4784
4785 /*
4786 * If we find a state entry to use, bump its reference count so that
4787 * it can be used for is_next when we come back.
4788 */
4789 if (next != NULL) {
4790 MUTEX_ENTER(&next->is_lock);
4791 next->is_ref++;
4792 MUTEX_EXIT(&next->is_lock);
4793 token->ipt_data = next;
4794 } else {
4795 bzero(&zero, sizeof(zero));
4796 next = &zero;
4797 token->ipt_data = NULL;
4798 }
4799 if (next->is_next == NULL)
4800 ipf_token_mark_complete(token);
4801
4802 RWLOCK_EXIT(&softc->ipf_state);
4803
4804 obj->ipfo_ptr = itp->igi_data;
4805 error = ipf_outobjk(softc, obj, next);
4806 if (is != NULL)
4807 ipf_state_deref(softc, &is);
4808
4809 return error;
4810 }
4811
4812
4813 /* ------------------------------------------------------------------------ */
4814 /* Function: ipf_state_gettable */
4815 /* Returns: int - 0 = success, else error */
4816 /* Parameters: softc(I) - pointer to main soft context */
4817 /* softs(I) - pointer to state context structure */
4818 /* data(I) - pointer to ioctl data */
4819 /* */
4820 /* This function handles ioctl requests for tables of state information. */
4821 /* At present the only table it deals with is the hash bucket statistics. */
4822 /* ------------------------------------------------------------------------ */
4823 static int
ipf_state_gettable(ipf_main_softc_t * softc,ipf_state_softc_t * softs,char * data)4824 ipf_state_gettable(ipf_main_softc_t *softc, ipf_state_softc_t *softs,
4825 char *data)
4826 {
4827 ipftable_t table;
4828 int error;
4829
4830 error = ipf_inobj(softc, data, NULL, &table, IPFOBJ_GTABLE);
4831 if (error != 0)
4832 return error;
4833
4834 if (table.ita_type != IPFTABLE_BUCKETS) {
4835 IPFERROR(100031);
4836 return EINVAL;
4837 }
4838
4839 error = COPYOUT(softs->ipf_state_stats.iss_bucketlen, table.ita_table,
4840 softs->ipf_state_size * sizeof(u_int));
4841 if (error != 0) {
4842 IPFERROR(100032);
4843 error = EFAULT;
4844 }
4845 return error;
4846 }
4847
4848
4849 /* ------------------------------------------------------------------------ */
4850 /* Function: ipf_state_setpending */
4851 /* Returns: Nil */
4852 /* Parameters: softc(I) - pointer to main soft context */
4853 /* is(I) - pointer to state structure */
4854 /* Locks: ipf_state (read or write) */
4855 /* */
4856 /* Put the state entry on to the pending queue - this queue has a very */
4857 /* short lifetime where items are put that can't be deleted straight away */
4858 /* because of locking issues but we want to delete them ASAP, anyway. */
4859 /* ------------------------------------------------------------------------ */
4860 void
ipf_state_setpending(ipf_main_softc_t * softc,ipstate_t * is)4861 ipf_state_setpending(ipf_main_softc_t *softc, ipstate_t *is)
4862 {
4863 ipf_state_softc_t *softs = softc->ipf_state_soft;
4864 ipftq_t *oifq;
4865
4866 oifq = is->is_sti.tqe_ifq;
4867 if (oifq != NULL)
4868 ipf_movequeue(softc->ipf_ticks, &is->is_sti, oifq,
4869 &softs->ipf_state_pending);
4870 else
4871 ipf_queueappend(softc->ipf_ticks, &is->is_sti,
4872 &softs->ipf_state_pending, is);
4873
4874 MUTEX_ENTER(&is->is_lock);
4875 if (is->is_me != NULL) {
4876 *is->is_me = NULL;
4877 is->is_me = NULL;
4878 is->is_ref--;
4879 }
4880 MUTEX_EXIT(&is->is_lock);
4881 }
4882
4883
4884 /* ------------------------------------------------------------------------ */
4885 /* Function: ipf_state_matchflush */
4886 /* Returns: Nil */
4887 /* Parameters: softc(I) - pointer to main soft context */
4888 /* data(I) - pointer to state structure */
4889 /* Locks: ipf_state (read or write) */
4890 /* */
4891 /* Flush all entries from the list of state entries that match the */
4892 /* properties in the array loaded. */
4893 /* ------------------------------------------------------------------------ */
4894 int
ipf_state_matchflush(ipf_main_softc_t * softc,void * data)4895 ipf_state_matchflush(ipf_main_softc_t *softc, void *data)
4896 {
4897 ipf_state_softc_t *softs = softc->ipf_state_soft;
4898 int *array, flushed, error;
4899 ipstate_t *state, *statenext;
4900 ipfobj_t obj;
4901
4902 error = ipf_matcharray_load(softc, data, &obj, &array);
4903 if (error != 0)
4904 return error;
4905
4906 flushed = 0;
4907
4908 for (state = softs->ipf_state_list; state != NULL; state = statenext) {
4909 statenext = state->is_next;
4910 if (ipf_state_matcharray(state, array, softc->ipf_ticks) == 0) {
4911 ipf_state_del(softc, state, ISL_FLUSH);
4912 flushed++;
4913 }
4914 }
4915
4916 obj.ipfo_retval = flushed;
4917 error = BCOPYOUT(&obj, data, sizeof(obj));
4918
4919 KFREES(array, array[0] * sizeof(*array));
4920
4921 return error;
4922 }
4923
4924
/* ------------------------------------------------------------------------ */
/* Function:    ipf_state_matcharray                                        */
/* Returns:     int    - 0 = no match, 1 = match                            */
/* Parameters:  state(I) - pointer to state structure                       */
/*              array(I) - pointer to ipf matching expression               */
/*              ticks(I) - current value of ipfilter tick timer             */
/* Locks:       ipf_state (read or write)                                   */
/*                                                                          */
/* Compare a state entry with the match array passed in and return a value  */
/* to indicate whether or not the matching was successful.                  */
/*                                                                          */
/* array[0] is used as the count of ints still to be consumed; the          */
/* expressions (ipfexp_t) start at array[1].  Each expression is evaluated  */
/* in turn, its result is inverted if ipfe_not is set, and evaluation       */
/* stops at the first expression whose (possibly inverted) result is 0.     */
/*                                                                          */
/* NOTE(review): the loop's increment clause sets rv back to 0 after every  */
/* expression that succeeds, and every break is taken with rv == 0, so as   */
/* written each path out of the loop returns 0 - including the case where   */
/* all expressions matched.  This contradicts the "1 = match" return        */
/* documented above; confirm the intended behaviour together with the       */
/* caller's test of the return value before changing it.                    */
/* ------------------------------------------------------------------------ */
static int
ipf_state_matcharray(ipstate_t *state, int *array, u_long ticks)
{
	int i, n, *x, rv, p;
	ipfexp_t *e;

	rv = 0;
	n = array[0];
	x = array + 1;

	/*
	 * Step from one expression to the next; x[3] overlays ipfe_size of
	 * the current expression when x is viewed as an ipfexp_t (see the
	 * cast below).  Note that rv is cleared on every step.
	 */
	for (; n > 0; x += 3 + x[3], rv = 0) {
		e = (ipfexp_t *)x;
		n -= e->ipfe_size;
		/* An explicit end marker terminates the expression list. */
		if (x[0] == IPF_EXP_END)
			break;

		/*
		 * If we need to match the protocol and that doesn't match,
		 * don't even bother with the instruction array.
		 */
		/* The upper 16 bits of the command carry a protocol (0 = any). */
		p = e->ipfe_cmd >> 16;
		if ((p != 0) && (p != state->is_p))
			break;

		/*
		 * Within one expression the arguments are alternatives: the
		 * loops below stop at the first argument that matches.
		 */
		switch (e->ipfe_cmd)
		{
		case IPF_EXP_IP_PR :
			for (i = 0; !rv && i < e->ipfe_narg; i++) {
				rv |= (state->is_p == e->ipfe_arg0[i]);
			}
			break;

		/*
		 * IPv4 address matches: arguments come in pairs, with the
		 * value to compare against at [i * 2] and the mask applied
		 * to the state's address at [i * 2 + 1].
		 */
		case IPF_EXP_IP_SRCADDR :
			if (state->is_v != 4)
				break;
			for (i = 0; !rv && i < e->ipfe_narg; i++) {
				rv |= ((state->is_saddr &
					e->ipfe_arg0[i * 2 + 1]) ==
				       e->ipfe_arg0[i * 2]);
			}
			break;

		case IPF_EXP_IP_DSTADDR :
			if (state->is_v != 4)
				break;
			for (i = 0; !rv && i < e->ipfe_narg; i++) {
				rv |= ((state->is_daddr &
					e->ipfe_arg0[i * 2 + 1]) ==
				       e->ipfe_arg0[i * 2]);
			}
			break;

		case IPF_EXP_IP_ADDR :
			if (state->is_v != 4)
				break;
			/* Either the source or the destination may match. */
			for (i = 0; !rv && i < e->ipfe_narg; i++) {
				rv |= ((state->is_saddr &
					e->ipfe_arg0[i * 2 + 1]) ==
				       e->ipfe_arg0[i * 2]) ||
				      ((state->is_daddr &
					e->ipfe_arg0[i * 2 + 1]) ==
				       e->ipfe_arg0[i * 2]);
			}
			break;

#ifdef USE_INET6
		/*
		 * IPv6 address matches: each argument occupies 8 ints, with
		 * the two halves at [i * 8] and [i * 8 + 4] passed to
		 * IP6_MASKEQ.
		 *
		 * NOTE(review): these loops are bounded by x[3] (the same
		 * word as ipfe_size) whereas all other cases use ipfe_narg -
		 * confirm which bound is intended.
		 */
		case IPF_EXP_IP6_SRCADDR :
			if (state->is_v != 6)
				break;
			for (i = 0; !rv && i < x[3]; i++) {
				rv |= IP6_MASKEQ(&state->is_src.in6,
						 &e->ipfe_arg0[i * 8 + 4],
						 &e->ipfe_arg0[i * 8]);
			}
			break;

		case IPF_EXP_IP6_DSTADDR :
			if (state->is_v != 6)
				break;
			for (i = 0; !rv && i < x[3]; i++) {
				rv |= IP6_MASKEQ(&state->is_dst.in6,
						 &e->ipfe_arg0[i * 8 + 4],
						 &e->ipfe_arg0[i * 8]);
			}
			break;

		case IPF_EXP_IP6_ADDR :
			if (state->is_v != 6)
				break;
			/* Either the source or the destination may match. */
			for (i = 0; !rv && i < x[3]; i++) {
				rv |= IP6_MASKEQ(&state->is_src.in6,
						 &e->ipfe_arg0[i * 8 + 4],
						 &e->ipfe_arg0[i * 8]) ||
				      IP6_MASKEQ(&state->is_dst.in6,
						 &e->ipfe_arg0[i * 8 + 4],
						 &e->ipfe_arg0[i * 8]);
			}
			break;
#endif

		/* Port matches: any port, source port only, dest port only. */
		case IPF_EXP_UDP_PORT :
		case IPF_EXP_TCP_PORT :
			for (i = 0; !rv && i < e->ipfe_narg; i++) {
				rv |= (state->is_sport == e->ipfe_arg0[i]) ||
				      (state->is_dport == e->ipfe_arg0[i]);
			}
			break;

		case IPF_EXP_UDP_SPORT :
		case IPF_EXP_TCP_SPORT :
			for (i = 0; !rv && i < e->ipfe_narg; i++) {
				rv |= (state->is_sport == e->ipfe_arg0[i]);
			}
			break;

		case IPF_EXP_UDP_DPORT :
		case IPF_EXP_TCP_DPORT :
			for (i = 0; !rv && i < e->ipfe_narg; i++) {
				rv |= (state->is_dport == e->ipfe_arg0[i]);
			}
			break;

		/* TCP state of either side of the connection. */
		case IPF_EXP_TCP_STATE :
			for (i = 0; !rv && i < e->ipfe_narg; i++) {
				rv |= (state->is_state[0] == e->ipfe_arg0[i]) ||
				      (state->is_state[1] == e->ipfe_arg0[i]);
			}
			break;

		/* Ticks elapsed since is_touched exceed the first argument. */
		case IPF_EXP_IDLE_GT :
			rv |= (ticks - state->is_touched > e->ipfe_arg0[0]);
			break;
		}

		/*
		 * Factor in doing a negative match.
		 */
		rv ^= e->ipfe_not;

		/* One failed expression fails the whole match. */
		if (rv == 0)
			break;
	}

	return rv;
}
5081
5082
5083 /* ------------------------------------------------------------------------ */
5084 /* Function: ipf_state_settimeout */
5085 /* Returns: int 0 = success, else failure */
5086 /* Parameters: softc(I) - pointer to main soft context */
5087 /* t(I) - pointer to tuneable being changed */
5088 /* p(I) - pointer to the new value */
5089 /* */
5090 /* Sets a timeout value for one of the many timeout queues. We find the */
5091 /* correct queue using a somewhat manual process of comparing the timeout */
5092 /* names for each specific value available and calling ipf_apply_timeout on */
5093 /* that queue so that all of the items on it are updated accordingly. */
5094 /* ------------------------------------------------------------------------ */
5095 int
ipf_state_settimeout(struct ipf_main_softc_s * softc,ipftuneable_t * t,ipftuneval_t * p)5096 ipf_state_settimeout(struct ipf_main_softc_s *softc, ipftuneable_t *t,
5097 ipftuneval_t *p)
5098 {
5099 ipf_state_softc_t *softs = softc->ipf_state_soft;
5100
5101 /*
5102 * In case there is nothing to do...
5103 */
5104 if (*t->ipft_pint == p->ipftu_int)
5105 return 0;
5106
5107 if (!strncmp(t->ipft_name, "tcp_", 4))
5108 return ipf_settimeout_tcp(t, p, softs->ipf_state_tcptq);
5109
5110 if (!strcmp(t->ipft_name, "udp_timeout")) {
5111 ipf_apply_timeout(&softs->ipf_state_udptq, p->ipftu_int);
5112 } else if (!strcmp(t->ipft_name, "udp_ack_timeout")) {
5113 ipf_apply_timeout(&softs->ipf_state_udpacktq, p->ipftu_int);
5114 } else if (!strcmp(t->ipft_name, "icmp_timeout")) {
5115 ipf_apply_timeout(&softs->ipf_state_icmptq, p->ipftu_int);
5116 } else if (!strcmp(t->ipft_name, "icmp_ack_timeout")) {
5117 ipf_apply_timeout(&softs->ipf_state_icmpacktq, p->ipftu_int);
5118 } else if (!strcmp(t->ipft_name, "ip_timeout")) {
5119 ipf_apply_timeout(&softs->ipf_state_iptq, p->ipftu_int);
5120 } else {
5121 IPFERROR(100034);
5122 return ESRCH;
5123 }
5124
5125 /*
5126 * Update the tuneable being set.
5127 */
5128 *t->ipft_pint = p->ipftu_int;
5129
5130 return 0;
5131 }
5132
5133
5134 /* ------------------------------------------------------------------------ */
5135 /* Function: ipf_state_rehash */
5136 /* Returns: int 0 = success, else failure */
5137 /* Parameters: softc(I) - pointer to main soft context */
5138 /* t(I) - pointer to tuneable being changed */
5139 /* p(I) - pointer to the new value */
5140 /* */
5141 /* To change the size of the state hash table at runtime, a new table has */
5142 /* to be allocated and then all of the existing entries put in it, bumping */
5143 /* up the bucketlength for it as we go along. */
5144 /* ------------------------------------------------------------------------ */
5145 int
ipf_state_rehash(ipf_main_softc_t * softc,ipftuneable_t * t,ipftuneval_t * p)5146 ipf_state_rehash(ipf_main_softc_t *softc, ipftuneable_t *t, ipftuneval_t *p)
5147 {
5148 ipf_state_softc_t *softs = softc->ipf_state_soft;
5149 ipstate_t **newtab, *is;
5150 u_long *newseed;
5151 u_int *bucketlens;
5152 u_int maxbucket;
5153 u_int newsize;
5154 u_int hv;
5155 int i;
5156
5157 newsize = p->ipftu_int;
5158 /*
5159 * In case there is nothing to do...
5160 */
5161 if (newsize == softs->ipf_state_size)
5162 return 0;
5163
5164 KMALLOCS(newtab, ipstate_t **, newsize * sizeof(ipstate_t *));
5165 if (newtab == NULL) {
5166 IPFERROR(100035);
5167 return ENOMEM;
5168 }
5169
5170 KMALLOCS(bucketlens, u_int *, newsize * sizeof(u_int));
5171 if (bucketlens == NULL) {
5172 KFREES(newtab, newsize * sizeof(*softs->ipf_state_table));
5173 IPFERROR(100036);
5174 return ENOMEM;
5175 }
5176
5177 newseed = ipf_state_seed_alloc(newsize, softs->ipf_state_max);
5178 if (newseed == NULL) {
5179 KFREES(bucketlens, newsize * sizeof(*bucketlens));
5180 KFREES(newtab, newsize * sizeof(*newtab));
5181 IPFERROR(100037);
5182 return ENOMEM;
5183 }
5184
5185 for (maxbucket = 0, i = newsize; i > 0; i >>= 1)
5186 maxbucket++;
5187 maxbucket *= 2;
5188
5189 bzero((char *)newtab, newsize * sizeof(ipstate_t *));
5190 bzero((char *)bucketlens, newsize * sizeof(u_int));
5191
5192 WRITE_ENTER(&softc->ipf_state);
5193
5194 if (softs->ipf_state_table != NULL) {
5195 KFREES(softs->ipf_state_table,
5196 softs->ipf_state_size * sizeof(*softs->ipf_state_table));
5197 }
5198 softs->ipf_state_table = newtab;
5199
5200 if (softs->ipf_state_seed != NULL) {
5201 KFREES(softs->ipf_state_seed,
5202 softs->ipf_state_size * sizeof(*softs->ipf_state_seed));
5203 }
5204 softs->ipf_state_seed = newseed;
5205
5206 if (softs->ipf_state_stats.iss_bucketlen != NULL) {
5207 KFREES(softs->ipf_state_stats.iss_bucketlen,
5208 softs->ipf_state_size * sizeof(u_int));
5209 }
5210 softs->ipf_state_stats.iss_bucketlen = bucketlens;
5211 softs->ipf_state_maxbucket = maxbucket;
5212 softs->ipf_state_size = newsize;
5213
5214 /*
5215 * Walk through the entire list of state table entries and put them
5216 * in the new state table, somewhere. Because we have a new table,
5217 * we need to restart the counter of how many chains are in use.
5218 */
5219 softs->ipf_state_stats.iss_inuse = 0;
5220 for (is = softs->ipf_state_list; is != NULL; is = is->is_next) {
5221 is->is_hnext = NULL;
5222 is->is_phnext = NULL;
5223 hv = is->is_hv % softs->ipf_state_size;
5224
5225 if (softs->ipf_state_table[hv] != NULL)
5226 softs->ipf_state_table[hv]->is_phnext = &is->is_hnext;
5227 else
5228 softs->ipf_state_stats.iss_inuse++;
5229 is->is_phnext = softs->ipf_state_table + hv;
5230 is->is_hnext = softs->ipf_state_table[hv];
5231 softs->ipf_state_table[hv] = is;
5232 softs->ipf_state_stats.iss_bucketlen[hv]++;
5233 }
5234 RWLOCK_EXIT(&softc->ipf_state);
5235
5236 return 0;
5237 }
5238
5239
5240 /* ------------------------------------------------------------------------ */
5241 /* Function: ipf_state_add_tq */
5242 /* Returns: ipftq_t * - NULL = failure, else pointer to new timeout */
5243 /* queue */
5244 /* Parameters: softc(I) - pointer to main soft context */
5245 /* ttl(I) - pointer to the ttl for the new queue */
5246 /* */
5247 /* Request a pointer to a timeout queue that has a ttl as given by the */
5248 /* value being passed in. The timeout queue is added tot the list of those */
5249 /* used internally for stateful filtering. */
5250 /* ------------------------------------------------------------------------ */
5251 ipftq_t *
ipf_state_add_tq(ipf_main_softc_t * softc,int ttl)5252 ipf_state_add_tq(ipf_main_softc_t *softc, int ttl)
5253 {
5254 ipf_state_softc_t *softs = softc->ipf_state_soft;
5255
5256 return ipf_addtimeoutqueue(softc, &softs->ipf_state_usertq, ttl);
5257 }
5258
5259
5260 #ifndef _KERNEL
5261 /*
5262 * Display the built up state table rules and mapping entries.
5263 */
5264 void
ipf_state_dump(ipf_main_softc_t * softc,void * arg)5265 ipf_state_dump(ipf_main_softc_t *softc, void *arg)
5266 {
5267 ipf_state_softc_t *softs = arg;
5268 ipstate_t *ips;
5269
5270 printf("List of active state sessions:\n");
5271 for (ips = softs->ipf_state_list; ips != NULL; )
5272 ips = printstate(ips, opts & (OPT_DEBUG|OPT_VERBOSE),
5273 softc->ipf_ticks);
5274 }
5275 #endif
5276