1 /* 2 * Copyright (C) 1995-2003 by Darren Reed. 3 * 4 * See the IPFILTER.LICENCE file for details on licencing. 5 * 6 * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved. 7 * 8 * Copyright (c) 2014, Joyent, Inc. All rights reserved. 9 */ 10 11 #if defined(KERNEL) || defined(_KERNEL) 12 # undef KERNEL 13 # undef _KERNEL 14 # define KERNEL 1 15 # define _KERNEL 1 16 #endif 17 #include <sys/errno.h> 18 #include <sys/types.h> 19 #include <sys/param.h> 20 #include <sys/file.h> 21 #if defined(__NetBSD__) && (NetBSD >= 199905) && !defined(IPFILTER_LKM) && \ 22 defined(_KERNEL) 23 # include "opt_ipfilter_log.h" 24 #endif 25 #if defined(_KERNEL) && defined(__FreeBSD_version) && \ 26 (__FreeBSD_version >= 400000) && !defined(KLD_MODULE) 27 #include "opt_inet6.h" 28 #endif 29 #if !defined(_KERNEL) && !defined(__KERNEL__) 30 # include <stdio.h> 31 # include <stdlib.h> 32 # include <string.h> 33 # define _KERNEL 34 # ifdef __OpenBSD__ 35 struct file; 36 # endif 37 # include <sys/uio.h> 38 # undef _KERNEL 39 #endif 40 #if defined(_KERNEL) && (__FreeBSD_version >= 220000) 41 # include <sys/filio.h> 42 # include <sys/fcntl.h> 43 # if (__FreeBSD_version >= 300000) && !defined(IPFILTER_LKM) 44 # include "opt_ipfilter.h" 45 # endif 46 #else 47 # include <sys/ioctl.h> 48 #endif 49 #include <sys/time.h> 50 #if !defined(linux) 51 # include <sys/protosw.h> 52 #endif 53 #include <sys/socket.h> 54 #if defined(_KERNEL) 55 # include <sys/systm.h> 56 # if !defined(__SVR4) && !defined(__svr4__) 57 # include <sys/mbuf.h> 58 # endif 59 #endif 60 #if defined(__SVR4) || defined(__svr4__) 61 # include <sys/filio.h> 62 # include <sys/byteorder.h> 63 # ifdef _KERNEL 64 # include <sys/dditypes.h> 65 # endif 66 # include <sys/stream.h> 67 # include <sys/kmem.h> 68 #endif 69 70 #include <net/if.h> 71 #ifdef sun 72 # include <net/af.h> 73 #endif 74 #include <net/route.h> 75 #include <netinet/in.h> 76 #include <netinet/in_systm.h> 77 #include <netinet/ip.h> 78 #include 
<netinet/tcp.h> 79 #if !defined(linux) 80 # include <netinet/ip_var.h> 81 #endif 82 #if !defined(__hpux) && !defined(linux) 83 # include <netinet/tcp_fsm.h> 84 #endif 85 #include <netinet/udp.h> 86 #include <netinet/ip_icmp.h> 87 #include "netinet/ip_compat.h" 88 #include <netinet/tcpip.h> 89 #include "netinet/ip_fil.h" 90 #include "netinet/ip_nat.h" 91 #include "netinet/ip_frag.h" 92 #include "netinet/ip_state.h" 93 #include "netinet/ip_proxy.h" 94 #include "netinet/ipf_stack.h" 95 #ifdef IPFILTER_SYNC 96 #include "netinet/ip_sync.h" 97 #endif 98 #ifdef IPFILTER_SCAN 99 #include "netinet/ip_scan.h" 100 #endif 101 #ifdef USE_INET6 102 #include <netinet/icmp6.h> 103 #endif 104 #if (__FreeBSD_version >= 300000) 105 # include <sys/malloc.h> 106 # if defined(_KERNEL) && !defined(IPFILTER_LKM) 107 # include <sys/libkern.h> 108 # include <sys/systm.h> 109 # endif 110 #endif 111 /* END OF INCLUDES */ 112 113 114 #if !defined(lint) 115 static const char sccsid[] = "@(#)ip_state.c 1.8 6/5/96 (C) 1993-2000 Darren Reed"; 116 static const char rcsid[] = "@(#)$Id: ip_state.c,v 2.186.2.36 2005/08/11 19:58:03 darrenr Exp $"; 117 #endif 118 119 #ifdef USE_INET6 120 static ipstate_t *fr_checkicmp6matchingstate __P((fr_info_t *)); 121 #endif 122 static ipstate_t *fr_matchsrcdst __P((fr_info_t *, ipstate_t *, i6addr_t *, 123 i6addr_t *, tcphdr_t *, u_32_t)); 124 static ipstate_t *fr_checkicmpmatchingstate __P((fr_info_t *)); 125 static int fr_state_flush __P((int, int, ipf_stack_t *)); 126 static ips_stat_t *fr_statetstats __P((ipf_stack_t *)); 127 static int fr_state_remove __P((caddr_t, ipf_stack_t *)); 128 static void fr_ipsmove __P((ipstate_t *, u_int, ipf_stack_t *)); 129 static int fr_tcpstate __P((fr_info_t *, tcphdr_t *, ipstate_t *)); 130 static int fr_tcpoptions __P((fr_info_t *, tcphdr_t *, tcpdata_t *)); 131 static ipstate_t *fr_stclone __P((fr_info_t *, tcphdr_t *, ipstate_t *)); 132 static void fr_fixinisn __P((fr_info_t *, ipstate_t *)); 133 static void fr_fixoutisn 
__P((fr_info_t *, ipstate_t *)); 134 static void fr_checknewisn __P((fr_info_t *, ipstate_t *)); 135 static int fr_stateiter __P((ipftoken_t *, ipfgeniter_t *, ipf_stack_t *)); 136 137 int fr_stputent __P((caddr_t, ipf_stack_t *)); 138 int fr_stgetent __P((caddr_t, ipf_stack_t *)); 139 140 #define ONE_DAY IPF_TTLVAL(1 * 86400) /* 1 day */ 141 #define FIVE_DAYS (5 * ONE_DAY) 142 #define DOUBLE_HASH(x, ifs) \ 143 (((x) + ifs->ifs_ips_seed[(x) % ifs->ifs_fr_statesize]) % ifs->ifs_fr_statesize) 144 145 146 /* ------------------------------------------------------------------------ */ 147 /* Function: fr_stateinit */ 148 /* Returns: int - 0 == success, -1 == failure */ 149 /* Parameters: ifs - ipf stack instance */ 150 /* */ 151 /* Initialise all the global variables used within the state code. */ 152 /* This action also includes initiailising locks. */ 153 /* ------------------------------------------------------------------------ */ 154 int fr_stateinit(ifs) 155 ipf_stack_t *ifs; 156 { 157 #if defined(NEED_LOCAL_RAND) || !defined(_KERNEL) 158 struct timeval tv; 159 #endif 160 int i; 161 162 KMALLOCS(ifs->ifs_ips_table, ipstate_t **, 163 ifs->ifs_fr_statesize * sizeof(ipstate_t *)); 164 if (ifs->ifs_ips_table == NULL) 165 return -1; 166 bzero((char *)ifs->ifs_ips_table, 167 ifs->ifs_fr_statesize * sizeof(ipstate_t *)); 168 169 KMALLOCS(ifs->ifs_ips_seed, u_long *, 170 ifs->ifs_fr_statesize * sizeof(*ifs->ifs_ips_seed)); 171 if (ifs->ifs_ips_seed == NULL) 172 return -2; 173 #if defined(NEED_LOCAL_RAND) || !defined(_KERNEL) 174 tv.tv_sec = 0; 175 GETKTIME(&tv); 176 #endif 177 for (i = 0; i < ifs->ifs_fr_statesize; i++) { 178 /* 179 * XXX - ips_seed[X] should be a random number of sorts. 
180 */ 181 #if !defined(NEED_LOCAL_RAND) && defined(_KERNEL) 182 ifs->ifs_ips_seed[i] = ipf_random(); 183 #else 184 ifs->ifs_ips_seed[i] = ((u_long)ifs->ifs_ips_seed + i) * 185 ifs->ifs_fr_statesize; 186 ifs->ifs_ips_seed[i] += tv.tv_sec; 187 ifs->ifs_ips_seed[i] *= (u_long)ifs->ifs_ips_seed; 188 ifs->ifs_ips_seed[i] ^= 0x5a5aa5a5; 189 ifs->ifs_ips_seed[i] *= ifs->ifs_fr_statemax; 190 #endif 191 } 192 193 /* fill icmp reply type table */ 194 for (i = 0; i <= ICMP_MAXTYPE; i++) 195 icmpreplytype4[i] = -1; 196 icmpreplytype4[ICMP_ECHO] = ICMP_ECHOREPLY; 197 icmpreplytype4[ICMP_TSTAMP] = ICMP_TSTAMPREPLY; 198 icmpreplytype4[ICMP_IREQ] = ICMP_IREQREPLY; 199 icmpreplytype4[ICMP_MASKREQ] = ICMP_MASKREPLY; 200 #ifdef USE_INET6 201 /* fill icmp reply type table */ 202 for (i = 0; i <= ICMP6_MAXTYPE; i++) 203 icmpreplytype6[i] = -1; 204 icmpreplytype6[ICMP6_ECHO_REQUEST] = ICMP6_ECHO_REPLY; 205 icmpreplytype6[ICMP6_MEMBERSHIP_QUERY] = ICMP6_MEMBERSHIP_REPORT; 206 icmpreplytype6[ICMP6_NI_QUERY] = ICMP6_NI_REPLY; 207 icmpreplytype6[ND_ROUTER_SOLICIT] = ND_ROUTER_ADVERT; 208 icmpreplytype6[ND_NEIGHBOR_SOLICIT] = ND_NEIGHBOR_ADVERT; 209 #endif 210 211 KMALLOCS(ifs->ifs_ips_stats.iss_bucketlen, u_long *, 212 ifs->ifs_fr_statesize * sizeof(u_long)); 213 if (ifs->ifs_ips_stats.iss_bucketlen == NULL) 214 return -1; 215 bzero((char *)ifs->ifs_ips_stats.iss_bucketlen, 216 ifs->ifs_fr_statesize * sizeof(u_long)); 217 218 if (ifs->ifs_fr_state_maxbucket == 0) { 219 for (i = ifs->ifs_fr_statesize; i > 0; i >>= 1) 220 ifs->ifs_fr_state_maxbucket++; 221 ifs->ifs_fr_state_maxbucket *= 2; 222 } 223 224 fr_sttab_init(ifs->ifs_ips_tqtqb, ifs); 225 ifs->ifs_ips_tqtqb[IPF_TCP_NSTATES - 1].ifq_next = &ifs->ifs_ips_udptq; 226 ifs->ifs_ips_udptq.ifq_ttl = (u_long)ifs->ifs_fr_udptimeout; 227 ifs->ifs_ips_udptq.ifq_ref = 1; 228 ifs->ifs_ips_udptq.ifq_head = NULL; 229 ifs->ifs_ips_udptq.ifq_tail = &ifs->ifs_ips_udptq.ifq_head; 230 MUTEX_INIT(&ifs->ifs_ips_udptq.ifq_lock, "ipftq udp tab"); 231 
ifs->ifs_ips_udptq.ifq_next = &ifs->ifs_ips_udpacktq; 232 ifs->ifs_ips_udpacktq.ifq_ttl = (u_long)ifs->ifs_fr_udpacktimeout; 233 ifs->ifs_ips_udpacktq.ifq_ref = 1; 234 ifs->ifs_ips_udpacktq.ifq_head = NULL; 235 ifs->ifs_ips_udpacktq.ifq_tail = &ifs->ifs_ips_udpacktq.ifq_head; 236 MUTEX_INIT(&ifs->ifs_ips_udpacktq.ifq_lock, "ipftq udpack tab"); 237 ifs->ifs_ips_udpacktq.ifq_next = &ifs->ifs_ips_icmptq; 238 ifs->ifs_ips_icmptq.ifq_ttl = (u_long)ifs->ifs_fr_icmptimeout; 239 ifs->ifs_ips_icmptq.ifq_ref = 1; 240 ifs->ifs_ips_icmptq.ifq_head = NULL; 241 ifs->ifs_ips_icmptq.ifq_tail = &ifs->ifs_ips_icmptq.ifq_head; 242 MUTEX_INIT(&ifs->ifs_ips_icmptq.ifq_lock, "ipftq icmp tab"); 243 ifs->ifs_ips_icmptq.ifq_next = &ifs->ifs_ips_icmpacktq; 244 ifs->ifs_ips_icmpacktq.ifq_ttl = (u_long)ifs->ifs_fr_icmpacktimeout; 245 ifs->ifs_ips_icmpacktq.ifq_ref = 1; 246 ifs->ifs_ips_icmpacktq.ifq_head = NULL; 247 ifs->ifs_ips_icmpacktq.ifq_tail = &ifs->ifs_ips_icmpacktq.ifq_head; 248 MUTEX_INIT(&ifs->ifs_ips_icmpacktq.ifq_lock, "ipftq icmpack tab"); 249 ifs->ifs_ips_icmpacktq.ifq_next = &ifs->ifs_ips_iptq; 250 ifs->ifs_ips_iptq.ifq_ttl = (u_long)ifs->ifs_fr_iptimeout; 251 ifs->ifs_ips_iptq.ifq_ref = 1; 252 ifs->ifs_ips_iptq.ifq_head = NULL; 253 ifs->ifs_ips_iptq.ifq_tail = &ifs->ifs_ips_iptq.ifq_head; 254 MUTEX_INIT(&ifs->ifs_ips_iptq.ifq_lock, "ipftq ip tab"); 255 ifs->ifs_ips_iptq.ifq_next = &ifs->ifs_ips_deletetq; 256 /* entry's ttl in deletetq is just 1 tick */ 257 ifs->ifs_ips_deletetq.ifq_ttl = (u_long) 1; 258 ifs->ifs_ips_deletetq.ifq_ref = 1; 259 ifs->ifs_ips_deletetq.ifq_head = NULL; 260 ifs->ifs_ips_deletetq.ifq_tail = &ifs->ifs_ips_deletetq.ifq_head; 261 MUTEX_INIT(&ifs->ifs_ips_deletetq.ifq_lock, "state delete queue"); 262 ifs->ifs_ips_deletetq.ifq_next = NULL; 263 264 RWLOCK_INIT(&ifs->ifs_ipf_state, "ipf IP state rwlock"); 265 MUTEX_INIT(&ifs->ifs_ipf_stinsert, "ipf state insert mutex"); 266 ifs->ifs_fr_state_init = 1; 267 268 ifs->ifs_ips_last_force_flush = 
ifs->ifs_fr_ticks; 269 return 0; 270 } 271 272 273 /* ------------------------------------------------------------------------ */ 274 /* Function: fr_stateunload */ 275 /* Returns: Nil */ 276 /* Parameters: ifs - ipf stack instance */ 277 /* */ 278 /* Release and destroy any resources acquired or initialised so that */ 279 /* IPFilter can be unloaded or re-initialised. */ 280 /* ------------------------------------------------------------------------ */ 281 void fr_stateunload(ifs) 282 ipf_stack_t *ifs; 283 { 284 ipftq_t *ifq, *ifqnext; 285 ipstate_t *is; 286 287 while ((is = ifs->ifs_ips_list) != NULL) 288 (void) fr_delstate(is, 0, ifs); 289 290 /* 291 * Proxy timeout queues are not cleaned here because although they 292 * exist on the state list, appr_unload is called after fr_stateunload 293 * and the proxies actually are responsible for them being created. 294 * Should the proxy timeouts have their own list? There's no real 295 * justification as this is the only complicationA 296 */ 297 for (ifq = ifs->ifs_ips_utqe; ifq != NULL; ifq = ifqnext) { 298 ifqnext = ifq->ifq_next; 299 if (((ifq->ifq_flags & IFQF_PROXY) == 0) && 300 (fr_deletetimeoutqueue(ifq) == 0)) 301 fr_freetimeoutqueue(ifq, ifs); 302 } 303 304 ifs->ifs_ips_stats.iss_inuse = 0; 305 ifs->ifs_ips_num = 0; 306 307 if (ifs->ifs_fr_state_init == 1) { 308 fr_sttab_destroy(ifs->ifs_ips_tqtqb); 309 MUTEX_DESTROY(&ifs->ifs_ips_udptq.ifq_lock); 310 MUTEX_DESTROY(&ifs->ifs_ips_icmptq.ifq_lock); 311 MUTEX_DESTROY(&ifs->ifs_ips_udpacktq.ifq_lock); 312 MUTEX_DESTROY(&ifs->ifs_ips_icmpacktq.ifq_lock); 313 MUTEX_DESTROY(&ifs->ifs_ips_iptq.ifq_lock); 314 MUTEX_DESTROY(&ifs->ifs_ips_deletetq.ifq_lock); 315 } 316 317 if (ifs->ifs_ips_table != NULL) { 318 KFREES(ifs->ifs_ips_table, 319 ifs->ifs_fr_statesize * sizeof(*ifs->ifs_ips_table)); 320 ifs->ifs_ips_table = NULL; 321 } 322 323 if (ifs->ifs_ips_seed != NULL) { 324 KFREES(ifs->ifs_ips_seed, 325 ifs->ifs_fr_statesize * sizeof(*ifs->ifs_ips_seed)); 326 
ifs->ifs_ips_seed = NULL; 327 } 328 329 if (ifs->ifs_ips_stats.iss_bucketlen != NULL) { 330 KFREES(ifs->ifs_ips_stats.iss_bucketlen, 331 ifs->ifs_fr_statesize * sizeof(u_long)); 332 ifs->ifs_ips_stats.iss_bucketlen = NULL; 333 } 334 335 if (ifs->ifs_fr_state_maxbucket_reset == 1) 336 ifs->ifs_fr_state_maxbucket = 0; 337 338 if (ifs->ifs_fr_state_init == 1) { 339 ifs->ifs_fr_state_init = 0; 340 RW_DESTROY(&ifs->ifs_ipf_state); 341 MUTEX_DESTROY(&ifs->ifs_ipf_stinsert); 342 } 343 } 344 345 346 /* ------------------------------------------------------------------------ */ 347 /* Function: fr_statetstats */ 348 /* Returns: ips_state_t* - pointer to state stats structure */ 349 /* Parameters: Nil */ 350 /* */ 351 /* Put all the current numbers and pointers into a single struct and return */ 352 /* a pointer to it. */ 353 /* ------------------------------------------------------------------------ */ 354 static ips_stat_t *fr_statetstats(ifs) 355 ipf_stack_t *ifs; 356 { 357 ifs->ifs_ips_stats.iss_active = ifs->ifs_ips_num; 358 ifs->ifs_ips_stats.iss_statesize = ifs->ifs_fr_statesize; 359 ifs->ifs_ips_stats.iss_statemax = ifs->ifs_fr_statemax; 360 ifs->ifs_ips_stats.iss_table = ifs->ifs_ips_table; 361 ifs->ifs_ips_stats.iss_list = ifs->ifs_ips_list; 362 ifs->ifs_ips_stats.iss_ticks = ifs->ifs_fr_ticks; 363 return &ifs->ifs_ips_stats; 364 } 365 366 /* ------------------------------------------------------------------------ */ 367 /* Function: fr_state_remove */ 368 /* Returns: int - 0 == success, != 0 == failure */ 369 /* Parameters: data(I) - pointer to state structure to delete from table */ 370 /* ifs - ipf stack instance */ 371 /* */ 372 /* Search for a state structure that matches the one passed, according to */ 373 /* the IP addresses and other protocol specific information. 
*/ 374 /* ------------------------------------------------------------------------ */ 375 static int fr_state_remove(data, ifs) 376 caddr_t data; 377 ipf_stack_t *ifs; 378 { 379 ipstate_t *sp, st; 380 int error; 381 382 sp = &st; 383 error = fr_inobj(data, &st, IPFOBJ_IPSTATE); 384 if (error) 385 return EFAULT; 386 387 WRITE_ENTER(&ifs->ifs_ipf_state); 388 for (sp = ifs->ifs_ips_list; sp; sp = sp->is_next) 389 if ((sp->is_p == st.is_p) && (sp->is_v == st.is_v) && 390 !bcmp((caddr_t)&sp->is_src, (caddr_t)&st.is_src, 391 sizeof(st.is_src)) && 392 !bcmp((caddr_t)&sp->is_dst, (caddr_t)&st.is_dst, 393 sizeof(st.is_dst)) && 394 !bcmp((caddr_t)&sp->is_ps, (caddr_t)&st.is_ps, 395 sizeof(st.is_ps))) { 396 (void) fr_delstate(sp, ISL_REMOVE, ifs); 397 RWLOCK_EXIT(&ifs->ifs_ipf_state); 398 return 0; 399 } 400 RWLOCK_EXIT(&ifs->ifs_ipf_state); 401 return ESRCH; 402 } 403 404 405 /* ------------------------------------------------------------------------ */ 406 /* Function: fr_state_ioctl */ 407 /* Returns: int - 0 == success, != 0 == failure */ 408 /* Parameters: data(I) - pointer to ioctl data */ 409 /* cmd(I) - ioctl command integer */ 410 /* mode(I) - file mode bits used with open */ 411 /* uid(I) - uid of caller */ 412 /* ctx(I) - pointer to give the uid context */ 413 /* ifs - ipf stack instance */ 414 /* */ 415 /* Processes an ioctl call made to operate on the IP Filter state device. */ 416 /* ------------------------------------------------------------------------ */ 417 int fr_state_ioctl(data, cmd, mode, uid, ctx, ifs) 418 caddr_t data; 419 ioctlcmd_t cmd; 420 int mode, uid; 421 void *ctx; 422 ipf_stack_t *ifs; 423 { 424 int arg, ret, error = 0; 425 426 switch (cmd) 427 { 428 /* 429 * Delete an entry from the state table. 
430 */ 431 case SIOCDELST : 432 error = fr_state_remove(data, ifs); 433 break; 434 /* 435 * Flush the state table 436 */ 437 case SIOCIPFFL : 438 error = BCOPYIN(data, (char *)&arg, sizeof(arg)); 439 if (error != 0) { 440 error = EFAULT; 441 } else { 442 if (VALID_TABLE_FLUSH_OPT(arg)) { 443 WRITE_ENTER(&ifs->ifs_ipf_state); 444 ret = fr_state_flush(arg, 4, ifs); 445 RWLOCK_EXIT(&ifs->ifs_ipf_state); 446 error = BCOPYOUT((char *)&ret, data, 447 sizeof(ret)); 448 if (error != 0) 449 return EFAULT; 450 } else { 451 error = EINVAL; 452 } 453 } 454 break; 455 456 #ifdef USE_INET6 457 case SIOCIPFL6 : 458 error = BCOPYIN(data, (char *)&arg, sizeof(arg)); 459 if (error != 0) { 460 error = EFAULT; 461 } else { 462 if (VALID_TABLE_FLUSH_OPT(arg)) { 463 WRITE_ENTER(&ifs->ifs_ipf_state); 464 ret = fr_state_flush(arg, 6, ifs); 465 RWLOCK_EXIT(&ifs->ifs_ipf_state); 466 error = BCOPYOUT((char *)&ret, data, 467 sizeof(ret)); 468 if (error != 0) 469 return EFAULT; 470 } else { 471 error = EINVAL; 472 } 473 } 474 break; 475 #endif 476 #ifdef IPFILTER_LOG 477 /* 478 * Flush the state log. 479 */ 480 case SIOCIPFFB : 481 if (!(mode & FWRITE)) 482 error = EPERM; 483 else { 484 int tmp; 485 486 tmp = ipflog_clear(IPL_LOGSTATE, ifs); 487 error = BCOPYOUT((char *)&tmp, data, sizeof(tmp)); 488 if (error != 0) 489 error = EFAULT; 490 } 491 break; 492 /* 493 * Turn logging of state information on/off. 494 */ 495 case SIOCSETLG : 496 if (!(mode & FWRITE)) { 497 error = EPERM; 498 } else { 499 error = BCOPYIN((char *)data, 500 (char *)&ifs->ifs_ipstate_logging, 501 sizeof(ifs->ifs_ipstate_logging)); 502 if (error != 0) 503 error = EFAULT; 504 } 505 break; 506 /* 507 * Return the current state of logging. 508 */ 509 case SIOCGETLG : 510 error = BCOPYOUT((char *)&ifs->ifs_ipstate_logging, 511 (char *)data, 512 sizeof(ifs->ifs_ipstate_logging)); 513 if (error != 0) 514 error = EFAULT; 515 break; 516 /* 517 * Return the number of bytes currently waiting to be read. 
518 */ 519 case FIONREAD : 520 arg = ifs->ifs_iplused[IPL_LOGSTATE]; /* returned in an int */ 521 error = BCOPYOUT((char *)&arg, data, sizeof(arg)); 522 if (error != 0) 523 error = EFAULT; 524 break; 525 #endif 526 /* 527 * Get the current state statistics. 528 */ 529 case SIOCGETFS : 530 error = fr_outobj(data, fr_statetstats(ifs), IPFOBJ_STATESTAT); 531 break; 532 /* 533 * Lock/Unlock the state table. (Locking prevents any changes, which 534 * means no packets match). 535 */ 536 case SIOCSTLCK : 537 if (!(mode & FWRITE)) { 538 error = EPERM; 539 } else { 540 error = fr_lock(data, &ifs->ifs_fr_state_lock); 541 } 542 break; 543 /* 544 * Add an entry to the current state table. 545 */ 546 case SIOCSTPUT : 547 if (!ifs->ifs_fr_state_lock || !(mode & FWRITE)) { 548 error = EACCES; 549 break; 550 } 551 error = fr_stputent(data, ifs); 552 break; 553 /* 554 * Get a state table entry. 555 */ 556 case SIOCSTGET : 557 if (!ifs->ifs_fr_state_lock) { 558 error = EACCES; 559 break; 560 } 561 error = fr_stgetent(data, ifs); 562 break; 563 564 case SIOCGENITER : 565 { 566 ipftoken_t *token; 567 ipfgeniter_t iter; 568 569 error = fr_inobj(data, &iter, IPFOBJ_GENITER); 570 if (error != 0) 571 break; 572 573 token = ipf_findtoken(IPFGENITER_STATE, uid, ctx, ifs); 574 if (token != NULL) 575 error = fr_stateiter(token, &iter, ifs); 576 else 577 error = ESRCH; 578 RWLOCK_EXIT(&ifs->ifs_ipf_tokens); 579 break; 580 } 581 582 case SIOCIPFDELTOK : 583 error = BCOPYIN(data, (char *)&arg, sizeof(arg)); 584 if (error != 0) { 585 error = EFAULT; 586 } else { 587 error = ipf_deltoken(arg, uid, ctx, ifs); 588 } 589 break; 590 591 default : 592 error = EINVAL; 593 break; 594 } 595 return error; 596 } 597 598 599 /* ------------------------------------------------------------------------ */ 600 /* Function: fr_stgetent */ 601 /* Returns: int - 0 == success, != 0 == failure */ 602 /* Parameters: data(I) - pointer to state structure to retrieve from table */ 603 /* */ 604 /* Copy out state 
information from the kernel to a user space process. If */ 605 /* there is a filter rule associated with the state entry, copy that out */ 606 /* as well. The entry to copy out is taken from the value of "ips_next" in */ 607 /* the struct passed in and if not null and not found in the list of current*/ 608 /* state entries, the retrieval fails. */ 609 /* ------------------------------------------------------------------------ */ 610 int fr_stgetent(data, ifs) 611 caddr_t data; 612 ipf_stack_t *ifs; 613 { 614 ipstate_t *is, *isn; 615 ipstate_save_t ips; 616 int error; 617 618 error = fr_inobj(data, &ips, IPFOBJ_STATESAVE); 619 if (error) 620 return EFAULT; 621 622 isn = ips.ips_next; 623 if (isn == NULL) { 624 isn = ifs->ifs_ips_list; 625 if (isn == NULL) { 626 if (ips.ips_next == NULL) 627 return ENOENT; 628 return 0; 629 } 630 } else { 631 /* 632 * Make sure the pointer we're copying from exists in the 633 * current list of entries. Security precaution to prevent 634 * copying of random kernel data. 635 */ 636 for (is = ifs->ifs_ips_list; is; is = is->is_next) 637 if (is == isn) 638 break; 639 if (!is) 640 return ESRCH; 641 } 642 ips.ips_next = isn->is_next; 643 bcopy((char *)isn, (char *)&ips.ips_is, sizeof(ips.ips_is)); 644 ips.ips_rule = isn->is_rule; 645 if (isn->is_rule != NULL) 646 bcopy((char *)isn->is_rule, (char *)&ips.ips_fr, 647 sizeof(ips.ips_fr)); 648 error = fr_outobj(data, &ips, IPFOBJ_STATESAVE); 649 if (error) 650 return EFAULT; 651 return 0; 652 } 653 654 655 /* ------------------------------------------------------------------------ */ 656 /* Function: fr_stputent */ 657 /* Returns: int - 0 == success, != 0 == failure */ 658 /* Parameters: data(I) - pointer to state information struct */ 659 /* ifs - ipf stack instance */ 660 /* */ 661 /* This function implements the SIOCSTPUT ioctl: insert a state entry into */ 662 /* the state table. If the state info. 
includes a pointer to a filter rule */ 663 /* then also add in an orphaned rule (will not show up in any "ipfstat -io" */ 664 /* output. */ 665 /* ------------------------------------------------------------------------ */ 666 int fr_stputent(data, ifs) 667 caddr_t data; 668 ipf_stack_t *ifs; 669 { 670 ipstate_t *is, *isn; 671 ipstate_save_t ips; 672 int error, i; 673 frentry_t *fr; 674 char *name; 675 676 error = fr_inobj(data, &ips, IPFOBJ_STATESAVE); 677 if (error) 678 return EFAULT; 679 680 /* 681 * Trigger automatic call to fr_state_flush() if the 682 * table has reached capacity specified by hi watermark. 683 */ 684 if (ST_TAB_WATER_LEVEL(ifs) > ifs->ifs_state_flush_level_hi) 685 ifs->ifs_fr_state_doflush = 1; 686 687 /* 688 * If automatic flushing did not do its job, and the table 689 * has filled up, don't try to create a new entry. 690 */ 691 if (ifs->ifs_ips_num >= ifs->ifs_fr_statemax) { 692 ATOMIC_INCL(ifs->ifs_ips_stats.iss_max); 693 return ENOMEM; 694 } 695 696 KMALLOC(isn, ipstate_t *); 697 if (isn == NULL) 698 return ENOMEM; 699 700 bcopy((char *)&ips.ips_is, (char *)isn, sizeof(*isn)); 701 bzero((char *)isn, offsetof(struct ipstate, is_pkts)); 702 isn->is_sti.tqe_pnext = NULL; 703 isn->is_sti.tqe_next = NULL; 704 isn->is_sti.tqe_ifq = NULL; 705 isn->is_sti.tqe_parent = isn; 706 isn->is_ifp[0] = NULL; 707 isn->is_ifp[1] = NULL; 708 isn->is_ifp[2] = NULL; 709 isn->is_ifp[3] = NULL; 710 isn->is_sync = NULL; 711 fr = ips.ips_rule; 712 713 if (fr == NULL) { 714 READ_ENTER(&ifs->ifs_ipf_state); 715 fr_stinsert(isn, 0, ifs); 716 MUTEX_EXIT(&isn->is_lock); 717 RWLOCK_EXIT(&ifs->ifs_ipf_state); 718 return 0; 719 } 720 721 if (isn->is_flags & SI_NEWFR) { 722 KMALLOC(fr, frentry_t *); 723 if (fr == NULL) { 724 KFREE(isn); 725 return ENOMEM; 726 } 727 bcopy((char *)&ips.ips_fr, (char *)fr, sizeof(*fr)); 728 isn->is_rule = fr; 729 ips.ips_is.is_rule = fr; 730 MUTEX_NUKE(&fr->fr_lock); 731 MUTEX_INIT(&fr->fr_lock, "state filter rule lock"); 732 733 /* 734 * Look 
up all the interface names in the rule. 735 */ 736 for (i = 0; i < 4; i++) { 737 name = fr->fr_ifnames[i]; 738 fr->fr_ifas[i] = fr_resolvenic(name, fr->fr_v, ifs); 739 name = isn->is_ifname[i]; 740 isn->is_ifp[i] = fr_resolvenic(name, isn->is_v, ifs); 741 } 742 743 fr->fr_ref = 0; 744 fr->fr_dsize = 0; 745 fr->fr_data = NULL; 746 fr->fr_type = FR_T_NONE; 747 748 fr_resolvedest(&fr->fr_tif, fr->fr_v, ifs); 749 fr_resolvedest(&fr->fr_dif, fr->fr_v, ifs); 750 fr_resolvedest(&fr->fr_rif, fr->fr_v, ifs); 751 752 /* 753 * send a copy back to userland of what we ended up 754 * to allow for verification. 755 */ 756 error = fr_outobj(data, &ips, IPFOBJ_STATESAVE); 757 if (error) { 758 KFREE(isn); 759 MUTEX_DESTROY(&fr->fr_lock); 760 KFREE(fr); 761 return EFAULT; 762 } 763 READ_ENTER(&ifs->ifs_ipf_state); 764 fr_stinsert(isn, 0, ifs); 765 MUTEX_EXIT(&isn->is_lock); 766 RWLOCK_EXIT(&ifs->ifs_ipf_state); 767 768 } else { 769 READ_ENTER(&ifs->ifs_ipf_state); 770 for (is = ifs->ifs_ips_list; is; is = is->is_next) 771 if (is->is_rule == fr) { 772 fr_stinsert(isn, 0, ifs); 773 MUTEX_EXIT(&isn->is_lock); 774 break; 775 } 776 777 if (is == NULL) { 778 KFREE(isn); 779 isn = NULL; 780 } 781 RWLOCK_EXIT(&ifs->ifs_ipf_state); 782 783 return (isn == NULL) ? ESRCH : 0; 784 } 785 786 return 0; 787 } 788 789 790 /* ------------------------------------------------------------------------ */ 791 /* Function: fr_stinsert */ 792 /* Returns: Nil */ 793 /* Parameters: is(I) - pointer to state structure */ 794 /* rev(I) - flag indicating forward/reverse direction of packet */ 795 /* */ 796 /* Inserts a state structure into the hash table (for lookups) and the list */ 797 /* of state entries (for enumeration). Resolves all of the interface names */ 798 /* to pointers and adjusts running stats for the hash table as appropriate. */ 799 /* */ 800 /* Locking: it is assumed that some kind of lock on ipf_state is held. */ 801 /* Exits with is_lock initialised and held. 
*/ 802 /* ------------------------------------------------------------------------ */ 803 void fr_stinsert(is, rev, ifs) 804 ipstate_t *is; 805 int rev; 806 ipf_stack_t *ifs; 807 { 808 frentry_t *fr; 809 u_int hv; 810 int i; 811 812 MUTEX_INIT(&is->is_lock, "ipf state entry"); 813 814 fr = is->is_rule; 815 if (fr != NULL) { 816 MUTEX_ENTER(&fr->fr_lock); 817 fr->fr_ref++; 818 fr->fr_statecnt++; 819 MUTEX_EXIT(&fr->fr_lock); 820 } 821 822 /* 823 * Look up all the interface names in the state entry. 824 */ 825 for (i = 0; i < 4; i++) { 826 if (is->is_ifp[i] != NULL) 827 continue; 828 is->is_ifp[i] = fr_resolvenic(is->is_ifname[i], is->is_v, ifs); 829 } 830 831 /* 832 * If we could trust is_hv, then the modulous would not be needed, but 833 * when running with IPFILTER_SYNC, this stops bad values. 834 */ 835 hv = is->is_hv % ifs->ifs_fr_statesize; 836 is->is_hv = hv; 837 838 /* 839 * We need to get both of these locks...the first because it is 840 * possible that once the insert is complete another packet might 841 * come along, match the entry and want to update it. 842 */ 843 MUTEX_ENTER(&is->is_lock); 844 MUTEX_ENTER(&ifs->ifs_ipf_stinsert); 845 846 /* 847 * add into list table. 
848 */ 849 if (ifs->ifs_ips_list != NULL) 850 ifs->ifs_ips_list->is_pnext = &is->is_next; 851 is->is_pnext = &ifs->ifs_ips_list; 852 is->is_next = ifs->ifs_ips_list; 853 ifs->ifs_ips_list = is; 854 855 if (ifs->ifs_ips_table[hv] != NULL) 856 ifs->ifs_ips_table[hv]->is_phnext = &is->is_hnext; 857 else 858 ifs->ifs_ips_stats.iss_inuse++; 859 is->is_phnext = ifs->ifs_ips_table + hv; 860 is->is_hnext = ifs->ifs_ips_table[hv]; 861 ifs->ifs_ips_table[hv] = is; 862 ifs->ifs_ips_stats.iss_bucketlen[hv]++; 863 ifs->ifs_ips_num++; 864 MUTEX_EXIT(&ifs->ifs_ipf_stinsert); 865 866 fr_setstatequeue(is, rev, ifs); 867 } 868 869 /* ------------------------------------------------------------------------ */ 870 /* Function: fr_match_ipv4addrs */ 871 /* Returns: int - 2 strong match (same addresses, same direction) */ 872 /* 1 weak match (same address, opposite direction) */ 873 /* 0 no match */ 874 /* */ 875 /* Function matches IPv4 addresses. */ 876 /* ------------------------------------------------------------------------ */ 877 static int fr_match_ipv4addrs(is1, is2) 878 ipstate_t *is1; 879 ipstate_t *is2; 880 { 881 int rv; 882 883 if (is1->is_saddr == is2->is_saddr && is1->is_daddr == is2->is_daddr) 884 rv = 2; 885 else if (is1->is_saddr == is2->is_daddr && 886 is1->is_daddr == is2->is_saddr) 887 rv = 1; 888 else 889 rv = 0; 890 891 return (rv); 892 } 893 894 /* ------------------------------------------------------------------------ */ 895 /* Function: fr_match_ipv6addrs */ 896 /* Returns: int - 2 strong match (same addresses, same direction) */ 897 /* 1 weak match (same addresses, opposite direction) */ 898 /* 0 no match */ 899 /* */ 900 /* Function matches IPv6 addresses. 
*/ 901 /* ------------------------------------------------------------------------ */ 902 static int fr_match_ipv6addrs(is1, is2) 903 ipstate_t *is1; 904 ipstate_t *is2; 905 { 906 int rv; 907 908 if (IP6_EQ(&is1->is_src, &is2->is_src) && 909 IP6_EQ(&is1->is_dst, &is2->is_dst)) 910 rv = 2; 911 else if (IP6_EQ(&is1->is_src, &is2->is_dst) && 912 IP6_EQ(&is1->is_dst, &is2->is_src)) { 913 rv = 1; 914 } 915 else 916 rv = 0; 917 918 return (rv); 919 } 920 /* ------------------------------------------------------------------------ */ 921 /* Function: fr_match_addresses */ 922 /* Returns: int - 2 strong match (same addresses, same direction) */ 923 /* 1 weak match (same address, opposite directions) */ 924 /* 0 no match */ 925 /* Parameters: is1, is2 pointers to states we are checking */ 926 /* */ 927 /* Matches addresses, function uses fr_match_ipvXaddrs() to deal with IPv4 */ 928 /* and IPv6 address format. */ 929 /* ------------------------------------------------------------------------ */ 930 static int fr_match_addresses(is1, is2) 931 ipstate_t *is1; 932 ipstate_t *is2; 933 { 934 int rv; 935 936 if (is1->is_v == 4) { 937 rv = fr_match_ipv4addrs(is1, is2); 938 } else { 939 rv = fr_match_ipv6addrs(is1, is2); 940 } 941 942 return (rv); 943 } 944 945 /* ------------------------------------------------------------------------ */ 946 /* Function: fr_match_ppairs */ 947 /* Returns: int - 2 strong match (same ports, same direction) */ 948 /* 1 weak match (same ports, different direction) */ 949 /* 0 no match */ 950 /* Parameters ppairs1, ppairs - src, dst ports we want to match. */ 951 /* */ 952 /* Matches two port_pair_t types (port pairs). Each port pair contains */ 953 /* src, dst port, which belong to session (state entry). 
*/ 954 /* ------------------------------------------------------------------------ */ 955 static int fr_match_ppairs(ppairs1, ppairs2) 956 port_pair_t *ppairs1; 957 port_pair_t *ppairs2; 958 { 959 int rv; 960 961 if (ppairs1->pp_sport == ppairs2->pp_sport && 962 ppairs1->pp_dport == ppairs2->pp_dport) 963 rv = 2; 964 else if (ppairs1->pp_sport == ppairs2->pp_dport && 965 ppairs1->pp_dport == ppairs2->pp_sport) 966 rv = 1; 967 else 968 rv = 0; 969 970 return (rv); 971 } 972 973 /* ------------------------------------------------------------------------ */ 974 /* Function: fr_match_l4_hdr */ 975 /* Returns: int - 0 no match, */ 976 /* 1 weak match (same ports, different directions) */ 977 /* 2 strong match (same ports, same direction) */ 978 /* Parameters is1, is2 - states we want to match */ 979 /* */ 980 /* Function matches L4 header data (source ports for TCP, UDP, CallIds for */ 981 /* GRE protocol). */ 982 /* ------------------------------------------------------------------------ */ 983 static int fr_match_l4_hdr(is1, is2) 984 ipstate_t *is1; 985 ipstate_t *is2; 986 { 987 int rv = 0; 988 port_pair_t pp1; 989 port_pair_t pp2; 990 991 if (is1->is_p != is2->is_p) 992 return (0); 993 994 switch (is1->is_p) { 995 case IPPROTO_TCP: 996 pp1.pp_sport = is1->is_ps.is_ts.ts_sport; 997 pp1.pp_dport = is1->is_ps.is_ts.ts_dport; 998 pp2.pp_sport = is2->is_ps.is_ts.ts_sport; 999 pp2.pp_dport = is2->is_ps.is_ts.ts_dport; 1000 rv = fr_match_ppairs(&pp1, &pp2); 1001 break; 1002 case IPPROTO_UDP: 1003 pp1.pp_sport = is1->is_ps.is_us.us_sport; 1004 pp1.pp_dport = is1->is_ps.is_us.us_dport; 1005 pp2.pp_sport = is2->is_ps.is_us.us_sport; 1006 pp2.pp_dport = is2->is_ps.is_us.us_dport; 1007 rv = fr_match_ppairs(&pp1, &pp2); 1008 break; 1009 case IPPROTO_GRE: 1010 /* greinfo_t can be also interprted as port pair */ 1011 pp1.pp_sport = is1->is_ps.is_ug.gs_call[0]; 1012 pp1.pp_dport = is1->is_ps.is_ug.gs_call[1]; 1013 pp2.pp_sport = is2->is_ps.is_ug.gs_call[0]; 1014 pp2.pp_dport = 
is2->is_ps.is_ug.gs_call[1]; 1015 rv = fr_match_ppairs(&pp1, &pp2); 1016 break; 1017 case IPPROTO_ICMP: 1018 case IPPROTO_ICMPV6: 1019 if (bcmp(&is1->is_ps, &is2->is_ps, sizeof (icmpinfo_t))) 1020 rv = 1; 1021 else 1022 rv = 0; 1023 break; 1024 default: 1025 rv = 0; 1026 } 1027 1028 return (rv); 1029 } 1030 1031 /* ------------------------------------------------------------------------ */ 1032 /* Function: fr_matchstates */ 1033 /* Returns: int - nonzero match, zero no match */ 1034 /* Parameters is1, is2 - states we want to match */ 1035 /* */ 1036 /* The state entries are equal (identical match) if they belong to the same */ 1037 /* session. Any time new state entry is being added the fr_addstate() */ 1038 /* function creates temporal state entry from the data it gets from IP and */ 1039 /* L4 header. The fr_matchstats() must be also aware of packet direction, */ 1040 /* which is also stored within the state entry. We should keep in mind the */ 1041 /* information about packet direction is spread accross L3 (addresses) and */ 1042 /* L4 (ports). There are three possible relationships betwee is1, is2: */ 1043 /* - no match (match(is1, is2) == 0)) */ 1044 /* - weak match same addresses (ports), but different */ 1045 /* directions (1) (fr_match_xxxx(is1, is2) == 1) */ 1046 /* - strong match same addresses (ports) and same directions */ 1047 /* (2) (fr_match_xxxx(is1, is2) == 2) */ 1048 /* */ 1049 /* There are functions, which match match addresses (L3 header) in is1, is2 */ 1050 /* and functions, which are used to compare ports (L4 header) data. We say */ 1051 /* the is1 and is2 are same (identical) if there is a match */ 1052 /* (fr_match_l4_hdr(is1, is2) != 0) and matchlevels are same for entries */ 1053 /* (fr_match_l3_hdr(is1, is2) == fr_match_l4_hdr(is1, is2)) for is1, is2. */ 1054 /* Such requirement deals with case as follows: */ 1055 /* suppose there are two connections between hosts A, B. 
Connection 1: */ 1056 /* a.a.a.a:12345 <=> b.b.b.b:54321 */ 1057 /* Connection 2: */ 1058 /* a.a.a.a:54321 <=> b.b.b.b:12345 */ 1059 /* since we've introduced match levels into our fr_matchstates(), we are */ 1060 /* able to identify, which packets belong to connection A and which belong */ 1061 /* to connection B. Assume there are two entries is1, is2. is1 has been */ 1062 /* from con. 1 packet, which travelled from A to B: */ 1063 /* a.a.a.a:12345 -> b.b.b.b:54321 */ 1064 /* while s2, has been created from packet which belongs to con. 2 and is */ 1065 /* also coming from A to B: */ 1066 /* a.a.a.a:54321 -> b.b.b.b:12345 */ 1067 /* fr_match_l3_hdr(is1, is2) == 2 -> strong match, while */ 1068 /* fr_match_l4_hdr(is1, is2) == 1 -> weak match. Since match levels are */ 1069 /* different the state entries are not identical -> no match as a final */ 1070 /* result. */ 1071 /* ------------------------------------------------------------------------ */ 1072 static int fr_matchstates(is1, is2) 1073 ipstate_t *is1; 1074 ipstate_t *is2; 1075 { 1076 int rv; 1077 int amatch; 1078 int pmatch; 1079 1080 if (bcmp(&is1->is_pass, &is2->is_pass, 1081 offsetof(struct ipstate, is_ps) - 1082 offsetof(struct ipstate, is_pass)) == 0) { 1083 1084 pmatch = fr_match_l4_hdr(is1, is2); 1085 amatch = fr_match_addresses(is1, is2); 1086 /* 1087 * If addresses match (amatch != 0), then 'match levels' 1088 * must be same for matching entries. If amatch and pmatch 1089 * have different values (different match levels), then 1090 * is1 and is2 belong to different sessions. 
1091 */ 1092 rv = (amatch != 0) && (amatch == pmatch); 1093 } 1094 else 1095 rv = 0; 1096 1097 return (rv); 1098 } 1099 1100 /* ------------------------------------------------------------------------ */ 1101 /* Function: fr_addstate */ 1102 /* Returns: ipstate_t* - NULL == failure, else pointer to new state */ 1103 /* Parameters: fin(I) - pointer to packet information */ 1104 /* stsave(O) - pointer to place to save pointer to created */ 1105 /* state structure. */ 1106 /* flags(I) - flags to use when creating the structure */ 1107 /* */ 1108 /* Creates a new IP state structure from the packet information collected. */ 1109 /* Inserts it into the state table and appends to the bottom of the active */ 1110 /* list. If the capacity of the table has reached the maximum allowed then */ 1111 /* the call will fail and a flush is scheduled for the next timeout call. */ 1112 /* ------------------------------------------------------------------------ */ 1113 ipstate_t *fr_addstate(fin, stsave, flags) 1114 fr_info_t *fin; 1115 ipstate_t **stsave; 1116 u_int flags; 1117 { 1118 ipstate_t *is, ips; 1119 struct icmp *ic; 1120 u_int pass, hv; 1121 frentry_t *fr; 1122 tcphdr_t *tcp; 1123 grehdr_t *gre; 1124 void *ifp; 1125 int out; 1126 ipf_stack_t *ifs = fin->fin_ifs; 1127 1128 if (ifs->ifs_fr_state_lock || 1129 (fin->fin_flx & (FI_SHORT|FI_STATE|FI_FRAGBODY|FI_BAD))) 1130 return NULL; 1131 1132 if ((fin->fin_flx & FI_OOW) && !(fin->fin_tcpf & TH_SYN)) 1133 return NULL; 1134 1135 /* 1136 * Trigger automatic call to fr_state_flush() if the 1137 * table has reached capacity specified by hi watermark. 1138 */ 1139 if (ST_TAB_WATER_LEVEL(ifs) > ifs->ifs_state_flush_level_hi) 1140 ifs->ifs_fr_state_doflush = 1; 1141 1142 /* 1143 * If the max number of state entries has been reached, and there is no 1144 * limit on the state count for the rule, then do not continue. 
In the 1145 * case where a limit exists, it's ok allow the entries to be created as 1146 * long as specified limit itself has not been reached. 1147 * 1148 * Note that because the lock isn't held on fr, it is possible to exceed 1149 * the specified size of the table. However, the cost of this is being 1150 * ignored here; as the number by which it can go over is a product of 1151 * the number of simultaneous threads that could be executing in here. 1152 * So, a limit of 100 won't result in 200, but could result in 101 or 102. 1153 * 1154 * Also note that, since the automatic flush should have been triggered 1155 * well before we reach the maximum number of state table entries, the 1156 * likelihood of reaching the max (and thus exceedng it) is minimal. 1157 */ 1158 fr = fin->fin_fr; 1159 if (fr != NULL) { 1160 if ((ifs->ifs_ips_num >= ifs->ifs_fr_statemax) && 1161 (fr->fr_statemax == 0)) { 1162 ATOMIC_INCL(ifs->ifs_ips_stats.iss_max); 1163 return NULL; 1164 } 1165 if ((fr->fr_statemax != 0) && 1166 (fr->fr_statecnt >= fr->fr_statemax)) { 1167 ATOMIC_INCL(ifs->ifs_ips_stats.iss_maxref); 1168 ifs->ifs_fr_state_doflush = 1; 1169 return NULL; 1170 } 1171 } 1172 1173 ic = NULL; 1174 tcp = NULL; 1175 out = fin->fin_out; 1176 is = &ips; 1177 bzero((char *)is, sizeof(*is)); 1178 1179 if (fr == NULL) { 1180 pass = ifs->ifs_fr_flags; 1181 is->is_tag = FR_NOLOGTAG; 1182 } else { 1183 pass = fr->fr_flags; 1184 } 1185 1186 is->is_die = 1 + ifs->ifs_fr_ticks; 1187 /* 1188 * We want to check everything that is a property of this packet, 1189 * but we don't (automatically) care about it's fragment status as 1190 * this may change. 1191 */ 1192 is->is_pass = pass; 1193 is->is_v = fin->fin_v; 1194 is->is_opt[0] = fin->fin_optmsk; 1195 is->is_optmsk[0] = 0xffffffff; 1196 /* 1197 * The reverse direction option mask will be set in fr_matchsrcdst(), 1198 * when we will see the first packet from the peer. We will leave it 1199 * as zero for now. 
1200 */ 1201 is->is_optmsk[1] = 0x0; 1202 1203 if (is->is_v == 6) { 1204 is->is_opt[0] &= ~0x8; 1205 is->is_optmsk[0] &= ~0x8; 1206 } 1207 is->is_sec = fin->fin_secmsk; 1208 is->is_secmsk = 0xffff; 1209 is->is_auth = fin->fin_auth; 1210 is->is_authmsk = 0xffff; 1211 1212 /* 1213 * Copy and calculate... 1214 */ 1215 hv = (is->is_p = fin->fin_fi.fi_p); 1216 is->is_src = fin->fin_fi.fi_src; 1217 hv += is->is_saddr; 1218 is->is_dst = fin->fin_fi.fi_dst; 1219 hv += is->is_daddr; 1220 #ifdef USE_INET6 1221 if (fin->fin_v == 6) { 1222 /* 1223 * For ICMPv6, we check to see if the destination address is 1224 * a multicast address. If it is, do not include it in the 1225 * calculation of the hash because the correct reply will come 1226 * back from a real address, not a multicast address. 1227 */ 1228 if ((is->is_p == IPPROTO_ICMPV6) && 1229 IN6_IS_ADDR_MULTICAST(&is->is_dst.in6)) { 1230 /* 1231 * So you can do keep state with neighbour discovery. 1232 * 1233 * Here we could use the address from the neighbour 1234 * solicit message to put in the state structure and 1235 * we could use that without a wildcard flag too... 
1236 */ 1237 is->is_flags |= SI_W_DADDR; 1238 hv -= is->is_daddr; 1239 } else { 1240 hv += is->is_dst.i6[1]; 1241 hv += is->is_dst.i6[2]; 1242 hv += is->is_dst.i6[3]; 1243 } 1244 hv += is->is_src.i6[1]; 1245 hv += is->is_src.i6[2]; 1246 hv += is->is_src.i6[3]; 1247 } 1248 #endif 1249 if ((fin->fin_v == 4) && 1250 (fin->fin_flx & (FI_MULTICAST|FI_BROADCAST|FI_MBCAST))) { 1251 if (fin->fin_out == 0) { 1252 flags |= SI_W_DADDR|SI_CLONE; 1253 hv -= is->is_daddr; 1254 } else { 1255 flags |= SI_W_SADDR|SI_CLONE; 1256 hv -= is->is_saddr; 1257 } 1258 } 1259 1260 switch (is->is_p) 1261 { 1262 #ifdef USE_INET6 1263 case IPPROTO_ICMPV6 : 1264 ic = fin->fin_dp; 1265 1266 switch (ic->icmp_type) 1267 { 1268 case ICMP6_ECHO_REQUEST : 1269 is->is_icmp.ici_type = ic->icmp_type; 1270 hv += (is->is_icmp.ici_id = ic->icmp_id); 1271 break; 1272 case ICMP6_MEMBERSHIP_QUERY : 1273 case ND_ROUTER_SOLICIT : 1274 case ND_NEIGHBOR_SOLICIT : 1275 case ICMP6_NI_QUERY : 1276 is->is_icmp.ici_type = ic->icmp_type; 1277 break; 1278 default : 1279 return NULL; 1280 } 1281 ATOMIC_INCL(ifs->ifs_ips_stats.iss_icmp); 1282 break; 1283 #endif 1284 case IPPROTO_ICMP : 1285 ic = fin->fin_dp; 1286 1287 switch (ic->icmp_type) 1288 { 1289 case ICMP_ECHO : 1290 case ICMP_ECHOREPLY : 1291 case ICMP_TSTAMP : 1292 case ICMP_IREQ : 1293 case ICMP_MASKREQ : 1294 is->is_icmp.ici_type = ic->icmp_type; 1295 hv += (is->is_icmp.ici_id = ic->icmp_id); 1296 break; 1297 default : 1298 return NULL; 1299 } 1300 ATOMIC_INCL(ifs->ifs_ips_stats.iss_icmp); 1301 break; 1302 1303 case IPPROTO_GRE : 1304 gre = fin->fin_dp; 1305 1306 is->is_gre.gs_flags = gre->gr_flags; 1307 is->is_gre.gs_ptype = gre->gr_ptype; 1308 if (GRE_REV(is->is_gre.gs_flags) == 1) { 1309 is->is_call[0] = fin->fin_data[0]; 1310 is->is_call[1] = fin->fin_data[1]; 1311 } 1312 break; 1313 1314 case IPPROTO_TCP : 1315 tcp = fin->fin_dp; 1316 1317 if (tcp->th_flags & TH_RST) 1318 return NULL; 1319 /* 1320 * The endian of the ports doesn't matter, but the ack and 
1321 * sequence numbers do as we do mathematics on them later. 1322 */ 1323 is->is_sport = htons(fin->fin_data[0]); 1324 is->is_dport = htons(fin->fin_data[1]); 1325 if ((flags & (SI_W_DPORT|SI_W_SPORT)) == 0) { 1326 hv += is->is_sport; 1327 hv += is->is_dport; 1328 } 1329 1330 /* 1331 * If this is a real packet then initialise fields in the 1332 * state information structure from the TCP header information. 1333 */ 1334 1335 is->is_maxdwin = 1; 1336 is->is_maxswin = ntohs(tcp->th_win); 1337 if (is->is_maxswin == 0) 1338 is->is_maxswin = 1; 1339 1340 if ((fin->fin_flx & FI_IGNORE) == 0) { 1341 is->is_send = ntohl(tcp->th_seq) + fin->fin_dlen - 1342 (TCP_OFF(tcp) << 2) + 1343 ((tcp->th_flags & TH_SYN) ? 1 : 0) + 1344 ((tcp->th_flags & TH_FIN) ? 1 : 0); 1345 is->is_maxsend = is->is_send; 1346 1347 /* 1348 * Window scale option is only present in 1349 * SYN/SYN-ACK packet. 1350 */ 1351 if ((tcp->th_flags & ~(TH_FIN|TH_ACK|TH_ECNALL)) == 1352 TH_SYN && 1353 (TCP_OFF(tcp) > (sizeof(tcphdr_t) >> 2))) { 1354 if (fr_tcpoptions(fin, tcp, 1355 &is->is_tcp.ts_data[0]) == -1) { 1356 fin->fin_flx |= FI_BAD; 1357 } 1358 } 1359 1360 if ((fin->fin_out != 0) && (pass & FR_NEWISN) != 0) { 1361 fr_checknewisn(fin, is); 1362 fr_fixoutisn(fin, is); 1363 } 1364 1365 if ((tcp->th_flags & TH_OPENING) == TH_SYN) 1366 flags |= IS_TCPFSM; 1367 else { 1368 is->is_maxdwin = is->is_maxswin * 2; 1369 is->is_dend = ntohl(tcp->th_ack); 1370 is->is_maxdend = ntohl(tcp->th_ack); 1371 is->is_maxdwin *= 2; 1372 } 1373 } 1374 1375 /* 1376 * If we're creating state for a starting connection, start the 1377 * timer on it as we'll never see an error if it fails to 1378 * connect. 
1379 */ 1380 ATOMIC_INCL(ifs->ifs_ips_stats.iss_tcp); 1381 break; 1382 1383 case IPPROTO_UDP : 1384 tcp = fin->fin_dp; 1385 1386 is->is_sport = htons(fin->fin_data[0]); 1387 is->is_dport = htons(fin->fin_data[1]); 1388 if ((flags & (SI_W_DPORT|SI_W_SPORT)) == 0) { 1389 hv += tcp->th_dport; 1390 hv += tcp->th_sport; 1391 } 1392 ATOMIC_INCL(ifs->ifs_ips_stats.iss_udp); 1393 break; 1394 1395 default : 1396 break; 1397 } 1398 hv = DOUBLE_HASH(hv, ifs); 1399 is->is_hv = hv; 1400 is->is_rule = fr; 1401 is->is_flags = flags & IS_INHERITED; 1402 1403 /* 1404 * Look for identical state. 1405 */ 1406 for (is = ifs->ifs_ips_table[is->is_hv % ifs->ifs_fr_statesize]; 1407 is != NULL; 1408 is = is->is_hnext) { 1409 if (fr_matchstates(&ips, is) == 1) 1410 break; 1411 } 1412 1413 /* 1414 * we've found a matching state -> state already exists, 1415 * we are not going to add a duplicate record. 1416 */ 1417 if (is != NULL) 1418 return NULL; 1419 1420 if (ifs->ifs_ips_stats.iss_bucketlen[hv] >= ifs->ifs_fr_state_maxbucket) { 1421 ATOMIC_INCL(ifs->ifs_ips_stats.iss_bucketfull); 1422 return NULL; 1423 } 1424 KMALLOC(is, ipstate_t *); 1425 if (is == NULL) { 1426 ATOMIC_INCL(ifs->ifs_ips_stats.iss_nomem); 1427 return NULL; 1428 } 1429 bcopy((char *)&ips, (char *)is, sizeof(*is)); 1430 /* 1431 * Do not do the modulous here, it is done in fr_stinsert(). 
1432 */ 1433 if (fr != NULL) { 1434 (void) strncpy(is->is_group, fr->fr_group, FR_GROUPLEN); 1435 if (fr->fr_age[0] != 0) { 1436 is->is_tqehead[0] = 1437 fr_addtimeoutqueue(&ifs->ifs_ips_utqe, 1438 fr->fr_age[0], ifs); 1439 is->is_sti.tqe_flags |= TQE_RULEBASED; 1440 } 1441 if (fr->fr_age[1] != 0) { 1442 is->is_tqehead[1] = 1443 fr_addtimeoutqueue(&ifs->ifs_ips_utqe, 1444 fr->fr_age[1], ifs); 1445 is->is_sti.tqe_flags |= TQE_RULEBASED; 1446 } 1447 is->is_tag = fr->fr_logtag; 1448 1449 is->is_ifp[(out << 1) + 1] = fr->fr_ifas[1]; 1450 is->is_ifp[(1 - out) << 1] = fr->fr_ifas[2]; 1451 is->is_ifp[((1 - out) << 1) + 1] = fr->fr_ifas[3]; 1452 1453 if (((ifp = fr->fr_ifas[1]) != NULL) && 1454 (ifp != (void *)-1)) { 1455 COPYIFNAME(ifp, is->is_ifname[(out << 1) + 1], fr->fr_v); 1456 } 1457 if (((ifp = fr->fr_ifas[2]) != NULL) && 1458 (ifp != (void *)-1)) { 1459 COPYIFNAME(ifp, is->is_ifname[(1 - out) << 1], fr->fr_v); 1460 } 1461 if (((ifp = fr->fr_ifas[3]) != NULL) && 1462 (ifp != (void *)-1)) { 1463 COPYIFNAME(ifp, is->is_ifname[((1 - out) << 1) + 1], fr->fr_v); 1464 } 1465 } 1466 1467 is->is_ifp[out << 1] = fin->fin_ifp; 1468 if (fin->fin_ifp != NULL) { 1469 COPYIFNAME(fin->fin_ifp, is->is_ifname[out << 1], fin->fin_v); 1470 } 1471 1472 is->is_ref = 1; 1473 is->is_pkts[0] = 0, is->is_bytes[0] = 0; 1474 is->is_pkts[1] = 0, is->is_bytes[1] = 0; 1475 is->is_pkts[2] = 0, is->is_bytes[2] = 0; 1476 is->is_pkts[3] = 0, is->is_bytes[3] = 0; 1477 if ((fin->fin_flx & FI_IGNORE) == 0) { 1478 is->is_pkts[out] = 1; 1479 is->is_bytes[out] = fin->fin_plen; 1480 is->is_flx[out][0] = fin->fin_flx & FI_CMP; 1481 is->is_flx[out][0] &= ~FI_OOW; 1482 } 1483 1484 if (pass & FR_STSTRICT) 1485 is->is_flags |= IS_STRICT; 1486 1487 if (pass & FR_STATESYNC) 1488 is->is_flags |= IS_STATESYNC; 1489 1490 if (flags & (SI_WILDP|SI_WILDA)) { 1491 ATOMIC_INCL(ifs->ifs_ips_stats.iss_wild); 1492 } 1493 is->is_rulen = fin->fin_rule; 1494 1495 1496 if (pass & FR_LOGFIRST) 1497 is->is_pass &= 
~(FR_LOGFIRST|FR_LOG); 1498 1499 READ_ENTER(&ifs->ifs_ipf_state); 1500 is->is_me = stsave; 1501 1502 fr_stinsert(is, fin->fin_rev, ifs); 1503 1504 if (fin->fin_p == IPPROTO_TCP) { 1505 /* 1506 * If we're creating state for a starting connection, start the 1507 * timer on it as we'll never see an error if it fails to 1508 * connect. 1509 */ 1510 (void) fr_tcp_age(&is->is_sti, fin, ifs->ifs_ips_tqtqb, 1511 is->is_flags); 1512 MUTEX_EXIT(&is->is_lock); 1513 #ifdef IPFILTER_SCAN 1514 if ((is->is_flags & SI_CLONE) == 0) 1515 (void) ipsc_attachis(is); 1516 #endif 1517 } else { 1518 MUTEX_EXIT(&is->is_lock); 1519 } 1520 #ifdef IPFILTER_SYNC 1521 if ((is->is_flags & IS_STATESYNC) && ((is->is_flags & SI_CLONE) == 0)) 1522 is->is_sync = ipfsync_new(SMC_STATE, fin, is); 1523 #endif 1524 if (ifs->ifs_ipstate_logging) 1525 ipstate_log(is, ISL_NEW, ifs); 1526 1527 RWLOCK_EXIT(&ifs->ifs_ipf_state); 1528 fin->fin_rev = IP6_NEQ(&is->is_dst, &fin->fin_daddr); 1529 fin->fin_flx |= FI_STATE; 1530 if (fin->fin_flx & FI_FRAG) 1531 (void) fr_newfrag(fin, pass ^ FR_KEEPSTATE); 1532 1533 return is; 1534 } 1535 1536 1537 /* ------------------------------------------------------------------------ */ 1538 /* Function: fr_tcpoptions */ 1539 /* Returns: int - 1 == packet matches state entry, 0 == it does not */ 1540 /* Parameters: fin(I) - pointer to packet information */ 1541 /* tcp(I) - pointer to TCP packet header */ 1542 /* td(I) - pointer to TCP data held as part of the state */ 1543 /* */ 1544 /* Look after the TCP header for any options and deal with those that are */ 1545 /* present. Record details about those that we recogise. 
*/ 1546 /* ------------------------------------------------------------------------ */ 1547 static int fr_tcpoptions(fin, tcp, td) 1548 fr_info_t *fin; 1549 tcphdr_t *tcp; 1550 tcpdata_t *td; 1551 { 1552 int off, mlen, ol, i, len, retval; 1553 char buf[64], *s, opt; 1554 mb_t *m = NULL; 1555 1556 len = (TCP_OFF(tcp) << 2); 1557 if (fin->fin_dlen < len) 1558 return 0; 1559 len -= sizeof(*tcp); 1560 1561 off = fin->fin_plen - fin->fin_dlen + sizeof(*tcp) + fin->fin_ipoff; 1562 1563 m = fin->fin_m; 1564 mlen = MSGDSIZE(m) - off; 1565 if (len > mlen) { 1566 len = mlen; 1567 retval = 0; 1568 } else { 1569 retval = 1; 1570 } 1571 1572 COPYDATA(m, off, len, buf); 1573 1574 for (s = buf; len > 0; ) { 1575 opt = *s; 1576 if (opt == TCPOPT_EOL) 1577 break; 1578 else if (opt == TCPOPT_NOP) 1579 ol = 1; 1580 else { 1581 if (len < 2) 1582 break; 1583 ol = (int)*(s + 1); 1584 if (ol < 2 || ol > len) 1585 break; 1586 1587 /* 1588 * Extract the TCP options we are interested in out of 1589 * the header and store them in the the tcpdata struct. 1590 */ 1591 switch (opt) 1592 { 1593 case TCPOPT_WINDOW : 1594 if (ol == TCPOLEN_WINDOW) { 1595 i = (int)*(s + 2); 1596 if (i > TCP_WSCALE_MAX) 1597 i = TCP_WSCALE_MAX; 1598 else if (i < 0) 1599 i = 0; 1600 td->td_winscale = i; 1601 td->td_winflags |= TCP_WSCALE_SEEN | 1602 TCP_WSCALE_FIRST; 1603 } else 1604 retval = -1; 1605 break; 1606 case TCPOPT_MAXSEG : 1607 /* 1608 * So, if we wanted to set the TCP MAXSEG, 1609 * it should be done here... 
1610 */ 1611 if (ol == TCPOLEN_MAXSEG) { 1612 i = (int)*(s + 2); 1613 i <<= 8; 1614 i += (int)*(s + 3); 1615 td->td_maxseg = i; 1616 } else 1617 retval = -1; 1618 break; 1619 case TCPOPT_SACK_PERMITTED : 1620 if (ol == TCPOLEN_SACK_PERMITTED) 1621 td->td_winflags |= TCP_SACK_PERMIT; 1622 else 1623 retval = -1; 1624 break; 1625 } 1626 } 1627 len -= ol; 1628 s += ol; 1629 } 1630 return retval; 1631 } 1632 1633 1634 /* ------------------------------------------------------------------------ */ 1635 /* Function: fr_tcpstate */ 1636 /* Returns: int - 1 == packet matches state entry, 0 == it does not */ 1637 /* Parameters: fin(I) - pointer to packet information */ 1638 /* tcp(I) - pointer to TCP packet header */ 1639 /* is(I) - pointer to master state structure */ 1640 /* */ 1641 /* Check to see if a packet with TCP headers fits within the TCP window. */ 1642 /* Change timeout depending on whether new packet is a SYN-ACK returning */ 1643 /* for a SYN or a RST or FIN which indicate time to close up shop. */ 1644 /* ------------------------------------------------------------------------ */ 1645 static int fr_tcpstate(fin, tcp, is) 1646 fr_info_t *fin; 1647 tcphdr_t *tcp; 1648 ipstate_t *is; 1649 { 1650 int source, ret = 0, flags; 1651 tcpdata_t *fdata, *tdata; 1652 ipf_stack_t *ifs = fin->fin_ifs; 1653 1654 source = !fin->fin_rev; 1655 if (((is->is_flags & IS_TCPFSM) != 0) && (source == 1) && 1656 (ntohs(is->is_sport) != fin->fin_data[0])) 1657 source = 0; 1658 fdata = &is->is_tcp.ts_data[!source]; 1659 tdata = &is->is_tcp.ts_data[source]; 1660 1661 MUTEX_ENTER(&is->is_lock); 1662 1663 /* 1664 * If a SYN packet is received for a connection that is in a half 1665 * closed state, then move its state entry to deletetq. In such case 1666 * the SYN packet will be consequently dropped. This allows new state 1667 * entry to be created with a retransmited SYN packet. 
1668 */ 1669 if ((tcp->th_flags & TH_OPENING) == TH_SYN) { 1670 if ((is->is_state[source] > IPF_TCPS_ESTABLISHED) && 1671 (is->is_state[!source] > IPF_TCPS_ESTABLISHED)) { 1672 is->is_state[source] = IPF_TCPS_CLOSED; 1673 is->is_state[!source] = IPF_TCPS_CLOSED; 1674 /* 1675 * Do not update is->is_sti.tqe_die in case state entry 1676 * is already present in deletetq. It prevents state 1677 * entry ttl update by retransmitted SYN packets, which 1678 * may arrive before timer tick kicks off. The SYN 1679 * packet will be dropped again. 1680 */ 1681 if (is->is_sti.tqe_ifq != &ifs->ifs_ips_deletetq) 1682 fr_movequeue(&is->is_sti, is->is_sti.tqe_ifq, 1683 &fin->fin_ifs->ifs_ips_deletetq, 1684 fin->fin_ifs); 1685 1686 MUTEX_EXIT(&is->is_lock); 1687 return 0; 1688 } 1689 } 1690 1691 if (fr_tcpinwindow(fin, fdata, tdata, tcp, is->is_flags)) { 1692 #ifdef IPFILTER_SCAN 1693 if (is->is_flags & (IS_SC_CLIENT|IS_SC_SERVER)) { 1694 ipsc_packet(fin, is); 1695 if (FR_ISBLOCK(is->is_pass)) { 1696 MUTEX_EXIT(&is->is_lock); 1697 return 1; 1698 } 1699 } 1700 #endif 1701 1702 /* 1703 * Nearing end of connection, start timeout. 1704 */ 1705 ret = fr_tcp_age(&is->is_sti, fin, ifs->ifs_ips_tqtqb, 1706 is->is_flags); 1707 if (ret == 0) { 1708 MUTEX_EXIT(&is->is_lock); 1709 return 0; 1710 } 1711 1712 /* 1713 * set s0's as appropriate. Use syn-ack packet as it 1714 * contains both pieces of required information. 1715 */ 1716 /* 1717 * Window scale option is only present in SYN/SYN-ACK packet. 1718 * Compare with ~TH_FIN to mask out T/TCP setups. 
1719 */ 1720 flags = tcp->th_flags & ~(TH_FIN|TH_ECNALL); 1721 if (flags == (TH_SYN|TH_ACK)) { 1722 is->is_s0[source] = ntohl(tcp->th_ack); 1723 is->is_s0[!source] = ntohl(tcp->th_seq) + 1; 1724 if (TCP_OFF(tcp) > (sizeof (tcphdr_t) >> 2)) { 1725 (void) fr_tcpoptions(fin, tcp, fdata); 1726 } 1727 if ((fin->fin_out != 0) && (is->is_pass & FR_NEWISN)) 1728 fr_checknewisn(fin, is); 1729 } else if (flags == TH_SYN) { 1730 is->is_s0[source] = ntohl(tcp->th_seq) + 1; 1731 if ((TCP_OFF(tcp) > (sizeof(tcphdr_t) >> 2))) 1732 (void) fr_tcpoptions(fin, tcp, fdata); 1733 1734 if ((fin->fin_out != 0) && (is->is_pass & FR_NEWISN)) 1735 fr_checknewisn(fin, is); 1736 1737 } 1738 ret = 1; 1739 } else 1740 fin->fin_flx |= FI_OOW; 1741 MUTEX_EXIT(&is->is_lock); 1742 return ret; 1743 } 1744 1745 1746 /* ------------------------------------------------------------------------ */ 1747 /* Function: fr_checknewisn */ 1748 /* Returns: Nil */ 1749 /* Parameters: fin(I) - pointer to packet information */ 1750 /* is(I) - pointer to master state structure */ 1751 /* */ 1752 /* Check to see if this TCP connection is expecting and needs a new */ 1753 /* sequence number for a particular direction of the connection. */ 1754 /* */ 1755 /* NOTE: This does not actually change the sequence numbers, only gets new */ 1756 /* one ready. */ 1757 /* ------------------------------------------------------------------------ */ 1758 static void fr_checknewisn(fin, is) 1759 fr_info_t *fin; 1760 ipstate_t *is; 1761 { 1762 u_32_t sumd, old, new; 1763 tcphdr_t *tcp; 1764 int i; 1765 1766 i = fin->fin_rev; 1767 tcp = fin->fin_dp; 1768 1769 if (((i == 0) && !(is->is_flags & IS_ISNSYN)) || 1770 ((i == 1) && !(is->is_flags & IS_ISNACK))) { 1771 old = ntohl(tcp->th_seq); 1772 new = fr_newisn(fin); 1773 is->is_isninc[i] = new - old; 1774 CALC_SUMD(old, new, sumd); 1775 is->is_sumd[i] = (sumd & 0xffff) + (sumd >> 16); 1776 1777 is->is_flags |= ((i == 0) ? 
IS_ISNSYN : IS_ISNACK); 1778 } 1779 } 1780 1781 1782 /* ------------------------------------------------------------------------ */ 1783 /* Function: fr_tcpinwindow */ 1784 /* Returns: int - 1 == packet inside TCP "window", 0 == not inside. */ 1785 /* Parameters: fin(I) - pointer to packet information */ 1786 /* fdata(I) - pointer to tcp state informatio (forward) */ 1787 /* tdata(I) - pointer to tcp state informatio (reverse) */ 1788 /* tcp(I) - pointer to TCP packet header */ 1789 /* */ 1790 /* Given a packet has matched addresses and ports, check to see if it is */ 1791 /* within the TCP data window. In a show of generosity, allow packets that */ 1792 /* are within the window space behind the current sequence # as well. */ 1793 /* ------------------------------------------------------------------------ */ 1794 int fr_tcpinwindow(fin, fdata, tdata, tcp, flags) 1795 fr_info_t *fin; 1796 tcpdata_t *fdata, *tdata; 1797 tcphdr_t *tcp; 1798 int flags; 1799 { 1800 tcp_seq seq, ack, end; 1801 int ackskew, tcpflags; 1802 u_32_t win, maxwin; 1803 int dsize, inseq; 1804 1805 /* 1806 * Find difference between last checked packet and this packet. 1807 */ 1808 tcpflags = tcp->th_flags; 1809 seq = ntohl(tcp->th_seq); 1810 ack = ntohl(tcp->th_ack); 1811 1812 if (tcpflags & TH_SYN) 1813 win = ntohs(tcp->th_win); 1814 else 1815 win = ntohs(tcp->th_win) << fdata->td_winscale; 1816 1817 /* 1818 * win 0 means the receiving endpoint has closed the window, because it 1819 * has not enough memory to receive data from sender. In such case we 1820 * are pretending window size to be 1 to let TCP probe data through. 1821 * TCP probe data can be either 0 or 1 octet of data, the RFC does not 1822 * state this accurately, so we have to allow 1 octet (win = 1) even if 1823 * the window is closed (win == 0). 1824 */ 1825 if (win == 0) 1826 win = 1; 1827 1828 dsize = fin->fin_dlen - (TCP_OFF(tcp) << 2) + 1829 ((tcpflags & TH_SYN) ? 1 : 0) + ((tcpflags & TH_FIN) ? 
1 : 0); 1830 1831 /* 1832 * if window scaling is present, the scaling is only allowed 1833 * for windows not in the first SYN packet. In that packet the 1834 * window is 65535 to specify the largest window possible 1835 * for receivers not implementing the window scale option. 1836 * Currently, we do not assume TTCP here. That means that 1837 * if we see a second packet from a host (after the initial 1838 * SYN), we can assume that the receiver of the SYN did 1839 * already send back the SYN/ACK (and thus that we know if 1840 * the receiver also does window scaling) 1841 */ 1842 if (!(tcpflags & TH_SYN) && (fdata->td_winflags & TCP_WSCALE_FIRST)) { 1843 fdata->td_winflags &= ~TCP_WSCALE_FIRST; 1844 fdata->td_maxwin = win; 1845 } 1846 1847 end = seq + dsize; 1848 1849 if ((fdata->td_end == 0) && 1850 (!(flags & IS_TCPFSM) || 1851 ((tcpflags & TH_OPENING) == TH_OPENING))) { 1852 /* 1853 * Must be a (outgoing) SYN-ACK in reply to a SYN. 1854 */ 1855 fdata->td_end = end - 1; 1856 fdata->td_maxwin = 1; 1857 fdata->td_maxend = end + win; 1858 } 1859 1860 if (!(tcpflags & TH_ACK)) { /* Pretend an ack was sent */ 1861 ack = tdata->td_end; 1862 } else if (((tcpflags & (TH_ACK|TH_RST)) == (TH_ACK|TH_RST)) && 1863 (ack == 0)) { 1864 /* gross hack to get around certain broken tcp stacks */ 1865 ack = tdata->td_end; 1866 } 1867 1868 maxwin = tdata->td_maxwin; 1869 ackskew = tdata->td_end - ack; 1870 1871 /* 1872 * Strict sequencing only allows in-order delivery. 
1873 */ 1874 if ((flags & IS_STRICT) != 0) { 1875 if (seq != fdata->td_end) { 1876 DTRACE_PROBE(strict_check); 1877 return 0; 1878 } 1879 } 1880 1881 #define SEQ_GE(a,b) ((int)((a) - (b)) >= 0) 1882 #define SEQ_GT(a,b) ((int)((a) - (b)) > 0) 1883 inseq = 0; 1884 DTRACE_PROBE4( 1885 dyn_params, 1886 int, dsize, 1887 int, ackskew, 1888 int, maxwin, 1889 int, win 1890 ); 1891 if ( 1892 #if defined(_KERNEL) 1893 /* 1894 * end <-> s + n 1895 * maxend <-> ack + win 1896 * this is upperbound check 1897 */ 1898 (SEQ_GE(fdata->td_maxend, end)) && 1899 /* 1900 * this is lowerbound check 1901 */ 1902 (SEQ_GE(seq, fdata->td_end - maxwin)) && 1903 #endif 1904 /* XXX what about big packets */ 1905 #define MAXACKWINDOW 66000 1906 (-ackskew <= (MAXACKWINDOW)) && 1907 ( ackskew <= (MAXACKWINDOW << fdata->td_winscale))) { 1908 inseq = 1; 1909 /* 1910 * Microsoft Windows will send the next packet to the right of the 1911 * window if SACK is in use. 1912 */ 1913 } else if ((seq == fdata->td_maxend) && (ackskew == 0) && 1914 (fdata->td_winflags & TCP_SACK_PERMIT) && 1915 (tdata->td_winflags & TCP_SACK_PERMIT)) { 1916 inseq = 1; 1917 /* 1918 * RST ACK with SEQ equal to 0 is sent by some OSes (i.e. Solaris) as a 1919 * response to initial SYN packet, when there is no application 1920 * listeing to on a port, where the SYN packet has came to. 1921 */ 1922 } else if ((seq == 0) && (tcpflags == (TH_RST|TH_ACK)) && 1923 (ackskew >= -1) && (ackskew <= 1)) { 1924 inseq = 1; 1925 } else if (!(flags & IS_TCPFSM)) { 1926 1927 if (!(fdata->td_winflags & 1928 (TCP_WSCALE_SEEN|TCP_WSCALE_FIRST))) { 1929 /* 1930 * No TCPFSM and no window scaling, so make some 1931 * extra guesses. 1932 */ 1933 if ((seq == fdata->td_maxend) && (ackskew == 0)) 1934 inseq = 1; 1935 else if (SEQ_GE(seq + maxwin, fdata->td_end - maxwin)) 1936 inseq = 1; 1937 } 1938 } 1939 1940 if (inseq) { 1941 /* if ackskew < 0 then this should be due to fragmented 1942 * packets. 
There is no way to know the length of the 1943 * total packet in advance. 1944 * We do know the total length from the fragment cache though. 1945 * Note however that there might be more sessions with 1946 * exactly the same source and destination parameters in the 1947 * state cache (and source and destination is the only stuff 1948 * that is saved in the fragment cache). Note further that 1949 * some TCP connections in the state cache are hashed with 1950 * sport and dport as well which makes it not worthwhile to 1951 * look for them. 1952 * Thus, when ackskew is negative but still seems to belong 1953 * to this session, we bump up the destinations end value. 1954 */ 1955 if (ackskew < 0) { 1956 DTRACE_PROBE2(end_update_td, 1957 int, tdata->td_end, 1958 int, ack 1959 ); 1960 tdata->td_end = ack; 1961 } 1962 1963 /* update max window seen */ 1964 if (fdata->td_maxwin < win) { 1965 DTRACE_PROBE2(win_update_fd, 1966 int, fdata->td_maxwin, 1967 int, win 1968 ); 1969 fdata->td_maxwin = win; 1970 } 1971 1972 if (SEQ_GT(end, fdata->td_end)) { 1973 DTRACE_PROBE2(end_update_fd, 1974 int, fdata->td_end, 1975 int, end 1976 ); 1977 fdata->td_end = end; 1978 } 1979 1980 if (SEQ_GE(ack + win, tdata->td_maxend)) { 1981 DTRACE_PROBE2(max_end_update_td, 1982 int, tdata->td_maxend, 1983 int, ack + win 1984 ); 1985 tdata->td_maxend = ack + win; 1986 } 1987 1988 return 1; 1989 } 1990 fin->fin_flx |= FI_OOW; 1991 1992 #if defined(_KERNEL) 1993 if (!(SEQ_GE(seq, fdata->td_end - maxwin))) 1994 fin->fin_flx |= FI_NEG_OOW; 1995 #endif 1996 1997 return 0; 1998 } 1999 2000 2001 /* ------------------------------------------------------------------------ */ 2002 /* Function: fr_stclone */ 2003 /* Returns: ipstate_t* - NULL == cloning failed, */ 2004 /* else pointer to new state structure */ 2005 /* Parameters: fin(I) - pointer to packet information */ 2006 /* tcp(I) - pointer to TCP/UDP header */ 2007 /* is(I) - pointer to master state structure */ 2008 /* */ 2009 /* Create a "duplcate" state 
table entry from the master. */ 2010 /* ------------------------------------------------------------------------ */ 2011 static ipstate_t *fr_stclone(fin, tcp, is) 2012 fr_info_t *fin; 2013 tcphdr_t *tcp; 2014 ipstate_t *is; 2015 { 2016 ipstate_t *clone; 2017 u_32_t send; 2018 ipf_stack_t *ifs = fin->fin_ifs; 2019 2020 /* 2021 * Trigger automatic call to fr_state_flush() if the 2022 * table has reached capacity specified by hi watermark. 2023 */ 2024 if (ST_TAB_WATER_LEVEL(ifs) > ifs->ifs_state_flush_level_hi) 2025 ifs->ifs_fr_state_doflush = 1; 2026 2027 /* 2028 * If automatic flushing did not do its job, and the table 2029 * has filled up, don't try to create a new entry. A NULL 2030 * return will indicate that the cloning has failed. 2031 */ 2032 if (ifs->ifs_ips_num >= ifs->ifs_fr_statemax) { 2033 ATOMIC_INCL(ifs->ifs_ips_stats.iss_max); 2034 return NULL; 2035 } 2036 2037 KMALLOC(clone, ipstate_t *); 2038 if (clone == NULL) 2039 return NULL; 2040 bcopy((char *)is, (char *)clone, sizeof(*clone)); 2041 2042 MUTEX_NUKE(&clone->is_lock); 2043 2044 clone->is_die = ONE_DAY + ifs->ifs_fr_ticks; 2045 clone->is_state[0] = 0; 2046 clone->is_state[1] = 0; 2047 send = ntohl(tcp->th_seq) + fin->fin_dlen - (TCP_OFF(tcp) << 2) + 2048 ((tcp->th_flags & TH_SYN) ? 1 : 0) + 2049 ((tcp->th_flags & TH_FIN) ? 
1 : 0); 2050 2051 if (fin->fin_rev == 1) { 2052 clone->is_dend = send; 2053 clone->is_maxdend = send; 2054 clone->is_send = 0; 2055 clone->is_maxswin = 1; 2056 clone->is_maxdwin = ntohs(tcp->th_win); 2057 if (clone->is_maxdwin == 0) 2058 clone->is_maxdwin = 1; 2059 } else { 2060 clone->is_send = send; 2061 clone->is_maxsend = send; 2062 clone->is_dend = 0; 2063 clone->is_maxdwin = 1; 2064 clone->is_maxswin = ntohs(tcp->th_win); 2065 if (clone->is_maxswin == 0) 2066 clone->is_maxswin = 1; 2067 } 2068 2069 clone->is_flags &= ~SI_CLONE; 2070 clone->is_flags |= SI_CLONED; 2071 fr_stinsert(clone, fin->fin_rev, ifs); 2072 clone->is_ref = 1; 2073 if (clone->is_p == IPPROTO_TCP) { 2074 (void) fr_tcp_age(&clone->is_sti, fin, ifs->ifs_ips_tqtqb, 2075 clone->is_flags); 2076 } 2077 MUTEX_EXIT(&clone->is_lock); 2078 #ifdef IPFILTER_SCAN 2079 (void) ipsc_attachis(is); 2080 #endif 2081 #ifdef IPFILTER_SYNC 2082 if (is->is_flags & IS_STATESYNC) 2083 clone->is_sync = ipfsync_new(SMC_STATE, fin, clone); 2084 #endif 2085 return clone; 2086 } 2087 2088 2089 /* ------------------------------------------------------------------------ */ 2090 /* Function: fr_matchsrcdst */ 2091 /* Returns: Nil */ 2092 /* Parameters: fin(I) - pointer to packet information */ 2093 /* is(I) - pointer to state structure */ 2094 /* src(I) - pointer to source address */ 2095 /* dst(I) - pointer to destination address */ 2096 /* tcp(I) - pointer to TCP/UDP header */ 2097 /* */ 2098 /* Match a state table entry against an IP packet. The logic below is that */ 2099 /* ret gets set to one if the match succeeds, else remains 0. If it is */ 2100 /* still 0 after the test. no match. 
*/ 2101 /* ------------------------------------------------------------------------ */ 2102 static ipstate_t *fr_matchsrcdst(fin, is, src, dst, tcp, cmask) 2103 fr_info_t *fin; 2104 ipstate_t *is; 2105 i6addr_t *src, *dst; 2106 tcphdr_t *tcp; 2107 u_32_t cmask; 2108 { 2109 int ret = 0, rev, out, flags, flx = 0, idx; 2110 u_short sp, dp; 2111 u_32_t cflx; 2112 void *ifp; 2113 ipf_stack_t *ifs = fin->fin_ifs; 2114 2115 rev = IP6_NEQ(&is->is_dst, dst); 2116 ifp = fin->fin_ifp; 2117 out = fin->fin_out; 2118 flags = is->is_flags; 2119 sp = 0; 2120 dp = 0; 2121 2122 if (tcp != NULL) { 2123 sp = htons(fin->fin_sport); 2124 dp = ntohs(fin->fin_dport); 2125 } 2126 if (!rev) { 2127 if (tcp != NULL) { 2128 if (!(flags & SI_W_SPORT) && (sp != is->is_sport)) 2129 rev = 1; 2130 else if (!(flags & SI_W_DPORT) && (dp != is->is_dport)) 2131 rev = 1; 2132 } 2133 } 2134 2135 idx = (out << 1) + rev; 2136 2137 /* 2138 * If the interface for this 'direction' is set, make sure it matches. 2139 * An interface name that is not set matches any, as does a name of *. 2140 */ 2141 if ((is->is_ifp[idx] == NULL && 2142 (*is->is_ifname[idx] == '\0' || *is->is_ifname[idx] == '*')) || 2143 is->is_ifp[idx] == ifp) 2144 ret = 1; 2145 2146 if (ret == 0) { 2147 DTRACE_PROBE(no_match_on_iface); 2148 return NULL; 2149 } 2150 ret = 0; 2151 2152 /* 2153 * Match addresses and ports. 
2154 */ 2155 if (rev == 0) { 2156 if ((IP6_EQ(&is->is_dst, dst) || (flags & SI_W_DADDR)) && 2157 (IP6_EQ(&is->is_src, src) || (flags & SI_W_SADDR))) { 2158 if (tcp) { 2159 if ((sp == is->is_sport || flags & SI_W_SPORT)&& 2160 (dp == is->is_dport || flags & SI_W_DPORT)) 2161 ret = 1; 2162 } else { 2163 ret = 1; 2164 } 2165 } 2166 } else { 2167 if ((IP6_EQ(&is->is_dst, src) || (flags & SI_W_DADDR)) && 2168 (IP6_EQ(&is->is_src, dst) || (flags & SI_W_SADDR))) { 2169 if (tcp) { 2170 if ((dp == is->is_sport || flags & SI_W_SPORT)&& 2171 (sp == is->is_dport || flags & SI_W_DPORT)) 2172 ret = 1; 2173 } else { 2174 ret = 1; 2175 } 2176 } 2177 } 2178 2179 if (ret == 0) { 2180 DTRACE_PROBE(no_match_on_addrs); 2181 return NULL; 2182 } 2183 /* 2184 * Whether or not this should be here, is questionable, but the aim 2185 * is to get this out of the main line. 2186 */ 2187 if (tcp == NULL) 2188 flags = is->is_flags & ~(SI_WILDP|SI_NEWFR|SI_CLONE|SI_CLONED); 2189 2190 /* 2191 * Only one of the source or destination address can be flaged as a 2192 * wildcard. Fill in the missing address, if set. 2193 * For IPv6, if the address being copied in is multicast, then 2194 * don't reset the wild flag - multicast causes it to be set in the 2195 * first place! 
2196 */ 2197 if ((flags & (SI_W_SADDR|SI_W_DADDR))) { 2198 fr_ip_t *fi = &fin->fin_fi; 2199 2200 if ((flags & SI_W_SADDR) != 0) { 2201 if (rev == 0) { 2202 #ifdef USE_INET6 2203 if (is->is_v == 6 && 2204 IN6_IS_ADDR_MULTICAST(&fi->fi_src.in6)) 2205 /*EMPTY*/; 2206 else 2207 #endif 2208 { 2209 is->is_src = fi->fi_src; 2210 is->is_flags &= ~SI_W_SADDR; 2211 } 2212 } else { 2213 #ifdef USE_INET6 2214 if (is->is_v == 6 && 2215 IN6_IS_ADDR_MULTICAST(&fi->fi_dst.in6)) 2216 /*EMPTY*/; 2217 else 2218 #endif 2219 { 2220 is->is_src = fi->fi_dst; 2221 is->is_flags &= ~SI_W_SADDR; 2222 } 2223 } 2224 } else if ((flags & SI_W_DADDR) != 0) { 2225 if (rev == 0) { 2226 #ifdef USE_INET6 2227 if (is->is_v == 6 && 2228 IN6_IS_ADDR_MULTICAST(&fi->fi_dst.in6)) 2229 /*EMPTY*/; 2230 else 2231 #endif 2232 { 2233 is->is_dst = fi->fi_dst; 2234 is->is_flags &= ~SI_W_DADDR; 2235 } 2236 } else { 2237 #ifdef USE_INET6 2238 if (is->is_v == 6 && 2239 IN6_IS_ADDR_MULTICAST(&fi->fi_src.in6)) 2240 /*EMPTY*/; 2241 else 2242 #endif 2243 { 2244 is->is_dst = fi->fi_src; 2245 is->is_flags &= ~SI_W_DADDR; 2246 } 2247 } 2248 } 2249 if ((is->is_flags & (SI_WILDA|SI_WILDP)) == 0) { 2250 ATOMIC_DECL(ifs->ifs_ips_stats.iss_wild); 2251 } 2252 } 2253 2254 flx = fin->fin_flx & cmask; 2255 cflx = is->is_flx[out][rev]; 2256 2257 /* 2258 * Match up any flags set from IP options. 2259 */ 2260 if ((cflx && (flx != (cflx & cmask))) || 2261 ((fin->fin_optmsk & is->is_optmsk[rev]) != is->is_opt[rev]) || 2262 ((fin->fin_secmsk & is->is_secmsk) != is->is_sec) || 2263 ((fin->fin_auth & is->is_authmsk) != is->is_auth)) { 2264 DTRACE_PROBE4(no_match_on_flags, 2265 int, (cflx && (flx != (cflx & cmask))), 2266 int, 2267 ((fin->fin_optmsk & is->is_optmsk[rev]) != is->is_opt[rev]), 2268 int, ((fin->fin_secmsk & is->is_secmsk) != is->is_sec), 2269 int, ((fin->fin_auth & is->is_authmsk) != is->is_auth) 2270 ); 2271 return NULL; 2272 } 2273 /* 2274 * Only one of the source or destination port can be flagged as a 2275 * wildcard. 
When filling it in, fill in a copy of the matched entry 2276 * if it has the cloning flag set. 2277 */ 2278 if ((fin->fin_flx & FI_IGNORE) != 0) { 2279 fin->fin_rev = rev; 2280 return is; 2281 } 2282 2283 if ((flags & (SI_W_SPORT|SI_W_DPORT))) { 2284 if ((flags & SI_CLONE) != 0) { 2285 ipstate_t *clone; 2286 2287 clone = fr_stclone(fin, tcp, is); 2288 if (clone == NULL) 2289 return NULL; 2290 is = clone; 2291 } else { 2292 ATOMIC_DECL(ifs->ifs_ips_stats.iss_wild); 2293 } 2294 2295 if ((flags & SI_W_SPORT) != 0) { 2296 if (rev == 0) { 2297 is->is_sport = sp; 2298 is->is_send = ntohl(tcp->th_seq); 2299 } else { 2300 is->is_sport = dp; 2301 is->is_send = ntohl(tcp->th_ack); 2302 } 2303 is->is_maxsend = is->is_send + 1; 2304 } else if ((flags & SI_W_DPORT) != 0) { 2305 if (rev == 0) { 2306 is->is_dport = dp; 2307 is->is_dend = ntohl(tcp->th_ack); 2308 } else { 2309 is->is_dport = sp; 2310 is->is_dend = ntohl(tcp->th_seq); 2311 } 2312 is->is_maxdend = is->is_dend + 1; 2313 } 2314 is->is_flags &= ~(SI_W_SPORT|SI_W_DPORT); 2315 if ((flags & SI_CLONED) && ifs->ifs_ipstate_logging) 2316 ipstate_log(is, ISL_CLONE, ifs); 2317 } 2318 2319 ret = -1; 2320 2321 if (is->is_flx[out][rev] == 0) { 2322 is->is_flx[out][rev] = flx; 2323 /* 2324 * If we are dealing with the first packet coming in reverse 2325 * direction (sent by peer), then we have to set options into 2326 * state. 2327 */ 2328 if (rev == 1 && is->is_optmsk[1] == 0x0) { 2329 is->is_optmsk[1] = 0xffffffff; 2330 is->is_opt[1] = fin->fin_optmsk; 2331 DTRACE_PROBE(set_rev_opts); 2332 } 2333 if (is->is_v == 6) { 2334 is->is_opt[rev] &= ~0x8; 2335 is->is_optmsk[rev] &= ~0x8; 2336 } 2337 } 2338 2339 /* 2340 * Check if the interface name for this "direction" is set and if not, 2341 * fill it in. 
2342 */ 2343 if (is->is_ifp[idx] == NULL && 2344 (*is->is_ifname[idx] == '\0' || *is->is_ifname[idx] == '*')) { 2345 is->is_ifp[idx] = ifp; 2346 COPYIFNAME(ifp, is->is_ifname[idx], fin->fin_v); 2347 } 2348 fin->fin_rev = rev; 2349 return is; 2350 } 2351 2352 2353 /* ------------------------------------------------------------------------ */ 2354 /* Function: fr_checkicmpmatchingstate */ 2355 /* Returns: Nil */ 2356 /* Parameters: fin(I) - pointer to packet information */ 2357 /* */ 2358 /* If we've got an ICMP error message, using the information stored in the */ 2359 /* ICMP packet, look for a matching state table entry. */ 2360 /* */ 2361 /* If we return NULL then no lock on ipf_state is held. */ 2362 /* If we return non-null then a read-lock on ipf_state is held. */ 2363 /* ------------------------------------------------------------------------ */ 2364 static ipstate_t *fr_checkicmpmatchingstate(fin) 2365 fr_info_t *fin; 2366 { 2367 ipstate_t *is, **isp; 2368 u_short sport, dport; 2369 u_char pr; 2370 int backward, i, oi; 2371 i6addr_t dst, src; 2372 struct icmp *ic; 2373 u_short savelen; 2374 icmphdr_t *icmp; 2375 fr_info_t ofin; 2376 tcphdr_t *tcp; 2377 int len; 2378 ip_t *oip; 2379 u_int hv; 2380 ipf_stack_t *ifs = fin->fin_ifs; 2381 2382 /* 2383 * Does it at least have the return (basic) IP header ? 2384 * Is it an actual recognised ICMP error type? 2385 * Only a basic IP header (no options) should be with 2386 * an ICMP error header. 2387 */ 2388 if ((fin->fin_v != 4) || (fin->fin_hlen != sizeof(ip_t)) || 2389 (fin->fin_plen < ICMPERR_MINPKTLEN) || 2390 !(fin->fin_flx & FI_ICMPERR)) 2391 return NULL; 2392 ic = fin->fin_dp; 2393 2394 oip = (ip_t *)((char *)ic + ICMPERR_ICMPHLEN); 2395 /* 2396 * Check if the at least the old IP header (with options) and 2397 * 8 bytes of payload is present. 2398 */ 2399 if (fin->fin_plen < ICMPERR_MAXPKTLEN + ((IP_HL(oip) - 5) << 2)) 2400 return NULL; 2401 2402 /* 2403 * Sanity Checks. 
2404 */ 2405 len = fin->fin_dlen - ICMPERR_ICMPHLEN; 2406 if ((len <= 0) || ((IP_HL(oip) << 2) > len)) 2407 return NULL; 2408 2409 /* 2410 * Is the buffer big enough for all of it ? It's the size of the IP 2411 * header claimed in the encapsulated part which is of concern. It 2412 * may be too big to be in this buffer but not so big that it's 2413 * outside the ICMP packet, leading to TCP deref's causing problems. 2414 * This is possible because we don't know how big oip_hl is when we 2415 * do the pullup early in fr_check() and thus can't guarantee it is 2416 * all here now. 2417 */ 2418 #ifdef _KERNEL 2419 { 2420 mb_t *m; 2421 2422 m = fin->fin_m; 2423 # if defined(MENTAT) 2424 if ((char *)oip + len > (char *)m->b_wptr) 2425 return NULL; 2426 # else 2427 if ((char *)oip + len > (char *)fin->fin_ip + m->m_len) 2428 return NULL; 2429 # endif 2430 } 2431 #endif 2432 bcopy((char *)fin, (char *)&ofin, sizeof(*fin)); 2433 2434 /* 2435 * in the IPv4 case we must zero the i6addr union otherwise 2436 * the IP6_EQ and IP6_NEQ macros produce the wrong results because 2437 * of the 'junk' in the unused part of the union 2438 */ 2439 bzero((char *)&src, sizeof(src)); 2440 bzero((char *)&dst, sizeof(dst)); 2441 2442 /* 2443 * we make an fin entry to be able to feed it to 2444 * matchsrcdst note that not all fields are encessary 2445 * but this is the cleanest way. Note further we fill 2446 * in fin_mp such that if someone uses it we'll get 2447 * a kernel panic. fr_matchsrcdst does not use this. 2448 * 2449 * watch out here, as ip is in host order and oip in network 2450 * order. Any change we make must be undone afterwards, like 2451 * oip->ip_off - it is still in network byte order so fix it. 
2452 */ 2453 savelen = oip->ip_len; 2454 oip->ip_len = len; 2455 oip->ip_off = ntohs(oip->ip_off); 2456 2457 ofin.fin_flx = FI_NOCKSUM; 2458 ofin.fin_v = 4; 2459 ofin.fin_ip = oip; 2460 ofin.fin_m = NULL; /* if dereferenced, panic XXX */ 2461 ofin.fin_mp = NULL; /* if dereferenced, panic XXX */ 2462 ofin.fin_plen = fin->fin_dlen - ICMPERR_ICMPHLEN; 2463 (void) fr_makefrip(IP_HL(oip) << 2, oip, &ofin); 2464 ofin.fin_ifp = fin->fin_ifp; 2465 ofin.fin_out = !fin->fin_out; 2466 /* 2467 * Reset the short and bad flag here because in fr_matchsrcdst() 2468 * the flags for the current packet (fin_flx) are compared against 2469 * those for the existing session. 2470 */ 2471 ofin.fin_flx &= ~(FI_BAD|FI_SHORT); 2472 2473 /* 2474 * Put old values of ip_len and ip_off back as we don't know 2475 * if we have to forward the packet (or process it again. 2476 */ 2477 oip->ip_len = savelen; 2478 oip->ip_off = htons(oip->ip_off); 2479 2480 switch (oip->ip_p) 2481 { 2482 case IPPROTO_ICMP : 2483 /* 2484 * an ICMP error can only be generated as a result of an 2485 * ICMP query, not as the response on an ICMP error 2486 * 2487 * XXX theoretically ICMP_ECHOREP and the other reply's are 2488 * ICMP query's as well, but adding them here seems strange XXX 2489 */ 2490 if ((ofin.fin_flx & FI_ICMPERR) != 0) 2491 return NULL; 2492 2493 /* 2494 * perform a lookup of the ICMP packet in the state table 2495 */ 2496 icmp = (icmphdr_t *)((char *)oip + (IP_HL(oip) << 2)); 2497 hv = (pr = oip->ip_p); 2498 src.in4 = oip->ip_src; 2499 hv += src.in4.s_addr; 2500 dst.in4 = oip->ip_dst; 2501 hv += dst.in4.s_addr; 2502 hv += icmp->icmp_id; 2503 hv = DOUBLE_HASH(hv, ifs); 2504 2505 READ_ENTER(&ifs->ifs_ipf_state); 2506 for (isp = &ifs->ifs_ips_table[hv]; ((is = *isp) != NULL); ) { 2507 isp = &is->is_hnext; 2508 if ((is->is_p != pr) || (is->is_v != 4)) 2509 continue; 2510 if (is->is_pass & FR_NOICMPERR) 2511 continue; 2512 is = fr_matchsrcdst(&ofin, is, &src, &dst, 2513 NULL, FI_ICMPCMP); 2514 if (is != 
NULL) { 2515 if ((is->is_pass & FR_NOICMPERR) != 0) { 2516 RWLOCK_EXIT(&ifs->ifs_ipf_state); 2517 return NULL; 2518 } 2519 /* 2520 * i : the index of this packet (the icmp 2521 * unreachable) 2522 * oi : the index of the original packet found 2523 * in the icmp header (i.e. the packet 2524 * causing this icmp) 2525 * backward : original packet was backward 2526 * compared to the state 2527 */ 2528 backward = IP6_NEQ(&is->is_src, &src); 2529 fin->fin_rev = !backward; 2530 i = (!backward << 1) + fin->fin_out; 2531 oi = (backward << 1) + ofin.fin_out; 2532 if (is->is_icmppkts[i] > is->is_pkts[oi]) 2533 continue; 2534 ifs->ifs_ips_stats.iss_hits++; 2535 is->is_icmppkts[i]++; 2536 return is; 2537 } 2538 } 2539 RWLOCK_EXIT(&ifs->ifs_ipf_state); 2540 return NULL; 2541 case IPPROTO_TCP : 2542 case IPPROTO_UDP : 2543 break; 2544 default : 2545 return NULL; 2546 } 2547 2548 tcp = (tcphdr_t *)((char *)oip + (IP_HL(oip) << 2)); 2549 dport = tcp->th_dport; 2550 sport = tcp->th_sport; 2551 2552 hv = (pr = oip->ip_p); 2553 src.in4 = oip->ip_src; 2554 hv += src.in4.s_addr; 2555 dst.in4 = oip->ip_dst; 2556 hv += dst.in4.s_addr; 2557 hv += dport; 2558 hv += sport; 2559 hv = DOUBLE_HASH(hv, ifs); 2560 2561 READ_ENTER(&ifs->ifs_ipf_state); 2562 for (isp = &ifs->ifs_ips_table[hv]; ((is = *isp) != NULL); ) { 2563 isp = &is->is_hnext; 2564 /* 2565 * Only allow this icmp though if the 2566 * encapsulated packet was allowed through the 2567 * other way around. Note that the minimal amount 2568 * of info present does not allow for checking against 2569 * tcp internals such as seq and ack numbers. Only the 2570 * ports are known to be present and can be even if the 2571 * short flag is set. 2572 */ 2573 if ((is->is_p == pr) && (is->is_v == 4) && 2574 (is = fr_matchsrcdst(&ofin, is, &src, &dst, 2575 tcp, FI_ICMPCMP))) { 2576 /* 2577 * i : the index of this packet (the icmp unreachable) 2578 * oi : the index of the original packet found in the 2579 * icmp header (i.e. 
the packet causing this icmp) 2580 * backward : original packet was backward compared to 2581 * the state 2582 */ 2583 backward = IP6_NEQ(&is->is_src, &src); 2584 fin->fin_rev = !backward; 2585 i = (!backward << 1) + fin->fin_out; 2586 oi = (backward << 1) + ofin.fin_out; 2587 2588 if (((is->is_pass & FR_NOICMPERR) != 0) || 2589 (is->is_icmppkts[i] > is->is_pkts[oi])) 2590 break; 2591 ifs->ifs_ips_stats.iss_hits++; 2592 is->is_icmppkts[i]++; 2593 /* 2594 * we deliberately do not touch the timeouts 2595 * for the accompanying state table entry. 2596 * It remains to be seen if that is correct. XXX 2597 */ 2598 return is; 2599 } 2600 } 2601 RWLOCK_EXIT(&ifs->ifs_ipf_state); 2602 return NULL; 2603 } 2604 2605 2606 /* ------------------------------------------------------------------------ */ 2607 /* Function: fr_ipsmove */ 2608 /* Returns: Nil */ 2609 /* Parameters: is(I) - pointer to state table entry */ 2610 /* hv(I) - new hash value for state table entry */ 2611 /* Write Locks: ipf_state */ 2612 /* */ 2613 /* Move a state entry from one position in the hash table to another. */ 2614 /* ------------------------------------------------------------------------ */ 2615 static void fr_ipsmove(is, hv, ifs) 2616 ipstate_t *is; 2617 u_int hv; 2618 ipf_stack_t *ifs; 2619 { 2620 ipstate_t **isp; 2621 u_int hvm; 2622 2623 ASSERT(rw_read_locked(&ifs->ifs_ipf_state.ipf_lk) == 0); 2624 2625 hvm = is->is_hv; 2626 /* 2627 * Remove the hash from the old location... 2628 */ 2629 isp = is->is_phnext; 2630 if (is->is_hnext) 2631 is->is_hnext->is_phnext = isp; 2632 *isp = is->is_hnext; 2633 if (ifs->ifs_ips_table[hvm] == NULL) 2634 ifs->ifs_ips_stats.iss_inuse--; 2635 ifs->ifs_ips_stats.iss_bucketlen[hvm]--; 2636 2637 /* 2638 * ...and put the hash in the new one. 
2639 */ 2640 hvm = DOUBLE_HASH(hv, ifs); 2641 is->is_hv = hvm; 2642 isp = &ifs->ifs_ips_table[hvm]; 2643 if (*isp) 2644 (*isp)->is_phnext = &is->is_hnext; 2645 else 2646 ifs->ifs_ips_stats.iss_inuse++; 2647 ifs->ifs_ips_stats.iss_bucketlen[hvm]++; 2648 is->is_phnext = isp; 2649 is->is_hnext = *isp; 2650 *isp = is; 2651 } 2652 2653 2654 /* ------------------------------------------------------------------------ */ 2655 /* Function: fr_stlookup */ 2656 /* Returns: ipstate_t* - NULL == no matching state found, */ 2657 /* else pointer to state information is returned */ 2658 /* Parameters: fin(I) - pointer to packet information */ 2659 /* tcp(I) - pointer to TCP/UDP header. */ 2660 /* */ 2661 /* Search the state table for a matching entry to the packet described by */ 2662 /* the contents of *fin. */ 2663 /* */ 2664 /* If we return NULL then no lock on ipf_state is held. */ 2665 /* If we return non-null then a read-lock on ipf_state is held. */ 2666 /* ------------------------------------------------------------------------ */ 2667 ipstate_t *fr_stlookup(fin, tcp, ifqp) 2668 fr_info_t *fin; 2669 tcphdr_t *tcp; 2670 ipftq_t **ifqp; 2671 { 2672 u_int hv, hvm, pr, v, tryagain; 2673 ipstate_t *is, **isp; 2674 u_short dport, sport; 2675 i6addr_t src, dst; 2676 struct icmp *ic; 2677 ipftq_t *ifq; 2678 int oow; 2679 ipf_stack_t *ifs = fin->fin_ifs; 2680 2681 is = NULL; 2682 ifq = NULL; 2683 tcp = fin->fin_dp; 2684 ic = (struct icmp *)tcp; 2685 hv = (pr = fin->fin_fi.fi_p); 2686 src = fin->fin_fi.fi_src; 2687 dst = fin->fin_fi.fi_dst; 2688 hv += src.in4.s_addr; 2689 hv += dst.in4.s_addr; 2690 2691 v = fin->fin_fi.fi_v; 2692 #ifdef USE_INET6 2693 if (v == 6) { 2694 hv += fin->fin_fi.fi_src.i6[1]; 2695 hv += fin->fin_fi.fi_src.i6[2]; 2696 hv += fin->fin_fi.fi_src.i6[3]; 2697 2698 if ((fin->fin_p == IPPROTO_ICMPV6) && 2699 IN6_IS_ADDR_MULTICAST(&fin->fin_fi.fi_dst.in6)) { 2700 hv -= dst.in4.s_addr; 2701 } else { 2702 hv += fin->fin_fi.fi_dst.i6[1]; 2703 hv += 
fin->fin_fi.fi_dst.i6[2]; 2704 hv += fin->fin_fi.fi_dst.i6[3]; 2705 } 2706 } 2707 #endif 2708 if ((v == 4) && 2709 (fin->fin_flx & (FI_MULTICAST|FI_BROADCAST|FI_MBCAST))) { 2710 if (fin->fin_out == 0) { 2711 hv -= src.in4.s_addr; 2712 } else { 2713 hv -= dst.in4.s_addr; 2714 } 2715 } 2716 2717 /* 2718 * Search the hash table for matching packet header info. 2719 */ 2720 switch (pr) 2721 { 2722 #ifdef USE_INET6 2723 case IPPROTO_ICMPV6 : 2724 tryagain = 0; 2725 if (v == 6) { 2726 if ((ic->icmp_type == ICMP6_ECHO_REQUEST) || 2727 (ic->icmp_type == ICMP6_ECHO_REPLY)) { 2728 hv += ic->icmp_id; 2729 } 2730 } 2731 READ_ENTER(&ifs->ifs_ipf_state); 2732 icmp6again: 2733 hvm = DOUBLE_HASH(hv, ifs); 2734 for (isp = &ifs->ifs_ips_table[hvm]; ((is = *isp) != NULL); ) { 2735 isp = &is->is_hnext; 2736 if ((is->is_p != pr) || (is->is_v != v)) 2737 continue; 2738 is = fr_matchsrcdst(fin, is, &src, &dst, NULL, FI_CMP); 2739 if (is != NULL && 2740 fr_matchicmpqueryreply(v, &is->is_icmp, 2741 ic, fin->fin_rev)) { 2742 if (fin->fin_rev) 2743 ifq = &ifs->ifs_ips_icmpacktq; 2744 else 2745 ifq = &ifs->ifs_ips_icmptq; 2746 break; 2747 } 2748 } 2749 2750 if (is != NULL) { 2751 if ((tryagain != 0) && !(is->is_flags & SI_W_DADDR)) { 2752 hv += fin->fin_fi.fi_src.i6[0]; 2753 hv += fin->fin_fi.fi_src.i6[1]; 2754 hv += fin->fin_fi.fi_src.i6[2]; 2755 hv += fin->fin_fi.fi_src.i6[3]; 2756 fr_ipsmove(is, hv, ifs); 2757 MUTEX_DOWNGRADE(&ifs->ifs_ipf_state); 2758 } 2759 break; 2760 } 2761 RWLOCK_EXIT(&ifs->ifs_ipf_state); 2762 2763 /* 2764 * No matching icmp state entry. Perhaps this is a 2765 * response to another state entry. 2766 * 2767 * XXX With some ICMP6 packets, the "other" address is already 2768 * in the packet, after the ICMP6 header, and this could be 2769 * used in place of the multicast address. However, taking 2770 * advantage of this requires some significant code changes 2771 * to handle the specific types where that is the case. 
2772 */ 2773 if ((ifs->ifs_ips_stats.iss_wild != 0) && (v == 6) && (tryagain == 0) && 2774 !IN6_IS_ADDR_MULTICAST(&fin->fin_fi.fi_src.in6)) { 2775 hv -= fin->fin_fi.fi_src.i6[0]; 2776 hv -= fin->fin_fi.fi_src.i6[1]; 2777 hv -= fin->fin_fi.fi_src.i6[2]; 2778 hv -= fin->fin_fi.fi_src.i6[3]; 2779 tryagain = 1; 2780 WRITE_ENTER(&ifs->ifs_ipf_state); 2781 goto icmp6again; 2782 } 2783 2784 is = fr_checkicmp6matchingstate(fin); 2785 if (is != NULL) 2786 return is; 2787 break; 2788 #endif 2789 2790 case IPPROTO_ICMP : 2791 if (v == 4) { 2792 hv += ic->icmp_id; 2793 } 2794 hv = DOUBLE_HASH(hv, ifs); 2795 READ_ENTER(&ifs->ifs_ipf_state); 2796 for (isp = &ifs->ifs_ips_table[hv]; ((is = *isp) != NULL); ) { 2797 isp = &is->is_hnext; 2798 if ((is->is_p != pr) || (is->is_v != v)) 2799 continue; 2800 is = fr_matchsrcdst(fin, is, &src, &dst, NULL, FI_CMP); 2801 if (is != NULL && 2802 fr_matchicmpqueryreply(v, &is->is_icmp, 2803 ic, fin->fin_rev)) { 2804 if (fin->fin_rev) 2805 ifq = &ifs->ifs_ips_icmpacktq; 2806 else 2807 ifq = &ifs->ifs_ips_icmptq; 2808 break; 2809 } 2810 } 2811 if (is == NULL) { 2812 RWLOCK_EXIT(&ifs->ifs_ipf_state); 2813 } 2814 break; 2815 2816 case IPPROTO_TCP : 2817 case IPPROTO_UDP : 2818 ifqp = NULL; 2819 sport = htons(fin->fin_data[0]); 2820 hv += sport; 2821 dport = htons(fin->fin_data[1]); 2822 hv += dport; 2823 oow = 0; 2824 tryagain = 0; 2825 READ_ENTER(&ifs->ifs_ipf_state); 2826 retry_tcpudp: 2827 hvm = DOUBLE_HASH(hv, ifs); 2828 for (isp = &ifs->ifs_ips_table[hvm]; ((is = *isp) != NULL); ) { 2829 isp = &is->is_hnext; 2830 if ((is->is_p != pr) || (is->is_v != v)) 2831 continue; 2832 fin->fin_flx &= ~FI_OOW; 2833 is = fr_matchsrcdst(fin, is, &src, &dst, tcp, FI_CMP); 2834 if (is != NULL) { 2835 if (pr == IPPROTO_TCP) { 2836 if (!fr_tcpstate(fin, tcp, is)) { 2837 oow |= fin->fin_flx & FI_OOW; 2838 continue; 2839 } 2840 } 2841 break; 2842 } 2843 } 2844 if (is != NULL) { 2845 if (tryagain && 2846 !(is->is_flags & (SI_CLONE|SI_WILDP|SI_WILDA))) { 2847 hv += 
dport; 2848 hv += sport; 2849 fr_ipsmove(is, hv, ifs); 2850 MUTEX_DOWNGRADE(&ifs->ifs_ipf_state); 2851 } 2852 break; 2853 } 2854 RWLOCK_EXIT(&ifs->ifs_ipf_state); 2855 2856 if (ifs->ifs_ips_stats.iss_wild) { 2857 if (tryagain == 0) { 2858 hv -= dport; 2859 hv -= sport; 2860 } else if (tryagain == 1) { 2861 hv = fin->fin_fi.fi_p; 2862 /* 2863 * If we try to pretend this is a reply to a 2864 * multicast/broadcast packet then we need to 2865 * exclude part of the address from the hash 2866 * calculation. 2867 */ 2868 if (fin->fin_out == 0) { 2869 hv += src.in4.s_addr; 2870 } else { 2871 hv += dst.in4.s_addr; 2872 } 2873 hv += dport; 2874 hv += sport; 2875 } 2876 tryagain++; 2877 if (tryagain <= 2) { 2878 WRITE_ENTER(&ifs->ifs_ipf_state); 2879 goto retry_tcpudp; 2880 } 2881 } 2882 fin->fin_flx |= oow; 2883 break; 2884 2885 #if 0 2886 case IPPROTO_GRE : 2887 gre = fin->fin_dp; 2888 if (GRE_REV(gre->gr_flags) == 1) { 2889 hv += gre->gr_call; 2890 } 2891 /* FALLTHROUGH */ 2892 #endif 2893 default : 2894 ifqp = NULL; 2895 hvm = DOUBLE_HASH(hv, ifs); 2896 READ_ENTER(&ifs->ifs_ipf_state); 2897 for (isp = &ifs->ifs_ips_table[hvm]; ((is = *isp) != NULL); ) { 2898 isp = &is->is_hnext; 2899 if ((is->is_p != pr) || (is->is_v != v)) 2900 continue; 2901 is = fr_matchsrcdst(fin, is, &src, &dst, NULL, FI_CMP); 2902 if (is != NULL) { 2903 ifq = &ifs->ifs_ips_iptq; 2904 break; 2905 } 2906 } 2907 if (is == NULL) { 2908 RWLOCK_EXIT(&ifs->ifs_ipf_state); 2909 } 2910 break; 2911 } 2912 2913 if ((is != NULL) && ((is->is_sti.tqe_flags & TQE_RULEBASED) != 0) && 2914 (is->is_tqehead[fin->fin_rev] != NULL)) 2915 ifq = is->is_tqehead[fin->fin_rev]; 2916 if (ifq != NULL && ifqp != NULL) 2917 *ifqp = ifq; 2918 return is; 2919 } 2920 2921 2922 /* ------------------------------------------------------------------------ */ 2923 /* Function: fr_updatestate */ 2924 /* Returns: Nil */ 2925 /* Parameters: fin(I) - pointer to packet information */ 2926 /* is(I) - pointer to state table entry */ 2927 /* 
Read Locks: ipf_state */ 2928 /* */ 2929 /* Updates packet and byte counters for a newly received packet. Seeds the */ 2930 /* fragment cache with a new entry as required. */ 2931 /* ------------------------------------------------------------------------ */ 2932 void fr_updatestate(fin, is, ifq) 2933 fr_info_t *fin; 2934 ipstate_t *is; 2935 ipftq_t *ifq; 2936 { 2937 ipftqent_t *tqe; 2938 int i, pass; 2939 ipf_stack_t *ifs = fin->fin_ifs; 2940 2941 i = (fin->fin_rev << 1) + fin->fin_out; 2942 2943 /* 2944 * For TCP packets, ifq == NULL. For all others, check if this new 2945 * queue is different to the last one it was on and move it if so. 2946 */ 2947 tqe = &is->is_sti; 2948 MUTEX_ENTER(&is->is_lock); 2949 if ((tqe->tqe_flags & TQE_RULEBASED) != 0) 2950 ifq = is->is_tqehead[fin->fin_rev]; 2951 2952 if (ifq != NULL) 2953 fr_movequeue(tqe, tqe->tqe_ifq, ifq, ifs); 2954 2955 is->is_pkts[i]++; 2956 fin->fin_pktnum = is->is_pkts[i] + is->is_icmppkts[i]; 2957 is->is_bytes[i] += fin->fin_plen; 2958 MUTEX_EXIT(&is->is_lock); 2959 2960 #ifdef IPFILTER_SYNC 2961 if (is->is_flags & IS_STATESYNC) 2962 ipfsync_update(SMC_STATE, fin, is->is_sync); 2963 #endif 2964 2965 ATOMIC_INCL(ifs->ifs_ips_stats.iss_hits); 2966 2967 fin->fin_fr = is->is_rule; 2968 2969 /* 2970 * If this packet is a fragment and the rule says to track fragments, 2971 * then create a new fragment cache entry. 2972 */ 2973 pass = is->is_pass; 2974 if ((fin->fin_flx & FI_FRAG) && FR_ISPASS(pass)) 2975 (void) fr_newfrag(fin, pass ^ FR_KEEPSTATE); 2976 } 2977 2978 2979 /* ------------------------------------------------------------------------ */ 2980 /* Function: fr_checkstate */ 2981 /* Returns: frentry_t* - NULL == search failed, */ 2982 /* else pointer to rule for matching state */ 2983 /* Parameters: ifp(I) - pointer to interface */ 2984 /* passp(I) - pointer to filtering result flags */ 2985 /* */ 2986 /* Check if a packet is associated with an entry in the state table. 
*/ 2987 /* ------------------------------------------------------------------------ */ 2988 frentry_t *fr_checkstate(fin, passp) 2989 fr_info_t *fin; 2990 u_32_t *passp; 2991 { 2992 ipstate_t *is; 2993 frentry_t *fr; 2994 tcphdr_t *tcp; 2995 ipftq_t *ifq; 2996 u_int pass; 2997 ipf_stack_t *ifs = fin->fin_ifs; 2998 2999 if (ifs->ifs_fr_state_lock || (ifs->ifs_ips_list == NULL) || 3000 (fin->fin_flx & (FI_SHORT|FI_STATE|FI_FRAGBODY|FI_BAD))) 3001 return NULL; 3002 3003 is = NULL; 3004 if ((fin->fin_flx & FI_TCPUDP) || 3005 (fin->fin_fi.fi_p == IPPROTO_ICMP) 3006 #ifdef USE_INET6 3007 || (fin->fin_fi.fi_p == IPPROTO_ICMPV6) 3008 #endif 3009 ) 3010 tcp = fin->fin_dp; 3011 else 3012 tcp = NULL; 3013 3014 /* 3015 * Search the hash table for matching packet header info. 3016 */ 3017 ifq = NULL; 3018 is = fr_stlookup(fin, tcp, &ifq); 3019 switch (fin->fin_p) 3020 { 3021 #ifdef USE_INET6 3022 case IPPROTO_ICMPV6 : 3023 if (is != NULL) 3024 break; 3025 if (fin->fin_v == 6) { 3026 is = fr_checkicmp6matchingstate(fin); 3027 if (is != NULL) 3028 goto matched; 3029 } 3030 break; 3031 #endif 3032 case IPPROTO_ICMP : 3033 if (is != NULL) 3034 break; 3035 /* 3036 * No matching icmp state entry. Perhaps this is a 3037 * response to another state entry. 
3038 */ 3039 is = fr_checkicmpmatchingstate(fin); 3040 if (is != NULL) 3041 goto matched; 3042 break; 3043 case IPPROTO_TCP : 3044 if (is == NULL) 3045 break; 3046 3047 if (is->is_pass & FR_NEWISN) { 3048 if (fin->fin_out == 0) 3049 fr_fixinisn(fin, is); 3050 else if (fin->fin_out == 1) 3051 fr_fixoutisn(fin, is); 3052 } 3053 break; 3054 default : 3055 if (fin->fin_rev) 3056 ifq = &ifs->ifs_ips_udpacktq; 3057 else 3058 ifq = &ifs->ifs_ips_udptq; 3059 break; 3060 } 3061 if (is == NULL) { 3062 ATOMIC_INCL(ifs->ifs_ips_stats.iss_miss); 3063 return NULL; 3064 } 3065 3066 matched: 3067 fr = is->is_rule; 3068 if (fr != NULL) { 3069 if ((fin->fin_out == 0) && (fr->fr_nattag.ipt_num[0] != 0)) { 3070 if (fin->fin_nattag == NULL) { 3071 RWLOCK_EXIT(&ifs->ifs_ipf_state); 3072 return NULL; 3073 } 3074 if (fr_matchtag(&fr->fr_nattag, fin->fin_nattag) != 0) { 3075 RWLOCK_EXIT(&ifs->ifs_ipf_state); 3076 return NULL; 3077 } 3078 } 3079 (void) strncpy(fin->fin_group, fr->fr_group, FR_GROUPLEN); 3080 fin->fin_icode = fr->fr_icode; 3081 } 3082 3083 fin->fin_rule = is->is_rulen; 3084 pass = is->is_pass; 3085 fr_updatestate(fin, is, ifq); 3086 3087 RWLOCK_EXIT(&ifs->ifs_ipf_state); 3088 fin->fin_flx |= FI_STATE; 3089 if ((pass & FR_LOGFIRST) != 0) 3090 pass &= ~(FR_LOGFIRST|FR_LOG); 3091 *passp = pass; 3092 return fr; 3093 } 3094 3095 3096 /* ------------------------------------------------------------------------ */ 3097 /* Function: fr_fixoutisn */ 3098 /* Returns: Nil */ 3099 /* Parameters: fin(I) - pointer to packet information */ 3100 /* is(I) - pointer to master state structure */ 3101 /* */ 3102 /* Called only for outbound packets, adjusts the sequence number and the */ 3103 /* TCP checksum to match that change. 
*/ 3104 /* ------------------------------------------------------------------------ */ 3105 static void fr_fixoutisn(fin, is) 3106 fr_info_t *fin; 3107 ipstate_t *is; 3108 { 3109 tcphdr_t *tcp; 3110 int rev; 3111 u_32_t seq; 3112 3113 tcp = fin->fin_dp; 3114 rev = fin->fin_rev; 3115 if ((is->is_flags & IS_ISNSYN) != 0) { 3116 if (rev == 0) { 3117 seq = ntohl(tcp->th_seq); 3118 seq += is->is_isninc[0]; 3119 tcp->th_seq = htonl(seq); 3120 fix_outcksum(&tcp->th_sum, is->is_sumd[0]); 3121 } 3122 } 3123 if ((is->is_flags & IS_ISNACK) != 0) { 3124 if (rev == 1) { 3125 seq = ntohl(tcp->th_seq); 3126 seq += is->is_isninc[1]; 3127 tcp->th_seq = htonl(seq); 3128 fix_outcksum(&tcp->th_sum, is->is_sumd[1]); 3129 } 3130 } 3131 } 3132 3133 3134 /* ------------------------------------------------------------------------ */ 3135 /* Function: fr_fixinisn */ 3136 /* Returns: Nil */ 3137 /* Parameters: fin(I) - pointer to packet information */ 3138 /* is(I) - pointer to master state structure */ 3139 /* */ 3140 /* Called only for inbound packets, adjusts the acknowledge number and the */ 3141 /* TCP checksum to match that change. 
*/ 3142 /* ------------------------------------------------------------------------ */ 3143 static void fr_fixinisn(fin, is) 3144 fr_info_t *fin; 3145 ipstate_t *is; 3146 { 3147 tcphdr_t *tcp; 3148 int rev; 3149 u_32_t ack; 3150 3151 tcp = fin->fin_dp; 3152 rev = fin->fin_rev; 3153 if ((is->is_flags & IS_ISNSYN) != 0) { 3154 if (rev == 1) { 3155 ack = ntohl(tcp->th_ack); 3156 ack -= is->is_isninc[0]; 3157 tcp->th_ack = htonl(ack); 3158 fix_incksum(&tcp->th_sum, is->is_sumd[0]); 3159 } 3160 } 3161 if ((is->is_flags & IS_ISNACK) != 0) { 3162 if (rev == 0) { 3163 ack = ntohl(tcp->th_ack); 3164 ack -= is->is_isninc[1]; 3165 tcp->th_ack = htonl(ack); 3166 fix_incksum(&tcp->th_sum, is->is_sumd[1]); 3167 } 3168 } 3169 } 3170 3171 3172 /* ------------------------------------------------------------------------ */ 3173 /* Function: fr_statesync */ 3174 /* Returns: Nil */ 3175 /* Parameters: action(I) - type of synchronisation to do */ 3176 /* v(I) - IP version being sync'd (v4 or v6) */ 3177 /* ifp(I) - interface identifier associated with action */ 3178 /* name(I) - name associated with ifp parameter */ 3179 /* */ 3180 /* Walk through all state entries and if an interface pointer match is */ 3181 /* found then look it up again, based on its name in case the pointer has */ 3182 /* changed since last time. */ 3183 /* */ 3184 /* If ifp is passed in as being non-null then we are only doing updates for */ 3185 /* existing, matching, uses of it. 
*/ 3186 /* ------------------------------------------------------------------------ */ 3187 void fr_statesync(action, v, ifp, name, ifs) 3188 int action, v; 3189 void *ifp; 3190 char *name; 3191 ipf_stack_t *ifs; 3192 { 3193 ipstate_t *is; 3194 int i; 3195 3196 if (ifs->ifs_fr_running <= 0) 3197 return; 3198 3199 WRITE_ENTER(&ifs->ifs_ipf_state); 3200 3201 if (ifs->ifs_fr_running <= 0) { 3202 RWLOCK_EXIT(&ifs->ifs_ipf_state); 3203 return; 3204 } 3205 3206 switch (action) 3207 { 3208 case IPFSYNC_RESYNC : 3209 for (is = ifs->ifs_ips_list; is; is = is->is_next) { 3210 if (v != 0 && is->is_v != v) 3211 continue; 3212 /* 3213 * Look up all the interface names in the state entry. 3214 */ 3215 for (i = 0; i < 4; i++) { 3216 is->is_ifp[i] = fr_resolvenic(is->is_ifname[i], 3217 is->is_v, ifs); 3218 } 3219 } 3220 break; 3221 case IPFSYNC_NEWIFP : 3222 for (is = ifs->ifs_ips_list; is; is = is->is_next) { 3223 if (v != 0 && is->is_v != v) 3224 continue; 3225 /* 3226 * Look up all the interface names in the state entry. 3227 */ 3228 for (i = 0; i < 4; i++) { 3229 if (!strncmp(is->is_ifname[i], name, 3230 sizeof(is->is_ifname[i]))) 3231 is->is_ifp[i] = ifp; 3232 } 3233 } 3234 break; 3235 case IPFSYNC_OLDIFP : 3236 for (is = ifs->ifs_ips_list; is; is = is->is_next) { 3237 if (v != 0 && is->is_v != v) 3238 continue; 3239 /* 3240 * Look up all the interface names in the state entry. 
3241 */ 3242 for (i = 0; i < 4; i++) { 3243 if (is->is_ifp[i] == ifp) 3244 is->is_ifp[i] = (void *)-1; 3245 } 3246 } 3247 break; 3248 } 3249 RWLOCK_EXIT(&ifs->ifs_ipf_state); 3250 } 3251 3252 3253 #if SOLARIS2 >= 10 3254 /* ------------------------------------------------------------------------ */ 3255 /* Function: fr_stateifindexsync */ 3256 /* Returns: void */ 3257 /* Parameters: ifp - current network interface descriptor (ifindex) */ 3258 /* newifp - new interface descriptor (new ifindex) */ 3259 /* ifs - pointer to IPF stack */ 3260 /* */ 3261 /* Write Locks: assumes ipf_mutex is locked */ 3262 /* */ 3263 /* Updates all interface indeces matching to ifp with new interface index */ 3264 /* value. */ 3265 /* ------------------------------------------------------------------------ */ 3266 void fr_stateifindexsync(ifp, newifp, ifs) 3267 void *ifp; 3268 void *newifp; 3269 ipf_stack_t *ifs; 3270 { 3271 ipstate_t *is; 3272 int i; 3273 3274 WRITE_ENTER(&ifs->ifs_ipf_state); 3275 3276 for (is = ifs->ifs_ips_list; is != NULL; is = is->is_next) { 3277 3278 for (i = 0; i < 4; i++) { 3279 if (is->is_ifp[i] == ifp) 3280 is->is_ifp[i] = newifp; 3281 } 3282 } 3283 3284 RWLOCK_EXIT(&ifs->ifs_ipf_state); 3285 } 3286 #endif 3287 3288 /* ------------------------------------------------------------------------ */ 3289 /* Function: fr_delstate */ 3290 /* Returns: int - 0 = entry deleted, else ref count on entry */ 3291 /* Parameters: is(I) - pointer to state structure to delete */ 3292 /* why(I) - if not 0, log reason why it was deleted */ 3293 /* ifs - ipf stack instance */ 3294 /* Write Locks: ipf_state/ipf_global */ 3295 /* */ 3296 /* Deletes a state entry from the enumerated list as well as the hash table */ 3297 /* and timeout queue lists. Make adjustments to hash table statistics and */ 3298 /* global counters as required. 
*/
/* ------------------------------------------------------------------------ */
int fr_delstate(is, why, ifs)
ipstate_t *is;
int why;
ipf_stack_t *ifs;
{
	/* Set to 1 only if this call unlinks the entry from the hash table. */
	int removed = 0;

	/*
	 * NOTE(review): as written this assertion only fails when BOTH locks
	 * are write-held, which reads oddly next to the "Write Locks" line in
	 * the function header - confirm the intended condition.
	 */
	ASSERT(rw_write_held(&ifs->ifs_ipf_global.ipf_lk) == 0 ||
		rw_write_held(&ifs->ifs_ipf_state.ipf_lk) == 0);

	/*
	 * Start by removing the entry from the hash table of state entries
	 * so it will not be "used" again.
	 *
	 * It will remain in the "list" of state entries until all references
	 * have been accounted for.
	 */
	if (is->is_phnext != NULL) {
		removed = 1;
		*is->is_phnext = is->is_hnext;
		if (is->is_hnext != NULL)
			is->is_hnext->is_phnext = is->is_phnext;
		/* Bucket just became empty: one fewer bucket in use. */
		if (ifs->ifs_ips_table[is->is_hv] == NULL)
			ifs->ifs_ips_stats.iss_inuse--;
		ifs->ifs_ips_stats.iss_bucketlen[is->is_hv]--;

		is->is_phnext = NULL;
		is->is_hnext = NULL;
	}

	/*
	 * Because ifs->ifs_ips_stats.iss_wild is a count of entries in the
	 * state table that have wildcard flags set, only decrement it once
	 * and do it here.  The flags are cleared afterwards so that a later
	 * call on the same (orphaned) entry does not decrement it again.
	 */
	if (is->is_flags & (SI_WILDP|SI_WILDA)) {
		if (!(is->is_flags & SI_CLONED)) {
			ATOMIC_DECL(ifs->ifs_ips_stats.iss_wild);
		}
		is->is_flags &= ~(SI_WILDP|SI_WILDA);
	}

	/*
	 * Next, remove it from the timeout queue it is in.
	 */
	fr_deletequeueentry(&is->is_sti);

	is->is_me = NULL;

	/*
	 * If it is still in use by something else, do not go any further,
	 * but note that at this point it is now an orphan.
	 *
	 * NOTE(review): is_ref is read for the return value after is_lock has
	 * been dropped - presumably safe because the caller holds the state
	 * table write lock; confirm.
	 */
	MUTEX_ENTER(&is->is_lock);
	if (is->is_ref > 1) {
		is->is_ref--;
		MUTEX_EXIT(&is->is_lock);
		if (removed)
			ifs->ifs_ips_stats.iss_orphans++;
		return (is->is_ref);
	}
	MUTEX_EXIT(&is->is_lock);

	/* Last reference: from here on the entry is torn down and freed. */
	is->is_ref = 0;

	/*
	 * If entry has already been removed from table,
	 * it means we're simply cleaning up an orphan.
	 */
	if (!removed)
		ifs->ifs_ips_stats.iss_orphans--;

	if (is->is_tqehead[0] != NULL)
		(void) fr_deletetimeoutqueue(is->is_tqehead[0]);

	if (is->is_tqehead[1] != NULL)
		(void) fr_deletetimeoutqueue(is->is_tqehead[1]);

#ifdef	IPFILTER_SYNC
	if (is->is_sync)
		ipfsync_del(is->is_sync);
#endif
#ifdef	IPFILTER_SCAN
	(void) ipsc_detachis(is);
#endif

	/*
	 * Now remove it from master list of state table entries.
	 */
	if (is->is_pnext != NULL) {
		*is->is_pnext = is->is_next;
		if (is->is_next != NULL) {
			is->is_next->is_pnext = is->is_pnext;
			is->is_next = NULL;
		}
		is->is_pnext = NULL;
	}

	/* Log only when state logging is enabled and a reason was given. */
	if (ifs->ifs_ipstate_logging != 0 && why != 0)
		ipstate_log(is, why, ifs);

	/* Drop this entry's hold on the rule that created it. */
	if (is->is_rule != NULL) {
		is->is_rule->fr_statecnt--;
		(void)fr_derefrule(&is->is_rule, ifs);
	}

	MUTEX_DESTROY(&is->is_lock);
	KFREE(is);
	ifs->ifs_ips_num--;

	return (0);
}


/* ------------------------------------------------------------------------ */
/* Function:    fr_timeoutstate                                             */
/* Returns:     Nil                                                         */
/* Parameters:  ifs - ipf stack instance                                    */
/*                                                                          */
/* Slowly expire held state for things like UDP and ICMP.  The algorithm    */
/* used here is to keep the queue sorted with the oldest things at the top  */
/* and the youngest at the bottom.  So if the top one doesn't need to be    */
/* expired then neither will any under it.
*/ 3423 /* ------------------------------------------------------------------------ */ 3424 void fr_timeoutstate(ifs) 3425 ipf_stack_t *ifs; 3426 { 3427 ipftq_t *ifq, *ifqnext; 3428 ipftqent_t *tqe, *tqn; 3429 ipstate_t *is; 3430 SPL_INT(s); 3431 3432 SPL_NET(s); 3433 WRITE_ENTER(&ifs->ifs_ipf_state); 3434 for (ifq = ifs->ifs_ips_tqtqb; ifq != NULL; ifq = ifq->ifq_next) 3435 for (tqn = ifq->ifq_head; ((tqe = tqn) != NULL); ) { 3436 if (tqe->tqe_die > ifs->ifs_fr_ticks) 3437 break; 3438 tqn = tqe->tqe_next; 3439 is = tqe->tqe_parent; 3440 (void) fr_delstate(is, ISL_EXPIRE, ifs); 3441 } 3442 3443 for (ifq = ifs->ifs_ips_utqe; ifq != NULL; ifq = ifq->ifq_next) { 3444 for (tqn = ifq->ifq_head; ((tqe = tqn) != NULL); ) { 3445 if (tqe->tqe_die > ifs->ifs_fr_ticks) 3446 break; 3447 tqn = tqe->tqe_next; 3448 is = tqe->tqe_parent; 3449 (void) fr_delstate(is, ISL_EXPIRE, ifs); 3450 } 3451 } 3452 3453 for (ifq = ifs->ifs_ips_utqe; ifq != NULL; ifq = ifqnext) { 3454 ifqnext = ifq->ifq_next; 3455 3456 if (((ifq->ifq_flags & IFQF_DELETE) != 0) && 3457 (ifq->ifq_ref == 0)) { 3458 fr_freetimeoutqueue(ifq, ifs); 3459 } 3460 } 3461 3462 if (ifs->ifs_fr_state_doflush) { 3463 (void) fr_state_flush(FLUSH_TABLE_EXTRA, 0, ifs); 3464 ifs->ifs_fr_state_doflush = 0; 3465 } 3466 RWLOCK_EXIT(&ifs->ifs_ipf_state); 3467 SPL_X(s); 3468 } 3469 3470 3471 /* ---------------------------------------------------------------------- */ 3472 /* Function: fr_state_flush */ 3473 /* Returns: int - 0 == success, -1 == failure */ 3474 /* Parameters: flush_option - how to flush the active State table */ 3475 /* proto - IP version to flush (4, 6, or both) */ 3476 /* ifs - ipf stack instance */ 3477 /* Write Locks: ipf_state */ 3478 /* */ 3479 /* Flush state tables. 
Three possible flush options currently defined: */ 3480 /* */ 3481 /* FLUSH_TABLE_ALL : Flush all state table entries */ 3482 /* */ 3483 /* FLUSH_TABLE_CLOSING : Flush entries with TCP connections which */ 3484 /* have started to close on both ends using */ 3485 /* ipf_flushclosing(). */ 3486 /* */ 3487 /* FLUSH_TABLE_EXTRA : First, flush entries which are "almost" closed. */ 3488 /* Then, if needed, flush entries with TCP */ 3489 /* connections which have been idle for a long */ 3490 /* time with ipf_extraflush(). */ 3491 /* ---------------------------------------------------------------------- */ 3492 static int fr_state_flush(flush_option, proto, ifs) 3493 int flush_option, proto; 3494 ipf_stack_t *ifs; 3495 { 3496 ipstate_t *is, *isn; 3497 int removed; 3498 SPL_INT(s); 3499 3500 removed = 0; 3501 3502 SPL_NET(s); 3503 switch (flush_option) 3504 { 3505 case FLUSH_TABLE_ALL: 3506 isn = ifs->ifs_ips_list; 3507 while ((is = isn) != NULL) { 3508 isn = is->is_next; 3509 if ((proto != 0) && (is->is_v != proto)) 3510 continue; 3511 if (fr_delstate(is, ISL_FLUSH, ifs) == 0) 3512 removed++; 3513 } 3514 break; 3515 3516 case FLUSH_TABLE_CLOSING: 3517 removed = ipf_flushclosing(STATE_FLUSH, 3518 IPF_TCPS_CLOSE_WAIT, 3519 ifs->ifs_ips_tqtqb, 3520 ifs->ifs_ips_utqe, 3521 ifs); 3522 break; 3523 3524 case FLUSH_TABLE_EXTRA: 3525 removed = ipf_flushclosing(STATE_FLUSH, 3526 IPF_TCPS_FIN_WAIT_2, 3527 ifs->ifs_ips_tqtqb, 3528 ifs->ifs_ips_utqe, 3529 ifs); 3530 3531 /* 3532 * Be sure we haven't done this in the last 10 seconds. 
3533 */ 3534 if (ifs->ifs_fr_ticks - ifs->ifs_ips_last_force_flush < 3535 IPF_TTLVAL(10)) 3536 break; 3537 ifs->ifs_ips_last_force_flush = ifs->ifs_fr_ticks; 3538 removed += ipf_extraflush(STATE_FLUSH, 3539 &ifs->ifs_ips_tqtqb[IPF_TCPS_ESTABLISHED], 3540 ifs->ifs_ips_utqe, 3541 ifs); 3542 break; 3543 3544 default: /* Flush Nothing */ 3545 break; 3546 } 3547 3548 SPL_X(s); 3549 return (removed); 3550 } 3551 3552 3553 /* ------------------------------------------------------------------------ */ 3554 /* Function: fr_tcp_age */ 3555 /* Returns: int - 1 == state transition made, 0 == no change (rejected) */ 3556 /* Parameters: tq(I) - pointer to timeout queue information */ 3557 /* fin(I) - pointer to packet information */ 3558 /* tqtab(I) - TCP timeout queue table this is in */ 3559 /* flags(I) - flags from state/NAT entry */ 3560 /* */ 3561 /* Rewritten by Arjan de Vet <Arjan.deVet@adv.iae.nl>, 2000-07-29: */ 3562 /* */ 3563 /* - (try to) base state transitions on real evidence only, */ 3564 /* i.e. packets that are sent and have been received by ipfilter; */ 3565 /* diagram 18.12 of TCP/IP volume 1 by W. Richard Stevens was used. */ 3566 /* */ 3567 /* - deal with half-closed connections correctly; */ 3568 /* */ 3569 /* - store the state of the source in state[0] such that ipfstat */ 3570 /* displays the state as source/dest instead of dest/source; the calls */ 3571 /* to fr_tcp_age have been changed accordingly. */ 3572 /* */ 3573 /* Internal Parameters: */ 3574 /* */ 3575 /* state[0] = state of source (host that initiated connection) */ 3576 /* state[1] = state of dest (host that accepted the connection) */ 3577 /* */ 3578 /* dir == 0 : a packet from source to dest */ 3579 /* dir == 1 : a packet from dest to source */ 3580 /* */ 3581 /* Locking: it is assumed that the parent of the tqe structure is locked. 
*/ 3582 /* ------------------------------------------------------------------------ */ 3583 int fr_tcp_age(tqe, fin, tqtab, flags) 3584 ipftqent_t *tqe; 3585 fr_info_t *fin; 3586 ipftq_t *tqtab; 3587 int flags; 3588 { 3589 int dlen, ostate, nstate, rval, dir; 3590 u_char tcpflags; 3591 tcphdr_t *tcp; 3592 ipf_stack_t *ifs = fin->fin_ifs; 3593 3594 tcp = fin->fin_dp; 3595 3596 rval = 0; 3597 dir = fin->fin_rev; 3598 tcpflags = tcp->th_flags; 3599 dlen = fin->fin_dlen - (TCP_OFF(tcp) << 2); 3600 3601 ostate = tqe->tqe_state[1 - dir]; 3602 nstate = tqe->tqe_state[dir]; 3603 3604 DTRACE_PROBE4( 3605 indata, 3606 fr_info_t *, fin, 3607 int, ostate, 3608 int, nstate, 3609 u_char, tcpflags 3610 ); 3611 3612 if (tcpflags & TH_RST) { 3613 if (!(tcpflags & TH_PUSH) && !dlen) 3614 nstate = IPF_TCPS_CLOSED; 3615 else 3616 nstate = IPF_TCPS_CLOSE_WAIT; 3617 3618 /* 3619 * Once RST is received, we must advance peer's state to 3620 * CLOSE_WAIT. 3621 */ 3622 if (ostate <= IPF_TCPS_ESTABLISHED) { 3623 tqe->tqe_state[1 - dir] = IPF_TCPS_CLOSE_WAIT; 3624 } 3625 rval = 1; 3626 } else { 3627 3628 switch (nstate) 3629 { 3630 case IPF_TCPS_LISTEN: /* 0 */ 3631 if ((tcpflags & TH_OPENING) == TH_OPENING) { 3632 /* 3633 * 'dir' received an S and sends SA in 3634 * response, CLOSED -> SYN_RECEIVED 3635 */ 3636 nstate = IPF_TCPS_SYN_RECEIVED; 3637 rval = 1; 3638 } else if ((tcpflags & TH_OPENING) == TH_SYN) { 3639 /* 'dir' sent S, CLOSED -> SYN_SENT */ 3640 nstate = IPF_TCPS_SYN_SENT; 3641 rval = 1; 3642 } 3643 /* 3644 * the next piece of code makes it possible to get 3645 * already established connections into the state table 3646 * after a restart or reload of the filter rules; this 3647 * does not work when a strict 'flags S keep state' is 3648 * used for tcp connections of course 3649 */ 3650 if (((flags & IS_TCPFSM) == 0) && 3651 ((tcpflags & TH_ACKMASK) == TH_ACK)) { 3652 /* 3653 * we saw an A, guess 'dir' is in ESTABLISHED 3654 * mode 3655 */ 3656 switch (ostate) 3657 { 3658 case 
IPF_TCPS_LISTEN : 3659 case IPF_TCPS_SYN_RECEIVED : 3660 nstate = IPF_TCPS_HALF_ESTAB; 3661 rval = 1; 3662 break; 3663 case IPF_TCPS_HALF_ESTAB : 3664 case IPF_TCPS_ESTABLISHED : 3665 nstate = IPF_TCPS_ESTABLISHED; 3666 rval = 1; 3667 break; 3668 default : 3669 break; 3670 } 3671 } 3672 /* 3673 * TODO: besides regular ACK packets we can have other 3674 * packets as well; it is yet to be determined how we 3675 * should initialize the states in those cases 3676 */ 3677 break; 3678 3679 case IPF_TCPS_SYN_SENT: /* 1 */ 3680 if ((tcpflags & ~(TH_ECN|TH_CWR)) == TH_SYN) { 3681 /* 3682 * A retransmitted SYN packet. We do not reset 3683 * the timeout here to fr_tcptimeout because a 3684 * connection connect timeout does not renew 3685 * after every packet that is sent. We need to 3686 * set rval so as to indicate the packet has 3687 * passed the check for its flags being valid 3688 * in the TCP FSM. Setting rval to 2 has the 3689 * result of not resetting the timeout. 3690 */ 3691 rval = 2; 3692 } else if ((tcpflags & (TH_SYN|TH_FIN|TH_ACK)) == 3693 TH_ACK) { 3694 /* 3695 * we see an A from 'dir' which is in SYN_SENT 3696 * state: 'dir' sent an A in response to an SA 3697 * which it received, SYN_SENT -> ESTABLISHED 3698 */ 3699 nstate = IPF_TCPS_ESTABLISHED; 3700 rval = 1; 3701 } else if (tcpflags & TH_FIN) { 3702 /* 3703 * we see an F from 'dir' which is in SYN_SENT 3704 * state and wants to close its side of the 3705 * connection; SYN_SENT -> FIN_WAIT_1 3706 */ 3707 nstate = IPF_TCPS_FIN_WAIT_1; 3708 rval = 1; 3709 } else if ((tcpflags & TH_OPENING) == TH_OPENING) { 3710 /* 3711 * we see an SA from 'dir' which is already in 3712 * SYN_SENT state, this means we have a 3713 * simultaneous open; SYN_SENT -> SYN_RECEIVED 3714 */ 3715 nstate = IPF_TCPS_SYN_RECEIVED; 3716 rval = 1; 3717 } 3718 break; 3719 3720 case IPF_TCPS_SYN_RECEIVED: /* 2 */ 3721 if ((tcpflags & (TH_SYN|TH_FIN|TH_ACK)) == TH_ACK) { 3722 /* 3723 * we see an A from 'dir' which was in 3724 * SYN_RECEIVED 
state so it must now be in 3725 * established state, SYN_RECEIVED -> 3726 * ESTABLISHED 3727 */ 3728 nstate = IPF_TCPS_ESTABLISHED; 3729 rval = 1; 3730 } else if ((tcpflags & ~(TH_ECN|TH_CWR)) == 3731 TH_OPENING) { 3732 /* 3733 * We see an SA from 'dir' which is already in 3734 * SYN_RECEIVED state. 3735 */ 3736 rval = 2; 3737 } else if (tcpflags & TH_FIN) { 3738 /* 3739 * we see an F from 'dir' which is in 3740 * SYN_RECEIVED state and wants to close its 3741 * side of the connection; SYN_RECEIVED -> 3742 * FIN_WAIT_1 3743 */ 3744 nstate = IPF_TCPS_FIN_WAIT_1; 3745 rval = 1; 3746 } 3747 break; 3748 3749 case IPF_TCPS_HALF_ESTAB: /* 3 */ 3750 if (tcpflags & TH_FIN) { 3751 nstate = IPF_TCPS_FIN_WAIT_1; 3752 rval = 1; 3753 } else if ((tcpflags & TH_ACKMASK) == TH_ACK) { 3754 /* 3755 * If we've picked up a connection in mid 3756 * flight, we could be looking at a follow on 3757 * packet from the same direction as the one 3758 * that created this state. Recognise it but 3759 * do not advance the entire connection's 3760 * state. 3761 */ 3762 switch (ostate) 3763 { 3764 case IPF_TCPS_LISTEN : 3765 case IPF_TCPS_SYN_SENT : 3766 case IPF_TCPS_SYN_RECEIVED : 3767 rval = 1; 3768 break; 3769 case IPF_TCPS_HALF_ESTAB : 3770 case IPF_TCPS_ESTABLISHED : 3771 nstate = IPF_TCPS_ESTABLISHED; 3772 rval = 1; 3773 break; 3774 default : 3775 break; 3776 } 3777 } 3778 break; 3779 3780 case IPF_TCPS_ESTABLISHED: /* 4 */ 3781 rval = 1; 3782 if (tcpflags & TH_FIN) { 3783 /* 3784 * 'dir' closed its side of the connection; 3785 * this gives us a half-closed connection; 3786 * ESTABLISHED -> FIN_WAIT_1 3787 */ 3788 if (ostate == IPF_TCPS_FIN_WAIT_1) { 3789 nstate = IPF_TCPS_CLOSING; 3790 } else { 3791 nstate = IPF_TCPS_FIN_WAIT_1; 3792 } 3793 } else if (tcpflags & TH_ACK) { 3794 /* 3795 * an ACK, should we exclude other flags here? 
3796 */ 3797 if (ostate == IPF_TCPS_FIN_WAIT_1) { 3798 /* 3799 * We know the other side did an active 3800 * close, so we are ACKing the recvd 3801 * FIN packet (does the window matching 3802 * code guarantee this?) and go into 3803 * CLOSE_WAIT state; this gives us a 3804 * half-closed connection 3805 */ 3806 nstate = IPF_TCPS_CLOSE_WAIT; 3807 } else if (ostate < IPF_TCPS_CLOSE_WAIT) { 3808 /* 3809 * still a fully established 3810 * connection reset timeout 3811 */ 3812 nstate = IPF_TCPS_ESTABLISHED; 3813 } 3814 } 3815 break; 3816 3817 case IPF_TCPS_CLOSE_WAIT: /* 5 */ 3818 rval = 1; 3819 if (tcpflags & TH_FIN) { 3820 /* 3821 * application closed and 'dir' sent a FIN, 3822 * we're now going into LAST_ACK state 3823 */ 3824 nstate = IPF_TCPS_LAST_ACK; 3825 } else { 3826 /* 3827 * we remain in CLOSE_WAIT because the other 3828 * side has closed already and we did not 3829 * close our side yet; reset timeout 3830 */ 3831 nstate = IPF_TCPS_CLOSE_WAIT; 3832 } 3833 break; 3834 3835 case IPF_TCPS_FIN_WAIT_1: /* 6 */ 3836 rval = 1; 3837 if ((tcpflags & TH_ACK) && 3838 ostate > IPF_TCPS_CLOSE_WAIT) { 3839 /* 3840 * if the other side is not active anymore 3841 * it has sent us a FIN packet that we are 3842 * ack'ing now with an ACK; this means both 3843 * sides have now closed the connection and 3844 * we go into LAST_ACK 3845 */ 3846 /* 3847 * XXX: how do we know we really are ACKing 3848 * the FIN packet here? does the window code 3849 * guarantee that? 
3850 */ 3851 nstate = IPF_TCPS_LAST_ACK; 3852 } else { 3853 /* 3854 * we closed our side of the connection 3855 * already but the other side is still active 3856 * (ESTABLISHED/CLOSE_WAIT); continue with 3857 * this half-closed connection 3858 */ 3859 nstate = IPF_TCPS_FIN_WAIT_1; 3860 } 3861 break; 3862 3863 case IPF_TCPS_CLOSING: /* 7 */ 3864 if ((tcpflags & (TH_FIN|TH_ACK)) == TH_ACK) { 3865 nstate = IPF_TCPS_TIME_WAIT; 3866 } 3867 rval = 1; 3868 break; 3869 3870 case IPF_TCPS_LAST_ACK: /* 8 */ 3871 /* 3872 * We want to reset timer here to keep state in table. 3873 * If we would allow the state to time out here, while 3874 * there would still be packets being retransmitted, we 3875 * would cut off line between the two peers preventing 3876 * them to close connection properly. 3877 */ 3878 rval = 1; 3879 break; 3880 3881 case IPF_TCPS_FIN_WAIT_2: /* 9 */ 3882 /* NOT USED */ 3883 break; 3884 3885 case IPF_TCPS_TIME_WAIT: /* 10 */ 3886 /* we're in 2MSL timeout now */ 3887 if (ostate == IPF_TCPS_LAST_ACK) { 3888 nstate = IPF_TCPS_CLOSED; 3889 rval = 1; 3890 } else { 3891 rval = 2; 3892 } 3893 break; 3894 3895 case IPF_TCPS_CLOSED: /* 11 */ 3896 rval = 2; 3897 break; 3898 3899 default : 3900 #if defined(_KERNEL) 3901 ASSERT(nstate >= IPF_TCPS_LISTEN && 3902 nstate <= IPF_TCPS_CLOSED); 3903 #else 3904 abort(); 3905 #endif 3906 break; 3907 } 3908 } 3909 3910 /* 3911 * If rval == 2 then do not update the queue position, but treat the 3912 * packet as being ok. 3913 */ 3914 if (rval == 2) { 3915 DTRACE_PROBE1(state_keeping_timer, int, nstate); 3916 rval = 1; 3917 } 3918 else if (rval == 1) { 3919 tqe->tqe_state[dir] = nstate; 3920 /* 3921 * The nstate can either advance to a new state, or remain 3922 * unchanged, resetting the timer by moving to the bottom of 3923 * the queue. 
3924 */ 3925 DTRACE_PROBE1(state_done, int, nstate); 3926 3927 if ((tqe->tqe_flags & TQE_RULEBASED) == 0) 3928 fr_movequeue(tqe, tqe->tqe_ifq, tqtab + nstate, ifs); 3929 } 3930 3931 return rval; 3932 } 3933 3934 3935 /* ------------------------------------------------------------------------ */ 3936 /* Function: ipstate_log */ 3937 /* Returns: Nil */ 3938 /* Parameters: is(I) - pointer to state structure */ 3939 /* type(I) - type of log entry to create */ 3940 /* */ 3941 /* Creates a state table log entry using the state structure and type info. */ 3942 /* passed in. Log packet/byte counts, source/destination address and other */ 3943 /* protocol specific information. */ 3944 /* ------------------------------------------------------------------------ */ 3945 void ipstate_log(is, type, ifs) 3946 struct ipstate *is; 3947 u_int type; 3948 ipf_stack_t *ifs; 3949 { 3950 #ifdef IPFILTER_LOG 3951 struct ipslog ipsl; 3952 size_t sizes[1]; 3953 void *items[1]; 3954 int types[1]; 3955 3956 /* 3957 * Copy information out of the ipstate_t structure and into the 3958 * structure used for logging. 
3959 */ 3960 ipsl.isl_type = type; 3961 ipsl.isl_pkts[0] = is->is_pkts[0] + is->is_icmppkts[0]; 3962 ipsl.isl_bytes[0] = is->is_bytes[0]; 3963 ipsl.isl_pkts[1] = is->is_pkts[1] + is->is_icmppkts[1]; 3964 ipsl.isl_bytes[1] = is->is_bytes[1]; 3965 ipsl.isl_pkts[2] = is->is_pkts[2] + is->is_icmppkts[2]; 3966 ipsl.isl_bytes[2] = is->is_bytes[2]; 3967 ipsl.isl_pkts[3] = is->is_pkts[3] + is->is_icmppkts[3]; 3968 ipsl.isl_bytes[3] = is->is_bytes[3]; 3969 ipsl.isl_src = is->is_src; 3970 ipsl.isl_dst = is->is_dst; 3971 ipsl.isl_p = is->is_p; 3972 ipsl.isl_v = is->is_v; 3973 ipsl.isl_flags = is->is_flags; 3974 ipsl.isl_tag = is->is_tag; 3975 ipsl.isl_rulen = is->is_rulen; 3976 (void) strncpy(ipsl.isl_group, is->is_group, FR_GROUPLEN); 3977 3978 if (ipsl.isl_p == IPPROTO_TCP || ipsl.isl_p == IPPROTO_UDP) { 3979 ipsl.isl_sport = is->is_sport; 3980 ipsl.isl_dport = is->is_dport; 3981 if (ipsl.isl_p == IPPROTO_TCP) { 3982 ipsl.isl_state[0] = is->is_state[0]; 3983 ipsl.isl_state[1] = is->is_state[1]; 3984 } 3985 } else if (ipsl.isl_p == IPPROTO_ICMP) { 3986 ipsl.isl_itype = is->is_icmp.ici_type; 3987 } else if (ipsl.isl_p == IPPROTO_ICMPV6) { 3988 ipsl.isl_itype = is->is_icmp.ici_type; 3989 } else { 3990 ipsl.isl_ps.isl_filler[0] = 0; 3991 ipsl.isl_ps.isl_filler[1] = 0; 3992 } 3993 3994 items[0] = &ipsl; 3995 sizes[0] = sizeof(ipsl); 3996 types[0] = 0; 3997 3998 if (ipllog(IPL_LOGSTATE, NULL, items, sizes, types, 1, ifs)) { 3999 ATOMIC_INCL(ifs->ifs_ips_stats.iss_logged); 4000 } else { 4001 ATOMIC_INCL(ifs->ifs_ips_stats.iss_logfail); 4002 } 4003 #endif 4004 } 4005 4006 4007 #ifdef USE_INET6 4008 /* ------------------------------------------------------------------------ */ 4009 /* Function: fr_checkicmp6matchingstate */ 4010 /* Returns: ipstate_t* - NULL == no match found, */ 4011 /* else pointer to matching state entry */ 4012 /* Parameters: fin(I) - pointer to packet information */ 4013 /* Locks: NULL == no locks, else Read Lock on ipf_state */ 4014 /* */ 4015 /* If we've got 
an ICMPv6 error message, using the information stored in */
/* the ICMPv6 packet, look for a matching state table entry.                */
/* ------------------------------------------------------------------------ */
static ipstate_t *fr_checkicmp6matchingstate(fin)
fr_info_t *fin;
{
	struct icmp6_hdr *ic6, *oic;
	int backward, i;
	ipstate_t *is, **isp;
	u_short sport, dport;
	i6addr_t dst, src;
	u_short savelen;
	icmpinfo_t *ic;
	fr_info_t ofin;
	tcphdr_t *tcp;
	ip6_t *oip6;
	u_char	pr;
	u_int hv;
	ipf_stack_t *ifs = fin->fin_ifs;

	/*
	 * Does it at least have the return (basic) IP header ?
	 * Is it an actual recognised ICMP error type?
	 * Only a basic IP header (no options) should be with
	 * an ICMP error header.
	 */
	if ((fin->fin_v != 6) || (fin->fin_plen < ICMP6ERR_MINPKTLEN) ||
	    !(fin->fin_flx & FI_ICMPERR))
		return NULL;

	ic6 = fin->fin_dp;

	/* The embedded (original) IPv6 header follows the ICMP header. */
	oip6 = (ip6_t *)((char *)ic6 + ICMPERR_ICMPHLEN);
	if (fin->fin_plen < sizeof(*oip6))
		return NULL;

	/* Start from a copy of fin and describe the embedded packet. */
	bcopy((char *)fin, (char *)&ofin, sizeof(*fin));
	ofin.fin_v = 6;
	ofin.fin_ifp = fin->fin_ifp;
	ofin.fin_out = !fin->fin_out;
	ofin.fin_m = NULL;	/* if dereferenced, panic XXX */
	ofin.fin_mp = NULL;	/* if dereferenced, panic XXX */

	/*
	 * We make a fin entry to be able to feed it to
	 * matchsrcdst. Note that not all fields are necessary
	 * but this is the cleanest way. Note further we fill
	 * in fin_mp such that if someone uses it we'll get
	 * a kernel panic. fr_matchsrcdst does not use this.
	 *
	 * watch out here, as ip is in host order and oip6 in network
	 * order. Any change we make must be undone afterwards.
	 */
	savelen = oip6->ip6_plen;
	oip6->ip6_plen = fin->fin_dlen - ICMPERR_ICMPHLEN;
	ofin.fin_flx = FI_NOCKSUM;
	ofin.fin_ip = (ip_t *)oip6;
	ofin.fin_plen = oip6->ip6_plen;
	(void) fr_makefrip(sizeof(*oip6), (ip_t *)oip6, &ofin);
	ofin.fin_flx &= ~(FI_BAD|FI_SHORT);
	/* Restore the length field that was overwritten above. */
	oip6->ip6_plen = savelen;

	if (oip6->ip6_nxt == IPPROTO_ICMPV6) {
		oic = (struct icmp6_hdr *)(oip6 + 1);
		/*
		 * an ICMP error can only be generated as a result of an
		 * ICMP query, not as the response on an ICMP error
		 *
		 * XXX theoretically ICMP_ECHOREP and the other reply's are
		 * ICMP query's as well, but adding them here seems strange XXX
		 */
		 if (!(oic->icmp6_type & ICMP6_INFOMSG_MASK))
			return NULL;

		/*
		 * perform a lookup of the ICMP packet in the state table
		 *
		 * Note that only the in4 part of each address is added to
		 * the hash here, unlike the non-ICMPv6 path below which adds
		 * all four 32-bit words.
		 */
		hv = (pr = oip6->ip6_nxt);
		src.in6 = oip6->ip6_src;
		hv += src.in4.s_addr;
		dst.in6 = oip6->ip6_dst;
		hv += dst.in4.s_addr;
		hv += oic->icmp6_id;
		hv += oic->icmp6_seq;
		hv = DOUBLE_HASH(hv, ifs);

		READ_ENTER(&ifs->ifs_ipf_state);
		for (isp = &ifs->ifs_ips_table[hv]; ((is = *isp) != NULL); ) {
			ic = &is->is_icmp;
			/*
			 * Advance isp before matching: the assignment inside
			 * the condition below overwrites 'is' (with NULL
			 * when fr_matchsrcdst finds no match).
			 */
			isp = &is->is_hnext;
			if ((is->is_p == pr) &&
			    !(is->is_pass & FR_NOICMPERR) &&
			    (oic->icmp6_id == ic->ici_id) &&
			    (oic->icmp6_seq == ic->ici_seq) &&
			    (is = fr_matchsrcdst(&ofin, is, &src,
						 &dst, NULL, FI_ICMPCMP))) {
				/*
				 * in the state table ICMP query's are stored
				 * with the type of the corresponding ICMP
				 * response. Correct here
				 */
				if (((ic->ici_type == ICMP6_ECHO_REPLY) &&
				     (oic->icmp6_type == ICMP6_ECHO_REQUEST)) ||
				     (ic->ici_type - 1 == oic->icmp6_type )) {
					ifs->ifs_ips_stats.iss_hits++;
					backward = IP6_NEQ(&is->is_dst, &src);
					fin->fin_rev = !backward;
					i = (backward << 1) + fin->fin_out;
					is->is_icmppkts[i]++;
					/*
					 * Match: return with the read lock
					 * on ipf_state still held.
					 */
					return is;
				}
			}
		}
		RWLOCK_EXIT(&ifs->ifs_ipf_state);
		return NULL;
	}

	/*
	 * Embedded packet is not ICMPv6: hash on protocol, both full
	 * addresses and, for TCP/UDP, the two port numbers.
	 */
	hv = (pr = oip6->ip6_nxt);
	src.in6 = oip6->ip6_src;
	hv += src.i6[0];
	hv += src.i6[1];
	hv += src.i6[2];
	hv += src.i6[3];
	dst.in6 = oip6->ip6_dst;
	hv += dst.i6[0];
	hv += dst.i6[1];
	hv += dst.i6[2];
	hv += dst.i6[3];

	if ((oip6->ip6_nxt == IPPROTO_TCP) || (oip6->ip6_nxt == IPPROTO_UDP)) {
		/* Only the port fields of the header are read here. */
		tcp = (tcphdr_t *)(oip6 + 1);
		dport = tcp->th_dport;
		sport = tcp->th_sport;
		hv += dport;
		hv += sport;
	} else
		tcp = NULL;
	hv = DOUBLE_HASH(hv, ifs);

	READ_ENTER(&ifs->ifs_ipf_state);
	for (isp = &ifs->ifs_ips_table[hv]; ((is = *isp) != NULL); ) {
		isp = &is->is_hnext;
		/*
		 * Only allow this icmp though if the
		 * encapsulated packet was allowed through the
		 * other way around. Note that the minimal amount
		 * of info present does not allow for checking against
		 * tcp internals such as seq and ack numbers.
		 */
		if ((is->is_p != pr) || (is->is_v != 6) ||
		    (is->is_pass & FR_NOICMPERR))
			continue;
		is = fr_matchsrcdst(&ofin, is, &src, &dst, tcp, FI_ICMPCMP);
		if (is != NULL) {
			ifs->ifs_ips_stats.iss_hits++;
			backward = IP6_NEQ(&is->is_dst, &src);
			fin->fin_rev = !backward;
			i = (backward << 1) + fin->fin_out;
			is->is_icmppkts[i]++;
			/*
			 * we deliberately do not touch the timeouts
			 * for the accompanying state table entry.
			 * It remains to be seen if that is correct. XXX
			 *
			 * As above, the read lock on ipf_state is still
			 * held when returning a match.
			 */
			return is;
		}
	}
	RWLOCK_EXIT(&ifs->ifs_ipf_state);
	return NULL;
}
#endif


/* ------------------------------------------------------------------------ */
/* Function:    fr_sttab_init                                               */
/* Returns:     Nil                                                         */
/* Parameters:  tqp(I) - pointer to an array of timeout queues for TCP      */
/*              ifs(I) - ipf stack instance supplying the timeout values    */
/*                                                                          */
/* Initialise the array of timeout queues for TCP.                          */
/* ------------------------------------------------------------------------ */
void fr_sttab_init(tqp, ifs)
ipftq_t *tqp;
ipf_stack_t *ifs;
{
	int i;

	/*
	 * Reset every queue to empty and chain each one to the following
	 * array element.
	 */
	for (i = IPF_TCP_NSTATES - 1; i >= 0; i--) {
		tqp[i].ifq_ttl = 0;
		tqp[i].ifq_ref = 1;
		tqp[i].ifq_head = NULL;
		tqp[i].ifq_tail = &tqp[i].ifq_head;
		tqp[i].ifq_next = tqp + i + 1;
		MUTEX_INIT(&tqp[i].ifq_lock, "ipftq tcp tab");
	}
	/* The loop pointed the last element past the array; terminate it. */
	tqp[IPF_TCP_NSTATES - 1].ifq_next = NULL;
	/* Per-state timeouts, taken from the stack instance's settings. */
	tqp[IPF_TCPS_CLOSED].ifq_ttl = ifs->ifs_fr_tcpclosed;
	tqp[IPF_TCPS_LISTEN].ifq_ttl = ifs->ifs_fr_tcptimeout;
	tqp[IPF_TCPS_SYN_SENT].ifq_ttl = ifs->ifs_fr_tcptimeout;
	tqp[IPF_TCPS_SYN_RECEIVED].ifq_ttl = ifs->ifs_fr_tcptimeout;
	tqp[IPF_TCPS_ESTABLISHED].ifq_ttl = ifs->ifs_fr_tcpidletimeout;
	tqp[IPF_TCPS_CLOSE_WAIT].ifq_ttl = ifs->ifs_fr_tcphalfclosed;
	tqp[IPF_TCPS_FIN_WAIT_1].ifq_ttl = ifs->ifs_fr_tcphalfclosed;
	tqp[IPF_TCPS_CLOSING].ifq_ttl = ifs->ifs_fr_tcptimeout;
	tqp[IPF_TCPS_LAST_ACK].ifq_ttl = ifs->ifs_fr_tcplastack;
	tqp[IPF_TCPS_FIN_WAIT_2].ifq_ttl = ifs->ifs_fr_tcpclosewait;
	tqp[IPF_TCPS_TIME_WAIT].ifq_ttl = ifs->ifs_fr_tcptimeout;
	tqp[IPF_TCPS_HALF_ESTAB].ifq_ttl = ifs->ifs_fr_tcptimeout;
}


/* ------------------------------------------------------------------------ */
/* Function:    fr_sttab_destroy                                            */
/* Returns:     Nil                                                         */
/* Parameters:  tqp(I) - pointer to an array of timeout queues for TCP      */
/*                                                                          */
/* Do whatever is necessary to "destroy" each of the entries
in the array */ 4231 /* of timeout queues for TCP. */ 4232 /* ------------------------------------------------------------------------ */ 4233 void fr_sttab_destroy(tqp) 4234 ipftq_t *tqp; 4235 { 4236 int i; 4237 4238 for (i = IPF_TCP_NSTATES - 1; i >= 0; i--) 4239 MUTEX_DESTROY(&tqp[i].ifq_lock); 4240 } 4241 4242 4243 /* ------------------------------------------------------------------------ */ 4244 /* Function: fr_statederef */ 4245 /* Returns: Nil */ 4246 /* Parameters: isp(I) - pointer to pointer to state table entry */ 4247 /* ifs - ipf stack instance */ 4248 /* */ 4249 /* Decrement the reference counter for this state table entry and free it */ 4250 /* if there are no more things using it. */ 4251 /* */ 4252 /* Internal parameters: */ 4253 /* state[0] = state of source (host that initiated connection) */ 4254 /* state[1] = state of dest (host that accepted the connection) */ 4255 /* ------------------------------------------------------------------------ */ 4256 void fr_statederef(isp, ifs) 4257 ipstate_t **isp; 4258 ipf_stack_t *ifs; 4259 { 4260 ipstate_t *is; 4261 4262 is = *isp; 4263 *isp = NULL; 4264 4265 MUTEX_ENTER(&is->is_lock); 4266 if (is->is_ref > 1) { 4267 is->is_ref--; 4268 MUTEX_EXIT(&is->is_lock); 4269 #ifndef _KERNEL 4270 if ((is->is_sti.tqe_state[0] > IPF_TCPS_ESTABLISHED) || 4271 (is->is_sti.tqe_state[1] > IPF_TCPS_ESTABLISHED)) { 4272 (void) fr_delstate(is, ISL_ORPHAN, ifs); 4273 } 4274 #endif 4275 return; 4276 } 4277 MUTEX_EXIT(&is->is_lock); 4278 4279 WRITE_ENTER(&ifs->ifs_ipf_state); 4280 (void) fr_delstate(is, ISL_EXPIRE, ifs); 4281 RWLOCK_EXIT(&ifs->ifs_ipf_state); 4282 } 4283 4284 4285 /* ------------------------------------------------------------------------ */ 4286 /* Function: fr_setstatequeue */ 4287 /* Returns: Nil */ 4288 /* Parameters: is(I) - pointer to state structure */ 4289 /* rev(I) - forward(0) or reverse(1) direction */ 4290 /* Locks: ipf_state (read or write) */ 4291 /* */ 4292 /* Put the state entry on its default 
queue entry, using rev as a helped in */ 4293 /* determining which queue it should be placed on. */ 4294 /* ------------------------------------------------------------------------ */ 4295 void fr_setstatequeue(is, rev, ifs) 4296 ipstate_t *is; 4297 int rev; 4298 ipf_stack_t *ifs; 4299 { 4300 ipftq_t *oifq, *nifq; 4301 4302 4303 if ((is->is_sti.tqe_flags & TQE_RULEBASED) != 0) 4304 nifq = is->is_tqehead[rev]; 4305 else 4306 nifq = NULL; 4307 4308 if (nifq == NULL) { 4309 switch (is->is_p) 4310 { 4311 #ifdef USE_INET6 4312 case IPPROTO_ICMPV6 : 4313 if (rev == 1) 4314 nifq = &ifs->ifs_ips_icmpacktq; 4315 else 4316 nifq = &ifs->ifs_ips_icmptq; 4317 break; 4318 #endif 4319 case IPPROTO_ICMP : 4320 if (rev == 1) 4321 nifq = &ifs->ifs_ips_icmpacktq; 4322 else 4323 nifq = &ifs->ifs_ips_icmptq; 4324 break; 4325 case IPPROTO_TCP : 4326 nifq = ifs->ifs_ips_tqtqb + is->is_state[rev]; 4327 break; 4328 4329 case IPPROTO_UDP : 4330 if (rev == 1) 4331 nifq = &ifs->ifs_ips_udpacktq; 4332 else 4333 nifq = &ifs->ifs_ips_udptq; 4334 break; 4335 4336 default : 4337 nifq = &ifs->ifs_ips_iptq; 4338 break; 4339 } 4340 } 4341 4342 oifq = is->is_sti.tqe_ifq; 4343 /* 4344 * If it's currently on a timeout queue, move it from one queue to 4345 * another, else put it on the end of the newly determined queue. 4346 */ 4347 if (oifq != NULL) 4348 fr_movequeue(&is->is_sti, oifq, nifq, ifs); 4349 else 4350 fr_queueappend(&is->is_sti, nifq, is, ifs); 4351 return; 4352 } 4353 4354 4355 /* ------------------------------------------------------------------------ */ 4356 /* Function: fr_stateiter */ 4357 /* Returns: int - 0 == success, else error */ 4358 /* Parameters: token(I) - pointer to ipftoken structure */ 4359 /* itp(I) - pointer to ipfgeniter structure */ 4360 /* */ 4361 /* This function handles the SIOCGENITER ioctl for the state tables and */ 4362 /* walks through the list of entries in the state table list (ips_list.) 
*/
/* ------------------------------------------------------------------------ */
static int fr_stateiter(token, itp, ifs)
ipftoken_t *token;
ipfgeniter_t *itp;
ipf_stack_t *ifs;
{
	ipstate_t *is, *next, zero;
	int error, count;
	char *dst;

	/* Reject requests without a buffer, without items or of wrong type. */
	if (itp->igi_data == NULL)
		return EFAULT;

	if (itp->igi_nitems == 0)
		return EINVAL;

	if (itp->igi_type != IPFGENITER_STATE)
		return EINVAL;

	error = 0;

	READ_ENTER(&ifs->ifs_ipf_state);

	/*
	 * Get "previous" entry from the token and find the next entry.
	 */
	is = token->ipt_data;
	if (is == NULL) {
		next = ifs->ifs_ips_list;
	} else {
		next = is->is_next;
	}

	/*
	 * Each pass copies out one entry.  The read lock is held at the top
	 * of every pass and has been released on every path that leaves the
	 * loop.
	 */
	dst = itp->igi_data;
	for (count = itp->igi_nitems; count > 0; count--) {
		/*
		 * If we found an entry, add a reference to it and update the token.
		 * Otherwise, zero out data to be returned and NULL out token.
		 */
		if (next != NULL) {
			MUTEX_ENTER(&next->is_lock);
			next->is_ref++;
			MUTEX_EXIT(&next->is_lock);
			token->ipt_data = next;
		} else {
			/* End of list: an all-zero entry is copied out. */
			bzero(&zero, sizeof(zero));
			next = &zero;
			token->ipt_data = NULL;
		}

		/*
		 * Safe to release lock now that we have a reference.
		 */
		RWLOCK_EXIT(&ifs->ifs_ipf_state);

		/*
		 * Copy out data and clean up references and tokens.
		 * Any COPYOUT failure is reported as EFAULT.
		 *
		 * NOTE(review): the whole ipstate_t, pointer members
		 * included, is copied out as-is - presumably what the
		 * consumer of this ioctl expects; confirm.
		 */
		error = COPYOUT(next, dst, sizeof(*next));
		if (error != 0)
			error = EFAULT;
		if (token->ipt_data == NULL) {
			ipf_freetoken(token, ifs);
			break;
		} else {
			/* Drop the reference held on the previous entry. */
			if (is != NULL)
				fr_statederef(&is, ifs);
			/*
			 * Last entry in the list: nothing left to iterate.
			 * NOTE(review): presumably ipf_freetoken() also
			 * releases the reference just taken via ipt_data -
			 * verify against its implementation.
			 */
			if (next->is_next == NULL) {
				ipf_freetoken(token, ifs);
				break;
			}
		}

		/* Stop after the last requested item or on a copy error. */
		if ((count == 1) || (error != 0))
			break;

		/* Re-take the lock and step to the following entry. */
		READ_ENTER(&ifs->ifs_ipf_state);
		dst += sizeof(*next);
		is = next;
		next = is->is_next;
	}

	return error;
}