1 /* $OpenBSD: pf_norm.c,v 1.153 2012/02/03 01:57:51 bluhm Exp $ */ 2 3 /* 4 * Copyright 2001 Niels Provos <provos@citi.umich.edu> 5 * Copyright 2009 Henning Brauer <henning@openbsd.org> 6 * Copyright 2011 Alexander Bluhm <bluhm@openbsd.org> 7 * All rights reserved. 8 * 9 * Redistribution and use in source and binary forms, with or without 10 * modification, are permitted provided that the following conditions 11 * are met: 12 * 1. Redistributions of source code must retain the above copyright 13 * notice, this list of conditions and the following disclaimer. 14 * 2. Redistributions in binary form must reproduce the above copyright 15 * notice, this list of conditions and the following disclaimer in the 16 * documentation and/or other materials provided with the distribution. 17 * 18 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR 19 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 20 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 21 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, 22 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 23 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 24 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 25 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 26 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF 27 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 28 */ 29 30 #include "pflog.h" 31 32 #include <sys/param.h> 33 #include <sys/systm.h> 34 #include <sys/mbuf.h> 35 #include <sys/filio.h> 36 #include <sys/fcntl.h> 37 #include <sys/socket.h> 38 #include <sys/kernel.h> 39 #include <sys/time.h> 40 #include <sys/pool.h> 41 #include <sys/syslog.h> 42 43 #include <dev/rndvar.h> 44 #include <net/if.h> 45 #include <net/if_types.h> 46 #include <net/bpf.h> 47 #include <net/route.h> 48 #include <net/if_pflog.h> 49 50 #include <netinet/in.h> 51 #include <netinet/in_var.h> 52 #include <netinet/in_systm.h> 53 #include <netinet/ip.h> 54 #include <netinet/ip_var.h> 55 #include <netinet/tcp.h> 56 #include <netinet/tcp_seq.h> 57 #include <netinet/udp.h> 58 #include <netinet/ip_icmp.h> 59 60 #ifdef INET6 61 #include <netinet/ip6.h> 62 #include <netinet6/ip6_var.h> 63 #endif /* INET6 */ 64 65 #include <net/pfvar.h> 66 67 struct pf_frent { 68 TAILQ_ENTRY(pf_frent) fr_next; 69 struct mbuf *fe_m; 70 u_int16_t fe_hdrlen; /* ipv4 header lenght with ip options 71 ipv6, extension, fragment header */ 72 u_int16_t fe_extoff; /* last extension header offset or 0 */ 73 u_int16_t fe_len; /* fragment length */ 74 u_int16_t fe_off; /* fragment offset */ 75 u_int16_t fe_mff; /* more fragment flag */ 76 }; 77 78 /* keep synced with struct pf_fragment, used in RB_FIND */ 79 struct pf_fragment_cmp { 80 struct pf_addr fr_src; 81 struct pf_addr fr_dst; 82 u_int32_t fr_id; 83 sa_family_t fr_af; 84 u_int8_t fr_proto; 85 u_int8_t fr_direction; 86 }; 87 88 struct pf_fragment { 89 struct pf_addr fr_src; /* ip source address */ 90 struct pf_addr fr_dst; /* ip destination address */ 91 u_int32_t fr_id; /* fragment id for reassemble */ 92 sa_family_t fr_af; /* address family */ 93 u_int8_t fr_proto; /* protocol of this fragment */ 94 u_int8_t fr_direction; /* pf packet direction */ 95 96 RB_ENTRY(pf_fragment) fr_entry; 97 TAILQ_ENTRY(pf_fragment) frag_next; 98 TAILQ_HEAD(pf_fragq, pf_frent) fr_queue; 99 u_int32_t fr_timeout; 100 u_int16_t fr_maxlen; /* maximum length of single fragment */ 101 }; 102 103 struct pf_fragment_tag { 104 u_int16_t ft_hdrlen; /* header lenght of reassembled pkt */ 105 u_int16_t ft_extoff; /* last extension header offset or 0 */ 106 u_int16_t ft_maxlen; /* maximum fragment payload length */ 107 }; 108 109 TAILQ_HEAD(pf_fragqueue, pf_fragment) pf_fragqueue; 110 111 static __inline int pf_frag_compare(struct pf_fragment *, 112 struct pf_fragment *); 113 RB_HEAD(pf_frag_tree, pf_fragment) pf_frag_tree, pf_cache_tree; 114 RB_PROTOTYPE(pf_frag_tree, pf_fragment, fr_entry, pf_frag_compare); 115 RB_GENERATE(pf_frag_tree, pf_fragment, fr_entry, pf_frag_compare); 116 117 /* Private prototypes */ 118 void pf_flush_fragments(void); 119 void pf_free_fragment(struct pf_fragment *); 120 struct pf_fragment *pf_find_fragment(struct pf_fragment_cmp *, 121 struct pf_frag_tree *); 122 struct pf_frent *pf_create_fragment(u_short *); 123 struct pf_fragment *pf_fillup_fragment(struct pf_fragment_cmp *, 124 struct pf_frent *, u_short *); 125 int pf_isfull_fragment(struct pf_fragment *); 126 struct mbuf *pf_join_fragment(struct pf_fragment *); 127 int pf_reassemble(struct mbuf **, int, u_short *); 128 #ifdef INET6 129 int pf_reassemble6(struct mbuf **, struct ip6_frag *, 130 u_int16_t, u_int16_t, int, u_short *); 131 #endif /* INET6 */ 132 133 /* Globals */ 134 struct pool pf_frent_pl, pf_frag_pl; 135 struct pool pf_state_scrub_pl; 136 int pf_nfrents; 137 138 void 139 pf_normalize_init(void) 140 { 141 pool_init(&pf_frent_pl, sizeof(struct pf_frent), 0, 0, 0, "pffrent", 142 NULL); 143 pool_init(&pf_frag_pl, sizeof(struct pf_fragment), 0, 0, 0, "pffrag", 144 NULL); 145 pool_init(&pf_state_scrub_pl, sizeof(struct pf_state_scrub), 0, 0, 0, 146 "pfstscr", NULL); 147 148 pool_sethiwat(&pf_frag_pl, PFFRAG_FRAG_HIWAT); 149 pool_sethardlimit(&pf_frent_pl, PFFRAG_FRENT_HIWAT, NULL, 0); 150 151 TAILQ_INIT(&pf_fragqueue); 152 } 153 154 static __inline int 155 pf_frag_compare(struct pf_fragment *a, struct pf_fragment *b) 156 { 157 int diff; 158 159 if ((diff = a->fr_id - b->fr_id) != 0) 160 return (diff); 161 if ((diff = a->fr_proto - b->fr_proto) != 0) 162 return (diff); 163 if ((diff = a->fr_af - b->fr_af) != 0) 164 return (diff); 165 if ((diff = pf_addr_compare(&a->fr_src, &b->fr_src, a->fr_af)) != 0) 166 return (diff); 167 if ((diff = pf_addr_compare(&a->fr_dst, &b->fr_dst, a->fr_af)) != 0) 168 return (diff); 169 return (0); 170 } 171 172 void 173 pf_purge_expired_fragments(void) 174 { 175 struct pf_fragment *frag; 176 u_int32_t expire = time_second - 177 pf_default_rule.timeout[PFTM_FRAG]; 178 179 while ((frag = TAILQ_LAST(&pf_fragqueue, pf_fragqueue)) != NULL) { 180 if (frag->fr_timeout > expire) 181 break; 182 183 DPFPRINTF(LOG_NOTICE, "expiring %d(%p)", frag->fr_id, frag); 184 pf_free_fragment(frag); 185 } 186 } 187 188 /* 189 * Try to flush old fragments to make space for new ones 190 */ 191 192 void 193 pf_flush_fragments(void) 194 { 195 struct pf_fragment *frag; 196 int goal; 197 198 goal = pf_nfrents * 9 / 10; 199 DPFPRINTF(LOG_NOTICE, "trying to free > %d frents", 200 pf_nfrents - goal); 201 while (goal < pf_nfrents) { 202 frag = TAILQ_LAST(&pf_fragqueue, pf_fragqueue); 203 if (frag == NULL) 204 break; 205 pf_free_fragment(frag); 206 } 207 } 208 209 /* 210 * Remove a fragment from the fragment queue, free its fragment entries, 211 * and free the fragment itself. 212 */ 213 void 214 pf_free_fragment(struct pf_fragment *frag) 215 { 216 struct pf_frent *frent; 217 218 RB_REMOVE(pf_frag_tree, &pf_frag_tree, frag); 219 TAILQ_REMOVE(&pf_fragqueue, frag, frag_next); 220 221 /* Free all fragment entries */ 222 while ((frent = TAILQ_FIRST(&frag->fr_queue)) != NULL) { 223 TAILQ_REMOVE(&frag->fr_queue, frent, fr_next); 224 225 m_freem(frent->fe_m); 226 pool_put(&pf_frent_pl, frent); 227 pf_nfrents--; 228 } 229 230 pool_put(&pf_frag_pl, frag); 231 } 232 233 struct pf_fragment * 234 pf_find_fragment(struct pf_fragment_cmp *key, struct pf_frag_tree *tree) 235 { 236 struct pf_fragment *frag; 237 238 frag = RB_FIND(pf_frag_tree, tree, (struct pf_fragment *)key); 239 if (frag != NULL) { 240 /* XXX Are we sure we want to update the timeout? */ 241 frag->fr_timeout = time_second; 242 TAILQ_REMOVE(&pf_fragqueue, frag, frag_next); 243 TAILQ_INSERT_HEAD(&pf_fragqueue, frag, frag_next); 244 } 245 246 return (frag); 247 } 248 249 struct pf_frent * 250 pf_create_fragment(u_short *reason) 251 { 252 struct pf_frent *frent; 253 254 frent = pool_get(&pf_frent_pl, PR_NOWAIT); 255 if (frent == NULL) { 256 pf_flush_fragments(); 257 frent = pool_get(&pf_frent_pl, PR_NOWAIT); 258 if (frent == NULL) { 259 REASON_SET(reason, PFRES_MEMORY); 260 return (NULL); 261 } 262 } 263 pf_nfrents++; 264 265 return (frent); 266 } 267 268 struct pf_fragment * 269 pf_fillup_fragment(struct pf_fragment_cmp *key, struct pf_frent *frent, 270 u_short *reason) 271 { 272 struct pf_frent *after, *next, *prev; 273 struct pf_fragment *frag; 274 u_int16_t total; 275 276 /* No empty fragments */ 277 if (frent->fe_len == 0) { 278 DPFPRINTF(LOG_NOTICE, "bad fragment: len 0"); 279 goto bad_fragment; 280 } 281 282 /* All fragments are 8 byte aligned */ 283 if (frent->fe_mff && (frent->fe_len & 0x7)) { 284 DPFPRINTF(LOG_NOTICE, "bad fragment: mff and len %d", 285 frent->fe_len); 286 goto bad_fragment; 287 } 288 289 /* Respect maximum length, IP_MAXPACKET == IPV6_MAXPACKET */ 290 if (frent->fe_off + frent->fe_len > IP_MAXPACKET) { 291 DPFPRINTF(LOG_NOTICE, "bad fragment: max packet %d", 292 frent->fe_off + frent->fe_len); 293 goto bad_fragment; 294 } 295 296 DPFPRINTF(LOG_NOTICE, key->fr_af == AF_INET ? 297 "reass frag %d @ %d-%d" : "reass frag %#08x @ %d-%d", 298 key->fr_id, frent->fe_off, frent->fe_off + frent->fe_len); 299 300 /* Fully buffer all of the fragments in this fragment queue */ 301 frag = pf_find_fragment(key, &pf_frag_tree); 302 303 /* Create a new reassembly queue for this packet */ 304 if (frag == NULL) { 305 frag = pool_get(&pf_frag_pl, PR_NOWAIT); 306 if (frag == NULL) { 307 pf_flush_fragments(); 308 frag = pool_get(&pf_frag_pl, PR_NOWAIT); 309 if (frag == NULL) { 310 REASON_SET(reason, PFRES_MEMORY); 311 goto drop_fragment; 312 } 313 } 314 315 *(struct pf_fragment_cmp *)frag = *key; 316 TAILQ_INIT(&frag->fr_queue); 317 frag->fr_timeout = time_second; 318 frag->fr_maxlen = frent->fe_len; 319 320 RB_INSERT(pf_frag_tree, &pf_frag_tree, frag); 321 TAILQ_INSERT_HEAD(&pf_fragqueue, frag, frag_next); 322 323 /* We do not have a previous fragment */ 324 TAILQ_INSERT_HEAD(&frag->fr_queue, frent, fr_next); 325 326 return (frag); 327 } 328 329 KASSERT(!TAILQ_EMPTY(&frag->fr_queue)); 330 331 /* Remember maximum fragment len for refragmentation */ 332 if (frent->fe_len > frag->fr_maxlen) 333 frag->fr_maxlen = frent->fe_len; 334 335 /* Maximum data we have seen already */ 336 total = TAILQ_LAST(&frag->fr_queue, pf_fragq)->fe_off + 337 TAILQ_LAST(&frag->fr_queue, pf_fragq)->fe_len; 338 339 /* Non terminal fragments must have more fragments flag */ 340 if (frent->fe_off + frent->fe_len < total && !frent->fe_mff) 341 goto bad_fragment; 342 343 /* Check if we saw the last fragment already */ 344 if (!TAILQ_LAST(&frag->fr_queue, pf_fragq)->fe_mff) { 345 if (frent->fe_off + frent->fe_len > total || 346 (frent->fe_off + frent->fe_len == total && frent->fe_mff)) 347 goto bad_fragment; 348 } else { 349 if (frent->fe_off + frent->fe_len == total && !frent->fe_mff) 350 goto bad_fragment; 351 } 352 353 /* Find a fragment after the current one */ 354 prev = NULL; 355 TAILQ_FOREACH(after, &frag->fr_queue, fr_next) { 356 if (after->fe_off > frent->fe_off) 357 break; 358 prev = after; 359 } 360 361 KASSERT(prev != NULL || after != NULL); 362 363 if (prev != NULL && prev->fe_off + prev->fe_len > frent->fe_off) { 364 u_int16_t precut; 365 366 #ifdef INET6 367 if (frag->fr_af == AF_INET6) 368 goto free_fragment; 369 #endif /* INET6 */ 370 371 precut = prev->fe_off + prev->fe_len - frent->fe_off; 372 if (precut >= frent->fe_len) { 373 DPFPRINTF(LOG_NOTICE, "new frag overlapped"); 374 goto drop_fragment; 375 } 376 DPFPRINTF(LOG_NOTICE, "frag head overlap %d", precut); 377 m_adj(frent->fe_m, precut); 378 frent->fe_off += precut; 379 frent->fe_len -= precut; 380 } 381 382 for (; after != NULL && frent->fe_off + frent->fe_len > after->fe_off; 383 after = next) 384 { 385 u_int16_t aftercut; 386 387 #ifdef INET6 388 if (frag->fr_af == AF_INET6) 389 goto free_fragment; 390 #endif /* INET6 */ 391 392 aftercut = frent->fe_off + frent->fe_len - after->fe_off; 393 if (aftercut < after->fe_len) { 394 DPFPRINTF(LOG_NOTICE, "frag tail overlap %d", aftercut); 395 m_adj(after->fe_m, aftercut); 396 after->fe_off += aftercut; 397 after->fe_len -= aftercut; 398 break; 399 } 400 401 /* This fragment is completely overlapped, lose it */ 402 DPFPRINTF(LOG_NOTICE, "old frag overlapped"); 403 next = TAILQ_NEXT(after, fr_next); 404 TAILQ_REMOVE(&frag->fr_queue, after, fr_next); 405 406 m_freem(after->fe_m); 407 pool_put(&pf_frent_pl, after); 408 pf_nfrents--; 409 } 410 411 if (prev == NULL) 412 TAILQ_INSERT_HEAD(&frag->fr_queue, frent, fr_next); 413 else 414 TAILQ_INSERT_AFTER(&frag->fr_queue, prev, frent, fr_next); 415 416 return (frag); 417 418 #ifdef INET6 419 free_fragment: 420 /* 421 * RFC 5722, Errata 3089: When reassembling an IPv6 datagram, if one 422 * or more its constituent fragments is determined to be an overlapping 423 * fragment, the entire datagram (and any constituent fragments) MUST 424 * be silently discarded. 425 */ 426 DPFPRINTF(LOG_NOTICE, "flush overlapping fragments"); 427 pf_free_fragment(frag); 428 #endif /* INET6 */ 429 bad_fragment: 430 REASON_SET(reason, PFRES_FRAG); 431 drop_fragment: 432 pool_put(&pf_frent_pl, frent); 433 pf_nfrents--; 434 return (NULL); 435 } 436 437 int 438 pf_isfull_fragment(struct pf_fragment *frag) 439 { 440 struct pf_frent *frent, *next; 441 u_int16_t off, total; 442 443 KASSERT(!TAILQ_EMPTY(&frag->fr_queue)); 444 445 /* Check if we are completely reassembled */ 446 if (TAILQ_LAST(&frag->fr_queue, pf_fragq)->fe_mff) 447 return (0); 448 449 /* Maximum data we have seen already */ 450 total = TAILQ_LAST(&frag->fr_queue, pf_fragq)->fe_off + 451 TAILQ_LAST(&frag->fr_queue, pf_fragq)->fe_len; 452 453 /* Check if we have all the data */ 454 off = 0; 455 for (frent = TAILQ_FIRST(&frag->fr_queue); frent; frent = next) { 456 next = TAILQ_NEXT(frent, fr_next); 457 458 off += frent->fe_len; 459 if (off < total && (next == NULL || next->fe_off != off)) { 460 DPFPRINTF(LOG_NOTICE, 461 "missing fragment at %d, next %d, total %d", 462 off, next == NULL ? -1 : next->fe_off, total); 463 return (0); 464 } 465 } 466 DPFPRINTF(LOG_NOTICE, "%d < %d?", off, total); 467 if (off < total) 468 return (0); 469 KASSERT(off == total); 470 471 return (1); 472 } 473 474 struct mbuf * 475 pf_join_fragment(struct pf_fragment *frag) 476 { 477 struct mbuf *m, *m2; 478 struct pf_frent *frent; 479 480 frent = TAILQ_FIRST(&frag->fr_queue); 481 TAILQ_REMOVE(&frag->fr_queue, frent, fr_next); 482 483 /* Magic from ip_input */ 484 m = frent->fe_m; 485 m2 = m->m_next; 486 m->m_next = NULL; 487 m_cat(m, m2); 488 pool_put(&pf_frent_pl, frent); 489 pf_nfrents--; 490 491 while ((frent = TAILQ_FIRST(&frag->fr_queue)) != NULL) { 492 TAILQ_REMOVE(&frag->fr_queue, frent, fr_next); 493 494 m2 = frent->fe_m; 495 /* Strip off ip header */ 496 m_adj(m2, frent->fe_hdrlen); 497 pool_put(&pf_frent_pl, frent); 498 pf_nfrents--; 499 m_cat(m, m2); 500 } 501 502 /* Remove from fragment queue */ 503 pf_free_fragment(frag); 504 505 return (m); 506 } 507 508 int 509 pf_reassemble(struct mbuf **m0, int dir, u_short *reason) 510 { 511 struct mbuf *m = *m0; 512 struct ip *ip = mtod(m, struct ip *); 513 struct pf_frent *frent; 514 struct pf_fragment *frag; 515 struct pf_fragment_cmp key; 516 u_int16_t total, hdrlen; 517 518 /* Get an entry for the fragment queue */ 519 if ((frent = pf_create_fragment(reason)) == NULL) 520 return (PF_DROP); 521 522 frent->fe_m = m; 523 frent->fe_hdrlen = ip->ip_hl << 2; 524 frent->fe_extoff = 0; 525 frent->fe_len = ntohs(ip->ip_len) - (ip->ip_hl << 2); 526 frent->fe_off = (ntohs(ip->ip_off) & IP_OFFMASK) << 3; 527 frent->fe_mff = ntohs(ip->ip_off) & IP_MF; 528 529 key.fr_src.v4 = ip->ip_src; 530 key.fr_dst.v4 = ip->ip_dst; 531 key.fr_af = AF_INET; 532 key.fr_proto = ip->ip_p; 533 key.fr_id = ip->ip_id; 534 key.fr_direction = dir; 535 536 if ((frag = pf_fillup_fragment(&key, frent, reason)) == NULL) 537 return (PF_DROP); 538 539 /* The mbuf is part of the fragment entry, no direct free or access */ 540 m = *m0 = NULL; 541 542 if (!pf_isfull_fragment(frag)) 543 return (PF_PASS); /* drop because *m0 is NULL, no error */ 544 545 /* We have all the data */ 546 frent = TAILQ_FIRST(&frag->fr_queue); 547 KASSERT(frent != NULL); 548 total = TAILQ_LAST(&frag->fr_queue, pf_fragq)->fe_off + 549 TAILQ_LAST(&frag->fr_queue, pf_fragq)->fe_len; 550 hdrlen = frent->fe_hdrlen; 551 552 m = *m0 = pf_join_fragment(frag); 553 frag = NULL; 554 555 if (m->m_flags & M_PKTHDR) { 556 int plen = 0; 557 for (m = *m0; m; m = m->m_next) 558 plen += m->m_len; 559 m = *m0; 560 m->m_pkthdr.len = plen; 561 } 562 563 ip = mtod(m, struct ip *); 564 ip->ip_len = htons(hdrlen + total); 565 ip->ip_off &= ~(IP_MF|IP_OFFMASK); 566 567 if (hdrlen + total > IP_MAXPACKET) { 568 DPFPRINTF(LOG_NOTICE, "drop: too big: %d", total); 569 ip->ip_len = 0; 570 REASON_SET(reason, PFRES_SHORT); 571 /* PF_DROP requires a valid mbuf *m0 in pf_test() */ 572 return (PF_DROP); 573 } 574 575 DPFPRINTF(LOG_NOTICE, "complete: %p(%d)", m, ntohs(ip->ip_len)); 576 return (PF_PASS); 577 } 578 579 #ifdef INET6 580 int 581 pf_reassemble6(struct mbuf **m0, struct ip6_frag *fraghdr, 582 u_int16_t hdrlen, u_int16_t extoff, int dir, u_short *reason) 583 { 584 struct mbuf *m = *m0; 585 struct ip6_hdr *ip6 = mtod(m, struct ip6_hdr *); 586 struct m_tag *mtag; 587 struct pf_fragment_tag *ftag; 588 struct pf_frent *frent; 589 struct pf_fragment *frag; 590 struct pf_fragment_cmp key; 591 int off; 592 u_int16_t total, maxlen; 593 u_int8_t proto; 594 595 /* Get an entry for the fragment queue */ 596 if ((frent = pf_create_fragment(reason)) == NULL) 597 return (PF_DROP); 598 599 frent->fe_m = m; 600 frent->fe_hdrlen = hdrlen; 601 frent->fe_extoff = extoff; 602 frent->fe_len = sizeof(struct ip6_hdr) + ntohs(ip6->ip6_plen) - hdrlen; 603 frent->fe_off = ntohs(fraghdr->ip6f_offlg & IP6F_OFF_MASK); 604 frent->fe_mff = fraghdr->ip6f_offlg & IP6F_MORE_FRAG; 605 606 key.fr_src.v6 = ip6->ip6_src; 607 key.fr_dst.v6 = ip6->ip6_dst; 608 key.fr_af = AF_INET6; 609 /* Only the first fragment's protocol is relevant */ 610 key.fr_proto = 0; 611 key.fr_id = fraghdr->ip6f_ident; 612 key.fr_direction = dir; 613 614 if ((frag = pf_fillup_fragment(&key, frent, reason)) == NULL) 615 return (PF_DROP); 616 617 /* The mbuf is part of the fragment entry, no direct free or access */ 618 m = *m0 = NULL; 619 620 if (!pf_isfull_fragment(frag)) 621 return (PF_PASS); /* drop because *m0 is NULL, no error */ 622 623 /* We have all the data */ 624 extoff = frent->fe_extoff; 625 maxlen = frag->fr_maxlen; 626 frent = TAILQ_FIRST(&frag->fr_queue); 627 KASSERT(frent != NULL); 628 total = TAILQ_LAST(&frag->fr_queue, pf_fragq)->fe_off + 629 TAILQ_LAST(&frag->fr_queue, pf_fragq)->fe_len; 630 hdrlen = frent->fe_hdrlen - sizeof(struct ip6_frag); 631 632 m = *m0 = pf_join_fragment(frag); 633 frag = NULL; 634 635 /* Take protocol from first fragment header */ 636 if ((m = m_getptr(m, hdrlen + offsetof(struct ip6_frag, ip6f_nxt), 637 &off)) == NULL) 638 panic("pf_reassemble6: short mbuf chain"); 639 proto = *(mtod(m, caddr_t) + off); 640 m = *m0; 641 642 /* Delete frag6 header */ 643 if (frag6_deletefraghdr(m, hdrlen) != 0) 644 goto fail; 645 646 if (m->m_flags & M_PKTHDR) { 647 int plen = 0; 648 for (m = *m0; m; m = m->m_next) 649 plen += m->m_len; 650 m = *m0; 651 m->m_pkthdr.len = plen; 652 } 653 654 if ((mtag = m_tag_get(PACKET_TAG_PF_REASSEMBLED, sizeof(struct 655 pf_fragment_tag), M_NOWAIT)) == NULL) 656 goto fail; 657 ftag = (struct pf_fragment_tag *)(mtag + 1); 658 ftag->ft_hdrlen = hdrlen; 659 ftag->ft_extoff = extoff; 660 ftag->ft_maxlen = maxlen; 661 m_tag_prepend(m, mtag); 662 663 ip6 = mtod(m, struct ip6_hdr *); 664 ip6->ip6_plen = htons(hdrlen - sizeof(struct ip6_hdr) + total); 665 if (extoff) { 666 /* Write protocol into next field of last extension header */ 667 if ((m = m_getptr(m, extoff + offsetof(struct ip6_ext, 668 ip6e_nxt), &off)) == NULL) 669 panic("pf_reassemble6: short mbuf chain"); 670 *(mtod(m, caddr_t) + off) = proto; 671 m = *m0; 672 } else 673 ip6->ip6_nxt = proto; 674 675 if (hdrlen - sizeof(struct ip6_hdr) + total > IPV6_MAXPACKET) { 676 DPFPRINTF(LOG_NOTICE, "drop: too big: %d", total); 677 ip6->ip6_plen = 0; 678 REASON_SET(reason, PFRES_SHORT); 679 /* PF_DROP requires a valid mbuf *m0 in pf_test6() */ 680 return (PF_DROP); 681 } 682 683 DPFPRINTF(LOG_NOTICE, "complete: %p(%d)", m, ntohs(ip6->ip6_plen)); 684 return (PF_PASS); 685 686 fail: 687 REASON_SET(reason, PFRES_MEMORY); 688 /* PF_DROP requires a valid mbuf *m0 in pf_test6(), will free later */ 689 return (PF_DROP); 690 } 691 692 int 693 pf_refragment6(struct mbuf **m0, struct m_tag *mtag, int dir) 694 { 695 struct mbuf *m = *m0, *t; 696 struct pf_fragment_tag *ftag = (struct pf_fragment_tag *)(mtag + 1); 697 u_int32_t mtu; 698 u_int16_t hdrlen, extoff, maxlen; 699 u_int8_t proto; 700 int error, action; 701 702 hdrlen = ftag->ft_hdrlen; 703 extoff = ftag->ft_extoff; 704 maxlen = ftag->ft_maxlen; 705 m_tag_delete(m, mtag); 706 mtag = NULL; 707 ftag = NULL; 708 709 if (extoff) { 710 int off; 711 712 /* Use protocol from next field of last extension header */ 713 if ((m = m_getptr(m, extoff + offsetof(struct ip6_ext, 714 ip6e_nxt), &off)) == NULL) 715 panic("pf_refragment6: short mbuf chain"); 716 proto = *(mtod(m, caddr_t) + off); 717 *(mtod(m, caddr_t) + off) = IPPROTO_FRAGMENT; 718 m = *m0; 719 } else { 720 struct ip6_hdr *hdr; 721 722 hdr = mtod(m, struct ip6_hdr *); 723 proto = hdr->ip6_nxt; 724 hdr->ip6_nxt = IPPROTO_FRAGMENT; 725 } 726 727 /* 728 * Maxlen may be less than 8 iff there was only a single 729 * fragment. As it was fragmented before, add a fragment 730 * header also for a single fragment. If total or maxlen 731 * is less than 8, ip6_fragment() will return EMSGSIZE and 732 * we drop the packet. 733 */ 734 735 mtu = hdrlen + sizeof(struct ip6_frag) + maxlen; 736 error = ip6_fragment(m, hdrlen, proto, mtu); 737 738 m = (*m0)->m_nextpkt; 739 (*m0)->m_nextpkt = NULL; 740 if (error == 0) { 741 /* The first mbuf contains the unfragmented packet */ 742 m_freem(*m0); 743 *m0 = NULL; 744 action = PF_PASS; 745 } else { 746 /* Drop expects an mbuf to free */ 747 DPFPRINTF(LOG_NOTICE, "refragment error %d", error); 748 action = PF_DROP; 749 } 750 for (t = m; m; m = t) { 751 t = m->m_nextpkt; 752 m->m_nextpkt = NULL; 753 m->m_pkthdr.pf.flags |= PF_TAG_REFRAGMENTED; 754 if (error == 0) 755 ip6_forward(m, 0); 756 else 757 m_freem(m); 758 } 759 760 return (action); 761 } 762 #endif /* INET6 */ 763 764 int 765 pf_normalize_ip(struct pf_pdesc *pd, u_short *reason) 766 { 767 struct ip *h = mtod(pd->m, struct ip *); 768 u_int16_t fragoff = (ntohs(h->ip_off) & IP_OFFMASK) << 3; 769 u_int16_t mff = (ntohs(h->ip_off) & IP_MF); 770 771 if (!fragoff && !mff) 772 goto no_fragment; 773 774 /* Clear IP_DF if we're in no-df mode */ 775 if (pf_status.reass & PF_REASS_NODF && h->ip_off & htons(IP_DF)) 776 h->ip_off &= htons(~IP_DF); 777 778 /* We're dealing with a fragment now. Don't allow fragments 779 * with IP_DF to enter the cache. If the flag was cleared by 780 * no-df above, fine. Otherwise drop it. 781 */ 782 if (h->ip_off & htons(IP_DF)) { 783 DPFPRINTF(LOG_NOTICE, "bad fragment: IP_DF"); 784 REASON_SET(reason, PFRES_FRAG); 785 return (PF_DROP); 786 } 787 788 if (!pf_status.reass) 789 return (PF_PASS); /* no reassembly */ 790 791 /* Returns PF_DROP or m is NULL or completely reassembled mbuf */ 792 if (pf_reassemble(&pd->m, pd->dir, reason) != PF_PASS) 793 return (PF_DROP); 794 if (pd->m == NULL) 795 return (PF_PASS); /* packet has been reassembled, no error */ 796 797 h = mtod(pd->m, struct ip *); 798 799 no_fragment: 800 /* At this point, only IP_DF is allowed in ip_off */ 801 if (h->ip_off & ~htons(IP_DF)) 802 h->ip_off &= htons(IP_DF); 803 804 return (PF_PASS); 805 } 806 807 #ifdef INET6 808 int 809 pf_normalize_ip6(struct pf_pdesc *pd, u_short *reason) 810 { 811 struct ip6_frag frag; 812 813 if (pd->fragoff == 0) 814 goto no_fragment; 815 816 if (!pf_pull_hdr(pd->m, pd->fragoff, &frag, sizeof(frag), NULL, reason, 817 AF_INET6)) 818 return (PF_DROP); 819 820 if (!pf_status.reass) 821 return (PF_PASS); /* no reassembly */ 822 823 /* Returns PF_DROP or m is NULL or completely reassembled mbuf */ 824 if (pf_reassemble6(&pd->m, &frag, pd->fragoff + sizeof(frag), 825 pd->extoff, pd->dir, reason) != PF_PASS) 826 return (PF_DROP); 827 if (pd->m == NULL) 828 return (PF_PASS); /* packet has been reassembled, no error */ 829 830 no_fragment: 831 return (PF_PASS); 832 } 833 #endif /* INET6 */ 834 835 int 836 pf_normalize_tcp(struct pf_pdesc *pd) 837 { 838 struct tcphdr *th = pd->hdr.tcp; 839 u_short reason; 840 u_int8_t flags; 841 u_int rewrite = 0; 842 843 flags = th->th_flags; 844 if (flags & TH_SYN) { 845 /* Illegal packet */ 846 if (flags & TH_RST) 847 goto tcp_drop; 848 849 if (flags & TH_FIN) 850 flags &= ~TH_FIN; 851 } else { 852 /* Illegal packet */ 853 if (!(flags & (TH_ACK|TH_RST))) 854 goto tcp_drop; 855 } 856 857 if (!(flags & TH_ACK)) { 858 /* These flags are only valid if ACK is set */ 859 if ((flags & TH_FIN) || (flags & TH_PUSH) || (flags & TH_URG)) 860 goto tcp_drop; 861 } 862 863 /* If flags changed, or reserved data set, then adjust */ 864 if (flags != th->th_flags || th->th_x2 != 0) { 865 u_int16_t ov, nv; 866 867 ov = *(u_int16_t *)(&th->th_ack + 1); 868 th->th_flags = flags; 869 th->th_x2 = 0; 870 nv = *(u_int16_t *)(&th->th_ack + 1); 871 872 th->th_sum = pf_cksum_fixup(th->th_sum, ov, nv, 0); 873 rewrite = 1; 874 } 875 876 /* Remove urgent pointer, if TH_URG is not set */ 877 if (!(flags & TH_URG) && th->th_urp) { 878 th->th_sum = pf_cksum_fixup(th->th_sum, th->th_urp, 0, 0); 879 th->th_urp = 0; 880 rewrite = 1; 881 } 882 883 /* copy back packet headers if we sanitized */ 884 if (rewrite) 885 m_copyback(pd->m, pd->off, sizeof(*th), th, M_NOWAIT); 886 887 return (PF_PASS); 888 889 tcp_drop: 890 REASON_SET(&reason, PFRES_NORM); 891 return (PF_DROP); 892 } 893 894 int 895 pf_normalize_tcp_init(struct pf_pdesc *pd, struct pf_state_peer *src, 896 struct pf_state_peer *dst) 897 { 898 struct tcphdr *th = pd->hdr.tcp; 899 u_int32_t tsval, tsecr; 900 u_int8_t hdr[60]; 901 u_int8_t *opt; 902 903 KASSERT(src->scrub == NULL); 904 905 src->scrub = pool_get(&pf_state_scrub_pl, PR_NOWAIT); 906 if (src->scrub == NULL) 907 return (1); 908 bzero(src->scrub, sizeof(*src->scrub)); 909 910 switch (pd->af) { 911 #ifdef INET 912 case AF_INET: { 913 struct ip *h = mtod(pd->m, struct ip *); 914 src->scrub->pfss_ttl = h->ip_ttl; 915 break; 916 } 917 #endif /* INET */ 918 #ifdef INET6 919 case AF_INET6: { 920 struct ip6_hdr *h = mtod(pd->m, struct ip6_hdr *); 921 src->scrub->pfss_ttl = h->ip6_hlim; 922 break; 923 } 924 #endif /* INET6 */ 925 } 926 927 928 /* 929 * All normalizations below are only begun if we see the start of 930 * the connections. They must all set an enabled bit in pfss_flags 931 */ 932 if ((th->th_flags & TH_SYN) == 0) 933 return (0); 934 935 936 if (th->th_off > (sizeof(struct tcphdr) >> 2) && src->scrub && 937 pf_pull_hdr(pd->m, pd->off, hdr, th->th_off << 2, NULL, NULL, 938 pd->af)) { 939 /* Diddle with TCP options */ 940 int hlen; 941 opt = hdr + sizeof(struct tcphdr); 942 hlen = (th->th_off << 2) - sizeof(struct tcphdr); 943 while (hlen >= TCPOLEN_TIMESTAMP) { 944 switch (*opt) { 945 case TCPOPT_EOL: /* FALLTHROUGH */ 946 case TCPOPT_NOP: 947 opt++; 948 hlen--; 949 break; 950 case TCPOPT_TIMESTAMP: 951 if (opt[1] >= TCPOLEN_TIMESTAMP) { 952 src->scrub->pfss_flags |= 953 PFSS_TIMESTAMP; 954 src->scrub->pfss_ts_mod = 955 htonl(arc4random()); 956 957 /* note PFSS_PAWS not set yet */ 958 memcpy(&tsval, &opt[2], 959 sizeof(u_int32_t)); 960 memcpy(&tsecr, &opt[6], 961 sizeof(u_int32_t)); 962 src->scrub->pfss_tsval0 = ntohl(tsval); 963 src->scrub->pfss_tsval = ntohl(tsval); 964 src->scrub->pfss_tsecr = ntohl(tsecr); 965 getmicrouptime(&src->scrub->pfss_last); 966 } 967 /* FALLTHROUGH */ 968 default: 969 hlen -= MAX(opt[1], 2); 970 opt += MAX(opt[1], 2); 971 break; 972 } 973 } 974 } 975 976 return (0); 977 } 978 979 void 980 pf_normalize_tcp_cleanup(struct pf_state *state) 981 { 982 if (state->src.scrub) 983 pool_put(&pf_state_scrub_pl, state->src.scrub); 984 if (state->dst.scrub) 985 pool_put(&pf_state_scrub_pl, state->dst.scrub); 986 987 /* Someday... flush the TCP segment reassembly descriptors. */ 988 } 989 990 int 991 pf_normalize_tcp_stateful(struct pf_pdesc *pd, u_short *reason, 992 struct pf_state *state, struct pf_state_peer *src, 993 struct pf_state_peer *dst, int *writeback) 994 { 995 struct tcphdr *th = pd->hdr.tcp; 996 struct timeval uptime; 997 u_int32_t tsval, tsecr; 998 u_int tsval_from_last; 999 u_int8_t hdr[60]; 1000 u_int8_t *opt; 1001 int copyback = 0; 1002 int got_ts = 0; 1003 1004 KASSERT(src->scrub || dst->scrub); 1005 1006 /* 1007 * Enforce the minimum TTL seen for this connection. Negate a common 1008 * technique to evade an intrusion detection system and confuse 1009 * firewall state code. 1010 */ 1011 switch (pd->af) { 1012 #ifdef INET 1013 case AF_INET: { 1014 if (src->scrub) { 1015 struct ip *h = mtod(pd->m, struct ip *); 1016 if (h->ip_ttl > src->scrub->pfss_ttl) 1017 src->scrub->pfss_ttl = h->ip_ttl; 1018 h->ip_ttl = src->scrub->pfss_ttl; 1019 } 1020 break; 1021 } 1022 #endif /* INET */ 1023 #ifdef INET6 1024 case AF_INET6: { 1025 if (src->scrub) { 1026 struct ip6_hdr *h = mtod(pd->m, struct ip6_hdr *); 1027 if (h->ip6_hlim > src->scrub->pfss_ttl) 1028 src->scrub->pfss_ttl = h->ip6_hlim; 1029 h->ip6_hlim = src->scrub->pfss_ttl; 1030 } 1031 break; 1032 } 1033 #endif /* INET6 */ 1034 } 1035 1036 if (th->th_off > (sizeof(struct tcphdr) >> 2) && 1037 ((src->scrub && (src->scrub->pfss_flags & PFSS_TIMESTAMP)) || 1038 (dst->scrub && (dst->scrub->pfss_flags & PFSS_TIMESTAMP))) && 1039 pf_pull_hdr(pd->m, pd->off, hdr, th->th_off << 2, NULL, NULL, 1040 pd->af)) { 1041 /* Diddle with TCP options */ 1042 int hlen; 1043 opt = hdr + sizeof(struct tcphdr); 1044 hlen = (th->th_off << 2) - sizeof(struct tcphdr); 1045 while (hlen >= TCPOLEN_TIMESTAMP) { 1046 switch (*opt) { 1047 case TCPOPT_EOL: /* FALLTHROUGH */ 1048 case TCPOPT_NOP: 1049 opt++; 1050 hlen--; 1051 break; 1052 case TCPOPT_TIMESTAMP: 1053 /* Modulate the timestamps. Can be used for 1054 * NAT detection, OS uptime determination or 1055 * reboot detection. 1056 */ 1057 1058 if (got_ts) { 1059 /* Huh? Multiple timestamps!? */ 1060 if (pf_status.debug >= LOG_NOTICE) { 1061 log(LOG_NOTICE, 1062 "pf: %s: multiple TS??", 1063 __func__); 1064 pf_print_state(state); 1065 addlog("\n"); 1066 } 1067 REASON_SET(reason, PFRES_TS); 1068 return (PF_DROP); 1069 } 1070 if (opt[1] >= TCPOLEN_TIMESTAMP) { 1071 memcpy(&tsval, &opt[2], 1072 sizeof(u_int32_t)); 1073 if (tsval && src->scrub && 1074 (src->scrub->pfss_flags & 1075 PFSS_TIMESTAMP)) { 1076 tsval = ntohl(tsval); 1077 pf_change_a(&opt[2], 1078 &th->th_sum, 1079 htonl(tsval + 1080 src->scrub->pfss_ts_mod), 1081 0); 1082 copyback = 1; 1083 } 1084 1085 /* Modulate TS reply iff valid (!0) */ 1086 memcpy(&tsecr, &opt[6], 1087 sizeof(u_int32_t)); 1088 if (tsecr && dst->scrub && 1089 (dst->scrub->pfss_flags & 1090 PFSS_TIMESTAMP)) { 1091 tsecr = ntohl(tsecr) 1092 - dst->scrub->pfss_ts_mod; 1093 pf_change_a(&opt[6], 1094 &th->th_sum, htonl(tsecr), 1095 0); 1096 copyback = 1; 1097 } 1098 got_ts = 1; 1099 } 1100 /* FALLTHROUGH */ 1101 default: 1102 hlen -= MAX(opt[1], 2); 1103 opt += MAX(opt[1], 2); 1104 break; 1105 } 1106 } 1107 if (copyback) { 1108 /* Copyback the options, caller copys back header */ 1109 *writeback = 1; 1110 m_copyback(pd->m, pd->off + sizeof(struct tcphdr), 1111 (th->th_off << 2) - sizeof(struct tcphdr), hdr + 1112 sizeof(struct tcphdr), M_NOWAIT); 1113 } 1114 } 1115 1116 1117 /* 1118 * Must invalidate PAWS checks on connections idle for too long. 1119 * The fastest allowed timestamp clock is 1ms. That turns out to 1120 * be about 24 days before it wraps. XXX Right now our lowerbound 1121 * TS echo check only works for the first 12 days of a connection 1122 * when the TS has exhausted half its 32bit space 1123 */ 1124 #define TS_MAX_IDLE (24*24*60*60) 1125 #define TS_MAX_CONN (12*24*60*60) /* XXX remove when better tsecr check */ 1126 1127 getmicrouptime(&uptime); 1128 if (src->scrub && (src->scrub->pfss_flags & PFSS_PAWS) && 1129 (uptime.tv_sec - src->scrub->pfss_last.tv_sec > TS_MAX_IDLE || 1130 time_uptime - state->creation > TS_MAX_CONN)) { 1131 if (pf_status.debug >= LOG_NOTICE) { 1132 log(LOG_NOTICE, "pf: src idled out of PAWS "); 1133 pf_print_state(state); 1134 addlog("\n"); 1135 } 1136 src->scrub->pfss_flags = (src->scrub->pfss_flags & ~PFSS_PAWS) 1137 | PFSS_PAWS_IDLED; 1138 } 1139 if (dst->scrub && (dst->scrub->pfss_flags & PFSS_PAWS) && 1140 uptime.tv_sec - dst->scrub->pfss_last.tv_sec > TS_MAX_IDLE) { 1141 if (pf_status.debug >= LOG_NOTICE) { 1142 log(LOG_NOTICE, "pf: dst idled out of PAWS "); 1143 pf_print_state(state); 1144 addlog("\n"); 1145 } 1146 dst->scrub->pfss_flags = (dst->scrub->pfss_flags & ~PFSS_PAWS) 1147 | PFSS_PAWS_IDLED; 1148 } 1149 1150 if (got_ts && src->scrub && dst->scrub && 1151 (src->scrub->pfss_flags & PFSS_PAWS) && 1152 (dst->scrub->pfss_flags & PFSS_PAWS)) { 1153 /* Validate that the timestamps are "in-window". 1154 * RFC1323 describes TCP Timestamp options that allow 1155 * measurement of RTT (round trip time) and PAWS 1156 * (protection against wrapped sequence numbers). PAWS 1157 * gives us a set of rules for rejecting packets on 1158 * long fat pipes (packets that were somehow delayed 1159 * in transit longer than the time it took to send the 1160 * full TCP sequence space of 4Gb). We can use these 1161 * rules and infer a few others that will let us treat 1162 * the 32bit timestamp and the 32bit echoed timestamp 1163 * as sequence numbers to prevent a blind attacker from 1164 * inserting packets into a connection. 1165 * 1166 * RFC1323 tells us: 1167 * - The timestamp on this packet must be greater than 1168 * or equal to the last value echoed by the other 1169 * endpoint. The RFC says those will be discarded 1170 * since it is a dup that has already been acked. 1171 * This gives us a lowerbound on the timestamp. 1172 * timestamp >= other last echoed timestamp 1173 * - The timestamp will be less than or equal to 1174 * the last timestamp plus the time between the 1175 * last packet and now. The RFC defines the max 1176 * clock rate as 1ms. We will allow clocks to be 1177 * up to 10% fast and will allow a total difference 1178 * or 30 seconds due to a route change. And this 1179 * gives us an upperbound on the timestamp. 1180 * timestamp <= last timestamp + max ticks 1181 * We have to be careful here. Windows will send an 1182 * initial timestamp of zero and then initialize it 1183 * to a random value after the 3whs; presumably to 1184 * avoid a DoS by having to call an expensive RNG 1185 * during a SYN flood. Proof MS has at least one 1186 * good security geek. 1187 * 1188 * - The TCP timestamp option must also echo the other 1189 * endpoints timestamp. The timestamp echoed is the 1190 * one carried on the earliest unacknowledged segment 1191 * on the left edge of the sequence window. The RFC 1192 * states that the host will reject any echoed 1193 * timestamps that were larger than any ever sent. 1194 * This gives us an upperbound on the TS echo. 1195 * tescr <= largest_tsval 1196 * - The lowerbound on the TS echo is a little more 1197 * tricky to determine. The other endpoint's echoed 1198 * values will not decrease. But there may be 1199 * network conditions that re-order packets and 1200 * cause our view of them to decrease. For now the 1201 * only lowerbound we can safely determine is that 1202 * the TS echo will never be less than the original 1203 * TS. XXX There is probably a better lowerbound. 1204 * Remove TS_MAX_CONN with better lowerbound check. 1205 * tescr >= other original TS 1206 * 1207 * It is also important to note that the fastest 1208 * timestamp clock of 1ms will wrap its 32bit space in 1209 * 24 days. So we just disable TS checking after 24 1210 * days of idle time. We actually must use a 12d 1211 * connection limit until we can come up with a better 1212 * lowerbound to the TS echo check. 1213 */ 1214 struct timeval delta_ts; 1215 int ts_fudge; 1216 1217 1218 /* 1219 * PFTM_TS_DIFF is how many seconds of leeway to allow 1220 * a host's timestamp. This can happen if the previous 1221 * packet got delayed in transit for much longer than 1222 * this packet. 1223 */ 1224 if ((ts_fudge = state->rule.ptr->timeout[PFTM_TS_DIFF]) == 0) 1225 ts_fudge = pf_default_rule.timeout[PFTM_TS_DIFF]; 1226 1227 1228 /* Calculate max ticks since the last timestamp */ 1229 #define TS_MAXFREQ 1100 /* RFC max TS freq of 1Khz + 10% skew */ 1230 #define TS_MICROSECS 1000000 /* microseconds per second */ 1231 timersub(&uptime, &src->scrub->pfss_last, &delta_ts); 1232 tsval_from_last = (delta_ts.tv_sec + ts_fudge) * TS_MAXFREQ; 1233 tsval_from_last += delta_ts.tv_usec / (TS_MICROSECS/TS_MAXFREQ); 1234 1235 1236 if ((src->state >= TCPS_ESTABLISHED && 1237 dst->state >= TCPS_ESTABLISHED) && 1238 (SEQ_LT(tsval, dst->scrub->pfss_tsecr) || 1239 SEQ_GT(tsval, src->scrub->pfss_tsval + tsval_from_last) || 1240 (tsecr && (SEQ_GT(tsecr, dst->scrub->pfss_tsval) || 1241 SEQ_LT(tsecr, dst->scrub->pfss_tsval0))))) { 1242 /* Bad RFC1323 implementation or an insertion attack. 1243 * 1244 * - Solaris 2.6 and 2.7 are known to send another ACK 1245 * after the FIN,FIN|ACK,ACK closing that carries 1246 * an old timestamp. 1247 */ 1248 1249 DPFPRINTF(LOG_NOTICE, "Timestamp failed %c%c%c%c", 1250 SEQ_LT(tsval, dst->scrub->pfss_tsecr) ? '0' : ' ', 1251 SEQ_GT(tsval, src->scrub->pfss_tsval + 1252 tsval_from_last) ? '1' : ' ', 1253 SEQ_GT(tsecr, dst->scrub->pfss_tsval) ? '2' : ' ', 1254 SEQ_LT(tsecr, dst->scrub->pfss_tsval0)? '3' : ' '); 1255 DPFPRINTF(LOG_NOTICE, 1256 " tsval: %u tsecr: %u +ticks: %u " 1257 "idle: %lus %lums", 1258 tsval, tsecr, tsval_from_last, delta_ts.tv_sec, 1259 delta_ts.tv_usec / 1000); 1260 DPFPRINTF(LOG_NOTICE, 1261 " src->tsval: %u tsecr: %u", 1262 src->scrub->pfss_tsval, src->scrub->pfss_tsecr); 1263 DPFPRINTF(LOG_NOTICE, 1264 " dst->tsval: %u tsecr: %u tsval0: %u", 1265 dst->scrub->pfss_tsval, dst->scrub->pfss_tsecr, 1266 dst->scrub->pfss_tsval0); 1267 if (pf_status.debug >= LOG_NOTICE) { 1268 log(LOG_NOTICE, "pf: "); 1269 pf_print_state(state); 1270 pf_print_flags(th->th_flags); 1271 addlog("\n"); 1272 } 1273 REASON_SET(reason, PFRES_TS); 1274 return (PF_DROP); 1275 } 1276 1277 /* XXX I'd really like to require tsecr but it's optional */ 1278 1279 } else if (!got_ts && (th->th_flags & TH_RST) == 0 && 1280 ((src->state == TCPS_ESTABLISHED && dst->state == TCPS_ESTABLISHED) 1281 || pd->p_len > 0 || (th->th_flags & TH_SYN)) && 1282 src->scrub && dst->scrub && 1283 (src->scrub->pfss_flags & PFSS_PAWS) && 1284 (dst->scrub->pfss_flags & PFSS_PAWS)) { 1285 /* Didn't send a timestamp. Timestamps aren't really useful 1286 * when: 1287 * - connection opening or closing (often not even sent). 1288 * but we must not let an attacker to put a FIN on a 1289 * data packet to sneak it through our ESTABLISHED check. 1290 * - on a TCP reset. RFC suggests not even looking at TS. 1291 * - on an empty ACK. The TS will not be echoed so it will 1292 * probably not help keep the RTT calculation in sync and 1293 * there isn't as much danger when the sequence numbers 1294 * got wrapped. So some stacks don't include TS on empty 1295 * ACKs :-( 1296 * 1297 * To minimize the disruption to mostly RFC1323 conformant 1298 * stacks, we will only require timestamps on data packets. 1299 * 1300 * And what do ya know, we cannot require timestamps on data 1301 * packets. There appear to be devices that do legitimate 1302 * TCP connection hijacking. There are HTTP devices that allow 1303 * a 3whs (with timestamps) and then buffer the HTTP request. 1304 * If the intermediate device has the HTTP response cache, it 1305 * will spoof the response but not bother timestamping its 1306 * packets. So we can look for the presence of a timestamp in 1307 * the first data packet and if there, require it in all future 1308 * packets. 1309 */ 1310 1311 if (pd->p_len > 0 && (src->scrub->pfss_flags & PFSS_DATA_TS)) { 1312 /* 1313 * Hey! Someone tried to sneak a packet in. Or the 1314 * stack changed its RFC1323 behavior?!?! 1315 */ 1316 if (pf_status.debug >= LOG_NOTICE) { 1317 log(LOG_NOTICE, 1318 "pf: did not receive expected RFC1323 " 1319 "timestamp"); 1320 pf_print_state(state); 1321 pf_print_flags(th->th_flags); 1322 addlog("\n"); 1323 } 1324 REASON_SET(reason, PFRES_TS); 1325 return (PF_DROP); 1326 } 1327 } 1328 1329 1330 /* 1331 * We will note if a host sends his data packets with or without 1332 * timestamps. And require all data packets to contain a timestamp 1333 * if the first does. PAWS implicitly requires that all data packets be 1334 * timestamped. But I think there are middle-man devices that hijack 1335 * TCP streams immediately after the 3whs and don't timestamp their 1336 * packets (seen in a WWW accelerator or cache). 1337 */ 1338 if (pd->p_len > 0 && src->scrub && (src->scrub->pfss_flags & 1339 (PFSS_TIMESTAMP|PFSS_DATA_TS|PFSS_DATA_NOTS)) == PFSS_TIMESTAMP) { 1340 if (got_ts) 1341 src->scrub->pfss_flags |= PFSS_DATA_TS; 1342 else { 1343 src->scrub->pfss_flags |= PFSS_DATA_NOTS; 1344 if (pf_status.debug >= LOG_NOTICE && dst->scrub && 1345 (dst->scrub->pfss_flags & PFSS_TIMESTAMP)) { 1346 /* Don't warn if other host rejected RFC1323 */ 1347 log(LOG_NOTICE, 1348 "pf: broken RFC1323 stack did not " 1349 "timestamp data packet. Disabled PAWS " 1350 "security."); 1351 pf_print_state(state); 1352 pf_print_flags(th->th_flags); 1353 addlog("\n"); 1354 } 1355 } 1356 } 1357 1358 1359 /* 1360 * Update PAWS values 1361 */ 1362 if (got_ts && src->scrub && PFSS_TIMESTAMP == (src->scrub->pfss_flags & 1363 (PFSS_PAWS_IDLED|PFSS_TIMESTAMP))) { 1364 getmicrouptime(&src->scrub->pfss_last); 1365 if (SEQ_GEQ(tsval, src->scrub->pfss_tsval) || 1366 (src->scrub->pfss_flags & PFSS_PAWS) == 0) 1367 src->scrub->pfss_tsval = tsval; 1368 1369 if (tsecr) { 1370 if (SEQ_GEQ(tsecr, src->scrub->pfss_tsecr) || 1371 (src->scrub->pfss_flags & PFSS_PAWS) == 0) 1372 src->scrub->pfss_tsecr = tsecr; 1373 1374 if ((src->scrub->pfss_flags & PFSS_PAWS) == 0 && 1375 (SEQ_LT(tsval, src->scrub->pfss_tsval0) || 1376 src->scrub->pfss_tsval0 == 0)) { 1377 /* tsval0 MUST be the lowest timestamp */ 1378 src->scrub->pfss_tsval0 = tsval; 1379 } 1380 1381 /* Only fully initialized after a TS gets echoed */ 1382 if ((src->scrub->pfss_flags & PFSS_PAWS) == 0) 1383 src->scrub->pfss_flags |= PFSS_PAWS; 1384 } 1385 } 1386 1387 /* I have a dream.... TCP segment reassembly.... */ 1388 return (0); 1389 } 1390 1391 int 1392 pf_normalize_mss(struct pf_pdesc *pd, u_int16_t maxmss) 1393 { 1394 struct tcphdr *th = pd->hdr.tcp; 1395 u_int16_t mss; 1396 int thoff; 1397 int opt, cnt, optlen = 0; 1398 u_char opts[MAX_TCPOPTLEN]; 1399 u_char *optp = opts; 1400 1401 thoff = th->th_off << 2; 1402 cnt = thoff - sizeof(struct tcphdr); 1403 1404 if (cnt > 0 && !pf_pull_hdr(pd->m, pd->off + sizeof(*th), opts, cnt, 1405 NULL, NULL, pd->af)) 1406 return (0); 1407 1408 for (; cnt > 0; cnt -= optlen, optp += optlen) { 1409 opt = optp[0]; 1410 if (opt == TCPOPT_EOL) 1411 break; 1412 if (opt == TCPOPT_NOP) 1413 optlen = 1; 1414 else { 1415 if (cnt < 2) 1416 break; 1417 optlen = optp[1]; 1418 if (optlen < 2 || optlen > cnt) 1419 break; 1420 } 1421 switch (opt) { 1422 case TCPOPT_MAXSEG: 1423 bcopy((caddr_t)(optp + 2), (caddr_t)&mss, 2); 1424 if (ntohs(mss) > maxmss) { 1425 th->th_sum = pf_cksum_fixup(th->th_sum, 1426 mss, htons(maxmss), 0); 1427 mss = htons(maxmss); 1428 m_copyback(pd->m, 1429 pd->off + sizeof(*th) + optp + 2 - opts, 1430 2, &mss, M_NOWAIT); 1431 m_copyback(pd->m, pd->off, sizeof(*th), th, 1432 M_NOWAIT); 1433 } 1434 break; 1435 default: 1436 break; 1437 } 1438 } 1439 1440 1441 1442 return (0); 1443 } 1444 1445 void 1446 pf_scrub(struct mbuf *m, u_int16_t flags, sa_family_t af, u_int8_t min_ttl, 1447 u_int8_t tos) 1448 { 1449 struct ip *h = mtod(m, struct ip *); 1450 #ifdef INET6 1451 struct ip6_hdr *h6 = mtod(m, struct ip6_hdr *); 1452 #endif 1453 1454 /* Clear IP_DF if no-df was requested */ 1455 if (flags & PFSTATE_NODF && af == AF_INET && h->ip_off & htons(IP_DF)) 1456 h->ip_off &= htons(~IP_DF); 1457 1458 /* Enforce a minimum ttl, may cause endless packet loops */ 1459 if (min_ttl && af == AF_INET && h->ip_ttl < min_ttl) 1460 h->ip_ttl = min_ttl; 1461 #ifdef INET6 1462 if (min_ttl && af == AF_INET6 && h6->ip6_hlim < min_ttl) 1463 h6->ip6_hlim = min_ttl; 1464 #endif 1465 1466 /* Enforce tos */ 1467 if (flags & PFSTATE_SETTOS) { 1468 if (af == AF_INET) 1469 h->ip_tos = tos; 1470 #ifdef INET6 1471 if (af == AF_INET6) { 1472 /* drugs are unable to explain such idiocy */ 1473 h6->ip6_flow &= ~htonl(0x0ff00000); 1474 h6->ip6_flow |= htonl(((u_int32_t)tos) << 20); 1475 } 1476 #endif 1477 } 1478 1479 /* random-id, but not for fragments */ 1480 if (flags & PFSTATE_RANDOMID && af == AF_INET && 1481 !(h->ip_off & ~htons(IP_DF))) 1482 h->ip_id = htons(ip_randomid()); 1483 } 1484