/* $OpenBSD: pf_norm.c,v 1.178 2015/05/05 23:27:47 chris Exp $ */

/*
 * Copyright 2001 Niels Provos <provos@citi.umich.edu>
 * Copyright 2009 Henning Brauer <henning@openbsd.org>
 * Copyright 2011 Alexander Bluhm <bluhm@openbsd.org>
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#include "pflog.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/mbuf.h>
#include <sys/filio.h>
#include <sys/fcntl.h>
#include <sys/socket.h>
#include <sys/kernel.h>
#include <sys/time.h>
#include <sys/pool.h>
#include <sys/syslog.h>

#include <netinet/in.h>
#include <netinet/ip.h>
#include <netinet/ip_var.h>
#include <netinet/tcp.h>
#include <netinet/tcp_seq.h>
#include <netinet/tcp_fsm.h>
#include <netinet/udp.h>
#include <netinet/ip_icmp.h>

#include <net/if.h>
#include <net/if_var.h>
#include <net/if_types.h>
#include <net/if_pflog.h>

#ifdef INET6
#include <netinet/ip6.h>
#include <netinet6/ip6_var.h>
#endif /* INET6 */

#include <net/pfvar.h>

struct pf_frent {
	TAILQ_ENTRY(pf_frent) fr_next;
	struct mbuf	*fe_m;
	u_int16_t	 fe_hdrlen;	/* ipv4 header length with ip options
					   ipv6, extension, fragment header */
	u_int16_t	 fe_extoff;	/* last extension header offset or 0 */
	u_int16_t	 fe_len;	/* fragment length */
	u_int16_t	 fe_off;	/* fragment offset */
	u_int16_t	 fe_mff;	/* more fragment flag */
};
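/*
 * Note (added by the editor, not in the original source): fe_off and
 * fe_len are byte counts.  The IPv4 wire format carries the fragment
 * offset in 8-byte units, so it is shifted left by 3 when a pf_frent
 * is filled in (see pf_reassemble()); e.g. a wire offset of 185 yields
 * fe_off 1480.
 */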
/* keep synced with struct pf_fragment, used in RB_FIND */
struct pf_fragment_cmp {
	struct pf_addr	fr_src;
	struct pf_addr	fr_dst;
	u_int32_t	fr_id;
	sa_family_t	fr_af;
	u_int8_t	fr_proto;
	u_int8_t	fr_direction;
};

struct pf_fragment {
	struct pf_addr	fr_src;		/* ip source address */
	struct pf_addr	fr_dst;		/* ip destination address */
	u_int32_t	fr_id;		/* fragment id for reassemble */
	sa_family_t	fr_af;		/* address family */
	u_int8_t	fr_proto;	/* protocol of this fragment */
	u_int8_t	fr_direction;	/* pf packet direction */

	RB_ENTRY(pf_fragment) fr_entry;
	TAILQ_ENTRY(pf_fragment) frag_next;
	TAILQ_HEAD(pf_fragq, pf_frent) fr_queue;
	int32_t		fr_timeout;
	u_int16_t	fr_maxlen;	/* maximum length of single fragment */
};

struct pf_fragment_tag {
	u_int16_t	 ft_hdrlen;	/* header length of reassembled pkt */
	u_int16_t	 ft_extoff;	/* last extension header offset or 0 */
	u_int16_t	 ft_maxlen;	/* maximum fragment payload length */
};

TAILQ_HEAD(pf_fragqueue, pf_fragment)	pf_fragqueue;

static __inline int	 pf_frag_compare(struct pf_fragment *,
			    struct pf_fragment *);
RB_HEAD(pf_frag_tree, pf_fragment)	pf_frag_tree, pf_cache_tree;
RB_PROTOTYPE(pf_frag_tree, pf_fragment, fr_entry, pf_frag_compare);
RB_GENERATE(pf_frag_tree, pf_fragment, fr_entry, pf_frag_compare);

/* Private prototypes */
void			 pf_flush_fragments(void);
void			 pf_free_fragment(struct pf_fragment *);
struct pf_fragment	*pf_find_fragment(struct pf_fragment_cmp *,
			    struct pf_frag_tree *);
struct pf_frent		*pf_create_fragment(u_short *);
struct pf_fragment	*pf_fillup_fragment(struct pf_fragment_cmp *,
			    struct pf_frent *, u_short *);
int			 pf_isfull_fragment(struct pf_fragment *);
struct mbuf		*pf_join_fragment(struct pf_fragment *);
int			 pf_reassemble(struct mbuf **, int, u_short *);
#ifdef INET6
int			 pf_reassemble6(struct mbuf **, struct ip6_frag *,
			    u_int16_t, u_int16_t, int, u_short *);
#endif /* INET6 */

/* Globals */
struct pool		 pf_frent_pl, pf_frag_pl;
struct pool		 pf_state_scrub_pl;
int			 pf_nfrents;

void
pf_normalize_init(void)
{
	pool_init(&pf_frent_pl, sizeof(struct pf_frent), 0, 0, 0, "pffrent",
	    NULL);
	pool_init(&pf_frag_pl, sizeof(struct pf_fragment), 0, 0, 0, "pffrag",
	    NULL);
	pool_init(&pf_state_scrub_pl, sizeof(struct pf_state_scrub), 0, 0, 0,
	    "pfstscr", NULL);

	pool_sethiwat(&pf_frag_pl, PFFRAG_FRAG_HIWAT);
	pool_sethardlimit(&pf_frent_pl, PFFRAG_FRENT_HIWAT, NULL, 0);

	TAILQ_INIT(&pf_fragqueue);
}

static __inline int
pf_frag_compare(struct pf_fragment *a, struct pf_fragment *b)
{
	int	diff;

	if ((diff = a->fr_id - b->fr_id) != 0)
		return (diff);
	if ((diff = a->fr_proto - b->fr_proto) != 0)
		return (diff);
	if ((diff = a->fr_af - b->fr_af) != 0)
		return (diff);
	if ((diff = pf_addr_compare(&a->fr_src, &b->fr_src, a->fr_af)) != 0)
		return (diff);
	if ((diff = pf_addr_compare(&a->fr_dst, &b->fr_dst, a->fr_af)) != 0)
		return (diff);

	return (0);
}
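/*
 * Note (added): pf_fragqueue is kept in use order.  New reassemblies
 * and fragments hit by pf_find_fragment() sit at the head, so the
 * TAILQ_LAST() calls below always yield the least recently used entry
 * for expiry and emergency flushing.
 */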
void
pf_purge_expired_fragments(void)
{
	struct pf_fragment	*frag;
	int32_t			 expire;

	expire = time_uptime - pf_default_rule.timeout[PFTM_FRAG];
	while ((frag = TAILQ_LAST(&pf_fragqueue, pf_fragqueue)) != NULL) {
		if (frag->fr_timeout > expire)
			break;
		DPFPRINTF(LOG_NOTICE, "expiring %d(%p)", frag->fr_id, frag);
		pf_free_fragment(frag);
	}
}

/*
 * Try to flush old fragments to make space for new ones
 */
void
pf_flush_fragments(void)
{
	struct pf_fragment	*frag;
	int			 goal;

	goal = pf_nfrents * 9 / 10;
	DPFPRINTF(LOG_NOTICE, "trying to free > %d frents", pf_nfrents - goal);
	while (goal < pf_nfrents) {
		if ((frag = TAILQ_LAST(&pf_fragqueue, pf_fragqueue)) == NULL)
			break;
		pf_free_fragment(frag);
	}
}

/*
 * Remove a fragment from the fragment queue, free its fragment entries,
 * and free the fragment itself.
 */
void
pf_free_fragment(struct pf_fragment *frag)
{
	struct pf_frent	*frent;

	RB_REMOVE(pf_frag_tree, &pf_frag_tree, frag);
	TAILQ_REMOVE(&pf_fragqueue, frag, frag_next);

	/* Free all fragment entries */
	while ((frent = TAILQ_FIRST(&frag->fr_queue)) != NULL) {
		TAILQ_REMOVE(&frag->fr_queue, frent, fr_next);
		m_freem(frent->fe_m);
		pool_put(&pf_frent_pl, frent);
		pf_nfrents--;
	}
	pool_put(&pf_frag_pl, frag);
}

struct pf_fragment *
pf_find_fragment(struct pf_fragment_cmp *key, struct pf_frag_tree *tree)
{
	struct pf_fragment	*frag;

	frag = RB_FIND(pf_frag_tree, tree, (struct pf_fragment *)key);
	if (frag != NULL) {
		TAILQ_REMOVE(&pf_fragqueue, frag, frag_next);
		TAILQ_INSERT_HEAD(&pf_fragqueue, frag, frag_next);
	}

	return (frag);
}

struct pf_frent *
pf_create_fragment(u_short *reason)
{
	struct pf_frent	*frent;

	frent = pool_get(&pf_frent_pl, PR_NOWAIT);
	if (frent == NULL) {
		pf_flush_fragments();
		frent = pool_get(&pf_frent_pl, PR_NOWAIT);
		if (frent == NULL) {
			REASON_SET(reason, PFRES_MEMORY);
			return (NULL);
		}
	}
	pf_nfrents++;

	return (frent);
}
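/*
 * Note (added): pf_fillup_fragment() keeps fr_queue sorted by fe_off
 * with no overlaps.  For IPv4 an overlapping new fragment is trimmed
 * to fit: with a queued fragment at fe_off 0, fe_len 1480, a new one
 * at fe_off 1472, fe_len 1480 gets precut = 0 + 1480 - 1472 = 8, so 8
 * bytes are m_adj()ed off its head and it is queued at fe_off 1480.
 * For IPv6 any overlap discards the whole reassembly (RFC 5722).
 */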
struct pf_fragment *
pf_fillup_fragment(struct pf_fragment_cmp *key, struct pf_frent *frent,
    u_short *reason)
{
	struct pf_frent		*after, *next, *prev;
	struct pf_fragment	*frag;
	u_int16_t		 total;

	/* No empty fragments */
	if (frent->fe_len == 0) {
		DPFPRINTF(LOG_NOTICE, "bad fragment: len 0");
		goto bad_fragment;
	}

	/* All fragments are 8 byte aligned */
	if (frent->fe_mff && (frent->fe_len & 0x7)) {
		DPFPRINTF(LOG_NOTICE, "bad fragment: mff and len %d",
		    frent->fe_len);
		goto bad_fragment;
	}

	/* Respect maximum length, IP_MAXPACKET == IPV6_MAXPACKET */
	if (frent->fe_off + frent->fe_len > IP_MAXPACKET) {
		DPFPRINTF(LOG_NOTICE, "bad fragment: max packet %d",
		    frent->fe_off + frent->fe_len);
		goto bad_fragment;
	}

	DPFPRINTF(LOG_NOTICE, key->fr_af == AF_INET ?
	    "reass frag %d @ %d-%d" : "reass frag %#08x @ %d-%d",
	    key->fr_id, frent->fe_off, frent->fe_off + frent->fe_len);

	/* Fully buffer all of the fragments in this fragment queue */
	frag = pf_find_fragment(key, &pf_frag_tree);

	/* Create a new reassembly queue for this packet */
	if (frag == NULL) {
		frag = pool_get(&pf_frag_pl, PR_NOWAIT);
		if (frag == NULL) {
			pf_flush_fragments();
			frag = pool_get(&pf_frag_pl, PR_NOWAIT);
			if (frag == NULL) {
				REASON_SET(reason, PFRES_MEMORY);
				goto drop_fragment;
			}
		}

		*(struct pf_fragment_cmp *)frag = *key;
		TAILQ_INIT(&frag->fr_queue);
		frag->fr_timeout = time_uptime;
		frag->fr_maxlen = frent->fe_len;

		RB_INSERT(pf_frag_tree, &pf_frag_tree, frag);
		TAILQ_INSERT_HEAD(&pf_fragqueue, frag, frag_next);

		/* We do not have a previous fragment */
		TAILQ_INSERT_HEAD(&frag->fr_queue, frent, fr_next);

		return (frag);
	}

	KASSERT(!TAILQ_EMPTY(&frag->fr_queue));

	/* Remember maximum fragment len for refragmentation */
	if (frent->fe_len > frag->fr_maxlen)
		frag->fr_maxlen = frent->fe_len;

	/* Maximum data we have seen already */
	total = TAILQ_LAST(&frag->fr_queue, pf_fragq)->fe_off +
	    TAILQ_LAST(&frag->fr_queue, pf_fragq)->fe_len;

	/* Non terminal fragments must have more fragments flag */
	if (frent->fe_off + frent->fe_len < total && !frent->fe_mff)
		goto bad_fragment;

	/* Check if we saw the last fragment already */
	if (!TAILQ_LAST(&frag->fr_queue, pf_fragq)->fe_mff) {
		if (frent->fe_off + frent->fe_len > total ||
		    (frent->fe_off + frent->fe_len == total && frent->fe_mff))
			goto bad_fragment;
	} else {
		if (frent->fe_off + frent->fe_len == total && !frent->fe_mff)
			goto bad_fragment;
	}

	/* Find a fragment after the current one */
	prev = NULL;
	TAILQ_FOREACH(after, &frag->fr_queue, fr_next) {
		if (after->fe_off > frent->fe_off)
			break;
		prev = after;
	}

	KASSERT(prev != NULL || after != NULL);

	if (prev != NULL && prev->fe_off + prev->fe_len > frent->fe_off) {
		u_int16_t	precut;

#ifdef INET6
		if (frag->fr_af == AF_INET6)
			goto free_fragment;
#endif /* INET6 */

		precut = prev->fe_off + prev->fe_len - frent->fe_off;
		if (precut >= frent->fe_len) {
			DPFPRINTF(LOG_NOTICE, "new frag overlapped");
			goto drop_fragment;
		}
		DPFPRINTF(LOG_NOTICE, "frag head overlap %d", precut);
		m_adj(frent->fe_m, precut);
		frent->fe_off += precut;
		frent->fe_len -= precut;
	}

	for (; after != NULL && frent->fe_off + frent->fe_len > after->fe_off;
	    after = next) {
		u_int16_t	aftercut;

#ifdef INET6
		if (frag->fr_af == AF_INET6)
			goto free_fragment;
#endif /* INET6 */

		aftercut = frent->fe_off + frent->fe_len - after->fe_off;
		if (aftercut < after->fe_len) {
			DPFPRINTF(LOG_NOTICE, "frag tail overlap %d", aftercut);
			m_adj(after->fe_m, aftercut);
			after->fe_off += aftercut;
			after->fe_len -= aftercut;
			break;
		}

		/* This fragment is completely overlapped, lose it */
		DPFPRINTF(LOG_NOTICE, "old frag overlapped");
		next = TAILQ_NEXT(after, fr_next);
		TAILQ_REMOVE(&frag->fr_queue, after, fr_next);
		m_freem(after->fe_m);
		pool_put(&pf_frent_pl, after);
		pf_nfrents--;
	}

	if (prev == NULL)
		TAILQ_INSERT_HEAD(&frag->fr_queue, frent, fr_next);
	else
		TAILQ_INSERT_AFTER(&frag->fr_queue, prev, frent, fr_next);

	return (frag);

#ifdef INET6
free_fragment:
	/*
	 * RFC 5722, Errata 3089: When reassembling an IPv6 datagram, if one
	 * or more of its constituent fragments is determined to be an
	 * overlapping fragment, the entire datagram (and any constituent
	 * fragments) MUST be silently discarded.
	 */
	DPFPRINTF(LOG_NOTICE, "flush overlapping fragments");
	pf_free_fragment(frag);
#endif /* INET6 */
bad_fragment:
	REASON_SET(reason, PFRES_FRAG);
drop_fragment:
	pool_put(&pf_frent_pl, frent);
	pf_nfrents--;
	return (NULL);
}
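/*
 * Note (added): completeness check below.  "off" accumulates the
 * contiguous byte count from offset 0; the datagram is complete iff
 * the last queued fragment has fe_mff clear and the accumulated length
 * reaches "total" with no hole.  E.g. queue {0-1480, 1480-2960,
 * 2960-3000/!mff} is complete, while {0-1480, 2960-3000/!mff} still
 * misses 1480-2960 and stays buffered.
 */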
int
pf_isfull_fragment(struct pf_fragment *frag)
{
	struct pf_frent	*frent, *next;
	u_int16_t	 off, total;

	KASSERT(!TAILQ_EMPTY(&frag->fr_queue));

	/* Check if we are completely reassembled */
	if (TAILQ_LAST(&frag->fr_queue, pf_fragq)->fe_mff)
		return (0);

	/* Maximum data we have seen already */
	total = TAILQ_LAST(&frag->fr_queue, pf_fragq)->fe_off +
	    TAILQ_LAST(&frag->fr_queue, pf_fragq)->fe_len;

	/* Check if we have all the data */
	off = 0;
	for (frent = TAILQ_FIRST(&frag->fr_queue); frent; frent = next) {
		next = TAILQ_NEXT(frent, fr_next);
		off += frent->fe_len;
		if (off < total && (next == NULL || next->fe_off != off)) {
			DPFPRINTF(LOG_NOTICE,
			    "missing fragment at %d, next %d, total %d",
			    off, next == NULL ? -1 : next->fe_off, total);
			return (0);
		}
	}
	DPFPRINTF(LOG_NOTICE, "%d < %d?", off, total);
	if (off < total)
		return (0);
	KASSERT(off == total);

	return (1);
}
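/*
 * Note (added): pf_join_fragment() splices the queued mbuf chains into
 * one packet.  m_adj() with a negative count trims from the tail, which
 * is how trailing garbage beyond fe_len is stripped; every follower
 * also loses its IP/extension headers (fe_hdrlen) before being
 * m_cat()ed onto the first fragment.
 */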
struct mbuf *
pf_join_fragment(struct pf_fragment *frag)
{
	struct mbuf	*m, *m2;
	struct pf_frent	*frent;

	frent = TAILQ_FIRST(&frag->fr_queue);
	TAILQ_REMOVE(&frag->fr_queue, frent, fr_next);

	m = frent->fe_m;
	/* Strip off any trailing bytes */
	if ((frent->fe_hdrlen + frent->fe_len) < m->m_pkthdr.len)
		m_adj(m, (frent->fe_hdrlen + frent->fe_len) - m->m_pkthdr.len);
	/* Magic from ip_input */
	m2 = m->m_next;
	m->m_next = NULL;
	m_cat(m, m2);
	pool_put(&pf_frent_pl, frent);
	pf_nfrents--;

	while ((frent = TAILQ_FIRST(&frag->fr_queue)) != NULL) {
		TAILQ_REMOVE(&frag->fr_queue, frent, fr_next);
		m2 = frent->fe_m;
		/* Strip off ip header */
		m_adj(m2, frent->fe_hdrlen);
		/* Strip off any trailing bytes */
		if (frent->fe_len < m2->m_pkthdr.len)
			m_adj(m2, frent->fe_len - m2->m_pkthdr.len);
		pool_put(&pf_frent_pl, frent);
		pf_nfrents--;
		m_cat(m, m2);
	}

	/* Remove from fragment queue */
	pf_free_fragment(frag);

	return (m);
}

int
pf_reassemble(struct mbuf **m0, int dir, u_short *reason)
{
	struct mbuf		*m = *m0;
	struct ip		*ip = mtod(m, struct ip *);
	struct pf_frent		*frent;
	struct pf_fragment	*frag;
	struct pf_fragment_cmp	 key;
	u_int16_t		 total, hdrlen;

	/* Get an entry for the fragment queue */
	if ((frent = pf_create_fragment(reason)) == NULL)
		return (PF_DROP);

	frent->fe_m = m;
	frent->fe_hdrlen = ip->ip_hl << 2;
	frent->fe_extoff = 0;
	frent->fe_len = ntohs(ip->ip_len) - (ip->ip_hl << 2);
	frent->fe_off = (ntohs(ip->ip_off) & IP_OFFMASK) << 3;
	frent->fe_mff = ntohs(ip->ip_off) & IP_MF;

	key.fr_src.v4 = ip->ip_src;
	key.fr_dst.v4 = ip->ip_dst;
	key.fr_af = AF_INET;
	key.fr_proto = ip->ip_p;
	key.fr_id = ip->ip_id;
	key.fr_direction = dir;

	if ((frag = pf_fillup_fragment(&key, frent, reason)) == NULL)
		return (PF_DROP);

	/* The mbuf is part of the fragment entry, no direct free or access */
	m = *m0 = NULL;

	if (!pf_isfull_fragment(frag))
		return (PF_PASS);	/* drop because *m0 is NULL, no error */

	/* We have all the data */
	frent = TAILQ_FIRST(&frag->fr_queue);
	KASSERT(frent != NULL);
	total = TAILQ_LAST(&frag->fr_queue, pf_fragq)->fe_off +
	    TAILQ_LAST(&frag->fr_queue, pf_fragq)->fe_len;
	hdrlen = frent->fe_hdrlen;
	m = *m0 = pf_join_fragment(frag);
	frag = NULL;

	if (m->m_flags & M_PKTHDR) {
		int plen = 0;
		for (m = *m0; m; m = m->m_next)
			plen += m->m_len;
		m = *m0;
		m->m_pkthdr.len = plen;
	}

	ip = mtod(m, struct ip *);
	ip->ip_len = htons(hdrlen + total);
	ip->ip_off &= ~(IP_MF|IP_OFFMASK);

	if (hdrlen + total > IP_MAXPACKET) {
		DPFPRINTF(LOG_NOTICE, "drop: too big: %d", total);
		ip->ip_len = 0;
		REASON_SET(reason, PFRES_SHORT);
		/* PF_DROP requires a valid mbuf *m0 in pf_test() */
		return (PF_DROP);
	}

	DPFPRINTF(LOG_NOTICE, "complete: %p(%d)", m, ntohs(ip->ip_len));
	return (PF_PASS);
}
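/*
 * Note (added): the IPv6 variant below differs from pf_reassemble() in
 * three ways: fe_len counts the bytes behind the fragment header rather
 * than behind the IP header, key.fr_proto is forced to 0 because only
 * the first fragment carries the meaningful next-header chain, and the
 * reassembled packet is tagged (PACKET_TAG_PF_REASSEMBLED) so it can be
 * refragmented to the original fragment size on the way out.
 */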
#ifdef INET6
int
pf_reassemble6(struct mbuf **m0, struct ip6_frag *fraghdr,
    u_int16_t hdrlen, u_int16_t extoff, int dir, u_short *reason)
{
	struct mbuf		*m = *m0;
	struct ip6_hdr		*ip6 = mtod(m, struct ip6_hdr *);
	struct m_tag		*mtag;
	struct pf_fragment_tag	*ftag;
	struct pf_frent		*frent;
	struct pf_fragment	*frag;
	struct pf_fragment_cmp	 key;
	int			 off;
	u_int16_t		 total, maxlen;
	u_int8_t		 proto;

	/* Get an entry for the fragment queue */
	if ((frent = pf_create_fragment(reason)) == NULL)
		return (PF_DROP);

	frent->fe_m = m;
	frent->fe_hdrlen = hdrlen;
	frent->fe_extoff = extoff;
	frent->fe_len = sizeof(struct ip6_hdr) + ntohs(ip6->ip6_plen) - hdrlen;
	frent->fe_off = ntohs(fraghdr->ip6f_offlg & IP6F_OFF_MASK);
	frent->fe_mff = fraghdr->ip6f_offlg & IP6F_MORE_FRAG;

	key.fr_src.v6 = ip6->ip6_src;
	key.fr_dst.v6 = ip6->ip6_dst;
	key.fr_af = AF_INET6;
	/* Only the first fragment's protocol is relevant */
	key.fr_proto = 0;
	key.fr_id = fraghdr->ip6f_ident;
	key.fr_direction = dir;

	if ((frag = pf_fillup_fragment(&key, frent, reason)) == NULL)
		return (PF_DROP);

	/* The mbuf is part of the fragment entry, no direct free or access */
	m = *m0 = NULL;

	if (!pf_isfull_fragment(frag))
		return (PF_PASS);	/* drop because *m0 is NULL, no error */

	/* We have all the data */
	extoff = frent->fe_extoff;
	maxlen = frag->fr_maxlen;
	frent = TAILQ_FIRST(&frag->fr_queue);
	KASSERT(frent != NULL);
	total = TAILQ_LAST(&frag->fr_queue, pf_fragq)->fe_off +
	    TAILQ_LAST(&frag->fr_queue, pf_fragq)->fe_len;
	hdrlen = frent->fe_hdrlen - sizeof(struct ip6_frag);
	m = *m0 = pf_join_fragment(frag);
	frag = NULL;

	/* Take protocol from first fragment header */
	if ((m = m_getptr(m, hdrlen + offsetof(struct ip6_frag, ip6f_nxt),
	    &off)) == NULL)
		panic("pf_reassemble6: short mbuf chain");
	proto = *(mtod(m, caddr_t) + off);
	m = *m0;

	/* Delete frag6 header */
	if (frag6_deletefraghdr(m, hdrlen) != 0)
		goto fail;

	if (m->m_flags & M_PKTHDR) {
		int plen = 0;
		for (m = *m0; m; m = m->m_next)
			plen += m->m_len;
		m = *m0;
		m->m_pkthdr.len = plen;
	}

	if ((mtag = m_tag_get(PACKET_TAG_PF_REASSEMBLED, sizeof(struct
	    pf_fragment_tag), M_NOWAIT)) == NULL)
		goto fail;
	ftag = (struct pf_fragment_tag *)(mtag + 1);
	ftag->ft_hdrlen = hdrlen;
	ftag->ft_extoff = extoff;
	ftag->ft_maxlen = maxlen;
	m_tag_prepend(m, mtag);

	ip6 = mtod(m, struct ip6_hdr *);
	ip6->ip6_plen = htons(hdrlen - sizeof(struct ip6_hdr) + total);
	if (extoff) {
		/* Write protocol into next field of last extension header */
		if ((m = m_getptr(m, extoff + offsetof(struct ip6_ext,
		    ip6e_nxt), &off)) == NULL)
			panic("pf_reassemble6: short mbuf chain");
		*(mtod(m, caddr_t) + off) = proto;
		m = *m0;
	} else
		ip6->ip6_nxt = proto;

	if (hdrlen - sizeof(struct ip6_hdr) + total > IPV6_MAXPACKET) {
		DPFPRINTF(LOG_NOTICE, "drop: too big: %d", total);
		ip6->ip6_plen = 0;
		REASON_SET(reason, PFRES_SHORT);
		/* PF_DROP requires a valid mbuf *m0 in pf_test6() */
		return (PF_DROP);
	}

	DPFPRINTF(LOG_NOTICE, "complete: %p(%d)", m, ntohs(ip6->ip6_plen));
	return (PF_PASS);

fail:
	REASON_SET(reason, PFRES_MEMORY);
	/* PF_DROP requires a valid mbuf *m0 in pf_test6(), will free later */
	return (PF_DROP);
}
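/*
 * Note (added): refragmentation sketch, assuming ip6_fragment() splits
 * to packets no larger than the given mtu.  The "mtu" handed to it is
 * derived from the largest fragment seen on input, not from any
 * interface; e.g. ft_maxlen 1232 and ft_hdrlen 40 give mtu = 40 + 8 +
 * 1232 = 1280, so the forwarded fragments are no larger than the ones
 * that arrived and the firewall stays transparent on the path.
 */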
int
pf_refragment6(struct mbuf **m0, struct m_tag *mtag, int dir)
{
	struct mbuf		*m = *m0, *t;
	struct pf_fragment_tag	*ftag = (struct pf_fragment_tag *)(mtag + 1);
	u_int32_t		 mtu;
	u_int16_t		 hdrlen, extoff, maxlen;
	u_int8_t		 proto;
	int			 error, action;

	hdrlen = ftag->ft_hdrlen;
	extoff = ftag->ft_extoff;
	maxlen = ftag->ft_maxlen;
	m_tag_delete(m, mtag);
	mtag = NULL;
	ftag = NULL;

	/* Checksum must be calculated for the whole packet */
	in6_proto_cksum_out(m, NULL);

	if (extoff) {
		int off;

		/* Use protocol from next field of last extension header */
		if ((m = m_getptr(m, extoff + offsetof(struct ip6_ext,
		    ip6e_nxt), &off)) == NULL)
			panic("pf_refragment6: short mbuf chain");
		proto = *(mtod(m, caddr_t) + off);
		*(mtod(m, caddr_t) + off) = IPPROTO_FRAGMENT;
		m = *m0;
	} else {
		struct ip6_hdr *hdr;

		hdr = mtod(m, struct ip6_hdr *);
		proto = hdr->ip6_nxt;
		hdr->ip6_nxt = IPPROTO_FRAGMENT;
	}

	/*
	 * Maxlen may be less than 8 iff there was only a single
	 * fragment.  As the packet was fragmented before, add a
	 * fragment header even for a single fragment.  If total or
	 * maxlen is less than 8, ip6_fragment() will return EMSGSIZE
	 * and we drop the packet.
	 */
	mtu = hdrlen + sizeof(struct ip6_frag) + maxlen;
	error = ip6_fragment(m, hdrlen, proto, mtu);

	m = (*m0)->m_nextpkt;
	(*m0)->m_nextpkt = NULL;
	if (error == 0) {
		/* The first mbuf contains the unfragmented packet */
		m_freem(*m0);
		*m0 = NULL;
		action = PF_PASS;
	} else {
		/* Drop expects an mbuf to free */
		DPFPRINTF(LOG_NOTICE, "refragment error %d", error);
		action = PF_DROP;
	}
	for (t = m; m; m = t) {
		t = m->m_nextpkt;
		m->m_nextpkt = NULL;
		m->m_pkthdr.pf.flags |= PF_TAG_REFRAGMENTED;
		if (error == 0)
			ip6_forward(m, 0);
		else
			m_freem(m);
	}

	return (action);
}
#endif /* INET6 */

int
pf_normalize_ip(struct pf_pdesc *pd, u_short *reason)
{
	struct ip	*h = mtod(pd->m, struct ip *);
	u_int16_t	 fragoff = (ntohs(h->ip_off) & IP_OFFMASK) << 3;
	u_int16_t	 mff = (ntohs(h->ip_off) & IP_MF);

	if (!fragoff && !mff)
		goto no_fragment;

	/* Clear IP_DF if we're in no-df mode */
	if (pf_status.reass & PF_REASS_NODF && h->ip_off & htons(IP_DF))
		h->ip_off &= htons(~IP_DF);

	/*
	 * We're dealing with a fragment now.  Don't allow fragments
	 * with IP_DF to enter the cache.  If the flag was cleared by
	 * no-df above, fine.  Otherwise drop it.
	 */
	if (h->ip_off & htons(IP_DF)) {
		DPFPRINTF(LOG_NOTICE, "bad fragment: IP_DF");
		REASON_SET(reason, PFRES_FRAG);
		return (PF_DROP);
	}

	if (!pf_status.reass)
		return (PF_PASS);	/* no reassembly */

	/* Returns PF_DROP or m is NULL or completely reassembled mbuf */
	if (pf_reassemble(&pd->m, pd->dir, reason) != PF_PASS)
		return (PF_DROP);
	if (pd->m == NULL)
		return (PF_PASS);  /* packet has been reassembled, no error */

	h = mtod(pd->m, struct ip *);

no_fragment:
	/* At this point, only IP_DF is allowed in ip_off */
	if (h->ip_off & ~htons(IP_DF))
		h->ip_off &= htons(IP_DF);

	return (PF_PASS);
}

#ifdef INET6
int
pf_normalize_ip6(struct pf_pdesc *pd, u_short *reason)
{
	struct ip6_frag	 frag;

	if (pd->fragoff == 0)
		goto no_fragment;

	if (!pf_pull_hdr(pd->m, pd->fragoff, &frag, sizeof(frag), NULL, reason,
	    AF_INET6))
		return (PF_DROP);

	if (!pf_status.reass)
		return (PF_PASS);	/* no reassembly */

	/* Returns PF_DROP or m is NULL or completely reassembled mbuf */
	if (pf_reassemble6(&pd->m, &frag, pd->fragoff + sizeof(frag),
	    pd->extoff, pd->dir, reason) != PF_PASS)
		return (PF_DROP);
	if (pd->m == NULL)
		return (PF_PASS);  /* packet has been reassembled, no error */

no_fragment:
	return (PF_PASS);
}
#endif /* INET6 */
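/*
 * Note (added): the checks below enforce basic TCP flag sanity.
 * SYN+RST can never occur legitimately; SYN+FIN is cleared to plain
 * SYN rather than dropped; once the SYN phase is over a segment must
 * carry at least one of ACK or RST; and FIN/PUSH/URG only make sense
 * together with ACK.  Anything rewritten is checksummed and copied
 * back before the packet proceeds.
 */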
int
pf_normalize_tcp(struct pf_pdesc *pd)
{
	struct tcphdr	*th = pd->hdr.tcp;
	u_short		 reason;
	u_int8_t	 flags;
	u_int		 rewrite = 0;

	if (pd->csum_status == PF_CSUM_UNKNOWN)
		pf_check_proto_cksum(pd, pd->off, pd->tot_len - pd->off,
		    pd->proto, pd->af);

	flags = th->th_flags;
	if (flags & TH_SYN) {
		/* Illegal packet */
		if (flags & TH_RST)
			goto tcp_drop;

		if (flags & TH_FIN)	/* XXX why clear instead of drop? */
			flags &= ~TH_FIN;
	} else {
		/* Illegal packet */
		if (!(flags & (TH_ACK|TH_RST)))
			goto tcp_drop;
	}

	if (!(flags & TH_ACK)) {
		/* These flags are only valid if ACK is set */
		if (flags & (TH_FIN|TH_PUSH|TH_URG))
			goto tcp_drop;
	}

	/* If flags changed, or reserved data set, then adjust */
	if (flags != th->th_flags || th->th_x2 != 0) {
		th->th_flags = flags;
		th->th_x2 = 0;
		rewrite = 1;
	}

	/* Remove urgent pointer, if TH_URG is not set */
	if (!(flags & TH_URG) && th->th_urp) {
		th->th_urp = 0;
		rewrite = 1;
	}

	/* copy back packet headers if we sanitized */
	if (rewrite) {
		pf_cksum(pd, pd->m);
		m_copyback(pd->m, pd->off, sizeof(*th), th, M_NOWAIT);
	}

	return (PF_PASS);

tcp_drop:
	REASON_SET(&reason, PFRES_NORM);
	return (PF_DROP);
}
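/*
 * Note (added): timestamp modulation scheme.  pfss_ts_mod is a random
 * per-peer offset chosen below; pf_normalize_tcp_stateful() later
 * rewrites each peer's TSval to TSval + ts_mod (mod 2^32) and subtracts
 * the same offset from the TSecr echoed back, hiding the host's real
 * timestamp clock (and thus its uptime) from the other end.
 */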
int
pf_normalize_tcp_init(struct pf_pdesc *pd, struct pf_state_peer *src,
    struct pf_state_peer *dst)
{
	struct tcphdr	*th = pd->hdr.tcp;
	u_int32_t	 tsval, tsecr;
	u_int8_t	 hdr[60];
	u_int8_t	*opt;

	KASSERT(src->scrub == NULL);

	src->scrub = pool_get(&pf_state_scrub_pl, PR_NOWAIT);
	if (src->scrub == NULL)
		return (1);
	bzero(src->scrub, sizeof(*src->scrub));

	switch (pd->af) {
	case AF_INET: {
		struct ip *h = mtod(pd->m, struct ip *);
		src->scrub->pfss_ttl = h->ip_ttl;
		break;
	}
#ifdef INET6
	case AF_INET6: {
		struct ip6_hdr *h = mtod(pd->m, struct ip6_hdr *);
		src->scrub->pfss_ttl = h->ip6_hlim;
		break;
	}
#endif /* INET6 */
	}

	/*
	 * All normalizations below are only begun if we see the start of
	 * the connection.  They must all set an enabled bit in pfss_flags.
	 */
	if ((th->th_flags & TH_SYN) == 0)
		return (0);

	if (th->th_off > (sizeof(struct tcphdr) >> 2) && src->scrub &&
	    pf_pull_hdr(pd->m, pd->off, hdr, th->th_off << 2, NULL, NULL,
	    pd->af)) {
		/* Diddle with TCP options */
		int	hlen;

		opt = hdr + sizeof(struct tcphdr);
		hlen = (th->th_off << 2) - sizeof(struct tcphdr);
		while (hlen >= TCPOLEN_TIMESTAMP) {
			switch (*opt) {
			case TCPOPT_EOL:	/* FALLTHROUGH */
			case TCPOPT_NOP:
				opt++;
				hlen--;
				break;
			case TCPOPT_TIMESTAMP:
				if (opt[1] >= TCPOLEN_TIMESTAMP) {
					src->scrub->pfss_flags |=
					    PFSS_TIMESTAMP;
					src->scrub->pfss_ts_mod =
					    htonl(arc4random());

					/* note PFSS_PAWS not set yet */
					memcpy(&tsval, &opt[2],
					    sizeof(u_int32_t));
					memcpy(&tsecr, &opt[6],
					    sizeof(u_int32_t));
					src->scrub->pfss_tsval0 = ntohl(tsval);
					src->scrub->pfss_tsval = ntohl(tsval);
					src->scrub->pfss_tsecr = ntohl(tsecr);
					getmicrouptime(&src->scrub->pfss_last);
				}
				/* FALLTHROUGH */
			default:
				hlen -= MAX(opt[1], 2);
				opt += MAX(opt[1], 2);
				break;
			}
		}
	}

	return (0);
}

void
pf_normalize_tcp_cleanup(struct pf_state *state)
{
	if (state->src.scrub)
		pool_put(&pf_state_scrub_pl, state->src.scrub);
	if (state->dst.scrub)
		pool_put(&pf_state_scrub_pl, state->dst.scrub);

	/* Someday... flush the TCP segment reassembly descriptors. */
}

int
pf_normalize_tcp_stateful(struct pf_pdesc *pd, u_short *reason,
    struct pf_state *state, struct pf_state_peer *src,
    struct pf_state_peer *dst, int *writeback)
{
	struct tcphdr	*th = pd->hdr.tcp;
	struct timeval	 uptime;
	u_int32_t	 tsval, tsecr;
	u_int		 tsval_from_last;
	u_int8_t	 hdr[60];
	u_int8_t	*opt;
	int		 copyback = 0;
	int		 got_ts = 0;

	KASSERT(src->scrub || dst->scrub);

	/*
	 * Enforce the minimum TTL seen for this connection.  Negate a common
	 * technique to evade an intrusion detection system and confuse
	 * firewall state code.
	 */
	switch (pd->af) {
	case AF_INET:
		if (src->scrub) {
			struct ip *h = mtod(pd->m, struct ip *);
			if (h->ip_ttl > src->scrub->pfss_ttl)
				src->scrub->pfss_ttl = h->ip_ttl;
			h->ip_ttl = src->scrub->pfss_ttl;
		}
		break;
#ifdef INET6
	case AF_INET6:
		if (src->scrub) {
			struct ip6_hdr *h = mtod(pd->m, struct ip6_hdr *);
			if (h->ip6_hlim > src->scrub->pfss_ttl)
				src->scrub->pfss_ttl = h->ip6_hlim;
			h->ip6_hlim = src->scrub->pfss_ttl;
		}
		break;
#endif /* INET6 */
	}

	if (th->th_off > (sizeof(struct tcphdr) >> 2) &&
	    ((src->scrub && (src->scrub->pfss_flags & PFSS_TIMESTAMP)) ||
	    (dst->scrub && (dst->scrub->pfss_flags & PFSS_TIMESTAMP))) &&
	    pf_pull_hdr(pd->m, pd->off, hdr, th->th_off << 2, NULL, NULL,
	    pd->af)) {
		/* Diddle with TCP options */
		int	hlen;

		opt = hdr + sizeof(struct tcphdr);
		hlen = (th->th_off << 2) - sizeof(struct tcphdr);
		while (hlen >= TCPOLEN_TIMESTAMP) {
			switch (*opt) {
			case TCPOPT_EOL:	/* FALLTHROUGH */
			case TCPOPT_NOP:
				opt++;
				hlen--;
				break;
			case TCPOPT_TIMESTAMP:
				/*
				 * Modulate the timestamps.  Can be used for
				 * NAT detection, OS uptime determination or
				 * reboot detection.
				 */

				if (got_ts) {
					/* Huh?  Multiple timestamps!? */
					if (pf_status.debug >= LOG_NOTICE) {
						log(LOG_NOTICE,
						    "pf: %s: multiple TS??",
						    __func__);
						pf_print_state(state);
						addlog("\n");
					}
					REASON_SET(reason, PFRES_TS);
					return (PF_DROP);
				}
				if (opt[1] >= TCPOLEN_TIMESTAMP) {
					memcpy(&tsval, &opt[2],
					    sizeof(u_int32_t));
					if (tsval && src->scrub &&
					    (src->scrub->pfss_flags &
					    PFSS_TIMESTAMP)) {
						tsval = ntohl(tsval);
						pf_change_a(pd, &opt[2],
						    htonl(tsval +
						    src->scrub->pfss_ts_mod));
						copyback = 1;
					}

					/* Modulate TS reply iff valid (!0) */
					memcpy(&tsecr, &opt[6],
					    sizeof(u_int32_t));
					if (tsecr && dst->scrub &&
					    (dst->scrub->pfss_flags &
					    PFSS_TIMESTAMP)) {
						tsecr = ntohl(tsecr)
						    - dst->scrub->pfss_ts_mod;
						pf_change_a(pd, &opt[6],
						    htonl(tsecr));
						copyback = 1;
					}
					got_ts = 1;
				}
				/* FALLTHROUGH */
			default:
				hlen -= MAX(opt[1], 2);
				opt += MAX(opt[1], 2);
				break;
			}
		}
		if (copyback) {
			/* Copyback the options, caller copies back header */
			*writeback = 1;
			m_copyback(pd->m, pd->off + sizeof(struct tcphdr),
			    (th->th_off << 2) - sizeof(struct tcphdr), hdr +
			    sizeof(struct tcphdr), M_NOWAIT);
		}
	}

	/*
	 * Must invalidate PAWS checks on connections idle for too long.
	 * The fastest allowed timestamp clock is 1ms.  That turns out to
	 * be about 24 days before it wraps.  XXX Right now our lowerbound
	 * TS echo check only works for the first 12 days of a connection
	 * when the TS has exhausted half its 32bit space.
	 */
#define TS_MAX_IDLE	(24*24*60*60)
#define TS_MAX_CONN	(12*24*60*60)	/* XXX remove when better tsecr check */
	getmicrouptime(&uptime);
	if (src->scrub && (src->scrub->pfss_flags & PFSS_PAWS) &&
	    (uptime.tv_sec - src->scrub->pfss_last.tv_sec > TS_MAX_IDLE ||
	    time_uptime - state->creation > TS_MAX_CONN)) {
		if (pf_status.debug >= LOG_NOTICE) {
			log(LOG_NOTICE, "pf: src idled out of PAWS ");
			pf_print_state(state);
			addlog("\n");
		}
		src->scrub->pfss_flags =
		    (src->scrub->pfss_flags & ~PFSS_PAWS) | PFSS_PAWS_IDLED;
	}
	if (dst->scrub && (dst->scrub->pfss_flags & PFSS_PAWS) &&
	    uptime.tv_sec - dst->scrub->pfss_last.tv_sec > TS_MAX_IDLE) {
		if (pf_status.debug >= LOG_NOTICE) {
			log(LOG_NOTICE, "pf: dst idled out of PAWS ");
			pf_print_state(state);
			addlog("\n");
		}
		dst->scrub->pfss_flags =
		    (dst->scrub->pfss_flags & ~PFSS_PAWS) | PFSS_PAWS_IDLED;
	}

	if (got_ts && src->scrub && dst->scrub &&
	    (src->scrub->pfss_flags & PFSS_PAWS) &&
	    (dst->scrub->pfss_flags & PFSS_PAWS)) {
		/*
		 * Validate that the timestamps are "in-window".
		 * RFC1323 describes TCP Timestamp options that allow
		 * measurement of RTT (round trip time) and PAWS
		 * (protection against wrapped sequence numbers).  PAWS
		 * gives us a set of rules for rejecting packets on
		 * long fat pipes (packets that were somehow delayed
		 * in transit longer than the time it took to send the
		 * full TCP sequence space of 4GB).  We can use these
		 * rules and infer a few others that will let us treat
		 * the 32bit timestamp and the 32bit echoed timestamp
		 * as sequence numbers to prevent a blind attacker from
		 * inserting packets into a connection.
		 *
		 * RFC1323 tells us:
		 *  - The timestamp on this packet must be greater than
		 *    or equal to the last value echoed by the other
		 *    endpoint.  The RFC says those will be discarded
		 *    since it is a dup that has already been acked.
		 *    This gives us a lowerbound on the timestamp.
		 *        timestamp >= other last echoed timestamp
		 *  - The timestamp will be less than or equal to
		 *    the last timestamp plus the time between the
		 *    last packet and now.  The RFC defines the max
		 *    clock rate as 1ms.  We will allow clocks to be
		 *    up to 10% fast and will allow a total difference
		 *    of 30 seconds due to a route change.  And this
		 *    gives us an upperbound on the timestamp.
		 *        timestamp <= last timestamp + max ticks
		 *    We have to be careful here.  Windows will send an
		 *    initial timestamp of zero and then initialize it
		 *    to a random value after the 3whs; presumably to
		 *    avoid a DoS by having to call an expensive RNG
		 *    during a SYN flood.  Proof MS has at least one
		 *    good security geek.
		 *
		 *  - The TCP timestamp option must also echo the other
		 *    endpoint's timestamp.  The timestamp echoed is the
		 *    one carried on the earliest unacknowledged segment
		 *    on the left edge of the sequence window.  The RFC
		 *    states that the host will reject any echoed
		 *    timestamps that were larger than any ever sent.
		 *    This gives us an upperbound on the TS echo.
		 *        tsecr <= largest_tsval
		 *  - The lowerbound on the TS echo is a little more
		 *    tricky to determine.  The other endpoint's echoed
		 *    values will not decrease.  But there may be
		 *    network conditions that re-order packets and
		 *    cause our view of them to decrease.  For now the
		 *    only lowerbound we can safely determine is that
		 *    the TS echo will never be less than the original
		 *    TS.  XXX There is probably a better lowerbound.
		 *    Remove TS_MAX_CONN with better lowerbound check.
		 *        tsecr >= other original TS
		 *
		 * It is also important to note that the fastest
		 * timestamp clock of 1ms will wrap its 32bit space in
		 * 24 days.  So we just disable TS checking after 24
		 * days of idle time.  We actually must use a 12d
		 * connection limit until we can come up with a better
		 * lowerbound to the TS echo check.
		 */
		struct timeval	delta_ts;
		int		ts_fudge;

		/*
		 * PFTM_TS_DIFF is how many seconds of leeway to allow
		 * a host's timestamp.  This can happen if the previous
		 * packet got delayed in transit for much longer than
		 * this packet.
		 */
		if ((ts_fudge = state->rule.ptr->timeout[PFTM_TS_DIFF]) == 0)
			ts_fudge = pf_default_rule.timeout[PFTM_TS_DIFF];

		/* Calculate max ticks since the last timestamp */
#define TS_MAXFREQ	1100		/* RFC max TS freq of 1kHz + 10% skew */
#define TS_MICROSECS	1000000		/* microseconds per second */
		timersub(&uptime, &src->scrub->pfss_last, &delta_ts);
		tsval_from_last = (delta_ts.tv_sec + ts_fudge) * TS_MAXFREQ;
		tsval_from_last += delta_ts.tv_usec / (TS_MICROSECS/TS_MAXFREQ);
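		/*
		 * Note (added): worked example.  With delta_ts = 2.5s
		 * and ts_fudge = 30s, tsval_from_last = (2 + 30) * 1100
		 * + 500000 / 909 = 35200 + 550 = 35750 ticks, the most
		 * the peer's 1kHz (+10%) timestamp clock may have
		 * advanced since the last packet we saw from it.
		 */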
		if ((src->state >= TCPS_ESTABLISHED &&
		    dst->state >= TCPS_ESTABLISHED) &&
		    (SEQ_LT(tsval, dst->scrub->pfss_tsecr) ||
		    SEQ_GT(tsval, src->scrub->pfss_tsval + tsval_from_last) ||
		    (tsecr && (SEQ_GT(tsecr, dst->scrub->pfss_tsval) ||
		    SEQ_LT(tsecr, dst->scrub->pfss_tsval0))))) {
			/*
			 * Bad RFC1323 implementation or an insertion attack.
			 *
			 *  - Solaris 2.6 and 2.7 are known to send another
			 *    ACK after the FIN,FIN|ACK,ACK closing that
			 *    carries an old timestamp.
			 */

			DPFPRINTF(LOG_NOTICE, "Timestamp failed %c%c%c%c",
			    SEQ_LT(tsval, dst->scrub->pfss_tsecr) ? '0' : ' ',
			    SEQ_GT(tsval, src->scrub->pfss_tsval +
			    tsval_from_last) ? '1' : ' ',
			    SEQ_GT(tsecr, dst->scrub->pfss_tsval) ? '2' : ' ',
			    SEQ_LT(tsecr, dst->scrub->pfss_tsval0) ? '3' : ' ');
			DPFPRINTF(LOG_NOTICE, " tsval: %u tsecr: %u "
			    "+ticks: %u idle: %llu.%06lus", tsval, tsecr,
			    tsval_from_last, (long long)delta_ts.tv_sec,
			    delta_ts.tv_usec);
			DPFPRINTF(LOG_NOTICE, " src->tsval: %u tsecr: %u",
			    src->scrub->pfss_tsval, src->scrub->pfss_tsecr);
			DPFPRINTF(LOG_NOTICE, " dst->tsval: %u tsecr: %u "
			    "tsval0: %u", dst->scrub->pfss_tsval,
			    dst->scrub->pfss_tsecr, dst->scrub->pfss_tsval0);
			if (pf_status.debug >= LOG_NOTICE) {
				log(LOG_NOTICE, "pf: ");
				pf_print_state(state);
				pf_print_flags(th->th_flags);
				addlog("\n");
			}
			REASON_SET(reason, PFRES_TS);
			return (PF_DROP);
		}
		/* XXX I'd really like to require tsecr but it's optional */
	} else if (!got_ts && (th->th_flags & TH_RST) == 0 &&
	    ((src->state == TCPS_ESTABLISHED && dst->state == TCPS_ESTABLISHED)
	    || pd->p_len > 0 || (th->th_flags & TH_SYN)) &&
	    src->scrub && dst->scrub &&
	    (src->scrub->pfss_flags & PFSS_PAWS) &&
	    (dst->scrub->pfss_flags & PFSS_PAWS)) {
		/*
		 * Didn't send a timestamp.  Timestamps aren't really useful
		 * when:
		 *  - connection opening or closing (often not even sent).
		 *    but we must not let an attacker put a FIN on a
		 *    data packet to sneak it through our ESTABLISHED check.
		 *  - on a TCP reset.  RFC suggests not even looking at TS.
		 *  - on an empty ACK.  The TS will not be echoed so it will
		 *    probably not help keep the RTT calculation in sync and
		 *    there isn't as much danger when the sequence numbers
		 *    got wrapped.  So some stacks don't include TS on empty
		 *    ACKs :-(
		 *
		 * To minimize the disruption to mostly RFC1323 conformant
		 * stacks, we will only require timestamps on data packets.
		 *
		 * And what do ya know, we cannot require timestamps on data
		 * packets.  There appear to be devices that do legitimate
		 * TCP connection hijacking.  There are HTTP devices that allow
		 * a 3whs (with timestamps) and then buffer the HTTP request.
		 * If the intermediate device has the HTTP response cache, it
		 * will spoof the response but not bother timestamping its
		 * packets.  So we can look for the presence of a timestamp in
		 * the first data packet and if there, require it in all future
		 * packets.
		 */

		if (pd->p_len > 0 && (src->scrub->pfss_flags & PFSS_DATA_TS)) {
			/*
			 * Hey!  Someone tried to sneak a packet in.  Or the
			 * stack changed its RFC1323 behavior?!?!
			 */
			if (pf_status.debug >= LOG_NOTICE) {
				log(LOG_NOTICE,
				    "pf: did not receive expected RFC1323 "
				    "timestamp");
				pf_print_state(state);
				pf_print_flags(th->th_flags);
				addlog("\n");
			}
			REASON_SET(reason, PFRES_TS);
			return (PF_DROP);
		}
	}

	/*
	 * We will note if a host sends his data packets with or without
	 * timestamps.  And require all data packets to contain a timestamp
	 * if the first does.  PAWS implicitly requires that all data packets
	 * be timestamped.  But I think there are middle-man devices that
	 * hijack TCP streams immediately after the 3whs and don't timestamp
	 * their packets (seen in a WWW accelerator or cache).
	 */
	if (pd->p_len > 0 && src->scrub && (src->scrub->pfss_flags &
	    (PFSS_TIMESTAMP|PFSS_DATA_TS|PFSS_DATA_NOTS)) == PFSS_TIMESTAMP) {
		if (got_ts)
			src->scrub->pfss_flags |= PFSS_DATA_TS;
		else {
			src->scrub->pfss_flags |= PFSS_DATA_NOTS;
			if (pf_status.debug >= LOG_NOTICE && dst->scrub &&
			    (dst->scrub->pfss_flags & PFSS_TIMESTAMP)) {
				/* Don't warn if other host rejected RFC1323 */
				log(LOG_NOTICE,
				    "pf: broken RFC1323 stack did not "
				    "timestamp data packet. Disabled PAWS "
				    "security.");
				pf_print_state(state);
				pf_print_flags(th->th_flags);
				addlog("\n");
			}
		}
	}

	/*
	 * Update PAWS values
	 */
	if (got_ts && src->scrub && PFSS_TIMESTAMP == (src->scrub->pfss_flags &
	    (PFSS_PAWS_IDLED|PFSS_TIMESTAMP))) {
		getmicrouptime(&src->scrub->pfss_last);
		if (SEQ_GEQ(tsval, src->scrub->pfss_tsval) ||
		    (src->scrub->pfss_flags & PFSS_PAWS) == 0)
			src->scrub->pfss_tsval = tsval;

		if (tsecr) {
			if (SEQ_GEQ(tsecr, src->scrub->pfss_tsecr) ||
			    (src->scrub->pfss_flags & PFSS_PAWS) == 0)
				src->scrub->pfss_tsecr = tsecr;

			if ((src->scrub->pfss_flags & PFSS_PAWS) == 0 &&
			    (SEQ_LT(tsval, src->scrub->pfss_tsval0) ||
			    src->scrub->pfss_tsval0 == 0)) {
				/* tsval0 MUST be the lowest timestamp */
				src->scrub->pfss_tsval0 = tsval;
			}

			/* Only fully initialized after a TS gets echoed */
			if ((src->scrub->pfss_flags & PFSS_PAWS) == 0)
				src->scrub->pfss_flags |= PFSS_PAWS;
		}
	}

	/* I have a dream....  TCP segment reassembly.... */
	return (0);
}
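/*
 * Note (added): MSS clamping example.  With a scrub rule of max-mss
 * 1380, a SYN carrying an MSS option of 1460 is rewritten in place to
 * 1380 below and the TCP checksum is recomputed before the header is
 * copied back; options other than TCPOPT_MAXSEG are only stepped over.
 */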
int
pf_normalize_mss(struct pf_pdesc *pd, u_int16_t maxmss)
{
	struct tcphdr	*th = pd->hdr.tcp;
	u_int16_t	 mss;
	int		 thoff;
	int		 opt, cnt, optlen = 0;
	u_char		 opts[MAX_TCPOPTLEN];
	u_char		*optp = opts;

	if (pd->csum_status == PF_CSUM_UNKNOWN)
		pf_check_proto_cksum(pd, pd->off, pd->tot_len - pd->off,
		    pd->proto, pd->af);

	thoff = th->th_off << 2;
	cnt = thoff - sizeof(struct tcphdr);

	if (cnt <= 0 || cnt > MAX_TCPOPTLEN || !pf_pull_hdr(pd->m,
	    pd->off + sizeof(*th), opts, cnt, NULL, NULL, pd->af))
		return (0);

	for (; cnt > 0; cnt -= optlen, optp += optlen) {
		opt = optp[0];
		if (opt == TCPOPT_EOL)
			break;
		if (opt == TCPOPT_NOP)
			optlen = 1;
		else {
			if (cnt < 2)
				break;
			optlen = optp[1];
			if (optlen < 2 || optlen > cnt)
				break;
		}
		if (opt == TCPOPT_MAXSEG) {
			memcpy(&mss, (optp + 2), 2);
			if (ntohs(mss) > maxmss) {
				mss = htons(maxmss);
				m_copyback(pd->m,
				    pd->off + sizeof(*th) + optp + 2 - opts,
				    2, &mss, M_NOWAIT);
				pf_cksum(pd, pd->m);
				m_copyback(pd->m, pd->off, sizeof(*th), th,
				    M_NOWAIT);
			}
		}
	}

	return (0);
}
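/*
 * Note (added): in pf_scrub() below, ip6_flow packs version (4 bits),
 * traffic class (8 bits) and flow label (20 bits) into one 32-bit
 * word.  htonl(0x0fc00000) covers the DSCP portion of the traffic
 * class, so clearing it and OR-ing in tos << 20 rewrites the DSCP
 * bits while the mask leaves the two ECN bits (0x00300000) untouched,
 * matching the IPv4 branch's use of IPTOS_ECN_MASK.
 */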
void
pf_scrub(struct mbuf *m, u_int16_t flags, sa_family_t af, u_int8_t min_ttl,
    u_int8_t tos)
{
	struct ip	*h = mtod(m, struct ip *);
#ifdef INET6
	struct ip6_hdr	*h6 = mtod(m, struct ip6_hdr *);
#endif

	/* Clear IP_DF if no-df was requested */
	if (flags & PFSTATE_NODF && af == AF_INET && h->ip_off & htons(IP_DF))
		h->ip_off &= htons(~IP_DF);

	/* Enforce a minimum ttl, may cause endless packet loops */
	if (min_ttl && af == AF_INET && h->ip_ttl < min_ttl)
		h->ip_ttl = min_ttl;
#ifdef INET6
	if (min_ttl && af == AF_INET6 && h6->ip6_hlim < min_ttl)
		h6->ip6_hlim = min_ttl;
#endif

	/* Enforce tos */
	if (flags & PFSTATE_SETTOS) {
		if (af == AF_INET)
			h->ip_tos = tos | (h->ip_tos & IPTOS_ECN_MASK);
#ifdef INET6
		if (af == AF_INET6) {
			/* drugs are unable to explain such idiocy */
			h6->ip6_flow &= ~htonl(0x0fc00000);
			h6->ip6_flow |= htonl(((u_int32_t)tos) << 20);
		}
#endif
	}

	/* random-id, but not for fragments */
	if (flags & PFSTATE_RANDOMID && af == AF_INET &&
	    !(h->ip_off & ~htons(IP_DF)))
		h->ip_id = htons(ip_randomid());
}