1 /* $OpenBSD: pf_norm.c,v 1.190 2016/08/24 09:41:12 mpi Exp $ */ 2 3 /* 4 * Copyright 2001 Niels Provos <provos@citi.umich.edu> 5 * Copyright 2009 Henning Brauer <henning@openbsd.org> 6 * Copyright 2011 Alexander Bluhm <bluhm@openbsd.org> 7 * All rights reserved. 8 * 9 * Redistribution and use in source and binary forms, with or without 10 * modification, are permitted provided that the following conditions 11 * are met: 12 * 1. Redistributions of source code must retain the above copyright 13 * notice, this list of conditions and the following disclaimer. 14 * 2. Redistributions in binary form must reproduce the above copyright 15 * notice, this list of conditions and the following disclaimer in the 16 * documentation and/or other materials provided with the distribution. 17 * 18 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR 19 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 20 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 21 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, 22 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 23 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 24 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 25 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 26 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF 27 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 28 */ 29 30 #include "pflog.h" 31 32 #include <sys/param.h> 33 #include <sys/systm.h> 34 #include <sys/mbuf.h> 35 #include <sys/filio.h> 36 #include <sys/fcntl.h> 37 #include <sys/socket.h> 38 #include <sys/kernel.h> 39 #include <sys/time.h> 40 #include <sys/pool.h> 41 #include <sys/syslog.h> 42 43 #include <netinet/in.h> 44 #include <netinet/ip.h> 45 #include <netinet/ip_var.h> 46 #include <netinet/tcp.h> 47 #include <netinet/tcp_seq.h> 48 #include <netinet/tcp_fsm.h> 49 #include <netinet/udp.h> 50 #include <netinet/ip_icmp.h> 51 52 #include <net/if.h> 53 #include <net/if_var.h> 54 #include <net/if_pflog.h> 55 56 #ifdef INET6 57 #include <netinet/ip6.h> 58 #include <netinet6/ip6_var.h> 59 #include <netinet6/in6_var.h> 60 #include <netinet6/nd6.h> 61 #include <netinet/icmp6.h> 62 #endif /* INET6 */ 63 64 #include <net/pfvar.h> 65 66 struct pf_frent { 67 TAILQ_ENTRY(pf_frent) fr_next; 68 struct mbuf *fe_m; 69 u_int16_t fe_hdrlen; /* ipv4 header length with ip options 70 ipv6, extension, fragment header */ 71 u_int16_t fe_extoff; /* last extension header offset or 0 */ 72 u_int16_t fe_len; /* fragment length */ 73 u_int16_t fe_off; /* fragment offset */ 74 u_int16_t fe_mff; /* more fragment flag */ 75 }; 76 77 /* keep synced with struct pf_fragment, used in RB_FIND */ 78 struct pf_fragment_cmp { 79 struct pf_addr fr_src; 80 struct pf_addr fr_dst; 81 u_int32_t fr_id; 82 sa_family_t fr_af; 83 u_int8_t fr_proto; 84 u_int8_t fr_direction; 85 }; 86 87 struct pf_fragment { 88 struct pf_addr fr_src; /* ip source address */ 89 struct pf_addr fr_dst; /* ip destination address */ 90 u_int32_t fr_id; /* fragment id for reassemble */ 91 sa_family_t fr_af; /* address family */ 92 u_int8_t fr_proto; /* protocol of this fragment */ 93 u_int8_t fr_direction; /* pf packet direction */ 94 95 RB_ENTRY(pf_fragment) fr_entry; 96 TAILQ_ENTRY(pf_fragment) frag_next; 97 TAILQ_HEAD(pf_fragq, pf_frent) fr_queue; 98 int32_t fr_timeout; 99 u_int16_t fr_maxlen; /* maximum length of single fragment */ 100 }; 101 102 struct pf_fragment_tag { 103 u_int16_t ft_hdrlen; /* header length of reassembled pkt */ 104 u_int16_t ft_extoff; /* last extension header offset or 0 */ 105 u_int16_t ft_maxlen; /* maximum fragment payload length */ 106 }; 107 108 TAILQ_HEAD(pf_fragqueue, pf_fragment) pf_fragqueue; 109 110 static __inline int pf_frag_compare(struct pf_fragment *, 111 struct pf_fragment *); 112 RB_HEAD(pf_frag_tree, pf_fragment) pf_frag_tree, pf_cache_tree; 113 RB_PROTOTYPE(pf_frag_tree, pf_fragment, fr_entry, pf_frag_compare); 114 RB_GENERATE(pf_frag_tree, pf_fragment, fr_entry, pf_frag_compare); 115 116 /* Private prototypes */ 117 void pf_flush_fragments(void); 118 void pf_free_fragment(struct pf_fragment *); 119 struct pf_fragment *pf_find_fragment(struct pf_fragment_cmp *, 120 struct pf_frag_tree *); 121 struct pf_frent *pf_create_fragment(u_short *); 122 struct pf_fragment *pf_fillup_fragment(struct pf_fragment_cmp *, 123 struct pf_frent *, u_short *); 124 int pf_isfull_fragment(struct pf_fragment *); 125 struct mbuf *pf_join_fragment(struct pf_fragment *); 126 int pf_reassemble(struct mbuf **, int, u_short *); 127 #ifdef INET6 128 int pf_reassemble6(struct mbuf **, struct ip6_frag *, 129 u_int16_t, u_int16_t, int, u_short *); 130 #endif /* INET6 */ 131 132 /* Globals */ 133 struct pool pf_frent_pl, pf_frag_pl; 134 struct pool pf_state_scrub_pl; 135 int pf_nfrents; 136 137 void 138 pf_normalize_init(void) 139 { 140 pool_init(&pf_frent_pl, sizeof(struct pf_frent), 0, 0, 0, "pffrent", 141 NULL); 142 pool_init(&pf_frag_pl, sizeof(struct pf_fragment), 0, 0, 0, "pffrag", 143 NULL); 144 pool_init(&pf_state_scrub_pl, sizeof(struct pf_state_scrub), 0, 0, 0, 145 "pfstscr", NULL); 146 147 pool_sethiwat(&pf_frag_pl, PFFRAG_FRAG_HIWAT); 148 pool_sethardlimit(&pf_frent_pl, PFFRAG_FRENT_HIWAT, NULL, 0); 149 150 TAILQ_INIT(&pf_fragqueue); 151 } 152 153 static __inline int 154 pf_frag_compare(struct pf_fragment *a, struct pf_fragment *b) 155 { 156 int diff; 157 158 if ((diff = a->fr_id - b->fr_id) != 0) 159 return (diff); 160 if ((diff = a->fr_proto - b->fr_proto) != 0) 161 return (diff); 162 if ((diff = a->fr_af - b->fr_af) != 0) 163 return (diff); 164 if ((diff = pf_addr_compare(&a->fr_src, &b->fr_src, a->fr_af)) != 0) 165 return (diff); 166 if ((diff = pf_addr_compare(&a->fr_dst, &b->fr_dst, a->fr_af)) != 0) 167 return (diff); 168 169 return (0); 170 } 171 172 void 173 pf_purge_expired_fragments(void) 174 { 175 struct pf_fragment *frag; 176 int32_t expire; 177 178 expire = time_uptime - pf_default_rule.timeout[PFTM_FRAG]; 179 while ((frag = TAILQ_LAST(&pf_fragqueue, pf_fragqueue)) != NULL) { 180 if (frag->fr_timeout > expire) 181 break; 182 DPFPRINTF(LOG_NOTICE, "expiring %d(%p)", frag->fr_id, frag); 183 pf_free_fragment(frag); 184 } 185 } 186 187 /* 188 * Try to flush old fragments to make space for new ones 189 */ 190 void 191 pf_flush_fragments(void) 192 { 193 struct pf_fragment *frag; 194 int goal; 195 196 goal = pf_nfrents * 9 / 10; 197 DPFPRINTF(LOG_NOTICE, "trying to free > %d frents", pf_nfrents - goal); 198 while (goal < pf_nfrents) { 199 if ((frag = TAILQ_LAST(&pf_fragqueue, pf_fragqueue)) == NULL) 200 break; 201 pf_free_fragment(frag); 202 } 203 } 204 205 /* 206 * Remove a fragment from the fragment queue, free its fragment entries, 207 * and free the fragment itself. 208 */ 209 void 210 pf_free_fragment(struct pf_fragment *frag) 211 { 212 struct pf_frent *frent; 213 214 RB_REMOVE(pf_frag_tree, &pf_frag_tree, frag); 215 TAILQ_REMOVE(&pf_fragqueue, frag, frag_next); 216 217 /* Free all fragment entries */ 218 while ((frent = TAILQ_FIRST(&frag->fr_queue)) != NULL) { 219 TAILQ_REMOVE(&frag->fr_queue, frent, fr_next); 220 m_freem(frent->fe_m); 221 pool_put(&pf_frent_pl, frent); 222 pf_nfrents--; 223 } 224 pool_put(&pf_frag_pl, frag); 225 } 226 227 struct pf_fragment * 228 pf_find_fragment(struct pf_fragment_cmp *key, struct pf_frag_tree *tree) 229 { 230 struct pf_fragment *frag; 231 232 frag = RB_FIND(pf_frag_tree, tree, (struct pf_fragment *)key); 233 if (frag != NULL) { 234 TAILQ_REMOVE(&pf_fragqueue, frag, frag_next); 235 TAILQ_INSERT_HEAD(&pf_fragqueue, frag, frag_next); 236 } 237 238 return (frag); 239 } 240 241 struct pf_frent * 242 pf_create_fragment(u_short *reason) 243 { 244 struct pf_frent *frent; 245 246 frent = pool_get(&pf_frent_pl, PR_NOWAIT); 247 if (frent == NULL) { 248 pf_flush_fragments(); 249 frent = pool_get(&pf_frent_pl, PR_NOWAIT); 250 if (frent == NULL) { 251 REASON_SET(reason, PFRES_MEMORY); 252 return (NULL); 253 } 254 } 255 pf_nfrents++; 256 257 return (frent); 258 } 259 260 struct pf_fragment * 261 pf_fillup_fragment(struct pf_fragment_cmp *key, struct pf_frent *frent, 262 u_short *reason) 263 { 264 struct pf_frent *after, *next, *prev; 265 struct pf_fragment *frag; 266 u_int16_t total; 267 268 /* No empty fragments */ 269 if (frent->fe_len == 0) { 270 DPFPRINTF(LOG_NOTICE, "bad fragment: len 0"); 271 goto bad_fragment; 272 } 273 274 /* All fragments are 8 byte aligned */ 275 if (frent->fe_mff && (frent->fe_len & 0x7)) { 276 DPFPRINTF(LOG_NOTICE, "bad fragment: mff and len %d", 277 frent->fe_len); 278 goto bad_fragment; 279 } 280 281 /* Respect maximum length, IP_MAXPACKET == IPV6_MAXPACKET */ 282 if (frent->fe_off + frent->fe_len > IP_MAXPACKET) { 283 DPFPRINTF(LOG_NOTICE, "bad fragment: max packet %d", 284 frent->fe_off + frent->fe_len); 285 goto bad_fragment; 286 } 287 288 DPFPRINTF(LOG_NOTICE, key->fr_af == AF_INET ? 289 "reass frag %d @ %d-%d" : "reass frag %#08x @ %d-%d", 290 key->fr_id, frent->fe_off, frent->fe_off + frent->fe_len); 291 292 /* Fully buffer all of the fragments in this fragment queue */ 293 frag = pf_find_fragment(key, &pf_frag_tree); 294 295 /* Create a new reassembly queue for this packet */ 296 if (frag == NULL) { 297 frag = pool_get(&pf_frag_pl, PR_NOWAIT); 298 if (frag == NULL) { 299 pf_flush_fragments(); 300 frag = pool_get(&pf_frag_pl, PR_NOWAIT); 301 if (frag == NULL) { 302 REASON_SET(reason, PFRES_MEMORY); 303 goto drop_fragment; 304 } 305 } 306 307 *(struct pf_fragment_cmp *)frag = *key; 308 TAILQ_INIT(&frag->fr_queue); 309 frag->fr_timeout = time_uptime; 310 frag->fr_maxlen = frent->fe_len; 311 312 RB_INSERT(pf_frag_tree, &pf_frag_tree, frag); 313 TAILQ_INSERT_HEAD(&pf_fragqueue, frag, frag_next); 314 315 /* We do not have a previous fragment */ 316 TAILQ_INSERT_HEAD(&frag->fr_queue, frent, fr_next); 317 318 return (frag); 319 } 320 321 KASSERT(!TAILQ_EMPTY(&frag->fr_queue)); 322 323 /* Remember maximum fragment len for refragmentation */ 324 if (frent->fe_len > frag->fr_maxlen) 325 frag->fr_maxlen = frent->fe_len; 326 327 /* Maximum data we have seen already */ 328 total = TAILQ_LAST(&frag->fr_queue, pf_fragq)->fe_off + 329 TAILQ_LAST(&frag->fr_queue, pf_fragq)->fe_len; 330 331 /* Non terminal fragments must have more fragments flag */ 332 if (frent->fe_off + frent->fe_len < total && !frent->fe_mff) 333 goto bad_fragment; 334 335 /* Check if we saw the last fragment already */ 336 if (!TAILQ_LAST(&frag->fr_queue, pf_fragq)->fe_mff) { 337 if (frent->fe_off + frent->fe_len > total || 338 (frent->fe_off + frent->fe_len == total && frent->fe_mff)) 339 goto bad_fragment; 340 } else { 341 if (frent->fe_off + frent->fe_len == total && !frent->fe_mff) 342 goto bad_fragment; 343 } 344 345 /* Find a fragment after the current one */ 346 prev = NULL; 347 TAILQ_FOREACH(after, &frag->fr_queue, fr_next) { 348 if (after->fe_off > frent->fe_off) 349 break; 350 prev = after; 351 } 352 353 KASSERT(prev != NULL || after != NULL); 354 355 if (prev != NULL && prev->fe_off + prev->fe_len > frent->fe_off) { 356 u_int16_t precut; 357 358 #ifdef INET6 359 if (frag->fr_af == AF_INET6) 360 goto free_fragment; 361 #endif /* INET6 */ 362 363 precut = prev->fe_off + prev->fe_len - frent->fe_off; 364 if (precut >= frent->fe_len) { 365 DPFPRINTF(LOG_NOTICE, "new frag overlapped"); 366 goto drop_fragment; 367 } 368 DPFPRINTF(LOG_NOTICE, "frag head overlap %d", precut); 369 m_adj(frent->fe_m, precut); 370 frent->fe_off += precut; 371 frent->fe_len -= precut; 372 } 373 374 for (; after != NULL && frent->fe_off + frent->fe_len > after->fe_off; 375 after = next) { 376 u_int16_t aftercut; 377 378 #ifdef INET6 379 if (frag->fr_af == AF_INET6) 380 goto free_fragment; 381 #endif /* INET6 */ 382 383 aftercut = frent->fe_off + frent->fe_len - after->fe_off; 384 if (aftercut < after->fe_len) { 385 DPFPRINTF(LOG_NOTICE, "frag tail overlap %d", aftercut); 386 m_adj(after->fe_m, aftercut); 387 after->fe_off += aftercut; 388 after->fe_len -= aftercut; 389 break; 390 } 391 392 /* This fragment is completely overlapped, lose it */ 393 DPFPRINTF(LOG_NOTICE, "old frag overlapped"); 394 next = TAILQ_NEXT(after, fr_next); 395 TAILQ_REMOVE(&frag->fr_queue, after, fr_next); 396 m_freem(after->fe_m); 397 pool_put(&pf_frent_pl, after); 398 pf_nfrents--; 399 } 400 401 if (prev == NULL) 402 TAILQ_INSERT_HEAD(&frag->fr_queue, frent, fr_next); 403 else 404 TAILQ_INSERT_AFTER(&frag->fr_queue, prev, frent, fr_next); 405 406 return (frag); 407 408 #ifdef INET6 409 free_fragment: 410 /* 411 * RFC 5722, Errata 3089: When reassembling an IPv6 datagram, if one 412 * or more its constituent fragments is determined to be an overlapping 413 * fragment, the entire datagram (and any constituent fragments) MUST 414 * be silently discarded. 415 */ 416 DPFPRINTF(LOG_NOTICE, "flush overlapping fragments"); 417 pf_free_fragment(frag); 418 #endif /* INET6 */ 419 bad_fragment: 420 REASON_SET(reason, PFRES_FRAG); 421 drop_fragment: 422 pool_put(&pf_frent_pl, frent); 423 pf_nfrents--; 424 return (NULL); 425 } 426 427 int 428 pf_isfull_fragment(struct pf_fragment *frag) 429 { 430 struct pf_frent *frent, *next; 431 u_int16_t off, total; 432 433 KASSERT(!TAILQ_EMPTY(&frag->fr_queue)); 434 435 /* Check if we are completely reassembled */ 436 if (TAILQ_LAST(&frag->fr_queue, pf_fragq)->fe_mff) 437 return (0); 438 439 /* Maximum data we have seen already */ 440 total = TAILQ_LAST(&frag->fr_queue, pf_fragq)->fe_off + 441 TAILQ_LAST(&frag->fr_queue, pf_fragq)->fe_len; 442 443 /* Check if we have all the data */ 444 off = 0; 445 for (frent = TAILQ_FIRST(&frag->fr_queue); frent; frent = next) { 446 next = TAILQ_NEXT(frent, fr_next); 447 off += frent->fe_len; 448 if (off < total && (next == NULL || next->fe_off != off)) { 449 DPFPRINTF(LOG_NOTICE, 450 "missing fragment at %d, next %d, total %d", 451 off, next == NULL ? -1 : next->fe_off, total); 452 return (0); 453 } 454 } 455 DPFPRINTF(LOG_NOTICE, "%d < %d?", off, total); 456 if (off < total) 457 return (0); 458 KASSERT(off == total); 459 460 return (1); 461 } 462 463 struct mbuf * 464 pf_join_fragment(struct pf_fragment *frag) 465 { 466 struct mbuf *m, *m2; 467 struct pf_frent *frent; 468 469 frent = TAILQ_FIRST(&frag->fr_queue); 470 TAILQ_REMOVE(&frag->fr_queue, frent, fr_next); 471 472 m = frent->fe_m; 473 /* Strip off any trailing bytes */ 474 if ((frent->fe_hdrlen + frent->fe_len) < m->m_pkthdr.len) 475 m_adj(m, (frent->fe_hdrlen + frent->fe_len) - m->m_pkthdr.len); 476 /* Magic from ip_input */ 477 m2 = m->m_next; 478 m->m_next = NULL; 479 m_cat(m, m2); 480 pool_put(&pf_frent_pl, frent); 481 pf_nfrents--; 482 483 while ((frent = TAILQ_FIRST(&frag->fr_queue)) != NULL) { 484 TAILQ_REMOVE(&frag->fr_queue, frent, fr_next); 485 m2 = frent->fe_m; 486 /* Strip off ip header */ 487 m_adj(m2, frent->fe_hdrlen); 488 /* Strip off any trailing bytes */ 489 if (frent->fe_len < m2->m_pkthdr.len) 490 m_adj(m2, frent->fe_len - m2->m_pkthdr.len); 491 pool_put(&pf_frent_pl, frent); 492 pf_nfrents--; 493 m_cat(m, m2); 494 } 495 496 /* Remove from fragment queue */ 497 pf_free_fragment(frag); 498 499 return (m); 500 } 501 502 int 503 pf_reassemble(struct mbuf **m0, int dir, u_short *reason) 504 { 505 struct mbuf *m = *m0; 506 struct ip *ip = mtod(m, struct ip *); 507 struct pf_frent *frent; 508 struct pf_fragment *frag; 509 struct pf_fragment_cmp key; 510 u_int16_t total, hdrlen; 511 512 /* Get an entry for the fragment queue */ 513 if ((frent = pf_create_fragment(reason)) == NULL) 514 return (PF_DROP); 515 516 frent->fe_m = m; 517 frent->fe_hdrlen = ip->ip_hl << 2; 518 frent->fe_extoff = 0; 519 frent->fe_len = ntohs(ip->ip_len) - (ip->ip_hl << 2); 520 frent->fe_off = (ntohs(ip->ip_off) & IP_OFFMASK) << 3; 521 frent->fe_mff = ntohs(ip->ip_off) & IP_MF; 522 523 key.fr_src.v4 = ip->ip_src; 524 key.fr_dst.v4 = ip->ip_dst; 525 key.fr_af = AF_INET; 526 key.fr_proto = ip->ip_p; 527 key.fr_id = ip->ip_id; 528 key.fr_direction = dir; 529 530 if ((frag = pf_fillup_fragment(&key, frent, reason)) == NULL) 531 return (PF_DROP); 532 533 /* The mbuf is part of the fragment entry, no direct free or access */ 534 m = *m0 = NULL; 535 536 if (!pf_isfull_fragment(frag)) 537 return (PF_PASS); /* drop because *m0 is NULL, no error */ 538 539 /* We have all the data */ 540 frent = TAILQ_FIRST(&frag->fr_queue); 541 KASSERT(frent != NULL); 542 total = TAILQ_LAST(&frag->fr_queue, pf_fragq)->fe_off + 543 TAILQ_LAST(&frag->fr_queue, pf_fragq)->fe_len; 544 hdrlen = frent->fe_hdrlen; 545 m = *m0 = pf_join_fragment(frag); 546 frag = NULL; 547 548 if (m->m_flags & M_PKTHDR) { 549 int plen = 0; 550 for (m = *m0; m; m = m->m_next) 551 plen += m->m_len; 552 m = *m0; 553 m->m_pkthdr.len = plen; 554 } 555 556 ip = mtod(m, struct ip *); 557 ip->ip_len = htons(hdrlen + total); 558 ip->ip_off &= ~(IP_MF|IP_OFFMASK); 559 560 if (hdrlen + total > IP_MAXPACKET) { 561 DPFPRINTF(LOG_NOTICE, "drop: too big: %d", total); 562 ip->ip_len = 0; 563 REASON_SET(reason, PFRES_SHORT); 564 /* PF_DROP requires a valid mbuf *m0 in pf_test() */ 565 return (PF_DROP); 566 } 567 568 DPFPRINTF(LOG_NOTICE, "complete: %p(%d)", m, ntohs(ip->ip_len)); 569 return (PF_PASS); 570 } 571 572 #ifdef INET6 573 int 574 pf_reassemble6(struct mbuf **m0, struct ip6_frag *fraghdr, 575 u_int16_t hdrlen, u_int16_t extoff, int dir, u_short *reason) 576 { 577 struct mbuf *m = *m0; 578 struct ip6_hdr *ip6 = mtod(m, struct ip6_hdr *); 579 struct m_tag *mtag; 580 struct pf_fragment_tag *ftag; 581 struct pf_frent *frent; 582 struct pf_fragment *frag; 583 struct pf_fragment_cmp key; 584 int off; 585 u_int16_t total, maxlen; 586 u_int8_t proto; 587 588 /* Get an entry for the fragment queue */ 589 if ((frent = pf_create_fragment(reason)) == NULL) 590 return (PF_DROP); 591 592 frent->fe_m = m; 593 frent->fe_hdrlen = hdrlen; 594 frent->fe_extoff = extoff; 595 frent->fe_len = sizeof(struct ip6_hdr) + ntohs(ip6->ip6_plen) - hdrlen; 596 frent->fe_off = ntohs(fraghdr->ip6f_offlg & IP6F_OFF_MASK); 597 frent->fe_mff = fraghdr->ip6f_offlg & IP6F_MORE_FRAG; 598 599 key.fr_src.v6 = ip6->ip6_src; 600 key.fr_dst.v6 = ip6->ip6_dst; 601 key.fr_af = AF_INET6; 602 /* Only the first fragment's protocol is relevant */ 603 key.fr_proto = 0; 604 key.fr_id = fraghdr->ip6f_ident; 605 key.fr_direction = dir; 606 607 if ((frag = pf_fillup_fragment(&key, frent, reason)) == NULL) 608 return (PF_DROP); 609 610 /* The mbuf is part of the fragment entry, no direct free or access */ 611 m = *m0 = NULL; 612 613 if (!pf_isfull_fragment(frag)) 614 return (PF_PASS); /* drop because *m0 is NULL, no error */ 615 616 /* We have all the data */ 617 extoff = frent->fe_extoff; 618 maxlen = frag->fr_maxlen; 619 frent = TAILQ_FIRST(&frag->fr_queue); 620 KASSERT(frent != NULL); 621 total = TAILQ_LAST(&frag->fr_queue, pf_fragq)->fe_off + 622 TAILQ_LAST(&frag->fr_queue, pf_fragq)->fe_len; 623 hdrlen = frent->fe_hdrlen - sizeof(struct ip6_frag); 624 m = *m0 = pf_join_fragment(frag); 625 frag = NULL; 626 627 /* Take protocol from first fragment header */ 628 if ((m = m_getptr(m, hdrlen + offsetof(struct ip6_frag, ip6f_nxt), 629 &off)) == NULL) 630 panic("pf_reassemble6: short mbuf chain"); 631 proto = *(mtod(m, caddr_t) + off); 632 m = *m0; 633 634 /* Delete frag6 header */ 635 if (frag6_deletefraghdr(m, hdrlen) != 0) 636 goto fail; 637 638 if (m->m_flags & M_PKTHDR) { 639 int plen = 0; 640 for (m = *m0; m; m = m->m_next) 641 plen += m->m_len; 642 m = *m0; 643 m->m_pkthdr.len = plen; 644 } 645 646 if ((mtag = m_tag_get(PACKET_TAG_PF_REASSEMBLED, sizeof(struct 647 pf_fragment_tag), M_NOWAIT)) == NULL) 648 goto fail; 649 ftag = (struct pf_fragment_tag *)(mtag + 1); 650 ftag->ft_hdrlen = hdrlen; 651 ftag->ft_extoff = extoff; 652 ftag->ft_maxlen = maxlen; 653 m_tag_prepend(m, mtag); 654 655 ip6 = mtod(m, struct ip6_hdr *); 656 ip6->ip6_plen = htons(hdrlen - sizeof(struct ip6_hdr) + total); 657 if (extoff) { 658 /* Write protocol into next field of last extension header */ 659 if ((m = m_getptr(m, extoff + offsetof(struct ip6_ext, 660 ip6e_nxt), &off)) == NULL) 661 panic("pf_reassemble6: short mbuf chain"); 662 *(mtod(m, caddr_t) + off) = proto; 663 m = *m0; 664 } else 665 ip6->ip6_nxt = proto; 666 667 if (hdrlen - sizeof(struct ip6_hdr) + total > IPV6_MAXPACKET) { 668 DPFPRINTF(LOG_NOTICE, "drop: too big: %d", total); 669 ip6->ip6_plen = 0; 670 REASON_SET(reason, PFRES_SHORT); 671 /* PF_DROP requires a valid mbuf *m0 in pf_test6() */ 672 return (PF_DROP); 673 } 674 675 DPFPRINTF(LOG_NOTICE, "complete: %p(%d)", m, ntohs(ip6->ip6_plen)); 676 return (PF_PASS); 677 678 fail: 679 REASON_SET(reason, PFRES_MEMORY); 680 /* PF_DROP requires a valid mbuf *m0 in pf_test6(), will free later */ 681 return (PF_DROP); 682 } 683 684 int 685 pf_refragment6(struct mbuf **m0, struct m_tag *mtag, struct sockaddr_in6 *dst, 686 struct ifnet *ifp) 687 { 688 struct mbuf *m = *m0, *t; 689 struct pf_fragment_tag *ftag = (struct pf_fragment_tag *)(mtag + 1); 690 struct rtentry *rt = NULL; 691 u_int32_t mtu; 692 u_int16_t hdrlen, extoff, maxlen; 693 u_int8_t proto; 694 int error, action; 695 696 hdrlen = ftag->ft_hdrlen; 697 extoff = ftag->ft_extoff; 698 maxlen = ftag->ft_maxlen; 699 m_tag_delete(m, mtag); 700 mtag = NULL; 701 ftag = NULL; 702 703 /* Checksum must be calculated for the whole packet */ 704 in6_proto_cksum_out(m, NULL); 705 706 if (extoff) { 707 int off; 708 709 /* Use protocol from next field of last extension header */ 710 if ((m = m_getptr(m, extoff + offsetof(struct ip6_ext, 711 ip6e_nxt), &off)) == NULL) 712 panic("pf_refragment6: short mbuf chain"); 713 proto = *(mtod(m, caddr_t) + off); 714 *(mtod(m, caddr_t) + off) = IPPROTO_FRAGMENT; 715 m = *m0; 716 } else { 717 struct ip6_hdr *hdr; 718 719 hdr = mtod(m, struct ip6_hdr *); 720 proto = hdr->ip6_nxt; 721 hdr->ip6_nxt = IPPROTO_FRAGMENT; 722 } 723 724 /* 725 * Maxlen may be less than 8 iff there was only a single 726 * fragment. As it was fragmented before, add a fragment 727 * header also for a single fragment. If total or maxlen 728 * is less than 8, ip6_fragment() will return EMSGSIZE and 729 * we drop the packet. 730 */ 731 mtu = hdrlen + sizeof(struct ip6_frag) + maxlen; 732 error = ip6_fragment(m, hdrlen, proto, mtu); 733 734 m = (*m0)->m_nextpkt; 735 (*m0)->m_nextpkt = NULL; 736 if (error == 0) { 737 /* The first mbuf contains the unfragmented packet */ 738 m_freem(*m0); 739 *m0 = NULL; 740 action = PF_PASS; 741 } else { 742 /* Drop expects an mbuf to free */ 743 DPFPRINTF(LOG_NOTICE, "refragment error %d", error); 744 action = PF_DROP; 745 } 746 747 if (ifp != NULL) { 748 rt = rtalloc(sin6tosa(dst), RT_RESOLVE, 749 m->m_pkthdr.ph_rtableid); 750 if (rt == NULL) { 751 ip6stat.ip6s_noroute++; 752 error = -1; 753 } 754 } 755 756 for (t = m; m; m = t) { 757 t = m->m_nextpkt; 758 m->m_nextpkt = NULL; 759 m->m_pkthdr.pf.flags |= PF_TAG_REFRAGMENTED; 760 if (error == 0) { 761 if (ifp == NULL) { 762 ip6_forward(m, NULL, 0); 763 } else if ((u_long)m->m_pkthdr.len <= ifp->if_mtu) { 764 ifp->if_output(ifp, m, sin6tosa(dst), rt); 765 } else { 766 icmp6_error(m, ICMP6_PACKET_TOO_BIG, 0, 767 ifp->if_mtu); 768 } 769 } else { 770 m_freem(m); 771 } 772 } 773 rtfree(rt); 774 775 return (action); 776 } 777 #endif /* INET6 */ 778 779 int 780 pf_normalize_ip(struct pf_pdesc *pd, u_short *reason) 781 { 782 struct ip *h = mtod(pd->m, struct ip *); 783 u_int16_t fragoff = (ntohs(h->ip_off) & IP_OFFMASK) << 3; 784 u_int16_t mff = (ntohs(h->ip_off) & IP_MF); 785 786 if (!fragoff && !mff) 787 goto no_fragment; 788 789 /* Clear IP_DF if we're in no-df mode */ 790 if (pf_status.reass & PF_REASS_NODF && h->ip_off & htons(IP_DF)) 791 h->ip_off &= htons(~IP_DF); 792 793 /* We're dealing with a fragment now. Don't allow fragments 794 * with IP_DF to enter the cache. If the flag was cleared by 795 * no-df above, fine. Otherwise drop it. 796 */ 797 if (h->ip_off & htons(IP_DF)) { 798 DPFPRINTF(LOG_NOTICE, "bad fragment: IP_DF"); 799 REASON_SET(reason, PFRES_FRAG); 800 return (PF_DROP); 801 } 802 803 if (!pf_status.reass) 804 return (PF_PASS); /* no reassembly */ 805 806 /* Returns PF_DROP or m is NULL or completely reassembled mbuf */ 807 if (pf_reassemble(&pd->m, pd->dir, reason) != PF_PASS) 808 return (PF_DROP); 809 if (pd->m == NULL) 810 return (PF_PASS); /* packet has been reassembled, no error */ 811 812 h = mtod(pd->m, struct ip *); 813 814 no_fragment: 815 /* At this point, only IP_DF is allowed in ip_off */ 816 if (h->ip_off & ~htons(IP_DF)) 817 h->ip_off &= htons(IP_DF); 818 819 return (PF_PASS); 820 } 821 822 #ifdef INET6 823 int 824 pf_normalize_ip6(struct pf_pdesc *pd, u_short *reason) 825 { 826 struct ip6_frag frag; 827 828 if (pd->fragoff == 0) 829 goto no_fragment; 830 831 if (!pf_pull_hdr(pd->m, pd->fragoff, &frag, sizeof(frag), NULL, reason, 832 AF_INET6)) 833 return (PF_DROP); 834 835 if (!pf_status.reass) 836 return (PF_PASS); /* no reassembly */ 837 838 /* Returns PF_DROP or m is NULL or completely reassembled mbuf */ 839 if (pf_reassemble6(&pd->m, &frag, pd->fragoff + sizeof(frag), 840 pd->extoff, pd->dir, reason) != PF_PASS) 841 return (PF_DROP); 842 if (pd->m == NULL) 843 return (PF_PASS); /* packet has been reassembled, no error */ 844 845 no_fragment: 846 return (PF_PASS); 847 } 848 #endif /* INET6 */ 849 850 int 851 pf_normalize_tcp(struct pf_pdesc *pd) 852 { 853 struct tcphdr *th = pd->hdr.tcp; 854 u_short reason; 855 u_int8_t flags; 856 u_int rewrite = 0; 857 858 flags = th->th_flags; 859 if (flags & TH_SYN) { 860 /* Illegal packet */ 861 if (flags & TH_RST) 862 goto tcp_drop; 863 864 if (flags & TH_FIN) /* XXX why clear instead of drop? */ 865 flags &= ~TH_FIN; 866 } else { 867 /* Illegal packet */ 868 if (!(flags & (TH_ACK|TH_RST))) 869 goto tcp_drop; 870 } 871 872 if (!(flags & TH_ACK)) { 873 /* These flags are only valid if ACK is set */ 874 if (flags & (TH_FIN|TH_PUSH|TH_URG)) 875 goto tcp_drop; 876 } 877 878 /* If flags changed, or reserved data set, then adjust */ 879 if (flags != th->th_flags || th->th_x2 != 0) { 880 /* hack: set 4-bit th_x2 = 0 */ 881 u_int8_t *th_off = (u_int8_t*)(&th->th_ack+1); 882 pf_patch_8(pd, th_off, th->th_off << 4, PF_HI); 883 884 pf_patch_8(pd, &th->th_flags, flags, PF_LO); 885 rewrite = 1; 886 } 887 888 /* Remove urgent pointer, if TH_URG is not set */ 889 if (!(flags & TH_URG) && th->th_urp) { 890 pf_patch_16(pd, &th->th_urp, 0); 891 rewrite = 1; 892 } 893 894 /* copy back packet headers if we sanitized */ 895 if (rewrite) { 896 m_copyback(pd->m, pd->off, sizeof(*th), th, M_NOWAIT); 897 } 898 899 return (PF_PASS); 900 901 tcp_drop: 902 REASON_SET(&reason, PFRES_NORM); 903 return (PF_DROP); 904 } 905 906 int 907 pf_normalize_tcp_init(struct pf_pdesc *pd, struct pf_state_peer *src) 908 { 909 struct tcphdr *th = pd->hdr.tcp; 910 u_int32_t tsval, tsecr; 911 u_int8_t hdr[60]; 912 u_int8_t *opt; 913 914 KASSERT(src->scrub == NULL); 915 916 src->scrub = pool_get(&pf_state_scrub_pl, PR_NOWAIT); 917 if (src->scrub == NULL) 918 return (1); 919 bzero(src->scrub, sizeof(*src->scrub)); 920 921 switch (pd->af) { 922 case AF_INET: { 923 struct ip *h = mtod(pd->m, struct ip *); 924 src->scrub->pfss_ttl = h->ip_ttl; 925 break; 926 } 927 #ifdef INET6 928 case AF_INET6: { 929 struct ip6_hdr *h = mtod(pd->m, struct ip6_hdr *); 930 src->scrub->pfss_ttl = h->ip6_hlim; 931 break; 932 } 933 #endif /* INET6 */ 934 default: 935 unhandled_af(pd->af); 936 } 937 938 /* 939 * All normalizations below are only begun if we see the start of 940 * the connections. They must all set an enabled bit in pfss_flags 941 */ 942 if ((th->th_flags & TH_SYN) == 0) 943 return (0); 944 945 if (th->th_off > (sizeof(struct tcphdr) >> 2) && src->scrub && 946 pf_pull_hdr(pd->m, pd->off, hdr, th->th_off << 2, NULL, NULL, 947 pd->af)) { 948 /* Diddle with TCP options */ 949 int hlen; 950 951 opt = hdr + sizeof(struct tcphdr); 952 hlen = (th->th_off << 2) - sizeof(struct tcphdr); 953 while (hlen >= TCPOLEN_TIMESTAMP) { 954 switch (*opt) { 955 case TCPOPT_EOL: /* FALLTHROUGH */ 956 case TCPOPT_NOP: 957 opt++; 958 hlen--; 959 break; 960 case TCPOPT_TIMESTAMP: 961 if (opt[1] >= TCPOLEN_TIMESTAMP) { 962 src->scrub->pfss_flags |= 963 PFSS_TIMESTAMP; 964 src->scrub->pfss_ts_mod = arc4random(); 965 966 /* note PFSS_PAWS not set yet */ 967 memcpy(&tsval, &opt[2], 968 sizeof(u_int32_t)); 969 memcpy(&tsecr, &opt[6], 970 sizeof(u_int32_t)); 971 src->scrub->pfss_tsval0 = ntohl(tsval); 972 src->scrub->pfss_tsval = ntohl(tsval); 973 src->scrub->pfss_tsecr = ntohl(tsecr); 974 getmicrouptime(&src->scrub->pfss_last); 975 } 976 /* FALLTHROUGH */ 977 default: 978 hlen -= MAX(opt[1], 2); 979 opt += MAX(opt[1], 2); 980 break; 981 } 982 } 983 } 984 985 return (0); 986 } 987 988 void 989 pf_normalize_tcp_cleanup(struct pf_state *state) 990 { 991 if (state->src.scrub) 992 pool_put(&pf_state_scrub_pl, state->src.scrub); 993 if (state->dst.scrub) 994 pool_put(&pf_state_scrub_pl, state->dst.scrub); 995 996 /* Someday... flush the TCP segment reassembly descriptors. */ 997 } 998 999 int 1000 pf_normalize_tcp_stateful(struct pf_pdesc *pd, u_short *reason, 1001 struct pf_state *state, struct pf_state_peer *src, 1002 struct pf_state_peer *dst, int *writeback) 1003 { 1004 struct tcphdr *th = pd->hdr.tcp; 1005 struct timeval uptime; 1006 u_int32_t tsval, tsecr; 1007 u_int tsval_from_last; 1008 u_int8_t hdr[60]; 1009 u_int8_t *opts, *opt; 1010 int copyback = 0; 1011 int got_ts = 0; 1012 1013 KASSERT(src->scrub || dst->scrub); 1014 1015 /* 1016 * Enforce the minimum TTL seen for this connection. Negate a common 1017 * technique to evade an intrusion detection system and confuse 1018 * firewall state code. 1019 */ 1020 switch (pd->af) { 1021 case AF_INET: 1022 if (src->scrub) { 1023 struct ip *h = mtod(pd->m, struct ip *); 1024 if (h->ip_ttl > src->scrub->pfss_ttl) 1025 src->scrub->pfss_ttl = h->ip_ttl; 1026 h->ip_ttl = src->scrub->pfss_ttl; 1027 } 1028 break; 1029 #ifdef INET6 1030 case AF_INET6: 1031 if (src->scrub) { 1032 struct ip6_hdr *h = mtod(pd->m, struct ip6_hdr *); 1033 if (h->ip6_hlim > src->scrub->pfss_ttl) 1034 src->scrub->pfss_ttl = h->ip6_hlim; 1035 h->ip6_hlim = src->scrub->pfss_ttl; 1036 } 1037 break; 1038 #endif /* INET6 */ 1039 default: 1040 unhandled_af(pd->af); 1041 } 1042 1043 if (th->th_off > (sizeof(struct tcphdr) >> 2) && 1044 ((src->scrub && (src->scrub->pfss_flags & PFSS_TIMESTAMP)) || 1045 (dst->scrub && (dst->scrub->pfss_flags & PFSS_TIMESTAMP))) && 1046 pf_pull_hdr(pd->m, pd->off, hdr, th->th_off << 2, NULL, NULL, 1047 pd->af)) { 1048 /* Diddle with TCP options */ 1049 int hlen; 1050 opt = opts = hdr + sizeof(struct tcphdr); 1051 hlen = (th->th_off << 2) - sizeof(struct tcphdr); 1052 while (hlen >= TCPOLEN_TIMESTAMP) { 1053 switch (*opt) { 1054 case TCPOPT_EOL: /* FALLTHROUGH */ 1055 case TCPOPT_NOP: 1056 opt++; 1057 hlen--; 1058 break; 1059 case TCPOPT_TIMESTAMP: 1060 /* Modulate the timestamps. Can be used for 1061 * NAT detection, OS uptime determination or 1062 * reboot detection. 1063 */ 1064 1065 if (got_ts) { 1066 /* Huh? Multiple timestamps!? */ 1067 if (pf_status.debug >= LOG_NOTICE) { 1068 log(LOG_NOTICE, 1069 "pf: %s: multiple TS??", 1070 __func__); 1071 pf_print_state(state); 1072 addlog("\n"); 1073 } 1074 REASON_SET(reason, PFRES_TS); 1075 return (PF_DROP); 1076 } 1077 if (opt[1] >= TCPOLEN_TIMESTAMP) { 1078 u_int8_t *ts = opt + 2; 1079 u_int8_t *tsr = opt + 6; 1080 1081 memcpy(&tsval, ts, sizeof(u_int32_t)); 1082 memcpy(&tsecr, tsr, sizeof(u_int32_t)); 1083 1084 /* modulate TS */ 1085 if (tsval && src->scrub && 1086 (src->scrub->pfss_flags & 1087 PFSS_TIMESTAMP)) { 1088 /* note: tsval used further on */ 1089 tsval = ntohl(tsval); 1090 pf_patch_32_unaligned(pd, ts, 1091 htonl(tsval + 1092 src->scrub->pfss_ts_mod), 1093 PF_ALGNMNT(ts - opts)); 1094 copyback = 1; 1095 } 1096 1097 /* modulate TS reply if any (!0) */ 1098 if (tsecr && dst->scrub && 1099 (dst->scrub->pfss_flags & 1100 PFSS_TIMESTAMP)) { 1101 /* note: tsecr used further on */ 1102 tsecr = ntohl(tsecr) 1103 - dst->scrub->pfss_ts_mod; 1104 pf_patch_32_unaligned(pd, tsr, 1105 htonl(tsecr), 1106 PF_ALGNMNT(tsr - opts)); 1107 copyback = 1; 1108 } 1109 got_ts = 1; 1110 } 1111 /* FALLTHROUGH */ 1112 default: 1113 hlen -= MAX(opt[1], 2); 1114 opt += MAX(opt[1], 2); 1115 break; 1116 } 1117 } 1118 if (copyback) { 1119 /* Copyback the options, caller copys back header */ 1120 *writeback = 1; 1121 m_copyback(pd->m, pd->off + sizeof(struct tcphdr), 1122 (th->th_off << 2) - sizeof(struct tcphdr), hdr + 1123 sizeof(struct tcphdr), M_NOWAIT); 1124 } 1125 } 1126 1127 1128 /* 1129 * Must invalidate PAWS checks on connections idle for too long. 1130 * The fastest allowed timestamp clock is 1ms. That turns out to 1131 * be about 24 days before it wraps. XXX Right now our lowerbound 1132 * TS echo check only works for the first 12 days of a connection 1133 * when the TS has exhausted half its 32bit space 1134 */ 1135 #define TS_MAX_IDLE (24*24*60*60) 1136 #define TS_MAX_CONN (12*24*60*60) /* XXX remove when better tsecr check */ 1137 1138 getmicrouptime(&uptime); 1139 if (src->scrub && (src->scrub->pfss_flags & PFSS_PAWS) && 1140 (uptime.tv_sec - src->scrub->pfss_last.tv_sec > TS_MAX_IDLE || 1141 time_uptime - state->creation > TS_MAX_CONN)) { 1142 if (pf_status.debug >= LOG_NOTICE) { 1143 log(LOG_NOTICE, "pf: src idled out of PAWS "); 1144 pf_print_state(state); 1145 addlog("\n"); 1146 } 1147 src->scrub->pfss_flags = 1148 (src->scrub->pfss_flags & ~PFSS_PAWS) | PFSS_PAWS_IDLED; 1149 } 1150 if (dst->scrub && (dst->scrub->pfss_flags & PFSS_PAWS) && 1151 uptime.tv_sec - dst->scrub->pfss_last.tv_sec > TS_MAX_IDLE) { 1152 if (pf_status.debug >= LOG_NOTICE) { 1153 log(LOG_NOTICE, "pf: dst idled out of PAWS "); 1154 pf_print_state(state); 1155 addlog("\n"); 1156 } 1157 dst->scrub->pfss_flags = 1158 (dst->scrub->pfss_flags & ~PFSS_PAWS) | PFSS_PAWS_IDLED; 1159 } 1160 1161 if (got_ts && src->scrub && dst->scrub && 1162 (src->scrub->pfss_flags & PFSS_PAWS) && 1163 (dst->scrub->pfss_flags & PFSS_PAWS)) { 1164 /* Validate that the timestamps are "in-window". 1165 * RFC1323 describes TCP Timestamp options that allow 1166 * measurement of RTT (round trip time) and PAWS 1167 * (protection against wrapped sequence numbers). PAWS 1168 * gives us a set of rules for rejecting packets on 1169 * long fat pipes (packets that were somehow delayed 1170 * in transit longer than the time it took to send the 1171 * full TCP sequence space of 4Gb). We can use these 1172 * rules and infer a few others that will let us treat 1173 * the 32bit timestamp and the 32bit echoed timestamp 1174 * as sequence numbers to prevent a blind attacker from 1175 * inserting packets into a connection. 1176 * 1177 * RFC1323 tells us: 1178 * - The timestamp on this packet must be greater than 1179 * or equal to the last value echoed by the other 1180 * endpoint. The RFC says those will be discarded 1181 * since it is a dup that has already been acked. 1182 * This gives us a lowerbound on the timestamp. 1183 * timestamp >= other last echoed timestamp 1184 * - The timestamp will be less than or equal to 1185 * the last timestamp plus the time between the 1186 * last packet and now. The RFC defines the max 1187 * clock rate as 1ms. We will allow clocks to be 1188 * up to 10% fast and will allow a total difference 1189 * or 30 seconds due to a route change. And this 1190 * gives us an upperbound on the timestamp. 1191 * timestamp <= last timestamp + max ticks 1192 * We have to be careful here. Windows will send an 1193 * initial timestamp of zero and then initialize it 1194 * to a random value after the 3whs; presumably to 1195 * avoid a DoS by having to call an expensive RNG 1196 * during a SYN flood. Proof MS has at least one 1197 * good security geek. 1198 * 1199 * - The TCP timestamp option must also echo the other 1200 * endpoints timestamp. The timestamp echoed is the 1201 * one carried on the earliest unacknowledged segment 1202 * on the left edge of the sequence window. The RFC 1203 * states that the host will reject any echoed 1204 * timestamps that were larger than any ever sent. 1205 * This gives us an upperbound on the TS echo. 1206 * tescr <= largest_tsval 1207 * - The lowerbound on the TS echo is a little more 1208 * tricky to determine. The other endpoint's echoed 1209 * values will not decrease. But there may be 1210 * network conditions that re-order packets and 1211 * cause our view of them to decrease. For now the 1212 * only lowerbound we can safely determine is that 1213 * the TS echo will never be less than the original 1214 * TS. XXX There is probably a better lowerbound. 1215 * Remove TS_MAX_CONN with better lowerbound check. 1216 * tescr >= other original TS 1217 * 1218 * It is also important to note that the fastest 1219 * timestamp clock of 1ms will wrap its 32bit space in 1220 * 24 days. So we just disable TS checking after 24 1221 * days of idle time. We actually must use a 12d 1222 * connection limit until we can come up with a better 1223 * lowerbound to the TS echo check. 1224 */ 1225 struct timeval delta_ts; 1226 int ts_fudge; 1227 1228 /* 1229 * PFTM_TS_DIFF is how many seconds of leeway to allow 1230 * a host's timestamp. This can happen if the previous 1231 * packet got delayed in transit for much longer than 1232 * this packet. 1233 */ 1234 if ((ts_fudge = state->rule.ptr->timeout[PFTM_TS_DIFF]) == 0) 1235 ts_fudge = pf_default_rule.timeout[PFTM_TS_DIFF]; 1236 1237 /* Calculate max ticks since the last timestamp */ 1238 #define TS_MAXFREQ 1100 /* RFC max TS freq of 1Khz + 10% skew */ 1239 #define TS_MICROSECS 1000000 /* microseconds per second */ 1240 timersub(&uptime, &src->scrub->pfss_last, &delta_ts); 1241 tsval_from_last = (delta_ts.tv_sec + ts_fudge) * TS_MAXFREQ; 1242 tsval_from_last += delta_ts.tv_usec / (TS_MICROSECS/TS_MAXFREQ); 1243 1244 if ((src->state >= TCPS_ESTABLISHED && 1245 dst->state >= TCPS_ESTABLISHED) && 1246 (SEQ_LT(tsval, dst->scrub->pfss_tsecr) || 1247 SEQ_GT(tsval, src->scrub->pfss_tsval + tsval_from_last) || 1248 (tsecr && (SEQ_GT(tsecr, dst->scrub->pfss_tsval) || 1249 SEQ_LT(tsecr, dst->scrub->pfss_tsval0))))) { 1250 /* Bad RFC1323 implementation or an insertion attack. 1251 * 1252 * - Solaris 2.6 and 2.7 are known to send another ACK 1253 * after the FIN,FIN|ACK,ACK closing that carries 1254 * an old timestamp. 1255 */ 1256 1257 DPFPRINTF(LOG_NOTICE, "Timestamp failed %c%c%c%c", 1258 SEQ_LT(tsval, dst->scrub->pfss_tsecr) ? '0' : ' ', 1259 SEQ_GT(tsval, src->scrub->pfss_tsval + 1260 tsval_from_last) ? '1' : ' ', 1261 SEQ_GT(tsecr, dst->scrub->pfss_tsval) ? '2' : ' ', 1262 SEQ_LT(tsecr, dst->scrub->pfss_tsval0)? '3' : ' '); 1263 DPFPRINTF(LOG_NOTICE, " tsval: %u tsecr: %u " 1264 "+ticks: %u idle: %llu.%06lus", tsval, tsecr, 1265 tsval_from_last, (long long)delta_ts.tv_sec, 1266 delta_ts.tv_usec); 1267 DPFPRINTF(LOG_NOTICE, " src->tsval: %u tsecr: %u", 1268 src->scrub->pfss_tsval, src->scrub->pfss_tsecr); 1269 DPFPRINTF(LOG_NOTICE, " dst->tsval: %u tsecr: %u " 1270 "tsval0: %u", dst->scrub->pfss_tsval, 1271 dst->scrub->pfss_tsecr, dst->scrub->pfss_tsval0); 1272 if (pf_status.debug >= LOG_NOTICE) { 1273 log(LOG_NOTICE, "pf: "); 1274 pf_print_state(state); 1275 pf_print_flags(th->th_flags); 1276 addlog("\n"); 1277 } 1278 REASON_SET(reason, PFRES_TS); 1279 return (PF_DROP); 1280 } 1281 /* XXX I'd really like to require tsecr but it's optional */ 1282 } else if (!got_ts && (th->th_flags & TH_RST) == 0 && 1283 ((src->state == TCPS_ESTABLISHED && dst->state == TCPS_ESTABLISHED) 1284 || pd->p_len > 0 || (th->th_flags & TH_SYN)) && 1285 src->scrub && dst->scrub && 1286 (src->scrub->pfss_flags & PFSS_PAWS) && 1287 (dst->scrub->pfss_flags & PFSS_PAWS)) { 1288 /* Didn't send a timestamp. Timestamps aren't really useful 1289 * when: 1290 * - connection opening or closing (often not even sent). 1291 * but we must not let an attacker to put a FIN on a 1292 * data packet to sneak it through our ESTABLISHED check. 1293 * - on a TCP reset. RFC suggests not even looking at TS. 1294 * - on an empty ACK. The TS will not be echoed so it will 1295 * probably not help keep the RTT calculation in sync and 1296 * there isn't as much danger when the sequence numbers 1297 * got wrapped. So some stacks don't include TS on empty 1298 * ACKs :-( 1299 * 1300 * To minimize the disruption to mostly RFC1323 conformant 1301 * stacks, we will only require timestamps on data packets. 1302 * 1303 * And what do ya know, we cannot require timestamps on data 1304 * packets. There appear to be devices that do legitimate 1305 * TCP connection hijacking. There are HTTP devices that allow 1306 * a 3whs (with timestamps) and then buffer the HTTP request. 1307 * If the intermediate device has the HTTP response cache, it 1308 * will spoof the response but not bother timestamping its 1309 * packets. So we can look for the presence of a timestamp in 1310 * the first data packet and if there, require it in all future 1311 * packets. 1312 */ 1313 1314 if (pd->p_len > 0 && (src->scrub->pfss_flags & PFSS_DATA_TS)) { 1315 /* 1316 * Hey! Someone tried to sneak a packet in. Or the 1317 * stack changed its RFC1323 behavior?!?! 1318 */ 1319 if (pf_status.debug >= LOG_NOTICE) { 1320 log(LOG_NOTICE, 1321 "pf: did not receive expected RFC1323 " 1322 "timestamp"); 1323 pf_print_state(state); 1324 pf_print_flags(th->th_flags); 1325 addlog("\n"); 1326 } 1327 REASON_SET(reason, PFRES_TS); 1328 return (PF_DROP); 1329 } 1330 } 1331 1332 /* 1333 * We will note if a host sends his data packets with or without 1334 * timestamps. And require all data packets to contain a timestamp 1335 * if the first does. PAWS implicitly requires that all data packets be 1336 * timestamped. But I think there are middle-man devices that hijack 1337 * TCP streams immediately after the 3whs and don't timestamp their 1338 * packets (seen in a WWW accelerator or cache). 1339 */ 1340 if (pd->p_len > 0 && src->scrub && (src->scrub->pfss_flags & 1341 (PFSS_TIMESTAMP|PFSS_DATA_TS|PFSS_DATA_NOTS)) == PFSS_TIMESTAMP) { 1342 if (got_ts) 1343 src->scrub->pfss_flags |= PFSS_DATA_TS; 1344 else { 1345 src->scrub->pfss_flags |= PFSS_DATA_NOTS; 1346 if (pf_status.debug >= LOG_NOTICE && dst->scrub && 1347 (dst->scrub->pfss_flags & PFSS_TIMESTAMP)) { 1348 /* Don't warn if other host rejected RFC1323 */ 1349 log(LOG_NOTICE, 1350 "pf: broken RFC1323 stack did not " 1351 "timestamp data packet. Disabled PAWS " 1352 "security."); 1353 pf_print_state(state); 1354 pf_print_flags(th->th_flags); 1355 addlog("\n"); 1356 } 1357 } 1358 } 1359 1360 /* 1361 * Update PAWS values 1362 */ 1363 if (got_ts && src->scrub && PFSS_TIMESTAMP == (src->scrub->pfss_flags & 1364 (PFSS_PAWS_IDLED|PFSS_TIMESTAMP))) { 1365 getmicrouptime(&src->scrub->pfss_last); 1366 if (SEQ_GEQ(tsval, src->scrub->pfss_tsval) || 1367 (src->scrub->pfss_flags & PFSS_PAWS) == 0) 1368 src->scrub->pfss_tsval = tsval; 1369 1370 if (tsecr) { 1371 if (SEQ_GEQ(tsecr, src->scrub->pfss_tsecr) || 1372 (src->scrub->pfss_flags & PFSS_PAWS) == 0) 1373 src->scrub->pfss_tsecr = tsecr; 1374 1375 if ((src->scrub->pfss_flags & PFSS_PAWS) == 0 && 1376 (SEQ_LT(tsval, src->scrub->pfss_tsval0) || 1377 src->scrub->pfss_tsval0 == 0)) { 1378 /* tsval0 MUST be the lowest timestamp */ 1379 src->scrub->pfss_tsval0 = tsval; 1380 } 1381 1382 /* Only fully initialized after a TS gets echoed */ 1383 if ((src->scrub->pfss_flags & PFSS_PAWS) == 0) 1384 src->scrub->pfss_flags |= PFSS_PAWS; 1385 } 1386 } 1387 1388 /* I have a dream.... TCP segment reassembly.... */ 1389 return (0); 1390 } 1391 1392 int 1393 pf_normalize_mss(struct pf_pdesc *pd, u_int16_t maxmss) 1394 { 1395 struct tcphdr *th = pd->hdr.tcp; 1396 u_int16_t mss; 1397 int thoff; 1398 int opt, cnt, optlen = 0; 1399 u_int8_t opts[MAX_TCPOPTLEN]; 1400 u_int8_t *optp = opts; 1401 1402 thoff = th->th_off << 2; 1403 cnt = thoff - sizeof(struct tcphdr); 1404 1405 if (cnt <= 0 || cnt > MAX_TCPOPTLEN || !pf_pull_hdr(pd->m, 1406 pd->off + sizeof(*th), opts, cnt, NULL, NULL, pd->af)) 1407 return (0); 1408 1409 for (; cnt > 0; cnt -= optlen, optp += optlen) { 1410 opt = optp[0]; 1411 if (opt == TCPOPT_EOL) 1412 break; 1413 if (opt == TCPOPT_NOP) 1414 optlen = 1; 1415 else { 1416 if (cnt < 2) 1417 break; 1418 optlen = optp[1]; 1419 if (optlen < 2 || optlen > cnt) 1420 break; 1421 } 1422 if (opt == TCPOPT_MAXSEG) { 1423 u_int8_t *mssp = optp + 2; 1424 memcpy(&mss, mssp, sizeof(mss)); 1425 if (ntohs(mss) > maxmss) { 1426 size_t mssoffopts = mssp - opts; 1427 pf_patch_16_unaligned(pd, &mss, 1428 htons(maxmss), PF_ALGNMNT(mssoffopts)); 1429 m_copyback(pd->m, 1430 pd->off + sizeof(*th) + mssoffopts, 1431 sizeof(mss), &mss, M_NOWAIT); 1432 m_copyback(pd->m, pd->off, sizeof(*th), th, 1433 M_NOWAIT); 1434 } 1435 } 1436 } 1437 1438 return (0); 1439 } 1440 1441 void 1442 pf_scrub(struct mbuf *m, u_int16_t flags, sa_family_t af, u_int8_t min_ttl, 1443 u_int8_t tos) 1444 { 1445 struct ip *h = mtod(m, struct ip *); 1446 #ifdef INET6 1447 struct ip6_hdr *h6 = mtod(m, struct ip6_hdr *); 1448 #endif /* INET6 */ 1449 1450 /* Clear IP_DF if no-df was requested */ 1451 if (flags & PFSTATE_NODF && af == AF_INET && h->ip_off & htons(IP_DF)) 1452 h->ip_off &= htons(~IP_DF); 1453 1454 /* Enforce a minimum ttl, may cause endless packet loops */ 1455 if (min_ttl && af == AF_INET && h->ip_ttl < min_ttl) 1456 h->ip_ttl = min_ttl; 1457 #ifdef INET6 1458 if (min_ttl && af == AF_INET6 && h6->ip6_hlim < min_ttl) 1459 h6->ip6_hlim = min_ttl; 1460 #endif /* INET6 */ 1461 1462 /* Enforce tos */ 1463 if (flags & PFSTATE_SETTOS) { 1464 if (af == AF_INET) 1465 h->ip_tos = tos | (h->ip_tos & IPTOS_ECN_MASK); 1466 #ifdef INET6 1467 if (af == AF_INET6) { 1468 /* drugs are unable to explain such idiocy */ 1469 h6->ip6_flow &= ~htonl(0x0fc00000); 1470 h6->ip6_flow |= htonl(((u_int32_t)tos) << 20); 1471 } 1472 #endif /* INET6 */ 1473 } 1474 1475 /* random-id, but not for fragments */ 1476 if (flags & PFSTATE_RANDOMID && af == AF_INET && 1477 !(h->ip_off & ~htons(IP_DF))) 1478 h->ip_id = htons(ip_randomid()); 1479 } 1480