/*	$OpenBSD: pf_norm.c,v 1.113 2008/05/07 07:07:29 markus Exp $ */

/*
 * Copyright (c) 2010 The DragonFly Project.  All rights reserved.
 *
 * Copyright 2001 Niels Provos <provos@citi.umich.edu>
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#include "opt_inet.h"
#include "opt_inet6.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/mbuf.h>
#include <sys/filio.h>
#include <sys/fcntl.h>
#include <sys/malloc.h>
#include <sys/socket.h>
#include <sys/kernel.h>
#include <sys/time.h>

#include <net/if.h>
#include <net/if_var.h>
#include <net/if_types.h>
#include <net/bpf.h>
#include <net/route.h>
#include <net/pf/if_pflog.h>

#include <netinet/in.h>
#include <netinet/in_var.h>
#include <netinet/in_systm.h>
#include <netinet/ip.h>
#include <netinet/ip_var.h>
#include <netinet/tcp.h>
#include <netinet/tcp_seq.h>
#include <netinet/udp.h>
#include <netinet/ip_icmp.h>

#ifdef INET6
#include <netinet/ip6.h>
#endif /* INET6 */

#include <net/pf/pfvar.h>

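/*
 * pf tracks IPv4 fragments in one of two ways.  With "fragment reassemble"
 * the mbufs themselves are buffered until the datagram is complete
 * (pf_reassemble).  In the non-buffering modes only the byte ranges already
 * seen are cached (pf_fragcache) and each fragment is passed on immediately.
 * BUFFER_FRAGMENTS() distinguishes the two kinds of descriptor.
 */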
#define PFFRAG_SEENLAST	0x0001		/* Seen the last fragment for this */
#define PFFRAG_NOBUFFER	0x0002		/* Non-buffering fragment cache */
#define PFFRAG_DROP	0x0004		/* Drop all fragments */
#define BUFFER_FRAGMENTS(fr)	(!((fr)->fr_flags & PFFRAG_NOBUFFER))

TAILQ_HEAD(pf_fragqueue, pf_fragment)	*pf_fragqueue;
TAILQ_HEAD(pf_cachequeue, pf_fragment)	*pf_cachequeue;

static __inline int	 pf_frag_compare(struct pf_fragment *,
			    struct pf_fragment *);
RB_HEAD(pf_frag_tree, pf_fragment)	*pf_frag_tree,
					*pf_cache_tree;
RB_PROTOTYPE(pf_frag_tree, pf_fragment, fr_entry, pf_frag_compare);
RB_GENERATE(pf_frag_tree, pf_fragment, fr_entry, pf_frag_compare);

/* Private prototypes */
void			 pf_ip2key(struct pf_fragment *, struct ip *);
void			 pf_remove_fragment(struct pf_fragment *);
void			 pf_flush_fragments(void);
void			 pf_free_fragment(struct pf_fragment *);
struct pf_fragment	*pf_find_fragment(struct ip *, struct pf_frag_tree *);
struct mbuf		*pf_reassemble(struct mbuf **, struct pf_fragment **,
			    struct pf_frent *, int);
struct mbuf		*pf_fragcache(struct mbuf **, struct ip *,
			    struct pf_fragment **, int, int, int *);
int			 pf_normalize_tcpopt(struct pf_rule *, struct mbuf *,
			    struct tcphdr *, int, sa_family_t);

#define DPFPRINTF(x) do {				\
	if (pf_status.debug >= PF_DEBUG_MISC) {		\
		kprintf("%s: ", __func__);		\
		kprintf x ;				\
	}						\
} while (0)

static MALLOC_DEFINE(M_PFFRAGPL, "pffrag", "pf fragment pool list");
static MALLOC_DEFINE(M_PFCACHEPL, "pffrcache", "pf fragment cache pool list");
static MALLOC_DEFINE(M_PFFRENTPL, "pffrent", "pf frent pool list");
static MALLOC_DEFINE(M_PFCENTPL, "pffrcent", "pf fragment cent pool list");
static MALLOC_DEFINE(M_PFSTATESCRUBPL, "pfstatescrub",
    "pf state scrub pool list");

/* Globals */
struct malloc_type	*pf_frent_pl, *pf_frag_pl, *pf_cache_pl, *pf_cent_pl;
struct malloc_type	*pf_state_scrub_pl;
int			 pf_nfrents, pf_ncache;

void
pf_normalize_init(void)
{
	int n;

	/* XXX
	pool_sethiwat(&pf_frag_pl, PFFRAG_FRAG_HIWAT);
	pool_sethardlimit(&pf_frent_pl, PFFRAG_FRENT_HIWAT, NULL, 0);
	pool_sethardlimit(&pf_cache_pl, PFFRAG_FRCACHE_HIWAT, NULL, 0);
	pool_sethardlimit(&pf_cent_pl, PFFRAG_FRCENT_HIWAT, NULL, 0);
	*/

	/*
	 * pcpu queues and trees
	 */
	pf_fragqueue = kmalloc(sizeof(*pf_fragqueue) * ncpus,
			       M_PF, M_WAITOK | M_ZERO);
	pf_cachequeue = kmalloc(sizeof(*pf_cachequeue) * ncpus,
			       M_PF, M_WAITOK | M_ZERO);
	pf_frag_tree = kmalloc(sizeof(*pf_frag_tree) * ncpus,
			       M_PF, M_WAITOK | M_ZERO);
	pf_cache_tree = kmalloc(sizeof(*pf_cache_tree) * ncpus,
			       M_PF, M_WAITOK | M_ZERO);

	for (n = 0; n < ncpus; ++n) {
		TAILQ_INIT(&pf_fragqueue[n]);
		TAILQ_INIT(&pf_cachequeue[n]);
		RB_INIT(&pf_frag_tree[n]);
		RB_INIT(&pf_cache_tree[n]);
	}
}

void
pf_normalize_unload(void)
{
	kfree(pf_fragqueue, M_PF);
	kfree(pf_cachequeue, M_PF);
	kfree(pf_frag_tree, M_PF);
	kfree(pf_cache_tree, M_PF);
}

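/*
 * Ordering function for the red-black trees: fragments are keyed by
 * (ip_id, protocol, source address, destination address), the tuple
 * that identifies an IPv4 datagram undergoing fragmentation.
 */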
static __inline int
pf_frag_compare(struct pf_fragment *a, struct pf_fragment *b)
{
	int	diff;

	if ((diff = a->fr_id - b->fr_id))
		return (diff);
	else if ((diff = a->fr_p - b->fr_p))
		return (diff);
	else if (a->fr_src.s_addr < b->fr_src.s_addr)
		return (-1);
	else if (a->fr_src.s_addr > b->fr_src.s_addr)
		return (1);
	else if (a->fr_dst.s_addr < b->fr_dst.s_addr)
		return (-1);
	else if (a->fr_dst.s_addr > b->fr_dst.s_addr)
		return (1);
	return (0);
}

void
pf_purge_expired_fragments(void)
{
	struct pf_fragment	*frag;
	u_int32_t		 expire;
	int			 cpu = mycpu->gd_cpuid;

	expire = time_second - pf_default_rule.timeout[PFTM_FRAG];
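
	/*
	 * The queues are kept in LRU order: pf_find_fragment() moves every
	 * hit to the head, so TAILQ_LAST() is always the least recently
	 * used entry and the scans below can stop at the first descriptor
	 * that has not yet timed out.
	 */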
	while ((frag = TAILQ_LAST(&pf_fragqueue[cpu], pf_fragqueue)) != NULL) {
		KASSERT((BUFFER_FRAGMENTS(frag)),
		    ("BUFFER_FRAGMENTS(frag) == 0: %s", __func__));
		if (frag->fr_timeout > expire)
			break;

		DPFPRINTF(("expiring %d(%p)\n", frag->fr_id, frag));
		pf_free_fragment(frag);
	}

	while ((frag = TAILQ_LAST(&pf_cachequeue[cpu], pf_cachequeue)) != NULL) {
		KASSERT((!BUFFER_FRAGMENTS(frag)),
		    ("BUFFER_FRAGMENTS(frag) != 0: %s", __func__));
		if (frag->fr_timeout > expire)
			break;

		DPFPRINTF(("expiring %d(%p)\n", frag->fr_id, frag));
		pf_free_fragment(frag);
		KASSERT((TAILQ_EMPTY(&pf_cachequeue[cpu]) ||
		    TAILQ_LAST(&pf_cachequeue[cpu], pf_cachequeue) != frag),
		    ("!(TAILQ_EMPTY() || TAILQ_LAST() == frag): %s",
		    __func__));
	}
}

/*
 * Try to flush old fragments to make space for new ones
 */
void
pf_flush_fragments(void)
{
	struct pf_fragment	*frag;
	int			 goal;
	int			 cpu = mycpu->gd_cpuid;

	goal = pf_nfrents * 9 / 10;
	DPFPRINTF(("trying to free > %d frents\n",
	    pf_nfrents - goal));
	while (goal < pf_nfrents) {
		frag = TAILQ_LAST(&pf_fragqueue[cpu], pf_fragqueue);
		if (frag == NULL)
			break;
		pf_free_fragment(frag);
	}

	goal = pf_ncache * 9 / 10;
	DPFPRINTF(("trying to free > %d cache entries\n",
	    pf_ncache - goal));
	while (goal < pf_ncache) {
		frag = TAILQ_LAST(&pf_cachequeue[cpu], pf_cachequeue);
		if (frag == NULL)
			break;
		pf_free_fragment(frag);
	}
}

/* Frees the fragments and all associated entries */
void
pf_free_fragment(struct pf_fragment *frag)
{
	struct pf_frent		*frent;
	struct pf_frcache	*frcache;

	/* Free all fragments */
	if (BUFFER_FRAGMENTS(frag)) {
		for (frent = LIST_FIRST(&frag->fr_queue); frent;
		    frent = LIST_FIRST(&frag->fr_queue)) {
			LIST_REMOVE(frent, fr_next);

			m_freem(frent->fr_m);
			kfree(frent, M_PFFRENTPL);
			pf_nfrents--;
		}
	} else {
		for (frcache = LIST_FIRST(&frag->fr_cache); frcache;
		    frcache = LIST_FIRST(&frag->fr_cache)) {
			LIST_REMOVE(frcache, fr_next);

			KASSERT((LIST_EMPTY(&frag->fr_cache) ||
			    LIST_FIRST(&frag->fr_cache)->fr_off >
			    frcache->fr_end),
			    ("! (LIST_EMPTY() || LIST_FIRST()->fr_off >"
			    " frcache->fr_end): %s", __func__));

			kfree(frcache, M_PFCENTPL);
			pf_ncache--;
		}
	}

	pf_remove_fragment(frag);
}

void
pf_ip2key(struct pf_fragment *key, struct ip *ip)
{
	key->fr_p = ip->ip_p;
	key->fr_id = ip->ip_id;
	key->fr_src.s_addr = ip->ip_src.s_addr;
	key->fr_dst.s_addr = ip->ip_dst.s_addr;
}

struct pf_fragment *
pf_find_fragment(struct ip *ip, struct pf_frag_tree *tree)
{
	struct pf_fragment	 key;
	struct pf_fragment	*frag;
	int			 cpu = mycpu->gd_cpuid;

	pf_ip2key(&key, ip);

	frag = RB_FIND(pf_frag_tree, tree, &key);
	if (frag != NULL) {
		/* XXX Are we sure we want to update the timeout? */
		frag->fr_timeout = time_second;
		if (BUFFER_FRAGMENTS(frag)) {
			TAILQ_REMOVE(&pf_fragqueue[cpu], frag, frag_next);
			TAILQ_INSERT_HEAD(&pf_fragqueue[cpu], frag, frag_next);
		} else {
			TAILQ_REMOVE(&pf_cachequeue[cpu], frag, frag_next);
			TAILQ_INSERT_HEAD(&pf_cachequeue[cpu], frag,
			    frag_next);
		}
	}

	return (frag);
}

/* Removes a fragment from the fragment queue and frees the fragment */
void
pf_remove_fragment(struct pf_fragment *frag)
{
	int cpu = mycpu->gd_cpuid;

	if (BUFFER_FRAGMENTS(frag)) {
		RB_REMOVE(pf_frag_tree, &pf_frag_tree[cpu], frag);
		TAILQ_REMOVE(&pf_fragqueue[cpu], frag, frag_next);
		kfree(frag, M_PFFRAGPL);
	} else {
		RB_REMOVE(pf_frag_tree, &pf_cache_tree[cpu], frag);
		TAILQ_REMOVE(&pf_cachequeue[cpu], frag, frag_next);
		kfree(frag, M_PFCACHEPL);
	}
}

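/*
 * FR_IP_OFF() converts the 13-bit fragment offset field, which counts
 * 8-byte units, into a byte offset within the reassembled datagram.
 */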
#define FR_IP_OFF(fr)	((ntohs((fr)->fr_ip->ip_off) & IP_OFFMASK) << 3)

struct mbuf *
pf_reassemble(struct mbuf **m0, struct pf_fragment **frag,
    struct pf_frent *frent, int mff)
{
	struct mbuf	*m = *m0, *m2;
	struct pf_frent	*frea, *next;
	struct pf_frent	*frep = NULL;
	struct ip	*ip = frent->fr_ip;
	int		 hlen = ip->ip_hl << 2;
	u_int16_t	 off = (ntohs(ip->ip_off) & IP_OFFMASK) << 3;
	u_int16_t	 ip_len = ntohs(ip->ip_len) - hlen;
	u_int16_t	 max = ip_len + off;
	int		 cpu = mycpu->gd_cpuid;

	KASSERT((*frag == NULL || BUFFER_FRAGMENTS(*frag)),
	    ("! (*frag == NULL || BUFFER_FRAGMENTS(*frag)): %s", __func__));

	/* Strip off ip header */
	m->m_data += hlen;
	m->m_len -= hlen;

	/* Create a new reassembly queue for this packet */
	if (*frag == NULL) {
		*frag = kmalloc(sizeof(struct pf_fragment),
		    M_PFFRAGPL, M_NOWAIT);
		if (*frag == NULL) {
			pf_flush_fragments();
			*frag = kmalloc(sizeof(struct pf_fragment),
			    M_PFFRAGPL, M_NOWAIT);
			if (*frag == NULL)
				goto drop_fragment;
		}

		(*frag)->fr_flags = 0;
		(*frag)->fr_max = 0;
		(*frag)->fr_src = frent->fr_ip->ip_src;
		(*frag)->fr_dst = frent->fr_ip->ip_dst;
		(*frag)->fr_p = frent->fr_ip->ip_p;
		(*frag)->fr_id = frent->fr_ip->ip_id;
		(*frag)->fr_timeout = time_second;
		LIST_INIT(&(*frag)->fr_queue);

		RB_INSERT(pf_frag_tree, &pf_frag_tree[cpu], *frag);
		TAILQ_INSERT_HEAD(&pf_fragqueue[cpu], *frag, frag_next);

		/* We do not have a previous fragment */
		frep = NULL;
		goto insert;
	}

	/*
	 * Find a fragment after the current one:
	 *  - off contains the real shifted offset.
	 */
	LIST_FOREACH(frea, &(*frag)->fr_queue, fr_next) {
		if (FR_IP_OFF(frea) > off)
			break;
		frep = frea;
	}

	KASSERT((frep != NULL || frea != NULL),
	    ("!(frep != NULL || frea != NULL): %s", __func__));

	/*
	 * Merge with previous fragment by cutting the start of
	 * the current packet.
	 */
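	/*
	 * Example: if frep covers bytes 0-1239 (off 0, 1240 payload bytes)
	 * and the current fragment claims to start at offset 1232, then
	 * precut = 0 + 1240 - 1232 = 8 and the first 8 payload bytes of
	 * the new fragment are trimmed away, so the reassembled data never
	 * contains two competing versions of the same byte range.
	 */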
	if (frep != NULL &&
	    (FR_IP_OFF(frep) + ntohs(frep->fr_ip->ip_len) -
	    frep->fr_ip->ip_hl * 4) > off)
	{
		u_int16_t	precut;

		precut = FR_IP_OFF(frep) + ntohs(frep->fr_ip->ip_len) -
		    frep->fr_ip->ip_hl * 4 - off;
		if (precut >= ip_len)
			goto drop_fragment;
		m_adj(frent->fr_m, precut);
		DPFPRINTF(("overlap -%d\n", precut));
		/* Enforce 8 byte boundaries */
		ip->ip_off = htons(ntohs(ip->ip_off) + (precut >> 3));
		off = (ntohs(ip->ip_off) & IP_OFFMASK) << 3;
		ip_len -= precut;
		ip->ip_len = htons(ip_len + hlen);
	}

	/*
	 * Cut or delete overlapping later fragments.
	 */
	for (; frea != NULL && ip_len + off > FR_IP_OFF(frea);
	    frea = next)
	{
		u_int16_t	aftercut;

		aftercut = ip_len + off - FR_IP_OFF(frea);
		DPFPRINTF(("adjust overlap %d\n", aftercut));
		if (aftercut < (ntohs(frea->fr_ip->ip_len) -
		    frea->fr_ip->ip_hl * 4))
		{
			frea->fr_ip->ip_len =
			    htons(ntohs(frea->fr_ip->ip_len) - aftercut +
			    frea->fr_ip->ip_hl * 4);
			frea->fr_ip->ip_off =
			    htons(ntohs(frea->fr_ip->ip_off) +
			    (aftercut >> 3));
			m_adj(frea->fr_m, aftercut);
			break;
		}

		/* This fragment is completely overlapped, lose it */
		next = LIST_NEXT(frea, fr_next);
		m_freem(frea->fr_m);
		LIST_REMOVE(frea, fr_next);
		kfree(frea, M_PFFRENTPL);
		pf_nfrents--;
	}

insert:
	/* Update maximum data size */
	if ((*frag)->fr_max < max)
		(*frag)->fr_max = max;
	/* This is the last segment */
	if (!mff)
		(*frag)->fr_flags |= PFFRAG_SEENLAST;

	if (frep == NULL)
		LIST_INSERT_HEAD(&(*frag)->fr_queue, frent, fr_next);
	else
		LIST_INSERT_AFTER(frep, frent, fr_next);

	/* Check if we are completely reassembled */
	if (!((*frag)->fr_flags & PFFRAG_SEENLAST))
		return (NULL);

	/* Check if we have all the data */
	off = 0;
	for (frep = LIST_FIRST(&(*frag)->fr_queue); frep; frep = next) {
		next = LIST_NEXT(frep, fr_next);

		off += ntohs(frep->fr_ip->ip_len) - frep->fr_ip->ip_hl * 4;
		if (off < (*frag)->fr_max &&
		    (next == NULL || FR_IP_OFF(next) != off))
		{
			DPFPRINTF(("missing fragment at %d, next %d, max %d\n",
			    off, next == NULL ? -1 : FR_IP_OFF(next),
			    (*frag)->fr_max));
			return (NULL);
		}
	}
	DPFPRINTF(("%d < %d?\n", off, (*frag)->fr_max));
	if (off < (*frag)->fr_max)
		return (NULL);

	/* We have all the data */
	frent = LIST_FIRST(&(*frag)->fr_queue);
	KASSERT((frent != NULL), ("frent == NULL: %s", __func__));
	if ((frent->fr_ip->ip_hl << 2) + off > IP_MAXPACKET) {
		DPFPRINTF(("drop: too big: %d\n", off));
		pf_free_fragment(*frag);
		*frag = NULL;
		return (NULL);
	}
	next = LIST_NEXT(frent, fr_next);

	/* Magic from ip_input */
	ip = frent->fr_ip;
	m = frent->fr_m;
	m2 = m->m_next;
	m->m_next = NULL;
	m_cat(m, m2);
	kfree(frent, M_PFFRENTPL);
	pf_nfrents--;
	for (frent = next; frent != NULL; frent = next) {
		next = LIST_NEXT(frent, fr_next);

		m2 = frent->fr_m;
		kfree(frent, M_PFFRENTPL);
		pf_nfrents--;
		m_cat(m, m2);
	}

	ip->ip_src = (*frag)->fr_src;
	ip->ip_dst = (*frag)->fr_dst;

	/* Remove from fragment queue */
	pf_remove_fragment(*frag);
	*frag = NULL;

	hlen = ip->ip_hl << 2;
	ip->ip_len = htons(off + hlen);
	ip->ip_off &= htons(IP_DF);
	m->m_len += hlen;
	m->m_data -= hlen;

	/* some debugging cruft by sklower, below, will go away soon */
	/* XXX this should be done elsewhere */
	if (m->m_flags & M_PKTHDR) {
		int plen = 0;
		for (m2 = m; m2; m2 = m2->m_next)
			plen += m2->m_len;
		m->m_pkthdr.len = plen;
	}

#if 0
	kprintf("reassembly complete: len=%u\n", ntohs(ip->ip_len));
	kprintf("ip_src=%08x dst=%08x tos=%u p=%u off=%u len=%u\n",
	    ip->ip_src.s_addr, ip->ip_dst.s_addr, ip->ip_tos, ip->ip_p,
	    ntohs(ip->ip_off), ntohs(ip->ip_len));
#endif

	DPFPRINTF(("complete: %p(%d)\n", m, ntohs(ip->ip_len)));
	return (m);

drop_fragment:
	/* Oops - fail safe - drop packet */
	kfree(frent, M_PFFRENTPL);
	pf_nfrents--;
	m_freem(m);
	return (NULL);
}

struct mbuf *
pf_fragcache(struct mbuf **m0, struct ip *h, struct pf_fragment **frag,
    int mff, int drop, int *nomem)
{
	struct mbuf		*m = *m0;
	struct pf_frcache	*frp, *fra, *cur = NULL;
	int			 ip_len = ntohs(h->ip_len) - (h->ip_hl << 2);
	u_int16_t		 off = (ntohs(h->ip_off) & IP_OFFMASK) << 3;
	u_int16_t		 max = ip_len + off;
	int			 hosed = 0;
	int			 cpu = mycpu->gd_cpuid;

	KASSERT((*frag == NULL || !BUFFER_FRAGMENTS(*frag)),
	    ("!(*frag == NULL || !BUFFER_FRAGMENTS(*frag)): %s", __func__));
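
	/*
	 * Non-buffering mode: instead of mbufs we keep a sorted list of
	 * pf_frcache entries, each describing a byte range [fr_off, fr_end)
	 * that has already been passed on.  New fragments are checked
	 * against those ranges; overlapping data is either trimmed off the
	 * new fragment (crop) or causes it, and eventually the whole
	 * datagram, to be dropped (drop-ovl).
	 */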
	/* Create a new range queue for this packet */
	if (*frag == NULL) {
		*frag = kmalloc(sizeof(struct pf_fragment),
		    M_PFCACHEPL, M_NOWAIT);
		if (*frag == NULL) {
			pf_flush_fragments();
			*frag = kmalloc(sizeof(struct pf_fragment),
			    M_PFCACHEPL, M_NOWAIT);
			if (*frag == NULL)
				goto no_mem;
		}

		/* Get an entry for the queue */
		cur = kmalloc(sizeof(struct pf_frcache), M_PFCENTPL,
		    M_NOWAIT);
		if (cur == NULL) {
			kfree(*frag, M_PFCACHEPL);
			*frag = NULL;
			goto no_mem;
		}
		pf_ncache++;

		(*frag)->fr_flags = PFFRAG_NOBUFFER;
		(*frag)->fr_max = 0;
		(*frag)->fr_src = h->ip_src;
		(*frag)->fr_dst = h->ip_dst;
		(*frag)->fr_p = h->ip_p;
		(*frag)->fr_id = h->ip_id;
		(*frag)->fr_timeout = time_second;

		cur->fr_off = off;
		cur->fr_end = max;
		LIST_INIT(&(*frag)->fr_cache);
		LIST_INSERT_HEAD(&(*frag)->fr_cache, cur, fr_next);

		RB_INSERT(pf_frag_tree, &pf_cache_tree[cpu], *frag);
		TAILQ_INSERT_HEAD(&pf_cachequeue[cpu], *frag, frag_next);

		DPFPRINTF(("fragcache[%d]: new %d-%d\n", h->ip_id, off, max));

		goto pass;
	}

	/*
	 * Find a fragment after the current one:
	 *  - off contains the real shifted offset.
	 */
	frp = NULL;
	LIST_FOREACH(fra, &(*frag)->fr_cache, fr_next) {
		if (fra->fr_off > off)
			break;
		frp = fra;
	}

	KASSERT((frp != NULL || fra != NULL),
	    ("!(frp != NULL || fra != NULL): %s", __func__));

	if (frp != NULL) {
		int	precut;

		precut = frp->fr_end - off;
		if (precut >= ip_len) {
			/* Fragment is entirely a duplicate */
			DPFPRINTF(("fragcache[%d]: dead (%d-%d) %d-%d\n",
			    h->ip_id, frp->fr_off, frp->fr_end, off, max));
			goto drop_fragment;
		}
		if (precut == 0) {
			/* They are adjacent.  Fixup cache entry */
			DPFPRINTF(("fragcache[%d]: adjacent (%d-%d) %d-%d\n",
			    h->ip_id, frp->fr_off, frp->fr_end, off, max));
			frp->fr_end = max;
		} else if (precut > 0) {
			/*
			 * The first part of this payload overlaps with a
			 * fragment that has already been passed.
			 * Need to trim off the first part of the payload.
			 * But to do so easily, we need to create another
			 * mbuf to throw the original header into.
			 */

			DPFPRINTF(("fragcache[%d]: chop %d (%d-%d) %d-%d\n",
			    h->ip_id, precut, frp->fr_off, frp->fr_end, off,
			    max));

			off += precut;
			max -= precut;
			/* Update the previous frag to encompass this one */
			frp->fr_end = max;

			if (!drop) {
				/*
				 * XXX Optimization opportunity
				 * This is a very heavy way to trim the
				 * payload.  We could do it much faster by
				 * diddling mbuf internals but that would be
				 * even less legible than this mbuf magic.
				 * For my next trick, I'll pull a rabbit out
				 * of my laptop.
				 */
				*m0 = m_dup(m, M_NOWAIT);
				if (*m0 == NULL)
					goto no_mem;
				/* From KAME Project : We have missed this! */
				m_adj(*m0, (h->ip_hl << 2) -
				    (*m0)->m_pkthdr.len);
				KASSERT(((*m0)->m_next == NULL),
				    ("(*m0)->m_next != NULL: %s",
				    __func__));
				m_adj(m, precut + (h->ip_hl << 2));
				m_cat(*m0, m);
				m = *m0;
				if (m->m_flags & M_PKTHDR) {
					int plen = 0;
					struct mbuf *t;
					for (t = m; t; t = t->m_next)
						plen += t->m_len;
					m->m_pkthdr.len = plen;
				}

				h = mtod(m, struct ip *);

				KASSERT(((int)m->m_len ==
				    ntohs(h->ip_len) - precut),
				    ("m->m_len != h->ip_len - precut: %s",
				    __func__));
				h->ip_off = htons(ntohs(h->ip_off) +
				    (precut >> 3));
				h->ip_len = htons(ntohs(h->ip_len) - precut);
			} else {
				hosed++;
			}
		} else {
			/* There is a gap between fragments */

			DPFPRINTF(("fragcache[%d]: gap %d (%d-%d) %d-%d\n",
			    h->ip_id, -precut, frp->fr_off, frp->fr_end, off,
			    max));

			cur = kmalloc(sizeof(struct pf_frcache), M_PFCENTPL,
			    M_NOWAIT);
			if (cur == NULL)
				goto no_mem;
			pf_ncache++;

			cur->fr_off = off;
			cur->fr_end = max;
			LIST_INSERT_AFTER(frp, cur, fr_next);
		}
	}

	if (fra != NULL) {
		int	aftercut;
		int	merge = 0;

		aftercut = max - fra->fr_off;
		if (aftercut == 0) {
			/* Adjacent fragments */
			DPFPRINTF(("fragcache[%d]: adjacent %d-%d (%d-%d)\n",
			    h->ip_id, off, max, fra->fr_off, fra->fr_end));
			fra->fr_off = off;
			merge = 1;
		} else if (aftercut > 0) {
			/* Need to chop off the tail of this fragment */
			DPFPRINTF(("fragcache[%d]: chop %d %d-%d (%d-%d)\n",
			    h->ip_id, aftercut, off, max, fra->fr_off,
			    fra->fr_end));
			fra->fr_off = off;
			max -= aftercut;

			merge = 1;

			if (!drop) {
				m_adj(m, -aftercut);
				if (m->m_flags & M_PKTHDR) {
					int plen = 0;
					struct mbuf *t;
					for (t = m; t; t = t->m_next)
						plen += t->m_len;
					m->m_pkthdr.len = plen;
				}
				h = mtod(m, struct ip *);
				KASSERT(((int)m->m_len ==
				    ntohs(h->ip_len) - aftercut),
				    ("m->m_len != h->ip_len - aftercut: %s",
				    __func__));
				h->ip_len = htons(ntohs(h->ip_len) - aftercut);
			} else {
				hosed++;
			}
		} else if (frp == NULL) {
			/* There is a gap between fragments */
			DPFPRINTF(("fragcache[%d]: gap %d %d-%d (%d-%d)\n",
			    h->ip_id, -aftercut, off, max, fra->fr_off,
			    fra->fr_end));

			cur = kmalloc(sizeof(struct pf_frcache), M_PFCENTPL,
			    M_NOWAIT);
			if (cur == NULL)
				goto no_mem;
			pf_ncache++;

			cur->fr_off = off;
			cur->fr_end = max;
			LIST_INSERT_BEFORE(fra, cur, fr_next);
		}

		/* Need to glue together two separate fragment descriptors */
		if (merge) {
			if (cur && fra->fr_off <= cur->fr_end) {
				/* Need to merge in a previous 'cur' */
				DPFPRINTF(("fragcache[%d]: adjacent(merge "
				    "%d-%d) %d-%d (%d-%d)\n",
				    h->ip_id, cur->fr_off, cur->fr_end, off,
				    max, fra->fr_off, fra->fr_end));
				fra->fr_off = cur->fr_off;
				LIST_REMOVE(cur, fr_next);
				kfree(cur, M_PFCENTPL);
				pf_ncache--;
				cur = NULL;
			} else if (frp && fra->fr_off <= frp->fr_end) {
				/* Need to merge in a modified 'frp' */
				KASSERT((cur == NULL), ("cur != NULL: %s",
				    __func__));
				DPFPRINTF(("fragcache[%d]: adjacent(merge "
				    "%d-%d) %d-%d (%d-%d)\n",
				    h->ip_id, frp->fr_off, frp->fr_end, off,
				    max, fra->fr_off, fra->fr_end));
				fra->fr_off = frp->fr_off;
				LIST_REMOVE(frp, fr_next);
				kfree(frp, M_PFCENTPL);
				pf_ncache--;
				frp = NULL;
			}
		}
	}

	if (hosed) {
		/*
		 * We must keep tracking the overall fragment even when
		 * we're going to drop it anyway so that we know when to
		 * free the overall descriptor.  Thus we drop the frag late.
		 */
		goto drop_fragment;
	}

pass:
	/* Update maximum data size */
	if ((*frag)->fr_max < max)
		(*frag)->fr_max = max;

	/* This is the last segment */
	if (!mff)
		(*frag)->fr_flags |= PFFRAG_SEENLAST;

	/* Check if we are completely reassembled */
	if (((*frag)->fr_flags & PFFRAG_SEENLAST) &&
	    LIST_FIRST(&(*frag)->fr_cache)->fr_off == 0 &&
	    LIST_FIRST(&(*frag)->fr_cache)->fr_end == (*frag)->fr_max) {
		/* Remove from fragment queue */
		DPFPRINTF(("fragcache[%d]: done 0-%d\n", h->ip_id,
		    (*frag)->fr_max));
		pf_free_fragment(*frag);
		*frag = NULL;
	}

	return (m);

no_mem:
	*nomem = 1;

	/* Still need to pay attention to !IP_MF */
	if (!mff && *frag != NULL)
		(*frag)->fr_flags |= PFFRAG_SEENLAST;

	m_freem(m);
	return (NULL);

drop_fragment:
	/* Still need to pay attention to !IP_MF */
	if (!mff && *frag != NULL)
		(*frag)->fr_flags |= PFFRAG_SEENLAST;

	if (drop) {
		/* This fragment has been deemed bad.  Don't reass */
		if (((*frag)->fr_flags & PFFRAG_DROP) == 0)
			DPFPRINTF(("fragcache[%d]: dropping overall "
			    "fragment\n", h->ip_id));
		(*frag)->fr_flags |= PFFRAG_DROP;
	}

	m_freem(m);
	return (NULL);
}

int
pf_normalize_ip(struct mbuf **m0, int dir, struct pfi_kif *kif,
    u_short *reason, struct pf_pdesc *pd)
{
	struct mbuf		*m = *m0;
	struct pf_rule		*r;
	struct pf_frent		*frent;
	struct pf_fragment	*frag = NULL;
	struct ip		*h = mtod(m, struct ip *);
	int			 mff = (h->ip_off & htons(IP_MF));
	int			 hlen = h->ip_hl << 2;
	u_int16_t		 fragoff = (ntohs(h->ip_off) & IP_OFFMASK) << 3;
	u_int16_t		 max;
	int			 ip_len;
	int			 tag = -1;
	int			 cpu = mycpu->gd_cpuid;

	r = TAILQ_FIRST(pf_main_ruleset.rules[PF_RULESET_SCRUB].active.ptr);
	while (r != NULL) {
		r->evaluations++;
		if (pfi_kif_match(r->kif, kif) == r->ifnot)
			r = r->skip[PF_SKIP_IFP].ptr;
		else if (r->direction && r->direction != dir)
			r = r->skip[PF_SKIP_DIR].ptr;
		else if (r->af && r->af != AF_INET)
			r = r->skip[PF_SKIP_AF].ptr;
		else if (r->proto && r->proto != h->ip_p)
			r = r->skip[PF_SKIP_PROTO].ptr;
		else if (PF_MISMATCHAW(&r->src.addr,
		    (struct pf_addr *)&h->ip_src.s_addr, AF_INET,
		    r->src.neg, kif))
			r = r->skip[PF_SKIP_SRC_ADDR].ptr;
		else if (PF_MISMATCHAW(&r->dst.addr,
		    (struct pf_addr *)&h->ip_dst.s_addr, AF_INET,
		    r->dst.neg, NULL))
			r = r->skip[PF_SKIP_DST_ADDR].ptr;
		else if (r->match_tag && !pf_match_tag(m, r, &tag))
			r = TAILQ_NEXT(r, entries);
		else
			break;
	}

	if (r == NULL || r->action == PF_NOSCRUB)
		return (PF_PASS);
	else {
		r->packets[dir == PF_OUT]++;
		r->bytes[dir == PF_OUT] += pd->tot_len;
	}

	/* Check for illegal packets */
	if (hlen < (int)sizeof(struct ip))
		goto drop;

	if (hlen > ntohs(h->ip_len))
		goto drop;

	/* Clear IP_DF if the rule uses the no-df option */
	if ((r->rule_flag & PFRULE_NODF) && (h->ip_off & htons(IP_DF))) {
		u_int16_t ip_off = h->ip_off;	/* network byte order */

		h->ip_off &= ~htons(IP_DF);
		h->ip_sum = pf_cksum_fixup(h->ip_sum, ip_off, h->ip_off, 0);
	}

	/* We will need other tests here */
	if (!fragoff && !mff)
		goto no_fragment;

	/* A fragment; rehash required. */
	m->m_flags &= ~M_HASH;

	/*
	 * We're dealing with a fragment now.  Don't allow fragments
	 * with IP_DF to enter the cache.  If the flag was cleared by
	 * no-df above, fine.  Otherwise drop it.
	 */
	if (h->ip_off & htons(IP_DF)) {
		DPFPRINTF(("IP_DF\n"));
		goto bad;
	}

	ip_len = ntohs(h->ip_len) - hlen;

	/* All fragments are 8 byte aligned */
	if (mff && (ip_len & 0x7)) {
		DPFPRINTF(("mff and %d\n", ip_len));
		goto bad;
	}

	/* Respect maximum length */
	if (fragoff + ip_len > IP_MAXPACKET) {
		DPFPRINTF(("max packet %d\n", fragoff + ip_len));
		goto bad;
	}
	max = fragoff + ip_len;
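
	/*
	 * The branch below mirrors the scrub rule options; in the pf.conf
	 * syntax of this vintage (shown for illustration):
	 *
	 *	scrub in all fragment reassemble   -> fully buffer (default)
	 *	scrub in all fragment crop         -> PFRULE_FRAGCROP
	 *	scrub in all fragment drop-ovl     -> PFRULE_FRAGDROP
	 */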
	if ((r->rule_flag & (PFRULE_FRAGCROP|PFRULE_FRAGDROP)) == 0) {
		/* Fully buffer all of the fragments */

		frag = pf_find_fragment(h, &pf_frag_tree[cpu]);

		/* Check if we saw the last fragment already */
		if (frag != NULL && (frag->fr_flags & PFFRAG_SEENLAST) &&
		    max > frag->fr_max)
			goto bad;

		/* Get an entry for the fragment queue */
		frent = kmalloc(sizeof(struct pf_frent), M_PFFRENTPL,
		    M_NOWAIT);
		if (frent == NULL) {
			REASON_SET(reason, PFRES_MEMORY);
			return (PF_DROP);
		}
		pf_nfrents++;
		frent->fr_ip = h;
		frent->fr_m = m;

		/* Might return a completely reassembled mbuf, or NULL */
		DPFPRINTF(("reass frag %d @ %d-%d\n", h->ip_id, fragoff, max));
		*m0 = m = pf_reassemble(m0, &frag, frent, mff);

		if (m == NULL)
			return (PF_DROP);

		if (frag != NULL && (frag->fr_flags & PFFRAG_DROP))
			goto drop;

		h = mtod(m, struct ip *);
	} else {
		/* non-buffering fragment cache (drops or masks overlaps) */
		int	nomem = 0;

		if (dir == PF_OUT && m->m_pkthdr.pf.flags & PF_TAG_FRAGCACHE) {
			/*
			 * Already passed the fragment cache in the
			 * input direction.  If we continued, it would
			 * appear to be a dup and would be dropped.
			 */
			goto fragment_pass;
		}

		frag = pf_find_fragment(h, &pf_cache_tree[cpu]);

		/* Check if we saw the last fragment already */
		if (frag != NULL && (frag->fr_flags & PFFRAG_SEENLAST) &&
		    max > frag->fr_max) {
			if (r->rule_flag & PFRULE_FRAGDROP)
				frag->fr_flags |= PFFRAG_DROP;
			goto bad;
		}

		*m0 = m = pf_fragcache(m0, h, &frag, mff,
		    (r->rule_flag & PFRULE_FRAGDROP) ? 1 : 0, &nomem);
		if (m == NULL) {
			if (nomem)
				goto no_mem;
			goto drop;
		}

		if (dir == PF_IN)
			m->m_pkthdr.pf.flags |= PF_TAG_FRAGCACHE;

		if (frag != NULL && (frag->fr_flags & PFFRAG_DROP))
			goto drop;
		goto fragment_pass;
	}

no_fragment:
	/* At this point, only IP_DF is allowed in ip_off */
	if (h->ip_off & ~htons(IP_DF)) {
		u_int16_t ip_off = h->ip_off;	/* network byte order */

		h->ip_off &= htons(IP_DF);
		h->ip_sum = pf_cksum_fixup(h->ip_sum, ip_off, h->ip_off, 0);
	}

	/* Enforce a minimum ttl, may cause endless packet loops */
	if (r->min_ttl && h->ip_ttl < r->min_ttl) {
		u_int16_t ip_ttl = h->ip_ttl;

		h->ip_ttl = r->min_ttl;
		h->ip_sum = pf_cksum_fixup(h->ip_sum, ip_ttl, h->ip_ttl, 0);
	}

	/* Enforce tos */
	if (r->rule_flag & PFRULE_SET_TOS) {
		u_int16_t	ov, nv;

		ov = *(u_int16_t *)h;
		h->ip_tos = r->set_tos;
		nv = *(u_int16_t *)h;

		h->ip_sum = pf_cksum_fixup(h->ip_sum, ov, nv, 0);
	}

	if (r->rule_flag & PFRULE_RANDOMID) {
		u_int16_t ip_id = h->ip_id;

		h->ip_id = ip_randomid();
		h->ip_sum = pf_cksum_fixup(h->ip_sum, ip_id, h->ip_id, 0);
	}
	if ((r->rule_flag & (PFRULE_FRAGCROP|PFRULE_FRAGDROP)) == 0)
		pd->flags |= PFDESC_IP_REAS;

	return (PF_PASS);

fragment_pass:
	/* Enforce a minimum ttl, may cause endless packet loops */
	if (r->min_ttl && h->ip_ttl < r->min_ttl) {
		u_int16_t ip_ttl = h->ip_ttl;

		h->ip_ttl = r->min_ttl;
		h->ip_sum = pf_cksum_fixup(h->ip_sum, ip_ttl, h->ip_ttl, 0);
	}
	/* Enforce tos */
	if (r->rule_flag & PFRULE_SET_TOS) {
		u_int16_t	ov, nv;

		ov = *(u_int16_t *)h;
		h->ip_tos = r->set_tos;
		nv = *(u_int16_t *)h;

		h->ip_sum = pf_cksum_fixup(h->ip_sum, ov, nv, 0);
	}
	if ((r->rule_flag & (PFRULE_FRAGCROP|PFRULE_FRAGDROP)) == 0)
		pd->flags |= PFDESC_IP_REAS;
	return (PF_PASS);

no_mem:
	REASON_SET(reason, PFRES_MEMORY);
	if (r != NULL && r->log)
		PFLOG_PACKET(kif, h, m, AF_INET, dir, *reason, r,
		    NULL, NULL, pd);
	return (PF_DROP);

drop:
	REASON_SET(reason, PFRES_NORM);
	if (r != NULL && r->log)
		PFLOG_PACKET(kif, h, m, AF_INET, dir, *reason, r,
		    NULL, NULL, pd);
	return (PF_DROP);

bad:
	DPFPRINTF(("dropping bad fragment\n"));

	/* Free associated fragments */
	if (frag != NULL)
		pf_free_fragment(frag);

	REASON_SET(reason, PFRES_FRAG);
	if (r != NULL && r->log)
		PFLOG_PACKET(kif, h, m, AF_INET, dir, *reason, r,
		    NULL, NULL, pd);

	return (PF_DROP);
}

#ifdef INET6
int
pf_normalize_ip6(struct mbuf **m0, int dir, struct pfi_kif *kif,
    u_short *reason, struct pf_pdesc *pd)
{
	struct mbuf		*m = *m0;
	struct pf_rule		*r;
	struct ip6_hdr		*h = mtod(m, struct ip6_hdr *);
	int			 off;
	struct ip6_ext		 ext;
	struct ip6_opt		 opt;
	struct ip6_opt_jumbo	 jumbo;
	struct ip6_frag		 frag;
	u_int32_t		 jumbolen = 0, plen;
	u_int16_t		 fragoff = 0;
	int			 optend;
	int			 ooff;
	u_int8_t		 proto;
	int			 terminal;

	r = TAILQ_FIRST(pf_main_ruleset.rules[PF_RULESET_SCRUB].active.ptr);
	while (r != NULL) {
		r->evaluations++;
		if (pfi_kif_match(r->kif, kif) == r->ifnot)
			r = r->skip[PF_SKIP_IFP].ptr;
		else if (r->direction && r->direction != dir)
			r = r->skip[PF_SKIP_DIR].ptr;
		else if (r->af && r->af != AF_INET6)
			r = r->skip[PF_SKIP_AF].ptr;
#if 0 /* header chain! */
		else if (r->proto && r->proto != h->ip6_nxt)
			r = r->skip[PF_SKIP_PROTO].ptr;
#endif
		else if (PF_MISMATCHAW(&r->src.addr,
		    (struct pf_addr *)&h->ip6_src, AF_INET6,
		    r->src.neg, kif))
			r = r->skip[PF_SKIP_SRC_ADDR].ptr;
		else if (PF_MISMATCHAW(&r->dst.addr,
		    (struct pf_addr *)&h->ip6_dst, AF_INET6,
		    r->dst.neg, NULL))
			r = r->skip[PF_SKIP_DST_ADDR].ptr;
		else
			break;
	}

	if (r == NULL || r->action == PF_NOSCRUB)
		return (PF_PASS);
	else {
		r->packets[dir == PF_OUT]++;
		r->bytes[dir == PF_OUT] += pd->tot_len;
	}

	/* Check for illegal packets */
	if (sizeof(struct ip6_hdr) + IPV6_MAXPACKET < m->m_pkthdr.len)
		goto drop;

	off = sizeof(struct ip6_hdr);
	proto = h->ip6_nxt;
	terminal = 0;
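
	/*
	 * Walk the extension header chain.  Each header names its successor
	 * in ip6e_nxt; AH counts its length in 4-byte units (plus 2), the
	 * other extension headers in 8-byte units (plus 1).  The loop ends
	 * at the first non-extension ("terminal") protocol or jumps out
	 * when a fragment header is found.
	 */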
	do {
		switch (proto) {
		case IPPROTO_FRAGMENT:
			goto fragment;
			break;
		case IPPROTO_AH:
		case IPPROTO_ROUTING:
		case IPPROTO_DSTOPTS:
			if (!pf_pull_hdr(m, off, &ext, sizeof(ext), NULL,
			    NULL, AF_INET6))
				goto shortpkt;
			if (proto == IPPROTO_AH)
				off += (ext.ip6e_len + 2) * 4;
			else
				off += (ext.ip6e_len + 1) * 8;
			proto = ext.ip6e_nxt;
			break;
		case IPPROTO_HOPOPTS:
			if (!pf_pull_hdr(m, off, &ext, sizeof(ext), NULL,
			    NULL, AF_INET6))
				goto shortpkt;
			optend = off + (ext.ip6e_len + 1) * 8;
			ooff = off + sizeof(ext);
			do {
				if (!pf_pull_hdr(m, ooff, &opt.ip6o_type,
				    sizeof(opt.ip6o_type), NULL, NULL,
				    AF_INET6))
					goto shortpkt;
				if (opt.ip6o_type == IP6OPT_PAD1) {
					ooff++;
					continue;
				}
				if (!pf_pull_hdr(m, ooff, &opt, sizeof(opt),
				    NULL, NULL, AF_INET6))
					goto shortpkt;
				if (ooff + sizeof(opt) + opt.ip6o_len > optend)
					goto drop;
				switch (opt.ip6o_type) {
				case IP6OPT_JUMBO:
					if (h->ip6_plen != 0)
						goto drop;
					if (!pf_pull_hdr(m, ooff, &jumbo,
					    sizeof(jumbo), NULL, NULL,
					    AF_INET6))
						goto shortpkt;
					memcpy(&jumbolen,
					    jumbo.ip6oj_jumbo_len,
					    sizeof(jumbolen));
					jumbolen = ntohl(jumbolen);
					if (jumbolen <= IPV6_MAXPACKET)
						goto drop;
					if (sizeof(struct ip6_hdr) +
					    jumbolen != m->m_pkthdr.len)
						goto drop;
					break;
				default:
					break;
				}
				ooff += sizeof(opt) + opt.ip6o_len;
			} while (ooff < optend);

			off = optend;
			proto = ext.ip6e_nxt;
			break;
		default:
			terminal = 1;
			break;
		}
	} while (!terminal);

	/* jumbo payload option must be present, or plen > 0 */
	if (ntohs(h->ip6_plen) == 0)
		plen = jumbolen;
	else
		plen = ntohs(h->ip6_plen);
	if (plen == 0)
		goto drop;
	if (sizeof(struct ip6_hdr) + plen > m->m_pkthdr.len)
		goto shortpkt;

	/* Enforce a minimum ttl, may cause endless packet loops */
	if (r->min_ttl && h->ip6_hlim < r->min_ttl)
		h->ip6_hlim = r->min_ttl;

	return (PF_PASS);

fragment:
	if (ntohs(h->ip6_plen) == 0 || jumbolen)
		goto drop;
	plen = ntohs(h->ip6_plen);

	if (!pf_pull_hdr(m, off, &frag, sizeof(frag), NULL, NULL, AF_INET6))
		goto shortpkt;
	fragoff = ntohs(frag.ip6f_offlg & IP6F_OFF_MASK);
	if (fragoff + (plen - off - sizeof(frag)) > IPV6_MAXPACKET)
		goto badfrag;

	/* do something about it */
	/* remember to set pd->flags |= PFDESC_IP_REAS */
	return (PF_PASS);

shortpkt:
	REASON_SET(reason, PFRES_SHORT);
	if (r != NULL && r->log)
		PFLOG_PACKET(kif, h, m, AF_INET6, dir, *reason, r,
		    NULL, NULL, pd);
	return (PF_DROP);

drop:
	REASON_SET(reason, PFRES_NORM);
	if (r != NULL && r->log)
		PFLOG_PACKET(kif, h, m, AF_INET6, dir, *reason, r,
		    NULL, NULL, pd);
	return (PF_DROP);

badfrag:
	REASON_SET(reason, PFRES_FRAG);
	if (r != NULL && r->log)
		PFLOG_PACKET(kif, h, m, AF_INET6, dir, *reason, r,
		    NULL, NULL, pd);
	return (PF_DROP);
}
#endif /* INET6 */

int
pf_normalize_tcp(int dir, struct pfi_kif *kif, struct mbuf *m, int ipoff,
    int off, void *h, struct pf_pdesc *pd)
{
	struct pf_rule	*r, *rm = NULL;
	struct tcphdr	*th = pd->hdr.tcp;
	int		 rewrite = 0;
	u_short		 reason;
	u_int8_t	 flags;
	sa_family_t	 af = pd->af;

	r = TAILQ_FIRST(pf_main_ruleset.rules[PF_RULESET_SCRUB].active.ptr);
	while (r != NULL) {
		r->evaluations++;
		if (pfi_kif_match(r->kif, kif) == r->ifnot)
			r = r->skip[PF_SKIP_IFP].ptr;
		else if (r->direction && r->direction != dir)
			r = r->skip[PF_SKIP_DIR].ptr;
		else if (r->af && r->af != af)
			r = r->skip[PF_SKIP_AF].ptr;
		else if (r->proto && r->proto != pd->proto)
			r = r->skip[PF_SKIP_PROTO].ptr;
		else if (PF_MISMATCHAW(&r->src.addr, pd->src, af,
		    r->src.neg, kif))
			r = r->skip[PF_SKIP_SRC_ADDR].ptr;
		else if (r->src.port_op && !pf_match_port(r->src.port_op,
		    r->src.port[0], r->src.port[1], th->th_sport))
			r = r->skip[PF_SKIP_SRC_PORT].ptr;
		else if (PF_MISMATCHAW(&r->dst.addr, pd->dst, af,
		    r->dst.neg, NULL))
			r = r->skip[PF_SKIP_DST_ADDR].ptr;
		else if (r->dst.port_op && !pf_match_port(r->dst.port_op,
		    r->dst.port[0], r->dst.port[1], th->th_dport))
			r = r->skip[PF_SKIP_DST_PORT].ptr;
		else if (r->os_fingerprint != PF_OSFP_ANY && !pf_osfp_match(
		    pf_osfp_fingerprint(pd, m, off, th),
		    r->os_fingerprint))
			r = TAILQ_NEXT(r, entries);
		else {
			rm = r;
			break;
		}
	}

	if (rm == NULL || rm->action == PF_NOSCRUB)
		return (PF_PASS);
	else {
		r->packets[dir == PF_OUT]++;
		r->bytes[dir == PF_OUT] += pd->tot_len;
	}

	if (rm->rule_flag & PFRULE_REASSEMBLE_TCP)
		pd->flags |= PFDESC_TCP_NORM;
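
	/*
	 * Sanitize the flag combinations: SYN+RST is never legal, SYN+FIN
	 * is collapsed to plain SYN, a non-SYN segment must carry at least
	 * one of ACK or RST, and FIN/PUSH/URG only make sense when ACK is
	 * also set.
	 */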
	flags = th->th_flags;
	if (flags & TH_SYN) {
		/* Illegal packet */
		if (flags & TH_RST)
			goto tcp_drop;

		if (flags & TH_FIN)
			flags &= ~TH_FIN;
	} else {
		/* Illegal packet */
		if (!(flags & (TH_ACK|TH_RST)))
			goto tcp_drop;
	}

	if (!(flags & TH_ACK)) {
		/* These flags are only valid if ACK is set */
		if ((flags & TH_FIN) || (flags & TH_PUSH) || (flags & TH_URG))
			goto tcp_drop;
	}

	/* Check for illegal header length */
	if (th->th_off < (sizeof(struct tcphdr) >> 2))
		goto tcp_drop;

	/* If flags changed, or reserved data set, then adjust */
	if (flags != th->th_flags || th->th_x2 != 0) {
		u_int16_t	ov, nv;

		ov = *(u_int16_t *)(&th->th_ack + 1);
		th->th_flags = flags;
		th->th_x2 = 0;
		nv = *(u_int16_t *)(&th->th_ack + 1);

		th->th_sum = pf_cksum_fixup(th->th_sum, ov, nv, 0);
		rewrite = 1;
	}

	/* Remove urgent pointer, if TH_URG is not set */
	if (!(flags & TH_URG) && th->th_urp) {
		th->th_sum = pf_cksum_fixup(th->th_sum, th->th_urp, 0, 0);
		th->th_urp = 0;
		rewrite = 1;
	}

	/* Process options */
	if (r->max_mss && pf_normalize_tcpopt(r, m, th, off, pd->af))
		rewrite = 1;

	/* copy back packet headers if we sanitized */
	if (rewrite)
		m_copyback(m, off, sizeof(*th), th);

	return (PF_PASS);

tcp_drop:
	REASON_SET(&reason, PFRES_NORM);
	if (rm != NULL && r->log)
		PFLOG_PACKET(kif, h, m, AF_INET, dir, reason, r,
		    NULL, NULL, pd);
	return (PF_DROP);
}

int
pf_normalize_tcp_init(struct mbuf *m, int off, struct pf_pdesc *pd,
    struct tcphdr *th, struct pf_state_peer *src, struct pf_state_peer *dst)
{
	u_int32_t	 tsval, tsecr;
	u_int8_t	 hdr[60];
	u_int8_t	*opt;

	KASSERT((src->scrub == NULL),
	    ("pf_normalize_tcp_init: src->scrub != NULL"));

	src->scrub = kmalloc(sizeof(struct pf_state_scrub), M_PFSTATESCRUBPL,
	    M_NOWAIT | M_ZERO);
	if (src->scrub == NULL)
		return (1);

	switch (pd->af) {
#ifdef INET
	case AF_INET: {
		struct ip *h = mtod(m, struct ip *);
		src->scrub->pfss_ttl = h->ip_ttl;
		break;
	}
#endif /* INET */
#ifdef INET6
	case AF_INET6: {
		struct ip6_hdr *h = mtod(m, struct ip6_hdr *);
		src->scrub->pfss_ttl = h->ip6_hlim;
		break;
	}
#endif /* INET6 */
	}

	/*
	 * All normalizations below are only begun if we see the start of
	 * the connection.  They must all set an enabled bit in pfss_flags.
	 */
	if ((th->th_flags & TH_SYN) == 0)
		return (0);
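
	/*
	 * Parse the SYN's options.  When a timestamp option is present we
	 * pick a random per-connection modulator (pfss_ts_mod) that is
	 * added to every timestamp passing through.  This hides the host's
	 * real timestamp clock (often tied to uptime) without disturbing
	 * PAWS or RTT measurement, which only rely on differences.
	 */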
	if (th->th_off > (sizeof(struct tcphdr) >> 2) && src->scrub &&
	    pf_pull_hdr(m, off, hdr, th->th_off << 2, NULL, NULL, pd->af)) {
		/* Diddle with TCP options */
		int	hlen;

		opt = hdr + sizeof(struct tcphdr);
		hlen = (th->th_off << 2) - sizeof(struct tcphdr);
		while (hlen >= TCPOLEN_TIMESTAMP) {
			switch (*opt) {
			case TCPOPT_EOL:	/* FALLTHROUGH */
			case TCPOPT_NOP:
				opt++;
				hlen--;
				break;
			case TCPOPT_TIMESTAMP:
				if (opt[1] >= TCPOLEN_TIMESTAMP) {
					src->scrub->pfss_flags |=
					    PFSS_TIMESTAMP;
					src->scrub->pfss_ts_mod =
					    karc4random();

					/* note PFSS_PAWS not set yet */
					memcpy(&tsval, &opt[2],
					    sizeof(u_int32_t));
					memcpy(&tsecr, &opt[6],
					    sizeof(u_int32_t));
					src->scrub->pfss_tsval0 = ntohl(tsval);
					src->scrub->pfss_tsval = ntohl(tsval);
					src->scrub->pfss_tsecr = ntohl(tsecr);
					getmicrouptime(&src->scrub->pfss_last);
				}
				/* FALLTHROUGH */
			default:
				hlen -= MAX(opt[1], 2);
				opt += MAX(opt[1], 2);
				break;
			}
		}
	}

	return (0);
}

void
pf_normalize_tcp_cleanup(struct pf_state *state)
{
	if (state->src.scrub)
		kfree(state->src.scrub, M_PFSTATESCRUBPL);
	if (state->dst.scrub)
		kfree(state->dst.scrub, M_PFSTATESCRUBPL);

	/* Someday... flush the TCP segment reassembly descriptors. */
}

int
pf_normalize_tcp_stateful(struct mbuf *m, int off, struct pf_pdesc *pd,
    u_short *reason, struct tcphdr *th, struct pf_state *state,
    struct pf_state_peer *src, struct pf_state_peer *dst, int *writeback)
{
	struct timeval	 uptime;
	u_int32_t	 tsval, tsecr;
	u_int		 tsval_from_last;
	u_int8_t	 hdr[60];
	u_int8_t	*opt;
	int		 copyback = 0;
	int		 got_ts = 0;

	KASSERT((src->scrub || dst->scrub),
	    ("pf_normalize_tcp_stateful: !src->scrub && !dst->scrub"));

	tsval = 0;	/* avoid gcc complaint */
	tsecr = 0;	/* avoid gcc complaint */

	/*
	 * Enforce the minimum TTL seen for this connection.  Negate a common
	 * technique to evade an intrusion detection system and confuse
	 * firewall state code.
	 */
	switch (pd->af) {
#ifdef INET
	case AF_INET: {
		if (src->scrub) {
			struct ip *h = mtod(m, struct ip *);
			if (h->ip_ttl > src->scrub->pfss_ttl)
				src->scrub->pfss_ttl = h->ip_ttl;
			h->ip_ttl = src->scrub->pfss_ttl;
		}
		break;
	}
#endif /* INET */
#ifdef INET6
	case AF_INET6: {
		if (src->scrub) {
			struct ip6_hdr *h = mtod(m, struct ip6_hdr *);
			if (h->ip6_hlim > src->scrub->pfss_ttl)
				src->scrub->pfss_ttl = h->ip6_hlim;
			h->ip6_hlim = src->scrub->pfss_ttl;
		}
		break;
	}
#endif /* INET6 */
	}

	if (th->th_off > (sizeof(struct tcphdr) >> 2) &&
	    ((src->scrub && (src->scrub->pfss_flags & PFSS_TIMESTAMP)) ||
	    (dst->scrub && (dst->scrub->pfss_flags & PFSS_TIMESTAMP))) &&
	    pf_pull_hdr(m, off, hdr, th->th_off << 2, NULL, NULL, pd->af)) {
		/* Diddle with TCP options */
		int	hlen;

		opt = hdr + sizeof(struct tcphdr);
		hlen = (th->th_off << 2) - sizeof(struct tcphdr);
		while (hlen >= TCPOLEN_TIMESTAMP) {
			switch (*opt) {
			case TCPOPT_EOL:	/* FALLTHROUGH */
			case TCPOPT_NOP:
				opt++;
				hlen--;
				break;
			case TCPOPT_TIMESTAMP:
				/*
				 * Modulate the timestamps.  Can be used for
				 * NAT detection, OS uptime determination or
				 * reboot detection.
				 */

				if (got_ts) {
					/* Huh?  Multiple timestamps!? */
					if (pf_status.debug >= PF_DEBUG_MISC) {
						DPFPRINTF(("multiple TS??"));
						pf_print_state(state);
						kprintf("\n");
					}
					REASON_SET(reason, PFRES_TS);
					return (PF_DROP);
				}
				if (opt[1] >= TCPOLEN_TIMESTAMP) {
					memcpy(&tsval, &opt[2],
					    sizeof(u_int32_t));
					if (tsval && src->scrub &&
					    (src->scrub->pfss_flags &
					    PFSS_TIMESTAMP)) {
						tsval = ntohl(tsval);
						pf_change_a(&opt[2],
						    &th->th_sum,
						    htonl(tsval +
						    src->scrub->pfss_ts_mod),
						    0);
						copyback = 1;
					}

					/* Modulate TS reply iff valid (!0) */
					memcpy(&tsecr, &opt[6],
					    sizeof(u_int32_t));
					if (tsecr && dst->scrub &&
					    (dst->scrub->pfss_flags &
					    PFSS_TIMESTAMP)) {
						tsecr = ntohl(tsecr)
						    - dst->scrub->pfss_ts_mod;
						pf_change_a(&opt[6],
						    &th->th_sum, htonl(tsecr),
						    0);
						copyback = 1;
					}
					got_ts = 1;
				}
				/* FALLTHROUGH */
			default:
				hlen -= MAX(opt[1], 2);
				opt += MAX(opt[1], 2);
				break;
			}
		}
		if (copyback) {
			/* Copyback the options, caller copies back header */
			*writeback = 1;
			m_copyback(m, off + sizeof(struct tcphdr),
			    (th->th_off << 2) - sizeof(struct tcphdr),
			    hdr + sizeof(struct tcphdr));
		}
	}

	/*
	 * Must invalidate PAWS checks on connections idle for too long.
	 * The fastest allowed timestamp clock is 1ms.  That turns out to
	 * be about 24 days before it wraps.  XXX Right now our lowerbound
	 * TS echo check only works for the first 12 days of a connection
	 * when the TS has exhausted half its 32bit space.
	 */
#define TS_MAX_IDLE	(24*24*60*60)
#define TS_MAX_CONN	(12*24*60*60)	/* XXX remove when better tsecr check */

	getmicrouptime(&uptime);
	if (src->scrub && (src->scrub->pfss_flags & PFSS_PAWS) &&
	    (uptime.tv_sec - src->scrub->pfss_last.tv_sec > TS_MAX_IDLE ||
	    time_second - state->creation > TS_MAX_CONN)) {
		if (pf_status.debug >= PF_DEBUG_MISC) {
			DPFPRINTF(("src idled out of PAWS\n"));
			pf_print_state(state);
			kprintf("\n");
		}
		src->scrub->pfss_flags = (src->scrub->pfss_flags & ~PFSS_PAWS)
		    | PFSS_PAWS_IDLED;
	}
	if (dst->scrub && (dst->scrub->pfss_flags & PFSS_PAWS) &&
	    uptime.tv_sec - dst->scrub->pfss_last.tv_sec > TS_MAX_IDLE) {
		if (pf_status.debug >= PF_DEBUG_MISC) {
			DPFPRINTF(("dst idled out of PAWS\n"));
			pf_print_state(state);
			kprintf("\n");
		}
		dst->scrub->pfss_flags = (dst->scrub->pfss_flags & ~PFSS_PAWS)
		    | PFSS_PAWS_IDLED;
	}

	if (got_ts && src->scrub && dst->scrub &&
	    (src->scrub->pfss_flags & PFSS_PAWS) &&
	    (dst->scrub->pfss_flags & PFSS_PAWS)) {
		/*
		 * Validate that the timestamps are "in-window".
		 * RFC1323 describes TCP Timestamp options that allow
		 * measurement of RTT (round trip time) and PAWS
		 * (protection against wrapped sequence numbers).  PAWS
		 * gives us a set of rules for rejecting packets on
		 * long fat pipes (packets that were somehow delayed
		 * in transit longer than the time it took to send the
		 * full TCP sequence space of 4Gb).  We can use these
		 * rules and infer a few others that will let us treat
		 * the 32bit timestamp and the 32bit echoed timestamp
		 * as sequence numbers to prevent a blind attacker from
		 * inserting packets into a connection.
		 *
		 * RFC1323 tells us:
		 *  - The timestamp on this packet must be greater than
		 *    or equal to the last value echoed by the other
		 *    endpoint.  The RFC says those will be discarded
		 *    since it is a dup that has already been acked.
		 *    This gives us a lowerbound on the timestamp.
		 *        timestamp >= other last echoed timestamp
		 *  - The timestamp will be less than or equal to
		 *    the last timestamp plus the time between the
		 *    last packet and now.  The RFC defines the max
		 *    clock rate as 1ms.  We will allow clocks to be
		 *    up to 10% fast and will allow a total difference
		 *    of 30 seconds due to a route change.  And this
		 *    gives us an upperbound on the timestamp.
		 *        timestamp <= last timestamp + max ticks
		 *    We have to be careful here.  Windows will send an
		 *    initial timestamp of zero and then initialize it
		 *    to a random value after the 3whs; presumably to
		 *    avoid a DoS by having to call an expensive RNG
		 *    during a SYN flood.  Proof MS has at least one
		 *    good security geek.
		 *  - The TCP timestamp option must also echo the other
		 *    endpoint's timestamp.  The timestamp echoed is the
		 *    one carried on the earliest unacknowledged segment
		 *    on the left edge of the sequence window.  The RFC
		 *    states that the host will reject any echoed
		 *    timestamps that were larger than any ever sent.
		 *    This gives us an upperbound on the TS echo.
		 *        tsecr <= largest_tsval
		 *  - The lowerbound on the TS echo is a little more
		 *    tricky to determine.  The other endpoint's echoed
		 *    values will not decrease.  But there may be
		 *    network conditions that re-order packets and
		 *    cause our view of them to decrease.  For now the
		 *    only lowerbound we can safely determine is that
		 *    the TS echo will never be less than the original
		 *    TS.  XXX There is probably a better lowerbound.
		 *    Remove TS_MAX_CONN with better lowerbound check.
		 *        tsecr >= other original TS
		 *
		 * It is also important to note that the fastest
		 * timestamp clock of 1ms will wrap its 32bit space in
		 * 24 days.  So we just disable TS checking after 24
		 * days of idle time.  We actually must use a 12d
		 * connection limit until we can come up with a better
		 * lowerbound to the TS echo check.
		 */
		struct timeval	delta_ts;
		int		ts_fudge;

		/*
		 * PFTM_TS_DIFF is how many seconds of leeway to allow
		 * a host's timestamp.  This can happen if the previous
		 * packet got delayed in transit for much longer than
		 * this packet.
		 */
		if ((ts_fudge = state->rule.ptr->timeout[PFTM_TS_DIFF]) == 0)
			ts_fudge = pf_default_rule.timeout[PFTM_TS_DIFF];

		/* Calculate max ticks since the last timestamp */
#define TS_MAXFREQ	1100		/* RFC max TS freq of 1Khz + 10% skew */
#define TS_MICROSECS	1000000		/* microseconds per second */

#ifndef timersub
#define timersub(tvp, uvp, vvp)						\
	do {								\
		(vvp)->tv_sec = (tvp)->tv_sec - (uvp)->tv_sec;		\
		(vvp)->tv_usec = (tvp)->tv_usec - (uvp)->tv_usec;	\
		if ((vvp)->tv_usec < 0) {				\
			(vvp)->tv_sec--;				\
			(vvp)->tv_usec += 1000000;			\
		}							\
	} while (0)
#endif

		timersub(&uptime, &src->scrub->pfss_last, &delta_ts);
		tsval_from_last = (delta_ts.tv_sec + ts_fudge) * TS_MAXFREQ;
		tsval_from_last += delta_ts.tv_usec / (TS_MICROSECS/TS_MAXFREQ);
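
		/*
		 * Worked example: with ts_fudge = 30 and a packet seen
		 * 2.5 seconds after the previous one, the peer's clock may
		 * have advanced by at most
		 * (2 + 30) * 1100 + 500000 / (1000000 / 1100) = 35750
		 * ticks since the last tsval we recorded.
		 */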

		if ((src->state >= TCPS_ESTABLISHED &&
		    dst->state >= TCPS_ESTABLISHED) &&
		    (SEQ_LT(tsval, dst->scrub->pfss_tsecr) ||
		    SEQ_GT(tsval, src->scrub->pfss_tsval + tsval_from_last) ||
		    (tsecr && (SEQ_GT(tsecr, dst->scrub->pfss_tsval) ||
		    SEQ_LT(tsecr, dst->scrub->pfss_tsval0))))) {
			/* Bad RFC1323 implementation or an insertion attack.
			 *
			 *  - Solaris 2.6 and 2.7 are known to send another
			 *    ACK after the FIN,FIN|ACK,ACK closing that
			 *    carries an old timestamp.
			 */

			DPFPRINTF(("Timestamp failed %c%c%c%c\n",
			    SEQ_LT(tsval, dst->scrub->pfss_tsecr) ? '0' : ' ',
			    SEQ_GT(tsval, src->scrub->pfss_tsval +
			    tsval_from_last) ? '1' : ' ',
			    SEQ_GT(tsecr, dst->scrub->pfss_tsval) ? '2' : ' ',
			    SEQ_LT(tsecr, dst->scrub->pfss_tsval0) ?
			    '3' : ' '));
			DPFPRINTF((" tsval: %u  tsecr: %u  +ticks: %u  "
			    "idle: %lus %lums\n",
			    tsval, tsecr, tsval_from_last, delta_ts.tv_sec,
			    delta_ts.tv_usec / 1000));
			DPFPRINTF((" src->tsval: %u  tsecr: %u\n",
			    src->scrub->pfss_tsval, src->scrub->pfss_tsecr));
			DPFPRINTF((" dst->tsval: %u  tsecr: %u  tsval0: %u\n",
			    dst->scrub->pfss_tsval,
			    dst->scrub->pfss_tsecr, dst->scrub->pfss_tsval0));
			if (pf_status.debug >= PF_DEBUG_MISC) {
				pf_print_state(state);
				pf_print_flags(th->th_flags);
				kprintf("\n");
			}
			REASON_SET(reason, PFRES_TS);
			return (PF_DROP);
		}

		/* XXX I'd really like to require tsecr but it's optional */

	} else if (!got_ts && (th->th_flags & TH_RST) == 0 &&
	    ((src->state == TCPS_ESTABLISHED && dst->state == TCPS_ESTABLISHED)
	    || pd->p_len > 0 || (th->th_flags & TH_SYN)) &&
	    src->scrub && dst->scrub &&
	    (src->scrub->pfss_flags & PFSS_PAWS) &&
	    (dst->scrub->pfss_flags & PFSS_PAWS)) {
		/* Didn't send a timestamp.  Timestamps aren't really useful
		 * when:
		 *  - connection opening or closing (often not even sent).
		 *    but we must not let an attacker put a FIN on a
		 *    data packet to sneak it through our ESTABLISHED check.
		 *  - on a TCP reset.  RFC suggests not even looking at TS.
		 *  - on an empty ACK.  The TS will not be echoed so it will
		 *    probably not help keep the RTT calculation in sync and
		 *    there isn't as much danger when the sequence numbers
		 *    got wrapped.  So some stacks don't include TS on empty
		 *    ACKs :-(
		 *
		 * To minimize the disruption to mostly RFC1323 conformant
		 * stacks, we will only require timestamps on data packets.
		 *
		 * And what do ya know, we cannot require timestamps on data
		 * packets.  There appear to be devices that do legitimate
		 * TCP connection hijacking.  There are HTTP devices that
		 * allow a 3whs (with timestamps) and then buffer the HTTP
		 * request.  If the intermediate device has the HTTP response
		 * cache, it will spoof the response but not bother
		 * timestamping its packets.  So we can look for the presence
		 * of a timestamp in the first data packet and if there,
		 * require it in all future packets.
		 */

		if (pd->p_len > 0 && (src->scrub->pfss_flags & PFSS_DATA_TS)) {
			/*
			 * Hey!  Someone tried to sneak a packet in.  Or the
			 * stack changed its RFC1323 behavior?!?!
			 */
			if (pf_status.debug >= PF_DEBUG_MISC) {
				DPFPRINTF(("Did not receive expected RFC1323 "
				    "timestamp\n"));
				pf_print_state(state);
				pf_print_flags(th->th_flags);
				kprintf("\n");
			}
			REASON_SET(reason, PFRES_TS);
			return (PF_DROP);
		}
	}

	/*
	 * We will note if a host sends his data packets with or without
	 * timestamps.  And require all data packets to contain a timestamp
	 * if the first does.  PAWS implicitly requires that all data packets
	 * be timestamped.  But I think there are middle-man devices that
	 * hijack TCP streams immediately after the 3whs and don't timestamp
	 * their packets (seen in a WWW accelerator or cache).
	 */
	if (pd->p_len > 0 && src->scrub && (src->scrub->pfss_flags &
	    (PFSS_TIMESTAMP|PFSS_DATA_TS|PFSS_DATA_NOTS)) == PFSS_TIMESTAMP) {
		if (got_ts)
			src->scrub->pfss_flags |= PFSS_DATA_TS;
		else {
			src->scrub->pfss_flags |= PFSS_DATA_NOTS;
			if (pf_status.debug >= PF_DEBUG_MISC && dst->scrub &&
			    (dst->scrub->pfss_flags & PFSS_TIMESTAMP)) {
				/* Don't warn if other host rejected RFC1323 */
				DPFPRINTF(("Broken RFC1323 stack did not "
				    "timestamp data packet. Disabled PAWS "
				    "security.\n"));
				pf_print_state(state);
				pf_print_flags(th->th_flags);
				kprintf("\n");
			}
		}
	}

	/*
	 * Update PAWS values
	 */
	if (got_ts && src->scrub && PFSS_TIMESTAMP == (src->scrub->pfss_flags &
	    (PFSS_PAWS_IDLED|PFSS_TIMESTAMP))) {
		getmicrouptime(&src->scrub->pfss_last);
		if (SEQ_GEQ(tsval, src->scrub->pfss_tsval) ||
		    (src->scrub->pfss_flags & PFSS_PAWS) == 0)
			src->scrub->pfss_tsval = tsval;

		if (tsecr) {
			if (SEQ_GEQ(tsecr, src->scrub->pfss_tsecr) ||
			    (src->scrub->pfss_flags & PFSS_PAWS) == 0)
				src->scrub->pfss_tsecr = tsecr;

			if ((src->scrub->pfss_flags & PFSS_PAWS) == 0 &&
			    (SEQ_LT(tsval, src->scrub->pfss_tsval0) ||
			    src->scrub->pfss_tsval0 == 0)) {
				/* tsval0 MUST be the lowest timestamp */
				src->scrub->pfss_tsval0 = tsval;
			}

			/* Only fully initialized after a TS gets echoed */
			if ((src->scrub->pfss_flags & PFSS_PAWS) == 0)
				src->scrub->pfss_flags |= PFSS_PAWS;
		}
	}

	/* I have a dream....  TCP segment reassembly.... */
	return (0);
}
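
/*
 * Enforce the "max-mss" scrub option: walk the TCP options and clamp any
 * MSS option that advertises more than r->max_mss, fixing up the checksum
 * as we go.  A rule such as the following (illustrative pf.conf syntax)
 *
 *	scrub on $ext_if all max-mss 1440
 *
 * uses this to keep the MSS below what a tunnel or PPPoE MTU can carry.
 */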
int
pf_normalize_tcpopt(struct pf_rule *r, struct mbuf *m, struct tcphdr *th,
    int off, sa_family_t af)
{
	u_int16_t	*mss;
	int		 thoff;
	int		 opt, cnt, optlen = 0;
	int		 rewrite = 0;
	u_char		 opts[TCP_MAXOLEN];
	u_char		*optp = opts;

	thoff = th->th_off << 2;
	cnt = thoff - sizeof(struct tcphdr);

	if (cnt > 0 && !pf_pull_hdr(m, off + sizeof(*th), opts, cnt,
	    NULL, NULL, af))
		return (rewrite);

	for (; cnt > 0; cnt -= optlen, optp += optlen) {
		opt = optp[0];
		if (opt == TCPOPT_EOL)
			break;
		if (opt == TCPOPT_NOP)
			optlen = 1;
		else {
			if (cnt < 2)
				break;
			optlen = optp[1];
			if (optlen < 2 || optlen > cnt)
				break;
		}
		switch (opt) {
		case TCPOPT_MAXSEG:
			mss = (u_int16_t *)(optp + 2);
			if ((ntohs(*mss)) > r->max_mss) {
				th->th_sum = pf_cksum_fixup(th->th_sum,
				    *mss, htons(r->max_mss), 0);
				*mss = htons(r->max_mss);
				rewrite = 1;
			}
			break;
		default:
			break;
		}
	}

	if (rewrite)
		m_copyback(m, off + sizeof(*th), thoff - sizeof(*th), opts);

	return (rewrite);
}