1 /*
2 * Copyright (c) 2008-2020, OARC, Inc.
3 * Copyright (c) 2007-2008, Internet Systems Consortium, Inc.
4 * Copyright (c) 2003-2007, The Measurement Factory, Inc.
5 * All rights reserved.
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 *
11 * 1. Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 *
14 * 2. Redistributions in binary form must reproduce the above copyright
15 * notice, this list of conditions and the following disclaimer in
16 * the documentation and/or other materials provided with the
17 * distribution.
18 *
19 * 3. Neither the name of the copyright holder nor the names of its
20 * contributors may be used to endorse or promote products derived
21 * from this software without specific prior written permission.
22 *
23 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
24 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
25 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
26 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
27 * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
28 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
29 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
30 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
31 * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
32 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
33 * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
34 * POSSIBILITY OF SUCH DAMAGE.
35 */
36
37 #include "config.h"
38
39 #include "pcap.h"
40 #include "xmalloc.h"
41 #include "syslog_debug.h"
42 #include "hashtbl.h"
43 #include "pcap_layers/byteorder.h"
44 #include "pcap_layers/pcap_layers.h"
45 #include "dns_protocol.h"
46 #include "pcap-thread/pcap_thread.h"
47 #include "compat.h"
48
49 #include <sys/stat.h>
50 #include <string.h>
51 #include <assert.h>
52 #include <errno.h>
53 #include <stdlib.h>
54
55 #define PCAP_SNAPLEN 65536
56 #ifndef ETHER_HDR_LEN
57 #define ETHER_ADDR_LEN 6
58 #define ETHER_TYPE_LEN 2
59 #define ETHER_HDR_LEN (ETHER_ADDR_LEN * 2 + ETHER_TYPE_LEN)
60 #endif
61 #ifndef ETHERTYPE_8021Q
62 #define ETHERTYPE_8021Q 0x8100
63 #endif
64
/*
 * assign_timeval(A, B): copy the time value B into A.
 *
 * On OpenBSD the two fields are copied one by one (presumably because the
 * operands are not always the same struct type there -- TODO confirm);
 * everywhere else a plain struct assignment is used.
 *
 * The OpenBSD form is wrapped in do { } while (0) so the macro behaves as
 * a single statement (safe under an unbraced if/else), and the arguments
 * are parenthesised so that expressions such as hdr->ts expand correctly.
 */
#ifdef __OpenBSD__
#define assign_timeval(A, B)       \
    do {                           \
        (A).tv_sec  = (B).tv_sec;  \
        (A).tv_usec = (B).tv_usec; \
    } while (0)
#else
#define assign_timeval(A, B) ((A) = (B))
#endif
72
73 /* We might need to define ETHERTYPE_IPV6 */
74 #ifndef ETHERTYPE_IPV6
75 #define ETHERTYPE_IPV6 0x86dd
76 #endif
77
78 #ifdef __GLIBC__
79 #define uh_dport dest
80 #define uh_sport source
81 #define th_off doff
82 #define th_dport dest
83 #define th_sport source
84 #define th_seq seq
85 #define TCPFLAGFIN(a) (a)->fin
86 #define TCPFLAGSYN(a) (a)->syn
87 #define TCPFLAGRST(a) (a)->rst
88 #else
89 #define TCPFLAGSYN(a) ((a)->th_flags & TH_SYN)
90 #define TCPFLAGFIN(a) ((a)->th_flags & TH_FIN)
91 #define TCPFLAGRST(a) ((a)->th_flags & TH_RST)
92 #endif
93
94 #ifndef IP_OFFMASK
95 #define IP_OFFMASK 0x1fff
96 #endif
97
/*
 * Bookkeeping for one capture source opened by Pcap_init(); entries live
 * in the interfaces array.
 */
struct _interface {
    char*            device; /* strdup()ed copy of the name given to Pcap_init(); matched by name in _stats() */
    struct pcap_stat ps0, ps1; /* _stats() rotates these: ps0 = previous snapshot, ps1 = latest */
    unsigned int     pkts_captured; /* incremented per packet in _callback(), zeroed at the start of Pcap_run() */
};
103
/* Number of entries allocated for the interfaces array in Pcap_init(). */
#define MAX_N_INTERFACES 10
static int                n_interfaces = 0; /* entries of interfaces currently in use */
static struct _interface* interfaces   = NULL; /* allocated on the first Pcap_init() call */
unsigned short            port53       = 53; /* the UDP/TCP handlers only keep traffic to or from this port */
pcap_thread_t             pcap_thread  = PCAP_THREAD_T_INIT; /* configured in Pcap_init(), driven by Pcap_run() */

int   n_pcap_offline                 = 0; /* global so daemon.c can use it */
char* bpf_program_str                = NULL; /* when non-NULL, installed as the pcap filter in Pcap_init() */
int   vlan_tag_needs_byte_conversion = 1; /* non-zero: pcap_match_vlan() applies ntohs() to the tag before matching */

#if 0
static int debug_count = 20;
#endif
struct timeval        last_ts; /* timestamp of the last packet seen by pcap_handle_packet(); Pcap_run() also sets it from gettimeofday() */
static struct timeval start_ts; /* start of the interval being processed by Pcap_run() */
static struct timeval finish_ts; /* end of the interval being processed by Pcap_run() */
/* Capacity of vlan_ids. */
#define MAX_VLAN_IDS 100
static int      n_vlan_ids = 0; /* entries of vlan_ids in use; the VLAN callback is only installed when non-zero */
static int      vlan_ids[MAX_VLAN_IDS]; /* VLAN ids accepted by pcap_match_vlan() */
static hashtbl* tcpHash; /* tcpHashkey_t -> tcpstate_t; created lazily in pcap_tcp_handler() */
124
125 static int
pcap_udp_handler(const struct udphdr * udp,int len,void * udata)126 pcap_udp_handler(const struct udphdr* udp, int len, void* udata)
127 {
128 transport_message* tm = udata;
129 tm->src_port = nptohs(&udp->uh_sport);
130 tm->dst_port = nptohs(&udp->uh_dport);
131 tm->proto = IPPROTO_UDP;
132 if (port53 != tm->dst_port && port53 != tm->src_port)
133 return 1;
134 return 0;
135 }
136
137 #define MAX_DNS_LENGTH 0xFFFF
138
139 #define MAX_TCP_WINDOW_SIZE (0xFFFF << 14)
140 #define MAX_TCP_STATE 65535
141 #define MAX_TCP_IDLE 60 /* tcpstate is tossed if idle for this many seconds */
142 #define MAX_FRAG_IDLE 60 /* keep fragments in pcap_layers for this many seconds */
143
144 /* These numbers define the sizes of small arrays which are simpler to work
145 * with than dynamically allocated lists. */
146 #define MAX_TCP_MSGS 8 /* messages being reassembled (per connection) */
147 #define MAX_TCP_SEGS 8 /* segments not assigned to a message (per connection) */
148 #define MAX_TCP_HOLES 8 /* holes in a msg buf (per message) */
149
/*
 * Key of the tcpHash table: addresses and ports of one direction of a TCP
 * connection, filled in by pcap_tcp_handler().
 *
 * Keys are compared with memcmp() over the whole struct (see tcp_cmpfunc),
 * so every byte of the key takes part in the comparison.
 */
typedef struct
{
    inX_addr src_ip_addr;
    inX_addr dst_ip_addr;
    uint16_t dport;
    uint16_t sport;
} tcpHashkey_t;
157
/* Description of a hole (a byte range not yet received) in a TCP reassembly
 * buffer.  A descriptor with len == 0 is free for reuse. */
typedef struct
{
    uint16_t start; /* start of hole, measured from beginning of msgbuf->buf */
    uint16_t len; /* length of hole (0 == unused) */
} tcphole_t;
164
/* TCP message reassembly buffer: one DNS message being put together from
 * several segments.  Allocated as sizeof(tcp_msgbuf_t) + dnslen bytes. */
typedef struct
{
    uint32_t  seq; /* seq# of first byte of header of this DNS msg */
    uint16_t  dnslen; /* length of dns message, and size of buf */
    tcphole_t hole[MAX_TCP_HOLES];
    int       holes; /* number of holes remaining in message; 0 means complete */
    u_char    buf[]; /* reassembled message (C99 flexible array member) */
} tcp_msgbuf_t;
174
/* Held TCP segment: payload that did not match any message buffer when it
 * arrived and is kept until a matching msgbuf is created.  Allocated as
 * sizeof(tcp_segbuf_t) + len bytes. */
typedef struct
{
    uint32_t seq; /* sequence number of first byte of segment */
    uint16_t len; /* length of segment, and size of buf */
    u_char   buf[]; /* segment payload (C99 flexible array member) */
} tcp_segbuf_t;
182
/* TCP reassembly state for one direction of a connection.  Instances are
 * stored in tcpHash (keyed by ->key) and linked into tcpList by age. */
typedef struct tcpstate {
    tcpHashkey_t key; /* hash key; hash_add() is given a pointer to this member */
    struct tcpstate *newer, *older; /* neighbours in tcpList */
    long             last_use; /* tm->ts.tv_sec of the last packet handled for this state */
    uint32_t         seq_start; /* seq# of length field of next DNS msg */
    short            msgbufs; /* number of msgbufs in use */
    u_char           dnslen_buf[2]; /* full dnslen field might not arrive in first segment */
    u_char           dnslen_bytes_seen_mask; /* bitmask, when == 3 we have full dnslen; set to 7 while inside a message body */
    int8_t           fin; /* have we seen a FIN? */
    tcp_msgbuf_t*    msgbuf[MAX_TCP_MSGS]; /* messages being reassembled; NULL == free slot */
    tcp_segbuf_t*    segbuf[MAX_TCP_SEGS]; /* held segments; NULL == free slot */
} tcpstate_t;
196
/* List of tcpstates ordered by time of last use, so we can quickly identify
 * and discard stale entries.  It is doubly linked through
 * tcpstate_t.newer / tcpstate_t.older; both ends are NULL when empty. */
struct
{
    tcpstate_t* oldest; /* least recently used entry */
    tcpstate_t* newest; /* most recently used entry */
} tcpList;
204
205 static void
tcpstate_reset(tcpstate_t * tcpstate,uint32_t seq)206 tcpstate_reset(tcpstate_t* tcpstate, uint32_t seq)
207 {
208 int i;
209 tcpstate->seq_start = seq;
210 tcpstate->fin = 0;
211 if (tcpstate->msgbufs > 0) {
212 tcpstate->msgbufs = 0;
213 for (i = 0; i < MAX_TCP_MSGS; i++) {
214 if (tcpstate->msgbuf[i]) {
215 xfree(tcpstate->msgbuf[i]);
216 tcpstate->msgbuf[i] = NULL;
217 }
218 }
219 }
220 for (i = 0; i < MAX_TCP_SEGS; i++) {
221 if (tcpstate->segbuf[i]) {
222 xfree(tcpstate->segbuf[i]);
223 tcpstate->segbuf[i] = NULL;
224 }
225 }
226 }
227
228 static void
tcpstate_free(void * p)229 tcpstate_free(void* p)
230 {
231 tcpstate_reset((tcpstate_t*)p, 0);
232 xfree(p);
233 }
234
235 static unsigned int
tcp_hashfunc(const void * key)236 tcp_hashfunc(const void* key)
237 {
238 tcpHashkey_t* k = (tcpHashkey_t*)key;
239 return (k->dport << 16) | k->sport | k->src_ip_addr._.in4.s_addr | k->dst_ip_addr._.in4.s_addr;
240 /* 32 low bits of ipv6 address are good enough for a hash */
241 }
242
243 static int
tcp_cmpfunc(const void * a,const void * b)244 tcp_cmpfunc(const void* a, const void* b)
245 {
246 return memcmp(a, b, sizeof(tcpHashkey_t));
247 }
248
249 /* TCP Reassembly.
250 *
251 * When we see a SYN, we allocate a new tcpstate for the connection, and
252 * establish the initial sequence number of the first dns message (seq_start)
253 * on the connection. We assume that no other segment can arrive before the
 * SYN (if one does, it is discarded, and if it is not repeated the message it
255 * belongs to can never be completely reassembled).
256 *
257 * Then, for each segment that arrives on the connection:
258 * - If it's the first segment of a message (containing the 2-byte message
259 * length), we allocate a msgbuf, and check for any held segments that might
260 * belong to it.
261 * - If the first byte of the segment belongs to any msgbuf, we fill
262 * in the holes of that message. If the message has no more holes, we
263 * handle the complete dns message. If the tail of the segment was longer
264 * than the hole, we recurse on the tail.
265 * - Otherwise, if the segment could be within the tcp window, we hold onto it
266 * pending the creation of a matching msgbuf.
267 *
268 * This algorithm handles segments that arrive out of order, duplicated or
269 * overlapping (including segments from different dns messages arriving out of
270 * order), and dns messages that do not necessarily start on segment
271 * boundaries.
272 *
273 */
274 static void
pcap_handle_tcp_segment(u_char * segment,int len,uint32_t seq,tcpstate_t * tcpstate,transport_message * tm)275 pcap_handle_tcp_segment(u_char* segment, int len, uint32_t seq, tcpstate_t* tcpstate, transport_message* tm)
276 {
277 int i, m, s;
278 uint16_t dnslen;
279 int segoff, seglen;
280
281 dfprintf(1, "pcap_handle_tcp_segment: seq=%u, len=%d", seq, len);
282
283 if (len <= 0) /* there is no more payload */
284 return;
285
286 if (seq - tcpstate->seq_start < 2) {
287 /* this segment contains all or part of the 2-byte DNS length field */
288 uint32_t o = seq - tcpstate->seq_start;
289 int l = (len > 1 && o == 0) ? 2 : 1;
290 dfprintf(1, "pcap_handle_tcp_segment: copying %d bytes to dnslen_buf[%d]", l, o);
291 memcpy(&tcpstate->dnslen_buf[o], segment, l);
292 if (l == 2)
293 tcpstate->dnslen_bytes_seen_mask = 3;
294 else
295 tcpstate->dnslen_bytes_seen_mask |= (1 << o);
296 len -= l;
297 segment += l;
298 seq += l;
299 }
300
301 if (3 == tcpstate->dnslen_bytes_seen_mask) {
302 /* We have the dnslen stored now */
303 dnslen = nptohs(tcpstate->dnslen_buf) & 0xffff;
304 /*
305 * Next we poison the mask to indicate we are in to the message body.
306 * If one doesn't remember we're past the then,
307 * one loops forever getting more msgbufs rather than filling
308 * in the contents of THIS message.
309 *
310 * We need to later reset that mask when we process the message
311 * (method: tcpstate->dnslen_bytes_seen_mask = 0).
312 */
313 tcpstate->dnslen_bytes_seen_mask = 7;
314 tcpstate->seq_start += sizeof(uint16_t) + dnslen;
315 dfprintf(1, "pcap_handle_tcp_segment: first segment; dnslen = %d", dnslen);
316 if (len >= dnslen) {
317 /* this segment contains a complete message - avoid the reassembly
318 * buffer and just handle the message immediately */
319 dns_protocol_handler(segment, dnslen, tm);
320 tcpstate->dnslen_bytes_seen_mask = 0; /* go back for another message in this tcp connection */
321 /* handle the trailing part of the segment? */
322 if (len > dnslen) {
323 dfprintf(1, "pcap_handle_tcp_segment: %s", "segment tail");
324 pcap_handle_tcp_segment(segment + dnslen, len - dnslen, seq + dnslen, tcpstate, tm);
325 }
326 return;
327 }
328 /*
329 * At this point we KNOW we have an incomplete message and need to do reassembly.
330 * i.e.: assert(len < dnslen);
331 */
332 dfprintf(2, "pcap_handle_tcp_segment: %s", "buffering segment");
333 /* allocate a msgbuf for reassembly */
334 for (m = 0; tcpstate->msgbuf[m];) {
335 if (++m >= MAX_TCP_MSGS) {
336 dfprintf(1, "pcap_handle_tcp_segment: %s", "out of msgbufs");
337 return;
338 }
339 }
340 tcpstate->msgbuf[m] = xcalloc(1, sizeof(tcp_msgbuf_t) + dnslen);
341 if (NULL == tcpstate->msgbuf[m]) {
342 dsyslogf(LOG_ERR, "out of memory for tcp_msgbuf (%d)", dnslen);
343 return;
344 }
345 tcpstate->msgbufs++;
346 tcpstate->msgbuf[m]->seq = seq;
347 tcpstate->msgbuf[m]->dnslen = dnslen;
348 tcpstate->msgbuf[m]->holes = 1;
349 tcpstate->msgbuf[m]->hole[0].start = len;
350 tcpstate->msgbuf[m]->hole[0].len = dnslen - len;
351 dfprintf(1,
352 "pcap_handle_tcp_segment: new msgbuf %d: seq = %u, dnslen = %d, hole start = %d, hole len = %d", m,
353 tcpstate->msgbuf[m]->seq, tcpstate->msgbuf[m]->dnslen, tcpstate->msgbuf[m]->hole[0].start,
354 tcpstate->msgbuf[m]->hole[0].len);
355 /* copy segment to appropriate location in reassembly buffer */
356 memcpy(tcpstate->msgbuf[m]->buf, segment, len);
357
358 /* Now that we know the length of this message, we must check any held
359 * segments to see if they belong to it. */
360 for (s = 0; s < MAX_TCP_SEGS; s++) {
361 if (!tcpstate->segbuf[s])
362 continue;
363 /* TODO: seq >= 0 */
364 if (tcpstate->segbuf[s]->seq - seq > 0 && tcpstate->segbuf[s]->seq - seq < dnslen) {
365 tcp_segbuf_t* segbuf = tcpstate->segbuf[s];
366 tcpstate->segbuf[s] = NULL;
367 dfprintf(1, "pcap_handle_tcp_segment: %s", "message reassembled");
368 pcap_handle_tcp_segment(segbuf->buf, segbuf->len, segbuf->seq, tcpstate, tm);
369 /*
370 * Note that our recursion will also cover any tail messages (I hope).
371 * Thus we do not need to do so here and can return.
372 */
373 xfree(segbuf);
374 }
375 }
376 return;
377 }
378
379 /*
380 * Welcome to reassembly-land.
381 */
382 /* find the message to which the first byte of this segment belongs */
383 for (m = 0; m < MAX_TCP_MSGS; m++) {
384 if (!tcpstate->msgbuf[m])
385 continue;
386 segoff = seq - tcpstate->msgbuf[m]->seq;
387 if (segoff >= 0 && segoff < tcpstate->msgbuf[m]->dnslen) {
388 /* segment starts in this msgbuf */
389 dfprintf(1, "pcap_handle_tcp_segment: seg matches msg %d: seq = %u, dnslen = %d",
390 m, tcpstate->msgbuf[m]->seq, tcpstate->msgbuf[m]->dnslen);
391 if (segoff + len > tcpstate->msgbuf[m]->dnslen) {
392 /* segment would overflow msgbuf */
393 seglen = tcpstate->msgbuf[m]->dnslen - segoff;
394 dfprintf(1, "pcap_handle_tcp_segment: using partial segment %d", seglen);
395 } else {
396 seglen = len;
397 }
398 break;
399 }
400 }
401 if (m >= MAX_TCP_MSGS) {
402 /* seg does not match any msgbuf; just hold on to it. */
403 dfprintf(1, "pcap_handle_tcp_segment: %s", "seg does not match any msgbuf");
404
405 if (seq - tcpstate->seq_start > MAX_TCP_WINDOW_SIZE) {
406 dfprintf(1, "pcap_handle_tcp_segment: %s", "seg is outside window; discarding");
407 return;
408 }
409 for (s = 0; s < MAX_TCP_SEGS; s++) {
410 if (tcpstate->segbuf[s])
411 continue;
412 tcpstate->segbuf[s] = xcalloc(1, sizeof(tcp_segbuf_t) + len);
413 tcpstate->segbuf[s]->seq = seq;
414 tcpstate->segbuf[s]->len = len;
415 memcpy(tcpstate->segbuf[s]->buf, segment, len);
416 dfprintf(1, "pcap_handle_tcp_segment: new segbuf %d: seq = %u, len = %d",
417 s, tcpstate->segbuf[s]->seq, tcpstate->segbuf[s]->len);
418 return;
419 }
420 dfprintf(1, "pcap_handle_tcp_segment: %s", "out of segbufs");
421 return;
422 }
423
424 /* Reassembly algorithm adapted from RFC 815. */
425 for (i = 0; i < MAX_TCP_HOLES; i++) {
426 tcphole_t* newhole;
427 uint16_t hole_start, hole_len;
428 if (tcpstate->msgbuf[m]->hole[i].len == 0)
429 continue; /* hole descriptor is not in use */
430 hole_start = tcpstate->msgbuf[m]->hole[i].start;
431 hole_len = tcpstate->msgbuf[m]->hole[i].len;
432 if (segoff >= hole_start + hole_len)
433 continue; /* segment is totally after hole */
434 if (segoff + seglen <= hole_start)
435 continue; /* segment is totally before hole */
436 /* The segment overlaps this hole. Delete the hole. */
437 dfprintf(1, "pcap_handle_tcp_segment: overlaping hole %d: %d %d", i, hole_start, hole_len);
438 tcpstate->msgbuf[m]->hole[i].len = 0;
439 tcpstate->msgbuf[m]->holes--;
440 if (segoff + seglen < hole_start + hole_len) {
441 /* create a new hole after the segment (common case) */
442 newhole = &tcpstate->msgbuf[m]->hole[i]; /* hole[i] is guaranteed free */
443 newhole->start = segoff + seglen;
444 newhole->len = (hole_start + hole_len) - newhole->start;
445 tcpstate->msgbuf[m]->holes++;
446 dfprintf(1, "pcap_handle_tcp_segment: new post-hole %d: %d %d", i, newhole->start, newhole->len);
447 }
448 if (segoff > hole_start) {
449 /* create a new hole before the segment */
450 int j;
451 for (j = 0; j < MAX_TCP_HOLES; j++) {
452 if (tcpstate->msgbuf[m]->hole[j].len == 0) {
453 newhole = &tcpstate->msgbuf[m]->hole[j];
454 break;
455 }
456 }
457 if (j >= MAX_TCP_HOLES) {
458 dfprintf(1, "pcap_handle_tcp_segment: %s", "out of hole descriptors");
459 return;
460 }
461 tcpstate->msgbuf[m]->holes++;
462 newhole->start = hole_start;
463 newhole->len = segoff - hole_start;
464 dfprintf(1, "pcap_handle_tcp_segment: new pre-hole %d: %d %d", j, newhole->start, newhole->len);
465 }
466 if (segoff >= hole_start && (hole_len == 0 || segoff + seglen < hole_start + hole_len)) {
467 /* The segment does not extend past hole boundaries; there is
468 * no need to look for other matching holes. */
469 break;
470 }
471 }
472
473 /* copy payload to appropriate location in reassembly buffer */
474 memcpy(&tcpstate->msgbuf[m]->buf[segoff], segment, seglen);
475
476 dfprintf(1, "pcap_handle_tcp_segment: holes remaining: %d", tcpstate->msgbuf[m]->holes);
477
478 if (tcpstate->msgbuf[m]->holes == 0) {
479 /* We now have a completely reassembled dns message */
480 dfprintf(2, "pcap_handle_tcp_segment: %s", "reassembly to dns_protocol_handler");
481 dns_protocol_handler(tcpstate->msgbuf[m]->buf, tcpstate->msgbuf[m]->dnslen, tm);
482 tcpstate->dnslen_bytes_seen_mask = 0; /* go back for another message in this tcp connection */
483 xfree(tcpstate->msgbuf[m]);
484 tcpstate->msgbuf[m] = NULL;
485 tcpstate->msgbufs--;
486 }
487
488 if (seglen < len) {
489 dfprintf(1, "pcap_handle_tcp_segment: %s", "segment tail after reassembly");
490 pcap_handle_tcp_segment(segment + seglen, len - seglen, seq + seglen, tcpstate, tm);
491 } else {
492 dfprintf(1, "pcap_handle_tcp_segment: %s", "nothing more after reassembly");
493 };
494 }
495
496 static void
tcpList_add_newest(tcpstate_t * tcpstate)497 tcpList_add_newest(tcpstate_t* tcpstate)
498 {
499 tcpstate->older = tcpList.newest;
500 tcpstate->newer = NULL;
501 *(tcpList.newest ? &tcpList.newest->newer : &tcpList.oldest) = tcpstate;
502 tcpList.newest = tcpstate;
503 }
504
505 static void
tcpList_remove(tcpstate_t * tcpstate)506 tcpList_remove(tcpstate_t* tcpstate)
507 {
508 *(tcpstate->older ? &tcpstate->older->newer : &tcpList.oldest) = tcpstate->newer;
509 *(tcpstate->newer ? &tcpstate->newer->older : &tcpList.newest) = tcpstate->older;
510 }
511
512 static void
tcpList_remove_older_than(long t)513 tcpList_remove_older_than(long t)
514 {
515 int n = 0;
516 tcpstate_t* tcpstate;
517 while (tcpList.oldest && tcpList.oldest->last_use < t) {
518 tcpstate = tcpList.oldest;
519 tcpList_remove(tcpstate);
520 hash_remove(&tcpstate->key, tcpHash);
521 n++;
522 }
523 dfprintf(1, "discarded %d old tcpstates", n);
524 }
525
526 /*
527 * This function always returns 1 because we do our own assembly and
528 * we don't want pcap_layers to do any further processing of this
529 * packet.
530 */
531 static int
pcap_tcp_handler(const struct tcphdr * tcp,int len,void * udata)532 pcap_tcp_handler(const struct tcphdr* tcp, int len, void* udata)
533 {
534 transport_message* tm = udata;
535 int offset = tcp->th_off << 2;
536 uint32_t seq;
537 tcpstate_t* tcpstate = NULL;
538 tcpHashkey_t key;
539
540 tm->src_port = nptohs(&tcp->th_sport);
541 tm->dst_port = nptohs(&tcp->th_dport);
542 tm->proto = IPPROTO_TCP;
543
544 key.src_ip_addr = tm->src_ip_addr;
545 key.dst_ip_addr = tm->dst_ip_addr;
546 key.sport = tm->src_port;
547 key.dport = tm->dst_port;
548
549 if (debug_flag > 1) {
550 char src[128], dst[128];
551 inXaddr_ntop(&key.src_ip_addr, src, sizeof(src));
552 inXaddr_ntop(&key.dst_ip_addr, dst, sizeof(dst));
553 dfprintf(1, "handle_tcp: %s:%d %s:%d", src, key.sport, dst, key.dport);
554 }
555
556 if (port53 != key.dport && port53 != key.sport)
557 return 1;
558
559 if (NULL == tcpHash) {
560 dfprintf(2, "pcap_tcp_handler: %s", "hash_create");
561 tcpHash = hash_create(MAX_TCP_STATE, tcp_hashfunc, tcp_cmpfunc, 0, NULL, tcpstate_free);
562 if (NULL == tcpHash)
563 return 1;
564 }
565
566 seq = nptohl(&tcp->th_seq);
567 len -= offset; /* len = length of TCP payload */
568 dfprintf(1, "handle_tcp: seq = %u, len = %d", seq, len);
569
570 tcpstate = hash_find(&key, tcpHash);
571 if (tcpstate)
572 dfprintf(1, "handle_tcp: tcpstate->seq_start = %u, ->msgs = %d", tcpstate->seq_start, tcpstate->msgbufs);
573
574 if (!tcpstate && !(TCPFLAGSYN(tcp))) {
575 /* There's no existing state, and this is not the start of a stream.
576 * We have no way to synchronize with the stream, so we give up.
577 * (This commonly happens for the final ACK in response to a FIN.) */
578 dfprintf(1, "handle_tcp: %s", "no state");
579 return 1;
580 }
581
582 if (tcpstate)
583 tcpList_remove(tcpstate); /* remove from its current position */
584
585 if (TCPFLAGRST(tcp)) {
586 dfprintf(1, "handle_tcp: RST at %u", seq);
587
588 /* remove the state for this direction */
589 if (tcpstate)
590 hash_remove(&key, tcpHash); /* this also frees tcpstate */
591
592 /* remove the state for the opposite direction */
593 key.src_ip_addr = tm->dst_ip_addr;
594 key.dst_ip_addr = tm->src_ip_addr;
595 key.sport = tm->dst_port;
596 key.dport = tm->src_port;
597 tcpstate = hash_find(&key, tcpHash);
598 if (tcpstate) {
599 tcpList_remove(tcpstate);
600 hash_remove(&key, tcpHash); /* this also frees tcpstate */
601 }
602 return 1;
603 }
604
605 if (TCPFLAGSYN(tcp)) {
606 dfprintf(1, "handle_tcp: SYN at %u", seq);
607 seq++; /* skip the syn */
608 if (tcpstate) {
609 dfprintf(2, "handle_tcp: %s", "...resetting existing tcpstate");
610 tcpstate_reset(tcpstate, seq);
611 } else {
612 dfprintf(2, "handle_tcp: %s", "...creating new tcpstate");
613 tcpstate = xcalloc(1, sizeof(*tcpstate));
614 if (!tcpstate)
615 return 1;
616 tcpstate_reset(tcpstate, seq);
617 tcpstate->key = key;
618 if (0 != hash_add(&tcpstate->key, tcpstate, tcpHash)) {
619 tcpstate_free(tcpstate);
620 return 1;
621 }
622 }
623 }
624
625 pcap_handle_tcp_segment((uint8_t*)tcp + offset, len, seq, tcpstate, tm);
626
627 if (TCPFLAGFIN(tcp) && !tcpstate->fin) {
628 /* End of tcp stream */
629 dfprintf(1, "handle_tcp: FIN at %u", seq);
630 tcpstate->fin = 1;
631 }
632
633 if (tcpstate->fin && tcpstate->msgbufs == 0) {
634 /* FIN was seen, and there are no incomplete msgbufs left */
635 dfprintf(1, "handle_tcp: %s", "connection done");
636 hash_remove(&key, tcpHash); /* this also frees tcpstate */
637
638 } else {
639 /* We're keeping this tcpstate. Store it in tcpList by age. */
640 tcpstate->last_use = tm->ts.tv_sec;
641 tcpList_add_newest(tcpstate);
642 }
643 return 1;
644 }
645
646 static int
pcap_ipv4_handler(const struct ip * ip4,int len,void * udata)647 pcap_ipv4_handler(const struct ip* ip4, int len, void* udata)
648 {
649 transport_message* tm = udata;
650 #ifdef __FreeBSD__ /* FreeBSD uses packed struct ip */
651 struct in_addr a;
652 memcpy(&a, &ip4->ip_src, sizeof(a));
653 inXaddr_assign_v4(&tm->src_ip_addr, &a);
654 memcpy(&a, &ip4->ip_dst, sizeof(a));
655 inXaddr_assign_v4(&tm->dst_ip_addr, &a);
656 #else
657 inXaddr_assign_v4(&tm->src_ip_addr, &ip4->ip_src);
658 inXaddr_assign_v4(&tm->dst_ip_addr, &ip4->ip_dst);
659 #endif
660 tm->ip_version = 4;
661 return 0;
662 }
663
664 static int
pcap_ipv6_handler(const struct ip6_hdr * ip6,int len,void * udata)665 pcap_ipv6_handler(const struct ip6_hdr* ip6, int len, void* udata)
666 {
667 transport_message* tm = udata;
668 #ifdef __FreeBSD__ /* FreeBSD uses packed struct ip6_hdr */
669 struct in6_addr a;
670 memcpy(&a, &ip6->ip6_src, sizeof(a));
671 inXaddr_assign_v6(&tm->src_ip_addr, &a);
672 memcpy(&a, &ip6->ip6_dst, sizeof(a));
673 inXaddr_assign_v6(&tm->dst_ip_addr, &a);
674 #else
675 inXaddr_assign_v6(&tm->src_ip_addr, &ip6->ip6_src);
676 inXaddr_assign_v6(&tm->dst_ip_addr, &ip6->ip6_dst);
677 #endif
678 tm->ip_version = 6;
679 return 0;
680 }
681
682 static int
pcap_match_vlan(unsigned short vlan,void * udata)683 pcap_match_vlan(unsigned short vlan, void* udata)
684 {
685 int i;
686 if (vlan_tag_needs_byte_conversion)
687 vlan = ntohs(vlan);
688 dfprintf(1, "vlan is %d", vlan);
689 for (i = 0; i < n_vlan_ids; i++)
690 if (vlan_ids[i] == vlan)
691 return 0;
692 return 1;
693 }
694
695 /*
696 * Forward declares for pcap_layers since we need to call datalink
697 * handlers directly.
698 */
699 #if USE_PPP
700 void handle_ppp(const u_char* pkt, int len, void* userdata);
701 #endif
702 void handle_null(const u_char* pkt, int len, void* userdata);
703 #ifdef DLT_LOOP
704 void handle_loop(const u_char* pkt, int len, void* userdata);
705 #endif
706 #ifdef DLT_RAW
707 void handle_raw(const u_char* pkt, int len, void* userdata);
708 #endif
709 void handle_ether(const u_char* pkt, int len, void* userdata);
710 #ifdef DLT_LINUX_SLL
711 void handle_linux_sll(const u_char* pkt, int len, void* userdata);
712 #endif
713
714 static void
pcap_handle_packet(u_char * udata,const struct pcap_pkthdr * hdr,const u_char * pkt,const char * name,int dlt)715 pcap_handle_packet(u_char* udata, const struct pcap_pkthdr* hdr, const u_char* pkt, const char* name, int dlt)
716 {
717 void (*handle_datalink)(const u_char* pkt, int len, void* userdata);
718 transport_message tm;
719
720 #if 0 /* enable this to test code with unaligned headers */
721 char buf[PCAP_SNAPLEN + 1];
722 memcpy(buf + 1, pkt, hdr->caplen);
723 pkt = buf + 1;
724 #endif
725
726 assign_timeval(last_ts, hdr->ts);
727 if (hdr->caplen < ETHER_HDR_LEN)
728 return;
729 memset(&tm, 0, sizeof(tm));
730 assign_timeval(tm.ts, hdr->ts);
731
732 switch (dlt) {
733 case DLT_EN10MB:
734 handle_datalink = handle_ether;
735 break;
736 #if USE_PPP
737 case DLT_PPP:
738 handle_datalink = handle_ppp;
739 break;
740 #endif
741 #ifdef DLT_LOOP
742 case DLT_LOOP:
743 handle_datalink = handle_loop;
744 break;
745 #endif
746 #ifdef DLT_RAW
747 case DLT_RAW:
748 handle_datalink = handle_raw;
749 break;
750 #endif
751 #ifdef DLT_LINUX_SLL
752 case DLT_LINUX_SLL:
753 handle_datalink = handle_linux_sll;
754 break;
755 #endif
756 case DLT_NULL:
757 handle_datalink = handle_null;
758 break;
759 default:
760 fprintf(stderr, "unsupported data link type %d", dlt);
761 exit(1);
762 }
763
764 handle_datalink(pkt, hdr->caplen, (u_char*)&tm);
765 }
766
767 /* ========================================================================= */
768
769 extern int sig_while_processing;
770
_callback(u_char * user,const struct pcap_pkthdr * pkthdr,const u_char * pkt,const char * name,int dlt)771 void _callback(u_char* user, const struct pcap_pkthdr* pkthdr, const u_char* pkt, const char* name, int dlt)
772 {
773 struct _interface* i;
774 if (!user) {
775 dsyslog(LOG_ERR, "internal error");
776 exit(2);
777 }
778 i = (struct _interface*)user;
779
780 i->pkts_captured++;
781
782 pcap_handle_packet(user, pkthdr, pkt, name, dlt);
783 }
784
Pcap_init(const char * device,int promisc,int monitor,int immediate,int threads,int buffer_size)785 void Pcap_init(const char* device, int promisc, int monitor, int immediate, int threads, int buffer_size)
786 {
787 char errbuf[512];
788 struct stat sb;
789 struct _interface* i;
790 int err;
791 extern int pt_timeout;
792
793 if (interfaces == NULL) {
794 interfaces = xcalloc(MAX_N_INTERFACES, sizeof(*interfaces));
795 if ((err = pcap_thread_set_promiscuous(&pcap_thread, promisc))) {
796 dsyslogf(LOG_ERR, "unable to set promiscuous mode: %s", pcap_thread_strerr(err));
797 exit(1);
798 }
799 if ((err = pcap_thread_set_monitor(&pcap_thread, monitor))) {
800 dsyslogf(LOG_ERR, "unable to set monitor mode: %s", pcap_thread_strerr(err));
801 exit(1);
802 }
803 if ((err = pcap_thread_set_immediate_mode(&pcap_thread, immediate))) {
804 dsyslogf(LOG_ERR, "unable to set immediate mode: %s", pcap_thread_strerr(err));
805 exit(1);
806 }
807 if ((err = pcap_thread_set_use_threads(&pcap_thread, threads))) {
808 dsyslogf(LOG_ERR, "unable to set use threads: %s", pcap_thread_strerr(err));
809 exit(1);
810 }
811 if ((err = pcap_thread_set_snaplen(&pcap_thread, PCAP_SNAPLEN))) {
812 dsyslogf(LOG_ERR, "unable to set snap length: %s", pcap_thread_strerr(err));
813 exit(1);
814 }
815 if (bpf_program_str && (err = pcap_thread_set_filter(&pcap_thread, bpf_program_str, strlen(bpf_program_str)))) {
816 dsyslogf(LOG_ERR, "unable to set pcap filter: %s", pcap_thread_strerr(err));
817 exit(1);
818 }
819 if ((err = pcap_thread_set_callback(&pcap_thread, _callback))) {
820 dsyslogf(LOG_ERR, "unable to set pcap callback: %s", pcap_thread_strerr(err));
821 exit(1);
822 }
823 if (buffer_size > 0 && (err = pcap_thread_set_buffer_size(&pcap_thread, buffer_size))) {
824 dsyslogf(LOG_ERR, "unable to set pcap buffer size: %s", pcap_thread_strerr(err));
825 exit(1);
826 }
827 if (pt_timeout > 0 && (err = pcap_thread_set_timeout(&pcap_thread, pt_timeout))) {
828 dsyslogf(LOG_ERR, "unable to set pcap-thread timeout: %s", pcap_thread_strerr(err));
829 exit(1);
830 }
831 }
832 assert(interfaces);
833 assert(n_interfaces < MAX_N_INTERFACES);
834 i = &interfaces[n_interfaces];
835 i->device = strdup(device);
836
837 last_ts.tv_sec = last_ts.tv_usec = 0;
838 finish_ts.tv_sec = finish_ts.tv_usec = 0;
839
840 if (!stat(device, &sb)) {
841 if ((err = pcap_thread_open_offline(&pcap_thread, device, i))) {
842 dsyslogf(LOG_ERR, "unable to open offline file %s: %s", device, pcap_thread_strerr(err));
843 if (err == PCAP_THREAD_EPCAP) {
844 dsyslogf(LOG_ERR, "libpcap error [%d]: %s (%s)",
845 pcap_thread_status(&pcap_thread),
846 pcap_statustostr(pcap_thread_status(&pcap_thread)),
847 pcap_thread_errbuf(&pcap_thread));
848 } else if (err == PCAP_THREAD_ERRNO) {
849 dsyslogf(LOG_ERR, "system error [%d]: %s (%s)\n",
850 errno,
851 dsc_strerror(errno, errbuf, sizeof(errbuf)),
852 pcap_thread_errbuf(&pcap_thread));
853 }
854 exit(1);
855 }
856
857 n_pcap_offline++;
858 } else {
859 if ((err = pcap_thread_open(&pcap_thread, device, i))) {
860 dsyslogf(LOG_ERR, "unable to open interface %s: %s", device, pcap_thread_strerr(err));
861 if (err == PCAP_THREAD_EPCAP) {
862 dsyslogf(LOG_ERR, "libpcap error [%d]: %s (%s)",
863 pcap_thread_status(&pcap_thread),
864 pcap_statustostr(pcap_thread_status(&pcap_thread)),
865 pcap_thread_errbuf(&pcap_thread));
866 } else if (err == PCAP_THREAD_ERRNO) {
867 dsyslogf(LOG_ERR, "system error [%d]: %s (%s)\n",
868 errno,
869 dsc_strerror(errno, errbuf, sizeof(errbuf)),
870 pcap_thread_errbuf(&pcap_thread));
871 }
872 exit(1);
873 }
874 }
875
876 if (0 == n_interfaces) {
877 extern int drop_ip_fragments;
878 /*
879 * Initialize pcap_layers library and specifiy IP fragment reassembly
880 * Datalink type is handled in callback
881 */
882 pcap_layers_init(DLT_EN10MB, drop_ip_fragments ? 0 : 1);
883 if (n_vlan_ids)
884 callback_vlan = pcap_match_vlan;
885 callback_ipv4 = pcap_ipv4_handler;
886 callback_ipv6 = pcap_ipv6_handler;
887 callback_udp = pcap_udp_handler;
888 callback_tcp = pcap_tcp_handler;
889 callback_l7 = dns_protocol_handler;
890 }
891 n_interfaces++;
892 if (n_pcap_offline > 1 || (n_pcap_offline > 0 && n_interfaces > n_pcap_offline)) {
893 dsyslog(LOG_ERR, "offline interface must be only interface");
894 exit(1);
895 }
896 }
897
_stats(u_char * user,const struct pcap_stat * stats,const char * name,int dlt)898 void _stats(u_char* user, const struct pcap_stat* stats, const char* name, int dlt)
899 {
900 int i;
901 struct _interface* I = 0;
902
903 for (i = 0; i < n_interfaces; i++) {
904 if (!strcmp(name, interfaces[i].device)) {
905 I = &interfaces[i];
906 break;
907 }
908 }
909
910 if (I) {
911 I->ps0 = I->ps1;
912 I->ps1 = *stats;
913 }
914 }
915
Pcap_run(void)916 int Pcap_run(void)
917 {
918 int i, err;
919 extern uint64_t statistics_interval;
920
921 for (i = 0; i < n_interfaces; i++)
922 interfaces[i].pkts_captured = 0;
923
924 if (n_pcap_offline > 0) {
925 if (finish_ts.tv_sec > 0) {
926 start_ts.tv_sec = finish_ts.tv_sec;
927 finish_ts.tv_sec += statistics_interval;
928 } else {
929 /*
930 * First run, need to walk each pcap savefile and find
931 * the first start time
932 */
933
934 if ((err = pcap_thread_next_reset(&pcap_thread))) {
935 dsyslogf(LOG_ERR, "unable to reset pcap thread next: %s", pcap_thread_strerr(err));
936 return 0;
937 }
938 for (i = 0; i < n_pcap_offline; i++) {
939 if ((err = pcap_thread_next(&pcap_thread))) {
940 if (err != PCAP_THREAD_EPCAP) {
941 dsyslogf(LOG_ERR, "unable to do pcap thread next: %s", pcap_thread_strerr(err));
942 return 0;
943 }
944 continue;
945 }
946
947 if (!start_ts.tv_sec
948 || last_ts.tv_sec < start_ts.tv_sec
949 || (last_ts.tv_sec == start_ts.tv_sec && last_ts.tv_usec < start_ts.tv_usec)) {
950 start_ts = last_ts;
951 }
952 }
953
954 if (!start_ts.tv_sec) {
955 return 0;
956 }
957
958 finish_ts.tv_sec = ((start_ts.tv_sec / statistics_interval) + 1) * statistics_interval;
959 finish_ts.tv_usec = 0;
960 }
961
962 i = 0;
963 do {
964 err = pcap_thread_next(&pcap_thread);
965 if (err == PCAP_THREAD_EPCAP) {
966 /*
967 * Potential EOF, count number of times
968 */
969 i++;
970 } else if (err) {
971 dsyslogf(LOG_ERR, "unable to do pcap thread next: %s", pcap_thread_strerr(err));
972 return 0;
973 } else {
974 i = 0;
975 }
976
977 if (i == n_pcap_offline || sig_while_processing) {
978 /*
979 * All pcaps reports EOF or we got a signal, nothing more to do
980 */
981 finish_ts = last_ts;
982 return 0;
983 }
984 } while (last_ts.tv_sec < finish_ts.tv_sec);
985 } else {
986 gettimeofday(&start_ts, NULL);
987 gettimeofday(&last_ts, NULL);
988 finish_ts.tv_sec = ((start_ts.tv_sec / statistics_interval) + 1) * statistics_interval;
989 finish_ts.tv_usec = 0;
990 if ((err = pcap_thread_set_timedrun_to(&pcap_thread, finish_ts))) {
991 dsyslogf(LOG_ERR, "unable to set pcap thread timed run: %s", pcap_thread_strerr(err));
992 return 0;
993 }
994
995 if ((err = pcap_thread_run(&pcap_thread))) {
996 if (err == PCAP_THREAD_ERRNO && errno == EINTR && sig_while_processing) {
997 dsyslog(LOG_INFO, "pcap thread run interruped by signal");
998 } else {
999 dsyslogf(LOG_ERR, "unable to pcap thread run: %s", pcap_thread_strerr(err));
1000 if (err == PCAP_THREAD_EPCAP) {
1001 dsyslogf(LOG_ERR, "libpcap error [%d]: %s (%s)",
1002 pcap_thread_status(&pcap_thread),
1003 pcap_statustostr(pcap_thread_status(&pcap_thread)),
1004 pcap_thread_errbuf(&pcap_thread));
1005 } else if (err == PCAP_THREAD_ERRNO) {
1006 char errbuf[512];
1007 dsyslogf(LOG_ERR, "system error [%d]: %s (%s)\n",
1008 errno,
1009 dsc_strerror(errno, errbuf, sizeof(errbuf)),
1010 pcap_thread_errbuf(&pcap_thread));
1011 }
1012 return 0;
1013 }
1014 }
1015
1016 if (sig_while_processing)
1017 finish_ts = last_ts;
1018
1019 if ((err = pcap_thread_stats(&pcap_thread, _stats, 0))) {
1020 dsyslogf(LOG_ERR, "unable to get pcap thread stats: %s", pcap_thread_strerr(err));
1021 if (err == PCAP_THREAD_EPCAP) {
1022 dsyslogf(LOG_ERR, "libpcap error [%d]: %s (%s)",
1023 pcap_thread_status(&pcap_thread),
1024 pcap_statustostr(pcap_thread_status(&pcap_thread)),
1025 pcap_thread_errbuf(&pcap_thread));
1026 }
1027 return 0;
1028 }
1029 }
1030 tcpList_remove_older_than(last_ts.tv_sec - MAX_TCP_IDLE);
1031 pcap_layers_clear_fragments(time(NULL) - MAX_FRAG_IDLE);
1032 return 1;
1033 }
1034
Pcap_stop(void)1035 void Pcap_stop(void)
1036 {
1037 pcap_thread_stop(&pcap_thread);
1038 }
1039
Pcap_close(void)1040 void Pcap_close(void)
1041 {
1042 int i;
1043
1044 pcap_thread_close(&pcap_thread);
1045 for (i = 0; i < n_interfaces; i++)
1046 if (interfaces[i].device)
1047 free(interfaces[i].device);
1048
1049 xfree(interfaces);
1050 interfaces = NULL;
1051 }
1052
Pcap_start_time(void)1053 int Pcap_start_time(void)
1054 {
1055 return (int)start_ts.tv_sec;
1056 }
1057
Pcap_finish_time(void)1058 int Pcap_finish_time(void)
1059 {
1060 return (int)finish_ts.tv_sec;
1061 }
1062
pcap_set_match_vlan(int vlan)1063 void pcap_set_match_vlan(int vlan)
1064 {
1065 assert(n_vlan_ids < MAX_VLAN_IDS);
1066 vlan_ids[n_vlan_ids++] = vlan;
1067 }
1068
/* ========== PCAP_STAT INDEXER ========== */

/*
 * Iterator callbacks referenced by the indexers table below; both are
 * defined further down in this file.
 */
int pcap_ifname_iterator(const char**);
int pcap_stat_iterator(const char**);

/*
 * Indexers for the "pcap_stats" array built by pcap_report(): entry 0
 * ("ifname") is used for the first dimension and entry 1 ("pcap_stat") for
 * the second.
 *
 * NOTE(review): the two 0 members of each entry and the trailing { 0 } entry
 * are presumably unused fields and an end-of-table marker - confirm against
 * the declaration of indexer.
 */
static indexer indexers[] = {
    { "ifname", 0, 0, pcap_ifname_iterator },
    { "pcap_stat", 0, 0, pcap_stat_iterator },
    { 0 },
};
1079
pcap_ifname_iterator(const char ** label)1080 int pcap_ifname_iterator(const char** label)
1081 {
1082 static int next_iter = 0;
1083 if (NULL == label) {
1084 next_iter = 0;
1085 return n_interfaces;
1086 }
1087 if (next_iter >= 0 && next_iter < n_interfaces) {
1088 *label = interfaces[next_iter].device;
1089 return next_iter++;
1090 }
1091 return -1;
1092 }
1093
/*
 * Iterator over the names of the per-interface statistics.
 *
 * label == NULL: rewind the iterator and return the number of statistics (3).
 * label != NULL: store the next name in *label and return its index
 *                (0 "pkts_captured", 1 "filter_received", 2 "kernel_dropped"),
 *                or return -1 (leaving *label untouched) when exhausted.
 *
 * The position is kept in a function-local static, so there is a single
 * shared iteration state.
 */
int pcap_stat_iterator(const char** label)
{
    static const char* const stat_names[] = {
        "pkts_captured",
        "filter_received",
        "kernel_dropped",
    };
    enum { N_STATS = sizeof(stat_names) / sizeof(stat_names[0]) };
    static int cursor = 0;

    if (label == NULL) {
        cursor = 0;
        return N_STATS;
    }

    if (cursor < 0 || cursor >= N_STATS)
        return -1;

    *label = stat_names[cursor];
    return cursor++;
}
1111
/*
 * Build the "pcap_stats" array and print it with md_array_print().
 *
 * The array has one row per interface (dimension "ifname") and three values
 * per row (dimension "pcap_stat"):
 *   [0] pkts_captured
 *   [1] ps_recv difference between the two stored pcap_stat snapshots
 *   [2] ps_drop difference between the two stored pcap_stat snapshots
 *
 * fp:      stream handed through to md_array_print().
 * printer: printer handed through to md_array_print().
 *
 * If any allocation fails, an error is logged and nothing is printed.
 * NOTE(review): nothing obtained from acalloc() is released in this function;
 * presumably that memory is reclaimed elsewhere - confirm.
 */
void pcap_report(FILE* fp, md_array_printer* printer)
{
    enum { n_stats = 3 };
    int       i;
    md_array* theArray = acalloc(1, sizeof(*theArray));
    if (!theArray) {
        dsyslog(LOG_ERR, "unable to write report, out of memory");
        return;
    }
    theArray->name        = "pcap_stats";
    theArray->d1.indexer  = &indexers[0];
    theArray->d1.type     = "ifname";
    theArray->d1.alloc_sz = n_interfaces;
    theArray->d2.indexer  = &indexers[1];
    theArray->d2.type     = "pcap_stat";
    theArray->d2.alloc_sz = n_stats;
    theArray->array       = acalloc(n_interfaces, sizeof(*theArray->array));
    if (!theArray->array) {
        dsyslog(LOG_ERR, "unable to write report, out of memory");
        return;
    }
    for (i = 0; i < n_interfaces; i++) {
        struct _interface* I = &interfaces[i];
        theArray->array[i].alloc_sz = n_stats;
        theArray->array[i].array    = acalloc(n_stats, sizeof(*theArray->array[i].array));
        /* Check the row before writing to it, like the allocations above */
        if (!theArray->array[i].array) {
            dsyslog(LOG_ERR, "unable to write report, out of memory");
            return;
        }
        theArray->array[i].array[0] = I->pkts_captured;
        theArray->array[i].array[1] = I->ps1.ps_recv - I->ps0.ps_recv;
        theArray->array[i].array[2] = I->ps1.ps_drop - I->ps0.ps_drop;
    }
    md_array_print(theArray, printer, fp);
}
1142