1 /* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */
2
3 /*
4 * By design, this file can be read without reading config.h
5 * #include "config.h" must appear as the first line of your .cpp file.
6 */
7
8 #ifndef PACKAGE_NAME
9 #error bulk_extractor_i.h included before config.h
10 #endif
11
12 #ifndef BULK_EXTRACTOR_I_H
13 #define BULK_EXTRACTOR_I_H
14
/* Debug settings.
 * The 0x00NN values each occupy a single distinct bit. DEBUG_EXIT_EARLY and
 * DEBUG_ALLOCATE_512MiB are plain decimal numbers that span several bits
 * (presumably compared for equality rather than masked -- confirm in callers).
 */
#define DEBUG_PEDANTIC    0x0001        // check values more rigorously
#define DEBUG_PRINT_STEPS 0x0002        // prints as each scanner is started
#define DEBUG_SCANNER     0x0004        // dump all feature writes to stderr
#define DEBUG_NO_SCANNERS 0x0008        // do not run the scanners
#define DEBUG_DUMP_DATA   0x0010        // dump data as it is seen
#define DEBUG_DECODING    0x0020        // debug decoders in scanner
#define DEBUG_INFO        0x0040        // print extra info
#define DEBUG_EXIT_EARLY  1000          // just print the size of the volume and exit
#define DEBUG_ALLOCATE_512MiB 1002      // Allocate 512MiB, but don't set any flags
24
25 /* We need netinet/in.h or windowsx.h */
26 #ifdef HAVE_NETINET_IN_H
27 # include <netinet/in.h>
28 #endif
29
30 #include <assert.h>
31
32 #if defined(MINGW) || defined(__MINGW__) || defined(__MINGW32__) || defined(__MINGW64__)
33 #ifndef WIN32
34 #define WIN32
35 #endif
36 #endif
37
38 #if defined(WIN32) || defined(__MINGW32__)
39 # include <winsock2.h>
40 # include <windows.h>
41 # include <windowsx.h>
42 #endif
43
/* If the byte order hasn't been defined, assume it's Intel (little-endian) */
45
46 #if defined(WIN32) || !defined(__BYTE_ORDER)
47 # define __LITTLE_ENDIAN 1234
48 # define __BIG_ENDIAN 4321
49 # define __BYTE_ORDER __LITTLE_ENDIAN
50 #endif
51
/* Refuse to build unless __BYTE_ORDER is one of the two supported values.
 * The bit-field layouts of struct ip4 and struct tcphdr in this file are only
 * selected for little- and big-endian; with any other value those structs
 * would silently lose their first fields.
 * (The previous test required __BYTE_ORDER to equal BOTH constants at once,
 * which can never be true, so the check never fired.)
 */
#if (__BYTE_ORDER != __LITTLE_ENDIAN) && (__BYTE_ORDER != __BIG_ENDIAN)
#  error Invalid __BYTE_ORDER
#endif
55
56 /**
57 * \addtogroup plugin_module
58 * @{
59 */
60
61 /**
62 * \file
63 * bulk_extractor scanner plug_in architecture.
64 *
65 * Scanners are called with two parameters:
66 * A reference to a scanner_params (SP) object.
67 * A reference to a recursion_control_block (RCB) object.
68 *
69 * On startup, each scanner is called with a special SP and RCB.
70 * The scanners respond by setting fields in the SP and returning.
71 *
72 * When executing, once again each scanner is called with the SP and RCB.
73 * This is the only file that needs to be included for a scanner.
74 *
75 * \li \c phase_startup - scanners are loaded and register the names of the feature files they want.
76 * \li \c phase_scan - each scanner is called to analyze 1 or more sbufs.
77 * \li \c phase_shutdown - scanners are given a chance to shutdown
78 */
79
80 #ifndef __cplusplus
81 # error bulk_extractor_i.h requires C++
82 #endif
83
84 #include "sbuf.h"
85 #include "utf8.h"
86 #include "utils.h" // for gmtime_r
87
88 #include <vector>
89 #include <set>
90 #include <map>
91
92 #include "feature_recorder.h"
93 #include "feature_recorder_set.h"
94
95 /* Network includes */
96
97 /****************************************************************
98 *** pcap.h --- If we don't have it, fake it. ---
99 ***/
100 #ifdef HAVE_NETINET_IF_ETHER_H
101 # include <netinet/if_ether.h>
102 #endif
103 #ifdef HAVE_NETINET_IN_H
104 # include <netinet/in.h>
105 #endif
106 #ifdef HAVE_NET_ETHERNET_H
107 # include <net/ethernet.h> // for freebsd
108 #endif
109
110
111 #if defined(HAVE_LIBPCAP)
112 # ifdef HAVE_DIAGNOSTIC_REDUNDANT_DECLS
113 # pragma GCC diagnostic ignored "-Wredundant-decls"
114 # endif
115 # if defined(HAVE_PCAP_PCAP_H)
116 # include <pcap/pcap.h>
117 # define GOT_PCAP
118 # endif
119 # if defined(HAVE_PCAP_H) && !defined(GOT_PCAP)
120 # include <pcap.h>
121 # define GOT_PCAP
122 # endif
123 # if defined(HAVE_WPCAP_PCAP_H) && !defined(GOT_PCAP)
124 # include <wpcap/pcap.h>
125 # define GOT_PCAP
126 # endif
127 # ifdef HAVE_DIAGNOSTIC_REDUNDANT_DECLS
128 # pragma GCC diagnostic warning "-Wredundant-decls"
129 # endif
130 #else
131 # include "pcap_fake.h"
132 #endif
133
134 /**
135 * \class scanner_params
136 * The scanner params class is the primary way that the bulk_extractor framework
137 * communicates with the scanners.
138 * @param sbuf - the buffer to be scanned
139 * @param feature_names - if fs==0, add to feature_names the feature file types that this
140 * scanner records.. The names can have a /c appended to indicate
141 * that the feature files should have context enabled. Do not scan.
142 * @param fs - where the features should be saved. Must be provided if feature_names==0.
143 **/
144
145 /*****************************************************************
146 *** bulk_extractor has a private implementation of IPv4 and IPv6,
147 *** UDP and TCP.
148 ***
 *** We did this because we found slightly different versions on
150 *** MacOS, Ubuntu Linux, Fedora Linux, Centos, Mingw, and Cygwin.
151 *** TCP/IP isn't changing anytime soon, and when it changes (as it
152 *** did with IPv6), these different systems all implemented it slightly
153 *** differently, and that caused a lot of problems for us.
154 *** So the BE13 API has a single implementation and it's good enough
155 *** for our uses.
156 ***/
157
158 namespace be13 {
159
160 #ifndef ETH_ALEN
161 # define ETH_ALEN 6 // ethernet address len
162 #endif
163
164 #ifndef IPPROTO_TCP
165 # define IPPROTO_TCP 6 /* tcp */
166 #endif
167
/* A raw Ethernet hardware address: ETH_ALEN octets, declared packed so the
 * struct has no padding and sizeof(struct ether_addr) == ETH_ALEN.
 */
struct ether_addr {
    uint8_t ether_addr_octet[ETH_ALEN];
} __attribute__ ((__packed__));
171
172 /* 10Mb/s ethernet header */
/* Packed overlay for the start of an Ethernet frame: destination address,
 * source address, then the 16-bit type field. ether_type is stored exactly as
 * it appears in the frame; packet_info::ether_type() applies ntohs() to it.
 */
struct ether_header {
    uint8_t  ether_dhost[ETH_ALEN];     /* destination eth addr */
    uint8_t  ether_shost[ETH_ALEN];     /* source ether addr    */
    uint16_t ether_type;                /* packet type ID field */
} __attribute__ ((__packed__));
178
/* The mess below is because these items are typedefs and
180 * structs on some systems and #defines on other systems
181 * So in the interest of portability we need to define *new*
182 * structures that are only used here
183 */
184
typedef uint32_t ip4_addr_t;         // historical

// Private wrapper around a 32-bit IPv4 address, used for ip4::ip_src / ip_dst.
// NOTE(review): the original remark here said "on windows we use the definition
// that's in winsock", but this struct is defined unconditionally in this file.
struct ip4_addr {
    ip4_addr_t addr;
};
191
192 /*
193 * Structure of an internet header, naked of options.
194 */
/* Packed overlay for a 20-byte IPv4 header (no options).
 * The declaration order of the two 4-bit fields in the first octet is chosen
 * by __BYTE_ORDER. If __BYTE_ORDER matched neither constant, neither pair
 * would be declared. Multi-byte fields are stored as found in the packet; no
 * byte-order conversion happens in this struct.
 * The IP_* macros are masks for ip_off; being macros, they are not scoped to
 * the struct or to the namespace.
 */
struct ip4 {
#if __BYTE_ORDER == __LITTLE_ENDIAN
    uint8_t ip_hl:4;                /* header length */
    uint8_t ip_v:4;                 /* version */
#endif
#if __BYTE_ORDER == __BIG_ENDIAN
    uint8_t ip_v:4;                 /* version */
    uint8_t ip_hl:4;                /* header length */
#endif
    uint8_t  ip_tos;                /* type of service */
    uint16_t ip_len;                /* total length */
    uint16_t ip_id;                 /* identification */
    uint16_t ip_off;                /* fragment offset field */
#define IP_RF 0x8000                /* reserved fragment flag */
#define IP_DF 0x4000                /* dont fragment flag */
#define IP_MF 0x2000                /* more fragments flag */
#define IP_OFFMASK 0x1fff           /* mask for fragmenting bits */
    uint8_t  ip_ttl;                /* time to live */
    uint8_t  ip_p;                  /* protocol */
    uint16_t ip_sum;                /* checksum */
    struct ip4_addr ip_src, ip_dst; /* source and dest address */
} __attribute__ ((__packed__));
217
/* Non-owning view of an IPv4 datagram: a pointer to its header, a pointer to
 * its payload, and the payload length. The struct only holds pointers; nothing
 * here allocates or frees the memory they refer to.
 */
struct ip4_dgram {
    const struct ip4 *header;       /* start of the IPv4 header */
    const uint8_t *payload;         /* start of the payload */
    uint16_t payload_len;           /* length of the payload */
};
223
224 /*
225 * IPv6 header structure
226 */
/* Private 128-bit IPv6 address. The union exposes the same 16 bytes as
 * sixteen 8-bit, eight 16-bit or four 32-bit elements.
 */
struct ip6_addr {               // our own private ipv6 definition
    union {
        uint8_t  addr8[16];     // three ways to get the data
        uint16_t addr16[8];
        uint32_t addr32[4];
    } addr;                     /* 128-bit IP6 address */
};
/* Packed overlay for the fixed IPv6 header: an 8-byte control union followed
 * by the source and destination addresses. ip6_un2_vfc aliases the first byte
 * of ip6_un1 (they share the union), giving direct access to the version bits.
 */
struct ip6_hdr {
    union {
        struct ip6_hdrctl {
            uint32_t ip6_un1_flow;  /* 20 bits of flow-ID */
            uint16_t ip6_un1_plen;  /* payload length */
            uint8_t  ip6_un1_nxt;   /* next header */
            uint8_t  ip6_un1_hlim;  /* hop limit */
        } ip6_un1;
        uint8_t ip6_un2_vfc;        /* 4 bits version, top 4 bits class */
    } ip6_ctlun;
    struct ip6_addr ip6_src;        /* source address */
    struct ip6_addr ip6_dst;        /* destination address */
} __attribute__((__packed__));
247
/* Non-owning view of an IPv6 datagram: a pointer to its header, a pointer to
 * its payload, and the payload length. The struct only holds pointers; nothing
 * here allocates or frees the memory they refer to.
 */
struct ip6_dgram {
    const struct ip6_hdr *header;   /* start of the IPv6 header */
    const uint8_t *payload;         /* start of the payload */
    uint16_t payload_len;           /* length of the payload */
};
253
254 /*
255 * TCP header.
256 * Per RFC 793, September, 1981.
257 */
typedef uint32_t tcp_seq;           /* TCP sequence / acknowledgement number */

/* Overlay for the fixed part of a TCP header.
 * The declaration order of th_x2/th_off is chosen by __BYTE_ORDER, as for
 * struct ip4. Unlike the other header structs in this file, this one carries
 * no __packed__ attribute. Fields are stored as found in the packet; no
 * byte-order conversion happens here.
 * The TH_* macros are bit masks for th_flags; being macros, they are not
 * scoped to the struct or to the namespace.
 */
struct tcphdr {
    uint16_t th_sport;              /* source port */
    uint16_t th_dport;              /* destination port */
    tcp_seq th_seq;                 /* sequence number */
    tcp_seq th_ack;                 /* acknowledgement number */
#  if __BYTE_ORDER == __LITTLE_ENDIAN
    uint8_t th_x2:4;                /* (unused) */
    uint8_t th_off:4;               /* data offset */
#  endif
#  if __BYTE_ORDER == __BIG_ENDIAN
    uint8_t th_off:4;               /* data offset */
    uint8_t th_x2:4;                /* (unused) */
#  endif
    uint8_t th_flags;
#  define TH_FIN        0x01
#  define TH_SYN        0x02
#  define TH_RST        0x04
#  define TH_PUSH       0x08
#  define TH_ACK        0x10
#  define TH_URG        0x20
    uint16_t th_win;                /* window */
    uint16_t th_sum;                /* checksum */
    uint16_t th_urp;                /* urgent pointer */
};
283 /*
284 * The packet_info structure records packets after they are read from the pcap library.
285 * It preserves the original pcap information and information decoded from the MAC and
286 * VLAN (IEEE 802.1Q) layers, as well as information that might be present from 802.11
287 * interfaces. However it does not preserve the full radiotap information.
288 *
289 * packet_info is created to make it easier to write network forensic software. It encapsulates
290 * much of the common knowledge needed to operate on packet-based IP networks.
291 *
 * @param ts         - the actual packet time to use (adjusted)
 * @param pcap_data  - Original data offset point from pcap
 * @param ip_data    - the actual packet data, minus the MAC layer
 * @param ip_datalen - How much data is available at the ip_data pointer
296 *
297 */
/* Read-only description of one captured packet.
 * The object stores pointers and a reference supplied by the caller and does
 * not copy packet bytes, so the pcap header, the packet buffer and the timeval
 * must stay valid for as long as the packet_info is used.
 */
class packet_info {
public:
    // Byte offsets of selected fields, measured from the start of the IP header.
    // IPv4 header offsets
    static const size_t ip4_proto_off = 9;
    static const size_t ip4_src_off = 12;
    static const size_t ip4_dst_off = 16;
    // IPv6 header offsets
    static const size_t ip6_nxt_hdr_off = 6;
    static const size_t ip6_plen_off = 4;
    static const size_t ip6_src_off = 8;
    static const size_t ip6_dst_off = 24;
    // TCP header offsets (measured from the start of the TCP header)
    static const size_t tcp_sport_off = 0;
    static const size_t tcp_dport_off = 2;

    // Raised by the get_*() accessors when the captured data is too short.
    // Note: the accessors defined in this file use `throw new frame_too_short()`,
    // i.e. they throw a pointer, so a handler must catch `frame_too_short *`.
    class frame_too_short : public std::logic_error {
    public:
        frame_too_short() :
            std::logic_error("frame too short to contain requisite network structures") {}
    };

    enum vlan_t {NO_VLAN=-1};       // value returned by vlan() when there is no VLAN tag
    /** create a packet, usually an IP packet.
     * @param dlt - pcap data link type of the capture
     * @param h   - pcap packet header (stored, not copied)
     * @param d   - start of MAC packet
     * @param ts_ - packet time; stored as a reference, so it must outlive this object
     * @param d2  - start of IP data
     * @param dl2 - number of bytes available at d2
     */
    packet_info(const int dlt,const struct pcap_pkthdr *h,const u_char *d,
                const struct timeval &ts_,const uint8_t *d2,size_t dl2):
        pcap_dlt(dlt),pcap_hdr(h),pcap_data(d),ts(ts_),ip_data(d2),ip_datalen(dl2){}
    /** create a packet whose IP data begins at the start of the capture buffer.
     * Takes the timestamp from h->ts and the data length from h->caplen, so h must be non-null.
     */
    packet_info(const int dlt,const struct pcap_pkthdr *h,const u_char *d):
        pcap_dlt(dlt),pcap_hdr(h),pcap_data(d),ts(h->ts),ip_data(d),ip_datalen(h->caplen){}

    const int    pcap_dlt;              // data link type; needed by libpcap, not provided
    const struct pcap_pkthdr *pcap_hdr; // provided by libpcap
    const u_char *pcap_data;            // provided by libpcap; where the MAC layer begins
    const struct timeval &ts;           // when packet received; possibly modified before packet_info created
    const uint8_t *const ip_data;       // pointer to where ip data begins
    const size_t ip_datalen;            // length of ip data

    static u_short nshort(const u_char *buf,size_t pos);   // return a network byte order short at offset pos
    int     ip_version() const;         // returns 4, 6 or 0
    u_short ether_type() const;         // returns 0 if not IEEE802, otherwise returns ether_type
    int     vlan() const;               // returns NO_VLAN if not IEEE802 or not VLAN, otherwise VID
    const uint8_t *get_ether_dhost() const;   // returns a pointer to ether dhost if ether packet
    const uint8_t *get_ether_shost() const;   // returns a pointer to ether shost if ether packet

    // packet typing
    bool    is_ip4() const;
    bool    is_ip6() const;
    bool    is_ip4_tcp() const;
    bool    is_ip6_tcp() const;
    // packet extraction
    // IPv4 - return pointers to fields or throws frame_too_short exception
    const struct in_addr *get_ip4_src() const;
    const struct in_addr *get_ip4_dst() const;
    uint8_t get_ip4_proto() const;
    // IPv6
    uint8_t  get_ip6_nxt_hdr() const;
    uint16_t get_ip6_plen() const;
    const struct ip6_addr *get_ip6_src() const;
    const struct ip6_addr *get_ip6_dst() const;
    // TCP
    uint16_t get_ip4_tcp_sport() const;
    uint16_t get_ip4_tcp_dport() const;
    uint16_t get_ip6_tcp_sport() const;
    uint16_t get_ip6_tcp_dport() const;
};
365
366 #ifdef DLT_IEEE802
ether_type()367 inline u_short packet_info::ether_type() const
368 {
369 if(pcap_dlt==DLT_IEEE802 || pcap_dlt==DLT_EN10MB){
370 const struct ether_header *eth_header = (struct ether_header *) pcap_data;
371 return ntohs(eth_header->ether_type);
372 }
373 return 0;
374 }
375 #endif
376
377 #ifndef ETHERTYPE_PUP
378 #define ETHERTYPE_PUP 0x0200 /* Xerox PUP */
379 #endif
380
381 #ifndef ETHERTYPE_SPRITE
382 #define ETHERTYPE_SPRITE 0x0500 /* Sprite */
383 #endif
384
385 #ifndef ETHERTYPE_IP
386 #define ETHERTYPE_IP 0x0800 /* IP */
387 #endif
388
389 #ifndef ETHERTYPE_ARP
390 #define ETHERTYPE_ARP 0x0806 /* Address resolution */
391 #endif
392
393 #ifndef ETHERTYPE_REVARP
394 #define ETHERTYPE_REVARP 0x8035 /* Reverse ARP */
395 #endif
396
397 #ifndef ETHERTYPE_AT
398 #define ETHERTYPE_AT 0x809B /* AppleTalk protocol */
399 #endif
400
401 #ifndef ETHERTYPE_AARP
402 #define ETHERTYPE_AARP 0x80F3 /* AppleTalk ARP */
403 #endif
404
405 #ifndef ETHERTYPE_VLAN
406 #define ETHERTYPE_VLAN 0x8100 /* IEEE 802.1Q VLAN tagging */
407 #endif
408
409 #ifndef ETHERTYPE_IPX
410 #define ETHERTYPE_IPX 0x8137 /* IPX */
411 #endif
412
413 #ifndef ETHERTYPE_IPV6
414 #define ETHERTYPE_IPV6 0x86dd /* IP protocol version 6 */
415 #endif
416
417 #ifndef ETHERTYPE_LOOPBACK
418 #define ETHERTYPE_LOOPBACK 0x9000 /* used to test interfaces */
419 #endif
420
421
nshort(const u_char * buf,size_t pos)422 inline u_short packet_info::nshort(const u_char *buf,size_t pos)
423 {
424 return (buf[pos]<<8) | (buf[pos+1]);
425 }
426
vlan()427 inline int packet_info::vlan() const
428 {
429 if(ether_type()==ETHERTYPE_VLAN){
430 return nshort(pcap_data,sizeof(struct ether_header));
431 }
432 return -1;
433 }
434
ip_version()435 inline int packet_info::ip_version() const
436 {
437 /* This takes advantage of the fact that ip4 and ip6 put the version number in the same place */
438 if (ip_datalen >= sizeof(struct ip4)) {
439 const struct ip4 *ip_header = (struct ip4 *) ip_data;
440 switch(ip_header->ip_v){
441 case 4: return 4;
442 case 6: return 6;
443 }
444 }
445 return 0;
446 }
447
448 // packet typing
449
is_ip4()450 inline bool packet_info::is_ip4() const
451 {
452 return ip_version() == 4;
453 }
454
is_ip6()455 inline bool packet_info::is_ip6() const
456 {
457 return ip_version() == 6;
458 }
459
is_ip4_tcp()460 inline bool packet_info::is_ip4_tcp() const
461 {
462 if(ip_datalen < sizeof(struct ip4) + sizeof(struct tcphdr)) {
463 return false;
464 }
465 return *((uint8_t*) (ip_data + ip4_proto_off)) == IPPROTO_TCP;
466 return false;
467 }
468
is_ip6_tcp()469 inline bool packet_info::is_ip6_tcp() const
470 {
471 if(ip_datalen < sizeof(struct ip6_hdr) + sizeof(struct tcphdr)) {
472 return false;
473 }
474 return *((uint8_t*) (ip_data + ip6_nxt_hdr_off)) == IPPROTO_TCP;
475 }
476
477 // packet extraction
// precondition: the appropriate packet type function must return true before using these functions.
479 // example: is_ip4_tcp() must return true before calling get_ip4_tcp_sport()
480
481 // Get ether addresses; should this handle vlan and such?
get_ether_dhost()482 inline const uint8_t *packet_info::get_ether_dhost() const
483 {
484 if(pcap_hdr->caplen < sizeof(struct ether_addr)){
485 throw new frame_too_short();
486 }
487 return ((const struct ether_header *)pcap_data)->ether_dhost;
488 }
489
get_ether_shost()490 inline const uint8_t *packet_info::get_ether_shost() const
491 {
492 if(pcap_hdr->caplen < sizeof(struct ether_addr)){
493 throw new frame_too_short();
494 }
495 return ((const struct ether_header *)pcap_data)->ether_shost;
496 }
497
498 // IPv4
499 # ifdef HAVE_DIAGNOSTIC_CAST_ALIGN
500 # pragma GCC diagnostic ignored "-Wcast-align"
501 # endif
get_ip4_src()502 inline const struct in_addr *packet_info::get_ip4_src() const
503 {
504 if(ip_datalen < sizeof(struct ip4)) {
505 throw new frame_too_short();
506 }
507 return (const struct in_addr *) ip_data + ip4_src_off;
508 }
get_ip4_dst()509 inline const struct in_addr *packet_info::get_ip4_dst() const
510 {
511 if(ip_datalen < sizeof(struct ip4)) {
512 throw new frame_too_short();
513 }
514 return (const struct in_addr *) ip_data + ip4_dst_off;
515 }
516 # ifdef HAVE_DIAGNOSTIC_CAST_ALIGN
517 # pragma GCC diagnostic warning "-Wcast-align"
518 # endif
get_ip4_proto()519 inline uint8_t packet_info::get_ip4_proto() const
520 {
521 if(ip_datalen < sizeof(struct ip4)) {
522 throw new frame_too_short();
523 }
524 return *((uint8_t *) (ip_data + ip4_proto_off));
525 }
526 // IPv6
get_ip6_nxt_hdr()527 inline uint8_t packet_info::get_ip6_nxt_hdr() const
528 {
529 if(ip_datalen < sizeof(struct ip6_hdr)) {
530 throw new frame_too_short();
531 }
532 return *((uint8_t *) (ip_data + ip6_nxt_hdr_off));
533 }
get_ip6_plen()534 inline uint16_t packet_info::get_ip6_plen() const
535 {
536 if(ip_datalen < sizeof(struct ip6_hdr)) {
537 throw new frame_too_short();
538 }
539 //return ntohs(*((uint16_t *) (ip_data + ip6_plen_off)));
540 return nshort(ip_data,ip6_plen_off);
541 }
542 # ifdef HAVE_DIAGNOSTIC_CAST_ALIGN
543 # pragma GCC diagnostic ignored "-Wcast-align"
544 # endif
get_ip6_src()545 inline const struct ip6_addr *packet_info::get_ip6_src() const
546 {
547 if(ip_datalen < sizeof(struct ip6_hdr)) {
548 throw new frame_too_short();
549 }
550 return (const struct ip6_addr *) ip_data + ip6_src_off;
551 }
get_ip6_dst()552 inline const struct ip6_addr *packet_info::get_ip6_dst() const
553 {
554 if(ip_datalen < sizeof(struct ip6_hdr)) {
555 throw new frame_too_short();
556 }
557 return (const struct ip6_addr *) ip_data + ip6_dst_off;
558 }
559 # ifdef HAVE_DIAGNOSTIC_CAST_ALIGN
560 # pragma GCC diagnostic warning "-Wcast-align"
561 # endif
562
563 // TCP
get_ip4_tcp_sport()564 inline uint16_t packet_info::get_ip4_tcp_sport() const
565 {
566 if(ip_datalen < sizeof(struct tcphdr) + sizeof(struct ip4)) {
567 throw new frame_too_short();
568 }
569 //return ntohs(*((uint16_t *) (ip_data + sizeof(struct ip4) + tcp_sport_off)));
570 return nshort(ip_data,sizeof(struct ip4) + tcp_sport_off);
571 }
get_ip4_tcp_dport()572 inline uint16_t packet_info::get_ip4_tcp_dport() const
573 {
574 if(ip_datalen < sizeof(struct tcphdr) + sizeof(struct ip4)) {
575 throw new frame_too_short();
576 }
577 //return ntohs(*((uint16_t *) (ip_data + sizeof(struct ip4) + tcp_dport_off)));
578 return nshort(ip_data,sizeof(struct ip4) + tcp_dport_off); //
579
580 }
get_ip6_tcp_sport()581 inline uint16_t packet_info::get_ip6_tcp_sport() const
582 {
583 if(ip_datalen < sizeof(struct tcphdr) + sizeof(struct ip6_hdr)) {
584 throw new frame_too_short();
585 }
586 //return ntohs(*((uint16_t *) (ip_data + sizeof(struct ip6_hdr) + tcp_sport_off)));
587 return nshort(ip_data,sizeof(struct ip6_hdr) + tcp_sport_off); //
588 }
get_ip6_tcp_dport()589 inline uint16_t packet_info::get_ip6_tcp_dport() const
590 {
591 if(ip_datalen < sizeof(struct tcphdr) + sizeof(struct ip6_hdr)) {
592 throw new frame_too_short();
593 }
594 //return ntohs(*((uint16_t *) (ip_data + sizeof(struct ip6_hdr) + tcp_dport_off)));
595 return nshort(ip_data,sizeof(struct ip6_hdr) + tcp_dport_off); //
596 }
597 };
598
599
/* Function types used by the plug-in API.
 * scanner_t         - a scanner entry point; receives the scanner_params and the recursion_control_block.
 * process_t         - callback stored in recursion_control_block; receives a scanner_params.
 * packet_callback_t - network packet handler; receives the scanner's opaque `user` pointer and the packet.
 */
typedef void scanner_t(const class scanner_params &sp,const class recursion_control_block &rcb);
typedef void process_t(const class scanner_params &sp);
typedef void packet_callback_t(void *user,const be13::packet_info &pi);
603
604 /** scanner_info gets filled in by the scanner to tell the caller about the scanner.
605 *
606 */
607 class scanner_info {
608 private:
609 static std::stringstream helpstream; // where scanner info help messages are saved.
610
611 // default copy construction and assignment are meaningless
612 // and not implemented
613 scanner_info(const scanner_info &i);
614 scanner_info &operator=(const scanner_info &i);
615 public:
helpstr()616 static std::string helpstr(){return helpstream.str();}
617 typedef std::map<std::string,std::string> config_t; // configuration for scanner passed in
618
619 /* scanner flags */
620 static const int SCANNER_DISABLED = 0x001; // v1: enabled by default
621 static const int SCANNER_NO_USAGE = 0x002; // v1: do not show scanner in usage
622 static const int SCANNER_NO_ALL = 0x004; // v2: do not enable with -eall
623 static const int SCANNER_FIND_SCANNER = 0x008; // v2: this scanner uses the find_list
624 static const int SCANNER_RECURSE = 0x010; // v3: this scanner will recurse
625 static const int SCANNER_RECURSE_EXPAND = 0x020; // v3: recurses AND result is >= original size
626 static const int SCANNER_WANTS_NGRAMS = 0x040; // v3: Scanner gets buffers that are constant n-grams
627 static const int SCANNER_FAST_FIND = 0x080; // v3: This scanner is a very fast FIND scanner
628 static const int SCANNER_DEPTH_0 = 0x100; // v3: scanner only runs at depth 0 by default
629 static const int CURRENT_SI_VERSION = 4;
630
flag_to_string(const int flag)631 static const std::string flag_to_string(const int flag){
632 std::string ret;
633 if(flag==0) ret += "NONE ";
634 if(flag & SCANNER_DISABLED) ret += "SCANNER_DISABLED ";
635 if(flag & SCANNER_NO_USAGE) ret += "SCANNER_NO_USAGE ";
636 if(flag & SCANNER_NO_ALL) ret += "SCANNER_NO_ALL ";
637 if(flag & SCANNER_FIND_SCANNER) ret += "SCANNER_FIND_SCANNER ";
638 if(flag & SCANNER_RECURSE) ret += "SCANNER_RECURSE ";
639 if(flag & SCANNER_RECURSE_EXPAND) ret += "SCANNER_RECURSE_EXPAND ";
640 if(flag & SCANNER_WANTS_NGRAMS) ret += "SCANNER_WANTS_NGRAMS ";
641 return ret;
642 }
643
644 /* Global config is passed to each scanner as a pointer when it is loaded.
645 * Scanner histograms are added to 'histograms' by machinery.
646 */
647 struct scanner_config {
scanner_configscanner_config648 scanner_config():namevals(),debug(){};
~scanner_configscanner_config649 virtual ~scanner_config(){}
650 config_t namevals; // v3: (input) name=val map
651 int debug; // v3: (input) current debug level
652 };
653
654 // never change the order or delete old fields, or else you will
655 // break backwards compatability
scanner_info()656 scanner_info():si_version(CURRENT_SI_VERSION),
657 name(),author(),description(),url(),scanner_version(),flags(0),feature_names(),
658 histogram_defs(),packet_user(),packet_cb(),config(){}
659 /* PASSED FROM SCANNER to API: */
660 int si_version; // version number for this structure
661 std::string name; // v1: (output) scanner name
662 std::string author; // v1: (output) who wrote me?
663 std::string description; // v1: (output) what do I do?
664 std::string url; // v1: (output) where I come from
665 std::string scanner_version; // v1: (output) version for the scanner
666 uint64_t flags; // v1: (output) flags
667 std::set<std::string> feature_names; // v1: (output) features I need
668 histogram_defs_t histogram_defs; // v1: (output) histogram definition info
669 void *packet_user; // v2: (output) data for network callback
670 packet_callback_t *packet_cb; // v2: (output) callback for processing network packets, or NULL
671
672 /* PASSED FROM API TO SCANNER; access with functions below */
673 const scanner_config *config; // v3: (intput to scanner) config
674
675 // These methods are implemented in the plugin system for the scanner to get config information.
676 // The get_config methods should be called on the si object during PHASE_STARTUP
677 virtual void get_config(const scanner_info::config_t &c,
678 const std::string &name,std::string *val,const std::string &help);
679 virtual void get_config(const std::string &name,std::string *val,const std::string &help);
680 virtual void get_config(const std::string &name,uint64_t *val,const std::string &help);
681 virtual void get_config(const std::string &name,int32_t *val,const std::string &help);
682 virtual void get_config(const std::string &name,uint32_t *val,const std::string &help);
683 virtual void get_config(const std::string &name,uint16_t *val,const std::string &help);
684 virtual void get_config(const std::string &name,uint8_t *val,const std::string &help);
685 #ifdef __APPLE__
686 virtual void get_config(const std::string &name,size_t *val,const std::string &help);
687 #define HAVE_GET_CONFIG_SIZE_T
688 #endif
689 virtual void get_config(const std::string &name,bool *val,const std::string &help);
~scanner_info()690 virtual ~scanner_info(){};
691 };
692 #include <map>
693 /**
694 * The scanner_params class is a way for sending the scanner parameters
695 * for this particular sbuf to be scanned.
696 */
697
698 class scanner_params {
699 public:
700 enum print_mode_t {MODE_NONE=0,MODE_HEX,MODE_RAW,MODE_HTTP};
701 static const int CURRENT_SP_VERSION=3;
702
703 typedef std::map<std::string,std::string> PrintOptions;
getPrintMode(const PrintOptions & po)704 static print_mode_t getPrintMode(const PrintOptions &po){
705 PrintOptions::const_iterator p = po.find("print_mode_t");
706 if(p != po.end()){
707 if(p->second=="MODE_NONE") return MODE_NONE;
708 if(p->second=="MODE_HEX") return MODE_HEX;
709 if(p->second=="MODE_RAW") return MODE_RAW;
710 if(p->second=="MODE_HTTP") return MODE_HTTP;
711 }
712 return MODE_NONE;
713 }
setPrintMode(PrintOptions & po,int mode)714 static void setPrintMode(PrintOptions &po,int mode){
715 switch(mode){
716 default:
717 case MODE_NONE:po["print_mode_t"]="MODE_NONE";return;
718 case MODE_HEX:po["print_mode_t"]="MODE_HEX";return;
719 case MODE_RAW:po["print_mode_t"]="MODE_RAW";return;
720 case MODE_HTTP:po["print_mode_t"]="MODE_HTTP";return;
721 }
722 }
723
724 // phase_t specifies when the scanner is being called
725 typedef enum {
726 PHASE_NONE = -1,
727 PHASE_STARTUP = 0, // called in main thread when scanner loads; called on EVERY scanner (called for help)
728 PHASE_INIT = 3, // called in main thread for every ENABLED scanner after all scanners loaded
729 PHASE_THREAD_BEFORE_SCAN = 4, // called in worker thread for every ENABLED scanner before first scan
730 PHASE_SCAN = 1, // called in worker thread for every ENABLED scanner to scan an sbuf
731 PHASE_SHUTDOWN = 2, // called in main thread for every ENABLED scanner when scanner is shutdown
732 } phase_t ;
733 static PrintOptions no_options; // in common.cpp
734
735 /********************
736 *** CONSTRUCTORS ***
737 ********************/
738
739 /* A scanner params with all of the instance variables, typically for scanning */
scanner_params(phase_t phase_,const sbuf_t & sbuf_,class feature_recorder_set & fs_,PrintOptions & print_options_)740 scanner_params(phase_t phase_,const sbuf_t &sbuf_,class feature_recorder_set &fs_,
741 PrintOptions &print_options_):
742 sp_version(CURRENT_SP_VERSION),
743 phase(phase_),sbuf(sbuf_),fs(fs_),depth(0),print_options(print_options_),info(0),sxml(0){
744 }
745
746 /* A scanner params with no print options */
scanner_params(phase_t phase_,const sbuf_t & sbuf_,class feature_recorder_set & fs_)747 scanner_params(phase_t phase_,const sbuf_t &sbuf_, class feature_recorder_set &fs_):
748 sp_version(CURRENT_SP_VERSION),
749 phase(phase_),sbuf(sbuf_),fs(fs_),depth(0),print_options(no_options),info(0),sxml(0){
750 }
751
752 /* A scanner params with no print options but an xmlstream */
scanner_params(phase_t phase_,const sbuf_t & sbuf_,class feature_recorder_set & fs_,std::stringstream * xmladd)753 scanner_params(phase_t phase_,const sbuf_t &sbuf_,class feature_recorder_set &fs_,std::stringstream *xmladd):
754 sp_version(CURRENT_SP_VERSION),
755 phase(phase_),sbuf(sbuf_),fs(fs_),depth(0),print_options(no_options),info(0),sxml(xmladd){
756 }
757
758 /** Construct a scanner_params for recursion from an existing sp and a new sbuf.
759 * Defaults to phase1
760 */
scanner_params(const scanner_params & sp_existing,const sbuf_t & sbuf_new)761 scanner_params(const scanner_params &sp_existing,const sbuf_t &sbuf_new):
762 sp_version(CURRENT_SP_VERSION),phase(sp_existing.phase),
763 sbuf(sbuf_new),fs(sp_existing.fs),depth(sp_existing.depth+1),
764 print_options(sp_existing.print_options),info(sp_existing.info),sxml(0){
765 assert(sp_existing.sp_version==CURRENT_SP_VERSION);
766 };
767
768 /**
769 * A scanner params with an empty info
770 */
771
772 /**************************
773 *** INSTANCE VARIABLES ***
774 **************************/
775
776 const int sp_version; /* version number of this structure */
777 const phase_t phase; /* v1: 0=startup, 1=normal, 2=shutdown (changed to phase_t in v1.3) */
778 const sbuf_t &sbuf; /* v1: what to scan / only valid in SCAN_PHASE */
779 class feature_recorder_set &fs; /* v1: where to put the results / only valid in SCAN_PHASE */
780 const uint32_t depth; /* v1: how far down are we? / only valid in SCAN_PHASE */
781
782 PrintOptions &print_options; /* v1: how to print / NOT USED IN SCANNERS */
783 scanner_info *info; /* v2: set/get parameters on startup, hasher */
784 std::stringstream *sxml; /* v3: on scanning and shutdown: CDATA added to XML stream (advanced feature) */
785 };
786
787
788 inline std::ostream & operator <<(std::ostream &os,const class scanner_params &sp){
789 os << "scanner_params(" << sp.sbuf << ")";
790 return os;
791 };
792
793 class recursion_control_block {
794 public:
795 /**
796 * @param callback_ - the function to call back
797 * @param partName_ - the part of the forensic path processed by this scanner.
798 */
recursion_control_block(process_t * callback_,std::string partName_)799 recursion_control_block(process_t *callback_,std::string partName_):
800 callback(callback_),partName(partName_){}
801 process_t *callback;
802 std::string partName; /* eg "ZIP", "GZIP" */
803 };
804
805 /* plugin.cpp. This will become a class... */
806 class scanner_def {
807 public:;
808 static uint32_t max_depth; // maximum depth to scan for the scanners
809 static uint32_t max_ngram; // maximum ngram size to change
scanner_def()810 scanner_def():scanner(0),enabled(false),info(),pathPrefix(){};
811 scanner_t *scanner; // pointer to the primary entry point
812 bool enabled; // is enabled?
813 scanner_info info; // info block sent to and returned by scanner
814 std::string pathPrefix; /* path prefix for recursive scanners */
815 };
816
817 namespace be13 {
818 /* plugin.cpp */
819
/* Static-only interface to the plug-in system. Every member is static; only
 * declarations appear here (the original comment places the code in plugin.cpp).
 */
struct plugin {
    typedef std::vector<scanner_def *> scanner_vector;
    static scanner_vector current_scanners;         // current scanners
    static bool     dup_data_alerts;  // notify when duplicate data is not processed
    static uint64_t dup_data_encountered; // amount of dup data encountered

    static void set_scanner_debug(int debug);

    /* Loading scanners; each loader receives the scanner_config to hand to the scanner. */
    static void load_scanner(scanner_t scanner,const scanner_info::scanner_config &sc); // load a specific scanner
    static void load_scanner_file(std::string fn,const scanner_info::scanner_config &sc);    // load a scanner from a file
    static void load_scanners(scanner_t * const *scanners_builtin,const scanner_info::scanner_config &sc); // load the scan_ plugins
    static void load_scanner_directory(const std::string &dirname,const scanner_info::scanner_config &sc); // load scanners in the directory
    static void load_scanner_directories(const std::vector<std::string> &dirnames,const scanner_info::scanner_config &sc);
    static void load_scanner_packet_handlers();

    // send every enabled scanner the phase message
    static void message_enabled_scanners(scanner_params::phase_t phase,feature_recorder_set &fs);

    // returns the named scanner, or 0 if no scanner of that name
    static scanner_t *find_scanner(const std::string &name);
    static void get_enabled_scanners(std::vector<std::string> &svector); // put the enabled scanners into the vector
    static void add_enabled_scanner_histograms_to_feature_recorder_set(feature_recorder_set &fs);
    static bool find_scanner_enabled(); // return true if a find scanner is enabled

    // enabling and disabling scanners:
    static void scanners_disable_all();                    // saves a command to disable all
    static void scanners_enable_all();                    // enable all of them
    static void set_scanner_enabled(const std::string &name,bool enable);
    static void set_scanner_enabled_all(bool enable);
    static void scanners_enable(const std::string &name); // saves a command to enable this scanner
    static void scanners_disable(const std::string &name); // saves a command to disable this scanner
    static void scanners_process_enable_disable_commands();               // process the enable/disable and config commands
    static void scanners_init(feature_recorder_set &fs); // init the scanners

    // print info about the scanners:
    static void info_scanners(bool detailed_info,
                              bool detailed_settings,
                              scanner_t * const *scanners_builtin,const char enable_opt,const char disable_opt);


    /* Run the phases on the scanners */
    static void phase_shutdown(feature_recorder_set &fs,std::stringstream *sxml=0); // sxml is where to put XML from scanners that shutdown
    static uint32_t get_max_depth_seen();
    static void process_sbuf(const class scanner_params &sp);                              /* process for feature extraction */
    static void process_packet(const be13::packet_info &pi);

    /* recorders */
    static void get_scanner_feature_file_names(feature_file_names_t &feature_file_names);

};
869 };
870
/** Render a signed int as text using default stream formatting. */
inline std::string itos(int i)
{
    std::ostringstream out;
    out << i;
    return out.str();
}
/** Render a double as text using default stream formatting (default precision). */
inline std::string dtos(double d)
{
    std::ostringstream out;
    out << d;
    return out.str();
}
/** Render an unsigned int as decimal text. */
inline std::string utos(unsigned int i)
{
    std::ostringstream out;
    out << i;
    return out.str();
}
/** Render a 64-bit unsigned integer as decimal text. */
inline std::string utos(uint64_t i)
{
    std::ostringstream out;
    out << i;
    return out.str();
}
/** Render a 16-bit unsigned integer as decimal text. */
inline std::string utos(uint16_t i)
{
    std::ostringstream out;
    out << i;
    return out.str();
}
safe_utf16to8(std::wstring s)876 inline std::string safe_utf16to8(std::wstring s){ // needs to be cleaned up
877 std::string utf8_line;
878 try {
879 utf8::utf16to8(s.begin(),s.end(),back_inserter(utf8_line));
880 } catch(utf8::invalid_utf16){
881 /* Exception thrown: bad UTF16 encoding */
882 utf8_line = "";
883 }
884 return utf8_line;
885 }
886
safe_utf8to16(std::string s)887 inline std::wstring safe_utf8to16(std::string s){ // needs to be cleaned up
888 std::wstring utf16_line;
889 try {
890 utf8::utf8to16(s.begin(),s.end(),back_inserter(utf16_line));
891 } catch(utf8::invalid_utf8){
892 /* Exception thrown: bad UTF16 encoding */
893 utf16_line = L"";
894 }
895 return utf16_line;
896 }
897
/**
 * Cut `line` off just before the first occurrence of `ch`.
 * The string is left untouched when `ch` does not occur in it.
 */
inline void truncate_at(std::string &line, char ch) {
    const std::string::size_type cut = line.find(ch);
    if (cut == std::string::npos) {
        return;                 // nothing to trim
    }
    line.erase(cut);            // drop ch and everything after it
}
903
#ifndef HAVE_ISXDIGIT
/**
 * Fallback used when HAVE_ISXDIGIT is not defined.
 * Returns 1 when c is an ASCII hexadecimal digit (0-9, a-f, A-F) and 0
 * otherwise.
 */
inline int isxdigit(int c)
{
    if (c >= '0' && c <= '9') return 1;     // decimal digit
    if (c >= 'a' && c <= 'f') return 1;     // lower-case hex letter
    if (c >= 'A' && c <= 'F') return 1;     // upper-case hex letter
    return 0;
}
#endif
910
911 /* Useful functions for scanners */
#define ONE_HUNDRED_NANO_SEC_TO_SECONDS 10000000
#define SECONDS_BETWEEN_WIN32_EPOCH_AND_UNIX_EPOCH 11644473600LL
/*
 * 11644473600 is the number of seconds between the Win32 epoch
 * and the Unix epoch.
 *
 * http://arstechnica.com/civis/viewtopic.php?f=20&t=111992
 * gmtime_r() is a POSIX function. You'll find a copy in util.cpp for Windows.
 */

/**
 * Convert a Microsoft timestamp (a count of 100-nanosecond ticks since
 * the Win32 epoch) to an ISO 8601 UTC string.
 *
 * @param time  tick count to convert.
 * @return      "YYYY-MM-DDTHH:MM:SSZ", or an empty string when the value
 *              cannot be broken down into a calendar date or formatted.
 */
inline std::string microsoftDateToISODate(const uint64_t &time)
{
    /* Do the epoch shift in signed arithmetic so that dates before 1970
     * come out as negative seconds instead of relying on unsigned wrap. */
    const int64_t unix_seconds =
        static_cast<int64_t>(time / ONE_HUNDRED_NANO_SEC_TO_SECONDS)
        - SECONDS_BETWEEN_WIN32_EPOCH_AND_UNIX_EPOCH;
    const time_t tmp = static_cast<time_t>(unix_seconds);

    struct tm time_tm = {};
    if (gmtime_r(&tmp, &time_tm) == 0) {
        return std::string();           // time_tm was not filled in; do not format it
    }
    char buf[256];
    if (strftime(buf, sizeof(buf), "%Y-%m-%dT%H:%M:%SZ", &time_tm) == 0) { // Zulu time
        return std::string();           // buf contents are unspecified on failure
    }
    return std::string(buf);
}
932
/**
 * Convert Unix timestamp to ISO format.
 *
 * @param t  seconds since the Unix epoch.
 * @return   "YYYY-MM-DDTHH:MM:SSZ", or an empty string when the value
 *           cannot be broken down into a calendar date or formatted
 *           (for example when the resulting year does not fit).
 */
inline std::string unixTimeToISODate(const uint64_t &t)
{
    struct tm time_tm = {};
    const time_t tmp = static_cast<time_t>(t);
    if (gmtime_r(&tmp, &time_tm) == 0) {
        return std::string();           // time_tm was not filled in; do not format it
    }
    char buf[256];
    if (strftime(buf, sizeof(buf), "%Y-%m-%dT%H:%M:%SZ", &time_tm) == 0) { // Zulu time
        return std::string();           // buf contents are unspecified on failure
    }
    return std::string(buf);
}
943
/* Many internal windows and Linux structures require a valid printable name in ASCII */
/**
 * Returns true when every byte of `name` lies in the printable ASCII
 * range 0x20 (space) through 0x7e ('~'); an empty name is accepted.
 */
inline bool validASCIIName(const std::string &name)
{
    for (std::string::const_iterator it = name.begin(); it != name.end(); ++it) {
        const unsigned char byte = static_cast<unsigned char>(*it);
        /* Below ' ' is a control character; 0x7f is DEL; 0x80 and up has the high bit set. */
        if (byte < ' ' || byte >= 0x7f) {
            return false;
        }
    }
    return true;
}
954
955 #endif
956