1 /* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */
2 
3 /*
4  * By design, this file can be read without reading config.h
5  * #include "config.h" must appear as the first line of your .cpp file.
6  */
7 
8 #ifndef PACKAGE_NAME
9 #error bulk_extractor_i.h included before config.h
10 #endif
11 
12 #ifndef BULK_EXTRACTOR_I_H
13 #define BULK_EXTRACTOR_I_H
14 
/* Debug values. Each of the first seven is a single, distinct bit. */
#define DEBUG_PEDANTIC    0x0001        // check values more rigorously
#define DEBUG_PRINT_STEPS 0x0002        // prints as each scanner is started
#define DEBUG_SCANNER     0x0004        // dump all feature writes to stderr
#define DEBUG_NO_SCANNERS 0x0008        // do not run the scanners
#define DEBUG_DUMP_DATA   0x0010        // dump data as it is seen
#define DEBUG_DECODING    0x0020        // debug decoders in scanner
#define DEBUG_INFO        0x0040        // print extra info
/* The two values below are plain decimal numbers, not single bits. */
#define DEBUG_EXIT_EARLY  1000          // just print the size of the volume and exit
#define DEBUG_ALLOCATE_512MiB 1002      // Allocate 512MiB, but don't set any flags
24 
25 /* We need netinet/in.h or windowsx.h */
26 #ifdef HAVE_NETINET_IN_H
27 # include <netinet/in.h>
28 #endif
29 
30 #include <assert.h>
31 
32 #if defined(MINGW) || defined(__MINGW__) || defined(__MINGW32__) || defined(__MINGW64__)
33 #ifndef WIN32
34 #define WIN32
35 #endif
36 #endif
37 
38 #if defined(WIN32) || defined(__MINGW32__)
39 #  include <winsock2.h>
40 #  include <windows.h>
41 #  include <windowsx.h>
42 #endif
43 
/* If the byte order hasn't been defined (or we are on Windows), assume Intel, i.e. little-endian. */

#if defined(WIN32) || !defined(__BYTE_ORDER)
#  define __LITTLE_ENDIAN 1234
#  define __BIG_ENDIAN    4321
#  define __BYTE_ORDER __LITTLE_ENDIAN
#endif

/* Sanity check: the bitfield layouts below only handle little- and big-endian.
 * (The previous test required __BYTE_ORDER to equal both values at once, so it could never fire.)
 */
#if (__BYTE_ORDER != __LITTLE_ENDIAN) && (__BYTE_ORDER != __BIG_ENDIAN)
#  error Invalid __BYTE_ORDER
#endif
55 
56 /**
57  * \addtogroup plugin_module
58  * @{
59  */
60 
61 /**
62  * \file
63  * bulk_extractor scanner plug_in architecture.
64  *
65  * Scanners are called with two parameters:
66  * A reference to a scanner_params (SP) object.
67  * A reference to a recursion_control_block (RCB) object.
68  *
69  * On startup, each scanner is called with a special SP and RCB.
70  * The scanners respond by setting fields in the SP and returning.
71  *
72  * When executing, once again each scanner is called with the SP and RCB.
73  * This is the only file that needs to be included for a scanner.
74  *
75  * \li \c phase_startup - scanners are loaded and register the names of the feature files they want.
76  * \li \c phase_scan - each scanner is called to analyze 1 or more sbufs.
77  * \li \c phase_shutdown - scanners are given a chance to shutdown
78  */
79 
80 #ifndef __cplusplus
81 # error bulk_extractor_i.h requires C++
82 #endif
83 
84 #include "sbuf.h"
85 #include "utf8.h"
86 #include "utils.h"                      // for gmtime_r
87 
88 #include <vector>
89 #include <set>
90 #include <map>
91 
92 #include "feature_recorder.h"
93 #include "feature_recorder_set.h"
94 
95 /* Network includes */
96 
97 /****************************************************************
98  *** pcap.h --- If we don't have it, fake it. ---
99  ***/
100 #ifdef HAVE_NETINET_IF_ETHER_H
101 # include <netinet/if_ether.h>
102 #endif
103 #ifdef HAVE_NETINET_IN_H
104 # include <netinet/in.h>
105 #endif
106 #ifdef HAVE_NET_ETHERNET_H
107 # include <net/ethernet.h>              // for freebsd
108 #endif
109 
110 
111 #if defined(HAVE_LIBPCAP)
112 #  ifdef HAVE_DIAGNOSTIC_REDUNDANT_DECLS
113 #    pragma GCC diagnostic ignored "-Wredundant-decls"
114 #  endif
115 #  if defined(HAVE_PCAP_PCAP_H)
116 #    include <pcap/pcap.h>
117 #    define GOT_PCAP
118 #  endif
119 #  if defined(HAVE_PCAP_H) && !defined(GOT_PCAP)
120 #    include <pcap.h>
121 #    define GOT_PCAP
122 #  endif
123 #  if defined(HAVE_WPCAP_PCAP_H) && !defined(GOT_PCAP)
124 #    include <wpcap/pcap.h>
125 #    define GOT_PCAP
126 #  endif
127 #  ifdef HAVE_DIAGNOSTIC_REDUNDANT_DECLS
128 #    pragma GCC diagnostic warning "-Wredundant-decls"
129 #  endif
130 #else
131 #  include "pcap_fake.h"
132 #endif
133 
134 /**
135  * \class scanner_params
136  * The scanner params class is the primary way that the bulk_extractor framework
137  * communicates with the scanners.
138  * @param sbuf - the buffer to be scanned
139  * @param feature_names - if fs==0, add to feature_names the feature file types that this
140  *                        scanner records.. The names can have a /c appended to indicate
141  *                        that the feature files should have context enabled. Do not scan.
142  * @param fs   - where the features should be saved. Must be provided if feature_names==0.
143  **/
144 
145 /*****************************************************************
146  *** bulk_extractor has a private implementation of IPv4 and IPv6,
147  *** UDP and TCP.
148  ***
 *** We did this because we found slightly different versions on
150  *** MacOS, Ubuntu Linux, Fedora Linux, Centos, Mingw, and Cygwin.
151  *** TCP/IP isn't changing anytime soon, and when it changes (as it
152  *** did with IPv6), these different systems all implemented it slightly
153  *** differently, and that caused a lot of problems for us.
154  *** So the BE13 API has a single implementation and it's good enough
155  *** for our uses.
156  ***/
157 
158 namespace be13 {
159 
#ifndef ETH_ALEN
#  define ETH_ALEN 6                    // ethernet address len
#endif

#ifndef IPPROTO_TCP
#  define IPPROTO_TCP     6               /* tcp */
#endif

    /* A bare ethernet address: ETH_ALEN octets, packed so it has no padding. */
    struct ether_addr {
        uint8_t ether_addr_octet[ETH_ALEN];
    } __attribute__ ((__packed__));

    /* 10Mb/s ethernet header.
     * Packed; ether_type is stored exactly as it appears in the frame,
     * so callers must convert it from network byte order.
     */
    struct ether_header {
        uint8_t  ether_dhost[ETH_ALEN]; /* destination eth addr */
        uint8_t  ether_shost[ETH_ALEN]; /* source ether addr    */
        uint16_t ether_type;            /* packet type ID field */
    } __attribute__ ((__packed__));
178 
179     /* The mess below is becuase these items are typedefs and
180      * structs on some systems and #defines on other systems
181      * So in the interest of portability we need to define *new*
182      * structures that are only used here
183      */
184 
185     typedef uint32_t ip4_addr_t;         // historical
186 
187     // on windows we use the definition that's in winsock
188     struct ip4_addr {
189         ip4_addr_t addr;
190     };
191 
192     /*
193      * Structure of an internet header, naked of options.
194      */
195     struct ip4 {
196 #if __BYTE_ORDER == __LITTLE_ENDIAN
197         uint8_t ip_hl:4;                /* header length */
198         uint8_t ip_v:4;                 /* version */
199 #endif
200 #if __BYTE_ORDER == __BIG_ENDIAN
201         uint8_t ip_v:4;                 /* version */
202         uint8_t ip_hl:4;                /* header length */
203 #endif
204         uint8_t  ip_tos;                /* type of service */
205         uint16_t ip_len;                /* total length */
206         uint16_t ip_id;                 /* identification */
207         uint16_t ip_off;                /* fragment offset field */
208 #define IP_RF 0x8000                    /* reserved fragment flag */
209 #define IP_DF 0x4000                    /* dont fragment flag */
210 #define IP_MF 0x2000                    /* more fragments flag */
211 #define IP_OFFMASK 0x1fff               /* mask for fragmenting bits */
212         uint8_t ip_ttl;                 /* time to live */
213         uint8_t ip_p;                   /* protocol */
214         uint16_t ip_sum;                        /* checksum */
215         struct ip4_addr ip_src, ip_dst; /* source and dest address */
216     } __attribute__ ((__packed__));
217 
218     struct ip4_dgram {
219         const struct ip4 *header;
220         const uint8_t *payload;
221         uint16_t payload_len;
222     };
223 
224     /*
225      * IPv6 header structure
226      */
227     struct ip6_addr {           // our own private ipv6 definition
228         union {
229             uint8_t   addr8[16];        // three ways to get the data
230             uint16_t  addr16[8];
231             uint32_t  addr32[4];
232         } addr;                    /* 128-bit IP6 address */
233     };
234     struct ip6_hdr {
235         union {
236             struct ip6_hdrctl {
237                 uint32_t ip6_un1_flow;  /* 20 bits of flow-ID */
238                 uint16_t ip6_un1_plen;  /* payload length */
239                 uint8_t  ip6_un1_nxt;   /* next header */
240                 uint8_t  ip6_un1_hlim;  /* hop limit */
241             } ip6_un1;
242             uint8_t ip6_un2_vfc;        /* 4 bits version, top 4 bits class */
243         } ip6_ctlun;
244         struct ip6_addr ip6_src;        /* source address */
245         struct ip6_addr ip6_dst;        /* destination address */
246     } __attribute__((__packed__));
247 
248     struct ip6_dgram {
249         const struct ip6_hdr *header;
250         const uint8_t *payload;
251         uint16_t payload_len;
252     };
253 
254     /*
255      * TCP header.
256      * Per RFC 793, September, 1981.
257      */
258     typedef     uint32_t tcp_seq;
259     struct tcphdr {
260         uint16_t th_sport;              /* source port */
261         uint16_t th_dport;              /* destination port */
262         tcp_seq th_seq;         /* sequence number */
263         tcp_seq th_ack;         /* acknowledgement number */
264 #  if __BYTE_ORDER == __LITTLE_ENDIAN
265         uint8_t th_x2:4;                /* (unused) */
266         uint8_t th_off:4;               /* data offset */
267 #  endif
268 #  if __BYTE_ORDER == __BIG_ENDIAN
269         uint8_t th_off:4;               /* data offset */
270         uint8_t th_x2:4;                /* (unused) */
271 #  endif
272         uint8_t th_flags;
273 #  define TH_FIN        0x01
274 #  define TH_SYN        0x02
275 #  define TH_RST        0x04
276 #  define TH_PUSH       0x08
277 #  define TH_ACK        0x10
278 #  define TH_URG        0x20
279     uint16_t th_win;            /* window */
280     uint16_t th_sum;            /* checksum */
281     uint16_t th_urp;            /* urgent pointer */
282 };
283 /*
284  * The packet_info structure records packets after they are read from the pcap library.
285  * It preserves the original pcap information and information decoded from the MAC and
286  * VLAN (IEEE 802.1Q) layers, as well as information that might be present from 802.11
287  * interfaces. However it does not preserve the full radiotap information.
288  *
289  * packet_info is created to make it easier to write network forensic software. It encapsulates
290  * much of the common knowledge needed to operate on packet-based IP networks.
291  *
292  * @param ts   - the actual packet time to use (adjusted)
293  * @param pcap_data - Original data offset point from pcap
294  * @param data - the actual packet data, minus the MAC layer
295  * @param datalen - How much data is available at the datalen pointer
296  *
297  */
298 class packet_info {
299 public:
300     // IPv4 header offsets
301     static const size_t ip4_proto_off = 9;
302     static const size_t ip4_src_off = 12;
303     static const size_t ip4_dst_off = 16;
304     // IPv6 header offsets
305     static const size_t ip6_nxt_hdr_off = 6;
306     static const size_t ip6_plen_off = 4;
307     static const size_t ip6_src_off = 8;
308     static const size_t ip6_dst_off = 24;
309     // TCP header offsets
310     static const size_t tcp_sport_off = 0;
311     static const size_t tcp_dport_off = 2;
312 
313     class frame_too_short : public std::logic_error {
314     public:
frame_too_short()315         frame_too_short() :
316             std::logic_error("frame too short to contain requisite network structures") {}
317     };
318 
319     enum vlan_t {NO_VLAN=-1};
320     /** create a packet, usually an IP packet.
321      * @param d - start of MAC packet
322      * @param d2 - start of IP data
323      */
packet_info(const int dlt,const struct pcap_pkthdr * h,const u_char * d,const struct timeval & ts_,const uint8_t * d2,size_t dl2)324     packet_info(const int dlt,const struct pcap_pkthdr *h,const u_char *d,
325                 const struct timeval &ts_,const uint8_t *d2,size_t dl2):
326         pcap_dlt(dlt),pcap_hdr(h),pcap_data(d),ts(ts_),ip_data(d2),ip_datalen(dl2){}
packet_info(const int dlt,const struct pcap_pkthdr * h,const u_char * d)327     packet_info(const int dlt,const struct pcap_pkthdr *h,const u_char *d):
328         pcap_dlt(dlt),pcap_hdr(h),pcap_data(d),ts(h->ts),ip_data(d),ip_datalen(h->caplen){}
329 
330     const int    pcap_dlt;              // data link type; needed by libpcap, not provided
331     const struct pcap_pkthdr *pcap_hdr; // provided by libpcap
332     const u_char *pcap_data;            // provided by libpcap; where the MAC layer begins
333     const struct timeval &ts;           // when packet received; possibly modified before packet_info created
334     const uint8_t *const ip_data;       // pointer to where ip data begins
335     const size_t ip_datalen;            // length of ip data
336 
337     static u_short nshort(const u_char *buf,size_t pos);   // return a network byte order short at offset pos
338     int     ip_version() const;         // returns 4, 6 or 0
339     u_short ether_type() const;         // returns 0 if not IEEE802, otherwise returns ether_type
340     int     vlan() const;               // returns NO_VLAN if not IEEE802 or not VLAN, othererwise VID
341     const uint8_t *get_ether_dhost() const;   // returns a pointer to ether dhost if ether packet
342     const uint8_t *get_ether_shost() const;   // returns a pointer to ether shost if ether packet
343 
344     // packet typing
345     bool    is_ip4() const;
346     bool    is_ip6() const;
347     bool    is_ip4_tcp() const;
348     bool    is_ip6_tcp() const;
349     // packet extraction
350     // IPv4 - return pointers to fields or throws frame_too_short exception
351     const struct in_addr *get_ip4_src() const;
352     const struct in_addr *get_ip4_dst() const;
353     uint8_t get_ip4_proto() const;
354     // IPv6
355     uint8_t  get_ip6_nxt_hdr() const;
356     uint16_t get_ip6_plen() const;
357     const struct ip6_addr *get_ip6_src() const;
358     const struct ip6_addr *get_ip6_dst() const;
359     // TCP
360     uint16_t get_ip4_tcp_sport() const;
361     uint16_t get_ip4_tcp_dport() const;
362     uint16_t get_ip6_tcp_sport() const;
363     uint16_t get_ip6_tcp_dport() const;
364 };
365 
#ifdef DLT_IEEE802
    /**
     * Return the ether_type field of the frame in host byte order.
     * Only consulted when the data link type is DLT_IEEE802 or DLT_EN10MB;
     * for every other link type the result is 0.
     * No length check is made on the captured frame here.
     */
    inline u_short packet_info::ether_type() const
    {
        const bool has_ether_header = (pcap_dlt==DLT_IEEE802 || pcap_dlt==DLT_EN10MB);
        if(!has_ether_header){
            return 0;
        }
        const struct ether_header *eth = (const struct ether_header *) pcap_data;
        return ntohs(eth->ether_type);
    }
#endif
376 
/* Ethernet type codes. Each is defined only if the system headers did not already provide it. */
#ifndef ETHERTYPE_PUP
#define ETHERTYPE_PUP           0x0200          /* Xerox PUP */
#endif

#ifndef ETHERTYPE_SPRITE
#define ETHERTYPE_SPRITE        0x0500          /* Sprite */
#endif

#ifndef ETHERTYPE_IP
#define ETHERTYPE_IP            0x0800          /* IP */
#endif

#ifndef ETHERTYPE_ARP
#define ETHERTYPE_ARP           0x0806          /* Address resolution */
#endif

#ifndef ETHERTYPE_REVARP
#define ETHERTYPE_REVARP        0x8035          /* Reverse ARP */
#endif

#ifndef ETHERTYPE_AT
#define ETHERTYPE_AT            0x809B          /* AppleTalk protocol */
#endif

#ifndef ETHERTYPE_AARP
#define ETHERTYPE_AARP          0x80F3          /* AppleTalk ARP */
#endif

#ifndef ETHERTYPE_VLAN
#define ETHERTYPE_VLAN          0x8100          /* IEEE 802.1Q VLAN tagging */
#endif

#ifndef ETHERTYPE_IPX
#define ETHERTYPE_IPX           0x8137          /* IPX */
#endif

#ifndef ETHERTYPE_IPV6
#define ETHERTYPE_IPV6          0x86dd          /* IP protocol version 6 */
#endif

#ifndef ETHERTYPE_LOOPBACK
#define ETHERTYPE_LOOPBACK      0x9000          /* used to test interfaces */
#endif
420 
421 
nshort(const u_char * buf,size_t pos)422     inline u_short packet_info::nshort(const u_char *buf,size_t pos)
423     {
424         return (buf[pos]<<8) | (buf[pos+1]);
425     }
426 
vlan()427     inline int packet_info::vlan() const
428     {
429         if(ether_type()==ETHERTYPE_VLAN){
430             return nshort(pcap_data,sizeof(struct ether_header));
431         }
432         return -1;
433     }
434 
ip_version()435     inline int packet_info::ip_version() const
436     {
437         /* This takes advantage of the fact that ip4 and ip6 put the version number in the same place */
438         if (ip_datalen >= sizeof(struct ip4)) {
439             const struct ip4 *ip_header = (struct ip4 *) ip_data;
440             switch(ip_header->ip_v){
441             case 4: return 4;
442             case 6: return 6;
443             }
444         }
445         return 0;
446     }
447 
448     // packet typing
449 
is_ip4()450     inline bool packet_info::is_ip4() const
451     {
452         return ip_version() == 4;
453     }
454 
is_ip6()455     inline bool packet_info::is_ip6() const
456     {
457         return ip_version() == 6;
458     }
459 
is_ip4_tcp()460     inline bool packet_info::is_ip4_tcp() const
461     {
462         if(ip_datalen < sizeof(struct ip4) + sizeof(struct tcphdr)) {
463             return false;
464         }
465         return *((uint8_t*) (ip_data + ip4_proto_off)) == IPPROTO_TCP;
466         return false;
467     }
468 
is_ip6_tcp()469     inline bool packet_info::is_ip6_tcp() const
470     {
471         if(ip_datalen < sizeof(struct ip6_hdr) + sizeof(struct tcphdr)) {
472             return false;
473         }
474         return *((uint8_t*) (ip_data + ip6_nxt_hdr_off)) == IPPROTO_TCP;
475     }
476 
477     // packet extraction
    // precondition: the appropriate packet type function must return true before using these functions.
479     //     example: is_ip4_tcp() must return true before calling get_ip4_tcp_sport()
480 
481     // Get ether addresses; should this handle vlan and such?
get_ether_dhost()482     inline const uint8_t *packet_info::get_ether_dhost() const
483     {
484         if(pcap_hdr->caplen < sizeof(struct ether_addr)){
485             throw new frame_too_short();
486         }
487         return ((const struct ether_header *)pcap_data)->ether_dhost;
488     }
489 
get_ether_shost()490     inline const uint8_t *packet_info::get_ether_shost() const
491     {
492         if(pcap_hdr->caplen < sizeof(struct ether_addr)){
493             throw new frame_too_short();
494         }
495         return ((const struct ether_header *)pcap_data)->ether_shost;
496     }
497 
498     // IPv4
499 #  ifdef HAVE_DIAGNOSTIC_CAST_ALIGN
500 #    pragma GCC diagnostic ignored "-Wcast-align"
501 #  endif
get_ip4_src()502     inline const struct in_addr *packet_info::get_ip4_src() const
503     {
504         if(ip_datalen < sizeof(struct ip4)) {
505             throw new frame_too_short();
506         }
507         return (const struct in_addr *) ip_data + ip4_src_off;
508     }
get_ip4_dst()509     inline const struct in_addr *packet_info::get_ip4_dst() const
510     {
511         if(ip_datalen < sizeof(struct ip4)) {
512             throw new frame_too_short();
513         }
514         return (const struct in_addr *) ip_data + ip4_dst_off;
515     }
516 #  ifdef HAVE_DIAGNOSTIC_CAST_ALIGN
517 #    pragma GCC diagnostic warning "-Wcast-align"
518 #  endif
get_ip4_proto()519     inline uint8_t packet_info::get_ip4_proto() const
520     {
521         if(ip_datalen < sizeof(struct ip4)) {
522             throw new frame_too_short();
523         }
524         return *((uint8_t *) (ip_data + ip4_proto_off));
525     }
526     // IPv6
get_ip6_nxt_hdr()527     inline uint8_t packet_info::get_ip6_nxt_hdr() const
528     {
529         if(ip_datalen < sizeof(struct ip6_hdr)) {
530             throw new frame_too_short();
531         }
532         return *((uint8_t *) (ip_data + ip6_nxt_hdr_off));
533     }
get_ip6_plen()534     inline uint16_t packet_info::get_ip6_plen() const
535     {
536         if(ip_datalen < sizeof(struct ip6_hdr)) {
537             throw new frame_too_short();
538         }
539         //return ntohs(*((uint16_t *) (ip_data + ip6_plen_off)));
540         return nshort(ip_data,ip6_plen_off);
541     }
542 #  ifdef HAVE_DIAGNOSTIC_CAST_ALIGN
543 #    pragma GCC diagnostic ignored "-Wcast-align"
544 #  endif
get_ip6_src()545     inline const struct ip6_addr *packet_info::get_ip6_src() const
546     {
547         if(ip_datalen < sizeof(struct ip6_hdr)) {
548             throw new frame_too_short();
549         }
550         return (const struct ip6_addr *) ip_data + ip6_src_off;
551     }
get_ip6_dst()552     inline const struct ip6_addr *packet_info::get_ip6_dst() const
553     {
554         if(ip_datalen < sizeof(struct ip6_hdr)) {
555             throw new frame_too_short();
556         }
557         return (const struct ip6_addr *) ip_data + ip6_dst_off;
558     }
559 #  ifdef HAVE_DIAGNOSTIC_CAST_ALIGN
560 #    pragma GCC diagnostic warning "-Wcast-align"
561 #  endif
562 
563     // TCP
get_ip4_tcp_sport()564     inline uint16_t packet_info::get_ip4_tcp_sport() const
565     {
566         if(ip_datalen < sizeof(struct tcphdr) + sizeof(struct ip4)) {
567             throw new frame_too_short();
568         }
569         //return ntohs(*((uint16_t *) (ip_data + sizeof(struct ip4) + tcp_sport_off)));
570         return nshort(ip_data,sizeof(struct ip4) + tcp_sport_off);
571     }
get_ip4_tcp_dport()572     inline uint16_t packet_info::get_ip4_tcp_dport() const
573     {
574         if(ip_datalen < sizeof(struct tcphdr) + sizeof(struct ip4)) {
575             throw new frame_too_short();
576         }
577         //return ntohs(*((uint16_t *) (ip_data + sizeof(struct ip4) + tcp_dport_off)));
578         return nshort(ip_data,sizeof(struct ip4) + tcp_dport_off); //
579 
580     }
get_ip6_tcp_sport()581     inline uint16_t packet_info::get_ip6_tcp_sport() const
582     {
583         if(ip_datalen < sizeof(struct tcphdr) + sizeof(struct ip6_hdr)) {
584             throw new frame_too_short();
585         }
586         //return ntohs(*((uint16_t *) (ip_data + sizeof(struct ip6_hdr) + tcp_sport_off)));
587         return nshort(ip_data,sizeof(struct ip6_hdr) + tcp_sport_off); //
588     }
get_ip6_tcp_dport()589     inline uint16_t packet_info::get_ip6_tcp_dport() const
590     {
591         if(ip_datalen < sizeof(struct tcphdr) + sizeof(struct ip6_hdr)) {
592             throw new frame_too_short();
593         }
594         //return ntohs(*((uint16_t *) (ip_data + sizeof(struct ip6_hdr) + tcp_dport_off)));
595         return nshort(ip_data,sizeof(struct ip6_hdr) + tcp_dport_off); //
596     }
597 };
598 
599 
600 typedef void scanner_t(const class scanner_params &sp,const class recursion_control_block &rcb);
601 typedef void process_t(const class scanner_params &sp);
602 typedef void packet_callback_t(void *user,const be13::packet_info &pi);
603 
604 /** scanner_info gets filled in by the scanner to tell the caller about the scanner.
605  *
606  */
607 class scanner_info {
608 private:
609     static std::stringstream helpstream; // where scanner info help messages are saved.
610 
611     // default copy construction and assignment are meaningless
612     // and not implemented
613     scanner_info(const scanner_info &i);
614     scanner_info &operator=(const scanner_info &i);
615  public:
helpstr()616     static std::string helpstr(){return helpstream.str();}
617     typedef std::map<std::string,std::string>  config_t; // configuration for scanner passed in
618 
619     /* scanner flags */
620     static const int SCANNER_DISABLED       = 0x001; // v1: enabled by default
621     static const int SCANNER_NO_USAGE       = 0x002; // v1: do not show scanner in usage
622     static const int SCANNER_NO_ALL         = 0x004; // v2: do not enable with -eall
623     static const int SCANNER_FIND_SCANNER   = 0x008; // v2: this scanner uses the find_list
624     static const int SCANNER_RECURSE        = 0x010; // v3: this scanner will recurse
625     static const int SCANNER_RECURSE_EXPAND = 0x020; // v3: recurses AND result is >= original size
626     static const int SCANNER_WANTS_NGRAMS   = 0x040; // v3: Scanner gets buffers that are constant n-grams
627     static const int SCANNER_FAST_FIND      = 0x080; // v3: This scanner is a very fast FIND scanner
628     static const int SCANNER_DEPTH_0        = 0x100; // v3: scanner only runs at depth 0 by default
629     static const int CURRENT_SI_VERSION     = 4;
630 
flag_to_string(const int flag)631     static const std::string flag_to_string(const int flag){
632         std::string ret;
633         if(flag==0) ret += "NONE ";
634         if(flag & SCANNER_DISABLED) ret += "SCANNER_DISABLED ";
635         if(flag & SCANNER_NO_USAGE) ret += "SCANNER_NO_USAGE ";
636         if(flag & SCANNER_NO_ALL) ret += "SCANNER_NO_ALL ";
637         if(flag & SCANNER_FIND_SCANNER) ret += "SCANNER_FIND_SCANNER ";
638         if(flag & SCANNER_RECURSE) ret += "SCANNER_RECURSE ";
639         if(flag & SCANNER_RECURSE_EXPAND) ret += "SCANNER_RECURSE_EXPAND ";
640         if(flag & SCANNER_WANTS_NGRAMS) ret += "SCANNER_WANTS_NGRAMS ";
641         return ret;
642     }
643 
644     /* Global config is passed to each scanner as a pointer when it is loaded.
645      * Scanner histograms are added to 'histograms' by machinery.
646      */
647     struct scanner_config {
scanner_configscanner_config648         scanner_config():namevals(),debug(){};
~scanner_configscanner_config649         virtual ~scanner_config(){}
650         config_t  namevals;             // v3: (input) name=val map
651         int       debug;                // v3: (input) current debug level
652     };
653 
654     // never change the order or delete old fields, or else you will
655     // break backwards compatability
scanner_info()656     scanner_info():si_version(CURRENT_SI_VERSION),
657                    name(),author(),description(),url(),scanner_version(),flags(0),feature_names(),
658                    histogram_defs(),packet_user(),packet_cb(),config(){}
659     /* PASSED FROM SCANNER to API: */
660     int         si_version;             // version number for this structure
661     std::string      name;                   // v1: (output) scanner name
662     std::string      author;                 // v1: (output) who wrote me?
663     std::string      description;            // v1: (output) what do I do?
664     std::string      url;                    // v1: (output) where I come from
665     std::string      scanner_version;        // v1: (output) version for the scanner
666     uint64_t    flags;                  // v1: (output) flags
667     std::set<std::string> feature_names;          // v1: (output) features I need
668     histogram_defs_t histogram_defs;        // v1: (output) histogram definition info
669     void        *packet_user;           // v2: (output) data for network callback
670     packet_callback_t *packet_cb;       // v2: (output) callback for processing network packets, or NULL
671 
672     /* PASSED FROM API TO SCANNER; access with functions below */
673     const scanner_config *config;       // v3: (intput to scanner) config
674 
675     // These methods are implemented in the plugin system for the scanner to get config information.
676     // The get_config methods should be called on the si object during PHASE_STARTUP
677     virtual void get_config(const scanner_info::config_t &c,
678                             const std::string &name,std::string *val,const std::string &help);
679     virtual void get_config(const std::string &name,std::string *val,const std::string &help);
680     virtual void get_config(const std::string &name,uint64_t *val,const std::string &help);
681     virtual void get_config(const std::string &name,int32_t *val,const std::string &help);
682     virtual void get_config(const std::string &name,uint32_t *val,const std::string &help);
683     virtual void get_config(const std::string &name,uint16_t *val,const std::string &help);
684     virtual void get_config(const std::string &name,uint8_t *val,const std::string &help);
685 #ifdef __APPLE__
686     virtual void get_config(const std::string &name,size_t *val,const std::string &help);
687 #define HAVE_GET_CONFIG_SIZE_T
688 #endif
689     virtual void get_config(const std::string &name,bool *val,const std::string &help);
~scanner_info()690     virtual ~scanner_info(){};
691 };
692 #include <map>
693 /**
694  * The scanner_params class is a way for sending the scanner parameters
695  * for this particular sbuf to be scanned.
696  */
697 
698 class scanner_params {
699  public:
700     enum print_mode_t {MODE_NONE=0,MODE_HEX,MODE_RAW,MODE_HTTP};
701     static const int CURRENT_SP_VERSION=3;
702 
703     typedef std::map<std::string,std::string> PrintOptions;
getPrintMode(const PrintOptions & po)704     static print_mode_t getPrintMode(const PrintOptions &po){
705         PrintOptions::const_iterator p = po.find("print_mode_t");
706         if(p != po.end()){
707             if(p->second=="MODE_NONE") return MODE_NONE;
708             if(p->second=="MODE_HEX") return MODE_HEX;
709             if(p->second=="MODE_RAW") return MODE_RAW;
710             if(p->second=="MODE_HTTP") return MODE_HTTP;
711         }
712         return MODE_NONE;
713     }
setPrintMode(PrintOptions & po,int mode)714     static void setPrintMode(PrintOptions &po,int mode){
715         switch(mode){
716         default:
717         case MODE_NONE:po["print_mode_t"]="MODE_NONE";return;
718         case MODE_HEX:po["print_mode_t"]="MODE_HEX";return;
719         case MODE_RAW:po["print_mode_t"]="MODE_RAW";return;
720         case MODE_HTTP:po["print_mode_t"]="MODE_HTTP";return;
721         }
722     }
723 
724     // phase_t specifies when the scanner is being called
725     typedef enum {
726         PHASE_NONE     = -1,
727         PHASE_STARTUP  = 0,            // called in main thread when scanner loads; called on EVERY scanner (called for help)
728         PHASE_INIT     = 3,            // called in main thread for every ENABLED scanner after all scanners loaded
729         PHASE_THREAD_BEFORE_SCAN = 4,  // called in worker thread for every ENABLED scanner before first scan
730         PHASE_SCAN     = 1,            // called in worker thread for every ENABLED scanner to scan an sbuf
731         PHASE_SHUTDOWN = 2,            // called in main thread for every ENABLED scanner when scanner is shutdown
732     } phase_t ;
733     static PrintOptions no_options;    // in common.cpp
734 
735     /********************
736      *** CONSTRUCTORS ***
737      ********************/
738 
739     /* A scanner params with all of the instance variables, typically for scanning  */
scanner_params(phase_t phase_,const sbuf_t & sbuf_,class feature_recorder_set & fs_,PrintOptions & print_options_)740     scanner_params(phase_t phase_,const sbuf_t &sbuf_,class feature_recorder_set &fs_,
741                    PrintOptions &print_options_):
742         sp_version(CURRENT_SP_VERSION),
743         phase(phase_),sbuf(sbuf_),fs(fs_),depth(0),print_options(print_options_),info(0),sxml(0){
744     }
745 
746     /* A scanner params with no print options */
scanner_params(phase_t phase_,const sbuf_t & sbuf_,class feature_recorder_set & fs_)747     scanner_params(phase_t phase_,const sbuf_t &sbuf_, class feature_recorder_set &fs_):
748         sp_version(CURRENT_SP_VERSION),
749         phase(phase_),sbuf(sbuf_),fs(fs_),depth(0),print_options(no_options),info(0),sxml(0){
750     }
751 
752     /* A scanner params with no print options but an xmlstream */
scanner_params(phase_t phase_,const sbuf_t & sbuf_,class feature_recorder_set & fs_,std::stringstream * xmladd)753     scanner_params(phase_t phase_,const sbuf_t &sbuf_,class feature_recorder_set &fs_,std::stringstream *xmladd):
754         sp_version(CURRENT_SP_VERSION),
755         phase(phase_),sbuf(sbuf_),fs(fs_),depth(0),print_options(no_options),info(0),sxml(xmladd){
756     }
757 
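    /** Construct a scanner_params for recursion from an existing sp and a new sbuf.
     * The phase, feature recorder set, print options and info are taken from
     * sp_existing; depth is sp_existing.depth+1 and sxml is not carried over.
     */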
scanner_params(const scanner_params & sp_existing,const sbuf_t & sbuf_new)761     scanner_params(const scanner_params &sp_existing,const sbuf_t &sbuf_new):
762         sp_version(CURRENT_SP_VERSION),phase(sp_existing.phase),
763         sbuf(sbuf_new),fs(sp_existing.fs),depth(sp_existing.depth+1),
764         print_options(sp_existing.print_options),info(sp_existing.info),sxml(0){
765         assert(sp_existing.sp_version==CURRENT_SP_VERSION);
766     };
767 
768     /**
769      * A scanner params with an empty info
770      */
771 
772     /**************************
773      *** INSTANCE VARIABLES ***
774      **************************/
775 
776     const int                   sp_version;                /* version number of this structure */
777     const phase_t               phase;                 /* v1: 0=startup, 1=normal, 2=shutdown (changed to phase_t in v1.3) */
778     const sbuf_t                &sbuf;                 /* v1: what to scan / only valid in SCAN_PHASE */
779     class feature_recorder_set  &fs;     /* v1: where to put the results / only valid in SCAN_PHASE */
780     const uint32_t              depth;            /* v1: how far down are we? / only valid in SCAN_PHASE */
781 
782     PrintOptions                &print_options;    /* v1: how to print / NOT USED IN SCANNERS */
783     scanner_info                *info;             /* v2: set/get parameters on startup, hasher */
784     std::stringstream           *sxml;         /* v3: on scanning and shutdown: CDATA added to XML stream (advanced feature) */
785 };
786 
787 
788 inline std::ostream & operator <<(std::ostream &os,const class scanner_params &sp){
789     os << "scanner_params(" << sp.sbuf << ")";
790     return os;
791 };
792 
793 class recursion_control_block {
794  public:
795 /**
796  * @param callback_ - the function to call back
797  * @param partName_ - the part of the forensic path processed by this scanner.
798  */
recursion_control_block(process_t * callback_,std::string partName_)799     recursion_control_block(process_t *callback_,std::string partName_):
800         callback(callback_),partName(partName_){}
801     process_t *callback;
802     std::string partName;            /* eg "ZIP", "GZIP" */
803 };
804 
805 /* plugin.cpp. This will become a class...  */
806 class scanner_def {
807 public:;
808     static uint32_t max_depth;          // maximum depth to scan for the scanners
809     static uint32_t max_ngram;          // maximum ngram size to change
scanner_def()810     scanner_def():scanner(0),enabled(false),info(),pathPrefix(){};
811     scanner_t  *scanner;                // pointer to the primary entry point
812     bool        enabled;                // is enabled?
813     scanner_info info;                  // info block sent to and returned by scanner
814     std::string      pathPrefix;             /* path prefix for recursive scanners */
815 };
816 
817 namespace be13 {
818     /* plugin.cpp */
819 
820     struct plugin {
821         typedef std::vector<scanner_def *> scanner_vector;
822         static scanner_vector current_scanners;                         // current scanners
823         static bool dup_data_alerts;  // notify when duplicate data is not processed
824         static uint64_t dup_data_encountered; // amount of dup data encountered
825 
826         static void set_scanner_debug(int debug);
827 
828         static void load_scanner(scanner_t scanner,const scanner_info::scanner_config &sc); // load a specific scanner
829         static void load_scanner_file(std::string fn,const scanner_info::scanner_config &sc);    // load a scanner from a file
830         static void load_scanners(scanner_t * const *scanners_builtin,const scanner_info::scanner_config &sc); // load the scan_ plugins
831         static void load_scanner_directory(const std::string &dirname,const scanner_info::scanner_config &sc); // load scanners in the directory
832         static void load_scanner_directories(const std::vector<std::string> &dirnames,const scanner_info::scanner_config &sc);
833         static void load_scanner_packet_handlers();
834 
835         // send every enabled scanner the phase message
836         static void message_enabled_scanners(scanner_params::phase_t phase,feature_recorder_set &fs);
837 
838         // returns the named scanner, or 0 if no scanner of that name
839         static scanner_t *find_scanner(const std::string &name);
840         static void get_enabled_scanners(std::vector<std::string> &svector); // put the enabled scanners into the vector
841         static void add_enabled_scanner_histograms_to_feature_recorder_set(feature_recorder_set &fs);
842         static bool find_scanner_enabled(); // return true if a find scanner is enabled
843 
844         // print info about the scanners:
845         static void scanners_disable_all();                    // saves a command to disable all
846         static void scanners_enable_all();                    // enable all of them
847         static void set_scanner_enabled(const std::string &name,bool enable);
848         static void set_scanner_enabled_all(bool enable);
849         static void scanners_enable(const std::string &name); // saves a command to enable this scanner
850         static void scanners_disable(const std::string &name); // saves a command to disable this scanner
851         static void scanners_process_enable_disable_commands();               // process the enable/disable and config commands
852         static void scanners_init(feature_recorder_set &fs); // init the scanners
853 
854         static void info_scanners(bool detailed_info,
855                                   bool detailed_settings,
856                                   scanner_t * const *scanners_builtin,const char enable_opt,const char disable_opt);
857 
858 
859         /* Run the phases on the scanners */
860         static void phase_shutdown(feature_recorder_set &fs,std::stringstream *sxml=0); // sxml is where to put XML from scanners that shutdown
861         static uint32_t get_max_depth_seen();
862         static void process_sbuf(const class scanner_params &sp);                              /* process for feature extraction */
863         static void process_packet(const be13::packet_info &pi);
864 
865         /* recorders */
866         static void get_scanner_feature_file_names(feature_file_names_t &feature_file_names);
867 
868     };
869 };
870 
/* Render a signed int as text using the default stream formatting. */
inline std::string itos(int i)
{
    std::ostringstream out;
    out << i;
    return out.str();
}
/* Render a double as text using the default stream formatting (default precision and notation). */
inline std::string dtos(double d)
{
    std::ostringstream out;
    out << d;
    return out.str();
}
/* Render an unsigned int as text using the default stream formatting. */
inline std::string utos(unsigned int i)
{
    std::ostringstream out;
    out << i;
    return out.str();
}
/* Render a 64-bit unsigned integer as text using the default stream formatting. */
inline std::string utos(uint64_t i)
{
    std::ostringstream out;
    out << i;
    return out.str();
}
/* Render a 16-bit unsigned integer as a decimal number using the default stream formatting. */
inline std::string utos(uint16_t i)
{
    std::ostringstream out;
    out << i;
    return out.str();
}
safe_utf16to8(std::wstring s)876 inline std::string safe_utf16to8(std::wstring s){ // needs to be cleaned up
877     std::string utf8_line;
878     try {
879         utf8::utf16to8(s.begin(),s.end(),back_inserter(utf8_line));
880     } catch(utf8::invalid_utf16){
881         /* Exception thrown: bad UTF16 encoding */
882         utf8_line = "";
883     }
884     return utf8_line;
885 }
886 
safe_utf8to16(std::string s)887 inline std::wstring safe_utf8to16(std::string s){ // needs to be cleaned up
888     std::wstring utf16_line;
889     try {
890         utf8::utf8to16(s.begin(),s.end(),back_inserter(utf16_line));
891     } catch(utf8::invalid_utf8){
892         /* Exception thrown: bad UTF16 encoding */
893         utf16_line = L"";
894     }
895     return utf16_line;
896 }
897 
898 // truncate string at the matching char
/* Cut line just before the first occurrence of ch; a line without ch is left unchanged. */
inline void truncate_at(std::string &line, char ch) {
    const std::string::size_type cut = line.find(ch);
    if (cut == std::string::npos) {
        return;
    }
    line.erase(cut);
}
903 
#ifndef HAVE_ISXDIGIT
/* Fallback used when HAVE_ISXDIGIT is not defined.
 * Returns 1 when c is one of 0-9, a-f or A-F, and 0 otherwise.
 */
inline int isxdigit(int c)
{
    if (c >= '0' && c <= '9') return 1;
    if (c >= 'a' && c <= 'f') return 1;
    if (c >= 'A' && c <= 'F') return 1;
    return 0;
}
#endif
910 
911 /* Useful functions for scanners */
#define ONE_HUNDRED_NANO_SEC_TO_SECONDS 10000000
#define SECONDS_BETWEEN_WIN32_EPOCH_AND_UNIX_EPOCH 11644473600LL
/*
 * 11644473600 is the number of seconds between the Win32 epoch
 * and the Unix epoch.
 *
 * http://arstechnica.com/civis/viewtopic.php?f=20&t=111992
 * gmtime_r() is not available on Windows. You'll find a copy in util.cpp for Windows.
 */

/**
 * Convert a Microsoft timestamp to an ISO 8601 string in UTC.
 *
 * @param time  number of 100-nanosecond units since the Win32 epoch.
 * @return "YYYY-MM-DDTHH:MM:SSZ", or an empty string when the value does not
 *         fit in time_t or cannot be broken down and formatted.
 */
inline std::string microsoftDateToISODate(const uint64_t &time)
{
    /* Shift to the Unix epoch in signed arithmetic: timestamps earlier than
     * 1970 must come out as negative seconds, not as a wrapped unsigned value.
     */
    const long long unix_seconds =
        static_cast<long long>(time / ONE_HUNDRED_NANO_SEC_TO_SECONDS)
        - SECONDS_BETWEEN_WIN32_EPOCH_AND_UNIX_EPOCH;

    /* Refuse values that would be silently truncated by a narrow time_t. */
    const time_t tmp = static_cast<time_t>(unix_seconds);
    if (static_cast<long long>(tmp) != unix_seconds) return std::string();

    struct tm time_tm;
    /* On failure gmtime_r leaves time_tm unset, so it must not reach strftime. */
    if (!gmtime_r(&tmp, &time_tm)) return std::string();

    char buf[256];
    /* strftime returns 0 when nothing usable was written to buf. */
    if (strftime(buf, sizeof(buf), "%Y-%m-%dT%H:%M:%SZ", &time_tm) == 0) return std::string(); // Zulu time
    return std::string(buf);
}
932 
933 /* Convert Unix timestamp to ISO format */
/**
 * Format a Unix timestamp (seconds) as an ISO 8601 string in UTC.
 *
 * @param t  seconds value; it is converted to time_t before being broken down.
 * @return "YYYY-MM-DDTHH:MM:SSZ", or an empty string when the value cannot be
 *         broken down into a calendar date (e.g. the year does not fit in
 *         struct tm) or cannot be formatted.
 */
inline std::string unixTimeToISODate(const uint64_t &t)
{
    const time_t tmp = static_cast<time_t>(t);

    struct tm time_tm;
    /* On failure gmtime_r leaves time_tm unset, so it must not reach strftime. */
    if (!gmtime_r(&tmp, &time_tm)) return std::string();

    char buf[256];
    /* strftime returns 0 when nothing usable was written to buf. */
    if (strftime(buf, sizeof(buf), "%Y-%m-%dT%H:%M:%SZ", &time_tm) == 0) return std::string(); // Zulu time
    return std::string(buf);
}
943 
944 /* Many internal windows and Linux structures require a valid printable name in ASCII */
/* Return true when every byte of name lies in the range ' ' (0x20) through
 * '~' (0x7e): no control characters, no DEL, no byte with the high bit set.
 * An empty name is accepted.
 */
inline bool validASCIIName(const std::string &name)
{
    for (std::string::const_iterator it = name.begin(); it != name.end(); ++it) {
        const unsigned char byte = static_cast<unsigned char>(*it);
        const bool printable = (byte >= ' ') && (byte < 0x7f);
        if (!printable) return false;
    }
    return true;
}
954 
955 #endif
956