1 #ifndef TCPDEMUX_H
2 #define TCPDEMUX_H
3 
4 /**
5  * tcpdemux.h
6  *
 * a tcpip demultiplexer.
8  *
9  * Defines the basic classes used by the tcpflow program. This includes:
10  * - IP, TCP and UDP structures
11  * - class ipaddr    - IP address (IPv4 and IPv6)
12  * - class flow_addr - The flow address (source addr & port; dest addr & port; family)
13  * - class flow      - All of the information for a flow that's being tracked
14  * - class tcp_header_t - convenience class for working with TCP headers
15  * - class tcpip     - A one-sided TCP implementation
16  * - class tcpdemux  - Processes individual packets, identifies flows,
17  *                     and creates tcpip objects as required
18  */
19 
20 #include "pcap_writer.h"
21 #include "dfxml/src/dfxml_writer.h"
22 #include "dfxml/src/hash_t.h"
23 
24 #if defined(HAVE_SQLITE3_H)
25 #include <sqlite3.h>
26 #endif
27 
28 #if defined(HAVE_UNORDERED_MAP)
29 # include <unordered_map>
30 # include <unordered_set>
31 # undef HAVE_TR1_UNORDERED_MAP           // be sure we don't use it
32 #else
33 # if defined(HAVE_TR1_UNORDERED_MAP)
34 #  include <tr1/unordered_map>
35 #  include <tr1/unordered_set>
36 # else
37 #  error Requires <unordered_map> or <tr1/unordered_map>
38 # endif
39 #endif
40 
41 #include <queue>
42 #include "intrusive_list.h"
43 
44 /**
 * the tcp demultiplexer
46  * This is a singleton class; we only need a single demultiplexer.
47  */
48 class tcpdemux {
49     /* These are not implemented */
50     tcpdemux(const tcpdemux &t);
51     tcpdemux &operator=(const tcpdemux &that);
52 
53     /* see http://mikecvet.wordpress.com/tag/hashing/ */
54     typedef struct {
operator__anonc3a3cb32010855         long operator() (const flow_addr &k) const {return k.hash(); }
56     } flow_addr_hash;
57 
58     typedef struct {
operator__anonc3a3cb32020859         bool operator() (const flow_addr &x, const flow_addr &y) const { return x==y;}
60     } flow_addr_key_eq;
61 
62 #ifdef HAVE_TR1_UNORDERED_MAP
63     typedef std::tr1::unordered_map<flow_addr,tcpip *,flow_addr_hash,flow_addr_key_eq> flow_map_t; // active flows
64     typedef std::tr1::unordered_map<flow_addr,saved_flow *,flow_addr_hash,flow_addr_key_eq> saved_flow_map_t; // flows that have been saved
65 #else
66     typedef std::unordered_map<flow_addr,tcpip *,flow_addr_hash,flow_addr_key_eq> flow_map_t; // active flows
67     typedef std::unordered_map<flow_addr,saved_flow *,flow_addr_hash,flow_addr_key_eq> saved_flow_map_t; // flows that have been saved
68 #endif
69     typedef std::vector<class saved_flow *> saved_flows_t; // needs to be ordered
70 
71 
72     tcpdemux();
73 #ifdef HAVE_SQLITE3
74     sqlite3 *db;
75     sqlite3_stmt *insert_flow;
76 #endif
77 
78 public:
79     static uint32_t tcp_timeout;
80     static unsigned int get_max_fds(void);             // returns the max
~tcpdemux()81     virtual ~tcpdemux(){
82         if(xreport) delete xreport;
83         if(pwriter) delete pwriter;
84     }
85 
86     /* The pure options class means we can add new options without having to modify the tcpdemux constructor. */
87     class options {
88     public:;
89         enum { MAX_SEEK=1024*1024*16 };
options()90         options():console_output(false),console_output_nonewline(false),
91                   store_output(true),opt_md5(false),
92                   post_processing(false),gzip_decompress(true),
93                   max_bytes_per_flow(-1),
94                   max_flows(0),suppress_header(0),
95                   output_strip_nonprint(true),output_hex(false),use_color(0),
96                   output_packet_index(false),max_seek(MAX_SEEK) {
97         }
98         bool    console_output;
99         bool    console_output_nonewline;
100         bool    store_output;   // do we output?
101         bool    opt_md5;                // do we calculate MD5 on DFXML output?
102         bool    post_processing;        // decode headers after tcp connection closes
103         bool    gzip_decompress;
104         int64_t  max_bytes_per_flow;
105         uint32_t max_flows;
106         bool    suppress_header;
107         bool    output_strip_nonprint;
108         bool    output_hex;
109         bool    use_color;
110         bool    output_packet_index;    // Generate a packet index file giving the timestamp and location
111                                         // bytes written to the flow file.
112         int32_t max_seek;               // signed becuase we compare with abs()
113     };
114 
115     enum { WARN_TOO_MANY_FILES=10000};  // warn if more than this number of files in a directory
116 
117     std::string outdir;                 /* output directory */
118     uint64_t    flow_counter;           // how many flows have we seen?
119     uint64_t    packet_counter;         // monotomically increasing
120     dfxml_writer  *xreport;               // DFXML output file
121     pcap_writer *pwriter;               // where we should write packets
122     unsigned int max_open_flows;        // how large did it ever get?
123     unsigned int max_fds;               // maximum number of file descriptors for this tcpdemux
124 
125     flow_map_t  flow_map;               // db of open tcpip objects, indexed by flow
126     intrusive_list<tcpip> open_flows; // the tcpip flows with open files in access order
127 
128     saved_flow_map_t saved_flow_map;  // db of saved flows, indexed by flow
129     saved_flows_t    saved_flows;     // the flows that were saved
130     bool             start_new_connections;  // true if we should start new connections
131 
132     options     opt;
133     class       feature_recorder_set *fs; // where features extracted from each flow should be stored
134 
135     static uint32_t max_saved_flows;       // how many saved flows are kept in the saved_flow_map
136     static tcpdemux *getInstance();
137 
138     /* Databse */
139 
140     void  openDB();                    // open the database file if we are using it in outdir directory.
141     void  write_flow_record(const std::string &starttime,const std::string &endtime,
142                             const std::string &src_ipn,const std::string &dst_ipn,
143                             const std::string &mac_daddr,const std::string &mac_saddr,
144                             uint64_t packets,uint16_t srcport,uint16_t dstport,
145                             const std::string &hashdigest_md5);
146 
147 
148     void  save_unk_packets(const std::string &wfname,const std::string &ifname);
149                                        // save unknown packets at this location
150     void  post_process(tcpip *tcp);    // just before closing; writes XML and closes fd
151 
152     /* management of open fds and in-process tcpip flows*/
153     void  close_tcpip_fd(tcpip *);
154     void  close_oldest_fd();
155     void  remove_flow(const flow_addr &flow); // remove a flow from the database, closing open files if necessary
156     void  remove_all_flows();                 // stop processing all tcpip connections
157 
158     /* open a new file, closing an fd in the openflow database if necessary */
159     int   retrying_open(const std::string &filename,int oflag,int mask);
160 
161     /* the flow database holds in-process tcpip connections */
162     tcpip *create_tcpip(const flow_addr &flow, be13::tcp_seq isn, const be13::packet_info &pi);
163     tcpip *find_tcpip(const flow_addr &flow);
164 
165     /* saved flows are completed flows that we remember in case straggling packets
166      * show up. Remembering the flows lets us resolve the packets rather than creating
167      * new flows.
168      */
169     void  save_flow(tcpip *);
170 
171     /** packet processing.
172      * Each returns 0 if processed, 1 if not processed, -1 if error.
173      */
174     int  process_tcp(const ipaddr &src, const ipaddr &dst,sa_family_t family,
175                      const u_char *tcp_data, uint32_t tcp_length,
176                      const be13::packet_info &pi);
177     int  process_ip4(const be13::packet_info &pi);
178     int  process_ip6(const be13::packet_info &pi);
179     int  process_pkt(const be13::packet_info &pi);
180 };
181 
182 
183 #endif
184