1 #ifndef TCPIP_H
2 #define TCPIP_H
3 
4 #include <fstream>
5 
6 #include "inet_ntop.h"
7 
/** On windows, there is no in_addr_t; this is from
 * /usr/include/netinet/in.h
 * The fallback is only compiled when HAVE_NETINET_IN_H is not defined.
 */
#ifndef HAVE_NETINET_IN_H
typedef uint32_t in_addr_t;
#endif

/** Fallback definition of sa_family_t (the type used below to carry an
 * address family); only compiled when HAVE_SA_FAMILY_T is not defined.
 */
#ifndef HAVE_SA_FAMILY_T
typedef unsigned short int sa_family_t;
#endif
18 
19 
/**
 * ipaddress class.
 * Represents IPv4 and IPv6 addresses in one 16-byte buffer.
 * IPv4 addresses have the address in bytes 0..3 and zeros in bytes 4..15.
 */
class ipaddr {
public:
    /** Construct the all-zero address. */
    ipaddr(){
        memset(addr,0,sizeof(addr));
    }

    /**
     * Construct from an IPv4 address.
     * The four bytes of a are copied, in memory order, into addr[0..3];
     * the remaining twelve bytes are cleared.
     */
    ipaddr(const in_addr_t &a){
        memcpy(addr,&a,4);
        memset(addr+4,0,sizeof(addr)-4);
    }

    /**
     * Construct from a raw 16-byte address.
     * a must point to at least 16 readable bytes (not checked).
     */
    ipaddr(const uint8_t a[16]){
        memcpy(addr,a,sizeof(addr));
    }

    uint8_t addr[16];                   // holds v4 or v6

    /**
     * Get the ith bit of the address; bit 0 is the MSB of addr[0].
     * i is not range checked and must be within 0..127.
     */
    bool bit(int i) const {
        return (addr[i / 8] & (1<<(7-i%8))) != 0;
    }

    /**
     * Get the ith group of four bytes (i in 0..3, not checked) as a 32-bit
     * value, with addr[i*4] as the most significant byte.
     * Each byte is widened to uint32_t before shifting so that a byte
     * >= 0x80 is never shifted into the sign bit of a promoted int.
     */
    uint32_t quad(int i) const {
        const uint8_t *p = addr + i*4;
        return (uint32_t(p[0])<<24) | (uint32_t(p[1])<<16) | (uint32_t(p[2])<<8) | uint32_t(p[3]);
    }

    /**
     * Get the first (i==0) or second (i==1) 64-bit half of the address.
     * Note the word order: quad(2*i+1) lands in the upper 32 bits and
     * quad(2*i) in the lower 32 bits.
     */
    uint64_t dquad(int i) const {
        return (uint64_t)(quad(i*2+1))<<32 | (uint64_t)(quad(i*2));
    }

    /* All comparisons are byte-wise (memcmp) over the full 16 bytes. */
    inline bool operator ==(const ipaddr &b) const { return memcmp(this->addr,b.addr,sizeof(addr))==0; }
    inline bool operator <=(const ipaddr &b) const { return memcmp(this->addr,b.addr,sizeof(addr))<=0; }
    inline bool operator > (const ipaddr &b) const { return memcmp(this->addr,b.addr,sizeof(addr))>0; }
    inline bool operator >=(const ipaddr &b) const { return memcmp(this->addr,b.addr,sizeof(addr))>=0; }
    inline bool operator < (const ipaddr &b) const { return memcmp(this->addr,b.addr,sizeof(addr))<0; }
};
58 
59 class ipaddr_prn {
60 public:
61     const ipaddr& ia;
62     const sa_family_t family;
ipaddr_prn(const ipaddr & ia_,sa_family_t family_)63     ipaddr_prn(const ipaddr& ia_, sa_family_t family_)
64         : ia(ia_), family(family_)
65     { }
66 };
67 
68 inline std::ostream & operator <<(std::ostream &os, const ipaddr_prn &b) {
69     char buf[INET6_ADDRSTRLEN];
70     inet_ntop(b.family, b.ia.addr, buf, sizeof(buf));
71     os << buf;
72     return os;
73 }
74 
/* Two timevals are equal when both the seconds and the microseconds match. */
inline bool operator ==(const struct timeval &a,const struct timeval &b) {
    if (a.tv_sec != b.tv_sec) {
        return false;
    }
    return a.tv_usec == b.tv_usec;
}
78 
/*
 * Strict ordering of timevals: compare the seconds first and, only when
 * they are equal, fall back to the microseconds.
 */
inline bool operator <(const struct timeval &a,const struct timeval &b) {
    return (a.tv_sec<b.tv_sec) || ((a.tv_sec==b.tv_sec) && (a.tv_usec<b.tv_usec));
}
82 
83 /*
84  * describes the TCP flow.
85  * No timing information; this is used as a map index.
86  */
87 class flow_addr {
88 public:
flow_addr()89     flow_addr():src(),dst(),sport(0),dport(0),family(0){ }
flow_addr(const ipaddr & s,const ipaddr & d,uint16_t sp,uint16_t dp,sa_family_t f)90     flow_addr(const ipaddr &s,const ipaddr &d,uint16_t sp,uint16_t dp,sa_family_t f):
91 	src(s),dst(d),sport(sp),dport(dp),family(f){
92     }
flow_addr(const flow_addr & f)93     flow_addr(const flow_addr &f):src(f.src),dst(f.dst),sport(f.sport),dport(f.dport),
94 				  family(f.family){
95     }
~flow_addr()96     virtual ~flow_addr(){};
97     ipaddr	src;		// Source IP address; holds v4 or v6
98     ipaddr	dst;		// Destination IP address; holds v4 or v6
99     uint16_t    sport;		// Source port number
100     uint16_t    dport;		// Destination port number
101     sa_family_t family;		// AF_INET or AF_INET6 */
102 
hash()103     uint64_t hash() const {
104 	if(family==AF_INET){
105 	    return ((uint64_t)(src.quad(0))<<32 | dst.quad(0))
106                 ^ ((uint64_t)(dst.quad(0))<<32 | src.quad(0))
107                 ^ (sport<<16 | dport);
108 	} else {
109 	    return (src.dquad(0)<<32 ^ dst.dquad(0))
110                 ^ (dst.dquad(0)<<32  ^ src.dquad(0))
111                 ^ (src.dquad(1)      ^ dst.dquad(1))
112                 ^ (sport<<16 | dport);
113 	}
114     }
115 
116     inline bool operator ==(const flow_addr &b) const {
117 	return this->src==b.src &&
118 	    this->dst==b.dst &&
119 	    this->sport==b.sport &&
120 	    this->dport==b.dport &&
121 	    this->family==b.family;
122     }
123 
124     inline bool operator <(const flow_addr &b) const {
125 	if (this->src < b.src) return true;
126 	if (this->src > b.src) return false;
127 	if (this->dst < b.dst) return true;
128 	if (this->dst > b.dst) return false;
129 	if (this->sport < b.sport) return true;
130 	if (this->sport > b.sport) return false;
131 	if (this->dport < b.dport) return true;
132 	if (this->dport > b.dport) return false;
133 	if (this->family < b.family) return true;
134 	if (this->family > b.family) return false;
135 	return false;    /* they are equal! */
136     }
137 
str()138     std::string str() const {
139         std::stringstream s;
140         s << "flow[" << ipaddr_prn(src, family) << ":" << sport << "->" << ipaddr_prn(dst, family) << ":" << dport << "]";
141         return s.str();
142     }
143 };
144 
145 inline std::ostream & operator <<(std::ostream &os,const flow_addr &f)  {
146     os << f.str();
147     return os;
148 }
149 
150 
151 /*
152  * A flow is a flow_addr that has additional information regarding when it was seen
153  * and how many packets were seen. The address is used to locate the flow in the array.
154  * Notice that it contains no pointers, so it can be copied with the default operator.
155  */
156 class flow : public flow_addr {
157 public:;
158     static void usage();			// print information on flow notation
159     static std::string filename_template;	//
160     static std::string outdir;                  // where the output gets written
flow()161     flow():id(),vlan(),mac_daddr(),mac_saddr(),tstart(),tlast(),len(),caplen(),packet_count(){};
flow(const flow_addr & flow_addr_,uint64_t id_,const be13::packet_info & pi)162     flow(const flow_addr &flow_addr_,uint64_t id_,const be13::packet_info &pi):
163 	flow_addr(flow_addr_),id(id_),vlan(pi.vlan()),
164         mac_daddr(),
165         mac_saddr(),
166         tstart(pi.ts),tlast(pi.ts),
167         len(0),
168         caplen(0),
169 	packet_count(0){
170         if(pi.pcap_hdr){
171             memcpy(mac_daddr,pi.get_ether_dhost(),sizeof(mac_daddr));
172             memcpy(mac_saddr,pi.get_ether_shost(),sizeof(mac_saddr));
173         }
174     }
~flow()175     virtual ~flow(){};
176     uint64_t  id;			// flow_counter when this flow was created
177     int32_t   vlan;			// vlan interface we first observed; -1 means no vlan
178     uint8_t mac_daddr[6];               // dst mac address of first packet
179     uint8_t mac_saddr[6];               // source mac address of first packet
180     struct timeval tstart;		// when first seen
181     struct timeval tlast;		// when last seen
182     uint64_t len;     		        // off-wire length
183     uint64_t caplen;    		// captured length
184     uint64_t packet_count;		// packet count
185 
186     // return a filename for a flow based on the template and the connection count
187     std::string filename(uint32_t connection_count);
188     // return a new filename for a flow based on the temlate,
189     // optionally opening the file and returning a fd if &fd is provided
190     std::string new_filename(int *fd,int flags,int mode);
191 
has_mac_daddr()192     bool has_mac_daddr(){
193         return mac_daddr[0] || mac_daddr[1] || mac_daddr[2] || mac_daddr[3] || mac_daddr[4] || mac_daddr[5];
194     }
195 
has_mac_saddr()196     bool has_mac_saddr(){
197         return mac_saddr[0] || mac_saddr[1] || mac_saddr[2] || mac_saddr[3] || mac_saddr[4] || mac_saddr[5];
198     }
199 };
200 
201 /*
202  * Convenience class for working with TCP headers
203  */
204 #define PORT_HTTP 80
205 #define PORT_HTTP_ALT_0 8080
206 #define PORT_HTTP_ALT_1 8000
207 #define PORT_HTTP_ALT_2 8888
208 #define PORT_HTTP_ALT_3 81
209 #define PORT_HTTP_ALT_4 82
210 #define PORT_HTTP_ALT_5 8090
211 #define PORT_HTTPS 443
212 #define PORT_SSH 22
213 #define PORT_FTP_DATA 20
214 #define PORT_FTP_CONTROL 21
215 class tcp_header_t {
216 public:
217 #pragma GCC diagnostic ignored "-Wcast-align"
tcp_header_t(const u_char * data)218     tcp_header_t(const u_char *data):
219 	tcp_header((struct be13::tcphdr *)data){};
220 #pragma GCC diagnostic warning "-Wcast-align"
tcp_header_t(const tcp_header_t & b)221     tcp_header_t(const tcp_header_t &b):
222 	tcp_header(b.tcp_header){}
223     tcp_header_t &operator=(const tcp_header_t &that) {
224 	this->tcp_header = that.tcp_header;
225 	return *this;
226     }
227 
~tcp_header_t()228     virtual ~tcp_header_t(){}
229     struct be13::tcphdr *tcp_header;
tcp_header_len()230     size_t tcp_header_len(){ return tcp_header->th_off * 4; }
sport()231     uint16_t sport() {return ntohs(tcp_header->th_sport);}
dport()232     uint16_t dport() {return ntohs(tcp_header->th_dport);}
seq()233     be13::tcp_seq  seq()   {return ntohl(tcp_header->th_seq);}
th_fin()234     bool th_fin()    {return tcp_header->th_flags & TH_FIN;}
th_ack()235     bool th_ack()    {return tcp_header->th_flags & TH_ACK;}
th_syn()236     bool th_syn()    {return tcp_header->th_flags & TH_SYN;}
237 };
238 
239 
240 /*
241  * The tcpip class is a passive tcp/ip implementation.
242  * It can reconstruct flows!
243  *
244  * It includes:
245  *   - the flow (as an embedded object)
246  *   - Information about where the flow is written.
247  *   - Information about how much of the flow has been captured.
248  * Currently flows only go in one direction and do not know about their sibling flow
249  */
250 
251 #pragma GCC diagnostic ignored "-Weffc++"
252 #pragma GCC diagnostic ignored "-Wshadow"
253 #pragma GCC diagnostic ignored "-Wall"
254 #pragma GCC diagnostic ignored "-Wmissing-noreturn"
255 
256 #if defined(HAVE_BOOST_ICL_INTERVAL_HPP) && defined(HAVE_BOOST_ICL_INTERVAL_MAP_HPP) && defined(HAVE_BOOST_ICL_INTERVAL_SET_HPP)
257 #include <boost/icl/interval.hpp>
258 #include <boost/icl/interval_map.hpp>
259 #include <boost/icl/interval_set.hpp>
260 typedef boost::icl::interval_set<uint64_t> recon_set; // Boost interval set of bytes that were reconstructed.
261 #endif
262 
263 #include "intrusive_list.h"
264 
265 #pragma GCC diagnostic warning "-Weffc++"
266 #pragma GCC diagnostic warning "-Wshadow"
267 #pragma GCC diagnostic warning "-Wall"
268 #pragma GCC diagnostic warning "-Wmissing-noreturn"
269 
/*
 * Per-flow reconstruction state: the flow description, sequence-number
 * tracking, the output file the flow is written to, and statistics.
 * Objects are not copyable (copy constructor and assignment are private
 * and only declared). All methods are defined outside this header.
 */
class tcpip {
public:
    /** track the direction of the flow; this is largely unused */
    typedef enum {
	unknown=0,			// unknown direction
	dir_sc,				// server-to-client 1
	dir_cs				// client-to-server 2
    } dir_t;

private:
    /*** Begin Effective C++ error suppression                ***
     *** This class does not implement assignment or copying. ***
     ***/
    tcpip(const tcpip &t);
    tcpip &operator=(const tcpip &that);
    /*** End Effective C++ error suppression */

public:;
    tcpip(class tcpdemux &demux_,const flow &flow_,be13::tcp_seq isn_);    /* constructor in tcpip.cpp */
    virtual ~tcpip();			// destructor

    class tcpdemux &demux;		// our demultiplexer

    /* State information for the flow being reconstructed */
    flow	myflow;			/* Description of this flow */
    dir_t	dir;			// direction of flow
    be13::tcp_seq isn;			// Flow's initial sequence number
    be13::tcp_seq nsn;			// fd - expected next sequence number
    uint32_t	syn_count;		// number of SYNs seen
    uint32_t    fin_count;              // number of FINs received
    uint32_t    fin_size;               // length of stream as determined when fin is sent
    uint64_t	pos;			// fd - current position+1 (next byte in stream to be written)

    /* Archiving information */
    std::string flow_pathname;		// path where flow is saved
    int		fd;			// file descriptor for file storing this flow's data
    bool	file_created;		// true if file was created

    /* Flow Index information - only used if flow packet/data indexing is requested --GDD */
    std::string flow_index_pathname;	// Path for the flow index file
    std::fstream		idx_file;				// File stream for storing the flow index data

    /* Stats */
    recon_set   *seen;                  // what we've seen; it must be * due to boost lossage
    uint64_t    last_byte;              // last byte in flow processed
    uint64_t	last_packet_number;	// for finding most recent packet written
    uint64_t	out_of_order_count;	// out-of-order packet count (presumably 0 when all packets were contiguous - TODO confirm)
    uint64_t    violations;		// protocol violation count

    /* File Access Order */
    intrusive_list<tcpip>::iterator it;

    /* Methods */
    void close_file();			// close fd
    int  open_file();                   // opens save file; return -1 if failure, 0 if success
    void print_packet(const u_char *data, uint32_t length);
    void store_packet(const u_char *data, uint32_t length, int32_t delta,struct timeval ts);
    void process_packet(const struct timeval &ts,const int32_t delta,const u_char *data,const uint32_t length);
    uint32_t seen_bytes();
    void dump_seen();
    void dump_xml(class dfxml_writer *xmlreport,const std::string &xmladd);
    static bool compare(std::string a, std::string b);
    void sort_index(std::fstream *idx_file);
    void sort_index();
};
335 
336 /* print a tcpip data structure. Largely for debugging */
337 inline std::ostream & operator <<(std::ostream &os,const tcpip &f) {
338     os << "tcpip[" << f.myflow
339        << " dir:" << int(f.dir) << " isn:" << f.isn << " nsn: " << f.nsn
340        << " sc:" << f.syn_count << " fc:" << f.fin_count << " fs:" << f.fin_size
341        << " pos:" << f.pos << " fd: " << f.fd << " cr:" << f.file_created
342        << " lb:" << f.last_byte << " lpn:" << f.last_packet_number << " ooc:" << f.out_of_order_count
343        << "]";
344     if(f.fd>0) os << " ftell(" << f.fd << ")=" << lseek(f.fd,0L,SEEK_CUR);
345     return os;
346 }
347 
348 /*
349  * An saved_flow is a flow for which all of the packets have been received and tcpip state
350  * has been discarded. The saved_flow allows matches against newly received packets
351  * that are not SYN or ACK packets but have data. We can see if the data matches data that's
352  * been written to disk. To do this we need ot know the filename and the ISN...
353  */
354 
355 class saved_flow  {
356 public:
saved_flow(tcpip * tcp)357     saved_flow(tcpip *tcp):addr(tcp->myflow),
358                            saved_filename(tcp->flow_pathname),
359                            isn(tcp->isn) {}
360 
361     flow_addr         addr;                  // flow address
362     std::string       saved_filename;        // where the flow was saved
363     be13::tcp_seq     isn;                    // the flow's ISN
~saved_flow()364     virtual ~saved_flow(){};
365 };
366 
367 
368 #endif
369