1 #ifndef TCPIP_H 2 #define TCPIP_H 3 4 #include <fstream> 5 6 #include "inet_ntop.h" 7 8 /** On windows, there is no in_addr_t; this is from 9 * /usr/include/netinet/in.h 10 */ 11 #ifndef HAVE_NETINET_IN_H 12 typedef uint32_t in_addr_t; 13 #endif 14 15 #ifndef HAVE_SA_FAMILY_T 16 typedef unsigned short int sa_family_t; 17 #endif 18 19 20 /** 21 * ipaddress class. 22 * represents IPv4 and IPv6 addresses. 23 * IPv4 addresses have address in bytes 0..3 and all NULL for bytes 4..11 24 */ 25 class ipaddr { 26 public:; ipaddr()27 ipaddr(){ 28 memset(addr,0,sizeof(addr)); 29 } ipaddr(const in_addr_t & a)30 ipaddr(const in_addr_t &a){ // copy operator 31 addr[0] = ((uint8_t *)&a)[0]; // copy the bottom 4 octets and blank the top 12 32 addr[1] = ((uint8_t *)&a)[1]; 33 addr[2] = ((uint8_t *)&a)[2]; 34 addr[3] = ((uint8_t *)&a)[3]; 35 memset(addr+4,0,12); 36 } ipaddr(const uint8_t a[16])37 ipaddr(const uint8_t a[16]){ // begin wiped 38 memcpy(addr,a,16); 39 } 40 41 uint8_t addr[16]; // holds v4 or v16 bit(int i)42 bool bit(int i) const { // get the ith bit; 0 is MSB 43 return (addr[i / 8]) & (1<<(7-i%8)); 44 } quad(int i)45 uint32_t quad(int i) const { // gets the ith quad as a 32-bit value 46 return (addr[i*4+0]<<24) | (addr[i*4+2]<<16) | (addr[i*4+1]<<8) | (addr[i*4+3]<<0); 47 } dquad(int i)48 uint64_t dquad(int i) const { // gets the first 64-bit half or the second 64-bit half 49 return (uint64_t)(quad(i*2+1))<<32 | (uint64_t)(quad(i*2)); 50 } 51 52 inline bool operator ==(const ipaddr &b) const { return memcmp(this->addr,b.addr,sizeof(addr))==0; }; 53 inline bool operator <=(const ipaddr &b) const { return memcmp(this->addr,b.addr,sizeof(addr))<=0; }; 54 inline bool operator > (const ipaddr &b) const { return memcmp(this->addr,b.addr,sizeof(addr))>0; }; 55 inline bool operator >=(const ipaddr &b) const { return memcmp(this->addr,b.addr,sizeof(addr))>=0; }; 56 inline bool operator < (const ipaddr &b) const { return memcmp(this->addr,b.addr,sizeof(this->addr))<0; } 57 }; 58 59 class ipaddr_prn { 60 public: 61 const ipaddr& ia; 62 const sa_family_t family; ipaddr_prn(const ipaddr & ia_,sa_family_t family_)63 ipaddr_prn(const ipaddr& ia_, sa_family_t family_) 64 : ia(ia_), family(family_) 65 { } 66 }; 67 68 inline std::ostream & operator <<(std::ostream &os, const ipaddr_prn &b) { 69 char buf[INET6_ADDRSTRLEN]; 70 inet_ntop(b.family, b.ia.addr, buf, sizeof(buf)); 71 os << buf; 72 return os; 73 } 74 75 inline bool operator ==(const struct timeval &a,const struct timeval &b) { 76 return a.tv_sec==b.tv_sec && a.tv_usec==b.tv_usec; 77 } 78 79 inline bool operator <(const struct timeval &a,const struct timeval &b) { 80 return (a.tv_sec<b.tv_sec) || ((a.tv_sec==b.tv_sec) && (a.tv_sec<b.tv_sec)); 81 } 82 83 /* 84 * describes the TCP flow. 85 * No timing information; this is used as a map index. 86 */ 87 class flow_addr { 88 public: flow_addr()89 flow_addr():src(),dst(),sport(0),dport(0),family(0){ } flow_addr(const ipaddr & s,const ipaddr & d,uint16_t sp,uint16_t dp,sa_family_t f)90 flow_addr(const ipaddr &s,const ipaddr &d,uint16_t sp,uint16_t dp,sa_family_t f): 91 src(s),dst(d),sport(sp),dport(dp),family(f){ 92 } flow_addr(const flow_addr & f)93 flow_addr(const flow_addr &f):src(f.src),dst(f.dst),sport(f.sport),dport(f.dport), 94 family(f.family){ 95 } ~flow_addr()96 virtual ~flow_addr(){}; 97 ipaddr src; // Source IP address; holds v4 or v6 98 ipaddr dst; // Destination IP address; holds v4 or v6 99 uint16_t sport; // Source port number 100 uint16_t dport; // Destination port number 101 sa_family_t family; // AF_INET or AF_INET6 */ 102 hash()103 uint64_t hash() const { 104 if(family==AF_INET){ 105 return ((uint64_t)(src.quad(0))<<32 | dst.quad(0)) 106 ^ ((uint64_t)(dst.quad(0))<<32 | src.quad(0)) 107 ^ (sport<<16 | dport); 108 } else { 109 return (src.dquad(0)<<32 ^ dst.dquad(0)) 110 ^ (dst.dquad(0)<<32 ^ src.dquad(0)) 111 ^ (src.dquad(1) ^ dst.dquad(1)) 112 ^ (sport<<16 | dport); 113 } 114 } 115 116 inline bool operator ==(const flow_addr &b) const { 117 return this->src==b.src && 118 this->dst==b.dst && 119 this->sport==b.sport && 120 this->dport==b.dport && 121 this->family==b.family; 122 } 123 124 inline bool operator <(const flow_addr &b) const { 125 if (this->src < b.src) return true; 126 if (this->src > b.src) return false; 127 if (this->dst < b.dst) return true; 128 if (this->dst > b.dst) return false; 129 if (this->sport < b.sport) return true; 130 if (this->sport > b.sport) return false; 131 if (this->dport < b.dport) return true; 132 if (this->dport > b.dport) return false; 133 if (this->family < b.family) return true; 134 if (this->family > b.family) return false; 135 return false; /* they are equal! */ 136 } 137 str()138 std::string str() const { 139 std::stringstream s; 140 s << "flow[" << ipaddr_prn(src, family) << ":" << sport << "->" << ipaddr_prn(dst, family) << ":" << dport << "]"; 141 return s.str(); 142 } 143 }; 144 145 inline std::ostream & operator <<(std::ostream &os,const flow_addr &f) { 146 os << f.str(); 147 return os; 148 } 149 150 151 /* 152 * A flow is a flow_addr that has additional information regarding when it was seen 153 * and how many packets were seen. The address is used to locate the flow in the array. 154 * Notice that it contains no pointers, so it can be copied with the default operator. 155 */ 156 class flow : public flow_addr { 157 public:; 158 static void usage(); // print information on flow notation 159 static std::string filename_template; // 160 static std::string outdir; // where the output gets written flow()161 flow():id(),vlan(),mac_daddr(),mac_saddr(),tstart(),tlast(),len(),caplen(),packet_count(){}; flow(const flow_addr & flow_addr_,uint64_t id_,const be13::packet_info & pi)162 flow(const flow_addr &flow_addr_,uint64_t id_,const be13::packet_info &pi): 163 flow_addr(flow_addr_),id(id_),vlan(pi.vlan()), 164 mac_daddr(), 165 mac_saddr(), 166 tstart(pi.ts),tlast(pi.ts), 167 len(0), 168 caplen(0), 169 packet_count(0){ 170 if(pi.pcap_hdr){ 171 memcpy(mac_daddr,pi.get_ether_dhost(),sizeof(mac_daddr)); 172 memcpy(mac_saddr,pi.get_ether_shost(),sizeof(mac_saddr)); 173 } 174 } ~flow()175 virtual ~flow(){}; 176 uint64_t id; // flow_counter when this flow was created 177 int32_t vlan; // vlan interface we first observed; -1 means no vlan 178 uint8_t mac_daddr[6]; // dst mac address of first packet 179 uint8_t mac_saddr[6]; // source mac address of first packet 180 struct timeval tstart; // when first seen 181 struct timeval tlast; // when last seen 182 uint64_t len; // off-wire length 183 uint64_t caplen; // captured length 184 uint64_t packet_count; // packet count 185 186 // return a filename for a flow based on the template and the connection count 187 std::string filename(uint32_t connection_count); 188 // return a new filename for a flow based on the temlate, 189 // optionally opening the file and returning a fd if &fd is provided 190 std::string new_filename(int *fd,int flags,int mode); 191 has_mac_daddr()192 bool has_mac_daddr(){ 193 return mac_daddr[0] || mac_daddr[1] || mac_daddr[2] || mac_daddr[3] || mac_daddr[4] || mac_daddr[5]; 194 } 195 has_mac_saddr()196 bool has_mac_saddr(){ 197 return mac_saddr[0] || mac_saddr[1] || mac_saddr[2] || mac_saddr[3] || mac_saddr[4] || mac_saddr[5]; 198 } 199 }; 200 201 /* 202 * Convenience class for working with TCP headers 203 */ 204 #define PORT_HTTP 80 205 #define PORT_HTTP_ALT_0 8080 206 #define PORT_HTTP_ALT_1 8000 207 #define PORT_HTTP_ALT_2 8888 208 #define PORT_HTTP_ALT_3 81 209 #define PORT_HTTP_ALT_4 82 210 #define PORT_HTTP_ALT_5 8090 211 #define PORT_HTTPS 443 212 #define PORT_SSH 22 213 #define PORT_FTP_DATA 20 214 #define PORT_FTP_CONTROL 21 215 class tcp_header_t { 216 public: 217 #pragma GCC diagnostic ignored "-Wcast-align" tcp_header_t(const u_char * data)218 tcp_header_t(const u_char *data): 219 tcp_header((struct be13::tcphdr *)data){}; 220 #pragma GCC diagnostic warning "-Wcast-align" tcp_header_t(const tcp_header_t & b)221 tcp_header_t(const tcp_header_t &b): 222 tcp_header(b.tcp_header){} 223 tcp_header_t &operator=(const tcp_header_t &that) { 224 this->tcp_header = that.tcp_header; 225 return *this; 226 } 227 ~tcp_header_t()228 virtual ~tcp_header_t(){} 229 struct be13::tcphdr *tcp_header; tcp_header_len()230 size_t tcp_header_len(){ return tcp_header->th_off * 4; } sport()231 uint16_t sport() {return ntohs(tcp_header->th_sport);} dport()232 uint16_t dport() {return ntohs(tcp_header->th_dport);} seq()233 be13::tcp_seq seq() {return ntohl(tcp_header->th_seq);} th_fin()234 bool th_fin() {return tcp_header->th_flags & TH_FIN;} th_ack()235 bool th_ack() {return tcp_header->th_flags & TH_ACK;} th_syn()236 bool th_syn() {return tcp_header->th_flags & TH_SYN;} 237 }; 238 239 240 /* 241 * The tcpip class is a passive tcp/ip implementation. 242 * It can reconstruct flows! 243 * 244 * It includes: 245 * - the flow (as an embedded object) 246 * - Information about where the flow is written. 247 * - Information about how much of the flow has been captured. 248 * Currently flows only go in one direction and do not know about their sibling flow 249 */ 250 251 #pragma GCC diagnostic ignored "-Weffc++" 252 #pragma GCC diagnostic ignored "-Wshadow" 253 #pragma GCC diagnostic ignored "-Wall" 254 #pragma GCC diagnostic ignored "-Wmissing-noreturn" 255 256 #if defined(HAVE_BOOST_ICL_INTERVAL_HPP) && defined(HAVE_BOOST_ICL_INTERVAL_MAP_HPP) && defined(HAVE_BOOST_ICL_INTERVAL_SET_HPP) 257 #include <boost/icl/interval.hpp> 258 #include <boost/icl/interval_map.hpp> 259 #include <boost/icl/interval_set.hpp> 260 typedef boost::icl::interval_set<uint64_t> recon_set; // Boost interval set of bytes that were reconstructed. 261 #endif 262 263 #include "intrusive_list.h" 264 265 #pragma GCC diagnostic warning "-Weffc++" 266 #pragma GCC diagnostic warning "-Wshadow" 267 #pragma GCC diagnostic warning "-Wall" 268 #pragma GCC diagnostic warning "-Wmissing-noreturn" 269 270 class tcpip { 271 public: 272 /** track the direction of the flow; this is largely unused */ 273 typedef enum { 274 unknown=0, // unknown direction 275 dir_sc, // server-to-client 1 276 dir_cs // client-to-server 2 277 } dir_t; 278 279 private: 280 /*** Begin Effective C++ error suppression *** 281 *** This class does not implement assignment or copying. *** 282 ***/ 283 tcpip(const tcpip &t); 284 tcpip &operator=(const tcpip &that); 285 /*** End Effective C++ error suppression */ 286 287 public:; 288 tcpip(class tcpdemux &demux_,const flow &flow_,be13::tcp_seq isn_); /* constructor in tcpip.cpp */ 289 virtual ~tcpip(); // destructor 290 291 class tcpdemux &demux; // our demultiplexer 292 293 /* State information for the flow being reconstructed */ 294 flow myflow; /* Description of this flow */ 295 dir_t dir; // direction of flow 296 be13::tcp_seq isn; // Flow's initial sequence number 297 be13::tcp_seq nsn; // fd - expected next sequence number 298 uint32_t syn_count; // number of SYNs seen 299 uint32_t fin_count; // number of FINs received 300 uint32_t fin_size; // length of stream as determined when fin is sent 301 uint64_t pos; // fd - current position+1 (next byte in stream to be written) 302 303 /* Archiving information */ 304 std::string flow_pathname; // path where flow is saved 305 int fd; // file descriptor for file storing this flow's data 306 bool file_created; // true if file was created 307 308 /* Flow Index information - only used if flow packet/data indexing is requested --GDD */ 309 std::string flow_index_pathname; // Path for the flow index file 310 std::fstream idx_file; // File descriptor for storing the flow index data 311 312 /* Stats */ 313 recon_set *seen; // what we've seen; it must be * due to boost lossage 314 uint64_t last_byte; // last byte in flow processed 315 uint64_t last_packet_number; // for finding most recent packet written 316 uint64_t out_of_order_count; // all packets were contigious 317 uint64_t violations; // protocol violation count 318 319 /* File Acess Order */ 320 intrusive_list<tcpip>::iterator it; 321 322 /* Methods */ 323 void close_file(); // close fd 324 int open_file(); // opens save file; return -1 if failure, 0 if success 325 void print_packet(const u_char *data, uint32_t length); 326 void store_packet(const u_char *data, uint32_t length, int32_t delta,struct timeval ts); 327 void process_packet(const struct timeval &ts,const int32_t delta,const u_char *data,const uint32_t length); 328 uint32_t seen_bytes(); 329 void dump_seen(); 330 void dump_xml(class dfxml_writer *xmlreport,const std::string &xmladd); 331 static bool compare(std::string a, std::string b); 332 void sort_index(std::fstream *idx_file); 333 void sort_index(); 334 }; 335 336 /* print a tcpip data structure. Largely for debugging */ 337 inline std::ostream & operator <<(std::ostream &os,const tcpip &f) { 338 os << "tcpip[" << f.myflow 339 << " dir:" << int(f.dir) << " isn:" << f.isn << " nsn: " << f.nsn 340 << " sc:" << f.syn_count << " fc:" << f.fin_count << " fs:" << f.fin_size 341 << " pos:" << f.pos << " fd: " << f.fd << " cr:" << f.file_created 342 << " lb:" << f.last_byte << " lpn:" << f.last_packet_number << " ooc:" << f.out_of_order_count 343 << "]"; 344 if(f.fd>0) os << " ftell(" << f.fd << ")=" << lseek(f.fd,0L,SEEK_CUR); 345 return os; 346 } 347 348 /* 349 * An saved_flow is a flow for which all of the packets have been received and tcpip state 350 * has been discarded. The saved_flow allows matches against newly received packets 351 * that are not SYN or ACK packets but have data. We can see if the data matches data that's 352 * been written to disk. To do this we need ot know the filename and the ISN... 353 */ 354 355 class saved_flow { 356 public: saved_flow(tcpip * tcp)357 saved_flow(tcpip *tcp):addr(tcp->myflow), 358 saved_filename(tcp->flow_pathname), 359 isn(tcp->isn) {} 360 361 flow_addr addr; // flow address 362 std::string saved_filename; // where the flow was saved 363 be13::tcp_seq isn; // the flow's ISN ~saved_flow()364 virtual ~saved_flow(){}; 365 }; 366 367 368 #endif 369