1 /*
2  * This file is part of tcpflow by Simson Garfinkel,
3  * originally by Jeremy Elson <jelson@circlemud.org>
4  *
5  * Modified by Greg Drew to add support for creating a packet time / data index
6  * which allows mapping bytes in the flow back to their relative arrival time.
7  * This is very useful in reassembling inherently bidirectional conversations
8  * such as chat or telnet sessions.  --GDD
9  *
10  * This source code is under the GNU Public License (GPL).  See
11  * LICENSE for details.
12  *
13  */
14 
15 #include "tcpflow.h"
16 #include "tcpip.h"
17 #include "tcpdemux.h"
18 
19 #include <iostream>
20 #include <sstream>
21 #include <vector>
22 #include <string>
23 
24 #pragma GCC diagnostic ignored "-Weffc++"
25 #pragma GCC diagnostic ignored "-Wshadow"
26 
27 
28 /* Create a new tcp object.
29  *
30  * Creating a new object creates a new passive TCP/IP decoder.
31  * It will *NOT* append to a flow that is already on the disk or in memory.
32  *
33  * called from tcpdemux::create_tcpip()
34  */
tcpip(tcpdemux & demux_,const flow & flow_,be13::tcp_seq isn_)35 tcpip::tcpip(tcpdemux &demux_,const flow &flow_,be13::tcp_seq isn_):
36     demux(demux_),myflow(flow_),dir(unknown),isn(isn_),nsn(0),
37     syn_count(0),fin_count(0),fin_size(0),pos(0),
38     flow_pathname(),fd(-1),file_created(false),
39     flow_index_pathname(),idx_file(),
40     seen(new recon_set()),
41     last_byte(),
42     last_packet_number(),out_of_order_count(0),violations(0)
43 {
44 }
45 
46 
seen_bytes()47 uint32_t tcpip::seen_bytes()
48 {
49     if(seen) return seen->size();
50     return 0;
51 }
52 
dump_seen()53 void tcpip::dump_seen()
54 {
55     if(seen){
56         for(recon_set::const_iterator it = seen->begin(); it!=seen->end(); it++){
57             std::cerr << *it << ", ";
58         }
59         std::cerr << std::endl;
60     }
61 }
62 
dump_xml(class dfxml_writer * xreport,const std::string & xmladd)63 void tcpip::dump_xml(class dfxml_writer *xreport,const std::string &xmladd)
64 {
65     static const std::string fileobject_str("fileobject");
66     static const std::string filesize_str("filesize");
67     static const std::string filename_str("filename");
68     static const std::string tcpflow_str("tcpflow");
69 
70     xreport->push(fileobject_str);
71     if(flow_pathname.size()) xreport->xmlout(filename_str,flow_pathname);
72 
73     xreport->xmlout(filesize_str,last_byte);
74 
75     std::stringstream attrs;
76     attrs << "startime='" << dfxml_writer::to8601(myflow.tstart) << "' ";
77     attrs << "endtime='"  << dfxml_writer::to8601(myflow.tlast)  << "' ";
78     if(myflow.has_mac_daddr()) attrs << "mac_daddr='" << macaddr(myflow.mac_daddr) << "' ";
79     if(myflow.has_mac_saddr()) attrs << "mac_saddr='" << macaddr(myflow.mac_saddr) << "' ";
80     attrs << "family='"   << (int)myflow.family << "' ";
81     attrs << "src_ipn='"  << ipaddr_prn(myflow.src, myflow.family) << "' ";
82     attrs << "dst_ipn='"  << ipaddr_prn(myflow.dst, myflow.family) << "' ";
83     attrs << "srcport='"  << myflow.sport << "' ";
84     attrs << "dstport='"  << myflow.dport << "' ";
85     attrs << "packets='"  << myflow.packet_count << "' ";
86     if(out_of_order_count) attrs << "out_of_order_count='" << out_of_order_count << "' ";
87     if(violations)         attrs << "violations='" << violations << "' ";
88     attrs << "len='"      << myflow.len << "' ";
89     if(myflow.len != myflow.caplen) attrs << "caplen='"   << myflow.caplen << "' ";
90     xreport->xmlout(tcpflow_str,"",attrs.str(),false);
91     if(xmladd.size()>0) xreport->xmlout("",xmladd,"",false);
92     xreport->pop();
93     xreport->flush();
94 }
95 
96 
97 /**
98  * Destructor is called when flow is closed.
99  * It implements "after" processing.
100  * This should only be called from remove_flow() or remove_all_flows()
101  * when a flow is deleted.
102  */
~tcpip()103 tcpip::~tcpip()
104 {
105     assert(fd<0);                       // file must be closed
106     if(seen) delete seen;
107 }
108 
109 #pragma GCC diagnostic warning "-Weffc++"
110 #pragma GCC diagnostic warning "-Wshadow"
111 
112 
113 /****************************************************************
114  ** SAVE FILE MANAGEMENT
115  ****************************************************************
116  *
117  * Unlike the tcp/ip object, which is created once, the file can be opened, closed, and
118  * re-opened depending on the availability of file handles.
119  *
120  * Closing the file does not delete the tcp/ip object.
121  */
122 
123 
124 /* Closes the file belonging to a flow.
125  * Does not take tcpip out of flow database.
126  * Does not change pos.
127  */
close_file()128 void tcpip::close_file()
129 {
130     if (fd>=0){
131 	struct timeval times[2];
132 	times[0] = myflow.tstart;
133 	times[1] = myflow.tstart;
134 
135 	DEBUG(5) ("%s: closing file in tcpip::close_file", flow_pathname.c_str());
136 	/* close the file and remember that it's closed */
137 #if defined(HAVE_FUTIMES)
138 	if(futimes(fd,times)){
139 	    fprintf(stderr,"%s: futimes(fd=%d)\n",strerror(errno),fd);
140             abort();
141 	}
142 #elif defined(HAVE_FUTIMENS)
143 	struct timespec tstimes[2];
144 	for(int i=0;i<2;i++){
145 	    tstimes[i].tv_sec = times[i].tv_sec;
146 	    tstimes[i].tv_nsec = times[i].tv_usec * 1000;
147 	}
148 	if(futimens(fd,tstimes)){
149 	    perror("futimens(fd=%d)",fd);
150 	}
151 #endif
152 	close(fd);
153 	fd = -1;
154 	demux.open_flows.erase(this);           // we are no longer open
155     }
156     // Also close the flow_index file, if flow indexing is in use --GDD
157     if(demux.opt.output_packet_index && idx_file.is_open()){
158     	idx_file.close();
159     }
160     //std::cerr << "close_file1 " << *this << "\n";
161 }
162 
/*
 * Opens the flow's transcript file, creating it if necessary.
 * Called by store_packet().
 * Does not change pos.
 */
168 
open_file()169 int tcpip::open_file()
170 {
171 	int create_idx_needed = false;
172     if(fd<0){
173         //std::cerr << "open_file0 " << ct << " " << *this << "\n";
174         /* If we don't have a filename, create the flow */
175         if(flow_pathname.size()==0) {
176             flow_pathname = myflow.new_filename(&fd,O_RDWR|O_BINARY|O_CREAT|O_EXCL,0666);
177             file_created = true;		// remember we made it
178             create_idx_needed = true;	// We created a new stream, so we need to create a new flow file. --GDD
179             DEBUG(5) ("%s: created new file",flow_pathname.c_str());
180         } else {
181             /* open an existing flow */
182             fd = demux.retrying_open(flow_pathname,O_RDWR | O_BINARY | O_CREAT,0666);
183             lseek(fd,pos,SEEK_SET);
184             DEBUG(5) ("%s: opening existing file", flow_pathname.c_str());
185         }
186 
187         /* If the file isn't open at this point, there's a problem */
188         if (fd < 0 ) {
189             /* we had some problem opening the file -- set FINISHED so we
190              * don't keep trying over and over again to reopen it
191              */
192             perror(flow_pathname.c_str());
193             return -1;
194         }
195         /* Remember that we have this open */
196         demux.open_flows.push_back(this);
197         if(demux.open_flows.size() > demux.max_open_flows) demux.max_open_flows = demux.open_flows.size();
198         //std::cerr << "open_file1 " << *this << "\n";
199     }
200     if(demux.opt.output_packet_index){
201     	//Open the file for the flow index.  We don't do this if the flow file could not be
202     	//	opened.  The file must be opened for append, in case this is a reopen.  The filename
203     	//	standard is the flow name followed by ".findx", which google currently says does not
204     	//	conflict with anything major.
205     	flow_index_pathname = flow_pathname + ".findx";
206     	DEBUG(10)("opening index file: %s",flow_index_pathname.c_str());
207     	if(create_idx_needed){
208     		//New flow file, even if there was an old one laying around --GDD
209     		idx_file.open(flow_index_pathname.c_str(),std::ios::trunc|std::ios::in|std::ios::out);
210     	}else{
211     		//Use existing flow file --GDD
212     		idx_file.open(flow_index_pathname.c_str(),std::ios::ate|std::ios::in|std::ios::out);
213     	}
214     	if(idx_file.bad()){
215     		perror(flow_index_pathname.c_str());
216     		// Be nice and be sure the flow has been closed in the demultiplexer.
217     		// demux.close_tcpip_fd(this);  Need to fix this.  Also, when called, it will
218     		// have to differentiate the fact that the open fd cound only needs to be
219     		// decremented by one and not by 2.--GDD
220     		return -1;
221     	}
222 
223     }
224     return 0;
225 }
226 
227 
228 
229 /*************************************************************************/
230 
231 /* print the contents of this packet to the console.
232  * This is nice for immediate satisfaction, but it can't handle
233  * out of order packets, etc.
234  */
print_packet(const u_char * data,uint32_t length)235 void tcpip::print_packet(const u_char *data, uint32_t length)
236 {
237     /* green, blue, read */
238     const char *color[3] = { "\033[0;32m", "\033[0;34m", "\033[0;31m" };
239 
240     if(demux.opt.max_bytes_per_flow>=0){
241         uint64_t max_bytes_per_flow = (uint64_t)demux.opt.max_bytes_per_flow;
242 
243 	if(last_byte > max_bytes_per_flow) return; /* too much has been printed */
244 	if(length > max_bytes_per_flow - last_byte){
245 	    length = max_bytes_per_flow - last_byte; /* can only output this much */
246 	    if(length==0) return;
247 	}
248     }
249 
250 #ifdef HAVE_PTHREAD
251     if(semlock){
252 	if(sem_wait(semlock)){
253 	    fprintf(stderr,"%s: attempt to acquire semaphore failed: %s\n",progname,strerror(errno));
254 	    exit(1);
255 	}
256     }
257 #endif
258 
259     if (demux.opt.use_color) fputs(dir==dir_cs ? color[1] : color[2], stdout);
260     if (demux.opt.suppress_header == 0){
261         if(flow_pathname.size()==0) flow_pathname = myflow.filename(0);
262         printf("%s: ", flow_pathname.c_str());
263         if(demux.opt.output_hex) putchar('\n');
264     }
265 
266     size_t written = 0;
267     if(demux.opt.output_hex){
268         const size_t bytes_per_line = 32;
269         size_t max_spaces = 0;
270         for(u_int i=0;i<length;i+=bytes_per_line){
271             size_t spaces=0;
272 
273             /* Print the offset */
274             char b[64];
275             size_t count = snprintf(b,sizeof(b),"%04x: ",(int)i);
276             if(fwrite(b,1,count,stdout)!=count){
277 	      perror("fwrite");
278 	    }
279             spaces += count;
280 
281             /* Print the hext bytes */
282             for(size_t j=0;j<bytes_per_line && i+j<length ;j++){
283                 unsigned char ch = data[i+j];
284                 fprintf(stdout,"%02x",ch);  spaces += 2;
285                 if(j%2==1){
286                     fputc(' ',stdout);
287                     spaces += 1;
288                 }
289             }
290             /* space out to where the ASCII region is */
291             if(spaces>max_spaces) max_spaces=spaces;
292             for(;spaces<max_spaces;spaces++){
293                 fputc(' ',stdout);
294             }
295             putchar(' ');
296             /* Print the ascii */
297             for(size_t j=0;j<bytes_per_line && i+j<length;j++){
298                 unsigned char ch = data[i+j];
299                 if(ch>=' ' && ch<='~') fputc(ch,stdout);
300                 else fputc('.',stdout);
301             }
302             fputc('\n',stdout);
303         }
304         written = length;               // just fake it.
305     }
306     else if(demux.opt.output_strip_nonprint){
307 	for(const u_char *cc = data;cc<data+length;cc++){
308 	    if(isprint(*cc) || (*cc=='\n') || (*cc=='\r')){
309                 int ret = fputc(*cc,stdout);
310                 if(ret==EOF){
311                     std::cerr << "EOF on write to stdout\n";
312                     exit(1);
313 
314                 }
315 	    }
316 	    else fputc('.',stdout);
317             written += 1; // treat even unprintable characters as "written". It
318                           // really means "processed"
319 	}
320     }
321     else {
322 	written = fwrite(data,1,length,stdout);
323         if(length != written) std::cerr << "\nwrite error to stdout (" << length << "!=" << written << ") \n";
324     }
325 
326     last_byte += length;
327 
328     if (demux.opt.use_color) printf("\033[0m");
329 
330     if (! demux.opt.console_output_nonewline) putchar('\n');
331     fflush(stdout);
332 
333 #ifdef HAVE_PTHREAD
334     if(semlock){
335 	if(sem_post(semlock)){
336 	    fprintf(stderr,"%s: attempt to post semaphore failed: %s\n",progname,strerror(errno));
337 	    exit(1);
338 	}
339     }
340 #endif
341 }
342 
/*
 * shift_file():
 * Moves the existing contents of a file inslen bytes toward the end, which
 * opens a gap of inslen bytes at the beginning of the file for inserted data.
 *
 * Based on:
 * http://stackoverflow.com/questions/10467711/c-write-in-the-middle-of-a-binary-file-without-overwriting-any-existing-content
 */
350 
shift_file(int fd,size_t inslen)351 static int shift_file(int fd, size_t inslen)
352 {
353     enum { BUFFERSIZE = 64 * 1024 };
354     char buffer[BUFFERSIZE];
355     struct stat sb;
356 
357     DEBUG(100)("shift_file(%d,%d)",fd,(int)inslen);
358 
359     if (fstat(fd, &sb) != 0) return -1;
360 
361     /* Move data after offset up by inslen bytes */
362     size_t bytes_to_move = sb.st_size;
363     off_t read_end_offset = sb.st_size;
364     while (bytes_to_move != 0) {
365 	ssize_t bytes_this_time = bytes_to_move < BUFFERSIZE ? bytes_to_move : BUFFERSIZE ;
366 	ssize_t rd_off = read_end_offset - bytes_this_time;
367 	ssize_t wr_off = rd_off + inslen;
368 	lseek(fd, rd_off, SEEK_SET);
369 	if (read(fd, buffer, bytes_this_time) != bytes_this_time)
370 	    return -1;
371 	lseek(fd, wr_off, SEEK_SET);
372 	if (write(fd, buffer, bytes_this_time) != bytes_this_time)
373 	    return -1;
374 	bytes_to_move -= bytes_this_time;
375     }
376     return 0;
377 }
378 
379 #pragma GCC diagnostic ignored "-Weffc++"
update_seen(recon_set * seen,uint64_t pos,uint32_t length)380 void update_seen(recon_set *seen,uint64_t pos,uint32_t length)
381 {
382     if(seen){
383         (*seen) += boost::icl::discrete_interval<uint64_t>::closed(pos,pos+length-1);
384     }
385 }
386 
387 /* store the contents of this packet to its place in its file
388  * This has to handle out-of-order packets as well as writes
389  * past the 4GiB boundary.
390  *
391  * 2012-10-24 Originally this code simply computed the 32-bit offset
392  * from the beginning of the file using the isn. The new version tracks
393  * nsn (the expected next sequence number for the open file).
394  *
395  * A relative seek before the beginning of the file means that we need
396  * to insert.  A relative seek more than max_seek means that we have a
397  * different flow that needs to be separately handled.
398  *
399  * called from tcpdemux::process_tcp_packet()
400  */
store_packet(const u_char * data,uint32_t length,int32_t delta,struct timeval ts)401 void tcpip::store_packet(const u_char *data, uint32_t length, int32_t delta,struct timeval ts)
402 {
403     if(length==0) return;               // no need to do anything
404 
405     uint32_t insert_bytes=0;
406     uint64_t offset = pos+delta;	// where the data will go in absolute byte positions (first byte is pos=0)
407 
408     if((int64_t)offset < 0){
409 	/* We got bytes before the beginning of the TCP connection.
410 	 * Either this is a protocol violation,
411 	 * or else we never saw a SYN and we got the ISN wrong.
412 	 */
413 	if(syn_count>0){
414 	    DEBUG(2)("packet received with offset %" PRId64 "; ignoring",offset);
415 	    violations++;
416 	    return;
417 	}
418 	insert_bytes = -offset;		// open up this much space
419 	offset = 0;			// and write the data here
420     }
421 
422     /* reduce length to write if it goes beyond the number of bytes per flow,
423      * but remember to seek out to the actual position after the truncated write...
424      */
425     uint32_t wlength = length;		// length to write
426     if (demux.opt.max_bytes_per_flow >= 0){
427         uint64_t max_bytes_per_flow = (uint64_t)demux.opt.max_bytes_per_flow;
428 
429 	if(offset >= max_bytes_per_flow){
430 	    wlength = 0;
431 	}
432 	if(offset < max_bytes_per_flow &&  offset+length > max_bytes_per_flow){
433 	    DEBUG(2) ("packet truncated by max_bytes_per_flow on %s", flow_pathname.c_str());
434 	    wlength = max_bytes_per_flow - offset;
435 	}
436     }
437 
438     /* if we don't have a file open for this flow, try to open it.
439      * return if the open fails.  Note that we don't have to explicitly
440      * save the return value because open_tcpfile() puts the file pointer
441      * into the structure for us.
442      */
443     if (fd < 0) {
444 	if (open_file()) {
445 	    DEBUG(1)("unable to open TCP file %s  fd=%d  wlength=%d",
446                      flow_pathname.c_str(),fd,(int)wlength);
447 	    return;
448 	}
449     }
450 
451     /* Shift the file now if we were going shift it */
452 
453     if(insert_bytes>0){
454 	if(fd>=0) shift_file(fd,insert_bytes);
455 	isn -= insert_bytes;		// it's really earlier
456 	lseek(fd,(off_t)0,SEEK_SET);	// put at the beginning
457 	pos = 0;
458 	nsn = isn+1;
459 	out_of_order_count++;
460 	DEBUG(25)("%s: insert(0,%d); lseek(%d,0,SEEK_SET) out_of_order_count=%" PRId64,
461 		  flow_pathname.c_str(), insert_bytes,
462 		  fd,out_of_order_count);
463 
464         /* TK: If we have seen packets, everything in the recon set needs to be shifted as well.*/
465         if(seen){
466             delete seen;
467             seen = 0;
468         }
469     }
470 
471     /* if we're not at the correct point in the file, seek there */
472     if (offset != pos) {
473         /* Check for a keepalive */
474         if(delta == -1 && length == 1) {
475             DEBUG(25)("%s: RFC1122 keepalive detected and ignored",flow_pathname.c_str());
476             return;
477         }
478 
479 	if(fd>=0) lseek(fd,(off_t)delta,SEEK_CUR);
480 	if(delta<0) out_of_order_count++; // only increment for backwards seeks
481 	DEBUG(25)("%s: lseek(%d,%d,SEEK_CUR) offset=%" PRId64 " pos=%" PRId64 " out_of_order_count=%" PRId64,
482 		  flow_pathname.c_str(), fd,(int)delta,offset,pos,out_of_order_count);
483 	pos += delta;			// where we are now
484 	nsn += delta;			// what we expect the nsn to be now
485     }
486 
487     /* write the data into the file */
488     DEBUG(25) ("%s: %s write %ld bytes @%" PRId64,
489                flow_pathname.c_str(),
490                fd>=0 ? "will" : "won't",
491                (long) wlength, offset);
492 
493     if(fd>=0){
494       if ((uint32_t)write(fd,data, wlength) != wlength) {
495 	    DEBUG(1) ("write to %s failed: ", flow_pathname.c_str());
496 	    if (debug >= 1) perror("");
497 	}
498 	// Write to the index file if needed.  Note, index file is sorted before close, so no need to jump around --GDD
499 		if (demux.opt.output_packet_index && idx_file.is_open()) {
500 			idx_file << offset << "|" << ts.tv_sec << "." << std::setw(6) << std::setfill('0') << ts.tv_usec << "|"
501 					<< wlength << "\n";
502 			if (idx_file.bad()){
503 				DEBUG(1)("write to index file %s failed: ",flow_index_pathname.c_str());
504 				if(debug >= 1){
505 					perror("");
506 				}
507 			}
508 		}
509 	if(wlength != length){
510 	    off_t p = lseek(fd,length-wlength,SEEK_CUR); // seek out the space we didn't write
511             DEBUG(100)("   lseek(%" PRId64 ",SEEK_CUR)=%" PRId64,(int64_t)(length-wlength),(int64_t)p);
512 	}
513     }
514 
515     /* Update the database of bytes that we've seen */
516     if(seen) update_seen(seen,pos,length);
517 
518     /* Update the position in the file and the next expected sequence number */
519     pos += length;
520     nsn += length;			// expected next sequence number
521 
522     if(pos>last_byte) last_byte = pos;
523 
524     if(debug>=100){
525         uint64_t rpos = lseek(fd,(off_t)0,SEEK_CUR);
526         DEBUG(100)("    pos=%" PRId64 "  lseek(fd,0,SEEK_CUR)=%" PRId64,pos,rpos);
527         assert(pos==rpos);
528     }
529 
530 #ifdef DEBUG_REOPEN_LOGIC
531     /* For debugging, force this connection closed */
532     demux.close_tcpip_fd(this);
533 #endif
534 }
535 
/*
 * Compare two index records and return true if the first sorts before the
 * second.  Used as the comparison function for std::sort in sort_index().
 * --GDD
 */
compare(std::string a,std::string b)541 bool tcpip::compare(std::string a, std::string b){
542 	std::stringstream ss_a(a),ss_b(b);
543 	long a_l,b_l;
544 
545 	ss_a >> a_l;
546 	ss_b >> b_l;
547 	return a_l < b_l;
548 }
549 
550 /*
551  * Sort an index file (presumably from this object) if file indexing is
552  * turned on and the file exists.  Index files may be out of order due
553  * to the arrival of out of order packets.  It is cheaper to reorder them
554  * one time at the end of processing than it is to continually keep them
555  * in order.
556  * --GDD
557  */
sort_index(std::fstream * ix_file)558 void tcpip::sort_index(std::fstream *ix_file) {
559 
560 	std::vector<std::string> idx;
561 	std::string line;
562 
563 	if (demux.opt.output_packet_index) {
564 		if (!(idx_file.good() && idx_file.is_open())) {
565 			DEBUG(5)("Skipping index file sort.  Unusual behavior.\n");
566 			return; //Nothing to do
567 		}
568 		//Make sure we are at the beginning.
569 		ix_file->clear();
570 		ix_file->seekg(0);
571 		do {
572 			*ix_file >> line;
573 			if (!ix_file->eof()) {
574 				idx.push_back(line);
575 			}
576 		} while (ix_file->good());
577 		std::sort(idx.begin(), idx.end(), &tcpip::compare);
578 		ix_file->clear();
579 		ix_file->seekg(0);
580 		for (std::vector<std::string>::iterator s = idx.begin(); s != idx.end();
581 				s++) {
582 			*ix_file << *s << "\n";
583 		}
584 	}
585 }
586 
587 /*
588  * Convenience function to cause the local index file to be sorted.
589  * --GDD
590  */
sort_index()591 void tcpip::sort_index(){
592 	tcpip::sort_index(&(this->idx_file));
593 }
594 
595 #pragma GCC diagnostic ignored "-Weffc++"
596 #pragma GCC diagnostic ignored "-Wshadow"
597 
598 /* Note --- Turn off warning so that creating the seen() map doesn't throw an error */
599 //#pragma GCC diagnostic ignored "-Weffc++"
600