1 /*
2 * This file is part of tcpflow by Simson Garfinkel,
3 * originally by Jeremy Elson <jelson@circlemud.org>
4 *
5 * Modified by Greg Drew to add support for creating a packet time / data index
6 * which allows mapping bytes in the flow back to their relative arrival time.
7 * This is very useful in reassembling inherently bidirectional conversations
8 * such as chat or telnet sessions. --GDD
9 *
10 * This source code is under the GNU Public License (GPL). See
11 * LICENSE for details.
12 *
13 */
14
15 #include "tcpflow.h"
16 #include "tcpip.h"
17 #include "tcpdemux.h"
18
19 #include <iostream>
20 #include <sstream>
21 #include <vector>
22 #include <string>
23
24 #pragma GCC diagnostic ignored "-Weffc++"
25 #pragma GCC diagnostic ignored "-Wshadow"
26
27
28 /* Create a new tcp object.
29 *
30 * Creating a new object creates a new passive TCP/IP decoder.
31 * It will *NOT* append to a flow that is already on the disk or in memory.
32 *
33 * called from tcpdemux::create_tcpip()
34 */
tcpip(tcpdemux & demux_,const flow & flow_,be13::tcp_seq isn_)35 tcpip::tcpip(tcpdemux &demux_,const flow &flow_,be13::tcp_seq isn_):
36 demux(demux_),myflow(flow_),dir(unknown),isn(isn_),nsn(0),
37 syn_count(0),fin_count(0),fin_size(0),pos(0),
38 flow_pathname(),fd(-1),file_created(false),
39 flow_index_pathname(),idx_file(),
40 seen(new recon_set()),
41 last_byte(),
42 last_packet_number(),out_of_order_count(0),violations(0)
43 {
44 }
45
46
seen_bytes()47 uint32_t tcpip::seen_bytes()
48 {
49 if(seen) return seen->size();
50 return 0;
51 }
52
dump_seen()53 void tcpip::dump_seen()
54 {
55 if(seen){
56 for(recon_set::const_iterator it = seen->begin(); it!=seen->end(); it++){
57 std::cerr << *it << ", ";
58 }
59 std::cerr << std::endl;
60 }
61 }
62
dump_xml(class dfxml_writer * xreport,const std::string & xmladd)63 void tcpip::dump_xml(class dfxml_writer *xreport,const std::string &xmladd)
64 {
65 static const std::string fileobject_str("fileobject");
66 static const std::string filesize_str("filesize");
67 static const std::string filename_str("filename");
68 static const std::string tcpflow_str("tcpflow");
69
70 xreport->push(fileobject_str);
71 if(flow_pathname.size()) xreport->xmlout(filename_str,flow_pathname);
72
73 xreport->xmlout(filesize_str,last_byte);
74
75 std::stringstream attrs;
76 attrs << "startime='" << dfxml_writer::to8601(myflow.tstart) << "' ";
77 attrs << "endtime='" << dfxml_writer::to8601(myflow.tlast) << "' ";
78 if(myflow.has_mac_daddr()) attrs << "mac_daddr='" << macaddr(myflow.mac_daddr) << "' ";
79 if(myflow.has_mac_saddr()) attrs << "mac_saddr='" << macaddr(myflow.mac_saddr) << "' ";
80 attrs << "family='" << (int)myflow.family << "' ";
81 attrs << "src_ipn='" << ipaddr_prn(myflow.src, myflow.family) << "' ";
82 attrs << "dst_ipn='" << ipaddr_prn(myflow.dst, myflow.family) << "' ";
83 attrs << "srcport='" << myflow.sport << "' ";
84 attrs << "dstport='" << myflow.dport << "' ";
85 attrs << "packets='" << myflow.packet_count << "' ";
86 if(out_of_order_count) attrs << "out_of_order_count='" << out_of_order_count << "' ";
87 if(violations) attrs << "violations='" << violations << "' ";
88 attrs << "len='" << myflow.len << "' ";
89 if(myflow.len != myflow.caplen) attrs << "caplen='" << myflow.caplen << "' ";
90 xreport->xmlout(tcpflow_str,"",attrs.str(),false);
91 if(xmladd.size()>0) xreport->xmlout("",xmladd,"",false);
92 xreport->pop();
93 xreport->flush();
94 }
95
96
97 /**
98 * Destructor is called when flow is closed.
99 * It implements "after" processing.
100 * This should only be called from remove_flow() or remove_all_flows()
101 * when a flow is deleted.
102 */
~tcpip()103 tcpip::~tcpip()
104 {
105 assert(fd<0); // file must be closed
106 if(seen) delete seen;
107 }
108
109 #pragma GCC diagnostic warning "-Weffc++"
110 #pragma GCC diagnostic warning "-Wshadow"
111
112
113 /****************************************************************
114 ** SAVE FILE MANAGEMENT
115 ****************************************************************
116 *
117 * Unlike the tcp/ip object, which is created once, the file can be opened, closed, and
118 * re-opened depending on the availability of file handles.
119 *
120 * Closing the file does not delete the tcp/ip object.
121 */
122
123
124 /* Closes the file belonging to a flow.
125 * Does not take tcpip out of flow database.
126 * Does not change pos.
127 */
close_file()128 void tcpip::close_file()
129 {
130 if (fd>=0){
131 struct timeval times[2];
132 times[0] = myflow.tstart;
133 times[1] = myflow.tstart;
134
135 DEBUG(5) ("%s: closing file in tcpip::close_file", flow_pathname.c_str());
136 /* close the file and remember that it's closed */
137 #if defined(HAVE_FUTIMES)
138 if(futimes(fd,times)){
139 fprintf(stderr,"%s: futimes(fd=%d)\n",strerror(errno),fd);
140 abort();
141 }
142 #elif defined(HAVE_FUTIMENS)
143 struct timespec tstimes[2];
144 for(int i=0;i<2;i++){
145 tstimes[i].tv_sec = times[i].tv_sec;
146 tstimes[i].tv_nsec = times[i].tv_usec * 1000;
147 }
148 if(futimens(fd,tstimes)){
149 perror("futimens(fd=%d)",fd);
150 }
151 #endif
152 close(fd);
153 fd = -1;
154 demux.open_flows.erase(this); // we are no longer open
155 }
156 // Also close the flow_index file, if flow indexing is in use --GDD
157 if(demux.opt.output_packet_index && idx_file.is_open()){
158 idx_file.close();
159 }
160 //std::cerr << "close_file1 " << *this << "\n";
161 }
162
163 /*
164 * Opens the file transcript file (creating file if necessary).
165 * Called by store_packet()
166 * Does not change pos.
167 */
168
open_file()169 int tcpip::open_file()
170 {
171 int create_idx_needed = false;
172 if(fd<0){
173 //std::cerr << "open_file0 " << ct << " " << *this << "\n";
174 /* If we don't have a filename, create the flow */
175 if(flow_pathname.size()==0) {
176 flow_pathname = myflow.new_filename(&fd,O_RDWR|O_BINARY|O_CREAT|O_EXCL,0666);
177 file_created = true; // remember we made it
178 create_idx_needed = true; // We created a new stream, so we need to create a new flow file. --GDD
179 DEBUG(5) ("%s: created new file",flow_pathname.c_str());
180 } else {
181 /* open an existing flow */
182 fd = demux.retrying_open(flow_pathname,O_RDWR | O_BINARY | O_CREAT,0666);
183 lseek(fd,pos,SEEK_SET);
184 DEBUG(5) ("%s: opening existing file", flow_pathname.c_str());
185 }
186
187 /* If the file isn't open at this point, there's a problem */
188 if (fd < 0 ) {
189 /* we had some problem opening the file -- set FINISHED so we
190 * don't keep trying over and over again to reopen it
191 */
192 perror(flow_pathname.c_str());
193 return -1;
194 }
195 /* Remember that we have this open */
196 demux.open_flows.push_back(this);
197 if(demux.open_flows.size() > demux.max_open_flows) demux.max_open_flows = demux.open_flows.size();
198 //std::cerr << "open_file1 " << *this << "\n";
199 }
200 if(demux.opt.output_packet_index){
201 //Open the file for the flow index. We don't do this if the flow file could not be
202 // opened. The file must be opened for append, in case this is a reopen. The filename
203 // standard is the flow name followed by ".findx", which google currently says does not
204 // conflict with anything major.
205 flow_index_pathname = flow_pathname + ".findx";
206 DEBUG(10)("opening index file: %s",flow_index_pathname.c_str());
207 if(create_idx_needed){
208 //New flow file, even if there was an old one laying around --GDD
209 idx_file.open(flow_index_pathname.c_str(),std::ios::trunc|std::ios::in|std::ios::out);
210 }else{
211 //Use existing flow file --GDD
212 idx_file.open(flow_index_pathname.c_str(),std::ios::ate|std::ios::in|std::ios::out);
213 }
214 if(idx_file.bad()){
215 perror(flow_index_pathname.c_str());
216 // Be nice and be sure the flow has been closed in the demultiplexer.
217 // demux.close_tcpip_fd(this); Need to fix this. Also, when called, it will
218 // have to differentiate the fact that the open fd cound only needs to be
219 // decremented by one and not by 2.--GDD
220 return -1;
221 }
222
223 }
224 return 0;
225 }
226
227
228
229 /*************************************************************************/
230
231 /* print the contents of this packet to the console.
232 * This is nice for immediate satisfaction, but it can't handle
233 * out of order packets, etc.
234 */
print_packet(const u_char * data,uint32_t length)235 void tcpip::print_packet(const u_char *data, uint32_t length)
236 {
237 /* green, blue, read */
238 const char *color[3] = { "\033[0;32m", "\033[0;34m", "\033[0;31m" };
239
240 if(demux.opt.max_bytes_per_flow>=0){
241 uint64_t max_bytes_per_flow = (uint64_t)demux.opt.max_bytes_per_flow;
242
243 if(last_byte > max_bytes_per_flow) return; /* too much has been printed */
244 if(length > max_bytes_per_flow - last_byte){
245 length = max_bytes_per_flow - last_byte; /* can only output this much */
246 if(length==0) return;
247 }
248 }
249
250 #ifdef HAVE_PTHREAD
251 if(semlock){
252 if(sem_wait(semlock)){
253 fprintf(stderr,"%s: attempt to acquire semaphore failed: %s\n",progname,strerror(errno));
254 exit(1);
255 }
256 }
257 #endif
258
259 if (demux.opt.use_color) fputs(dir==dir_cs ? color[1] : color[2], stdout);
260 if (demux.opt.suppress_header == 0){
261 if(flow_pathname.size()==0) flow_pathname = myflow.filename(0);
262 printf("%s: ", flow_pathname.c_str());
263 if(demux.opt.output_hex) putchar('\n');
264 }
265
266 size_t written = 0;
267 if(demux.opt.output_hex){
268 const size_t bytes_per_line = 32;
269 size_t max_spaces = 0;
270 for(u_int i=0;i<length;i+=bytes_per_line){
271 size_t spaces=0;
272
273 /* Print the offset */
274 char b[64];
275 size_t count = snprintf(b,sizeof(b),"%04x: ",(int)i);
276 if(fwrite(b,1,count,stdout)!=count){
277 perror("fwrite");
278 }
279 spaces += count;
280
281 /* Print the hext bytes */
282 for(size_t j=0;j<bytes_per_line && i+j<length ;j++){
283 unsigned char ch = data[i+j];
284 fprintf(stdout,"%02x",ch); spaces += 2;
285 if(j%2==1){
286 fputc(' ',stdout);
287 spaces += 1;
288 }
289 }
290 /* space out to where the ASCII region is */
291 if(spaces>max_spaces) max_spaces=spaces;
292 for(;spaces<max_spaces;spaces++){
293 fputc(' ',stdout);
294 }
295 putchar(' ');
296 /* Print the ascii */
297 for(size_t j=0;j<bytes_per_line && i+j<length;j++){
298 unsigned char ch = data[i+j];
299 if(ch>=' ' && ch<='~') fputc(ch,stdout);
300 else fputc('.',stdout);
301 }
302 fputc('\n',stdout);
303 }
304 written = length; // just fake it.
305 }
306 else if(demux.opt.output_strip_nonprint){
307 for(const u_char *cc = data;cc<data+length;cc++){
308 if(isprint(*cc) || (*cc=='\n') || (*cc=='\r')){
309 int ret = fputc(*cc,stdout);
310 if(ret==EOF){
311 std::cerr << "EOF on write to stdout\n";
312 exit(1);
313
314 }
315 }
316 else fputc('.',stdout);
317 written += 1; // treat even unprintable characters as "written". It
318 // really means "processed"
319 }
320 }
321 else {
322 written = fwrite(data,1,length,stdout);
323 if(length != written) std::cerr << "\nwrite error to stdout (" << length << "!=" << written << ") \n";
324 }
325
326 last_byte += length;
327
328 if (demux.opt.use_color) printf("\033[0m");
329
330 if (! demux.opt.console_output_nonewline) putchar('\n');
331 fflush(stdout);
332
333 #ifdef HAVE_PTHREAD
334 if(semlock){
335 if(sem_post(semlock)){
336 fprintf(stderr,"%s: attempt to post semaphore failed: %s\n",progname,strerror(errno));
337 exit(1);
338 }
339 }
340 #endif
341 }
342
343 /*
344 * extend_file_and_insert():
345 * A handy function for inserting in the middle or beginning of a file.
346 *
347 * Based on:
348 * http://stackoverflow.com/questions/10467711/c-write-in-the-middle-of-a-binary-file-without-overwriting-any-existing-content
349 */
350
shift_file(int fd,size_t inslen)351 static int shift_file(int fd, size_t inslen)
352 {
353 enum { BUFFERSIZE = 64 * 1024 };
354 char buffer[BUFFERSIZE];
355 struct stat sb;
356
357 DEBUG(100)("shift_file(%d,%d)",fd,(int)inslen);
358
359 if (fstat(fd, &sb) != 0) return -1;
360
361 /* Move data after offset up by inslen bytes */
362 size_t bytes_to_move = sb.st_size;
363 off_t read_end_offset = sb.st_size;
364 while (bytes_to_move != 0) {
365 ssize_t bytes_this_time = bytes_to_move < BUFFERSIZE ? bytes_to_move : BUFFERSIZE ;
366 ssize_t rd_off = read_end_offset - bytes_this_time;
367 ssize_t wr_off = rd_off + inslen;
368 lseek(fd, rd_off, SEEK_SET);
369 if (read(fd, buffer, bytes_this_time) != bytes_this_time)
370 return -1;
371 lseek(fd, wr_off, SEEK_SET);
372 if (write(fd, buffer, bytes_this_time) != bytes_this_time)
373 return -1;
374 bytes_to_move -= bytes_this_time;
375 }
376 return 0;
377 }
378
379 #pragma GCC diagnostic ignored "-Weffc++"
update_seen(recon_set * seen,uint64_t pos,uint32_t length)380 void update_seen(recon_set *seen,uint64_t pos,uint32_t length)
381 {
382 if(seen){
383 (*seen) += boost::icl::discrete_interval<uint64_t>::closed(pos,pos+length-1);
384 }
385 }
386
387 /* store the contents of this packet to its place in its file
388 * This has to handle out-of-order packets as well as writes
389 * past the 4GiB boundary.
390 *
391 * 2012-10-24 Originally this code simply computed the 32-bit offset
392 * from the beginning of the file using the isn. The new version tracks
393 * nsn (the expected next sequence number for the open file).
394 *
395 * A relative seek before the beginning of the file means that we need
396 * to insert. A relative seek more than max_seek means that we have a
397 * different flow that needs to be separately handled.
398 *
399 * called from tcpdemux::process_tcp_packet()
400 */
store_packet(const u_char * data,uint32_t length,int32_t delta,struct timeval ts)401 void tcpip::store_packet(const u_char *data, uint32_t length, int32_t delta,struct timeval ts)
402 {
403 if(length==0) return; // no need to do anything
404
405 uint32_t insert_bytes=0;
406 uint64_t offset = pos+delta; // where the data will go in absolute byte positions (first byte is pos=0)
407
408 if((int64_t)offset < 0){
409 /* We got bytes before the beginning of the TCP connection.
410 * Either this is a protocol violation,
411 * or else we never saw a SYN and we got the ISN wrong.
412 */
413 if(syn_count>0){
414 DEBUG(2)("packet received with offset %" PRId64 "; ignoring",offset);
415 violations++;
416 return;
417 }
418 insert_bytes = -offset; // open up this much space
419 offset = 0; // and write the data here
420 }
421
422 /* reduce length to write if it goes beyond the number of bytes per flow,
423 * but remember to seek out to the actual position after the truncated write...
424 */
425 uint32_t wlength = length; // length to write
426 if (demux.opt.max_bytes_per_flow >= 0){
427 uint64_t max_bytes_per_flow = (uint64_t)demux.opt.max_bytes_per_flow;
428
429 if(offset >= max_bytes_per_flow){
430 wlength = 0;
431 }
432 if(offset < max_bytes_per_flow && offset+length > max_bytes_per_flow){
433 DEBUG(2) ("packet truncated by max_bytes_per_flow on %s", flow_pathname.c_str());
434 wlength = max_bytes_per_flow - offset;
435 }
436 }
437
438 /* if we don't have a file open for this flow, try to open it.
439 * return if the open fails. Note that we don't have to explicitly
440 * save the return value because open_tcpfile() puts the file pointer
441 * into the structure for us.
442 */
443 if (fd < 0) {
444 if (open_file()) {
445 DEBUG(1)("unable to open TCP file %s fd=%d wlength=%d",
446 flow_pathname.c_str(),fd,(int)wlength);
447 return;
448 }
449 }
450
451 /* Shift the file now if we were going shift it */
452
453 if(insert_bytes>0){
454 if(fd>=0) shift_file(fd,insert_bytes);
455 isn -= insert_bytes; // it's really earlier
456 lseek(fd,(off_t)0,SEEK_SET); // put at the beginning
457 pos = 0;
458 nsn = isn+1;
459 out_of_order_count++;
460 DEBUG(25)("%s: insert(0,%d); lseek(%d,0,SEEK_SET) out_of_order_count=%" PRId64,
461 flow_pathname.c_str(), insert_bytes,
462 fd,out_of_order_count);
463
464 /* TK: If we have seen packets, everything in the recon set needs to be shifted as well.*/
465 if(seen){
466 delete seen;
467 seen = 0;
468 }
469 }
470
471 /* if we're not at the correct point in the file, seek there */
472 if (offset != pos) {
473 /* Check for a keepalive */
474 if(delta == -1 && length == 1) {
475 DEBUG(25)("%s: RFC1122 keepalive detected and ignored",flow_pathname.c_str());
476 return;
477 }
478
479 if(fd>=0) lseek(fd,(off_t)delta,SEEK_CUR);
480 if(delta<0) out_of_order_count++; // only increment for backwards seeks
481 DEBUG(25)("%s: lseek(%d,%d,SEEK_CUR) offset=%" PRId64 " pos=%" PRId64 " out_of_order_count=%" PRId64,
482 flow_pathname.c_str(), fd,(int)delta,offset,pos,out_of_order_count);
483 pos += delta; // where we are now
484 nsn += delta; // what we expect the nsn to be now
485 }
486
487 /* write the data into the file */
488 DEBUG(25) ("%s: %s write %ld bytes @%" PRId64,
489 flow_pathname.c_str(),
490 fd>=0 ? "will" : "won't",
491 (long) wlength, offset);
492
493 if(fd>=0){
494 if ((uint32_t)write(fd,data, wlength) != wlength) {
495 DEBUG(1) ("write to %s failed: ", flow_pathname.c_str());
496 if (debug >= 1) perror("");
497 }
498 // Write to the index file if needed. Note, index file is sorted before close, so no need to jump around --GDD
499 if (demux.opt.output_packet_index && idx_file.is_open()) {
500 idx_file << offset << "|" << ts.tv_sec << "." << std::setw(6) << std::setfill('0') << ts.tv_usec << "|"
501 << wlength << "\n";
502 if (idx_file.bad()){
503 DEBUG(1)("write to index file %s failed: ",flow_index_pathname.c_str());
504 if(debug >= 1){
505 perror("");
506 }
507 }
508 }
509 if(wlength != length){
510 off_t p = lseek(fd,length-wlength,SEEK_CUR); // seek out the space we didn't write
511 DEBUG(100)(" lseek(%" PRId64 ",SEEK_CUR)=%" PRId64,(int64_t)(length-wlength),(int64_t)p);
512 }
513 }
514
515 /* Update the database of bytes that we've seen */
516 if(seen) update_seen(seen,pos,length);
517
518 /* Update the position in the file and the next expected sequence number */
519 pos += length;
520 nsn += length; // expected next sequence number
521
522 if(pos>last_byte) last_byte = pos;
523
524 if(debug>=100){
525 uint64_t rpos = lseek(fd,(off_t)0,SEEK_CUR);
526 DEBUG(100)(" pos=%" PRId64 " lseek(fd,0,SEEK_CUR)=%" PRId64,pos,rpos);
527 assert(pos==rpos);
528 }
529
530 #ifdef DEBUG_REOPEN_LOGIC
531 /* For debugging, force this connection closed */
532 demux.close_tcpip_fd(this);
533 #endif
534 }
535
536 /*
537 * Compare two index strings and return the result. Called by
538 * the vector::sort in sort_index.
539 * --GDD
540 */
compare(std::string a,std::string b)541 bool tcpip::compare(std::string a, std::string b){
542 std::stringstream ss_a(a),ss_b(b);
543 long a_l,b_l;
544
545 ss_a >> a_l;
546 ss_b >> b_l;
547 return a_l < b_l;
548 }
549
550 /*
551 * Sort an index file (presumably from this object) if file indexing is
552 * turned on and the file exists. Index files may be out of order due
553 * to the arrival of out of order packets. It is cheaper to reorder them
554 * one time at the end of processing than it is to continually keep them
555 * in order.
556 * --GDD
557 */
sort_index(std::fstream * ix_file)558 void tcpip::sort_index(std::fstream *ix_file) {
559
560 std::vector<std::string> idx;
561 std::string line;
562
563 if (demux.opt.output_packet_index) {
564 if (!(idx_file.good() && idx_file.is_open())) {
565 DEBUG(5)("Skipping index file sort. Unusual behavior.\n");
566 return; //Nothing to do
567 }
568 //Make sure we are at the beginning.
569 ix_file->clear();
570 ix_file->seekg(0);
571 do {
572 *ix_file >> line;
573 if (!ix_file->eof()) {
574 idx.push_back(line);
575 }
576 } while (ix_file->good());
577 std::sort(idx.begin(), idx.end(), &tcpip::compare);
578 ix_file->clear();
579 ix_file->seekg(0);
580 for (std::vector<std::string>::iterator s = idx.begin(); s != idx.end();
581 s++) {
582 *ix_file << *s << "\n";
583 }
584 }
585 }
586
587 /*
588 * Convenience function to cause the local index file to be sorted.
589 * --GDD
590 */
sort_index()591 void tcpip::sort_index(){
592 tcpip::sort_index(&(this->idx_file));
593 }
594
595 #pragma GCC diagnostic ignored "-Weffc++"
596 #pragma GCC diagnostic ignored "-Wshadow"
597
598 /* Note --- Turn off warning so that creating the seen() map doesn't throw an error */
599 //#pragma GCC diagnostic ignored "-Weffc++"
600