1 /*
2  * Copyright (c) 1991, 1992, 1993, 1995, 1996, 1999
3  *	The Regents of the University of California.  All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that: (1) source code distributions
7  * retain the above copyright notice and this paragraph in its entirety, (2)
8  * distributions including binary code include the above copyright notice and
9  * this paragraph in its entirety in the documentation or other materials
10  * provided with the distribution, and (3) all advertising materials mentioning
11  * features or use of this software display the following acknowledgement:
12  * ``This product includes software developed by the University of California,
13  * Lawrence Berkeley Laboratory and its contributors.'' Neither the name of
14  * the University nor the names of its contributors may be used to endorse
15  * or promote products derived from this software without specific prior
16  * written permission.
17  * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR IMPLIED
18  * WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF
19  * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE.
20  */
21 
22 /*
23  * search.c - supports fast searching through pcap files for timestamps
24  */
25 
26 #ifdef HAVE_CONFIG_H
27 #include <config.h>
28 #endif
29 
30 #include <sys/types.h>
31 
32 #include <pcap.h>
33 #include <stdio.h>
34 #include <stdlib.h>
35 #include <string.h>
36 #include <time.h>
37 #include <unistd.h>
38 
39 #ifdef HAVE_OS_PROTO_H
40 #include "os-proto.h"
41 #endif
42 
43 #include "tcpslice.h"
44 
45 /*
46  * We directly read a pcap file, so declare the per-packet header
47  * ourselves.  It's not platform-dependent or version-dependent;
48  * if the per-packet header doesn't look *exactly* like this, it's
49  * not a valid pcap file.
50  */
51 
52 /*
53  * This is a timeval as stored in a savefile.
54  * It has to use the same types everywhere, independent of the actual
55  * `struct timeval'; `struct timeval' has 32-bit tv_sec values on some
56  * platforms and 64-bit tv_sec values on other platforms, and writing
57  * out native `struct timeval' values would mean files could only be
58  * read on systems with the same tv_sec size as the system on which
59  * the file was written.
60  */
61 struct pcap_timeval {
62     bpf_int32 tv_sec;		/* seconds */
63     bpf_int32 tv_usec;		/* microseconds */
64 };
65 
66 /*
67  * This is a `pcap_pkthdr' as actually stored in a savefile.
68  */
69 struct pcap_sf_pkthdr {
70     struct pcap_timeval ts;	/* time stamp */
71     bpf_u_int32 caplen;		/* length of portion present */
72     bpf_u_int32 len;		/* length this packet (off wire) */
73 };
74 
75 /* Maximum number of seconds that we can conceive of a dump file spanning. */
76 #define MAX_REASONABLE_FILE_SPAN (3600*24*366)	/* one year */
77 
78 /* Maximum packet length we ever expect to see. */
79 #define MAX_REASONABLE_PACKET_LENGTH 262144
80 
81 /* Size of a packet header in bytes; easier than typing the sizeof() all
82  * the time ...
83  */
84 #define PACKET_HDR_LEN (sizeof( struct pcap_sf_pkthdr ))
85 
86 extern int snaplen;
87 
88 /* The maximum size of a packet, including its header. */
89 #define MAX_PACKET_SIZE (PACKET_HDR_LEN + snaplen)
90 
91 /* Number of contiguous bytes from a dumpfile in which there's guaranteed
92  * to be enough information to find a "definite" header if one exists
93  * therein.  This takes 3 full packets - the first to be just misaligned
94  * (one byte short of a full packet), missing its timestamp; the second
95  * to have the legitimate timestamp; and the third to provide confirmation
96  * that the second is legit, making it a "definite" header.  We could
97  * scrimp a bit here since not the entire third packet is required, but
98  * it doesn't seem worth it
99  */
100 #define MAX_BYTES_FOR_DEFINITE_HEADER (3 * MAX_PACKET_SIZE)
101 
102 /* Maximum number of seconds that might reasonably separate two headers. */
103 #define MAX_REASONABLE_HDR_SEPARATION (3600 * 24 * 7)	/* one week */
104 
105 /* When searching a file for a packet, if we think we're within this many
106  * bytes of the packet we just search linearly.  Since linear searches are
107  * probably much faster than random ones (random ones require searching for
108  * the beginning of the packet, which may be unaligned in memory), we make
109  * this value pretty hefty.
110  */
111 #define STRAIGHT_SCAN_THRESHOLD (100 * MAX_PACKET_SIZE)
112 
113 
114 /* Given a header and an acceptable first and last time stamp, returns non-zero
115  * if the header looks reasonable and zero otherwise.
116  */
117 static int
reasonable_header(const struct pcap_pkthdr * hdr,const time_t first_time,time_t last_time)118 reasonable_header( const struct pcap_pkthdr *hdr, const time_t first_time, time_t last_time )
119 {
120 	if ( last_time == 0 )
121 		last_time = first_time + MAX_REASONABLE_FILE_SPAN;
122 
123 	return hdr->ts.tv_sec >= first_time &&
124 	       hdr->ts.tv_sec <= last_time &&
125 	       hdr->len > 0 &&
126 	       hdr->len <= MAX_REASONABLE_PACKET_LENGTH &&
127 	       hdr->caplen > 0 &&
128 	       hdr->caplen <= MAX_REASONABLE_PACKET_LENGTH;
129 }
130 
131 #define	SWAPLONG(y) \
132 ((((y)&0xff)<<24) | (((y)&0xff00)<<8) | (((y)&0xff0000)>>8) | (((y)>>24)&0xff))
133 
134 /* Given a buffer, extracts a (properly aligned) packet header from it. */
135 static void
extract_header(pcap_t * p,const u_char * buf,struct pcap_pkthdr * hdr)136 extract_header( pcap_t *p, const u_char *buf, struct pcap_pkthdr *hdr )
137 {
138 	struct pcap_sf_pkthdr sfhdr;
139 
140 	memcpy(&sfhdr, buf, sizeof(sfhdr));
141 
142 	hdr->ts.tv_sec = sfhdr.ts.tv_sec;
143 	hdr->ts.tv_usec = sfhdr.ts.tv_usec;
144 	hdr->caplen = sfhdr.caplen;
145 	hdr->len = sfhdr.len;
146 
147 	if ( pcap_is_swapped( p ) )
148 	{
149 		hdr->ts.tv_sec = SWAPLONG(hdr->ts.tv_sec);
150 		hdr->ts.tv_usec = SWAPLONG(hdr->ts.tv_usec);
151 		hdr->len = SWAPLONG(hdr->len);
152 		hdr->caplen = SWAPLONG(hdr->caplen);
153 	}
154 
155 	/*
156 	 * From bpf/libpcap/savefile.c:
157 	 *
158 	 * We interchanged the caplen and len fields at version 2.3,
159 	 * in order to match the bpf header layout.  But unfortunately
160 	 * some files were written with version 2.3 in their headers
161 	 * but without the interchanged fields.
162 	 */
163 	if ( pcap_minor_version( p ) < 3 ||
164 	     (pcap_minor_version( p ) == 3 && hdr->caplen > hdr->len) )
165 		{
166 		int t = hdr->caplen;
167 		hdr->caplen = hdr->len;
168 		hdr->len = t;
169 		}
170 }
171 
172 /* Search a buffer to locate the first header within it.  Return values
173  * are HEADER_NONE, HEADER_CLASH, HEADER_PERHAPS, and HEADER_DEFINITELY.
174  * The first indicates that no evidence of a header was found; the second
175  * that two or more possible headers were found, neither more convincing
176  * than the other(s); the third that exactly one "possible" header was
177  * found; and the fourth that exactly one "definite" header was found.
178  *
179  * Headers are detected by looking for positions in the buffer which have
180  * reasonable timestamps and lengths.  If there is enough room in the buffer
181  * for another header to follow a candidate header, a check is made for
182  * that following header.  If it is present then the header is *definite*
183  * (unless another "perhaps" or "definite" header is found); if not, then
184  * the header is discarded.  If there is not enough room in the buffer for
185  * another header then the candidate is *perhaps* (unless another header
186  * is subsequently found).  A "tie" between a "definite" header and a
187  * "perhaps" header is resolved in favor of the definite header.  Any
188  * other tie leads to HEADER_CLASH.
189  *
190  * The buffer position of the header is returned in hdrpos_addr and
191  * for convenience the corresponding header in return_hdr.
192  *
193  * first_time is the earliest possible acceptable timestamp in the
194  * header.  last_time, if non-zero, is the last such timestamp.  If
195  * zero, then up to MAX_REASONABLE_FILE_SPAN seconds after first_time
196  * is acceptable.
197  */
198 
199 #define HEADER_NONE 0
200 #define HEADER_CLASH 1
201 #define HEADER_PERHAPS 2
202 #define HEADER_DEFINITELY 3
203 
204 static int
find_header(pcap_t * p,u_char * buf,const int buf_len,const time_t first_time,const time_t last_time,u_char ** hdrpos_addr,struct pcap_pkthdr * return_hdr)205 find_header( pcap_t *p, u_char *buf, const int buf_len,
206 		const time_t first_time, const time_t last_time,
207 		u_char **hdrpos_addr, struct pcap_pkthdr *return_hdr )
208 {
209 	u_char *bufptr, *bufend, *last_pos_to_try;
210 	struct pcap_pkthdr hdr, hdr2;
211 	int status = HEADER_NONE;
212 	int saw_PERHAPS_clash = 0;
213 
214 	/* Initially, try each buffer position to see whether it looks like
215 	 * a valid packet header.  We may later restrict the positions we look
216 	 * at to avoid seeing a sequence of legitimate headers as conflicting
217 	 * with one another.
218 	 */
219 	bufend = buf + buf_len;
220 	last_pos_to_try = bufend - PACKET_HDR_LEN;
221 
222 	for ( bufptr = buf; bufptr < last_pos_to_try; ++bufptr )
223 	{
224 	    extract_header( p, bufptr, &hdr );
225 
226 	    if ( reasonable_header( &hdr, first_time, last_time ) )
227 	    {
228 		u_char *next_header = bufptr + PACKET_HDR_LEN + hdr.caplen;
229 
230 		if ( next_header + PACKET_HDR_LEN < bufend )
231 		{ /* check for another good header */
232 		    extract_header( p, next_header, &hdr2 );
233 
234 		    if ( reasonable_header( &hdr2, hdr.ts.tv_sec,
235 			    hdr.ts.tv_sec + MAX_REASONABLE_HDR_SEPARATION ) )
236 		    { /* a confirmed header */
237 			switch ( status )
238 			{
239 			    case HEADER_NONE:
240 			    case HEADER_PERHAPS:
241 				status = HEADER_DEFINITELY;
242 				*hdrpos_addr = bufptr;
243 				*return_hdr = hdr;
244 
245 				/* Make sure we don't demote this "definite"
246 				 * to a "clash" if we stumble across its
247 				 * successor.
248 				 */
249 				last_pos_to_try = next_header - PACKET_HDR_LEN;
250 				break;
251 
252 			    case HEADER_DEFINITELY:
253 				return HEADER_CLASH;
254 
255 			    default:
256 				error( "bad status in %s()", __func__ );
257 			}
258 		    }
259 
260 		    /* ... else the header is bogus - we've verified that it's
261 		     * not followed by a reasonable header.
262 		     */
263 		}
264 		else
265 		{ /* can't check for another good header */
266 		    switch ( status )
267 		    {
268 			case HEADER_NONE:
269 			    status = HEADER_PERHAPS;
270 			    *hdrpos_addr = bufptr;
271 			    *return_hdr = hdr;
272 			    break;
273 
274 			case HEADER_PERHAPS:
275 			    /* We don't immediately turn this into a
276 			     * clash because perhaps we'll later see a
277 			     * "definite" which will save us ...
278 			     */
279 			    saw_PERHAPS_clash = 1;
280 			    break;
281 
282 			case HEADER_DEFINITELY:
283 			    /* Keep the definite in preference to this one. */
284 			    break;
285 
286 			default:
287 			    error( "bad status in %s()", __func__ );
288 		    }
289 		}
290 	    }
291 	}
292 
293 	if ( status == HEADER_PERHAPS && saw_PERHAPS_clash )
294 		status = HEADER_CLASH;
295 
296 	return status;
297 }
298 
299 /* Positions the sf_readfile stream such that the next sf_read() will
300  * read the final full packet in the file.  Returns non-zero if
301  * successful, zero if unsuccessful.  If successful, returns the
302  * timestamp of the last packet in last_timestamp.
303  *
304  * Note that this routine is a special case of sf_find_packet().  In
305  * order to use sf_find_packet(), one first must use this routine in
306  * order to give sf_find_packet() an upper bound on the timestamps
307  * present in the dump file.
308  */
309 int
sf_find_end(pcap_t * p,const struct timeval * first_timestamp,struct timeval * last_timestamp)310 sf_find_end( pcap_t *p, const struct timeval *first_timestamp,
311 		struct timeval *last_timestamp )
312 {
313 	time_t first_time = first_timestamp->tv_sec;
314 	int64_t len_file;
315 	int num_bytes;
316 	u_char *buf, *bufpos, *bufend;
317 	u_char *hdrpos;
318 	struct pcap_pkthdr hdr, successor_hdr;
319 	int status;
320 
321 	/* Find the length of the file. */
322 	if ( fseek64( pcap_file (p), (int64_t) 0, SEEK_END ) < 0 )
323 		return 0;
324 
325 	len_file = ftell64( pcap_file (p) );
326 	if ( len_file < 0 )
327 		return 0;
328 	/* Casting a non-negative int64_t to uint64_t always works as expected. */
329 	if ( (uint64_t) len_file < MAX_BYTES_FOR_DEFINITE_HEADER )
330 		num_bytes = len_file;
331 	else
332 		num_bytes = MAX_BYTES_FOR_DEFINITE_HEADER;
333 
334 	/* Allow enough room for at least two full (untruncated) packets,
335 	 * perhaps followed by a truncated packet, so we have a shot at
336 	 * finding a "definite" header and following its chain to the
337 	 * end of the file.
338 	 */
339 	if ( fseek64( pcap_file( p ), -(int64_t) num_bytes, SEEK_END ) < 0 )
340 		return 0;
341 
342 	buf = (u_char *)malloc((u_int) num_bytes);
343 	if ( ! buf )
344 		return 0;
345 
346 	status = 0;
347 	bufpos = buf;
348 	bufend = buf + num_bytes;
349 
350 	if ( fread( (char *) bufpos, num_bytes, 1, pcap_file( p ) ) != 1 )
351 		goto done;
352 
353 	if ( find_header( p, bufpos, num_bytes,
354 			  first_time, 0L, &hdrpos, &hdr ) != HEADER_DEFINITELY )
355 		goto done;
356 
357 	/* Okay, we have a definite header in our hands.  Follow its
358 	 * chain till we find the last valid packet in the file ...
359 	 */
360 	for ( ; ; )
361 	{
362 		/* move to the next header position */
363 		bufpos = hdrpos + PACKET_HDR_LEN + hdr.caplen;
364 
365 		/* bufpos now points to a candidate packet, which if valid
366 		 * should replace the current packet pointed to by hdrpos as
367 		 * the last valid packet ...
368 		 */
369 		if ( bufpos >= bufend - PACKET_HDR_LEN )
370 			/* not enough room for another header */
371 			break;
372 
373 		extract_header( p, bufpos, &successor_hdr );
374 
375 		first_time = hdr.ts.tv_sec;
376 		if ( ! reasonable_header( &successor_hdr, first_time, 0L ) )
377 			/* this bodes ill - it means bufpos is perhaps a
378 			 * bogus packet header after all ...
379 			 */
380 			break;
381 
382 		/* Note that the following test is for whether the next
383 		 * packet starts at a position > bufend, *not* for a
384 		 * position >= bufend.  If this is the last packet in the
385 		 * file and there isn't a subsequent partial packet, then
386 		 * we expect the first buffer position beyond this packet
387 		 * to be just beyond the end of the buffer, i.e., at bufend
388 		 * itself.
389 		 */
390 		if ( bufpos + PACKET_HDR_LEN + successor_hdr.caplen > bufend )
391 			/* the packet is truncated */
392 			break;
393 
394 		/* Accept this packet as fully legit. */
395 		hdrpos = bufpos;
396 		hdr = successor_hdr;
397 	}
398 
399 	/* Success!  Last valid packet is at hdrpos. */
400 	TIMEVAL_FROM_PKTHDR_TS(*last_timestamp, hdr.ts);
401 	status = 1;
402 
403 	/* Seek so that the next read will start at last valid packet. */
404 	if ( fseek64( pcap_file( p ), -(int64_t) (bufend - hdrpos), SEEK_END ) < 0 )
405 		error( "final fseek64() failed in %s()", __func__ );
406 
407     done:
408 	free( (char *) buf );
409 
410 	return status;
411 }
412 
413 /* Takes two timeval's and returns the difference, tv2 - tv1, as a double. */
414 static double
timeval_diff(const struct timeval * tv1,const struct timeval * tv2)415 timeval_diff( const struct timeval *tv1, const struct timeval *tv2 )
416 {
417 	double result = (tv2->tv_sec - tv1->tv_sec);
418 	result += (tv2->tv_usec - tv1->tv_usec) / 1000000.0;
419 
420 	return result;
421 }
422 
423 /* Returns true if timestamp t1 is chronologically less than timestamp t2. */
424 int
sf_timestamp_less_than(const struct timeval * t1,const struct timeval * t2)425 sf_timestamp_less_than( const struct timeval *t1, const struct timeval *t2 )
426 {
427 	return t1->tv_sec < t2->tv_sec ||
428 	       (t1->tv_sec == t2->tv_sec &&
429 	        t1->tv_usec < t2->tv_usec);
430 }
431 
432 /* Given two timestamps on either side of desired_time and their positions,
433  * returns the interpolated position of the desired_time packet.  Returns a
434  * negative value if the desired_time is outside the given range.
435  */
436 static int64_t
interpolated_position(const struct timeval * min_time,const int64_t min_pos,const struct timeval * max_time,const int64_t max_pos,const struct timeval * desired_time)437 interpolated_position( const struct timeval *min_time, const int64_t min_pos,
438 			const struct timeval *max_time, const int64_t max_pos,
439 			const struct timeval *desired_time )
440 {
441 	double full_span = timeval_diff( max_time, min_time );
442 	double desired_span = timeval_diff( desired_time, min_time );
443 	int64_t full_span_pos = max_pos - min_pos;
444 	double fractional_offset = desired_span / full_span;
445 
446 	if ( fractional_offset < 0.0 || fractional_offset > 1.0 )
447 		return -1;
448 
449 	return min_pos + (int64_t) (fractional_offset * (double) full_span_pos);
450 }
451 
452 /* Reads packets linearly until one with a time >= the given desired time
453  * is found; positions the dump file so that the next read will start
454  * at the given packet.  Returns non-zero on success, 0 if an EOF was
455  * first encountered.
456  */
457 static int
read_up_to(pcap_t * p,const struct timeval * desired_time)458 read_up_to( pcap_t *p, const struct timeval *desired_time )
459 {
460 	struct pcap_pkthdr hdr;
461 	int64_t pos;
462 	int status;
463 
464 	for ( ; ; )
465 	{
466 		struct timeval tvbuf;
467 
468 		pos = ftell64( pcap_file( p ) );
469 
470 		if ( pcap_next( p, &hdr ) == NULL )
471 		{
472 			if ( feof( pcap_file( p ) ) )
473 				{
474 				status = 0;
475 				clearerr( pcap_file( p ) );
476 				break;
477 				}
478 
479 			error( "bad status in %s()", __func__ );
480 		}
481 
482 		TIMEVAL_FROM_PKTHDR_TS(tvbuf, hdr.ts);
483 
484 		if ( ! sf_timestamp_less_than( &tvbuf, desired_time ) )
485 		{
486 			status = 1;
487 			break;
488 		}
489 	}
490 
491 	if ( fseek64( pcap_file( p ), pos, SEEK_SET ) < 0 )
492 		error( "fseek64() failed in %s()", __func__ );
493 
494 	return (status);
495 }
496 
497 /* Positions the sf_readfile stream so that the next sf_read() will
498  * return the first packet with a time greater than or equal to
499  * desired_time.  desired_time must be greater than min_time and less
500  * than max_time, which should correspond to actual packets in the
501  * file.  min_pos is the file position (byte offset) corresponding to
502  * the min_time packet and max_pos is the same for the max_time packet.
503  *
504  * Returns non-zero on success, 0 if the given position is beyond max_pos.
505  *
506  * NOTE: when calling this routine, the sf_readfile stream *must* be
507  * already aligned so that the next call to sf_next_packet() will yield
508  * a valid packet.
509  */
510 int
sf_find_packet(pcap_t * p,struct timeval * min_time,int64_t min_pos,struct timeval * max_time,int64_t max_pos,const struct timeval * desired_time)511 sf_find_packet( pcap_t *p,
512 		struct timeval *min_time, int64_t min_pos,
513 		struct timeval *max_time, int64_t max_pos,
514 		const struct timeval *desired_time )
515 {
516 	int status = 1;
517 	struct timeval min_time_copy, max_time_copy;
518 	u_int num_bytes = MAX_BYTES_FOR_DEFINITE_HEADER;
519 	u_char *buf, *hdrpos;
520 	struct pcap_pkthdr hdr;
521 
522 	buf = (u_char *) malloc( num_bytes );
523 	if ( ! buf )
524 		error( "malloc() failed in %s()", __func__ );
525 
526 	min_time_copy = *min_time;
527 	min_time = &min_time_copy;
528 
529 	max_time_copy = *max_time;
530 	max_time = &max_time_copy;
531 
532 	for ( ; ; )	/* loop until positioned correctly */
533 	{
534 		int64_t desired_pos =
535 			interpolated_position( min_time, min_pos,
536 					       max_time, max_pos,
537 					       desired_time );
538 		struct timeval tvbuf;
539 
540 		if ( desired_pos < 0 )
541 			{
542 			status = 0;
543 			break;
544 			}
545 
546 		int64_t present_pos = ftell64( pcap_file( p ) );
547 		if ( present_pos < 0 )
548 			error ( "ftell64() failed in %s()", __func__ );
549 
550 		if ( present_pos <= desired_pos &&
551 		     (uint64_t) (desired_pos - present_pos) < STRAIGHT_SCAN_THRESHOLD )
552 		{ /* we're close enough to just blindly read ahead */
553 			status = read_up_to( p, desired_time );
554 			break;
555 		}
556 
557 		/* Undershoot the target a little bit - it's much easier to
558 		 * then scan straight forward than to try to read backwards ...
559 		 */
560 		desired_pos -= STRAIGHT_SCAN_THRESHOLD / 2;
561 		if ( desired_pos < min_pos )
562 			desired_pos = min_pos;
563 
564 		if ( fseek64( pcap_file( p ), desired_pos, SEEK_SET ) < 0 )
565 			error( "fseek64() failed in %s()", __func__ );
566 
567 		int num_bytes_read =
568 			fread( (char *) buf, 1, num_bytes, pcap_file( p ) );
569 
570 		if ( num_bytes_read == 0 )
571 			/* This shouldn't ever happen because we try to
572 			 * undershoot, unless the dump file has only a
573 			 * couple packets in it ...
574 			 */
575 			error( "fread() failed in %s()", __func__ );
576 
577 		if ( find_header( p, buf, num_bytes, min_time->tv_sec,
578 				  max_time->tv_sec, &hdrpos, &hdr ) !=
579 		     HEADER_DEFINITELY )
580 			error( "can't find header at position %ld in dump file",
581 				desired_pos );
582 
583 		/* Correct desired_pos to reflect beginning of packet. */
584 		desired_pos += (hdrpos - buf);
585 
586 		/* Seek to the beginning of the header. */
587 		if ( fseek64( pcap_file( p ), desired_pos, SEEK_SET ) < 0 )
588 			error( "fseek64() failed in %s()", __func__ );
589 
590 		TIMEVAL_FROM_PKTHDR_TS(tvbuf, hdr.ts);
591 		if ( sf_timestamp_less_than( &tvbuf, desired_time ) )
592 		{ /* too early in the file */
593 			*min_time = tvbuf;
594 			min_pos = desired_pos;
595 		}
596 
597 		else if ( sf_timestamp_less_than( desired_time, &tvbuf ) )
598 		{ /* too late in the file */
599 			*max_time = tvbuf;
600 			max_pos = desired_pos;
601 		}
602 
603 		else
604 			/* got it! */
605 			break;
606 	}
607 
608 	free( (char *) buf );
609 
610 	return status;
611 }
612