1 /*
2 * Copyright (c) 1991, 1992, 1993, 1995, 1996, 1999
3 * The Regents of the University of California. All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that: (1) source code distributions
7 * retain the above copyright notice and this paragraph in its entirety, (2)
8 * distributions including binary code include the above copyright notice and
9 * this paragraph in its entirety in the documentation or other materials
10 * provided with the distribution, and (3) all advertising materials mentioning
11 * features or use of this software display the following acknowledgement:
12 * ``This product includes software developed by the University of California,
13 * Lawrence Berkeley Laboratory and its contributors.'' Neither the name of
14 * the University nor the names of its contributors may be used to endorse
15 * or promote products derived from this software without specific prior
16 * written permission.
17 * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR IMPLIED
18 * WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF
19 * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE.
20 */
21
22 /*
23 * search.c - supports fast searching through pcap files for timestamps
24 */
25
26 #ifdef HAVE_CONFIG_H
27 #include <config.h>
28 #endif
29
30 #include <sys/types.h>
31
32 #include <pcap.h>
33 #include <stdio.h>
34 #include <stdlib.h>
35 #include <string.h>
36 #include <time.h>
37 #include <unistd.h>
38
39 #ifdef HAVE_OS_PROTO_H
40 #include "os-proto.h"
41 #endif
42
43 #include "tcpslice.h"
44
45 /*
46 * We directly read a pcap file, so declare the per-packet header
47 * ourselves. It's not platform-dependent or version-dependent;
48 * if the per-packet header doesn't look *exactly* like this, it's
49 * not a valid pcap file.
50 */
51
52 /*
53 * This is a timeval as stored in a savefile.
54 * It has to use the same types everywhere, independent of the actual
55 * `struct timeval'; `struct timeval' has 32-bit tv_sec values on some
56 * platforms and 64-bit tv_sec values on other platforms, and writing
57 * out native `struct timeval' values would mean files could only be
58 * read on systems with the same tv_sec size as the system on which
59 * the file was written.
60 */
61 struct pcap_timeval {
62 bpf_int32 tv_sec; /* seconds */
63 bpf_int32 tv_usec; /* microseconds */
64 };
65
66 /*
67 * This is a `pcap_pkthdr' as actually stored in a savefile.
68 */
69 struct pcap_sf_pkthdr {
70 struct pcap_timeval ts; /* time stamp */
71 bpf_u_int32 caplen; /* length of portion present */
72 bpf_u_int32 len; /* length this packet (off wire) */
73 };
74
75 /* Maximum number of seconds that we can conceive of a dump file spanning. */
76 #define MAX_REASONABLE_FILE_SPAN (3600*24*366) /* one year */
77
78 /* Maximum packet length we ever expect to see. */
79 #define MAX_REASONABLE_PACKET_LENGTH 262144
80
81 /* Size of a packet header in bytes; easier than typing the sizeof() all
82 * the time ...
83 */
84 #define PACKET_HDR_LEN (sizeof( struct pcap_sf_pkthdr ))
85
86 extern int snaplen;
87
88 /* The maximum size of a packet, including its header. */
89 #define MAX_PACKET_SIZE (PACKET_HDR_LEN + snaplen)
90
91 /* Number of contiguous bytes from a dumpfile in which there's guaranteed
92 * to be enough information to find a "definite" header if one exists
93 * therein. This takes 3 full packets - the first to be just misaligned
94 * (one byte short of a full packet), missing its timestamp; the second
95 * to have the legitimate timestamp; and the third to provide confirmation
96 * that the second is legit, making it a "definite" header. We could
97 * scrimp a bit here since not the entire third packet is required, but
98 * it doesn't seem worth it
99 */
100 #define MAX_BYTES_FOR_DEFINITE_HEADER (3 * MAX_PACKET_SIZE)
101
102 /* Maximum number of seconds that might reasonably separate two headers. */
103 #define MAX_REASONABLE_HDR_SEPARATION (3600 * 24 * 7) /* one week */
104
105 /* When searching a file for a packet, if we think we're within this many
106 * bytes of the packet we just search linearly. Since linear searches are
107 * probably much faster than random ones (random ones require searching for
108 * the beginning of the packet, which may be unaligned in memory), we make
109 * this value pretty hefty.
110 */
111 #define STRAIGHT_SCAN_THRESHOLD (100 * MAX_PACKET_SIZE)
112
113
114 /* Given a header and an acceptable first and last time stamp, returns non-zero
115 * if the header looks reasonable and zero otherwise.
116 */
117 static int
reasonable_header(const struct pcap_pkthdr * hdr,const time_t first_time,time_t last_time)118 reasonable_header( const struct pcap_pkthdr *hdr, const time_t first_time, time_t last_time )
119 {
120 if ( last_time == 0 )
121 last_time = first_time + MAX_REASONABLE_FILE_SPAN;
122
123 return hdr->ts.tv_sec >= first_time &&
124 hdr->ts.tv_sec <= last_time &&
125 hdr->len > 0 &&
126 hdr->len <= MAX_REASONABLE_PACKET_LENGTH &&
127 hdr->caplen > 0 &&
128 hdr->caplen <= MAX_REASONABLE_PACKET_LENGTH;
129 }
130
131 #define SWAPLONG(y) \
132 ((((y)&0xff)<<24) | (((y)&0xff00)<<8) | (((y)&0xff0000)>>8) | (((y)>>24)&0xff))
133
134 /* Given a buffer, extracts a (properly aligned) packet header from it. */
135 static void
extract_header(pcap_t * p,const u_char * buf,struct pcap_pkthdr * hdr)136 extract_header( pcap_t *p, const u_char *buf, struct pcap_pkthdr *hdr )
137 {
138 struct pcap_sf_pkthdr sfhdr;
139
140 memcpy(&sfhdr, buf, sizeof(sfhdr));
141
142 hdr->ts.tv_sec = sfhdr.ts.tv_sec;
143 hdr->ts.tv_usec = sfhdr.ts.tv_usec;
144 hdr->caplen = sfhdr.caplen;
145 hdr->len = sfhdr.len;
146
147 if ( pcap_is_swapped( p ) )
148 {
149 hdr->ts.tv_sec = SWAPLONG(hdr->ts.tv_sec);
150 hdr->ts.tv_usec = SWAPLONG(hdr->ts.tv_usec);
151 hdr->len = SWAPLONG(hdr->len);
152 hdr->caplen = SWAPLONG(hdr->caplen);
153 }
154
155 /*
156 * From bpf/libpcap/savefile.c:
157 *
158 * We interchanged the caplen and len fields at version 2.3,
159 * in order to match the bpf header layout. But unfortunately
160 * some files were written with version 2.3 in their headers
161 * but without the interchanged fields.
162 */
163 if ( pcap_minor_version( p ) < 3 ||
164 (pcap_minor_version( p ) == 3 && hdr->caplen > hdr->len) )
165 {
166 int t = hdr->caplen;
167 hdr->caplen = hdr->len;
168 hdr->len = t;
169 }
170 }
171
172 /* Search a buffer to locate the first header within it. Return values
173 * are HEADER_NONE, HEADER_CLASH, HEADER_PERHAPS, and HEADER_DEFINITELY.
174 * The first indicates that no evidence of a header was found; the second
175 * that two or more possible headers were found, neither more convincing
176 * than the other(s); the third that exactly one "possible" header was
177 * found; and the fourth that exactly one "definite" header was found.
178 *
179 * Headers are detected by looking for positions in the buffer which have
180 * reasonable timestamps and lengths. If there is enough room in the buffer
181 * for another header to follow a candidate header, a check is made for
182 * that following header. If it is present then the header is *definite*
183 * (unless another "perhaps" or "definite" header is found); if not, then
184 * the header is discarded. If there is not enough room in the buffer for
185 * another header then the candidate is *perhaps* (unless another header
186 * is subsequently found). A "tie" between a "definite" header and a
187 * "perhaps" header is resolved in favor of the definite header. Any
188 * other tie leads to HEADER_CLASH.
189 *
190 * The buffer position of the header is returned in hdrpos_addr and
191 * for convenience the corresponding header in return_hdr.
192 *
193 * first_time is the earliest possible acceptable timestamp in the
194 * header. last_time, if non-zero, is the last such timestamp. If
195 * zero, then up to MAX_REASONABLE_FILE_SPAN seconds after first_time
196 * is acceptable.
197 */
198
199 #define HEADER_NONE 0
200 #define HEADER_CLASH 1
201 #define HEADER_PERHAPS 2
202 #define HEADER_DEFINITELY 3
203
204 static int
find_header(pcap_t * p,u_char * buf,const int buf_len,const time_t first_time,const time_t last_time,u_char ** hdrpos_addr,struct pcap_pkthdr * return_hdr)205 find_header( pcap_t *p, u_char *buf, const int buf_len,
206 const time_t first_time, const time_t last_time,
207 u_char **hdrpos_addr, struct pcap_pkthdr *return_hdr )
208 {
209 u_char *bufptr, *bufend, *last_pos_to_try;
210 struct pcap_pkthdr hdr, hdr2;
211 int status = HEADER_NONE;
212 int saw_PERHAPS_clash = 0;
213
214 /* Initially, try each buffer position to see whether it looks like
215 * a valid packet header. We may later restrict the positions we look
216 * at to avoid seeing a sequence of legitimate headers as conflicting
217 * with one another.
218 */
219 bufend = buf + buf_len;
220 last_pos_to_try = bufend - PACKET_HDR_LEN;
221
222 for ( bufptr = buf; bufptr < last_pos_to_try; ++bufptr )
223 {
224 extract_header( p, bufptr, &hdr );
225
226 if ( reasonable_header( &hdr, first_time, last_time ) )
227 {
228 u_char *next_header = bufptr + PACKET_HDR_LEN + hdr.caplen;
229
230 if ( next_header + PACKET_HDR_LEN < bufend )
231 { /* check for another good header */
232 extract_header( p, next_header, &hdr2 );
233
234 if ( reasonable_header( &hdr2, hdr.ts.tv_sec,
235 hdr.ts.tv_sec + MAX_REASONABLE_HDR_SEPARATION ) )
236 { /* a confirmed header */
237 switch ( status )
238 {
239 case HEADER_NONE:
240 case HEADER_PERHAPS:
241 status = HEADER_DEFINITELY;
242 *hdrpos_addr = bufptr;
243 *return_hdr = hdr;
244
245 /* Make sure we don't demote this "definite"
246 * to a "clash" if we stumble across its
247 * successor.
248 */
249 last_pos_to_try = next_header - PACKET_HDR_LEN;
250 break;
251
252 case HEADER_DEFINITELY:
253 return HEADER_CLASH;
254
255 default:
256 error( "bad status in %s()", __func__ );
257 }
258 }
259
260 /* ... else the header is bogus - we've verified that it's
261 * not followed by a reasonable header.
262 */
263 }
264 else
265 { /* can't check for another good header */
266 switch ( status )
267 {
268 case HEADER_NONE:
269 status = HEADER_PERHAPS;
270 *hdrpos_addr = bufptr;
271 *return_hdr = hdr;
272 break;
273
274 case HEADER_PERHAPS:
275 /* We don't immediately turn this into a
276 * clash because perhaps we'll later see a
277 * "definite" which will save us ...
278 */
279 saw_PERHAPS_clash = 1;
280 break;
281
282 case HEADER_DEFINITELY:
283 /* Keep the definite in preference to this one. */
284 break;
285
286 default:
287 error( "bad status in %s()", __func__ );
288 }
289 }
290 }
291 }
292
293 if ( status == HEADER_PERHAPS && saw_PERHAPS_clash )
294 status = HEADER_CLASH;
295
296 return status;
297 }
298
299 /* Positions the sf_readfile stream such that the next sf_read() will
300 * read the final full packet in the file. Returns non-zero if
301 * successful, zero if unsuccessful. If successful, returns the
302 * timestamp of the last packet in last_timestamp.
303 *
304 * Note that this routine is a special case of sf_find_packet(). In
305 * order to use sf_find_packet(), one first must use this routine in
306 * order to give sf_find_packet() an upper bound on the timestamps
307 * present in the dump file.
308 */
309 int
sf_find_end(pcap_t * p,const struct timeval * first_timestamp,struct timeval * last_timestamp)310 sf_find_end( pcap_t *p, const struct timeval *first_timestamp,
311 struct timeval *last_timestamp )
312 {
313 time_t first_time = first_timestamp->tv_sec;
314 int64_t len_file;
315 int num_bytes;
316 u_char *buf, *bufpos, *bufend;
317 u_char *hdrpos;
318 struct pcap_pkthdr hdr, successor_hdr;
319 int status;
320
321 /* Find the length of the file. */
322 if ( fseek64( pcap_file (p), (int64_t) 0, SEEK_END ) < 0 )
323 return 0;
324
325 len_file = ftell64( pcap_file (p) );
326 if ( len_file < 0 )
327 return 0;
328 /* Casting a non-negative int64_t to uint64_t always works as expected. */
329 if ( (uint64_t) len_file < MAX_BYTES_FOR_DEFINITE_HEADER )
330 num_bytes = len_file;
331 else
332 num_bytes = MAX_BYTES_FOR_DEFINITE_HEADER;
333
334 /* Allow enough room for at least two full (untruncated) packets,
335 * perhaps followed by a truncated packet, so we have a shot at
336 * finding a "definite" header and following its chain to the
337 * end of the file.
338 */
339 if ( fseek64( pcap_file( p ), -(int64_t) num_bytes, SEEK_END ) < 0 )
340 return 0;
341
342 buf = (u_char *)malloc((u_int) num_bytes);
343 if ( ! buf )
344 return 0;
345
346 status = 0;
347 bufpos = buf;
348 bufend = buf + num_bytes;
349
350 if ( fread( (char *) bufpos, num_bytes, 1, pcap_file( p ) ) != 1 )
351 goto done;
352
353 if ( find_header( p, bufpos, num_bytes,
354 first_time, 0L, &hdrpos, &hdr ) != HEADER_DEFINITELY )
355 goto done;
356
357 /* Okay, we have a definite header in our hands. Follow its
358 * chain till we find the last valid packet in the file ...
359 */
360 for ( ; ; )
361 {
362 /* move to the next header position */
363 bufpos = hdrpos + PACKET_HDR_LEN + hdr.caplen;
364
365 /* bufpos now points to a candidate packet, which if valid
366 * should replace the current packet pointed to by hdrpos as
367 * the last valid packet ...
368 */
369 if ( bufpos >= bufend - PACKET_HDR_LEN )
370 /* not enough room for another header */
371 break;
372
373 extract_header( p, bufpos, &successor_hdr );
374
375 first_time = hdr.ts.tv_sec;
376 if ( ! reasonable_header( &successor_hdr, first_time, 0L ) )
377 /* this bodes ill - it means bufpos is perhaps a
378 * bogus packet header after all ...
379 */
380 break;
381
382 /* Note that the following test is for whether the next
383 * packet starts at a position > bufend, *not* for a
384 * position >= bufend. If this is the last packet in the
385 * file and there isn't a subsequent partial packet, then
386 * we expect the first buffer position beyond this packet
387 * to be just beyond the end of the buffer, i.e., at bufend
388 * itself.
389 */
390 if ( bufpos + PACKET_HDR_LEN + successor_hdr.caplen > bufend )
391 /* the packet is truncated */
392 break;
393
394 /* Accept this packet as fully legit. */
395 hdrpos = bufpos;
396 hdr = successor_hdr;
397 }
398
399 /* Success! Last valid packet is at hdrpos. */
400 TIMEVAL_FROM_PKTHDR_TS(*last_timestamp, hdr.ts);
401 status = 1;
402
403 /* Seek so that the next read will start at last valid packet. */
404 if ( fseek64( pcap_file( p ), -(int64_t) (bufend - hdrpos), SEEK_END ) < 0 )
405 error( "final fseek64() failed in %s()", __func__ );
406
407 done:
408 free( (char *) buf );
409
410 return status;
411 }
412
413 /* Takes two timeval's and returns the difference, tv2 - tv1, as a double. */
414 static double
timeval_diff(const struct timeval * tv1,const struct timeval * tv2)415 timeval_diff( const struct timeval *tv1, const struct timeval *tv2 )
416 {
417 double result = (tv2->tv_sec - tv1->tv_sec);
418 result += (tv2->tv_usec - tv1->tv_usec) / 1000000.0;
419
420 return result;
421 }
422
423 /* Returns true if timestamp t1 is chronologically less than timestamp t2. */
424 int
sf_timestamp_less_than(const struct timeval * t1,const struct timeval * t2)425 sf_timestamp_less_than( const struct timeval *t1, const struct timeval *t2 )
426 {
427 return t1->tv_sec < t2->tv_sec ||
428 (t1->tv_sec == t2->tv_sec &&
429 t1->tv_usec < t2->tv_usec);
430 }
431
432 /* Given two timestamps on either side of desired_time and their positions,
433 * returns the interpolated position of the desired_time packet. Returns a
434 * negative value if the desired_time is outside the given range.
435 */
436 static int64_t
interpolated_position(const struct timeval * min_time,const int64_t min_pos,const struct timeval * max_time,const int64_t max_pos,const struct timeval * desired_time)437 interpolated_position( const struct timeval *min_time, const int64_t min_pos,
438 const struct timeval *max_time, const int64_t max_pos,
439 const struct timeval *desired_time )
440 {
441 double full_span = timeval_diff( max_time, min_time );
442 double desired_span = timeval_diff( desired_time, min_time );
443 int64_t full_span_pos = max_pos - min_pos;
444 double fractional_offset = desired_span / full_span;
445
446 if ( fractional_offset < 0.0 || fractional_offset > 1.0 )
447 return -1;
448
449 return min_pos + (int64_t) (fractional_offset * (double) full_span_pos);
450 }
451
452 /* Reads packets linearly until one with a time >= the given desired time
453 * is found; positions the dump file so that the next read will start
454 * at the given packet. Returns non-zero on success, 0 if an EOF was
455 * first encountered.
456 */
457 static int
read_up_to(pcap_t * p,const struct timeval * desired_time)458 read_up_to( pcap_t *p, const struct timeval *desired_time )
459 {
460 struct pcap_pkthdr hdr;
461 int64_t pos;
462 int status;
463
464 for ( ; ; )
465 {
466 struct timeval tvbuf;
467
468 pos = ftell64( pcap_file( p ) );
469
470 if ( pcap_next( p, &hdr ) == NULL )
471 {
472 if ( feof( pcap_file( p ) ) )
473 {
474 status = 0;
475 clearerr( pcap_file( p ) );
476 break;
477 }
478
479 error( "bad status in %s()", __func__ );
480 }
481
482 TIMEVAL_FROM_PKTHDR_TS(tvbuf, hdr.ts);
483
484 if ( ! sf_timestamp_less_than( &tvbuf, desired_time ) )
485 {
486 status = 1;
487 break;
488 }
489 }
490
491 if ( fseek64( pcap_file( p ), pos, SEEK_SET ) < 0 )
492 error( "fseek64() failed in %s()", __func__ );
493
494 return (status);
495 }
496
497 /* Positions the sf_readfile stream so that the next sf_read() will
498 * return the first packet with a time greater than or equal to
499 * desired_time. desired_time must be greater than min_time and less
500 * than max_time, which should correspond to actual packets in the
501 * file. min_pos is the file position (byte offset) corresponding to
502 * the min_time packet and max_pos is the same for the max_time packet.
503 *
504 * Returns non-zero on success, 0 if the given position is beyond max_pos.
505 *
506 * NOTE: when calling this routine, the sf_readfile stream *must* be
507 * already aligned so that the next call to sf_next_packet() will yield
508 * a valid packet.
509 */
510 int
sf_find_packet(pcap_t * p,struct timeval * min_time,int64_t min_pos,struct timeval * max_time,int64_t max_pos,const struct timeval * desired_time)511 sf_find_packet( pcap_t *p,
512 struct timeval *min_time, int64_t min_pos,
513 struct timeval *max_time, int64_t max_pos,
514 const struct timeval *desired_time )
515 {
516 int status = 1;
517 struct timeval min_time_copy, max_time_copy;
518 u_int num_bytes = MAX_BYTES_FOR_DEFINITE_HEADER;
519 u_char *buf, *hdrpos;
520 struct pcap_pkthdr hdr;
521
522 buf = (u_char *) malloc( num_bytes );
523 if ( ! buf )
524 error( "malloc() failed in %s()", __func__ );
525
526 min_time_copy = *min_time;
527 min_time = &min_time_copy;
528
529 max_time_copy = *max_time;
530 max_time = &max_time_copy;
531
532 for ( ; ; ) /* loop until positioned correctly */
533 {
534 int64_t desired_pos =
535 interpolated_position( min_time, min_pos,
536 max_time, max_pos,
537 desired_time );
538 struct timeval tvbuf;
539
540 if ( desired_pos < 0 )
541 {
542 status = 0;
543 break;
544 }
545
546 int64_t present_pos = ftell64( pcap_file( p ) );
547 if ( present_pos < 0 )
548 error ( "ftell64() failed in %s()", __func__ );
549
550 if ( present_pos <= desired_pos &&
551 (uint64_t) (desired_pos - present_pos) < STRAIGHT_SCAN_THRESHOLD )
552 { /* we're close enough to just blindly read ahead */
553 status = read_up_to( p, desired_time );
554 break;
555 }
556
557 /* Undershoot the target a little bit - it's much easier to
558 * then scan straight forward than to try to read backwards ...
559 */
560 desired_pos -= STRAIGHT_SCAN_THRESHOLD / 2;
561 if ( desired_pos < min_pos )
562 desired_pos = min_pos;
563
564 if ( fseek64( pcap_file( p ), desired_pos, SEEK_SET ) < 0 )
565 error( "fseek64() failed in %s()", __func__ );
566
567 int num_bytes_read =
568 fread( (char *) buf, 1, num_bytes, pcap_file( p ) );
569
570 if ( num_bytes_read == 0 )
571 /* This shouldn't ever happen because we try to
572 * undershoot, unless the dump file has only a
573 * couple packets in it ...
574 */
575 error( "fread() failed in %s()", __func__ );
576
577 if ( find_header( p, buf, num_bytes, min_time->tv_sec,
578 max_time->tv_sec, &hdrpos, &hdr ) !=
579 HEADER_DEFINITELY )
580 error( "can't find header at position %ld in dump file",
581 desired_pos );
582
583 /* Correct desired_pos to reflect beginning of packet. */
584 desired_pos += (hdrpos - buf);
585
586 /* Seek to the beginning of the header. */
587 if ( fseek64( pcap_file( p ), desired_pos, SEEK_SET ) < 0 )
588 error( "fseek64() failed in %s()", __func__ );
589
590 TIMEVAL_FROM_PKTHDR_TS(tvbuf, hdr.ts);
591 if ( sf_timestamp_less_than( &tvbuf, desired_time ) )
592 { /* too early in the file */
593 *min_time = tvbuf;
594 min_pos = desired_pos;
595 }
596
597 else if ( sf_timestamp_less_than( desired_time, &tvbuf ) )
598 { /* too late in the file */
599 *max_time = tvbuf;
600 max_pos = desired_pos;
601 }
602
603 else
604 /* got it! */
605 break;
606 }
607
608 free( (char *) buf );
609
610 return status;
611 }
612