1package Net::Analysis::Listener::HTTPPipelining;
2# $Id: HTTP.pm 140 2005-10-21 16:31:29Z abworrall $
3
4# {{{ Boilerplate
5
6use 5.008000;
7our $VERSION = '0.02';
8use strict;
9use warnings;
10
11use Carp qw(carp croak confess);
12
13use Params::Validate qw(:all);
14
15use base qw(Net::Analysis::Listener::Base);
16
17# }}}
18
19use Net::Analysis::Packet qw(:pktslots :func);
20use Net::Analysis::TCPMonologue;
21
22use HTTP::Response;
23use HTTP::Request;
24
25#### Callbacks
26#
27# {{{ validate_configuration
28
29sub validate_configuration {
30    my $self = shift;
31
32    my %h = validate (@_, { v => {type => SCALAR,
33                                  default => 0} });
34
35    return \%h;
36}
37
38# }}}
39
40# {{{ setup
41
42# A chance to setup stuff for our listener
43sub setup {
44    my ($self) = shift;
45
46    $self->{sesh} = {}; # TCP sessions
47}
48
49# }}}
50# {{{ teardown
51
52sub teardown {
53    my ($self) = shift;
54}
55
56# }}}
57
58# {{{ tcp_session_start
59
60sub tcp_session_start {
61    my ($self, $args) = @_;
62    my $pkt = $args->{pkt}; # Might well be undef
63    my $k   = $args->{socketpair_key};
64
65    if ($self->{v} & 0x04) {
66        $self->trace ("  ==== tcp session start [$pkt->{from} -> $pkt->{to}]");
67    }
68}
69
70# }}}
71# {{{ tcp_session_end
72
73sub tcp_session_end {
74    my ($self, $args) = @_;
75    my $pkt = $args->{pkt}; # Might well be undef
76    my $k   = $args->{socketpair_key};
77
78    $self->trace("  ==== tcp session end [$k]") if ($self->{v} & 0x04);
79
80    my $sesh = $self->_remove_sesh ($k);
81    return if (! defined $sesh);
82
83    $self->_unpipeline_data($sesh);
84}
85
86# }}}
87# {{{ tcp_monologue
88
89# In a pipelined request, the concept of orderly back-and-forth
90#  monologues breaks down. Conceptually, all the requests are
91#  glued together into one big monologue, and then all the responses
92#  are returned in another big monologue.
93# These jumbo monlogues may be transmitted simultaneusly (that is
94#  kinda the point of pipelining ;), so from the perspective of our
95#  pcap files, the divisions between monologues are arbitrary.
96# So, we assign no meaning to the monologues that arrive; all we do
97#  is steal the packets from them.
98# All the hard work - breaking apart the jumbo monologues into separate
99#  requests and responses, pairing them up, and generating synthetic
100#  monologues, is done in tcp_session_end.
101
102sub tcp_monologue {
103    my ($self, $args) = @_;
104    my $k    = $args->{socketpair_key};
105    my $mono = $args->{monologue};
106    my $sesh = $self->_get_sesh ($k);
107
108    # Decide which jumbo monologue to place these pkts in
109    my $socketpair_from = $mono->first_packet()->[PKT_SLOT_FROM];
110
111    my $pkts = $sesh->{$socketpair_from} ||= [];
112    push (@$pkts, @{ $mono->_data_packets }); # POTENTIAL MEM LEAK !! TAKE CARE
113}
114
115# }}}
116# {{{ http_transaction
117
118# Listen to our own event and print very basic report, if asked
119sub http_transaction {
120    my ($self, $args) = @_;
121    return if (! $self->{v});
122
123    my $req = $args->{req};
124    my $uri = (defined $req) ? $req->uri() : "(no uri)";
125    my $method = (defined $req) ? $req->method() : "(no method)";
126    my $t = $args->{t_elapsed} || -1.0;
127    printf "%8.4fs : $method %s\n", $t, $uri;
128}
129
130# }}}
131
132# {{{ as_string
133
134sub as_string {
135    my ($self) = @_;
136    my $s = '';
137
138    $s .= "[".ref($self)."]";
139
140    return $s;
141}
142
143# }}}
144
145#### Support funcs
146#
147# {{{ _trace
148
149# This may become more clever ...
150
151our $TRACE=0;
152
153sub _trace {
154    my ($self) = shift;
155
156    return if (! $TRACE);
157
158    foreach (@_) {
159        my $l = $_; #  Skip 'Modification of a read-only value' errors
160        chomp ($l);
161        print "$l\n";
162    }
163}
164
165# }}}
166# {{{ _{get|remove}_sesh
167
168sub _get_sesh {
169    my ($self, $sesh_key) = @_;
170
171    if (! exists $self->{sesh}{$sesh_key}) {
172        $self->{sesh}{$sesh_key} = {};
173    }
174
175    return $self->{sesh}{$sesh_key};
176}
177
178sub _remove_sesh {
179    my ($self, $sesh_key) = @_;
180
181    return delete ($self->{sesh}{$sesh_key});
182}
183
184# }}}
185# {{{ _printable
186
187sub _printable {
188    my $raw = shift;
189    $raw =~ s {([^\x20-\x7e])} {.}g;
190    return $raw;
191}
192
193# }}}
194# {{{ _unchunk_response
195
196sub _unchunk_response {
197    my ($resp) = @_;
198
199    my $transfer_encoding = $resp->header('transfer-encoding');
200
201    return if (!$transfer_encoding);
202
203    # http://www.jmarshall.com/easy/http/#http1.1c2
204    if ($transfer_encoding eq 'chunked') {
205        my $chunked_data = $resp->content();
206        my $unchunked_data = '';
207
208        my ($chunk_size_hex, $chunk_size, $chunk);
209        while ($chunked_data) {
210            # Read chunk size. Discard chunking comments.
211            ($chunk_size_hex, $chunked_data) = ($chunked_data =~ /^([0-9a-fA-F]+)(?:;.*)?\r\n(.*)/s);
212            last if (!defined $chunk_size_hex);
213            $chunk_size = oct("0x$chunk_size_hex");
214
215            last if ($chunk_size == 0); # Sod trailing headers!
216
217            # allow for \r\n trailing the chunk
218            $chunk = substr ($chunked_data, 0, $chunk_size+2, '');
219            substr ($chunk, -2, 2, '');
220
221            $unchunked_data .= $chunk;
222        }
223
224        $resp->content($unchunked_data);
225    }
226}
227
228# }}}
229
230# {{{ _unpipeline_data
231
232sub _unpipeline_data {
233    my $self = shift;
234    my ($sesh) = @_;
235
236    my @jumbo_monos;
237    foreach my $k (keys %$sesh) {
238        my $mono = Net::Analysis::TCPMonologue->new();
239        foreach my $pkt (@{ $sesh->{$k} }) {
240            $mono->add_packet ($pkt);
241        }
242        push (@jumbo_monos, $mono);
243    }
244
245    my ($req_mono, $resp_mono);
246    if ($jumbo_monos[0]->data =~ m!^(get|post|head)\s+([^ ]+)(HTTP/\d.\d)?!i) {
247        ($req_mono, $resp_mono) = @jumbo_monos;
248    } elsif ($jumbo_monos[1]->data =~ m!^(get|post|head)\s+([^ ]+)(HTTP/\d.\d)?!i) {
249        ($resp_mono, $req_mono) = @jumbo_monos;
250    } else {
251        carp ("This TCP session doesn't look very HTTPish");
252    }
253
254    my (@reqs)  = $self->_unpipeline_http_requests ($req_mono);
255    my (@resps) = $self->_unpipeline_http_responses ($resp_mono);
256    if (@reqs != @resps) {
257        carp ("found ".scalar(@reqs)." reqs but ".scalar(@resps).
258              " resps in pipelined HTTP");
259        return;
260    }
261
262    while (@reqs) {
263        my ($req_data, $req_mono) = @{ shift (@reqs) };
264        my ($resp_data, $resp_mono) = @{ shift (@resps) };
265
266        my ($http_req) = HTTP::Request->parse($req_data);
267        my ($http_resp) = HTTP::Response->parse($resp_data);
268        _unchunk_response ($http_resp); # Should port this to HTTP::Message
269
270        my $host = $http_req->header('host') || '(nohost)';
271        my $uri = $http_req->uri() || '/noURI';
272        $self->_trace (">>>> $host$uri <<\n");
273
274        $self->_trace ("  << ".$http_resp->code().", ".
275                       length($http_resp->content())." bytes");
276
277        my $args = {socketpair_key => $req_mono->socketpair_key(),
278                    req            => $http_req,
279                    req_mono       => $req_mono,
280                    resp           => $http_resp,
281                    resp_mono      => $resp_mono,
282                    t_end          => $resp_mono->t_end()->clone()};
283
284        $args->{t_start} = $req_mono->t_start()->clone();
285        $args->{t_elapsed} = $args->{t_end} - $args->{t_start};
286
287        $self->emit (name => 'http_transaction', args => $args);
288    }
289}
290
291# }}}
292# {{{ _unpipeline_http_requests
293
294sub _unpipeline_http_requests {
295    my $self = shift;
296    my ($mono) = @_;
297
298    # rfc 2612/8.1.2.2 : "Clients SHOULD NOT pipeline requests using
299    #                     non-idempotent methods"
300    # rfc 2612/9.1.2   : (GET, HEAD, PUT, DELETE) are idempotent.
301    # But, PUT may contain a data block (like POST), so we need to take care
302
303    my @ret;
304    my ($n_start,$n_end) = (0,0);
305    my @blocks = (map {$_."\r\n\r\n"} split ("\r\n\r\n", $mono->data()));
306    while (@blocks) {
307        $_ = shift(@blocks);
308
309        # PUT and POST requests have data after the request/header block;
310        #  snarf it if needed
311        $_ .= shift(@blocks) if (/^(put|post) /i);
312
313        $n_end = $n_start + length($_) - 1;
314
315        #print "  >> [$n_start, $n_end] ".(split("\n",$_))[0]."\n";
316
317        # Now build a mono from the packets that made up this block
318        my $sub_mono = Net::Analysis::TCPMonologue->new();
319        foreach my $pkt (@{ $mono->which_pkts($n_start,$n_end) }) {
320            $sub_mono->add_packet ($pkt);
321        }
322
323        push (@ret, [$_, $sub_mono]);
324
325        $n_start = $n_end+1;
326    }
327
328    #print ">>>>[".($n_end+1)." / ".$mono->length()." ]>>>>\n";
329    return @ret;
330}
331
332# }}}
333# {{{ _unpipeline_http_responses
334
335# This is knottier than it looks.
336
337# I just split on what looks like the start of a response. But if a response
338#  just so happened to contain a string that matched "HTTP/1.1 200 OK", this
339#  breaks.
340# Ideally, I'd parse the Content-Length header instead, and only read the
341#  right amount of data. But, a HEAD request generates a response with no
342#  body data, but a content-length that indicates what the response *would*
343#  have been (for a GET).
344# So: I need to know the request method in order to know whether to trust
345#  the Content-Length header, which is altogether too much irritation for now.
346
347sub _unpipeline_http_responses {
348    my $self = shift;
349    my ($mono) = @_;
350    my (@ret);
351
352    my @bits = split (qr{(HTTP/\d\.\d \d\d\d)}, $mono->data());
353    shift (@bits); # Grr, empty field to the left of first "HTTP/1.1..."
354
355    my ($n_start,$n_end) = (0,0);
356    while (@bits) {
357        $_ = shift (@bits);
358        $_ .= shift (@bits);
359
360        $n_end = $n_start + length($_) - 1;
361
362        # print "  << [$n_start, $n_end] ".(split("\n",$_))[0]."\n";
363
364        # Now build a mono from the packets that made up this block
365        my $sub_mono = Net::Analysis::TCPMonologue->new();
366        foreach my $pkt (@{ $mono->which_pkts($n_start,$n_end) }) {
367            $sub_mono->add_packet ($pkt);
368        }
369
370        push (@ret, [$_, $sub_mono]);
371
372        $n_start = $n_end+1;
373    }
374
375    #print "<<<<[".($n_end+1)." / ".$mono->length()." ]<<<<\n";
376    return @ret;
377}
378
379# }}}
380
3811;
382__END__
383# {{{ POD
384
385=head1 NAME
386
387Net::Analysis::Listener::HTTPPipelining - another HTTP listener
388
389=head1 SYNOPSIS
390
391This is an alternate version of N::A::L::HTTP, which has support for
392pipelined HTTP requests. It is experimental; eventually, it will
393become the default version of N::A::L::HTTP.
394
395=head1 SEE ALSO
396
397Net::Analysis::Listener::HTTP
398
399=head1 AUTHOR
400
401Adam B. Worrall, E<lt>worrall@cpan.orgE<gt>
402
403=head1 COPYRIGHT AND LICENSE
404
405Copyright (C) 2004 by Adam B. Worrall
406
407This library is free software; you can redistribute it and/or modify
408it under the same terms as Perl itself, either Perl version 5.8.5 or,
409at your option, any later version of Perl 5 you may have available.
410
411=cut
412
413# }}}
414
415# {{{ tcp_session_end
416
417sub tcp_session_end {
418    my ($self, $args) = @_;
419    my $pkt = $args->{pkt}; # Might well be undef
420    my $k   = $args->{socketpair_key};
421
422    $self->trace("  ==== tcp session end [$k]") if ($self->{v} & 0x04);
423
424    my $sesh = $self->_remove_sesh ($k);
425    return if (! defined $sesh);
426
427    my ($req_k, $resp_k);
428    foreach my $k (keys %$sesh) {
429        my $d = $sesh->{$k}->data();
430        # If a HTTP response happens to contain this string, this breaks
431        $req_k = $k if ($d =~ m!^(get|post|head)\s+([^ ]+)(HTTP/\d.\d)?!i);
432    }
433    foreach my $k (keys %$sesh) {
434        $resp_k = $k if ($k ne $req_k);
435    }
436
437    # Basically, I'm assuming that we got all packets in question
438    #  (e.g. we're not missing a few initial request packets).
439
440    # split up all the requests ...
441    if ($sesh->{$req_k} !~ /^(get|post|head)\s+/i) {
442        carp "HTTP request stream in $k had leading junk\n";
443    }
444    my @reqs = split ("\r\n\r\n", $sesh->{$req_k});
445
446    # ... split up all the responses
447    if ($sesh->{$resp_k} !~ m{^http/\d.\d \d.\d\s+}i) {
448        carp "HTTP response stream in $k had leading junk\n";
449    }
450    my @t_resps = split (qr{(http/\d.\d \d{3}\s+[^\n]+)}i, $sesh->{$resp_k});
451    shift(@t_resps); # LHS of the first split is the empty string
452    # Recombine the elements, to glue the split field (HTTP ...) onto the data
453    my @resps;
454    while (@t_resps) {
455        push (@resps, shift(@t_resps).shift(@t_resps));
456    }
457
458    if (@reqs != @resps) {
459        carp "number of HTTP reqs not the same as HTTP resps";
460    }
461
462    while (@reqs) {
463        my $d_req = shift (@reqs);
464        my $d_resp = shift (@resps);
465
466        #print "$d_req\n";
467
468        my $req = HTTP::Request->parse ($d_req);
469        my $resp = HTTP::Response->parse($d_resp);
470        _unchunk_response ($resp); # Should really port this to HTTP::Message
471
472        my $host = $req->header('host') || '(nohost)';
473        my $uri = $req->uri() || '/noURI';
474        $self->_trace (">>>> $host$uri <<\n");
475
476        $self->_trace ("  << ".$resp->code().", ".
477                       length($resp->content())." bytes");
478
479        my $args = {socketpair_key => $k,
480                    req            => $req,
481                    #req_mono       => $req_mono,
482                    resp           => $resp,
483                    #resp_mono      => $mono,
484                    #t_end          => $mono->t_end()->clone(),
485                   };
486        #if (defined $req_mono) {
487        #    $args->{t_start} = $sesh->{req_mono}->t_start()->clone();
488        #    $args->{t_elapsed} = $args->{t_end} - $args->{t_start};
489        #}
490
491        $self->emit (name => 'http_transaction', args => $args);
492    }
493}
494
495# }}}
496# {{{ tcp_monologue2
497
498sub tcp_monologue2 {
499    my ($self, $args) = @_;
500    my $k    = $args->{socketpair_key};
501    my $mono = $args->{monologue};
502    my $sesh = $self->_get_sesh ($k);
503    my $d    = $mono->{data};
504
505    my ($l) = (split('\n', $d))[0];
506    my ($first_line) = '';
507    if (defined $l) {
508        $l = substr($l,0,40) if (length($l) > 40);
509        $first_line = _printable($l);
510    }
511
512    our $TRACE=0;
513
514#    $TRACE = 1 if ($k eq '10.6.94.7:8080-159.206.22.101:2647');
515#    if ($k eq '10.6.94.7:8080-159.206.22.101:2647') {
516#        print "mono $k ".$mono->first_packet()->{time}."\n";
517#    }
518
519    if ($d =~ m!^(get|post|head)\s+([^ ]+)(HTTP/\d.\d)?!i) {
520        if (exists $sesh->{req}) {
521            carp "already have a req for $k, overwriting it\n";
522        }
523        $sesh->{req} = HTTP::Request->parse($d);
524        $sesh->{req_mono} = $mono; # Careful ! Must delete this ...
525
526        my $host = $sesh->{req}->header('host') || '(nohost)';
527        my $uri = $sesh->{req}->uri() || '/noURI';
528        $self->_trace (">>!> $host$uri <<\n");
529
530
531    } elsif ($d =~ m!^HTTP/\d.\d\s+(\d{3})!i) {
532        my $resp = HTTP::Response->parse($d);
533
534        _unchunk_response ($resp); # Should really port this to HTTP::Message
535
536        if (defined $sesh->{req}) {
537            my $host = $sesh->{req}->header('host') || '(nohost)';
538            my $uri = $sesh->{req}->uri() || '/noURI';
539            $self->_trace (">>>> $host$uri <<\n");
540        } else {
541            $self->_trace (">>>> ????? (no req found in sesh) <<\n");
542        }
543
544        $self->_trace ("  << ".$resp->code().", ".
545                       length($resp->content())." bytes");
546
547        my $req_mono = $sesh->{req_mono};
548        my $args = {socketpair_key => $k,
549                    req            => $sesh->{req},
550                    req_mono       => $req_mono,
551                    resp           => $resp,
552                    resp_mono      => $mono,
553                    t_end          => $mono->t_end()->clone()};
554        if (defined $req_mono) {
555            $args->{t_start} = $sesh->{req_mono}->t_start()->clone();
556            $args->{t_elapsed} = $args->{t_end} - $args->{t_start};
557        }
558
559        $self->emit (name => 'http_transaction', args => $args);
560
561        delete ($sesh->{req});
562        delete ($sesh->{req_mono});
563
564    } else {
565        $self->_trace ("malformed HTTP monologue in $k starts: $first_line\n");
566    }
567}
568
569# }}}
570# {{{ _http_message_has_body
571
572sub _http_message_has_body {
573    my $self = shift;
574    my ($str,$str2) = @_;
575
576    if ($str =~ m!^(get|post|head|put|delete)\s+([^ ]+)(HTTP/\d.\d)?!i) {
577        return (($1 eq 'PUT') || ($1 eq 'POST')) ? 1 : 0;
578
579    } elsif ($str =~ m!^HTTP/\d.\d\s+(\d{3})!i) {
580        return 0;
581    }
582}
583
584# }}}
585
586# {{{ -------------------------={ E N D }=----------------------------------
587
588# Local variables:
589# folded-file: t
590# end:
591
592# }}}
593