1package Net::Analysis::Listener::HTTPPipelining; 2# $Id: HTTP.pm 140 2005-10-21 16:31:29Z abworrall $ 3 4# {{{ Boilerplate 5 6use 5.008000; 7our $VERSION = '0.02'; 8use strict; 9use warnings; 10 11use Carp qw(carp croak confess); 12 13use Params::Validate qw(:all); 14 15use base qw(Net::Analysis::Listener::Base); 16 17# }}} 18 19use Net::Analysis::Packet qw(:pktslots :func); 20use Net::Analysis::TCPMonologue; 21 22use HTTP::Response; 23use HTTP::Request; 24 25#### Callbacks 26# 27# {{{ validate_configuration 28 29sub validate_configuration { 30 my $self = shift; 31 32 my %h = validate (@_, { v => {type => SCALAR, 33 default => 0} }); 34 35 return \%h; 36} 37 38# }}} 39 40# {{{ setup 41 42# A chance to setup stuff for our listener 43sub setup { 44 my ($self) = shift; 45 46 $self->{sesh} = {}; # TCP sessions 47} 48 49# }}} 50# {{{ teardown 51 52sub teardown { 53 my ($self) = shift; 54} 55 56# }}} 57 58# {{{ tcp_session_start 59 60sub tcp_session_start { 61 my ($self, $args) = @_; 62 my $pkt = $args->{pkt}; # Might well be undef 63 my $k = $args->{socketpair_key}; 64 65 if ($self->{v} & 0x04) { 66 $self->trace (" ==== tcp session start [$pkt->{from} -> $pkt->{to}]"); 67 } 68} 69 70# }}} 71# {{{ tcp_session_end 72 73sub tcp_session_end { 74 my ($self, $args) = @_; 75 my $pkt = $args->{pkt}; # Might well be undef 76 my $k = $args->{socketpair_key}; 77 78 $self->trace(" ==== tcp session end [$k]") if ($self->{v} & 0x04); 79 80 my $sesh = $self->_remove_sesh ($k); 81 return if (! defined $sesh); 82 83 $self->_unpipeline_data($sesh); 84} 85 86# }}} 87# {{{ tcp_monologue 88 89# In a pipelined request, the concept of orderly back-and-forth 90# monologues breaks down. Conceptually, all the requests are 91# glued together into one big monologue, and then all the responses 92# are returned in another big monologue. 93# These jumbo monlogues may be transmitted simultaneusly (that is 94# kinda the point of pipelining ;), so from the perspective of our 95# pcap files, the divisions between monologues are arbitrary. 96# So, we assign no meaning to the monologues that arrive; all we do 97# is steal the packets from them. 98# All the hard work - breaking apart the jumbo monologues into separate 99# requests and responses, pairing them up, and generating synthetic 100# monologues, is done in tcp_session_end. 101 102sub tcp_monologue { 103 my ($self, $args) = @_; 104 my $k = $args->{socketpair_key}; 105 my $mono = $args->{monologue}; 106 my $sesh = $self->_get_sesh ($k); 107 108 # Decide which jumbo monologue to place these pkts in 109 my $socketpair_from = $mono->first_packet()->[PKT_SLOT_FROM]; 110 111 my $pkts = $sesh->{$socketpair_from} ||= []; 112 push (@$pkts, @{ $mono->_data_packets }); # POTENTIAL MEM LEAK !! TAKE CARE 113} 114 115# }}} 116# {{{ http_transaction 117 118# Listen to our own event and print very basic report, if asked 119sub http_transaction { 120 my ($self, $args) = @_; 121 return if (! $self->{v}); 122 123 my $req = $args->{req}; 124 my $uri = (defined $req) ? $req->uri() : "(no uri)"; 125 my $method = (defined $req) ? $req->method() : "(no method)"; 126 my $t = $args->{t_elapsed} || -1.0; 127 printf "%8.4fs : $method %s\n", $t, $uri; 128} 129 130# }}} 131 132# {{{ as_string 133 134sub as_string { 135 my ($self) = @_; 136 my $s = ''; 137 138 $s .= "[".ref($self)."]"; 139 140 return $s; 141} 142 143# }}} 144 145#### Support funcs 146# 147# {{{ _trace 148 149# This may become more clever ... 150 151our $TRACE=0; 152 153sub _trace { 154 my ($self) = shift; 155 156 return if (! $TRACE); 157 158 foreach (@_) { 159 my $l = $_; # Skip 'Modification of a read-only value' errors 160 chomp ($l); 161 print "$l\n"; 162 } 163} 164 165# }}} 166# {{{ _{get|remove}_sesh 167 168sub _get_sesh { 169 my ($self, $sesh_key) = @_; 170 171 if (! exists $self->{sesh}{$sesh_key}) { 172 $self->{sesh}{$sesh_key} = {}; 173 } 174 175 return $self->{sesh}{$sesh_key}; 176} 177 178sub _remove_sesh { 179 my ($self, $sesh_key) = @_; 180 181 return delete ($self->{sesh}{$sesh_key}); 182} 183 184# }}} 185# {{{ _printable 186 187sub _printable { 188 my $raw = shift; 189 $raw =~ s {([^\x20-\x7e])} {.}g; 190 return $raw; 191} 192 193# }}} 194# {{{ _unchunk_response 195 196sub _unchunk_response { 197 my ($resp) = @_; 198 199 my $transfer_encoding = $resp->header('transfer-encoding'); 200 201 return if (!$transfer_encoding); 202 203 # http://www.jmarshall.com/easy/http/#http1.1c2 204 if ($transfer_encoding eq 'chunked') { 205 my $chunked_data = $resp->content(); 206 my $unchunked_data = ''; 207 208 my ($chunk_size_hex, $chunk_size, $chunk); 209 while ($chunked_data) { 210 # Read chunk size. Discard chunking comments. 211 ($chunk_size_hex, $chunked_data) = ($chunked_data =~ /^([0-9a-fA-F]+)(?:;.*)?\r\n(.*)/s); 212 last if (!defined $chunk_size_hex); 213 $chunk_size = oct("0x$chunk_size_hex"); 214 215 last if ($chunk_size == 0); # Sod trailing headers! 216 217 # allow for \r\n trailing the chunk 218 $chunk = substr ($chunked_data, 0, $chunk_size+2, ''); 219 substr ($chunk, -2, 2, ''); 220 221 $unchunked_data .= $chunk; 222 } 223 224 $resp->content($unchunked_data); 225 } 226} 227 228# }}} 229 230# {{{ _unpipeline_data 231 232sub _unpipeline_data { 233 my $self = shift; 234 my ($sesh) = @_; 235 236 my @jumbo_monos; 237 foreach my $k (keys %$sesh) { 238 my $mono = Net::Analysis::TCPMonologue->new(); 239 foreach my $pkt (@{ $sesh->{$k} }) { 240 $mono->add_packet ($pkt); 241 } 242 push (@jumbo_monos, $mono); 243 } 244 245 my ($req_mono, $resp_mono); 246 if ($jumbo_monos[0]->data =~ m!^(get|post|head)\s+([^ ]+)(HTTP/\d.\d)?!i) { 247 ($req_mono, $resp_mono) = @jumbo_monos; 248 } elsif ($jumbo_monos[1]->data =~ m!^(get|post|head)\s+([^ ]+)(HTTP/\d.\d)?!i) { 249 ($resp_mono, $req_mono) = @jumbo_monos; 250 } else { 251 carp ("This TCP session doesn't look very HTTPish"); 252 } 253 254 my (@reqs) = $self->_unpipeline_http_requests ($req_mono); 255 my (@resps) = $self->_unpipeline_http_responses ($resp_mono); 256 if (@reqs != @resps) { 257 carp ("found ".scalar(@reqs)." reqs but ".scalar(@resps). 258 " resps in pipelined HTTP"); 259 return; 260 } 261 262 while (@reqs) { 263 my ($req_data, $req_mono) = @{ shift (@reqs) }; 264 my ($resp_data, $resp_mono) = @{ shift (@resps) }; 265 266 my ($http_req) = HTTP::Request->parse($req_data); 267 my ($http_resp) = HTTP::Response->parse($resp_data); 268 _unchunk_response ($http_resp); # Should port this to HTTP::Message 269 270 my $host = $http_req->header('host') || '(nohost)'; 271 my $uri = $http_req->uri() || '/noURI'; 272 $self->_trace (">>>> $host$uri <<\n"); 273 274 $self->_trace (" << ".$http_resp->code().", ". 275 length($http_resp->content())." bytes"); 276 277 my $args = {socketpair_key => $req_mono->socketpair_key(), 278 req => $http_req, 279 req_mono => $req_mono, 280 resp => $http_resp, 281 resp_mono => $resp_mono, 282 t_end => $resp_mono->t_end()->clone()}; 283 284 $args->{t_start} = $req_mono->t_start()->clone(); 285 $args->{t_elapsed} = $args->{t_end} - $args->{t_start}; 286 287 $self->emit (name => 'http_transaction', args => $args); 288 } 289} 290 291# }}} 292# {{{ _unpipeline_http_requests 293 294sub _unpipeline_http_requests { 295 my $self = shift; 296 my ($mono) = @_; 297 298 # rfc 2612/8.1.2.2 : "Clients SHOULD NOT pipeline requests using 299 # non-idempotent methods" 300 # rfc 2612/9.1.2 : (GET, HEAD, PUT, DELETE) are idempotent. 301 # But, PUT may contain a data block (like POST), so we need to take care 302 303 my @ret; 304 my ($n_start,$n_end) = (0,0); 305 my @blocks = (map {$_."\r\n\r\n"} split ("\r\n\r\n", $mono->data())); 306 while (@blocks) { 307 $_ = shift(@blocks); 308 309 # PUT and POST requests have data after the request/header block; 310 # snarf it if needed 311 $_ .= shift(@blocks) if (/^(put|post) /i); 312 313 $n_end = $n_start + length($_) - 1; 314 315 #print " >> [$n_start, $n_end] ".(split("\n",$_))[0]."\n"; 316 317 # Now build a mono from the packets that made up this block 318 my $sub_mono = Net::Analysis::TCPMonologue->new(); 319 foreach my $pkt (@{ $mono->which_pkts($n_start,$n_end) }) { 320 $sub_mono->add_packet ($pkt); 321 } 322 323 push (@ret, [$_, $sub_mono]); 324 325 $n_start = $n_end+1; 326 } 327 328 #print ">>>>[".($n_end+1)." / ".$mono->length()." ]>>>>\n"; 329 return @ret; 330} 331 332# }}} 333# {{{ _unpipeline_http_responses 334 335# This is knottier than it looks. 336 337# I just split on what looks like the start of a response. But if a response 338# just so happened to contain a string that matched "HTTP/1.1 200 OK", this 339# breaks. 340# Ideally, I'd parse the Content-Length header instead, and only read the 341# right amount of data. But, a HEAD request generates a response with no 342# body data, but a content-length that indicates what the response *would* 343# have been (for a GET). 344# So: I need to know the request method in order to know whether to trust 345# the Content-Length header, which is altogether too much irritation for now. 346 347sub _unpipeline_http_responses { 348 my $self = shift; 349 my ($mono) = @_; 350 my (@ret); 351 352 my @bits = split (qr{(HTTP/\d\.\d \d\d\d)}, $mono->data()); 353 shift (@bits); # Grr, empty field to the left of first "HTTP/1.1..." 354 355 my ($n_start,$n_end) = (0,0); 356 while (@bits) { 357 $_ = shift (@bits); 358 $_ .= shift (@bits); 359 360 $n_end = $n_start + length($_) - 1; 361 362 # print " << [$n_start, $n_end] ".(split("\n",$_))[0]."\n"; 363 364 # Now build a mono from the packets that made up this block 365 my $sub_mono = Net::Analysis::TCPMonologue->new(); 366 foreach my $pkt (@{ $mono->which_pkts($n_start,$n_end) }) { 367 $sub_mono->add_packet ($pkt); 368 } 369 370 push (@ret, [$_, $sub_mono]); 371 372 $n_start = $n_end+1; 373 } 374 375 #print "<<<<[".($n_end+1)." / ".$mono->length()." ]<<<<\n"; 376 return @ret; 377} 378 379# }}} 380 3811; 382__END__ 383# {{{ POD 384 385=head1 NAME 386 387Net::Analysis::Listener::HTTPPipelining - another HTTP listener 388 389=head1 SYNOPSIS 390 391This is an alternate version of N::A::L::HTTP, which has support for 392pipelined HTTP requests. It is experimental; eventually, it will 393become the default version of N::A::L::HTTP. 394 395=head1 SEE ALSO 396 397Net::Analysis::Listener::HTTP 398 399=head1 AUTHOR 400 401Adam B. Worrall, E<lt>worrall@cpan.orgE<gt> 402 403=head1 COPYRIGHT AND LICENSE 404 405Copyright (C) 2004 by Adam B. Worrall 406 407This library is free software; you can redistribute it and/or modify 408it under the same terms as Perl itself, either Perl version 5.8.5 or, 409at your option, any later version of Perl 5 you may have available. 410 411=cut 412 413# }}} 414 415# {{{ tcp_session_end 416 417sub tcp_session_end { 418 my ($self, $args) = @_; 419 my $pkt = $args->{pkt}; # Might well be undef 420 my $k = $args->{socketpair_key}; 421 422 $self->trace(" ==== tcp session end [$k]") if ($self->{v} & 0x04); 423 424 my $sesh = $self->_remove_sesh ($k); 425 return if (! defined $sesh); 426 427 my ($req_k, $resp_k); 428 foreach my $k (keys %$sesh) { 429 my $d = $sesh->{$k}->data(); 430 # If a HTTP response happens to contain this string, this breaks 431 $req_k = $k if ($d =~ m!^(get|post|head)\s+([^ ]+)(HTTP/\d.\d)?!i); 432 } 433 foreach my $k (keys %$sesh) { 434 $resp_k = $k if ($k ne $req_k); 435 } 436 437 # Basically, I'm assuming that we got all packets in question 438 # (e.g. we're not missing a few initial request packets). 439 440 # split up all the requests ... 441 if ($sesh->{$req_k} !~ /^(get|post|head)\s+/i) { 442 carp "HTTP request stream in $k had leading junk\n"; 443 } 444 my @reqs = split ("\r\n\r\n", $sesh->{$req_k}); 445 446 # ... split up all the responses 447 if ($sesh->{$resp_k} !~ m{^http/\d.\d \d.\d\s+}i) { 448 carp "HTTP response stream in $k had leading junk\n"; 449 } 450 my @t_resps = split (qr{(http/\d.\d \d{3}\s+[^\n]+)}i, $sesh->{$resp_k}); 451 shift(@t_resps); # LHS of the first split is the empty string 452 # Recombine the elements, to glue the split field (HTTP ...) onto the data 453 my @resps; 454 while (@t_resps) { 455 push (@resps, shift(@t_resps).shift(@t_resps)); 456 } 457 458 if (@reqs != @resps) { 459 carp "number of HTTP reqs not the same as HTTP resps"; 460 } 461 462 while (@reqs) { 463 my $d_req = shift (@reqs); 464 my $d_resp = shift (@resps); 465 466 #print "$d_req\n"; 467 468 my $req = HTTP::Request->parse ($d_req); 469 my $resp = HTTP::Response->parse($d_resp); 470 _unchunk_response ($resp); # Should really port this to HTTP::Message 471 472 my $host = $req->header('host') || '(nohost)'; 473 my $uri = $req->uri() || '/noURI'; 474 $self->_trace (">>>> $host$uri <<\n"); 475 476 $self->_trace (" << ".$resp->code().", ". 477 length($resp->content())." bytes"); 478 479 my $args = {socketpair_key => $k, 480 req => $req, 481 #req_mono => $req_mono, 482 resp => $resp, 483 #resp_mono => $mono, 484 #t_end => $mono->t_end()->clone(), 485 }; 486 #if (defined $req_mono) { 487 # $args->{t_start} = $sesh->{req_mono}->t_start()->clone(); 488 # $args->{t_elapsed} = $args->{t_end} - $args->{t_start}; 489 #} 490 491 $self->emit (name => 'http_transaction', args => $args); 492 } 493} 494 495# }}} 496# {{{ tcp_monologue2 497 498sub tcp_monologue2 { 499 my ($self, $args) = @_; 500 my $k = $args->{socketpair_key}; 501 my $mono = $args->{monologue}; 502 my $sesh = $self->_get_sesh ($k); 503 my $d = $mono->{data}; 504 505 my ($l) = (split('\n', $d))[0]; 506 my ($first_line) = ''; 507 if (defined $l) { 508 $l = substr($l,0,40) if (length($l) > 40); 509 $first_line = _printable($l); 510 } 511 512 our $TRACE=0; 513 514# $TRACE = 1 if ($k eq '10.6.94.7:8080-159.206.22.101:2647'); 515# if ($k eq '10.6.94.7:8080-159.206.22.101:2647') { 516# print "mono $k ".$mono->first_packet()->{time}."\n"; 517# } 518 519 if ($d =~ m!^(get|post|head)\s+([^ ]+)(HTTP/\d.\d)?!i) { 520 if (exists $sesh->{req}) { 521 carp "already have a req for $k, overwriting it\n"; 522 } 523 $sesh->{req} = HTTP::Request->parse($d); 524 $sesh->{req_mono} = $mono; # Careful ! Must delete this ... 525 526 my $host = $sesh->{req}->header('host') || '(nohost)'; 527 my $uri = $sesh->{req}->uri() || '/noURI'; 528 $self->_trace (">>!> $host$uri <<\n"); 529 530 531 } elsif ($d =~ m!^HTTP/\d.\d\s+(\d{3})!i) { 532 my $resp = HTTP::Response->parse($d); 533 534 _unchunk_response ($resp); # Should really port this to HTTP::Message 535 536 if (defined $sesh->{req}) { 537 my $host = $sesh->{req}->header('host') || '(nohost)'; 538 my $uri = $sesh->{req}->uri() || '/noURI'; 539 $self->_trace (">>>> $host$uri <<\n"); 540 } else { 541 $self->_trace (">>>> ????? (no req found in sesh) <<\n"); 542 } 543 544 $self->_trace (" << ".$resp->code().", ". 545 length($resp->content())." bytes"); 546 547 my $req_mono = $sesh->{req_mono}; 548 my $args = {socketpair_key => $k, 549 req => $sesh->{req}, 550 req_mono => $req_mono, 551 resp => $resp, 552 resp_mono => $mono, 553 t_end => $mono->t_end()->clone()}; 554 if (defined $req_mono) { 555 $args->{t_start} = $sesh->{req_mono}->t_start()->clone(); 556 $args->{t_elapsed} = $args->{t_end} - $args->{t_start}; 557 } 558 559 $self->emit (name => 'http_transaction', args => $args); 560 561 delete ($sesh->{req}); 562 delete ($sesh->{req_mono}); 563 564 } else { 565 $self->_trace ("malformed HTTP monologue in $k starts: $first_line\n"); 566 } 567} 568 569# }}} 570# {{{ _http_message_has_body 571 572sub _http_message_has_body { 573 my $self = shift; 574 my ($str,$str2) = @_; 575 576 if ($str =~ m!^(get|post|head|put|delete)\s+([^ ]+)(HTTP/\d.\d)?!i) { 577 return (($1 eq 'PUT') || ($1 eq 'POST')) ? 1 : 0; 578 579 } elsif ($str =~ m!^HTTP/\d.\d\s+(\d{3})!i) { 580 return 0; 581 } 582} 583 584# }}} 585 586# {{{ -------------------------={ E N D }=---------------------------------- 587 588# Local variables: 589# folded-file: t 590# end: 591 592# }}} 593