1# -*- perl -*- 2# $Id: ftp.pm,v 1.11 2003/02/19 14:58:37 langhein Exp $ 3# derived from: ftp.pm,v 1.31 2001/10/26 20:13:20 gisle Exp 4 5# Implementation of the ftp protocol (RFC 959). We let the Net::FTP 6# package do all the dirty work. 7 8package LWP::Parallel::Protocol::ftp; 9 10use Carp (); 11 12use HTTP::Status (); 13use HTTP::Negotiate (); 14use HTTP::Response (); 15use LWP::MediaTypes (); 16use File::Listing (); 17 18require LWP::Parallel::Protocol; 19require LWP::Protocol::ftp; 20@ISA = qw(LWP::Parallel::Protocol LWP::Protocol::ftp); 21 22use strict; 23 24eval { 25 package LWP::Parallel::Protocol::MyFTP; 26 27 require Net::FTP; 28 Net::FTP->require_version(2.00); 29 30 use vars qw(@ISA); 31 @ISA=qw(Net::FTP); 32 33 sub new { 34 my $class = shift; 35 LWP::Debug::trace('()'); 36 37 my $self = $class->SUPER::new(@_) || return undef; 38 39 my $mess = $self->message; # welcome message 40 LWP::Debug::debug($mess); 41 $mess =~ s|\n.*||s; # only first line left 42 $mess =~ s|\s*ready\.?$||; 43 # Make the version number more HTTP like 44 $mess =~ s|\s*\(Version\s*|/| and $mess =~ s|\)$||; 45 ${*$self}{myftp_server} = $mess; 46 #$response->header("Server", $mess); 47 48 $self; 49 } 50 51 sub http_server { 52 my $self = shift; 53 ${*$self}{myftp_server}; 54 } 55 56 sub home { 57 my $self = shift; 58 my $old = ${*$self}{myftp_home}; 59 if (@_) { 60 ${*$self}{myftp_home} = shift; 61 } 62 $old; 63 } 64 65 sub go_home { 66 LWP::Debug::trace(''); 67 my $self = shift; 68 $self->cwd(${*$self}{myftp_home}); 69 } 70 71 sub request_count { 72 my $self = shift; 73 ++${*$self}{myftp_reqcount}; 74 } 75 76 sub ping { 77 LWP::Debug::trace(''); 78 my $self = shift; 79 return $self->go_home; 80 } 81 82}; 83my $init_failed = $@; 84 85=item ($socket, $second_arg) = $prot->handle_connect ($req, $proxy, $timeout); 86 87This method connects with the server on the machine and port specified 88in the $req object. If a $proxy is given, it will return an error, 89since the FTP protocol does not allow proxying. (See below on how such 90an error is propagated to the caller). 91 92If successful, the first argument will contain the IO::Socket object 93that connects to the specified site. The second argument is empty (for 94ftp, that is. See LWP::Protocol::http for different usage). 95 96If the connection fails, $socket is set to 'undef', and the second 97argument contains a HTTP::Response object holding a textual 98representation of the error. (You can use its 'code' and 'message' 99methods to find out what went wrong) 100 101=cut 102 103sub handle_connect { 104 my ($self, $request, $proxy, $timeout) = @_; 105 106 # mostly directly copied from the original Protocol::ftp, changes 107 # are marked with "# ML" comment (mostly return values) 108 109 # check proxy 110 if (defined $proxy) 111 { 112 return (undef, new HTTP::Response &HTTP::Status::RC_BAD_REQUEST, 113 'You can not proxy through the ftp'); # ML 114 } 115 116 my $url = $request->url; 117 if ($url->scheme ne 'ftp') { 118 my $scheme = $url->scheme; 119 return (undef, new HTTP::Response &HTTP::Status::RC_INTERNAL_SERVER_ERROR, 120 "LWP::Protocol::ftp::request called for '$scheme'"); # ML 121 } 122 123 # check method 124 my $method = $request->method; 125 126 unless ($method eq 'GET' || $method eq 'HEAD' || $method eq 'PUT') { 127 return (undef, new HTTP::Response &HTTP::Status::RC_BAD_REQUEST, 128 'Library does not allow method ' . 129 "$method for 'ftp:' URLs"); # ML 130 } 131 132 if ($init_failed) { 133 return (undef, new HTTP::Response &HTTP::Status::RC_INTERNAL_SERVER_ERROR, 134 $init_failed); # ML 135 } 136 137 my $host = $url->host; 138 my $port = $url->port; 139 my $user = $url->user; 140 # taken out some additional variable declarations here, that are now 141 # only needed in 'write_request' method. 142 143 ################# 144 # new in LWP 5.60 145 my $account = $request->header('Account'); # ML 146 147 my $key; 148 my $conn_cache = $self->{ua}{conn_cache}; 149 if ($conn_cache) { 150 $key = "$host:$port:$user"; 151 $key .= ":$account" if defined($account); 152 if (my $ftp = $conn_cache->withdraw("ftp", $key)) { 153 if ($ftp->ping) { 154 LWP::Debug::debug('Reusing old connection'); 155 # save it again 156 $conn_cache->deposit("ftp", $key, $ftp); 157 # added $response object # ML 158 my $response = 159 HTTP::Response->new(&HTTP::Status::RC_OK, "Document follows"); 160 return ($ftp, $response); 161 } 162 } 163 } 164 165 # try to make a connection 166 my $ftp = LWP::Parallel::Protocol::MyFTP->new($host, 167 Port => $port, 168 Timeout => $timeout, 169 ); 170 # XXX Should be some what to pass on 'Passive' (header??) 171 ################# 172 173 my $response; 174 unless ($ftp) { 175 $@ =~ s/^Net::FTP: //; # new in LWP 5.60 176 $response = HTTP::Response->new(&HTTP::Status::RC_INTERNAL_SERVER_ERROR,$@); 177 } else { 178 # Create an initial response object 179 $response = HTTP::Response->new(&HTTP::Status::RC_OK, "Document follows"); 180 ################# 181 # new in LWP 5.60 182 $response->header(Server => $ftp->http_server); 183 $response->header('Client-Request-Num' => $ftp->request_count); 184 ################# 185 $response->request($request); 186 } 187 188 return ($ftp, $response); # ML 189} 190 191sub write_request { 192 my ($self, $request, $ftp, $response, $arg, $timeout) = @_; 193 194 # Some of the following variable declarations, directly copied from 195 # the original Protocol::ftp module, appear both in 'handle_connect' 196 # _and_ 'write_request' method. Although it introduces additional 197 # overhead, we can't pass additional variables between those two 198 # methods, but we need some of the values in both routines. We 199 # allow the account to be specified in the "Account" header 200 my $account = $request->header('Account'); 201 202 my $url = $request->url; 203 my $host = $url->host; 204 my $port = $url->port; 205 my $user = $url->user; 206 my $password = $url->password; 207 208 # If a basic autorization header is present than we prefer these over 209 # the username/password specified in the URL. 210 { 211 my($u,$p) = $request->authorization_basic; 212 if (defined $u) { 213 $user = $u; 214 $password = $p; 215 } 216 } 217 218 my $method = $request->method; 219 220 # from here on mostly directly clipped from the original 221 # Protocol::ftp. Changes are marked with "# ML" comment 222 223 # from here on it seems FTP will handle timeouts, right? # ML 224 $ftp->timeout($timeout) if $timeout; 225 226 LWP::Debug::debug("Logging in as $user (password $password)..."); 227 unless ($ftp->login($user, $password, $account)) { 228 # Unauthorized. Let's fake a RC_UNAUTHORIZED response 229 my $mess = scalar($ftp->message); 230 LWP::Debug::debug($mess); 231 $mess =~ s/\n$//; 232 my $res = HTTP::Response->new(&HTTP::Status::RC_UNAUTHORIZED, $mess); 233 $res->header("Server", $ftp->http_server); 234 $res->header("WWW-Authenticate", qq(Basic Realm="FTP login")); 235 return (undef, $res); # ML 236 } 237 LWP::Debug::debug($ftp->message); 238 239 ################# 240 # new in LWP 5.60 241 my $home = $ftp->pwd; 242 LWP::Debug::debug("home: '$home'"); 243 $ftp->home($home); 244 245 # ML 246 my $key; 247 $key = "$host:$port:$user"; 248 $key .= ":$account" if defined($account); 249 # 250 251 my $conn_cache = $self->{ua}{conn_cache}; 252 $conn_cache->deposit("ftp", $key, $ftp) if $conn_cache; 253 ################# 254 255 # Get & fix the path 256 my @path = $url->path_segments; 257 # removed in LWP 5.48 258 #shift(@path); # There will always be an empty first component 259 #pop(@path) while @path && $path[-1] eq ''; # remove empty tailing comps 260 261 my $remote_file = pop(@path); 262 $remote_file = '' unless defined $remote_file; 263 264 my $type; 265 if (ref $remote_file) { 266 my @params; 267 ($remote_file, @params) = @$remote_file; 268 for (@params) { 269 $type = $_ if s/^type=//; 270 } 271 } 272 273 if ($type && $type eq 'a') { 274 $ftp->ascii; 275 } else { 276 $ftp->binary; 277 } 278 279 for (@path) { 280 LWP::Debug::debug("CWD $_"); 281 unless ($ftp->cwd($_)) { 282 return (undef, new HTTP::Response &HTTP::Status::RC_NOT_FOUND, 283 "Can't chdir to $_"); 284 } 285 } 286 287 if ($method eq 'GET' || $method eq 'HEAD') { 288 # new in ftp.pm,v 1.23 (fixed in ftp.pm,v 1.24) 289 LWP::Debug::debug("MDTM"); 290 if (my $mod_time = $ftp->mdtm($remote_file)) { 291 $response->last_modified($mod_time); 292 if (my $ims = $request->if_modified_since) { 293 if ($mod_time <= $ims) { 294 $response->code(&HTTP::Status::RC_NOT_MODIFIED); 295 $response->message("Not modified"); 296 return (undef, $response); 297 } 298 } 299 } 300 # end_of_new_stuff 301 302 ################# 303 # new in LWP 5.60 304 305 # We'll use this later to abort the transfer if necessary. 306 # if $max_size is defined, we need to abort early. Otherwise, it's 307 # a normal transfer 308 my $max_size = undef; 309 310 # Set resume location, if the client requested it 311 if ($request->header('Range') && $ftp->supported('REST')) 312 { 313 my $range_info = $request->header('Range'); 314 315 # Change bytes=2772992-6781209 to just 2772992 316 my ($start_byte,$end_byte) = $range_info =~ /.*=\s*(\d+)-(\d+)/; 317 318 if (!defined $start_byte || !defined $end_byte || 319 ($start_byte < 0) || ($start_byte > $end_byte) || ($end_byte < 0)) 320 { 321 return (undef, HTTP::Response->new(&HTTP::Status::RC_BAD_REQUEST, 322 'Incorrect syntax for Range request')); 323 } 324 325 $max_size = $end_byte-$start_byte; 326 327 $ftp->restart($start_byte); 328 } elsif ($request->header('Range') && !$ftp->supported('REST')) { 329 return (undef,HTTP::Response->new(&HTTP::Status::RC_NOT_IMPLEMENTED, 330 "Server does not support resume.")); 331 } 332 ################ 333 334 335 my $data; # the data handle 336 LWP::Debug::debug("retrieve file?"); 337 if (length($remote_file) and $data = $ftp->retr($remote_file)) { 338 # remove reading from socket into 'read_chunk' method. 339 # just return our new $listen_socket here. 340 my($type, @enc) = LWP::MediaTypes::guess_media_type($remote_file); 341 $response->header('Content-Type', $type) if $type; 342 for (@enc) { 343 $response->push_header('Content-Encoding', $_); 344 } 345 my $mess = $ftp->message; 346 LWP::Debug::debug($mess); 347 if ($mess =~ /\((\d+)\s+bytes\)/) { 348 $response->header('Content-Length', "$1"); 349 } 350 return ($data, $response); # ML 351 } elsif (!length($remote_file) || $ftp->code == 550) { 352 # no file, the remote file is actually a directory, so cdw into directory 353 if (length($remote_file) && !$ftp->cwd($remote_file)) { 354 LWP::Debug::debug("chdir before listing failed"); 355 return (undef, new HTTP::Response &HTTP::Status::RC_NOT_FOUND, 356 "File '$remote_file' not found"); # ML 357 } 358 359 # It should now be safe to try to list the directory 360 LWP::Debug::debug("dir"); 361 my @lsl = $ftp->dir; 362 363 # Try to figure out if the user want us to convert the 364 # directory listing to HTML. 365 my @variants = 366 ( 367 ['html', 0.60, 'text/html' ], 368 ['dir', 1.00, 'text/ftp-dir-listing' ] 369 ); 370 #$HTTP::Negotiate::DEBUG=1; 371 my $prefer = HTTP::Negotiate::choose(\@variants, $request); 372 373 my $content = ''; 374 375 if (!defined($prefer)) { 376 return (undef, new HTTP::Response &HTTP::Status::RC_NOT_ACCEPTABLE, 377 "Neither HTML nor directory listing wanted"); # ML 378 } elsif ($prefer eq 'html') { 379 $response->header('Content-Type' => 'text/html'); 380 $content = "<HEAD><TITLE>File Listing</TITLE>\n"; 381 my $base = $request->url->clone; 382 my $path = $base->path; 383 $base->path("$path/") unless $path =~ m|/$|; 384 $content .= qq(<BASE HREF="$base">\n</HEAD>\n); 385 $content .= "<BODY>\n<UL>\n"; 386 for (File::Listing::parse_dir(\@lsl, 'GMT')) { 387 my($name, $type, $size, $mtime, $mode) = @$_; 388 $content .= qq( <LI> <a href="$name">$name</a>); 389 $content .= " $size bytes" if $type eq 'f'; 390 $content .= "\n"; 391 } 392 $content .= "</UL></body>\n"; 393 } else { 394 $response->header('Content-Type', 'text/ftp-dir-listing'); 395 $content = join("\n", @lsl, ''); 396 } 397 398 $response->header('Content-Length', length($content)); 399 400 if ($method ne 'HEAD') { 401 # $self->receive_once($arg, $response, $content); 402 # calling receive_once is now done in UserAgent.pm #ML 7/99 403 # here we just add the content to the response: 404 $response->content($content); 405 } 406 } else { 407 my $res = new HTTP::Response &HTTP::Status::RC_BAD_REQUEST, 408 "FTP return code " . $ftp->code; 409 $res->content_type("text/plain"); 410 $res->content($ftp->message); 411 return (undef, $res); # ML 412 } 413 } elsif ($method eq 'PUT') { 414 # method must be PUT 415 unless (length($remote_file)) { 416 return (undef, new HTTP::Response &HTTP::Status::RC_BAD_REQUEST, 417 "Must have a file name to PUT to"); # ML 418 } 419 my $data; 420 if ($data = $ftp->stor($remote_file)) { 421 LWP::Debug::debug($ftp->message); 422 LWP::Debug::debug("$data"); 423 my $content = $request->content; 424 my $bytes = 0; 425 if (defined $content) { 426 if (ref($content) eq 'SCALAR') { 427 $bytes = $data->write($$content, length($$content)); 428 } elsif (ref($content) eq 'CODE') { 429 my($buf, $n); 430 while (length($buf = &$content)) { 431 $n = $data->write($buf, length($buf)); 432 last unless $n; 433 $bytes += $n; 434 } 435 } elsif (!ref($content)) { 436 if (defined $content && length($content)) { 437 $bytes = $data->write($content, length($content)); 438 } 439 } else { 440 die "Bad content"; 441 } 442 } 443 $data->close; 444 LWP::Debug::debug($ftp->message); 445 446 $response->code(&HTTP::Status::RC_CREATED); 447 $response->header('Content-Type', 'text/plain'); 448 $response->content("$bytes bytes stored as $remote_file on $host\n") 449 } else { 450 my $res = new HTTP::Response &HTTP::Status::RC_BAD_REQUEST, 451 "FTP return code " . $ftp->code; 452 $res->content_type("text/plain"); 453 $res->content($ftp->message); 454 return (undef, $res); # ML 455 } 456 } else { 457 return (undef, new HTTP::Response &HTTP::Status::RC_BAD_REQUEST, 458 "Illegal method $method"); # ML 459 } 460 return (undef, $response); 461} 462 463sub read_chunk { 464 my ($self, $response, $data, $request, $arg, $size, $timeout, $entry) = @_; 465 466 my $method = $request->method; 467 if ($method ne 'HEAD') { 468 LWP::Debug::debug('reading response'); 469 470 my $buf = ""; 471 # read one chunk at a time from $socket 472 my $bytes_read; 473 # decide whether to use 'read' or 'sysread' 474 $bytes_read = $data->sysread( $buf, $size ); # IO::Socket 475 476 ## XXX find a way here to check maxsize (line 298 in LWP::Protocol::ftp) 477 ## problem: get current size of response from entry object. 478 ## trim buf-content if necessary 479 ## return undef at the end when we're done, no? 480 481 # parse data from server 482 my $retval = $self->receive($arg, $response, \$buf, $entry); 483 # A return value lower than zero means a command from our 484 # callback function. Make sure it reaches ParallelUA: 485 # return (defined($retval) and (0 > $retval) ? 486 # $retval : $bytes_read); 487 return (defined $retval? $retval : $bytes_read); 488 } 489} 490 491sub close_connection { 492 my ($self, $response, $data, $request, $ftp) = @_; 493 494 my $method = $request->method; 495 if ($method ne 'HEAD') { 496 unless ($data->close) { 497 # Something did not work too well 498 $response->code(&HTTP::Status::RC_INTERNAL_SERVER_ERROR); 499 $response->message("FTP close response: " . $ftp->code . 500 " " . $ftp->message); 501 } 502 } 503} 504 505sub request 506{ 507 my($self, $request, $proxy, $arg, $size, $timeout) = @_; 508 509 $size = 4096 unless $size; 510 511 LWP::Debug::trace('()'); 512 513 # handle connect already gives us our response object 514 # porting remark: ParallelUA expects this function to return 515 # ($socket, $fullpath). Luckily, the Net::FTP is a IO::Socket::INET 516 # object, so ParallelUA won't notice the difference between the 517 # $socket object returned by http.pm's "handle_connect" method, and 518 # the $ftp object returned by ftp.pm's "handle_connect" method :) 519 # As for the $fullpath parameter -- ParallelUA doesn't do anything 520 # with this value other than passing it as a second argument to 521 # the "write_request" method (well, and storing it in its entry list, 522 # in the meantime. But so who cares -- perl certainly doesn't -- if 523 # we store a string or a pointer to an object in there!). 524 my ($ftp, $response) = $self->handle_connect ($request, $proxy, $timeout); 525 526 # if its status is not "OK", then something went wrong during our 527 # call to handle_connect, and we should stop here and return the 528 # response object containing the reason for this error: 529 return $response unless $response->is_success; 530 531 # issue request (in case of error creates Error-Response) 532 my ($listen_socket, $error_response) = 533 $self->write_request ($request, $ftp, $response, $arg, $timeout); 534 535 unless ($error_response) { 536 # now we can start reading from our $listen_socket 537 while (1) { 538 last unless $self->read_chunk ($response, $listen_socket, 539 $request, $arg, $size, $timeout, $ftp); 540 } 541 $self->close_connection ($response, $listen_socket, $request, $ftp); 542 $listen_socket = undef; 543 } else { 544 $response = $error_response; 545 } 546 547 $ftp = undef; # close it (ditto) 548 $response; 549} 550 5511; 552 553__END__ 554 555# This is what RFC 1738 has to say about FTP access: 556# -------------------------------------------------- 557# 558# 3.2. FTP 559# 560# The FTP URL scheme is used to designate files and directories on 561# Internet hosts accessible using the FTP protocol (RFC959). 562# 563# A FTP URL follow the syntax described in Section 3.1. If :<port> is 564# omitted, the port defaults to 21. 565# 566# 3.2.1. FTP Name and Password 567# 568# A user name and password may be supplied; they are used in the ftp 569# "USER" and "PASS" commands after first making the connection to the 570# FTP server. If no user name or password is supplied and one is 571# requested by the FTP server, the conventions for "anonymous" FTP are 572# to be used, as follows: 573# 574# The user name "anonymous" is supplied. 575# 576# The password is supplied as the Internet e-mail address 577# of the end user accessing the resource. 578# 579# If the URL supplies a user name but no password, and the remote 580# server requests a password, the program interpreting the FTP URL 581# should request one from the user. 582# 583# 3.2.2. FTP url-path 584# 585# The url-path of a FTP URL has the following syntax: 586# 587# <cwd1>/<cwd2>/.../<cwdN>/<name>;type=<typecode> 588# 589# Where <cwd1> through <cwdN> and <name> are (possibly encoded) strings 590# and <typecode> is one of the characters "a", "i", or "d". The part 591# ";type=<typecode>" may be omitted. The <cwdx> and <name> parts may be 592# empty. The whole url-path may be omitted, including the "/" 593# delimiting it from the prefix containing user, password, host, and 594# port. 595# 596# The url-path is interpreted as a series of FTP commands as follows: 597# 598# Each of the <cwd> elements is to be supplied, sequentially, as the 599# argument to a CWD (change working directory) command. 600# 601# If the typecode is "d", perform a NLST (name list) command with 602# <name> as the argument, and interpret the results as a file 603# directory listing. 604# 605# Otherwise, perform a TYPE command with <typecode> as the argument, 606# and then access the file whose name is <name> (for example, using 607# the RETR command.) 608# 609# Within a name or CWD component, the characters "/" and ";" are 610# reserved and must be encoded. The components are decoded prior to 611# their use in the FTP protocol. In particular, if the appropriate FTP 612# sequence to access a particular file requires supplying a string 613# containing a "/" as an argument to a CWD or RETR command, it is 614# necessary to encode each "/". 615# 616# For example, the URL <URL:ftp://myname@host.dom/%2Fetc/motd> is 617# interpreted by FTP-ing to "host.dom", logging in as "myname" 618# (prompting for a password if it is asked for), and then executing 619# "CWD /etc" and then "RETR motd". This has a different meaning from 620# <URL:ftp://myname@host.dom/etc/motd> which would "CWD etc" and then 621# "RETR motd"; the initial "CWD" might be executed relative to the 622# default directory for "myname". On the other hand, 623# <URL:ftp://myname@host.dom//etc/motd>, would "CWD " with a null 624# argument, then "CWD etc", and then "RETR motd". 625# 626# FTP URLs may also be used for other operations; for example, it is 627# possible to update a file on a remote file server, or infer 628# information about it from the directory listings. The mechanism for 629# doing so is not spelled out here. 630# 631# 3.2.3. FTP Typecode is Optional 632# 633# The entire ;type=<typecode> part of a FTP URL is optional. If it is 634# omitted, the client program interpreting the URL must guess the 635# appropriate mode to use. In general, the data content type of a file 636# can only be guessed from the name, e.g., from the suffix of the name; 637# the appropriate type code to be used for transfer of the file can 638# then be deduced from the data content of the file. 639# 640# 3.2.4 Hierarchy 641# 642# For some file systems, the "/" used to denote the hierarchical 643# structure of the URL corresponds to the delimiter used to construct a 644# file name hierarchy, and thus, the filename will look similar to the 645# URL path. This does NOT mean that the URL is a Unix filename. 646# 647# 3.2.5. Optimization 648# 649# Clients accessing resources via FTP may employ additional heuristics 650# to optimize the interaction. For some FTP servers, for example, it 651# may be reasonable to keep the control connection open while accessing 652# multiple URLs from the same server. However, there is no common 653# hierarchical model to the FTP protocol, so if a directory change 654# command has been given, it is impossible in general to deduce what 655# sequence should be given to navigate to another directory for a 656# second retrieval, if the paths are different. The only reliable 657# algorithm is to disconnect and reestablish the control connection. 658