1# -*- perl -*-
2# $Id: ftp.pm,v 1.11 2003/02/19 14:58:37 langhein Exp $
3# derived from: ftp.pm,v 1.31 2001/10/26 20:13:20 gisle Exp
4
5# Implementation of the ftp protocol (RFC 959). We let the Net::FTP
6# package do all the dirty work.
7
8package LWP::Parallel::Protocol::ftp;
9
10use Carp ();
11
12use HTTP::Status ();
13use HTTP::Negotiate ();
14use HTTP::Response ();
15use LWP::MediaTypes ();
16use File::Listing ();
17
18require LWP::Parallel::Protocol;
19require LWP::Protocol::ftp;
20@ISA = qw(LWP::Parallel::Protocol LWP::Protocol::ftp);
21
22use strict;
23
24eval {
25    package LWP::Parallel::Protocol::MyFTP;
26
27    require Net::FTP;
28    Net::FTP->require_version(2.00);
29
30    use vars qw(@ISA);
31    @ISA=qw(Net::FTP);
32
33    sub new {
34	my $class = shift;
35	LWP::Debug::trace('()');
36
37	my $self = $class->SUPER::new(@_) || return undef;
38
39	my $mess = $self->message;  # welcome message
40	LWP::Debug::debug($mess);
41	$mess =~ s|\n.*||s; # only first line left
42	$mess =~ s|\s*ready\.?$||;
43	# Make the version number more HTTP like
44	$mess =~ s|\s*\(Version\s*|/| and $mess =~ s|\)$||;
45	${*$self}{myftp_server} = $mess;
46	#$response->header("Server", $mess);
47
48	$self;
49    }
50
51    sub http_server {
52	my $self = shift;
53	${*$self}{myftp_server};
54    }
55
56    sub home {
57	my $self = shift;
58	my $old = ${*$self}{myftp_home};
59	if (@_) {
60	    ${*$self}{myftp_home} = shift;
61	}
62	$old;
63    }
64
65    sub go_home {
66	LWP::Debug::trace('');
67	my $self = shift;
68	$self->cwd(${*$self}{myftp_home});
69    }
70
71    sub request_count {
72	my $self = shift;
73	++${*$self}{myftp_reqcount};
74    }
75
76    sub ping {
77	LWP::Debug::trace('');
78	my $self = shift;
79	return $self->go_home;
80    }
81
82};
83my $init_failed = $@;
84
85=item ($socket, $second_arg) = $prot->handle_connect ($req, $proxy, $timeout);
86
87This method connects with the server on the machine and port specified
88in the $req object. If a $proxy is given, it will return an error,
89since the FTP protocol does not allow proxying. (See below on how such
90an error is propagated to the caller).
91
92If successful, the first argument will contain the IO::Socket object
93that connects to the specified site. The second argument is empty (for
94ftp, that is. See LWP::Protocol::http for different usage).
95
96If the connection fails, $socket is set to 'undef', and the second
97argument contains a HTTP::Response object holding a textual
98representation of the error. (You can use its 'code' and 'message'
99methods to find out what went wrong)
100
101=cut
102
103sub handle_connect {
104  my ($self, $request, $proxy, $timeout) = @_;
105
106  # mostly directly copied from the original Protocol::ftp, changes
107  # are marked with "# ML" comment (mostly return values)
108
109  # check proxy
110  if (defined $proxy)
111    {
112      return (undef, new HTTP::Response &HTTP::Status::RC_BAD_REQUEST,
113	      'You can not proxy through the ftp'); # ML
114    }
115
116  my $url = $request->url;
117  if ($url->scheme ne 'ftp') {
118    my $scheme = $url->scheme;
119    return (undef, new HTTP::Response &HTTP::Status::RC_INTERNAL_SERVER_ERROR,
120	    "LWP::Protocol::ftp::request called for '$scheme'"); # ML
121  }
122
123  # check method
124  my $method = $request->method;
125
126  unless ($method eq 'GET' || $method eq 'HEAD' || $method eq 'PUT') {
127    return (undef, new HTTP::Response &HTTP::Status::RC_BAD_REQUEST,
128	    'Library does not allow method ' .
129	    "$method for 'ftp:' URLs"); # ML
130  }
131
132  if ($init_failed) {
133    return (undef, new HTTP::Response &HTTP::Status::RC_INTERNAL_SERVER_ERROR,
134	    $init_failed); # ML
135  }
136
137  my $host     = $url->host;
138  my $port     = $url->port;
139  my $user     = $url->user;
140  # taken out some additional variable declarations here, that are now
141  # only needed in 'write_request' method.
142
143  #################
144  # new in LWP 5.60
145  my $account = $request->header('Account'); # ML
146
147  my $key;
148  my $conn_cache = $self->{ua}{conn_cache};
149  if ($conn_cache) {
150	$key = "$host:$port:$user";
151	$key .= ":$account" if defined($account);
152	if (my $ftp = $conn_cache->withdraw("ftp", $key)) {
153	    if ($ftp->ping) {
154		LWP::Debug::debug('Reusing old connection');
155		# save it again
156		$conn_cache->deposit("ftp", $key, $ftp);
157                # added $response object # ML
158                my $response =
159                  HTTP::Response->new(&HTTP::Status::RC_OK, "Document follows");
160		return ($ftp, $response);
161	    }
162	}
163  }
164
165  # try to make a connection
166  my $ftp = LWP::Parallel::Protocol::MyFTP->new($host,
167					Port => $port,
168					Timeout => $timeout,
169				       );
170  # XXX Should be some what to pass on 'Passive' (header??)
171  #################
172
173  my $response;
174  unless ($ftp) {
175    $@ =~ s/^Net::FTP: //; # new in LWP 5.60
176    $response = HTTP::Response->new(&HTTP::Status::RC_INTERNAL_SERVER_ERROR,$@);
177  } else {
178    # Create an initial response object
179    $response = HTTP::Response->new(&HTTP::Status::RC_OK, "Document follows");
180    #################
181    # new in LWP 5.60
182    $response->header(Server => $ftp->http_server);
183    $response->header('Client-Request-Num' => $ftp->request_count);
184    #################
185    $response->request($request);
186  }
187
188  return ($ftp, $response); # ML
189}
190
191sub write_request {
192  my ($self, $request, $ftp, $response, $arg, $timeout) = @_;
193
194  # Some of the following variable declarations, directly copied from
195  # the original Protocol::ftp module, appear both in 'handle_connect'
196  # _and_ 'write_request' method. Although it introduces additional
197  # overhead, we can't pass additional variables between those two
198  # methods, but we need some of the values in both routines.  We
199  # allow the account to be specified in the "Account" header
200  my $account  = $request->header('Account');
201
202  my $url      = $request->url;
203  my $host     = $url->host;
204  my $port     = $url->port;
205  my $user     = $url->user;
206  my $password = $url->password;
207
208  # If a basic autorization header is present than we prefer these over
209  # the username/password specified in the URL.
210  {
211    my($u,$p) = $request->authorization_basic;
212    if (defined $u) {
213      $user = $u;
214      $password = $p;
215    }
216  }
217
218  my $method = $request->method;
219
220  # from here on mostly directly clipped from the original
221  # Protocol::ftp. Changes are marked with "# ML" comment
222
223  # from here on it seems FTP will handle timeouts, right? # ML
224  $ftp->timeout($timeout) if $timeout;
225
226  LWP::Debug::debug("Logging in as $user (password $password)...");
227  unless ($ftp->login($user, $password, $account)) {
228    # Unauthorized.  Let's fake a RC_UNAUTHORIZED response
229    my $mess = scalar($ftp->message);
230    LWP::Debug::debug($mess);
231    $mess =~ s/\n$//;
232    my $res = HTTP::Response->new(&HTTP::Status::RC_UNAUTHORIZED, $mess);
233    $res->header("Server", $ftp->http_server);
234    $res->header("WWW-Authenticate", qq(Basic Realm="FTP login"));
235    return (undef, $res); # ML
236  }
237  LWP::Debug::debug($ftp->message);
238
239  #################
240  # new in LWP 5.60
241  my $home = $ftp->pwd;
242  LWP::Debug::debug("home: '$home'");
243  $ftp->home($home);
244
245  # ML
246  my $key;
247  $key = "$host:$port:$user";
248  $key .= ":$account" if defined($account);
249  #
250
251  my $conn_cache = $self->{ua}{conn_cache};
252  $conn_cache->deposit("ftp", $key, $ftp) if $conn_cache;
253  #################
254
255  # Get & fix the path
256  my @path =  $url->path_segments;
257  # removed in LWP 5.48
258  #shift(@path);           # There will always be an empty first component
259  #pop(@path) while @path && $path[-1] eq ''; # remove empty tailing comps
260
261  my $remote_file = pop(@path);
262  $remote_file = '' unless defined $remote_file;
263
264  my $type;
265   if (ref $remote_file) {
266       my @params;
267       ($remote_file, @params) = @$remote_file;
268       for (@params) {
269           $type = $_ if s/^type=//;
270       }
271  }
272
273  if ($type && $type eq 'a') {
274      $ftp->ascii;
275  } else {
276      $ftp->binary;
277  }
278
279  for (@path) {
280    LWP::Debug::debug("CWD $_");
281    unless ($ftp->cwd($_)) {
282      return (undef, new HTTP::Response &HTTP::Status::RC_NOT_FOUND,
283	      "Can't chdir to $_");
284    }
285  }
286
287  if ($method eq 'GET' || $method eq 'HEAD') {
288    # new in ftp.pm,v 1.23 (fixed in ftp.pm,v 1.24)
289    LWP::Debug::debug("MDTM");
290    if (my $mod_time = $ftp->mdtm($remote_file)) {
291      $response->last_modified($mod_time);
292      if (my $ims = $request->if_modified_since) {
293	if ($mod_time <= $ims) {
294	  $response->code(&HTTP::Status::RC_NOT_MODIFIED);
295	  $response->message("Not modified");
296	  return (undef, $response);
297	}
298      }
299    }
300    # end_of_new_stuff
301
302    #################
303    # new in LWP 5.60
304
305    # We'll use this later to abort the transfer if necessary.
306    # if $max_size is defined, we need to abort early. Otherwise, it's
307    # a normal transfer
308    my $max_size = undef;
309
310    # Set resume location, if the client requested it
311    if ($request->header('Range') && $ftp->supported('REST'))
312    {
313	my $range_info = $request->header('Range');
314
315	# Change bytes=2772992-6781209 to just 2772992
316	my ($start_byte,$end_byte) = $range_info =~ /.*=\s*(\d+)-(\d+)/;
317
318	if (!defined $start_byte || !defined $end_byte ||
319	  ($start_byte < 0) || ($start_byte > $end_byte) || ($end_byte < 0))
320	{
321	  return (undef, HTTP::Response->new(&HTTP::Status::RC_BAD_REQUEST,
322	     'Incorrect syntax for Range request'));
323	}
324
325	$max_size = $end_byte-$start_byte;
326
327	$ftp->restart($start_byte);
328    } elsif ($request->header('Range') && !$ftp->supported('REST')) {
329	return (undef,HTTP::Response->new(&HTTP::Status::RC_NOT_IMPLEMENTED,
330         "Server does not support resume."));
331    }
332    ################
333
334
335    my $data;			# the data handle
336    LWP::Debug::debug("retrieve file?");
337    if (length($remote_file) and $data = $ftp->retr($remote_file)) {
338      # remove reading from socket into 'read_chunk' method.
339      # just return our new $listen_socket here.
340      my($type, @enc) = LWP::MediaTypes::guess_media_type($remote_file);
341      $response->header('Content-Type',   $type) if $type;
342      for (@enc) {
343	$response->push_header('Content-Encoding', $_);
344      }
345      my $mess = $ftp->message;
346      LWP::Debug::debug($mess);
347      if ($mess =~ /\((\d+)\s+bytes\)/) {
348	$response->header('Content-Length', "$1");
349      }
350      return ($data, $response);	# ML
351    } elsif (!length($remote_file) || $ftp->code == 550) {
352      # no file, the remote file is actually a directory, so cdw into directory
353      if (length($remote_file) && !$ftp->cwd($remote_file)) {
354	LWP::Debug::debug("chdir before listing failed");
355	return (undef, new HTTP::Response &HTTP::Status::RC_NOT_FOUND,
356		"File '$remote_file' not found"); # ML
357      }
358
359      # It should now be safe to try to list the directory
360      LWP::Debug::debug("dir");
361      my @lsl = $ftp->dir;
362
363      # Try to figure out if the user want us to convert the
364      # directory listing to HTML.
365      my @variants =
366	(
367	 ['html',  0.60, 'text/html'            ],
368	 ['dir',   1.00, 'text/ftp-dir-listing' ]
369	);
370      #$HTTP::Negotiate::DEBUG=1;
371      my $prefer = HTTP::Negotiate::choose(\@variants, $request);
372
373      my $content = '';
374
375      if (!defined($prefer)) {
376	return (undef, new HTTP::Response &HTTP::Status::RC_NOT_ACCEPTABLE,
377		"Neither HTML nor directory listing wanted"); # ML
378      } elsif ($prefer eq 'html') {
379	$response->header('Content-Type' => 'text/html');
380	$content = "<HEAD><TITLE>File Listing</TITLE>\n";
381	my $base = $request->url->clone;
382	my $path = $base->path;
383	$base->path("$path/") unless $path =~ m|/$|;
384	$content .= qq(<BASE HREF="$base">\n</HEAD>\n);
385	$content .= "<BODY>\n<UL>\n";
386	for (File::Listing::parse_dir(\@lsl, 'GMT')) {
387	  my($name, $type, $size, $mtime, $mode) = @$_;
388	  $content .= qq(  <LI> <a href="$name">$name</a>);
389	  $content .= " $size bytes" if $type eq 'f';
390	  $content .= "\n";
391	}
392	$content .= "</UL></body>\n";
393      } else {
394	$response->header('Content-Type', 'text/ftp-dir-listing');
395	$content = join("\n", @lsl, '');
396      }
397
398      $response->header('Content-Length', length($content));
399
400      if ($method ne 'HEAD') {
401	# $self->receive_once($arg, $response, $content);
402        # calling receive_once is now done in UserAgent.pm #ML 7/99
403	# here we just add the content to the response:
404	$response->content($content);
405      }
406    } else {
407      my $res = new HTTP::Response &HTTP::Status::RC_BAD_REQUEST,
408      "FTP return code " . $ftp->code;
409      $res->content_type("text/plain");
410      $res->content($ftp->message);
411      return (undef, $res); # ML
412    }
413  } elsif ($method eq 'PUT') {
414    # method must be PUT
415    unless (length($remote_file)) {
416      return (undef, new HTTP::Response &HTTP::Status::RC_BAD_REQUEST,
417	      "Must have a file name to PUT to"); # ML
418    }
419    my $data;
420    if ($data = $ftp->stor($remote_file)) {
421      LWP::Debug::debug($ftp->message);
422      LWP::Debug::debug("$data");
423      my $content = $request->content;
424      my $bytes = 0;
425      if (defined $content) {
426	if (ref($content) eq 'SCALAR') {
427	  $bytes = $data->write($$content, length($$content));
428	} elsif (ref($content) eq 'CODE') {
429	  my($buf, $n);
430	  while (length($buf = &$content)) {
431	    $n = $data->write($buf, length($buf));
432	    last unless $n;
433	    $bytes += $n;
434	  }
435	} elsif (!ref($content)) {
436	  if (defined $content && length($content)) {
437	    $bytes = $data->write($content, length($content));
438	  }
439	} else {
440	  die "Bad content";
441	}
442      }
443      $data->close;
444      LWP::Debug::debug($ftp->message);
445
446      $response->code(&HTTP::Status::RC_CREATED);
447      $response->header('Content-Type', 'text/plain');
448      $response->content("$bytes bytes stored as $remote_file on $host\n")
449    } else {
450      my $res = new HTTP::Response &HTTP::Status::RC_BAD_REQUEST,
451      "FTP return code " . $ftp->code;
452      $res->content_type("text/plain");
453      $res->content($ftp->message);
454      return (undef, $res);	# ML
455    }
456  } else {
457    return (undef, new HTTP::Response &HTTP::Status::RC_BAD_REQUEST,
458	    "Illegal method $method"); # ML
459  }
460  return (undef, $response);
461}
462
463sub read_chunk {
464  my ($self, $response, $data, $request, $arg, $size, $timeout, $entry) = @_;
465
466  my $method = $request->method;
467  if ($method ne 'HEAD') {
468    LWP::Debug::debug('reading response');
469
470    my $buf = "";
471    # read one chunk at a time from $socket
472    my $bytes_read;
473    # decide whether to use 'read' or 'sysread'
474    $bytes_read = $data->sysread( $buf, $size );	# IO::Socket
475
476    ## XXX find a way here to check maxsize (line 298 in LWP::Protocol::ftp)
477    ## problem: get current size of response from entry object.
478    ## trim buf-content if necessary
479    ## return undef at the end when we're done, no?
480
481    # parse data from server
482    my $retval = $self->receive($arg, $response, \$buf, $entry);
483    # A return value lower than zero means a command from our
484    # callback function. Make sure it reaches ParallelUA:
485    #	return (defined($retval) and (0 > $retval) ?
486    #		$retval : $bytes_read);
487    return (defined $retval? $retval : $bytes_read);
488  }
489}
490
491sub close_connection {
492  my ($self, $response, $data, $request, $ftp) = @_;
493
494  my $method = $request->method;
495  if ($method ne 'HEAD') {
496    unless ($data->close) {
497      # Something did not work too well
498      $response->code(&HTTP::Status::RC_INTERNAL_SERVER_ERROR);
499      $response->message("FTP close response: " . $ftp->code .
500			 " " . $ftp->message);
501    }
502  }
503}
504
505sub request
506{
507  my($self, $request, $proxy, $arg, $size, $timeout) = @_;
508
509  $size = 4096 unless $size;
510
511  LWP::Debug::trace('()');
512
513  # handle connect already gives us our response object
514  # porting remark: ParallelUA expects this function to return
515  # ($socket, $fullpath). Luckily, the Net::FTP is a IO::Socket::INET
516  # object, so ParallelUA won't notice the difference between the
517  # $socket object returned by http.pm's "handle_connect" method, and
518  # the $ftp object returned by ftp.pm's "handle_connect" method :)
519  # As for the $fullpath parameter -- ParallelUA doesn't do anything
520  # with this value other than passing it as a second argument to
521  # the "write_request" method (well, and storing it in its entry list,
522  # in the meantime. But so who cares -- perl certainly doesn't -- if
523  # we store a string or a pointer to an object in there!).
524  my ($ftp, $response) = $self->handle_connect ($request, $proxy, $timeout);
525
526  # if its status is not "OK", then something went wrong during our
527  # call to handle_connect, and we should stop here and return the
528  # response object containing the reason for this error:
529  return $response unless $response->is_success;
530
531  # issue request (in case of error creates Error-Response)
532  my ($listen_socket, $error_response) =
533	$self->write_request ($request, $ftp, $response, $arg, $timeout);
534
535  unless ($error_response) {
536    # now we can start reading from our $listen_socket
537    while (1) {
538      last unless $self->read_chunk ($response, $listen_socket,
539				     $request, $arg, $size, $timeout, $ftp);
540    }
541    $self->close_connection ($response, $listen_socket, $request, $ftp);
542    $listen_socket = undef;
543  } else {
544    $response = $error_response;
545  }
546
547  $ftp = undef;  # close it (ditto)
548  $response;
549}
550
5511;
552
553__END__
554
555# This is what RFC 1738 has to say about FTP access:
556# --------------------------------------------------
557#
558# 3.2. FTP
559#
560#    The FTP URL scheme is used to designate files and directories on
561#    Internet hosts accessible using the FTP protocol (RFC959).
562#
563#    A FTP URL follow the syntax described in Section 3.1.  If :<port> is
564#    omitted, the port defaults to 21.
565#
566# 3.2.1. FTP Name and Password
567#
568#    A user name and password may be supplied; they are used in the ftp
569#    "USER" and "PASS" commands after first making the connection to the
570#    FTP server.  If no user name or password is supplied and one is
571#    requested by the FTP server, the conventions for "anonymous" FTP are
572#    to be used, as follows:
573#
574#         The user name "anonymous" is supplied.
575#
576#         The password is supplied as the Internet e-mail address
577#         of the end user accessing the resource.
578#
579#    If the URL supplies a user name but no password, and the remote
580#    server requests a password, the program interpreting the FTP URL
581#    should request one from the user.
582#
583# 3.2.2. FTP url-path
584#
585#    The url-path of a FTP URL has the following syntax:
586#
587#         <cwd1>/<cwd2>/.../<cwdN>/<name>;type=<typecode>
588#
589#    Where <cwd1> through <cwdN> and <name> are (possibly encoded) strings
590#    and <typecode> is one of the characters "a", "i", or "d".  The part
591#    ";type=<typecode>" may be omitted. The <cwdx> and <name> parts may be
592#    empty. The whole url-path may be omitted, including the "/"
593#    delimiting it from the prefix containing user, password, host, and
594#    port.
595#
596#    The url-path is interpreted as a series of FTP commands as follows:
597#
598#       Each of the <cwd> elements is to be supplied, sequentially, as the
599#       argument to a CWD (change working directory) command.
600#
601#       If the typecode is "d", perform a NLST (name list) command with
602#       <name> as the argument, and interpret the results as a file
603#       directory listing.
604#
605#       Otherwise, perform a TYPE command with <typecode> as the argument,
606#       and then access the file whose name is <name> (for example, using
607#       the RETR command.)
608#
609#    Within a name or CWD component, the characters "/" and ";" are
610#    reserved and must be encoded. The components are decoded prior to
611#    their use in the FTP protocol.  In particular, if the appropriate FTP
612#    sequence to access a particular file requires supplying a string
613#    containing a "/" as an argument to a CWD or RETR command, it is
614#    necessary to encode each "/".
615#
616#    For example, the URL <URL:ftp://myname@host.dom/%2Fetc/motd> is
617#    interpreted by FTP-ing to "host.dom", logging in as "myname"
618#    (prompting for a password if it is asked for), and then executing
619#    "CWD /etc" and then "RETR motd". This has a different meaning from
620#    <URL:ftp://myname@host.dom/etc/motd> which would "CWD etc" and then
621#    "RETR motd"; the initial "CWD" might be executed relative to the
622#    default directory for "myname". On the other hand,
623#    <URL:ftp://myname@host.dom//etc/motd>, would "CWD " with a null
624#    argument, then "CWD etc", and then "RETR motd".
625#
626#    FTP URLs may also be used for other operations; for example, it is
627#    possible to update a file on a remote file server, or infer
628#    information about it from the directory listings. The mechanism for
629#    doing so is not spelled out here.
630#
631# 3.2.3. FTP Typecode is Optional
632#
633#    The entire ;type=<typecode> part of a FTP URL is optional. If it is
634#    omitted, the client program interpreting the URL must guess the
635#    appropriate mode to use. In general, the data content type of a file
636#    can only be guessed from the name, e.g., from the suffix of the name;
637#    the appropriate type code to be used for transfer of the file can
638#    then be deduced from the data content of the file.
639#
640# 3.2.4 Hierarchy
641#
642#    For some file systems, the "/" used to denote the hierarchical
643#    structure of the URL corresponds to the delimiter used to construct a
644#    file name hierarchy, and thus, the filename will look similar to the
645#    URL path. This does NOT mean that the URL is a Unix filename.
646#
647# 3.2.5. Optimization
648#
649#    Clients accessing resources via FTP may employ additional heuristics
650#    to optimize the interaction. For some FTP servers, for example, it
651#    may be reasonable to keep the control connection open while accessing
652#    multiple URLs from the same server. However, there is no common
653#    hierarchical model to the FTP protocol, so if a directory change
654#    command has been given, it is impossible in general to deduce what
655#    sequence should be given to navigate to another directory for a
656#    second retrieval, if the paths are different.  The only reliable
657#    algorithm is to disconnect and reestablish the control connection.
658