1 /***************************************************************************
2  *                                  _   _ ____  _
3  *  Project                     ___| | | |  _ \| |
4  *                             / __| | | | |_) | |
5  *                            | (__| |_| |  _ <| |___
6  *                             \___|\___/|_| \_\_____|
7  *
8  * Copyright (C) 1998 - 2009, Daniel Stenberg, <daniel@haxx.se>, et al.
9  *
10  * This software is licensed as described in the file COPYING, which
11  * you should have received as part of this distribution. The terms
12  * are also available at http://curl.haxx.se/docs/copyright.html.
13  *
14  * You may opt to use, copy, modify, merge, publish, distribute and/or sell
15  * copies of the Software, and permit persons to whom the Software is
16  * furnished to do so, under the terms of the COPYING file.
17  *
18  * This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY
19  * KIND, either express or implied.
20  *
21  * $Id: file.c,v 1.119 2009-02-03 22:28:41 bagder Exp $
22  ***************************************************************************/
23 
24 #include "setup.h"
25 
26 #ifndef CURL_DISABLE_FILE
27 /* -- WIN32 approved -- */
28 #include <stdio.h>
29 #include <string.h>
30 #include <stdarg.h>
31 #include <stdlib.h>
32 #include <ctype.h>
33 
34 #ifdef WIN32
35 #include <time.h>
36 #include <io.h>
37 #include <fcntl.h>
38 #else
39 #ifdef HAVE_SYS_SOCKET_H
40 #include <sys/socket.h>
41 #endif
42 #ifdef HAVE_NETINET_IN_H
43 #include <netinet/in.h>
44 #endif
45 #ifdef HAVE_SYS_TIME_H
46 #include <sys/time.h>
47 #endif
48 #ifdef HAVE_UNISTD_H
49 #include <unistd.h>
50 #endif
51 #ifdef HAVE_NETDB_H
52 #include <netdb.h>
53 #endif
54 #ifdef HAVE_ARPA_INET_H
55 #include <arpa/inet.h>
56 #endif
57 #ifdef HAVE_NET_IF_H
58 #include <net/if.h>
59 #endif
60 #ifdef HAVE_SYS_IOCTL_H
61 #include <sys/ioctl.h>
62 #endif
63 
64 #ifdef HAVE_SYS_PARAM_H
65 #include <sys/param.h>
66 #endif
67 
68 #ifdef HAVE_FCNTL_H
69 #include <fcntl.h>
70 #endif
71 
72 #endif /* WIN32 */
73 
74 #include "strtoofft.h"
75 #include "urldata.h"
76 #include <curl/curl.h>
77 #include "progress.h"
78 #include "sendf.h"
79 #include "escape.h"
80 #include "file.h"
81 #include "speedcheck.h"
82 #include "getinfo.h"
83 #include "transfer.h"
84 #include "url.h"
85 #include "memory.h"
86 #include "parsedate.h" /* for the week day and month names */
87 
88 #define _MPRINTF_REPLACE /* use our functions only */
89 #include <curl/mprintf.h>
90 
91 /* The last #include file should be: */
92 #include "memdebug.h"
93 
94 #if defined(WIN32) || defined(MSDOS) || defined(__EMX__) || defined(__SYMBIAN32__)
95 #define DOS_FILESYSTEM 1
96 #endif
97 
98 /*
99  * Forward declarations.
100  */
101 
102 static CURLcode file_do(struct connectdata *, bool *done);
103 static CURLcode file_done(struct connectdata *conn,
104                           CURLcode status, bool premature);
105 static CURLcode file_connect(struct connectdata *conn, bool *done);
106 
/*
 * FILE scheme handler.
 *
 * Handler table for the "FILE" scheme. Only the do_it, done and connect_it
 * slots point at functions in this file (file_do, file_done and
 * file_connect); every other callback slot is ZERO_NULL and the default
 * port is 0.
 */

const struct Curl_handler Curl_handler_file = {
  "FILE",                               /* scheme */
  ZERO_NULL,                            /* setup_connection */
  file_do,                              /* do_it */
  file_done,                            /* done */
  ZERO_NULL,                            /* do_more */
  file_connect,                         /* connect_it */
  ZERO_NULL,                            /* connecting */
  ZERO_NULL,                            /* doing */
  ZERO_NULL,                            /* proto_getsock */
  ZERO_NULL,                            /* doing_getsock */
  ZERO_NULL,                            /* perform_getsock */
  ZERO_NULL,                            /* disconnect */
  0,                                    /* defport */
  PROT_FILE                             /* protocol */
};
127 
128 
129  /*
130   Check if this is a range download, and if so, set the internal variables
131   properly. This code is copied from the FTP implementation and might as
132   well be factored out.
133  */
file_range(struct connectdata * conn)134 static CURLcode file_range(struct connectdata *conn)
135 {
136   curl_off_t from, to;
137   curl_off_t totalsize=-1;
138   char *ptr;
139   char *ptr2;
140   struct SessionHandle *data = conn->data;
141 
142   if(data->state.use_range && data->state.range) {
143     from=curlx_strtoofft(data->state.range, &ptr, 0);
144     while(ptr && *ptr && (isspace((int)*ptr) || (*ptr=='-')))
145       ptr++;
146     to=curlx_strtoofft(ptr, &ptr2, 0);
147     if(ptr == ptr2) {
148       /* we didn't get any digit */
149       to=-1;
150     }
151     if((-1 == to) && (from>=0)) {
152       /* X - */
153       data->state.resume_from = from;
154       DEBUGF(infof(data, "RANGE %" FORMAT_OFF_T " to end of file\n",
155                    from));
156     }
157     else if(from < 0) {
158       /* -Y */
159       totalsize = -from;
160       data->req.maxdownload = -from;
161       data->state.resume_from = from;
162       DEBUGF(infof(data, "RANGE the last %" FORMAT_OFF_T " bytes\n",
163                    totalsize));
164     }
165     else {
166       /* X-Y */
167       totalsize = to-from;
168       data->req.maxdownload = totalsize+1; /* include last byte */
169       data->state.resume_from = from;
170       DEBUGF(infof(data, "RANGE from %" FORMAT_OFF_T
171                    " getting %" FORMAT_OFF_T " bytes\n",
172                    from, data->req.maxdownload));
173     }
174     DEBUGF(infof(data, "range-download from %" FORMAT_OFF_T
175                  " to %" FORMAT_OFF_T ", totally %" FORMAT_OFF_T " bytes\n",
176                  from, to, data->req.maxdownload));
177   }
178   else
179     data->req.maxdownload = -1;
180   return CURLE_OK;
181 }
182 
183 /*
184  * file_connect() gets called from Curl_protocol_connect() to allow us to
185  * do protocol-specific actions at connect-time.  We emulate a
186  * connect-then-transfer protocol and "connect" to the file here
187  */
file_connect(struct connectdata * conn,bool * done)188 static CURLcode file_connect(struct connectdata *conn, bool *done)
189 {
190   struct SessionHandle *data = conn->data;
191   char *real_path = curl_easy_unescape(data, data->state.path, 0, NULL);
192   struct FILEPROTO *file;
193   int fd;
194 #ifdef DOS_FILESYSTEM
195   int i;
196   char *actual_path;
197 #endif
198 
199   if(!real_path)
200     return CURLE_OUT_OF_MEMORY;
201 
202   /* If there already is a protocol-specific struct allocated for this
203      sessionhandle, deal with it */
204   Curl_reset_reqproto(conn);
205 
206   if(!data->state.proto.file) {
207     file = calloc(sizeof(struct FILEPROTO), 1);
208     if(!file) {
209       free(real_path);
210       return CURLE_OUT_OF_MEMORY;
211     }
212     data->state.proto.file = file;
213   }
214   else {
215     /* file is not a protocol that can deal with "persistancy" */
216     file = data->state.proto.file;
217     Curl_safefree(file->freepath);
218     if(file->fd != -1)
219       close(file->fd);
220     file->path = NULL;
221     file->freepath = NULL;
222     file->fd = -1;
223   }
224 
225 #ifdef DOS_FILESYSTEM
226   /* If the first character is a slash, and there's
227      something that looks like a drive at the beginning of
228      the path, skip the slash.  If we remove the initial
229      slash in all cases, paths without drive letters end up
230      relative to the current directory which isn't how
231      browsers work.
232 
233      Some browsers accept | instead of : as the drive letter
234      separator, so we do too.
235 
236      On other platforms, we need the slash to indicate an
237      absolute pathname.  On Windows, absolute paths start
238      with a drive letter.
239   */
240   actual_path = real_path;
241   if((actual_path[0] == '/') &&
242       actual_path[1] &&
243       (actual_path[2] == ':' || actual_path[2] == '|'))
244   {
245     actual_path[2] = ':';
246     actual_path++;
247   }
248 
249   /* change path separators from '/' to '\\' for DOS, Windows and OS/2 */
250   for (i=0; actual_path[i] != '\0'; ++i)
251     if(actual_path[i] == '/')
252       actual_path[i] = '\\';
253 
254   fd = open(actual_path, O_RDONLY | O_BINARY);  /* no CR/LF translation! */
255   file->path = actual_path;
256 #else
257   fd = open(real_path, O_RDONLY);
258   file->path = real_path;
259 #endif
260   file->freepath = real_path; /* free this when done */
261 
262   file->fd = fd;
263   if(!data->set.upload && (fd == -1)) {
264     failf(data, "Couldn't open file %s", data->state.path);
265     file_done(conn, CURLE_FILE_COULDNT_READ_FILE, FALSE);
266     return CURLE_FILE_COULDNT_READ_FILE;
267   }
268   *done = TRUE;
269 
270   return CURLE_OK;
271 }
272 
/*
 * file_done() releases what file_connect() stored in the FILEPROTO struct:
 * the allocated path is freed and the file descriptor is closed.
 *
 * The fields are reset afterwards (pointers to NULL, descriptor to -1) so
 * that a second call, or the clean-up file_connect() performs when the
 * struct is used again, cannot free the path or close the descriptor
 * twice. A missing FILEPROTO struct is tolerated.
 *
 * 'status' and 'premature' are not used. Always returns CURLE_OK.
 */
static CURLcode file_done(struct connectdata *conn,
                          CURLcode status, bool premature)
{
  struct FILEPROTO *file = conn->data->state.proto.file;
  (void)status; /* not used */
  (void)premature; /* not used */

  if(file) {
    Curl_safefree(file->freepath);
    /* 'path' points into the 'freepath' allocation, so both are stale now */
    file->freepath = NULL;
    file->path = NULL;

    if(file->fd != -1)
      close(file->fd);
    file->fd = -1;
  }

  return CURLE_OK;
}
286 
287 #ifdef DOS_FILESYSTEM
288 #define DIRSEP '\\'
289 #else
290 #define DIRSEP '/'
291 #endif
292 
file_upload(struct connectdata * conn)293 static CURLcode file_upload(struct connectdata *conn)
294 {
295   struct FILEPROTO *file = conn->data->state.proto.file;
296   const char *dir = strchr(file->path, DIRSEP);
297   FILE *fp;
298   CURLcode res=CURLE_OK;
299   struct SessionHandle *data = conn->data;
300   char *buf = data->state.buffer;
301   size_t nread;
302   size_t nwrite;
303   curl_off_t bytecount = 0;
304   struct timeval now = Curl_tvnow();
305   struct_stat file_stat;
306   const char* buf2;
307 
308   /*
309    * Since FILE: doesn't do the full init, we need to provide some extra
310    * assignments here.
311    */
312   conn->fread_func = data->set.fread_func;
313   conn->fread_in = data->set.in;
314   conn->data->req.upload_fromhere = buf;
315 
316   if(!dir)
317     return CURLE_FILE_COULDNT_READ_FILE; /* fix: better error code */
318 
319   if(!dir[1])
320      return CURLE_FILE_COULDNT_READ_FILE; /* fix: better error code */
321 
322   if(data->state.resume_from)
323     fp = fopen( file->path, "ab" );
324   else {
325     int fd;
326 
327 #ifdef DOS_FILESYSTEM
328     fd = open(file->path, O_WRONLY|O_CREAT|O_TRUNC|O_BINARY,
329               conn->data->set.new_file_perms);
330 #else
331     fd = open(file->path, O_WRONLY|O_CREAT|O_TRUNC,
332               conn->data->set.new_file_perms);
333 #endif
334     if(fd < 0) {
335       failf(data, "Can't open %s for writing", file->path);
336       return CURLE_WRITE_ERROR;
337     }
338     close(fd);
339     fp = fopen(file->path, "wb");
340   }
341 
342   if(!fp) {
343     failf(data, "Can't open %s for writing", file->path);
344     return CURLE_WRITE_ERROR;
345   }
346 
347   if(-1 != data->set.infilesize)
348     /* known size of data to "upload" */
349     Curl_pgrsSetUploadSize(data, data->set.infilesize);
350 
351   /* treat the negative resume offset value as the case of "-" */
352   if(data->state.resume_from < 0) {
353     if(fstat(fileno(fp), &file_stat)) {
354       fclose(fp);
355       failf(data, "Can't get the size of %s", file->path);
356       return CURLE_WRITE_ERROR;
357     }
358     else
359       data->state.resume_from = (curl_off_t)file_stat.st_size;
360   }
361 
362   while(res == CURLE_OK) {
363     int readcount;
364     res = Curl_fillreadbuffer(conn, BUFSIZE, &readcount);
365     if(res)
366       break;
367 
368     if(readcount <= 0)  /* fix questionable compare error. curlvms */
369       break;
370 
371     nread = (size_t)readcount;
372 
373     /*skip bytes before resume point*/
374     if(data->state.resume_from) {
375       if( (curl_off_t)nread <= data->state.resume_from ) {
376         data->state.resume_from -= nread;
377         nread = 0;
378         buf2 = buf;
379       }
380       else {
381         buf2 = buf + data->state.resume_from;
382         nread -= (size_t)data->state.resume_from;
383         data->state.resume_from = 0;
384       }
385     }
386     else
387       buf2 = buf;
388 
389     /* write the data to the target */
390     nwrite = fwrite(buf2, 1, nread, fp);
391     if(nwrite != nread) {
392       res = CURLE_SEND_ERROR;
393       break;
394     }
395 
396     bytecount += nread;
397 
398     Curl_pgrsSetUploadCounter(data, bytecount);
399 
400     if(Curl_pgrsUpdate(conn))
401       res = CURLE_ABORTED_BY_CALLBACK;
402     else
403       res = Curl_speedcheck(data, now);
404   }
405   if(!res && Curl_pgrsUpdate(conn))
406     res = CURLE_ABORTED_BY_CALLBACK;
407 
408   fclose(fp);
409 
410   return res;
411 }
412 
413 /*
414  * file_do() is the protocol-specific function for the do-phase, separated
415  * from the connect-phase above. Other protocols merely setup the transfer in
416  * the do-phase, to have it done in the main transfer loop but since some
417  * platforms we support don't allow select()ing etc on file handles (as
418  * opposed to sockets) we instead perform the whole do-operation in this
419  * function.
420  */
file_do(struct connectdata * conn,bool * done)421 static CURLcode file_do(struct connectdata *conn, bool *done)
422 {
423   /* This implementation ignores the host name in conformance with
424      RFC 1738. Only local files (reachable via the standard file system)
425      are supported. This means that files on remotely mounted directories
426      (via NFS, Samba, NT sharing) can be accessed through a file:// URL
427   */
428   CURLcode res = CURLE_OK;
429   struct_stat statbuf; /* struct_stat instead of struct stat just to allow the
430                           Windows version to have a different struct without
431                           having to redefine the simple word 'stat' */
432   curl_off_t expected_size=0;
433   bool fstated=FALSE;
434   ssize_t nread;
435   size_t bytestoread;
436   struct SessionHandle *data = conn->data;
437   char *buf = data->state.buffer;
438   curl_off_t bytecount = 0;
439   int fd;
440   struct timeval now = Curl_tvnow();
441 
442   *done = TRUE; /* unconditionally */
443 
444   Curl_initinfo(data);
445   Curl_pgrsStartNow(data);
446 
447   if(data->set.upload)
448     return file_upload(conn);
449 
450   /* get the fd from the connection phase */
451   fd = conn->data->state.proto.file->fd;
452 
453   /* VMS: This only works reliable for STREAMLF files */
454   if( -1 != fstat(fd, &statbuf)) {
455     /* we could stat it, then read out the size */
456     expected_size = statbuf.st_size;
457     /* and store the modification time */
458     data->info.filetime = (long)statbuf.st_mtime;
459     fstated = TRUE;
460   }
461 
462   /* If we have selected NOBODY and HEADER, it means that we only want file
463      information. Which for FILE can't be much more than the file size and
464      date. */
465   if(data->set.opt_no_body && data->set.include_header && fstated) {
466     CURLcode result;
467     snprintf(buf, sizeof(data->state.buffer),
468              "Content-Length: %" FORMAT_OFF_T "\r\n", expected_size);
469     result = Curl_client_write(conn, CLIENTWRITE_BOTH, buf, 0);
470     if(result)
471       return result;
472 
473     result = Curl_client_write(conn, CLIENTWRITE_BOTH,
474                                (char *)"Accept-ranges: bytes\r\n", 0);
475     if(result)
476       return result;
477 
478     if(fstated) {
479       const struct tm *tm;
480       time_t filetime = (time_t)statbuf.st_mtime;
481 #ifdef HAVE_GMTIME_R
482       struct tm buffer;
483       tm = (const struct tm *)gmtime_r(&filetime, &buffer);
484 #else
485       tm = gmtime(&filetime);
486 #endif
487       /* format: "Tue, 15 Nov 1994 12:45:26 GMT" */
488       snprintf(buf, BUFSIZE-1,
489                "Last-Modified: %s, %02d %s %4d %02d:%02d:%02d GMT\r\n",
490                Curl_wkday[tm->tm_wday?tm->tm_wday-1:6],
491                tm->tm_mday,
492                Curl_month[tm->tm_mon],
493                tm->tm_year + 1900,
494                tm->tm_hour,
495                tm->tm_min,
496                tm->tm_sec);
497       result = Curl_client_write(conn, CLIENTWRITE_BOTH, buf, 0);
498     }
499     /* if we fstat()ed the file, set the file size to make it available post-
500        transfer */
501     if(fstated)
502       Curl_pgrsSetDownloadSize(data, expected_size);
503     return result;
504   }
505 
506   /* Check whether file range has been specified */
507   file_range(conn);
508 
509   /* Adjust the start offset in case we want to get the N last bytes
510    * of the stream iff the filesize could be determined */
511   if(data->state.resume_from < 0) {
512     if(!fstated) {
513       failf(data, "Can't get the size of file.");
514       return CURLE_READ_ERROR;
515     }
516     else
517       data->state.resume_from += (curl_off_t)statbuf.st_size;
518   }
519 
520   if(data->state.resume_from <= expected_size)
521     expected_size -= data->state.resume_from;
522   else {
523     failf(data, "failed to resume file:// transfer");
524     return CURLE_BAD_DOWNLOAD_RESUME;
525   }
526 
527   /* A high water mark has been specified so we obey... */
528   if (data->req.maxdownload > 0)
529     expected_size = data->req.maxdownload;
530 
531   if(fstated && (expected_size == 0))
532     return CURLE_OK;
533 
534   /* The following is a shortcut implementation of file reading
535      this is both more efficient than the former call to download() and
536      it avoids problems with select() and recv() on file descriptors
537      in Winsock */
538   if(fstated)
539     Curl_pgrsSetDownloadSize(data, expected_size);
540 
541   if(data->state.resume_from) {
542     if(data->state.resume_from !=
543        lseek(fd, data->state.resume_from, SEEK_SET))
544       return CURLE_BAD_DOWNLOAD_RESUME;
545   }
546 
547   Curl_pgrsTime(data, TIMER_STARTTRANSFER);
548 
549   while(res == CURLE_OK) {
550     /* Don't fill a whole buffer if we want less than all data */
551     bytestoread = (expected_size < BUFSIZE-1)?(size_t)expected_size:BUFSIZE-1;
552     nread = read(fd, buf, bytestoread);
553 
554     if( nread > 0)
555       buf[nread] = 0;
556 
557     if (nread <= 0 || expected_size == 0)
558       break;
559 
560     bytecount += nread;
561     expected_size -= nread;
562 
563     res = Curl_client_write(conn, CLIENTWRITE_BODY, buf, nread);
564     if(res)
565       return res;
566 
567     Curl_pgrsSetDownloadCounter(data, bytecount);
568 
569     if(Curl_pgrsUpdate(conn))
570       res = CURLE_ABORTED_BY_CALLBACK;
571     else
572       res = Curl_speedcheck(data, now);
573   }
574   if(Curl_pgrsUpdate(conn))
575     res = CURLE_ABORTED_BY_CALLBACK;
576 
577   return res;
578 }
579 
580 #endif
581