1 //
2 //      aegis - project change supervisor
3 //      Copyright (C) 2003-2008, 2012 Peter Miller
4 //
5 //      This program is free software; you can redistribute it and/or modify
6 //      it under the terms of the GNU General Public License as published by
7 //      the Free Software Foundation; either version 3 of the License, or
8 //      (at your option) any later version.
9 //
10 //      This program is distributed in the hope that it will be useful,
11 //      but WITHOUT ANY WARRANTY; without even the implied warranty of
12 //      MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13 //      GNU General Public License for more details.
14 //
15 //      You should have received a copy of the GNU General Public License
16 //      along with this program. If not, see
17 //      <http://www.gnu.org/licenses/>.
18 //
19 
20 #include <common/ac/assert.h>
21 #include <common/ac/ctype.h>
22 #include <common/ac/curl/curl.h>
23 #include <common/ac/errno.h>
24 #include <common/ac/stdlib.h>
25 #include <common/ac/string.h>
26 #include <common/ac/time.h>
27 #include <common/ac/unistd.h>
28 
29 #include <common/error.h>
30 #include <common/format_elpsd.h>
31 #include <common/itab.h>
32 #include <common/mem.h>
33 #include <common/nstring.h>
34 #include <common/page.h>
35 #include <libaegis/input/curl.h>
36 #include <libaegis/option.h>
37 #include <libaegis/os.h>
38 #include <libaegis/sub.h>
39 #include <libaegis/url.h>
40 
41 
42 #ifdef HAVE_LIBCURL
43 
//
// Report a fatal libcurl error, prefixed with the source file and line
// of the call site.  "function" must be a string literal, because it
// is pasted into the format string; "reason" is the error text.
//
// The expansion is a single expression with no trailing semicolon, so
// that "FATAL(...);" behaves like an ordinary statement, including
// when it is the body of an if that has an else.
//
#define FATAL(function, reason) \
        fatal_raw("%s: %d: " function ": %s", __FILE__, __LINE__, (reason))
46 
47 
48 //
49 // If there is more than one URL open at a time, all are processed
50 // in parallel.  The multi-handle aggregates them all.
51 //
52 static CURLM *multi_handle;
53 static bool call_multi_immediate;
54 static itab_ty *stp;
55 
56 
~input_curl()57 input_curl::~input_curl()
58 {
59     //
60     // Release libcurl resources.
61     //
62     curl_multi_remove_handle(multi_handle, handle);
63     curl_easy_cleanup(handle);
64     handle = 0;
65     eof = true;
66 
67     if (progress_cleanup)
68     {
69         write(2, "\n", 1);
70         progress_cleanup = 0;
71     }
72 
73     //
74     // Release dynamic memory resources.
75     //
76     delete [] curl_buffer;
77     curl_buffer = 0;
78     curl_buffer_position = 0;
79     curl_buffer_length = 0;
80     curl_buffer_maximum = 0;
81 }
82 
83 
84 static int
progress_callback(void * p,double dt,double dc,double,double)85 progress_callback(void *p, double dt, double dc, double, double)
86 {
87     input_curl *icp = (input_curl *)p;
88     icp->progress_callback(dt, dc);
89     return 0;
90 }
91 
92 
93 //
94 // Libcurl calls this function when it receives more data.
95 //
96 static size_t
write_callback(char * data,size_t size,size_t nitems,void * p)97 write_callback(char *data, size_t size, size_t nitems, void *p)
98 {
99     input_curl *icp = (input_curl *)p;
100     size_t nbytes = size * nitems;
101     return icp->write_callback(data, nbytes);
102 }
103 
104 
input_curl(const nstring & arg)105 input_curl::input_curl(const nstring &arg) :
106     fn(arg),
107     pos(0),
108     curl_buffer(0),
109     curl_buffer_maximum(0),
110     curl_buffer_position(0),
111     curl_buffer_length(0),
112     eof(false)
113 {
114     handle = curl_easy_init();
115     if (!handle)
116         nfatal("curl_easy_init");
117 
118     CURLcode err = curl_easy_setopt(handle, CURLOPT_ERRORBUFFER, errbuf);
119     if (err)
120         FATAL("curl_easy_setopt", curl_easy_strerror(err));
121 
122 #if (LIBCURL_VERSION_NUM < 0x070b01)
123     //
124     // libcurl prior to 7.11.1 has problems handling autenticated
125     // proxy specified by http_proxy or HTTP_PROXY, so we set them
126     // manually.
127     //
128 
129     int uid;
130     int gid;
131     int umask;
132     //
133     // We need to save the user identity because the url::split method
134     // call os_become_ itself and we must issue os_become_undo and
135     // os_become to not raise a multiple permission error.
136     //
137     os_become_query(&uid, &gid, &umask);
138     os_become_undo();
139     url target_url(fn);
140     os_become(uid, gid, umask);
141     if (target_url.get_protocol() == "http")
142     {
143         char *http_proxy = getenv("http_proxy");
144         if (!http_proxy || http_proxy[0] == '\0')
145             http_proxy = getenv("HTTP_PROXY");
146         if (http_proxy && http_proxy[0] != '\0')
147         {
148             //
149             // We use the user's identity previously saved to
150             // undo/restore the process identity in order to prevent a
151             // multiple permission error from url::split.
152             //
153             os_become_undo();
154             url proxy_url(http_proxy);
155             os_become(uid, gid, umask);
156             userpass = proxy_url.get_userpass();
157             proxy = proxy_url.reassemble(true);
158             if (!userpass.empty())
159             {
160                 curl_easy_setopt
161                 (
162                     handle,
163                     CURLOPT_PROXYUSERPWD,
164                     userpass.c_str()
165                 );
166             }
167             curl_easy_setopt(handle, CURLOPT_PROXY, proxy.c_str());
168         }
169     }
170 #endif
171     err = curl_easy_setopt(handle, CURLOPT_URL, fn.c_str());
172     if (err)
173         FATAL("curl_easy_setopt", curl_easy_strerror(err));
174     err = curl_easy_setopt(handle, CURLOPT_FILE, this);
175     if (err)
176         FATAL("curl_easy_setopt", curl_easy_strerror(err));
177     err = curl_easy_setopt(handle, CURLOPT_VERBOSE, 0);
178     if (err)
179         FATAL("curl_easy_setopt", curl_easy_strerror(err));
180     err = curl_easy_setopt(handle, CURLOPT_WRITEFUNCTION, ::write_callback);
181     if (err)
182         FATAL("curl_easy_setopt", curl_easy_strerror(err));
183     err = curl_easy_setopt(handle, CURLOPT_FOLLOWLOCATION, 1);
184     if (err)
185         FATAL("curl_easy_setopt", curl_easy_strerror(err));
186 
187     progress_start = 0;
188     progress_buflen = 0;
189     progress_buffer = 0;
190     progress_cleanup = 0;
191     if (option_verbose_get())
192     {
193         err = curl_easy_setopt(handle, CURLOPT_NOPROGRESS, 0);
194         if (err)
195             FATAL("curl_easy_setopt", curl_easy_strerror(err));
196         err =
197             curl_easy_setopt
198             (
199                 handle,
200                 CURLOPT_PROGRESSFUNCTION,
201                 ::progress_callback
202             );
203         if (err)
204             FATAL("curl_easy_setopt", curl_easy_strerror(err));
205         err = curl_easy_setopt(handle, CURLOPT_PROGRESSDATA, this);
206         if (err)
207             FATAL("curl_easy_setopt", curl_easy_strerror(err));
208         time(&progress_start);
209         progress_buflen = page_width_get(80);
210         if (progress_buflen < 40)
211             progress_buflen = 40;
212         progress_buffer = new char [progress_buflen];
213     }
214 
215     if (!multi_handle)
216     {
217         multi_handle = curl_multi_init();
218         if (!multi_handle)
219             nfatal("curl_multi_init");
220     }
221     CURLMcode merr = curl_multi_add_handle(multi_handle, handle);
222     switch (merr)
223     {
224     case CURLM_CALL_MULTI_PERFORM:
225         call_multi_immediate = true;
226         break;
227 
228     case CURLM_OK:
229         break;
230 
231     default:
232         FATAL("curl_multi_add_handle", curl_multi_strerror(merr));
233     }
234 
235     //
236     // Start the fetch as soon as possible.
237     //
238     call_multi_immediate = true;
239 
240     //
241     // Build an associate table from libcurl handles to our file pointers.
242     //
243     if (!stp)
244         stp = itab_alloc();
245     itab_assign(stp, (itab_key_ty)handle, (void *)this);
246 }
247 
248 
//
// Format a byte count as a short human readable string, e.g. " 512 B",
// "1.50kB" or " 200MB", using multiples of 1024.
//
// buf     where to write the NUL terminated result
// len     the size of buf, in bytes; longer results are truncated
// number  the byte count; a negative value or a NaN prints as "-----"
//
// The unit prefix never advances beyond the last one in the table, so
// absurdly large (or infinite) values still terminate and print with
// the largest prefix, rather than reading past the end of the table.
//
static void
print_byte_count(char *buf, size_t len, double number)
{
    // written this way round so that a NaN is rejected as well
    if (!(number >= 0))
    {
        snprintf(buf, len, "-----");
        return;
    }
    // K is Kelvin, k is kilo
    const char *units = " kMGTPEZY";
    for (;;)
    {
        // plain byte counts are never printed with a fraction
        if (*units != ' ')
        {
            if (number < 10)
            {
                snprintf(buf, len, "%4.2f%cB", number, *units);
                return;
            }
            if (number < 100)
            {
                snprintf(buf, len, "%4.1f%cB", number, *units);
                return;
            }
        }
        if (number < 1024)
        {
            snprintf(buf, len, "%4d%cB", (int)number, *units);
            return;
        }
        if (units[1] == '\0')
        {
            // out of prefixes: print what is left with the largest one
            snprintf(buf, len, "%4.0f%cB", number, *units);
            return;
        }
        number /= 1024.;
        ++units;
    }
}
283 
284 
285 void
progress_callback(double down_total,double down_current)286 input_curl::progress_callback(double down_total, double down_current)
287 {
288     if (down_current <= 0 || down_total <= 0)
289         return;
290     if (down_current >= down_total && !progress_cleanup)
291         return;
292     time_t curtim;
293     time(&curtim);
294     curtim -= progress_start;
295     char buf1[7];
296     print_byte_count(buf1, sizeof(buf1), (long)down_current);
297     char buf2[7];
298     print_byte_count(buf2, sizeof(buf2), (long)down_total);
299     double frac = (down_total <= 0) ? 0 : (down_current / down_total);
300     time_t predict = (time_t)(frac ? (0.5 + curtim / frac) : 0);
301     time_t remaining = predict - curtim;
302     char buf3[7];
303     format_elapsed(buf3, sizeof(buf3), remaining);
304 
305     memset(progress_buffer, ' ', progress_buflen);
306     memcpy(progress_buffer +  0, buf1, 6);
307     memcpy(progress_buffer +  6, " of ", 4);
308     memcpy(progress_buffer + 10, buf2, 6);
309     snprintf(progress_buffer + 17, 5, "%3d%%", (int)(100 * frac + 0.5));
310 
311     int lhs = 23;
312     int rhs = (int)(lhs + (progress_buflen - 37) * frac);
313     while (lhs < rhs)
314         progress_buffer[lhs++] = '=';
315     progress_buffer[lhs] = '>';
316 
317     memcpy(progress_buffer + progress_buflen - 11, "ETA", 3);
318     memcpy(progress_buffer + progress_buflen - 7, buf3, 6);
319     progress_buffer[progress_buflen - 1] = '\r';
320     write(2, progress_buffer, progress_buflen);
321     progress_cleanup = 1;
322 
323     if (down_current >= down_total)
324     {
325         write(2, "\n", 1);
326         progress_cleanup = 0;
327     }
328 }
329 
330 
331 size_t
write_callback(char * data,size_t nbytes)332 input_curl::write_callback(char *data, size_t nbytes)
333 {
334     //
335     // Grow the buffer if necessary.
336     //
337     // Always keep it a power of 2, because sigma(2**-n)==1, so we get
338     // O(1) behaviour.  (That +32 means we are always just 32 bytes
339     // short of a power of 2, leaving room for the malloc header, which
340     // results in a nicer malloc fit on many systems.
341     //
342     if (curl_buffer_length + nbytes > curl_buffer_maximum)
343     {
344         for (;;)
345         {
346             curl_buffer_maximum = curl_buffer_maximum * 2 + 32;
347             if (curl_buffer_length + nbytes <= curl_buffer_maximum)
348                 break;
349         }
350         char *new_curl_buffer = new char [curl_buffer_maximum];
351         memcpy(new_curl_buffer, curl_buffer, curl_buffer_length);
352         delete [] curl_buffer;
353         curl_buffer = new_curl_buffer;
354     }
355 
356     //
357     // Copy the data into the buffer.
358     //
359     memcpy(curl_buffer + curl_buffer_length, data, nbytes);
360     curl_buffer_length += nbytes;
361 
362     //
363     // A negative return will stop the transfer for this stream.
364     //
365     return nbytes;
366 }
367 
368 
369 static input_curl *
handle_to_fp(CURL * handle)370 handle_to_fp(CURL *handle)
371 {
372     assert(stp);
373     input_curl *result = (input_curl *)itab_query(stp, (itab_key_ty)handle);
374     if (!result || !result->verify_handle(handle))
375     {
376         fatal_raw
377         (
378             "%s: %d: handle %p gave file %p",
379             __FILE__,
380             __LINE__,
381             (void *)handle,
382             (void *)result
383         );
384     }
385     return result;
386 }
387 
388 
389 /**
390   * The perform function is a wrapper around the curl_multi_perform
391   * function.  It checks for messages that may be waiting, waits in
392   * select if necessary, and calls curl_multi_perform eventually.
393   *
394   * It is expected that this function will be repeatedly called from a
395   * tight loop, so it doesn't loop itself.
396   */
397 
398 static void
perform(void)399 perform(void)
400 {
401     //
402     // See if there are any messages waiting.
403     // These tell us about errors, and completed transfers.
404     //
405     for (;;)
406     {
407         int msgs = 0;
408         CURLMsg *msg = curl_multi_info_read(multi_handle, &msgs);
409         if (!msg)
410             break;
411         if (msg->msg == CURLMSG_NONE)
412             break;
413         if (msg->msg != CURLMSG_DONE)
414             fatal_raw("curl_multi_info_read -> %d (bug)", msg->msg);
415         input_curl *fp = handle_to_fp(msg->easy_handle);
416         if (msg->data.result == 0)
417         {
418             // transfer over, no error
419             fp->eof_notify();
420         }
421         else
422         {
423             fp->read_error();
424         }
425     }
426 
427     //
428     // Look for more to happen.
429     //
430     if (call_multi_immediate)
431     {
432         call_multi_immediate = false;
433         for (;;)
434         {
435             int num_xfer = 0;
436             CURLMcode ret = curl_multi_perform(multi_handle, &num_xfer);
437             switch (ret)
438             {
439             case CURLM_CALL_MULTI_PERFORM:
440                 call_multi_immediate = true;
441                 return;
442 
443             case CURLM_OK:
444                 return;
445 
446             default:
447                 error_raw
448                 (
449                     "%s: %d: curl_multi_perform: %s",
450                     __FILE__,
451                     __LINE__,
452                     curl_multi_strerror(ret)
453                 );
454             }
455         }
456     }
457     else
458     {
459         fd_set fdread;
460         FD_ZERO(&fdread);
461         fd_set fdwrite;
462         FD_ZERO(&fdwrite);
463         fd_set fdexcep;
464         FD_ZERO(&fdexcep);
465 
466         // get file descriptors from the transfers
467         int maxfd = 0;
468         CURLcode err =
469             (CURLcode)
470             curl_multi_fdset(multi_handle, &fdread, &fdwrite, &fdexcep, &maxfd);
471         if (err)
472             FATAL("curl_multi_fdset", curl_easy_strerror(err));
473 
474         if (maxfd >= 0)
475         {
476             // set a suitable timeout to fail on
477             struct timeval timeout;
478             timeout.tv_sec = 60; // 1 minute
479             timeout.tv_usec = 0;
480 
481             int rc = select(maxfd + 1, &fdread, &fdwrite, &fdexcep, &timeout);
482             if (rc < 0)
483             {
484                 if (errno != EINTR)
485                 {
486                     nfatal
487                     (
488                         "%s: %d: select: %s",
489                         __FILE__,
490                         __LINE__,
491                         strerror(errno)
492                     );
493                     // NOTREACHED
494                 }
495             }
496             if (rc > 0)
497             {
498                 //
499                 // Some sockets are ready.
500                 //
501                 call_multi_immediate = true;
502             }
503         }
504     }
505 }
506 
507 
508 void
read_error()509 input_curl::read_error()
510 {
511     sub_context_ty sc;
512     sc.var_set_string("File_Name", fn);
513     sc.var_set_charstar("ERRNO", errbuf);
514     sc.var_override("ERRNO");
515     sc.fatal_intl(i18n("read $filename: $errno"));
516     // NOTREACHED
517 }
518 
519 
520 /**
521   * The read_data function is used to read data into the data buffer provided.
522   * Returns the number of bytes read.
523   */
524 
525 long
read_data(void * data,size_t nbytes)526 input_curl::read_data(void *data, size_t nbytes)
527 {
528     //
529     // attempt to fill buffer
530     //
531     while (!eof && curl_buffer_position + nbytes > curl_buffer_length)
532         perform();
533 
534     //
535     // Extract as much data as possible from the buffer.
536     //
537     size_t size_of_buffer = curl_buffer_length - curl_buffer_position;
538     if (nbytes > size_of_buffer)
539         nbytes = size_of_buffer;
540     memcpy(data, curl_buffer + curl_buffer_position, nbytes);
541     curl_buffer_position += nbytes;
542 
543     //
544     // Rearrange the buffer so that it does not grow forever.
545     //
546     size_of_buffer = curl_buffer_length - curl_buffer_position;
547     if (size_of_buffer == 0)
548     {
549         curl_buffer_position = 0;
550         curl_buffer_length = 0;
551     }
552     else if (size_of_buffer <= curl_buffer_position)
553     {
554         // can shuffle the data down easily
555         memcpy(curl_buffer, curl_buffer + curl_buffer_position, size_of_buffer);
556         curl_buffer_position = 0;
557         curl_buffer_length = size_of_buffer;
558     }
559 
560     if (nbytes == 0 && progress_cleanup)
561     {
562         write(2, "\n", 1);
563         progress_cleanup = 0;
564     }
565 
566     //
567     // Return the number of bytes read.
568     //
569     return nbytes;
570 }
571 
572 
573 ssize_t
read_inner(void * data,size_t len)574 input_curl::read_inner(void *data, size_t len)
575 {
576     os_become_must_be_active();
577 
578     long result = read_data(data, len);
579     assert(result >= 0);
580 
581     pos += result;
582     return result;
583 }
584 
585 
586 off_t
ftell_inner()587 input_curl::ftell_inner()
588 {
589     return pos;
590 }
591 
592 
593 nstring
name()594 input_curl::name()
595 {
596     return fn;
597 }
598 
599 
600 off_t
length()601 input_curl::length()
602 {
603     // Maybe there was a Content-Length header?
604     return -1;
605 }
606 
607 
608 #else
609 
610 
~input_curl()611 input_curl::~input_curl()
612 {
613 }
614 
615 
input_curl(const nstring & arg)616 input_curl::input_curl(const nstring &arg) :
617     fn(arg)
618 {
619     sub_context_ty sc;
620     sc.var_set_string("FileLine", fn);
621     sc.fatal_intl(i18n("open $filename: no curl library"));
622 }
623 
624 
625 ssize_t
read_inner(void *,size_t)626 input_curl::read_inner(void *, size_t)
627 {
628     return 0;
629 }
630 
631 
632 long
ftell_inner()633 input_curl::ftell_inner()
634 {
635     return 0;
636 }
637 
638 
639 nstring
name()640 input_curl::name()
641 {
642     return fn;
643 }
644 
645 
646 long
length()647 input_curl::length()
648 {
649     return -1;
650 }
651 
652 #endif // HAVE_LIBCURL
653 
654 
655 bool
looks_likely(const nstring & file_name)656 input_curl::looks_likely(const nstring &file_name)
657 {
658     const char *cp = file_name.c_str();
659     if (!isalpha((unsigned char)*cp))
660         return 0;
661     for (;;)
662     {
663         ++cp;
664         if (!isalpha((unsigned char)*cp))
665             break;
666     }
667     return (cp[0] == ':' && cp[1] != '\0');
668 }
669 
670 
671 bool
is_remote() const672 input_curl::is_remote()
673     const
674 {
675     return true;
676 }
677 
678 
679 // vim: set ts=8 sw=4 et :
680