1 //
2 // aegis - project change supervisor
3 // Copyright (C) 2003-2008, 2012 Peter Miller
4 //
5 // This program is free software; you can redistribute it and/or modify
6 // it under the terms of the GNU General Public License as published by
7 // the Free Software Foundation; either version 3 of the License, or
8 // (at your option) any later version.
9 //
10 // This program is distributed in the hope that it will be useful,
11 // but WITHOUT ANY WARRANTY; without even the implied warranty of
12 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 // GNU General Public License for more details.
14 //
15 // You should have received a copy of the GNU General Public License
16 // along with this program. If not, see
17 // <http://www.gnu.org/licenses/>.
18 //
19
20 #include <common/ac/assert.h>
21 #include <common/ac/ctype.h>
22 #include <common/ac/curl/curl.h>
23 #include <common/ac/errno.h>
24 #include <common/ac/stdlib.h>
25 #include <common/ac/string.h>
26 #include <common/ac/time.h>
27 #include <common/ac/unistd.h>
28
29 #include <common/error.h>
30 #include <common/format_elpsd.h>
31 #include <common/itab.h>
32 #include <common/mem.h>
33 #include <common/nstring.h>
34 #include <common/page.h>
35 #include <libaegis/input/curl.h>
36 #include <libaegis/option.h>
37 #include <libaegis/os.h>
38 #include <libaegis/sub.h>
39 #include <libaegis/url.h>
40
41
42 #ifdef HAVE_LIBCURL
43
//
// Report a failed library call through fatal_raw, prefixed with the
// source file and line of the call site.
//
// function  must be a string literal: it is pasted into the format string.
// reason    the error text to print after the function name.
//
// The expansion is wrapped in do/while(0), and carries no trailing
// semicolon of its own, so that "FATAL(...);" is exactly one statement
// and is safe in an unbraced if/else.
//
#define FATAL(function, reason) \
    do \
    { \
        fatal_raw("%s: %d: " function ": %s", __FILE__, __LINE__, reason); \
    } \
    while (0)
46
47
//
// If there is more than one URL open at a time, all are processed
// in parallel.  The multi-handle aggregates them all.
//

// The libcurl multi-handle shared by every input_curl instance; it is
// created by the first constructor call that finds it still null.
static CURLM *multi_handle;

// When true, the next call to perform() calls curl_multi_perform
// straight away instead of waiting in select() first.
static bool call_multi_immediate;

// Table from libcurl easy handles to the input_curl object that owns
// them; populated by the constructor and consulted by handle_to_fp().
static itab_ty *stp;
55
56
~input_curl()57 input_curl::~input_curl()
58 {
59 //
60 // Release libcurl resources.
61 //
62 curl_multi_remove_handle(multi_handle, handle);
63 curl_easy_cleanup(handle);
64 handle = 0;
65 eof = true;
66
67 if (progress_cleanup)
68 {
69 write(2, "\n", 1);
70 progress_cleanup = 0;
71 }
72
73 //
74 // Release dynamic memory resources.
75 //
76 delete [] curl_buffer;
77 curl_buffer = 0;
78 curl_buffer_position = 0;
79 curl_buffer_length = 0;
80 curl_buffer_maximum = 0;
81 }
82
83
84 static int
progress_callback(void * p,double dt,double dc,double,double)85 progress_callback(void *p, double dt, double dc, double, double)
86 {
87 input_curl *icp = (input_curl *)p;
88 icp->progress_callback(dt, dc);
89 return 0;
90 }
91
92
93 //
94 // Libcurl calls this function when it receives more data.
95 //
96 static size_t
write_callback(char * data,size_t size,size_t nitems,void * p)97 write_callback(char *data, size_t size, size_t nitems, void *p)
98 {
99 input_curl *icp = (input_curl *)p;
100 size_t nbytes = size * nitems;
101 return icp->write_callback(data, nbytes);
102 }
103
104
input_curl(const nstring & arg)105 input_curl::input_curl(const nstring &arg) :
106 fn(arg),
107 pos(0),
108 curl_buffer(0),
109 curl_buffer_maximum(0),
110 curl_buffer_position(0),
111 curl_buffer_length(0),
112 eof(false)
113 {
114 handle = curl_easy_init();
115 if (!handle)
116 nfatal("curl_easy_init");
117
118 CURLcode err = curl_easy_setopt(handle, CURLOPT_ERRORBUFFER, errbuf);
119 if (err)
120 FATAL("curl_easy_setopt", curl_easy_strerror(err));
121
122 #if (LIBCURL_VERSION_NUM < 0x070b01)
123 //
124 // libcurl prior to 7.11.1 has problems handling autenticated
125 // proxy specified by http_proxy or HTTP_PROXY, so we set them
126 // manually.
127 //
128
129 int uid;
130 int gid;
131 int umask;
132 //
133 // We need to save the user identity because the url::split method
134 // call os_become_ itself and we must issue os_become_undo and
135 // os_become to not raise a multiple permission error.
136 //
137 os_become_query(&uid, &gid, &umask);
138 os_become_undo();
139 url target_url(fn);
140 os_become(uid, gid, umask);
141 if (target_url.get_protocol() == "http")
142 {
143 char *http_proxy = getenv("http_proxy");
144 if (!http_proxy || http_proxy[0] == '\0')
145 http_proxy = getenv("HTTP_PROXY");
146 if (http_proxy && http_proxy[0] != '\0')
147 {
148 //
149 // We use the user's identity previously saved to
150 // undo/restore the process identity in order to prevent a
151 // multiple permission error from url::split.
152 //
153 os_become_undo();
154 url proxy_url(http_proxy);
155 os_become(uid, gid, umask);
156 userpass = proxy_url.get_userpass();
157 proxy = proxy_url.reassemble(true);
158 if (!userpass.empty())
159 {
160 curl_easy_setopt
161 (
162 handle,
163 CURLOPT_PROXYUSERPWD,
164 userpass.c_str()
165 );
166 }
167 curl_easy_setopt(handle, CURLOPT_PROXY, proxy.c_str());
168 }
169 }
170 #endif
171 err = curl_easy_setopt(handle, CURLOPT_URL, fn.c_str());
172 if (err)
173 FATAL("curl_easy_setopt", curl_easy_strerror(err));
174 err = curl_easy_setopt(handle, CURLOPT_FILE, this);
175 if (err)
176 FATAL("curl_easy_setopt", curl_easy_strerror(err));
177 err = curl_easy_setopt(handle, CURLOPT_VERBOSE, 0);
178 if (err)
179 FATAL("curl_easy_setopt", curl_easy_strerror(err));
180 err = curl_easy_setopt(handle, CURLOPT_WRITEFUNCTION, ::write_callback);
181 if (err)
182 FATAL("curl_easy_setopt", curl_easy_strerror(err));
183 err = curl_easy_setopt(handle, CURLOPT_FOLLOWLOCATION, 1);
184 if (err)
185 FATAL("curl_easy_setopt", curl_easy_strerror(err));
186
187 progress_start = 0;
188 progress_buflen = 0;
189 progress_buffer = 0;
190 progress_cleanup = 0;
191 if (option_verbose_get())
192 {
193 err = curl_easy_setopt(handle, CURLOPT_NOPROGRESS, 0);
194 if (err)
195 FATAL("curl_easy_setopt", curl_easy_strerror(err));
196 err =
197 curl_easy_setopt
198 (
199 handle,
200 CURLOPT_PROGRESSFUNCTION,
201 ::progress_callback
202 );
203 if (err)
204 FATAL("curl_easy_setopt", curl_easy_strerror(err));
205 err = curl_easy_setopt(handle, CURLOPT_PROGRESSDATA, this);
206 if (err)
207 FATAL("curl_easy_setopt", curl_easy_strerror(err));
208 time(&progress_start);
209 progress_buflen = page_width_get(80);
210 if (progress_buflen < 40)
211 progress_buflen = 40;
212 progress_buffer = new char [progress_buflen];
213 }
214
215 if (!multi_handle)
216 {
217 multi_handle = curl_multi_init();
218 if (!multi_handle)
219 nfatal("curl_multi_init");
220 }
221 CURLMcode merr = curl_multi_add_handle(multi_handle, handle);
222 switch (merr)
223 {
224 case CURLM_CALL_MULTI_PERFORM:
225 call_multi_immediate = true;
226 break;
227
228 case CURLM_OK:
229 break;
230
231 default:
232 FATAL("curl_multi_add_handle", curl_multi_strerror(merr));
233 }
234
235 //
236 // Start the fetch as soon as possible.
237 //
238 call_multi_immediate = true;
239
240 //
241 // Build an associate table from libcurl handles to our file pointers.
242 //
243 if (!stp)
244 stp = itab_alloc();
245 itab_assign(stp, (itab_key_ty)handle, (void *)this);
246 }
247
248
//
// Format a byte count into a six column field such as "1023 B",
// "1.50kB", "12.3MB" or " 100GB", scaling by powers of 1024.
//
// buf     where to write the NUL terminated text
// len     size of buf in bytes (7 is enough for every result)
// number  the byte count
//
// Negative counts, NaN, and counts too large for the biggest unit are
// printed as "-----".
//
static void
print_byte_count(char *buf, size_t len, double number)
{
    // Written this way round so that NaN also lands here.
    if (!(number >= 0))
    {
        snprintf(buf, len, "-----");
        return;
    }

    // K is Kelvin, k is kilo
    for (const char *units = " kMGTPEZY"; *units; ++units)
    {
        if (*units != ' ')
        {
            //
            // The thresholds are where the printed value would round
            // up to one more digit (9.995 prints as "10.00", which no
            // longer fits four columns), rather than 10 and 100.
            //
            if (number < 9.995)
            {
                snprintf(buf, len, "%4.2f%cB", number, *units);
                return;
            }
            if (number < 99.95)
            {
                snprintf(buf, len, "%4.1f%cB", number, *units);
                return;
            }
        }
        if (number < (1 << 10))
        {
            snprintf(buf, len, "%4d%cB", (int)number, *units);
            return;
        }
        number /= 1024.;
    }

    // Ran out of units: the value cannot be shown in six columns.
    snprintf(buf, len, "-----");
}
283
284
285 void
progress_callback(double down_total,double down_current)286 input_curl::progress_callback(double down_total, double down_current)
287 {
288 if (down_current <= 0 || down_total <= 0)
289 return;
290 if (down_current >= down_total && !progress_cleanup)
291 return;
292 time_t curtim;
293 time(&curtim);
294 curtim -= progress_start;
295 char buf1[7];
296 print_byte_count(buf1, sizeof(buf1), (long)down_current);
297 char buf2[7];
298 print_byte_count(buf2, sizeof(buf2), (long)down_total);
299 double frac = (down_total <= 0) ? 0 : (down_current / down_total);
300 time_t predict = (time_t)(frac ? (0.5 + curtim / frac) : 0);
301 time_t remaining = predict - curtim;
302 char buf3[7];
303 format_elapsed(buf3, sizeof(buf3), remaining);
304
305 memset(progress_buffer, ' ', progress_buflen);
306 memcpy(progress_buffer + 0, buf1, 6);
307 memcpy(progress_buffer + 6, " of ", 4);
308 memcpy(progress_buffer + 10, buf2, 6);
309 snprintf(progress_buffer + 17, 5, "%3d%%", (int)(100 * frac + 0.5));
310
311 int lhs = 23;
312 int rhs = (int)(lhs + (progress_buflen - 37) * frac);
313 while (lhs < rhs)
314 progress_buffer[lhs++] = '=';
315 progress_buffer[lhs] = '>';
316
317 memcpy(progress_buffer + progress_buflen - 11, "ETA", 3);
318 memcpy(progress_buffer + progress_buflen - 7, buf3, 6);
319 progress_buffer[progress_buflen - 1] = '\r';
320 write(2, progress_buffer, progress_buflen);
321 progress_cleanup = 1;
322
323 if (down_current >= down_total)
324 {
325 write(2, "\n", 1);
326 progress_cleanup = 0;
327 }
328 }
329
330
331 size_t
write_callback(char * data,size_t nbytes)332 input_curl::write_callback(char *data, size_t nbytes)
333 {
334 //
335 // Grow the buffer if necessary.
336 //
337 // Always keep it a power of 2, because sigma(2**-n)==1, so we get
338 // O(1) behaviour. (That +32 means we are always just 32 bytes
339 // short of a power of 2, leaving room for the malloc header, which
340 // results in a nicer malloc fit on many systems.
341 //
342 if (curl_buffer_length + nbytes > curl_buffer_maximum)
343 {
344 for (;;)
345 {
346 curl_buffer_maximum = curl_buffer_maximum * 2 + 32;
347 if (curl_buffer_length + nbytes <= curl_buffer_maximum)
348 break;
349 }
350 char *new_curl_buffer = new char [curl_buffer_maximum];
351 memcpy(new_curl_buffer, curl_buffer, curl_buffer_length);
352 delete [] curl_buffer;
353 curl_buffer = new_curl_buffer;
354 }
355
356 //
357 // Copy the data into the buffer.
358 //
359 memcpy(curl_buffer + curl_buffer_length, data, nbytes);
360 curl_buffer_length += nbytes;
361
362 //
363 // A negative return will stop the transfer for this stream.
364 //
365 return nbytes;
366 }
367
368
369 static input_curl *
handle_to_fp(CURL * handle)370 handle_to_fp(CURL *handle)
371 {
372 assert(stp);
373 input_curl *result = (input_curl *)itab_query(stp, (itab_key_ty)handle);
374 if (!result || !result->verify_handle(handle))
375 {
376 fatal_raw
377 (
378 "%s: %d: handle %p gave file %p",
379 __FILE__,
380 __LINE__,
381 (void *)handle,
382 (void *)result
383 );
384 }
385 return result;
386 }
387
388
389 /**
390 * The perform function is a wrapper around the curl_multi_perform
391 * function. It checks for messages that may be waiting, waits in
392 * select if necessary, and calls curl_multi_perform eventually.
393 *
394 * It is expected that this function will be repeatedly called from a
395 * tight loop, so it doesn't loop itself.
396 */
397
398 static void
perform(void)399 perform(void)
400 {
401 //
402 // See if there are any messages waiting.
403 // These tell us about errors, and completed transfers.
404 //
405 for (;;)
406 {
407 int msgs = 0;
408 CURLMsg *msg = curl_multi_info_read(multi_handle, &msgs);
409 if (!msg)
410 break;
411 if (msg->msg == CURLMSG_NONE)
412 break;
413 if (msg->msg != CURLMSG_DONE)
414 fatal_raw("curl_multi_info_read -> %d (bug)", msg->msg);
415 input_curl *fp = handle_to_fp(msg->easy_handle);
416 if (msg->data.result == 0)
417 {
418 // transfer over, no error
419 fp->eof_notify();
420 }
421 else
422 {
423 fp->read_error();
424 }
425 }
426
427 //
428 // Look for more to happen.
429 //
430 if (call_multi_immediate)
431 {
432 call_multi_immediate = false;
433 for (;;)
434 {
435 int num_xfer = 0;
436 CURLMcode ret = curl_multi_perform(multi_handle, &num_xfer);
437 switch (ret)
438 {
439 case CURLM_CALL_MULTI_PERFORM:
440 call_multi_immediate = true;
441 return;
442
443 case CURLM_OK:
444 return;
445
446 default:
447 error_raw
448 (
449 "%s: %d: curl_multi_perform: %s",
450 __FILE__,
451 __LINE__,
452 curl_multi_strerror(ret)
453 );
454 }
455 }
456 }
457 else
458 {
459 fd_set fdread;
460 FD_ZERO(&fdread);
461 fd_set fdwrite;
462 FD_ZERO(&fdwrite);
463 fd_set fdexcep;
464 FD_ZERO(&fdexcep);
465
466 // get file descriptors from the transfers
467 int maxfd = 0;
468 CURLcode err =
469 (CURLcode)
470 curl_multi_fdset(multi_handle, &fdread, &fdwrite, &fdexcep, &maxfd);
471 if (err)
472 FATAL("curl_multi_fdset", curl_easy_strerror(err));
473
474 if (maxfd >= 0)
475 {
476 // set a suitable timeout to fail on
477 struct timeval timeout;
478 timeout.tv_sec = 60; // 1 minute
479 timeout.tv_usec = 0;
480
481 int rc = select(maxfd + 1, &fdread, &fdwrite, &fdexcep, &timeout);
482 if (rc < 0)
483 {
484 if (errno != EINTR)
485 {
486 nfatal
487 (
488 "%s: %d: select: %s",
489 __FILE__,
490 __LINE__,
491 strerror(errno)
492 );
493 // NOTREACHED
494 }
495 }
496 if (rc > 0)
497 {
498 //
499 // Some sockets are ready.
500 //
501 call_multi_immediate = true;
502 }
503 }
504 }
505 }
506
507
508 void
read_error()509 input_curl::read_error()
510 {
511 sub_context_ty sc;
512 sc.var_set_string("File_Name", fn);
513 sc.var_set_charstar("ERRNO", errbuf);
514 sc.var_override("ERRNO");
515 sc.fatal_intl(i18n("read $filename: $errno"));
516 // NOTREACHED
517 }
518
519
520 /**
521 * The read_data function is used to read data into the data buffer provided.
522 * Returns the number of bytes read.
523 */
524
525 long
read_data(void * data,size_t nbytes)526 input_curl::read_data(void *data, size_t nbytes)
527 {
528 //
529 // attempt to fill buffer
530 //
531 while (!eof && curl_buffer_position + nbytes > curl_buffer_length)
532 perform();
533
534 //
535 // Extract as much data as possible from the buffer.
536 //
537 size_t size_of_buffer = curl_buffer_length - curl_buffer_position;
538 if (nbytes > size_of_buffer)
539 nbytes = size_of_buffer;
540 memcpy(data, curl_buffer + curl_buffer_position, nbytes);
541 curl_buffer_position += nbytes;
542
543 //
544 // Rearrange the buffer so that it does not grow forever.
545 //
546 size_of_buffer = curl_buffer_length - curl_buffer_position;
547 if (size_of_buffer == 0)
548 {
549 curl_buffer_position = 0;
550 curl_buffer_length = 0;
551 }
552 else if (size_of_buffer <= curl_buffer_position)
553 {
554 // can shuffle the data down easily
555 memcpy(curl_buffer, curl_buffer + curl_buffer_position, size_of_buffer);
556 curl_buffer_position = 0;
557 curl_buffer_length = size_of_buffer;
558 }
559
560 if (nbytes == 0 && progress_cleanup)
561 {
562 write(2, "\n", 1);
563 progress_cleanup = 0;
564 }
565
566 //
567 // Return the number of bytes read.
568 //
569 return nbytes;
570 }
571
572
573 ssize_t
read_inner(void * data,size_t len)574 input_curl::read_inner(void *data, size_t len)
575 {
576 os_become_must_be_active();
577
578 long result = read_data(data, len);
579 assert(result >= 0);
580
581 pos += result;
582 return result;
583 }
584
585
586 off_t
ftell_inner()587 input_curl::ftell_inner()
588 {
589 return pos;
590 }
591
592
593 nstring
name()594 input_curl::name()
595 {
596 return fn;
597 }
598
599
600 off_t
length()601 input_curl::length()
602 {
603 // Maybe there was a Content-Length header?
604 return -1;
605 }
606
607
608 #else
609
610
~input_curl()611 input_curl::~input_curl()
612 {
613 }
614
615
input_curl(const nstring & arg)616 input_curl::input_curl(const nstring &arg) :
617 fn(arg)
618 {
619 sub_context_ty sc;
620 sc.var_set_string("FileLine", fn);
621 sc.fatal_intl(i18n("open $filename: no curl library"));
622 }
623
624
625 ssize_t
read_inner(void *,size_t)626 input_curl::read_inner(void *, size_t)
627 {
628 return 0;
629 }
630
631
632 long
ftell_inner()633 input_curl::ftell_inner()
634 {
635 return 0;
636 }
637
638
639 nstring
name()640 input_curl::name()
641 {
642 return fn;
643 }
644
645
646 long
length()647 input_curl::length()
648 {
649 return -1;
650 }
651
652 #endif // HAVE_LIBCURL
653
654
655 bool
looks_likely(const nstring & file_name)656 input_curl::looks_likely(const nstring &file_name)
657 {
658 const char *cp = file_name.c_str();
659 if (!isalpha((unsigned char)*cp))
660 return 0;
661 for (;;)
662 {
663 ++cp;
664 if (!isalpha((unsigned char)*cp))
665 break;
666 }
667 return (cp[0] == ':' && cp[1] != '\0');
668 }
669
670
671 bool
is_remote() const672 input_curl::is_remote()
673 const
674 {
675 return true;
676 }
677
678
679 // vim: set ts=8 sw=4 et :
680