1 /*
2 * R : A Computer Language for Statistical Data Analysis
3 * Copyright (C) 2015-2020 The R Core Team
4 *
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation; either version 2 of the License, or
8 * (at your option) any later version.
9 *
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
14 *
15 * You should have received a copy of the GNU General Public License
16 * along with this program; if not, a copy is available at
17 * https://www.R-project.org/Licenses/
18 */
19
20 #ifdef HAVE_CONFIG_H
21 # include <config.h>
22 #endif
23
24 #ifdef Win32
25 # define R_USE_SIGNALS 1
26 #endif
27 #include <Defn.h>
28 #include <Internal.h>
29 #include <Fileio.h>
30 #include <errno.h>
31
32 #ifdef HAVE_UNISTD_H
33 // for unlink
34 # include <unistd.h>
35 #endif
36
37 #ifdef HAVE_LIBCURL
38 # include <curl/curl.h>
39 /*
40 This needed libcurl >= 7.28.0 (Oct 2012) for curl_multi_wait.
41 Substitute code is provided for a Unix-alike only.
42
43 There is a configure test but it is not used on Windows and system
44 software can change.
45 */
46 # ifdef Win32
47 # if LIBCURL_VERSION_MAJOR < 7 || (LIBCURL_VERSION_MAJOR == 7 && LIBCURL_VERSION_MINOR < 28)
48 # error libcurl 7.28.0 or later is required.
49 # endif
50 # else
51 # if LIBCURL_VERSION_MAJOR < 7 || (LIBCURL_VERSION_MAJOR == 7 && LIBCURL_VERSION_MINOR < 22)
52 # error libcurl 7.22.0 or later is required.
53 # endif
54 # endif
55 extern void Rsleep(double timeint);
56 #endif
57
58 static int current_timeout = 0;
59
60 # if (LIBCURL_VERSION_MAJOR == 7 && LIBCURL_VERSION_MINOR < 28)
61
62 // curl/curl.h includes <sys/select.h> and headers it requires.
63
64 #define curl_multi_wait R_curl_multi_wait
65
66
67 static CURLMcode
R_curl_multi_wait(CURLM * multi_handle,void * unused,unsigned int extra,int timeout_ms,int * ret)68 R_curl_multi_wait(CURLM *multi_handle,
69 /* IGNORED */ void *unused,
70 /* IGNORED */ unsigned int extra,
71 int timeout_ms, int *ret)
72 {
73 fd_set fdread;
74 fd_set fdwrite;
75 fd_set fdexcep;
76 FD_ZERO(&fdread);
77 FD_ZERO(&fdwrite);
78 FD_ZERO(&fdexcep);
79
80 struct timeval timeout;
81
82 timeout.tv_sec = timeout_ms / 1000;
83 timeout.tv_usec = (timeout_ms % 1000) * 1000;
84
85 int maxfd = -1;
86 CURLMcode
87 mc = curl_multi_fdset(multi_handle, &fdread, &fdwrite, &fdexcep, &maxfd);
88 if (maxfd == -1) {
89 *ret = 0;
90 Rsleep(0.1);
91 } else
92 *ret = select(maxfd+1, &fdread, &fdwrite, &fdexcep, &timeout);
93
94 return mc;
95 }
96 #endif
97
in_do_curlVersion(SEXP call,SEXP op,SEXP args,SEXP rho)98 SEXP attribute_hidden in_do_curlVersion(SEXP call, SEXP op, SEXP args, SEXP rho)
99 {
100 checkArity(op, args);
101 SEXP ans = PROTECT(allocVector(STRSXP, 1));
102 #ifdef HAVE_LIBCURL
103 curl_version_info_data *d = curl_version_info(CURLVERSION_NOW);
104 SET_STRING_ELT(ans, 0, mkChar(d->version));
105 SEXP sSSLVersion = install("ssl_version");
106 setAttrib(ans, sSSLVersion,
107 mkString(d->ssl_version ? d->ssl_version : "none"));
108 SEXP sLibSSHVersion = install("libssh_version");
109 setAttrib(ans, sLibSSHVersion,
110 mkString(((d->age >= 3) && d->libssh_version) ? d->libssh_version : ""));
111 const char * const *p;
112 int n, i;
113 for (p = d->protocols, n = 0; *p; p++, n++) ;
114 SEXP protocols = PROTECT(allocVector(STRSXP, n));
115 for (p = d->protocols, i = 0; i < n; i++, p++)
116 SET_STRING_ELT(protocols, i, mkChar(*p));
117 setAttrib(ans, install("protocols"), protocols);
118 UNPROTECT(1);
119 #else
120 SET_STRING_ELT(ans, 0, mkChar(""));
121 #endif
122 UNPROTECT(1);
123 return ans;
124 }
125
126 #ifdef HAVE_LIBCURL
http_errstr(const long status)127 static const char *http_errstr(const long status)
128 {
129 const char *str;
130 switch(status) {
131 case 400: str = "Bad Request"; break;
132 case 401: str = "Unauthorized"; break;
133 case 402: str = "Payment Required"; break;
134 case 403: str = "Forbidden"; break;
135 case 404: str = "Not Found"; break;
136 case 405: str = "Method Not Allowed"; break;
137 case 406: str = "Not Acceptable"; break;
138 case 407: str = "Proxy Authentication Required"; break;
139 case 408: str = "Request Timeout"; break;
140 case 409: str = "Conflict"; break;
141 case 410: str = "Gone"; break;
142 case 411: str = "Length Required"; break;
143 case 412: str = "Precondition Failed"; break;
144 case 413: str = "Request Entity Too Large"; break;
145 case 414: str = "Request-URI Too Long"; break;
146 case 415: str = "Unsupported Media Type"; break;
147 case 416: str = "Requested Range Not Satisfiable"; break;
148 case 417: str = "Expectation Failed"; break;
149 case 500: str = "Internal Server Error"; break;
150 case 501: str = "Not Implemented"; break;
151 case 502: str = "Bad Gateway"; break;
152 case 503: str = "Service Unavailable"; break;
153 case 504: str = "Gateway Timeout"; break;
154 default: str = "Unknown Error"; break;
155 }
156 return str;
157 }
158
ftp_errstr(const long status)159 static const char *ftp_errstr(const long status)
160 {
161 const char *str;
162 switch (status) {
163 case 421: str = "Service not available, closing control connection"; break;
164 case 425: str = "Cannot open data connection"; break;
165 case 426: str = "Connection closed; transfer aborted"; break;
166 case 430: str = "Invalid username or password"; break;
167 case 434: str = "Requested host unavailable"; break;
168 case 450: str = "Requested file action not taken"; break;
169 case 451: str = "Requested action aborted; local error in processing"; break;
170 case 452:
171 str = "Requested action not taken; insufficient storage space in system";
172 break;
173 case 501: str = "Syntax error in parameters or arguments"; break;
174 case 502: str = "Command not implemented"; break;
175 case 503: str = "Bad sequence of commands"; break;
176 case 504: str = "Command not implemented for that parameter"; break;
177 case 530: str = "Not logged in"; break;
178 case 532: str = "Need account for storing files"; break;
179 case 550:
180 str = "Requested action not taken; file unavailable";
181 break;
182 case 551: str = "Requested action aborted; page type unknown"; break;
183 case 552:
184 str = "Requested file action aborted; exceeded storage allocation";
185 break;
186 case 553: str = "Requested action not taken; file name not allowed"; break;
187 default: str = "Unknown Error"; break;
188 }
189 return str;
190 }
191
192 /*
193 Check curl_multi_info_read for errors, reporting as warnings
194
195 Return: number of errors encountered
196 */
curlMultiCheckerrs(CURLM * mhnd)197 static int curlMultiCheckerrs(CURLM *mhnd)
198 {
199 int retval = 0;
200 for(int n = 1; n > 0;) {
201 CURLMsg *msg = curl_multi_info_read(mhnd, &n);
202 if (msg && (msg->data.result != CURLE_OK)) {
203 const char *url, *strerr, *type;
204 long status = 0;
205 curl_easy_getinfo(msg->easy_handle, CURLINFO_EFFECTIVE_URL, &url);
206 curl_easy_getinfo(msg->easy_handle, CURLINFO_RESPONSE_CODE,
207 &status);
208 // This reports the redirected URL
209 if (status >= 400) {
210 if (url && url[0] == 'h') {
211 strerr = http_errstr(status);
212 type = "HTTP";
213 } else {
214 strerr = ftp_errstr(status);
215 type = "FTP";
216 }
217 warning(_("cannot open URL '%s': %s status was '%d %s'"),
218 url, type, status, strerr);
219 } else {
220 strerr = curl_easy_strerror(msg->data.result);
221 if (streql(strerr, "Timeout was reached"))
222 warning(_("URL '%s': Timeout of %d seconds was reached"),
223 url, current_timeout);
224 else
225 warning(_("URL '%s': status was '%s'"), url, strerr);
226 }
227 retval++;
228 }
229 }
230 return retval;
231 }
curlCommon(CURL * hnd,int redirect,int verify)232 static void curlCommon(CURL *hnd, int redirect, int verify)
233 {
234 const char *capath = getenv("CURL_CA_BUNDLE");
235 if (verify) {
236 if (capath && capath[0])
237 curl_easy_setopt(hnd, CURLOPT_CAINFO, capath);
238 #ifdef Win32
239 else
240 curl_easy_setopt(hnd, CURLOPT_SSL_VERIFYPEER, 0L);
241 #endif
242 } else {
243 curl_easy_setopt(hnd, CURLOPT_SSL_VERIFYHOST, 0L);
244 curl_easy_setopt(hnd, CURLOPT_SSL_VERIFYPEER, 0L);
245 }
246 #if 0
247 // for consistency, but all utils:::makeUserAgent does is look up an option.
248 SEXP sMakeUserAgent = install("makeUserAgent");
249 SEXP agentFun = PROTECT(lang2(sMakeUserAgent, ScalarLogical(0)));
250 SEXP utilsNS = PROTECT(R_FindNamespace(mkString("utils")));
251 SEXP sua = eval(agentFun, utilsNS);
252 UNPROTECT(1); /* utilsNS */
253 PROTECT(sua);
254 if(TYPEOF(sua) != NILSXP)
255 curl_easy_setopt(hnd, CURLOPT_USERAGENT, CHAR(STRING_ELT(sua, 0)));
256 UNPROTECT(2);
257 #else
258 int Default = 1;
259 SEXP sua = GetOption1(install("HTTPUserAgent")); // set in utils startup
260 if (TYPEOF(sua) == STRSXP && LENGTH(sua) == 1 ) {
261 const char *p = CHAR(STRING_ELT(sua, 0));
262 if (p[0] && p[1] && p[2] && p[0] == 'R' && p[1] == ' ' && p[2] == '(') {
263 } else {
264 Default = 0;
265 curl_easy_setopt(hnd, CURLOPT_USERAGENT, p);
266 }
267 }
268 if (Default) {
269 char buf[20];
270 curl_version_info_data *d = curl_version_info(CURLVERSION_NOW);
271 snprintf(buf, 20, "libcurl/%s", d->version);
272 curl_easy_setopt(hnd, CURLOPT_USERAGENT, buf);
273 }
274 #endif
275 int timeout0 = asInteger(GetOption1(install("timeout")));
276 long timeout = (timeout0 == NA_INTEGER) ? 0 : (1000L * timeout0);
277 current_timeout = (timeout0 == NA_INTEGER) ? 0 : timeout0;
278 curl_easy_setopt(hnd, CURLOPT_CONNECTTIMEOUT_MS, timeout);
279 curl_easy_setopt(hnd, CURLOPT_TIMEOUT_MS, timeout);
280 if (redirect) {
281 curl_easy_setopt(hnd, CURLOPT_FOLLOWLOCATION, 1L);
282 curl_easy_setopt(hnd, CURLOPT_MAXREDIRS, 20L);
283 }
284 int verbosity = asInteger(GetOption1(install("internet.info")));
285 if (verbosity < 2) curl_easy_setopt(hnd, CURLOPT_VERBOSE, 1L);
286
287 // enable the cookie engine, keep cookies in memory
288 curl_easy_setopt(hnd, CURLOPT_COOKIEFILE, "");
289 }
290
291 static char headers[500][2049]; // allow for terminator
292 static int used;
293
294 static size_t
rcvHeaders(void * buffer,size_t size,size_t nmemb,void * userp)295 rcvHeaders(void *buffer, size_t size, size_t nmemb, void *userp)
296 {
297 char *d = (char*)buffer;
298 size_t result = size * nmemb, res = result > 2048 ? 2048 : result;
299 if (used >= 500) return result;
300 strncpy(headers[used], d, res);
301 // 'Do not assume that the header line is zero terminated!'
302 headers[used][res] = '\0';
303 used++;
304 return result;
305 }
306
307 static size_t
rcvBody(void * buffer,size_t size,size_t nmemb,void * userp)308 rcvBody(void *buffer, size_t size, size_t nmemb, void *userp)
309 {
310 // needed to discard spurious ftp 'body' otherwise written to stdout
311 return size * nmemb;
312 }
313 #endif
314
315
316 SEXP attribute_hidden
in_do_curlGetHeaders(SEXP call,SEXP op,SEXP args,SEXP rho)317 in_do_curlGetHeaders(SEXP call, SEXP op, SEXP args, SEXP rho)
318 {
319 checkArity(op, args);
320 #ifndef HAVE_LIBCURL
321 error(_("curlGetHeaders is not supported on this platform"));
322 return R_NilValue;
323 #else
324 if (!isString(CAR(args)) || LENGTH(CAR(args)) != 1)
325 error("invalid %s argument", "url");
326 const char *url = translateChar(STRING_ELT(CAR(args), 0));
327 used = 0;
328 int redirect = asLogical(CADR(args));
329 if (redirect == NA_LOGICAL)
330 error(_("invalid %s argument"), "redirect");
331 int verify = asLogical(CADDR(args));
332 if (verify == NA_LOGICAL)
333 error(_("invalid %s argument"), "verify");
334 int timeout = asInteger(CADDDR(args));
335 if (timeout == NA_INTEGER)
336 error(_("invalid %s argument"), "timeout");
337 SEXP sTLS = CAD4R(args);
338 const char *TLS = "";
339 if (isString(sTLS) && LENGTH(sTLS) == 1 && STRING_ELT(sTLS, 0) != NA_STRING)
340 TLS = translateChar(STRING_ELT(sTLS, 0));
341 else error(_("invalid %s argument"), "TLS");
342
343 CURL *hnd = curl_easy_init();
344 curl_easy_setopt(hnd, CURLOPT_URL, url);
345 curl_easy_setopt(hnd, CURLOPT_NOPROGRESS, 1L);
346 curl_easy_setopt(hnd, CURLOPT_NOBODY, 1L);
347 curl_easy_setopt(hnd, CURLOPT_HEADERFUNCTION, &rcvHeaders);
348 curl_easy_setopt(hnd, CURLOPT_WRITEHEADER, &headers);
349 /* libcurl (at least 7.40.0) does not respect CURLOPT_NOBODY
350 for some ftp header info (Content-Length and Accept-ranges). */
351 curl_easy_setopt(hnd, CURLOPT_WRITEFUNCTION, &rcvBody);
352 curlCommon(hnd, redirect, verify);
353 if (timeout > 0) {
354 curl_easy_setopt(hnd, CURLOPT_TIMEOUT, timeout);
355 current_timeout = timeout;
356 }
357 if (!streql(TLS, "")) {
358 // 7.34.0 was released 2013-12-17
359 #if LIBCURL_VERSION_MAJOR > 7 || (LIBCURL_VERSION_MAJOR == 7 && LIBCURL_VERSION_MINOR >= 34)
360 long TLS_ver = CURL_SSLVERSION_TLSv1_0;
361 if (streql(TLS, "1.0")) TLS_ver = CURL_SSLVERSION_TLSv1_0;
362 else if (streql(TLS, "1.1")) TLS_ver = CURL_SSLVERSION_TLSv1_1;
363 else if (streql(TLS, "1.2")) TLS_ver = CURL_SSLVERSION_TLSv1_2;
364 # if LIBCURL_VERSION_MAJOR > 7 || (LIBCURL_VERSION_MAJOR == 7 && LIBCURL_VERSION_MINOR >= 52)
365 else if (streql(TLS, "1.3")) TLS_ver = CURL_SSLVERSION_TLSv1_3;
366 # endif
367 else error(_("invalid %s argument"), "TLS");
368 curl_easy_setopt(hnd, CURLOPT_SSLVERSION, TLS_ver);
369 # else
370 error("TLS argument is unsupported in this libcurl version %d.%d",
371 LIBCURL_VERSION_MAJOR, LIBCURL_VERSION_MINOR);
372 #endif
373 }
374
375 char errbuf[CURL_ERROR_SIZE];
376 curl_easy_setopt(hnd, CURLOPT_ERRORBUFFER, errbuf);
377 // libcurl does not initialize this
378 errbuf[0] = '\0';
379 CURLcode ret = curl_easy_perform(hnd);
380 if (ret != CURLE_OK) {
381 if (errbuf[0])
382 error(_("libcurl error code %d:\n\t%s\n"), ret, errbuf);
383 else if(ret == 77)
384 error(_("libcurl error code %d:\n\t%s\n"), ret,
385 "unable to access SSL/TLS CA certificates");
386 else // rare case, error but no message
387 error("libcurl error code %d\n", ret);
388 }
389 long http_code = 0;
390 curl_easy_getinfo (hnd, CURLINFO_RESPONSE_CODE, &http_code);
391 curl_easy_cleanup(hnd);
392
393 SEXP ans = PROTECT(allocVector(STRSXP, used));
394 for (int i = 0; i < used; i++)
395 SET_STRING_ELT(ans, i, mkChar(headers[i]));
396 SEXP sStatus = install("status");
397 setAttrib(ans, sStatus, ScalarInteger((int) http_code));
398 UNPROTECT(1);
399 return ans;
400 #endif
401 }
402
403 #ifdef HAVE_LIBCURL
404 static double total;
405
406 static int ndashes;
putdashes(int * pold,int new)407 static void putdashes(int *pold, int new)
408 {
409 for (int i = *pold; i < new; i++) REprintf("=");
410 if (R_Consolefile) fflush(R_Consolefile);
411 *pold = new;
412 }
413
414 # ifdef Win32
415 // ------- Windows progress bar -----------
416 #include <ga.h>
417
418 /* We could share this window with internet.c, then re-positioning
419 would apply to both */
420 typedef struct {
421 window wprog;
422 progressbar pb;
423 label l_url;
424 RCNTXT cntxt;
425 int pc;
426 } winprogressbar;
427
428 static winprogressbar pbar = {NULL, NULL, NULL};
429
doneprogressbar(void * data)430 static void doneprogressbar(void *data)
431 {
432 winprogressbar *pbar = data;
433 hide(pbar->wprog);
434 }
435
436 static
progress(void * clientp,double dltotal,double dlnow,double ultotal,double ulnow)437 int progress(void *clientp, double dltotal, double dlnow,
438 double ultotal, double ulnow)
439 {
440 static int factor = 1;
441 // we only use downloads. dltotal may be zero.
442 if (dltotal > 0.) {
443 if (total == 0.) {
444 total = dltotal;
445 char *type = NULL;
446 CURL *hnd = (CURL *) clientp;
447 curl_easy_getinfo(hnd, CURLINFO_CONTENT_TYPE, &type);
448 if (total > 1024.0*1024.0)
449 // might be longer than long, and is on 64-bit windows
450 REprintf(" length %0.0f bytes (%0.1f MB)\n",
451 total, total/1024.0/1024.0);
452 else if (total > 10240)
453 REprintf("Content length %d bytes (%d KB)\n",
454 (int)total, (int)(total/1024));
455 else
456 REprintf("Content length %d bytes\n", (int)total);
457 R_FlushConsole();
458 if(R_Interactive) {
459 if (total > 1e9) factor = total/1e6; else factor = 1;
460 setprogressbarrange(pbar.pb, 0, total/factor);
461 show(pbar.wprog);
462 }
463 }
464 if (R_Interactive) {
465 setprogressbar(pbar.pb, dlnow/factor);
466 if (total > 0) {
467 static char pbuf[30];
468 int pc = 0.499 + 100.0*dlnow/total;
469 if (pc > pbar.pc) {
470 snprintf(pbuf, 30, "%d%% downloaded", pc);
471 settext(pbar.wprog, pbuf);
472 pbar.pc = pc;
473 }
474 }
475 } else putdashes(&ndashes, (int)(50*dlnow/total));
476 }
477 R_ProcessEvents();
478 return 0;
479 }
480
481 # else
482 // ------- Unix-alike progress bar -----------
483
484 static
progress(void * clientp,double dltotal,double dlnow,double ultotal,double ulnow)485 int progress(void *clientp, double dltotal, double dlnow,
486 double ultotal, double ulnow)
487 {
488 CURL *hnd = (CURL *) clientp;
489 long status;
490 curl_easy_getinfo(hnd, CURLINFO_RESPONSE_CODE, &status);
491
492 // we only use downloads. dltotal may be zero.
493 if ((status < 300) && (dltotal > 0.)) {
494 if (total == 0.) {
495 total = dltotal;
496 char *type = NULL;
497 curl_easy_getinfo(hnd, CURLINFO_CONTENT_TYPE, &type);
498 REprintf("Content type '%s'", type ? type : "unknown");
499 if (total > 1024.0*1024.0)
500 // might be longer than long, and is on 64-bit windows
501 REprintf(" length %0.0f bytes (%0.1f MB)\n",
502 total, total/1024.0/1024.0);
503 else if (total > 10240)
504 REprintf(" length %d bytes (%d KB)\n",
505 (int)total, (int)(total/1024));
506 else
507 REprintf(" length %d bytes\n", (int)total);
508 if (R_Consolefile) fflush(R_Consolefile);
509 }
510 putdashes(&ndashes, (int)(50*dlnow/total));
511 }
512 return 0;
513 }
514 # endif // Win32
515 #endif // HAVE_LIBCURL
516
517 /* download(url, destfile, quiet, mode, headers, cacheOK) */
518
519 SEXP attribute_hidden
in_do_curlDownload(SEXP call,SEXP op,SEXP args,SEXP rho)520 in_do_curlDownload(SEXP call, SEXP op, SEXP args, SEXP rho)
521 {
522 checkArity(op, args);
523 #ifndef HAVE_LIBCURL
524 error(_("download.file(method = \"libcurl\") is not supported on this platform"));
525 return R_NilValue;
526 #else
527 SEXP scmd, sfile, smode, sheaders;
528 const char *url, *file, *mode;
529 int quiet, cacheOK;
530 struct curl_slist *headers = NULL;
531
532 scmd = CAR(args); args = CDR(args);
533 if (!isString(scmd) || length(scmd) < 1)
534 error(_("invalid '%s' argument"), "url");
535 int nurls = length(scmd);
536 sfile = CAR(args); args = CDR(args);
537 if (!isString(sfile) || length(sfile) < 1)
538 error(_("invalid '%s' argument"), "destfile");
539 if (length(sfile) != length(scmd))
540 error(_("lengths of 'url' and 'destfile' must match"));
541 quiet = asLogical(CAR(args)); args = CDR(args);
542 if (quiet == NA_LOGICAL)
543 error(_("invalid '%s' argument"), "quiet");
544 smode = CAR(args); args = CDR(args);
545 if (!isString(smode) || length(smode) != 1)
546 error(_("invalid '%s' argument"), "mode");
547 mode = CHAR(STRING_ELT(smode, 0));
548 cacheOK = asLogical(CAR(args)); args = CDR(args);
549 if (cacheOK == NA_LOGICAL)
550 error(_("invalid '%s' argument"), "cacheOK");
551 sheaders = CAR(args);
552 if(TYPEOF(sheaders) != NILSXP && !isString(sheaders))
553 error(_("invalid '%s' argument"), "headers");
554 if(TYPEOF(sheaders) != NILSXP) {
555 for (int i = 0; i < LENGTH(sheaders); i++) {
556 struct curl_slist *tmp =
557 curl_slist_append(headers, CHAR(STRING_ELT(sheaders, i)));
558 if (!tmp) {
559 curl_slist_free_all(headers);
560 error(_("out of memory"));
561 }
562 headers = tmp;
563 }
564 }
565
566 /* This comes mainly from curl --libcurl on the call used by
567 download.file(method = "curl").
568 Also https://curl.haxx.se/libcurl/c/multi-single.html.
569 */
570
571 if (!cacheOK) {
572 /* This _is_ the right way to do this: see §14.9 of
573 http://www.w3.org/Protocols/rfc2616/rfc2616-sec14.html */
574 struct curl_slist *tmp =
575 curl_slist_append(headers, "Pragma: no-cache");
576 if(!tmp) {
577 curl_slist_free_all(headers);
578 error(_("out of memory"));
579 }
580 headers = tmp;
581 }
582
583 CURLM *mhnd = curl_multi_init();
584 int still_running, repeats = 0, n_err = 0;
585 CURL **hnd[nurls];
586 FILE *out[nurls];
587
588 for(int i = 0; i < nurls; i++) {
589 url = CHAR(STRING_ELT(scmd, i));
590 hnd[i] = curl_easy_init();
591 curl_easy_setopt(hnd[i], CURLOPT_URL, url);
592 curl_easy_setopt(hnd[i], CURLOPT_FAILONERROR, 1L);
593 /* Users will normally expect to follow redirections, although
594 that is not the default in either curl or libcurl. */
595 curlCommon(hnd[i], 1, 1);
596 #if (LIBCURL_VERSION_MINOR >= 25)
597 curl_easy_setopt(hnd[i], CURLOPT_TCP_KEEPALIVE, 1L);
598 #endif
599 curl_easy_setopt(hnd[i], CURLOPT_HTTPHEADER, headers);
600
601 /* check that destfile can be written */
602 file = translateChar(STRING_ELT(sfile, i));
603 out[i] = R_fopen(R_ExpandFileName(file), mode);
604 if (!out[i]) {
605 n_err += 1;
606 warning(_("URL %s: cannot open destfile '%s', reason '%s'"),
607 url, file, strerror(errno));
608 if (nurls == 1) break; else continue;
609 } else {
610 // This uses the internal CURLOPT_WRITEFUNCTION
611 curl_easy_setopt(hnd[i], CURLOPT_WRITEDATA, out[i]);
612 curl_multi_add_handle(mhnd, hnd[i]);
613 }
614
615 total = 0.;
616 if (!quiet && nurls <= 1) {
617 // It would in principle be possible to have
618 // multiple progress bars on Windows.
619 curl_easy_setopt(hnd[i], CURLOPT_NOPROGRESS, 0L);
620 ndashes = 0;
621 #ifdef Win32
622 if (R_Interactive) {
623 if (!pbar.wprog) {
624 pbar.wprog = newwindow(_("Download progress"),
625 rect(0, 0, 540, 100),
626 Titlebar | Centered);
627 setbackground(pbar.wprog, dialog_bg());
628 pbar.l_url = newlabel(" ", rect(10, 15, 520, 25),
629 AlignCenter);
630 pbar.pb = newprogressbar(rect(20, 50, 500, 20),
631 0, 1024, 1024, 1);
632 pbar.pc = 0;
633 }
634
635 settext(pbar.l_url, url);
636 setprogressbar(pbar.pb, 0);
637 settext(pbar.wprog, "Download progress");
638 show(pbar.wprog);
639 begincontext(&(pbar.cntxt), CTXT_CCODE, R_NilValue, R_NilValue,
640 R_NilValue, R_NilValue, R_NilValue);
641 pbar.cntxt.cend = &doneprogressbar;
642 pbar.cntxt.cenddata = &pbar;
643 }
644 #endif
645 // For libcurl >= 7.32.0 use CURLOPT_XFERINFOFUNCTION
646 curl_easy_setopt(hnd[i], CURLOPT_PROGRESSFUNCTION, progress);
647 curl_easy_setopt(hnd[i], CURLOPT_PROGRESSDATA, hnd[i]);
648 } else curl_easy_setopt(hnd[i], CURLOPT_NOPROGRESS, 1L);
649
650 /* This would allow the negotiation of compressed HTTP transfers,
651 but it is not clear it is always a good idea.
652 curl_easy_setopt(hnd[i], CURLOPT_ACCEPT_ENCODING, "gzip, deflate");
653 */
654
655 if (!quiet) REprintf(_("trying URL '%s'\n"), url);
656 }
657
658 if (n_err == nurls) {
659 // no dest files could be opened, so bail out
660 curl_multi_cleanup(mhnd);
661 return ScalarInteger(1);
662 }
663
664 R_Busy(1);
665 // curl_multi_wait needs curl >= 7.28.0 .
666 curl_multi_perform(mhnd, &still_running);
667 do {
668 int numfds;
669 CURLMcode mc = curl_multi_wait(mhnd, NULL, 0, 100, &numfds);
670 if (mc != CURLM_OK) { // internal, do not translate
671 warning("curl_multi_wait() failed, code %d", mc);
672 break;
673 }
674 if (!numfds) {
675 /* 'numfds' being zero means either a timeout or no file
676 descriptors to wait for. Try timeout on first
677 occurrence, then assume no file descriptors to wait for
678 means 'sleep for 100 milliseconds'.
679 */
680 if (repeats++ > 0) Rsleep(0.1); // do not block R process
681 } else repeats = 0;
682 R_ProcessEvents();
683 curl_multi_perform(mhnd, &still_running);
684 } while(still_running);
685 R_Busy(0);
686 #ifdef Win32
687 if (R_Interactive && !quiet && nurls<=1) {
688 endcontext(&(pbar.cntxt));
689 doneprogressbar(&pbar);
690 } else if (total > 0.) {
691 REprintf("\n");
692 R_FlushConsole();
693 }
694 #else
695 if (total > 0.) REprintf("\n");
696 if (R_Consolefile) fflush(R_Consolefile);
697 #endif
698 if (nurls == 1) {
699 long status;
700 curl_easy_getinfo(hnd[0], CURLINFO_RESPONSE_CODE, &status);
701 double cl, dl;
702 curl_easy_getinfo(hnd[0], CURLINFO_SIZE_DOWNLOAD, &dl);
703 if (!quiet && status == 200) {
704 if (dl > 1024*1024)
705 REprintf("downloaded %0.1f MB\n\n", (double)dl/1024/1024);
706 else if (dl > 10240)
707 REprintf("downloaded %d KB\n\n", (int) dl/1024);
708 else
709 REprintf("downloaded %d bytes\n\n", (int) dl);
710 }
711 curl_easy_getinfo(hnd[0], CURLINFO_CONTENT_LENGTH_DOWNLOAD, &cl);
712 if (cl >= 0 && dl != cl)
713 warning(_("downloaded length %0.f != reported length %0.f"), dl, cl);
714 }
715
716 n_err += curlMultiCheckerrs(mhnd);
717
718 long status = 0L;
719 for (int i = 0; i < nurls; i++) {
720 if (out[i]) {
721 fclose(out[i]);
722 double dl;
723 curl_easy_getinfo(hnd[i], CURLINFO_SIZE_DOWNLOAD, &dl);
724 curl_easy_getinfo(hnd[i], CURLINFO_RESPONSE_CODE, &status);
725 // should we do something about incomplete transfers?
726 if (status != 200 && dl == 0. && strchr(mode, 'w'))
727 unlink(R_ExpandFileName(translateChar(STRING_ELT(sfile, i))));
728 }
729 curl_multi_remove_handle(mhnd, hnd[i]);
730 curl_easy_cleanup(hnd[i]);
731 }
732 curl_multi_cleanup(mhnd);
733 curl_slist_free_all(headers);
734
735 if(nurls > 1) {
736 if (n_err == nurls) error(_("cannot download any files"));
737 else if (n_err) warning(_("some files were not downloaded"));
738 } else if(n_err) {
739 if (status != 200)
740 error(_("cannot open URL '%s'"), CHAR(STRING_ELT(scmd, 0)));
741 else
742 error(_("download from '%s' failed"), CHAR(STRING_ELT(scmd, 0)));
743 }
744
745 return ScalarInteger(0);
746 #endif
747 }
748
749 /* -------------------------- connections part ------------------------*/
750
751 /* Unfortunately the libcurl interface is not well adapted to reading
752 data in user-requested chunks.
753
754 But it does read in up to CURL_MAX_WRITE_SIZE chunks, which is 16K.
755 So we implement a buffer which holds two chunks, and when what we
756 have is not enough we move down what it left and fetch another
757 chunk above it. For safety, the buffer is expandable but this
758 should not be exercised.
759
760 It seems that expanding was being done by a couple of packages and
761 gave a use-after-free error with libcurl 7.64.0. So initial size
762 increased to 16x.
763
764 An alternative design would be for consumeData to return what is
765 available and reset current. Then rcvData would only be called on
766 a completely empty buffer.
767 */
768
769 #include <Rconnections.h>
770
771 #define R_MIN(a, b) ((a) < (b) ? (a) : (b))
772
773 #ifdef HAVE_LIBCURL
774 typedef struct Curlconn {
775 char *buf, *current; // base of buffer, last read address
776 size_t bufsize, filled; // buffer size, amount which has been filled
777 Rboolean available; // to be read out
778 int sr; // 'still running' count
779 CURLM *mh; CURL *hnd;
780 struct curl_slist *headers;
781 } *RCurlconn;
782
rcvData(void * ptr,size_t size,size_t nitems,void * ctx)783 static size_t rcvData(void *ptr, size_t size, size_t nitems, void *ctx)
784 {
785 RCurlconn ctxt = (RCurlconn) ctx;
786
787 /* move down any unused data: can overlap */
788 if (ctxt->filled) memmove(ctxt->buf, ctxt->current, ctxt->filled);
789
790 size_t add = size * nitems;
791 if (add) {
792 /* Allocate more space if required: unlikely.
793 Do so as an integer multiple of the current size.
794 */
795 if (ctxt->filled + add > ctxt->bufsize) {
796 int mult = (int) ceil((double)(ctxt->filled + add)/ctxt->bufsize);
797 size_t newbufsize = mult * ctxt->bufsize;
798 void *newbuf = realloc(ctxt->buf, newbufsize);
799 if (!newbuf) error("Failure in re-allocation in rcvData");
800 ctxt->buf = newbuf; ctxt->bufsize = newbufsize;
801 }
802
803 memcpy(ctxt->buf + ctxt->filled, ptr, add);
804 ctxt->filled += add;
805 ctxt->available = TRUE;
806 }
807 ctxt->current = ctxt->buf;
808 return add;
809 }
810
consumeData(void * ptr,size_t max,RCurlconn ctxt)811 static size_t consumeData(void *ptr, size_t max, RCurlconn ctxt)
812 {
813 size_t size = R_MIN(ctxt->filled, max); // guaranteed > 0
814 memcpy(ptr, ctxt->current, size);
815 ctxt->current += size; ctxt->filled -= size;
816 return size;
817 }
818
819 /*
820 return: number of errors encountered
821 */
fetchData(RCurlconn ctxt)822 static int fetchData(RCurlconn ctxt)
823 {
824 int repeats = 0;
825 CURLM *mhnd = ctxt->mh;
826
827 do {
828 int numfds;
829 CURLMcode mc = curl_multi_wait(mhnd, NULL, 0, 100, &numfds);
830 if (mc != CURLM_OK) {
831 warning("curl_multi_wait() failed, code %d", mc);
832 break;
833 }
834 if (!numfds) {
835 if (repeats++ > 0) Rsleep(0.1);
836 } else repeats = 0;
837 curl_multi_perform(mhnd, &ctxt->sr);
838 if (ctxt->available) break;
839 R_ProcessEvents();
840 } while(ctxt->sr);
841
842 return curlMultiCheckerrs(mhnd);
843 }
844
Curl_close(Rconnection con)845 static void Curl_close(Rconnection con)
846 {
847 RCurlconn ctxt = (RCurlconn)(con->private);
848
849 curl_slist_free_all(ctxt->headers);
850 curl_multi_remove_handle(ctxt->mh, ctxt->hnd);
851 curl_easy_cleanup(ctxt->hnd);
852 curl_multi_cleanup(ctxt->mh);
853 con->isopen = FALSE;
854 }
855
Curl_destroy(Rconnection con)856 static void Curl_destroy(Rconnection con)
857 {
858 RCurlconn ctxt;
859
860 if (NULL == con)
861 return;
862 ctxt = (RCurlconn)(con->private);
863
864 if (NULL == ctxt)
865 return;
866
867 free(ctxt->buf);
868 free(ctxt);
869 }
870
Curl_read(void * ptr,size_t size,size_t nitems,Rconnection con)871 static size_t Curl_read(void *ptr, size_t size, size_t nitems,
872 Rconnection con)
873 {
874 RCurlconn ctxt = (RCurlconn)(con->private);
875 size_t nbytes = size*nitems;
876 char *p = (char *) ptr;
877 size_t total = consumeData(ptr, nbytes, ctxt);
878 int n_err = 0;
879 while((total < nbytes) && ctxt->sr) {
880 n_err += fetchData(ctxt);
881 total += consumeData(p + total, (nbytes - total), ctxt);
882 }
883 if (n_err != 0) {
884 Curl_close(con);
885 error(_("cannot read from connection"), n_err);
886 }
887 return total/size;
888 }
889
Curl_open(Rconnection con)890 static Rboolean Curl_open(Rconnection con)
891 {
892 char *url = con->description;
893 RCurlconn ctxt = (RCurlconn)(con->private);
894 int mlen;
895
896 if (con->mode[0] != 'r') {
897 REprintf("can only open URLs for reading");
898 return FALSE;
899 }
900
901 ctxt->hnd = curl_easy_init();
902 curl_easy_setopt(ctxt->hnd, CURLOPT_URL, url);
903 curl_easy_setopt(ctxt->hnd, CURLOPT_FAILONERROR, 1L);
904 curlCommon(ctxt->hnd, 1, 1);
905 curl_easy_setopt(ctxt->hnd, CURLOPT_NOPROGRESS, 1L);
906 #if (LIBCURL_VERSION_MINOR >= 25)
907 curl_easy_setopt(ctxt->hnd, CURLOPT_TCP_KEEPALIVE, 1L);
908 #endif
909
910 if (ctxt->headers) {
911 curl_easy_setopt(ctxt->hnd, CURLOPT_HTTPHEADER, ctxt->headers);
912 }
913 curl_easy_setopt(ctxt->hnd, CURLOPT_WRITEFUNCTION, rcvData);
914 curl_easy_setopt(ctxt->hnd, CURLOPT_WRITEDATA, ctxt);
915 ctxt->mh = curl_multi_init();
916 curl_multi_add_handle(ctxt->mh, ctxt->hnd);
917
918 ctxt->current = ctxt->buf; ctxt->filled = 0; ctxt->available = FALSE;
919
920 // Establish the connection: not clear if we should do this now.
921 ctxt->sr = 1;
922 int n_err = 0;
923 while(ctxt->sr && !ctxt->available)
924 n_err += fetchData(ctxt);
925 if (n_err != 0) {
926 Curl_close(con);
927 error(_("cannot open the connection to '%s'"), url);
928 }
929
930 con->isopen = TRUE;
931 con->canwrite = (con->mode[0] == 'w' || con->mode[0] == 'a');
932 con->canread = !con->canwrite;
933 mlen = (int) strlen(con->mode);
934 if (mlen >= 2 && con->mode[mlen - 1] == 'b') con->text = FALSE;
935 else con->text = TRUE;
936 con->save = -1000;
937 set_iconv(con);
938 return TRUE;
939 }
940
Curl_fgetc_internal(Rconnection con)941 static int Curl_fgetc_internal(Rconnection con)
942 {
943 unsigned char c;
944 size_t n = Curl_read(&c, 1, 1, con);
945 return (n == 1) ? c : R_EOF;
946 }
947 #endif
948
949
950 // 'type' is unused.
951 Rconnection
in_newCurlUrl(const char * description,const char * const mode,SEXP headers,int type)952 in_newCurlUrl(const char *description, const char * const mode,
953 SEXP headers, int type)
954 {
955 #ifdef HAVE_LIBCURL
956 Rconnection new = (Rconnection) malloc(sizeof(struct Rconn));
957 if (!new) error(_("allocation of url connection failed"));
958 new->class = (char *) malloc(strlen("url-libcurl") + 1);
959 if (!new->class) {
960 free(new);
961 error(_("allocation of url connection failed"));
962 /* for Solaris 12.5 */ new = NULL;
963 }
964 strcpy(new->class, "url-libcurl");
965 new->description = (char *) malloc(strlen(description) + 1);
966 if (!new->description) {
967 free(new->class); free(new);
968 error(_("allocation of url connection failed"));
969 /* for Solaris 12.5 */ new = NULL;
970 }
971 init_con(new, description, CE_NATIVE, mode);
972 new->canwrite = FALSE;
973 new->open = &Curl_open;
974 new->close = &Curl_close;
975 new->destroy = &Curl_destroy;
976 new->fgetc_internal = &Curl_fgetc_internal;
977 new->fgetc = &dummy_fgetc;
978 new->read = &Curl_read;
979 new->private = (void *) malloc(sizeof(struct Curlconn));
980 if (!new->private) {
981 free(new->description); free(new->class); free(new);
982 error(_("allocation of url connection failed"));
983 /* for Solaris 12.5 */ new = NULL;
984 }
985 RCurlconn ctxt = (RCurlconn) new->private;
986 ctxt->bufsize = 16 * CURL_MAX_WRITE_SIZE;
987 ctxt->buf = malloc(ctxt->bufsize);
988 if (!ctxt->buf) {
989 free(new->description); free(new->class); free(new->private);
990 free(new);
991 error(_("allocation of url connection failed"));
992 /* for Solaris 12.5 */ new = NULL;
993 }
994 ctxt->headers = NULL;
995 for (int i = 0; i < LENGTH(headers); i++) {
996 struct curl_slist *tmp =
997 curl_slist_append(ctxt->headers, CHAR(STRING_ELT(headers, i)));
998 if (!tmp) {
999 free(new->description); free(new->class); free(new->private);
1000 free(new); curl_slist_free_all(ctxt->headers);
1001 error(_("allocation of url connection failed"));
1002 /* for Solaris 12.5 */ new = NULL;
1003 }
1004 ctxt->headers = tmp;
1005 }
1006 return new;
1007 #else
1008 error(_("url(method = \"libcurl\") is not supported on this platform"));
1009 return (Rconnection)0; /* -Wall */
1010 #endif
1011 }
1012