1 /*
2  *  R : A Computer Language for Statistical Data Analysis
3  *  Copyright (C) 2015-2020 The R Core Team
4  *
5  *  This program is free software; you can redistribute it and/or modify
6  *  it under the terms of the GNU General Public License as published by
7  *  the Free Software Foundation; either version 2 of the License, or
8  *  (at your option) any later version.
9  *
10  *  This program is distributed in the hope that it will be useful,
11  *  but WITHOUT ANY WARRANTY; without even the implied warranty of
12  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13  *  GNU General Public License for more details.
14  *
15  *  You should have received a copy of the GNU General Public License
16  *  along with this program; if not, a copy is available at
17  *  https://www.R-project.org/Licenses/
18  */
19 
20 #ifdef HAVE_CONFIG_H
21 # include <config.h>
22 #endif
23 
24 #ifdef Win32
25 # define R_USE_SIGNALS 1
26 #endif
27 #include <Defn.h>
28 #include <Internal.h>
29 #include <Fileio.h>
30 #include <errno.h>
31 
32 #ifdef HAVE_UNISTD_H
33 // for unlink
34 # include <unistd.h>
35 #endif
36 
37 #ifdef HAVE_LIBCURL
38 # include <curl/curl.h>
39 /*
40   This needed libcurl >= 7.28.0 (Oct 2012) for curl_multi_wait.
41   Substitute code is provided for a Unix-alike only.
42 
43   There is a configure test but it is not used on Windows and system
44   software can change.
45 */
46 # ifdef Win32
47 #  if LIBCURL_VERSION_MAJOR < 7 || (LIBCURL_VERSION_MAJOR == 7 && LIBCURL_VERSION_MINOR < 28)
48 #  error libcurl 7.28.0 or later is required.
49 #  endif
50 # else
51 #  if LIBCURL_VERSION_MAJOR < 7 || (LIBCURL_VERSION_MAJOR == 7 && LIBCURL_VERSION_MINOR < 22)
52 #  error libcurl 7.22.0 or later is required.
53 #  endif
54 # endif
55 extern void Rsleep(double timeint);
56 #endif
57 
58 static int current_timeout = 0;
59 
60 # if (LIBCURL_VERSION_MAJOR == 7 && LIBCURL_VERSION_MINOR < 28)
61 
62 // curl/curl.h includes <sys/select.h> and headers it requires.
63 
64 #define curl_multi_wait R_curl_multi_wait
65 
66 
67 static CURLMcode
R_curl_multi_wait(CURLM * multi_handle,void * unused,unsigned int extra,int timeout_ms,int * ret)68 R_curl_multi_wait(CURLM *multi_handle,
69 		  /* IGNORED */ void *unused,
70 		  /* IGNORED */ unsigned int extra,
71 		  int timeout_ms, int *ret)
72 {
73     fd_set fdread;
74     fd_set fdwrite;
75     fd_set fdexcep;
76     FD_ZERO(&fdread);
77     FD_ZERO(&fdwrite);
78     FD_ZERO(&fdexcep);
79 
80     struct timeval timeout;
81 
82     timeout.tv_sec = timeout_ms / 1000;
83     timeout.tv_usec = (timeout_ms % 1000) * 1000;
84 
85     int maxfd = -1;
86     CURLMcode
87 	mc = curl_multi_fdset(multi_handle, &fdread, &fdwrite, &fdexcep, &maxfd);
88     if (maxfd == -1) {
89 	*ret = 0;
90 	Rsleep(0.1);
91     } else
92 	*ret = select(maxfd+1, &fdread, &fdwrite, &fdexcep, &timeout);
93 
94     return mc;
95 }
96 #endif
97 
in_do_curlVersion(SEXP call,SEXP op,SEXP args,SEXP rho)98 SEXP attribute_hidden in_do_curlVersion(SEXP call, SEXP op, SEXP args, SEXP rho)
99 {
100     checkArity(op, args);
101     SEXP ans = PROTECT(allocVector(STRSXP, 1));
102 #ifdef HAVE_LIBCURL
103     curl_version_info_data *d = curl_version_info(CURLVERSION_NOW);
104     SET_STRING_ELT(ans, 0, mkChar(d->version));
105     SEXP sSSLVersion = install("ssl_version");
106     setAttrib(ans, sSSLVersion,
107 	      mkString(d->ssl_version ? d->ssl_version : "none"));
108     SEXP sLibSSHVersion = install("libssh_version");
109     setAttrib(ans, sLibSSHVersion,
110 	      mkString(((d->age >= 3) && d->libssh_version) ? d->libssh_version : ""));
111     const char * const *p;
112     int n, i;
113     for (p = d->protocols, n = 0; *p; p++, n++) ;
114     SEXP protocols = PROTECT(allocVector(STRSXP, n));
115     for (p = d->protocols, i = 0; i < n; i++, p++)
116 	SET_STRING_ELT(protocols, i, mkChar(*p));
117     setAttrib(ans, install("protocols"), protocols);
118     UNPROTECT(1);
119 #else
120     SET_STRING_ELT(ans, 0, mkChar(""));
121 #endif
122     UNPROTECT(1);
123     return ans;
124 }
125 
126 #ifdef HAVE_LIBCURL
http_errstr(const long status)127 static const char *http_errstr(const long status)
128 {
129     const char *str;
130     switch(status) {
131     case 400: str = "Bad Request"; break;
132     case 401: str = "Unauthorized"; break;
133     case 402: str = "Payment Required"; break;
134     case 403: str = "Forbidden"; break;
135     case 404: str = "Not Found"; break;
136     case 405: str = "Method Not Allowed"; break;
137     case 406: str = "Not Acceptable"; break;
138     case 407: str = "Proxy Authentication Required"; break;
139     case 408: str = "Request Timeout"; break;
140     case 409: str = "Conflict"; break;
141     case 410: str = "Gone"; break;
142     case 411: str = "Length Required"; break;
143     case 412: str = "Precondition Failed"; break;
144     case 413: str = "Request Entity Too Large"; break;
145     case 414: str = "Request-URI Too Long"; break;
146     case 415: str = "Unsupported Media Type"; break;
147     case 416: str = "Requested Range Not Satisfiable"; break;
148     case 417: str = "Expectation Failed"; break;
149     case 500: str = "Internal Server Error"; break;
150     case 501: str = "Not Implemented"; break;
151     case 502: str = "Bad Gateway"; break;
152     case 503: str = "Service Unavailable"; break;
153     case 504: str = "Gateway Timeout"; break;
154     default: str = "Unknown Error"; break;
155     }
156     return str;
157 }
158 
ftp_errstr(const long status)159 static const char *ftp_errstr(const long status)
160 {
161     const char *str;
162     switch (status) {
163     case 421: str = "Service not available, closing control connection"; break;
164     case 425: str = "Cannot open data connection"; break;
165     case 426: str = "Connection closed; transfer aborted"; break;
166     case 430: str = "Invalid username or password"; break;
167     case 434: str = "Requested host unavailable"; break;
168     case 450: str = "Requested file action not taken"; break;
169     case 451: str = "Requested action aborted; local error in processing"; break;
170     case 452:
171 	str = "Requested action not taken; insufficient storage space in system";
172 	break;
173     case 501: str = "Syntax error in parameters or arguments"; break;
174     case 502: str = "Command not implemented"; break;
175     case 503: str = "Bad sequence of commands"; break;
176     case 504: str = "Command not implemented for that parameter"; break;
177     case 530: str = "Not logged in"; break;
178     case 532: str = "Need account for storing files"; break;
179     case 550:
180 	str = "Requested action not taken; file unavailable";
181 	break;
182     case 551: str = "Requested action aborted; page type unknown"; break;
183     case 552:
184 	str = "Requested file action aborted; exceeded storage allocation";
185 	break;
186     case 553: str = "Requested action not taken; file name not allowed"; break;
187     default: str = "Unknown Error"; break;
188     }
189     return str;
190 }
191 
192 /*
193   Check curl_multi_info_read for errors, reporting as warnings
194 
195   Return: number of errors encountered
196  */
curlMultiCheckerrs(CURLM * mhnd)197 static int curlMultiCheckerrs(CURLM *mhnd)
198 {
199     int retval = 0;
200     for(int n = 1; n > 0;) {
201 	CURLMsg *msg = curl_multi_info_read(mhnd, &n);
202 	if (msg && (msg->data.result != CURLE_OK)) {
203 	    const char *url, *strerr, *type;
204 	    long status = 0;
205 	    curl_easy_getinfo(msg->easy_handle, CURLINFO_EFFECTIVE_URL, &url);
206 	    curl_easy_getinfo(msg->easy_handle, CURLINFO_RESPONSE_CODE,
207 			      &status);
208 	    // This reports the redirected URL
209 	    if (status >= 400) {
210 		if (url && url[0] == 'h') {
211 		    strerr = http_errstr(status);
212 		    type = "HTTP";
213 		} else {
214 		    strerr = ftp_errstr(status);
215 		    type = "FTP";
216 		}
217 		warning(_("cannot open URL '%s': %s status was '%d %s'"),
218 			url, type, status, strerr);
219 	    } else {
220 		strerr = curl_easy_strerror(msg->data.result);
221 		if (streql(strerr, "Timeout was reached"))
222 		    warning(_("URL '%s': Timeout of %d seconds was reached"),
223 			    url, current_timeout);
224 		else
225 		    warning(_("URL '%s': status was '%s'"), url, strerr);
226 	    }
227 	    retval++;
228 	}
229     }
230     return retval;
231 }
curlCommon(CURL * hnd,int redirect,int verify)232 static void curlCommon(CURL *hnd, int redirect, int verify)
233 {
234     const char *capath = getenv("CURL_CA_BUNDLE");
235     if (verify) {
236 	if (capath && capath[0])
237 	    curl_easy_setopt(hnd, CURLOPT_CAINFO, capath);
238 #ifdef Win32
239 	else
240 	    curl_easy_setopt(hnd, CURLOPT_SSL_VERIFYPEER, 0L);
241 #endif
242     } else {
243 	curl_easy_setopt(hnd, CURLOPT_SSL_VERIFYHOST, 0L);
244 	curl_easy_setopt(hnd, CURLOPT_SSL_VERIFYPEER, 0L);
245     }
246 #if 0
247     // for consistency, but all utils:::makeUserAgent does is look up an option.
248     SEXP sMakeUserAgent = install("makeUserAgent");
249     SEXP agentFun = PROTECT(lang2(sMakeUserAgent, ScalarLogical(0)));
250     SEXP utilsNS = PROTECT(R_FindNamespace(mkString("utils")));
251     SEXP sua = eval(agentFun, utilsNS);
252     UNPROTECT(1); /* utilsNS */
253     PROTECT(sua);
254     if(TYPEOF(sua) != NILSXP)
255 	curl_easy_setopt(hnd, CURLOPT_USERAGENT, CHAR(STRING_ELT(sua, 0)));
256     UNPROTECT(2);
257 #else
258     int Default = 1;
259     SEXP sua = GetOption1(install("HTTPUserAgent")); // set in utils startup
260     if (TYPEOF(sua) == STRSXP && LENGTH(sua) == 1 ) {
261 	const char *p = CHAR(STRING_ELT(sua, 0));
262 	if (p[0] && p[1] && p[2] && p[0] == 'R' && p[1] == ' ' && p[2] == '(') {
263 	} else {
264 	    Default = 0;
265 	    curl_easy_setopt(hnd, CURLOPT_USERAGENT, p);
266 	}
267     }
268     if (Default) {
269 	char buf[20];
270 	curl_version_info_data *d = curl_version_info(CURLVERSION_NOW);
271 	snprintf(buf, 20, "libcurl/%s", d->version);
272 	curl_easy_setopt(hnd, CURLOPT_USERAGENT, buf);
273     }
274 #endif
275     int timeout0 = asInteger(GetOption1(install("timeout")));
276     long timeout = (timeout0 == NA_INTEGER) ? 0 : (1000L * timeout0);
277     current_timeout = (timeout0 == NA_INTEGER) ? 0 : timeout0;
278     curl_easy_setopt(hnd, CURLOPT_CONNECTTIMEOUT_MS, timeout);
279     curl_easy_setopt(hnd, CURLOPT_TIMEOUT_MS, timeout);
280     if (redirect) {
281 	curl_easy_setopt(hnd, CURLOPT_FOLLOWLOCATION, 1L);
282 	curl_easy_setopt(hnd, CURLOPT_MAXREDIRS, 20L);
283     }
284     int verbosity = asInteger(GetOption1(install("internet.info")));
285     if (verbosity < 2) curl_easy_setopt(hnd, CURLOPT_VERBOSE, 1L);
286 
287     // enable the cookie engine, keep cookies in memory
288     curl_easy_setopt(hnd, CURLOPT_COOKIEFILE, "");
289 }
290 
291 static char headers[500][2049]; // allow for terminator
292 static int used;
293 
294 static size_t
rcvHeaders(void * buffer,size_t size,size_t nmemb,void * userp)295 rcvHeaders(void *buffer, size_t size, size_t nmemb, void *userp)
296 {
297     char *d = (char*)buffer;
298     size_t result = size * nmemb, res = result > 2048 ? 2048 : result;
299     if (used >= 500) return result;
300     strncpy(headers[used], d, res);
301     // 'Do not assume that the header line is zero terminated!'
302     headers[used][res] = '\0';
303     used++;
304     return result;
305 }
306 
307 static size_t
rcvBody(void * buffer,size_t size,size_t nmemb,void * userp)308 rcvBody(void *buffer, size_t size, size_t nmemb, void *userp)
309 {
310     // needed to discard spurious ftp 'body' otherwise written to stdout
311     return size * nmemb;
312 }
313 #endif
314 
315 
316 SEXP attribute_hidden
in_do_curlGetHeaders(SEXP call,SEXP op,SEXP args,SEXP rho)317 in_do_curlGetHeaders(SEXP call, SEXP op, SEXP args, SEXP rho)
318 {
319     checkArity(op, args);
320 #ifndef HAVE_LIBCURL
321     error(_("curlGetHeaders is not supported on this platform"));
322     return R_NilValue;
323 #else
324     if (!isString(CAR(args)) || LENGTH(CAR(args)) != 1)
325        error("invalid %s argument", "url");
326     const char *url = translateChar(STRING_ELT(CAR(args), 0));
327     used = 0;
328     int redirect = asLogical(CADR(args));
329     if (redirect == NA_LOGICAL)
330 	error(_("invalid %s argument"), "redirect");
331     int verify = asLogical(CADDR(args));
332     if (verify == NA_LOGICAL)
333 	error(_("invalid %s argument"), "verify");
334     int timeout = asInteger(CADDDR(args));
335     if (timeout == NA_INTEGER)
336 	error(_("invalid %s argument"), "timeout");
337     SEXP sTLS = CAD4R(args);
338     const char *TLS = "";
339     if (isString(sTLS) && LENGTH(sTLS) == 1 && STRING_ELT(sTLS, 0) != NA_STRING)
340 	TLS = translateChar(STRING_ELT(sTLS, 0));
341     else error(_("invalid %s argument"), "TLS");
342 
343     CURL *hnd = curl_easy_init();
344     curl_easy_setopt(hnd, CURLOPT_URL, url);
345     curl_easy_setopt(hnd, CURLOPT_NOPROGRESS, 1L);
346     curl_easy_setopt(hnd, CURLOPT_NOBODY, 1L);
347     curl_easy_setopt(hnd, CURLOPT_HEADERFUNCTION, &rcvHeaders);
348     curl_easy_setopt(hnd, CURLOPT_WRITEHEADER, &headers);
349     /* libcurl (at least 7.40.0) does not respect CURLOPT_NOBODY
350        for some ftp header info (Content-Length and Accept-ranges). */
351     curl_easy_setopt(hnd, CURLOPT_WRITEFUNCTION, &rcvBody);
352     curlCommon(hnd, redirect, verify);
353     if (timeout > 0) {
354 	curl_easy_setopt(hnd, CURLOPT_TIMEOUT, timeout);
355 	current_timeout = timeout;
356     }
357     if (!streql(TLS, "")) {
358 	// 7.34.0 was released 2013-12-17
359 #if LIBCURL_VERSION_MAJOR > 7 || (LIBCURL_VERSION_MAJOR == 7 && LIBCURL_VERSION_MINOR >= 34)
360 	long TLS_ver = CURL_SSLVERSION_TLSv1_0;
361 	if (streql(TLS, "1.0")) TLS_ver = CURL_SSLVERSION_TLSv1_0;
362 	else if (streql(TLS, "1.1")) TLS_ver = CURL_SSLVERSION_TLSv1_1;
363 	else if (streql(TLS, "1.2")) TLS_ver = CURL_SSLVERSION_TLSv1_2;
364 # if LIBCURL_VERSION_MAJOR > 7 ||  (LIBCURL_VERSION_MAJOR == 7 && LIBCURL_VERSION_MINOR >= 52)
365 	else if (streql(TLS, "1.3")) TLS_ver = CURL_SSLVERSION_TLSv1_3;
366 # endif
367 	else error(_("invalid %s argument"), "TLS");
368 	curl_easy_setopt(hnd, CURLOPT_SSLVERSION, TLS_ver);
369 # else
370 	error("TLS argument is unsupported in this libcurl version %d.%d",
371 	      LIBCURL_VERSION_MAJOR, LIBCURL_VERSION_MINOR);
372 #endif
373     }
374 
375     char errbuf[CURL_ERROR_SIZE];
376     curl_easy_setopt(hnd, CURLOPT_ERRORBUFFER, errbuf);
377     // libcurl does not initialize this
378     errbuf[0] = '\0';
379     CURLcode ret = curl_easy_perform(hnd);
380     if (ret != CURLE_OK) {
381 	if (errbuf[0])
382 	    error(_("libcurl error code %d:\n\t%s\n"), ret, errbuf);
383 	else if(ret == 77)
384 	    error(_("libcurl error code %d:\n\t%s\n"), ret,
385 		  "unable to access SSL/TLS CA certificates");
386 	else // rare case, error but no message
387 	    error("libcurl error code %d\n", ret);
388     }
389     long http_code = 0;
390     curl_easy_getinfo (hnd, CURLINFO_RESPONSE_CODE, &http_code);
391     curl_easy_cleanup(hnd);
392 
393     SEXP ans = PROTECT(allocVector(STRSXP, used));
394     for (int i = 0; i < used; i++)
395 	SET_STRING_ELT(ans, i, mkChar(headers[i]));
396     SEXP sStatus = install("status");
397     setAttrib(ans, sStatus, ScalarInteger((int) http_code));
398     UNPROTECT(1);
399     return ans;
400 #endif
401 }
402 
403 #ifdef HAVE_LIBCURL
404 static double total;
405 
406 static int ndashes;
putdashes(int * pold,int new)407 static void putdashes(int *pold, int new)
408 {
409     for (int i = *pold; i < new; i++)  REprintf("=");
410     if (R_Consolefile) fflush(R_Consolefile);
411     *pold = new;
412 }
413 
414 # ifdef Win32
415 // ------- Windows progress bar -----------
416 #include <ga.h>
417 
418 /* We could share this window with internet.c, then re-positioning
419    would apply to both */
420 typedef struct {
421     window wprog;
422     progressbar pb;
423     label l_url;
424     RCNTXT cntxt;
425     int pc;
426 } winprogressbar;
427 
428 static winprogressbar pbar = {NULL, NULL, NULL};
429 
doneprogressbar(void * data)430 static void doneprogressbar(void *data)
431 {
432     winprogressbar *pbar = data;
433     hide(pbar->wprog);
434 }
435 
436 static
progress(void * clientp,double dltotal,double dlnow,double ultotal,double ulnow)437 int progress(void *clientp, double dltotal, double dlnow,
438 	     double ultotal, double ulnow)
439 {
440     static int factor = 1;
441     // we only use downloads.  dltotal may be zero.
442     if (dltotal > 0.) {
443 	if (total == 0.) {
444 	    total = dltotal;
445 	    char *type = NULL;
446 	    CURL *hnd = (CURL *) clientp;
447 	    curl_easy_getinfo(hnd, CURLINFO_CONTENT_TYPE, &type);
448 	    if (total > 1024.0*1024.0)
449 		// might be longer than long, and is on 64-bit windows
450 		REprintf(" length %0.0f bytes (%0.1f MB)\n",
451 			 total, total/1024.0/1024.0);
452 	    else if (total > 10240)
453 		REprintf("Content length %d bytes (%d KB)\n",
454 			 (int)total, (int)(total/1024));
455 	    else
456 		REprintf("Content length %d bytes\n", (int)total);
457 	    R_FlushConsole();
458 	    if(R_Interactive) {
459 		if (total > 1e9) factor = total/1e6; else factor = 1;
460 		setprogressbarrange(pbar.pb, 0, total/factor);
461 		show(pbar.wprog);
462 	    }
463 	}
464 	if (R_Interactive) {
465 	    setprogressbar(pbar.pb, dlnow/factor);
466 	    if (total > 0) {
467 		static char pbuf[30];
468 		int pc = 0.499 + 100.0*dlnow/total;
469 		if (pc > pbar.pc) {
470 		    snprintf(pbuf, 30, "%d%% downloaded", pc);
471 		    settext(pbar.wprog, pbuf);
472 		    pbar.pc = pc;
473 		}
474 	    }
475 	} else putdashes(&ndashes, (int)(50*dlnow/total));
476     }
477     R_ProcessEvents();
478     return 0;
479 }
480 
481 # else
482 // ------- Unix-alike progress bar -----------
483 
484 static
progress(void * clientp,double dltotal,double dlnow,double ultotal,double ulnow)485 int progress(void *clientp, double dltotal, double dlnow,
486 	     double ultotal, double ulnow)
487 {
488     CURL *hnd = (CURL *) clientp;
489     long status;
490     curl_easy_getinfo(hnd, CURLINFO_RESPONSE_CODE, &status);
491 
492     // we only use downloads.  dltotal may be zero.
493     if ((status < 300) && (dltotal > 0.)) {
494 	if (total == 0.) {
495 	    total = dltotal;
496 	    char *type = NULL;
497 	    curl_easy_getinfo(hnd, CURLINFO_CONTENT_TYPE, &type);
498 	    REprintf("Content type '%s'", type ? type : "unknown");
499 	    if (total > 1024.0*1024.0)
500 		// might be longer than long, and is on 64-bit windows
501 		REprintf(" length %0.0f bytes (%0.1f MB)\n",
502 			 total, total/1024.0/1024.0);
503 	    else if (total > 10240)
504 		REprintf(" length %d bytes (%d KB)\n",
505 			 (int)total, (int)(total/1024));
506 	    else
507 		REprintf(" length %d bytes\n", (int)total);
508 	    if (R_Consolefile) fflush(R_Consolefile);
509 	}
510 	putdashes(&ndashes, (int)(50*dlnow/total));
511     }
512     return 0;
513 }
514 # endif // Win32
515 #endif // HAVE_LIBCURL
516 
517 /* download(url, destfile, quiet, mode, headers, cacheOK) */
518 
519 SEXP attribute_hidden
in_do_curlDownload(SEXP call,SEXP op,SEXP args,SEXP rho)520 in_do_curlDownload(SEXP call, SEXP op, SEXP args, SEXP rho)
521 {
522     checkArity(op, args);
523 #ifndef HAVE_LIBCURL
524     error(_("download.file(method = \"libcurl\") is not supported on this platform"));
525     return R_NilValue;
526 #else
527     SEXP scmd, sfile, smode, sheaders;
528     const char *url, *file, *mode;
529     int quiet, cacheOK;
530     struct curl_slist *headers = NULL;
531 
532     scmd = CAR(args); args = CDR(args);
533     if (!isString(scmd) || length(scmd) < 1)
534 	error(_("invalid '%s' argument"), "url");
535     int nurls = length(scmd);
536     sfile = CAR(args); args = CDR(args);
537     if (!isString(sfile) || length(sfile) < 1)
538 	error(_("invalid '%s' argument"), "destfile");
539     if (length(sfile) != length(scmd))
540 	error(_("lengths of 'url' and 'destfile' must match"));
541     quiet = asLogical(CAR(args)); args = CDR(args);
542     if (quiet == NA_LOGICAL)
543 	error(_("invalid '%s' argument"), "quiet");
544     smode =  CAR(args); args = CDR(args);
545     if (!isString(smode) || length(smode) != 1)
546 	error(_("invalid '%s' argument"), "mode");
547     mode = CHAR(STRING_ELT(smode, 0));
548     cacheOK = asLogical(CAR(args)); args = CDR(args);
549     if (cacheOK == NA_LOGICAL)
550 	error(_("invalid '%s' argument"), "cacheOK");
551     sheaders = CAR(args);
552     if(TYPEOF(sheaders) != NILSXP && !isString(sheaders))
553 	error(_("invalid '%s' argument"), "headers");
554     if(TYPEOF(sheaders) != NILSXP) {
555 	for (int i = 0; i < LENGTH(sheaders); i++) {
556 	    struct curl_slist *tmp =
557 		curl_slist_append(headers, CHAR(STRING_ELT(sheaders, i)));
558 	    if (!tmp) {
559 		curl_slist_free_all(headers);
560 		error(_("out of memory"));
561 	    }
562 	    headers = tmp;
563 	}
564     }
565 
566     /* This comes mainly from curl --libcurl on the call used by
567        download.file(method = "curl").
568        Also https://curl.haxx.se/libcurl/c/multi-single.html.
569     */
570 
571     if (!cacheOK) {
572 	/* This _is_ the right way to do this: see §14.9 of
573 	   http://www.w3.org/Protocols/rfc2616/rfc2616-sec14.html */
574 	struct curl_slist *tmp =
575 	    curl_slist_append(headers, "Pragma: no-cache");
576 	if(!tmp) {
577 	    curl_slist_free_all(headers);
578 	    error(_("out of memory"));
579 	}
580 	headers = tmp;
581     }
582 
583     CURLM *mhnd = curl_multi_init();
584     int still_running, repeats = 0, n_err = 0;
585     CURL **hnd[nurls];
586     FILE *out[nurls];
587 
588     for(int i = 0; i < nurls; i++) {
589 	url = CHAR(STRING_ELT(scmd, i));
590 	hnd[i] = curl_easy_init();
591 	curl_easy_setopt(hnd[i], CURLOPT_URL, url);
592 	curl_easy_setopt(hnd[i], CURLOPT_FAILONERROR, 1L);
593 	/* Users will normally expect to follow redirections, although
594 	   that is not the default in either curl or libcurl. */
595 	curlCommon(hnd[i], 1, 1);
596 #if (LIBCURL_VERSION_MINOR >= 25)
597 	curl_easy_setopt(hnd[i], CURLOPT_TCP_KEEPALIVE, 1L);
598 #endif
599 	curl_easy_setopt(hnd[i], CURLOPT_HTTPHEADER, headers);
600 
601 	/* check that destfile can be written */
602 	file = translateChar(STRING_ELT(sfile, i));
603 	out[i] = R_fopen(R_ExpandFileName(file), mode);
604 	if (!out[i]) {
605 	    n_err += 1;
606 	    warning(_("URL %s: cannot open destfile '%s', reason '%s'"),
607 		    url, file, strerror(errno));
608 	    if (nurls == 1) break; else continue;
609 	} else {
610 	    // This uses the internal CURLOPT_WRITEFUNCTION
611 	    curl_easy_setopt(hnd[i], CURLOPT_WRITEDATA, out[i]);
612 	    curl_multi_add_handle(mhnd, hnd[i]);
613 	}
614 
615 	total = 0.;
616 	if (!quiet && nurls <= 1) {
617 	    // It would in principle be possible to have
618 	    // multiple progress bars on Windows.
619 	    curl_easy_setopt(hnd[i], CURLOPT_NOPROGRESS, 0L);
620 	    ndashes = 0;
621 #ifdef Win32
622 	    if (R_Interactive) {
623 		if (!pbar.wprog) {
624 		    pbar.wprog = newwindow(_("Download progress"),
625 					   rect(0, 0, 540, 100),
626 					   Titlebar | Centered);
627 		    setbackground(pbar.wprog, dialog_bg());
628 		    pbar.l_url = newlabel(" ", rect(10, 15, 520, 25),
629 					  AlignCenter);
630 		    pbar.pb = newprogressbar(rect(20, 50, 500, 20),
631 					     0, 1024, 1024, 1);
632 		    pbar.pc = 0;
633 		}
634 
635 		settext(pbar.l_url, url);
636 		setprogressbar(pbar.pb, 0);
637 		settext(pbar.wprog, "Download progress");
638 		show(pbar.wprog);
639 		begincontext(&(pbar.cntxt), CTXT_CCODE, R_NilValue, R_NilValue,
640 			     R_NilValue, R_NilValue, R_NilValue);
641 		pbar.cntxt.cend = &doneprogressbar;
642 		pbar.cntxt.cenddata = &pbar;
643 	    }
644 #endif
645 	    // For libcurl >= 7.32.0 use CURLOPT_XFERINFOFUNCTION
646 	    curl_easy_setopt(hnd[i], CURLOPT_PROGRESSFUNCTION, progress);
647 	    curl_easy_setopt(hnd[i], CURLOPT_PROGRESSDATA, hnd[i]);
648 	} else curl_easy_setopt(hnd[i], CURLOPT_NOPROGRESS, 1L);
649 
650 	/* This would allow the negotiation of compressed HTTP transfers,
651 	   but it is not clear it is always a good idea.
652 	   curl_easy_setopt(hnd[i], CURLOPT_ACCEPT_ENCODING, "gzip, deflate");
653 	*/
654 
655 	if (!quiet) REprintf(_("trying URL '%s'\n"), url);
656     }
657 
658     if (n_err == nurls) {
659 	// no dest files could be opened, so bail out
660 	curl_multi_cleanup(mhnd);
661 	return ScalarInteger(1);
662     }
663 
664     R_Busy(1);
665     //  curl_multi_wait needs curl >= 7.28.0 .
666     curl_multi_perform(mhnd, &still_running);
667     do {
668 	int numfds;
669 	CURLMcode mc = curl_multi_wait(mhnd, NULL, 0, 100, &numfds);
670 	if (mc != CURLM_OK)  { // internal, do not translate
671 	    warning("curl_multi_wait() failed, code %d", mc);
672 	    break;
673 	}
674 	if (!numfds) {
675 	    /* 'numfds' being zero means either a timeout or no file
676 	       descriptors to wait for. Try timeout on first
677 	       occurrence, then assume no file descriptors to wait for
678 	       means 'sleep for 100 milliseconds'.
679 	    */
680 	    if (repeats++ > 0) Rsleep(0.1); // do not block R process
681 	} else repeats = 0;
682 	R_ProcessEvents();
683 	curl_multi_perform(mhnd, &still_running);
684     } while(still_running);
685     R_Busy(0);
686 #ifdef Win32
687     if (R_Interactive && !quiet && nurls<=1) {
688 	endcontext(&(pbar.cntxt));
689 	doneprogressbar(&pbar);
690     } else if (total > 0.) {
691 	REprintf("\n");
692 	R_FlushConsole();
693     }
694 #else
695     if (total > 0.) REprintf("\n");
696     if (R_Consolefile) fflush(R_Consolefile);
697 #endif
698     if (nurls == 1) {
699 	long status;
700 	curl_easy_getinfo(hnd[0], CURLINFO_RESPONSE_CODE, &status);
701 	double cl, dl;
702 	curl_easy_getinfo(hnd[0], CURLINFO_SIZE_DOWNLOAD, &dl);
703 	if (!quiet && status == 200) {
704 	    if (dl > 1024*1024)
705 		REprintf("downloaded %0.1f MB\n\n", (double)dl/1024/1024);
706 	    else if (dl > 10240)
707 		REprintf("downloaded %d KB\n\n", (int) dl/1024);
708 	    else
709 		REprintf("downloaded %d bytes\n\n", (int) dl);
710 	}
711 	curl_easy_getinfo(hnd[0], CURLINFO_CONTENT_LENGTH_DOWNLOAD, &cl);
712 	if (cl >= 0 && dl != cl)
713 	    warning(_("downloaded length %0.f != reported length %0.f"), dl, cl);
714     }
715 
716     n_err += curlMultiCheckerrs(mhnd);
717 
718     long status = 0L;
719     for (int i = 0; i < nurls; i++) {
720 	if (out[i]) {
721 	    fclose(out[i]);
722 	    double dl;
723 	    curl_easy_getinfo(hnd[i], CURLINFO_SIZE_DOWNLOAD, &dl);
724 	    curl_easy_getinfo(hnd[i], CURLINFO_RESPONSE_CODE, &status);
725 	    // should we do something about incomplete transfers?
726 	    if (status != 200 && dl == 0. && strchr(mode, 'w'))
727 		unlink(R_ExpandFileName(translateChar(STRING_ELT(sfile, i))));
728 	}
729 	curl_multi_remove_handle(mhnd, hnd[i]);
730 	curl_easy_cleanup(hnd[i]);
731     }
732     curl_multi_cleanup(mhnd);
733     curl_slist_free_all(headers);
734 
735     if(nurls > 1) {
736 	if (n_err == nurls) error(_("cannot download any files"));
737 	else if (n_err) warning(_("some files were not downloaded"));
738     } else if(n_err) {
739 	if (status != 200)
740 	    error(_("cannot open URL '%s'"), CHAR(STRING_ELT(scmd, 0)));
741 	else
742 	    error(_("download from '%s' failed"), CHAR(STRING_ELT(scmd, 0)));
743     }
744 
745     return ScalarInteger(0);
746 #endif
747 }
748 
749 /* -------------------------- connections part ------------------------*/
750 
751 /* Unfortunately the libcurl interface is not well adapted to reading
752    data in user-requested chunks.
753 
754    But it does read in up to CURL_MAX_WRITE_SIZE chunks, which is 16K.
755    So we implement a buffer which holds two chunks, and when what we
756    have is not enough we move down what it left and fetch another
757    chunk above it.  For safety, the buffer is expandable but this
758    should not be exercised.
759 
760    It seems that expanding was being done by a couple of packages and
761    gave a use-after-free error with libcurl 7.64.0.  So initial size
762    increased to 16x.
763 
764    An alternative design would be for consumeData to return what is
765    available and reset current.  Then rcvData would only be called on
766    a completely empty buffer.
767  */
768 
769 #include <Rconnections.h>
770 
771 #define R_MIN(a, b) ((a) < (b) ? (a) : (b))
772 
773 #ifdef HAVE_LIBCURL
774 typedef struct Curlconn {
775     char *buf, *current; // base of buffer, last read address
776     size_t bufsize, filled;  // buffer size, amount which has been filled
777     Rboolean available; // to be read out
778     int sr; // 'still running' count
779     CURLM *mh; CURL *hnd;
780     struct curl_slist *headers;
781 } *RCurlconn;
782 
rcvData(void * ptr,size_t size,size_t nitems,void * ctx)783 static size_t rcvData(void *ptr, size_t size, size_t nitems, void *ctx)
784 {
785     RCurlconn ctxt = (RCurlconn) ctx;
786 
787     /* move down any unused data: can overlap */
788     if (ctxt->filled) memmove(ctxt->buf, ctxt->current, ctxt->filled);
789 
790     size_t add = size * nitems;
791     if (add) {
792 	/* Allocate more space if required: unlikely.
793 	   Do so as an integer multiple of the current size.
794 	 */
795 	if (ctxt->filled + add > ctxt->bufsize) {
796 	    int mult = (int) ceil((double)(ctxt->filled + add)/ctxt->bufsize);
797 	    size_t newbufsize = mult * ctxt->bufsize;
798 	    void *newbuf = realloc(ctxt->buf, newbufsize);
799 	    if (!newbuf) error("Failure in re-allocation in rcvData");
800 	    ctxt->buf = newbuf; ctxt->bufsize = newbufsize;
801 	}
802 
803 	memcpy(ctxt->buf + ctxt->filled, ptr, add);
804 	ctxt->filled += add;
805 	ctxt->available = TRUE;
806     }
807     ctxt->current = ctxt->buf;
808     return add;
809 }
810 
consumeData(void * ptr,size_t max,RCurlconn ctxt)811 static size_t consumeData(void *ptr, size_t max, RCurlconn ctxt)
812 {
813     size_t size = R_MIN(ctxt->filled, max);  // guaranteed > 0
814     memcpy(ptr, ctxt->current, size);
815     ctxt->current += size; ctxt->filled -= size;
816     return size;
817 }
818 
819 /*
820   return: number of errors encountered
821  */
fetchData(RCurlconn ctxt)822 static int fetchData(RCurlconn ctxt)
823 {
824     int repeats = 0;
825     CURLM *mhnd = ctxt->mh;
826 
827     do {
828 	int numfds;
829 	CURLMcode mc = curl_multi_wait(mhnd, NULL, 0, 100, &numfds);
830 	if (mc != CURLM_OK) {
831 	    warning("curl_multi_wait() failed, code %d", mc);
832 	    break;
833 	}
834 	if (!numfds) {
835 	    if (repeats++ > 0) Rsleep(0.1);
836 	} else repeats = 0;
837 	curl_multi_perform(mhnd, &ctxt->sr);
838 	if (ctxt->available) break;
839 	R_ProcessEvents();
840     } while(ctxt->sr);
841 
842     return curlMultiCheckerrs(mhnd);
843 }
844 
Curl_close(Rconnection con)845 static void Curl_close(Rconnection con)
846 {
847     RCurlconn ctxt = (RCurlconn)(con->private);
848 
849     curl_slist_free_all(ctxt->headers);
850     curl_multi_remove_handle(ctxt->mh, ctxt->hnd);
851     curl_easy_cleanup(ctxt->hnd);
852     curl_multi_cleanup(ctxt->mh);
853     con->isopen = FALSE;
854 }
855 
Curl_destroy(Rconnection con)856 static void Curl_destroy(Rconnection con)
857 {
858     RCurlconn ctxt;
859 
860     if (NULL == con)
861 	return;
862     ctxt = (RCurlconn)(con->private);
863 
864     if (NULL == ctxt)
865 	return;
866 
867     free(ctxt->buf);
868     free(ctxt);
869 }
870 
Curl_read(void * ptr,size_t size,size_t nitems,Rconnection con)871 static size_t Curl_read(void *ptr, size_t size, size_t nitems,
872 			Rconnection con)
873 {
874     RCurlconn ctxt = (RCurlconn)(con->private);
875     size_t nbytes = size*nitems;
876     char *p = (char *) ptr;
877     size_t total = consumeData(ptr, nbytes, ctxt);
878     int n_err = 0;
879     while((total < nbytes) && ctxt->sr) {
880 	n_err += fetchData(ctxt);
881 	total += consumeData(p + total, (nbytes - total), ctxt);
882     }
883     if (n_err != 0) {
884 	Curl_close(con);
885 	error(_("cannot read from connection"), n_err);
886     }
887     return total/size;
888 }
889 
Curl_open(Rconnection con)890 static Rboolean Curl_open(Rconnection con)
891 {
892     char *url = con->description;
893     RCurlconn ctxt = (RCurlconn)(con->private);
894     int mlen;
895 
896     if (con->mode[0] != 'r') {
897 	REprintf("can only open URLs for reading");
898 	return FALSE;
899     }
900 
901     ctxt->hnd = curl_easy_init();
902     curl_easy_setopt(ctxt->hnd, CURLOPT_URL, url);
903     curl_easy_setopt(ctxt->hnd, CURLOPT_FAILONERROR, 1L);
904     curlCommon(ctxt->hnd, 1, 1);
905     curl_easy_setopt(ctxt->hnd, CURLOPT_NOPROGRESS, 1L);
906 #if (LIBCURL_VERSION_MINOR >= 25)
907     curl_easy_setopt(ctxt->hnd, CURLOPT_TCP_KEEPALIVE, 1L);
908 #endif
909 
910     if (ctxt->headers) {
911 	curl_easy_setopt(ctxt->hnd, CURLOPT_HTTPHEADER, ctxt->headers);
912     }
913     curl_easy_setopt(ctxt->hnd, CURLOPT_WRITEFUNCTION, rcvData);
914     curl_easy_setopt(ctxt->hnd, CURLOPT_WRITEDATA, ctxt);
915     ctxt->mh = curl_multi_init();
916     curl_multi_add_handle(ctxt->mh, ctxt->hnd);
917 
918     ctxt->current = ctxt->buf; ctxt->filled = 0; ctxt->available = FALSE;
919 
920     // Establish the connection: not clear if we should do this now.
921     ctxt->sr = 1;
922     int n_err = 0;
923     while(ctxt->sr && !ctxt->available)
924 	n_err += fetchData(ctxt);
925     if (n_err != 0) {
926 	Curl_close(con);
927 	error(_("cannot open the connection to '%s'"), url);
928     }
929 
930     con->isopen = TRUE;
931     con->canwrite = (con->mode[0] == 'w' || con->mode[0] == 'a');
932     con->canread = !con->canwrite;
933     mlen = (int) strlen(con->mode);
934     if (mlen >= 2 && con->mode[mlen - 1] == 'b') con->text = FALSE;
935     else con->text = TRUE;
936     con->save = -1000;
937     set_iconv(con);
938     return TRUE;
939 }
940 
Curl_fgetc_internal(Rconnection con)941 static int Curl_fgetc_internal(Rconnection con)
942 {
943     unsigned char c;
944     size_t n = Curl_read(&c, 1, 1, con);
945     return (n == 1) ? c : R_EOF;
946 }
947 #endif
948 
949 
950 // 'type' is unused.
951 Rconnection
in_newCurlUrl(const char * description,const char * const mode,SEXP headers,int type)952 in_newCurlUrl(const char *description, const char * const mode,
953 	      SEXP headers, int type)
954 {
955 #ifdef HAVE_LIBCURL
956     Rconnection new = (Rconnection) malloc(sizeof(struct Rconn));
957     if (!new) error(_("allocation of url connection failed"));
958     new->class = (char *) malloc(strlen("url-libcurl") + 1);
959     if (!new->class) {
960 	free(new);
961 	error(_("allocation of url connection failed"));
962 	/* for Solaris 12.5 */ new = NULL;
963     }
964     strcpy(new->class, "url-libcurl");
965     new->description = (char *) malloc(strlen(description) + 1);
966     if (!new->description) {
967 	free(new->class); free(new);
968 	error(_("allocation of url connection failed"));
969 	/* for Solaris 12.5 */ new = NULL;
970     }
971     init_con(new, description, CE_NATIVE, mode);
972     new->canwrite = FALSE;
973     new->open = &Curl_open;
974     new->close = &Curl_close;
975     new->destroy = &Curl_destroy;
976     new->fgetc_internal = &Curl_fgetc_internal;
977     new->fgetc = &dummy_fgetc;
978     new->read = &Curl_read;
979     new->private = (void *) malloc(sizeof(struct Curlconn));
980     if (!new->private) {
981 	free(new->description); free(new->class); free(new);
982 	error(_("allocation of url connection failed"));
983 	/* for Solaris 12.5 */ new = NULL;
984     }
985     RCurlconn ctxt = (RCurlconn) new->private;
986     ctxt->bufsize = 16 * CURL_MAX_WRITE_SIZE;
987     ctxt->buf = malloc(ctxt->bufsize);
988     if (!ctxt->buf) {
989 	free(new->description); free(new->class); free(new->private);
990 	free(new);
991 	error(_("allocation of url connection failed"));
992 	/* for Solaris 12.5 */ new = NULL;
993     }
994     ctxt->headers = NULL;
995     for (int i = 0; i < LENGTH(headers); i++) {
996 	struct curl_slist *tmp =
997 	    curl_slist_append(ctxt->headers, CHAR(STRING_ELT(headers, i)));
998 	if (!tmp) {
999 	    free(new->description); free(new->class); free(new->private);
1000 	    free(new); curl_slist_free_all(ctxt->headers);
1001 	    error(_("allocation of url connection failed"));
1002 	    /* for Solaris 12.5 */ new = NULL;
1003 	}
1004 	ctxt->headers = tmp;
1005     }
1006     return new;
1007 #else
1008     error(_("url(method = \"libcurl\") is not supported on this platform"));
1009     return (Rconnection)0; /* -Wall */
1010 #endif
1011 }
1012