1 /*
2  *  R : A Computer Language for Statistical Data Analysis
3  *  Copyright (C) 2000-2021   The R Core Team.
4  *
5  *  This program is free software; you can redistribute it and/or modify
6  *  it under the terms of the GNU General Public License as published by
7  *  the Free Software Foundation; either version 2 of the License, or
8  *  (at your option) any later version.
9  *
10  *  This program is distributed in the hope that it will be useful,
11  *  but WITHOUT ANY WARRANTY; without even the implied warranty of
12  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13  *  GNU General Public License for more details.
14  *
15  *  You should have received a copy of the GNU General Public License
16  *  along with this program; if not, a copy is available at
17  *  https://www.R-project.org/Licenses/
18  */
19 
20 /* Notes on so-called 'Large File Support':
21 
22    The C stdio functions such as fseek and ftell are defined using
23    'long' for file positioning: also fread/fwrite use size_t for size
24    and number of items.  (The latter can cause problems with
25    reading/writing large blocks, but not in R.)  POSIX introduced
26    off_t and fseeko/ftello to allow larger file sizes, since 'long'
27    may limit file positioning to 2GB.  (C99 introduced fpos_t and
28    f[gs]etpos.)
29 
30    Note that the issue really only arises if 'long' is 32-bit, which
31    is not the case on all known 64-bit platforms except Windows.
32    However, off_t (defined in sys/types.h) is itself often 32-bit,
33    which has led to workarounds.  On Linux systems, the macros
34 
35    __USE_FILE_OFFSET64
36    __USE_LARGEFILE64
37 
38    select between __off_t and __off64_t.  Since these are different
39    types, the functions using them have to be remapped, and the
40    __off64_t versions call fopen64, fseeko64, ftello64 and so on.
41 
42    These macros are not intended to be used directly but via (features.h)
43 
44    _FILE_OFFSET_BITS=N  Select default filesystem interface.
45    _LARGEFILE_SOURCE    Some more functions for correct standard I/O.
46    _LARGEFILE64_SOURCE  Additional functionality from LFS for large files.
47 
48    The last makes system calls like open64 visible directly, and so
49    should not be needed in R.
50 
51    This is commonly known as LFS; _but_ 'LFS Linux' is something else.
52    See http://en.wikipedia.org/wiki/Large_file_support and
53    http://www.suse.de/~aj/linux_lfs.html
54 
55    Solaris has a similar scheme: see 'man lf64', 'man lfcompile' and
56    'man lfcompile64'.
57 
58    On macOS, off_t is typedef-ed to __darwin_off_t, which is
59    __int64_t, so the issue never arises.  Similarly on FreeBSD.
60 
61    The situation with Windows is similar, but off64_t, fseeko64 etc
62    need to be selected explicitly (even on Win64).
63 
64    There are also issues with the glob(), readdir(), stat() system
65    calls: see platform.c and sysutils.c
66 
67    saveload.c uses f[gs]etpos: they have 64-bit versions on LFS Linux
68    and Solaris.  But this only used for pre-1.4.0 formats, and fpos_t
69    is 64-bit on Windows.
70 */
71 
72 #ifdef HAVE_CONFIG_H
73 # include <config.h>
74 #endif
75 
76 #define R_USE_SIGNALS 1
77 #include <Defn.h>
78 #include <Internal.h>
79 #include <Fileio.h>
80 #include <Rconnections.h>
81 #include <R_ext/Complex.h>
82 #include <R_ext/R-ftp-http.h>
83 #include <R_ext/RS.h>		/* R_chk_calloc and Free */
84 #include <R_ext/Riconv.h>
85 #include <R_ext/Print.h> // REprintf, REvprintf
86 #undef ERROR			/* for compilation on Windows */
87 
88 #ifdef Win32
89 #include <trioremap.h>
90 #endif
91 
92 int attribute_hidden R_OutputCon; /* used in printutils.c */
93 
94 static void con_destroy(int i);
95 
96 #include <errno.h>
97 
98 #ifdef HAVE_UNISTD_H
99 # include <unistd.h>
100 #endif
101 
102 #ifdef HAVE_FCNTL_H
103 # include <fcntl.h>
104 /* Solaris and AIX define open as open64 under some circumstances */
105 # undef open
106 /* AIX defines truncate as truncate64 under some circumstances */
107 # undef truncate
108 #endif
109 
110 #ifdef HAVE_SYS_STAT_H
111 # include <sys/stat.h>
112 #endif
113 
114 /* This works on Win64 where long is 4 bytes but long long is 8 bytes. */
115 #if defined __GNUC__ && __GNUC__ >= 2
116 __extension__ typedef long long int _lli_t;
117 #else
118 typedef long long int _lli_t;
119 #endif
120 
121 /* Win32 does have popen, but it does not work in GUI applications,
122    so test that later */
123 #ifdef Win32
124 # include <Startup.h>
125 #endif
126 
127 #define NCONNECTIONS 128 /* need one per cluster node */
128 #define NSINKS 21
129 
130 static Rconnection Connections[NCONNECTIONS];
131 static SEXP OutTextData;
132 
133 static int R_SinkNumber;
134 static int SinkCons[NSINKS], SinkConsClose[NSINKS], R_SinkSplit[NSINKS];
135 
136 /* We need a unique id for a connection to ensure that the finalizer
137    does not try to close it after it is already closed.  And that id
138    will be passed as a pointer, so it seemed easiest to use void *.
139 */
140 static void * current_id = NULL;
141 
142 /* ------------- admin functions (see also at end) ----------------- */
143 
NextConnection(void)144 static int NextConnection(void)
145 {
146     int i;
147     for(i = 3; i < NCONNECTIONS; i++)
148 	if(!Connections[i]) break;
149     if(i >= NCONNECTIONS) {
150 	R_gc(); /* Try to reclaim unused ones */
151 	for(i = 3; i < NCONNECTIONS; i++)
152 	    if(!Connections[i]) break;
153 	if(i >= NCONNECTIONS)
154 	    error(_("all connections are in use"));
155     }
156     return i;
157 }
158 
ConnIndex(Rconnection con)159 static int ConnIndex(Rconnection con)
160 {
161     int i;
162     for(i = 0; i < NCONNECTIONS; i++)
163 	if(Connections[i] == con) break;
164     if(i >= NCONNECTIONS)
165 	error(_("connection not found"));
166     return i;
167 }
168 
169 /* internal, not the same as R function getConnection */
getConnection(int n)170 Rconnection getConnection(int n)
171 {
172     Rconnection con = NULL;
173 
174     if(n < 0 || n >= NCONNECTIONS || n == NA_INTEGER ||
175        !(con = Connections[n]))
176 	error(_("invalid connection"));
177     return con;
178 
179 }
180 
181 attribute_hidden
getActiveSink(int n)182 int getActiveSink(int n)
183 {
184     if (n >= R_SinkNumber || n < 0)
185 	return 0;
186     if (R_SinkSplit[R_SinkNumber - n])
187 	return SinkCons[R_SinkNumber - n - 1];
188     else
189 	return 0;
190 }
191 
conFinalizer(SEXP ptr)192 static void conFinalizer(SEXP ptr)
193 {
194     int i, ncon;
195     void *cptr = R_ExternalPtrAddr(ptr);
196 
197     if(!cptr) return;
198 
199     for(i = 3; i < NCONNECTIONS; i++)
200 	if(Connections[i] && Connections[i]->id == cptr) {
201 	    ncon = i;
202 	    break;
203 	}
204     if(i >= NCONNECTIONS) return;
205     {
206 	Rconnection this = getConnection(ncon);
207 	if(strcmp(this->class, "textConnection"))
208 	    warning(_("closing unused connection %d (%s)\n"),
209 		    ncon, this->description);
210     }
211 
212     con_destroy(ncon);
213     R_ClearExternalPtr(ptr); /* not really needed */
214 }
215 
216 
217 /* for use in REvprintf */
218 attribute_hidden
getConnection_no_err(int n)219 Rconnection getConnection_no_err(int n)
220 {
221     Rconnection con = NULL;
222 
223     if(n < 0 || n >= NCONNECTIONS || n == NA_INTEGER ||
224        !(con = Connections[n]))
225 	return NULL;
226     return con;
227 
228 }
229 
set_iconv_error(Rconnection con,char * from,char * to)230 static void NORET set_iconv_error(Rconnection con, char* from, char* to)
231 {
232     char buf[100];
233     snprintf(buf, 100, _("unsupported conversion from '%s' to '%s'"), from, to);
234     con_destroy(ConnIndex(con));
235     error(buf);
236 }
237 
238 /* ------------------- buffering --------------------- */
239 
240 #define RBUFFCON_LEN_DEFAULT 4096
241 
242 # define MAX(a, b) ((a) > (b) ? (a) : (b))
243 # define MIN(a, b) ((a) > (b) ? (b) : (a))
244 
buff_set_len(Rconnection con,size_t len)245 static size_t buff_set_len(Rconnection con, size_t len) {
246     size_t unread_len = 0;
247     unsigned char *buff;
248 
249     if (con->buff_len == len)
250 	return len;
251 
252     if (con->buff) {
253 	unread_len = con->buff_stored_len - con->buff_pos;
254 	len = MAX(len, unread_len);
255     }
256 
257     buff = (unsigned char *)malloc(sizeof(unsigned char) * len);
258 
259     if (con->buff) {
260 	memcpy(buff, con->buff + con->buff_pos, unread_len);
261 	free(con->buff);
262     }
263 
264     con->buff = buff;
265     con->buff_len = len;
266     con->buff_pos = 0;
267     con->buff_stored_len = unread_len;
268 
269     return len;
270 }
271 
buff_init(Rconnection con)272 static void buff_init(Rconnection con)
273 {
274     con->buff_pos = con->buff_stored_len = 0;
275     buff_set_len(con, RBUFFCON_LEN_DEFAULT);
276 }
277 
buff_reset(Rconnection con)278 static void buff_reset(Rconnection con) {
279     size_t unread_len = con->buff_stored_len - con->buff_pos;
280 
281     if (unread_len > 0)
282 	memmove(con->buff, con->buff + con->buff_pos, unread_len);
283 
284     con->buff_pos = 0;
285     con->buff_stored_len = unread_len;
286 }
287 
buff_fill(Rconnection con)288 static size_t buff_fill(Rconnection con) {
289     size_t free_len, read_len;
290 
291     buff_reset(con);
292 
293     free_len = con->buff_len - con->buff_stored_len;
294     read_len = con->read(con->buff, sizeof(unsigned char), free_len, con);
295     if ((int)read_len < 0)
296 	error("error reading from the connection");
297     con->buff_stored_len += read_len;
298 
299     return read_len;
300 }
301 
buff_fgetc(Rconnection con)302 static int buff_fgetc(Rconnection con)
303 {
304     size_t unread_len;
305 
306     unread_len = con->buff_stored_len - con->buff_pos;
307     if (unread_len == 0) {
308 	size_t filled_len = buff_fill(con);
309 	if (filled_len == 0)
310 	    return R_EOF;
311     }
312 
313     return con->buff[con->buff_pos++];
314 }
315 
buff_seek(Rconnection con,double where,int origin,int rw)316 static double buff_seek(Rconnection con, double where, int origin, int rw)
317 {
318     size_t unread_len = con->buff_stored_len - con->buff_pos;
319 
320     if (rw == 2) /* write */
321 	return con->seek(con, where, origin, rw);
322 
323     if (ISNA(where)) /* tell */
324 	return con->seek(con, where, origin, rw) - unread_len;
325 
326     if (origin == 2) { /* current */
327 	if (where < unread_len) {
328 	    con->buff_pos += (size_t) where;
329 	    return con->seek(con, NA_REAL, origin, rw);
330 	} else {
331 	    where -= unread_len;
332 	}
333     }
334     con->buff_pos = con->buff_stored_len = 0;
335 
336     return con->seek(con, where, origin, rw);
337 }
338 
set_buffer(Rconnection con)339 void set_buffer(Rconnection con) {
340     if (con->canread && con->text) {
341 	buff_init(con);
342     }
343 }
344 
set_iconv(Rconnection con)345 void set_iconv(Rconnection con)
346 {
347     void *tmp;
348 
349     /* need to test if this is text, open for reading to writing or both,
350        and set inconv and/or outconv */
351     if(!con->text || !strlen(con->encname) ||
352        strcmp(con->encname, "native.enc") == 0) {
353 	con->UTF8out = FALSE;
354 	return;
355     }
356     if(con->canread) {
357 	size_t onb = 50;
358 	char *ob = con->oconvbuff;
359 	/* UTF8out is set in readLines() and scan()
360 	   Was Windows-only until 2.12.0, but we now require iconv.
361 	 */
362 	Rboolean useUTF8 = !utf8locale && con->UTF8out;
363 	const char *enc =
364 	    streql(con->encname, "UTF-8-BOM") ? "UTF-8" : con->encname;
365 	tmp = Riconv_open(useUTF8 ? "UTF-8" : "", enc);
366 	if(tmp != (void *)-1) con->inconv = tmp;
367 	else set_iconv_error(con, con->encname, useUTF8 ? "UTF-8" : "");
368 	con->EOF_signalled = FALSE;
369 	/* initialize state, and prepare any initial bytes */
370 	Riconv(tmp, NULL, NULL, &ob, &onb);
371 	con->navail = (short)(50-onb); con->inavail = 0;
372 	/* libiconv can handle BOM marks on Windows Unicode files, but
373 	   glibc's iconv cannot. Aargh ... */
374 	if(streql(con->encname, "UCS-2LE") ||
375 	   streql(con->encname, "UTF-16LE")) con->inavail = -2;
376 	/* Discaard BOM */
377 	if(streql(con->encname, "UTF-8-BOM")) con->inavail = -3;
378     }
379     if(con->canwrite) {
380 	size_t onb = 25;
381 	char *ob = con->init_out;
382 	tmp = Riconv_open(con->encname, "");
383 	if(tmp != (void *)-1) con->outconv = tmp;
384 	else set_iconv_error(con, con->encname, "");
385 	/* initialize state, and prepare any initial bytes */
386 	Riconv(tmp, NULL, NULL, &ob, &onb);
387 	ob[25-onb] = '\0';
388     }
389 }
390 
391 
392 /* ------------------- null connection functions --------------------- */
393 
null_open(Rconnection con)394 static Rboolean NORET null_open(Rconnection con)
395 {
396     error(_("%s not enabled for this connection"), "open");
397 }
398 
null_close(Rconnection con)399 static void null_close(Rconnection con)
400 {
401     con->isopen = FALSE;
402 }
403 
null_destroy(Rconnection con)404 static void null_destroy(Rconnection con)
405 {
406     if(con->private) free(con->private);
407 }
408 
null_vfprintf(Rconnection con,const char * format,va_list ap)409 static int NORET null_vfprintf(Rconnection con, const char *format, va_list ap)
410 {
411     error(_("%s not enabled for this connection"), "printing");
412 }
413 
414 /* va_copy is C99, but a draft standard had __va_copy.  Glibc has
415    __va_copy declared unconditionally */
416 
417 
418 #if defined(HAVE_VASPRINTF) && !HAVE_DECL_VASPRINTF
419 int vasprintf(char **strp, const char *fmt, va_list ap);
420 #endif
421 
422 # define BUFSIZE 10000
423 // similar to Rcons_vprintf in printutils.c
dummy_vfprintf(Rconnection con,const char * format,va_list ap)424 int dummy_vfprintf(Rconnection con, const char *format, va_list ap)
425 {
426     R_CheckStack2(BUFSIZE); // prudence
427     char buf[BUFSIZE], *b = buf;
428     int res;
429     const void *vmax = NULL; /* -Wall*/
430     int usedVasprintf = FALSE;
431     va_list aq;
432 
433     va_copy(aq, ap);
434     res = Rvsnprintf_mbcs(buf, BUFSIZE, format, aq);
435     va_end(aq);
436 #ifdef HAVE_VASPRINTF
437     if(res >= BUFSIZE || res < 0) {
438 	res = vasprintf(&b, format, ap);
439 	if (res < 0) {
440 	    b = buf;
441 	    warning(_("printing of extremely long output is truncated"));
442 	    res = (int)strlen(buf);
443 	} else usedVasprintf = TRUE;
444     }
445 #else
446     if(res >= BUFSIZE) { /* res is the desired output length */
447 	vmax = vmaxget();
448 	/* apparently some implementations count short,
449 	   <http://unixpapa.com/incnote/stdio.html>
450 	   so add some margin here */
451 	b = R_alloc(res + 101, sizeof(char));
452 	vsnprintf(b, res + 100, format, ap);
453     } else if(res < 0) {
454 	/* Some non-C99 conforming vsnprintf implementations return -1 on
455 	   truncation instead of only on error. */
456 	vmax = vmaxget();
457 	b = R_alloc(10*BUFSIZE, sizeof(char));
458 	res = Rvsnprintf_mbcs(b, 10*BUFSIZE, format, ap);
459 	if (res < 0 || res >= 10*BUFSIZE) {
460 	    warning(_("printing of extremely long output is truncated"));
461 	    res = (int)strlen(b);
462 	}
463     }
464 #endif /* HAVE_VASPRINTF */
465     if(con->outconv) { /* translate the buffer */
466 	char outbuf[BUFSIZE+1], *ob;
467 	const char *ib = b;
468 	size_t inb = res, onb, ires;
469 	Rboolean again = FALSE;
470 	size_t ninit = strlen(con->init_out);
471 	do {
472 	    onb = BUFSIZE; /* leave space for nul */
473 	    ob = outbuf;
474 	    if(ninit) {
475 		strcpy(ob, con->init_out);
476 		ob += ninit; onb -= ninit; ninit = 0;
477 	    }
478 	    errno = 0;
479 	    ires = Riconv(con->outconv, &ib, &inb, &ob, &onb);
480 	    again = (ires == (size_t)(-1) && errno == E2BIG);
481 	    if(ires == (size_t)(-1) && errno != E2BIG)
482 		/* is this safe? */
483 		warning(_("invalid char string in output conversion"));
484 	    *ob = '\0';
485 	    con->write(outbuf, 1, ob - outbuf, con);
486 	} while(again && inb > 0);  /* it seems some iconv signal -1 on
487 				       zero-length input */
488     } else
489 	con->write(b, 1, res, con);
490     if(vmax) vmaxset(vmax);
491     if(usedVasprintf) free(b);
492     return res;
493 }
494 
dummy_fgetc(Rconnection con)495 int dummy_fgetc(Rconnection con)
496 {
497     int c;
498     Rboolean checkBOM = FALSE, checkBOM8 = FALSE;
499 
500     if(con->inconv) {
501 	while(con->navail <= 0) {
502 	    /* Probably in all cases there will be at most one iteration
503 	       of the loop. It could iterate multiple times only if the input
504 	       encoding could have \r or \n as a part of a multi-byte coded
505 	       character.
506 	    */
507 	    unsigned int i, inew = 0;
508 	    char *p, *ob;
509 	    const char *ib;
510 	    size_t inb, onb, res;
511 
512 	    if(con->EOF_signalled) return R_EOF;
513 	    if(con->inavail == -2) {
514 		con->inavail = 0;
515 		checkBOM = TRUE;
516 	    }
517 	    if(con->inavail == -3) {
518 		con->inavail = 0;
519 		checkBOM8 = TRUE;
520 	    }
521 	    p = con->iconvbuff + con->inavail;
522 	    for(i = con->inavail; i < 25; i++) {
523 		if (con->buff)
524 		    c = buff_fgetc(con);
525 		else
526 		    c = con->fgetc_internal(con);
527 		if(c == R_EOF){ con->EOF_signalled = TRUE; break; }
528 		*p++ = (char) c;
529 		con->inavail++;
530 		inew++;
531 		if(!con->buff && (c == '\n' || c == '\r'))
532 		    /* Possibly a line separator: better stop filling in the
533 		       encoding conversion buffer if not buffering the input
534 		       anyway, as not to confuse interactive applications
535 		       (PR17634).
536 		    */
537 		    break;
538 	    }
539 	    if(inew == 0) return R_EOF;
540 	    if(checkBOM && con->inavail >= 2 &&
541 	       ((int)con->iconvbuff[0] & 0xff) == 255 &&
542 	       ((int)con->iconvbuff[1] & 0xff) == 254) {
543 		con->inavail -= (short) 2;
544 		memmove(con->iconvbuff, con->iconvbuff+2, con->inavail);
545 	    }
546 	    if(inew == 0) return R_EOF;
547 	    if(checkBOM8 && con->inavail >= 3 &&
548 	       !memcmp(con->iconvbuff, "\xef\xbb\xbf", 3)) {
549 		con->inavail -= (short) 3;
550 		memmove(con->iconvbuff, con->iconvbuff+3, con->inavail);
551 	    }
552 	    ib = con->iconvbuff; inb = con->inavail;
553 	    ob = con->oconvbuff; onb = 50;
554 	    errno = 0;
555 	    res = Riconv(con->inconv, &ib, &inb, &ob, &onb);
556 	    con->inavail = (short) inb;
557 	    con->next = con->oconvbuff;
558 	    con->navail = (short)(50 - onb);
559 	    if(res == (size_t)-1) { /* an error condition */
560 		if(errno == EINVAL || errno == E2BIG) {
561 		    /* incomplete input char or no space in output buffer */
562 		    memmove(con->iconvbuff, ib, inb);
563 		} else {/*  EILSEQ invalid input */
564 		    warning(_("invalid input found on input connection '%s'"),
565 			    con->description);
566 		    con->inavail = 0;
567 		    if (con->navail == 0) return R_EOF;
568 		    con->EOF_signalled = TRUE;
569 		}
570 	    }
571 	}
572 	con->navail--;
573 	/* the cast prevents sign extension of 0xFF to -1 (R_EOF) */
574 	return (unsigned char)*con->next++;
575     } else if (con->buff)
576 	return buff_fgetc(con);
577     else
578 	return con->fgetc_internal(con);
579 }
580 
null_fgetc(Rconnection con)581 static int NORET null_fgetc(Rconnection con)
582 {
583     error(_("%s not enabled for this connection"), "'getc'");
584 }
585 
null_seek(Rconnection con,double where,int origin,int rw)586 static double NORET null_seek(Rconnection con, double where, int origin, int rw)
587 {
588     error(_("%s not enabled for this connection"), "'seek'");
589 }
590 
null_truncate(Rconnection con)591 static void NORET null_truncate(Rconnection con)
592 {
593     error(_("%s not enabled for this connection"), "truncation");
594 }
595 
null_fflush(Rconnection con)596 static int null_fflush(Rconnection con)
597 {
598     return 0;
599 }
600 
null_read(void * ptr,size_t size,size_t nitems,Rconnection con)601 static size_t NORET null_read(void *ptr, size_t size, size_t nitems,
602 			Rconnection con)
603 {
604     error(_("%s not enabled for this connection"), "'read'");
605 }
606 
null_write(const void * ptr,size_t size,size_t nitems,Rconnection con)607 static size_t NORET null_write(const void *ptr, size_t size, size_t nitems,
608 			 Rconnection con)
609 {
610     error(_("%s not enabled for this connection"), "'write'");
611 }
612 
init_con(Rconnection new,const char * description,int enc,const char * const mode)613 void init_con(Rconnection new, const char *description, int enc,
614 	      const char * const mode)
615 {
616     strcpy(new->description, description);
617     new->enc = enc;
618     strncpy(new->mode, mode, 4); new->mode[4] = '\0';
619     new->isopen = new->incomplete = new->blocking = new->isGzcon = FALSE;
620     new->canread = new->canwrite = TRUE; /* in principle */
621     new->canseek = FALSE;
622     new->text = TRUE;
623     new->open = &null_open;
624     new->close = &null_close;
625     new->destroy = &null_destroy;
626     new->vfprintf = &null_vfprintf;
627     new->fgetc = new->fgetc_internal = &null_fgetc;
628     new->seek = &null_seek;
629     new->truncate = &null_truncate;
630     new->fflush = &null_fflush;
631     new->read = &null_read;
632     new->write = &null_write;
633     new->nPushBack = 0;
634     new->save = new->save2 = -1000;
635     new->private = NULL;
636     new->inconv = new->outconv = NULL;
637     new->UTF8out = FALSE;
638     new->buff = NULL;
639     new->buff_pos = new->buff_stored_len = new->buff_len = 0;
640     /* increment id, avoid NULL */
641     current_id = (void *)((size_t) current_id+1);
642     if(!current_id) current_id = (void *) 1;
643     new->id = current_id;
644     new->ex_ptr = NULL;
645     new->status = NA_INTEGER;
646 }
647 
648 /* ------------------- file connections --------------------- */
649 
650 #ifdef Win32
651 # define f_seek fseeko64
652 # define f_tell ftello64
653 # define OFF_T off64_t
654 #elif defined(HAVE_OFF_T) && defined(HAVE_FSEEKO)
655 # define f_seek fseeko
656 # define f_tell ftello
657 # define OFF_T off_t
658 #else
659 # define f_seek fseek
660 # define f_tell ftell
661 # define OFF_T long
662 #endif
663 
664 #ifdef Win32
665 size_t Rf_utf8towcs(wchar_t *wc, const char *s, size_t n);
666 #endif
667 
668 typedef struct fileconn {
669     FILE *fp;
670     OFF_T rpos, wpos;
671     Rboolean last_was_write;
672     Rboolean raw;
673 #ifdef Win32
674     Rboolean anon_file;
675     Rboolean use_fgetwc;
676     Rboolean have_wcbuffered;
677     char wcbuf;
678     char name[PATH_MAX+1];
679 #endif
680 } *Rfileconn;
681 
shouldBuffer(int fd)682 static Rboolean shouldBuffer(int fd) {
683 #ifdef HAVE_SYS_STAT_H
684     struct stat sb;
685     int err = fstat(fd, &sb);
686     return err ? FALSE : S_ISREG(sb.st_mode);
687 #else
688     return FALSE;
689 #endif
690 }
691 
692 /* returns FALSE on error */
isDir(FILE * fd)693 static Rboolean isDir(FILE *fd)
694 {
695 #ifdef HAVE_SYS_STAT_H
696     struct stat sb;
697     int err = fstat(fileno(fd), &sb);
698     return err ? FALSE : S_ISDIR(sb.st_mode);
699 #else
700     return FALSE;
701 #endif
702 }
703 
704 /* returns FALSE on error */
isDirPath(const char * path)705 static Rboolean isDirPath(const char *path)
706 {
707 #ifdef HAVE_SYS_STAT_H
708 # ifdef Win32
709     struct _stati64 sb;
710     if (!_stati64(path, &sb) && (sb.st_mode & S_IFDIR))
711 	return TRUE;
712 # else
713     struct stat sb;
714     if (!stat(path, &sb) && S_ISDIR(sb.st_mode))
715 	return TRUE;
716 # endif
717 #endif
718     return FALSE;
719 }
720 
file_open(Rconnection con)721 static Rboolean file_open(Rconnection con)
722 {
723     const char *name;
724     FILE *fp = NULL;
725     Rfileconn this = con->private;
726     Rboolean temp = FALSE;
727 #ifdef HAVE_FCNTL
728     int fd, flags;
729 #endif
730     int mlen = (int) strlen(con->mode); // short
731 
732     if(strlen(con->description) == 0) {
733 	temp = TRUE;
734 	name = R_tmpnam("Rf", R_TempDir);
735     } else name = R_ExpandFileName(con->description);
736     errno = 0; /* some systems require this */
737     if(strcmp(name, "stdin")) {
738 #ifdef Win32
739 	char mode[20]; /* 4 byte mode plus "t,ccs=UTF-16LE" plus one for luck. */
740 	strncpy(mode, con->mode, 4);
741 	mode[4] = '\0';
742 	if (!strpbrk(mode, "bt"))
743 	    strcat(mode, "t");
744 	// See PR#16737, https://docs.microsoft.com/en-us/cpp/c-runtime-library/reference/fopen-wfopen?view=vs-2019
745 	// ccs= is also supported by glibc but not macOS
746 	if (strchr(mode, 't')
747 	    && (!strcmp(con->encname, "UTF-16LE") || !strcmp(con->encname, "UCS-2LE"))) {
748 	    strcat(mode, ",ccs=UTF-16LE");
749 	    if (con->canread) {
750 	    	this->use_fgetwc = TRUE;
751 	    	this->have_wcbuffered = FALSE;
752 	    }
753 	}
754 	if(con->enc == CE_UTF8) {
755 	    int n = strlen(name);
756 	    wchar_t wname[2 * (n+1)], wmode[20];
757 	    mbstowcs(wmode, mode, 19);
758 	    R_CheckStack();
759 	    Rf_utf8towcs(wname, name, n+1);
760 	    fp = _wfopen(wname, wmode);
761 	    if(!fp) {
762 		warning(_("cannot open file '%ls': %s"), wname, strerror(errno));
763 		return FALSE;
764 	    }
765 	    if (isDir(fp)) {
766 		warning(_("cannot open file '%ls': it is a directory"), wname);
767 		fclose(fp);
768 		return FALSE;
769 	    }
770 	} else {
771 	    fp = R_fopen(name, mode);
772 	}
773 #else
774 	fp = R_fopen(name, con->mode);
775 #endif
776     } else {  /* use file("stdin") to refer to the file and not the console */
777 #ifdef HAVE_FDOPEN
778 	int dstdin = dup(0);
779 # ifdef Win32
780 	if (strchr(con->mode, 'b'))
781 	    /* fdopen won't set dstdin to binary mode */
782 	    setmode(dstdin, _O_BINARY);
783 # endif
784         fp = fdopen(dstdin, con->mode);
785 	con->canseek = FALSE;
786 #else
787 	warning(_("cannot open file '%s': %s"), name,
788 		"fdopen is not supported on this platform");
789 #endif
790     }
791     if(!fp) {
792 	warning(_("cannot open file '%s': %s"), name, strerror(errno));
793 	return FALSE;
794     }
795     if (isDir(fp)) {
796 	warning(_("cannot open file '%s': it is a directory"), name);
797 	fclose(fp);
798 	return FALSE;
799     }
800     if(temp) {
801 	/* This will fail on Windows, so arrange to remove in
802 	 * file_close.  An alternative strategy would be to manipulate
803 	 * the underlying file handle to add FILE_SHARE_DELETE (so the
804 	 * unlink is valid) or FILE_FLAG_DELETE_ON_CLOSE.  E.g. create
805 	 * via CreateFile, get an fd by _open_osfhandle and a file
806 	 * stream by fdopen.  See
807 	 * e.g. http://www.codeproject.com/KB/files/handles.aspx
808 	 */
809 	unlink(name);
810 #ifdef Win32
811 	strncpy(this->name, name, PATH_MAX);
812 	this->name[PATH_MAX - 1] = '\0';
813 #endif
814 	free((char *) name); /* only free if allocated by R_tmpnam */
815     }
816 #ifdef Win32
817     this->anon_file = temp;
818 #endif
819     this->fp = fp;
820     con->isopen = TRUE;
821     con->canwrite = (con->mode[0] == 'w' || con->mode[0] == 'a');
822     con->canread = !con->canwrite;
823     if(mlen >= 2 && con->mode[1] == '+')
824 	con->canread = con->canwrite = TRUE;
825     this->last_was_write = !con->canread;
826     this->rpos = 0;
827     if(con->canwrite) this->wpos = f_tell(fp);
828     if(mlen >= 2 && con->mode[mlen-1] == 'b') con->text = FALSE;
829     else con->text = TRUE;
830     con->save = -1000;
831     if (shouldBuffer(fileno(fp)))
832 	set_buffer(con);
833     set_iconv(con);
834 
835 #ifdef HAVE_FCNTL
836     if(!con->blocking) {
837 	fd = fileno(fp);
838 	flags = fcntl(fd, F_GETFL);
839 	flags |= O_NONBLOCK;
840 	fcntl(fd, F_SETFL, flags);
841     }
842 #endif
843     return TRUE;
844 }
845 
file_close(Rconnection con)846 static void file_close(Rconnection con)
847 {
848     Rfileconn this = con->private;
849     if(con->isopen) // && strcmp(con->description, "stdin"))
850 	con->status = fclose(this->fp);
851     con->isopen = FALSE;
852 #ifdef Win32
853     if(this->anon_file) unlink(this->name);
854 #endif
855 }
856 
file_vfprintf(Rconnection con,const char * format,va_list ap)857 static int file_vfprintf(Rconnection con, const char *format, va_list ap)
858 {
859     Rfileconn this = con->private;
860 
861     if(!this->last_was_write) {
862 	this->rpos = f_tell(this->fp);
863 	this->last_was_write = TRUE;
864 	f_seek(this->fp, this->wpos, SEEK_SET);
865     }
866     if(con->outconv) return dummy_vfprintf(con, format, ap);
867     else return vfprintf(this->fp, format, ap);
868 }
869 
file_fgetc_internal(Rconnection con)870 static int file_fgetc_internal(Rconnection con)
871 {
872     Rfileconn this = con->private;
873     FILE *fp = this->fp;
874     int c;
875 
876     if(this->last_was_write) {
877 	this->wpos = f_tell(this->fp);
878 	this->last_was_write = FALSE;
879 	f_seek(this->fp, this->rpos, SEEK_SET);
880     }
881 #ifdef Win32
882     if (this->use_fgetwc) {
883     	if (this->have_wcbuffered) {
884     	    c = this->wcbuf;
885     	    this->have_wcbuffered = FALSE;
886     	} else {
887     	    wint_t wc = fgetwc(fp);
888     	    c = (char) wc & 0xFF;
889     	    this->wcbuf = (char) wc >> 8;
890     	    this->have_wcbuffered = TRUE;
891     	}
892     } else
893 #endif
894     c =fgetc(fp);
895     return feof(fp) ? R_EOF : c;
896 }
897 
file_seek(Rconnection con,double where,int origin,int rw)898 static double file_seek(Rconnection con, double where, int origin, int rw)
899 {
900     Rfileconn this = con->private;
901     FILE *fp = this->fp;
902     OFF_T pos;
903     int whence = SEEK_SET;
904 
905     /* make sure both positions are set */
906     pos = f_tell(fp);
907     if(this->last_was_write) this->wpos = pos; else this->rpos = pos;
908     if(rw == 1) {
909 	if(!con->canread) error(_("connection is not open for reading"));
910 	pos = this->rpos;
911 	this->last_was_write = FALSE;
912     }
913     if(rw == 2) {
914 	if(!con->canwrite) error(_("connection is not open for writing"));
915 	pos = this->wpos;
916 	this->last_was_write = TRUE;
917     }
918     if(ISNA(where)) return (double) pos;
919 
920     switch(origin) {
921     case 2: whence = SEEK_CUR; break;
922     case 3: whence = SEEK_END;
923 //#ifdef Win32
924 	    /* work around a bug in MinGW runtime 3.8 fseeko64, PR#7896
925 	       seems no longer to be needed */
926 //	    if(con->canwrite) fflush(fp);
927 //#endif
928 	    break;
929     default: whence = SEEK_SET;
930     }
931     f_seek(fp, (OFF_T) where, whence);
932     if(this->last_was_write) this->wpos = f_tell(this->fp);
933     else this->rpos = f_tell(this->fp);
934     return (double) pos;
935 }
936 
file_truncate(Rconnection con)937 static void file_truncate(Rconnection con)
938 {
939     Rfileconn this = con->private;
940 #ifdef HAVE_FTRUNCATE
941     FILE *fp = this->fp;
942     int fd = fileno(fp);
943 /* ftruncate64 is in Mingw-64 trunk, but not in current toolkit */
944 # ifdef W64_to_come
945     off64_t size = lseek64(fd, 0, SEEK_CUR);
946 # else
947     OFF_T size = lseek(fd, 0, SEEK_CUR);
948 # endif
949 #endif
950 
951     if(!con->isopen || !con->canwrite)
952 	error(_("can only truncate connections open for writing"));
953 
954     if(!this->last_was_write) this->rpos = f_tell(this->fp);
955 #ifdef W64_to_come
956     if(ftruncate64(fd, size)) error(_("file truncation failed"));
957 #elif defined(HAVE_FTRUNCATE)
958     if(ftruncate(fd, size)) error(_("file truncation failed"));
959 #else
960     error(_("file truncation unavailable on this platform"));
961 #endif
962     this->last_was_write = TRUE;
963     this->wpos = f_tell(this->fp);
964 }
965 
file_fflush(Rconnection con)966 static int file_fflush(Rconnection con)
967 {
968     FILE *fp = ((Rfileconn)(con->private))->fp;
969 
970     return fflush(fp);
971 }
972 
file_read(void * ptr,size_t size,size_t nitems,Rconnection con)973 static size_t file_read(void *ptr, size_t size, size_t nitems,
974 			Rconnection con)
975 {
976     Rfileconn this = con->private;
977     FILE *fp = this->fp;
978 
979     if(this->last_was_write) {
980 	this->wpos = f_tell(this->fp);
981 	this->last_was_write = FALSE;
982 	f_seek(this->fp, this->rpos, SEEK_SET);
983     }
984     return fread(ptr, size, nitems, fp);
985 }
986 
file_write(const void * ptr,size_t size,size_t nitems,Rconnection con)987 static size_t file_write(const void *ptr, size_t size, size_t nitems,
988 			 Rconnection con)
989 {
990     Rfileconn this = con->private;
991     FILE *fp = this->fp;
992 
993     if(!this->last_was_write) {
994 	this->rpos = f_tell(this->fp);
995 	this->last_was_write = TRUE;
996 	f_seek(this->fp, this->wpos, SEEK_SET);
997     }
998     return fwrite(ptr, size, nitems, fp);
999 }
1000 
newfile(const char * description,int enc,const char * mode,int raw)1001 static Rconnection newfile(const char *description, int enc, const char *mode,
1002 			   int raw)
1003 {
1004     Rconnection new;
1005     new = (Rconnection) malloc(sizeof(struct Rconn));
1006     if(!new) error(_("allocation of file connection failed"));
1007     new->class = (char *) malloc(strlen("file") + 1);
1008     if(!new->class) {
1009 	free(new);
1010 	error(_("allocation of file connection failed"));
1011 	/* for Solaris 12.5 */ new = NULL;
1012     }
1013     strcpy(new->class, "file");
1014     new->description = (char *) malloc(strlen(description) + 1);
1015     if(!new->description) {
1016 	free(new->class); free(new);
1017 	error(_("allocation of file connection failed"));
1018 	/* for Solaris 12.5 */ new = NULL;
1019     }
1020     init_con(new, description, enc, mode);
1021     new->open = &file_open;
1022     new->close = &file_close;
1023     new->vfprintf = &file_vfprintf;
1024     new->fgetc_internal = &file_fgetc_internal;
1025     new->fgetc = &dummy_fgetc;
1026     new->seek = &file_seek;
1027     new->truncate = &file_truncate;
1028     new->fflush = &file_fflush;
1029     new->read = &file_read;
1030     new->write = &file_write;
1031     new->canseek = (raw == 0);
1032     new->private = (void *) malloc(sizeof(struct fileconn));
1033     if(!new->private) {
1034 	free(new->description); free(new->class); free(new);
1035 	error(_("allocation of file connection failed"));
1036 	/* for Solaris 12.5 */ new = NULL;
1037     }
1038     ((Rfileconn)(new->private))->raw = raw;
1039 #ifdef Win32
1040     ((Rfileconn)(new->private))->use_fgetwc = FALSE;
1041 #endif
1042     return new;
1043 }
1044 
1045 /* file() is now implemented as an op of do_url */
1046 
1047 
1048 /* ------------------- fifo connections --------------------- */
1049 
1050 #if defined(HAVE_MKFIFO) && defined(HAVE_FCNTL_H)
1051 
1052 #ifdef HAVE_SYS_TYPES_H
1053 # include <sys/types.h>
1054 #endif
1055 
1056 # include <errno.h>
1057 
1058 typedef struct fifoconn {
1059     int fd;
1060 } *Rfifoconn;
1061 
1062 
fifo_open(Rconnection con)1063 static Rboolean fifo_open(Rconnection con)
1064 {
1065     const char *name;
1066     Rfifoconn this = con->private;
1067     int fd, flags, res;
1068     int mlen = (int) strlen(con->mode); // short
1069     struct stat sb;
1070     Rboolean temp = FALSE;
1071 
1072     if(strlen(con->description) == 0) {
1073 	temp = TRUE;
1074 	name = R_tmpnam("Rf", R_TempDir);
1075     } else name = R_ExpandFileName(con->description);
1076     con->canwrite = (con->mode[0] == 'w' || con->mode[0] == 'a');
1077     con->canread = !con->canwrite;
1078     if(mlen >= 2 && con->mode[1] == '+') con->canread = TRUE;
1079 
1080     /* if we are to write, create the fifo if needed */
1081     if(con->canwrite) {
1082 	res = stat(name, &sb);
1083 	if(res) { /* error, does not exist? */
1084 	    errno = 0;
1085 	    res = mkfifo(name, 00644);
1086 	    if(res) {
1087 		warning(_("cannot create fifo '%s', reason '%s'"), name,
1088 			strerror(errno));
1089 	    }
1090 	    if(res) return FALSE;
1091 	} else {
1092 	    if(!(sb.st_mode & S_IFIFO)) {
1093 		warning(_("'%s' exists but is not a fifo"), name);
1094 		return FALSE;
1095 	    }
1096 	}
1097     }
1098 
1099     if(con->canread && con->canwrite) flags = O_RDWR;
1100     else if(con->canread) flags = O_RDONLY;
1101     else flags = O_WRONLY;
1102     if(!con->blocking) flags |= O_NONBLOCK;
1103     if(con->mode[0] == 'a') flags |= O_APPEND;
1104     errno = 0; /* precaution */
1105     fd = open(name, flags);
1106     if(fd < 0) {
1107 	if(errno == ENXIO) warning(_("fifo '%s' is not ready"), name);
1108 	else warning(_("cannot open fifo '%s'"), name);
1109 	return FALSE;
1110     }
1111     if(temp) {
1112 	unlink(name);
1113 	free((char *) name); /* allocated by R_tmpnam */
1114     }
1115 
1116     this->fd = fd;
1117     con->isopen = TRUE;
1118 
1119     if(mlen >= 2 && con->mode[mlen-1] == 'b') con->text = FALSE;
1120     else con->text = TRUE;
1121     set_iconv(con);
1122     con->save = -1000;
1123     return TRUE;
1124 }
1125 
fifo_close(Rconnection con)1126 static void fifo_close(Rconnection con)
1127 {
1128     con->status = close(((Rfifoconn)(con->private))->fd);
1129     con->isopen = FALSE;
1130 }
1131 
fifo_fgetc_internal(Rconnection con)1132 static int fifo_fgetc_internal(Rconnection con)
1133 {
1134     Rfifoconn this = con->private;
1135     unsigned char c;
1136     ssize_t n;
1137 
1138     n = read(this->fd, (char *)&c, 1);
1139     return (n == 1) ? c : R_EOF;
1140 }
1141 
fifo_read(void * ptr,size_t size,size_t nitems,Rconnection con)1142 static size_t fifo_read(void *ptr, size_t size, size_t nitems,
1143 			Rconnection con)
1144 {
1145     Rfifoconn this = con->private;
1146 
1147     /* uses 'size_t' for len */
1148     if ((double) size * (double) nitems > (double) SSIZE_MAX)
1149 	error(_("too large a block specified"));
1150     return read(this->fd, ptr, size * nitems)/size;
1151 }
1152 
fifo_write(const void * ptr,size_t size,size_t nitems,Rconnection con)1153 static size_t fifo_write(const void *ptr, size_t size, size_t nitems,
1154 			 Rconnection con)
1155 {
1156     Rfifoconn this = con->private;
1157 
1158     /* uses 'size_t' for len */
1159     if ((double) size * (double) nitems > (double) SSIZE_MAX)
1160 	error(_("too large a block specified"));
1161     return write(this->fd, ptr, size * nitems)/size;
1162 }
1163 
1164 #elif defined(Win32)  // ----- Windows part ------
1165 
1166 // PR#15600, based on https://github.com/0xbaadf00d/r-project_win_fifo
1167 # define WIN32_LEAN_AND_MEAN 1
1168 #include <windows.h>
1169 #include <wchar.h>
1170 
1171 /* Microsoft addition, not supported in Win XP
1172 errno_t strcat_s(char *strDestination, size_t numberOfElements,
1173 		 const char *strSource);
1174 */
1175 
1176 typedef struct fifoconn
1177 {
1178     HANDLE hdl_namedpipe;
1179     LPOVERLAPPED overlapped_write;
1180 } *Rfifoconn;
1181 
win_getlasterror_str(void)1182 static char* win_getlasterror_str(void)
1183 {
1184     LPVOID lpv_tempmsg = NULL;
1185     unsigned int err_msg_len;
1186     char *err_msg = NULL;
1187 
1188     err_msg_len =
1189 	FormatMessage(FORMAT_MESSAGE_ALLOCATE_BUFFER |
1190 		      FORMAT_MESSAGE_FROM_SYSTEM |
1191 		      FORMAT_MESSAGE_IGNORE_INSERTS, NULL, GetLastError(),
1192 		      MAKELANGID(LANG_NEUTRAL, SUBLANG_DEFAULT),
1193 		      (LPTSTR)&lpv_tempmsg, 0, NULL);
1194     err_msg = (char*) malloc(err_msg_len);
1195     if (!err_msg) return NULL;
1196     ZeroMemory(err_msg, err_msg_len);
1197     strncpy(err_msg, (LPTSTR)lpv_tempmsg, err_msg_len - sizeof(wchar_t));
1198     LocalFree(lpv_tempmsg);
1199     return err_msg;
1200 }
1201 
fifo_open(Rconnection con)1202 static Rboolean	fifo_open(Rconnection con)
1203 {
1204     Rfifoconn this = con->private;
1205     unsigned int uin_mode_len = strlen(con->mode);
1206     char *hch_pipename = NULL;
1207     Rboolean boo_retvalue = TRUE;
1208     const char *pipe_prefix = "\\\\.\\pipe\\";
1209 
1210     /* Prepare FIFO filename */
1211     if (strlen(con->description) == 0)
1212 	hch_pipename = R_tmpnam("fifo", pipe_prefix); /* malloc */
1213     else {
1214 	const char* hch_tempname = R_ExpandFileName(con->description);
1215 	size_t len = strlen(hch_tempname);
1216 	Rboolean add_prefix = FALSE;
1217 	if (strncmp(pipe_prefix, con->description, strlen(pipe_prefix)) != 0) {
1218 	    len += strlen(pipe_prefix);
1219 	    add_prefix = TRUE;
1220 	}
1221 	hch_pipename = (char*) malloc(len+1);
1222 	if (!hch_pipename)
1223 	    error(_("allocation of fifo name failed"));
1224 	if (add_prefix) {
1225 	    strcpy(hch_pipename, pipe_prefix);
1226 	    strcat(hch_pipename, hch_tempname);
1227 	} else
1228 	    strcpy(hch_pipename, hch_tempname);
1229     }
1230 
1231     /* Prepare FIFO open mode */
1232     con->canwrite = (con->mode[0] == 'w' || con->mode[0] == 'a');
1233     con->canread = !con->canwrite;
1234     if (uin_mode_len >= 2 && con->mode[1] == '+') con->canread = TRUE;
1235 
1236     /*
1237     ** FIFO using Windows API -> CreateNamedPipe() OR CreateFile()
1238     ** http://msdn.microsoft.com/en-us/library/windows/desktop/aa363858(v=vs.85).aspx
1239     ** http://msdn.microsoft.com/en-us/library/windows/desktop/aa365150(v=vs.85).aspx
1240     */
1241     this->hdl_namedpipe = NULL;
1242     this->overlapped_write = CreateEventA(NULL, TRUE, TRUE, NULL);
1243     if (con->canwrite) {
1244 	SECURITY_ATTRIBUTES win_namedpipe_secattr = {0};
1245 	win_namedpipe_secattr.nLength = sizeof(SECURITY_ATTRIBUTES);
1246 	win_namedpipe_secattr.lpSecurityDescriptor = NULL;
1247 	win_namedpipe_secattr.bInheritHandle = FALSE;
1248 
1249 	this->hdl_namedpipe =
1250 	    CreateNamedPipeA(hch_pipename,
1251 			     (con->canread ? PIPE_ACCESS_DUPLEX :
1252 			      PIPE_ACCESS_OUTBOUND) | FILE_FLAG_OVERLAPPED,
1253 			     PIPE_TYPE_BYTE, PIPE_UNLIMITED_INSTANCES , 0, 0,
1254 			     FILE_FLAG_NO_BUFFERING, &win_namedpipe_secattr);
1255 	if (this->hdl_namedpipe == INVALID_HANDLE_VALUE) {
1256 	    /*
1257 	    ** If GetLastError() return 231 (All pipe instances are busy) == File
1258 	    ** already exist on Unix/Linux...
1259 	    */
1260 	    if (GetLastError() != 231) {
1261 		char *hch_err_msg = win_getlasterror_str();
1262 		warning(_("cannot create fifo '%s', reason '%s'"),
1263 			hch_pipename, hch_err_msg);
1264 		free(hch_err_msg);
1265 		boo_retvalue = FALSE;
1266 	    }
1267 	}
1268     }
1269 
1270     /* Open existing named pipe */
1271     if ((boo_retvalue || GetLastError() == 231) &&
1272 	this->hdl_namedpipe <= (HANDLE)(LONG_PTR) 0) {
1273 	DWORD dwo_openmode = 0;
1274 	if (con->canread) dwo_openmode |= GENERIC_READ;
1275 	if (con->canwrite) dwo_openmode |= GENERIC_WRITE;
1276 	this->hdl_namedpipe =
1277 	    CreateFileA(hch_pipename, dwo_openmode,
1278 			FILE_SHARE_READ | FILE_SHARE_WRITE,
1279 			NULL, OPEN_EXISTING,
1280 			FILE_ATTRIBUTE_NORMAL | FILE_FLAG_OVERLAPPED,
1281 			NULL);
1282 	if (this->hdl_namedpipe == INVALID_HANDLE_VALUE) {
1283 	    char *hch_err_msg = win_getlasterror_str();
1284 	    warning(_("cannot open fifo '%s', reason '%s'"),
1285 		    hch_pipename, hch_err_msg);
1286 	    free(hch_err_msg);
1287 	    boo_retvalue = FALSE;
1288 	}
1289     }
1290 
1291     /* Free malloc-ed variables */
1292     free(hch_pipename);
1293 
1294     /* Finalize FIFO configuration (only if FIFO is opened/created) */
1295     if (boo_retvalue && this->hdl_namedpipe) {
1296 	con->isopen = TRUE;
1297 	con->text = strchr(con->mode, 'b') ? FALSE : TRUE;
1298 	set_iconv(con);
1299 	con->save = -1000;
1300     }
1301 
1302     /* Done */
1303     return boo_retvalue;
1304 }
1305 
fifo_close(Rconnection con)1306 static void fifo_close(Rconnection con)
1307 {
1308     Rfifoconn this = con->private;
1309     con->isopen = FALSE;
1310     con->status = CloseHandle(this->hdl_namedpipe) ? 0 : -1;
1311     if (this->overlapped_write) CloseHandle(this->overlapped_write);
1312 }
1313 
fifo_read(void * ptr,size_t size,size_t nitems,Rconnection con)1314 static size_t fifo_read(void* ptr, size_t size, size_t nitems, Rconnection con)
1315 {
1316     Rfifoconn this = con->private;
1317     size_t read_byte = 0;
1318 
1319     // avoid integer overflow
1320     if ((double)size * sizeof(wchar_t) * nitems > UINT_MAX)
1321 	error(_("too large a block specified"));
1322 
1323     wchar_t *buffer = (wchar_t*)malloc((size * sizeof(wchar_t)) * nitems);
1324     if (!buffer) error(_("allocation of fifo buffer failed"));
1325     ReadFile(this->hdl_namedpipe, buffer,
1326 	     (size * sizeof(wchar_t)) * nitems, (LPDWORD)&read_byte,
1327 	     this->overlapped_write);
1328     wcstombs(ptr, buffer, read_byte / sizeof(wchar_t));
1329     free(buffer);
1330     return (read_byte / sizeof(wchar_t)) / size;
1331 }
1332 
1333 static size_t
fifo_write(const void * ptr,size_t size,size_t nitems,Rconnection con)1334 fifo_write(const void *ptr, size_t size, size_t nitems, Rconnection con)
1335 {
1336     Rfifoconn this = con->private;
1337     size_t written_bytes = 0;
1338 
1339     if (size * sizeof(wchar_t) * nitems > UINT_MAX)
1340 	error(_("too large a block specified"));
1341 
1342     /* Wait for a client process to connect */
1343     ConnectNamedPipe(this->hdl_namedpipe, NULL);
1344 
1345     /* Convert char* to wchar_t* */
1346     int str_len = size * nitems;
1347     wchar_t *buffer = malloc((str_len + 1) * sizeof(wchar_t));
1348     if (!buffer) error(_("allocation of fifo buffer failed"));
1349     mbstowcs(buffer, (const char*) ptr, str_len);
1350 
1351     /* Write data */
1352     if (WriteFile(this->hdl_namedpipe, buffer,
1353 		  size * sizeof(wchar_t) * nitems, (LPDWORD) &written_bytes,
1354 		  NULL) == FALSE && GetLastError() != ERROR_IO_PENDING) {
1355 	char *hch_err_msg = win_getlasterror_str();
1356 	warning(_("cannot write FIFO '%s'"), hch_err_msg);
1357 	free(hch_err_msg);
1358     }
1359 
1360     /* Free data malloc-ed by windows_towchar */
1361     free(buffer);
1362 
1363     /* Done */
1364     return written_bytes / nitems;
1365 }
1366 
fifo_fgetc_internal(Rconnection con)1367 static int fifo_fgetc_internal(Rconnection con)
1368 {
1369     Rfifoconn  this = con->private;
1370     DWORD available_bytes = 0;
1371     DWORD read_byte = 0;
1372     DWORD len = 1 * sizeof(wchar_t);
1373     wchar_t c;
1374 
1375     /* Check available bytes on named pipe */
1376     PeekNamedPipe(this->hdl_namedpipe, NULL, 0, NULL, &available_bytes, NULL);
1377 
1378     /* Read char if available bytes > 0, otherwize, return R_EOF */
1379     if (available_bytes > 0) {
1380 	ReadFile(this->hdl_namedpipe, &c, len, &read_byte, NULL);
1381 	return (read_byte == len) ? (char) c : R_EOF;
1382     }
1383     return R_EOF;
1384 }
1385 
1386 #endif // WIN32
1387 
newfifo(const char * description,const char * mode)1388 static Rconnection newfifo(const char *description, const char *mode)
1389 {
1390     Rconnection new;
1391     new = (Rconnection) malloc(sizeof(struct Rconn));
1392     if(!new) error(_("allocation of fifo connection failed"));
1393     new->class = (char *) malloc(strlen("fifo") + 1);
1394     if(!new->class) {
1395 	free(new);
1396 	error(_("allocation of fifo connection failed"));
1397 	/* for Solaris 12.5 */ new = NULL;
1398     }
1399     strcpy(new->class, "fifo");
1400     new->description = (char *) malloc(strlen(description) + 1);
1401     if(!new->description) {
1402 	free(new->class); free(new);
1403 	error(_("allocation of fifo connection failed"));
1404 	/* for Solaris 12.5 */ new = NULL;
1405     }
1406     init_con(new, description, CE_NATIVE, mode);
1407     new->open = &fifo_open;
1408     new->close = &fifo_close;
1409     new->vfprintf = &dummy_vfprintf;
1410     new->fgetc_internal = &fifo_fgetc_internal;
1411     new->fgetc = &dummy_fgetc;
1412     new->seek = &null_seek;
1413     new->truncate = &null_truncate;
1414     new->fflush = &null_fflush;
1415     new->read = &fifo_read;
1416     new->write = &fifo_write;
1417     new->private = (void *) malloc(sizeof(struct fifoconn));
1418     if(!new->private) {
1419 	free(new->description); free(new->class); free(new);
1420 	error(_("allocation of fifo connection failed"));
1421 	/* for Solaris 12.5 */ new = NULL;
1422     }
1423     return new;
1424 }
1425 
do_fifo(SEXP call,SEXP op,SEXP args,SEXP env)1426 SEXP attribute_hidden do_fifo(SEXP call, SEXP op, SEXP args, SEXP env)
1427 {
1428 #if (defined(HAVE_MKFIFO) && defined(HAVE_FCNTL_H)) || defined(_WIN32)
1429     SEXP sfile, sopen, ans, class, enc;
1430     const char *file, *open;
1431     int ncon, block;
1432     Rconnection con = NULL;
1433 
1434     checkArity(op, args);
1435     sfile = CAR(args);
1436     if(!isString(sfile) || LENGTH(sfile) != 1 ||
1437        STRING_ELT(sfile, 0) == NA_STRING)
1438 	error(_("invalid '%s' argument"), "description");
1439     if(length(sfile) > 1)
1440 	warning(_("only first element of 'description' argument used"));
1441     file = translateCharFP(STRING_ELT(sfile, 0)); /* for now, like fopen */
1442     sopen = CADR(args);
1443     if(!isString(sopen) || LENGTH(sopen) != 1)
1444 	error(_("invalid '%s' argument"), "open");
1445     block = asLogical(CADDR(args));
1446     if(block == NA_LOGICAL)
1447 	error(_("invalid '%s' argument"), "block");
1448     enc = CADDDR(args);
1449     if(!isString(enc) || LENGTH(enc) != 1 ||
1450        strlen(CHAR(STRING_ELT(enc, 0))) > 100) /* ASCII */
1451 	error(_("invalid '%s' argument"), "encoding");
1452     open = CHAR(STRING_ELT(sopen, 0)); /* ASCII */
1453     if(strlen(file) == 0) {
1454 	if(!strlen(open)) open ="w+";
1455 	if(strcmp(open, "w+") != 0 && strcmp(open, "w+b") != 0) {
1456 	    open ="w+";
1457 	    warning(_("fifo(\"\") only supports open = \"w+\" and open = \"w+b\": using the former"));
1458 	}
1459     }
1460     ncon = NextConnection();
1461     con = Connections[ncon] = newfifo(file, strlen(open) ? open : "r");
1462     con->blocking = block;
1463     strncpy(con->encname, CHAR(STRING_ELT(enc, 0)), 100); /* ASCII */
1464     con->encname[100 - 1] = '\0';
1465     con->ex_ptr = PROTECT(R_MakeExternalPtr(con->id, install("connection"), R_NilValue));
1466 
1467     /* open it if desired */
1468     if(strlen(open)) {
1469 	Rboolean success = con->open(con);
1470 	if(!success) {
1471 	    con_destroy(ncon);
1472 	    error(_("cannot open the connection"));
1473 	}
1474     }
1475 
1476     PROTECT(ans = ScalarInteger(ncon));
1477     PROTECT(class = allocVector(STRSXP, 2));
1478     SET_STRING_ELT(class, 0, mkChar("fifo"));
1479     SET_STRING_ELT(class, 1, mkChar("connection"));
1480     classgets(ans, class);
1481     setAttrib(ans, R_ConnIdSymbol, con->ex_ptr);
1482     R_RegisterCFinalizerEx(con->ex_ptr, conFinalizer, FALSE);
1483     UNPROTECT(3);
1484 
1485     return ans;
1486 #else
1487     error(_("fifo connections are not available on this system"));
1488     return R_NilValue;		/* -Wall */
1489 #endif
1490 }
1491 
1492 /* ------------------- pipe connections --------------------- */
1493 
pipe_open(Rconnection con)1494 static Rboolean pipe_open(Rconnection con)
1495 {
1496     FILE *fp;
1497     char mode[3];
1498     Rfileconn this = con->private;
1499     int mlen;
1500 
1501 #ifdef Win32
1502     strncpy(mode, con->mode, 2);
1503     mode[2] = '\0';
1504 #else
1505     mode[0] = con->mode[0];
1506     mode[1] = '\0';
1507 #endif
1508     errno = 0;
1509 #ifdef Win32
1510     if(con->enc == CE_UTF8) {
1511 	int n = strlen(con->description);
1512 	wchar_t wname[2 * (n+1)], wmode[10];
1513 	R_CheckStack();
1514 	Rf_utf8towcs(wname, con->description, n+1);
1515 	mbstowcs(wmode, con->mode, 10);
1516 	fp = _wpopen(wname, wmode);
1517 	if(!fp) {
1518 	    warning(_("cannot pipe() cmd '%ls': %s"), wname, strerror(errno));
1519 	    return FALSE;
1520 	}
1521     } else
1522 #endif
1523 	fp = R_popen(con->description, mode);
1524     if(!fp) {
1525 	warning(_("cannot open pipe() cmd '%s': %s"), con->description,
1526 		strerror(errno));
1527 	return FALSE;
1528     }
1529     this->fp = fp;
1530     con->isopen = TRUE;
1531     con->canwrite = (con->mode[0] == 'w');
1532     con->canread = !con->canwrite;
1533     mlen = (int) strlen(con->mode);
1534     if(mlen >= 2 && con->mode[mlen - 1] == 'b') con->text = FALSE;
1535     else con->text = TRUE;
1536     this->last_was_write = !con->canread;
1537     this->rpos = this->wpos = 0;
1538     set_iconv(con);
1539     con->save = -1000;
1540     return TRUE;
1541 }
1542 
pipe_close(Rconnection con)1543 static void pipe_close(Rconnection con)
1544 {
1545     con->status = pclose(((Rfileconn)(con->private))->fp);
1546     con->isopen = FALSE;
1547 }
1548 
1549 static Rconnection
newpipe(const char * description,int ienc,const char * mode)1550 newpipe(const char *description, int ienc, const char *mode)
1551 {
1552     Rconnection new;
1553     new = (Rconnection) malloc(sizeof(struct Rconn));
1554     if(!new) error(_("allocation of pipe connection failed"));
1555     new->class = (char *) malloc(strlen("pipe") + 1);
1556     if(!new->class) {
1557 	free(new);
1558 	error(_("allocation of pipe connection failed"));
1559 	/* for Solaris 12.5 */ new = NULL;
1560     }
1561     strcpy(new->class, "pipe");
1562     new->description = (char *) malloc(strlen(description) + 1);
1563     if(!new->description) {
1564 	free(new->class); free(new);
1565 	error(_("allocation of pipe connection failed"));
1566 	/* for Solaris 12.5 */ new = NULL;
1567     }
1568     init_con(new, description, ienc, mode);
1569     new->open = &pipe_open;
1570     new->close = &pipe_close;
1571     new->vfprintf = &file_vfprintf;
1572     new->fgetc_internal = &file_fgetc_internal;
1573     new->fgetc = &dummy_fgetc;
1574     new->fflush = &file_fflush;
1575     new->read = &file_read;
1576     new->write = &file_write;
1577     new->private = (void *) malloc(sizeof(struct fileconn));
1578     if(!new->private) {
1579 	free(new->description); free(new->class); free(new);
1580 	error(_("allocation of pipe connection failed"));
1581 	/* for Solaris 12.5 */ new = NULL;
1582     }
1583     return new;
1584 }
1585 
1586 #ifdef Win32
1587 extern Rconnection
1588 newWpipe(const char *description, int enc, const char *mode);
1589 #endif
1590 
do_pipe(SEXP call,SEXP op,SEXP args,SEXP env)1591 SEXP attribute_hidden do_pipe(SEXP call, SEXP op, SEXP args, SEXP env)
1592 {
1593     SEXP scmd, sopen, ans, class, enc;
1594     const char *file, *open;
1595     int ncon;
1596     cetype_t ienc = CE_NATIVE;
1597     Rconnection con = NULL;
1598 
1599     checkArity(op, args);
1600     scmd = CAR(args);
1601     if(!isString(scmd) || LENGTH(scmd) != 1 ||
1602        STRING_ELT(scmd, 0) == NA_STRING)
1603 	error(_("invalid '%s' argument"), "description");
1604     if(LENGTH(scmd) > 1)
1605 	warning(_("only first element of 'description' argument used"));
1606 #ifdef Win32
1607     if( !IS_ASCII(STRING_ELT(scmd, 0)) ) {
1608 	ienc = CE_UTF8;
1609 	file = trCharUTF8(STRING_ELT(scmd, 0));
1610     } else {
1611 	ienc = CE_NATIVE;
1612 	file = translateCharFP(STRING_ELT(scmd, 0));
1613     }
1614 #else
1615     file = translateCharFP(STRING_ELT(scmd, 0));
1616 #endif
1617     sopen = CADR(args);
1618     if(!isString(sopen) || LENGTH(sopen) != 1)
1619 	error(_("invalid '%s' argument"), "open");
1620     open = CHAR(STRING_ELT(sopen, 0)); /* ASCII */
1621     enc = CADDR(args);
1622     if(!isString(enc) || LENGTH(enc) != 1 ||
1623        strlen(CHAR(STRING_ELT(enc, 0))) > 100) /* ASCII */
1624 	error(_("invalid '%s' argument"), "encoding");
1625 
1626     ncon = NextConnection();
1627 #ifdef Win32
1628     if(CharacterMode != RTerm)
1629 	con = newWpipe(file, ienc, strlen(open) ? open : "r");
1630     else
1631 #endif
1632 	con = newpipe(file, ienc, strlen(open) ? open : "r");
1633     Connections[ncon] = con;
1634     strncpy(con->encname, CHAR(STRING_ELT(enc, 0)), 100); /* ASCII */
1635     con->encname[100 - 1] = '\0';
1636     con->ex_ptr = PROTECT(R_MakeExternalPtr(con->id, install("connection"), R_NilValue));
1637 
1638     /* open it if desired */
1639     if(strlen(open)) {
1640 	Rboolean success = con->open(con);
1641 	if(!success) {
1642 	    con_destroy(ncon);
1643 	    error(_("cannot open the connection"));
1644 	}
1645     }
1646 
1647     PROTECT(ans = ScalarInteger(ncon));
1648     PROTECT(class = allocVector(STRSXP, 2));
1649     SET_STRING_ELT(class, 0, mkChar("pipe"));
1650 #ifdef Win32
1651     if(CharacterMode != RTerm)
1652 	SET_STRING_ELT(class, 0, mkChar("pipeWin32"));
1653 #endif
1654     SET_STRING_ELT(class, 1, mkChar("connection"));
1655     classgets(ans, class);
1656     setAttrib(ans, R_ConnIdSymbol, con->ex_ptr);
1657     R_RegisterCFinalizerEx(con->ex_ptr, conFinalizer, FALSE);
1658     UNPROTECT(3);
1659 
1660     return ans;
1661 }
1662 
1663 /* ------------------- [bgx]zipped file connections --------------------- */
1664 
1665 #include "gzio.h"
1666 
1667 /* needs to be declared before con_close1 */
1668 typedef struct gzconn {
1669     Rconnection con;
1670     int cp; /* compression level */
1671     z_stream s;
1672     int z_err, z_eof;
1673     uLong crc;
1674     Byte buffer[Z_BUFSIZE];
1675     int nsaved;
1676     char saved[2];
1677     Rboolean allow;
1678 } *Rgzconn;
1679 
1680 
1681 typedef struct gzfileconn {
1682     void *fp;
1683     int compress;
1684 } *Rgzfileconn;
1685 
gzfile_open(Rconnection con)1686 static Rboolean gzfile_open(Rconnection con)
1687 {
1688     gzFile fp;
1689     char mode[6];
1690     Rgzfileconn gzcon = con->private;
1691     const char *name;
1692 
1693     strcpy(mode, con->mode);
1694     /* Must open as binary */
1695     if(strchr(con->mode, 'w')) snprintf(mode, 6, "wb%1d", gzcon->compress);
1696     else if (con->mode[0] == 'a') snprintf(mode, 6, "ab%1d", gzcon->compress);
1697     else strcpy(mode, "rb");
1698     errno = 0; /* precaution */
1699     name = R_ExpandFileName(con->description);
1700     /* We cannot use isDir, because we cannot get the fd from gzFile
1701        (it would be possible with gzdopen, if supported) */
1702     if (isDirPath(name)) {
1703 	warning(_("cannot open file '%s': it is a directory"), name);
1704 	return FALSE;
1705     }
1706     fp = R_gzopen(name, mode);
1707     if(!fp) {
1708 	warning(_("cannot open compressed file '%s', probable reason '%s'"),
1709 	        name, strerror(errno));
1710 	return FALSE;
1711     }
1712     ((Rgzfileconn)(con->private))->fp = fp;
1713     con->isopen = TRUE;
1714     con->canwrite = (con->mode[0] == 'w' || con->mode[0] == 'a');
1715     con->canread = !con->canwrite;
1716     con->text = strchr(con->mode, 'b') ? FALSE : TRUE;
1717     set_buffer(con);
1718     set_iconv(con);
1719     con->save = -1000;
1720     return TRUE;
1721 }
1722 
gzfile_close(Rconnection con)1723 static void gzfile_close(Rconnection con)
1724 {
1725     R_gzclose(((Rgzfileconn)(con->private))->fp);
1726     con->isopen = FALSE;
1727 }
1728 
gzfile_fgetc_internal(Rconnection con)1729 static int gzfile_fgetc_internal(Rconnection con)
1730 {
1731     gzFile fp = ((Rgzfileconn)(con->private))->fp;
1732     unsigned char c;
1733 
1734     return R_gzread(fp, &c, 1) == 1 ? c : R_EOF;
1735 }
1736 
1737 /* This can only seek forwards when writing (when it writes nul bytes).
1738    When reading, it either seeks forwards or rewinds and reads again */
gzfile_seek(Rconnection con,double where,int origin,int rw)1739 static double gzfile_seek(Rconnection con, double where, int origin, int rw)
1740 {
1741     gzFile  fp = ((Rgzfileconn)(con->private))->fp;
1742     Rz_off_t pos = R_gztell(fp);
1743     int res, whence = SEEK_SET;
1744 
1745     if (ISNA(where)) return (double) pos;
1746 
1747     switch(origin) {
1748     case 2: whence = SEEK_CUR; break;
1749     case 3: error(_("whence = \"end\" is not implemented for gzfile connections"));
1750     default: whence = SEEK_SET;
1751     }
1752     res = R_gzseek(fp, (z_off_t) where, whence);
1753     if(res == -1)
1754 	warning(_("seek on a gzfile connection returned an internal error"));
1755     return (double) pos;
1756 }
1757 
gzfile_fflush(Rconnection con)1758 static int gzfile_fflush(Rconnection con)
1759 {
1760     return 0;
1761 }
1762 
gzfile_read(void * ptr,size_t size,size_t nitems,Rconnection con)1763 static size_t gzfile_read(void *ptr, size_t size, size_t nitems,
1764 			Rconnection con)
1765 {
1766     gzFile fp = ((Rgzfileconn)(con->private))->fp;
1767     /* uses 'unsigned' for len */
1768     if ((double) size * (double) nitems > UINT_MAX)
1769 	error(_("too large a block specified"));
1770     return R_gzread(fp, ptr, (unsigned int)(size*nitems))/size;
1771 }
1772 
gzfile_write(const void * ptr,size_t size,size_t nitems,Rconnection con)1773 static size_t gzfile_write(const void *ptr, size_t size, size_t nitems,
1774 			   Rconnection con)
1775 {
1776     gzFile fp = ((Rgzfileconn)(con->private))->fp;
1777     /* uses 'unsigned' for len */
1778     if ((double) size * (double) nitems > UINT_MAX)
1779 	error(_("too large a block specified"));
1780     return R_gzwrite(fp, (voidp)ptr, (unsigned int)(size*nitems))/size;
1781 }
1782 
newgzfile(const char * description,const char * mode,int compress)1783 static Rconnection newgzfile(const char *description, const char *mode,
1784 			     int compress)
1785 {
1786     Rconnection new;
1787     new = (Rconnection) malloc(sizeof(struct Rconn));
1788     if(!new) error(_("allocation of gzfile connection failed"));
1789     new->class = (char *) malloc(strlen("gzfile") + 1);
1790     if(!new->class) {
1791 	free(new);
1792 	error(_("allocation of gzfile connection failed"));
1793 	/* for Solaris 12.5 */ new = NULL;
1794     }
1795     strcpy(new->class, "gzfile");
1796     new->description = (char *) malloc(strlen(description) + 1);
1797     if(!new->description) {
1798 	free(new->class); free(new);
1799 	error(_("allocation of gzfile connection failed"));
1800 	/* for Solaris 12.5 */ new = NULL;
1801     }
1802     init_con(new, description, CE_NATIVE, mode);
1803 
1804     new->canseek = TRUE;
1805     new->open = &gzfile_open;
1806     new->close = &gzfile_close;
1807     new->vfprintf = &dummy_vfprintf;
1808     new->fgetc_internal = &gzfile_fgetc_internal;
1809     new->fgetc = &dummy_fgetc;
1810     new->seek = &gzfile_seek;
1811     new->fflush = &gzfile_fflush;
1812     new->read = &gzfile_read;
1813     new->write = &gzfile_write;
1814     new->private = (void *) malloc(sizeof(struct gzfileconn));
1815     if(!new->private) {
1816 	free(new->description); free(new->class); free(new);
1817 	error(_("allocation of gzfile connection failed"));
1818 	/* for Solaris 12.5 */ new = NULL;
1819     }
1820     ((Rgzfileconn)new->private)->compress = compress;
1821     return new;
1822 }
1823 
1824 #include <bzlib.h>
1825 typedef struct bzfileconn {
1826     FILE *fp;
1827     BZFILE *bfp;
1828     int compress;
1829 } *Rbzfileconn;
1830 
bzfile_open(Rconnection con)1831 static Rboolean bzfile_open(Rconnection con)
1832 {
1833     Rbzfileconn bz = (Rbzfileconn) con->private;
1834     FILE* fp;
1835     BZFILE* bfp;
1836     int bzerror;
1837     char mode[] = "rb";
1838     const char *name;
1839 
1840     con->canwrite = (con->mode[0] == 'w' || con->mode[0] == 'a');
1841     con->canread = !con->canwrite;
1842     /* regardless of the R view of the file, the file must be opened in
1843        binary mode where it matters */
1844     mode[0] = con->mode[0];
1845     errno = 0; /* precaution */
1846     name = R_ExpandFileName(con->description);
1847     fp = R_fopen(name, mode);
1848     if(!fp) {
1849 	warning(_("cannot open bzip2-ed file '%s', probable reason '%s'"),
1850 		name, strerror(errno));
1851 	return FALSE;
1852     }
1853     if (isDir(fp)) {
1854 	warning(_("cannot open file '%s': it is a directory"), name);
1855 	fclose(fp);
1856 	return FALSE;
1857     }
1858     if(con->canread) {
1859 	bfp = BZ2_bzReadOpen(&bzerror, fp, 0, 0, NULL, 0);
1860 	if(bzerror != BZ_OK) {
1861 	    BZ2_bzReadClose(&bzerror, bfp);
1862 	    fclose(fp);
1863 	    warning(_("file '%s' appears not to be compressed by bzip2"),
1864 		    R_ExpandFileName(con->description));
1865 	    return FALSE;
1866 	}
1867     } else {
1868 	bfp = BZ2_bzWriteOpen(&bzerror, fp, bz->compress, 0, 0);
1869 	if(bzerror != BZ_OK) {
1870 	    BZ2_bzWriteClose(&bzerror, bfp, 0, NULL, NULL);
1871 	    fclose(fp);
1872 	    warning(_("initializing bzip2 compression for file '%s' failed"),
1873 		    R_ExpandFileName(con->description));
1874 	    return FALSE;
1875 	}
1876     }
1877     bz->fp = fp;
1878     bz->bfp = bfp;
1879     con->isopen = TRUE;
1880     con->text = strchr(con->mode, 'b') ? FALSE : TRUE;
1881     set_buffer(con);
1882     set_iconv(con);
1883     con->save = -1000;
1884     return TRUE;
1885 }
1886 
bzfile_close(Rconnection con)1887 static void bzfile_close(Rconnection con)
1888 {
1889     int bzerror;
1890     Rbzfileconn bz = con->private;
1891 
1892     if(con->canread)
1893 	BZ2_bzReadClose(&bzerror, bz->bfp);
1894     else
1895 	BZ2_bzWriteClose(&bzerror, bz->bfp, 0, NULL, NULL);
1896     fclose(bz->fp);
1897     con->isopen = FALSE;
1898 }
1899 
bzfile_read(void * ptr,size_t size,size_t nitems,Rconnection con)1900 static size_t bzfile_read(void *ptr, size_t size, size_t nitems,
1901 			  Rconnection con)
1902 {
1903     Rbzfileconn bz = con->private;
1904     int nread = 0,  nleft;
1905     int bzerror;
1906 
1907     /* BZ2 uses 'int' for len */
1908     if ((double) size * (double) nitems > INT_MAX)
1909 	error(_("too large a block specified"));
1910 
1911     nleft = (int)(size * nitems);
1912     /* we try to fill the buffer, because fgetc can interact with the stream boundaries
1913        resulting in truncated text streams while binary streams work fine */
1914     while (nleft > 0) {
1915 	/* Need a cast as 'nread' needs to be interpreted in bytes */
1916 	int n = BZ2_bzRead(&bzerror, bz->bfp, (char *)ptr + nread, nleft);
1917 	if (bzerror == BZ_STREAM_END) { /* this could mean multiple streams so we need to check */
1918 	    char *unused, *next_unused = NULL;
1919 	    int nUnused;
1920 	    BZ2_bzReadGetUnused(&bzerror, bz->bfp, (void**) &unused, &nUnused);
1921 	    if (bzerror == BZ_OK) {
1922 		if (nUnused > 0) { /* unused bytes present - need to retain them */
1923 		    /* given that this should be rare I don't want to add that overhead
1924 		       to the entire bz structure so we allocate memory temporarily */
1925 		    next_unused = (char*) malloc(nUnused);
1926 		    if (!next_unused)
1927 			error(_("allocation of overflow buffer for bzfile failed"));
1928 		    memcpy(next_unused, unused, nUnused);
1929 		}
1930 		if (nUnused > 0 || !feof(bz->fp)) {
1931 		    BZ2_bzReadClose(&bzerror, bz->bfp);
1932 		    bz->bfp = BZ2_bzReadOpen(&bzerror, bz->fp, 0, 0, next_unused, nUnused);
1933 		    if(bzerror != BZ_OK)
1934 			warning(_("file '%s' has trailing content that appears not to be compressed by bzip2"),
1935 				R_ExpandFileName(con->description));
1936 		}
1937 		if (next_unused) free(next_unused);
1938 	    }
1939 	} else if (bzerror != BZ_OK) {
1940 	    /* bzlib docs say in this case n is invalid - but historically
1941 	       we still used n in that case, so I keep it for now */
1942 	    nread += n;
1943 	    break;
1944 	}
1945 	nread += n;
1946 	nleft -= n;
1947     }
1948 
1949     return nread / size;
1950 }
1951 
bzfile_fgetc_internal(Rconnection con)1952 static int bzfile_fgetc_internal(Rconnection con)
1953 {
1954     char buf[1];
1955     size_t size;
1956 
1957     size = bzfile_read(buf, 1, 1, con);
1958     return (size < 1) ? R_EOF : (buf[0] % 256);
1959 }
1960 
bzfile_write(const void * ptr,size_t size,size_t nitems,Rconnection con)1961 static size_t bzfile_write(const void *ptr, size_t size, size_t nitems,
1962 			   Rconnection con)
1963 {
1964     Rbzfileconn bz = con->private;
1965     int bzerror;
1966 
1967     /* uses 'int' for len */
1968     if ((double) size * (double) nitems > INT_MAX)
1969 	error(_("too large a block specified"));
1970     BZ2_bzWrite(&bzerror, bz->bfp, (voidp) ptr, (int)(size*nitems));
1971     if(bzerror != BZ_OK) return 0;
1972     else return nitems;
1973 }
1974 
newbzfile(const char * description,const char * mode,int compress)1975 static Rconnection newbzfile(const char *description, const char *mode,
1976 			     int compress)
1977 {
1978     Rconnection new;
1979     new = (Rconnection) malloc(sizeof(struct Rconn));
1980     if(!new) error(_("allocation of bzfile connection failed"));
1981     new->class = (char *) malloc(strlen("bzfile") + 1);
1982     if(!new->class) {
1983 	free(new);
1984 	error(_("allocation of bzfile connection failed"));
1985 	/* for Solaris 12.5 */ new = NULL;
1986     }
1987     strcpy(new->class, "bzfile");
1988     new->description = (char *) malloc(strlen(description) + 1);
1989     if(!new->description) {
1990 	free(new->class); free(new);
1991 	error(_("allocation of bzfile connection failed"));
1992 	/* for Solaris 12.5 */ new = NULL;
1993     }
1994     init_con(new, description, CE_NATIVE, mode);
1995 
1996     new->canseek = FALSE;
1997     new->open = &bzfile_open;
1998     new->close = &bzfile_close;
1999     new->vfprintf = &dummy_vfprintf;
2000     new->fgetc_internal = &bzfile_fgetc_internal;
2001     new->fgetc = &dummy_fgetc;
2002     new->seek = &null_seek;
2003     new->fflush = &null_fflush;
2004     new->read = &bzfile_read;
2005     new->write = &bzfile_write;
2006     new->private = (void *) malloc(sizeof(struct bzfileconn));
2007     if(!new->private) {
2008 	free(new->description); free(new->class); free(new);
2009 	error(_("allocation of bzfile connection failed"));
2010 	/* for Solaris 12.5 */ new = NULL;
2011     }
2012     ((Rbzfileconn)new->private)->compress = compress;
2013     return new;
2014 }
2015 
2016 #include <lzma.h>
2017 
2018 typedef struct xzfileconn {
2019     FILE *fp;
2020     lzma_stream stream;
2021     lzma_action action;
2022     int compress;
2023     int type;
2024     lzma_filter filters[2];
2025     lzma_options_lzma opt_lzma;
2026     unsigned char buf[BUFSIZE];
2027 } *Rxzfileconn;
2028 
xzfile_open(Rconnection con)2029 static Rboolean xzfile_open(Rconnection con)
2030 {
2031     Rxzfileconn xz = con->private;
2032     lzma_ret ret;
2033     char mode[] = "rb";
2034     const char *name;
2035 
2036     con->canwrite = (con->mode[0] == 'w' || con->mode[0] == 'a');
2037     con->canread = !con->canwrite;
2038     /* regardless of the R view of the file, the file must be opened in
2039        binary mode where it matters */
2040     mode[0] = con->mode[0];
2041     errno = 0; /* precaution */
2042     name = R_ExpandFileName(con->description);
2043     xz->fp = R_fopen(name, mode);
2044     if(!xz->fp) {
2045 	warning(_("cannot open compressed file '%s', probable reason '%s'"),
2046 		name, strerror(errno));
2047 	return FALSE;
2048     }
2049     if (isDir(xz->fp)) {
2050 	warning(_("cannot open file '%s': it is a directory"), name);
2051 	fclose(xz->fp);
2052 	return FALSE;
2053     }
2054     if(con->canread) {
2055 	xz->action = LZMA_RUN;
2056 	/* probably about 80Mb is required, but 512Mb seems OK as a limit */
2057 	if (xz->type == 1)
2058 	    ret = lzma_alone_decoder(&xz->stream, 536870912);
2059 	else
2060 	    ret = lzma_stream_decoder(&xz->stream, 536870912,
2061 				      LZMA_CONCATENATED);
2062 	if (ret != LZMA_OK) {
2063 	    warning(_("cannot initialize lzma decoder, error %d"), ret);
2064 	    return FALSE;
2065 	}
2066 	xz->stream.avail_in = 0;
2067     } else {
2068 	lzma_stream *strm = &xz->stream;
2069 	uint32_t preset_number = abs(xz->compress);
2070 	if(xz->compress < 0) preset_number |= LZMA_PRESET_EXTREME;
2071 	if(lzma_lzma_preset(&xz->opt_lzma, preset_number))
2072 	    error("problem setting presets");
2073 	xz->filters[0].id = LZMA_FILTER_LZMA2;
2074 	xz->filters[0].options = &(xz->opt_lzma);
2075 	xz->filters[1].id = LZMA_VLI_UNKNOWN;
2076 
2077 	ret = lzma_stream_encoder(strm, xz->filters, LZMA_CHECK_CRC32);
2078 	if (ret != LZMA_OK) {
2079 	    warning(_("cannot initialize lzma encoder, error %d"), ret);
2080 	    return FALSE;
2081 	}
2082     }
2083     con->isopen = TRUE;
2084     con->text = strchr(con->mode, 'b') ? FALSE : TRUE;
2085     set_buffer(con);
2086     set_iconv(con);
2087     con->save = -1000;
2088     return TRUE;
2089 }
2090 
xzfile_close(Rconnection con)2091 static void xzfile_close(Rconnection con)
2092 {
2093     Rxzfileconn xz = con->private;
2094 
2095     if(con->canwrite) {
2096 	lzma_ret ret;
2097 	lzma_stream *strm = &(xz->stream);
2098 	size_t nout, res;
2099 	unsigned char buf[BUFSIZE];
2100 	while(1) {
2101 	    strm->avail_out = BUFSIZE; strm->next_out = buf;
2102 	    ret = lzma_code(strm, LZMA_FINISH);
2103 	    nout = BUFSIZE - strm->avail_out;
2104 	    res = fwrite(buf, 1, nout, xz->fp);
2105 	    if (res != nout) error("fwrite error");
2106 	    if (ret != LZMA_OK) break;
2107 	}
2108     }
2109     lzma_end(&(xz->stream));
2110     fclose(xz->fp);
2111     con->isopen = FALSE;
2112 }
2113 
xzfile_read(void * ptr,size_t size,size_t nitems,Rconnection con)2114 static size_t xzfile_read(void *ptr, size_t size, size_t nitems,
2115 			  Rconnection con)
2116 {
2117     Rxzfileconn xz = con->private;
2118     lzma_stream *strm = &(xz->stream);
2119     lzma_ret ret;
2120     size_t s = size*nitems, have, given = 0;
2121     unsigned char *p = ptr;
2122 
2123     if (!s) return 0;
2124 
2125     while(1) {
2126 	if (strm->avail_in == 0 && xz->action != LZMA_FINISH) {
2127 	    strm->next_in = xz->buf;
2128 	    strm->avail_in = fread(xz->buf, 1, BUFSIZ, xz->fp);
2129 	    if (feof(xz->fp)) xz->action = LZMA_FINISH;
2130 	}
2131 	strm->avail_out = s; strm->next_out = p;
2132 	ret = lzma_code(strm, xz->action);
2133 	have = s - strm->avail_out;  given += have;
2134 	//printf("available: %d, ready: %d/%d\n", strm->avail_in, given, s);
2135 	if (ret != LZMA_OK) {
2136 	    if (ret != LZMA_STREAM_END) {
2137 		switch(ret) {
2138 		case LZMA_MEM_ERROR:
2139 		case LZMA_MEMLIMIT_ERROR:
2140 		    warning("lzma decoder needed more memory");
2141 		    break;
2142 		case LZMA_FORMAT_ERROR:
2143 		    warning("lzma decoder format error");
2144 		    break;
2145 		case LZMA_DATA_ERROR:
2146 		    warning("lzma decoder corrupt data");
2147 		    break;
2148 		default:
2149 		    warning("lzma decoding result %d", ret);
2150 		}
2151 	    }
2152 	    return given/size;
2153 	}
2154 	s -= have;
2155 	if (!s) return nitems;
2156 	p += have;
2157     }
2158 }
2159 
xzfile_fgetc_internal(Rconnection con)2160 static int xzfile_fgetc_internal(Rconnection con)
2161 {
2162     char buf[1];
2163     size_t size = xzfile_read(buf, 1, 1, con);
2164 
2165     return (size < 1) ? R_EOF : (buf[0] % 256);
2166 }
2167 
2168 
xzfile_write(const void * ptr,size_t size,size_t nitems,Rconnection con)2169 static size_t xzfile_write(const void *ptr, size_t size, size_t nitems,
2170 			   Rconnection con)
2171 {
2172     Rxzfileconn xz = con->private;
2173     lzma_stream *strm = &(xz->stream);
2174     lzma_ret ret;
2175     size_t s = size*nitems, nout, res;
2176     const unsigned char *p = ptr;
2177     unsigned char buf[BUFSIZE];
2178 
2179     if (!s) return 0;
2180 
2181     strm->avail_in = s;
2182     strm->next_in = p;
2183     while(1) {
2184 	strm->avail_out = BUFSIZE; strm->next_out = buf;
2185 	ret = lzma_code(strm, LZMA_RUN);
2186 	if (ret > 1) {
2187 	    switch(ret) {
2188 	    case LZMA_MEM_ERROR:
2189 		warning("lzma encoder needed more memory");
2190 		break;
2191 	    default:
2192 		warning("lzma encoding result %d", ret);
2193 	    }
2194 	    return 0;
2195 	}
2196 	nout = BUFSIZE - strm->avail_out;
2197 	res = fwrite(buf, 1, nout, xz->fp);
2198 	if (res != nout) error("fwrite error");
2199 	if (strm->avail_in == 0) return nitems;
2200     }
2201 }
2202 
2203 static Rconnection
newxzfile(const char * description,const char * mode,int type,int compress)2204 newxzfile(const char *description, const char *mode, int type, int compress)
2205 {
2206     Rconnection new;
2207     new = (Rconnection) malloc(sizeof(struct Rconn));
2208     if(!new) error(_("allocation of xzfile connection failed"));
2209     new->class = (char *) malloc(strlen("xzfile") + 1);
2210     if(!new->class) {
2211 	free(new);
2212 	error(_("allocation of xzfile connection failed"));
2213 	/* for Solaris 12.5 */ new = NULL;
2214     }
2215     strcpy(new->class, "xzfile");
2216     new->description = (char *) malloc(strlen(description) + 1);
2217     if(!new->description) {
2218 	free(new->class); free(new);
2219 	error(_("allocation of xzfile connection failed"));
2220 	/* for Solaris 12.5 */ new = NULL;
2221     }
2222     init_con(new, description, CE_NATIVE, mode);
2223 
2224     new->canseek = FALSE;
2225     new->open = &xzfile_open;
2226     new->close = &xzfile_close;
2227     new->vfprintf = &dummy_vfprintf;
2228     new->fgetc_internal = &xzfile_fgetc_internal;
2229     new->fgetc = &dummy_fgetc;
2230     new->seek = &null_seek;
2231     new->fflush = &null_fflush;
2232     new->read = &xzfile_read;
2233     new->write = &xzfile_write;
2234     new->private = (void *) malloc(sizeof(struct xzfileconn));
2235     memset(new->private, 0, sizeof(struct xzfileconn));
2236     if(!new->private) {
2237 	free(new->description); free(new->class); free(new);
2238 	error(_("allocation of xzfile connection failed"));
2239 	/* for Solaris 12.5 */ new = NULL;
2240     }
2241     ((Rxzfileconn) new->private)->type = type;
2242     ((Rxzfileconn) new->private)->compress = compress;
2243     return new;
2244 }
2245 
2246 /* op 0 is gzfile, 1 is bzfile, 2 is xv/lzma */
do_gzfile(SEXP call,SEXP op,SEXP args,SEXP env)2247 SEXP attribute_hidden do_gzfile(SEXP call, SEXP op, SEXP args, SEXP env)
2248 {
2249     SEXP sfile, sopen, ans, class, enc;
2250     const char *file, *open;
2251     int ncon, compress = 9;
2252     Rconnection con = NULL;
2253     int type = PRIMVAL(op);
2254     int subtype = 0;
2255 
2256     checkArity(op, args);
2257     sfile = CAR(args);
2258     if(!isString(sfile) || LENGTH(sfile) != 1 ||
2259        STRING_ELT(sfile, 0) == NA_STRING)
2260 	error(_("invalid '%s' argument"), "description");
2261     if(LENGTH(sfile) > 1)
2262 	warning(_("only first element of 'description' argument used"));
2263     file = translateCharFP(STRING_ELT(sfile, 0));
2264     sopen = CADR(args);
2265     if(!isString(sopen) || LENGTH(sopen) != 1)
2266 	error(_("invalid '%s' argument"), "open");
2267     enc = CADDR(args);
2268     if(!isString(enc) || LENGTH(enc) != 1 ||
2269        strlen(CHAR(STRING_ELT(enc, 0))) > 100) /* ASCII */
2270 	error(_("invalid '%s' argument"), "encoding");
2271     if(type < 2) {
2272 	compress = asInteger(CADDDR(args));
2273 	if(compress == NA_LOGICAL || compress < 0 || compress > 9)
2274 	    error(_("invalid '%s' argument"), "compress");
2275     }
2276     if(type == 2) {
2277 	compress = asInteger(CADDDR(args));
2278 	if(compress == NA_LOGICAL || abs(compress) > 9)
2279 	    error(_("invalid '%s' argument"), "compress");
2280     }
2281     open = CHAR(STRING_ELT(sopen, 0)); /* ASCII */
2282     if (type == 0 && (!open[0] || open[0] == 'r')) {
2283 	/* check magic no */
2284 	FILE *fp = fopen(R_ExpandFileName(file), "rb");
2285 	char buf[7];
2286 	if (fp) {
2287 	    size_t res;
2288 	    memset(buf, 0, 7); res = fread(buf, 5, 1, fp); fclose(fp);
2289 	    if(res == 1) {
2290 		if(!strncmp(buf, "BZh", 3)) type = 1;
2291 		if((buf[0] == '\xFD') && !strncmp(buf+1, "7zXZ", 4)) type = 2;
2292 		if((buf[0] == '\xFF') && !strncmp(buf+1, "LZMA", 4)) {
2293 		    type = 2; subtype = 1;
2294 		}
2295 		if(!memcmp(buf, "]\0\0\200\0", 5)) {
2296 		    type = 2; subtype = 1;
2297 		}
2298 		if((buf[0] == '\x89') && !strncmp(buf+1, "LZO", 3))
2299 		    error(_("this is a %s-compressed file which this build of R does not support"), "lzop");
2300 	    }
2301 	}
2302     }
2303     switch(type) {
2304     case 0:
2305 	con = newgzfile(file, strlen(open) ? open : "rb", compress);
2306 	break;
2307     case 1:
2308 	con = newbzfile(file, strlen(open) ? open : "rb", compress);
2309 	break;
2310     case 2:
2311 	con = newxzfile(file, strlen(open) ? open : "rb", subtype, compress);
2312 	break;
2313     }
2314     ncon = NextConnection();
2315     Connections[ncon] = con;
2316     con->blocking = TRUE;
2317     strncpy(con->encname, CHAR(STRING_ELT(enc, 0)), 100); /* ASCII */
2318     con->encname[100 - 1] = '\0';
2319 
2320     /* see the comment in do_url */
2321     if (con->encname[0] && !streql(con->encname, "native.enc"))
2322 	con->canseek = 0;
2323     con->ex_ptr = PROTECT(R_MakeExternalPtr(con->id, install("connection"), R_NilValue));
2324 
2325     /* open it if desired */
2326     if(strlen(open)) {
2327 	Rboolean success = con->open(con);
2328 	if(!success) {
2329 	    con_destroy(ncon);
2330 	    error(_("cannot open the connection"));
2331 	}
2332     }
2333 
2334     PROTECT(ans = ScalarInteger(ncon));
2335     PROTECT(class = allocVector(STRSXP, 2));
2336     switch(type) {
2337     case 0:
2338 	SET_STRING_ELT(class, 0, mkChar("gzfile"));
2339 	break;
2340     case 1:
2341 	SET_STRING_ELT(class, 0, mkChar("bzfile"));
2342 	break;
2343     case 2:
2344 	SET_STRING_ELT(class, 0, mkChar("xzfile"));
2345 	break;
2346     }
2347     SET_STRING_ELT(class, 1, mkChar("connection"));
2348     classgets(ans, class);
2349     setAttrib(ans, R_ConnIdSymbol, con->ex_ptr);
2350     R_RegisterCFinalizerEx(con->ex_ptr, conFinalizer, FALSE);
2351     UNPROTECT(3);
2352 
2353     return ans;
2354 }
2355 
2356 /* ------------------- clipboard connections --------------------- */
2357 
2358 #ifdef Win32
2359 # define WIN32_LEAN_AND_MEAN 1
2360 #include <windows.h>
2361 extern int GA_clipboardhastext(void); /* from ga.h */
2362 #endif
2363 
2364 #ifdef Unix
2365 Rboolean R_ReadClipboard(Rclpconn clpcon, char *type);
2366 #endif
2367 
clp_open(Rconnection con)2368 static Rboolean clp_open(Rconnection con)
2369 {
2370     Rclpconn this = con->private;
2371 
2372     con->isopen = TRUE;
2373     con->canwrite = (con->mode[0] == 'w' || con->mode[0] == 'a');
2374     con->canread = !con->canwrite;
2375     this->pos = 0;
2376     if(con->canread) {
2377 	/* copy the clipboard contents now */
2378 #ifdef Win32
2379 	HGLOBAL hglb;
2380 	char *pc;
2381 	if(GA_clipboardhastext() &&
2382 	   OpenClipboard(NULL) &&
2383 	   (hglb = GetClipboardData(CF_TEXT)) &&
2384 	   (pc = (char *)GlobalLock(hglb))) {
2385 	    int len = (int) strlen(pc);  // will be fairly small
2386 	    this->buff = (char *)malloc(len + 1);
2387 	    this->last = this->len = len;
2388 	    if(this->buff) {
2389 		strcpy(this->buff, pc);
2390 		GlobalUnlock(hglb);
2391 		CloseClipboard();
2392 	    } else {
2393 		GlobalUnlock(hglb);
2394 		CloseClipboard();
2395 		this->buff = NULL; this->last = this->len = 0;
2396 		warning(_("memory allocation to copy clipboard failed"));
2397 		return FALSE;
2398 	    }
2399 	} else {
2400 	    this->buff = NULL; this->last = this->len = 0;
2401 	    warning(_("clipboard cannot be opened or contains no text"));
2402 	    return FALSE;
2403 	}
2404 #else
2405 	Rboolean res = R_ReadClipboard(this, con->description);
2406 	if(!res) return FALSE;
2407 #endif
2408     } else {
2409 	int len = (this->sizeKB)*1024;
2410 	this->buff = (char *) malloc(len + 1);
2411 	if(!this->buff) {
2412 	    warning(_("memory allocation to open clipboard failed"));
2413 	    return FALSE;
2414 	}
2415 	this->len = len;
2416 	this->last = 0;
2417     }
2418     con->text = TRUE;
2419     /* Not calling set_buffer(con) as the data is already buffered */
2420     set_iconv(con);
2421     con->save = -1000;
2422     this->warned = FALSE;
2423 
2424     return TRUE;
2425 }
2426 
clp_writeout(Rconnection con)2427 static void clp_writeout(Rconnection con)
2428 {
2429 #ifdef Win32
2430     Rclpconn this = con->private;
2431 
2432     HGLOBAL hglb;
2433     char *s, *p;
2434     if ( (hglb = GlobalAlloc(GHND, this->len)) &&
2435 	 (s = (char *)GlobalLock(hglb)) ) {
2436 	p = this->buff;
2437 	while(p < this->buff + this->pos) *s++ = *p++;
2438 	*s = '\0';
2439 	GlobalUnlock(hglb);
2440 	if (!OpenClipboard(NULL) || !EmptyClipboard()) {
2441 	    warning(_("unable to open the clipboard"));
2442 	    GlobalFree(hglb);
2443 	} else {
2444 	    if(!SetClipboardData(CF_TEXT, hglb)) {
2445 		warning(_("unable to write to the clipboard"));
2446 		GlobalFree(hglb);
2447 	    }
2448 	    CloseClipboard();
2449 	}
2450     }
2451 #endif
2452 }
2453 
clp_close(Rconnection con)2454 static void clp_close(Rconnection con)
2455 {
2456     Rclpconn this = con->private;
2457 
2458     con->isopen = FALSE;
2459     if(con->canwrite)
2460 	clp_writeout(con);
2461     if(this-> buff) free(this->buff);
2462 }
2463 
clp_fgetc_internal(Rconnection con)2464 static int clp_fgetc_internal(Rconnection con)
2465 {
2466     Rclpconn this = con->private;
2467 
2468     if (this->pos >= this->len) return R_EOF;
2469     return this->buff[this->pos++];
2470 }
2471 
clp_seek(Rconnection con,double where,int origin,int rw)2472 static double clp_seek(Rconnection con, double where, int origin, int rw)
2473 {
2474     Rclpconn this = con->private;
2475     int newpos, oldpos = this->pos;
2476 
2477     if(ISNA(where)) return oldpos;
2478 
2479     switch(origin) {
2480     case 2: newpos = this->pos + (int) where; break;
2481     case 3: newpos = this->last + (int) where; break;
2482     default: newpos = (int) where;
2483     }
2484     if(newpos < 0 || newpos >= this->last)
2485 	error(_("attempt to seek outside the range of the clipboard"));
2486     else this->pos = newpos;
2487 
2488     return (double) oldpos;
2489 }
2490 
clp_truncate(Rconnection con)2491 static void clp_truncate(Rconnection con)
2492 {
2493     Rclpconn this = con->private;
2494 
2495     if(!con->isopen || !con->canwrite)
2496 	error(_("can only truncate connections open for writing"));
2497     this->last = this->pos;
2498 }
2499 
clp_fflush(Rconnection con)2500 static int clp_fflush(Rconnection con)
2501 {
2502     if(!con->isopen || !con->canwrite) return 1;
2503     clp_writeout(con);
2504     return 0;
2505 }
2506 
clp_read(void * ptr,size_t size,size_t nitems,Rconnection con)2507 static size_t clp_read(void *ptr, size_t size, size_t nitems,
2508 			Rconnection con)
2509 {
2510     Rclpconn this = con->private;
2511     int available = this->len - this->pos, request = (int)(size*nitems), used;
2512     if ((double) size * (double) nitems > INT_MAX)
2513 	error(_("too large a block specified"));
2514     used = (request < available) ? request : available;
2515     strncpy(ptr, this->buff + this->pos, used);
2516     this->pos += used;
2517     return (size_t) used/size;
2518 }
2519 
clp_write(const void * ptr,size_t size,size_t nitems,Rconnection con)2520 static size_t clp_write(const void *ptr, size_t size, size_t nitems,
2521 			 Rconnection con)
2522 {
2523     Rclpconn this = con->private;
2524     int i, len = (int)(size * nitems), used = 0;
2525     char c, *p = (char *) ptr, *q = this->buff + this->pos;
2526 
2527     if(!con->canwrite)
2528 	error(_("clipboard connection is open for reading only"));
2529     if ((double) size * (double) nitems > INT_MAX)
2530 	error(_("too large a block specified"));
2531 
2532     for(i = 0; i < len; i++) {
2533 	if(this->pos >= this->len) break;
2534 	c = *p++;
2535 #ifdef Win32
2536     /* clipboard requires CRLF termination */
2537 	if(c == '\n') {
2538 	    *q++ = '\r';
2539 	    this->pos++;
2540 	    if(this->pos >= this->len) break;
2541 	}
2542 #endif
2543 	*q++ = c;
2544 	this->pos++;
2545 	used++;
2546     }
2547     if (used < len && !this->warned) {
2548 	warning(_("clipboard buffer is full and output lost"));
2549 	this->warned = TRUE;
2550     }
2551     if(this->last < this->pos) this->last = this->pos;
2552     return (size_t) used/size;
2553 }
2554 
newclp(const char * url,const char * inmode)2555 static Rconnection newclp(const char *url, const char *inmode)
2556 {
2557     Rconnection new;
2558     const char *description;
2559     int sizeKB = 32;
2560     char mode[4];
2561 
2562     mode[3] = '\0';
2563     strncpy(mode, inmode, 3);
2564 
2565     if(strlen(mode) == 2 && mode[1] == 't') mode[1] = '\0';
2566 
2567     if(strlen(mode) != 1 ||
2568        (mode[0] != 'r' && mode[0] != 'w'))
2569 	error(_("'mode' for the clipboard must be 'r' or 'w'"));
2570 #ifdef Unix
2571     if(mode[0] != 'r')
2572 	error(_("'mode' for the clipboard must be 'r' on Unix"));
2573 #endif
2574     new = (Rconnection) malloc(sizeof(struct Rconn));
2575     if(!new) error(_("allocation of clipboard connection failed"));
2576     if(strncmp(url, "clipboard", 9) == 0) description = "clipboard";
2577     else description = url;
2578     new->class = (char *) malloc(strlen(description) + 1);
2579     if(!new->class) {
2580 	free(new);
2581 	error(_("allocation of clipboard connection failed"));
2582 	/* for Solaris 12.5 */ new = NULL;
2583     }
2584     strcpy(new->class, description);
2585     new->description = (char *) malloc(strlen(description) + 1);
2586     if(!new->description) {
2587 	free(new->class); free(new);
2588 	error(_("allocation of clipboard connection failed"));
2589 	/* for Solaris 12.5 */ new = NULL;
2590     }
2591     init_con(new, description, CE_NATIVE, mode);
2592     new->open = &clp_open;
2593     new->close = &clp_close;
2594     new->vfprintf = &dummy_vfprintf;
2595     new->fgetc_internal = &clp_fgetc_internal;
2596     new->fgetc = &dummy_fgetc;
2597     new->seek = &clp_seek;
2598     new->truncate = &clp_truncate;
2599     new->fflush = &clp_fflush;
2600     new->read = &clp_read;
2601     new->write = &clp_write;
2602     new->canseek = TRUE;
2603     new->private = (void *) malloc(sizeof(struct clpconn));
2604     if(!new->private) {
2605 	free(new->description); free(new->class); free(new);
2606 	error(_("allocation of clipboard connection failed"));
2607 	/* for Solaris 12.5 */ new = NULL;
2608     }
2609     ((Rclpconn)new->private)->buff = NULL;
2610     if (strncmp(url, "clipboard-", 10) == 0) {
2611 	sizeKB = atoi(url+10);
2612 	if(sizeKB < 32) sizeKB = 32;
2613 	/* Rprintf("setting clipboard size to %dKB\n", sizeKB); */
2614     }
2615     ((Rclpconn)new->private)->sizeKB = sizeKB;
2616     return new;
2617 }
2618 
2619 /* ------------------- terminal connections --------------------- */
2620 
2621 static unsigned char  ConsoleBuf[CONSOLE_BUFFER_SIZE+1];
2622 static unsigned char *ConsoleBufp;
2623 static int  ConsoleBufCnt;
2624 
ConsoleGetchar(void)2625 static int ConsoleGetchar(void)
2626 {
2627     if (--ConsoleBufCnt < 0) {
2628 	ConsoleBuf[CONSOLE_BUFFER_SIZE] = '\0';
2629 	if (R_ReadConsole("", ConsoleBuf, CONSOLE_BUFFER_SIZE, 0) == 0) {
2630 	    R_ClearerrConsole();
2631 	    return R_EOF;
2632 	}
2633 	ConsoleBufp = ConsoleBuf;
2634 	ConsoleBufCnt = (int) strlen((char *)ConsoleBuf); // must be short
2635 	ConsoleBufCnt--;
2636     }
2637     return *ConsoleBufp++;
2638 }
2639 
stdin_fgetc(Rconnection con)2640 static int stdin_fgetc(Rconnection con)
2641 {
2642     return ConsoleGetchar();
2643 }
2644 
stdout_vfprintf(Rconnection con,const char * format,va_list ap)2645 static int stdout_vfprintf(Rconnection con, const char *format, va_list ap)
2646 {
2647     if(R_Outputfile) vfprintf(R_Outputfile, format, ap);
2648     else Rcons_vprintf(format, ap);
2649     return 0;
2650 }
2651 
stdout_fflush(Rconnection con)2652 static int stdout_fflush(Rconnection con)
2653 {
2654     if(R_Outputfile) return fflush(R_Outputfile);
2655     return 0;
2656 }
2657 
stderr_vfprintf(Rconnection con,const char * format,va_list ap)2658 static int stderr_vfprintf(Rconnection con, const char *format, va_list ap)
2659 {
2660     REvprintf(format, ap);
2661     return 0;
2662 }
2663 
stderr_fflush(Rconnection con)2664 static int stderr_fflush(Rconnection con)
2665 {
2666     /* normally stderr and hence unbuffered, but it needs not be,
2667        e.g. it is stdout on Win9x */
2668     if(R_Consolefile) return fflush(R_Consolefile);
2669     return 0;
2670 }
2671 
newterminal(const char * description,const char * mode)2672 static Rconnection newterminal(const char *description, const char *mode)
2673 {
2674     Rconnection new;
2675     new = (Rconnection) malloc(sizeof(struct Rconn));
2676     if(!new) error(_("allocation of terminal connection failed"));
2677     new->class = (char *) malloc(strlen("terminal") + 1);
2678     if(!new->class) {
2679 	free(new);
2680 	error(_("allocation of terminal connection failed"));
2681 	/* for Solaris 12.5 */ new = NULL;
2682     }
2683     strcpy(new->class, "terminal");
2684     new->description = (char *) malloc(strlen(description) + 1);
2685     if(!new->description) {
2686 	free(new->class); free(new);
2687 	error(_("allocation of terminal connection failed"));
2688 	/* for Solaris 12.5 */ new = NULL;
2689     }
2690     init_con(new, description, CE_NATIVE, mode);
2691     new->isopen = TRUE;
2692     new->canread = (strcmp(mode, "r") == 0);
2693     new->canwrite = (strcmp(mode, "w") == 0);
2694     new->destroy = &null_close;
2695     new->private = NULL;
2696     return new;
2697 }
2698 
2699 
do_stdin(SEXP call,SEXP op,SEXP args,SEXP env)2700 SEXP attribute_hidden do_stdin(SEXP call, SEXP op, SEXP args, SEXP env)
2701 {
2702     SEXP ans, class;
2703     Rconnection con = getConnection(0);
2704 
2705     checkArity(op, args);
2706     PROTECT(ans = ScalarInteger(0));
2707     PROTECT(class = allocVector(STRSXP, 2));
2708     SET_STRING_ELT(class, 0, mkChar(con->class));
2709     SET_STRING_ELT(class, 1, mkChar("connection"));
2710     classgets(ans, class);
2711     UNPROTECT(2);
2712     return ans;
2713 }
2714 
do_stdout(SEXP call,SEXP op,SEXP args,SEXP env)2715 SEXP attribute_hidden do_stdout(SEXP call, SEXP op, SEXP args, SEXP env)
2716 {
2717     SEXP ans, class;
2718     Rconnection con = getConnection(R_OutputCon);
2719 
2720     checkArity(op, args);
2721     PROTECT(ans = ScalarInteger(R_OutputCon));
2722     PROTECT(class = allocVector(STRSXP, 2));
2723     SET_STRING_ELT(class, 0, mkChar(con->class));
2724     SET_STRING_ELT(class, 1, mkChar("connection"));
2725     classgets(ans, class);
2726     UNPROTECT(2);
2727     return ans;
2728 }
2729 
2730 
do_stderr(SEXP call,SEXP op,SEXP args,SEXP env)2731 SEXP attribute_hidden do_stderr(SEXP call, SEXP op, SEXP args, SEXP env)
2732 {
2733     SEXP ans, class;
2734     Rconnection con = getConnection(2);
2735 
2736     checkArity(op, args);
2737     PROTECT(ans = ScalarInteger(2));
2738     PROTECT(class = allocVector(STRSXP, 2));
2739     SET_STRING_ELT(class, 0, mkChar(con->class));
2740     SET_STRING_ELT(class, 1, mkChar("connection"));
2741     classgets(ans, class);
2742     UNPROTECT(2);
2743     return ans;
2744 }
2745 
2746 
do_isatty(SEXP call,SEXP op,SEXP args,SEXP env)2747 SEXP attribute_hidden do_isatty(SEXP call, SEXP op, SEXP args, SEXP env)
2748 {
2749     int con;
2750     /* FIXME: is this correct for consoles? */
2751     checkArity(op, args);
2752     con = asInteger(CAR(args));
2753     return ScalarLogical(con == NA_LOGICAL ? FALSE : R_isatty(con) );
2754 }
2755 
2756 /* ------------------- raw connections --------------------- */
2757 
2758 /* Possible future redesign: store nbytes as TRUELENGTH */
2759 
2760 typedef struct rawconn {
2761     SEXP data; /* all the data, stored as a raw vector */
2762     /* replace nbytes by TRUELENGTH in due course? */
2763     size_t pos, nbytes; /* current pos and number of bytes
2764 			   (same pos for read and write) */
2765 } *Rrawconn;
2766 
2767 
2768 /* copy a raw vector into a buffer */
raw_init(Rconnection con,SEXP raw)2769 static void raw_init(Rconnection con, SEXP raw)
2770 {
2771     Rrawconn this = con->private;
2772 
2773     this->data = MAYBE_REFERENCED(raw) ? duplicate(raw) : raw;
2774     R_PreserveObject(this->data);
2775     this->nbytes = XLENGTH(this->data);
2776     this->pos = 0;
2777 }
2778 
raw_open(Rconnection con)2779 static Rboolean raw_open(Rconnection con)
2780 {
2781     return TRUE;
2782 }
2783 
raw_close(Rconnection con)2784 static void raw_close(Rconnection con)
2785 {
2786 }
2787 
raw_destroy(Rconnection con)2788 static void raw_destroy(Rconnection con)
2789 {
2790     Rrawconn this = con->private;
2791 
2792     R_ReleaseObject(this->data);
2793     free(this);
2794 }
2795 
raw_resize(Rrawconn this,size_t needed)2796 static void raw_resize(Rrawconn this, size_t needed)
2797 {
2798     size_t nalloc = 64;
2799     SEXP tmp;
2800 
2801     if (needed > 8192) nalloc = (size_t)(1.2*(double)needed); /* 20% over-allocation */
2802     else while(nalloc < needed) nalloc *= 2;  /* use powers of 2 if small */
2803     PROTECT(tmp = allocVector(RAWSXP, nalloc));
2804     memcpy(RAW(tmp), RAW(this->data), this->nbytes);
2805     R_ReleaseObject(this->data);
2806     this->data = tmp;
2807     R_PreserveObject(this->data);
2808     UNPROTECT(1);
2809 }
2810 
raw_write(const void * ptr,size_t size,size_t nitems,Rconnection con)2811 static size_t raw_write(const void *ptr, size_t size, size_t nitems,
2812 			Rconnection con)
2813 {
2814     Rrawconn this = con->private;
2815     size_t freespace = XLENGTH(this->data) - this->pos, bytes = size*nitems;
2816 
2817     if ((double) size * (double) nitems + (double) this->pos > R_XLEN_T_MAX)
2818 	error(_("attempting to add too many elements to raw vector"));
2819     /* resize may fail, when this will give an error */
2820     if(bytes >= freespace) raw_resize(this, bytes + this->pos);
2821     /* the source just might be this raw vector */
2822     memmove(RAW(this->data) + this->pos, ptr, bytes);
2823     this->pos += bytes;
2824     if(this->nbytes < this->pos) this->nbytes = this->pos;
2825     return nitems;
2826 }
2827 
raw_truncate(Rconnection con)2828 static void raw_truncate(Rconnection con)
2829 {
2830     Rrawconn this = con->private;
2831     this->nbytes = this->pos;
2832 }
2833 
raw_read(void * ptr,size_t size,size_t nitems,Rconnection con)2834 static size_t raw_read(void *ptr, size_t size, size_t nitems,
2835 		       Rconnection con)
2836 {
2837     Rrawconn this = con->private;
2838     size_t available = this->nbytes - this->pos, request = size*nitems, used;
2839 
2840     if ((double) size * (double) nitems + (double) this->pos > R_XLEN_T_MAX)
2841 	error(_("too large a block specified"));
2842     used = (request < available) ? request : available;
2843     memmove(ptr, RAW(this->data) + this->pos, used);
2844     this->pos += used;
2845     return used/size;
2846 }
2847 
raw_fgetc(Rconnection con)2848 static int raw_fgetc(Rconnection con)
2849 {
2850     Rrawconn this = con->private;
2851     if(this->pos >= this->nbytes) return R_EOF;
2852     else return (int) RAW(this->data)[this->pos++];
2853 }
2854 
raw_seek(Rconnection con,double where,int origin,int rw)2855 static double raw_seek(Rconnection con, double where, int origin, int rw)
2856 {
2857     Rrawconn this = con->private;
2858     double newpos;
2859     size_t oldpos = this->pos;
2860 
2861     if(ISNA(where)) return (double) oldpos;
2862 
2863     /* Do the calculations here as double to avoid integer overflow */
2864     switch(origin) {
2865     case 2: newpos = (double) this->pos + where; break;
2866     case 3: newpos = (double) this->nbytes + where; break;
2867     default: newpos = where;
2868     }
2869     if(newpos < 0 || newpos > this->nbytes)
2870 	error(_("attempt to seek outside the range of the raw connection"));
2871     else this->pos = (size_t) newpos;
2872 
2873     return (double) oldpos;
2874 }
2875 
newraw(const char * description,SEXP raw,const char * mode)2876 static Rconnection newraw(const char *description, SEXP raw, const char *mode)
2877 {
2878     Rconnection new;
2879 
2880     new = (Rconnection) malloc(sizeof(struct Rconn));
2881     if(!new) error(_("allocation of raw connection failed"));
2882     new->class = (char *) malloc(strlen("rawConnection") + 1);
2883     if(!new->class) {
2884 	free(new);
2885 	error(_("allocation of raw connection failed"));
2886 	/* for Solaris 12.5 */ new = NULL;
2887     }
2888     strcpy(new->class, "rawConnection");
2889     new->description = (char *) malloc(strlen(description) + 1);
2890     if(!new->description) {
2891 	free(new->class); free(new);
2892 	error(_("allocation of raw connection failed"));
2893 	/* for Solaris 12.5 */ new = NULL;
2894     }
2895     init_con(new, description, CE_NATIVE, mode);
2896     new->isopen = TRUE;
2897     new->text = FALSE;
2898     new->blocking = TRUE;
2899     new->canseek = TRUE;
2900     new->canwrite = (mode[0] == 'w' || mode[0] == 'a');
2901     new->canread = mode[0] == 'r';
2902     if(strlen(mode) >= 2 && mode[1] == '+') new->canread = new->canwrite = TRUE;
2903     new->open = &raw_open;
2904     new->close = &raw_close;
2905     new->destroy = &raw_destroy;
2906     if(new->canwrite) {
2907 	new->write = &raw_write;
2908 	new->vfprintf = &dummy_vfprintf;
2909 	new->truncate = &raw_truncate;
2910     }
2911     if(new->canread) {
2912 	new->read = &raw_read;
2913 	new->fgetc = &raw_fgetc;
2914     }
2915     new->seek = &raw_seek;
2916     new->private = (void*) malloc(sizeof(struct rawconn));
2917     if(!new->private) {
2918 	free(new->description); free(new->class); free(new);
2919 	error(_("allocation of raw connection failed"));
2920 	/* for Solaris 12.5 */ new = NULL;
2921     }
2922     raw_init(new, raw);
2923     if(mode[0] == 'a') raw_seek(new, 0, 3, 0);
2924     return new;
2925 }
2926 
2927 // .Internal(rawConnection(deparse(substitute(object)), object, open))
do_rawconnection(SEXP call,SEXP op,SEXP args,SEXP env)2928 SEXP attribute_hidden do_rawconnection(SEXP call, SEXP op, SEXP args, SEXP env)
2929 {
2930     SEXP sfile, sraw, sopen, ans, class;
2931     const char *desc, *open;
2932     int ncon;
2933     Rconnection con = NULL;
2934 
2935     checkArity(op, args);
2936     sfile = CAR(args);
2937     if(!isString(sfile) || LENGTH(sfile) != 1 ||
2938        STRING_ELT(sfile, 0) == NA_STRING)
2939 	error(_("invalid '%s' argument"), "description");
2940     desc = translateCharFP(STRING_ELT(sfile, 0));
2941     sraw = CADR(args);
2942     sopen = CADDR(args);
2943     if(!isString(sopen) || LENGTH(sopen) != 1)
2944 	error(_("invalid '%s' argument"), "open");
2945     open = CHAR(STRING_ELT(sopen, 0)); /* ASCII */
2946     if(strchr(open, 't'))
2947 	error(_("invalid '%s' argument"), "open");
2948     ncon = NextConnection();
2949     if(TYPEOF(sraw) != RAWSXP)
2950 	error(_("invalid '%s' argument"), "raw");
2951     con = Connections[ncon] = newraw(desc, sraw, open);
2952 
2953     /* already opened */
2954 
2955     PROTECT(ans = ScalarInteger(ncon));
2956     PROTECT(class = allocVector(STRSXP, 2));
2957     SET_STRING_ELT(class, 0, mkChar("rawConnection"));
2958     SET_STRING_ELT(class, 1, mkChar("connection"));
2959     classgets(ans, class);
2960     con->ex_ptr = R_MakeExternalPtr(con->id, install("connection"), R_NilValue);
2961     setAttrib(ans, R_ConnIdSymbol, con->ex_ptr);
2962     R_RegisterCFinalizerEx(con->ex_ptr, conFinalizer, FALSE);
2963     UNPROTECT(2);
2964     return ans;
2965 }
2966 
getConnectionCheck(SEXP rcon,const char * cls,const char * var)2967 static Rconnection getConnectionCheck(SEXP rcon, const char *cls,
2968 				      const char *var)
2969 {
2970     if(!inherits(rcon, cls))
2971 	error(_("'%s' is not a %s"), var, cls);
2972     Rconnection con = getConnection(asInteger(rcon));
2973     /* check that the R class and internal class match */
2974     if (strcmp(con->class, cls))
2975 	error(_("internal connection is not a %s"), cls);
2976     return con;
2977 }
2978 
do_rawconvalue(SEXP call,SEXP op,SEXP args,SEXP env)2979 SEXP attribute_hidden do_rawconvalue(SEXP call, SEXP op, SEXP args, SEXP env)
2980 {
2981     Rconnection con=NULL;
2982     Rrawconn this;
2983     SEXP ans;
2984 
2985     checkArity(op, args);
2986     con = getConnectionCheck(CAR(args), "rawConnection", "con");
2987     if(!con->canwrite)
2988 	error(_("'con' is not an output rawConnection"));
2989     this = con->private;
2990     ans = allocVector(RAWSXP, this->nbytes); /* later, use TRUELENGTH? */
2991     memcpy(RAW(ans), RAW(this->data), this->nbytes);
2992     return ans;
2993 }
2994 
2995 /* ------------------- text connections --------------------- */
2996 
2997 typedef struct textconn {
2998     char *data;  /* all the data */
2999     size_t cur, nchars; /* current pos and number of chars */
3000     char save; /* pushback */
3001 } *Rtextconn;
3002 
3003 typedef struct outtextconn {
3004     size_t len;  /* number of lines */
3005     SEXP namesymbol;
3006     SEXP data;
3007     char *lastline;
3008     int lastlinelength; /* buffer size */
3009 } *Routtextconn;
3010 
3011 /* read a R character vector into a buffer  --- helper for newtext() */
text_init(Rconnection con,SEXP text,int type)3012 static void text_init(Rconnection con, SEXP text, int type)
3013 {
3014     R_xlen_t nlines = xlength(text);  // not very plausible that this is long
3015     size_t nchars = 0; /* -Wall */
3016     double dnc = 0.0;
3017     Rtextconn this = con->private;
3018     const void *vmax = vmaxget();
3019 
3020     for(R_xlen_t i = 0; i < nlines; i++)
3021 	dnc +=
3022 	    /*     type =  1 |    2    |    3    <==>
3023 	     * encoding = "" | "bytes" | "UTF-8" */
3024 	    (double) strlen(type == 1 ? translateChar(STRING_ELT(text, i))
3025 			    : ((type == 3) ?translateCharUTF8(STRING_ELT(text, i))
3026 			       : CHAR(STRING_ELT(text, i))) ) + 1;
3027     if (dnc >= (double) SIZE_MAX)
3028 	error(_("too many characters for text connection"));
3029     else nchars = (size_t) dnc;
3030     this->data = (char *) malloc(nchars+1);
3031     if(!this->data) {
3032 	free(this); free(con->description); free(con->class); free(con);
3033 	error(_("cannot allocate memory for text connection"));
3034     }
3035     char *t = this->data;
3036     for(R_xlen_t i = 0; i < nlines; i++) {
3037 	const char *s = (type == 1) ? translateChar(STRING_ELT(text, i))
3038 	    : ((type == 3) ? translateCharUTF8(STRING_ELT(text, i))
3039 	       : CHAR(STRING_ELT(text, i)));
3040 	while(*s) *t++ = *s++;
3041 	*t++ = '\n';
3042     }
3043     *t = '\0';
3044     this->nchars = nchars;
3045     this->cur = this->save = 0;
3046     vmaxset(vmax);
3047 }
3048 
text_open(Rconnection con)3049 static Rboolean text_open(Rconnection con)
3050 {
3051     con->save = -1000;
3052     return TRUE;
3053 }
3054 
text_close(Rconnection con)3055 static void text_close(Rconnection con)
3056 {
3057 }
3058 
text_destroy(Rconnection con)3059 static void text_destroy(Rconnection con)
3060 {
3061     Rtextconn this = con->private;
3062 
3063     free(this->data);
3064     /* this->cur = this->nchars = 0; */
3065     free(this);
3066 }
3067 
text_fgetc(Rconnection con)3068 static int text_fgetc(Rconnection con)
3069 {
3070     Rtextconn this = con->private;
3071     if(this->save) {
3072 	int c;
3073 	c = this->save;
3074 	this->save = 0;
3075 	return c;
3076     }
3077     if(this->cur >= this->nchars) return R_EOF;
3078     else return (int) (this->data[this->cur++]);
3079 }
3080 
text_seek(Rconnection con,double where,int origin,int rw)3081 static double text_seek(Rconnection con, double where, int origin, int rw)
3082 {
3083     if(where >= 0) error(_("seek is not relevant for text connection"));
3084     return 0; /* if just asking, always at the beginning */
3085 }
3086 
3087 // helper for do_textconnection(.., open = "r") :
newtext(const char * description,SEXP text,int type)3088 static Rconnection newtext(const char *description, SEXP text, int type)
3089 {
3090     Rconnection new;
3091     new = (Rconnection) malloc(sizeof(struct Rconn));
3092     if(!new) error(_("allocation of text connection failed"));
3093     new->class = (char *) malloc(strlen("textConnection") + 1);
3094     if(!new->class) {
3095 	free(new);
3096 	error(_("allocation of text connection failed"));
3097 	/* for Solaris 12.5 */ new = NULL;
3098     }
3099     strcpy(new->class, "textConnection");
3100     new->description = (char *) malloc(strlen(description) + 1);
3101     if(!new->description) {
3102 	free(new->class); free(new);
3103 	error(_("allocation of text connection failed"));
3104 	/* for Solaris 12.5 */ new = NULL;
3105     }
3106     init_con(new, description, CE_NATIVE, "r");
3107     new->isopen = TRUE;
3108     new->canwrite = FALSE;
3109     new->open = &text_open;
3110     new->close = &text_close;
3111     new->destroy = &text_destroy;
3112     new->fgetc = &text_fgetc;
3113     new->seek = &text_seek;
3114     new->private = (void*) malloc(sizeof(struct textconn));
3115     if(!new->private) {
3116 	free(new->description); free(new->class); free(new);
3117 	error(_("allocation of text connection failed"));
3118 	/* for Solaris 12.5 */ new = NULL;
3119     }
3120     text_init(new, text, type);
3121     return new;
3122 }
3123 
3124 
mkCharLocal(const char * s)3125 static SEXP mkCharLocal(const char *s)
3126 {
3127     int ienc = CE_NATIVE;
3128     if(known_to_be_latin1) ienc = CE_LATIN1;
3129     if(known_to_be_utf8) ienc = CE_UTF8;
3130     return mkCharCE(s, ienc);
3131 }
3132 
outtext_close(Rconnection con)3133 static void outtext_close(Rconnection con)
3134 {
3135     Routtextconn this = con->private;
3136     int idx = ConnIndex(con);
3137     SEXP tmp, env = VECTOR_ELT(OutTextData, idx);
3138 
3139     if(this->namesymbol &&
3140        findVarInFrame3(env, this->namesymbol, FALSE) != R_UnboundValue)
3141 	R_unLockBinding(this->namesymbol, env);
3142     if(strlen(this->lastline) > 0) {
3143 	PROTECT(tmp = xlengthgets(this->data, ++this->len));
3144 	SET_STRING_ELT(tmp, this->len - 1, mkCharLocal(this->lastline));
3145 	if(this->namesymbol) defineVar(this->namesymbol, tmp, env);
3146 	ENSURE_NAMEDMAX(tmp);
3147 	this->data = tmp;
3148 	UNPROTECT(1);
3149     }
3150 }
3151 
outtext_destroy(Rconnection con)3152 static void outtext_destroy(Rconnection con)
3153 {
3154     Routtextconn this = con->private;
3155     int idx = ConnIndex(con);
3156     /* OutTextData is preserved, and that implies that the environment
3157        we are writing it and hence the character vector is protected.
3158        However, this could be quite expensive.
3159     */
3160     SET_VECTOR_ELT(OutTextData, idx, R_NilValue);
3161     if(!this->namesymbol) R_ReleaseObject(this->data);
3162     free(this->lastline); free(this);
3163 }
3164 
3165 #define LAST_LINE_LEN 256
3166 
text_vfprintf(Rconnection con,const char * format,va_list ap)3167 static int text_vfprintf(Rconnection con, const char *format, va_list ap)
3168 {
3169     Routtextconn this = con->private;
3170     char buf[BUFSIZE], *b = buf, *p, *q;
3171     const void *vmax = NULL;
3172     int res = 0, buffree,
3173 	already = (int) strlen(this->lastline); // we do not allow longer lines
3174     SEXP tmp;
3175 
3176     va_list aq;
3177     va_copy(aq, ap);
3178     if(already >= BUFSIZE) {
3179 	/* This will fail so just call vsnprintf to get the length of
3180 	   the new piece */
3181 	res = vsnprintf(buf, 0, format, aq);
3182 	if(res > 0) res += already;
3183 	buffree = 0;
3184     } else {
3185 	strcpy(b, this->lastline);
3186 	p = b + already;
3187 	buffree = BUFSIZE - already; // checked < BUFSIZE above
3188 	res = vsnprintf(p, buffree, format, aq);
3189     }
3190     va_end(aq);
3191     if(res >= buffree) { /* res is the desired output length */
3192 	vmax = vmaxget();
3193 	b = R_alloc(res + already + 1, sizeof(char));
3194 	strcpy(b, this->lastline);
3195 	p = b + already;
3196 	vsprintf(p, format, ap);
3197     } else if(res < 0) { /* just a failure indication */
3198 #define NBUFSIZE (already + 100*BUFSIZE)
3199 	vmax = vmaxget();
3200 	b = R_alloc(NBUFSIZE, sizeof(char));
3201 	strncpy(b, this->lastline, NBUFSIZE); /* `already` < NBUFSIZE */
3202 	*(b + NBUFSIZE - 1) = '\0';
3203 	p = b + already;
3204 	res = Rvsnprintf_mbcs(p, NBUFSIZE - already, format, ap);
3205 	if (res < 0 || res >= NBUFSIZE - already) {
3206 	    warning(_("printing of extremely long output is truncated"));
3207 	}
3208     }
3209 
3210     /* copy buf line-by-line to object */
3211     for(p = b; ; p = q+1) {
3212 	q = Rf_strchr(p, '\n');
3213 	if(q) {
3214 	    int idx = ConnIndex(con);
3215 	    SEXP env = VECTOR_ELT(OutTextData, idx);
3216 	    *q = '\0';
3217 	    PROTECT(tmp = xlengthgets(this->data, ++this->len));
3218 	    SET_STRING_ELT(tmp, this->len - 1, mkCharLocal(p));
3219 	    if(this->namesymbol) {
3220 		if(findVarInFrame3(env, this->namesymbol, FALSE)
3221 		   != R_UnboundValue) R_unLockBinding(this->namesymbol, env);
3222 		defineVar(this->namesymbol, tmp, env);
3223 		R_LockBinding(this->namesymbol, env);
3224 	    } else {
3225 		R_ReleaseObject(this->data);
3226 		R_PreserveObject(tmp);
3227 	    }
3228 	    this->data = tmp;
3229 	    ENSURE_NAMEDMAX(tmp);
3230 	    UNPROTECT(1);
3231 	} else {
3232 	    /* retain the last line */
3233 	    if(strlen(p) >= this->lastlinelength) {
3234 		size_t newlen = strlen(p) + 1;
3235 		if (newlen > INT_MAX) error("last line is too long");
3236 		void * tmp = realloc(this->lastline, newlen);
3237 		if (tmp) {
3238 		    this->lastline = tmp;
3239 		    this->lastlinelength = (int) newlen;
3240 		} else {
3241 		    warning("allocation problem for last line");
3242 		    this->lastline = NULL;
3243 		    this->lastlinelength = 0;
3244 		}
3245 	    }
3246 	    strcpy(this->lastline, p);
3247 	    con->incomplete = strlen(this->lastline) > 0;
3248 	    break;
3249 	}
3250     }
3251     if(vmax) vmaxset(vmax);
3252     return res;
3253 }
3254 
3255 // finalizing helper for  newouttext() :
outtext_init(Rconnection con,SEXP stext,const char * mode,int idx)3256 static void outtext_init(Rconnection con, SEXP stext, const char *mode, int idx)
3257 {
3258     Routtextconn this = con->private;
3259     SEXP val;
3260 
3261     if(stext == R_NilValue) {
3262 	this->namesymbol = NULL;
3263 	    /* create variable pointed to by con->description */
3264 	val = allocVector(STRSXP, 0);
3265 	R_PreserveObject(val);
3266     } else {
3267 	this->namesymbol = install(con->description);
3268 	if(strcmp(mode, "w") == 0) {
3269 	    /* create variable pointed to by con->description */
3270 	    PROTECT(val = allocVector(STRSXP, 0));
3271 	    defineVar(this->namesymbol, val, VECTOR_ELT(OutTextData, idx));
3272 	    /* Not clear if this is needed, but be conservative */
3273 	    ENSURE_NAMEDMAX(val);
3274 	    UNPROTECT(1);
3275 	} else {
3276 	    /* take over existing variable */
3277 	    val = findVar1(this->namesymbol, VECTOR_ELT(OutTextData, idx),
3278 			   STRSXP, FALSE);
3279 	    if(val == R_UnboundValue) {
3280 		warning(_("text connection: appending to a non-existent char vector"));
3281 		PROTECT(val = allocVector(STRSXP, 0));
3282 		defineVar(this->namesymbol, val, VECTOR_ELT(OutTextData, idx));
3283 		ENSURE_NAMEDMAX(val);
3284 		UNPROTECT(1);
3285 	    }
3286 	    PROTECT(val);
3287 	    R_LockBinding(this->namesymbol, VECTOR_ELT(OutTextData, idx));
3288 	    UNPROTECT(1);
3289 	}
3290     }
3291     this->len = LENGTH(val);
3292     this->data = val;
3293     this->lastline[0] = '\0';
3294     this->lastlinelength = LAST_LINE_LEN;
3295 }
3296 
3297 // helper for do_textconnection(.., open = "w" or "a") :
newouttext(const char * description,SEXP stext,const char * mode,int idx)3298 static Rconnection newouttext(const char *description, SEXP stext,
3299 			      const char *mode, int idx)
3300 {
3301     Rconnection new;
3302     void *tmp;
3303 
3304     new = (Rconnection) malloc(sizeof(struct Rconn));
3305     if(!new) error(_("allocation of text connection failed"));
3306     new->class = (char *) malloc(strlen("textConnection") + 1);
3307     if(!new->class) {
3308 	free(new);
3309 	error(_("allocation of text connection failed"));
3310  	/* for Solaris 12.5 */ new = NULL;
3311    }
3312     strcpy(new->class, "textConnection");
3313     new->description = (char *) malloc(strlen(description) + 1);
3314     if(!new->description) {
3315 	free(new->class); free(new);
3316 	error(_("allocation of text connection failed"));
3317 	/* for Solaris 12.5 */ new = NULL;
3318     }
3319     init_con(new, description, CE_NATIVE, mode);
3320     new->isopen = TRUE;
3321     new->canread = FALSE;
3322     new->open = &text_open;
3323     new->close = &outtext_close;
3324     new->destroy = &outtext_destroy;
3325     new->vfprintf = &text_vfprintf;
3326     new->seek = &text_seek;
3327     new->private = (void*) malloc(sizeof(struct outtextconn));
3328     if(!new->private) {
3329 	free(new->description); free(new->class); free(new);
3330 	error(_("allocation of text connection failed"));
3331 	/* for Solaris 12.5 */ new = NULL;
3332     }
3333     ((Routtextconn)new->private)->lastline = tmp = malloc(LAST_LINE_LEN);
3334     if(!tmp) {
3335 	free(new->private);
3336 	free(new->description); free(new->class); free(new);
3337 	error(_("allocation of text connection failed"));
3338 	/* for Solaris 12.5 */ new = NULL;
3339     }
3340     outtext_init(new, stext, mode, idx);
3341     return new;
3342 }
3343 
3344 // .Internal(textConnection(name, object, open, env, type))
do_textconnection(SEXP call,SEXP op,SEXP args,SEXP env)3345 SEXP attribute_hidden do_textconnection(SEXP call, SEXP op, SEXP args, SEXP env)
3346 {
3347     SEXP sdesc, stext, sopen, ans, class, venv;
3348     const char *desc, *open;
3349     int ncon, type;
3350     Rconnection con = NULL;
3351 
3352     checkArity(op, args);
3353     sdesc = CAR(args);
3354     if(!isString(sdesc) || LENGTH(sdesc) != 1 ||
3355        STRING_ELT(sdesc, 0) == NA_STRING)
3356 	error(_("invalid '%s' argument"), "description");
3357     desc = translateChar(STRING_ELT(sdesc, 0));
3358     stext = CADR(args); // object
3359     sopen = CADDR(args);
3360     if(!isString(sopen) || LENGTH(sopen) != 1)
3361 	error(_("invalid '%s' argument"), "open");
3362     open = CHAR(STRING_ELT(sopen, 0)); /* ASCII */
3363     venv = CADDDR(args);
3364     if (isNull(venv))
3365 	error(_("use of NULL environment is defunct"));
3366     if (!isEnvironment(venv))
3367 	error(_("invalid '%s' argument"), "environment");
3368     type = asInteger(CAD4R(args));
3369     if (type == NA_INTEGER)
3370 	error(_("invalid '%s' argument"), "encoding");
3371     ncon = NextConnection();
3372     if(!strlen(open) || strncmp(open, "r", 1) == 0) {
3373 	if(!isString(stext))
3374 	    error(_("invalid '%s' argument"), "text");
3375 	con = Connections[ncon] = newtext(desc, stext, type);
3376     } else if (strncmp(open, "w", 1) == 0 || strncmp(open, "a", 1) == 0) {
3377 	if (OutTextData == NULL) {
3378 	    OutTextData = allocVector(VECSXP, NCONNECTIONS);
3379 	    R_PreserveObject(OutTextData);
3380 	}
3381 	SET_VECTOR_ELT(OutTextData, ncon, venv);
3382 	if(stext == R_NilValue)
3383 	    con = Connections[ncon] = newouttext("NULL", stext, open, ncon);
3384 	else if(isString(stext) && LENGTH(stext) == 1)
3385 	    con = Connections[ncon] =
3386 		newouttext(translateChar(STRING_ELT(stext, 0)), stext,
3387 			   open, ncon);
3388 	else
3389 	    error(_("invalid '%s' argument"), "text");
3390     }
3391     else
3392 	error(_("unsupported mode"));
3393     /* already opened */
3394 
3395     PROTECT(ans = ScalarInteger(ncon));
3396     PROTECT(class = allocVector(STRSXP, 2));
3397     SET_STRING_ELT(class, 0, mkChar("textConnection"));
3398     SET_STRING_ELT(class, 1, mkChar("connection"));
3399     classgets(ans, class);
3400     con->ex_ptr = R_MakeExternalPtr(con->id, install("connection"), R_NilValue);
3401     setAttrib(ans, R_ConnIdSymbol, con->ex_ptr);
3402     R_RegisterCFinalizerEx(con->ex_ptr, conFinalizer, FALSE);
3403     UNPROTECT(2);
3404     return ans;
3405 }
3406 
do_textconvalue(SEXP call,SEXP op,SEXP args,SEXP env)3407 SEXP attribute_hidden do_textconvalue(SEXP call, SEXP op, SEXP args, SEXP env)
3408 {
3409     Rconnection con=NULL;
3410     Routtextconn this;
3411 
3412     checkArity(op, args);
3413     con = getConnectionCheck(CAR(args), "textConnection", "con");
3414     if(!con->canwrite)
3415 	error(_("'con' is not an output textConnection"));
3416     this = con->private;
3417     return this->data;
3418 }
3419 
3420 
3421 
3422 /* ------------------- socket connections  --------------------- */
3423 
3424 
3425 /* socketConnection(host, port, server, blocking, open, encoding, timeout, options) */
3426 /* socketAccept(socket, blocking, open, encoding, timeout, options) */
do_sockconn(SEXP call,SEXP op,SEXP args,SEXP env)3427 SEXP attribute_hidden do_sockconn(SEXP call, SEXP op, SEXP args, SEXP env)
3428 {
3429     SEXP scmd, sopen, ans, class, enc;
3430     const char *host, *open;
3431     int ncon, port, server, blocking, timeout, serverfd, options = 0;
3432     Rconnection con = NULL;
3433     Rservsockconn scon = NULL;
3434 
3435     checkArity(op, args);
3436     if (PRIMVAL(op) == 0) { /* socketConnection */
3437 	scmd = CAR(args);
3438 	if(!isString(scmd) || LENGTH(scmd) != 1)
3439 	    error(_("invalid '%s' argument"), "host");
3440 	host = translateCharFP(STRING_ELT(scmd, 0));
3441 	args = CDR(args);
3442 	port = asInteger(CAR(args));
3443 	if(port == NA_INTEGER || port < 0)
3444 	    error(_("invalid '%s' argument"), "port");
3445 	args = CDR(args);
3446 	server = asLogical(CAR(args));
3447 	if(server == NA_LOGICAL)
3448 	    error(_("invalid '%s' argument"), "server");
3449 	serverfd = -1;
3450     } else { /* socketAccept */
3451 	scon = getConnectionCheck(CAR(args), "servsockconn", "socket")->private;
3452 	port = scon->port;
3453 	server = 1;
3454 	host = "localhost"; /* ignored */
3455 	serverfd = scon->fd;
3456     }
3457     args = CDR(args);
3458     blocking = asLogical(CAR(args));
3459     if(blocking == NA_LOGICAL)
3460 	error(_("invalid '%s' argument"), "blocking");
3461     args = CDR(args);
3462     sopen = CAR(args);
3463     if(!isString(sopen) || LENGTH(sopen) != 1)
3464 	error(_("invalid '%s' argument"), "open");
3465     open = CHAR(STRING_ELT(sopen, 0)); /* ASCII */
3466     args = CDR(args);
3467     enc = CAR(args);
3468     if(!isString(enc) || LENGTH(enc) != 1 ||
3469        strlen(CHAR(STRING_ELT(enc, 0))) > 100) /* ASCII */
3470 	error(_("invalid '%s' argument"), "encoding");
3471     args = CDR(args);
3472     timeout = asInteger(CAR(args));
3473     args = CDR(args);
3474     /* we don't issue errors/warnings on unknown options to allow for
3475        future extensions */
3476     if (isString(CAR(args))) {
3477 	SEXP sOpts = CAR(args);
3478 	int i = 0, n = LENGTH(sOpts);
3479 	while (i < n) {
3480 	    const char *opt = CHAR(STRING_ELT(sOpts, i));
3481 	    if (!strcmp("no-delay", opt))
3482 		options |= RSC_SET_TCP_NODELAY;
3483 	    i++;
3484 	}
3485     }
3486 
3487     ncon = NextConnection();
3488     con = R_newsock(host, port, server, serverfd, open, timeout, options);
3489     Connections[ncon] = con;
3490     con->blocking = blocking;
3491     strncpy(con->encname, CHAR(STRING_ELT(enc, 0)), 100); /* ASCII */
3492     con->encname[100 - 1] = '\0';
3493     con->ex_ptr = PROTECT(R_MakeExternalPtr(con->id, install("connection"), R_NilValue));
3494 
3495     /* open it if desired */
3496     if(strlen(open)) {
3497 	Rboolean success = con->open(con);
3498 	if(!success) {
3499 	    con_destroy(ncon);
3500 	    error(_("cannot open the connection"));
3501 	}
3502     }
3503 
3504     PROTECT(ans = ScalarInteger(ncon));
3505     PROTECT(class = allocVector(STRSXP, 2));
3506     SET_STRING_ELT(class, 0, mkChar("sockconn"));
3507     SET_STRING_ELT(class, 1, mkChar("connection"));
3508     classgets(ans, class);
3509     setAttrib(ans, R_ConnIdSymbol, con->ex_ptr);
3510     R_RegisterCFinalizerEx(con->ex_ptr, conFinalizer, FALSE);
3511     UNPROTECT(3);
3512     return ans;
3513 }
3514 
3515 /* ------------------- unz connections  --------------------- */
3516 
3517 /* .Internal(unz(paste(description, filename, sep = ":"),
3518  *               open, encoding)) */
do_unz(SEXP call,SEXP op,SEXP args,SEXP env)3519 SEXP attribute_hidden do_unz(SEXP call, SEXP op, SEXP args, SEXP env)
3520 {
3521     SEXP sfile, sopen, ans, class, enc;
3522     const char *file, *open;
3523     int ncon;
3524     Rconnection con = NULL;
3525 
3526     checkArity(op, args);
3527     sfile = CAR(args);
3528     if(!isString(sfile) || LENGTH(sfile) != 1 ||
3529        STRING_ELT(sfile, 0) == NA_STRING)
3530 	error(_("invalid '%s' argument"), "description");
3531     if(length(sfile) > 1)
3532 	warning(_("only first element of 'description' argument used"));
3533     file = translateCharFP(STRING_ELT(sfile, 0));
3534     sopen = CADR(args);
3535     if(!isString(sopen) || LENGTH(sopen) != 1)
3536 	error(_("invalid '%s' argument"), "open");
3537     enc = CADDR(args);
3538     if(!isString(enc) || LENGTH(enc) != 1 ||
3539        strlen(CHAR(STRING_ELT(enc, 0))) > 100) /* ASCII */
3540 	error(_("invalid '%s' argument"), "encoding");
3541     open = CHAR(STRING_ELT(sopen, 0)); /* ASCII */
3542     ncon = NextConnection();
3543     con = Connections[ncon] = R_newunz(file, strlen(open) ? open : "r"); // see dounzip.c for the details
3544 
3545     strncpy(con->encname, CHAR(STRING_ELT(enc, 0)), 100); /* ASCII */
3546     con->encname[100 - 1] = '\0';
3547     con->ex_ptr = PROTECT(R_MakeExternalPtr(con->id, install("connection"), R_NilValue));
3548 
3549     /* open it if desired */
3550     if(strlen(open)) {
3551 	Rboolean success = con->open(con);
3552 	if(!success) {
3553 	    con_destroy(ncon);
3554 	    error(_("cannot open the connection"));
3555 	}
3556     }
3557 
3558     PROTECT(ans = ScalarInteger(ncon));
3559     PROTECT(class = allocVector(STRSXP, 2));
3560     SET_STRING_ELT(class, 0, mkChar("unz"));
3561     SET_STRING_ELT(class, 1, mkChar("connection"));
3562     classgets(ans, class);
3563     setAttrib(ans, R_ConnIdSymbol, con->ex_ptr);
3564     R_RegisterCFinalizerEx(con->ex_ptr, conFinalizer, FALSE);
3565     UNPROTECT(3);
3566 
3567     return ans;
3568 }
3569 
3570 /* -------------- open, close, seek, truncate, flush ------------------ */
3571 
do_open(SEXP call,SEXP op,SEXP args,SEXP env)3572 SEXP attribute_hidden do_open(SEXP call, SEXP op, SEXP args, SEXP env)
3573 {
3574     int i, block;
3575     Rconnection con=NULL;
3576     SEXP sopen;
3577     const char *open;
3578     Rboolean success;
3579 
3580     checkArity(op, args);
3581     if(!inherits(CAR(args), "connection"))
3582 	error(_("'con' is not a connection"));
3583     i = asInteger(CAR(args));
3584     con = getConnection(i);
3585     if(i < 3) error(_("cannot open standard connections"));
3586     if(con->isopen) {
3587 	warning(_("connection is already open"));
3588 	return R_NilValue;
3589     }
3590     sopen = CADR(args);
3591     if(!isString(sopen) || LENGTH(sopen) != 1)
3592 	error(_("invalid '%s' argument"), "open");
3593     block = asLogical(CADDR(args));
3594     if(block == NA_LOGICAL)
3595 	error(_("invalid '%s' argument"), "blocking");
3596     open = CHAR(STRING_ELT(sopen, 0)); /* ASCII */
3597     if(strlen(open) > 0) strcpy(con->mode, open);
3598     con->blocking = block;
3599     success = con->open(con);
3600     if(!success) {
3601 	/* con_destroy(i); user might have a reference */
3602 	error(_("cannot open the connection"));
3603     }
3604     return R_NilValue;
3605 }
3606 
do_isopen(SEXP call,SEXP op,SEXP args,SEXP env)3607 SEXP attribute_hidden do_isopen(SEXP call, SEXP op, SEXP args, SEXP env)
3608 {
3609     Rconnection con;
3610     int rw, res;
3611 
3612     checkArity(op, args);
3613     con = getConnection(asInteger(CAR(args)));
3614     rw = asInteger(CADR(args));
3615     res = con->isopen != FALSE;
3616     switch(rw) {
3617     case 0: break;
3618     case 1: res = res & con->canread; break;
3619     case 2: res = res & con->canwrite; break;
3620     default: error(_("unknown 'rw' value"));
3621     }
3622     return ScalarLogical(res);
3623 }
3624 
do_isincomplete(SEXP call,SEXP op,SEXP args,SEXP env)3625 SEXP attribute_hidden do_isincomplete(SEXP call, SEXP op, SEXP args, SEXP env)
3626 {
3627     Rconnection con;
3628 
3629     checkArity(op, args);
3630     if(!inherits(CAR(args), "connection"))
3631 	error(_("'con' is not a connection"));
3632     con = getConnection(asInteger(CAR(args)));
3633     return ScalarLogical(con->incomplete != FALSE);
3634 }
3635 
do_isseekable(SEXP call,SEXP op,SEXP args,SEXP env)3636 SEXP attribute_hidden do_isseekable(SEXP call, SEXP op, SEXP args, SEXP env)
3637 {
3638     Rconnection con;
3639 
3640     checkArity(op, args);
3641     if(!inherits(CAR(args), "connection"))
3642 	error(_("'con' is not a connection"));
3643     con = getConnection(asInteger(CAR(args)));
3644     return ScalarLogical(con->canseek != FALSE);
3645 }
3646 
checkClose(Rconnection con)3647 static void checkClose(Rconnection con)
3648 {
3649     if (con->isopen) {
3650         errno = 0;
3651     	con->close(con);
3652     	if (con->status != NA_INTEGER && con->status < 0) {
3653     	    int serrno = errno;
3654             if (serrno)
3655 		warning(_("Problem closing connection:  %s"), strerror(serrno));
3656 	    else
3657 		warning(_("Problem closing connection"));
3658    	 }
3659     }
3660 }
3661 
con_close1(Rconnection con)3662 static int con_close1(Rconnection con)
3663 {
3664     int status;
3665     checkClose(con);
3666     status = con->status;
3667     if(con->isGzcon) {
3668 	Rgzconn priv = con->private;
3669 	con_close1(priv->con);
3670 	R_ReleaseObject(priv->con->ex_ptr);
3671     }
3672     /* close inconv and outconv if open */
3673     if(con->inconv) Riconv_close(con->inconv);
3674     if(con->outconv) Riconv_close(con->outconv);
3675     con->destroy(con);
3676     free(con->class);
3677     con->class = NULL;
3678     free(con->description);
3679     con->description = NULL;
3680     /* clear the pushBack */
3681     if(con->nPushBack > 0) { // already cleared on closed connection,
3682 	                     // so no double-free pace -fanalyzer
3683 	int j;
3684 
3685 	for(j = 0; j < con->nPushBack; j++)
3686 	    free(con->PushBack[j]);
3687 	free(con->PushBack);
3688     }
3689     con->nPushBack = 0;
3690     if (con->buff) {
3691 	free(con->buff);
3692 	con->buff = NULL;
3693     }
3694     con->buff_len = con->buff_pos = con->buff_stored_len = 0;
3695     con->open = &null_open;
3696     con->close = &null_close;
3697     con->destroy = &null_destroy;
3698     con->vfprintf = &null_vfprintf;
3699     con->fgetc = con->fgetc_internal = &null_fgetc;
3700     con->seek = &null_seek;
3701     con->truncate = &null_truncate;
3702     con->fflush = &null_fflush;
3703     con->read = &null_read;
3704     con->write = &null_write;
3705     return status;
3706 }
3707 
3708 
con_destroy(int i)3709 static void con_destroy(int i)
3710 {
3711     Rconnection con=NULL;
3712 
3713     con = getConnection(i);
3714     con_close1(con);
3715     free(Connections[i]);
3716     Connections[i] = NULL;
3717 }
3718 
3719 
do_close(SEXP call,SEXP op,SEXP args,SEXP env)3720 SEXP attribute_hidden do_close(SEXP call, SEXP op, SEXP args, SEXP env)
3721 {
3722     int i, j;
3723 
3724     checkArity(op, args);
3725     if(!inherits(CAR(args), "connection"))
3726 	error(_("'con' is not a connection"));
3727     i = asInteger(CAR(args));
3728     if(i < 3) error(_("cannot close standard connections"));
3729     for(j = 0; j < R_SinkNumber; j++)
3730 	if(i == SinkCons[j])
3731 	    error(_("cannot close 'output' sink connection"));
3732     if(i == R_ErrorCon)
3733 	error(_("cannot close 'message' sink connection"));
3734     Rconnection con = getConnection(i);
3735     int status = con_close1(con);
3736     free(Connections[i]);
3737     Connections[i] = NULL;
3738     return (status != NA_INTEGER) ? ScalarInteger(status) : R_NilValue;
3739 }
3740 
Rconn_seek(Rconnection con,double where,int origin,int rw)3741 static double Rconn_seek(Rconnection con, double where, int origin, int rw) {
3742     if (con->buff)
3743 	return buff_seek(con, where, origin, rw);
3744     return con->seek(con, where, origin, rw);
3745 }
3746 
3747 /* seek(con, where = numeric(), origin = "start", rw = "") */
do_seek(SEXP call,SEXP op,SEXP args,SEXP env)3748 SEXP attribute_hidden do_seek(SEXP call, SEXP op, SEXP args, SEXP env)
3749 {
3750     int origin, rw;
3751     Rconnection con = NULL;
3752     double where;
3753 
3754     checkArity(op, args);
3755     if(!inherits(CAR(args), "connection"))
3756 	error(_("'con' is not a connection"));
3757     con = getConnection(asInteger(CAR(args)));
3758     if(!con->isopen) error(_("connection is not open"));
3759     where = asReal(CADR(args));
3760     origin = asInteger(CADDR(args));
3761     rw = asInteger(CADDDR(args));
3762     if(!ISNAN(where) && con->nPushBack > 0) {
3763 	/* clear pushback */
3764 	int j;
3765 	for(j = 0; j < con->nPushBack; j++) free(con->PushBack[j]);
3766 	free(con->PushBack);
3767 	con->nPushBack = 0;
3768     }
3769     return ScalarReal(Rconn_seek(con, where, origin, rw));
3770 }
3771 
3772 /* truncate(con) */
do_truncate(SEXP call,SEXP op,SEXP args,SEXP env)3773 SEXP attribute_hidden do_truncate(SEXP call, SEXP op, SEXP args, SEXP env)
3774 {
3775     Rconnection con = NULL;
3776 
3777     checkArity(op, args);
3778     if(!inherits(CAR(args), "connection"))
3779 	error(_("'con' is not a connection"));
3780     con = getConnection(asInteger(CAR(args)));
3781     con->truncate(con);
3782     return R_NilValue;
3783 }
3784 
do_flush(SEXP call,SEXP op,SEXP args,SEXP env)3785 SEXP attribute_hidden do_flush(SEXP call, SEXP op, SEXP args, SEXP env)
3786 {
3787     Rconnection con = NULL;
3788 
3789     checkArity(op, args);
3790     if(!inherits(CAR(args), "connection"))
3791 	error(_("'con' is not a connection"));
3792     con = getConnection(asInteger(CAR(args)));
3793     if(con->canwrite) con->fflush(con);
3794     return R_NilValue;
3795 }
3796 
3797 /* ------------------- read, write  text --------------------- */
3798 
Rconn_fgetc(Rconnection con)3799 int Rconn_fgetc(Rconnection con)
3800 {
3801     char *curLine;
3802     int c;
3803 
3804     if (con->save2 != -1000) {
3805 	c = con->save2;
3806 	con->save2 = -1000;
3807 	return c;
3808     }
3809     if(con->nPushBack <= 0) {
3810 	/* map CR or CRLF to LF */
3811 	if (con->save != -1000) {
3812 	    c = con->save;
3813 	    con->save = -1000;
3814 	    return c;
3815 	}
3816 	c = con->fgetc(con);
3817 	if (c == '\r') {
3818 	    c = con->fgetc(con);
3819 	    if (c != '\n') {
3820 		con->save = (c != '\r') ? c : '\n';
3821 		return('\n');
3822 	    }
3823 	}
3824 	return c;
3825     }
3826     curLine = con->PushBack[con->nPushBack-1];
3827     c = (unsigned char) curLine[con->posPushBack++];
3828     if(con->posPushBack >= strlen(curLine)) {
3829 	/* last character on a line, so pop the line */
3830 	free(curLine);
3831 	con->nPushBack--;
3832 	con->posPushBack = 0;
3833 	if(con->nPushBack == 0) free(con->PushBack);
3834     }
3835     return c;
3836 }
3837 
3838 #ifdef UNUSED
Rconn_ungetc(int c,Rconnection con)3839 int Rconn_ungetc(int c, Rconnection con)
3840 {
3841     con->save2 = c;
3842     return c;
3843 }
3844 #endif
3845 
3846 /* read one line (without trailing newline) from con and store it in buf */
3847 /* return number of characters read, -1 on EOF */
3848 attribute_hidden
Rconn_getline(Rconnection con,char * buf,size_t bufsize)3849 size_t Rconn_getline(Rconnection con, char *buf, size_t bufsize)
3850 {
3851     int c;
3852     ssize_t nbuf = -1;
3853 
3854     while((c = Rconn_fgetc(con)) != R_EOF) {
3855 	if(nbuf+1 >= bufsize)
3856 	    error(_("line longer than buffer size %lu"), (unsigned long) bufsize);
3857 	if(c != '\n'){
3858 	    buf[++nbuf] = (char) c; /* compiler-defined conversion behavior */
3859 	} else {
3860 	    buf[++nbuf] = '\0';
3861 	    break;
3862 	}
3863     }
3864     /* Make sure it is null-terminated and count is correct, even if
3865      *  file did not end with newline.
3866      */
3867     if(nbuf >= 0 && buf[nbuf]) {
3868 	if(nbuf+1 >= bufsize)
3869 	    error(_("line longer than buffer size %lu"), (unsigned long) bufsize);
3870 	buf[++nbuf] = '\0';
3871     }
3872     return (size_t) nbuf;
3873 }
3874 
Rconn_printf(Rconnection con,const char * format,...)3875 int Rconn_printf(Rconnection con, const char *format, ...)
3876 {
3877     int res;
3878     errno = 0;
3879     va_list(ap);
3880     va_start(ap, format);
3881     /* Parentheses added for FC4 with gcc4 and -D_FORTIFY_SOURCE=2 */
3882     res = (con->vfprintf)(con, format, ap);
3883     va_end(ap);
3884     /* PR#17243:  write.table and friends silently failed if the disk was full (or there was another error) */
3885     if (res < 0) {
3886 	if (errno)
3887 	    error(_("Error writing to connection:  %s"), strerror(errno));
3888 	else
3889 	    error(_("Error writing to connection"));
3890     }
3891     return res;
3892 }
3893 
con_cleanup(void * data)3894 static void con_cleanup(void *data)
3895 {
3896     Rconnection con = data;
3897     checkClose(con);
3898 }
3899 
3900 /* readLines(con = stdin(), n = 1, ok = TRUE, warn = TRUE) */
3901 #define BUF_SIZE 1000
do_readLines(SEXP call,SEXP op,SEXP args,SEXP env)3902 SEXP attribute_hidden do_readLines(SEXP call, SEXP op, SEXP args, SEXP env)
3903 {
3904     SEXP ans = R_NilValue, ans2;
3905     int ok, warn, skipNul, c;
3906     size_t nbuf, buf_size = BUF_SIZE;
3907     int oenc = CE_NATIVE;
3908     Rconnection con = NULL;
3909     Rboolean wasopen;
3910     char *buf;
3911     const char *encoding;
3912     RCNTXT cntxt;
3913     R_xlen_t i, n, nn, nnn, nread;
3914 
3915     checkArity(op, args);
3916     if(!inherits(CAR(args), "connection"))
3917 	error(_("'con' is not a connection"));
3918     con = getConnection(asInteger(CAR(args))); args = CDR(args);
3919     n = asVecSize(CAR(args)); args = CDR(args);
3920     if(n == -999)
3921 	error(_("invalid '%s' argument"), "n");
3922     ok = asLogical(CAR(args));  args = CDR(args);
3923     if(ok == NA_LOGICAL)
3924 	error(_("invalid '%s' argument"), "ok");
3925     warn = asLogical(CAR(args));  args = CDR(args);
3926     if(warn == NA_LOGICAL)
3927 	error(_("invalid '%s' argument"), "warn");
3928     if(!isString(CAR(args)) || LENGTH(CAR(args)) != 1)
3929 	error(_("invalid '%s' value"), "encoding");
3930     encoding = CHAR(STRING_ELT(CAR(args), 0));  args = CDR(args); /* ASCII */
3931     skipNul = asLogical(CAR(args));
3932     if(skipNul == NA_LOGICAL)
3933 	error(_("invalid '%s' argument"), "skipNul");
3934 
3935     wasopen = con->isopen;
3936     if(!wasopen) {
3937 	char mode[5];
3938 	con->UTF8out = TRUE;  /* a request */
3939 	strcpy(mode, con->mode);
3940 	strcpy(con->mode, "rt");
3941 	if(!con->open(con)) error(_("cannot open the connection"));
3942 	strcpy(con->mode, mode);
3943 	/* Set up a context which will close the connection on error */
3944 	begincontext(&cntxt, CTXT_CCODE, R_NilValue, R_BaseEnv, R_BaseEnv,
3945 		     R_NilValue, R_NilValue);
3946 	cntxt.cend = &con_cleanup;
3947 	cntxt.cenddata = con;
3948 	if(!con->canread) error(_("cannot read from this connection"));
3949     } else {
3950 	if(!con->canread) error(_("cannot read from this connection"));
3951 	/* for a non-blocking connection, more input may
3952 	   have become available, so re-position */
3953 	if(con->canseek && !con->blocking)
3954 	    Rconn_seek(con, Rconn_seek(con, -1, 1, 1), 1, 1);
3955     }
3956     con->incomplete = FALSE;
3957     if(con->UTF8out || streql(encoding, "UTF-8")) oenc = CE_UTF8;
3958     else if(streql(encoding, "latin1")) oenc = CE_LATIN1;
3959 
3960     buf = (char *) malloc(buf_size);
3961     if(!buf)
3962 	error(_("cannot allocate buffer in readLines"));
3963     nn = (n < 0) ? 1000 : n; /* initially allocate space for 1000 lines */
3964     nnn = (n < 0) ? R_XLEN_T_MAX : n;
3965     PROTECT(ans = allocVector(STRSXP, nn));
3966     for(nread = 0; nread < nnn; nread++) {
3967 	if(nread >= nn) {
3968 	    double dnn = 2.* nn;
3969 	    if (dnn > R_XLEN_T_MAX) error("too many items");
3970 	    ans2 = allocVector(STRSXP, 2*nn);
3971 	    for(i = 0; i < nn; i++)
3972 		SET_STRING_ELT(ans2, i, STRING_ELT(ans, i));
3973 	    nn *= 2;
3974 	    UNPROTECT(1); /* old ans */
3975 	    PROTECT(ans = ans2);
3976 	}
3977 	nbuf = 0;
3978 	while((c = Rconn_fgetc(con)) != R_EOF) {
3979 	    if(nbuf == buf_size-1) {  /* need space for the terminator */
3980 		buf_size *= 2;
3981 		char *tmp = (char *) realloc(buf, buf_size);
3982 		if(!buf) {
3983 		    free(buf);
3984 		    error(_("cannot allocate buffer in readLines"));
3985 		} else buf = tmp;
3986 	    }
3987 	    if(skipNul && c == '\0') continue;
3988 	    if(c != '\n')
3989 		/* compiler-defined conversion behavior */
3990 		buf[nbuf++] = (char) c;
3991 	    else
3992 		break;
3993 	}
3994 	buf[nbuf] = '\0';
3995 	/* Remove UTF-8 BOM */
3996 	const char *qbuf = buf;
3997 	// avoid valgrind warning if < 3 bytes
3998 	if (nread == 0 && utf8locale && strlen(buf) >= 3 &&
3999 	    !memcmp(buf, "\xef\xbb\xbf", 3)) qbuf = buf + 3;
4000 	SET_STRING_ELT(ans, nread, mkCharCE(qbuf, oenc));
4001 	if (warn && strlen(buf) < nbuf)
4002 	    warning(_("line %d appears to contain an embedded nul"), nread + 1);
4003 	if(c == R_EOF) goto no_more_lines;
4004     }
4005     if(!wasopen) {endcontext(&cntxt); con->close(con);}
4006     UNPROTECT(1);
4007     free(buf);
4008     return ans;
4009 no_more_lines:
4010     if(!wasopen) {endcontext(&cntxt); con->close(con);}
4011     if(nbuf > 0) { /* incomplete last line */
4012 	if(con->text && !con->blocking &&
4013 	   (strcmp(con->class, "gzfile") != 0)) {
4014 	    /* push back the rest */
4015 	    con_pushback(con, 0, buf);
4016 	    con->incomplete = TRUE;
4017 	} else {
4018 	    nread++;
4019 	    if(warn)
4020 		warning(_("incomplete final line found on '%s'"),
4021 			con->description);
4022 	}
4023     }
4024     free(buf);
4025     if(nread < nnn && !ok)
4026 	error(_("too few lines read in readLines"));
4027     PROTECT(ans2 = allocVector(STRSXP, nread));
4028     for(i = 0; i < nread; i++)
4029 	SET_STRING_ELT(ans2, i, STRING_ELT(ans, i));
4030     UNPROTECT(2);
4031     return ans2;
4032 }
4033 
4034 /* writeLines(text, con = stdout(), sep = "\n", useBytes) */
do_writelines(SEXP call,SEXP op,SEXP args,SEXP env)4035 SEXP attribute_hidden do_writelines(SEXP call, SEXP op, SEXP args, SEXP env)
4036 {
4037     int con_num, useBytes;
4038     Rboolean wasopen;
4039     Rconnection con=NULL;
4040     const char *ssep;
4041     SEXP text, sep;
4042     RCNTXT cntxt;
4043 
4044     checkArity(op, args);
4045     text = CAR(args);
4046     if(!isString(text)) error(_("invalid '%s' argument"), "text");
4047     if(!inherits(CADR(args), "connection"))
4048 	error(_("'con' is not a connection"));
4049     con_num = asInteger(CADR(args));
4050     con = getConnection(con_num);
4051     sep = CADDR(args);
4052     if(!isString(sep)) error(_("invalid '%s' argument"), "sep");
4053     useBytes = asLogical(CADDDR(args));
4054     if(useBytes == NA_LOGICAL)
4055 	error(_("invalid '%s' argument"), "useBytes");
4056 
4057     wasopen = con->isopen;
4058     if(!wasopen) {
4059 	char mode[5];
4060 	/* Documented behaviour */
4061 	strcpy(mode, con->mode);
4062 	strcpy(con->mode, "wt");
4063 	if(!con->open(con)) error(_("cannot open the connection"));
4064 	strcpy(con->mode, mode);
4065 	/* Set up a context which will close the connection on error */
4066 	begincontext(&cntxt, CTXT_CCODE, R_NilValue, R_BaseEnv, R_BaseEnv,
4067 		     R_NilValue, R_NilValue);
4068 	cntxt.cend = &con_cleanup;
4069 	cntxt.cenddata = con;
4070     }
4071     if(!con->canwrite) error(_("cannot write to this connection"));
4072     /* NB: translateChar0() is the same as CHAR() for IS_BYTES strings */
4073     if(useBytes)
4074 	ssep = CHAR(STRING_ELT(sep, 0));
4075     else
4076 	ssep = translateChar0(STRING_ELT(sep, 0));
4077 
4078     /* New for 2.7.0: split the output if sink was split.
4079        It would be slightly simpler just to call Rvprintf if the
4080        connection was stdout(), but this way is more efficent */
4081     if(con_num == R_OutputCon) {
4082 	int j = 0;
4083 	Rconnection con0;
4084 	do {
4085 	    con0 = getConnection(con_num);
4086 	    for(R_xlen_t i = 0; i < XLENGTH(text); i++)
4087 		Rconn_printf(con0, "%s%s",
4088 			     useBytes ? CHAR(STRING_ELT(text, i)) :
4089 			     translateChar0(STRING_ELT(text, i)), ssep);
4090 	    con0->fflush(con0);
4091 	    con_num = getActiveSink(j++);
4092 	} while (con_num > 0);
4093     } else {
4094 	for(R_xlen_t i = 0; i < XLENGTH(text); i++)
4095 	    Rconn_printf(con, "%s%s",
4096 			 useBytes ? CHAR(STRING_ELT(text, i)) :
4097 			 translateChar0(STRING_ELT(text, i)), ssep);
4098     }
4099 
4100     if(!wasopen) {
4101     	endcontext(&cntxt);
4102     	checkClose(con);
4103     }
4104     return R_NilValue;
4105 }
4106 
4107 /* ------------------- read, write  binary --------------------- */
4108 
swapb(void * result,int size)4109 static void swapb(void *result, int size)
4110 {
4111     int i;
4112     char *p = result, tmp;
4113 
4114     if (size == 1) return;
4115     for (i = 0; i < size/2; i++) {
4116 	tmp = p[i];
4117 	p[i] = p[size - i - 1];
4118 	p[size - i - 1] = tmp;
4119     }
4120 }
4121 
readOneString(Rconnection con)4122 static SEXP readOneString(Rconnection con)
4123 {
4124     char buf[10001], *p;
4125     int pos, m;
4126 
4127     for(pos = 0; pos < 10000; pos++) {
4128 	p = buf + pos;
4129 	m = (int) con->read(p, sizeof(char), 1, con);
4130 	if (m < 0) error("error reading from the connection");
4131 	if(!m) {
4132 	    if(pos > 0)
4133 		warning(_("incomplete string at end of file has been discarded"));
4134 	    return R_NilValue;
4135 	}
4136 	if(*p == '\0') break;
4137     }
4138     if(pos == 10000)
4139 	warning(_("null terminator not found: breaking string at 10000 bytes"));
4140     return mkChar(buf);
4141 }
4142 
4143 static R_xlen_t
rawRead(char * p,int size,R_xlen_t n,Rbyte * bytes,R_xlen_t nbytes,R_xlen_t * np)4144 rawRead(char *p, int size, R_xlen_t n, Rbyte *bytes, R_xlen_t nbytes, R_xlen_t *np)
4145 {
4146     R_xlen_t avail, m;
4147 
4148     avail = (nbytes - *np)/size;
4149     m = n;
4150     if (m > avail) m = avail;
4151     if (m > 0) {
4152 	memcpy(p, bytes + *(np), m*size);
4153 	*np += m*size;
4154     }
4155     return m;
4156 }
4157 
rawOneString(Rbyte * bytes,R_xlen_t nbytes,R_xlen_t * np)4158 static SEXP rawOneString(Rbyte *bytes, R_xlen_t nbytes, R_xlen_t *np)
4159 {
4160     Rbyte *p;
4161     R_xlen_t i;
4162     char *buf;
4163     SEXP res;
4164 
4165     /* just look for null terminator */
4166     for(i = *np, p = bytes+(*np); i < nbytes; p++, i++)
4167 	if(*p == '\0') break;
4168     if(i < nbytes) { /* has terminator */
4169 	p = bytes+(*np);
4170 	*np = i+1;
4171 	return mkChar((char *)p);
4172     }
4173     /* so no terminator */
4174     buf = R_chk_calloc(nbytes - (*np) + 1, 1);
4175     memcpy(buf, bytes+(*np), nbytes-(*np));
4176     res = mkChar(buf);
4177     Free(buf);
4178     *np = nbytes;
4179     return res;
4180 }
4181 
4182 /* readBin(con, what, n, swap) */
4183 #define BLOCK 8096
do_readbin(SEXP call,SEXP op,SEXP args,SEXP env)4184 SEXP attribute_hidden do_readbin(SEXP call, SEXP op, SEXP args, SEXP env)
4185 {
4186     SEXP ans = R_NilValue, swhat;
4187     int size, signd, swap, sizedef= 4, mode = 1;
4188     const char *what;
4189     void *p = NULL;
4190     Rboolean wasopen = TRUE, isRaw = FALSE;
4191     Rconnection con = NULL;
4192     Rbyte *bytes = NULL;
4193     RCNTXT cntxt;
4194     R_xlen_t i, n,  m = 0, nbytes = 0, np = 0;
4195 
4196     checkArity(op, args);
4197 
4198     if(TYPEOF(CAR(args)) == RAWSXP) {
4199 	isRaw = TRUE;
4200 	bytes = RAW(CAR(args));
4201 	nbytes = XLENGTH(CAR(args));
4202     } else {
4203 	con = getConnection(asInteger(CAR(args)));
4204 	if(con->text) error(_("can only read from a binary connection"));
4205     }
4206 
4207     args = CDR(args);
4208     swhat = CAR(args); args = CDR(args);
4209     if(!isString(swhat) || LENGTH(swhat) != 1)
4210 	error(_("invalid '%s' argument"), "what");
4211     what = CHAR(STRING_ELT(swhat, 0)); /* ASCII */
4212     n = asVecSize(CAR(args)); args = CDR(args);
4213     if(n < 0) error(_("invalid '%s' argument"), "n");
4214     size = asInteger(CAR(args)); args = CDR(args);
4215     signd = asLogical(CAR(args)); args = CDR(args);
4216     if(signd == NA_LOGICAL)
4217 	error(_("invalid '%s' argument"), "signed");
4218     swap = asLogical(CAR(args));
4219     if(swap == NA_LOGICAL)
4220 	error(_("invalid '%s' argument"), "swap");
4221     if(!isRaw) {
4222 	wasopen = con->isopen;
4223 	if(!wasopen) {
4224 	    /* Documented behaviour */
4225 	    char mode[5];
4226 	    strcpy(mode, con->mode);
4227 	    strcpy(con->mode, "rb");
4228 	    if(!con->open(con)) error(_("cannot open the connection"));
4229 	    strcpy(con->mode, mode);
4230 	    /* Set up a context which will close the connection on error */
4231 	    begincontext(&cntxt, CTXT_CCODE, R_NilValue, R_BaseEnv, R_BaseEnv,
4232 			 R_NilValue, R_NilValue);
4233 	    cntxt.cend = &con_cleanup;
4234 	    cntxt.cenddata = con;
4235 	}
4236 	if(!con->canread) error(_("cannot read from this connection"));
4237     }
4238     if(!strcmp(what, "character")) {
4239 	SEXP onechar;
4240 	PROTECT(ans = allocVector(STRSXP, n));
4241 	for(i = 0, m = 0; i < n; i++) {
4242 	    onechar = isRaw ? rawOneString(bytes, nbytes, &np)
4243 		: readOneString(con);
4244 	    if(onechar != R_NilValue) {
4245 		SET_STRING_ELT(ans, i, onechar);
4246 		m++;
4247 	    } else break;
4248 	}
4249     } else if(!strcmp(what, "complex")) {
4250 	if(size == NA_INTEGER) size = sizeof(Rcomplex);
4251 	if(size != sizeof(Rcomplex))
4252 	    error(_("size changing is not supported for complex vectors"));
4253 	PROTECT(ans = allocVector(CPLXSXP, n));
4254 	p = (void *) COMPLEX(ans);
4255 	if(isRaw) m = rawRead(p, size, n, bytes, nbytes, &np);
4256 	else {
4257 	    /* Do this in blocks to avoid large buffers in the connection */
4258 	    char *pp = p;
4259 	    R_xlen_t m0, n0 = n;
4260 	    m = 0;
4261 	    while(n0) {
4262 		size_t n1 = (n0 < BLOCK) ? n0 : BLOCK;
4263 		m0 = con->read(pp, size, n1, con);
4264 		if (m0 < 0) error("error reading from the connection");
4265 		m += m0;
4266 		if (m0 < n1) break;
4267 		n0 -= n1;
4268 		pp += n1 * size;
4269 	    }
4270 	}
4271 	if(swap)
4272 	    for(i = 0; i < m; i++) {
4273 		swapb(&(COMPLEX(ans)[i].r), sizeof(double));
4274 		swapb(&(COMPLEX(ans)[i].i), sizeof(double));
4275 	    }
4276     } else {
4277 	if (!strcmp(what, "integer") || !strcmp(what, "int")) {
4278 	    sizedef = sizeof(int); mode = 1;
4279 
4280 #if (SIZEOF_LONG == 8) && (SIZEOF_LONG > SIZEOF_INT)
4281 #  define CASE_LONG_ETC case sizeof(long):
4282 #elif (SIZEOF_LONG_LONG == 8) && (SIZEOF_LONG_LONG > SIZEOF_INT)
4283 #  define CASE_LONG_ETC case sizeof(_lli_t):
4284 #else
4285 #  define CASE_LONG_ETC
4286 #endif
4287 
4288 #define CHECK_INT_SIZES(SIZE, DEF) do {					\
4289 	    if(SIZE == NA_INTEGER) SIZE = DEF;				\
4290 	    switch (SIZE) {						\
4291 	    case sizeof(signed char):					\
4292 	    case sizeof(short):						\
4293 	    case sizeof(int):						\
4294 	    CASE_LONG_ETC						\
4295 		break;							\
4296 	    default:							\
4297 		error(_("size %d is unknown on this machine"), SIZE);	\
4298 	    }								\
4299 	} while(0)
4300 
4301 	    CHECK_INT_SIZES(size, sizedef);
4302 	    PROTECT(ans = allocVector(INTSXP, n));
4303 	    p = (void *) INTEGER(ans);
4304 	} else if (!strcmp(what, "logical")) {
4305 	    sizedef = sizeof(int); mode = 1;
4306 	    CHECK_INT_SIZES(size, sizedef);
4307 	    PROTECT(ans = allocVector(LGLSXP, n));
4308 	    p = (void *) LOGICAL(ans);
4309 	} else if (!strcmp(what, "raw")) {
4310 	    sizedef = 1; mode = 1;
4311 	    if(size == NA_INTEGER) size = sizedef;
4312 	    switch (size) {
4313 	    case 1:
4314 		break;
4315 	    default:
4316 		error(_("raw is always of size 1"));
4317 	    }
4318 	    PROTECT(ans = allocVector(RAWSXP, n));
4319 	    p = (void *) RAW(ans);
4320 	} else if (!strcmp(what, "numeric") || !strcmp(what, "double")) {
4321 	    sizedef = sizeof(double); mode = 2;
4322 	    if(size == NA_INTEGER) size = sizedef;
4323 	    switch (size) {
4324 	    case sizeof(double):
4325 	    case sizeof(float):
4326 #if HAVE_LONG_DOUBLE && (SIZEOF_LONG_DOUBLE > SIZEOF_DOUBLE)
4327 	    case sizeof(long double):
4328 #endif
4329 		break;
4330 	    default:
4331 		error(_("size %d is unknown on this machine"), size);
4332 	    }
4333 	    PROTECT(ans = allocVector(REALSXP, n));
4334 	    p = (void *) REAL(ans);
4335 	} else
4336 	    error(_("invalid '%s' argument"), "what");
4337 
4338 	if(!signd && (mode != 1 || size > 2))
4339 	    warning(_("'signed = FALSE' is only valid for integers of sizes 1 and 2"));
4340 	if(size == sizedef) {
4341 	    if(isRaw) {
4342 		m = rawRead(p, size, n, bytes, nbytes, &np);
4343 	    } else {
4344 		/* Do this in blocks to avoid large buffers in the connection */
4345 		char *pp = p;
4346 		R_xlen_t m0, n0 = n;
4347 		m = 0;
4348 		while(n0) {
4349 		    size_t n1 = (n0 < BLOCK) ? n0 : BLOCK;
4350 		    m0 = con->read(pp, size, n1, con);
4351 		    if (m0 < 0) error("error reading from the connection");
4352 		    m += m0;
4353 		    if (m0 < n1) break;
4354 		    n0 -= n1;
4355 		    pp += n1 * size;
4356 		}
4357 	    }
4358 	    if(swap && size > 1)
4359 		for(i = 0; i < m; i++) swapb((char *)p+i*size, size);
4360 	} else {
4361 	    R_xlen_t s;
4362 	    union {
4363 		signed char sch;
4364 		unsigned char uch;
4365 		signed short ssh;
4366 		unsigned short ush;
4367 		long l;
4368 		long long ll;
4369 		float f;
4370 #if HAVE_LONG_DOUBLE
4371 		long double ld;
4372 #endif
4373 	    } u;
4374 	    if (size > sizeof u)
4375 		error(_("size %d is unknown on this machine"), size);
4376 	    if(mode == 1) { /* integer result */
4377 		for(i = 0, m = 0; i < n; i++) {
4378 		    s = isRaw ? rawRead((char*) &u, size, 1, bytes, nbytes, &np)
4379 			: (int) con->read((char*) &u, size, 1, con);
4380 		    if (s < 0) error("error reading from the connection");
4381 		    if(s) m++; else break;
4382 		    if(swap && size > 1) swapb((char *) &u, size);
4383 		    switch(size) {
4384 		    case sizeof(signed char):
4385 			if(signd)
4386 			    INTEGER(ans)[i] = u.sch;
4387 			else
4388 			    INTEGER(ans)[i] = u.uch;
4389 			break;
4390 		    case sizeof(short):
4391 			if(signd)
4392 			    INTEGER(ans)[i] = u.ssh;
4393 			else
4394 			    INTEGER(ans)[i] = u.ush;
4395 			break;
4396 #if SIZEOF_LONG == 8
4397 		    case sizeof(long):
4398 			INTEGER(ans)[i] = (int) u.l;
4399 			break;
4400 #elif SIZEOF_LONG_LONG == 8
4401 		    case sizeof(_lli_t):
4402 			INTEGER(ans)[i] = (int) u.ll;
4403 			break;
4404 #endif
4405 		    default:
4406 			error(_("size %d is unknown on this machine"), size);
4407 		    }
4408 		}
4409 	    } else if (mode == 2) { /* double result */
4410 		for(i = 0, m = 0; i < n; i++) {
4411 		    s = isRaw ? rawRead((char*) &u, size, 1, bytes, nbytes, &np)
4412 			: (int) con->read((char*) &u, size, 1, con);
4413 		    if (s < 0) error("error reading from the connection");
4414 		    if(s) m++; else break;
4415 		    if(swap && size > 1) swapb((char *) &u, size);
4416 		    switch(size) {
4417 		    case sizeof(float):
4418 			REAL(ans)[i] = u.f;
4419 			break;
4420 #if HAVE_LONG_DOUBLE && (SIZEOF_LONG_DOUBLE > SIZEOF_DOUBLE)
4421 		    case sizeof(long double):
4422 			REAL(ans)[i] = (double) u.ld;
4423 			break;
4424 #endif
4425 		    default:
4426 			error(_("size %d is unknown on this machine"), size);
4427 		    }
4428 		}
4429 	    }
4430 	}
4431     }
4432     if(!wasopen) {endcontext(&cntxt); con->close(con);}
4433     if(m < n)
4434 	ans = xlengthgets(ans, m);
4435     UNPROTECT(1);
4436     return ans;
4437 }
4438 
4439 /* writeBin(object, con, size, swap, useBytes) */
do_writebin(SEXP call,SEXP op,SEXP args,SEXP env)4440 SEXP attribute_hidden do_writebin(SEXP call, SEXP op, SEXP args, SEXP env)
4441 {
4442     checkArity(op, args);
4443     SEXP object = CAR(args);
4444     if(!isVectorAtomic(object))
4445 	error(_("'x' is not an atomic vector type"));
4446     Rboolean
4447 	isRaw = TYPEOF(CADR(args)) == RAWSXP,
4448 	wasopen = isRaw;
4449     Rconnection con = NULL;
4450     if(!isRaw) {
4451 	con = getConnection(asInteger(CADR(args)));
4452 	if(con->text) error(_("can only write to a binary connection"));
4453 	wasopen = con->isopen;
4454 	if(!con->canwrite) error(_("cannot write to this connection"));
4455     }
4456 
4457     int size = asInteger(CADDR(args)),
4458 	swap = asLogical(CADDDR(args));
4459     if(swap == NA_LOGICAL)
4460 	error(_("invalid '%s' argument"), "swap");
4461     int useBytes = asLogical(CAD4R(args));
4462     if(useBytes == NA_LOGICAL)
4463 	error(_("invalid '%s' argument"), "useBytes");
4464     R_xlen_t i, len = XLENGTH(object);
4465     if(len == 0)
4466 	return (isRaw) ? allocVector(RAWSXP, 0) : R_NilValue;
4467 
4468 #ifndef LONG_VECTOR_SUPPORT
4469     /* without long vectors RAW vectors are limited to 2^31 - 1 bytes */
4470     if(len * (double)size > INT_MAX) {
4471 	if(isRaw)
4472 	    error(_("only 2^31-1 bytes can be written to a raw vector"));
4473 	else
4474 	    error(_("only 2^31-1 bytes can be written in a single writeBin() call"));
4475     }
4476 #endif
4477 
4478     RCNTXT cntxt;
4479     if(!wasopen) {
4480 	/* Documented behaviour */
4481 	char mode[5];
4482 	strcpy(mode, con->mode);
4483 	strcpy(con->mode, "wb");
4484 	if(!con->open(con)) error(_("cannot open the connection"));
4485 	strcpy(con->mode, mode);
4486 	/* Set up a context which will close the connection on error */
4487 	begincontext(&cntxt, CTXT_CCODE, R_NilValue, R_BaseEnv, R_BaseEnv,
4488 		     R_NilValue, R_NilValue);
4489 	cntxt.cend = &con_cleanup;
4490 	cntxt.cenddata = con;
4491 	if(!con->canwrite) error(_("cannot write to this connection"));
4492     }
4493 
4494     SEXP ans = R_NilValue;
4495     if(TYPEOF(object) == STRSXP) {
4496 	const char *s;
4497 	if(isRaw) {
4498 	    Rbyte *bytes;
4499 	    size_t np, outlen = 0;
4500 	    if(useBytes)
4501 		for(i = 0; i < len; i++)
4502 		    outlen += strlen(CHAR(STRING_ELT(object, i))) + 1;
4503 	    else
4504 		for(i = 0; i < len; i++)
4505 		    outlen += strlen(translateChar0(STRING_ELT(object, i))) + 1;
4506 	    PROTECT(ans = allocVector(RAWSXP, outlen));
4507 	    bytes = RAW(ans);
4508 	    /* translateChar0() is the same as CHAR for IS_BYTES strings */
4509 	    for(i = 0, np = 0; i < len; i++) {
4510 		if(useBytes)
4511 		    s = CHAR(STRING_ELT(object, i));
4512 		else
4513 		    s = translateChar0(STRING_ELT(object, i));
4514 		memcpy(bytes+np, s, strlen(s) + 1);
4515 		np +=  strlen(s) + 1;
4516 	    }
4517 	} else {
4518 	    /* translateChar0() is the same as CHAR for IS_BYTES strings */
4519 	    for(i = 0; i < len; i++) {
4520 		if(useBytes)
4521 		    s = CHAR(STRING_ELT(object, i));
4522 		else
4523 		    s = translateChar0(STRING_ELT(object, i));
4524 		size_t nwrite = con->write(s, sizeof(char), strlen(s) + 1, con);
4525 		if(!nwrite) {
4526 		    warning(_("problem writing to connection"));
4527 		    break;
4528 		}
4529 	    }
4530 	}
4531     } else {
4532 	switch(TYPEOF(object)) {
4533 	case LGLSXP:
4534 	case INTSXP:
4535 	    CHECK_INT_SIZES(size, sizeof(int));
4536 	    break;
4537 	case REALSXP:
4538 	    if(size == NA_INTEGER) size = sizeof(double);
4539 	    switch (size) {
4540 	    case sizeof(double):
4541 	    case sizeof(float):
4542 #if HAVE_LONG_DOUBLE && (SIZEOF_LONG_DOUBLE > SIZEOF_DOUBLE)
4543 	    case sizeof(long double):
4544 #endif
4545 		break;
4546 	    default:
4547 		error(_("size %d is unknown on this machine"), size);
4548 	    }
4549 	    break;
4550 	case CPLXSXP:
4551 	    if(size == NA_INTEGER) size = sizeof(Rcomplex);
4552 	    if(size != sizeof(Rcomplex))
4553 		error(_("size changing is not supported for complex vectors"));
4554 	    break;
4555 	case RAWSXP:
4556 	    if(size == NA_INTEGER) size = 1;
4557 	    if(size != 1)
4558 		error(_("size changing is not supported for raw vectors"));
4559 	    break;
4560 	default:
4561 	    UNIMPLEMENTED_TYPE("writeBin", object);
4562 	}
4563 	char *buf = R_chk_calloc(len, size);
4564 	R_xlen_t j;
4565 	switch(TYPEOF(object)) {
4566 	case LGLSXP:
4567 	case INTSXP:
4568 	    switch (size) {
4569 	    case sizeof(int):
4570 		memcpy(buf, INTEGER(object), size * len);
4571 		break;
4572 #if SIZEOF_LONG == 8
4573 	    case sizeof(long):
4574 	    {
4575 		for (i = 0, j = 0; i < len; i++, j += size) {
4576 		    long l1 = (long) INTEGER(object)[i];
4577 		    memcpy(buf + j, &l1, size);
4578 		}
4579 		break;
4580 	    }
4581 #elif SIZEOF_LONG_LONG == 8
4582 	    case sizeof(_lli_t):
4583 	    {
4584 		for (i = 0, j = 0; i < len; i++, j += size) {
4585 		    _lli_t ll1 = (_lli_t) INTEGER(object)[i];
4586 		    memcpy(buf + j, &ll1, size);
4587 		}
4588 		break;
4589 	    }
4590 #endif
4591 	    case 2:
4592 	    {
4593 		for (i = 0, j = 0; i < len; i++, j += size) {
4594 		    short s1 = (short) INTEGER(object)[i];
4595 		    memcpy(buf + j, &s1, size);
4596 		}
4597 		break;
4598 	    }
4599 	    case 1:
4600 		for (i = 0; i < len; i++)
4601 		    /* compiler-defined conversion behavior */
4602 		    buf[i] = (signed char) INTEGER(object)[i];
4603 		break;
4604 	    default:
4605 		error(_("size %d is unknown on this machine"), size);
4606 	    }
4607 	    break;
4608 	case REALSXP:
4609 	    switch (size) {
4610 	    case sizeof(double):
4611 		memcpy(buf, REAL(object), size * len);
4612 		break;
4613 	    case sizeof(float):
4614 	    {
4615 		for (i = 0, j = 0; i < len; i++, j += size) {
4616 		    float f1 = (float) REAL(object)[i];
4617 		    memcpy(buf+j, &f1, size);
4618 		}
4619 		break;
4620 	    }
4621 #if HAVE_LONG_DOUBLE && (SIZEOF_LONG_DOUBLE > SIZEOF_DOUBLE)
4622 	    case sizeof(long double):
4623 	    {
4624 		/* some systems have problems with memcpy from
4625 		   the address of an automatic long double,
4626 		   e.g. ix86/x86_64 Linux with gcc4 */
4627 		static long double ld1;
4628 		for (i = 0, j = 0; i < len; i++, j += size) {
4629 		    ld1 = (long double) REAL(object)[i];
4630 		    memcpy(buf+j, &ld1, size);
4631 		}
4632 		break;
4633 	    }
4634 #endif
4635 	    default:
4636 		error(_("size %d is unknown on this machine"), size);
4637 	    }
4638 	    break;
4639 	case CPLXSXP:
4640 	    memcpy(buf, COMPLEX(object), size * len);
4641 	    break;
4642 	case RAWSXP:
4643 	    memcpy(buf, RAW(object), len); /* size = 1 */
4644 	    break;
4645 	}
4646 
4647 	if(swap && size > 1) {
4648 	    if (TYPEOF(object) == CPLXSXP)
4649 		for(i = 0; i < len; i++) {
4650 		    int sz = size/2;
4651 		    swapb(buf+sz*2*i, sz);
4652 		    swapb(buf+sz*(2*i+1), sz);
4653 		}
4654 	    else
4655 		for(i = 0; i < len; i++) swapb(buf+size*i, size);
4656 	}
4657 
4658 	/* write it now */
4659 	if(isRaw) { /* for non-long vectors, we checked size*len < 2^31-1 above */
4660 	    PROTECT(ans = allocVector(RAWSXP, size*len));
4661 	    memcpy(RAW(ans), buf, size*len);
4662 	} else {
4663 	    size_t nwrite = con->write(buf, size, len, con);
4664 	    if(nwrite < len) warning(_("problem writing to connection"));
4665 	}
4666 	Free(buf);
4667     }
4668 
4669     if(!wasopen) {
4670         endcontext(&cntxt);
4671         checkClose(con);
4672     }
4673     if(isRaw) {
4674 	UNPROTECT(1);
4675 	R_Visible = TRUE;
4676     } else R_Visible = FALSE;
4677     return ans;
4678 }
4679 
4680 /* FIXME: could do any MBCS locale, but would need pushback */
4681 static SEXP
readFixedString(Rconnection con,int len,int useBytes,Rboolean * warnOnNul)4682 readFixedString(Rconnection con, int len, int useBytes, Rboolean *warnOnNul)
4683 {
4684     SEXP ans;
4685     char *buf;
4686     int  m;
4687     const void *vmax = vmaxget();
4688 
4689     if(utf8locale && !useBytes) {
4690 	int i, clen;
4691 	char *p, *q;
4692 
4693 	p = buf = (char *) R_alloc(R_MB_CUR_MAX*len+1, sizeof(char));
4694 	memset(buf, 0, R_MB_CUR_MAX*len+1);
4695 	for(i = 0; i < len; i++) {
4696 	    q = p;
4697 	    m = (int) con->read(p, sizeof(char), 1, con);
4698 	    if(!m) { if(i == 0) return R_NilValue; else break;}
4699 	    clen = utf8clen(*p++);
4700 	    if(clen > 1) {
4701 		m = (int) con->read(p, sizeof(char), clen - 1, con);
4702 		if(m < clen - 1) error(_("invalid UTF-8 input in readChar()"));
4703 		p += clen - 1;
4704 		/* NB: this only checks validity of multi-byte characters */
4705 		if((int)mbrtowc(NULL, q, clen, NULL) < 0)
4706 		    error(_("invalid UTF-8 input in readChar()"));
4707 	    } else if (*q == '\0' && *warnOnNul) {
4708 		*warnOnNul = FALSE;
4709 		warning(_("truncating string with embedded nuls"));
4710 	    }
4711 	}
4712     } else {
4713 	buf = (char *) R_alloc(len+1, sizeof(char));
4714 	memset(buf, 0, len+1);
4715 	m = (int) con->read(buf, sizeof(char), len, con);
4716 	if(len && !m) return R_NilValue;
4717 	if (strlen(buf) < m && *warnOnNul) {
4718 	    *warnOnNul = FALSE;
4719 	    warning(_("truncating string with embedded nuls"));
4720 	}
4721     }
4722     /* String may contain nuls which we now (R >= 2.8.0) assume to be
4723        padding and ignore */
4724     ans = mkChar(buf);
4725     vmaxset(vmax);
4726     return ans;
4727 }
4728 
4729 static SEXP
rawFixedString(Rbyte * bytes,int len,int nbytes,int * np,int useBytes)4730 rawFixedString(Rbyte *bytes, int len, int nbytes, int *np, int useBytes)
4731 {
4732     char *buf;
4733     SEXP res;
4734     const void *vmax = vmaxget();
4735 
4736     if(*np + len > nbytes) {
4737 	len = nbytes - *np;
4738 	if (!len) return(R_NilValue);
4739     }
4740 
4741     /* Note: mkCharLenCE signals an error on embedded nuls. */
4742     if(utf8locale && !useBytes) {
4743 	int i, clen, iread = *np;
4744 	char *p;
4745 	Rbyte *q;
4746 
4747 	p = buf = (char *) R_alloc(R_MB_CUR_MAX*len+1, sizeof(char));
4748 	for(i = 0; i < len; i++, p += clen, iread += clen) {
4749 	    if (iread >= nbytes) break;
4750 	    q = bytes + iread;
4751 	    clen = utf8clen(*q);
4752 	    if (iread + clen > nbytes)
4753 		error(_("invalid UTF-8 input in readChar()"));
4754 	    memcpy(p, q, clen);
4755 	}
4756 	clen = iread - (*np);
4757 	*np = iread;
4758 	*p = '\0';
4759 	res = mkCharLenCE(buf, clen, CE_NATIVE);
4760     } else {
4761 	/* no terminator */
4762 	buf = R_chk_calloc(len + 1, 1);
4763 	memcpy(buf, bytes + (*np), len);
4764 	*np += len;
4765 	res = mkCharLenCE(buf, len, CE_NATIVE);
4766 	Free(buf);
4767     }
4768     vmaxset(vmax);
4769     return res;
4770 }
4771 
4772 
4773 /* readChar(con, nchars) */
do_readchar(SEXP call,SEXP op,SEXP args,SEXP env)4774 SEXP attribute_hidden do_readchar(SEXP call, SEXP op, SEXP args, SEXP env)
4775 {
4776     SEXP ans = R_NilValue, onechar, nchars;
4777     R_xlen_t i, n, m = 0;
4778     int nbytes = 0, np = 0, useBytes;
4779     Rboolean wasopen = TRUE, isRaw = FALSE, warnOnNul = TRUE;
4780     Rconnection con = NULL;
4781     Rbyte *bytes = NULL;
4782     RCNTXT cntxt;
4783     checkArity(op, args);
4784 
4785     if(TYPEOF(CAR(args)) == RAWSXP) {
4786 	isRaw = TRUE;
4787 	bytes = RAW(CAR(args));
4788 	nbytes = LENGTH(CAR(args));
4789     } else {
4790 	con = getConnection(asInteger(CAR(args)));
4791 	if(!con->canread)
4792 	    error(_("cannot read from this connection"));
4793     }
4794     /* We did as.integer in the wrapper */
4795     nchars = CADR(args);
4796     n = XLENGTH(nchars);
4797     if(n == 0) return allocVector(STRSXP, 0);
4798     useBytes = asLogical(CADDR(args));
4799     if(useBytes == NA_LOGICAL)
4800 	error(_("invalid '%s' argument"), "useBytes");
4801 
4802     if (!isRaw) {
4803 	wasopen = con->isopen;
4804 	if(!wasopen) {
4805 	    /* Documented behaviour */
4806 	    char mode[5];
4807 	    strcpy(mode, con->mode);
4808 	    strcpy(con->mode, "rb");
4809 	    if(!con->open(con)) error(_("cannot open the connection"));
4810 	    strcpy(con->mode, mode);
4811 	    /* Set up a context which will close the connection on error */
4812 	    begincontext(&cntxt, CTXT_CCODE, R_NilValue, R_BaseEnv, R_BaseEnv,
4813 			 R_NilValue, R_NilValue);
4814 	    cntxt.cend = &con_cleanup;
4815 	    cntxt.cenddata = con;
4816 	}
4817 	if(!con->canread) error(_("cannot read from this connection"));
4818     }
4819     if (mbcslocale && !utf8locale && !useBytes)
4820 	warning(_("can only read in bytes in a non-UTF-8 MBCS locale" ));
4821     PROTECT(ans = allocVector(STRSXP, n));
4822     if(!isRaw && con->text &&
4823        (con->buff || con->nPushBack >= 0 || con->inconv))
4824 
4825 	/* could be turned into runtime error */
4826 	warning(_("text connection used with %s(), results may be incorrect"),
4827 	          "readChar");
4828     for(i = 0, m = 0; i < n; i++) {
4829 	int len = INTEGER(nchars)[i];
4830 	if(len == NA_INTEGER || len < 0)
4831 	    error(_("invalid '%s' argument"), "nchars");
4832 	onechar = isRaw ? rawFixedString(bytes, len, nbytes, &np, useBytes)
4833 	    : readFixedString(con, len, useBytes, &warnOnNul);
4834 	if(onechar != R_NilValue) {
4835 	    SET_STRING_ELT(ans, i, onechar);
4836 	    m++;
4837 	} else break;
4838     }
4839 
4840     if(!wasopen) {endcontext(&cntxt); con->close(con);}
4841     if(m < n) {
4842 	PROTECT(ans = xlengthgets(ans, m));
4843 	UNPROTECT(1);
4844     }
4845     UNPROTECT(1);
4846     return ans;
4847 }
4848 
4849 /* writeChar(object, con, nchars, sep, useBytes) */
do_writechar(SEXP call,SEXP op,SEXP args,SEXP env)4850 SEXP attribute_hidden do_writechar(SEXP call, SEXP op, SEXP args, SEXP env)
4851 {
4852     SEXP object, nchars, sep, ans = R_NilValue, si;
4853     R_xlen_t i, n, len;
4854     int useBytes;
4855     size_t slen, tlen, lenb, lenc;
4856     char *buf;
4857     const char *s, *ssep = "";
4858     Rboolean wasopen = TRUE, usesep, isRaw = FALSE;
4859     Rconnection con = NULL;
4860     mbstate_t mb_st;
4861     RCNTXT cntxt;
4862 
4863     checkArity(op, args);
4864     object = CAR(args);
4865     if(TYPEOF(object) != STRSXP)
4866 	error(_("invalid '%s' argument"), "object");
4867     if(TYPEOF(CADR(args)) == RAWSXP) {
4868 	isRaw = TRUE;
4869     } else {
4870 	con = getConnection(asInteger(CADR(args)));
4871 	if(!con->canwrite)
4872 	    error(_("cannot write to this connection"));
4873 	wasopen = con->isopen;
4874     }
4875 
4876     /* We did as.integer in the wrapper */
4877     nchars = CADDR(args);
4878     sep = CADDDR(args);
4879     useBytes = asLogical(CAD4R(args));
4880     if(useBytes == NA_LOGICAL)
4881 	error(_("invalid '%s' argument"), "useBytes");
4882 
4883     if(isNull(sep)) {
4884 	usesep = FALSE;
4885 	slen = 0;
4886     } else {
4887 	usesep = TRUE;
4888 	if (!isString(sep) || LENGTH(sep) != 1)
4889 	    error(_("invalid '%s' argument"), "sep");
4890 	if(useBytes)
4891 	    ssep = CHAR(STRING_ELT(sep, 0));
4892 	else
4893 	    ssep = translateChar(STRING_ELT(sep, 0));
4894 	slen = strlen(ssep) + 1;
4895     }
4896     n = XLENGTH(nchars);
4897     if(XLENGTH(object) < n)
4898 	error(_("'object' is too short"));
4899     if(n == 0) {
4900 	if(isRaw) return allocVector(RAWSXP, 0); else return R_NilValue;
4901     }
4902 
4903     len = 0;
4904     if (!isRaw) {
4905 	for(i = 0; i < n; i++) {
4906 	    /* This is not currently needed, just future-proofing in case
4907 	       the logic gets changed */
4908 	    if(useBytes)
4909 		tlen = strlen(CHAR(STRING_ELT(object, i)));
4910 	    else
4911 		tlen = strlen(translateChar(STRING_ELT(object, i)));
4912 	    if (tlen > len) len = tlen;
4913 	    int tt = INTEGER(nchars)[i];
4914 	    if(tt == NA_INTEGER || tt < 0)
4915 		error(_("invalid '%s' argument"), "nchars");
4916 	    if (tt > len) len = tt;
4917 	}
4918 	buf = (char *) R_alloc(len + slen, sizeof(char));
4919     } else {
4920 	double dlen = 0;
4921 	for (i = 0; i < n; i++)
4922 	    dlen += (double)(INTEGER(nchars)[i] + slen);
4923 	if (dlen > R_XLEN_T_MAX)
4924 	    error("too much data for a raw vector on this platform");
4925 	len = (R_xlen_t) dlen;
4926 	PROTECT(ans = allocVector(RAWSXP, len));
4927 	buf = (char*) RAW(ans);
4928     }
4929 
4930     if(!wasopen) {
4931 	/* Documented behaviour */
4932 	char mode[5];
4933 	strcpy(mode, con->mode);
4934 	strcpy(con->mode, "wb");
4935 	if(!con->open(con)) error(_("cannot open the connection"));
4936 	strcpy(con->mode, mode);
4937 	/* Set up a context which will close the connection on error */
4938 	begincontext(&cntxt, CTXT_CCODE, R_NilValue, R_BaseEnv, R_BaseEnv,
4939 		     R_NilValue, R_NilValue);
4940 	cntxt.cend = &con_cleanup;
4941 	cntxt.cenddata = con;
4942 	if(!con->canwrite) error(_("cannot write to this connection"));
4943     }
4944 
4945     if(!isRaw && con->text && con->outconv)
4946 	/* could be turned into runtime error */
4947 	warning(_("text connection used with %s(), results may be incorrect"),
4948 	          "writeChar");
4949 
4950     for(i = 0; i < n; i++) {
4951 	len = INTEGER(nchars)[i];
4952 	si = STRING_ELT(object, i);
4953 	if(strlen(CHAR(si)) < LENGTH(si)) {
4954 	    if(len > LENGTH(si)) {
4955 		warning(_("writeChar: more bytes requested than are in the string - will zero-pad"));
4956 	    }
4957 	    memset(buf, '\0', len + slen);
4958 	    memcpy(buf, CHAR(si), len);
4959 	    if (usesep) {
4960 		strcpy(buf + len, ssep);
4961 		len += slen;
4962 	    }
4963 	    if (!isRaw) {
4964 		size_t nwrite = con->write(buf, sizeof(char), len, con);
4965 		if(!nwrite) {
4966 		    warning(_("problem writing to connection"));
4967 		    break;
4968 		}
4969 	    } else
4970 		buf += len;
4971 	} else {
4972 	    if(useBytes)
4973 		s = CHAR(si);
4974 	    else
4975 		s = translateChar(si);
4976 	    lenb = lenc = strlen(s);
4977 	    if(mbcslocale) lenc = mbstowcs(NULL, s, 0);
4978 	    /* As from 1.8.1, zero-pad if too many chars are requested. */
4979 	    if(len > lenc) {
4980 		warning(_("writeChar: more characters requested than are in the string - will zero-pad"));
4981 		lenb += (len - lenc);
4982 	    }
4983 	    if(len < lenc) {
4984 		if(mbcslocale) {
4985 		    /* find out how many bytes we need to write */
4986 		    size_t i, used;
4987 		    const char *p = s;
4988 		    mbs_init(&mb_st);
4989 		    for(i = 0, lenb = 0; i < len; i++) {
4990 			used = Mbrtowc(NULL, p, R_MB_CUR_MAX, &mb_st);
4991 			p += used;
4992 			lenb += used;
4993 		    }
4994 		} else
4995 		    lenb = len;
4996 	    }
4997 	    memset(buf, '\0', lenb + slen);
4998 	    strncpy(buf, s, lenb);
4999 	    if (usesep) {
5000 		strcpy(buf + lenb, ssep);
5001 		lenb += slen;
5002 	    }
5003 	    if (!isRaw) {
5004 		size_t nwrite = con->write(buf, sizeof(char), lenb, con);
5005 		if(!nwrite) {
5006 		    warning(_("problem writing to connection"));
5007 		    break;
5008 		}
5009 	    } else
5010 		buf += lenb;
5011 	}
5012     }
5013     if(!wasopen) {
5014         endcontext(&cntxt);
5015         checkClose(con);
5016     }
5017     if(isRaw) {
5018 	UNPROTECT(1);
5019 	R_Visible = TRUE;
5020     } else {
5021 	ans = R_NilValue;
5022 	R_Visible = FALSE;
5023     }
5024     return ans;
5025 }
5026 
5027 /* ------------------- push back text  --------------------- */
5028 
5029 
5030 /* used in readLines and scan */
con_pushback(Rconnection con,Rboolean newLine,char * line)5031 void con_pushback(Rconnection con, Rboolean newLine, char *line)
5032 {
5033     int nexists = con->nPushBack;
5034     char **q;
5035 
5036     if (nexists == INT_MAX)
5037 	error(_("maximum number of pushback lines exceeded"));
5038     if(nexists > 0) {
5039 	q = (char **) realloc(con->PushBack, (nexists+1)*sizeof(char *));
5040     } else {
5041 	q = (char **) malloc(sizeof(char *));
5042     }
5043     if(!q) error(_("could not allocate space for pushback"));
5044     else con->PushBack = q;
5045     q += nexists;
5046     *q = (char *) malloc(strlen(line) + 1 + newLine);
5047     if(!(*q)) error(_("could not allocate space for pushback"));
5048     strcpy(*q, line);
5049     if(newLine) strcat(*q, "\n");
5050     q++;
5051     con->posPushBack = 0;
5052     con->nPushBack++;
5053 }
5054 
5055 
do_pushback(SEXP call,SEXP op,SEXP args,SEXP env)5056 SEXP attribute_hidden do_pushback(SEXP call, SEXP op, SEXP args, SEXP env)
5057 {
5058     int i, n, nexists, newLine, type;
5059     Rconnection con = NULL;
5060     SEXP stext;
5061     const char *p;
5062     char **q;
5063 
5064     checkArity(op, args);
5065 
5066     stext = CAR(args);
5067     if(!isString(stext))
5068 	error(_("invalid '%s' argument"), "data");
5069     con = getConnection(asInteger(CADR(args)));
5070     newLine = asLogical(CADDR(args));
5071     if(newLine == NA_LOGICAL)
5072 	error(_("invalid '%s' argument"), "newLine");
5073     type = asInteger(CADDDR(args));
5074     if(!con->canread && !con->isopen)
5075 	error(_("can only push back on open readable connections"));
5076     if(!con->text)
5077 	error(_("can only push back on text-mode connections"));
5078     nexists = con->nPushBack;
5079     if((n = LENGTH(stext)) > 0) {
5080 	if(nexists > 0)
5081 	    q = (char **) realloc(con->PushBack, (n+nexists)*sizeof(char *));
5082 	else
5083 	    q = (char **) malloc(n*sizeof(char *));
5084 	if(!q) error(_("could not allocate space for pushback"));
5085 	con->PushBack = q;
5086 	q += nexists;
5087 	for(i = 0; i < n; i++) {
5088 	    p = type == 1 ? translateChar(STRING_ELT(stext, n - i - 1))
5089 			  : ((type == 3) ? translateCharUTF8(STRING_ELT(stext, n - i - 1))
5090 					 : CHAR(STRING_ELT(stext, n - i - 1)));
5091 	    *q = (char *) malloc(strlen(p) + 1 + newLine);
5092 	    if(!(*q)) error(_("could not allocate space for pushback"));
5093 	    strcpy(*q, p);
5094 	    if(newLine) strcat(*q, "\n");
5095 	    q++;
5096 	}
5097 	con->posPushBack = 0;
5098 	con->nPushBack += n;
5099     }
5100     return R_NilValue;
5101 }
5102 
do_pushbacklength(SEXP call,SEXP op,SEXP args,SEXP env)5103 SEXP attribute_hidden do_pushbacklength(SEXP call, SEXP op, SEXP args, SEXP env)
5104 {
5105     Rconnection con = NULL;
5106 
5107     checkArity(op, args);
5108     con = getConnection(asInteger(CAR(args)));
5109     return ScalarInteger(con->nPushBack);
5110 }
5111 
do_clearpushback(SEXP call,SEXP op,SEXP args,SEXP env)5112 SEXP attribute_hidden do_clearpushback(SEXP call, SEXP op, SEXP args, SEXP env)
5113 {
5114     int j;
5115     Rconnection con = NULL;
5116 
5117     checkArity(op, args);
5118     con = getConnection(asInteger(CAR(args)));
5119 
5120     if(con->nPushBack > 0) { // so con_close1 has not been called
5121 	for(j = 0; j < con->nPushBack; j++) free(con->PushBack[j]);
5122 	free(con->PushBack);
5123 	con->nPushBack = 0;
5124     }
5125     return R_NilValue;
5126 }
5127 
5128 /* ------------------- sink functions  --------------------- */
5129 
5130 /* Switch output to connection number icon, or pop stack if icon < 0
5131  */
5132 
5133 static Rboolean
switch_or_tee_stdout(int icon,int closeOnExit,int tee)5134 switch_or_tee_stdout(int icon, int closeOnExit, int tee)
5135 {
5136     int toclose;
5137 
5138     if(icon == R_OutputCon) return FALSE;
5139 
5140     if(icon >= 0 && R_SinkNumber >= NSINKS - 1)
5141 	error(_("sink stack is full"));
5142 
5143     if(icon == 0)
5144 	error(_("cannot switch output to stdin"));
5145     else if(icon == 1 || icon == 2) {
5146 	R_OutputCon = SinkCons[++R_SinkNumber] = icon;
5147 	R_SinkSplit[R_SinkNumber] = tee;
5148 	SinkConsClose[R_SinkNumber] = 0;
5149     } else if(icon >= 3) {
5150 	Rconnection con = getConnection(icon); /* checks validity */
5151 	toclose = 2*closeOnExit;
5152 	if(!con->isopen) {
5153 	    char mode[5];
5154 	    strcpy(mode, con->mode);
5155 	    strcpy(con->mode, "wt");
5156 	    if(!con->open(con)) error(_("cannot open the connection"));
5157 	    strcpy(con->mode, mode);
5158 	    if(!con->canwrite) {
5159 		con->close(con);
5160 		error(_("cannot write to this connection"));
5161 	    }
5162 	    toclose = 1;
5163 	} else if(!con->canwrite)
5164 	    error(_("cannot write to this connection"));
5165 	R_OutputCon = SinkCons[++R_SinkNumber] = icon;
5166 	SinkConsClose[R_SinkNumber] = toclose;
5167 	R_SinkSplit[R_SinkNumber] = tee;
5168 	R_PreserveObject(con->ex_ptr);
5169    } else { /* removing a sink */
5170 	if (R_SinkNumber <= 0) {
5171 	    warning(_("no sink to remove"));
5172 	    return FALSE;
5173 	} else {
5174 	    R_OutputCon = SinkCons[--R_SinkNumber];
5175 	    if((icon = SinkCons[R_SinkNumber + 1]) >= 3) {
5176 		Rconnection con = getConnection(icon);
5177 		R_ReleaseObject(con->ex_ptr);
5178 		if(SinkConsClose[R_SinkNumber + 1] == 1) { /* close it */
5179 		    checkClose(con);
5180 		} else if (SinkConsClose[R_SinkNumber + 1] == 2) /* destroy it */
5181 		    con_destroy(icon);
5182 	    }
5183 	}
5184     }
5185     return TRUE;
5186 }
5187 
5188 /* This is only used by cat() */
switch_stdout(int icon,int closeOnExit)5189 Rboolean attribute_hidden switch_stdout(int icon, int closeOnExit)
5190 {
5191   return switch_or_tee_stdout(icon, closeOnExit, 0);
5192 }
5193 
do_sink(SEXP call,SEXP op,SEXP args,SEXP rho)5194 SEXP attribute_hidden do_sink(SEXP call, SEXP op, SEXP args, SEXP rho)
5195 {
5196   int icon, closeOnExit, errcon, tee;
5197 
5198     checkArity(op, args);
5199     icon = asInteger(CAR(args));
5200     closeOnExit = asLogical(CADR(args));
5201     if(closeOnExit == NA_LOGICAL)
5202 	error(_("invalid '%s' argument"), "closeOnExit");
5203     errcon = asLogical(CADDR(args));
5204     if(errcon == NA_LOGICAL) error(_("invalid '%s' argument"), "type");
5205     tee = asLogical(CADDDR(args));
5206     if(tee == NA_LOGICAL) error(_("invalid '%s' argument"), "split");
5207 
5208     if(!errcon) {
5209 	/* allow space for cat() to use sink() */
5210 	if(icon >= 0 && R_SinkNumber >= NSINKS - 2)
5211 	    error(_("sink stack is full"));
5212 	switch_or_tee_stdout(icon, closeOnExit, tee);
5213     } else {
5214 	if(icon < 0) {
5215 	    R_ReleaseObject(getConnection(R_ErrorCon)->ex_ptr);
5216 	    R_ErrorCon = 2;
5217 	} else {
5218 	    getConnection(icon); /* check validity */
5219 	    R_ErrorCon = icon;
5220 	    R_PreserveObject(getConnection(icon)->ex_ptr);
5221 	}
5222     }
5223 
5224     return R_NilValue;
5225 }
5226 
do_sinknumber(SEXP call,SEXP op,SEXP args,SEXP rho)5227 SEXP attribute_hidden do_sinknumber(SEXP call, SEXP op, SEXP args, SEXP rho)
5228 {
5229     int errcon;
5230     checkArity(op, args);
5231 
5232     errcon = asLogical(CAR(args));
5233     if(errcon == NA_LOGICAL)
5234 	error(_("invalid '%s' argument"), "type");
5235     return ScalarInteger(errcon ? R_SinkNumber : R_ErrorCon);
5236 }
5237 
5238 #ifdef Win32
WinCheckUTF8(void)5239 void WinCheckUTF8(void)
5240 {
5241     if(EmitEmbeddedUTF8) /* RGui */
5242 	WinUTF8out = (SinkCons[R_SinkNumber] == 1 ||
5243 	              SinkCons[R_SinkNumber] == 2) && localeCP != 65001;
5244     else
5245 	WinUTF8out = FALSE;
5246 }
5247 #endif
5248 
5249 /* ------------------- admin functions  --------------------- */
5250 
InitConnections()5251 void attribute_hidden InitConnections()
5252 {
5253     int i;
5254     Connections[0] = newterminal("stdin", "r");
5255     Connections[0]->fgetc = stdin_fgetc;
5256     Connections[1] = newterminal("stdout", "w");
5257     Connections[1]->vfprintf = stdout_vfprintf;
5258     Connections[1]->fflush = stdout_fflush;
5259     Connections[2] = newterminal("stderr", "w");
5260     Connections[2]->vfprintf = stderr_vfprintf;
5261     Connections[2]->fflush = stderr_fflush;
5262     for(i = 3; i < NCONNECTIONS; i++) Connections[i] = NULL;
5263     R_OutputCon = 1;
5264     R_SinkNumber = 0;
5265     SinkCons[0] = 1; R_ErrorCon = 2;
5266 }
5267 
5268 SEXP attribute_hidden
do_getallconnections(SEXP call,SEXP op,SEXP args,SEXP env)5269 do_getallconnections(SEXP call, SEXP op, SEXP args, SEXP env)
5270 {
5271     int i, j=0, n=0;
5272     SEXP ans;
5273     checkArity(op, args);
5274     for(i = 0; i < NCONNECTIONS; i++)
5275 	if(Connections[i]) n++;
5276     PROTECT(ans = allocVector(INTSXP, n));
5277     for(i = 0; i < NCONNECTIONS; i++)
5278 	if(Connections[i])
5279 	    INTEGER(ans)[j++] = i;
5280     UNPROTECT(1);
5281     return ans;
5282 }
5283 
5284 SEXP attribute_hidden
do_getconnection(SEXP call,SEXP op,SEXP args,SEXP env)5285 do_getconnection(SEXP call, SEXP op, SEXP args, SEXP env)
5286 {
5287     SEXP ans, class;
5288     int what;
5289     Rconnection con;
5290 
5291     checkArity(op, args);
5292     what = asInteger(CAR(args));
5293     if (what == NA_INTEGER)
5294 	error(_("there is no connection NA"));
5295     if (what < 0 || what >= NCONNECTIONS || !Connections[what])
5296 	error(_("there is no connection %d"), what);
5297 
5298     con = Connections[what];
5299     PROTECT(ans = ScalarInteger(what));
5300     PROTECT(class = allocVector(STRSXP, 2));
5301     SET_STRING_ELT(class, 0, mkChar(con->class));
5302     SET_STRING_ELT(class, 1, mkChar("connection"));
5303     classgets(ans, class);
5304     if (what > 2)
5305 	setAttrib(ans, R_ConnIdSymbol, con->ex_ptr);
5306     UNPROTECT(2);
5307     return ans;
5308 }
5309 
do_sumconnection(SEXP call,SEXP op,SEXP args,SEXP env)5310 SEXP attribute_hidden do_sumconnection(SEXP call, SEXP op, SEXP args, SEXP env)
5311 {
5312     SEXP ans, names, tmp;
5313     Rconnection Rcon;
5314 
5315     checkArity(op, args);
5316     Rcon = getConnection(asInteger(CAR(args)));
5317     PROTECT(ans = allocVector(VECSXP, 7));
5318     PROTECT(names = allocVector(STRSXP, 7));
5319     SET_STRING_ELT(names, 0, mkChar("description"));
5320     PROTECT(tmp = allocVector(STRSXP, 1));
5321     if(Rcon->enc == CE_UTF8)
5322 	SET_STRING_ELT(tmp, 0, mkCharCE(Rcon->description, CE_UTF8));
5323     else
5324 	SET_STRING_ELT(tmp, 0, mkChar(Rcon->description));
5325     SET_VECTOR_ELT(ans, 0, tmp);
5326     SET_STRING_ELT(names, 1, mkChar("class"));
5327     SET_VECTOR_ELT(ans, 1, mkString(Rcon->class));
5328     SET_STRING_ELT(names, 2, mkChar("mode"));
5329     SET_VECTOR_ELT(ans, 2, mkString(Rcon->mode));
5330     SET_STRING_ELT(names, 3, mkChar("text"));
5331     SET_VECTOR_ELT(ans, 3, mkString(Rcon->text? "text":"binary"));
5332     SET_STRING_ELT(names, 4, mkChar("opened"));
5333     SET_VECTOR_ELT(ans, 4, mkString(Rcon->isopen? "opened":"closed"));
5334     SET_STRING_ELT(names, 5, mkChar("can read"));
5335     SET_VECTOR_ELT(ans, 5, mkString(Rcon->canread? "yes":"no"));
5336     SET_STRING_ELT(names, 6, mkChar("can write"));
5337     SET_VECTOR_ELT(ans, 6, mkString(Rcon->canwrite? "yes":"no"));
5338     setAttrib(ans, R_NamesSymbol, names);
5339     UNPROTECT(3);
5340     return ans;
5341 }
5342 
5343 
5344 #if defined(USE_WININET_ASYNC) && !defined(USE_WININET)
5345 # define USE_WININET 2
5346 #endif
5347 
5348 // in internet module: 'type' is unused
5349 extern Rconnection
5350 R_newCurlUrl(const char *description, const char * const mode, SEXP headers, int type);
5351 
5352 
5353 /* op = 0: .Internal( url(description, open, blocking, encoding, method, headers))
5354    op = 1: .Internal(file(description, open, blocking, encoding, method, raw))
5355 */
do_url(SEXP call,SEXP op,SEXP args,SEXP env)5356 SEXP attribute_hidden do_url(SEXP call, SEXP op, SEXP args, SEXP env)
5357 {
5358     SEXP scmd, sopen, ans, class, enc, headers = R_NilValue,
5359 	headers_flat = R_NilValue;
5360     char *class2 = "url";
5361     const char *url, *open;
5362     int ncon, block, raw = 0, defmeth,
5363 	meth = 0, // 0: "default" | "internal" | "wininet", 1: "libcurl"
5364 	winmeth;  // 0: "internal", 1: "wininet" (Windows only)
5365     cetype_t ienc = CE_NATIVE;
5366     Rconnection con = NULL;
5367 
5368     checkArity(op, args);
5369     // --------- description
5370     scmd = CAR(args);
5371     if(!isString(scmd) || LENGTH(scmd) != 1 ||
5372        STRING_ELT(scmd, 0) == NA_STRING)
5373 	error(_("invalid '%s' argument"), "description");
5374     if(LENGTH(scmd) > 1)
5375 	warning(_("only first element of 'description' argument used"));
5376 #ifdef Win32
5377     winmeth = 1;
5378     if(PRIMVAL(op) == 1 && !IS_ASCII(STRING_ELT(scmd, 0)) ) { // file(<non-ASCII>, *)
5379 	ienc = CE_UTF8;
5380 	url = trCharUTF8(STRING_ELT(scmd, 0));
5381     } else {
5382 	ienc = getCharCE(STRING_ELT(scmd, 0));
5383 	if(ienc == CE_UTF8)
5384 	    url = CHAR(STRING_ELT(scmd, 0));
5385 	else
5386 	    url = translateCharFP(STRING_ELT(scmd, 0));
5387     }
5388 #else
5389     winmeth = 0;
5390     url = translateCharFP(STRING_ELT(scmd, 0));
5391 #endif
5392 
5393     UrlScheme type = HTTPsh;	/* -Wall */
5394     Rboolean inet = TRUE;
5395     if (strncmp(url, "http://", 7) == 0)
5396 	type = HTTPsh;
5397     else if (strncmp(url, "ftp://", 6) == 0)
5398 	type = FTPsh;
5399     else if (strncmp(url, "https://", 8) == 0)
5400 	type = HTTPSsh;
5401     // ftps:// is available via most libcurl, only
5402     // The internal and wininet methods will create a connection
5403     // but refuse to open it so as from R 3.2.0 we switch to libcurl
5404     else if (strncmp(url, "ftps://", 7) == 0)
5405 	type = FTPSsh;
5406     else
5407 	inet = FALSE; // file:// URL or a file path
5408 
5409     // --------- open
5410     sopen = CADR(args);
5411     if(!isString(sopen) || LENGTH(sopen) != 1)
5412 	error(_("invalid '%s' argument"), "open");
5413     open = CHAR(STRING_ELT(sopen, 0)); /* ASCII */
5414     // --------- blocking
5415     block = asLogical(CADDR(args));
5416     if(block == NA_LOGICAL)
5417 	error(_("invalid '%s' argument"), "blocking");
5418     // --------- encoding
5419     enc = CADDDR(args);
5420     if(!isString(enc) || LENGTH(enc) != 1 ||
5421        strlen(CHAR(STRING_ELT(enc, 0))) > 100) /* ASCII */
5422 	error(_("invalid '%s' argument"), "encoding");
5423 
5424     // --------- method
5425     const char *cmeth = CHAR(asChar(CAD4R(args)));
5426     meth = streql(cmeth, "libcurl"); // 1 if "libcurl", else 0
5427     defmeth = streql(cmeth, "default");
5428 #ifndef Win32
5429     if(defmeth) meth = 1;
5430 #endif
5431     if (streql(cmeth, "wininet")) {
5432 #ifdef Win32
5433 	winmeth = 1;  // it already was as this is the default
5434 #else
5435 	error(_("method = \"wininet\" is only supported on Windows"));
5436 #endif
5437     }
5438 #ifdef Win32
5439     else if (streql(cmeth, "internal")) winmeth = 0;
5440 #endif
5441 
5442     // --------- raw, for file() only
5443     if(PRIMVAL(op) == 1) {
5444 	raw = asLogical(CAD4R(CDR(args)));
5445 	if(raw == NA_LOGICAL)
5446 	    error(_("invalid '%s' argument"), "raw");
5447     }
5448 
5449     // --------- headers, for url() only
5450     if(PRIMVAL(op) == 0) {
5451 	SEXP lheaders = CAD4R(CDR(args));
5452 	if (!isNull(lheaders)) {
5453 	    headers = VECTOR_ELT(lheaders, 0);
5454 	    headers_flat = VECTOR_ELT(lheaders, 1);
5455 	}
5456     }
5457 
5458     if(!meth) {
5459 	if (strncmp(url, "ftps://", 7) == 0) {
5460 #ifdef HAVE_LIBCURL
5461 	    if (defmeth) meth = 1; else
5462 #endif
5463 		error("ftps:// URLs are not supported by this method");
5464 	}
5465 
5466 #if defined(Win32) && defined(HAVE_LIBCURL)
5467 	if (strncmp(url, "ftp://", 8) == 0 && defmeth) meth = 1;
5468 #endif
5469 
5470 #ifdef Win32
5471 	if (!winmeth && strncmp(url, "https://", 8) == 0) {
5472 # ifdef HAVE_LIBCURL
5473 	    if (defmeth) meth = 1; else
5474 # endif
5475 		error("https:// URLs are not supported by this method");
5476 	}
5477 #else // Unix
5478 	if (strncmp(url, "https://", 8) == 0) {
5479 	    // We check the libcurl build does support https as from R 3.3.0
5480 	    if (defmeth) meth = 1; else
5481 		error("https:// URLs are not supported by the \"internal\" method");
5482 	}
5483 #endif
5484     }
5485 
5486     ncon = NextConnection();
5487     if(strncmp(url, "file://", 7) == 0) {
5488 	int nh = 7;
5489 #ifdef Win32
5490 	/* on Windows we have file:///d:/path/to
5491 	   whereas on Unix it is file:///path/to */
5492 	if (strlen(url) > 9 && url[7] == '/' && url[9] == ':') nh = 8;
5493 #endif
5494 	con = newfile(url + nh, ienc, strlen(open) ? open : "r", raw);
5495 	class2 = "file";
5496     } else if (inet) {
5497 	if(meth) {
5498 # ifdef HAVE_LIBCURL
5499 	    con = R_newCurlUrl(url, strlen(open) ? open : "r", headers, 0);
5500 # else
5501 	    error("url(method = \"libcurl\") is not supported on this platform");
5502 # endif
5503 	} else {
5504 	    con = R_newurl(url, strlen(open) ? open : "r", headers_flat, winmeth);
5505 	    ((Rurlconn)con->private)->type = type;
5506 	}
5507     } else {
5508 	if(PRIMVAL(op) == 1) { /* call to file() */
5509 	    if(strlen(url) == 0) {
5510 		if(!strlen(open)) open ="w+";
5511 		if(strcmp(open, "w+") != 0 && strcmp(open, "w+b") != 0) {
5512 		    open ="w+";
5513 		    warning(_("file(\"\") only supports open = \"w+\" and open = \"w+b\": using the former"));
5514 		}
5515 	    }
5516 	    if(strcmp(url, "clipboard") == 0 ||
5517 #ifdef Win32
5518 	       strncmp(url, "clipboard-", 10) == 0
5519 #else
5520 	       strcmp(url, "X11_primary") == 0
5521 	       || strcmp(url, "X11_secondary") == 0
5522 	       || strcmp(url, "X11_clipboard") == 0
5523 #endif
5524 		)
5525 		con = newclp(url, strlen(open) ? open : "r");
5526 	    else {
5527 		const char *efn = R_ExpandFileName(url);
5528 #ifndef Win32
5529 		if (!raw) {
5530 		    struct stat sb;
5531 		    int res = stat(efn, &sb);
5532 		    if (!res && (sb.st_mode & S_IFIFO)) {
5533 			raw = TRUE;
5534 			warning(_("using 'raw = TRUE' because '%s' is a fifo or pipe"),
5535 				url);
5536 		    } else if (!res && !(sb.st_mode & S_IFREG) &&
5537 			       strcmp(efn, "/dev/null"))
5538 			/* not setting 'raw' to FALSE because character devices may be
5539 			   seekable; unfortunately there is no reliable way to detect
5540 			   that without changing the device state */
5541 			warning(_("'raw = FALSE' but '%s' is not a regular file"),
5542 			        url);
5543 		}
5544 #endif
5545 		if (!raw &&
5546 		    (!strlen(open) || streql(open, "r") || streql(open, "rt"))) {
5547 		    /* check if this is a compressed file */
5548 		    FILE *fp = fopen(efn, "rb");
5549 		    char buf[7];
5550 		    int ztype = -1, subtype = 0, compress = 0;
5551 		    if (fp) {
5552 			memset(buf, 0, 7);
5553 			size_t res = fread(buf, 5, 1, fp);
5554 			fclose(fp);
5555 			if(res == 1) {
5556 			    if(buf[0] == '\x1f' && buf[1] == '\x8b') ztype = 0;
5557 			    if(!strncmp(buf, "BZh", 3)) ztype = 1;
5558 			    if((buf[0] == '\xFD') && !strncmp(buf+1, "7zXZ", 4))
5559 				ztype = 2;
5560 			    if((buf[0] == '\xFF') && !strncmp(buf+1, "LZMA", 4))
5561 			    { ztype = 2; subtype = 1;}
5562 			    if(!memcmp(buf, "]\0\0\200\0", 5))
5563 			    { ztype = 2; subtype = 1;}
5564 			}
5565 		    }
5566 		    switch(ztype) {
5567 		    case -1:
5568 			con = newfile(url, ienc, strlen(open) ? open : "r", raw);
5569 			break;
5570 		    case 0:
5571 			con = newgzfile(url, strlen(open) ? open : "rt", compress);
5572 			break;
5573 		    case 1:
5574 			con = newbzfile(url, strlen(open) ? open : "rt", compress);
5575 			break;
5576 		    case 2:
5577 			con = newxzfile(url, strlen(open) ? open : "rt", subtype, compress);
5578 			break;
5579 		    }
5580 		} else
5581 		    con = newfile(url, ienc, strlen(open) ? open : "r", raw);
5582 	    }
5583 	    class2 = "file";
5584 	} else { // url()
5585 	    error(_("URL scheme unsupported by this method"));
5586 	}
5587     }
5588 
5589     Connections[ncon] = con;
5590     con->blocking = block;
5591     strncpy(con->encname, CHAR(STRING_ELT(enc, 0)), 100); /* ASCII */
5592     con->encname[100 - 1] = '\0';
5593 
5594     /* only text-mode connections are affected, but we can't tell that
5595        until the connection is opened, and why set an encoding on a
5596        connection intended to be used in binary mode? */
5597     if (con->encname[0] && !streql(con->encname, "native.enc"))
5598 	con->canseek = 0;
5599     /* This is referenced in do_getconnection, so set up before
5600        any warning */
5601     con->ex_ptr = PROTECT(R_MakeExternalPtr(con->id, install("connection"),
5602 					    R_NilValue));
5603 
5604     /* open it if desired */
5605     if(strlen(open)) {
5606 	Rboolean success = con->open(con);
5607 	if(!success) {
5608 	    con_destroy(ncon);
5609 	    error(_("cannot open the connection"));
5610 	}
5611     }
5612 
5613     PROTECT(ans = ScalarInteger(ncon));
5614     PROTECT(class = allocVector(STRSXP, 2));
5615     SET_STRING_ELT(class, 0, mkChar(class2));
5616     SET_STRING_ELT(class, 1, mkChar("connection"));
5617     classgets(ans, class);
5618     setAttrib(ans, R_ConnIdSymbol, con->ex_ptr);
5619     R_RegisterCFinalizerEx(con->ex_ptr, conFinalizer, FALSE);
5620     UNPROTECT(3);
5621 
5622     return ans;
5623 }
5624 
R_WriteConnection(Rconnection con,void * buf,size_t n)5625 size_t R_WriteConnection(Rconnection con, void *buf, size_t n)
5626 {
5627     if(!con->isopen) error(_("connection is not open"));
5628     if(!con->canwrite) error(_("cannot write to this connection"));
5629 
5630     return con->write(buf, 1, n, con);
5631 }
5632 
R_ReadConnection(Rconnection con,void * buf,size_t n)5633 size_t R_ReadConnection(Rconnection con, void *buf, size_t n)
5634 {
5635     if(!con->isopen) error(_("connection is not open"));
5636     if(!con->canread) error(_("cannot read from this connection"));
5637 
5638     return con->read(buf, 1, n, con);
5639 }
5640 
R_GetConnection(SEXP sConn)5641 Rconnection R_GetConnection(SEXP sConn) {
5642     if (!inherits(sConn, "connection")) error(_("invalid connection"));
5643     return getConnection(asInteger(sConn));
5644 }
5645 
5646 /* ------------------- (de)compression functions  --------------------- */
5647 
5648 /* Code for gzcon connections is modelled on gzio.c from zlib 1.2.3 */
5649 
5650 #define get_byte() (icon->read(&ccc, 1, 1, icon), ccc)
5651 
gzcon_open(Rconnection con)5652 static Rboolean gzcon_open(Rconnection con)
5653 {
5654     Rgzconn priv = con->private;
5655     Rconnection icon = priv->con;
5656 
5657     if(!icon->isopen && !icon->open(icon)) return FALSE;
5658     con->isopen = TRUE;
5659     con->canwrite = icon->canwrite;
5660     con->canread = !con->canwrite;
5661     con->save = -1000;
5662 
5663     priv->s.zalloc = (alloc_func)0;
5664     priv->s.zfree = (free_func)0;
5665     priv->s.opaque = (voidpf)0;
5666     priv->s.next_in = Z_NULL;
5667     priv->s.next_out = Z_NULL;
5668     priv->s.avail_in = priv->s.avail_out = 0;
5669     priv->z_err = Z_OK;
5670     priv->z_eof = 0;
5671     priv->crc = crc32(0L, Z_NULL, 0);
5672 
5673     if(con->canread) {
5674 	/* read header */
5675 	char c, ccc, method, flags, dummy[6];
5676 	unsigned char head[2];
5677 	uInt len;
5678 
5679 	icon->read(head, 1, 2, icon);
5680 	if(head[0] != gz_magic[0] || head[1] != gz_magic[1]) {
5681 	    if(!priv->allow) {
5682 		warning(_("file stream does not have gzip magic number"));
5683 		return FALSE;
5684 	    }
5685 	    priv->nsaved = 2;
5686 	    priv->saved[0] = head[0];
5687 	    priv->saved[1] = head[1];
5688 	    return TRUE;
5689 	}
5690 	icon->read(&method, 1, 1, icon);
5691 	icon->read(&flags, 1, 1, icon);
5692 	if (method != Z_DEFLATED || (flags & RESERVED) != 0) {
5693 	    warning(_("file stream does not have valid gzip header"));
5694 	    return FALSE;
5695 	}
5696 	icon->read(dummy, 1, 6, icon);
5697 	if ((flags & EXTRA_FIELD) != 0) { /* skip the extra field */
5698 	    len  =  (uInt) get_byte();
5699 	    len += ((uInt) get_byte()) << 8;
5700 	    /* len is garbage if EOF but the loop below will quit anyway */
5701 	    while (len-- != 0 && get_byte() != EOF) ;
5702 	}
5703 	if ((flags & ORIG_NAME) != 0) { /* skip the original file name */
5704 	    while ((c = get_byte()) != 0 && c != EOF) ;
5705 	}
5706 	if ((flags & COMMENT) != 0) {   /* skip the .gz file comment */
5707 	    while ((c = get_byte()) != 0 && c != EOF) ;
5708 	}
5709 	if ((flags & HEAD_CRC) != 0) {  /* skip the header crc */
5710 	    for (len = 0; len < 2; len++) (void) get_byte();
5711 	}
5712 	priv->s.next_in  = priv->buffer;
5713 	inflateInit2(&(priv->s), -MAX_WBITS);
5714     } else {
5715 	/* write a header */
5716 	char head[11];
5717 	snprintf(head, 11, "%c%c%c%c%c%c%c%c%c%c", gz_magic[0], gz_magic[1],
5718 		Z_DEFLATED, 0 /*flags*/, 0,0,0,0 /*time*/, 0 /*xflags*/,
5719 		OS_CODE);
5720 	icon->write(head, 1, 10, icon);
5721 	deflateInit2(&(priv->s), priv->cp, Z_DEFLATED, -MAX_WBITS,
5722 		     8, Z_DEFAULT_STRATEGY);
5723 	priv->s.next_out = priv->buffer;
5724 	priv->s.avail_out = Z_BUFSIZE;
5725     }
5726 
5727     return TRUE;
5728 }
5729 
putLong(Rconnection con,uLong x)5730 static void putLong(Rconnection con, uLong x)
5731 {
5732     int n;
5733     unsigned char buf[4];
5734 
5735     for (n = 0; n < 4; n++) {
5736 	buf[n] = (x & 0xff);
5737 	x >>= 8;
5738     }
5739     con->write(&buf, 4, 1, con);
5740 }
5741 
5742 
gzcon_close(Rconnection con)5743 static void gzcon_close(Rconnection con)
5744 {
5745     Rgzconn priv = con->private;
5746     Rconnection icon = priv->con;
5747 
5748     if(icon->canwrite) {
5749 	uInt len;
5750 	int done = 0;
5751 	priv->s.avail_in = 0; /* should be zero already anyway */
5752 	for (;;) {
5753 	    len = Z_BUFSIZE - priv->s.avail_out;
5754 
5755 	    if (len != 0) {
5756 		if (icon->write(priv->buffer, 1, len, icon) != len) {
5757 		    priv->z_err = Z_ERRNO;
5758 		    error(_("writing error whilst flushing 'gzcon' connection"));
5759 		}
5760 		priv->s.next_out = priv->buffer;
5761 		priv->s.avail_out = Z_BUFSIZE;
5762 	    }
5763 	    if (done) break;
5764 	    priv->z_err = deflate(&(priv->s), Z_FINISH);
5765 
5766 	    /* deflate has finished flushing only when it hasn't used up
5767 	     * all the available space in the output buffer:
5768 	     */
5769 	    done = (priv->s.avail_out != 0 || priv->z_err == Z_STREAM_END);
5770 
5771 	    if (priv->z_err != Z_OK && priv->z_err != Z_STREAM_END) break;
5772 	}
5773 	deflateEnd(&(priv->s));
5774 	/* NB: these must be little-endian */
5775 	putLong(icon, priv->crc);
5776 	putLong(icon, (uLong) (priv->s.total_in & 0xffffffff));
5777     } else inflateEnd(&(priv->s));
5778 
5779     if(icon->isopen) icon->close(icon);
5780     con->isopen = FALSE;
5781 }
5782 
gzcon_byte(Rgzconn priv)5783 static int gzcon_byte(Rgzconn priv)
5784 {
5785     Rconnection icon = priv->con;
5786 
5787     if (priv->z_eof) return EOF;
5788     if (priv->s.avail_in == 0) {
5789 	priv->s.avail_in = (uInt) icon->read(priv->buffer, 1, Z_BUFSIZE, icon);
5790 	if (priv->s.avail_in == 0) {
5791 	    priv->z_eof = 1;
5792 	    return EOF;
5793 	}
5794 	priv->s.next_in = priv->buffer;
5795     }
5796     priv->s.avail_in--;
5797     return *(priv->s.next_in)++;
5798 }
5799 
5800 
gzcon_read(void * ptr,size_t size,size_t nitems,Rconnection con)5801 static size_t gzcon_read(void *ptr, size_t size, size_t nitems,
5802 			 Rconnection con)
5803 {
5804     Rgzconn priv = con->private;
5805     Rconnection icon = priv->con;
5806     Bytef *start = (Bytef*) ptr;
5807     uLong crc;
5808     int n;
5809 
5810     if (priv->z_err == Z_STREAM_END) return 0;  /* EOF */
5811 
5812     /* wrapped connection only needs to handle INT_MAX */
5813     if ((double) size * (double) nitems > INT_MAX)
5814 	error(_("too large a block specified"));
5815     if (priv->nsaved >= 0) { /* non-compressed mode */
5816 	size_t len = size*nitems;
5817 	int i, nsaved = priv->nsaved;
5818 	if (len == 0) return 0;
5819 	if (len >= 2) {
5820 	    for(i = 0; i < priv->nsaved; i++)
5821 		((char *)ptr)[i] = priv->saved[i];
5822 	    priv->nsaved = 0;
5823 	    return (nsaved + icon->read((char *) ptr+nsaved, 1, len - nsaved,
5824 					icon))/size;
5825 	}
5826 	if (len == 1) { /* size must be one */
5827 	    if (nsaved > 0) {
5828 		((char *) ptr)[0] = priv->saved[0];
5829 		priv->saved[0] = priv->saved[1];
5830 		priv->nsaved--;
5831 		return 1;
5832 	    } else
5833 		return icon->read(ptr, 1, 1, icon);
5834 	}
5835     }
5836 
5837     priv->s.next_out = (Bytef*) ptr;
5838     priv->s.avail_out = (uInt)(size*nitems);
5839 
5840     while (priv->s.avail_out != 0) {
5841 	if (priv->s.avail_in == 0 && !priv->z_eof) {
5842 	    priv->s.avail_in = (uInt)icon->read(priv->buffer, 1, Z_BUFSIZE, icon);
5843 	    if (priv->s.avail_in == 0) priv->z_eof = 1;
5844 	    priv->s.next_in = priv->buffer;
5845 	}
5846 	priv->z_err = inflate(&(priv->s), Z_NO_FLUSH);
5847 
5848 	if (priv->z_err == Z_STREAM_END) {
5849 	    /* Check CRC */
5850 	    priv->crc = crc32(priv->crc, start,
5851 			      (uInt)(priv->s.next_out - start));
5852 	    start = priv->s.next_out;
5853 	    crc = 0;
5854 	    for (n = 0; n < 4; n++) {
5855 		crc >>= 8;
5856 		crc += ((uLong) gzcon_byte(priv) << 24);
5857 	    }
5858 	    if (crc != priv->crc) {
5859 		priv->z_err = Z_DATA_ERROR;
5860 		REprintf(_("crc error %x %x\n"), crc, priv->crc);
5861 	    }
5862 	    /* finally, get (and ignore) length */
5863 	    for (n = 0; n < 4; n++) gzcon_byte(priv);
5864 	}
5865 	if (priv->z_err != Z_OK || priv->z_eof) break;
5866     }
5867     priv->crc = crc32(priv->crc, start, (uInt)(priv->s.next_out - start));
5868     return (size_t)(size*nitems - priv->s.avail_out)/size;
5869 }
5870 
gzcon_write(const void * ptr,size_t size,size_t nitems,Rconnection con)5871 static size_t gzcon_write(const void *ptr, size_t size, size_t nitems,
5872 			  Rconnection con)
5873 {
5874     Rgzconn priv = con->private;
5875     Rconnection icon = priv->con;
5876 
5877     if ((double) size * (double) nitems > INT_MAX)
5878 	error(_("too large a block specified"));
5879     priv->s.next_in = (Bytef*) ptr;
5880     priv->s.avail_in = (uInt)(size*nitems);
5881 
5882     while (priv->s.avail_in != 0) {
5883 	if (priv->s.avail_out == 0) {
5884 	    priv->s.next_out = priv->buffer;
5885 	    if (icon->write(priv->buffer, 1, Z_BUFSIZE, icon) != Z_BUFSIZE) {
5886 		priv->z_err = Z_ERRNO;
5887 		warning(_("write error on 'gzcon' connection"));
5888 		break;
5889 	    }
5890 	    priv->s.avail_out = Z_BUFSIZE;
5891 	}
5892 	priv->z_err = deflate(&(priv->s), Z_NO_FLUSH);
5893 	if (priv->z_err != Z_OK) break;
5894     }
5895     priv->crc = crc32(priv->crc, (const Bytef *) ptr, (uInt)(size*nitems));
5896     return (size_t)(size*nitems - priv->s.avail_in)/size;
5897 }
5898 
gzcon_fgetc(Rconnection con)5899 static int gzcon_fgetc(Rconnection con)
5900 {
5901     unsigned char c;
5902     size_t n = gzcon_read(&c, 1, 1, con);
5903     return (n == 1) ? c : R_EOF;
5904 }
5905 
5906 
5907 /* gzcon(con, level, allowNonCompressed) */
do_gzcon(SEXP call,SEXP op,SEXP args,SEXP rho)5908 SEXP attribute_hidden do_gzcon(SEXP call, SEXP op, SEXP args, SEXP rho)
5909 {
5910     SEXP ans, class;
5911     int icon, level, allow;
5912     Rconnection incon = NULL, new = NULL;
5913     char *m, *mode = NULL /* -Wall */,  description[1000];
5914     Rboolean text;
5915 
5916     checkArity(op, args);
5917     if(!inherits(CAR(args), "connection"))
5918 	error(_("'con' is not a connection"));
5919     incon = getConnection(icon = asInteger(CAR(args)));
5920     level = asInteger(CADR(args));
5921     if(level == NA_INTEGER || level < 0 || level > 9)
5922 	error(_("'level' must be one of 0 ... 9"));
5923     allow = asLogical(CADDR(args));
5924     if(allow == NA_INTEGER)
5925 	error(_("'allowNonCompression' must be TRUE or FALSE"));
5926     text = asLogical(CADDDR(args));
5927     if(text == NA_INTEGER)
5928         error(_("'text' must be TRUE or FALSE"));
5929 
5930     if(incon->isGzcon) {
5931 	warning(_("this is already a 'gzcon' connection"));
5932 	return CAR(args);
5933     }
5934     m = incon->mode;
5935     if(strcmp(m, "r") == 0 || strncmp(m, "rb", 2) == 0) mode = "rb";
5936     else if (strcmp(m, "w") == 0 || strncmp(m, "wb", 2) == 0) mode = "wb";
5937     else error(_("can only use read- or write- binary connections"));
5938     if(strcmp(incon->class, "file") == 0 &&
5939        (strcmp(m, "r") == 0 || strcmp(m, "w") == 0))
5940 	warning(_("using a text-mode 'file' connection may not work correctly"));
5941 
5942     else if(strcmp(incon->class, "textConnection") == 0 && strcmp(m, "w") == 0)
5943 	error(_("cannot create a 'gzcon' connection from a writable textConnection; maybe use rawConnection"));
5944 
5945     new = (Rconnection) malloc(sizeof(struct Rconn));
5946     if(!new) error(_("allocation of 'gzcon' connection failed"));
5947     new->class = (char *) malloc(strlen("gzcon") + 1);
5948     if(!new->class) {
5949 	free(new);
5950 	error(_("allocation of 'gzcon' connection failed"));
5951  	/* for Solaris 12.5 */ new = NULL;
5952    }
5953     strcpy(new->class, "gzcon");
5954     Rsnprintf_mbcs(description, 1000, "gzcon(%s)", incon->description);
5955     new->description = (char *) malloc(strlen(description) + 1);
5956     if(!new->description) {
5957 	free(new->class); free(new);
5958 	error(_("allocation of 'gzcon' connection failed"));
5959 	/* for Solaris 12.5 */ new = NULL;
5960     }
5961     init_con(new, description, CE_NATIVE, mode);
5962     new->text = text;
5963     new->isGzcon = TRUE;
5964     new->open = &gzcon_open;
5965     new->close = &gzcon_close;
5966     new->vfprintf = &dummy_vfprintf;
5967     new->fgetc = &gzcon_fgetc;
5968     new->read = &gzcon_read;
5969     new->write = &gzcon_write;
5970     new->private = (void *) malloc(sizeof(struct gzconn));
5971     if(!new->private) {
5972 	free(new->description); free(new->class); free(new);
5973 	error(_("allocation of 'gzcon' connection failed"));
5974 	/* for Solaris 12.5 */ new = NULL;
5975     }
5976     ((Rgzconn)(new->private))->con = incon;
5977     ((Rgzconn)(new->private))->cp = level;
5978     ((Rgzconn)(new->private))->nsaved = -1;
5979     ((Rgzconn)(new->private))->allow = allow;
5980 
5981     /* as there might not be an R-level reference to the wrapped connection */
5982     R_PreserveObject(incon->ex_ptr);
5983 
5984     Connections[icon] = new;
5985     // gcc 8 with sanitizers selected objects to truncation here with 99 or 100.
5986     strncpy(new->encname, incon->encname, 100);
5987     new->encname[100 - 1] = '\0';
5988     new->ex_ptr = PROTECT(R_MakeExternalPtr((void *)new->id, install("connection"),
5989 					    R_NilValue));
5990     if(incon->isopen) new->open(new);
5991 
5992     PROTECT(ans = ScalarInteger(icon));
5993     PROTECT(class = allocVector(STRSXP, 2));
5994     SET_STRING_ELT(class, 0, mkChar("gzcon"));
5995     SET_STRING_ELT(class, 1, mkChar("connection"));
5996     classgets(ans, class);
5997     setAttrib(ans, R_ConnIdSymbol, new->ex_ptr);
5998     /* Disable, as e.g. load() leaves no reference to the new connection */
5999     //R_RegisterCFinalizerEx(new->ex_ptr, conFinalizer, FALSE);
6000     UNPROTECT(3);
6001 
6002     return ans;
6003 }
6004 
6005 
6006 /* code for in-memory (de)compression
6007    of data stored in a scalar string. Uses a 4-byte header of length,
6008    in XDR order. */
6009 
6010 #ifndef WORDS_BIGENDIAN
uiSwap(unsigned int x)6011 static unsigned int uiSwap (unsigned int x)
6012 {
6013   return((x << 24) | ((x & 0xff00) << 8) | ((x & 0xff0000) >> 8) | (x >> 24));
6014 }
6015 #else
6016 #define uiSwap(x) (x)
6017 #endif
6018 
6019 /* These are all hidden and used only in serialize.c,
6020    so managing R_alloc stack is prudence. */
6021 attribute_hidden
R_compress1(SEXP in)6022 SEXP R_compress1(SEXP in)
6023 {
6024     const void *vmax = vmaxget();
6025     unsigned int inlen;
6026     uLong outlen;
6027     int res;
6028     Bytef *buf;
6029     SEXP ans;
6030 
6031     if(TYPEOF(in) != RAWSXP)
6032 	error("R_compress1 requires a raw vector");
6033     inlen = LENGTH(in);
6034     outlen = (uLong)(1.001*inlen + 20);
6035     buf = (Bytef *) R_alloc(outlen + 4, sizeof(Bytef));
6036     /* we want this to be system-independent */
6037     *((unsigned int *)buf) = (unsigned int) uiSwap(inlen);
6038     res = compress(buf + 4, &outlen, (Bytef *)RAW(in), inlen);
6039     if(res != Z_OK) error("internal error %d in R_compress1", res);
6040     ans = allocVector(RAWSXP, outlen + 4);
6041     memcpy(RAW(ans), buf, outlen + 4);
6042     vmaxset(vmax);
6043     return ans;
6044 }
6045 
6046 attribute_hidden
R_decompress1(SEXP in,Rboolean * err)6047 SEXP R_decompress1(SEXP in, Rboolean *err)
6048 {
6049     const void *vmax = vmaxget();
6050     uLong inlen, outlen;
6051     int res;
6052     Bytef *buf;
6053     unsigned char *p = RAW(in);
6054     SEXP ans;
6055 
6056     if(TYPEOF(in) != RAWSXP)
6057 	error("R_decompress1 requires a raw vector");
6058     inlen = LENGTH(in);
6059     outlen = (uLong) uiSwap(*((unsigned int *) p));
6060     buf = (Bytef *) R_alloc(outlen, sizeof(Bytef));
6061     res = uncompress(buf, &outlen, (Bytef *)(p + 4), inlen - 4);
6062     if(res != Z_OK) {
6063 	warning("internal error %d in R_decompress1", res);
6064 	*err = TRUE;
6065 	return R_NilValue;
6066     }
6067     ans = allocVector(RAWSXP, outlen);
6068     memcpy(RAW(ans), buf, outlen);
6069     vmaxset(vmax);
6070     return ans;
6071 }
6072 
6073 attribute_hidden
R_compress2(SEXP in)6074 SEXP R_compress2(SEXP in)
6075 {
6076     const void *vmax = vmaxget();
6077     unsigned int inlen, outlen;
6078     int res;
6079     char *buf;
6080     SEXP ans;
6081 
6082     if(TYPEOF(in) != RAWSXP)
6083 	error("R_compress2 requires a raw vector");
6084     inlen = LENGTH(in);
6085     outlen = (unsigned int)(1.01*inlen + 600);
6086     buf = R_alloc(outlen + 5, sizeof(char));
6087     /* we want this to be system-independent */
6088     *((unsigned int *)buf) = (unsigned int) uiSwap(inlen);
6089     buf[4] = '2';
6090     res = BZ2_bzBuffToBuffCompress(buf + 5, &outlen,
6091 				   (char *)RAW(in), inlen,
6092 				   9, 0, 0);
6093     if(res != BZ_OK) error("internal error %d in R_compress2", res);
6094     /* printf("compressed %d to %d\n", inlen, outlen); */
6095     if (res != BZ_OK || outlen > inlen) {
6096 	outlen = inlen;
6097 	buf[4] = '0';
6098 	memcpy(buf+5, (char *)RAW(in), inlen);
6099     }
6100     ans = allocVector(RAWSXP, outlen + 5);
6101     memcpy(RAW(ans), buf, outlen + 5);
6102     vmaxset(vmax);
6103     return ans;
6104 }
6105 
6106 attribute_hidden
R_decompress2(SEXP in,Rboolean * err)6107 SEXP R_decompress2(SEXP in, Rboolean *err)
6108 {
6109     const void *vmax = vmaxget();
6110     unsigned int inlen, outlen;
6111     int res;
6112     char *buf, *p = (char *) RAW(in), type;
6113     SEXP ans;
6114 
6115     if(TYPEOF(in) != RAWSXP)
6116 	error("R_decompress2 requires a raw vector");
6117     inlen = LENGTH(in);
6118     outlen = uiSwap(*((unsigned int *) p));
6119     buf = R_alloc(outlen, sizeof(char));
6120     type = p[4];
6121     if (type == '2') {
6122 	res = BZ2_bzBuffToBuffDecompress(buf, &outlen, p + 5, inlen - 5, 0, 0);
6123 	if(res != BZ_OK) {
6124 	    warning("internal error %d in R_decompress2", res);
6125 	    *err = TRUE;
6126 	    return R_NilValue;
6127 	}
6128     } else if (type == '1') {
6129 	uLong outl;
6130 	res = uncompress((unsigned char *) buf, &outl,
6131 			 (Bytef *)(p + 5), inlen - 5);
6132 	if(res != Z_OK) {
6133 	    warning("internal error %d in R_decompress1");
6134 	    *err = TRUE;
6135 	    return R_NilValue;
6136 	}
6137     } else if (type == '0') {
6138 	buf = p + 5;
6139     } else {
6140 	warning("unknown type in R_decompress2");
6141 	*err = TRUE;
6142 	return R_NilValue;
6143     }
6144     ans = allocVector(RAWSXP, outlen);
6145     memcpy(RAW(ans), buf, outlen);
6146     vmaxset(vmax);
6147     return ans;
6148 }
6149 
6150 
do_sockselect(SEXP call,SEXP op,SEXP args,SEXP rho)6151 SEXP attribute_hidden do_sockselect(SEXP call, SEXP op, SEXP args, SEXP rho)
6152 {
6153     Rboolean immediate = FALSE;
6154     int nsock, i;
6155     SEXP insock, write, val, insockfd;
6156     double timeout;
6157 
6158     checkArity(op, args);
6159 
6160     insock = CAR(args);
6161     if (TYPEOF(insock) != VECSXP || LENGTH(insock) == 0)
6162 	error(_("not a list of sockets"));
6163     nsock = LENGTH(insock);
6164 
6165     write = CADR(args);
6166     if (TYPEOF(write) != LGLSXP || LENGTH(write) != nsock)
6167 	error(_("bad write indicators"));
6168 
6169     timeout = asReal(CADDR(args));
6170 
6171     PROTECT(insockfd = allocVector(INTSXP, nsock));
6172     PROTECT(val = allocVector(LGLSXP, nsock));
6173 
6174     for (i = 0; i < nsock; i++) {
6175 	Rconnection conn = getConnection(asInteger(VECTOR_ELT(insock, i)));
6176 	if (!strcmp(conn->class, "sockconn")) {
6177 	    Rsockconn scp = conn->private;
6178 	    INTEGER(insockfd)[i] = scp->fd;
6179 	    if (! LOGICAL(write)[i] && scp->pstart < scp->pend) {
6180 		LOGICAL(val)[i] = TRUE;
6181 		immediate = TRUE;
6182 	    }
6183 	    else LOGICAL(val)[i] = FALSE;
6184 	} else if (!strcmp(conn->class, "servsockconn")) {
6185 	    Rservsockconn sop = conn->private;
6186 	    INTEGER(insockfd)[i] = sop->fd;
6187 	    LOGICAL(val)[i] = FALSE;
6188 	    if (LOGICAL(write)[i])
6189 		warning(_("a server socket connection cannot be writeable"));
6190 	} else
6191 	    error(_("not a socket connection"));
6192     }
6193 
6194     if (! immediate)
6195 	Rsockselect(nsock, INTEGER(insockfd), LOGICAL(val), LOGICAL(write),
6196 		    timeout);
6197 
6198     UNPROTECT(2);
6199     return val;
6200 }
6201 
do_serversocket(SEXP call,SEXP op,SEXP args,SEXP rho)6202 SEXP attribute_hidden do_serversocket(SEXP call, SEXP op, SEXP args, SEXP rho)
6203 {
6204     SEXP ans, class;
6205     int ncon, port;
6206     Rconnection con = NULL;
6207 
6208     checkArity(op, args);
6209 
6210     port = asInteger(CAR(args));
6211     if(port == NA_INTEGER || port < 0)
6212 	error(_("invalid '%s' argument"), "port");
6213 
6214     ncon = NextConnection();
6215     con = R_newservsock(port);
6216     Connections[ncon] = con;
6217     con->ex_ptr = PROTECT(R_MakeExternalPtr(con->id, install("connection"), R_NilValue));
6218 
6219     PROTECT(ans = ScalarInteger(ncon));
6220     PROTECT(class = allocVector(STRSXP, 2));
6221     SET_STRING_ELT(class, 0, mkChar("servsockconn"));
6222     SET_STRING_ELT(class, 1, mkChar("connection"));
6223     classgets(ans, class);
6224     setAttrib(ans, R_ConnIdSymbol, con->ex_ptr);
6225     R_RegisterCFinalizerEx(con->ex_ptr, conFinalizer, FALSE);
6226     UNPROTECT(3);
6227     return ans;
6228 }
6229 
6230 /* socketTimeout(socket, timeout) */
do_socktimeout(SEXP call,SEXP op,SEXP args,SEXP rho)6231 SEXP attribute_hidden do_socktimeout(SEXP call, SEXP op, SEXP args, SEXP rho)
6232 {
6233     int tnew, told;
6234     Rsockconn scon;
6235 
6236     checkArity(op, args);
6237 
6238     if(!inherits(CAR(args), "sockconn"))
6239 	error(_("invalid '%s' argument"), "socket");
6240 
6241     scon = getConnection(asInteger(CAR(args)))->private;
6242     told = scon->timeout;
6243 
6244     tnew = asInteger(CADR(args));
6245     if(tnew == NA_INTEGER)
6246 	error(_("invalid '%s' argument"), "timeout");
6247 
6248     if (tnew >= 0)
6249 	scon->timeout = tnew;
6250 
6251     return ScalarInteger(told);
6252 }
6253 
6254 static lzma_filter filters[LZMA_FILTERS_MAX + 1];
6255 
init_filters(void)6256 static void init_filters(void)
6257 {
6258     static uint32_t preset_number = 6; /* 9 | LZMA_PRESET_EXTREME; */
6259     static lzma_options_lzma opt_lzma;
6260     static Rboolean set = FALSE;
6261     if(set) return;
6262     if(lzma_lzma_preset(&opt_lzma, preset_number))
6263 	error("problem setting presets");
6264     filters[0].id = LZMA_FILTER_LZMA2;
6265     filters[0].options = &opt_lzma;
6266     filters[1].id = LZMA_VLI_UNKNOWN;
6267     set = TRUE;
6268     /*
6269       printf("encoding memory usage %lu\n", lzma_raw_encoder_memusage(filters));
6270       printf("decoding memory usage %lu\n", lzma_raw_decoder_memusage(filters));
6271     */
6272 }
6273 
6274 attribute_hidden
R_compress3(SEXP in)6275 SEXP R_compress3(SEXP in)
6276 {
6277     const void *vmax = vmaxget();
6278     unsigned int inlen, outlen;
6279     unsigned char *buf;
6280     SEXP ans;
6281     lzma_stream strm = LZMA_STREAM_INIT;
6282     lzma_ret ret;
6283 
6284     if(TYPEOF(in) != RAWSXP)
6285 	error("R_compress3 requires a raw vector");
6286     inlen = LENGTH(in);
6287     outlen = inlen + 5;  /* don't allow it to expand */
6288     buf = (unsigned char *) R_alloc(outlen + 5, sizeof(unsigned char));
6289     /* we want this to be system-independent */
6290     *((unsigned int *)buf) = (unsigned int) uiSwap(inlen);
6291     buf[4] = 'Z';
6292 
6293     init_filters();
6294     ret = lzma_raw_encoder(&strm, filters);
6295     if (ret != LZMA_OK) error("internal error %d in R_compress3", ret);
6296     strm.next_in = RAW(in);
6297     strm.avail_in = inlen;
6298     strm.next_out = buf + 5;
6299     strm.avail_out = outlen;
6300     while(!ret) ret = lzma_code(&strm, LZMA_FINISH);
6301     if (ret != LZMA_STREAM_END || (strm.avail_in > 0)) {
6302 	warning("internal error %d in R_compress3", ret);
6303 	outlen = inlen;
6304 	buf[4] = '0';
6305 	memcpy(buf+5, (char *)RAW(in), inlen);
6306     } else outlen = (unsigned int) strm.total_out;
6307     lzma_end(&strm);
6308 
6309     /* printf("compressed %d to %d\n", inlen, outlen); */
6310     ans = allocVector(RAWSXP, outlen + 5);
6311     memcpy(RAW(ans), buf, outlen + 5);
6312     vmaxset(vmax);
6313     return ans;
6314 }
6315 
6316 attribute_hidden
R_decompress3(SEXP in,Rboolean * err)6317 SEXP R_decompress3(SEXP in, Rboolean *err)
6318 {
6319     const void *vmax = vmaxget();
6320     unsigned int inlen, outlen;
6321     unsigned char *buf, *p = RAW(in), type = p[4];
6322     SEXP ans;
6323 
6324     if(TYPEOF(in) != RAWSXP)
6325 	error("R_decompress3 requires a raw vector");
6326     inlen = LENGTH(in);
6327     outlen = (unsigned int) uiSwap(*((unsigned int *) p));
6328     buf = (unsigned char *) R_alloc(outlen, sizeof(unsigned char));
6329 
6330     if (type == 'Z') {
6331 	lzma_stream strm = LZMA_STREAM_INIT;
6332 	lzma_ret ret;
6333 	init_filters();
6334 	ret = lzma_raw_decoder(&strm, filters);
6335 	if (ret != LZMA_OK) {
6336 	    warning("internal error %d in R_decompress3", ret);
6337 	    *err = TRUE;
6338 	    return R_NilValue;
6339 	}
6340 	strm.next_in = p + 5;
6341 	strm.avail_in = inlen - 5;
6342 	strm.next_out = buf;
6343 	strm.avail_out = outlen;
6344 	ret = lzma_code(&strm, LZMA_RUN);
6345 	if (ret != LZMA_OK && (strm.avail_in > 0)) {
6346 	    warning("internal error %d in R_decompress3 %d",
6347 		    ret, strm.avail_in);
6348 	    *err = TRUE;
6349 	    return R_NilValue;
6350 	}
6351 	lzma_end(&strm);
6352     } else if (type == '2') {
6353 	int res;
6354 	res = BZ2_bzBuffToBuffDecompress((char *)buf, &outlen,
6355 					 (char *)(p + 5), inlen - 5, 0, 0);
6356 	if(res != BZ_OK) {
6357 	    warning("internal error %d in R_decompress2", res);
6358 	    *err = TRUE;
6359 	    return R_NilValue;
6360 	}
6361     } else if (type == '1') {
6362 	uLong outl; int res;
6363 	res = uncompress(buf, &outl, (Bytef *)(p + 5), inlen - 5);
6364 	if(res != Z_OK) {
6365 	    warning("internal error %d in R_decompress1");
6366 	    *err = TRUE;
6367 	    return R_NilValue;
6368 	}
6369     } else if (type == '0') {
6370 	buf = p + 5;
6371     } else {
6372 	warning("unknown type in R_decompress3");
6373 	*err = TRUE;
6374 	return R_NilValue;
6375     }
6376     ans = allocVector(RAWSXP, outlen);
6377     memcpy(RAW(ans), buf, outlen);
6378     vmaxset(vmax);
6379     return ans;
6380 }
6381 
6382 SEXP attribute_hidden
do_memCompress(SEXP call,SEXP op,SEXP args,SEXP env)6383 do_memCompress(SEXP call, SEXP op, SEXP args, SEXP env)
6384 {
6385     SEXP ans, from;
6386     int type, res;
6387 
6388     checkArity(op, args);
6389     ans = from = CAR(args);
6390     if(TYPEOF(from) != RAWSXP) error("'from' must be raw or character");
6391     type = asInteger(CADR(args));
6392     switch(type) {
6393     case 1: break; /* none */
6394     case 2: /*gzip */
6395     {
6396 	Bytef *buf;
6397 	/* could use outlen = compressBound(inlen) */
6398 	uLong inlen = XLENGTH(from),
6399 	    outlen = (uLong)(1.001*(double)inlen + 20);
6400 	buf = (Bytef *) R_alloc(outlen, sizeof(Bytef));
6401 	res = compress(buf, &outlen, (Bytef *)RAW(from), inlen);
6402 	if(res != Z_OK) error("internal error %d in memCompress", res);
6403 	ans = allocVector(RAWSXP, outlen);
6404 	memcpy(RAW(ans), buf, outlen);
6405 	break;
6406     }
6407     case 3: /* bzip */
6408     {
6409 	// bzlib does not support long inputs
6410 	char *buf;
6411 	unsigned int inlen = LENGTH(from),
6412 	    outlen = (unsigned int)(1.01*inlen + 600);
6413 	buf = R_alloc(outlen, sizeof(char));
6414 	res = BZ2_bzBuffToBuffCompress(buf, &outlen, (char *)RAW(from),
6415 				       inlen, 9, 0, 0);
6416 	if(res != BZ_OK) error("internal error %d in memCompress", res);
6417 	ans = allocVector(RAWSXP, outlen);
6418 	memcpy(RAW(ans), buf, outlen);
6419 	break;
6420     }
6421     case 4: /* xz */
6422     {
6423 	unsigned char *buf;
6424 	size_t inlen = XLENGTH(from), outlen;
6425 	lzma_stream strm = LZMA_STREAM_INIT;
6426 	lzma_ret ret;
6427 	lzma_filter filters[LZMA_FILTERS_MAX + 1];
6428 	uint32_t preset_number = 9 | LZMA_PRESET_EXTREME;
6429 	lzma_options_lzma opt_lzma;
6430 
6431 	if(lzma_lzma_preset(&opt_lzma, preset_number))
6432 	    error("problem setting presets");
6433 	filters[0].id = LZMA_FILTER_LZMA2;
6434 	filters[0].options = &opt_lzma;
6435 	filters[1].id = LZMA_VLI_UNKNOWN;
6436 
6437 	ret = lzma_stream_encoder(&strm, filters, LZMA_CHECK_CRC32);
6438 	if (ret != LZMA_OK) error("internal error %d in memCompress", ret);
6439 
6440 	outlen = (unsigned int)(1.01 * inlen + 600); /* FIXME, copied
6441 						        from bzip2 case */
6442 	buf = (unsigned char *) R_alloc(outlen, sizeof(unsigned char));
6443 	strm.next_in = RAW(from);
6444 	strm.avail_in = inlen;
6445 	strm.next_out = buf;
6446 	strm.avail_out = outlen;
6447 	while(!ret) ret = lzma_code(&strm, LZMA_FINISH);
6448 	if (ret != LZMA_STREAM_END || (strm.avail_in > 0))
6449 	    error("internal error %d in memCompress", ret);
6450 	/* If LZMZ_BUF_ERROR, could realloc and continue */
6451 	outlen = (unsigned int)strm.total_out;
6452 	lzma_end(&strm);
6453 	ans = allocVector(RAWSXP, outlen);
6454 	memcpy(RAW(ans), buf, outlen);
6455 	break;
6456     }
6457     default:
6458 	break;
6459     }
6460 
6461     return ans;
6462 }
6463 
6464 SEXP attribute_hidden
do_memDecompress(SEXP call,SEXP op,SEXP args,SEXP env)6465 do_memDecompress(SEXP call, SEXP op, SEXP args, SEXP env)
6466 {
6467     SEXP ans, from;
6468     int type, subtype = 0;
6469 
6470     checkArity(op, args);
6471     ans = from = CAR(args);
6472     if(TYPEOF(from) != RAWSXP) error("'from' must be raw or character");
6473     type = asInteger(CADR(args));
6474     if (type == 5) {/* type = 5 is "unknown" */
6475 	char *p = (char *) RAW(from);
6476 	if (strncmp(p, "BZh", 3) == 0) type = 3; /* bzip2 always uses a header */
6477 	else if(p[0] == '\x1f' && p[1] == '\x8b') type = 2; /* gzip files */
6478 	else if((p[0] == '\xFD') && !strncmp(p+1, "7zXZ", 4)) type = 4;
6479 	else if((p[0] == '\xFF') && !strncmp(p+1, "LZMA", 4)) {
6480 	    type = 4; subtype = 1;
6481 	} else if(!memcmp(p, "]\0\0\200\0", 5)) {
6482 	    type = 4; subtype = 1;
6483 	} else {
6484 	    warning(_("unknown compression, assuming none"));
6485 	    type = 1;
6486 	}
6487     }
6488 
6489     switch(type) {
6490     case 1: break; /* none */
6491     case 2: /* gzip */
6492     {
6493 	uLong inlen = XLENGTH(from), outlen = 3*inlen;
6494 	Bytef *buf, *p = (Bytef *)RAW(from);
6495 	int opt = 0;
6496 
6497 	if (p[0] == 0x1f && p[1] == 0x8b) { // in-memory gzip file
6498 	    // in this case we could read outlen from the trailer
6499 	    opt = 16;  // force gzip format
6500 	}
6501 	while(1) {
6502 	    buf = (Bytef *) R_alloc(outlen, sizeof(Bytef));
6503 	    int res = R_uncompress(buf, &outlen, p, inlen, opt);
6504 	    if(res == Z_BUF_ERROR) {
6505 		if(outlen < ULONG_MAX/2) {
6506 		    outlen *= 2; continue;
6507 		} else break;
6508 	    }
6509 	    if(res >= 0) break;
6510 	    error("internal error %d in memDecompress(%s)", res,
6511 		  "type = \"gzip\"");
6512 	}
6513 
6514 	/*
6515 	if (p[0] == 0x1f && p[1] == 0x8b) { // in-memory gzip file
6516 	    // in this case we could read outlen from the trailer
6517 	    while(1) {
6518 		buf = (Bytef *) R_alloc(outlen, sizeof(Bytef));
6519 		int res = R_gzuncompress(buf, &outlen, p, inlen, 16); // force gzip format
6520 		if(res == Z_BUF_ERROR) {
6521 		    if(outlen < ULONG_MAX/2) {
6522 			outlen *= 2; continue;
6523 		    } else break;
6524 		}
6525 		if(res >= 0) break;
6526 		error("internal error %d in memDecompress(%s)", res,
6527 		      "type = \"gzip\"");
6528 	    }
6529 	} else {
6530 	    while(1) {
6531 		buf = (Bytef *) R_alloc(outlen, sizeof(Bytef));
6532 		int res = uncompress(buf, &outlen, p, inlen);
6533 		if(res == Z_BUF_ERROR) {
6534 		    if(outlen < ULONG_MAX/2) {
6535 			outlen *= 2; continue;
6536 		    } else break;
6537 		}
6538 		if(res == Z_OK) break;
6539 		error("internal error %d in memDecompress(%s)", res,
6540 		      "type = \"gzip\"");
6541 	    }
6542 	}
6543 	*/
6544 	ans = allocVector(RAWSXP, outlen);
6545 	memcpy(RAW(ans), buf, outlen);
6546 	break;
6547     }
6548     case 3: /* bzip2 */
6549     {
6550 	// bzlib does not support long inputs
6551 	unsigned int inlen = LENGTH(from), outlen;
6552 	double o0 = 3.0*inlen;
6553 	outlen = (o0 > UINT_MAX)? UINT_MAX: (unsigned int)o0;
6554 	int res;
6555 	char *buf, *p = (char *) RAW(from);
6556 	while(1) {
6557 	    buf = R_alloc(outlen, sizeof(char));
6558 	    res = BZ2_bzBuffToBuffDecompress(buf, &outlen, p, inlen, 0, 0);
6559 	    if(res == BZ_OUTBUFF_FULL) {
6560 		if(outlen < UINT_MAX/2) {
6561 		    outlen *= 2; continue;
6562 		} else break;
6563 	    }
6564 	    if(res == BZ_OK) break;
6565 	    error("internal error %d in memDecompress(%s)", res,
6566 		  "type = \"bzip2\"");
6567 	}
6568 	ans = allocVector(RAWSXP, outlen);
6569 	memcpy(RAW(ans), buf, outlen);
6570 	break;
6571     }
6572     case 4: /* xz */
6573     {
6574 	unsigned char *buf;
6575 	size_t inlen = XLENGTH(from);
6576 	size_t outlen = 3*inlen;
6577 	lzma_stream strm = LZMA_STREAM_INIT;
6578 	lzma_ret ret;
6579 	while(1) {
6580 	    /* Initialize lzma_stream in each iteration. */
6581 	    /* probably at most 80Mb is required, but 512Mb seems OK as a limit */
6582 	    if (subtype == 1)
6583 		ret = lzma_alone_decoder(&strm, 536870912);
6584 	    else
6585 		ret = lzma_stream_decoder(&strm, 536870912, LZMA_CONCATENATED);
6586 	    if (ret != LZMA_OK)
6587 		error(_("cannot initialize lzma decoder, error %d"), ret);
6588 
6589 	    buf = (unsigned char *) R_alloc(outlen, sizeof(unsigned char));
6590 	    strm.avail_in = inlen;
6591 	    strm.avail_out = outlen;
6592 	    strm.next_in = (unsigned char *) RAW(from);
6593 	    strm.next_out = buf;
6594 
6595 	    ret = lzma_code(&strm, LZMA_FINISH);
6596 	    /* Did lzma_code() leave some input? */
6597 	    if (strm.avail_in > 0) {
6598 		/* Decompression failed, free lzma_stream. */
6599 		lzma_end(&strm);
6600 		/* Because it ran out of output buffer?
6601 		 *
6602 		 * This used to only check if LZMA_BUF_ERROR was
6603 		 * returned, but apparently XZ will also signal an out
6604 		 * of buffer condition by returning LZMA_OK and
6605 		 * leaving avail_in > 0 (i.e. not all input was
6606 		 * consumed).
6607 		 */
6608 		if (ret == LZMA_BUF_ERROR || ret == LZMA_OK) {
6609 		    outlen *= 2;
6610 		    continue;
6611 		} else {
6612 		    error("internal error %d in memDecompress(%s) at %d",
6613 			  ret, "type = \"xz\"", strm.avail_in);
6614 		}
6615 	    } else {
6616 		break;
6617 	    }
6618 	}
6619 	outlen = strm.total_out;
6620 	lzma_end(&strm);
6621 	ans = allocVector(RAWSXP, outlen);
6622 	memcpy(RAW(ans), buf, outlen);
6623 	break;
6624     }
6625     default:
6626 	break;
6627     }
6628     return ans;
6629 }
6630 
6631 /* --- C-level entry to create a custom connection object -- */
6632 /* The returned value is the R-side instance. To avoid additional call to getConnection()
6633    the internal Rconnection pointer will be placed in ptr[0] if ptr is not NULL.
6634    It is the responsibility of the caller to customize callbacks in the structure,
6635    they are initialized to dummy_ (where available) and null_ (all others) callbacks.
6636    Also note that the resulting object has a finalizer, so any clean up (including after
6637    errors) is done by garbage collection - the caller may not free anything in the
6638    structure explicitly (that includes the con->private pointer!).
6639  */
R_new_custom_connection(const char * description,const char * mode,const char * class_name,Rconnection * ptr)6640 SEXP R_new_custom_connection(const char *description, const char *mode, const char *class_name, Rconnection *ptr)
6641 {
6642     Rconnection new;
6643     SEXP ans, class;
6644 
6645     int ncon = NextConnection();
6646 
6647     /* built-in connections do this in a separate new<class>() function */
6648     new = (Rconnection) malloc(sizeof(struct Rconn));
6649     if(!new) error(_("allocation of %s connection failed"), class_name);
6650     new->class = (char *) malloc(strlen(class_name) + 1);
6651     if(!new->class) {
6652 	free(new);
6653 	error(_("allocation of %s connection failed"), class_name);
6654 	/* for Solaris 12.5 */ new = NULL;
6655     }
6656     strcpy(new->class, class_name);
6657     new->description = (char *) malloc(strlen(description) + 1);
6658     if(!new->description) {
6659 	free(new->class); free(new);
6660 	error(_("allocation of %s connection failed"), class_name);
6661 	/* for Solaris 12.5 */ new = NULL;
6662     }
6663     init_con(new, description, CE_NATIVE, mode);
6664     /* all ptrs are init'ed to null_* so no need to repeat that,
6665        but the following two are useful tools which could not be accessed otherwise */
6666     new->vfprintf = &dummy_vfprintf;
6667     new->fgetc = &dummy_fgetc;
6668 
6669     /* here we use the new connection to create a SEXP */
6670     Connections[ncon] = new;
6671     /* new->blocking = block; */
6672     new->encname[0] = 0; /* "" (should have the same effect as "native.enc") */
6673     new->ex_ptr = PROTECT(R_MakeExternalPtr(new->id, install("connection"), R_NilValue));
6674 
6675     PROTECT(ans = ScalarInteger(ncon));
6676     PROTECT(class = allocVector(STRSXP, 2));
6677     SET_STRING_ELT(class, 0, mkChar(class_name));
6678     SET_STRING_ELT(class, 1, mkChar("connection"));
6679     classgets(ans, class);
6680     setAttrib(ans, R_ConnIdSymbol, new->ex_ptr);
6681     R_RegisterCFinalizerEx(new->ex_ptr, conFinalizer, FALSE);
6682     UNPROTECT(3);
6683 
6684     if (ptr) ptr[0] = new;
6685 
6686     return ans;
6687 }
6688