1 /*
2  *  R : A Computer Language for Statistical Data Analysis
3  *  Copyright (C) 1995--2020  The R Core Team
4  *
5  *  This program is free software; you can redistribute it and/or modify
6  *  it under the terms of the GNU General Public License as published by
7  *  the Free Software Foundation; either version 2 of the License, or
8  *  (at your option) any later version.
9  *
10  *  This program is distributed in the hope that it will be useful,
11  *  but WITHOUT ANY WARRANTY; without even the implied warranty of
12  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13  *  GNU General Public License for more details.
14  *
15  *  You should have received a copy of the GNU General Public License
16  *  along with this program; if not, a copy is available at
17  *  https://www.R-project.org/Licenses/
18  */
19 
20 /* <UTF8> byte-level access is only to compare with chars <= 0x7F */
21 
22 #ifdef HAVE_CONFIG_H
23 #include <config.h>
24 #endif
25 
26 #define NEED_CONNECTION_PSTREAMS
27 #define R_USE_SIGNALS 1
28 #include <Defn.h>
29 #include <Rmath.h>
30 #include <Fileio.h>
31 #include <Rversion.h>
32 #include <R_ext/Riconv.h>
33 #include <R_ext/RS.h>           /* for CallocCharBuf, Free */
34 #include <errno.h>
35 #include <ctype.h>		/* for isspace */
36 #include <stdarg.h>
37 #ifdef Win32
38 #include <trioremap.h>
39 #endif
40 
41 /* From time to time changes in R, such as the addition of a new SXP,
42  * may require changes in the save file format.  Here are some
43  * guidelines on handling format changes:
44  *
45  *    Starting with 1.4 there is a version number associated with save
46  *    file formats.  This version number should be incremented when
47  *    the format is changed so older versions of R can recognize and
48  *    reject the new format with a meaningful error message.
49  *
50  *    R should remain able to write older workspace formats.  An error
51  *    should be signaled if the contents to be saved is not compatible
52  *    with the requested format.
53  *
54  *    To allow older versions of R to give useful error messages, the
55  *    header now contains the version of R that wrote the workspace
56  *    and the oldest version that can read the workspace.  These
57  *    versions are stored as an integer packed by the R_Version macro
58  *    from Rversion.h.  Some workspace formats may only exist
59  *    temporarily in the development stage.  If readers are not
60  *    provided in a released version, then these should specify the
61  *    oldest reader R version as -1.
62  */
63 
64  /* It is now customary that the version (1, 2, 3) of the format is
65   * reflected also in magic numbers (such as RDX2, RDX3, ...), together with
66   * type (xdr/ascii/binary).  Adding a new serialization format thus now
67   * also requires adding a new set of magic numbers, yet in principle this
68   * could be changed in the future.  The code in this file does not need the
69   * magic numbers, it relies on version and type information in the
70   * serialization header (version 2 and 3).
71   */
72 
73 /* ----- V e r s i o n -- T w o -- S a v e / R e s t o r e ----- */
74 
75 /* Adapted from Chris Young and Ross Ihaka's Version One by Luke
76    Tierney.  Copyright Assigned to the R Project.
77 
78    The approach used here uses a single pass over the node tree to be
79    serialized.  Sharing of reference objects is preserved, but sharing
80    among other objects is ignored.  The first time a reference object
81    is encountered it is entered in a hash table; the value stored with
82    the object is the index in the sequence of reference objects (1 for
83    first reference object, 2 for second, etc.).  When an object is
84    seen again, i.e. it is already in the hash table, a reference
85    marker along with the index is written out.  The unserialize code
86    does not know in advance how many reference objects it will see, so
87    it starts with an initial array of some reasonable size and doubles
88    it each time space runs out.  Reference objects are entered as they
89    are encountered.
90 
91    This means the serialize and unserialize code needs to agree on
92    what is a reference object.  Making a non-reference object into
93    a reference object requires a version change in the format.  An
94    alternate design would be to precede each reference object with a
95    marker that says the next thing is a possibly shared object and
96    needs to be entered into the reference table.
97 
98    Adding new SXP types is easy, whether they are reference objects or
99    not.  The unserialize code will signal an error if it sees a type
100    value it does not know.  It is of course better to increment the
101    serialization format number when a new SXP is added, but if that
102    SXP is unlikely to be saved by users then it may be simpler to keep
103    the version number and let the error handling code deal with it.
104 
105    The output format for dotted pairs writes the ATTRIB value first
106    rather than last.  This allows CDR's to be processed by iterative
107    tail calls to avoid recursion stack overflows when processing long
108    lists.  The writing code does take advantage of this, but the
109    reading code does not.  It hasn't been a big issue so far--the only
110    case where it has come up is in saving a large unhashed environment
111    where saving succeeds but loading fails because the PROTECT stack
112    overflows.  With the ability to create hashed environments at the
113    user level this is likely to be even less of an issue now.  But if
114    we do need to deal with it we can do so without a change in the
115    serialization format--just rewrite ReadItem to pass the place to
116    store the CDR it reads. (It's a bit of a pain to do, that is why it
117    is being deferred until it is clearly needed.)
118 
119    CHARSXPs are now handled in a way that preserves both embedded null
120    characters and NA_STRING values.
121 
122    The XDR save format now only uses the in-memory xdr facility for
123    converting integers and doubles to a portable format.
124 
125    The output format packs the type flag and other flags into a single
126    integer.  This produces more compact output for code; it has little
127    effect on data.
128 
129    Environments recognized as package or namespace environments are
130    not saved directly. Instead, a STRSXP is saved that is then used to
131    attempt to find the package/namespace when unserialized.  The
132    exact mechanism for choosing the name and finding the package/name
133    space from the name still has to be developed, but the
134    serialization format should be able to accommodate any reasonable
135    mechanism.
136 
137    The mechanism assumes that user code supplies one routine for
138    handling single characters and one for handling an array of bytes.
139    Higher level interfaces that serialize to/from a FILE * pointer or
140    an Rconnection pointer are provided in this file; others can be
141    built easily.
142 
143    A mechanism is provided to allow special handling of non-system
144    reference objects (all weak references and external pointers, and
145    all environments other than package environments, namespace
146    environments, and the global environment).  The hook function
147    consists of a function pointer and a data value.  The serialization
148    function pointer is called with the reference object and the data
149    value as arguments.  It should return R_NilValue for standard
150    handling and an STRSXP for special handling.  If an STRSXP is
151    returned, then a special handing mark is written followed by the
152    strings in the STRSXP (attributes are ignored).  On unserializing,
153    any specially marked entry causes a call to the hook function with
154    the reconstructed STRSXP and data value as arguments.  This should
155    return the value to use for the reference object.  A reasonable
156    convention on how to use this mechanism is neded, but again the
157    format should be compatible with any reasonable convention.
158 
159    Eventually it may be useful to use these hooks to allow objects
160    with a class to have a class-specific serialization mechanism.  The
161    serialization format should support this.  It is trickier than in
162    Java and other reference based languages where creation and
163    initialization can be separated--we don't really have that option
164    at the R level.  */
165 
166 /* ----- V e r s i o n -- T h r e e -- S a v e / R e s t o r e ----- */
167 
168 /* This format extends version 2 format by adding an identifier of the
169    current native encoding to the serialization header.  On deserialization,
170    strings without an encoding flag will be converted to the current native
171    encoding, if possible, or to (flagged) UTF-8.  The conversion may fail
172    when the original encoding is not supported by iconv (unlikely) or when
173    the string is not valid in its declared encoding, which unfortunately is
174    not uncommon.  The conversion code now deliberately does not check
175    whether strings are valid when no conversion is needed, but such check
176    could be added in the future without changing the format.
177 
178    Version 3 also adds support for custom ALTREP serialization. Under
179    version 2 ALTREP objects are serialied like non-ALTREP ones. */
180 
181 /*
182  * Forward Declarations
183  */
184 
185 static void OutStringVec(R_outpstream_t stream, SEXP s, SEXP ref_table);
186 static void WriteItem (SEXP s, SEXP ref_table, R_outpstream_t stream);
187 static SEXP ReadItem(SEXP ref_table, R_inpstream_t stream);
188 static void WriteBC(SEXP s, SEXP ref_table, R_outpstream_t stream);
189 static SEXP ReadBC(SEXP ref_table, R_inpstream_t stream);
190 
191 /*
192  * Constants
193  */
194 
195 /* The default version used when a stream Init function is called with
196    version = 0 */
197 
defaultSerializeVersion()198 static int defaultSerializeVersion()
199 {
200     static int dflt = -1;
201 
202     if (dflt < 0) {
203 	char *valstr = getenv("R_DEFAULT_SERIALIZE_VERSION");
204 	int val = -1;
205 	if (valstr != NULL)
206 	    val = atoi(valstr);
207 	if (val == 2 || val == 3)
208 	    dflt = val;
209 	else
210 	    dflt = 3; /* the default */
211     }
212     return dflt;
213 }
214 
215 /*
216  * Utility Functions
217  *
218  * An assert function which doesn't crash the program.
219  * Something like this might be useful in an R header file
220  */
221 
222 #ifdef NDEBUG
223 #define R_assert(e) ((void) 0)
224 #else
225 /* The line below requires an ANSI C preprocessor (stringify operator) */
226 #define R_assert(e) ((e) ? (void) 0 : error("assertion '%s' failed: file '%s', line %d\n", #e, __FILE__, __LINE__))
227 #endif /* NDEBUG */
228 
229 /* Rsnprintf: like snprintf, but guaranteed to null-terminate. See
230    errors.c::Rvsnprintf_mbcs for a multi-byte safe version. */
Rsnprintf(char * buf,size_t size,const char * format,...)231 static int Rsnprintf(char *buf, size_t size, const char *format, ...)
232 {
233     int val;
234     va_list(ap);
235     va_start(ap, format);
236     /* On Windows this no longer uses the non-C99 MSVCRT.dll version */
237     val = vsnprintf(buf, size, format, ap);
238     if(size) buf[size-1] = '\0';
239     va_end(ap);
240     return val;
241 }
242 
243 
244 /*
245  * Basic Output Routines
246  */
247 
OutInteger(R_outpstream_t stream,int i)248 static void OutInteger(R_outpstream_t stream, int i)
249 {
250     char buf[128];
251     switch (stream->type) {
252     case R_pstream_ascii_format:
253     case R_pstream_asciihex_format:
254 	if (i == NA_INTEGER)
255 	    Rsnprintf(buf, sizeof(buf), "NA\n");
256 	else
257 	    Rsnprintf(buf, sizeof(buf), "%d\n", i);
258 	stream->OutBytes(stream, buf, (int)strlen(buf));
259 	break;
260     case R_pstream_binary_format:
261 	stream->OutBytes(stream, &i, sizeof(int));
262 	break;
263     case R_pstream_xdr_format:
264 	R_XDREncodeInteger(i, buf);
265 	stream->OutBytes(stream, buf, R_XDR_INTEGER_SIZE);
266 	break;
267     default:
268 	error(_("unknown or inappropriate output format"));
269     }
270 }
271 
OutReal(R_outpstream_t stream,double d)272 static void OutReal(R_outpstream_t stream, double d)
273 {
274     char buf[128];
275     switch (stream->type) {
276     case R_pstream_ascii_format:
277 	if (! R_FINITE(d)) {
278 	    if (ISNA(d))
279 		Rsnprintf(buf, sizeof(buf), "NA\n");
280 	    else if (ISNAN(d))
281 		Rsnprintf(buf, sizeof(buf), "NaN\n");
282 	    else if (d < 0)
283 		Rsnprintf(buf, sizeof(buf), "-Inf\n");
284 	    else
285 		Rsnprintf(buf, sizeof(buf), "Inf\n");
286 	}
287 	else
288 	    /* 16: full precision; 17 gives 999, 000 &c */
289 	    Rsnprintf(buf, sizeof(buf), "%.16g\n", d);
290 	stream->OutBytes(stream, buf, (int)strlen(buf));
291 	break;
292     case R_pstream_asciihex_format:
293 	if (! R_FINITE(d)) {
294 	    if (ISNA(d))
295 		Rsnprintf(buf, sizeof(buf), "NA\n");
296 	    else if (ISNAN(d))
297 		Rsnprintf(buf, sizeof(buf), "NaN\n");
298 	    else if (d < 0)
299 		Rsnprintf(buf, sizeof(buf), "-Inf\n");
300 	    else
301 		Rsnprintf(buf, sizeof(buf), "Inf\n");
302 	}
303 	else
304 	    Rsnprintf(buf, sizeof(buf), "%a\n", d);
305 	stream->OutBytes(stream, buf, (int)strlen(buf));
306 	break;
307     case R_pstream_binary_format:
308 	stream->OutBytes(stream, &d, sizeof(double));
309 	break;
310     case R_pstream_xdr_format:
311 	R_XDREncodeDouble(d, buf);
312 	stream->OutBytes(stream, buf, R_XDR_DOUBLE_SIZE);
313 	break;
314     default:
315 	error(_("unknown or inappropriate output format"));
316     }
317 }
318 
OutComplex(R_outpstream_t stream,Rcomplex c)319 static void OutComplex(R_outpstream_t stream, Rcomplex c)
320 {
321     OutReal(stream, c.r);
322     OutReal(stream, c.i);
323 }
324 
OutByte(R_outpstream_t stream,Rbyte i)325 static void OutByte(R_outpstream_t stream, Rbyte i)
326 {
327     char buf[128];
328     switch (stream->type) {
329     case R_pstream_ascii_format:
330     case R_pstream_asciihex_format:
331 	Rsnprintf(buf, sizeof(buf), "%02x\n", i);
332 	stream->OutBytes(stream, buf, (int)strlen(buf));
333 	break;
334     case R_pstream_binary_format:
335     case R_pstream_xdr_format:
336 	stream->OutBytes(stream, &i, 1);
337 	break;
338     default:
339 	error(_("unknown or inappropriate output format"));
340     }
341 }
342 
343 /* This assumes CHARSXPs remain limited to 2^31-1 bytes */
OutString(R_outpstream_t stream,const char * s,int length)344 static void OutString(R_outpstream_t stream, const char *s, int length)
345 {
346     if (stream->type == R_pstream_ascii_format ||
347 	stream->type == R_pstream_asciihex_format) {
348 	int i;
349 	char buf[128];
350 	for (i = 0; i < length; i++) {
351 	    switch(s[i]) {
352 	    case '\n': sprintf(buf, "\\n");  break;
353 	    case '\t': sprintf(buf, "\\t");  break;
354 	    case '\v': sprintf(buf, "\\v");  break;
355 	    case '\b': sprintf(buf, "\\b");  break;
356 	    case '\r': sprintf(buf, "\\r");  break;
357 	    case '\f': sprintf(buf, "\\f");  break;
358 	    case '\a': sprintf(buf, "\\a");  break;
359 	    case '\\': sprintf(buf, "\\\\"); break;
360 	    case '\?': sprintf(buf, "\\?");  break;
361 	    case '\'': sprintf(buf, "\\'");  break;
362 	    case '\"': sprintf(buf, "\\\""); break;
363 	    default  :
364 		/* cannot print char in octal mode -> cast to unsigned
365 		   char first */
366 		/* actually, since s is signed char and '\?' == 127
367 		   is handled above, s[i] > 126 can't happen, but
368 		   I'm superstitious...  -pd */
369 		if (s[i] <= 32 || s[i] > 126)
370 		    sprintf(buf, "\\%03o", (unsigned char) s[i]);
371 		else
372 		    sprintf(buf, "%c", s[i]);
373 	    }
374 	    stream->OutBytes(stream, buf, (int)strlen(buf));
375 	}
376 	stream->OutChar(stream, '\n');
377     }
378     else
379 	stream->OutBytes(stream, (void *)s, length); /* FIXME: is this case right? */
380 }
381 
382 
383 /*
384  * Basic Input Routines
385  */
386 
InWord(R_inpstream_t stream,char * buf,int size)387 static void InWord(R_inpstream_t stream, char * buf, int size)
388 {
389     int c, i;
390     i = 0;
391     do {
392 	c = stream->InChar(stream);
393 	if (c == EOF)
394 	    error(_("read error"));
395     } while (isspace(c));
396     while (! isspace(c) && i < size) {
397 	buf[i++] = (char) c;
398 	c = stream->InChar(stream);
399     }
400     if (i == size)
401 	error(_("read error"));
402     buf[i] = 0;
403 }
404 
InInteger(R_inpstream_t stream)405 static int InInteger(R_inpstream_t stream)
406 {
407     char word[128];
408     char buf[128];
409     int i;
410 
411     switch (stream->type) {
412     case R_pstream_ascii_format:
413 	InWord(stream, word, sizeof(word));
414 	if(sscanf(word, "%127s", buf) != 1) error(_("read error"));
415 	if (strcmp(buf, "NA") == 0)
416 	    return NA_INTEGER;
417 	else
418 	    if(sscanf(buf, "%d", &i) != 1) error(_("read error"));
419 	return i;
420     case R_pstream_binary_format:
421 	stream->InBytes(stream, &i, sizeof(int));
422 	return i;
423     case R_pstream_xdr_format:
424 	stream->InBytes(stream, buf, R_XDR_INTEGER_SIZE);
425 	return R_XDRDecodeInteger(buf);
426     default:
427 	return NA_INTEGER;
428     }
429 }
430 
431 #ifdef Win32
432 extern int trio_sscanf(const char *buffer, const char *format, ...);
433 
434 #endif
435 
InReal(R_inpstream_t stream)436 static double InReal(R_inpstream_t stream)
437 {
438     char word[128];
439     char buf[128];
440     double d;
441 
442     switch (stream->type) {
443     case R_pstream_ascii_format:
444 	InWord(stream, word, sizeof(word));
445 	if(sscanf(word, "%127s", buf) != 1) error(_("read error"));
446 	if (strcmp(buf, "NA") == 0)
447 	    return NA_REAL;
448 	else if (strcmp(buf, "NaN") == 0)
449 	    return R_NaN;
450 	else if (strcmp(buf, "Inf") == 0)
451 	    return R_PosInf;
452 	else if (strcmp(buf, "-Inf") == 0)
453 	    return R_NegInf;
454 	else
455 	    if(
456 #ifdef Win32
457 		trio_sscanf(buf, "%lg", &d)
458 #else
459 		sscanf(buf, "%lg", &d)
460 #endif
461 		!= 1) error(_("read error"));
462 	return d;
463     case R_pstream_binary_format:
464 	stream->InBytes(stream, &d, sizeof(double));
465 	return d;
466     case R_pstream_xdr_format:
467 	stream->InBytes(stream, buf, R_XDR_DOUBLE_SIZE);
468 	return R_XDRDecodeDouble(buf);
469     default:
470 	return NA_REAL;
471     }
472 }
473 
InComplex(R_inpstream_t stream)474 static Rcomplex InComplex(R_inpstream_t stream)
475 {
476     Rcomplex c;
477     c.r = InReal(stream);
478     c.i = InReal(stream);
479     return c;
480 }
481 
482 /* These utilities for reading characters with an unget option are
483    defined so the code in InString can match the code in
484    saveload.c:InStringAscii--that way it is easier to match changes in
485    one to the other. */
486 typedef struct R_instring_stream_st {
487     int last;
488     R_inpstream_t stream;
489 } *R_instring_stream_t;
490 
InitInStringStream(R_instring_stream_t s,R_inpstream_t stream)491 static void InitInStringStream(R_instring_stream_t s, R_inpstream_t stream)
492 {
493     s->last = EOF;
494     s->stream = stream;
495 }
496 
GetChar(R_instring_stream_t s)497 static int GetChar(R_instring_stream_t s)
498 {
499     int c;
500     if (s->last != EOF) {
501 	c = s->last;
502 	s->last = EOF;
503     }
504     else c = s->stream->InChar(s->stream);
505     return c;
506 }
507 
UngetChar(R_instring_stream_t s,int c)508 static void UngetChar(R_instring_stream_t s, int c)
509 {
510     s->last = c;
511 }
512 
513 
InString(R_inpstream_t stream,char * buf,int length)514 static void InString(R_inpstream_t stream, char *buf, int length)
515 {
516     if (stream->type == R_pstream_ascii_format) {
517 	if (length > 0) {
518 	    int c, d, i, j;
519 	    struct R_instring_stream_st iss;
520 
521 	    InitInStringStream(&iss, stream);
522 	    while(isspace(c = GetChar(&iss)))
523 		;
524 	    UngetChar(&iss, c);
525 	    for (i = 0; i < length; i++) {
526 		if ((c =  GetChar(&iss)) == '\\') {
527 		    switch(c = GetChar(&iss)) {
528 		    case 'n' : buf[i] = '\n'; break;
529 		    case 't' : buf[i] = '\t'; break;
530 		    case 'v' : buf[i] = '\v'; break;
531 		    case 'b' : buf[i] = '\b'; break;
532 		    case 'r' : buf[i] = '\r'; break;
533 		    case 'f' : buf[i] = '\f'; break;
534 		    case 'a' : buf[i] = '\a'; break;
535 		    case '\\': buf[i] = '\\'; break;
536 		    case '?' : buf[i] = '\?'; break;
537 		    case '\'': buf[i] = '\''; break;
538 		    case '\"': buf[i] = '\"'; break; /* closing " for emacs */
539 		    case '0': case '1': case '2': case '3':
540 		    case '4': case '5': case '6': case '7':
541 			d = 0; j = 0;
542 			while('0' <= c && c < '8' && j < 3) {
543 			    d = d * 8 + (c - '0');
544 			    c = GetChar(&iss);
545 			    j++;
546 			}
547 			buf[i] = (char) d;
548 			UngetChar(&iss, c);
549 			break;
550 		    default  : buf[i] = (char) c;
551 		    }
552 		}
553 		else buf[i] = (char) c;
554 	    }
555 	}
556     }
557     else  /* this limits the string length: used for CHARSXPs */
558 	stream->InBytes(stream, buf, length);
559 }
560 
561 
562 /*
563  * Format Header Reading and Writing
564  *
565  * The header starts with one of three characters, A for ascii, B for
566  * binary, or X for xdr.
567  */
568 
OutFormat(R_outpstream_t stream)569 static void OutFormat(R_outpstream_t stream)
570 {
571 /*    if (stream->type == R_pstream_binary_format) {
572 	warning(_("binary format is deprecated; using xdr instead"));
573 	stream->type = R_pstream_xdr_format;
574 	} */
575     switch (stream->type) {
576     case R_pstream_ascii_format:
577     case R_pstream_asciihex_format:
578 	stream->OutBytes(stream, "A\n", 2); break;
579 	/* on deserialization, asciihex_format is treated exactly the same
580 	   way as ascii_format; the distinction is handled inside scanf %lg */
581     case R_pstream_binary_format: stream->OutBytes(stream, "B\n", 2); break;
582     case R_pstream_xdr_format:    stream->OutBytes(stream, "X\n", 2); break;
583     case R_pstream_any_format:
584 	error(_("must specify ascii, binary, or xdr format"));
585     default: error(_("unknown output format"));
586     }
587 }
588 
InFormat(R_inpstream_t stream)589 static void InFormat(R_inpstream_t stream)
590 {
591     char buf[2];
592     R_pstream_format_t type;
593     stream->InBytes(stream, buf, 2);
594     switch (buf[0]) {
595     case 'A': type = R_pstream_ascii_format; break; /* also for asciihex */
596     case 'B': type = R_pstream_binary_format; break;
597     case 'X': type = R_pstream_xdr_format; break;
598     case '\n':
599 	/* GROSS HACK: ASCII unserialize may leave a trailing newline
600 	   in the stream.  If the stream contains a second
601 	   serialization, then a second unserialize will fail if such
602 	   a newline is present.  The right fix is to make sure
603 	   unserialize consumes exactly what serialize produces.  But
604 	   this seems hard because of the current use of whitespace
605 	   skipping in unserialize.  So a temporary hack to cure the
606 	   symptom is to deal with a possible leading newline.  I
607 	   don't think more than one is possible, but I'm not sure.
608 	   LT */
609 	if (buf[1] == 'A') {
610 	    type = R_pstream_ascii_format;
611 	    stream->InBytes(stream, buf, 1);
612 	    break;
613 	}
614     default:
615 	type = R_pstream_any_format;  /* keep compiler happy */
616 	error(_("unknown input format"));
617     }
618     if (stream->type == R_pstream_any_format)
619 	stream->type = type;
620     else if (type != stream->type)
621 	error(_("input format does not match specified format"));
622 }
623 
624 
625 /*
626  * Hash Table Functions
627  *
628  * Hashing functions for hashing reference objects during writing.
629  * Objects are entered, and the order in which they are encountered is
630  * recorded.  HashGet returns this number, a positive integer, if the
631  * object was seen before, and zero if not.  A fixed hash table size
632  * is used; this is not ideal but seems adequate for now.  The hash
633  * table representation consists of a (R_NilValue . vector) pair.  The
634  * hash buckets are in the vector.  This indirect representation
635  * should allow resizing the table at some point.
636  */
637 
638 #define HASHSIZE 1099
639 
640 #define PTRHASH(obj) (((R_size_t) (obj)) >> 2)
641 
642 #define HASH_TABLE_COUNT(ht) ((int) TRUELENGTH(CDR(ht)))
643 #define SET_HASH_TABLE_COUNT(ht, val) SET_TRUELENGTH(CDR(ht), ((int) (val)))
644 
645 #define HASH_TABLE_SIZE(ht) LENGTH(CDR(ht))
646 
647 #define HASH_BUCKET(ht, pos) VECTOR_ELT(CDR(ht), pos)
648 #define SET_HASH_BUCKET(ht, pos, val) SET_VECTOR_ELT(CDR(ht), pos, val)
649 
MakeHashTable(void)650 static SEXP MakeHashTable(void)
651 {
652     SEXP val = CONS(R_NilValue, allocVector(VECSXP, HASHSIZE));
653     SET_HASH_TABLE_COUNT(val, 0);
654     return val;
655 }
656 
HashAdd(SEXP obj,SEXP ht)657 static void HashAdd(SEXP obj, SEXP ht)
658 {
659     R_size_t pos = PTRHASH(obj) % HASH_TABLE_SIZE(ht);
660     int count = HASH_TABLE_COUNT(ht) + 1;
661     SEXP val = ScalarInteger(count);
662     SEXP cell = CONS(val, HASH_BUCKET(ht, pos));
663 
664     SET_HASH_TABLE_COUNT(ht, count);
665     SET_HASH_BUCKET(ht, pos, cell);
666     SET_TAG(cell, obj);
667 }
668 
HashGet(SEXP item,SEXP ht)669 static int HashGet(SEXP item, SEXP ht)
670 {
671     R_size_t pos = PTRHASH(item) % HASH_TABLE_SIZE(ht);
672     SEXP cell;
673     for (cell = HASH_BUCKET(ht, pos); cell != R_NilValue; cell = CDR(cell))
674 	if (item == TAG(cell))
675 	    return INTEGER(CAR(cell))[0];
676     return 0;
677 }
678 
679 
680 /*
681  * Administrative SXP values
682  *
683  * These macros define SXP "type" for specifying special object, such
684  * as R_NilValue, or control information, like REFSXP or NAMESPACESXP.
685  * The range of SXP types is limited to 5 bit by the current sxpinfo
686  * layout, but just in case these values are placed at the top of the
687  * 8 bit range.
688  */
689 
690 #define REFSXP            255
691 #define NILVALUE_SXP      254
692 #define GLOBALENV_SXP     253
693 #define UNBOUNDVALUE_SXP  252
694 #define MISSINGARG_SXP    251
695 #define BASENAMESPACE_SXP 250
696 #define NAMESPACESXP      249
697 #define PACKAGESXP        248
698 #define PERSISTSXP        247
699 /* the following are speculative--we may or may not need them soon */
700 #define CLASSREFSXP       246
701 #define GENERICREFSXP     245
702 #define BCREPDEF          244
703 #define BCREPREF          243
704 #define EMPTYENV_SXP	  242
705 #define BASEENV_SXP	  241
706 
707 /* The following are needed to preserve attribute information on
708    expressions in the constant pool of byte code objects. This is
709    mainly for preserving source references attributes.  The original
710    implementation of the sharing-preserving writing and reading of byte
711    code objects did not account for the need to preserve attributes,
712    so there is now a work-around using these SXP types to flag when
713    the ATTRIB field has been written out. Object bits and S4 bits are
714    still not preserved.  In the long run it might be better to change
715    to a scheme in which all sharing is preserved and byte code objects
716    don't need to be handled as a special case.  LT */
717 #define ATTRLANGSXP       240
718 #define ATTRLISTSXP       239
719 
720 #define ALTREP_SXP	  238
721 
722 /*
723  * Type/Flag Packing and Unpacking
724  *
725  * To reduce space consumption for serializing code (lots of list
726  * structure) the type (at most 8 bits), several single bit flags,
727  * and the sxpinfo gp field (LEVELS, 16 bits) are packed into a single
728  * integer.  The integer is signed, so this shouldn't be pushed too
729  * far.  It assumes at least 28 bits, but that should be no problem.
730  */
731 
732 #define IS_OBJECT_BIT_MASK (1 << 8)
733 #define HAS_ATTR_BIT_MASK (1 << 9)
734 #define HAS_TAG_BIT_MASK (1 << 10)
735 #define ENCODE_LEVELS(v) ((v) << 12)
736 #define DECODE_LEVELS(v) ((v) >> 12)
737 #define DECODE_TYPE(v) ((v) & 255)
738 
PackFlags(int type,int levs,int isobj,int hasattr,int hastag)739 static int PackFlags(int type, int levs, int isobj, int hasattr, int hastag)
740 {
741     /* We don't write out bit 5 as from R 2.8.0.
742        It is used to indicate if an object is in CHARSXP cache
743        - not that it matters to this version of R, but it saves
744        checking all previous versions.
745 
746        Also make sure the HASHASH bit is not written out.
747     */
748     int val;
749     if (type == CHARSXP) levs &= (~(CACHED_MASK | HASHASH_MASK));
750     val = type | ENCODE_LEVELS(levs);
751     if (isobj) val |= IS_OBJECT_BIT_MASK;
752     if (hasattr) val |= HAS_ATTR_BIT_MASK;
753     if (hastag) val |= HAS_TAG_BIT_MASK;
754     return val;
755 }
756 
UnpackFlags(int flags,SEXPTYPE * ptype,int * plevs,int * pisobj,int * phasattr,int * phastag)757 static void UnpackFlags(int flags, SEXPTYPE *ptype, int *plevs,
758 			int *pisobj, int *phasattr, int *phastag)
759 {
760     *ptype = DECODE_TYPE(flags);
761     *plevs = DECODE_LEVELS(flags);
762     *pisobj = flags & IS_OBJECT_BIT_MASK ? TRUE : FALSE;
763     *phasattr = flags & HAS_ATTR_BIT_MASK ? TRUE : FALSE;
764     *phastag = flags & HAS_TAG_BIT_MASK ? TRUE : FALSE;
765 }
766 
767 
768 /*
769  * Reference/Index Packing and Unpacking
770  *
771  * Code will contain many references to symbols. As long as there are
772  * not too many references, the index ant the REFSXP flag indicating a
773  * reference can be packed in a single integer.  Since the index is
774  * 1-based, a 0 is used to indicate an index that doesn't fit and
775  * therefore follows.
776  */
777 
778 #define PACK_REF_INDEX(i) (((i) << 8) | REFSXP)
779 #define UNPACK_REF_INDEX(i) ((i) >> 8)
780 #define MAX_PACKED_INDEX (INT_MAX >> 8)
781 
OutRefIndex(R_outpstream_t stream,int i)782 static void OutRefIndex(R_outpstream_t stream, int i)
783 {
784     if (i > MAX_PACKED_INDEX) {
785 	OutInteger(stream, REFSXP);
786 	OutInteger(stream, i);
787     }
788     else OutInteger(stream, PACK_REF_INDEX(i));
789 }
790 
InRefIndex(R_inpstream_t stream,int flags)791 static int InRefIndex(R_inpstream_t stream, int flags)
792 {
793     int i = UNPACK_REF_INDEX(flags);
794     if (i == 0)
795 	return InInteger(stream);
796     else
797 	return i;
798 }
799 
800 
801 /*
802  * Persistent Name Hooks
803  *
804  * These routines call the appropriate hook functions for allowing
805  * customized handling of reference objects.
806  */
807 
GetPersistentName(R_outpstream_t stream,SEXP s)808 static SEXP GetPersistentName(R_outpstream_t stream, SEXP s)
809 {
810     if (stream->OutPersistHookFunc != NULL) {
811 	switch (TYPEOF(s)) {
812 	case WEAKREFSXP:
813 	case EXTPTRSXP: break;
814 	case ENVSXP:
815 	    if (s == R_GlobalEnv ||
816 		s == R_BaseEnv ||
817 		s == R_EmptyEnv ||
818 		R_IsNamespaceEnv(s) ||
819 		R_IsPackageEnv(s))
820 		return R_NilValue;
821 	    else
822 		break;
823 	default: return R_NilValue;
824 	}
825 	return stream->OutPersistHookFunc(s, stream->OutPersistHookData);
826     }
827     else
828 	return R_NilValue;
829 }
830 
PersistentRestore(R_inpstream_t stream,SEXP s)831 static SEXP PersistentRestore(R_inpstream_t stream, SEXP s)
832 {
833     if (stream->InPersistHookFunc == NULL)
834 	error(_("no restore method available"));
835     return stream->InPersistHookFunc(s, stream->InPersistHookData);
836 }
837 
838 
839 /*
840  * Serialization Code
841  */
842 
SaveSpecialHook(SEXP item)843 static int SaveSpecialHook(SEXP item)
844 {
845     if (item == R_NilValue)      return NILVALUE_SXP;
846     if (item == R_EmptyEnv)	 return EMPTYENV_SXP;
847     if (item == R_BaseEnv)	 return BASEENV_SXP;
848     if (item == R_GlobalEnv)     return GLOBALENV_SXP;
849     if (item == R_UnboundValue)  return UNBOUNDVALUE_SXP;
850     if (item == R_MissingArg)    return MISSINGARG_SXP;
851     if (item == R_BaseNamespace) return BASENAMESPACE_SXP;
852     return 0;
853 }
854 
WriteLENGTH(R_outpstream_t stream,SEXP s)855 static void WriteLENGTH(R_outpstream_t stream, SEXP s)
856 {
857 #ifdef LONG_VECTOR_SUPPORT
858     if (IS_LONG_VEC(s)) {
859 	OutInteger(stream, -1);
860 	R_xlen_t len = XLENGTH(s);
861 	OutInteger(stream, (int)(len / 4294967296L));
862 	OutInteger(stream, (int)(len % 4294967296L));
863    } else OutInteger(stream, LENGTH(s));
864 #else
865     OutInteger(stream, LENGTH(s));
866 #endif
867 }
868 
OutStringVec(R_outpstream_t stream,SEXP s,SEXP ref_table)869 static void OutStringVec(R_outpstream_t stream, SEXP s, SEXP ref_table)
870 {
871     R_assert(TYPEOF(s) == STRSXP);
872 
873 #ifdef WARN_ABOUT_NAMES_IN_PERSISTENT_STRINGS
874     SEXP names = getAttrib(s, R_NamesSymbol);
875     if (names != R_NilValue)
876 	warning(_("names in persistent strings are currently ignored"));
877 #endif
878 
879     R_xlen_t len = XLENGTH(s);
880     OutInteger(stream, 0); /* place holder to allow names if we want to */
881     WriteLENGTH(stream, s);
882     for (R_xlen_t i = 0; i < len; i++)
883 	WriteItem(STRING_ELT(s, i), ref_table, stream);
884 }
885 
886 #include <rpc/types.h>
887 #include <rpc/xdr.h>
888 
889 #define CHUNK_SIZE 8096
890 
891 #define min2(a, b) ((a) < (b)) ? (a) : (b)
892 
893 
894 static R_INLINE void
OutIntegerVec(R_outpstream_t stream,SEXP s,R_xlen_t length)895 OutIntegerVec(R_outpstream_t stream, SEXP s, R_xlen_t length)
896 {
897     switch (stream->type) {
898     case R_pstream_xdr_format:
899     {
900 	static char buf[CHUNK_SIZE * sizeof(int)];
901 	R_xlen_t done, this;
902 	XDR xdrs;
903 	for (done = 0; done < length; done += this) {
904 	    this = min2(CHUNK_SIZE, length - done);
905 	    xdrmem_create(&xdrs, buf, (int)(this * sizeof(int)), XDR_ENCODE);
906 	    for(int cnt = 0; cnt < this; cnt++)
907 		if(!xdr_int(&xdrs, INTEGER(s) + done + cnt))
908 		    error(_("XDR write failed"));
909 	    xdr_destroy(&xdrs);
910 	    stream->OutBytes(stream, buf, (int)(sizeof(int) * this));
911 	}
912 	break;
913     }
914     case R_pstream_binary_format:
915     {
916 	/* write in chunks to avoid overflowing ints */
917 	R_xlen_t done, this;
918 	for (done = 0; done < length; done += this) {
919 	    this = min2(CHUNK_SIZE, length - done);
920 	    stream->OutBytes(stream, INTEGER(s) + done,
921 			     (int)(sizeof(int) * this));
922 	}
923 	break;
924     }
925     default:
926 	for (R_xlen_t cnt = 0; cnt < length; cnt++)
927 	    OutInteger(stream, INTEGER(s)[cnt]);
928     }
929 }
930 
931 static R_INLINE void
OutRealVec(R_outpstream_t stream,SEXP s,R_xlen_t length)932 OutRealVec(R_outpstream_t stream, SEXP s, R_xlen_t length)
933 {
934     switch (stream->type) {
935     case R_pstream_xdr_format:
936     {
937 	static char buf[CHUNK_SIZE * sizeof(double)];
938 	R_xlen_t done, this;
939 	XDR xdrs;
940 	for (done = 0; done < length; done += this) {
941 	    this = min2(CHUNK_SIZE, length - done);
942 	    xdrmem_create(&xdrs, buf, (int)(this * sizeof(double)), XDR_ENCODE);
943 	    for(int cnt = 0; cnt < this; cnt++)
944 		if(!xdr_double(&xdrs, REAL(s) + done + cnt))
945 		    error(_("XDR write failed"));
946 	    xdr_destroy(&xdrs);
947 	    stream->OutBytes(stream, buf, (int)(sizeof(double) * this));
948 	}
949 	break;
950     }
951     case R_pstream_binary_format:
952     {
953 	R_xlen_t done, this;
954 	for (done = 0; done < length; done += this) {
955 	    this = min2(CHUNK_SIZE, length - done);
956 	    stream->OutBytes(stream, REAL(s) + done,
957 			     (int)(sizeof(double) * this));
958 	}
959 	break;
960     }
961     default:
962 	for (R_xlen_t cnt = 0; cnt < length; cnt++)
963 	    OutReal(stream, REAL(s)[cnt]);
964     }
965 }
966 
967 static R_INLINE void
OutComplexVec(R_outpstream_t stream,SEXP s,R_xlen_t length)968 OutComplexVec(R_outpstream_t stream, SEXP s, R_xlen_t length)
969 {
970     switch (stream->type) {
971     case R_pstream_xdr_format:
972     {
973 	static char buf[CHUNK_SIZE * sizeof(Rcomplex)];
974 	R_xlen_t done, this;
975 	XDR xdrs;
976 	Rcomplex *c = COMPLEX(s);
977 	for (done = 0; done < length; done += this) {
978 	    this = min2(CHUNK_SIZE, length - done);
979 	    xdrmem_create(&xdrs, buf, (int)(this * sizeof(Rcomplex)), XDR_ENCODE);
980 	    for(int cnt = 0; cnt < this; cnt++) {
981 		if(!xdr_double(&xdrs, &(c[done+cnt].r)) ||
982 		   !xdr_double(&xdrs, &(c[done+cnt].i)))
983 		    error(_("XDR write failed"));
984 	    }
985 	    stream->OutBytes(stream, buf, (int)(sizeof(Rcomplex) * this));
986 	    xdr_destroy(&xdrs);
987 	}
988 	break;
989     }
990     case R_pstream_binary_format:
991     {
992 	R_xlen_t done, this;
993 	for (done = 0; done < length; done += this) {
994 	    this = min2(CHUNK_SIZE, length - done);
995 	    stream->OutBytes(stream, COMPLEX(s) + done,
996 			     (int)(sizeof(Rcomplex) * this));
997 	}
998 	break;
999     }
1000     default:
1001 	for (R_xlen_t cnt = 0; cnt < length; cnt++)
1002 	    OutComplex(stream, COMPLEX(s)[cnt]);
1003     }
1004 }
1005 
WriteItem(SEXP s,SEXP ref_table,R_outpstream_t stream)1006 static void WriteItem (SEXP s, SEXP ref_table, R_outpstream_t stream)
1007 {
1008     int i;
1009     SEXP t;
1010 
1011     if (R_compile_pkgs && TYPEOF(s) == CLOSXP && TYPEOF(BODY(s)) != BCODESXP &&
1012         !R_disable_bytecode &&
1013         (!IS_S4_OBJECT(s) || (!inherits(s, "refMethodDef") &&
1014                               !inherits(s, "defaultBindingFunction")))) {
1015 
1016 	/* Do not compile reference class methods in their generators, because
1017 	   the byte-code is dropped as soon as the method is installed into a
1018 	   new environment. This is a performance optimization but it also
1019 	   prevents byte-compiler warnings about no visible binding for super
1020 	   assignment to a class field.
1021 
1022 	   Do not compile default binding functions, because the byte-code is
1023 	   dropped as fields are set in constructors (just an optimization).
1024 	*/
1025 
1026 	SEXP new_s;
1027 	R_compile_pkgs = FALSE;
1028 	PROTECT(new_s = R_cmpfun1(s));
1029 	WriteItem (new_s, ref_table, stream);
1030 	UNPROTECT(1);
1031 	R_compile_pkgs = TRUE;
1032 	return;
1033     }
1034 
1035  tailcall:
1036     R_CheckStack();
1037     if (ALTREP(s) && stream->version >= 3) {
1038 	SEXP info = ALTREP_SERIALIZED_CLASS(s);
1039 	SEXP state = ALTREP_SERIALIZED_STATE(s);
1040 	if (info != NULL && state != NULL) {
1041 	    int flags = PackFlags(ALTREP_SXP, LEVELS(s), OBJECT(s), 0, 0);
1042 	    PROTECT(state);
1043 	    PROTECT(info);
1044 	    OutInteger(stream, flags);
1045 	    WriteItem(info, ref_table, stream);
1046 	    WriteItem(state, ref_table, stream);
1047 	    WriteItem(ATTRIB(s), ref_table, stream);
1048 	    UNPROTECT(2); /* state, info */
1049 	    return;
1050 	}
1051 	/* else fall through to standard processing */
1052     }
1053     if ((t = GetPersistentName(stream, s)) != R_NilValue) {
1054 	R_assert(TYPEOF(t) == STRSXP && LENGTH(t) > 0);
1055 	PROTECT(t);
1056 	HashAdd(s, ref_table);
1057 	OutInteger(stream, PERSISTSXP);
1058 	OutStringVec(stream, t, ref_table);
1059 	UNPROTECT(1);
1060     }
1061     else if ((i = SaveSpecialHook(s)) != 0)
1062 	OutInteger(stream, i);
1063     else if ((i = HashGet(s, ref_table)) != 0)
1064 	OutRefIndex(stream, i);
1065     else if (TYPEOF(s) == SYMSXP) {
1066 	/* Note : NILSXP can't occur here */
1067 	HashAdd(s, ref_table);
1068 	OutInteger(stream, SYMSXP);
1069 	WriteItem(PRINTNAME(s), ref_table, stream);
1070     }
1071     else if (TYPEOF(s) == ENVSXP) {
1072 	HashAdd(s, ref_table);
1073 	if (R_IsPackageEnv(s)) {
1074 	    SEXP name = R_PackageEnvName(s);
1075 	    const void *vmax = vmaxget();
1076 	    warning(_("'%s' may not be available when loading"),
1077 		    translateChar(STRING_ELT(name, 0)));
1078 	    vmaxset(vmax);
1079 	    OutInteger(stream, PACKAGESXP);
1080 	    OutStringVec(stream, name, ref_table);
1081 	}
1082 	else if (R_IsNamespaceEnv(s)) {
1083 #ifdef WARN_ABOUT_NAME_SPACES_MAYBE_NOT_AVAILABLE
1084 	    warning(_("namespaces may not be available when loading"));
1085 #endif
1086 	    OutInteger(stream, NAMESPACESXP);
1087 	    OutStringVec(stream, PROTECT(R_NamespaceEnvSpec(s)), ref_table);
1088 	    UNPROTECT(1);
1089 	}
1090 	else {
1091 	    OutInteger(stream, ENVSXP);
1092 	    OutInteger(stream, R_EnvironmentIsLocked(s) ? 1 : 0);
1093 	    WriteItem(ENCLOS(s), ref_table, stream);
1094 	    WriteItem(FRAME(s), ref_table, stream);
1095 	    WriteItem(HASHTAB(s), ref_table, stream);
1096 	    WriteItem(ATTRIB(s), ref_table, stream);
1097 	}
1098     }
1099     else {
1100 	int flags, hastag, hasattr;
1101 	R_xlen_t len;
1102 	switch(TYPEOF(s)) {
1103 	case LISTSXP:
1104 	case LANGSXP:
1105 	case PROMSXP:
1106 	case DOTSXP: hastag = TAG(s) != R_NilValue; break;
1107 	case CLOSXP: hastag = TRUE; break;
1108 	default: hastag = FALSE;
1109 	}
1110 	/* With the CHARSXP cache chains maintained through the ATTRIB
1111 	   field the content of that field must not be serialized, so
1112 	   we treat it as not there. */
1113 	hasattr = (TYPEOF(s) != CHARSXP && ATTRIB(s) != R_NilValue);
1114 	flags = PackFlags(TYPEOF(s), LEVELS(s), OBJECT(s),
1115 			  hasattr, hastag);
1116 	OutInteger(stream, flags);
1117 	switch (TYPEOF(s)) {
1118 	case LISTSXP:
1119 	case LANGSXP:
1120 	case PROMSXP:
1121 	case DOTSXP:
1122 	    /* Dotted pair objects */
1123 	    /* These write their ATTRIB fields first to allow us to avoid
1124 	       recursion on the CDR */
1125 	    if (hasattr)
1126 		WriteItem(ATTRIB(s), ref_table, stream);
1127 	    if (TAG(s) != R_NilValue)
1128 		WriteItem(TAG(s), ref_table, stream);
1129 	    if (BNDCELL_TAG(s))
1130 		R_expand_binding_value(s);
1131 	    WriteItem(CAR(s), ref_table, stream);
1132 	    /* now do a tail call to WriteItem to handle the CDR */
1133 	    s = CDR(s);
1134 	    goto tailcall;
1135 	case CLOSXP:
1136 	    /* Like a dotted pair object */
1137 	    /* Write the ATTRIB field first to allow us to avoid
1138 	       recursion on the CDR/BODY */
1139 	    if (hasattr)
1140 		WriteItem(ATTRIB(s), ref_table, stream);
1141 	    WriteItem(CLOENV(s), ref_table, stream);
1142 	    WriteItem(FORMALS(s), ref_table, stream);
1143 	    /* now do a tail call to WriteItem to handle the CDR/BODY */
1144 	    s = BODY(s);
1145 	    goto tailcall;
1146 	case EXTPTRSXP:
1147 	    /* external pointers */
1148 	    HashAdd(s, ref_table);
1149 	    WriteItem(EXTPTR_PROT(s), ref_table, stream);
1150 	    WriteItem(EXTPTR_TAG(s), ref_table, stream);
1151 	    break;
1152 	case WEAKREFSXP:
1153 	    /* Weak references */
1154 	    HashAdd(s, ref_table);
1155 	    break;
1156 	case SPECIALSXP:
1157 	case BUILTINSXP:
1158 	    /* Builtin functions */
1159 	    OutInteger(stream, (int)strlen(PRIMNAME(s)));
1160 	    OutString(stream, PRIMNAME(s), (int)strlen(PRIMNAME(s)));
1161 	    break;
1162 	case CHARSXP:
1163 	    if (s == NA_STRING)
1164 		OutInteger(stream, -1);
1165 	    else {
1166 		OutInteger(stream, LENGTH(s));
1167 		OutString(stream, CHAR(s), LENGTH(s));
1168 	    }
1169 	    break;
1170 	case LGLSXP:
1171 	case INTSXP:
1172 	    len = XLENGTH(s);
1173 	    WriteLENGTH(stream, s);
1174 	    OutIntegerVec(stream, s, len);
1175 	    break;
1176 	case REALSXP:
1177 	    len = XLENGTH(s);
1178 	    WriteLENGTH(stream, s);
1179 	    OutRealVec(stream, s, len);
1180 	    break;
1181 	case CPLXSXP:
1182 	    len = XLENGTH(s);
1183 	    WriteLENGTH(stream, s);
1184 	    OutComplexVec(stream, s, len);
1185 	    break;
1186 	case STRSXP:
1187 	    len = XLENGTH(s);
1188 	    WriteLENGTH(stream, s);
1189 	    for (R_xlen_t ix = 0; ix < len; ix++)
1190 		WriteItem(STRING_ELT(s, ix), ref_table, stream);
1191 	    break;
1192 	case VECSXP:
1193 	case EXPRSXP:
1194 	    len = XLENGTH(s);
1195 	    WriteLENGTH(stream, s);
1196 	    for (R_xlen_t ix = 0; ix < len; ix++)
1197 		WriteItem(VECTOR_ELT(s, ix), ref_table, stream);
1198 	    break;
1199 	case BCODESXP:
1200 	    WriteBC(s, ref_table, stream);
1201 	    break;
1202 	case RAWSXP:
1203 	    len = XLENGTH(s);
1204 	    WriteLENGTH(stream, s);
1205 	    switch (stream->type) {
1206 	    case R_pstream_xdr_format:
1207 	    case R_pstream_binary_format:
1208 	    {
1209 		R_xlen_t done, this;
1210 		for (done = 0; done < len; done += this) {
1211 		    this = min2(CHUNK_SIZE, len - done);
1212 		    stream->OutBytes(stream, RAW(s) + done, (int) this);
1213 		}
1214 		break;
1215 	    }
1216 	    default:
1217 		for (R_xlen_t ix = 0; ix < len; ix++)
1218 		    OutByte(stream, RAW(s)[ix]);
1219 	    }
1220 	    break;
1221 	case S4SXP:
1222 	  break; /* only attributes (i.e., slots) count */
1223 	default:
1224 	    error(_("WriteItem: unknown type %i"), TYPEOF(s));
1225 	}
1226 	if (hasattr)
1227 	    WriteItem(ATTRIB(s), ref_table, stream);
1228     }
1229 }
1230 
MakeCircleHashTable(void)1231 static SEXP MakeCircleHashTable(void)
1232 {
1233     return CONS(R_NilValue, allocVector(VECSXP, HASHSIZE));
1234 }
1235 
AddCircleHash(SEXP item,SEXP ct)1236 static Rboolean AddCircleHash(SEXP item, SEXP ct)
1237 {
1238     SEXP table, bucket, list;
1239 
1240     table = CDR(ct);
1241     R_size_t pos = PTRHASH(item) % LENGTH(table);
1242     bucket = VECTOR_ELT(table, pos);
1243     for (list = bucket; list != R_NilValue; list = CDR(list))
1244 	if (TAG(list) == item) {
1245 	    if (CAR(list) == R_NilValue) {
1246 		/* this is the second time; enter in list and mark */
1247 		SETCAR(list, R_UnboundValue); /* anything different will do */
1248 		SETCAR(ct, CONS(item, CAR(ct)));
1249 	    }
1250 	    return TRUE;
1251 	}
1252 
1253     /* If we get here then this is a new item; enter in the table */
1254     bucket = CONS(R_NilValue, bucket);
1255     SET_TAG(bucket, item);
1256     SET_VECTOR_ELT(table, pos, bucket);
1257     return FALSE;
1258 }
1259 
ScanForCircles1(SEXP s,SEXP ct)1260 static void ScanForCircles1(SEXP s, SEXP ct)
1261 {
1262     switch (TYPEOF(s)) {
1263     case LANGSXP:
1264     case LISTSXP:
1265 	if (! AddCircleHash(s, ct)) {
1266 	    ScanForCircles1(CAR(s), ct);
1267 	    ScanForCircles1(CDR(s), ct);
1268 	}
1269 	break;
1270     case BCODESXP:
1271 	{
1272 	    int i, n;
1273 	    SEXP consts = BCODE_CONSTS(s);
1274 	    n = LENGTH(consts);
1275 	    for (i = 0; i < n; i++)
1276 		ScanForCircles1(VECTOR_ELT(consts, i), ct);
1277 	}
1278 	break;
1279     default: break;
1280     }
1281 }
1282 
ScanForCircles(SEXP s)1283 static SEXP ScanForCircles(SEXP s)
1284 {
1285     SEXP ct;
1286     PROTECT(ct = MakeCircleHashTable());
1287     ScanForCircles1(s, ct);
1288     UNPROTECT(1);
1289     return CAR(ct);
1290 }
1291 
findrep(SEXP x,SEXP reps)1292 static SEXP findrep(SEXP x, SEXP reps)
1293 {
1294     for (; reps != R_NilValue; reps = CDR(reps))
1295 	if (x == CAR(reps))
1296 	    return reps;
1297     return R_NilValue;
1298 }
1299 
WriteBCLang(SEXP s,SEXP ref_table,SEXP reps,R_outpstream_t stream)1300 static void WriteBCLang(SEXP s, SEXP ref_table, SEXP reps,
1301 			R_outpstream_t stream)
1302 {
1303     int type = TYPEOF(s);
1304     if (type == LANGSXP || type == LISTSXP) {
1305 	SEXP r = findrep(s, reps);
1306 	int output = TRUE;
1307 	if (r != R_NilValue) {
1308 	    /* we have a cell referenced more than once */
1309 	    if (TAG(r) == R_NilValue) {
1310 		/* this is the first reference, so update and register
1311 		   the counter */
1312 		int i = INTEGER(CAR(reps))[0]++;
1313 		SET_TAG(r, allocVector(INTSXP, 1));
1314 		INTEGER(TAG(r))[0] = i;
1315 		OutInteger(stream, BCREPDEF);
1316 		OutInteger(stream, i);
1317 	    }
1318 	    else {
1319 		/* we've seen it before, so just put out the index */
1320 		OutInteger(stream, BCREPREF);
1321 		OutInteger(stream, INTEGER(TAG(r))[0]);
1322 		output = FALSE;
1323 	    }
1324 	}
1325 	if (output) {
1326 	    SEXP attr = ATTRIB(s);
1327 	    if (attr != R_NilValue) {
1328 		switch(type) {
1329 		case LANGSXP: type = ATTRLANGSXP; break;
1330 		case LISTSXP: type = ATTRLISTSXP; break;
1331 		}
1332 	    }
1333 	    OutInteger(stream, type);
1334 	    if (attr != R_NilValue)
1335 		WriteItem(attr, ref_table, stream);
1336 	    WriteItem(TAG(s), ref_table, stream);
1337 	    WriteBCLang(CAR(s), ref_table, reps, stream);
1338 	    WriteBCLang(CDR(s), ref_table, reps, stream);
1339 	}
1340     }
1341     else {
1342 	OutInteger(stream, 0); /* pad */
1343 	WriteItem(s, ref_table, stream);
1344     }
1345 }
1346 
WriteBC1(SEXP s,SEXP ref_table,SEXP reps,R_outpstream_t stream)1347 static void WriteBC1(SEXP s, SEXP ref_table, SEXP reps, R_outpstream_t stream)
1348 {
1349     int i, n;
1350     SEXP code, consts;
1351     PROTECT(code = R_bcDecode(BCODE_CODE(s)));
1352     WriteItem(code, ref_table, stream);
1353     consts = BCODE_CONSTS(s);
1354     n = LENGTH(consts);
1355     OutInteger(stream, n);
1356     for (i = 0; i < n; i++) {
1357 	SEXP c = VECTOR_ELT(consts, i);
1358 	int type = TYPEOF(c);
1359 	switch (type) {
1360 	case BCODESXP:
1361 	    OutInteger(stream, type);
1362 	    WriteBC1(c, ref_table, reps, stream);
1363 	    break;
1364 	case LANGSXP:
1365 	case LISTSXP:
1366 	    WriteBCLang(c, ref_table, reps, stream);
1367 	    break;
1368 	default:
1369 	    OutInteger(stream, type);
1370 	    WriteItem(c, ref_table, stream);
1371 	}
1372     }
1373     UNPROTECT(1);
1374 }
1375 
WriteBC(SEXP s,SEXP ref_table,R_outpstream_t stream)1376 static void WriteBC(SEXP s, SEXP ref_table, R_outpstream_t stream)
1377 {
1378     SEXP reps = ScanForCircles(s);
1379     PROTECT(reps = CONS(R_NilValue, reps));
1380     OutInteger(stream, length(reps));
1381     SETCAR(reps, allocVector(INTSXP, 1));
1382     INTEGER(CAR(reps))[0] = 0;
1383     WriteBC1(s, ref_table, reps, stream);
1384     UNPROTECT(1);
1385 }
1386 
R_Serialize(SEXP s,R_outpstream_t stream)1387 void R_Serialize(SEXP s, R_outpstream_t stream)
1388 {
1389     SEXP ref_table;
1390     int version = stream->version;
1391 
1392     OutFormat(stream);
1393 
1394     switch(version) {
1395     case 2:
1396 	OutInteger(stream, version);
1397 	OutInteger(stream, R_VERSION);
1398 	OutInteger(stream, R_Version(2,3,0));
1399 	break;
1400     case 3:
1401     {
1402 	OutInteger(stream, version);
1403 	OutInteger(stream, R_VERSION);
1404 	OutInteger(stream, R_Version(3,5,0));
1405 	const char *natenc = R_nativeEncoding();
1406 	int nelen = (int) strlen(natenc);
1407 	OutInteger(stream, nelen);
1408 	OutString(stream, natenc, nelen);
1409 	break;
1410     }
1411     default: error(_("version %d not supported"), version);
1412     }
1413 
1414     PROTECT(ref_table = MakeHashTable());
1415     WriteItem(s, ref_table, stream);
1416     UNPROTECT(1);
1417 }
1418 
1419 
1420 /*
1421  * Unserialize Code
1422  */
1423 
1424 // used in saveload.c
1425 attribute_hidden int R_ReadItemDepth = 0, R_InitReadItemDepth;
1426 
1427 static char lastname[8192] = "<unknown>";
1428 
1429 #define INITIAL_REFREAD_TABLE_SIZE 128
1430 
MakeReadRefTable(void)1431 static SEXP MakeReadRefTable(void)
1432 {
1433     SEXP data = allocVector(VECSXP, INITIAL_REFREAD_TABLE_SIZE);
1434     SET_TRUELENGTH(data, 0);
1435     return CONS(data, R_NilValue);
1436 }
1437 
GetReadRef(SEXP table,int index)1438 static SEXP GetReadRef(SEXP table, int index)
1439 {
1440     int i = index - 1;
1441     SEXP data = CAR(table);
1442 
1443     if (i < 0 || i >= LENGTH(data))
1444 	error(_("reference index out of range"));
1445     return VECTOR_ELT(data, i);
1446 }
1447 
AddReadRef(SEXP table,SEXP value)1448 static void AddReadRef(SEXP table, SEXP value)
1449 {
1450     SEXP data = CAR(table);
1451     R_xlen_t count = TRUELENGTH(data) + 1;
1452     if (count >= LENGTH(data)) {
1453 	R_xlen_t i, len;
1454 	SEXP newdata;
1455 
1456 	PROTECT(value);
1457 	len = 2 * count;
1458 	newdata = allocVector(VECSXP, len);
1459 	for (i = 0; i < LENGTH(data); i++)
1460 	    SET_VECTOR_ELT(newdata, i, VECTOR_ELT(data, i));
1461 	SETCAR(table, newdata);
1462 	data = newdata;
1463 	UNPROTECT(1);
1464     }
1465     SET_TRUELENGTH(data, count);
1466     SET_VECTOR_ELT(data, count - 1, value);
1467 }
1468 
InStringVec(R_inpstream_t stream,SEXP ref_table)1469 static SEXP InStringVec(R_inpstream_t stream, SEXP ref_table)
1470 {
1471     SEXP s;
1472     int i, len;
1473     if (InInteger(stream) != 0)
1474 	error(_("names in persistent strings are not supported yet"));
1475     len = InInteger(stream);
1476     PROTECT(s = allocVector(STRSXP, len));
1477     R_ReadItemDepth++;
1478     for (i = 0; i < len; i++)
1479 	SET_STRING_ELT(s, i, ReadItem(ref_table, stream));
1480     R_ReadItemDepth--;
1481     UNPROTECT(1);
1482     return s;
1483 }
1484 
1485 /* use static buffer to reuse storage */
1486 static R_INLINE void
InIntegerVec(R_inpstream_t stream,SEXP obj,R_xlen_t length)1487 InIntegerVec(R_inpstream_t stream, SEXP obj, R_xlen_t length)
1488 {
1489     switch (stream->type) {
1490     case R_pstream_xdr_format:
1491     {
1492 	static char buf[CHUNK_SIZE * sizeof(int)];
1493 	R_xlen_t done, this;
1494 	XDR xdrs;
1495 	for (done = 0; done < length; done += this) {
1496 	    this = min2(CHUNK_SIZE, length - done);
1497 	    stream->InBytes(stream, buf, (int)(sizeof(int) * this));
1498 	    xdrmem_create(&xdrs, buf, (int)(this * sizeof(int)), XDR_DECODE);
1499 	    for(int cnt = 0; cnt < this; cnt++)
1500 		if(!xdr_int(&xdrs, INTEGER(obj) + done + cnt))
1501 		    error(_("XDR read failed"));
1502 	    xdr_destroy(&xdrs);
1503 	}
1504 	break;
1505     }
1506     case R_pstream_binary_format:
1507     {
1508 	R_xlen_t done, this;
1509 	for (done = 0; done < length; done += this) {
1510 	    this = min2(CHUNK_SIZE, length - done);
1511 	    stream->InBytes(stream, INTEGER(obj) + done,
1512 			    (int)(sizeof(int) * this));
1513 	}
1514 	break;
1515     }
1516     default:
1517 	for (R_xlen_t cnt = 0; cnt < length; cnt++)
1518 	    INTEGER(obj)[cnt] = InInteger(stream);
1519     }
1520 }
1521 
1522 static R_INLINE void
InRealVec(R_inpstream_t stream,SEXP obj,R_xlen_t length)1523 InRealVec(R_inpstream_t stream, SEXP obj, R_xlen_t length)
1524 {
1525     switch (stream->type) {
1526     case R_pstream_xdr_format:
1527     {
1528 	static char buf[CHUNK_SIZE * sizeof(double)];
1529 	R_xlen_t done, this;
1530 	XDR xdrs;
1531 	for (done = 0; done < length; done += this) {
1532 	    this = min2(CHUNK_SIZE, length - done);
1533 	    stream->InBytes(stream, buf, (int)(sizeof(double) * this));
1534 	    xdrmem_create(&xdrs, buf, (int)(this * sizeof(double)), XDR_DECODE);
1535 	    for(R_xlen_t cnt = 0; cnt < this; cnt++)
1536 		if(!xdr_double(&xdrs, REAL(obj) + done + cnt))
1537 		    error(_("XDR read failed"));
1538 	    xdr_destroy(&xdrs);
1539 	}
1540 	break;
1541     }
1542     case R_pstream_binary_format:
1543     {
1544 	R_xlen_t done, this;
1545 	for (done = 0; done < length; done += this) {
1546 	    this = min2(CHUNK_SIZE, length - done);
1547 	    stream->InBytes(stream, REAL(obj) + done,
1548 			    (int)(sizeof(double) * this));
1549 	}
1550 	break;
1551     }
1552     default:
1553 	for (R_xlen_t cnt = 0; cnt < length; cnt++)
1554 	    REAL(obj)[cnt] = InReal(stream);
1555     }
1556 }
1557 
1558 static R_INLINE void
InComplexVec(R_inpstream_t stream,SEXP obj,R_xlen_t length)1559 InComplexVec(R_inpstream_t stream, SEXP obj, R_xlen_t length)
1560 {
1561     switch (stream->type) {
1562     case R_pstream_xdr_format:
1563     {
1564 	static char buf[CHUNK_SIZE * sizeof(Rcomplex)];
1565 	R_xlen_t done, this;
1566 	XDR xdrs;
1567 	Rcomplex *output = COMPLEX(obj);
1568 	for (done = 0; done < length; done += this) {
1569 	    this = min2(CHUNK_SIZE, length - done);
1570 	    stream->InBytes(stream, buf, (int)(sizeof(Rcomplex) * this));
1571 	    xdrmem_create(&xdrs, buf, (int)(this * sizeof(Rcomplex)), XDR_DECODE);
1572 	    for(R_xlen_t cnt = 0; cnt < this; cnt++) {
1573 		if(!xdr_double(&xdrs, &(output[done+cnt].r)) ||
1574 		   !xdr_double(&xdrs, &(output[done+cnt].i)))
1575 		    error(_("XDR read failed"));
1576 	    }
1577 	    xdr_destroy(&xdrs);
1578 	}
1579 	break;
1580     }
1581     case R_pstream_binary_format:
1582     {
1583 	R_xlen_t done, this;
1584 	for (done = 0; done < length; done += this) {
1585 	    this = min2(CHUNK_SIZE, length - done);
1586 	    stream->InBytes(stream, COMPLEX(obj) + done,
1587 			    (int)(sizeof(Rcomplex) * this));
1588 	}
1589 	break;
1590     }
1591     default:
1592 	for (R_xlen_t cnt = 0; cnt < length; cnt++)
1593 	    COMPLEX(obj)[cnt] = InComplex(stream);
1594     }
1595 }
1596 
TryConvertString(void * obj,const char * inp,size_t inplen,char * buf,size_t * bufleft)1597 static int TryConvertString(void *obj, const char *inp, size_t inplen,
1598                             char *buf, size_t *bufleft)
1599 {
1600     if (Riconv(obj, NULL, NULL, &buf, bufleft) == -1)
1601 	return -1;
1602     return (int) Riconv(obj, &inp, &inplen, &buf, bufleft);
1603 }
1604 
1605 static SEXP
ConvertChar(void * obj,char * inp,size_t inplen,cetype_t enc)1606 ConvertChar(void *obj, char *inp, size_t inplen, cetype_t enc)
1607 {
1608     size_t buflen = inplen;
1609 
1610     for(;;) {
1611 	size_t bufleft = buflen;
1612 	if (buflen < 1000) {
1613 	    char buf[buflen + 1];
1614 	    if (TryConvertString(obj, inp, inplen, buf, &bufleft) == -1) {
1615 		if (errno == E2BIG) {
1616 		    buflen *= 2;
1617 		    continue;
1618 		} else
1619 		    return R_NilValue;
1620 	    }
1621 	    return mkCharLenCE(buf, (int)(buflen - bufleft), enc);
1622 	} else {
1623 	    char *buf = CallocCharBuf(buflen);
1624 	    if (TryConvertString(obj, inp, inplen, buf, &bufleft) == -1) {
1625 		Free(buf);
1626 		if (errno == E2BIG) {
1627 		    buflen *= 2;
1628 		    continue;
1629 		} else
1630 		    return R_NilValue;
1631 	    }
1632 	    SEXP ans = mkCharLenCE(buf, (int)(buflen - bufleft), enc);
1633 	    Free(buf);
1634 	    return ans;
1635 	}
1636     }
1637 }
1638 
native_fromcode(R_inpstream_t stream)1639 static char *native_fromcode(R_inpstream_t stream)
1640 {
1641     char *from = stream->native_encoding;
1642 #ifdef HAVE_ICONV_CP1252
1643     if (!strcmp(from, "ISO-8859-1"))
1644 	from = "CP1252";
1645 #endif
1646     return from;
1647 }
1648 
1649 /* Read string into pre-allocated buffer, convert encoding if necessary, and
1650    return a CHARSXP */
1651 static SEXP
ReadChar(R_inpstream_t stream,char * buf,int length,int levs)1652 ReadChar(R_inpstream_t stream, char *buf, int length, int levs)
1653 {
1654     InString(stream, buf, length);
1655     buf[length] = '\0';
1656     if (levs & UTF8_MASK)
1657 	return mkCharLenCE(buf, length, CE_UTF8);
1658     if (levs & LATIN1_MASK)
1659 	return mkCharLenCE(buf, length, CE_LATIN1);
1660     if (levs & BYTES_MASK)
1661 	return mkCharLenCE(buf, length, CE_BYTES);
1662     if (levs & ASCII_MASK)
1663 	return mkCharLenCE(buf, length, CE_NATIVE);
1664 
1665     /* native encoding, not ascii */
1666     if (!stream->native_encoding[0] || /* original native encoding unknown */
1667         (stream->nat2nat_obj == (void *)-1 && /* translation impossible or disabled */
1668          stream->nat2utf8_obj == (void *)-1))
1669 	return mkCharLenCE(buf, length, CE_NATIVE);
1670     /* try converting to native encoding */
1671     if (!stream->nat2nat_obj &&
1672         !strcmp(stream->native_encoding, R_nativeEncoding())) {
1673 	/* No translation needed. Performance optimization but also leaves
1674 	   invalid strings in their encoding undetected. */
1675 	stream->nat2nat_obj = (void *)-1;
1676 	stream->nat2utf8_obj = (void *)-1;
1677 #ifdef WARN_DESERIALIZE_INVALID_UTF8
1678 	if (known_to_be_utf8 && !utf8Valid(buf))
1679 	    warning(_("deserializing invalid UTF-8 string '%s'"), buf);
1680 #endif
1681     }
1682     if (!stream->nat2nat_obj) {
1683 	char *from = native_fromcode(stream);
1684 	stream->nat2nat_obj = Riconv_open("", from);
1685 	if (stream->nat2nat_obj == (void *)-1)
1686 	    warning(_("unsupported conversion from '%s' to '%s'"), from, "");
1687     }
1688     if (stream->nat2nat_obj != (void *)-1) {
1689 	cetype_t enc = CE_NATIVE;
1690 	if (known_to_be_utf8) enc = CE_UTF8;
1691 	else if (known_to_be_latin1) enc = CE_LATIN1;
1692 	SEXP ans = ConvertChar(stream->nat2nat_obj, buf, length, enc);
1693 	if (ans != R_NilValue)
1694 	    return ans;
1695 	if (known_to_be_utf8) {
1696 	    /* nat2nat_obj is converting to UTF-8, no need to use nat2utf8_obj */
1697 	    stream->nat2utf8_obj = (void *)-1;
1698 	    char *from = native_fromcode(stream);
1699 	    warning(_("input string '%s' cannot be translated to UTF-8, is it valid in '%s'?"),
1700 	            buf, from);
1701 	}
1702     }
1703     /* try converting to UTF-8 */
1704     if (!stream->nat2utf8_obj) {
1705 	char *from = native_fromcode(stream);
1706 	stream->nat2utf8_obj = Riconv_open("UTF-8", from);
1707 	if (stream->nat2utf8_obj == (void *)-1) {
1708 	    /* very unlikely */
1709 	    warning(_("unsupported conversion from '%s' to '%s'"),
1710 	            from, "UTF-8");
1711 	    warning(_("strings not representable in native encoding will not be translated"));
1712 	} else
1713 	    warning(_("strings not representable in native encoding will be translated to UTF-8"));
1714     }
1715     if (stream->nat2utf8_obj != (void *)-1) {
1716 	SEXP ans = ConvertChar(stream->nat2utf8_obj, buf, length, CE_UTF8);
1717 	if (ans != R_NilValue)
1718 	    return ans;
1719 	char *from = native_fromcode(stream);
1720 	warning(_("input string '%s' cannot be translated to UTF-8, is it valid in '%s' ?"),
1721 	        buf, from);
1722     }
1723     /* no translation possible */
1724     return mkCharLenCE(buf, length, CE_NATIVE);
1725 }
1726 
ReadLENGTH(R_inpstream_t stream)1727 static R_xlen_t ReadLENGTH (R_inpstream_t stream)
1728 {
1729     int len = InInteger(stream);
1730 #ifdef LONG_VECTOR_SUPPORT
1731     if (len < -1)
1732 	error(_("negative serialized length for vector"));
1733     if (len == -1) {
1734 	unsigned int len1, len2;
1735 	len1 = InInteger(stream); /* upper part */
1736 	len2 = InInteger(stream); /* lower part */
1737 	R_xlen_t xlen = len1;
1738 	/* sanity check for now */
1739 	if (len1 > 65536)
1740 	    error (_("invalid upper part of serialized vector length"));
1741 	return (xlen << 32) + len2;
1742     } else return len;
1743 #else
1744     if (len < 0)
1745 	error(_("negative serialized vector length:\nperhaps long vector from 64-bit version of R?"));
1746     return len;
1747 #endif
1748 }
1749 
1750 /* differs when it fails from version in envir.c */
R_FindNamespace1(SEXP info)1751 static SEXP R_FindNamespace1(SEXP info)
1752 {
1753     SEXP expr, val, where;
1754     PROTECT(info);
1755     where = PROTECT(ScalarString(mkChar(lastname)));
1756     SEXP s_getNamespace = install("..getNamespace");
1757     PROTECT(expr = LCONS(s_getNamespace,
1758 			 LCONS(info, LCONS(where, R_NilValue))));
1759     val = eval(expr, R_GlobalEnv);
1760     UNPROTECT(3);
1761     return val;
1762 }
1763 
1764 
ReadItem(SEXP ref_table,R_inpstream_t stream)1765 static SEXP ReadItem (SEXP ref_table, R_inpstream_t stream)
1766 {
1767     SEXPTYPE type;
1768     SEXP s;
1769     R_xlen_t len, count;
1770     int flags, levs, objf, hasattr, hastag, length;
1771 
1772     R_assert(TYPEOF(ref_table) == LISTSXP && TYPEOF(CAR(ref_table)) == VECSXP);
1773 
1774     flags = InInteger(stream);
1775     UnpackFlags(flags, &type, &levs, &objf, &hasattr, &hastag);
1776 
1777     switch(type) {
1778     case NILVALUE_SXP:      return R_NilValue;
1779     case EMPTYENV_SXP:	    return R_EmptyEnv;
1780     case BASEENV_SXP:	    return R_BaseEnv;
1781     case GLOBALENV_SXP:     return R_GlobalEnv;
1782     case UNBOUNDVALUE_SXP:  return R_UnboundValue;
1783     case MISSINGARG_SXP:    return R_MissingArg;
1784     case BASENAMESPACE_SXP:
1785 	return R_BaseNamespace;
1786     case REFSXP:
1787 	return GetReadRef(ref_table, InRefIndex(stream, flags));
1788     case PERSISTSXP:
1789 	PROTECT(s = InStringVec(stream, ref_table));
1790 	s = PersistentRestore(stream, s);
1791 	UNPROTECT(1);
1792 	AddReadRef(ref_table, s);
1793 	return s;
1794     case ALTREP_SXP:
1795 	{
1796 	    R_ReadItemDepth++;
1797 	    SEXP info = PROTECT(ReadItem(ref_table, stream));
1798 	    SEXP state = PROTECT(ReadItem(ref_table, stream));
1799 	    SEXP attr = PROTECT(ReadItem(ref_table, stream));
1800 	    s = ALTREP_UNSERIALIZE_EX(info, state, attr, objf, levs);
1801 	    UNPROTECT(3); /* info, state, attr */
1802 	    R_ReadItemDepth--;
1803 	    return s;
1804 	}
1805     case SYMSXP:
1806 	R_ReadItemDepth++;
1807 	PROTECT(s = ReadItem(ref_table, stream)); /* print name */
1808 	R_ReadItemDepth--;
1809 	s = installTrChar(s);
1810 	AddReadRef(ref_table, s);
1811 	UNPROTECT(1);
1812 	return s;
1813     case PACKAGESXP:
1814 	PROTECT(s = InStringVec(stream, ref_table));
1815 	s = R_FindPackageEnv(s);
1816 	UNPROTECT(1);
1817 	AddReadRef(ref_table, s);
1818 	return s;
1819     case NAMESPACESXP:
1820 	PROTECT(s = InStringVec(stream, ref_table));
1821 	s = R_FindNamespace1(s);
1822 	AddReadRef(ref_table, s);
1823 	UNPROTECT(1);
1824 	return s;
1825     case ENVSXP:
1826 	{
1827 	    int locked = InInteger(stream);
1828 
1829 	    PROTECT(s = allocSExp(ENVSXP));
1830 
1831 	    /* MUST register before filling in */
1832 	    AddReadRef(ref_table, s);
1833 
1834 	    /* Now fill it in  */
1835 	    R_ReadItemDepth++;
1836 	    SET_ENCLOS(s, ReadItem(ref_table, stream));
1837 	    SET_FRAME(s, ReadItem(ref_table, stream));
1838 	    SET_HASHTAB(s, ReadItem(ref_table, stream));
1839 	    SET_ATTRIB(s, ReadItem(ref_table, stream));
1840 	    R_ReadItemDepth--;
1841 	    if (ATTRIB(s) != R_NilValue &&
1842 		getAttrib(s, R_ClassSymbol) != R_NilValue)
1843 		/* We don't write out the object bit for environments,
1844 		   so reconstruct it here if needed. */
1845 		SET_OBJECT(s, 1);
1846 	    R_RestoreHashCount(s);
1847 	    if (locked) R_LockEnvironment(s, FALSE);
1848 	    /* Convert a NULL enclosure to baseenv() */
1849 	    if (ENCLOS(s) == R_NilValue) SET_ENCLOS(s, R_BaseEnv);
1850 	    UNPROTECT(1);
1851 	    return s;
1852 	}
1853     case LISTSXP:
1854     case LANGSXP:
1855     case CLOSXP:
1856     case PROMSXP:
1857     case DOTSXP:
1858 	/* This handling of dotted pair objects still uses recursion
1859 	   on the CDR and so will overflow the PROTECT stack for long
1860 	   lists.  The save format does permit using an iterative
1861 	   approach; it just has to pass around the place to write the
1862 	   CDR into when it is allocated.  It's more trouble than it
1863 	   is worth to write the code to handle this now, but if it
1864 	   becomes necessary we can do it without needing to change
1865 	   the save format. */
1866 	PROTECT(s = allocSExp(type));
1867 	SETLEVELS(s, levs);
1868 	SET_OBJECT(s, objf);
1869 	R_ReadItemDepth++;
1870 	Rboolean set_lastname = FALSE;
1871 	SET_ATTRIB(s, hasattr ? ReadItem(ref_table, stream) : R_NilValue);
1872 	SET_TAG(s, hastag ? ReadItem(ref_table, stream) : R_NilValue);
1873 	if (hastag && R_ReadItemDepth == R_InitReadItemDepth + 1 &&
1874 	    isSymbol(TAG(s))) {
1875 	    snprintf(lastname, 8192, "%s", CHAR(PRINTNAME(TAG(s))));
1876 	    set_lastname = TRUE;
1877 	}
1878 	if (hastag && R_ReadItemDepth <= 0) {
1879 	    Rprintf("%*s", 2*(R_ReadItemDepth - R_InitReadItemDepth), "");
1880 	    PrintValue(TAG(s));
1881 	}
1882 	SETCAR(s, ReadItem(ref_table, stream));
1883 	R_ReadItemDepth--; /* do this early because of the recursion. */
1884 	SETCDR(s, ReadItem(ref_table, stream));
1885 	/* For reading closures and promises stored in earlier versions, convert NULL env to baseenv() */
1886 	if      (type == CLOSXP && CLOENV(s) == R_NilValue) SET_CLOENV(s, R_BaseEnv);
1887 	else if (type == PROMSXP && PRENV(s) == R_NilValue) SET_PRENV(s, R_BaseEnv);
1888 	if (set_lastname) strcpy(lastname, "<unknown>");
1889 	UNPROTECT(1); /* s */
1890 	return s;
1891     default:
1892 	/* These break out of the switch to have their ATTR,
1893 	   LEVELS, and OBJECT fields filled in.  Each leaves the
1894 	   newly allocated value PROTECTed */
1895 	switch (type) {
1896 	case EXTPTRSXP:
1897 	    PROTECT(s = allocSExp(type));
1898 	    AddReadRef(ref_table, s);
1899 	    R_SetExternalPtrAddr(s, NULL);
1900 	    R_ReadItemDepth++;
1901 	    R_SetExternalPtrProtected(s, ReadItem(ref_table, stream));
1902 	    R_SetExternalPtrTag(s, ReadItem(ref_table, stream));
1903 	    R_ReadItemDepth--;
1904 	    break;
1905 	case WEAKREFSXP:
1906 	    PROTECT(s = R_MakeWeakRef(R_NilValue, R_NilValue, R_NilValue,
1907 				      FALSE));
1908 	    AddReadRef(ref_table, s);
1909 	    break;
1910 	case SPECIALSXP:
1911 	case BUILTINSXP:
1912 	    {
1913 		/* These are all short strings */
1914 		length = InInteger(stream);
1915 		char cbuf[length+1];
1916 		InString(stream, cbuf, length);
1917 		cbuf[length] = '\0';
1918 		int index = StrToInternal(cbuf);
1919 		if (index == NA_INTEGER) {
1920 		    warning(_("unrecognized internal function name \"%s\""), cbuf);
1921 		    PROTECT(s = R_NilValue);
1922 		} else
1923 		    PROTECT(s = mkPRIMSXP(index, type == BUILTINSXP));
1924 	    }
1925 	    break;
1926 	case CHARSXP:
1927 	    /* these are currently limited to 2^31 -1 bytes */
1928 	    length = InInteger(stream);
1929 	    if (length == -1)
1930 		PROTECT(s = NA_STRING);
1931 	    else if (length < 1000) {
1932 		char cbuf[length+1];
1933 		PROTECT(s = ReadChar(stream, cbuf, length, levs));
1934 	    } else {
1935 		char *cbuf = CallocCharBuf(length);
1936 		PROTECT(s = ReadChar(stream, cbuf, length, levs));
1937 		Free(cbuf);
1938 	    }
1939 	    break;
1940 	case LGLSXP:
1941 	case INTSXP:
1942 	    len = ReadLENGTH(stream);
1943 	    PROTECT(s = allocVector(type, len));
1944 	    InIntegerVec(stream, s, len);
1945 	    break;
1946 	case REALSXP:
1947 	    len = ReadLENGTH(stream);
1948 	    PROTECT(s = allocVector(type, len));
1949 	    InRealVec(stream, s, len);
1950 	    break;
1951 	case CPLXSXP:
1952 	    len = ReadLENGTH(stream);
1953 	    PROTECT(s = allocVector(type, len));
1954 	    InComplexVec(stream, s, len);
1955 	    break;
1956 	case STRSXP:
1957 	    len = ReadLENGTH(stream);
1958 	    PROTECT(s = allocVector(type, len));
1959 	    R_ReadItemDepth++;
1960 	    for (count = 0; count < len; ++count)
1961 		SET_STRING_ELT(s, count, ReadItem(ref_table, stream));
1962 	    R_ReadItemDepth--;
1963 	    break;
1964 	case VECSXP:
1965 	case EXPRSXP:
1966 	    len = ReadLENGTH(stream);
1967 	    PROTECT(s = allocVector(type, len));
1968 	    R_ReadItemDepth++;
1969 	    for (count = 0; count < len; ++count) {
1970 		if (R_ReadItemDepth <= 0)
1971 		    Rprintf("%*s[%d]\n", 2*(R_ReadItemDepth - R_InitReadItemDepth), "", count+1);
1972 		SET_VECTOR_ELT(s, count, ReadItem(ref_table, stream));
1973 	    }
1974 	    R_ReadItemDepth--;
1975 	    break;
1976 	case BCODESXP:
1977 	    PROTECT(s = ReadBC(ref_table, stream));
1978 	    break;
1979 	case CLASSREFSXP:
1980 	    error(_("this version of R cannot read class references"));
1981 	case GENERICREFSXP:
1982 	    error(_("this version of R cannot read generic function references"));
1983 	case RAWSXP:
1984 	    len = ReadLENGTH(stream);
1985 	    PROTECT(s = allocVector(type, len));
1986 	    switch (stream->type) {
1987 	    case R_pstream_ascii_format:
1988 		for (R_xlen_t ix = 0; ix < len; ix++) {
1989 		    char word[128];
1990 		    unsigned int i; // unsigned to avoid compiler warnings
1991 		    InWord(stream, word, sizeof(word));
1992 		    if(sscanf(word, "%2x", &i) != 1) error(_("read error"));
1993 		    RAW(s)[ix] = (Rbyte) i;
1994 		}
1995 		break;
1996 	    default:
1997 	    {
1998 		R_xlen_t done, this;
1999 		for (done = 0; done < len; done += this) {
2000 		    this = min2(CHUNK_SIZE, len - done);
2001 		    stream->InBytes(stream, RAW(s) + done, (int) this);
2002 		}
2003 	    }
2004 	    }
2005 	    break;
2006 	case S4SXP:
2007 	    PROTECT(s = allocS4Object());
2008 	    break;
2009 	default:
2010 	    s = R_NilValue; /* keep compiler happy */
2011 	    error(_("ReadItem: unknown type %i, perhaps written by later version of R"), type);
2012 	}
2013 	if (type != CHARSXP) SETLEVELS(s, levs);
2014 	SET_OBJECT(s, objf);
2015 	if (TYPEOF(s) == CHARSXP) {
2016 	    /* With the CHARSXP cache maintained through the ATTRIB
2017 	       field that field has already been filled in by the
2018 	       mkChar/mkCharCE call above, so we need to leave it
2019 	       alone.  If there is an attribute (as there might be if
2020 	       the serialized data was created by an older version) we
2021 	       read and ignore the value. */
2022 	    R_ReadItemDepth++;
2023 	    if (hasattr) ReadItem(ref_table, stream);
2024 	    R_ReadItemDepth--;
2025 	}
2026 	else {
2027 	    R_ReadItemDepth++;
2028 	    SET_ATTRIB(s, hasattr ? ReadItem(ref_table, stream) : R_NilValue);
2029 	    R_ReadItemDepth--;
2030 	}
2031 	UNPROTECT(1); /* s */
2032 	if (TYPEOF(s) == BCODESXP && !R_BCVersionOK(s))
2033 	    return R_BytecodeExpr(s);
2034 	return s;
2035     }
2036 }
2037 
2038 static SEXP ReadBC1(SEXP ref_table, SEXP reps, R_inpstream_t stream);
2039 
ReadBCLang(int type,SEXP ref_table,SEXP reps,R_inpstream_t stream)2040 static SEXP ReadBCLang(int type, SEXP ref_table, SEXP reps,
2041 		       R_inpstream_t stream)
2042 {
2043     switch (type) {
2044     case BCREPREF:
2045 	return VECTOR_ELT(reps, InInteger(stream));
2046     case BCREPDEF:
2047     case LANGSXP:
2048     case LISTSXP:
2049     case ATTRLANGSXP:
2050     case ATTRLISTSXP:
2051 	{
2052 	    SEXP ans;
2053 	    int pos = -1;
2054 	    int hasattr = FALSE;
2055 	    if (type == BCREPDEF) {
2056 		pos = InInteger(stream);
2057 		type = InInteger(stream);
2058 	    }
2059 	    switch (type) {
2060 	    case ATTRLANGSXP: type = LANGSXP; hasattr = TRUE; break;
2061 	    case ATTRLISTSXP: type = LISTSXP; hasattr = TRUE; break;
2062 	    }
2063 	    PROTECT(ans = allocSExp(type));
2064 	    if (pos >= 0)
2065 		SET_VECTOR_ELT(reps, pos, ans);
2066 	    R_ReadItemDepth++;
2067 	    if (hasattr)
2068 		SET_ATTRIB(ans, ReadItem(ref_table, stream));
2069 	    SET_TAG(ans, ReadItem(ref_table, stream));
2070 	    R_ReadItemDepth--;
2071 	    SETCAR(ans, ReadBCLang(InInteger(stream), ref_table, reps,
2072 				   stream));
2073 	    SETCDR(ans, ReadBCLang(InInteger(stream), ref_table, reps,
2074 				   stream));
2075 	    UNPROTECT(1);
2076 	    return ans;
2077 	}
2078     default:
2079 	{
2080 	    R_ReadItemDepth++;
2081 	    SEXP res = ReadItem(ref_table, stream);
2082 	    R_ReadItemDepth--;
2083 	    return res;
2084 	}
2085     }
2086 }
2087 
ReadBCConsts(SEXP ref_table,SEXP reps,R_inpstream_t stream)2088 static SEXP ReadBCConsts(SEXP ref_table, SEXP reps, R_inpstream_t stream)
2089 {
2090     SEXP ans, c;
2091     int i, n;
2092     n = InInteger(stream);
2093     PROTECT(ans = allocVector(VECSXP, n));
2094     for (i = 0; i < n; i++) {
2095 	int type = InInteger(stream);
2096 	switch (type) {
2097 	case BCODESXP:
2098 	    c = ReadBC1(ref_table, reps, stream);
2099 	    SET_VECTOR_ELT(ans, i, c);
2100 	    break;
2101 	case LANGSXP:
2102 	case LISTSXP:
2103 	case BCREPDEF:
2104 	case BCREPREF:
2105 	case ATTRLANGSXP:
2106 	case ATTRLISTSXP:
2107 	    c = ReadBCLang(type, ref_table, reps, stream);
2108 	    SET_VECTOR_ELT(ans, i, c);
2109 	    break;
2110 	default:
2111 	    R_ReadItemDepth++;
2112 	    SET_VECTOR_ELT(ans, i, ReadItem(ref_table, stream));
2113 	    R_ReadItemDepth--;
2114 	}
2115     }
2116     UNPROTECT(1);
2117     return ans;
2118 }
2119 
ReadBC1(SEXP ref_table,SEXP reps,R_inpstream_t stream)2120 static SEXP ReadBC1(SEXP ref_table, SEXP reps, R_inpstream_t stream)
2121 {
2122     SEXP s;
2123     PROTECT(s = allocSExp(BCODESXP));
2124     R_ReadItemDepth++;
2125     SETCAR(s, ReadItem(ref_table, stream)); /* code */
2126     R_ReadItemDepth--;
2127     SEXP bytes = PROTECT(CAR(s));
2128     SETCAR(s, R_bcEncode(bytes));
2129     SETCDR(s, ReadBCConsts(ref_table, reps, stream)); /* consts */
2130     SET_TAG(s, R_NilValue); /* expr */
2131     R_registerBC(bytes, s);
2132     UNPROTECT(2);
2133     return s;
2134 }
2135 
ReadBC(SEXP ref_table,R_inpstream_t stream)2136 static SEXP ReadBC(SEXP ref_table, R_inpstream_t stream)
2137 {
2138     SEXP reps, ans;
2139     PROTECT(reps = allocVector(VECSXP, InInteger(stream)));
2140     ans = ReadBC1(ref_table, reps, stream);
2141     UNPROTECT(1);
2142     return ans;
2143 }
2144 
DecodeVersion(int packed,int * v,int * p,int * s)2145 static void DecodeVersion(int packed, int *v, int *p, int *s)
2146 {
2147     *v = packed / 65536; packed = packed % 65536;
2148     *p = packed / 256; packed = packed % 256;
2149     *s = packed;
2150 }
2151 
R_Unserialize(R_inpstream_t stream)2152 SEXP R_Unserialize(R_inpstream_t stream)
2153 {
2154     int version;
2155     int writer_version, min_reader_version;
2156     SEXP obj, ref_table;
2157 
2158     InFormat(stream);
2159 
2160     /* Read the version numbers */
2161     version = InInteger(stream);
2162     writer_version = InInteger(stream);
2163     min_reader_version = InInteger(stream);
2164     switch (version) {
2165     case 2: break;
2166     case 3:
2167     {
2168 	int nelen = InInteger(stream);
2169 	if (nelen > R_CODESET_MAX)
2170 	    error(_("invalid length of encoding name"));
2171 	InString(stream, stream->native_encoding, nelen);
2172 	stream->native_encoding[nelen] = '\0';
2173 	break;
2174     }
2175     default:
2176 	{
2177 	    int vw, pw, sw;
2178 	    DecodeVersion(writer_version, &vw, &pw, &sw);
2179 	    if (min_reader_version < 0)
2180 		error(_("cannot read unreleased workspace version %d written by experimental R %d.%d.%d"), version, vw, pw, sw);
2181 	    else {
2182 		int vm, pm, sm;
2183 		DecodeVersion(min_reader_version, &vm, &pm, &sm);
2184 		error(_("cannot read workspace version %d written by R %d.%d.%d; need R %d.%d.%d or newer"),
2185 		      version, vw, pw, sw, vm, pm, sm);
2186 	    }
2187 	}
2188     }
2189 
2190     /* Read the actual object back */
2191     PROTECT(ref_table = MakeReadRefTable());
2192     obj =  ReadItem(ref_table, stream);
2193 
2194     if (version == 3) {
2195 	if (stream->nat2nat_obj && stream->nat2nat_obj != (void *)-1) {
2196 	    Riconv_close(stream->nat2nat_obj);
2197 	    stream->nat2nat_obj = NULL;
2198 	}
2199 	if (stream->nat2utf8_obj && stream->nat2utf8_obj != (void *)-1) {
2200 	    Riconv_close(stream->nat2utf8_obj);
2201 	    stream->nat2utf8_obj = NULL;
2202 	}
2203     }
2204     UNPROTECT(1);
2205 
2206     return obj;
2207 }
2208 
R_SerializeInfo(R_inpstream_t stream)2209 SEXP R_SerializeInfo(R_inpstream_t stream)
2210 {
2211     int version;
2212     int writer_version, min_reader_version, vv, vp, vs;
2213     int anslen = 4;
2214     SEXP ans, names;
2215     char buf[128];
2216 
2217     InFormat(stream);
2218 
2219     /* Read the version numbers */
2220     version = InInteger(stream);
2221     if (version == 3)
2222 	anslen++;
2223     writer_version = InInteger(stream);
2224     min_reader_version = InInteger(stream);
2225 
2226     PROTECT(ans = allocVector(VECSXP, anslen));
2227     PROTECT(names = allocVector(STRSXP, anslen));
2228     SET_STRING_ELT(names, 0, mkChar("version"));
2229     SET_VECTOR_ELT(ans, 0, ScalarInteger(version));
2230     SET_STRING_ELT(names, 1, mkChar("writer_version"));
2231     DecodeVersion(writer_version, &vv, &vp, &vs);
2232     snprintf(buf, 128, "%d.%d.%d", vv, vp, vs);
2233     SET_VECTOR_ELT(ans, 1, mkString(buf));
2234     SET_STRING_ELT(names, 2, mkChar("min_reader_version"));
2235     if (min_reader_version < 0)
2236 	/* unreleased version of R */
2237 	SET_VECTOR_ELT(ans, 2, ScalarString(NA_STRING));
2238     else {
2239 	DecodeVersion(min_reader_version, &vv, &vp, &vs);
2240 	snprintf(buf, 128, "%d.%d.%d", vv, vp, vs);
2241 	SET_VECTOR_ELT(ans, 2, mkString(buf));
2242     }
2243     SET_STRING_ELT(names, 3, mkChar("format"));
2244     switch(stream->type) {
2245     case R_pstream_ascii_format:
2246 	SET_VECTOR_ELT(ans, 3, mkString("ascii"));
2247 	break;
2248     case R_pstream_binary_format:
2249 	SET_VECTOR_ELT(ans, 3, mkString("binary"));
2250 	break;
2251     case R_pstream_xdr_format:
2252 	SET_VECTOR_ELT(ans, 3, mkString("xdr"));
2253 	break;
2254     default:
2255 	error(_("unknown input format"));
2256     }
2257     if (version == 3) {
2258 	SET_STRING_ELT(names, 4, mkChar("native_encoding"));
2259 	int nelen = InInteger(stream);
2260 	if (nelen > R_CODESET_MAX)
2261 	    error(_("invalid length of encoding name"));
2262 	char nbuf[nelen + 1];
2263 	InString(stream, nbuf, nelen);
2264 	nbuf[nelen] = '\0';
2265 	SET_VECTOR_ELT(ans, 4, mkString(nbuf));
2266     }
2267     setAttrib(ans, R_NamesSymbol, names);
2268     UNPROTECT(2); /* ans, names */
2269 
2270     return ans;
2271 }
2272 
2273 /*
2274  * Generic Persistent Stream Initializers
2275  */
2276 
2277 void
R_InitInPStream(R_inpstream_t stream,R_pstream_data_t data,R_pstream_format_t type,int (* inchar)(R_inpstream_t),void (* inbytes)(R_inpstream_t,void *,int),SEXP (* phook)(SEXP,SEXP),SEXP pdata)2278 R_InitInPStream(R_inpstream_t stream, R_pstream_data_t data,
2279 		R_pstream_format_t type,
2280 		int (*inchar)(R_inpstream_t),
2281 		void (*inbytes)(R_inpstream_t, void *, int),
2282 		SEXP (*phook)(SEXP, SEXP), SEXP pdata)
2283 {
2284     stream->data = data;
2285     stream->type = type;
2286     stream->InChar = inchar;
2287     stream->InBytes = inbytes;
2288     stream->InPersistHookFunc = phook;
2289     stream->InPersistHookData = pdata;
2290     stream->native_encoding[0] = 0;
2291     stream->nat2nat_obj = NULL;
2292     stream->nat2utf8_obj = NULL;
2293 }
2294 
2295 void
R_InitOutPStream(R_outpstream_t stream,R_pstream_data_t data,R_pstream_format_t type,int version,void (* outchar)(R_outpstream_t,int),void (* outbytes)(R_outpstream_t,void *,int),SEXP (* phook)(SEXP,SEXP),SEXP pdata)2296 R_InitOutPStream(R_outpstream_t stream, R_pstream_data_t data,
2297 		 R_pstream_format_t type, int version,
2298 		 void (*outchar)(R_outpstream_t, int),
2299 		 void (*outbytes)(R_outpstream_t, void *, int),
2300 		 SEXP (*phook)(SEXP, SEXP), SEXP pdata)
2301 {
2302     stream->data = data;
2303     stream->type = type;
2304     stream->version = version != 0 ? version : defaultSerializeVersion();
2305     stream->OutChar = outchar;
2306     stream->OutBytes = outbytes;
2307     stream->OutPersistHookFunc = phook;
2308     stream->OutPersistHookData = pdata;
2309 }
2310 
2311 
2312 /*
2313  * Persistent File Streams
2314  */
2315 
OutCharFile(R_outpstream_t stream,int c)2316 static void OutCharFile(R_outpstream_t stream, int c)
2317 {
2318     FILE *fp = stream->data;
2319     fputc(c, fp);
2320 }
2321 
2322 
InCharFile(R_inpstream_t stream)2323 static int InCharFile(R_inpstream_t stream)
2324 {
2325     FILE *fp = stream->data;
2326     return fgetc(fp);
2327 }
2328 
OutBytesFile(R_outpstream_t stream,void * buf,int length)2329 static void OutBytesFile(R_outpstream_t stream, void *buf, int length)
2330 {
2331     FILE *fp = stream->data;
2332     size_t out = fwrite(buf, 1, length, fp);
2333     if (out != length) error(_("write failed"));
2334 }
2335 
InBytesFile(R_inpstream_t stream,void * buf,int length)2336 static void InBytesFile(R_inpstream_t stream, void *buf, int length)
2337 {
2338     FILE *fp = stream->data;
2339     size_t in = fread(buf, 1, length, fp);
2340     if (in != length) error(_("read failed"));
2341 }
2342 
2343 void
R_InitFileOutPStream(R_outpstream_t stream,FILE * fp,R_pstream_format_t type,int version,SEXP (* phook)(SEXP,SEXP),SEXP pdata)2344 R_InitFileOutPStream(R_outpstream_t stream, FILE *fp,
2345 			  R_pstream_format_t type, int version,
2346 			  SEXP (*phook)(SEXP, SEXP), SEXP pdata)
2347 {
2348     R_InitOutPStream(stream, (R_pstream_data_t) fp, type, version,
2349 		     OutCharFile, OutBytesFile, phook, pdata);
2350 }
2351 
2352 void
R_InitFileInPStream(R_inpstream_t stream,FILE * fp,R_pstream_format_t type,SEXP (* phook)(SEXP,SEXP),SEXP pdata)2353 R_InitFileInPStream(R_inpstream_t stream, FILE *fp,
2354 			 R_pstream_format_t type,
2355 			 SEXP (*phook)(SEXP, SEXP), SEXP pdata)
2356 {
2357     R_InitInPStream(stream, (R_pstream_data_t) fp, type,
2358 		    InCharFile, InBytesFile, phook, pdata);
2359 }
2360 
2361 
2362 /*
2363  * Persistent Connection Streams
2364  */
2365 
2366 #include <Rconnections.h>
2367 
CheckInConn(Rconnection con)2368 static void CheckInConn(Rconnection con)
2369 {
2370     if (! con->isopen)
2371 	error(_("connection is not open"));
2372     if (! con->canread || con->read == NULL)
2373 	error(_("cannot read from this connection"));
2374 }
2375 
CheckOutConn(Rconnection con)2376 static void CheckOutConn(Rconnection con)
2377 {
2378     if (! con->isopen)
2379 	error(_("connection is not open"));
2380     if (! con->canwrite || con->write == NULL)
2381 	error(_("cannot write to this connection"));
2382 }
2383 
InBytesConn(R_inpstream_t stream,void * buf,int length)2384 static void InBytesConn(R_inpstream_t stream, void *buf, int length)
2385 {
2386     Rconnection con = (Rconnection) stream->data;
2387     CheckInConn(con);
2388     if (con->text) {
2389 	int i;
2390 	char *p = buf;
2391 	for (i = 0; i < length; i++)
2392 	    p[i] = (char) Rconn_fgetc(con);
2393     }
2394     else {
2395 	if (stream->type == R_pstream_ascii_format) {
2396 	    char linebuf[4];
2397 	    unsigned char *p = buf;
2398 	    int i;
2399 	    unsigned int res;
2400 	    for (i = 0; i < length; i++) {
2401 		size_t ncread = Rconn_getline(con, linebuf, 3);
2402 		if (ncread != 2)
2403 		    error(_("error reading from ascii connection"));
2404 		if (!sscanf(linebuf, "%02x", &res))
2405 		    error(_("unexpected format in ascii connection"));
2406 		*p++ = (unsigned char)res;
2407 	    }
2408 	} else {
2409 	    if (length != con->read(buf, 1, length, con))
2410 		error(_("error reading from connection"));
2411 	}
2412     }
2413 }
2414 
InCharConn(R_inpstream_t stream)2415 static int InCharConn(R_inpstream_t stream)
2416 {
2417     char buf[1];
2418     Rconnection con = (Rconnection) stream->data;
2419     CheckInConn(con);
2420     if (con->text)
2421 	return Rconn_fgetc(con);
2422     else {
2423 	if (1 != con->read(buf, 1, 1, con))
2424 	    error(_("error reading from connection"));
2425 	return buf[0];
2426     }
2427 }
2428 
OutBytesConn(R_outpstream_t stream,void * buf,int length)2429 static void OutBytesConn(R_outpstream_t stream, void *buf, int length)
2430 {
2431     Rconnection con = (Rconnection) stream->data;
2432     CheckOutConn(con);
2433     if (con->text) {
2434 	int i;
2435 	char *p = buf;
2436 	for (i = 0; i < length; i++)
2437 	    Rconn_printf(con, "%c", p[i]);
2438     }
2439     else {
2440 	if (length != con->write(buf, 1, length, con))
2441 	    error(_("error writing to connection"));
2442     }
2443 }
2444 
OutCharConn(R_outpstream_t stream,int c)2445 static void OutCharConn(R_outpstream_t stream, int c)
2446 {
2447     Rconnection con = (Rconnection) stream->data;
2448     CheckOutConn(con);
2449     if (con->text)
2450 	Rconn_printf(con, "%c", c);
2451     else {
2452 	char buf[1];
2453 	buf[0] = (char) c;
2454 	if (1 != con->write(buf, 1, 1, con))
2455 	    error(_("error writing to connection"));
2456     }
2457 }
2458 
R_InitConnOutPStream(R_outpstream_t stream,Rconnection con,R_pstream_format_t type,int version,SEXP (* phook)(SEXP,SEXP),SEXP pdata)2459 void R_InitConnOutPStream(R_outpstream_t stream, Rconnection con,
2460 			  R_pstream_format_t type, int version,
2461 			  SEXP (*phook)(SEXP, SEXP), SEXP pdata)
2462 {
2463     CheckOutConn(con);
2464     if (con->text &&
2465 	!(type == R_pstream_ascii_format || type == R_pstream_asciihex_format) )
2466 	error(_("only ascii format can be written to text mode connections"));
2467     R_InitOutPStream(stream, (R_pstream_data_t) con, type, version,
2468 		     OutCharConn, OutBytesConn, phook, pdata);
2469 }
2470 
R_InitConnInPStream(R_inpstream_t stream,Rconnection con,R_pstream_format_t type,SEXP (* phook)(SEXP,SEXP),SEXP pdata)2471 void R_InitConnInPStream(R_inpstream_t stream,  Rconnection con,
2472 			 R_pstream_format_t type,
2473 			 SEXP (*phook)(SEXP, SEXP), SEXP pdata)
2474 {
2475     CheckInConn(con);
2476     if (con->text) {
2477 	if (type == R_pstream_any_format)
2478 	    type = R_pstream_ascii_format;
2479 	else if (type != R_pstream_ascii_format)
2480 	    error(_("only ascii format can be read from text mode connections"));
2481     }
2482     R_InitInPStream(stream, (R_pstream_data_t) con, type,
2483 		    InCharConn, InBytesConn, phook, pdata);
2484 }
2485 
2486 /* ought to quote the argument, but it should only be an ENVSXP or STRSXP */
CallHook(SEXP x,SEXP fun)2487 static SEXP CallHook(SEXP x, SEXP fun)
2488 {
2489     SEXP val, call;
2490     PROTECT(call = LCONS(fun, LCONS(x, R_NilValue)));
2491     val = eval(call, R_GlobalEnv);
2492     UNPROTECT(1);
2493     return val;
2494 }
2495 
con_cleanup(void * data)2496 static void con_cleanup(void *data)
2497 {
2498     Rconnection con = data;
2499     if(con->isopen) con->close(con);
2500 }
2501 
2502 /* Used from saveRDS().
2503    This became public in R 2.13.0, and that version added support for
2504    connections internally */
2505 SEXP attribute_hidden
do_serializeToConn(SEXP call,SEXP op,SEXP args,SEXP env)2506 do_serializeToConn(SEXP call, SEXP op, SEXP args, SEXP env)
2507 {
2508     /* serializeToConn(object, conn, ascii, version, hook) */
2509 
2510     SEXP object, fun;
2511     Rboolean ascii, wasopen;
2512     int version;
2513     Rconnection con;
2514     struct R_outpstream_st out;
2515     R_pstream_format_t type;
2516     SEXP (*hook)(SEXP, SEXP);
2517     RCNTXT cntxt;
2518 
2519     checkArity(op, args);
2520 
2521     object = CAR(args);
2522     con = getConnection(asInteger(CADR(args)));
2523 
2524     if (TYPEOF(CADDR(args)) != LGLSXP)
2525 	error(_("'ascii' must be logical"));
2526     ascii = INTEGER(CADDR(args))[0];
2527     if (ascii == NA_LOGICAL) type = R_pstream_asciihex_format;
2528     else if (ascii) type = R_pstream_ascii_format;
2529     else type = R_pstream_xdr_format;
2530 
2531     if (CADDDR(args) == R_NilValue)
2532 	version = defaultSerializeVersion();
2533     else
2534 	version = asInteger(CADDDR(args));
2535     if (version == NA_INTEGER || version <= 0)
2536 	error(_("bad version value"));
2537     if (version < 2)
2538 	error(_("cannot save to connections in version %d format"), version);
2539 
2540     fun = CAR(nthcdr(args,4));
2541     hook = fun != R_NilValue ? CallHook : NULL;
2542 
2543     /* Now we need to do some sanity checking of the arguments.
2544        A filename will already have been opened, so anything
2545        not open was specified as a connection directly.
2546      */
2547     wasopen = con->isopen;
2548     if(!wasopen) {
2549 	char mode[5];
2550 	strcpy(mode, con->mode);
2551 	strcpy(con->mode, ascii ? "w" : "wb");
2552 	if(!con->open(con)) error(_("cannot open the connection"));
2553 	strcpy(con->mode, mode);
2554 	/* Set up a context which will close the connection on error */
2555 	begincontext(&cntxt, CTXT_CCODE, R_NilValue, R_BaseEnv, R_BaseEnv,
2556 		     R_NilValue, R_NilValue);
2557 	cntxt.cend = &con_cleanup;
2558 	cntxt.cenddata = con;
2559     }
2560     if (!ascii && con->text)
2561 	error(_("binary-mode connection required for ascii=FALSE"));
2562     if(!con->canwrite)
2563 	error(_("connection not open for writing"));
2564 
2565     R_InitConnOutPStream(&out, con, type, version, hook, fun);
2566     R_Serialize(object, &out);
2567     if(!wasopen) {endcontext(&cntxt); con->close(con);}
2568 
2569     return R_NilValue;
2570 }
2571 
2572 /* unserializeFromConn(conn, hook) used from readRDS().
2573    It became public in R 2.13.0, and that version added support for
2574    connections internally */
2575 SEXP attribute_hidden
do_unserializeFromConn(SEXP call,SEXP op,SEXP args,SEXP env)2576 do_unserializeFromConn(SEXP call, SEXP op, SEXP args, SEXP env)
2577 {
2578     /* 0 .. unserializeFromConn(conn, hook) */
2579     /* 1 .. serializeInfoFromConn(conn) */
2580 
2581     struct R_inpstream_st in;
2582     Rconnection con;
2583     SEXP fun, ans;
2584     SEXP (*hook)(SEXP, SEXP);
2585     Rboolean wasopen;
2586     RCNTXT cntxt;
2587 
2588     checkArity(op, args);
2589 
2590     con = getConnection(asInteger(CAR(args)));
2591 
2592     /* Now we need to do some sanity checking of the arguments.
2593        A filename will already have been opened, so anything
2594        not open was specified as a connection directly.
2595      */
2596     wasopen = con->isopen;
2597     if(!wasopen) {
2598 	char mode[5];
2599 	strcpy(mode, con->mode);
2600 	strcpy(con->mode, "rb");
2601 	if(!con->open(con)) error(_("cannot open the connection"));
2602 	strcpy(con->mode, mode);
2603 	/* Set up a context which will close the connection on error */
2604 	begincontext(&cntxt, CTXT_CCODE, R_NilValue, R_BaseEnv, R_BaseEnv,
2605 		     R_NilValue, R_NilValue);
2606 	cntxt.cend = &con_cleanup;
2607 	cntxt.cenddata = con;
2608     }
2609     if(!con->canread) error(_("connection not open for reading"));
2610 
2611     fun = PRIMVAL(op) == 0 ? CADR(args) : R_NilValue;
2612     hook = fun != R_NilValue ? CallHook : NULL;
2613     R_InitConnInPStream(&in, con, R_pstream_any_format, hook, fun);
2614     ans = PRIMVAL(op) == 0 ? R_Unserialize(&in) : R_SerializeInfo(&in);
2615     if(!wasopen) {
2616 	PROTECT(ans); /* paranoia about next line */
2617 	endcontext(&cntxt);
2618 	con->close(con);
2619 	UNPROTECT(1);
2620     }
2621     return ans;
2622 }
2623 
2624 /*
2625  * Persistent Buffered Binary Connection Streams
2626  */
2627 
2628 /**** should eventually come from a public header file */
2629 size_t R_WriteConnection(Rconnection con, void *buf, size_t n);
2630 
2631 #define BCONBUFSIZ 4096
2632 
2633 typedef struct bconbuf_st {
2634     Rconnection con;
2635     int count;
2636     unsigned char buf[BCONBUFSIZ];
2637 } *bconbuf_t;
2638 
flush_bcon_buffer(bconbuf_t bb)2639 static void flush_bcon_buffer(bconbuf_t bb)
2640 {
2641     if (R_WriteConnection(bb->con, bb->buf, bb->count) != bb->count)
2642 	error(_("error writing to connection"));
2643     bb->count = 0;
2644 }
2645 
OutCharBB(R_outpstream_t stream,int c)2646 static void OutCharBB(R_outpstream_t stream, int c)
2647 {
2648     bconbuf_t bb = stream->data;
2649     if (bb->count >= BCONBUFSIZ)
2650 	flush_bcon_buffer(bb);
2651     bb->buf[bb->count++] = (char) c;
2652 }
2653 
OutBytesBB(R_outpstream_t stream,void * buf,int length)2654 static void OutBytesBB(R_outpstream_t stream, void *buf, int length)
2655 {
2656     bconbuf_t bb = stream->data;
2657     if (bb->count + length > BCONBUFSIZ)
2658 	flush_bcon_buffer(bb);
2659     if (length <= BCONBUFSIZ) {
2660 	memcpy(bb->buf + bb->count, buf, length);
2661 	bb->count += length;
2662     }
2663     else if (R_WriteConnection(bb->con, buf, length) != length)
2664 	error(_("error writing to connection"));
2665 }
2666 
InitBConOutPStream(R_outpstream_t stream,bconbuf_t bb,Rconnection con,R_pstream_format_t type,int version,SEXP (* phook)(SEXP,SEXP),SEXP pdata)2667 static void InitBConOutPStream(R_outpstream_t stream, bconbuf_t bb,
2668 			       Rconnection con,
2669 			       R_pstream_format_t type, int version,
2670 			       SEXP (*phook)(SEXP, SEXP), SEXP pdata)
2671 {
2672     bb->count = 0;
2673     bb->con = con;
2674     R_InitOutPStream(stream, (R_pstream_data_t) bb, type, version,
2675 		     OutCharBB, OutBytesBB, phook, pdata);
2676 }
2677 
2678 /* only for use by serialize(), with binary write to a socket connection */
2679 static SEXP
R_serializeb(SEXP object,SEXP icon,SEXP xdr,SEXP Sversion,SEXP fun)2680 R_serializeb(SEXP object, SEXP icon, SEXP xdr, SEXP Sversion, SEXP fun)
2681 {
2682     struct R_outpstream_st out;
2683     SEXP (*hook)(SEXP, SEXP);
2684     struct bconbuf_st bbs;
2685     Rconnection con = getConnection(asInteger(icon));
2686     int version;
2687 
2688     if (Sversion == R_NilValue)
2689 	version = defaultSerializeVersion();
2690     else version = asInteger(Sversion);
2691     if (version == NA_INTEGER || version <= 0)
2692 	error(_("bad version value"));
2693 
2694     hook = fun != R_NilValue ? CallHook : NULL;
2695 
2696     InitBConOutPStream(&out, &bbs, con,
2697 		       asLogical(xdr) ? R_pstream_xdr_format : R_pstream_binary_format,
2698 		       version, hook, fun);
2699     R_Serialize(object, &out);
2700     flush_bcon_buffer(&bbs);
2701     return R_NilValue;
2702 }
2703 
2704 
2705 /*
2706  * Persistent Memory Streams
2707  */
2708 
2709 typedef struct membuf_st {
2710     R_size_t size;
2711     R_size_t count;
2712     unsigned char *buf;
2713 } *membuf_t;
2714 
2715 
2716 #define INCR MAXELTSIZE
resize_buffer(membuf_t mb,R_size_t needed)2717 static void resize_buffer(membuf_t mb, R_size_t needed)
2718 {
2719     if(needed > R_XLEN_T_MAX)
2720 	error(_("serialization is too large to store in a raw vector"));
2721 #ifdef LONG_VECTOR_SUPPORT
2722     if(needed < 10000000) /* ca 10MB */
2723 	needed = (1+2*needed/INCR) * INCR;
2724     else
2725 	needed = (R_size_t)((1+1.2*(double)needed/INCR) * INCR);
2726 #else
2727     if(needed < 10000000) /* ca 10MB */
2728 	needed = (1+2*needed/INCR) * INCR;
2729     else if(needed < 1700000000) /* close to 2GB/1.2 */
2730 	needed = (R_size_t)((1+1.2*(double)needed/INCR) * INCR);
2731     else if(needed < INT_MAX - INCR)
2732 	needed = (1+needed/INCR) * INCR;
2733 #endif
2734     unsigned char *tmp = realloc(mb->buf, needed);
2735     if (tmp == NULL) {
2736 	free(mb->buf); mb->buf = NULL;
2737 	error(_("cannot allocate buffer"));
2738     } else mb->buf = tmp;
2739     mb->size = needed;
2740 }
2741 
OutCharMem(R_outpstream_t stream,int c)2742 static void OutCharMem(R_outpstream_t stream, int c)
2743 {
2744     membuf_t mb = stream->data;
2745     if (mb->count >= mb->size)
2746 	resize_buffer(mb, mb->count + 1);
2747     mb->buf[mb->count++] = (char) c;
2748 }
2749 
OutBytesMem(R_outpstream_t stream,void * buf,int length)2750 static void OutBytesMem(R_outpstream_t stream, void *buf, int length)
2751 {
2752     membuf_t mb = stream->data;
2753     R_size_t needed = mb->count + (R_size_t) length;
2754 #ifndef LONG_VECTOR_SUPPORT
2755     /* There is a potential overflow here on 32-bit systems */
2756     if((double) mb->count + length > (double) INT_MAX)
2757 	error(_("serialization is too large to store in a raw vector"));
2758 #endif
2759     if (needed > mb->size) resize_buffer(mb, needed);
2760     memcpy(mb->buf + mb->count, buf, length);
2761     mb->count = needed;
2762 }
2763 
InCharMem(R_inpstream_t stream)2764 static int InCharMem(R_inpstream_t stream)
2765 {
2766     membuf_t mb = stream->data;
2767     if (mb->count >= mb->size)
2768 	error(_("read error"));
2769     return mb->buf[mb->count++];
2770 }
2771 
InBytesMem(R_inpstream_t stream,void * buf,int length)2772 static void InBytesMem(R_inpstream_t stream, void *buf, int length)
2773 {
2774     membuf_t mb = stream->data;
2775     if (mb->count + (R_size_t) length > mb->size)
2776 	error(_("read error"));
2777     memcpy(buf, mb->buf + mb->count, length);
2778     mb->count += length;
2779 }
2780 
InitMemInPStream(R_inpstream_t stream,membuf_t mb,void * buf,R_size_t length,SEXP (* phook)(SEXP,SEXP),SEXP pdata)2781 static void InitMemInPStream(R_inpstream_t stream, membuf_t mb,
2782 			     void *buf, R_size_t length,
2783 			     SEXP (*phook)(SEXP, SEXP), SEXP pdata)
2784 {
2785     mb->count = 0;
2786     mb->size = length;
2787     mb->buf = buf;
2788     R_InitInPStream(stream, (R_pstream_data_t) mb, R_pstream_any_format,
2789 		    InCharMem, InBytesMem, phook, pdata);
2790 }
2791 
InitMemOutPStream(R_outpstream_t stream,membuf_t mb,R_pstream_format_t type,int version,SEXP (* phook)(SEXP,SEXP),SEXP pdata)2792 static void InitMemOutPStream(R_outpstream_t stream, membuf_t mb,
2793 			      R_pstream_format_t type, int version,
2794 			      SEXP (*phook)(SEXP, SEXP), SEXP pdata)
2795 {
2796     mb->count = 0;
2797     mb->size = 0;
2798     mb->buf = NULL;
2799     R_InitOutPStream(stream, (R_pstream_data_t) mb, type, version,
2800 		     OutCharMem, OutBytesMem, phook, pdata);
2801 }
2802 
free_mem_buffer(void * data)2803 static void free_mem_buffer(void *data)
2804 {
2805     membuf_t mb = data;
2806     if (mb->buf != NULL) {
2807 	unsigned char *buf = mb->buf;
2808 	mb->buf = NULL;
2809 	free(buf);
2810     }
2811 }
2812 
CloseMemOutPStream(R_outpstream_t stream)2813 static SEXP CloseMemOutPStream(R_outpstream_t stream)
2814 {
2815     SEXP val;
2816     membuf_t mb = stream->data;
2817     /* duplicate check, for future proofing */
2818 #ifndef LONG_VECTOR_SUPPORT
2819     if(mb->count > INT_MAX)
2820 	error(_("serialization is too large to store in a raw vector"));
2821 #endif
2822     PROTECT(val = allocVector(RAWSXP, mb->count));
2823     memcpy(RAW(val), mb->buf, mb->count);
2824     free_mem_buffer(mb);
2825     UNPROTECT(1);
2826     return val;
2827 }
2828 
2829 static SEXP
R_serialize(SEXP object,SEXP icon,SEXP ascii,SEXP Sversion,SEXP fun)2830 R_serialize(SEXP object, SEXP icon, SEXP ascii, SEXP Sversion, SEXP fun)
2831 {
2832     struct R_outpstream_st out;
2833     R_pstream_format_t type;
2834     SEXP (*hook)(SEXP, SEXP);
2835     int version;
2836 
2837     if (Sversion == R_NilValue)
2838 	version = defaultSerializeVersion();
2839     else version = asInteger(Sversion);
2840     if (version == NA_INTEGER || version <= 0)
2841 	error(_("bad version value"));
2842 
2843     hook = fun != R_NilValue ? CallHook : NULL;
2844 
2845     // Prior to 3.2.0 this was logical, values 0/1/NA for binary.
2846     int asc = asInteger(ascii);
2847     switch(asc) {
2848     case 1: type = R_pstream_ascii_format; break;
2849     case 2: type = R_pstream_asciihex_format; break;
2850     case 3: type = R_pstream_binary_format; break;
2851     default: type = R_pstream_xdr_format; break;
2852     }
2853 
2854     if (icon == R_NilValue) {
2855 	RCNTXT cntxt;
2856 	struct membuf_st mbs;
2857 	SEXP val;
2858 
2859 	/* set up a context which will free the buffer if there is an error */
2860 	begincontext(&cntxt, CTXT_CCODE, R_NilValue, R_BaseEnv, R_BaseEnv,
2861 		     R_NilValue, R_NilValue);
2862 	cntxt.cend = &free_mem_buffer;
2863 	cntxt.cenddata = &mbs;
2864 
2865 	InitMemOutPStream(&out, &mbs, type, version, hook, fun);
2866 	R_Serialize(object, &out);
2867 
2868 	PROTECT(val = CloseMemOutPStream(&out));
2869 
2870 	/* end the context after anything that could raise an error but before
2871 	   calling OutTerm so it doesn't get called twice */
2872 	endcontext(&cntxt);
2873 
2874 	UNPROTECT(1); /* val */
2875 	return val;
2876     }
2877     else {
2878 	Rconnection con = getConnection(asInteger(icon));
2879 	R_InitConnOutPStream(&out, con, type, version, hook, fun);
2880 	R_Serialize(object, &out);
2881 	return R_NilValue;
2882     }
2883 }
2884 
2885 
R_unserialize(SEXP icon,SEXP fun)2886 SEXP attribute_hidden R_unserialize(SEXP icon, SEXP fun)
2887 {
2888     struct R_inpstream_st in;
2889     SEXP (*hook)(SEXP, SEXP);
2890 
2891     hook = fun != R_NilValue ? CallHook : NULL;
2892 
2893     if (TYPEOF(icon) == STRSXP && LENGTH(icon) > 0) {
2894 	/* was the format in R < 2.4.0, removed in R 2.8.0 */
2895 	error("character vectors are no longer accepted by unserialize()");
2896 	return R_NilValue; /* -Wall */
2897     } else if (TYPEOF(icon) == RAWSXP) {
2898 	/* We might want to read from a long raw vector */
2899 	struct membuf_st mbs;
2900 	void *data = RAW(icon);
2901 	R_size_t length = XLENGTH(icon);
2902 	InitMemInPStream(&in, &mbs, data,  length, hook, fun);
2903 	return R_Unserialize(&in);
2904     } else {
2905 	Rconnection con = getConnection(asInteger(icon));
2906 	R_InitConnInPStream(&in, con, R_pstream_any_format, hook, fun);
2907 	return R_Unserialize(&in);
2908     }
2909 }
2910 
2911 
2912 /*
2913  * Support Code for Lazy Loading of Packages
2914  */
2915 
2916 
2917 #define IS_PROPER_STRING(s) (TYPEOF(s) == STRSXP && LENGTH(s) > 0)
2918 
2919 /* Appends a raw vector to the end of a file using binary mode.
2920    Returns an integer vector of the initial offset of the string in
2921    the file and the length of the vector. */
2922 
appendRawToFile(SEXP file,SEXP bytes)2923 static SEXP appendRawToFile(SEXP file, SEXP bytes)
2924 {
2925     FILE *fp;
2926     size_t len, out;
2927     long pos;  // what ftell gives: won't work for > 2GB files
2928     SEXP val;
2929     const void *vmax;
2930     const char *cfile;
2931 
2932     if (! IS_PROPER_STRING(file))
2933 	error(_("not a proper file name"));
2934     vmax = vmaxget();
2935     cfile = translateCharFP(STRING_ELT(file, 0));
2936     if (TYPEOF(bytes) != RAWSXP)
2937 	error(_("not a proper raw vector"));
2938 #ifdef HAVE_WORKING_FTELL
2939     /* Windows' ftell returns position 0 with "ab" */
2940     if ((fp = R_fopen(cfile, "ab")) == NULL) {
2941 	error( _("cannot open file '%s': %s"), cfile,
2942 	       strerror(errno));
2943     }
2944 #else
2945     if ((fp = R_fopen(cfile, "r+b")) == NULL) {
2946 	error( _("cannot open file '%s': %s"), cfile,
2947 	       strerror(errno));
2948     }
2949     if (fseek(fp, 0, SEEK_END) != 0) {
2950 	fclose(fp);
2951 	error(_("seek failed on %s"), cfile);
2952     }
2953 #endif
2954 
2955     len = LENGTH(bytes);
2956     pos = ftell(fp);
2957     out = fwrite(RAW(bytes), 1, len, fp);
2958     fclose(fp);
2959 
2960     if (out != len) error(_("write failed"));
2961     if (pos == -1) error(_("could not determine file position"));
2962 
2963     val = allocVector(INTSXP, 2);
2964     INTEGER(val)[0] = (int) pos;
2965     INTEGER(val)[1] = (int) len;
2966     vmaxset(vmax);
2967 
2968     return val;
2969 }
2970 
2971 /* Interface to cache the pkg.rdb files */
2972 
2973 #define NC 100
2974 static int used = 0;
2975 static char names[NC][PATH_MAX];
2976 static char *ptr[NC];
2977 
2978 SEXP attribute_hidden
do_lazyLoadDBflush(SEXP call,SEXP op,SEXP args,SEXP env)2979 do_lazyLoadDBflush(SEXP call, SEXP op, SEXP args, SEXP env)
2980 {
2981     checkArity(op, args);
2982 
2983     int i;
2984     const char *cfile = translateCharFP(STRING_ELT(CAR(args), 0));
2985 
2986     /* fprintf(stderr, "flushing file %s", cfile); */
2987     for (i = 0; i < used; i++)
2988 	if(strcmp(cfile, names[i]) == 0) {
2989 	    strcpy(names[i], "");
2990 	    free(ptr[i]);
2991 	    /* fprintf(stderr, " found at pos %d in cache", i); */
2992 	    break;
2993 	}
2994     /* fprintf(stderr, "\n"); */
2995     return R_NilValue;
2996 }
2997 
2998 
2999 /* Reads, in binary mode, the bytes in the range specified by a
3000    position/length vector and returns them as raw vector. */
3001 
3002 /* There are some large lazy-data examples, e.g. 80Mb for SNPMaP.cdm */
3003 #define LEN_LIMIT 10*1048576
readRawFromFile(SEXP file,SEXP key)3004 static SEXP readRawFromFile(SEXP file, SEXP key)
3005 {
3006     FILE *fp;
3007     int offset, len, in, i, icache = -1;
3008     long filelen;
3009     SEXP val;
3010     const void *vmax;
3011     const char *cfile;
3012 
3013     if (! IS_PROPER_STRING(file))
3014 	error(_("not a proper file name"));
3015     vmax = vmaxget();
3016     cfile = translateCharFP(STRING_ELT(file, 0));
3017     if (TYPEOF(key) != INTSXP || LENGTH(key) != 2)
3018 	error(_("bad offset/length argument"));
3019 
3020     offset = INTEGER(key)[0];
3021     len = INTEGER(key)[1];
3022 
3023     val = allocVector(RAWSXP, len);
3024     /* Do we have this database cached? */
3025     for (i = 0; i < used; i++)
3026 	if(strcmp(cfile, names[i]) == 0) {icache = i; break;}
3027     if (icache >= 0) {
3028 	memcpy(RAW(val), ptr[icache]+offset, len);
3029 	vmaxset(vmax);
3030 	return val;
3031     }
3032 
3033     /* find a vacant slot? */
3034     for (i = 0; i < used; i++)
3035 	if(strcmp("", names[i]) == 0) {icache = i; break;}
3036     if(icache < 0 && used < NC) icache = used++;
3037 
3038     if(icache >= 0) {
3039 	if ((fp = R_fopen(cfile, "rb")) == NULL)
3040 	    error(_("cannot open file '%s': %s"), cfile, strerror(errno));
3041 	if (fseek(fp, 0, SEEK_END) != 0) {
3042 	    fclose(fp);
3043 	    error(_("seek failed on %s"), cfile);
3044 	}
3045 	filelen = ftell(fp);
3046 	if (filelen < LEN_LIMIT) {
3047 	    char *p;
3048 	    /* fprintf(stderr, "adding file '%s' at pos %d in cache, length %d\n",
3049 	       cfile, icache, filelen); */
3050 	    p = (char *) malloc(filelen);
3051 	    if (p) {
3052 		strcpy(names[icache], cfile);
3053 		ptr[icache] = p;
3054 		if (fseek(fp, 0, SEEK_SET) != 0) {
3055 		    fclose(fp);
3056 		    error(_("seek failed on %s"), cfile);
3057 		}
3058 		in = (int) fread(p, 1, filelen, fp);
3059 		fclose(fp);
3060 		if (filelen != in) error(_("read failed on %s"), cfile);
3061 		memcpy(RAW(val), p+offset, len);
3062 	    } else {
3063 		if (fseek(fp, offset, SEEK_SET) != 0) {
3064 		    fclose(fp);
3065 		    error(_("seek failed on %s"), cfile);
3066 		}
3067 		in = (int) fread(RAW(val), 1, len, fp);
3068 		fclose(fp);
3069 		if (len != in) error(_("read failed on %s"), cfile);
3070 	    }
3071 	    vmaxset(vmax);
3072 	    return val;
3073 	} else {
3074 	    if (fseek(fp, offset, SEEK_SET) != 0) {
3075 		fclose(fp);
3076 		error(_("seek failed on %s"), cfile);
3077 	    }
3078 	    in = (int) fread(RAW(val), 1, len, fp);
3079 	    fclose(fp);
3080 	    if (len != in) error(_("read failed on %s"), cfile);
3081 	    vmaxset(vmax);
3082 	    return val;
3083 	}
3084     }
3085 
3086     if ((fp = R_fopen(cfile, "rb")) == NULL)
3087 	error(_("cannot open file '%s': %s"), cfile, strerror(errno));
3088     if (fseek(fp, offset, SEEK_SET) != 0) {
3089 	fclose(fp);
3090 	error(_("seek failed on %s"), cfile);
3091     }
3092     in = (int) fread(RAW(val), 1, len, fp);
3093     fclose(fp);
3094     if (len != in) error(_("read failed on %s"), cfile);
3095     vmaxset(vmax);
3096     return val;
3097 }
3098 
3099 /* Gets the binding values of variables from a frame and returns them
3100    as a list.  If the force argument is true, promises are forced;
3101    otherwise they are not. */
3102 
R_getVarsFromFrame(SEXP vars,SEXP env,SEXP forcesxp)3103 static SEXP R_getVarsFromFrame(SEXP vars, SEXP env, SEXP forcesxp)
3104 {
3105     SEXP val, tmp, sym;
3106     Rboolean force;
3107     int i, len;
3108 
3109     if (TYPEOF(env) == NILSXP) {
3110 	error(_("use of NULL environment is defunct"));
3111 	env = R_BaseEnv;
3112     } else
3113     if (TYPEOF(env) != ENVSXP)
3114 	error(_("bad environment"));
3115     if (TYPEOF(vars) != STRSXP)
3116 	error(_("bad variable names"));
3117     force = asLogical(forcesxp);
3118 
3119     len = LENGTH(vars);
3120     PROTECT(val = allocVector(VECSXP, len));
3121     for (i = 0; i < len; i++) {
3122 	sym = installTrChar(STRING_ELT(vars, i));
3123 
3124 	tmp = findVarInFrame(env, sym);
3125 	if (tmp == R_UnboundValue) {
3126 /*		PrintValue(env);
3127 		PrintValue(R_GetTraceback(0)); */  /* DJM debugging */
3128 	    error(_("object '%s' not found"), EncodeChar(STRING_ELT(vars, i)));
3129 	    }
3130 	if (force && TYPEOF(tmp) == PROMSXP) {
3131 	    PROTECT(tmp);
3132 	    tmp = eval(tmp, R_GlobalEnv);
3133 	    ENSURE_NAMEDMAX(tmp);
3134 	    UNPROTECT(1);
3135 	}
3136 	else ENSURE_NAMED(tmp); /* should not really be needed - LT */
3137 	SET_VECTOR_ELT(val, i, tmp);
3138     }
3139     setAttrib(val, R_NamesSymbol, vars);
3140     UNPROTECT(1);
3141 
3142     return val;
3143 }
3144 
3145 /* from connections.c */
3146 SEXP R_compress1(SEXP in);
3147 SEXP R_decompress1(SEXP in, Rboolean *err);
3148 SEXP R_compress2(SEXP in);
3149 SEXP R_decompress2(SEXP in, Rboolean *err);
3150 SEXP R_compress3(SEXP in);
3151 SEXP R_decompress3(SEXP in, Rboolean *err);
3152 
3153 /* Serializes and, optionally, compresses a value and appends the
3154    result to a file.  Returns the key position/length key for
3155    retrieving the value */
3156 
3157 static SEXP
R_lazyLoadDBinsertValue(SEXP value,SEXP file,SEXP ascii,SEXP compsxp,SEXP hook)3158 R_lazyLoadDBinsertValue(SEXP value, SEXP file, SEXP ascii,
3159 			SEXP compsxp, SEXP hook)
3160 {
3161     PROTECT_INDEX vpi;
3162     int compress = asInteger(compsxp);
3163     SEXP key;
3164 
3165     value = R_serialize(value, R_NilValue, ascii, R_NilValue, hook);
3166     PROTECT_WITH_INDEX(value, &vpi);
3167     if (compress == 3)
3168 	REPROTECT(value = R_compress3(value), vpi);
3169     else if (compress == 2)
3170 	REPROTECT(value = R_compress2(value), vpi);
3171     else if (compress)
3172 	REPROTECT(value = R_compress1(value), vpi);
3173     key = appendRawToFile(file, value);
3174     UNPROTECT(1);
3175     return key;
3176 }
3177 
3178 /* Retrieves a sequence of bytes as specified by a position/length key
3179    from a file, optionally decompresses, and unserializes the bytes.
3180    If the result is a promise, then the promise is forced. */
3181 
3182 SEXP attribute_hidden
do_lazyLoadDBfetch(SEXP call,SEXP op,SEXP args,SEXP env)3183 do_lazyLoadDBfetch(SEXP call, SEXP op, SEXP args, SEXP env)
3184 {
3185     SEXP key, file, compsxp, hook;
3186     PROTECT_INDEX vpi;
3187     int compressed;
3188     Rboolean err = FALSE;
3189     SEXP val;
3190 
3191     checkArity(op, args);
3192     key = CAR(args); args = CDR(args);
3193     file = CAR(args); args = CDR(args);
3194     compsxp = CAR(args); args = CDR(args);
3195     hook = CAR(args);
3196     compressed = asInteger(compsxp);
3197 
3198     PROTECT_WITH_INDEX(val = readRawFromFile(file, key), &vpi);
3199     if (compressed == 3)
3200 	REPROTECT(val = R_decompress3(val, &err), vpi);
3201     else if (compressed == 2)
3202 	REPROTECT(val = R_decompress2(val, &err), vpi);
3203     else if (compressed)
3204 	REPROTECT(val = R_decompress1(val, &err), vpi);
3205     if (err) error("lazy-load database '%s' is corrupt",
3206 		   translateChar(STRING_ELT(file, 0)));
3207     val = R_unserialize(val, hook);
3208     if (TYPEOF(val) == PROMSXP) {
3209 	REPROTECT(val, vpi);
3210 	val = eval(val, R_GlobalEnv);
3211 	ENSURE_NAMEDMAX(val);
3212     }
3213     UNPROTECT(1);
3214     return val;
3215 }
3216 
3217 SEXP attribute_hidden
do_getVarsFromFrame(SEXP call,SEXP op,SEXP args,SEXP env)3218 do_getVarsFromFrame(SEXP call, SEXP op, SEXP args, SEXP env)
3219 {
3220     checkArity(op, args);
3221     return R_getVarsFromFrame(CAR(args), CADR(args), CADDR(args));
3222 }
3223 
3224 
3225 SEXP attribute_hidden
do_lazyLoadDBinsertValue(SEXP call,SEXP op,SEXP args,SEXP env)3226 do_lazyLoadDBinsertValue(SEXP call, SEXP op, SEXP args, SEXP env)
3227 {
3228     checkArity(op, args);
3229     SEXP value, file, ascii, compsxp, hook;
3230     value = CAR(args); args = CDR(args);
3231     file = CAR(args); args = CDR(args);
3232     ascii = CAR(args); args = CDR(args);
3233     compsxp = CAR(args); args = CDR(args);
3234     hook = CAR(args); args = CDR(args);
3235     return R_lazyLoadDBinsertValue(value, file, ascii, compsxp, hook);
3236 }
3237 
3238 SEXP attribute_hidden
do_serialize(SEXP call,SEXP op,SEXP args,SEXP env)3239 do_serialize(SEXP call, SEXP op, SEXP args, SEXP env)
3240 {
3241     checkArity(op, args);
3242     if (PRIMVAL(op) == 2) return R_unserialize(CAR(args), CADR(args));
3243 
3244     SEXP object, icon, type, ver, fun;
3245     object = CAR(args); args = CDR(args);
3246     icon = CAR(args); args = CDR(args);
3247     type = CAR(args); args = CDR(args);
3248     ver = CAR(args); args = CDR(args);
3249     fun = CAR(args);
3250 
3251     if(PRIMVAL(op) == 1)
3252 	return R_serializeb(object, icon, type, ver, fun);
3253     else
3254 	return R_serialize(object, icon, type, ver, fun);
3255 }
3256