1 /*
2 * R : A Computer Language for Statistical Data Analysis
3 * Copyright (C) 1995--2020 The R Core Team
4 *
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation; either version 2 of the License, or
8 * (at your option) any later version.
9 *
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
14 *
15 * You should have received a copy of the GNU General Public License
16 * along with this program; if not, a copy is available at
17 * https://www.R-project.org/Licenses/
18 */
19
20 /* <UTF8> byte-level access is only to compare with chars <= 0x7F */
21
22 #ifdef HAVE_CONFIG_H
23 #include <config.h>
24 #endif
25
26 #define NEED_CONNECTION_PSTREAMS
27 #define R_USE_SIGNALS 1
28 #include <Defn.h>
29 #include <Rmath.h>
30 #include <Fileio.h>
31 #include <Rversion.h>
32 #include <R_ext/Riconv.h>
33 #include <R_ext/RS.h> /* for CallocCharBuf, Free */
34 #include <errno.h>
35 #include <ctype.h> /* for isspace */
36 #include <stdarg.h>
37 #ifdef Win32
38 #include <trioremap.h>
39 #endif
40
41 /* From time to time changes in R, such as the addition of a new SXP,
42 * may require changes in the save file format. Here are some
43 * guidelines on handling format changes:
44 *
45 * Starting with 1.4 there is a version number associated with save
46 * file formats. This version number should be incremented when
47 * the format is changed so older versions of R can recognize and
48 * reject the new format with a meaningful error message.
49 *
50 * R should remain able to write older workspace formats. An error
51 * should be signaled if the contents to be saved is not compatible
52 * with the requested format.
53 *
54 * To allow older versions of R to give useful error messages, the
55 * header now contains the version of R that wrote the workspace
56 * and the oldest version that can read the workspace. These
57 * versions are stored as an integer packed by the R_Version macro
58 * from Rversion.h. Some workspace formats may only exist
59 * temporarily in the development stage. If readers are not
60 * provided in a released version, then these should specify the
61 * oldest reader R version as -1.
62 */
63
64 /* It is now customary that the version (1, 2, 3) of the format is
65 * reflected also in magic numbers (such as RDX2, RDX3, ...), together with
66 * type (xdr/ascii/binary). Adding a new serialization format thus now
67 * also requires adding a new set of magic numbers, yet in principle this
68 * could be changed in the future. The code in this file does not need the
69 * magic numbers, it relies on version and type information in the
70 * serialization header (version 2 and 3).
71 */
72
73 /* ----- V e r s i o n -- T w o -- S a v e / R e s t o r e ----- */
74
75 /* Adapted from Chris Young and Ross Ihaka's Version One by Luke
76 Tierney. Copyright Assigned to the R Project.
77
78 The approach used here uses a single pass over the node tree to be
79 serialized. Sharing of reference objects is preserved, but sharing
80 among other objects is ignored. The first time a reference object
81 is encountered it is entered in a hash table; the value stored with
82 the object is the index in the sequence of reference objects (1 for
83 first reference object, 2 for second, etc.). When an object is
84 seen again, i.e. it is already in the hash table, a reference
85 marker along with the index is written out. The unserialize code
86 does not know in advance how many reference objects it will see, so
87 it starts with an initial array of some reasonable size and doubles
88 it each time space runs out. Reference objects are entered as they
89 are encountered.
90
91 This means the serialize and unserialize code needs to agree on
92 what is a reference object. Making a non-reference object into
93 a reference object requires a version change in the format. An
94 alternate design would be to precede each reference object with a
95 marker that says the next thing is a possibly shared object and
96 needs to be entered into the reference table.
97
98 Adding new SXP types is easy, whether they are reference objects or
99 not. The unserialize code will signal an error if it sees a type
100 value it does not know. It is of course better to increment the
101 serialization format number when a new SXP is added, but if that
102 SXP is unlikely to be saved by users then it may be simpler to keep
103 the version number and let the error handling code deal with it.
104
105 The output format for dotted pairs writes the ATTRIB value first
106 rather than last. This allows CDR's to be processed by iterative
107 tail calls to avoid recursion stack overflows when processing long
108 lists. The writing code does take advantage of this, but the
109 reading code does not. It hasn't been a big issue so far--the only
110 case where it has come up is in saving a large unhashed environment
111 where saving succeeds but loading fails because the PROTECT stack
112 overflows. With the ability to create hashed environments at the
113 user level this is likely to be even less of an issue now. But if
114 we do need to deal with it we can do so without a change in the
115 serialization format--just rewrite ReadItem to pass the place to
116 store the CDR it reads. (It's a bit of a pain to do, that is why it
117 is being deferred until it is clearly needed.)
118
119 CHARSXPs are now handled in a way that preserves both embedded null
120 characters and NA_STRING values.
121
122 The XDR save format now only uses the in-memory xdr facility for
123 converting integers and doubles to a portable format.
124
125 The output format packs the type flag and other flags into a single
126 integer. This produces more compact output for code; it has little
127 effect on data.
128
129 Environments recognized as package or namespace environments are
130 not saved directly. Instead, a STRSXP is saved that is then used to
131 attempt to find the package/namespace when unserialized. The
132 exact mechanism for choosing the name and finding the package/name
133 space from the name still has to be developed, but the
134 serialization format should be able to accommodate any reasonable
135 mechanism.
136
137 The mechanism assumes that user code supplies one routine for
138 handling single characters and one for handling an array of bytes.
139 Higher level interfaces that serialize to/from a FILE * pointer or
140 an Rconnection pointer are provided in this file; others can be
141 built easily.
142
143 A mechanism is provided to allow special handling of non-system
144 reference objects (all weak references and external pointers, and
145 all environments other than package environments, namespace
146 environments, and the global environment). The hook function
147 consists of a function pointer and a data value. The serialization
148 function pointer is called with the reference object and the data
149 value as arguments. It should return R_NilValue for standard
150 handling and an STRSXP for special handling. If an STRSXP is
151 returned, then a special handing mark is written followed by the
152 strings in the STRSXP (attributes are ignored). On unserializing,
153 any specially marked entry causes a call to the hook function with
154 the reconstructed STRSXP and data value as arguments. This should
155 return the value to use for the reference object. A reasonable
156 convention on how to use this mechanism is neded, but again the
157 format should be compatible with any reasonable convention.
158
159 Eventually it may be useful to use these hooks to allow objects
160 with a class to have a class-specific serialization mechanism. The
161 serialization format should support this. It is trickier than in
162 Java and other reference based languages where creation and
163 initialization can be separated--we don't really have that option
164 at the R level. */
165
166 /* ----- V e r s i o n -- T h r e e -- S a v e / R e s t o r e ----- */
167
168 /* This format extends version 2 format by adding an identifier of the
169 current native encoding to the serialization header. On deserialization,
170 strings without an encoding flag will be converted to the current native
171 encoding, if possible, or to (flagged) UTF-8. The conversion may fail
172 when the original encoding is not supported by iconv (unlikely) or when
173 the string is not valid in its declared encoding, which unfortunately is
174 not uncommon. The conversion code now deliberately does not check
175 whether strings are valid when no conversion is needed, but such check
176 could be added in the future without changing the format.
177
178 Version 3 also adds support for custom ALTREP serialization. Under
179 version 2 ALTREP objects are serialied like non-ALTREP ones. */
180
181 /*
182 * Forward Declarations
183 */
184
185 static void OutStringVec(R_outpstream_t stream, SEXP s, SEXP ref_table);
186 static void WriteItem (SEXP s, SEXP ref_table, R_outpstream_t stream);
187 static SEXP ReadItem(SEXP ref_table, R_inpstream_t stream);
188 static void WriteBC(SEXP s, SEXP ref_table, R_outpstream_t stream);
189 static SEXP ReadBC(SEXP ref_table, R_inpstream_t stream);
190
191 /*
192 * Constants
193 */
194
195 /* The default version used when a stream Init function is called with
196 version = 0 */
197
defaultSerializeVersion()198 static int defaultSerializeVersion()
199 {
200 static int dflt = -1;
201
202 if (dflt < 0) {
203 char *valstr = getenv("R_DEFAULT_SERIALIZE_VERSION");
204 int val = -1;
205 if (valstr != NULL)
206 val = atoi(valstr);
207 if (val == 2 || val == 3)
208 dflt = val;
209 else
210 dflt = 3; /* the default */
211 }
212 return dflt;
213 }
214
215 /*
216 * Utility Functions
217 *
218 * An assert function which doesn't crash the program.
219 * Something like this might be useful in an R header file
220 */
221
222 #ifdef NDEBUG
223 #define R_assert(e) ((void) 0)
224 #else
225 /* The line below requires an ANSI C preprocessor (stringify operator) */
226 #define R_assert(e) ((e) ? (void) 0 : error("assertion '%s' failed: file '%s', line %d\n", #e, __FILE__, __LINE__))
227 #endif /* NDEBUG */
228
229 /* Rsnprintf: like snprintf, but guaranteed to null-terminate. See
230 errors.c::Rvsnprintf_mbcs for a multi-byte safe version. */
Rsnprintf(char * buf,size_t size,const char * format,...)231 static int Rsnprintf(char *buf, size_t size, const char *format, ...)
232 {
233 int val;
234 va_list(ap);
235 va_start(ap, format);
236 /* On Windows this no longer uses the non-C99 MSVCRT.dll version */
237 val = vsnprintf(buf, size, format, ap);
238 if(size) buf[size-1] = '\0';
239 va_end(ap);
240 return val;
241 }
242
243
244 /*
245 * Basic Output Routines
246 */
247
OutInteger(R_outpstream_t stream,int i)248 static void OutInteger(R_outpstream_t stream, int i)
249 {
250 char buf[128];
251 switch (stream->type) {
252 case R_pstream_ascii_format:
253 case R_pstream_asciihex_format:
254 if (i == NA_INTEGER)
255 Rsnprintf(buf, sizeof(buf), "NA\n");
256 else
257 Rsnprintf(buf, sizeof(buf), "%d\n", i);
258 stream->OutBytes(stream, buf, (int)strlen(buf));
259 break;
260 case R_pstream_binary_format:
261 stream->OutBytes(stream, &i, sizeof(int));
262 break;
263 case R_pstream_xdr_format:
264 R_XDREncodeInteger(i, buf);
265 stream->OutBytes(stream, buf, R_XDR_INTEGER_SIZE);
266 break;
267 default:
268 error(_("unknown or inappropriate output format"));
269 }
270 }
271
OutReal(R_outpstream_t stream,double d)272 static void OutReal(R_outpstream_t stream, double d)
273 {
274 char buf[128];
275 switch (stream->type) {
276 case R_pstream_ascii_format:
277 if (! R_FINITE(d)) {
278 if (ISNA(d))
279 Rsnprintf(buf, sizeof(buf), "NA\n");
280 else if (ISNAN(d))
281 Rsnprintf(buf, sizeof(buf), "NaN\n");
282 else if (d < 0)
283 Rsnprintf(buf, sizeof(buf), "-Inf\n");
284 else
285 Rsnprintf(buf, sizeof(buf), "Inf\n");
286 }
287 else
288 /* 16: full precision; 17 gives 999, 000 &c */
289 Rsnprintf(buf, sizeof(buf), "%.16g\n", d);
290 stream->OutBytes(stream, buf, (int)strlen(buf));
291 break;
292 case R_pstream_asciihex_format:
293 if (! R_FINITE(d)) {
294 if (ISNA(d))
295 Rsnprintf(buf, sizeof(buf), "NA\n");
296 else if (ISNAN(d))
297 Rsnprintf(buf, sizeof(buf), "NaN\n");
298 else if (d < 0)
299 Rsnprintf(buf, sizeof(buf), "-Inf\n");
300 else
301 Rsnprintf(buf, sizeof(buf), "Inf\n");
302 }
303 else
304 Rsnprintf(buf, sizeof(buf), "%a\n", d);
305 stream->OutBytes(stream, buf, (int)strlen(buf));
306 break;
307 case R_pstream_binary_format:
308 stream->OutBytes(stream, &d, sizeof(double));
309 break;
310 case R_pstream_xdr_format:
311 R_XDREncodeDouble(d, buf);
312 stream->OutBytes(stream, buf, R_XDR_DOUBLE_SIZE);
313 break;
314 default:
315 error(_("unknown or inappropriate output format"));
316 }
317 }
318
OutComplex(R_outpstream_t stream,Rcomplex c)319 static void OutComplex(R_outpstream_t stream, Rcomplex c)
320 {
321 OutReal(stream, c.r);
322 OutReal(stream, c.i);
323 }
324
OutByte(R_outpstream_t stream,Rbyte i)325 static void OutByte(R_outpstream_t stream, Rbyte i)
326 {
327 char buf[128];
328 switch (stream->type) {
329 case R_pstream_ascii_format:
330 case R_pstream_asciihex_format:
331 Rsnprintf(buf, sizeof(buf), "%02x\n", i);
332 stream->OutBytes(stream, buf, (int)strlen(buf));
333 break;
334 case R_pstream_binary_format:
335 case R_pstream_xdr_format:
336 stream->OutBytes(stream, &i, 1);
337 break;
338 default:
339 error(_("unknown or inappropriate output format"));
340 }
341 }
342
343 /* This assumes CHARSXPs remain limited to 2^31-1 bytes */
OutString(R_outpstream_t stream,const char * s,int length)344 static void OutString(R_outpstream_t stream, const char *s, int length)
345 {
346 if (stream->type == R_pstream_ascii_format ||
347 stream->type == R_pstream_asciihex_format) {
348 int i;
349 char buf[128];
350 for (i = 0; i < length; i++) {
351 switch(s[i]) {
352 case '\n': sprintf(buf, "\\n"); break;
353 case '\t': sprintf(buf, "\\t"); break;
354 case '\v': sprintf(buf, "\\v"); break;
355 case '\b': sprintf(buf, "\\b"); break;
356 case '\r': sprintf(buf, "\\r"); break;
357 case '\f': sprintf(buf, "\\f"); break;
358 case '\a': sprintf(buf, "\\a"); break;
359 case '\\': sprintf(buf, "\\\\"); break;
360 case '\?': sprintf(buf, "\\?"); break;
361 case '\'': sprintf(buf, "\\'"); break;
362 case '\"': sprintf(buf, "\\\""); break;
363 default :
364 /* cannot print char in octal mode -> cast to unsigned
365 char first */
366 /* actually, since s is signed char and '\?' == 127
367 is handled above, s[i] > 126 can't happen, but
368 I'm superstitious... -pd */
369 if (s[i] <= 32 || s[i] > 126)
370 sprintf(buf, "\\%03o", (unsigned char) s[i]);
371 else
372 sprintf(buf, "%c", s[i]);
373 }
374 stream->OutBytes(stream, buf, (int)strlen(buf));
375 }
376 stream->OutChar(stream, '\n');
377 }
378 else
379 stream->OutBytes(stream, (void *)s, length); /* FIXME: is this case right? */
380 }
381
382
383 /*
384 * Basic Input Routines
385 */
386
InWord(R_inpstream_t stream,char * buf,int size)387 static void InWord(R_inpstream_t stream, char * buf, int size)
388 {
389 int c, i;
390 i = 0;
391 do {
392 c = stream->InChar(stream);
393 if (c == EOF)
394 error(_("read error"));
395 } while (isspace(c));
396 while (! isspace(c) && i < size) {
397 buf[i++] = (char) c;
398 c = stream->InChar(stream);
399 }
400 if (i == size)
401 error(_("read error"));
402 buf[i] = 0;
403 }
404
InInteger(R_inpstream_t stream)405 static int InInteger(R_inpstream_t stream)
406 {
407 char word[128];
408 char buf[128];
409 int i;
410
411 switch (stream->type) {
412 case R_pstream_ascii_format:
413 InWord(stream, word, sizeof(word));
414 if(sscanf(word, "%127s", buf) != 1) error(_("read error"));
415 if (strcmp(buf, "NA") == 0)
416 return NA_INTEGER;
417 else
418 if(sscanf(buf, "%d", &i) != 1) error(_("read error"));
419 return i;
420 case R_pstream_binary_format:
421 stream->InBytes(stream, &i, sizeof(int));
422 return i;
423 case R_pstream_xdr_format:
424 stream->InBytes(stream, buf, R_XDR_INTEGER_SIZE);
425 return R_XDRDecodeInteger(buf);
426 default:
427 return NA_INTEGER;
428 }
429 }
430
431 #ifdef Win32
432 extern int trio_sscanf(const char *buffer, const char *format, ...);
433
434 #endif
435
InReal(R_inpstream_t stream)436 static double InReal(R_inpstream_t stream)
437 {
438 char word[128];
439 char buf[128];
440 double d;
441
442 switch (stream->type) {
443 case R_pstream_ascii_format:
444 InWord(stream, word, sizeof(word));
445 if(sscanf(word, "%127s", buf) != 1) error(_("read error"));
446 if (strcmp(buf, "NA") == 0)
447 return NA_REAL;
448 else if (strcmp(buf, "NaN") == 0)
449 return R_NaN;
450 else if (strcmp(buf, "Inf") == 0)
451 return R_PosInf;
452 else if (strcmp(buf, "-Inf") == 0)
453 return R_NegInf;
454 else
455 if(
456 #ifdef Win32
457 trio_sscanf(buf, "%lg", &d)
458 #else
459 sscanf(buf, "%lg", &d)
460 #endif
461 != 1) error(_("read error"));
462 return d;
463 case R_pstream_binary_format:
464 stream->InBytes(stream, &d, sizeof(double));
465 return d;
466 case R_pstream_xdr_format:
467 stream->InBytes(stream, buf, R_XDR_DOUBLE_SIZE);
468 return R_XDRDecodeDouble(buf);
469 default:
470 return NA_REAL;
471 }
472 }
473
InComplex(R_inpstream_t stream)474 static Rcomplex InComplex(R_inpstream_t stream)
475 {
476 Rcomplex c;
477 c.r = InReal(stream);
478 c.i = InReal(stream);
479 return c;
480 }
481
482 /* These utilities for reading characters with an unget option are
483 defined so the code in InString can match the code in
484 saveload.c:InStringAscii--that way it is easier to match changes in
485 one to the other. */
486 typedef struct R_instring_stream_st {
487 int last;
488 R_inpstream_t stream;
489 } *R_instring_stream_t;
490
InitInStringStream(R_instring_stream_t s,R_inpstream_t stream)491 static void InitInStringStream(R_instring_stream_t s, R_inpstream_t stream)
492 {
493 s->last = EOF;
494 s->stream = stream;
495 }
496
GetChar(R_instring_stream_t s)497 static int GetChar(R_instring_stream_t s)
498 {
499 int c;
500 if (s->last != EOF) {
501 c = s->last;
502 s->last = EOF;
503 }
504 else c = s->stream->InChar(s->stream);
505 return c;
506 }
507
UngetChar(R_instring_stream_t s,int c)508 static void UngetChar(R_instring_stream_t s, int c)
509 {
510 s->last = c;
511 }
512
513
InString(R_inpstream_t stream,char * buf,int length)514 static void InString(R_inpstream_t stream, char *buf, int length)
515 {
516 if (stream->type == R_pstream_ascii_format) {
517 if (length > 0) {
518 int c, d, i, j;
519 struct R_instring_stream_st iss;
520
521 InitInStringStream(&iss, stream);
522 while(isspace(c = GetChar(&iss)))
523 ;
524 UngetChar(&iss, c);
525 for (i = 0; i < length; i++) {
526 if ((c = GetChar(&iss)) == '\\') {
527 switch(c = GetChar(&iss)) {
528 case 'n' : buf[i] = '\n'; break;
529 case 't' : buf[i] = '\t'; break;
530 case 'v' : buf[i] = '\v'; break;
531 case 'b' : buf[i] = '\b'; break;
532 case 'r' : buf[i] = '\r'; break;
533 case 'f' : buf[i] = '\f'; break;
534 case 'a' : buf[i] = '\a'; break;
535 case '\\': buf[i] = '\\'; break;
536 case '?' : buf[i] = '\?'; break;
537 case '\'': buf[i] = '\''; break;
538 case '\"': buf[i] = '\"'; break; /* closing " for emacs */
539 case '0': case '1': case '2': case '3':
540 case '4': case '5': case '6': case '7':
541 d = 0; j = 0;
542 while('0' <= c && c < '8' && j < 3) {
543 d = d * 8 + (c - '0');
544 c = GetChar(&iss);
545 j++;
546 }
547 buf[i] = (char) d;
548 UngetChar(&iss, c);
549 break;
550 default : buf[i] = (char) c;
551 }
552 }
553 else buf[i] = (char) c;
554 }
555 }
556 }
557 else /* this limits the string length: used for CHARSXPs */
558 stream->InBytes(stream, buf, length);
559 }
560
561
562 /*
563 * Format Header Reading and Writing
564 *
565 * The header starts with one of three characters, A for ascii, B for
566 * binary, or X for xdr.
567 */
568
OutFormat(R_outpstream_t stream)569 static void OutFormat(R_outpstream_t stream)
570 {
571 /* if (stream->type == R_pstream_binary_format) {
572 warning(_("binary format is deprecated; using xdr instead"));
573 stream->type = R_pstream_xdr_format;
574 } */
575 switch (stream->type) {
576 case R_pstream_ascii_format:
577 case R_pstream_asciihex_format:
578 stream->OutBytes(stream, "A\n", 2); break;
579 /* on deserialization, asciihex_format is treated exactly the same
580 way as ascii_format; the distinction is handled inside scanf %lg */
581 case R_pstream_binary_format: stream->OutBytes(stream, "B\n", 2); break;
582 case R_pstream_xdr_format: stream->OutBytes(stream, "X\n", 2); break;
583 case R_pstream_any_format:
584 error(_("must specify ascii, binary, or xdr format"));
585 default: error(_("unknown output format"));
586 }
587 }
588
InFormat(R_inpstream_t stream)589 static void InFormat(R_inpstream_t stream)
590 {
591 char buf[2];
592 R_pstream_format_t type;
593 stream->InBytes(stream, buf, 2);
594 switch (buf[0]) {
595 case 'A': type = R_pstream_ascii_format; break; /* also for asciihex */
596 case 'B': type = R_pstream_binary_format; break;
597 case 'X': type = R_pstream_xdr_format; break;
598 case '\n':
599 /* GROSS HACK: ASCII unserialize may leave a trailing newline
600 in the stream. If the stream contains a second
601 serialization, then a second unserialize will fail if such
602 a newline is present. The right fix is to make sure
603 unserialize consumes exactly what serialize produces. But
604 this seems hard because of the current use of whitespace
605 skipping in unserialize. So a temporary hack to cure the
606 symptom is to deal with a possible leading newline. I
607 don't think more than one is possible, but I'm not sure.
608 LT */
609 if (buf[1] == 'A') {
610 type = R_pstream_ascii_format;
611 stream->InBytes(stream, buf, 1);
612 break;
613 }
614 default:
615 type = R_pstream_any_format; /* keep compiler happy */
616 error(_("unknown input format"));
617 }
618 if (stream->type == R_pstream_any_format)
619 stream->type = type;
620 else if (type != stream->type)
621 error(_("input format does not match specified format"));
622 }
623
624
625 /*
626 * Hash Table Functions
627 *
628 * Hashing functions for hashing reference objects during writing.
629 * Objects are entered, and the order in which they are encountered is
630 * recorded. HashGet returns this number, a positive integer, if the
631 * object was seen before, and zero if not. A fixed hash table size
632 * is used; this is not ideal but seems adequate for now. The hash
633 * table representation consists of a (R_NilValue . vector) pair. The
634 * hash buckets are in the vector. This indirect representation
635 * should allow resizing the table at some point.
636 */
637
638 #define HASHSIZE 1099
639
640 #define PTRHASH(obj) (((R_size_t) (obj)) >> 2)
641
642 #define HASH_TABLE_COUNT(ht) ((int) TRUELENGTH(CDR(ht)))
643 #define SET_HASH_TABLE_COUNT(ht, val) SET_TRUELENGTH(CDR(ht), ((int) (val)))
644
645 #define HASH_TABLE_SIZE(ht) LENGTH(CDR(ht))
646
647 #define HASH_BUCKET(ht, pos) VECTOR_ELT(CDR(ht), pos)
648 #define SET_HASH_BUCKET(ht, pos, val) SET_VECTOR_ELT(CDR(ht), pos, val)
649
MakeHashTable(void)650 static SEXP MakeHashTable(void)
651 {
652 SEXP val = CONS(R_NilValue, allocVector(VECSXP, HASHSIZE));
653 SET_HASH_TABLE_COUNT(val, 0);
654 return val;
655 }
656
HashAdd(SEXP obj,SEXP ht)657 static void HashAdd(SEXP obj, SEXP ht)
658 {
659 R_size_t pos = PTRHASH(obj) % HASH_TABLE_SIZE(ht);
660 int count = HASH_TABLE_COUNT(ht) + 1;
661 SEXP val = ScalarInteger(count);
662 SEXP cell = CONS(val, HASH_BUCKET(ht, pos));
663
664 SET_HASH_TABLE_COUNT(ht, count);
665 SET_HASH_BUCKET(ht, pos, cell);
666 SET_TAG(cell, obj);
667 }
668
HashGet(SEXP item,SEXP ht)669 static int HashGet(SEXP item, SEXP ht)
670 {
671 R_size_t pos = PTRHASH(item) % HASH_TABLE_SIZE(ht);
672 SEXP cell;
673 for (cell = HASH_BUCKET(ht, pos); cell != R_NilValue; cell = CDR(cell))
674 if (item == TAG(cell))
675 return INTEGER(CAR(cell))[0];
676 return 0;
677 }
678
679
680 /*
681 * Administrative SXP values
682 *
683 * These macros define SXP "type" for specifying special object, such
684 * as R_NilValue, or control information, like REFSXP or NAMESPACESXP.
685 * The range of SXP types is limited to 5 bit by the current sxpinfo
686 * layout, but just in case these values are placed at the top of the
687 * 8 bit range.
688 */
689
690 #define REFSXP 255
691 #define NILVALUE_SXP 254
692 #define GLOBALENV_SXP 253
693 #define UNBOUNDVALUE_SXP 252
694 #define MISSINGARG_SXP 251
695 #define BASENAMESPACE_SXP 250
696 #define NAMESPACESXP 249
697 #define PACKAGESXP 248
698 #define PERSISTSXP 247
699 /* the following are speculative--we may or may not need them soon */
700 #define CLASSREFSXP 246
701 #define GENERICREFSXP 245
702 #define BCREPDEF 244
703 #define BCREPREF 243
704 #define EMPTYENV_SXP 242
705 #define BASEENV_SXP 241
706
707 /* The following are needed to preserve attribute information on
708 expressions in the constant pool of byte code objects. This is
709 mainly for preserving source references attributes. The original
710 implementation of the sharing-preserving writing and reading of byte
711 code objects did not account for the need to preserve attributes,
712 so there is now a work-around using these SXP types to flag when
713 the ATTRIB field has been written out. Object bits and S4 bits are
714 still not preserved. In the long run it might be better to change
715 to a scheme in which all sharing is preserved and byte code objects
716 don't need to be handled as a special case. LT */
717 #define ATTRLANGSXP 240
718 #define ATTRLISTSXP 239
719
720 #define ALTREP_SXP 238
721
722 /*
723 * Type/Flag Packing and Unpacking
724 *
725 * To reduce space consumption for serializing code (lots of list
726 * structure) the type (at most 8 bits), several single bit flags,
727 * and the sxpinfo gp field (LEVELS, 16 bits) are packed into a single
728 * integer. The integer is signed, so this shouldn't be pushed too
729 * far. It assumes at least 28 bits, but that should be no problem.
730 */
731
732 #define IS_OBJECT_BIT_MASK (1 << 8)
733 #define HAS_ATTR_BIT_MASK (1 << 9)
734 #define HAS_TAG_BIT_MASK (1 << 10)
735 #define ENCODE_LEVELS(v) ((v) << 12)
736 #define DECODE_LEVELS(v) ((v) >> 12)
737 #define DECODE_TYPE(v) ((v) & 255)
738
PackFlags(int type,int levs,int isobj,int hasattr,int hastag)739 static int PackFlags(int type, int levs, int isobj, int hasattr, int hastag)
740 {
741 /* We don't write out bit 5 as from R 2.8.0.
742 It is used to indicate if an object is in CHARSXP cache
743 - not that it matters to this version of R, but it saves
744 checking all previous versions.
745
746 Also make sure the HASHASH bit is not written out.
747 */
748 int val;
749 if (type == CHARSXP) levs &= (~(CACHED_MASK | HASHASH_MASK));
750 val = type | ENCODE_LEVELS(levs);
751 if (isobj) val |= IS_OBJECT_BIT_MASK;
752 if (hasattr) val |= HAS_ATTR_BIT_MASK;
753 if (hastag) val |= HAS_TAG_BIT_MASK;
754 return val;
755 }
756
UnpackFlags(int flags,SEXPTYPE * ptype,int * plevs,int * pisobj,int * phasattr,int * phastag)757 static void UnpackFlags(int flags, SEXPTYPE *ptype, int *plevs,
758 int *pisobj, int *phasattr, int *phastag)
759 {
760 *ptype = DECODE_TYPE(flags);
761 *plevs = DECODE_LEVELS(flags);
762 *pisobj = flags & IS_OBJECT_BIT_MASK ? TRUE : FALSE;
763 *phasattr = flags & HAS_ATTR_BIT_MASK ? TRUE : FALSE;
764 *phastag = flags & HAS_TAG_BIT_MASK ? TRUE : FALSE;
765 }
766
767
768 /*
769 * Reference/Index Packing and Unpacking
770 *
771 * Code will contain many references to symbols. As long as there are
772 * not too many references, the index ant the REFSXP flag indicating a
773 * reference can be packed in a single integer. Since the index is
774 * 1-based, a 0 is used to indicate an index that doesn't fit and
775 * therefore follows.
776 */
777
778 #define PACK_REF_INDEX(i) (((i) << 8) | REFSXP)
779 #define UNPACK_REF_INDEX(i) ((i) >> 8)
780 #define MAX_PACKED_INDEX (INT_MAX >> 8)
781
OutRefIndex(R_outpstream_t stream,int i)782 static void OutRefIndex(R_outpstream_t stream, int i)
783 {
784 if (i > MAX_PACKED_INDEX) {
785 OutInteger(stream, REFSXP);
786 OutInteger(stream, i);
787 }
788 else OutInteger(stream, PACK_REF_INDEX(i));
789 }
790
InRefIndex(R_inpstream_t stream,int flags)791 static int InRefIndex(R_inpstream_t stream, int flags)
792 {
793 int i = UNPACK_REF_INDEX(flags);
794 if (i == 0)
795 return InInteger(stream);
796 else
797 return i;
798 }
799
800
801 /*
802 * Persistent Name Hooks
803 *
804 * These routines call the appropriate hook functions for allowing
805 * customized handling of reference objects.
806 */
807
GetPersistentName(R_outpstream_t stream,SEXP s)808 static SEXP GetPersistentName(R_outpstream_t stream, SEXP s)
809 {
810 if (stream->OutPersistHookFunc != NULL) {
811 switch (TYPEOF(s)) {
812 case WEAKREFSXP:
813 case EXTPTRSXP: break;
814 case ENVSXP:
815 if (s == R_GlobalEnv ||
816 s == R_BaseEnv ||
817 s == R_EmptyEnv ||
818 R_IsNamespaceEnv(s) ||
819 R_IsPackageEnv(s))
820 return R_NilValue;
821 else
822 break;
823 default: return R_NilValue;
824 }
825 return stream->OutPersistHookFunc(s, stream->OutPersistHookData);
826 }
827 else
828 return R_NilValue;
829 }
830
PersistentRestore(R_inpstream_t stream,SEXP s)831 static SEXP PersistentRestore(R_inpstream_t stream, SEXP s)
832 {
833 if (stream->InPersistHookFunc == NULL)
834 error(_("no restore method available"));
835 return stream->InPersistHookFunc(s, stream->InPersistHookData);
836 }
837
838
839 /*
840 * Serialization Code
841 */
842
SaveSpecialHook(SEXP item)843 static int SaveSpecialHook(SEXP item)
844 {
845 if (item == R_NilValue) return NILVALUE_SXP;
846 if (item == R_EmptyEnv) return EMPTYENV_SXP;
847 if (item == R_BaseEnv) return BASEENV_SXP;
848 if (item == R_GlobalEnv) return GLOBALENV_SXP;
849 if (item == R_UnboundValue) return UNBOUNDVALUE_SXP;
850 if (item == R_MissingArg) return MISSINGARG_SXP;
851 if (item == R_BaseNamespace) return BASENAMESPACE_SXP;
852 return 0;
853 }
854
WriteLENGTH(R_outpstream_t stream,SEXP s)855 static void WriteLENGTH(R_outpstream_t stream, SEXP s)
856 {
857 #ifdef LONG_VECTOR_SUPPORT
858 if (IS_LONG_VEC(s)) {
859 OutInteger(stream, -1);
860 R_xlen_t len = XLENGTH(s);
861 OutInteger(stream, (int)(len / 4294967296L));
862 OutInteger(stream, (int)(len % 4294967296L));
863 } else OutInteger(stream, LENGTH(s));
864 #else
865 OutInteger(stream, LENGTH(s));
866 #endif
867 }
868
OutStringVec(R_outpstream_t stream,SEXP s,SEXP ref_table)869 static void OutStringVec(R_outpstream_t stream, SEXP s, SEXP ref_table)
870 {
871 R_assert(TYPEOF(s) == STRSXP);
872
873 #ifdef WARN_ABOUT_NAMES_IN_PERSISTENT_STRINGS
874 SEXP names = getAttrib(s, R_NamesSymbol);
875 if (names != R_NilValue)
876 warning(_("names in persistent strings are currently ignored"));
877 #endif
878
879 R_xlen_t len = XLENGTH(s);
880 OutInteger(stream, 0); /* place holder to allow names if we want to */
881 WriteLENGTH(stream, s);
882 for (R_xlen_t i = 0; i < len; i++)
883 WriteItem(STRING_ELT(s, i), ref_table, stream);
884 }
885
886 #include <rpc/types.h>
887 #include <rpc/xdr.h>
888
889 #define CHUNK_SIZE 8096
890
891 #define min2(a, b) ((a) < (b)) ? (a) : (b)
892
893
894 static R_INLINE void
OutIntegerVec(R_outpstream_t stream,SEXP s,R_xlen_t length)895 OutIntegerVec(R_outpstream_t stream, SEXP s, R_xlen_t length)
896 {
897 switch (stream->type) {
898 case R_pstream_xdr_format:
899 {
900 static char buf[CHUNK_SIZE * sizeof(int)];
901 R_xlen_t done, this;
902 XDR xdrs;
903 for (done = 0; done < length; done += this) {
904 this = min2(CHUNK_SIZE, length - done);
905 xdrmem_create(&xdrs, buf, (int)(this * sizeof(int)), XDR_ENCODE);
906 for(int cnt = 0; cnt < this; cnt++)
907 if(!xdr_int(&xdrs, INTEGER(s) + done + cnt))
908 error(_("XDR write failed"));
909 xdr_destroy(&xdrs);
910 stream->OutBytes(stream, buf, (int)(sizeof(int) * this));
911 }
912 break;
913 }
914 case R_pstream_binary_format:
915 {
916 /* write in chunks to avoid overflowing ints */
917 R_xlen_t done, this;
918 for (done = 0; done < length; done += this) {
919 this = min2(CHUNK_SIZE, length - done);
920 stream->OutBytes(stream, INTEGER(s) + done,
921 (int)(sizeof(int) * this));
922 }
923 break;
924 }
925 default:
926 for (R_xlen_t cnt = 0; cnt < length; cnt++)
927 OutInteger(stream, INTEGER(s)[cnt]);
928 }
929 }
930
931 static R_INLINE void
OutRealVec(R_outpstream_t stream,SEXP s,R_xlen_t length)932 OutRealVec(R_outpstream_t stream, SEXP s, R_xlen_t length)
933 {
934 switch (stream->type) {
935 case R_pstream_xdr_format:
936 {
937 static char buf[CHUNK_SIZE * sizeof(double)];
938 R_xlen_t done, this;
939 XDR xdrs;
940 for (done = 0; done < length; done += this) {
941 this = min2(CHUNK_SIZE, length - done);
942 xdrmem_create(&xdrs, buf, (int)(this * sizeof(double)), XDR_ENCODE);
943 for(int cnt = 0; cnt < this; cnt++)
944 if(!xdr_double(&xdrs, REAL(s) + done + cnt))
945 error(_("XDR write failed"));
946 xdr_destroy(&xdrs);
947 stream->OutBytes(stream, buf, (int)(sizeof(double) * this));
948 }
949 break;
950 }
951 case R_pstream_binary_format:
952 {
953 R_xlen_t done, this;
954 for (done = 0; done < length; done += this) {
955 this = min2(CHUNK_SIZE, length - done);
956 stream->OutBytes(stream, REAL(s) + done,
957 (int)(sizeof(double) * this));
958 }
959 break;
960 }
961 default:
962 for (R_xlen_t cnt = 0; cnt < length; cnt++)
963 OutReal(stream, REAL(s)[cnt]);
964 }
965 }
966
967 static R_INLINE void
OutComplexVec(R_outpstream_t stream,SEXP s,R_xlen_t length)968 OutComplexVec(R_outpstream_t stream, SEXP s, R_xlen_t length)
969 {
970 switch (stream->type) {
971 case R_pstream_xdr_format:
972 {
973 static char buf[CHUNK_SIZE * sizeof(Rcomplex)];
974 R_xlen_t done, this;
975 XDR xdrs;
976 Rcomplex *c = COMPLEX(s);
977 for (done = 0; done < length; done += this) {
978 this = min2(CHUNK_SIZE, length - done);
979 xdrmem_create(&xdrs, buf, (int)(this * sizeof(Rcomplex)), XDR_ENCODE);
980 for(int cnt = 0; cnt < this; cnt++) {
981 if(!xdr_double(&xdrs, &(c[done+cnt].r)) ||
982 !xdr_double(&xdrs, &(c[done+cnt].i)))
983 error(_("XDR write failed"));
984 }
985 stream->OutBytes(stream, buf, (int)(sizeof(Rcomplex) * this));
986 xdr_destroy(&xdrs);
987 }
988 break;
989 }
990 case R_pstream_binary_format:
991 {
992 R_xlen_t done, this;
993 for (done = 0; done < length; done += this) {
994 this = min2(CHUNK_SIZE, length - done);
995 stream->OutBytes(stream, COMPLEX(s) + done,
996 (int)(sizeof(Rcomplex) * this));
997 }
998 break;
999 }
1000 default:
1001 for (R_xlen_t cnt = 0; cnt < length; cnt++)
1002 OutComplex(stream, COMPLEX(s)[cnt]);
1003 }
1004 }
1005
WriteItem(SEXP s,SEXP ref_table,R_outpstream_t stream)1006 static void WriteItem (SEXP s, SEXP ref_table, R_outpstream_t stream)
1007 {
1008 int i;
1009 SEXP t;
1010
1011 if (R_compile_pkgs && TYPEOF(s) == CLOSXP && TYPEOF(BODY(s)) != BCODESXP &&
1012 !R_disable_bytecode &&
1013 (!IS_S4_OBJECT(s) || (!inherits(s, "refMethodDef") &&
1014 !inherits(s, "defaultBindingFunction")))) {
1015
1016 /* Do not compile reference class methods in their generators, because
1017 the byte-code is dropped as soon as the method is installed into a
1018 new environment. This is a performance optimization but it also
1019 prevents byte-compiler warnings about no visible binding for super
1020 assignment to a class field.
1021
1022 Do not compile default binding functions, because the byte-code is
1023 dropped as fields are set in constructors (just an optimization).
1024 */
1025
1026 SEXP new_s;
1027 R_compile_pkgs = FALSE;
1028 PROTECT(new_s = R_cmpfun1(s));
1029 WriteItem (new_s, ref_table, stream);
1030 UNPROTECT(1);
1031 R_compile_pkgs = TRUE;
1032 return;
1033 }
1034
1035 tailcall:
1036 R_CheckStack();
1037 if (ALTREP(s) && stream->version >= 3) {
1038 SEXP info = ALTREP_SERIALIZED_CLASS(s);
1039 SEXP state = ALTREP_SERIALIZED_STATE(s);
1040 if (info != NULL && state != NULL) {
1041 int flags = PackFlags(ALTREP_SXP, LEVELS(s), OBJECT(s), 0, 0);
1042 PROTECT(state);
1043 PROTECT(info);
1044 OutInteger(stream, flags);
1045 WriteItem(info, ref_table, stream);
1046 WriteItem(state, ref_table, stream);
1047 WriteItem(ATTRIB(s), ref_table, stream);
1048 UNPROTECT(2); /* state, info */
1049 return;
1050 }
1051 /* else fall through to standard processing */
1052 }
1053 if ((t = GetPersistentName(stream, s)) != R_NilValue) {
1054 R_assert(TYPEOF(t) == STRSXP && LENGTH(t) > 0);
1055 PROTECT(t);
1056 HashAdd(s, ref_table);
1057 OutInteger(stream, PERSISTSXP);
1058 OutStringVec(stream, t, ref_table);
1059 UNPROTECT(1);
1060 }
1061 else if ((i = SaveSpecialHook(s)) != 0)
1062 OutInteger(stream, i);
1063 else if ((i = HashGet(s, ref_table)) != 0)
1064 OutRefIndex(stream, i);
1065 else if (TYPEOF(s) == SYMSXP) {
1066 /* Note : NILSXP can't occur here */
1067 HashAdd(s, ref_table);
1068 OutInteger(stream, SYMSXP);
1069 WriteItem(PRINTNAME(s), ref_table, stream);
1070 }
1071 else if (TYPEOF(s) == ENVSXP) {
1072 HashAdd(s, ref_table);
1073 if (R_IsPackageEnv(s)) {
1074 SEXP name = R_PackageEnvName(s);
1075 const void *vmax = vmaxget();
1076 warning(_("'%s' may not be available when loading"),
1077 translateChar(STRING_ELT(name, 0)));
1078 vmaxset(vmax);
1079 OutInteger(stream, PACKAGESXP);
1080 OutStringVec(stream, name, ref_table);
1081 }
1082 else if (R_IsNamespaceEnv(s)) {
1083 #ifdef WARN_ABOUT_NAME_SPACES_MAYBE_NOT_AVAILABLE
1084 warning(_("namespaces may not be available when loading"));
1085 #endif
1086 OutInteger(stream, NAMESPACESXP);
1087 OutStringVec(stream, PROTECT(R_NamespaceEnvSpec(s)), ref_table);
1088 UNPROTECT(1);
1089 }
1090 else {
1091 OutInteger(stream, ENVSXP);
1092 OutInteger(stream, R_EnvironmentIsLocked(s) ? 1 : 0);
1093 WriteItem(ENCLOS(s), ref_table, stream);
1094 WriteItem(FRAME(s), ref_table, stream);
1095 WriteItem(HASHTAB(s), ref_table, stream);
1096 WriteItem(ATTRIB(s), ref_table, stream);
1097 }
1098 }
1099 else {
1100 int flags, hastag, hasattr;
1101 R_xlen_t len;
1102 switch(TYPEOF(s)) {
1103 case LISTSXP:
1104 case LANGSXP:
1105 case PROMSXP:
1106 case DOTSXP: hastag = TAG(s) != R_NilValue; break;
1107 case CLOSXP: hastag = TRUE; break;
1108 default: hastag = FALSE;
1109 }
1110 /* With the CHARSXP cache chains maintained through the ATTRIB
1111 field the content of that field must not be serialized, so
1112 we treat it as not there. */
1113 hasattr = (TYPEOF(s) != CHARSXP && ATTRIB(s) != R_NilValue);
1114 flags = PackFlags(TYPEOF(s), LEVELS(s), OBJECT(s),
1115 hasattr, hastag);
1116 OutInteger(stream, flags);
1117 switch (TYPEOF(s)) {
1118 case LISTSXP:
1119 case LANGSXP:
1120 case PROMSXP:
1121 case DOTSXP:
1122 /* Dotted pair objects */
1123 /* These write their ATTRIB fields first to allow us to avoid
1124 recursion on the CDR */
1125 if (hasattr)
1126 WriteItem(ATTRIB(s), ref_table, stream);
1127 if (TAG(s) != R_NilValue)
1128 WriteItem(TAG(s), ref_table, stream);
1129 if (BNDCELL_TAG(s))
1130 R_expand_binding_value(s);
1131 WriteItem(CAR(s), ref_table, stream);
1132 /* now do a tail call to WriteItem to handle the CDR */
1133 s = CDR(s);
1134 goto tailcall;
1135 case CLOSXP:
1136 /* Like a dotted pair object */
1137 /* Write the ATTRIB field first to allow us to avoid
1138 recursion on the CDR/BODY */
1139 if (hasattr)
1140 WriteItem(ATTRIB(s), ref_table, stream);
1141 WriteItem(CLOENV(s), ref_table, stream);
1142 WriteItem(FORMALS(s), ref_table, stream);
1143 /* now do a tail call to WriteItem to handle the CDR/BODY */
1144 s = BODY(s);
1145 goto tailcall;
1146 case EXTPTRSXP:
1147 /* external pointers */
1148 HashAdd(s, ref_table);
1149 WriteItem(EXTPTR_PROT(s), ref_table, stream);
1150 WriteItem(EXTPTR_TAG(s), ref_table, stream);
1151 break;
1152 case WEAKREFSXP:
1153 /* Weak references */
1154 HashAdd(s, ref_table);
1155 break;
1156 case SPECIALSXP:
1157 case BUILTINSXP:
1158 /* Builtin functions */
1159 OutInteger(stream, (int)strlen(PRIMNAME(s)));
1160 OutString(stream, PRIMNAME(s), (int)strlen(PRIMNAME(s)));
1161 break;
1162 case CHARSXP:
1163 if (s == NA_STRING)
1164 OutInteger(stream, -1);
1165 else {
1166 OutInteger(stream, LENGTH(s));
1167 OutString(stream, CHAR(s), LENGTH(s));
1168 }
1169 break;
1170 case LGLSXP:
1171 case INTSXP:
1172 len = XLENGTH(s);
1173 WriteLENGTH(stream, s);
1174 OutIntegerVec(stream, s, len);
1175 break;
1176 case REALSXP:
1177 len = XLENGTH(s);
1178 WriteLENGTH(stream, s);
1179 OutRealVec(stream, s, len);
1180 break;
1181 case CPLXSXP:
1182 len = XLENGTH(s);
1183 WriteLENGTH(stream, s);
1184 OutComplexVec(stream, s, len);
1185 break;
1186 case STRSXP:
1187 len = XLENGTH(s);
1188 WriteLENGTH(stream, s);
1189 for (R_xlen_t ix = 0; ix < len; ix++)
1190 WriteItem(STRING_ELT(s, ix), ref_table, stream);
1191 break;
1192 case VECSXP:
1193 case EXPRSXP:
1194 len = XLENGTH(s);
1195 WriteLENGTH(stream, s);
1196 for (R_xlen_t ix = 0; ix < len; ix++)
1197 WriteItem(VECTOR_ELT(s, ix), ref_table, stream);
1198 break;
1199 case BCODESXP:
1200 WriteBC(s, ref_table, stream);
1201 break;
1202 case RAWSXP:
1203 len = XLENGTH(s);
1204 WriteLENGTH(stream, s);
1205 switch (stream->type) {
1206 case R_pstream_xdr_format:
1207 case R_pstream_binary_format:
1208 {
1209 R_xlen_t done, this;
1210 for (done = 0; done < len; done += this) {
1211 this = min2(CHUNK_SIZE, len - done);
1212 stream->OutBytes(stream, RAW(s) + done, (int) this);
1213 }
1214 break;
1215 }
1216 default:
1217 for (R_xlen_t ix = 0; ix < len; ix++)
1218 OutByte(stream, RAW(s)[ix]);
1219 }
1220 break;
1221 case S4SXP:
1222 break; /* only attributes (i.e., slots) count */
1223 default:
1224 error(_("WriteItem: unknown type %i"), TYPEOF(s));
1225 }
1226 if (hasattr)
1227 WriteItem(ATTRIB(s), ref_table, stream);
1228 }
1229 }
1230
MakeCircleHashTable(void)1231 static SEXP MakeCircleHashTable(void)
1232 {
1233 return CONS(R_NilValue, allocVector(VECSXP, HASHSIZE));
1234 }
1235
AddCircleHash(SEXP item,SEXP ct)1236 static Rboolean AddCircleHash(SEXP item, SEXP ct)
1237 {
1238 SEXP table, bucket, list;
1239
1240 table = CDR(ct);
1241 R_size_t pos = PTRHASH(item) % LENGTH(table);
1242 bucket = VECTOR_ELT(table, pos);
1243 for (list = bucket; list != R_NilValue; list = CDR(list))
1244 if (TAG(list) == item) {
1245 if (CAR(list) == R_NilValue) {
1246 /* this is the second time; enter in list and mark */
1247 SETCAR(list, R_UnboundValue); /* anything different will do */
1248 SETCAR(ct, CONS(item, CAR(ct)));
1249 }
1250 return TRUE;
1251 }
1252
1253 /* If we get here then this is a new item; enter in the table */
1254 bucket = CONS(R_NilValue, bucket);
1255 SET_TAG(bucket, item);
1256 SET_VECTOR_ELT(table, pos, bucket);
1257 return FALSE;
1258 }
1259
ScanForCircles1(SEXP s,SEXP ct)1260 static void ScanForCircles1(SEXP s, SEXP ct)
1261 {
1262 switch (TYPEOF(s)) {
1263 case LANGSXP:
1264 case LISTSXP:
1265 if (! AddCircleHash(s, ct)) {
1266 ScanForCircles1(CAR(s), ct);
1267 ScanForCircles1(CDR(s), ct);
1268 }
1269 break;
1270 case BCODESXP:
1271 {
1272 int i, n;
1273 SEXP consts = BCODE_CONSTS(s);
1274 n = LENGTH(consts);
1275 for (i = 0; i < n; i++)
1276 ScanForCircles1(VECTOR_ELT(consts, i), ct);
1277 }
1278 break;
1279 default: break;
1280 }
1281 }
1282
ScanForCircles(SEXP s)1283 static SEXP ScanForCircles(SEXP s)
1284 {
1285 SEXP ct;
1286 PROTECT(ct = MakeCircleHashTable());
1287 ScanForCircles1(s, ct);
1288 UNPROTECT(1);
1289 return CAR(ct);
1290 }
1291
findrep(SEXP x,SEXP reps)1292 static SEXP findrep(SEXP x, SEXP reps)
1293 {
1294 for (; reps != R_NilValue; reps = CDR(reps))
1295 if (x == CAR(reps))
1296 return reps;
1297 return R_NilValue;
1298 }
1299
WriteBCLang(SEXP s,SEXP ref_table,SEXP reps,R_outpstream_t stream)1300 static void WriteBCLang(SEXP s, SEXP ref_table, SEXP reps,
1301 R_outpstream_t stream)
1302 {
1303 int type = TYPEOF(s);
1304 if (type == LANGSXP || type == LISTSXP) {
1305 SEXP r = findrep(s, reps);
1306 int output = TRUE;
1307 if (r != R_NilValue) {
1308 /* we have a cell referenced more than once */
1309 if (TAG(r) == R_NilValue) {
1310 /* this is the first reference, so update and register
1311 the counter */
1312 int i = INTEGER(CAR(reps))[0]++;
1313 SET_TAG(r, allocVector(INTSXP, 1));
1314 INTEGER(TAG(r))[0] = i;
1315 OutInteger(stream, BCREPDEF);
1316 OutInteger(stream, i);
1317 }
1318 else {
1319 /* we've seen it before, so just put out the index */
1320 OutInteger(stream, BCREPREF);
1321 OutInteger(stream, INTEGER(TAG(r))[0]);
1322 output = FALSE;
1323 }
1324 }
1325 if (output) {
1326 SEXP attr = ATTRIB(s);
1327 if (attr != R_NilValue) {
1328 switch(type) {
1329 case LANGSXP: type = ATTRLANGSXP; break;
1330 case LISTSXP: type = ATTRLISTSXP; break;
1331 }
1332 }
1333 OutInteger(stream, type);
1334 if (attr != R_NilValue)
1335 WriteItem(attr, ref_table, stream);
1336 WriteItem(TAG(s), ref_table, stream);
1337 WriteBCLang(CAR(s), ref_table, reps, stream);
1338 WriteBCLang(CDR(s), ref_table, reps, stream);
1339 }
1340 }
1341 else {
1342 OutInteger(stream, 0); /* pad */
1343 WriteItem(s, ref_table, stream);
1344 }
1345 }
1346
WriteBC1(SEXP s,SEXP ref_table,SEXP reps,R_outpstream_t stream)1347 static void WriteBC1(SEXP s, SEXP ref_table, SEXP reps, R_outpstream_t stream)
1348 {
1349 int i, n;
1350 SEXP code, consts;
1351 PROTECT(code = R_bcDecode(BCODE_CODE(s)));
1352 WriteItem(code, ref_table, stream);
1353 consts = BCODE_CONSTS(s);
1354 n = LENGTH(consts);
1355 OutInteger(stream, n);
1356 for (i = 0; i < n; i++) {
1357 SEXP c = VECTOR_ELT(consts, i);
1358 int type = TYPEOF(c);
1359 switch (type) {
1360 case BCODESXP:
1361 OutInteger(stream, type);
1362 WriteBC1(c, ref_table, reps, stream);
1363 break;
1364 case LANGSXP:
1365 case LISTSXP:
1366 WriteBCLang(c, ref_table, reps, stream);
1367 break;
1368 default:
1369 OutInteger(stream, type);
1370 WriteItem(c, ref_table, stream);
1371 }
1372 }
1373 UNPROTECT(1);
1374 }
1375
WriteBC(SEXP s,SEXP ref_table,R_outpstream_t stream)1376 static void WriteBC(SEXP s, SEXP ref_table, R_outpstream_t stream)
1377 {
1378 SEXP reps = ScanForCircles(s);
1379 PROTECT(reps = CONS(R_NilValue, reps));
1380 OutInteger(stream, length(reps));
1381 SETCAR(reps, allocVector(INTSXP, 1));
1382 INTEGER(CAR(reps))[0] = 0;
1383 WriteBC1(s, ref_table, reps, stream);
1384 UNPROTECT(1);
1385 }
1386
R_Serialize(SEXP s,R_outpstream_t stream)1387 void R_Serialize(SEXP s, R_outpstream_t stream)
1388 {
1389 SEXP ref_table;
1390 int version = stream->version;
1391
1392 OutFormat(stream);
1393
1394 switch(version) {
1395 case 2:
1396 OutInteger(stream, version);
1397 OutInteger(stream, R_VERSION);
1398 OutInteger(stream, R_Version(2,3,0));
1399 break;
1400 case 3:
1401 {
1402 OutInteger(stream, version);
1403 OutInteger(stream, R_VERSION);
1404 OutInteger(stream, R_Version(3,5,0));
1405 const char *natenc = R_nativeEncoding();
1406 int nelen = (int) strlen(natenc);
1407 OutInteger(stream, nelen);
1408 OutString(stream, natenc, nelen);
1409 break;
1410 }
1411 default: error(_("version %d not supported"), version);
1412 }
1413
1414 PROTECT(ref_table = MakeHashTable());
1415 WriteItem(s, ref_table, stream);
1416 UNPROTECT(1);
1417 }
1418
1419
1420 /*
1421 * Unserialize Code
1422 */
1423
1424 // used in saveload.c
1425 attribute_hidden int R_ReadItemDepth = 0, R_InitReadItemDepth;
1426
1427 static char lastname[8192] = "<unknown>";
1428
1429 #define INITIAL_REFREAD_TABLE_SIZE 128
1430
MakeReadRefTable(void)1431 static SEXP MakeReadRefTable(void)
1432 {
1433 SEXP data = allocVector(VECSXP, INITIAL_REFREAD_TABLE_SIZE);
1434 SET_TRUELENGTH(data, 0);
1435 return CONS(data, R_NilValue);
1436 }
1437
GetReadRef(SEXP table,int index)1438 static SEXP GetReadRef(SEXP table, int index)
1439 {
1440 int i = index - 1;
1441 SEXP data = CAR(table);
1442
1443 if (i < 0 || i >= LENGTH(data))
1444 error(_("reference index out of range"));
1445 return VECTOR_ELT(data, i);
1446 }
1447
AddReadRef(SEXP table,SEXP value)1448 static void AddReadRef(SEXP table, SEXP value)
1449 {
1450 SEXP data = CAR(table);
1451 R_xlen_t count = TRUELENGTH(data) + 1;
1452 if (count >= LENGTH(data)) {
1453 R_xlen_t i, len;
1454 SEXP newdata;
1455
1456 PROTECT(value);
1457 len = 2 * count;
1458 newdata = allocVector(VECSXP, len);
1459 for (i = 0; i < LENGTH(data); i++)
1460 SET_VECTOR_ELT(newdata, i, VECTOR_ELT(data, i));
1461 SETCAR(table, newdata);
1462 data = newdata;
1463 UNPROTECT(1);
1464 }
1465 SET_TRUELENGTH(data, count);
1466 SET_VECTOR_ELT(data, count - 1, value);
1467 }
1468
InStringVec(R_inpstream_t stream,SEXP ref_table)1469 static SEXP InStringVec(R_inpstream_t stream, SEXP ref_table)
1470 {
1471 SEXP s;
1472 int i, len;
1473 if (InInteger(stream) != 0)
1474 error(_("names in persistent strings are not supported yet"));
1475 len = InInteger(stream);
1476 PROTECT(s = allocVector(STRSXP, len));
1477 R_ReadItemDepth++;
1478 for (i = 0; i < len; i++)
1479 SET_STRING_ELT(s, i, ReadItem(ref_table, stream));
1480 R_ReadItemDepth--;
1481 UNPROTECT(1);
1482 return s;
1483 }
1484
1485 /* use static buffer to reuse storage */
1486 static R_INLINE void
InIntegerVec(R_inpstream_t stream,SEXP obj,R_xlen_t length)1487 InIntegerVec(R_inpstream_t stream, SEXP obj, R_xlen_t length)
1488 {
1489 switch (stream->type) {
1490 case R_pstream_xdr_format:
1491 {
1492 static char buf[CHUNK_SIZE * sizeof(int)];
1493 R_xlen_t done, this;
1494 XDR xdrs;
1495 for (done = 0; done < length; done += this) {
1496 this = min2(CHUNK_SIZE, length - done);
1497 stream->InBytes(stream, buf, (int)(sizeof(int) * this));
1498 xdrmem_create(&xdrs, buf, (int)(this * sizeof(int)), XDR_DECODE);
1499 for(int cnt = 0; cnt < this; cnt++)
1500 if(!xdr_int(&xdrs, INTEGER(obj) + done + cnt))
1501 error(_("XDR read failed"));
1502 xdr_destroy(&xdrs);
1503 }
1504 break;
1505 }
1506 case R_pstream_binary_format:
1507 {
1508 R_xlen_t done, this;
1509 for (done = 0; done < length; done += this) {
1510 this = min2(CHUNK_SIZE, length - done);
1511 stream->InBytes(stream, INTEGER(obj) + done,
1512 (int)(sizeof(int) * this));
1513 }
1514 break;
1515 }
1516 default:
1517 for (R_xlen_t cnt = 0; cnt < length; cnt++)
1518 INTEGER(obj)[cnt] = InInteger(stream);
1519 }
1520 }
1521
1522 static R_INLINE void
InRealVec(R_inpstream_t stream,SEXP obj,R_xlen_t length)1523 InRealVec(R_inpstream_t stream, SEXP obj, R_xlen_t length)
1524 {
1525 switch (stream->type) {
1526 case R_pstream_xdr_format:
1527 {
1528 static char buf[CHUNK_SIZE * sizeof(double)];
1529 R_xlen_t done, this;
1530 XDR xdrs;
1531 for (done = 0; done < length; done += this) {
1532 this = min2(CHUNK_SIZE, length - done);
1533 stream->InBytes(stream, buf, (int)(sizeof(double) * this));
1534 xdrmem_create(&xdrs, buf, (int)(this * sizeof(double)), XDR_DECODE);
1535 for(R_xlen_t cnt = 0; cnt < this; cnt++)
1536 if(!xdr_double(&xdrs, REAL(obj) + done + cnt))
1537 error(_("XDR read failed"));
1538 xdr_destroy(&xdrs);
1539 }
1540 break;
1541 }
1542 case R_pstream_binary_format:
1543 {
1544 R_xlen_t done, this;
1545 for (done = 0; done < length; done += this) {
1546 this = min2(CHUNK_SIZE, length - done);
1547 stream->InBytes(stream, REAL(obj) + done,
1548 (int)(sizeof(double) * this));
1549 }
1550 break;
1551 }
1552 default:
1553 for (R_xlen_t cnt = 0; cnt < length; cnt++)
1554 REAL(obj)[cnt] = InReal(stream);
1555 }
1556 }
1557
1558 static R_INLINE void
InComplexVec(R_inpstream_t stream,SEXP obj,R_xlen_t length)1559 InComplexVec(R_inpstream_t stream, SEXP obj, R_xlen_t length)
1560 {
1561 switch (stream->type) {
1562 case R_pstream_xdr_format:
1563 {
1564 static char buf[CHUNK_SIZE * sizeof(Rcomplex)];
1565 R_xlen_t done, this;
1566 XDR xdrs;
1567 Rcomplex *output = COMPLEX(obj);
1568 for (done = 0; done < length; done += this) {
1569 this = min2(CHUNK_SIZE, length - done);
1570 stream->InBytes(stream, buf, (int)(sizeof(Rcomplex) * this));
1571 xdrmem_create(&xdrs, buf, (int)(this * sizeof(Rcomplex)), XDR_DECODE);
1572 for(R_xlen_t cnt = 0; cnt < this; cnt++) {
1573 if(!xdr_double(&xdrs, &(output[done+cnt].r)) ||
1574 !xdr_double(&xdrs, &(output[done+cnt].i)))
1575 error(_("XDR read failed"));
1576 }
1577 xdr_destroy(&xdrs);
1578 }
1579 break;
1580 }
1581 case R_pstream_binary_format:
1582 {
1583 R_xlen_t done, this;
1584 for (done = 0; done < length; done += this) {
1585 this = min2(CHUNK_SIZE, length - done);
1586 stream->InBytes(stream, COMPLEX(obj) + done,
1587 (int)(sizeof(Rcomplex) * this));
1588 }
1589 break;
1590 }
1591 default:
1592 for (R_xlen_t cnt = 0; cnt < length; cnt++)
1593 COMPLEX(obj)[cnt] = InComplex(stream);
1594 }
1595 }
1596
TryConvertString(void * obj,const char * inp,size_t inplen,char * buf,size_t * bufleft)1597 static int TryConvertString(void *obj, const char *inp, size_t inplen,
1598 char *buf, size_t *bufleft)
1599 {
1600 if (Riconv(obj, NULL, NULL, &buf, bufleft) == -1)
1601 return -1;
1602 return (int) Riconv(obj, &inp, &inplen, &buf, bufleft);
1603 }
1604
1605 static SEXP
ConvertChar(void * obj,char * inp,size_t inplen,cetype_t enc)1606 ConvertChar(void *obj, char *inp, size_t inplen, cetype_t enc)
1607 {
1608 size_t buflen = inplen;
1609
1610 for(;;) {
1611 size_t bufleft = buflen;
1612 if (buflen < 1000) {
1613 char buf[buflen + 1];
1614 if (TryConvertString(obj, inp, inplen, buf, &bufleft) == -1) {
1615 if (errno == E2BIG) {
1616 buflen *= 2;
1617 continue;
1618 } else
1619 return R_NilValue;
1620 }
1621 return mkCharLenCE(buf, (int)(buflen - bufleft), enc);
1622 } else {
1623 char *buf = CallocCharBuf(buflen);
1624 if (TryConvertString(obj, inp, inplen, buf, &bufleft) == -1) {
1625 Free(buf);
1626 if (errno == E2BIG) {
1627 buflen *= 2;
1628 continue;
1629 } else
1630 return R_NilValue;
1631 }
1632 SEXP ans = mkCharLenCE(buf, (int)(buflen - bufleft), enc);
1633 Free(buf);
1634 return ans;
1635 }
1636 }
1637 }
1638
native_fromcode(R_inpstream_t stream)1639 static char *native_fromcode(R_inpstream_t stream)
1640 {
1641 char *from = stream->native_encoding;
1642 #ifdef HAVE_ICONV_CP1252
1643 if (!strcmp(from, "ISO-8859-1"))
1644 from = "CP1252";
1645 #endif
1646 return from;
1647 }
1648
1649 /* Read string into pre-allocated buffer, convert encoding if necessary, and
1650 return a CHARSXP */
1651 static SEXP
ReadChar(R_inpstream_t stream,char * buf,int length,int levs)1652 ReadChar(R_inpstream_t stream, char *buf, int length, int levs)
1653 {
1654 InString(stream, buf, length);
1655 buf[length] = '\0';
1656 if (levs & UTF8_MASK)
1657 return mkCharLenCE(buf, length, CE_UTF8);
1658 if (levs & LATIN1_MASK)
1659 return mkCharLenCE(buf, length, CE_LATIN1);
1660 if (levs & BYTES_MASK)
1661 return mkCharLenCE(buf, length, CE_BYTES);
1662 if (levs & ASCII_MASK)
1663 return mkCharLenCE(buf, length, CE_NATIVE);
1664
1665 /* native encoding, not ascii */
1666 if (!stream->native_encoding[0] || /* original native encoding unknown */
1667 (stream->nat2nat_obj == (void *)-1 && /* translation impossible or disabled */
1668 stream->nat2utf8_obj == (void *)-1))
1669 return mkCharLenCE(buf, length, CE_NATIVE);
1670 /* try converting to native encoding */
1671 if (!stream->nat2nat_obj &&
1672 !strcmp(stream->native_encoding, R_nativeEncoding())) {
1673 /* No translation needed. Performance optimization but also leaves
1674 invalid strings in their encoding undetected. */
1675 stream->nat2nat_obj = (void *)-1;
1676 stream->nat2utf8_obj = (void *)-1;
1677 #ifdef WARN_DESERIALIZE_INVALID_UTF8
1678 if (known_to_be_utf8 && !utf8Valid(buf))
1679 warning(_("deserializing invalid UTF-8 string '%s'"), buf);
1680 #endif
1681 }
1682 if (!stream->nat2nat_obj) {
1683 char *from = native_fromcode(stream);
1684 stream->nat2nat_obj = Riconv_open("", from);
1685 if (stream->nat2nat_obj == (void *)-1)
1686 warning(_("unsupported conversion from '%s' to '%s'"), from, "");
1687 }
1688 if (stream->nat2nat_obj != (void *)-1) {
1689 cetype_t enc = CE_NATIVE;
1690 if (known_to_be_utf8) enc = CE_UTF8;
1691 else if (known_to_be_latin1) enc = CE_LATIN1;
1692 SEXP ans = ConvertChar(stream->nat2nat_obj, buf, length, enc);
1693 if (ans != R_NilValue)
1694 return ans;
1695 if (known_to_be_utf8) {
1696 /* nat2nat_obj is converting to UTF-8, no need to use nat2utf8_obj */
1697 stream->nat2utf8_obj = (void *)-1;
1698 char *from = native_fromcode(stream);
1699 warning(_("input string '%s' cannot be translated to UTF-8, is it valid in '%s'?"),
1700 buf, from);
1701 }
1702 }
1703 /* try converting to UTF-8 */
1704 if (!stream->nat2utf8_obj) {
1705 char *from = native_fromcode(stream);
1706 stream->nat2utf8_obj = Riconv_open("UTF-8", from);
1707 if (stream->nat2utf8_obj == (void *)-1) {
1708 /* very unlikely */
1709 warning(_("unsupported conversion from '%s' to '%s'"),
1710 from, "UTF-8");
1711 warning(_("strings not representable in native encoding will not be translated"));
1712 } else
1713 warning(_("strings not representable in native encoding will be translated to UTF-8"));
1714 }
1715 if (stream->nat2utf8_obj != (void *)-1) {
1716 SEXP ans = ConvertChar(stream->nat2utf8_obj, buf, length, CE_UTF8);
1717 if (ans != R_NilValue)
1718 return ans;
1719 char *from = native_fromcode(stream);
1720 warning(_("input string '%s' cannot be translated to UTF-8, is it valid in '%s' ?"),
1721 buf, from);
1722 }
1723 /* no translation possible */
1724 return mkCharLenCE(buf, length, CE_NATIVE);
1725 }
1726
ReadLENGTH(R_inpstream_t stream)1727 static R_xlen_t ReadLENGTH (R_inpstream_t stream)
1728 {
1729 int len = InInteger(stream);
1730 #ifdef LONG_VECTOR_SUPPORT
1731 if (len < -1)
1732 error(_("negative serialized length for vector"));
1733 if (len == -1) {
1734 unsigned int len1, len2;
1735 len1 = InInteger(stream); /* upper part */
1736 len2 = InInteger(stream); /* lower part */
1737 R_xlen_t xlen = len1;
1738 /* sanity check for now */
1739 if (len1 > 65536)
1740 error (_("invalid upper part of serialized vector length"));
1741 return (xlen << 32) + len2;
1742 } else return len;
1743 #else
1744 if (len < 0)
1745 error(_("negative serialized vector length:\nperhaps long vector from 64-bit version of R?"));
1746 return len;
1747 #endif
1748 }
1749
1750 /* differs when it fails from version in envir.c */
R_FindNamespace1(SEXP info)1751 static SEXP R_FindNamespace1(SEXP info)
1752 {
1753 SEXP expr, val, where;
1754 PROTECT(info);
1755 where = PROTECT(ScalarString(mkChar(lastname)));
1756 SEXP s_getNamespace = install("..getNamespace");
1757 PROTECT(expr = LCONS(s_getNamespace,
1758 LCONS(info, LCONS(where, R_NilValue))));
1759 val = eval(expr, R_GlobalEnv);
1760 UNPROTECT(3);
1761 return val;
1762 }
1763
1764
ReadItem(SEXP ref_table,R_inpstream_t stream)1765 static SEXP ReadItem (SEXP ref_table, R_inpstream_t stream)
1766 {
1767 SEXPTYPE type;
1768 SEXP s;
1769 R_xlen_t len, count;
1770 int flags, levs, objf, hasattr, hastag, length;
1771
1772 R_assert(TYPEOF(ref_table) == LISTSXP && TYPEOF(CAR(ref_table)) == VECSXP);
1773
1774 flags = InInteger(stream);
1775 UnpackFlags(flags, &type, &levs, &objf, &hasattr, &hastag);
1776
1777 switch(type) {
1778 case NILVALUE_SXP: return R_NilValue;
1779 case EMPTYENV_SXP: return R_EmptyEnv;
1780 case BASEENV_SXP: return R_BaseEnv;
1781 case GLOBALENV_SXP: return R_GlobalEnv;
1782 case UNBOUNDVALUE_SXP: return R_UnboundValue;
1783 case MISSINGARG_SXP: return R_MissingArg;
1784 case BASENAMESPACE_SXP:
1785 return R_BaseNamespace;
1786 case REFSXP:
1787 return GetReadRef(ref_table, InRefIndex(stream, flags));
1788 case PERSISTSXP:
1789 PROTECT(s = InStringVec(stream, ref_table));
1790 s = PersistentRestore(stream, s);
1791 UNPROTECT(1);
1792 AddReadRef(ref_table, s);
1793 return s;
1794 case ALTREP_SXP:
1795 {
1796 R_ReadItemDepth++;
1797 SEXP info = PROTECT(ReadItem(ref_table, stream));
1798 SEXP state = PROTECT(ReadItem(ref_table, stream));
1799 SEXP attr = PROTECT(ReadItem(ref_table, stream));
1800 s = ALTREP_UNSERIALIZE_EX(info, state, attr, objf, levs);
1801 UNPROTECT(3); /* info, state, attr */
1802 R_ReadItemDepth--;
1803 return s;
1804 }
1805 case SYMSXP:
1806 R_ReadItemDepth++;
1807 PROTECT(s = ReadItem(ref_table, stream)); /* print name */
1808 R_ReadItemDepth--;
1809 s = installTrChar(s);
1810 AddReadRef(ref_table, s);
1811 UNPROTECT(1);
1812 return s;
1813 case PACKAGESXP:
1814 PROTECT(s = InStringVec(stream, ref_table));
1815 s = R_FindPackageEnv(s);
1816 UNPROTECT(1);
1817 AddReadRef(ref_table, s);
1818 return s;
1819 case NAMESPACESXP:
1820 PROTECT(s = InStringVec(stream, ref_table));
1821 s = R_FindNamespace1(s);
1822 AddReadRef(ref_table, s);
1823 UNPROTECT(1);
1824 return s;
1825 case ENVSXP:
1826 {
1827 int locked = InInteger(stream);
1828
1829 PROTECT(s = allocSExp(ENVSXP));
1830
1831 /* MUST register before filling in */
1832 AddReadRef(ref_table, s);
1833
1834 /* Now fill it in */
1835 R_ReadItemDepth++;
1836 SET_ENCLOS(s, ReadItem(ref_table, stream));
1837 SET_FRAME(s, ReadItem(ref_table, stream));
1838 SET_HASHTAB(s, ReadItem(ref_table, stream));
1839 SET_ATTRIB(s, ReadItem(ref_table, stream));
1840 R_ReadItemDepth--;
1841 if (ATTRIB(s) != R_NilValue &&
1842 getAttrib(s, R_ClassSymbol) != R_NilValue)
1843 /* We don't write out the object bit for environments,
1844 so reconstruct it here if needed. */
1845 SET_OBJECT(s, 1);
1846 R_RestoreHashCount(s);
1847 if (locked) R_LockEnvironment(s, FALSE);
1848 /* Convert a NULL enclosure to baseenv() */
1849 if (ENCLOS(s) == R_NilValue) SET_ENCLOS(s, R_BaseEnv);
1850 UNPROTECT(1);
1851 return s;
1852 }
1853 case LISTSXP:
1854 case LANGSXP:
1855 case CLOSXP:
1856 case PROMSXP:
1857 case DOTSXP:
1858 /* This handling of dotted pair objects still uses recursion
1859 on the CDR and so will overflow the PROTECT stack for long
1860 lists. The save format does permit using an iterative
1861 approach; it just has to pass around the place to write the
1862 CDR into when it is allocated. It's more trouble than it
1863 is worth to write the code to handle this now, but if it
1864 becomes necessary we can do it without needing to change
1865 the save format. */
1866 PROTECT(s = allocSExp(type));
1867 SETLEVELS(s, levs);
1868 SET_OBJECT(s, objf);
1869 R_ReadItemDepth++;
1870 Rboolean set_lastname = FALSE;
1871 SET_ATTRIB(s, hasattr ? ReadItem(ref_table, stream) : R_NilValue);
1872 SET_TAG(s, hastag ? ReadItem(ref_table, stream) : R_NilValue);
1873 if (hastag && R_ReadItemDepth == R_InitReadItemDepth + 1 &&
1874 isSymbol(TAG(s))) {
1875 snprintf(lastname, 8192, "%s", CHAR(PRINTNAME(TAG(s))));
1876 set_lastname = TRUE;
1877 }
1878 if (hastag && R_ReadItemDepth <= 0) {
1879 Rprintf("%*s", 2*(R_ReadItemDepth - R_InitReadItemDepth), "");
1880 PrintValue(TAG(s));
1881 }
1882 SETCAR(s, ReadItem(ref_table, stream));
1883 R_ReadItemDepth--; /* do this early because of the recursion. */
1884 SETCDR(s, ReadItem(ref_table, stream));
1885 /* For reading closures and promises stored in earlier versions, convert NULL env to baseenv() */
1886 if (type == CLOSXP && CLOENV(s) == R_NilValue) SET_CLOENV(s, R_BaseEnv);
1887 else if (type == PROMSXP && PRENV(s) == R_NilValue) SET_PRENV(s, R_BaseEnv);
1888 if (set_lastname) strcpy(lastname, "<unknown>");
1889 UNPROTECT(1); /* s */
1890 return s;
1891 default:
1892 /* These break out of the switch to have their ATTR,
1893 LEVELS, and OBJECT fields filled in. Each leaves the
1894 newly allocated value PROTECTed */
1895 switch (type) {
1896 case EXTPTRSXP:
1897 PROTECT(s = allocSExp(type));
1898 AddReadRef(ref_table, s);
1899 R_SetExternalPtrAddr(s, NULL);
1900 R_ReadItemDepth++;
1901 R_SetExternalPtrProtected(s, ReadItem(ref_table, stream));
1902 R_SetExternalPtrTag(s, ReadItem(ref_table, stream));
1903 R_ReadItemDepth--;
1904 break;
1905 case WEAKREFSXP:
1906 PROTECT(s = R_MakeWeakRef(R_NilValue, R_NilValue, R_NilValue,
1907 FALSE));
1908 AddReadRef(ref_table, s);
1909 break;
1910 case SPECIALSXP:
1911 case BUILTINSXP:
1912 {
1913 /* These are all short strings */
1914 length = InInteger(stream);
1915 char cbuf[length+1];
1916 InString(stream, cbuf, length);
1917 cbuf[length] = '\0';
1918 int index = StrToInternal(cbuf);
1919 if (index == NA_INTEGER) {
1920 warning(_("unrecognized internal function name \"%s\""), cbuf);
1921 PROTECT(s = R_NilValue);
1922 } else
1923 PROTECT(s = mkPRIMSXP(index, type == BUILTINSXP));
1924 }
1925 break;
1926 case CHARSXP:
1927 /* these are currently limited to 2^31 -1 bytes */
1928 length = InInteger(stream);
1929 if (length == -1)
1930 PROTECT(s = NA_STRING);
1931 else if (length < 1000) {
1932 char cbuf[length+1];
1933 PROTECT(s = ReadChar(stream, cbuf, length, levs));
1934 } else {
1935 char *cbuf = CallocCharBuf(length);
1936 PROTECT(s = ReadChar(stream, cbuf, length, levs));
1937 Free(cbuf);
1938 }
1939 break;
1940 case LGLSXP:
1941 case INTSXP:
1942 len = ReadLENGTH(stream);
1943 PROTECT(s = allocVector(type, len));
1944 InIntegerVec(stream, s, len);
1945 break;
1946 case REALSXP:
1947 len = ReadLENGTH(stream);
1948 PROTECT(s = allocVector(type, len));
1949 InRealVec(stream, s, len);
1950 break;
1951 case CPLXSXP:
1952 len = ReadLENGTH(stream);
1953 PROTECT(s = allocVector(type, len));
1954 InComplexVec(stream, s, len);
1955 break;
1956 case STRSXP:
1957 len = ReadLENGTH(stream);
1958 PROTECT(s = allocVector(type, len));
1959 R_ReadItemDepth++;
1960 for (count = 0; count < len; ++count)
1961 SET_STRING_ELT(s, count, ReadItem(ref_table, stream));
1962 R_ReadItemDepth--;
1963 break;
1964 case VECSXP:
1965 case EXPRSXP:
1966 len = ReadLENGTH(stream);
1967 PROTECT(s = allocVector(type, len));
1968 R_ReadItemDepth++;
1969 for (count = 0; count < len; ++count) {
1970 if (R_ReadItemDepth <= 0)
1971 Rprintf("%*s[%d]\n", 2*(R_ReadItemDepth - R_InitReadItemDepth), "", count+1);
1972 SET_VECTOR_ELT(s, count, ReadItem(ref_table, stream));
1973 }
1974 R_ReadItemDepth--;
1975 break;
1976 case BCODESXP:
1977 PROTECT(s = ReadBC(ref_table, stream));
1978 break;
1979 case CLASSREFSXP:
1980 error(_("this version of R cannot read class references"));
1981 case GENERICREFSXP:
1982 error(_("this version of R cannot read generic function references"));
1983 case RAWSXP:
1984 len = ReadLENGTH(stream);
1985 PROTECT(s = allocVector(type, len));
1986 switch (stream->type) {
1987 case R_pstream_ascii_format:
1988 for (R_xlen_t ix = 0; ix < len; ix++) {
1989 char word[128];
1990 unsigned int i; // unsigned to avoid compiler warnings
1991 InWord(stream, word, sizeof(word));
1992 if(sscanf(word, "%2x", &i) != 1) error(_("read error"));
1993 RAW(s)[ix] = (Rbyte) i;
1994 }
1995 break;
1996 default:
1997 {
1998 R_xlen_t done, this;
1999 for (done = 0; done < len; done += this) {
2000 this = min2(CHUNK_SIZE, len - done);
2001 stream->InBytes(stream, RAW(s) + done, (int) this);
2002 }
2003 }
2004 }
2005 break;
2006 case S4SXP:
2007 PROTECT(s = allocS4Object());
2008 break;
2009 default:
2010 s = R_NilValue; /* keep compiler happy */
2011 error(_("ReadItem: unknown type %i, perhaps written by later version of R"), type);
2012 }
2013 if (type != CHARSXP) SETLEVELS(s, levs);
2014 SET_OBJECT(s, objf);
2015 if (TYPEOF(s) == CHARSXP) {
2016 /* With the CHARSXP cache maintained through the ATTRIB
2017 field that field has already been filled in by the
2018 mkChar/mkCharCE call above, so we need to leave it
2019 alone. If there is an attribute (as there might be if
2020 the serialized data was created by an older version) we
2021 read and ignore the value. */
2022 R_ReadItemDepth++;
2023 if (hasattr) ReadItem(ref_table, stream);
2024 R_ReadItemDepth--;
2025 }
2026 else {
2027 R_ReadItemDepth++;
2028 SET_ATTRIB(s, hasattr ? ReadItem(ref_table, stream) : R_NilValue);
2029 R_ReadItemDepth--;
2030 }
2031 UNPROTECT(1); /* s */
2032 if (TYPEOF(s) == BCODESXP && !R_BCVersionOK(s))
2033 return R_BytecodeExpr(s);
2034 return s;
2035 }
2036 }
2037
2038 static SEXP ReadBC1(SEXP ref_table, SEXP reps, R_inpstream_t stream);
2039
ReadBCLang(int type,SEXP ref_table,SEXP reps,R_inpstream_t stream)2040 static SEXP ReadBCLang(int type, SEXP ref_table, SEXP reps,
2041 R_inpstream_t stream)
2042 {
2043 switch (type) {
2044 case BCREPREF:
2045 return VECTOR_ELT(reps, InInteger(stream));
2046 case BCREPDEF:
2047 case LANGSXP:
2048 case LISTSXP:
2049 case ATTRLANGSXP:
2050 case ATTRLISTSXP:
2051 {
2052 SEXP ans;
2053 int pos = -1;
2054 int hasattr = FALSE;
2055 if (type == BCREPDEF) {
2056 pos = InInteger(stream);
2057 type = InInteger(stream);
2058 }
2059 switch (type) {
2060 case ATTRLANGSXP: type = LANGSXP; hasattr = TRUE; break;
2061 case ATTRLISTSXP: type = LISTSXP; hasattr = TRUE; break;
2062 }
2063 PROTECT(ans = allocSExp(type));
2064 if (pos >= 0)
2065 SET_VECTOR_ELT(reps, pos, ans);
2066 R_ReadItemDepth++;
2067 if (hasattr)
2068 SET_ATTRIB(ans, ReadItem(ref_table, stream));
2069 SET_TAG(ans, ReadItem(ref_table, stream));
2070 R_ReadItemDepth--;
2071 SETCAR(ans, ReadBCLang(InInteger(stream), ref_table, reps,
2072 stream));
2073 SETCDR(ans, ReadBCLang(InInteger(stream), ref_table, reps,
2074 stream));
2075 UNPROTECT(1);
2076 return ans;
2077 }
2078 default:
2079 {
2080 R_ReadItemDepth++;
2081 SEXP res = ReadItem(ref_table, stream);
2082 R_ReadItemDepth--;
2083 return res;
2084 }
2085 }
2086 }
2087
ReadBCConsts(SEXP ref_table,SEXP reps,R_inpstream_t stream)2088 static SEXP ReadBCConsts(SEXP ref_table, SEXP reps, R_inpstream_t stream)
2089 {
2090 SEXP ans, c;
2091 int i, n;
2092 n = InInteger(stream);
2093 PROTECT(ans = allocVector(VECSXP, n));
2094 for (i = 0; i < n; i++) {
2095 int type = InInteger(stream);
2096 switch (type) {
2097 case BCODESXP:
2098 c = ReadBC1(ref_table, reps, stream);
2099 SET_VECTOR_ELT(ans, i, c);
2100 break;
2101 case LANGSXP:
2102 case LISTSXP:
2103 case BCREPDEF:
2104 case BCREPREF:
2105 case ATTRLANGSXP:
2106 case ATTRLISTSXP:
2107 c = ReadBCLang(type, ref_table, reps, stream);
2108 SET_VECTOR_ELT(ans, i, c);
2109 break;
2110 default:
2111 R_ReadItemDepth++;
2112 SET_VECTOR_ELT(ans, i, ReadItem(ref_table, stream));
2113 R_ReadItemDepth--;
2114 }
2115 }
2116 UNPROTECT(1);
2117 return ans;
2118 }
2119
ReadBC1(SEXP ref_table,SEXP reps,R_inpstream_t stream)2120 static SEXP ReadBC1(SEXP ref_table, SEXP reps, R_inpstream_t stream)
2121 {
2122 SEXP s;
2123 PROTECT(s = allocSExp(BCODESXP));
2124 R_ReadItemDepth++;
2125 SETCAR(s, ReadItem(ref_table, stream)); /* code */
2126 R_ReadItemDepth--;
2127 SEXP bytes = PROTECT(CAR(s));
2128 SETCAR(s, R_bcEncode(bytes));
2129 SETCDR(s, ReadBCConsts(ref_table, reps, stream)); /* consts */
2130 SET_TAG(s, R_NilValue); /* expr */
2131 R_registerBC(bytes, s);
2132 UNPROTECT(2);
2133 return s;
2134 }
2135
ReadBC(SEXP ref_table,R_inpstream_t stream)2136 static SEXP ReadBC(SEXP ref_table, R_inpstream_t stream)
2137 {
2138 SEXP reps, ans;
2139 PROTECT(reps = allocVector(VECSXP, InInteger(stream)));
2140 ans = ReadBC1(ref_table, reps, stream);
2141 UNPROTECT(1);
2142 return ans;
2143 }
2144
DecodeVersion(int packed,int * v,int * p,int * s)2145 static void DecodeVersion(int packed, int *v, int *p, int *s)
2146 {
2147 *v = packed / 65536; packed = packed % 65536;
2148 *p = packed / 256; packed = packed % 256;
2149 *s = packed;
2150 }
2151
R_Unserialize(R_inpstream_t stream)2152 SEXP R_Unserialize(R_inpstream_t stream)
2153 {
2154 int version;
2155 int writer_version, min_reader_version;
2156 SEXP obj, ref_table;
2157
2158 InFormat(stream);
2159
2160 /* Read the version numbers */
2161 version = InInteger(stream);
2162 writer_version = InInteger(stream);
2163 min_reader_version = InInteger(stream);
2164 switch (version) {
2165 case 2: break;
2166 case 3:
2167 {
2168 int nelen = InInteger(stream);
2169 if (nelen > R_CODESET_MAX)
2170 error(_("invalid length of encoding name"));
2171 InString(stream, stream->native_encoding, nelen);
2172 stream->native_encoding[nelen] = '\0';
2173 break;
2174 }
2175 default:
2176 {
2177 int vw, pw, sw;
2178 DecodeVersion(writer_version, &vw, &pw, &sw);
2179 if (min_reader_version < 0)
2180 error(_("cannot read unreleased workspace version %d written by experimental R %d.%d.%d"), version, vw, pw, sw);
2181 else {
2182 int vm, pm, sm;
2183 DecodeVersion(min_reader_version, &vm, &pm, &sm);
2184 error(_("cannot read workspace version %d written by R %d.%d.%d; need R %d.%d.%d or newer"),
2185 version, vw, pw, sw, vm, pm, sm);
2186 }
2187 }
2188 }
2189
2190 /* Read the actual object back */
2191 PROTECT(ref_table = MakeReadRefTable());
2192 obj = ReadItem(ref_table, stream);
2193
2194 if (version == 3) {
2195 if (stream->nat2nat_obj && stream->nat2nat_obj != (void *)-1) {
2196 Riconv_close(stream->nat2nat_obj);
2197 stream->nat2nat_obj = NULL;
2198 }
2199 if (stream->nat2utf8_obj && stream->nat2utf8_obj != (void *)-1) {
2200 Riconv_close(stream->nat2utf8_obj);
2201 stream->nat2utf8_obj = NULL;
2202 }
2203 }
2204 UNPROTECT(1);
2205
2206 return obj;
2207 }
2208
R_SerializeInfo(R_inpstream_t stream)2209 SEXP R_SerializeInfo(R_inpstream_t stream)
2210 {
2211 int version;
2212 int writer_version, min_reader_version, vv, vp, vs;
2213 int anslen = 4;
2214 SEXP ans, names;
2215 char buf[128];
2216
2217 InFormat(stream);
2218
2219 /* Read the version numbers */
2220 version = InInteger(stream);
2221 if (version == 3)
2222 anslen++;
2223 writer_version = InInteger(stream);
2224 min_reader_version = InInteger(stream);
2225
2226 PROTECT(ans = allocVector(VECSXP, anslen));
2227 PROTECT(names = allocVector(STRSXP, anslen));
2228 SET_STRING_ELT(names, 0, mkChar("version"));
2229 SET_VECTOR_ELT(ans, 0, ScalarInteger(version));
2230 SET_STRING_ELT(names, 1, mkChar("writer_version"));
2231 DecodeVersion(writer_version, &vv, &vp, &vs);
2232 snprintf(buf, 128, "%d.%d.%d", vv, vp, vs);
2233 SET_VECTOR_ELT(ans, 1, mkString(buf));
2234 SET_STRING_ELT(names, 2, mkChar("min_reader_version"));
2235 if (min_reader_version < 0)
2236 /* unreleased version of R */
2237 SET_VECTOR_ELT(ans, 2, ScalarString(NA_STRING));
2238 else {
2239 DecodeVersion(min_reader_version, &vv, &vp, &vs);
2240 snprintf(buf, 128, "%d.%d.%d", vv, vp, vs);
2241 SET_VECTOR_ELT(ans, 2, mkString(buf));
2242 }
2243 SET_STRING_ELT(names, 3, mkChar("format"));
2244 switch(stream->type) {
2245 case R_pstream_ascii_format:
2246 SET_VECTOR_ELT(ans, 3, mkString("ascii"));
2247 break;
2248 case R_pstream_binary_format:
2249 SET_VECTOR_ELT(ans, 3, mkString("binary"));
2250 break;
2251 case R_pstream_xdr_format:
2252 SET_VECTOR_ELT(ans, 3, mkString("xdr"));
2253 break;
2254 default:
2255 error(_("unknown input format"));
2256 }
2257 if (version == 3) {
2258 SET_STRING_ELT(names, 4, mkChar("native_encoding"));
2259 int nelen = InInteger(stream);
2260 if (nelen > R_CODESET_MAX)
2261 error(_("invalid length of encoding name"));
2262 char nbuf[nelen + 1];
2263 InString(stream, nbuf, nelen);
2264 nbuf[nelen] = '\0';
2265 SET_VECTOR_ELT(ans, 4, mkString(nbuf));
2266 }
2267 setAttrib(ans, R_NamesSymbol, names);
2268 UNPROTECT(2); /* ans, names */
2269
2270 return ans;
2271 }
2272
2273 /*
2274 * Generic Persistent Stream Initializers
2275 */
2276
2277 void
R_InitInPStream(R_inpstream_t stream,R_pstream_data_t data,R_pstream_format_t type,int (* inchar)(R_inpstream_t),void (* inbytes)(R_inpstream_t,void *,int),SEXP (* phook)(SEXP,SEXP),SEXP pdata)2278 R_InitInPStream(R_inpstream_t stream, R_pstream_data_t data,
2279 R_pstream_format_t type,
2280 int (*inchar)(R_inpstream_t),
2281 void (*inbytes)(R_inpstream_t, void *, int),
2282 SEXP (*phook)(SEXP, SEXP), SEXP pdata)
2283 {
2284 stream->data = data;
2285 stream->type = type;
2286 stream->InChar = inchar;
2287 stream->InBytes = inbytes;
2288 stream->InPersistHookFunc = phook;
2289 stream->InPersistHookData = pdata;
2290 stream->native_encoding[0] = 0;
2291 stream->nat2nat_obj = NULL;
2292 stream->nat2utf8_obj = NULL;
2293 }
2294
2295 void
R_InitOutPStream(R_outpstream_t stream,R_pstream_data_t data,R_pstream_format_t type,int version,void (* outchar)(R_outpstream_t,int),void (* outbytes)(R_outpstream_t,void *,int),SEXP (* phook)(SEXP,SEXP),SEXP pdata)2296 R_InitOutPStream(R_outpstream_t stream, R_pstream_data_t data,
2297 R_pstream_format_t type, int version,
2298 void (*outchar)(R_outpstream_t, int),
2299 void (*outbytes)(R_outpstream_t, void *, int),
2300 SEXP (*phook)(SEXP, SEXP), SEXP pdata)
2301 {
2302 stream->data = data;
2303 stream->type = type;
2304 stream->version = version != 0 ? version : defaultSerializeVersion();
2305 stream->OutChar = outchar;
2306 stream->OutBytes = outbytes;
2307 stream->OutPersistHookFunc = phook;
2308 stream->OutPersistHookData = pdata;
2309 }
2310
2311
2312 /*
2313 * Persistent File Streams
2314 */
2315
OutCharFile(R_outpstream_t stream,int c)2316 static void OutCharFile(R_outpstream_t stream, int c)
2317 {
2318 FILE *fp = stream->data;
2319 fputc(c, fp);
2320 }
2321
2322
InCharFile(R_inpstream_t stream)2323 static int InCharFile(R_inpstream_t stream)
2324 {
2325 FILE *fp = stream->data;
2326 return fgetc(fp);
2327 }
2328
OutBytesFile(R_outpstream_t stream,void * buf,int length)2329 static void OutBytesFile(R_outpstream_t stream, void *buf, int length)
2330 {
2331 FILE *fp = stream->data;
2332 size_t out = fwrite(buf, 1, length, fp);
2333 if (out != length) error(_("write failed"));
2334 }
2335
InBytesFile(R_inpstream_t stream,void * buf,int length)2336 static void InBytesFile(R_inpstream_t stream, void *buf, int length)
2337 {
2338 FILE *fp = stream->data;
2339 size_t in = fread(buf, 1, length, fp);
2340 if (in != length) error(_("read failed"));
2341 }
2342
2343 void
R_InitFileOutPStream(R_outpstream_t stream,FILE * fp,R_pstream_format_t type,int version,SEXP (* phook)(SEXP,SEXP),SEXP pdata)2344 R_InitFileOutPStream(R_outpstream_t stream, FILE *fp,
2345 R_pstream_format_t type, int version,
2346 SEXP (*phook)(SEXP, SEXP), SEXP pdata)
2347 {
2348 R_InitOutPStream(stream, (R_pstream_data_t) fp, type, version,
2349 OutCharFile, OutBytesFile, phook, pdata);
2350 }
2351
2352 void
R_InitFileInPStream(R_inpstream_t stream,FILE * fp,R_pstream_format_t type,SEXP (* phook)(SEXP,SEXP),SEXP pdata)2353 R_InitFileInPStream(R_inpstream_t stream, FILE *fp,
2354 R_pstream_format_t type,
2355 SEXP (*phook)(SEXP, SEXP), SEXP pdata)
2356 {
2357 R_InitInPStream(stream, (R_pstream_data_t) fp, type,
2358 InCharFile, InBytesFile, phook, pdata);
2359 }
2360
2361
2362 /*
2363 * Persistent Connection Streams
2364 */
2365
2366 #include <Rconnections.h>
2367
CheckInConn(Rconnection con)2368 static void CheckInConn(Rconnection con)
2369 {
2370 if (! con->isopen)
2371 error(_("connection is not open"));
2372 if (! con->canread || con->read == NULL)
2373 error(_("cannot read from this connection"));
2374 }
2375
CheckOutConn(Rconnection con)2376 static void CheckOutConn(Rconnection con)
2377 {
2378 if (! con->isopen)
2379 error(_("connection is not open"));
2380 if (! con->canwrite || con->write == NULL)
2381 error(_("cannot write to this connection"));
2382 }
2383
InBytesConn(R_inpstream_t stream,void * buf,int length)2384 static void InBytesConn(R_inpstream_t stream, void *buf, int length)
2385 {
2386 Rconnection con = (Rconnection) stream->data;
2387 CheckInConn(con);
2388 if (con->text) {
2389 int i;
2390 char *p = buf;
2391 for (i = 0; i < length; i++)
2392 p[i] = (char) Rconn_fgetc(con);
2393 }
2394 else {
2395 if (stream->type == R_pstream_ascii_format) {
2396 char linebuf[4];
2397 unsigned char *p = buf;
2398 int i;
2399 unsigned int res;
2400 for (i = 0; i < length; i++) {
2401 size_t ncread = Rconn_getline(con, linebuf, 3);
2402 if (ncread != 2)
2403 error(_("error reading from ascii connection"));
2404 if (!sscanf(linebuf, "%02x", &res))
2405 error(_("unexpected format in ascii connection"));
2406 *p++ = (unsigned char)res;
2407 }
2408 } else {
2409 if (length != con->read(buf, 1, length, con))
2410 error(_("error reading from connection"));
2411 }
2412 }
2413 }
2414
InCharConn(R_inpstream_t stream)2415 static int InCharConn(R_inpstream_t stream)
2416 {
2417 char buf[1];
2418 Rconnection con = (Rconnection) stream->data;
2419 CheckInConn(con);
2420 if (con->text)
2421 return Rconn_fgetc(con);
2422 else {
2423 if (1 != con->read(buf, 1, 1, con))
2424 error(_("error reading from connection"));
2425 return buf[0];
2426 }
2427 }
2428
OutBytesConn(R_outpstream_t stream,void * buf,int length)2429 static void OutBytesConn(R_outpstream_t stream, void *buf, int length)
2430 {
2431 Rconnection con = (Rconnection) stream->data;
2432 CheckOutConn(con);
2433 if (con->text) {
2434 int i;
2435 char *p = buf;
2436 for (i = 0; i < length; i++)
2437 Rconn_printf(con, "%c", p[i]);
2438 }
2439 else {
2440 if (length != con->write(buf, 1, length, con))
2441 error(_("error writing to connection"));
2442 }
2443 }
2444
OutCharConn(R_outpstream_t stream,int c)2445 static void OutCharConn(R_outpstream_t stream, int c)
2446 {
2447 Rconnection con = (Rconnection) stream->data;
2448 CheckOutConn(con);
2449 if (con->text)
2450 Rconn_printf(con, "%c", c);
2451 else {
2452 char buf[1];
2453 buf[0] = (char) c;
2454 if (1 != con->write(buf, 1, 1, con))
2455 error(_("error writing to connection"));
2456 }
2457 }
2458
R_InitConnOutPStream(R_outpstream_t stream,Rconnection con,R_pstream_format_t type,int version,SEXP (* phook)(SEXP,SEXP),SEXP pdata)2459 void R_InitConnOutPStream(R_outpstream_t stream, Rconnection con,
2460 R_pstream_format_t type, int version,
2461 SEXP (*phook)(SEXP, SEXP), SEXP pdata)
2462 {
2463 CheckOutConn(con);
2464 if (con->text &&
2465 !(type == R_pstream_ascii_format || type == R_pstream_asciihex_format) )
2466 error(_("only ascii format can be written to text mode connections"));
2467 R_InitOutPStream(stream, (R_pstream_data_t) con, type, version,
2468 OutCharConn, OutBytesConn, phook, pdata);
2469 }
2470
R_InitConnInPStream(R_inpstream_t stream,Rconnection con,R_pstream_format_t type,SEXP (* phook)(SEXP,SEXP),SEXP pdata)2471 void R_InitConnInPStream(R_inpstream_t stream, Rconnection con,
2472 R_pstream_format_t type,
2473 SEXP (*phook)(SEXP, SEXP), SEXP pdata)
2474 {
2475 CheckInConn(con);
2476 if (con->text) {
2477 if (type == R_pstream_any_format)
2478 type = R_pstream_ascii_format;
2479 else if (type != R_pstream_ascii_format)
2480 error(_("only ascii format can be read from text mode connections"));
2481 }
2482 R_InitInPStream(stream, (R_pstream_data_t) con, type,
2483 InCharConn, InBytesConn, phook, pdata);
2484 }
2485
2486 /* ought to quote the argument, but it should only be an ENVSXP or STRSXP */
CallHook(SEXP x,SEXP fun)2487 static SEXP CallHook(SEXP x, SEXP fun)
2488 {
2489 SEXP val, call;
2490 PROTECT(call = LCONS(fun, LCONS(x, R_NilValue)));
2491 val = eval(call, R_GlobalEnv);
2492 UNPROTECT(1);
2493 return val;
2494 }
2495
con_cleanup(void * data)2496 static void con_cleanup(void *data)
2497 {
2498 Rconnection con = data;
2499 if(con->isopen) con->close(con);
2500 }
2501
2502 /* Used from saveRDS().
2503 This became public in R 2.13.0, and that version added support for
2504 connections internally */
2505 SEXP attribute_hidden
do_serializeToConn(SEXP call,SEXP op,SEXP args,SEXP env)2506 do_serializeToConn(SEXP call, SEXP op, SEXP args, SEXP env)
2507 {
2508 /* serializeToConn(object, conn, ascii, version, hook) */
2509
2510 SEXP object, fun;
2511 Rboolean ascii, wasopen;
2512 int version;
2513 Rconnection con;
2514 struct R_outpstream_st out;
2515 R_pstream_format_t type;
2516 SEXP (*hook)(SEXP, SEXP);
2517 RCNTXT cntxt;
2518
2519 checkArity(op, args);
2520
2521 object = CAR(args);
2522 con = getConnection(asInteger(CADR(args)));
2523
2524 if (TYPEOF(CADDR(args)) != LGLSXP)
2525 error(_("'ascii' must be logical"));
2526 ascii = INTEGER(CADDR(args))[0];
2527 if (ascii == NA_LOGICAL) type = R_pstream_asciihex_format;
2528 else if (ascii) type = R_pstream_ascii_format;
2529 else type = R_pstream_xdr_format;
2530
2531 if (CADDDR(args) == R_NilValue)
2532 version = defaultSerializeVersion();
2533 else
2534 version = asInteger(CADDDR(args));
2535 if (version == NA_INTEGER || version <= 0)
2536 error(_("bad version value"));
2537 if (version < 2)
2538 error(_("cannot save to connections in version %d format"), version);
2539
2540 fun = CAR(nthcdr(args,4));
2541 hook = fun != R_NilValue ? CallHook : NULL;
2542
2543 /* Now we need to do some sanity checking of the arguments.
2544 A filename will already have been opened, so anything
2545 not open was specified as a connection directly.
2546 */
2547 wasopen = con->isopen;
2548 if(!wasopen) {
2549 char mode[5];
2550 strcpy(mode, con->mode);
2551 strcpy(con->mode, ascii ? "w" : "wb");
2552 if(!con->open(con)) error(_("cannot open the connection"));
2553 strcpy(con->mode, mode);
2554 /* Set up a context which will close the connection on error */
2555 begincontext(&cntxt, CTXT_CCODE, R_NilValue, R_BaseEnv, R_BaseEnv,
2556 R_NilValue, R_NilValue);
2557 cntxt.cend = &con_cleanup;
2558 cntxt.cenddata = con;
2559 }
2560 if (!ascii && con->text)
2561 error(_("binary-mode connection required for ascii=FALSE"));
2562 if(!con->canwrite)
2563 error(_("connection not open for writing"));
2564
2565 R_InitConnOutPStream(&out, con, type, version, hook, fun);
2566 R_Serialize(object, &out);
2567 if(!wasopen) {endcontext(&cntxt); con->close(con);}
2568
2569 return R_NilValue;
2570 }
2571
2572 /* unserializeFromConn(conn, hook) used from readRDS().
2573 It became public in R 2.13.0, and that version added support for
2574 connections internally */
2575 SEXP attribute_hidden
do_unserializeFromConn(SEXP call,SEXP op,SEXP args,SEXP env)2576 do_unserializeFromConn(SEXP call, SEXP op, SEXP args, SEXP env)
2577 {
2578 /* 0 .. unserializeFromConn(conn, hook) */
2579 /* 1 .. serializeInfoFromConn(conn) */
2580
2581 struct R_inpstream_st in;
2582 Rconnection con;
2583 SEXP fun, ans;
2584 SEXP (*hook)(SEXP, SEXP);
2585 Rboolean wasopen;
2586 RCNTXT cntxt;
2587
2588 checkArity(op, args);
2589
2590 con = getConnection(asInteger(CAR(args)));
2591
2592 /* Now we need to do some sanity checking of the arguments.
2593 A filename will already have been opened, so anything
2594 not open was specified as a connection directly.
2595 */
2596 wasopen = con->isopen;
2597 if(!wasopen) {
2598 char mode[5];
2599 strcpy(mode, con->mode);
2600 strcpy(con->mode, "rb");
2601 if(!con->open(con)) error(_("cannot open the connection"));
2602 strcpy(con->mode, mode);
2603 /* Set up a context which will close the connection on error */
2604 begincontext(&cntxt, CTXT_CCODE, R_NilValue, R_BaseEnv, R_BaseEnv,
2605 R_NilValue, R_NilValue);
2606 cntxt.cend = &con_cleanup;
2607 cntxt.cenddata = con;
2608 }
2609 if(!con->canread) error(_("connection not open for reading"));
2610
2611 fun = PRIMVAL(op) == 0 ? CADR(args) : R_NilValue;
2612 hook = fun != R_NilValue ? CallHook : NULL;
2613 R_InitConnInPStream(&in, con, R_pstream_any_format, hook, fun);
2614 ans = PRIMVAL(op) == 0 ? R_Unserialize(&in) : R_SerializeInfo(&in);
2615 if(!wasopen) {
2616 PROTECT(ans); /* paranoia about next line */
2617 endcontext(&cntxt);
2618 con->close(con);
2619 UNPROTECT(1);
2620 }
2621 return ans;
2622 }
2623
2624 /*
2625 * Persistent Buffered Binary Connection Streams
2626 */
2627
2628 /**** should eventually come from a public header file */
2629 size_t R_WriteConnection(Rconnection con, void *buf, size_t n);
2630
2631 #define BCONBUFSIZ 4096
2632
2633 typedef struct bconbuf_st {
2634 Rconnection con;
2635 int count;
2636 unsigned char buf[BCONBUFSIZ];
2637 } *bconbuf_t;
2638
flush_bcon_buffer(bconbuf_t bb)2639 static void flush_bcon_buffer(bconbuf_t bb)
2640 {
2641 if (R_WriteConnection(bb->con, bb->buf, bb->count) != bb->count)
2642 error(_("error writing to connection"));
2643 bb->count = 0;
2644 }
2645
OutCharBB(R_outpstream_t stream,int c)2646 static void OutCharBB(R_outpstream_t stream, int c)
2647 {
2648 bconbuf_t bb = stream->data;
2649 if (bb->count >= BCONBUFSIZ)
2650 flush_bcon_buffer(bb);
2651 bb->buf[bb->count++] = (char) c;
2652 }
2653
OutBytesBB(R_outpstream_t stream,void * buf,int length)2654 static void OutBytesBB(R_outpstream_t stream, void *buf, int length)
2655 {
2656 bconbuf_t bb = stream->data;
2657 if (bb->count + length > BCONBUFSIZ)
2658 flush_bcon_buffer(bb);
2659 if (length <= BCONBUFSIZ) {
2660 memcpy(bb->buf + bb->count, buf, length);
2661 bb->count += length;
2662 }
2663 else if (R_WriteConnection(bb->con, buf, length) != length)
2664 error(_("error writing to connection"));
2665 }
2666
InitBConOutPStream(R_outpstream_t stream,bconbuf_t bb,Rconnection con,R_pstream_format_t type,int version,SEXP (* phook)(SEXP,SEXP),SEXP pdata)2667 static void InitBConOutPStream(R_outpstream_t stream, bconbuf_t bb,
2668 Rconnection con,
2669 R_pstream_format_t type, int version,
2670 SEXP (*phook)(SEXP, SEXP), SEXP pdata)
2671 {
2672 bb->count = 0;
2673 bb->con = con;
2674 R_InitOutPStream(stream, (R_pstream_data_t) bb, type, version,
2675 OutCharBB, OutBytesBB, phook, pdata);
2676 }
2677
2678 /* only for use by serialize(), with binary write to a socket connection */
2679 static SEXP
R_serializeb(SEXP object,SEXP icon,SEXP xdr,SEXP Sversion,SEXP fun)2680 R_serializeb(SEXP object, SEXP icon, SEXP xdr, SEXP Sversion, SEXP fun)
2681 {
2682 struct R_outpstream_st out;
2683 SEXP (*hook)(SEXP, SEXP);
2684 struct bconbuf_st bbs;
2685 Rconnection con = getConnection(asInteger(icon));
2686 int version;
2687
2688 if (Sversion == R_NilValue)
2689 version = defaultSerializeVersion();
2690 else version = asInteger(Sversion);
2691 if (version == NA_INTEGER || version <= 0)
2692 error(_("bad version value"));
2693
2694 hook = fun != R_NilValue ? CallHook : NULL;
2695
2696 InitBConOutPStream(&out, &bbs, con,
2697 asLogical(xdr) ? R_pstream_xdr_format : R_pstream_binary_format,
2698 version, hook, fun);
2699 R_Serialize(object, &out);
2700 flush_bcon_buffer(&bbs);
2701 return R_NilValue;
2702 }
2703
2704
2705 /*
2706 * Persistent Memory Streams
2707 */
2708
2709 typedef struct membuf_st {
2710 R_size_t size;
2711 R_size_t count;
2712 unsigned char *buf;
2713 } *membuf_t;
2714
2715
2716 #define INCR MAXELTSIZE
resize_buffer(membuf_t mb,R_size_t needed)2717 static void resize_buffer(membuf_t mb, R_size_t needed)
2718 {
2719 if(needed > R_XLEN_T_MAX)
2720 error(_("serialization is too large to store in a raw vector"));
2721 #ifdef LONG_VECTOR_SUPPORT
2722 if(needed < 10000000) /* ca 10MB */
2723 needed = (1+2*needed/INCR) * INCR;
2724 else
2725 needed = (R_size_t)((1+1.2*(double)needed/INCR) * INCR);
2726 #else
2727 if(needed < 10000000) /* ca 10MB */
2728 needed = (1+2*needed/INCR) * INCR;
2729 else if(needed < 1700000000) /* close to 2GB/1.2 */
2730 needed = (R_size_t)((1+1.2*(double)needed/INCR) * INCR);
2731 else if(needed < INT_MAX - INCR)
2732 needed = (1+needed/INCR) * INCR;
2733 #endif
2734 unsigned char *tmp = realloc(mb->buf, needed);
2735 if (tmp == NULL) {
2736 free(mb->buf); mb->buf = NULL;
2737 error(_("cannot allocate buffer"));
2738 } else mb->buf = tmp;
2739 mb->size = needed;
2740 }
2741
OutCharMem(R_outpstream_t stream,int c)2742 static void OutCharMem(R_outpstream_t stream, int c)
2743 {
2744 membuf_t mb = stream->data;
2745 if (mb->count >= mb->size)
2746 resize_buffer(mb, mb->count + 1);
2747 mb->buf[mb->count++] = (char) c;
2748 }
2749
OutBytesMem(R_outpstream_t stream,void * buf,int length)2750 static void OutBytesMem(R_outpstream_t stream, void *buf, int length)
2751 {
2752 membuf_t mb = stream->data;
2753 R_size_t needed = mb->count + (R_size_t) length;
2754 #ifndef LONG_VECTOR_SUPPORT
2755 /* There is a potential overflow here on 32-bit systems */
2756 if((double) mb->count + length > (double) INT_MAX)
2757 error(_("serialization is too large to store in a raw vector"));
2758 #endif
2759 if (needed > mb->size) resize_buffer(mb, needed);
2760 memcpy(mb->buf + mb->count, buf, length);
2761 mb->count = needed;
2762 }
2763
InCharMem(R_inpstream_t stream)2764 static int InCharMem(R_inpstream_t stream)
2765 {
2766 membuf_t mb = stream->data;
2767 if (mb->count >= mb->size)
2768 error(_("read error"));
2769 return mb->buf[mb->count++];
2770 }
2771
InBytesMem(R_inpstream_t stream,void * buf,int length)2772 static void InBytesMem(R_inpstream_t stream, void *buf, int length)
2773 {
2774 membuf_t mb = stream->data;
2775 if (mb->count + (R_size_t) length > mb->size)
2776 error(_("read error"));
2777 memcpy(buf, mb->buf + mb->count, length);
2778 mb->count += length;
2779 }
2780
InitMemInPStream(R_inpstream_t stream,membuf_t mb,void * buf,R_size_t length,SEXP (* phook)(SEXP,SEXP),SEXP pdata)2781 static void InitMemInPStream(R_inpstream_t stream, membuf_t mb,
2782 void *buf, R_size_t length,
2783 SEXP (*phook)(SEXP, SEXP), SEXP pdata)
2784 {
2785 mb->count = 0;
2786 mb->size = length;
2787 mb->buf = buf;
2788 R_InitInPStream(stream, (R_pstream_data_t) mb, R_pstream_any_format,
2789 InCharMem, InBytesMem, phook, pdata);
2790 }
2791
InitMemOutPStream(R_outpstream_t stream,membuf_t mb,R_pstream_format_t type,int version,SEXP (* phook)(SEXP,SEXP),SEXP pdata)2792 static void InitMemOutPStream(R_outpstream_t stream, membuf_t mb,
2793 R_pstream_format_t type, int version,
2794 SEXP (*phook)(SEXP, SEXP), SEXP pdata)
2795 {
2796 mb->count = 0;
2797 mb->size = 0;
2798 mb->buf = NULL;
2799 R_InitOutPStream(stream, (R_pstream_data_t) mb, type, version,
2800 OutCharMem, OutBytesMem, phook, pdata);
2801 }
2802
free_mem_buffer(void * data)2803 static void free_mem_buffer(void *data)
2804 {
2805 membuf_t mb = data;
2806 if (mb->buf != NULL) {
2807 unsigned char *buf = mb->buf;
2808 mb->buf = NULL;
2809 free(buf);
2810 }
2811 }
2812
CloseMemOutPStream(R_outpstream_t stream)2813 static SEXP CloseMemOutPStream(R_outpstream_t stream)
2814 {
2815 SEXP val;
2816 membuf_t mb = stream->data;
2817 /* duplicate check, for future proofing */
2818 #ifndef LONG_VECTOR_SUPPORT
2819 if(mb->count > INT_MAX)
2820 error(_("serialization is too large to store in a raw vector"));
2821 #endif
2822 PROTECT(val = allocVector(RAWSXP, mb->count));
2823 memcpy(RAW(val), mb->buf, mb->count);
2824 free_mem_buffer(mb);
2825 UNPROTECT(1);
2826 return val;
2827 }
2828
2829 static SEXP
R_serialize(SEXP object,SEXP icon,SEXP ascii,SEXP Sversion,SEXP fun)2830 R_serialize(SEXP object, SEXP icon, SEXP ascii, SEXP Sversion, SEXP fun)
2831 {
2832 struct R_outpstream_st out;
2833 R_pstream_format_t type;
2834 SEXP (*hook)(SEXP, SEXP);
2835 int version;
2836
2837 if (Sversion == R_NilValue)
2838 version = defaultSerializeVersion();
2839 else version = asInteger(Sversion);
2840 if (version == NA_INTEGER || version <= 0)
2841 error(_("bad version value"));
2842
2843 hook = fun != R_NilValue ? CallHook : NULL;
2844
2845 // Prior to 3.2.0 this was logical, values 0/1/NA for binary.
2846 int asc = asInteger(ascii);
2847 switch(asc) {
2848 case 1: type = R_pstream_ascii_format; break;
2849 case 2: type = R_pstream_asciihex_format; break;
2850 case 3: type = R_pstream_binary_format; break;
2851 default: type = R_pstream_xdr_format; break;
2852 }
2853
2854 if (icon == R_NilValue) {
2855 RCNTXT cntxt;
2856 struct membuf_st mbs;
2857 SEXP val;
2858
2859 /* set up a context which will free the buffer if there is an error */
2860 begincontext(&cntxt, CTXT_CCODE, R_NilValue, R_BaseEnv, R_BaseEnv,
2861 R_NilValue, R_NilValue);
2862 cntxt.cend = &free_mem_buffer;
2863 cntxt.cenddata = &mbs;
2864
2865 InitMemOutPStream(&out, &mbs, type, version, hook, fun);
2866 R_Serialize(object, &out);
2867
2868 PROTECT(val = CloseMemOutPStream(&out));
2869
2870 /* end the context after anything that could raise an error but before
2871 calling OutTerm so it doesn't get called twice */
2872 endcontext(&cntxt);
2873
2874 UNPROTECT(1); /* val */
2875 return val;
2876 }
2877 else {
2878 Rconnection con = getConnection(asInteger(icon));
2879 R_InitConnOutPStream(&out, con, type, version, hook, fun);
2880 R_Serialize(object, &out);
2881 return R_NilValue;
2882 }
2883 }
2884
2885
R_unserialize(SEXP icon,SEXP fun)2886 SEXP attribute_hidden R_unserialize(SEXP icon, SEXP fun)
2887 {
2888 struct R_inpstream_st in;
2889 SEXP (*hook)(SEXP, SEXP);
2890
2891 hook = fun != R_NilValue ? CallHook : NULL;
2892
2893 if (TYPEOF(icon) == STRSXP && LENGTH(icon) > 0) {
2894 /* was the format in R < 2.4.0, removed in R 2.8.0 */
2895 error("character vectors are no longer accepted by unserialize()");
2896 return R_NilValue; /* -Wall */
2897 } else if (TYPEOF(icon) == RAWSXP) {
2898 /* We might want to read from a long raw vector */
2899 struct membuf_st mbs;
2900 void *data = RAW(icon);
2901 R_size_t length = XLENGTH(icon);
2902 InitMemInPStream(&in, &mbs, data, length, hook, fun);
2903 return R_Unserialize(&in);
2904 } else {
2905 Rconnection con = getConnection(asInteger(icon));
2906 R_InitConnInPStream(&in, con, R_pstream_any_format, hook, fun);
2907 return R_Unserialize(&in);
2908 }
2909 }
2910
2911
2912 /*
2913 * Support Code for Lazy Loading of Packages
2914 */
2915
2916
2917 #define IS_PROPER_STRING(s) (TYPEOF(s) == STRSXP && LENGTH(s) > 0)
2918
2919 /* Appends a raw vector to the end of a file using binary mode.
2920 Returns an integer vector of the initial offset of the string in
2921 the file and the length of the vector. */
2922
appendRawToFile(SEXP file,SEXP bytes)2923 static SEXP appendRawToFile(SEXP file, SEXP bytes)
2924 {
2925 FILE *fp;
2926 size_t len, out;
2927 long pos; // what ftell gives: won't work for > 2GB files
2928 SEXP val;
2929 const void *vmax;
2930 const char *cfile;
2931
2932 if (! IS_PROPER_STRING(file))
2933 error(_("not a proper file name"));
2934 vmax = vmaxget();
2935 cfile = translateCharFP(STRING_ELT(file, 0));
2936 if (TYPEOF(bytes) != RAWSXP)
2937 error(_("not a proper raw vector"));
2938 #ifdef HAVE_WORKING_FTELL
2939 /* Windows' ftell returns position 0 with "ab" */
2940 if ((fp = R_fopen(cfile, "ab")) == NULL) {
2941 error( _("cannot open file '%s': %s"), cfile,
2942 strerror(errno));
2943 }
2944 #else
2945 if ((fp = R_fopen(cfile, "r+b")) == NULL) {
2946 error( _("cannot open file '%s': %s"), cfile,
2947 strerror(errno));
2948 }
2949 if (fseek(fp, 0, SEEK_END) != 0) {
2950 fclose(fp);
2951 error(_("seek failed on %s"), cfile);
2952 }
2953 #endif
2954
2955 len = LENGTH(bytes);
2956 pos = ftell(fp);
2957 out = fwrite(RAW(bytes), 1, len, fp);
2958 fclose(fp);
2959
2960 if (out != len) error(_("write failed"));
2961 if (pos == -1) error(_("could not determine file position"));
2962
2963 val = allocVector(INTSXP, 2);
2964 INTEGER(val)[0] = (int) pos;
2965 INTEGER(val)[1] = (int) len;
2966 vmaxset(vmax);
2967
2968 return val;
2969 }
2970
2971 /* Interface to cache the pkg.rdb files */
2972
2973 #define NC 100
2974 static int used = 0;
2975 static char names[NC][PATH_MAX];
2976 static char *ptr[NC];
2977
2978 SEXP attribute_hidden
do_lazyLoadDBflush(SEXP call,SEXP op,SEXP args,SEXP env)2979 do_lazyLoadDBflush(SEXP call, SEXP op, SEXP args, SEXP env)
2980 {
2981 checkArity(op, args);
2982
2983 int i;
2984 const char *cfile = translateCharFP(STRING_ELT(CAR(args), 0));
2985
2986 /* fprintf(stderr, "flushing file %s", cfile); */
2987 for (i = 0; i < used; i++)
2988 if(strcmp(cfile, names[i]) == 0) {
2989 strcpy(names[i], "");
2990 free(ptr[i]);
2991 /* fprintf(stderr, " found at pos %d in cache", i); */
2992 break;
2993 }
2994 /* fprintf(stderr, "\n"); */
2995 return R_NilValue;
2996 }
2997
2998
2999 /* Reads, in binary mode, the bytes in the range specified by a
3000 position/length vector and returns them as raw vector. */
3001
3002 /* There are some large lazy-data examples, e.g. 80Mb for SNPMaP.cdm */
3003 #define LEN_LIMIT 10*1048576
readRawFromFile(SEXP file,SEXP key)3004 static SEXP readRawFromFile(SEXP file, SEXP key)
3005 {
3006 FILE *fp;
3007 int offset, len, in, i, icache = -1;
3008 long filelen;
3009 SEXP val;
3010 const void *vmax;
3011 const char *cfile;
3012
3013 if (! IS_PROPER_STRING(file))
3014 error(_("not a proper file name"));
3015 vmax = vmaxget();
3016 cfile = translateCharFP(STRING_ELT(file, 0));
3017 if (TYPEOF(key) != INTSXP || LENGTH(key) != 2)
3018 error(_("bad offset/length argument"));
3019
3020 offset = INTEGER(key)[0];
3021 len = INTEGER(key)[1];
3022
3023 val = allocVector(RAWSXP, len);
3024 /* Do we have this database cached? */
3025 for (i = 0; i < used; i++)
3026 if(strcmp(cfile, names[i]) == 0) {icache = i; break;}
3027 if (icache >= 0) {
3028 memcpy(RAW(val), ptr[icache]+offset, len);
3029 vmaxset(vmax);
3030 return val;
3031 }
3032
3033 /* find a vacant slot? */
3034 for (i = 0; i < used; i++)
3035 if(strcmp("", names[i]) == 0) {icache = i; break;}
3036 if(icache < 0 && used < NC) icache = used++;
3037
3038 if(icache >= 0) {
3039 if ((fp = R_fopen(cfile, "rb")) == NULL)
3040 error(_("cannot open file '%s': %s"), cfile, strerror(errno));
3041 if (fseek(fp, 0, SEEK_END) != 0) {
3042 fclose(fp);
3043 error(_("seek failed on %s"), cfile);
3044 }
3045 filelen = ftell(fp);
3046 if (filelen < LEN_LIMIT) {
3047 char *p;
3048 /* fprintf(stderr, "adding file '%s' at pos %d in cache, length %d\n",
3049 cfile, icache, filelen); */
3050 p = (char *) malloc(filelen);
3051 if (p) {
3052 strcpy(names[icache], cfile);
3053 ptr[icache] = p;
3054 if (fseek(fp, 0, SEEK_SET) != 0) {
3055 fclose(fp);
3056 error(_("seek failed on %s"), cfile);
3057 }
3058 in = (int) fread(p, 1, filelen, fp);
3059 fclose(fp);
3060 if (filelen != in) error(_("read failed on %s"), cfile);
3061 memcpy(RAW(val), p+offset, len);
3062 } else {
3063 if (fseek(fp, offset, SEEK_SET) != 0) {
3064 fclose(fp);
3065 error(_("seek failed on %s"), cfile);
3066 }
3067 in = (int) fread(RAW(val), 1, len, fp);
3068 fclose(fp);
3069 if (len != in) error(_("read failed on %s"), cfile);
3070 }
3071 vmaxset(vmax);
3072 return val;
3073 } else {
3074 if (fseek(fp, offset, SEEK_SET) != 0) {
3075 fclose(fp);
3076 error(_("seek failed on %s"), cfile);
3077 }
3078 in = (int) fread(RAW(val), 1, len, fp);
3079 fclose(fp);
3080 if (len != in) error(_("read failed on %s"), cfile);
3081 vmaxset(vmax);
3082 return val;
3083 }
3084 }
3085
3086 if ((fp = R_fopen(cfile, "rb")) == NULL)
3087 error(_("cannot open file '%s': %s"), cfile, strerror(errno));
3088 if (fseek(fp, offset, SEEK_SET) != 0) {
3089 fclose(fp);
3090 error(_("seek failed on %s"), cfile);
3091 }
3092 in = (int) fread(RAW(val), 1, len, fp);
3093 fclose(fp);
3094 if (len != in) error(_("read failed on %s"), cfile);
3095 vmaxset(vmax);
3096 return val;
3097 }
3098
3099 /* Gets the binding values of variables from a frame and returns them
3100 as a list. If the force argument is true, promises are forced;
3101 otherwise they are not. */
3102
R_getVarsFromFrame(SEXP vars,SEXP env,SEXP forcesxp)3103 static SEXP R_getVarsFromFrame(SEXP vars, SEXP env, SEXP forcesxp)
3104 {
3105 SEXP val, tmp, sym;
3106 Rboolean force;
3107 int i, len;
3108
3109 if (TYPEOF(env) == NILSXP) {
3110 error(_("use of NULL environment is defunct"));
3111 env = R_BaseEnv;
3112 } else
3113 if (TYPEOF(env) != ENVSXP)
3114 error(_("bad environment"));
3115 if (TYPEOF(vars) != STRSXP)
3116 error(_("bad variable names"));
3117 force = asLogical(forcesxp);
3118
3119 len = LENGTH(vars);
3120 PROTECT(val = allocVector(VECSXP, len));
3121 for (i = 0; i < len; i++) {
3122 sym = installTrChar(STRING_ELT(vars, i));
3123
3124 tmp = findVarInFrame(env, sym);
3125 if (tmp == R_UnboundValue) {
3126 /* PrintValue(env);
3127 PrintValue(R_GetTraceback(0)); */ /* DJM debugging */
3128 error(_("object '%s' not found"), EncodeChar(STRING_ELT(vars, i)));
3129 }
3130 if (force && TYPEOF(tmp) == PROMSXP) {
3131 PROTECT(tmp);
3132 tmp = eval(tmp, R_GlobalEnv);
3133 ENSURE_NAMEDMAX(tmp);
3134 UNPROTECT(1);
3135 }
3136 else ENSURE_NAMED(tmp); /* should not really be needed - LT */
3137 SET_VECTOR_ELT(val, i, tmp);
3138 }
3139 setAttrib(val, R_NamesSymbol, vars);
3140 UNPROTECT(1);
3141
3142 return val;
3143 }
3144
3145 /* from connections.c */
3146 SEXP R_compress1(SEXP in);
3147 SEXP R_decompress1(SEXP in, Rboolean *err);
3148 SEXP R_compress2(SEXP in);
3149 SEXP R_decompress2(SEXP in, Rboolean *err);
3150 SEXP R_compress3(SEXP in);
3151 SEXP R_decompress3(SEXP in, Rboolean *err);
3152
3153 /* Serializes and, optionally, compresses a value and appends the
3154 result to a file. Returns the key position/length key for
3155 retrieving the value */
3156
3157 static SEXP
R_lazyLoadDBinsertValue(SEXP value,SEXP file,SEXP ascii,SEXP compsxp,SEXP hook)3158 R_lazyLoadDBinsertValue(SEXP value, SEXP file, SEXP ascii,
3159 SEXP compsxp, SEXP hook)
3160 {
3161 PROTECT_INDEX vpi;
3162 int compress = asInteger(compsxp);
3163 SEXP key;
3164
3165 value = R_serialize(value, R_NilValue, ascii, R_NilValue, hook);
3166 PROTECT_WITH_INDEX(value, &vpi);
3167 if (compress == 3)
3168 REPROTECT(value = R_compress3(value), vpi);
3169 else if (compress == 2)
3170 REPROTECT(value = R_compress2(value), vpi);
3171 else if (compress)
3172 REPROTECT(value = R_compress1(value), vpi);
3173 key = appendRawToFile(file, value);
3174 UNPROTECT(1);
3175 return key;
3176 }
3177
3178 /* Retrieves a sequence of bytes as specified by a position/length key
3179 from a file, optionally decompresses, and unserializes the bytes.
3180 If the result is a promise, then the promise is forced. */
3181
3182 SEXP attribute_hidden
do_lazyLoadDBfetch(SEXP call,SEXP op,SEXP args,SEXP env)3183 do_lazyLoadDBfetch(SEXP call, SEXP op, SEXP args, SEXP env)
3184 {
3185 SEXP key, file, compsxp, hook;
3186 PROTECT_INDEX vpi;
3187 int compressed;
3188 Rboolean err = FALSE;
3189 SEXP val;
3190
3191 checkArity(op, args);
3192 key = CAR(args); args = CDR(args);
3193 file = CAR(args); args = CDR(args);
3194 compsxp = CAR(args); args = CDR(args);
3195 hook = CAR(args);
3196 compressed = asInteger(compsxp);
3197
3198 PROTECT_WITH_INDEX(val = readRawFromFile(file, key), &vpi);
3199 if (compressed == 3)
3200 REPROTECT(val = R_decompress3(val, &err), vpi);
3201 else if (compressed == 2)
3202 REPROTECT(val = R_decompress2(val, &err), vpi);
3203 else if (compressed)
3204 REPROTECT(val = R_decompress1(val, &err), vpi);
3205 if (err) error("lazy-load database '%s' is corrupt",
3206 translateChar(STRING_ELT(file, 0)));
3207 val = R_unserialize(val, hook);
3208 if (TYPEOF(val) == PROMSXP) {
3209 REPROTECT(val, vpi);
3210 val = eval(val, R_GlobalEnv);
3211 ENSURE_NAMEDMAX(val);
3212 }
3213 UNPROTECT(1);
3214 return val;
3215 }
3216
3217 SEXP attribute_hidden
do_getVarsFromFrame(SEXP call,SEXP op,SEXP args,SEXP env)3218 do_getVarsFromFrame(SEXP call, SEXP op, SEXP args, SEXP env)
3219 {
3220 checkArity(op, args);
3221 return R_getVarsFromFrame(CAR(args), CADR(args), CADDR(args));
3222 }
3223
3224
3225 SEXP attribute_hidden
do_lazyLoadDBinsertValue(SEXP call,SEXP op,SEXP args,SEXP env)3226 do_lazyLoadDBinsertValue(SEXP call, SEXP op, SEXP args, SEXP env)
3227 {
3228 checkArity(op, args);
3229 SEXP value, file, ascii, compsxp, hook;
3230 value = CAR(args); args = CDR(args);
3231 file = CAR(args); args = CDR(args);
3232 ascii = CAR(args); args = CDR(args);
3233 compsxp = CAR(args); args = CDR(args);
3234 hook = CAR(args); args = CDR(args);
3235 return R_lazyLoadDBinsertValue(value, file, ascii, compsxp, hook);
3236 }
3237
3238 SEXP attribute_hidden
do_serialize(SEXP call,SEXP op,SEXP args,SEXP env)3239 do_serialize(SEXP call, SEXP op, SEXP args, SEXP env)
3240 {
3241 checkArity(op, args);
3242 if (PRIMVAL(op) == 2) return R_unserialize(CAR(args), CADR(args));
3243
3244 SEXP object, icon, type, ver, fun;
3245 object = CAR(args); args = CDR(args);
3246 icon = CAR(args); args = CDR(args);
3247 type = CAR(args); args = CDR(args);
3248 ver = CAR(args); args = CDR(args);
3249 fun = CAR(args);
3250
3251 if(PRIMVAL(op) == 1)
3252 return R_serializeb(object, icon, type, ver, fun);
3253 else
3254 return R_serialize(object, icon, type, ver, fun);
3255 }
3256