1 #include "data.table.h"
2 
finalizer(SEXP p)3 static void finalizer(SEXP p)
4 {
5   SEXP x;
6   R_len_t n, l, tl;
7   if(!R_ExternalPtrAddr(p)) error(_("Internal error: finalizer hasn't received an ExternalPtr")); // # nocov
8   p = R_ExternalPtrTag(p);
9   if (!isString(p)) error(_("Internal error: finalizer's ExternalPtr doesn't see names in tag")); // # nocov
10   l = LENGTH(p);
11   tl = TRUELENGTH(p);
12   if (l<0 || tl<l) error(_("Internal error: finalizer sees l=%d, tl=%d"),l,tl); // # nocov
13   n = tl-l;
14   if (n==0) {
15     // gc's ReleaseLargeFreeVectors() will have reduced R_LargeVallocSize by the correct amount
16     // already, so nothing to do (but almost never the case).
17     return;
18   }
19   x = PROTECT(allocVector(INTSXP, 50));  // 50 so it's big enough to be on LargeVector heap. See NodeClassSize in memory.c:allocVector
20                                          // INTSXP rather than VECSXP so that GC doesn't inspect contents after LENGTH (thanks to Karl Miller, Jul 2015)
21   SETLENGTH(x,50+n*2*sizeof(void *)/4);  // 1*n for the names, 1*n for the VECSXP itself (both are over allocated).
22   UNPROTECT(1);
23   return;
24 }
25 
setselfref(SEXP x)26 void setselfref(SEXP x) {
27   SEXP p;
28   // Store pointer to itself so we can detect if the object has been copied. See
29   // ?copy for why copies are not just inefficient but cause a problem for over-allocated data.tables.
30   // Called from C only, not R level, so returns void.
31   setAttrib(x, SelfRefSymbol, p=R_MakeExternalPtr(
32     R_NilValue,                  // for identical() to return TRUE. identical() doesn't look at tag and prot
33     PROTECT(getAttrib(x, R_NamesSymbol)),  // to detect if names has been replaced and its tl lost, e.g. setattr(DT,"names",...)
34     PROTECT(R_MakeExternalPtr(   // to avoid an infinite loop in object.size(), if prot=x here
35       x,                         // to know if this data.table has been copied by key<-, attr<-, names<-, etc.
36       R_NilValue,                // this tag and prot currently unused
37       R_NilValue
38     ))
39   ));
40   R_RegisterCFinalizerEx(p, finalizer, FALSE);
41   UNPROTECT(2);
42 
43 /*
44   *  base::identical doesn't check prot and tag of EXTPTR, just that the ptr itself is the
45      same in both objects. R_NilValue is always equal to R_NilValue.  R_NilValue is a memory
46      location constant within an R session, but can vary from session to session. So, it
47      looks like a pointer to a user looking at attributes(DT), but they might wonder how it
48      works if they realise the selfref of all data.tables all point to the same address (rather
49      than to the table itself which would be reasonable to expect given the attribute's name).
50   *  p=NULL rather than R_NilValue works too, other than we need something other than NULL
51      so we can detect tables loaded from disk (which set p to NULL, see 5.13 of R-exts).
52   *  x is wrapped in another EXTPTR because of object.size (called by tables(), and by users).
53      If the prot (or tag) was x directly it sends object.size into an infinite loop and then
54      "segfault from C stack overflow" (object.size does count tag and prot, unlike identical,
55      but doesn't count what's pointed to).
56   *  Could use weak reference possibly, but the fact that they can get be set to R_NilValue
57      by gc (?) didn't seem appropriate.
58   *  If the .internal.selfref attribute is removed (e.g. by user code), nothing will break, but
59      an extra copy will just be taken on next :=, with warning, with a new selfref.
60   *  object.size will count size of names twice, but that's ok as only small.
61   *  Thanks to Steve L for suggesting ExtPtr for this, rather than the previous REALSXP
62      vector which required data.table to do a show/hide dance in a masked identical.
63 */
64 }
65 
66 /* There are two reasons the finalizer doesn't restore the LENGTH to TRUELENGTH. i) The finalizer
67 happens too late after GC has already released the memory, and ii) copies by base R (e.g.
68 [<- in write.table just before test 894) allocate at length LENGTH but copy the TRUELENGTH over.
69 If the finalizer sets LENGTH to TRUELENGTH, that's a fail as it wasn't really allocated at
70 TRUELENGTH when R did the copy.
71 Karl Miller suggested an ENVSXP so that restoring LENGTH in finalizer should work. This is the
72 closest I got to getting it to pass all tests :
73 
74   SEXP env = PROTECT(allocSExp(ENVSXP));
75   defineVar(SelfRefSymbol, x, env);
76   defineVar(R_NamesSymbol, getAttrib(x, R_NamesSymbol), env);
77   setAttrib(x, SelfRefSymbol, p = R_MakeExternalPtr(
78     R_NilValue,         // for identical() to return TRUE. identical() doesn't look at tag and prot
79     R_NilValue, //getAttrib(x, R_NamesSymbol), // to detect if names has been replaced and its tl lost, e.g. setattr(DT,"names",...)
80     PROTECT(            // needed when --enable-strict-barrier it seems, iiuc. TO DO: test under that flag and remove if not needed.
81       env               // wrap x in env to avoid an infinite loop in object.size() if prot=x were here
82     )
83   ));
84   R_RegisterCFinalizerEx(p, finalizer, FALSE);
85   UNPROTECT(2);
86 
87 Then in finalizer:
88   SETLENGTH(names, tl)
89   SETLENGTH(dt, tl)
90 
91 and that finalizer indeed now happens before the GC releases memory (thanks to the env wrapper).
92 
93 However, we still have problem (ii) above and it didn't pass tests involving base R copies.
94 
95 We really need R itself to start setting TRUELENGTH to be the allocated length and then
96 for GC to release TRUELENGTH not LENGTH.  Would really tidy this up.
97 
98 Moved out of ?setkey Details section in 1.12.2 (Mar 2019). Revisit this w.r.t. to recent versions of R.
99   The problem (for \code{data.table}) with the copy by \code{key<-} (other than
100   being slower) is that \R doesn't maintain the over-allocated truelength, but it
101   looks as though it has. Adding a column by reference using \code{:=} after a
102   \code{key<-} was therefore a memory overwrite and eventually a segfault; the
103   over-allocated memory wasn't really there after \code{key<-}'s copy. \code{data.table}s now have an attribute \code{.internal.selfref} to catch and warn about such copies.
104   This attribute has been implemented in a way that is friendly with
105   \code{identical()} and \code{object.size()}.
106 */
107 
_selfrefok(SEXP x,Rboolean checkNames,Rboolean verbose)108 static int _selfrefok(SEXP x, Rboolean checkNames, Rboolean verbose) {
109   SEXP v, p, tag, prot, names;
110   v = getAttrib(x, SelfRefSymbol);
111   if (v==R_NilValue || TYPEOF(v)!=EXTPTRSXP) {
112     // .internal.selfref missing is expected and normal for i) a pre v1.7.8 data.table loaded
113     //  from disk, and ii) every time a new data.table is over-allocated for the first time.
114     //  Not being an extptr is for when users contruct a data.table via structure() using dput, post
115     //  a question, and find the extptr doesn't parse so put quotes around it (for example).
116     //  In both cases the selfref is not ok.
117     return 0;
118   }
119   p = R_ExternalPtrAddr(v);
120   if (p==NULL) {
121     if (verbose) Rprintf(_(".internal.selfref ptr is NULL. This is expected and normal for a data.table loaded from disk. Please remember to always setDT() immediately after loading to prevent unexpected behavior. If this table was not loaded from disk or you've already run setDT(), please report to data.table issue tracker.\n"));
122     return -1;
123   }
124   if (!isNull(p)) error(_("Internal error: .internal.selfref ptr is not NULL or R_NilValue")); // # nocov
125   tag = R_ExternalPtrTag(v);
126   if (!(isNull(tag) || isString(tag))) error(_("Internal error: .internal.selfref tag isn't NULL or a character vector")); // # nocov
127   names = getAttrib(x, R_NamesSymbol);
128   if (names!=tag && isString(names) && !ALTREP(names))  // !ALTREP for #4734
129     SET_TRUELENGTH(names, LENGTH(names));
130     // R copied this vector not data.table; it's not actually over-allocated. It looks over-allocated
131     // because R copies the original vector's tl over despite allocating length.
132   prot = R_ExternalPtrProtected(v);
133   if (TYPEOF(prot) != EXTPTRSXP)   // Very rare. Was error(_(".internal.selfref prot is not itself an extptr")).
134     return 0;                      // # nocov ; see http://stackoverflow.com/questions/15342227/getting-a-random-internal-selfref-error-in-data-table-for-r
135   if (x!=R_ExternalPtrAddr(prot) && !ALTREP(x))
136     SET_TRUELENGTH(x, LENGTH(x));  // R copied this vector not data.table, it's not actually over-allocated
137   return checkNames ? names==tag : x==R_ExternalPtrAddr(prot);
138 }
139 
selfrefok(SEXP x,Rboolean verbose)140 static Rboolean selfrefok(SEXP x, Rboolean verbose) {   // for readability
141   return(_selfrefok(x, FALSE, verbose)==1);
142 }
selfrefnamesok(SEXP x,Rboolean verbose)143 static Rboolean selfrefnamesok(SEXP x, Rboolean verbose) {
144   return(_selfrefok(x, TRUE, verbose)==1);
145 }
146 
shallow(SEXP dt,SEXP cols,R_len_t n)147 static SEXP shallow(SEXP dt, SEXP cols, R_len_t n)
148 {
149   // NEW: cols argument to specify the columns to shallow copy on. If NULL, all columns.
150   // called from alloccol where n is checked carefully, or from shallow() at R level
151   // where n is set to truelength (i.e. a shallow copy only with no size change)
152   R_len_t i,l;
153   int protecti=0;
154   SEXP newdt = PROTECT(allocVector(VECSXP, n)); protecti++;   // to do, use growVector here?
155   SET_ATTRIB(newdt, shallow_duplicate(ATTRIB(dt)));
156   SET_OBJECT(newdt, OBJECT(dt));
157   IS_S4_OBJECT(dt) ? SET_S4_OBJECT(newdt) : UNSET_S4_OBJECT(newdt);  // To support S4 objects that incude data.table
158   //SHALLOW_DUPLICATE_ATTRIB(newdt, dt);  // SHALLOW_DUPLICATE_ATTRIB would be a bit neater but is only available from R 3.3.0
159 
160   // TO DO: keepattr() would be faster, but can't because shallow isn't merely a shallow copy. It
161   //        also increases truelength. Perhaps make that distinction, then, and split out, but marked
162   //        so that the next change knows to duplicate.
163   //        keepattr() also merely points to the entire attrbutes list and thus doesn't allow replacing
164   //        some of its elements.
165 
166   // We copy all attributes that refer to column names so that calling setnames on either
167   // the original or the shallow copy doesn't break anything.
168   SEXP index = PROTECT(getAttrib(dt, sym_index)); protecti++;
169   setAttrib(newdt, sym_index, shallow_duplicate(index));
170 
171   SEXP sorted = PROTECT(getAttrib(dt, sym_sorted)); protecti++;
172   setAttrib(newdt, sym_sorted, duplicate(sorted));
173 
174   SEXP names = PROTECT(getAttrib(dt, R_NamesSymbol)); protecti++;
175   SEXP newnames = PROTECT(allocVector(STRSXP, n)); protecti++;
176   if (isNull(cols)) {
177     l = LENGTH(dt);
178     for (i=0; i<l; i++) SET_VECTOR_ELT(newdt, i, VECTOR_ELT(dt,i));
179     if (length(names)) {
180       if (length(names) < l) error(_("Internal error: length(names)>0 but <length(dt)")); // # nocov
181       for (i=0; i<l; i++) SET_STRING_ELT(newnames, i, STRING_ELT(names,i));
182     }
183     // else an unnamed data.table is valid e.g. unname(DT) done by ggplot2, and .SD may have its names cleared in dogroups, but shallow will always create names for data.table(NULL) which has 100 slots all empty so you can add to an empty data.table by reference ok.
184   } else {
185     l = length(cols);
186     for (i=0; i<l; i++) SET_VECTOR_ELT(newdt, i, VECTOR_ELT(dt,INTEGER(cols)[i]-1));
187     if (length(names)) {
188       // no need to check length(names) < l here. R-level checks if all value
189       // in 'cols' are valid - in the range of 1:length(names(x))
190       for (i=0; i<l; i++) SET_STRING_ELT( newnames, i, STRING_ELT(names,INTEGER(cols)[i]-1) );
191     }
192   }
193   setAttrib(newdt, R_NamesSymbol, newnames);
194   // setAttrib appears to change length and truelength, so need to do that first _then_ SET next,
195   // otherwise (if the SET were were first) the 100 tl is assigned to length.
196   SETLENGTH(newnames,l);
197   SET_TRUELENGTH(newnames,n);
198   SETLENGTH(newdt,l);
199   SET_TRUELENGTH(newdt,n);
200   setselfref(newdt);
201   UNPROTECT(protecti);
202   return(newdt);
203 }
204 
alloccol(SEXP dt,R_len_t n,Rboolean verbose)205 SEXP alloccol(SEXP dt, R_len_t n, Rboolean verbose)
206 {
207   SEXP names, klass;   // klass not class at request of pydatatable because class is reserved word in C++, PR #3129
208   R_len_t l, tl;
209   if (isNull(dt)) error(_("alloccol has been passed a NULL dt"));
210   if (TYPEOF(dt) != VECSXP) error(_("dt passed to alloccol isn't type VECSXP"));
211   klass = getAttrib(dt, R_ClassSymbol);
212   if (isNull(klass)) error(_("dt passed to alloccol has no class attribute. Please report result of traceback() to data.table issue tracker."));
213   l = LENGTH(dt);
214   names = getAttrib(dt,R_NamesSymbol);
215   // names may be NULL when null.data.table() passes list() to alloccol for example.
216   // So, careful to use length() on names, not LENGTH().
217   if (length(names)!=l) error(_("Internal error: length of names (%d) is not length of dt (%d)"),length(names),l); // # nocov
218   if (!selfrefok(dt,verbose))
219     return shallow(dt,R_NilValue,(n>l) ? n : l);  // e.g. test 848 and 851 in R > 3.0.2
220     // added (n>l) ? ... for #970, see test 1481.
221   // TO DO:  test realloc names if selfrefnamesok (users can setattr(x,"name") themselves for example.
222   // if (TRUELENGTH(getAttrib(dt,R_NamesSymbol))!=tl)
223   //    error(_("Internal error: tl of dt passes checks, but tl of names (%d) != tl of dt (%d)"), tl, TRUELENGTH(getAttrib(dt,R_NamesSymbol))); // # nocov
224 
225   tl = TRUELENGTH(dt);
226   // R <= 2.13.2 and we didn't catch uninitialized tl somehow
227   if (tl<0) error(_("Internal error, tl of class is marked but tl<0.")); // # nocov
228   if (tl>0 && tl<l) error(_("Internal error, please report (including result of sessionInfo()) to data.table issue tracker: tl (%d) < l (%d) but tl of class is marked."), tl, l); // # nocov
229   if (tl>l+10000) warning(_("tl (%d) is greater than 10,000 items over-allocated (l = %d). If you didn't set the datatable.alloccol option to be very large, please report to data.table issue tracker including the result of sessionInfo()."),tl,l);
230   if (n>tl) return(shallow(dt,R_NilValue,n)); // usual case (increasing alloc)
231   if (n<tl && verbose) Rprintf(_("Attempt to reduce allocation from %d to %d ignored. Can only increase allocation via shallow copy. Please do not use DT[...]<- or DT$someCol<-. Use := inside DT[...] instead."),tl,n);
232         // otherwise the finalizer can't clear up the Large Vector heap
233   return(dt);
234 }
235 
checkOverAlloc(SEXP x)236 int checkOverAlloc(SEXP x)
237 {
238   if (isNull(x))
239     error(_("Has getOption('datatable.alloccol') somehow become unset? It should be a number, by default 1024."));
240   if (!isInteger(x) && !isReal(x))
241     error(_("getOption('datatable.alloccol') should be a number, by default 1024. But its type is '%s'."), type2char(TYPEOF(x)));
242   if (LENGTH(x) != 1)
243     error(_("getOption('datatable.alloc') is a numeric vector ok but its length is %d. Its length should be 1."), LENGTH(x));
244   int ans = isInteger(x) ? INTEGER(x)[0] : (int)REAL(x)[0];
245   if (ans<0)
246     error(_("getOption('datatable.alloc')==%d.  It must be >=0 and not NA."), ans);
247   return ans;
248 }
249 
alloccolwrapper(SEXP dt,SEXP overAllocArg,SEXP verbose)250 SEXP alloccolwrapper(SEXP dt, SEXP overAllocArg, SEXP verbose) {
251   if (!isLogical(verbose) || length(verbose)!=1) error(_("verbose must be TRUE or FALSE"));
252   int overAlloc = checkOverAlloc(overAllocArg);
253   SEXP ans = PROTECT(alloccol(dt, length(dt)+overAlloc, LOGICAL(verbose)[0]));
254 
255   for(R_len_t i = 0; i < LENGTH(ans); i++) {
256     // clear names; also excluded by copyMostAttrib(). Primarily for data.table and as.data.table, but added here centrally (see #103).
257     setAttrib(VECTOR_ELT(ans, i), R_NamesSymbol, R_NilValue);
258 
259     // But don't clear dim and dimnames. Because as from 1.12.4 we keep the matrix column as-is and ask user to use as.data.table to
260     // unpack matrix columns when they really need to; test 2089.2
261     // setAttrib(VECTOR_ELT(ans, i), R_DimSymbol, R_NilValue);
262     // setAttrib(VECTOR_ELT(ans, i), R_DimNamesSymbol, R_NilValue);
263   }
264 
265   UNPROTECT(1);
266   return ans;
267 }
268 
shallowwrapper(SEXP dt,SEXP cols)269 SEXP shallowwrapper(SEXP dt, SEXP cols) {
270   // selfref will be FALSE on manually created data.table, e.g., via dput() or structure()
271   if (!selfrefok(dt, FALSE)) {
272     int n = isNull(cols) ? length(dt) : length(cols);
273     return(shallow(dt, cols, n));
274   } else return(shallow(dt, cols, TRUELENGTH(dt)));
275 }
276 
truelength(SEXP x)277 SEXP truelength(SEXP x) {
278   return ScalarInteger(isNull(x) ? 0 : TRUELENGTH(x));
279 }
280 
selfrefokwrapper(SEXP x,SEXP verbose)281 SEXP selfrefokwrapper(SEXP x, SEXP verbose) {
282   return ScalarInteger(_selfrefok(x,FALSE,LOGICAL(verbose)[0]));
283 }
284 
285 int *_Last_updated = NULL;
286 
assign(SEXP dt,SEXP rows,SEXP cols,SEXP newcolnames,SEXP values)287 SEXP assign(SEXP dt, SEXP rows, SEXP cols, SEXP newcolnames, SEXP values)
288 {
289   // For internal use only by := in [.data.table, and set()
290   // newcolnames : add these columns (if any)
291   // cols : column names or numbers corresponding to the values to set
292   // rows : row numbers to assign
293   R_len_t i, j, numToDo, targetlen, vlen, oldncol, oldtncol, coln, protecti=0, newcolnum, indexLength;
294   SEXP targetcol, nullint, s, colnam, tmp, key, index, a, assignedNames, indexNames;
295   bool verbose=GetVerbose();
296   int ndelete=0;  // how many columns are being deleted
297   const char *c1, *tc1, *tc2;
298   int *buf, newKeyLength, indexNo;
299   if (isNull(dt)) error(_("assign has been passed a NULL dt"));
300   if (TYPEOF(dt) != VECSXP) error(_("dt passed to assign isn't type VECSXP"));
301   if (islocked(dt))
302     error(_(".SD is locked. Updating .SD by reference using := or set are reserved for future use. Use := in j directly. Or use copy(.SD) as a (slow) last resort, until shallow() is exported."));
303 
304   // We allow set() on data.frame too; e.g. package Causata uses set() on a data.frame in tests/testTransformationReplay.R
305   // := is only allowed on a data.table. However, the ":=" = stop(...) message in data.table.R will have already
306   // detected use on a data.frame before getting to this point.
307   // For data.frame, can use set() on existing columns but not add new ones because DF are not over-allocated.
308   bool isDataTable = INHERITS(dt, char_datatable);
309   if (!isDataTable && !INHERITS(dt, char_dataframe))
310     error(_("Internal error: dt passed to Cassign is not a data.table or data.frame"));  // # nocov
311 
312   oldncol = LENGTH(dt);
313   SEXP names = PROTECT(getAttrib(dt, R_NamesSymbol)); protecti++;
314   if (isNull(names)) error(_("dt passed to assign has no names"));
315   if (length(names)!=oldncol)
316     error(_("Internal error in assign: length of names (%d) is not length of dt (%d)"),length(names),oldncol); // # nocov
317   if (isNull(dt)) {
318     error(_("data.table is NULL; malformed. A null data.table should be an empty list. typeof() should always return 'list' for data.table.")); // # nocov
319     // Not possible to test because R won't permit attributes be attached to NULL (which is good and we like); warning from R 3.4.0+ tested by 944.5
320   }
321   const int nrow = LENGTH(dt) ? length(VECTOR_ELT(dt,0)) :
322                                 (isNewList(values) && length(values) ? length(VECTOR_ELT(values,0)) : length(values));
323   //                            ^ when null data.table the new nrow becomes the fist column added
324   if (isNull(rows)) {
325     numToDo = nrow;
326     targetlen = nrow;
327     if (verbose) Rprintf(_("Assigning to all %d rows\n"), nrow);
328     // fast way to assign to whole column, without creating 1:nrow(x) vector up in R, or here in C
329   } else {
330     if (isReal(rows)) {
331       rows = PROTECT(coerceVector(rows, INTSXP)); protecti++;
332       warning(_("Coerced i from numeric to integer. Please pass integer for efficiency; e.g., 2L rather than 2"));
333     }
334     if (!isInteger(rows))
335       error(_("i is type '%s'. Must be integer, or numeric is coerced with warning. If i is a logical subset, simply wrap with which(), and take the which() outside the loop if possible for efficiency."), type2char(TYPEOF(rows)));
336     targetlen = length(rows);
337     numToDo = 0;
338     const int *rowsd = INTEGER(rows);
339     for (i=0; i<targetlen; i++) {
340       if ((rowsd[i]<0 && rowsd[i]!=NA_INTEGER) || rowsd[i]>nrow)
341         error(_("i[%d] is %d which is out of range [1,nrow=%d]."),i+1,rowsd[i],nrow);  // set() reaches here (test 2005.2); := reaches the same error in subset.c first
342       if (rowsd[i]>=1) numToDo++;
343     }
344     if (verbose) Rprintf(_("Assigning to %d row subset of %d rows\n"), numToDo, nrow);
345     // TODO: include in message if any rows are assigned several times (e.g. by=.EACHI with dups in i)
346     if (numToDo==0) {
347       if (!length(newcolnames)) {
348         *_Last_updated = 0;
349         UNPROTECT(protecti);
350         return(dt); // all items of rows either 0 or NA. !length(newcolnames) for #759
351       }
352       if (verbose) Rprintf(_("Added %d new column%s initialized with all-NA\n"),
353                            length(newcolnames), (length(newcolnames)>1)?"s":"");
354     }
355   }
356   if (!length(cols)) {
357     warning(_("length(LHS)==0; no columns to delete or assign RHS to."));   // test 1295 covers
358     *_Last_updated = 0;
359     UNPROTECT(protecti);
360     return(dt);
361   }
362   // FR #2077 - set able to add new cols by reference
363   if (isString(cols)) {
364     PROTECT(tmp = chmatch(cols, names, 0)); protecti++;
365     buf = (int *) R_alloc(length(cols), sizeof(int));
366     int k=0;
367     for (i=0; i<length(cols); i++) {
368       if (INTEGER(tmp)[i] == 0) buf[k++] = i;
369     }
370     if (k>0) {
371       if (!isDataTable) error(_("set() on a data.frame is for changing existing columns, not adding new ones. Please use a data.table for that. data.table's are over-allocated and don't shallow copy."));
372       newcolnames = PROTECT(allocVector(STRSXP, k)); protecti++;
373       for (i=0; i<k; i++) {
374         SET_STRING_ELT(newcolnames, i, STRING_ELT(cols, buf[i]));
375         INTEGER(tmp)[buf[i]] = oldncol+i+1;
376       }
377     }
378     cols = tmp;
379   } else {
380     if (isReal(cols)) {
381       cols = PROTECT(coerceVector(cols, INTSXP)); protecti++;
382       warning(_("Coerced j from numeric to integer. Please pass integer for efficiency; e.g., 2L rather than 2"));
383     }
384     if (!isInteger(cols))
385       error(_("j is type '%s'. Must be integer, character, or numeric is coerced with warning."), type2char(TYPEOF(cols)));
386   }
387   if (any_duplicated(cols,FALSE)) error(_("Can't assign to the same column twice in the same query (duplicates detected)."));
388   if (!isNull(newcolnames) && !isString(newcolnames)) error(_("newcolnames is supplied but isn't a character vector"));
389   bool RHS_list_of_columns = TYPEOF(values)==VECSXP && length(cols)>1;  // initial value; may be revised below
390   if (verbose) Rprintf(_("RHS_list_of_columns == %s\n"), RHS_list_of_columns ? "true" : "false");
391   if (TYPEOF(values)==VECSXP && length(cols)==1 && length(values)==1) {
392     SEXP item = VECTOR_ELT(values,0);
393     if (isNull(item) || length(item)==1 || length(item)==targetlen) {
394       RHS_list_of_columns=true;
395       if (verbose) Rprintf(_("RHS_list_of_columns revised to true because RHS list has 1 item which is NULL, or whose length %d is either 1 or targetlen (%d). Please unwrap RHS.\n"), length(item), targetlen);
396     }
397   }
398   if (RHS_list_of_columns) {
399     if (length(values)==0)
400       error(_("Supplied %d columns to be assigned an empty list (which may be an empty data.table or data.frame since they are lists too). To delete multiple columns use NULL instead. To add multiple empty list columns, use list(list())."), length(cols));
401     if (length(values)!=length(cols)) {
402       if (length(values)==1) {   // test 351.1; c("colA","colB"):=list(13:15) uses 13:15 for both columns
403         values = VECTOR_ELT(values,0);
404         RHS_list_of_columns = false;
405         if (verbose) Rprintf(_("Recycling single RHS list item across %d columns. Please unwrap RHS.\n"), length(cols));
406       } else {
407         error(_("Supplied %d columns to be assigned %d items. Please see NEWS for v1.12.2."), length(cols), length(values));
408       }
409     }
410   }
411   // Check all inputs :
412   for (i=0; i<length(cols); i++) {
413     coln = INTEGER(cols)[i];
414     if (coln<1 || coln>oldncol+length(newcolnames)) {
415       if (!isDataTable) error(_("Item %d of column numbers in j is %d which is outside range [1,ncol=%d]. set() on a data.frame is for changing existing columns, not adding new ones. Please use a data.table for that."), i+1, coln, oldncol);
416       else error(_("Item %d of column numbers in j is %d which is outside range [1,ncol=%d]. Use column names instead in j to add new columns."), i+1, coln, oldncol);
417     }
418     coln--;
419     SEXP thisvalue = RHS_list_of_columns ? VECTOR_ELT(values, i) : values;
420     vlen = length(thisvalue);
421     if (isNull(thisvalue) && !isNull(rows)) error(_("When deleting columns, i should not be provided"));  // #1082, #3089
422     if (coln+1 <= oldncol) colnam = STRING_ELT(names,coln);
423     else colnam = STRING_ELT(newcolnames,coln-length(names));
424     if (coln+1 <= oldncol && isNull(thisvalue)) continue;  // delete existing column(s) afterwards, near end of this function
425     //if (vlen<1 && nrow>0) {
426     if (coln+1 <= oldncol && nrow>0 && vlen<1 && numToDo>0) { // numToDo > 0 fixes #2829, see test 1911
427       error(_("RHS of assignment to existing column '%s' is zero length but not NULL. If you intend to delete the column use NULL. Otherwise, the RHS must have length > 0; e.g., NA_integer_. If you are trying to change the column type to be an empty list column then, as with all column type changes, provide a full length RHS vector such as vector('list',nrow(DT)); i.e., 'plonk' in the new column."), CHAR(STRING_ELT(names,coln)));
428     }
429     if (coln+1 > oldncol && TYPEOF(thisvalue)!=VECSXP) {  // list() is ok for new columns
430       newcolnum = coln-length(names);
431       if (newcolnum<0 || newcolnum>=length(newcolnames))
432         error(_("Internal error in assign.c: length(newcolnames)=%d, length(names)=%d, coln=%d"), length(newcolnames), length(names), coln); // # nocov
433       if (isNull(thisvalue)) {
434         warning(_("Column '%s' does not exist to remove"),CHAR(STRING_ELT(newcolnames,newcolnum)));
435         continue;
436       }
437       // RHS of assignment to new column is zero length but we'll use its type to create all-NA column of that type
438     }
439     if (isMatrix(thisvalue) && (j=INTEGER(getAttrib(thisvalue, R_DimSymbol))[1]) > 1)  // matrix passes above (considered atomic vector)
440       warning(_("%d column matrix RHS of := will be treated as one vector"), j);
441     const SEXP existing = (coln+1)<=oldncol ? VECTOR_ELT(dt,coln) : R_NilValue;
442     if (isFactor(existing) &&
443       !isString(thisvalue) && TYPEOF(thisvalue)!=INTSXP && TYPEOF(thisvalue)!=LGLSXP && !isReal(thisvalue) && !isNewList(thisvalue)) {  // !=INTSXP includes factor
444       error(_("Can't assign to column '%s' (type 'factor') a value of type '%s' (not character, factor, integer or numeric)"),
445             CHAR(STRING_ELT(names,coln)),type2char(TYPEOF(thisvalue)));
446     }
447     if (nrow>0 && targetlen>0 && vlen>1 && vlen!=targetlen && (TYPEOF(existing)!=VECSXP || TYPEOF(thisvalue)==VECSXP)) {
448       // note that isNewList(R_NilValue) is true so it needs to be TYPEOF(existing)!=VECSXP above
449       error(_("Supplied %d items to be assigned to %d items of column '%s'. If you wish to 'recycle' the RHS please use rep() to make this intent clear to readers of your code."), vlen, targetlen, CHAR(colnam));
450     }
451   }
452   // having now checked the inputs, from this point there should be no errors so we can now proceed to
453   // modify DT by reference. Other than if new columns are being added and the allocVec() fails with
454   // out-of-memory. In that case the user will receive hard halt and know to rerun.
455   if (length(newcolnames)) {
456     oldtncol = TRUELENGTH(dt);   // TO DO: oldtncol can be just called tl now, as we won't realloc here any more.
457 
458     if (oldtncol<oldncol) {
459       if (oldtncol==0) error(_("This data.table has either been loaded from disk (e.g. using readRDS()/load()) or constructed manually (e.g. using structure()). Please run setDT() or setalloccol() on it first (to pre-allocate space for new columns) before assigning by reference to it."));   // #2996
460       error(_("Internal error: oldtncol(%d) < oldncol(%d). Please report to data.table issue tracker, including result of sessionInfo()."), oldtncol, oldncol); // # nocov
461     }
462     if (oldtncol>oldncol+10000L) warning(_("truelength (%d) is greater than 10,000 items over-allocated (length = %d). See ?truelength. If you didn't set the datatable.alloccol option very large, please report to data.table issue tracker including the result of sessionInfo()."),oldtncol, oldncol);
463     if (oldtncol < oldncol+LENGTH(newcolnames))
464       error(_("Internal error: DT passed to assign has not been allocated enough column slots. l=%d, tl=%d, adding %d"), oldncol, oldtncol, LENGTH(newcolnames));  // # nocov
465     if (!selfrefnamesok(dt,verbose))
466       error(_("It appears that at some earlier point, names of this data.table have been reassigned. Please ensure to use setnames() rather than names<- or colnames<-. Otherwise, please report to data.table issue tracker."));  // # nocov
467       // Can growVector at this point easily enough, but it shouldn't happen in first place so leave it as
468       // strong error message for now.
469     else if (TRUELENGTH(names) != oldtncol)
470       error(_("Internal error: selfrefnames is ok but tl names [%d] != tl [%d]"), TRUELENGTH(names), oldtncol);  // # nocov
471     SETLENGTH(dt, oldncol+LENGTH(newcolnames));
472     SETLENGTH(names, oldncol+LENGTH(newcolnames));
473     for (i=0; i<LENGTH(newcolnames); i++)
474       SET_STRING_ELT(names,oldncol+i,STRING_ELT(newcolnames,i));
475     // truelengths of both already set by alloccol
476   }
477   for (i=0; i<length(cols); i++) {
478     coln = INTEGER(cols)[i]-1;
479     SEXP thisvalue = RHS_list_of_columns ? VECTOR_ELT(values, i) : values;
480     if (TYPEOF(thisvalue)==NILSXP) {
481       if (!isNull(rows)) error(_("Internal error: earlier error 'When deleting columns, i should not be provided' did not happen.")); // # nocov
482       ndelete++;
483       continue;   // delete column(s) afterwards, below this loop
484     }
485     vlen = length(thisvalue);
486     if (length(rows)==0 && targetlen==vlen && (vlen>0 || nrow==0)) {
487       if (  MAYBE_SHARED(thisvalue) ||  // set() protects the NAMED of atomic vectors from .Call setting arguments to 2 by wrapping with list
488          (TYPEOF(values)==VECSXP && i>LENGTH(values)-1) || // recycled RHS would have columns pointing to others, #185.
489          (TYPEOF(values)!=VECSXP && i>0) // assigning the same values to a second column. Have to ensure a copy #2540
490          ) {
491         if (verbose) {
492           Rprintf(_("RHS for item %d has been duplicated because NAMED==%d MAYBE_SHARED==%d, but then is being plonked. length(values)==%d; length(cols)==%d)\n"),
493                   i+1, NAMED(thisvalue), MAYBE_SHARED(thisvalue), length(values), length(cols));
494         }
495         thisvalue = copyAsPlain(thisvalue);   // PROTECT not needed as assigned as element to protected list below.
496       } else {
497         if (verbose) Rprintf(_("Direct plonk of unnamed RHS, no copy. NAMED==%d, MAYBE_SHARED==%d\n"), NAMED(thisvalue), MAYBE_SHARED(thisvalue));  // e.g. DT[,a:=as.character(a)] as tested by 754.5
498       }
499       SET_VECTOR_ELT(dt, coln, thisvalue);                 // plonk new column in as it's already the correct length
500       setAttrib(thisvalue, R_NamesSymbol, R_NilValue);     // clear names such as  DT[,a:=mapvector[a]]
501       setAttrib(thisvalue, R_DimSymbol, R_NilValue);       // so that matrix is treated as vector
502       setAttrib(thisvalue, R_DimNamesSymbol, R_NilValue);  // the 3rd of the 3 attribs not copied by copyMostAttrib, for consistency.
503       continue;
504     }
505 
506     if (coln+1 > oldncol) {  // new column
507       SET_VECTOR_ELT(dt, coln, targetcol=allocNAVectorLike(thisvalue, nrow));
508       // initialize with NAs for when 'rows' is a subset and it doesn't touch
509       // do not try to save the time to NA fill (contiguous branch free assign anyway) since being
510       // sure all items will be written to (isNull(rows), length(rows), vlen<1, targetlen) is not worth the risk.
511       if (isVectorAtomic(thisvalue)) copyMostAttrib(thisvalue,targetcol);  // class etc but not names
512       // else for lists (such as data.frame and data.table) treat them as raw lists and drop attribs
513       if (vlen<1) continue;   // e.g. DT[,newcol:=integer()] (adding new empty column)
514     } else {                 // existing column
515       targetcol = VECTOR_ELT(dt,coln);
516     }
517     const char *ret = memrecycle(targetcol, rows, 0, targetlen, thisvalue, 0, -1, coln+1, CHAR(STRING_ELT(names, coln)));
518     if (ret) warning(ret);
519   }
520 
521   *_Last_updated = numToDo;  // the updates have taken place with no error, so update .Last.updated now
522   assignedNames = PROTECT(allocVector(STRSXP, LENGTH(cols))); protecti++;
523   for (i=0;i<LENGTH(cols);i++) SET_STRING_ELT(assignedNames,i,STRING_ELT(names,INTEGER(cols)[i]-1));
524   key = getAttrib(dt, sym_sorted);
525   if (length(key)) {
526     // if assigning to at least one key column, the key is truncated to one position before the first changed column.
527     //any() and subsetVector() don't seem to be exposed by R API at C level, so this is done here long hand.
528     PROTECT(tmp = chin(key, assignedNames)); protecti++;
529     newKeyLength = xlength(key);
530     for (i=0;i<LENGTH(tmp);i++) if (LOGICAL(tmp)[i]) {
531       // If a key column is being assigned to, set newKeyLength to the key element before since everything after that may have changed in order.
532       newKeyLength = i;
533       break;
534     }
535     if(newKeyLength == 0){
536       // no valid key columns remain, remove the key
537       setAttrib(dt, sym_sorted, R_NilValue);
538     } else if (newKeyLength < xlength(key)){
539       // new key is shorter than original one. Reassign
540       PROTECT(tmp = allocVector(STRSXP, newKeyLength)); protecti++;
541       for (int i=0; i<newKeyLength; i++) SET_STRING_ELT(tmp, i, STRING_ELT(key, i));
542       setAttrib(dt, sym_sorted, tmp);
543     }
544     //else: no key column changed, nothing to be done
545   }
546   index = getAttrib(dt, install("index"));
547   if (index != R_NilValue) {
548     s = ATTRIB(index);
549     indexNo = 0;
550     // get a vector with all index names
551     PROTECT(indexNames = allocVector(STRSXP, xlength(s))); protecti++;
552     while(s != R_NilValue){
553       SET_STRING_ELT(indexNames, indexNo, PRINTNAME(TAG(s)));
554       indexNo++;
555       s = CDR(s);
556     }
557     s = ATTRIB(index); // reset to first element
558     indexNo = 0;
559     while(s != R_NilValue) {
560       a = TAG(s);
561       indexLength = xlength(CAR(s));
562       tc1 = c1 = CHAR(PRINTNAME(a));  // the index name; e.g. "__col1__col2"
563       if (*tc1!='_' || *(tc1+1)!='_') {
564         // fix for #1396
565         if (verbose) {
566           Rprintf(_("Dropping index '%s' as it doesn't have '__' at the beginning of its name. It was very likely created by v1.9.4 of data.table.\n"), tc1);
567         }
568         setAttrib(index, a, R_NilValue);
569         indexNo++;
570         s = CDR(s);
571         continue; // with next index
572       }
573       tc1 += 2; // tc1 always marks the start of a key column
574       if (!*tc1) error(_("Internal error: index name ends with trailing __")); // # nocov
575       // check the position of the first appearance of an assigned column in the index.
576       // the new index will be truncated to this position.
577       char *s4 = (char*) malloc(strlen(c1) + 3);
578       if(s4 == NULL){
579         error(_("Internal error: Couldn't allocate memory for s4.")); // # nocov
580       }
581       memcpy(s4, c1, strlen(c1));
582       memset(s4 + strlen(c1), '\0', 1);
583       strcat(s4, "__"); // add trailing '__' to newKey so we can search for pattern '__colName__' also at the end of the index.
584       newKeyLength = strlen(c1);
585       for(int i = 0; i < xlength(assignedNames); i++){
586         tc2 = CHAR(STRING_ELT(assignedNames, i));
587         char *s5 = (char*) malloc(strlen(tc2) + 5); //4 * '_' + \0
588         if(s5 == NULL){
589           free(s4);                                                  // # nocov
590           error(_("Internal error: Couldn't allocate memory for s5.")); // # nocov
591         }
592         memset(s5, '_', 2);
593         memset(s5 + 2, '\0', 1);
594         strcat(s5, tc2);
595         strcat(s5, "__");
596         tc2 = strstr(s4, s5);
597         if(tc2 == NULL){ // column is not part of key
598           free(s5);
599           continue;
600         }
601         if(tc2 - s4 < newKeyLength){ // new column match is before last match
602           newKeyLength = tc2 - s4;
603         }
604         free(s5);
605       }
606       memset(s4 + newKeyLength, '\0', 1); // truncate the new key to the new length
607       if(newKeyLength == 0){ // no valid key column remains. Drop the key
608         setAttrib(index, a, R_NilValue);
609         SET_STRING_ELT(indexNames, indexNo, NA_STRING);
610         if (verbose) {
611           Rprintf(_("Dropping index '%s' due to an update on a key column\n"), c1+2);
612         }
613       } else if(newKeyLength < strlen(c1)) {
614         SEXP s4Str = PROTECT(mkString(s4));
615         if(indexLength == 0 && // shortened index can be kept since it is just information on the order (see #2372)
616            LOGICAL(chin(s4Str, indexNames))[0] == 0) {// index with shortened name not present yet
617           SET_TAG(s, install(s4));
618           SET_STRING_ELT(indexNames, indexNo, mkChar(s4));
619           if (verbose)
620             Rprintf(_("Shortening index '%s' to '%s' due to an update on a key column\n"), c1+2, s4 + 2);
621         } else { // indexLength > 0 || shortened name present already
622           // indexLength > 0 indicates reordering. Drop it to avoid spurious reordering in non-indexed columns (#2372)
623           // shortened anme already present indicates that index needs to be dropped to avoid duplicate indices.
624           setAttrib(index, a, R_NilValue);
625           SET_STRING_ELT(indexNames, indexNo, NA_STRING);
626           if (verbose)
627             Rprintf(_("Dropping index '%s' due to an update on a key column\n"), c1+2);
628         }
629         UNPROTECT(1); // s4Str
630       } //else: index is not affected by assign: nothing to be done
631       free(s4);
632       indexNo ++;
633       s = CDR(s);
634     }
635   }
636   if (ndelete) {
637     // delete any columns assigned NULL (there was a 'continue' earlier in loop above)
638     int *tt = (int *)R_alloc(ndelete, sizeof(int));
639     const int *colsd=INTEGER(cols), ncols=length(cols), ndt=length(dt);
640     for (int i=0, k=0; i<ncols; ++i) {   // find which ones to delete and put them in tt
641       // Aside: a new column being assigned NULL (something odd to do) would have been warned above, added above, and now deleted. Just
642       //        easier to code it this way; e.g. so that other columns may be added or removed ok by the same query.
643       coln = colsd[i]-1;
644       SEXP thisvalue = RHS_list_of_columns ? VECTOR_ELT(values, i) : values;
645       if (isNull(thisvalue)) tt[k++] = coln;
646     }
647     R_isort(tt, ndelete);  // sort the column-numbers-to-delete into ascending order
648     for (int i=0; i<ndelete-1; ++i) {
649       if (tt[i]>=tt[i+1])
650         error(_("Internal error: %d column numbers to delete not now in strictly increasing order. No-dups were checked earlier.")); // # nocov
651     }
652     for (int i=tt[0], j=1, k=tt[0]+1;  i<ndt-ndelete;  ++i, ++k) {  // i moves up from the first non-deleted column and is the target of write
653       while (j<ndelete && k==tt[j]) { j++; k++; }                   // move k up to the next non-deleted column; j is the next position in tt
654       SET_VECTOR_ELT(dt,    i, VECTOR_ELT(dt,    k));
655       SET_STRING_ELT(names, i, STRING_ELT(names, k));
656     }
657     for (int i=ndt-ndelete; i<ndt; ++i) {   // blank out the ndelete slots at the end
658       SET_VECTOR_ELT(dt, i, R_NilValue);
659       SET_STRING_ELT(names, i, NA_STRING);  // release reference to the CHARSXP
660     }
661     SETLENGTH(dt,    ndt-ndelete);
662     SETLENGTH(names, ndt-ndelete);
663     if (LENGTH(names)==0) {
664       // That was last column deleted, leaving NULL data.table, so we need to reset .row_names, so that it really is the NULL data.table.
665       PROTECT(nullint=allocVector(INTSXP, 0)); protecti++;
666       setAttrib(dt, R_RowNamesSymbol, nullint);  // i.e. .set_row_names(0)
667     }
668   }
669   UNPROTECT(protecti);
670   return(dt);  // needed for `*tmp*` mechanism (when := isn't used), and to return the new object after a := for compound syntax.
671 }
672 
673 #define MSGSIZE 1000
674 static char memrecycle_message[MSGSIZE+1]; // returned to rbindlist so it can prefix with which one of the list of data.table-like objects
675 
memrecycle(const SEXP target,const SEXP where,const int start,const int len,SEXP source,const int sourceStart,const int sourceLen,const int colnum,const char * colname)676 const char *memrecycle(const SEXP target, const SEXP where, const int start, const int len, SEXP source, const int sourceStart, const int sourceLen, const int colnum, const char *colname)
677 // like memcpy but recycles single-item source
678 // 'where' a 1-based INTEGER vector subset of target to assign to, or NULL or integer()
679 // assigns to target[start:start+len-1] or target[where[start:start+len-1]] where start is 0-based
680 // if sourceLen==-1 then all of source is used (if it is 1 item then it is recycled, or its length must match) for convenience to avoid
681 //   having to use length(source) (repeating source expression) in each call
682 // sourceLen==1 is used in dogroups to recycle the group values into ans to match the nrow of each group's result; sourceStart is set to each group value row.
683 {
684   if (len<1) return NULL;
685   const int slen = sourceLen>=0 ? sourceLen : length(source);
686   if (slen==0) return NULL;
687   if (sourceStart<0 || sourceStart+slen>length(source))
688     error(_("Internal error memrecycle: sourceStart=%d sourceLen=%d length(source)=%d"), sourceStart, sourceLen, length(source)); // # nocov
689   if (!length(where) && start+len>length(target))
690     error(_("Internal error memrecycle: start=%d len=%d length(target)=%d"), start, len, length(target)); // # nocov
691   const int soff = sourceStart;
692   if (slen>1 && slen!=len && (!isNewList(target) || isNewList(source)))
693     error(_("Internal error: recycle length error not caught earlier. slen=%d len=%d"), slen, len); // # nocov
694   // Internal error because the column has already been added to the DT, so length mismatch should have been caught before adding the column.
695   // for 5647 this used to limit slen to len, but no longer
696   if (colname==NULL)
697     error(_("Internal error: memrecycle has received NULL colname")); // # nocov
698   *memrecycle_message = '\0';
699   int protecti=0;
700   const bool sourceIsFactor=isFactor(source), targetIsFactor=isFactor(target);
701   const bool sourceIsI64=isReal(source) && Rinherits(source, char_integer64);
702   const bool targetIsI64=isReal(target) && Rinherits(target, char_integer64);
703   if (sourceIsFactor || targetIsFactor) {
704     if (!targetIsFactor) {
705       if (!isString(target) && !isNewList(target))
706         error(_("Cannot assign 'factor' to '%s'. Factors can only be assigned to factor, character or list columns."), type2char(TYPEOF(target)));
707       // else assigning factor to character is left to later below, avoiding wasteful asCharacterFactor
708     } else if (!sourceIsFactor && !isString(source)) {
709       // target is factor
710       if (allNA(source, false)) {  // return false for list and other types that allNA does not support
711         source = ScalarLogical(NA_LOGICAL); // a global constant in R and won't allocate; fall through to regular zero-copy coerce
712       } else if (isInteger(source) || isReal(source)) {
713         // allow assigning level numbers to factor columns; test 425, 426, 429 and 1945
714         const int nlevel = length(getAttrib(target, R_LevelsSymbol));
715         if (isInteger(source)) {
716           const int *sd = INTEGER(source);
717           for (int i=0; i<slen; ++i) {
718             const int val = sd[i+soff];
719             if ((val<1 && val!=NA_INTEGER) || val>nlevel) {
720               error(_("Assigning factor numbers to column %d named '%s'. But %d is outside the level range [1,%d]"), colnum, colname, val, nlevel);
721             }
722           }
723         } else {
724           const double *sd = REAL(source);
725           for (int i=0; i<slen; ++i) {
726             const double val = sd[i+soff];
727             if (!ISNAN(val) && (!R_FINITE(val) || val!=(int)val || (int)val<1 || (int)val>nlevel)) {
728               error(_("Assigning factor numbers to column %d named '%s'. But %f is outside the level range [1,%d], or is not a whole number."), colnum, colname, val, nlevel);
729             }
730           }
731         }
732         // Now just let the valid level numbers fall through to regular assign by BODY below
733       } else {
734         error(_("Cannot assign '%s' to 'factor'. Factor columns can be assigned factor, character, NA in any type, or level numbers."), type2char(TYPEOF(source)));
735       }
736     } else {
737       // either factor or character being assigned to factor column
738       SEXP targetLevels = PROTECT(getAttrib(target, R_LevelsSymbol)); protecti++;
739       SEXP sourceLevels = source;  // character source
740       if (sourceIsFactor) { sourceLevels=PROTECT(getAttrib(source, R_LevelsSymbol)); protecti++; }
741       if (!sourceIsFactor || !R_compute_identical(sourceLevels, targetLevels, 0)) {  // !sourceIsFactor for test 2115.6
742         const int nTargetLevels=length(targetLevels), nSourceLevels=length(sourceLevels);
743         const SEXP *targetLevelsD=STRING_PTR(targetLevels), *sourceLevelsD=STRING_PTR(sourceLevels);
744         SEXP newSource = PROTECT(allocVector(INTSXP, length(source))); protecti++;
745         savetl_init();
746         for (int k=0; k<nTargetLevels; ++k) {
747           const SEXP s = targetLevelsD[k];
748           const int tl = TRUELENGTH(s);
749           if (tl>0) {
750             savetl(s);
751           } else if (tl<0) {
752             // # nocov start
753             for (int j=0; j<k; ++j) SET_TRUELENGTH(s, 0);  // wipe our negative usage and restore 0
754             savetl_end();                                  // then restore R's own usage (if any)
755             error(_("Internal error: levels of target are either not unique or have truelength<0"));
756             // # nocov end
757           }
758           SET_TRUELENGTH(s, -k-1);
759         }
760         int nAdd = 0;
761         for (int k=0; k<nSourceLevels; ++k) {
762           const SEXP s = sourceLevelsD[k];
763           const int tl = TRUELENGTH(s);
764           if (tl>=0) {
765             if (!sourceIsFactor && s==NA_STRING) continue; // don't create NA factor level when assigning character to factor; test 2117
766             if (tl>0) savetl(s);
767             SET_TRUELENGTH(s, -nTargetLevels-(++nAdd));
768           } // else, when sourceIsString, it's normal for there to be duplicates here
769         }
770         const int nSource = length(source);
771         int *newSourceD = INTEGER(newSource);
772         if (sourceIsFactor) {
773           const int *sourceD = INTEGER(source);
774           for (int i=0; i<nSource; ++i) {  // convert source integers to refer to target levels
775             const int val = sourceD[i];
776             newSourceD[i] = val==NA_INTEGER ? NA_INTEGER : -TRUELENGTH(sourceLevelsD[val-1]); // retains NA factor levels here via TL(NA_STRING); e.g. ordered factor
777           }
778         } else {
779           const SEXP *sourceD = STRING_PTR(source);
780           for (int i=0; i<nSource; ++i) {  // convert source integers to refer to target levels
781             const SEXP val = sourceD[i];
782             newSourceD[i] = val==NA_STRING ? NA_INTEGER : -TRUELENGTH(val);
783           }
784         }
785         source = newSource;
786         for (int k=0; k<nTargetLevels; ++k) SET_TRUELENGTH(targetLevelsD[k], 0);  // don't need those anymore
787         if (nAdd) {
788           // cannot grow the levels yet as that would be R call which could fail to alloc and we have no hook to clear up
789           SEXP *temp = (SEXP *)malloc(nAdd * sizeof(SEXP *));
790           if (!temp) {
791             // # nocov start
792             for (int k=0; k<nSourceLevels; ++k) SET_TRUELENGTH(sourceLevelsD[k], 0);
793             savetl_end();
794             error(_("Unable to allocate working memory of %d bytes to combine factor levels"), nAdd*sizeof(SEXP *));
795             // # nocov end
796           }
797           for (int k=0, thisAdd=0; thisAdd<nAdd; ++k) {   // thisAdd<nAdd to stop early when the added ones are all reached
798             SEXP s = sourceLevelsD[k];
799             int tl = TRUELENGTH(s);
800             if (tl) {  // tl negative here
801               if (tl != -nTargetLevels-thisAdd-1) error(_("Internal error: extra level check sum failed")); // # nocov
802               temp[thisAdd++] = s;
803               SET_TRUELENGTH(s,0);
804             }
805           }
806           savetl_end();
807           setAttrib(target, R_LevelsSymbol, targetLevels=growVector(targetLevels, nTargetLevels + nAdd));
808           for (int k=0; k<nAdd; ++k) {
809             SET_STRING_ELT(targetLevels, nTargetLevels+k, temp[k]);
810           }
811           free(temp);
812         } else {
813           // all source levels were already in target levels, but not with the same integers; we're done
814           savetl_end();
815         }
816         // now continue, but with the mapped integers in the (new) source
817       }
818     }
819   } else if (isString(source) && !isString(target) && !isNewList(target)) {
820     warning(_("Coercing 'character' RHS to '%s' to match the type of the target column (column %d named '%s')."),
821             type2char(TYPEOF(target)), colnum, colname);
822     // this "Coercing ..." warning first to give context in case coerceVector warns 'NAs introduced by coercion'
823     source = PROTECT(coerceVector(source, TYPEOF(target))); protecti++;
824   } else if (isNewList(source) && !isNewList(target)) {
825     if (targetIsI64) {
826       error(_("Cannot coerce 'list' RHS to 'integer64' to match the type of the target column (column %d named '%s')."), colnum, colname);
827       // because R's coerceVector doesn't know about integer64
828     }
829     // as in base R; e.g. let as.double(list(1,2,3)) work but not as.double(list(1,c(2,4),3))
830     // relied on by NNS, simstudy and table.express; tests 1294.*
831     warning(_("Coercing 'list' RHS to '%s' to match the type of the target column (column %d named '%s')."),
832             type2char(TYPEOF(target)), colnum, colname);
833     source = PROTECT(coerceVector(source, TYPEOF(target))); protecti++;
834   } else if ((TYPEOF(target)!=TYPEOF(source) || targetIsI64!=sourceIsI64) && !isNewList(target)) {
835     if (GetVerbose()) {
836       // only take the (small) cost of GetVerbose() (search of options() list) when types don't match
837       Rprintf(_("Zero-copy coerce when assigning '%s' to '%s' column %d named '%s'.\n"),
838               sourceIsI64 ? "integer64" : type2char(TYPEOF(source)),
839               targetIsI64 ? "integer64" : type2char(TYPEOF(target)),
840               colnum, colname);
841     }
842     // The following checks are up front here, otherwise we'd need them twice in the two branches
843     //   inside BODY that cater for 'where' or not. Maybe there's a way to merge the two macros in future.
844     // The idea is to do these range checks without calling coerceVector() (which allocates)
845 
846 #define CHECK_RANGE(STYPE, RFUN, COND, FMT, TO) {{                                                                      \
847   const STYPE *sd = (const STYPE *)RFUN(source);                                                                        \
848   for (int i=0; i<slen; ++i) {                                                                                          \
849     const STYPE val = sd[i+soff];                                                                                       \
850     if (COND) {                                                                                                         \
851       const char *sType = sourceIsI64 ? "integer64" : type2char(TYPEOF(source));                                        \
852       const char *tType = targetIsI64 ? "integer64" : type2char(TYPEOF(target));                                        \
853       int n = snprintf(memrecycle_message, MSGSIZE,                                                                     \
854             "%"FMT" (type '%s') at RHS position %d "TO" when assigning to type '%s'", val, sType, i+1, tType);          \
855       if (colnum>0 && n>0 && n<MSGSIZE)                                                                                 \
856         snprintf(memrecycle_message+n, MSGSIZE-n, " (column %d named '%s')", colnum, colname);                          \
857       /* string returned so that rbindlist/dogroups can prefix it with which item of its list this refers to  */        \
858       break;                                                                                                            \
859     }                                                                                                                   \
860   }                                                                                                                     \
861 } break; }
862 
863     switch(TYPEOF(target)) {
864     case LGLSXP:
865       switch (TYPEOF(source)) {
866       case RAWSXP:  CHECK_RANGE(Rbyte, RAW,    val!=0 && val!=1,                                        "d",    "taken as TRUE")
867       case INTSXP:  CHECK_RANGE(int, INTEGER,  val!=0 && val!=1 && val!=NA_INTEGER,                     "d",    "taken as TRUE")
868       case REALSXP: if (sourceIsI64)
869                     CHECK_RANGE(int64_t, REAL, val!=0 && val!=1 && val!=NA_INTEGER64,                   PRId64, "taken as TRUE")
870               else  CHECK_RANGE(double, REAL,  !ISNAN(val) && val!=0.0 && val!=1.0,                     "f",    "taken as TRUE")
871       } break;
872     case RAWSXP:
873       switch (TYPEOF(source)) {
874       case INTSXP:  CHECK_RANGE(int, INTEGER,  val<0 || val>255,                                        "d",    "taken as 0")
875       case REALSXP: if (sourceIsI64)
876                     CHECK_RANGE(int64_t, REAL, val<0 || val>255,                                        PRId64, "taken as 0")
877               else  CHECK_RANGE(double, REAL,  !R_FINITE(val) || val<0.0 || val>256.0 || (int)val!=val, "f",    "either truncated (precision lost) or taken as 0")
878       } break;
879     case INTSXP:
880       if (TYPEOF(source)==REALSXP) {
881         if (sourceIsI64)
882                     CHECK_RANGE(int64_t, REAL, val!=NA_INTEGER64 && (val<=NA_INTEGER || val>INT_MAX),   PRId64,  "out-of-range (NA)")
883         else        CHECK_RANGE(double, REAL,  !ISNAN(val) && (!R_FINITE(val) || (int)val!=val),        "f",     "truncated (precision lost)")
884       } break;
885     case REALSXP:
886       if (targetIsI64 && isReal(source) && !sourceIsI64) {
887                     CHECK_RANGE(double, REAL,  !ISNAN(val) && (!R_FINITE(val) || (int)val!=val),        "f",     "truncated (precision lost)")
888       }
889     }
890   }
891 
892 #undef BODY
893 #define BODY(STYPE, RFUN, CTYPE, CAST, ASSIGN) {{    \
894   const STYPE *sd = (const STYPE *)RFUN(source);     \
895   if (length(where)) {                               \
896     if (slen==1) {                                   \
897       const STYPE val = sd[soff];                    \
898       const CTYPE cval = CAST;                       \
899       for (int wi=0; wi<len; ++wi) {                 \
900         const int w = wd[wi];                        \
901         if (w<1) continue; /*0 or NA*/               \
902         const int i = w-1;                           \
903         ASSIGN;                                      \
904       }                                              \
905     } else {                                         \
906       for (int wi=0; wi<len; ++wi) {                 \
907         const int w = wd[wi];                        \
908         if (w<1) continue;                           \
909         const STYPE val = sd[wi+soff];               \
910         const CTYPE cval = CAST;                     \
911         const int i = w-1;                           \
912         ASSIGN;                                      \
913       }                                              \
914     }                                                \
915   } else {                                           \
916     if (slen==1) {                                   \
917       const STYPE val = sd[soff];                    \
918       const CTYPE cval = CAST;                       \
919       for (int i=0; i<len; ++i) {                    \
920         ASSIGN;                                      \
921       }                                              \
922     } else {                                         \
923       for (int i=0; i<len; i++) {                    \
924         const STYPE val = sd[i+soff];                \
925         const CTYPE cval = CAST;                     \
926         ASSIGN;                                      \
927       }                                              \
928     }                                                \
929   }                                                  \
930 } break; }
931 
932 #define COERCE_ERROR(targetType) error(_("type '%s' cannot be coerced to '%s'"), type2char(TYPEOF(source)), targetType); // 'targetType' for integer64 vs double
933 
934   const int off = length(where) ? 0 : start;  // off = target offset; e.g. called from rbindlist with where=R_NilValue and start!=0
935   const bool mc = length(where)==0 && slen>0 && slen==len && soff==0;  // mc=memcpy; only if types match and not for single items (a single assign faster than these non-const memcpy calls)
936   const int *wd = length(where) ? INTEGER(where)+start : NULL;
937   switch (TYPEOF(target)) {
938   case RAWSXP: {
939     Rbyte *td = RAW(target) + off;
940     switch (TYPEOF(source)) {
941     case RAWSXP:
942       if (mc) {
943                     memcpy(td, RAW(source), slen*sizeof(Rbyte)); break;
944       } else        BODY(Rbyte, RAW,    Rbyte, val,                                     td[i]=cval)
945     case LGLSXP:    BODY(int, LOGICAL,  Rbyte, val==1,                                  td[i]=cval)
946     case INTSXP:    BODY(int, INTEGER,  Rbyte, (val>255 || val<0) ? 0 : val,            td[i]=cval)
947     case REALSXP:
948       if (sourceIsI64)
949                     BODY(int64_t, REAL, Rbyte, (val>255 || val<0) ? 0 : val,            td[i]=cval)
950       else          BODY(double, REAL,  Rbyte, (ISNAN(val)||val>255||val<0) ? 0 : val,  td[i]=cval)
951     default:        COERCE_ERROR("raw");
952     }
953   } break;
954   case LGLSXP: {
955     int *td = LOGICAL(target) + off;
956     switch (TYPEOF(source)) {
957     case RAWSXP:    BODY(Rbyte, RAW,    int, val!=0,                                    td[i]=cval)
958     case LGLSXP:
959       if (mc) {
960                     memcpy(td, LOGICAL(source), slen*sizeof(Rboolean)); break;
961       } else        BODY(int, LOGICAL,  int, val,                                       td[i]=cval)
962     case INTSXP:    BODY(int, INTEGER,  int, val==NA_INTEGER ? NA_LOGICAL : val!=0,     td[i]=cval)
963     case REALSXP:
964       if (sourceIsI64)
965                     BODY(int64_t, REAL, int, val==NA_INTEGER64 ? NA_LOGICAL : val!=0,   td[i]=cval)
966       else          BODY(double,  REAL, int, ISNAN(val) ? NA_LOGICAL : val!=0.0,        td[i]=cval)
967     default:        COERCE_ERROR("logical");
968     }
969   } break;
970   case INTSXP : {
971     int *td = INTEGER(target) + off;
972     switch (TYPEOF(source)) {
973     case  RAWSXP:   BODY(Rbyte, RAW,    int, (int)val,                                  td[i]=cval)
974     case  LGLSXP:   // same as INTSXP ...
975     case  INTSXP:
976       if (mc) {
977                     memcpy(td, INTEGER(source), slen*sizeof(int)); break;
978       } else        BODY(int, INTEGER,  int, val,                                       td[i]=cval)
979     case REALSXP:
980       if (sourceIsI64)
981                     BODY(int64_t, REAL, int, (val==NA_INTEGER64||val>INT_MAX||val<=NA_INTEGER) ? NA_INTEGER : (int)val,  td[i]=cval)
982       else          BODY(double, REAL,  int, ISNAN(val) ? NA_INTEGER : (int)val,        td[i]=cval)
983     default:        COERCE_ERROR("integer"); // test 2005.4
984     }
985   } break;
986   case REALSXP : {
987     if (targetIsI64) {
988       int64_t *td = (int64_t *)REAL(target) + off;
989       switch (TYPEOF(source)) {
990       case RAWSXP:  BODY(Rbyte, RAW,    int64_t, (int64_t)val,                          td[i]=cval)
991       case LGLSXP:  // same as INTSXP
992       case INTSXP:  BODY(int, INTEGER,  int64_t, val==NA_INTEGER ? NA_INTEGER64 : val,  td[i]=cval)
993       case REALSXP:
994         if (sourceIsI64) {
995           if (mc) {
996                     memcpy(td, (int64_t *)REAL(source), slen*sizeof(int64_t)); break;
997           } else    BODY(int64_t, REAL, int64_t, val,                                   td[i]=cval)
998         } else      BODY(double, REAL,  int64_t, R_FINITE(val) ? val : NA_INTEGER64,    td[i]=cval)
999       default:      COERCE_ERROR("integer64");
1000       }
1001     } else {
1002       double *td = REAL(target) + off;
1003       switch (TYPEOF(source)) {
1004       case  RAWSXP: BODY(Rbyte, RAW,    double, (double)val,                            td[i]=cval)
1005       case  LGLSXP: // same as INTSXP
1006       case  INTSXP: BODY(int, INTEGER,  double, val==NA_INTEGER ? NA_REAL : val,        td[i]=cval)
1007       case REALSXP:
1008         if (!sourceIsI64) {
1009           if (mc) {
1010                     memcpy(td, (double *)REAL(source), slen*sizeof(double)); break;
1011           } else    BODY(double, REAL,  double, val,                                    td[i]=cval)
1012         } else      BODY(int64_t, REAL, double, val==NA_INTEGER64 ? NA_REAL : val,      td[i]=cval)
1013       default:      COERCE_ERROR("double");
1014       }
1015     }
1016   } break;
1017   case CPLXSXP: {
1018     Rcomplex *td = COMPLEX(target) + off;
1019     double im = 0.0;
1020     switch (TYPEOF(source)) {
1021     case  RAWSXP:   BODY(Rbyte, RAW,    double, (im=0.0,val),                                         td[i].r=cval;td[i].i=im)
1022     case  LGLSXP:   // same as INTSXP
1023     case  INTSXP:   BODY(int, INTEGER,  double, val==NA_INTEGER?(im=NA_REAL,NA_REAL):(im=0.0,val),    td[i].r=cval;td[i].i=im)
1024     case REALSXP:
1025       if (sourceIsI64)
1026                     BODY(int64_t, REAL, double, val==NA_INTEGER64?(im=NA_REAL,NA_REAL):(im=0.0,val),  td[i].r=cval;td[i].i=im)
1027       else          BODY(double,  REAL, double, ISNAN(val)?(im=NA_REAL,NA_REAL):(im=0.0,val),         td[i].r=cval;td[i].i=im)
1028     case CPLXSXP:
1029       if (mc) {
1030                     memcpy(td, COMPLEX(source), slen*sizeof(Rcomplex)); break;
1031       } else        BODY(Rcomplex, COMPLEX, Rcomplex, val,                                            td[i]=cval)
1032     default:        COERCE_ERROR("complex");
1033     }
1034   } break;
1035   case STRSXP :
1036     if (sourceIsFactor) {
1037       const SEXP *ld = STRING_PTR(PROTECT(getAttrib(source, R_LevelsSymbol))); protecti++;
1038       BODY(int, INTEGER, SEXP, val==NA_INTEGER ? NA_STRING : ld[val-1],  SET_STRING_ELT(target, off+i, cval))
1039     } else {
1040       if (!isString(source)) {
1041         if (allNA(source, true)) {  // saves common coercion of NA (logical) to NA_character_
1042           //              ^^ =errorForBadType; if type list, that was already an error earlier so we
1043           //                 want to be strict now otherwise list would get to coerceVector below
1044           if (length(where)) {
1045             for (int i=0; i<len; ++i) if (wd[i]>0) SET_STRING_ELT(target, wd[i]-1, NA_STRING);
1046           } else {
1047             for (int i=0; i<len; ++i) SET_STRING_ELT(target, start+i, NA_STRING);
1048           }
1049           break;
1050         }
1051         if (sourceIsI64)
1052           error(_("To assign integer64 to a character column, please use as.character() for clarity."));
1053         source = PROTECT(coerceVector(source, STRSXP)); protecti++;
1054       }
1055       BODY(SEXP, STRING_PTR, SEXP, val,  SET_STRING_ELT(target, off+i, cval))
1056     }
1057   case VECSXP :
1058   case EXPRSXP :  // #546
1059     if (TYPEOF(source)!=VECSXP && TYPEOF(source)!=EXPRSXP)
1060       BODY(SEXP, &, SEXP, val,           SET_VECTOR_ELT(target, off+i, cval))
1061     else
1062       BODY(SEXP, SEXPPTR_RO, SEXP, val,  SET_VECTOR_ELT(target, off+i, cval))
1063   default :
1064     error(_("Unsupported column type in assign.c:memrecycle '%s'"), type2char(TYPEOF(target)));  // # nocov
1065   }
1066   UNPROTECT(protecti);
1067   return memrecycle_message[0] ? memrecycle_message : NULL;
1068 }
1069 
writeNA(SEXP v,const int from,const int n)1070 void writeNA(SEXP v, const int from, const int n)
1071 // e.g. for use after allocVector() which does not initialize its result.
1072 {
1073   const int to = from-1+n;  // writing to position 2147483647 in mind, 'i<=to' in loop conditions
1074   switch(TYPEOF(v)) {
1075   case RAWSXP:
1076     memset(RAW(v)+from, 0, n*sizeof(Rbyte));
1077     break;
1078   case LGLSXP: {
1079     Rboolean *vd = (Rboolean *)LOGICAL(v);
1080     for (int i=from; i<=to; ++i) vd[i] = NA_LOGICAL;
1081   } break;
1082   case INTSXP: {
1083     // same whether factor or not
1084     int *vd = INTEGER(v);
1085     for (int i=from; i<=to; ++i) vd[i] = NA_INTEGER;
1086   } break;
1087   case REALSXP: {
1088     if (Rinherits(v, char_integer64)) {  // Rinherits covers nanotime too which inherits from integer64 via S4 extends
1089       int64_t *vd = (int64_t *)REAL(v);
1090       for (int i=from; i<=to; ++i) vd[i] = NA_INTEGER64;
1091     } else {
1092       double *vd = REAL(v);
1093       for (int i=from; i<=to; ++i) vd[i] = NA_REAL;
1094     }
1095   } break;
1096   case CPLXSXP: {
1097     Rcomplex *vd = COMPLEX(v);
1098     for (int i=from; i<=to; ++i) vd[i] = NA_CPLX;
1099   } break;
1100   case STRSXP:
1101     // character columns are initialized with blank string (""). So replace the all-"" with all-NA_character_
1102     // Since "" and NA_character_ are global constants in R, it should be ok to not use SET_STRING_ELT here. But use it anyway for safety (revisit if proved slow)
1103     // If there's ever a way added to R API to pass NA_STRING to allocVector() to tell it to initialize with NA not "", would be great
1104     for (int i=from; i<=to; ++i) SET_STRING_ELT(v, i, NA_STRING);
1105     break;
1106   case VECSXP: case EXPRSXP :
1107     // although allocVector already initializes to R_NilValue, we use writeNA() in other places too, so we shouldn't skip this assign
1108     for (int i=from; i<=to; ++i) SET_VECTOR_ELT(v, i, R_NilValue);
1109     break;
1110   default :
1111     error(_("Internal error: writeNA passed a vector of type '%s'"), type2char(TYPEOF(v)));  // # nocov
1112   }
1113 }
1114 
allocNAVector(SEXPTYPE type,R_len_t n)1115 SEXP allocNAVector(SEXPTYPE type, R_len_t n)
1116 {
1117   // an allocVector following with initialization to NA since a subassign to a new column using :=
1118   // routinely leaves untouched items (rather than 0 or "" as allocVector does with its memset)
1119   // We guess that author of allocVector would have liked to initialize with NA but was prevented since memset
1120   // is restricted to one byte.
1121   SEXP v = PROTECT(allocVector(type, n));
1122   writeNA(v, 0, n);
1123   UNPROTECT(1);
1124   return(v);
1125 }
1126 
allocNAVectorLike(SEXP x,R_len_t n)1127 SEXP allocNAVectorLike(SEXP x, R_len_t n) {
1128   // writeNA needs the attribute retained to write NA_INTEGER64, #3723
1129   // TODO: remove allocNAVector above when usage in fastmean.c, fcast.c and fmelt.c can be adjusted; see comments in PR3724
1130   SEXP v = PROTECT(allocVector(TYPEOF(x), n));
1131   copyMostAttrib(x, v);
1132   writeNA(v, 0, n);
1133   UNPROTECT(1);
1134   return(v);
1135 }
1136 
1137 static SEXP *saveds=NULL;
1138 static R_len_t *savedtl=NULL, nalloc=0, nsaved=0;
1139 
savetl_init()1140 void savetl_init() {
1141   if (nsaved || nalloc || saveds || savedtl) {
1142     error(_("Internal error: savetl_init checks failed (%d %d %p %p). please report to data.table issue tracker."), nsaved, nalloc, saveds, savedtl); // # nocov
1143   }
1144   nsaved = 0;
1145   nalloc = 100;
1146   saveds = (SEXP *)malloc(nalloc * sizeof(SEXP));
1147   savedtl = (R_len_t *)malloc(nalloc * sizeof(R_len_t));
1148   if (saveds==NULL || savedtl==NULL) {
1149     savetl_end();                                                        // # nocov
1150     error(_("Failed to allocate initial %d items in savetl_init"), nalloc); // # nocov
1151   }
1152 }
1153 
savetl(SEXP s)1154 void savetl(SEXP s)
1155 {
1156   if (nsaved==nalloc) {
1157     if (nalloc==INT_MAX) {
1158       savetl_end();                                                                                                     // # nocov
1159       error(_("Internal error: reached maximum %d items for savetl. Please report to data.table issue tracker."), nalloc); // # nocov
1160     }
1161     nalloc = nalloc>(INT_MAX/2) ? INT_MAX : nalloc*2;
1162     char *tmp = (char *)realloc(saveds, nalloc*sizeof(SEXP));
1163     if (tmp==NULL) {
1164       // C spec states that if realloc() fails the original block is left untouched; it is not freed or moved. We rely on that here.
1165       savetl_end();                                                      // # nocov  free(saveds) happens inside savetl_end
1166       error(_("Failed to realloc saveds to %d items in savetl"), nalloc);   // # nocov
1167     }
1168     saveds = (SEXP *)tmp;
1169     tmp = (char *)realloc(savedtl, nalloc*sizeof(R_len_t));
1170     if (tmp==NULL) {
1171       savetl_end();                                                      // # nocov
1172       error(_("Failed to realloc savedtl to %d items in savetl"), nalloc);  // # nocov
1173     }
1174     savedtl = (R_len_t *)tmp;
1175   }
1176   saveds[nsaved] = s;
1177   savedtl[nsaved] = TRUELENGTH(s);
1178   nsaved++;
1179 }
1180 
savetl_end()1181 void savetl_end() {
1182   // Can get called if nothing has been saved yet (nsaved==0), or even if _init() hasn't been called yet (pointers NULL). Such
1183   // as to clear up before error. Also, it might be that nothing needed to be saved anyway.
1184   for (int i=0; i<nsaved; i++) SET_TRUELENGTH(saveds[i],savedtl[i]);
1185   free(saveds);  // possible free(NULL) which is safe no-op
1186   saveds = NULL;
1187   free(savedtl);
1188   savedtl = NULL;
1189   nsaved = nalloc = 0;
1190 }
1191 
setcharvec(SEXP x,SEXP which,SEXP newx)1192 SEXP setcharvec(SEXP x, SEXP which, SEXP newx)
1193 {
1194   int w;
1195   if (!isString(x)) error(_("x must be a character vector"));
1196   if (!isInteger(which)) error(_("'which' must be an integer vector"));
1197   if (!isString(newx)) error(_("'new' must be a character vector"));
1198   if (LENGTH(newx)!=LENGTH(which)) error(_("'new' is length %d. Should be the same as length of 'which' (%d)"),LENGTH(newx),LENGTH(which));
1199   for (int i=0; i<LENGTH(which); i++) {
1200     w = INTEGER(which)[i];
1201     if (w==NA_INTEGER || w<1 || w>LENGTH(x)) error(_("Item %d of 'which' is %d which is outside range of the length %d character vector"), i+1,w,LENGTH(x));
1202     SET_STRING_ELT(x, w-1, STRING_ELT(newx, i));
1203   }
1204   return R_NilValue;
1205 }
1206 
1207