1 #include "data.table.h"
2
finalizer(SEXP p)3 static void finalizer(SEXP p)
4 {
5 SEXP x;
6 R_len_t n, l, tl;
7 if(!R_ExternalPtrAddr(p)) error(_("Internal error: finalizer hasn't received an ExternalPtr")); // # nocov
8 p = R_ExternalPtrTag(p);
9 if (!isString(p)) error(_("Internal error: finalizer's ExternalPtr doesn't see names in tag")); // # nocov
10 l = LENGTH(p);
11 tl = TRUELENGTH(p);
12 if (l<0 || tl<l) error(_("Internal error: finalizer sees l=%d, tl=%d"),l,tl); // # nocov
13 n = tl-l;
14 if (n==0) {
15 // gc's ReleaseLargeFreeVectors() will have reduced R_LargeVallocSize by the correct amount
16 // already, so nothing to do (but almost never the case).
17 return;
18 }
19 x = PROTECT(allocVector(INTSXP, 50)); // 50 so it's big enough to be on LargeVector heap. See NodeClassSize in memory.c:allocVector
20 // INTSXP rather than VECSXP so that GC doesn't inspect contents after LENGTH (thanks to Karl Miller, Jul 2015)
21 SETLENGTH(x,50+n*2*sizeof(void *)/4); // 1*n for the names, 1*n for the VECSXP itself (both are over allocated).
22 UNPROTECT(1);
23 return;
24 }
25
setselfref(SEXP x)26 void setselfref(SEXP x) {
27 SEXP p;
28 // Store pointer to itself so we can detect if the object has been copied. See
29 // ?copy for why copies are not just inefficient but cause a problem for over-allocated data.tables.
30 // Called from C only, not R level, so returns void.
31 setAttrib(x, SelfRefSymbol, p=R_MakeExternalPtr(
32 R_NilValue, // for identical() to return TRUE. identical() doesn't look at tag and prot
33 PROTECT(getAttrib(x, R_NamesSymbol)), // to detect if names has been replaced and its tl lost, e.g. setattr(DT,"names",...)
34 PROTECT(R_MakeExternalPtr( // to avoid an infinite loop in object.size(), if prot=x here
35 x, // to know if this data.table has been copied by key<-, attr<-, names<-, etc.
36 R_NilValue, // this tag and prot currently unused
37 R_NilValue
38 ))
39 ));
40 R_RegisterCFinalizerEx(p, finalizer, FALSE);
41 UNPROTECT(2);
42
43 /*
44 * base::identical doesn't check prot and tag of EXTPTR, just that the ptr itself is the
45 same in both objects. R_NilValue is always equal to R_NilValue. R_NilValue is a memory
46 location constant within an R session, but can vary from session to session. So, it
47 looks like a pointer to a user looking at attributes(DT), but they might wonder how it
48 works if they realise the selfref of all data.tables all point to the same address (rather
49 than to the table itself which would be reasonable to expect given the attribute's name).
50 * p=NULL rather than R_NilValue works too, other than we need something other than NULL
51 so we can detect tables loaded from disk (which set p to NULL, see 5.13 of R-exts).
52 * x is wrapped in another EXTPTR because of object.size (called by tables(), and by users).
53 If the prot (or tag) was x directly it sends object.size into an infinite loop and then
54 "segfault from C stack overflow" (object.size does count tag and prot, unlike identical,
55 but doesn't count what's pointed to).
56 * Could use weak reference possibly, but the fact that they can get be set to R_NilValue
57 by gc (?) didn't seem appropriate.
58 * If the .internal.selfref attribute is removed (e.g. by user code), nothing will break, but
59 an extra copy will just be taken on next :=, with warning, with a new selfref.
60 * object.size will count size of names twice, but that's ok as only small.
61 * Thanks to Steve L for suggesting ExtPtr for this, rather than the previous REALSXP
62 vector which required data.table to do a show/hide dance in a masked identical.
63 */
64 }
65
66 /* There are two reasons the finalizer doesn't restore the LENGTH to TRUELENGTH. i) The finalizer
67 happens too late after GC has already released the memory, and ii) copies by base R (e.g.
68 [<- in write.table just before test 894) allocate at length LENGTH but copy the TRUELENGTH over.
69 If the finalizer sets LENGTH to TRUELENGTH, that's a fail as it wasn't really allocated at
70 TRUELENGTH when R did the copy.
71 Karl Miller suggested an ENVSXP so that restoring LENGTH in finalizer should work. This is the
72 closest I got to getting it to pass all tests :
73
74 SEXP env = PROTECT(allocSExp(ENVSXP));
75 defineVar(SelfRefSymbol, x, env);
76 defineVar(R_NamesSymbol, getAttrib(x, R_NamesSymbol), env);
77 setAttrib(x, SelfRefSymbol, p = R_MakeExternalPtr(
78 R_NilValue, // for identical() to return TRUE. identical() doesn't look at tag and prot
79 R_NilValue, //getAttrib(x, R_NamesSymbol), // to detect if names has been replaced and its tl lost, e.g. setattr(DT,"names",...)
80 PROTECT( // needed when --enable-strict-barrier it seems, iiuc. TO DO: test under that flag and remove if not needed.
81 env // wrap x in env to avoid an infinite loop in object.size() if prot=x were here
82 )
83 ));
84 R_RegisterCFinalizerEx(p, finalizer, FALSE);
85 UNPROTECT(2);
86
87 Then in finalizer:
88 SETLENGTH(names, tl)
89 SETLENGTH(dt, tl)
90
91 and that finalizer indeed now happens before the GC releases memory (thanks to the env wrapper).
92
93 However, we still have problem (ii) above and it didn't pass tests involving base R copies.
94
95 We really need R itself to start setting TRUELENGTH to be the allocated length and then
96 for GC to release TRUELENGTH not LENGTH. Would really tidy this up.
97
98 Moved out of ?setkey Details section in 1.12.2 (Mar 2019). Revisit this w.r.t. to recent versions of R.
99 The problem (for \code{data.table}) with the copy by \code{key<-} (other than
100 being slower) is that \R doesn't maintain the over-allocated truelength, but it
101 looks as though it has. Adding a column by reference using \code{:=} after a
102 \code{key<-} was therefore a memory overwrite and eventually a segfault; the
103 over-allocated memory wasn't really there after \code{key<-}'s copy. \code{data.table}s now have an attribute \code{.internal.selfref} to catch and warn about such copies.
104 This attribute has been implemented in a way that is friendly with
105 \code{identical()} and \code{object.size()}.
106 */
107
_selfrefok(SEXP x,Rboolean checkNames,Rboolean verbose)108 static int _selfrefok(SEXP x, Rboolean checkNames, Rboolean verbose) {
109 SEXP v, p, tag, prot, names;
110 v = getAttrib(x, SelfRefSymbol);
111 if (v==R_NilValue || TYPEOF(v)!=EXTPTRSXP) {
112 // .internal.selfref missing is expected and normal for i) a pre v1.7.8 data.table loaded
113 // from disk, and ii) every time a new data.table is over-allocated for the first time.
114 // Not being an extptr is for when users contruct a data.table via structure() using dput, post
115 // a question, and find the extptr doesn't parse so put quotes around it (for example).
116 // In both cases the selfref is not ok.
117 return 0;
118 }
119 p = R_ExternalPtrAddr(v);
120 if (p==NULL) {
121 if (verbose) Rprintf(_(".internal.selfref ptr is NULL. This is expected and normal for a data.table loaded from disk. Please remember to always setDT() immediately after loading to prevent unexpected behavior. If this table was not loaded from disk or you've already run setDT(), please report to data.table issue tracker.\n"));
122 return -1;
123 }
124 if (!isNull(p)) error(_("Internal error: .internal.selfref ptr is not NULL or R_NilValue")); // # nocov
125 tag = R_ExternalPtrTag(v);
126 if (!(isNull(tag) || isString(tag))) error(_("Internal error: .internal.selfref tag isn't NULL or a character vector")); // # nocov
127 names = getAttrib(x, R_NamesSymbol);
128 if (names!=tag && isString(names) && !ALTREP(names)) // !ALTREP for #4734
129 SET_TRUELENGTH(names, LENGTH(names));
130 // R copied this vector not data.table; it's not actually over-allocated. It looks over-allocated
131 // because R copies the original vector's tl over despite allocating length.
132 prot = R_ExternalPtrProtected(v);
133 if (TYPEOF(prot) != EXTPTRSXP) // Very rare. Was error(_(".internal.selfref prot is not itself an extptr")).
134 return 0; // # nocov ; see http://stackoverflow.com/questions/15342227/getting-a-random-internal-selfref-error-in-data-table-for-r
135 if (x!=R_ExternalPtrAddr(prot) && !ALTREP(x))
136 SET_TRUELENGTH(x, LENGTH(x)); // R copied this vector not data.table, it's not actually over-allocated
137 return checkNames ? names==tag : x==R_ExternalPtrAddr(prot);
138 }
139
selfrefok(SEXP x,Rboolean verbose)140 static Rboolean selfrefok(SEXP x, Rboolean verbose) { // for readability
141 return(_selfrefok(x, FALSE, verbose)==1);
142 }
selfrefnamesok(SEXP x,Rboolean verbose)143 static Rboolean selfrefnamesok(SEXP x, Rboolean verbose) {
144 return(_selfrefok(x, TRUE, verbose)==1);
145 }
146
shallow(SEXP dt,SEXP cols,R_len_t n)147 static SEXP shallow(SEXP dt, SEXP cols, R_len_t n)
148 {
149 // NEW: cols argument to specify the columns to shallow copy on. If NULL, all columns.
150 // called from alloccol where n is checked carefully, or from shallow() at R level
151 // where n is set to truelength (i.e. a shallow copy only with no size change)
152 R_len_t i,l;
153 int protecti=0;
154 SEXP newdt = PROTECT(allocVector(VECSXP, n)); protecti++; // to do, use growVector here?
155 SET_ATTRIB(newdt, shallow_duplicate(ATTRIB(dt)));
156 SET_OBJECT(newdt, OBJECT(dt));
157 IS_S4_OBJECT(dt) ? SET_S4_OBJECT(newdt) : UNSET_S4_OBJECT(newdt); // To support S4 objects that incude data.table
158 //SHALLOW_DUPLICATE_ATTRIB(newdt, dt); // SHALLOW_DUPLICATE_ATTRIB would be a bit neater but is only available from R 3.3.0
159
160 // TO DO: keepattr() would be faster, but can't because shallow isn't merely a shallow copy. It
161 // also increases truelength. Perhaps make that distinction, then, and split out, but marked
162 // so that the next change knows to duplicate.
163 // keepattr() also merely points to the entire attrbutes list and thus doesn't allow replacing
164 // some of its elements.
165
166 // We copy all attributes that refer to column names so that calling setnames on either
167 // the original or the shallow copy doesn't break anything.
168 SEXP index = PROTECT(getAttrib(dt, sym_index)); protecti++;
169 setAttrib(newdt, sym_index, shallow_duplicate(index));
170
171 SEXP sorted = PROTECT(getAttrib(dt, sym_sorted)); protecti++;
172 setAttrib(newdt, sym_sorted, duplicate(sorted));
173
174 SEXP names = PROTECT(getAttrib(dt, R_NamesSymbol)); protecti++;
175 SEXP newnames = PROTECT(allocVector(STRSXP, n)); protecti++;
176 if (isNull(cols)) {
177 l = LENGTH(dt);
178 for (i=0; i<l; i++) SET_VECTOR_ELT(newdt, i, VECTOR_ELT(dt,i));
179 if (length(names)) {
180 if (length(names) < l) error(_("Internal error: length(names)>0 but <length(dt)")); // # nocov
181 for (i=0; i<l; i++) SET_STRING_ELT(newnames, i, STRING_ELT(names,i));
182 }
183 // else an unnamed data.table is valid e.g. unname(DT) done by ggplot2, and .SD may have its names cleared in dogroups, but shallow will always create names for data.table(NULL) which has 100 slots all empty so you can add to an empty data.table by reference ok.
184 } else {
185 l = length(cols);
186 for (i=0; i<l; i++) SET_VECTOR_ELT(newdt, i, VECTOR_ELT(dt,INTEGER(cols)[i]-1));
187 if (length(names)) {
188 // no need to check length(names) < l here. R-level checks if all value
189 // in 'cols' are valid - in the range of 1:length(names(x))
190 for (i=0; i<l; i++) SET_STRING_ELT( newnames, i, STRING_ELT(names,INTEGER(cols)[i]-1) );
191 }
192 }
193 setAttrib(newdt, R_NamesSymbol, newnames);
194 // setAttrib appears to change length and truelength, so need to do that first _then_ SET next,
195 // otherwise (if the SET were were first) the 100 tl is assigned to length.
196 SETLENGTH(newnames,l);
197 SET_TRUELENGTH(newnames,n);
198 SETLENGTH(newdt,l);
199 SET_TRUELENGTH(newdt,n);
200 setselfref(newdt);
201 UNPROTECT(protecti);
202 return(newdt);
203 }
204
alloccol(SEXP dt,R_len_t n,Rboolean verbose)205 SEXP alloccol(SEXP dt, R_len_t n, Rboolean verbose)
206 {
207 SEXP names, klass; // klass not class at request of pydatatable because class is reserved word in C++, PR #3129
208 R_len_t l, tl;
209 if (isNull(dt)) error(_("alloccol has been passed a NULL dt"));
210 if (TYPEOF(dt) != VECSXP) error(_("dt passed to alloccol isn't type VECSXP"));
211 klass = getAttrib(dt, R_ClassSymbol);
212 if (isNull(klass)) error(_("dt passed to alloccol has no class attribute. Please report result of traceback() to data.table issue tracker."));
213 l = LENGTH(dt);
214 names = getAttrib(dt,R_NamesSymbol);
215 // names may be NULL when null.data.table() passes list() to alloccol for example.
216 // So, careful to use length() on names, not LENGTH().
217 if (length(names)!=l) error(_("Internal error: length of names (%d) is not length of dt (%d)"),length(names),l); // # nocov
218 if (!selfrefok(dt,verbose))
219 return shallow(dt,R_NilValue,(n>l) ? n : l); // e.g. test 848 and 851 in R > 3.0.2
220 // added (n>l) ? ... for #970, see test 1481.
221 // TO DO: test realloc names if selfrefnamesok (users can setattr(x,"name") themselves for example.
222 // if (TRUELENGTH(getAttrib(dt,R_NamesSymbol))!=tl)
223 // error(_("Internal error: tl of dt passes checks, but tl of names (%d) != tl of dt (%d)"), tl, TRUELENGTH(getAttrib(dt,R_NamesSymbol))); // # nocov
224
225 tl = TRUELENGTH(dt);
226 // R <= 2.13.2 and we didn't catch uninitialized tl somehow
227 if (tl<0) error(_("Internal error, tl of class is marked but tl<0.")); // # nocov
228 if (tl>0 && tl<l) error(_("Internal error, please report (including result of sessionInfo()) to data.table issue tracker: tl (%d) < l (%d) but tl of class is marked."), tl, l); // # nocov
229 if (tl>l+10000) warning(_("tl (%d) is greater than 10,000 items over-allocated (l = %d). If you didn't set the datatable.alloccol option to be very large, please report to data.table issue tracker including the result of sessionInfo()."),tl,l);
230 if (n>tl) return(shallow(dt,R_NilValue,n)); // usual case (increasing alloc)
231 if (n<tl && verbose) Rprintf(_("Attempt to reduce allocation from %d to %d ignored. Can only increase allocation via shallow copy. Please do not use DT[...]<- or DT$someCol<-. Use := inside DT[...] instead."),tl,n);
232 // otherwise the finalizer can't clear up the Large Vector heap
233 return(dt);
234 }
235
checkOverAlloc(SEXP x)236 int checkOverAlloc(SEXP x)
237 {
238 if (isNull(x))
239 error(_("Has getOption('datatable.alloccol') somehow become unset? It should be a number, by default 1024."));
240 if (!isInteger(x) && !isReal(x))
241 error(_("getOption('datatable.alloccol') should be a number, by default 1024. But its type is '%s'."), type2char(TYPEOF(x)));
242 if (LENGTH(x) != 1)
243 error(_("getOption('datatable.alloc') is a numeric vector ok but its length is %d. Its length should be 1."), LENGTH(x));
244 int ans = isInteger(x) ? INTEGER(x)[0] : (int)REAL(x)[0];
245 if (ans<0)
246 error(_("getOption('datatable.alloc')==%d. It must be >=0 and not NA."), ans);
247 return ans;
248 }
249
alloccolwrapper(SEXP dt,SEXP overAllocArg,SEXP verbose)250 SEXP alloccolwrapper(SEXP dt, SEXP overAllocArg, SEXP verbose) {
251 if (!isLogical(verbose) || length(verbose)!=1) error(_("verbose must be TRUE or FALSE"));
252 int overAlloc = checkOverAlloc(overAllocArg);
253 SEXP ans = PROTECT(alloccol(dt, length(dt)+overAlloc, LOGICAL(verbose)[0]));
254
255 for(R_len_t i = 0; i < LENGTH(ans); i++) {
256 // clear names; also excluded by copyMostAttrib(). Primarily for data.table and as.data.table, but added here centrally (see #103).
257 setAttrib(VECTOR_ELT(ans, i), R_NamesSymbol, R_NilValue);
258
259 // But don't clear dim and dimnames. Because as from 1.12.4 we keep the matrix column as-is and ask user to use as.data.table to
260 // unpack matrix columns when they really need to; test 2089.2
261 // setAttrib(VECTOR_ELT(ans, i), R_DimSymbol, R_NilValue);
262 // setAttrib(VECTOR_ELT(ans, i), R_DimNamesSymbol, R_NilValue);
263 }
264
265 UNPROTECT(1);
266 return ans;
267 }
268
shallowwrapper(SEXP dt,SEXP cols)269 SEXP shallowwrapper(SEXP dt, SEXP cols) {
270 // selfref will be FALSE on manually created data.table, e.g., via dput() or structure()
271 if (!selfrefok(dt, FALSE)) {
272 int n = isNull(cols) ? length(dt) : length(cols);
273 return(shallow(dt, cols, n));
274 } else return(shallow(dt, cols, TRUELENGTH(dt)));
275 }
276
truelength(SEXP x)277 SEXP truelength(SEXP x) {
278 return ScalarInteger(isNull(x) ? 0 : TRUELENGTH(x));
279 }
280
selfrefokwrapper(SEXP x,SEXP verbose)281 SEXP selfrefokwrapper(SEXP x, SEXP verbose) {
282 return ScalarInteger(_selfrefok(x,FALSE,LOGICAL(verbose)[0]));
283 }
284
285 int *_Last_updated = NULL;
286
assign(SEXP dt,SEXP rows,SEXP cols,SEXP newcolnames,SEXP values)287 SEXP assign(SEXP dt, SEXP rows, SEXP cols, SEXP newcolnames, SEXP values)
288 {
289 // For internal use only by := in [.data.table, and set()
290 // newcolnames : add these columns (if any)
291 // cols : column names or numbers corresponding to the values to set
292 // rows : row numbers to assign
293 R_len_t i, j, numToDo, targetlen, vlen, oldncol, oldtncol, coln, protecti=0, newcolnum, indexLength;
294 SEXP targetcol, nullint, s, colnam, tmp, key, index, a, assignedNames, indexNames;
295 bool verbose=GetVerbose();
296 int ndelete=0; // how many columns are being deleted
297 const char *c1, *tc1, *tc2;
298 int *buf, newKeyLength, indexNo;
299 if (isNull(dt)) error(_("assign has been passed a NULL dt"));
300 if (TYPEOF(dt) != VECSXP) error(_("dt passed to assign isn't type VECSXP"));
301 if (islocked(dt))
302 error(_(".SD is locked. Updating .SD by reference using := or set are reserved for future use. Use := in j directly. Or use copy(.SD) as a (slow) last resort, until shallow() is exported."));
303
304 // We allow set() on data.frame too; e.g. package Causata uses set() on a data.frame in tests/testTransformationReplay.R
305 // := is only allowed on a data.table. However, the ":=" = stop(...) message in data.table.R will have already
306 // detected use on a data.frame before getting to this point.
307 // For data.frame, can use set() on existing columns but not add new ones because DF are not over-allocated.
308 bool isDataTable = INHERITS(dt, char_datatable);
309 if (!isDataTable && !INHERITS(dt, char_dataframe))
310 error(_("Internal error: dt passed to Cassign is not a data.table or data.frame")); // # nocov
311
312 oldncol = LENGTH(dt);
313 SEXP names = PROTECT(getAttrib(dt, R_NamesSymbol)); protecti++;
314 if (isNull(names)) error(_("dt passed to assign has no names"));
315 if (length(names)!=oldncol)
316 error(_("Internal error in assign: length of names (%d) is not length of dt (%d)"),length(names),oldncol); // # nocov
317 if (isNull(dt)) {
318 error(_("data.table is NULL; malformed. A null data.table should be an empty list. typeof() should always return 'list' for data.table.")); // # nocov
319 // Not possible to test because R won't permit attributes be attached to NULL (which is good and we like); warning from R 3.4.0+ tested by 944.5
320 }
321 const int nrow = LENGTH(dt) ? length(VECTOR_ELT(dt,0)) :
322 (isNewList(values) && length(values) ? length(VECTOR_ELT(values,0)) : length(values));
323 // ^ when null data.table the new nrow becomes the fist column added
324 if (isNull(rows)) {
325 numToDo = nrow;
326 targetlen = nrow;
327 if (verbose) Rprintf(_("Assigning to all %d rows\n"), nrow);
328 // fast way to assign to whole column, without creating 1:nrow(x) vector up in R, or here in C
329 } else {
330 if (isReal(rows)) {
331 rows = PROTECT(coerceVector(rows, INTSXP)); protecti++;
332 warning(_("Coerced i from numeric to integer. Please pass integer for efficiency; e.g., 2L rather than 2"));
333 }
334 if (!isInteger(rows))
335 error(_("i is type '%s'. Must be integer, or numeric is coerced with warning. If i is a logical subset, simply wrap with which(), and take the which() outside the loop if possible for efficiency."), type2char(TYPEOF(rows)));
336 targetlen = length(rows);
337 numToDo = 0;
338 const int *rowsd = INTEGER(rows);
339 for (i=0; i<targetlen; i++) {
340 if ((rowsd[i]<0 && rowsd[i]!=NA_INTEGER) || rowsd[i]>nrow)
341 error(_("i[%d] is %d which is out of range [1,nrow=%d]."),i+1,rowsd[i],nrow); // set() reaches here (test 2005.2); := reaches the same error in subset.c first
342 if (rowsd[i]>=1) numToDo++;
343 }
344 if (verbose) Rprintf(_("Assigning to %d row subset of %d rows\n"), numToDo, nrow);
345 // TODO: include in message if any rows are assigned several times (e.g. by=.EACHI with dups in i)
346 if (numToDo==0) {
347 if (!length(newcolnames)) {
348 *_Last_updated = 0;
349 UNPROTECT(protecti);
350 return(dt); // all items of rows either 0 or NA. !length(newcolnames) for #759
351 }
352 if (verbose) Rprintf(_("Added %d new column%s initialized with all-NA\n"),
353 length(newcolnames), (length(newcolnames)>1)?"s":"");
354 }
355 }
356 if (!length(cols)) {
357 warning(_("length(LHS)==0; no columns to delete or assign RHS to.")); // test 1295 covers
358 *_Last_updated = 0;
359 UNPROTECT(protecti);
360 return(dt);
361 }
362 // FR #2077 - set able to add new cols by reference
363 if (isString(cols)) {
364 PROTECT(tmp = chmatch(cols, names, 0)); protecti++;
365 buf = (int *) R_alloc(length(cols), sizeof(int));
366 int k=0;
367 for (i=0; i<length(cols); i++) {
368 if (INTEGER(tmp)[i] == 0) buf[k++] = i;
369 }
370 if (k>0) {
371 if (!isDataTable) error(_("set() on a data.frame is for changing existing columns, not adding new ones. Please use a data.table for that. data.table's are over-allocated and don't shallow copy."));
372 newcolnames = PROTECT(allocVector(STRSXP, k)); protecti++;
373 for (i=0; i<k; i++) {
374 SET_STRING_ELT(newcolnames, i, STRING_ELT(cols, buf[i]));
375 INTEGER(tmp)[buf[i]] = oldncol+i+1;
376 }
377 }
378 cols = tmp;
379 } else {
380 if (isReal(cols)) {
381 cols = PROTECT(coerceVector(cols, INTSXP)); protecti++;
382 warning(_("Coerced j from numeric to integer. Please pass integer for efficiency; e.g., 2L rather than 2"));
383 }
384 if (!isInteger(cols))
385 error(_("j is type '%s'. Must be integer, character, or numeric is coerced with warning."), type2char(TYPEOF(cols)));
386 }
387 if (any_duplicated(cols,FALSE)) error(_("Can't assign to the same column twice in the same query (duplicates detected)."));
388 if (!isNull(newcolnames) && !isString(newcolnames)) error(_("newcolnames is supplied but isn't a character vector"));
389 bool RHS_list_of_columns = TYPEOF(values)==VECSXP && length(cols)>1; // initial value; may be revised below
390 if (verbose) Rprintf(_("RHS_list_of_columns == %s\n"), RHS_list_of_columns ? "true" : "false");
391 if (TYPEOF(values)==VECSXP && length(cols)==1 && length(values)==1) {
392 SEXP item = VECTOR_ELT(values,0);
393 if (isNull(item) || length(item)==1 || length(item)==targetlen) {
394 RHS_list_of_columns=true;
395 if (verbose) Rprintf(_("RHS_list_of_columns revised to true because RHS list has 1 item which is NULL, or whose length %d is either 1 or targetlen (%d). Please unwrap RHS.\n"), length(item), targetlen);
396 }
397 }
398 if (RHS_list_of_columns) {
399 if (length(values)==0)
400 error(_("Supplied %d columns to be assigned an empty list (which may be an empty data.table or data.frame since they are lists too). To delete multiple columns use NULL instead. To add multiple empty list columns, use list(list())."), length(cols));
401 if (length(values)!=length(cols)) {
402 if (length(values)==1) { // test 351.1; c("colA","colB"):=list(13:15) uses 13:15 for both columns
403 values = VECTOR_ELT(values,0);
404 RHS_list_of_columns = false;
405 if (verbose) Rprintf(_("Recycling single RHS list item across %d columns. Please unwrap RHS.\n"), length(cols));
406 } else {
407 error(_("Supplied %d columns to be assigned %d items. Please see NEWS for v1.12.2."), length(cols), length(values));
408 }
409 }
410 }
411 // Check all inputs :
412 for (i=0; i<length(cols); i++) {
413 coln = INTEGER(cols)[i];
414 if (coln<1 || coln>oldncol+length(newcolnames)) {
415 if (!isDataTable) error(_("Item %d of column numbers in j is %d which is outside range [1,ncol=%d]. set() on a data.frame is for changing existing columns, not adding new ones. Please use a data.table for that."), i+1, coln, oldncol);
416 else error(_("Item %d of column numbers in j is %d which is outside range [1,ncol=%d]. Use column names instead in j to add new columns."), i+1, coln, oldncol);
417 }
418 coln--;
419 SEXP thisvalue = RHS_list_of_columns ? VECTOR_ELT(values, i) : values;
420 vlen = length(thisvalue);
421 if (isNull(thisvalue) && !isNull(rows)) error(_("When deleting columns, i should not be provided")); // #1082, #3089
422 if (coln+1 <= oldncol) colnam = STRING_ELT(names,coln);
423 else colnam = STRING_ELT(newcolnames,coln-length(names));
424 if (coln+1 <= oldncol && isNull(thisvalue)) continue; // delete existing column(s) afterwards, near end of this function
425 //if (vlen<1 && nrow>0) {
426 if (coln+1 <= oldncol && nrow>0 && vlen<1 && numToDo>0) { // numToDo > 0 fixes #2829, see test 1911
427 error(_("RHS of assignment to existing column '%s' is zero length but not NULL. If you intend to delete the column use NULL. Otherwise, the RHS must have length > 0; e.g., NA_integer_. If you are trying to change the column type to be an empty list column then, as with all column type changes, provide a full length RHS vector such as vector('list',nrow(DT)); i.e., 'plonk' in the new column."), CHAR(STRING_ELT(names,coln)));
428 }
429 if (coln+1 > oldncol && TYPEOF(thisvalue)!=VECSXP) { // list() is ok for new columns
430 newcolnum = coln-length(names);
431 if (newcolnum<0 || newcolnum>=length(newcolnames))
432 error(_("Internal error in assign.c: length(newcolnames)=%d, length(names)=%d, coln=%d"), length(newcolnames), length(names), coln); // # nocov
433 if (isNull(thisvalue)) {
434 warning(_("Column '%s' does not exist to remove"),CHAR(STRING_ELT(newcolnames,newcolnum)));
435 continue;
436 }
437 // RHS of assignment to new column is zero length but we'll use its type to create all-NA column of that type
438 }
439 if (isMatrix(thisvalue) && (j=INTEGER(getAttrib(thisvalue, R_DimSymbol))[1]) > 1) // matrix passes above (considered atomic vector)
440 warning(_("%d column matrix RHS of := will be treated as one vector"), j);
441 const SEXP existing = (coln+1)<=oldncol ? VECTOR_ELT(dt,coln) : R_NilValue;
442 if (isFactor(existing) &&
443 !isString(thisvalue) && TYPEOF(thisvalue)!=INTSXP && TYPEOF(thisvalue)!=LGLSXP && !isReal(thisvalue) && !isNewList(thisvalue)) { // !=INTSXP includes factor
444 error(_("Can't assign to column '%s' (type 'factor') a value of type '%s' (not character, factor, integer or numeric)"),
445 CHAR(STRING_ELT(names,coln)),type2char(TYPEOF(thisvalue)));
446 }
447 if (nrow>0 && targetlen>0 && vlen>1 && vlen!=targetlen && (TYPEOF(existing)!=VECSXP || TYPEOF(thisvalue)==VECSXP)) {
448 // note that isNewList(R_NilValue) is true so it needs to be TYPEOF(existing)!=VECSXP above
449 error(_("Supplied %d items to be assigned to %d items of column '%s'. If you wish to 'recycle' the RHS please use rep() to make this intent clear to readers of your code."), vlen, targetlen, CHAR(colnam));
450 }
451 }
452 // having now checked the inputs, from this point there should be no errors so we can now proceed to
453 // modify DT by reference. Other than if new columns are being added and the allocVec() fails with
454 // out-of-memory. In that case the user will receive hard halt and know to rerun.
455 if (length(newcolnames)) {
456 oldtncol = TRUELENGTH(dt); // TO DO: oldtncol can be just called tl now, as we won't realloc here any more.
457
458 if (oldtncol<oldncol) {
459 if (oldtncol==0) error(_("This data.table has either been loaded from disk (e.g. using readRDS()/load()) or constructed manually (e.g. using structure()). Please run setDT() or setalloccol() on it first (to pre-allocate space for new columns) before assigning by reference to it.")); // #2996
460 error(_("Internal error: oldtncol(%d) < oldncol(%d). Please report to data.table issue tracker, including result of sessionInfo()."), oldtncol, oldncol); // # nocov
461 }
462 if (oldtncol>oldncol+10000L) warning(_("truelength (%d) is greater than 10,000 items over-allocated (length = %d). See ?truelength. If you didn't set the datatable.alloccol option very large, please report to data.table issue tracker including the result of sessionInfo()."),oldtncol, oldncol);
463 if (oldtncol < oldncol+LENGTH(newcolnames))
464 error(_("Internal error: DT passed to assign has not been allocated enough column slots. l=%d, tl=%d, adding %d"), oldncol, oldtncol, LENGTH(newcolnames)); // # nocov
465 if (!selfrefnamesok(dt,verbose))
466 error(_("It appears that at some earlier point, names of this data.table have been reassigned. Please ensure to use setnames() rather than names<- or colnames<-. Otherwise, please report to data.table issue tracker.")); // # nocov
467 // Can growVector at this point easily enough, but it shouldn't happen in first place so leave it as
468 // strong error message for now.
469 else if (TRUELENGTH(names) != oldtncol)
470 error(_("Internal error: selfrefnames is ok but tl names [%d] != tl [%d]"), TRUELENGTH(names), oldtncol); // # nocov
471 SETLENGTH(dt, oldncol+LENGTH(newcolnames));
472 SETLENGTH(names, oldncol+LENGTH(newcolnames));
473 for (i=0; i<LENGTH(newcolnames); i++)
474 SET_STRING_ELT(names,oldncol+i,STRING_ELT(newcolnames,i));
475 // truelengths of both already set by alloccol
476 }
477 for (i=0; i<length(cols); i++) {
478 coln = INTEGER(cols)[i]-1;
479 SEXP thisvalue = RHS_list_of_columns ? VECTOR_ELT(values, i) : values;
480 if (TYPEOF(thisvalue)==NILSXP) {
481 if (!isNull(rows)) error(_("Internal error: earlier error 'When deleting columns, i should not be provided' did not happen.")); // # nocov
482 ndelete++;
483 continue; // delete column(s) afterwards, below this loop
484 }
485 vlen = length(thisvalue);
486 if (length(rows)==0 && targetlen==vlen && (vlen>0 || nrow==0)) {
487 if ( MAYBE_SHARED(thisvalue) || // set() protects the NAMED of atomic vectors from .Call setting arguments to 2 by wrapping with list
488 (TYPEOF(values)==VECSXP && i>LENGTH(values)-1) || // recycled RHS would have columns pointing to others, #185.
489 (TYPEOF(values)!=VECSXP && i>0) // assigning the same values to a second column. Have to ensure a copy #2540
490 ) {
491 if (verbose) {
492 Rprintf(_("RHS for item %d has been duplicated because NAMED==%d MAYBE_SHARED==%d, but then is being plonked. length(values)==%d; length(cols)==%d)\n"),
493 i+1, NAMED(thisvalue), MAYBE_SHARED(thisvalue), length(values), length(cols));
494 }
495 thisvalue = copyAsPlain(thisvalue); // PROTECT not needed as assigned as element to protected list below.
496 } else {
497 if (verbose) Rprintf(_("Direct plonk of unnamed RHS, no copy. NAMED==%d, MAYBE_SHARED==%d\n"), NAMED(thisvalue), MAYBE_SHARED(thisvalue)); // e.g. DT[,a:=as.character(a)] as tested by 754.5
498 }
499 SET_VECTOR_ELT(dt, coln, thisvalue); // plonk new column in as it's already the correct length
500 setAttrib(thisvalue, R_NamesSymbol, R_NilValue); // clear names such as DT[,a:=mapvector[a]]
501 setAttrib(thisvalue, R_DimSymbol, R_NilValue); // so that matrix is treated as vector
502 setAttrib(thisvalue, R_DimNamesSymbol, R_NilValue); // the 3rd of the 3 attribs not copied by copyMostAttrib, for consistency.
503 continue;
504 }
505
506 if (coln+1 > oldncol) { // new column
507 SET_VECTOR_ELT(dt, coln, targetcol=allocNAVectorLike(thisvalue, nrow));
508 // initialize with NAs for when 'rows' is a subset and it doesn't touch
509 // do not try to save the time to NA fill (contiguous branch free assign anyway) since being
510 // sure all items will be written to (isNull(rows), length(rows), vlen<1, targetlen) is not worth the risk.
511 if (isVectorAtomic(thisvalue)) copyMostAttrib(thisvalue,targetcol); // class etc but not names
512 // else for lists (such as data.frame and data.table) treat them as raw lists and drop attribs
513 if (vlen<1) continue; // e.g. DT[,newcol:=integer()] (adding new empty column)
514 } else { // existing column
515 targetcol = VECTOR_ELT(dt,coln);
516 }
517 const char *ret = memrecycle(targetcol, rows, 0, targetlen, thisvalue, 0, -1, coln+1, CHAR(STRING_ELT(names, coln)));
518 if (ret) warning(ret);
519 }
520
521 *_Last_updated = numToDo; // the updates have taken place with no error, so update .Last.updated now
522 assignedNames = PROTECT(allocVector(STRSXP, LENGTH(cols))); protecti++;
523 for (i=0;i<LENGTH(cols);i++) SET_STRING_ELT(assignedNames,i,STRING_ELT(names,INTEGER(cols)[i]-1));
524 key = getAttrib(dt, sym_sorted);
525 if (length(key)) {
526 // if assigning to at least one key column, the key is truncated to one position before the first changed column.
527 //any() and subsetVector() don't seem to be exposed by R API at C level, so this is done here long hand.
528 PROTECT(tmp = chin(key, assignedNames)); protecti++;
529 newKeyLength = xlength(key);
530 for (i=0;i<LENGTH(tmp);i++) if (LOGICAL(tmp)[i]) {
531 // If a key column is being assigned to, set newKeyLength to the key element before since everything after that may have changed in order.
532 newKeyLength = i;
533 break;
534 }
535 if(newKeyLength == 0){
536 // no valid key columns remain, remove the key
537 setAttrib(dt, sym_sorted, R_NilValue);
538 } else if (newKeyLength < xlength(key)){
539 // new key is shorter than original one. Reassign
540 PROTECT(tmp = allocVector(STRSXP, newKeyLength)); protecti++;
541 for (int i=0; i<newKeyLength; i++) SET_STRING_ELT(tmp, i, STRING_ELT(key, i));
542 setAttrib(dt, sym_sorted, tmp);
543 }
544 //else: no key column changed, nothing to be done
545 }
546 index = getAttrib(dt, install("index"));
547 if (index != R_NilValue) {
548 s = ATTRIB(index);
549 indexNo = 0;
550 // get a vector with all index names
551 PROTECT(indexNames = allocVector(STRSXP, xlength(s))); protecti++;
552 while(s != R_NilValue){
553 SET_STRING_ELT(indexNames, indexNo, PRINTNAME(TAG(s)));
554 indexNo++;
555 s = CDR(s);
556 }
557 s = ATTRIB(index); // reset to first element
558 indexNo = 0;
559 while(s != R_NilValue) {
560 a = TAG(s);
561 indexLength = xlength(CAR(s));
562 tc1 = c1 = CHAR(PRINTNAME(a)); // the index name; e.g. "__col1__col2"
563 if (*tc1!='_' || *(tc1+1)!='_') {
564 // fix for #1396
565 if (verbose) {
566 Rprintf(_("Dropping index '%s' as it doesn't have '__' at the beginning of its name. It was very likely created by v1.9.4 of data.table.\n"), tc1);
567 }
568 setAttrib(index, a, R_NilValue);
569 indexNo++;
570 s = CDR(s);
571 continue; // with next index
572 }
573 tc1 += 2; // tc1 always marks the start of a key column
574 if (!*tc1) error(_("Internal error: index name ends with trailing __")); // # nocov
575 // check the position of the first appearance of an assigned column in the index.
576 // the new index will be truncated to this position.
577 char *s4 = (char*) malloc(strlen(c1) + 3);
578 if(s4 == NULL){
579 error(_("Internal error: Couldn't allocate memory for s4.")); // # nocov
580 }
581 memcpy(s4, c1, strlen(c1));
582 memset(s4 + strlen(c1), '\0', 1);
583 strcat(s4, "__"); // add trailing '__' to newKey so we can search for pattern '__colName__' also at the end of the index.
584 newKeyLength = strlen(c1);
585 for(int i = 0; i < xlength(assignedNames); i++){
586 tc2 = CHAR(STRING_ELT(assignedNames, i));
587 char *s5 = (char*) malloc(strlen(tc2) + 5); //4 * '_' + \0
588 if(s5 == NULL){
589 free(s4); // # nocov
590 error(_("Internal error: Couldn't allocate memory for s5.")); // # nocov
591 }
592 memset(s5, '_', 2);
593 memset(s5 + 2, '\0', 1);
594 strcat(s5, tc2);
595 strcat(s5, "__");
596 tc2 = strstr(s4, s5);
597 if(tc2 == NULL){ // column is not part of key
598 free(s5);
599 continue;
600 }
601 if(tc2 - s4 < newKeyLength){ // new column match is before last match
602 newKeyLength = tc2 - s4;
603 }
604 free(s5);
605 }
606 memset(s4 + newKeyLength, '\0', 1); // truncate the new key to the new length
607 if(newKeyLength == 0){ // no valid key column remains. Drop the key
608 setAttrib(index, a, R_NilValue);
609 SET_STRING_ELT(indexNames, indexNo, NA_STRING);
610 if (verbose) {
611 Rprintf(_("Dropping index '%s' due to an update on a key column\n"), c1+2);
612 }
613 } else if(newKeyLength < strlen(c1)) {
614 SEXP s4Str = PROTECT(mkString(s4));
615 if(indexLength == 0 && // shortened index can be kept since it is just information on the order (see #2372)
616 LOGICAL(chin(s4Str, indexNames))[0] == 0) {// index with shortened name not present yet
617 SET_TAG(s, install(s4));
618 SET_STRING_ELT(indexNames, indexNo, mkChar(s4));
619 if (verbose)
620 Rprintf(_("Shortening index '%s' to '%s' due to an update on a key column\n"), c1+2, s4 + 2);
621 } else { // indexLength > 0 || shortened name present already
622 // indexLength > 0 indicates reordering. Drop it to avoid spurious reordering in non-indexed columns (#2372)
623 // shortened anme already present indicates that index needs to be dropped to avoid duplicate indices.
624 setAttrib(index, a, R_NilValue);
625 SET_STRING_ELT(indexNames, indexNo, NA_STRING);
626 if (verbose)
627 Rprintf(_("Dropping index '%s' due to an update on a key column\n"), c1+2);
628 }
629 UNPROTECT(1); // s4Str
630 } //else: index is not affected by assign: nothing to be done
631 free(s4);
632 indexNo ++;
633 s = CDR(s);
634 }
635 }
636 if (ndelete) {
637 // delete any columns assigned NULL (there was a 'continue' earlier in loop above)
638 int *tt = (int *)R_alloc(ndelete, sizeof(int));
639 const int *colsd=INTEGER(cols), ncols=length(cols), ndt=length(dt);
640 for (int i=0, k=0; i<ncols; ++i) { // find which ones to delete and put them in tt
641 // Aside: a new column being assigned NULL (something odd to do) would have been warned above, added above, and now deleted. Just
642 // easier to code it this way; e.g. so that other columns may be added or removed ok by the same query.
643 coln = colsd[i]-1;
644 SEXP thisvalue = RHS_list_of_columns ? VECTOR_ELT(values, i) : values;
645 if (isNull(thisvalue)) tt[k++] = coln;
646 }
647 R_isort(tt, ndelete); // sort the column-numbers-to-delete into ascending order
648 for (int i=0; i<ndelete-1; ++i) {
649 if (tt[i]>=tt[i+1])
650 error(_("Internal error: %d column numbers to delete not now in strictly increasing order. No-dups were checked earlier.")); // # nocov
651 }
652 for (int i=tt[0], j=1, k=tt[0]+1; i<ndt-ndelete; ++i, ++k) { // i moves up from the first non-deleted column and is the target of write
653 while (j<ndelete && k==tt[j]) { j++; k++; } // move k up to the next non-deleted column; j is the next position in tt
654 SET_VECTOR_ELT(dt, i, VECTOR_ELT(dt, k));
655 SET_STRING_ELT(names, i, STRING_ELT(names, k));
656 }
657 for (int i=ndt-ndelete; i<ndt; ++i) { // blank out the ndelete slots at the end
658 SET_VECTOR_ELT(dt, i, R_NilValue);
659 SET_STRING_ELT(names, i, NA_STRING); // release reference to the CHARSXP
660 }
661 SETLENGTH(dt, ndt-ndelete);
662 SETLENGTH(names, ndt-ndelete);
663 if (LENGTH(names)==0) {
664 // That was last column deleted, leaving NULL data.table, so we need to reset .row_names, so that it really is the NULL data.table.
665 PROTECT(nullint=allocVector(INTSXP, 0)); protecti++;
666 setAttrib(dt, R_RowNamesSymbol, nullint); // i.e. .set_row_names(0)
667 }
668 }
669 UNPROTECT(protecti);
670 return(dt); // needed for `*tmp*` mechanism (when := isn't used), and to return the new object after a := for compound syntax.
671 }
672
673 #define MSGSIZE 1000
674 static char memrecycle_message[MSGSIZE+1]; // returned to rbindlist so it can prefix with which one of the list of data.table-like objects
675
memrecycle(const SEXP target,const SEXP where,const int start,const int len,SEXP source,const int sourceStart,const int sourceLen,const int colnum,const char * colname)676 const char *memrecycle(const SEXP target, const SEXP where, const int start, const int len, SEXP source, const int sourceStart, const int sourceLen, const int colnum, const char *colname)
677 // like memcpy but recycles single-item source
678 // 'where' a 1-based INTEGER vector subset of target to assign to, or NULL or integer()
679 // assigns to target[start:start+len-1] or target[where[start:start+len-1]] where start is 0-based
680 // if sourceLen==-1 then all of source is used (if it is 1 item then it is recycled, or its length must match) for convenience to avoid
681 // having to use length(source) (repeating source expression) in each call
682 // sourceLen==1 is used in dogroups to recycle the group values into ans to match the nrow of each group's result; sourceStart is set to each group value row.
683 {
684 if (len<1) return NULL;
685 const int slen = sourceLen>=0 ? sourceLen : length(source);
686 if (slen==0) return NULL;
687 if (sourceStart<0 || sourceStart+slen>length(source))
688 error(_("Internal error memrecycle: sourceStart=%d sourceLen=%d length(source)=%d"), sourceStart, sourceLen, length(source)); // # nocov
689 if (!length(where) && start+len>length(target))
690 error(_("Internal error memrecycle: start=%d len=%d length(target)=%d"), start, len, length(target)); // # nocov
691 const int soff = sourceStart;
692 if (slen>1 && slen!=len && (!isNewList(target) || isNewList(source)))
693 error(_("Internal error: recycle length error not caught earlier. slen=%d len=%d"), slen, len); // # nocov
694 // Internal error because the column has already been added to the DT, so length mismatch should have been caught before adding the column.
695 // for 5647 this used to limit slen to len, but no longer
696 if (colname==NULL)
697 error(_("Internal error: memrecycle has received NULL colname")); // # nocov
698 *memrecycle_message = '\0';
699 int protecti=0;
700 const bool sourceIsFactor=isFactor(source), targetIsFactor=isFactor(target);
701 const bool sourceIsI64=isReal(source) && Rinherits(source, char_integer64);
702 const bool targetIsI64=isReal(target) && Rinherits(target, char_integer64);
703 if (sourceIsFactor || targetIsFactor) {
704 if (!targetIsFactor) {
705 if (!isString(target) && !isNewList(target))
706 error(_("Cannot assign 'factor' to '%s'. Factors can only be assigned to factor, character or list columns."), type2char(TYPEOF(target)));
707 // else assigning factor to character is left to later below, avoiding wasteful asCharacterFactor
708 } else if (!sourceIsFactor && !isString(source)) {
709 // target is factor
710 if (allNA(source, false)) { // return false for list and other types that allNA does not support
711 source = ScalarLogical(NA_LOGICAL); // a global constant in R and won't allocate; fall through to regular zero-copy coerce
712 } else if (isInteger(source) || isReal(source)) {
713 // allow assigning level numbers to factor columns; test 425, 426, 429 and 1945
714 const int nlevel = length(getAttrib(target, R_LevelsSymbol));
715 if (isInteger(source)) {
716 const int *sd = INTEGER(source);
717 for (int i=0; i<slen; ++i) {
718 const int val = sd[i+soff];
719 if ((val<1 && val!=NA_INTEGER) || val>nlevel) {
720 error(_("Assigning factor numbers to column %d named '%s'. But %d is outside the level range [1,%d]"), colnum, colname, val, nlevel);
721 }
722 }
723 } else {
724 const double *sd = REAL(source);
725 for (int i=0; i<slen; ++i) {
726 const double val = sd[i+soff];
727 if (!ISNAN(val) && (!R_FINITE(val) || val!=(int)val || (int)val<1 || (int)val>nlevel)) {
728 error(_("Assigning factor numbers to column %d named '%s'. But %f is outside the level range [1,%d], or is not a whole number."), colnum, colname, val, nlevel);
729 }
730 }
731 }
732 // Now just let the valid level numbers fall through to regular assign by BODY below
733 } else {
734 error(_("Cannot assign '%s' to 'factor'. Factor columns can be assigned factor, character, NA in any type, or level numbers."), type2char(TYPEOF(source)));
735 }
736 } else {
737 // either factor or character being assigned to factor column
738 SEXP targetLevels = PROTECT(getAttrib(target, R_LevelsSymbol)); protecti++;
739 SEXP sourceLevels = source; // character source
740 if (sourceIsFactor) { sourceLevels=PROTECT(getAttrib(source, R_LevelsSymbol)); protecti++; }
741 if (!sourceIsFactor || !R_compute_identical(sourceLevels, targetLevels, 0)) { // !sourceIsFactor for test 2115.6
742 const int nTargetLevels=length(targetLevels), nSourceLevels=length(sourceLevels);
743 const SEXP *targetLevelsD=STRING_PTR(targetLevels), *sourceLevelsD=STRING_PTR(sourceLevels);
744 SEXP newSource = PROTECT(allocVector(INTSXP, length(source))); protecti++;
745 savetl_init();
746 for (int k=0; k<nTargetLevels; ++k) {
747 const SEXP s = targetLevelsD[k];
748 const int tl = TRUELENGTH(s);
749 if (tl>0) {
750 savetl(s);
751 } else if (tl<0) {
752 // # nocov start
753 for (int j=0; j<k; ++j) SET_TRUELENGTH(s, 0); // wipe our negative usage and restore 0
754 savetl_end(); // then restore R's own usage (if any)
755 error(_("Internal error: levels of target are either not unique or have truelength<0"));
756 // # nocov end
757 }
758 SET_TRUELENGTH(s, -k-1);
759 }
760 int nAdd = 0;
761 for (int k=0; k<nSourceLevels; ++k) {
762 const SEXP s = sourceLevelsD[k];
763 const int tl = TRUELENGTH(s);
764 if (tl>=0) {
765 if (!sourceIsFactor && s==NA_STRING) continue; // don't create NA factor level when assigning character to factor; test 2117
766 if (tl>0) savetl(s);
767 SET_TRUELENGTH(s, -nTargetLevels-(++nAdd));
768 } // else, when sourceIsString, it's normal for there to be duplicates here
769 }
770 const int nSource = length(source);
771 int *newSourceD = INTEGER(newSource);
772 if (sourceIsFactor) {
773 const int *sourceD = INTEGER(source);
774 for (int i=0; i<nSource; ++i) { // convert source integers to refer to target levels
775 const int val = sourceD[i];
776 newSourceD[i] = val==NA_INTEGER ? NA_INTEGER : -TRUELENGTH(sourceLevelsD[val-1]); // retains NA factor levels here via TL(NA_STRING); e.g. ordered factor
777 }
778 } else {
779 const SEXP *sourceD = STRING_PTR(source);
780 for (int i=0; i<nSource; ++i) { // convert source integers to refer to target levels
781 const SEXP val = sourceD[i];
782 newSourceD[i] = val==NA_STRING ? NA_INTEGER : -TRUELENGTH(val);
783 }
784 }
785 source = newSource;
786 for (int k=0; k<nTargetLevels; ++k) SET_TRUELENGTH(targetLevelsD[k], 0); // don't need those anymore
787 if (nAdd) {
788 // cannot grow the levels yet as that would be R call which could fail to alloc and we have no hook to clear up
789 SEXP *temp = (SEXP *)malloc(nAdd * sizeof(SEXP *));
790 if (!temp) {
791 // # nocov start
792 for (int k=0; k<nSourceLevels; ++k) SET_TRUELENGTH(sourceLevelsD[k], 0);
793 savetl_end();
794 error(_("Unable to allocate working memory of %d bytes to combine factor levels"), nAdd*sizeof(SEXP *));
795 // # nocov end
796 }
797 for (int k=0, thisAdd=0; thisAdd<nAdd; ++k) { // thisAdd<nAdd to stop early when the added ones are all reached
798 SEXP s = sourceLevelsD[k];
799 int tl = TRUELENGTH(s);
800 if (tl) { // tl negative here
801 if (tl != -nTargetLevels-thisAdd-1) error(_("Internal error: extra level check sum failed")); // # nocov
802 temp[thisAdd++] = s;
803 SET_TRUELENGTH(s,0);
804 }
805 }
806 savetl_end();
807 setAttrib(target, R_LevelsSymbol, targetLevels=growVector(targetLevels, nTargetLevels + nAdd));
808 for (int k=0; k<nAdd; ++k) {
809 SET_STRING_ELT(targetLevels, nTargetLevels+k, temp[k]);
810 }
811 free(temp);
812 } else {
813 // all source levels were already in target levels, but not with the same integers; we're done
814 savetl_end();
815 }
816 // now continue, but with the mapped integers in the (new) source
817 }
818 }
819 } else if (isString(source) && !isString(target) && !isNewList(target)) {
820 warning(_("Coercing 'character' RHS to '%s' to match the type of the target column (column %d named '%s')."),
821 type2char(TYPEOF(target)), colnum, colname);
822 // this "Coercing ..." warning first to give context in case coerceVector warns 'NAs introduced by coercion'
823 source = PROTECT(coerceVector(source, TYPEOF(target))); protecti++;
824 } else if (isNewList(source) && !isNewList(target)) {
825 if (targetIsI64) {
826 error(_("Cannot coerce 'list' RHS to 'integer64' to match the type of the target column (column %d named '%s')."), colnum, colname);
827 // because R's coerceVector doesn't know about integer64
828 }
829 // as in base R; e.g. let as.double(list(1,2,3)) work but not as.double(list(1,c(2,4),3))
830 // relied on by NNS, simstudy and table.express; tests 1294.*
831 warning(_("Coercing 'list' RHS to '%s' to match the type of the target column (column %d named '%s')."),
832 type2char(TYPEOF(target)), colnum, colname);
833 source = PROTECT(coerceVector(source, TYPEOF(target))); protecti++;
834 } else if ((TYPEOF(target)!=TYPEOF(source) || targetIsI64!=sourceIsI64) && !isNewList(target)) {
835 if (GetVerbose()) {
836 // only take the (small) cost of GetVerbose() (search of options() list) when types don't match
837 Rprintf(_("Zero-copy coerce when assigning '%s' to '%s' column %d named '%s'.\n"),
838 sourceIsI64 ? "integer64" : type2char(TYPEOF(source)),
839 targetIsI64 ? "integer64" : type2char(TYPEOF(target)),
840 colnum, colname);
841 }
842 // The following checks are up front here, otherwise we'd need them twice in the two branches
843 // inside BODY that cater for 'where' or not. Maybe there's a way to merge the two macros in future.
844 // The idea is to do these range checks without calling coerceVector() (which allocates)
845
846 #define CHECK_RANGE(STYPE, RFUN, COND, FMT, TO) {{ \
847 const STYPE *sd = (const STYPE *)RFUN(source); \
848 for (int i=0; i<slen; ++i) { \
849 const STYPE val = sd[i+soff]; \
850 if (COND) { \
851 const char *sType = sourceIsI64 ? "integer64" : type2char(TYPEOF(source)); \
852 const char *tType = targetIsI64 ? "integer64" : type2char(TYPEOF(target)); \
853 int n = snprintf(memrecycle_message, MSGSIZE, \
854 "%"FMT" (type '%s') at RHS position %d "TO" when assigning to type '%s'", val, sType, i+1, tType); \
855 if (colnum>0 && n>0 && n<MSGSIZE) \
856 snprintf(memrecycle_message+n, MSGSIZE-n, " (column %d named '%s')", colnum, colname); \
857 /* string returned so that rbindlist/dogroups can prefix it with which item of its list this refers to */ \
858 break; \
859 } \
860 } \
861 } break; }
862
863 switch(TYPEOF(target)) {
864 case LGLSXP:
865 switch (TYPEOF(source)) {
866 case RAWSXP: CHECK_RANGE(Rbyte, RAW, val!=0 && val!=1, "d", "taken as TRUE")
867 case INTSXP: CHECK_RANGE(int, INTEGER, val!=0 && val!=1 && val!=NA_INTEGER, "d", "taken as TRUE")
868 case REALSXP: if (sourceIsI64)
869 CHECK_RANGE(int64_t, REAL, val!=0 && val!=1 && val!=NA_INTEGER64, PRId64, "taken as TRUE")
870 else CHECK_RANGE(double, REAL, !ISNAN(val) && val!=0.0 && val!=1.0, "f", "taken as TRUE")
871 } break;
872 case RAWSXP:
873 switch (TYPEOF(source)) {
874 case INTSXP: CHECK_RANGE(int, INTEGER, val<0 || val>255, "d", "taken as 0")
875 case REALSXP: if (sourceIsI64)
876 CHECK_RANGE(int64_t, REAL, val<0 || val>255, PRId64, "taken as 0")
877 else CHECK_RANGE(double, REAL, !R_FINITE(val) || val<0.0 || val>256.0 || (int)val!=val, "f", "either truncated (precision lost) or taken as 0")
878 } break;
879 case INTSXP:
880 if (TYPEOF(source)==REALSXP) {
881 if (sourceIsI64)
882 CHECK_RANGE(int64_t, REAL, val!=NA_INTEGER64 && (val<=NA_INTEGER || val>INT_MAX), PRId64, "out-of-range (NA)")
883 else CHECK_RANGE(double, REAL, !ISNAN(val) && (!R_FINITE(val) || (int)val!=val), "f", "truncated (precision lost)")
884 } break;
885 case REALSXP:
886 if (targetIsI64 && isReal(source) && !sourceIsI64) {
887 CHECK_RANGE(double, REAL, !ISNAN(val) && (!R_FINITE(val) || (int)val!=val), "f", "truncated (precision lost)")
888 }
889 }
890 }
891
892 #undef BODY
893 #define BODY(STYPE, RFUN, CTYPE, CAST, ASSIGN) {{ \
894 const STYPE *sd = (const STYPE *)RFUN(source); \
895 if (length(where)) { \
896 if (slen==1) { \
897 const STYPE val = sd[soff]; \
898 const CTYPE cval = CAST; \
899 for (int wi=0; wi<len; ++wi) { \
900 const int w = wd[wi]; \
901 if (w<1) continue; /*0 or NA*/ \
902 const int i = w-1; \
903 ASSIGN; \
904 } \
905 } else { \
906 for (int wi=0; wi<len; ++wi) { \
907 const int w = wd[wi]; \
908 if (w<1) continue; \
909 const STYPE val = sd[wi+soff]; \
910 const CTYPE cval = CAST; \
911 const int i = w-1; \
912 ASSIGN; \
913 } \
914 } \
915 } else { \
916 if (slen==1) { \
917 const STYPE val = sd[soff]; \
918 const CTYPE cval = CAST; \
919 for (int i=0; i<len; ++i) { \
920 ASSIGN; \
921 } \
922 } else { \
923 for (int i=0; i<len; i++) { \
924 const STYPE val = sd[i+soff]; \
925 const CTYPE cval = CAST; \
926 ASSIGN; \
927 } \
928 } \
929 } \
930 } break; }
931
932 #define COERCE_ERROR(targetType) error(_("type '%s' cannot be coerced to '%s'"), type2char(TYPEOF(source)), targetType); // 'targetType' for integer64 vs double
933
934 const int off = length(where) ? 0 : start; // off = target offset; e.g. called from rbindlist with where=R_NilValue and start!=0
935 const bool mc = length(where)==0 && slen>0 && slen==len && soff==0; // mc=memcpy; only if types match and not for single items (a single assign faster than these non-const memcpy calls)
936 const int *wd = length(where) ? INTEGER(where)+start : NULL;
937 switch (TYPEOF(target)) {
938 case RAWSXP: {
939 Rbyte *td = RAW(target) + off;
940 switch (TYPEOF(source)) {
941 case RAWSXP:
942 if (mc) {
943 memcpy(td, RAW(source), slen*sizeof(Rbyte)); break;
944 } else BODY(Rbyte, RAW, Rbyte, val, td[i]=cval)
945 case LGLSXP: BODY(int, LOGICAL, Rbyte, val==1, td[i]=cval)
946 case INTSXP: BODY(int, INTEGER, Rbyte, (val>255 || val<0) ? 0 : val, td[i]=cval)
947 case REALSXP:
948 if (sourceIsI64)
949 BODY(int64_t, REAL, Rbyte, (val>255 || val<0) ? 0 : val, td[i]=cval)
950 else BODY(double, REAL, Rbyte, (ISNAN(val)||val>255||val<0) ? 0 : val, td[i]=cval)
951 default: COERCE_ERROR("raw");
952 }
953 } break;
954 case LGLSXP: {
955 int *td = LOGICAL(target) + off;
956 switch (TYPEOF(source)) {
957 case RAWSXP: BODY(Rbyte, RAW, int, val!=0, td[i]=cval)
958 case LGLSXP:
959 if (mc) {
960 memcpy(td, LOGICAL(source), slen*sizeof(Rboolean)); break;
961 } else BODY(int, LOGICAL, int, val, td[i]=cval)
962 case INTSXP: BODY(int, INTEGER, int, val==NA_INTEGER ? NA_LOGICAL : val!=0, td[i]=cval)
963 case REALSXP:
964 if (sourceIsI64)
965 BODY(int64_t, REAL, int, val==NA_INTEGER64 ? NA_LOGICAL : val!=0, td[i]=cval)
966 else BODY(double, REAL, int, ISNAN(val) ? NA_LOGICAL : val!=0.0, td[i]=cval)
967 default: COERCE_ERROR("logical");
968 }
969 } break;
970 case INTSXP : {
971 int *td = INTEGER(target) + off;
972 switch (TYPEOF(source)) {
973 case RAWSXP: BODY(Rbyte, RAW, int, (int)val, td[i]=cval)
974 case LGLSXP: // same as INTSXP ...
975 case INTSXP:
976 if (mc) {
977 memcpy(td, INTEGER(source), slen*sizeof(int)); break;
978 } else BODY(int, INTEGER, int, val, td[i]=cval)
979 case REALSXP:
980 if (sourceIsI64)
981 BODY(int64_t, REAL, int, (val==NA_INTEGER64||val>INT_MAX||val<=NA_INTEGER) ? NA_INTEGER : (int)val, td[i]=cval)
982 else BODY(double, REAL, int, ISNAN(val) ? NA_INTEGER : (int)val, td[i]=cval)
983 default: COERCE_ERROR("integer"); // test 2005.4
984 }
985 } break;
986 case REALSXP : {
987 if (targetIsI64) {
988 int64_t *td = (int64_t *)REAL(target) + off;
989 switch (TYPEOF(source)) {
990 case RAWSXP: BODY(Rbyte, RAW, int64_t, (int64_t)val, td[i]=cval)
991 case LGLSXP: // same as INTSXP
992 case INTSXP: BODY(int, INTEGER, int64_t, val==NA_INTEGER ? NA_INTEGER64 : val, td[i]=cval)
993 case REALSXP:
994 if (sourceIsI64) {
995 if (mc) {
996 memcpy(td, (int64_t *)REAL(source), slen*sizeof(int64_t)); break;
997 } else BODY(int64_t, REAL, int64_t, val, td[i]=cval)
998 } else BODY(double, REAL, int64_t, R_FINITE(val) ? val : NA_INTEGER64, td[i]=cval)
999 default: COERCE_ERROR("integer64");
1000 }
1001 } else {
1002 double *td = REAL(target) + off;
1003 switch (TYPEOF(source)) {
1004 case RAWSXP: BODY(Rbyte, RAW, double, (double)val, td[i]=cval)
1005 case LGLSXP: // same as INTSXP
1006 case INTSXP: BODY(int, INTEGER, double, val==NA_INTEGER ? NA_REAL : val, td[i]=cval)
1007 case REALSXP:
1008 if (!sourceIsI64) {
1009 if (mc) {
1010 memcpy(td, (double *)REAL(source), slen*sizeof(double)); break;
1011 } else BODY(double, REAL, double, val, td[i]=cval)
1012 } else BODY(int64_t, REAL, double, val==NA_INTEGER64 ? NA_REAL : val, td[i]=cval)
1013 default: COERCE_ERROR("double");
1014 }
1015 }
1016 } break;
1017 case CPLXSXP: {
1018 Rcomplex *td = COMPLEX(target) + off;
1019 double im = 0.0;
1020 switch (TYPEOF(source)) {
1021 case RAWSXP: BODY(Rbyte, RAW, double, (im=0.0,val), td[i].r=cval;td[i].i=im)
1022 case LGLSXP: // same as INTSXP
1023 case INTSXP: BODY(int, INTEGER, double, val==NA_INTEGER?(im=NA_REAL,NA_REAL):(im=0.0,val), td[i].r=cval;td[i].i=im)
1024 case REALSXP:
1025 if (sourceIsI64)
1026 BODY(int64_t, REAL, double, val==NA_INTEGER64?(im=NA_REAL,NA_REAL):(im=0.0,val), td[i].r=cval;td[i].i=im)
1027 else BODY(double, REAL, double, ISNAN(val)?(im=NA_REAL,NA_REAL):(im=0.0,val), td[i].r=cval;td[i].i=im)
1028 case CPLXSXP:
1029 if (mc) {
1030 memcpy(td, COMPLEX(source), slen*sizeof(Rcomplex)); break;
1031 } else BODY(Rcomplex, COMPLEX, Rcomplex, val, td[i]=cval)
1032 default: COERCE_ERROR("complex");
1033 }
1034 } break;
1035 case STRSXP :
1036 if (sourceIsFactor) {
1037 const SEXP *ld = STRING_PTR(PROTECT(getAttrib(source, R_LevelsSymbol))); protecti++;
1038 BODY(int, INTEGER, SEXP, val==NA_INTEGER ? NA_STRING : ld[val-1], SET_STRING_ELT(target, off+i, cval))
1039 } else {
1040 if (!isString(source)) {
1041 if (allNA(source, true)) { // saves common coercion of NA (logical) to NA_character_
1042 // ^^ =errorForBadType; if type list, that was already an error earlier so we
1043 // want to be strict now otherwise list would get to coerceVector below
1044 if (length(where)) {
1045 for (int i=0; i<len; ++i) if (wd[i]>0) SET_STRING_ELT(target, wd[i]-1, NA_STRING);
1046 } else {
1047 for (int i=0; i<len; ++i) SET_STRING_ELT(target, start+i, NA_STRING);
1048 }
1049 break;
1050 }
1051 if (sourceIsI64)
1052 error(_("To assign integer64 to a character column, please use as.character() for clarity."));
1053 source = PROTECT(coerceVector(source, STRSXP)); protecti++;
1054 }
1055 BODY(SEXP, STRING_PTR, SEXP, val, SET_STRING_ELT(target, off+i, cval))
1056 }
1057 case VECSXP :
1058 case EXPRSXP : // #546
1059 if (TYPEOF(source)!=VECSXP && TYPEOF(source)!=EXPRSXP)
1060 BODY(SEXP, &, SEXP, val, SET_VECTOR_ELT(target, off+i, cval))
1061 else
1062 BODY(SEXP, SEXPPTR_RO, SEXP, val, SET_VECTOR_ELT(target, off+i, cval))
1063 default :
1064 error(_("Unsupported column type in assign.c:memrecycle '%s'"), type2char(TYPEOF(target))); // # nocov
1065 }
1066 UNPROTECT(protecti);
1067 return memrecycle_message[0] ? memrecycle_message : NULL;
1068 }
1069
writeNA(SEXP v,const int from,const int n)1070 void writeNA(SEXP v, const int from, const int n)
1071 // e.g. for use after allocVector() which does not initialize its result.
1072 {
1073 const int to = from-1+n; // writing to position 2147483647 in mind, 'i<=to' in loop conditions
1074 switch(TYPEOF(v)) {
1075 case RAWSXP:
1076 memset(RAW(v)+from, 0, n*sizeof(Rbyte));
1077 break;
1078 case LGLSXP: {
1079 Rboolean *vd = (Rboolean *)LOGICAL(v);
1080 for (int i=from; i<=to; ++i) vd[i] = NA_LOGICAL;
1081 } break;
1082 case INTSXP: {
1083 // same whether factor or not
1084 int *vd = INTEGER(v);
1085 for (int i=from; i<=to; ++i) vd[i] = NA_INTEGER;
1086 } break;
1087 case REALSXP: {
1088 if (Rinherits(v, char_integer64)) { // Rinherits covers nanotime too which inherits from integer64 via S4 extends
1089 int64_t *vd = (int64_t *)REAL(v);
1090 for (int i=from; i<=to; ++i) vd[i] = NA_INTEGER64;
1091 } else {
1092 double *vd = REAL(v);
1093 for (int i=from; i<=to; ++i) vd[i] = NA_REAL;
1094 }
1095 } break;
1096 case CPLXSXP: {
1097 Rcomplex *vd = COMPLEX(v);
1098 for (int i=from; i<=to; ++i) vd[i] = NA_CPLX;
1099 } break;
1100 case STRSXP:
1101 // character columns are initialized with blank string (""). So replace the all-"" with all-NA_character_
1102 // Since "" and NA_character_ are global constants in R, it should be ok to not use SET_STRING_ELT here. But use it anyway for safety (revisit if proved slow)
1103 // If there's ever a way added to R API to pass NA_STRING to allocVector() to tell it to initialize with NA not "", would be great
1104 for (int i=from; i<=to; ++i) SET_STRING_ELT(v, i, NA_STRING);
1105 break;
1106 case VECSXP: case EXPRSXP :
1107 // although allocVector already initializes to R_NilValue, we use writeNA() in other places too, so we shouldn't skip this assign
1108 for (int i=from; i<=to; ++i) SET_VECTOR_ELT(v, i, R_NilValue);
1109 break;
1110 default :
1111 error(_("Internal error: writeNA passed a vector of type '%s'"), type2char(TYPEOF(v))); // # nocov
1112 }
1113 }
1114
allocNAVector(SEXPTYPE type,R_len_t n)1115 SEXP allocNAVector(SEXPTYPE type, R_len_t n)
1116 {
1117 // an allocVector following with initialization to NA since a subassign to a new column using :=
1118 // routinely leaves untouched items (rather than 0 or "" as allocVector does with its memset)
1119 // We guess that author of allocVector would have liked to initialize with NA but was prevented since memset
1120 // is restricted to one byte.
1121 SEXP v = PROTECT(allocVector(type, n));
1122 writeNA(v, 0, n);
1123 UNPROTECT(1);
1124 return(v);
1125 }
1126
allocNAVectorLike(SEXP x,R_len_t n)1127 SEXP allocNAVectorLike(SEXP x, R_len_t n) {
1128 // writeNA needs the attribute retained to write NA_INTEGER64, #3723
1129 // TODO: remove allocNAVector above when usage in fastmean.c, fcast.c and fmelt.c can be adjusted; see comments in PR3724
1130 SEXP v = PROTECT(allocVector(TYPEOF(x), n));
1131 copyMostAttrib(x, v);
1132 writeNA(v, 0, n);
1133 UNPROTECT(1);
1134 return(v);
1135 }
1136
1137 static SEXP *saveds=NULL;
1138 static R_len_t *savedtl=NULL, nalloc=0, nsaved=0;
1139
savetl_init()1140 void savetl_init() {
1141 if (nsaved || nalloc || saveds || savedtl) {
1142 error(_("Internal error: savetl_init checks failed (%d %d %p %p). please report to data.table issue tracker."), nsaved, nalloc, saveds, savedtl); // # nocov
1143 }
1144 nsaved = 0;
1145 nalloc = 100;
1146 saveds = (SEXP *)malloc(nalloc * sizeof(SEXP));
1147 savedtl = (R_len_t *)malloc(nalloc * sizeof(R_len_t));
1148 if (saveds==NULL || savedtl==NULL) {
1149 savetl_end(); // # nocov
1150 error(_("Failed to allocate initial %d items in savetl_init"), nalloc); // # nocov
1151 }
1152 }
1153
savetl(SEXP s)1154 void savetl(SEXP s)
1155 {
1156 if (nsaved==nalloc) {
1157 if (nalloc==INT_MAX) {
1158 savetl_end(); // # nocov
1159 error(_("Internal error: reached maximum %d items for savetl. Please report to data.table issue tracker."), nalloc); // # nocov
1160 }
1161 nalloc = nalloc>(INT_MAX/2) ? INT_MAX : nalloc*2;
1162 char *tmp = (char *)realloc(saveds, nalloc*sizeof(SEXP));
1163 if (tmp==NULL) {
1164 // C spec states that if realloc() fails the original block is left untouched; it is not freed or moved. We rely on that here.
1165 savetl_end(); // # nocov free(saveds) happens inside savetl_end
1166 error(_("Failed to realloc saveds to %d items in savetl"), nalloc); // # nocov
1167 }
1168 saveds = (SEXP *)tmp;
1169 tmp = (char *)realloc(savedtl, nalloc*sizeof(R_len_t));
1170 if (tmp==NULL) {
1171 savetl_end(); // # nocov
1172 error(_("Failed to realloc savedtl to %d items in savetl"), nalloc); // # nocov
1173 }
1174 savedtl = (R_len_t *)tmp;
1175 }
1176 saveds[nsaved] = s;
1177 savedtl[nsaved] = TRUELENGTH(s);
1178 nsaved++;
1179 }
1180
savetl_end()1181 void savetl_end() {
1182 // Can get called if nothing has been saved yet (nsaved==0), or even if _init() hasn't been called yet (pointers NULL). Such
1183 // as to clear up before error. Also, it might be that nothing needed to be saved anyway.
1184 for (int i=0; i<nsaved; i++) SET_TRUELENGTH(saveds[i],savedtl[i]);
1185 free(saveds); // possible free(NULL) which is safe no-op
1186 saveds = NULL;
1187 free(savedtl);
1188 savedtl = NULL;
1189 nsaved = nalloc = 0;
1190 }
1191
setcharvec(SEXP x,SEXP which,SEXP newx)1192 SEXP setcharvec(SEXP x, SEXP which, SEXP newx)
1193 {
1194 int w;
1195 if (!isString(x)) error(_("x must be a character vector"));
1196 if (!isInteger(which)) error(_("'which' must be an integer vector"));
1197 if (!isString(newx)) error(_("'new' must be a character vector"));
1198 if (LENGTH(newx)!=LENGTH(which)) error(_("'new' is length %d. Should be the same as length of 'which' (%d)"),LENGTH(newx),LENGTH(which));
1199 for (int i=0; i<LENGTH(which); i++) {
1200 w = INTEGER(which)[i];
1201 if (w==NA_INTEGER || w<1 || w>LENGTH(x)) error(_("Item %d of 'which' is %d which is outside range of the length %d character vector"), i+1,w,LENGTH(x));
1202 SET_STRING_ELT(x, w-1, STRING_ELT(newx, i));
1203 }
1204 return R_NilValue;
1205 }
1206
1207