1 #define R_NO_REMAP
2 #include <R.h>
3 #include <Rinternals.h>
4 #include <stdbool.h>
5 #include <string.h>
6 #include "backports.h"
7 #include "coerce.h"
8 #include "conditions.h"
9 
10 static int check_input_lengths(int n, SEXP index, int i, bool strict);
11 static int check_double_index_finiteness(double val, SEXP index, int i, bool strict);
12 static int check_double_index_length(double val, int n, int i, bool strict);
13 static int check_character_index(SEXP string, int i, bool strict);
14 static int check_names(SEXP names, int i, bool strict);
15 static int check_offset(int offset, SEXP index_i, bool strict);
16 static int check_unbound_value(SEXP val, SEXP index_i, bool strict);
17 static int check_s4_slot(SEXP val, SEXP index_i, bool strict);
18 static int check_obj_length(SEXP n, bool strict);
19 
20 int obj_length(SEXP x, bool strict);
21 SEXP obj_names(SEXP x, bool strict);
22 
23 
24 // S3 objects must implement a `length()` method in the case of a
25 // numeric index and a `names()` method for the character case
find_offset(SEXP x,SEXP index,int i,bool strict)26 int find_offset(SEXP x, SEXP index, int i, bool strict) {
27   int n = obj_length(x, strict);
28   if (n < 0) {
29     return -1;
30   }
31 
32   if (check_input_lengths(n, index, i, strict)) {
33     return -1;
34   }
35 
36   switch (TYPEOF(index)) {
37   case INTSXP:
38   case REALSXP: {
39     int n_protect = 0;
40 
41     double val;
42     if (TYPEOF(index) == INTSXP) {
43       // Coerce instead of cast to standardise missing value
44       index = PROTECT(Rf_coerceVector(index, REALSXP));
45       ++n_protect;
46     }
47     val = REAL(index)[0];
48 
49     if (check_double_index_finiteness(val, index, i, strict)) {
50       goto numeric_index_error;
51     }
52 
53     --val;
54     if (check_double_index_length(val, n, i, strict)) {
55       goto numeric_index_error;
56     }
57 
58     UNPROTECT(n_protect);
59     return val;
60 
61    numeric_index_error:
62     UNPROTECT(n_protect);
63     return -1;
64   }
65 
66   case STRSXP: {
67     // Protection is needed because names could be generated in the S3 case
68     SEXP names = PROTECT(obj_names(x, strict));
69     if (check_names(names, i, strict)) {
70       UNPROTECT(1);
71       return -1;
72     }
73 
74     SEXP string = STRING_ELT(index, 0);
75     if (check_character_index(string, i, strict)) {
76       UNPROTECT(1);
77       return -1;
78     }
79 
80     const char* val = Rf_translateCharUTF8(string);
81     int n_names = Rf_length(names);
82 
83     for (int j = 0; j < n_names; ++j) {
84       if (STRING_ELT(names, j) == NA_STRING) {
85         continue;
86       }
87 
88       const char* names_j = Rf_translateCharUTF8(STRING_ELT(names, j));
89       if (strcmp(names_j, val) == 0) {
90         UNPROTECT(1);
91         return j;
92       }
93 
94     }
95     if (strict) {
96       Rf_errorcall(R_NilValue, "Can't find name `%s` in vector", val);
97     } else {
98       UNPROTECT(1);
99       return -1;
100     }
101   }
102 
103   default:
104     stop_bad_element_type(x, i + 1, "a character or numeric vector", "Index", NULL);
105   }
106 }
107 
extract_vector(SEXP x,SEXP index_i,int i,bool strict)108 SEXP extract_vector(SEXP x, SEXP index_i, int i, bool strict) {
109   int offset = find_offset(x, index_i, i, strict);
110   if (check_offset(offset, index_i, strict)) {
111     return R_NilValue;
112   }
113 
114   if (OBJECT(x)) {
115     // We check `index_i` with `check_offset()` but pass the original
116     // index rather than an offset in order to support unordered
117     // vector classes
118     SEXP extract_call = PROTECT(Rf_lang3(Rf_install("[["), x, index_i));
119     SEXP out = Rf_eval(extract_call, R_GlobalEnv);
120     UNPROTECT(1);
121     return out;
122   }
123 
124   switch (TYPEOF(x)) {
125   case LGLSXP:  return Rf_ScalarLogical(LOGICAL(x)[offset]);
126   case INTSXP:  return Rf_ScalarInteger(INTEGER(x)[offset]);
127   case REALSXP: return Rf_ScalarReal(REAL(x)[offset]);
128   case STRSXP:  return Rf_ScalarString(STRING_ELT(x, offset));
129   case VECSXP:  return VECTOR_ELT(x, offset);
130   case RAWSXP:  return Rf_ScalarRaw(RAW(x)[offset]) ;
131   default:
132     Rf_errorcall(R_NilValue,
133       "Don't know how to index object of type %s at level %d",
134       Rf_type2char(TYPEOF(x)), i + 1
135     );
136   }
137 
138   return R_NilValue;
139 }
140 
extract_env(SEXP x,SEXP index_i,int i,bool strict)141 SEXP extract_env(SEXP x, SEXP index_i, int i, bool strict) {
142   if (TYPEOF(index_i) != STRSXP || Rf_length(index_i) != 1) {
143     SEXP ptype = PROTECT(Rf_allocVector(STRSXP, 0));
144     stop_bad_element_vector(index_i, i + 1, ptype, 1, "Index", NULL, false);
145   }
146 
147   SEXP index = STRING_ELT(index_i, 0);
148   if (check_character_index(index, i, strict)) {
149     return R_NilValue;
150   }
151 
152   SEXP sym = Rf_installChar(index);
153   SEXP out = Rf_findVarInFrame3(x, sym, TRUE);
154 
155   if (check_unbound_value(out, index_i, strict)) {
156     return R_NilValue;
157   }
158 
159   return out;
160 }
161 
extract_s4(SEXP x,SEXP index_i,int i,bool strict)162 SEXP extract_s4(SEXP x, SEXP index_i, int i, bool strict) {
163   if (TYPEOF(index_i) != STRSXP || Rf_length(index_i) != 1) {
164     SEXP ptype = PROTECT(Rf_allocVector(STRSXP, 0));
165     stop_bad_element_vector(index_i, i + 1, ptype, 1, "Index", NULL, false);
166   }
167 
168   SEXP index = STRING_ELT(index_i, 0);
169   if (check_character_index(index, i, strict)) {
170     return R_NilValue;
171   }
172 
173   if (check_s4_slot(x, index_i, strict)) {
174     return R_NilValue;
175   }
176 
177   SEXP sym = Rf_installChar(index);
178   return Rf_getAttrib(x, sym);
179 }
180 
extract_fn(SEXP x,SEXP clo)181 SEXP extract_fn(SEXP x, SEXP clo) {
182   SEXP expr = PROTECT(Rf_lang2(clo, x));
183   SEXP out = Rf_eval(expr, R_GlobalEnv);
184 
185   UNPROTECT(1);
186   return out;
187 }
is_function(SEXP x)188 static bool is_function(SEXP x) {
189   switch (TYPEOF(x)) {
190   case CLOSXP:
191   case BUILTINSXP:
192   case SPECIALSXP:
193     return true;
194   default:
195     return false;
196   }
197 }
198 
pluck_impl(SEXP x,SEXP index,SEXP missing,SEXP strict_arg)199 SEXP pluck_impl(SEXP x, SEXP index, SEXP missing, SEXP strict_arg) {
200   if (TYPEOF(index) != VECSXP) {
201     stop_bad_type(index, "a list", NULL, "where");
202   }
203 
204   PROTECT_INDEX idx;
205   PROTECT_WITH_INDEX(x, &idx);
206 
207   int n = Rf_length(index);
208   bool strict = Rf_asLogical(strict_arg);
209 
210   for (int i = 0; i < n; ++i) {
211     SEXP index_i = VECTOR_ELT(index, i);
212 
213     if (is_function(index_i)) {
214       x = extract_fn(x, index_i);
215       REPROTECT(x, idx);
216       continue;
217     }
218     // Assume all S3 objects implement the vector interface
219     if (OBJECT(x) && TYPEOF(x) != S4SXP) {
220       x = extract_vector(x, index_i, i, strict);
221       REPROTECT(x, idx);
222       continue;
223     }
224 
225     switch (TYPEOF(x)) {
226     case NILSXP:
227       if (strict) {
228         Rf_errorcall(R_NilValue, "Plucked object can't be NULL");
229       }
230       // Leave the indexing loop early
231       goto end;
232     case LGLSXP:
233     case INTSXP:
234     case REALSXP:
235     case CPLXSXP:
236     case STRSXP:
237     case RAWSXP:
238     case VECSXP:
239     case EXPRSXP:
240       x = extract_vector(x, index_i, i, strict);
241       REPROTECT(x, idx);
242       break;
243     case ENVSXP:
244       x = extract_env(x, index_i, i, strict);
245       REPROTECT(x, idx);
246       break;
247     case S4SXP:
248       x = extract_s4(x, index_i, i, strict);
249       REPROTECT(x, idx);
250       break;
251     default:
252       Rf_errorcall(R_NilValue, "Can't pluck from a %s", Rf_type2char(TYPEOF(x)));
253     }
254 
255   }
256 
257  end:
258   UNPROTECT(1);
259   return (Rf_length(x) == 0) ? missing : x;
260 }
261 
262 
263 /* Type checking */
264 
check_input_lengths(int n,SEXP index,int i,bool strict)265 static int check_input_lengths(int n, SEXP index, int i, bool strict) {
266   int index_n = Rf_length(index);
267 
268   if (n == 0) {
269     if (strict) {
270       Rf_errorcall(R_NilValue, "Plucked object must have at least one element");
271     } else {
272       return -1;
273     }
274   }
275 
276   if (index_n > 1 || (strict && index_n == 0)) {
277     stop_bad_element_length(index, i + 1, 1, "Index", NULL, false);
278   }
279 
280   return 0;
281 }
282 
check_double_index_finiteness(double val,SEXP index,int i,bool strict)283 static int check_double_index_finiteness(double val, SEXP index, int i, bool strict) {
284   if (R_finite(val)) {
285     return 0;
286   }
287 
288   if (strict) {
289     Rf_errorcall(R_NilValue,
290                  "Index %d must be finite, not %s",
291                  i + 1,
292                  Rf_translateCharUTF8(Rf_asChar(index)));
293   } else {
294     return -1;
295   }
296 }
297 
check_double_index_length(double val,int n,int i,bool strict)298 static int check_double_index_length(double val, int n, int i, bool strict) {
299   if (val < 0) {
300     if (strict) {
301       Rf_errorcall(R_NilValue,
302                    "Index %d must be greater than 0, not %.0f",
303                    i + 1,
304                    val + 1);
305     } else {
306       return -1;
307     }
308   } else if (val >= n) {
309     if (strict) {
310       Rf_errorcall(R_NilValue,
311                    "Index %d exceeds the length of plucked object (%.0f > %d)",
312                    i + 1,
313                    val + 1,
314                    n);
315     } else {
316       return -1;
317     }
318   }
319 
320   return 0;
321 }
322 
check_character_index(SEXP string,int i,bool strict)323 static int check_character_index(SEXP string, int i, bool strict) {
324   if (string == NA_STRING) {
325     if (strict) {
326       Rf_errorcall(R_NilValue, "Index %d can't be NA", i + 1);
327     } else {
328       return -1;
329     }
330   }
331 
332   // "" matches nothing
333   const char* val = CHAR(string);
334   if (val[0] == '\0') {
335     if (strict) {
336       Rf_errorcall(R_NilValue, "Index %d can't be an empty string (\"\")", i + 1);
337     } else {
338       return -1;
339     }
340   }
341 
342   return 0;
343 }
344 
check_names(SEXP names,int i,bool strict)345 static int check_names(SEXP names, int i, bool strict) {
346   if (TYPEOF(names) == STRSXP) {
347     return 0;
348   }
349 
350   if (strict) {
351     Rf_errorcall(R_NilValue, "Index %d is attempting to pluck from an unnamed vector using a string name", i + 1);
352   } else {
353     return -1;
354   }
355 }
356 
check_offset(int offset,SEXP index_i,bool strict)357 static int check_offset(int offset, SEXP index_i, bool strict) {
358   if (offset >= 0) {
359     return 0;
360   }
361 
362   if (strict) {
363     Rf_errorcall(R_NilValue,
364                  "Can't find index `%s` in vector",
365                  Rf_translateCharUTF8(Rf_asChar(index_i)));
366   } else {
367     return -1;
368   }
369 }
370 
check_unbound_value(SEXP val,SEXP index_i,bool strict)371 static int check_unbound_value(SEXP val, SEXP index_i, bool strict) {
372   if (val != R_UnboundValue) {
373     return 0;
374   }
375 
376   if (strict) {
377     Rf_errorcall(R_NilValue,
378                  "Can't find object `%s` in environment",
379                  Rf_translateCharUTF8(Rf_asChar(index_i)));
380   } else {
381     return -1;
382   }
383 }
384 
check_s4_slot(SEXP val,SEXP index_i,bool strict)385 static int check_s4_slot(SEXP val, SEXP index_i, bool strict) {
386   if (R_has_slot(val, index_i)) {
387     return 0;
388   }
389 
390   if (strict) {
391     Rf_errorcall(R_NilValue,
392                  "Can't find slot `%s`.",
393                  Rf_translateCharUTF8(Rf_asChar(index_i)));
394   } else {
395     return -1;
396   }
397 }
398 
check_obj_length(SEXP n,bool strict)399 static int check_obj_length(SEXP n, bool strict) {
400   if (TYPEOF(n) != INTSXP || Rf_length(n) != 1) {
401     if (strict) {
402       Rf_errorcall(R_NilValue, "Length of S3 object must be a scalar integer");
403     } else {
404       return -1;
405     }
406   }
407 
408   return 0;
409 }
410 
411 
obj_length(SEXP x,bool strict)412 int obj_length(SEXP x, bool strict) {
413   if (!OBJECT(x)) {
414     return Rf_length(x);
415   }
416 
417   SEXP length_call = PROTECT(Rf_lang2(Rf_install("length"), x));
418   SEXP n = PROTECT(Rf_eval(length_call, R_GlobalEnv));
419 
420   if (check_obj_length(n, strict)) {
421     UNPROTECT(2);
422     return -1;
423   }
424 
425   UNPROTECT(2);
426   return INTEGER(n)[0];
427 }
428 
obj_names(SEXP x,bool strict)429 SEXP obj_names(SEXP x, bool strict) {
430   if (!OBJECT(x)) {
431     return Rf_getAttrib(x, R_NamesSymbol);
432   }
433 
434   SEXP names_call = PROTECT(Rf_lang2(Rf_install("names"), x));
435   SEXP names = Rf_eval(names_call, R_GlobalEnv);
436 
437   UNPROTECT(1);
438   return names;
439 }
440