1 // Licensed to the Apache Software Foundation (ASF) under one
2 // or more contributor license agreements.  See the NOTICE file
3 // distributed with this work for additional information
4 // regarding copyright ownership.  The ASF licenses this file
5 // to you under the Apache License, Version 2.0 (the
6 // "License"); you may not use this file except in compliance
7 // with the License.  You may obtain a copy of the License at
8 //
9 //   http://www.apache.org/licenses/LICENSE-2.0
10 //
11 // Unless required by applicable law or agreed to in writing,
12 // software distributed under the License is distributed on an
13 // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14 // KIND, either express or implied.  See the License for the
15 // specific language governing permissions and limitations
16 // under the License.
17 
18 #include "./arrow_types.h"
19 
20 #if defined(ARROW_R_WITH_ARROW)
21 
22 #include <arrow/array.h>
23 #include <arrow/chunked_array.h>
24 #include <arrow/compute/api.h>
25 #include <arrow/util/bitmap_reader.h>
26 
27 #include <cpp11/altrep.hpp>
28 #include <cpp11/declarations.hpp>
29 #if defined(HAS_ALTREP)
30 
31 #if R_VERSION < R_Version(3, 6, 0)
32 
33 // workaround because R's <R_ext/Altrep.h> not so conveniently uses `class`
34 // as a variable name, and C++ is not happy about that
35 //
36 // SEXP R_new_altrep(R_altrep_class_t class, SEXP data1, SEXP data2);
37 //
38 #define class klass
39 
40 // Because functions declared in <R_ext/Altrep.h> have C linkage
41 extern "C" {
42 #include <R_ext/Altrep.h>
43 }
44 
45 // undo the workaround
46 #undef class
47 
48 #else
49 #include <R_ext/Altrep.h>
50 #endif
51 
52 #include "./r_task_group.h"
53 
54 namespace arrow {
55 namespace r {
56 namespace altrep {
57 
58 namespace {
59 template <typename c_type>
60 R_xlen_t Standard_Get_region(SEXP data2, R_xlen_t i, R_xlen_t n, c_type* buf);
61 
62 template <>
Standard_Get_region(SEXP data2,R_xlen_t i,R_xlen_t n,double * buf)63 R_xlen_t Standard_Get_region<double>(SEXP data2, R_xlen_t i, R_xlen_t n, double* buf) {
64   return REAL_GET_REGION(data2, i, n, buf);
65 }
66 
67 template <>
Standard_Get_region(SEXP data2,R_xlen_t i,R_xlen_t n,int * buf)68 R_xlen_t Standard_Get_region<int>(SEXP data2, R_xlen_t i, R_xlen_t n, int* buf) {
69   return INTEGER_GET_REGION(data2, i, n, buf);
70 }
71 
DeleteArray(std::shared_ptr<Array> * ptr)72 void DeleteArray(std::shared_ptr<Array>* ptr) { delete ptr; }
73 using Pointer = cpp11::external_pointer<std::shared_ptr<Array>, DeleteArray>;
74 
75 // the Array that is being wrapped by the altrep object
GetArray(SEXP alt)76 static const std::shared_ptr<Array>& GetArray(SEXP alt) {
77   return *Pointer(R_altrep_data1(alt));
78 }
79 
80 // base class for all altrep vectors
81 //
82 // data1: the Array as an external pointer.
83 // data2: starts as NULL, and becomes a standard R vector with the same
84 //        data if necessary: if materialization is needed, e.g. if we need
85 //        to access its data pointer, with DATAPTR().
86 template <typename Impl>
87 struct AltrepVectorBase {
88   // store the Array as an external pointer in data1, mark as immutable
Makearrow::r::altrep::__anone6ab5c950111::AltrepVectorBase89   static SEXP Make(const std::shared_ptr<Array>& array) {
90     SEXP alt = R_new_altrep(Impl::class_t, Pointer(new std::shared_ptr<Array>(array)),
91                             R_NilValue);
92     MARK_NOT_MUTABLE(alt);
93 
94     return alt;
95   }
96 
97   // Is the vector materialized, i.e. does the data2 slot contain a
98   // standard R vector with the same data as the array.
IsMaterializedarrow::r::altrep::__anone6ab5c950111::AltrepVectorBase99   static bool IsMaterialized(SEXP alt) { return !Rf_isNull(R_altrep_data2(alt)); }
100 
Lengtharrow::r::altrep::__anone6ab5c950111::AltrepVectorBase101   static R_xlen_t Length(SEXP alt) { return GetArray(alt)->length(); }
102 
No_NAarrow::r::altrep::__anone6ab5c950111::AltrepVectorBase103   static int No_NA(SEXP alt) { return GetArray(alt)->null_count() == 0; }
104 
Is_sortedarrow::r::altrep::__anone6ab5c950111::AltrepVectorBase105   static int Is_sorted(SEXP alt) { return UNKNOWN_SORTEDNESS; }
106 
107   // What gets printed on .Internal(inspect(<the altrep object>))
Inspectarrow::r::altrep::__anone6ab5c950111::AltrepVectorBase108   static Rboolean Inspect(SEXP alt, int pre, int deep, int pvec,
109                           void (*inspect_subtree)(SEXP, int, int, int)) {
110     const auto& array = GetArray(alt);
111     Rprintf("arrow::Array<%s, %d nulls> len=%d, Array=<%p>\n",
112             array->type()->ToString().c_str(), array->null_count(), array->length(),
113             array.get());
114     return TRUE;
115   }
116 
117   // Duplication is done by first materializing the vector and
118   // then make a lazy duplicate of data2
Duplicatearrow::r::altrep::__anone6ab5c950111::AltrepVectorBase119   static SEXP Duplicate(SEXP alt, Rboolean /* deep */) {
120     return Rf_lazy_duplicate(Impl::Materialize(alt));
121   }
122 
Coercearrow::r::altrep::__anone6ab5c950111::AltrepVectorBase123   static SEXP Coerce(SEXP alt, int type) {
124     return Rf_coerceVector(Impl::Materialize(alt), type);
125   }
126 
Serialized_statearrow::r::altrep::__anone6ab5c950111::AltrepVectorBase127   static SEXP Serialized_state(SEXP alt) { return Impl::Materialize(alt); }
128 
Unserializearrow::r::altrep::__anone6ab5c950111::AltrepVectorBase129   static SEXP Unserialize(SEXP /* class_ */, SEXP state) { return state; }
130 };
131 
132 // altrep R vector shadowing an primitive (int or double) Array.
133 //
134 // This tries as much as possible to directly use the data
135 // from the Array and minimize data copies.
136 template <int sexp_type>
137 struct AltrepVectorPrimitive : public AltrepVectorBase<AltrepVectorPrimitive<sexp_type>> {
138   using Base = AltrepVectorBase<AltrepVectorPrimitive<sexp_type>>;
139 
140   // singleton altrep class description
141   static R_altrep_class_t class_t;
142 
143   using c_type = typename std::conditional<sexp_type == REALSXP, double, int>::type;
144 
145   // Force materialization. After calling this, the data2 slot of the altrep
146   // object contains a standard R vector with the same data, with
147   // R sentinels where the Array has nulls.
148   //
149   // The Array remains available so that it can be used by Length(), Min(), etc ...
Materializearrow::r::altrep::__anone6ab5c950111::AltrepVectorPrimitive150   static SEXP Materialize(SEXP alt) {
151     if (!Base::IsMaterialized(alt)) {
152       auto size = Base::Length(alt);
153 
154       // create a standard R vector
155       SEXP copy = PROTECT(Rf_allocVector(sexp_type, size));
156 
157       // copy the data from the array, through Get_region
158       Get_region(alt, 0, size, reinterpret_cast<c_type*>(DATAPTR(copy)));
159 
160       // store as data2, this is now considered materialized
161       R_set_altrep_data2(alt, copy);
162       MARK_NOT_MUTABLE(copy);
163 
164       UNPROTECT(1);
165     }
166     return R_altrep_data2(alt);
167   }
168 
169   // R calls this to get a pointer to the start of the vector data
170   // but only if this is possible without allocating (in the R sense).
Dataptr_or_nullarrow::r::altrep::__anone6ab5c950111::AltrepVectorPrimitive171   static const void* Dataptr_or_null(SEXP alt) {
172     // data2 has been created, and so the R sentinels are in place where the array has
173     // nulls
174     if (Base::IsMaterialized(alt)) {
175       return DATAPTR_RO(R_altrep_data2(alt));
176     }
177 
178     // the Array has no nulls, we can directly return the start of its data
179     const auto& array = GetArray(alt);
180     if (array->null_count() == 0) {
181       return reinterpret_cast<const void*>(array->data()->template GetValues<c_type>(1));
182     }
183 
184     // Otherwise: if the array has nulls and data2 has not been generated: give up
185     return nullptr;
186   }
187 
188   // R calls this to get a pointer to the start of the data, R allocations are allowed.
Dataptrarrow::r::altrep::__anone6ab5c950111::AltrepVectorPrimitive189   static void* Dataptr(SEXP alt, Rboolean writeable) {
190     // If the object hasn't been materialized, and the array has no
191     // nulls we can directly point to the array data.
192     if (!Base::IsMaterialized(alt)) {
193       const auto& array = GetArray(alt);
194 
195       if (array->null_count() == 0) {
196         return reinterpret_cast<void*>(
197             const_cast<c_type*>(array->data()->template GetValues<c_type>(1)));
198       }
199     }
200 
201     // Otherwise we have to materialize and hand the pointer to data2
202     //
203     // NOTE: this returns the DATAPTR() of data2 even in the case writeable = TRUE
204     //
205     // which is risky because C(++) clients of this object might
206     // modify data2, and therefore make it diverge from the data of the Array,
207     // but the object was marked as immutable on creation, so doing this is
208     // disregarding the R api.
209     //
210     // Simply stop() when `writeable = TRUE` is too strong, e.g. this fails
211     // identical() which calls DATAPTR() even though DATAPTR_RO() would
212     // be enough
213     return DATAPTR(Materialize(alt));
214   }
215 
216   // The value at position i
Eltarrow::r::altrep::__anone6ab5c950111::AltrepVectorPrimitive217   static c_type Elt(SEXP alt, R_xlen_t i) {
218     const auto& array = GetArray(alt);
219     return array->IsNull(i) ? cpp11::na<c_type>()
220                             : array->data()->template GetValues<c_type>(1)[i];
221   }
222 
223   // R calls this when it wants data from position `i` to `i + n` copied into `buf`
224   // The returned value is the number of values that were really copied
225   // (this can be lower than n)
Get_regionarrow::r::altrep::__anone6ab5c950111::AltrepVectorPrimitive226   static R_xlen_t Get_region(SEXP alt, R_xlen_t i, R_xlen_t n, c_type* buf) {
227     // If we have data2, we can just copy the region into buf
228     // using the standard Get_region for this R type
229     if (Base::IsMaterialized(alt)) {
230       return Standard_Get_region<c_type>(R_altrep_data2(alt), i, n, buf);
231     }
232 
233     // The vector was not materialized, aka we don't have data2
234     //
235     // In that case, we copy the data from the Array, and then
236     // do a second pass to force the R sentinels for where the
237     // array has nulls
238     //
239     // This only materialize the region, into buf. Not the entire vector.
240     auto slice = GetArray(alt)->Slice(i, n);
241     R_xlen_t ncopy = slice->length();
242 
243     // first copy the data buffer
244     memcpy(buf, slice->data()->template GetValues<c_type>(1), ncopy * sizeof(c_type));
245 
246     // then set the R NA sentinels if needed
247     if (slice->null_count() > 0) {
248       internal::BitmapReader bitmap_reader(slice->null_bitmap()->data(), slice->offset(),
249                                            ncopy);
250 
251       for (R_xlen_t j = 0; j < ncopy; j++, bitmap_reader.Next()) {
252         if (bitmap_reader.IsNotSet()) {
253           buf[j] = cpp11::na<c_type>();
254         }
255       }
256     }
257 
258     return ncopy;
259   }
260 
NaRmOptionsarrow::r::altrep::__anone6ab5c950111::AltrepVectorPrimitive261   static std::shared_ptr<arrow::compute::ScalarAggregateOptions> NaRmOptions(
262       const std::shared_ptr<Array>& array, bool na_rm) {
263     auto options = std::make_shared<arrow::compute::ScalarAggregateOptions>(
264         arrow::compute::ScalarAggregateOptions::Defaults());
265     options->min_count = 0;
266     options->skip_nulls = na_rm;
267     return options;
268   }
269 
270   template <bool Min>
MinMaxarrow::r::altrep::__anone6ab5c950111::AltrepVectorPrimitive271   static SEXP MinMax(SEXP alt, Rboolean narm) {
272     using data_type = typename std::conditional<sexp_type == REALSXP, double, int>::type;
273     using scalar_type =
274         typename std::conditional<sexp_type == INTSXP, Int32Scalar, DoubleScalar>::type;
275 
276     const auto& array = GetArray(alt);
277     bool na_rm = narm == TRUE;
278     auto n = array->length();
279     auto null_count = array->null_count();
280     if ((na_rm || n == 0) && null_count == n) {
281       return Rf_ScalarReal(Min ? R_PosInf : R_NegInf);
282     }
283     if (!na_rm && null_count > 0) {
284       return cpp11::as_sexp(cpp11::na<data_type>());
285     }
286 
287     auto options = NaRmOptions(array, na_rm);
288 
289     const auto& minmax =
290         ValueOrStop(arrow::compute::CallFunction("min_max", {array}, options.get()));
291     const auto& minmax_scalar =
292         internal::checked_cast<const StructScalar&>(*minmax.scalar());
293 
294     const auto& result_scalar = internal::checked_cast<const scalar_type&>(
295         *ValueOrStop(minmax_scalar.field(Min ? "min" : "max")));
296     return cpp11::as_sexp(result_scalar.value);
297   }
298 
Minarrow::r::altrep::__anone6ab5c950111::AltrepVectorPrimitive299   static SEXP Min(SEXP alt, Rboolean narm) { return MinMax<true>(alt, narm); }
300 
Maxarrow::r::altrep::__anone6ab5c950111::AltrepVectorPrimitive301   static SEXP Max(SEXP alt, Rboolean narm) { return MinMax<false>(alt, narm); }
302 
Sumarrow::r::altrep::__anone6ab5c950111::AltrepVectorPrimitive303   static SEXP Sum(SEXP alt, Rboolean narm) {
304     using data_type = typename std::conditional<sexp_type == REALSXP, double, int>::type;
305 
306     const auto& array = GetArray(alt);
307     bool na_rm = narm == TRUE;
308     auto null_count = array->null_count();
309 
310     if (!na_rm && null_count > 0) {
311       return cpp11::as_sexp(cpp11::na<data_type>());
312     }
313     auto options = NaRmOptions(array, na_rm);
314 
315     const auto& sum =
316         ValueOrStop(arrow::compute::CallFunction("sum", {array}, options.get()));
317 
318     if (sexp_type == INTSXP) {
319       // When calling the "sum" function on an int32 array, we get an Int64 scalar
320       // in case of overflow, make it a double like R
321       int64_t value = internal::checked_cast<const Int64Scalar&>(*sum.scalar()).value;
322       if (value <= INT32_MIN || value > INT32_MAX) {
323         return Rf_ScalarReal(static_cast<double>(value));
324       } else {
325         return Rf_ScalarInteger(static_cast<int>(value));
326       }
327     } else {
328       return Rf_ScalarReal(
329           internal::checked_cast<const DoubleScalar&>(*sum.scalar()).value);
330     }
331   }
332 };
333 template <int sexp_type>
334 R_altrep_class_t AltrepVectorPrimitive<sexp_type>::class_t;
335 
336 // Implementation for string arrays
337 template <typename Type>
338 struct AltrepVectorString : public AltrepVectorBase<AltrepVectorString<Type>> {
339   using Base = AltrepVectorBase<AltrepVectorString<Type>>;
340 
341   static R_altrep_class_t class_t;
342   using StringArrayType = typename TypeTraits<Type>::ArrayType;
343 
344   // Helper class to convert to R strings
345   struct RStringViewer {
RStringViewerarrow::r::altrep::__anone6ab5c950111::AltrepVectorString::RStringViewer346     explicit RStringViewer(const std::shared_ptr<Array>& array)
347         : array_(array),
348           string_array_(internal::checked_cast<const StringArrayType*>(array.get())),
349           strip_out_nuls_(GetBoolOption("arrow.skip_nul", false)),
350           nul_was_stripped_(false) {}
351 
352     // convert the i'th string of the Array to an R string (CHARSXP)
Convertarrow::r::altrep::__anone6ab5c950111::AltrepVectorString::RStringViewer353     SEXP Convert(size_t i) {
354       if (array_->IsNull(i)) {
355         return NA_STRING;
356       }
357 
358       view_ = string_array_->GetView(i);
359       bool no_nul = std::find(view_.begin(), view_.end(), '\0') == view_.end();
360 
361       if (no_nul) {
362         return Rf_mkCharLenCE(view_.data(), view_.size(), CE_UTF8);
363       } else if (strip_out_nuls_) {
364         return ConvertStripNul();
365       } else {
366         Error();
367 
368         // not reached
369         return R_NilValue;
370       }
371     }
372 
373     // strip the nuls and then convert to R string
ConvertStripNularrow::r::altrep::__anone6ab5c950111::AltrepVectorString::RStringViewer374     SEXP ConvertStripNul() {
375       const char* old_string = view_.data();
376 
377       size_t stripped_len = 0, nul_count = 0;
378 
379       for (size_t i = 0; i < view_.size(); i++) {
380         if (old_string[i] == '\0') {
381           ++nul_count;
382 
383           if (nul_count == 1) {
384             // first nul spotted: allocate stripped string storage
385             stripped_string_.assign(view_.begin(), view_.end());
386             stripped_len = i;
387           }
388 
389           // don't copy old_string[i] (which is \0) into stripped_string
390           continue;
391         }
392 
393         if (nul_count > 0) {
394           stripped_string_[stripped_len++] = old_string[i];
395         }
396       }
397 
398       nul_was_stripped_ = true;
399       return Rf_mkCharLenCE(stripped_string_.data(), stripped_len, CE_UTF8);
400     }
401 
nul_was_strippedarrow::r::altrep::__anone6ab5c950111::AltrepVectorString::RStringViewer402     bool nul_was_stripped() const { return nul_was_stripped_; }
403 
404     // throw R error about embedded nul
Errorarrow::r::altrep::__anone6ab5c950111::AltrepVectorString::RStringViewer405     void Error() {
406       stripped_string_ = "embedded nul in string: '";
407       for (char c : view_) {
408         if (c) {
409           stripped_string_ += c;
410         } else {
411           stripped_string_ += "\\0";
412         }
413       }
414 
415       stripped_string_ +=
416           "'; to strip nuls when converting from Arrow to R, set options(arrow.skip_nul "
417           "= TRUE)";
418 
419       Rf_error(stripped_string_.c_str());
420     }
421 
422     const std::shared_ptr<Array>& array_;
423     const StringArrayType* string_array_;
424     std::string stripped_string_;
425     const bool strip_out_nuls_;
426     bool nul_was_stripped_;
427     util::string_view view_;
428   };
429 
430   // Get a single string, as a CHARSXP SEXP,
431   // either from data2 or directly from the Array
Eltarrow::r::altrep::__anone6ab5c950111::AltrepVectorString432   static SEXP Elt(SEXP alt, R_xlen_t i) {
433     if (Base::IsMaterialized(alt)) {
434       return STRING_ELT(R_altrep_data2(alt), i);
435     }
436 
437     BEGIN_CPP11
438 
439     const auto& array = GetArray(alt);
440     RStringViewer r_string_viewer(array);
441 
442     // r_string_viewer.Convert(i) might jump so it's wrapped
443     // in cpp11::unwind_protect() so that string_viewer
444     // can be properly destructed before the unwinding continues
445     SEXP s = NA_STRING;
446     cpp11::unwind_protect([&]() {
447       s = r_string_viewer.Convert(i);
448       if (r_string_viewer.nul_was_stripped()) {
449         cpp11::warning("Stripping '\\0' (nul) from character vector");
450       }
451     });
452     return s;
453 
454     END_CPP11
455   }
456 
Dataptrarrow::r::altrep::__anone6ab5c950111::AltrepVectorString457   static void* Dataptr(SEXP alt, Rboolean writeable) { return DATAPTR(Materialize(alt)); }
458 
Materializearrow::r::altrep::__anone6ab5c950111::AltrepVectorString459   static SEXP Materialize(SEXP alt) {
460     if (Base::IsMaterialized(alt)) {
461       return R_altrep_data2(alt);
462     }
463 
464     BEGIN_CPP11
465 
466     const auto& array = GetArray(alt);
467     R_xlen_t n = array->length();
468     SEXP data2 = PROTECT(Rf_allocVector(STRSXP, n));
469     MARK_NOT_MUTABLE(data2);
470 
471     RStringViewer r_string_viewer(array);
472 
473     // r_string_viewer.Convert(i) might jump so we have to
474     // wrap it in unwind_protect() to:
475     // - correctly destruct the C++ objects
476     // - resume the unwinding
477     cpp11::unwind_protect([&]() {
478       for (R_xlen_t i = 0; i < n; i++) {
479         SET_STRING_ELT(data2, i, r_string_viewer.Convert(i));
480       }
481 
482       if (r_string_viewer.nul_was_stripped()) {
483         cpp11::warning("Stripping '\\0' (nul) from character vector");
484       }
485     });
486 
487     // only set to data2 if all the values have been converted
488     R_set_altrep_data2(alt, data2);
489     UNPROTECT(1);  // data2
490 
491     return data2;
492 
493     END_CPP11
494   }
495 
Dataptr_or_nullarrow::r::altrep::__anone6ab5c950111::AltrepVectorString496   static const void* Dataptr_or_null(SEXP alt) {
497     if (Base::IsMaterialized(alt)) return DATAPTR(R_altrep_data2(alt));
498 
499     // otherwise give up
500     return nullptr;
501   }
502 
Set_eltarrow::r::altrep::__anone6ab5c950111::AltrepVectorString503   static void Set_elt(SEXP alt, R_xlen_t i, SEXP v) {
504     Rf_error("ALTSTRING objects of type <arrow::array_string_vector> are immutable");
505   }
506 };
507 
508 template <typename Type>
509 R_altrep_class_t AltrepVectorString<Type>::class_t;
510 
511 // initialize altrep, altvec, altreal, and altinteger methods
512 template <typename AltrepClass>
InitAltrepMethods(R_altrep_class_t class_t,DllInfo * dll)513 void InitAltrepMethods(R_altrep_class_t class_t, DllInfo* dll) {
514   R_set_altrep_Length_method(class_t, AltrepClass::Length);
515   R_set_altrep_Inspect_method(class_t, AltrepClass::Inspect);
516   R_set_altrep_Duplicate_method(class_t, AltrepClass::Duplicate);
517   R_set_altrep_Serialized_state_method(class_t, AltrepClass::Serialized_state);
518   R_set_altrep_Unserialize_method(class_t, AltrepClass::Unserialize);
519   R_set_altrep_Coerce_method(class_t, AltrepClass::Coerce);
520 }
521 
522 template <typename AltrepClass>
InitAltvecMethods(R_altrep_class_t class_t,DllInfo * dll)523 void InitAltvecMethods(R_altrep_class_t class_t, DllInfo* dll) {
524   R_set_altvec_Dataptr_method(class_t, AltrepClass::Dataptr);
525   R_set_altvec_Dataptr_or_null_method(class_t, AltrepClass::Dataptr_or_null);
526 }
527 
528 template <typename AltrepClass>
InitAltRealMethods(R_altrep_class_t class_t,DllInfo * dll)529 void InitAltRealMethods(R_altrep_class_t class_t, DllInfo* dll) {
530   R_set_altreal_No_NA_method(class_t, AltrepClass::No_NA);
531   R_set_altreal_Is_sorted_method(class_t, AltrepClass::Is_sorted);
532 
533   R_set_altreal_Sum_method(class_t, AltrepClass::Sum);
534   R_set_altreal_Min_method(class_t, AltrepClass::Min);
535   R_set_altreal_Max_method(class_t, AltrepClass::Max);
536 
537   R_set_altreal_Elt_method(class_t, AltrepClass::Elt);
538   R_set_altreal_Get_region_method(class_t, AltrepClass::Get_region);
539 }
540 
541 template <typename AltrepClass>
InitAltIntegerMethods(R_altrep_class_t class_t,DllInfo * dll)542 void InitAltIntegerMethods(R_altrep_class_t class_t, DllInfo* dll) {
543   R_set_altinteger_No_NA_method(class_t, AltrepClass::No_NA);
544   R_set_altinteger_Is_sorted_method(class_t, AltrepClass::Is_sorted);
545 
546   R_set_altinteger_Sum_method(class_t, AltrepClass::Sum);
547   R_set_altinteger_Min_method(class_t, AltrepClass::Min);
548   R_set_altinteger_Max_method(class_t, AltrepClass::Max);
549 
550   R_set_altinteger_Elt_method(class_t, AltrepClass::Elt);
551   R_set_altinteger_Get_region_method(class_t, AltrepClass::Get_region);
552 }
553 
554 template <typename AltrepClass>
InitAltRealClass(DllInfo * dll,const char * name)555 void InitAltRealClass(DllInfo* dll, const char* name) {
556   AltrepClass::class_t = R_make_altreal_class(name, "arrow", dll);
557   InitAltrepMethods<AltrepClass>(AltrepClass::class_t, dll);
558   InitAltvecMethods<AltrepClass>(AltrepClass::class_t, dll);
559   InitAltRealMethods<AltrepClass>(AltrepClass::class_t, dll);
560 }
561 
562 template <typename AltrepClass>
InitAltIntegerClass(DllInfo * dll,const char * name)563 void InitAltIntegerClass(DllInfo* dll, const char* name) {
564   AltrepClass::class_t = R_make_altinteger_class(name, "arrow", dll);
565   InitAltrepMethods<AltrepClass>(AltrepClass::class_t, dll);
566   InitAltvecMethods<AltrepClass>(AltrepClass::class_t, dll);
567   InitAltIntegerMethods<AltrepClass>(AltrepClass::class_t, dll);
568 }
569 
570 template <typename AltrepClass>
InitAltStringClass(DllInfo * dll,const char * name)571 void InitAltStringClass(DllInfo* dll, const char* name) {
572   AltrepClass::class_t = R_make_altstring_class(name, "arrow", dll);
573   R_set_altrep_Length_method(AltrepClass::class_t, AltrepClass::Length);
574   R_set_altrep_Inspect_method(AltrepClass::class_t, AltrepClass::Inspect);
575   R_set_altrep_Duplicate_method(AltrepClass::class_t, AltrepClass::Duplicate);
576   R_set_altrep_Serialized_state_method(AltrepClass::class_t,
577                                        AltrepClass::Serialized_state);
578   R_set_altrep_Unserialize_method(AltrepClass::class_t, AltrepClass::Unserialize);
579   R_set_altrep_Coerce_method(AltrepClass::class_t, AltrepClass::Coerce);
580 
581   R_set_altvec_Dataptr_method(AltrepClass::class_t, AltrepClass::Dataptr);
582   R_set_altvec_Dataptr_or_null_method(AltrepClass::class_t, AltrepClass::Dataptr_or_null);
583 
584   R_set_altstring_Elt_method(AltrepClass::class_t, AltrepClass::Elt);
585   R_set_altstring_Set_elt_method(AltrepClass::class_t, AltrepClass::Set_elt);
586   R_set_altstring_No_NA_method(AltrepClass::class_t, AltrepClass::No_NA);
587   R_set_altstring_Is_sorted_method(AltrepClass::class_t, AltrepClass::Is_sorted);
588 }
589 
590 }  // namespace
591 
592 // initialize the altrep classes
Init_Altrep_classes(DllInfo * dll)593 void Init_Altrep_classes(DllInfo* dll) {
594   InitAltRealClass<AltrepVectorPrimitive<REALSXP>>(dll, "arrow::array_dbl_vector");
595   InitAltIntegerClass<AltrepVectorPrimitive<INTSXP>>(dll, "arrow::array_int_vector");
596 
597   InitAltStringClass<AltrepVectorString<StringType>>(dll, "arrow::array_string_vector");
598   InitAltStringClass<AltrepVectorString<LargeStringType>>(
599       dll, "arrow::array_large_string_vector");
600 }
601 
602 // return an altrep R vector that shadows the array if possible
MakeAltrepVector(const std::shared_ptr<ChunkedArray> & chunked_array)603 SEXP MakeAltrepVector(const std::shared_ptr<ChunkedArray>& chunked_array) {
604   // special case when there is only one array
605   if (chunked_array->num_chunks() == 1) {
606     const auto& array = chunked_array->chunk(0);
607     // using altrep if
608     // - the arrow.use_altrep is set to TRUE or unset (implicit TRUE)
609     // - the array has at least one element
610     if (arrow::r::GetBoolOption("arrow.use_altrep", true) && array->length() > 0) {
611       switch (array->type()->id()) {
612         case arrow::Type::DOUBLE:
613           return altrep::AltrepVectorPrimitive<REALSXP>::Make(array);
614 
615         case arrow::Type::INT32:
616           return altrep::AltrepVectorPrimitive<INTSXP>::Make(array);
617 
618         case arrow::Type::STRING:
619           return altrep::AltrepVectorString<StringType>::Make(array);
620 
621         case arrow::Type::LARGE_STRING:
622           return altrep::AltrepVectorString<LargeStringType>::Make(array);
623 
624         default:
625           break;
626       }
627     }
628   }
629   return R_NilValue;
630 }
631 
is_arrow_altrep(SEXP x)632 bool is_arrow_altrep(SEXP x) {
633   if (ALTREP(x)) {
634     SEXP info = ALTREP_CLASS_SERIALIZED_CLASS(ALTREP_CLASS(x));
635     SEXP pkg = ALTREP_SERIALIZED_CLASS_PKGSYM(info);
636 
637     if (pkg == symbols::arrow) return true;
638   }
639 
640   return false;
641 }
642 
vec_to_arrow_altrep_bypass(SEXP x)643 std::shared_ptr<Array> vec_to_arrow_altrep_bypass(SEXP x) {
644   if (is_arrow_altrep(x)) {
645     return GetArray(x);
646   }
647 
648   return nullptr;
649 }
650 
651 }  // namespace altrep
652 }  // namespace r
653 }  // namespace arrow
654 
655 #else  // HAS_ALTREP
656 
657 namespace arrow {
658 namespace r {
659 namespace altrep {
660 
661 // return an altrep R vector that shadows the array if possible
MakeAltrepVector(const std::shared_ptr<ChunkedArray> & chunked_array)662 SEXP MakeAltrepVector(const std::shared_ptr<ChunkedArray>& chunked_array) {
663   return R_NilValue;
664 }
665 
is_arrow_altrep(SEXP)666 bool is_arrow_altrep(SEXP) { return false; }
667 
vec_to_arrow_altrep_bypass(SEXP x)668 std::shared_ptr<Array> vec_to_arrow_altrep_bypass(SEXP x) { return nullptr; }
669 
670 }  // namespace altrep
671 }  // namespace r
672 }  // namespace arrow
673 
674 #endif
675 
676 // [[arrow::export]]
test_SET_STRING_ELT(SEXP s)677 void test_SET_STRING_ELT(SEXP s) { SET_STRING_ELT(s, 0, Rf_mkChar("forbidden")); }
678 
679 // [[arrow::export]]
test_same_Array(SEXP x,SEXP y)680 bool test_same_Array(SEXP x, SEXP y) {
681   auto* p_x = reinterpret_cast<std::shared_ptr<arrow::Array>*>(x);
682   auto* p_y = reinterpret_cast<std::shared_ptr<arrow::Array>*>(y);
683 
684   return p_x->get() == p_y->get();
685 }
686 
687 // [[arrow::export]]
is_arrow_altrep(SEXP x)688 bool is_arrow_altrep(SEXP x) { return arrow::r::altrep::is_arrow_altrep(x); }
689 
690 #endif
691