1"""Base definitions for R objects."""
2
3import abc
4import collections.abc
5from collections import OrderedDict
6import enum
7import typing
8from rpy2.rinterface_lib import embedded
9from rpy2.rinterface_lib import memorymanagement
10from rpy2.rinterface_lib import openrlib
11import rpy2.rinterface_lib._rinterface_capi as _rinterface
12from rpy2.rinterface_lib._rinterface_capi import _evaluated_promise
13from rpy2.rinterface_lib._rinterface_capi import SupportsSEXP
14from rpy2.rinterface_lib import conversion
15from rpy2.rinterface_lib.conversion import _cdata_res_to_rinterface
16from rpy2.rinterface_lib import na_values
17
18
class Singleton(type):
    """Metaclass restricting each class using it to a single instance.

    The first call to a class builds the instance and records it in the
    registry `_instances`. Every later call returns the recorded instance;
    the arguments given to those later calls are ignored.
    """

    # Registry mapping a class to its unique instance. It is defined on the
    # metaclass, so it is shared by every class that does not define its
    # own `_instances` attribute.
    _instances: typing.Dict[typing.Type['Singleton'], 'Singleton'] = {}

    def __call__(cls, *args, **kwargs):
        """Return the unique instance of `cls`, creating it if needed."""
        registry = cls._instances
        if cls in registry:
            return registry[cls]
        instance = super().__call__(*args, **kwargs)
        registry[cls] = instance
        return instance
28
29
class SingletonABC(Singleton, abc.ABCMeta):
    """Metaclass combining `Singleton` with `abc.ABCMeta`.

    It adds nothing of its own: it only exists so that a class can have a
    single metaclass deriving from both.
    """
32
33
class RTYPES(enum.IntEnum):
    """Native R types as defined in R's C API.

    Each member takes its integer value from the constant of the same
    name exposed by `openrlib.rlib`, so the numeric codes are whatever
    the loaded R library defines."""

    NILSXP = openrlib.rlib.NILSXP
    SYMSXP = openrlib.rlib.SYMSXP
    LISTSXP = openrlib.rlib.LISTSXP
    CLOSXP = openrlib.rlib.CLOSXP
    ENVSXP = openrlib.rlib.ENVSXP
    PROMSXP = openrlib.rlib.PROMSXP
    LANGSXP = openrlib.rlib.LANGSXP
    SPECIALSXP = openrlib.rlib.SPECIALSXP
    BUILTINSXP = openrlib.rlib.BUILTINSXP
    CHARSXP = openrlib.rlib.CHARSXP
    LGLSXP = openrlib.rlib.LGLSXP
    INTSXP = openrlib.rlib.INTSXP
    REALSXP = openrlib.rlib.REALSXP
    CPLXSXP = openrlib.rlib.CPLXSXP
    STRSXP = openrlib.rlib.STRSXP
    DOTSXP = openrlib.rlib.DOTSXP
    ANYSXP = openrlib.rlib.ANYSXP
    VECSXP = openrlib.rlib.VECSXP
    EXPRSXP = openrlib.rlib.EXPRSXP
    BCODESXP = openrlib.rlib.BCODESXP
    EXTPTRSXP = openrlib.rlib.EXTPTRSXP
    WEAKREFSXP = openrlib.rlib.WEAKREFSXP
    RAWSXP = openrlib.rlib.RAWSXP
    S4SXP = openrlib.rlib.S4SXP

    # NOTE(review): presumably the node states R's memory manager uses
    # internally (freshly allocated / released) - confirm in Rinternals.h.
    NEWSXP = openrlib.rlib.NEWSXP
    FREESXP = openrlib.rlib.FREESXP

    # NOTE(review): presumably R's pseudo-type matching any kind of
    # function - confirm in Rinternals.h.
    FUNSXP = openrlib.rlib.FUNSXP
66
67
class Sexp(SupportsSEXP):
    """Base class for R objects.

    The name of a class corresponds to the name SEXP
    used in R's C API.

    An instance is a thin Python proxy: its only slot, `_sexpobject`,
    holds the capsule wrapping the C-level pointer to the R object, and
    the methods reach the R object through `self.__sexp__._cdata`."""

    __slots__ = ('_sexpobject', )

    def __init__(self,
                 sexp: typing.Union[SupportsSEXP,
                                    '_rinterface.SexpCapsule',
                                    '_rinterface.UninitializedRCapsule']):
        """Wrap an existing R object.

        `sexp` is either an object exposing `__sexp__` (its capsule is
        then shared with the new instance, not copied) or a capsule
        deriving from `_rinterface.CapsuleBase`.

        A `ValueError` is raised for anything else."""
        if isinstance(sexp, SupportsSEXP):
            self._sexpobject = sexp.__sexp__
        elif isinstance(sexp, _rinterface.CapsuleBase):
            self._sexpobject = sexp
        else:
            raise ValueError(
                'The constructor must be called '
                'with an instance of rpy2.rinterface.Sexp '
                'or an instance of '
                'rpy2.rinterface._rinterface.SexpCapsule')

    def __repr__(self) -> str:
        """Default representation followed by the R type in brackets."""
        return super().__repr__() + (' [%s]' % self.typeof)

    @property
    def __sexp__(self) -> '_rinterface.CapsuleBase':
        """Access to the underlying C pointer to the R object.

        When assigning a new SexpCapsule to this attribute, the
        R C-level type of the new capsule must be equal to the
        type of the old capsule. A ValueError is raised otherwise."""
        return self._sexpobject

    @__sexp__.setter
    def __sexp__(self,
                 value: '_rinterface.CapsuleBase') -> None:
        assert isinstance(value, _rinterface.SexpCapsule)
        if value.typeof != self.__sexp__.typeof:
            raise ValueError('New capsule type mismatch: %s' %
                             RTYPES(value.typeof))
        self._sexpobject = value

    @property
    def __sexp_refcount__(self) -> int:
        """Count the number of independent Python references to
        the underlying R object."""
        return _rinterface._R_PRESERVED[
            _rinterface.get_rid(self.__sexp__._cdata)
        ]

    def __getstate__(self) -> bytes:
        """Return the R object serialized by R as a Python `bytes`.

        The serialization is performed by `_rinterface.serialize()`,
        which is given the global environment."""
        with memorymanagement.rmemory() as rmemory:
            ser = rmemory.protect(
                _rinterface.serialize(
                    self.__sexp__._cdata,
                    globalenv.__sexp__._cdata)
            )
            n = openrlib.rlib.Rf_xlength(ser)
            # Copy the raw R vector into a Python object while it is
            # still protected.
            res = bytes(_rinterface.ffi.buffer(openrlib.rlib.RAW(ser), n))
        return res

    def __setstate__(self, state: bytes) -> None:
        """Restore the wrapped R object from serialized bytes."""
        self._sexpobject = unserialize(state)

    @property
    def rclass(self) -> 'StrSexpVector':
        """Get or set the R "class" attribute for the object."""
        return rclass_get(self.__sexp__)

    @rclass.setter
    def rclass(self,
               value: 'typing.Union[StrSexpVector, str]'):
        rclass_set(self.__sexp__, value)

    @property
    def rid(self) -> int:
        """ID of the underlying R object (memory address)."""
        return _rinterface.get_rid(self.__sexp__._cdata)

    @property
    def typeof(self) -> RTYPES:
        """R type of the object, as a member of :class:`RTYPES`."""
        return RTYPES(_rinterface._TYPEOF(self.__sexp__._cdata))

    @property
    def named(self) -> int:
        """Value returned by `_rinterface._NAMED()` for the object."""
        return _rinterface._NAMED(self.__sexp__._cdata)

    @conversion._cdata_res_to_rinterface
    def list_attrs(self) -> 'typing.Union[StrSexpVector, str]':
        """List the attributes of the R object."""
        return _rinterface._list_attrs(self.__sexp__._cdata)

    @conversion._cdata_res_to_rinterface
    def do_slot(self, name: str) -> 'Sexp':
        """Get the content of the slot `name` of the R object.

        A `LookupError` is raised if the object has no such slot."""
        _rinterface._assert_valid_slotname(name)
        cchar = conversion._str_to_cchar(name)
        with memorymanagement.rmemory() as rmemory:
            name_r = rmemory.protect(openrlib.rlib.Rf_install(cchar))
            if not _rinterface._has_slot(self.__sexp__._cdata, name_r):
                raise LookupError(name)
            res = openrlib.rlib.R_do_slot(self.__sexp__._cdata, name_r)
        return res

    def do_slot_assign(self, name: str, value) -> None:
        """Assign `value` to the slot `name` of the R object."""
        _rinterface._assert_valid_slotname(name)
        cchar = conversion._str_to_cchar(name)
        with memorymanagement.rmemory() as rmemory:
            name_r = rmemory.protect(openrlib.rlib.Rf_install(cchar))
            cdata = rmemory.protect(conversion._get_cdata(value))
            openrlib.rlib.R_do_slot_assign(self.__sexp__._cdata,
                                           name_r,
                                           cdata)

    @conversion._cdata_res_to_rinterface
    def get_attrib(self, name: str) -> 'Sexp':
        """Get the R attribute `name` of the object."""
        res = openrlib.rlib.Rf_getAttrib(self.__sexp__._cdata,
                                         conversion._str_to_charsxp(name))
        return res

    # TODO: deprecate this (and implement __eq__) ?
    def rsame(self, sexp) -> bool:
        """Tell whether `sexp` refers to the same R object as this one.

        `sexp` can be a :class:`Sexp` or a `_rinterface.SexpCapsule`; the
        C-level pointers are compared. A `ValueError` is raised for any
        other type."""
        if isinstance(sexp, Sexp):
            return self.__sexp__._cdata == sexp.__sexp__._cdata
        elif isinstance(sexp, _rinterface.SexpCapsule):
            # Compare against our own pointer (comparing the capsule with
            # itself would always be true).
            return self.__sexp__._cdata == sexp._cdata
        else:
            raise ValueError('Not an R object.')

    @property
    def names(self) -> 'Sexp':
        """Get or set the names, as returned by the R function `names`.

        When setting, the new names must be a `StrSexpVector`; a
        `ValueError` is raised otherwise."""
        return baseenv['names'](self)

    @names.setter
    def names(self, value) -> None:
        if not isinstance(value, StrSexpVector):
            raise ValueError('The new names should be a StrSexpVector.')
        openrlib.rlib.Rf_namesgets(
            self.__sexp__._cdata, value.__sexp__._cdata)

    @property
    @conversion._cdata_res_to_rinterface
    def names_from_c_attribute(self) -> 'Sexp':
        """Names read directly from the C-level attribute.

        Unlike the property `names`, this does not call the R function
        `names`."""
        return openrlib.rlib.Rf_getAttrib(
            self.__sexp__._cdata,
            openrlib.rlib.R_NameSymbol)
214
215
class NULLType(Sexp, metaclass=SingletonABC):
    """A singleton class for R's NULL."""

    def __init__(self):
        """Wrap R's NULL value.

        When the embedded R is not ready, a placeholder capsule of type
        NILSXP is wrapped instead."""
        if embedded.isready():
            capsule = _rinterface.UnmanagedSexpCapsule(
                openrlib.rlib.R_NilValue
            )
        else:
            capsule = _rinterface.UninitializedRCapsule(
                RTYPES.NILSXP.value
            )
        super().__init__(Sexp(capsule))

    def __bool__(self) -> bool:
        """This is always False."""
        return False

    @property
    def __sexp__(self) -> _rinterface.CapsuleBase:
        """The wrapped capsule (read-only: no setter is defined here)."""
        return self._sexpobject

    @property
    def rid(self) -> int:
        """ID of the R object, as reported by the capsule itself."""
        return self._sexpobject.rid
241
242
class CETYPE(enum.Enum):
    """Character encodings for R string.

    The value of each member is the constant of the same name exposed
    by `openrlib.rlib`."""
    CE_NATIVE = openrlib.rlib.CE_NATIVE
    CE_UTF8 = openrlib.rlib.CE_UTF8
    CE_LATIN1 = openrlib.rlib.CE_LATIN1
    CE_BYTES = openrlib.rlib.CE_BYTES
    CE_SYMBOL = openrlib.rlib.CE_SYMBOL
    CE_ANY = openrlib.rlib.CE_ANY
251
252
class NCHAR_TYPE(enum.Enum):
    """Type of string scalar in R.

    The values are passed as-is to R's C function `R_nchar`."""
    # NOTE(review): the integers are hard-coded; presumably they mirror
    # R's C enum `nchar_type` - confirm against the R headers.
    Bytes = 0
    Chars = 1
    Width = 2
258
259
class CharSexp(Sexp):
    """R's internal (C API-level) scalar for strings."""

    _R_TYPE = openrlib.rlib.CHARSXP
    # C string handed to R_nchar() as its last argument.
    _NCHAR_MSG = openrlib.ffi.new('char []', b'rpy2.rinterface.CharSexp.nchar')

    @property
    def encoding(self) -> CETYPE:
        """Encoding R reports for the string, as a :class:`CETYPE`."""
        ce_code = openrlib.rlib.Rf_getCharCE(self.__sexp__._cdata)
        return CETYPE(ce_code)

    def nchar(self, what: NCHAR_TYPE = NCHAR_TYPE.Bytes) -> int:
        """Size of the string as computed by R's `R_nchar`.

        `what` selects the unit in which the size is expressed."""
        # TODO: nchar_type is not parsed properly by cffi ?
        rlib = openrlib.rlib
        return rlib.R_nchar(
            self.__sexp__._cdata,
            what.value,
            rlib.FALSE,
            rlib.FALSE,
            self._NCHAR_MSG,
        )
279
280
class SexpEnvironment(Sexp):
    """Proxy for an R "environment" object.

    An R "environment" object can be thought of as a mix of a
    mapping (like a `dict`) and a scope. To make it more "Pythonic",
    both aspects are kept separate and the method `__getitem__` will
    get an item as it would for a Python `dict` while the method `find`
    will get an item as if it was a scope.

    As soon as R is initialized the following main environments become
    available to the user:
    - `globalenv`: The "workspace" for the current R process. This can
      be thought of as when `__name__ == '__main__'` in Python.
    - `baseenv`: The namespace of R's "base" package.
    """

    @_cdata_res_to_rinterface
    @_evaluated_promise
    def find(self,
             key: str,
             wantfun: bool = False) -> Sexp:
        """Find an item, starting with this R environment.

        Raises a `KeyError` if the key cannot be found.

        This method is called `find` because it is somewhat different
        from the method :meth:`get` in Python mappings such :class:`dict`.
        This is looking for a key across enclosing environments, returning
        the first key found.

        If `wantfun` is true, bindings that are not functions are skipped
        while walking up the enclosing environments.

        A `TypeError` is raised if the key is not a string and a
        `ValueError` if it is an empty string."""

        if not isinstance(key, str):
            raise TypeError('The key must be a non-empty string.')
        elif not len(key):
            raise ValueError('The key must be a non-empty string.')
        with memorymanagement.rmemory() as rmemory:
            key_cchar = conversion._str_to_cchar(key, 'utf-8')
            symbol = rmemory.protect(
                openrlib.rlib.Rf_install(key_cchar)
            )
            if wantfun:
                # One would expect this to be like
                #   res = _rinterface._findfun(symbol, self.__sexp__._cdata)
                # but R's findfun will segfault if the symbol is not in
                # the environment. :/
                #
                # Start from "not found" so the lookup ends with a
                # KeyError when the loop below never runs (this
                # environment is the empty environment).
                res = openrlib.rlib.R_UnboundValue
                rho = self
                while rho.rid != emptyenv.rid:
                    res = rmemory.protect(
                        _rinterface.findvar_in_frame_wrap(
                            rho.__sexp__._cdata, symbol
                        )
                    )
                    # NOTE(review): SPECIALSXP is not accepted here -
                    # confirm whether that is intended.
                    if _rinterface._TYPEOF(res) in (openrlib.rlib.CLOSXP,
                                                    openrlib.rlib.BUILTINSXP):
                        break
                    # TODO: move check of R_UnboundValue to _rinterface ?
                    res = openrlib.rlib.R_UnboundValue
                    rho = rho.enclos
            else:
                res = _rinterface._findvar(symbol, self.__sexp__._cdata)
        # TODO: move check of R_UnboundValue to _rinterface ?
        if res == openrlib.rlib.R_UnboundValue:
            raise KeyError("'%s' not found" % key)
        return res

    @_cdata_res_to_rinterface
    @_evaluated_promise
    def __getitem__(self, key: str) -> typing.Any:
        """Get the item bound to `key` in this environment only.

        Enclosing environments are not searched (see :meth:`find`).
        A `KeyError` is raised if the key is not found, a `TypeError`
        if the key is not a string, and a `ValueError` if it is an
        empty string."""
        if not isinstance(key, str):
            raise TypeError('The key must be a non-empty string.')
        elif not len(key):
            raise ValueError('The key must be a non-empty string.')
        with memorymanagement.rmemory() as rmemory:
            key_cchar = conversion._str_to_cchar(key)
            symbol = rmemory.protect(
                openrlib.rlib.Rf_install(key_cchar)
            )
            res = rmemory.protect(
                _rinterface.findvar_in_frame_wrap(
                    self.__sexp__._cdata, symbol
                )
            )

        # TODO: move check of R_UnboundValue to _rinterface
        if res == openrlib.rlib.R_UnboundValue:
            raise KeyError("'%s' not found" % key)
        return res

    def __setitem__(self, key: str, value) -> None:
        """Bind a copy of `value` to `key` in this environment.

        The R object is duplicated (`Rf_duplicate`) before being bound.
        A `TypeError` is raised if the key is not a string, and a
        `ValueError` if the key is an empty string or if this is the
        base or the empty environment."""
        # TODO: move body to _rinterface-level function
        if not isinstance(key, str):
            raise TypeError('The key must be a non-empty string.')
        elif not len(key):
            raise ValueError('The key must be a non-empty string.')
        if (self.__sexp__._cdata == openrlib.rlib.R_BaseEnv) or \
           (self.__sexp__._cdata == openrlib.rlib.R_EmptyEnv):
            raise ValueError('Cannot assign variables in the base or '
                             'empty environments.')
        # TODO: call to Rf_duplicate needed ?
        with memorymanagement.rmemory() as rmemory:
            key_cchar = conversion._str_to_cchar(key)
            symbol = rmemory.protect(
                openrlib.rlib.Rf_install(key_cchar)
            )
            cdata = rmemory.protect(conversion._get_cdata(value))
            cdata_copy = rmemory.protect(
                openrlib.rlib.Rf_duplicate(cdata)
            )
            openrlib.rlib.Rf_defineVar(symbol,
                                       cdata_copy,
                                       self.__sexp__._cdata)

    def __len__(self) -> int:
        """Number of symbols R lists for this environment."""
        with memorymanagement.rmemory() as rmemory:
            symbols = rmemory.protect(
                openrlib.rlib.R_lsInternal(self.__sexp__._cdata,
                                           openrlib.rlib.TRUE)
            )
            n = openrlib.rlib.Rf_xlength(symbols)
        return n

    def __delitem__(self, key: str) -> None:
        """Remove the item bound to `key` from this environment.

        A `KeyError` is raised if the key is not in the environment, and
        a `ValueError` if this is the base environment or if the
        environment is locked."""
        # Testing that key is a non-empty string is implicitly
        # performed when checking that the key is in the environment.
        if key not in self:
            raise KeyError("'%s' not found" % key)

        if self.__sexp__ == baseenv.__sexp__:
            raise ValueError('Values from the R base environment '
                             'cannot be removed.')
        # TODO: also check it is not R_EmptyEnv or R_BaseNamespace
        if self.is_locked():
            raise ValueError('Cannot remove an item from a locked '
                             'environment.')

        with memorymanagement.rmemory() as rmemory:
            key_cdata = rmemory.protect(
                openrlib.rlib.Rf_mkString(conversion._str_to_cchar(key))
            )
            _rinterface._remove(key_cdata,
                                self.__sexp__._cdata,
                                openrlib.rlib.Rf_ScalarLogical(
                                    openrlib.rlib.FALSE))

    @_cdata_res_to_rinterface
    def frame(self) -> 'typing.Union[NULLType, SexpEnvironment]':
        """Get the parent frame of the environment."""
        return openrlib.rlib.FRAME(self.__sexp__._cdata)

    @property
    @_cdata_res_to_rinterface
    def enclos(self) -> 'typing.Union[NULLType, SexpEnvironment]':
        """Get or set the enclosing environment."""
        return openrlib.rlib.ENCLOS(self.__sexp__._cdata)

    @enclos.setter
    def enclos(self, value: 'SexpEnvironment') -> None:
        assert isinstance(value, SexpEnvironment)
        openrlib.rlib.SET_ENCLOS(self.__sexp__._cdata,
                                 value.__sexp__._cdata)

    def keys(self) -> typing.Generator[str, None, None]:
        """Generator over the keys (symbols) in the environment."""
        with memorymanagement.rmemory() as rmemory:
            symbols = rmemory.protect(
                openrlib.rlib.R_lsInternal(self.__sexp__._cdata,
                                           openrlib.rlib.TRUE)
            )
            n = openrlib.rlib.Rf_xlength(symbols)
            # All the names are copied to Python strings while the R
            # vector is protected; nothing is yielded from within the
            # memory-management block.
            res = []
            for i in range(n):
                res.append(_rinterface._string_getitem(symbols, i))
        for e in res:
            yield e

    def __iter__(self) -> typing.Generator[str, None, None]:
        """See method `keys()`."""
        return self.keys()

    def is_locked(self) -> bool:
        """Tell whether R reports the environment as locked."""
        return openrlib.rlib.R_EnvironmentIsLocked(
            self.__sexp__._cdata)
462
463
# Placeholder capsule of R type ENVSXP, not tied to an initialized R.
# The three module-level environments below all start out wrapping this
# same placeholder.
# NOTE(review): presumably their capsules are replaced with the real R
# environments when the embedded R is initialized - confirm against the
# initialization code.
_UNINIT_CAPSULE_ENV = _rinterface.UninitializedRCapsule(RTYPES.ENVSXP.value)
emptyenv = SexpEnvironment(_UNINIT_CAPSULE_ENV)
baseenv = SexpEnvironment(_UNINIT_CAPSULE_ENV)
globalenv = SexpEnvironment(_UNINIT_CAPSULE_ENV)
468
469
470# TODO: move to _rinterface-level function (as ABI / API compatibility
471# will have API-defined code compile for efficiency).
472def _populate_r_vector(iterable, r_vector, set_elt, cast_value):
473    for i, v in enumerate(iterable):
474        set_elt(r_vector, i, cast_value(v))
475
476
# Type variable bound to SexpVector; used in the annotations of the
# SexpVector constructors and item accessors.
VT = typing.TypeVar('VT', bound='SexpVector')
478
479
class SexpVector(Sexp, metaclass=abc.ABCMeta):
    """Base abstract class for R vector objects.

    R vector objects are, at the C level, essentially C arrays wrapped in
    the general structure for R objects.

    Concrete child classes provide the R type and the C-level accessors
    through the abstract members `_R_TYPE`, `_R_SIZEOF_ELT`, `_CAST_IN`,
    `_R_SET_VECTOR_ELT`, `_R_VECTOR_ELT`, and `_R_GET_PTR`."""

    @property
    @abc.abstractmethod
    def _R_TYPE(self):
        """R type code given to `Rf_allocVector` for new vectors."""
        pass

    @property
    @abc.abstractmethod
    def _R_SIZEOF_ELT(self):
        """Size of one element, compared with a memoryview's itemsize."""
        pass

    @staticmethod
    @abc.abstractmethod
    def _CAST_IN(o):
        """Convert a Python value before it is stored in the R vector."""
        pass

    @staticmethod
    @abc.abstractmethod
    def _R_SET_VECTOR_ELT(x, i, v):
        """Store `v` at the C index `i` of the R vector `x`."""
        pass

    @staticmethod
    @abc.abstractmethod
    def _R_VECTOR_ELT(x, i):
        """Get the element at the C index `i` of the R vector `x`."""
        pass

    @staticmethod
    @abc.abstractmethod
    def _R_GET_PTR(o):
        """Get the pointer to the start of the data of the R vector."""
        pass

    def __init__(self,
                 obj: typing.Union[_rinterface.SexpCapsule,
                                   collections.abc.Sized]):
        """Wrap an existing R vector, or build one from a Python object.

        A :class:`Sexp` or a capsule is wrapped as-is. Any other sized
        Python object is converted with :meth:`from_object`. A
        `TypeError` is raised for anything else."""
        if isinstance(obj, (Sexp, _rinterface.SexpCapsule)):
            super().__init__(obj)
        elif isinstance(obj, collections.abc.Sized):
            super().__init__(self.from_object(obj).__sexp__)
        else:
            raise TypeError('The constructor must be called '
                            'with an instance of '
                            'rpy2.rinterface.Sexp '
                            'or an instance of '
                            'rpy2.rinterface._rinterface.SexpCapsule')

    @classmethod
    @_cdata_res_to_rinterface
    def from_iterable(cls, iterable,
                      populate_func=None,
                      set_elt=None,
                      cast_value=None) -> VT:
        """Create an R vector/array from an iterable.

        The optional arguments replace, respectively, the function
        filling the R vector (default: `_populate_r_vector`), the
        function setting one element (default: `cls._R_SET_VECTOR_ELT`),
        and the function casting each value (default: `cls._CAST_IN`).

        An iterable without a length (e.g., a generator) is first
        consumed into a tuple since the length of the R vector must be
        known when it is allocated.

        A :class:`embedded.RNotReadyError` is raised if the embedded R
        is not ready."""
        if not embedded.isready():
            raise embedded.RNotReadyError('Embedded R is not ready to use.')
        if populate_func is None:
            populate_func = _populate_r_vector
        if set_elt is None:
            set_elt = cls._R_SET_VECTOR_ELT
        if cast_value is None:
            cast_value = cls._CAST_IN
        if not isinstance(iterable, collections.abc.Sized):
            iterable = tuple(iterable)
        n = len(iterable)
        with memorymanagement.rmemory() as rmemory:
            r_vector = rmemory.protect(
                openrlib.rlib.Rf_allocVector(
                    cls._R_TYPE, n)
            )
            populate_func(iterable, r_vector, set_elt, cast_value)
        return r_vector

    @classmethod
    @_cdata_res_to_rinterface
    def from_memoryview(cls, mview: memoryview) -> VT:
        """Create an R vector/array from a memoryview.

        The memoryview must be contiguous, and the C representation
        for the vector must be compatible between R and Python. If
        not the case, a :class:`ValueError` exception with will be
        raised.

        The content of the memoryview is copied into the new R vector."""
        if not embedded.isready():
            raise embedded.RNotReadyError('Embedded R is not ready to use.')
        if not mview.contiguous:
            raise ValueError('The memory view must be contiguous.')
        if (
                (mview.itemsize != cls._R_SIZEOF_ELT)
                or
                not hasattr(cls, '_NP_TYPESTR') or
                not (cls._NP_TYPESTR == '|u1' or
                     cls._NP_TYPESTR.endswith(mview.format))
        ):
            msg = (
                'Incompatible C type sizes. '
                'The R array type is {r_type} with {r_size} byte{r_size_pl} '
                'per item '
                'while the Python array type is {py_type} with {py_size} '
                'byte{py_size_pl} per item.'
                .format(r_type=cls._R_TYPE,
                        r_size=cls._R_SIZEOF_ELT,
                        r_size_pl='s' if cls._R_SIZEOF_ELT > 1 else '',
                        py_type=mview.format,
                        py_size=mview.itemsize,
                        py_size_pl='s' if mview.itemsize > 1 else '')
            )
            raise ValueError(msg)
        n = len(mview)
        with memorymanagement.rmemory() as rmemory:
            r_vector = rmemory.protect(
                openrlib.rlib.Rf_allocVector(
                    cls._R_TYPE, n)
            )
            dest_ptr = cls._R_GET_PTR(r_vector)
            src_ptr = _rinterface.ffi.from_buffer(mview)
            nbytes = n * mview.itemsize
            _rinterface.ffi.memmove(dest_ptr, src_ptr, nbytes)
        return r_vector

    @classmethod
    def from_object(cls, obj) -> VT:
        """Create an R vector/array from a Python object, if possible.

        The object is first tried as a buffer (:meth:`from_memoryview`),
        then as an iterable (:meth:`from_iterable`).

        An exception :class:`ValueError` will be raised if not possible."""

        try:
            mv = memoryview(obj)
            res = cls.from_memoryview(mv)
        except (TypeError, ValueError):
            # Either not a buffer, or a buffer not compatible with the
            # R type: fall back to an element-wise copy.
            try:
                res = cls.from_iterable(obj)
            except ValueError as err:
                msg = ('The class methods from_memoryview() and '
                       'from_iterable() both failed to make a {} '
                       'from an object of class {}'
                       .format(cls, type(obj)))
                raise ValueError(msg) from err
        return res

    def __getitem__(
            self,
            i: typing.Union[int, slice]) -> typing.Union[Sexp, VT, typing.Any]:
        """Get one element (integer index) or a new vector (slice).

        A `TypeError` is raised for any other type of index."""
        cdata = self.__sexp__._cdata
        if isinstance(i, int):
            i_c = _rinterface._python_index_to_c(cdata, i)
            res = conversion._cdata_to_rinterface(
                self._R_VECTOR_ELT(cdata, i_c))
        elif isinstance(i, slice):
            # The elements are taken at the C level, so they are stored
            # in the new vector without any cast.
            res = self.from_iterable(
                [
                    self._R_VECTOR_ELT(
                        cdata, i_c,
                    ) for i_c in range(*i.indices(len(self)))
                ],
                cast_value=lambda x: x
            )
        else:
            raise TypeError(
                'Indices must be integers or slices, not %s' % type(i))
        return res

    def __setitem__(self, i: typing.Union[int, slice], value) -> None:
        """Set one element (integer index) or several elements (slice).

        The value, or each item in the value for a slice, must expose
        `__sexp__`. With a slice, the assignment stops at the shortest
        of the slice and the value. A `TypeError` is raised for any
        other type of index."""
        cdata = self.__sexp__._cdata
        if isinstance(i, int):
            i_c = _rinterface._python_index_to_c(cdata, i)
            self._R_SET_VECTOR_ELT(cdata, i_c,
                                   value.__sexp__._cdata)
        elif isinstance(i, slice):
            for i_c, v in zip(range(*i.indices(len(self))), value):
                self._R_SET_VECTOR_ELT(cdata, i_c,
                                       v.__sexp__._cdata)
        else:
            raise TypeError(
                'Indices must be integers or slices, not %s' % type(i))

    def __len__(self) -> int:
        """Length of the R vector (`Rf_xlength`)."""
        return openrlib.rlib.Rf_xlength(self.__sexp__._cdata)

    def __iter__(self) -> typing.Iterator[typing.Union[Sexp, VT, typing.Any]]:
        """Iterate over the elements, as returned by `__getitem__`."""
        for i in range(len(self)):
            yield self[i]

    def index(self, item: typing.Any) -> int:
        """Index of the first element equal to `item`.

        A `ValueError` is raised if there is no such element."""
        for i, e in enumerate(self):
            if e == item:
                return i
        raise ValueError("'%s' is not in R vector" % item)
671
672
def _as_charsxp_cdata(x: typing.Union[CharSexp, str]):
    """Return the C-level R string (CHARSXP) for `x`.

    The pointer of a :class:`CharSexp` is returned as-is. Anything else
    is handed to `conversion._str_to_charsxp()`."""
    if not isinstance(x, CharSexp):
        return conversion._str_to_charsxp(x)
    return x.__sexp__._cdata
678
679
class StrSexpVector(SexpVector):
    """R vector of strings."""

    _R_TYPE = openrlib.rlib.STRSXP
    _R_GET_PTR = openrlib._STRING_PTR
    _R_SIZEOF_ELT = None
    _R_VECTOR_ELT = openrlib.rlib.STRING_ELT
    _R_SET_VECTOR_ELT = openrlib.rlib.SET_STRING_ELT
    _CAST_IN = _as_charsxp_cdata

    def __getitem__(
            self,
            i: typing.Union[int, slice]
    ) -> typing.Union['StrSexpVector', str, 'na_values.NA_Character']:
        """Get one string (integer index) or a new vector (slice).

        With an integer index, `na_values.NA_Character` is returned when
        the C-level accessor returns `None` for the element. A
        `TypeError` is raised for any other type of index."""
        cdata = self.__sexp__._cdata
        if isinstance(i, int):
            i_c = _rinterface._python_index_to_c(cdata, i)
            res = _rinterface._string_getitem(cdata, i_c)
            if res is None:
                res = na_values.NA_Character
        elif isinstance(i, slice):
            res = self.from_iterable(
                [_rinterface._string_getitem(cdata, i_c)
                 for i_c in range(*i.indices(len(self)))]
            )
        else:
            raise TypeError('Indices must be integers or slices,'
                            ' not %s' % type(i))
        return res

    def __setitem__(
            self,
            i: typing.Union[int, slice],
            value: typing.Union[str, typing.Sequence[typing.Optional[str]],
                                'StrSexpVector', 'na_values.NA_Character']
    ) -> None:
        """Set one string (integer index) or several strings (slice).

        An R object (:class:`Sexp`) is stored through its C-level
        pointer, and any other object that is not a `str` is converted
        with `str()`. With a slice, an item `None` in the value is
        stored as R's NA string, and the assignment stops at the
        shortest of the slice and the value. A `TypeError` is raised for
        any other type of index."""
        cdata = self.__sexp__._cdata
        if isinstance(i, int):
            i_c = _rinterface._python_index_to_c(cdata, i)
            if isinstance(value, Sexp):
                val_cdata = value.__sexp__._cdata
            else:
                if not isinstance(value, str):
                    value = str(value)
                val_cdata = _as_charsxp_cdata(value)
            self._R_SET_VECTOR_ELT(
                cdata, i_c,
                val_cdata
            )
        elif isinstance(i, slice):
            for i_c, v in zip(range(*i.indices(len(self))), value):
                if v is None:
                    v_cdata = openrlib.rlib.R_NaString
                elif isinstance(v, Sexp):
                    # Same rule as with an integer index: R objects
                    # are not converted to their Python string
                    # representation.
                    v_cdata = v.__sexp__._cdata
                else:
                    # The test is on the item, not on the whole value.
                    if not isinstance(v, str):
                        v = str(v)
                    v_cdata = _as_charsxp_cdata(v)
                self._R_SET_VECTOR_ELT(
                    cdata, i_c,
                    v_cdata
                )
        else:
            raise TypeError('Indices must be integers or slices, '
                            'not %s' % type(i))

    def get_charsxp(self, i: int) -> CharSexp:
        """Get the R CharSexp objects for the index i."""
        i_c = _rinterface._python_index_to_c(self.__sexp__._cdata, i)
        return CharSexp(
            _rinterface.SexpCapsule(
                openrlib.rlib.STRING_ELT(self.__sexp__._cdata, i_c)
            )
        )
753
754
class RVersion(metaclass=Singleton):
    """Read-only, singleton view of R's `R.version`.

    The content of `R.version` is read once, when the unique instance is
    created, and kept as an ordered mapping from each name to the first
    element of the matching item."""

    _version = None

    def __init__(self):
        # The embedded R must have been initialized to evaluate R code.
        assert embedded.isinitialized()
        # Parse, then evaluate with R's `eval`, the R code "R.version".
        robj = StrSexpVector(['R.version'])
        with memorymanagement.rmemory() as rmemory:
            parsed = _rinterface._parse(robj.__sexp__._cdata, 1, rmemory)
        # NOTE(review): `parsed` is used after leaving the
        # memory-management block - confirm it is still protected there.
        res = baseenv['eval'](parsed)
        self._version = OrderedDict((k, v[0]) for k, v in zip(res.names, res))

    def __getitem__(self, k):
        """Get the item named `k` (e.g., 'major')."""
        return self._version[k]

    def keys(self):
        """Names of the items available."""
        return self._version.keys()
772
773
# Name associated with each R type. It is used by rclass_get() as the
# implicit class of an object when no other rule applies.
_TYPE2STR = {
    RTYPES.NILSXP: 'NULL',
    RTYPES.SYMSXP: 'symbol',  # alias: name
    RTYPES.LISTSXP: 'pairlist',
    RTYPES.CLOSXP: 'closure',
    RTYPES.ENVSXP: 'environment',
    RTYPES.PROMSXP: 'promise',
    RTYPES.LANGSXP: 'language',
    RTYPES.SPECIALSXP: 'special',
    RTYPES.BUILTINSXP: 'builtin',
    RTYPES.CHARSXP: 'char',
    RTYPES.LGLSXP: 'logical',
    RTYPES.INTSXP: 'integer',
    RTYPES.REALSXP: 'double',  # alias: numeric
    RTYPES.CPLXSXP: 'complex',
    RTYPES.STRSXP: 'character',
    RTYPES.DOTSXP: '...',
    RTYPES.ANYSXP: 'any',
    RTYPES.EXPRSXP: 'expression',
    RTYPES.VECSXP: 'list',
    RTYPES.EXTPTRSXP: 'externalptr',
    RTYPES.BCODESXP: 'bytecode',
    RTYPES.WEAKREFSXP: 'weakref',
    RTYPES.RAWSXP: 'raw',
    RTYPES.S4SXP: 'S4'
}
800
801
def rclass_get(scaps: _rinterface.CapsuleBase) -> StrSexpVector:
    """ Get the R class name.

    The attribute "class" of the object is returned when it is not
    empty. Otherwise an implicit class is worked out, in the manner of
    R_data_class() (src/main/attrib.c in the R source code):

    - from the attribute "dim" if it is not empty ("matrix" for two
      dimensions, followed by "array" with R >= 4; "array" otherwise),
    - from the R type of the object otherwise.
    """
    rlib = openrlib.rlib
    with memorymanagement.rmemory() as rmemory:
        explicit = rmemory.protect(
            rlib.Rf_getAttrib(scaps._cdata, rlib.R_ClassSymbol)
        )
        if rlib.Rf_length(explicit) != 0:
            result = conversion._cdata_to_rinterface(explicit)
        else:
            dim = rmemory.protect(
                rlib.Rf_getAttrib(scaps._cdata, rlib.R_DimSymbol)
            )
            ndim = rlib.Rf_length(dim)
            if ndim == 2:
                if int(RVersion()['major']) >= 4:
                    implicit = ('matrix', 'array')
                else:
                    implicit = ('matrix', )
            elif ndim > 0:
                implicit = ('array', )
            else:
                typeof = RTYPES(scaps.typeof)
                if typeof in (RTYPES.CLOSXP,
                              RTYPES.SPECIALSXP,
                              RTYPES.BUILTINSXP):
                    implicit = ('function', )
                elif typeof == RTYPES.REALSXP:
                    implicit = ('numeric', )
                elif typeof == RTYPES.SYMSXP:
                    implicit = ('name', )
                elif typeof == RTYPES.LANGSXP:
                    # A language object is a "call" unless its first
                    # element is the symbol of one of the constructs
                    # listed below.
                    implicit = ('call', )
                    head = rlib.CAR(scaps._cdata)
                    if rlib.Rf_isSymbol(head):
                        head_rstr = rlib.PRINTNAME(head)
                        head_str = conversion._cchar_to_str(
                            rlib.R_CHAR(head_rstr),
                            conversion._R_ENC_PY[
                                rlib.Rf_getCharCE(head_rstr)
                            ]
                        )
                        if head_str in ('if', 'while', 'for', '=',
                                        '<-', '(', '{'):
                            implicit = (head_str, )
                else:
                    implicit = (_TYPE2STR.get(typeof, str(typeof)), )
            result = StrSexpVector.from_iterable(implicit)
    return result
859
860
def rclass_set(
        scaps: _rinterface.CapsuleBase,
        value: 'typing.Union[StrSexpVector, str]'
) -> None:
    """ Set the R class.

    :param:`scaps` A capsule with a pointer to an R object.
    :param:`value` An R vector of strings, or a single Python string
      (which is wrapped into an R vector of strings of length one).
    :raises TypeError: If `value` is neither a `str` nor a
      `StrSexpVector`."""
    if isinstance(value, StrSexpVector):
        value_r = value
    elif isinstance(value, str):
        # Wrap a lone Python string into an R character vector.
        value_r = StrSexpVector.from_iterable([value])
    else:
        raise TypeError('Value should be a str or '
                        'a rpy2.rinterface.sexp.StrSexpVector.')
    # Assign the vector as the "class" attribute of the R object.
    openrlib.rlib.Rf_setAttrib(scaps._cdata,
                               openrlib.rlib.R_ClassSymbol,
                               value_r.__sexp__._cdata)
880
881
def unserialize(state):
    """Rebuild an R object from its serialized form.

    The content of `state` is copied into a newly allocated R raw
    vector of the same length, which is then passed to
    `_rinterface.unserialize()` together with the global environment.

    :param:`state` The serialized data. It must support `len()` and be
      usable as a source for `ffi.memmove()` (presumably a bytes-like
      object).
    :return: A `_rinterface.SexpCapsule` wrapping the unserialized
      R object.
    """
    rlib = openrlib.rlib
    size = len(state)
    with memorymanagement.rmemory() as rmemory:
        # R raw vector acting as the buffer for the serialized data.
        raw_vec = rmemory.protect(rlib.Rf_allocVector(rlib.RAWSXP, size))
        _rinterface.ffi.memmove(rlib.RAW(raw_vec), state, size)
        env_cdata = globalenv.__sexp__._cdata
        unserialized = rmemory.protect(
            _rinterface.unserialize(raw_vec, env_cdata))
        # Wrap the result in a capsule before leaving the context.
        return _rinterface.SexpCapsule(unserialized)
895
896
class NAIntegerType(int, metaclass=Singleton):
    """R's missing value for integers, as a Python `int` singleton.

    The integer value is R's `R_NaInt`. It is printed as
    'NA_integer_' and cannot be evaluated as a boolean.
    """

    def __new__(cls, *args, **kwargs):
        # The NA value is read from the R library; check that the
        # embedded R is ready first.
        embedded.assert_isready()
        na_value = openrlib.rlib.R_NaInt
        return super().__new__(cls, na_value)

    def __repr__(self) -> str:
        return 'NA_integer_'

    # The string form is the same as the representation.
    __str__ = __repr__

    def __bool__(self):
        # A missing value is neither true nor false.
        message = 'R value for missing integer value'
        raise ValueError(message)
911
912
class NACharacterType(CharSexp, metaclass=SingletonABC):
    """R's missing value for strings, as a `CharSexp` singleton.

    The instance wraps R's `R_NaString`. It is printed as
    'NA_character_' and cannot be evaluated as a boolean.
    """

    def __init__(self):
        # The NA value is read from the R library; check that the
        # embedded R is ready first.
        embedded.assert_isready()
        na_capsule = _rinterface.SexpCapsule(openrlib.rlib.R_NaString)
        super().__init__(CharSexp(na_capsule))

    def __repr__(self) -> str:
        return 'NA_character_'

    # The string form is the same as the representation.
    __str__ = __repr__

    def __bool__(self):
        # A missing value is neither true nor false.
        message = 'R value for missing character value'
        raise ValueError(message)
931
932
class NALogicalType(int, metaclass=Singleton):
    """R's missing value for logicals, as a Python `int` singleton.

    The integer value is R's `R_NaInt`. It is printed as 'NA' and
    cannot be evaluated as a boolean.
    """

    def __new__(cls, *args, **kwargs):
        # The NA value is read from the R library; check that the
        # embedded R is ready first.
        embedded.assert_isready()
        na_value = openrlib.rlib.R_NaInt
        return super().__new__(cls, na_value)

    def __repr__(self) -> str:
        return 'NA'

    # The string form is the same as the representation.
    __str__ = __repr__

    def __bool__(self) -> bool:
        # A missing value is neither true nor false.
        message = 'R value for missing boolean value'
        raise ValueError(message)
947
948
class NARealType(float, metaclass=Singleton):
    """R's missing value for doubles, as a Python `float` singleton.

    The float value is R's `R_NaReal`. It is printed as 'NA_real_'
    and cannot be evaluated as a boolean.
    """

    def __new__(cls, *args, **kwargs):
        # The NA value is read from the R library; check that the
        # embedded R is ready first.
        embedded.assert_isready()
        na_value = openrlib.rlib.R_NaReal
        return super().__new__(cls, na_value)

    def __repr__(self) -> str:
        return 'NA_real_'

    # The string form is the same as the representation.
    __str__ = __repr__

    def __bool__(self) -> bool:
        # A missing value is neither true nor false.
        message = 'R value for missing float value'
        raise ValueError(message)
963
964
class NAComplexType(complex, metaclass=Singleton):
    """R's missing value for complex numbers, as a `complex` singleton.

    Both the real and the imaginary parts are R's `R_NaReal`. It is
    printed as 'NA_complex_' and cannot be evaluated as a boolean.
    """

    def __new__(cls, *args, **kwargs):
        # The NA value is read from the R library; check that the
        # embedded R is ready first.
        embedded.assert_isready()
        na_real = openrlib.rlib.R_NaReal
        na_imag = openrlib.rlib.R_NaReal
        return super().__new__(cls, na_real, na_imag)

    def __repr__(self) -> str:
        return 'NA_complex_'

    # The string form is the same as the representation.
    __str__ = __repr__

    def __bool__(self):
        # A missing value is neither true nor false.
        message = 'R value for missing complex value'
        raise ValueError(message)
981