import math
from collections import OrderedDict
from datetime import datetime

import pytest

from rpy2 import rinterface
from rpy2 import robjects
from rpy2.robjects import vectors
from rpy2.robjects import conversion
from rpy2.robjects import default_converter
from rpy2.robjects.conversion import localconverter


class MockNamespace(object):
    """Placeholder bound to the name of an optional module that is missing.

    Every attribute lookup returns None, so module-level expressions such
    as the ``parametrize`` argument lists below can still be evaluated.
    """

    def __getattr__(self, name):
        return None


has_pandas = False
try:
    import pandas
    has_pandas = True
except ImportError:
    pandas = MockNamespace()

has_numpy = False
try:
    import numpy
    has_numpy = True
except ImportError:
    numpy = MockNamespace()

if has_pandas:
    import rpy2.robjects.pandas2ri as rpyp


@pytest.mark.skipif(not has_pandas, reason='Package pandas is not installed.')
class TestPandasConversions(object):
    """Tests for the pandas <-> R conversion rules in ``pandas2ri``."""

    def testActivate(self):
        """activate() grows the py2rpy registry; deactivate() restores it."""
        # FIXME: is the following still meaningful?
        assert rpyp.py2rpy != robjects.conversion.py2rpy
        n_before = len(robjects.conversion.py2rpy.registry)
        keys_before = set(robjects.conversion.py2rpy.registry.keys())
        rpyp.activate()
        try:
            assert len(conversion.py2rpy.registry) > n_before
        finally:
            # Always undo the global activation, even when the assertion
            # fails, so that it does not leak into the other tests.
            rpyp.deactivate()
        assert len(conversion.py2rpy.registry) == n_before
        assert set(conversion.py2rpy.registry.keys()) == keys_before

    def testActivateTwice(self):
        """An activate/deactivate cycle can be repeated."""
        # FIXME: is the following still meaningful?
        assert rpyp.py2rpy != robjects.conversion.py2rpy
        n_before = len(robjects.conversion.py2rpy.registry)
        keys_before = set(robjects.conversion.py2rpy.registry.keys())
        rpyp.activate()
        rpyp.deactivate()
        rpyp.activate()
        try:
            assert len(conversion.py2rpy.registry) > n_before
        finally:
            # Same clean-up guarantee as in testActivate.
            rpyp.deactivate()
        assert len(conversion.py2rpy.registry) == n_before
        assert set(conversion.py2rpy.registry.keys()) == keys_before

    def test_dataframe(self):
        """A pandas DataFrame converts to an R data frame of same shape."""
        # Content for test data frame
        columns = (
            ('b', numpy.array([True, False, True], dtype=numpy.bool_)),
            ('i', numpy.array([1, 2, 3], dtype='i')),
            ('f', numpy.array([1, 2, 3], dtype='f')),
            # ('s', numpy.array([b'b', b'c', b'd'], dtype='S1')),
            ('u', numpy.array([u'a', u'b', u'c'], dtype='U')),
            ('dates', [datetime(2012, 5, 2),
                       datetime(2012, 6, 3),
                       datetime(2012, 7, 1)])
        )
        od = OrderedDict(columns)
        # Pandas data frame
        pd_df = pandas.DataFrame(od)
        # Convert to R
        with localconverter(default_converter + rpyp.converter):
            rp_df = robjects.conversion.py2rpy(pd_df)
        assert pd_df.shape[0] == rp_df.nrow
        assert pd_df.shape[1] == rp_df.ncol
        # assert tuple(rp_df.rx2('s')) == (b'b', b'c', b'd')
        assert tuple(rp_df.rx2('u')) == ('a', 'b', 'c')

    def test_dataframe_columnnames(self):
        """Column names containing spaces are kept unchanged."""
        pd_df = pandas.DataFrame({'the one': [1, 2], 'the other': [3, 4]})
        # Convert to R
        with localconverter(default_converter + rpyp.converter):
            rp_df = robjects.conversion.py2rpy(pd_df)
        assert tuple(rp_df.names) == ('the one', 'the other')

    def test_series(self):
        """A Series of floats converts to an R float vector."""
        s = pandas.Series(numpy.random.randn(5),
                          index=['a', 'b', 'c', 'd', 'e'])
        with localconverter(default_converter + rpyp.converter):
            rp_s = robjects.conversion.py2rpy(s)
        assert isinstance(rp_s, rinterface.FloatSexpVector)

    # The pandas extension dtypes are passed as instances, like in the
    # other parametrized tests of this class.
    @pytest.mark.parametrize('dtype',
                             ('i',
                              numpy.int8 if has_numpy else None,
                              numpy.int16 if has_numpy else None,
                              numpy.int32 if has_numpy else None,
                              numpy.int64 if has_numpy else None,
                              numpy.uint8 if has_numpy else None,
                              numpy.uint16 if has_numpy else None,
                              pandas.Int32Dtype() if has_pandas else None,
                              pandas.Int64Dtype() if has_pandas else None))
    def test_series_int(self, dtype):
        """A Series with an integer dtype converts to an R int vector."""
        s = pandas.Series(range(5),
                          index=['a', 'b', 'c', 'd', 'e'],
                          dtype=dtype)
        with localconverter(default_converter + rpyp.converter):
            rp_s = robjects.conversion.py2rpy(s)
        assert isinstance(rp_s, rinterface.IntSexpVector)

    @pytest.mark.parametrize('dtype',
                             (pandas.Int32Dtype() if has_pandas else None,
                              pandas.Int64Dtype() if has_pandas else None))
    def test_dataframe_int_nan(self, dtype):
        """A missing value in a nullable-int column becomes NA_integer_."""
        # numpy.nan rather than the numpy.NaN alias (removed in NumPy 2.0).
        a = pandas.DataFrame([(numpy.nan,)], dtype=dtype, columns=['z'])
        with localconverter(default_converter + rpyp.converter):
            b = robjects.conversion.py2rpy(a)
        assert b[0][0] is rinterface.na_values.NA_Integer
        # Only checks that the conversion back to Python does not raise.
        with localconverter(default_converter + rpyp.converter):
            robjects.conversion.rpy2py(b)

    @pytest.mark.parametrize('dtype',
                             (pandas.Int32Dtype() if has_pandas else None,
                              pandas.Int64Dtype() if has_pandas else None))
    def test_series_int_nan(self, dtype):
        """A missing value in a nullable-int Series becomes NA_integer_."""
        # numpy.nan rather than the numpy.NaN alias (removed in NumPy 2.0).
        a = pandas.Series((numpy.nan,), dtype=dtype, index=['z'])
        with localconverter(default_converter + rpyp.converter):
            b = robjects.conversion.py2rpy(a)
        assert b[0] is rinterface.na_values.NA_Integer
        # Only checks that the conversion back to Python does not raise.
        with localconverter(default_converter + rpyp.converter):
            robjects.conversion.rpy2py(b)

    @pytest.mark.skipif(not (has_numpy and has_pandas),
                        reason='Packages numpy and pandas must be installed.')
    @pytest.mark.parametrize(
        'data',
        (['x', 'y', 'z'],
         ['x', 'y', None],
         ['x', 'y', numpy.nan],
         ['x', 'y', pandas.NA])
    )
    @pytest.mark.parametrize(
        'dtype', ['O', pandas.StringDtype() if has_pandas else None]
    )
    def test_series_obj_str(self, data, dtype):
        """Series of strings (with or without missing) give a str vector."""
        s = pandas.Series(data, index=['a', 'b', 'c'], dtype=dtype)
        with localconverter(default_converter + rpyp.converter):
            rp_s = robjects.conversion.py2rpy(s)
        assert isinstance(rp_s, rinterface.StrSexpVector)

    def test_series_obj_mixed(self):
        """Object Series mixing str and other types raise ValueError."""
        s = pandas.Series(['x', 1, False], index=['a', 'b', 'c'])
        with localconverter(default_converter + rpyp.converter):
            with pytest.raises(ValueError):
                robjects.conversion.py2rpy(s)

        s = pandas.Series(['x', 1, None], index=['a', 'b', 'c'])
        with localconverter(default_converter + rpyp.converter):
            with pytest.raises(ValueError):
                robjects.conversion.py2rpy(s)

    def test_series_obj_bool(self):
        """Series of booleans (optionally with None) give a bool vector."""
        s = pandas.Series([True, False, True], index=['a', 'b', 'c'])
        with localconverter(default_converter + rpyp.converter):
            rp_s = robjects.conversion.py2rpy(s)
        assert isinstance(rp_s, rinterface.BoolSexpVector)

        s = pandas.Series([True, False, None], index=['a', 'b', 'c'])
        with localconverter(default_converter + rpyp.converter):
            rp_s = robjects.conversion.py2rpy(s)
        assert isinstance(rp_s, rinterface.BoolSexpVector)

    def test_series_obj_allnone(self):
        """A Series containing only None converts to a bool vector."""
        s = pandas.Series([None, None, None], index=['a', 'b', 'c'])
        with localconverter(default_converter + rpyp.converter):
            rp_s = robjects.conversion.py2rpy(s)
        assert isinstance(rp_s, rinterface.BoolSexpVector)

    def test_series_issue264(self):
        """A str Series with an int64 index converts and can be printed."""
        # pandas.Index(..., dtype='int64') replaces pandas.Int64Index,
        # which was removed in pandas 2.0.
        s = pandas.Series(('a', 'b', 'c', 'd', 'e'),
                          index=pandas.Index([0, 1, 2, 3, 4], dtype='int64'))
        with localconverter(default_converter + rpyp.converter):
            rp_s = robjects.conversion.py2rpy(s)
        # segfault before the fix
        str(rp_s)
        assert isinstance(rp_s, rinterface.StrSexpVector)

    def test_object2String(self):
        """An object-dtype Series of str converts to an R str vector."""
        series = pandas.Series(["a", "b", "c", "a"], dtype="O")
        with localconverter(default_converter + rpyp.converter):
            rp_c = robjects.conversion.py2rpy(series)
        assert isinstance(rp_c, rinterface.StrSexpVector)

    def test_object2String_with_None(self):
        """Same as test_object2String, with a None among the values."""
        series = pandas.Series([None, "a", "b", "c", "a"], dtype="O")
        with localconverter(default_converter + rpyp.converter):
            rp_c = robjects.conversion.py2rpy(series)
        assert isinstance(rp_c, rinterface.StrSexpVector)

    def test_factor2Category(self):
        """An R factor converts to a pandas Categorical."""
        factor = robjects.vectors.FactorVector(('a', 'b', 'a'))
        with localconverter(default_converter + rpyp.converter):
            rp_c = robjects.conversion.rpy2py(factor)
        assert isinstance(rp_c, pandas.Categorical)

    def test_factorwithNA2Category(self):
        """An NA in an R factor is a NaN in the resulting Categorical."""
        factor = robjects.vectors.FactorVector(('a', 'b', 'a', None))
        assert factor[3] is rinterface.na_values.NA_Integer
        with localconverter(default_converter + rpyp.converter):
            rp_c = robjects.conversion.rpy2py(factor)
        assert isinstance(rp_c, pandas.Categorical)
        assert math.isnan(rp_c[3])

    def test_orderedFactor2Category(self):
        """An ordered R factor converts to a pandas Categorical."""
        factor = robjects.vectors.FactorVector(('a', 'b', 'a'), ordered=True)
        with localconverter(default_converter + rpyp.converter):
            rp_c = robjects.conversion.rpy2py(factor)
        assert isinstance(rp_c, pandas.Categorical)

    def test_category2Factor(self):
        """A category-dtype Series converts to an R factor."""
        category = pandas.Series(["a", "b", "c", "a"], dtype="category")
        with localconverter(default_converter + rpyp.converter):
            rp_c = robjects.conversion.py2rpy(category)
        assert isinstance(rp_c, robjects.vectors.FactorVector)

    def test_categorywithNA2Factor(self):
        """A NaN in a category-dtype Series is an NA in the R factor."""
        category = pandas.Series(['a', 'b', 'c', numpy.nan],
                                 dtype='category')
        with localconverter(default_converter + rpyp.converter):
            rp_c = robjects.conversion.py2rpy(category)
        assert isinstance(rp_c, robjects.vectors.FactorVector)
        assert rp_c[3] == rinterface.NA_Integer

    def test_orderedCategory2Factor(self):
        """A Series holding an ordered Categorical converts to a factor."""
        category = pandas.Series(pandas.Categorical(['a', 'b', 'c', 'a'],
                                                    categories=['a', 'b', 'c'],
                                                    ordered=True))
        with localconverter(default_converter + rpyp.converter):
            rp_c = robjects.conversion.py2rpy(category)
        assert isinstance(rp_c, robjects.vectors.FactorVector)

    def test_datetime2posixct(self):
        """A tz-aware datetime Series converts to POSIXct."""
        # Local name chosen to avoid shadowing the imported datetime class.
        dt_series = pandas.Series(
            pandas.date_range('2017-01-01 00:00:00.234',
                              periods=20, freq='ms', tz='UTC')
        )
        with localconverter(default_converter + rpyp.converter):
            rp_c = robjects.conversion.py2rpy(dt_series)
        assert isinstance(rp_c, robjects.vectors.POSIXct)
        # Consecutive values are 1 ms apart: same whole second, but the
        # values themselves must differ.
        assert int(rp_c[0]) == 1483228800
        assert int(rp_c[1]) == 1483228800
        assert rp_c[0] != rp_c[1]

    def test_datetime2posixct_withNA(self):
        """A NaT in a datetime Series is a NaN in the POSIXct vector."""
        dt_series = pandas.Series(
            pandas.date_range('2017-01-01 00:00:00.234',
                              periods=20, freq='ms', tz='UTC')
        )
        dt_series[1] = pandas.NaT
        with localconverter(default_converter + rpyp.converter):
            rp_c = robjects.conversion.py2rpy(dt_series)
        assert isinstance(rp_c, robjects.vectors.POSIXct)
        assert int(rp_c[0]) == 1483228800
        assert math.isnan(rp_c[1])
        assert rp_c[0] != rp_c[1]

    def test_date2posixct(self):
        """A Series of date objects converts to floats (date ordinals)."""
        today = datetime.now().date()
        date = pandas.Series([today])
        with localconverter(default_converter + rpyp.converter):
            rp_c = robjects.conversion.py2rpy(date)
        assert isinstance(rp_c, robjects.vectors.FloatSexpVector)
        assert tuple(int(x) for x in rp_c) == (today.toordinal(), )

    def test_timeR2Pandas(self):
        """An R POSIXct vector converts to pandas timestamps (NA -> NaT)."""
        tzone = robjects.vectors.get_timezone()
        dt = [datetime(1960, 5, 2),
              datetime(1970, 6, 3),
              datetime(2012, 7, 1)]
        dt = [x.replace(tzinfo=tzone) for x in dt]
        # fix the time
        ts = [x.timestamp() for x in dt]
        # Create an R POSIXct vector.
        r_time = robjects.baseenv['as.POSIXct'](
            rinterface.FloatSexpVector(ts),
            origin=rinterface.StrSexpVector(('1970-01-01',))
        )

        # Convert R POSIXct vector to pandas-compatible vector
        with localconverter(default_converter + rpyp.converter):
            py_time = robjects.conversion.rpy2py(r_time)

        # Check that the round trip did not introduce changes
        for expected, obtained in zip(dt, py_time):
            assert expected == obtained.to_pydatetime()

        # Try with NA.
        r_time[1] = rinterface.na_values.NA_Real
        # Convert R POSIXct vector to pandas-compatible vector
        with localconverter(default_converter + rpyp.converter):
            py_time = robjects.conversion.rpy2py(r_time)

        assert py_time[1] is pandas.NaT

    def test_posixct_in_dataframe_to_pandas(self):
        """A POSIXct column of an R data frame gets a datetime64 dtype."""
        tzone = robjects.vectors.get_timezone()
        dt = [datetime(1960, 5, 2),
              datetime(1970, 6, 3),
              datetime(2012, 7, 1)]
        dt = [x.replace(tzinfo=tzone) for x in dt]
        # fix the time
        ts = [x.timestamp() for x in dt]
        # Create an R data.frame with a posixct_vector.
        r_dataf = robjects.vectors.DataFrame({
            'mydate': robjects.baseenv['as.POSIXct'](
                rinterface.FloatSexpVector(ts),
                origin=rinterface.StrSexpVector(('1970-01-01',))
            )})

        # Convert R POSIXct vector to pandas-compatible vector
        with localconverter(default_converter + rpyp.converter):
            py_dataf = robjects.conversion.rpy2py(r_dataf)
        assert pandas.core.dtypes.common.is_datetime64_any_dtype(
            py_dataf['mydate']
        )

    def test_repr(self):
        """repr() of a converted data frame summarizes the column types."""
        # this should go to testVector, with other tests for repr()
        columns = (('b', numpy.array([True, False, True],
                                     dtype=numpy.bool_)),
                   ('i', numpy.array([1, 2, 3], dtype="i")),
                   ('f', numpy.array([1, 2, 3], dtype="f")),
                   ('s', numpy.array(["a", "b", "c"], dtype="S")),
                   ('u', numpy.array([u"a", u"b", u"c"], dtype="U")))
        od = OrderedDict(columns)
        pd_df = pandas.DataFrame(od)
        with localconverter(default_converter + rpyp.converter):
            rp_df = robjects.conversion.py2rpy(pd_df)
        s = repr(rp_df)  # used to fail with a TypeError.
        s = s.split('\n')
        repr_str = ('[BoolSex..., IntSexp..., FloatSe..., '
                    'ByteSex..., StrSexp...]')
        assert repr_str == s[2].strip()

        # Try again with the conversion still active.
        with localconverter(default_converter + rpyp.converter):
            rp_df = robjects.conversion.py2rpy(pd_df)
            s = repr(rp_df)  # used to fail with a TypeError.
        s = s.split('\n')
        assert repr_str == s[2].strip()

    def test_ri2pandas(self):
        """An R data frame converts to a pandas DataFrame, column by column.

        NOTE(review): this test used to be shadowed by a second method of
        the same name and was therefore never collected; confirm that the
        expected dtypes below still match the converter.
        """
        # stringsAsFactors is set explicitly because its default changed
        # to FALSE in R 4.0.0, while column 'c' is expected to be a factor.
        rdataf = robjects.r('data.frame(a=1:2, '
                            '           b=I(c("a", "b")), '
                            '           c=c("a", "b"), '
                            '           stringsAsFactors=TRUE)')
        with localconverter(default_converter + rpyp.converter):
            pandas_df = robjects.conversion.rpy2py(rdataf)

        assert isinstance(pandas_df, pandas.DataFrame)
        assert ('a', 'b', 'c') == tuple(pandas_df.keys())
        assert pandas_df['a'].dtype in (numpy.dtype('int32'),
                                        numpy.dtype('int64'))
        assert pandas_df['b'].dtype == numpy.dtype('O')
        assert isinstance(pandas_df['c'].dtype,
                          pandas.api.types.CategoricalDtype)

    def test_ri2pandas_rownames(self):
        """R row names become the index of the pandas DataFrame."""
        rdataf = robjects.r('data.frame(a=1:2, '
                            '           row.names=c("a", "b"))')
        with localconverter(default_converter + rpyp.converter) as cv:
            pandas_df = cv.rpy2py(rdataf)
        assert all(x == y for x, y in zip(rdataf.rownames, pandas_df.index))

    def test_ri2pandas_issue207(self):
        """Assigning an R data frame to globalenv raises no ValueError."""
        d = robjects.DataFrame({'x': 1})
        with localconverter(default_converter + rpyp.converter):
            try:
                ok = True
                robjects.globalenv['d'] = d
            except ValueError:
                ok = False
            finally:
                # Do not leave the test object behind in R's global env.
                if 'd' in robjects.globalenv:
                    del robjects.globalenv['d']
        assert ok