1import math 2 3import numpy as np 4import pytest 5 6import pandas.util._test_decorators as td 7 8import pandas as pd 9from pandas import Series, isna 10import pandas._testing as tm 11 12 13class TestSeriesCov: 14 def test_cov(self, datetime_series): 15 # full overlap 16 tm.assert_almost_equal( 17 datetime_series.cov(datetime_series), datetime_series.std() ** 2 18 ) 19 20 # partial overlap 21 tm.assert_almost_equal( 22 datetime_series[:15].cov(datetime_series[5:]), 23 datetime_series[5:15].std() ** 2, 24 ) 25 26 # No overlap 27 assert np.isnan(datetime_series[::2].cov(datetime_series[1::2])) 28 29 # all NA 30 cp = datetime_series[:10].copy() 31 cp[:] = np.nan 32 assert isna(cp.cov(cp)) 33 34 # min_periods 35 assert isna(datetime_series[:15].cov(datetime_series[5:], min_periods=12)) 36 37 ts1 = datetime_series[:15].reindex(datetime_series.index) 38 ts2 = datetime_series[5:].reindex(datetime_series.index) 39 assert isna(ts1.cov(ts2, min_periods=12)) 40 41 @pytest.mark.parametrize("test_ddof", [None, 0, 1, 2, 3]) 42 def test_cov_ddof(self, test_ddof): 43 # GH#34611 44 np_array1 = np.random.rand(10) 45 np_array2 = np.random.rand(10) 46 47 s1 = Series(np_array1) 48 s2 = Series(np_array2) 49 50 result = s1.cov(s2, ddof=test_ddof) 51 expected = np.cov(np_array1, np_array2, ddof=test_ddof)[0][1] 52 assert math.isclose(expected, result) 53 54 55class TestSeriesCorr: 56 @td.skip_if_no_scipy 57 def test_corr(self, datetime_series): 58 import scipy.stats as stats 59 60 # full overlap 61 tm.assert_almost_equal(datetime_series.corr(datetime_series), 1) 62 63 # partial overlap 64 tm.assert_almost_equal(datetime_series[:15].corr(datetime_series[5:]), 1) 65 66 assert isna(datetime_series[:15].corr(datetime_series[5:], min_periods=12)) 67 68 ts1 = datetime_series[:15].reindex(datetime_series.index) 69 ts2 = datetime_series[5:].reindex(datetime_series.index) 70 assert isna(ts1.corr(ts2, min_periods=12)) 71 72 # No overlap 73 assert np.isnan(datetime_series[::2].corr(datetime_series[1::2])) 74 75 # all NA 76 cp = datetime_series[:10].copy() 77 cp[:] = np.nan 78 assert isna(cp.corr(cp)) 79 80 A = tm.makeTimeSeries() 81 B = tm.makeTimeSeries() 82 result = A.corr(B) 83 expected, _ = stats.pearsonr(A, B) 84 tm.assert_almost_equal(result, expected) 85 86 @td.skip_if_no_scipy 87 def test_corr_rank(self): 88 import scipy.stats as stats 89 90 # kendall and spearman 91 A = tm.makeTimeSeries() 92 B = tm.makeTimeSeries() 93 A[-5:] = A[:5] 94 result = A.corr(B, method="kendall") 95 expected = stats.kendalltau(A, B)[0] 96 tm.assert_almost_equal(result, expected) 97 98 result = A.corr(B, method="spearman") 99 expected = stats.spearmanr(A, B)[0] 100 tm.assert_almost_equal(result, expected) 101 102 # results from R 103 A = Series( 104 [ 105 -0.89926396, 106 0.94209606, 107 -1.03289164, 108 -0.95445587, 109 0.76910310, 110 -0.06430576, 111 -2.09704447, 112 0.40660407, 113 -0.89926396, 114 0.94209606, 115 ] 116 ) 117 B = Series( 118 [ 119 -1.01270225, 120 -0.62210117, 121 -1.56895827, 122 0.59592943, 123 -0.01680292, 124 1.17258718, 125 -1.06009347, 126 -0.10222060, 127 -0.89076239, 128 0.89372375, 129 ] 130 ) 131 kexp = 0.4319297 132 sexp = 0.5853767 133 tm.assert_almost_equal(A.corr(B, method="kendall"), kexp) 134 tm.assert_almost_equal(A.corr(B, method="spearman"), sexp) 135 136 def test_corr_invalid_method(self): 137 # GH PR #22298 138 s1 = Series(np.random.randn(10)) 139 s2 = Series(np.random.randn(10)) 140 msg = "method must be either 'pearson', 'spearman', 'kendall', or a callable, " 141 with pytest.raises(ValueError, match=msg): 142 s1.corr(s2, method="____") 143 144 def test_corr_callable_method(self, datetime_series): 145 # simple correlation example 146 # returns 1 if exact equality, 0 otherwise 147 my_corr = lambda a, b: 1.0 if (a == b).all() else 0.0 148 149 # simple example 150 s1 = Series([1, 2, 3, 4, 5]) 151 s2 = Series([5, 4, 3, 2, 1]) 152 expected = 0 153 tm.assert_almost_equal(s1.corr(s2, method=my_corr), expected) 154 155 # full overlap 156 tm.assert_almost_equal( 157 datetime_series.corr(datetime_series, method=my_corr), 1.0 158 ) 159 160 # partial overlap 161 tm.assert_almost_equal( 162 datetime_series[:15].corr(datetime_series[5:], method=my_corr), 1.0 163 ) 164 165 # No overlap 166 assert np.isnan( 167 datetime_series[::2].corr(datetime_series[1::2], method=my_corr) 168 ) 169 170 # dataframe example 171 df = pd.DataFrame([s1, s2]) 172 expected = pd.DataFrame([{0: 1.0, 1: 0}, {0: 0, 1: 1.0}]) 173 tm.assert_almost_equal(df.transpose().corr(method=my_corr), expected) 174