1import numpy as np 2import pytest 3 4from pandas import DataFrame, Series 5import pandas._testing as tm 6 7 8class TestDataFrameClip: 9 def test_clip(self, float_frame): 10 median = float_frame.median().median() 11 original = float_frame.copy() 12 13 double = float_frame.clip(upper=median, lower=median) 14 assert not (double.values != median).any() 15 16 # Verify that float_frame was not changed inplace 17 assert (float_frame.values == original.values).all() 18 19 def test_inplace_clip(self, float_frame): 20 # GH#15388 21 median = float_frame.median().median() 22 frame_copy = float_frame.copy() 23 24 return_value = frame_copy.clip(upper=median, lower=median, inplace=True) 25 assert return_value is None 26 assert not (frame_copy.values != median).any() 27 28 def test_dataframe_clip(self): 29 # GH#2747 30 df = DataFrame(np.random.randn(1000, 2)) 31 32 for lb, ub in [(-1, 1), (1, -1)]: 33 clipped_df = df.clip(lb, ub) 34 35 lb, ub = min(lb, ub), max(ub, lb) 36 lb_mask = df.values <= lb 37 ub_mask = df.values >= ub 38 mask = ~lb_mask & ~ub_mask 39 assert (clipped_df.values[lb_mask] == lb).all() 40 assert (clipped_df.values[ub_mask] == ub).all() 41 assert (clipped_df.values[mask] == df.values[mask]).all() 42 43 def test_clip_mixed_numeric(self): 44 # TODO(jreback) 45 # clip on mixed integer or floats 46 # with integer clippers coerces to float 47 df = DataFrame({"A": [1, 2, 3], "B": [1.0, np.nan, 3.0]}) 48 result = df.clip(1, 2) 49 expected = DataFrame({"A": [1, 2, 2], "B": [1.0, np.nan, 2.0]}) 50 tm.assert_frame_equal(result, expected, check_like=True) 51 52 # GH#24162, clipping now preserves numeric types per column 53 df = DataFrame([[1, 2, 3.4], [3, 4, 5.6]], columns=["foo", "bar", "baz"]) 54 expected = df.dtypes 55 result = df.clip(upper=3).dtypes 56 tm.assert_series_equal(result, expected) 57 58 @pytest.mark.parametrize("inplace", [True, False]) 59 def test_clip_against_series(self, inplace): 60 # GH#6966 61 62 df = DataFrame(np.random.randn(1000, 2)) 63 lb = Series(np.random.randn(1000)) 64 ub = lb + 1 65 66 original = df.copy() 67 clipped_df = df.clip(lb, ub, axis=0, inplace=inplace) 68 69 if inplace: 70 clipped_df = df 71 72 for i in range(2): 73 lb_mask = original.iloc[:, i] <= lb 74 ub_mask = original.iloc[:, i] >= ub 75 mask = ~lb_mask & ~ub_mask 76 77 result = clipped_df.loc[lb_mask, i] 78 tm.assert_series_equal(result, lb[lb_mask], check_names=False) 79 assert result.name == i 80 81 result = clipped_df.loc[ub_mask, i] 82 tm.assert_series_equal(result, ub[ub_mask], check_names=False) 83 assert result.name == i 84 85 tm.assert_series_equal(clipped_df.loc[mask, i], df.loc[mask, i]) 86 87 @pytest.mark.parametrize("inplace", [True, False]) 88 @pytest.mark.parametrize("lower", [[2, 3, 4], np.asarray([2, 3, 4])]) 89 @pytest.mark.parametrize( 90 "axis,res", 91 [ 92 (0, [[2.0, 2.0, 3.0], [4.0, 5.0, 6.0], [7.0, 7.0, 7.0]]), 93 (1, [[2.0, 3.0, 4.0], [4.0, 5.0, 6.0], [5.0, 6.0, 7.0]]), 94 ], 95 ) 96 def test_clip_against_list_like(self, simple_frame, inplace, lower, axis, res): 97 # GH#15390 98 original = simple_frame.copy(deep=True) 99 100 result = original.clip(lower=lower, upper=[5, 6, 7], axis=axis, inplace=inplace) 101 102 expected = DataFrame(res, columns=original.columns, index=original.index) 103 if inplace: 104 result = original 105 tm.assert_frame_equal(result, expected, check_exact=True) 106 107 @pytest.mark.parametrize("axis", [0, 1, None]) 108 def test_clip_against_frame(self, axis): 109 df = DataFrame(np.random.randn(1000, 2)) 110 lb = DataFrame(np.random.randn(1000, 2)) 111 ub = lb + 1 112 113 clipped_df = df.clip(lb, ub, axis=axis) 114 115 lb_mask = df <= lb 116 ub_mask = df >= ub 117 mask = ~lb_mask & ~ub_mask 118 119 tm.assert_frame_equal(clipped_df[lb_mask], lb[lb_mask]) 120 tm.assert_frame_equal(clipped_df[ub_mask], ub[ub_mask]) 121 tm.assert_frame_equal(clipped_df[mask], df[mask]) 122 123 def test_clip_against_unordered_columns(self): 124 # GH#20911 125 df1 = DataFrame(np.random.randn(1000, 4), columns=["A", "B", "C", "D"]) 126 df2 = DataFrame(np.random.randn(1000, 4), columns=["D", "A", "B", "C"]) 127 df3 = DataFrame(df2.values - 1, columns=["B", "D", "C", "A"]) 128 result_upper = df1.clip(lower=0, upper=df2) 129 expected_upper = df1.clip(lower=0, upper=df2[df1.columns]) 130 result_lower = df1.clip(lower=df3, upper=3) 131 expected_lower = df1.clip(lower=df3[df1.columns], upper=3) 132 result_lower_upper = df1.clip(lower=df3, upper=df2) 133 expected_lower_upper = df1.clip(lower=df3[df1.columns], upper=df2[df1.columns]) 134 tm.assert_frame_equal(result_upper, expected_upper) 135 tm.assert_frame_equal(result_lower, expected_lower) 136 tm.assert_frame_equal(result_lower_upper, expected_lower_upper) 137 138 def test_clip_with_na_args(self, float_frame): 139 """Should process np.nan argument as None """ 140 # GH#17276 141 tm.assert_frame_equal(float_frame.clip(np.nan), float_frame) 142 tm.assert_frame_equal(float_frame.clip(upper=np.nan, lower=np.nan), float_frame) 143 144 # GH#19992 145 df = DataFrame({"col_0": [1, 2, 3], "col_1": [4, 5, 6], "col_2": [7, 8, 9]}) 146 147 result = df.clip(lower=[4, 5, np.nan], axis=0) 148 expected = DataFrame( 149 {"col_0": [4, 5, np.nan], "col_1": [4, 5, np.nan], "col_2": [7, 8, np.nan]} 150 ) 151 tm.assert_frame_equal(result, expected) 152 153 result = df.clip(lower=[4, 5, np.nan], axis=1) 154 expected = DataFrame( 155 {"col_0": [4, 4, 4], "col_1": [5, 5, 6], "col_2": [np.nan, np.nan, np.nan]} 156 ) 157 tm.assert_frame_equal(result, expected) 158