200 lines
		
	
	
		
			7.4 KiB
		
	
	
	
		
			Python
		
	
	
	
	
	
			
		
		
	
	
			200 lines
		
	
	
		
			7.4 KiB
		
	
	
	
		
			Python
		
	
	
	
	
	
| import numpy as np
 | |
| import pytest
 | |
| 
 | |
| from pandas import (
 | |
|     DataFrame,
 | |
|     Series,
 | |
| )
 | |
| import pandas._testing as tm
 | |
| 
 | |
| 
 | |
| class TestDataFrameClip:
 | |
|     def test_clip(self, float_frame):
 | |
|         median = float_frame.median().median()
 | |
|         original = float_frame.copy()
 | |
| 
 | |
|         double = float_frame.clip(upper=median, lower=median)
 | |
|         assert not (double.values != median).any()
 | |
| 
 | |
|         # Verify that float_frame was not changed inplace
 | |
|         assert (float_frame.values == original.values).all()
 | |
| 
 | |
|     def test_inplace_clip(self, float_frame):
 | |
|         # GH#15388
 | |
|         median = float_frame.median().median()
 | |
|         frame_copy = float_frame.copy()
 | |
| 
 | |
|         return_value = frame_copy.clip(upper=median, lower=median, inplace=True)
 | |
|         assert return_value is None
 | |
|         assert not (frame_copy.values != median).any()
 | |
| 
 | |
|     def test_dataframe_clip(self):
 | |
|         # GH#2747
 | |
|         df = DataFrame(np.random.default_rng(2).standard_normal((1000, 2)))
 | |
| 
 | |
|         for lb, ub in [(-1, 1), (1, -1)]:
 | |
|             clipped_df = df.clip(lb, ub)
 | |
| 
 | |
|             lb, ub = min(lb, ub), max(ub, lb)
 | |
|             lb_mask = df.values <= lb
 | |
|             ub_mask = df.values >= ub
 | |
|             mask = ~lb_mask & ~ub_mask
 | |
|             assert (clipped_df.values[lb_mask] == lb).all()
 | |
|             assert (clipped_df.values[ub_mask] == ub).all()
 | |
|             assert (clipped_df.values[mask] == df.values[mask]).all()
 | |
| 
 | |
|     def test_clip_mixed_numeric(self):
 | |
|         # clip on mixed integer or floats
 | |
|         # GH#24162, clipping now preserves numeric types per column
 | |
|         df = DataFrame({"A": [1, 2, 3], "B": [1.0, np.nan, 3.0]})
 | |
|         result = df.clip(1, 2)
 | |
|         expected = DataFrame({"A": [1, 2, 2], "B": [1.0, np.nan, 2.0]})
 | |
|         tm.assert_frame_equal(result, expected)
 | |
| 
 | |
|         df = DataFrame([[1, 2, 3.4], [3, 4, 5.6]], columns=["foo", "bar", "baz"])
 | |
|         expected = df.dtypes
 | |
|         result = df.clip(upper=3).dtypes
 | |
|         tm.assert_series_equal(result, expected)
 | |
| 
 | |
|     @pytest.mark.parametrize("inplace", [True, False])
 | |
|     def test_clip_against_series(self, inplace):
 | |
|         # GH#6966
 | |
| 
 | |
|         df = DataFrame(np.random.default_rng(2).standard_normal((1000, 2)))
 | |
|         lb = Series(np.random.default_rng(2).standard_normal(1000))
 | |
|         ub = lb + 1
 | |
| 
 | |
|         original = df.copy()
 | |
|         clipped_df = df.clip(lb, ub, axis=0, inplace=inplace)
 | |
| 
 | |
|         if inplace:
 | |
|             clipped_df = df
 | |
| 
 | |
|         for i in range(2):
 | |
|             lb_mask = original.iloc[:, i] <= lb
 | |
|             ub_mask = original.iloc[:, i] >= ub
 | |
|             mask = ~lb_mask & ~ub_mask
 | |
| 
 | |
|             result = clipped_df.loc[lb_mask, i]
 | |
|             tm.assert_series_equal(result, lb[lb_mask], check_names=False)
 | |
|             assert result.name == i
 | |
| 
 | |
|             result = clipped_df.loc[ub_mask, i]
 | |
|             tm.assert_series_equal(result, ub[ub_mask], check_names=False)
 | |
|             assert result.name == i
 | |
| 
 | |
|             tm.assert_series_equal(clipped_df.loc[mask, i], df.loc[mask, i])
 | |
| 
 | |
|     @pytest.mark.parametrize("inplace", [True, False])
 | |
|     @pytest.mark.parametrize("lower", [[2, 3, 4], np.asarray([2, 3, 4])])
 | |
|     @pytest.mark.parametrize(
 | |
|         "axis,res",
 | |
|         [
 | |
|             (0, [[2.0, 2.0, 3.0], [4.0, 5.0, 6.0], [7.0, 7.0, 7.0]]),
 | |
|             (1, [[2.0, 3.0, 4.0], [4.0, 5.0, 6.0], [5.0, 6.0, 7.0]]),
 | |
|         ],
 | |
|     )
 | |
|     def test_clip_against_list_like(self, inplace, lower, axis, res):
 | |
|         # GH#15390
 | |
|         arr = np.array([[1.0, 2.0, 3.0], [4.0, 5.0, 6.0], [7.0, 8.0, 9.0]])
 | |
| 
 | |
|         original = DataFrame(
 | |
|             arr, columns=["one", "two", "three"], index=["a", "b", "c"]
 | |
|         )
 | |
| 
 | |
|         result = original.clip(lower=lower, upper=[5, 6, 7], axis=axis, inplace=inplace)
 | |
| 
 | |
|         expected = DataFrame(res, columns=original.columns, index=original.index)
 | |
|         if inplace:
 | |
|             result = original
 | |
|         tm.assert_frame_equal(result, expected, check_exact=True)
 | |
| 
 | |
|     @pytest.mark.parametrize("axis", [0, 1, None])
 | |
|     def test_clip_against_frame(self, axis):
 | |
|         df = DataFrame(np.random.default_rng(2).standard_normal((1000, 2)))
 | |
|         lb = DataFrame(np.random.default_rng(2).standard_normal((1000, 2)))
 | |
|         ub = lb + 1
 | |
| 
 | |
|         clipped_df = df.clip(lb, ub, axis=axis)
 | |
| 
 | |
|         lb_mask = df <= lb
 | |
|         ub_mask = df >= ub
 | |
|         mask = ~lb_mask & ~ub_mask
 | |
| 
 | |
|         tm.assert_frame_equal(clipped_df[lb_mask], lb[lb_mask])
 | |
|         tm.assert_frame_equal(clipped_df[ub_mask], ub[ub_mask])
 | |
|         tm.assert_frame_equal(clipped_df[mask], df[mask])
 | |
| 
 | |
|     def test_clip_against_unordered_columns(self):
 | |
|         # GH#20911
 | |
|         df1 = DataFrame(
 | |
|             np.random.default_rng(2).standard_normal((1000, 4)),
 | |
|             columns=["A", "B", "C", "D"],
 | |
|         )
 | |
|         df2 = DataFrame(
 | |
|             np.random.default_rng(2).standard_normal((1000, 4)),
 | |
|             columns=["D", "A", "B", "C"],
 | |
|         )
 | |
|         df3 = DataFrame(df2.values - 1, columns=["B", "D", "C", "A"])
 | |
|         result_upper = df1.clip(lower=0, upper=df2)
 | |
|         expected_upper = df1.clip(lower=0, upper=df2[df1.columns])
 | |
|         result_lower = df1.clip(lower=df3, upper=3)
 | |
|         expected_lower = df1.clip(lower=df3[df1.columns], upper=3)
 | |
|         result_lower_upper = df1.clip(lower=df3, upper=df2)
 | |
|         expected_lower_upper = df1.clip(lower=df3[df1.columns], upper=df2[df1.columns])
 | |
|         tm.assert_frame_equal(result_upper, expected_upper)
 | |
|         tm.assert_frame_equal(result_lower, expected_lower)
 | |
|         tm.assert_frame_equal(result_lower_upper, expected_lower_upper)
 | |
| 
 | |
|     def test_clip_with_na_args(self, float_frame):
 | |
|         """Should process np.nan argument as None"""
 | |
|         # GH#17276
 | |
|         tm.assert_frame_equal(float_frame.clip(np.nan), float_frame)
 | |
|         tm.assert_frame_equal(float_frame.clip(upper=np.nan, lower=np.nan), float_frame)
 | |
| 
 | |
|         # GH#19992 and adjusted in GH#40420
 | |
|         df = DataFrame({"col_0": [1, 2, 3], "col_1": [4, 5, 6], "col_2": [7, 8, 9]})
 | |
| 
 | |
|         msg = "Downcasting behavior in Series and DataFrame methods 'where'"
 | |
|         # TODO: avoid this warning here?  seems like we should never be upcasting
 | |
|         #  in the first place?
 | |
|         with tm.assert_produces_warning(FutureWarning, match=msg):
 | |
|             result = df.clip(lower=[4, 5, np.nan], axis=0)
 | |
|         expected = DataFrame(
 | |
|             {"col_0": [4, 5, 3], "col_1": [4, 5, 6], "col_2": [7, 8, 9]}
 | |
|         )
 | |
|         tm.assert_frame_equal(result, expected)
 | |
| 
 | |
|         result = df.clip(lower=[4, 5, np.nan], axis=1)
 | |
|         expected = DataFrame(
 | |
|             {"col_0": [4, 4, 4], "col_1": [5, 5, 6], "col_2": [7, 8, 9]}
 | |
|         )
 | |
|         tm.assert_frame_equal(result, expected)
 | |
| 
 | |
|         # GH#40420
 | |
|         data = {"col_0": [9, -3, 0, -1, 5], "col_1": [-2, -7, 6, 8, -5]}
 | |
|         df = DataFrame(data)
 | |
|         t = Series([2, -4, np.nan, 6, 3])
 | |
|         with tm.assert_produces_warning(FutureWarning, match=msg):
 | |
|             result = df.clip(lower=t, axis=0)
 | |
|         expected = DataFrame({"col_0": [9, -3, 0, 6, 5], "col_1": [2, -4, 6, 8, 3]})
 | |
|         tm.assert_frame_equal(result, expected)
 | |
| 
 | |
|     def test_clip_int_data_with_float_bound(self):
 | |
|         # GH51472
 | |
|         df = DataFrame({"a": [1, 2, 3]})
 | |
|         result = df.clip(lower=1.5)
 | |
|         expected = DataFrame({"a": [1.5, 2.0, 3.0]})
 | |
|         tm.assert_frame_equal(result, expected)
 | |
| 
 | |
|     def test_clip_with_list_bound(self):
 | |
|         # GH#54817
 | |
|         df = DataFrame([1, 5])
 | |
|         expected = DataFrame([3, 5])
 | |
|         result = df.clip([3])
 | |
|         tm.assert_frame_equal(result, expected)
 | |
| 
 | |
|         expected = DataFrame([1, 3])
 | |
|         result = df.clip(upper=[3])
 | |
|         tm.assert_frame_equal(result, expected)
 |