# Source-viewer metadata carried over from the page export: 217 lines, 5.9 KiB, Python.
from datetime import datetime

import numpy as np
import pytest

import pandas as pd
from pandas import (
    DataFrame,
    Series,
    Timestamp,
    date_range,
)
import pandas._testing as tm

from pandas.tseries.offsets import BDay


def test_map(float_frame):
    """Check that ``DataFrame.map`` applies a function to every element."""
    doubled = float_frame.map(lambda x: x * 2)
    tm.assert_frame_equal(doubled, float_frame * 2)

    # Smoke check only: the result of mapping ``type`` is not inspected.
    float_frame.map(type)

    # GH 465: function returning tuples
    first_cell = float_frame.map(lambda x: (x, x))["A"].iloc[0]
    assert isinstance(first_cell, tuple)


@pytest.mark.parametrize("val", [1, 1.0])
def test_map_float_object_conversion(val):
    """An identity ``map`` over a mixed number/string column keeps object dtype."""
    # GH 2909: object conversion to float in constructor?
    df = DataFrame(data=[val, "a"])
    result = df.map(lambda x: x).dtypes[0]
    assert result == object


@pytest.mark.parametrize("na_action", [None, "ignore"])
def test_map_keeps_dtype(na_action):
    """``map`` keeps object and sparse-object column dtypes, even when empty."""
    # GH52219
    arr = Series(["a", np.nan, "b"])
    sparse_arr = arr.astype(pd.SparseDtype(object))
    df = DataFrame(data={"a": arr, "b": sparse_arr})

    def func(x):
        # Upper-case real strings; hand missing values back unchanged.
        return str.upper(x) if not pd.isna(x) else x

    result = df.map(func, na_action=na_action)

    expected_sparse = pd.array(["A", np.nan, "B"], dtype=pd.SparseDtype(object))
    expected_arr = expected_sparse.astype(object)
    expected = DataFrame({"a": expected_arr, "b": expected_sparse})

    tm.assert_frame_equal(result, expected)

    # Same comparison on a zero-row slice of both frames.
    result_empty = df.iloc[:0, :].map(func, na_action=na_action)
    expected_empty = expected.iloc[:0, :]
    tm.assert_frame_equal(result_empty, expected_empty)


| def test_map_str():
 | |
|     # GH 2786
 | |
|     df = DataFrame(np.random.default_rng(2).random((3, 4)))
 | |
|     df2 = df.copy()
 | |
|     cols = ["a", "a", "a", "a"]
 | |
|     df.columns = cols
 | |
| 
 | |
|     expected = df2.map(str)
 | |
|     expected.columns = cols
 | |
|     result = df.map(str)
 | |
|     tm.assert_frame_equal(result, expected)
 | |
| 
 | |
| 
 | |
@pytest.mark.parametrize(
    "col, val",
    [["datetime", Timestamp("20130101")], ["timedelta", pd.Timedelta("1 min")]],
)
def test_map_datetimelike(col, val):
    """``map(str)`` on a datetime/timedelta cell equals ``str`` of that cell."""
    # datetime/timedelta
    df = DataFrame(np.random.default_rng(2).random((3, 4)))
    df[col] = val
    result = df.map(str)
    assert result.loc[0, col] == str(df.loc[0, col])


@pytest.mark.parametrize(
    "expected",
    [
        DataFrame(),
        DataFrame(columns=list("ABC")),
        DataFrame(index=list("ABC")),
        DataFrame({"A": [], "B": [], "C": []}),
    ],
)
@pytest.mark.parametrize("func", [round, lambda x: x])
def test_map_empty(expected, func):
    """Mapping over an empty frame returns a frame equal to the input."""
    # GH 8222
    result = expected.map(func)
    tm.assert_frame_equal(result, expected)


| def test_map_kwargs():
 | |
|     # GH 40652
 | |
|     result = DataFrame([[1, 2], [3, 4]]).map(lambda x, y: x + y, y=2)
 | |
|     expected = DataFrame([[3, 4], [5, 6]])
 | |
|     tm.assert_frame_equal(result, expected)
 | |
| 
 | |
| 
 | |
def test_map_na_ignore(float_frame):
    """With ``na_action="ignore"`` the masked cells stay NA in the result."""
    # GH 23803
    strlen_frame = float_frame.map(lambda x: len(str(x)))
    float_frame_with_na = float_frame.copy()
    # Seeded random boolean mask selecting the cells to blank out.
    mask = np.random.default_rng(2).integers(0, 2, size=float_frame.shape, dtype=bool)
    float_frame_with_na[mask] = pd.NA
    strlen_frame_na_ignore = float_frame_with_na.map(
        lambda x: len(str(x)), na_action="ignore"
    )
    # Set float64 type to avoid upcast when setting NA below
    strlen_frame_with_na = strlen_frame.copy().astype("float64")
    strlen_frame_with_na[mask] = pd.NA
    tm.assert_frame_equal(strlen_frame_na_ignore, strlen_frame_with_na)


| def test_map_box_timestamps():
 | |
|     # GH 2689, GH 2627
 | |
|     ser = Series(date_range("1/1/2000", periods=10))
 | |
| 
 | |
|     def func(x):
 | |
|         return (x.hour, x.day, x.month)
 | |
| 
 | |
|     # it works!
 | |
|     DataFrame(ser).map(func)
 | |
| 
 | |
| 
 | |
| def test_map_box():
 | |
|     # ufunc will not be boxed. Same test cases as the test_map_box
 | |
|     df = DataFrame(
 | |
|         {
 | |
|             "a": [Timestamp("2011-01-01"), Timestamp("2011-01-02")],
 | |
|             "b": [
 | |
|                 Timestamp("2011-01-01", tz="US/Eastern"),
 | |
|                 Timestamp("2011-01-02", tz="US/Eastern"),
 | |
|             ],
 | |
|             "c": [pd.Timedelta("1 days"), pd.Timedelta("2 days")],
 | |
|             "d": [
 | |
|                 pd.Period("2011-01-01", freq="M"),
 | |
|                 pd.Period("2011-01-02", freq="M"),
 | |
|             ],
 | |
|         }
 | |
|     )
 | |
| 
 | |
|     result = df.map(lambda x: type(x).__name__)
 | |
|     expected = DataFrame(
 | |
|         {
 | |
|             "a": ["Timestamp", "Timestamp"],
 | |
|             "b": ["Timestamp", "Timestamp"],
 | |
|             "c": ["Timedelta", "Timedelta"],
 | |
|             "d": ["Period", "Period"],
 | |
|         }
 | |
|     )
 | |
|     tm.assert_frame_equal(result, expected)
 | |
| 
 | |
| 
 | |
| def test_frame_map_dont_convert_datetime64():
 | |
|     df = DataFrame({"x1": [datetime(1996, 1, 1)]})
 | |
| 
 | |
|     df = df.map(lambda x: x + BDay())
 | |
|     df = df.map(lambda x: x + BDay())
 | |
| 
 | |
|     result = df.x1.dtype
 | |
|     assert result == "M8[ns]"
 | |
| 
 | |
| 
 | |
| def test_map_function_runs_once():
 | |
|     df = DataFrame({"a": [1, 2, 3]})
 | |
|     values = []  # Save values function is applied to
 | |
| 
 | |
|     def reducing_function(val):
 | |
|         values.append(val)
 | |
| 
 | |
|     def non_reducing_function(val):
 | |
|         values.append(val)
 | |
|         return val
 | |
| 
 | |
|     for func in [reducing_function, non_reducing_function]:
 | |
|         del values[:]
 | |
| 
 | |
|         df.map(func)
 | |
|         assert values == df.a.to_list()
 | |
| 
 | |
| 
 | |
| def test_map_type():
 | |
|     # GH 46719
 | |
|     df = DataFrame(
 | |
|         {"col1": [3, "string", float], "col2": [0.25, datetime(2020, 1, 1), np.nan]},
 | |
|         index=["a", "b", "c"],
 | |
|     )
 | |
| 
 | |
|     result = df.map(type)
 | |
|     expected = DataFrame(
 | |
|         {"col1": [int, str, type], "col2": [float, datetime, float]},
 | |
|         index=["a", "b", "c"],
 | |
|     )
 | |
|     tm.assert_frame_equal(result, expected)
 | |
| 
 | |
| 
 | |
def test_map_invalid_na_action(float_frame):
    """An unrecognised ``na_action`` value makes ``map`` raise ``ValueError``."""
    # GH 23803
    with pytest.raises(ValueError, match="na_action must be .*Got 'abc'"):
        float_frame.map(lambda x: len(str(x)), na_action="abc")


| def test_applymap_deprecated():
 | |
|     # GH52353
 | |
|     df = DataFrame({"a": [1, 2, 3]})
 | |
|     msg = "DataFrame.applymap has been deprecated. Use DataFrame.map instead."
 | |
|     with tm.assert_produces_warning(FutureWarning, match=msg):
 | |
|         df.applymap(lambda x: x)
 |