done
This commit is contained in:
@ -0,0 +1,7 @@
|
||||
"""
|
||||
Test files dedicated to individual (stand-alone) Series methods
|
||||
|
||||
Ideally these files/tests should correspond 1-to-1 with tests.frame.methods
|
||||
|
||||
These may also present opportunities for sharing/de-duplicating test code.
|
||||
"""
|
||||
@ -0,0 +1,41 @@
|
||||
import pytest
|
||||
|
||||
from pandas import Index
|
||||
import pandas._testing as tm
|
||||
|
||||
|
||||
def test_add_prefix_suffix(string_series):
|
||||
with_prefix = string_series.add_prefix("foo#")
|
||||
expected = Index([f"foo#{c}" for c in string_series.index])
|
||||
tm.assert_index_equal(with_prefix.index, expected)
|
||||
|
||||
with_suffix = string_series.add_suffix("#foo")
|
||||
expected = Index([f"{c}#foo" for c in string_series.index])
|
||||
tm.assert_index_equal(with_suffix.index, expected)
|
||||
|
||||
with_pct_prefix = string_series.add_prefix("%")
|
||||
expected = Index([f"%{c}" for c in string_series.index])
|
||||
tm.assert_index_equal(with_pct_prefix.index, expected)
|
||||
|
||||
with_pct_suffix = string_series.add_suffix("%")
|
||||
expected = Index([f"{c}%" for c in string_series.index])
|
||||
tm.assert_index_equal(with_pct_suffix.index, expected)
|
||||
|
||||
|
||||
def test_add_prefix_suffix_axis(string_series):
|
||||
# GH 47819
|
||||
with_prefix = string_series.add_prefix("foo#", axis=0)
|
||||
expected = Index([f"foo#{c}" for c in string_series.index])
|
||||
tm.assert_index_equal(with_prefix.index, expected)
|
||||
|
||||
with_pct_suffix = string_series.add_suffix("#foo", axis=0)
|
||||
expected = Index([f"{c}#foo" for c in string_series.index])
|
||||
tm.assert_index_equal(with_pct_suffix.index, expected)
|
||||
|
||||
|
||||
def test_add_prefix_suffix_invalid_axis(string_series):
|
||||
with pytest.raises(ValueError, match="No axis named 1 for object type Series"):
|
||||
string_series.add_prefix("foo#", axis=1)
|
||||
|
||||
with pytest.raises(ValueError, match="No axis named 1 for object type Series"):
|
||||
string_series.add_suffix("foo#", axis=1)
|
||||
@ -0,0 +1,262 @@
|
||||
from datetime import timezone
|
||||
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
import pandas as pd
|
||||
from pandas import (
|
||||
Series,
|
||||
date_range,
|
||||
period_range,
|
||||
)
|
||||
import pandas._testing as tm
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"first_slice,second_slice",
|
||||
[
|
||||
[[2, None], [None, -5]],
|
||||
[[None, 0], [None, -5]],
|
||||
[[None, -5], [None, 0]],
|
||||
[[None, 0], [None, 0]],
|
||||
],
|
||||
)
|
||||
@pytest.mark.parametrize("fill", [None, -1])
|
||||
def test_align(datetime_series, first_slice, second_slice, join_type, fill):
|
||||
a = datetime_series[slice(*first_slice)]
|
||||
b = datetime_series[slice(*second_slice)]
|
||||
|
||||
aa, ab = a.align(b, join=join_type, fill_value=fill)
|
||||
|
||||
join_index = a.index.join(b.index, how=join_type)
|
||||
if fill is not None:
|
||||
diff_a = aa.index.difference(join_index)
|
||||
diff_b = ab.index.difference(join_index)
|
||||
if len(diff_a) > 0:
|
||||
assert (aa.reindex(diff_a) == fill).all()
|
||||
if len(diff_b) > 0:
|
||||
assert (ab.reindex(diff_b) == fill).all()
|
||||
|
||||
ea = a.reindex(join_index)
|
||||
eb = b.reindex(join_index)
|
||||
|
||||
if fill is not None:
|
||||
ea = ea.fillna(fill)
|
||||
eb = eb.fillna(fill)
|
||||
|
||||
tm.assert_series_equal(aa, ea)
|
||||
tm.assert_series_equal(ab, eb)
|
||||
assert aa.name == "ts"
|
||||
assert ea.name == "ts"
|
||||
assert ab.name == "ts"
|
||||
assert eb.name == "ts"
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"first_slice,second_slice",
|
||||
[
|
||||
[[2, None], [None, -5]],
|
||||
[[None, 0], [None, -5]],
|
||||
[[None, -5], [None, 0]],
|
||||
[[None, 0], [None, 0]],
|
||||
],
|
||||
)
|
||||
@pytest.mark.parametrize("method", ["pad", "bfill"])
|
||||
@pytest.mark.parametrize("limit", [None, 1])
|
||||
def test_align_fill_method(
|
||||
datetime_series, first_slice, second_slice, join_type, method, limit
|
||||
):
|
||||
a = datetime_series[slice(*first_slice)]
|
||||
b = datetime_series[slice(*second_slice)]
|
||||
|
||||
msg = (
|
||||
"The 'method', 'limit', and 'fill_axis' keywords in Series.align "
|
||||
"are deprecated"
|
||||
)
|
||||
with tm.assert_produces_warning(FutureWarning, match=msg):
|
||||
aa, ab = a.align(b, join=join_type, method=method, limit=limit)
|
||||
|
||||
join_index = a.index.join(b.index, how=join_type)
|
||||
ea = a.reindex(join_index)
|
||||
eb = b.reindex(join_index)
|
||||
|
||||
msg2 = "Series.fillna with 'method' is deprecated"
|
||||
with tm.assert_produces_warning(FutureWarning, match=msg2):
|
||||
ea = ea.fillna(method=method, limit=limit)
|
||||
eb = eb.fillna(method=method, limit=limit)
|
||||
|
||||
tm.assert_series_equal(aa, ea)
|
||||
tm.assert_series_equal(ab, eb)
|
||||
|
||||
|
||||
def test_align_nocopy(datetime_series, using_copy_on_write):
|
||||
b = datetime_series[:5].copy()
|
||||
|
||||
# do copy
|
||||
a = datetime_series.copy()
|
||||
ra, _ = a.align(b, join="left")
|
||||
ra[:5] = 5
|
||||
assert not (a[:5] == 5).any()
|
||||
|
||||
# do not copy
|
||||
a = datetime_series.copy()
|
||||
ra, _ = a.align(b, join="left", copy=False)
|
||||
ra[:5] = 5
|
||||
if using_copy_on_write:
|
||||
assert not (a[:5] == 5).any()
|
||||
else:
|
||||
assert (a[:5] == 5).all()
|
||||
|
||||
# do copy
|
||||
a = datetime_series.copy()
|
||||
b = datetime_series[:5].copy()
|
||||
_, rb = a.align(b, join="right")
|
||||
rb[:3] = 5
|
||||
assert not (b[:3] == 5).any()
|
||||
|
||||
# do not copy
|
||||
a = datetime_series.copy()
|
||||
b = datetime_series[:5].copy()
|
||||
_, rb = a.align(b, join="right", copy=False)
|
||||
rb[:2] = 5
|
||||
if using_copy_on_write:
|
||||
assert not (b[:2] == 5).any()
|
||||
else:
|
||||
assert (b[:2] == 5).all()
|
||||
|
||||
|
||||
def test_align_same_index(datetime_series, using_copy_on_write):
|
||||
a, b = datetime_series.align(datetime_series, copy=False)
|
||||
if not using_copy_on_write:
|
||||
assert a.index is datetime_series.index
|
||||
assert b.index is datetime_series.index
|
||||
else:
|
||||
assert a.index.is_(datetime_series.index)
|
||||
assert b.index.is_(datetime_series.index)
|
||||
|
||||
a, b = datetime_series.align(datetime_series, copy=True)
|
||||
assert a.index is not datetime_series.index
|
||||
assert b.index is not datetime_series.index
|
||||
assert a.index.is_(datetime_series.index)
|
||||
assert b.index.is_(datetime_series.index)
|
||||
|
||||
|
||||
def test_align_multiindex():
    """Align a MultiIndex series with a flat series sharing level name ``"b"``.

    ``s1.align(s2, join=X)`` and ``s2.align(s1, join=<mirror of X>)`` are
    expected to give the same pair of results, swapped.
    """
    # GH 10665
    full_midx = pd.MultiIndex.from_product(
        [range(2), range(3), range(2)], names=("a", "b", "c")
    )
    flat_idx = pd.Index(range(2), name="b")
    multi = Series(np.arange(12, dtype="int64"), index=full_midx)
    flat = Series(np.arange(2, dtype="int64"), index=flat_idx)

    # keep every row of the MultiIndex series
    multi_first = multi.align(flat, join="left")
    flat_first = flat.align(multi, join="right")

    want_multi = multi
    tm.assert_series_equal(want_multi, multi_first[0])
    tm.assert_series_equal(want_multi, flat_first[1])
    want_flat = Series([0, 0, 1, 1, np.nan, np.nan] * 2, index=full_midx)
    tm.assert_series_equal(want_flat, multi_first[1])
    tm.assert_series_equal(want_flat, flat_first[0])

    # keep only the labels present in the flat series
    multi_first = multi.align(flat, join="right")
    flat_first = flat.align(multi, join="left")

    reduced_midx = pd.MultiIndex.from_product(
        [range(2), range(2), range(2)], names=("a", "b", "c")
    )
    want_multi = Series([0, 1, 2, 3, 6, 7, 8, 9], index=reduced_midx)
    tm.assert_series_equal(want_multi, multi_first[0])
    tm.assert_series_equal(want_multi, flat_first[1])
    want_flat = Series([0, 0, 1, 1] * 2, index=reduced_midx)
    tm.assert_series_equal(want_flat, multi_first[1])
    tm.assert_series_equal(want_flat, flat_first[0])
|
||||
|
||||
|
||||
@pytest.mark.parametrize("method", ["backfill", "bfill", "pad", "ffill", None])
|
||||
def test_align_with_dataframe_method(method):
|
||||
# GH31788
|
||||
ser = Series(range(3), index=range(3))
|
||||
df = pd.DataFrame(0.0, index=range(3), columns=range(3))
|
||||
|
||||
msg = (
|
||||
"The 'method', 'limit', and 'fill_axis' keywords in Series.align "
|
||||
"are deprecated"
|
||||
)
|
||||
with tm.assert_produces_warning(FutureWarning, match=msg):
|
||||
result_ser, result_df = ser.align(df, method=method)
|
||||
tm.assert_series_equal(result_ser, ser)
|
||||
tm.assert_frame_equal(result_df, df)
|
||||
|
||||
|
||||
def test_align_dt64tzindex_mismatched_tzs():
    """Aligning series whose tz-aware indexes differ in tz yields UTC indexes."""
    eastern_idx = date_range("2001", periods=5, freq="h", tz="US/Eastern")
    values = np.random.default_rng(2).standard_normal(len(eastern_idx))
    eastern = Series(values, index=eastern_idx)
    central = eastern.tz_convert("US/Central")

    # different timezones convert to UTC
    for aligned in eastern.align(central):
        assert aligned.index.tz is timezone.utc
|
||||
|
||||
|
||||
def test_align_periodindex(join_type):
|
||||
rng = period_range("1/1/2000", "1/1/2010", freq="Y")
|
||||
ts = Series(np.random.default_rng(2).standard_normal(len(rng)), index=rng)
|
||||
|
||||
# TODO: assert something?
|
||||
ts.align(ts[::2], join=join_type)
|
||||
|
||||
|
||||
def test_align_stringindex(any_string_dtype):
|
||||
left = Series(range(3), index=pd.Index(["a", "b", "d"], dtype=any_string_dtype))
|
||||
right = Series(range(3), index=pd.Index(["a", "b", "c"], dtype=any_string_dtype))
|
||||
result_left, result_right = left.align(right)
|
||||
|
||||
expected_idx = pd.Index(["a", "b", "c", "d"], dtype=any_string_dtype)
|
||||
expected_left = Series([0, 1, np.nan, 2], index=expected_idx)
|
||||
expected_right = Series([0, 1, 2, np.nan], index=expected_idx)
|
||||
|
||||
tm.assert_series_equal(result_left, expected_left)
|
||||
tm.assert_series_equal(result_right, expected_right)
|
||||
|
||||
|
||||
def test_align_left_fewer_levels():
    """Align when the left MultiIndex has a subset of the right's levels.

    Both results are expected to carry levels ``a, c, b`` in that order.
    """
    # GH#45224
    left_idx = pd.MultiIndex.from_tuples([(1, 3)], names=["a", "c"])
    right_idx = pd.MultiIndex.from_tuples([(1, 2, 3)], names=["a", "b", "c"])
    left = Series([2], index=left_idx)
    right = Series([1], index=right_idx)

    got_left, got_right = left.align(right)

    want_right = Series(
        [1], index=pd.MultiIndex.from_tuples([(1, 3, 2)], names=["a", "c", "b"])
    )
    want_left = Series(
        [2], index=pd.MultiIndex.from_tuples([(1, 3, 2)], names=["a", "c", "b"])
    )
    tm.assert_series_equal(got_left, want_left)
    tm.assert_series_equal(got_right, want_right)
|
||||
|
||||
|
||||
def test_align_left_different_named_levels():
|
||||
# GH#45224
|
||||
left = Series(
|
||||
[2], index=pd.MultiIndex.from_tuples([(1, 4, 3)], names=["a", "d", "c"])
|
||||
)
|
||||
right = Series(
|
||||
[1], index=pd.MultiIndex.from_tuples([(1, 2, 3)], names=["a", "b", "c"])
|
||||
)
|
||||
result_left, result_right = left.align(right)
|
||||
|
||||
expected_left = Series(
|
||||
[2], index=pd.MultiIndex.from_tuples([(1, 4, 3, 2)], names=["a", "d", "c", "b"])
|
||||
)
|
||||
expected_right = Series(
|
||||
[1], index=pd.MultiIndex.from_tuples([(1, 4, 3, 2)], names=["a", "d", "c", "b"])
|
||||
)
|
||||
tm.assert_series_equal(result_left, expected_left)
|
||||
tm.assert_series_equal(result_right, expected_right)
|
||||
@ -0,0 +1,84 @@
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
from pandas import (
|
||||
Series,
|
||||
Timestamp,
|
||||
isna,
|
||||
)
|
||||
import pandas._testing as tm
|
||||
|
||||
|
||||
class TestSeriesArgsort:
|
||||
def test_argsort_axis(self):
|
||||
# GH#54257
|
||||
ser = Series(range(3))
|
||||
|
||||
msg = "No axis named 2 for object type Series"
|
||||
with pytest.raises(ValueError, match=msg):
|
||||
ser.argsort(axis=2)
|
||||
|
||||
def test_argsort_numpy(self, datetime_series):
|
||||
ser = datetime_series
|
||||
|
||||
res = np.argsort(ser).values
|
||||
expected = np.argsort(np.array(ser))
|
||||
tm.assert_numpy_array_equal(res, expected)
|
||||
|
||||
# with missing values
|
||||
ts = ser.copy()
|
||||
ts[::2] = np.nan
|
||||
|
||||
msg = "The behavior of Series.argsort in the presence of NA values"
|
||||
with tm.assert_produces_warning(
|
||||
FutureWarning, match=msg, check_stacklevel=False
|
||||
):
|
||||
result = np.argsort(ts)[1::2]
|
||||
expected = np.argsort(np.array(ts.dropna()))
|
||||
|
||||
tm.assert_numpy_array_equal(result.values, expected)
|
||||
|
||||
def test_argsort(self, datetime_series):
|
||||
argsorted = datetime_series.argsort()
|
||||
assert issubclass(argsorted.dtype.type, np.integer)
|
||||
|
||||
def test_argsort_dt64(self, unit):
|
||||
# GH#2967 (introduced bug in 0.11-dev I think)
|
||||
ser = Series(
|
||||
[Timestamp(f"201301{i:02d}") for i in range(1, 6)], dtype=f"M8[{unit}]"
|
||||
)
|
||||
assert ser.dtype == f"datetime64[{unit}]"
|
||||
shifted = ser.shift(-1)
|
||||
assert shifted.dtype == f"datetime64[{unit}]"
|
||||
assert isna(shifted[4])
|
||||
|
||||
result = ser.argsort()
|
||||
expected = Series(range(5), dtype=np.intp)
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
msg = "The behavior of Series.argsort in the presence of NA values"
|
||||
with tm.assert_produces_warning(FutureWarning, match=msg):
|
||||
result = shifted.argsort()
|
||||
expected = Series(list(range(4)) + [-1], dtype=np.intp)
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
def test_argsort_stable(self):
|
||||
ser = Series(np.random.default_rng(2).integers(0, 100, size=10000))
|
||||
mindexer = ser.argsort(kind="mergesort")
|
||||
qindexer = ser.argsort()
|
||||
|
||||
mexpected = np.argsort(ser.values, kind="mergesort")
|
||||
qexpected = np.argsort(ser.values, kind="quicksort")
|
||||
|
||||
tm.assert_series_equal(mindexer.astype(np.intp), Series(mexpected))
|
||||
tm.assert_series_equal(qindexer.astype(np.intp), Series(qexpected))
|
||||
msg = (
|
||||
r"ndarray Expected type <class 'numpy\.ndarray'>, "
|
||||
r"found <class 'pandas\.core\.series\.Series'> instead"
|
||||
)
|
||||
with pytest.raises(AssertionError, match=msg):
|
||||
tm.assert_numpy_array_equal(qindexer, mindexer)
|
||||
|
||||
def test_argsort_preserve_name(self, datetime_series):
|
||||
result = datetime_series.argsort()
|
||||
assert result.name == datetime_series.name
|
||||
@ -0,0 +1,205 @@
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
from pandas._libs.tslibs import IncompatibleFrequency
|
||||
|
||||
from pandas import (
|
||||
DatetimeIndex,
|
||||
PeriodIndex,
|
||||
Series,
|
||||
Timestamp,
|
||||
date_range,
|
||||
isna,
|
||||
notna,
|
||||
offsets,
|
||||
period_range,
|
||||
)
|
||||
import pandas._testing as tm
|
||||
|
||||
|
||||
class TestSeriesAsof:
    """Tests for ``Series.asof`` with datetime, period and integer indexes."""

    def test_asof_nanosecond_index_access(self):
        """asof and label lookup agree on an index with nanosecond spacing."""
        ts = Timestamp("20130101").as_unit("ns")._value
        dti = DatetimeIndex([ts + 50 + i for i in range(100)])
        ser = Series(np.random.default_rng(2).standard_normal(100), index=dti)

        first_value = ser.asof(ser.index[0])

        # GH#46903 previously incorrectly was "day"
        assert dti.resolution == "nanosecond"

        # this used to not work bc parsing was done by dateutil that didn't
        #  handle nanoseconds
        assert first_value == ser["2013-01-01 00:00:00.000000050"]

        expected_ts = np.datetime64("2013-01-01 00:00:00.000000050", "ns")
        assert first_value == ser[Timestamp(expected_ts)]

    def test_basic(self):
        """asof over an Index and over a list of dates, across a NaN gap.

        Rows 15..29 are NaN, so every lookup falling in ``[lb, ub)`` must
        return the value at ``lb``. Both input forms get the full check
        (previously the Index form was only checked for non-NA values).
        """
        N = 50
        rng = date_range("1/1/1990", periods=N, freq="53s")
        ts = Series(np.random.default_rng(2).standard_normal(N), index=rng)
        ts.iloc[15:30] = np.nan
        dates = date_range("1/1/1990", periods=N * 3, freq="25s")

        lb = ts.index[14]  # last valid label before the gap
        ub = ts.index[30]  # first valid label after the gap

        # array or list of dates
        for where in (dates, list(dates)):
            result = ts.asof(where)
            assert notna(result).all()

            mask = (result.index >= lb) & (result.index < ub)
            rs = result[mask]
            assert (rs == ts[lb]).all()

            val = result[result.index[result.index >= ub][0]]
            assert ts[ub] == val

    def test_scalar(self):
        """Scalar lookups: inside a NaN gap, by string, exact hit, too early."""
        N = 30
        rng = date_range("1/1/1990", periods=N, freq="53s")
        # Explicit cast to float avoid implicit cast when setting nan
        ts = Series(np.arange(N), index=rng, dtype="float")
        ts.iloc[5:10] = np.nan
        ts.iloc[15:20] = np.nan

        val1 = ts.asof(ts.index[7])
        val2 = ts.asof(ts.index[19])

        assert val1 == ts.iloc[4]
        assert val2 == ts.iloc[14]

        # accepts strings
        val1 = ts.asof(str(ts.index[7]))
        assert val1 == ts.iloc[4]

        # in there
        result = ts.asof(ts.index[3])
        assert result == ts.iloc[3]

        # no as of value
        d = ts.index[0] - offsets.BDay()
        assert np.isnan(ts.asof(d))

    def test_with_nan(self):
        """asof on an upsampled series whose resample already produced NaNs."""
        # basic asof test
        rng = date_range("1/1/2000", "1/2/2000", freq="4h")
        s = Series(np.arange(len(rng)), index=rng)
        r = s.resample("2h").mean()

        result = r.asof(r.index)
        expected = Series(
            [0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6.0],
            index=date_range("1/1/2000", "1/2/2000", freq="2h"),
        )
        tm.assert_series_equal(result, expected)

        r.iloc[3:5] = np.nan
        result = r.asof(r.index)
        expected = Series(
            [0, 0, 1, 1, 1, 1, 3, 3, 4, 4, 5, 5, 6.0],
            index=date_range("1/1/2000", "1/2/2000", freq="2h"),
        )
        tm.assert_series_equal(result, expected)

        r.iloc[-3:] = np.nan
        result = r.asof(r.index)
        expected = Series(
            [0, 0, 1, 1, 1, 1, 3, 3, 4, 4, 4, 4, 4.0],
            index=date_range("1/1/2000", "1/2/2000", freq="2h"),
        )
        tm.assert_series_equal(result, expected)

    def test_periodindex(self):
        """asof on a PeriodIndex series with datetime lookups.

        Both the Index and the list form of ``dates`` get the full gap check
        (previously the Index form was only checked for non-NA values).
        """
        N = 50
        rng = period_range("1/1/1990", periods=N, freq="h")
        ts = Series(np.random.default_rng(2).standard_normal(N), index=rng)
        ts.iloc[15:30] = np.nan
        dates = date_range("1/1/1990", periods=N * 3, freq="37min")

        lb = ts.index[14]
        ub = ts.index[30]

        # array or list of dates
        for where in (dates, list(dates)):
            result = ts.asof(where)
            assert notna(result).all()

            # compare on period labels, since the result index is datetime-like
            pix = PeriodIndex(result.index.values, freq="h")
            mask = (pix >= lb) & (pix < ub)
            rs = result[mask]
            assert (rs == ts[lb]).all()

        ts.iloc[5:10] = np.nan
        ts.iloc[15:20] = np.nan

        val1 = ts.asof(ts.index[7])
        val2 = ts.asof(ts.index[19])

        assert val1 == ts.iloc[4]
        assert val2 == ts.iloc[14]

        # accepts strings
        val1 = ts.asof(str(ts.index[7]))
        assert val1 == ts.iloc[4]

        # in there
        assert ts.asof(ts.index[3]) == ts.iloc[3]

        # no as of value
        d = ts.index[0].to_timestamp() - offsets.BDay()
        assert isna(ts.asof(d))

        # Mismatched freq
        msg = "Input has different freq"
        with pytest.raises(IncompatibleFrequency, match=msg):
            ts.asof(rng.asfreq("D"))

    def test_errors(self):
        """Unsorted index and the ``subset`` keyword both raise ``ValueError``."""
        s = Series(
            [1, 2, 3],
            index=[Timestamp("20130101"), Timestamp("20130103"), Timestamp("20130102")],
        )

        # non-monotonic
        assert not s.index.is_monotonic_increasing
        with pytest.raises(ValueError, match="requires a sorted index"):
            s.asof(s.index[0])

        # subset with Series
        N = 10
        rng = date_range("1/1/1990", periods=N, freq="53s")
        s = Series(np.random.default_rng(2).standard_normal(N), index=rng)
        with pytest.raises(ValueError, match="not valid for Series"):
            s.asof(s.index[0], subset="foo")

    def test_all_nans(self):
        """asof on an all-NaN series returns NaN and keeps the series name."""
        # GH 15713
        # series is all nans

        # testing non-default indexes
        N = 50
        rng = date_range("1/1/1990", periods=N, freq="53s")

        dates = date_range("1/1/1990", periods=N * 3, freq="25s")
        result = Series(np.nan, index=rng).asof(dates)
        expected = Series(np.nan, index=dates)
        tm.assert_series_equal(result, expected)

        # testing scalar input
        date = date_range("1/1/1990", periods=N * 3, freq="25s")[0]
        result = Series(np.nan, index=rng).asof(date)
        assert isna(result)

        # test name is propagated
        result = Series(np.nan, index=[1, 2, 3, 4], name="test").asof([4, 5])
        expected = Series(np.nan, index=[4, 5], name="test")
        tm.assert_series_equal(result, expected)
|
||||
@ -0,0 +1,689 @@
|
||||
from datetime import (
|
||||
datetime,
|
||||
timedelta,
|
||||
)
|
||||
from importlib import reload
|
||||
import string
|
||||
import sys
|
||||
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
from pandas._libs.tslibs import iNaT
|
||||
import pandas.util._test_decorators as td
|
||||
|
||||
from pandas import (
|
||||
NA,
|
||||
Categorical,
|
||||
CategoricalDtype,
|
||||
DatetimeTZDtype,
|
||||
Index,
|
||||
Interval,
|
||||
NaT,
|
||||
Series,
|
||||
Timedelta,
|
||||
Timestamp,
|
||||
cut,
|
||||
date_range,
|
||||
to_datetime,
|
||||
)
|
||||
import pandas._testing as tm
|
||||
|
||||
|
||||
def rand_str(nchars: int) -> str:
    """
    Build a ``str`` of ``nchars`` ASCII letters and digits.

    The generator is re-created with seed 2 on every call, so the result
    depends only on ``nchars``.
    """
    alphabet = string.ascii_letters + string.digits
    pool = np.array(list(alphabet), dtype=(np.str_, 1))
    picks = np.random.default_rng(2).choice(pool, nchars)
    return "".join(picks)
|
||||
|
||||
|
||||
class TestAstypeAPI:
    """Argument-validation tests for ``Series.astype``."""

    def test_astype_unitless_dt64_raises(self):
        """Casting datetime data to unit-less ``datetime64`` raises ``TypeError``."""
        # GH#47844
        ser = Series(["1970-01-01", "1970-01-01", "1970-01-01"], dtype="datetime64[ns]")
        df = ser.to_frame()

        msg = "Casting to unit-less dtype 'datetime64' is not supported"
        for target in (np.datetime64, "datetime64"):
            for obj in (ser, df):
                with pytest.raises(TypeError, match=msg):
                    obj.astype(target)

    def test_arg_for_errors_in_astype(self):
        """An invalid ``errors`` value raises; ``errors="raise"`` is accepted."""
        # see GH#14878
        ser = Series([1, 2, 3])

        msg = (
            r"Expected value of kwarg 'errors' to be one of \['raise', "
            r"'ignore'\]\. Supplied value is 'False'"
        )
        with pytest.raises(ValueError, match=msg):
            ser.astype(np.float64, errors=False)

        ser.astype(np.int8, errors="raise")

    @pytest.mark.parametrize("dtype_class", [dict, Series])
    def test_astype_dict_like(self, dtype_class):
        """A dtype mapping must be keyed by exactly the Series name."""
        # see GH#7271
        ser = Series(range(0, 10, 2), name="abc")

        to_str = dtype_class({"abc": str})
        got = ser.astype(to_str)
        want = Series(["0", "2", "4", "6", "8"], name="abc", dtype="str")
        tm.assert_series_equal(got, want)

        to_float = dtype_class({"abc": "float64"})
        got = ser.astype(to_float)
        want = Series([0.0, 2.0, 4.0, 6.0, 8.0], dtype="float64", name="abc")
        tm.assert_series_equal(got, want)

        msg = (
            "Only the Series name can be used for the key in Series dtype "
            r"mappings\."
        )

        extra_key = dtype_class({"abc": str, "def": str})
        with pytest.raises(KeyError, match=msg):
            ser.astype(extra_key)

        wrong_key = dtype_class({0: str})
        with pytest.raises(KeyError, match=msg):
            ser.astype(wrong_key)

        # GH#16717
        # if dtypes provided is empty, it should error
        empty = dtype_class({}, dtype=object) if dtype_class is Series else dtype_class({})

        with pytest.raises(KeyError, match=msg):
            ser.astype(empty)
|
||||
|
||||
|
||||
class TestAstype:
|
||||
@pytest.mark.parametrize("tz", [None, "UTC", "US/Pacific"])
|
||||
def test_astype_object_to_dt64_non_nano(self, tz):
|
||||
# GH#55756, GH#54620
|
||||
ts = Timestamp("2999-01-01")
|
||||
dtype = "M8[us]"
|
||||
if tz is not None:
|
||||
dtype = f"M8[us, {tz}]"
|
||||
vals = [ts, "2999-01-02 03:04:05.678910", 2500]
|
||||
ser = Series(vals, dtype=object)
|
||||
result = ser.astype(dtype)
|
||||
|
||||
# The 2500 is interpreted as microseconds, consistent with what
|
||||
# we would get if we created DatetimeIndexes from vals[:2] and vals[2:]
|
||||
# and concated the results.
|
||||
pointwise = [
|
||||
vals[0].tz_localize(tz),
|
||||
Timestamp(vals[1], tz=tz),
|
||||
to_datetime(vals[2], unit="us", utc=True).tz_convert(tz),
|
||||
]
|
||||
exp_vals = [x.as_unit("us").asm8 for x in pointwise]
|
||||
exp_arr = np.array(exp_vals, dtype="M8[us]")
|
||||
expected = Series(exp_arr, dtype="M8[us]")
|
||||
if tz is not None:
|
||||
expected = expected.dt.tz_localize("UTC").dt.tz_convert(tz)
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
def test_astype_mixed_object_to_dt64tz(self):
|
||||
# pre-2.0 this raised ValueError bc of tz mismatch
|
||||
# xref GH#32581
|
||||
ts = Timestamp("2016-01-04 05:06:07", tz="US/Pacific")
|
||||
ts2 = ts.tz_convert("Asia/Tokyo")
|
||||
|
||||
ser = Series([ts, ts2], dtype=object)
|
||||
res = ser.astype("datetime64[ns, Europe/Brussels]")
|
||||
expected = Series(
|
||||
[ts.tz_convert("Europe/Brussels"), ts2.tz_convert("Europe/Brussels")],
|
||||
dtype="datetime64[ns, Europe/Brussels]",
|
||||
)
|
||||
tm.assert_series_equal(res, expected)
|
||||
|
||||
@pytest.mark.parametrize("dtype", np.typecodes["All"])
|
||||
def test_astype_empty_constructor_equality(self, dtype):
|
||||
# see GH#15524
|
||||
|
||||
if dtype not in (
|
||||
"S",
|
||||
"V", # poor support (if any) currently
|
||||
"M",
|
||||
"m", # Generic timestamps raise a ValueError. Already tested.
|
||||
):
|
||||
init_empty = Series([], dtype=dtype)
|
||||
as_type_empty = Series([]).astype(dtype)
|
||||
tm.assert_series_equal(init_empty, as_type_empty)
|
||||
|
||||
@pytest.mark.parametrize("dtype", [str, np.str_])
@pytest.mark.parametrize(
    "series",
    [
        Series([string.digits * 10, rand_str(63), rand_str(64), rand_str(1000)]),
        Series([string.digits * 10, rand_str(63), rand_str(64), np.nan, 1.0]),
    ],
)
def test_astype_str_map(self, dtype, series, using_infer_string):
    """``astype`` to a string type matches mapping ``str`` over the values.

    When ``using_infer_string`` is set and ``dtype`` is ``str``, ``np.nan``
    entries are expected to stay NaN; otherwise the mapped result is compared
    as object dtype under ``using_infer_string``.
    """
    # see GH#4405
    result = series.astype(dtype)

    if using_infer_string and dtype is str:
        expected = series.map(lambda val: str(val) if val is not np.nan else np.nan)
    else:
        expected = series.map(str)
        if using_infer_string:
            expected = expected.astype(object)
    tm.assert_series_equal(result, expected)
|
||||
|
||||
def test_astype_float_to_period(self):
|
||||
result = Series([np.nan]).astype("period[D]")
|
||||
expected = Series([NaT], dtype="period[D]")
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
def test_astype_no_pandas_dtype(self):
|
||||
# https://github.com/pandas-dev/pandas/pull/24866
|
||||
ser = Series([1, 2], dtype="int64")
|
||||
# Don't have NumpyEADtype in the public API, so we use `.array.dtype`,
|
||||
# which is a NumpyEADtype.
|
||||
result = ser.astype(ser.array.dtype)
|
||||
tm.assert_series_equal(result, ser)
|
||||
|
||||
@pytest.mark.parametrize("dtype", [np.datetime64, np.timedelta64])
|
||||
def test_astype_generic_timestamp_no_frequency(self, dtype, request):
|
||||
# see GH#15524, GH#15987
|
||||
data = [1]
|
||||
ser = Series(data)
|
||||
|
||||
if np.dtype(dtype).name not in ["timedelta64", "datetime64"]:
|
||||
mark = pytest.mark.xfail(reason="GH#33890 Is assigned ns unit")
|
||||
request.applymarker(mark)
|
||||
|
||||
msg = (
|
||||
rf"The '{dtype.__name__}' dtype has no unit\. "
|
||||
rf"Please pass in '{dtype.__name__}\[ns\]' instead."
|
||||
)
|
||||
with pytest.raises(ValueError, match=msg):
|
||||
ser.astype(dtype)
|
||||
|
||||
def test_astype_dt64_to_str(self):
|
||||
# GH#10442 : testing astype(str) is correct for Series/DatetimeIndex
|
||||
dti = date_range("2012-01-01", periods=3)
|
||||
result = Series(dti).astype(str)
|
||||
expected = Series(["2012-01-01", "2012-01-02", "2012-01-03"], dtype="str")
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
def test_astype_dt64tz_to_str(self):
|
||||
# GH#10442 : testing astype(str) is correct for Series/DatetimeIndex
|
||||
dti_tz = date_range("2012-01-01", periods=3, tz="US/Eastern")
|
||||
result = Series(dti_tz).astype(str)
|
||||
expected = Series(
|
||||
[
|
||||
"2012-01-01 00:00:00-05:00",
|
||||
"2012-01-02 00:00:00-05:00",
|
||||
"2012-01-03 00:00:00-05:00",
|
||||
],
|
||||
dtype="str",
|
||||
)
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
def test_astype_datetime(self, unit):
|
||||
ser = Series(iNaT, dtype=f"M8[{unit}]", index=range(5))
|
||||
|
||||
ser = ser.astype("O")
|
||||
assert ser.dtype == np.object_
|
||||
|
||||
ser = Series([datetime(2001, 1, 2, 0, 0)])
|
||||
|
||||
ser = ser.astype("O")
|
||||
assert ser.dtype == np.object_
|
||||
|
||||
ser = Series(
|
||||
[datetime(2001, 1, 2, 0, 0) for i in range(3)], dtype=f"M8[{unit}]"
|
||||
)
|
||||
|
||||
ser[1] = np.nan
|
||||
assert ser.dtype == f"M8[{unit}]"
|
||||
|
||||
ser = ser.astype("O")
|
||||
assert ser.dtype == np.object_
|
||||
|
||||
def test_astype_datetime64tz(self):
|
||||
ser = Series(date_range("20130101", periods=3, tz="US/Eastern"))
|
||||
|
||||
# astype
|
||||
result = ser.astype(object)
|
||||
expected = Series(ser.astype(object), dtype=object)
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
result = Series(ser.values).dt.tz_localize("UTC").dt.tz_convert(ser.dt.tz)
|
||||
tm.assert_series_equal(result, ser)
|
||||
|
||||
# astype - object, preserves on construction
|
||||
result = Series(ser.astype(object))
|
||||
expected = ser.astype(object)
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
# astype - datetime64[ns, tz]
|
||||
msg = "Cannot use .astype to convert from timezone-naive"
|
||||
with pytest.raises(TypeError, match=msg):
|
||||
# dt64->dt64tz astype deprecated
|
||||
Series(ser.values).astype("datetime64[ns, US/Eastern]")
|
||||
|
||||
with pytest.raises(TypeError, match=msg):
|
||||
# dt64->dt64tz astype deprecated
|
||||
Series(ser.values).astype(ser.dtype)
|
||||
|
||||
result = ser.astype("datetime64[ns, CET]")
|
||||
expected = Series(date_range("20130101 06:00:00", periods=3, tz="CET"))
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
def test_astype_str_cast_dt64(self):
|
||||
# see GH#9757
|
||||
ts = Series([Timestamp("2010-01-04 00:00:00")])
|
||||
res = ts.astype(str)
|
||||
|
||||
expected = Series(["2010-01-04"], dtype="str")
|
||||
tm.assert_series_equal(res, expected)
|
||||
|
||||
ts = Series([Timestamp("2010-01-04 00:00:00", tz="US/Eastern")])
|
||||
res = ts.astype(str)
|
||||
|
||||
expected = Series(["2010-01-04 00:00:00-05:00"], dtype="str")
|
||||
tm.assert_series_equal(res, expected)
|
||||
|
||||
def test_astype_str_cast_td64(self):
|
||||
# see GH#9757
|
||||
|
||||
td = Series([Timedelta(1, unit="d")])
|
||||
ser = td.astype(str)
|
||||
|
||||
expected = Series(["1 days"], dtype="str")
|
||||
tm.assert_series_equal(ser, expected)
|
||||
|
||||
def test_dt64_series_astype_object(self):
|
||||
dt64ser = Series(date_range("20130101", periods=3))
|
||||
result = dt64ser.astype(object)
|
||||
assert isinstance(result.iloc[0], datetime)
|
||||
assert result.dtype == np.object_
|
||||
|
||||
def test_td64_series_astype_object(self):
|
||||
tdser = Series(["59 Days", "59 Days", "NaT"], dtype="timedelta64[ns]")
|
||||
result = tdser.astype(object)
|
||||
assert isinstance(result.iloc[0], timedelta)
|
||||
assert result.dtype == np.object_
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"data, dtype",
|
||||
[
|
||||
(["x", "y", "z"], "string[python]"),
|
||||
pytest.param(
|
||||
["x", "y", "z"],
|
||||
"string[pyarrow]",
|
||||
marks=td.skip_if_no("pyarrow"),
|
||||
),
|
||||
(["x", "y", "z"], "category"),
|
||||
(3 * [Timestamp("2020-01-01", tz="UTC")], None),
|
||||
(3 * [Interval(0, 1)], None),
|
||||
],
|
||||
)
|
||||
@pytest.mark.parametrize("errors", ["raise", "ignore"])
|
||||
def test_astype_ignores_errors_for_extension_dtypes(self, data, dtype, errors):
|
||||
# https://github.com/pandas-dev/pandas/issues/35471
|
||||
ser = Series(data, dtype=dtype)
|
||||
if errors == "ignore":
|
||||
expected = ser
|
||||
result = ser.astype(float, errors="ignore")
|
||||
tm.assert_series_equal(result, expected)
|
||||
else:
|
||||
msg = "(Cannot cast)|(could not convert)"
|
||||
with pytest.raises((ValueError, TypeError), match=msg):
|
||||
ser.astype(float, errors=errors)
|
||||
|
||||
@pytest.mark.parametrize("dtype", [np.float16, np.float32, np.float64])
|
||||
def test_astype_from_float_to_str(self, dtype):
|
||||
# https://github.com/pandas-dev/pandas/issues/36451
|
||||
ser = Series([0.1], dtype=dtype)
|
||||
result = ser.astype(str)
|
||||
expected = Series(["0.1"], dtype="str")
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"value, string_value",
|
||||
[
|
||||
(None, "None"),
|
||||
(np.nan, "nan"),
|
||||
(NA, "<NA>"),
|
||||
],
|
||||
)
|
||||
def test_astype_to_str_preserves_na(self, value, string_value, using_infer_string):
|
||||
# https://github.com/pandas-dev/pandas/issues/36904
|
||||
ser = Series(["a", "b", value], dtype=object)
|
||||
result = ser.astype(str)
|
||||
expected = Series(
|
||||
["a", "b", None if using_infer_string else string_value], dtype="str"
|
||||
)
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
@pytest.mark.parametrize("dtype", ["float32", "float64", "int64", "int32"])
|
||||
def test_astype(self, dtype):
|
||||
ser = Series(np.random.default_rng(2).standard_normal(5), name="foo")
|
||||
as_typed = ser.astype(dtype)
|
||||
|
||||
assert as_typed.dtype == dtype
|
||||
assert as_typed.name == ser.name
|
||||
|
||||
@pytest.mark.parametrize("value", [np.nan, np.inf])
|
||||
@pytest.mark.parametrize("dtype", [np.int32, np.int64])
|
||||
def test_astype_cast_nan_inf_int(self, dtype, value):
|
||||
# gh-14265: check NaN and inf raise error when converting to int
|
||||
msg = "Cannot convert non-finite values \\(NA or inf\\) to integer"
|
||||
ser = Series([value])
|
||||
|
||||
with pytest.raises(ValueError, match=msg):
|
||||
ser.astype(dtype)
|
||||
|
||||
@pytest.mark.parametrize("dtype", [int, np.int8, np.int64])
|
||||
def test_astype_cast_object_int_fail(self, dtype):
|
||||
arr = Series(["car", "house", "tree", "1"])
|
||||
msg = r"invalid literal for int\(\) with base 10: 'car'"
|
||||
with pytest.raises(ValueError, match=msg):
|
||||
arr.astype(dtype)
|
||||
|
||||
def test_astype_float_to_uint_negatives_raise(
|
||||
self, float_numpy_dtype, any_unsigned_int_numpy_dtype
|
||||
):
|
||||
# GH#45151 We don't cast negative numbers to nonsense values
|
||||
# TODO: same for EA float/uint dtypes, signed integers?
|
||||
arr = np.arange(5).astype(float_numpy_dtype) - 3 # includes negatives
|
||||
ser = Series(arr)
|
||||
|
||||
msg = "Cannot losslessly cast from .* to .*"
|
||||
with pytest.raises(ValueError, match=msg):
|
||||
ser.astype(any_unsigned_int_numpy_dtype)
|
||||
|
||||
with pytest.raises(ValueError, match=msg):
|
||||
ser.to_frame().astype(any_unsigned_int_numpy_dtype)
|
||||
|
||||
with pytest.raises(ValueError, match=msg):
|
||||
# We currently catch and re-raise in Index.astype
|
||||
Index(ser).astype(any_unsigned_int_numpy_dtype)
|
||||
|
||||
with pytest.raises(ValueError, match=msg):
|
||||
ser.array.astype(any_unsigned_int_numpy_dtype)
|
||||
|
||||
def test_astype_cast_object_int(self):
|
||||
arr = Series(["1", "2", "3", "4"], dtype=object)
|
||||
result = arr.astype(int)
|
||||
|
||||
tm.assert_series_equal(result, Series(np.arange(1, 5)))
|
||||
|
||||
def test_astype_unicode(self, using_infer_string):
    """Check ``astype(np.str_)`` on long ASCII, non-ASCII and bytes data."""
    # see GH#7758
    digits = string.digits
    test_series = [
        Series([digits * 10, rand_str(63), rand_str(64), rand_str(1000)]),
        Series(["データーサイエンス、お前はもう死んでいる"]),
    ]

    if sys.getdefaultencoding() == "utf-8":
        # GH#45326 as of 2.0 Series.astype matches Index.astype by handling
        # bytes with obj.decode() instead of str(obj)
        item = "野菜食べないとやばい"
        ser = Series([item.encode()])
        result = ser.astype(np.str_)
        expected = Series([item], dtype=object)
        tm.assert_series_equal(result, expected)

    for ser in test_series:
        res = ser.astype(np.str_)
        expec = ser.map(str)
        if using_infer_string:
            expec = expec.astype(object)
        tm.assert_series_equal(res, expec)

    # The former "restore the default encoding" epilogue was removed: its
    # guard variable was always None, so the branch could never execute.
|
||||
|
||||
def test_astype_bytes(self):
|
||||
# GH#39474
|
||||
result = Series(["foo", "bar", "baz"]).astype(bytes)
|
||||
assert result.dtypes == np.dtype("S3")
|
||||
|
||||
def test_astype_nan_to_bool(self):
|
||||
# GH#43018
|
||||
ser = Series(np.nan, dtype="object")
|
||||
result = ser.astype("bool")
|
||||
expected = Series(True, dtype="bool")
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"dtype",
|
||||
tm.ALL_INT_EA_DTYPES + tm.FLOAT_EA_DTYPES,
|
||||
)
|
||||
def test_astype_ea_to_datetimetzdtype(self, dtype):
|
||||
# GH37553
|
||||
ser = Series([4, 0, 9], dtype=dtype)
|
||||
result = ser.astype(DatetimeTZDtype(tz="US/Pacific"))
|
||||
|
||||
expected = Series(
|
||||
{
|
||||
0: Timestamp("1969-12-31 16:00:00.000000004-08:00", tz="US/Pacific"),
|
||||
1: Timestamp("1969-12-31 16:00:00.000000000-08:00", tz="US/Pacific"),
|
||||
2: Timestamp("1969-12-31 16:00:00.000000009-08:00", tz="US/Pacific"),
|
||||
}
|
||||
)
|
||||
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
def test_astype_retain_attrs(self, any_numpy_dtype):
|
||||
# GH#44414
|
||||
ser = Series([0, 1, 2, 3])
|
||||
ser.attrs["Location"] = "Michigan"
|
||||
|
||||
result = ser.astype(any_numpy_dtype).attrs
|
||||
expected = ser.attrs
|
||||
|
||||
tm.assert_dict_equal(expected, result)
|
||||
|
||||
|
||||
class TestAstypeString:
|
||||
@pytest.mark.parametrize(
|
||||
"data, dtype",
|
||||
[
|
||||
([True, NA], "boolean"),
|
||||
(["A", NA], "category"),
|
||||
(["2020-10-10", "2020-10-10"], "datetime64[ns]"),
|
||||
(["2020-10-10", "2020-10-10", NaT], "datetime64[ns]"),
|
||||
(
|
||||
["2012-01-01 00:00:00-05:00", NaT],
|
||||
"datetime64[ns, US/Eastern]",
|
||||
),
|
||||
([1, None], "UInt16"),
|
||||
(["1/1/2021", "2/1/2021"], "period[M]"),
|
||||
(["1/1/2021", "2/1/2021", NaT], "period[M]"),
|
||||
(["1 Day", "59 Days", NaT], "timedelta64[ns]"),
|
||||
# currently no way to parse IntervalArray from a list of strings
|
||||
],
|
||||
)
|
||||
def test_astype_string_to_extension_dtype_roundtrip(
|
||||
self, data, dtype, request, nullable_string_dtype
|
||||
):
|
||||
if dtype == "boolean":
|
||||
mark = pytest.mark.xfail(
|
||||
reason="TODO StringArray.astype() with missing values #GH40566"
|
||||
)
|
||||
request.applymarker(mark)
|
||||
# GH-40351
|
||||
ser = Series(data, dtype=dtype)
|
||||
|
||||
# Note: just passing .astype(dtype) fails for dtype="category"
|
||||
# with bc ser.dtype.categories will be object dtype whereas
|
||||
# result.dtype.categories will have string dtype
|
||||
result = ser.astype(nullable_string_dtype).astype(ser.dtype)
|
||||
tm.assert_series_equal(result, ser)
|
||||
|
||||
|
||||
class TestAstypeCategorical:
    """Tests for ``Series.astype`` to and from categorical dtypes."""

    def test_astype_categorical_to_other(self):
        """Cast a binned categorical Series to category, float, str, int, object."""
        labels = Categorical([f"{i} - {i + 499}" for i in range(0, 10000, 500)])
        ser = Series(np.random.default_rng(2).integers(0, 10000, 100)).sort_values()
        ser = cut(ser, range(0, 10500, 500), right=False, labels=labels)

        expected = ser
        tm.assert_series_equal(ser.astype("category"), expected)
        tm.assert_series_equal(ser.astype(CategoricalDtype()), expected)
        # Group the alternation so the whole message is matched; ungrouped,
        # "a|b" splits the pattern into two unrelated halves.
        msg = r"Cannot cast (object|str) dtype to float64"
        with pytest.raises(ValueError, match=msg):
            ser.astype("float64")

        letters = Series(Categorical(["a", "b", "b", "a", "a", "c", "c", "c"]))
        exp = Series(["a", "b", "b", "a", "a", "c", "c", "c"], dtype="str")
        tm.assert_series_equal(letters.astype("str"), exp)
        s2 = Series(Categorical(["1", "2", "3", "4"]))
        exp2 = Series([1, 2, 3, 4]).astype("int")
        tm.assert_series_equal(s2.astype("int"), exp2)

        # object don't sort correctly, so just compare that we have the same
        # values
        def assert_same_values(a, b):
            tm.assert_almost_equal(np.sort(np.unique(a)), np.sort(np.unique(b)))

        expected = Series(np.array(ser.values), name="value_group")
        assert_same_values(ser.astype("object"), expected)
        assert_same_values(ser.astype(np.object_), expected)

        # array conversion
        tm.assert_almost_equal(np.array(ser), np.array(ser.values))

        tm.assert_series_equal(ser.astype("category"), ser)
        tm.assert_series_equal(ser.astype(CategoricalDtype()), ser)

        roundtrip_expected = ser.cat.set_categories(
            ser.cat.categories.sort_values()
        ).cat.remove_unused_categories()
        result = ser.astype("object").astype("category")
        tm.assert_series_equal(result, roundtrip_expected)
        result = ser.astype("object").astype(CategoricalDtype())
        tm.assert_series_equal(result, roundtrip_expected)

    def test_astype_categorical_invalid_conversions(self):
        """Passing the ``Categorical`` class as a dtype must raise TypeError."""
        # invalid conversion (these are NOT a dtype)
        labels = Categorical([f"{i} - {i + 499}" for i in range(0, 10000, 500)])
        ser = Series(np.random.default_rng(2).integers(0, 10000, 100)).sort_values()
        ser = cut(ser, range(0, 10500, 500), right=False, labels=labels)

        msg = (
            "dtype '<class 'pandas.core.arrays.categorical.Categorical'>' "
            "not understood"
        )
        with pytest.raises(TypeError, match=msg):
            ser.astype(Categorical)
        with pytest.raises(TypeError, match=msg):
            ser.astype("object").astype(Categorical)

    def test_astype_categoricaldtype(self):
        """Cast strings to explicit ``CategoricalDtype`` instances."""
        ser = Series(["a", "b", "a"])
        result = ser.astype(CategoricalDtype(["a", "b"], ordered=True))
        expected = Series(Categorical(["a", "b", "a"], ordered=True))
        tm.assert_series_equal(result, expected)

        result = ser.astype(CategoricalDtype(["a", "b"], ordered=False))
        expected = Series(Categorical(["a", "b", "a"], ordered=False))
        tm.assert_series_equal(result, expected)

        result = ser.astype(CategoricalDtype(["a", "b", "c"], ordered=False))
        expected = Series(
            Categorical(["a", "b", "a"], categories=["a", "b", "c"], ordered=False)
        )
        tm.assert_series_equal(result, expected)
        tm.assert_index_equal(result.cat.categories, Index(["a", "b", "c"]))

    @pytest.mark.parametrize("name", [None, "foo"])
    @pytest.mark.parametrize("dtype_ordered", [True, False])
    @pytest.mark.parametrize("series_ordered", [True, False])
    def test_astype_categorical_to_categorical(
        self, name, dtype_ordered, series_ordered
    ):
        """Cast between categorical dtypes with differing categories/order."""
        # GH#10696, GH#18593
        s_data = list("abcaacbab")
        s_dtype = CategoricalDtype(list("bac"), ordered=series_ordered)
        ser = Series(s_data, dtype=s_dtype, name=name)

        # unspecified categories
        dtype = CategoricalDtype(ordered=dtype_ordered)
        result = ser.astype(dtype)
        exp_dtype = CategoricalDtype(s_dtype.categories, dtype_ordered)
        expected = Series(s_data, name=name, dtype=exp_dtype)
        tm.assert_series_equal(result, expected)

        # different categories
        dtype = CategoricalDtype(list("adc"), dtype_ordered)
        result = ser.astype(dtype)
        expected = Series(s_data, name=name, dtype=dtype)
        tm.assert_series_equal(result, expected)

        if dtype_ordered is False:
            # not specifying ordered, so only test once
            expected = ser
            result = ser.astype("category")
            tm.assert_series_equal(result, expected)

    def test_astype_bool_missing_to_categorical(self):
        """Cast an object Series of bools plus NaN to a bool categorical."""
        # GH-19182
        ser = Series([True, False, np.nan])
        assert ser.dtypes == np.object_

        result = ser.astype(CategoricalDtype(categories=[True, False]))
        expected = Series(Categorical([True, False, np.nan], categories=[True, False]))
        tm.assert_series_equal(result, expected)

    def test_astype_categories_raises(self):
        """``categories``/``ordered`` keywords to ``astype`` must raise TypeError."""
        # deprecated GH#17636, removed in GH#27141
        ser = Series(["a", "b", "a"])
        with pytest.raises(TypeError, match="got an unexpected"):
            ser.astype("category", categories=["a", "b"], ordered=True)

    @pytest.mark.parametrize("items", [["a", "b", "c", "a"], [1, 2, 3, 1]])
    def test_astype_from_categorical(self, items):
        """``astype("category")`` must equal wrapping a ``Categorical``."""
        ser = Series(items)
        exp = Series(Categorical(items))
        res = ser.astype("category")
        tm.assert_series_equal(res, exp)

    def test_astype_from_categorical_with_keywords(self):
        """Cast to ordered ``CategoricalDtype`` with and without categories."""
        # with keywords
        lst = ["a", "b", "c", "a"]
        ser = Series(lst)
        exp = Series(Categorical(lst, ordered=True))
        res = ser.astype(CategoricalDtype(None, ordered=True))
        tm.assert_series_equal(res, exp)

        exp = Series(Categorical(lst, categories=list("abcdef"), ordered=True))
        res = ser.astype(CategoricalDtype(list("abcdef"), ordered=True))
        tm.assert_series_equal(res, exp)

    def test_astype_timedelta64_with_np_nan(self):
        """``np.nan`` in timedelta64 construction must become ``NaT``."""
        # GH45798
        result = Series([Timedelta(1), np.nan], dtype="timedelta64[ns]")
        expected = Series([Timedelta(1), NaT], dtype="timedelta64[ns]")
        tm.assert_series_equal(result, expected)

    @td.skip_if_no("pyarrow")
    def test_astype_int_na_string(self):
        """Cast pyarrow Int64 with NA to pyarrow string, keeping the NA."""
        # GH#57418
        ser = Series([12, NA], dtype="Int64[pyarrow]")
        result = ser.astype("string[pyarrow]")
        expected = Series(["12", NA], dtype="string[pyarrow]")
        tm.assert_series_equal(result, expected)
|
||||
@ -0,0 +1,30 @@
|
||||
import numpy as np
|
||||
|
||||
|
||||
class TestAutoCorr:
|
||||
def test_autocorr(self, datetime_series):
|
||||
# Just run the function
|
||||
corr1 = datetime_series.autocorr()
|
||||
|
||||
# Now run it with the lag parameter
|
||||
corr2 = datetime_series.autocorr(lag=1)
|
||||
|
||||
# corr() with lag needs Series of at least length 2
|
||||
if len(datetime_series) <= 2:
|
||||
assert np.isnan(corr1)
|
||||
assert np.isnan(corr2)
|
||||
else:
|
||||
assert corr1 == corr2
|
||||
|
||||
# Choose a random lag between 1 and length of Series - 2
|
||||
# and compare the result with the Series corr() function
|
||||
n = 1 + np.random.default_rng(2).integers(max(1, len(datetime_series) - 2))
|
||||
corr1 = datetime_series.corr(datetime_series.shift(n))
|
||||
corr2 = datetime_series.autocorr(lag=n)
|
||||
|
||||
# corr() with lag needs Series of at least length 2
|
||||
if len(datetime_series) <= 2:
|
||||
assert np.isnan(corr1)
|
||||
assert np.isnan(corr2)
|
||||
else:
|
||||
assert corr1 == corr2
|
||||
@ -0,0 +1,75 @@
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
from pandas import (
|
||||
Series,
|
||||
bdate_range,
|
||||
date_range,
|
||||
period_range,
|
||||
)
|
||||
import pandas._testing as tm
|
||||
|
||||
|
||||
class TestBetween:
    """Tests for ``Series.between``."""

    def test_between(self):
        """Default ``between`` equals ``>= left`` and ``<= right``."""
        series = Series(date_range("1/1/2000", periods=10))
        left, right = series[[2, 7]]

        result = series.between(left, right)
        expected = (series >= left) & (series <= right)
        tm.assert_series_equal(result, expected)

    def test_between_datetime_object_dtype(self):
        """``between`` on object-dtype datetimes with NaN at even positions."""
        ser = Series(bdate_range("1/1/2000", periods=20), dtype=object)
        ser[::2] = np.nan

        result = ser[ser.between(ser[3], ser[17])]
        expected = ser[3:18].dropna()
        tm.assert_series_equal(result, expected)

        result = ser[ser.between(ser[3], ser[17], inclusive="neither")]
        expected = ser[5:16].dropna()
        tm.assert_series_equal(result, expected)

    def test_between_period_values(self):
        """Default ``between`` on a Period Series."""
        ser = Series(period_range("2000-01-01", periods=10, freq="D"))
        left, right = ser[[2, 7]]
        result = ser.between(left, right)
        expected = (ser >= left) & (ser <= right)
        tm.assert_series_equal(result, expected)

    def test_between_inclusive_string(self):
        """Each ``inclusive`` string maps to the matching comparison pair."""
        # GH 40628
        series = Series(date_range("1/1/2000", periods=10))
        left, right = series[[2, 7]]

        result = series.between(left, right, inclusive="both")
        expected = (series >= left) & (series <= right)
        tm.assert_series_equal(result, expected)

        result = series.between(left, right, inclusive="left")
        expected = (series >= left) & (series < right)
        tm.assert_series_equal(result, expected)

        result = series.between(left, right, inclusive="right")
        expected = (series > left) & (series <= right)
        tm.assert_series_equal(result, expected)

        result = series.between(left, right, inclusive="neither")
        expected = (series > left) & (series < right)
        tm.assert_series_equal(result, expected)

    @pytest.mark.parametrize("inclusive", ["yes", True, False])
    def test_between_error_args(self, inclusive):
        """An unsupported ``inclusive`` value must raise ValueError."""
        # GH 40628
        series = Series(date_range("1/1/2000", periods=10))
        left, right = series[[2, 7]]

        value_error_msg = (
            "Inclusive has to be either string of 'both',"
            "'left', 'right', or 'neither'."
        )

        # Only the call under test belongs inside the raises block.
        with pytest.raises(ValueError, match=value_error_msg):
            series.between(left, right, inclusive=inclusive)
|
||||
@ -0,0 +1,148 @@
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
from pandas import (
|
||||
DataFrame,
|
||||
Series,
|
||||
array as pd_array,
|
||||
date_range,
|
||||
)
|
||||
import pandas._testing as tm
|
||||
|
||||
|
||||
@pytest.fixture
def df():
    """
    base dataframe for testing
    """
    return DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]})
|
||||
|
||||
|
||||
def test_case_when_caselist_is_not_a_list(df):
|
||||
"""
|
||||
Raise ValueError if caselist is not a list.
|
||||
"""
|
||||
msg = "The caselist argument should be a list; "
|
||||
msg += "instead got.+"
|
||||
with pytest.raises(TypeError, match=msg): # GH39154
|
||||
df["a"].case_when(caselist=())
|
||||
|
||||
|
||||
def test_case_when_no_caselist(df):
|
||||
"""
|
||||
Raise ValueError if no caselist is provided.
|
||||
"""
|
||||
msg = "provide at least one boolean condition, "
|
||||
msg += "with a corresponding replacement."
|
||||
with pytest.raises(ValueError, match=msg): # GH39154
|
||||
df["a"].case_when([])
|
||||
|
||||
|
||||
def test_case_when_odd_caselist(df):
|
||||
"""
|
||||
Raise ValueError if no of caselist is odd.
|
||||
"""
|
||||
msg = "Argument 0 must have length 2; "
|
||||
msg += "a condition and replacement; instead got length 3."
|
||||
|
||||
with pytest.raises(ValueError, match=msg):
|
||||
df["a"].case_when([(df["a"].eq(1), 1, df.a.gt(1))])
|
||||
|
||||
|
||||
def test_case_when_raise_error_from_mask(df):
|
||||
"""
|
||||
Raise Error from within Series.mask
|
||||
"""
|
||||
msg = "Failed to apply condition0 and replacement0."
|
||||
with pytest.raises(ValueError, match=msg):
|
||||
df["a"].case_when([(df["a"].eq(1), [1, 2])])
|
||||
|
||||
|
||||
def test_case_when_single_condition(df):
|
||||
"""
|
||||
Test output on a single condition.
|
||||
"""
|
||||
result = Series([np.nan, np.nan, np.nan]).case_when([(df.a.eq(1), 1)])
|
||||
expected = Series([1, np.nan, np.nan])
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
|
||||
def test_case_when_multiple_conditions(df):
|
||||
"""
|
||||
Test output when booleans are derived from a computation
|
||||
"""
|
||||
result = Series([np.nan, np.nan, np.nan]).case_when(
|
||||
[(df.a.eq(1), 1), (Series([False, True, False]), 2)]
|
||||
)
|
||||
expected = Series([1, 2, np.nan])
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
|
||||
def test_case_when_multiple_conditions_replacement_list(df):
|
||||
"""
|
||||
Test output when replacement is a list
|
||||
"""
|
||||
result = Series([np.nan, np.nan, np.nan]).case_when(
|
||||
[([True, False, False], 1), (df["a"].gt(1) & df["b"].eq(5), [1, 2, 3])]
|
||||
)
|
||||
expected = Series([1, 2, np.nan])
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
|
||||
def test_case_when_multiple_conditions_replacement_extension_dtype(df):
|
||||
"""
|
||||
Test output when replacement has an extension dtype
|
||||
"""
|
||||
result = Series([np.nan, np.nan, np.nan]).case_when(
|
||||
[
|
||||
([True, False, False], 1),
|
||||
(df["a"].gt(1) & df["b"].eq(5), pd_array([1, 2, 3], dtype="Int64")),
|
||||
],
|
||||
)
|
||||
expected = Series([1, 2, np.nan], dtype="Float64")
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
|
||||
def test_case_when_multiple_conditions_replacement_series(df):
|
||||
"""
|
||||
Test output when replacement is a Series
|
||||
"""
|
||||
result = Series([np.nan, np.nan, np.nan]).case_when(
|
||||
[
|
||||
(np.array([True, False, False]), 1),
|
||||
(df["a"].gt(1) & df["b"].eq(5), Series([1, 2, 3])),
|
||||
],
|
||||
)
|
||||
expected = Series([1, 2, np.nan])
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
|
||||
def test_case_when_non_range_index():
|
||||
"""
|
||||
Test output if index is not RangeIndex
|
||||
"""
|
||||
rng = np.random.default_rng(seed=123)
|
||||
dates = date_range("1/1/2000", periods=8)
|
||||
df = DataFrame(
|
||||
rng.standard_normal(size=(8, 4)), index=dates, columns=["A", "B", "C", "D"]
|
||||
)
|
||||
result = Series(5, index=df.index, name="A").case_when([(df.A.gt(0), df.B)])
|
||||
expected = df.A.mask(df.A.gt(0), df.B).where(df.A.gt(0), 5)
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
|
||||
def test_case_when_callable():
|
||||
"""
|
||||
Test output on a callable
|
||||
"""
|
||||
# https://numpy.org/doc/stable/reference/generated/numpy.piecewise.html
|
||||
x = np.linspace(-2.5, 2.5, 6)
|
||||
ser = Series(x)
|
||||
result = ser.case_when(
|
||||
caselist=[
|
||||
(lambda df: df < 0, lambda df: -df),
|
||||
(lambda df: df >= 0, lambda df: df),
|
||||
]
|
||||
)
|
||||
expected = np.piecewise(x, [x < 0, x >= 0], [lambda x: -x, lambda x: x])
|
||||
tm.assert_series_equal(result, Series(expected))
|
||||
@ -0,0 +1,146 @@
|
||||
from datetime import datetime
|
||||
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
import pandas as pd
|
||||
from pandas import (
|
||||
Series,
|
||||
Timestamp,
|
||||
isna,
|
||||
notna,
|
||||
)
|
||||
import pandas._testing as tm
|
||||
|
||||
|
||||
class TestSeriesClip:
|
||||
def test_clip(self, datetime_series):
|
||||
val = datetime_series.median()
|
||||
|
||||
assert datetime_series.clip(lower=val).min() == val
|
||||
assert datetime_series.clip(upper=val).max() == val
|
||||
|
||||
result = datetime_series.clip(-0.5, 0.5)
|
||||
expected = np.clip(datetime_series, -0.5, 0.5)
|
||||
tm.assert_series_equal(result, expected)
|
||||
assert isinstance(expected, Series)
|
||||
|
||||
def test_clip_types_and_nulls(self):
|
||||
sers = [
|
||||
Series([np.nan, 1.0, 2.0, 3.0]),
|
||||
Series([None, "a", "b", "c"]),
|
||||
Series(pd.to_datetime([np.nan, 1, 2, 3], unit="D")),
|
||||
]
|
||||
|
||||
for s in sers:
|
||||
thresh = s[2]
|
||||
lower = s.clip(lower=thresh)
|
||||
upper = s.clip(upper=thresh)
|
||||
assert lower[notna(lower)].min() == thresh
|
||||
assert upper[notna(upper)].max() == thresh
|
||||
assert list(isna(s)) == list(isna(lower))
|
||||
assert list(isna(s)) == list(isna(upper))
|
||||
|
||||
def test_series_clipping_with_na_values(self, any_numeric_ea_dtype, nulls_fixture):
|
||||
# Ensure that clipping method can handle NA values with out failing
|
||||
# GH#40581
|
||||
|
||||
if nulls_fixture is pd.NaT:
|
||||
# constructor will raise, see
|
||||
# test_constructor_mismatched_null_nullable_dtype
|
||||
pytest.skip("See test_constructor_mismatched_null_nullable_dtype")
|
||||
|
||||
ser = Series([nulls_fixture, 1.0, 3.0], dtype=any_numeric_ea_dtype)
|
||||
s_clipped_upper = ser.clip(upper=2.0)
|
||||
s_clipped_lower = ser.clip(lower=2.0)
|
||||
|
||||
expected_upper = Series([nulls_fixture, 1.0, 2.0], dtype=any_numeric_ea_dtype)
|
||||
expected_lower = Series([nulls_fixture, 2.0, 3.0], dtype=any_numeric_ea_dtype)
|
||||
|
||||
tm.assert_series_equal(s_clipped_upper, expected_upper)
|
||||
tm.assert_series_equal(s_clipped_lower, expected_lower)
|
||||
|
||||
def test_clip_with_na_args(self):
|
||||
"""Should process np.nan argument as None"""
|
||||
# GH#17276
|
||||
s = Series([1, 2, 3])
|
||||
|
||||
tm.assert_series_equal(s.clip(np.nan), Series([1, 2, 3]))
|
||||
tm.assert_series_equal(s.clip(upper=np.nan, lower=np.nan), Series([1, 2, 3]))
|
||||
|
||||
# GH#19992
|
||||
msg = "Downcasting behavior in Series and DataFrame methods 'where'"
|
||||
# TODO: avoid this warning here? seems like we should never be upcasting
|
||||
# in the first place?
|
||||
with tm.assert_produces_warning(FutureWarning, match=msg):
|
||||
res = s.clip(lower=[0, 4, np.nan])
|
||||
tm.assert_series_equal(res, Series([1, 4, 3]))
|
||||
with tm.assert_produces_warning(FutureWarning, match=msg):
|
||||
res = s.clip(upper=[1, np.nan, 1])
|
||||
tm.assert_series_equal(res, Series([1, 2, 1]))
|
||||
|
||||
# GH#40420
|
||||
s = Series([1, 2, 3])
|
||||
result = s.clip(0, [np.nan, np.nan, np.nan])
|
||||
tm.assert_series_equal(s, result)
|
||||
|
||||
def test_clip_against_series(self):
|
||||
# GH#6966
|
||||
|
||||
s = Series([1.0, 1.0, 4.0])
|
||||
|
||||
lower = Series([1.0, 2.0, 3.0])
|
||||
upper = Series([1.5, 2.5, 3.5])
|
||||
|
||||
tm.assert_series_equal(s.clip(lower, upper), Series([1.0, 2.0, 3.5]))
|
||||
tm.assert_series_equal(s.clip(1.5, upper), Series([1.5, 1.5, 3.5]))
|
||||
|
||||
@pytest.mark.parametrize("inplace", [True, False])
|
||||
@pytest.mark.parametrize("upper", [[1, 2, 3], np.asarray([1, 2, 3])])
|
||||
def test_clip_against_list_like(self, inplace, upper):
|
||||
# GH#15390
|
||||
original = Series([5, 6, 7])
|
||||
result = original.clip(upper=upper, inplace=inplace)
|
||||
expected = Series([1, 2, 3])
|
||||
|
||||
if inplace:
|
||||
result = original
|
||||
tm.assert_series_equal(result, expected, check_exact=True)
|
||||
|
||||
def test_clip_with_datetimes(self):
|
||||
# GH#11838
|
||||
# naive and tz-aware datetimes
|
||||
|
||||
t = Timestamp("2015-12-01 09:30:30")
|
||||
s = Series([Timestamp("2015-12-01 09:30:00"), Timestamp("2015-12-01 09:31:00")])
|
||||
result = s.clip(upper=t)
|
||||
expected = Series(
|
||||
[Timestamp("2015-12-01 09:30:00"), Timestamp("2015-12-01 09:30:30")]
|
||||
)
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
t = Timestamp("2015-12-01 09:30:30", tz="US/Eastern")
|
||||
s = Series(
|
||||
[
|
||||
Timestamp("2015-12-01 09:30:00", tz="US/Eastern"),
|
||||
Timestamp("2015-12-01 09:31:00", tz="US/Eastern"),
|
||||
]
|
||||
)
|
||||
result = s.clip(upper=t)
|
||||
expected = Series(
|
||||
[
|
||||
Timestamp("2015-12-01 09:30:00", tz="US/Eastern"),
|
||||
Timestamp("2015-12-01 09:30:30", tz="US/Eastern"),
|
||||
]
|
||||
)
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
@pytest.mark.parametrize("dtype", [object, "M8[us]"])
|
||||
def test_clip_with_timestamps_and_oob_datetimes(self, dtype):
|
||||
# GH-42794
|
||||
ser = Series([datetime(1, 1, 1), datetime(9999, 9, 9)], dtype=dtype)
|
||||
|
||||
result = ser.clip(lower=Timestamp.min, upper=Timestamp.max)
|
||||
expected = Series([Timestamp.min, Timestamp.max], dtype=dtype)
|
||||
|
||||
tm.assert_series_equal(result, expected)
|
||||
@ -0,0 +1,17 @@
|
||||
from pandas import Series
|
||||
import pandas._testing as tm
|
||||
|
||||
|
||||
class TestCombine:
    """Tests for ``Series.combine``."""

    def test_combine_scalar(self):
        """Combine a Series with a scalar using addition and ``min``."""
        # GH#21248
        # Note - combine() with another Series is tested elsewhere because
        # it is used when testing operators
        ser = Series([i * 10 for i in range(5)])
        result = ser.combine(3, lambda x, y: x + y)
        expected = Series([i * 10 + 3 for i in range(5)])
        tm.assert_series_equal(result, expected)

        # ``min`` already has the two-argument form; no lambda needed
        result = ser.combine(22, min)
        expected = Series([min(i * 10, 22) for i in range(5)])
        tm.assert_series_equal(result, expected)
|
||||
@ -0,0 +1,150 @@
|
||||
from datetime import datetime
|
||||
|
||||
import numpy as np
|
||||
|
||||
import pandas as pd
|
||||
from pandas import (
|
||||
Period,
|
||||
Series,
|
||||
date_range,
|
||||
period_range,
|
||||
to_datetime,
|
||||
)
|
||||
import pandas._testing as tm
|
||||
|
||||
|
||||
class TestCombineFirst:
|
||||
def test_combine_first_period_datetime(self):
|
||||
# GH#3367
|
||||
didx = date_range(start="1950-01-31", end="1950-07-31", freq="ME")
|
||||
pidx = period_range(start=Period("1950-1"), end=Period("1950-7"), freq="M")
|
||||
# check to be consistent with DatetimeIndex
|
||||
for idx in [didx, pidx]:
|
||||
a = Series([1, np.nan, np.nan, 4, 5, np.nan, 7], index=idx)
|
||||
b = Series([9, 9, 9, 9, 9, 9, 9], index=idx)
|
||||
|
||||
result = a.combine_first(b)
|
||||
expected = Series([1, 9, 9, 4, 5, 9, 7], index=idx, dtype=np.float64)
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
def test_combine_first_name(self, datetime_series):
|
||||
result = datetime_series.combine_first(datetime_series[:5])
|
||||
assert result.name == datetime_series.name
|
||||
|
||||
def test_combine_first(self, using_infer_string):
|
||||
values = np.arange(20, dtype=np.float64)
|
||||
series = Series(values, index=np.arange(20, dtype=np.int64))
|
||||
|
||||
series_copy = series * 2
|
||||
series_copy[::2] = np.nan
|
||||
|
||||
# nothing used from the input
|
||||
combined = series.combine_first(series_copy)
|
||||
|
||||
tm.assert_series_equal(combined, series)
|
||||
|
||||
# Holes filled from input
|
||||
combined = series_copy.combine_first(series)
|
||||
assert np.isfinite(combined).all()
|
||||
|
||||
tm.assert_series_equal(combined[::2], series[::2])
|
||||
tm.assert_series_equal(combined[1::2], series_copy[1::2])
|
||||
|
||||
# mixed types
|
||||
index = pd.Index([str(i) for i in range(20)])
|
||||
floats = Series(np.random.default_rng(2).standard_normal(20), index=index)
|
||||
strings = Series([str(i) for i in range(10)], index=index[::2], dtype=object)
|
||||
|
||||
combined = strings.combine_first(floats)
|
||||
|
||||
tm.assert_series_equal(strings, combined.loc[index[::2]])
|
||||
tm.assert_series_equal(floats[1::2].astype(object), combined.loc[index[1::2]])
|
||||
|
||||
# corner case
|
||||
ser = Series([1.0, 2, 3], index=[0, 1, 2])
|
||||
empty = Series([], index=[], dtype=object)
|
||||
msg = "The behavior of array concatenation with empty entries is deprecated"
|
||||
with tm.assert_produces_warning(FutureWarning, match=msg):
|
||||
result = ser.combine_first(empty)
|
||||
if not using_infer_string:
|
||||
ser.index = ser.index.astype("O")
|
||||
tm.assert_series_equal(ser, result)
|
||||
|
||||
def test_combine_first_dt64(self, unit):
|
||||
s0 = to_datetime(Series(["2010", np.nan])).dt.as_unit(unit)
|
||||
s1 = to_datetime(Series([np.nan, "2011"])).dt.as_unit(unit)
|
||||
rs = s0.combine_first(s1)
|
||||
xp = to_datetime(Series(["2010", "2011"])).dt.as_unit(unit)
|
||||
tm.assert_series_equal(rs, xp)
|
||||
|
||||
s0 = to_datetime(Series(["2010", np.nan])).dt.as_unit(unit)
|
||||
s1 = Series([np.nan, "2011"])
|
||||
rs = s0.combine_first(s1)
|
||||
|
||||
xp = Series([datetime(2010, 1, 1), "2011"], dtype="datetime64[ns]")
|
||||
|
||||
tm.assert_series_equal(rs, xp)
|
||||
|
||||
def test_combine_first_dt_tz_values(self, tz_naive_fixture):
|
||||
ser1 = Series(
|
||||
pd.DatetimeIndex(["20150101", "20150102", "20150103"], tz=tz_naive_fixture),
|
||||
name="ser1",
|
||||
)
|
||||
ser2 = Series(
|
||||
pd.DatetimeIndex(["20160514", "20160515", "20160516"], tz=tz_naive_fixture),
|
||||
index=[2, 3, 4],
|
||||
name="ser2",
|
||||
)
|
||||
result = ser1.combine_first(ser2)
|
||||
exp_vals = pd.DatetimeIndex(
|
||||
["20150101", "20150102", "20150103", "20160515", "20160516"],
|
||||
tz=tz_naive_fixture,
|
||||
)
|
||||
exp = Series(exp_vals, name="ser1")
|
||||
tm.assert_series_equal(exp, result)
|
||||
|
||||
def test_combine_first_timezone_series_with_empty_series(self):
    """Combining with a value-less Series on a tz-aware index returns self."""
    # GH 41800
    start = datetime(2021, 1, 1, 1)
    stop = datetime(2021, 1, 1, 10)
    time_index = date_range(start, stop, freq="h", tz="Europe/Rome")

    populated = Series(range(10), index=time_index)
    blank = Series(index=time_index)

    msg = "The behavior of array concatenation with empty entries is deprecated"
    with tm.assert_produces_warning(FutureWarning, match=msg):
        result = populated.combine_first(blank)
    tm.assert_series_equal(result, populated)
|
||||
|
||||
def test_combine_first_preserves_dtype(self):
|
||||
# GH51764
|
||||
s1 = Series([1666880195890293744, 1666880195890293837])
|
||||
s2 = Series([1, 2, 3])
|
||||
result = s1.combine_first(s2)
|
||||
expected = Series([1666880195890293744, 1666880195890293837, 3])
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
def test_combine_mixed_timezone(self):
|
||||
# GH 26283
|
||||
uniform_tz = Series({pd.Timestamp("2019-05-01", tz="UTC"): 1.0})
|
||||
multi_tz = Series(
|
||||
{
|
||||
pd.Timestamp("2019-05-01 01:00:00+0100", tz="Europe/London"): 2.0,
|
||||
pd.Timestamp("2019-05-02", tz="UTC"): 3.0,
|
||||
}
|
||||
)
|
||||
|
||||
result = uniform_tz.combine_first(multi_tz)
|
||||
expected = Series(
|
||||
[1.0, 3.0],
|
||||
index=pd.Index(
|
||||
[
|
||||
pd.Timestamp("2019-05-01 00:00:00+00:00", tz="UTC"),
|
||||
pd.Timestamp("2019-05-02 00:00:00+00:00", tz="UTC"),
|
||||
],
|
||||
dtype="object",
|
||||
),
|
||||
)
|
||||
tm.assert_series_equal(result, expected)
|
||||
@ -0,0 +1,141 @@
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
import pandas as pd
|
||||
import pandas._testing as tm
|
||||
|
||||
|
||||
@pytest.mark.parametrize("align_axis", [0, 1, "index", "columns"])
def test_compare_axis(align_axis):
    """Series.compare lays differences out along the requested axis."""
    # GH#30429
    left = pd.Series(["a", "b", "c"])
    right = pd.Series(["x", "b", "z"])

    result = left.compare(right, align_axis=align_axis)

    if align_axis not in (1, "columns"):
        # Row alignment: self/other become an inner index level of a Series.
        stacked = pd.MultiIndex.from_product([[0, 2], ["self", "other"]])
        expected = pd.Series(["a", "x", "c", "z"], index=stacked)
        tm.assert_series_equal(result, expected)
        return

    # Column alignment: one row per differing label.
    expected = pd.DataFrame(
        [["a", "x"], ["c", "z"]],
        index=pd.Index([0, 2]),
        columns=pd.Index(["self", "other"]),
    )
    tm.assert_frame_equal(result, expected)
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "keep_shape, keep_equal",
    [
        (True, False),
        (False, True),
        (True, True),
        # False, False case is already covered in test_compare_axis
    ],
)
def test_compare_various_formats(keep_shape, keep_equal):
    """Check compare output for each keep_shape / keep_equal combination."""
    left = pd.Series(["a", "b", "c"])
    right = pd.Series(["x", "b", "z"])

    result = left.compare(right, keep_shape=keep_shape, keep_equal=keep_equal)

    # Pick the expected rows and row labels for this combination.
    if not keep_shape:
        labels = [0, 2]
        rows = [["a", "x"], ["c", "z"]]
    elif keep_equal:
        labels = [0, 1, 2]
        rows = [["a", "x"], ["b", "b"], ["c", "z"]]
    else:
        labels = [0, 1, 2]
        rows = [["a", "x"], [np.nan, np.nan], ["c", "z"]]

    expected = pd.DataFrame(
        rows,
        index=pd.Index(labels),
        columns=pd.Index(["self", "other"]),
    )
    tm.assert_frame_equal(result, expected)
|
||||
|
||||
|
||||
def test_compare_with_equal_nulls():
    """NaNs at the same position count as equal and are dropped."""
    left = pd.Series(["a", "b", np.nan])
    right = pd.Series(["x", "b", np.nan])

    expected = pd.DataFrame([["a", "x"]], columns=["self", "other"])
    tm.assert_frame_equal(left.compare(right), expected)
|
||||
|
||||
|
||||
def test_compare_with_non_equal_nulls():
    """A NaN facing a real value is a difference and must be reported."""
    left = pd.Series(["a", "b", "c"])
    right = pd.Series(["x", "b", np.nan])

    result = left.compare(right, align_axis=0)

    stacked = pd.MultiIndex.from_product([[0, 2], ["self", "other"]])
    expected = pd.Series(["a", "x", "c", np.nan], index=stacked)
    tm.assert_series_equal(result, expected)
|
||||
|
||||
|
||||
def test_compare_multi_index():
    """compare appends the self/other level below an existing MultiIndex."""
    mi = pd.MultiIndex.from_arrays([[0, 0, 1], [0, 1, 2]])
    left = pd.Series(["a", "b", "c"], index=mi)
    right = pd.Series(["x", "b", "z"], index=mi)

    result = left.compare(right, align_axis=0)

    outer, inner = [0, 0, 1, 1], [0, 0, 2, 2]
    side = ["self", "other", "self", "other"]
    expected = pd.Series(
        ["a", "x", "c", "z"], index=pd.MultiIndex.from_arrays([outer, inner, side])
    )
    tm.assert_series_equal(result, expected)
|
||||
|
||||
|
||||
def test_compare_unaligned_objects():
    """Series.compare raises ValueError unless both Series share labels.

    Only the ``compare`` call sits inside each ``pytest.raises`` context, so
    an error raised while building the inputs cannot satisfy the expectation.
    """
    msg = "Can only compare identically-labeled Series objects"

    # test Series with different indices
    ser1 = pd.Series([1, 2, 3], index=["a", "b", "c"])
    ser2 = pd.Series([1, 2, 3], index=["a", "b", "d"])
    with pytest.raises(ValueError, match=msg):
        ser1.compare(ser2)

    # test Series with different lengths
    ser1 = pd.Series([1, 2, 3])
    ser2 = pd.Series([1, 2, 3, 4])
    with pytest.raises(ValueError, match=msg):
        ser1.compare(ser2)
|
||||
|
||||
|
||||
def test_compare_datetime64_and_string():
    """Comparing datetime64 with string data must not raise OverflowError."""
    # Issue https://github.com/pandas-dev/pandas/issues/45506
    dates = ["2015-07-01", "2015-07-02", "2015-07-03", "2015-07-04", "2015-07-05"]
    phones = [
        "08335394550",
        "+49 (0) 0345 300033",
        "+49(0)2598 04457",
        "0741470003",
        "04181 83668",
    ]
    records = [{"a": day, "b": phone} for day, phone in zip(dates, phones)]
    df = pd.DataFrame(data=records).astype({"a": "datetime64[ns]", "b": "string"})

    via_method = df["a"].eq(df["b"])
    via_operator = df["a"] == df["b"]
    not_equal = df["a"] != df["b"]

    all_false = pd.Series([False] * 5)  # For .eq and ==
    all_true = pd.Series([True] * 5)  # For !=

    tm.assert_series_equal(via_method, all_false)
    tm.assert_series_equal(via_operator, all_false)
    tm.assert_series_equal(not_equal, all_true)
|
||||
@ -0,0 +1,309 @@
|
||||
from itertools import product
|
||||
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
from pandas._config import using_string_dtype
|
||||
|
||||
from pandas._libs import lib
|
||||
|
||||
import pandas as pd
|
||||
import pandas._testing as tm
|
||||
|
||||
# Each test case consists of a tuple with the data and dtype to create the
|
||||
# test Series, the default dtype for the expected result (which is valid
|
||||
# for most cases), and the specific cases where the result deviates from
|
||||
# this default. Those overrides are defined as a dict with (keyword, val) as
|
||||
# dictionary key. In case of multiple items, the last override takes precedence.
|
||||
|
||||
|
||||
@pytest.fixture(
    params=[
        # Layout of every entry:
        #   (data, original dtype, default expected dtype, {overrides})
        (
            [1, 2, 3],
            np.dtype("int32"),
            "Int32",
            {("convert_integer", False): np.dtype("int32")},
        ),
        (
            [1, 2, 3],
            np.dtype("int64"),
            "Int64",
            {("convert_integer", False): np.dtype("int64")},
        ),
        (
            ["x", "y", "z"],
            np.dtype("O"),
            pd.StringDtype(),
            {("convert_string", False): np.dtype("O")},
        ),
        (
            [True, False, np.nan],
            np.dtype("O"),
            pd.BooleanDtype(),
            {("convert_boolean", False): np.dtype("O")},
        ),
        (
            ["h", "i", np.nan],
            np.dtype("O"),
            pd.StringDtype(),
            {("convert_string", False): np.dtype("O")},
        ),
        # GH32117
        (["h", "i", 1], np.dtype("O"), np.dtype("O"), {}),
        (
            [10, np.nan, 20],
            np.dtype("float"),
            "Int64",
            {
                ("convert_integer", False, "convert_floating", True): "Float64",
                ("convert_integer", False, "convert_floating", False): np.dtype(
                    "float"
                ),
            },
        ),
        (
            [np.nan, 100.5, 200],
            np.dtype("float"),
            "Float64",
            {("convert_floating", False): np.dtype("float")},
        ),
        ([3, 4, 5], "Int8", "Int8", {}),
        ([[1, 2], [3, 4], [5]], None, np.dtype("O"), {}),
        (
            [4, 5, 6],
            np.dtype("uint32"),
            "UInt32",
            {("convert_integer", False): np.dtype("uint32")},
        ),
        (
            [-10, 12, 13],
            np.dtype("i1"),
            "Int8",
            {("convert_integer", False): np.dtype("i1")},
        ),
        (
            [1.2, 1.3],
            np.dtype("float32"),
            "Float32",
            {("convert_floating", False): np.dtype("float32")},
        ),
        (
            [1, 2.0],
            object,
            "Int64",
            {
                ("convert_integer", False): "Float64",
                ("convert_integer", False, "convert_floating", False): np.dtype(
                    "float"
                ),
                ("infer_objects", False): np.dtype("object"),
            },
        ),
        (
            [1, 2.5],
            object,
            "Float64",
            {
                ("convert_floating", False): np.dtype("float"),
                ("infer_objects", False): np.dtype("object"),
            },
        ),
        (["a", "b"], pd.CategoricalDtype(), pd.CategoricalDtype(), {}),
        # The same naive datetimes in each resolution, paired with a tz-aware
        # target dtype (order preserved: s, ms, us, ns).
        *[
            (
                pd.to_datetime(["2020-01-14 10:00", "2020-01-15 11:11"]).as_unit(
                    resolution
                ),
                pd.DatetimeTZDtype(tz="UTC"),
                pd.DatetimeTZDtype(tz="UTC"),
                {},
            )
            for resolution in ("s", "ms", "us", "ns")
        ],
        (
            pd.to_datetime(["2020-01-14 10:00", "2020-01-15 11:11"]).as_unit("ns"),
            "datetime64[ns]",
            np.dtype("datetime64[ns]"),
            {},
        ),
        (
            pd.to_datetime(["2020-01-14 10:00", "2020-01-15 11:11"]).as_unit("ns"),
            object,
            np.dtype("datetime64[ns]"),
            {("infer_objects", False): np.dtype("object")},
        ),
        (
            pd.period_range("1/1/2011", freq="M", periods=3),
            None,
            pd.PeriodDtype("M"),
            {},
        ),
        (
            pd.arrays.IntervalArray([pd.Interval(0, 1), pd.Interval(1, 5)]),
            None,
            pd.IntervalDtype("int64", "right"),
            {},
        ),
    ]
)
def test_cases(request):
    """Yield one (data, dtype, default expected, overrides) tuple per param."""
    return request.param
|
||||
|
||||
|
||||
class TestSeriesConvertDtypes:
    """Tests for Series.convert_dtypes / DataFrame.convert_dtypes."""

    @pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)", strict=False)
    @pytest.mark.parametrize("params", product(*[(True, False)] * 5))
    def test_convert_dtypes(self, test_cases, params, using_infer_string):
        # ``params`` holds one boolean per convert_dtypes flag, in the order
        # listed in ``flag_names`` below; it is passed positionally.
        data, maindtype, expected_default, expected_other = test_cases

        is_naive_datetime = hasattr(data, "dtype") and lib.is_np_dtype(
            data.dtype, "M"
        )
        if is_naive_datetime and isinstance(maindtype, pd.DatetimeTZDtype):
            # this astype is deprecated in favor of tz_localize
            msg = "Cannot use .astype to convert from timezone-naive dtype"
            with pytest.raises(TypeError, match=msg):
                pd.Series(data, dtype=maindtype)
            return

        if maindtype is None:
            series = pd.Series(data)
        else:
            series = pd.Series(data, dtype=maindtype)

        result = series.convert_dtypes(*params)

        flag_names = [
            "infer_objects",
            "convert_string",
            "convert_integer",
            "convert_boolean",
            "convert_floating",
        ]
        params_dict = dict(zip(flag_names, params))

        # Start from the default and let every matching override replace it;
        # later overrides win over earlier ones.
        expected_dtype = expected_default
        for spec, dtype in expected_other.items():
            # ``spec`` alternates keyword, value, keyword, value, ...
            wanted = zip(spec[::2], spec[1::2])
            if all(params_dict[key] is val for key, val in wanted):
                expected_dtype = dtype
        if (
            using_infer_string
            and expected_default == "string"
            and expected_dtype == object
            and params[0]
            and not params[1]
        ):
            # If convert_string=False and infer_objects=True, we end up with the
            # default string dtype instead of preserving object for string data
            expected_dtype = pd.StringDtype(na_value=np.nan)

        expected = pd.Series(data, dtype=expected_dtype)
        tm.assert_series_equal(result, expected)

        # Test that it is a copy
        copy = series.copy(deep=True)

        not_null = result.notna()
        if not_null.sum() > 0 and result.dtype in ["interval[int64, right]"]:
            with tm.assert_produces_warning(FutureWarning, match="incompatible dtype"):
                result[not_null] = np.nan
        else:
            result[not_null] = np.nan

        # Make sure original not changed
        tm.assert_series_equal(series, copy)

    def test_convert_string_dtype(self, nullable_string_dtype):
        # https://github.com/pandas-dev/pandas/issues/31731 -> converting columns
        # that are already string dtype
        columns = {"A": ["a", "b", pd.NA], "B": ["ä", "ö", "ü"]}
        df = pd.DataFrame(columns, dtype=nullable_string_dtype)
        tm.assert_frame_equal(df, df.convert_dtypes())

    def test_convert_bool_dtype(self):
        # GH32287
        df = pd.DataFrame({"A": pd.array([True])})
        converted = df.convert_dtypes()
        tm.assert_frame_equal(df, converted)

    def test_convert_byte_string_dtype(self):
        # GH-43183
        df = pd.DataFrame(data={"A": b"binary-string"}, index=[0])
        # Byte strings are expected to come back unchanged.
        tm.assert_frame_equal(df.convert_dtypes(), df)

    @pytest.mark.parametrize(
        "infer_objects, dtype", [(True, "Int64"), (False, "object")]
    )
    def test_convert_dtype_object_with_na(self, infer_objects, dtype):
        # GH#48791
        values = [1, pd.NA]
        result = pd.Series(values).convert_dtypes(infer_objects=infer_objects)
        tm.assert_series_equal(result, pd.Series(values, dtype=dtype))

    @pytest.mark.parametrize(
        "infer_objects, dtype", [(True, "Float64"), (False, "object")]
    )
    def test_convert_dtype_object_with_na_float(self, infer_objects, dtype):
        # GH#48791
        values = [1.5, pd.NA]
        result = pd.Series(values).convert_dtypes(infer_objects=infer_objects)
        tm.assert_series_equal(result, pd.Series(values, dtype=dtype))

    def test_convert_dtypes_pyarrow_to_np_nullable(self):
        # GH 53648
        pytest.importorskip("pyarrow")
        arrow_backed = pd.Series(range(2), dtype="int32[pyarrow]")
        result = arrow_backed.convert_dtypes(dtype_backend="numpy_nullable")
        tm.assert_series_equal(result, pd.Series(range(2), dtype="Int32"))

    def test_convert_dtypes_pyarrow_null(self):
        # GH#55346
        pa = pytest.importorskip("pyarrow")
        all_none = pd.Series([None, None])
        result = all_none.convert_dtypes(dtype_backend="pyarrow")
        expected = pd.Series([None, None], dtype=pd.ArrowDtype(pa.null()))
        tm.assert_series_equal(result, expected)
|
||||
@ -0,0 +1,91 @@
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
from pandas import (
|
||||
Series,
|
||||
Timestamp,
|
||||
)
|
||||
import pandas._testing as tm
|
||||
|
||||
|
||||
class TestCopy:
    """Tests for Series.copy with and without copy-on-write."""

    @pytest.mark.parametrize("deep", ["default", None, False, True])
    def test_copy(self, deep, using_copy_on_write, warn_copy_on_write):
        ser = Series(np.arange(10), dtype="float64")

        # default deep is True; "default" means: do not pass the keyword
        copy_kwargs = {} if deep == "default" else {"deep": deep}
        ser2 = ser.copy(**copy_kwargs)

        if using_copy_on_write:
            # INFO(CoW) a shallow copy doesn't yet copy the data
            # but parent will not be modified (CoW)
            shares_memory = np.may_share_memory(ser.values, ser2.values)
            if deep is None or deep is False:
                assert shares_memory
            else:
                assert not shares_memory

        with tm.assert_cow_warning(warn_copy_on_write and deep is False):
            ser2[::2] = np.nan

        # The copy always sees the write.
        assert np.isnan(ser2[0])
        if deep is not False or using_copy_on_write:
            # Did not modify original Series
            assert not np.isnan(ser[0])
        else:
            # we DID modify the original Series
            assert np.isnan(ser[0])

    @pytest.mark.filterwarnings("ignore:Setting a value on a view:FutureWarning")
    @pytest.mark.parametrize("deep", ["default", None, False, True])
    def test_copy_tzaware(self, deep, using_copy_on_write):
        # GH#11794
        # copy of tz-aware
        expected = Series([Timestamp("2012/01/01", tz="UTC")])
        expected2 = Series([Timestamp("1999/01/01", tz="UTC")])

        ser = Series([Timestamp("2012/01/01", tz="UTC")])

        copy_kwargs = {} if deep == "default" else {"deep": deep}
        ser2 = ser.copy(**copy_kwargs)

        if using_copy_on_write:
            # INFO(CoW) a shallow copy doesn't yet copy the data
            # but parent will not be modified (CoW)
            shares_memory = np.may_share_memory(ser.values, ser2.values)
            if deep is None or deep is False:
                assert shares_memory
            else:
                assert not shares_memory

        ser2[0] = Timestamp("1999/01/01", tz="UTC")

        # The copy always holds the new value.
        tm.assert_series_equal(ser2, expected2)
        # default deep is True
        if deep is not False or using_copy_on_write:
            # Did not modify original Series
            tm.assert_series_equal(ser, expected)
        else:
            # we DID modify the original Series
            tm.assert_series_equal(ser, expected2)

    def test_copy_name(self, datetime_series):
        duplicate = datetime_series.copy()
        assert duplicate.name == datetime_series.name

    def test_copy_index_name_checking(self, datetime_series):
        # don't want to be able to modify the index stored elsewhere after
        # making a copy

        datetime_series.index.name = None
        assert datetime_series.index.name is None
        # NOTE(review): this identity check is trivially true; kept as-is.
        assert datetime_series is datetime_series

        duplicate = datetime_series.copy()
        duplicate.index.name = "foo"
        # Renaming the copy's index must not leak back to the original.
        assert datetime_series.index.name is None
|
||||
@ -0,0 +1,34 @@
|
||||
import numpy as np
|
||||
|
||||
import pandas as pd
|
||||
from pandas import (
|
||||
Categorical,
|
||||
Series,
|
||||
)
|
||||
import pandas._testing as tm
|
||||
|
||||
|
||||
class TestSeriesCount:
    """Tests for Series.count."""

    def test_count(self, datetime_series):
        # Without missing values every element is counted.
        assert datetime_series.count() == len(datetime_series)

        datetime_series[::2] = np.nan

        finite_total = np.isfinite(datetime_series).sum()
        assert datetime_series.count() == finite_total

    def test_count_inf_as_na(self):
        # GH#29478
        ser = Series([pd.Timestamp("1990/1/1")])
        msg = "use_inf_as_na option is deprecated"
        with tm.assert_produces_warning(FutureWarning, match=msg):
            with pd.option_context("use_inf_as_na", True):
                assert ser.count() == 1

    def test_count_categorical(self):
        # Two of the four entries are missing.
        values = Categorical(
            [np.nan, 1, 2, np.nan], categories=[5, 4, 3, 2, 1], ordered=True
        )
        assert Series(values).count() == 2
|
||||
@ -0,0 +1,185 @@
|
||||
import math
|
||||
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
import pandas as pd
|
||||
from pandas import (
|
||||
Series,
|
||||
date_range,
|
||||
isna,
|
||||
)
|
||||
import pandas._testing as tm
|
||||
|
||||
|
||||
class TestSeriesCov:
    """Tests for Series.cov."""

    def test_cov(self, datetime_series):
        """Check cov for full, partial and no overlap, all-NA and min_periods."""
        # full overlap
        tm.assert_almost_equal(
            datetime_series.cov(datetime_series), datetime_series.std() ** 2
        )

        # partial overlap
        tm.assert_almost_equal(
            datetime_series[:15].cov(datetime_series[5:]),
            datetime_series[5:15].std() ** 2,
        )

        # No overlap
        assert np.isnan(datetime_series[::2].cov(datetime_series[1::2]))

        # all NA
        cp = datetime_series[:10].copy()
        cp[:] = np.nan
        assert isna(cp.cov(cp))

        # min_periods
        assert isna(datetime_series[:15].cov(datetime_series[5:], min_periods=12))

        ts1 = datetime_series[:15].reindex(datetime_series.index)
        ts2 = datetime_series[5:].reindex(datetime_series.index)
        assert isna(ts1.cov(ts2, min_periods=12))

    @pytest.mark.parametrize("test_ddof", [None, 0, 1, 2, 3])
    @pytest.mark.parametrize("dtype", ["float64", "Float64"])
    def test_cov_ddof(self, test_ddof, dtype):
        """Series.cov must agree with np.cov for every ddof value."""
        # GH#34611
        # Draw both samples from ONE generator. Two generators created with
        # the same seed produce identical arrays, which would reduce this
        # covariance check to a variance check.
        rng = np.random.default_rng(2)
        np_array1 = rng.random(10)
        np_array2 = rng.random(10)

        s1 = Series(np_array1, dtype=dtype)
        s2 = Series(np_array2, dtype=dtype)

        result = s1.cov(s2, ddof=test_ddof)
        expected = np.cov(np_array1, np_array2, ddof=test_ddof)[0][1]
        assert math.isclose(expected, result)
|
||||
|
||||
|
||||
class TestSeriesCorr:
    """Tests for Series.corr."""

    @pytest.mark.parametrize("dtype", ["float64", "Float64"])
    def test_corr(self, datetime_series, dtype):
        stats = pytest.importorskip("scipy.stats")

        ts = datetime_series.astype(dtype)

        # full overlap
        tm.assert_almost_equal(ts.corr(ts), 1)

        # partial overlap
        tm.assert_almost_equal(ts[:15].corr(ts[5:]), 1)

        assert isna(ts[:15].corr(ts[5:], min_periods=12))

        ts1 = ts[:15].reindex(ts.index)
        ts2 = ts[5:].reindex(ts.index)
        assert isna(ts1.corr(ts2, min_periods=12))

        # No overlap
        assert np.isnan(ts[::2].corr(ts[1::2]))

        # all NA
        all_na = ts[:10].copy()
        all_na[:] = np.nan
        assert isna(all_na.corr(all_na))

        # Compare against scipy on a simple increasing series.
        left = Series(
            np.arange(10, dtype=np.float64),
            index=date_range("2020-01-01", periods=10),
            name="ts",
        )
        right = left.copy()
        result = left.corr(right)
        expected, _ = stats.pearsonr(left, right)
        tm.assert_almost_equal(result, expected)

    def test_corr_rank(self):
        stats = pytest.importorskip("scipy.stats")

        # kendall and spearman
        left = Series(
            np.arange(10, dtype=np.float64),
            index=date_range("2020-01-01", periods=10),
            name="ts",
        )
        right = left.copy()
        left[-5:] = left[:5].copy()

        result = left.corr(right, method="kendall")
        expected = stats.kendalltau(left, right)[0]
        tm.assert_almost_equal(result, expected)

        result = left.corr(right, method="spearman")
        expected = stats.spearmanr(left, right)[0]
        tm.assert_almost_equal(result, expected)

        # results from R
        left = Series(
            [
                -0.89926396,
                0.94209606,
                -1.03289164,
                -0.95445587,
                0.76910310,
                -0.06430576,
                -2.09704447,
                0.40660407,
                -0.89926396,
                0.94209606,
            ]
        )
        right = Series(
            [
                -1.01270225,
                -0.62210117,
                -1.56895827,
                0.59592943,
                -0.01680292,
                1.17258718,
                -1.06009347,
                -0.10222060,
                -0.89076239,
                0.89372375,
            ]
        )
        kendall_expected = 0.4319297
        spearman_expected = 0.5853767
        tm.assert_almost_equal(left.corr(right, method="kendall"), kendall_expected)
        tm.assert_almost_equal(left.corr(right, method="spearman"), spearman_expected)

    def test_corr_invalid_method(self):
        # GH PR #22298
        s1 = Series(np.random.default_rng(2).standard_normal(10))
        s2 = Series(np.random.default_rng(2).standard_normal(10))
        msg = "method must be either 'pearson', 'spearman', 'kendall', or a callable, "
        with pytest.raises(ValueError, match=msg):
            s1.corr(s2, method="____")

    def test_corr_callable_method(self, datetime_series):
        # simple correlation example
        # returns 1 if exact equality, 0 otherwise
        def my_corr(a, b):
            return 1.0 if (a == b).all() else 0.0

        # simple example
        s1 = Series([1, 2, 3, 4, 5])
        s2 = Series([5, 4, 3, 2, 1])
        expected = 0
        tm.assert_almost_equal(s1.corr(s2, method=my_corr), expected)

        # full overlap
        full = datetime_series.corr(datetime_series, method=my_corr)
        tm.assert_almost_equal(full, 1.0)

        # partial overlap
        partial = datetime_series[:15].corr(datetime_series[5:], method=my_corr)
        tm.assert_almost_equal(partial, 1.0)

        # No overlap
        disjoint = datetime_series[::2].corr(datetime_series[1::2], method=my_corr)
        assert np.isnan(disjoint)

        # dataframe example
        df = pd.DataFrame([s1, s2])
        expected = pd.DataFrame([{0: 1.0, 1: 0}, {0: 0, 1: 1.0}])
        tm.assert_almost_equal(df.transpose().corr(method=my_corr), expected)
|
||||
@ -0,0 +1,203 @@
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
from pandas.compat.numpy import np_version_gte1p25
|
||||
|
||||
from pandas.core.dtypes.common import (
|
||||
is_complex_dtype,
|
||||
is_extension_array_dtype,
|
||||
)
|
||||
|
||||
from pandas import (
|
||||
NA,
|
||||
Period,
|
||||
Series,
|
||||
Timedelta,
|
||||
Timestamp,
|
||||
date_range,
|
||||
)
|
||||
import pandas._testing as tm
|
||||
|
||||
|
||||
class TestSeriesDescribe:
    """Tests for Series.describe across dtypes."""

    # Row labels of the describe() output, shared by the tests below.
    _NUMERIC_LABELS = ["count", "mean", "std", "min", "25%", "50%", "75%", "max"]
    _DATETIME_LABELS = ["count", "mean", "min", "25%", "50%", "75%", "max"]
    _OBJECT_LABELS = ["count", "unique", "top", "freq"]

    def test_describe_ints(self):
        ser = Series([0, 1, 2, 3, 4], name="int_data")
        result = ser.describe()
        summary = [5, 2, ser.std(), 0, 1, 2, 3, 4]
        expected = Series(summary, name="int_data", index=self._NUMERIC_LABELS)
        tm.assert_series_equal(result, expected)

    def test_describe_bools(self):
        ser = Series([True, True, False, False, False], name="bool_data")
        result = ser.describe()
        expected = Series(
            [5, 2, False, 3], name="bool_data", index=self._OBJECT_LABELS
        )
        tm.assert_series_equal(result, expected)

    def test_describe_strs(self):
        ser = Series(["a", "a", "b", "c", "d"], name="str_data")
        result = ser.describe()
        expected = Series([5, 4, "a", 2], name="str_data", index=self._OBJECT_LABELS)
        tm.assert_series_equal(result, expected)

    def test_describe_timedelta64(self):
        day_counts = ["1 days", "2 days", "3 days", "4 days", "5 days"]
        ser = Series([Timedelta(text) for text in day_counts], name="timedelta_data")
        result = ser.describe()
        summary = [5, ser[2], ser.std(), ser[0], ser[1], ser[2], ser[3], ser[4]]
        expected = Series(
            summary, name="timedelta_data", index=self._NUMERIC_LABELS
        )
        tm.assert_series_equal(result, expected)

    def test_describe_period(self):
        ser = Series(
            [Period("2020-01", "M"), Period("2020-01", "M"), Period("2019-12", "M")],
            name="period_data",
        )
        result = ser.describe()
        expected = Series(
            [3, 2, ser[0], 2], name="period_data", index=self._OBJECT_LABELS
        )
        tm.assert_series_equal(result, expected)

    def test_describe_empty_object(self):
        # https://github.com/pandas-dev/pandas/issues/27183
        s = Series([None, None], dtype=object)
        expected = Series(
            [0, 0, np.nan, np.nan], dtype=object, index=self._OBJECT_LABELS
        )
        tm.assert_series_equal(s.describe(), expected)

        # The zero-length slice gives the same summary.
        result = s[:0].describe()
        tm.assert_series_equal(result, expected)
        # ensure NaN, not None
        for position in (2, 3):
            assert np.isnan(result.iloc[position])

    def test_describe_with_tz(self, tz_naive_fixture):
        # GH 21332
        tz = tz_naive_fixture
        name = str(tz_naive_fixture)
        start = Timestamp(2018, 1, 1)
        end = Timestamp(2018, 1, 5)
        s = Series(date_range(start, end, tz=tz), name=name)
        result = s.describe()
        summary = [
            5,
            Timestamp(2018, 1, 3).tz_localize(tz),
            start.tz_localize(tz),
            s[1],
            s[2],
            s[3],
            end.tz_localize(tz),
        ]
        expected = Series(summary, name=name, index=self._DATETIME_LABELS)
        tm.assert_series_equal(result, expected)

    def test_describe_with_tz_numeric(self):
        name = tz = "CET"
        start = Timestamp(2018, 1, 1)
        end = Timestamp(2018, 1, 5)
        s = Series(date_range(start, end, tz=tz), name=name)

        result = s.describe()

        summary = [
            5,
            Timestamp("2018-01-03 00:00:00", tz=tz),
            Timestamp("2018-01-01 00:00:00", tz=tz),
            Timestamp("2018-01-02 00:00:00", tz=tz),
            Timestamp("2018-01-03 00:00:00", tz=tz),
            Timestamp("2018-01-04 00:00:00", tz=tz),
            Timestamp("2018-01-05 00:00:00", tz=tz),
        ]
        expected = Series(summary, name=name, index=self._DATETIME_LABELS)
        tm.assert_series_equal(result, expected)

    def test_datetime_is_numeric_includes_datetime(self):
        s = Series(date_range("2012", periods=3))
        result = s.describe()
        summary = [
            3,
            Timestamp("2012-01-02"),
            Timestamp("2012-01-01"),
            Timestamp("2012-01-01T12:00:00"),
            Timestamp("2012-01-02"),
            Timestamp("2012-01-02T12:00:00"),
            Timestamp("2012-01-03"),
        ]
        expected = Series(summary, index=self._DATETIME_LABELS)
        tm.assert_series_equal(result, expected)

    @pytest.mark.filterwarnings("ignore:Casting complex values to real discards")
    def test_numeric_result_dtype(self, any_numeric_dtype):
        # GH#48340 - describe should always return float on non-complex numeric input
        if is_extension_array_dtype(any_numeric_dtype):
            dtype = "Float64"
        elif is_complex_dtype(any_numeric_dtype):
            dtype = "complex128"
        else:
            dtype = None

        ser = Series([0, 1], dtype=any_numeric_dtype)
        if dtype == "complex128" and np_version_gte1p25:
            # Complex input is rejected on the newer numpy versions.
            with pytest.raises(
                TypeError, match=r"^a must be an array of real numbers$"
            ):
                ser.describe()
            return

        result = ser.describe()
        summary = [2.0, 0.5, ser.std(), 0, 0.25, 0.5, 0.75, 1.0]
        expected = Series(summary, index=self._NUMERIC_LABELS, dtype=dtype)
        tm.assert_series_equal(result, expected)

    def test_describe_one_element_ea(self):
        # GH#52515
        ser = Series([0.0], dtype="Float64")
        # describe() must not emit any warning here.
        with tm.assert_produces_warning(None):
            result = ser.describe()
        expected = Series(
            [1, 0, NA, 0, 0, 0, 0, 0],
            dtype="Float64",
            index=self._NUMERIC_LABELS,
        )
        tm.assert_series_equal(result, expected)
|
||||
@ -0,0 +1,88 @@
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
from pandas import (
|
||||
Series,
|
||||
TimedeltaIndex,
|
||||
date_range,
|
||||
)
|
||||
import pandas._testing as tm
|
||||
|
||||
|
||||
class TestSeriesDiff:
    """Tests for ``Series.diff`` on numeric, datetime-like, bool and object data."""

    def test_diff_np(self):
        # TODO(__array_function__): could make np.diff return a Series
        #  matching ser.diff()
        res = np.diff(Series(np.arange(5)))
        tm.assert_numpy_array_equal(res, np.array([1, 1, 1, 1]))

    def test_diff_int(self):
        # int dtype
        base = 10000000000000000
        ser = Series([base, base + 1])

        assert ser.diff()[1] == 1

    def test_diff_tz(self):
        # Combined datetime diff, normal diff and boolean diff test
        ts = Series(
            np.arange(10, dtype=np.float64),
            index=date_range("2020-01-01", periods=10),
            name="ts",
        )
        # default periods: the result is not inspected, the call only has to succeed
        ts.diff()

        # neg n
        tm.assert_series_equal(ts.diff(-1), ts - ts.shift(-1))

        # 0
        tm.assert_series_equal(ts.diff(0), ts - ts)

    def test_diff_dt64(self):
        # datetime diff (GH#3100)
        ser = Series(date_range("20130102", periods=5))
        first_diff = ser.diff()
        first_manual = ser - ser.shift(1)
        tm.assert_series_equal(first_diff, first_manual)

        # timedelta diff: difference the timedelta results once more
        second_manual = first_diff - first_diff.shift(1)
        second_diff = first_manual.diff()
        tm.assert_series_equal(second_manual, second_diff)

    def test_diff_dt64tz(self):
        # with tz
        stamps = date_range("2000-01-01 09:00:00", periods=5, tz="US/Eastern")
        ser = Series(stamps, name="foo")

        expected = Series(TimedeltaIndex(["NaT"] + ["1 days"] * 4), name="foo")
        tm.assert_series_equal(ser.diff(), expected)

    @pytest.mark.parametrize(
        "input,output,diff",
        [([False, True, True, False, False], [np.nan, True, False, True, False], 1)],
    )
    def test_diff_bool(self, input, output, diff):
        # boolean series (test for fixing #17294)
        # NOTE: ``diff`` is parametrized but not forwarded; diff() uses its default.
        tm.assert_series_equal(Series(input).diff(), Series(output))

    def test_diff_object_dtype(self):
        # object series
        ser = Series([False, True, 5.0, np.nan, True, False])
        tm.assert_series_equal(ser.diff(), ser - ser.shift(1))
|
||||
@ -0,0 +1,99 @@
|
||||
import pytest
|
||||
|
||||
from pandas import (
|
||||
Index,
|
||||
Series,
|
||||
)
|
||||
import pandas._testing as tm
|
||||
from pandas.api.types import is_bool_dtype
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "data, index, drop_labels, axis, expected_data, expected_index",
    [
        # Unique Index
        ([1, 2], ["one", "two"], ["two"], 0, [1], ["one"]),
        ([1, 2], ["one", "two"], ["two"], "rows", [1], ["one"]),
        # GH 5248 Non-Unique Index ("one" appears twice)
        ([1, 1, 2], ["one", "two", "one"], ["two"], 0, [1, 2], ["one", "one"]),
        ([1, 1, 2], ["one", "two", "one"], "two", 0, [1, 2], ["one", "one"]),
        ([1, 1, 2], ["one", "two", "one"], ["one"], 0, [1], ["two"]),
        ([1, 1, 2], ["one", "two", "one"], "one", 0, [1], ["two"]),
    ],
)
def test_drop_unique_and_non_unique_index(
    data, index, axis, drop_labels, expected_data, expected_index
):
    """Series.drop removes every row whose label matches, by list or scalar label."""
    expected = Series(data=expected_data, index=expected_index)

    result = Series(data=data, index=index).drop(drop_labels, axis=axis)

    tm.assert_series_equal(result, expected)
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "data, index, drop_labels, axis, error_type, error_desc",
    [
        # single string/tuple-like
        (range(3), list("abc"), "bc", 0, KeyError, "not found in axis"),
        (range(3), list("abc"), ("a",), 0, KeyError, "not found in axis"),
        # bad axis
        (range(3), list("abc"), "one", "columns", ValueError, "No axis named columns"),
    ],
)
def test_drop_exception_raised(data, index, drop_labels, axis, error_type, error_desc):
    """Series.drop raises for labels missing from the index and for an invalid axis."""
    series = Series(data, index=index)

    with pytest.raises(error_type, match=error_desc):
        series.drop(drop_labels, axis=axis)
|
||||
|
||||
|
||||
def test_drop_with_ignore_errors():
    """errors='ignore' skips missing labels; a boolean index label can be dropped."""
    # errors='ignore'
    letters = Series(range(3), index=list("abc"))

    # "bc" is not a label, so the Series comes back unchanged
    tm.assert_series_equal(letters.drop("bc", errors="ignore"), letters)

    # only the existing label "a" is removed; "d" is ignored
    tm.assert_series_equal(
        letters.drop(["a", "d"], errors="ignore"), letters.iloc[1:]
    )

    # GH 8522
    bool_indexed = Series([2, 3], index=[True, False])
    assert is_bool_dtype(bool_indexed.index)
    assert bool_indexed.index.dtype == bool
    tm.assert_series_equal(bool_indexed.drop(True), Series([3], index=[False]))
|
||||
|
||||
|
||||
@pytest.mark.parametrize("index", [[1, 2, 3], [1, 1, 3]])
@pytest.mark.parametrize("drop_labels", [[], [1], [3]])
def test_drop_empty_list(index, drop_labels):
    """Dropping an empty or single-label list works for unique and duplicated indexes."""
    # GH 21494
    remaining = [label for label in index if label not in drop_labels]
    expected = Series(index=remaining, dtype=object)

    result = Series(index=index, dtype=object).drop(drop_labels)

    tm.assert_series_equal(result, expected)
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "data, index, drop_labels",
    [
        (None, [1, 2, 3], [1, 4]),
        (None, [1, 2, 2], [1, 4]),
        ([2, 3], [0, 1], [False, True]),
    ],
)
def test_drop_non_empty_list(data, index, drop_labels):
    """Dropping the parametrized label lists raises KeyError."""
    # GH 21494 and GH 16877
    if data is None:
        # give the data-less Series an explicit object dtype
        dtype = object
    else:
        dtype = None
    series = Series(data=data, index=index, dtype=dtype)

    with pytest.raises(KeyError, match="not found in axis"):
        series.drop(drop_labels)
|
||||
|
||||
|
||||
def test_drop_index_ea_dtype(any_numeric_ea_dtype):
|
||||
# GH#45860
|
||||
df = Series(100, index=Index([1, 2, 2], dtype=any_numeric_ea_dtype))
|
||||
idx = Index([df.index[1]])
|
||||
result = df.drop(idx)
|
||||
expected = Series(100, index=Index([1], dtype=any_numeric_ea_dtype))
|
||||
tm.assert_series_equal(result, expected)
|
||||
@ -0,0 +1,267 @@
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
import pandas as pd
|
||||
from pandas import (
|
||||
Categorical,
|
||||
Series,
|
||||
)
|
||||
import pandas._testing as tm
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"keep, expected",
|
||||
[
|
||||
("first", Series([False, False, False, False, True, True, False])),
|
||||
("last", Series([False, True, True, False, False, False, False])),
|
||||
(False, Series([False, True, True, False, True, True, False])),
|
||||
],
|
||||
)
|
||||
def test_drop_duplicates(any_numpy_dtype, keep, expected):
|
||||
tc = Series([1, 0, 3, 5, 3, 0, 4], dtype=np.dtype(any_numpy_dtype))
|
||||
|
||||
if tc.dtype == "bool":
|
||||
pytest.skip("tested separately in test_drop_duplicates_bool")
|
||||
|
||||
tm.assert_series_equal(tc.duplicated(keep=keep), expected)
|
||||
tm.assert_series_equal(tc.drop_duplicates(keep=keep), tc[~expected])
|
||||
sc = tc.copy()
|
||||
return_value = sc.drop_duplicates(keep=keep, inplace=True)
|
||||
assert return_value is None
|
||||
tm.assert_series_equal(sc, tc[~expected])
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "keep, expected",
    [
        ("first", Series([False, False, True, True])),
        ("last", Series([True, True, False, False])),
        (False, Series([True, True, True, True])),
    ],
)
def test_drop_duplicates_bool(keep, expected):
    """Boolean counterpart of test_drop_duplicates."""
    flags = Series([True, False, True, False])

    tm.assert_series_equal(flags.duplicated(keep=keep), expected)
    tm.assert_series_equal(flags.drop_duplicates(keep=keep), flags[~expected])

    # in-place variant: the copy is mutated and None is returned
    in_place = flags.copy()
    returned = in_place.drop_duplicates(keep=keep, inplace=True)
    tm.assert_series_equal(in_place, flags[~expected])
    assert returned is None
|
||||
|
||||
|
||||
@pytest.mark.parametrize("values", [[], list(range(5))])
|
||||
def test_drop_duplicates_no_duplicates(any_numpy_dtype, keep, values):
|
||||
tc = Series(values, dtype=np.dtype(any_numpy_dtype))
|
||||
expected = Series([False] * len(tc), dtype="bool")
|
||||
|
||||
if tc.dtype == "bool":
|
||||
# 0 -> False and 1-> True
|
||||
# any other value would be duplicated
|
||||
tc = tc[:2]
|
||||
expected = expected[:2]
|
||||
|
||||
tm.assert_series_equal(tc.duplicated(keep=keep), expected)
|
||||
|
||||
result_dropped = tc.drop_duplicates(keep=keep)
|
||||
tm.assert_series_equal(result_dropped, tc)
|
||||
|
||||
# validate shallow copy
|
||||
assert result_dropped is not tc
|
||||
|
||||
|
||||
class TestSeriesDropDuplicates:
|
||||
@pytest.fixture(
|
||||
params=["int_", "uint", "float64", "str_", "timedelta64[h]", "datetime64[D]"]
|
||||
)
|
||||
def dtype(self, request):
|
||||
return request.param
|
||||
|
||||
@pytest.fixture
|
||||
def cat_series_unused_category(self, dtype, ordered):
|
||||
# Test case 1
|
||||
cat_array = np.array([1, 2, 3, 4, 5], dtype=np.dtype(dtype))
|
||||
|
||||
input1 = np.array([1, 2, 3, 3], dtype=np.dtype(dtype))
|
||||
cat = Categorical(input1, categories=cat_array, ordered=ordered)
|
||||
tc1 = Series(cat)
|
||||
return tc1
|
||||
|
||||
def test_drop_duplicates_categorical_non_bool(self, cat_series_unused_category):
|
||||
tc1 = cat_series_unused_category
|
||||
|
||||
expected = Series([False, False, False, True])
|
||||
|
||||
result = tc1.duplicated()
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
result = tc1.drop_duplicates()
|
||||
tm.assert_series_equal(result, tc1[~expected])
|
||||
|
||||
sc = tc1.copy()
|
||||
return_value = sc.drop_duplicates(inplace=True)
|
||||
assert return_value is None
|
||||
tm.assert_series_equal(sc, tc1[~expected])
|
||||
|
||||
def test_drop_duplicates_categorical_non_bool_keeplast(
|
||||
self, cat_series_unused_category
|
||||
):
|
||||
tc1 = cat_series_unused_category
|
||||
|
||||
expected = Series([False, False, True, False])
|
||||
|
||||
result = tc1.duplicated(keep="last")
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
result = tc1.drop_duplicates(keep="last")
|
||||
tm.assert_series_equal(result, tc1[~expected])
|
||||
|
||||
sc = tc1.copy()
|
||||
return_value = sc.drop_duplicates(keep="last", inplace=True)
|
||||
assert return_value is None
|
||||
tm.assert_series_equal(sc, tc1[~expected])
|
||||
|
||||
def test_drop_duplicates_categorical_non_bool_keepfalse(
|
||||
self, cat_series_unused_category
|
||||
):
|
||||
tc1 = cat_series_unused_category
|
||||
|
||||
expected = Series([False, False, True, True])
|
||||
|
||||
result = tc1.duplicated(keep=False)
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
result = tc1.drop_duplicates(keep=False)
|
||||
tm.assert_series_equal(result, tc1[~expected])
|
||||
|
||||
sc = tc1.copy()
|
||||
return_value = sc.drop_duplicates(keep=False, inplace=True)
|
||||
assert return_value is None
|
||||
tm.assert_series_equal(sc, tc1[~expected])
|
||||
|
||||
@pytest.fixture
|
||||
def cat_series(self, dtype, ordered):
|
||||
# no unused categories, unlike cat_series_unused_category
|
||||
cat_array = np.array([1, 2, 3, 4, 5], dtype=np.dtype(dtype))
|
||||
|
||||
input2 = np.array([1, 2, 3, 5, 3, 2, 4], dtype=np.dtype(dtype))
|
||||
cat = Categorical(input2, categories=cat_array, ordered=ordered)
|
||||
tc2 = Series(cat)
|
||||
return tc2
|
||||
|
||||
def test_drop_duplicates_categorical_non_bool2(self, cat_series):
|
||||
tc2 = cat_series
|
||||
|
||||
expected = Series([False, False, False, False, True, True, False])
|
||||
|
||||
result = tc2.duplicated()
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
result = tc2.drop_duplicates()
|
||||
tm.assert_series_equal(result, tc2[~expected])
|
||||
|
||||
sc = tc2.copy()
|
||||
return_value = sc.drop_duplicates(inplace=True)
|
||||
assert return_value is None
|
||||
tm.assert_series_equal(sc, tc2[~expected])
|
||||
|
||||
def test_drop_duplicates_categorical_non_bool2_keeplast(self, cat_series):
|
||||
tc2 = cat_series
|
||||
|
||||
expected = Series([False, True, True, False, False, False, False])
|
||||
|
||||
result = tc2.duplicated(keep="last")
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
result = tc2.drop_duplicates(keep="last")
|
||||
tm.assert_series_equal(result, tc2[~expected])
|
||||
|
||||
sc = tc2.copy()
|
||||
return_value = sc.drop_duplicates(keep="last", inplace=True)
|
||||
assert return_value is None
|
||||
tm.assert_series_equal(sc, tc2[~expected])
|
||||
|
||||
def test_drop_duplicates_categorical_non_bool2_keepfalse(self, cat_series):
|
||||
tc2 = cat_series
|
||||
|
||||
expected = Series([False, True, True, False, True, True, False])
|
||||
|
||||
result = tc2.duplicated(keep=False)
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
result = tc2.drop_duplicates(keep=False)
|
||||
tm.assert_series_equal(result, tc2[~expected])
|
||||
|
||||
sc = tc2.copy()
|
||||
return_value = sc.drop_duplicates(keep=False, inplace=True)
|
||||
assert return_value is None
|
||||
tm.assert_series_equal(sc, tc2[~expected])
|
||||
|
||||
def test_drop_duplicates_categorical_bool(self, ordered):
|
||||
tc = Series(
|
||||
Categorical(
|
||||
[True, False, True, False], categories=[True, False], ordered=ordered
|
||||
)
|
||||
)
|
||||
|
||||
expected = Series([False, False, True, True])
|
||||
tm.assert_series_equal(tc.duplicated(), expected)
|
||||
tm.assert_series_equal(tc.drop_duplicates(), tc[~expected])
|
||||
sc = tc.copy()
|
||||
return_value = sc.drop_duplicates(inplace=True)
|
||||
assert return_value is None
|
||||
tm.assert_series_equal(sc, tc[~expected])
|
||||
|
||||
expected = Series([True, True, False, False])
|
||||
tm.assert_series_equal(tc.duplicated(keep="last"), expected)
|
||||
tm.assert_series_equal(tc.drop_duplicates(keep="last"), tc[~expected])
|
||||
sc = tc.copy()
|
||||
return_value = sc.drop_duplicates(keep="last", inplace=True)
|
||||
assert return_value is None
|
||||
tm.assert_series_equal(sc, tc[~expected])
|
||||
|
||||
expected = Series([True, True, True, True])
|
||||
tm.assert_series_equal(tc.duplicated(keep=False), expected)
|
||||
tm.assert_series_equal(tc.drop_duplicates(keep=False), tc[~expected])
|
||||
sc = tc.copy()
|
||||
return_value = sc.drop_duplicates(keep=False, inplace=True)
|
||||
assert return_value is None
|
||||
tm.assert_series_equal(sc, tc[~expected])
|
||||
|
||||
def test_drop_duplicates_categorical_bool_na(self, nulls_fixture):
|
||||
# GH#44351
|
||||
ser = Series(
|
||||
Categorical(
|
||||
[True, False, True, False, nulls_fixture],
|
||||
categories=[True, False],
|
||||
ordered=True,
|
||||
)
|
||||
)
|
||||
result = ser.drop_duplicates()
|
||||
expected = Series(
|
||||
Categorical([True, False, np.nan], categories=[True, False], ordered=True),
|
||||
index=[0, 1, 4],
|
||||
)
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
def test_drop_duplicates_ignore_index(self):
|
||||
# GH#48304
|
||||
ser = Series([1, 2, 2, 3])
|
||||
result = ser.drop_duplicates(ignore_index=True)
|
||||
expected = Series([1, 2, 3])
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
def test_duplicated_arrow_dtype(self):
|
||||
pytest.importorskip("pyarrow")
|
||||
ser = Series([True, False, None, False], dtype="bool[pyarrow]")
|
||||
result = ser.drop_duplicates()
|
||||
expected = Series([True, False, None], dtype="bool[pyarrow]")
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
def test_drop_duplicates_arrow_strings(self):
|
||||
# GH#54904
|
||||
pa = pytest.importorskip("pyarrow")
|
||||
ser = Series(["a", "a"], dtype=pd.ArrowDtype(pa.string()))
|
||||
result = ser.drop_duplicates()
|
||||
expecetd = Series(["a"], dtype=pd.ArrowDtype(pa.string()))
|
||||
tm.assert_series_equal(result, expecetd)
|
||||
@ -0,0 +1,117 @@
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
from pandas import (
|
||||
DatetimeIndex,
|
||||
IntervalIndex,
|
||||
NaT,
|
||||
Period,
|
||||
Series,
|
||||
Timestamp,
|
||||
)
|
||||
import pandas._testing as tm
|
||||
|
||||
|
||||
class TestDropna:
|
||||
def test_dropna_empty(self):
|
||||
ser = Series([], dtype=object)
|
||||
|
||||
assert len(ser.dropna()) == 0
|
||||
return_value = ser.dropna(inplace=True)
|
||||
assert return_value is None
|
||||
assert len(ser) == 0
|
||||
|
||||
# invalid axis
|
||||
msg = "No axis named 1 for object type Series"
|
||||
with pytest.raises(ValueError, match=msg):
|
||||
ser.dropna(axis=1)
|
||||
|
||||
def test_dropna_preserve_name(self, datetime_series):
|
||||
datetime_series[:5] = np.nan
|
||||
result = datetime_series.dropna()
|
||||
assert result.name == datetime_series.name
|
||||
name = datetime_series.name
|
||||
ts = datetime_series.copy()
|
||||
return_value = ts.dropna(inplace=True)
|
||||
assert return_value is None
|
||||
assert ts.name == name
|
||||
|
||||
def test_dropna_no_nan(self):
|
||||
for ser in [
|
||||
Series([1, 2, 3], name="x"),
|
||||
Series([False, True, False], name="x"),
|
||||
]:
|
||||
result = ser.dropna()
|
||||
tm.assert_series_equal(result, ser)
|
||||
assert result is not ser
|
||||
|
||||
s2 = ser.copy()
|
||||
return_value = s2.dropna(inplace=True)
|
||||
assert return_value is None
|
||||
tm.assert_series_equal(s2, ser)
|
||||
|
||||
def test_dropna_intervals(self):
|
||||
ser = Series(
|
||||
[np.nan, 1, 2, 3],
|
||||
IntervalIndex.from_arrays([np.nan, 0, 1, 2], [np.nan, 1, 2, 3]),
|
||||
)
|
||||
|
||||
result = ser.dropna()
|
||||
expected = ser.iloc[1:]
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
def test_dropna_period_dtype(self):
|
||||
# GH#13737
|
||||
ser = Series([Period("2011-01", freq="M"), Period("NaT", freq="M")])
|
||||
result = ser.dropna()
|
||||
expected = Series([Period("2011-01", freq="M")])
|
||||
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
def test_datetime64_tz_dropna(self, unit):
|
||||
# DatetimeLikeBlock
|
||||
ser = Series(
|
||||
[
|
||||
Timestamp("2011-01-01 10:00"),
|
||||
NaT,
|
||||
Timestamp("2011-01-03 10:00"),
|
||||
NaT,
|
||||
],
|
||||
dtype=f"M8[{unit}]",
|
||||
)
|
||||
result = ser.dropna()
|
||||
expected = Series(
|
||||
[Timestamp("2011-01-01 10:00"), Timestamp("2011-01-03 10:00")],
|
||||
index=[0, 2],
|
||||
dtype=f"M8[{unit}]",
|
||||
)
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
# DatetimeTZBlock
|
||||
idx = DatetimeIndex(
|
||||
["2011-01-01 10:00", NaT, "2011-01-03 10:00", NaT], tz="Asia/Tokyo"
|
||||
).as_unit(unit)
|
||||
ser = Series(idx)
|
||||
assert ser.dtype == f"datetime64[{unit}, Asia/Tokyo]"
|
||||
result = ser.dropna()
|
||||
expected = Series(
|
||||
[
|
||||
Timestamp("2011-01-01 10:00", tz="Asia/Tokyo"),
|
||||
Timestamp("2011-01-03 10:00", tz="Asia/Tokyo"),
|
||||
],
|
||||
index=[0, 2],
|
||||
dtype=f"datetime64[{unit}, Asia/Tokyo]",
|
||||
)
|
||||
assert result.dtype == f"datetime64[{unit}, Asia/Tokyo]"
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
@pytest.mark.parametrize("val", [1, 1.5])
|
||||
def test_dropna_ignore_index(self, val):
|
||||
# GH#31725
|
||||
ser = Series([1, 2, val], index=[3, 2, 1])
|
||||
result = ser.dropna(ignore_index=True)
|
||||
expected = Series([1, 2, val])
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
ser.dropna(ignore_index=True, inplace=True)
|
||||
tm.assert_series_equal(ser, expected)
|
||||
@ -0,0 +1,7 @@
|
||||
import numpy as np
|
||||
|
||||
|
||||
class TestSeriesDtypes:
|
||||
def test_dtype(self, datetime_series):
|
||||
assert datetime_series.dtype == np.dtype("float64")
|
||||
assert datetime_series.dtypes == np.dtype("float64")
|
||||
@ -0,0 +1,77 @@
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
from pandas import (
|
||||
NA,
|
||||
Categorical,
|
||||
Series,
|
||||
)
|
||||
import pandas._testing as tm
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "keep, expected",
    [
        ("first", Series([False, False, True, False, True], name="name")),
        ("last", Series([True, True, False, False, False], name="name")),
        (False, Series([True, True, True, False, True], name="name")),
    ],
)
def test_duplicated_keep(keep, expected):
    """Series.duplicated honours ``keep`` and carries the Series name over."""
    letters = Series(["a", "b", "b", "c", "a"], name="name")

    tm.assert_series_equal(letters.duplicated(keep=keep), expected)
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "keep, expected",
    [
        ("first", Series([False, False, True, False, True])),
        ("last", Series([True, True, False, False, False])),
        (False, Series([True, True, True, False, True])),
    ],
)
def test_duplicated_nan_none(keep, expected):
    """In object dtype, NaN duplicates NaN while None is a separate value."""
    mixed_nulls = Series([np.nan, 3, 3, None, np.nan], dtype=object)

    tm.assert_series_equal(mixed_nulls.duplicated(keep=keep), expected)
|
||||
|
||||
|
||||
def test_duplicated_categorical_bool_na(nulls_fixture):
|
||||
# GH#44351
|
||||
ser = Series(
|
||||
Categorical(
|
||||
[True, False, True, False, nulls_fixture],
|
||||
categories=[True, False],
|
||||
ordered=True,
|
||||
)
|
||||
)
|
||||
result = ser.duplicated()
|
||||
expected = Series([False, False, True, True, False])
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "keep, vals",
    [
        ("last", [True, True, False]),
        ("first", [False, True, True]),
        (False, [True, True, True]),
    ],
)
def test_duplicated_mask(keep, vals):
    """Repeated NA values in a masked Int64 Series are flagged per ``keep``."""
    # GH#48150
    masked = Series([1, 2, NA, NA, NA], dtype="Int64")

    # the two leading non-NA values are unique; ``vals`` covers the three NAs
    expected = Series([False, False] + vals)
    tm.assert_series_equal(masked.duplicated(keep=keep), expected)
|
||||
|
||||
|
||||
def test_duplicated_mask_no_duplicated_na(keep):
|
||||
# GH#48150
|
||||
ser = Series([1, 2, NA], dtype="Int64")
|
||||
result = ser.duplicated(keep=keep)
|
||||
expected = Series([False, False, False])
|
||||
tm.assert_series_equal(result, expected)
|
||||
@ -0,0 +1,145 @@
|
||||
from contextlib import nullcontext
|
||||
import copy
|
||||
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
from pandas._libs.missing import is_matching_na
|
||||
from pandas.compat.numpy import np_version_gte1p25
|
||||
|
||||
from pandas.core.dtypes.common import is_float
|
||||
|
||||
from pandas import (
|
||||
Index,
|
||||
MultiIndex,
|
||||
Series,
|
||||
)
|
||||
import pandas._testing as tm
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "arr, idx",
    [
        ([1, 2, 3, 4], [0, 2, 1, 3]),
        ([1, np.nan, 3, np.nan], [0, 2, 1, 3]),
        (
            [1, np.nan, 3, np.nan],
            MultiIndex.from_tuples([(0, "a"), (1, "b"), (2, "c"), (3, "c")]),
        ),
    ],
)
def test_equals(arr, idx):
    """A copy equals its source until one of the source's values is changed."""
    original = Series(arr, index=idx)
    snapshot = original.copy()
    assert original.equals(snapshot)

    # mutate the source only; the copy must now compare unequal
    original[1] = 9
    assert not original.equals(snapshot)
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"val", [1, 1.1, 1 + 1j, True, "abc", [1, 2], (1, 2), {1, 2}, {"a": 1}, None]
|
||||
)
|
||||
def test_equals_list_array(val):
|
||||
# GH20676 Verify equals operator for list of Numpy arrays
|
||||
arr = np.array([1, 2])
|
||||
s1 = Series([arr, arr])
|
||||
s2 = s1.copy()
|
||||
assert s1.equals(s2)
|
||||
|
||||
s1[1] = val
|
||||
|
||||
cm = (
|
||||
tm.assert_produces_warning(FutureWarning, check_stacklevel=False)
|
||||
if isinstance(val, str) and not np_version_gte1p25
|
||||
else nullcontext()
|
||||
)
|
||||
with cm:
|
||||
assert not s1.equals(s2)
|
||||
|
||||
|
||||
def test_equals_false_negative():
    """Object Series built in different ways from [False, NaN] compare equal."""
    # GH8437 Verify false negative behavior of equals function for dtype object
    s1 = Series([False, np.nan])
    s2 = s1.copy()

    # Start from data-less object Series and set the first element to False,
    # via a slice for s3/s4 and via a scalar key for s5/s6.
    s3 = Series(index=range(2), dtype=object)
    s4 = s3.copy()
    s5 = s3.copy()
    s6 = s3.copy()
    s3[:-1] = False
    s4[:-1] = False
    s5[0] = False
    s6[0] = False

    for other in (s1, s2, s3, s4, s5):
        assert s1.equals(other)
    assert s5.equals(s6)
|
||||
|
||||
|
||||
def test_equals_matching_nas():
|
||||
# matching but not identical NAs
|
||||
left = Series([np.datetime64("NaT")], dtype=object)
|
||||
right = Series([np.datetime64("NaT")], dtype=object)
|
||||
assert left.equals(right)
|
||||
with tm.assert_produces_warning(FutureWarning, match="Dtype inference"):
|
||||
assert Index(left).equals(Index(right))
|
||||
assert left.array.equals(right.array)
|
||||
|
||||
left = Series([np.timedelta64("NaT")], dtype=object)
|
||||
right = Series([np.timedelta64("NaT")], dtype=object)
|
||||
assert left.equals(right)
|
||||
with tm.assert_produces_warning(FutureWarning, match="Dtype inference"):
|
||||
assert Index(left).equals(Index(right))
|
||||
assert left.array.equals(right.array)
|
||||
|
||||
left = Series([np.float64("NaN")], dtype=object)
|
||||
right = Series([np.float64("NaN")], dtype=object)
|
||||
assert left.equals(right)
|
||||
assert Index(left, dtype=left.dtype).equals(Index(right, dtype=right.dtype))
|
||||
assert left.array.equals(right.array)
|
||||
|
||||
|
||||
def test_equals_mismatched_nas(nulls_fixture, nulls_fixture2):
|
||||
# GH#39650
|
||||
left = nulls_fixture
|
||||
right = nulls_fixture2
|
||||
if hasattr(right, "copy"):
|
||||
right = right.copy()
|
||||
else:
|
||||
right = copy.copy(right)
|
||||
|
||||
ser = Series([left], dtype=object)
|
||||
ser2 = Series([right], dtype=object)
|
||||
|
||||
if is_matching_na(left, right):
|
||||
assert ser.equals(ser2)
|
||||
elif (left is None and is_float(right)) or (right is None and is_float(left)):
|
||||
assert ser.equals(ser2)
|
||||
else:
|
||||
assert not ser.equals(ser2)
|
||||
|
||||
|
||||
def test_equals_none_vs_nan():
    """None and NaN compare equal in object Series, Index and array (GH#39650)."""
    # GH#39650
    with_none = Series([1, None], dtype=object)
    with_nan = Series([1, np.nan], dtype=object)

    assert with_none.equals(with_nan)

    none_index = Index(with_none, dtype=with_none.dtype)
    nan_index = Index(with_nan, dtype=with_nan.dtype)
    assert none_index.equals(nan_index)

    assert with_none.array.equals(with_nan.array)
|
||||
|
||||
|
||||
def test_equals_None_vs_float():
    """An all-None Series never equals a Series of floats (GH#44190)."""
    # GH#44190
    floats = Series([-np.inf, np.nan, -1.0, 0.0, 1.0, 10 / 3, np.inf], dtype=object)
    nones = Series([None] * len(floats))

    # these series were found to be equal due to a bug, check that they are correctly
    # found to not equal
    assert not floats.equals(nones)
    assert not nones.equals(floats)

    floats_frame = floats.to_frame()
    nones_frame = nones.to_frame()
    assert not floats_frame.equals(nones_frame)
    assert not nones_frame.equals(floats_frame)

    assert not Index(floats, dtype="object").equals(Index(nones, dtype="object"))
    assert not Index(nones, dtype="object").equals(Index(floats, dtype="object"))
|
||||
@ -0,0 +1,183 @@
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
import pandas as pd
|
||||
import pandas._testing as tm
|
||||
|
||||
|
||||
def test_basic():
    """List-likes expand one row per element; NaN and [] each give one NaN row."""
    nested = pd.Series(
        [[0, 1, 2], np.nan, [], (3, 4)], index=list("abcd"), name="foo"
    )
    # each index label is repeated once per produced row
    expected = pd.Series(
        [0, 1, 2, np.nan, np.nan, 3, 4],
        index=list("aaabcdd"),
        dtype=object,
        name="foo",
    )
    tm.assert_series_equal(nested.explode(), expected)
|
||||
|
||||
|
||||
def test_mixed_type():
    """explode handles lists, NaN, None, an empty array and a nested Series together."""
    elements = [[0, 1, 2], np.nan, None, np.array([]), pd.Series(["a", "b"])]
    mixed = pd.Series(elements, name="foo")

    expected = pd.Series(
        [0, 1, 2, np.nan, None, np.nan, "a", "b"],
        index=[0, 0, 0, 1, 2, 3, 4, 4],
        dtype=object,
        name="foo",
    )
    tm.assert_series_equal(mixed.explode(), expected)
|
||||
|
||||
|
||||
def test_empty():
    """Exploding an empty object Series returns an equal empty Series."""
    empty = pd.Series(dtype=object)
    tm.assert_series_equal(empty.explode(), empty.copy())
|
||||
|
||||
|
||||
def test_nested_lists():
    """Only the outermost list level is expanded."""
    nested = pd.Series([[[1, 2, 3]], [1, 2], 1])

    # the inner [1, 2, 3] survives as a single element
    expected = pd.Series([[1, 2, 3], 1, 2, 1], index=[0, 1, 1, 2])
    tm.assert_series_equal(nested.explode(), expected)
|
||||
|
||||
|
||||
def test_multi_index():
    """explode repeats MultiIndex entries and keeps the level names."""
    source_index = pd.MultiIndex.from_product(
        [list("ab"), range(2)], names=["foo", "bar"]
    )
    ser = pd.Series([[0, 1, 2], np.nan, [], (3, 4)], name="foo", index=source_index)

    result = ser.explode()

    repeated = [("a", 0)] * 3 + [("a", 1), ("b", 0)] + [("b", 1)] * 2
    expected_index = pd.MultiIndex.from_tuples(repeated, names=["foo", "bar"])
    expected = pd.Series(
        [0, 1, 2, np.nan, np.nan, 3, 4],
        index=expected_index,
        dtype=object,
        name="foo",
    )
    tm.assert_series_equal(result, expected)
|
||||
|
||||
|
||||
def test_large():
    """Exploding an already-exploded 256-element Series changes nothing."""
    exploded_once = pd.Series([range(256)]).explode()
    tm.assert_series_equal(exploded_once.explode(), exploded_once)
|
||||
|
||||
|
||||
def test_invert_array():
    """explode undoes wrapping each row of a tz-aware column in an array."""
    frame = pd.DataFrame({"a": pd.date_range("20190101", periods=3, tz="UTC")})

    def row_to_array(row):
        # turn each row into its backing array
        return row.array

    wrapped = frame.apply(row_to_array, axis=1)

    # rename() with no argument drops the column name from the expected Series
    tm.assert_series_equal(wrapped.explode(), frame["a"].rename())
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "s", [pd.Series([1, 2, 3]), pd.Series(pd.date_range("2019", periods=3, tz="UTC"))]
)
def test_non_object_dtype(s):
    """explode leaves a Series of scalars (non-object dtype) unchanged."""
    tm.assert_series_equal(s.explode(), s)
|
||||
|
||||
|
||||
def test_typical_usecase():
    """Split a delimited string column, explode it, and join it back to the frame."""
    records = [{"var1": "a,b,c", "var2": 1}, {"var1": "d,e,f", "var2": 2}]
    frame = pd.DataFrame(records, columns=["var1", "var2"])

    pieces = frame.var1.str.split(",")
    result = frame[["var2"]].join(pieces.explode())

    # each original row contributes three rows that share its index label
    expected = pd.DataFrame(
        {"var2": [1, 1, 1, 2, 2, 2], "var1": list("abcdef")},
        columns=["var2", "var1"],
        index=[0, 0, 0, 1, 1, 1],
    )
    tm.assert_frame_equal(result, expected)
|
||||
|
||||
|
||||
def test_nested_EA():
    """Exploding a Series of tz-aware date ranges yields one tz-aware Series."""
    # a nested EA array
    first_half = pd.date_range("20170101", periods=3, tz="UTC")
    second_half = pd.date_range("20170104", periods=3, tz="UTC")
    nested = pd.Series([first_half, second_half])

    # the two three-day ranges are contiguous, so together they span six days
    expected = pd.Series(
        pd.date_range("20170101", periods=6, tz="UTC"), index=[0, 0, 0, 1, 1, 1]
    )
    tm.assert_series_equal(nested.explode(), expected)
|
||||
|
||||
|
||||
def test_duplicate_index():
    """explode works when the index already holds duplicate labels (GH 28005)."""
    # GH 28005
    pairs = pd.Series([[1, 2], [3, 4]], index=[0, 0])

    expected = pd.Series([1, 2, 3, 4], index=[0, 0, 0, 0], dtype=object)
    tm.assert_series_equal(pairs.explode(), expected)
|
||||
|
||||
|
||||
def test_ignore_index():
    """``ignore_index=True`` yields a fresh 0..n-1 index on the result."""
    # GH 34932
    ser = pd.Series([[1, 2], [3, 4]])
    expected = pd.Series([1, 2, 3, 4], index=[0, 1, 2, 3], dtype=object)
    tm.assert_series_equal(ser.explode(ignore_index=True), expected)
|
||||
|
||||
|
||||
def test_explode_sets():
    """Explode accepts set elements; values are sorted before comparing."""
    # https://github.com/pandas-dev/pandas/issues/35614
    ser = pd.Series([{"a", "b", "c"}], index=[1])
    exploded = ser.explode()
    # set iteration order is arbitrary, so sort before comparing
    ordered = exploded.sort_values()
    expected = pd.Series(["a", "b", "c"], index=[1, 1, 1])
    tm.assert_series_equal(ordered, expected)
|
||||
|
||||
|
||||
def test_explode_scalars_can_ignore_index():
    """``ignore_index=True`` resets the index even when nothing is list-like."""
    # https://github.com/pandas-dev/pandas/issues/40487
    labelled = pd.Series([1, 2, 3], index=["a", "b", "c"])
    expected = pd.Series([1, 2, 3])
    tm.assert_series_equal(labelled.explode(ignore_index=True), expected)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("ignore_index", [True, False])
def test_explode_pyarrow_list_type(ignore_index):
    """Explode an Arrow list-typed Series containing nulls and an empty list."""
    # GH 53602
    pa = pytest.importorskip("pyarrow")

    list_dtype = pd.ArrowDtype(pa.list_(pa.int64()))
    item_dtype = pd.ArrowDtype(pa.int64())
    data = [[None, None], [1], [], [2, 3], None]
    ser = pd.Series(data, dtype=list_dtype)

    if ignore_index:
        expected_index = None
    else:
        expected_index = [0, 0, 1, 2, 3, 3, 4]
    expected = pd.Series(
        data=[None, None, 1, None, 2, 3, None],
        index=expected_index,
        dtype=item_dtype,
    )

    result = ser.explode(ignore_index=ignore_index)
    tm.assert_series_equal(result, expected)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("ignore_index", [True, False])
def test_explode_pyarrow_non_list_type(ignore_index):
    """Explode on an Arrow int64 Series returns the same three values."""
    pa = pytest.importorskip("pyarrow")
    values = [1, 2, 3]
    ser = pd.Series(values, dtype=pd.ArrowDtype(pa.int64()))
    expected = pd.Series([1, 2, 3], dtype="int64[pyarrow]", index=[0, 1, 2])
    tm.assert_series_equal(ser.explode(ignore_index=ignore_index), expected)
|
||||
|
||||
|
||||
def test_str_dtype():
    """Explode on a ``str``-dtype Series returns an equal but distinct object."""
    # https://github.com/pandas-dev/pandas/pull/61623
    original = pd.Series(["x", "y"], dtype="str")
    exploded = original.explode()
    # must be a new object, not the input handed back
    assert exploded is not original
    tm.assert_series_equal(exploded, original)
|
||||
File diff suppressed because it is too large
Load Diff
@ -0,0 +1,38 @@
|
||||
from pandas import (
|
||||
Index,
|
||||
Series,
|
||||
date_range,
|
||||
)
|
||||
import pandas._testing as tm
|
||||
|
||||
|
||||
class TestGetNumericData:
    def test_get_numeric_data_preserve_dtype(
        self, using_copy_on_write, warn_copy_on_write
    ):
        """Exercise ``_get_numeric_data`` on int, mixed, bool and datetime data."""
        # get the numeric data
        int_ser = Series([1, 2, 3])
        numeric = int_ser._get_numeric_data()
        tm.assert_series_equal(numeric, int_ser)

        # returned object is a shallow copy
        with tm.assert_cow_warning(warn_copy_on_write):
            numeric.iloc[0] = 0
        # under copy-on-write the write must not reach the source Series
        expected_first = 1 if using_copy_on_write else 0
        assert int_ser.iloc[0] == expected_first

        mixed_ser = Series([1, "2", 3.0])
        empty_object = Series([], dtype=object, index=Index([], dtype=object))
        tm.assert_series_equal(mixed_ser._get_numeric_data(), empty_object)

        bool_ser = Series([True, False, True])
        tm.assert_series_equal(bool_ser._get_numeric_data(), bool_ser)

        dt_ser = Series(date_range("20130101", periods=3))
        empty_datetime = Series([], dtype="M8[ns]", index=Index([], dtype=object))
        tm.assert_series_equal(dt_ser._get_numeric_data(), empty_datetime)
|
||||
@ -0,0 +1,8 @@
|
||||
import pandas._testing as tm
|
||||
|
||||
|
||||
def test_head_tail(string_series):
    """``head``/``tail`` agree with the equivalent positional slices."""
    comparisons = [
        (string_series.head(), string_series[:5]),
        (string_series.head(0), string_series[0:0]),
        (string_series.tail(), string_series[-5:]),
        (string_series.tail(0), string_series[0:0]),
    ]
    for result, expected in comparisons:
        tm.assert_series_equal(result, expected)
|
||||
@ -0,0 +1,56 @@
|
||||
import numpy as np
|
||||
|
||||
from pandas import (
|
||||
Series,
|
||||
interval_range,
|
||||
)
|
||||
import pandas._testing as tm
|
||||
|
||||
|
||||
class TestInferObjects:
    def test_copy(self, index_or_series):
        """``infer_objects(copy=False)`` shares memory with its input."""
        # GH#50096
        # case where we don't need to do inference because it is already non-object
        ints = index_or_series(np.array([1, 2, 3], dtype="int64"))
        inferred_ints = ints.infer_objects(copy=False)
        assert tm.shares_memory(inferred_ints, ints)

        # case where we try to do inference but can't do better than object
        mixed = index_or_series(np.array(["foo", 2], dtype=object))
        inferred_mixed = mixed.infer_objects(copy=False)
        assert tm.shares_memory(inferred_mixed, mixed)

    def test_infer_objects_series(self, index_or_series):
        """Object arrays are soft-converted; unconvertible data stays object."""
        # GH#11221
        all_ints = np.array([1, 2, 3], dtype="O")
        tm.assert_equal(
            index_or_series(all_ints).infer_objects(), index_or_series([1, 2, 3])
        )

        ints_with_none = np.array([1, 2, 3, None], dtype="O")
        tm.assert_equal(
            index_or_series(ints_with_none).infer_objects(),
            index_or_series([1.0, 2.0, 3.0, np.nan]),
        )

        # only soft conversions, unconvertible pass thru unchanged
        unconvertible = index_or_series(np.array([1, 2, 3, None, "a"], dtype="O"))
        inferred = unconvertible.infer_objects()
        expected = index_or_series([1, 2, 3, None, "a"], dtype=object)

        assert inferred.dtype == "object"
        tm.assert_equal(inferred, expected)

    def test_infer_objects_interval(self, index_or_series):
        """Interval data cast to object is recovered by ``infer_objects``."""
        # GH#50090
        intervals = index_or_series(interval_range(1, 10))
        round_tripped = intervals.astype(object).infer_objects()
        tm.assert_equal(round_tripped, intervals)

    def test_infer_objects_bytes(self):
        """A bytes Series comes back equal to a copy of itself."""
        # GH#49650
        bytes_ser = Series([b"a"], dtype="bytes")
        snapshot = bytes_ser.copy()
        tm.assert_series_equal(bytes_ser.infer_objects(), snapshot)
|
||||
@ -0,0 +1,192 @@
|
||||
from io import StringIO
|
||||
from string import ascii_uppercase
|
||||
import textwrap
|
||||
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
from pandas._config import using_string_dtype
|
||||
|
||||
from pandas.compat import (
|
||||
HAS_PYARROW,
|
||||
PYPY,
|
||||
)
|
||||
|
||||
from pandas import (
|
||||
CategoricalIndex,
|
||||
Index,
|
||||
MultiIndex,
|
||||
Series,
|
||||
date_range,
|
||||
)
|
||||
|
||||
|
||||
def test_info_categorical_column_just_works():
    """Smoke test: ``info`` runs on a categorical Series and on a filtered one."""
    n = 2500
    positions = np.random.default_rng(2).integers(0, 10, size=n, dtype=int)
    letters = np.array(list("abcdefghij")).take(positions)
    cat_ser = Series(letters).astype("category")
    cat_ser.isna()
    cat_ser.info(buf=StringIO())

    only_d = cat_ser[cat_ser == "d"]
    only_d.info(buf=StringIO())
|
||||
|
||||
|
||||
def test_info_categorical():
    """Smoke test: ``info`` runs on a Series indexed by a CategoricalIndex."""
    # GH14298
    cat_index = CategoricalIndex(["a", "b"])
    ser = Series(np.zeros(2), index=cat_index)
    ser.info(buf=StringIO())
|
||||
|
||||
|
||||
@pytest.mark.parametrize("verbose", [True, False])
def test_info_series(
    lexsorted_two_level_string_multiindex, verbose, using_infer_string
):
    """Compare the full ``Series.info`` text for a MultiIndex-ed int Series.

    The verbose table is only expected when ``verbose`` is true. The memory
    line carries a ``+`` qualifier unless both ``using_infer_string`` and
    ``HAS_PYARROW`` are true.
    """
    index = lexsorted_two_level_string_multiindex
    ser = Series(range(len(index)), index=index, name="sth")
    buf = StringIO()
    ser.info(verbose=verbose, buf=buf)
    result = buf.getvalue()

    expected = textwrap.dedent(
        """\
        <class 'pandas.core.series.Series'>
        MultiIndex: 10 entries, ('foo', 'one') to ('qux', 'three')
        """
    )
    if verbose:
        # The table is column-aligned: the count column is padded to the
        # width of its "Non-Null Count" header and followed by a two-space
        # gutter. The comparison is exact, so the padding must be kept.
        expected += textwrap.dedent(
            """\
            Series name: sth
            Non-Null Count  Dtype
            --------------  -----
            10 non-null     int64
            """
        )
    qualifier = "" if using_infer_string and HAS_PYARROW else "+"
    expected += textwrap.dedent(
        f"""\
        dtypes: int64(1)
        memory usage: {ser.memory_usage()}.0{qualifier} bytes
        """
    )
    assert result == expected
|
||||
|
||||
|
||||
def test_info_memory():
    """Compare the full ``Series.info`` text for a two-element int64 Series.

    The reported memory figure is taken from ``memory_usage()`` at run time,
    so only the layout of the output is pinned here.
    """
    s = Series([1, 2], dtype="i8")
    buf = StringIO()
    s.info(buf=buf)
    result = buf.getvalue()
    memory_bytes = float(s.memory_usage())
    # The table is column-aligned: the count column is padded to the width
    # of its "Non-Null Count" header and followed by a two-space gutter.
    # The comparison is exact, so the padding must be kept.
    expected = textwrap.dedent(
        f"""\
    <class 'pandas.core.series.Series'>
    RangeIndex: 2 entries, 0 to 1
    Series name: None
    Non-Null Count  Dtype
    --------------  -----
    2 non-null      int64
    dtypes: int64(1)
    memory usage: {memory_bytes} bytes
    """
    )
    assert result == expected
|
||||
|
||||
|
||||
def test_info_wide():
    """Passing ``max_cols`` to ``Series.info`` raises a ``ValueError``."""
    values = np.random.default_rng(2).standard_normal(101)
    ser = Series(values)
    msg = "Argument `max_cols` can only be passed in DataFrame.info, not Series.info"
    with pytest.raises(ValueError, match=msg):
        ser.info(max_cols=1)
|
||||
|
||||
|
||||
def test_info_shows_dtypes():
    """Check that ``Series.info`` prints the non-null count next to the dtype.

    A ten-element Series is built for each dtype below and the verbose
    table row ``"<count> non-null     <dtype>"`` is searched for in the output.
    """
    dtypes = [
        "int64",
        "float64",
        "datetime64[ns]",
        "timedelta64[ns]",
        "complex128",
        "object",
        "bool",
    ]
    n = 10
    for dtype in dtypes:
        s = Series(np.random.default_rng(2).integers(2, size=n).astype(dtype))
        buf = StringIO()
        s.info(buf=buf)
        res = buf.getvalue()
        # The count column is padded to the width of its "Non-Null Count"
        # header plus a two-space gutter, so five spaces follow
        # "10 non-null". A single space would never match.
        name = f"{n:d} non-null     {dtype}"
        assert name in res
|
||||
|
||||
|
||||
@pytest.mark.xfail(PYPY, reason="on PyPy deep=True doesn't change result")
def test_info_memory_usage_deep_not_pypy():
    """``deep=True`` reports more memory than the shallow measurement."""
    with_object_index = Series({"a": [1]}, index=["foo"])
    deep_usage = with_object_index.memory_usage(index=True, deep=True)
    shallow_usage = with_object_index.memory_usage(index=True)
    assert deep_usage > shallow_usage

    object_values = Series({"a": ["a"]})
    assert object_values.memory_usage(deep=True) > object_values.memory_usage()
|
||||
|
||||
|
||||
@pytest.mark.xfail(not PYPY, reason="on PyPy deep=True does not change result")
def test_info_memory_usage_deep_pypy():
    """``deep=True`` reports the same memory as the shallow measurement."""
    with_object_index = Series({"a": [1]}, index=["foo"])
    deep_usage = with_object_index.memory_usage(index=True, deep=True)
    shallow_usage = with_object_index.memory_usage(index=True)
    assert deep_usage == shallow_usage

    object_values = Series({"a": ["a"]})
    assert object_values.memory_usage(deep=True) == object_values.memory_usage()
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "index, plus",
    [
        ([1, 2, 3], False),
        (Index(list("ABC"), dtype="str"), not (using_string_dtype() and HAS_PYARROW)),
        (Index(list("ABC"), dtype=object), True),
        (MultiIndex.from_product([range(3), range(3)]), False),
        (
            MultiIndex.from_product([range(3), ["foo", "bar"]]),
            not (using_string_dtype() and HAS_PYARROW),
        ),
    ],
)
def test_info_memory_usage_qualified(index, plus):
    """The ``+`` qualifier appears in ``info`` output exactly when ``plus``."""
    ser = Series(1, index=index)
    sink = StringIO()
    ser.info(buf=sink)
    has_qualifier = "+" in sink.getvalue()
    assert has_qualifier == bool(plus)
|
||||
|
||||
|
||||
def test_info_memory_usage_bug_on_multiindex():
    """Compare deep memory usage of a MultiIndex-ed Series and its unstack."""
    # GH 14308
    # memory usage introspection should not materialize .values
    n_dates = 100
    n_ids = len(ascii_uppercase)
    levels = [list(ascii_uppercase), date_range("20160101", periods=n_dates)]
    index = MultiIndex.from_product(levels, names=["id", "date"])
    values = np.random.default_rng(2).standard_normal(n_dates * n_ids)
    stacked = Series(values, index=index)

    unstacked = stacked.unstack("id")
    assert stacked.values.nbytes == unstacked.values.nbytes

    stacked_usage = stacked.memory_usage(deep=True)
    unstacked_usage = unstacked.memory_usage(deep=True).sum()
    assert stacked_usage > unstacked_usage

    # high upper bound
    assert unstacked_usage - stacked_usage < 2000
|
||||
@ -0,0 +1,868 @@
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
import pandas.util._test_decorators as td
|
||||
|
||||
import pandas as pd
|
||||
from pandas import (
|
||||
Index,
|
||||
MultiIndex,
|
||||
Series,
|
||||
date_range,
|
||||
isna,
|
||||
)
|
||||
import pandas._testing as tm
|
||||
|
||||
|
||||
@pytest.fixture(
    params=[
        "linear",
        "index",
        "values",
        "nearest",
        "slinear",
        "zero",
        "quadratic",
        "cubic",
        "barycentric",
        "krogh",
        "polynomial",
        "spline",
        "piecewise_polynomial",
        "from_derivatives",
        "pchip",
        "akima",
        "cubicspline",
    ]
)
def nontemporal_method(request):
    """Fixture yielding a (method name, required kwargs) pair.

    The 'time' method is deliberately left out of the parameter list: it
    needs a Series with a DatetimeIndex and is generally tested separately
    from these non-temporal methods.
    """
    name = request.param
    # 'spline' and 'polynomial' are passed an explicit ``order``
    needs_order = name in ("spline", "polynomial")
    extra_kwargs = {"order": 1} if needs_order else {}
    return name, extra_kwargs
|
||||
|
||||
|
||||
@pytest.fixture(
    params=[
        "linear",
        "slinear",
        "zero",
        "quadratic",
        "cubic",
        "barycentric",
        "krogh",
        "polynomial",
        "spline",
        "piecewise_polynomial",
        "from_derivatives",
        "pchip",
        "akima",
        "cubicspline",
    ]
)
def interp_methods_ind(request):
    """Fixture yielding a (method name, required kwargs) pair for tests
    that run against various Index types.

    The methods 'time', 'index', 'nearest' and 'values' are not included
    in the parameter list.
    """
    name = request.param
    # 'spline' and 'polynomial' are passed an explicit ``order``
    needs_order = name in ("spline", "polynomial")
    extra_kwargs = {"order": 1} if needs_order else {}
    return name, extra_kwargs
|
||||
|
||||
|
||||
class TestSeriesInterpolateData:
|
||||
@pytest.mark.xfail(reason="EA.fillna does not handle 'linear' method")
def test_interpolate_period_values(self):
    """Interpolate a daily period Series that has one missing entry."""
    complete = Series(date_range("2012-01-01", periods=5))
    with_gap = complete.copy()
    with_gap[2] = pd.NaT

    # period cast
    expected = complete.dt.to_period("D")
    filled = with_gap.dt.to_period("D").interpolate()
    tm.assert_series_equal(filled, expected)
|
||||
|
||||
def test_interpolate(self, datetime_series):
    """Check 'linear' and 'time' interpolation across a blanked-out slice."""
    dates = datetime_series.index
    gap = slice(5, 10)

    ramp = Series(np.arange(len(datetime_series), dtype=float), dates)
    ramp_with_gap = ramp.copy()
    ramp_with_gap[gap] = np.nan
    tm.assert_series_equal(ramp_with_gap.interpolate(method="linear"), ramp)

    ordinals = Series([d.toordinal() for d in dates], index=dates).astype(float)
    ordinals_with_gap = ordinals.copy()
    ordinals_with_gap[gap] = np.nan
    tm.assert_series_equal(ordinals_with_gap.interpolate(method="time"), ordinals)
|
||||
|
||||
def test_interpolate_time_raises_for_non_timeseries(self):
    """``method='time'`` without a DatetimeIndex raises ``ValueError``."""
    # When method='time' is used on a non-TimeSeries that contains a null
    # value, a ValueError should be raised.
    msg = "time-weighted interpolation only works on Series.* with a DatetimeIndex"
    plain = Series([0, 1, 2, np.nan])
    with pytest.raises(ValueError, match=msg):
        plain.interpolate(method="time")
|
||||
|
||||
def test_interpolate_cubicspline(self):
    """'cubicspline' fills quarter-step points inserted into an integer ramp."""
    pytest.importorskip("scipy")
    ser = Series([10, 11, 12, 13])

    # interpolate at new_index
    quarter_steps = Index([1.25, 1.5, 1.75, 2.25, 2.5, 2.75])
    new_index = ser.index.union(quarter_steps).astype(float)
    interpolated = ser.reindex(new_index).interpolate(method="cubicspline")

    expected = Series(
        [11.00, 11.25, 11.50, 11.75, 12.00, 12.25, 12.50, 12.75, 13.00],
        index=Index([1.0, 1.25, 1.5, 1.75, 2.0, 2.25, 2.5, 2.75, 3.0]),
    )
    tm.assert_series_equal(interpolated.loc[1:3], expected)
|
||||
|
||||
def test_interpolate_pchip(self):
    """Smoke test: 'pchip' interpolation and a label slice do not raise."""
    pytest.importorskip("scipy")
    sorted_values = np.sort(np.random.default_rng(2).uniform(size=100))
    ser = Series(sorted_values)

    # interpolate at new_index
    inserted = Index([49.25, 49.5, 49.75, 50.25, 50.5, 50.75])
    new_index = ser.index.union(inserted).astype(float)
    interpolated = ser.reindex(new_index).interpolate(method="pchip")
    # does not blow up, GH5977
    interpolated.loc[49:51]
|
||||
|
||||
def test_interpolate_akima(self):
    """'akima' interpolation with the default ``der`` and with ``der=1``."""
    pytest.importorskip("scipy")
    ser = Series([10, 11, 12, 13])
    quarter_steps = Index([1.25, 1.5, 1.75, 2.25, 2.5, 2.75])
    result_index = Index([1.0, 1.25, 1.5, 1.75, 2.0, 2.25, 2.5, 2.75, 3.0])

    # interpolate at new_index where `der` is zero
    expected = Series(
        [11.00, 11.25, 11.50, 11.75, 12.00, 12.25, 12.50, 12.75, 13.00],
        index=result_index,
    )
    new_index = ser.index.union(quarter_steps).astype(float)
    interpolated = ser.reindex(new_index).interpolate(method="akima")
    tm.assert_series_equal(interpolated.loc[1:3], expected)

    # interpolate at new_index where `der` is a non-zero int
    expected = Series(
        [11.0, 1.0, 1.0, 1.0, 12.0, 1.0, 1.0, 1.0, 13.0],
        index=result_index,
    )
    new_index = ser.index.union(quarter_steps).astype(float)
    interpolated = ser.reindex(new_index).interpolate(method="akima", der=1)
    tm.assert_series_equal(interpolated.loc[1:3], expected)
|
||||
|
||||
def test_interpolate_piecewise_polynomial(self):
    """'piecewise_polynomial' fills quarter-step points in an integer ramp."""
    pytest.importorskip("scipy")
    ser = Series([10, 11, 12, 13])

    # interpolate at new_index
    quarter_steps = Index([1.25, 1.5, 1.75, 2.25, 2.5, 2.75])
    new_index = ser.index.union(quarter_steps).astype(float)
    interpolated = ser.reindex(new_index).interpolate(method="piecewise_polynomial")

    expected = Series(
        [11.00, 11.25, 11.50, 11.75, 12.00, 12.25, 12.50, 12.75, 13.00],
        index=Index([1.0, 1.25, 1.5, 1.75, 2.0, 2.25, 2.5, 2.75, 3.0]),
    )
    tm.assert_series_equal(interpolated.loc[1:3], expected)
|
||||
|
||||
def test_interpolate_from_derivatives(self):
    """'from_derivatives' fills quarter-step points in an integer ramp."""
    pytest.importorskip("scipy")
    ser = Series([10, 11, 12, 13])

    # interpolate at new_index
    quarter_steps = Index([1.25, 1.5, 1.75, 2.25, 2.5, 2.75])
    new_index = ser.index.union(quarter_steps).astype(float)
    interpolated = ser.reindex(new_index).interpolate(method="from_derivatives")

    expected = Series(
        [11.00, 11.25, 11.50, 11.75, 12.00, 12.25, 12.50, 12.75, 13.00],
        index=Index([1.0, 1.25, 1.5, 1.75, 2.0, 2.25, 2.5, 2.75, 3.0]),
    )
    tm.assert_series_equal(interpolated.loc[1:3], expected)
|
||||
|
||||
@pytest.mark.parametrize(
    "kwargs",
    [
        {},
        pytest.param(
            {"method": "polynomial", "order": 1}, marks=td.skip_if_no("scipy")
        ),
    ],
)
def test_interpolate_corners(self, kwargs):
    """All-NaN and empty Series come back unchanged from ``interpolate``."""
    all_nan = Series([np.nan, np.nan])
    tm.assert_series_equal(all_nan.interpolate(**kwargs), all_nan)

    empty = Series([], dtype=object).interpolate()
    tm.assert_series_equal(empty.interpolate(**kwargs), empty)
|
||||
|
||||
def test_interpolate_index_values(self):
    """'index' interpolation matches ``np.interp``; 'values' gives the same."""
    sorted_index = np.sort(np.random.default_rng(2).random(30))
    s = Series(np.nan, index=sorted_index)
    s.loc[::3] = np.random.default_rng(2).standard_normal(10)

    positions = s.index.values.astype(float)
    missing = isna(s.values)
    present = ~missing

    by_index = s.interpolate(method="index")

    # reference values computed directly from the index positions
    reference = np.interp(positions[missing], positions[present], s.values[present])
    expected = Series(reference, index=s.index[missing])
    tm.assert_series_equal(by_index[missing], expected)

    # 'values' is synonymous with 'index' for the method kwarg
    by_values = s.interpolate(method="values")
    tm.assert_series_equal(by_values, by_index)
    tm.assert_series_equal(by_values[missing], expected)
|
||||
|
||||
def test_interpolate_non_ts(self):
    """``method='time'`` on a default-indexed Series raises ``ValueError``."""
    msg = (
        "time-weighted interpolation only works on Series or DataFrames "
        "with a DatetimeIndex"
    )
    plain = Series([1, 3, np.nan, np.nan, np.nan, 11])
    with pytest.raises(ValueError, match=msg):
        plain.interpolate(method="time")
|
||||
|
||||
@pytest.mark.parametrize(
    "kwargs",
    [
        {},
        pytest.param(
            {"method": "polynomial", "order": 1}, marks=td.skip_if_no("scipy")
        ),
    ],
)
def test_nan_interpolate(self, kwargs):
    """A single interior NaN is filled, giving a float ramp."""
    with_gap = Series([0, 1, np.nan, 3])
    expected = Series([0.0, 1.0, 2.0, 3.0])
    tm.assert_series_equal(with_gap.interpolate(**kwargs), expected)
|
||||
|
||||
def test_nan_irregular_index(self):
    """Default interpolation fills by position despite uneven index labels."""
    labels = [1, 3, 5, 9]
    with_gap = Series([1, 2, np.nan, 4], index=labels)
    expected = Series([1.0, 2.0, 3.0, 4.0], index=labels)
    tm.assert_series_equal(with_gap.interpolate(), expected)
|
||||
|
||||
def test_nan_str_index(self):
    """A trailing NaN under a string index takes the last valid value."""
    labels = list("abcd")
    with_gap = Series([0, 1, 2, np.nan], index=labels)
    expected = Series([0.0, 1.0, 2.0, 2.0], index=labels)
    tm.assert_series_equal(with_gap.interpolate(), expected)
|
||||
|
||||
def test_interp_quad(self):
    """'quadratic' interpolation recovers the missing square, 9."""
    pytest.importorskip("scipy")
    labels = [1, 2, 3, 4]
    squares = Series([1, 4, np.nan, 16], index=labels)
    expected = Series([1.0, 4.0, 9.0, 16.0], index=labels)
    tm.assert_series_equal(squares.interpolate(method="quadratic"), expected)
|
||||
|
||||
def test_interp_scipy_basic(self):
    """Check several scipy-backed methods, with and without ``downcast``."""
    pytest.importorskip("scipy")
    s = Series([1, 3, np.nan, 12, np.nan, 25])
    msg = "The 'downcast' keyword in Series.interpolate is deprecated"

    def interpolate_with_downcast(method):
        # the deprecated keyword must warn on every call
        with tm.assert_produces_warning(FutureWarning, match=msg):
            return s.interpolate(method=method, downcast="infer")

    # slinear
    expected = Series([1.0, 3.0, 7.5, 12.0, 18.5, 25.0])
    tm.assert_series_equal(s.interpolate(method="slinear"), expected)
    tm.assert_series_equal(interpolate_with_downcast("slinear"), expected)

    # nearest, then zero: same integer-valued expectation for both
    for method in ("nearest", "zero"):
        expected = Series([1, 3, 3, 12, 12, 25])
        tm.assert_series_equal(
            s.interpolate(method=method), expected.astype("float")
        )
        tm.assert_series_equal(interpolate_with_downcast(method), expected)

    # quadratic
    # GH #15662.
    expected = Series([1, 3.0, 6.823529, 12.0, 18.058824, 25.0])
    tm.assert_series_equal(s.interpolate(method="quadratic"), expected)
    tm.assert_series_equal(interpolate_with_downcast("quadratic"), expected)

    # cubic
    expected = Series([1.0, 3.0, 6.8, 12.0, 18.2, 25.0])
    tm.assert_series_equal(s.interpolate(method="cubic"), expected)
|
||||
|
||||
def test_interp_limit(self):
    """``limit=2`` fills only the first two of three consecutive NaNs."""
    with_gap = Series([1, 3, np.nan, np.nan, np.nan, 11])
    expected = Series([1.0, 3.0, 5.0, 7.0, np.nan, 11.0])
    filled = with_gap.interpolate(method="linear", limit=2)
    tm.assert_series_equal(filled, expected)
|
||||
|
||||
@pytest.mark.parametrize("limit", [-1, 0])
def test_interpolate_invalid_nonpositive_limit(self, nontemporal_method, limit):
    """A zero or negative ``limit`` raises for every non-temporal method."""
    # GH 9217: make sure limit is greater than zero.
    method, kwargs = nontemporal_method
    ser = Series([1, 2, np.nan, 4])
    with pytest.raises(ValueError, match="Limit must be greater than 0"):
        ser.interpolate(limit=limit, method=method, **kwargs)
|
||||
|
||||
def test_interpolate_invalid_float_limit(self, nontemporal_method):
    """A float ``limit`` raises for every non-temporal method."""
    # GH 9217: make sure limit is an integer.
    method, kwargs = nontemporal_method
    ser = Series([1, 2, np.nan, 4])
    float_limit = 2.0
    with pytest.raises(ValueError, match="Limit must be an integer"):
        ser.interpolate(limit=float_limit, method=method, **kwargs)
|
||||
|
||||
@pytest.mark.parametrize("invalid_method", [None, "nonexistent_method"])
def test_interp_invalid_method(self, invalid_method):
    """An invalid ``method`` raises, with or without an invalid ``limit``."""
    ser = Series([1, 3, np.nan, 12, np.nan, 25])

    msg = (
        "'method' should be a string, not None"
        if invalid_method is None
        else f"method must be one of.* Got '{invalid_method}' instead"
    )
    with pytest.raises(ValueError, match=msg):
        ser.interpolate(method=invalid_method)

    # When an invalid method and invalid limit (such as -1) are
    # provided, the error message reflects the invalid method.
    with pytest.raises(ValueError, match=msg):
        ser.interpolate(method=invalid_method, limit=-1)
|
||||
|
||||
def test_interp_invalid_method_and_value(self):
    """Passing ``fill_value`` to ``Series.interpolate`` raises ``ValueError``."""
    # GH#36624
    ser = Series([1, 3, np.nan, 12, np.nan, 25])

    error_msg = "'fill_value' is not a valid keyword for Series.interpolate"
    warning_msg = "Series.interpolate with method=pad"
    with pytest.raises(ValueError, match=error_msg):
        with tm.assert_produces_warning(FutureWarning, match=warning_msg):
            ser.interpolate(fill_value=3, method="pad")
|
||||
|
||||
def test_interp_limit_forward(self):
    """``limit_direction`` accepts 'forward' in either letter case."""
    with_gap = Series([1, 3, np.nan, np.nan, np.nan, 11])

    # Provide 'forward' (the default) explicitly here.
    expected = Series([1.0, 3.0, 5.0, 7.0, np.nan, 11.0])

    for direction in ("forward", "FORWARD"):
        filled = with_gap.interpolate(
            method="linear", limit=2, limit_direction=direction
        )
        tm.assert_series_equal(filled, expected)
|
||||
|
||||
def test_interp_unlimited(self):
    """With no ``limit``, every NaN the direction allows gets filled."""
    # these test are for issue #16282 default Limit=None is unlimited
    nan = np.nan
    s = Series([nan, 1.0, 3.0, nan, nan, nan, 11.0, nan])

    cases = [
        ("both", [1.0, 1.0, 3.0, 5.0, 7.0, 9.0, 11.0, 11.0]),
        ("forward", [nan, 1.0, 3.0, 5.0, 7.0, 9.0, 11.0, 11.0]),
        ("backward", [1.0, 1.0, 3.0, 5.0, 7.0, 9.0, 11.0, nan]),
    ]
    for direction, values in cases:
        result = s.interpolate(method="linear", limit_direction=direction)
        tm.assert_series_equal(result, Series(values))
|
||||
|
||||
def test_interp_limit_bad_direction(self):
    """An unknown ``limit_direction`` raises, with or without ``limit``."""
    ser = Series([1, 3, np.nan, np.nan, np.nan, 11])

    msg = (
        r"Invalid limit_direction: expecting one of \['forward', "
        r"'backward', 'both'\], got 'abc'"
    )
    with pytest.raises(ValueError, match=msg):
        ser.interpolate(method="linear", limit=2, limit_direction="abc")

    # raises an error even if no limit is specified.
    with pytest.raises(ValueError, match=msg):
        ser.interpolate(method="linear", limit_direction="abc")
|
||||
|
||||
# limit_area introduced GH #16284
|
||||
def test_interp_limit_area(self):
    """Check ``limit_area`` combined with ``limit`` and ``limit_direction``."""
    # These tests are for issue #9218 -- fill NaNs in both directions.
    nan = np.nan
    s = Series([nan, nan, 3, nan, nan, nan, 7, nan, nan])

    # (extra keyword arguments, expected values), checked in order
    cases = [
        (
            {"limit_area": "inside"},
            [nan, nan, 3.0, 4.0, 5.0, 6.0, 7.0, nan, nan],
        ),
        (
            {"limit_area": "inside", "limit": 1},
            [nan, nan, 3.0, 4.0, nan, nan, 7.0, nan, nan],
        ),
        (
            {"limit_area": "inside", "limit_direction": "both", "limit": 1},
            [nan, nan, 3.0, 4.0, nan, 6.0, 7.0, nan, nan],
        ),
        (
            {"limit_area": "outside"},
            [nan, nan, 3.0, nan, nan, nan, 7.0, 7.0, 7.0],
        ),
        (
            {"limit_area": "outside", "limit": 1},
            [nan, nan, 3.0, nan, nan, nan, 7.0, 7.0, nan],
        ),
        (
            {"limit_area": "outside", "limit_direction": "both", "limit": 1},
            [nan, 3.0, 3.0, nan, nan, nan, 7.0, 7.0, nan],
        ),
        (
            {"limit_area": "outside", "limit_direction": "backward"},
            [3.0, 3.0, 3.0, nan, nan, nan, 7.0, nan, nan],
        ),
    ]
    for extra_kwargs, values in cases:
        result = s.interpolate(method="linear", **extra_kwargs)
        tm.assert_series_equal(result, Series(values))

    # raises an error even if limit type is wrong.
    msg = r"Invalid limit_area: expecting one of \['inside', 'outside'\], got abc"
    with pytest.raises(ValueError, match=msg):
        s.interpolate(method="linear", limit_area="abc")
|
||||
|
||||
@pytest.mark.parametrize(
    "method, limit_direction, expected",
    [
        ("pad", "backward", "forward"),
        ("ffill", "backward", "forward"),
        ("backfill", "forward", "backward"),
        ("bfill", "forward", "backward"),
        ("pad", "both", "forward"),
        ("ffill", "both", "forward"),
        ("backfill", "both", "backward"),
        ("bfill", "both", "backward"),
    ],
)
def test_interp_limit_direction_raises(self, method, limit_direction, expected):
    """Fill methods reject a ``limit_direction`` other than their own."""
    # https://github.com/pandas-dev/pandas/pull/34746
    ser = Series([1, 2, 3])

    error_msg = f"`limit_direction` must be '{expected}' for method `{method}`"
    warning_msg = "Series.interpolate with method="
    with pytest.raises(ValueError, match=error_msg):
        with tm.assert_produces_warning(FutureWarning, match=warning_msg):
            ser.interpolate(method=method, limit_direction=limit_direction)
|
||||
|
||||
@pytest.mark.parametrize(
    "data, expected_data, kwargs",
    (
        (
            [np.nan, np.nan, 3, np.nan, np.nan, np.nan, 7, np.nan, np.nan],
            [np.nan, np.nan, 3.0, 3.0, 3.0, 3.0, 7.0, np.nan, np.nan],
            {"method": "pad", "limit_area": "inside"},
        ),
        (
            [np.nan, np.nan, 3, np.nan, np.nan, np.nan, 7, np.nan, np.nan],
            [np.nan, np.nan, 3.0, 3.0, np.nan, np.nan, 7.0, np.nan, np.nan],
            {"method": "pad", "limit_area": "inside", "limit": 1},
        ),
        (
            [np.nan, np.nan, 3, np.nan, np.nan, np.nan, 7, np.nan, np.nan],
            [np.nan, np.nan, 3.0, np.nan, np.nan, np.nan, 7.0, 7.0, 7.0],
            {"method": "pad", "limit_area": "outside"},
        ),
        (
            [np.nan, np.nan, 3, np.nan, np.nan, np.nan, 7, np.nan, np.nan],
            [np.nan, np.nan, 3.0, np.nan, np.nan, np.nan, 7.0, 7.0, np.nan],
            {"method": "pad", "limit_area": "outside", "limit": 1},
        ),
        (
            [np.nan, np.nan, np.nan, np.nan, np.nan, np.nan, np.nan],
            [np.nan, np.nan, np.nan, np.nan, np.nan, np.nan, np.nan],
            {"method": "pad", "limit_area": "outside", "limit": 1},
        ),
        (
            range(5),
            range(5),
            {"method": "pad", "limit_area": "outside", "limit": 1},
        ),
    ),
)
def test_interp_limit_area_with_pad(self, data, expected_data, kwargs):
    """'pad' interpolation honours ``limit_area`` while emitting a warning."""
    # GH26796
    ser = Series(data)
    expected = Series(expected_data)

    warning_msg = "Series.interpolate with method=pad"
    with tm.assert_produces_warning(FutureWarning, match=warning_msg):
        filled = ser.interpolate(**kwargs)
    tm.assert_series_equal(filled, expected)
|
||||
|
||||
@pytest.mark.parametrize(
    "data, expected_data, kwargs",
    [
        (
            [np.nan, np.nan, 3, np.nan, np.nan, np.nan, 7, np.nan, np.nan],
            [np.nan, np.nan, 3.0, 7.0, 7.0, 7.0, 7.0, np.nan, np.nan],
            {"method": "bfill", "limit_area": "inside"},
        ),
        (
            [np.nan, np.nan, 3, np.nan, np.nan, np.nan, 7, np.nan, np.nan],
            [np.nan, np.nan, 3.0, np.nan, np.nan, 7.0, 7.0, np.nan, np.nan],
            {"method": "bfill", "limit_area": "inside", "limit": 1},
        ),
        (
            [np.nan, np.nan, 3, np.nan, np.nan, np.nan, 7, np.nan, np.nan],
            [3.0, 3.0, 3.0, np.nan, np.nan, np.nan, 7.0, np.nan, np.nan],
            {"method": "bfill", "limit_area": "outside"},
        ),
        (
            [np.nan, np.nan, 3, np.nan, np.nan, np.nan, 7, np.nan, np.nan],
            [np.nan, 3.0, 3.0, np.nan, np.nan, np.nan, 7.0, np.nan, np.nan],
            {"method": "bfill", "limit_area": "outside", "limit": 1},
        ),
    ],
)
def test_interp_limit_area_with_backfill(self, data, expected_data, kwargs):
    """
    Check ``interpolate(method="bfill")`` combined with ``limit_area``/``limit``.

    Each case supplies the input values, the values expected after filling
    and the keyword arguments forwarded to ``Series.interpolate``.  The call
    must also emit the ``method=bfill`` FutureWarning.
    """
    # GH26796
    ser = Series(data)
    expected = Series(expected_data)

    warning_pattern = "Series.interpolate with method=bfill"
    with tm.assert_produces_warning(FutureWarning, match=warning_pattern):
        filled = ser.interpolate(**kwargs)

    tm.assert_series_equal(filled, expected)
|
||||
|
||||
def test_interp_limit_direction(self):
|
||||
# These tests are for issue #9218 -- fill NaNs in both directions.
|
||||
s = Series([1, 3, np.nan, np.nan, np.nan, 11])
|
||||
|
||||
expected = Series([1.0, 3.0, np.nan, 7.0, 9.0, 11.0])
|
||||
result = s.interpolate(method="linear", limit=2, limit_direction="backward")
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
expected = Series([1.0, 3.0, 5.0, np.nan, 9.0, 11.0])
|
||||
result = s.interpolate(method="linear", limit=1, limit_direction="both")
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
# Check that this works on a longer series of nans.
|
||||
s = Series([1, 3, np.nan, np.nan, np.nan, 7, 9, np.nan, np.nan, 12, np.nan])
|
||||
|
||||
expected = Series([1.0, 3.0, 4.0, 5.0, 6.0, 7.0, 9.0, 10.0, 11.0, 12.0, 12.0])
|
||||
result = s.interpolate(method="linear", limit=2, limit_direction="both")
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
expected = Series(
|
||||
[1.0, 3.0, 4.0, np.nan, 6.0, 7.0, 9.0, 10.0, 11.0, 12.0, 12.0]
|
||||
)
|
||||
result = s.interpolate(method="linear", limit=1, limit_direction="both")
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
def test_interp_limit_to_ends(self):
|
||||
# These test are for issue #10420 -- flow back to beginning.
|
||||
s = Series([np.nan, np.nan, 5, 7, 9, np.nan])
|
||||
|
||||
expected = Series([5.0, 5.0, 5.0, 7.0, 9.0, np.nan])
|
||||
result = s.interpolate(method="linear", limit=2, limit_direction="backward")
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
expected = Series([5.0, 5.0, 5.0, 7.0, 9.0, 9.0])
|
||||
result = s.interpolate(method="linear", limit=2, limit_direction="both")
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
def test_interp_limit_before_ends(self):
|
||||
# These test are for issue #11115 -- limit ends properly.
|
||||
s = Series([np.nan, np.nan, 5, 7, np.nan, np.nan])
|
||||
|
||||
expected = Series([np.nan, np.nan, 5.0, 7.0, 7.0, np.nan])
|
||||
result = s.interpolate(method="linear", limit=1, limit_direction="forward")
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
expected = Series([np.nan, 5.0, 5.0, 7.0, np.nan, np.nan])
|
||||
result = s.interpolate(method="linear", limit=1, limit_direction="backward")
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
expected = Series([np.nan, 5.0, 5.0, 7.0, 7.0, np.nan])
|
||||
result = s.interpolate(method="linear", limit=1, limit_direction="both")
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
def test_interp_all_good(self):
    """Interpolating a series without missing values returns it unchanged."""
    pytest.importorskip("scipy")
    complete = Series([1, 2, 3])

    via_scipy = complete.interpolate(method="polynomial", order=1)
    tm.assert_series_equal(via_scipy, complete)

    # non-scipy
    via_default = complete.interpolate()
    tm.assert_series_equal(via_default, complete)
|
||||
|
||||
@pytest.mark.parametrize(
    "check_scipy", [False, pytest.param(True, marks=td.skip_if_no("scipy"))]
)
def test_interp_multiIndex(self, check_scipy):
    """
    Default interpolation works on a MultiIndex; when ``check_scipy`` is true
    the polynomial method must additionally raise ``ValueError``.
    """
    levels = MultiIndex.from_tuples([(0, "a"), (1, "b"), (2, "c")])
    ser = Series([1, 2, np.nan], index=levels)

    expected = ser.copy()
    expected.loc[2] = 2
    tm.assert_series_equal(ser.interpolate(), expected)

    if not check_scipy:
        return

    msg = "Only `method=linear` interpolation is supported on MultiIndexes"
    with pytest.raises(ValueError, match=msg):
        ser.interpolate(method="polynomial", order=1)
|
||||
|
||||
def test_interp_nonmono_raise(self):
    """``method="krogh"`` must raise when the index is not monotonic."""
    pytest.importorskip("scipy")
    unordered = Series([1, np.nan, 3], index=[0, 2, 1])
    with pytest.raises(
        ValueError,
        match="krogh interpolation requires that the index be monotonic",
    ):
        unordered.interpolate(method="krogh")
|
||||
|
||||
@pytest.mark.parametrize("method", ["nearest", "pad"])
def test_interp_datetime64(self, method, tz_naive_fixture):
    """
    Interpolate a series indexed by datetimes with ``nearest`` and ``pad``.

    ``pad`` is expected to emit a FutureWarning and to match ``ffill``;
    ``nearest`` is expected to emit no warning.
    """
    pytest.importorskip("scipy")
    ser = Series(
        [1, np.nan, 3], index=date_range("1/1/2000", periods=3, tz=tz_naive_fixture)
    )

    if method == "nearest":
        warn = None
    else:
        warn = FutureWarning
    msg = "Series.interpolate with method=pad is deprecated"
    with tm.assert_produces_warning(warn, match=msg):
        result = ser.interpolate(method=method)

    if warn is not None:
        # check the "use ffill instead" is equivalent
        tm.assert_series_equal(result, ser.ffill())

    expected = Series(
        [1.0, 1.0, 3.0],
        index=date_range("1/1/2000", periods=3, tz=tz_naive_fixture),
    )
    tm.assert_series_equal(result, expected)
|
||||
|
||||
def test_interp_pad_datetime64tz_values(self):
|
||||
# GH#27628 missing.interpolate_2d should handle datetimetz values
|
||||
dti = date_range("2015-04-05", periods=3, tz="US/Central")
|
||||
ser = Series(dti)
|
||||
ser[1] = pd.NaT
|
||||
|
||||
msg = "Series.interpolate with method=pad is deprecated"
|
||||
with tm.assert_produces_warning(FutureWarning, match=msg):
|
||||
result = ser.interpolate(method="pad")
|
||||
# check the "use ffill instead" is equivalent
|
||||
alt = ser.ffill()
|
||||
tm.assert_series_equal(result, alt)
|
||||
|
||||
expected = Series(dti)
|
||||
expected[1] = expected[0]
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
def test_interp_limit_no_nans(self):
|
||||
# GH 7173
|
||||
s = Series([1.0, 2.0, 3.0])
|
||||
result = s.interpolate(limit=1)
|
||||
expected = s
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
@pytest.mark.parametrize("method", ["polynomial", "spline"])
def test_no_order(self, method):
    """Polynomial and spline interpolation raise when ``order`` is omitted."""
    # see GH-10633, GH-24014
    pytest.importorskip("scipy")
    ser = Series([0, 1, np.nan, 3])
    with pytest.raises(
        ValueError, match="You must specify the order of the spline or polynomial"
    ):
        ser.interpolate(method=method)
|
||||
|
||||
@pytest.mark.parametrize("order", [-1, -1.0, 0, 0.0, np.nan])
def test_interpolate_spline_invalid_order(self, order):
    """Spline interpolation rejects each of the parametrized ``order`` values."""
    pytest.importorskip("scipy")
    ser = Series([0, 1, np.nan, 3])
    with pytest.raises(
        ValueError, match="order needs to be specified and greater than 0"
    ):
        ser.interpolate(method="spline", order=order)
|
||||
|
||||
def test_spline(self):
    """An order-1 spline fills the gaps of an evenly increasing series."""
    pytest.importorskip("scipy")
    gapped = Series([1, 2, np.nan, 4, 5, np.nan, 7])
    filled = gapped.interpolate(method="spline", order=1)
    tm.assert_series_equal(filled, Series([1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0]))
|
||||
|
||||
def test_spline_extrapolate(self):
    """The ``ext`` keyword changes how the trailing NaN is filled by a spline."""
    pytest.importorskip("scipy")
    ser = Series([1, 2, 3, 4, np.nan, 6, np.nan])

    # (ext value, expected values); checked in this order
    cases = [
        (3, [1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 6.0]),
        (0, [1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0]),
    ]
    for ext, values in cases:
        result = ser.interpolate(method="spline", order=1, ext=ext)
        tm.assert_series_equal(result, Series(values))
|
||||
|
||||
def test_spline_smooth(self):
    """Passing ``s=0`` to an order-3 spline changes the value filled at position 5."""
    pytest.importorskip("scipy")
    ser = Series([1, 2, np.nan, 4, 5.1, np.nan, 7])
    with_s_zero = ser.interpolate(method="spline", order=3, s=0)
    with_default = ser.interpolate(method="spline", order=3)
    assert with_s_zero[5] != with_default[5]
|
||||
|
||||
def test_spline_interpolation(self):
    """
    Run order-1 spline interpolation on a series with randomly placed NaNs.

    NOTE(review): the result and the expected value are produced by the same
    expression, so this only checks that the call succeeds and is repeatable.
    """
    # Explicit cast to float to avoid implicit cast when setting np.nan
    pytest.importorskip("scipy")
    squares = Series(np.arange(10) ** 2, dtype="float")
    missing_at = np.random.default_rng(2).integers(0, 9, 3)
    squares[missing_at] = np.nan

    first = squares.interpolate(method="spline", order=1)
    second = squares.interpolate(method="spline", order=1)
    tm.assert_series_equal(first, second)
|
||||
|
||||
def test_interp_timedelta64(self):
|
||||
# GH 6424
|
||||
df = Series([1, np.nan, 3], index=pd.to_timedelta([1, 2, 3]))
|
||||
result = df.interpolate(method="time")
|
||||
expected = Series([1.0, 2.0, 3.0], index=pd.to_timedelta([1, 2, 3]))
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
# test for non uniform spacing
|
||||
df = Series([1, np.nan, 3], index=pd.to_timedelta([1, 2, 4]))
|
||||
result = df.interpolate(method="time")
|
||||
expected = Series([1.0, 1.666667, 3.0], index=pd.to_timedelta([1, 2, 4]))
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
def test_series_interpolate_method_values(self):
|
||||
# GH#1646
|
||||
rng = date_range("1/1/2000", "1/20/2000", freq="D")
|
||||
ts = Series(np.random.default_rng(2).standard_normal(len(rng)), index=rng)
|
||||
|
||||
ts[::2] = np.nan
|
||||
|
||||
result = ts.interpolate(method="values")
|
||||
exp = ts.interpolate()
|
||||
tm.assert_series_equal(result, exp)
|
||||
|
||||
def test_series_interpolate_intraday(self):
|
||||
# #1698
|
||||
index = date_range("1/1/2012", periods=4, freq="12D")
|
||||
ts = Series([0, 12, 24, 36], index)
|
||||
new_index = index.append(index + pd.DateOffset(days=1)).sort_values()
|
||||
|
||||
exp = ts.reindex(new_index).interpolate(method="time")
|
||||
|
||||
index = date_range("1/1/2012", periods=4, freq="12h")
|
||||
ts = Series([0, 12, 24, 36], index)
|
||||
new_index = index.append(index + pd.DateOffset(hours=1)).sort_values()
|
||||
result = ts.reindex(new_index).interpolate(method="time")
|
||||
|
||||
tm.assert_numpy_array_equal(result.values, exp.values)
|
||||
|
||||
@pytest.mark.parametrize(
    "ind",
    [
        ["a", "b", "c", "d"],
        pd.period_range(start="2019-01-01", periods=4),
        pd.interval_range(start=0, end=4),
    ],
)
def test_interp_non_timedelta_index(self, interp_methods_ind, ind):
    """
    Interpolate a column whose index is a list of strings, a period range or
    an interval range.

    ``linear`` is expected to fill the gap; every other method supplied by
    ``interp_methods_ind`` is expected to raise ``ValueError``.
    """
    # gh 21662
    frame = pd.DataFrame([0, 1, np.nan, 3], index=ind)

    method, kwargs = interp_methods_ind
    if method == "pchip":
        pytest.importorskip("scipy")

    if method != "linear":
        expected_error = (
            "Index column must be numeric or datetime type when "
            f"using {method} method other than linear. "
            "Try setting a numeric or datetime index column before "
            "interpolating."
        )
        with pytest.raises(ValueError, match=expected_error):
            frame[0].interpolate(method=method, **kwargs)
        return

    result = frame[0].interpolate(**kwargs)
    expected = Series([0.0, 1.0, 2.0, 3.0], name=0, index=ind)
    tm.assert_series_equal(result, expected)
|
||||
|
||||
def test_interpolate_timedelta_index(self, request, interp_methods_ind):
    """
    Interpolate a column indexed by a ``timedelta_range`` with each method
    supplied by ``interp_methods_ind``.

    The ``cubic`` and ``zero`` methods are marked as expected failures.
    """
    # gh 21662
    pytest.importorskip("scipy")
    steps = pd.timedelta_range(start=1, periods=4)
    frame = pd.DataFrame([0, 1, np.nan, 3], index=steps)

    method, kwargs = interp_methods_ind

    if method in {"cubic", "zero"}:
        xfail_mark = pytest.mark.xfail(
            reason=f"{method} interpolation is not supported for TimedeltaIndex"
        )
        request.applymarker(xfail_mark)

    result = frame[0].interpolate(method=method, **kwargs)
    expected = Series([0.0, 1.0, 2.0, 3.0], name=0, index=steps)
    tm.assert_series_equal(result, expected)
|
||||
|
||||
@pytest.mark.parametrize(
    "ascending, expected_values",
    [(True, [1, 2, 3, 9, 10]), (False, [10, 9, 3, 2, 1])],
)
def test_interpolate_unsorted_index(self, ascending, expected_values):
    """``method="index"`` works after sorting the index in either direction."""
    # GH 21037
    unsorted = Series(data=[10, 9, np.nan, 2, 1], index=[10, 9, 3, 2, 1])
    ordered = unsorted.sort_index(ascending=ascending)
    result = ordered.interpolate(method="index")
    expected = Series(data=expected_values, index=expected_values, dtype=float)
    tm.assert_series_equal(result, expected)
|
||||
|
||||
def test_interpolate_asfreq_raises(self):
    """
    ``method="asfreq"`` on an object-dtype series raises ``ValueError`` and
    also emits the object-dtype FutureWarning.
    """
    ser = Series(["a", None, "b"], dtype=object)
    expect_error = pytest.raises(ValueError, match="Invalid fill method")
    expect_warning = tm.assert_produces_warning(
        FutureWarning, match="Series.interpolate with object dtype"
    )
    # the warning check is the inner context, as in a nested ``with``
    with expect_error, expect_warning:
        ser.interpolate(method="asfreq")
|
||||
|
||||
def test_interpolate_fill_value(self):
    """``fill_value`` is accepted together with ``method="nearest"``."""
    # GH#54920
    pytest.importorskip("scipy")
    gapped = Series([np.nan, 0, 1, np.nan, 3, np.nan])
    filled = gapped.interpolate(method="nearest", fill_value=0)
    tm.assert_series_equal(filled, Series([np.nan, 0, 1, 1, 3, 0]))
|
||||
@ -0,0 +1,26 @@
|
||||
import numpy as np
|
||||
|
||||
from pandas import (
|
||||
Series,
|
||||
date_range,
|
||||
)
|
||||
|
||||
|
||||
class TestIsMonotonic:
    """Tests for ``Series.is_monotonic_increasing`` / ``is_monotonic_decreasing``."""

    def test_is_monotonic_numeric(self):
        """Integer data: random values, an ascending range and a descending range."""
        ser = Series(np.random.default_rng(2).integers(0, 10, size=1000))
        assert not ser.is_monotonic_increasing

        ser = Series(np.arange(1000))
        assert ser.is_monotonic_increasing is True
        # the original asserted the line above twice; check the opposite
        # direction instead of repeating the same assertion
        assert ser.is_monotonic_decreasing is False

        ser = Series(np.arange(1000, 0, -1))
        assert ser.is_monotonic_decreasing is True

    def test_is_monotonic_dt64(self):
        """datetime64 data: an ascending date range and its reversal."""
        ser = Series(date_range("20130101", periods=10))
        assert ser.is_monotonic_increasing is True
        # replaces a duplicated ``is_monotonic_increasing`` assertion
        assert ser.is_monotonic_decreasing is False

        ser = Series(list(reversed(ser)))
        assert ser.is_monotonic_increasing is False
        assert ser.is_monotonic_decreasing is True
|
||||
@ -0,0 +1,40 @@
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
from pandas import Series
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "data, expected",
    [
        (np.random.default_rng(2).integers(0, 10, size=1000), False),
        (np.arange(1000), True),
        ([], True),
        ([np.nan], True),
        (["foo", "bar", np.nan], True),
        (["foo", "foo", np.nan], False),
        (["foo", "bar", np.nan, np.nan], False),
    ],
)
def test_is_unique(data, expected):
    """``Series.is_unique`` returns exactly the expected bool for each data set."""
    # GH#11946 / GH#25180
    assert Series(data).is_unique is expected
|
||||
|
||||
|
||||
def test_is_unique_class_ne(capsys):
    """
    Accessing ``is_unique`` on objects whose ``__ne__`` raises must leave
    nothing on the captured stderr.
    """

    # GH#20661
    class Foo:
        def __init__(self, val) -> None:
            self._value = val

        def __ne__(self, other):
            raise Exception("NEQ not supported")

    with capsys.disabled():
        objs = [Foo(n) for n in range(5)]
        ser = Series(objs, index=list(range(5)))

    ser.is_unique
    captured = capsys.readouterr()
    assert captured.err == ""
|
||||
@ -0,0 +1,252 @@
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
import pandas as pd
|
||||
from pandas import (
|
||||
Series,
|
||||
date_range,
|
||||
)
|
||||
import pandas._testing as tm
|
||||
from pandas.core import algorithms
|
||||
from pandas.core.arrays import PeriodArray
|
||||
|
||||
|
||||
class TestSeriesIsIn:
    """Tests for ``Series.isin`` (with matching Index / algorithms calls)."""

    def test_isin(self):
        """Basic string membership, plus a large series with a mixed-type list."""
        letters = Series(["A", "B", "C", "a", "B", "B", "A", "C"])
        mask = letters.isin(["A", "C"])
        tm.assert_series_equal(
            mask, Series([True, False, True, False, False, False, True, True])
        )

        # GH#16012
        # This specific issue has to have a series over 1e6 in len, but the
        # comparison array (in_list) must be large enough so that numpy doesn't
        # do a manual masking trick that will avoid this issue altogether
        big = Series(list("abcdefghijk" * 10**5))
        # If numpy doesn't do the manual comparison/mask, these
        # unorderable mixed types are what cause the exception in numpy
        in_list = [-1, "a", "b", "G", "Y", "Z", "E", "K", "E", "S", "I", "R", "R"] * 6

        assert big.isin(in_list).sum() == 200000

    def test_isin_with_string_scalar(self):
        """A bare string argument is rejected with ``TypeError``."""
        # GH#4763
        msg = (
            r"only list-like objects are allowed to be passed to isin\(\), "
            r"you passed a `str`"
        )
        letters = Series(["A", "B", "C", "a", "B", "B", "A", "C"])
        with pytest.raises(TypeError, match=msg):
            letters.isin("a")

        words = Series(["aaa", "b", "c"])
        with pytest.raises(TypeError, match=msg):
            words.isin("aaa")

    def test_isin_datetimelike_mismatched_reso(self):
        """Values given at day or second resolution still match the datetimes."""
        expected = Series([True, True, False, False, False])
        ser = Series(date_range("jan-01-2013", "jan-05-2013"))

        # fails on dtype conversion in the first place
        day_values = np.asarray(ser[0:2].values).astype("datetime64[D]")
        tm.assert_series_equal(ser.isin(day_values), expected)

        second_values = ser[:2]._values.astype("M8[s]")
        tm.assert_series_equal(ser.isin(second_values), expected)

    def test_isin_datetimelike_mismatched_reso_list(self):
        """Same as the second-resolution check, with the values passed as a list."""
        expected = Series([True, True, False, False, False])
        ser = Series(date_range("jan-01-2013", "jan-05-2013"))

        second_values = ser[:2]._values.astype("M8[s]")
        tm.assert_series_equal(ser.isin(list(second_values)), expected)

    def test_isin_with_i8(self):
        """datetime64 / timedelta64 series against several kinds of values."""
        # GH#5021
        first_two = Series([True, True, False, False, False])
        second_only = Series([False, True, False, False, False])

        # datetime64[ns]
        dates = Series(date_range("jan-01-2013", "jan-05-2013"))

        tm.assert_series_equal(dates.isin(dates[0:2]), first_two)
        tm.assert_series_equal(dates.isin(dates[0:2].values), first_two)
        tm.assert_series_equal(dates.isin([dates[1]]), second_only)
        tm.assert_series_equal(dates.isin([np.datetime64(dates[1])]), second_only)
        tm.assert_series_equal(dates.isin(set(dates[0:2])), first_two)

        # timedelta64[ns]
        deltas = Series(pd.to_timedelta(range(5), unit="d"))
        tm.assert_series_equal(deltas.isin(deltas[0:2]), first_two)

    @pytest.mark.parametrize("empty", [[], Series(dtype=object), np.array([])])
    def test_isin_empty(self, empty):
        """An empty collection of values gives an all-False result."""
        # see GH#16991
        ser = Series(["a", "b"])
        all_false = Series([False, False])
        tm.assert_series_equal(all_false, ser.isin(empty))

    def test_isin_read_only(self):
        """A non-writeable numpy array is accepted as the values argument."""
        # https://github.com/pandas-dev/pandas/issues/37174
        frozen = np.array([1, 2, 3])
        frozen.setflags(write=False)
        result = Series([1, 2, 3]).isin(frozen)
        tm.assert_series_equal(result, Series([True, True, True]))

    @pytest.mark.parametrize("dtype", [object, None])
    def test_isin_dt64_values_vs_ints(self, dtype):
        """An integer value is not treated as a matching datetime."""
        # GH#36621 don't cast integers to datetimes for isin
        dti = date_range("2013-01-01", "2013-01-05")
        ser = Series(dti)
        comps = np.asarray([1356998400000000000], dtype=dtype)
        all_false = np.array([False] * len(dti), dtype=bool)

        tm.assert_numpy_array_equal(dti.isin(comps), all_false)
        tm.assert_series_equal(ser.isin(comps), Series(all_false))
        tm.assert_numpy_array_equal(pd.core.algorithms.isin(ser, comps), all_false)

    def test_isin_tzawareness_mismatch(self):
        """tz-naive datetimes never match their UTC-localized counterparts."""
        dti = date_range("2013-01-01", "2013-01-05")
        ser = Series(dti)
        other = dti.tz_localize("UTC")
        all_false = np.array([False] * len(dti), dtype=bool)

        tm.assert_numpy_array_equal(dti.isin(other), all_false)
        tm.assert_series_equal(ser.isin(other), Series(all_false))
        tm.assert_numpy_array_equal(pd.core.algorithms.isin(ser, other), all_false)

    def test_isin_period_freq_mismatch(self):
        """Periods never match values that share their i8 data but not their freq."""
        dti = date_range("2013-01-01", "2013-01-05")
        pi = dti.to_period("M")
        ser = Series(pi)

        # We construct another PeriodIndex with the same i8 values
        # but different dtype
        yearly_dtype = dti.to_period("Y").dtype
        other = PeriodArray._simple_new(pi.asi8, dtype=yearly_dtype)
        all_false = np.array([False] * len(pi), dtype=bool)

        tm.assert_numpy_array_equal(pi.isin(other), all_false)
        tm.assert_series_equal(ser.isin(other), Series(all_false))
        tm.assert_numpy_array_equal(pd.core.algorithms.isin(ser, other), all_false)

    @pytest.mark.parametrize("values", [[-9.0, 0.0], [-9, 0]])
    def test_isin_float_in_int_series(self, values):
        """``-0.5`` in the lookup values must not match ``0``."""
        # GH#19356 GH#21804
        result = Series(values).isin([-9, -0.5])
        tm.assert_series_equal(result, Series([True, False]))

    @pytest.mark.parametrize("dtype", ["boolean", "Int64", "Float64"])
    @pytest.mark.parametrize(
        "data,values,expected",
        [
            ([0, 1, 0], [1], [False, True, False]),
            ([0, 1, 0], [1, pd.NA], [False, True, False]),
            ([0, pd.NA, 0], [1, 0], [True, False, True]),
            ([0, 1, pd.NA], [1, pd.NA], [False, True, True]),
            ([0, 1, pd.NA], [1, np.nan], [False, True, False]),
            ([0, pd.NA, pd.NA], [np.nan, pd.NaT, None], [False, False, False]),
        ],
    )
    def test_isin_masked_types(self, dtype, data, values, expected):
        """Masked dtypes return a ``boolean``-dtype result with the expected flags."""
        # GH#42405
        masked = Series(data, dtype=dtype)
        result = masked.isin(values)
        tm.assert_series_equal(result, Series(expected, dtype="boolean"))
|
||||
|
||||
|
||||
def test_isin_large_series_mixed_dtypes_and_nan(monkeypatch):
    """
    A numeric series containing NaN checked against string values, with
    ``_MINIMUM_COMP_ARR_LEN`` patched down to a small threshold.
    """
    # https://github.com/pandas-dev/pandas/issues/37094
    # combination of object dtype for the values
    # and > _MINIMUM_COMP_ARR_LEN elements
    min_isin_comp = 5
    ser = Series([1, 2, np.nan] * min_isin_comp)
    with monkeypatch.context() as patcher:
        patcher.setattr(algorithms, "_MINIMUM_COMP_ARR_LEN", min_isin_comp)
        result = ser.isin({"foo", "bar"})
    all_false = Series([False] * 3 * min_isin_comp)
    tm.assert_series_equal(result, all_false)
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "array,expected",
    [
        (
            [0, 1j, 1j, 1, 1 + 1j, 1 + 2j, 1 + 1j],
            Series([False, True, True, False, True, True, True], dtype=bool),
        )
    ],
)
def test_isin_complex_numbers(array, expected):
    """Membership checks work for complex values."""
    # GH 17927
    lookup = [1j, 1 + 1j, 1 + 2j]
    tm.assert_series_equal(Series(array).isin(lookup), expected)
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "data,is_in",
    [([1, [2]], [1]), (["simple str", [{"values": 3}]], ["simple str"])],
)
def test_isin_filtering_with_mixed_object_types(data, is_in):
    """A series mixing scalars with nested lists can still be checked."""
    # GH 20883
    mask = Series(data).isin(is_in)
    tm.assert_series_equal(mask, Series([True, False]))
|
||||
|
||||
|
||||
@pytest.mark.parametrize("data", [[1, 2, 3], [1.0, 2.0, 3.0]])
@pytest.mark.parametrize("isin", [[1, 2], [1.0, 2.0]])
def test_isin_filtering_on_iterable(data, isin):
    """The lookup values may be supplied as a generator."""
    # GH 50234
    lazily = (item for item in isin)
    mask = Series(data).isin(lazily)
    tm.assert_series_equal(mask, Series([True, True, False]))
|
||||
@ -0,0 +1,35 @@
|
||||
"""
|
||||
We also test Series.notna in this file.
|
||||
"""
|
||||
import numpy as np
|
||||
|
||||
from pandas import (
|
||||
Period,
|
||||
Series,
|
||||
)
|
||||
import pandas._testing as tm
|
||||
|
||||
|
||||
class TestIsna:
    """Tests for ``Series.isna`` and its complement ``Series.notna``."""

    def test_isna_period_dtype(self):
        """A ``NaT`` period is reported as missing."""
        # GH#13737
        periods = Series([Period("2011-01", freq="M"), Period("NaT", freq="M")])
        missing = Series([False, True])

        tm.assert_series_equal(periods.isna(), missing)
        tm.assert_series_equal(periods.notna(), ~missing)

    def test_isna(self):
        """Float data and string data; the empty string is not missing."""
        cases = [
            ([0, 5.4, 3, np.nan, -0.001], [False, False, False, True, False]),
            (["hi", "", np.nan], [False, False, True]),
        ]
        for values, flags in cases:
            ser = Series(values)
            missing = Series(flags)
            tm.assert_series_equal(ser.isna(), missing)
            tm.assert_series_equal(ser.notna(), ~missing)
|
||||
@ -0,0 +1,59 @@
|
||||
"""
|
||||
Series.item method, mainly testing that we get python scalars as opposed to
|
||||
numpy scalars.
|
||||
"""
|
||||
import pytest
|
||||
|
||||
from pandas import (
|
||||
Series,
|
||||
Timedelta,
|
||||
Timestamp,
|
||||
date_range,
|
||||
)
|
||||
|
||||
|
||||
class TestItem:
    """Tests for ``Series.item`` / ``Index.item``."""

    def test_item(self):
        """
        ``item`` returns a Python or pandas scalar for length-1 objects and
        raises ``ValueError`` otherwise.
        """
        # We are testing that we get python scalars as opposed to numpy scalars
        single_int = Series([1])
        value = single_int.item()
        assert value == 1
        assert value == single_int.iloc[0]
        assert isinstance(value, int)  # i.e. not np.int64

        single_float = Series([0.5], index=[3])
        value = single_float.item()
        assert isinstance(value, float)
        assert value == 0.5

        msg = "can only convert an array of size 1"
        with pytest.raises(ValueError, match=msg):
            Series([1, 2]).item()

        dti = date_range("2016-01-01", periods=2)
        with pytest.raises(ValueError, match=msg):
            dti.item()
        with pytest.raises(ValueError, match=msg):
            Series(dti).item()

        assert isinstance(dti[:1].item(), Timestamp)
        assert isinstance(Series(dti)[:1].item(), Timestamp)

        tdi = dti - dti
        with pytest.raises(ValueError, match=msg):
            tdi.item()
        with pytest.raises(ValueError, match=msg):
            Series(tdi).item()

        assert isinstance(tdi[:1].item(), Timedelta)
        assert isinstance(Series(tdi)[:1].item(), Timedelta)

        # Case where ser[0] would not work
        relabelled = Series(dti, index=[5, 6])
        assert relabelled.iloc[:1].item() == dti[0]
|
||||
@ -0,0 +1,604 @@
|
||||
from collections import (
|
||||
Counter,
|
||||
defaultdict,
|
||||
)
|
||||
from decimal import Decimal
|
||||
import math
|
||||
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
import pandas as pd
|
||||
from pandas import (
|
||||
DataFrame,
|
||||
Index,
|
||||
MultiIndex,
|
||||
Series,
|
||||
bdate_range,
|
||||
date_range,
|
||||
isna,
|
||||
timedelta_range,
|
||||
)
|
||||
import pandas._testing as tm
|
||||
|
||||
|
||||
def test_series_map_box_timedelta():
    """Mapped elements of a timedelta series expose ``total_seconds``."""
    # GH#11349
    deltas = Series(timedelta_range("1 day 1 s", periods=5, freq="h"))

    # only checks that calling ``total_seconds`` on each element does not raise
    deltas.map(lambda delta: delta.total_seconds())
|
||||
|
||||
|
||||
def test_map_callable(datetime_series):
|
||||
with np.errstate(all="ignore"):
|
||||
tm.assert_series_equal(datetime_series.map(np.sqrt), np.sqrt(datetime_series))
|
||||
|
||||
# map function element-wise
|
||||
tm.assert_series_equal(datetime_series.map(math.exp), np.exp(datetime_series))
|
||||
|
||||
# empty series
|
||||
s = Series(dtype=object, name="foo", index=Index([], name="bar"))
|
||||
rs = s.map(lambda x: x)
|
||||
tm.assert_series_equal(s, rs)
|
||||
|
||||
# check all metadata (GH 9322)
|
||||
assert s is not rs
|
||||
assert s.index is rs.index
|
||||
assert s.dtype == rs.dtype
|
||||
assert s.name == rs.name
|
||||
|
||||
# index but no data
|
||||
s = Series(index=[1, 2, 3], dtype=np.float64)
|
||||
rs = s.map(lambda x: x)
|
||||
tm.assert_series_equal(s, rs)
|
||||
|
||||
|
||||
def test_map_same_length_inference_bug():
    """
    Mapping to equal-length sequences keeps one sequence per element.

    The expected results below hold one tuple per input row.
    """

    def f(x):
        return (x, x + 1)

    # an unused ``s = Series([1, 2])`` that was overwritten before use
    # has been dropped from the start of this test
    s = Series([1, 2, 3])
    result = s.map(f)
    expected = Series([(1, 2), (2, 3), (3, 4)])
    tm.assert_series_equal(result, expected)

    s = Series(["foo,bar"])
    result = s.map(lambda x: x.split(","))
    expected = Series([("foo", "bar")])
    tm.assert_series_equal(result, expected)
|
||||
|
||||
|
||||
def test_series_map_box_timestamps():
    """Mapped elements of a datetime series expose ``hour``/``day``/``month``."""
    # GH#2689, GH#2627
    dates = Series(date_range("1/1/2000", periods=3))

    parts = dates.map(lambda stamp: (stamp.hour, stamp.day, stamp.month))
    tm.assert_series_equal(parts, Series([(0, 1, 1), (0, 2, 1), (0, 3, 1)]))
|
||||
|
||||
|
||||
def test_map_series_stringdtype(any_string_dtype, using_infer_string):
|
||||
# map test on StringDType, GH#40823
|
||||
ser1 = Series(
|
||||
data=["cat", "dog", "rabbit"],
|
||||
index=["id1", "id2", "id3"],
|
||||
dtype=any_string_dtype,
|
||||
)
|
||||
ser2 = Series(["id3", "id2", "id1", "id7000"], dtype=any_string_dtype)
|
||||
result = ser2.map(ser1)
|
||||
|
||||
item = pd.NA
|
||||
if ser2.dtype == object:
|
||||
item = np.nan
|
||||
|
||||
expected = Series(data=["rabbit", "dog", "cat", item], dtype=any_string_dtype)
|
||||
if using_infer_string and any_string_dtype == "object":
|
||||
expected = expected.astype("str")
|
||||
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "data, expected_dtype",
    [(["1-1", "1-1", np.nan], "category"), (["1-1", "1-2", np.nan], "str")],
)
def test_map_categorical_with_nan_values(data, expected_dtype):
    """``na_action="ignore"`` skips the NaN entry of a categorical series."""

    # GH 20714 bug fixed in: GH 24275
    def leading_part(val):
        return val.split("-")[0]

    categorical = Series(data, dtype="category")
    result = categorical.map(leading_part, na_action="ignore")
    expected = Series(["1", "1", np.nan], dtype=expected_dtype)
    tm.assert_series_equal(result, expected)
|
||||
|
||||
|
||||
def test_map_empty_integer_series():
|
||||
# GH52384
|
||||
s = Series([], dtype=int)
|
||||
result = s.map(lambda x: x)
|
||||
tm.assert_series_equal(result, s)
|
||||
|
||||
|
||||
def test_map_empty_integer_series_with_datetime_index():
|
||||
# GH 21245
|
||||
s = Series([], index=date_range(start="2018-01-01", periods=0), dtype=int)
|
||||
result = s.map(lambda x: x)
|
||||
tm.assert_series_equal(result, s)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("func", [str, lambda x: str(x)])
def test_map_simple_str_callables_same_as_astype(
    string_series, func, using_infer_string
):
    """Mapping ``str`` (directly or via a lambda) matches ``astype``."""
    # test that we are evaluating row-by-row first
    # before vectorized evaluation
    result = string_series.map(func)
    target_dtype = "str" if using_infer_string else str
    expected = string_series.astype(target_dtype)
    tm.assert_series_equal(result, expected)
|
||||
|
||||
|
||||
def test_list_raises(string_series):
    """Passing a list as the mapper must raise ``TypeError``."""
    not_a_mapper = [lambda x: x]
    with pytest.raises(TypeError, match="'list' object is not callable"):
        string_series.map(not_a_mapper)
|
||||
|
||||
|
||||
def test_map():
    """Map a Series through another Series and through the equivalent dict."""
    labels = ["foo1", "foo2", "foo3", "foo4", "foo5"]
    dates = bdate_range("1/1/2009", periods=5)

    source = Series([0.0, 1.0, 0.0, 1.0, 0.0], index=labels)
    target = Series(labels[:4], index=dates[:4])

    # the mapper may be a Series or a plain dict; both look up by label
    for mapper in (source, source.to_dict()):
        merged = target.map(mapper)
        for key, value in merged.items():
            assert value == source[target[key]]
|
||||
|
||||
|
||||
def test_map_datetime(datetime_series):
    """Mapping a doubling function must equal multiplying the Series by two."""
    # function
    doubled = datetime_series.map(lambda value: value * 2)
    tm.assert_series_equal(doubled, datetime_series * 2)
|
||||
|
||||
|
||||
def test_map_category():
    """Map integers through a categorical and a plain string Series mapper."""
    # GH 10324
    values = Series([1, 2, 3, 4])
    parity = ["even", "odd", "even", "odd"]
    # mappers are indexed 0..3, so the value 4 has no match
    looked_up = ["odd", "even", "odd", np.nan]

    categorical_mapper = Series(parity, dtype="category")
    tm.assert_series_equal(
        values.map(categorical_mapper), Series(looked_up, dtype="category")
    )

    plain_mapper = Series(parity)
    tm.assert_series_equal(values.map(plain_mapper), Series(looked_up))
|
||||
|
||||
|
||||
def test_map_category_numeric():
    """Map strings through int mappers with categorical and plain indexes."""
    letters = Series(["a", "b", "c", "d"])
    mapper_labels = ["b", "c", "d", "e"]

    # both index flavours must give the same lookup result
    for mapper_index in (pd.CategoricalIndex(mapper_labels), Index(mapper_labels)):
        mapper = Series([1, 2, 3, 4], index=mapper_index)
        tm.assert_series_equal(letters.map(mapper), Series([np.nan, 1, 2, 3]))
|
||||
|
||||
|
||||
def test_map_category_string():
    """Map strings through a categorical mapper and a plain string mapper."""
    letters = Series(["a", "b", "c", "d"])
    mapper_labels = ["b", "c", "d", "e"]

    categorical_mapper = Series(
        ["B", "C", "D", "E"],
        dtype="category",
        index=pd.CategoricalIndex(mapper_labels),
    )
    categorical_expected = Series(
        pd.Categorical([np.nan, "B", "C", "D"], categories=["B", "C", "D", "E"])
    )
    tm.assert_series_equal(letters.map(categorical_mapper), categorical_expected)

    plain_mapper = Series(["B", "C", "D", "E"], index=Index(mapper_labels))
    tm.assert_series_equal(
        letters.map(plain_mapper), Series([np.nan, "B", "C", "D"])
    )
|
||||
|
||||
|
||||
@pytest.mark.filterwarnings(r"ignore:Dtype inference:FutureWarning")
def test_map_empty(request, index):
    """Mapping through an empty dict must yield all-NaN on the same index."""
    if isinstance(index, MultiIndex):
        xfail_marker = pytest.mark.xfail(
            reason="Initializing a Series from a MultiIndex is not supported"
        )
        request.applymarker(xfail_marker)

    ser = Series(index)
    mapped = ser.map({})

    tm.assert_series_equal(mapped, Series(np.nan, index=ser.index))
|
||||
|
||||
|
||||
def test_map_compat():
    """Map a boolean Series through a dict keyed by ``True`` / ``False``."""
    # related GH 8024
    flags = Series([True, True, False], index=[1, 2, 3])
    mapped = flags.map({True: "foo", False: "bar"})
    tm.assert_series_equal(mapped, Series(["foo", "foo", "bar"], index=[1, 2, 3]))
|
||||
|
||||
|
||||
def test_map_int():
    """Map a float Series through an int Series that lacks one of its keys."""
    floats = Series({"a": 1.0, "b": 2.0, "c": 3.0, "d": 4})
    ints = Series({1: 11, 2: 22, 3: 33})

    assert floats.dtype == np.float64
    assert issubclass(ints.dtype.type, np.integer)

    merged = floats.map(ints)

    # the unmatched key 4 ("d") becomes missing, so the result is float
    assert merged.dtype == np.float64
    assert not isna(merged["c"])
    assert isna(merged["d"])
|
||||
|
||||
|
||||
def test_map_type_inference():
    """A mapper that returns ``np.where`` results must give an integer dtype."""
    counts = Series(range(3))
    flags = counts.map(lambda x: np.where(x == 0, 0, 1))
    assert issubclass(flags.dtype.type, np.integer)
|
||||
|
||||
|
||||
def test_map_decimal(string_series):
    """Mapping to ``Decimal`` gives an object Series holding ``Decimal`` values."""
    as_decimal = string_series.map(lambda x: Decimal(str(x)))
    assert as_decimal.dtype == np.object_
    first = as_decimal.iloc[0]
    assert isinstance(first, Decimal)
|
||||
|
||||
|
||||
def test_map_na_exclusion():
    """With ``na_action="ignore"`` a doubling map must equal ``ser * 2``."""
    ser = Series([1.5, np.nan, 3, np.nan, 5])
    doubled = ser.map(lambda x: x * 2, na_action="ignore")
    tm.assert_series_equal(doubled, ser * 2)
|
||||
|
||||
|
||||
def test_map_dict_with_tuple_keys():
    """
    Map tuple values through a dict whose keys are tuples.

    With the MultiIndex-ing behaviour introduced in v0.14.0, a dict with
    tuple keys passed to map was converted to a multi-index, which stopped
    tuple values from being mapped properly.
    """
    # GH 18496
    label_mappings = {(1,): "A", (2,): "B", (3, 4): "A", (5, 6): "B"}
    df = DataFrame({"a": [(1,), (2,), (3, 4), (5, 6)]})

    df["expected_labels"] = Series(["A", "B", "A", "B"], index=df.index)
    df["labels"] = df["a"].map(label_mappings)

    # All labels should be filled now
    tm.assert_series_equal(df["labels"], df["expected_labels"], check_names=False)
|
||||
|
||||
|
||||
def test_map_counter():
    """Map through a ``Counter``; the key absent from it maps to 0 here."""
    letters = Series(["a", "b", "c"], index=[1, 2, 3])

    tally = Counter()
    tally.update({"b": 5})
    tally.update({"c": 1})

    mapped = letters.map(tally)
    tm.assert_series_equal(mapped, Series([0, 5, 1], index=[1, 2, 3]))
|
||||
|
||||
|
||||
def test_map_defaultdict():
    """Map through a ``defaultdict``; unknown keys take its default value."""
    numbers = Series([1, 2, 3], index=["a", "b", "c"])

    with_default = defaultdict(lambda: "blank")
    with_default[1] = "stuff"

    mapped = numbers.map(with_default)
    tm.assert_series_equal(
        mapped, Series(["stuff", "blank", "blank"], index=["a", "b", "c"])
    )
|
||||
|
||||
|
||||
def test_map_dict_na_key():
    """A dict mapper with an ``np.nan`` key must map missing values too."""
    # https://github.com/pandas-dev/pandas/issues/17648
    # Checks that np.nan key is appropriately mapped
    with_nan = Series([1, 2, np.nan])
    mapped = with_nan.map({1: "a", 2: "b", np.nan: "c"})
    tm.assert_series_equal(mapped, Series(["a", "b", "c"]))
|
||||
|
||||
|
||||
@pytest.mark.parametrize("na_action", [None, "ignore"])
def test_map_defaultdict_na_key(na_action):
    """Map NaN through a ``defaultdict`` that has an explicit ``np.nan`` key."""
    # GH 48813
    with_nan = Series([1, 2, np.nan])
    default_map = defaultdict(lambda: "missing", {1: "a", 2: "b", np.nan: "c"})

    mapped = with_nan.map(default_map, na_action=na_action)

    # NaN is looked up unless na_action asks for it to be skipped
    if na_action is None:
        last = "c"
    else:
        last = np.nan
    tm.assert_series_equal(mapped, Series({0: "a", 1: "b", 2: last}))
|
||||
|
||||
|
||||
@pytest.mark.parametrize("na_action", [None, "ignore"])
def test_map_defaultdict_missing_key(na_action):
    """Map NaN through a ``defaultdict`` that has no ``np.nan`` key."""
    # GH 48813
    with_nan = Series([1, 2, np.nan])
    default_map = defaultdict(lambda: "missing", {1: "a", 2: "b", 3: "c"})

    mapped = with_nan.map(default_map, na_action=na_action)

    # NaN falls back to the default unless na_action asks for it to be skipped
    if na_action is None:
        last = "missing"
    else:
        last = np.nan
    tm.assert_series_equal(mapped, Series({0: "a", 1: "b", 2: last}))
|
||||
|
||||
|
||||
@pytest.mark.parametrize("na_action", [None, "ignore"])
def test_map_defaultdict_unmutated(na_action):
    """``Series.map`` must leave the ``defaultdict`` it is given unchanged."""
    # GH 48813
    with_nan = Series([1, 2, np.nan])
    default_map = defaultdict(lambda: "missing", {1: "a", 2: "b", np.nan: "c"})
    snapshot = default_map.copy()

    with_nan.map(default_map, na_action=na_action)

    assert default_map == snapshot
|
||||
|
||||
|
||||
@pytest.mark.parametrize("arg_func", [dict, Series])
def test_map_dict_ignore_na(arg_func):
    """With ``na_action="ignore"`` the mapper's NaN entry must not be used."""
    # GH#47527
    mapper = arg_func({1: 10, np.nan: 42})
    with_nan = Series([1, np.nan, 2])

    mapped = with_nan.map(mapper, na_action="ignore")

    tm.assert_series_equal(mapped, Series([10, np.nan, np.nan]))
|
||||
|
||||
|
||||
def test_map_defaultdict_ignore_na():
    """Map through ``defaultdict(int, ...)`` with the default ``na_action``."""
    # GH#47527
    mapper = defaultdict(int, {1: 10, np.nan: 42})
    with_nan = Series([1, np.nan, 2])

    mapped = with_nan.map(mapper)

    tm.assert_series_equal(mapped, Series([10, 42, 0]))
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "na_action, expected",
    [
        (None, Series([10.0, 42.0, np.nan])),
        ("ignore", Series([10, np.nan, np.nan])),
    ],
)
def test_map_categorical_na_ignore(na_action, expected):
    """Map a categorical Series with NaN through a dict that has a NaN key."""
    # GH#47527
    categorical = pd.Categorical([1, np.nan, 2], categories=[10, 1, 2])
    mapped = Series(categorical).map({1: 10, np.nan: 42}, na_action=na_action)
    tm.assert_series_equal(mapped, expected)
|
||||
|
||||
|
||||
def test_map_dict_subclass_with_missing():
    """
    Check Series.map with a dict subclass that defines __missing__,
    i.e. one that supplies a default value (GH #15999).
    """

    class DefaultingDict(dict):
        def __missing__(self, key):
            return "missing"

    numbers = Series([1, 2, 3])
    mapper = DefaultingDict({3: "three"})

    mapped = numbers.map(mapper)

    tm.assert_series_equal(mapped, Series(["missing", "missing", "three"]))
|
||||
|
||||
|
||||
def test_map_dict_subclass_without_missing():
    """A dict subclass without ``__missing__`` maps unknown keys to NaN."""

    class PlainDictSubclass(dict):
        pass

    numbers = Series([1, 2, 3])
    mapper = PlainDictSubclass({3: "three"})

    mapped = numbers.map(mapper)

    tm.assert_series_equal(mapped, Series([np.nan, np.nan, "three"]))
|
||||
|
||||
|
||||
def test_map_abc_mapping(non_dict_mapping_subclass):
    """``Series.map`` must accept the non-dict mapping type from the fixture."""
    # https://github.com/pandas-dev/pandas/issues/29733
    # Check collections.abc.Mapping support as mapper for Series.map
    numbers = Series([1, 2, 3])
    mapper = non_dict_mapping_subclass({3: "three"})

    mapped = numbers.map(mapper)

    tm.assert_series_equal(mapped, Series([np.nan, np.nan, "three"]))
|
||||
|
||||
|
||||
def test_map_abc_mapping_with_missing(non_dict_mapping_subclass):
    """``__missing__`` on a non-dict mapping must not alter the mapped result."""
    # https://github.com/pandas-dev/pandas/issues/29733
    # Check collections.abc.Mapping support as mapper for Series.map

    class MappingWithMissing(non_dict_mapping_subclass):
        def __missing__(self, key):
            return "missing"

    numbers = Series([1, 2, 3])
    mapper = MappingWithMissing({3: "three"})

    mapped = numbers.map(mapper)

    # __missing__ is a dict concept, not a Mapping concept,
    # so it should not change the result!
    tm.assert_series_equal(mapped, Series([np.nan, np.nan, "three"]))
|
||||
|
||||
|
||||
def test_map_box_dt64(unit):
    """Values handed to the mapper of a datetime64 Series are ``Timestamp``."""
    stamps = [pd.Timestamp("2011-01-01"), pd.Timestamp("2011-01-02")]
    ser = Series(stamps).dt.as_unit(unit)
    assert ser.dtype == f"datetime64[{unit}]"

    def describe(x):
        return f"{type(x).__name__}_{x.day}_{x.tz}"

    # boxed value must be Timestamp instance
    described = ser.map(describe)
    tm.assert_series_equal(
        described, Series(["Timestamp_1_None", "Timestamp_2_None"])
    )
|
||||
|
||||
|
||||
def test_map_box_dt64tz(unit):
    """Values handed to the mapper of a tz-aware Series keep their timezone."""
    zone = "US/Eastern"
    stamps = [
        pd.Timestamp("2011-01-01", tz=zone),
        pd.Timestamp("2011-01-02", tz=zone),
    ]
    ser = Series(stamps).dt.as_unit(unit)
    assert ser.dtype == f"datetime64[{unit}, US/Eastern]"

    def describe(x):
        return f"{type(x).__name__}_{x.day}_{x.tz}"

    described = ser.map(describe)
    tm.assert_series_equal(
        described, Series(["Timestamp_1_US/Eastern", "Timestamp_2_US/Eastern"])
    )
|
||||
|
||||
|
||||
def test_map_box_td64(unit):
    """Values handed to the mapper of a timedelta64 Series are ``Timedelta``."""
    # timedelta
    deltas = [pd.Timedelta("1 days"), pd.Timedelta("2 days")]
    ser = Series(deltas).dt.as_unit(unit)
    assert ser.dtype == f"timedelta64[{unit}]"

    def describe(x):
        return f"{type(x).__name__}_{x.days}"

    described = ser.map(describe)
    tm.assert_series_equal(described, Series(["Timedelta_1", "Timedelta_2"]))
|
||||
|
||||
|
||||
def test_map_box_period():
    """Values handed to the mapper of a period Series are ``Period`` objects."""
    # period
    periods = [pd.Period("2011-01-01", freq="M"), pd.Period("2011-01-02", freq="M")]
    ser = Series(periods)
    assert ser.dtype == "Period[M]"

    def describe(x):
        return f"{type(x).__name__}_{x.freqstr}"

    described = ser.map(describe)
    tm.assert_series_equal(described, Series(["Period_M", "Period_M"]))
|
||||
|
||||
|
||||
@pytest.mark.parametrize("na_action", [None, "ignore"])
def test_map_categorical(na_action, using_infer_string):
    """Map over an ordered categorical Series.

    A one-to-one mapper (``str.lower``) must return a categorical result
    with the mapped categories in the same order. A many-to-one mapper
    (everything to ``"A"``) must return a plain string Series.

    Parameters
    ----------
    na_action : None or "ignore"
        Forwarded to ``Series.map``.
    using_infer_string : bool
        Selects the dtype expected for the plain string result: ``"str"``
        when true, ``object`` otherwise.
    """
    values = pd.Categorical(list("ABBABCD"), categories=list("DCBA"), ordered=True)
    s = Series(values, name="XX", index=list("abcdefg"))

    result = s.map(lambda x: x.lower(), na_action=na_action)
    exp_values = pd.Categorical(list("abbabcd"), categories=list("dcba"), ordered=True)
    exp = Series(exp_values, name="XX", index=list("abcdefg"))
    tm.assert_series_equal(result, exp)
    tm.assert_categorical_equal(result.values, exp_values)

    result = s.map(lambda x: "A", na_action=na_action)
    exp = Series(["A"] * 7, name="XX", index=list("abcdefg"))
    tm.assert_series_equal(result, exp)
    # Choose the dtype before comparing: ``assert a == b if c else d`` parses
    # as ``assert (a == b) if c else d``, which would assert the truthy
    # literal "str" and so check nothing when using_infer_string is true.
    expected_dtype = "str" if using_infer_string else object
    assert result.dtype == expected_dtype
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "na_action, expected",
    (
        [None, Series(["A", "B", "nan"], name="XX")],
        [
            "ignore",
            Series(
                ["A", "B", np.nan],
                name="XX",
                dtype=pd.CategoricalDtype(list("DCBA"), True),
            ),
        ],
    ),
)
def test_map_categorical_na_action(na_action, expected):
    """Map ``str`` over an ordered categorical Series that contains NaN."""
    cat_dtype = pd.CategoricalDtype(list("DCBA"), ordered=True)
    categorical = pd.Categorical(list("AB") + [np.nan], dtype=cat_dtype)
    ser = Series(categorical, name="XX")

    mapped = ser.map(str, na_action=na_action)

    tm.assert_series_equal(mapped, expected)
|
||||
|
||||
|
||||
def test_map_datetimetz():
    """Map over a tz-aware hourly Series with three different mappers."""
    zone = "Asia/Tokyo"
    hourly = date_range("2011-01-01", "2011-01-02", freq="h").tz_localize(zone)
    ser = Series(hourly, name="XX")

    # keep tz
    shifted = ser.map(lambda x: x + pd.offsets.Day())
    next_day = date_range("2011-01-02", "2011-01-03", freq="h").tz_localize(zone)
    tm.assert_series_equal(shifted, Series(next_day, name="XX"))

    hours = ser.map(lambda x: x.hour)
    tm.assert_series_equal(
        hours, Series(list(range(24)) + [0], name="XX", dtype=np.int64)
    )

    # not vectorized
    def tz_name(x):
        if isinstance(x, pd.Timestamp):
            return str(x.tz)
        raise ValueError

    names = ser.map(tz_name)
    tm.assert_series_equal(names, Series(["Asia/Tokyo"] * 25, name="XX"))
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "vals,mapping,exp",
    [
        (list("abc"), {np.nan: "not NaN"}, [np.nan] * 3 + ["not NaN"]),
        (list("abc"), {"a": "a letter"}, ["a letter"] + [np.nan] * 3),
        (list(range(3)), {0: 42}, [42] + [np.nan] * 3),
    ],
)
def test_map_missing_mixed(vals, mapping, exp):
    """Map ``vals`` with a NaN appended through a dict that covers one key."""
    # GH20495
    with_nan = Series(vals + [np.nan])
    mapped = with_nan.map(mapping)
    tm.assert_series_equal(mapped, Series(exp))
|
||||
|
||||
|
||||
def test_map_scalar_on_date_time_index_aware_series():
    """Map a constant function over a Series built from a tz-aware index."""
    # GH 25959
    # Calling map on a localized time series should not cause an error
    aware_index = date_range("2020-01-01", periods=10, tz="UTC")
    series = Series(np.arange(10, dtype=np.float64), index=aware_index, name="ts")

    ones = Series(series.index).map(lambda x: 1)

    tm.assert_series_equal(ones, Series(np.ones(len(series)), dtype="int64"))
|
||||
|
||||
|
||||
def test_map_float_to_string_precision():
    """Mapping ``str`` over ``1 / 3`` must give the full-precision repr."""
    # GH 13228
    third = Series(1 / 3)
    as_text = third.map(lambda val: str(val))
    assert as_text.to_dict() == {0: "0.3333333333333333"}
|
||||
|
||||
|
||||
def test_map_to_timedelta():
    """Element-wise ``pd.to_timedelta`` via map must match the bulk call."""
    all_valid = ["00:00:01", "00:00:02"]
    with_missing = ["00:00:01", np.nan, pd.NaT, pd.NaT]

    for strings in (all_valid, with_missing):
        converted_in_bulk = pd.to_timedelta(strings)
        converted_by_map = Series(strings).map(pd.to_timedelta)
        tm.assert_series_equal(Series(converted_in_bulk), converted_by_map)
|
||||
|
||||
|
||||
def test_map_type():
    """Mapping the builtin ``type`` must return each element's type."""
    # GH 46719
    labels = ["a", "b", "c"]
    mixed = Series([3, "string", float], index=labels)
    kinds = mixed.map(type)
    tm.assert_series_equal(kinds, Series([int, str, type], index=labels))
|
||||
@ -0,0 +1,82 @@
|
||||
import operator
|
||||
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
from pandas import (
|
||||
DataFrame,
|
||||
Series,
|
||||
)
|
||||
import pandas._testing as tm
|
||||
|
||||
|
||||
class TestMatmul:
    def test_matmul(self):
        """Exercise ``@`` between a Series and Series/DataFrame/array operands.

        Every product is compared with the equivalent ``np.dot`` call; the
        final two checks cover the errors raised for mismatched operands.
        """
        # matmul test is for GH#10259
        labels = ["p", "q", "r", "s"]
        rows = ["1", "2", "3"]
        vec = Series(np.random.default_rng(2).standard_normal(4), index=labels)
        mat = DataFrame(
            np.random.default_rng(2).standard_normal((3, 4)),
            index=rows,
            columns=labels,
        ).T
        mat_t = mat.T

        # Series @ DataFrame -> Series
        tm.assert_series_equal(
            operator.matmul(vec, mat),
            Series(np.dot(vec.values, mat.values), index=rows),
        )

        # DataFrame @ Series -> Series
        tm.assert_series_equal(
            operator.matmul(mat_t, vec),
            Series(np.dot(mat_t.values, vec.T.values), index=rows),
        )

        # Series @ Series -> scalar, then GH#21530: a 1D np.array and a
        # 1D list on the left-hand side (__rmatmul__)
        for left in (vec, vec.values, vec.values.tolist()):
            tm.assert_almost_equal(
                operator.matmul(left, vec), np.dot(vec.values, vec.values)
            )

        # GH#21530
        # matrix (2D np.array, then 2D nested lists) @ Series (__rmatmul__)
        for left in (mat_t.values, mat_t.values.tolist()):
            tm.assert_almost_equal(
                operator.matmul(left, vec), np.dot(mat_t.values, vec.values)
            )

        # mixed dtype DataFrame @ Series
        vec["p"] = int(vec.p)
        tm.assert_series_equal(
            operator.matmul(mat_t, vec),
            Series(np.dot(mat_t.values, vec.T.values), index=rows),
        )

        # different dtypes DataFrame @ Series
        vec = vec.astype(int)
        tm.assert_series_equal(
            operator.matmul(mat_t, vec),
            Series(np.dot(mat_t.values, vec.T.values), index=rows),
        )

        shape_msg = r"Dot product shape mismatch, \(4,\) vs \(3,\)"
        # exception raised is of type Exception
        with pytest.raises(Exception, match=shape_msg):
            vec.dot(vec.values[:3])

        align_msg = "matrices are not aligned"
        with pytest.raises(ValueError, match=align_msg):
            vec.dot(mat_t)
|
||||
@ -0,0 +1,248 @@
|
||||
"""
|
||||
Note: for naming purposes, most tests are titled e.g. "test_nlargest_foo"
|
||||
but are implicitly also testing nsmallest_foo.
|
||||
"""
|
||||
from itertools import product
|
||||
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
import pandas as pd
|
||||
from pandas import Series
|
||||
import pandas._testing as tm
|
||||
|
||||
# Dtype names used to parametrize the per-dtype Series fixture below:
# the three datetime-like kinds first, then the numeric dtypes by family.
main_dtypes = (
    ["datetime", "datetimetz", "timedelta"]
    + [f"int{bits}" for bits in (8, 16, 32, 64)]
    + ["float32", "float64"]
    + [f"uint{bits}" for bits in (8, 16, 32, 64)]
)
|
||||
|
||||
|
||||
@pytest.fixture
def s_main_dtypes():
    """
    A DataFrame with one column per dtype

    * datetime
    * datetimetz
    * timedelta
    * [u]int{8,16,32,64}
    * float{32,64}

    Each column is named after its dtype.
    """
    years = ["2003", "2002", "2001", "2002", "2005"]
    frame = pd.DataFrame(
        {
            "datetime": pd.to_datetime(years),
            "datetimetz": pd.to_datetime(years).tz_localize("US/Eastern"),
            "timedelta": pd.to_timedelta(["3d", "2d", "1d", "2d", "5d"]),
        }
    )

    numeric_dtypes = (
        "int8",
        "int16",
        "int32",
        "int64",
        "float32",
        "float64",
        "uint8",
        "uint16",
        "uint32",
        "uint64",
    )
    # every numeric column carries the same values, cast to its own dtype
    for name in numeric_dtypes:
        frame[name] = Series([3, 2, 1, 2, 5], dtype=name)

    return frame
|
||||
|
||||
|
||||
@pytest.fixture(params=main_dtypes)
def s_main_dtypes_split(request, s_main_dtypes):
    """One column of ``s_main_dtypes`` per parametrized dtype name."""
    column = request.param
    return s_main_dtypes[column]
|
||||
|
||||
|
||||
def assert_check_nselect_boundary(vals, dtype, method):
    """Assert that ``method(3)`` picks the expected three boundary values.

    ``vals`` is turned into a Series of ``dtype``. ``"nsmallest"`` must
    return labels ``[0, 1, 2]``; any other method name must return
    ``[3, 2, 1]``.
    """
    # helper function for 'test_boundary_{dtype}' tests
    ser = Series(vals, dtype=dtype)
    select = getattr(ser, method)

    if method == "nsmallest":
        expected_idxr = [0, 1, 2]
    else:
        expected_idxr = [3, 2, 1]

    tm.assert_series_equal(select(3), ser.loc[expected_idxr])
|
||||
|
||||
|
||||
class TestSeriesNLargestNSmallest:
    @pytest.mark.parametrize(
        "r",
        [
            Series([3.0, 2, 1, 2, "5"], dtype="object"),
            Series([3.0, 2, 1, 2, 5], dtype="object"),
            # not supported on some archs
            # Series([3., 2, 1, 2, 5], dtype='complex256'),
            Series([3.0, 2, 1, 2, 5], dtype="complex128"),
            Series(list("abcde")),
            Series(list("abcde"), dtype="category"),
        ],
    )
    def test_nlargest_error(self, r):
        """Both selectors must raise ``TypeError`` for each tested ``n``."""
        msg = f"Cannot use method 'n(largest|smallest)' with dtype {r.dtype}"
        for method in (r.nlargest, r.nsmallest):
            for n in (2, len(r), 0, -1):
                with pytest.raises(TypeError, match=msg):
                    method(n)

    def test_nsmallest_nlargest(self, s_main_dtypes_split):
        """Check selection for small, non-positive and oversized ``n``."""
        ser = s_main_dtypes_split

        tm.assert_series_equal(ser.nsmallest(2), ser.iloc[[2, 1]])
        tm.assert_series_equal(ser.nsmallest(2, keep="last"), ser.iloc[[2, 3]])

        # n <= 0 selects nothing
        empty = ser.iloc[0:0]
        for select in (ser.nsmallest, ser.nlargest):
            for n in (0, -1):
                tm.assert_series_equal(select(n), empty)

        # n >= len(ser) returns everything in sorted order
        sizes = (len(ser), len(ser) + 1)
        for n in sizes:
            tm.assert_series_equal(ser.nsmallest(n), ser.sort_values())
        for n in sizes:
            tm.assert_series_equal(ser.nlargest(n), ser.iloc[[4, 0, 1, 3, 2]])

    def test_nlargest_misc(self):
        """Check default ``n``, invalid ``keep`` and all-equal values."""
        ser = Series([3.0, np.nan, 1, 2, 5])
        tm.assert_series_equal(ser.nlargest(), ser.iloc[[4, 0, 3, 2, 1]])
        tm.assert_series_equal(ser.nsmallest(), ser.iloc[[2, 3, 0, 4, 1]])

        msg = 'keep must be either "first", "last"'
        for select in (ser.nsmallest, ser.nlargest):
            with pytest.raises(ValueError, match=msg):
                select(keep="invalid")

        # GH#15297
        ser = Series([1] * 5, index=[1, 2, 3, 4, 5])
        expected_first = Series([1] * 3, index=[1, 2, 3])
        expected_last = Series([1] * 3, index=[5, 4, 3])

        tm.assert_series_equal(ser.nsmallest(3), expected_first)
        tm.assert_series_equal(ser.nsmallest(3, keep="last"), expected_last)
        tm.assert_series_equal(ser.nlargest(3), expected_first)
        tm.assert_series_equal(ser.nlargest(3, keep="last"), expected_last)

    @pytest.mark.parametrize("n", range(1, 5))
    def test_nlargest_n(self, n):
        """Selectors must agree with sort-then-head on a duplicated index."""
        # GH 13412
        ser = Series([1, 4, 3, 2], index=[0, 0, 1, 1])

        largest = ser.nlargest(n)
        tm.assert_series_equal(largest, ser.sort_values(ascending=False).head(n))

        smallest = ser.nsmallest(n)
        tm.assert_series_equal(smallest, ser.sort_values().head(n))

    def test_nlargest_boundary_integer(self, nselect_method, any_int_numpy_dtype):
        """Check selection at the extreme values of an integer dtype."""
        # GH#21426
        limits = np.iinfo(any_int_numpy_dtype)
        lowest, highest = limits.min, limits.max
        vals = [lowest, lowest + 1, highest - 1, highest]
        assert_check_nselect_boundary(vals, any_int_numpy_dtype, nselect_method)

    def test_nlargest_boundary_float(self, nselect_method, float_numpy_dtype):
        """Check selection at the extreme finite values of a float dtype."""
        # GH#21426
        limits = np.finfo(float_numpy_dtype)
        lowest, highest = limits.min, limits.max
        # the representable neighbours of the two extremes, towards zero
        lowest_2nd, highest_2nd = np.nextafter(
            [lowest, highest], 0, dtype=float_numpy_dtype
        )
        vals = [lowest, lowest_2nd, highest_2nd, highest]
        assert_check_nselect_boundary(vals, float_numpy_dtype, nselect_method)

    @pytest.mark.parametrize("dtype", ["datetime64[ns]", "timedelta64[ns]"])
    def test_nlargest_boundary_datetimelike(self, nselect_method, dtype):
        """Check selection at the int64 bounds for datetime-like dtypes."""
        # GH#21426
        # use int64 bounds and +1 to min_val since true minimum is NaT
        # (include min_val/NaT at end to maintain same expected_idxr)
        limits = np.iinfo("int64")
        lowest, highest = limits.min, limits.max
        vals = [lowest + 1, lowest + 2, highest - 1, highest, lowest]
        assert_check_nselect_boundary(vals, dtype, nselect_method)

    def test_nlargest_duplicate_keep_all_ties(self):
        """``keep="all"`` must return every value tied with the last pick."""
        # see GH#16818
        ser = Series([10, 9, 8, 7, 7, 7, 7, 6])

        largest = ser.nlargest(4, keep="all")
        tm.assert_series_equal(largest, Series([10, 9, 8, 7, 7, 7, 7]))

        smallest = ser.nsmallest(2, keep="all")
        tm.assert_series_equal(
            smallest, Series([6, 7, 7, 7, 7], index=[7, 3, 4, 5, 6])
        )

    @pytest.mark.parametrize(
        "data,expected", [([True, False], [True]), ([True, False, True, True], [True])]
    )
    def test_nlargest_boolean(self, data, expected):
        """The largest element of a boolean Series must be ``True``."""
        # GH#26154 : ensure True > False
        largest = Series(data).nlargest(1)
        tm.assert_series_equal(largest, Series(expected))

    def test_nlargest_nullable(self, any_numeric_ea_dtype):
        """``nlargest`` on a nullable dtype must skip the ``pd.NA`` entry."""
        # GH#42816
        dtype = any_numeric_ea_dtype
        rng = np.random.default_rng(2)
        if dtype.startswith("UInt"):
            # Can't cast from negative float to uint on some platforms
            raw = rng.integers(1, 10, 10)
        else:
            raw = rng.standard_normal(10)
        arr = raw.astype(dtype.lower(), copy=False)

        ser = Series(arr.copy(), dtype=dtype)
        ser[1] = pd.NA
        largest = ser.nlargest(5)

        # reference: drop position 1 from the plain array, then select
        without_na = Series(np.delete(arr, 1), index=ser.index.delete(1))
        expected = without_na.nlargest(5).astype(dtype)
        tm.assert_series_equal(largest, expected)

    def test_nsmallest_nan_when_keep_is_all(self):
        """Check ``nsmallest(keep="all")`` on data containing missing values."""
        # GH#46589
        ser = Series([1, 2, 3, 3, 3, None])
        smallest = ser.nsmallest(3, keep="all")
        tm.assert_series_equal(smallest, Series([1.0, 2.0, 3.0, 3.0, 3.0]))

        ser = Series([1, 2, None, None, None])
        smallest = ser.nsmallest(3, keep="all")
        tm.assert_series_equal(smallest, Series([1, 2, None, None, None]))
|
||||
@ -0,0 +1,24 @@
|
||||
import numpy as np
|
||||
|
||||
from pandas import (
|
||||
Categorical,
|
||||
Series,
|
||||
)
|
||||
|
||||
|
||||
def test_nunique():
    """Count distinct values after overwriting slices with NaN and a constant."""
    # basics.rst doc example
    values = Series(np.random.default_rng(2).standard_normal(500))
    values[20:500] = np.nan
    values[10:20] = 5000

    distinct = values.nunique()

    assert distinct == 11
|
||||
|
||||
|
||||
def test_nunique_categorical():
    """An empty or NaN-only categorical Series has zero unique values."""
    # GH#18051
    for raw in ([], [np.nan]):
        ser = Series(Categorical(raw))
        assert ser.nunique() == 0
|
||||
@ -0,0 +1,128 @@
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
from pandas import (
|
||||
Series,
|
||||
date_range,
|
||||
)
|
||||
import pandas._testing as tm
|
||||
|
||||
|
||||
class TestSeriesPctChange:
    def test_pct_change(self, datetime_series):
        """Compare ``pct_change`` with the shift-and-divide formula."""
        msg = (
            "The 'fill_method' keyword being not None and the 'limit' keyword in "
            "Series.pct_change are deprecated"
        )

        unfilled = datetime_series.pct_change(fill_method=None)
        tm.assert_series_equal(
            unfilled, datetime_series / datetime_series.shift(1) - 1
        )

        two_periods = datetime_series.pct_change(2)
        ffilled = datetime_series.ffill()
        tm.assert_series_equal(two_periods, ffilled / ffilled.shift(2) - 1)

        with tm.assert_produces_warning(FutureWarning, match=msg):
            backfilled_change = datetime_series.pct_change(
                fill_method="bfill", limit=1
            )
        bfilled = datetime_series.bfill(limit=1)
        tm.assert_series_equal(backfilled_change, bfilled / bfilled.shift(1) - 1)

        by_freq = datetime_series.pct_change(freq="5D")
        ffilled = datetime_series.ffill()
        expected = (ffilled / ffilled.shift(freq="5D") - 1).reindex_like(ffilled)
        tm.assert_series_equal(by_freq, expected)

    def test_pct_change_with_duplicate_axis(self):
        """Check ``pct_change(freq="B")`` on a daily date index."""
        # GH#28664
        common_idx = date_range("2019-11-14", periods=5, freq="D")
        changed = Series(range(5), common_idx).pct_change(freq="B")

        # the reason that the expected should be like this is documented at PR 28681
        expected = Series([np.nan, np.inf, np.nan, np.nan, 3.0], common_idx)

        tm.assert_series_equal(changed, expected)

    def test_pct_change_shift_over_nas(self):
        """Default ``pct_change`` over a NaN must warn and return these values."""
        ser = Series([1.0, 1.5, np.nan, 2.5, 3.0])

        msg = "The default fill_method='pad' in Series.pct_change is deprecated"
        with tm.assert_produces_warning(FutureWarning, match=msg):
            changed = ser.pct_change()

        tm.assert_series_equal(
            changed, Series([np.nan, 0.5, 0.0, 2.5 / 1.5 - 1, 0.2])
        )

    @pytest.mark.parametrize(
        "freq, periods, fill_method, limit",
        [
            ("5B", 5, None, None),
            ("3B", 3, None, None),
            ("3B", 3, "bfill", None),
            ("7B", 7, "pad", 1),
            ("7B", 7, "bfill", 3),
            ("14B", 14, None, None),
        ],
    )
    def test_pct_change_periods_freq(
        self, freq, periods, fill_method, limit, datetime_series
    ):
        """``freq`` and the matching ``periods`` must give equal results."""
        msg = (
            "The 'fill_method' keyword being not None and the 'limit' keyword in "
            "Series.pct_change are deprecated"
        )
        empty_ts = Series(index=datetime_series.index, dtype=object)

        # GH#7292
        # same comparison on the populated fixture and on an all-missing Series
        for ser in (datetime_series, empty_ts):
            with tm.assert_produces_warning(FutureWarning, match=msg):
                rs_freq = ser.pct_change(
                    freq=freq, fill_method=fill_method, limit=limit
                )
            with tm.assert_produces_warning(FutureWarning, match=msg):
                rs_periods = ser.pct_change(
                    periods, fill_method=fill_method, limit=limit
                )
            tm.assert_series_equal(rs_freq, rs_periods)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("fill_method", ["pad", "ffill", None])
|
||||
def test_pct_change_with_duplicated_indices(fill_method):
|
||||
# GH30463
|
||||
s = Series([np.nan, 1, 2, 3, 9, 18], index=["a", "b"] * 3)
|
||||
|
||||
warn = None if fill_method is None else FutureWarning
|
||||
msg = (
|
||||
"The 'fill_method' keyword being not None and the 'limit' keyword in "
|
||||
"Series.pct_change are deprecated"
|
||||
)
|
||||
with tm.assert_produces_warning(warn, match=msg):
|
||||
result = s.pct_change(fill_method=fill_method)
|
||||
|
||||
expected = Series([np.nan, np.nan, 1.0, 0.5, 2.0, 1.0], index=["a", "b"] * 3)
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
|
||||
def test_pct_change_no_warning_na_beginning():
    """Leading missing values stay NaN in the default ``pct_change`` result."""
    # GH#54981
    leading_na = Series([None, None, 1, 2, 3])
    expected = Series([np.nan, np.nan, np.nan, 1, 0.5])
    tm.assert_series_equal(leading_na.pct_change(), expected)
|
||||
|
||||
|
||||
def test_pct_change_empty():
    """``pct_change(periods=0)`` on an empty float Series equals the input."""
    # GH 57056
    empty = Series([], dtype="float64")
    snapshot = empty.copy()
    tm.assert_series_equal(snapshot, empty.pct_change(periods=0))
|
||||
@ -0,0 +1,13 @@
|
||||
from pandas import Series
|
||||
import pandas._testing as tm
|
||||
|
||||
|
||||
def test_pop():
    """``Series.pop`` returns the removed value and drops it from the Series."""
    # GH#6600
    data = Series([0, 4, 0], index=["A", "B", "C"], name=4)

    popped = data.pop("B")
    assert popped == 4

    remaining = Series([0, 0], index=["A", "C"], name=4)
    tm.assert_series_equal(data, remaining)
|
||||
@ -0,0 +1,247 @@
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
from pandas.core.dtypes.common import is_integer
|
||||
|
||||
import pandas as pd
|
||||
from pandas import (
|
||||
Index,
|
||||
Series,
|
||||
)
|
||||
import pandas._testing as tm
|
||||
from pandas.core.indexes.datetimes import Timestamp
|
||||
|
||||
|
||||
class TestSeriesQuantile:
|
||||
def test_quantile(self, datetime_series):
    """Scalar quantiles for float, object, datetime64 and timedelta64 data."""
    clean = datetime_series.dropna()

    for frac, pct in ((0.1, 10), (0.9, 90)):
        assert datetime_series.quantile(frac) == np.percentile(clean, pct)

    # object dtype
    as_object = Series(datetime_series, dtype=object)
    assert as_object.quantile(0.9) == np.percentile(clean, 90)

    # datetime64[ns] dtype
    stamps = datetime_series.index.to_series()
    assert stamps.quantile(0.2) == Timestamp("2000-01-10 19:12:00")

    # timedelta64[ns] dtype
    deltas = stamps.diff()
    assert deltas.quantile(0.25) == pd.to_timedelta("24:00:00")

    # GH7661
    nat_sum = Series([np.timedelta64("NaT")]).sum()
    assert nat_sum == pd.Timedelta(0)

    # quantiles outside [0, 1] are rejected, scalar or list-like
    msg = "percentiles should all be in the interval \\[0, 1\\]"
    for invalid in [-1, 2, [0.5, -1], [0.5, 2]]:
        with pytest.raises(ValueError, match=msg):
            datetime_series.quantile(invalid)

    noise = Series(np.random.default_rng(2).standard_normal(100))
    with pytest.raises(ValueError, match=msg):
        noise.quantile([-0.5, 0.25, 1.5])
|
||||
|
||||
def test_quantile_multi(self, datetime_series, unit):
    """List-like ``q`` returns a Series indexed by the requested quantiles."""
    datetime_series.index = datetime_series.index.as_unit(unit)

    qs = [0.1, 0.9]
    result = datetime_series.quantile(qs)
    clean = datetime_series.dropna()
    expected = Series(
        [np.percentile(clean, 10), np.percentile(clean, 90)],
        index=qs,
        name=datetime_series.name,
    )
    tm.assert_series_equal(result, expected)

    # a repeated quantile on datetime data keeps the name and the index unit
    stamps = datetime_series.index.to_series()
    stamps.name = "xxx"
    result = stamps.quantile((0.2, 0.2))
    repeated = Timestamp("2000-01-10 19:12:00")
    expected = Series(
        [repeated, repeated],
        index=[0.2, 0.2],
        name="xxx",
        dtype=f"M8[{unit}]",
    )
    tm.assert_series_equal(result, expected)

    # an empty list of quantiles gives an empty float64 result
    result = datetime_series.quantile([])
    expected = Series(
        [], name=datetime_series.name, index=Index([], dtype=float), dtype="float64"
    )
    tm.assert_series_equal(result, expected)
|
||||
|
||||
def test_quantile_interpolation(self, datetime_series):
    """Passing ``interpolation="linear"`` matches omitting the keyword."""
    # see gh-10174
    reference = np.percentile(datetime_series.dropna(), 10)

    # interpolation = linear (default case)
    explicit = datetime_series.quantile(0.1, interpolation="linear")
    assert explicit == reference

    implicit = datetime_series.quantile(0.1)
    assert implicit == reference

    # test with and without interpolation keyword
    assert explicit == implicit
|
||||
|
||||
def test_quantile_interpolation_dtype(self):
|
||||
# GH #10174
|
||||
|
||||
# interpolation = linear (default case)
|
||||
q = Series([1, 3, 4]).quantile(0.5, interpolation="lower")
|
||||
assert q == np.percentile(np.array([1, 3, 4]), 50)
|
||||
assert is_integer(q)
|
||||
|
||||
q = Series([1, 3, 4]).quantile(0.5, interpolation="higher")
|
||||
assert q == np.percentile(np.array([1, 3, 4]), 50)
|
||||
assert is_integer(q)
|
||||
|
||||
def test_quantile_nan(self):
|
||||
# GH 13098
|
||||
ser = Series([1, 2, 3, 4, np.nan])
|
||||
result = ser.quantile(0.5)
|
||||
expected = 2.5
|
||||
assert result == expected
|
||||
|
||||
# all nan/empty
|
||||
s1 = Series([], dtype=object)
|
||||
cases = [s1, Series([np.nan, np.nan])]
|
||||
|
||||
for ser in cases:
|
||||
res = ser.quantile(0.5)
|
||||
assert np.isnan(res)
|
||||
|
||||
res = ser.quantile([0.5])
|
||||
tm.assert_series_equal(res, Series([np.nan], index=[0.5]))
|
||||
|
||||
res = ser.quantile([0.2, 0.3])
|
||||
tm.assert_series_equal(res, Series([np.nan, np.nan], index=[0.2, 0.3]))
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"case",
|
||||
[
|
||||
[
|
||||
Timestamp("2011-01-01"),
|
||||
Timestamp("2011-01-02"),
|
||||
Timestamp("2011-01-03"),
|
||||
],
|
||||
[
|
||||
Timestamp("2011-01-01", tz="US/Eastern"),
|
||||
Timestamp("2011-01-02", tz="US/Eastern"),
|
||||
Timestamp("2011-01-03", tz="US/Eastern"),
|
||||
],
|
||||
[pd.Timedelta("1 days"), pd.Timedelta("2 days"), pd.Timedelta("3 days")],
|
||||
# NaT
|
||||
[
|
||||
Timestamp("2011-01-01"),
|
||||
Timestamp("2011-01-02"),
|
||||
Timestamp("2011-01-03"),
|
||||
pd.NaT,
|
||||
],
|
||||
[
|
||||
Timestamp("2011-01-01", tz="US/Eastern"),
|
||||
Timestamp("2011-01-02", tz="US/Eastern"),
|
||||
Timestamp("2011-01-03", tz="US/Eastern"),
|
||||
pd.NaT,
|
||||
],
|
||||
[
|
||||
pd.Timedelta("1 days"),
|
||||
pd.Timedelta("2 days"),
|
||||
pd.Timedelta("3 days"),
|
||||
pd.NaT,
|
||||
],
|
||||
],
|
||||
)
|
||||
def test_quantile_box(self, case):
|
||||
ser = Series(case, name="XXX")
|
||||
res = ser.quantile(0.5)
|
||||
assert res == case[1]
|
||||
|
||||
res = ser.quantile([0.5])
|
||||
exp = Series([case[1]], index=[0.5], name="XXX")
|
||||
tm.assert_series_equal(res, exp)
|
||||
|
||||
def test_datetime_timedelta_quantiles(self):
|
||||
# covers #9694
|
||||
assert pd.isna(Series([], dtype="M8[ns]").quantile(0.5))
|
||||
assert pd.isna(Series([], dtype="m8[ns]").quantile(0.5))
|
||||
|
||||
def test_quantile_nat(self):
|
||||
res = Series([pd.NaT, pd.NaT]).quantile(0.5)
|
||||
assert res is pd.NaT
|
||||
|
||||
res = Series([pd.NaT, pd.NaT]).quantile([0.5])
|
||||
tm.assert_series_equal(res, Series([pd.NaT], index=[0.5]))
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"values, dtype",
|
||||
[([0, 0, 0, 1, 2, 3], "Sparse[int]"), ([0.0, None, 1.0, 2.0], "Sparse[float]")],
|
||||
)
|
||||
def test_quantile_sparse(self, values, dtype):
|
||||
ser = Series(values, dtype=dtype)
|
||||
result = ser.quantile([0.5])
|
||||
expected = Series(np.asarray(ser)).quantile([0.5]).astype("Sparse[float]")
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
def test_quantile_empty_float64(self):
|
||||
# floats
|
||||
ser = Series([], dtype="float64")
|
||||
|
||||
res = ser.quantile(0.5)
|
||||
assert np.isnan(res)
|
||||
|
||||
res = ser.quantile([0.5])
|
||||
exp = Series([np.nan], index=[0.5])
|
||||
tm.assert_series_equal(res, exp)
|
||||
|
||||
def test_quantile_empty_int64(self):
|
||||
# int
|
||||
ser = Series([], dtype="int64")
|
||||
|
||||
res = ser.quantile(0.5)
|
||||
assert np.isnan(res)
|
||||
|
||||
res = ser.quantile([0.5])
|
||||
exp = Series([np.nan], index=[0.5])
|
||||
tm.assert_series_equal(res, exp)
|
||||
|
||||
def test_quantile_empty_dt64(self):
|
||||
# datetime
|
||||
ser = Series([], dtype="datetime64[ns]")
|
||||
|
||||
res = ser.quantile(0.5)
|
||||
assert res is pd.NaT
|
||||
|
||||
res = ser.quantile([0.5])
|
||||
exp = Series([pd.NaT], index=[0.5], dtype=ser.dtype)
|
||||
tm.assert_series_equal(res, exp)
|
||||
|
||||
@pytest.mark.parametrize("dtype", [int, float, "Int64"])
|
||||
def test_quantile_dtypes(self, dtype):
|
||||
result = Series([1, 2, 3], dtype=dtype).quantile(np.arange(0, 1, 0.25))
|
||||
expected = Series(np.arange(1, 3, 0.5), index=np.arange(0, 1, 0.25))
|
||||
if dtype == "Int64":
|
||||
expected = expected.astype("Float64")
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
def test_quantile_all_na(self, any_int_ea_dtype):
|
||||
# GH#50681
|
||||
ser = Series([pd.NA, pd.NA], dtype=any_int_ea_dtype)
|
||||
with tm.assert_produces_warning(None):
|
||||
result = ser.quantile([0.1, 0.5])
|
||||
expected = Series([pd.NA, pd.NA], dtype=any_int_ea_dtype, index=[0.1, 0.5])
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
def test_quantile_dtype_size(self, any_int_ea_dtype):
|
||||
# GH#50681
|
||||
ser = Series([pd.NA, pd.NA, 1], dtype=any_int_ea_dtype)
|
||||
result = ser.quantile([0.1, 0.5])
|
||||
expected = Series([1, 1], dtype=any_int_ea_dtype, index=[0.1, 0.5])
|
||||
tm.assert_series_equal(result, expected)
|
||||
@ -0,0 +1,563 @@
|
||||
from itertools import chain
|
||||
import operator
|
||||
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
from pandas._libs.algos import (
|
||||
Infinity,
|
||||
NegInfinity,
|
||||
)
|
||||
import pandas.util._test_decorators as td
|
||||
|
||||
from pandas import (
|
||||
NA,
|
||||
NaT,
|
||||
Series,
|
||||
Timestamp,
|
||||
date_range,
|
||||
)
|
||||
import pandas._testing as tm
|
||||
from pandas.api.types import CategoricalDtype
|
||||
|
||||
|
||||
@pytest.fixture
def ser():
    """Ten-element Series containing repeated values and two NaNs."""
    values = [1, 3, 4, 2, np.nan, 2, 1, 5, np.nan, 3]
    return Series(values)
|
||||
|
||||
|
||||
@pytest.fixture(
    params=[
        ["average", np.array([1.5, 5.5, 7.0, 3.5, np.nan, 3.5, 1.5, 8.0, np.nan, 5.5])],
        ["min", np.array([1, 5, 7, 3, np.nan, 3, 1, 8, np.nan, 5])],
        ["max", np.array([2, 6, 7, 4, np.nan, 4, 2, 8, np.nan, 6])],
        ["first", np.array([1, 5, 7, 3, np.nan, 4, 2, 8, np.nan, 6])],
        ["dense", np.array([1, 3, 4, 2, np.nan, 2, 1, 5, np.nan, 3])],
    ],
    ids=lambda x: x[0],
)
def results(request):
    """Yield one ``[method name, array of ranks]`` pair per parametrization."""
    method_and_ranks = request.param
    return method_and_ranks
|
||||
|
||||
|
||||
@pytest.fixture(
    params=[
        "object",
        "float64",
        "int64",
        "Float64",
        "Int64",
        pytest.param("float64[pyarrow]", marks=td.skip_if_no("pyarrow")),
        pytest.param("int64[pyarrow]", marks=td.skip_if_no("pyarrow")),
        pytest.param("string[pyarrow]", marks=td.skip_if_no("pyarrow")),
        "string[python]",
        "str",
    ]
)
def dtype(request):
    """Yield one dtype string per parametrization; pyarrow ones need pyarrow."""
    chosen = request.param
    return chosen
|
||||
|
||||
|
||||
def expected_dtype(dtype, method, pct=False):
    """
    Map an input dtype, rank method and ``pct`` flag to a result dtype string.

    ``"string[pyarrow]"`` maps to ``"Float64"``.  The pyarrow numeric dtypes
    map to ``"double[pyarrow]"`` when ``method == "average"`` or ``pct`` is
    truthy and to ``"uint64[pyarrow]"`` otherwise.  Every other dtype maps to
    ``"float64"``.
    """
    if dtype in ("string[pyarrow]",):
        return "Float64"
    if dtype in ("float64[pyarrow]", "int64[pyarrow]"):
        if method == "average" or pct:
            return "double[pyarrow]"
        return "uint64[pyarrow]"
    return "float64"
|
||||
|
||||
|
||||
class TestSeriesRank:
|
||||
def test_rank(self, datetime_series):
    """Default and ``pct=True`` ranks for float, int, tiny and timedelta data."""
    sp_stats = pytest.importorskip("scipy.stats")

    datetime_series[::2] = np.nan
    datetime_series[:10:3] = 4.0

    float_ranks = datetime_series.rank()
    object_ranks = datetime_series.astype("O").rank()
    tm.assert_series_equal(float_ranks, object_ranks)

    nan_mask = np.isnan(datetime_series)
    inf_filled = datetime_series.fillna(np.inf)

    # rankdata returns a ndarray
    expected = Series(sp_stats.rankdata(inf_filled), index=inf_filled.index, name="ts")
    expected[nan_mask] = np.nan
    tm.assert_series_equal(float_ranks, expected)

    # integer input ranks like its float cast
    ints = Series(np.arange(5).repeat(2))
    int_ranks = ints.rank()
    tm.assert_series_equal(int_ranks, ints.astype(float).rank())

    data = Series(np.arange(5)) + 1.0
    expected = data / 5.0
    tm.assert_series_equal(data.rank(pct=True), expected)

    ones = Series(np.repeat(1, 100))
    expected = Series(np.repeat(0.505, 100))
    tm.assert_series_equal(ones.rank(pct=True), expected)

    # Explicit cast to float to avoid implicit cast when setting nan
    ones = ones.astype("float")
    ones[1] = np.nan
    expected = Series(np.repeat(50.0 / 99.0, 100))
    expected[1] = np.nan
    tm.assert_series_equal(ones.rank(pct=True), expected)

    data = Series(np.arange(5)) + 1.0
    data[4] = np.nan
    expected = data / 4.0
    tm.assert_series_equal(data.rank(pct=True), expected)

    all_nan = Series(np.repeat(np.nan, 100))
    expected = all_nan.copy()
    tm.assert_series_equal(all_nan.rank(pct=True), expected)

    # Explicit cast to float to avoid implicit cast when setting nan
    data = Series(np.arange(5), dtype="float") + 1
    data[4] = np.nan
    expected = data / 4.0
    tm.assert_series_equal(data.rank(pct=True), expected)

    rng = date_range("1/1/1990", periods=5)
    # Explicit cast to float to avoid implicit cast when setting nan
    data = Series(np.arange(5), rng, dtype="float") + 1
    data.iloc[4] = np.nan
    expected = data / 4.0
    tm.assert_series_equal(data.rank(pct=True), expected)

    tiny = Series([1e-50, 1e-100, 1e-20, 1e-2, 1e-20 + 1e-30, 1e-1])
    expected = Series([2, 1, 3, 5, 4, 6.0])
    tm.assert_series_equal(tiny.rank(), expected)

    # GH 5968
    deltas = Series(["3 day", "1 day 10m", "-2 day", NaT], dtype="m8[ns]")
    expected = Series([3, 2, 1, np.nan])
    tm.assert_series_equal(deltas.rank(), expected)

    # ``values`` is sorted, so a permutation of positions gives the ranks
    values = np.array(
        [-50, -1, -1e-20, -1e-25, -1e-50, 0, 1e-40, 1e-20, 1e-10, 2, 40],
        dtype="float64",
    )
    shuffled = np.random.default_rng(2).permutation(len(values))
    permuted = Series(values[shuffled])
    expected = Series(shuffled + 1.0, dtype="float64")
    tm.assert_series_equal(permuted.rank(), expected)
|
||||
|
||||
def test_rank_categorical(self):
|
||||
# GH issue #15420 rank incorrectly orders ordered categories
|
||||
|
||||
# Test ascending/descending ranking for ordered categoricals
|
||||
exp = Series([1.0, 2.0, 3.0, 4.0, 5.0, 6.0])
|
||||
exp_desc = Series([6.0, 5.0, 4.0, 3.0, 2.0, 1.0])
|
||||
ordered = Series(
|
||||
["first", "second", "third", "fourth", "fifth", "sixth"]
|
||||
).astype(
|
||||
CategoricalDtype(
|
||||
categories=["first", "second", "third", "fourth", "fifth", "sixth"],
|
||||
ordered=True,
|
||||
)
|
||||
)
|
||||
tm.assert_series_equal(ordered.rank(), exp)
|
||||
tm.assert_series_equal(ordered.rank(ascending=False), exp_desc)
|
||||
|
||||
# Unordered categoricals should be ranked as objects
|
||||
unordered = Series(
|
||||
["first", "second", "third", "fourth", "fifth", "sixth"]
|
||||
).astype(
|
||||
CategoricalDtype(
|
||||
categories=["first", "second", "third", "fourth", "fifth", "sixth"],
|
||||
ordered=False,
|
||||
)
|
||||
)
|
||||
exp_unordered = Series([2.0, 4.0, 6.0, 3.0, 1.0, 5.0])
|
||||
res = unordered.rank()
|
||||
tm.assert_series_equal(res, exp_unordered)
|
||||
|
||||
unordered1 = Series([1, 2, 3, 4, 5, 6]).astype(
|
||||
CategoricalDtype([1, 2, 3, 4, 5, 6], False)
|
||||
)
|
||||
exp_unordered1 = Series([1.0, 2.0, 3.0, 4.0, 5.0, 6.0])
|
||||
res1 = unordered1.rank()
|
||||
tm.assert_series_equal(res1, exp_unordered1)
|
||||
|
||||
# Test na_option for rank data
|
||||
na_ser = Series(
|
||||
["first", "second", "third", "fourth", "fifth", "sixth", np.nan]
|
||||
).astype(
|
||||
CategoricalDtype(
|
||||
["first", "second", "third", "fourth", "fifth", "sixth", "seventh"],
|
||||
True,
|
||||
)
|
||||
)
|
||||
|
||||
exp_top = Series([2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 1.0])
|
||||
exp_bot = Series([1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0])
|
||||
exp_keep = Series([1.0, 2.0, 3.0, 4.0, 5.0, 6.0, np.nan])
|
||||
|
||||
tm.assert_series_equal(na_ser.rank(na_option="top"), exp_top)
|
||||
tm.assert_series_equal(na_ser.rank(na_option="bottom"), exp_bot)
|
||||
tm.assert_series_equal(na_ser.rank(na_option="keep"), exp_keep)
|
||||
|
||||
# Test na_option for rank data with ascending False
|
||||
exp_top = Series([7.0, 6.0, 5.0, 4.0, 3.0, 2.0, 1.0])
|
||||
exp_bot = Series([6.0, 5.0, 4.0, 3.0, 2.0, 1.0, 7.0])
|
||||
exp_keep = Series([6.0, 5.0, 4.0, 3.0, 2.0, 1.0, np.nan])
|
||||
|
||||
tm.assert_series_equal(na_ser.rank(na_option="top", ascending=False), exp_top)
|
||||
tm.assert_series_equal(
|
||||
na_ser.rank(na_option="bottom", ascending=False), exp_bot
|
||||
)
|
||||
tm.assert_series_equal(na_ser.rank(na_option="keep", ascending=False), exp_keep)
|
||||
|
||||
# Test invalid values for na_option
|
||||
msg = "na_option must be one of 'keep', 'top', or 'bottom'"
|
||||
|
||||
with pytest.raises(ValueError, match=msg):
|
||||
na_ser.rank(na_option="bad", ascending=False)
|
||||
|
||||
# invalid type
|
||||
with pytest.raises(ValueError, match=msg):
|
||||
na_ser.rank(na_option=True, ascending=False)
|
||||
|
||||
# Test with pct=True
|
||||
na_ser = Series(["first", "second", "third", "fourth", np.nan]).astype(
|
||||
CategoricalDtype(["first", "second", "third", "fourth"], True)
|
||||
)
|
||||
exp_top = Series([0.4, 0.6, 0.8, 1.0, 0.2])
|
||||
exp_bot = Series([0.2, 0.4, 0.6, 0.8, 1.0])
|
||||
exp_keep = Series([0.25, 0.5, 0.75, 1.0, np.nan])
|
||||
|
||||
tm.assert_series_equal(na_ser.rank(na_option="top", pct=True), exp_top)
|
||||
tm.assert_series_equal(na_ser.rank(na_option="bottom", pct=True), exp_bot)
|
||||
tm.assert_series_equal(na_ser.rank(na_option="keep", pct=True), exp_keep)
|
||||
|
||||
def test_rank_signature(self):
|
||||
s = Series([0, 1])
|
||||
s.rank(method="average")
|
||||
msg = "No axis named average for object type Series"
|
||||
with pytest.raises(ValueError, match=msg):
|
||||
s.rank("average")
|
||||
|
||||
def test_rank_tie_methods(self, ser, results, dtype, using_infer_string):
    """Each tie-breaking ``method`` yields the expected ranks for the dtype."""
    method, exp = results
    cannot_hold_nan = dtype in ("int64", "Int64") or (
        not using_infer_string and dtype == "str"
    )
    if cannot_hold_nan:
        pytest.skip("int64/str does not support NaN")

    if dtype is not None:
        ser = ser.astype(dtype)
    result = ser.rank(method=method)
    expected = Series(exp, dtype=expected_dtype(dtype, method))
    tm.assert_series_equal(result, expected)
|
||||
|
||||
@pytest.mark.parametrize("ascending", [True, False])
|
||||
@pytest.mark.parametrize("method", ["average", "min", "max", "first", "dense"])
|
||||
@pytest.mark.parametrize("na_option", ["top", "bottom", "keep"])
|
||||
@pytest.mark.parametrize(
|
||||
"dtype, na_value, pos_inf, neg_inf",
|
||||
[
|
||||
("object", None, Infinity(), NegInfinity()),
|
||||
("float64", np.nan, np.inf, -np.inf),
|
||||
("Float64", NA, np.inf, -np.inf),
|
||||
pytest.param(
|
||||
"float64[pyarrow]",
|
||||
NA,
|
||||
np.inf,
|
||||
-np.inf,
|
||||
marks=td.skip_if_no("pyarrow"),
|
||||
),
|
||||
],
|
||||
)
|
||||
def test_rank_tie_methods_on_infs_nans(
|
||||
self, method, na_option, ascending, dtype, na_value, pos_inf, neg_inf
|
||||
):
|
||||
pytest.importorskip("scipy")
|
||||
if dtype == "float64[pyarrow]":
|
||||
if method == "average":
|
||||
exp_dtype = "float64[pyarrow]"
|
||||
else:
|
||||
exp_dtype = "uint64[pyarrow]"
|
||||
else:
|
||||
exp_dtype = "float64"
|
||||
|
||||
chunk = 3
|
||||
in_arr = [neg_inf] * chunk + [na_value] * chunk + [pos_inf] * chunk
|
||||
iseries = Series(in_arr, dtype=dtype)
|
||||
exp_ranks = {
|
||||
"average": ([2, 2, 2], [5, 5, 5], [8, 8, 8]),
|
||||
"min": ([1, 1, 1], [4, 4, 4], [7, 7, 7]),
|
||||
"max": ([3, 3, 3], [6, 6, 6], [9, 9, 9]),
|
||||
"first": ([1, 2, 3], [4, 5, 6], [7, 8, 9]),
|
||||
"dense": ([1, 1, 1], [2, 2, 2], [3, 3, 3]),
|
||||
}
|
||||
ranks = exp_ranks[method]
|
||||
if na_option == "top":
|
||||
order = [ranks[1], ranks[0], ranks[2]]
|
||||
elif na_option == "bottom":
|
||||
order = [ranks[0], ranks[2], ranks[1]]
|
||||
else:
|
||||
order = [ranks[0], [np.nan] * chunk, ranks[1]]
|
||||
expected = order if ascending else order[::-1]
|
||||
expected = list(chain.from_iterable(expected))
|
||||
result = iseries.rank(method=method, na_option=na_option, ascending=ascending)
|
||||
tm.assert_series_equal(result, Series(expected, dtype=exp_dtype))
|
||||
|
||||
def test_rank_desc_mix_nans_infs(self):
|
||||
# GH 19538
|
||||
# check descending ranking when mix nans and infs
|
||||
iseries = Series([1, np.nan, np.inf, -np.inf, 25])
|
||||
result = iseries.rank(ascending=False)
|
||||
exp = Series([3, np.nan, 1, 4, 2], dtype="float64")
|
||||
tm.assert_series_equal(result, exp)
|
||||
|
||||
@pytest.mark.parametrize("method", ["average", "min", "max", "first", "dense"])
|
||||
@pytest.mark.parametrize(
|
||||
"op, value",
|
||||
[
|
||||
[operator.add, 0],
|
||||
[operator.add, 1e6],
|
||||
[operator.mul, 1e-6],
|
||||
],
|
||||
)
|
||||
def test_rank_methods_series(self, method, op, value):
|
||||
sp_stats = pytest.importorskip("scipy.stats")
|
||||
|
||||
xs = np.random.default_rng(2).standard_normal(9)
|
||||
xs = np.concatenate([xs[i:] for i in range(0, 9, 2)]) # add duplicates
|
||||
np.random.default_rng(2).shuffle(xs)
|
||||
|
||||
index = [chr(ord("a") + i) for i in range(len(xs))]
|
||||
vals = op(xs, value)
|
||||
ts = Series(vals, index=index)
|
||||
result = ts.rank(method=method)
|
||||
sprank = sp_stats.rankdata(vals, method if method != "first" else "ordinal")
|
||||
expected = Series(sprank, index=index).astype("float64")
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
@pytest.mark.parametrize(
    "ser, exp",
    [
        ([1], [1]),
        ([2], [1]),
        ([0], [1]),
        ([2, 2], [1, 1]),
        ([1, 2, 3], [1, 2, 3]),
        ([4, 2, 1], [3, 2, 1]),
        ([1, 1, 5, 5, 3], [1, 1, 3, 3, 2]),
        ([-5, -4, -3, -2, -1], [1, 2, 3, 4, 5]),
    ],
)
def test_rank_dense_method(self, dtype, ser, exp):
    """``method="dense"`` ranks for each parametrized dtype."""
    # the all-negative case expects the reversed ranks for string dtypes
    want = exp[::-1] if ser[0] < 0 and dtype.startswith("str") else exp
    result = Series(ser).astype(dtype).rank(method="dense")
    expected = Series(want).astype(expected_dtype(dtype, "dense"))
    tm.assert_series_equal(result, expected)
|
||||
|
||||
def test_rank_descending(self, ser, results, dtype, using_infer_string):
    """Descending rank equals the ascending rank of ``max - value``."""
    method, _ = results
    if dtype == "int64" or (not using_infer_string and dtype == "str"):
        s = ser.dropna()
    else:
        s = ser.astype(dtype)

    def flipped():
        # string dtypes go through float64 before the subtraction
        if dtype.startswith("str"):
            return s.astype("float64").max() - s.astype("float64")
        return s.max() - s

    res = s.rank(ascending=False)
    expected = flipped().rank()
    tm.assert_series_equal(res, expected.astype(expected_dtype(dtype, "average")))

    expected = flipped().rank(method=method)
    res2 = s.rank(method=method, ascending=False)
    tm.assert_series_equal(res2, expected.astype(expected_dtype(dtype, method)))
|
||||
|
||||
def test_rank_int(self, ser, results):
|
||||
method, exp = results
|
||||
s = ser.dropna().astype("i8")
|
||||
|
||||
result = s.rank(method=method)
|
||||
expected = Series(exp).dropna()
|
||||
expected.index = result.index
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
def test_rank_object_bug(self):
|
||||
# GH 13445
|
||||
|
||||
# smoke tests
|
||||
Series([np.nan] * 32).astype(object).rank(ascending=True)
|
||||
Series([np.nan] * 32).astype(object).rank(ascending=False)
|
||||
|
||||
def test_rank_modify_inplace(self):
|
||||
# GH 18521
|
||||
# Check rank does not mutate series
|
||||
s = Series([Timestamp("2017-01-05 10:20:27.569000"), NaT])
|
||||
expected = s.copy()
|
||||
|
||||
s.rank()
|
||||
result = s
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
def test_rank_ea_small_values(self):
|
||||
# GH#52471
|
||||
ser = Series(
|
||||
[5.4954145e29, -9.791984e-21, 9.3715776e-26, NA, 1.8790257e-28],
|
||||
dtype="Float64",
|
||||
)
|
||||
result = ser.rank(method="min")
|
||||
expected = Series([4, 1, 3, np.nan, 2])
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
|
||||
# GH15630, pct should be on 100% basis when method='dense'
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "ser, exp",
    [
        ([1], [1.0]),
        ([1, 2], [1.0 / 2, 2.0 / 2]),
        ([2, 2], [1.0, 1.0]),
        ([1, 2, 3], [1.0 / 3, 2.0 / 3, 3.0 / 3]),
        ([1, 2, 2], [1.0 / 2, 2.0 / 2, 2.0 / 2]),
        ([4, 2, 1], [3.0 / 3, 2.0 / 3, 1.0 / 3]),
        ([1, 1, 5, 5, 3], [1.0 / 3, 1.0 / 3, 3.0 / 3, 3.0 / 3, 2.0 / 3]),
        ([1, 1, 3, 3, 5, 5], [1.0 / 3, 1.0 / 3, 2.0 / 3, 2.0 / 3, 3.0 / 3, 3.0 / 3]),
        ([-5, -4, -3, -2, -1], [1.0 / 5, 2.0 / 5, 3.0 / 5, 4.0 / 5, 5.0 / 5]),
    ],
)
def test_rank_dense_pct(dtype, ser, exp):
    """``rank(method="dense", pct=True)`` for each parametrized dtype."""
    # the all-negative case expects the reversed ranks for string dtypes
    want = exp[::-1] if ser[0] < 0 and dtype.startswith("str") else exp
    result = Series(ser).astype(dtype).rank(method="dense", pct=True)
    expected = Series(want).astype(expected_dtype(dtype, "dense", pct=True))
    tm.assert_series_equal(result, expected)
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "ser, exp",
    [
        ([1], [1.0]),
        ([1, 2], [1.0 / 2, 2.0 / 2]),
        ([2, 2], [1.0 / 2, 1.0 / 2]),
        ([1, 2, 3], [1.0 / 3, 2.0 / 3, 3.0 / 3]),
        ([1, 2, 2], [1.0 / 3, 2.0 / 3, 2.0 / 3]),
        ([4, 2, 1], [3.0 / 3, 2.0 / 3, 1.0 / 3]),
        ([1, 1, 5, 5, 3], [1.0 / 5, 1.0 / 5, 4.0 / 5, 4.0 / 5, 3.0 / 5]),
        ([1, 1, 3, 3, 5, 5], [1.0 / 6, 1.0 / 6, 3.0 / 6, 3.0 / 6, 5.0 / 6, 5.0 / 6]),
        ([-5, -4, -3, -2, -1], [1.0 / 5, 2.0 / 5, 3.0 / 5, 4.0 / 5, 5.0 / 5]),
    ],
)
def test_rank_min_pct(dtype, ser, exp):
    """``rank(method="min", pct=True)`` for each parametrized dtype."""
    # the all-negative case expects the reversed ranks for string dtypes
    want = exp[::-1] if ser[0] < 0 and dtype.startswith("str") else exp
    result = Series(ser).astype(dtype).rank(method="min", pct=True)
    expected = Series(want).astype(expected_dtype(dtype, "min", pct=True))
    tm.assert_series_equal(result, expected)
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "ser, exp",
    [
        ([1], [1.0]),
        ([1, 2], [1.0 / 2, 2.0 / 2]),
        ([2, 2], [1.0, 1.0]),
        ([1, 2, 3], [1.0 / 3, 2.0 / 3, 3.0 / 3]),
        ([1, 2, 2], [1.0 / 3, 3.0 / 3, 3.0 / 3]),
        ([4, 2, 1], [3.0 / 3, 2.0 / 3, 1.0 / 3]),
        ([1, 1, 5, 5, 3], [2.0 / 5, 2.0 / 5, 5.0 / 5, 5.0 / 5, 3.0 / 5]),
        ([1, 1, 3, 3, 5, 5], [2.0 / 6, 2.0 / 6, 4.0 / 6, 4.0 / 6, 6.0 / 6, 6.0 / 6]),
        ([-5, -4, -3, -2, -1], [1.0 / 5, 2.0 / 5, 3.0 / 5, 4.0 / 5, 5.0 / 5]),
    ],
)
def test_rank_max_pct(dtype, ser, exp):
    """``rank(method="max", pct=True)`` for each parametrized dtype."""
    # the all-negative case expects the reversed ranks for string dtypes
    want = exp[::-1] if ser[0] < 0 and dtype.startswith("str") else exp
    result = Series(ser).astype(dtype).rank(method="max", pct=True)
    expected = Series(want).astype(expected_dtype(dtype, "max", pct=True))
    tm.assert_series_equal(result, expected)
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "ser, exp",
    [
        ([1], [1.0]),
        ([1, 2], [1.0 / 2, 2.0 / 2]),
        ([2, 2], [1.5 / 2, 1.5 / 2]),
        ([1, 2, 3], [1.0 / 3, 2.0 / 3, 3.0 / 3]),
        ([1, 2, 2], [1.0 / 3, 2.5 / 3, 2.5 / 3]),
        ([4, 2, 1], [3.0 / 3, 2.0 / 3, 1.0 / 3]),
        ([1, 1, 5, 5, 3], [1.5 / 5, 1.5 / 5, 4.5 / 5, 4.5 / 5, 3.0 / 5]),
        ([1, 1, 3, 3, 5, 5], [1.5 / 6, 1.5 / 6, 3.5 / 6, 3.5 / 6, 5.5 / 6, 5.5 / 6]),
        ([-5, -4, -3, -2, -1], [1.0 / 5, 2.0 / 5, 3.0 / 5, 4.0 / 5, 5.0 / 5]),
    ],
)
def test_rank_average_pct(dtype, ser, exp):
    """``rank(method="average", pct=True)`` for each parametrized dtype."""
    # the all-negative case expects the reversed ranks for string dtypes
    want = exp[::-1] if ser[0] < 0 and dtype.startswith("str") else exp
    result = Series(ser).astype(dtype).rank(method="average", pct=True)
    expected = Series(want).astype(expected_dtype(dtype, "average", pct=True))
    tm.assert_series_equal(result, expected)
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "ser, exp",
    [
        ([1], [1.0]),
        ([1, 2], [1.0 / 2, 2.0 / 2]),
        ([2, 2], [1.0 / 2, 2.0 / 2.0]),
        ([1, 2, 3], [1.0 / 3, 2.0 / 3, 3.0 / 3]),
        ([1, 2, 2], [1.0 / 3, 2.0 / 3, 3.0 / 3]),
        ([4, 2, 1], [3.0 / 3, 2.0 / 3, 1.0 / 3]),
        ([1, 1, 5, 5, 3], [1.0 / 5, 2.0 / 5, 4.0 / 5, 5.0 / 5, 3.0 / 5]),
        ([1, 1, 3, 3, 5, 5], [1.0 / 6, 2.0 / 6, 3.0 / 6, 4.0 / 6, 5.0 / 6, 6.0 / 6]),
        ([-5, -4, -3, -2, -1], [1.0 / 5, 2.0 / 5, 3.0 / 5, 4.0 / 5, 5.0 / 5]),
    ],
)
def test_rank_first_pct(dtype, ser, exp):
    """``rank(method="first", pct=True)`` for each parametrized dtype."""
    # the all-negative case expects the reversed ranks for string dtypes
    want = exp[::-1] if ser[0] < 0 and dtype.startswith("str") else exp
    result = Series(ser).astype(dtype).rank(method="first", pct=True)
    expected = Series(want).astype(expected_dtype(dtype, "first", pct=True))
    tm.assert_series_equal(result, expected)
|
||||
|
||||
|
||||
@pytest.mark.single_cpu
def test_pct_max_many_rows():
    """The largest percentile rank of ``2**24 + 1`` distinct values is 1."""
    # GH 18271
    n_rows = 2**24 + 1
    top_pct = Series(np.arange(n_rows)).rank(pct=True).max()
    assert top_pct == 1
|
||||
@ -0,0 +1,443 @@
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
import pandas.util._test_decorators as td
|
||||
|
||||
from pandas import (
|
||||
NA,
|
||||
Categorical,
|
||||
Float64Dtype,
|
||||
Index,
|
||||
MultiIndex,
|
||||
NaT,
|
||||
Period,
|
||||
PeriodIndex,
|
||||
RangeIndex,
|
||||
Series,
|
||||
Timedelta,
|
||||
Timestamp,
|
||||
date_range,
|
||||
isna,
|
||||
)
|
||||
import pandas._testing as tm
|
||||
|
||||
|
||||
def test_reindex(datetime_series, string_series):
    """Reindex to the same index, to sub-indexes and to a foreign index."""
    same = string_series.reindex(string_series.index)

    assert tm.shares_memory(string_series.index, same.index)

    assert same.index.is_(string_series.index)
    assert same.index.identical(string_series.index)

    str_labels = string_series.index[10:20]
    str_subset = string_series.reindex(str_labels)
    for label, value in str_subset.items():
        assert value == string_series[label]

    ts_labels = datetime_series.index[10:20]
    ts_subset = datetime_series.reindex(ts_labels)
    for label, value in ts_subset.items():
        assert value == datetime_series[label]

    # the string labels reindexed onto the datetime fixture must be all NaN
    foreign = datetime_series.reindex(str_labels)
    assert np.isnan(foreign).all()

    # This is extremely important for the Cython code to not screw up
    strided_labels = datetime_series.index[::2]
    strided = datetime_series.reindex(strided_labels)
    for label, value in strided.items():
        assert value == datetime_series[label]

    # return a copy the same index here
    untouched = datetime_series.reindex()
    assert untouched is not datetime_series
|
||||
|
||||
|
||||
def test_reindex_nan():
    """Reindexing with NaN labels matches positional selection."""
    ts = Series([2, 3, 5, 7], index=[1, 4, np.nan, 8])

    target_labels = [np.nan, 1, np.nan, 8, 4, np.nan]
    positions = [2, 0, 2, 3, 1, 2]
    tm.assert_series_equal(ts.reindex(target_labels), ts.iloc[positions])

    ts.index = ts.index.astype("object")

    # reindex coerces index.dtype to float, loc/iloc doesn't
    tm.assert_series_equal(
        ts.reindex(target_labels), ts.iloc[positions], check_index_type=False
    )
|
||||
|
||||
|
||||
def test_reindex_series_add_nat():
    """Reindexing a datetime Series past its end keeps a datetime64 dtype.

    The five labels added by the reindex must be missing, the ten original
    ones must not be.
    """
    rng = date_range("1/1/2000 00:00:00", periods=10, freq="10s")
    series = Series(rng)

    result = series.reindex(range(15))
    assert np.issubdtype(result.dtype, np.dtype("M8[ns]"))

    mask = result.isna()
    assert mask[-5:].all()
    assert not mask[:-5].any()
|
||||
|
||||
|
||||
def test_reindex_with_datetimes():
    """Reindexing or indexing with a list of timestamps matches slicing."""
    rng = date_range("1/1/2000", periods=20)
    ts = Series(np.random.default_rng(2).standard_normal(20), index=rng)

    result = ts.reindex(list(ts.index[5:10]))
    expected = ts[5:10]
    # The expected index is compared without a freq, since the result was
    # built from a plain list of labels.
    expected.index = expected.index._with_freq(None)
    tm.assert_series_equal(result, expected)

    result = ts[list(ts.index[5:10])]
    tm.assert_series_equal(result, expected)
|
||||
|
||||
|
||||
def test_reindex_corner(datetime_series):
|
||||
# (don't forget to fix this) I think it's fixed
|
||||
empty = Series(index=[])
|
||||
empty.reindex(datetime_series.index, method="pad") # it works
|
||||
|
||||
# corner case: pad empty series
|
||||
reindexed = empty.reindex(datetime_series.index, method="pad")
|
||||
|
||||
# pass non-Index
|
||||
reindexed = datetime_series.reindex(list(datetime_series.index))
|
||||
datetime_series.index = datetime_series.index._with_freq(None)
|
||||
tm.assert_series_equal(datetime_series, reindexed)
|
||||
|
||||
# bad fill method
|
||||
ts = datetime_series[::2]
|
||||
msg = (
|
||||
r"Invalid fill method\. Expecting pad \(ffill\), backfill "
|
||||
r"\(bfill\) or nearest\. Got foo"
|
||||
)
|
||||
with pytest.raises(ValueError, match=msg):
|
||||
ts.reindex(datetime_series.index, method="foo")
|
||||
|
||||
|
||||
def test_reindex_pad():
    """method="pad" and method="ffill" give the same forward-filled result."""
    s = Series(np.arange(10), dtype="int64")
    s2 = s[::2]

    reindexed = s2.reindex(s.index, method="pad")
    reindexed2 = s2.reindex(s.index, method="ffill")
    tm.assert_series_equal(reindexed, reindexed2)

    expected = Series([0, 0, 2, 2, 4, 4, 6, 6, 8, 8])
    tm.assert_series_equal(reindexed, expected)
|
||||
|
||||
|
||||
def test_reindex_pad2():
|
||||
# GH4604
|
||||
s = Series([1, 2, 3, 4, 5], index=["a", "b", "c", "d", "e"])
|
||||
new_index = ["a", "g", "c", "f"]
|
||||
expected = Series([1, 1, 3, 3], index=new_index)
|
||||
|
||||
# this changes dtype because the ffill happens after
|
||||
result = s.reindex(new_index).ffill()
|
||||
tm.assert_series_equal(result, expected.astype("float64"))
|
||||
|
||||
msg = "The 'downcast' keyword in ffill is deprecated"
|
||||
with tm.assert_produces_warning(FutureWarning, match=msg):
|
||||
result = s.reindex(new_index).ffill(downcast="infer")
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
expected = Series([1, 5, 3, 5], index=new_index)
|
||||
result = s.reindex(new_index, method="ffill")
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
|
||||
def test_reindex_inference():
|
||||
# inference of new dtype
|
||||
s = Series([True, False, False, True], index=list("abcd"))
|
||||
new_index = "agc"
|
||||
msg = "Downcasting object dtype arrays on"
|
||||
with tm.assert_produces_warning(FutureWarning, match=msg):
|
||||
result = s.reindex(list(new_index)).ffill()
|
||||
expected = Series([True, True, False], index=list(new_index))
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
|
||||
def test_reindex_downcasting():
|
||||
# GH4618 shifted series downcasting
|
||||
s = Series(False, index=range(5))
|
||||
msg = "Downcasting object dtype arrays on"
|
||||
with tm.assert_produces_warning(FutureWarning, match=msg):
|
||||
result = s.shift(1).bfill()
|
||||
expected = Series(False, index=range(5))
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
|
||||
def test_reindex_nearest():
    """method="nearest" with no, scalar and list-like tolerance."""
    s = Series(np.arange(10, dtype="int64"))
    target = [0.1, 0.9, 1.5, 2.0]
    result = s.reindex(target, method="nearest")
    expected = Series(np.around(target).astype("int64"), target)
    tm.assert_series_equal(expected, result)

    # scalar tolerance: 1.5 is expected to be left unmatched (NaN)
    result = s.reindex(target, method="nearest", tolerance=0.2)
    expected = Series([0, 1, np.nan, 2], target)
    tm.assert_series_equal(expected, result)

    # one tolerance per target label
    result = s.reindex(target, method="nearest", tolerance=[0.3, 0.01, 0.4, 3])
    expected = Series([0, np.nan, np.nan, 2], target)
    tm.assert_series_equal(expected, result)
|
||||
|
||||
|
||||
def test_reindex_int(datetime_series):
|
||||
ts = datetime_series[::2]
|
||||
int_ts = Series(np.zeros(len(ts), dtype=int), index=ts.index)
|
||||
|
||||
# this should work fine
|
||||
reindexed_int = int_ts.reindex(datetime_series.index)
|
||||
|
||||
# if NaNs introduced
|
||||
assert reindexed_int.dtype == np.float64
|
||||
|
||||
# NO NaNs introduced
|
||||
reindexed_int = int_ts.reindex(int_ts.index[::2])
|
||||
assert reindexed_int.dtype == np.dtype(int)
|
||||
|
||||
|
||||
def test_reindex_bool(datetime_series):
|
||||
# A series other than float, int, string, or object
|
||||
ts = datetime_series[::2]
|
||||
bool_ts = Series(np.zeros(len(ts), dtype=bool), index=ts.index)
|
||||
|
||||
# this should work fine
|
||||
reindexed_bool = bool_ts.reindex(datetime_series.index)
|
||||
|
||||
# if NaNs introduced
|
||||
assert reindexed_bool.dtype == np.object_
|
||||
|
||||
# NO NaNs introduced
|
||||
reindexed_bool = bool_ts.reindex(bool_ts.index[::2])
|
||||
assert reindexed_bool.dtype == np.bool_
|
||||
|
||||
|
||||
def test_reindex_bool_pad(datetime_series):
|
||||
# fail
|
||||
ts = datetime_series[5:]
|
||||
bool_ts = Series(np.zeros(len(ts), dtype=bool), index=ts.index)
|
||||
filled_bool = bool_ts.reindex(datetime_series.index, method="pad")
|
||||
assert isna(filled_bool[:5]).all()
|
||||
|
||||
|
||||
def test_reindex_categorical():
    """Reindexing a categorical Series keeps its categories and adds NaN."""
    index = date_range("20000101", periods=3)

    # reindexing to an invalid Categorical
    s = Series(["a", "b", "c"], dtype="category")
    result = s.reindex(index)
    expected = Series(
        Categorical(values=[np.nan, np.nan, np.nan], categories=["a", "b", "c"])
    )
    expected.index = index
    tm.assert_series_equal(result, expected)

    # partial reindexing
    expected = Series(Categorical(values=["b", "c"], categories=["a", "b", "c"]))
    expected.index = [1, 2]
    result = s.reindex([1, 2])
    tm.assert_series_equal(result, expected)

    expected = Series(Categorical(values=["c", np.nan], categories=["a", "b", "c"]))
    expected.index = [2, 3]
    result = s.reindex([2, 3])
    tm.assert_series_equal(result, expected)
|
||||
|
||||
|
||||
def test_reindex_astype_order_consistency():
    """GH#17444: reindex-then-astype equals astype-then-reindex."""
    # GH#17444
    ser = Series([1, 2, 3], index=[2, 0, 1])
    new_index = [0, 1, 2]
    temp_dtype = "category"
    new_dtype = str
    result = ser.reindex(new_index).astype(temp_dtype).astype(new_dtype)
    expected = ser.astype(temp_dtype).reindex(new_index).astype(new_dtype)
    tm.assert_series_equal(result, expected)
|
||||
|
||||
|
||||
def test_reindex_fill_value():
    """Check fill_value handling for float, int, object and bool Series.

    Each section reindexes [0, 1, 2] to [1, 2, 3], first with the default
    fill and then with an explicit ``fill_value``.
    """
    # -----------------------------------------------------------
    # floats
    floats = Series([1.0, 2.0, 3.0])
    result = floats.reindex([1, 2, 3])
    expected = Series([2.0, 3.0, np.nan], index=[1, 2, 3])
    tm.assert_series_equal(result, expected)

    result = floats.reindex([1, 2, 3], fill_value=0)
    expected = Series([2.0, 3.0, 0], index=[1, 2, 3])
    tm.assert_series_equal(result, expected)

    # -----------------------------------------------------------
    # ints
    ints = Series([1, 2, 3])

    result = ints.reindex([1, 2, 3])
    expected = Series([2.0, 3.0, np.nan], index=[1, 2, 3])
    tm.assert_series_equal(result, expected)

    # don't upcast
    result = ints.reindex([1, 2, 3], fill_value=0)
    expected = Series([2, 3, 0], index=[1, 2, 3])
    assert issubclass(result.dtype.type, np.integer)
    tm.assert_series_equal(result, expected)

    # -----------------------------------------------------------
    # objects
    objects = Series([1, 2, 3], dtype=object)

    result = objects.reindex([1, 2, 3])
    expected = Series([2, 3, np.nan], index=[1, 2, 3], dtype=object)
    tm.assert_series_equal(result, expected)

    result = objects.reindex([1, 2, 3], fill_value="foo")
    expected = Series([2, 3, "foo"], index=[1, 2, 3], dtype=object)
    tm.assert_series_equal(result, expected)

    # ------------------------------------------------------------
    # bools
    bools = Series([True, False, True])

    result = bools.reindex([1, 2, 3])
    expected = Series([False, True, np.nan], index=[1, 2, 3], dtype=object)
    tm.assert_series_equal(result, expected)

    result = bools.reindex([1, 2, 3], fill_value=False)
    expected = Series([False, True, False], index=[1, 2, 3])
    tm.assert_series_equal(result, expected)
|
||||
|
||||
|
||||
@td.skip_array_manager_not_yet_implemented
|
||||
@pytest.mark.parametrize("dtype", ["datetime64[ns]", "timedelta64[ns]"])
|
||||
@pytest.mark.parametrize("fill_value", ["string", 0, Timedelta(0)])
|
||||
def test_reindex_fill_value_datetimelike_upcast(dtype, fill_value, using_array_manager):
|
||||
# https://github.com/pandas-dev/pandas/issues/42921
|
||||
if dtype == "timedelta64[ns]" and fill_value == Timedelta(0):
|
||||
# use the scalar that is not compatible with the dtype for this test
|
||||
fill_value = Timestamp(0)
|
||||
|
||||
ser = Series([NaT], dtype=dtype)
|
||||
|
||||
result = ser.reindex([0, 1], fill_value=fill_value)
|
||||
expected = Series([NaT, fill_value], index=[0, 1], dtype=object)
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
|
||||
def test_reindex_datetimeindexes_tz_naive_and_aware():
    """GH 8306: ffill-reindexing tz-aware data with a naive index raises."""
    # GH 8306
    idx = date_range("20131101", tz="America/Chicago", periods=7)
    newidx = date_range("20131103", periods=10, freq="h")
    s = Series(range(7), index=idx)
    msg = (
        r"Cannot compare dtypes datetime64\[ns, America/Chicago\] "
        r"and datetime64\[ns\]"
    )
    with pytest.raises(TypeError, match=msg):
        s.reindex(newidx, method="ffill")
|
||||
|
||||
|
||||
def test_reindex_empty_series_tz_dtype():
    """GH 20869: reindexing an empty tz-aware Series keeps its dtype."""
    # GH 20869
    result = Series(dtype="datetime64[ns, UTC]").reindex([0, 1])
    expected = Series([NaT] * 2, dtype="datetime64[ns, UTC]")
    # Both operands are Series, so use the Series-specific assertion like
    # the sibling tests do.
    tm.assert_series_equal(result, expected)
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "p_values, o_values, values, expected_values",
    [
        (
            [Period("2019Q1", "Q-DEC"), Period("2019Q2", "Q-DEC")],
            [Period("2019Q1", "Q-DEC"), Period("2019Q2", "Q-DEC"), "All"],
            [1.0, 1.0],
            [1.0, 1.0, np.nan],
        ),
        (
            [Period("2019Q1", "Q-DEC"), Period("2019Q2", "Q-DEC")],
            [Period("2019Q1", "Q-DEC"), Period("2019Q2", "Q-DEC")],
            [1.0, 1.0],
            [1.0, 1.0],
        ),
    ],
)
def test_reindex_periodindex_with_object(p_values, o_values, values, expected_values):
    """GH#28337: reindex a PeriodIndex-ed Series with an Index of o_values.

    Parameters
    ----------
    p_values : list
        Periods used to build the PeriodIndex of the Series.
    o_values : list
        Labels of the target Index (may mix Periods and strings).
    values : list
        Data of the Series under test.
    expected_values : list
        Data expected after reindexing.
    """
    # GH#28337
    period_index = PeriodIndex(p_values)
    object_index = Index(o_values)

    ser = Series(values, index=period_index)
    result = ser.reindex(object_index)
    expected = Series(expected_values, index=object_index)
    tm.assert_series_equal(result, expected)
|
||||
|
||||
|
||||
def test_reindex_too_many_args():
    """GH 40980: a second positional argument to reindex raises TypeError."""
    # GH 40980
    ser = Series([1, 2])
    msg = r"reindex\(\) takes from 1 to 2 positional arguments but 3 were given"
    with pytest.raises(TypeError, match=msg):
        ser.reindex([2, 3], False)
|
||||
|
||||
|
||||
def test_reindex_double_index():
    """GH 40980: passing the index positionally and by keyword raises."""
    # GH 40980
    ser = Series([1, 2])
    msg = r"reindex\(\) got multiple values for argument 'index'"
    with pytest.raises(TypeError, match=msg):
        ser.reindex([2, 3], index=[3, 4])
|
||||
|
||||
|
||||
def test_reindex_no_posargs():
    """GH 40980: reindex works when the index is given by keyword only."""
    # GH 40980
    ser = Series([1, 2])
    result = ser.reindex(index=[1, 0])
    expected = Series([2, 1], index=[1, 0])
    tm.assert_series_equal(result, expected)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("values", [[["a"], ["x"]], [[], []]])
def test_reindex_empty_with_level(values):
    """GH41170: level-reindex with an unmatched label gives an empty Series.

    Parameters
    ----------
    values : list of list
        Two arrays used to build the MultiIndex of the Series under test.
    """
    # GH41170
    ser = Series(
        range(len(values[0])), index=MultiIndex.from_arrays(values), dtype="object"
    )
    result = ser.reindex(np.array(["b"]), level=0)
    expected = Series(
        index=MultiIndex(levels=[["b"], values[1]], codes=[[], []]), dtype="object"
    )
    tm.assert_series_equal(result, expected)
|
||||
|
||||
|
||||
def test_reindex_missing_category():
    """GH#18185: a fill_value outside the categories raises TypeError."""
    # GH#18185
    ser = Series([1, 2, 3, 1], dtype="category")
    msg = r"Cannot setitem on a Categorical with a new category \(-1\)"
    with pytest.raises(TypeError, match=msg):
        ser.reindex([1, 2, 3, 4, 5], fill_value=-1)
|
||||
|
||||
|
||||
def test_reindexing_with_float64_NA_log():
    """GH 47055: np.log on a reindexed Float64 Series emits no warning.

    Also checks that the backing ``_data`` of the reindexed array holds NaN
    in the masked positions.
    """
    # GH 47055
    s = Series([1.0, NA], dtype=Float64Dtype())
    s_reindex = s.reindex(range(3))
    result = s_reindex.values._data
    expected = np.array([1, np.nan, np.nan])
    tm.assert_numpy_array_equal(result, expected)
    with tm.assert_produces_warning(None):
        result_log = np.log(s_reindex)
        expected_log = Series([0, np.nan, np.nan], dtype=Float64Dtype())
        tm.assert_series_equal(result_log, expected_log)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("dtype", ["timedelta64", "datetime64"])
|
||||
def test_reindex_expand_nonnano_nat(dtype):
|
||||
# GH 53497
|
||||
ser = Series(np.array([1], dtype=f"{dtype}[s]"))
|
||||
result = ser.reindex(RangeIndex(2))
|
||||
expected = Series(
|
||||
np.array([1, getattr(np, dtype)("nat", "s")], dtype=f"{dtype}[s]")
|
||||
)
|
||||
tm.assert_series_equal(result, expected)
|
||||
@ -0,0 +1,41 @@
|
||||
from datetime import datetime
|
||||
|
||||
import numpy as np
|
||||
|
||||
from pandas import Series
|
||||
import pandas._testing as tm
|
||||
|
||||
|
||||
def test_reindex_like(datetime_series):
|
||||
other = datetime_series[::2]
|
||||
tm.assert_series_equal(
|
||||
datetime_series.reindex(other.index), datetime_series.reindex_like(other)
|
||||
)
|
||||
|
||||
# GH#7179
|
||||
day1 = datetime(2013, 3, 5)
|
||||
day2 = datetime(2013, 5, 5)
|
||||
day3 = datetime(2014, 3, 5)
|
||||
|
||||
series1 = Series([5, None, None], [day1, day2, day3])
|
||||
series2 = Series([None, None], [day1, day3])
|
||||
|
||||
result = series1.reindex_like(series2, method="pad")
|
||||
expected = Series([5, np.nan], index=[day1, day3])
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
|
||||
def test_reindex_like_nearest():
    """reindex_like with method="nearest", with and without tolerance."""
    ser = Series(np.arange(10, dtype="int64"))

    target = [0.1, 0.9, 1.5, 2.0]
    other = ser.reindex(target, method="nearest")
    expected = Series(np.around(target).astype("int64"), target)

    result = ser.reindex_like(other, method="nearest")
    tm.assert_series_equal(expected, result)

    # The tolerances below are expected to leave the result unchanged.
    result = ser.reindex_like(other, method="nearest", tolerance=1)
    tm.assert_series_equal(expected, result)
    result = ser.reindex_like(other, method="nearest", tolerance=[1, 2, 3, 4])
    tm.assert_series_equal(expected, result)
|
||||
@ -0,0 +1,184 @@
|
||||
from datetime import datetime
|
||||
import re
|
||||
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
from pandas import (
|
||||
Index,
|
||||
MultiIndex,
|
||||
Series,
|
||||
array,
|
||||
)
|
||||
import pandas._testing as tm
|
||||
|
||||
|
||||
class TestRename:
|
||||
def test_rename(self, datetime_series):
|
||||
ts = datetime_series
|
||||
renamer = lambda x: x.strftime("%Y%m%d")
|
||||
renamed = ts.rename(renamer)
|
||||
assert renamed.index[0] == renamer(ts.index[0])
|
||||
|
||||
# dict
|
||||
rename_dict = dict(zip(ts.index, renamed.index))
|
||||
renamed2 = ts.rename(rename_dict)
|
||||
tm.assert_series_equal(renamed, renamed2)
|
||||
|
||||
def test_rename_partial_dict(self):
|
||||
# partial dict
|
||||
ser = Series(np.arange(4), index=["a", "b", "c", "d"], dtype="int64")
|
||||
renamed = ser.rename({"b": "foo", "d": "bar"})
|
||||
tm.assert_index_equal(renamed.index, Index(["a", "foo", "c", "bar"]))
|
||||
|
||||
def test_rename_retain_index_name(self):
|
||||
# index with name
|
||||
renamer = Series(
|
||||
np.arange(4), index=Index(["a", "b", "c", "d"], name="name"), dtype="int64"
|
||||
)
|
||||
renamed = renamer.rename({})
|
||||
assert renamed.index.name == renamer.index.name
|
||||
|
||||
def test_rename_by_series(self):
|
||||
ser = Series(range(5), name="foo")
|
||||
renamer = Series({1: 10, 2: 20})
|
||||
result = ser.rename(renamer)
|
||||
expected = Series(range(5), index=[0, 10, 20, 3, 4], name="foo")
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
def test_rename_set_name(self, using_infer_string):
|
||||
ser = Series(range(4), index=list("abcd"))
|
||||
for name in ["foo", 123, 123.0, datetime(2001, 11, 11), ("foo",)]:
|
||||
result = ser.rename(name)
|
||||
assert result.name == name
|
||||
if using_infer_string:
|
||||
tm.assert_extension_array_equal(result.index.values, ser.index.values)
|
||||
else:
|
||||
tm.assert_numpy_array_equal(result.index.values, ser.index.values)
|
||||
assert ser.name is None
|
||||
|
||||
def test_rename_set_name_inplace(self, using_infer_string):
|
||||
ser = Series(range(3), index=list("abc"))
|
||||
for name in ["foo", 123, 123.0, datetime(2001, 11, 11), ("foo",)]:
|
||||
ser.rename(name, inplace=True)
|
||||
assert ser.name == name
|
||||
exp = np.array(["a", "b", "c"], dtype=np.object_)
|
||||
if using_infer_string:
|
||||
exp = array(exp, dtype="str")
|
||||
tm.assert_extension_array_equal(ser.index.values, exp)
|
||||
else:
|
||||
tm.assert_numpy_array_equal(ser.index.values, exp)
|
||||
|
||||
def test_rename_axis_supported(self):
|
||||
# Supporting axis for compatibility, detailed in GH-18589
|
||||
ser = Series(range(5))
|
||||
ser.rename({}, axis=0)
|
||||
ser.rename({}, axis="index")
|
||||
|
||||
with pytest.raises(ValueError, match="No axis named 5"):
|
||||
ser.rename({}, axis=5)
|
||||
|
||||
def test_rename_inplace(self, datetime_series):
|
||||
renamer = lambda x: x.strftime("%Y%m%d")
|
||||
expected = renamer(datetime_series.index[0])
|
||||
|
||||
datetime_series.rename(renamer, inplace=True)
|
||||
assert datetime_series.index[0] == expected
|
||||
|
||||
def test_rename_with_custom_indexer(self):
|
||||
# GH 27814
|
||||
class MyIndexer:
|
||||
pass
|
||||
|
||||
ix = MyIndexer()
|
||||
ser = Series([1, 2, 3]).rename(ix)
|
||||
assert ser.name is ix
|
||||
|
||||
def test_rename_with_custom_indexer_inplace(self):
|
||||
# GH 27814
|
||||
class MyIndexer:
|
||||
pass
|
||||
|
||||
ix = MyIndexer()
|
||||
ser = Series([1, 2, 3])
|
||||
ser.rename(ix, inplace=True)
|
||||
assert ser.name is ix
|
||||
|
||||
def test_rename_callable(self):
|
||||
# GH 17407
|
||||
ser = Series(range(1, 6), index=Index(range(2, 7), name="IntIndex"))
|
||||
result = ser.rename(str)
|
||||
expected = ser.rename(lambda i: str(i))
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
assert result.name == expected.name
|
||||
|
||||
def test_rename_none(self):
|
||||
# GH 40977
|
||||
ser = Series([1, 2], name="foo")
|
||||
result = ser.rename(None)
|
||||
expected = Series([1, 2])
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
def test_rename_series_with_multiindex(self):
|
||||
# issue #43659
|
||||
arrays = [
|
||||
["bar", "baz", "baz", "foo", "qux"],
|
||||
["one", "one", "two", "two", "one"],
|
||||
]
|
||||
|
||||
index = MultiIndex.from_arrays(arrays, names=["first", "second"])
|
||||
ser = Series(np.ones(5), index=index)
|
||||
result = ser.rename(index={"one": "yes"}, level="second", errors="raise")
|
||||
|
||||
arrays_expected = [
|
||||
["bar", "baz", "baz", "foo", "qux"],
|
||||
["yes", "yes", "two", "two", "yes"],
|
||||
]
|
||||
|
||||
index_expected = MultiIndex.from_arrays(
|
||||
arrays_expected, names=["first", "second"]
|
||||
)
|
||||
series_expected = Series(np.ones(5), index=index_expected)
|
||||
|
||||
tm.assert_series_equal(result, series_expected)
|
||||
|
||||
def test_rename_series_with_multiindex_keeps_ea_dtypes(self):
|
||||
# GH21055
|
||||
arrays = [
|
||||
Index([1, 2, 3], dtype="Int64").astype("category"),
|
||||
Index([1, 2, 3], dtype="Int64"),
|
||||
]
|
||||
mi = MultiIndex.from_arrays(arrays, names=["A", "B"])
|
||||
ser = Series(1, index=mi)
|
||||
result = ser.rename({1: 4}, level=1)
|
||||
|
||||
arrays_expected = [
|
||||
Index([1, 2, 3], dtype="Int64").astype("category"),
|
||||
Index([4, 2, 3], dtype="Int64"),
|
||||
]
|
||||
mi_expected = MultiIndex.from_arrays(arrays_expected, names=["A", "B"])
|
||||
expected = Series(1, index=mi_expected)
|
||||
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
def test_rename_error_arg(self):
|
||||
# GH 46889
|
||||
ser = Series(["foo", "bar"])
|
||||
match = re.escape("[2] not found in axis")
|
||||
with pytest.raises(KeyError, match=match):
|
||||
ser.rename({2: 9}, errors="raise")
|
||||
|
||||
def test_rename_copy_false(self, using_copy_on_write, warn_copy_on_write):
|
||||
# GH 46889
|
||||
ser = Series(["foo", "bar"])
|
||||
ser_orig = ser.copy()
|
||||
shallow_copy = ser.rename({1: 9}, copy=False)
|
||||
with tm.assert_cow_warning(warn_copy_on_write):
|
||||
ser[0] = "foobar"
|
||||
if using_copy_on_write:
|
||||
assert ser_orig[0] == shallow_copy[0]
|
||||
assert ser_orig[1] == shallow_copy[9]
|
||||
else:
|
||||
assert ser[0] == shallow_copy[0]
|
||||
assert ser[1] == shallow_copy[9]
|
||||
@ -0,0 +1,47 @@
|
||||
import pytest
|
||||
|
||||
from pandas import (
|
||||
Index,
|
||||
MultiIndex,
|
||||
Series,
|
||||
)
|
||||
import pandas._testing as tm
|
||||
|
||||
|
||||
class TestSeriesRenameAxis:
|
||||
def test_rename_axis_mapper(self):
|
||||
# GH 19978
|
||||
mi = MultiIndex.from_product([["a", "b", "c"], [1, 2]], names=["ll", "nn"])
|
||||
ser = Series(list(range(len(mi))), index=mi)
|
||||
|
||||
result = ser.rename_axis(index={"ll": "foo"})
|
||||
assert result.index.names == ["foo", "nn"]
|
||||
|
||||
result = ser.rename_axis(index=str.upper, axis=0)
|
||||
assert result.index.names == ["LL", "NN"]
|
||||
|
||||
result = ser.rename_axis(index=["foo", "goo"])
|
||||
assert result.index.names == ["foo", "goo"]
|
||||
|
||||
with pytest.raises(TypeError, match="unexpected"):
|
||||
ser.rename_axis(columns="wrong")
|
||||
|
||||
def test_rename_axis_inplace(self, datetime_series):
|
||||
# GH 15704
|
||||
expected = datetime_series.rename_axis("foo")
|
||||
result = datetime_series
|
||||
no_return = result.rename_axis("foo", inplace=True)
|
||||
|
||||
assert no_return is None
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
@pytest.mark.parametrize("kwargs", [{"mapper": None}, {"index": None}, {}])
|
||||
def test_rename_axis_none(self, kwargs):
|
||||
# GH 25034
|
||||
index = Index(list("abc"), name="foo")
|
||||
ser = Series([1, 2, 3], index=index)
|
||||
|
||||
result = ser.rename_axis(**kwargs)
|
||||
expected_index = index.rename(None) if kwargs else index
|
||||
expected = Series([1, 2, 3], index=expected_index)
|
||||
tm.assert_series_equal(result, expected)
|
||||
@ -0,0 +1,40 @@
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
from pandas import (
|
||||
MultiIndex,
|
||||
Series,
|
||||
)
|
||||
import pandas._testing as tm
|
||||
|
||||
|
||||
class TestRepeat:
    """Tests for Series.repeat and np.repeat on a Series."""

    def test_repeat(self):
        """Scalar and per-element counts repeat values and index alike."""
        ser = Series(np.random.default_rng(2).standard_normal(3), index=["a", "b", "c"])

        reps = ser.repeat(5)
        exp = Series(ser.values.repeat(5), index=ser.index.values.repeat(5))
        tm.assert_series_equal(reps, exp)

        to_rep = [2, 3, 4]
        reps = ser.repeat(to_rep)
        exp = Series(ser.values.repeat(to_rep), index=ser.index.values.repeat(to_rep))
        tm.assert_series_equal(reps, exp)

    def test_numpy_repeat(self):
        """np.repeat works on a Series; passing ``axis`` raises ValueError."""
        ser = Series(np.arange(3), name="x")
        expected = Series(
            ser.values.repeat(2), name="x", index=ser.index.values.repeat(2)
        )
        tm.assert_series_equal(np.repeat(ser, 2), expected)

        msg = "the 'axis' parameter is not supported"
        with pytest.raises(ValueError, match=msg):
            np.repeat(ser, 2, axis=0)

    def test_repeat_with_multiindex(self):
        """GH#9361: repeat on a MultiIndex-ed Series has the expected length."""
        # GH#9361, fixed by GH#7891
        m_idx = MultiIndex.from_tuples([(1, 2), (3, 4), (5, 6), (7, 8)])
        data = ["a", "b", "c", "d"]
        ser = Series(data, index=m_idx)
        assert ser.repeat(3).shape == (3 * len(data),)
|
||||
@ -0,0 +1,819 @@
|
||||
import re
|
||||
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
import pandas as pd
|
||||
import pandas._testing as tm
|
||||
from pandas.core.arrays import IntervalArray
|
||||
|
||||
|
||||
class TestSeriesReplace:
|
||||
def test_replace_explicit_none(self):
|
||||
# GH#36984 if the user explicitly passes value=None, give it to them
|
||||
ser = pd.Series([0, 0, ""], dtype=object)
|
||||
result = ser.replace("", None)
|
||||
expected = pd.Series([0, 0, None], dtype=object)
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
# Cast column 2 to object to avoid implicit cast when setting entry to ""
|
||||
df = pd.DataFrame(np.zeros((3, 3))).astype({2: object})
|
||||
df.iloc[2, 2] = ""
|
||||
result = df.replace("", None)
|
||||
expected = pd.DataFrame(
|
||||
{
|
||||
0: np.zeros(3),
|
||||
1: np.zeros(3),
|
||||
2: np.array([0.0, 0.0, None], dtype=object),
|
||||
}
|
||||
)
|
||||
assert expected.iloc[2, 2] is None
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
# GH#19998 same thing with object dtype
|
||||
ser = pd.Series([10, 20, 30, "a", "a", "b", "a"])
|
||||
result = ser.replace("a", None)
|
||||
expected = pd.Series([10, 20, 30, None, None, "b", None])
|
||||
assert expected.iloc[-1] is None
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
def test_replace_noop_doesnt_downcast(self):
|
||||
# GH#44498
|
||||
ser = pd.Series([None, None, pd.Timestamp("2021-12-16 17:31")], dtype=object)
|
||||
res = ser.replace({np.nan: None}) # should be a no-op
|
||||
tm.assert_series_equal(res, ser)
|
||||
assert res.dtype == object
|
||||
|
||||
# same thing but different calling convention
|
||||
res = ser.replace(np.nan, None)
|
||||
tm.assert_series_equal(res, ser)
|
||||
assert res.dtype == object
|
||||
|
||||
def test_replace(self):
|
||||
N = 50
|
||||
ser = pd.Series(np.random.default_rng(2).standard_normal(N))
|
||||
ser[0:4] = np.nan
|
||||
ser[6:10] = 0
|
||||
|
||||
# replace list with a single value
|
||||
return_value = ser.replace([np.nan], -1, inplace=True)
|
||||
assert return_value is None
|
||||
|
||||
exp = ser.fillna(-1)
|
||||
tm.assert_series_equal(ser, exp)
|
||||
|
||||
rs = ser.replace(0.0, np.nan)
|
||||
ser[ser == 0.0] = np.nan
|
||||
tm.assert_series_equal(rs, ser)
|
||||
|
||||
ser = pd.Series(
|
||||
np.fabs(np.random.default_rng(2).standard_normal(N)),
|
||||
pd.date_range("2020-01-01", periods=N),
|
||||
dtype=object,
|
||||
)
|
||||
ser[:5] = np.nan
|
||||
ser[6:10] = "foo"
|
||||
ser[20:30] = "bar"
|
||||
|
||||
# replace list with a single value
|
||||
msg = "Downcasting behavior in `replace`"
|
||||
with tm.assert_produces_warning(FutureWarning, match=msg):
|
||||
rs = ser.replace([np.nan, "foo", "bar"], -1)
|
||||
|
||||
assert (rs[:5] == -1).all()
|
||||
assert (rs[6:10] == -1).all()
|
||||
assert (rs[20:30] == -1).all()
|
||||
assert (pd.isna(ser[:5])).all()
|
||||
|
||||
# replace with different values
|
||||
with tm.assert_produces_warning(FutureWarning, match=msg):
|
||||
rs = ser.replace({np.nan: -1, "foo": -2, "bar": -3})
|
||||
|
||||
assert (rs[:5] == -1).all()
|
||||
assert (rs[6:10] == -2).all()
|
||||
assert (rs[20:30] == -3).all()
|
||||
assert (pd.isna(ser[:5])).all()
|
||||
|
||||
# replace with different values with 2 lists
|
||||
with tm.assert_produces_warning(FutureWarning, match=msg):
|
||||
rs2 = ser.replace([np.nan, "foo", "bar"], [-1, -2, -3])
|
||||
tm.assert_series_equal(rs, rs2)
|
||||
|
||||
# replace inplace
|
||||
with tm.assert_produces_warning(FutureWarning, match=msg):
|
||||
return_value = ser.replace([np.nan, "foo", "bar"], -1, inplace=True)
|
||||
assert return_value is None
|
||||
|
||||
assert (ser[:5] == -1).all()
|
||||
assert (ser[6:10] == -1).all()
|
||||
assert (ser[20:30] == -1).all()
|
||||
|
||||
def test_replace_nan_with_inf(self):
|
||||
ser = pd.Series([np.nan, 0, np.inf])
|
||||
tm.assert_series_equal(ser.replace(np.nan, 0), ser.fillna(0))
|
||||
|
||||
ser = pd.Series([np.nan, 0, "foo", "bar", np.inf, None, pd.NaT])
|
||||
tm.assert_series_equal(ser.replace(np.nan, 0), ser.fillna(0))
|
||||
filled = ser.copy()
|
||||
filled[4] = 0
|
||||
tm.assert_series_equal(ser.replace(np.inf, 0), filled)
|
||||
|
||||
def test_replace_listlike_value_listlike_target(self, datetime_series):
|
||||
ser = pd.Series(datetime_series.index)
|
||||
tm.assert_series_equal(ser.replace(np.nan, 0), ser.fillna(0))
|
||||
|
||||
# malformed
|
||||
msg = r"Replacement lists must match in length\. Expecting 3 got 2"
|
||||
with pytest.raises(ValueError, match=msg):
|
||||
ser.replace([1, 2, 3], [np.nan, 0])
|
||||
|
||||
# ser is dt64 so can't hold 1 or 2, so this replace is a no-op
|
||||
result = ser.replace([1, 2], [np.nan, 0])
|
||||
tm.assert_series_equal(result, ser)
|
||||
|
||||
ser = pd.Series([0, 1, 2, 3, 4])
|
||||
result = ser.replace([0, 1, 2, 3, 4], [4, 3, 2, 1, 0])
|
||||
tm.assert_series_equal(result, pd.Series([4, 3, 2, 1, 0]))
|
||||
|
||||
def test_replace_gh5319(self):
|
||||
# API change from 0.12?
|
||||
# GH 5319
|
||||
ser = pd.Series([0, np.nan, 2, 3, 4])
|
||||
expected = ser.ffill()
|
||||
msg = (
|
||||
"Series.replace without 'value' and with non-dict-like "
|
||||
"'to_replace' is deprecated"
|
||||
)
|
||||
with tm.assert_produces_warning(FutureWarning, match=msg):
|
||||
result = ser.replace([np.nan])
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
ser = pd.Series([0, np.nan, 2, 3, 4])
|
||||
expected = ser.ffill()
|
||||
with tm.assert_produces_warning(FutureWarning, match=msg):
|
||||
result = ser.replace(np.nan)
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
def test_replace_datetime64(self):
|
||||
# GH 5797
|
||||
ser = pd.Series(pd.date_range("20130101", periods=5))
|
||||
expected = ser.copy()
|
||||
expected.loc[2] = pd.Timestamp("20120101")
|
||||
result = ser.replace({pd.Timestamp("20130103"): pd.Timestamp("20120101")})
|
||||
tm.assert_series_equal(result, expected)
|
||||
result = ser.replace(pd.Timestamp("20130103"), pd.Timestamp("20120101"))
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
def test_replace_nat_with_tz(self):
|
||||
# GH 11792: Test with replacing NaT in a list with tz data
|
||||
ts = pd.Timestamp("2015/01/01", tz="UTC")
|
||||
s = pd.Series([pd.NaT, pd.Timestamp("2015/01/01", tz="UTC")])
|
||||
result = s.replace([np.nan, pd.NaT], pd.Timestamp.min)
|
||||
expected = pd.Series([pd.Timestamp.min, ts], dtype=object)
|
||||
tm.assert_series_equal(expected, result)
|
||||
|
||||
def test_replace_timedelta_td64(self):
|
||||
tdi = pd.timedelta_range(0, periods=5)
|
||||
ser = pd.Series(tdi)
|
||||
|
||||
# Using a single dict argument means we go through replace_list
|
||||
result = ser.replace({ser[1]: ser[3]})
|
||||
|
||||
expected = pd.Series([ser[0], ser[3], ser[2], ser[3], ser[4]])
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
def test_replace_with_single_list(self):
|
||||
ser = pd.Series([0, 1, 2, 3, 4])
|
||||
msg2 = (
|
||||
"Series.replace without 'value' and with non-dict-like "
|
||||
"'to_replace' is deprecated"
|
||||
)
|
||||
with tm.assert_produces_warning(FutureWarning, match=msg2):
|
||||
result = ser.replace([1, 2, 3])
|
||||
tm.assert_series_equal(result, pd.Series([0, 0, 0, 0, 4]))
|
||||
|
||||
s = ser.copy()
|
||||
with tm.assert_produces_warning(FutureWarning, match=msg2):
|
||||
return_value = s.replace([1, 2, 3], inplace=True)
|
||||
assert return_value is None
|
||||
tm.assert_series_equal(s, pd.Series([0, 0, 0, 0, 4]))
|
||||
|
||||
# make sure things don't get corrupted when fillna call fails
|
||||
s = ser.copy()
|
||||
msg = (
|
||||
r"Invalid fill method\. Expecting pad \(ffill\) or backfill "
|
||||
r"\(bfill\)\. Got crash_cymbal"
|
||||
)
|
||||
msg3 = "The 'method' keyword in Series.replace is deprecated"
|
||||
with pytest.raises(ValueError, match=msg):
|
||||
with tm.assert_produces_warning(FutureWarning, match=msg3):
|
||||
return_value = s.replace([1, 2, 3], inplace=True, method="crash_cymbal")
|
||||
assert return_value is None
|
||||
tm.assert_series_equal(s, ser)
|
||||
|
||||
def test_replace_mixed_types(self):
|
||||
ser = pd.Series(np.arange(5), dtype="int64")
|
||||
|
||||
def check_replace(to_rep, val, expected):
|
||||
sc = ser.copy()
|
||||
result = ser.replace(to_rep, val)
|
||||
return_value = sc.replace(to_rep, val, inplace=True)
|
||||
assert return_value is None
|
||||
tm.assert_series_equal(expected, result)
|
||||
tm.assert_series_equal(expected, sc)
|
||||
|
||||
# 3.0 can still be held in our int64 series, so we do not upcast GH#44940
|
||||
tr, v = [3], [3.0]
|
||||
check_replace(tr, v, ser)
|
||||
# Note this matches what we get with the scalars 3 and 3.0
|
||||
check_replace(tr[0], v[0], ser)
|
||||
|
||||
# MUST upcast to float
|
||||
e = pd.Series([0, 1, 2, 3.5, 4])
|
||||
tr, v = [3], [3.5]
|
||||
check_replace(tr, v, e)
|
||||
|
||||
# casts to object
|
||||
e = pd.Series([0, 1, 2, 3.5, "a"])
|
||||
tr, v = [3, 4], [3.5, "a"]
|
||||
check_replace(tr, v, e)
|
||||
|
||||
# again casts to object
|
||||
e = pd.Series([0, 1, 2, 3.5, pd.Timestamp("20130101")])
|
||||
tr, v = [3, 4], [3.5, pd.Timestamp("20130101")]
|
||||
check_replace(tr, v, e)
|
||||
|
||||
# casts to object
|
||||
e = pd.Series([0, 1, 2, 3.5, True], dtype="object")
|
||||
tr, v = [3, 4], [3.5, True]
|
||||
check_replace(tr, v, e)
|
||||
|
||||
# test an object with dates + floats + integers + strings
|
||||
dr = pd.Series(pd.date_range("1/1/2001", "1/10/2001", freq="D"))
|
||||
result = dr.astype(object).replace([dr[0], dr[1], dr[2]], [1.0, 2, "a"])
|
||||
expected = pd.Series([1.0, 2, "a"] + dr[3:].tolist(), dtype=object)
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
def test_replace_bool_with_string_no_op(self):
|
||||
s = pd.Series([True, False, True])
|
||||
result = s.replace("fun", "in-the-sun")
|
||||
tm.assert_series_equal(s, result)
|
||||
|
||||
def test_replace_bool_with_string(self):
|
||||
# nonexistent elements
|
||||
s = pd.Series([True, False, True])
|
||||
result = s.replace(True, "2u")
|
||||
expected = pd.Series(["2u", False, "2u"])
|
||||
tm.assert_series_equal(expected, result)
|
||||
|
||||
def test_replace_bool_with_bool(self):
|
||||
s = pd.Series([True, False, True])
|
||||
result = s.replace(True, False)
|
||||
expected = pd.Series([False] * len(s))
|
||||
tm.assert_series_equal(expected, result)
|
||||
|
||||
def test_replace_with_dict_with_bool_keys(self):
|
||||
s = pd.Series([True, False, True])
|
||||
result = s.replace({"asdf": "asdb", True: "yes"})
|
||||
expected = pd.Series(["yes", False, "yes"])
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
def test_replace_Int_with_na(self, any_int_ea_dtype):
|
||||
# GH 38267
|
||||
result = pd.Series([0, None], dtype=any_int_ea_dtype).replace(0, pd.NA)
|
||||
expected = pd.Series([pd.NA, pd.NA], dtype=any_int_ea_dtype)
|
||||
tm.assert_series_equal(result, expected)
|
||||
result = pd.Series([0, 1], dtype=any_int_ea_dtype).replace(0, pd.NA)
|
||||
result.replace(1, pd.NA, inplace=True)
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
def test_replace2(self):
|
||||
N = 50
|
||||
ser = pd.Series(
|
||||
np.fabs(np.random.default_rng(2).standard_normal(N)),
|
||||
pd.date_range("2020-01-01", periods=N),
|
||||
dtype=object,
|
||||
)
|
||||
ser[:5] = np.nan
|
||||
ser[6:10] = "foo"
|
||||
ser[20:30] = "bar"
|
||||
|
||||
# replace list with a single value
|
||||
msg = "Downcasting behavior in `replace`"
|
||||
with tm.assert_produces_warning(FutureWarning, match=msg):
|
||||
rs = ser.replace([np.nan, "foo", "bar"], -1)
|
||||
|
||||
assert (rs[:5] == -1).all()
|
||||
assert (rs[6:10] == -1).all()
|
||||
assert (rs[20:30] == -1).all()
|
||||
assert (pd.isna(ser[:5])).all()
|
||||
|
||||
# replace with different values
|
||||
with tm.assert_produces_warning(FutureWarning, match=msg):
|
||||
rs = ser.replace({np.nan: -1, "foo": -2, "bar": -3})
|
||||
|
||||
assert (rs[:5] == -1).all()
|
||||
assert (rs[6:10] == -2).all()
|
||||
assert (rs[20:30] == -3).all()
|
||||
assert (pd.isna(ser[:5])).all()
|
||||
|
||||
# replace with different values with 2 lists
|
||||
with tm.assert_produces_warning(FutureWarning, match=msg):
|
||||
rs2 = ser.replace([np.nan, "foo", "bar"], [-1, -2, -3])
|
||||
tm.assert_series_equal(rs, rs2)
|
||||
|
||||
# replace inplace
|
||||
with tm.assert_produces_warning(FutureWarning, match=msg):
|
||||
return_value = ser.replace([np.nan, "foo", "bar"], -1, inplace=True)
|
||||
assert return_value is None
|
||||
assert (ser[:5] == -1).all()
|
||||
assert (ser[6:10] == -1).all()
|
||||
assert (ser[20:30] == -1).all()
|
||||
|
||||
@pytest.mark.parametrize("inplace", [True, False])
|
||||
def test_replace_cascade(self, inplace):
|
||||
# Test that replaced values are not replaced again
|
||||
# GH #50778
|
||||
ser = pd.Series([1, 2, 3])
|
||||
expected = pd.Series([2, 3, 4])
|
||||
|
||||
res = ser.replace([1, 2, 3], [2, 3, 4], inplace=inplace)
|
||||
if inplace:
|
||||
tm.assert_series_equal(ser, expected)
|
||||
else:
|
||||
tm.assert_series_equal(res, expected)
|
||||
|
||||
def test_replace_with_dictlike_and_string_dtype(self, nullable_string_dtype):
|
||||
# GH 32621, GH#44940
|
||||
ser = pd.Series(["one", "two", np.nan], dtype=nullable_string_dtype)
|
||||
expected = pd.Series(["1", "2", np.nan], dtype=nullable_string_dtype)
|
||||
result = ser.replace({"one": "1", "two": "2"})
|
||||
tm.assert_series_equal(expected, result)
|
||||
|
||||
def test_replace_with_empty_dictlike(self):
|
||||
# GH 15289
|
||||
s = pd.Series(list("abcd"))
|
||||
tm.assert_series_equal(s, s.replace({}))
|
||||
|
||||
empty_series = pd.Series([])
|
||||
tm.assert_series_equal(s, s.replace(empty_series))
|
||||
|
||||
def test_replace_string_with_number(self):
|
||||
# GH 15743
|
||||
s = pd.Series([1, 2, 3])
|
||||
result = s.replace("2", np.nan)
|
||||
expected = pd.Series([1, 2, 3])
|
||||
tm.assert_series_equal(expected, result)
|
||||
|
||||
def test_replace_replacer_equals_replacement(self):
|
||||
# GH 20656
|
||||
# make sure all replacers are matching against original values
|
||||
s = pd.Series(["a", "b"])
|
||||
expected = pd.Series(["b", "a"])
|
||||
result = s.replace({"a": "b", "b": "a"})
|
||||
tm.assert_series_equal(expected, result)
|
||||
|
||||
def test_replace_unicode_with_number(self):
|
||||
# GH 15743
|
||||
s = pd.Series([1, 2, 3])
|
||||
result = s.replace("2", np.nan)
|
||||
expected = pd.Series([1, 2, 3])
|
||||
tm.assert_series_equal(expected, result)
|
||||
|
||||
def test_replace_mixed_types_with_string(self):
|
||||
# Testing mixed
|
||||
s = pd.Series([1, 2, 3, "4", 4, 5])
|
||||
msg = "Downcasting behavior in `replace`"
|
||||
with tm.assert_produces_warning(FutureWarning, match=msg):
|
||||
result = s.replace([2, "4"], np.nan)
|
||||
expected = pd.Series([1, np.nan, 3, np.nan, 4, 5])
|
||||
tm.assert_series_equal(expected, result)
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"categorical, numeric",
|
||||
[
|
||||
(pd.Categorical(["A"], categories=["A", "B"]), [1]),
|
||||
(pd.Categorical(["A", "B"], categories=["A", "B"]), [1, 2]),
|
||||
],
|
||||
)
|
||||
def test_replace_categorical(self, categorical, numeric, using_infer_string):
|
||||
# GH 24971, GH#23305
|
||||
ser = pd.Series(categorical)
|
||||
msg = "Downcasting behavior in `replace`"
|
||||
msg = "with CategoricalDtype is deprecated"
|
||||
with tm.assert_produces_warning(FutureWarning, match=msg):
|
||||
result = ser.replace({"A": 1, "B": 2})
|
||||
expected = pd.Series(numeric).astype("category")
|
||||
if 2 not in expected.cat.categories:
|
||||
# i.e. categories should be [1, 2] even if there are no "B"s present
|
||||
# GH#44940
|
||||
expected = expected.cat.add_categories(2)
|
||||
tm.assert_series_equal(expected, result)
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"data, data_exp", [(["a", "b", "c"], ["b", "b", "c"]), (["a"], ["b"])]
|
||||
)
|
||||
def test_replace_categorical_inplace(self, data, data_exp):
|
||||
# GH 53358
|
||||
result = pd.Series(data, dtype="category")
|
||||
msg = "with CategoricalDtype is deprecated"
|
||||
with tm.assert_produces_warning(FutureWarning, match=msg):
|
||||
result.replace(to_replace="a", value="b", inplace=True)
|
||||
expected = pd.Series(data_exp, dtype="category")
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
def test_replace_categorical_single(self):
|
||||
# GH 26988
|
||||
dti = pd.date_range("2016-01-01", periods=3, tz="US/Pacific")
|
||||
s = pd.Series(dti)
|
||||
c = s.astype("category")
|
||||
|
||||
expected = c.copy()
|
||||
expected = expected.cat.add_categories("foo")
|
||||
expected[2] = "foo"
|
||||
expected = expected.cat.remove_unused_categories()
|
||||
assert c[2] != "foo"
|
||||
|
||||
msg = "with CategoricalDtype is deprecated"
|
||||
with tm.assert_produces_warning(FutureWarning, match=msg):
|
||||
result = c.replace(c[2], "foo")
|
||||
tm.assert_series_equal(expected, result)
|
||||
assert c[2] != "foo" # ensure non-inplace call does not alter original
|
||||
|
||||
msg = "with CategoricalDtype is deprecated"
|
||||
with tm.assert_produces_warning(FutureWarning, match=msg):
|
||||
return_value = c.replace(c[2], "foo", inplace=True)
|
||||
assert return_value is None
|
||||
tm.assert_series_equal(expected, c)
|
||||
|
||||
first_value = c[0]
|
||||
msg = "with CategoricalDtype is deprecated"
|
||||
with tm.assert_produces_warning(FutureWarning, match=msg):
|
||||
return_value = c.replace(c[1], c[0], inplace=True)
|
||||
assert return_value is None
|
||||
assert c[0] == c[1] == first_value # test replacing with existing value
|
||||
|
||||
def test_replace_with_no_overflowerror(self):
|
||||
# GH 25616
|
||||
# casts to object without Exception from OverflowError
|
||||
s = pd.Series([0, 1, 2, 3, 4])
|
||||
result = s.replace([3], ["100000000000000000000"])
|
||||
expected = pd.Series([0, 1, 2, "100000000000000000000", 4])
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
s = pd.Series([0, "100000000000000000000", "100000000000000000001"])
|
||||
result = s.replace(["100000000000000000000"], [1])
|
||||
expected = pd.Series([0, 1, "100000000000000000001"])
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"ser, to_replace, exp",
|
||||
[
|
||||
([1, 2, 3], {1: 2, 2: 3, 3: 4}, [2, 3, 4]),
|
||||
(["1", "2", "3"], {"1": "2", "2": "3", "3": "4"}, ["2", "3", "4"]),
|
||||
],
|
||||
)
|
||||
def test_replace_commutative(self, ser, to_replace, exp):
|
||||
# GH 16051
|
||||
# DataFrame.replace() overwrites when values are non-numeric
|
||||
|
||||
series = pd.Series(ser)
|
||||
|
||||
expected = pd.Series(exp)
|
||||
result = series.replace(to_replace)
|
||||
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"ser, exp", [([1, 2, 3], [1, True, 3]), (["x", 2, 3], ["x", True, 3])]
|
||||
)
|
||||
def test_replace_no_cast(self, ser, exp):
|
||||
# GH 9113
|
||||
# BUG: replace int64 dtype with bool coerces to int64
|
||||
|
||||
series = pd.Series(ser)
|
||||
result = series.replace(2, True)
|
||||
expected = pd.Series(exp)
|
||||
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
def test_replace_invalid_to_replace(self):
|
||||
# GH 18634
|
||||
# API: replace() should raise an exception if invalid argument is given
|
||||
series = pd.Series(["a", "b", "c "])
|
||||
msg = (
|
||||
r"Expecting 'to_replace' to be either a scalar, array-like, "
|
||||
r"dict or None, got invalid type.*"
|
||||
)
|
||||
msg2 = (
|
||||
"Series.replace without 'value' and with non-dict-like "
|
||||
"'to_replace' is deprecated"
|
||||
)
|
||||
with pytest.raises(TypeError, match=msg):
|
||||
with tm.assert_produces_warning(FutureWarning, match=msg2):
|
||||
series.replace(lambda x: x.strip())
|
||||
|
||||
@pytest.mark.parametrize("frame", [False, True])
|
||||
def test_replace_nonbool_regex(self, frame):
|
||||
obj = pd.Series(["a", "b", "c "])
|
||||
if frame:
|
||||
obj = obj.to_frame()
|
||||
|
||||
msg = "'to_replace' must be 'None' if 'regex' is not a bool"
|
||||
with pytest.raises(ValueError, match=msg):
|
||||
obj.replace(to_replace=["a"], regex="foo")
|
||||
|
||||
@pytest.mark.parametrize("frame", [False, True])
|
||||
def test_replace_empty_copy(self, frame):
|
||||
obj = pd.Series([], dtype=np.float64)
|
||||
if frame:
|
||||
obj = obj.to_frame()
|
||||
|
||||
res = obj.replace(4, 5, inplace=True)
|
||||
assert res is None
|
||||
|
||||
res = obj.replace(4, 5, inplace=False)
|
||||
tm.assert_equal(res, obj)
|
||||
assert res is not obj
|
||||
|
||||
def test_replace_only_one_dictlike_arg(self, fixed_now_ts):
|
||||
# GH#33340
|
||||
|
||||
ser = pd.Series([1, 2, "A", fixed_now_ts, True])
|
||||
to_replace = {0: 1, 2: "A"}
|
||||
value = "foo"
|
||||
msg = "Series.replace cannot use dict-like to_replace and non-None value"
|
||||
with pytest.raises(ValueError, match=msg):
|
||||
ser.replace(to_replace, value)
|
||||
|
||||
to_replace = 1
|
||||
value = {0: "foo", 2: "bar"}
|
||||
msg = "Series.replace cannot use dict-value and non-None to_replace"
|
||||
with pytest.raises(ValueError, match=msg):
|
||||
ser.replace(to_replace, value)
|
||||
|
||||
def test_replace_extension_other(self, frame_or_series):
|
||||
# https://github.com/pandas-dev/pandas/issues/34530
|
||||
obj = frame_or_series(pd.array([1, 2, 3], dtype="Int64"))
|
||||
result = obj.replace("", "") # no exception
|
||||
# should not have changed dtype
|
||||
tm.assert_equal(obj, result)
|
||||
|
||||
def _check_replace_with_method(self, ser: pd.Series):
    """Shared checks for ``replace(ser[1], method="pad")``.

    Runs the copying and in-place variants on both ``ser`` and its
    one-column frame; each call is expected to emit the ``method``
    deprecation FutureWarning and to turn ``ser[1]`` into ``ser[0]``.
    """
    df = ser.to_frame()
    series_msg = "The 'method' keyword in Series.replace is deprecated"
    frame_msg = "The 'method' keyword in DataFrame.replace is deprecated"

    # Series, copying
    with tm.assert_produces_warning(FutureWarning, match=series_msg):
        res = ser.replace(ser[1], method="pad")
    padded = [ser[0], ser[0]] + list(ser[2:])
    expected = pd.Series(padded, dtype=ser.dtype)
    tm.assert_series_equal(res, expected)

    # DataFrame, copying
    with tm.assert_produces_warning(FutureWarning, match=frame_msg):
        res_df = df.replace(ser[1], method="pad")
    tm.assert_frame_equal(res_df, expected.to_frame())

    # Series, in-place (on a copy so ``ser`` stays usable)
    ser2 = ser.copy()
    with tm.assert_produces_warning(FutureWarning, match=series_msg):
        res2 = ser2.replace(ser[1], method="pad", inplace=True)
    assert res2 is None
    tm.assert_series_equal(ser2, expected)

    # DataFrame, in-place
    with tm.assert_produces_warning(FutureWarning, match=frame_msg):
        res_df2 = df.replace(ser[1], method="pad", inplace=True)
    assert res_df2 is None
    tm.assert_frame_equal(df, expected.to_frame())
|
||||
|
||||
def test_replace_ea_dtype_with_method(self, any_numeric_ea_dtype):
    """Run the shared ``method="pad"`` checks on a masked numeric Series."""
    values = pd.array([1, 2, pd.NA, 4], dtype=any_numeric_ea_dtype)
    self._check_replace_with_method(pd.Series(values))
|
||||
|
||||
@pytest.mark.parametrize("as_categorical", [True, False])
def test_replace_interval_with_method(self, as_categorical):
    """Run the shared ``method="pad"`` checks on interval data.

    In particular interval that can't hold NA.
    """
    intervals = pd.IntervalIndex.from_breaks(range(4))
    ser = pd.Series(intervals)
    if as_categorical:
        ser = ser.astype("category")

    self._check_replace_with_method(ser)
|
||||
|
||||
@pytest.mark.parametrize("as_period", [True, False])
@pytest.mark.parametrize("as_categorical", [True, False])
def test_replace_datetimelike_with_method(self, as_period, as_categorical):
    """Run the shared ``method="pad"`` checks on datetime-like data.

    The data is tz-aware datetimes, or daily periods when ``as_period``
    is set, with one NaT, optionally cast to category.
    """
    values = pd.date_range("2016-01-01", periods=5, tz="US/Pacific")
    if as_period:
        values = values.tz_localize(None).to_period("D")

    ser = pd.Series(values)
    ser.iloc[-2] = pd.NaT
    if as_categorical:
        ser = ser.astype("category")

    self._check_replace_with_method(ser)
|
||||
|
||||
def test_replace_with_compiled_regex(self):
|
||||
# https://github.com/pandas-dev/pandas/issues/35680
|
||||
s = pd.Series(["a", "b", "c"])
|
||||
regex = re.compile("^a$")
|
||||
result = s.replace({regex: "z"}, regex=True)
|
||||
expected = pd.Series(["z", "b", "c"])
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
def test_pandas_replace_na(self):
|
||||
# GH#43344
|
||||
# GH#56599
|
||||
ser = pd.Series(["AA", "BB", "CC", "DD", "EE", "", pd.NA, "AA"], dtype="string")
|
||||
regex_mapping = {
|
||||
"AA": "CC",
|
||||
"BB": "CC",
|
||||
"EE": "CC",
|
||||
"CC": "CC-REPL",
|
||||
}
|
||||
result = ser.replace(regex_mapping, regex=True)
|
||||
exp = pd.Series(
|
||||
["CC", "CC", "CC-REPL", "DD", "CC", "", pd.NA, "CC"], dtype="string"
|
||||
)
|
||||
tm.assert_series_equal(result, exp)
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"dtype, input_data, to_replace, expected_data",
|
||||
[
|
||||
("bool", [True, False], {True: False}, [False, False]),
|
||||
("int64", [1, 2], {1: 10, 2: 20}, [10, 20]),
|
||||
("Int64", [1, 2], {1: 10, 2: 20}, [10, 20]),
|
||||
("float64", [1.1, 2.2], {1.1: 10.1, 2.2: 20.5}, [10.1, 20.5]),
|
||||
("Float64", [1.1, 2.2], {1.1: 10.1, 2.2: 20.5}, [10.1, 20.5]),
|
||||
("string", ["one", "two"], {"one": "1", "two": "2"}, ["1", "2"]),
|
||||
(
|
||||
pd.IntervalDtype("int64"),
|
||||
IntervalArray([pd.Interval(1, 2), pd.Interval(2, 3)]),
|
||||
{pd.Interval(1, 2): pd.Interval(10, 20)},
|
||||
IntervalArray([pd.Interval(10, 20), pd.Interval(2, 3)]),
|
||||
),
|
||||
(
|
||||
pd.IntervalDtype("float64"),
|
||||
IntervalArray([pd.Interval(1.0, 2.7), pd.Interval(2.8, 3.1)]),
|
||||
{pd.Interval(1.0, 2.7): pd.Interval(10.6, 20.8)},
|
||||
IntervalArray([pd.Interval(10.6, 20.8), pd.Interval(2.8, 3.1)]),
|
||||
),
|
||||
(
|
||||
pd.PeriodDtype("M"),
|
||||
[pd.Period("2020-05", freq="M")],
|
||||
{pd.Period("2020-05", freq="M"): pd.Period("2020-06", freq="M")},
|
||||
[pd.Period("2020-06", freq="M")],
|
||||
),
|
||||
],
|
||||
)
|
||||
def test_replace_dtype(self, dtype, input_data, to_replace, expected_data):
|
||||
# GH#33484
|
||||
ser = pd.Series(input_data, dtype=dtype)
|
||||
result = ser.replace(to_replace)
|
||||
expected = pd.Series(expected_data, dtype=dtype)
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
def test_replace_string_dtype(self):
|
||||
# GH#40732, GH#44940
|
||||
ser = pd.Series(["one", "two", np.nan], dtype="string")
|
||||
res = ser.replace({"one": "1", "two": "2"})
|
||||
expected = pd.Series(["1", "2", np.nan], dtype="string")
|
||||
tm.assert_series_equal(res, expected)
|
||||
|
||||
# GH#31644
|
||||
ser2 = pd.Series(["A", np.nan], dtype="string")
|
||||
res2 = ser2.replace("A", "B")
|
||||
expected2 = pd.Series(["B", np.nan], dtype="string")
|
||||
tm.assert_series_equal(res2, expected2)
|
||||
|
||||
ser3 = pd.Series(["A", "B"], dtype="string")
|
||||
res3 = ser3.replace("A", pd.NA)
|
||||
expected3 = pd.Series([pd.NA, "B"], dtype="string")
|
||||
tm.assert_series_equal(res3, expected3)
|
||||
|
||||
def test_replace_string_dtype_list_to_replace(self):
|
||||
# GH#41215, GH#44940
|
||||
ser = pd.Series(["abc", "def"], dtype="string")
|
||||
res = ser.replace(["abc", "any other string"], "xyz")
|
||||
expected = pd.Series(["xyz", "def"], dtype="string")
|
||||
tm.assert_series_equal(res, expected)
|
||||
|
||||
def test_replace_string_dtype_regex(self):
|
||||
# GH#31644
|
||||
ser = pd.Series(["A", "B"], dtype="string")
|
||||
res = ser.replace(r".", "C", regex=True)
|
||||
expected = pd.Series(["C", "C"], dtype="string")
|
||||
tm.assert_series_equal(res, expected)
|
||||
|
||||
def test_replace_nullable_numeric(self):
|
||||
# GH#40732, GH#44940
|
||||
|
||||
floats = pd.Series([1.0, 2.0, 3.999, 4.4], dtype=pd.Float64Dtype())
|
||||
assert floats.replace({1.0: 9}).dtype == floats.dtype
|
||||
assert floats.replace(1.0, 9).dtype == floats.dtype
|
||||
assert floats.replace({1.0: 9.0}).dtype == floats.dtype
|
||||
assert floats.replace(1.0, 9.0).dtype == floats.dtype
|
||||
|
||||
res = floats.replace(to_replace=[1.0, 2.0], value=[9.0, 10.0])
|
||||
assert res.dtype == floats.dtype
|
||||
|
||||
ints = pd.Series([1, 2, 3, 4], dtype=pd.Int64Dtype())
|
||||
assert ints.replace({1: 9}).dtype == ints.dtype
|
||||
assert ints.replace(1, 9).dtype == ints.dtype
|
||||
assert ints.replace({1: 9.0}).dtype == ints.dtype
|
||||
assert ints.replace(1, 9.0).dtype == ints.dtype
|
||||
|
||||
# nullable (for now) raises instead of casting
|
||||
with pytest.raises(TypeError, match="Invalid value"):
|
||||
ints.replace({1: 9.5})
|
||||
with pytest.raises(TypeError, match="Invalid value"):
|
||||
ints.replace(1, 9.5)
|
||||
|
||||
@pytest.mark.parametrize("regex", [False, True])
|
||||
def test_replace_regex_dtype_series(self, regex):
|
||||
# GH-48644
|
||||
series = pd.Series(["0"], dtype=object)
|
||||
expected = pd.Series([1])
|
||||
msg = "Downcasting behavior in `replace`"
|
||||
with tm.assert_produces_warning(FutureWarning, match=msg):
|
||||
result = series.replace(to_replace="0", value=1, regex=regex)
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
@pytest.mark.parametrize("regex", [False, True])
|
||||
def test_replace_regex_dtype_series_string(self, regex):
|
||||
series = pd.Series(["0"], dtype="str")
|
||||
expected = pd.Series([1], dtype="int64")
|
||||
msg = "Downcasting behavior in `replace`"
|
||||
with tm.assert_produces_warning(FutureWarning, match=msg):
|
||||
result = series.replace(to_replace="0", value=1, regex=regex)
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
def test_replace_different_int_types(self, any_int_numpy_dtype):
|
||||
# GH#45311
|
||||
labs = pd.Series([1, 1, 1, 0, 0, 2, 2, 2], dtype=any_int_numpy_dtype)
|
||||
|
||||
maps = pd.Series([0, 2, 1], dtype=any_int_numpy_dtype)
|
||||
map_dict = dict(zip(maps.values, maps.index))
|
||||
|
||||
result = labs.replace(map_dict)
|
||||
expected = labs.replace({0: 0, 2: 1, 1: 2})
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
@pytest.mark.parametrize("val", [2, np.nan, 2.0])
|
||||
def test_replace_value_none_dtype_numeric(self, val):
|
||||
# GH#48231
|
||||
ser = pd.Series([1, val])
|
||||
result = ser.replace(val, None)
|
||||
expected = pd.Series([1, None], dtype=object)
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
def test_replace_change_dtype_series(self):
|
||||
# GH#25797
|
||||
df = pd.DataFrame({"Test": ["0.5", True, "0.6"]}, dtype=object)
|
||||
df["Test"] = df["Test"].replace([True], [np.nan])
|
||||
expected = pd.DataFrame({"Test": ["0.5", np.nan, "0.6"]}, dtype=object)
|
||||
tm.assert_frame_equal(df, expected)
|
||||
|
||||
df = pd.DataFrame({"Test": ["0.5", None, "0.6"]}, dtype=object)
|
||||
df["Test"] = df["Test"].replace([None], [np.nan])
|
||||
tm.assert_frame_equal(df, expected)
|
||||
|
||||
df = pd.DataFrame({"Test": ["0.5", None, "0.6"]}, dtype=object)
|
||||
df["Test"] = df["Test"].fillna(np.nan)
|
||||
tm.assert_frame_equal(df, expected)
|
||||
|
||||
@pytest.mark.parametrize("dtype", ["object", "Int64"])
|
||||
def test_replace_na_in_obj_column(self, dtype):
|
||||
# GH#47480
|
||||
ser = pd.Series([0, 1, pd.NA], dtype=dtype)
|
||||
expected = pd.Series([0, 2, pd.NA], dtype=dtype)
|
||||
result = ser.replace(to_replace=1, value=2)
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
ser.replace(to_replace=1, value=2, inplace=True)
|
||||
tm.assert_series_equal(ser, expected)
|
||||
|
||||
@pytest.mark.parametrize("val", [0, 0.5])
|
||||
def test_replace_numeric_column_with_na(self, val):
|
||||
# GH#50758
|
||||
ser = pd.Series([val, 1])
|
||||
expected = pd.Series([val, pd.NA])
|
||||
result = ser.replace(to_replace=1, value=pd.NA)
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
ser.replace(to_replace=1, value=pd.NA, inplace=True)
|
||||
tm.assert_series_equal(ser, expected)
|
||||
|
||||
def test_replace_ea_float_with_bool(self):
|
||||
# GH#55398
|
||||
ser = pd.Series([0.0], dtype="Float64")
|
||||
expected = ser.copy()
|
||||
result = ser.replace(False, 1.0)
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
ser = pd.Series([False], dtype="boolean")
|
||||
expected = ser.copy()
|
||||
result = ser.replace(0.0, True)
|
||||
tm.assert_series_equal(result, expected)
|
||||
@ -0,0 +1,225 @@
|
||||
from datetime import datetime
|
||||
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
import pandas as pd
|
||||
from pandas import (
|
||||
DataFrame,
|
||||
Index,
|
||||
MultiIndex,
|
||||
RangeIndex,
|
||||
Series,
|
||||
date_range,
|
||||
option_context,
|
||||
)
|
||||
import pandas._testing as tm
|
||||
|
||||
|
||||
class TestResetIndex:
|
||||
def test_reset_index_dti_round_trip(self):
|
||||
dti = date_range(start="1/1/2001", end="6/1/2001", freq="D")._with_freq(None)
|
||||
d1 = DataFrame({"v": np.random.default_rng(2).random(len(dti))}, index=dti)
|
||||
d2 = d1.reset_index()
|
||||
assert d2.dtypes.iloc[0] == np.dtype("M8[ns]")
|
||||
d3 = d2.set_index("index")
|
||||
tm.assert_frame_equal(d1, d3, check_names=False)
|
||||
|
||||
# GH#2329
|
||||
stamp = datetime(2012, 11, 22)
|
||||
df = DataFrame([[stamp, 12.1]], columns=["Date", "Value"])
|
||||
df = df.set_index("Date")
|
||||
|
||||
assert df.index[0] == stamp
|
||||
assert df.reset_index()["Date"].iloc[0] == stamp
|
||||
|
||||
def test_reset_index(self):
|
||||
df = DataFrame(
|
||||
1.1 * np.arange(120).reshape((30, 4)),
|
||||
columns=Index(list("ABCD"), dtype=object),
|
||||
index=Index([f"i-{i}" for i in range(30)], dtype=object),
|
||||
)[:5]
|
||||
ser = df.stack(future_stack=True)
|
||||
ser.index.names = ["hash", "category"]
|
||||
|
||||
ser.name = "value"
|
||||
df = ser.reset_index()
|
||||
assert "value" in df
|
||||
|
||||
df = ser.reset_index(name="value2")
|
||||
assert "value2" in df
|
||||
|
||||
# check inplace
|
||||
s = ser.reset_index(drop=True)
|
||||
s2 = ser
|
||||
return_value = s2.reset_index(drop=True, inplace=True)
|
||||
assert return_value is None
|
||||
tm.assert_series_equal(s, s2)
|
||||
|
||||
# level
|
||||
index = MultiIndex(
|
||||
levels=[["bar"], ["one", "two", "three"], [0, 1]],
|
||||
codes=[[0, 0, 0, 0, 0, 0], [0, 1, 2, 0, 1, 2], [0, 1, 0, 1, 0, 1]],
|
||||
)
|
||||
s = Series(np.random.default_rng(2).standard_normal(6), index=index)
|
||||
rs = s.reset_index(level=1)
|
||||
assert len(rs.columns) == 2
|
||||
|
||||
rs = s.reset_index(level=[0, 2], drop=True)
|
||||
tm.assert_index_equal(rs.index, Index(index.get_level_values(1)))
|
||||
assert isinstance(rs, Series)
|
||||
|
||||
def test_reset_index_name(self):
|
||||
s = Series([1, 2, 3], index=Index(range(3), name="x"))
|
||||
assert s.reset_index().index.name is None
|
||||
assert s.reset_index(drop=True).index.name is None
|
||||
|
||||
def test_reset_index_level(self):
|
||||
df = DataFrame([[1, 2, 3], [4, 5, 6]], columns=["A", "B", "C"])
|
||||
|
||||
for levels in ["A", "B"], [0, 1]:
|
||||
# With MultiIndex
|
||||
s = df.set_index(["A", "B"])["C"]
|
||||
|
||||
result = s.reset_index(level=levels[0])
|
||||
tm.assert_frame_equal(result, df.set_index("B"))
|
||||
|
||||
result = s.reset_index(level=levels[:1])
|
||||
tm.assert_frame_equal(result, df.set_index("B"))
|
||||
|
||||
result = s.reset_index(level=levels)
|
||||
tm.assert_frame_equal(result, df)
|
||||
|
||||
result = df.set_index(["A", "B"]).reset_index(level=levels, drop=True)
|
||||
tm.assert_frame_equal(result, df[["C"]])
|
||||
|
||||
with pytest.raises(KeyError, match="Level E "):
|
||||
s.reset_index(level=["A", "E"])
|
||||
|
||||
# With single-level Index
|
||||
s = df.set_index("A")["B"]
|
||||
|
||||
result = s.reset_index(level=levels[0])
|
||||
tm.assert_frame_equal(result, df[["A", "B"]])
|
||||
|
||||
result = s.reset_index(level=levels[:1])
|
||||
tm.assert_frame_equal(result, df[["A", "B"]])
|
||||
|
||||
result = s.reset_index(level=levels[0], drop=True)
|
||||
tm.assert_series_equal(result, df["B"])
|
||||
|
||||
with pytest.raises(IndexError, match="Too many levels"):
|
||||
s.reset_index(level=[0, 1, 2])
|
||||
|
||||
# Check that .reset_index([],drop=True) doesn't fail
|
||||
result = Series(range(4)).reset_index([], drop=True)
|
||||
expected = Series(range(4))
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
def test_reset_index_range(self):
|
||||
# GH 12071
|
||||
s = Series(range(2), name="A", dtype="int64")
|
||||
series_result = s.reset_index()
|
||||
assert isinstance(series_result.index, RangeIndex)
|
||||
series_expected = DataFrame(
|
||||
[[0, 0], [1, 1]], columns=["index", "A"], index=RangeIndex(stop=2)
|
||||
)
|
||||
tm.assert_frame_equal(series_result, series_expected)
|
||||
|
||||
def test_reset_index_drop_errors(self):
|
||||
# GH 20925
|
||||
|
||||
# KeyError raised for series index when passed level name is missing
|
||||
s = Series(range(4))
|
||||
with pytest.raises(KeyError, match="does not match index name"):
|
||||
s.reset_index("wrong", drop=True)
|
||||
with pytest.raises(KeyError, match="does not match index name"):
|
||||
s.reset_index("wrong")
|
||||
|
||||
# KeyError raised for series when level to be dropped is missing
|
||||
s = Series(range(4), index=MultiIndex.from_product([[1, 2]] * 2))
|
||||
with pytest.raises(KeyError, match="not found"):
|
||||
s.reset_index("wrong", drop=True)
|
||||
|
||||
def test_reset_index_with_drop(self):
|
||||
arrays = [
|
||||
["bar", "bar", "baz", "baz", "qux", "qux", "foo", "foo"],
|
||||
["one", "two", "one", "two", "one", "two", "one", "two"],
|
||||
]
|
||||
tuples = zip(*arrays)
|
||||
index = MultiIndex.from_tuples(tuples)
|
||||
data = np.random.default_rng(2).standard_normal(8)
|
||||
ser = Series(data, index=index)
|
||||
ser.iloc[3] = np.nan
|
||||
|
||||
deleveled = ser.reset_index()
|
||||
assert isinstance(deleveled, DataFrame)
|
||||
assert len(deleveled.columns) == len(ser.index.levels) + 1
|
||||
assert deleveled.index.name == ser.index.name
|
||||
|
||||
deleveled = ser.reset_index(drop=True)
|
||||
assert isinstance(deleveled, Series)
|
||||
assert deleveled.index.name == ser.index.name
|
||||
|
||||
def test_reset_index_inplace_and_drop_ignore_name(self):
|
||||
# GH#44575
|
||||
ser = Series(range(2), name="old")
|
||||
ser.reset_index(name="new", drop=True, inplace=True)
|
||||
expected = Series(range(2), name="old")
|
||||
tm.assert_series_equal(ser, expected)
|
||||
|
||||
def test_reset_index_drop_infer_string(self):
|
||||
# GH#56160
|
||||
pytest.importorskip("pyarrow")
|
||||
ser = Series(["a", "b", "c"], dtype=object)
|
||||
with option_context("future.infer_string", True):
|
||||
result = ser.reset_index(drop=True)
|
||||
tm.assert_series_equal(result, ser)
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "array, dtype",
    [
        (["a", "b"], object),
        (
            pd.period_range("12-1-2000", periods=2, freq="Q-DEC"),
            pd.PeriodDtype(freq="Q-DEC"),
        ),
    ],
)
def test_reset_index_dtypes_on_empty_series_with_multiindex(
    array, dtype, using_infer_string
):
    """Level dtypes survive ``reset_index`` on an empty, MultiIndexed Series.

    ``array`` supplies the third index level and ``dtype`` the dtype expected
    for it; for the object case the expectation becomes ``"str"`` when
    ``using_infer_string`` is true.
    """
    # GH 19602 - Preserve dtype on empty Series with MultiIndex
    mi = MultiIndex.from_product([[0, 1], [0.5, 1.0], array])
    empty = Series(dtype=object, index=mi)[:0]
    result = empty.reset_index().dtypes

    if dtype == object:
        third_level_dtype = "str" if using_infer_string else object
    else:
        third_level_dtype = dtype

    expected = Series(
        {
            "level_0": np.int64,
            "level_1": np.float64,
            "level_2": third_level_dtype,
            0: object,
        }
    )
    tm.assert_series_equal(result, expected)
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "names, expected_names",
    [
        (["A", "A"], ["A", "A"]),
        (["level_1", None], ["level_1", "level_1"]),
    ],
)
@pytest.mark.parametrize("allow_duplicates", [False, True])
def test_column_name_duplicates(names, expected_names, allow_duplicates):
    """``reset_index`` raises on clashing column labels unless duplicates are allowed."""
    # GH#44755 reset_index with duplicate column labels
    mi = MultiIndex.from_arrays([[1], [1]], names=names)
    ser = Series([1], index=mi)

    if not allow_duplicates:
        with pytest.raises(ValueError, match="cannot insert"):
            ser.reset_index()
        return

    result = ser.reset_index(allow_duplicates=True)
    expected = DataFrame([[1, 1, 1]], columns=expected_names + [0])
    tm.assert_frame_equal(result, expected)
|
||||
@ -0,0 +1,81 @@
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
import pandas as pd
|
||||
from pandas import Series
|
||||
import pandas._testing as tm
|
||||
|
||||
|
||||
class TestSeriesRound:
    """Tests for ``Series.round`` and the numpy, builtin and ``.dt`` rounding entry points."""

    def test_round(self, datetime_series):
        """Rounding to two decimals matches ``np.round`` and keeps index and name."""
        datetime_series.index.name = "index_name"
        rounded = datetime_series.round(2)
        expected = Series(
            np.round(datetime_series.values, 2),
            index=datetime_series.index,
            name="ts",
        )
        tm.assert_series_equal(rounded, expected)
        assert rounded.name == datetime_series.name

    def test_round_numpy(self, any_float_dtype):
        """``np.round`` works on a Series, but passing ``out=`` raises ValueError."""
        # See GH#12600
        ser = Series([1.53, 1.36, 0.06], dtype=any_float_dtype)
        expected = Series([2.0, 1.0, 0.0], dtype=any_float_dtype)
        tm.assert_series_equal(np.round(ser, decimals=0), expected)

        with pytest.raises(ValueError, match="the 'out' parameter is not supported"):
            np.round(ser, decimals=0, out=ser)

    def test_round_numpy_with_nan(self, any_float_dtype):
        """Rounding a Series that contains NaN emits no warning and keeps the NaN."""
        # See GH#14197
        ser = Series([1.53, np.nan, 0.06], dtype=any_float_dtype)
        expected = Series([2.0, np.nan, 0.0], dtype=any_float_dtype)
        with tm.assert_produces_warning(None):
            rounded = ser.round()
        tm.assert_series_equal(rounded, expected)

    def test_round_builtin(self, any_float_dtype):
        """The builtin ``round`` works with and without a decimals argument."""
        positions = range(3)
        ser = Series([1.123, 2.123, 3.123], index=positions, dtype=any_float_dtype)

        # no decimals argument
        whole = Series([1.0, 2.0, 3.0], index=positions, dtype=any_float_dtype)
        tm.assert_series_equal(round(ser), whole)

        # explicit number of decimals
        decimals = 2
        two_places = Series(
            [1.12, 2.12, 3.12], index=positions, dtype=any_float_dtype
        )
        tm.assert_series_equal(round(ser, decimals), two_places)

    @pytest.mark.parametrize("method", ["round", "floor", "ceil"])
    @pytest.mark.parametrize("freq", ["s", "5s", "min", "5min", "h", "5h"])
    def test_round_nat(self, method, freq, unit):
        """``.dt.round``/``floor``/``ceil`` leave NaT untouched for the given unit."""
        # GH14940, GH#56158
        m8_dtype = f"M8[{unit}]"
        ser = Series([pd.NaT], dtype=m8_dtype)
        expected = Series(pd.NaT, dtype=m8_dtype)
        result = getattr(ser.dt, method)(freq)
        tm.assert_series_equal(result, expected)

    def test_round_ea_boolean(self):
        """Rounding a ``boolean`` Series returns equal values that are not shared."""
        # GH#55936
        ser = Series([True, False], dtype="boolean")
        snapshot = ser.copy()
        rounded = ser.round(2)
        tm.assert_series_equal(rounded, snapshot)
        # Writing into the result must leave the source Series unchanged.
        rounded.iloc[0] = False
        tm.assert_series_equal(ser, snapshot)

    def test_round_dtype_object(self):
        """Rounding an object-dtype Series raises TypeError."""
        # GH#61206
        ser = Series([0.2], dtype="object")
        with pytest.raises(
            TypeError, match="Expected numeric dtype, got object instead."
        ):
            ser.round()
|
||||
@ -0,0 +1,77 @@
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
import pandas as pd
|
||||
from pandas import (
|
||||
Series,
|
||||
Timestamp,
|
||||
date_range,
|
||||
)
|
||||
import pandas._testing as tm
|
||||
from pandas.api.types import is_scalar
|
||||
|
||||
|
||||
class TestSeriesSearchSorted:
    """Tests for ``Series.searchsorted`` over numeric and datetime data."""

    def test_searchsorted(self):
        """A scalar lookup returns a scalar position for either ``side``."""
        ser = Series([1, 2, 3])

        for side, position in (("left", 0), ("right", 1)):
            found = ser.searchsorted(1, side=side)
            assert is_scalar(found)
            assert found == position

    def test_searchsorted_numeric_dtypes_scalar(self):
        """Scalar input yields a scalar; a one-element list yields an intp array."""
        ser = Series([1, 2, 90, 1000, 3e9])

        scalar_pos = ser.searchsorted(30)
        assert is_scalar(scalar_pos)
        assert scalar_pos == 2

        array_pos = ser.searchsorted([30])
        tm.assert_numpy_array_equal(array_pos, np.array([2], dtype=np.intp))

    def test_searchsorted_numeric_dtypes_vector(self):
        """A list of values returns an intp array of insertion points."""
        ser = Series([1, 2, 90, 1000, 3e9])
        positions = ser.searchsorted([91, 2e6])
        tm.assert_numpy_array_equal(positions, np.array([3, 4], dtype=np.intp))

    def test_searchsorted_datetime64_scalar(self):
        """A Timestamp lookup in datetime64 data returns a scalar position."""
        ser = Series(date_range("20120101", periods=10, freq="2D"))
        position = ser.searchsorted(Timestamp("20120102"))
        assert is_scalar(position)
        assert position == 1

    def test_searchsorted_datetime64_scalar_mixed_timezones(self):
        """A Timestamp in a different timezone than the data can be looked up."""
        # GH 30086
        ser = Series(date_range("20120101", periods=10, freq="2D", tz="UTC"))
        needle = Timestamp("20120102", tz="America/New_York")
        position = ser.searchsorted(needle)
        assert is_scalar(position)
        assert position == 1

    def test_searchsorted_datetime64_list(self):
        """A list of Timestamps returns an intp array of insertion points."""
        ser = Series(date_range("20120101", periods=10, freq="2D"))
        needles = [Timestamp("20120102"), Timestamp("20120104")]
        positions = ser.searchsorted(needles)
        tm.assert_numpy_array_equal(positions, np.array([1, 2], dtype=np.intp))

    def test_searchsorted_sorter(self):
        """``sorter=`` allows searching data that is not stored in sorted order."""
        # GH8490
        ser = Series([3, 1, 2])
        positions = ser.searchsorted([0, 3], sorter=np.argsort(ser))
        tm.assert_numpy_array_equal(positions, np.array([0, 2], dtype=np.intp))

    def test_searchsorted_dataframe_fail(self):
        """Passing a DataFrame as the value raises ValueError."""
        # GH#49620
        ser = Series([1, 2, 3, 4, 5])
        frame = pd.DataFrame([[1, 2], [3, 4]])
        msg = "Value must be 1-D array-like or scalar, DataFrame is not supported"
        with pytest.raises(ValueError, match=msg):
            ser.searchsorted(frame)
|
||||
@ -0,0 +1,21 @@
|
||||
from datetime import datetime
|
||||
|
||||
from pandas import Series
|
||||
|
||||
|
||||
class TestSetName:
    """Tests for naming a Series through ``_set_name`` and the ``name`` attribute."""

    def test_set_name(self):
        """``_set_name`` returns a new, renamed Series and leaves the original unnamed."""
        original = Series([1, 2, 3])
        renamed = original._set_name("foo")
        assert renamed.name == "foo"
        assert original.name is None
        assert original is not renamed

    def test_set_name_attribute(self):
        """Assigning ``.name`` accepts several kinds of values and reads back equal."""
        unnamed = Series([1, 2, 3])
        named = Series([1, 2, 3], name="bar")
        candidates = [7, 7.0, "name", datetime(2001, 1, 1), (1,), "\u05D0"]
        for name in candidates:
            for target in (unnamed, named):
                target.name = name
                assert target.name == name
|
||||
@ -0,0 +1,22 @@
|
||||
import pytest
|
||||
|
||||
from pandas import Series
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "data, index, expected",
    [
        # Each case appears once; the original list repeated two of them verbatim.
        ([1, 2, 3], None, 3),
        ({"a": 1, "b": 2, "c": 3}, None, 3),
        ([1, 2, 3], ["x", "y", "z"], 3),
        ([1, 2, 3, 4], ["x", "y", "z", "w"], 4),
        ([1, 2, 3, 4, 5], ["x", "y", "z", "w", "n"], 5),
    ],
)
def test_series(data, index, expected):
    """``Series.size`` equals the number of elements and is a plain ``int``.

    Parameters
    ----------
    data : list or dict
        Values used to build the Series.
    index : list or None
        Optional index labels passed to the constructor.
    expected : int
        Expected value of ``Series.size``.
    """
    # GH#52897
    ser = Series(data, index=index)
    size = ser.size
    assert size == expected
    assert isinstance(size, int)
|
||||
@ -0,0 +1,337 @@
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
from pandas import (
|
||||
DatetimeIndex,
|
||||
IntervalIndex,
|
||||
MultiIndex,
|
||||
Series,
|
||||
)
|
||||
import pandas._testing as tm
|
||||
|
||||
|
||||
@pytest.fixture(params=["quicksort", "mergesort", "heapsort", "stable"])
def sort_kind(request):
    """Parametrized fixture returning, in turn, each sort-algorithm name in ``params``."""
    kind = request.param
    return kind
|
||||
|
||||
|
||||
class TestSeriesSortIndex:
    """Tests for ``Series.sort_index`` without a ``key`` function."""

    def test_sort_index_name(self, datetime_series):
        """Sorting the index keeps the Series name."""
        result = datetime_series.sort_index(ascending=False)
        assert result.name == datetime_series.name

    def test_sort_index(self, datetime_series):
        """Sorting a shuffled Series restores the original order.

        Also checks descending order, the ``level`` and ``axis`` compatibility
        arguments, and that ``axis=1`` raises ValueError.
        """
        datetime_series.index = datetime_series.index._with_freq(None)

        shuffled = list(datetime_series.index)
        np.random.default_rng(2).shuffle(shuffled)

        random_order = datetime_series.reindex(shuffled)
        sorted_series = random_order.sort_index()
        tm.assert_series_equal(sorted_series, datetime_series)

        # descending
        sorted_series = random_order.sort_index(ascending=False)
        tm.assert_series_equal(
            sorted_series, datetime_series.reindex(datetime_series.index[::-1])
        )

        # compat on level
        sorted_series = random_order.sort_index(level=0)
        tm.assert_series_equal(sorted_series, datetime_series)

        # compat on axis
        sorted_series = random_order.sort_index(axis=0)
        tm.assert_series_equal(sorted_series, datetime_series)

        msg = "No axis named 1 for object type Series"
        with pytest.raises(ValueError, match=msg):
            # Exercise the index-sorting method itself; the previous version
            # called the value-sorting method here by mistake.
            random_order.sort_index(axis=1)

        sorted_series = random_order.sort_index(level=0, axis=0)
        tm.assert_series_equal(sorted_series, datetime_series)

        with pytest.raises(ValueError, match=msg):
            random_order.sort_index(level=0, axis=1)

    def test_sort_index_inplace(self, datetime_series):
        """``inplace=True`` returns None and sorts the Series itself."""
        datetime_series.index = datetime_series.index._with_freq(None)

        # For GH#11402
        shuffled = list(datetime_series.index)
        np.random.default_rng(2).shuffle(shuffled)

        # descending
        random_order = datetime_series.reindex(shuffled)
        result = random_order.sort_index(ascending=False, inplace=True)

        assert result is None
        expected = datetime_series.reindex(datetime_series.index[::-1])
        expected.index = expected.index._with_freq(None)
        tm.assert_series_equal(random_order, expected)

        # ascending
        random_order = datetime_series.reindex(shuffled)
        result = random_order.sort_index(ascending=True, inplace=True)

        assert result is None
        expected = datetime_series.copy()
        expected.index = expected.index._with_freq(None)
        tm.assert_series_equal(random_order, expected)

    def test_sort_index_level(self):
        """Sorting by level reorders only when the remaining levels are sorted too."""
        mi = MultiIndex.from_tuples([[1, 1, 3], [1, 1, 1]], names=list("ABC"))
        s = Series([1, 2], mi)
        backwards = s.iloc[[1, 0]]

        # default sort_remaining=True: the order is decided by level "C"
        for level in ("A", ["A", "B"]):
            res = s.sort_index(level=level)
            tm.assert_series_equal(backwards, res)

        # sort_remaining=False: the requested levels tie, so the order is kept
        for level in ("A", ["A", "B"]):
            res = s.sort_index(level=level, sort_remaining=False)
            tm.assert_series_equal(s, res)

    @pytest.mark.parametrize("level", ["A", 0])  # GH#21052
    def test_sort_index_multiindex(self, level):
        """The first level can be given by name or by position."""
        mi = MultiIndex.from_tuples([[1, 1, 3], [1, 1, 1]], names=list("ABC"))
        s = Series([1, 2], mi)
        backwards = s.iloc[[1, 0]]

        # implicit sort_remaining=True
        res = s.sort_index(level=level)
        tm.assert_series_equal(backwards, res)

        # GH#13496
        # sort has no effect without remaining lvls
        res = s.sort_index(level=level, sort_remaining=False)
        tm.assert_series_equal(s, res)

    def test_sort_index_kind(self, sort_kind):
        """Every supported ``kind`` produces the same sorted index."""
        # GH#14444 & GH#13589: Add support for sort algo choosing
        series = Series(index=[3, 2, 1, 4, 3], dtype=object)
        expected_series = Series(index=[1, 2, 3, 3, 4], dtype=object)

        index_sorted_series = series.sort_index(kind=sort_kind)
        tm.assert_series_equal(expected_series, index_sorted_series)

    def test_sort_index_na_position(self):
        """``na_position`` places a NaN label first or last."""
        series = Series(index=[3, 2, 1, 4, 3, np.nan], dtype=object)

        expected_series_first = Series(index=[np.nan, 1, 2, 3, 3, 4], dtype=object)
        index_sorted_series = series.sort_index(na_position="first")
        tm.assert_series_equal(expected_series_first, index_sorted_series)

        expected_series_last = Series(index=[1, 2, 3, 3, 4, np.nan], dtype=object)
        index_sorted_series = series.sort_index(na_position="last")
        tm.assert_series_equal(expected_series_last, index_sorted_series)

    def test_sort_index_intervals(self):
        """An IntervalIndex sorts in both ascending and descending order."""
        s = Series(
            [np.nan, 1, 2, 3], IntervalIndex.from_arrays([0, 1, 2, 3], [1, 2, 3, 4])
        )

        # already ascending, so sorting returns an equal Series
        result = s.sort_index()
        tm.assert_series_equal(result, s)

        result = s.sort_index(ascending=False)
        expected = Series(
            [3, 2, 1, np.nan], IntervalIndex.from_arrays([3, 2, 1, 0], [4, 3, 2, 1])
        )
        tm.assert_series_equal(result, expected)

    @pytest.mark.parametrize("inplace", [True, False])
    @pytest.mark.parametrize(
        "original_list, sorted_list, ascending, ignore_index, output_index",
        [
            ([2, 3, 6, 1], [2, 3, 6, 1], True, True, [0, 1, 2, 3]),
            ([2, 3, 6, 1], [2, 3, 6, 1], True, False, [0, 1, 2, 3]),
            ([2, 3, 6, 1], [1, 6, 3, 2], False, True, [0, 1, 2, 3]),
            ([2, 3, 6, 1], [1, 6, 3, 2], False, False, [3, 2, 1, 0]),
        ],
    )
    def test_sort_index_ignore_index(
        self, inplace, original_list, sorted_list, ascending, ignore_index, output_index
    ):
        """``ignore_index`` controls the resulting labels; the source is not modified."""
        # GH 30114
        ser = Series(original_list)
        expected = Series(sorted_list, index=output_index)
        kwargs = {
            "ascending": ascending,
            "ignore_index": ignore_index,
            "inplace": inplace,
        }

        if inplace:
            # sort a copy in place so ``ser`` can be compared afterwards
            result_ser = ser.copy()
            result_ser.sort_index(**kwargs)
        else:
            result_ser = ser.sort_index(**kwargs)

        tm.assert_series_equal(result_ser, expected)
        tm.assert_series_equal(ser, Series(original_list))

    def test_sort_index_ascending_list(self):
        """``ascending`` may be a single bool or one bool per sorted level."""
        # GH#16934

        # Set up a Series with a three level MultiIndex
        arrays = [
            ["bar", "bar", "baz", "baz", "foo", "foo", "qux", "qux"],
            ["one", "two", "one", "two", "one", "two", "one", "two"],
            [4, 3, 2, 1, 4, 3, 2, 1],
        ]
        tuples = zip(*arrays)
        mi = MultiIndex.from_tuples(tuples, names=["first", "second", "third"])
        ser = Series(range(8), index=mi)

        # Sort with boolean ascending
        result = ser.sort_index(level=["third", "first"], ascending=False)
        expected = ser.iloc[[4, 0, 5, 1, 6, 2, 7, 3]]
        tm.assert_series_equal(result, expected)

        # Sort with list of boolean ascending
        result = ser.sort_index(level=["third", "first"], ascending=[False, True])
        expected = ser.iloc[[0, 4, 1, 5, 2, 6, 3, 7]]
        tm.assert_series_equal(result, expected)

    @pytest.mark.parametrize(
        "ascending",
        [
            None,
            (True, None),
            (False, "True"),
        ],
    )
    def test_sort_index_ascending_bad_value_raises(self, ascending):
        """Non-bool ``ascending`` values raise ValueError."""
        ser = Series(range(10), index=[0, 3, 2, 1, 4, 5, 7, 6, 8, 9])
        match = 'For argument "ascending" expected type bool'
        with pytest.raises(ValueError, match=match):
            ser.sort_index(ascending=ascending)
|
||||
|
||||
|
||||
class TestSeriesSortIndexKey:
    """Tests for ``Series.sort_index`` when a ``key`` function is supplied."""

    def test_sort_index_multiindex_key(self):
        """A key is applied to the selected MultiIndex level before sorting."""
        mi = MultiIndex.from_tuples([[1, 1, 3], [1, 1, 1]], names=list("ABC"))
        ser = Series([1, 2], mi)
        reversed_ser = ser.iloc[[1, 0]]

        # negating level "C" keeps the rows in their original order
        negated = ser.sort_index(level="C", key=lambda x: -x)
        tm.assert_series_equal(ser, negated)

        # an identity key gives the same result as a plain sort on "C"
        identity = ser.sort_index(level="C", key=lambda x: x)  # nothing happens
        tm.assert_series_equal(reversed_ser, identity)

    def test_sort_index_multiindex_key_multi_level(self):
        """Same as the single-level case, but sorting on levels "A" and "C"."""
        mi = MultiIndex.from_tuples([[1, 1, 3], [1, 1, 1]], names=list("ABC"))
        ser = Series([1, 2], mi)
        reversed_ser = ser.iloc[[1, 0]]
        levels = ["A", "C"]

        negated = ser.sort_index(level=levels, key=lambda x: -x)
        tm.assert_series_equal(ser, negated)

        identity = ser.sort_index(level=levels, key=lambda x: x)  # nothing happens
        tm.assert_series_equal(reversed_ser, identity)

    def test_sort_index_key(self):
        """A lower-casing key makes the sort case-insensitive."""
        series = Series(np.arange(6, dtype="int64"), index=list("aaBBca"))

        def lower(labels):
            return labels.str.lower()

        # (sort_index keyword arguments, expected positional order)
        scenarios = [
            ({}, [2, 3, 0, 1, 5, 4]),
            ({"key": lower}, [0, 1, 5, 2, 3, 4]),
            ({"key": lower, "ascending": False}, [4, 2, 3, 0, 1, 5]),
        ]
        for sort_kwargs, order in scenarios:
            result = series.sort_index(**sort_kwargs)
            tm.assert_series_equal(result, series.iloc[order])

    def test_sort_index_key_int(self):
        """Numeric keys: negation reverses the order, doubling keeps it."""
        values = np.arange(6, dtype="int64")
        series = Series(values, index=np.arange(6, dtype="int64"))

        tm.assert_series_equal(series.sort_index(), series)

        negated = series.sort_index(key=lambda x: -x)
        tm.assert_series_equal(negated, series.sort_index(ascending=False))

        doubled = series.sort_index(key=lambda x: 2 * x)
        tm.assert_series_equal(doubled, series)

    def test_sort_index_kind_key(self, sort_kind, sort_by_key):
        """``kind`` and ``key`` can be combined."""
        # GH #14444 & #13589: Add support for sort algo choosing
        unsorted = Series(index=[3, 2, 1, 4, 3], dtype=object)
        wanted = Series(index=[1, 2, 3, 3, 4], dtype=object)

        result = unsorted.sort_index(kind=sort_kind, key=sort_by_key)
        tm.assert_series_equal(wanted, result)

    def test_sort_index_kind_neg_key(self, sort_kind):
        """A negating key yields descending labels for every ``kind``."""
        # GH #14444 & #13589: Add support for sort algo choosing
        unsorted = Series(index=[3, 2, 1, 4, 3], dtype=object)
        wanted = Series(index=[4, 3, 3, 2, 1], dtype=object)

        result = unsorted.sort_index(kind=sort_kind, key=lambda x: -x)
        tm.assert_series_equal(wanted, result)

    def test_sort_index_na_position_key(self, sort_by_key):
        """``na_position`` is honoured when a key is supplied."""
        series = Series(index=[3, 2, 1, 4, 3, np.nan], dtype=object)

        nan_first = Series(index=[np.nan, 1, 2, 3, 3, 4], dtype=object)
        result = series.sort_index(na_position="first", key=sort_by_key)
        tm.assert_series_equal(nan_first, result)

        nan_last = Series(index=[1, 2, 3, 3, 4, np.nan], dtype=object)
        result = series.sort_index(na_position="last", key=sort_by_key)
        tm.assert_series_equal(nan_last, result)

    def test_changes_length_raises(self):
        """A key that changes the length of the index raises ValueError."""
        ser = Series([1, 2, 3])
        with pytest.raises(ValueError, match="change the shape"):
            ser.sort_index(key=lambda x: x[:1])

    def test_sort_values_key_type(self):
        """Keys derived from a DatetimeIndex (ints or strings) drive ``sort_index``."""
        dates = DatetimeIndex(["2008-10-24", "2008-11-23", "2007-12-22"])
        ser = Series([1, 2, 3], dates)

        # (key function, expected positional order)
        scenarios = [
            (lambda x: x.month, [0, 1, 2]),
            (lambda x: x.day, [2, 1, 0]),
            (lambda x: x.year, [2, 0, 1]),
            (lambda x: x.month_name(), [2, 1, 0]),
        ]
        for key, order in scenarios:
            result = ser.sort_index(key=key)
            tm.assert_series_equal(result, ser.iloc[order])

    @pytest.mark.parametrize(
        "ascending",
        [
            [True, False],
            [False, True],
        ],
    )
    def test_sort_index_multi_already_monotonic(self, ascending):
        """Per-level ``ascending`` is applied even when the index is already sorted."""
        # GH 56049
        mi = MultiIndex.from_product([[1, 2], [3, 4]])
        ser = Series(range(len(mi)), index=mi)
        result = ser.sort_index(ascending=ascending)
        if ascending == [True, False]:
            expected = ser.take([1, 0, 3, 2])
        elif ascending == [False, True]:
            expected = ser.take([2, 3, 0, 1])
        tm.assert_series_equal(result, expected)
|
||||
@ -0,0 +1,246 @@
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
from pandas import (
|
||||
Categorical,
|
||||
DataFrame,
|
||||
Series,
|
||||
)
|
||||
import pandas._testing as tm
|
||||
|
||||
|
||||
class TestSeriesSortValues:
    """Tests for ``Series.sort_values`` without a ``key`` function."""

    def test_sort_values(self, datetime_series, using_copy_on_write):
        """Broad check of ordering, NaN placement, argument validation and inplace sorting."""
        # check indexes are reordered corresponding with the values
        unsorted = Series([3, 2, 4, 1], ["A", "B", "C", "D"])
        wanted = Series([1, 2, 3, 4], ["D", "B", "A", "C"])
        tm.assert_series_equal(wanted, unsorted.sort_values())

        ts = datetime_series.copy()
        ts[:5] = np.nan
        sorted_valid = np.sort(ts.values[5:])

        # without na_position the five NaNs end up last
        result = ts.sort_values()
        assert np.isnan(result[-5:]).all()
        tm.assert_numpy_array_equal(result[:-5].values, sorted_valid)

        # na_position
        result = ts.sort_values(na_position="first")
        assert np.isnan(result[:5]).all()
        tm.assert_numpy_array_equal(result[5:].values, sorted_valid)

        # something object-type
        # no failure
        Series(["A", "B"], [1, 2]).sort_values()

        # ascending=False
        descending_valid = np.sort(ts.dropna().values)[::-1]
        for extra in ({}, {"na_position": "first"}):
            ordered = ts.sort_values(ascending=False, **extra)
            tm.assert_almost_equal(descending_valid, ordered.dropna().values)

        # ascending=[False] should behave the same as ascending=False
        for extra in ({}, {"na_position": "first"}):
            ordered = ts.sort_values(ascending=[False], **extra)
            expected = ts.sort_values(ascending=False, **extra)
            tm.assert_series_equal(expected, ordered)

        # invalid ``ascending`` values, each paired with its error message
        invalid_ascending = [
            (
                None,
                'For argument "ascending" expected type bool, received type NoneType.',
            ),
            ([], r"Length of ascending \(0\) must be 1 for Series"),
            ([1, 2, 3], r"Length of ascending \(3\) must be 1 for Series"),
            ([False, False], r"Length of ascending \(2\) must be 1 for Series"),
            (
                "foobar",
                'For argument "ascending" expected type bool, received type str.',
            ),
        ]
        for bad_value, msg in invalid_ascending:
            with pytest.raises(ValueError, match=msg):
                ts.sort_values(ascending=bad_value)

        # inplace=True
        ts = datetime_series.copy()
        return_value = ts.sort_values(ascending=False, inplace=True)
        assert return_value is None
        tm.assert_series_equal(ts, datetime_series.sort_values(ascending=False))
        tm.assert_index_equal(
            ts.index, datetime_series.sort_values(ascending=False).index
        )

        # GH#5856/5853
        # Series.sort_values operating on a view
        frame = DataFrame(np.random.default_rng(2).standard_normal((10, 4)))
        first_col = frame.iloc[:, 0]

        msg = (
            "This Series is a view of some other array, to sort in-place "
            "you must create a copy"
        )
        if not using_copy_on_write:
            with pytest.raises(ValueError, match=msg):
                first_col.sort_values(inplace=True)
        else:
            first_col.sort_values(inplace=True)
            tm.assert_series_equal(first_col, frame.iloc[:, 0].sort_values())

    def test_sort_values_categorical(self):
        """Categorical data sorts by category order, in a Series and in a DataFrame."""
        raw = Categorical(["a", "b", "b", "a"], ordered=False)
        cat = Series(raw.copy())

        # sort in the categories order
        expected = Series(
            Categorical(["a", "a", "b", "b"], ordered=False), index=[0, 3, 1, 2]
        )
        tm.assert_series_equal(cat.sort_values(), expected)

        alphabetical = np.array(["a", "b", "c", "d"], dtype=np.object_)
        reverse_alphabetical = np.array(["d", "c", "b", "a"], dtype=np.object_)

        cat = Series(Categorical(["a", "c", "b", "d"], ordered=True))
        res = cat.sort_values()
        tm.assert_numpy_array_equal(res.__array__(), alphabetical)

        cat = Series(
            Categorical(
                ["a", "c", "b", "d"], categories=["a", "b", "c", "d"], ordered=True
            )
        )
        res = cat.sort_values()
        tm.assert_numpy_array_equal(res.__array__(), alphabetical)

        res = cat.sort_values(ascending=False)
        tm.assert_numpy_array_equal(res.__array__(), reverse_alphabetical)

        raw_cat1 = Categorical(
            ["a", "b", "c", "d"], categories=["a", "b", "c", "d"], ordered=False
        )
        raw_cat2 = Categorical(
            ["a", "b", "c", "d"], categories=["d", "c", "b", "a"], ordered=True
        )
        strings = ["a", "b", "c", "d"]
        df = DataFrame(
            {
                "unsort": raw_cat1,
                "sort": raw_cat2,
                "string": strings,
                "values": [1, 2, 3, 4],
            }
        )

        # Cats must be sorted in a dataframe
        res = df.sort_values(by=["string"], ascending=False)
        tm.assert_numpy_array_equal(
            res["sort"].values.__array__(), reverse_alphabetical
        )
        assert res["sort"].dtype == "category"

        res = df.sort_values(by=["sort"], ascending=False)
        exp = df.sort_values(by=["string"], ascending=True)
        tm.assert_series_equal(res["values"], exp["values"])
        assert res["sort"].dtype == "category"
        assert res["unsort"].dtype == "category"

        # unordered cat, but we allow this
        df.sort_values(by=["unsort"], ascending=False)

        # multi-columns sort
        # GH#7848
        df = DataFrame(
            {"id": [6, 5, 4, 3, 2, 1], "raw_grade": ["a", "b", "b", "a", "a", "e"]}
        )
        df["grade"] = Categorical(df["raw_grade"], ordered=True)
        df["grade"] = df["grade"].cat.set_categories(["b", "e", "a"])

        # sorts 'grade' according to the order of the categories
        result = df.sort_values(by=["grade"])
        tm.assert_frame_equal(result, df.iloc[[1, 2, 5, 0, 3, 4]])

        # multi
        result = df.sort_values(by=["grade", "id"])
        tm.assert_frame_equal(result, df.iloc[[2, 1, 5, 4, 3, 0]])

    @pytest.mark.parametrize("inplace", [True, False])
    @pytest.mark.parametrize(
        "original_list, sorted_list, ignore_index, output_index",
        [
            ([2, 3, 6, 1], [6, 3, 2, 1], True, [0, 1, 2, 3]),
            ([2, 3, 6, 1], [6, 3, 2, 1], False, [2, 1, 0, 3]),
        ],
    )
    def test_sort_values_ignore_index(
        self, inplace, original_list, sorted_list, ignore_index, output_index
    ):
        """``ignore_index`` controls the resulting labels; the source is not modified."""
        # GH 30114
        ser = Series(original_list)
        expected = Series(sorted_list, index=output_index)
        options = {"ignore_index": ignore_index, "inplace": inplace}

        if not inplace:
            result_ser = ser.sort_values(ascending=False, **options)
        else:
            # sort a copy in place so ``ser`` can be compared afterwards
            result_ser = ser.copy()
            result_ser.sort_values(ascending=False, **options)

        tm.assert_series_equal(result_ser, expected)
        tm.assert_series_equal(ser, Series(original_list))

    def test_mergesort_descending_stability(self):
        """Descending mergesort keeps tied values in their original relative order."""
        # GH 28697
        ser = Series([1, 2, 1, 3], ["first", "b", "second", "c"])
        expected = Series([3, 2, 1, 1], ["c", "b", "first", "second"])
        result = ser.sort_values(ascending=False, kind="mergesort")
        tm.assert_series_equal(result, expected)

    def test_sort_values_validate_ascending_for_value_error(self):
        """A string passed as ``ascending`` raises ValueError."""
        # GH41634
        ser = Series([23, 7, 21])

        msg = 'For argument "ascending" expected type bool, received type str.'
        with pytest.raises(ValueError, match=msg):
            ser.sort_values(ascending="False")

    @pytest.mark.parametrize("ascending", [False, 0, 1, True])
    def test_sort_values_validate_ascending_functional(self, ascending):
        """Bools and the integers 0/1 are both accepted as ``ascending``."""
        # GH41634
        ser = Series([23, 7, 21])
        increasing = np.sort(ser.values)

        sorted_ser = ser.sort_values(ascending=ascending)
        expected = increasing if ascending else increasing[::-1]

        tm.assert_numpy_array_equal(sorted_ser.values, expected)
|
||||
|
||||
|
||||
class TestSeriesSortingKey:
    """Tests for ``Series.sort_values`` when a ``key`` function is supplied."""

    def test_sort_values_key(self):
        """A lower-casing key changes the order of mixed-case strings."""
        series = Series(np.array(["Hello", "goodbye"]))

        # without a key the Series comes back unchanged
        plain = series.sort_values(axis=0)
        tm.assert_series_equal(plain, series)

        # with the key the two elements swap
        keyed = series.sort_values(axis=0, key=lambda x: x.str.lower())
        tm.assert_series_equal(keyed, series[::-1])

    def test_sort_values_key_nan(self):
        """NaNs stay at the end for each of the key/ascending combinations used here."""
        series = Series(np.array([0, 5, np.nan, 3, 2, np.nan]))
        order = [0, 4, 3, 1, 2, 5]

        scenarios = [
            {},
            {"key": lambda x: x + 5},
            {"key": lambda x: -x, "ascending": False},
        ]
        for sort_kwargs in scenarios:
            result = series.sort_values(axis=0, **sort_kwargs)
            tm.assert_series_equal(result, series.iloc[order])
|
||||
@ -0,0 +1,179 @@
|
||||
from datetime import datetime
|
||||
from io import StringIO
|
||||
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
import pandas as pd
|
||||
from pandas import Series
|
||||
import pandas._testing as tm
|
||||
|
||||
from pandas.io.common import get_handle
|
||||
|
||||
|
||||
class TestSeriesToCSV:
|
||||
def read_csv(self, path, **kwargs):
    """Read ``path`` with ``pd.read_csv`` and squeeze the columns axis.

    Defaults to ``index_col=0`` and ``header=None``; any keyword argument
    overrides or extends these. When the effective ``header`` is None, the
    name of the result and the name of its index are both reset to None.
    """
    options = {"index_col": 0, "header": None, **kwargs}

    result = pd.read_csv(path, **options).squeeze("columns")

    if options["header"] is None:
        result.name = None
        result.index.name = None

    return result
|
||||
|
||||
def test_from_csv(self, datetime_series, string_series):
|
||||
# freq doesn't round-trip
|
||||
datetime_series.index = datetime_series.index._with_freq(None)
|
||||
|
||||
with tm.ensure_clean() as path:
|
||||
datetime_series.to_csv(path, header=False)
|
||||
ts = self.read_csv(path, parse_dates=True)
|
||||
tm.assert_series_equal(datetime_series, ts, check_names=False)
|
||||
|
||||
assert ts.name is None
|
||||
assert ts.index.name is None
|
||||
|
||||
# see gh-10483
|
||||
datetime_series.to_csv(path, header=True)
|
||||
ts_h = self.read_csv(path, header=0)
|
||||
assert ts_h.name == "ts"
|
||||
|
||||
string_series.to_csv(path, header=False)
|
||||
series = self.read_csv(path)
|
||||
tm.assert_series_equal(string_series, series, check_names=False)
|
||||
|
||||
assert series.name is None
|
||||
assert series.index.name is None
|
||||
|
||||
string_series.to_csv(path, header=True)
|
||||
series_h = self.read_csv(path, header=0)
|
||||
assert series_h.name == "series"
|
||||
|
||||
with open(path, "w", encoding="utf-8") as outfile:
|
||||
outfile.write("1998-01-01|1.0\n1999-01-01|2.0")
|
||||
|
||||
series = self.read_csv(path, sep="|", parse_dates=True)
|
||||
check_series = Series(
|
||||
{datetime(1998, 1, 1): 1.0, datetime(1999, 1, 1): 2.0}
|
||||
)
|
||||
tm.assert_series_equal(check_series, series)
|
||||
|
||||
series = self.read_csv(path, sep="|", parse_dates=False)
|
||||
check_series = Series({"1998-01-01": 1.0, "1999-01-01": 2.0})
|
||||
tm.assert_series_equal(check_series, series)
|
||||
|
||||
def test_to_csv(self, datetime_series):
|
||||
with tm.ensure_clean() as path:
|
||||
datetime_series.to_csv(path, header=False)
|
||||
|
||||
with open(path, newline=None, encoding="utf-8") as f:
|
||||
lines = f.readlines()
|
||||
assert lines[1] != "\n"
|
||||
|
||||
datetime_series.to_csv(path, index=False, header=False)
|
||||
arr = np.loadtxt(path)
|
||||
tm.assert_almost_equal(arr, datetime_series.values)
|
||||
|
||||
def test_to_csv_unicode_index(self):
|
||||
buf = StringIO()
|
||||
s = Series(["\u05d0", "d2"], index=["\u05d0", "\u05d1"])
|
||||
|
||||
s.to_csv(buf, encoding="UTF-8", header=False)
|
||||
buf.seek(0)
|
||||
|
||||
s2 = self.read_csv(buf, index_col=0, encoding="UTF-8")
|
||||
tm.assert_series_equal(s, s2)
|
||||
|
||||
def test_to_csv_float_format(self):
|
||||
with tm.ensure_clean() as filename:
|
||||
ser = Series([0.123456, 0.234567, 0.567567])
|
||||
ser.to_csv(filename, float_format="%.2f", header=False)
|
||||
|
||||
rs = self.read_csv(filename)
|
||||
xp = Series([0.12, 0.23, 0.57])
|
||||
tm.assert_series_equal(rs, xp)
|
||||
|
||||
def test_to_csv_list_entries(self):
|
||||
s = Series(["jack and jill", "jesse and frank"])
|
||||
|
||||
split = s.str.split(r"\s+and\s+")
|
||||
|
||||
buf = StringIO()
|
||||
split.to_csv(buf, header=False)
|
||||
|
||||
def test_to_csv_path_is_none(self):
|
||||
# GH 8215
|
||||
# Series.to_csv() was returning None, inconsistent with
|
||||
# DataFrame.to_csv() which returned string
|
||||
s = Series([1, 2, 3])
|
||||
csv_str = s.to_csv(path_or_buf=None, header=False)
|
||||
assert isinstance(csv_str, str)
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"s,encoding",
|
||||
[
|
||||
(
|
||||
Series([0.123456, 0.234567, 0.567567], index=["A", "B", "C"], name="X"),
|
||||
None,
|
||||
),
|
||||
# GH 21241, 21118
|
||||
(Series(["abc", "def", "ghi"], name="X"), "ascii"),
|
||||
(Series(["123", "你好", "世界"], name="中文"), "gb2312"),
|
||||
(
|
||||
Series(["123", "Γειά σου", "Κόσμε"], name="Ελληνικά"), # noqa: RUF001
|
||||
"cp737",
|
||||
),
|
||||
],
|
||||
)
|
||||
def test_to_csv_compression(self, s, encoding, compression):
|
||||
with tm.ensure_clean() as filename:
|
||||
s.to_csv(filename, compression=compression, encoding=encoding, header=True)
|
||||
# test the round trip - to_csv -> read_csv
|
||||
result = pd.read_csv(
|
||||
filename,
|
||||
compression=compression,
|
||||
encoding=encoding,
|
||||
index_col=0,
|
||||
).squeeze("columns")
|
||||
tm.assert_series_equal(s, result)
|
||||
|
||||
# test the round trip using file handle - to_csv -> read_csv
|
||||
with get_handle(
|
||||
filename, "w", compression=compression, encoding=encoding
|
||||
) as handles:
|
||||
s.to_csv(handles.handle, encoding=encoding, header=True)
|
||||
|
||||
result = pd.read_csv(
|
||||
filename,
|
||||
compression=compression,
|
||||
encoding=encoding,
|
||||
index_col=0,
|
||||
).squeeze("columns")
|
||||
tm.assert_series_equal(s, result)
|
||||
|
||||
# explicitly ensure file was compressed
|
||||
with tm.decompress_file(filename, compression) as fh:
|
||||
text = fh.read().decode(encoding or "utf8")
|
||||
assert s.name in text
|
||||
|
||||
with tm.decompress_file(filename, compression) as fh:
|
||||
tm.assert_series_equal(
|
||||
s,
|
||||
pd.read_csv(fh, index_col=0, encoding=encoding).squeeze("columns"),
|
||||
)
|
||||
|
||||
def test_to_csv_interval_index(self, using_infer_string):
|
||||
# GH 28210
|
||||
s = Series(["foo", "bar", "baz"], index=pd.interval_range(0, 3))
|
||||
|
||||
with tm.ensure_clean("__tmp_to_csv_interval_index__.csv") as path:
|
||||
s.to_csv(path, header=False)
|
||||
result = self.read_csv(path, index_col=0)
|
||||
|
||||
# can't roundtrip intervalindex via read_csv so check string repr (GH 23595)
|
||||
expected = s
|
||||
expected.index = expected.index.astype("str")
|
||||
tm.assert_series_equal(result, expected)
|
||||
@ -0,0 +1,38 @@
|
||||
import collections
|
||||
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
from pandas import Series
|
||||
import pandas._testing as tm
|
||||
|
||||
|
||||
class TestSeriesToDict:
|
||||
@pytest.mark.parametrize(
|
||||
"mapping", (dict, collections.defaultdict(list), collections.OrderedDict)
|
||||
)
|
||||
def test_to_dict(self, mapping, datetime_series):
|
||||
# GH#16122
|
||||
result = Series(datetime_series.to_dict(into=mapping), name="ts")
|
||||
expected = datetime_series.copy()
|
||||
expected.index = expected.index._with_freq(None)
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
from_method = Series(datetime_series.to_dict(into=collections.Counter))
|
||||
from_constructor = Series(collections.Counter(datetime_series.items()))
|
||||
tm.assert_series_equal(from_method, from_constructor)
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"input",
|
||||
(
|
||||
{"a": np.int64(64), "b": 10},
|
||||
{"a": np.int64(64), "b": 10, "c": "ABC"},
|
||||
{"a": np.uint64(64), "b": 10, "c": "ABC"},
|
||||
),
|
||||
)
|
||||
def test_to_dict_return_types(self, input):
|
||||
# GH25969
|
||||
|
||||
d = Series(input).to_dict()
|
||||
assert isinstance(d["a"], int)
|
||||
assert isinstance(d["b"], int)
|
||||
@ -0,0 +1,63 @@
|
||||
import pytest
|
||||
|
||||
from pandas import (
|
||||
DataFrame,
|
||||
Index,
|
||||
Series,
|
||||
)
|
||||
import pandas._testing as tm
|
||||
|
||||
|
||||
class TestToFrame:
|
||||
def test_to_frame_respects_name_none(self):
|
||||
# GH#44212 if we explicitly pass name=None, then that should be respected,
|
||||
# not changed to 0
|
||||
# GH-45448 this is first deprecated & enforced in 2.0
|
||||
ser = Series(range(3))
|
||||
result = ser.to_frame(None)
|
||||
|
||||
exp_index = Index([None], dtype=object)
|
||||
tm.assert_index_equal(result.columns, exp_index)
|
||||
|
||||
result = ser.rename("foo").to_frame(None)
|
||||
exp_index = Index([None], dtype=object)
|
||||
tm.assert_index_equal(result.columns, exp_index)
|
||||
|
||||
def test_to_frame(self, datetime_series):
|
||||
datetime_series.name = None
|
||||
rs = datetime_series.to_frame()
|
||||
xp = DataFrame(datetime_series.values, index=datetime_series.index)
|
||||
tm.assert_frame_equal(rs, xp)
|
||||
|
||||
datetime_series.name = "testname"
|
||||
rs = datetime_series.to_frame()
|
||||
xp = DataFrame(
|
||||
{"testname": datetime_series.values}, index=datetime_series.index
|
||||
)
|
||||
tm.assert_frame_equal(rs, xp)
|
||||
|
||||
rs = datetime_series.to_frame(name="testdifferent")
|
||||
xp = DataFrame(
|
||||
{"testdifferent": datetime_series.values}, index=datetime_series.index
|
||||
)
|
||||
tm.assert_frame_equal(rs, xp)
|
||||
|
||||
@pytest.mark.filterwarnings(
|
||||
"ignore:Passing a BlockManager|Passing a SingleBlockManager:DeprecationWarning"
|
||||
)
|
||||
def test_to_frame_expanddim(self):
|
||||
# GH#9762
|
||||
|
||||
class SubclassedSeries(Series):
|
||||
@property
|
||||
def _constructor_expanddim(self):
|
||||
return SubclassedFrame
|
||||
|
||||
class SubclassedFrame(DataFrame):
|
||||
pass
|
||||
|
||||
ser = SubclassedSeries([1, 2, 3], name="X")
|
||||
result = ser.to_frame()
|
||||
assert isinstance(result, SubclassedFrame)
|
||||
expected = SubclassedFrame({"X": [1, 2, 3]})
|
||||
tm.assert_frame_equal(result, expected)
|
||||
@ -0,0 +1,49 @@
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
import pandas.util._test_decorators as td
|
||||
|
||||
from pandas import (
|
||||
NA,
|
||||
Series,
|
||||
Timedelta,
|
||||
)
|
||||
import pandas._testing as tm
|
||||
|
||||
|
||||
@pytest.mark.parametrize("dtype", ["int64", "float64"])
def test_to_numpy_na_value(dtype):
    # GH#48951
    # The NA entry must be replaced by ``na_value`` in the requested dtype.
    converted = Series([1, 2, NA, 4]).to_numpy(dtype=dtype, na_value=0)
    tm.assert_numpy_array_equal(converted, np.array([1, 2, 0, 4], dtype=dtype))
|
||||
|
||||
|
||||
def test_to_numpy_cast_before_setting_na():
    # GH#50600
    # Integer data with a float target dtype and a NaN ``na_value``.
    converted = Series([1]).to_numpy(dtype=np.float64, na_value=np.nan)
    tm.assert_numpy_array_equal(converted, np.array([1.0]))
|
||||
|
||||
|
||||
@td.skip_if_no("pyarrow")
def test_to_numpy_arrow_dtype_given():
    # GH#57121
    # pyarrow-backed ints with a missing value, converted to float64.
    arrow_ser = Series([1, NA], dtype="int64[pyarrow]")
    tm.assert_numpy_array_equal(
        arrow_ser.to_numpy(dtype="float64"), np.array([1.0, np.nan])
    )
|
||||
|
||||
|
||||
def test_astype_ea_int_to_td_ts():
|
||||
# GH#57093
|
||||
ser = Series([1, None], dtype="Int64")
|
||||
result = ser.astype("m8[ns]")
|
||||
expected = Series([1, Timedelta("nat")], dtype="m8[ns]")
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
result = ser.astype("M8[ns]")
|
||||
expected = Series([1, Timedelta("nat")], dtype="M8[ns]")
|
||||
tm.assert_series_equal(result, expected)
|
||||
@ -0,0 +1,36 @@
|
||||
import pytest
|
||||
|
||||
import pandas.util._test_decorators as td
|
||||
|
||||
from pandas import (
|
||||
Interval,
|
||||
Period,
|
||||
Series,
|
||||
Timedelta,
|
||||
Timestamp,
|
||||
)
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "values, dtype, expected_dtype",
    (
        ([1], "int64", int),
        ([1], "Int64", int),
        ([1.0], "float64", float),
        ([1.0], "Float64", float),
        (["abc"], "object", str),
        (["abc"], "string", str),
        ([Interval(1, 3)], "interval", Interval),
        ([Period("2000-01-01", "D")], "period[D]", Period),
        ([Timedelta(days=1)], "timedelta64[ns]", Timedelta),
        ([Timestamp("2000-01-01")], "datetime64[ns]", Timestamp),
        pytest.param([1], "int64[pyarrow]", int, marks=td.skip_if_no("pyarrow")),
        pytest.param([1.0], "float64[pyarrow]", float, marks=td.skip_if_no("pyarrow")),
        pytest.param(["abc"], "string[pyarrow]", str, marks=td.skip_if_no("pyarrow")),
    ),
)
def test_tolist_scalar_dtype(values, dtype, expected_dtype):
    # GH49890
    # The first element returned by tolist() must have exactly the expected type.
    first_item = Series(values, dtype=dtype).tolist()[0]
    assert type(first_item) == expected_dtype
|
||||
@ -0,0 +1,67 @@
|
||||
from datetime import datetime
|
||||
|
||||
import pytest
|
||||
|
||||
import pandas as pd
|
||||
from pandas import (
|
||||
Series,
|
||||
date_range,
|
||||
)
|
||||
import pandas._testing as tm
|
||||
|
||||
|
||||
class TestTruncate:
    # Checks for Series.truncate on datetime-like indexes.

    def test_truncate_datetimeindex_tz(self):
        # GH 9243
        aware_idx = date_range("4/1/2005", "4/30/2005", freq="D", tz="US/Pacific")
        ser = Series(range(len(aware_idx)), index=aware_idx)

        with pytest.raises(TypeError, match="Cannot compare tz-naive"):
            # GH#36148 as of 2.0 we require tzawareness compat
            ser.truncate(datetime(2005, 4, 2), datetime(2005, 4, 4))

        # Bounds taken from the tz-aware index itself are accepted.
        lower = aware_idx[1].to_pydatetime()
        upper = aware_idx[3].to_pydatetime()
        tm.assert_series_equal(
            ser.truncate(lower, upper), Series([1, 2, 3], index=aware_idx[1:4])
        )

    def test_truncate_periodindex(self):
        # GH 17717
        # index containing a duplicated period
        dup_idx = pd.PeriodIndex(
            [pd.Period("2017-09-02"), pd.Period("2017-09-02"), pd.Period("2017-09-03")]
        )
        dup_result = Series([1, 2, 3], index=dup_idx).truncate(after="2017-09-02")
        dup_expected = Series(
            [1, 2],
            index=pd.PeriodIndex([pd.Period("2017-09-02"), pd.Period("2017-09-02")]),
        )
        tm.assert_series_equal(dup_result, dup_expected)

        # index that is not in sorted order; sorted before truncating
        unsorted_idx = pd.PeriodIndex(
            [pd.Period("2017-09-03"), pd.Period("2017-09-02"), pd.Period("2017-09-03")]
        )
        unsorted_ser = Series([1, 2, 3], index=unsorted_idx)
        sorted_result = unsorted_ser.sort_index().truncate(after="2017-09-02")
        sorted_expected = Series(
            [2], index=pd.PeriodIndex([pd.Period("2017-09-02")])
        )
        tm.assert_series_equal(sorted_result, sorted_expected)

    def test_truncate_one_element_series(self):
        # GH 35544
        single = Series([0.1], index=pd.DatetimeIndex(["2020-08-04"]))

        truncated = single.truncate(
            before=pd.Timestamp("2020-08-02"), after=pd.Timestamp("2020-08-04")
        )

        # the input Series and the expected Series are the same
        tm.assert_series_equal(truncated, single)

    def test_truncate_index_only_one_unique_value(self):
        # GH 42365
        repeated = Series(0, index=date_range("2021-06-30", "2021-06-30")).repeat(5)

        # The bounds enclose the only index value, so nothing is dropped.
        tm.assert_series_equal(repeated.truncate("2021-06-28", "2021-07-01"), repeated)
|
||||
@ -0,0 +1,123 @@
|
||||
from datetime import timezone
|
||||
|
||||
import pytest
|
||||
import pytz
|
||||
|
||||
from pandas._libs.tslibs import timezones
|
||||
|
||||
from pandas import (
|
||||
DatetimeIndex,
|
||||
NaT,
|
||||
Series,
|
||||
Timestamp,
|
||||
date_range,
|
||||
)
|
||||
import pandas._testing as tm
|
||||
|
||||
|
||||
class TestTZLocalize:
|
||||
def test_series_tz_localize_ambiguous_bool(self):
|
||||
# make sure that we are correctly accepting bool values as ambiguous
|
||||
|
||||
# GH#14402
|
||||
ts = Timestamp("2015-11-01 01:00:03")
|
||||
expected0 = Timestamp("2015-11-01 01:00:03-0500", tz="US/Central")
|
||||
expected1 = Timestamp("2015-11-01 01:00:03-0600", tz="US/Central")
|
||||
|
||||
ser = Series([ts])
|
||||
expected0 = Series([expected0])
|
||||
expected1 = Series([expected1])
|
||||
|
||||
with tm.external_error_raised(pytz.AmbiguousTimeError):
|
||||
ser.dt.tz_localize("US/Central")
|
||||
|
||||
result = ser.dt.tz_localize("US/Central", ambiguous=True)
|
||||
tm.assert_series_equal(result, expected0)
|
||||
|
||||
result = ser.dt.tz_localize("US/Central", ambiguous=[True])
|
||||
tm.assert_series_equal(result, expected0)
|
||||
|
||||
result = ser.dt.tz_localize("US/Central", ambiguous=False)
|
||||
tm.assert_series_equal(result, expected1)
|
||||
|
||||
result = ser.dt.tz_localize("US/Central", ambiguous=[False])
|
||||
tm.assert_series_equal(result, expected1)
|
||||
|
||||
def test_series_tz_localize_matching_index(self):
|
||||
# Matching the index of the result with that of the original series
|
||||
# GH 43080
|
||||
dt_series = Series(
|
||||
date_range(start="2021-01-01T02:00:00", periods=5, freq="1D"),
|
||||
index=[2, 6, 7, 8, 11],
|
||||
dtype="category",
|
||||
)
|
||||
result = dt_series.dt.tz_localize("Europe/Berlin")
|
||||
expected = Series(
|
||||
date_range(
|
||||
start="2021-01-01T02:00:00", periods=5, freq="1D", tz="Europe/Berlin"
|
||||
),
|
||||
index=[2, 6, 7, 8, 11],
|
||||
)
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"method, exp",
|
||||
[
|
||||
["shift_forward", "2015-03-29 03:00:00"],
|
||||
["shift_backward", "2015-03-29 01:59:59.999999999"],
|
||||
["NaT", NaT],
|
||||
["raise", None],
|
||||
["foo", "invalid"],
|
||||
],
|
||||
)
|
||||
def test_tz_localize_nonexistent(self, warsaw, method, exp, unit):
|
||||
# GH 8917
|
||||
tz = warsaw
|
||||
n = 60
|
||||
dti = date_range(start="2015-03-29 02:00:00", periods=n, freq="min", unit=unit)
|
||||
ser = Series(1, index=dti)
|
||||
df = ser.to_frame()
|
||||
|
||||
if method == "raise":
|
||||
with tm.external_error_raised(pytz.NonExistentTimeError):
|
||||
dti.tz_localize(tz, nonexistent=method)
|
||||
with tm.external_error_raised(pytz.NonExistentTimeError):
|
||||
ser.tz_localize(tz, nonexistent=method)
|
||||
with tm.external_error_raised(pytz.NonExistentTimeError):
|
||||
df.tz_localize(tz, nonexistent=method)
|
||||
|
||||
elif exp == "invalid":
|
||||
msg = (
|
||||
"The nonexistent argument must be one of "
|
||||
"'raise', 'NaT', 'shift_forward', 'shift_backward' "
|
||||
"or a timedelta object"
|
||||
)
|
||||
with pytest.raises(ValueError, match=msg):
|
||||
dti.tz_localize(tz, nonexistent=method)
|
||||
with pytest.raises(ValueError, match=msg):
|
||||
ser.tz_localize(tz, nonexistent=method)
|
||||
with pytest.raises(ValueError, match=msg):
|
||||
df.tz_localize(tz, nonexistent=method)
|
||||
|
||||
else:
|
||||
result = ser.tz_localize(tz, nonexistent=method)
|
||||
expected = Series(1, index=DatetimeIndex([exp] * n, tz=tz).as_unit(unit))
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
result = df.tz_localize(tz, nonexistent=method)
|
||||
expected = expected.to_frame()
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
res_index = dti.tz_localize(tz, nonexistent=method)
|
||||
tm.assert_index_equal(res_index, expected.index)
|
||||
|
||||
@pytest.mark.parametrize("tzstr", ["US/Eastern", "dateutil/US/Eastern"])
|
||||
def test_series_tz_localize_empty(self, tzstr):
|
||||
# GH#2248
|
||||
ser = Series(dtype=object)
|
||||
|
||||
ser2 = ser.tz_localize("utc")
|
||||
assert ser2.index.tz == timezone.utc
|
||||
|
||||
ser2 = ser.tz_localize(tzstr)
|
||||
timezones.tz_compare(ser2.index.tz, timezones.maybe_get_tz(tzstr))
|
||||
@ -0,0 +1,76 @@
|
||||
import numpy as np
|
||||
|
||||
from pandas import (
|
||||
Categorical,
|
||||
IntervalIndex,
|
||||
Series,
|
||||
date_range,
|
||||
)
|
||||
import pandas._testing as tm
|
||||
|
||||
|
||||
class TestUnique:
    # Checks for Series.unique across several dtypes.

    def test_unique_uint64(self):
        big = 2**63
        uniques = Series([1, 2, big, big], dtype=np.uint64).unique()
        tm.assert_numpy_array_equal(uniques, np.array([1, 2, big], dtype=np.uint64))

    def test_unique_data_ownership(self):
        # it works! GH#1807
        uniques = Series(["a", "c", "b"]).unique()
        Series(uniques).sort_values()

    def test_unique(self):
        # GH#714 also, dtype=float
        default_float = Series([1.2345] * 100)
        default_float[::2] = np.nan
        assert len(default_float.unique()) == 2

        # explicit f4 dtype
        single_prec = Series([1.2345] * 100, dtype="f4")
        single_prec[::2] = np.nan
        assert len(single_prec.unique()) == 2

    def test_unique_nan_object_dtype(self):
        # NAs in object arrays GH#714
        objs = Series(["foo"] * 100, dtype="O")
        objs[::2] = np.nan
        assert len(objs.unique()) == 2

    def test_unique_none(self):
        # decision about None
        uniques = Series([1, 2, 3, None, None, None], dtype=object).unique()
        tm.assert_numpy_array_equal(
            uniques, np.array([1, 2, 3, None], dtype=object)
        )

    def test_unique_categorical(self):
        # GH#18051
        # An empty categorical and an all-NaN one must both come back as given.
        for cat in (Categorical([]), Categorical([np.nan])):
            tm.assert_categorical_equal(Series(cat).unique(), cat)

    def test_tz_unique(self):
        # GH 46128
        naive_breaks = date_range("2016-01-01", periods=3)
        naive_ser = Series(IntervalIndex.from_breaks(naive_breaks))
        naive_uniques = naive_ser.unique()
        tm.assert_interval_array_equal(naive_ser.array, naive_uniques)

        aware_breaks = date_range("2016-01-01", periods=3, tz="US/Eastern")
        aware_ser = Series(IntervalIndex.from_breaks(aware_breaks))
        aware_uniques = aware_ser.unique()
        tm.assert_interval_array_equal(aware_ser.array, aware_uniques)

        # The tz-naive and tz-aware results must not share a dtype.
        assert naive_uniques.dtype != aware_uniques.dtype
|
||||
@ -0,0 +1,169 @@
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
import pandas as pd
|
||||
from pandas import (
|
||||
DataFrame,
|
||||
Index,
|
||||
MultiIndex,
|
||||
Series,
|
||||
date_range,
|
||||
)
|
||||
import pandas._testing as tm
|
||||
|
||||
|
||||
def test_unstack_preserves_object():
    # Object-dtype values must stay object whichever level is unstacked.
    index = MultiIndex.from_product([["bar", "foo"], ["one", "two"]])
    ser = Series(np.arange(4.0), index=index, dtype=object)

    default_level = ser.unstack()
    assert (default_level.dtypes == object).all()

    outer_level = ser.unstack(level=0)
    assert (outer_level.dtypes == object).all()
|
||||
|
||||
|
||||
def test_unstack():
    # Two-level index in which some label combinations are absent.
    two_level = MultiIndex(
        levels=[["bar", "foo"], ["one", "three", "two"]],
        codes=[[1, 1, 0, 0], [0, 1, 0, 2]],
    )
    ser = Series(np.arange(4.0), index=two_level)

    wide = DataFrame(
        [[2.0, np.nan, 3.0], [0.0, 1.0, np.nan]],
        index=["bar", "foo"],
        columns=["one", "three", "two"],
    )
    tm.assert_frame_equal(ser.unstack(), wide)

    # Unstacking the outer level gives the transpose.
    tm.assert_frame_equal(ser.unstack(level=0), wide.T)

    # Three-level index whose outer level has a single label.
    three_level = MultiIndex(
        levels=[["bar"], ["one", "two", "three"], [0, 1]],
        codes=[[0, 0, 0, 0, 0, 0], [0, 1, 2, 0, 1, 2], [0, 1, 0, 1, 0, 1]],
    )
    ser = Series(np.random.default_rng(2).standard_normal(6), index=three_level)
    remaining = MultiIndex(
        levels=[["one", "two", "three"], [0, 1]],
        codes=[[0, 1, 2, 0, 1, 2], [0, 1, 0, 1, 0, 1]],
    )
    single_col = DataFrame({"bar": ser.values}, index=remaining).sort_index(level=0)
    tm.assert_frame_equal(ser.unstack(0).sort_index(), single_col)

    # GH5873
    nan_level = MultiIndex.from_arrays([[101, 102], [3.5, np.nan]])
    ts = Series([1, 2], index=nan_level)
    with_nan_column = DataFrame(
        [[np.nan, 1], [2, np.nan]], index=[101, 102], columns=[np.nan, 3.5]
    )
    tm.assert_frame_equal(ts.unstack(), with_nan_column)

    # NaN inside the innermost of three levels
    nan_inner = MultiIndex.from_arrays(
        [
            ["cat", "cat", "cat", "dog", "dog"],
            ["a", "a", "b", "a", "b"],
            [1, 2, 1, 1, np.nan],
        ]
    )
    ts = Series([1.0, 1.1, 1.2, 1.3, 1.4], index=nan_inner)
    by_animal = DataFrame(
        [[1.0, 1.3], [1.1, np.nan], [np.nan, 1.4], [1.2, np.nan]],
        columns=["cat", "dog"],
    )
    by_animal.index = MultiIndex.from_tuples(
        [("a", 1), ("a", 2), ("b", np.nan), ("b", 1)]
    )
    tm.assert_frame_equal(ts.unstack(level=0), by_animal)
|
||||
|
||||
|
||||
def test_unstack_tuplename_in_multiindex():
    # GH 19966
    # Both level names are tuples; unstack by the first one.
    index = MultiIndex.from_product(
        [["a", "b", "c"], [1, 2, 3]], names=[("A", "a"), ("B", "b")]
    )
    unstacked = Series(1, index=index).unstack(("A", "a"))

    wanted = DataFrame(
        [[1, 1, 1], [1, 1, 1], [1, 1, 1]],
        columns=MultiIndex.from_tuples([("a",), ("b",), ("c",)], names=[("A", "a")]),
        index=Index([1, 2, 3], name=("B", "b")),
    )
    tm.assert_frame_equal(unstacked, wanted)
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "unstack_idx, expected_values, expected_index, expected_columns",
    [
        (
            ("A", "a"),
            [[1, 1], [1, 1], [1, 1], [1, 1]],
            MultiIndex.from_tuples([(1, 3), (1, 4), (2, 3), (2, 4)], names=["B", "C"]),
            MultiIndex.from_tuples([("a",), ("b",)], names=[("A", "a")]),
        ),
        (
            (("A", "a"), "B"),
            [[1, 1, 1, 1], [1, 1, 1, 1]],
            Index([3, 4], name="C"),
            MultiIndex.from_tuples(
                [("a", 1), ("a", 2), ("b", 1), ("b", 2)], names=[("A", "a"), "B"]
            ),
        ),
    ],
)
def test_unstack_mixed_type_name_in_multiindex(
    unstack_idx, expected_values, expected_index, expected_columns
):
    # GH 19966
    # One level is named by a tuple, the others by plain strings.
    index = MultiIndex.from_product(
        [["a", "b"], [1, 2], [3, 4]], names=[("A", "a"), "B", "C"]
    )
    unstacked = Series(1, index=index).unstack(unstack_idx)

    wanted = DataFrame(
        expected_values, columns=expected_columns, index=expected_index
    )
    tm.assert_frame_equal(unstacked, wanted)
|
||||
|
||||
|
||||
def test_unstack_multi_index_categorical_values():
|
||||
df = DataFrame(
|
||||
np.random.default_rng(2).standard_normal((10, 4)),
|
||||
columns=Index(list("ABCD")),
|
||||
index=date_range("2000-01-01", periods=10, freq="B"),
|
||||
)
|
||||
mi = df.stack(future_stack=True).index.rename(["major", "minor"])
|
||||
ser = Series(["foo"] * len(mi), index=mi, name="category", dtype="category")
|
||||
|
||||
result = ser.unstack()
|
||||
|
||||
dti = ser.index.levels[0]
|
||||
c = pd.Categorical(["foo"] * len(dti))
|
||||
expected = DataFrame(
|
||||
{"A": c.copy(), "B": c.copy(), "C": c.copy(), "D": c.copy()},
|
||||
columns=Index(list("ABCD"), name="minor"),
|
||||
index=dti.rename("major"),
|
||||
)
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
|
||||
def test_unstack_mixed_level_names():
    # GH#48763
    # Level names mix strings with the integer 0; unstack by the string "x".
    index = MultiIndex.from_arrays(
        [["a", "a"], [1, 2], ["red", "blue"]], names=("x", 0, "y")
    )
    unstacked = Series([1, 2], index=index).unstack("x")

    wanted = DataFrame(
        [[1], [2]],
        columns=Index(["a"], name="x"),
        index=MultiIndex.from_tuples([(1, "red"), (2, "blue")], names=[0, "y"]),
    )
    tm.assert_frame_equal(unstacked, wanted)
|
||||
@ -0,0 +1,139 @@
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
import pandas.util._test_decorators as td
|
||||
|
||||
from pandas import (
|
||||
CategoricalDtype,
|
||||
DataFrame,
|
||||
NaT,
|
||||
Series,
|
||||
Timestamp,
|
||||
)
|
||||
import pandas._testing as tm
|
||||
|
||||
|
||||
class TestUpdate:
|
||||
def test_update(self, using_copy_on_write):
|
||||
s = Series([1.5, np.nan, 3.0, 4.0, np.nan])
|
||||
s2 = Series([np.nan, 3.5, np.nan, 5.0])
|
||||
s.update(s2)
|
||||
|
||||
expected = Series([1.5, 3.5, 3.0, 5.0, np.nan])
|
||||
tm.assert_series_equal(s, expected)
|
||||
|
||||
# GH 3217
|
||||
df = DataFrame([{"a": 1}, {"a": 3, "b": 2}])
|
||||
df["c"] = np.nan
|
||||
# Cast to object to avoid upcast when setting "foo"
|
||||
df["c"] = df["c"].astype(object)
|
||||
df_orig = df.copy()
|
||||
|
||||
if using_copy_on_write:
|
||||
with tm.raises_chained_assignment_error():
|
||||
df["c"].update(Series(["foo"], index=[0]))
|
||||
expected = df_orig
|
||||
else:
|
||||
with tm.assert_produces_warning(FutureWarning, match="inplace method"):
|
||||
df["c"].update(Series(["foo"], index=[0]))
|
||||
expected = DataFrame(
|
||||
[[1, np.nan, "foo"], [3, 2.0, np.nan]], columns=["a", "b", "c"]
|
||||
)
|
||||
expected["c"] = expected["c"].astype(object)
|
||||
tm.assert_frame_equal(df, expected)
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"other, dtype, expected, warn",
|
||||
[
|
||||
# other is int
|
||||
([61, 63], "int32", Series([10, 61, 12], dtype="int32"), None),
|
||||
([61, 63], "int64", Series([10, 61, 12]), None),
|
||||
([61, 63], float, Series([10.0, 61.0, 12.0]), None),
|
||||
([61, 63], object, Series([10, 61, 12], dtype=object), None),
|
||||
# other is float, but can be cast to int
|
||||
([61.0, 63.0], "int32", Series([10, 61, 12], dtype="int32"), None),
|
||||
([61.0, 63.0], "int64", Series([10, 61, 12]), None),
|
||||
([61.0, 63.0], float, Series([10.0, 61.0, 12.0]), None),
|
||||
([61.0, 63.0], object, Series([10, 61.0, 12], dtype=object), None),
|
||||
# others is float, cannot be cast to int
|
||||
([61.1, 63.1], "int32", Series([10.0, 61.1, 12.0]), FutureWarning),
|
||||
([61.1, 63.1], "int64", Series([10.0, 61.1, 12.0]), FutureWarning),
|
||||
([61.1, 63.1], float, Series([10.0, 61.1, 12.0]), None),
|
||||
([61.1, 63.1], object, Series([10, 61.1, 12], dtype=object), None),
|
||||
# other is object, cannot be cast
|
||||
([(61,), (63,)], "int32", Series([10, (61,), 12]), FutureWarning),
|
||||
([(61,), (63,)], "int64", Series([10, (61,), 12]), FutureWarning),
|
||||
([(61,), (63,)], float, Series([10.0, (61,), 12.0]), FutureWarning),
|
||||
([(61,), (63,)], object, Series([10, (61,), 12]), None),
|
||||
],
|
||||
)
|
||||
def test_update_dtypes(self, other, dtype, expected, warn):
|
||||
ser = Series([10, 11, 12], dtype=dtype)
|
||||
other = Series(other, index=[1, 3])
|
||||
with tm.assert_produces_warning(warn, match="item of incompatible dtype"):
|
||||
ser.update(other)
|
||||
|
||||
tm.assert_series_equal(ser, expected)
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"series, other, expected",
|
||||
[
|
||||
# update by key
|
||||
(
|
||||
Series({"a": 1, "b": 2, "c": 3, "d": 4}),
|
||||
{"b": 5, "c": np.nan},
|
||||
Series({"a": 1, "b": 5, "c": 3, "d": 4}),
|
||||
),
|
||||
# update by position
|
||||
(Series([1, 2, 3, 4]), [np.nan, 5, 1], Series([1, 5, 1, 4])),
|
||||
],
|
||||
)
|
||||
def test_update_from_non_series(self, series, other, expected):
|
||||
# GH 33215
|
||||
series.update(other)
|
||||
tm.assert_series_equal(series, expected)
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"data, other, expected, dtype",
|
||||
[
|
||||
(["a", None], [None, "b"], ["a", "b"], "string[python]"),
|
||||
pytest.param(
|
||||
["a", None],
|
||||
[None, "b"],
|
||||
["a", "b"],
|
||||
"string[pyarrow]",
|
||||
marks=td.skip_if_no("pyarrow"),
|
||||
),
|
||||
([1, None], [None, 2], [1, 2], "Int64"),
|
||||
([True, None], [None, False], [True, False], "boolean"),
|
||||
(
|
||||
["a", None],
|
||||
[None, "b"],
|
||||
["a", "b"],
|
||||
CategoricalDtype(categories=["a", "b"]),
|
||||
),
|
||||
(
|
||||
[Timestamp(year=2020, month=1, day=1, tz="Europe/London"), NaT],
|
||||
[NaT, Timestamp(year=2020, month=1, day=1, tz="Europe/London")],
|
||||
[Timestamp(year=2020, month=1, day=1, tz="Europe/London")] * 2,
|
||||
"datetime64[ns, Europe/London]",
|
||||
),
|
||||
],
|
||||
)
|
||||
def test_update_extension_array_series(self, data, other, expected, dtype):
|
||||
result = Series(data, dtype=dtype)
|
||||
other = Series(other, dtype=dtype)
|
||||
expected = Series(expected, dtype=dtype)
|
||||
|
||||
result.update(other)
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
def test_update_with_categorical_type(self):
|
||||
# GH 25744
|
||||
dtype = CategoricalDtype(["a", "b", "c", "d"])
|
||||
s1 = Series(["a", "b", "c"], index=[1, 2, 3], dtype=dtype)
|
||||
s2 = Series(["b", "a"], index=[1, 2], dtype=dtype)
|
||||
s1.update(s2)
|
||||
result = s1
|
||||
expected = Series(["b", "a", "c"], index=[1, 2, 3], dtype=dtype)
|
||||
tm.assert_series_equal(result, expected)
|
||||
@ -0,0 +1,271 @@
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
import pandas as pd
|
||||
from pandas import (
|
||||
Categorical,
|
||||
CategoricalIndex,
|
||||
Index,
|
||||
Series,
|
||||
)
|
||||
import pandas._testing as tm
|
||||
|
||||
|
||||
class TestSeriesValueCounts:
    """Tests for ``Series.value_counts`` and the matching ``Index.value_counts``."""

    def test_value_counts_datetime(self, unit):
        """Counts and proportions agree for a naive datetime Series and Index.

        ``unit`` comes from a fixture defined outside this file; it is passed
        straight to ``as_unit``.
        """
        # most dtypes are tested in tests/base
        values = [
            pd.Timestamp("2011-01-01 09:00"),
            pd.Timestamp("2011-01-01 10:00"),
            pd.Timestamp("2011-01-01 11:00"),
            pd.Timestamp("2011-01-01 09:00"),
            pd.Timestamp("2011-01-01 09:00"),
            pd.Timestamp("2011-01-01 11:00"),
        ]

        exp_idx = pd.DatetimeIndex(
            ["2011-01-01 09:00", "2011-01-01 11:00", "2011-01-01 10:00"],
            name="xxx",
        ).as_unit(unit)
        exp = Series([3, 2, 1], index=exp_idx, name="count")

        ser = Series(values, name="xxx").dt.as_unit(unit)
        tm.assert_series_equal(ser.value_counts(), exp)
        # check DatetimeIndex outputs the same result
        idx = pd.DatetimeIndex(values, name="xxx").as_unit(unit)
        tm.assert_series_equal(idx.value_counts(), exp)

        # normalize
        exp = Series(np.array([3.0, 2.0, 1]) / 6.0, index=exp_idx, name="proportion")
        tm.assert_series_equal(ser.value_counts(normalize=True), exp)
        tm.assert_series_equal(idx.value_counts(normalize=True), exp)

    def test_value_counts_datetime_tz(self, unit):
        """Same checks as the naive datetime test, with US/Eastern timestamps."""
        values = [
            pd.Timestamp("2011-01-01 09:00", tz="US/Eastern"),
            pd.Timestamp("2011-01-01 10:00", tz="US/Eastern"),
            pd.Timestamp("2011-01-01 11:00", tz="US/Eastern"),
            pd.Timestamp("2011-01-01 09:00", tz="US/Eastern"),
            pd.Timestamp("2011-01-01 09:00", tz="US/Eastern"),
            pd.Timestamp("2011-01-01 11:00", tz="US/Eastern"),
        ]

        exp_idx = pd.DatetimeIndex(
            ["2011-01-01 09:00", "2011-01-01 11:00", "2011-01-01 10:00"],
            tz="US/Eastern",
            name="xxx",
        ).as_unit(unit)
        exp = Series([3, 2, 1], index=exp_idx, name="count")

        ser = Series(values, name="xxx").dt.as_unit(unit)
        tm.assert_series_equal(ser.value_counts(), exp)
        # check DatetimeIndex outputs the same result
        idx = pd.DatetimeIndex(values, name="xxx").as_unit(unit)
        tm.assert_series_equal(idx.value_counts(), exp)

        # normalize
        exp = Series(np.array([3.0, 2.0, 1]) / 6.0, index=exp_idx, name="proportion")
        tm.assert_series_equal(ser.value_counts(normalize=True), exp)
        tm.assert_series_equal(idx.value_counts(normalize=True), exp)

    def test_value_counts_period(self):
        """Counts and proportions agree for a monthly Period Series and Index."""
        values = [
            pd.Period("2011-01", freq="M"),
            pd.Period("2011-02", freq="M"),
            pd.Period("2011-03", freq="M"),
            pd.Period("2011-01", freq="M"),
            pd.Period("2011-01", freq="M"),
            pd.Period("2011-03", freq="M"),
        ]

        exp_idx = pd.PeriodIndex(
            ["2011-01", "2011-03", "2011-02"], freq="M", name="xxx"
        )
        exp = Series([3, 2, 1], index=exp_idx, name="count")

        ser = Series(values, name="xxx")
        tm.assert_series_equal(ser.value_counts(), exp)
        # check PeriodIndex outputs the same result
        idx = pd.PeriodIndex(values, name="xxx")
        tm.assert_series_equal(idx.value_counts(), exp)

        # normalize
        exp = Series(np.array([3.0, 2.0, 1]) / 6.0, index=exp_idx, name="proportion")
        tm.assert_series_equal(ser.value_counts(normalize=True), exp)
        tm.assert_series_equal(idx.value_counts(normalize=True), exp)

    def test_value_counts_categorical_ordered(self):
        """Ordered categorical: the result index keeps categories and ordering."""
        # most dtypes are tested in tests/base
        values = Categorical([1, 2, 3, 1, 1, 3], ordered=True)

        exp_idx = CategoricalIndex(
            [1, 3, 2], categories=[1, 2, 3], ordered=True, name="xxx"
        )
        exp = Series([3, 2, 1], index=exp_idx, name="count")

        ser = Series(values, name="xxx")
        tm.assert_series_equal(ser.value_counts(), exp)
        # check CategoricalIndex outputs the same result
        idx = CategoricalIndex(values, name="xxx")
        tm.assert_series_equal(idx.value_counts(), exp)

        # normalize
        exp = Series(np.array([3.0, 2.0, 1]) / 6.0, index=exp_idx, name="proportion")
        tm.assert_series_equal(ser.value_counts(normalize=True), exp)
        tm.assert_series_equal(idx.value_counts(normalize=True), exp)

    def test_value_counts_categorical_not_ordered(self):
        """Unordered categorical: same checks as the ordered variant."""
        values = Categorical([1, 2, 3, 1, 1, 3], ordered=False)

        exp_idx = CategoricalIndex(
            [1, 3, 2], categories=[1, 2, 3], ordered=False, name="xxx"
        )
        exp = Series([3, 2, 1], index=exp_idx, name="count")

        ser = Series(values, name="xxx")
        tm.assert_series_equal(ser.value_counts(), exp)
        # check CategoricalIndex outputs the same result
        idx = CategoricalIndex(values, name="xxx")
        tm.assert_series_equal(idx.value_counts(), exp)

        # normalize
        exp = Series(np.array([3.0, 2.0, 1]) / 6.0, index=exp_idx, name="proportion")
        tm.assert_series_equal(ser.value_counts(normalize=True), exp)
        tm.assert_series_equal(idx.value_counts(normalize=True), exp)

    def test_value_counts_categorical(self):
        """Unobserved categories are reported with a zero count (GH#12835)."""
        # GH#12835
        cats = Categorical(list("abcccb"), categories=list("cabd"))
        ser = Series(cats, name="xxx")

        # sort=False: the expected index follows the category order "cabd"
        res = ser.value_counts(sort=False)
        exp_index = CategoricalIndex(
            list("cabd"), categories=cats.categories, name="xxx"
        )
        exp = Series([3, 1, 2, 0], name="count", index=exp_index)
        tm.assert_series_equal(res, exp)

        # sort=True: the expected index is ordered by descending count
        res = ser.value_counts(sort=True)
        exp_index = CategoricalIndex(
            list("cbad"), categories=cats.categories, name="xxx"
        )
        exp = Series([3, 2, 1, 0], name="count", index=exp_index)
        tm.assert_series_equal(res, exp)

        # check object dtype handles the Series.name as the same
        # (tested in tests/base)
        ser = Series(["a", "b", "c", "c", "c", "b"], name="xxx")
        res = ser.value_counts()
        exp = Series([3, 2, 1], name="count", index=Index(["c", "b", "a"], name="xxx"))
        tm.assert_series_equal(res, exp)

    def test_value_counts_categorical_with_nan(self):
        """Check ``dropna`` handling for categorical data (GH#9443)."""
        # see GH#9443

        # sanity check: with no missing values, both ``dropna`` settings
        # are expected to produce the same counts
        ser = Series(["a", "b", "a"], dtype="category")
        exp = Series([2, 1], index=CategoricalIndex(["a", "b"]), name="count")

        res = ser.value_counts(dropna=True)
        tm.assert_series_equal(res, exp)

        res = ser.value_counts(dropna=False)
        tm.assert_series_equal(res, exp)

        # same Series via two different constructions --> same behaviour
        series = [
            Series(["a", "b", None, "a", None, None], dtype="category"),
            Series(
                Categorical(["a", "b", None, "a", None, None], categories=["a", "b"])
            ),
        ]

        for ser in series:
            # None is a NaN value, so we exclude its count here
            exp = Series([2, 1], index=CategoricalIndex(["a", "b"]), name="count")
            res = ser.value_counts(dropna=True)
            tm.assert_series_equal(res, exp)

            # we don't exclude the count of None and sort by counts
            exp = Series(
                [3, 2, 1], index=CategoricalIndex([np.nan, "a", "b"]), name="count"
            )
            res = ser.value_counts(dropna=False)
            tm.assert_series_equal(res, exp)

            # When we aren't sorting by counts, and np.nan isn't a
            # category, it should be last.
            exp = Series(
                [2, 1, 3], index=CategoricalIndex(["a", "b", np.nan]), name="count"
            )
            res = ser.value_counts(dropna=False, sort=False)
            tm.assert_series_equal(res, exp)

    @pytest.mark.parametrize(
        "ser, dropna, exp",
        [
            (
                Series([False, True, True, pd.NA]),
                False,
                Series([2, 1, 1], index=[True, False, pd.NA], name="count"),
            ),
            (
                Series([False, True, True, pd.NA]),
                True,
                Series([2, 1], index=Index([True, False], dtype=object), name="count"),
            ),
            (
                # this case passes an Index, not a Series, as ``ser``
                Series(range(3), index=[True, False, np.nan]).index,
                False,
                Series([1, 1, 1], index=[True, False, np.nan], name="count"),
            ),
        ],
    )
    def test_value_counts_bool_with_nan(self, ser, dropna, exp):
        """Booleans mixed with a missing value are counted per ``dropna`` (GH32146)."""
        # GH32146
        out = ser.value_counts(dropna=dropna)
        tm.assert_series_equal(out, exp)

    @pytest.mark.parametrize(
        "input_array,expected",
        [
            (
                [1 + 1j, 1 + 1j, 1, 3j, 3j, 3j],
                Series(
                    [3, 2, 1],
                    index=Index([3j, 1 + 1j, 1], dtype=np.complex128),
                    name="count",
                ),
            ),
            (
                np.array([1 + 1j, 1 + 1j, 1, 3j, 3j, 3j], dtype=np.complex64),
                Series(
                    [3, 2, 1],
                    index=Index([3j, 1 + 1j, 1], dtype=np.complex64),
                    name="count",
                ),
            ),
        ],
    )
    def test_value_counts_complex_numbers(self, input_array, expected):
        """Complex input keeps its complex dtype in the result index (GH 17927)."""
        # GH 17927
        result = Series(input_array).value_counts()
        tm.assert_series_equal(result, expected)

    def test_value_counts_masked(self):
        """Int64 input gives an Int64 index and Int64 counts (GH#54984)."""
        # GH#54984
        dtype = "Int64"
        ser = Series([1, 2, None, 2, None, 3], dtype=dtype)
        result = ser.value_counts(dropna=False)
        expected = Series(
            [2, 2, 1, 1],
            index=Index([2, None, 1, 3], dtype=dtype),
            dtype=dtype,
            name="count",
        )
        tm.assert_series_equal(result, expected)

        result = ser.value_counts(dropna=True)
        expected = Series(
            [2, 1, 1], index=Index([2, 1, 3], dtype=dtype), dtype=dtype, name="count"
        )
        tm.assert_series_equal(result, expected)
|
||||
@ -0,0 +1,29 @@
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
from pandas import (
|
||||
IntervalIndex,
|
||||
Series,
|
||||
period_range,
|
||||
)
|
||||
import pandas._testing as tm
|
||||
|
||||
|
||||
class TestValues:
    """Tests for the ``Series.values`` accessor."""

    @pytest.mark.parametrize(
        "data",
        [
            period_range("2000", periods=4),
            IntervalIndex.from_breaks([1, 2, 3, 4]),
        ],
    )
    def test_values_object_extension_dtypes(self, data):
        """``.values`` must equal the object-cast of Period/Interval input."""
        # https://github.com/pandas-dev/pandas/issues/23995
        from_series = Series(data).values
        as_objects = np.array(data.astype(object))
        tm.assert_numpy_array_equal(from_series, as_objects)

    def test_values(self, datetime_series):
        """``.values`` holds the same elements as iterating over the Series."""
        # ``datetime_series`` is a fixture defined outside this file.
        from_values = datetime_series.values
        from_iteration = list(datetime_series)
        tm.assert_almost_equal(from_values, from_iteration, check_dtype=False)
|
||||
@ -0,0 +1,61 @@
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
from pandas import (
|
||||
Index,
|
||||
Series,
|
||||
array,
|
||||
date_range,
|
||||
)
|
||||
import pandas._testing as tm
|
||||
|
||||
# Series.view is deprecated: ignore its FutureWarning for the tests in this module.
pytestmark = pytest.mark.filterwarnings(
|
||||
"ignore:Series.view is deprecated and will be removed in a future version.:FutureWarning" # noqa: E501
|
||||
)
|
||||
|
||||
|
||||
class TestView:
    """Tests for reinterpreting int64 and datetime-like data through ``.view``."""

    def test_view_i8_to_datetimelike(self):
        """An int64 Series can be viewed as tz-aware datetime64 and as period."""
        dti = date_range("2000", periods=4, tz="US/Central")
        ser = Series(dti.asi8)

        result = ser.view(dti.dtype)
        # the comparison target is ``dti``'s array with its freq removed
        tm.assert_datetime_array_equal(result._values, dti._data._with_freq(None))

        pi = dti.tz_localize(None).to_period("D")
        ser = Series(pi.asi8)
        result = ser.view(pi.dtype)
        tm.assert_period_array_equal(result._values, pi._data)

    def test_view_tz(self):
        """Viewing a tz-aware Series as ``i8`` yields its integer values (GH#24024)."""
        # GH#24024
        ser = Series(date_range("2000", periods=4, tz="US/Central"))
        result = ser.view("i8")
        # nanoseconds since the epoch; the first value is 2000-01-01 06:00 UTC,
        # i.e. midnight in US/Central, and each step is one day
        expected = Series(
            [
                946706400000000000,
                946792800000000000,
                946879200000000000,
                946965600000000000,
            ]
        )
        tm.assert_series_equal(result, expected)

    @pytest.mark.parametrize(
        "first", ["m8[ns]", "M8[ns]", "M8[ns, US/Central]", "period[D]"]
    )
    @pytest.mark.parametrize(
        "second", ["m8[ns]", "M8[ns]", "M8[ns, US/Central]", "period[D]"]
    )
    @pytest.mark.parametrize("box", [Series, Index, array])
    def test_view_between_datetimelike(self, first, second, box):
        """Chained views between datetime-like dtypes keep the int64 payload.

        The data is viewed as ``first`` and then as ``second``; after each step
        the dtype must match and the ``i8`` view must still equal the original
        ``asi8`` values.
        """
        dti = date_range("2016-01-01", periods=3)

        orig = box(dti)
        obj = orig.view(first)
        assert obj.dtype == first
        tm.assert_numpy_array_equal(np.asarray(obj.view("i8")), dti.asi8)

        res = obj.view(second)
        assert res.dtype == second
        # check the values of the second view (``res``), not ``obj`` again
        tm.assert_numpy_array_equal(np.asarray(res.view("i8")), dti.asi8)
|
||||
Reference in New Issue
Block a user