@@ -0,0 +1,78 @@
import numpy as np
import pytest

import pandas as pd
from pandas import (
    Index,
    MultiIndex,
    Series,
)
import pandas._testing as tm


class TestIndexConstructor:
    # Tests for the Index constructor, specifically for cases that do
    # not return a subclass

    @pytest.mark.parametrize("value", [1, np.int64(1)])
    def test_constructor_corner(self, value):
        # corner case
        msg = (
            r"Index\(\.\.\.\) must be called with a collection of some "
            f"kind, {value} was passed"
        )
        with pytest.raises(TypeError, match=msg):
            Index(value)

    @pytest.mark.parametrize("index_vals", [[("A", 1), "B"], ["B", ("A", 1)]])
    def test_construction_list_mixed_tuples(self, index_vals):
        # see gh-10697: if we are constructing from a mixed list of tuples,
        # make sure that we are independent of the sorting order.
        index = Index(index_vals)
        assert isinstance(index, Index)
        assert not isinstance(index, MultiIndex)

    def test_constructor_cast(self):
        msg = "could not convert string to float"
        with pytest.raises(ValueError, match=msg):
            Index(["a", "b", "c"], dtype=float)

    @pytest.mark.parametrize("tuple_list", [[()], [(), ()]])
    def test_construct_empty_tuples(self, tuple_list):
        # GH #45608
        result = Index(tuple_list)
        expected = MultiIndex.from_tuples(tuple_list)

        tm.assert_index_equal(result, expected)

    def test_index_string_inference(self):
        # GH#54430
        expected = Index(["a", "b"], dtype=pd.StringDtype(na_value=np.nan))
        with pd.option_context("future.infer_string", True):
            ser = Index(["a", "b"])
        tm.assert_index_equal(ser, expected)

        expected = Index(["a", 1], dtype="object")
        with pd.option_context("future.infer_string", True):
            ser = Index(["a", 1])
        tm.assert_index_equal(ser, expected)

    def test_inference_on_pandas_objects(self):
        # GH#56012
        idx = Index([pd.Timestamp("2019-12-31")], dtype=object)
        with tm.assert_produces_warning(FutureWarning, match="Dtype inference"):
            result = Index(idx)
        assert result.dtype != np.object_

        ser = Series([pd.Timestamp("2019-12-31")], dtype=object)

        with tm.assert_produces_warning(FutureWarning, match="Dtype inference"):
            result = Index(ser)
        assert result.dtype != np.object_

    def test_constructor_not_read_only(self):
        # GH#57130
        ser = Series([1, 2], dtype=object)
        with pd.option_context("mode.copy_on_write", True):
            idx = Index(ser)
        assert idx._values.flags.writeable
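
A quick illustration of the constructor behavior these tests pin down (illustrative only, not part of the commit; outputs mirror the expectations asserted above):

import pandas as pd

# A scalar is rejected outright:
# pd.Index(1) -> TypeError: Index(...) must be called with a collection of some kind

# A mixed list of tuples and scalars stays a flat object-dtype Index,
# while an all-tuple list is promoted to a MultiIndex:
pd.Index([("A", 1), "B"])       # Index([('A', 1), 'B'], dtype='object')
pd.Index([("A", 1), ("B", 2)])  # a two-level MultiIndex
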
@@ -0,0 +1,163 @@
import numpy as np
import pytest

from pandas._config import using_string_dtype
import pandas._config.config as cf

from pandas import Index
import pandas._testing as tm


class TestIndexRendering:
    def test_repr_is_valid_construction_code(self):
        # for the case of Index, where the repr is traditional rather than
        # stylized
        idx = Index(["a", "b"])
        res = eval(repr(idx))
        tm.assert_index_equal(res, idx)

    @pytest.mark.xfail(using_string_dtype(), reason="repr different")
    @pytest.mark.parametrize(
        "index,expected",
        [
            # ASCII
            # short
            (
                Index(["a", "bb", "ccc"]),
                """Index(['a', 'bb', 'ccc'], dtype='object')""",
            ),
            # multiple lines
            (
                Index(["a", "bb", "ccc"] * 10),
                "Index(['a', 'bb', 'ccc', 'a', 'bb', 'ccc', 'a', "
                "'bb', 'ccc', 'a', 'bb', 'ccc',\n"
                "       'a', 'bb', 'ccc', 'a', 'bb', 'ccc', 'a', "
                "'bb', 'ccc', 'a', 'bb', 'ccc',\n"
                "       'a', 'bb', 'ccc', 'a', 'bb', 'ccc'],\n"
                "      dtype='object')",
            ),
            # truncated
            (
                Index(["a", "bb", "ccc"] * 100),
                "Index(['a', 'bb', 'ccc', 'a', 'bb', 'ccc', 'a', 'bb', 'ccc', 'a',\n"
                "       ...\n"
                "       'ccc', 'a', 'bb', 'ccc', 'a', 'bb', 'ccc', 'a', 'bb', 'ccc'],\n"
                "      dtype='object', length=300)",
            ),
            # Non-ASCII
            # short
            (
                Index(["あ", "いい", "ううう"]),
                """Index(['あ', 'いい', 'ううう'], dtype='object')""",
            ),
            # multiple lines
            (
                Index(["あ", "いい", "ううう"] * 10),
                (
                    "Index(['あ', 'いい', 'ううう', 'あ', 'いい', 'ううう', "
                    "'あ', 'いい', 'ううう', 'あ', 'いい', 'ううう',\n"
                    "       'あ', 'いい', 'ううう', 'あ', 'いい', 'ううう', "
                    "'あ', 'いい', 'ううう', 'あ', 'いい', 'ううう',\n"
                    "       'あ', 'いい', 'ううう', 'あ', 'いい', "
                    "'ううう'],\n"
                    "      dtype='object')"
                ),
            ),
            # truncated
            (
                Index(["あ", "いい", "ううう"] * 100),
                (
                    "Index(['あ', 'いい', 'ううう', 'あ', 'いい', 'ううう', "
                    "'あ', 'いい', 'ううう', 'あ',\n"
                    "       ...\n"
                    "       'ううう', 'あ', 'いい', 'ううう', 'あ', 'いい', "
                    "'ううう', 'あ', 'いい', 'ううう'],\n"
                    "      dtype='object', length=300)"
                ),
            ),
        ],
    )
    def test_string_index_repr(self, index, expected):
        result = repr(index)
        assert result == expected

    @pytest.mark.xfail(using_string_dtype(), reason="repr different")
    @pytest.mark.parametrize(
        "index,expected",
        [
            # short
            (
                Index(["あ", "いい", "ううう"]),
                ("Index(['あ', 'いい', 'ううう'], dtype='object')"),
            ),
            # multiple lines
            (
                Index(["あ", "いい", "ううう"] * 10),
                (
                    "Index(['あ', 'いい', 'ううう', 'あ', 'いい', "
                    "'ううう', 'あ', 'いい', 'ううう',\n"
                    "       'あ', 'いい', 'ううう', 'あ', 'いい', "
                    "'ううう', 'あ', 'いい', 'ううう',\n"
                    "       'あ', 'いい', 'ううう', 'あ', 'いい', "
                    "'ううう', 'あ', 'いい', 'ううう',\n"
                    "       'あ', 'いい', 'ううう'],\n"
                    "      dtype='object')"
                    ""
                ),
            ),
            # truncated
            (
                Index(["あ", "いい", "ううう"] * 100),
                (
                    "Index(['あ', 'いい', 'ううう', 'あ', 'いい', "
                    "'ううう', 'あ', 'いい', 'ううう',\n"
                    "       'あ',\n"
                    "       ...\n"
                    "       'ううう', 'あ', 'いい', 'ううう', 'あ', "
                    "'いい', 'ううう', 'あ', 'いい',\n"
                    "       'ううう'],\n"
                    "      dtype='object', length=300)"
                ),
            ),
        ],
    )
    def test_string_index_repr_with_unicode_option(self, index, expected):
        # Enable Unicode option -----------------------------------------
        with cf.option_context("display.unicode.east_asian_width", True):
            result = repr(index)
            assert result == expected

    def test_repr_summary(self):
        with cf.option_context("display.max_seq_items", 10):
            result = repr(Index(np.arange(1000)))
            assert len(result) < 200
            assert "..." in result

    def test_summary_bug(self):
        # GH#3869
        ind = Index(["{other}%s", "~:{range}:0"], name="A")
        result = ind._summary()
        # shouldn't be formatted accidentally.
        assert "~:{range}:0" in result
        assert "{other}%s" in result

    def test_index_repr_bool_nan(self):
        # GH32146
        arr = Index([True, False, np.nan], dtype=object)
        msg = "Index.format is deprecated"
        with tm.assert_produces_warning(FutureWarning, match=msg):
            exp1 = arr.format()
        out1 = ["True", "False", "NaN"]
        assert out1 == exp1

        exp2 = repr(arr)
        out2 = "Index([True, False, nan], dtype='object')"
        assert out2 == exp2

    def test_format_different_scalar_lengths(self):
        # GH#35439
        idx = Index(["aaaaaaaaa", "b"])
        expected = ["aaaaaaaaa", "b"]
        msg = r"Index\.format is deprecated"
        with tm.assert_produces_warning(FutureWarning, match=msg):
            assert idx.format() == expected
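
The truncation exercised by test_repr_summary can be reproduced directly; a minimal sketch (not part of the commit):

import numpy as np
import pandas as pd

with pd.option_context("display.max_seq_items", 10):
    print(repr(pd.Index(np.arange(1000))))  # middle elided as "..."
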
@@ -0,0 +1,104 @@
import numpy as np
import pytest

from pandas._libs import index as libindex

import pandas as pd
from pandas import (
    Index,
    NaT,
)
import pandas._testing as tm


class TestGetSliceBounds:
    @pytest.mark.parametrize("side, expected", [("left", 4), ("right", 5)])
    def test_get_slice_bounds_within(self, side, expected):
        index = Index(list("abcdef"))
        result = index.get_slice_bound("e", side=side)
        assert result == expected

    @pytest.mark.parametrize("side", ["left", "right"])
    @pytest.mark.parametrize(
        "data, bound, expected", [(list("abcdef"), "x", 6), (list("bcdefg"), "a", 0)]
    )
    def test_get_slice_bounds_outside(self, side, expected, data, bound):
        index = Index(data)
        result = index.get_slice_bound(bound, side=side)
        assert result == expected

    def test_get_slice_bounds_invalid_side(self):
        with pytest.raises(ValueError, match="Invalid value for side kwarg"):
            Index([]).get_slice_bound("a", side="middle")


class TestGetIndexerNonUnique:
    def test_get_indexer_non_unique_dtype_mismatch(self):
        # GH#25459
        indexes, missing = Index(["A", "B"]).get_indexer_non_unique(Index([0]))
        tm.assert_numpy_array_equal(np.array([-1], dtype=np.intp), indexes)
        tm.assert_numpy_array_equal(np.array([0], dtype=np.intp), missing)

    @pytest.mark.parametrize(
        "idx_values,idx_non_unique",
        [
            ([np.nan, 100, 200, 100], [np.nan, 100]),
            ([np.nan, 100.0, 200.0, 100.0], [np.nan, 100.0]),
        ],
    )
    def test_get_indexer_non_unique_int_index(self, idx_values, idx_non_unique):
        indexes, missing = Index(idx_values).get_indexer_non_unique(Index([np.nan]))
        tm.assert_numpy_array_equal(np.array([0], dtype=np.intp), indexes)
        tm.assert_numpy_array_equal(np.array([], dtype=np.intp), missing)

        indexes, missing = Index(idx_values).get_indexer_non_unique(
            Index(idx_non_unique)
        )
        tm.assert_numpy_array_equal(np.array([0, 1, 3], dtype=np.intp), indexes)
        tm.assert_numpy_array_equal(np.array([], dtype=np.intp), missing)


class TestGetLoc:
    @pytest.mark.slow  # to_flat_index takes a while
    def test_get_loc_tuple_monotonic_above_size_cutoff(self, monkeypatch):
        # Go through the libindex path for which using
        # _bin_search vs ndarray.searchsorted makes a difference

        with monkeypatch.context():
            monkeypatch.setattr(libindex, "_SIZE_CUTOFF", 100)
            lev = list("ABCD")
            dti = pd.date_range("2016-01-01", periods=10)

            mi = pd.MultiIndex.from_product([lev, range(5), dti])
            oidx = mi.to_flat_index()

            loc = len(oidx) // 2
            tup = oidx[loc]

            res = oidx.get_loc(tup)
        assert res == loc

    def test_get_loc_nan_object_dtype_nonmonotonic_nonunique(self):
        # case that goes through _maybe_get_bool_indexer
        idx = Index(["foo", np.nan, None, "foo", 1.0, None], dtype=object)

        # we don't raise KeyError on nan
        res = idx.get_loc(np.nan)
        assert res == 1

        # we only match on None, not on np.nan
        res = idx.get_loc(None)
        expected = np.array([False, False, True, False, False, True])
        tm.assert_numpy_array_equal(res, expected)

        # we don't match at all on mismatched NA
        with pytest.raises(KeyError, match="NaT"):
            idx.get_loc(NaT)


def test_getitem_boolean_ea_indexer():
    # GH#45806
    ser = pd.Series([True, False, pd.NA], dtype="boolean")
    result = ser.index[ser]
    expected = Index([0])
    tm.assert_index_equal(result, expected)
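
For reference, the slice-bound semantics asserted above, shown interactively (illustrative only):

import pandas as pd

idx = pd.Index(list("abcdef"))
idx.get_slice_bound("e", side="left")   # 4: first position of "e"
idx.get_slice_bound("e", side="right")  # 5: position just past "e"
idx.get_slice_bound("x", side="left")   # 6: past the end for a too-large label
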
@@ -0,0 +1,11 @@
from pandas import Index
import pandas._testing as tm


def test_pickle_preserves_object_dtype():
    # GH#43188, GH#43155 don't infer numeric dtype
    index = Index([1, 2, 3], dtype=object)

    result = tm.round_trip_pickle(index)
    assert result.dtype == object
    tm.assert_index_equal(index, result)
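
tm.round_trip_pickle amounts to a pickle round trip, so the behavior under test is essentially this sketch (illustrative, not part of the commit):

import pickle
import pandas as pd

idx = pd.Index([1, 2, 3], dtype=object)
restored = pickle.loads(pickle.dumps(idx))
assert restored.dtype == object  # no re-inference to int64 on unpickling
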
@@ -0,0 +1,97 @@
"""
Tests for ndarray-like method on the base Index class
"""
import numpy as np
import pytest

import pandas as pd
from pandas import Index
import pandas._testing as tm


class TestReshape:
    def test_repeat(self):
        repeats = 2
        index = Index([1, 2, 3])
        expected = Index([1, 1, 2, 2, 3, 3])

        result = index.repeat(repeats)
        tm.assert_index_equal(result, expected)

    def test_insert(self):
        # GH 7256
        # validate neg/pos inserts
        result = Index(["b", "c", "d"])

        # test 0th element
        tm.assert_index_equal(Index(["a", "b", "c", "d"]), result.insert(0, "a"))

        # test Nth element that follows Python list behavior
        tm.assert_index_equal(Index(["b", "c", "e", "d"]), result.insert(-1, "e"))

        # test loc +/- neq (0, -1)
        tm.assert_index_equal(result.insert(1, "z"), result.insert(-2, "z"))

        # test empty
        null_index = Index([])
        tm.assert_index_equal(Index(["a"]), null_index.insert(0, "a"))

    def test_insert_missing(self, request, nulls_fixture, using_infer_string):
        if using_infer_string and nulls_fixture is pd.NA:
            request.applymarker(pytest.mark.xfail(reason="TODO(infer_string)"))
        # GH#22295
        # test there is no mangling of NA values
        expected = Index(["a", nulls_fixture, "b", "c"], dtype=object)
        result = Index(list("abc"), dtype=object).insert(
            1, Index([nulls_fixture], dtype=object)
        )
        tm.assert_index_equal(result, expected)

    @pytest.mark.parametrize(
        "val", [(1, 2), np.datetime64("2019-12-31"), np.timedelta64(1, "D")]
    )
    @pytest.mark.parametrize("loc", [-1, 2])
    def test_insert_datetime_into_object(self, loc, val):
        # GH#44509
        idx = Index(["1", "2", "3"])
        result = idx.insert(loc, val)
        expected = Index(["1", "2", val, "3"])
        tm.assert_index_equal(result, expected)
        assert type(expected[2]) is type(val)

    def test_insert_none_into_string_numpy(self, string_dtype_no_object):
        # GH#55365
        index = Index(["a", "b", "c"], dtype=string_dtype_no_object)
        result = index.insert(-1, None)
        expected = Index(["a", "b", None, "c"], dtype=string_dtype_no_object)
        tm.assert_index_equal(result, expected)

    @pytest.mark.parametrize(
        "pos,expected",
        [
            (0, Index(["b", "c", "d"], name="index")),
            (-1, Index(["a", "b", "c"], name="index")),
        ],
    )
    def test_delete(self, pos, expected):
        index = Index(["a", "b", "c", "d"], name="index")
        result = index.delete(pos)
        tm.assert_index_equal(result, expected)
        assert result.name == expected.name

    def test_delete_raises(self):
        index = Index(["a", "b", "c", "d"], name="index")
        msg = "index 5 is out of bounds for axis 0 with size 4"
        with pytest.raises(IndexError, match=msg):
            index.delete(5)

    def test_append_multiple(self):
        index = Index(["a", "b", "c", "d", "e", "f"])

        foos = [index[:2], index[2:4], index[4:]]
        result = foos[0].append(foos[1:])
        tm.assert_index_equal(result, index)

        # empty
        result = index.append([])
        tm.assert_index_equal(result, index)
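
The list-like insert/delete semantics tested above, in short (illustrative only):

import pandas as pd

idx = pd.Index(["b", "c", "d"])
idx.insert(0, "a")   # Index(['a', 'b', 'c', 'd'], dtype='object')
idx.insert(-1, "e")  # Index(['b', 'c', 'e', 'd'], ...): Python list behavior
idx.delete(0)        # Index(['c', 'd'], dtype='object')
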
@@ -0,0 +1,266 @@
from datetime import datetime

import numpy as np
import pytest

import pandas as pd
from pandas import (
    Index,
    Series,
)
import pandas._testing as tm
from pandas.core.algorithms import safe_sort


def equal_contents(arr1, arr2) -> bool:
    """
    Checks if the set of unique elements of arr1 and arr2 are equivalent.
    """
    return frozenset(arr1) == frozenset(arr2)


class TestIndexSetOps:
    @pytest.mark.parametrize(
        "method", ["union", "intersection", "difference", "symmetric_difference"]
    )
    def test_setops_sort_validation(self, method):
        idx1 = Index(["a", "b"])
        idx2 = Index(["b", "c"])

        with pytest.raises(ValueError, match="The 'sort' keyword only takes"):
            getattr(idx1, method)(idx2, sort=2)

        # sort=True is supported as of GH#??
        getattr(idx1, method)(idx2, sort=True)

    def test_setops_preserve_object_dtype(self):
        idx = Index([1, 2, 3], dtype=object)
        result = idx.intersection(idx[1:])
        expected = idx[1:]
        tm.assert_index_equal(result, expected)

        # if other is not monotonic increasing, intersection goes through
        # a different route
        result = idx.intersection(idx[1:][::-1])
        tm.assert_index_equal(result, expected)

        result = idx._union(idx[1:], sort=None)
        expected = idx
        tm.assert_numpy_array_equal(result, expected.values)

        result = idx.union(idx[1:], sort=None)
        tm.assert_index_equal(result, expected)

        # if other is not monotonic increasing, _union goes through
        # a different route
        result = idx._union(idx[1:][::-1], sort=None)
        tm.assert_numpy_array_equal(result, expected.values)

        result = idx.union(idx[1:][::-1], sort=None)
        tm.assert_index_equal(result, expected)

    def test_union_base(self):
        index = Index([0, "a", 1, "b", 2, "c"])
        first = index[3:]
        second = index[:5]

        result = first.union(second)

        expected = Index([0, 1, 2, "a", "b", "c"])
        tm.assert_index_equal(result, expected)

    @pytest.mark.parametrize("klass", [np.array, Series, list])
    def test_union_different_type_base(self, klass):
        # GH 10149
        index = Index([0, "a", 1, "b", 2, "c"])
        first = index[3:]
        second = index[:5]

        result = first.union(klass(second.values))

        assert equal_contents(result, index)

    def test_union_sort_other_incomparable(self):
        # https://github.com/pandas-dev/pandas/issues/24959
        idx = Index([1, pd.Timestamp("2000")])
        # default (sort=None)
        with tm.assert_produces_warning(RuntimeWarning):
            result = idx.union(idx[:1])

        tm.assert_index_equal(result, idx)

        # sort=None
        with tm.assert_produces_warning(RuntimeWarning):
            result = idx.union(idx[:1], sort=None)
        tm.assert_index_equal(result, idx)

        # sort=False
        result = idx.union(idx[:1], sort=False)
        tm.assert_index_equal(result, idx)

    def test_union_sort_other_incomparable_true(self):
        idx = Index([1, pd.Timestamp("2000")])
        with pytest.raises(TypeError, match=".*"):
            idx.union(idx[:1], sort=True)

    def test_intersection_equal_sort_true(self):
        idx = Index(["c", "a", "b"])
        sorted_ = Index(["a", "b", "c"])
        tm.assert_index_equal(idx.intersection(idx, sort=True), sorted_)

    def test_intersection_base(self, sort):
        # (same results for py2 and py3 but sortedness not tested elsewhere)
        index = Index([0, "a", 1, "b", 2, "c"])
        first = index[:5]
        second = index[:3]

        expected = Index([0, 1, "a"]) if sort is None else Index([0, "a", 1])
        result = first.intersection(second, sort=sort)
        tm.assert_index_equal(result, expected)

    @pytest.mark.parametrize("klass", [np.array, Series, list])
    def test_intersection_different_type_base(self, klass, sort):
        # GH 10149
        index = Index([0, "a", 1, "b", 2, "c"])
        first = index[:5]
        second = index[:3]

        result = first.intersection(klass(second.values), sort=sort)
        assert equal_contents(result, second)

    def test_intersection_nosort(self):
        result = Index(["c", "b", "a"]).intersection(["b", "a"])
        expected = Index(["b", "a"])
        tm.assert_index_equal(result, expected)

    def test_intersection_equal_sort(self):
        idx = Index(["c", "a", "b"])
        tm.assert_index_equal(idx.intersection(idx, sort=False), idx)
        tm.assert_index_equal(idx.intersection(idx, sort=None), idx)

    def test_intersection_str_dates(self, sort):
        dt_dates = [datetime(2012, 2, 9), datetime(2012, 2, 22)]

        i1 = Index(dt_dates, dtype=object)
        i2 = Index(["aa"], dtype=object)
        result = i2.intersection(i1, sort=sort)

        assert len(result) == 0

    @pytest.mark.parametrize(
        "index2,expected_arr",
        [(Index(["B", "D"]), ["B"]), (Index(["B", "D", "A"]), ["A", "B"])],
    )
    def test_intersection_non_monotonic_non_unique(self, index2, expected_arr, sort):
        # non-monotonic non-unique
        index1 = Index(["A", "B", "A", "C"])
        expected = Index(expected_arr)
        result = index1.intersection(index2, sort=sort)
        if sort is None:
            expected = expected.sort_values()
        tm.assert_index_equal(result, expected)

    def test_difference_base(self, sort):
        # (same results for py2 and py3 but sortedness not tested elsewhere)
        index = Index([0, "a", 1, "b", 2, "c"])
        first = index[:4]
        second = index[3:]

        result = first.difference(second, sort)
        expected = Index([0, "a", 1])
        if sort is None:
            expected = Index(safe_sort(expected))
        tm.assert_index_equal(result, expected)

    def test_symmetric_difference(self):
        # (same results for py2 and py3 but sortedness not tested elsewhere)
        index = Index([0, "a", 1, "b", 2, "c"])
        first = index[:4]
        second = index[3:]

        result = first.symmetric_difference(second)
        expected = Index([0, 1, 2, "a", "c"])
        tm.assert_index_equal(result, expected)

    @pytest.mark.parametrize(
        "method,expected,sort",
        [
            (
                "intersection",
                np.array(
                    [(1, "A"), (2, "A"), (1, "B"), (2, "B")],
                    dtype=[("num", int), ("let", "S1")],
                ),
                False,
            ),
            (
                "intersection",
                np.array(
                    [(1, "A"), (1, "B"), (2, "A"), (2, "B")],
                    dtype=[("num", int), ("let", "S1")],
                ),
                None,
            ),
            (
                "union",
                np.array(
                    [(1, "A"), (1, "B"), (1, "C"), (2, "A"), (2, "B"), (2, "C")],
                    dtype=[("num", int), ("let", "S1")],
                ),
                None,
            ),
        ],
    )
    def test_tuple_union_bug(self, method, expected, sort):
        index1 = Index(
            np.array(
                [(1, "A"), (2, "A"), (1, "B"), (2, "B")],
                dtype=[("num", int), ("let", "S1")],
            )
        )
        index2 = Index(
            np.array(
                [(1, "A"), (2, "A"), (1, "B"), (2, "B"), (1, "C"), (2, "C")],
                dtype=[("num", int), ("let", "S1")],
            )
        )

        result = getattr(index1, method)(index2, sort=sort)
        assert result.ndim == 1

        expected = Index(expected)
        tm.assert_index_equal(result, expected)

    @pytest.mark.parametrize("first_list", [["b", "a"], []])
    @pytest.mark.parametrize("second_list", [["a", "b"], []])
    @pytest.mark.parametrize(
        "first_name, second_name, expected_name",
        [("A", "B", None), (None, "B", None), ("A", None, None)],
    )
    def test_union_name_preservation(
        self, first_list, second_list, first_name, second_name, expected_name, sort
    ):
        first = Index(first_list, name=first_name)
        second = Index(second_list, name=second_name)
        union = first.union(second, sort=sort)

        vals = set(first_list).union(second_list)

        if sort is None and len(first_list) > 0 and len(second_list) > 0:
            expected = Index(sorted(vals), name=expected_name)
            tm.assert_index_equal(union, expected)
        else:
            expected = Index(vals, name=expected_name)
            tm.assert_index_equal(union.sort_values(), expected.sort_values())

    @pytest.mark.parametrize(
        "diff_type, expected",
        [["difference", [1, "B"]], ["symmetric_difference", [1, 2, "B", "C"]]],
    )
    def test_difference_object_type(self, diff_type, expected):
        # GH 13432
        idx1 = Index([0, 1, "A", "B"])
        idx2 = Index([0, 2, "A", "C"])
        result = getattr(idx1, diff_type)(idx2)
        expected = Index(expected)
        tm.assert_index_equal(result, expected)
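
A compact reminder of the sort keyword semantics these tests rely on (illustrative only; the sort=False ordering shown assumes order-of-appearance, which is what the tests above exercise):

import pandas as pd

a = pd.Index(["b", "a"])
b = pd.Index(["a", "c"])
a.union(b)              # default sort=None: Index(['a', 'b', 'c'], dtype='object')
a.union(b, sort=False)  # Index(['b', 'a', 'c'], dtype='object')
# a.union(b, sort=2)    # ValueError: the 'sort' keyword only takes None/True/False
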
@@ -0,0 +1,13 @@
import numpy as np

from pandas import Index
import pandas._testing as tm


class TestWhere:
    def test_where_intlike_str_doesnt_cast_ints(self):
        idx = Index(range(3))
        mask = np.array([True, False, True])
        res = idx.where(mask, "2")
        expected = Index([0, "2", 2])
        tm.assert_index_equal(res, expected)
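
The non-casting behavior under test, spelled out (illustrative only):

import numpy as np
import pandas as pd

idx = pd.Index(range(3))
idx.where(np.array([True, False, True]), "2")
# Index([0, '2', 2], dtype='object'): the ints are kept as ints, not cast to str
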
@@ -0,0 +1,62 @@
import pytest

from pandas import (
    CategoricalIndex,
    Index,
)
import pandas._testing as tm


class TestAppend:
    @pytest.fixture
    def ci(self):
        categories = list("cab")
        return CategoricalIndex(list("aabbca"), categories=categories, ordered=False)

    def test_append(self, ci):
        # append cats with the same categories
        result = ci[:3].append(ci[3:])
        tm.assert_index_equal(result, ci, exact=True)

        foos = [ci[:1], ci[1:3], ci[3:]]
        result = foos[0].append(foos[1:])
        tm.assert_index_equal(result, ci, exact=True)

    def test_append_empty(self, ci):
        # empty
        result = ci.append([])
        tm.assert_index_equal(result, ci, exact=True)

    def test_append_mismatched_categories(self, ci):
        # appending with different categories or reordered is not ok
        msg = "all inputs must be Index"
        with pytest.raises(TypeError, match=msg):
            ci.append(ci.values.set_categories(list("abcd")))
        with pytest.raises(TypeError, match=msg):
            ci.append(ci.values.reorder_categories(list("abc")))

    def test_append_category_objects(self, ci):
        # with objects
        result = ci.append(Index(["c", "a"]))
        expected = CategoricalIndex(list("aabbcaca"), categories=ci.categories)
        tm.assert_index_equal(result, expected, exact=True)

    def test_append_non_categories(self, ci):
        # invalid objects -> cast to object via concat_compat
        result = ci.append(Index(["a", "d"]))
        expected = Index(["a", "a", "b", "b", "c", "a", "a", "d"])
        tm.assert_index_equal(result, expected, exact=True)

    def test_append_object(self, ci):
        # GH#14298 - if base object is not categorical -> coerce to object
        result = Index(["c", "a"]).append(ci)
        expected = Index(list("caaabbca"))
        tm.assert_index_equal(result, expected, exact=True)

    def test_append_to_another(self):
        # hits Index._concat
        fst = Index(["a", "b"])
        snd = CategoricalIndex(["d", "e"])
        result = fst.append(snd)
        expected = Index(["a", "b", "d", "e"])
        tm.assert_index_equal(result, expected)
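
In short, the append coercion rules these tests pin down (illustrative only):

import pandas as pd

ci = pd.CategoricalIndex(["a", "b"], categories=["a", "b", "c"])
ci.append(pd.Index(["c"]))  # "c" is a known category -> result stays categorical
ci.append(pd.Index(["d"]))  # "d" is not -> result falls back to an object Index
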
@@ -0,0 +1,90 @@
from datetime import date

import numpy as np
import pytest

from pandas import (
    Categorical,
    CategoricalDtype,
    CategoricalIndex,
    Index,
    IntervalIndex,
)
import pandas._testing as tm


class TestAstype:
    def test_astype(self):
        ci = CategoricalIndex(list("aabbca"), categories=list("cab"), ordered=False)

        result = ci.astype(object)
        tm.assert_index_equal(result, Index(np.array(ci), dtype=object))

        # this IS equal, but not the same class
        assert result.equals(ci)
        assert isinstance(result, Index)
        assert not isinstance(result, CategoricalIndex)

        # interval
        ii = IntervalIndex.from_arrays(left=[-0.001, 2.0], right=[2, 4], closed="right")

        ci = CategoricalIndex(
            Categorical.from_codes([0, 1, -1], categories=ii, ordered=True)
        )

        result = ci.astype("interval")
        expected = ii.take([0, 1, -1], allow_fill=True, fill_value=np.nan)
        tm.assert_index_equal(result, expected)

        result = IntervalIndex(result.values)
        tm.assert_index_equal(result, expected)

    @pytest.mark.parametrize("name", [None, "foo"])
    @pytest.mark.parametrize("dtype_ordered", [True, False])
    @pytest.mark.parametrize("index_ordered", [True, False])
    def test_astype_category(self, name, dtype_ordered, index_ordered):
        # GH#18630
        index = CategoricalIndex(
            list("aabbca"), categories=list("cab"), ordered=index_ordered
        )
        if name:
            index = index.rename(name)

        # standard categories
        dtype = CategoricalDtype(ordered=dtype_ordered)
        result = index.astype(dtype)
        expected = CategoricalIndex(
            index.tolist(),
            name=name,
            categories=index.categories,
            ordered=dtype_ordered,
        )
        tm.assert_index_equal(result, expected)

        # non-standard categories
        dtype = CategoricalDtype(index.unique().tolist()[:-1], dtype_ordered)
        result = index.astype(dtype)
        expected = CategoricalIndex(index.tolist(), name=name, dtype=dtype)
        tm.assert_index_equal(result, expected)

        if dtype_ordered is False:
            # dtype='category' can't specify ordered, so only test once
            result = index.astype("category")
            expected = index
            tm.assert_index_equal(result, expected)

    @pytest.mark.parametrize("box", [True, False])
    def test_categorical_date_roundtrip(self, box):
        # astype to categorical and back should preserve date objects
        v = date.today()

        obj = Index([v, v])
        assert obj.dtype == object
        if box:
            obj = obj.array

        cat = obj.astype("category")

        rtrip = cat.astype(object)
        assert rtrip.dtype == object
        assert type(rtrip[0]) is date
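
The date round trip asserted above, as a short sketch (illustrative only):

from datetime import date
import pandas as pd

idx = pd.Index([date.today(), date.today()])
back = idx.astype("category").astype(object)
assert type(back[0]) is date  # datetime.date survives the category round trip
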
@@ -0,0 +1,391 @@
import numpy as np
import pytest

from pandas._libs import index as libindex
from pandas._libs.arrays import NDArrayBacked

import pandas as pd
from pandas import (
    Categorical,
    CategoricalDtype,
)
import pandas._testing as tm
from pandas.core.indexes.api import (
    CategoricalIndex,
    Index,
)


class TestCategoricalIndex:
    @pytest.fixture
    def simple_index(self) -> CategoricalIndex:
        return CategoricalIndex(list("aabbca"), categories=list("cab"), ordered=False)

    def test_can_hold_identifiers(self):
        idx = CategoricalIndex(list("aabbca"), categories=None, ordered=False)
        key = idx[0]
        assert idx._can_hold_identifiers_and_holds_name(key) is True

    def test_insert(self, simple_index):
        ci = simple_index
        categories = ci.categories

        # test 0th element
        result = ci.insert(0, "a")
        expected = CategoricalIndex(list("aaabbca"), categories=categories)
        tm.assert_index_equal(result, expected, exact=True)

        # test Nth element that follows Python list behavior
        result = ci.insert(-1, "a")
        expected = CategoricalIndex(list("aabbcaa"), categories=categories)
        tm.assert_index_equal(result, expected, exact=True)

        # test empty
        result = CategoricalIndex([], categories=categories).insert(0, "a")
        expected = CategoricalIndex(["a"], categories=categories)
        tm.assert_index_equal(result, expected, exact=True)

        # invalid -> cast to object
        expected = ci.astype(object).insert(0, "d")
        result = ci.insert(0, "d").astype(object)
        tm.assert_index_equal(result, expected, exact=True)

        # GH 18295 (test missing)
        expected = CategoricalIndex(["a", np.nan, "a", "b", "c", "b"])
        for na in (np.nan, pd.NaT, None):
            result = CategoricalIndex(list("aabcb")).insert(1, na)
            tm.assert_index_equal(result, expected)

    def test_insert_na_mismatched_dtype(self):
        ci = CategoricalIndex([0, 1, 1])
        result = ci.insert(0, pd.NaT)
        expected = Index([pd.NaT, 0, 1, 1], dtype=object)
        tm.assert_index_equal(result, expected)

    def test_delete(self, simple_index):
        ci = simple_index
        categories = ci.categories

        result = ci.delete(0)
        expected = CategoricalIndex(list("abbca"), categories=categories)
        tm.assert_index_equal(result, expected, exact=True)

        result = ci.delete(-1)
        expected = CategoricalIndex(list("aabbc"), categories=categories)
        tm.assert_index_equal(result, expected, exact=True)

        with tm.external_error_raised((IndexError, ValueError)):
            # Either depending on NumPy version
            ci.delete(10)

    @pytest.mark.parametrize(
        "data, non_lexsorted_data",
        [[[1, 2, 3], [9, 0, 1, 2, 3]], [list("abc"), list("fabcd")]],
    )
    def test_is_monotonic(self, data, non_lexsorted_data):
        c = CategoricalIndex(data)
        assert c.is_monotonic_increasing is True
        assert c.is_monotonic_decreasing is False

        c = CategoricalIndex(data, ordered=True)
        assert c.is_monotonic_increasing is True
        assert c.is_monotonic_decreasing is False

        c = CategoricalIndex(data, categories=reversed(data))
        assert c.is_monotonic_increasing is False
        assert c.is_monotonic_decreasing is True

        c = CategoricalIndex(data, categories=reversed(data), ordered=True)
        assert c.is_monotonic_increasing is False
        assert c.is_monotonic_decreasing is True

        # test when data is neither monotonic increasing nor decreasing
        reordered_data = [data[0], data[2], data[1]]
        c = CategoricalIndex(reordered_data, categories=reversed(data))
        assert c.is_monotonic_increasing is False
        assert c.is_monotonic_decreasing is False

        # non lexsorted categories
        categories = non_lexsorted_data

        c = CategoricalIndex(categories[:2], categories=categories)
        assert c.is_monotonic_increasing is True
        assert c.is_monotonic_decreasing is False

        c = CategoricalIndex(categories[1:3], categories=categories)
        assert c.is_monotonic_increasing is True
        assert c.is_monotonic_decreasing is False

    def test_has_duplicates(self):
        idx = CategoricalIndex([0, 0, 0], name="foo")
        assert idx.is_unique is False
        assert idx.has_duplicates is True

        idx = CategoricalIndex([0, 1], categories=[2, 3], name="foo")
        assert idx.is_unique is False
        assert idx.has_duplicates is True

        idx = CategoricalIndex([0, 1, 2, 3], categories=[1, 2, 3], name="foo")
        assert idx.is_unique is True
        assert idx.has_duplicates is False

    @pytest.mark.parametrize(
        "data, categories, expected",
        [
            (
                [1, 1, 1],
                [1, 2, 3],
                {
                    "first": np.array([False, True, True]),
                    "last": np.array([True, True, False]),
                    False: np.array([True, True, True]),
                },
            ),
            (
                [1, 1, 1],
                list("abc"),
                {
                    "first": np.array([False, True, True]),
                    "last": np.array([True, True, False]),
                    False: np.array([True, True, True]),
                },
            ),
            (
                [2, "a", "b"],
                list("abc"),
                {
                    "first": np.zeros(shape=(3), dtype=np.bool_),
                    "last": np.zeros(shape=(3), dtype=np.bool_),
                    False: np.zeros(shape=(3), dtype=np.bool_),
                },
            ),
            (
                list("abb"),
                list("abc"),
                {
                    "first": np.array([False, False, True]),
                    "last": np.array([False, True, False]),
                    False: np.array([False, True, True]),
                },
            ),
        ],
    )
    def test_drop_duplicates(self, data, categories, expected):
        idx = CategoricalIndex(data, categories=categories, name="foo")
        for keep, e in expected.items():
            tm.assert_numpy_array_equal(idx.duplicated(keep=keep), e)
            e = idx[~e]
            result = idx.drop_duplicates(keep=keep)
            tm.assert_index_equal(result, e)

    @pytest.mark.parametrize(
        "data, categories, expected_data",
        [
            ([1, 1, 1], [1, 2, 3], [1]),
            ([1, 1, 1], list("abc"), [np.nan]),
            ([1, 2, "a"], [1, 2, 3], [1, 2, np.nan]),
            ([2, "a", "b"], list("abc"), [np.nan, "a", "b"]),
        ],
    )
    def test_unique(self, data, categories, expected_data, ordered):
        dtype = CategoricalDtype(categories, ordered=ordered)

        idx = CategoricalIndex(data, dtype=dtype)
        expected = CategoricalIndex(expected_data, dtype=dtype)
        tm.assert_index_equal(idx.unique(), expected)

    def test_repr_roundtrip(self):
        ci = CategoricalIndex(["a", "b"], categories=["a", "b"], ordered=True)
        str(ci)
        tm.assert_index_equal(eval(repr(ci)), ci, exact=True)

        # formatting
        str(ci)

        # long format
        # this is not reprable
        ci = CategoricalIndex(np.random.default_rng(2).integers(0, 5, size=100))
        str(ci)

    def test_isin(self):
        ci = CategoricalIndex(list("aabca") + [np.nan], categories=["c", "a", "b"])
        tm.assert_numpy_array_equal(
            ci.isin(["c"]), np.array([False, False, False, True, False, False])
        )
        tm.assert_numpy_array_equal(
            ci.isin(["c", "a", "b"]), np.array([True] * 5 + [False])
        )
        tm.assert_numpy_array_equal(
            ci.isin(["c", "a", "b", np.nan]), np.array([True] * 6)
        )

        # mismatched categorical -> coerced to ndarray so doesn't matter
        result = ci.isin(ci.set_categories(list("abcdefghi")))
        expected = np.array([True] * 6)
        tm.assert_numpy_array_equal(result, expected)

        result = ci.isin(ci.set_categories(list("defghi")))
        expected = np.array([False] * 5 + [True])
        tm.assert_numpy_array_equal(result, expected)

    def test_isin_overlapping_intervals(self):
        # GH 34974
        idx = pd.IntervalIndex([pd.Interval(0, 2), pd.Interval(0, 1)])
        result = CategoricalIndex(idx).isin(idx)
        expected = np.array([True, True])
        tm.assert_numpy_array_equal(result, expected)

    def test_identical(self):
        ci1 = CategoricalIndex(["a", "b"], categories=["a", "b"], ordered=True)
        ci2 = CategoricalIndex(["a", "b"], categories=["a", "b", "c"], ordered=True)
        assert ci1.identical(ci1)
        assert ci1.identical(ci1.copy())
        assert not ci1.identical(ci2)

    def test_ensure_copied_data(self):
        # gh-12309: Check the "copy" argument of each
        # Index.__new__ is honored.
        #
        # Must be tested separately from other indexes because
        # self.values is not an ndarray.
        index = CategoricalIndex(list("ab") * 5)

        result = CategoricalIndex(index.values, copy=True)
        tm.assert_index_equal(index, result)
        assert not np.shares_memory(result._data._codes, index._data._codes)

        result = CategoricalIndex(index.values, copy=False)
        assert result._data._codes is index._data._codes


class TestCategoricalIndex2:
    def test_view_i8(self):
        # GH#25464
        ci = CategoricalIndex(list("ab") * 50)
        msg = "When changing to a larger dtype, its size must be a divisor"
        with pytest.raises(ValueError, match=msg):
            ci.view("i8")
        with pytest.raises(ValueError, match=msg):
            ci._data.view("i8")

        ci = ci[:-4]  # length divisible by 8

        res = ci.view("i8")
        expected = ci._data.codes.view("i8")
        tm.assert_numpy_array_equal(res, expected)

        cat = ci._data
        tm.assert_numpy_array_equal(cat.view("i8"), expected)

    @pytest.mark.parametrize(
        "dtype, engine_type",
        [
            (np.int8, libindex.Int8Engine),
            (np.int16, libindex.Int16Engine),
            (np.int32, libindex.Int32Engine),
            (np.int64, libindex.Int64Engine),
        ],
    )
    def test_engine_type(self, dtype, engine_type):
        if dtype != np.int64:
            # num. of uniques required to push CategoricalIndex.codes to a
            # dtype (128 categories required for .codes dtype to be int16 etc.)
            num_uniques = {np.int8: 1, np.int16: 128, np.int32: 32768}[dtype]
            ci = CategoricalIndex(range(num_uniques))
        else:
            # having 2**32 - 2**31 categories would be very memory-intensive,
            # so we cheat a bit with the dtype
            ci = CategoricalIndex(range(32768))  # == 2**16 - 2**(16 - 1)
            arr = ci.values._ndarray.astype("int64")
            NDArrayBacked.__init__(ci._data, arr, ci.dtype)
        assert np.issubdtype(ci.codes.dtype, dtype)
        assert isinstance(ci._engine, engine_type)

    @pytest.mark.parametrize(
        "func,op_name",
        [
            (lambda idx: idx - idx, "__sub__"),
            (lambda idx: idx + idx, "__add__"),
            (lambda idx: idx - ["a", "b"], "__sub__"),
            (lambda idx: idx + ["a", "b"], "__add__"),
            (lambda idx: ["a", "b"] - idx, "__rsub__"),
            (lambda idx: ["a", "b"] + idx, "__radd__"),
        ],
    )
    def test_disallow_addsub_ops(self, func, op_name):
        # GH 10039
        # set ops (+/-) raise TypeError
        idx = Index(Categorical(["a", "b"]))
        cat_or_list = "'(Categorical|list)' and '(Categorical|list)'"
        msg = "|".join(
            [
                f"cannot perform {op_name} with this index type: CategoricalIndex",
                "can only concatenate list",
                rf"unsupported operand type\(s\) for [\+-]: {cat_or_list}",
            ]
        )
        with pytest.raises(TypeError, match=msg):
            func(idx)

    def test_method_delegation(self):
        ci = CategoricalIndex(list("aabbca"), categories=list("cabdef"))
        result = ci.set_categories(list("cab"))
        tm.assert_index_equal(
            result, CategoricalIndex(list("aabbca"), categories=list("cab"))
        )

        ci = CategoricalIndex(list("aabbca"), categories=list("cab"))
        result = ci.rename_categories(list("efg"))
        tm.assert_index_equal(
            result, CategoricalIndex(list("ffggef"), categories=list("efg"))
        )

        # GH18862 (let rename_categories take callables)
        result = ci.rename_categories(lambda x: x.upper())
        tm.assert_index_equal(
            result, CategoricalIndex(list("AABBCA"), categories=list("CAB"))
        )

        ci = CategoricalIndex(list("aabbca"), categories=list("cab"))
        result = ci.add_categories(["d"])
        tm.assert_index_equal(
            result, CategoricalIndex(list("aabbca"), categories=list("cabd"))
        )

        ci = CategoricalIndex(list("aabbca"), categories=list("cab"))
        result = ci.remove_categories(["c"])
        tm.assert_index_equal(
            result,
            CategoricalIndex(list("aabb") + [np.nan] + ["a"], categories=list("ab")),
        )

        ci = CategoricalIndex(list("aabbca"), categories=list("cabdef"))
        result = ci.as_unordered()
        tm.assert_index_equal(result, ci)

        ci = CategoricalIndex(list("aabbca"), categories=list("cabdef"))
        result = ci.as_ordered()
        tm.assert_index_equal(
            result,
            CategoricalIndex(list("aabbca"), categories=list("cabdef"), ordered=True),
        )

        # invalid
        msg = "cannot use inplace with CategoricalIndex"
        with pytest.raises(ValueError, match=msg):
            ci.set_categories(list("cab"), inplace=True)

    def test_remove_maintains_order(self):
        ci = CategoricalIndex(list("abcdda"), categories=list("abcd"))
        result = ci.reorder_categories(["d", "c", "b", "a"], ordered=True)
        tm.assert_index_equal(
            result,
            CategoricalIndex(list("abcdda"), categories=list("dcba"), ordered=True),
        )
        result = result.remove_categories(["c"])
        tm.assert_index_equal(
            result,
            CategoricalIndex(
                ["a", "b", np.nan, "d", "d", "a"], categories=list("dba"), ordered=True
            ),
        )
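
One delegated method worth calling out from the above (illustrative only): rename_categories accepts a callable as of GH18862, applied to each category in turn; str.upper here stands in for the lambda used in the test:

import pandas as pd

ci = pd.CategoricalIndex(["a", "a", "b"], categories=["c", "a", "b"])
ci.rename_categories(str.upper)  # categories become ['C', 'A', 'B']
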
@@ -0,0 +1,142 @@
import numpy as np
import pytest

from pandas import (
    Categorical,
    CategoricalDtype,
    CategoricalIndex,
    Index,
)
import pandas._testing as tm


class TestCategoricalIndexConstructors:
    def test_construction_disallows_scalar(self):
        msg = "must be called with a collection of some kind"
        with pytest.raises(TypeError, match=msg):
            CategoricalIndex(data=1, categories=list("abcd"), ordered=False)
        with pytest.raises(TypeError, match=msg):
            CategoricalIndex(categories=list("abcd"), ordered=False)

    def test_construction(self):
        ci = CategoricalIndex(list("aabbca"), categories=list("abcd"), ordered=False)
        categories = ci.categories

        result = Index(ci)
        tm.assert_index_equal(result, ci, exact=True)
        assert not result.ordered

        result = Index(ci.values)
        tm.assert_index_equal(result, ci, exact=True)
        assert not result.ordered

        # empty
        result = CategoricalIndex([], categories=categories)
        tm.assert_index_equal(result.categories, Index(categories))
        tm.assert_numpy_array_equal(result.codes, np.array([], dtype="int8"))
        assert not result.ordered

        # passing categories
        result = CategoricalIndex(list("aabbca"), categories=categories)
        tm.assert_index_equal(result.categories, Index(categories))
        tm.assert_numpy_array_equal(
            result.codes, np.array([0, 0, 1, 1, 2, 0], dtype="int8")
        )

        c = Categorical(list("aabbca"))
        result = CategoricalIndex(c)
        tm.assert_index_equal(result.categories, Index(list("abc")))
        tm.assert_numpy_array_equal(
            result.codes, np.array([0, 0, 1, 1, 2, 0], dtype="int8")
        )
        assert not result.ordered

        result = CategoricalIndex(c, categories=categories)
        tm.assert_index_equal(result.categories, Index(categories))
        tm.assert_numpy_array_equal(
            result.codes, np.array([0, 0, 1, 1, 2, 0], dtype="int8")
        )
        assert not result.ordered

        ci = CategoricalIndex(c, categories=list("abcd"))
        result = CategoricalIndex(ci)
        tm.assert_index_equal(result.categories, Index(categories))
        tm.assert_numpy_array_equal(
            result.codes, np.array([0, 0, 1, 1, 2, 0], dtype="int8")
        )
        assert not result.ordered

        result = CategoricalIndex(ci, categories=list("ab"))
        tm.assert_index_equal(result.categories, Index(list("ab")))
        tm.assert_numpy_array_equal(
            result.codes, np.array([0, 0, 1, 1, -1, 0], dtype="int8")
        )
        assert not result.ordered

        result = CategoricalIndex(ci, categories=list("ab"), ordered=True)
        tm.assert_index_equal(result.categories, Index(list("ab")))
        tm.assert_numpy_array_equal(
            result.codes, np.array([0, 0, 1, 1, -1, 0], dtype="int8")
        )
        assert result.ordered

        result = CategoricalIndex(ci, categories=list("ab"), ordered=True)
        expected = CategoricalIndex(
            ci, categories=list("ab"), ordered=True, dtype="category"
        )
        tm.assert_index_equal(result, expected, exact=True)

        # turn me to an Index
        result = Index(np.array(ci))
        assert isinstance(result, Index)
        assert not isinstance(result, CategoricalIndex)

    def test_construction_with_dtype(self):
        # specify dtype
        ci = CategoricalIndex(list("aabbca"), categories=list("abc"), ordered=False)

        result = Index(np.array(ci), dtype="category")
        tm.assert_index_equal(result, ci, exact=True)

        result = Index(np.array(ci).tolist(), dtype="category")
        tm.assert_index_equal(result, ci, exact=True)

        # these are generally only equal when the categories are reordered
        ci = CategoricalIndex(list("aabbca"), categories=list("cab"), ordered=False)

        result = Index(np.array(ci), dtype="category").reorder_categories(ci.categories)
        tm.assert_index_equal(result, ci, exact=True)

        # make sure indexes are handled
        idx = Index(range(3))
        expected = CategoricalIndex([0, 1, 2], categories=idx, ordered=True)
        result = CategoricalIndex(idx, categories=idx, ordered=True)
        tm.assert_index_equal(result, expected, exact=True)

    def test_construction_empty_with_bool_categories(self):
        # see GH#22702
        cat = CategoricalIndex([], categories=[True, False])
        categories = sorted(cat.categories.tolist())
        assert categories == [False, True]

    def test_construction_with_categorical_dtype(self):
        # construction with CategoricalDtype
        # GH#18109
        data, cats, ordered = "a a b b".split(), "c b a".split(), True
        dtype = CategoricalDtype(categories=cats, ordered=ordered)

        result = CategoricalIndex(data, dtype=dtype)
        expected = CategoricalIndex(data, categories=cats, ordered=ordered)
        tm.assert_index_equal(result, expected, exact=True)

        # GH#19032
        result = Index(data, dtype=dtype)
        tm.assert_index_equal(result, expected, exact=True)

        # error when combining categories/ordered and dtype kwargs
        msg = "Cannot specify `categories` or `ordered` together with `dtype`."
        with pytest.raises(ValueError, match=msg):
            CategoricalIndex(data, categories=cats, dtype=dtype)

        with pytest.raises(ValueError, match=msg):
            CategoricalIndex(data, ordered=ordered, dtype=dtype)
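
The dtype/keyword exclusivity tested above, at a glance (illustrative only):

import pandas as pd

dtype = pd.CategoricalDtype(categories=["c", "b", "a"], ordered=True)
pd.CategoricalIndex(["a", "b"], dtype=dtype)  # fine
# pd.CategoricalIndex(["a", "b"], categories=["a", "b"], dtype=dtype)
# -> ValueError: Cannot specify `categories` or `ordered` together with `dtype`.
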
@@ -0,0 +1,96 @@
import numpy as np
import pytest

from pandas import (
    Categorical,
    CategoricalIndex,
    Index,
    MultiIndex,
)


class TestEquals:
    def test_equals_categorical(self):
        ci1 = CategoricalIndex(["a", "b"], categories=["a", "b"], ordered=True)
        ci2 = CategoricalIndex(["a", "b"], categories=["a", "b", "c"], ordered=True)

        assert ci1.equals(ci1)
        assert not ci1.equals(ci2)
        assert ci1.equals(ci1.astype(object))
        assert ci1.astype(object).equals(ci1)

        assert (ci1 == ci1).all()
        assert not (ci1 != ci1).all()
        assert not (ci1 > ci1).all()
        assert not (ci1 < ci1).all()
        assert (ci1 <= ci1).all()
        assert (ci1 >= ci1).all()

        assert not (ci1 == 1).all()
        assert (ci1 == Index(["a", "b"])).all()
        assert (ci1 == ci1.values).all()

        # invalid comparisons
        with pytest.raises(ValueError, match="Lengths must match"):
            ci1 == Index(["a", "b", "c"])

        msg = "Categoricals can only be compared if 'categories' are the same"
        with pytest.raises(TypeError, match=msg):
            ci1 == ci2
        with pytest.raises(TypeError, match=msg):
            ci1 == Categorical(ci1.values, ordered=False)
        with pytest.raises(TypeError, match=msg):
            ci1 == Categorical(ci1.values, categories=list("abc"))

        # tests
        # make sure that we are testing for category inclusion properly
        ci = CategoricalIndex(list("aabca"), categories=["c", "a", "b"])
        assert not ci.equals(list("aabca"))
        # Same categories, but different order
        # Unordered
        assert ci.equals(CategoricalIndex(list("aabca")))
        # Ordered
        assert not ci.equals(CategoricalIndex(list("aabca"), ordered=True))
        assert ci.equals(ci.copy())

        ci = CategoricalIndex(list("aabca") + [np.nan], categories=["c", "a", "b"])
        assert not ci.equals(list("aabca"))
        assert not ci.equals(CategoricalIndex(list("aabca")))
        assert ci.equals(ci.copy())

        ci = CategoricalIndex(list("aabca") + [np.nan], categories=["c", "a", "b"])
        assert not ci.equals(list("aabca") + [np.nan])
        assert ci.equals(CategoricalIndex(list("aabca") + [np.nan]))
        assert not ci.equals(CategoricalIndex(list("aabca") + [np.nan], ordered=True))
        assert ci.equals(ci.copy())

    def test_equals_categorical_unordered(self):
        # https://github.com/pandas-dev/pandas/issues/16603
        a = CategoricalIndex(["A"], categories=["A", "B"])
        b = CategoricalIndex(["A"], categories=["B", "A"])
        c = CategoricalIndex(["C"], categories=["B", "A"])
        assert a.equals(b)
        assert not a.equals(c)
        assert not b.equals(c)

    def test_equals_non_category(self):
        # GH#37667 Case where other contains a value not among ci's
        # categories ("D") and also contains np.nan
        ci = CategoricalIndex(["A", "B", np.nan, np.nan])
        other = Index(["A", "B", "D", np.nan])

        assert not ci.equals(other)

    def test_equals_multiindex(self):
        # don't raise NotImplementedError when calling is_dtype_compat

        mi = MultiIndex.from_arrays([["A", "B", "C", "D"], range(4)])
        ci = mi.to_flat_index().astype("category")

        assert not ci.equals(mi)

    def test_equals_string_dtype(self, any_string_dtype):
        # GH#55364
        idx = CategoricalIndex(list("abc"), name="B")
        other = Index(["a", "b", "c"], name="B", dtype=any_string_dtype)
        assert idx.equals(other)
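
The unordered-equality rule from GH#16603, restated (illustrative only):

import pandas as pd

a = pd.CategoricalIndex(["A"], categories=["A", "B"])
b = pd.CategoricalIndex(["A"], categories=["B", "A"])
assert a.equals(b)  # unordered: category order is irrelevant to equality
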
@@ -0,0 +1,54 @@
import numpy as np
import pytest

from pandas import CategoricalIndex
import pandas._testing as tm


class TestFillNA:
    def test_fillna_categorical(self):
        # GH#11343
        idx = CategoricalIndex([1.0, np.nan, 3.0, 1.0], name="x")
        # fill by value in categories
        exp = CategoricalIndex([1.0, 1.0, 3.0, 1.0], name="x")
        tm.assert_index_equal(idx.fillna(1.0), exp)

        cat = idx._data

        # fill by value not in categories raises TypeError on EA, casts on CI
        msg = "Cannot setitem on a Categorical with a new category"
        with pytest.raises(TypeError, match=msg):
            cat.fillna(2.0)

        result = idx.fillna(2.0)
        expected = idx.astype(object).fillna(2.0)
        tm.assert_index_equal(result, expected)

    def test_fillna_copies_with_no_nas(self):
        # Nothing to fill, should still get a copy for the Categorical method,
        # but OK to get a view on CategoricalIndex method
        ci = CategoricalIndex([0, 1, 1])
        result = ci.fillna(0)
        assert result is not ci
        assert tm.shares_memory(result, ci)

        # But at the EA level we always get a copy.
        cat = ci._data
        result = cat.fillna(0)
        assert result._ndarray is not cat._ndarray
        assert result._ndarray.base is None
        assert not tm.shares_memory(result, cat)

    def test_fillna_validates_with_no_nas(self):
        # We validate the fill value even if fillna is a no-op
        ci = CategoricalIndex([2, 3, 3])
        cat = ci._data

        msg = "Cannot setitem on a Categorical with a new category"
        res = ci.fillna(False)
        # nothing to fill, so we don't cast
        tm.assert_index_equal(res, ci)

        # Same check directly on the Categorical
        with pytest.raises(TypeError, match=msg):
            cat.fillna(False)
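
The fillna casting rule checked above, in brief (illustrative only; mirrors the test expectations):

import numpy as np
import pandas as pd

ci = pd.CategoricalIndex([1.0, np.nan, 3.0])
ci.fillna(1.0)  # 1.0 is an existing category -> result stays categorical
ci.fillna(2.0)  # 2.0 is not -> the index casts itself to object, then fills
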
@ -0,0 +1,120 @@
"""
Tests for CategoricalIndex.__repr__ and related methods.
"""
import pytest

from pandas._config import using_string_dtype
import pandas._config.config as cf

from pandas import CategoricalIndex
import pandas._testing as tm


class TestCategoricalIndexRepr:
    def test_format_different_scalar_lengths(self):
        # GH#35439
        idx = CategoricalIndex(["aaaaaaaaa", "b"])
        expected = ["aaaaaaaaa", "b"]
        msg = r"CategoricalIndex\.format is deprecated"
        with tm.assert_produces_warning(FutureWarning, match=msg):
            assert idx.format() == expected

    @pytest.mark.xfail(using_string_dtype(), reason="repr different")
    def test_string_categorical_index_repr(self):
        # short
        idx = CategoricalIndex(["a", "bb", "ccc"])
        expected = """CategoricalIndex(['a', 'bb', 'ccc'], categories=['a', 'bb', 'ccc'], ordered=False, dtype='category')"""  # noqa: E501
        assert repr(idx) == expected

        # multiple lines
        idx = CategoricalIndex(["a", "bb", "ccc"] * 10)
        expected = """CategoricalIndex(['a', 'bb', 'ccc', 'a', 'bb', 'ccc', 'a', 'bb', 'ccc', 'a',
                  'bb', 'ccc', 'a', 'bb', 'ccc', 'a', 'bb', 'ccc', 'a', 'bb',
                  'ccc', 'a', 'bb', 'ccc', 'a', 'bb', 'ccc', 'a', 'bb', 'ccc'],
                 categories=['a', 'bb', 'ccc'], ordered=False, dtype='category')"""  # noqa: E501

        assert repr(idx) == expected

        # truncated
        idx = CategoricalIndex(["a", "bb", "ccc"] * 100)
        expected = """CategoricalIndex(['a', 'bb', 'ccc', 'a', 'bb', 'ccc', 'a', 'bb', 'ccc', 'a',
                  ...
                  'ccc', 'a', 'bb', 'ccc', 'a', 'bb', 'ccc', 'a', 'bb', 'ccc'],
                 categories=['a', 'bb', 'ccc'], ordered=False, dtype='category', length=300)"""  # noqa: E501

        assert repr(idx) == expected

        # larger categories
        idx = CategoricalIndex(list("abcdefghijklmmo"))
        expected = """CategoricalIndex(['a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l',
                  'm', 'm', 'o'],
                 categories=['a', 'b', 'c', 'd', ..., 'k', 'l', 'm', 'o'], ordered=False, dtype='category')"""  # noqa: E501

        assert repr(idx) == expected

        # short
        idx = CategoricalIndex(["あ", "いい", "ううう"])
        expected = """CategoricalIndex(['あ', 'いい', 'ううう'], categories=['あ', 'いい', 'ううう'], ordered=False, dtype='category')"""  # noqa: E501
        assert repr(idx) == expected

        # multiple lines
        idx = CategoricalIndex(["あ", "いい", "ううう"] * 10)
        expected = """CategoricalIndex(['あ', 'いい', 'ううう', 'あ', 'いい', 'ううう', 'あ', 'いい', 'ううう', 'あ',
                  'いい', 'ううう', 'あ', 'いい', 'ううう', 'あ', 'いい', 'ううう', 'あ', 'いい',
                  'ううう', 'あ', 'いい', 'ううう', 'あ', 'いい', 'ううう', 'あ', 'いい', 'ううう'],
                 categories=['あ', 'いい', 'ううう'], ordered=False, dtype='category')"""  # noqa: E501

        assert repr(idx) == expected

        # truncated
        idx = CategoricalIndex(["あ", "いい", "ううう"] * 100)
        expected = """CategoricalIndex(['あ', 'いい', 'ううう', 'あ', 'いい', 'ううう', 'あ', 'いい', 'ううう', 'あ',
                  ...
                  'ううう', 'あ', 'いい', 'ううう', 'あ', 'いい', 'ううう', 'あ', 'いい', 'ううう'],
                 categories=['あ', 'いい', 'ううう'], ordered=False, dtype='category', length=300)"""  # noqa: E501

        assert repr(idx) == expected

        # larger categories
        idx = CategoricalIndex(list("あいうえおかきくけこさしすせそ"))
        expected = """CategoricalIndex(['あ', 'い', 'う', 'え', 'お', 'か', 'き', 'く', 'け', 'こ', 'さ', 'し',
                  'す', 'せ', 'そ'],
                 categories=['あ', 'い', 'う', 'え', ..., 'し', 'す', 'せ', 'そ'], ordered=False, dtype='category')"""  # noqa: E501

        assert repr(idx) == expected

        # Enable Unicode option -----------------------------------------
        with cf.option_context("display.unicode.east_asian_width", True):
            # short
            idx = CategoricalIndex(["あ", "いい", "ううう"])
            expected = """CategoricalIndex(['あ', 'いい', 'ううう'], categories=['あ', 'いい', 'ううう'], ordered=False, dtype='category')"""  # noqa: E501
            assert repr(idx) == expected

            # multiple lines
            idx = CategoricalIndex(["あ", "いい", "ううう"] * 10)
            expected = """CategoricalIndex(['あ', 'いい', 'ううう', 'あ', 'いい', 'ううう', 'あ', 'いい',
                  'ううう', 'あ', 'いい', 'ううう', 'あ', 'いい', 'ううう',
                  'あ', 'いい', 'ううう', 'あ', 'いい', 'ううう', 'あ', 'いい',
                  'ううう', 'あ', 'いい', 'ううう', 'あ', 'いい', 'ううう'],
                 categories=['あ', 'いい', 'ううう'], ordered=False, dtype='category')"""  # noqa: E501

            assert repr(idx) == expected

            # truncated
            idx = CategoricalIndex(["あ", "いい", "ううう"] * 100)
            expected = """CategoricalIndex(['あ', 'いい', 'ううう', 'あ', 'いい', 'ううう', 'あ', 'いい',
                  'ううう', 'あ',
                  ...
                  'ううう', 'あ', 'いい', 'ううう', 'あ', 'いい', 'ううう',
                  'あ', 'いい', 'ううう'],
                 categories=['あ', 'いい', 'ううう'], ordered=False, dtype='category', length=300)"""  # noqa: E501

            assert repr(idx) == expected

            # larger categories
            idx = CategoricalIndex(list("あいうえおかきくけこさしすせそ"))
            expected = """CategoricalIndex(['あ', 'い', 'う', 'え', 'お', 'か', 'き', 'く', 'け', 'こ',
                  'さ', 'し', 'す', 'せ', 'そ'],
                 categories=['あ', 'い', 'う', 'え', ..., 'し', 'す', 'せ', 'そ'], ordered=False, dtype='category')"""  # noqa: E501

            assert repr(idx) == expected
@ -0,0 +1,420 @@
import numpy as np
import pytest

from pandas.errors import InvalidIndexError

import pandas as pd
from pandas import (
    CategoricalIndex,
    Index,
    IntervalIndex,
    Timestamp,
)
import pandas._testing as tm


class TestTake:
    def test_take_fill_value(self):
        # GH 12631

        # numeric category
        idx = CategoricalIndex([1, 2, 3], name="xxx")
        result = idx.take(np.array([1, 0, -1]))
        expected = CategoricalIndex([2, 1, 3], name="xxx")
        tm.assert_index_equal(result, expected)
        tm.assert_categorical_equal(result.values, expected.values)

        # fill_value
        result = idx.take(np.array([1, 0, -1]), fill_value=True)
        expected = CategoricalIndex([2, 1, np.nan], categories=[1, 2, 3], name="xxx")
        tm.assert_index_equal(result, expected)
        tm.assert_categorical_equal(result.values, expected.values)

        # allow_fill=False
        result = idx.take(np.array([1, 0, -1]), allow_fill=False, fill_value=True)
        expected = CategoricalIndex([2, 1, 3], name="xxx")
        tm.assert_index_equal(result, expected)
        tm.assert_categorical_equal(result.values, expected.values)

        # object category
        idx = CategoricalIndex(
            list("CBA"), categories=list("ABC"), ordered=True, name="xxx"
        )
        result = idx.take(np.array([1, 0, -1]))
        expected = CategoricalIndex(
            list("BCA"), categories=list("ABC"), ordered=True, name="xxx"
        )
        tm.assert_index_equal(result, expected)
        tm.assert_categorical_equal(result.values, expected.values)

        # fill_value
        result = idx.take(np.array([1, 0, -1]), fill_value=True)
        expected = CategoricalIndex(
            ["B", "C", np.nan], categories=list("ABC"), ordered=True, name="xxx"
        )
        tm.assert_index_equal(result, expected)
        tm.assert_categorical_equal(result.values, expected.values)

        # allow_fill=False
        result = idx.take(np.array([1, 0, -1]), allow_fill=False, fill_value=True)
        expected = CategoricalIndex(
            list("BCA"), categories=list("ABC"), ordered=True, name="xxx"
        )
        tm.assert_index_equal(result, expected)
        tm.assert_categorical_equal(result.values, expected.values)

        msg = (
            "When allow_fill=True and fill_value is not None, "
            "all indices must be >= -1"
        )
        with pytest.raises(ValueError, match=msg):
            idx.take(np.array([1, 0, -2]), fill_value=True)
        with pytest.raises(ValueError, match=msg):
            idx.take(np.array([1, 0, -5]), fill_value=True)

        msg = "index -5 is out of bounds for (axis 0 with )?size 3"
        with pytest.raises(IndexError, match=msg):
            idx.take(np.array([1, -5]))

    def test_take_fill_value_datetime(self):
        # datetime category
        idx = pd.DatetimeIndex(["2011-01-01", "2011-02-01", "2011-03-01"], name="xxx")
        idx = CategoricalIndex(idx)
        result = idx.take(np.array([1, 0, -1]))
        expected = pd.DatetimeIndex(
            ["2011-02-01", "2011-01-01", "2011-03-01"], name="xxx"
        )
        expected = CategoricalIndex(expected)
        tm.assert_index_equal(result, expected)

        # fill_value
        result = idx.take(np.array([1, 0, -1]), fill_value=True)
        expected = pd.DatetimeIndex(["2011-02-01", "2011-01-01", "NaT"], name="xxx")
        exp_cats = pd.DatetimeIndex(["2011-01-01", "2011-02-01", "2011-03-01"])
        expected = CategoricalIndex(expected, categories=exp_cats)
        tm.assert_index_equal(result, expected)

        # allow_fill=False
        result = idx.take(np.array([1, 0, -1]), allow_fill=False, fill_value=True)
        expected = pd.DatetimeIndex(
            ["2011-02-01", "2011-01-01", "2011-03-01"], name="xxx"
        )
        expected = CategoricalIndex(expected)
        tm.assert_index_equal(result, expected)

        msg = (
            "When allow_fill=True and fill_value is not None, "
            "all indices must be >= -1"
        )
        with pytest.raises(ValueError, match=msg):
            idx.take(np.array([1, 0, -2]), fill_value=True)
        with pytest.raises(ValueError, match=msg):
            idx.take(np.array([1, 0, -5]), fill_value=True)

        msg = "index -5 is out of bounds for (axis 0 with )?size 3"
        with pytest.raises(IndexError, match=msg):
            idx.take(np.array([1, -5]))

    def test_take_invalid_kwargs(self):
        idx = CategoricalIndex([1, 2, 3], name="foo")
        indices = [1, 0, -1]

        msg = r"take\(\) got an unexpected keyword argument 'foo'"
        with pytest.raises(TypeError, match=msg):
            idx.take(indices, foo=2)

        msg = "the 'out' parameter is not supported"
        with pytest.raises(ValueError, match=msg):
            idx.take(indices, out=indices)

        msg = "the 'mode' parameter is not supported"
        with pytest.raises(ValueError, match=msg):
            idx.take(indices, mode="clip")


class TestGetLoc:
    def test_get_loc(self):
        # GH 12531
        cidx1 = CategoricalIndex(list("abcde"), categories=list("edabc"))
        idx1 = Index(list("abcde"))
        assert cidx1.get_loc("a") == idx1.get_loc("a")
        assert cidx1.get_loc("e") == idx1.get_loc("e")

        for i in [cidx1, idx1]:
            with pytest.raises(KeyError, match="'NOT-EXIST'"):
                i.get_loc("NOT-EXIST")

        # non-unique
        cidx2 = CategoricalIndex(list("aacded"), categories=list("edabc"))
        idx2 = Index(list("aacded"))

        # results in bool array
        res = cidx2.get_loc("d")
        tm.assert_numpy_array_equal(res, idx2.get_loc("d"))
        tm.assert_numpy_array_equal(
            res, np.array([False, False, False, True, False, True])
        )
        # unique element results in scalar
        res = cidx2.get_loc("e")
        assert res == idx2.get_loc("e")
        assert res == 4

        for i in [cidx2, idx2]:
            with pytest.raises(KeyError, match="'NOT-EXIST'"):
                i.get_loc("NOT-EXIST")

        # non-unique, sliceable
        cidx3 = CategoricalIndex(list("aabbb"), categories=list("abc"))
        idx3 = Index(list("aabbb"))

        # results in slice
        res = cidx3.get_loc("a")
        assert res == idx3.get_loc("a")
        assert res == slice(0, 2, None)

        res = cidx3.get_loc("b")
        assert res == idx3.get_loc("b")
        assert res == slice(2, 5, None)

        for i in [cidx3, idx3]:
            with pytest.raises(KeyError, match="'c'"):
                i.get_loc("c")

    def test_get_loc_unique(self):
        cidx = CategoricalIndex(list("abc"))
        result = cidx.get_loc("b")
        assert result == 1

    def test_get_loc_monotonic_nonunique(self):
        cidx = CategoricalIndex(list("abbc"))
        result = cidx.get_loc("b")
        expected = slice(1, 3, None)
        assert result == expected

    def test_get_loc_nonmonotonic_nonunique(self):
        cidx = CategoricalIndex(list("abcb"))
        result = cidx.get_loc("b")
        expected = np.array([False, True, False, True], dtype=bool)
        tm.assert_numpy_array_equal(result, expected)

    def test_get_loc_nan(self):
        # GH#41933
        ci = CategoricalIndex(["A", "B", np.nan])
        res = ci.get_loc(np.nan)

        assert res == 2


class TestGetIndexer:
    def test_get_indexer_base(self):
        # Determined by cat ordering.
        idx = CategoricalIndex(list("cab"), categories=list("cab"))
        expected = np.arange(len(idx), dtype=np.intp)

        actual = idx.get_indexer(idx)
        tm.assert_numpy_array_equal(expected, actual)

        with pytest.raises(ValueError, match="Invalid fill method"):
            idx.get_indexer(idx, method="invalid")

    def test_get_indexer_requires_unique(self):
        ci = CategoricalIndex(list("aabbca"), categories=list("cab"), ordered=False)
        oidx = Index(np.array(ci))

        msg = "Reindexing only valid with uniquely valued Index objects"

        for n in [1, 2, 5, len(ci)]:
            finder = oidx[np.random.default_rng(2).integers(0, len(ci), size=n)]

            with pytest.raises(InvalidIndexError, match=msg):
                ci.get_indexer(finder)

        # see gh-17323
        #
        # Even when indexer is equal to the
        # members in the index, we should
        # respect duplicates instead of taking
        # the fast-track path.
        for finder in [list("aabbca"), list("aababca")]:
            with pytest.raises(InvalidIndexError, match=msg):
                ci.get_indexer(finder)

    def test_get_indexer_non_unique(self):
        idx1 = CategoricalIndex(list("aabcde"), categories=list("edabc"))
        idx2 = CategoricalIndex(list("abf"))

        for indexer in [idx2, list("abf"), Index(list("abf"))]:
            msg = "Reindexing only valid with uniquely valued Index objects"
            with pytest.raises(InvalidIndexError, match=msg):
                idx1.get_indexer(indexer)

            r1, _ = idx1.get_indexer_non_unique(indexer)
            expected = np.array([0, 1, 2, -1], dtype=np.intp)
            tm.assert_almost_equal(r1, expected)

    def test_get_indexer_method(self):
        idx1 = CategoricalIndex(list("aabcde"), categories=list("edabc"))
        idx2 = CategoricalIndex(list("abf"))

        msg = "method pad not yet implemented for CategoricalIndex"
        with pytest.raises(NotImplementedError, match=msg):
            idx2.get_indexer(idx1, method="pad")
        msg = "method backfill not yet implemented for CategoricalIndex"
        with pytest.raises(NotImplementedError, match=msg):
            idx2.get_indexer(idx1, method="backfill")

        msg = "method nearest not yet implemented for CategoricalIndex"
        with pytest.raises(NotImplementedError, match=msg):
            idx2.get_indexer(idx1, method="nearest")

    def test_get_indexer_array(self):
        arr = np.array(
            [Timestamp("1999-12-31 00:00:00"), Timestamp("2000-12-31 00:00:00")],
            dtype=object,
        )
        cats = [Timestamp("1999-12-31 00:00:00"), Timestamp("2000-12-31 00:00:00")]
        ci = CategoricalIndex(cats, categories=cats, ordered=False, dtype="category")
        result = ci.get_indexer(arr)
        expected = np.array([0, 1], dtype="intp")
        tm.assert_numpy_array_equal(result, expected)

    def test_get_indexer_same_categories_same_order(self):
        ci = CategoricalIndex(["a", "b"], categories=["a", "b"])

        result = ci.get_indexer(CategoricalIndex(["b", "b"], categories=["a", "b"]))
        expected = np.array([1, 1], dtype="intp")
        tm.assert_numpy_array_equal(result, expected)

    def test_get_indexer_same_categories_different_order(self):
        # https://github.com/pandas-dev/pandas/issues/19551
        ci = CategoricalIndex(["a", "b"], categories=["a", "b"])

        result = ci.get_indexer(CategoricalIndex(["b", "b"], categories=["b", "a"]))
        expected = np.array([1, 1], dtype="intp")
        tm.assert_numpy_array_equal(result, expected)

    def test_get_indexer_nans_in_index_and_target(self):
        # GH 45361
        ci = CategoricalIndex([1, 2, np.nan, 3])
        other1 = [2, 3, 4, np.nan]
        res1 = ci.get_indexer(other1)
        expected1 = np.array([1, 3, -1, 2], dtype=np.intp)
        tm.assert_numpy_array_equal(res1, expected1)
        other2 = [1, 4, 2, 3]
        res2 = ci.get_indexer(other2)
        expected2 = np.array([0, -1, 1, 3], dtype=np.intp)
        tm.assert_numpy_array_equal(res2, expected2)


class TestWhere:
    def test_where(self, listlike_box):
        klass = listlike_box

        i = CategoricalIndex(list("aabbca"), categories=list("cab"), ordered=False)
        cond = [True] * len(i)
        expected = i
        result = i.where(klass(cond))
        tm.assert_index_equal(result, expected)

        cond = [False] + [True] * (len(i) - 1)
        expected = CategoricalIndex([np.nan] + i[1:].tolist(), categories=i.categories)
        result = i.where(klass(cond))
        tm.assert_index_equal(result, expected)

    def test_where_non_categories(self):
        ci = CategoricalIndex(["a", "b", "c", "d"])
        mask = np.array([True, False, True, False])

        result = ci.where(mask, 2)
        expected = Index(["a", 2, "c", 2], dtype=object)
        tm.assert_index_equal(result, expected)

        msg = "Cannot setitem on a Categorical with a new category"
        with pytest.raises(TypeError, match=msg):
            # Test the Categorical method directly
            ci._data._where(mask, 2)


class TestContains:
    def test_contains(self):
        ci = CategoricalIndex(list("aabbca"), categories=list("cabdef"), ordered=False)

        assert "a" in ci
        assert "z" not in ci
        assert "e" not in ci
        assert np.nan not in ci

        # assert codes NOT in index
        assert 0 not in ci
        assert 1 not in ci

    def test_contains_nan(self):
        ci = CategoricalIndex(list("aabbca") + [np.nan], categories=list("cabdef"))
        assert np.nan in ci

    @pytest.mark.parametrize("unwrap", [True, False])
    def test_contains_na_dtype(self, unwrap):
        dti = pd.date_range("2016-01-01", periods=100).insert(0, pd.NaT)
        pi = dti.to_period("D")
        tdi = dti - dti[-1]
        ci = CategoricalIndex(dti)

        obj = ci
        if unwrap:
            obj = ci._data

        assert np.nan in obj
        assert None in obj
        assert pd.NaT in obj
        assert np.datetime64("NaT") in obj
        assert np.timedelta64("NaT") not in obj

        obj2 = CategoricalIndex(tdi)
        if unwrap:
            obj2 = obj2._data

        assert np.nan in obj2
        assert None in obj2
        assert pd.NaT in obj2
        assert np.datetime64("NaT") not in obj2
        assert np.timedelta64("NaT") in obj2

        obj3 = CategoricalIndex(pi)
        if unwrap:
            obj3 = obj3._data

        assert np.nan in obj3
        assert None in obj3
        assert pd.NaT in obj3
        assert np.datetime64("NaT") not in obj3
        assert np.timedelta64("NaT") not in obj3

    @pytest.mark.parametrize(
        "item, expected",
        [
            (pd.Interval(0, 1), True),
            (1.5, True),
            (pd.Interval(0.5, 1.5), False),
            ("a", False),
            (Timestamp(1), False),
            (pd.Timedelta(1), False),
        ],
        ids=str,
    )
    def test_contains_interval(self, item, expected):
        # GH 23705
        ci = CategoricalIndex(IntervalIndex.from_breaks(range(3)))
        result = item in ci
        assert result is expected

    def test_contains_list(self):
        # GH#21729
        idx = CategoricalIndex([1, 2, 3])

        assert "a" not in idx

        with pytest.raises(TypeError, match="unhashable type"):
            ["a"] in idx

        with pytest.raises(TypeError, match="unhashable type"):
            ["a", "b"] in idx
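
A minimal sketch of the three get_loc return shapes the tests above assert (scalar, slice, boolean mask), depending on the uniqueness and contiguity of the label's positions; illustrative only:

import pandas as pd

ci = pd.CategoricalIndex(list("aabbb"))
ci.get_loc("a")   # contiguous duplicates -> slice(0, 2, None)

ci2 = pd.CategoricalIndex(list("aacded"), categories=list("edabc"))
ci2.get_loc("e")  # unique label -> integer position 4
ci2.get_loc("d")  # scattered duplicates -> boolean numpy mask
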
@ -0,0 +1,144 @@
import numpy as np
import pytest

import pandas as pd
from pandas import (
    CategoricalIndex,
    Index,
    Series,
)
import pandas._testing as tm


@pytest.mark.parametrize(
    "data, categories",
    [
        (list("abcbca"), list("cab")),
        (pd.interval_range(0, 3).repeat(3), pd.interval_range(0, 3)),
    ],
    ids=["string", "interval"],
)
def test_map_str(data, categories, ordered):
    # GH 31202 - override base class since we want to maintain categorical/ordered
    index = CategoricalIndex(data, categories=categories, ordered=ordered)
    result = index.map(str)
    expected = CategoricalIndex(
        map(str, data), categories=map(str, categories), ordered=ordered
    )
    tm.assert_index_equal(result, expected)


def test_map():
    ci = CategoricalIndex(list("ABABC"), categories=list("CBA"), ordered=True)
    result = ci.map(lambda x: x.lower())
    exp = CategoricalIndex(list("ababc"), categories=list("cba"), ordered=True)
    tm.assert_index_equal(result, exp)

    ci = CategoricalIndex(
        list("ABABC"), categories=list("BAC"), ordered=False, name="XXX"
    )
    result = ci.map(lambda x: x.lower())
    exp = CategoricalIndex(
        list("ababc"), categories=list("bac"), ordered=False, name="XXX"
    )
    tm.assert_index_equal(result, exp)

    # GH 12766: Return an index not an array
    tm.assert_index_equal(
        ci.map(lambda x: 1), Index(np.array([1] * 5, dtype=np.int64), name="XXX")
    )

    # change categories dtype
    ci = CategoricalIndex(list("ABABC"), categories=list("BAC"), ordered=False)

    def f(x):
        return {"A": 10, "B": 20, "C": 30}.get(x)

    result = ci.map(f)
    exp = CategoricalIndex([10, 20, 10, 20, 30], categories=[20, 10, 30], ordered=False)
    tm.assert_index_equal(result, exp)

    result = ci.map(Series([10, 20, 30], index=["A", "B", "C"]))
    tm.assert_index_equal(result, exp)

    result = ci.map({"A": 10, "B": 20, "C": 30})
    tm.assert_index_equal(result, exp)


def test_map_with_categorical_series():
    # GH 12756
    a = Index([1, 2, 3, 4])
    b = Series(["even", "odd", "even", "odd"], dtype="category")
    c = Series(["even", "odd", "even", "odd"])

    exp = CategoricalIndex(["odd", "even", "odd", np.nan])
    tm.assert_index_equal(a.map(b), exp)
    exp = Index(["odd", "even", "odd", np.nan])
    tm.assert_index_equal(a.map(c), exp)


@pytest.mark.parametrize(
    ("data", "f", "expected"),
    (
        ([1, 1, np.nan], pd.isna, CategoricalIndex([False, False, np.nan])),
        ([1, 2, np.nan], pd.isna, Index([False, False, np.nan])),
        ([1, 1, np.nan], {1: False}, CategoricalIndex([False, False, np.nan])),
        ([1, 2, np.nan], {1: False, 2: False}, Index([False, False, np.nan])),
        (
            [1, 1, np.nan],
            Series([False, False]),
            CategoricalIndex([False, False, np.nan]),
        ),
        (
            [1, 2, np.nan],
            Series([False, False, False]),
            Index([False, False, np.nan]),
        ),
    ),
)
def test_map_with_nan_ignore(data, f, expected):  # GH 24241
    values = CategoricalIndex(data)
    result = values.map(f, na_action="ignore")
    tm.assert_index_equal(result, expected)


@pytest.mark.parametrize(
    ("data", "f", "expected"),
    (
        ([1, 1, np.nan], pd.isna, Index([False, False, True])),
        ([1, 2, np.nan], pd.isna, Index([False, False, True])),
        ([1, 1, np.nan], {1: False}, CategoricalIndex([False, False, np.nan])),
        ([1, 2, np.nan], {1: False, 2: False}, Index([False, False, np.nan])),
        (
            [1, 1, np.nan],
            Series([False, False]),
            CategoricalIndex([False, False, np.nan]),
        ),
        (
            [1, 2, np.nan],
            Series([False, False, False]),
            Index([False, False, np.nan]),
        ),
    ),
)
def test_map_with_nan_none(data, f, expected):  # GH 24241
    values = CategoricalIndex(data)
    result = values.map(f, na_action=None)
    tm.assert_index_equal(result, expected)


def test_map_with_dict_or_series():
    orig_values = ["a", "B", 1, "a"]
    new_values = ["one", 2, 3.0, "one"]
    cur_index = CategoricalIndex(orig_values, name="XXX")
    expected = CategoricalIndex(new_values, name="XXX", categories=[3.0, 2, "one"])

    mapper = Series(new_values[:-1], index=orig_values[:-1])
    result = cur_index.map(mapper)
    # Order of categories in result can be different
    tm.assert_index_equal(result, expected)

    mapper = dict(zip(orig_values[:-1], new_values[:-1]))
    result = cur_index.map(mapper)
    # Order of categories in result can be different
    tm.assert_index_equal(result, expected)
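
A minimal sketch of the map behavior covered above (illustrative only): mapping through a dict or Series stays categorical when the mapped values can still serve as categories, while a many-to-one mapping collapses to a plain Index.

import pandas as pd

ci = pd.CategoricalIndex(list("ABABC"))
ci.map({"A": 10, "B": 20, "C": 30})  # CategoricalIndex([10, 20, 10, 20, 30], ...)
ci.map(lambda x: 1)                  # constant output -> plain Index([1, 1, 1, 1, 1])
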
@ -0,0 +1,78 @@
import numpy as np
import pytest

from pandas import (
    Categorical,
    CategoricalIndex,
    Index,
    Interval,
)
import pandas._testing as tm


class TestReindex:
    def test_reindex_list_non_unique(self):
        # GH#11586
        msg = "cannot reindex on an axis with duplicate labels"
        ci = CategoricalIndex(["a", "b", "c", "a"])
        with pytest.raises(ValueError, match=msg):
            ci.reindex(["a", "c"])

    def test_reindex_categorical_non_unique(self):
        msg = "cannot reindex on an axis with duplicate labels"
        ci = CategoricalIndex(["a", "b", "c", "a"])
        with pytest.raises(ValueError, match=msg):
            ci.reindex(Categorical(["a", "c"]))

    def test_reindex_list_non_unique_unused_category(self):
        msg = "cannot reindex on an axis with duplicate labels"
        ci = CategoricalIndex(["a", "b", "c", "a"], categories=["a", "b", "c", "d"])
        with pytest.raises(ValueError, match=msg):
            ci.reindex(["a", "c"])

    def test_reindex_categorical_non_unique_unused_category(self):
        msg = "cannot reindex on an axis with duplicate labels"
        ci = CategoricalIndex(["a", "b", "c", "a"], categories=["a", "b", "c", "d"])
        with pytest.raises(ValueError, match=msg):
            ci.reindex(Categorical(["a", "c"]))

    def test_reindex_duplicate_target(self):
        # See GH25459
        cat = CategoricalIndex(["a", "b", "c"], categories=["a", "b", "c", "d"])
        res, indexer = cat.reindex(["a", "c", "c"])
        exp = Index(["a", "c", "c"])
        tm.assert_index_equal(res, exp, exact=True)
        tm.assert_numpy_array_equal(indexer, np.array([0, 2, 2], dtype=np.intp))

        res, indexer = cat.reindex(
            CategoricalIndex(["a", "c", "c"], categories=["a", "b", "c", "d"])
        )
        exp = CategoricalIndex(["a", "c", "c"], categories=["a", "b", "c", "d"])
        tm.assert_index_equal(res, exp, exact=True)
        tm.assert_numpy_array_equal(indexer, np.array([0, 2, 2], dtype=np.intp))

    def test_reindex_empty_index(self):
        # See GH16770
        c = CategoricalIndex([])
        res, indexer = c.reindex(["a", "b"])
        tm.assert_index_equal(res, Index(["a", "b"]), exact=True)
        tm.assert_numpy_array_equal(indexer, np.array([-1, -1], dtype=np.intp))

    def test_reindex_categorical_added_category(self):
        # GH 42424
        ci = CategoricalIndex(
            [Interval(0, 1, closed="right"), Interval(1, 2, closed="right")],
            ordered=True,
        )
        ci_add = CategoricalIndex(
            [
                Interval(0, 1, closed="right"),
                Interval(1, 2, closed="right"),
                Interval(2, 3, closed="right"),
                Interval(3, 4, closed="right"),
            ],
            ordered=True,
        )
        result, _ = ci.reindex(ci_add)
        expected = ci_add
        tm.assert_index_equal(expected, result)
@ -0,0 +1,18 @@
import numpy as np
import pytest

from pandas import (
    CategoricalIndex,
    Index,
)
import pandas._testing as tm


@pytest.mark.parametrize("na_value", [None, np.nan])
def test_difference_with_na(na_value):
    # GH 57318
    ci = CategoricalIndex(["a", "b", "c", None])
    other = Index(["c", na_value])
    result = ci.difference(other)
    expected = CategoricalIndex(["a", "b"], categories=["a", "b", "c"])
    tm.assert_index_equal(result, expected)
@ -0,0 +1,41 @@
import numpy as np
import pytest

from pandas import (
    Series,
    array,
)


@pytest.fixture(params=[None, False])
def sort(request):
    """
    Valid values for the 'sort' parameter used in the Index
    setops methods (intersection, union, etc.)

    Caution:
        Don't confuse this one with the "sort" fixture used
        for DataFrame.append or concat. That one has
        parameters [True, False].

    We can't combine them as sort=True is not permitted
    in the Index setops methods.
    """
    return request.param


@pytest.fixture(params=["D", "3D", "-3D", "h", "2h", "-2h", "min", "2min", "s", "-3s"])
def freq_sample(request):
    """
    Valid values for 'freq' parameter used to create date_range and
    timedelta_range.
    """
    return request.param


@pytest.fixture(params=[list, tuple, np.array, array, Series])
def listlike_box(request):
    """
    Types that may be passed as the indexer to searchsorted.
    """
    return request.param
@ -0,0 +1,89 @@
import numpy as np
import pytest

from pandas import (
    PeriodIndex,
    Series,
    date_range,
    period_range,
    timedelta_range,
)
import pandas._testing as tm


class DropDuplicates:
    def test_drop_duplicates_metadata(self, idx):
        # GH#10115
        result = idx.drop_duplicates()
        tm.assert_index_equal(idx, result)
        assert idx.freq == result.freq

        idx_dup = idx.append(idx)
        result = idx_dup.drop_duplicates()

        expected = idx
        if not isinstance(idx, PeriodIndex):
            # freq is reset except for PeriodIndex
            assert idx_dup.freq is None
            assert result.freq is None
            expected = idx._with_freq(None)
        else:
            assert result.freq == expected.freq

        tm.assert_index_equal(result, expected)

    @pytest.mark.parametrize(
        "keep, expected, index",
        [
            (
                "first",
                np.concatenate(([False] * 10, [True] * 5)),
                np.arange(0, 10, dtype=np.int64),
            ),
            (
                "last",
                np.concatenate(([True] * 5, [False] * 10)),
                np.arange(5, 15, dtype=np.int64),
            ),
            (
                False,
                np.concatenate(([True] * 5, [False] * 5, [True] * 5)),
                np.arange(5, 10, dtype=np.int64),
            ),
        ],
    )
    def test_drop_duplicates(self, keep, expected, index, idx):
        # to check Index/Series compat
        idx = idx.append(idx[:5])

        tm.assert_numpy_array_equal(idx.duplicated(keep=keep), expected)
        expected = idx[~expected]

        result = idx.drop_duplicates(keep=keep)
        tm.assert_index_equal(result, expected)

        result = Series(idx).drop_duplicates(keep=keep)
        expected = Series(expected, index=index)
        tm.assert_series_equal(result, expected)


class TestDropDuplicatesPeriodIndex(DropDuplicates):
    @pytest.fixture(params=["D", "3D", "h", "2h", "min", "2min", "s", "3s"])
    def freq(self, request):
        return request.param

    @pytest.fixture
    def idx(self, freq):
        return period_range("2011-01-01", periods=10, freq=freq, name="idx")


class TestDropDuplicatesDatetimeIndex(DropDuplicates):
    @pytest.fixture
    def idx(self, freq_sample):
        return date_range("2011-01-01", freq=freq_sample, periods=10, name="idx")


class TestDropDuplicatesTimedeltaIndex(DropDuplicates):
    @pytest.fixture
    def idx(self, freq_sample):
        return timedelta_range("1 day", periods=10, freq=freq_sample, name="idx")
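
A minimal sketch of the freq handling checked above (illustrative only): appending already discards freq for DatetimeIndex/TimedeltaIndex, so drop_duplicates leaves it None, while a PeriodIndex keeps its freq because freq is part of the Period dtype.

import pandas as pd

dti = pd.date_range("2011-01-01", periods=10, freq="D")
assert dti.append(dti).drop_duplicates().freq is None

pi = pd.period_range("2011-01-01", periods=10, freq="D")
assert pi.append(pi).drop_duplicates().freq == pi.freq
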
@ -0,0 +1,181 @@
"""
Tests shared for DatetimeIndex/TimedeltaIndex/PeriodIndex
"""
from datetime import (
    datetime,
    timedelta,
)

import numpy as np
import pytest

import pandas as pd
from pandas import (
    CategoricalIndex,
    DatetimeIndex,
    Index,
    PeriodIndex,
    TimedeltaIndex,
    date_range,
    period_range,
    timedelta_range,
)
import pandas._testing as tm


class EqualsTests:
    def test_not_equals_numeric(self, index):
        assert not index.equals(Index(index.asi8))
        assert not index.equals(Index(index.asi8.astype("u8")))
        assert not index.equals(Index(index.asi8).astype("f8"))

    def test_equals(self, index):
        assert index.equals(index)
        assert index.equals(index.astype(object))
        assert index.equals(CategoricalIndex(index))
        assert index.equals(CategoricalIndex(index.astype(object)))

    def test_not_equals_non_arraylike(self, index):
        assert not index.equals(list(index))

    def test_not_equals_strings(self, index):
        other = Index([str(x) for x in index], dtype=object)
        assert not index.equals(other)
        assert not index.equals(CategoricalIndex(other))

    def test_not_equals_misc_strs(self, index):
        other = Index(list("abc"))
        assert not index.equals(other)


class TestPeriodIndexEquals(EqualsTests):
    @pytest.fixture
    def index(self):
        return period_range("2013-01-01", periods=5, freq="D")

    # TODO: de-duplicate with other test_equals2 methods
    @pytest.mark.parametrize("freq", ["D", "M"])
    def test_equals2(self, freq):
        # GH#13107
        idx = PeriodIndex(["2011-01-01", "2011-01-02", "NaT"], freq=freq)
        assert idx.equals(idx)
        assert idx.equals(idx.copy())
        assert idx.equals(idx.astype(object))
        assert idx.astype(object).equals(idx)
        assert idx.astype(object).equals(idx.astype(object))
        assert not idx.equals(list(idx))
        assert not idx.equals(pd.Series(idx))

        idx2 = PeriodIndex(["2011-01-01", "2011-01-02", "NaT"], freq="h")
        assert not idx.equals(idx2)
        assert not idx.equals(idx2.copy())
        assert not idx.equals(idx2.astype(object))
        assert not idx.astype(object).equals(idx2)
        assert not idx.equals(list(idx2))
        assert not idx.equals(pd.Series(idx2))

        # same internal, different freq
        idx3 = PeriodIndex._simple_new(
            idx._values._simple_new(idx._values.asi8, dtype=pd.PeriodDtype("h"))
        )
        tm.assert_numpy_array_equal(idx.asi8, idx3.asi8)
        assert not idx.equals(idx3)
        assert not idx.equals(idx3.copy())
        assert not idx.equals(idx3.astype(object))
        assert not idx.astype(object).equals(idx3)
        assert not idx.equals(list(idx3))
        assert not idx.equals(pd.Series(idx3))


class TestDatetimeIndexEquals(EqualsTests):
    @pytest.fixture
    def index(self):
        return date_range("2013-01-01", periods=5)

    def test_equals2(self):
        # GH#13107
        idx = DatetimeIndex(["2011-01-01", "2011-01-02", "NaT"])
        assert idx.equals(idx)
        assert idx.equals(idx.copy())
        assert idx.equals(idx.astype(object))
        assert idx.astype(object).equals(idx)
        assert idx.astype(object).equals(idx.astype(object))
        assert not idx.equals(list(idx))
        assert not idx.equals(pd.Series(idx))

        idx2 = DatetimeIndex(["2011-01-01", "2011-01-02", "NaT"], tz="US/Pacific")
        assert not idx.equals(idx2)
        assert not idx.equals(idx2.copy())
        assert not idx.equals(idx2.astype(object))
        assert not idx.astype(object).equals(idx2)
        assert not idx.equals(list(idx2))
        assert not idx.equals(pd.Series(idx2))

        # same internal, different tz
        idx3 = DatetimeIndex(idx.asi8, tz="US/Pacific")
        tm.assert_numpy_array_equal(idx.asi8, idx3.asi8)
        assert not idx.equals(idx3)
        assert not idx.equals(idx3.copy())
        assert not idx.equals(idx3.astype(object))
        assert not idx.astype(object).equals(idx3)
        assert not idx.equals(list(idx3))
        assert not idx.equals(pd.Series(idx3))

        # check that we do not raise when comparing with OutOfBounds objects
        oob = Index([datetime(2500, 1, 1)] * 3, dtype=object)
        assert not idx.equals(oob)
        assert not idx2.equals(oob)
        assert not idx3.equals(oob)

        # check that we do not raise when comparing with OutOfBounds dt64
        oob2 = oob.map(np.datetime64)
        assert not idx.equals(oob2)
        assert not idx2.equals(oob2)
        assert not idx3.equals(oob2)

    @pytest.mark.parametrize("freq", ["B", "C"])
    def test_not_equals_bday(self, freq):
        rng = date_range("2009-01-01", "2010-01-01", freq=freq)
        assert not rng.equals(list(rng))


class TestTimedeltaIndexEquals(EqualsTests):
    @pytest.fixture
    def index(self):
        return timedelta_range("1 day", periods=10)

    def test_equals2(self):
        # GH#13107
        idx = TimedeltaIndex(["1 days", "2 days", "NaT"])
        assert idx.equals(idx)
        assert idx.equals(idx.copy())
        assert idx.equals(idx.astype(object))
        assert idx.astype(object).equals(idx)
        assert idx.astype(object).equals(idx.astype(object))
        assert not idx.equals(list(idx))
        assert not idx.equals(pd.Series(idx))

        idx2 = TimedeltaIndex(["2 days", "1 days", "NaT"])
        assert not idx.equals(idx2)
        assert not idx.equals(idx2.copy())
        assert not idx.equals(idx2.astype(object))
        assert not idx.astype(object).equals(idx2)
        assert not idx.astype(object).equals(idx2.astype(object))
        assert not idx.equals(list(idx2))
        assert not idx.equals(pd.Series(idx2))

        # Check that we don't raise OverflowError on comparisons outside the
        # implementation range GH#28532
        oob = Index([timedelta(days=10**6)] * 3, dtype=object)
        assert not idx.equals(oob)
        assert not idx2.equals(oob)

        oob2 = Index([np.timedelta64(x) for x in oob], dtype=object)
        assert (oob == oob2).all()
        assert not idx.equals(oob2)
        assert not idx2.equals(oob2)

        oob3 = oob.map(np.timedelta64)
        assert (oob3 == oob).all()
        assert not idx.equals(oob3)
        assert not idx2.equals(oob3)
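
A minimal sketch of the dtype-sensitivity asserted above (illustrative only): the same wall-clock values are not equal across naive/aware (or across Period freqs), even when the underlying i8 data match.

import pandas as pd

naive = pd.DatetimeIndex(["2011-01-01", "2011-01-02"])
aware = naive.tz_localize("US/Pacific")
assert not naive.equals(aware)
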
@ -0,0 +1,45 @@
import numpy as np
import pytest

import pandas as pd
from pandas import (
    DatetimeIndex,
    Index,
)
import pandas._testing as tm

dtlike_dtypes = [
    np.dtype("timedelta64[ns]"),
    np.dtype("datetime64[ns]"),
    pd.DatetimeTZDtype("ns", "Asia/Tokyo"),
    pd.PeriodDtype("ns"),
]


@pytest.mark.parametrize("ldtype", dtlike_dtypes)
@pytest.mark.parametrize("rdtype", dtlike_dtypes)
def test_get_indexer_non_unique_wrong_dtype(ldtype, rdtype):
    vals = np.tile(3600 * 10**9 * np.arange(3, dtype=np.int64), 2)

    def construct(dtype):
        if dtype is dtlike_dtypes[-1]:
            # PeriodArray will try to cast ints to strings
            return DatetimeIndex(vals).astype(dtype)
        return Index(vals, dtype=dtype)

    left = construct(ldtype)
    right = construct(rdtype)

    result = left.get_indexer_non_unique(right)

    if ldtype is rdtype:
        ex1 = np.array([0, 3, 1, 4, 2, 5] * 2, dtype=np.intp)
        ex2 = np.array([], dtype=np.intp)
        tm.assert_numpy_array_equal(result[0], ex1)
        tm.assert_numpy_array_equal(result[1], ex2)

    else:
        no_matches = np.array([-1] * 6, dtype=np.intp)
        missing = np.arange(6, dtype=np.intp)
        tm.assert_numpy_array_equal(result[0], no_matches)
        tm.assert_numpy_array_equal(result[1], missing)
@ -0,0 +1,46 @@
from pandas import (
    Index,
    NaT,
    date_range,
)


def test_is_monotonic_with_nat():
    # GH#31437
    # PeriodIndex.is_monotonic_increasing should behave analogously to DatetimeIndex,
    # in particular never be monotonic when we have NaT
    dti = date_range("2016-01-01", periods=3)
    pi = dti.to_period("D")
    tdi = Index(dti.view("timedelta64[ns]"))

    for obj in [pi, pi._engine, dti, dti._engine, tdi, tdi._engine]:
        if isinstance(obj, Index):
            # i.e. not Engines
            assert obj.is_monotonic_increasing
        assert obj.is_monotonic_increasing
        assert not obj.is_monotonic_decreasing
        assert obj.is_unique

    dti1 = dti.insert(0, NaT)
    pi1 = dti1.to_period("D")
    tdi1 = Index(dti1.view("timedelta64[ns]"))

    for obj in [pi1, pi1._engine, dti1, dti1._engine, tdi1, tdi1._engine]:
        if isinstance(obj, Index):
            # i.e. not Engines
            assert not obj.is_monotonic_increasing
        assert not obj.is_monotonic_increasing
        assert not obj.is_monotonic_decreasing
        assert obj.is_unique

    dti2 = dti.insert(3, NaT)
    pi2 = dti2.to_period("h")
    tdi2 = Index(dti2.view("timedelta64[ns]"))

    for obj in [pi2, pi2._engine, dti2, dti2._engine, tdi2, tdi2._engine]:
        if isinstance(obj, Index):
            # i.e. not Engines
            assert not obj.is_monotonic_increasing
        assert not obj.is_monotonic_increasing
        assert not obj.is_monotonic_decreasing
        assert obj.is_unique
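
A minimal sketch of the NaT rule tested above (illustrative only): inserting NaT anywhere makes a datetime-like index non-monotonic in both directions while leaving it unique.

import pandas as pd

dti = pd.date_range("2016-01-01", periods=3)
assert dti.is_monotonic_increasing
dti1 = dti.insert(0, pd.NaT)
assert not dti1.is_monotonic_increasing
assert not dti1.is_monotonic_decreasing
assert dti1.is_unique
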
@ -0,0 +1,53 @@
import numpy as np
import pytest

from pandas import (
    DatetimeIndex,
    NaT,
    PeriodIndex,
    TimedeltaIndex,
)
import pandas._testing as tm


class NATests:
    def test_nat(self, index_without_na):
        empty_index = index_without_na[:0]

        index_with_na = index_without_na.copy(deep=True)
        index_with_na._data[1] = NaT

        assert empty_index._na_value is NaT
        assert index_with_na._na_value is NaT
        assert index_without_na._na_value is NaT

        idx = index_without_na
        assert idx._can_hold_na

        tm.assert_numpy_array_equal(idx._isnan, np.array([False, False]))
        assert idx.hasnans is False

        idx = index_with_na
        assert idx._can_hold_na

        tm.assert_numpy_array_equal(idx._isnan, np.array([False, True]))
        assert idx.hasnans is True


class TestDatetimeIndexNA(NATests):
    @pytest.fixture
    def index_without_na(self, tz_naive_fixture):
        tz = tz_naive_fixture
        return DatetimeIndex(["2011-01-01", "2011-01-02"], tz=tz)


class TestTimedeltaIndexNA(NATests):
    @pytest.fixture
    def index_without_na(self):
        return TimedeltaIndex(["1 days", "2 days"])


class TestPeriodIndexNA(NATests):
    @pytest.fixture
    def index_without_na(self):
        return PeriodIndex(["2011-01-01", "2011-01-02"], freq="D")
@ -0,0 +1,315 @@
import numpy as np
import pytest

from pandas import (
    DatetimeIndex,
    Index,
    NaT,
    PeriodIndex,
    TimedeltaIndex,
    timedelta_range,
)
import pandas._testing as tm


def check_freq_ascending(ordered, orig, ascending):
    """
    Check the expected freq on a PeriodIndex/DatetimeIndex/TimedeltaIndex
    when the original index is generated (or generate-able) with
    period_range/date_range/timedelta_range.
    """
    if isinstance(ordered, PeriodIndex):
        assert ordered.freq == orig.freq
    elif isinstance(ordered, (DatetimeIndex, TimedeltaIndex)):
        if ascending:
            assert ordered.freq.n == orig.freq.n
        else:
            assert ordered.freq.n == -1 * orig.freq.n


def check_freq_nonmonotonic(ordered, orig):
    """
    Check the expected freq on a PeriodIndex/DatetimeIndex/TimedeltaIndex
    when the original index is _not_ generated (or generate-able) with
    period_range/date_range/timedelta_range.
    """
    if isinstance(ordered, PeriodIndex):
        assert ordered.freq == orig.freq
    elif isinstance(ordered, (DatetimeIndex, TimedeltaIndex)):
        assert ordered.freq is None


class TestSortValues:
    @pytest.fixture(params=[DatetimeIndex, TimedeltaIndex, PeriodIndex])
    def non_monotonic_idx(self, request):
        if request.param is DatetimeIndex:
            return DatetimeIndex(["2000-01-04", "2000-01-01", "2000-01-02"])
        elif request.param is PeriodIndex:
            dti = DatetimeIndex(["2000-01-04", "2000-01-01", "2000-01-02"])
            return dti.to_period("D")
        else:
            return TimedeltaIndex(
                ["1 day 00:00:05", "1 day 00:00:01", "1 day 00:00:02"]
            )

    def test_argmin_argmax(self, non_monotonic_idx):
        assert non_monotonic_idx.argmin() == 1
        assert non_monotonic_idx.argmax() == 0

    def test_sort_values(self, non_monotonic_idx):
        idx = non_monotonic_idx
        ordered = idx.sort_values()
        assert ordered.is_monotonic_increasing
        ordered = idx.sort_values(ascending=False)
        assert ordered[::-1].is_monotonic_increasing

        ordered, dexer = idx.sort_values(return_indexer=True)
        assert ordered.is_monotonic_increasing
        tm.assert_numpy_array_equal(dexer, np.array([1, 2, 0], dtype=np.intp))

        ordered, dexer = idx.sort_values(return_indexer=True, ascending=False)
        assert ordered[::-1].is_monotonic_increasing
        tm.assert_numpy_array_equal(dexer, np.array([0, 2, 1], dtype=np.intp))

    def check_sort_values_with_freq(self, idx):
        ordered = idx.sort_values()
        tm.assert_index_equal(ordered, idx)
        check_freq_ascending(ordered, idx, True)

        ordered = idx.sort_values(ascending=False)
        expected = idx[::-1]
        tm.assert_index_equal(ordered, expected)
        check_freq_ascending(ordered, idx, False)

        ordered, indexer = idx.sort_values(return_indexer=True)
        tm.assert_index_equal(ordered, idx)
        tm.assert_numpy_array_equal(indexer, np.array([0, 1, 2], dtype=np.intp))
        check_freq_ascending(ordered, idx, True)

        ordered, indexer = idx.sort_values(return_indexer=True, ascending=False)
        expected = idx[::-1]
        tm.assert_index_equal(ordered, expected)
        tm.assert_numpy_array_equal(indexer, np.array([2, 1, 0], dtype=np.intp))
        check_freq_ascending(ordered, idx, False)

    @pytest.mark.parametrize("freq", ["D", "h"])
    def test_sort_values_with_freq_timedeltaindex(self, freq):
        # GH#10295
        idx = timedelta_range(start=f"1{freq}", periods=3, freq=freq).rename("idx")

        self.check_sort_values_with_freq(idx)

    @pytest.mark.parametrize(
        "idx",
        [
            DatetimeIndex(
                ["2011-01-01", "2011-01-02", "2011-01-03"], freq="D", name="idx"
            ),
            DatetimeIndex(
                ["2011-01-01 09:00", "2011-01-01 10:00", "2011-01-01 11:00"],
                freq="h",
                name="tzidx",
                tz="Asia/Tokyo",
            ),
        ],
    )
    def test_sort_values_with_freq_datetimeindex(self, idx):
        self.check_sort_values_with_freq(idx)

    @pytest.mark.parametrize("freq", ["D", "2D", "4D"])
    def test_sort_values_with_freq_periodindex(self, freq):
        # here with_freq refers to being period_range-like
        idx = PeriodIndex(
            ["2011-01-01", "2011-01-02", "2011-01-03"], freq=freq, name="idx"
        )
        self.check_sort_values_with_freq(idx)

    @pytest.mark.parametrize(
        "idx",
        [
            PeriodIndex(["2011", "2012", "2013"], name="pidx", freq="Y"),
            Index([2011, 2012, 2013], name="idx"),  # for compatibility check
        ],
    )
    def test_sort_values_with_freq_periodindex2(self, idx):
        # here with_freq indicates this is period_range-like
        self.check_sort_values_with_freq(idx)

    def check_sort_values_without_freq(self, idx, expected):
        ordered = idx.sort_values(na_position="first")
        tm.assert_index_equal(ordered, expected)
        check_freq_nonmonotonic(ordered, idx)

        if not idx.isna().any():
            ordered = idx.sort_values()
            tm.assert_index_equal(ordered, expected)
            check_freq_nonmonotonic(ordered, idx)

        ordered = idx.sort_values(ascending=False)
        tm.assert_index_equal(ordered, expected[::-1])
        check_freq_nonmonotonic(ordered, idx)

        ordered, indexer = idx.sort_values(return_indexer=True, na_position="first")
        tm.assert_index_equal(ordered, expected)

        exp = np.array([0, 4, 3, 1, 2], dtype=np.intp)
        tm.assert_numpy_array_equal(indexer, exp)
        check_freq_nonmonotonic(ordered, idx)

        if not idx.isna().any():
            ordered, indexer = idx.sort_values(return_indexer=True)
            tm.assert_index_equal(ordered, expected)

            exp = np.array([0, 4, 3, 1, 2], dtype=np.intp)
            tm.assert_numpy_array_equal(indexer, exp)
            check_freq_nonmonotonic(ordered, idx)

        ordered, indexer = idx.sort_values(return_indexer=True, ascending=False)
        tm.assert_index_equal(ordered, expected[::-1])

        exp = np.array([2, 1, 3, 0, 4], dtype=np.intp)
        tm.assert_numpy_array_equal(indexer, exp)
        check_freq_nonmonotonic(ordered, idx)

    def test_sort_values_without_freq_timedeltaindex(self):
        # GH#10295

        idx = TimedeltaIndex(
            ["1 hour", "3 hour", "5 hour", "2 hour ", "1 hour"], name="idx1"
        )
        expected = TimedeltaIndex(
            ["1 hour", "1 hour", "2 hour", "3 hour", "5 hour"], name="idx1"
        )
        self.check_sort_values_without_freq(idx, expected)

    @pytest.mark.parametrize(
        "index_dates,expected_dates",
        [
            (
                ["2011-01-01", "2011-01-03", "2011-01-05", "2011-01-02", "2011-01-01"],
                ["2011-01-01", "2011-01-01", "2011-01-02", "2011-01-03", "2011-01-05"],
            ),
            (
                ["2011-01-01", "2011-01-03", "2011-01-05", "2011-01-02", "2011-01-01"],
                ["2011-01-01", "2011-01-01", "2011-01-02", "2011-01-03", "2011-01-05"],
            ),
            (
                [NaT, "2011-01-03", "2011-01-05", "2011-01-02", NaT],
                [NaT, NaT, "2011-01-02", "2011-01-03", "2011-01-05"],
            ),
        ],
    )
    def test_sort_values_without_freq_datetimeindex(
        self, index_dates, expected_dates, tz_naive_fixture
    ):
        tz = tz_naive_fixture

        # without freq
        idx = DatetimeIndex(index_dates, tz=tz, name="idx")
        expected = DatetimeIndex(expected_dates, tz=tz, name="idx")

        self.check_sort_values_without_freq(idx, expected)

    @pytest.mark.parametrize(
        "idx,expected",
        [
            (
                PeriodIndex(
                    [
                        "2011-01-01",
                        "2011-01-03",
                        "2011-01-05",
                        "2011-01-02",
                        "2011-01-01",
                    ],
                    freq="D",
                    name="idx1",
                ),
                PeriodIndex(
                    [
                        "2011-01-01",
                        "2011-01-01",
                        "2011-01-02",
                        "2011-01-03",
                        "2011-01-05",
                    ],
                    freq="D",
                    name="idx1",
                ),
            ),
            (
                PeriodIndex(
                    [
                        "2011-01-01",
                        "2011-01-03",
                        "2011-01-05",
                        "2011-01-02",
                        "2011-01-01",
                    ],
                    freq="D",
                    name="idx2",
                ),
                PeriodIndex(
                    [
                        "2011-01-01",
                        "2011-01-01",
                        "2011-01-02",
                        "2011-01-03",
                        "2011-01-05",
                    ],
                    freq="D",
                    name="idx2",
                ),
            ),
            (
                PeriodIndex(
                    [NaT, "2011-01-03", "2011-01-05", "2011-01-02", NaT],
                    freq="D",
                    name="idx3",
                ),
                PeriodIndex(
                    [NaT, NaT, "2011-01-02", "2011-01-03", "2011-01-05"],
                    freq="D",
                    name="idx3",
                ),
            ),
            (
                PeriodIndex(
                    ["2011", "2013", "2015", "2012", "2011"], name="pidx", freq="Y"
                ),
                PeriodIndex(
                    ["2011", "2011", "2012", "2013", "2015"], name="pidx", freq="Y"
                ),
            ),
            (
                # For compatibility check
                Index([2011, 2013, 2015, 2012, 2011], name="idx"),
                Index([2011, 2011, 2012, 2013, 2015], name="idx"),
            ),
        ],
    )
    def test_sort_values_without_freq_periodindex(self, idx, expected):
        # here without_freq means not generateable by period_range
        self.check_sort_values_without_freq(idx, expected)

    def test_sort_values_without_freq_periodindex_nat(self):
        # doesn't quite fit into check_sort_values_without_freq
        idx = PeriodIndex(["2011", "2013", "NaT", "2011"], name="pidx", freq="D")
        expected = PeriodIndex(["NaT", "2011", "2011", "2013"], name="pidx", freq="D")

        ordered = idx.sort_values(na_position="first")
        tm.assert_index_equal(ordered, expected)
        check_freq_nonmonotonic(ordered, idx)

        ordered = idx.sort_values(ascending=False)
        tm.assert_index_equal(ordered, expected[::-1])
        check_freq_nonmonotonic(ordered, idx)


def test_order_stability_compat():
    # GH#35922. sort_values is stable both for normal and datetime-like Index
    pidx = PeriodIndex(["2011", "2013", "2015", "2012", "2011"], name="pidx", freq="Y")
    iidx = Index([2011, 2013, 2015, 2012, 2011], name="idx")
    ordered1, indexer1 = pidx.sort_values(return_indexer=True, ascending=False)
    ordered2, indexer2 = iidx.sort_values(return_indexer=True, ascending=False)
    tm.assert_numpy_array_equal(indexer1, indexer2)
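
A minimal sketch of the freq bookkeeping these helpers encode (illustrative only): sorting a range-like DatetimeIndex/TimedeltaIndex descending negates the freq, a non-range-like one loses freq entirely, and a PeriodIndex keeps its freq either way.

import pandas as pd

dti = pd.date_range("2011-01-01", periods=3, freq="D")
assert dti.sort_values(ascending=False).freq.n == -1

pi = pd.period_range("2011-01-01", periods=3, freq="D")
assert pi.sort_values(ascending=False).freq == pi.freq
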
@ -0,0 +1,103 @@
import numpy as np

from pandas import (
    DatetimeIndex,
    NaT,
    PeriodIndex,
    Series,
    TimedeltaIndex,
    date_range,
    period_range,
    timedelta_range,
)
import pandas._testing as tm


class TestValueCounts:
    # GH#7735

    def test_value_counts_unique_datetimeindex(self, tz_naive_fixture):
        tz = tz_naive_fixture
        orig = date_range("2011-01-01 09:00", freq="h", periods=10, tz=tz)
        self._check_value_counts_with_repeats(orig)

    def test_value_counts_unique_timedeltaindex(self):
        orig = timedelta_range("1 days 09:00:00", freq="h", periods=10)
        self._check_value_counts_with_repeats(orig)

    def test_value_counts_unique_periodindex(self):
        orig = period_range("2011-01-01 09:00", freq="h", periods=10)
        self._check_value_counts_with_repeats(orig)

    def _check_value_counts_with_repeats(self, orig):
        # create repeated values, 'n'th element is repeated by n+1 times
        idx = type(orig)(
            np.repeat(orig._values, range(1, len(orig) + 1)), dtype=orig.dtype
        )

        exp_idx = orig[::-1]
        if not isinstance(exp_idx, PeriodIndex):
            exp_idx = exp_idx._with_freq(None)
        expected = Series(range(10, 0, -1), index=exp_idx, dtype="int64", name="count")

        for obj in [idx, Series(idx)]:
            tm.assert_series_equal(obj.value_counts(), expected)

        tm.assert_index_equal(idx.unique(), orig)

    def test_value_counts_unique_datetimeindex2(self, tz_naive_fixture):
        tz = tz_naive_fixture
        idx = DatetimeIndex(
            [
                "2013-01-01 09:00",
                "2013-01-01 09:00",
                "2013-01-01 09:00",
                "2013-01-01 08:00",
                "2013-01-01 08:00",
                NaT,
            ],
            tz=tz,
        )
        self._check_value_counts_dropna(idx)

    def test_value_counts_unique_timedeltaindex2(self):
        idx = TimedeltaIndex(
            [
                "1 days 09:00:00",
                "1 days 09:00:00",
                "1 days 09:00:00",
                "1 days 08:00:00",
                "1 days 08:00:00",
                NaT,
            ]
        )
        self._check_value_counts_dropna(idx)

    def test_value_counts_unique_periodindex2(self):
        idx = PeriodIndex(
            [
                "2013-01-01 09:00",
                "2013-01-01 09:00",
                "2013-01-01 09:00",
                "2013-01-01 08:00",
                "2013-01-01 08:00",
                NaT,
            ],
            freq="h",
        )
        self._check_value_counts_dropna(idx)

    def _check_value_counts_dropna(self, idx):
        exp_idx = idx[[2, 3]]
        expected = Series([3, 2], index=exp_idx, name="count")

        for obj in [idx, Series(idx)]:
            tm.assert_series_equal(obj.value_counts(), expected)

        exp_idx = idx[[2, 3, -1]]
        expected = Series([3, 2, 1], index=exp_idx, name="count")

        for obj in [idx, Series(idx)]:
            tm.assert_series_equal(obj.value_counts(dropna=False), expected)

        tm.assert_index_equal(idx.unique(), exp_idx)
@ -0,0 +1,30 @@
from datetime import timedelta

from pandas import (
    Index,
    Timestamp,
    date_range,
    isna,
)


class TestAsOf:
    def test_asof_partial(self):
        index = date_range("2010-01-01", periods=2, freq="ME")
        expected = Timestamp("2010-02-28")
        result = index.asof("2010-02")
        assert result == expected
        assert not isinstance(result, Index)

    def test_asof(self):
        index = date_range("2020-01-01", periods=10)

        dt = index[0]
        assert index.asof(dt) == dt
        assert isna(index.asof(dt - timedelta(1)))

        dt = index[-1]
        assert index.asof(dt + timedelta(1)) == dt

        dt = index[0].to_pydatetime()
        assert isinstance(index.asof(dt), Timestamp)
@ -0,0 +1,338 @@
from datetime import datetime

import dateutil
import numpy as np
import pytest
import pytz

import pandas as pd
from pandas import (
    DatetimeIndex,
    Index,
    NaT,
    PeriodIndex,
    Timestamp,
    date_range,
)
import pandas._testing as tm


class TestDatetimeIndex:
    @pytest.mark.parametrize("tzstr", ["US/Eastern", "dateutil/US/Eastern"])
    def test_dti_astype_asobject_around_dst_transition(self, tzstr):
        # GH#1345

        # dates around a dst transition
        rng = date_range("2/13/2010", "5/6/2010", tz=tzstr)

        objs = rng.astype(object)
        for i, x in enumerate(objs):
            exval = rng[i]
            assert x == exval
            assert x.tzinfo == exval.tzinfo

    def test_astype(self):
        # GH 13149, GH 13209
        idx = DatetimeIndex(
            ["2016-05-16", "NaT", NaT, np.nan], dtype="M8[ns]", name="idx"
        )

        result = idx.astype(object)
        expected = Index(
            [Timestamp("2016-05-16")] + [NaT] * 3, dtype=object, name="idx"
        )
        tm.assert_index_equal(result, expected)

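        # astype(int64) exposes epoch nanoseconds; NaT becomes iNaT (the minimum int64)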
        result = idx.astype(np.int64)
        expected = Index(
            [1463356800000000000] + [-9223372036854775808] * 3,
            dtype=np.int64,
            name="idx",
        )
        tm.assert_index_equal(result, expected)

    def test_astype2(self):
        rng = date_range("1/1/2000", periods=10, name="idx")
        result = rng.astype("i8")
        tm.assert_index_equal(result, Index(rng.asi8, name="idx"))
        tm.assert_numpy_array_equal(result.values, rng.asi8)

    def test_astype_uint(self):
        arr = date_range("2000", periods=2, name="idx")

        with pytest.raises(TypeError, match=r"Do obj.astype\('int64'\)"):
            arr.astype("uint64")
        with pytest.raises(TypeError, match=r"Do obj.astype\('int64'\)"):
            arr.astype("uint32")

    def test_astype_with_tz(self):
        # with tz
        rng = date_range("1/1/2000", periods=10, tz="US/Eastern")
        msg = "Cannot use .astype to convert from timezone-aware"
        with pytest.raises(TypeError, match=msg):
            # deprecated
            rng.astype("datetime64[ns]")
        with pytest.raises(TypeError, match=msg):
            # check DatetimeArray while we're here; deprecated
            rng._data.astype("datetime64[ns]")

    def test_astype_tzaware_to_tzaware(self):
        # GH 18951: tz-aware to tz-aware
        idx = date_range("20170101", periods=4, tz="US/Pacific")
        result = idx.astype("datetime64[ns, US/Eastern]")
        expected = date_range("20170101 03:00:00", periods=4, tz="US/Eastern")
        tm.assert_index_equal(result, expected)
        assert result.freq == expected.freq

    def test_astype_tznaive_to_tzaware(self):
        # GH 18951: tz-naive to tz-aware
        idx = date_range("20170101", periods=4)
        idx = idx._with_freq(None)  # tz_localize does not preserve freq
        msg = "Cannot use .astype to convert from timezone-naive"
        with pytest.raises(TypeError, match=msg):
            # dt64->dt64tz deprecated
            idx.astype("datetime64[ns, US/Eastern]")
        with pytest.raises(TypeError, match=msg):
            # dt64->dt64tz deprecated
            idx._data.astype("datetime64[ns, US/Eastern]")

    def test_astype_str_nat(self, using_infer_string):
        # GH 13149, GH 13209
        # verify that we are returning NaT as a string (and not unicode)

        idx = DatetimeIndex(["2016-05-16", "NaT", NaT, np.nan])
        result = idx.astype(str)
        if using_infer_string:
            expected = Index(["2016-05-16", None, None, None], dtype="str")
        else:
            expected = Index(["2016-05-16", "NaT", "NaT", "NaT"], dtype=object)
        tm.assert_index_equal(result, expected)

    def test_astype_str(self):
        # test astype string - #10442
        dti = date_range("2012-01-01", periods=4, name="test_name")
        result = dti.astype(str)
        expected = Index(
            ["2012-01-01", "2012-01-02", "2012-01-03", "2012-01-04"],
            name="test_name",
            dtype="str",
        )
        tm.assert_index_equal(result, expected)

    def test_astype_str_tz_and_name(self):
        # test astype string with tz and name
        dti = date_range("2012-01-01", periods=3, name="test_name", tz="US/Eastern")
        result = dti.astype(str)
        expected = Index(
            [
                "2012-01-01 00:00:00-05:00",
                "2012-01-02 00:00:00-05:00",
                "2012-01-03 00:00:00-05:00",
            ],
            name="test_name",
            dtype="str",
        )
        tm.assert_index_equal(result, expected)

    def test_astype_str_freq_and_name(self):
        # test astype string with freq "h" and name
        dti = date_range("1/1/2011", periods=3, freq="h", name="test_name")
        result = dti.astype(str)
        expected = Index(
            ["2011-01-01 00:00:00", "2011-01-01 01:00:00", "2011-01-01 02:00:00"],
            name="test_name",
            dtype="str",
        )
        tm.assert_index_equal(result, expected)

    def test_astype_str_freq_and_tz(self):
        # test astype string with freq "h" and timezone
        dti = date_range(
            "3/6/2012 00:00", periods=2, freq="h", tz="Europe/London", name="test_name"
        )
        result = dti.astype(str)
        expected = Index(
            ["2012-03-06 00:00:00+00:00", "2012-03-06 01:00:00+00:00"],
            dtype="str",
            name="test_name",
        )
        tm.assert_index_equal(result, expected)

    def test_astype_datetime64(self):
        # GH 13149, GH 13209
        idx = DatetimeIndex(
            ["2016-05-16", "NaT", NaT, np.nan], dtype="M8[ns]", name="idx"
        )

        result = idx.astype("datetime64[ns]")
        tm.assert_index_equal(result, idx)
        assert result is not idx

        result = idx.astype("datetime64[ns]", copy=False)
        tm.assert_index_equal(result, idx)
        assert result is idx

        idx_tz = DatetimeIndex(["2016-05-16", "NaT", NaT, np.nan], tz="EST", name="idx")
        msg = "Cannot use .astype to convert from timezone-aware"
        with pytest.raises(TypeError, match=msg):
            # dt64tz->dt64 deprecated
            result = idx_tz.astype("datetime64[ns]")

    def test_astype_object(self):
        rng = date_range("1/1/2000", periods=20)

        casted = rng.astype("O")
        exp_values = list(rng)

        tm.assert_index_equal(casted, Index(exp_values, dtype=np.object_))
        assert casted.tolist() == exp_values

    @pytest.mark.parametrize("tz", [None, "Asia/Tokyo"])
    def test_astype_object_tz(self, tz):
        idx = date_range(start="2013-01-01", periods=4, freq="ME", name="idx", tz=tz)
        expected_list = [
            Timestamp("2013-01-31", tz=tz),
            Timestamp("2013-02-28", tz=tz),
            Timestamp("2013-03-31", tz=tz),
            Timestamp("2013-04-30", tz=tz),
        ]
        expected = Index(expected_list, dtype=object, name="idx")
        result = idx.astype(object)
        tm.assert_index_equal(result, expected)
        assert idx.tolist() == expected_list

    def test_astype_object_with_nat(self):
        idx = DatetimeIndex(
            [datetime(2013, 1, 1), datetime(2013, 1, 2), NaT, datetime(2013, 1, 4)],
            name="idx",
        )
        expected_list = [
            Timestamp("2013-01-01"),
            Timestamp("2013-01-02"),
            NaT,
            Timestamp("2013-01-04"),
        ]
        expected = Index(expected_list, dtype=object, name="idx")
        result = idx.astype(object)
        tm.assert_index_equal(result, expected)
        assert idx.tolist() == expected_list

    @pytest.mark.parametrize(
        "dtype",
        [float, "timedelta64", "timedelta64[ns]", "datetime64", "datetime64[D]"],
    )
    def test_astype_raises(self, dtype):
        # GH 13149, GH 13209
        idx = DatetimeIndex(["2016-05-16", "NaT", NaT, np.nan])
        msg = "Cannot cast DatetimeIndex to dtype"
        if dtype == "datetime64":
            msg = "Casting to unit-less dtype 'datetime64' is not supported"
        with pytest.raises(TypeError, match=msg):
            idx.astype(dtype)

    def test_index_convert_to_datetime_array(self):
        def _check_rng(rng):
            converted = rng.to_pydatetime()
            assert isinstance(converted, np.ndarray)
            for x, stamp in zip(converted, rng):
                assert isinstance(x, datetime)
                assert x == stamp.to_pydatetime()
                assert x.tzinfo == stamp.tzinfo

        rng = date_range("20090415", "20090519")
        rng_eastern = date_range("20090415", "20090519", tz="US/Eastern")
        rng_utc = date_range("20090415", "20090519", tz="utc")

        _check_rng(rng)
        _check_rng(rng_eastern)
        _check_rng(rng_utc)

    def test_index_convert_to_datetime_array_explicit_pytz(self):
        def _check_rng(rng):
            converted = rng.to_pydatetime()
            assert isinstance(converted, np.ndarray)
            for x, stamp in zip(converted, rng):
                assert isinstance(x, datetime)
                assert x == stamp.to_pydatetime()
                assert x.tzinfo == stamp.tzinfo

        rng = date_range("20090415", "20090519")
        rng_eastern = date_range("20090415", "20090519", tz=pytz.timezone("US/Eastern"))
        rng_utc = date_range("20090415", "20090519", tz=pytz.utc)

        _check_rng(rng)
        _check_rng(rng_eastern)
        _check_rng(rng_utc)

    def test_index_convert_to_datetime_array_dateutil(self):
        def _check_rng(rng):
            converted = rng.to_pydatetime()
            assert isinstance(converted, np.ndarray)
            for x, stamp in zip(converted, rng):
                assert isinstance(x, datetime)
                assert x == stamp.to_pydatetime()
                assert x.tzinfo == stamp.tzinfo

        rng = date_range("20090415", "20090519")
        rng_eastern = date_range("20090415", "20090519", tz="dateutil/US/Eastern")
        rng_utc = date_range("20090415", "20090519", tz=dateutil.tz.tzutc())

        _check_rng(rng)
        _check_rng(rng_eastern)
        _check_rng(rng_utc)

    @pytest.mark.parametrize(
        "tz, dtype",
        [["US/Pacific", "datetime64[ns, US/Pacific]"], [None, "datetime64[ns]"]],
    )
    def test_integer_index_astype_datetime(self, tz, dtype):
        # GH 20997, 20964, 24559
        val = [Timestamp("2018-01-01", tz=tz).as_unit("ns")._value]
        result = Index(val, name="idx").astype(dtype)
        expected = DatetimeIndex(["2018-01-01"], tz=tz, name="idx").as_unit("ns")
        tm.assert_index_equal(result, expected)

    def test_dti_astype_period(self):
        idx = DatetimeIndex([NaT, "2011-01-01", "2011-02-01"], name="idx")

        res = idx.astype("period[M]")
        exp = PeriodIndex(["NaT", "2011-01", "2011-02"], freq="M", name="idx")
        tm.assert_index_equal(res, exp)

        res = idx.astype("period[3M]")
        exp = PeriodIndex(["NaT", "2011-01", "2011-02"], freq="3M", name="idx")
        tm.assert_index_equal(res, exp)


class TestAstype:
    @pytest.mark.parametrize("tz", [None, "US/Central"])
    def test_astype_category(self, tz):
        obj = date_range("2000", periods=2, tz=tz, name="idx")
        result = obj.astype("category")
        dti = DatetimeIndex(["2000-01-01", "2000-01-02"], tz=tz).as_unit("ns")
        expected = pd.CategoricalIndex(
            dti,
            name="idx",
        )
        tm.assert_index_equal(result, expected)

        result = obj._data.astype("category")
        expected = expected.values
        tm.assert_categorical_equal(result, expected)

    @pytest.mark.parametrize("tz", [None, "US/Central"])
    def test_astype_array_fallback(self, tz):
        obj = date_range("2000", periods=2, tz=tz, name="idx")
        result = obj.astype(bool)
        expected = Index(np.array([True, True]), name="idx")
        tm.assert_index_equal(result, expected)

        result = obj._data.astype(bool)
        expected = np.array([True, True])
        tm.assert_numpy_array_equal(result, expected)
@ -0,0 +1,141 @@
import pytest

from pandas import (
    DatetimeIndex,
    Series,
    date_range,
)
import pandas._testing as tm


class TestDelete:
    def test_delete(self, unit):
        idx = date_range(
            start="2000-01-01", periods=5, freq="ME", name="idx", unit=unit
        )

        # preserve freq
        expected_0 = date_range(
            start="2000-02-01", periods=4, freq="ME", name="idx", unit=unit
        )
        expected_4 = date_range(
            start="2000-01-01", periods=4, freq="ME", name="idx", unit=unit
        )

        # reset freq to None
        expected_1 = DatetimeIndex(
            ["2000-01-31", "2000-03-31", "2000-04-30", "2000-05-31"],
            freq=None,
            name="idx",
        ).as_unit(unit)

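        # keys are the positions passed to delete(); values are the expected result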
        cases = {
            0: expected_0,
            -5: expected_0,
            -1: expected_4,
            4: expected_4,
            1: expected_1,
        }
        for n, expected in cases.items():
            result = idx.delete(n)
            tm.assert_index_equal(result, expected)
            assert result.name == expected.name
            assert result.freq == expected.freq

        with pytest.raises((IndexError, ValueError), match="out of bounds"):
            # either depending on numpy version
            idx.delete(5)

    @pytest.mark.parametrize("tz", [None, "Asia/Tokyo", "US/Pacific"])
    def test_delete2(self, tz):
        idx = date_range(
            start="2000-01-01 09:00", periods=10, freq="h", name="idx", tz=tz
        )

        expected = date_range(
            start="2000-01-01 10:00", periods=9, freq="h", name="idx", tz=tz
        )
        result = idx.delete(0)
        tm.assert_index_equal(result, expected)
        assert result.name == expected.name
        assert result.freqstr == "h"
        assert result.tz == expected.tz

        expected = date_range(
            start="2000-01-01 09:00", periods=9, freq="h", name="idx", tz=tz
        )
        result = idx.delete(-1)
        tm.assert_index_equal(result, expected)
        assert result.name == expected.name
        assert result.freqstr == "h"
        assert result.tz == expected.tz

    def test_delete_slice(self, unit):
        idx = date_range(
            start="2000-01-01", periods=10, freq="D", name="idx", unit=unit
        )

        # preserve freq
        expected_0_2 = date_range(
            start="2000-01-04", periods=7, freq="D", name="idx", unit=unit
        )
        expected_7_9 = date_range(
            start="2000-01-01", periods=7, freq="D", name="idx", unit=unit
        )

        # reset freq to None
        expected_3_5 = DatetimeIndex(
            [
                "2000-01-01",
                "2000-01-02",
                "2000-01-03",
                "2000-01-07",
                "2000-01-08",
                "2000-01-09",
                "2000-01-10",
            ],
            freq=None,
            name="idx",
        ).as_unit(unit)

        cases = {
            (0, 1, 2): expected_0_2,
            (7, 8, 9): expected_7_9,
            (3, 4, 5): expected_3_5,
        }
        for n, expected in cases.items():
            result = idx.delete(n)
            tm.assert_index_equal(result, expected)
            assert result.name == expected.name
            assert result.freq == expected.freq

            result = idx.delete(slice(n[0], n[-1] + 1))
            tm.assert_index_equal(result, expected)
            assert result.name == expected.name
            assert result.freq == expected.freq

    # TODO: belongs in Series.drop tests?
    @pytest.mark.parametrize("tz", [None, "Asia/Tokyo", "US/Pacific"])
    def test_delete_slice2(self, tz, unit):
        dti = date_range(
            "2000-01-01 09:00", periods=10, freq="h", name="idx", tz=tz, unit=unit
        )
        ts = Series(
            1,
            index=dti,
        )
        # preserve freq
        result = ts.drop(ts.index[:5]).index
        expected = dti[5:]
        tm.assert_index_equal(result, expected)
        assert result.name == expected.name
        assert result.freq == expected.freq
        assert result.tz == expected.tz

        # reset freq to None
        result = ts.drop(ts.index[[1, 3, 5, 7, 9]]).index
        expected = dti[::2]._with_freq(None)
        tm.assert_index_equal(result, expected)
        assert result.name == expected.name
        assert result.freq == expected.freq
        assert result.tz == expected.tz
@ -0,0 +1,125 @@
import numpy as np
import pytest

from pandas import (
    DatetimeIndex,
    Index,
    date_range,
    factorize,
)
import pandas._testing as tm


class TestDatetimeIndexFactorize:
    def test_factorize(self):
        idx1 = DatetimeIndex(
            ["2014-01", "2014-01", "2014-02", "2014-02", "2014-03", "2014-03"]
        )

        exp_arr = np.array([0, 0, 1, 1, 2, 2], dtype=np.intp)
        exp_idx = DatetimeIndex(["2014-01", "2014-02", "2014-03"])

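        # factorize returns (codes, uniques): codes map each element to its position in uniques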
        arr, idx = idx1.factorize()
        tm.assert_numpy_array_equal(arr, exp_arr)
        tm.assert_index_equal(idx, exp_idx)
        assert idx.freq == exp_idx.freq

        arr, idx = idx1.factorize(sort=True)
        tm.assert_numpy_array_equal(arr, exp_arr)
        tm.assert_index_equal(idx, exp_idx)
        assert idx.freq == exp_idx.freq

        # tz must be preserved
        idx1 = idx1.tz_localize("Asia/Tokyo")
        exp_idx = exp_idx.tz_localize("Asia/Tokyo")

        arr, idx = idx1.factorize()
        tm.assert_numpy_array_equal(arr, exp_arr)
        tm.assert_index_equal(idx, exp_idx)
        assert idx.freq == exp_idx.freq

        idx2 = DatetimeIndex(
            ["2014-03", "2014-03", "2014-02", "2014-01", "2014-03", "2014-01"]
        )

        exp_arr = np.array([2, 2, 1, 0, 2, 0], dtype=np.intp)
        exp_idx = DatetimeIndex(["2014-01", "2014-02", "2014-03"])
        arr, idx = idx2.factorize(sort=True)
        tm.assert_numpy_array_equal(arr, exp_arr)
        tm.assert_index_equal(idx, exp_idx)
        assert idx.freq == exp_idx.freq

        exp_arr = np.array([0, 0, 1, 2, 0, 2], dtype=np.intp)
        exp_idx = DatetimeIndex(["2014-03", "2014-02", "2014-01"])
        arr, idx = idx2.factorize()
        tm.assert_numpy_array_equal(arr, exp_arr)
        tm.assert_index_equal(idx, exp_idx)
        assert idx.freq == exp_idx.freq

    def test_factorize_preserves_freq(self):
        # GH#38120 freq should be preserved
        idx3 = date_range("2000-01", periods=4, freq="ME", tz="Asia/Tokyo")
        exp_arr = np.array([0, 1, 2, 3], dtype=np.intp)

        arr, idx = idx3.factorize()
        tm.assert_numpy_array_equal(arr, exp_arr)
        tm.assert_index_equal(idx, idx3)
        assert idx.freq == idx3.freq

        arr, idx = factorize(idx3)
        tm.assert_numpy_array_equal(arr, exp_arr)
        tm.assert_index_equal(idx, idx3)
        assert idx.freq == idx3.freq

    def test_factorize_tz(self, tz_naive_fixture, index_or_series):
        tz = tz_naive_fixture
        # GH#13750
        base = date_range("2016-11-05", freq="h", periods=100, tz=tz)
        idx = base.repeat(5)

        exp_arr = np.arange(100, dtype=np.intp).repeat(5)

        obj = index_or_series(idx)

        arr, res = obj.factorize()
        tm.assert_numpy_array_equal(arr, exp_arr)
        expected = base._with_freq(None)
        tm.assert_index_equal(res, expected)
        assert res.freq == expected.freq

    def test_factorize_dst(self, index_or_series):
        # GH#13750
        idx = date_range("2016-11-06", freq="h", periods=12, tz="US/Eastern")
        obj = index_or_series(idx)

        arr, res = obj.factorize()
        tm.assert_numpy_array_equal(arr, np.arange(12, dtype=np.intp))
        tm.assert_index_equal(res, idx)
        if index_or_series is Index:
            assert res.freq == idx.freq

        idx = date_range("2016-06-13", freq="h", periods=12, tz="US/Eastern")
        obj = index_or_series(idx)

        arr, res = obj.factorize()
        tm.assert_numpy_array_equal(arr, np.arange(12, dtype=np.intp))
        tm.assert_index_equal(res, idx)
        if index_or_series is Index:
            assert res.freq == idx.freq

    @pytest.mark.parametrize("sort", [True, False])
    def test_factorize_no_freq_non_nano(self, tz_naive_fixture, sort):
        # GH#51978 case that does not go through the fastpath based on
        # non-None freq
        tz = tz_naive_fixture
        idx = date_range("2016-11-06", freq="h", periods=5, tz=tz)[[0, 4, 1, 3, 2]]
        exp_codes, exp_uniques = idx.factorize(sort=sort)

        res_codes, res_uniques = idx.as_unit("s").factorize(sort=sort)

        tm.assert_numpy_array_equal(res_codes, exp_codes)
        tm.assert_index_equal(res_uniques, exp_uniques.as_unit("s"))

        res_codes, res_uniques = idx.as_unit("s").to_series().factorize(sort=sort)
        tm.assert_numpy_array_equal(res_codes, exp_codes)
        tm.assert_index_equal(res_uniques, exp_uniques.as_unit("s"))
@ -0,0 +1,62 @@
import pytest

import pandas as pd
import pandas._testing as tm


class TestDatetimeIndexFillNA:
    @pytest.mark.parametrize("tz", ["US/Eastern", "Asia/Tokyo"])
    def test_fillna_datetime64(self, tz):
        # GH 11343
        idx = pd.DatetimeIndex(["2011-01-01 09:00", pd.NaT, "2011-01-01 11:00"])

        exp = pd.DatetimeIndex(
            ["2011-01-01 09:00", "2011-01-01 10:00", "2011-01-01 11:00"]
        )
        tm.assert_index_equal(idx.fillna(pd.Timestamp("2011-01-01 10:00")), exp)

        # tz mismatch
        exp = pd.Index(
            [
                pd.Timestamp("2011-01-01 09:00"),
                pd.Timestamp("2011-01-01 10:00", tz=tz),
                pd.Timestamp("2011-01-01 11:00"),
            ],
            dtype=object,
        )
        tm.assert_index_equal(idx.fillna(pd.Timestamp("2011-01-01 10:00", tz=tz)), exp)

        # object
        exp = pd.Index(
            [pd.Timestamp("2011-01-01 09:00"), "x", pd.Timestamp("2011-01-01 11:00")],
            dtype=object,
        )
        tm.assert_index_equal(idx.fillna("x"), exp)

        idx = pd.DatetimeIndex(["2011-01-01 09:00", pd.NaT, "2011-01-01 11:00"], tz=tz)

        exp = pd.DatetimeIndex(
            ["2011-01-01 09:00", "2011-01-01 10:00", "2011-01-01 11:00"], tz=tz
        )
        tm.assert_index_equal(idx.fillna(pd.Timestamp("2011-01-01 10:00", tz=tz)), exp)

        exp = pd.Index(
            [
                pd.Timestamp("2011-01-01 09:00", tz=tz),
                pd.Timestamp("2011-01-01 10:00"),
                pd.Timestamp("2011-01-01 11:00", tz=tz),
            ],
            dtype=object,
        )
        tm.assert_index_equal(idx.fillna(pd.Timestamp("2011-01-01 10:00")), exp)

        # object
        exp = pd.Index(
            [
                pd.Timestamp("2011-01-01 09:00", tz=tz),
                "x",
                pd.Timestamp("2011-01-01 11:00", tz=tz),
            ],
            dtype=object,
        )
        tm.assert_index_equal(idx.fillna("x"), exp)
@ -0,0 +1,265 @@
from datetime import datetime

import numpy as np
import pytest
import pytz

from pandas import (
    NA,
    DatetimeIndex,
    Index,
    NaT,
    Timestamp,
    date_range,
)
import pandas._testing as tm


class TestInsert:
    @pytest.mark.parametrize("null", [None, np.nan, np.datetime64("NaT"), NaT, NA])
    @pytest.mark.parametrize("tz", [None, "UTC", "US/Eastern"])
    def test_insert_nat(self, tz, null):
        # GH#16537, GH#18295 (test missing)

        idx = DatetimeIndex(["2017-01-01"], tz=tz)
        expected = DatetimeIndex(["NaT", "2017-01-01"], tz=tz)
        if tz is not None and isinstance(null, np.datetime64):
            expected = Index([null, idx[0]], dtype=object)

        res = idx.insert(0, null)
        tm.assert_index_equal(res, expected)

    @pytest.mark.parametrize("tz", [None, "UTC", "US/Eastern"])
    def test_insert_invalid_na(self, tz):
        idx = DatetimeIndex(["2017-01-01"], tz=tz)

        item = np.timedelta64("NaT")
        result = idx.insert(0, item)
        expected = Index([item] + list(idx), dtype=object)
        tm.assert_index_equal(result, expected)

    def test_insert_empty_preserves_freq(self, tz_naive_fixture):
        # GH#33573
        tz = tz_naive_fixture
        dti = DatetimeIndex([], tz=tz, freq="D")
        item = Timestamp("2017-04-05").tz_localize(tz)

        result = dti.insert(0, item)
        assert result.freq == dti.freq

        # But not when we insert an item that doesn't conform to freq
        dti = DatetimeIndex([], tz=tz, freq="W-THU")
        result = dti.insert(0, item)
        assert result.freq is None

    def test_insert(self, unit):
        idx = DatetimeIndex(
            ["2000-01-04", "2000-01-01", "2000-01-02"], name="idx"
        ).as_unit(unit)

        result = idx.insert(2, datetime(2000, 1, 5))
        exp = DatetimeIndex(
            ["2000-01-04", "2000-01-01", "2000-01-05", "2000-01-02"], name="idx"
        ).as_unit(unit)
        tm.assert_index_equal(result, exp)

        # insertion of a non-datetime should coerce to an object index
        result = idx.insert(1, "inserted")
        expected = Index(
            [
                datetime(2000, 1, 4),
                "inserted",
                datetime(2000, 1, 1),
                datetime(2000, 1, 2),
            ],
            name="idx",
        )
        assert not isinstance(result, DatetimeIndex)
        tm.assert_index_equal(result, expected)
        assert result.name == expected.name

    def test_insert2(self, unit):
        idx = date_range("1/1/2000", periods=3, freq="ME", name="idx", unit=unit)

        # preserve freq
        expected_0 = DatetimeIndex(
            ["1999-12-31", "2000-01-31", "2000-02-29", "2000-03-31"],
            name="idx",
            freq="ME",
        ).as_unit(unit)
        expected_3 = DatetimeIndex(
            ["2000-01-31", "2000-02-29", "2000-03-31", "2000-04-30"],
            name="idx",
            freq="ME",
        ).as_unit(unit)

        # reset freq to None
        expected_1_nofreq = DatetimeIndex(
            ["2000-01-31", "2000-01-31", "2000-02-29", "2000-03-31"],
            name="idx",
            freq=None,
        ).as_unit(unit)
        expected_3_nofreq = DatetimeIndex(
            ["2000-01-31", "2000-02-29", "2000-03-31", "2000-01-02"],
            name="idx",
            freq=None,
        ).as_unit(unit)

        cases = [
            (0, datetime(1999, 12, 31), expected_0),
            (-3, datetime(1999, 12, 31), expected_0),
            (3, datetime(2000, 4, 30), expected_3),
            (1, datetime(2000, 1, 31), expected_1_nofreq),
            (3, datetime(2000, 1, 2), expected_3_nofreq),
        ]

        for n, d, expected in cases:
            result = idx.insert(n, d)
            tm.assert_index_equal(result, expected)
            assert result.name == expected.name
            assert result.freq == expected.freq

    def test_insert3(self, unit):
        idx = date_range("1/1/2000", periods=3, freq="ME", name="idx", unit=unit)

        # reset freq to None
        result = idx.insert(3, datetime(2000, 1, 2))
        expected = DatetimeIndex(
            ["2000-01-31", "2000-02-29", "2000-03-31", "2000-01-02"],
            name="idx",
            freq=None,
        ).as_unit(unit)
        tm.assert_index_equal(result, expected)
        assert result.name == expected.name
        assert result.freq is None

    def test_insert4(self, unit):
        for tz in ["US/Pacific", "Asia/Singapore"]:
            idx = date_range(
                "1/1/2000 09:00", periods=6, freq="h", tz=tz, name="idx", unit=unit
            )
            # preserve freq
            expected = date_range(
                "1/1/2000 09:00", periods=7, freq="h", tz=tz, name="idx", unit=unit
            )
            for d in [
                Timestamp("2000-01-01 15:00", tz=tz),
                pytz.timezone(tz).localize(datetime(2000, 1, 1, 15)),
            ]:
                result = idx.insert(6, d)
                tm.assert_index_equal(result, expected)
                assert result.name == expected.name
                assert result.freq == expected.freq
                assert result.tz == expected.tz

            expected = DatetimeIndex(
                [
                    "2000-01-01 09:00",
                    "2000-01-01 10:00",
                    "2000-01-01 11:00",
                    "2000-01-01 12:00",
                    "2000-01-01 13:00",
                    "2000-01-01 14:00",
                    "2000-01-01 10:00",
                ],
                name="idx",
                tz=tz,
                freq=None,
            ).as_unit(unit)
            # reset freq to None
            for d in [
                Timestamp("2000-01-01 10:00", tz=tz),
                pytz.timezone(tz).localize(datetime(2000, 1, 1, 10)),
            ]:
                result = idx.insert(6, d)
                tm.assert_index_equal(result, expected)
                assert result.name == expected.name
                assert result.tz == expected.tz
                assert result.freq is None

    # TODO: also changes DataFrame.__setitem__ with expansion
    def test_insert_mismatched_tzawareness(self):
        # see GH#7299
        idx = date_range("1/1/2000", periods=3, freq="D", tz="Asia/Tokyo", name="idx")

        # mismatched tz-awareness
        item = Timestamp("2000-01-04")
        result = idx.insert(3, item)
        expected = Index(
            list(idx[:3]) + [item] + list(idx[3:]), dtype=object, name="idx"
        )
        tm.assert_index_equal(result, expected)

        # mismatched tz-awareness
        item = datetime(2000, 1, 4)
        result = idx.insert(3, item)
        expected = Index(
            list(idx[:3]) + [item] + list(idx[3:]), dtype=object, name="idx"
        )
        tm.assert_index_equal(result, expected)

    # TODO: also changes DataFrame.__setitem__ with expansion
    def test_insert_mismatched_tz(self):
        # see GH#7299
        # pre-2.0 with mismatched tzs we would cast to object
        idx = date_range("1/1/2000", periods=3, freq="D", tz="Asia/Tokyo", name="idx")

        # mismatched tz -> cast to object (could reasonably cast to same tz or UTC)
        item = Timestamp("2000-01-04", tz="US/Eastern")
        result = idx.insert(3, item)
        expected = Index(
            list(idx[:3]) + [item.tz_convert(idx.tz)] + list(idx[3:]),
            name="idx",
        )
        assert expected.dtype == idx.dtype
        tm.assert_index_equal(result, expected)

        item = datetime(2000, 1, 4, tzinfo=pytz.timezone("US/Eastern"))
        result = idx.insert(3, item)
        expected = Index(
            list(idx[:3]) + [item.astimezone(idx.tzinfo)] + list(idx[3:]),
            name="idx",
        )
        assert expected.dtype == idx.dtype
        tm.assert_index_equal(result, expected)

    @pytest.mark.parametrize(
        "item", [0, np.int64(0), np.float64(0), np.array(0), np.timedelta64(456)]
    )
    def test_insert_mismatched_types_raises(self, tz_aware_fixture, item):
        # GH#33703 don't cast these to dt64
        tz = tz_aware_fixture
        dti = date_range("2019-11-04", periods=9, freq="-1D", name=9, tz=tz)

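        # despite the test name, a mismatched scalar coerces to object dtype rather than raising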
        result = dti.insert(1, item)

        if isinstance(item, np.ndarray):
            assert item.item() == 0
            expected = Index([dti[0], 0] + list(dti[1:]), dtype=object, name=9)
        else:
            expected = Index([dti[0], item] + list(dti[1:]), dtype=object, name=9)

        tm.assert_index_equal(result, expected)

    def test_insert_castable_str(self, tz_aware_fixture):
        # GH#33703
        tz = tz_aware_fixture
        dti = date_range("2019-11-04", periods=3, freq="-1D", name=9, tz=tz)

        value = "2019-11-05"
        result = dti.insert(0, value)

        ts = Timestamp(value).tz_localize(tz)
        expected = DatetimeIndex([ts] + list(dti), dtype=dti.dtype, name=9)
        tm.assert_index_equal(result, expected)

    def test_insert_non_castable_str(self, tz_aware_fixture):
        # GH#33703
        tz = tz_aware_fixture
        dti = date_range("2019-11-04", periods=3, freq="-1D", name=9, tz=tz)

        value = "foo"
        result = dti.insert(0, value)

        expected = Index(["foo"] + list(dti), dtype=object, name=9)
        tm.assert_index_equal(result, expected)
@ -0,0 +1,28 @@
from pandas import (
    DataFrame,
    DatetimeIndex,
    date_range,
)
import pandas._testing as tm


def test_isocalendar_returns_correct_values_close_to_new_year_with_tz():
    # GH#6538: Check that DatetimeIndex and its Timestamp elements
    # return the same weekofyear accessor close to the new year w/ tz
    dates = ["2013/12/29", "2013/12/30", "2013/12/31"]
    dates = DatetimeIndex(dates, tz="Europe/Brussels")
    result = dates.isocalendar()
    expected_data_frame = DataFrame(
        [[2013, 52, 7], [2014, 1, 1], [2014, 1, 2]],
        columns=["year", "week", "day"],
        index=dates,
        dtype="UInt32",
    )
    tm.assert_frame_equal(result, expected_data_frame)


def test_dti_timestamp_isocalendar_fields():
    idx = date_range("2020-01-01", periods=10)
    expected = tuple(idx.isocalendar().iloc[-1].to_list())
    result = idx[-1].isocalendar()
    assert result == expected
@ -0,0 +1,47 @@
import pytest

from pandas import (
    DatetimeIndex,
    Index,
    MultiIndex,
    Period,
    date_range,
)
import pandas._testing as tm


class TestMap:
    def test_map(self):
        rng = date_range("1/1/2000", periods=10)

        f = lambda x: x.strftime("%Y%m%d")
        result = rng.map(f)
        exp = Index([f(x) for x in rng])
        tm.assert_index_equal(result, exp)

    def test_map_fallthrough(self, capsys):
        # GH#22067: check that we don't get warnings about silently ignored errors
        dti = date_range("2017-01-01", "2018-01-01", freq="B")

        dti.map(lambda x: Period(year=x.year, month=x.month, freq="M"))

        captured = capsys.readouterr()
        assert captured.err == ""

    def test_map_bug_1677(self):
        index = DatetimeIndex(["2012-04-25 09:30:00.393000"])
        f = index.asof

        result = index.map(f)
        expected = Index([f(index[0])])
        tm.assert_index_equal(result, expected)

    @pytest.mark.parametrize("name", [None, "name"])
    def test_index_map(self, name):
        # see GH#20990
        count = 6
        index = date_range("2018-01-01", periods=count, freq="ME", name=name).map(
            lambda x: (x.year, x.month)
        )
        exp_index = MultiIndex.from_product(((2018,), range(1, 7)), names=[name, name])
        tm.assert_index_equal(index, exp_index)
@ -0,0 +1,95 @@
from dateutil.tz import tzlocal
import numpy as np
import pytest

import pandas.util._test_decorators as td

from pandas import (
    DatetimeIndex,
    NaT,
    Timestamp,
    date_range,
)
import pandas._testing as tm


class TestNormalize:
    def test_normalize(self):
        rng = date_range("1/1/2000 9:30", periods=10, freq="D")

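        # normalize() resets the time component of each timestamp to midnight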
        result = rng.normalize()
        expected = date_range("1/1/2000", periods=10, freq="D")
        tm.assert_index_equal(result, expected)

        arr_ns = np.array([1380585623454345752, 1380585612343234312]).astype(
            "datetime64[ns]"
        )
        rng_ns = DatetimeIndex(arr_ns)
        rng_ns_normalized = rng_ns.normalize()

        arr_ns = np.array([1380585600000000000, 1380585600000000000]).astype(
            "datetime64[ns]"
        )
        expected = DatetimeIndex(arr_ns)
        tm.assert_index_equal(rng_ns_normalized, expected)

        assert result.is_normalized
        assert not rng.is_normalized

    def test_normalize_nat(self):
        dti = DatetimeIndex([NaT, Timestamp("2018-01-01 01:00:00")])
        result = dti.normalize()
        expected = DatetimeIndex([NaT, Timestamp("2018-01-01")])
        tm.assert_index_equal(result, expected)

    def test_normalize_tz(self):
        rng = date_range("1/1/2000 9:30", periods=10, freq="D", tz="US/Eastern")

        result = rng.normalize()  # does not preserve freq
        expected = date_range("1/1/2000", periods=10, freq="D", tz="US/Eastern")
        tm.assert_index_equal(result, expected._with_freq(None))

        assert result.is_normalized
        assert not rng.is_normalized

        rng = date_range("1/1/2000 9:30", periods=10, freq="D", tz="UTC")

        result = rng.normalize()
        expected = date_range("1/1/2000", periods=10, freq="D", tz="UTC")
        tm.assert_index_equal(result, expected)

        assert result.is_normalized
        assert not rng.is_normalized

        rng = date_range("1/1/2000 9:30", periods=10, freq="D", tz=tzlocal())
        result = rng.normalize()  # does not preserve freq
        expected = date_range("1/1/2000", periods=10, freq="D", tz=tzlocal())
        tm.assert_index_equal(result, expected._with_freq(None))

        assert result.is_normalized
        assert not rng.is_normalized

    @td.skip_if_windows
    @pytest.mark.parametrize(
        "timezone",
        [
            "US/Pacific",
            "US/Eastern",
            "UTC",
            "Asia/Kolkata",
            "Asia/Shanghai",
            "Australia/Canberra",
        ],
    )
    def test_normalize_tz_local(self, timezone):
        # GH#13459
        with tm.set_timezone(timezone):
            rng = date_range("1/1/2000 9:30", periods=10, freq="D", tz=tzlocal())

            result = rng.normalize()
            expected = date_range("1/1/2000", periods=10, freq="D", tz=tzlocal())
            expected = expected._with_freq(None)
            tm.assert_index_equal(result, expected)

            assert result.is_normalized
            assert not rng.is_normalized
@ -0,0 +1,83 @@
import numpy as np
import pytest

from pandas import (
    DatetimeIndex,
    Timestamp,
    date_range,
)
import pandas._testing as tm


class TestRepeat:
    def test_repeat_range(self, tz_naive_fixture):
        rng = date_range("1/1/2000", "1/1/2001")

        result = rng.repeat(5)
        assert result.freq is None
        assert len(result) == 5 * len(rng)

    def test_repeat_range2(self, tz_naive_fixture, unit):
        tz = tz_naive_fixture
        index = date_range("2001-01-01", periods=2, freq="D", tz=tz, unit=unit)
        exp = DatetimeIndex(
            ["2001-01-01", "2001-01-01", "2001-01-02", "2001-01-02"], tz=tz
        ).as_unit(unit)
        for res in [index.repeat(2), np.repeat(index, 2)]:
            tm.assert_index_equal(res, exp)
            assert res.freq is None

    def test_repeat_range3(self, tz_naive_fixture, unit):
        tz = tz_naive_fixture
        index = date_range("2001-01-01", periods=2, freq="2D", tz=tz, unit=unit)
        exp = DatetimeIndex(
            ["2001-01-01", "2001-01-01", "2001-01-03", "2001-01-03"], tz=tz
        ).as_unit(unit)
        for res in [index.repeat(2), np.repeat(index, 2)]:
            tm.assert_index_equal(res, exp)
            assert res.freq is None

    def test_repeat_range4(self, tz_naive_fixture, unit):
        tz = tz_naive_fixture
        index = DatetimeIndex(["2001-01-01", "NaT", "2003-01-01"], tz=tz).as_unit(unit)
        exp = DatetimeIndex(
            [
                "2001-01-01",
                "2001-01-01",
                "2001-01-01",
                "NaT",
                "NaT",
                "NaT",
                "2003-01-01",
                "2003-01-01",
                "2003-01-01",
            ],
            tz=tz,
        ).as_unit(unit)
        for res in [index.repeat(3), np.repeat(index, 3)]:
            tm.assert_index_equal(res, exp)
            assert res.freq is None

    def test_repeat(self, tz_naive_fixture, unit):
        tz = tz_naive_fixture
        reps = 2
        msg = "the 'axis' parameter is not supported"

        rng = date_range(start="2016-01-01", periods=2, freq="30Min", tz=tz, unit=unit)

        expected_rng = DatetimeIndex(
            [
                Timestamp("2016-01-01 00:00:00", tz=tz),
                Timestamp("2016-01-01 00:00:00", tz=tz),
                Timestamp("2016-01-01 00:30:00", tz=tz),
                Timestamp("2016-01-01 00:30:00", tz=tz),
            ]
        ).as_unit(unit)

        res = rng.repeat(reps)
        tm.assert_index_equal(res, expected_rng)
        assert res.freq is None

        tm.assert_index_equal(np.repeat(rng, reps), expected_rng)
        with pytest.raises(ValueError, match=msg):
            np.repeat(rng, reps, axis=1)
@ -0,0 +1,31 @@
from dateutil.tz import tzlocal
import pytest

from pandas.compat import IS64

from pandas import date_range


@pytest.mark.parametrize(
    "freq,expected",
    [
        ("YE", "day"),
        ("QE", "day"),
        ("ME", "day"),
        ("D", "day"),
        ("h", "hour"),
        ("min", "minute"),
        ("s", "second"),
        ("ms", "millisecond"),
        ("us", "microsecond"),
    ],
)
def test_dti_resolution(request, tz_naive_fixture, freq, expected):
    tz = tz_naive_fixture
    if freq == "YE" and not IS64 and isinstance(tz, tzlocal):
        request.applymarker(
            pytest.mark.xfail(reason="OverflowError inside tzlocal past 2038")
        )

    idx = date_range(start="2013-04-01", periods=30, freq=freq, tz=tz)
    assert idx.resolution == expected
@ -0,0 +1,221 @@
import pytest

from pandas._libs.tslibs import to_offset
from pandas._libs.tslibs.offsets import INVALID_FREQ_ERR_MSG

from pandas import (
    DatetimeIndex,
    Timestamp,
    date_range,
)
import pandas._testing as tm


class TestDatetimeIndexRound:
    def test_round_daily(self):
        dti = date_range("20130101 09:10:11", periods=5)
        result = dti.round("D")
        expected = date_range("20130101", periods=5)
        tm.assert_index_equal(result, expected)

        dti = dti.tz_localize("UTC").tz_convert("US/Eastern")
        result = dti.round("D")
        expected = date_range("20130101", periods=5).tz_localize("US/Eastern")
        tm.assert_index_equal(result, expected)

        result = dti.round("s")
        tm.assert_index_equal(result, dti)

    @pytest.mark.parametrize(
        "freq, error_msg",
        [
            ("YE", "<YearEnd: month=12> is a non-fixed frequency"),
            ("ME", "<MonthEnd> is a non-fixed frequency"),
            ("foobar", "Invalid frequency: foobar"),
        ],
    )
    def test_round_invalid(self, freq, error_msg):
        dti = date_range("20130101 09:10:11", periods=5)
        dti = dti.tz_localize("UTC").tz_convert("US/Eastern")
        with pytest.raises(ValueError, match=error_msg):
            dti.round(freq)

    def test_round(self, tz_naive_fixture, unit):
        tz = tz_naive_fixture
        rng = date_range(start="2016-01-01", periods=5, freq="30Min", tz=tz, unit=unit)
        elt = rng[1]

        expected_rng = DatetimeIndex(
            [
                Timestamp("2016-01-01 00:00:00", tz=tz),
                Timestamp("2016-01-01 00:00:00", tz=tz),
                Timestamp("2016-01-01 01:00:00", tz=tz),
                Timestamp("2016-01-01 02:00:00", tz=tz),
                Timestamp("2016-01-01 02:00:00", tz=tz),
            ]
        ).as_unit(unit)
        expected_elt = expected_rng[1]

        result = rng.round(freq="h")
        tm.assert_index_equal(result, expected_rng)
        assert elt.round(freq="h") == expected_elt

        msg = INVALID_FREQ_ERR_MSG
        with pytest.raises(ValueError, match=msg):
            rng.round(freq="foo")
        with pytest.raises(ValueError, match=msg):
            elt.round(freq="foo")

        msg = "<MonthEnd> is a non-fixed frequency"
        with pytest.raises(ValueError, match=msg):
            rng.round(freq="ME")
        with pytest.raises(ValueError, match=msg):
            elt.round(freq="ME")

    def test_round2(self, tz_naive_fixture):
        tz = tz_naive_fixture
        # GH#14440 & GH#15578
        index = DatetimeIndex(["2016-10-17 12:00:00.0015"], tz=tz).as_unit("ns")
        result = index.round("ms")
        expected = DatetimeIndex(["2016-10-17 12:00:00.002000"], tz=tz).as_unit("ns")
        tm.assert_index_equal(result, expected)

        for freq in ["us", "ns"]:
            tm.assert_index_equal(index, index.round(freq))

    def test_round3(self, tz_naive_fixture):
        tz = tz_naive_fixture
        index = DatetimeIndex(["2016-10-17 12:00:00.00149"], tz=tz).as_unit("ns")
        result = index.round("ms")
        expected = DatetimeIndex(["2016-10-17 12:00:00.001000"], tz=tz).as_unit("ns")
        tm.assert_index_equal(result, expected)

    def test_round4(self, tz_naive_fixture):
        index = DatetimeIndex(["2016-10-17 12:00:00.001501031"], dtype="M8[ns]")
        result = index.round("10ns")
        expected = DatetimeIndex(["2016-10-17 12:00:00.001501030"], dtype="M8[ns]")
        tm.assert_index_equal(result, expected)

        ts = "2016-10-17 12:00:00.001501031"
        dti = DatetimeIndex([ts], dtype="M8[ns]")
        with tm.assert_produces_warning(False):
            dti.round("1010ns")

    def test_no_rounding_occurs(self, tz_naive_fixture):
        # GH 21262
        tz = tz_naive_fixture
        rng = date_range(start="2016-01-01", periods=5, freq="2Min", tz=tz)

        expected_rng = DatetimeIndex(
            [
                Timestamp("2016-01-01 00:00:00", tz=tz),
                Timestamp("2016-01-01 00:02:00", tz=tz),
                Timestamp("2016-01-01 00:04:00", tz=tz),
                Timestamp("2016-01-01 00:06:00", tz=tz),
                Timestamp("2016-01-01 00:08:00", tz=tz),
            ]
        ).as_unit("ns")

        result = rng.round(freq="2min")
        tm.assert_index_equal(result, expected_rng)

    @pytest.mark.parametrize(
        "test_input, rounder, freq, expected",
        [
            (["2117-01-01 00:00:45"], "floor", "15s", ["2117-01-01 00:00:45"]),
            (["2117-01-01 00:00:45"], "ceil", "15s", ["2117-01-01 00:00:45"]),
            (
                ["2117-01-01 00:00:45.000000012"],
                "floor",
                "10ns",
                ["2117-01-01 00:00:45.000000010"],
            ),
            (
                ["1823-01-01 00:00:01.000000012"],
                "ceil",
                "10ns",
                ["1823-01-01 00:00:01.000000020"],
            ),
            (["1823-01-01 00:00:01"], "floor", "1s", ["1823-01-01 00:00:01"]),
            (["1823-01-01 00:00:01"], "ceil", "1s", ["1823-01-01 00:00:01"]),
            (["2018-01-01 00:15:00"], "ceil", "15min", ["2018-01-01 00:15:00"]),
            (["2018-01-01 00:15:00"], "floor", "15min", ["2018-01-01 00:15:00"]),
            (["1823-01-01 03:00:00"], "ceil", "3h", ["1823-01-01 03:00:00"]),
            (["1823-01-01 03:00:00"], "floor", "3h", ["1823-01-01 03:00:00"]),
            (
                ("NaT", "1823-01-01 00:00:01"),
                "floor",
                "1s",
                ("NaT", "1823-01-01 00:00:01"),
            ),
            (
                ("NaT", "1823-01-01 00:00:01"),
                "ceil",
                "1s",
                ("NaT", "1823-01-01 00:00:01"),
            ),
        ],
    )
    def test_ceil_floor_edge(self, test_input, rounder, freq, expected):
        dt = DatetimeIndex(list(test_input))
        func = getattr(dt, rounder)
        result = func(freq)
        expected = DatetimeIndex(list(expected))
        assert expected.equals(result)

    @pytest.mark.parametrize(
        "start, index_freq, periods",
        [("2018-01-01", "12h", 25), ("2018-01-01 0:0:0.124999", "1ns", 1000)],
    )
    @pytest.mark.parametrize(
        "round_freq",
        [
            "2ns",
            "3ns",
            "4ns",
            "5ns",
            "6ns",
            "7ns",
            "250ns",
            "500ns",
            "750ns",
            "1us",
            "19us",
            "250us",
            "500us",
            "750us",
            "1s",
            "2s",
            "3s",
            "12h",
            "1D",
        ],
    )
    def test_round_int64(self, start, index_freq, periods, round_freq):
        dt = date_range(start=start, freq=index_freq, periods=periods)
        unit = to_offset(round_freq).nanos

        # test floor
        result = dt.floor(round_freq)
        diff = dt.asi8 - result.asi8
        mod = result.asi8 % unit
        assert (mod == 0).all(), f"floor not a {round_freq} multiple"
        assert (0 <= diff).all() and (diff < unit).all(), "floor error"

        # test ceil
        result = dt.ceil(round_freq)
        diff = result.asi8 - dt.asi8
        mod = result.asi8 % unit
        assert (mod == 0).all(), f"ceil not a {round_freq} multiple"
        assert (0 <= diff).all() and (diff < unit).all(), "ceil error"

        # test round
        result = dt.round(round_freq)
        diff = abs(result.asi8 - dt.asi8)
        mod = result.asi8 % unit
        assert (mod == 0).all(), f"round not a {round_freq} multiple"
        assert (diff <= unit // 2).all(), "round error"
        if unit % 2 == 0:
            assert (
                result.asi8[diff == unit // 2] % 2 == 0
            ).all(), "round half to even error"
@ -0,0 +1,169 @@
from datetime import datetime

import pytest
import pytz

from pandas.errors import NullFrequencyError

import pandas as pd
from pandas import (
    DatetimeIndex,
    Series,
    date_range,
)
import pandas._testing as tm

START, END = datetime(2009, 1, 1), datetime(2010, 1, 1)


class TestDatetimeIndexShift:
    # -------------------------------------------------------------
    # DatetimeIndex.shift is used in integer addition

    def test_dti_shift_tzaware(self, tz_naive_fixture, unit):
        # GH#9903
        tz = tz_naive_fixture
        idx = DatetimeIndex([], name="xxx", tz=tz).as_unit(unit)
        tm.assert_index_equal(idx.shift(0, freq="h"), idx)
        tm.assert_index_equal(idx.shift(3, freq="h"), idx)

        idx = DatetimeIndex(
            ["2011-01-01 10:00", "2011-01-01 11:00", "2011-01-01 12:00"],
            name="xxx",
            tz=tz,
            freq="h",
        ).as_unit(unit)
        tm.assert_index_equal(idx.shift(0, freq="h"), idx)
        exp = DatetimeIndex(
            ["2011-01-01 13:00", "2011-01-01 14:00", "2011-01-01 15:00"],
            name="xxx",
            tz=tz,
            freq="h",
        ).as_unit(unit)
        tm.assert_index_equal(idx.shift(3, freq="h"), exp)
        exp = DatetimeIndex(
            ["2011-01-01 07:00", "2011-01-01 08:00", "2011-01-01 09:00"],
            name="xxx",
            tz=tz,
            freq="h",
        ).as_unit(unit)
        tm.assert_index_equal(idx.shift(-3, freq="h"), exp)

    def test_dti_shift_freqs(self, unit):
        # test shift for DatetimeIndex and non DatetimeIndex
        # GH#8083
        drange = date_range("20130101", periods=5, unit=unit)
        result = drange.shift(1)
        expected = DatetimeIndex(
            ["2013-01-02", "2013-01-03", "2013-01-04", "2013-01-05", "2013-01-06"],
            dtype=f"M8[{unit}]",
            freq="D",
        )
        tm.assert_index_equal(result, expected)

        result = drange.shift(-1)
        expected = DatetimeIndex(
            ["2012-12-31", "2013-01-01", "2013-01-02", "2013-01-03", "2013-01-04"],
            dtype=f"M8[{unit}]",
            freq="D",
        )
        tm.assert_index_equal(result, expected)

        result = drange.shift(3, freq="2D")
        expected = DatetimeIndex(
            ["2013-01-07", "2013-01-08", "2013-01-09", "2013-01-10", "2013-01-11"],
            dtype=f"M8[{unit}]",
            freq="D",
        )
        tm.assert_index_equal(result, expected)

    def test_dti_shift_int(self, unit):
        rng = date_range("1/1/2000", periods=20, unit=unit)

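        # adding or subtracting n * freq is equivalent to shift(n) / shift(-n)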
        result = rng + 5 * rng.freq
        expected = rng.shift(5)
        tm.assert_index_equal(result, expected)

        result = rng - 5 * rng.freq
        expected = rng.shift(-5)
        tm.assert_index_equal(result, expected)

    def test_dti_shift_no_freq(self, unit):
        # GH#19147
        dti = DatetimeIndex(["2011-01-01 10:00", "2011-01-01"], freq=None).as_unit(unit)
        with pytest.raises(NullFrequencyError, match="Cannot shift with no freq"):
            dti.shift(2)

    @pytest.mark.parametrize("tzstr", ["US/Eastern", "dateutil/US/Eastern"])
    def test_dti_shift_localized(self, tzstr, unit):
        dr = date_range("2011/1/1", "2012/1/1", freq="W-FRI", unit=unit)
        dr_tz = dr.tz_localize(tzstr)

        result = dr_tz.shift(1, "10min")
        assert result.tz == dr_tz.tz

    def test_dti_shift_across_dst(self, unit):
        # GH 8616
        idx = date_range(
            "2013-11-03", tz="America/Chicago", periods=7, freq="h", unit=unit
        )
        ser = Series(index=idx[:-1], dtype=object)
        result = ser.shift(freq="h")
        expected = Series(index=idx[1:], dtype=object)
        tm.assert_series_equal(result, expected)

    @pytest.mark.parametrize(
        "shift, result_time",
        [
            [0, "2014-11-14 00:00:00"],
            [-1, "2014-11-13 23:00:00"],
            [1, "2014-11-14 01:00:00"],
        ],
    )
    def test_dti_shift_near_midnight(self, shift, result_time, unit):
        # GH 8616
        dt = datetime(2014, 11, 14, 0)
        dt_est = pytz.timezone("EST").localize(dt)
        idx = DatetimeIndex([dt_est]).as_unit(unit)
        ser = Series(data=[1], index=idx)
        result = ser.shift(shift, freq="h")
        exp_index = DatetimeIndex([result_time], tz="EST").as_unit(unit)
        expected = Series(1, index=exp_index)
        tm.assert_series_equal(result, expected)

    def test_shift_periods(self, unit):
        # GH#22458: argument 'n' was deprecated in favor of 'periods'
        idx = date_range(start=START, end=END, periods=3, unit=unit)
        tm.assert_index_equal(idx.shift(periods=0), idx)
        tm.assert_index_equal(idx.shift(0), idx)

    @pytest.mark.parametrize("freq", ["B", "C"])
    def test_shift_bday(self, freq, unit):
        rng = date_range(START, END, freq=freq, unit=unit)
        shifted = rng.shift(5)
        assert shifted[0] == rng[5]
        assert shifted.freq == rng.freq

        shifted = rng.shift(-5)
        assert shifted[5] == rng[0]
        assert shifted.freq == rng.freq

        shifted = rng.shift(0)
        assert shifted[0] == rng[0]
        assert shifted.freq == rng.freq

    def test_shift_bmonth(self, unit):
        rng = date_range(START, END, freq=pd.offsets.BMonthEnd(), unit=unit)
        shifted = rng.shift(1, freq=pd.offsets.BDay())
        assert shifted[0] == rng[0] + pd.offsets.BDay()

        rng = date_range(START, END, freq=pd.offsets.BMonthEnd(), unit=unit)
        with tm.assert_produces_warning(pd.errors.PerformanceWarning):
            shifted = rng.shift(1, freq=pd.offsets.CDay())
            assert shifted[0] == rng[0] + pd.offsets.CDay()

    def test_shift_empty(self, unit):
        # GH#14811
        dti = date_range(start="2016-10-21", end="2016-10-21", freq="BME", unit=unit)
        result = dti.shift(1)
        tm.assert_index_equal(result, dti)
@ -0,0 +1,47 @@
import pytest

from pandas import (
    DatetimeIndex,
    date_range,
)
import pandas._testing as tm


@pytest.mark.parametrize("tz", [None, "Asia/Shanghai", "Europe/Berlin"])
@pytest.mark.parametrize("name", [None, "my_dti"])
@pytest.mark.parametrize("unit", ["ns", "us", "ms", "s"])
def test_dti_snap(name, tz, unit):
    dti = DatetimeIndex(
        [
            "1/1/2002",
            "1/2/2002",
            "1/3/2002",
            "1/4/2002",
            "1/5/2002",
            "1/6/2002",
            "1/7/2002",
        ],
        name=name,
        tz=tz,
        freq="D",
    )
    dti = dti.as_unit(unit)

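    # snap rounds each timestamp to the nearest occurrence of the target frequency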
result = dti.snap(freq="W-MON")
|
||||
expected = date_range("12/31/2001", "1/7/2002", name=name, tz=tz, freq="w-mon")
|
||||
expected = expected.repeat([3, 4])
|
||||
expected = expected.as_unit(unit)
|
||||
tm.assert_index_equal(result, expected)
|
||||
assert result.tz == expected.tz
|
||||
assert result.freq is None
|
||||
assert expected.freq is None
|
||||
|
||||
result = dti.snap(freq="B")
|
||||
|
||||
expected = date_range("1/1/2002", "1/7/2002", name=name, tz=tz, freq="b")
|
||||
expected = expected.repeat([1, 1, 1, 2, 2])
|
||||
expected = expected.as_unit(unit)
|
||||
tm.assert_index_equal(result, expected)
|
||||
assert result.tz == expected.tz
|
||||
assert result.freq is None
|
||||
assert expected.freq is None
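`snap` rounds each element to the nearest occurrence of the target frequency, so several input dates can collapse onto the same anchor, which is why the expected index above is built with `repeat`. A standalone sketch (illustrative):

import pandas as pd

dti = pd.DatetimeIndex(["2002-01-01", "2002-01-02", "2002-01-07"])
# Tue 01-01 and Wed 01-02 are nearest to Monday 2001-12-31;
# 01-07 is already a Monday and is unchanged.
print(dti.snap(freq="W-MON"))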
@ -0,0 +1,28 @@
from pandas import (
    DataFrame,
    Index,
    date_range,
)
import pandas._testing as tm


class TestToFrame:
    def test_to_frame_datetime_tz(self):
        # GH#25809
        idx = date_range(start="2019-01-01", end="2019-01-30", freq="D", tz="UTC")
        result = idx.to_frame()
        expected = DataFrame(idx, index=idx)
        tm.assert_frame_equal(result, expected)

    def test_to_frame_respects_none_name(self):
        # GH#44212 if we explicitly pass name=None, then that should be respected,
        # not changed to 0
        # GH#45448 this is deprecated first, to only change in a future version
        idx = date_range(start="2019-01-01", end="2019-01-30", freq="D", tz="UTC")
        result = idx.to_frame(name=None)
        exp_idx = Index([None], dtype=object)
        tm.assert_index_equal(exp_idx, result.columns)

        result = idx.rename("foo").to_frame(name=None)
        exp_idx = Index([None], dtype=object)
        tm.assert_index_equal(exp_idx, result.columns)
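`to_frame` wraps the index in a one-column DataFrame whose column label defaults to the index name; the test above pins down that an explicit `name=None` is kept rather than silently replaced. A quick sketch of the default behavior (illustrative):

import pandas as pd

idx = pd.date_range("2019-01-01", periods=3, freq="D", name="when")
print(idx.to_frame().columns)           # Index(['when'], dtype='object')
print(idx.to_frame(name="ts").columns)  # Index(['ts'], dtype='object')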
@ -0,0 +1,45 @@
import numpy as np

from pandas import (
    Index,
    Timestamp,
    date_range,
)
import pandas._testing as tm


class TestDateTimeIndexToJulianDate:
    def test_1700(self):
        dr = date_range(start=Timestamp("1710-10-01"), periods=5, freq="D")
        r1 = Index([x.to_julian_date() for x in dr])
        r2 = dr.to_julian_date()
        assert isinstance(r2, Index) and r2.dtype == np.float64
        tm.assert_index_equal(r1, r2)

    def test_2000(self):
        dr = date_range(start=Timestamp("2000-02-27"), periods=5, freq="D")
        r1 = Index([x.to_julian_date() for x in dr])
        r2 = dr.to_julian_date()
        assert isinstance(r2, Index) and r2.dtype == np.float64
        tm.assert_index_equal(r1, r2)

    def test_hour(self):
        dr = date_range(start=Timestamp("2000-02-27"), periods=5, freq="h")
        r1 = Index([x.to_julian_date() for x in dr])
        r2 = dr.to_julian_date()
        assert isinstance(r2, Index) and r2.dtype == np.float64
        tm.assert_index_equal(r1, r2)

    def test_minute(self):
        dr = date_range(start=Timestamp("2000-02-27"), periods=5, freq="min")
        r1 = Index([x.to_julian_date() for x in dr])
        r2 = dr.to_julian_date()
        assert isinstance(r2, Index) and r2.dtype == np.float64
        tm.assert_index_equal(r1, r2)

    def test_second(self):
        dr = date_range(start=Timestamp("2000-02-27"), periods=5, freq="s")
        r1 = Index([x.to_julian_date() for x in dr])
        r2 = dr.to_julian_date()
        assert isinstance(r2, Index) and r2.dtype == np.float64
        tm.assert_index_equal(r1, r2)
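`to_julian_date` maps each timestamp to the astronomical Julian Date, a float day count in which day boundaries fall at noon, so civil midnights land on half-day values. A worked reference point (J2000.0 is defined as JD 2451545.0 = 2000-01-01 12:00):

import pandas as pd

# Midnight is half a day before the noon-based epoch point.
assert pd.Timestamp("2000-01-01").to_julian_date() == 2451544.5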
@ -0,0 +1,225 @@
import dateutil.tz
from dateutil.tz import tzlocal
import pytest
import pytz

from pandas._libs.tslibs.ccalendar import MONTHS
from pandas._libs.tslibs.offsets import MonthEnd
from pandas._libs.tslibs.period import INVALID_FREQ_ERR_MSG

from pandas import (
    DatetimeIndex,
    Period,
    PeriodIndex,
    Timestamp,
    date_range,
    period_range,
)
import pandas._testing as tm


class TestToPeriod:
    def test_dti_to_period(self):
        dti = date_range(start="1/1/2005", end="12/1/2005", freq="ME")
        pi1 = dti.to_period()
        pi2 = dti.to_period(freq="D")
        pi3 = dti.to_period(freq="3D")

        assert pi1[0] == Period("Jan 2005", freq="M")
        assert pi2[0] == Period("1/31/2005", freq="D")
        assert pi3[0] == Period("1/31/2005", freq="3D")

        assert pi1[-1] == Period("Nov 2005", freq="M")
        assert pi2[-1] == Period("11/30/2005", freq="D")
        assert pi3[-1] == Period("11/30/2005", freq="3D")

        tm.assert_index_equal(pi1, period_range("1/1/2005", "11/1/2005", freq="M"))
        tm.assert_index_equal(
            pi2, period_range("1/1/2005", "11/1/2005", freq="M").asfreq("D")
        )
        tm.assert_index_equal(
            pi3, period_range("1/1/2005", "11/1/2005", freq="M").asfreq("3D")
        )

    @pytest.mark.parametrize("month", MONTHS)
    def test_to_period_quarterly(self, month):
        # make sure we can make the round trip
        freq = f"Q-{month}"
        rng = period_range("1989Q3", "1991Q3", freq=freq)
        stamps = rng.to_timestamp()
        result = stamps.to_period(freq)
        tm.assert_index_equal(rng, result)

    @pytest.mark.parametrize("off", ["BQE", "QS", "BQS"])
    def test_to_period_quarterlyish(self, off):
        rng = date_range("01-Jan-2012", periods=8, freq=off)
        prng = rng.to_period()
        assert prng.freq == "QE-DEC"

    @pytest.mark.parametrize("off", ["BYE", "YS", "BYS"])
    def test_to_period_annualish(self, off):
        rng = date_range("01-Jan-2012", periods=8, freq=off)
        prng = rng.to_period()
        assert prng.freq == "YE-DEC"

    def test_to_period_monthish(self):
        offsets = ["MS", "BME"]
        for off in offsets:
            rng = date_range("01-Jan-2012", periods=8, freq=off)
            prng = rng.to_period()
            assert prng.freqstr == "M"

        rng = date_range("01-Jan-2012", periods=8, freq="ME")
        prng = rng.to_period()
        assert prng.freqstr == "M"

        with pytest.raises(ValueError, match=INVALID_FREQ_ERR_MSG):
            date_range("01-Jan-2012", periods=8, freq="EOM")

    @pytest.mark.parametrize(
        "freq_offset, freq_period",
        [
            ("2ME", "2M"),
            (MonthEnd(2), MonthEnd(2)),
        ],
    )
    def test_dti_to_period_2monthish(self, freq_offset, freq_period):
        dti = date_range("2020-01-01", periods=3, freq=freq_offset)
        pi = dti.to_period()

        tm.assert_index_equal(pi, period_range("2020-01", "2020-05", freq=freq_period))

    @pytest.mark.parametrize(
        "freq, freq_depr",
        [
            ("2ME", "2M"),
            ("2QE", "2Q"),
            ("2QE-SEP", "2Q-SEP"),
            ("1YE", "1Y"),
            ("2YE-MAR", "2Y-MAR"),
            ("1YE", "1A"),
            ("2YE-MAR", "2A-MAR"),
        ],
    )
    def test_to_period_frequency_M_Q_Y_A_deprecated(self, freq, freq_depr):
        # GH#9586
        msg = (
            f"'{freq_depr[1:]}' is deprecated and will be removed "
            f"in a future version, please use '{freq[1:]}' instead."
        )

        rng = date_range("01-Jan-2012", periods=8, freq=freq)
        prng = rng.to_period()
        with tm.assert_produces_warning(FutureWarning, match=msg):
            assert prng.freq == freq_depr

    def test_to_period_infer(self):
        # https://github.com/pandas-dev/pandas/issues/33358
        rng = date_range(
            start="2019-12-22 06:40:00+00:00",
            end="2019-12-22 08:45:00+00:00",
            freq="5min",
        )

        with tm.assert_produces_warning(UserWarning):
            pi1 = rng.to_period("5min")

        with tm.assert_produces_warning(UserWarning):
            pi2 = rng.to_period()

        tm.assert_index_equal(pi1, pi2)

    @pytest.mark.filterwarnings(r"ignore:PeriodDtype\[B\] is deprecated:FutureWarning")
    def test_period_dt64_round_trip(self):
        dti = date_range("1/1/2000", "1/7/2002", freq="B")
        pi = dti.to_period()
        tm.assert_index_equal(pi.to_timestamp(), dti)

        dti = date_range("1/1/2000", "1/7/2002", freq="B")
        pi = dti.to_period(freq="h")
        tm.assert_index_equal(pi.to_timestamp(), dti)

    def test_to_period_millisecond(self):
        index = DatetimeIndex(
            [
                Timestamp("2007-01-01 10:11:12.123456Z"),
                Timestamp("2007-01-01 10:11:13.789123Z"),
            ]
        )

        with tm.assert_produces_warning(UserWarning):
            # warning that timezone info will be lost
            period = index.to_period(freq="ms")
        assert 2 == len(period)
        assert period[0] == Period("2007-01-01 10:11:12.123Z", "ms")
        assert period[1] == Period("2007-01-01 10:11:13.789Z", "ms")

    def test_to_period_microsecond(self):
        index = DatetimeIndex(
            [
                Timestamp("2007-01-01 10:11:12.123456Z"),
                Timestamp("2007-01-01 10:11:13.789123Z"),
            ]
        )

        with tm.assert_produces_warning(UserWarning):
            # warning that timezone info will be lost
            period = index.to_period(freq="us")
        assert 2 == len(period)
        assert period[0] == Period("2007-01-01 10:11:12.123456Z", "us")
        assert period[1] == Period("2007-01-01 10:11:13.789123Z", "us")

    @pytest.mark.parametrize(
        "tz",
        ["US/Eastern", pytz.utc, tzlocal(), "dateutil/US/Eastern", dateutil.tz.tzutc()],
    )
    def test_to_period_tz(self, tz):
        ts = date_range("1/1/2000", "2/1/2000", tz=tz)

        with tm.assert_produces_warning(UserWarning):
            # GH#21333 warning that timezone info will be lost
            # filter warning about freq deprecation

            result = ts.to_period()[0]
            expected = ts[0].to_period(ts.freq)

        assert result == expected

        expected = date_range("1/1/2000", "2/1/2000").to_period()

        with tm.assert_produces_warning(UserWarning):
            # GH#21333 warning that timezone info will be lost
            result = ts.to_period(ts.freq)

        tm.assert_index_equal(result, expected)

    @pytest.mark.parametrize("tz", ["Etc/GMT-1", "Etc/GMT+1"])
    def test_to_period_tz_utc_offset_consistency(self, tz):
        # GH#22905
        ts = date_range("1/1/2000", "2/1/2000", tz="Etc/GMT-1")
        with tm.assert_produces_warning(UserWarning):
            result = ts.to_period()[0]
            expected = ts[0].to_period(ts.freq)
            assert result == expected

    def test_to_period_nofreq(self):
        idx = DatetimeIndex(["2000-01-01", "2000-01-02", "2000-01-04"])
        msg = "You must pass a freq argument as current index has none."
        with pytest.raises(ValueError, match=msg):
            idx.to_period()

        idx = DatetimeIndex(["2000-01-01", "2000-01-02", "2000-01-03"], freq="infer")
        assert idx.freqstr == "D"
        expected = PeriodIndex(["2000-01-01", "2000-01-02", "2000-01-03"], freq="D")
        tm.assert_index_equal(idx.to_period(), expected)

        # GH#7606
        idx = DatetimeIndex(["2000-01-01", "2000-01-02", "2000-01-03"])
        assert idx.freqstr is None
        tm.assert_index_equal(idx.to_period(), expected)

    @pytest.mark.parametrize("freq", ["2BMS", "1SME-15"])
    def test_to_period_offsets_not_supported(self, freq):
        # GH#56243
        msg = f"{freq[1:]} is not supported as period frequency"
        ts = date_range("1/1/2012", periods=4, freq=freq)
        with pytest.raises(ValueError, match=msg):
            ts.to_period()
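Periods are wall-time spans with no timezone, so converting a tz-aware index warns that the timezone information is dropped; that is the UserWarning the tests above assert. A standalone sketch (illustrative):

import warnings
import pandas as pd

rng = pd.date_range("2000-01-01", periods=3, freq="D", tz="US/Eastern")
with warnings.catch_warnings():
    warnings.simplefilter("ignore", UserWarning)  # tz info will be lost
    pi = rng.to_period("D")
print(pi.dtype)  # period[D]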
@ -0,0 +1,51 @@
from datetime import (
    datetime,
    timezone,
)

import dateutil.parser
import dateutil.tz
from dateutil.tz import tzlocal
import numpy as np

from pandas import (
    DatetimeIndex,
    date_range,
    to_datetime,
)
import pandas._testing as tm
from pandas.tests.indexes.datetimes.test_timezones import FixedOffset

fixed_off = FixedOffset(-420, "-07:00")


class TestToPyDatetime:
    def test_dti_to_pydatetime(self):
        dt = dateutil.parser.parse("2012-06-13T01:39:00Z")
        dt = dt.replace(tzinfo=tzlocal())

        arr = np.array([dt], dtype=object)

        result = to_datetime(arr, utc=True)
        assert result.tz is timezone.utc

        rng = date_range("2012-11-03 03:00", "2012-11-05 03:00", tz=tzlocal())
        arr = rng.to_pydatetime()
        result = to_datetime(arr, utc=True)
        assert result.tz is timezone.utc

    def test_dti_to_pydatetime_fixedtz(self):
        dates = np.array(
            [
                datetime(2000, 1, 1, tzinfo=fixed_off),
                datetime(2000, 1, 2, tzinfo=fixed_off),
                datetime(2000, 1, 3, tzinfo=fixed_off),
            ]
        )
        dti = DatetimeIndex(dates)

        result = dti.to_pydatetime()
        tm.assert_numpy_array_equal(dates, result)

        result = dti._mpl_repr()
        tm.assert_numpy_array_equal(dates, result)
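`to_pydatetime` materializes the index as an object-dtype NumPy array of stdlib `datetime.datetime` objects, keeping any tzinfo attached, which is what the round-trip comparison above relies on. A small sketch (illustrative):

import pandas as pd

rng = pd.date_range("2000-01-01", periods=2, tz="UTC")
arr = rng.to_pydatetime()
print(type(arr[0]), arr[0].tzinfo)  # <class 'datetime.datetime'> UTC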
@ -0,0 +1,18 @@
import numpy as np

from pandas import (
    DatetimeIndex,
    Series,
)
import pandas._testing as tm


class TestToSeries:
    def test_to_series(self):
        naive = DatetimeIndex(["2013-1-1 13:00", "2013-1-2 14:00"], name="B")
        idx = naive.tz_localize("US/Pacific")

        expected = Series(np.array(idx.tolist(), dtype="object"), name="B")
        result = idx.to_series(index=[0, 1])
        assert expected.dtype == idx.dtype
        tm.assert_series_equal(result, expected)
@ -0,0 +1,283 @@
from datetime import datetime

import dateutil.tz
from dateutil.tz import gettz
import numpy as np
import pytest
import pytz

from pandas._libs.tslibs import timezones

from pandas import (
    DatetimeIndex,
    Index,
    NaT,
    Timestamp,
    date_range,
    offsets,
)
import pandas._testing as tm


class TestTZConvert:
    def test_tz_convert_nat(self):
        # GH#5546
        dates = [NaT]
        idx = DatetimeIndex(dates)
        idx = idx.tz_localize("US/Pacific")
        tm.assert_index_equal(idx, DatetimeIndex(dates, tz="US/Pacific"))
        idx = idx.tz_convert("US/Eastern")
        tm.assert_index_equal(idx, DatetimeIndex(dates, tz="US/Eastern"))
        idx = idx.tz_convert("UTC")
        tm.assert_index_equal(idx, DatetimeIndex(dates, tz="UTC"))

        dates = ["2010-12-01 00:00", "2010-12-02 00:00", NaT]
        idx = DatetimeIndex(dates)
        idx = idx.tz_localize("US/Pacific")
        tm.assert_index_equal(idx, DatetimeIndex(dates, tz="US/Pacific"))
        idx = idx.tz_convert("US/Eastern")
        expected = ["2010-12-01 03:00", "2010-12-02 03:00", NaT]
        tm.assert_index_equal(idx, DatetimeIndex(expected, tz="US/Eastern"))

        idx = idx + offsets.Hour(5)
        expected = ["2010-12-01 08:00", "2010-12-02 08:00", NaT]
        tm.assert_index_equal(idx, DatetimeIndex(expected, tz="US/Eastern"))
        idx = idx.tz_convert("US/Pacific")
        expected = ["2010-12-01 05:00", "2010-12-02 05:00", NaT]
        tm.assert_index_equal(idx, DatetimeIndex(expected, tz="US/Pacific"))

        idx = idx + np.timedelta64(3, "h")
        expected = ["2010-12-01 08:00", "2010-12-02 08:00", NaT]
        tm.assert_index_equal(idx, DatetimeIndex(expected, tz="US/Pacific"))

        idx = idx.tz_convert("US/Eastern")
        expected = ["2010-12-01 11:00", "2010-12-02 11:00", NaT]
        tm.assert_index_equal(idx, DatetimeIndex(expected, tz="US/Eastern"))

    @pytest.mark.parametrize("prefix", ["", "dateutil/"])
    def test_dti_tz_convert_compat_timestamp(self, prefix):
        strdates = ["1/1/2012", "3/1/2012", "4/1/2012"]
        idx = DatetimeIndex(strdates, tz=prefix + "US/Eastern")

        conv = idx[0].tz_convert(prefix + "US/Pacific")
        expected = idx.tz_convert(prefix + "US/Pacific")[0]

        assert conv == expected

    def test_dti_tz_convert_hour_overflow_dst(self):
        # Regression test for GH#13306

        # sorted case US/Eastern -> UTC
        ts = ["2008-05-12 09:50:00", "2008-12-12 09:50:35", "2009-05-12 09:50:32"]
        tt = DatetimeIndex(ts).tz_localize("US/Eastern")
        ut = tt.tz_convert("UTC")
        expected = Index([13, 14, 13], dtype=np.int32)
        tm.assert_index_equal(ut.hour, expected)

        # sorted case UTC -> US/Eastern
        ts = ["2008-05-12 13:50:00", "2008-12-12 14:50:35", "2009-05-12 13:50:32"]
        tt = DatetimeIndex(ts).tz_localize("UTC")
        ut = tt.tz_convert("US/Eastern")
        expected = Index([9, 9, 9], dtype=np.int32)
        tm.assert_index_equal(ut.hour, expected)

        # unsorted case US/Eastern -> UTC
        ts = ["2008-05-12 09:50:00", "2008-12-12 09:50:35", "2008-05-12 09:50:32"]
        tt = DatetimeIndex(ts).tz_localize("US/Eastern")
        ut = tt.tz_convert("UTC")
        expected = Index([13, 14, 13], dtype=np.int32)
        tm.assert_index_equal(ut.hour, expected)

        # unsorted case UTC -> US/Eastern
        ts = ["2008-05-12 13:50:00", "2008-12-12 14:50:35", "2008-05-12 13:50:32"]
        tt = DatetimeIndex(ts).tz_localize("UTC")
        ut = tt.tz_convert("US/Eastern")
        expected = Index([9, 9, 9], dtype=np.int32)
        tm.assert_index_equal(ut.hour, expected)

    @pytest.mark.parametrize("tz", ["US/Eastern", "dateutil/US/Eastern"])
    def test_dti_tz_convert_hour_overflow_dst_timestamps(self, tz):
        # Regression test for GH#13306

        # sorted case US/Eastern -> UTC
        ts = [
            Timestamp("2008-05-12 09:50:00", tz=tz),
            Timestamp("2008-12-12 09:50:35", tz=tz),
            Timestamp("2009-05-12 09:50:32", tz=tz),
        ]
        tt = DatetimeIndex(ts)
        ut = tt.tz_convert("UTC")
        expected = Index([13, 14, 13], dtype=np.int32)
        tm.assert_index_equal(ut.hour, expected)

        # sorted case UTC -> US/Eastern
        ts = [
            Timestamp("2008-05-12 13:50:00", tz="UTC"),
            Timestamp("2008-12-12 14:50:35", tz="UTC"),
            Timestamp("2009-05-12 13:50:32", tz="UTC"),
        ]
        tt = DatetimeIndex(ts)
        ut = tt.tz_convert("US/Eastern")
        expected = Index([9, 9, 9], dtype=np.int32)
        tm.assert_index_equal(ut.hour, expected)

        # unsorted case US/Eastern -> UTC
        ts = [
            Timestamp("2008-05-12 09:50:00", tz=tz),
            Timestamp("2008-12-12 09:50:35", tz=tz),
            Timestamp("2008-05-12 09:50:32", tz=tz),
        ]
        tt = DatetimeIndex(ts)
        ut = tt.tz_convert("UTC")
        expected = Index([13, 14, 13], dtype=np.int32)
        tm.assert_index_equal(ut.hour, expected)

        # unsorted case UTC -> US/Eastern
        ts = [
            Timestamp("2008-05-12 13:50:00", tz="UTC"),
            Timestamp("2008-12-12 14:50:35", tz="UTC"),
            Timestamp("2008-05-12 13:50:32", tz="UTC"),
        ]
        tt = DatetimeIndex(ts)
        ut = tt.tz_convert("US/Eastern")
        expected = Index([9, 9, 9], dtype=np.int32)
        tm.assert_index_equal(ut.hour, expected)

    @pytest.mark.parametrize("freq, n", [("h", 1), ("min", 60), ("s", 3600)])
    def test_dti_tz_convert_trans_pos_plus_1__bug(self, freq, n):
        # Regression test for tslib.tz_convert(vals, tz1, tz2).
        # See GH#4496 for details.
        idx = date_range(datetime(2011, 3, 26, 23), datetime(2011, 3, 27, 1), freq=freq)
        idx = idx.tz_localize("UTC")
        idx = idx.tz_convert("Europe/Moscow")

        expected = np.repeat(np.array([3, 4, 5]), np.array([n, n, 1]))
        tm.assert_index_equal(idx.hour, Index(expected, dtype=np.int32))

    def test_dti_tz_convert_dst(self):
        for freq, n in [("h", 1), ("min", 60), ("s", 3600)]:
            # Start DST
            idx = date_range(
                "2014-03-08 23:00", "2014-03-09 09:00", freq=freq, tz="UTC"
            )
            idx = idx.tz_convert("US/Eastern")
            expected = np.repeat(
                np.array([18, 19, 20, 21, 22, 23, 0, 1, 3, 4, 5]),
                np.array([n, n, n, n, n, n, n, n, n, n, 1]),
            )
            tm.assert_index_equal(idx.hour, Index(expected, dtype=np.int32))

            idx = date_range(
                "2014-03-08 18:00", "2014-03-09 05:00", freq=freq, tz="US/Eastern"
            )
            idx = idx.tz_convert("UTC")
            expected = np.repeat(
                np.array([23, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9]),
                np.array([n, n, n, n, n, n, n, n, n, n, 1]),
            )
            tm.assert_index_equal(idx.hour, Index(expected, dtype=np.int32))

            # End DST
            idx = date_range(
                "2014-11-01 23:00", "2014-11-02 09:00", freq=freq, tz="UTC"
            )
            idx = idx.tz_convert("US/Eastern")
            expected = np.repeat(
                np.array([19, 20, 21, 22, 23, 0, 1, 1, 2, 3, 4]),
                np.array([n, n, n, n, n, n, n, n, n, n, 1]),
            )
            tm.assert_index_equal(idx.hour, Index(expected, dtype=np.int32))

            idx = date_range(
                "2014-11-01 18:00", "2014-11-02 05:00", freq=freq, tz="US/Eastern"
            )
            idx = idx.tz_convert("UTC")
            expected = np.repeat(
                np.array([22, 23, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10]),
                np.array([n, n, n, n, n, n, n, n, n, n, n, n, 1]),
            )
            tm.assert_index_equal(idx.hour, Index(expected, dtype=np.int32))

        # daily
        # Start DST
        idx = date_range("2014-03-08 00:00", "2014-03-09 00:00", freq="D", tz="UTC")
        idx = idx.tz_convert("US/Eastern")
        tm.assert_index_equal(idx.hour, Index([19, 19], dtype=np.int32))

        idx = date_range(
            "2014-03-08 00:00", "2014-03-09 00:00", freq="D", tz="US/Eastern"
        )
        idx = idx.tz_convert("UTC")
        tm.assert_index_equal(idx.hour, Index([5, 5], dtype=np.int32))

        # End DST
        idx = date_range("2014-11-01 00:00", "2014-11-02 00:00", freq="D", tz="UTC")
        idx = idx.tz_convert("US/Eastern")
        tm.assert_index_equal(idx.hour, Index([20, 20], dtype=np.int32))

        idx = date_range(
            "2014-11-01 00:00", "2014-11-02 00:00", freq="D", tz="US/Eastern"
        )
        idx = idx.tz_convert("UTC")
        tm.assert_index_equal(idx.hour, Index([4, 4], dtype=np.int32))

    def test_tz_convert_roundtrip(self, tz_aware_fixture):
        tz = tz_aware_fixture
        idx1 = date_range(start="2014-01-01", end="2014-12-31", freq="ME", tz="UTC")
        exp1 = date_range(start="2014-01-01", end="2014-12-31", freq="ME")

        idx2 = date_range(start="2014-01-01", end="2014-12-31", freq="D", tz="UTC")
        exp2 = date_range(start="2014-01-01", end="2014-12-31", freq="D")

        idx3 = date_range(start="2014-01-01", end="2014-03-01", freq="h", tz="UTC")
        exp3 = date_range(start="2014-01-01", end="2014-03-01", freq="h")

        idx4 = date_range(start="2014-08-01", end="2014-10-31", freq="min", tz="UTC")
        exp4 = date_range(start="2014-08-01", end="2014-10-31", freq="min")

        for idx, expected in [(idx1, exp1), (idx2, exp2), (idx3, exp3), (idx4, exp4)]:
            converted = idx.tz_convert(tz)
            reset = converted.tz_convert(None)
            tm.assert_index_equal(reset, expected)
            assert reset.tzinfo is None
            expected = converted.tz_convert("UTC").tz_localize(None)
            expected = expected._with_freq("infer")
            tm.assert_index_equal(reset, expected)

    def test_dti_tz_convert_tzlocal(self):
        # GH#13583
        # tz_convert doesn't affect the internal values
        dti = date_range(start="2001-01-01", end="2001-03-01", tz="UTC")
        dti2 = dti.tz_convert(dateutil.tz.tzlocal())
        tm.assert_numpy_array_equal(dti2.asi8, dti.asi8)

        dti = date_range(start="2001-01-01", end="2001-03-01", tz=dateutil.tz.tzlocal())
        dti2 = dti.tz_convert(None)
        tm.assert_numpy_array_equal(dti2.asi8, dti.asi8)

    @pytest.mark.parametrize(
        "tz",
        [
            "US/Eastern",
            "dateutil/US/Eastern",
            pytz.timezone("US/Eastern"),
            gettz("US/Eastern"),
        ],
    )
    def test_dti_tz_convert_utc_to_local_no_modify(self, tz):
        rng = date_range("3/11/2012", "3/12/2012", freq="h", tz="utc")
        rng_eastern = rng.tz_convert(tz)

        # Values are unmodified
        tm.assert_numpy_array_equal(rng.asi8, rng_eastern.asi8)

        assert timezones.tz_compare(rng_eastern.tz, timezones.maybe_get_tz(tz))

    @pytest.mark.parametrize("tzstr", ["US/Eastern", "dateutil/US/Eastern"])
    def test_tz_convert_unsorted(self, tzstr):
        dr = date_range("2012-03-09", freq="h", periods=100, tz="utc")
        dr = dr.tz_convert(tzstr)

        result = dr[::-1].hour
        exp = dr.hour[::-1]
        tm.assert_almost_equal(result, exp)
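`tz_convert` changes only how instants are labeled, never which instants the index holds: the underlying int64 epoch values are identical before and after, as several of the tests above check via `asi8`. A minimal sketch (illustrative):

import numpy as np
import pandas as pd

rng = pd.date_range("2012-03-11", periods=3, freq="h", tz="UTC")
eastern = rng.tz_convert("US/Eastern")
assert np.array_equal(rng.asi8, eastern.asi8)  # same instants, new labels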
@ -0,0 +1,402 @@
from datetime import (
    datetime,
    timedelta,
)

import dateutil.tz
from dateutil.tz import gettz
import numpy as np
import pytest
import pytz

from pandas import (
    DatetimeIndex,
    Timestamp,
    bdate_range,
    date_range,
    offsets,
    to_datetime,
)
import pandas._testing as tm

try:
    from zoneinfo import ZoneInfo
except ImportError:
    # Cannot assign to a type [misc]
    ZoneInfo = None  # type: ignore[misc, assignment]


easts = [pytz.timezone("US/Eastern"), gettz("US/Eastern")]
if ZoneInfo is not None:
    try:
        tz = ZoneInfo("US/Eastern")
    except KeyError:
        # no tzdata
        pass
    else:
        easts.append(tz)


class TestTZLocalize:
    def test_tz_localize_invalidates_freq(self):
        # we only preserve freq in unambiguous cases

        # if localized to US/Eastern, this crosses a DST transition
        dti = date_range("2014-03-08 23:00", "2014-03-09 09:00", freq="h")
        assert dti.freq == "h"

        result = dti.tz_localize(None)  # no-op
        assert result.freq == "h"

        result = dti.tz_localize("UTC")  # unambiguous freq preservation
        assert result.freq == "h"

        result = dti.tz_localize("US/Eastern", nonexistent="shift_forward")
        assert result.freq is None
        assert result.inferred_freq is None  # i.e. we are not _too_ strict here

        # Case where we _can_ keep freq because we're length==1
        dti2 = dti[:1]
        result = dti2.tz_localize("US/Eastern")
        assert result.freq == "h"

    def test_tz_localize_utc_copies(self, utc_fixture):
        # GH#46460
        times = ["2015-03-08 01:00", "2015-03-08 02:00", "2015-03-08 03:00"]
        index = DatetimeIndex(times)

        res = index.tz_localize(utc_fixture)
        assert not tm.shares_memory(res, index)

        res2 = index._data.tz_localize(utc_fixture)
        assert not tm.shares_memory(index._data, res2)

    def test_dti_tz_localize_nonexistent_raise_coerce(self):
        # GH#13057
        times = ["2015-03-08 01:00", "2015-03-08 02:00", "2015-03-08 03:00"]
        index = DatetimeIndex(times)
        tz = "US/Eastern"
        with pytest.raises(pytz.NonExistentTimeError, match="|".join(times)):
            index.tz_localize(tz=tz)

        with pytest.raises(pytz.NonExistentTimeError, match="|".join(times)):
            index.tz_localize(tz=tz, nonexistent="raise")

        result = index.tz_localize(tz=tz, nonexistent="NaT")
        test_times = ["2015-03-08 01:00-05:00", "NaT", "2015-03-08 03:00-04:00"]
        dti = to_datetime(test_times, utc=True)
        expected = dti.tz_convert("US/Eastern")
        tm.assert_index_equal(result, expected)

    @pytest.mark.parametrize("tz", easts)
    def test_dti_tz_localize_ambiguous_infer(self, tz):
        # November 6, 2011, fall back, repeat 2 AM hour
        # With no repeated hours, we cannot infer the transition
        dr = date_range(datetime(2011, 11, 6, 0), periods=5, freq=offsets.Hour())
        with pytest.raises(pytz.AmbiguousTimeError, match="Cannot infer dst time"):
            dr.tz_localize(tz)

    @pytest.mark.parametrize("tz", easts)
    def test_dti_tz_localize_ambiguous_infer2(self, tz, unit):
        # With repeated hours, we can infer the transition
        dr = date_range(
            datetime(2011, 11, 6, 0), periods=5, freq=offsets.Hour(), tz=tz, unit=unit
        )
        times = [
            "11/06/2011 00:00",
            "11/06/2011 01:00",
            "11/06/2011 01:00",
            "11/06/2011 02:00",
            "11/06/2011 03:00",
        ]
        di = DatetimeIndex(times).as_unit(unit)
        result = di.tz_localize(tz, ambiguous="infer")
        expected = dr._with_freq(None)
        tm.assert_index_equal(result, expected)
        result2 = DatetimeIndex(times, tz=tz, ambiguous="infer").as_unit(unit)
        tm.assert_index_equal(result2, expected)

    @pytest.mark.parametrize("tz", easts)
    def test_dti_tz_localize_ambiguous_infer3(self, tz):
        # When there is no dst transition, nothing special happens
        dr = date_range(datetime(2011, 6, 1, 0), periods=10, freq=offsets.Hour())
        localized = dr.tz_localize(tz)
        localized_infer = dr.tz_localize(tz, ambiguous="infer")
        tm.assert_index_equal(localized, localized_infer)

    @pytest.mark.parametrize("tz", easts)
    def test_dti_tz_localize_ambiguous_times(self, tz):
        # March 13, 2011, spring forward, skip from 2 AM to 3 AM
        dr = date_range(datetime(2011, 3, 13, 1, 30), periods=3, freq=offsets.Hour())
        with pytest.raises(pytz.NonExistentTimeError, match="2011-03-13 02:30:00"):
            dr.tz_localize(tz)

        # after dst transition, it works
        dr = date_range(
            datetime(2011, 3, 13, 3, 30), periods=3, freq=offsets.Hour(), tz=tz
        )

        # November 6, 2011, fall back, repeat 2 AM hour
        dr = date_range(datetime(2011, 11, 6, 1, 30), periods=3, freq=offsets.Hour())
        with pytest.raises(pytz.AmbiguousTimeError, match="Cannot infer dst time"):
            dr.tz_localize(tz)

        # UTC is OK
        dr = date_range(
            datetime(2011, 3, 13), periods=48, freq=offsets.Minute(30), tz=pytz.utc
        )

    @pytest.mark.parametrize("tzstr", ["US/Eastern", "dateutil/US/Eastern"])
    def test_dti_tz_localize_pass_dates_to_utc(self, tzstr):
        strdates = ["1/1/2012", "3/1/2012", "4/1/2012"]

        idx = DatetimeIndex(strdates)
        conv = idx.tz_localize(tzstr)

        fromdates = DatetimeIndex(strdates, tz=tzstr)

        assert conv.tz == fromdates.tz
        tm.assert_numpy_array_equal(conv.values, fromdates.values)

    @pytest.mark.parametrize("prefix", ["", "dateutil/"])
    def test_dti_tz_localize(self, prefix):
        tzstr = prefix + "US/Eastern"
        dti = date_range(start="1/1/2005", end="1/1/2005 0:00:30.256", freq="ms")
        dti2 = dti.tz_localize(tzstr)

        dti_utc = date_range(
            start="1/1/2005 05:00", end="1/1/2005 5:00:30.256", freq="ms", tz="utc"
        )

        tm.assert_numpy_array_equal(dti2.values, dti_utc.values)

        dti3 = dti2.tz_convert(prefix + "US/Pacific")
        tm.assert_numpy_array_equal(dti3.values, dti_utc.values)

        dti = date_range(start="11/6/2011 1:59", end="11/6/2011 2:00", freq="ms")
        with pytest.raises(pytz.AmbiguousTimeError, match="Cannot infer dst time"):
            dti.tz_localize(tzstr)

        dti = date_range(start="3/13/2011 1:59", end="3/13/2011 2:00", freq="ms")
        with pytest.raises(pytz.NonExistentTimeError, match="2011-03-13 02:00:00"):
            dti.tz_localize(tzstr)

    @pytest.mark.parametrize(
        "tz",
        [
            "US/Eastern",
            "dateutil/US/Eastern",
            pytz.timezone("US/Eastern"),
            gettz("US/Eastern"),
        ],
    )
    def test_dti_tz_localize_utc_conversion(self, tz):
        # Localizing to time zone should:
        #  1) check for DST ambiguities
        #  2) convert to UTC
        rng = date_range("3/10/2012", "3/11/2012", freq="30min")

        converted = rng.tz_localize(tz)
        expected_naive = rng + offsets.Hour(5)
        tm.assert_numpy_array_equal(converted.asi8, expected_naive.asi8)

        # DST ambiguity, this should fail
        rng = date_range("3/11/2012", "3/12/2012", freq="30min")
        # Is this really how it should fail??
        with pytest.raises(pytz.NonExistentTimeError, match="2012-03-11 02:00:00"):
            rng.tz_localize(tz)

    def test_dti_tz_localize_roundtrip(self, tz_aware_fixture):
        # note: this test verifies that a tz-naive index can be localized
        # and de-localized successfully, when there are no DST transitions
        # in the range.
        idx = date_range(start="2014-06-01", end="2014-08-30", freq="15min")
        tz = tz_aware_fixture
        localized = idx.tz_localize(tz)
        # can't localize a tz-aware object
        with pytest.raises(
            TypeError, match="Already tz-aware, use tz_convert to convert"
        ):
            localized.tz_localize(tz)
        reset = localized.tz_localize(None)
        assert reset.tzinfo is None
        expected = idx._with_freq(None)
        tm.assert_index_equal(reset, expected)

    def test_dti_tz_localize_naive(self):
        rng = date_range("1/1/2011", periods=100, freq="h")

        conv = rng.tz_localize("US/Pacific")
        exp = date_range("1/1/2011", periods=100, freq="h", tz="US/Pacific")

        tm.assert_index_equal(conv, exp._with_freq(None))

    def test_dti_tz_localize_tzlocal(self):
        # GH#13583
        offset = dateutil.tz.tzlocal().utcoffset(datetime(2011, 1, 1))
        offset = int(offset.total_seconds() * 1000000000)

        dti = date_range(start="2001-01-01", end="2001-03-01")
        dti2 = dti.tz_localize(dateutil.tz.tzlocal())
        tm.assert_numpy_array_equal(dti2.asi8 + offset, dti.asi8)

        dti = date_range(start="2001-01-01", end="2001-03-01", tz=dateutil.tz.tzlocal())
        dti2 = dti.tz_localize(None)
        tm.assert_numpy_array_equal(dti2.asi8 - offset, dti.asi8)

    @pytest.mark.parametrize("tz", easts)
    def test_dti_tz_localize_ambiguous_nat(self, tz):
        times = [
            "11/06/2011 00:00",
            "11/06/2011 01:00",
            "11/06/2011 01:00",
            "11/06/2011 02:00",
            "11/06/2011 03:00",
        ]
        di = DatetimeIndex(times)
        localized = di.tz_localize(tz, ambiguous="NaT")

        times = [
            "11/06/2011 00:00",
            np.nan,
            np.nan,
            "11/06/2011 02:00",
            "11/06/2011 03:00",
        ]
        di_test = DatetimeIndex(times, tz="US/Eastern")

        # left dtype is datetime64[ns, US/Eastern]
        # right is datetime64[ns, tzfile('/usr/share/zoneinfo/US/Eastern')]
        tm.assert_numpy_array_equal(di_test.values, localized.values)

    @pytest.mark.parametrize("tz", easts)
    def test_dti_tz_localize_ambiguous_flags(self, tz, unit):
        # November 6, 2011, fall back, repeat 2 AM hour

        # Pass in flags to determine right dst transition
        dr = date_range(
            datetime(2011, 11, 6, 0), periods=5, freq=offsets.Hour(), tz=tz, unit=unit
        )
        times = [
            "11/06/2011 00:00",
            "11/06/2011 01:00",
            "11/06/2011 01:00",
            "11/06/2011 02:00",
            "11/06/2011 03:00",
        ]

        # Test tz_localize
        di = DatetimeIndex(times).as_unit(unit)
        is_dst = [1, 1, 0, 0, 0]
        localized = di.tz_localize(tz, ambiguous=is_dst)
        expected = dr._with_freq(None)
        tm.assert_index_equal(expected, localized)

        result = DatetimeIndex(times, tz=tz, ambiguous=is_dst).as_unit(unit)
        tm.assert_index_equal(result, expected)

        localized = di.tz_localize(tz, ambiguous=np.array(is_dst))
        tm.assert_index_equal(dr, localized)

        localized = di.tz_localize(tz, ambiguous=np.array(is_dst).astype("bool"))
        tm.assert_index_equal(dr, localized)

        # Test constructor
        localized = DatetimeIndex(times, tz=tz, ambiguous=is_dst).as_unit(unit)
        tm.assert_index_equal(dr, localized)

        # Test duplicate times where inferring the dst fails
        times += times
        di = DatetimeIndex(times).as_unit(unit)

        # When the sizes are incompatible, make sure error is raised
        msg = "Length of ambiguous bool-array must be the same size as vals"
        with pytest.raises(Exception, match=msg):
            di.tz_localize(tz, ambiguous=is_dst)

        # When sizes are compatible and there are repeats ('infer' won't work)
        is_dst = np.hstack((is_dst, is_dst))
        localized = di.tz_localize(tz, ambiguous=is_dst)
        dr = dr.append(dr)
        tm.assert_index_equal(dr, localized)

    @pytest.mark.parametrize("tz", easts)
    def test_dti_tz_localize_ambiguous_flags2(self, tz, unit):
        # When there is no dst transition, nothing special happens
        dr = date_range(datetime(2011, 6, 1, 0), periods=10, freq=offsets.Hour())
        is_dst = np.array([1] * 10)
        localized = dr.tz_localize(tz)
        localized_is_dst = dr.tz_localize(tz, ambiguous=is_dst)
        tm.assert_index_equal(localized, localized_is_dst)

    def test_dti_tz_localize_bdate_range(self):
        dr = bdate_range("1/1/2009", "1/1/2010")
        dr_utc = bdate_range("1/1/2009", "1/1/2010", tz=pytz.utc)
        localized = dr.tz_localize(pytz.utc)
        tm.assert_index_equal(dr_utc, localized)

    @pytest.mark.parametrize(
        "start_ts, tz, end_ts, shift",
        [
            ["2015-03-29 02:20:00", "Europe/Warsaw", "2015-03-29 03:00:00", "forward"],
            [
                "2015-03-29 02:20:00",
                "Europe/Warsaw",
                "2015-03-29 01:59:59.999999999",
                "backward",
            ],
            [
                "2015-03-29 02:20:00",
                "Europe/Warsaw",
                "2015-03-29 03:20:00",
                timedelta(hours=1),
            ],
            [
                "2015-03-29 02:20:00",
                "Europe/Warsaw",
                "2015-03-29 01:20:00",
                timedelta(hours=-1),
            ],
            ["2018-03-11 02:33:00", "US/Pacific", "2018-03-11 03:00:00", "forward"],
            [
                "2018-03-11 02:33:00",
                "US/Pacific",
                "2018-03-11 01:59:59.999999999",
                "backward",
            ],
            [
                "2018-03-11 02:33:00",
                "US/Pacific",
                "2018-03-11 03:33:00",
                timedelta(hours=1),
            ],
            [
                "2018-03-11 02:33:00",
                "US/Pacific",
                "2018-03-11 01:33:00",
                timedelta(hours=-1),
            ],
        ],
    )
    @pytest.mark.parametrize("tz_type", ["", "dateutil/"])
    def test_dti_tz_localize_nonexistent_shift(
        self, start_ts, tz, end_ts, shift, tz_type, unit
    ):
        # GH#8917
        tz = tz_type + tz
        if isinstance(shift, str):
            shift = "shift_" + shift
        dti = DatetimeIndex([Timestamp(start_ts)]).as_unit(unit)
        result = dti.tz_localize(tz, nonexistent=shift)
        expected = DatetimeIndex([Timestamp(end_ts)]).tz_localize(tz).as_unit(unit)
        tm.assert_index_equal(result, expected)

    @pytest.mark.parametrize("offset", [-1, 1])
    def test_dti_tz_localize_nonexistent_shift_invalid(self, offset, warsaw):
        # GH#8917
        tz = warsaw
        dti = DatetimeIndex([Timestamp("2015-03-29 02:20:00")])
        msg = "The provided timedelta will relocalize on a nonexistent time"
        with pytest.raises(ValueError, match=msg):
            dti.tz_localize(tz, nonexistent=timedelta(seconds=offset))
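`tz_localize` interprets naive wall times in a zone, so it must be told how to handle times that are ambiguous (repeated at fall-back) or nonexistent (skipped at spring-forward); the `ambiguous=` and `nonexistent=` arguments exercised above control that. A minimal sketch (illustrative):

import pandas as pd

naive = pd.DatetimeIndex(["2018-03-11 02:30"])  # skipped hour in US/Pacific
print(naive.tz_localize("US/Pacific", nonexistent="shift_forward"))
# DatetimeIndex(['2018-03-11 03:00:00-07:00'], ...)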
@ -0,0 +1,77 @@
from datetime import (
    datetime,
    timedelta,
)

from pandas import (
    DatetimeIndex,
    NaT,
    Timestamp,
)
import pandas._testing as tm


def test_unique(tz_naive_fixture):
    idx = DatetimeIndex(["2017"] * 2, tz=tz_naive_fixture)
    expected = idx[:1]

    result = idx.unique()
    tm.assert_index_equal(result, expected)
    # GH#21737
    # Ensure the underlying data is consistent
    assert result[0] == expected[0]


def test_index_unique(rand_series_with_duplicate_datetimeindex):
    dups = rand_series_with_duplicate_datetimeindex
    index = dups.index

    uniques = index.unique()
    expected = DatetimeIndex(
        [
            datetime(2000, 1, 2),
            datetime(2000, 1, 3),
            datetime(2000, 1, 4),
            datetime(2000, 1, 5),
        ],
        dtype=index.dtype,
    )
    assert uniques.dtype == index.dtype  # sanity
    tm.assert_index_equal(uniques, expected)
    assert index.nunique() == 4

    # GH#2563
    assert isinstance(uniques, DatetimeIndex)

    dups_local = index.tz_localize("US/Eastern")
    dups_local.name = "foo"
    result = dups_local.unique()
    expected = DatetimeIndex(expected, name="foo")
    expected = expected.tz_localize("US/Eastern")
    assert result.tz is not None
    assert result.name == "foo"
    tm.assert_index_equal(result, expected)


def test_index_unique2():
    # NaT, note this is excluded
    arr = [1370745748 + t for t in range(20)] + [NaT._value]
    idx = DatetimeIndex(arr * 3)
    tm.assert_index_equal(idx.unique(), DatetimeIndex(arr))
    assert idx.nunique() == 20
    assert idx.nunique(dropna=False) == 21


def test_index_unique3():
    arr = [
        Timestamp("2013-06-09 02:42:28") + timedelta(seconds=t) for t in range(20)
    ] + [NaT]
    idx = DatetimeIndex(arr * 3)
    tm.assert_index_equal(idx.unique(), DatetimeIndex(arr))
    assert idx.nunique() == 20
    assert idx.nunique(dropna=False) == 21


def test_is_unique_monotonic(rand_series_with_duplicate_datetimeindex):
    index = rand_series_with_duplicate_datetimeindex.index
    assert not index.is_unique
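`unique` keeps NaT as a distinct entry while `nunique` excludes it unless `dropna=False`, which is exactly the split the assertions above pin down. A minimal sketch (illustrative):

import pandas as pd

idx = pd.DatetimeIndex(["2017-01-01", "2017-01-01", pd.NaT])
print(len(idx.unique()))          # 2: the timestamp and NaT
print(idx.nunique())              # 1: NaT excluded by default
print(idx.nunique(dropna=False))  # 2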
@ -0,0 +1,56 @@
# Arithmetic tests specific to DatetimeIndex are generally about `freq`
# retention or inference. Other arithmetic tests belong in
# tests/arithmetic/test_datetime64.py
import pytest

from pandas import (
    Timedelta,
    TimedeltaIndex,
    Timestamp,
    date_range,
    timedelta_range,
)
import pandas._testing as tm


class TestDatetimeIndexArithmetic:
    def test_add_timedelta_preserves_freq(self):
        # GH#37295 should hold for any DTI with freq=None or Tick freq
        tz = "Canada/Eastern"
        dti = date_range(
            start=Timestamp("2019-03-26 00:00:00-0400", tz=tz),
            end=Timestamp("2020-10-17 00:00:00-0400", tz=tz),
            freq="D",
        )
        result = dti + Timedelta(days=1)
        assert result.freq == dti.freq

    def test_sub_datetime_preserves_freq(self, tz_naive_fixture):
        # GH#48818
        dti = date_range("2016-01-01", periods=12, tz=tz_naive_fixture)

        res = dti - dti[0]
        expected = timedelta_range("0 Days", "11 Days")
        tm.assert_index_equal(res, expected)
        assert res.freq == expected.freq

    @pytest.mark.xfail(
        reason="The inherited freq is incorrect bc dti.freq is incorrect "
        "https://github.com/pandas-dev/pandas/pull/48818/files#r982793461"
    )
    def test_sub_datetime_preserves_freq_across_dst(self):
        # GH#48818
        ts = Timestamp("2016-03-11", tz="US/Pacific")
        dti = date_range(ts, periods=4)

        res = dti - dti[0]
        expected = TimedeltaIndex(
            [
                Timedelta(days=0),
                Timedelta(days=1),
                Timedelta(days=2),
                Timedelta(days=2, hours=23),
            ]
        )
        tm.assert_index_equal(res, expected)
        assert res.freq == expected.freq
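Adding a `Timedelta` translates every element by the same fixed duration, so a tick-based `freq` can be carried over unchanged; that invariant is what `test_add_timedelta_preserves_freq` checks. A minimal sketch (illustrative):

import pandas as pd

dti = pd.date_range("2019-03-26", periods=3, freq="D", tz="UTC")
result = dti + pd.Timedelta(days=1)
assert result.freq == dti.freq  # freq='D' survives the translation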
File diff suppressed because it is too large
File diff suppressed because it is too large
@ -0,0 +1,216 @@
import datetime as dt
from datetime import date
import re

import numpy as np
import pytest

from pandas.compat.numpy import np_long

import pandas as pd
from pandas import (
    DataFrame,
    DatetimeIndex,
    Index,
    Timestamp,
    date_range,
    offsets,
)
import pandas._testing as tm


class TestDatetimeIndex:
    def test_is_(self):
        dti = date_range(start="1/1/2005", end="12/1/2005", freq="ME")
        assert dti.is_(dti)
        assert dti.is_(dti.view())
        assert not dti.is_(dti.copy())

    def test_time_overflow_for_32bit_machines(self):
        # GH8943. On some machines NumPy defaults to np.int32 (for example,
        # 32-bit Linux machines). In the function _generate_regular_range
        # found in tseries/index.py, `periods` gets multiplied by `strides`
        # (which has value 1e9) and since the max value for np.int32 is ~2e9,
        # and since those machines won't promote np.int32 to np.int64, we get
        # overflow.
        periods = np_long(1000)

        idx1 = date_range(start="2000", periods=periods, freq="s")
        assert len(idx1) == periods

        idx2 = date_range(end="2000", periods=periods, freq="s")
        assert len(idx2) == periods

    def test_nat(self):
        assert DatetimeIndex([np.nan])[0] is pd.NaT

    def test_week_of_month_frequency(self):
        # GH 5348: "ValueError: Could not evaluate WOM-1SUN" shouldn't raise
        d1 = date(2002, 9, 1)
        d2 = date(2013, 10, 27)
        d3 = date(2012, 9, 30)
        idx1 = DatetimeIndex([d1, d2])
        idx2 = DatetimeIndex([d3])
        result_append = idx1.append(idx2)
        expected = DatetimeIndex([d1, d2, d3])
        tm.assert_index_equal(result_append, expected)
        result_union = idx1.union(idx2)
        expected = DatetimeIndex([d1, d3, d2])
        tm.assert_index_equal(result_union, expected)

    def test_append_nondatetimeindex(self):
        rng = date_range("1/1/2000", periods=10)
        idx = Index(["a", "b", "c", "d"])

        result = rng.append(idx)
        assert isinstance(result[0], Timestamp)

    def test_misc_coverage(self):
        rng = date_range("1/1/2000", periods=5)
        result = rng.groupby(rng.day)
        assert isinstance(next(iter(result.values()))[0], Timestamp)

    # TODO: belongs in frame groupby tests?
    def test_groupby_function_tuple_1677(self):
        df = DataFrame(
            np.random.default_rng(2).random(100),
            index=date_range("1/1/2000", periods=100),
        )
        monthly_group = df.groupby(lambda x: (x.year, x.month))

        result = monthly_group.mean()
        assert isinstance(result.index[0], tuple)

    def assert_index_parameters(self, index):
        assert index.freq == "40960ns"
        assert index.inferred_freq == "40960ns"

    def test_ns_index(self):
        nsamples = 400
        ns = int(1e9 / 24414)
        dtstart = np.datetime64("2012-09-20T00:00:00")

        dt = dtstart + np.arange(nsamples) * np.timedelta64(ns, "ns")
        freq = ns * offsets.Nano()
        index = DatetimeIndex(dt, freq=freq, name="time")
        self.assert_index_parameters(index)

        new_index = date_range(start=index[0], end=index[-1], freq=index.freq)
        self.assert_index_parameters(new_index)

    def test_asarray_tz_naive(self):
        # This shouldn't produce a warning.
        idx = date_range("2000", periods=2)
        # M8[ns] by default
        result = np.asarray(idx)

        expected = np.array(["2000-01-01", "2000-01-02"], dtype="M8[ns]")
        tm.assert_numpy_array_equal(result, expected)

        # optionally, object
        result = np.asarray(idx, dtype=object)

        expected = np.array([Timestamp("2000-01-01"), Timestamp("2000-01-02")])
        tm.assert_numpy_array_equal(result, expected)

    def test_asarray_tz_aware(self):
        tz = "US/Central"
        idx = date_range("2000", periods=2, tz=tz)
        expected = np.array(["2000-01-01T06", "2000-01-02T06"], dtype="M8[ns]")
        result = np.asarray(idx, dtype="datetime64[ns]")

        tm.assert_numpy_array_equal(result, expected)

        # Old behavior with no warning
        result = np.asarray(idx, dtype="M8[ns]")

        tm.assert_numpy_array_equal(result, expected)

        # Future behavior with no warning
        expected = np.array(
            [Timestamp("2000-01-01", tz=tz), Timestamp("2000-01-02", tz=tz)]
        )
        result = np.asarray(idx, dtype=object)

        tm.assert_numpy_array_equal(result, expected)

    def test_CBH_deprecated(self):
        msg = "'CBH' is deprecated and will be removed in a future version."

        with tm.assert_produces_warning(FutureWarning, match=msg):
            expected = date_range(
                dt.datetime(2022, 12, 11), dt.datetime(2022, 12, 13), freq="CBH"
            )
        result = DatetimeIndex(
            [
                "2022-12-12 09:00:00",
                "2022-12-12 10:00:00",
                "2022-12-12 11:00:00",
                "2022-12-12 12:00:00",
                "2022-12-12 13:00:00",
                "2022-12-12 14:00:00",
                "2022-12-12 15:00:00",
                "2022-12-12 16:00:00",
            ],
            dtype="datetime64[ns]",
            freq="cbh",
        )

        tm.assert_index_equal(result, expected)

    @pytest.mark.parametrize(
        "freq_depr, expected_values, expected_freq",
        [
            (
                "AS-AUG",
                ["2021-08-01", "2022-08-01", "2023-08-01"],
                "YS-AUG",
            ),
            (
                "1BAS-MAY",
                ["2021-05-03", "2022-05-02", "2023-05-01"],
                "1BYS-MAY",
            ),
        ],
    )
    def test_AS_BAS_deprecated(self, freq_depr, expected_values, expected_freq):
        # GH#55479
        freq_msg = re.split("[0-9]*", freq_depr, maxsplit=1)[1]
        msg = f"'{freq_msg}' is deprecated and will be removed in a future version."

        with tm.assert_produces_warning(FutureWarning, match=msg):
            expected = date_range(
                dt.datetime(2020, 12, 1), dt.datetime(2023, 12, 1), freq=freq_depr
            )
        result = DatetimeIndex(
            expected_values,
            dtype="datetime64[ns]",
            freq=expected_freq,
        )

        tm.assert_index_equal(result, expected)

    @pytest.mark.parametrize(
        "freq, expected_values, freq_depr",
        [
            ("2BYE-MAR", ["2016-03-31"], "2BA-MAR"),
            ("2BYE-JUN", ["2016-06-30"], "2BY-JUN"),
            ("2BME", ["2016-02-29", "2016-04-29", "2016-06-30"], "2BM"),
            ("2BQE", ["2016-03-31"], "2BQ"),
            ("1BQE-MAR", ["2016-03-31", "2016-06-30"], "1BQ-MAR"),
        ],
    )
    def test_BM_BQ_BY_deprecated(self, freq, expected_values, freq_depr):
        # GH#52064
        msg = (
            f"'{freq_depr[1:]}' is deprecated and will be removed "
            f"in a future version, please use '{freq[1:]}' instead."
        )

        with tm.assert_produces_warning(FutureWarning, match=msg):
            expected = date_range(start="2016-02-21", end="2016-08-21", freq=freq_depr)
        result = DatetimeIndex(
            data=expected_values,
            dtype="datetime64[ns]",
            freq=freq,
        )

        tm.assert_index_equal(result, expected)
|
||||
@ -0,0 +1,356 @@
|
||||
from datetime import datetime
|
||||
|
||||
import dateutil.tz
|
||||
import numpy as np
|
||||
import pytest
|
||||
import pytz
|
||||
|
||||
import pandas as pd
|
||||
from pandas import (
|
||||
DatetimeIndex,
|
||||
NaT,
|
||||
Series,
|
||||
)
|
||||
import pandas._testing as tm
|
||||
|
||||
|
||||
@pytest.fixture(params=["s", "ms", "us", "ns"])
|
||||
def unit(request):
|
||||
return request.param
|
||||
|
||||
|
||||
def test_get_values_for_csv():
|
||||
index = pd.date_range(freq="1D", periods=3, start="2017-01-01")
|
||||
|
||||
# First, with no arguments.
|
||||
expected = np.array(["2017-01-01", "2017-01-02", "2017-01-03"], dtype=object)
|
||||
|
||||
result = index._get_values_for_csv()
|
||||
tm.assert_numpy_array_equal(result, expected)
|
||||
|
||||
# No NaN values, so na_rep has no effect
|
||||
result = index._get_values_for_csv(na_rep="pandas")
|
||||
tm.assert_numpy_array_equal(result, expected)
|
||||
|
||||
# Make sure date formatting works
|
||||
expected = np.array(["01-2017-01", "01-2017-02", "01-2017-03"], dtype=object)
|
||||
|
||||
result = index._get_values_for_csv(date_format="%m-%Y-%d")
|
||||
tm.assert_numpy_array_equal(result, expected)
|
||||
|
||||
# NULL object handling should work
|
||||
index = DatetimeIndex(["2017-01-01", NaT, "2017-01-03"])
|
||||
expected = np.array(["2017-01-01", "NaT", "2017-01-03"], dtype=object)
|
||||
|
||||
result = index._get_values_for_csv(na_rep="NaT")
|
||||
tm.assert_numpy_array_equal(result, expected)
|
||||
|
||||
expected = np.array(["2017-01-01", "pandas", "2017-01-03"], dtype=object)
|
||||
|
||||
result = index._get_values_for_csv(na_rep="pandas")
|
||||
tm.assert_numpy_array_equal(result, expected)
|
||||
|
||||
result = index._get_values_for_csv(na_rep="NaT", date_format="%Y-%m-%d %H:%M:%S.%f")
|
||||
expected = np.array(
|
||||
["2017-01-01 00:00:00.000000", "NaT", "2017-01-03 00:00:00.000000"],
|
||||
dtype=object,
|
||||
)
|
||||
tm.assert_numpy_array_equal(result, expected)
|
||||
|
||||
# invalid format
|
||||
result = index._get_values_for_csv(na_rep="NaT", date_format="foo")
|
||||
expected = np.array(["foo", "NaT", "foo"], dtype=object)
|
||||
tm.assert_numpy_array_equal(result, expected)
|
||||
|
||||
|
||||
class TestDatetimeIndexRendering:
|
||||
@pytest.mark.parametrize("tzstr", ["US/Eastern", "dateutil/US/Eastern"])
|
||||
def test_dti_with_timezone_repr(self, tzstr):
|
||||
rng = pd.date_range("4/13/2010", "5/6/2010")
|
||||
|
||||
rng_eastern = rng.tz_localize(tzstr)
|
||||
|
||||
rng_repr = repr(rng_eastern)
|
||||
assert "2010-04-13 00:00:00" in rng_repr
|
||||
|
||||
def test_dti_repr_dates(self):
|
||||
text = str(pd.to_datetime([datetime(2013, 1, 1), datetime(2014, 1, 1)]))
|
||||
assert "['2013-01-01'," in text
|
||||
assert ", '2014-01-01']" in text
|
||||
|
||||
def test_dti_repr_mixed(self):
|
||||
text = str(
|
||||
pd.to_datetime(
|
||||
[datetime(2013, 1, 1), datetime(2014, 1, 1, 12), datetime(2014, 1, 1)]
|
||||
)
|
||||
)
|
||||
assert "'2013-01-01 00:00:00'," in text
|
||||
assert "'2014-01-01 00:00:00']" in text
|
||||
|
||||
def test_dti_repr_short(self):
|
||||
dr = pd.date_range(start="1/1/2012", periods=1)
|
||||
repr(dr)
|
||||
|
||||
dr = pd.date_range(start="1/1/2012", periods=2)
|
||||
repr(dr)
|
||||
|
||||
dr = pd.date_range(start="1/1/2012", periods=3)
|
||||
repr(dr)
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"dates, freq, expected_repr",
|
||||
[
|
||||
(
|
||||
["2012-01-01 00:00:00"],
|
||||
"60min",
|
||||
(
|
||||
"DatetimeIndex(['2012-01-01 00:00:00'], "
|
||||
"dtype='datetime64[ns]', freq='60min')"
|
||||
),
|
||||
),
|
||||
(
|
||||
["2012-01-01 00:00:00", "2012-01-01 01:00:00"],
|
||||
"60min",
|
||||
"DatetimeIndex(['2012-01-01 00:00:00', '2012-01-01 01:00:00'], "
|
||||
"dtype='datetime64[ns]', freq='60min')",
|
||||
),
|
||||
(
|
||||
["2012-01-01"],
|
||||
"24h",
|
||||
"DatetimeIndex(['2012-01-01'], dtype='datetime64[ns]', freq='24h')",
|
||||
),
|
||||
],
|
||||
)
|
||||
def test_dti_repr_time_midnight(self, dates, freq, expected_repr, unit):
|
||||
# GH53634
|
||||
dti = DatetimeIndex(dates, freq).as_unit(unit)
|
||||
actual_repr = repr(dti)
|
||||
assert actual_repr == expected_repr.replace("[ns]", f"[{unit}]")
|
||||
|
||||
def test_dti_representation(self, unit):
|
||||
idxs = []
|
||||
idxs.append(DatetimeIndex([], freq="D"))
|
||||
idxs.append(DatetimeIndex(["2011-01-01"], freq="D"))
|
||||
idxs.append(DatetimeIndex(["2011-01-01", "2011-01-02"], freq="D"))
|
||||
idxs.append(DatetimeIndex(["2011-01-01", "2011-01-02", "2011-01-03"], freq="D"))
|
||||
idxs.append(
|
||||
DatetimeIndex(
|
||||
["2011-01-01 09:00", "2011-01-01 10:00", "2011-01-01 11:00"],
|
||||
freq="h",
|
||||
tz="Asia/Tokyo",
|
||||
)
|
||||
)
|
||||
idxs.append(
|
||||
DatetimeIndex(
|
||||
["2011-01-01 09:00", "2011-01-01 10:00", NaT], tz="US/Eastern"
|
||||
)
|
||||
)
|
||||
idxs.append(
|
||||
DatetimeIndex(["2011-01-01 09:00", "2011-01-01 10:00", NaT], tz="UTC")
|
||||
)
|
||||
|
||||
exp = []
|
||||
exp.append("DatetimeIndex([], dtype='datetime64[ns]', freq='D')")
|
||||
exp.append("DatetimeIndex(['2011-01-01'], dtype='datetime64[ns]', freq='D')")
|
||||
exp.append(
|
||||
"DatetimeIndex(['2011-01-01', '2011-01-02'], "
|
||||
"dtype='datetime64[ns]', freq='D')"
|
||||
)
|
||||
exp.append(
|
||||
"DatetimeIndex(['2011-01-01', '2011-01-02', '2011-01-03'], "
|
||||
"dtype='datetime64[ns]', freq='D')"
|
||||
)
|
||||
exp.append(
|
||||
"DatetimeIndex(['2011-01-01 09:00:00+09:00', "
|
||||
"'2011-01-01 10:00:00+09:00', '2011-01-01 11:00:00+09:00']"
|
||||
", dtype='datetime64[ns, Asia/Tokyo]', freq='h')"
|
||||
)
|
||||
exp.append(
|
||||
"DatetimeIndex(['2011-01-01 09:00:00-05:00', "
|
||||
"'2011-01-01 10:00:00-05:00', 'NaT'], "
|
||||
"dtype='datetime64[ns, US/Eastern]', freq=None)"
|
||||
)
|
||||
exp.append(
|
||||
"DatetimeIndex(['2011-01-01 09:00:00+00:00', "
|
||||
"'2011-01-01 10:00:00+00:00', 'NaT'], "
|
||||
"dtype='datetime64[ns, UTC]', freq=None)"
|
||||
""
|
||||
)
|
||||
|
||||
with pd.option_context("display.width", 300):
|
||||
for index, expected in zip(idxs, exp):
|
||||
index = index.as_unit(unit)
|
||||
expected = expected.replace("[ns", f"[{unit}")
|
||||
result = repr(index)
|
||||
assert result == expected
|
||||
result = str(index)
|
||||
assert result == expected
|
||||
|
||||
# TODO: this is a Series.__repr__ test
|
||||
def test_dti_representation_to_series(self, unit):
|
||||
idx1 = DatetimeIndex([], freq="D")
|
||||
idx2 = DatetimeIndex(["2011-01-01"], freq="D")
|
||||
idx3 = DatetimeIndex(["2011-01-01", "2011-01-02"], freq="D")
|
||||
idx4 = DatetimeIndex(["2011-01-01", "2011-01-02", "2011-01-03"], freq="D")
|
||||
idx5 = DatetimeIndex(
|
||||
["2011-01-01 09:00", "2011-01-01 10:00", "2011-01-01 11:00"],
|
||||
freq="h",
|
||||
tz="Asia/Tokyo",
|
||||
)
|
||||
idx6 = DatetimeIndex(
|
||||
["2011-01-01 09:00", "2011-01-01 10:00", NaT], tz="US/Eastern"
|
||||
)
|
||||
idx7 = DatetimeIndex(["2011-01-01 09:00", "2011-01-02 10:15"])
|
||||
|
||||
exp1 = """Series([], dtype: datetime64[ns])"""
|
||||
|
||||
exp2 = "0 2011-01-01\ndtype: datetime64[ns]"
|
||||
|
||||
exp3 = "0 2011-01-01\n1 2011-01-02\ndtype: datetime64[ns]"
|
||||
|
||||
exp4 = (
|
||||
"0 2011-01-01\n"
|
||||
"1 2011-01-02\n"
|
||||
"2 2011-01-03\n"
|
||||
"dtype: datetime64[ns]"
|
||||
)
|
||||
|
||||
exp5 = (
|
||||
"0 2011-01-01 09:00:00+09:00\n"
|
||||
"1 2011-01-01 10:00:00+09:00\n"
|
||||
"2 2011-01-01 11:00:00+09:00\n"
|
||||
"dtype: datetime64[ns, Asia/Tokyo]"
|
||||
)
|
||||
|
||||
exp6 = (
|
||||
"0 2011-01-01 09:00:00-05:00\n"
|
||||
"1 2011-01-01 10:00:00-05:00\n"
|
||||
"2 NaT\n"
|
||||
"dtype: datetime64[ns, US/Eastern]"
|
||||
)
|
||||
|
||||
exp7 = (
|
||||
"0 2011-01-01 09:00:00\n"
|
||||
"1 2011-01-02 10:15:00\n"
|
||||
"dtype: datetime64[ns]"
|
||||
)
|
||||
|
||||
with pd.option_context("display.width", 300):
|
||||
for idx, expected in zip(
|
||||
[idx1, idx2, idx3, idx4, idx5, idx6, idx7],
|
||||
[exp1, exp2, exp3, exp4, exp5, exp6, exp7],
|
||||
):
|
||||
ser = Series(idx.as_unit(unit))
|
||||
result = repr(ser)
|
||||
assert result == expected.replace("[ns", f"[{unit}")
|
||||
|
||||
def test_dti_summary(self):
|
||||
# GH#9116
|
||||
idx1 = DatetimeIndex([], freq="D")
|
||||
idx2 = DatetimeIndex(["2011-01-01"], freq="D")
|
||||
idx3 = DatetimeIndex(["2011-01-01", "2011-01-02"], freq="D")
|
||||
idx4 = DatetimeIndex(["2011-01-01", "2011-01-02", "2011-01-03"], freq="D")
|
||||
idx5 = DatetimeIndex(
|
||||
["2011-01-01 09:00", "2011-01-01 10:00", "2011-01-01 11:00"],
|
||||
freq="h",
|
||||
tz="Asia/Tokyo",
|
||||
)
|
||||
idx6 = DatetimeIndex(
|
||||
["2011-01-01 09:00", "2011-01-01 10:00", NaT], tz="US/Eastern"
|
||||
)
|
||||
|
||||
exp1 = "DatetimeIndex: 0 entries\nFreq: D"
|
||||
|
||||
exp2 = "DatetimeIndex: 1 entries, 2011-01-01 to 2011-01-01\nFreq: D"
|
||||
|
||||
exp3 = "DatetimeIndex: 2 entries, 2011-01-01 to 2011-01-02\nFreq: D"
|
||||
|
||||
exp4 = "DatetimeIndex: 3 entries, 2011-01-01 to 2011-01-03\nFreq: D"
|
||||
|
||||
exp5 = (
|
||||
"DatetimeIndex: 3 entries, 2011-01-01 09:00:00+09:00 "
|
||||
"to 2011-01-01 11:00:00+09:00\n"
|
||||
"Freq: h"
|
||||
)
|
||||
|
||||
exp6 = """DatetimeIndex: 3 entries, 2011-01-01 09:00:00-05:00 to NaT"""
|
||||
|
||||
for idx, expected in zip(
|
||||
[idx1, idx2, idx3, idx4, idx5, idx6], [exp1, exp2, exp3, exp4, exp5, exp6]
|
||||
):
|
||||
result = idx._summary()
|
||||
assert result == expected
|
||||
|
||||
@pytest.mark.parametrize("tz", [None, pytz.utc, dateutil.tz.tzutc()])
|
||||
@pytest.mark.parametrize("freq", ["B", "C"])
|
||||
def test_dti_business_repr_etc_smoke(self, tz, freq):
|
||||
# only really care that it works
|
||||
dti = pd.bdate_range(
|
||||
datetime(2009, 1, 1), datetime(2010, 1, 1), tz=tz, freq=freq
|
||||
)
|
||||
repr(dti)
|
||||
dti._summary()
|
||||
dti[2:2]._summary()
|
||||
|
||||
|
||||
class TestFormat:
|
||||
def test_format(self):
|
||||
# GH#35439
|
||||
idx = pd.date_range("20130101", periods=5)
|
||||
expected = [f"{x:%Y-%m-%d}" for x in idx]
|
||||
msg = r"DatetimeIndex\.format is deprecated"
|
||||
with tm.assert_produces_warning(FutureWarning, match=msg):
|
||||
assert idx.format() == expected
|
||||
|
||||
def test_format_with_name_time_info(self):
|
||||
# bug I fixed 12/20/2011
|
||||
dates = pd.date_range("2011-01-01 04:00:00", periods=10, name="something")
|
||||
|
||||
msg = "DatetimeIndex.format is deprecated"
|
||||
with tm.assert_produces_warning(FutureWarning, match=msg):
|
||||
formatted = dates.format(name=True)
|
||||
assert formatted[0] == "something"
|
||||
|
||||
def test_format_datetime_with_time(self):
|
||||
dti = DatetimeIndex([datetime(2012, 2, 7), datetime(2012, 2, 7, 23)])
|
||||
|
||||
msg = "DatetimeIndex.format is deprecated"
|
||||
with tm.assert_produces_warning(FutureWarning, match=msg):
|
||||
result = dti.format()
|
||||
expected = ["2012-02-07 00:00:00", "2012-02-07 23:00:00"]
|
||||
assert len(result) == 2
|
||||
assert result == expected
|
||||
|
||||
def test_format_datetime(self):
|
||||
msg = "DatetimeIndex.format is deprecated"
|
||||
with tm.assert_produces_warning(FutureWarning, match=msg):
|
||||
formatted = pd.to_datetime([datetime(2003, 1, 1, 12), NaT]).format()
|
||||
assert formatted[0] == "2003-01-01 12:00:00"
|
||||
assert formatted[1] == "NaT"
|
||||
|
||||
def test_format_date(self):
|
||||
msg = "DatetimeIndex.format is deprecated"
|
||||
with tm.assert_produces_warning(FutureWarning, match=msg):
|
||||
formatted = pd.to_datetime([datetime(2003, 1, 1), NaT]).format()
|
||||
assert formatted[0] == "2003-01-01"
|
||||
assert formatted[1] == "NaT"
|
||||
|
||||
def test_format_date_tz(self):
|
||||
dti = pd.to_datetime([datetime(2013, 1, 1)], utc=True)
|
||||
msg = "DatetimeIndex.format is deprecated"
|
||||
with tm.assert_produces_warning(FutureWarning, match=msg):
|
||||
formatted = dti.format()
|
||||
assert formatted[0] == "2013-01-01 00:00:00+00:00"
|
||||
|
||||
dti = pd.to_datetime([datetime(2013, 1, 1), NaT], utc=True)
|
||||
with tm.assert_produces_warning(FutureWarning, match=msg):
|
||||
formatted = dti.format()
|
||||
assert formatted[0] == "2013-01-01 00:00:00+00:00"
|
||||
|
||||
def test_format_date_explicit_date_format(self):
|
||||
dti = pd.to_datetime([datetime(2003, 2, 1), NaT])
|
||||
msg = "DatetimeIndex.format is deprecated"
|
||||
with tm.assert_produces_warning(FutureWarning, match=msg):
|
||||
formatted = dti.format(date_format="%m-%d-%Y", na_rep="UT")
|
||||
assert formatted[0] == "02-01-2003"
|
||||
assert formatted[1] == "UT"
|
||||
@ -0,0 +1,61 @@
|
||||
import pytest
|
||||
|
||||
from pandas import (
|
||||
DatetimeIndex,
|
||||
date_range,
|
||||
)
|
||||
|
||||
from pandas.tseries.offsets import (
|
||||
BDay,
|
||||
DateOffset,
|
||||
Day,
|
||||
Hour,
|
||||
)
|
||||
|
||||
|
||||
class TestFreq:
|
||||
def test_freq_setter_errors(self):
|
||||
# GH#20678
|
||||
idx = DatetimeIndex(["20180101", "20180103", "20180105"])
|
||||
|
||||
# setting with an incompatible freq
|
||||
msg = (
|
||||
"Inferred frequency 2D from passed values does not conform to "
|
||||
"passed frequency 5D"
|
||||
)
|
||||
with pytest.raises(ValueError, match=msg):
|
||||
idx._data.freq = "5D"
|
||||
|
||||
# setting with non-freq string
|
||||
with pytest.raises(ValueError, match="Invalid frequency"):
|
||||
idx._data.freq = "foo"
|
||||
|
||||
@pytest.mark.parametrize("values", [["20180101", "20180103", "20180105"], []])
|
||||
@pytest.mark.parametrize("freq", ["2D", Day(2), "2B", BDay(2), "48h", Hour(48)])
|
||||
@pytest.mark.parametrize("tz", [None, "US/Eastern"])
|
||||
def test_freq_setter(self, values, freq, tz):
|
||||
# GH#20678
|
||||
idx = DatetimeIndex(values, tz=tz)
|
||||
|
||||
# can set to an offset, converting from string if necessary
|
||||
idx._data.freq = freq
|
||||
assert idx.freq == freq
|
||||
assert isinstance(idx.freq, DateOffset)
|
||||
|
||||
# can reset to None
|
||||
idx._data.freq = None
|
||||
assert idx.freq is None
|
||||
|
||||
def test_freq_view_safe(self):
|
||||
# Setting the freq for one DatetimeIndex shouldn't alter the freq
|
||||
# for another that views the same data
|
||||
|
||||
dti = date_range("2016-01-01", periods=5)
|
||||
dta = dti._data
|
||||
|
||||
dti2 = DatetimeIndex(dta)._with_freq(None)
|
||||
assert dti2.freq is None
|
||||
|
||||
# Original was not altered
|
||||
assert dti.freq == "D"
|
||||
assert dta.freq == "D"
|
||||
@ -0,0 +1,717 @@
|
||||
from datetime import (
|
||||
date,
|
||||
datetime,
|
||||
time,
|
||||
timedelta,
|
||||
)
|
||||
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
from pandas._libs import index as libindex
|
||||
from pandas.compat.numpy import np_long
|
||||
|
||||
import pandas as pd
|
||||
from pandas import (
|
||||
DatetimeIndex,
|
||||
Index,
|
||||
Timestamp,
|
||||
bdate_range,
|
||||
date_range,
|
||||
notna,
|
||||
)
|
||||
import pandas._testing as tm
|
||||
|
||||
from pandas.tseries.frequencies import to_offset
|
||||
|
||||
START, END = datetime(2009, 1, 1), datetime(2010, 1, 1)
|
||||
|
||||
|
||||
class TestGetItem:
|
||||
def test_getitem_slice_keeps_name(self):
|
||||
# GH4226
|
||||
st = Timestamp("2013-07-01 00:00:00", tz="America/Los_Angeles")
|
||||
et = Timestamp("2013-07-02 00:00:00", tz="America/Los_Angeles")
|
||||
dr = date_range(st, et, freq="h", name="timebucket")
|
||||
assert dr[1:].name == dr.name
|
||||
|
||||
@pytest.mark.parametrize("tz", [None, "Asia/Tokyo"])
|
||||
def test_getitem(self, tz):
|
||||
idx = date_range("2011-01-01", "2011-01-31", freq="D", tz=tz, name="idx")
|
||||
|
||||
result = idx[0]
|
||||
assert result == Timestamp("2011-01-01", tz=idx.tz)
|
||||
|
||||
result = idx[0:5]
|
||||
expected = date_range(
|
||||
"2011-01-01", "2011-01-05", freq="D", tz=idx.tz, name="idx"
|
||||
)
|
||||
tm.assert_index_equal(result, expected)
|
||||
assert result.freq == expected.freq
|
||||
|
||||
result = idx[0:10:2]
|
||||
expected = date_range(
|
||||
"2011-01-01", "2011-01-09", freq="2D", tz=idx.tz, name="idx"
|
||||
)
|
||||
tm.assert_index_equal(result, expected)
|
||||
assert result.freq == expected.freq
|
||||
|
||||
result = idx[-20:-5:3]
|
||||
expected = date_range(
|
||||
"2011-01-12", "2011-01-24", freq="3D", tz=idx.tz, name="idx"
|
||||
)
|
||||
tm.assert_index_equal(result, expected)
|
||||
assert result.freq == expected.freq
|
||||
|
||||
result = idx[4::-1]
|
||||
expected = DatetimeIndex(
|
||||
["2011-01-05", "2011-01-04", "2011-01-03", "2011-01-02", "2011-01-01"],
|
||||
dtype=idx.dtype,
|
||||
freq="-1D",
|
||||
name="idx",
|
||||
)
|
||||
tm.assert_index_equal(result, expected)
|
||||
assert result.freq == expected.freq
|
||||
|
||||
@pytest.mark.parametrize("freq", ["B", "C"])
|
||||
def test_dti_business_getitem(self, freq):
|
||||
rng = bdate_range(START, END, freq=freq)
|
||||
smaller = rng[:5]
|
||||
exp = DatetimeIndex(rng.view(np.ndarray)[:5], freq=freq)
|
||||
tm.assert_index_equal(smaller, exp)
|
||||
assert smaller.freq == exp.freq
|
||||
assert smaller.freq == rng.freq
|
||||
|
||||
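        # slicing with a step should scale the frequency by that step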
        sliced = rng[::5]
        assert sliced.freq == to_offset(freq) * 5

        fancy_indexed = rng[[4, 3, 2, 1, 0]]
        assert len(fancy_indexed) == 5
        assert isinstance(fancy_indexed, DatetimeIndex)
        assert fancy_indexed.freq is None

        # 32-bit vs. 64-bit platforms
        assert rng[4] == rng[np_long(4)]

    @pytest.mark.parametrize("freq", ["B", "C"])
    def test_dti_business_getitem_matplotlib_hackaround(self, freq):
        rng = bdate_range(START, END, freq=freq)
        with pytest.raises(ValueError, match="Multi-dimensional indexing"):
            # GH#30588 multi-dimensional indexing deprecated
            rng[:, None]

    def test_getitem_int_list(self):
        dti = date_range(start="1/1/2005", end="12/1/2005", freq="ME")
        dti2 = dti[[1, 3, 5]]

        v1 = dti2[0]
        v2 = dti2[1]
        v3 = dti2[2]

        assert v1 == Timestamp("2/28/2005")
        assert v2 == Timestamp("4/30/2005")
        assert v3 == Timestamp("6/30/2005")

        # getitem with non-slice drops freq
        assert dti2.freq is None


class TestWhere:
    def test_where_doesnt_retain_freq(self):
        dti = date_range("20130101", periods=3, freq="D", name="idx")
        cond = [True, True, False]
        expected = DatetimeIndex([dti[0], dti[1], dti[0]], freq=None, name="idx")

        result = dti.where(cond, dti[::-1])
        tm.assert_index_equal(result, expected)

    def test_where_other(self):
        # other is ndarray or Index
        i = date_range("20130101", periods=3, tz="US/Eastern")

        for arr in [np.nan, pd.NaT]:
            result = i.where(notna(i), other=arr)
            expected = i
            tm.assert_index_equal(result, expected)

        i2 = i.copy()
        i2 = Index([pd.NaT, pd.NaT] + i[2:].tolist())
        result = i.where(notna(i2), i2)
        tm.assert_index_equal(result, i2)

        i2 = i.copy()
        i2 = Index([pd.NaT, pd.NaT] + i[2:].tolist())
        result = i.where(notna(i2), i2._values)
        tm.assert_index_equal(result, i2)

    def test_where_invalid_dtypes(self):
        dti = date_range("20130101", periods=3, tz="US/Eastern")

        tail = dti[2:].tolist()
        i2 = Index([pd.NaT, pd.NaT] + tail)

        mask = notna(i2)

        # passing tz-naive ndarray to tzaware DTI
        result = dti.where(mask, i2.values)
        expected = Index([pd.NaT.asm8, pd.NaT.asm8] + tail, dtype=object)
        tm.assert_index_equal(result, expected)

        # passing tz-aware DTI to tznaive DTI
        naive = dti.tz_localize(None)
        result = naive.where(mask, i2)
        expected = Index([i2[0], i2[1]] + naive[2:].tolist(), dtype=object)
        tm.assert_index_equal(result, expected)

        pi = i2.tz_localize(None).to_period("D")
        result = dti.where(mask, pi)
        expected = Index([pi[0], pi[1]] + tail, dtype=object)
        tm.assert_index_equal(result, expected)

        tda = i2.asi8.view("timedelta64[ns]")
        result = dti.where(mask, tda)
        expected = Index([tda[0], tda[1]] + tail, dtype=object)
        assert isinstance(expected[0], np.timedelta64)
        tm.assert_index_equal(result, expected)

        result = dti.where(mask, i2.asi8)
        expected = Index([pd.NaT._value, pd.NaT._value] + tail, dtype=object)
        assert isinstance(expected[0], int)
        tm.assert_index_equal(result, expected)

        # non-matching scalar
        td = pd.Timedelta(days=4)
        result = dti.where(mask, td)
        expected = Index([td, td] + tail, dtype=object)
        assert expected[0] is td
        tm.assert_index_equal(result, expected)

    def test_where_mismatched_nat(self, tz_aware_fixture):
        tz = tz_aware_fixture
        dti = date_range("2013-01-01", periods=3, tz=tz)
        cond = np.array([True, False, True])

        tdnat = np.timedelta64("NaT", "ns")
        expected = Index([dti[0], tdnat, dti[2]], dtype=object)
        assert expected[1] is tdnat

        result = dti.where(cond, tdnat)
        tm.assert_index_equal(result, expected)

    def test_where_tz(self):
        i = date_range("20130101", periods=3, tz="US/Eastern")
        result = i.where(notna(i))
        expected = i
        tm.assert_index_equal(result, expected)

        i2 = i.copy()
        i2 = Index([pd.NaT, pd.NaT] + i[2:].tolist())
        result = i.where(notna(i2))
        expected = i2
        tm.assert_index_equal(result, expected)


class TestTake:
    @pytest.mark.parametrize("tzstr", ["US/Eastern", "dateutil/US/Eastern"])
    def test_dti_take_dont_lose_meta(self, tzstr):
        rng = date_range("1/1/2000", periods=20, tz=tzstr)

        result = rng.take(range(5))
        assert result.tz == rng.tz
        assert result.freq == rng.freq

    def test_take_nan_first_datetime(self):
        index = DatetimeIndex([pd.NaT, Timestamp("20130101"), Timestamp("20130102")])
        result = index.take([-1, 0, 1])
        expected = DatetimeIndex([index[-1], index[0], index[1]])
        tm.assert_index_equal(result, expected)

    @pytest.mark.parametrize("tz", [None, "Asia/Tokyo"])
    def test_take(self, tz):
        # GH#10295
        idx = date_range("2011-01-01", "2011-01-31", freq="D", name="idx", tz=tz)

        result = idx.take([0])
        assert result == Timestamp("2011-01-01", tz=idx.tz)

        result = idx.take([0, 1, 2])
        expected = date_range(
            "2011-01-01", "2011-01-03", freq="D", tz=idx.tz, name="idx"
        )
        tm.assert_index_equal(result, expected)
        assert result.freq == expected.freq

        result = idx.take([0, 2, 4])
        expected = date_range(
            "2011-01-01", "2011-01-05", freq="2D", tz=idx.tz, name="idx"
        )
        tm.assert_index_equal(result, expected)
        assert result.freq == expected.freq

        result = idx.take([7, 4, 1])
        expected = date_range(
            "2011-01-08", "2011-01-02", freq="-3D", tz=idx.tz, name="idx"
        )
        tm.assert_index_equal(result, expected)
        assert result.freq == expected.freq

        result = idx.take([3, 2, 5])
        expected = DatetimeIndex(
            ["2011-01-04", "2011-01-03", "2011-01-06"],
            dtype=idx.dtype,
            freq=None,
            name="idx",
        )
        tm.assert_index_equal(result, expected)
        assert result.freq is None

        result = idx.take([-3, 2, 5])
        expected = DatetimeIndex(
            ["2011-01-29", "2011-01-03", "2011-01-06"],
            dtype=idx.dtype,
            freq=None,
            name="idx",
        )
        tm.assert_index_equal(result, expected)
        assert result.freq is None

    def test_take_invalid_kwargs(self):
        idx = date_range("2011-01-01", "2011-01-31", freq="D", name="idx")
        indices = [1, 6, 5, 9, 10, 13, 15, 3]

        msg = r"take\(\) got an unexpected keyword argument 'foo'"
        with pytest.raises(TypeError, match=msg):
            idx.take(indices, foo=2)

        msg = "the 'out' parameter is not supported"
        with pytest.raises(ValueError, match=msg):
            idx.take(indices, out=indices)

        msg = "the 'mode' parameter is not supported"
        with pytest.raises(ValueError, match=msg):
            idx.take(indices, mode="clip")

    # TODO: This method came from test_datetime; de-dup with version above
    @pytest.mark.parametrize("tz", [None, "US/Eastern", "Asia/Tokyo"])
    def test_take2(self, tz):
        dates = [
            datetime(2010, 1, 1, 14),
            datetime(2010, 1, 1, 15),
            datetime(2010, 1, 1, 17),
            datetime(2010, 1, 1, 21),
        ]

        idx = date_range(
            start="2010-01-01 09:00",
            end="2010-02-01 09:00",
            freq="h",
            tz=tz,
            name="idx",
        )
        expected = DatetimeIndex(dates, freq=None, name="idx", dtype=idx.dtype)

        taken1 = idx.take([5, 6, 8, 12])
        taken2 = idx[[5, 6, 8, 12]]

        for taken in [taken1, taken2]:
            tm.assert_index_equal(taken, expected)
            assert isinstance(taken, DatetimeIndex)
            assert taken.freq is None
            assert taken.tz == expected.tz
            assert taken.name == expected.name

    def test_take_fill_value(self):
        # GH#12631
        idx = DatetimeIndex(["2011-01-01", "2011-02-01", "2011-03-01"], name="xxx")
        result = idx.take(np.array([1, 0, -1]))
        expected = DatetimeIndex(["2011-02-01", "2011-01-01", "2011-03-01"], name="xxx")
        tm.assert_index_equal(result, expected)

        # fill_value
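        # with the default allow_fill=True, a fill_value turns index -1 into NaT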
        result = idx.take(np.array([1, 0, -1]), fill_value=True)
        expected = DatetimeIndex(["2011-02-01", "2011-01-01", "NaT"], name="xxx")
        tm.assert_index_equal(result, expected)

        # allow_fill=False
        result = idx.take(np.array([1, 0, -1]), allow_fill=False, fill_value=True)
        expected = DatetimeIndex(["2011-02-01", "2011-01-01", "2011-03-01"], name="xxx")
        tm.assert_index_equal(result, expected)

        msg = (
            "When allow_fill=True and fill_value is not None, "
            "all indices must be >= -1"
        )
        with pytest.raises(ValueError, match=msg):
            idx.take(np.array([1, 0, -2]), fill_value=True)
        with pytest.raises(ValueError, match=msg):
            idx.take(np.array([1, 0, -5]), fill_value=True)

        msg = "out of bounds"
        with pytest.raises(IndexError, match=msg):
            idx.take(np.array([1, -5]))

    def test_take_fill_value_with_timezone(self):
        idx = DatetimeIndex(
            ["2011-01-01", "2011-02-01", "2011-03-01"], name="xxx", tz="US/Eastern"
        )
        result = idx.take(np.array([1, 0, -1]))
        expected = DatetimeIndex(
            ["2011-02-01", "2011-01-01", "2011-03-01"], name="xxx", tz="US/Eastern"
        )
        tm.assert_index_equal(result, expected)

        # fill_value
        result = idx.take(np.array([1, 0, -1]), fill_value=True)
        expected = DatetimeIndex(
            ["2011-02-01", "2011-01-01", "NaT"], name="xxx", tz="US/Eastern"
        )
        tm.assert_index_equal(result, expected)

        # allow_fill=False
        result = idx.take(np.array([1, 0, -1]), allow_fill=False, fill_value=True)
        expected = DatetimeIndex(
            ["2011-02-01", "2011-01-01", "2011-03-01"], name="xxx", tz="US/Eastern"
        )
        tm.assert_index_equal(result, expected)

        msg = (
            "When allow_fill=True and fill_value is not None, "
            "all indices must be >= -1"
        )
        with pytest.raises(ValueError, match=msg):
            idx.take(np.array([1, 0, -2]), fill_value=True)
        with pytest.raises(ValueError, match=msg):
            idx.take(np.array([1, 0, -5]), fill_value=True)

        msg = "out of bounds"
        with pytest.raises(IndexError, match=msg):
            idx.take(np.array([1, -5]))


class TestGetLoc:
    def test_get_loc_key_unit_mismatch(self):
        idx = date_range("2000-01-01", periods=3)
        key = idx[1].as_unit("ms")
        loc = idx.get_loc(key)
        assert loc == 1
        assert key in idx

    def test_get_loc_key_unit_mismatch_not_castable(self):
        dta = date_range("2000-01-01", periods=3)._data.astype("M8[s]")
        dti = DatetimeIndex(dta)
        key = dta[0].as_unit("ns") + pd.Timedelta(1)

        with pytest.raises(
            KeyError, match=r"Timestamp\('2000-01-01 00:00:00.000000001'\)"
        ):
            dti.get_loc(key)

        assert key not in dti

    def test_get_loc_time_obj(self):
        # time indexing
        idx = date_range("2000-01-01", periods=24, freq="h")

        result = idx.get_loc(time(12))
        expected = np.array([12])
        tm.assert_numpy_array_equal(result, expected, check_dtype=False)

        result = idx.get_loc(time(12, 30))
        expected = np.array([])
        tm.assert_numpy_array_equal(result, expected, check_dtype=False)

    @pytest.mark.parametrize("offset", [-10, 10])
    def test_get_loc_time_obj2(self, monkeypatch, offset):
        # GH#8667
        size_cutoff = 50
        n = size_cutoff + offset
        key = time(15, 11, 30)
        start = key.hour * 3600 + key.minute * 60 + key.second
        step = 24 * 3600

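        # Patch the cutoff so n falls on either side of it; the assumption is
        # that _SIZE_CUTOFF gates when the indexing engine switches lookup
        # strategies, so both paths get exercised across the offsets.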
        with monkeypatch.context():
            monkeypatch.setattr(libindex, "_SIZE_CUTOFF", size_cutoff)
            idx = date_range("2014-11-26", periods=n, freq="s")
            ts = pd.Series(np.random.default_rng(2).standard_normal(n), index=idx)
            locs = np.arange(start, n, step, dtype=np.intp)

            result = ts.index.get_loc(key)
            tm.assert_numpy_array_equal(result, locs)
            tm.assert_series_equal(ts[key], ts.iloc[locs])

            left, right = ts.copy(), ts.copy()
            left[key] *= -10
            right.iloc[locs] *= -10
            tm.assert_series_equal(left, right)

    def test_get_loc_time_nat(self):
        # GH#35114
        # Case where key's total microseconds happens to match iNaT % 1e6 // 1000
        tic = time(minute=12, second=43, microsecond=145224)
        dti = DatetimeIndex([pd.NaT])

        loc = dti.get_loc(tic)
        expected = np.array([], dtype=np.intp)
        tm.assert_numpy_array_equal(loc, expected)

    def test_get_loc_nat(self):
        # GH#20464
        index = DatetimeIndex(["1/3/2000", "NaT"])
        assert index.get_loc(pd.NaT) == 1

        assert index.get_loc(None) == 1

        assert index.get_loc(np.nan) == 1

        assert index.get_loc(pd.NA) == 1

        assert index.get_loc(np.datetime64("NaT")) == 1

        with pytest.raises(KeyError, match="NaT"):
            index.get_loc(np.timedelta64("NaT"))

    @pytest.mark.parametrize("key", [pd.Timedelta(0), pd.Timedelta(1), timedelta(0)])
    def test_get_loc_timedelta_invalid_key(self, key):
        # GH#20464
        dti = date_range("1970-01-01", periods=10)
        msg = "Cannot index DatetimeIndex with [Tt]imedelta"
        with pytest.raises(TypeError, match=msg):
            dti.get_loc(key)

    def test_get_loc_reasonable_key_error(self):
        # GH#1062
        index = DatetimeIndex(["1/3/2000"])
        with pytest.raises(KeyError, match="2000"):
            index.get_loc("1/1/2000")

    def test_get_loc_year_str(self):
        rng = date_range("1/1/2000", "1/1/2010")

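        # 2000-01-01 through 2008-12-31 is 3288 days (nine years including
        # three leap days), so "2009" resolves to the 365-day slice after it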
result = rng.get_loc("2009")
|
||||
expected = slice(3288, 3653)
|
||||
assert result == expected
|
||||
|
||||
|
||||
class TestContains:
|
||||
def test_dti_contains_with_duplicates(self):
|
||||
d = datetime(2011, 12, 5, 20, 30)
|
||||
ix = DatetimeIndex([d, d])
|
||||
assert d in ix
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"vals",
|
||||
[
|
||||
[0, 1, 0],
|
||||
[0, 0, -1],
|
||||
[0, -1, -1],
|
||||
["2015", "2015", "2016"],
|
||||
["2015", "2015", "2014"],
|
||||
],
|
||||
)
|
||||
def test_contains_nonunique(self, vals):
|
||||
# GH#9512
|
||||
idx = DatetimeIndex(vals)
|
||||
assert idx[0] in idx
|
||||
|
||||
|
||||
class TestGetIndexer:
|
||||
def test_get_indexer_date_objs(self):
|
||||
rng = date_range("1/1/2000", periods=20)
|
||||
|
||||
result = rng.get_indexer(rng.map(lambda x: x.date()))
|
||||
expected = rng.get_indexer(rng)
|
||||
tm.assert_numpy_array_equal(result, expected)
|
||||
|
||||
def test_get_indexer(self):
|
||||
idx = date_range("2000-01-01", periods=3)
|
||||
exp = np.array([0, 1, 2], dtype=np.intp)
|
||||
tm.assert_numpy_array_equal(idx.get_indexer(idx), exp)
|
||||
|
||||
target = idx[0] + pd.to_timedelta(["-1 hour", "12 hours", "1 day 1 hour"])
|
||||
tm.assert_numpy_array_equal(
|
||||
idx.get_indexer(target, "pad"), np.array([-1, 0, 1], dtype=np.intp)
|
||||
)
|
||||
tm.assert_numpy_array_equal(
|
||||
idx.get_indexer(target, "backfill"), np.array([0, 1, 2], dtype=np.intp)
|
||||
)
|
||||
tm.assert_numpy_array_equal(
|
||||
idx.get_indexer(target, "nearest"), np.array([0, 1, 1], dtype=np.intp)
|
||||
)
|
||||
tm.assert_numpy_array_equal(
|
||||
idx.get_indexer(target, "nearest", tolerance=pd.Timedelta("1 hour")),
|
||||
np.array([0, -1, 1], dtype=np.intp),
|
||||
)
|
||||
tol_raw = [
|
||||
pd.Timedelta("1 hour"),
|
||||
pd.Timedelta("1 hour"),
|
||||
pd.Timedelta("1 hour").to_timedelta64(),
|
||||
]
|
||||
tm.assert_numpy_array_equal(
|
||||
idx.get_indexer(
|
||||
target, "nearest", tolerance=[np.timedelta64(x) for x in tol_raw]
|
||||
),
|
||||
np.array([0, -1, 1], dtype=np.intp),
|
||||
)
|
||||
tol_bad = [
|
||||
pd.Timedelta("2 hour").to_timedelta64(),
|
||||
pd.Timedelta("1 hour").to_timedelta64(),
|
||||
"foo",
|
||||
]
|
||||
msg = "Could not convert 'foo' to NumPy timedelta"
|
||||
with pytest.raises(ValueError, match=msg):
|
||||
idx.get_indexer(target, "nearest", tolerance=tol_bad)
|
||||
with pytest.raises(ValueError, match="abbreviation w/o a number"):
|
||||
idx.get_indexer(idx[[0]], method="nearest", tolerance="foo")
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"target",
|
||||
[
|
||||
[date(2020, 1, 1), Timestamp("2020-01-02")],
|
||||
[Timestamp("2020-01-01"), date(2020, 1, 2)],
|
||||
],
|
||||
)
|
||||
def test_get_indexer_mixed_dtypes(self, target):
|
||||
# https://github.com/pandas-dev/pandas/issues/33741
|
||||
values = DatetimeIndex([Timestamp("2020-01-01"), Timestamp("2020-01-02")])
|
||||
result = values.get_indexer(target)
|
||||
expected = np.array([0, 1], dtype=np.intp)
|
||||
tm.assert_numpy_array_equal(result, expected)
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"target, positions",
|
||||
[
|
||||
([date(9999, 1, 1), Timestamp("2020-01-01")], [-1, 0]),
|
||||
([Timestamp("2020-01-01"), date(9999, 1, 1)], [0, -1]),
|
||||
([date(9999, 1, 1), date(9999, 1, 1)], [-1, -1]),
|
||||
],
|
||||
)
|
||||
def test_get_indexer_out_of_bounds_date(self, target, positions):
|
||||
values = DatetimeIndex([Timestamp("2020-01-01"), Timestamp("2020-01-02")])
|
||||
|
||||
result = values.get_indexer(target)
|
||||
expected = np.array(positions, dtype=np.intp)
|
||||
tm.assert_numpy_array_equal(result, expected)
|
||||
|
||||
def test_get_indexer_pad_requires_monotonicity(self):
|
||||
rng = date_range("1/1/2000", "3/1/2000", freq="B")
|
||||
|
||||
# neither monotonic increasing or decreasing
|
||||
rng2 = rng[[1, 0, 2]]
|
||||
|
||||
msg = "index must be monotonic increasing or decreasing"
|
||||
with pytest.raises(ValueError, match=msg):
|
||||
rng2.get_indexer(rng, method="pad")
|
||||
|
||||
|
||||
class TestMaybeCastSliceBound:
|
||||
def test_maybe_cast_slice_bounds_empty(self):
|
||||
# GH#14354
|
||||
empty_idx = date_range(freq="1h", periods=0, end="2015")
|
||||
|
||||
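        # a "right" bound is expected to land on the last nanosecond of that day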
        right = empty_idx._maybe_cast_slice_bound("2015-01-02", "right")
        exp = Timestamp("2015-01-02 23:59:59.999999999")
        assert right == exp

        left = empty_idx._maybe_cast_slice_bound("2015-01-02", "left")
        exp = Timestamp("2015-01-02 00:00:00")
        assert left == exp

    def test_maybe_cast_slice_duplicate_monotonic(self):
        # https://github.com/pandas-dev/pandas/issues/16515
        idx = DatetimeIndex(["2017", "2017"])
        result = idx._maybe_cast_slice_bound("2017-01-01", "left")
        expected = Timestamp("2017-01-01")
        assert result == expected


class TestGetSliceBounds:
    @pytest.mark.parametrize("box", [date, datetime, Timestamp])
    @pytest.mark.parametrize("side, expected", [("left", 4), ("right", 5)])
    def test_get_slice_bounds_datetime_within(
        self, box, side, expected, tz_aware_fixture
    ):
        # GH 35690
        tz = tz_aware_fixture
        index = bdate_range("2000-01-03", "2000-02-11").tz_localize(tz)
        key = box(year=2000, month=1, day=7)

        if tz is not None:
            with pytest.raises(TypeError, match="Cannot compare tz-naive"):
                # GH#36148 we require tzawareness-compat as of 2.0
                index.get_slice_bound(key, side=side)
        else:
            result = index.get_slice_bound(key, side=side)
            assert result == expected

    @pytest.mark.parametrize("box", [datetime, Timestamp])
    @pytest.mark.parametrize("side", ["left", "right"])
    @pytest.mark.parametrize("year, expected", [(1999, 0), (2020, 30)])
    def test_get_slice_bounds_datetime_outside(
        self, box, side, year, expected, tz_aware_fixture
    ):
        # GH 35690
        tz = tz_aware_fixture
        index = bdate_range("2000-01-03", "2000-02-11").tz_localize(tz)
        key = box(year=year, month=1, day=7)

        if tz is not None:
            with pytest.raises(TypeError, match="Cannot compare tz-naive"):
                # GH#36148 we require tzawareness-compat as of 2.0
                index.get_slice_bound(key, side=side)
        else:
            result = index.get_slice_bound(key, side=side)
            assert result == expected

    @pytest.mark.parametrize("box", [datetime, Timestamp])
    def test_slice_datetime_locs(self, box, tz_aware_fixture):
        # GH 34077
        tz = tz_aware_fixture
        index = DatetimeIndex(["2010-01-01", "2010-01-03"]).tz_localize(tz)
        key = box(2010, 1, 1)

        if tz is not None:
            with pytest.raises(TypeError, match="Cannot compare tz-naive"):
                # GH#36148 we require tzawareness-compat as of 2.0
                index.slice_locs(key, box(2010, 1, 2))
        else:
            result = index.slice_locs(key, box(2010, 1, 2))
            expected = (0, 1)
            assert result == expected


class TestIndexerBetweenTime:
    def test_indexer_between_time(self):
        # GH#11818
        rng = date_range("1/1/2000", "1/5/2000", freq="5min")
        msg = r"Cannot convert arg \[datetime\.datetime\(2010, 1, 2, 1, 0\)\] to a time"
        with pytest.raises(ValueError, match=msg):
            rng.indexer_between_time(datetime(2010, 1, 2, 1), datetime(2010, 1, 2, 5))

    @pytest.mark.parametrize("unit", ["us", "ms", "s"])
    def test_indexer_between_time_non_nano(self, unit):
        # For simple cases like this, the non-nano indexer_between_time
        # should match the nano result

        rng = date_range("1/1/2000", "1/5/2000", freq="5min")
        arr_nano = rng._data._ndarray

        arr = arr_nano.astype(f"M8[{unit}]")

        dta = type(rng._data)._simple_new(arr, dtype=arr.dtype)
        dti = DatetimeIndex(dta)
        assert dti.dtype == arr.dtype

        tic = time(1, 25)
        toc = time(2, 29)

        result = dti.indexer_between_time(tic, toc)
        expected = rng.indexer_between_time(tic, toc)
        tm.assert_numpy_array_equal(result, expected)

        # case with non-zero micros in arguments
        tic = time(1, 25, 0, 45678)
        toc = time(2, 29, 0, 1234)

        result = dti.indexer_between_time(tic, toc)
        expected = rng.indexer_between_time(tic, toc)
        tm.assert_numpy_array_equal(result, expected)
@ -0,0 +1,76 @@
import dateutil.tz
import numpy as np
import pytest

from pandas import (
    DatetimeIndex,
    date_range,
    to_datetime,
)
from pandas.core.arrays import datetimes


class TestDatetimeIndexIteration:
    @pytest.mark.parametrize(
        "tz", [None, "UTC", "US/Central", dateutil.tz.tzoffset(None, -28800)]
    )
    def test_iteration_preserves_nanoseconds(self, tz):
        # GH#19603
        index = DatetimeIndex(
            ["2018-02-08 15:00:00.168456358", "2018-02-08 15:00:00.168456359"], tz=tz
        )
        for i, ts in enumerate(index):
            assert ts == index[i]  # pylint: disable=unnecessary-list-index-lookup

    def test_iter_readonly(self):
        # GH#28055 ints_to_pydatetime with readonly array
        arr = np.array([np.datetime64("2012-02-15T12:00:00.000000000")])
        arr.setflags(write=False)
        dti = to_datetime(arr)
        list(dti)

    def test_iteration_preserves_tz(self):
        # see GH#8890
        index = date_range("2012-01-01", periods=3, freq="h", tz="US/Eastern")

        for i, ts in enumerate(index):
            result = ts
            expected = index[i]  # pylint: disable=unnecessary-list-index-lookup
            assert result == expected

    def test_iteration_preserves_tz2(self):
        index = date_range(
            "2012-01-01", periods=3, freq="h", tz=dateutil.tz.tzoffset(None, -28800)
        )

        for i, ts in enumerate(index):
            result = ts
            expected = index[i]  # pylint: disable=unnecessary-list-index-lookup
            assert result._repr_base == expected._repr_base
            assert result == expected

    def test_iteration_preserves_tz3(self):
        # GH#9100
        index = DatetimeIndex(
            ["2014-12-01 03:32:39.987000-08:00", "2014-12-01 04:12:34.987000-08:00"]
        )
        for i, ts in enumerate(index):
            result = ts
            expected = index[i]  # pylint: disable=unnecessary-list-index-lookup
            assert result._repr_base == expected._repr_base
            assert result == expected

    @pytest.mark.parametrize("offset", [-5, -1, 0, 1])
    def test_iteration_over_chunksize(self, offset, monkeypatch):
        # GH#21012
        chunksize = 5
        index = date_range(
            "2000-01-01 00:00:00", periods=chunksize - offset, freq="min"
        )
        num = 0
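        # shrink the chunk size so the index length straddles a chunk boundary;
        # the assumption is that _ITER_CHUNKSIZE batches the conversion to
        # Timestamps during iteration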
        with monkeypatch.context() as m:
            m.setattr(datetimes, "_ITER_CHUNKSIZE", chunksize)
            for stamp in index:
                assert index[num] == stamp
                num += 1
        assert num == len(index)
@ -0,0 +1,153 @@
from datetime import (
    datetime,
    timezone,
)

import numpy as np
import pytest

from pandas import (
    DataFrame,
    DatetimeIndex,
    Index,
    Timestamp,
    date_range,
    period_range,
    to_datetime,
)
import pandas._testing as tm

from pandas.tseries.offsets import (
    BDay,
    BMonthEnd,
)


class TestJoin:
    def test_does_not_convert_mixed_integer(self):
        df = DataFrame(np.ones((3, 2)), columns=date_range("2020-01-01", periods=2))
        cols = df.columns.join(df.index, how="outer")
        joined = cols.join(df.columns)
        assert cols.dtype == np.dtype("O")
        assert cols.dtype == joined.dtype
        tm.assert_numpy_array_equal(cols.values, joined.values)

    def test_join_self(self, join_type):
        index = date_range("1/1/2000", periods=10)
        joined = index.join(index, how=join_type)
        assert index is joined

    def test_join_with_period_index(self, join_type):
        df = DataFrame(
            np.ones((10, 2)),
            index=date_range("2020-01-01", periods=10),
            columns=period_range("2020-01-01", periods=2),
        )
        s = df.iloc[:5, 0]

        expected = df.columns.astype("O").join(s.index, how=join_type)
        result = df.columns.join(s.index, how=join_type)
        tm.assert_index_equal(expected, result)

    def test_join_object_index(self):
        rng = date_range("1/1/2000", periods=10)
        idx = Index(["a", "b", "c", "d"])

        result = rng.join(idx, how="outer")
        assert isinstance(result[0], Timestamp)

    def test_join_utc_convert(self, join_type):
        rng = date_range("1/1/2011", periods=100, freq="h", tz="utc")

        left = rng.tz_convert("US/Eastern")
        right = rng.tz_convert("Europe/Berlin")

        result = left.join(left[:-5], how=join_type)
        assert isinstance(result, DatetimeIndex)
        assert result.tz == left.tz

        result = left.join(right[:-5], how=join_type)
        assert isinstance(result, DatetimeIndex)
        assert result.tz is timezone.utc

    def test_datetimeindex_union_join_empty(self, sort, using_infer_string):
        dti = date_range(start="1/1/2001", end="2/1/2001", freq="D")
        empty = Index([])

        result = dti.union(empty, sort=sort)
        if using_infer_string:
            assert isinstance(result, DatetimeIndex)
            tm.assert_index_equal(result, dti)
        else:
            expected = dti.astype("O")
            tm.assert_index_equal(result, expected)

        result = dti.join(empty)
        assert isinstance(result, DatetimeIndex)
        tm.assert_index_equal(result, dti)

    def test_join_nonunique(self):
        idx1 = to_datetime(["2012-11-06 16:00:11.477563", "2012-11-06 16:00:11.477563"])
        idx2 = to_datetime(["2012-11-06 15:11:09.006507", "2012-11-06 15:11:09.006507"])
        rs = idx1.join(idx2, how="outer")
        assert rs.is_monotonic_increasing

    @pytest.mark.parametrize("freq", ["B", "C"])
    def test_outer_join(self, freq):
        # should just behave as union
        start, end = datetime(2009, 1, 1), datetime(2010, 1, 1)
        rng = date_range(start=start, end=end, freq=freq)

        # overlapping
        left = rng[:10]
        right = rng[5:10]

        the_join = left.join(right, how="outer")
        assert isinstance(the_join, DatetimeIndex)

        # non-overlapping, gap in middle
        left = rng[:5]
        right = rng[10:]

        the_join = left.join(right, how="outer")
        assert isinstance(the_join, DatetimeIndex)
        assert the_join.freq is None

        # non-overlapping, no gap
        left = rng[:5]
        right = rng[5:10]

        the_join = left.join(right, how="outer")
        assert isinstance(the_join, DatetimeIndex)

        # overlapping, but different offset
        other = date_range(start, end, freq=BMonthEnd())

        the_join = rng.join(other, how="outer")
        assert isinstance(the_join, DatetimeIndex)
        assert the_join.freq is None

    def test_naive_aware_conflicts(self):
        start, end = datetime(2009, 1, 1), datetime(2010, 1, 1)
        naive = date_range(start, end, freq=BDay(), tz=None)
        aware = date_range(start, end, freq=BDay(), tz="Asia/Hong_Kong")

        msg = "tz-naive.*tz-aware"
        with pytest.raises(TypeError, match=msg):
            naive.join(aware)

        with pytest.raises(TypeError, match=msg):
            aware.join(naive)

    @pytest.mark.parametrize("tz", [None, "US/Pacific"])
    def test_join_preserves_freq(self, tz):
        # GH#32157
        dti = date_range("2016-01-01", periods=10, tz=tz)
        result = dti[:5].join(dti[5:], how="outer")
        assert result.freq == dti.freq
        tm.assert_index_equal(result, dti)

        result = dti[:5].join(dti[6:], how="outer")
        assert result.freq is None
        expected = dti.delete(5)
        tm.assert_index_equal(result, expected)
@ -0,0 +1,13 @@
import numpy as np

from pandas import date_range
import pandas._testing as tm


class TestSplit:
    def test_split_non_utc(self):
        # GH#14042
        indices = date_range("2016-01-01 00:00:00+0200", freq="s", periods=10)
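        # even this no-op split is expected to drop the freq attribute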
        result = np.split(indices, indices_or_sections=[])[0]
        expected = indices._with_freq(None)
        tm.assert_index_equal(result, expected)
@ -0,0 +1,56 @@
from datetime import datetime

import pytest

from pandas import (
    DatetimeIndex,
    Index,
    bdate_range,
    date_range,
)
import pandas._testing as tm


class TestDatetimeIndexOps:
    def test_infer_freq(self, freq_sample):
        # GH 11018
        idx = date_range("2011-01-01 09:00:00", freq=freq_sample, periods=10)
        result = DatetimeIndex(idx.asi8, freq="infer")
        tm.assert_index_equal(idx, result)
        assert result.freq == freq_sample


@pytest.mark.parametrize("freq", ["B", "C"])
class TestBusinessDatetimeIndex:
    @pytest.fixture
    def rng(self, freq):
        START, END = datetime(2009, 1, 1), datetime(2010, 1, 1)
        return bdate_range(START, END, freq=freq)

    def test_comparison(self, rng):
        d = rng[10]

        comp = rng > d
        assert comp[11]
        assert not comp[9]

    def test_copy(self, rng):
        cp = rng.copy()
        tm.assert_index_equal(cp, rng)

    def test_identical(self, rng):
        t1 = rng.copy()
        t2 = rng.copy()
        assert t1.identical(t2)

        # name
        t1 = t1.rename("foo")
        assert t1.equals(t2)
        assert not t1.identical(t2)
        t2 = t2.rename("foo")
        assert t1.identical(t2)

        # freq
        t2v = Index(t2.values)
        assert t1.equals(t2v)
        assert not t1.identical(t2v)
@ -0,0 +1,466 @@
""" test partial slicing on Series/Frame """

from datetime import datetime

import numpy as np
import pytest

from pandas import (
    DataFrame,
    DatetimeIndex,
    Index,
    MultiIndex,
    Series,
    Timedelta,
    Timestamp,
    date_range,
)
import pandas._testing as tm


class TestSlicing:
    def test_string_index_series_name_converted(self):
        # GH#1644
        df = DataFrame(
            np.random.default_rng(2).standard_normal((10, 4)),
            index=date_range("1/1/2000", periods=10),
        )

        result = df.loc["1/3/2000"]
        assert result.name == df.index[2]

        result = df.T["1/3/2000"]
        assert result.name == df.index[2]

    def test_stringified_slice_with_tz(self):
        # GH#2658
        start = "2013-01-07"
        idx = date_range(start=start, freq="1d", periods=10, tz="US/Eastern")
        df = DataFrame(np.arange(10), index=idx)
        df["2013-01-14 23:44:34.437768-05:00":]  # no exception here

    def test_return_type_doesnt_depend_on_monotonicity(self):
        # GH#24892 we get Series back regardless of whether our DTI is monotonic
        dti = date_range(start="2015-5-13 23:59:00", freq="min", periods=3)
        ser = Series(range(3), index=dti)

        # non-monotonic index
        ser2 = Series(range(3), index=[dti[1], dti[0], dti[2]])

        # key with resolution strictly lower than "min"
        key = "2015-5-14 00"

        # monotonic increasing index
        result = ser.loc[key]
        expected = ser.iloc[1:]
        tm.assert_series_equal(result, expected)

        # monotonic decreasing index
        result = ser.iloc[::-1].loc[key]
        expected = ser.iloc[::-1][:-1]
        tm.assert_series_equal(result, expected)

        # non-monotonic index
        result2 = ser2.loc[key]
        expected2 = ser2.iloc[::2]
        tm.assert_series_equal(result2, expected2)

    def test_return_type_doesnt_depend_on_monotonicity_higher_reso(self):
        # GH#24892 we get Series back regardless of whether our DTI is monotonic
        dti = date_range(start="2015-5-13 23:59:00", freq="min", periods=3)
        ser = Series(range(3), index=dti)

        # non-monotonic index
        ser2 = Series(range(3), index=[dti[1], dti[0], dti[2]])

        # key with resolution strictly *higher* than "min"
        key = "2015-5-14 00:00:00"

        # monotonic increasing index
        result = ser.loc[key]
        assert result == 1

        # monotonic decreasing index
        result = ser.iloc[::-1].loc[key]
        assert result == 1

        # non-monotonic index
        result2 = ser2.loc[key]
        assert result2 == 0

    def test_monotone_DTI_indexing_bug(self):
        # GH 19362
        # Testing accessing the first element in a monotonic descending
        # partial string indexing.

        df = DataFrame(list(range(5)))
        date_list = [
            "2018-01-02",
            "2017-02-10",
            "2016-03-10",
            "2015-03-15",
            "2014-03-16",
        ]
        date_index = DatetimeIndex(date_list)
        df["date"] = date_index
        expected = DataFrame({0: list(range(5)), "date": date_index})
        tm.assert_frame_equal(df, expected)

        # We get a slice because df.index's resolution is hourly and we
        # are slicing with a daily-resolution string. If both were daily,
        # we would get a single item back
        dti = date_range("20170101 01:00:00", periods=3)
        df = DataFrame({"A": [1, 2, 3]}, index=dti[::-1])

        expected = DataFrame({"A": 1}, index=dti[-1:][::-1])
        result = df.loc["2017-01-03"]
        tm.assert_frame_equal(result, expected)

        result2 = df.iloc[::-1].loc["2017-01-03"]
        expected2 = expected.iloc[::-1]
        tm.assert_frame_equal(result2, expected2)

    def test_slice_year(self):
        dti = date_range(freq="B", start=datetime(2005, 1, 1), periods=500)

        s = Series(np.arange(len(dti)), index=dti)
        result = s["2005"]
        expected = s[s.index.year == 2005]
        tm.assert_series_equal(result, expected)

        df = DataFrame(np.random.default_rng(2).random((len(dti), 5)), index=dti)
        result = df.loc["2005"]
        expected = df[df.index.year == 2005]
        tm.assert_frame_equal(result, expected)

    @pytest.mark.parametrize(
        "partial_dtime",
        [
            "2019",
            "2019Q4",
            "Dec 2019",
            "2019-12-31",
            "2019-12-31 23",
            "2019-12-31 23:59",
        ],
    )
    def test_slice_end_of_period_resolution(self, partial_dtime):
        # GH#31064
        dti = date_range("2019-12-31 23:59:55.999999999", periods=10, freq="s")

        ser = Series(range(10), index=dti)
        result = ser[partial_dtime]
        expected = ser.iloc[:5]
        tm.assert_series_equal(result, expected)

    def test_slice_quarter(self):
        dti = date_range(freq="D", start=datetime(2000, 6, 1), periods=500)

        s = Series(np.arange(len(dti)), index=dti)
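        # Q1 2001 is 31 + 28 + 31 = 90 days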
assert len(s["2001Q1"]) == 90
|
||||
|
||||
df = DataFrame(np.random.default_rng(2).random((len(dti), 5)), index=dti)
|
||||
assert len(df.loc["1Q01"]) == 90
|
||||
|
||||
def test_slice_month(self):
|
||||
dti = date_range(freq="D", start=datetime(2005, 1, 1), periods=500)
|
||||
s = Series(np.arange(len(dti)), index=dti)
|
||||
assert len(s["2005-11"]) == 30
|
||||
|
||||
df = DataFrame(np.random.default_rng(2).random((len(dti), 5)), index=dti)
|
||||
assert len(df.loc["2005-11"]) == 30
|
||||
|
||||
tm.assert_series_equal(s["2005-11"], s["11-2005"])
|
||||
|
||||
def test_partial_slice(self):
|
||||
rng = date_range(freq="D", start=datetime(2005, 1, 1), periods=500)
|
||||
s = Series(np.arange(len(rng)), index=rng)
|
||||
|
||||
result = s["2005-05":"2006-02"]
|
||||
expected = s["20050501":"20060228"]
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
result = s["2005-05":]
|
||||
expected = s["20050501":]
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
result = s[:"2006-02"]
|
||||
expected = s[:"20060228"]
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
result = s["2005-1-1"]
|
||||
assert result == s.iloc[0]
|
||||
|
||||
with pytest.raises(KeyError, match=r"^'2004-12-31'$"):
|
||||
s["2004-12-31"]
|
||||
|
||||
def test_partial_slice_daily(self):
|
||||
rng = date_range(freq="h", start=datetime(2005, 1, 31), periods=500)
|
||||
s = Series(np.arange(len(rng)), index=rng)
|
||||
|
||||
result = s["2005-1-31"]
|
||||
tm.assert_series_equal(result, s.iloc[:24])
|
||||
|
||||
with pytest.raises(KeyError, match=r"^'2004-12-31 00'$"):
|
||||
s["2004-12-31 00"]
|
||||
|
||||
def test_partial_slice_hourly(self):
|
||||
rng = date_range(freq="min", start=datetime(2005, 1, 1, 20, 0, 0), periods=500)
|
||||
s = Series(np.arange(len(rng)), index=rng)
|
||||
|
||||
result = s["2005-1-1"]
|
||||
tm.assert_series_equal(result, s.iloc[: 60 * 4])
|
||||
|
||||
result = s["2005-1-1 20"]
|
||||
tm.assert_series_equal(result, s.iloc[:60])
|
||||
|
||||
assert s["2005-1-1 20:00"] == s.iloc[0]
|
||||
with pytest.raises(KeyError, match=r"^'2004-12-31 00:15'$"):
|
||||
s["2004-12-31 00:15"]
|
||||
|
||||
def test_partial_slice_minutely(self):
|
||||
rng = date_range(freq="s", start=datetime(2005, 1, 1, 23, 59, 0), periods=500)
|
||||
s = Series(np.arange(len(rng)), index=rng)
|
||||
|
||||
result = s["2005-1-1 23:59"]
|
||||
tm.assert_series_equal(result, s.iloc[:60])
|
||||
|
||||
result = s["2005-1-1"]
|
||||
tm.assert_series_equal(result, s.iloc[:60])
|
||||
|
||||
assert s[Timestamp("2005-1-1 23:59:00")] == s.iloc[0]
|
||||
with pytest.raises(KeyError, match=r"^'2004-12-31 00:00:00'$"):
|
||||
s["2004-12-31 00:00:00"]
|
||||
|
||||
def test_partial_slice_second_precision(self):
|
||||
rng = date_range(
|
||||
start=datetime(2005, 1, 1, 0, 0, 59, microsecond=999990),
|
||||
periods=20,
|
||||
freq="us",
|
||||
)
|
||||
s = Series(np.arange(20), rng)
|
||||
|
||||
tm.assert_series_equal(s["2005-1-1 00:00"], s.iloc[:10])
|
||||
tm.assert_series_equal(s["2005-1-1 00:00:59"], s.iloc[:10])
|
||||
|
||||
tm.assert_series_equal(s["2005-1-1 00:01"], s.iloc[10:])
|
||||
tm.assert_series_equal(s["2005-1-1 00:01:00"], s.iloc[10:])
|
||||
|
||||
assert s[Timestamp("2005-1-1 00:00:59.999990")] == s.iloc[0]
|
||||
with pytest.raises(KeyError, match="2005-1-1 00:00:00"):
|
||||
s["2005-1-1 00:00:00"]
|
||||
|
||||
def test_partial_slicing_dataframe(self):
|
||||
# GH14856
|
||||
# Test various combinations of string slicing resolution vs.
|
||||
# index resolution
|
||||
# - If string resolution is less precise than index resolution,
|
||||
# string is considered a slice
|
||||
# - If string resolution is equal to or more precise than index
|
||||
# resolution, string is considered an exact match
|
||||
formats = [
|
||||
"%Y",
|
||||
"%Y-%m",
|
||||
"%Y-%m-%d",
|
||||
"%Y-%m-%d %H",
|
||||
"%Y-%m-%d %H:%M",
|
||||
"%Y-%m-%d %H:%M:%S",
|
||||
]
|
||||
resolutions = ["year", "month", "day", "hour", "minute", "second"]
|
||||
for rnum, resolution in enumerate(resolutions[2:], 2):
|
||||
# we check only 'day', 'hour', 'minute' and 'second'
|
||||
unit = Timedelta("1 " + resolution)
|
||||
middate = datetime(2012, 1, 1, 0, 0, 0)
|
||||
index = DatetimeIndex([middate - unit, middate, middate + unit])
|
||||
values = [1, 2, 3]
|
||||
df = DataFrame({"a": values}, index, dtype=np.int64)
|
||||
assert df.index.resolution == resolution
|
||||
|
||||
# Timestamp with the same resolution as index
|
||||
# Should be exact match for Series (return scalar)
|
||||
# and raise KeyError for Frame
|
||||
for timestamp, expected in zip(index, values):
|
||||
ts_string = timestamp.strftime(formats[rnum])
|
||||
# make ts_string as precise as index
|
||||
result = df["a"][ts_string]
|
||||
assert isinstance(result, np.int64)
|
||||
assert result == expected
|
||||
msg = rf"^'{ts_string}'$"
|
||||
with pytest.raises(KeyError, match=msg):
|
||||
df[ts_string]
|
||||
|
||||
# Timestamp with resolution less precise than index
|
||||
for fmt in formats[:rnum]:
|
||||
for element, theslice in [[0, slice(None, 1)], [1, slice(1, None)]]:
|
||||
ts_string = index[element].strftime(fmt)
|
||||
|
||||
# Series should return slice
|
||||
result = df["a"][ts_string]
|
||||
expected = df["a"][theslice]
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
# pre-2.0 df[ts_string] was overloaded to interpret this
|
||||
# as slicing along index
|
||||
with pytest.raises(KeyError, match=ts_string):
|
||||
df[ts_string]
|
||||
|
||||
# Timestamp with resolution more precise than index
|
||||
# Compatible with existing key
|
||||
# Should return scalar for Series
|
||||
# and raise KeyError for Frame
|
||||
for fmt in formats[rnum + 1 :]:
|
||||
ts_string = index[1].strftime(fmt)
|
||||
result = df["a"][ts_string]
|
||||
assert isinstance(result, np.int64)
|
||||
assert result == 2
|
||||
msg = rf"^'{ts_string}'$"
|
||||
with pytest.raises(KeyError, match=msg):
|
||||
df[ts_string]
|
||||
|
||||
# Not compatible with existing key
|
||||
# Should raise KeyError
|
||||
for fmt, res in list(zip(formats, resolutions))[rnum + 1 :]:
|
||||
ts = index[1] + Timedelta("1 " + res)
|
||||
ts_string = ts.strftime(fmt)
|
||||
msg = rf"^'{ts_string}'$"
|
||||
with pytest.raises(KeyError, match=msg):
|
||||
df["a"][ts_string]
|
||||
with pytest.raises(KeyError, match=msg):
|
||||
df[ts_string]
|
||||
|
||||
def test_partial_slicing_with_multiindex(self):
|
||||
# GH 4758
|
||||
# partial string indexing with a multi-index buggy
|
||||
df = DataFrame(
|
||||
{
|
||||
"ACCOUNT": ["ACCT1", "ACCT1", "ACCT1", "ACCT2"],
|
||||
"TICKER": ["ABC", "MNP", "XYZ", "XYZ"],
|
||||
"val": [1, 2, 3, 4],
|
||||
},
|
||||
index=date_range("2013-06-19 09:30:00", periods=4, freq="5min"),
|
||||
)
|
||||
df_multi = df.set_index(["ACCOUNT", "TICKER"], append=True)
|
||||
|
||||
expected = DataFrame(
|
||||
[[1]], index=Index(["ABC"], name="TICKER"), columns=["val"]
|
||||
)
|
||||
result = df_multi.loc[("2013-06-19 09:30:00", "ACCT1")]
|
||||
tm.assert_frame_equal(result, expected)
|
||||
|
||||
expected = df_multi.loc[
|
||||
(Timestamp("2013-06-19 09:30:00", tz=None), "ACCT1", "ABC")
|
||||
]
|
||||
result = df_multi.loc[("2013-06-19 09:30:00", "ACCT1", "ABC")]
|
||||
tm.assert_series_equal(result, expected)
|
||||
|
||||
# partial string indexing on first level, scalar indexing on the other two
|
||||
result = df_multi.loc[("2013-06-19", "ACCT1", "ABC")]
|
||||
expected = df_multi.iloc[:1].droplevel([1, 2])
|
||||
tm.assert_frame_equal(result, expected)

    def test_partial_slicing_with_multiindex_series(self):
        # GH 4294
        # partial slice on a Series with a MultiIndex
        ser = Series(
            range(250),
            index=MultiIndex.from_product(
                [date_range("2000-1-1", periods=50), range(5)]
            ),
        )

        s2 = ser[:-1].copy()
        expected = s2["2000-1-4"]
        result = s2[Timestamp("2000-1-4")]
        tm.assert_series_equal(result, expected)

        result = ser[Timestamp("2000-1-4")]
        expected = ser["2000-1-4"]
        tm.assert_series_equal(result, expected)

        df2 = DataFrame(ser)
        expected = df2.xs("2000-1-4")
        result = df2.loc[Timestamp("2000-1-4")]
        tm.assert_frame_equal(result, expected)

    def test_partial_slice_requires_monotonicity(self):
        # Disallowed since 2.0 (GH 37819)
        ser = Series(np.arange(10), date_range("2014-01-01", periods=10))

        nonmonotonic = ser.iloc[[3, 5, 4]]
        timestamp = Timestamp("2014-01-10")
        with pytest.raises(
            KeyError, match="Value based partial slicing on non-monotonic"
        ):
            nonmonotonic["2014-01-10":]

        with pytest.raises(KeyError, match=r"Timestamp\('2014-01-10 00:00:00'\)"):
            nonmonotonic[timestamp:]

        with pytest.raises(
            KeyError, match="Value based partial slicing on non-monotonic"
        ):
            nonmonotonic.loc["2014-01-10":]

        with pytest.raises(KeyError, match=r"Timestamp\('2014-01-10 00:00:00'\)"):
            nonmonotonic.loc[timestamp:]
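
        # Hedged illustration of the flip side: sorting restores monotonicity,
        # after which value-based partial slicing is allowed again (the bound
        # lies past the last key here, so the slice is simply empty).
        monotonic = nonmonotonic.sort_index()
        assert len(monotonic["2014-01-10":]) == 0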

    def test_loc_datetime_length_one(self):
        # GH16071
        df = DataFrame(
            columns=["1"],
            index=date_range("2016-10-01T00:00:00", "2016-10-01T23:59:59"),
        )
        result = df.loc[datetime(2016, 10, 1) :]
        tm.assert_frame_equal(result, df)

        result = df.loc["2016-10-01T00:00:00":]
        tm.assert_frame_equal(result, df)

    @pytest.mark.parametrize(
        "start",
        [
            "2018-12-02 21:50:00+00:00",
            Timestamp("2018-12-02 21:50:00+00:00"),
            Timestamp("2018-12-02 21:50:00+00:00").to_pydatetime(),
        ],
    )
    @pytest.mark.parametrize(
        "end",
        [
            "2018-12-02 21:52:00+00:00",
            Timestamp("2018-12-02 21:52:00+00:00"),
            Timestamp("2018-12-02 21:52:00+00:00").to_pydatetime(),
        ],
    )
    def test_getitem_with_datestring_with_UTC_offset(self, start, end):
        # GH 24076
        # index spans the 21:50-21:52 UTC window queried below
        idx = date_range(
            start="2018-12-02 14:50:00-07:00",
            end="2018-12-02 14:52:00-07:00",
            freq="1min",
        )
        df = DataFrame(1, index=idx, columns=["A"])
        result = df[start:end]
        expected = df.iloc[0:3, :]
        tm.assert_frame_equal(result, expected)

        # GH 16785
        start = str(start)
        end = str(end)
        with pytest.raises(ValueError, match="Both dates must"):
            df[start : end[:-4] + "1:00"]

        with pytest.raises(ValueError, match="The index must be timezone"):
            df = df.tz_localize(None)
            df[start:end]

    def test_slice_reduce_to_series(self):
        # GH 27516
        df = DataFrame(
            {"A": range(24)}, index=date_range("2000", periods=24, freq="ME")
        )
        expected = Series(
            range(12), index=date_range("2000", periods=12, freq="ME"), name="A"
        )
        result = df.loc["2000", "A"]
        tm.assert_series_equal(result, expected)
@ -0,0 +1,45 @@
import pytest

from pandas import (
    NaT,
    date_range,
    to_datetime,
)
import pandas._testing as tm


class TestPickle:
    def test_pickle(self):
        # GH#4606
        idx = to_datetime(["2013-01-01", NaT, "2014-01-06"])
        idx_p = tm.round_trip_pickle(idx)
        assert idx_p[0] == idx[0]
        assert idx_p[1] is NaT
        assert idx_p[2] == idx[2]

    def test_pickle_dont_infer_freq(self):
        # GH#11002
        # don't infer freq
        idx = date_range("1750-1-1", "2050-1-1", freq="7D")
        idx_p = tm.round_trip_pickle(idx)
        tm.assert_index_equal(idx, idx_p)

    def test_pickle_after_set_freq(self):
        dti = date_range("20130101", periods=3, tz="US/Eastern", name="foo")
        dti = dti._with_freq(None)

        res = tm.round_trip_pickle(dti)
        tm.assert_index_equal(res, dti)

    def test_roundtrip_pickle_with_tz(self):
        # GH#8367
        # round-trip of timezone
        index = date_range("20130101", periods=3, tz="US/Eastern", name="foo")
        unpickled = tm.round_trip_pickle(index)
        tm.assert_index_equal(index, unpickled)

    @pytest.mark.parametrize("freq", ["B", "C"])
    def test_pickle_unpickle(self, freq):
        rng = date_range("2009-01-01", "2010-01-01", freq=freq)
        unpickled = tm.round_trip_pickle(rng)
        assert unpickled.freq == freq
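

# Hedged sketch of the helper relied on throughout this class: the real
# pandas._testing.round_trip_pickle writes to a temporary file and reads it
# back; an in-memory equivalent conveys the same semantics.
def _round_trip_pickle_sketch(obj):
    import pickle

    return pickle.loads(pickle.dumps(obj))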
@ -0,0 +1,56 @@
from datetime import timedelta

import numpy as np

from pandas import (
    DatetimeIndex,
    date_range,
)
import pandas._testing as tm


class TestDatetimeIndexReindex:
    def test_reindex_preserves_tz_if_target_is_empty_list_or_array(self):
        # GH#7774
        index = date_range("2013-01-01", periods=3, tz="US/Eastern")
        assert str(index.reindex([])[0].tz) == "US/Eastern"
        assert str(index.reindex(np.array([]))[0].tz) == "US/Eastern"

    def test_reindex_with_same_tz_nearest(self):
        # GH#32740
        rng_a = date_range("2010-01-01", "2010-01-02", periods=24, tz="utc")
        rng_b = date_range("2010-01-01", "2010-01-02", periods=23, tz="utc")
        result1, result2 = rng_a.reindex(
            rng_b, method="nearest", tolerance=timedelta(seconds=20)
        )
        expected_list1 = [
            "2010-01-01 00:00:00",
            "2010-01-01 01:05:27.272727272",
            "2010-01-01 02:10:54.545454545",
            "2010-01-01 03:16:21.818181818",
            "2010-01-01 04:21:49.090909090",
            "2010-01-01 05:27:16.363636363",
            "2010-01-01 06:32:43.636363636",
            "2010-01-01 07:38:10.909090909",
            "2010-01-01 08:43:38.181818181",
            "2010-01-01 09:49:05.454545454",
            "2010-01-01 10:54:32.727272727",
            "2010-01-01 12:00:00",
            "2010-01-01 13:05:27.272727272",
            "2010-01-01 14:10:54.545454545",
            "2010-01-01 15:16:21.818181818",
            "2010-01-01 16:21:49.090909090",
            "2010-01-01 17:27:16.363636363",
            "2010-01-01 18:32:43.636363636",
            "2010-01-01 19:38:10.909090909",
            "2010-01-01 20:43:38.181818181",
            "2010-01-01 21:49:05.454545454",
            "2010-01-01 22:54:32.727272727",
            "2010-01-02 00:00:00",
        ]
        expected1 = DatetimeIndex(
            expected_list1, dtype="datetime64[ns, UTC]", freq=None
        )
        expected2 = np.array([0] + [-1] * 21 + [23], dtype=np.dtype("intp"))
        tm.assert_index_equal(result1, expected1)
        tm.assert_numpy_array_equal(result2, expected2)
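

# Hedged sketch of the indexer semantics exercised above: reindex with
# method="nearest" returns the target index plus an integer indexer into the
# source, with -1 marking targets that have no source value within tolerance.
def _nearest_reindex_demo():
    src = DatetimeIndex(["2010-01-01 00:00:00", "2010-01-01 00:01:00"], tz="UTC")
    target = DatetimeIndex(["2010-01-01 00:00:05", "2010-01-01 00:00:30"], tz="UTC")
    new_index, indexer = src.reindex(
        target, method="nearest", tolerance=timedelta(seconds=10)
    )
    return new_index, indexer  # indexer == array([0, -1])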
@ -0,0 +1,329 @@
"""
Tests for DatetimeIndex methods behaving like their Timestamp counterparts
"""

import calendar
from datetime import (
    date,
    datetime,
    time,
)
import locale
import unicodedata

import numpy as np
import pytest

from pandas._libs.tslibs import timezones

from pandas import (
    DatetimeIndex,
    Index,
    NaT,
    Timestamp,
    date_range,
    offsets,
)
import pandas._testing as tm
from pandas.core.arrays import DatetimeArray


class TestDatetimeIndexOps:
    def test_dti_no_millisecond_field(self):
        msg = "type object 'DatetimeIndex' has no attribute 'millisecond'"
        with pytest.raises(AttributeError, match=msg):
            DatetimeIndex.millisecond

        msg = "'DatetimeIndex' object has no attribute 'millisecond'"
        with pytest.raises(AttributeError, match=msg):
            DatetimeIndex([]).millisecond

    def test_dti_time(self):
        rng = date_range("1/1/2000", freq="12min", periods=10)
        result = Index(rng).time
        expected = [t.time() for t in rng]
        assert (result == expected).all()

    def test_dti_date(self):
        rng = date_range("1/1/2000", freq="12h", periods=10)
        result = Index(rng).date
        expected = [t.date() for t in rng]
        assert (result == expected).all()

    @pytest.mark.parametrize(
        "dtype",
        [None, "datetime64[ns, CET]", "datetime64[ns, EST]", "datetime64[ns, UTC]"],
    )
    def test_dti_date2(self, dtype):
        # Regression test for GH#21230
        expected = np.array([date(2018, 6, 4), NaT])

        index = DatetimeIndex(["2018-06-04 10:00:00", NaT], dtype=dtype)
        result = index.date

        tm.assert_numpy_array_equal(result, expected)

    @pytest.mark.parametrize(
        "dtype",
        [None, "datetime64[ns, CET]", "datetime64[ns, EST]", "datetime64[ns, UTC]"],
    )
    def test_dti_time2(self, dtype):
        # Regression test for GH#21267
        expected = np.array([time(10, 20, 30), NaT])

        index = DatetimeIndex(["2018-06-04 10:20:30", NaT], dtype=dtype)
        result = index.time

        tm.assert_numpy_array_equal(result, expected)

    def test_dti_timetz(self, tz_naive_fixture):
        # GH#21358
        tz = timezones.maybe_get_tz(tz_naive_fixture)

        expected = np.array([time(10, 20, 30, tzinfo=tz), NaT])

        index = DatetimeIndex(["2018-06-04 10:20:30", NaT], tz=tz)
        result = index.timetz

        tm.assert_numpy_array_equal(result, expected)

    @pytest.mark.parametrize(
        "field",
        [
            "dayofweek",
            "day_of_week",
            "dayofyear",
            "day_of_year",
            "quarter",
            "days_in_month",
            "is_month_start",
            "is_month_end",
            "is_quarter_start",
            "is_quarter_end",
            "is_year_start",
            "is_year_end",
        ],
    )
    def test_dti_timestamp_fields(self, field):
        # extra fields from DatetimeIndex like quarter and week
        idx = date_range("2020-01-01", periods=10)
        expected = getattr(idx, field)[-1]

        result = getattr(Timestamp(idx[-1]), field)
        assert result == expected

    def test_dti_nanosecond(self):
        dti = DatetimeIndex(np.arange(10))
        expected = Index(np.arange(10, dtype=np.int32))

        tm.assert_index_equal(dti.nanosecond, expected)

    @pytest.mark.parametrize("prefix", ["", "dateutil/"])
    def test_dti_hour_tzaware(self, prefix):
        strdates = ["1/1/2012", "3/1/2012", "4/1/2012"]
        rng = DatetimeIndex(strdates, tz=prefix + "US/Eastern")
        assert (rng.hour == 0).all()

        # a more unusual time zone, GH#1946
        dr = date_range(
            "2011-10-02 00:00", freq="h", periods=10, tz=prefix + "America/Atikokan"
        )

        expected = Index(np.arange(10, dtype=np.int32))
        tm.assert_index_equal(dr.hour, expected)

    # GH#12806
    # error: Unsupported operand types for + ("List[None]" and "List[str]")
    @pytest.mark.parametrize(
        "time_locale", [None] + tm.get_locales()  # type: ignore[operator]
    )
    def test_day_name_month_name(self, time_locale):
        # Test Monday -> Sunday and January -> December, in that sequence
        if time_locale is None:
            # If the time_locale is None, day_name and month_name should
            # return the English attributes
            expected_days = [
                "Monday",
                "Tuesday",
                "Wednesday",
                "Thursday",
                "Friday",
                "Saturday",
                "Sunday",
            ]
            expected_months = [
                "January",
                "February",
                "March",
                "April",
                "May",
                "June",
                "July",
                "August",
                "September",
                "October",
                "November",
                "December",
            ]
        else:
            with tm.set_locale(time_locale, locale.LC_TIME):
                expected_days = calendar.day_name[:]
                expected_months = calendar.month_name[1:]

        # GH#11128
        dti = date_range(freq="D", start=datetime(1998, 1, 1), periods=365)
        english_days = [
            "Monday",
            "Tuesday",
            "Wednesday",
            "Thursday",
            "Friday",
            "Saturday",
            "Sunday",
        ]
        for day, name, eng_name in zip(range(4, 11), expected_days, english_days):
            name = name.capitalize()
            assert dti.day_name(locale=time_locale)[day] == name
            assert dti.day_name(locale=None)[day] == eng_name
            ts = Timestamp(datetime(2016, 4, day))
            assert ts.day_name(locale=time_locale) == name
        dti = dti.append(DatetimeIndex([NaT]))
        assert np.isnan(dti.day_name(locale=time_locale)[-1])
        ts = Timestamp(NaT)
        assert np.isnan(ts.day_name(locale=time_locale))

        # GH#12805
        dti = date_range(freq="ME", start="2012", end="2013")
        result = dti.month_name(locale=time_locale)
        expected = Index([month.capitalize() for month in expected_months])

        # work around different normalization schemes GH#22342
        result = result.str.normalize("NFD")
        expected = expected.str.normalize("NFD")

        tm.assert_index_equal(result, expected)

        for item, expected in zip(dti, expected_months):
            result = item.month_name(locale=time_locale)
            expected = expected.capitalize()

            result = unicodedata.normalize("NFD", result)
            expected = unicodedata.normalize("NFD", expected)

            assert result == expected
        dti = dti.append(DatetimeIndex([NaT]))
        assert np.isnan(dti.month_name(locale=time_locale)[-1])
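
        # Hedged usage recap with fixed known values in the default English
        # locale: 2016-04-04 was a Monday.
        assert Timestamp("2016-04-04").day_name() == "Monday"
        assert Timestamp("2016-04-04").month_name() == "April"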

    def test_dti_week(self):
        # GH#6538: Check that DatetimeIndex and its Timestamp elements
        # return the same weekofyear accessor close to new year w/ tz
        dates = ["2013/12/29", "2013/12/30", "2013/12/31"]
        dates = DatetimeIndex(dates, tz="Europe/Brussels")
        expected = [52, 1, 1]
        assert dates.isocalendar().week.tolist() == expected
        assert [d.weekofyear for d in dates] == expected

    @pytest.mark.parametrize("tz", [None, "US/Eastern"])
    def test_dti_fields(self, tz):
        # GH#13303
        dti = date_range(freq="D", start=datetime(1998, 1, 1), periods=365, tz=tz)
        assert dti.year[0] == 1998
        assert dti.month[0] == 1
        assert dti.day[0] == 1
        assert dti.hour[0] == 0
        assert dti.minute[0] == 0
        assert dti.second[0] == 0
        assert dti.microsecond[0] == 0
        assert dti.dayofweek[0] == 3

        assert dti.dayofyear[0] == 1
        assert dti.dayofyear[120] == 121

        assert dti.isocalendar().week.iloc[0] == 1
        assert dti.isocalendar().week.iloc[120] == 18

        assert dti.quarter[0] == 1
        assert dti.quarter[120] == 2

        assert dti.days_in_month[0] == 31
        assert dti.days_in_month[90] == 30

        assert dti.is_month_start[0]
        assert not dti.is_month_start[1]
        assert dti.is_month_start[31]
        assert dti.is_quarter_start[0]
        assert dti.is_quarter_start[90]
        assert dti.is_year_start[0]
        assert not dti.is_year_start[364]
        assert not dti.is_month_end[0]
        assert dti.is_month_end[30]
        assert not dti.is_month_end[31]
        assert dti.is_month_end[364]
        assert not dti.is_quarter_end[0]
        assert not dti.is_quarter_end[30]
        assert dti.is_quarter_end[89]
        assert dti.is_quarter_end[364]
        assert not dti.is_year_end[0]
        assert dti.is_year_end[364]

        assert len(dti.year) == 365
        assert len(dti.month) == 365
        assert len(dti.day) == 365
        assert len(dti.hour) == 365
        assert len(dti.minute) == 365
        assert len(dti.second) == 365
        assert len(dti.microsecond) == 365
        assert len(dti.dayofweek) == 365
        assert len(dti.dayofyear) == 365
        assert len(dti.isocalendar()) == 365
        assert len(dti.quarter) == 365
        assert len(dti.is_month_start) == 365
        assert len(dti.is_month_end) == 365
        assert len(dti.is_quarter_start) == 365
        assert len(dti.is_quarter_end) == 365
        assert len(dti.is_year_start) == 365
        assert len(dti.is_year_end) == 365

        dti.name = "name"

        # non boolean accessors -> return Index
        for accessor in DatetimeArray._field_ops:
            res = getattr(dti, accessor)
            assert len(res) == 365
            assert isinstance(res, Index)
            assert res.name == "name"

        # boolean accessors -> return array
        for accessor in DatetimeArray._bool_ops:
            res = getattr(dti, accessor)
            assert len(res) == 365
            assert isinstance(res, np.ndarray)

        # test boolean indexing
        res = dti[dti.is_quarter_start]
        exp = dti[[0, 90, 181, 273]]
        tm.assert_index_equal(res, exp)
        res = dti[dti.is_leap_year]
        exp = DatetimeIndex([], freq="D", tz=dti.tz, name="name").as_unit("ns")
        tm.assert_index_equal(res, exp)

    def test_dti_is_year_quarter_start(self):
        dti = date_range(freq="BQE-FEB", start=datetime(1998, 1, 1), periods=4)

        assert sum(dti.is_quarter_start) == 0
        assert sum(dti.is_quarter_end) == 4
        assert sum(dti.is_year_start) == 0
        assert sum(dti.is_year_end) == 1

    def test_dti_is_month_start(self):
        dti = DatetimeIndex(["2000-01-01", "2000-01-02", "2000-01-03"])

        assert dti.is_month_start[0] == 1

    def test_dti_is_month_start_custom(self):
        # Ensure is_start/end accessors throw ValueError for CustomBusinessDay.
        bday_egypt = offsets.CustomBusinessDay(weekmask="Sun Mon Tue Wed Thu")
        dti = date_range(datetime(2013, 4, 30), periods=5, freq=bday_egypt)
        msg = "Custom business days is not supported by is_month_start"
        with pytest.raises(ValueError, match=msg):
            dti.is_month_start
@ -0,0 +1,666 @@
from datetime import (
    datetime,
    timezone,
)

import numpy as np
import pytest
import pytz

import pandas.util._test_decorators as td

import pandas as pd
from pandas import (
    DataFrame,
    DatetimeIndex,
    Index,
    Series,
    Timestamp,
    bdate_range,
    date_range,
)
import pandas._testing as tm

from pandas.tseries.offsets import (
    BMonthEnd,
    Minute,
    MonthEnd,
)

START, END = datetime(2009, 1, 1), datetime(2010, 1, 1)


class TestDatetimeIndexSetOps:
    tz = [
        None,
        "UTC",
        "Asia/Tokyo",
        "US/Eastern",
        "dateutil/Asia/Singapore",
        "dateutil/US/Pacific",
    ]

    # TODO: moved from test_datetimelike; dedup with version below
    def test_union2(self, sort):
        everything = date_range("2020-01-01", periods=10)
        first = everything[:5]
        second = everything[5:]
        union = first.union(second, sort=sort)
        tm.assert_index_equal(union, everything)

    @pytest.mark.parametrize("box", [np.array, Series, list])
    def test_union3(self, sort, box):
        everything = date_range("2020-01-01", periods=10)
        first = everything[:5]
        second = everything[5:]

        # GH 10149 support listlike inputs other than Index objects
        expected = first.union(second, sort=sort)
        case = box(second.values)
        result = first.union(case, sort=sort)
        tm.assert_index_equal(result, expected)

    @pytest.mark.parametrize("tz", tz)
    def test_union(self, tz, sort):
        rng1 = date_range("1/1/2000", freq="D", periods=5, tz=tz)
        other1 = date_range("1/6/2000", freq="D", periods=5, tz=tz)
        expected1 = date_range("1/1/2000", freq="D", periods=10, tz=tz)
        expected1_notsorted = DatetimeIndex(list(other1) + list(rng1))

        rng2 = date_range("1/1/2000", freq="D", periods=5, tz=tz)
        other2 = date_range("1/4/2000", freq="D", periods=5, tz=tz)
        expected2 = date_range("1/1/2000", freq="D", periods=8, tz=tz)
        expected2_notsorted = DatetimeIndex(list(other2) + list(rng2[:3]))

        rng3 = date_range("1/1/2000", freq="D", periods=5, tz=tz)
        other3 = DatetimeIndex([], tz=tz).as_unit("ns")
        expected3 = date_range("1/1/2000", freq="D", periods=5, tz=tz)
        expected3_notsorted = rng3

        for rng, other, exp, exp_notsorted in [
            (rng1, other1, expected1, expected1_notsorted),
            (rng2, other2, expected2, expected2_notsorted),
            (rng3, other3, expected3, expected3_notsorted),
        ]:
            result_union = rng.union(other, sort=sort)
            tm.assert_index_equal(result_union, exp)

            result_union = other.union(rng, sort=sort)
            if sort is None:
                tm.assert_index_equal(result_union, exp)
            else:
                tm.assert_index_equal(result_union, exp_notsorted)

    def test_union_coverage(self, sort):
        idx = DatetimeIndex(["2000-01-03", "2000-01-01", "2000-01-02"])
        ordered = DatetimeIndex(idx.sort_values(), freq="infer")
        result = ordered.union(idx, sort=sort)
        tm.assert_index_equal(result, ordered)

        result = ordered[:0].union(ordered, sort=sort)
        tm.assert_index_equal(result, ordered)
        assert result.freq == ordered.freq

    def test_union_bug_1730(self, sort):
        rng_a = date_range("1/1/2012", periods=4, freq="3h")
        rng_b = date_range("1/1/2012", periods=4, freq="4h")

        result = rng_a.union(rng_b, sort=sort)
        exp = list(rng_a) + list(rng_b[1:])
        if sort is None:
            exp = DatetimeIndex(sorted(exp))
        else:
            exp = DatetimeIndex(exp)
        tm.assert_index_equal(result, exp)

    def test_union_bug_1745(self, sort):
        left = DatetimeIndex(["2012-05-11 15:19:49.695000"])
        right = DatetimeIndex(
            [
                "2012-05-29 13:04:21.322000",
                "2012-05-11 15:27:24.873000",
                "2012-05-11 15:31:05.350000",
            ]
        )

        result = left.union(right, sort=sort)
        exp = DatetimeIndex(
            [
                "2012-05-11 15:19:49.695000",
                "2012-05-29 13:04:21.322000",
                "2012-05-11 15:27:24.873000",
                "2012-05-11 15:31:05.350000",
            ]
        )
        if sort is None:
            exp = exp.sort_values()
        tm.assert_index_equal(result, exp)

    def test_union_bug_4564(self, sort):
        from pandas import DateOffset

        left = date_range("2013-01-01", "2013-02-01")
        right = left + DateOffset(minutes=15)

        result = left.union(right, sort=sort)
        exp = list(left) + list(right)
        if sort is None:
            exp = DatetimeIndex(sorted(exp))
        else:
            exp = DatetimeIndex(exp)
        tm.assert_index_equal(result, exp)

    def test_union_freq_both_none(self, sort):
        # GH11086
        expected = bdate_range("20150101", periods=10)
        expected._data.freq = None

        result = expected.union(expected, sort=sort)
        tm.assert_index_equal(result, expected)
        assert result.freq is None

    def test_union_freq_infer(self):
        # When taking the union of two DatetimeIndexes, we infer
        # a freq even if the arguments don't have freq. This matches
        # TimedeltaIndex behavior.
        dti = date_range("2016-01-01", periods=5)
        left = dti[[0, 1, 3, 4]]
        right = dti[[2, 3, 1]]

        assert left.freq is None
        assert right.freq is None

        result = left.union(right)
        tm.assert_index_equal(result, dti)
        assert result.freq == "D"

    def test_union_dataframe_index(self):
        rng1 = date_range("1/1/1999", "1/1/2012", freq="MS")
        s1 = Series(np.random.default_rng(2).standard_normal(len(rng1)), rng1)

        rng2 = date_range("1/1/1980", "12/1/2001", freq="MS")
        s2 = Series(np.random.default_rng(2).standard_normal(len(rng2)), rng2)
        df = DataFrame({"s1": s1, "s2": s2})

        exp = date_range("1/1/1980", "1/1/2012", freq="MS")
        tm.assert_index_equal(df.index, exp)

    def test_union_with_DatetimeIndex(self, sort):
        i1 = Index(np.arange(0, 20, 2, dtype=np.int64))
        i2 = date_range(start="2012-01-03 00:00:00", periods=10, freq="D")
        # Works
        i1.union(i2, sort=sort)
        # Used to fail with "AttributeError: can't set attribute"
        i2.union(i1, sort=sort)

    def test_union_same_timezone_different_units(self):
        # GH 55238
        idx1 = date_range("2000-01-01", periods=3, tz="UTC").as_unit("ms")
        idx2 = date_range("2000-01-01", periods=3, tz="UTC").as_unit("us")
        result = idx1.union(idx2)
        expected = date_range("2000-01-01", periods=3, tz="UTC").as_unit("us")
        tm.assert_index_equal(result, expected)
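        # (hedged note: with the same tz but mixed resolutions, the union
        # resolves to the finer unit, hence "us" above)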

    # TODO: moved from test_datetimelike; de-duplicate with version below
    def test_intersection2(self):
        first = date_range("2020-01-01", periods=10)
        second = first[5:]
        intersect = first.intersection(second)
        tm.assert_index_equal(intersect, second)

        # GH 10149
        cases = [klass(second.values) for klass in [np.array, Series, list]]
        for case in cases:
            result = first.intersection(case)
            tm.assert_index_equal(result, second)

        third = Index(["a", "b", "c"])
        result = first.intersection(third)
        expected = Index([], dtype=object)
        tm.assert_index_equal(result, expected)

    @pytest.mark.parametrize(
        "tz", [None, "Asia/Tokyo", "US/Eastern", "dateutil/US/Pacific"]
    )
    def test_intersection(self, tz, sort):
        # GH 4690 (with tz)
        base = date_range("6/1/2000", "6/30/2000", freq="D", name="idx")

        # if target has the same name, it is preserved
        rng2 = date_range("5/15/2000", "6/20/2000", freq="D", name="idx")
        expected2 = date_range("6/1/2000", "6/20/2000", freq="D", name="idx")

        # if target name is different, it will be reset
        rng3 = date_range("5/15/2000", "6/20/2000", freq="D", name="other")
        expected3 = date_range("6/1/2000", "6/20/2000", freq="D", name=None)

        rng4 = date_range("7/1/2000", "7/31/2000", freq="D", name="idx")
        expected4 = DatetimeIndex([], freq="D", name="idx", dtype="M8[ns]")

        for rng, expected in [
            (rng2, expected2),
            (rng3, expected3),
            (rng4, expected4),
        ]:
            result = base.intersection(rng)
            tm.assert_index_equal(result, expected)
            assert result.freq == expected.freq

        # non-monotonic
        base = DatetimeIndex(
            ["2011-01-05", "2011-01-04", "2011-01-02", "2011-01-03"], tz=tz, name="idx"
        ).as_unit("ns")

        rng2 = DatetimeIndex(
            ["2011-01-04", "2011-01-02", "2011-02-02", "2011-02-03"], tz=tz, name="idx"
        ).as_unit("ns")
        expected2 = DatetimeIndex(
            ["2011-01-04", "2011-01-02"], tz=tz, name="idx"
        ).as_unit("ns")

        rng3 = DatetimeIndex(
            ["2011-01-04", "2011-01-02", "2011-02-02", "2011-02-03"],
            tz=tz,
            name="other",
        ).as_unit("ns")
        expected3 = DatetimeIndex(
            ["2011-01-04", "2011-01-02"], tz=tz, name=None
        ).as_unit("ns")

        # GH 7880
        rng4 = date_range("7/1/2000", "7/31/2000", freq="D", tz=tz, name="idx")
        expected4 = DatetimeIndex([], tz=tz, name="idx").as_unit("ns")
        assert expected4.freq is None

        for rng, expected in [
            (rng2, expected2),
            (rng3, expected3),
            (rng4, expected4),
        ]:
            result = base.intersection(rng, sort=sort)
            if sort is None:
                expected = expected.sort_values()
            tm.assert_index_equal(result, expected)
            assert result.freq == expected.freq

    # parametrize over both anchored and non-anchored freqs, as they
    # have different code paths
    @pytest.mark.parametrize("freq", ["min", "B"])
    def test_intersection_empty(self, tz_aware_fixture, freq):
        # empty same freq GH2129
        tz = tz_aware_fixture
        rng = date_range("6/1/2000", "6/15/2000", freq=freq, tz=tz)
        result = rng[0:0].intersection(rng)
        assert len(result) == 0
        assert result.freq == rng.freq

        result = rng.intersection(rng[0:0])
        assert len(result) == 0
        assert result.freq == rng.freq

        # no overlap GH#33604
        check_freq = freq != "min"  # We don't preserve freq on non-anchored offsets
        result = rng[:3].intersection(rng[-3:])
        tm.assert_index_equal(result, rng[:0])
        if check_freq:
            # We don't preserve freq on non-anchored offsets
            assert result.freq == rng.freq

        # swapped left and right
        result = rng[-3:].intersection(rng[:3])
        tm.assert_index_equal(result, rng[:0])
        if check_freq:
            # We don't preserve freq on non-anchored offsets
            assert result.freq == rng.freq

    def test_intersection_bug_1708(self):
        from pandas import DateOffset

        index_1 = date_range("1/1/2012", periods=4, freq="12h")
        index_2 = index_1 + DateOffset(hours=1)

        result = index_1.intersection(index_2)
        assert len(result) == 0

    @pytest.mark.parametrize("tz", tz)
    def test_difference(self, tz, sort):
        rng_dates = ["1/2/2000", "1/3/2000", "1/1/2000", "1/4/2000", "1/5/2000"]

        rng1 = DatetimeIndex(rng_dates, tz=tz)
        other1 = date_range("1/6/2000", freq="D", periods=5, tz=tz)
        expected1 = DatetimeIndex(rng_dates, tz=tz)

        rng2 = DatetimeIndex(rng_dates, tz=tz)
        other2 = date_range("1/4/2000", freq="D", periods=5, tz=tz)
        expected2 = DatetimeIndex(rng_dates[:3], tz=tz)

        rng3 = DatetimeIndex(rng_dates, tz=tz)
        other3 = DatetimeIndex([], tz=tz)
        expected3 = DatetimeIndex(rng_dates, tz=tz)

        for rng, other, expected in [
            (rng1, other1, expected1),
            (rng2, other2, expected2),
            (rng3, other3, expected3),
        ]:
            result_diff = rng.difference(other, sort)
            if sort is None and len(other):
                # We don't sort (yet?) when empty GH#24959
                expected = expected.sort_values()
            tm.assert_index_equal(result_diff, expected)

    def test_difference_freq(self, sort):
        # GH14323: difference of DatetimeIndex should not preserve frequency

        index = date_range("20160920", "20160925", freq="D")
        other = date_range("20160921", "20160924", freq="D")
        expected = DatetimeIndex(["20160920", "20160925"], dtype="M8[ns]", freq=None)
        idx_diff = index.difference(other, sort)
        tm.assert_index_equal(idx_diff, expected)
        tm.assert_attr_equal("freq", idx_diff, expected)

        # preserve frequency when the difference is a contiguous
        # subset of the original range
        other = date_range("20160922", "20160925", freq="D")
        idx_diff = index.difference(other, sort)
        expected = DatetimeIndex(["20160920", "20160921"], dtype="M8[ns]", freq="D")
        tm.assert_index_equal(idx_diff, expected)
        tm.assert_attr_equal("freq", idx_diff, expected)

    def test_datetimeindex_diff(self, sort):
        dti1 = date_range(freq="QE-JAN", start=datetime(1997, 12, 31), periods=100)
        dti2 = date_range(freq="QE-JAN", start=datetime(1997, 12, 31), periods=98)
        assert len(dti1.difference(dti2, sort)) == 2

    @pytest.mark.parametrize("tz", [None, "Asia/Tokyo", "US/Eastern"])
    def test_setops_preserve_freq(self, tz):
        rng = date_range("1/1/2000", "1/1/2002", name="idx", tz=tz)

        result = rng[:50].union(rng[50:100])
        assert result.name == rng.name
        assert result.freq == rng.freq
        assert result.tz == rng.tz

        result = rng[:50].union(rng[30:100])
        assert result.name == rng.name
        assert result.freq == rng.freq
        assert result.tz == rng.tz

        result = rng[:50].union(rng[60:100])
        assert result.name == rng.name
        assert result.freq is None
        assert result.tz == rng.tz

        result = rng[:50].intersection(rng[25:75])
        assert result.name == rng.name
        assert result.freqstr == "D"
        assert result.tz == rng.tz

        nofreq = DatetimeIndex(list(rng[25:75]), name="other")
        result = rng[:50].union(nofreq)
        assert result.name is None
        assert result.freq == rng.freq
        assert result.tz == rng.tz

        result = rng[:50].intersection(nofreq)
        assert result.name is None
        assert result.freq == rng.freq
        assert result.tz == rng.tz

    def test_intersection_non_tick_no_fastpath(self):
        # GH#42104
        dti = DatetimeIndex(
            [
                "2018-12-31",
                "2019-03-31",
                "2019-06-30",
                "2019-09-30",
                "2019-12-31",
                "2020-03-31",
            ],
            freq="QE-DEC",
        )
        result = dti[::2].intersection(dti[1::2])
        expected = dti[:0]
        tm.assert_index_equal(result, expected)

    def test_dti_intersection(self):
        rng = date_range("1/1/2011", periods=100, freq="h", tz="utc")

        left = rng[10:90][::-1]
        right = rng[20:80][::-1]

        assert left.tz == rng.tz
        result = left.intersection(right)
        assert result.tz == left.tz

    # Note: not difference, as there is no symmetry requirement there
    @pytest.mark.parametrize("setop", ["union", "intersection", "symmetric_difference"])
    def test_dti_setop_aware(self, setop):
        # non-overlapping
        # GH#39328 as of 2.0 we cast these to UTC instead of object
        rng = date_range("2012-11-15 00:00:00", periods=6, freq="h", tz="US/Central")

        rng2 = date_range("2012-11-15 12:00:00", periods=6, freq="h", tz="US/Eastern")

        result = getattr(rng, setop)(rng2)

        left = rng.tz_convert("UTC")
        right = rng2.tz_convert("UTC")
        expected = getattr(left, setop)(right)
        tm.assert_index_equal(result, expected)
        assert result.tz == left.tz
        if len(result):
            assert result[0].tz is timezone.utc
            assert result[-1].tz is timezone.utc
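
        # Hedged corollary of the GH#39328 rule above: the mixed-zone result
        # materializes as a UTC dtype rather than object.
        assert str(result.dtype) == "datetime64[ns, UTC]"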

    def test_dti_union_mixed(self):
        # GH#21671
        rng = DatetimeIndex([Timestamp("2011-01-01"), pd.NaT])
        rng2 = DatetimeIndex(["2012-01-01", "2012-01-02"], tz="Asia/Tokyo")
        result = rng.union(rng2)
        expected = Index(
            [
                Timestamp("2011-01-01"),
                pd.NaT,
                Timestamp("2012-01-01", tz="Asia/Tokyo"),
                Timestamp("2012-01-02", tz="Asia/Tokyo"),
            ],
            dtype=object,
        )
        tm.assert_index_equal(result, expected)


class TestBusinessDatetimeIndex:
    def test_union(self, sort):
        rng = bdate_range(START, END)
        # overlapping
        left = rng[:10]
        right = rng[5:10]

        the_union = left.union(right, sort=sort)
        assert isinstance(the_union, DatetimeIndex)

        # non-overlapping, gap in middle
        left = rng[:5]
        right = rng[10:]

        the_union = left.union(right, sort=sort)
        assert isinstance(the_union, Index)

        # non-overlapping, no gap
        left = rng[:5]
        right = rng[5:10]

        the_union = left.union(right, sort=sort)
        assert isinstance(the_union, DatetimeIndex)

        # order does not matter
        if sort is None:
            tm.assert_index_equal(right.union(left, sort=sort), the_union)
        else:
            expected = DatetimeIndex(list(right) + list(left))
            tm.assert_index_equal(right.union(left, sort=sort), expected)

        # overlapping, but different offset
        rng = date_range(START, END, freq=BMonthEnd())

        the_union = rng.union(rng, sort=sort)
        assert isinstance(the_union, DatetimeIndex)

    def test_union_not_cacheable(self, sort):
        rng = date_range("1/1/2000", periods=50, freq=Minute())
        rng1 = rng[10:]
        rng2 = rng[:25]
        the_union = rng1.union(rng2, sort=sort)
        if sort is None:
            tm.assert_index_equal(the_union, rng)
        else:
            expected = DatetimeIndex(list(rng[10:]) + list(rng[:10]))
            tm.assert_index_equal(the_union, expected)

        rng1 = rng[10:]
        rng2 = rng[15:35]
        the_union = rng1.union(rng2, sort=sort)
        expected = rng[10:]
        tm.assert_index_equal(the_union, expected)

    def test_intersection(self):
        rng = date_range("1/1/2000", periods=50, freq=Minute())
        rng1 = rng[10:]
        rng2 = rng[:25]
        the_int = rng1.intersection(rng2)
        expected = rng[10:25]
        tm.assert_index_equal(the_int, expected)
        assert isinstance(the_int, DatetimeIndex)
        assert the_int.freq == rng.freq

        the_int = rng1.intersection(rng2)
        tm.assert_index_equal(the_int, expected)

        # non-overlapping
        the_int = rng[:10].intersection(rng[10:])
        expected = DatetimeIndex([]).as_unit("ns")
        tm.assert_index_equal(the_int, expected)

    def test_intersection_bug(self):
        # GH #771
        a = bdate_range("11/30/2011", "12/31/2011")
        b = bdate_range("12/10/2011", "12/20/2011")
        result = a.intersection(b)
        tm.assert_index_equal(result, b)
        assert result.freq == b.freq

    def test_intersection_list(self):
        # GH#35876
        # values is not an Index -> no name -> retain "a"
        values = [Timestamp("2020-01-01"), Timestamp("2020-02-01")]
        idx = DatetimeIndex(values, name="a")
        res = idx.intersection(values)
        tm.assert_index_equal(res, idx)

    def test_month_range_union_tz_pytz(self, sort):
        tz = pytz.timezone("US/Eastern")

        early_start = datetime(2011, 1, 1)
        early_end = datetime(2011, 3, 1)

        late_start = datetime(2011, 3, 1)
        late_end = datetime(2011, 5, 1)

        early_dr = date_range(start=early_start, end=early_end, tz=tz, freq=MonthEnd())
        late_dr = date_range(start=late_start, end=late_end, tz=tz, freq=MonthEnd())

        early_dr.union(late_dr, sort=sort)

    @td.skip_if_windows
    def test_month_range_union_tz_dateutil(self, sort):
        from pandas._libs.tslibs.timezones import dateutil_gettz

        tz = dateutil_gettz("US/Eastern")

        early_start = datetime(2011, 1, 1)
        early_end = datetime(2011, 3, 1)

        late_start = datetime(2011, 3, 1)
        late_end = datetime(2011, 5, 1)

        early_dr = date_range(start=early_start, end=early_end, tz=tz, freq=MonthEnd())
        late_dr = date_range(start=late_start, end=late_end, tz=tz, freq=MonthEnd())

        early_dr.union(late_dr, sort=sort)

    @pytest.mark.parametrize("sort", [False, None])
    def test_intersection_duplicates(self, sort):
        # GH#38196
        idx1 = Index(
            [
                Timestamp("2019-12-13"),
                Timestamp("2019-12-12"),
                Timestamp("2019-12-12"),
            ]
        )
        result = idx1.intersection(idx1, sort=sort)
        expected = Index([Timestamp("2019-12-13"), Timestamp("2019-12-12")])
        tm.assert_index_equal(result, expected)


class TestCustomDatetimeIndex:
    def test_union(self, sort):
        # overlapping
        rng = bdate_range(START, END, freq="C")
        left = rng[:10]
        right = rng[5:10]

        the_union = left.union(right, sort=sort)
        assert isinstance(the_union, DatetimeIndex)

        # non-overlapping, gap in middle
        left = rng[:5]
        right = rng[10:]

        the_union = left.union(right, sort)
        assert isinstance(the_union, Index)

        # non-overlapping, no gap
        left = rng[:5]
        right = rng[5:10]

        the_union = left.union(right, sort=sort)
        assert isinstance(the_union, DatetimeIndex)

        # order does not matter
        if sort is None:
            tm.assert_index_equal(right.union(left, sort=sort), the_union)

        # overlapping, but different offset
        rng = date_range(START, END, freq=BMonthEnd())

        the_union = rng.union(rng, sort=sort)
        assert isinstance(the_union, DatetimeIndex)

    def test_intersection_bug(self):
        # GH #771
        a = bdate_range("11/30/2011", "12/31/2011", freq="C")
        b = bdate_range("12/10/2011", "12/20/2011", freq="C")
        result = a.intersection(b)
        tm.assert_index_equal(result, b)
        assert result.freq == b.freq

    @pytest.mark.parametrize(
        "tz", [None, "UTC", "Europe/Berlin", pytz.FixedOffset(-60)]
    )
    def test_intersection_dst_transition(self, tz):
        # GH 46702: Europe/Berlin has DST transition
        idx1 = date_range("2020-03-27", periods=5, freq="D", tz=tz)
        idx2 = date_range("2020-03-30", periods=5, freq="D", tz=tz)
        result = idx1.intersection(idx2)
        expected = date_range("2020-03-30", periods=2, freq="D", tz=tz)
        tm.assert_index_equal(result, expected)

        # GH#45863 same problem for union
        index1 = date_range("2021-10-28", periods=3, freq="D", tz="Europe/London")
        index2 = date_range("2021-10-30", periods=4, freq="D", tz="Europe/London")
        result = index1.union(index2)
        expected = date_range("2021-10-28", periods=6, freq="D", tz="Europe/London")
        tm.assert_index_equal(result, expected)
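

# Hedged recap of the `sort` keyword exercised throughout this module:
# sort=None (the default) sorts the combined values, while sort=False keeps
# the concatenation order of the inputs.
def _union_sort_demo():
    left = DatetimeIndex(["2020-01-02", "2020-01-01"])
    right = DatetimeIndex(["2020-01-03"])
    assert left.union(right, sort=None).is_monotonic_increasing
    assert not left.union(right, sort=False).is_monotonic_increasing
    return left.union(right, sort=False)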
@ -0,0 +1,251 @@
"""
Tests for DatetimeIndex timezone-related methods
"""
from datetime import (
    datetime,
    timedelta,
    timezone,
    tzinfo,
)

from dateutil.tz import gettz
import numpy as np
import pytest
import pytz

from pandas._libs.tslibs import (
    conversion,
    timezones,
)

import pandas as pd
from pandas import (
    DatetimeIndex,
    Timestamp,
    bdate_range,
    date_range,
    isna,
    to_datetime,
)
import pandas._testing as tm


class FixedOffset(tzinfo):
    """Fixed offset in minutes east from UTC."""

    def __init__(self, offset, name) -> None:
        self.__offset = timedelta(minutes=offset)
        self.__name = name

    def utcoffset(self, dt):
        return self.__offset

    def tzname(self, dt):
        return self.__name

    def dst(self, dt):
        return timedelta(0)


fixed_off_no_name = FixedOffset(-330, None)
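

# Hedged usage sketch for the helper above: a FixedOffset instance plugs in
# anywhere a tzinfo is accepted and yields a constant offset with no DST.
def _fixed_offset_demo():
    stamp = Timestamp("2011-01-01", tz=FixedOffset(-330, None))
    return stamp.utcoffset() == timedelta(minutes=-330)  # True: constant -05:30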


class TestDatetimeIndexTimezones:
    # -------------------------------------------------------------
    # Unsorted

    def test_dti_drop_dont_lose_tz(self):
        # GH#2621
        ind = date_range("2012-12-01", periods=10, tz="utc")
        ind = ind.drop(ind[-1])

        assert ind.tz is not None

    def test_dti_tz_conversion_freq(self, tz_naive_fixture):
        # GH25241
        t3 = DatetimeIndex(["2019-01-01 10:00"], freq="h")
        assert t3.tz_localize(tz=tz_naive_fixture).freq == t3.freq
        t4 = DatetimeIndex(["2019-01-02 12:00"], tz="UTC", freq="min")
        assert t4.tz_convert(tz="UTC").freq == t4.freq

    def test_drop_dst_boundary(self):
        # see gh-18031
        tz = "Europe/Brussels"
        freq = "15min"

        start = Timestamp("201710290100", tz=tz)
        end = Timestamp("201710290300", tz=tz)
        index = date_range(start=start, end=end, freq=freq)

        expected = DatetimeIndex(
            [
                "201710290115",
                "201710290130",
                "201710290145",
                "201710290200",
                "201710290215",
                "201710290230",
                "201710290245",
                "201710290200",
                "201710290215",
                "201710290230",
                "201710290245",
                "201710290300",
            ],
            dtype="M8[ns, Europe/Brussels]",
            freq=freq,
            ambiguous=[
                True,
                True,
                True,
                True,
                True,
                True,
                True,
                False,
                False,
                False,
                False,
                False,
            ],
        )
        result = index.drop(index[0])
        tm.assert_index_equal(result, expected)

    def test_date_range_localize(self, unit):
        rng = date_range(
            "3/11/2012 03:00", periods=15, freq="h", tz="US/Eastern", unit=unit
        )
        rng2 = DatetimeIndex(
            ["3/11/2012 03:00", "3/11/2012 04:00"], dtype=f"M8[{unit}, US/Eastern]"
        )
        rng3 = date_range("3/11/2012 03:00", periods=15, freq="h", unit=unit)
        rng3 = rng3.tz_localize("US/Eastern")

        tm.assert_index_equal(rng._with_freq(None), rng3)

        # DST transition time
        val = rng[0]
        exp = Timestamp("3/11/2012 03:00", tz="US/Eastern")

        assert val.hour == 3
        assert exp.hour == 3
        assert val == exp  # same UTC value
        tm.assert_index_equal(rng[:2], rng2)

    def test_date_range_localize2(self, unit):
        # Right before the DST transition
        rng = date_range(
            "3/11/2012 00:00", periods=2, freq="h", tz="US/Eastern", unit=unit
        )
        rng2 = DatetimeIndex(
            ["3/11/2012 00:00", "3/11/2012 01:00"],
            dtype=f"M8[{unit}, US/Eastern]",
            freq="h",
        )
        tm.assert_index_equal(rng, rng2)
        exp = Timestamp("3/11/2012 00:00", tz="US/Eastern")
        assert exp.hour == 0
        assert rng[0] == exp
        exp = Timestamp("3/11/2012 01:00", tz="US/Eastern")
        assert exp.hour == 1
        assert rng[1] == exp

        rng = date_range(
            "3/11/2012 00:00", periods=10, freq="h", tz="US/Eastern", unit=unit
        )
        assert rng[2].hour == 3

    def test_timestamp_equality_different_timezones(self):
        utc_range = date_range("1/1/2000", periods=20, tz="UTC")
        eastern_range = utc_range.tz_convert("US/Eastern")
        berlin_range = utc_range.tz_convert("Europe/Berlin")

        for a, b, c in zip(utc_range, eastern_range, berlin_range):
            assert a == b
            assert b == c
            assert a == c

        assert (utc_range == eastern_range).all()
        assert (utc_range == berlin_range).all()
        assert (berlin_range == eastern_range).all()

    def test_dti_equals_with_tz(self):
        left = date_range("1/1/2011", periods=100, freq="h", tz="utc")
        right = date_range("1/1/2011", periods=100, freq="h", tz="US/Eastern")

        assert not left.equals(right)

    @pytest.mark.parametrize("tzstr", ["US/Eastern", "dateutil/US/Eastern"])
    def test_dti_tz_nat(self, tzstr):
        idx = DatetimeIndex([Timestamp("2013-1-1", tz=tzstr), pd.NaT])

        assert isna(idx[1])
        assert idx[0].tzinfo is not None

    @pytest.mark.parametrize("tzstr", ["US/Eastern", "dateutil/US/Eastern"])
    def test_utc_box_timestamp_and_localize(self, tzstr):
        tz = timezones.maybe_get_tz(tzstr)

        rng = date_range("3/11/2012", "3/12/2012", freq="h", tz="utc")
        rng_eastern = rng.tz_convert(tzstr)

        expected = rng[-1].astimezone(tz)

        stamp = rng_eastern[-1]
        assert stamp == expected
        assert stamp.tzinfo == expected.tzinfo

        # right tzinfo
        rng = date_range("3/13/2012", "3/14/2012", freq="h", tz="utc")
        rng_eastern = rng.tz_convert(tzstr)
        # test not valid for dateutil timezones.
        # assert 'EDT' in repr(rng_eastern[0].tzinfo)
        assert "EDT" in repr(rng_eastern[0].tzinfo) or "tzfile" in repr(
            rng_eastern[0].tzinfo
        )

    @pytest.mark.parametrize("tz", [pytz.timezone("US/Central"), gettz("US/Central")])
    def test_with_tz(self, tz):
        # just want it to work
        start = datetime(2011, 3, 12, tzinfo=pytz.utc)
        dr = bdate_range(start, periods=50, freq=pd.offsets.Hour())
        assert dr.tz is pytz.utc

        # DateRange with naive datetimes
        dr = bdate_range("1/1/2005", "1/1/2009", tz=pytz.utc)
        dr = bdate_range("1/1/2005", "1/1/2009", tz=tz)

        # normalized
        central = dr.tz_convert(tz)
        assert central.tz is tz
        naive = central[0].to_pydatetime().replace(tzinfo=None)
        comp = conversion.localize_pydatetime(naive, tz).tzinfo
        assert central[0].tz is comp

        # compare vs a localized tz
        naive = dr[0].to_pydatetime().replace(tzinfo=None)
        comp = conversion.localize_pydatetime(naive, tz).tzinfo
        assert central[0].tz is comp

        # datetimes with tzinfo set
        dr = bdate_range(
            datetime(2005, 1, 1, tzinfo=pytz.utc), datetime(2009, 1, 1, tzinfo=pytz.utc)
        )
        msg = "Start and end cannot both be tz-aware with different timezones"
        with pytest.raises(Exception, match=msg):
            bdate_range(datetime(2005, 1, 1, tzinfo=pytz.utc), "1/1/2009", tz=tz)

    @pytest.mark.parametrize("tz", [pytz.timezone("US/Eastern"), gettz("US/Eastern")])
    def test_dti_convert_tz_aware_datetime_datetime(self, tz):
        # GH#1581
        dates = [datetime(2000, 1, 1), datetime(2000, 1, 2), datetime(2000, 1, 3)]

        dates_aware = [conversion.localize_pydatetime(x, tz) for x in dates]
        result = DatetimeIndex(dates_aware).as_unit("ns")
        assert timezones.tz_compare(result.tz, tz)

        converted = to_datetime(dates_aware, utc=True).as_unit("ns")
        ex_vals = np.array([Timestamp(x).as_unit("ns")._value for x in dates_aware])
        tm.assert_numpy_array_equal(converted.asi8, ex_vals)
        assert converted.tz is timezone.utc
@ -0,0 +1,254 @@
import re

import numpy as np
import pytest

from pandas.core.dtypes.dtypes import (
    CategoricalDtype,
    IntervalDtype,
)

from pandas import (
    CategoricalIndex,
    Index,
    IntervalIndex,
    NaT,
    Timedelta,
    Timestamp,
    interval_range,
)
import pandas._testing as tm


class AstypeTests:
    """Tests common to IntervalIndex with any subtype"""

    def test_astype_idempotent(self, index):
        result = index.astype("interval")
        tm.assert_index_equal(result, index)

        result = index.astype(index.dtype)
        tm.assert_index_equal(result, index)

    def test_astype_object(self, index):
        result = index.astype(object)
        expected = Index(index.values, dtype="object")
        tm.assert_index_equal(result, expected)
        assert not result.equals(index)

    def test_astype_category(self, index):
        result = index.astype("category")
        expected = CategoricalIndex(index.values)
        tm.assert_index_equal(result, expected)

        result = index.astype(CategoricalDtype())
        tm.assert_index_equal(result, expected)

        # non-default params
        categories = index.dropna().unique().values[:-1]
        dtype = CategoricalDtype(categories=categories, ordered=True)
        result = index.astype(dtype)
        expected = CategoricalIndex(index.values, categories=categories, ordered=True)
        tm.assert_index_equal(result, expected)

    @pytest.mark.parametrize(
        "dtype",
        [
            "int64",
            "uint64",
            "float64",
            "complex128",
            "period[M]",
            "timedelta64",
            "timedelta64[ns]",
            "datetime64",
            "datetime64[ns]",
            "datetime64[ns, US/Eastern]",
        ],
    )
    def test_astype_cannot_cast(self, index, dtype):
        msg = "Cannot cast IntervalIndex to dtype"
        with pytest.raises(TypeError, match=msg):
            index.astype(dtype)

    def test_astype_invalid_dtype(self, index):
        msg = "data type [\"']fake_dtype[\"'] not understood"
        with pytest.raises(TypeError, match=msg):
            index.astype("fake_dtype")


class TestIntSubtype(AstypeTests):
    """Tests specific to IntervalIndex with integer-like subtype"""

    indexes = [
        IntervalIndex.from_breaks(np.arange(-10, 11, dtype="int64")),
        IntervalIndex.from_breaks(np.arange(100, dtype="uint64"), closed="left"),
    ]

    @pytest.fixture(params=indexes)
    def index(self, request):
        return request.param

    @pytest.mark.parametrize(
        "subtype", ["float64", "datetime64[ns]", "timedelta64[ns]"]
    )
    def test_subtype_conversion(self, index, subtype):
        dtype = IntervalDtype(subtype, index.closed)
        result = index.astype(dtype)
        expected = IntervalIndex.from_arrays(
            index.left.astype(subtype), index.right.astype(subtype), closed=index.closed
        )
        tm.assert_index_equal(result, expected)

    @pytest.mark.parametrize(
        "subtype_start, subtype_end", [("int64", "uint64"), ("uint64", "int64")]
    )
    def test_subtype_integer(self, subtype_start, subtype_end):
        index = IntervalIndex.from_breaks(np.arange(100, dtype=subtype_start))
        dtype = IntervalDtype(subtype_end, index.closed)
        result = index.astype(dtype)
        expected = IntervalIndex.from_arrays(
            index.left.astype(subtype_end),
            index.right.astype(subtype_end),
            closed=index.closed,
        )
        tm.assert_index_equal(result, expected)

    @pytest.mark.xfail(reason="GH#15832")
    def test_subtype_integer_errors(self):
        # int64 -> uint64 fails with negative values
        index = interval_range(-10, 10)
        dtype = IntervalDtype("uint64", "right")

        # Until we decide what the exception message _should_ be, we
        # assert something that it should _not_ be.
        # We should _not_ be getting a message suggesting that the -10
        # has been wrapped around to a large-positive integer
        msg = "^(?!(left side of interval must be <= right side))"
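        # (the pattern above uses a negative lookahead, so the match succeeds
        # for any message that does NOT begin with the quoted text)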
        with pytest.raises(ValueError, match=msg):
            index.astype(dtype)


class TestFloatSubtype(AstypeTests):
    """Tests specific to IntervalIndex with float subtype"""

    indexes = [
        interval_range(-10.0, 10.0, closed="neither"),
        IntervalIndex.from_arrays(
            [-1.5, np.nan, 0.0, 0.0, 1.5], [-0.5, np.nan, 1.0, 1.0, 3.0], closed="both"
        ),
    ]

    @pytest.fixture(params=indexes)
    def index(self, request):
        return request.param

    @pytest.mark.parametrize("subtype", ["int64", "uint64"])
    def test_subtype_integer(self, subtype):
        index = interval_range(0.0, 10.0)
        dtype = IntervalDtype(subtype, "right")
        result = index.astype(dtype)
        expected = IntervalIndex.from_arrays(
            index.left.astype(subtype), index.right.astype(subtype), closed=index.closed
        )
        tm.assert_index_equal(result, expected)

        # raises with NA
        msg = r"Cannot convert non-finite values \(NA or inf\) to integer"
        with pytest.raises(ValueError, match=msg):
            index.insert(0, np.nan).astype(dtype)

    @pytest.mark.parametrize("subtype", ["int64", "uint64"])
    def test_subtype_integer_with_non_integer_borders(self, subtype):
        index = interval_range(0.0, 3.0, freq=0.25)
        dtype = IntervalDtype(subtype, "right")
        result = index.astype(dtype)
        expected = IntervalIndex.from_arrays(
            index.left.astype(subtype), index.right.astype(subtype), closed=index.closed
        )
        tm.assert_index_equal(result, expected)

    def test_subtype_integer_errors(self):
        # float64 -> uint64 fails with negative values
        index = interval_range(-10.0, 10.0)
        dtype = IntervalDtype("uint64", "right")
        msg = re.escape(
            "Cannot convert interval[float64, right] to interval[uint64, right]; "
            "subtypes are incompatible"
        )
        with pytest.raises(TypeError, match=msg):
            index.astype(dtype)

    @pytest.mark.parametrize("subtype", ["datetime64[ns]", "timedelta64[ns]"])
    def test_subtype_datetimelike(self, index, subtype):
        dtype = IntervalDtype(subtype, "right")
        msg = "Cannot convert .* to .*; subtypes are incompatible"
        with pytest.raises(TypeError, match=msg):
            index.astype(dtype)

    @pytest.mark.filterwarnings(
        "ignore:invalid value encountered in cast:RuntimeWarning"
    )
    def test_astype_category(self, index):
        super().test_astype_category(index)


class TestDatetimelikeSubtype(AstypeTests):
    """Tests specific to IntervalIndex with datetime-like subtype"""

    indexes = [
        interval_range(Timestamp("2018-01-01"), periods=10, closed="neither"),
        interval_range(Timestamp("2018-01-01"), periods=10).insert(2, NaT),
        interval_range(Timestamp("2018-01-01", tz="US/Eastern"), periods=10),
        interval_range(Timedelta("0 days"), periods=10, closed="both"),
        interval_range(Timedelta("0 days"), periods=10).insert(2, NaT),
    ]

    @pytest.fixture(params=indexes)
    def index(self, request):
        return request.param

    @pytest.mark.parametrize("subtype", ["int64", "uint64"])
    def test_subtype_integer(self, index, subtype):
        dtype = IntervalDtype(subtype, "right")

        if subtype != "int64":
            msg = (
                r"Cannot convert interval\[(timedelta64|datetime64)\[ns.*\], .*\] "
                r"to interval\[uint64, .*\]"
            )
            with pytest.raises(TypeError, match=msg):
                index.astype(dtype)
            return

        result = index.astype(dtype)
        new_left = index.left.astype(subtype)
        new_right = index.right.astype(subtype)

        expected = IntervalIndex.from_arrays(new_left, new_right, closed=index.closed)
        tm.assert_index_equal(result, expected)

    def test_subtype_float(self, index):
        dtype = IntervalDtype("float64", "right")
        msg = "Cannot convert .* to .*; subtypes are incompatible"
        with pytest.raises(TypeError, match=msg):
            index.astype(dtype)

    def test_subtype_datetimelike(self):
        # datetime -> timedelta raises
        dtype = IntervalDtype("timedelta64[ns]", "right")
        msg = "Cannot convert .* to .*; subtypes are incompatible"

        index = interval_range(Timestamp("2018-01-01"), periods=10)
        with pytest.raises(TypeError, match=msg):
            index.astype(dtype)

        index = interval_range(Timestamp("2018-01-01", tz="CET"), periods=10)
        with pytest.raises(TypeError, match=msg):
            index.astype(dtype)

        # timedelta -> datetime raises
        dtype = IntervalDtype("datetime64[ns]", "right")
        index = interval_range(Timedelta("0 days"), periods=10)
        with pytest.raises(TypeError, match=msg):
            index.astype(dtype)
@ -0,0 +1,535 @@
|
||||
from functools import partial

import numpy as np
import pytest

import pandas.util._test_decorators as td

from pandas.core.dtypes.common import is_unsigned_integer_dtype
from pandas.core.dtypes.dtypes import IntervalDtype

from pandas import (
    Categorical,
    CategoricalDtype,
    CategoricalIndex,
    Index,
    Interval,
    IntervalIndex,
    date_range,
    notna,
    period_range,
    timedelta_range,
)
import pandas._testing as tm
from pandas.core.arrays import IntervalArray
import pandas.core.common as com


@pytest.fixture(params=[None, "foo"])
def name(request):
    return request.param


class ConstructorTests:
    """
    Common tests for all variations of IntervalIndex construction. Input data
    to be supplied in breaks format, then converted by the subclass method
    get_kwargs_from_breaks to the expected format.
    """

    @pytest.fixture(
        params=[
            ([3, 14, 15, 92, 653], np.int64),
            (np.arange(10, dtype="int64"), np.int64),
            (Index(np.arange(-10, 11, dtype=np.int64)), np.int64),
            (Index(np.arange(10, 31, dtype=np.uint64)), np.uint64),
            (Index(np.arange(20, 30, 0.5), dtype=np.float64), np.float64),
            (date_range("20180101", periods=10), "<M8[ns]"),
            (
                date_range("20180101", periods=10, tz="US/Eastern"),
                "datetime64[ns, US/Eastern]",
            ),
            (timedelta_range("1 day", periods=10), "<m8[ns]"),
        ]
    )
    def breaks_and_expected_subtype(self, request):
        return request.param

    def test_constructor(self, constructor, breaks_and_expected_subtype, closed, name):
        breaks, expected_subtype = breaks_and_expected_subtype

        result_kwargs = self.get_kwargs_from_breaks(breaks, closed)

        result = constructor(closed=closed, name=name, **result_kwargs)

        assert result.closed == closed
        assert result.name == name
        assert result.dtype.subtype == expected_subtype
        tm.assert_index_equal(result.left, Index(breaks[:-1], dtype=expected_subtype))
        tm.assert_index_equal(result.right, Index(breaks[1:], dtype=expected_subtype))

    @pytest.mark.parametrize(
        "breaks, subtype",
        [
            (Index([0, 1, 2, 3, 4], dtype=np.int64), "float64"),
            (Index([0, 1, 2, 3, 4], dtype=np.int64), "datetime64[ns]"),
            (Index([0, 1, 2, 3, 4], dtype=np.int64), "timedelta64[ns]"),
            (Index([0, 1, 2, 3, 4], dtype=np.float64), "int64"),
            (date_range("2017-01-01", periods=5), "int64"),
            (timedelta_range("1 day", periods=5), "int64"),
        ],
    )
    def test_constructor_dtype(self, constructor, breaks, subtype):
        # GH 19262: conversion via dtype parameter
        expected_kwargs = self.get_kwargs_from_breaks(breaks.astype(subtype))
        expected = constructor(**expected_kwargs)

        result_kwargs = self.get_kwargs_from_breaks(breaks)
        iv_dtype = IntervalDtype(subtype, "right")
        for dtype in (iv_dtype, str(iv_dtype)):
            result = constructor(dtype=dtype, **result_kwargs)
            tm.assert_index_equal(result, expected)

    @pytest.mark.parametrize(
        "breaks",
        [
            Index([0, 1, 2, 3, 4], dtype=np.int64),
            Index([0, 1, 2, 3, 4], dtype=np.uint64),
            Index([0, 1, 2, 3, 4], dtype=np.float64),
            date_range("2017-01-01", periods=5),
            timedelta_range("1 day", periods=5),
        ],
    )
    def test_constructor_pass_closed(self, constructor, breaks):
        # not passing closed to IntervalDtype, but to IntervalArray constructor
        iv_dtype = IntervalDtype(breaks.dtype)

        result_kwargs = self.get_kwargs_from_breaks(breaks)

        for dtype in (iv_dtype, str(iv_dtype)):
            with tm.assert_produces_warning(None):
                result = constructor(dtype=dtype, closed="left", **result_kwargs)
            assert result.dtype.closed == "left"

    @pytest.mark.parametrize("breaks", [[np.nan] * 2, [np.nan] * 4, [np.nan] * 50])
    def test_constructor_nan(self, constructor, breaks, closed):
        # GH 18421
        result_kwargs = self.get_kwargs_from_breaks(breaks)
        result = constructor(closed=closed, **result_kwargs)

        expected_subtype = np.float64
        expected_values = np.array(breaks[:-1], dtype=object)

        assert result.closed == closed
        assert result.dtype.subtype == expected_subtype
        tm.assert_numpy_array_equal(np.array(result), expected_values)

    @pytest.mark.parametrize(
        "breaks",
        [
            [],
            np.array([], dtype="int64"),
            np.array([], dtype="uint64"),
            np.array([], dtype="float64"),
            np.array([], dtype="datetime64[ns]"),
            np.array([], dtype="timedelta64[ns]"),
        ],
    )
    def test_constructor_empty(self, constructor, breaks, closed):
        # GH 18421
        result_kwargs = self.get_kwargs_from_breaks(breaks)
        result = constructor(closed=closed, **result_kwargs)

        expected_values = np.array([], dtype=object)
        expected_subtype = getattr(breaks, "dtype", np.int64)
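        # (a plain Python list has no dtype attribute, so the empty-list case
        # above falls back to the int64 default)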

        assert result.empty
        assert result.closed == closed
        assert result.dtype.subtype == expected_subtype
        tm.assert_numpy_array_equal(np.array(result), expected_values)

    @pytest.mark.parametrize(
        "breaks",
        [
            tuple("0123456789"),
            list("abcdefghij"),
            np.array(list("abcdefghij"), dtype=object),
            np.array(list("abcdefghij"), dtype="<U1"),
        ],
    )
    def test_constructor_string(self, constructor, breaks):
        # GH 19016
        msg = (
            "category, object, and string subtypes are not supported "
            "for IntervalIndex"
        )
        with pytest.raises(TypeError, match=msg):
            constructor(**self.get_kwargs_from_breaks(breaks))

    @pytest.mark.parametrize("cat_constructor", [Categorical, CategoricalIndex])
    def test_constructor_categorical_valid(self, constructor, cat_constructor):
        # GH 21243/21253

        breaks = np.arange(10, dtype="int64")
        expected = IntervalIndex.from_breaks(breaks)

        cat_breaks = cat_constructor(breaks)
        result_kwargs = self.get_kwargs_from_breaks(cat_breaks)
        result = constructor(**result_kwargs)
        tm.assert_index_equal(result, expected)

    def test_generic_errors(self, constructor):
        # filler input data to be used when supplying invalid kwargs
        filler = self.get_kwargs_from_breaks(range(10))

        # invalid closed
        msg = "closed must be one of 'right', 'left', 'both', 'neither'"
        with pytest.raises(ValueError, match=msg):
            constructor(closed="invalid", **filler)

        # unsupported dtype
        msg = "dtype must be an IntervalDtype, got int64"
        with pytest.raises(TypeError, match=msg):
            constructor(dtype="int64", **filler)

        # invalid dtype
        msg = "data type [\"']invalid[\"'] not understood"
        with pytest.raises(TypeError, match=msg):
            constructor(dtype="invalid", **filler)

        # no point in nesting periods in an IntervalIndex
        periods = period_range("2000-01-01", periods=10)
        periods_kwargs = self.get_kwargs_from_breaks(periods)
        msg = "Period dtypes are not supported, use a PeriodIndex instead"
        with pytest.raises(ValueError, match=msg):
            constructor(**periods_kwargs)

        # decreasing values
        decreasing_kwargs = self.get_kwargs_from_breaks(range(10, -1, -1))
        msg = "left side of interval must be <= right side"
        with pytest.raises(ValueError, match=msg):
            constructor(**decreasing_kwargs)


class TestFromArrays(ConstructorTests):
    """Tests specific to IntervalIndex.from_arrays"""

    @pytest.fixture
    def constructor(self):
        return IntervalIndex.from_arrays

    def get_kwargs_from_breaks(self, breaks, closed="right"):
        """
        converts intervals in breaks format to a dictionary of kwargs
        specific to the format expected by IntervalIndex.from_arrays
        """
        return {"left": breaks[:-1], "right": breaks[1:]}

    def test_constructor_errors(self):
        # GH 19016: categorical data
        data = Categorical(list("01234abcde"), ordered=True)
        msg = (
            "category, object, and string subtypes are not supported "
            "for IntervalIndex"
        )
        with pytest.raises(TypeError, match=msg):
            IntervalIndex.from_arrays(data[:-1], data[1:])

        # unequal length
        left = [0, 1, 2]
        right = [2, 3]
        msg = "left and right must have the same length"
        with pytest.raises(ValueError, match=msg):
            IntervalIndex.from_arrays(left, right)

    @pytest.mark.parametrize(
        "left_subtype, right_subtype", [(np.int64, np.float64), (np.float64, np.int64)]
    )
    def test_mixed_float_int(self, left_subtype, right_subtype):
        """mixed int/float left/right results in float for both sides"""
        left = np.arange(9, dtype=left_subtype)
        right = np.arange(1, 10, dtype=right_subtype)
        result = IntervalIndex.from_arrays(left, right)

        expected_left = Index(left, dtype=np.float64)
        expected_right = Index(right, dtype=np.float64)
        expected_subtype = np.float64

        tm.assert_index_equal(result.left, expected_left)
        tm.assert_index_equal(result.right, expected_right)
        assert result.dtype.subtype == expected_subtype

    @pytest.mark.parametrize("interval_cls", [IntervalArray, IntervalIndex])
    def test_from_arrays_mismatched_datetimelike_resos(self, interval_cls):
        # GH#55714
        left = date_range("2016-01-01", periods=3, unit="s")
        right = date_range("2017-01-01", periods=3, unit="ms")
        result = interval_cls.from_arrays(left, right)
        expected = interval_cls.from_arrays(left.as_unit("ms"), right)
        tm.assert_equal(result, expected)

        # td64
        left2 = left - left[0]
        right2 = right - left[0]
        result2 = interval_cls.from_arrays(left2, right2)
        expected2 = interval_cls.from_arrays(left2.as_unit("ms"), right2)
        tm.assert_equal(result2, expected2)

        # dt64tz
        left3 = left.tz_localize("UTC")
        right3 = right.tz_localize("UTC")
        result3 = interval_cls.from_arrays(left3, right3)
        expected3 = interval_cls.from_arrays(left3.as_unit("ms"), right3)
        tm.assert_equal(result3, expected3)


class TestFromBreaks(ConstructorTests):
    """Tests specific to IntervalIndex.from_breaks"""

    @pytest.fixture
    def constructor(self):
        return IntervalIndex.from_breaks

    def get_kwargs_from_breaks(self, breaks, closed="right"):
        """
        converts intervals in breaks format to a dictionary of kwargs
        specific to the format expected by IntervalIndex.from_breaks
        """
        return {"breaks": breaks}

    def test_constructor_errors(self):
        # GH 19016: categorical data
        data = Categorical(list("01234abcde"), ordered=True)
        msg = (
            "category, object, and string subtypes are not supported "
            "for IntervalIndex"
        )
        with pytest.raises(TypeError, match=msg):
            IntervalIndex.from_breaks(data)

    def test_length_one(self):
        """breaks of length one produce an empty IntervalIndex"""
        breaks = [0]
        result = IntervalIndex.from_breaks(breaks)
        expected = IntervalIndex.from_breaks([])
        tm.assert_index_equal(result, expected)

    def test_left_right_dont_share_data(self):
        # GH#36310
        breaks = np.arange(5)
        result = IntervalIndex.from_breaks(breaks)._data
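        # if the left and right arrays shared a writable buffer, mutating one
        # side in place could silently corrupt the other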
        assert result._left.base is None or result._left.base is not result._right.base


class TestFromTuples(ConstructorTests):
    """Tests specific to IntervalIndex.from_tuples"""

    @pytest.fixture
    def constructor(self):
        return IntervalIndex.from_tuples

    def get_kwargs_from_breaks(self, breaks, closed="right"):
        """
        converts intervals in breaks format to a dictionary of kwargs
        specific to the format expected by IntervalIndex.from_tuples
        """
        if is_unsigned_integer_dtype(breaks):
            pytest.skip(
                f"{breaks.dtype} not relevant for IntervalIndex.from_tuples tests"
            )

        if len(breaks) == 0:
            return {"data": breaks}

        tuples = list(zip(breaks[:-1], breaks[1:]))
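        # consecutive breaks pair up into (left, right) tuples, e.g. breaks
        # [0, 1, 2] become [(0, 1), (1, 2)]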
        if isinstance(breaks, (list, tuple)):
            return {"data": tuples}
        elif isinstance(getattr(breaks, "dtype", None), CategoricalDtype):
            return {"data": breaks._constructor(tuples)}
        return {"data": com.asarray_tuplesafe(tuples)}

    def test_constructor_errors(self):
        # non-tuple
        tuples = [(0, 1), 2, (3, 4)]
        msg = "IntervalIndex.from_tuples received an invalid item, 2"
        with pytest.raises(TypeError, match=msg.format(t=tuples)):
            IntervalIndex.from_tuples(tuples)

        # too few/many items
        tuples = [(0, 1), (2,), (3, 4)]
        msg = "IntervalIndex.from_tuples requires tuples of length 2, got {t}"
        with pytest.raises(ValueError, match=msg.format(t=tuples)):
            IntervalIndex.from_tuples(tuples)

        tuples = [(0, 1), (2, 3, 4), (5, 6)]
        with pytest.raises(ValueError, match=msg.format(t=tuples)):
            IntervalIndex.from_tuples(tuples)

    def test_na_tuples(self):
        # tuple (NA, NA) evaluates the same as NA as an element
        na_tuple = [(0, 1), (np.nan, np.nan), (2, 3)]
        idx_na_tuple = IntervalIndex.from_tuples(na_tuple)
        idx_na_element = IntervalIndex.from_tuples([(0, 1), np.nan, (2, 3)])
        tm.assert_index_equal(idx_na_tuple, idx_na_element)


class TestClassConstructors(ConstructorTests):
    """Tests specific to the IntervalIndex/Index constructors"""

    @pytest.fixture(
        params=[IntervalIndex, partial(Index, dtype="interval")],
        ids=["IntervalIndex", "Index"],
    )
    def klass(self, request):
        # We use a separate fixture here to include Index.__new__ with dtype kwarg
        return request.param

    @pytest.fixture
    def constructor(self):
        return IntervalIndex

    def get_kwargs_from_breaks(self, breaks, closed="right"):
        """
        converts intervals in breaks format to a dictionary of kwargs
        specific to the format expected by the IntervalIndex/Index constructors
        """
        if is_unsigned_integer_dtype(breaks):
            pytest.skip(f"{breaks.dtype} not relevant for class constructor tests")

        if len(breaks) == 0:
            return {"data": breaks}

        ivs = [
            Interval(left, right, closed) if notna(left) else left
            for left, right in zip(breaks[:-1], breaks[1:])
        ]

        if isinstance(breaks, list):
            return {"data": ivs}
        elif isinstance(getattr(breaks, "dtype", None), CategoricalDtype):
            return {"data": breaks._constructor(ivs)}
        return {"data": np.array(ivs, dtype=object)}

    def test_generic_errors(self, constructor):
        """
        override the base class implementation since errors are handled
        differently; checks unnecessary since caught at the Interval level
        """

    def test_constructor_string(self):
        # GH23013
        # When forming the interval from breaks,
        # the interval of strings is already forbidden.
        pass

    def test_constructor_errors(self, klass):
        # mismatched closed within intervals with no constructor override
        ivs = [Interval(0, 1, closed="right"), Interval(2, 3, closed="left")]
        msg = "intervals must all be closed on the same side"
        with pytest.raises(ValueError, match=msg):
            klass(ivs)

        # scalar
        msg = (
            r"(IntervalIndex|Index)\(...\) must be called with a collection of "
            "some kind, 5 was passed"
        )
        with pytest.raises(TypeError, match=msg):
            klass(5)

        # not an interval; dtype depends on 32bit/windows builds
        msg = "type <class 'numpy.int(32|64)'> with value 0 is not an interval"
        with pytest.raises(TypeError, match=msg):
            klass([0, 1])

    @pytest.mark.parametrize(
        "data, closed",
        [
            ([], "both"),
            ([np.nan, np.nan], "neither"),
            (
                [Interval(0, 3, closed="neither"), Interval(2, 5, closed="neither")],
                "left",
            ),
            (
                [Interval(0, 3, closed="left"), Interval(2, 5, closed="right")],
                "neither",
            ),
            (IntervalIndex.from_breaks(range(5), closed="both"), "right"),
        ],
    )
    def test_override_inferred_closed(self, constructor, data, closed):
        # GH 19370
        if isinstance(data, IntervalIndex):
            tuples = data.to_tuples()
        else:
            tuples = [(iv.left, iv.right) if notna(iv) else iv for iv in data]
        expected = IntervalIndex.from_tuples(tuples, closed=closed)
        result = constructor(data, closed=closed)
        tm.assert_index_equal(result, expected)

    @pytest.mark.parametrize(
        "values_constructor", [list, np.array, IntervalIndex, IntervalArray]
    )
    def test_index_object_dtype(self, values_constructor):
        # Index(intervals, dtype=object) is an Index (not an IntervalIndex)
        intervals = [Interval(0, 1), Interval(1, 2), Interval(2, 3)]
        values = values_constructor(intervals)
        result = Index(values, dtype=object)

        assert type(result) is Index
        tm.assert_numpy_array_equal(result.values, np.array(values))

    def test_index_mixed_closed(self):
        # GH27172
        intervals = [
            Interval(0, 1, closed="left"),
            Interval(1, 2, closed="right"),
            Interval(2, 3, closed="neither"),
            Interval(3, 4, closed="both"),
        ]
        result = Index(intervals)
        expected = Index(intervals, dtype=object)
        tm.assert_index_equal(result, expected)


@pytest.mark.parametrize("timezone", ["UTC", "US/Pacific", "GMT"])
def test_interval_index_subtype(timezone, inclusive_endpoints_fixture):
    # GH#46999
    dates = date_range("2022", periods=3, tz=timezone)
    dtype = f"interval[datetime64[ns, {timezone}], {inclusive_endpoints_fixture}]"
    result = IntervalIndex.from_arrays(
        ["2022-01-01", "2022-01-02"],
        ["2022-01-02", "2022-01-03"],
        closed=inclusive_endpoints_fixture,
        dtype=dtype,
    )
    expected = IntervalIndex.from_arrays(
        dates[:-1], dates[1:], closed=inclusive_endpoints_fixture
    )
    tm.assert_index_equal(result, expected)


def test_dtype_closed_mismatch():
    # GH#38394 closed specified in both dtype and IntervalIndex constructor

    dtype = IntervalDtype(np.int64, "left")

    msg = "closed keyword does not match dtype.closed"
    with pytest.raises(ValueError, match=msg):
        IntervalIndex([], dtype=dtype, closed="neither")

    with pytest.raises(ValueError, match=msg):
        IntervalArray([], dtype=dtype, closed="neither")


@pytest.mark.parametrize(
    "dtype",
    ["Float64", pytest.param("float64[pyarrow]", marks=td.skip_if_no("pyarrow"))],
)
def test_ea_dtype(dtype):
    # GH#56765
    bins = [(0.0, 0.4), (0.4, 0.6)]
    interval_dtype = IntervalDtype(subtype=dtype, closed="left")
    result = IntervalIndex.from_tuples(bins, closed="left", dtype=interval_dtype)
    assert result.dtype == interval_dtype
    expected = IntervalIndex.from_tuples(bins, closed="left").astype(interval_dtype)
    tm.assert_index_equal(result, expected)
@ -0,0 +1,36 @@
import numpy as np

from pandas import (
    IntervalIndex,
    date_range,
)


class TestEquals:
    def test_equals(self, closed):
        expected = IntervalIndex.from_breaks(np.arange(5), closed=closed)
        assert expected.equals(expected)
        assert expected.equals(expected.copy())

        assert not expected.equals(expected.astype(object))
        assert not expected.equals(np.array(expected))
        assert not expected.equals(list(expected))

        assert not expected.equals([1, 2])
        assert not expected.equals(np.array([1, 2]))
        assert not expected.equals(date_range("20130101", periods=2))

        expected_name1 = IntervalIndex.from_breaks(
            np.arange(5), closed=closed, name="foo"
        )
        expected_name2 = IntervalIndex.from_breaks(
            np.arange(5), closed=closed, name="bar"
        )
        assert expected.equals(expected_name1)
        assert expected_name1.equals(expected_name2)

        for other_closed in {"left", "right", "both", "neither"} - {closed}:
            expected_other_closed = IntervalIndex.from_breaks(
                np.arange(5), closed=other_closed
            )
            assert not expected.equals(expected_other_closed)
@ -0,0 +1,119 @@
import numpy as np
import pytest

from pandas import (
    DataFrame,
    DatetimeIndex,
    Index,
    Interval,
    IntervalIndex,
    Series,
    Timedelta,
    Timestamp,
)
import pandas._testing as tm


class TestIntervalIndexRendering:
    # TODO: this is a test for DataFrame/Series, not IntervalIndex
    @pytest.mark.parametrize(
        "constructor,expected",
        [
            (
                Series,
                (
                    "(0.0, 1.0]    a\n"
                    "NaN           b\n"
                    "(2.0, 3.0]    c\n"
                    "dtype: object"
                ),
            ),
            (DataFrame, ("            0\n(0.0, 1.0]  a\nNaN         b\n(2.0, 3.0]  c")),
        ],
    )
    def test_repr_missing(self, constructor, expected, using_infer_string, request):
        # GH 25984
        if using_infer_string and constructor is Series:
            request.applymarker(pytest.mark.xfail(reason="repr different"))
        index = IntervalIndex.from_tuples([(0, 1), np.nan, (2, 3)])
        obj = constructor(list("abc"), index=index)
        result = repr(obj)
        assert result == expected

    def test_repr_floats(self):
        # GH 32553

        markers = Series(
            [1, 2],
            index=IntervalIndex(
                [
                    Interval(left, right)
                    for left, right in zip(
                        Index([329.973, 345.137], dtype="float64"),
                        Index([345.137, 360.191], dtype="float64"),
                    )
                ]
            ),
        )
        result = str(markers)
        expected = "(329.973, 345.137]    1\n(345.137, 360.191]    2\ndtype: int64"
        assert result == expected

    @pytest.mark.filterwarnings(
        "ignore:invalid value encountered in cast:RuntimeWarning"
    )
    @pytest.mark.parametrize(
        "tuples, closed, expected_data",
        [
            ([(0, 1), (1, 2), (2, 3)], "left", ["[0, 1)", "[1, 2)", "[2, 3)"]),
            (
                [(0.5, 1.0), np.nan, (2.0, 3.0)],
                "right",
                ["(0.5, 1.0]", "NaN", "(2.0, 3.0]"],
            ),
            (
                [
                    (Timestamp("20180101"), Timestamp("20180102")),
                    np.nan,
                    (Timestamp("20180102"), Timestamp("20180103")),
                ],
                "both",
                [
                    "[2018-01-01 00:00:00, 2018-01-02 00:00:00]",
                    "NaN",
                    "[2018-01-02 00:00:00, 2018-01-03 00:00:00]",
                ],
            ),
            (
                [
                    (Timedelta("0 days"), Timedelta("1 days")),
                    (Timedelta("1 days"), Timedelta("2 days")),
                    np.nan,
                ],
                "neither",
                [
                    "(0 days 00:00:00, 1 days 00:00:00)",
                    "(1 days 00:00:00, 2 days 00:00:00)",
                    "NaN",
                ],
            ),
        ],
    )
    def test_get_values_for_csv(self, tuples, closed, expected_data):
        # GH 28210
        index = IntervalIndex.from_tuples(tuples, closed=closed)
        result = index._get_values_for_csv(na_rep="NaN")
        expected = np.array(expected_data)
        tm.assert_numpy_array_equal(result, expected)

    def test_timestamp_with_timezone(self, unit):
        # GH 55035
        left = DatetimeIndex(["2020-01-01"], dtype=f"M8[{unit}, UTC]")
        right = DatetimeIndex(["2020-01-02"], dtype=f"M8[{unit}, UTC]")
        index = IntervalIndex.from_arrays(left, right)
        result = repr(index)
        expected = (
            "IntervalIndex([(2020-01-01 00:00:00+00:00, 2020-01-02 00:00:00+00:00]], "
            f"dtype='interval[datetime64[{unit}, UTC], right]')"
        )
        assert result == expected
@ -0,0 +1,674 @@
import re

import numpy as np
import pytest

from pandas.errors import InvalidIndexError

from pandas import (
    NA,
    CategoricalIndex,
    DatetimeIndex,
    Index,
    Interval,
    IntervalIndex,
    MultiIndex,
    NaT,
    Timedelta,
    Timestamp,
    array,
    date_range,
    interval_range,
    isna,
    period_range,
    timedelta_range,
)
import pandas._testing as tm


class TestGetItem:
    def test_getitem(self, closed):
        idx = IntervalIndex.from_arrays((0, 1, np.nan), (1, 2, np.nan), closed=closed)
        assert idx[0] == Interval(0.0, 1.0, closed=closed)
        assert idx[1] == Interval(1.0, 2.0, closed=closed)
        assert isna(idx[2])

        result = idx[0:1]
        expected = IntervalIndex.from_arrays((0.0,), (1.0,), closed=closed)
        tm.assert_index_equal(result, expected)

        result = idx[0:2]
        expected = IntervalIndex.from_arrays((0.0, 1), (1.0, 2.0), closed=closed)
        tm.assert_index_equal(result, expected)

        result = idx[1:3]
        expected = IntervalIndex.from_arrays(
            (1.0, np.nan), (2.0, np.nan), closed=closed
        )
        tm.assert_index_equal(result, expected)

    def test_getitem_2d_deprecated(self):
        # GH#30588 multi-dim indexing is deprecated, but raising is also acceptable
        idx = IntervalIndex.from_breaks(range(11), closed="right")
        with pytest.raises(ValueError, match="multi-dimensional indexing not allowed"):
            idx[:, None]
        with pytest.raises(ValueError, match="multi-dimensional indexing not allowed"):
            # GH#44051
            idx[True]
        with pytest.raises(ValueError, match="multi-dimensional indexing not allowed"):
            # GH#44051
            idx[False]


class TestWhere:
    def test_where(self, listlike_box):
        klass = listlike_box

        idx = IntervalIndex.from_breaks(range(11), closed="right")
        cond = [True] * len(idx)
        expected = idx
        result = expected.where(klass(cond))
        tm.assert_index_equal(result, expected)

        cond = [False] + [True] * len(idx[1:])
        expected = IntervalIndex([np.nan] + idx[1:].tolist())
        result = idx.where(klass(cond))
        tm.assert_index_equal(result, expected)


class TestTake:
    def test_take(self, closed):
        index = IntervalIndex.from_breaks(range(11), closed=closed)

        result = index.take(range(10))
        tm.assert_index_equal(result, index)

        result = index.take([0, 0, 1])
        expected = IntervalIndex.from_arrays([0, 0, 1], [1, 1, 2], closed=closed)
        tm.assert_index_equal(result, expected)


class TestGetLoc:
    @pytest.mark.parametrize("side", ["right", "left", "both", "neither"])
    def test_get_loc_interval(self, closed, side):
        idx = IntervalIndex.from_tuples([(0, 1), (2, 3)], closed=closed)

        for bound in [[0, 1], [1, 2], [2, 3], [3, 4], [0, 2], [2.5, 3], [-1, 4]]:
            # if get_loc is supplied an interval, it should only search
            # for exact matches, not overlaps or covers, else KeyError.
            msg = re.escape(f"Interval({bound[0]}, {bound[1]}, closed='{side}')")
            if closed == side:
                if bound == [0, 1]:
                    assert idx.get_loc(Interval(0, 1, closed=side)) == 0
                elif bound == [2, 3]:
                    assert idx.get_loc(Interval(2, 3, closed=side)) == 1
                else:
                    with pytest.raises(KeyError, match=msg):
                        idx.get_loc(Interval(*bound, closed=side))
            else:
                with pytest.raises(KeyError, match=msg):
                    idx.get_loc(Interval(*bound, closed=side))

    @pytest.mark.parametrize("scalar", [-0.5, 0, 0.5, 1, 1.5, 2, 2.5, 3, 3.5])
    def test_get_loc_scalar(self, closed, scalar):
        # correct = {side: {query: answer}}.
        # If query is not in the dict, that query should raise a KeyError
        correct = {
            "right": {0.5: 0, 1: 0, 2.5: 1, 3: 1},
            "left": {0: 0, 0.5: 0, 2: 1, 2.5: 1},
            "both": {0: 0, 0.5: 0, 1: 0, 2: 1, 2.5: 1, 3: 1},
            "neither": {0.5: 0, 2.5: 1},
        }

        idx = IntervalIndex.from_tuples([(0, 1), (2, 3)], closed=closed)

        # if get_loc is supplied a scalar, it should return the index of
        # the interval which contains the scalar, or KeyError.
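        # e.g. with closed="right" the first interval is (0, 1], so
        # get_loc(1) == 0 while get_loc(0) raises KeyError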
        if scalar in correct[closed].keys():
            assert idx.get_loc(scalar) == correct[closed][scalar]
        else:
            with pytest.raises(KeyError, match=str(scalar)):
                idx.get_loc(scalar)

    @pytest.mark.parametrize("scalar", [-1, 0, 0.5, 3, 4.5, 5, 6])
    def test_get_loc_length_one_scalar(self, scalar, closed):
        # GH 20921
        index = IntervalIndex.from_tuples([(0, 5)], closed=closed)
        if scalar in index[0]:
            result = index.get_loc(scalar)
            assert result == 0
        else:
            with pytest.raises(KeyError, match=str(scalar)):
                index.get_loc(scalar)

    @pytest.mark.parametrize("other_closed", ["left", "right", "both", "neither"])
    @pytest.mark.parametrize("left, right", [(0, 5), (-1, 4), (-1, 6), (6, 7)])
    def test_get_loc_length_one_interval(self, left, right, closed, other_closed):
        # GH 20921
        index = IntervalIndex.from_tuples([(0, 5)], closed=closed)
        interval = Interval(left, right, closed=other_closed)
        if interval == index[0]:
            result = index.get_loc(interval)
            assert result == 0
        else:
            with pytest.raises(
                KeyError,
                match=re.escape(f"Interval({left}, {right}, closed='{other_closed}')"),
            ):
                index.get_loc(interval)

    # Make consistent with test_interval_new.py (see #16316, #16386)
    @pytest.mark.parametrize(
        "breaks",
        [
            date_range("20180101", periods=4),
            date_range("20180101", periods=4, tz="US/Eastern"),
            timedelta_range("0 days", periods=4),
        ],
        ids=lambda x: str(x.dtype),
    )
    def test_get_loc_datetimelike_nonoverlapping(self, breaks):
        # GH 20636
        # nonoverlapping = IntervalIndex method and no i8 conversion
        index = IntervalIndex.from_breaks(breaks)

        value = index[0].mid
        result = index.get_loc(value)
        expected = 0
        assert result == expected

        interval = Interval(index[0].left, index[0].right)
        result = index.get_loc(interval)
        expected = 0
        assert result == expected

    @pytest.mark.parametrize(
        "arrays",
        [
            (date_range("20180101", periods=4), date_range("20180103", periods=4)),
            (
                date_range("20180101", periods=4, tz="US/Eastern"),
                date_range("20180103", periods=4, tz="US/Eastern"),
            ),
            (
                timedelta_range("0 days", periods=4),
                timedelta_range("2 days", periods=4),
            ),
        ],
        ids=lambda x: str(x[0].dtype),
    )
    def test_get_loc_datetimelike_overlapping(self, arrays):
        # GH 20636
        index = IntervalIndex.from_arrays(*arrays)
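        # the left/right arrays above overlap, so a scalar can fall inside
        # more than one interval and get_loc returns a slice of matches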

        value = index[0].mid + Timedelta("12 hours")
        result = index.get_loc(value)
        expected = slice(0, 2, None)
        assert result == expected

        interval = Interval(index[0].left, index[0].right)
        result = index.get_loc(interval)
        expected = 0
        assert result == expected

    @pytest.mark.parametrize(
        "values",
        [
            date_range("2018-01-04", periods=4, freq="-1D"),
            date_range("2018-01-04", periods=4, freq="-1D", tz="US/Eastern"),
            timedelta_range("3 days", periods=4, freq="-1D"),
            np.arange(3.0, -1.0, -1.0),
            np.arange(3, -1, -1),
        ],
        ids=lambda x: str(x.dtype),
    )
    def test_get_loc_decreasing(self, values):
        # GH 25860
        index = IntervalIndex.from_arrays(values[1:], values[:-1])
        result = index.get_loc(index[0])
        expected = 0
        assert result == expected

    @pytest.mark.parametrize("key", [[5], (2, 3)])
    def test_get_loc_non_scalar_errors(self, key):
        # GH 31117
        idx = IntervalIndex.from_tuples([(1, 3), (2, 4), (3, 5), (7, 10), (3, 10)])

        msg = str(key)
        with pytest.raises(InvalidIndexError, match=msg):
            idx.get_loc(key)

    def test_get_indexer_with_nans(self):
        # GH#41831
        index = IntervalIndex([np.nan, Interval(1, 2), np.nan])
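        # NaN appears twice, so get_loc returns a boolean mask over the index
        # rather than a single integer position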

        expected = np.array([True, False, True])
        for key in [None, np.nan, NA]:
            assert key in index
            result = index.get_loc(key)
            tm.assert_numpy_array_equal(result, expected)

        for key in [NaT, np.timedelta64("NaT", "ns"), np.datetime64("NaT", "ns")]:
            with pytest.raises(KeyError, match=str(key)):
                index.get_loc(key)


class TestGetIndexer:
    @pytest.mark.parametrize(
        "query, expected",
        [
            ([Interval(2, 4, closed="right")], [1]),
            ([Interval(2, 4, closed="left")], [-1]),
            ([Interval(2, 4, closed="both")], [-1]),
            ([Interval(2, 4, closed="neither")], [-1]),
            ([Interval(1, 4, closed="right")], [-1]),
            ([Interval(0, 4, closed="right")], [-1]),
            ([Interval(0.5, 1.5, closed="right")], [-1]),
            ([Interval(2, 4, closed="right"), Interval(0, 1, closed="right")], [1, -1]),
            ([Interval(2, 4, closed="right"), Interval(2, 4, closed="right")], [1, 1]),
            ([Interval(5, 7, closed="right"), Interval(2, 4, closed="right")], [2, 1]),
            ([Interval(2, 4, closed="right"), Interval(2, 4, closed="left")], [1, -1]),
        ],
    )
    def test_get_indexer_with_interval(self, query, expected):
        tuples = [(0, 2), (2, 4), (5, 7)]
        index = IntervalIndex.from_tuples(tuples, closed="right")

        result = index.get_indexer(query)
        expected = np.array(expected, dtype="intp")
        tm.assert_numpy_array_equal(result, expected)

    @pytest.mark.parametrize(
        "query, expected",
        [
            ([-0.5], [-1]),
            ([0], [-1]),
            ([0.5], [0]),
            ([1], [0]),
            ([1.5], [1]),
            ([2], [1]),
            ([2.5], [-1]),
            ([3], [-1]),
            ([3.5], [2]),
            ([4], [2]),
            ([4.5], [-1]),
            ([1, 2], [0, 1]),
            ([1, 2, 3], [0, 1, -1]),
            ([1, 2, 3, 4], [0, 1, -1, 2]),
            ([1, 2, 3, 4, 2], [0, 1, -1, 2, 1]),
        ],
    )
    def test_get_indexer_with_int_and_float(self, query, expected):
        tuples = [(0, 1), (1, 2), (3, 4)]
        index = IntervalIndex.from_tuples(tuples, closed="right")

        result = index.get_indexer(query)
        expected = np.array(expected, dtype="intp")
        tm.assert_numpy_array_equal(result, expected)

    @pytest.mark.parametrize("item", [[3], np.arange(0.5, 5, 0.5)])
    def test_get_indexer_length_one(self, item, closed):
        # GH 17284
        index = IntervalIndex.from_tuples([(0, 5)], closed=closed)
        result = index.get_indexer(item)
        expected = np.array([0] * len(item), dtype="intp")
        tm.assert_numpy_array_equal(result, expected)

    @pytest.mark.parametrize("size", [1, 5])
    def test_get_indexer_length_one_interval(self, size, closed):
        # GH 17284
        index = IntervalIndex.from_tuples([(0, 5)], closed=closed)
        result = index.get_indexer([Interval(0, 5, closed)] * size)
        expected = np.array([0] * size, dtype="intp")
        tm.assert_numpy_array_equal(result, expected)

    @pytest.mark.parametrize(
        "target",
        [
            IntervalIndex.from_tuples([(7, 8), (1, 2), (3, 4), (0, 1)]),
            IntervalIndex.from_tuples([(0, 1), (1, 2), (3, 4), np.nan]),
            IntervalIndex.from_tuples([(0, 1), (1, 2), (3, 4)], closed="both"),
            [-1, 0, 0.5, 1, 2, 2.5, np.nan],
            ["foo", "foo", "bar", "baz"],
        ],
    )
    def test_get_indexer_categorical(self, target, ordered):
        # GH 30063: categorical and non-categorical results should be consistent
        index = IntervalIndex.from_tuples([(0, 1), (1, 2), (3, 4)])
        categorical_target = CategoricalIndex(target, ordered=ordered)

        result = index.get_indexer(categorical_target)
        expected = index.get_indexer(target)
        tm.assert_numpy_array_equal(result, expected)

    @pytest.mark.filterwarnings(
        "ignore:invalid value encountered in cast:RuntimeWarning"
    )
    def test_get_indexer_categorical_with_nans(self):
        # GH#41934 nans in both index and in target
        ii = IntervalIndex.from_breaks(range(5))
        ii2 = ii.append(IntervalIndex([np.nan]))
        ci2 = CategoricalIndex(ii2)

        result = ii2.get_indexer(ci2)
        expected = np.arange(5, dtype=np.intp)
        tm.assert_numpy_array_equal(result, expected)

        # not-all-matches
        result = ii2[1:].get_indexer(ci2[::-1])
        expected = np.array([3, 2, 1, 0, -1], dtype=np.intp)
        tm.assert_numpy_array_equal(result, expected)

        # non-unique target, non-unique nans
        result = ii2.get_indexer(ci2.append(ci2))
        expected = np.array([0, 1, 2, 3, 4, 0, 1, 2, 3, 4], dtype=np.intp)
        tm.assert_numpy_array_equal(result, expected)

    def test_get_indexer_datetime(self):
        ii = IntervalIndex.from_breaks(date_range("2018-01-01", periods=4))
        # TODO: with mismatched resolution get_indexer currently raises;
        # this should probably coerce?
        target = DatetimeIndex(["2018-01-02"], dtype="M8[ns]")
        result = ii.get_indexer(target)
        expected = np.array([0], dtype=np.intp)
        tm.assert_numpy_array_equal(result, expected)

        result = ii.get_indexer(target.astype(str))
        tm.assert_numpy_array_equal(result, expected)

        # https://github.com/pandas-dev/pandas/issues/47772
        result = ii.get_indexer(target.asi8)
        expected = np.array([-1], dtype=np.intp)
        tm.assert_numpy_array_equal(result, expected)

    @pytest.mark.parametrize(
        "tuples, closed",
        [
            ([(0, 2), (1, 3), (3, 4)], "neither"),
            ([(0, 5), (1, 4), (6, 7)], "left"),
            ([(0, 1), (0, 1), (1, 2)], "right"),
            ([(0, 1), (2, 3), (3, 4)], "both"),
        ],
    )
    def test_get_indexer_errors(self, tuples, closed):
        # an IntervalIndex must be non-overlapping in order to be queried
        # as if it were unique
        index = IntervalIndex.from_tuples(tuples, closed=closed)

        msg = (
            "cannot handle overlapping indices; use "
            "IntervalIndex.get_indexer_non_unique"
        )
        with pytest.raises(InvalidIndexError, match=msg):
            index.get_indexer([0, 2])

    @pytest.mark.parametrize(
        "query, expected",
        [
            ([-0.5], ([-1], [0])),
            ([0], ([0], [])),
            ([0.5], ([0], [])),
            ([1], ([0, 1], [])),
            ([1.5], ([0, 1], [])),
            ([2], ([0, 1, 2], [])),
            ([2.5], ([1, 2], [])),
            ([3], ([2], [])),
            ([3.5], ([2], [])),
            ([4], ([-1], [0])),
            ([4.5], ([-1], [0])),
            ([1, 2], ([0, 1, 0, 1, 2], [])),
            ([1, 2, 3], ([0, 1, 0, 1, 2, 2], [])),
            ([1, 2, 3, 4], ([0, 1, 0, 1, 2, 2, -1], [3])),
            ([1, 2, 3, 4, 2], ([0, 1, 0, 1, 2, 2, -1, 0, 1, 2], [3])),
        ],
    )
    def test_get_indexer_non_unique_with_int_and_float(self, query, expected):
        tuples = [(0, 2.5), (1, 3), (2, 4)]
        index = IntervalIndex.from_tuples(tuples, closed="left")
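        # get_indexer_non_unique returns a pair: the positions of every match
        # for each query element, plus the query positions that match nothing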

        result_indexer, result_missing = index.get_indexer_non_unique(query)
        expected_indexer = np.array(expected[0], dtype="intp")
        expected_missing = np.array(expected[1], dtype="intp")

        tm.assert_numpy_array_equal(result_indexer, expected_indexer)
        tm.assert_numpy_array_equal(result_missing, expected_missing)

        # TODO we may also want to test get_indexer for the case when
        # the intervals are duplicated, decreasing, non-monotonic, etc..

    def test_get_indexer_non_monotonic(self):
        # GH 16410
        idx1 = IntervalIndex.from_tuples([(2, 3), (4, 5), (0, 1)])
        idx2 = IntervalIndex.from_tuples([(0, 1), (2, 3), (6, 7), (8, 9)])
        result = idx1.get_indexer(idx2)
        expected = np.array([2, 0, -1, -1], dtype=np.intp)
        tm.assert_numpy_array_equal(result, expected)

        result = idx1.get_indexer(idx1[1:])
        expected = np.array([1, 2], dtype=np.intp)
        tm.assert_numpy_array_equal(result, expected)

    def test_get_indexer_with_nans(self):
        # GH#41831
        index = IntervalIndex([np.nan, np.nan])
        other = IntervalIndex([np.nan])

        assert not index._index_as_unique

        result = index.get_indexer_for(other)
        expected = np.array([0, 1], dtype=np.intp)
        tm.assert_numpy_array_equal(result, expected)

    def test_get_index_non_unique_non_monotonic(self):
        # GH#44084 (root cause)
        index = IntervalIndex.from_tuples(
            [(0.0, 1.0), (1.0, 2.0), (0.0, 1.0), (1.0, 2.0)]
        )

        result, _ = index.get_indexer_non_unique([Interval(1.0, 2.0)])
        expected = np.array([1, 3], dtype=np.intp)
        tm.assert_numpy_array_equal(result, expected)

    def test_get_indexer_multiindex_with_intervals(self):
        # GH#44084 (MultiIndex case as reported)
        interval_index = IntervalIndex.from_tuples(
            [(2.0, 3.0), (0.0, 1.0), (1.0, 2.0)], name="interval"
        )
        foo_index = Index([1, 2, 3], name="foo")

        multi_index = MultiIndex.from_product([foo_index, interval_index])

        result = multi_index.get_level_values("interval").get_indexer_for(
            [Interval(0.0, 1.0)]
        )
        expected = np.array([1, 4, 7], dtype=np.intp)
        tm.assert_numpy_array_equal(result, expected)

    @pytest.mark.parametrize("box", [IntervalIndex, array, list])
    def test_get_indexer_interval_index(self, box):
        # GH#30178
        rng = period_range("2022-07-01", freq="D", periods=3)
        idx = box(interval_range(Timestamp("2022-07-01"), freq="3D", periods=3))

        actual = rng.get_indexer(idx)
        expected = np.array([-1, -1, -1], dtype=np.intp)
        tm.assert_numpy_array_equal(actual, expected)

    def test_get_indexer_read_only(self):
        idx = interval_range(start=0, end=5)
        arr = np.array([1, 2])
        arr.flags.writeable = False
        result = idx.get_indexer(arr)
        expected = np.array([0, 1])
        tm.assert_numpy_array_equal(result, expected, check_dtype=False)

        result = idx.get_indexer_non_unique(arr)[0]
        tm.assert_numpy_array_equal(result, expected, check_dtype=False)


class TestSliceLocs:
    def test_slice_locs_with_interval(self):
        # increasing monotonically
        index = IntervalIndex.from_tuples([(0, 2), (1, 3), (2, 4)])

        assert index.slice_locs(start=Interval(0, 2), end=Interval(2, 4)) == (0, 3)
        assert index.slice_locs(start=Interval(0, 2)) == (0, 3)
        assert index.slice_locs(end=Interval(2, 4)) == (0, 3)
        assert index.slice_locs(end=Interval(0, 2)) == (0, 1)
        assert index.slice_locs(start=Interval(2, 4), end=Interval(0, 2)) == (2, 1)

        # decreasing monotonically
        index = IntervalIndex.from_tuples([(2, 4), (1, 3), (0, 2)])

        assert index.slice_locs(start=Interval(0, 2), end=Interval(2, 4)) == (2, 1)
        assert index.slice_locs(start=Interval(0, 2)) == (2, 3)
        assert index.slice_locs(end=Interval(2, 4)) == (0, 1)
        assert index.slice_locs(end=Interval(0, 2)) == (0, 3)
        assert index.slice_locs(start=Interval(2, 4), end=Interval(0, 2)) == (0, 3)

        # sorted duplicates
        index = IntervalIndex.from_tuples([(0, 2), (0, 2), (2, 4)])

        assert index.slice_locs(start=Interval(0, 2), end=Interval(2, 4)) == (0, 3)
        assert index.slice_locs(start=Interval(0, 2)) == (0, 3)
        assert index.slice_locs(end=Interval(2, 4)) == (0, 3)
        assert index.slice_locs(end=Interval(0, 2)) == (0, 2)
        assert index.slice_locs(start=Interval(2, 4), end=Interval(0, 2)) == (2, 2)

        # unsorted duplicates
        index = IntervalIndex.from_tuples([(0, 2), (2, 4), (0, 2)])

        with pytest.raises(
            KeyError,
            match=re.escape(
                '"Cannot get left slice bound for non-unique label: '
                "Interval(0, 2, closed='right')\""
            ),
        ):
            index.slice_locs(start=Interval(0, 2), end=Interval(2, 4))

        with pytest.raises(
            KeyError,
            match=re.escape(
                '"Cannot get left slice bound for non-unique label: '
                "Interval(0, 2, closed='right')\""
            ),
        ):
            index.slice_locs(start=Interval(0, 2))

        assert index.slice_locs(end=Interval(2, 4)) == (0, 2)

        with pytest.raises(
            KeyError,
            match=re.escape(
                '"Cannot get right slice bound for non-unique label: '
                "Interval(0, 2, closed='right')\""
            ),
        ):
            index.slice_locs(end=Interval(0, 2))

        with pytest.raises(
            KeyError,
            match=re.escape(
                '"Cannot get right slice bound for non-unique label: '
                "Interval(0, 2, closed='right')\""
            ),
        ):
            index.slice_locs(start=Interval(2, 4), end=Interval(0, 2))

        # another unsorted duplicates
        index = IntervalIndex.from_tuples([(0, 2), (0, 2), (2, 4), (1, 3)])

        assert index.slice_locs(start=Interval(0, 2), end=Interval(2, 4)) == (0, 3)
        assert index.slice_locs(start=Interval(0, 2)) == (0, 4)
        assert index.slice_locs(end=Interval(2, 4)) == (0, 3)
        assert index.slice_locs(end=Interval(0, 2)) == (0, 2)
        assert index.slice_locs(start=Interval(2, 4), end=Interval(0, 2)) == (2, 2)

    def test_slice_locs_with_ints_and_floats_succeeds(self):
        # increasing non-overlapping
        index = IntervalIndex.from_tuples([(0, 1), (1, 2), (3, 4)])

        assert index.slice_locs(0, 1) == (0, 1)
        assert index.slice_locs(0, 2) == (0, 2)
        assert index.slice_locs(0, 3) == (0, 2)
        assert index.slice_locs(3, 1) == (2, 1)
        assert index.slice_locs(3, 4) == (2, 3)
        assert index.slice_locs(0, 4) == (0, 3)

        # decreasing non-overlapping
        index = IntervalIndex.from_tuples([(3, 4), (1, 2), (0, 1)])
        assert index.slice_locs(0, 1) == (3, 3)
        assert index.slice_locs(0, 2) == (3, 2)
        assert index.slice_locs(0, 3) == (3, 1)
        assert index.slice_locs(3, 1) == (1, 3)
        assert index.slice_locs(3, 4) == (1, 1)
        assert index.slice_locs(0, 4) == (3, 1)

    @pytest.mark.parametrize("query", [[0, 1], [0, 2], [0, 3], [0, 4]])
    @pytest.mark.parametrize(
        "tuples",
        [
            [(0, 2), (1, 3), (2, 4)],
            [(2, 4), (1, 3), (0, 2)],
            [(0, 2), (0, 2), (2, 4)],
            [(0, 2), (2, 4), (0, 2)],
            [(0, 2), (0, 2), (2, 4), (1, 3)],
        ],
    )
    def test_slice_locs_with_ints_and_floats_errors(self, tuples, query):
        start, stop = query
        index = IntervalIndex.from_tuples(tuples)
        with pytest.raises(
            KeyError,
            match=(
                "'can only get slices from an IntervalIndex if bounds are "
                "non-overlapping and all monotonic increasing or decreasing'"
            ),
        ):
            index.slice_locs(start, stop)


class TestPutmask:
    @pytest.mark.parametrize("tz", ["US/Pacific", None])
    def test_putmask_dt64(self, tz):
        # GH#37968
        dti = date_range("2016-01-01", periods=9, tz=tz)
        idx = IntervalIndex.from_breaks(dti)
        mask = np.zeros(idx.shape, dtype=bool)
        mask[0:3] = True

        result = idx.putmask(mask, idx[-1])
        expected = IntervalIndex([idx[-1]] * 3 + list(idx[3:]))
        tm.assert_index_equal(result, expected)

    def test_putmask_td64(self):
        # GH#37968
        dti = date_range("2016-01-01", periods=9)
        tdi = dti - dti[0]
        idx = IntervalIndex.from_breaks(tdi)
        mask = np.zeros(idx.shape, dtype=bool)
        mask[0:3] = True

        result = idx.putmask(mask, idx[-1])
        expected = IntervalIndex([idx[-1]] * 3 + list(idx[3:]))
        tm.assert_index_equal(result, expected)


class TestContains:
    # .__contains__, not .contains

    def test_contains_dunder(self):
        index = IntervalIndex.from_arrays([0, 1], [1, 2], closed="right")

        # __contains__ requires perfect matches to intervals.
        assert 0 not in index
        assert 1 not in index
        assert 2 not in index

        assert Interval(0, 1, closed="right") in index
        assert Interval(0, 2, closed="right") not in index
        assert Interval(0, 0.5, closed="right") not in index
        assert Interval(3, 5, closed="right") not in index
        assert Interval(-1, 0, closed="left") not in index
        assert Interval(0, 1, closed="left") not in index
        assert Interval(0, 1, closed="both") not in index
@ -0,0 +1,918 @@
from itertools import permutations
import re

import numpy as np
import pytest

import pandas as pd
from pandas import (
    Index,
    Interval,
    IntervalIndex,
    Timedelta,
    Timestamp,
    date_range,
    interval_range,
    isna,
    notna,
    timedelta_range,
)
import pandas._testing as tm
import pandas.core.common as com


@pytest.fixture(params=[None, "foo"])
def name(request):
    return request.param


class TestIntervalIndex:
    index = IntervalIndex.from_arrays([0, 1], [1, 2])

    def create_index(self, closed="right"):
        return IntervalIndex.from_breaks(range(11), closed=closed)

    def create_index_with_nan(self, closed="right"):
        mask = [True, False] + [True] * 8
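        # position 1 of the mask is False, so the index built below has a
        # single missing interval in that slot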
        return IntervalIndex.from_arrays(
            np.where(mask, np.arange(10), np.nan),
            np.where(mask, np.arange(1, 11), np.nan),
            closed=closed,
        )
|
||||
|
||||
def test_properties(self, closed):
|
||||
index = self.create_index(closed=closed)
|
||||
assert len(index) == 10
|
||||
assert index.size == 10
|
||||
assert index.shape == (10,)
|
||||
|
||||
tm.assert_index_equal(index.left, Index(np.arange(10, dtype=np.int64)))
|
||||
        tm.assert_index_equal(index.right, Index(np.arange(1, 11, dtype=np.int64)))
        tm.assert_index_equal(index.mid, Index(np.arange(0.5, 10.5, dtype=np.float64)))

        assert index.closed == closed

        ivs = [
            Interval(left, right, closed)
            for left, right in zip(range(10), range(1, 11))
        ]
        expected = np.array(ivs, dtype=object)
        tm.assert_numpy_array_equal(np.asarray(index), expected)

        # with nans
        index = self.create_index_with_nan(closed=closed)
        assert len(index) == 10
        assert index.size == 10
        assert index.shape == (10,)

        expected_left = Index([0, np.nan, 2, 3, 4, 5, 6, 7, 8, 9])
        expected_right = expected_left + 1
        expected_mid = expected_left + 0.5
        tm.assert_index_equal(index.left, expected_left)
        tm.assert_index_equal(index.right, expected_right)
        tm.assert_index_equal(index.mid, expected_mid)

        assert index.closed == closed

        ivs = [
            Interval(left, right, closed) if notna(left) else np.nan
            for left, right in zip(expected_left, expected_right)
        ]
        expected = np.array(ivs, dtype=object)
        tm.assert_numpy_array_equal(np.asarray(index), expected)

    @pytest.mark.parametrize(
        "breaks",
        [
            [1, 1, 2, 5, 15, 53, 217, 1014, 5335, 31240, 201608],
            [-np.inf, -100, -10, 0.5, 1, 1.5, 3.8, 101, 202, np.inf],
            date_range("2017-01-01", "2017-01-04"),
            pytest.param(
                date_range("2017-01-01", "2017-01-04", unit="s"),
                marks=pytest.mark.xfail(reason="mismatched result unit"),
            ),
            pd.to_timedelta(["1ns", "2ms", "3s", "4min", "5h", "6D"]),
        ],
    )
    def test_length(self, closed, breaks):
        # GH 18789
        index = IntervalIndex.from_breaks(breaks, closed=closed)
        result = index.length
        expected = Index(iv.length for iv in index)
        tm.assert_index_equal(result, expected)

        # with NA
        index = index.insert(1, np.nan)
        result = index.length
        expected = Index(iv.length if notna(iv) else iv for iv in index)
        tm.assert_index_equal(result, expected)

    def test_with_nans(self, closed):
        index = self.create_index(closed=closed)
        assert index.hasnans is False

        result = index.isna()
        expected = np.zeros(len(index), dtype=bool)
        tm.assert_numpy_array_equal(result, expected)

        result = index.notna()
        expected = np.ones(len(index), dtype=bool)
        tm.assert_numpy_array_equal(result, expected)

        index = self.create_index_with_nan(closed=closed)
        assert index.hasnans is True

        result = index.isna()
        expected = np.array([False, True] + [False] * (len(index) - 2))
        tm.assert_numpy_array_equal(result, expected)

        result = index.notna()
        expected = np.array([True, False] + [True] * (len(index) - 2))
        tm.assert_numpy_array_equal(result, expected)

    def test_copy(self, closed):
        expected = self.create_index(closed=closed)

        result = expected.copy()
        assert result.equals(expected)

        result = expected.copy(deep=True)
        assert result.equals(expected)
        assert result.left is not expected.left

    def test_ensure_copied_data(self, closed):
        # exercise the copy flag in the constructor

        # not copying
        index = self.create_index(closed=closed)
        result = IntervalIndex(index, copy=False)
        tm.assert_numpy_array_equal(
            index.left.values, result.left.values, check_same="same"
        )
        tm.assert_numpy_array_equal(
            index.right.values, result.right.values, check_same="same"
        )

        # by-definition make a copy
        result = IntervalIndex(np.array(index), copy=False)
        tm.assert_numpy_array_equal(
            index.left.values, result.left.values, check_same="copy"
        )
        tm.assert_numpy_array_equal(
            index.right.values, result.right.values, check_same="copy"
        )

    def test_delete(self, closed):
        breaks = np.arange(1, 11, dtype=np.int64)
        expected = IntervalIndex.from_breaks(breaks, closed=closed)
        result = self.create_index(closed=closed).delete(0)
        tm.assert_index_equal(result, expected)

    @pytest.mark.parametrize(
        "data",
        [
            interval_range(0, periods=10, closed="neither"),
            interval_range(1.7, periods=8, freq=2.5, closed="both"),
            interval_range(Timestamp("20170101"), periods=12, closed="left"),
            interval_range(Timedelta("1 day"), periods=6, closed="right"),
        ],
    )
    def test_insert(self, data):
        item = data[0]
        idx_item = IntervalIndex([item])

        # start
        expected = idx_item.append(data)
        result = data.insert(0, item)
        tm.assert_index_equal(result, expected)

        # end
        expected = data.append(idx_item)
        result = data.insert(len(data), item)
        tm.assert_index_equal(result, expected)

        # mid
        expected = data[:3].append(idx_item).append(data[3:])
        result = data.insert(3, item)
        tm.assert_index_equal(result, expected)

        # invalid type
        res = data.insert(1, "foo")
        expected = data.astype(object).insert(1, "foo")
        tm.assert_index_equal(res, expected)

        msg = "can only insert Interval objects and NA into an IntervalArray"
        with pytest.raises(TypeError, match=msg):
            data._data.insert(1, "foo")

        # invalid closed
        for closed in {"left", "right", "both", "neither"} - {item.closed}:
            msg = f"'value.closed' is '{closed}', expected '{item.closed}'."
            bad_item = Interval(item.left, item.right, closed=closed)
            res = data.insert(1, bad_item)
            expected = data.astype(object).insert(1, bad_item)
            tm.assert_index_equal(res, expected)
            with pytest.raises(ValueError, match=msg):
                data._data.insert(1, bad_item)

        # GH 18295 (test missing)
        na_idx = IntervalIndex([np.nan], closed=data.closed)
        for na in [np.nan, None, pd.NA]:
            expected = data[:1].append(na_idx).append(data[1:])
            result = data.insert(1, na)
            tm.assert_index_equal(result, expected)

        if data.left.dtype.kind not in ["m", "M"]:
            # trying to insert pd.NaT into a numeric-dtyped Index should cast
            expected = data.astype(object).insert(1, pd.NaT)

            msg = "can only insert Interval objects and NA into an IntervalArray"
            with pytest.raises(TypeError, match=msg):
                data._data.insert(1, pd.NaT)

        result = data.insert(1, pd.NaT)
        tm.assert_index_equal(result, expected)

    def test_is_unique_interval(self, closed):
        """
        Interval specific tests for is_unique in addition to base class tests
        """
        # unique overlapping - distinct endpoints
        idx = IntervalIndex.from_tuples([(0, 1), (0.5, 1.5)], closed=closed)
        assert idx.is_unique is True

        # unique overlapping - shared endpoints
        idx = IntervalIndex.from_tuples([(1, 2), (1, 3), (2, 3)], closed=closed)
        assert idx.is_unique is True

        # unique nested
        idx = IntervalIndex.from_tuples([(-1, 1), (-2, 2)], closed=closed)
        assert idx.is_unique is True

        # unique NaN
        idx = IntervalIndex.from_tuples([(np.nan, np.nan)], closed=closed)
        assert idx.is_unique is True

        # non-unique NaN
        idx = IntervalIndex.from_tuples(
            [(np.nan, np.nan), (np.nan, np.nan)], closed=closed
        )
        assert idx.is_unique is False

    def test_monotonic(self, closed):
        # increasing non-overlapping
        idx = IntervalIndex.from_tuples([(0, 1), (2, 3), (4, 5)], closed=closed)
        assert idx.is_monotonic_increasing is True
        assert idx._is_strictly_monotonic_increasing is True
        assert idx.is_monotonic_decreasing is False
        assert idx._is_strictly_monotonic_decreasing is False

        # decreasing non-overlapping
        idx = IntervalIndex.from_tuples([(4, 5), (2, 3), (1, 2)], closed=closed)
        assert idx.is_monotonic_increasing is False
        assert idx._is_strictly_monotonic_increasing is False
        assert idx.is_monotonic_decreasing is True
        assert idx._is_strictly_monotonic_decreasing is True

        # unordered non-overlapping
        idx = IntervalIndex.from_tuples([(0, 1), (4, 5), (2, 3)], closed=closed)
        assert idx.is_monotonic_increasing is False
        assert idx._is_strictly_monotonic_increasing is False
        assert idx.is_monotonic_decreasing is False
        assert idx._is_strictly_monotonic_decreasing is False

        # increasing overlapping
        idx = IntervalIndex.from_tuples([(0, 2), (0.5, 2.5), (1, 3)], closed=closed)
        assert idx.is_monotonic_increasing is True
        assert idx._is_strictly_monotonic_increasing is True
        assert idx.is_monotonic_decreasing is False
        assert idx._is_strictly_monotonic_decreasing is False

        # decreasing overlapping
        idx = IntervalIndex.from_tuples([(1, 3), (0.5, 2.5), (0, 2)], closed=closed)
        assert idx.is_monotonic_increasing is False
        assert idx._is_strictly_monotonic_increasing is False
        assert idx.is_monotonic_decreasing is True
        assert idx._is_strictly_monotonic_decreasing is True

        # unordered overlapping
        idx = IntervalIndex.from_tuples([(0.5, 2.5), (0, 2), (1, 3)], closed=closed)
        assert idx.is_monotonic_increasing is False
        assert idx._is_strictly_monotonic_increasing is False
        assert idx.is_monotonic_decreasing is False
        assert idx._is_strictly_monotonic_decreasing is False

        # increasing overlapping shared endpoints
        idx = IntervalIndex.from_tuples([(1, 2), (1, 3), (2, 3)], closed=closed)
        assert idx.is_monotonic_increasing is True
        assert idx._is_strictly_monotonic_increasing is True
        assert idx.is_monotonic_decreasing is False
        assert idx._is_strictly_monotonic_decreasing is False

        # decreasing overlapping shared endpoints
        idx = IntervalIndex.from_tuples([(2, 3), (1, 3), (1, 2)], closed=closed)
        assert idx.is_monotonic_increasing is False
        assert idx._is_strictly_monotonic_increasing is False
        assert idx.is_monotonic_decreasing is True
        assert idx._is_strictly_monotonic_decreasing is True

        # stationary
        idx = IntervalIndex.from_tuples([(0, 1), (0, 1)], closed=closed)
        assert idx.is_monotonic_increasing is True
        assert idx._is_strictly_monotonic_increasing is False
        assert idx.is_monotonic_decreasing is True
        assert idx._is_strictly_monotonic_decreasing is False

        # empty
        idx = IntervalIndex([], closed=closed)
        assert idx.is_monotonic_increasing is True
        assert idx._is_strictly_monotonic_increasing is True
        assert idx.is_monotonic_decreasing is True
        assert idx._is_strictly_monotonic_decreasing is True

    def test_is_monotonic_with_nans(self):
        # GH#41831
        index = IntervalIndex([np.nan, np.nan])

        assert not index.is_monotonic_increasing
        assert not index._is_strictly_monotonic_increasing
        assert not index._is_strictly_monotonic_decreasing
        assert not index.is_monotonic_decreasing

    @pytest.mark.parametrize(
        "breaks",
        [
            date_range("20180101", periods=4),
            date_range("20180101", periods=4, tz="US/Eastern"),
            timedelta_range("0 days", periods=4),
        ],
        ids=lambda x: str(x.dtype),
    )
    def test_maybe_convert_i8(self, breaks):
        # GH 20636
        index = IntervalIndex.from_breaks(breaks)

        # intervalindex
        result = index._maybe_convert_i8(index)
        expected = IntervalIndex.from_breaks(breaks.asi8)
        tm.assert_index_equal(result, expected)

        # interval
        interval = Interval(breaks[0], breaks[1])
        result = index._maybe_convert_i8(interval)
        expected = Interval(breaks[0]._value, breaks[1]._value)
        assert result == expected

        # datetimelike index
        result = index._maybe_convert_i8(breaks)
        expected = Index(breaks.asi8)
        tm.assert_index_equal(result, expected)

        # datetimelike scalar
        result = index._maybe_convert_i8(breaks[0])
        expected = breaks[0]._value
        assert result == expected

        # list-like of datetimelike scalars
        result = index._maybe_convert_i8(list(breaks))
        expected = Index(breaks.asi8)
        tm.assert_index_equal(result, expected)

    @pytest.mark.parametrize(
        "breaks",
        [date_range("2018-01-01", periods=5), timedelta_range("0 days", periods=5)],
    )
    def test_maybe_convert_i8_nat(self, breaks):
        # GH 20636
        index = IntervalIndex.from_breaks(breaks)

        to_convert = breaks._constructor([pd.NaT] * 3).as_unit("ns")
        expected = Index([np.nan] * 3, dtype=np.float64)
        result = index._maybe_convert_i8(to_convert)
        tm.assert_index_equal(result, expected)

        to_convert = to_convert.insert(0, breaks[0])
        expected = expected.insert(0, float(breaks[0]._value))
        result = index._maybe_convert_i8(to_convert)
        tm.assert_index_equal(result, expected)

    @pytest.mark.parametrize(
        "make_key",
        [lambda breaks: breaks, list],
        ids=["lambda", "list"],
    )
    def test_maybe_convert_i8_numeric(self, make_key, any_real_numpy_dtype):
        # GH 20636
        breaks = np.arange(5, dtype=any_real_numpy_dtype)
        index = IntervalIndex.from_breaks(breaks)
        key = make_key(breaks)

        result = index._maybe_convert_i8(key)
        kind = breaks.dtype.kind
        expected_dtype = {"i": np.int64, "u": np.uint64, "f": np.float64}[kind]
        expected = Index(key, dtype=expected_dtype)
        tm.assert_index_equal(result, expected)

    @pytest.mark.parametrize(
        "make_key",
        [
            IntervalIndex.from_breaks,
            lambda breaks: Interval(breaks[0], breaks[1]),
            lambda breaks: breaks[0],
        ],
        ids=["IntervalIndex", "Interval", "scalar"],
    )
    def test_maybe_convert_i8_numeric_identical(self, make_key, any_real_numpy_dtype):
        # GH 20636
        breaks = np.arange(5, dtype=any_real_numpy_dtype)
        index = IntervalIndex.from_breaks(breaks)
        key = make_key(breaks)

        # test if _maybe_convert_i8 won't change key if an Interval or IntervalIndex
        result = index._maybe_convert_i8(key)
        assert result is key

    @pytest.mark.parametrize(
        "breaks1, breaks2",
        permutations(
            [
                date_range("20180101", periods=4),
                date_range("20180101", periods=4, tz="US/Eastern"),
                timedelta_range("0 days", periods=4),
            ],
            2,
        ),
        ids=lambda x: str(x.dtype),
    )
    @pytest.mark.parametrize(
        "make_key",
        [
            IntervalIndex.from_breaks,
            lambda breaks: Interval(breaks[0], breaks[1]),
            lambda breaks: breaks,
            lambda breaks: breaks[0],
            list,
        ],
        ids=["IntervalIndex", "Interval", "Index", "scalar", "list"],
    )
    def test_maybe_convert_i8_errors(self, breaks1, breaks2, make_key):
        # GH 20636
        index = IntervalIndex.from_breaks(breaks1)
        key = make_key(breaks2)

        msg = (
            f"Cannot index an IntervalIndex of subtype {breaks1.dtype} with "
            f"values of dtype {breaks2.dtype}"
        )
        msg = re.escape(msg)
        with pytest.raises(ValueError, match=msg):
            index._maybe_convert_i8(key)

    def test_contains_method(self):
        # can select values that are IN the range of a value
        i = IntervalIndex.from_arrays([0, 1], [1, 2])

        expected = np.array([False, False], dtype="bool")
        actual = i.contains(0)
        tm.assert_numpy_array_equal(actual, expected)
        actual = i.contains(3)
        tm.assert_numpy_array_equal(actual, expected)

        expected = np.array([True, False], dtype="bool")
        actual = i.contains(0.5)
        tm.assert_numpy_array_equal(actual, expected)
        actual = i.contains(1)
        tm.assert_numpy_array_equal(actual, expected)

        # __contains__ not implemented for "interval in interval", follow
        # that for the contains method for now
        with pytest.raises(
            NotImplementedError, match="contains not implemented for two"
        ):
            i.contains(Interval(0, 1))

    def test_dropna(self, closed):
        expected = IntervalIndex.from_tuples([(0.0, 1.0), (1.0, 2.0)], closed=closed)

        ii = IntervalIndex.from_tuples([(0, 1), (1, 2), np.nan], closed=closed)
        result = ii.dropna()
        tm.assert_index_equal(result, expected)

        ii = IntervalIndex.from_arrays([0, 1, np.nan], [1, 2, np.nan], closed=closed)
        result = ii.dropna()
        tm.assert_index_equal(result, expected)

    def test_non_contiguous(self, closed):
        index = IntervalIndex.from_tuples([(0, 1), (2, 3)], closed=closed)
        target = [0.5, 1.5, 2.5]
        actual = index.get_indexer(target)
        expected = np.array([0, -1, 1], dtype="intp")
        tm.assert_numpy_array_equal(actual, expected)

        assert 1.5 not in index

    def test_isin(self, closed):
        index = self.create_index(closed=closed)

        expected = np.array([True] + [False] * (len(index) - 1))
        result = index.isin(index[:1])
        tm.assert_numpy_array_equal(result, expected)

        result = index.isin([index[0]])
        tm.assert_numpy_array_equal(result, expected)

        other = IntervalIndex.from_breaks(np.arange(-2, 10), closed=closed)
        expected = np.array([True] * (len(index) - 1) + [False])
        result = index.isin(other)
        tm.assert_numpy_array_equal(result, expected)

        result = index.isin(other.tolist())
        tm.assert_numpy_array_equal(result, expected)

        for other_closed in ["right", "left", "both", "neither"]:
            other = self.create_index(closed=other_closed)
            expected = np.repeat(closed == other_closed, len(index))
            result = index.isin(other)
            tm.assert_numpy_array_equal(result, expected)

            result = index.isin(other.tolist())
            tm.assert_numpy_array_equal(result, expected)

    def test_comparison(self):
        actual = Interval(0, 1) < self.index
        expected = np.array([False, True])
        tm.assert_numpy_array_equal(actual, expected)

        actual = Interval(0.5, 1.5) < self.index
        expected = np.array([False, True])
        tm.assert_numpy_array_equal(actual, expected)
        actual = self.index > Interval(0.5, 1.5)
        tm.assert_numpy_array_equal(actual, expected)

        actual = self.index == self.index
        expected = np.array([True, True])
        tm.assert_numpy_array_equal(actual, expected)
        actual = self.index <= self.index
        tm.assert_numpy_array_equal(actual, expected)
        actual = self.index >= self.index
        tm.assert_numpy_array_equal(actual, expected)

        actual = self.index < self.index
        expected = np.array([False, False])
        tm.assert_numpy_array_equal(actual, expected)
        actual = self.index > self.index
        tm.assert_numpy_array_equal(actual, expected)

        actual = self.index == IntervalIndex.from_breaks([0, 1, 2], "left")
        tm.assert_numpy_array_equal(actual, expected)

        actual = self.index == self.index.values
        tm.assert_numpy_array_equal(actual, np.array([True, True]))
        actual = self.index.values == self.index
        tm.assert_numpy_array_equal(actual, np.array([True, True]))
        actual = self.index <= self.index.values
        tm.assert_numpy_array_equal(actual, np.array([True, True]))
        actual = self.index != self.index.values
        tm.assert_numpy_array_equal(actual, np.array([False, False]))
        actual = self.index > self.index.values
        tm.assert_numpy_array_equal(actual, np.array([False, False]))
        actual = self.index.values > self.index
        tm.assert_numpy_array_equal(actual, np.array([False, False]))

        # invalid comparisons
        actual = self.index == 0
        tm.assert_numpy_array_equal(actual, np.array([False, False]))
        actual = self.index == self.index.left
        tm.assert_numpy_array_equal(actual, np.array([False, False]))

        msg = "|".join(
            [
                "not supported between instances of 'int' and '.*.Interval'",
                r"Invalid comparison between dtype=interval\[int64, right\] and ",
            ]
        )
        with pytest.raises(TypeError, match=msg):
            self.index > 0
        with pytest.raises(TypeError, match=msg):
            self.index <= 0
        with pytest.raises(TypeError, match=msg):
            self.index > np.arange(2)

        msg = "Lengths must match to compare"
        with pytest.raises(ValueError, match=msg):
            self.index > np.arange(3)

    def test_missing_values(self, closed):
        idx = Index(
            [np.nan, Interval(0, 1, closed=closed), Interval(1, 2, closed=closed)]
        )
        idx2 = IntervalIndex.from_arrays([np.nan, 0, 1], [np.nan, 1, 2], closed=closed)
        assert idx.equals(idx2)

        msg = (
            "missing values must be missing in the same location both left "
            "and right sides"
        )
        with pytest.raises(ValueError, match=msg):
            IntervalIndex.from_arrays(
                [np.nan, 0, 1], np.array([0, 1, 2]), closed=closed
            )

        tm.assert_numpy_array_equal(isna(idx), np.array([True, False, False]))

    def test_sort_values(self, closed):
        index = self.create_index(closed=closed)

        result = index.sort_values()
        tm.assert_index_equal(result, index)

        result = index.sort_values(ascending=False)
        tm.assert_index_equal(result, index[::-1])

        # with nan
        index = IntervalIndex([Interval(1, 2), np.nan, Interval(0, 1)])

        result = index.sort_values()
        expected = IntervalIndex([Interval(0, 1), Interval(1, 2), np.nan])
        tm.assert_index_equal(result, expected)

        result = index.sort_values(ascending=False, na_position="first")
        expected = IntervalIndex([np.nan, Interval(1, 2), Interval(0, 1)])
        tm.assert_index_equal(result, expected)

@pytest.mark.parametrize("tz", [None, "US/Eastern"])
|
||||
def test_datetime(self, tz):
|
||||
start = Timestamp("2000-01-01", tz=tz)
|
||||
dates = date_range(start=start, periods=10)
|
||||
index = IntervalIndex.from_breaks(dates)
|
||||
|
||||
# test mid
|
||||
start = Timestamp("2000-01-01T12:00", tz=tz)
|
||||
expected = date_range(start=start, periods=9)
|
||||
tm.assert_index_equal(index.mid, expected)
|
||||
|
||||
# __contains__ doesn't check individual points
|
||||
assert Timestamp("2000-01-01", tz=tz) not in index
|
||||
assert Timestamp("2000-01-01T12", tz=tz) not in index
|
||||
assert Timestamp("2000-01-02", tz=tz) not in index
|
||||
iv_true = Interval(
|
||||
Timestamp("2000-01-02", tz=tz), Timestamp("2000-01-03", tz=tz)
|
||||
)
|
||||
iv_false = Interval(
|
||||
Timestamp("1999-12-31", tz=tz), Timestamp("2000-01-01", tz=tz)
|
||||
)
|
||||
assert iv_true in index
|
||||
assert iv_false not in index
|
||||
|
||||
# .contains does check individual points
|
||||
assert not index.contains(Timestamp("2000-01-01", tz=tz)).any()
|
||||
assert index.contains(Timestamp("2000-01-01T12", tz=tz)).any()
|
||||
assert index.contains(Timestamp("2000-01-02", tz=tz)).any()
|
||||
|
||||
# test get_indexer
|
||||
start = Timestamp("1999-12-31T12:00", tz=tz)
|
||||
target = date_range(start=start, periods=7, freq="12h")
|
||||
actual = index.get_indexer(target)
|
||||
expected = np.array([-1, -1, 0, 0, 1, 1, 2], dtype="intp")
|
||||
tm.assert_numpy_array_equal(actual, expected)
|
||||
|
||||
start = Timestamp("2000-01-08T18:00", tz=tz)
|
||||
target = date_range(start=start, periods=7, freq="6h")
|
||||
actual = index.get_indexer(target)
|
||||
expected = np.array([7, 7, 8, 8, 8, 8, -1], dtype="intp")
|
||||
tm.assert_numpy_array_equal(actual, expected)
|
||||
|
||||
def test_append(self, closed):
|
||||
index1 = IntervalIndex.from_arrays([0, 1], [1, 2], closed=closed)
|
||||
index2 = IntervalIndex.from_arrays([1, 2], [2, 3], closed=closed)
|
||||
|
||||
result = index1.append(index2)
|
||||
expected = IntervalIndex.from_arrays([0, 1, 1, 2], [1, 2, 2, 3], closed=closed)
|
||||
tm.assert_index_equal(result, expected)
|
||||
|
||||
result = index1.append([index1, index2])
|
||||
expected = IntervalIndex.from_arrays(
|
||||
[0, 1, 0, 1, 1, 2], [1, 2, 1, 2, 2, 3], closed=closed
|
||||
)
|
||||
tm.assert_index_equal(result, expected)
|
||||
|
||||
for other_closed in {"left", "right", "both", "neither"} - {closed}:
|
||||
index_other_closed = IntervalIndex.from_arrays(
|
||||
[0, 1], [1, 2], closed=other_closed
|
||||
)
|
||||
result = index1.append(index_other_closed)
|
||||
expected = index1.astype(object).append(index_other_closed.astype(object))
|
||||
tm.assert_index_equal(result, expected)
|
||||
|
||||
def test_is_non_overlapping_monotonic(self, closed):
|
||||
# Should be True in all cases
|
||||
tpls = [(0, 1), (2, 3), (4, 5), (6, 7)]
|
||||
idx = IntervalIndex.from_tuples(tpls, closed=closed)
|
||||
assert idx.is_non_overlapping_monotonic is True
|
||||
|
||||
idx = IntervalIndex.from_tuples(tpls[::-1], closed=closed)
|
||||
assert idx.is_non_overlapping_monotonic is True
|
||||
|
||||
# Should be False in all cases (overlapping)
|
||||
tpls = [(0, 2), (1, 3), (4, 5), (6, 7)]
|
||||
idx = IntervalIndex.from_tuples(tpls, closed=closed)
|
||||
assert idx.is_non_overlapping_monotonic is False
|
||||
|
||||
idx = IntervalIndex.from_tuples(tpls[::-1], closed=closed)
|
||||
assert idx.is_non_overlapping_monotonic is False
|
||||
|
||||
# Should be False in all cases (non-monotonic)
|
||||
tpls = [(0, 1), (2, 3), (6, 7), (4, 5)]
|
||||
idx = IntervalIndex.from_tuples(tpls, closed=closed)
|
||||
assert idx.is_non_overlapping_monotonic is False
|
||||
|
||||
idx = IntervalIndex.from_tuples(tpls[::-1], closed=closed)
|
||||
assert idx.is_non_overlapping_monotonic is False
|
||||
|
||||
# Should be False for closed='both', otherwise True (GH16560)
|
||||
if closed == "both":
|
||||
idx = IntervalIndex.from_breaks(range(4), closed=closed)
|
||||
assert idx.is_non_overlapping_monotonic is False
|
||||
else:
|
||||
idx = IntervalIndex.from_breaks(range(4), closed=closed)
|
||||
assert idx.is_non_overlapping_monotonic is True
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"start, shift, na_value",
|
||||
[
|
||||
(0, 1, np.nan),
|
||||
(Timestamp("2018-01-01"), Timedelta("1 day"), pd.NaT),
|
||||
(Timedelta("0 days"), Timedelta("1 day"), pd.NaT),
|
||||
],
|
||||
)
|
||||
def test_is_overlapping(self, start, shift, na_value, closed):
|
||||
# GH 23309
|
||||
# see test_interval_tree.py for extensive tests; interface tests here
|
||||
|
||||
# non-overlapping
|
||||
tuples = [(start + n * shift, start + (n + 1) * shift) for n in (0, 2, 4)]
|
||||
index = IntervalIndex.from_tuples(tuples, closed=closed)
|
||||
assert index.is_overlapping is False
|
||||
|
||||
# non-overlapping with NA
|
||||
tuples = [(na_value, na_value)] + tuples + [(na_value, na_value)]
|
||||
index = IntervalIndex.from_tuples(tuples, closed=closed)
|
||||
assert index.is_overlapping is False
|
||||
|
||||
# overlapping
|
||||
tuples = [(start + n * shift, start + (n + 2) * shift) for n in range(3)]
|
||||
index = IntervalIndex.from_tuples(tuples, closed=closed)
|
||||
assert index.is_overlapping is True
|
||||
|
||||
# overlapping with NA
|
||||
tuples = [(na_value, na_value)] + tuples + [(na_value, na_value)]
|
||||
index = IntervalIndex.from_tuples(tuples, closed=closed)
|
||||
assert index.is_overlapping is True
|
||||
|
||||
# common endpoints
|
||||
tuples = [(start + n * shift, start + (n + 1) * shift) for n in range(3)]
|
||||
index = IntervalIndex.from_tuples(tuples, closed=closed)
|
||||
result = index.is_overlapping
|
||||
expected = closed == "both"
|
||||
assert result is expected
|
||||
|
||||
# common endpoints with NA
|
||||
tuples = [(na_value, na_value)] + tuples + [(na_value, na_value)]
|
||||
index = IntervalIndex.from_tuples(tuples, closed=closed)
|
||||
result = index.is_overlapping
|
||||
assert result is expected
|
||||
|
||||
# intervals with duplicate left values
|
||||
a = [10, 15, 20, 25, 30, 35, 40, 45, 45, 50, 55, 60, 65, 70, 75, 80, 85]
|
||||
b = [15, 20, 25, 30, 35, 40, 45, 45, 50, 55, 60, 65, 70, 75, 80, 85, 90]
|
||||
index = IntervalIndex.from_arrays(a, b, closed="right")
|
||||
result = index.is_overlapping
|
||||
assert result is False
|
||||
|
||||
    @pytest.mark.parametrize(
        "tuples",
        [
            list(zip(range(10), range(1, 11))),
            list(
                zip(
                    date_range("20170101", periods=10),
                    date_range("20170101", periods=10),
                )
            ),
            list(
                zip(
                    timedelta_range("0 days", periods=10),
                    timedelta_range("1 day", periods=10),
                )
            ),
        ],
    )
    def test_to_tuples(self, tuples):
        # GH 18756
        idx = IntervalIndex.from_tuples(tuples)
        result = idx.to_tuples()
        expected = Index(com.asarray_tuplesafe(tuples))
        tm.assert_index_equal(result, expected)

    @pytest.mark.parametrize(
        "tuples",
        [
            list(zip(range(10), range(1, 11))) + [np.nan],
            list(
                zip(
                    date_range("20170101", periods=10),
                    date_range("20170101", periods=10),
                )
            )
            + [np.nan],
            list(
                zip(
                    timedelta_range("0 days", periods=10),
                    timedelta_range("1 day", periods=10),
                )
            )
            + [np.nan],
        ],
    )
    @pytest.mark.parametrize("na_tuple", [True, False])
    def test_to_tuples_na(self, tuples, na_tuple):
        # GH 18756
        idx = IntervalIndex.from_tuples(tuples)
        result = idx.to_tuples(na_tuple=na_tuple)

        # check the non-NA portion
        expected_notna = Index(com.asarray_tuplesafe(tuples[:-1]))
        result_notna = result[:-1]
        tm.assert_index_equal(result_notna, expected_notna)

        # check the NA portion
        result_na = result[-1]
        if na_tuple:
            assert isinstance(result_na, tuple)
            assert len(result_na) == 2
            assert all(isna(x) for x in result_na)
        else:
            assert isna(result_na)

    def test_nbytes(self):
        # GH 19209
        left = np.arange(0, 4, dtype="i8")
        right = np.arange(1, 5, dtype="i8")

        result = IntervalIndex.from_arrays(left, right).nbytes
        expected = 64  # 4 * 8 * 2
        assert result == expected

    @pytest.mark.parametrize("new_closed", ["left", "right", "both", "neither"])
    def test_set_closed(self, name, closed, new_closed):
        # GH 21670
        index = interval_range(0, 5, closed=closed, name=name)
        result = index.set_closed(new_closed)
        expected = interval_range(0, 5, closed=new_closed, name=name)
        tm.assert_index_equal(result, expected)

    @pytest.mark.parametrize("bad_closed", ["foo", 10, "LEFT", True, False])
    def test_set_closed_errors(self, bad_closed):
        # GH 21670
        index = interval_range(0, 5)
        msg = f"invalid option for 'closed': {bad_closed}"
        with pytest.raises(ValueError, match=msg):
            index.set_closed(bad_closed)

    def test_is_all_dates(self):
        # GH 23576
        year_2017 = Interval(
            Timestamp("2017-01-01 00:00:00"), Timestamp("2018-01-01 00:00:00")
        )
        year_2017_index = IntervalIndex([year_2017])
        assert not year_2017_index._is_all_dates


def test_dir():
    # GH#27571 dir(interval_index) should not raise
    index = IntervalIndex.from_arrays([0, 1], [1, 2])
    result = dir(index)
    assert "str" not in result


def test_searchsorted_different_argument_classes(listlike_box):
    # https://github.com/pandas-dev/pandas/issues/32762
    values = IntervalIndex([Interval(0, 1), Interval(1, 2)])
    result = values.searchsorted(listlike_box(values))
    expected = np.array([0, 1], dtype=result.dtype)
    tm.assert_numpy_array_equal(result, expected)

    result = values._data.searchsorted(listlike_box(values))
    tm.assert_numpy_array_equal(result, expected)


@pytest.mark.parametrize(
    "arg", [[1, 2], ["a", "b"], [Timestamp("2020-01-01", tz="Europe/London")] * 2]
)
def test_searchsorted_invalid_argument(arg):
    values = IntervalIndex([Interval(0, 1), Interval(1, 2)])
    msg = "'<' not supported between instances of 'pandas._libs.interval.Interval' and "
    with pytest.raises(TypeError, match=msg):
        values.searchsorted(arg)
@ -0,0 +1,369 @@
from datetime import timedelta

import numpy as np
import pytest

from pandas.core.dtypes.common import is_integer

from pandas import (
    DateOffset,
    Interval,
    IntervalIndex,
    Timedelta,
    Timestamp,
    date_range,
    interval_range,
    timedelta_range,
)
import pandas._testing as tm

from pandas.tseries.offsets import Day


@pytest.fixture(params=[None, "foo"])
def name(request):
    return request.param


class TestIntervalRange:
    @pytest.mark.parametrize("freq, periods", [(1, 100), (2.5, 40), (5, 20), (25, 4)])
    def test_constructor_numeric(self, closed, name, freq, periods):
        start, end = 0, 100
        breaks = np.arange(101, step=freq)
        expected = IntervalIndex.from_breaks(breaks, name=name, closed=closed)

        # defined from start/end/freq
        result = interval_range(
            start=start, end=end, freq=freq, name=name, closed=closed
        )
        tm.assert_index_equal(result, expected)

        # defined from start/periods/freq
        result = interval_range(
            start=start, periods=periods, freq=freq, name=name, closed=closed
        )
        tm.assert_index_equal(result, expected)

        # defined from end/periods/freq
        result = interval_range(
            end=end, periods=periods, freq=freq, name=name, closed=closed
        )
        tm.assert_index_equal(result, expected)

        # GH 20976: linspace behavior defined from start/end/periods
        result = interval_range(
            start=start, end=end, periods=periods, name=name, closed=closed
        )
        tm.assert_index_equal(result, expected)

    @pytest.mark.parametrize("tz", [None, "US/Eastern"])
    @pytest.mark.parametrize(
        "freq, periods", [("D", 364), ("2D", 182), ("22D18h", 16), ("ME", 11)]
    )
    def test_constructor_timestamp(self, closed, name, freq, periods, tz):
        start, end = Timestamp("20180101", tz=tz), Timestamp("20181231", tz=tz)
        breaks = date_range(start=start, end=end, freq=freq)
        expected = IntervalIndex.from_breaks(breaks, name=name, closed=closed)

        # defined from start/end/freq
        result = interval_range(
            start=start, end=end, freq=freq, name=name, closed=closed
        )
        tm.assert_index_equal(result, expected)

        # defined from start/periods/freq
        result = interval_range(
            start=start, periods=periods, freq=freq, name=name, closed=closed
        )
        tm.assert_index_equal(result, expected)

        # defined from end/periods/freq
        result = interval_range(
            end=end, periods=periods, freq=freq, name=name, closed=closed
        )
        tm.assert_index_equal(result, expected)

        # GH 20976: linspace behavior defined from start/end/periods
        if breaks.freq.n != 1 and tz is None:
            result = interval_range(
                start=start, end=end, periods=periods, name=name, closed=closed
            )
            tm.assert_index_equal(result, expected)

    @pytest.mark.parametrize(
        "freq, periods", [("D", 100), ("2D12h", 40), ("5D", 20), ("25D", 4)]
    )
    def test_constructor_timedelta(self, closed, name, freq, periods):
        start, end = Timedelta("0 days"), Timedelta("100 days")
        breaks = timedelta_range(start=start, end=end, freq=freq)
        expected = IntervalIndex.from_breaks(breaks, name=name, closed=closed)

        # defined from start/end/freq
        result = interval_range(
            start=start, end=end, freq=freq, name=name, closed=closed
        )
        tm.assert_index_equal(result, expected)

        # defined from start/periods/freq
        result = interval_range(
            start=start, periods=periods, freq=freq, name=name, closed=closed
        )
        tm.assert_index_equal(result, expected)

        # defined from end/periods/freq
        result = interval_range(
            end=end, periods=periods, freq=freq, name=name, closed=closed
        )
        tm.assert_index_equal(result, expected)

        # GH 20976: linspace behavior defined from start/end/periods
        result = interval_range(
            start=start, end=end, periods=periods, name=name, closed=closed
        )
        tm.assert_index_equal(result, expected)

    @pytest.mark.parametrize(
        "start, end, freq, expected_endpoint",
        [
            (0, 10, 3, 9),
            (0, 10, 1.5, 9),
            (0.5, 10, 3, 9.5),
            (Timedelta("0D"), Timedelta("10D"), "2D4h", Timedelta("8D16h")),
            (
                Timestamp("2018-01-01"),
                Timestamp("2018-02-09"),
                "MS",
                Timestamp("2018-02-01"),
            ),
            (
                Timestamp("2018-01-01", tz="US/Eastern"),
                Timestamp("2018-01-20", tz="US/Eastern"),
                "5D12h",
                Timestamp("2018-01-17 12:00:00", tz="US/Eastern"),
            ),
        ],
    )
    def test_early_truncation(self, start, end, freq, expected_endpoint):
        # index truncates early if freq causes end to be skipped
        result = interval_range(start=start, end=end, freq=freq)
        result_endpoint = result.right[-1]
        assert result_endpoint == expected_endpoint
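
        # Illustrative (follows from the parametrized cases above):
        # interval_range(start=0, end=10, freq=3) yields breaks [0, 3, 6, 9],
        # so the last right endpoint is 9 and the leftover span up to end=10
        # is dropped rather than producing a partial interval.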

    @pytest.mark.parametrize(
        "start, end, freq",
        [(0.5, None, None), (None, 4.5, None), (0.5, None, 1.5), (None, 6.5, 1.5)],
    )
    def test_no_invalid_float_truncation(self, start, end, freq):
        # GH 21161
        if freq is None:
            breaks = [0.5, 1.5, 2.5, 3.5, 4.5]
        else:
            breaks = [0.5, 2.0, 3.5, 5.0, 6.5]
        expected = IntervalIndex.from_breaks(breaks)

        result = interval_range(start=start, end=end, periods=4, freq=freq)
        tm.assert_index_equal(result, expected)

    @pytest.mark.parametrize(
        "start, mid, end",
        [
            (
                Timestamp("2018-03-10", tz="US/Eastern"),
                Timestamp("2018-03-10 23:30:00", tz="US/Eastern"),
                Timestamp("2018-03-12", tz="US/Eastern"),
            ),
            (
                Timestamp("2018-11-03", tz="US/Eastern"),
                Timestamp("2018-11-04 00:30:00", tz="US/Eastern"),
                Timestamp("2018-11-05", tz="US/Eastern"),
            ),
        ],
    )
    def test_linspace_dst_transition(self, start, mid, end):
        # GH 20976: linspace behavior defined from start/end/periods
        # accounts for the hour gained/lost during DST transition
        start = start.as_unit("ns")
        mid = mid.as_unit("ns")
        end = end.as_unit("ns")
        result = interval_range(start=start, end=end, periods=2)
        expected = IntervalIndex.from_breaks([start, mid, end])
        tm.assert_index_equal(result, expected)

    @pytest.mark.parametrize("freq", [2, 2.0])
    @pytest.mark.parametrize("end", [10, 10.0])
    @pytest.mark.parametrize("start", [0, 0.0])
    def test_float_subtype(self, start, end, freq):
        # Has float subtype if any of start/end/freq are float, even if all
        # resulting endpoints can safely be upcast to integers

        # defined from start/end/freq
        index = interval_range(start=start, end=end, freq=freq)
        result = index.dtype.subtype
        expected = "int64" if is_integer(start + end + freq) else "float64"
        assert result == expected

        # defined from start/periods/freq
        index = interval_range(start=start, periods=5, freq=freq)
        result = index.dtype.subtype
        expected = "int64" if is_integer(start + freq) else "float64"
        assert result == expected

        # defined from end/periods/freq
        index = interval_range(end=end, periods=5, freq=freq)
        result = index.dtype.subtype
        expected = "int64" if is_integer(end + freq) else "float64"
        assert result == expected

        # GH 20976: linspace behavior defined from start/end/periods
        index = interval_range(start=start, end=end, periods=5)
        result = index.dtype.subtype
        expected = "int64" if is_integer(start + end) else "float64"
        assert result == expected
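
        # Illustrative (follows from the rule asserted above):
        # interval_range(0, 10, freq=2) has subtype int64, while
        # interval_range(0.0, 10, freq=2) covers the same breaks but gets
        # subtype float64 because one of its defining values is a float.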

    def test_interval_range_fractional_period(self):
        # float value for periods
        expected = interval_range(start=0, periods=10)
        msg = "Non-integer 'periods' in pd.date_range, .* pd.interval_range"
        with tm.assert_produces_warning(FutureWarning, match=msg):
            result = interval_range(start=0, periods=10.5)
        tm.assert_index_equal(result, expected)

    def test_constructor_coverage(self):
        # equivalent timestamp-like start/end
        start, end = Timestamp("2017-01-01"), Timestamp("2017-01-15")
        expected = interval_range(start=start, end=end)

        result = interval_range(start=start.to_pydatetime(), end=end.to_pydatetime())
        tm.assert_index_equal(result, expected)

        result = interval_range(start=start.asm8, end=end.asm8)
        tm.assert_index_equal(result, expected)

        # equivalent freq with timestamp
        equiv_freq = [
            "D",
            Day(),
            Timedelta(days=1),
            timedelta(days=1),
            DateOffset(days=1),
        ]
        for freq in equiv_freq:
            result = interval_range(start=start, end=end, freq=freq)
            tm.assert_index_equal(result, expected)

        # equivalent timedelta-like start/end
        start, end = Timedelta(days=1), Timedelta(days=10)
        expected = interval_range(start=start, end=end)

        result = interval_range(start=start.to_pytimedelta(), end=end.to_pytimedelta())
        tm.assert_index_equal(result, expected)

        result = interval_range(start=start.asm8, end=end.asm8)
        tm.assert_index_equal(result, expected)

        # equivalent freq with timedelta
        equiv_freq = ["D", Day(), Timedelta(days=1), timedelta(days=1)]
        for freq in equiv_freq:
            result = interval_range(start=start, end=end, freq=freq)
            tm.assert_index_equal(result, expected)

    def test_errors(self):
        # not enough params
        msg = (
            "Of the four parameters: start, end, periods, and freq, "
            "exactly three must be specified"
        )

        with pytest.raises(ValueError, match=msg):
            interval_range(start=0)

        with pytest.raises(ValueError, match=msg):
            interval_range(end=5)

        with pytest.raises(ValueError, match=msg):
            interval_range(periods=2)

        with pytest.raises(ValueError, match=msg):
            interval_range()

        # too many params
        with pytest.raises(ValueError, match=msg):
            interval_range(start=0, end=5, periods=6, freq=1.5)

        # mixed units
        msg = "start, end, freq need to be type compatible"
        with pytest.raises(TypeError, match=msg):
            interval_range(start=0, end=Timestamp("20130101"), freq=2)

        with pytest.raises(TypeError, match=msg):
            interval_range(start=0, end=Timedelta("1 day"), freq=2)

        with pytest.raises(TypeError, match=msg):
            interval_range(start=0, end=10, freq="D")

        with pytest.raises(TypeError, match=msg):
            interval_range(start=Timestamp("20130101"), end=10, freq="D")

        with pytest.raises(TypeError, match=msg):
            interval_range(
                start=Timestamp("20130101"), end=Timedelta("1 day"), freq="D"
            )

        with pytest.raises(TypeError, match=msg):
            interval_range(
                start=Timestamp("20130101"), end=Timestamp("20130110"), freq=2
            )

        with pytest.raises(TypeError, match=msg):
            interval_range(start=Timedelta("1 day"), end=10, freq="D")

        with pytest.raises(TypeError, match=msg):
            interval_range(
                start=Timedelta("1 day"), end=Timestamp("20130110"), freq="D"
            )

        with pytest.raises(TypeError, match=msg):
            interval_range(start=Timedelta("1 day"), end=Timedelta("10 days"), freq=2)

        # invalid periods
        msg = "periods must be a number, got foo"
        with pytest.raises(TypeError, match=msg):
            interval_range(start=0, periods="foo")

        # invalid start
        msg = "start must be numeric or datetime-like, got foo"
        with pytest.raises(ValueError, match=msg):
            interval_range(start="foo", periods=10)

        # invalid end
        msg = r"end must be numeric or datetime-like, got \(0, 1\]"
        with pytest.raises(ValueError, match=msg):
            interval_range(end=Interval(0, 1), periods=10)

        # invalid freq for datetime-like
        msg = "freq must be numeric or convertible to DateOffset, got foo"
        with pytest.raises(ValueError, match=msg):
            interval_range(start=0, end=10, freq="foo")

        with pytest.raises(ValueError, match=msg):
            interval_range(start=Timestamp("20130101"), periods=10, freq="foo")

        with pytest.raises(ValueError, match=msg):
            interval_range(end=Timedelta("1 day"), periods=10, freq="foo")

        # mixed tz
        start = Timestamp("2017-01-01", tz="US/Eastern")
        end = Timestamp("2017-01-07", tz="US/Pacific")
        msg = "Start and end cannot both be tz-aware with different timezones"
        with pytest.raises(TypeError, match=msg):
            interval_range(start=start, end=end)

    def test_float_freq(self):
        # GH 54477
        result = interval_range(0, 1, freq=0.1)
        expected = IntervalIndex.from_breaks([0 + 0.1 * n for n in range(11)])
        tm.assert_index_equal(result, expected)

        result = interval_range(0, 1, freq=0.6)
        expected = IntervalIndex.from_breaks([0, 0.6])
        tm.assert_index_equal(result, expected)
@ -0,0 +1,208 @@
from itertools import permutations

import numpy as np
import pytest

from pandas._libs.interval import IntervalTree
from pandas.compat import IS64

import pandas._testing as tm


def skipif_32bit(param):
    """
    Skip parameters in a parametrize on 32bit systems. Specifically used
    here to skip leaf_size parameters related to GH 23440.
    """
    marks = pytest.mark.skipif(not IS64, reason="GH 23440: int type mismatch on 32bit")
    return pytest.param(param, marks=marks)


@pytest.fixture(params=["int64", "float64", "uint64"])
def dtype(request):
    return request.param


@pytest.fixture(params=[skipif_32bit(1), skipif_32bit(2), 10])
def leaf_size(request):
    """
    Fixture to specify IntervalTree leaf_size parameter; to be used with the
    tree fixture.
    """
    return request.param


@pytest.fixture(
    params=[
        np.arange(5, dtype="int64"),
        np.arange(5, dtype="uint64"),
        np.arange(5, dtype="float64"),
        np.array([0, 1, 2, 3, 4, np.nan], dtype="float64"),
    ]
)
def tree(request, leaf_size):
    left = request.param
    return IntervalTree(left, left + 2, leaf_size=leaf_size)
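

# Illustrative note: each parametrized tree stores intervals with endpoints
# (left, left + 2), e.g. 0..2, 1..3, ..., 4..6 in the int64 case, so
# neighboring intervals deliberately overlap and some queries below hit more
# than one interval.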


class TestIntervalTree:
    def test_get_indexer(self, tree):
        result = tree.get_indexer(np.array([1.0, 5.5, 6.5]))
        expected = np.array([0, 4, -1], dtype="intp")
        tm.assert_numpy_array_equal(result, expected)

        with pytest.raises(
            KeyError, match="'indexer does not intersect a unique set of intervals'"
        ):
            tree.get_indexer(np.array([3.0]))

    @pytest.mark.parametrize(
        "dtype, target_value, target_dtype",
        [("int64", 2**63 + 1, "uint64"), ("uint64", -1, "int64")],
    )
    def test_get_indexer_overflow(self, dtype, target_value, target_dtype):
        left, right = np.array([0, 1], dtype=dtype), np.array([1, 2], dtype=dtype)
        tree = IntervalTree(left, right)

        result = tree.get_indexer(np.array([target_value], dtype=target_dtype))
        expected = np.array([-1], dtype="intp")
        tm.assert_numpy_array_equal(result, expected)

    def test_get_indexer_non_unique(self, tree):
        indexer, missing = tree.get_indexer_non_unique(np.array([1.0, 2.0, 6.5]))

        result = indexer[:1]
        expected = np.array([0], dtype="intp")
        tm.assert_numpy_array_equal(result, expected)

        result = np.sort(indexer[1:3])
        expected = np.array([0, 1], dtype="intp")
        tm.assert_numpy_array_equal(result, expected)

        result = np.sort(indexer[3:])
        expected = np.array([-1], dtype="intp")
        tm.assert_numpy_array_equal(result, expected)

        result = missing
        expected = np.array([2], dtype="intp")
        tm.assert_numpy_array_equal(result, expected)

    @pytest.mark.parametrize(
        "dtype, target_value, target_dtype",
        [("int64", 2**63 + 1, "uint64"), ("uint64", -1, "int64")],
    )
    def test_get_indexer_non_unique_overflow(self, dtype, target_value, target_dtype):
        left, right = np.array([0, 2], dtype=dtype), np.array([1, 3], dtype=dtype)
        tree = IntervalTree(left, right)
        target = np.array([target_value], dtype=target_dtype)

        result_indexer, result_missing = tree.get_indexer_non_unique(target)
        expected_indexer = np.array([-1], dtype="intp")
        tm.assert_numpy_array_equal(result_indexer, expected_indexer)

        expected_missing = np.array([0], dtype="intp")
        tm.assert_numpy_array_equal(result_missing, expected_missing)

    def test_duplicates(self, dtype):
        left = np.array([0, 0, 0], dtype=dtype)
        tree = IntervalTree(left, left + 1)

        with pytest.raises(
            KeyError, match="'indexer does not intersect a unique set of intervals'"
        ):
            tree.get_indexer(np.array([0.5]))

        indexer, missing = tree.get_indexer_non_unique(np.array([0.5]))
        result = np.sort(indexer)
        expected = np.array([0, 1, 2], dtype="intp")
        tm.assert_numpy_array_equal(result, expected)

        result = missing
        expected = np.array([], dtype="intp")
        tm.assert_numpy_array_equal(result, expected)

    @pytest.mark.parametrize(
        "leaf_size", [skipif_32bit(1), skipif_32bit(10), skipif_32bit(100), 10000]
    )
    def test_get_indexer_closed(self, closed, leaf_size):
        x = np.arange(1000, dtype="float64")
        found = x.astype("intp")
        not_found = (-1 * np.ones(1000)).astype("intp")

        tree = IntervalTree(x, x + 0.5, closed=closed, leaf_size=leaf_size)
        tm.assert_numpy_array_equal(found, tree.get_indexer(x + 0.25))

        expected = found if tree.closed_left else not_found
        tm.assert_numpy_array_equal(expected, tree.get_indexer(x + 0.0))

        expected = found if tree.closed_right else not_found
        tm.assert_numpy_array_equal(expected, tree.get_indexer(x + 0.5))

    @pytest.mark.parametrize(
        "left, right, expected",
        [
            (np.array([0, 1, 4], dtype="int64"), np.array([2, 3, 5]), True),
            (np.array([0, 1, 2], dtype="int64"), np.array([5, 4, 3]), True),
            (np.array([0, 1, np.nan]), np.array([5, 4, np.nan]), True),
            (np.array([0, 2, 4], dtype="int64"), np.array([1, 3, 5]), False),
            (np.array([0, 2, np.nan]), np.array([1, 3, np.nan]), False),
        ],
    )
    @pytest.mark.parametrize("order", (list(x) for x in permutations(range(3))))
    def test_is_overlapping(self, closed, order, left, right, expected):
        # GH 23309
        tree = IntervalTree(left[order], right[order], closed=closed)
        result = tree.is_overlapping
        assert result is expected

    @pytest.mark.parametrize("order", (list(x) for x in permutations(range(3))))
    def test_is_overlapping_endpoints(self, closed, order):
        """shared endpoints are marked as overlapping"""
        # GH 23309
        left, right = np.arange(3, dtype="int64"), np.arange(1, 4)
        tree = IntervalTree(left[order], right[order], closed=closed)
        result = tree.is_overlapping
        expected = closed == "both"
        assert result is expected

    @pytest.mark.parametrize(
        "left, right",
        [
            (np.array([], dtype="int64"), np.array([], dtype="int64")),
            (np.array([0], dtype="int64"), np.array([1], dtype="int64")),
            (np.array([np.nan]), np.array([np.nan])),
            (np.array([np.nan] * 3), np.array([np.nan] * 3)),
        ],
    )
    def test_is_overlapping_trivial(self, closed, left, right):
        # GH 23309
        tree = IntervalTree(left, right, closed=closed)
        assert tree.is_overlapping is False

    @pytest.mark.skipif(not IS64, reason="GH 23440")
    def test_construction_overflow(self):
        # GH 25485
        left, right = np.arange(101, dtype="int64"), [np.iinfo(np.int64).max] * 101
        tree = IntervalTree(left, right)

        # pivot should be average of left/right medians
        result = tree.root.pivot
        expected = (50 + np.iinfo(np.int64).max) / 2
        assert result == expected

    @pytest.mark.parametrize(
        "left, right, expected",
        [
            ([-np.inf, 1.0], [1.0, 2.0], 0.0),
            ([-np.inf, -2.0], [-2.0, -1.0], -2.0),
            ([-2.0, -1.0], [-1.0, np.inf], 0.0),
            ([1.0, 2.0], [2.0, np.inf], 2.0),
        ],
    )
    def test_inf_bound_infinite_recursion(self, left, right, expected):
        # GH 46658
        tree = IntervalTree(left * 101, right * 101)

        result = tree.root.pivot
        assert result == expected
@ -0,0 +1,44 @@
import pytest

from pandas import (
    IntervalIndex,
    MultiIndex,
    RangeIndex,
)
import pandas._testing as tm


@pytest.fixture
def range_index():
    return RangeIndex(3, name="range_index")


@pytest.fixture
def interval_index():
    return IntervalIndex.from_tuples(
        [(0.0, 1.0), (1.0, 2.0), (1.5, 2.5)], name="interval_index"
    )


def test_join_overlapping_in_mi_to_same_intervalindex(range_index, interval_index):
    # GH-45661
    multi_index = MultiIndex.from_product([interval_index, range_index])
    result = multi_index.join(interval_index)

    tm.assert_index_equal(result, multi_index)


def test_join_overlapping_to_multiindex_with_same_interval(range_index, interval_index):
    # GH-45661
    multi_index = MultiIndex.from_product([interval_index, range_index])
    result = interval_index.join(multi_index)

    tm.assert_index_equal(result, multi_index)


def test_join_overlapping_interval_to_another_intervalindex(interval_index):
    # GH-45661
    flipped_interval_index = interval_index[::-1]
    result = interval_index.join(flipped_interval_index)

    tm.assert_index_equal(result, interval_index)
@ -0,0 +1,13 @@
import pytest

from pandas import IntervalIndex
import pandas._testing as tm


class TestPickle:
    @pytest.mark.parametrize("closed", ["left", "right", "both"])
    def test_pickle_round_trip_closed(self, closed):
        # https://github.com/pandas-dev/pandas/issues/35658
        idx = IntervalIndex.from_tuples([(1, 2), (2, 3)], closed=closed)
        result = tm.round_trip_pickle(idx)
        tm.assert_index_equal(result, idx)
@ -0,0 +1,208 @@
import numpy as np
import pytest

from pandas import (
    Index,
    IntervalIndex,
    Timestamp,
    interval_range,
)
import pandas._testing as tm


def monotonic_index(start, end, dtype="int64", closed="right"):
    return IntervalIndex.from_breaks(np.arange(start, end, dtype=dtype), closed=closed)


def empty_index(dtype="int64", closed="right"):
    return IntervalIndex(np.array([], dtype=dtype), closed=closed)
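

# Illustrative: monotonic_index(0, 4) is IntervalIndex.from_breaks([0, 1, 2, 3]),
# i.e. the right-closed intervals (0, 1], (1, 2], (2, 3].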


class TestIntervalIndex:
    def test_union(self, closed, sort):
        index = monotonic_index(0, 11, closed=closed)
        other = monotonic_index(5, 13, closed=closed)

        expected = monotonic_index(0, 13, closed=closed)
        result = index[::-1].union(other, sort=sort)
        if sort in (None, True):
            tm.assert_index_equal(result, expected)
        else:
            tm.assert_index_equal(result.sort_values(), expected)

        result = other[::-1].union(index, sort=sort)
        if sort in (None, True):
            tm.assert_index_equal(result, expected)
        else:
            tm.assert_index_equal(result.sort_values(), expected)

        tm.assert_index_equal(index.union(index, sort=sort), index)
        tm.assert_index_equal(index.union(index[:1], sort=sort), index)

    def test_union_empty_result(self, closed, sort):
        # GH 19101: empty result, same dtype
        index = empty_index(dtype="int64", closed=closed)
        result = index.union(index, sort=sort)
        tm.assert_index_equal(result, index)

        # GH 19101: empty result, different numeric dtypes -> common dtype is f8
        other = empty_index(dtype="float64", closed=closed)
        result = index.union(other, sort=sort)
        expected = other
        tm.assert_index_equal(result, expected)

        other = index.union(index, sort=sort)
        tm.assert_index_equal(result, expected)

        other = empty_index(dtype="uint64", closed=closed)
        result = index.union(other, sort=sort)
        tm.assert_index_equal(result, expected)

        result = other.union(index, sort=sort)
        tm.assert_index_equal(result, expected)

    def test_intersection(self, closed, sort):
        index = monotonic_index(0, 11, closed=closed)
        other = monotonic_index(5, 13, closed=closed)

        expected = monotonic_index(5, 11, closed=closed)
        result = index[::-1].intersection(other, sort=sort)
        if sort in (None, True):
            tm.assert_index_equal(result, expected)
        else:
            tm.assert_index_equal(result.sort_values(), expected)

        result = other[::-1].intersection(index, sort=sort)
        if sort in (None, True):
            tm.assert_index_equal(result, expected)
        else:
            tm.assert_index_equal(result.sort_values(), expected)

        tm.assert_index_equal(index.intersection(index, sort=sort), index)

        # GH 26225: nested intervals
        index = IntervalIndex.from_tuples([(1, 2), (1, 3), (1, 4), (0, 2)])
        other = IntervalIndex.from_tuples([(1, 2), (1, 3)])
        expected = IntervalIndex.from_tuples([(1, 2), (1, 3)])
        result = index.intersection(other)
        tm.assert_index_equal(result, expected)

        # GH 26225
        index = IntervalIndex.from_tuples([(0, 3), (0, 2)])
        other = IntervalIndex.from_tuples([(0, 2), (1, 3)])
        expected = IntervalIndex.from_tuples([(0, 2)])
        result = index.intersection(other)
        tm.assert_index_equal(result, expected)

        # GH 26225: duplicate nan element
        index = IntervalIndex([np.nan, np.nan])
        other = IntervalIndex([np.nan])
        expected = IntervalIndex([np.nan])
        result = index.intersection(other)
        tm.assert_index_equal(result, expected)

    def test_intersection_empty_result(self, closed, sort):
        index = monotonic_index(0, 11, closed=closed)

        # GH 19101: empty result, same dtype
        other = monotonic_index(300, 314, closed=closed)
        expected = empty_index(dtype="int64", closed=closed)
        result = index.intersection(other, sort=sort)
        tm.assert_index_equal(result, expected)

        # GH 19101: empty result, different numeric dtypes -> common dtype is float64
        other = monotonic_index(300, 314, dtype="float64", closed=closed)
        result = index.intersection(other, sort=sort)
        expected = other[:0]
        tm.assert_index_equal(result, expected)

        other = monotonic_index(300, 314, dtype="uint64", closed=closed)
        result = index.intersection(other, sort=sort)
        tm.assert_index_equal(result, expected)

    def test_intersection_duplicates(self):
        # GH#38743
        index = IntervalIndex.from_tuples([(1, 2), (1, 2), (2, 3), (3, 4)])
        other = IntervalIndex.from_tuples([(1, 2), (2, 3)])
        expected = IntervalIndex.from_tuples([(1, 2), (2, 3)])
        result = index.intersection(other)
        tm.assert_index_equal(result, expected)

    def test_difference(self, closed, sort):
        index = IntervalIndex.from_arrays([1, 0, 3, 2], [1, 2, 3, 4], closed=closed)
        result = index.difference(index[:1], sort=sort)
        expected = index[1:]
        if sort is None:
            expected = expected.sort_values()
        tm.assert_index_equal(result, expected)

        # GH 19101: empty result, same dtype
        result = index.difference(index, sort=sort)
        expected = empty_index(dtype="int64", closed=closed)
        tm.assert_index_equal(result, expected)

        # GH 19101: empty result, different dtypes
        other = IntervalIndex.from_arrays(
            index.left.astype("float64"), index.right, closed=closed
        )
        result = index.difference(other, sort=sort)
        tm.assert_index_equal(result, expected)

    def test_symmetric_difference(self, closed, sort):
        index = monotonic_index(0, 11, closed=closed)
        result = index[1:].symmetric_difference(index[:-1], sort=sort)
        expected = IntervalIndex([index[0], index[-1]])
        if sort in (None, True):
            tm.assert_index_equal(result, expected)
        else:
            tm.assert_index_equal(result.sort_values(), expected)

        # GH 19101: empty result, same dtype
        result = index.symmetric_difference(index, sort=sort)
        expected = empty_index(dtype="int64", closed=closed)
        if sort in (None, True):
            tm.assert_index_equal(result, expected)
        else:
            tm.assert_index_equal(result.sort_values(), expected)

        # GH 19101: empty result, different dtypes
        other = IntervalIndex.from_arrays(
            index.left.astype("float64"), index.right, closed=closed
        )
        result = index.symmetric_difference(other, sort=sort)
        expected = empty_index(dtype="float64", closed=closed)
        tm.assert_index_equal(result, expected)

    @pytest.mark.filterwarnings("ignore:'<' not supported between:RuntimeWarning")
    @pytest.mark.parametrize(
        "op_name", ["union", "intersection", "difference", "symmetric_difference"]
    )
    def test_set_incompatible_types(self, closed, op_name, sort):
index = monotonic_index(0, 11, closed=closed)
|
||||
set_op = getattr(index, op_name)
|
||||
|
||||
# TODO: standardize return type of non-union setops type(self vs other)
|
||||
# non-IntervalIndex
|
||||
if op_name == "difference":
|
||||
expected = index
|
||||
else:
|
||||
expected = getattr(index.astype("O"), op_name)(Index([1, 2, 3]))
|
||||
result = set_op(Index([1, 2, 3]), sort=sort)
|
||||
tm.assert_index_equal(result, expected)
|
||||
|
||||
# mixed closed -> cast to object
|
||||
for other_closed in {"right", "left", "both", "neither"} - {closed}:
|
||||
other = monotonic_index(0, 11, closed=other_closed)
|
||||
expected = getattr(index.astype(object), op_name)(other, sort=sort)
|
||||
if op_name == "difference":
|
||||
expected = index
|
||||
result = set_op(other, sort=sort)
|
||||
tm.assert_index_equal(result, expected)
|
||||
|
||||
# GH 19016: incompatible dtypes -> cast to object
|
||||
other = interval_range(Timestamp("20180101"), periods=9, closed=closed)
|
||||
expected = getattr(index.astype(object), op_name)(other, sort=sort)
|
||||
if op_name == "difference":
|
||||
expected = index
|
||||
result = set_op(other, sort=sort)
|
||||
tm.assert_index_equal(result, expected)
|
||||
@ -0,0 +1,27 @@
import numpy as np
import pytest

from pandas import (
    Index,
    MultiIndex,
)


# Note: identical to the "multi" entry in the top-level "index" fixture
@pytest.fixture
def idx():
    # a MultiIndex used to test the general functionality of this object
    major_axis = Index(["foo", "bar", "baz", "qux"])
    minor_axis = Index(["one", "two"])

    major_codes = np.array([0, 0, 1, 2, 3, 3])
    minor_codes = np.array([0, 1, 0, 1, 0, 1])
    index_names = ["first", "second"]
    mi = MultiIndex(
        levels=[major_axis, minor_axis],
        codes=[major_codes, minor_codes],
        names=index_names,
        verify_integrity=False,
    )
    return mi
@ -0,0 +1,263 @@
import numpy as np
import pytest

import pandas as pd
from pandas import (
    Index,
    MultiIndex,
    date_range,
    period_range,
)
import pandas._testing as tm


def test_infer_objects(idx):
    with pytest.raises(NotImplementedError, match="to_frame"):
        idx.infer_objects()


def test_shift(idx):
    # GH8083 test the base class for shift
    msg = (
        "This method is only implemented for DatetimeIndex, PeriodIndex and "
        "TimedeltaIndex; Got type MultiIndex"
    )
    with pytest.raises(NotImplementedError, match=msg):
        idx.shift(1)
    with pytest.raises(NotImplementedError, match=msg):
        idx.shift(1, 2)


def test_groupby(idx):
    groups = idx.groupby(np.array([1, 1, 1, 2, 2, 2]))
    labels = idx.tolist()
    exp = {1: labels[:3], 2: labels[3:]}
    tm.assert_dict_equal(groups, exp)

    # GH5620
    groups = idx.groupby(idx)
    exp = {key: [key] for key in idx}
    tm.assert_dict_equal(groups, exp)


def test_truncate_multiindex():
    # GH 34564 for MultiIndex level names check
    major_axis = Index(list(range(4)))
    minor_axis = Index(list(range(2)))

    major_codes = np.array([0, 0, 1, 2, 3, 3])
    minor_codes = np.array([0, 1, 0, 1, 0, 1])

    index = MultiIndex(
        levels=[major_axis, minor_axis],
        codes=[major_codes, minor_codes],
        names=["L1", "L2"],
    )

    result = index.truncate(before=1)
    assert "foo" not in result.levels[0]
    assert 1 in result.levels[0]
    assert index.names == result.names

    result = index.truncate(after=1)
    assert 2 not in result.levels[0]
    assert 1 in result.levels[0]
    assert index.names == result.names

    result = index.truncate(before=1, after=2)
    assert len(result.levels[0]) == 2
    assert index.names == result.names

    msg = "after < before"
    with pytest.raises(ValueError, match=msg):
        index.truncate(3, 1)


# TODO: reshape


def test_reorder_levels(idx):
    # this blows up
    with pytest.raises(IndexError, match="^Too many levels"):
        idx.reorder_levels([2, 1, 0])


def test_numpy_repeat():
    reps = 2
    numbers = [1, 2, 3]
    names = np.array(["foo", "bar"])

    m = MultiIndex.from_product([numbers, names], names=names)
    expected = MultiIndex.from_product([numbers, names.repeat(reps)], names=names)
    tm.assert_index_equal(np.repeat(m, reps), expected)

    msg = "the 'axis' parameter is not supported"
    with pytest.raises(ValueError, match=msg):
        np.repeat(m, reps, axis=1)


def test_append_mixed_dtypes():
    # GH 13660
    dti = date_range("2011-01-01", freq="ME", periods=3)
    dti_tz = date_range("2011-01-01", freq="ME", periods=3, tz="US/Eastern")
    pi = period_range("2011-01", freq="M", periods=3)

    mi = MultiIndex.from_arrays(
        [[1, 2, 3], [1.1, np.nan, 3.3], ["a", "b", "c"], dti, dti_tz, pi]
    )
    assert mi.nlevels == 6

    res = mi.append(mi)
    exp = MultiIndex.from_arrays(
        [
            [1, 2, 3, 1, 2, 3],
            [1.1, np.nan, 3.3, 1.1, np.nan, 3.3],
            ["a", "b", "c", "a", "b", "c"],
            dti.append(dti),
            dti_tz.append(dti_tz),
            pi.append(pi),
        ]
    )
    tm.assert_index_equal(res, exp)

    other = MultiIndex.from_arrays(
        [
            ["x", "y", "z"],
            ["x", "y", "z"],
            ["x", "y", "z"],
            ["x", "y", "z"],
            ["x", "y", "z"],
            ["x", "y", "z"],
        ]
    )

    res = mi.append(other)
    exp = MultiIndex.from_arrays(
        [
            [1, 2, 3, "x", "y", "z"],
            [1.1, np.nan, 3.3, "x", "y", "z"],
            ["a", "b", "c", "x", "y", "z"],
            dti.append(Index(["x", "y", "z"])),
            dti_tz.append(Index(["x", "y", "z"])),
            pi.append(Index(["x", "y", "z"])),
        ]
    )
    tm.assert_index_equal(res, exp)


def test_iter(idx):
    result = list(idx)
    expected = [
        ("foo", "one"),
        ("foo", "two"),
        ("bar", "one"),
        ("baz", "two"),
        ("qux", "one"),
        ("qux", "two"),
    ]
    assert result == expected


def test_sub(idx):
    first = idx

    # - now raises (previously was set op difference)
    msg = "cannot perform __sub__ with this index type: MultiIndex"
    with pytest.raises(TypeError, match=msg):
        first - idx[-3:]
    with pytest.raises(TypeError, match=msg):
        idx[-3:] - first
    with pytest.raises(TypeError, match=msg):
        idx[-3:] - first.tolist()
    msg = "cannot perform __rsub__ with this index type: MultiIndex"
    with pytest.raises(TypeError, match=msg):
        first.tolist() - idx[-3:]


def test_map(idx):
    # callable
    index = idx

    result = index.map(lambda x: x)
    tm.assert_index_equal(result, index)


@pytest.mark.parametrize(
    "mapper",
    [
        lambda values, idx: {i: e for e, i in zip(values, idx)},
        lambda values, idx: pd.Series(values, idx),
    ],
)
def test_map_dictlike(idx, mapper):
    identity = mapper(idx.values, idx)

    # we don't infer to uint64 dtype for a dict
    if idx.dtype == np.uint64 and isinstance(identity, dict):
        expected = idx.astype("int64")
    else:
        expected = idx

    result = idx.map(identity)
    tm.assert_index_equal(result, expected)

    # empty mappable
    expected = Index([np.nan] * len(idx))
    result = idx.map(mapper(expected, idx))
    tm.assert_index_equal(result, expected)
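

# Element-wise numpy ufuncs cannot operate on a MultiIndex: its values are an
# object array of tuples, so every call below is expected to raise TypeError.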
@pytest.mark.parametrize(
    "func",
    [
        np.exp,
        np.exp2,
        np.expm1,
        np.log,
        np.log2,
        np.log10,
        np.log1p,
        np.sqrt,
        np.sin,
        np.cos,
        np.tan,
        np.arcsin,
        np.arccos,
        np.arctan,
        np.sinh,
        np.cosh,
        np.tanh,
        np.arcsinh,
        np.arccosh,
        np.arctanh,
        np.deg2rad,
        np.rad2deg,
    ],
    ids=lambda func: func.__name__,
)
def test_numpy_ufuncs(idx, func):
    # test ufuncs of numpy. see:
    # https://numpy.org/doc/stable/reference/ufuncs.html

    expected_exception = TypeError
    msg = (
        "loop of ufunc does not support argument 0 of type tuple which "
        f"has no callable {func.__name__} method"
    )
    with pytest.raises(expected_exception, match=msg):
        func(idx)


@pytest.mark.parametrize(
    "func",
    [np.isfinite, np.isinf, np.isnan, np.signbit],
    ids=lambda func: func.__name__,
)
def test_numpy_type_funcs(idx, func):
    msg = (
        f"ufunc '{func.__name__}' not supported for the input types, and the inputs "
        "could not be safely coerced to any supported types according to "
        "the casting rule ''safe''"
    )
    with pytest.raises(TypeError, match=msg):
        func(idx)
@ -0,0 +1,30 @@
import numpy as np
import pytest

from pandas.core.dtypes.dtypes import CategoricalDtype

import pandas._testing as tm


def test_astype(idx):
    expected = idx.copy()
    actual = idx.astype("O")
    tm.assert_copy(actual.levels, expected.levels)
    tm.assert_copy(actual.codes, expected.codes)
    assert actual.names == list(expected.names)

    with pytest.raises(TypeError, match="^Setting.*dtype.*object"):
        idx.astype(np.dtype(int))


@pytest.mark.parametrize("ordered", [True, False])
def test_astype_category(idx, ordered):
    # GH 18630
    msg = "> 1 ndim Categorical are not supported at this time"
    with pytest.raises(NotImplementedError, match=msg):
        idx.astype(CategoricalDtype(ordered=ordered))

    if ordered is False:
        # dtype='category' defaults to ordered=False, so only test once
        with pytest.raises(NotImplementedError, match=msg):
            idx.astype("category")
@ -0,0 +1,122 @@
import numpy as np
import pytest

import pandas as pd
from pandas import MultiIndex
import pandas._testing as tm


def test_numeric_compat(idx):
    with pytest.raises(TypeError, match="cannot perform __mul__"):
        idx * 1

    with pytest.raises(TypeError, match="cannot perform __rmul__"):
        1 * idx

    div_err = "cannot perform __truediv__"
    with pytest.raises(TypeError, match=div_err):
        idx / 1

    div_err = div_err.replace(" __", " __r")
    with pytest.raises(TypeError, match=div_err):
        1 / idx

    with pytest.raises(TypeError, match="cannot perform __floordiv__"):
        idx // 1

    with pytest.raises(TypeError, match="cannot perform __rfloordiv__"):
        1 // idx


@pytest.mark.parametrize("method", ["all", "any", "__invert__"])
def test_logical_compat(idx, method):
    msg = f"cannot perform {method}"

    with pytest.raises(TypeError, match=msg):
        getattr(idx, method)()
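

# The next test pokes at MultiIndex._cache, an implementation detail: .values
# is computed lazily and cached, and set_levels/set_codes return new objects
# instead of mutating the cached values in place.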
def test_inplace_mutation_resets_values():
    levels = [["a", "b", "c"], [4]]
    levels2 = [[1, 2, 3], ["a"]]
    codes = [[0, 1, 0, 2, 2, 0], [0, 0, 0, 0, 0, 0]]

    mi1 = MultiIndex(levels=levels, codes=codes)
    mi2 = MultiIndex(levels=levels2, codes=codes)

    # instantiating MultiIndex should not access/cache _.values
    assert "_values" not in mi1._cache
    assert "_values" not in mi2._cache

    vals = mi1.values.copy()
    vals2 = mi2.values.copy()

    # accessing .values should cache ._values
    assert mi1._values is mi1._cache["_values"]
    assert mi1.values is mi1._cache["_values"]
    assert isinstance(mi1._cache["_values"], np.ndarray)

    # Make sure level setting works
    new_vals = mi1.set_levels(levels2).values
    tm.assert_almost_equal(vals2, new_vals)

    # Doesn't drop _values from _cache [implementation detail]
    tm.assert_almost_equal(mi1._cache["_values"], vals)

    # ...and values is still same too
    tm.assert_almost_equal(mi1.values, vals)

    # Make sure label setting works too
    codes2 = [[0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0]]
    exp_values = np.empty((6,), dtype=object)
    exp_values[:] = [(1, "a")] * 6

    # Must be 1d array of tuples
    assert exp_values.shape == (6,)

    new_mi = mi2.set_codes(codes2)
    assert "_values" not in new_mi._cache
    new_values = new_mi.values
    assert "_values" in new_mi._cache

    # Shouldn't change cache
    tm.assert_almost_equal(mi2._cache["_values"], vals2)

    # Should have correct values
    tm.assert_almost_equal(exp_values, new_values)


def test_boxable_categorical_values():
    cat = pd.Categorical(pd.date_range("2012-01-01", periods=3, freq="h"))
    result = MultiIndex.from_product([["a", "b", "c"], cat]).values
    expected = pd.Series(
        [
            ("a", pd.Timestamp("2012-01-01 00:00:00")),
            ("a", pd.Timestamp("2012-01-01 01:00:00")),
            ("a", pd.Timestamp("2012-01-01 02:00:00")),
            ("b", pd.Timestamp("2012-01-01 00:00:00")),
            ("b", pd.Timestamp("2012-01-01 01:00:00")),
            ("b", pd.Timestamp("2012-01-01 02:00:00")),
            ("c", pd.Timestamp("2012-01-01 00:00:00")),
            ("c", pd.Timestamp("2012-01-01 01:00:00")),
            ("c", pd.Timestamp("2012-01-01 02:00:00")),
        ]
    ).values
    tm.assert_numpy_array_equal(result, expected)
    result = pd.DataFrame({"a": ["a", "b", "c"], "b": cat, "c": np.array(cat)}).values
    expected = pd.DataFrame(
        {
            "a": ["a", "b", "c"],
            "b": [
                pd.Timestamp("2012-01-01 00:00:00"),
                pd.Timestamp("2012-01-01 01:00:00"),
                pd.Timestamp("2012-01-01 02:00:00"),
            ],
            "c": [
                pd.Timestamp("2012-01-01 00:00:00"),
                pd.Timestamp("2012-01-01 01:00:00"),
                pd.Timestamp("2012-01-01 02:00:00"),
            ],
        }
    ).values
    tm.assert_numpy_array_equal(result, expected)
@ -0,0 +1,860 @@
from datetime import (
    date,
    datetime,
)
import itertools

import numpy as np
import pytest

from pandas.core.dtypes.cast import construct_1d_object_array_from_listlike

import pandas as pd
from pandas import (
    Index,
    MultiIndex,
    Series,
    Timestamp,
    date_range,
)
import pandas._testing as tm


def test_constructor_single_level():
    result = MultiIndex(
        levels=[["foo", "bar", "baz", "qux"]], codes=[[0, 1, 2, 3]], names=["first"]
    )
    assert isinstance(result, MultiIndex)
    expected = Index(["foo", "bar", "baz", "qux"], name="first")
    tm.assert_index_equal(result.levels[0], expected)
    assert result.names == ["first"]


def test_constructor_no_levels():
    msg = "non-zero number of levels/codes"
    with pytest.raises(ValueError, match=msg):
        MultiIndex(levels=[], codes=[])

    msg = "Must pass both levels and codes"
    with pytest.raises(TypeError, match=msg):
        MultiIndex(levels=[])
    with pytest.raises(TypeError, match=msg):
        MultiIndex(codes=[])


def test_constructor_nonhashable_names():
    # GH 20527
    levels = [[1, 2], ["one", "two"]]
    codes = [[0, 0, 1, 1], [0, 1, 0, 1]]
    names = (["foo"], ["bar"])
    msg = r"MultiIndex\.name must be a hashable type"
    with pytest.raises(TypeError, match=msg):
        MultiIndex(levels=levels, codes=codes, names=names)

    # With .rename()
    mi = MultiIndex(
        levels=[[1, 2], ["one", "two"]],
        codes=[[0, 0, 1, 1], [0, 1, 0, 1]],
        names=("foo", "bar"),
    )
    renamed = [["fooo"], ["barr"]]
    with pytest.raises(TypeError, match=msg):
        mi.rename(names=renamed)

    # With .set_names()
    with pytest.raises(TypeError, match=msg):
        mi.set_names(names=renamed)


def test_constructor_mismatched_codes_levels(idx):
    codes = [np.array([1]), np.array([2]), np.array([3])]
    levels = ["a"]

    msg = "Length of levels and codes must be the same"
    with pytest.raises(ValueError, match=msg):
        MultiIndex(levels=levels, codes=codes)

    length_error = (
        r"On level 0, code max \(3\) >= length of level \(1\)\. "
        "NOTE: this index is in an inconsistent state"
    )
    label_error = r"Unequal code lengths: \[4, 2\]"
    code_value_error = r"On level 0, code value \(-2\) < -1"

    # important to check that it's looking at the right thing.
    with pytest.raises(ValueError, match=length_error):
        MultiIndex(levels=[["a"], ["b"]], codes=[[0, 1, 2, 3], [0, 3, 4, 1]])

    with pytest.raises(ValueError, match=label_error):
        MultiIndex(levels=[["a"], ["b"]], codes=[[0, 0, 0, 0], [0, 0]])

    # external API
    with pytest.raises(ValueError, match=length_error):
        idx.copy().set_levels([["a"], ["b"]])

    with pytest.raises(ValueError, match=label_error):
        idx.copy().set_codes([[0, 0, 0, 0], [0, 0]])

    # test set_codes with verify_integrity=False
    # the setting should not raise any value error
    idx.copy().set_codes(codes=[[0, 0, 0, 0], [0, 0]], verify_integrity=False)

    # code value smaller than -1
    with pytest.raises(ValueError, match=code_value_error):
        MultiIndex(levels=[["a"], ["b"]], codes=[[0, -2], [0, 0]])


def test_na_levels():
    # GH26408
    # test if codes are re-assigned value -1 for levels
    # with missing values (NaN, NaT, None)
    result = MultiIndex(
        levels=[[np.nan, None, pd.NaT, 128, 2]], codes=[[0, -1, 1, 2, 3, 4]]
    )
    expected = MultiIndex(
        levels=[[np.nan, None, pd.NaT, 128, 2]], codes=[[-1, -1, -1, -1, 3, 4]]
    )
    tm.assert_index_equal(result, expected)

    result = MultiIndex(
        levels=[[np.nan, "s", pd.NaT, 128, None]], codes=[[0, -1, 1, 2, 3, 4]]
    )
    expected = MultiIndex(
        levels=[[np.nan, "s", pd.NaT, 128, None]], codes=[[-1, -1, 1, -1, 3, -1]]
    )
    tm.assert_index_equal(result, expected)

    # verify set_levels and set_codes
    result = MultiIndex(
        levels=[[1, 2, 3, 4, 5]], codes=[[0, -1, 1, 2, 3, 4]]
    ).set_levels([[np.nan, "s", pd.NaT, 128, None]])
    tm.assert_index_equal(result, expected)

    result = MultiIndex(
        levels=[[np.nan, "s", pd.NaT, 128, None]], codes=[[1, 2, 2, 2, 2, 2]]
    ).set_codes([[0, -1, 1, 2, 3, 4]])
    tm.assert_index_equal(result, expected)


def test_copy_in_constructor():
    levels = np.array(["a", "b", "c"])
    codes = np.array([1, 1, 2, 0, 0, 1, 1])
    val = codes[0]
    mi = MultiIndex(levels=[levels, levels], codes=[codes, codes], copy=True)
    assert mi.codes[0][0] == val
    codes[0] = 15
    assert mi.codes[0][0] == val
    val = levels[0]
    levels[0] = "PANDA"
    assert mi.levels[0][0] == val


# ----------------------------------------------------------------------------
# from_arrays
# ----------------------------------------------------------------------------
def test_from_arrays(idx):
    arrays = [
        np.asarray(lev).take(level_codes)
        for lev, level_codes in zip(idx.levels, idx.codes)
    ]

    # list of arrays as input
    result = MultiIndex.from_arrays(arrays, names=idx.names)
    tm.assert_index_equal(result, idx)

    # infer correctly
    result = MultiIndex.from_arrays([[pd.NaT, Timestamp("20130101")], ["a", "b"]])
    assert result.levels[0].equals(Index([Timestamp("20130101")]))
    assert result.levels[1].equals(Index(["a", "b"]))


def test_from_arrays_iterator(idx):
    # GH 18434
    arrays = [
        np.asarray(lev).take(level_codes)
        for lev, level_codes in zip(idx.levels, idx.codes)
    ]

    # iterator as input
    result = MultiIndex.from_arrays(iter(arrays), names=idx.names)
    tm.assert_index_equal(result, idx)

    # invalid iterator input
    msg = "Input must be a list / sequence of array-likes."
    with pytest.raises(TypeError, match=msg):
        MultiIndex.from_arrays(0)


def test_from_arrays_tuples(idx):
    arrays = tuple(
        tuple(np.asarray(lev).take(level_codes))
        for lev, level_codes in zip(idx.levels, idx.codes)
    )

    # tuple of tuples as input
    result = MultiIndex.from_arrays(arrays, names=idx.names)
    tm.assert_index_equal(result, idx)


@pytest.mark.parametrize(
    ("idx1", "idx2"),
    [
        (
            pd.period_range("2011-01-01", freq="D", periods=3),
            pd.period_range("2015-01-01", freq="h", periods=3),
        ),
        (
            date_range("2015-01-01 10:00", freq="D", periods=3, tz="US/Eastern"),
            date_range("2015-01-01 10:00", freq="h", periods=3, tz="Asia/Tokyo"),
        ),
        (
            pd.timedelta_range("1 days", freq="D", periods=3),
            pd.timedelta_range("2 hours", freq="h", periods=3),
        ),
    ],
)
def test_from_arrays_index_series_period_datetimetz_and_timedelta(idx1, idx2):
    result = MultiIndex.from_arrays([idx1, idx2])
    tm.assert_index_equal(result.get_level_values(0), idx1)
    tm.assert_index_equal(result.get_level_values(1), idx2)

    result2 = MultiIndex.from_arrays([Series(idx1), Series(idx2)])
    tm.assert_index_equal(result2.get_level_values(0), idx1)
    tm.assert_index_equal(result2.get_level_values(1), idx2)

    tm.assert_index_equal(result, result2)


def test_from_arrays_index_datetimelike_mixed():
    idx1 = date_range("2015-01-01 10:00", freq="D", periods=3, tz="US/Eastern")
    idx2 = date_range("2015-01-01 10:00", freq="h", periods=3)
    idx3 = pd.timedelta_range("1 days", freq="D", periods=3)
    idx4 = pd.period_range("2011-01-01", freq="D", periods=3)

    result = MultiIndex.from_arrays([idx1, idx2, idx3, idx4])
    tm.assert_index_equal(result.get_level_values(0), idx1)
    tm.assert_index_equal(result.get_level_values(1), idx2)
    tm.assert_index_equal(result.get_level_values(2), idx3)
    tm.assert_index_equal(result.get_level_values(3), idx4)

    result2 = MultiIndex.from_arrays(
        [Series(idx1), Series(idx2), Series(idx3), Series(idx4)]
    )
    tm.assert_index_equal(result2.get_level_values(0), idx1)
    tm.assert_index_equal(result2.get_level_values(1), idx2)
    tm.assert_index_equal(result2.get_level_values(2), idx3)
    tm.assert_index_equal(result2.get_level_values(3), idx4)

    tm.assert_index_equal(result, result2)


def test_from_arrays_index_series_categorical():
    # GH13743
    idx1 = pd.CategoricalIndex(list("abcaab"), categories=list("bac"), ordered=False)
    idx2 = pd.CategoricalIndex(list("abcaab"), categories=list("bac"), ordered=True)

    result = MultiIndex.from_arrays([idx1, idx2])
    tm.assert_index_equal(result.get_level_values(0), idx1)
    tm.assert_index_equal(result.get_level_values(1), idx2)

    result2 = MultiIndex.from_arrays([Series(idx1), Series(idx2)])
    tm.assert_index_equal(result2.get_level_values(0), idx1)
    tm.assert_index_equal(result2.get_level_values(1), idx2)

    result3 = MultiIndex.from_arrays([idx1.values, idx2.values])
    tm.assert_index_equal(result3.get_level_values(0), idx1)
    tm.assert_index_equal(result3.get_level_values(1), idx2)


def test_from_arrays_empty():
    # 0 levels
    msg = "Must pass non-zero number of levels/codes"
    with pytest.raises(ValueError, match=msg):
        MultiIndex.from_arrays(arrays=[])

    # 1 level
    result = MultiIndex.from_arrays(arrays=[[]], names=["A"])
    assert isinstance(result, MultiIndex)
    expected = Index([], name="A")
    tm.assert_index_equal(result.levels[0], expected)
    assert result.names == ["A"]

    # N levels
    for N in [2, 3]:
        arrays = [[]] * N
        names = list("ABC")[:N]
        result = MultiIndex.from_arrays(arrays=arrays, names=names)
        expected = MultiIndex(levels=[[]] * N, codes=[[]] * N, names=names)
        tm.assert_index_equal(result, expected)
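

# from_arrays rejects scalars, flat sequences of scalars, and sequences that
# mix scalars with array-likes, whether passed as lists or tuples.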
@pytest.mark.parametrize(
    "invalid_sequence_of_arrays",
    [
        1,
        [1],
        [1, 2],
        [[1], 2],
        [1, [2]],
        "a",
        ["a"],
        ["a", "b"],
        [["a"], "b"],
        (1,),
        (1, 2),
        ([1], 2),
        (1, [2]),
        "a",
        ("a",),
        ("a", "b"),
        (["a"], "b"),
        [(1,), 2],
        [1, (2,)],
        [("a",), "b"],
        ((1,), 2),
        (1, (2,)),
        (("a",), "b"),
    ],
)
def test_from_arrays_invalid_input(invalid_sequence_of_arrays):
    msg = "Input must be a list / sequence of array-likes"
    with pytest.raises(TypeError, match=msg):
        MultiIndex.from_arrays(arrays=invalid_sequence_of_arrays)


@pytest.mark.parametrize(
    "idx1, idx2", [([1, 2, 3], ["a", "b"]), ([], ["a", "b"]), ([1, 2, 3], [])]
)
def test_from_arrays_different_lengths(idx1, idx2):
    # see gh-13599
    msg = "^all arrays must be same length$"
    with pytest.raises(ValueError, match=msg):
        MultiIndex.from_arrays([idx1, idx2])


def test_from_arrays_respects_none_names():
    # GH27292
    a = Series([1, 2, 3], name="foo")
    b = Series(["a", "b", "c"], name="bar")

    result = MultiIndex.from_arrays([a, b], names=None)
    expected = MultiIndex(
        levels=[[1, 2, 3], ["a", "b", "c"]], codes=[[0, 1, 2], [0, 1, 2]], names=None
    )

    tm.assert_index_equal(result, expected)


# ----------------------------------------------------------------------------
# from_tuples
# ----------------------------------------------------------------------------
def test_from_tuples():
    msg = "Cannot infer number of levels from empty list"
    with pytest.raises(TypeError, match=msg):
        MultiIndex.from_tuples([])

    expected = MultiIndex(
        levels=[[1, 3], [2, 4]], codes=[[0, 1], [0, 1]], names=["a", "b"]
    )

    # input tuples
    result = MultiIndex.from_tuples(((1, 2), (3, 4)), names=["a", "b"])
    tm.assert_index_equal(result, expected)


def test_from_tuples_iterator():
    # GH 18434
    # input iterator for tuples
    expected = MultiIndex(
        levels=[[1, 3], [2, 4]], codes=[[0, 1], [0, 1]], names=["a", "b"]
    )

    result = MultiIndex.from_tuples(zip([1, 3], [2, 4]), names=["a", "b"])
    tm.assert_index_equal(result, expected)

    # input non-iterables
    msg = "Input must be a list / sequence of tuple-likes."
    with pytest.raises(TypeError, match=msg):
        MultiIndex.from_tuples(0)


def test_from_tuples_empty():
    # GH 16777
    result = MultiIndex.from_tuples([], names=["a", "b"])
    expected = MultiIndex.from_arrays(arrays=[[], []], names=["a", "b"])
    tm.assert_index_equal(result, expected)


def test_from_tuples_index_values(idx):
    result = MultiIndex.from_tuples(idx)
    assert (result.values == idx.values).all()


def test_tuples_with_name_string():
    # GH 15110 and GH 14848

    li = [(0, 0, 1), (0, 1, 0), (1, 0, 0)]
    msg = "Names should be list-like for a MultiIndex"
    with pytest.raises(ValueError, match=msg):
        Index(li, name="abc")
    with pytest.raises(ValueError, match=msg):
        Index(li, name="a")


def test_from_tuples_with_tuple_label():
    # GH 15457
    expected = pd.DataFrame(
        [[2, 1, 2], [4, (1, 2), 3]], columns=["a", "b", "c"]
    ).set_index(["a", "b"])
    idx = MultiIndex.from_tuples([(2, 1), (4, (1, 2))], names=("a", "b"))
    result = pd.DataFrame([2, 3], columns=["c"], index=idx)
    tm.assert_frame_equal(expected, result)


# ----------------------------------------------------------------------------
# from_product
# ----------------------------------------------------------------------------
def test_from_product_empty_zero_levels():
    # 0 levels
    msg = "Must pass non-zero number of levels/codes"
    with pytest.raises(ValueError, match=msg):
        MultiIndex.from_product([])


def test_from_product_empty_one_level():
    result = MultiIndex.from_product([[]], names=["A"])
    expected = Index([], name="A")
    tm.assert_index_equal(result.levels[0], expected)
    assert result.names == ["A"]


@pytest.mark.parametrize(
    "first, second", [([], []), (["foo", "bar", "baz"], []), ([], ["a", "b", "c"])]
)
def test_from_product_empty_two_levels(first, second):
    names = ["A", "B"]
    result = MultiIndex.from_product([first, second], names=names)
    expected = MultiIndex(levels=[first, second], codes=[[], []], names=names)
    tm.assert_index_equal(result, expected)


@pytest.mark.parametrize("N", list(range(4)))
def test_from_product_empty_three_levels(N):
    # GH12258
    names = ["A", "B", "C"]
    lvl2 = list(range(N))
    result = MultiIndex.from_product([[], lvl2, []], names=names)
    expected = MultiIndex(levels=[[], lvl2, []], codes=[[], [], []], names=names)
    tm.assert_index_equal(result, expected)


@pytest.mark.parametrize(
    "invalid_input", [1, [1], [1, 2], [[1], 2], "a", ["a"], ["a", "b"], [["a"], "b"]]
)
def test_from_product_invalid_input(invalid_input):
    msg = r"Input must be a list / sequence of iterables|Input must be list-like"
    with pytest.raises(TypeError, match=msg):
        MultiIndex.from_product(iterables=invalid_input)


def test_from_product_datetimeindex():
    dt_index = date_range("2000-01-01", periods=2)
    mi = MultiIndex.from_product([[1, 2], dt_index])
    etalon = construct_1d_object_array_from_listlike(
        [
            (1, Timestamp("2000-01-01")),
            (1, Timestamp("2000-01-02")),
            (2, Timestamp("2000-01-01")),
            (2, Timestamp("2000-01-02")),
        ]
    )
    tm.assert_numpy_array_equal(mi.values, etalon)


def test_from_product_rangeindex():
    # RangeIndex is preserved by factorize, so preserved in levels
    rng = Index(range(5))
    other = ["a", "b"]
    mi = MultiIndex.from_product([rng, other])
    tm.assert_index_equal(mi._levels[0], rng, exact=True)


@pytest.mark.parametrize("ordered", [False, True])
@pytest.mark.parametrize("f", [lambda x: x, lambda x: Series(x), lambda x: x.values])
def test_from_product_index_series_categorical(ordered, f):
    # GH13743
    first = ["foo", "bar"]

    idx = pd.CategoricalIndex(list("abcaab"), categories=list("bac"), ordered=ordered)
    expected = pd.CategoricalIndex(
        list("abcaab") + list("abcaab"), categories=list("bac"), ordered=ordered
    )

    result = MultiIndex.from_product([first, f(idx)])
    tm.assert_index_equal(result.get_level_values(1), expected)


def test_from_product():
    first = ["foo", "bar", "buz"]
    second = ["a", "b", "c"]
    names = ["first", "second"]
    result = MultiIndex.from_product([first, second], names=names)

    tuples = [
        ("foo", "a"),
        ("foo", "b"),
        ("foo", "c"),
        ("bar", "a"),
        ("bar", "b"),
        ("bar", "c"),
        ("buz", "a"),
        ("buz", "b"),
        ("buz", "c"),
    ]
    expected = MultiIndex.from_tuples(tuples, names=names)

    tm.assert_index_equal(result, expected)


def test_from_product_iterator():
    # GH 18434
    first = ["foo", "bar", "buz"]
    second = ["a", "b", "c"]
    names = ["first", "second"]
    tuples = [
        ("foo", "a"),
        ("foo", "b"),
        ("foo", "c"),
        ("bar", "a"),
        ("bar", "b"),
        ("bar", "c"),
        ("buz", "a"),
        ("buz", "b"),
        ("buz", "c"),
    ]
    expected = MultiIndex.from_tuples(tuples, names=names)

    # iterator as input
    result = MultiIndex.from_product(iter([first, second]), names=names)
    tm.assert_index_equal(result, expected)

    # Invalid non-iterable input
    msg = "Input must be a list / sequence of iterables."
    with pytest.raises(TypeError, match=msg):
        MultiIndex.from_product(0)


@pytest.mark.parametrize(
    "a, b, expected_names",
    [
        (
            Series([1, 2, 3], name="foo"),
            Series(["a", "b"], name="bar"),
            ["foo", "bar"],
        ),
        (Series([1, 2, 3], name="foo"), ["a", "b"], ["foo", None]),
        ([1, 2, 3], ["a", "b"], None),
    ],
)
def test_from_product_infer_names(a, b, expected_names):
    # GH27292
    result = MultiIndex.from_product([a, b])
    expected = MultiIndex(
        levels=[[1, 2, 3], ["a", "b"]],
        codes=[[0, 0, 1, 1, 2, 2], [0, 1, 0, 1, 0, 1]],
        names=expected_names,
    )
    tm.assert_index_equal(result, expected)


def test_from_product_respects_none_names():
    # GH27292
    a = Series([1, 2, 3], name="foo")
    b = Series(["a", "b"], name="bar")

    result = MultiIndex.from_product([a, b], names=None)
    expected = MultiIndex(
        levels=[[1, 2, 3], ["a", "b"]],
        codes=[[0, 0, 1, 1, 2, 2], [0, 1, 0, 1, 0, 1]],
        names=None,
    )
    tm.assert_index_equal(result, expected)


def test_from_product_readonly():
    # GH#15286 passing read-only array to from_product
    a = np.array(range(3))
    b = ["a", "b"]
    expected = MultiIndex.from_product([a, b])

    a.setflags(write=False)
    result = MultiIndex.from_product([a, b])
    tm.assert_index_equal(result, expected)


def test_create_index_existing_name(idx):
    # GH11193, when an existing index is passed, and a new name is not
    # specified, the new index should inherit the previous object name
    index = idx
    index.names = ["foo", "bar"]
    result = Index(index)
    expected = Index(
        Index(
            [
                ("foo", "one"),
                ("foo", "two"),
                ("bar", "one"),
                ("baz", "two"),
                ("qux", "one"),
                ("qux", "two"),
            ],
            dtype="object",
        )
    )
    tm.assert_index_equal(result, expected)

    result = Index(index, name="A")
    expected = Index(
        Index(
            [
                ("foo", "one"),
                ("foo", "two"),
                ("bar", "one"),
                ("baz", "two"),
                ("qux", "one"),
                ("qux", "two"),
            ],
            dtype="object",
        ),
        name="A",
    )
    tm.assert_index_equal(result, expected)


# ----------------------------------------------------------------------------
# from_frame
# ----------------------------------------------------------------------------
def test_from_frame():
    # GH 22420
    df = pd.DataFrame(
        [["a", "a"], ["a", "b"], ["b", "a"], ["b", "b"]], columns=["L1", "L2"]
    )
    expected = MultiIndex.from_tuples(
        [("a", "a"), ("a", "b"), ("b", "a"), ("b", "b")], names=["L1", "L2"]
    )
    result = MultiIndex.from_frame(df)
    tm.assert_index_equal(expected, result)


def test_from_frame_missing_values_multiIndex():
    # GH 39984
    pa = pytest.importorskip("pyarrow")

    df = pd.DataFrame(
        {
            "a": Series([1, 2, None], dtype="Int64"),
            "b": pd.Float64Dtype().__from_arrow__(pa.array([0.2, np.nan, None])),
        }
    )
    multi_indexed = MultiIndex.from_frame(df)
    expected = MultiIndex.from_arrays(
        [
            Series([1, 2, None]).astype("Int64"),
            pd.Float64Dtype().__from_arrow__(pa.array([0.2, np.nan, None])),
        ],
        names=["a", "b"],
    )
    tm.assert_index_equal(multi_indexed, expected)


@pytest.mark.parametrize(
    "non_frame",
    [
        Series([1, 2, 3, 4]),
        [1, 2, 3, 4],
        [[1, 2], [3, 4], [5, 6]],
        Index([1, 2, 3, 4]),
        np.array([[1, 2], [3, 4], [5, 6]]),
        27,
    ],
)
def test_from_frame_error(non_frame):
    # GH 22420
    with pytest.raises(TypeError, match="Input must be a DataFrame"):
        MultiIndex.from_frame(non_frame)


def test_from_frame_dtype_fidelity():
    # GH 22420
    df = pd.DataFrame(
        {
            "dates": date_range("19910905", periods=6, tz="US/Eastern"),
            "a": [1, 1, 1, 2, 2, 2],
            "b": pd.Categorical(["a", "a", "b", "b", "c", "c"], ordered=True),
            "c": ["x", "x", "y", "z", "x", "y"],
        }
    )
    original_dtypes = df.dtypes.to_dict()

    expected_mi = MultiIndex.from_arrays(
        [
            date_range("19910905", periods=6, tz="US/Eastern"),
            [1, 1, 1, 2, 2, 2],
            pd.Categorical(["a", "a", "b", "b", "c", "c"], ordered=True),
            ["x", "x", "y", "z", "x", "y"],
        ],
        names=["dates", "a", "b", "c"],
    )
    mi = MultiIndex.from_frame(df)
    mi_dtypes = {name: mi.levels[i].dtype for i, name in enumerate(mi.names)}

    tm.assert_index_equal(expected_mi, mi)
    assert original_dtypes == mi_dtypes


@pytest.mark.parametrize(
    "names_in,names_out", [(None, [("L1", "x"), ("L2", "y")]), (["x", "y"], ["x", "y"])]
)
def test_from_frame_valid_names(names_in, names_out):
    # GH 22420
    df = pd.DataFrame(
        [["a", "a"], ["a", "b"], ["b", "a"], ["b", "b"]],
        columns=MultiIndex.from_tuples([("L1", "x"), ("L2", "y")]),
    )
    mi = MultiIndex.from_frame(df, names=names_in)
    assert mi.names == names_out


@pytest.mark.parametrize(
    "names,expected_error_msg",
    [
        ("bad_input", "Names should be list-like for a MultiIndex"),
        (["a", "b", "c"], "Length of names must match number of levels in MultiIndex"),
    ],
)
def test_from_frame_invalid_names(names, expected_error_msg):
    # GH 22420
    df = pd.DataFrame(
        [["a", "a"], ["a", "b"], ["b", "a"], ["b", "b"]],
        columns=MultiIndex.from_tuples([("L1", "x"), ("L2", "y")]),
    )
    with pytest.raises(ValueError, match=expected_error_msg):
        MultiIndex.from_frame(df, names=names)


def test_index_equal_empty_iterable():
    # GH 16844
    a = MultiIndex(levels=[[], []], codes=[[], []], names=["a", "b"])
    b = MultiIndex.from_arrays(arrays=[[], []], names=["a", "b"])
    tm.assert_index_equal(a, b)


def test_raise_invalid_sortorder():
    # Test that the MultiIndex constructor raises when an incorrect
    # sortorder is given
    # GH#28518

    levels = [[0, 1], [0, 1, 2]]

    # Correct sortorder
    MultiIndex(
        levels=levels, codes=[[0, 0, 0, 1, 1, 1], [0, 1, 2, 0, 1, 2]], sortorder=2
    )

    with pytest.raises(ValueError, match=r".* sortorder 2 with lexsort_depth 1.*"):
        MultiIndex(
            levels=levels, codes=[[0, 0, 0, 1, 1, 1], [0, 1, 2, 0, 2, 1]], sortorder=2
        )

    with pytest.raises(ValueError, match=r".* sortorder 1 with lexsort_depth 0.*"):
        MultiIndex(
            levels=levels, codes=[[0, 0, 1, 0, 1, 1], [0, 1, 0, 2, 2, 1]], sortorder=1
        )


def test_datetimeindex():
    idx1 = pd.DatetimeIndex(
        ["2013-04-01 9:00", "2013-04-02 9:00", "2013-04-03 9:00"] * 2, tz="Asia/Tokyo"
    )
    idx2 = date_range("2010/01/01", periods=6, freq="ME", tz="US/Eastern")
    idx = MultiIndex.from_arrays([idx1, idx2])

    expected1 = pd.DatetimeIndex(
        ["2013-04-01 9:00", "2013-04-02 9:00", "2013-04-03 9:00"], tz="Asia/Tokyo"
    )

    tm.assert_index_equal(idx.levels[0], expected1)
    tm.assert_index_equal(idx.levels[1], idx2)

    # from datetime combos
    # GH 7888
    date1 = np.datetime64("today")
    date2 = datetime.today()
    date3 = Timestamp.today()

    for d1, d2 in itertools.product([date1, date2, date3], [date1, date2, date3]):
        index = MultiIndex.from_product([[d1], [d2]])
        assert isinstance(index.levels[0], pd.DatetimeIndex)
        assert isinstance(index.levels[1], pd.DatetimeIndex)

    # but NOT date objects, matching Index behavior
    date4 = date.today()
    index = MultiIndex.from_product([[date4], [date2]])
    assert not isinstance(index.levels[0], pd.DatetimeIndex)
    assert isinstance(index.levels[1], pd.DatetimeIndex)


def test_constructor_with_tz():
    index = pd.DatetimeIndex(
        ["2013/01/01 09:00", "2013/01/02 09:00"], name="dt1", tz="US/Pacific"
    )
    columns = pd.DatetimeIndex(
        ["2014/01/01 09:00", "2014/01/02 09:00"], name="dt2", tz="Asia/Tokyo"
    )

    result = MultiIndex.from_arrays([index, columns])

    assert result.names == ["dt1", "dt2"]
    tm.assert_index_equal(result.levels[0], index)
    tm.assert_index_equal(result.levels[1], columns)

    result = MultiIndex.from_arrays([Series(index), Series(columns)])

    assert result.names == ["dt1", "dt2"]
    tm.assert_index_equal(result.levels[0], index)
    tm.assert_index_equal(result.levels[1], columns)


def test_multiindex_inference_consistency():
    # check that inference behavior matches the base class

    v = date.today()

    arr = [v, v]

    idx = Index(arr)
    assert idx.dtype == object

    mi = MultiIndex.from_arrays([arr])
    lev = mi.levels[0]
    assert lev.dtype == object

    mi = MultiIndex.from_product([arr])
    lev = mi.levels[0]
    assert lev.dtype == object

    mi = MultiIndex.from_tuples([(x,) for x in arr])
    lev = mi.levels[0]
    assert lev.dtype == object


def test_dtype_representation(using_infer_string):
    # GH#46900
    pmidx = MultiIndex.from_arrays([[1], ["a"]], names=[("a", "b"), ("c", "d")])
    result = pmidx.dtypes
    exp = "object" if not using_infer_string else pd.StringDtype(na_value=np.nan)
    expected = Series(
        ["int64", exp],
        index=MultiIndex.from_tuples([("a", "b"), ("c", "d")]),
        dtype=object,
    )
    tm.assert_series_equal(result, expected)
@ -0,0 +1,201 @@
import numpy as np
import pytest

from pandas.compat.numpy import np_version_gt2

import pandas as pd
from pandas import (
    DataFrame,
    MultiIndex,
)
import pandas._testing as tm


def test_to_numpy(idx):
    result = idx.to_numpy()
    exp = idx.values
    tm.assert_numpy_array_equal(result, exp)


def test_array_interface(idx):
    # https://github.com/pandas-dev/pandas/pull/60046
    result = np.asarray(idx)
    expected = np.empty((6,), dtype=object)
    expected[:] = [
        ("foo", "one"),
        ("foo", "two"),
        ("bar", "one"),
        ("baz", "two"),
        ("qux", "one"),
        ("qux", "two"),
    ]
    tm.assert_numpy_array_equal(result, expected)

    # it always gives a copy by default, but the values are cached, so results
    # are still sharing memory
    result_copy1 = np.asarray(idx)
    result_copy2 = np.asarray(idx)
    assert np.may_share_memory(result_copy1, result_copy2)

    # with explicit copy=True, then it is an actual copy
    result_copy1 = np.array(idx, copy=True)
    result_copy2 = np.array(idx, copy=True)
    assert not np.may_share_memory(result_copy1, result_copy2)

    if not np_version_gt2:
        # copy=False semantics are only supported in NumPy>=2.
        return

    # for MultiIndex, copy=False is never allowed
    msg = "Starting with NumPy 2.0, the behavior of the 'copy' keyword has changed"
    with tm.assert_produces_warning(FutureWarning, match=msg):
        np.array(idx, copy=False)
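

# to_frame round-trips: with index=False the frame gets a default RangeIndex,
# otherwise the MultiIndex itself is reused as the frame's index.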
def test_to_frame():
    tuples = [(1, "one"), (1, "two"), (2, "one"), (2, "two")]

    index = MultiIndex.from_tuples(tuples)
    result = index.to_frame(index=False)
    expected = DataFrame(tuples)
    tm.assert_frame_equal(result, expected)

    result = index.to_frame()
    expected.index = index
    tm.assert_frame_equal(result, expected)

    tuples = [(1, "one"), (1, "two"), (2, "one"), (2, "two")]
    index = MultiIndex.from_tuples(tuples, names=["first", "second"])
    result = index.to_frame(index=False)
    expected = DataFrame(tuples)
    expected.columns = ["first", "second"]
    tm.assert_frame_equal(result, expected)

    result = index.to_frame()
    expected.index = index
    tm.assert_frame_equal(result, expected)

    # See GH-22580
    index = MultiIndex.from_tuples(tuples)
    result = index.to_frame(index=False, name=["first", "second"])
    expected = DataFrame(tuples)
    expected.columns = ["first", "second"]
    tm.assert_frame_equal(result, expected)

    result = index.to_frame(name=["first", "second"])
    expected.index = index
    expected.columns = ["first", "second"]
    tm.assert_frame_equal(result, expected)

    msg = "'name' must be a list / sequence of column names."
    with pytest.raises(TypeError, match=msg):
        index.to_frame(name="first")

    msg = "'name' should have same length as number of levels on index."
    with pytest.raises(ValueError, match=msg):
        index.to_frame(name=["first"])

    # Tests for datetime index
    index = MultiIndex.from_product([range(5), pd.date_range("20130101", periods=3)])
    result = index.to_frame(index=False)
    expected = DataFrame(
        {
            0: np.repeat(np.arange(5, dtype="int64"), 3),
            1: np.tile(pd.date_range("20130101", periods=3), 5),
        }
    )
    tm.assert_frame_equal(result, expected)

    result = index.to_frame()
    expected.index = index
    tm.assert_frame_equal(result, expected)

    # See GH-22580
    result = index.to_frame(index=False, name=["first", "second"])
    expected = DataFrame(
        {
            "first": np.repeat(np.arange(5, dtype="int64"), 3),
            "second": np.tile(pd.date_range("20130101", periods=3), 5),
        }
    )
    tm.assert_frame_equal(result, expected)

    result = index.to_frame(name=["first", "second"])
    expected.index = index
    tm.assert_frame_equal(result, expected)


def test_to_frame_dtype_fidelity():
    # GH 22420
    mi = MultiIndex.from_arrays(
        [
            pd.date_range("19910905", periods=6, tz="US/Eastern"),
            [1, 1, 1, 2, 2, 2],
            pd.Categorical(["a", "a", "b", "b", "c", "c"], ordered=True),
            ["x", "x", "y", "z", "x", "y"],
        ],
        names=["dates", "a", "b", "c"],
    )
    original_dtypes = {name: mi.levels[i].dtype for i, name in enumerate(mi.names)}

    expected_df = DataFrame(
        {
            "dates": pd.date_range("19910905", periods=6, tz="US/Eastern"),
            "a": [1, 1, 1, 2, 2, 2],
            "b": pd.Categorical(["a", "a", "b", "b", "c", "c"], ordered=True),
            "c": ["x", "x", "y", "z", "x", "y"],
        }
    )
    df = mi.to_frame(index=False)
    df_dtypes = df.dtypes.to_dict()

    tm.assert_frame_equal(df, expected_df)
    assert original_dtypes == df_dtypes


def test_to_frame_resulting_column_order():
    # GH 22420
    expected = ["z", 0, "a"]
    mi = MultiIndex.from_arrays(
        [["a", "b", "c"], ["x", "y", "z"], ["q", "w", "e"]], names=expected
    )
    result = mi.to_frame().columns.tolist()
    assert result == expected


def test_to_frame_duplicate_labels():
    # GH 45245
    data = [(1, 2), (3, 4)]
    names = ["a", "a"]
    index = MultiIndex.from_tuples(data, names=names)
    with pytest.raises(ValueError, match="Cannot create duplicate column labels"):
        index.to_frame()

    result = index.to_frame(allow_duplicates=True)
    expected = DataFrame(data, index=index, columns=names)
    tm.assert_frame_equal(result, expected)

    names = [None, 0]
    index = MultiIndex.from_tuples(data, names=names)
    with pytest.raises(ValueError, match="Cannot create duplicate column labels"):
        index.to_frame()

    result = index.to_frame(allow_duplicates=True)
    expected = DataFrame(data, index=index, columns=[0, 0])
    tm.assert_frame_equal(result, expected)


def test_to_flat_index(idx):
    expected = pd.Index(
        (
            ("foo", "one"),
            ("foo", "two"),
            ("bar", "one"),
            ("baz", "two"),
            ("qux", "one"),
            ("qux", "two"),
        ),
        tupleize_cols=False,
    )
    result = idx.to_flat_index()
    tm.assert_index_equal(result, expected)
@ -0,0 +1,96 @@
from copy import (
    copy,
    deepcopy,
)

import pytest

from pandas import MultiIndex
import pandas._testing as tm


def assert_multiindex_copied(copy, original):
    # Levels should be (at least shallow) copied
    tm.assert_copy(copy.levels, original.levels)
    tm.assert_almost_equal(copy.codes, original.codes)

    # Labels don't matter which way copied
    tm.assert_almost_equal(copy.codes, original.codes)
    assert copy.codes is not original.codes

    # Names don't matter which way copied
    assert copy.names == original.names
    assert copy.names is not original.names

    # Sort order should be copied
    assert copy.sortorder == original.sortorder


def test_copy(idx):
    i_copy = idx.copy()

    assert_multiindex_copied(i_copy, idx)


def test_shallow_copy(idx):
    i_copy = idx._view()

    assert_multiindex_copied(i_copy, idx)


def test_view(idx):
    i_view = idx.view()
    assert_multiindex_copied(i_view, idx)


@pytest.mark.parametrize("func", [copy, deepcopy])
def test_copy_and_deepcopy(func):
    idx = MultiIndex(
        levels=[["foo", "bar"], ["fizz", "buzz"]],
        codes=[[0, 0, 0, 1], [0, 0, 1, 1]],
        names=["first", "second"],
    )
    idx_copy = func(idx)
    assert idx_copy is not idx
    assert idx_copy.equals(idx)


@pytest.mark.parametrize("deep", [True, False])
def test_copy_method(deep):
    idx = MultiIndex(
        levels=[["foo", "bar"], ["fizz", "buzz"]],
        codes=[[0, 0, 0, 1], [0, 0, 1, 1]],
        names=["first", "second"],
    )
    idx_copy = idx.copy(deep=deep)
    assert idx_copy.equals(idx)


@pytest.mark.parametrize("deep", [True, False])
@pytest.mark.parametrize(
    "kwarg, value",
    [
        ("names", ["third", "fourth"]),
    ],
)
def test_copy_method_kwargs(deep, kwarg, value):
    # gh-12309: Check that the "name" argument as well as other kwargs are honored
    idx = MultiIndex(
        levels=[["foo", "bar"], ["fizz", "buzz"]],
        codes=[[0, 0, 0, 1], [0, 0, 1, 1]],
        names=["first", "second"],
    )
    idx_copy = idx.copy(**{kwarg: value, "deep": deep})
    assert getattr(idx_copy, kwarg) == value


def test_copy_deep_false_retains_id():
    # GH#47878
    idx = MultiIndex(
        levels=[["foo", "bar"], ["fizz", "buzz"]],
        codes=[[0, 0, 0, 1], [0, 0, 1, 1]],
        names=["first", "second"],
    )

    res = idx.copy(deep=False)
    assert res._id is idx._id
@ -0,0 +1,190 @@
import numpy as np
import pytest

from pandas.errors import PerformanceWarning

import pandas as pd
from pandas import (
    Index,
    MultiIndex,
)
import pandas._testing as tm


def test_drop(idx):
    dropped = idx.drop([("foo", "two"), ("qux", "one")])

    index = MultiIndex.from_tuples([("foo", "two"), ("qux", "one")])
    dropped2 = idx.drop(index)

    expected = idx[[0, 2, 3, 5]]
    tm.assert_index_equal(dropped, expected)
    tm.assert_index_equal(dropped2, expected)

    dropped = idx.drop(["bar"])
    expected = idx[[0, 1, 3, 4, 5]]
    tm.assert_index_equal(dropped, expected)

    dropped = idx.drop("foo")
    expected = idx[[2, 3, 4, 5]]
    tm.assert_index_equal(dropped, expected)

    index = MultiIndex.from_tuples([("bar", "two")])
    with pytest.raises(KeyError, match=r"^\('bar', 'two'\)$"):
        idx.drop([("bar", "two")])
    with pytest.raises(KeyError, match=r"^\('bar', 'two'\)$"):
        idx.drop(index)
    with pytest.raises(KeyError, match=r"^'two'$"):
        idx.drop(["foo", "two"])

    # partially correct argument
    mixed_index = MultiIndex.from_tuples([("qux", "one"), ("bar", "two")])
    with pytest.raises(KeyError, match=r"^\('bar', 'two'\)$"):
        idx.drop(mixed_index)

    # error='ignore'
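    # errors="ignore" drops whatever labels are present and silently
    # skips the missing ones instead of raising KeyError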
    dropped = idx.drop(index, errors="ignore")
    expected = idx[[0, 1, 2, 3, 4, 5]]
    tm.assert_index_equal(dropped, expected)

    dropped = idx.drop(mixed_index, errors="ignore")
    expected = idx[[0, 1, 2, 3, 5]]
    tm.assert_index_equal(dropped, expected)

    dropped = idx.drop(["foo", "two"], errors="ignore")
    expected = idx[[2, 3, 4, 5]]
    tm.assert_index_equal(dropped, expected)

    # mixed partial / full drop
    dropped = idx.drop(["foo", ("qux", "one")])
    expected = idx[[2, 3, 5]]
    tm.assert_index_equal(dropped, expected)

    # mixed partial / full drop / error='ignore'
    mixed_index = ["foo", ("qux", "one"), "two"]
    with pytest.raises(KeyError, match=r"^'two'$"):
        idx.drop(mixed_index)
    dropped = idx.drop(mixed_index, errors="ignore")
    expected = idx[[2, 3, 5]]
    tm.assert_index_equal(dropped, expected)


def test_droplevel_with_names(idx):
    index = idx[idx.get_loc("foo")]
    dropped = index.droplevel(0)
    assert dropped.name == "second"

    index = MultiIndex(
        levels=[Index(range(4)), Index(range(4)), Index(range(4))],
        codes=[
            np.array([0, 0, 1, 2, 2, 2, 3, 3]),
            np.array([0, 1, 0, 0, 0, 1, 0, 1]),
            np.array([1, 0, 1, 1, 0, 0, 1, 0]),
        ],
        names=["one", "two", "three"],
    )
    dropped = index.droplevel(0)
    assert dropped.names == ("two", "three")

    dropped = index.droplevel("two")
    expected = index.droplevel(1)
    assert dropped.equals(expected)


def test_droplevel_list():
    index = MultiIndex(
        levels=[Index(range(4)), Index(range(4)), Index(range(4))],
        codes=[
            np.array([0, 0, 1, 2, 2, 2, 3, 3]),
            np.array([0, 1, 0, 0, 0, 1, 0, 1]),
            np.array([1, 0, 1, 1, 0, 0, 1, 0]),
        ],
        names=["one", "two", "three"],
    )

    dropped = index[:2].droplevel(["three", "one"])
    expected = index[:2].droplevel(2).droplevel(0)
    assert dropped.equals(expected)

    dropped = index[:2].droplevel([])
    expected = index[:2]
    assert dropped.equals(expected)

    msg = (
        "Cannot remove 3 levels from an index with 3 levels: "
        "at least one level must be left"
    )
    with pytest.raises(ValueError, match=msg):
        index[:2].droplevel(["one", "two", "three"])

    with pytest.raises(KeyError, match="'Level four not found'"):
        index[:2].droplevel(["one", "four"])


def test_drop_not_lexsorted():
    # GH 12078

    # define the lexsorted version of the multi-index
    tuples = [("a", ""), ("b1", "c1"), ("b2", "c2")]
    lexsorted_mi = MultiIndex.from_tuples(tuples, names=["b", "c"])
    assert lexsorted_mi._is_lexsorted()

    # and the not-lexsorted version
    df = pd.DataFrame(
        columns=["a", "b", "c", "d"], data=[[1, "b1", "c1", 3], [1, "b2", "c2", 4]]
    )
    df = df.pivot_table(index="a", columns=["b", "c"], values="d")
    df = df.reset_index()
    not_lexsorted_mi = df.columns
    assert not not_lexsorted_mi._is_lexsorted()

    # compare the results
    tm.assert_index_equal(lexsorted_mi, not_lexsorted_mi)
    with tm.assert_produces_warning(PerformanceWarning):
        tm.assert_index_equal(lexsorted_mi.drop("a"), not_lexsorted_mi.drop("a"))


def test_drop_with_nan_in_index(nulls_fixture):
    # GH#18853
    mi = MultiIndex.from_tuples([("blah", nulls_fixture)], names=["name", "date"])
    msg = r"labels \[Timestamp\('2001-01-01 00:00:00'\)\] not found in level"
    with pytest.raises(KeyError, match=msg):
        mi.drop(pd.Timestamp("2001"), level="date")


@pytest.mark.filterwarnings("ignore::pandas.errors.PerformanceWarning")
def test_drop_with_non_monotonic_duplicates():
    # GH#33494
    mi = MultiIndex.from_tuples([(1, 2), (2, 3), (1, 2)])
    result = mi.drop((1, 2))
    expected = MultiIndex.from_tuples([(2, 3)])
    tm.assert_index_equal(result, expected)


def test_single_level_drop_partially_missing_elements():
    # GH 37820

    mi = MultiIndex.from_tuples([(1, 2), (2, 2), (3, 2)])
    msg = r"labels \[4\] not found in level"
    with pytest.raises(KeyError, match=msg):
        mi.drop(4, level=0)
    with pytest.raises(KeyError, match=msg):
        mi.drop([1, 4], level=0)
    msg = r"labels \[nan\] not found in level"
    with pytest.raises(KeyError, match=msg):
        mi.drop([np.nan], level=0)
    with pytest.raises(KeyError, match=msg):
        mi.drop([np.nan, 1, 2, 3], level=0)

    mi = MultiIndex.from_tuples([(np.nan, 1), (1, 2)])
    msg = r"labels \['a'\] not found in level"
    with pytest.raises(KeyError, match=msg):
        mi.drop([np.nan, 1, "a"], level=0)


def test_droplevel_multiindex_one_level():
    # GH#37208
    index = MultiIndex.from_tuples([(2,)], names=("b",))
    result = index.droplevel([])
    expected = Index([2], name="b")
    tm.assert_index_equal(result, expected)
@ -0,0 +1,363 @@
from itertools import product

import numpy as np
import pytest

from pandas._libs import (
    hashtable,
    index as libindex,
)

from pandas import (
    NA,
    DatetimeIndex,
    Index,
    MultiIndex,
    Series,
)
import pandas._testing as tm


@pytest.fixture
def idx_dup():
    # compare tests/indexes/multi/conftest.py
    major_axis = Index(["foo", "bar", "baz", "qux"])
    minor_axis = Index(["one", "two"])

    major_codes = np.array([0, 0, 1, 0, 1, 1])
    minor_codes = np.array([0, 1, 0, 1, 0, 1])
    index_names = ["first", "second"]
    mi = MultiIndex(
        levels=[major_axis, minor_axis],
        codes=[major_codes, minor_codes],
        names=index_names,
        verify_integrity=False,
    )
    return mi


@pytest.mark.parametrize("names", [None, ["first", "second"]])
def test_unique(names):
    mi = MultiIndex.from_arrays([[1, 2, 1, 2], [1, 1, 1, 2]], names=names)

    res = mi.unique()
    exp = MultiIndex.from_arrays([[1, 2, 2], [1, 1, 2]], names=mi.names)
    tm.assert_index_equal(res, exp)

    mi = MultiIndex.from_arrays([list("aaaa"), list("abab")], names=names)
    res = mi.unique()
    exp = MultiIndex.from_arrays([list("aa"), list("ab")], names=mi.names)
    tm.assert_index_equal(res, exp)

    mi = MultiIndex.from_arrays([list("aaaa"), list("aaaa")], names=names)
    res = mi.unique()
    exp = MultiIndex.from_arrays([["a"], ["a"]], names=mi.names)
    tm.assert_index_equal(res, exp)

    # GH #20568 - empty MI
    mi = MultiIndex.from_arrays([[], []], names=names)
    res = mi.unique()
    tm.assert_index_equal(mi, res)


def test_unique_datetimelike():
    idx1 = DatetimeIndex(
        ["2015-01-01", "2015-01-01", "2015-01-01", "2015-01-01", "NaT", "NaT"]
    )
    idx2 = DatetimeIndex(
        ["2015-01-01", "2015-01-01", "2015-01-02", "2015-01-02", "NaT", "2015-01-01"],
        tz="Asia/Tokyo",
    )
    result = MultiIndex.from_arrays([idx1, idx2]).unique()

    eidx1 = DatetimeIndex(["2015-01-01", "2015-01-01", "NaT", "NaT"])
    eidx2 = DatetimeIndex(
        ["2015-01-01", "2015-01-02", "NaT", "2015-01-01"], tz="Asia/Tokyo"
    )
    exp = MultiIndex.from_arrays([eidx1, eidx2])
    tm.assert_index_equal(result, exp)


@pytest.mark.parametrize("level", [0, "first", 1, "second"])
def test_unique_level(idx, level):
    # GH #17896 - with level= argument
    result = idx.unique(level=level)
    expected = idx.get_level_values(level).unique()
    tm.assert_index_equal(result, expected)

    # With already unique level
    mi = MultiIndex.from_arrays([[1, 3, 2, 4], [1, 3, 2, 5]], names=["first", "second"])
    result = mi.unique(level=level)
    expected = mi.get_level_values(level)
    tm.assert_index_equal(result, expected)

    # With empty MI
    mi = MultiIndex.from_arrays([[], []], names=["first", "second"])
    result = mi.unique(level=level)
    expected = mi.get_level_values(level)
    tm.assert_index_equal(result, expected)


def test_duplicate_multiindex_codes():
    # GH 17464
    # Make sure that a MultiIndex with duplicate levels throws a ValueError
    msg = r"Level values must be unique: \[[A', ]+\] on level 0"
    with pytest.raises(ValueError, match=msg):
        mi = MultiIndex([["A"] * 10, range(10)], [[0] * 10, range(10)])

    # And that using set_levels with duplicate levels fails
    mi = MultiIndex.from_arrays([["A", "A", "B", "B", "B"], [1, 2, 1, 2, 3]])
    msg = r"Level values must be unique: \[[AB', ]+\] on level 0"
    with pytest.raises(ValueError, match=msg):
        mi.set_levels([["A", "B", "A", "A", "B"], [2, 1, 3, -2, 5]])


@pytest.mark.parametrize("names", [["a", "b", "a"], [1, 1, 2], [1, "a", 1]])
def test_duplicate_level_names(names):
    # GH18872, GH19029
    mi = MultiIndex.from_product([[0, 1]] * 3, names=names)
    assert mi.names == names

    # With .rename()
    mi = MultiIndex.from_product([[0, 1]] * 3)
    mi = mi.rename(names)
    assert mi.names == names

    # With .rename(., level=)
    mi.rename(names[1], level=1, inplace=True)
    mi = mi.rename([names[0], names[2]], level=[0, 2])
    assert mi.names == names


def test_duplicate_meta_data():
    # GH 10115
    mi = MultiIndex(
        levels=[[0, 1], [0, 1, 2]], codes=[[0, 0, 0, 0, 1, 1, 1], [0, 1, 2, 0, 0, 1, 2]]
    )

    for idx in [
        mi,
        mi.set_names([None, None]),
        mi.set_names([None, "Num"]),
        mi.set_names(["Upper", "Num"]),
    ]:
        assert idx.has_duplicates
        assert idx.drop_duplicates().names == idx.names


def test_has_duplicates(idx, idx_dup):
    # see fixtures
    assert idx.is_unique is True
    assert idx.has_duplicates is False
    assert idx_dup.is_unique is False
    assert idx_dup.has_duplicates is True

    mi = MultiIndex(
        levels=[[0, 1], [0, 1, 2]], codes=[[0, 0, 0, 0, 1, 1, 1], [0, 1, 2, 0, 0, 1, 2]]
    )
    assert mi.is_unique is False
    assert mi.has_duplicates is True

    # single instance of NaN
    mi_nan = MultiIndex(
        levels=[["a", "b"], [0, 1]], codes=[[-1, 0, 0, 1, 1], [-1, 0, 1, 0, 1]]
    )
    assert mi_nan.is_unique is True
    assert mi_nan.has_duplicates is False

    # multiple instances of NaN
    mi_nan_dup = MultiIndex(
        levels=[["a", "b"], [0, 1]], codes=[[-1, -1, 0, 0, 1, 1], [-1, -1, 0, 1, 0, 1]]
    )
    assert mi_nan_dup.is_unique is False
    assert mi_nan_dup.has_duplicates is True


def test_has_duplicates_from_tuples():
    # GH 9075
    t = [
        ("x", "out", "z", 5, "y", "in", "z", 169),
        ("x", "out", "z", 7, "y", "in", "z", 119),
        ("x", "out", "z", 9, "y", "in", "z", 135),
        ("x", "out", "z", 13, "y", "in", "z", 145),
        ("x", "out", "z", 14, "y", "in", "z", 158),
        ("x", "out", "z", 16, "y", "in", "z", 122),
        ("x", "out", "z", 17, "y", "in", "z", 160),
        ("x", "out", "z", 18, "y", "in", "z", 180),
        ("x", "out", "z", 20, "y", "in", "z", 143),
        ("x", "out", "z", 21, "y", "in", "z", 128),
        ("x", "out", "z", 22, "y", "in", "z", 129),
        ("x", "out", "z", 25, "y", "in", "z", 111),
        ("x", "out", "z", 28, "y", "in", "z", 114),
        ("x", "out", "z", 29, "y", "in", "z", 121),
        ("x", "out", "z", 31, "y", "in", "z", 126),
        ("x", "out", "z", 32, "y", "in", "z", 155),
        ("x", "out", "z", 33, "y", "in", "z", 123),
        ("x", "out", "z", 12, "y", "in", "z", 144),
    ]

    mi = MultiIndex.from_tuples(t)
    assert not mi.has_duplicates


@pytest.mark.parametrize("nlevels", [4, 8])
@pytest.mark.parametrize("with_nulls", [True, False])
def test_has_duplicates_overflow(nlevels, with_nulls):
    # handle int64 overflow if possible
    # no overflow with 4
    # overflow possible with 8
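    # with 8 levels of 500 values the number of possible label combinations
    # (500**8, about 3.9e21) exceeds the int64 range (about 9.2e18), so the
    # codes cannot all be packed into a single int64 key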
    codes = np.tile(np.arange(500), 2)
    level = np.arange(500)

    if with_nulls:  # inject some null values
        codes[500] = -1  # common nan value
        codes = [codes.copy() for i in range(nlevels)]
        for i in range(nlevels):
            codes[i][500 + i - nlevels // 2] = -1

        codes += [np.array([-1, 1]).repeat(500)]
    else:
        codes = [codes] * nlevels + [np.arange(2).repeat(500)]

    levels = [level] * nlevels + [[0, 1]]

    # no dups
    mi = MultiIndex(levels=levels, codes=codes)
    assert not mi.has_duplicates

    # with a dup
    if with_nulls:

        def f(a):
            return np.insert(a, 1000, a[0])

        codes = list(map(f, codes))
        mi = MultiIndex(levels=levels, codes=codes)
    else:
        values = mi.values.tolist()
        mi = MultiIndex.from_tuples(values + [values[0]])

    assert mi.has_duplicates


@pytest.mark.parametrize(
    "keep, expected",
    [
        ("first", np.array([False, False, False, True, True, False])),
        ("last", np.array([False, True, True, False, False, False])),
        (False, np.array([False, True, True, True, True, False])),
    ],
)
def test_duplicated(idx_dup, keep, expected):
    result = idx_dup.duplicated(keep=keep)
    tm.assert_numpy_array_equal(result, expected)


@pytest.mark.arm_slow
def test_duplicated_hashtable_impl(keep, monkeypatch):
    # GH 9125
    n, k = 6, 10
    levels = [np.arange(n), [str(i) for i in range(n)], 1000 + np.arange(n)]
    codes = [np.random.default_rng(2).choice(n, k * n) for _ in levels]
    with monkeypatch.context() as m:
        m.setattr(libindex, "_SIZE_CUTOFF", 50)
        mi = MultiIndex(levels=levels, codes=codes)

        result = mi.duplicated(keep=keep)
        expected = hashtable.duplicated(mi.values, keep=keep)
    tm.assert_numpy_array_equal(result, expected)


@pytest.mark.parametrize("val", [101, 102])
def test_duplicated_with_nan(val):
    # GH5873
    mi = MultiIndex.from_arrays([[101, val], [3.5, np.nan]])
    assert not mi.has_duplicates

    tm.assert_numpy_array_equal(mi.duplicated(), np.zeros(2, dtype="bool"))


@pytest.mark.parametrize("n", range(1, 6))
@pytest.mark.parametrize("m", range(1, 5))
def test_duplicated_with_nan_multi_shape(n, m):
    # GH5873
    # all possible unique combinations, including nan
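    # a code of -1 denotes a missing value, so range(-1, n) covers every
    # combination with and without NaN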
    codes = product(range(-1, n), range(-1, m))
    mi = MultiIndex(
        levels=[list("abcde")[:n], list("WXYZ")[:m]],
        codes=np.random.default_rng(2).permutation(list(codes)).T,
    )
    assert len(mi) == (n + 1) * (m + 1)
    assert not mi.has_duplicates

    tm.assert_numpy_array_equal(mi.duplicated(), np.zeros(len(mi), dtype="bool"))


def test_duplicated_drop_duplicates():
    # GH#4060
    idx = MultiIndex.from_arrays(([1, 2, 3, 1, 2, 3], [1, 1, 1, 1, 2, 2]))

    expected = np.array([False, False, False, True, False, False], dtype=bool)
    duplicated = idx.duplicated()
    tm.assert_numpy_array_equal(duplicated, expected)
    assert duplicated.dtype == bool
    expected = MultiIndex.from_arrays(([1, 2, 3, 2, 3], [1, 1, 1, 2, 2]))
    tm.assert_index_equal(idx.drop_duplicates(), expected)

    expected = np.array([True, False, False, False, False, False])
    duplicated = idx.duplicated(keep="last")
    tm.assert_numpy_array_equal(duplicated, expected)
    assert duplicated.dtype == bool
    expected = MultiIndex.from_arrays(([2, 3, 1, 2, 3], [1, 1, 1, 2, 2]))
    tm.assert_index_equal(idx.drop_duplicates(keep="last"), expected)

    expected = np.array([True, False, False, True, False, False])
    duplicated = idx.duplicated(keep=False)
    tm.assert_numpy_array_equal(duplicated, expected)
    assert duplicated.dtype == bool
    expected = MultiIndex.from_arrays(([2, 3, 2, 3], [1, 1, 2, 2]))
    tm.assert_index_equal(idx.drop_duplicates(keep=False), expected)


@pytest.mark.parametrize(
    "dtype",
    [
        np.complex64,
        np.complex128,
    ],
)
def test_duplicated_series_complex_numbers(dtype):
    # GH 17927
    expected = Series(
        [False, False, False, True, False, False, False, True, False, True],
        dtype=bool,
    )
    result = Series(
        [
            np.nan + np.nan * 1j,
            0,
            1j,
            1j,
            1,
            1 + 1j,
            1 + 2j,
            1 + 1j,
            np.nan,
            np.nan + np.nan * 1j,
        ],
        dtype=dtype,
    ).duplicated()
    tm.assert_series_equal(result, expected)


def test_midx_unique_ea_dtype():
    # GH#48335
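    # "Int64" (capital I) is the nullable extension dtype, so the level
    # keeps pd.NA rather than casting it to a float NaN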
    vals_a = Series([1, 2, NA, NA], dtype="Int64")
    vals_b = np.array([1, 2, 3, 3])
    midx = MultiIndex.from_arrays([vals_a, vals_b], names=["a", "b"])
    result = midx.unique()

    exp_vals_a = Series([1, 2, NA], dtype="Int64")
    exp_vals_b = np.array([1, 2, 3])
    expected = MultiIndex.from_arrays([exp_vals_a, exp_vals_b], names=["a", "b"])
    tm.assert_index_equal(result, expected)
@ -0,0 +1,284 @@
import numpy as np
import pytest

from pandas.core.dtypes.common import is_any_real_numeric_dtype

import pandas as pd
from pandas import (
    Index,
    MultiIndex,
    Series,
)
import pandas._testing as tm


def test_equals(idx):
    assert idx.equals(idx)
    assert idx.equals(idx.copy())
    assert idx.equals(idx.astype(object))
    assert idx.equals(idx.to_flat_index())
    assert idx.equals(idx.to_flat_index().astype("category"))

    assert not idx.equals(list(idx))
    assert not idx.equals(np.array(idx))

    same_values = Index(idx, dtype=object)
    assert idx.equals(same_values)
    assert same_values.equals(idx)

    if idx.nlevels == 1:
        # do not test MultiIndex
        assert not idx.equals(Series(idx))


def test_equals_op(idx):
    # GH9947, GH10637
    index_a = idx

    n = len(index_a)
    index_b = index_a[0:-1]
    index_c = index_a[0:-1].append(index_a[-2:-1])
    index_d = index_a[0:1]
    with pytest.raises(ValueError, match="Lengths must match"):
        index_a == index_b
    expected1 = np.array([True] * n)
    expected2 = np.array([True] * (n - 1) + [False])
    tm.assert_numpy_array_equal(index_a == index_a, expected1)
    tm.assert_numpy_array_equal(index_a == index_c, expected2)

    # test comparisons with numpy arrays
    array_a = np.array(index_a)
    array_b = np.array(index_a[0:-1])
    array_c = np.array(index_a[0:-1].append(index_a[-2:-1]))
    array_d = np.array(index_a[0:1])
    with pytest.raises(ValueError, match="Lengths must match"):
        index_a == array_b
    tm.assert_numpy_array_equal(index_a == array_a, expected1)
    tm.assert_numpy_array_equal(index_a == array_c, expected2)

    # test comparisons with Series
    series_a = Series(array_a)
    series_b = Series(array_b)
    series_c = Series(array_c)
    series_d = Series(array_d)
    with pytest.raises(ValueError, match="Lengths must match"):
        index_a == series_b

    tm.assert_numpy_array_equal(index_a == series_a, expected1)
    tm.assert_numpy_array_equal(index_a == series_c, expected2)

    # cases where length is 1 for one of them
    with pytest.raises(ValueError, match="Lengths must match"):
        index_a == index_d
    with pytest.raises(ValueError, match="Lengths must match"):
        index_a == series_d
    with pytest.raises(ValueError, match="Lengths must match"):
        index_a == array_d
    msg = "Can only compare identically-labeled Series objects"
    with pytest.raises(ValueError, match=msg):
        series_a == series_d
    with pytest.raises(ValueError, match="Lengths must match"):
        series_a == array_d

    # comparing with a scalar should broadcast; note that we are excluding
    # MultiIndex because in this case each item in the index is a tuple of
    # length 2, and therefore is considered an array of length 2 in the
    # comparison instead of a scalar
    if not isinstance(index_a, MultiIndex):
        expected3 = np.array([False] * (len(index_a) - 2) + [True, False])
        # assuming the 2nd to last item is unique in the data
        item = index_a[-2]
        tm.assert_numpy_array_equal(index_a == item, expected3)
        tm.assert_series_equal(series_a == item, Series(expected3))


def test_compare_tuple():
    # GH#21517
    mi = MultiIndex.from_product([[1, 2]] * 2)

    all_false = np.array([False, False, False, False])

    result = mi == mi[0]
    expected = np.array([True, False, False, False])
    tm.assert_numpy_array_equal(result, expected)

    result = mi != mi[0]
    tm.assert_numpy_array_equal(result, ~expected)

    result = mi < mi[0]
    tm.assert_numpy_array_equal(result, all_false)

    result = mi <= mi[0]
    tm.assert_numpy_array_equal(result, expected)

    result = mi > mi[0]
    tm.assert_numpy_array_equal(result, ~expected)

    result = mi >= mi[0]
    tm.assert_numpy_array_equal(result, ~all_false)


def test_compare_tuple_strs():
    # GH#34180

    mi = MultiIndex.from_tuples([("a", "b"), ("b", "c"), ("c", "a")])

    result = mi == ("c", "a")
    expected = np.array([False, False, True])
    tm.assert_numpy_array_equal(result, expected)

    result = mi == ("c",)
    expected = np.array([False, False, False])
    tm.assert_numpy_array_equal(result, expected)


def test_equals_multi(idx):
    assert idx.equals(idx)
    assert not idx.equals(idx.values)
    assert idx.equals(Index(idx.values))

    assert idx.equal_levels(idx)
    assert not idx.equals(idx[:-1])
    assert not idx.equals(idx[-1])

    # different number of levels
    index = MultiIndex(
        levels=[Index(list(range(4))), Index(list(range(4))), Index(list(range(4)))],
        codes=[
            np.array([0, 0, 1, 2, 2, 2, 3, 3]),
            np.array([0, 1, 0, 0, 0, 1, 0, 1]),
            np.array([1, 0, 1, 1, 0, 0, 1, 0]),
        ],
    )

    index2 = MultiIndex(levels=index.levels[:-1], codes=index.codes[:-1])
    assert not index.equals(index2)
    assert not index.equal_levels(index2)

    # levels are different
    major_axis = Index(list(range(4)))
    minor_axis = Index(list(range(2)))

    major_codes = np.array([0, 0, 1, 2, 2, 3])
    minor_codes = np.array([0, 1, 0, 0, 1, 0])

    index = MultiIndex(
        levels=[major_axis, minor_axis], codes=[major_codes, minor_codes]
    )
    assert not idx.equals(index)
    assert not idx.equal_levels(index)

    # some of the labels are different
    major_axis = Index(["foo", "bar", "baz", "qux"])
    minor_axis = Index(["one", "two"])

    major_codes = np.array([0, 0, 2, 2, 3, 3])
    minor_codes = np.array([0, 1, 0, 1, 0, 1])

    index = MultiIndex(
        levels=[major_axis, minor_axis], codes=[major_codes, minor_codes]
    )
    assert not idx.equals(index)


def test_identical(idx):
    mi = idx.copy()
    mi2 = idx.copy()
    assert mi.identical(mi2)

    mi = mi.set_names(["new1", "new2"])
    assert mi.equals(mi2)
    assert not mi.identical(mi2)

    mi2 = mi2.set_names(["new1", "new2"])
    assert mi.identical(mi2)

    mi4 = Index(mi.tolist(), tupleize_cols=False)
    assert not mi.identical(mi4)
    assert mi.equals(mi4)


def test_equals_operator(idx):
    # GH9785
    assert (idx == idx).all()


def test_equals_missing_values():
    # make sure take is not using -1
    i = MultiIndex.from_tuples([(0, pd.NaT), (0, pd.Timestamp("20130101"))])
    result = i[0:1].equals(i[0])
    assert not result
    result = i[1:2].equals(i[1])
    assert not result


def test_equals_missing_values_differently_sorted():
    # GH#38439
    mi1 = MultiIndex.from_tuples([(81.0, np.nan), (np.nan, np.nan)])
    mi2 = MultiIndex.from_tuples([(np.nan, np.nan), (81.0, np.nan)])
    assert not mi1.equals(mi2)

    mi2 = MultiIndex.from_tuples([(81.0, np.nan), (np.nan, np.nan)])
    assert mi1.equals(mi2)


def test_is_():
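    # Index.is_ compares the shared _id object (view-based identity),
    # not value equality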
    mi = MultiIndex.from_tuples(zip(range(10), range(10)))
    assert mi.is_(mi)
    assert mi.is_(mi.view())
    assert mi.is_(mi.view().view().view().view())
    mi2 = mi.view()
    # names are metadata, they don't change id
    mi2.names = ["A", "B"]
    assert mi2.is_(mi)
    assert mi.is_(mi2)

    assert not mi.is_(mi.set_names(["C", "D"]))
    # levels are inherent properties, they change identity
    mi3 = mi2.set_levels([list(range(10)), list(range(10))])
    assert not mi3.is_(mi2)
    # shouldn't change
    assert mi2.is_(mi)
    mi4 = mi3.view()

    # GH 17464 - Remove duplicate MultiIndex levels
    mi4 = mi4.set_levels([list(range(10)), list(range(10))])
    assert not mi4.is_(mi3)
    mi5 = mi.view()
    mi5 = mi5.set_levels(mi5.levels)
    assert not mi5.is_(mi)


def test_is_all_dates(idx):
    assert not idx._is_all_dates


def test_is_numeric(idx):
    # MultiIndex is never numeric
    assert not is_any_real_numeric_dtype(idx)


def test_multiindex_compare():
    # GH 21149
    # Ensure comparison operations for MultiIndex with nlevels == 1
    # behave consistently with those for MultiIndex with nlevels > 1

    midx = MultiIndex.from_product([[0, 1]])

    # Equality self-test: MultiIndex object vs self
    expected = Series([True, True])
    result = Series(midx == midx)
    tm.assert_series_equal(result, expected)

    # Greater than comparison: MultiIndex object vs self
    expected = Series([False, False])
    result = Series(midx > midx)
    tm.assert_series_equal(result, expected)


def test_equals_ea_int_regular_int():
    # GH#46026
    mi1 = MultiIndex.from_arrays([Index([1, 2], dtype="Int64"), [3, 4]])
    mi2 = MultiIndex.from_arrays([[1, 2], [3, 4]])
    assert not mi1.equals(mi2)
    assert not mi2.equals(mi1)
@ -0,0 +1,249 @@
import numpy as np
import pytest

import pandas as pd
from pandas import (
    Index,
    MultiIndex,
)
import pandas._testing as tm


def test_format(idx):
    msg = "MultiIndex.format is deprecated"
    with tm.assert_produces_warning(FutureWarning, match=msg):
        idx.format()
        idx[:0].format()


def test_format_integer_names():
    index = MultiIndex(
        levels=[[0, 1], [0, 1]], codes=[[0, 0, 1, 1], [0, 1, 0, 1]], names=[0, 1]
    )
    msg = "MultiIndex.format is deprecated"
    with tm.assert_produces_warning(FutureWarning, match=msg):
        index.format(names=True)


def test_format_sparse_config(idx):
    # GH1538
    msg = "MultiIndex.format is deprecated"
    with pd.option_context("display.multi_sparse", False):
        with tm.assert_produces_warning(FutureWarning, match=msg):
            result = idx.format()
    assert result[1] == "foo  two"


def test_format_sparse_display():
    index = MultiIndex(
        levels=[[0, 1], [0, 1], [0, 1], [0]],
        codes=[
            [0, 0, 0, 1, 1, 1],
            [0, 0, 1, 0, 0, 1],
            [0, 1, 0, 0, 1, 0],
            [0, 0, 0, 0, 0, 0],
        ],
    )
    msg = "MultiIndex.format is deprecated"
    with tm.assert_produces_warning(FutureWarning, match=msg):
        result = index.format()
    assert result[3] == "1  0  0  0"


def test_repr_with_unicode_data():
    with pd.option_context("display.encoding", "UTF-8"):
        d = {"a": ["\u05d0", 2, 3], "b": [4, 5, 6], "c": [7, 8, 9]}
        index = pd.DataFrame(d).set_index(["a", "b"]).index
        assert "\\" not in repr(index)  # we don't want unicode-escaped


def test_repr_roundtrip_raises():
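    # the MultiIndex repr lists only tuples and names, so evaluating it
    # cannot satisfy the constructor's required levels/codes arguments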
    mi = MultiIndex.from_product([list("ab"), range(3)], names=["first", "second"])
    msg = "Must pass both levels and codes"
    with pytest.raises(TypeError, match=msg):
        eval(repr(mi))


def test_unicode_string_with_unicode():
    d = {"a": ["\u05d0", 2, 3], "b": [4, 5, 6], "c": [7, 8, 9]}
    idx = pd.DataFrame(d).set_index(["a", "b"]).index
    str(idx)


def test_repr_max_seq_item_setting(idx):
    # GH10182
    idx = idx.repeat(50)
    with pd.option_context("display.max_seq_items", None):
        repr(idx)
        assert "..." not in str(idx)


class TestRepr:
    def test_unicode_repr_issues(self):
        levels = [Index(["a/\u03c3", "b/\u03c3", "c/\u03c3"]), Index([0, 1])]
        codes = [np.arange(3).repeat(2), np.tile(np.arange(2), 3)]
        index = MultiIndex(levels=levels, codes=codes)

        repr(index.levels)
        repr(index.get_level_values(1))

    def test_repr_max_seq_items_equal_to_n(self, idx):
        # display.max_seq_items == n
        with pd.option_context("display.max_seq_items", 6):
            result = idx.__repr__()
            expected = """\
MultiIndex([('foo', 'one'),
            ('foo', 'two'),
            ('bar', 'one'),
            ('baz', 'two'),
            ('qux', 'one'),
            ('qux', 'two')],
           names=['first', 'second'])"""
            assert result == expected

    def test_repr(self, idx):
        result = idx[:1].__repr__()
        expected = """\
MultiIndex([('foo', 'one')],
           names=['first', 'second'])"""
        assert result == expected

        result = idx.__repr__()
        expected = """\
MultiIndex([('foo', 'one'),
            ('foo', 'two'),
            ('bar', 'one'),
            ('baz', 'two'),
            ('qux', 'one'),
            ('qux', 'two')],
           names=['first', 'second'])"""
        assert result == expected

        with pd.option_context("display.max_seq_items", 5):
            result = idx.__repr__()
            expected = """\
MultiIndex([('foo', 'one'),
            ('foo', 'two'),
            ...
            ('qux', 'one'),
            ('qux', 'two')],
           names=['first', 'second'], length=6)"""
            assert result == expected

        # display.max_seq_items == 1
        with pd.option_context("display.max_seq_items", 1):
            result = idx.__repr__()
            expected = """\
MultiIndex([...
            ('qux', 'two')],
           names=['first', ...], length=6)"""
            assert result == expected

    def test_rjust(self):
        n = 1000
        ci = pd.CategoricalIndex(list("a" * n) + (["abc"] * n))
        dti = pd.date_range("2000-01-01", freq="s", periods=n * 2)
        mi = MultiIndex.from_arrays([ci, ci.codes + 9, dti], names=["a", "b", "dti"])
        result = mi[:1].__repr__()
        expected = """\
MultiIndex([('a', 9, '2000-01-01 00:00:00')],
           names=['a', 'b', 'dti'])"""
        assert result == expected

        result = mi[::500].__repr__()
        expected = """\
MultiIndex([(  'a',  9, '2000-01-01 00:00:00'),
            (  'a',  9, '2000-01-01 00:08:20'),
            ('abc', 10, '2000-01-01 00:16:40'),
            ('abc', 10, '2000-01-01 00:25:00')],
           names=['a', 'b', 'dti'])"""
        assert result == expected

        result = mi.__repr__()
        expected = """\
MultiIndex([(  'a',  9, '2000-01-01 00:00:00'),
            (  'a',  9, '2000-01-01 00:00:01'),
            (  'a',  9, '2000-01-01 00:00:02'),
            (  'a',  9, '2000-01-01 00:00:03'),
            (  'a',  9, '2000-01-01 00:00:04'),
            (  'a',  9, '2000-01-01 00:00:05'),
            (  'a',  9, '2000-01-01 00:00:06'),
            (  'a',  9, '2000-01-01 00:00:07'),
            (  'a',  9, '2000-01-01 00:00:08'),
            (  'a',  9, '2000-01-01 00:00:09'),
            ...
            ('abc', 10, '2000-01-01 00:33:10'),
            ('abc', 10, '2000-01-01 00:33:11'),
            ('abc', 10, '2000-01-01 00:33:12'),
            ('abc', 10, '2000-01-01 00:33:13'),
            ('abc', 10, '2000-01-01 00:33:14'),
            ('abc', 10, '2000-01-01 00:33:15'),
            ('abc', 10, '2000-01-01 00:33:16'),
            ('abc', 10, '2000-01-01 00:33:17'),
            ('abc', 10, '2000-01-01 00:33:18'),
            ('abc', 10, '2000-01-01 00:33:19')],
           names=['a', 'b', 'dti'], length=2000)"""
        assert result == expected

    def test_tuple_width(self):
        n = 1000
        ci = pd.CategoricalIndex(list("a" * n) + (["abc"] * n))
        dti = pd.date_range("2000-01-01", freq="s", periods=n * 2)
        levels = [ci, ci.codes + 9, dti, dti, dti]
        names = ["a", "b", "dti_1", "dti_2", "dti_3"]
        mi = MultiIndex.from_arrays(levels, names=names)
        result = mi[:1].__repr__()
        expected = """MultiIndex([('a', 9, '2000-01-01 00:00:00', '2000-01-01 00:00:00', ...)],
           names=['a', 'b', 'dti_1', 'dti_2', 'dti_3'])"""  # noqa: E501
        assert result == expected

        result = mi[:10].__repr__()
        expected = """\
MultiIndex([('a', 9, '2000-01-01 00:00:00', '2000-01-01 00:00:00', ...),
            ('a', 9, '2000-01-01 00:00:01', '2000-01-01 00:00:01', ...),
            ('a', 9, '2000-01-01 00:00:02', '2000-01-01 00:00:02', ...),
            ('a', 9, '2000-01-01 00:00:03', '2000-01-01 00:00:03', ...),
            ('a', 9, '2000-01-01 00:00:04', '2000-01-01 00:00:04', ...),
            ('a', 9, '2000-01-01 00:00:05', '2000-01-01 00:00:05', ...),
            ('a', 9, '2000-01-01 00:00:06', '2000-01-01 00:00:06', ...),
            ('a', 9, '2000-01-01 00:00:07', '2000-01-01 00:00:07', ...),
            ('a', 9, '2000-01-01 00:00:08', '2000-01-01 00:00:08', ...),
            ('a', 9, '2000-01-01 00:00:09', '2000-01-01 00:00:09', ...)],
           names=['a', 'b', 'dti_1', 'dti_2', 'dti_3'])"""
        assert result == expected

        result = mi.__repr__()
        expected = """\
MultiIndex([(  'a',  9, '2000-01-01 00:00:00', '2000-01-01 00:00:00', ...),
            (  'a',  9, '2000-01-01 00:00:01', '2000-01-01 00:00:01', ...),
            (  'a',  9, '2000-01-01 00:00:02', '2000-01-01 00:00:02', ...),
            (  'a',  9, '2000-01-01 00:00:03', '2000-01-01 00:00:03', ...),
            (  'a',  9, '2000-01-01 00:00:04', '2000-01-01 00:00:04', ...),
            (  'a',  9, '2000-01-01 00:00:05', '2000-01-01 00:00:05', ...),
            (  'a',  9, '2000-01-01 00:00:06', '2000-01-01 00:00:06', ...),
            (  'a',  9, '2000-01-01 00:00:07', '2000-01-01 00:00:07', ...),
            (  'a',  9, '2000-01-01 00:00:08', '2000-01-01 00:00:08', ...),
            (  'a',  9, '2000-01-01 00:00:09', '2000-01-01 00:00:09', ...),
            ...
            ('abc', 10, '2000-01-01 00:33:10', '2000-01-01 00:33:10', ...),
            ('abc', 10, '2000-01-01 00:33:11', '2000-01-01 00:33:11', ...),
            ('abc', 10, '2000-01-01 00:33:12', '2000-01-01 00:33:12', ...),
            ('abc', 10, '2000-01-01 00:33:13', '2000-01-01 00:33:13', ...),
            ('abc', 10, '2000-01-01 00:33:14', '2000-01-01 00:33:14', ...),
            ('abc', 10, '2000-01-01 00:33:15', '2000-01-01 00:33:15', ...),
            ('abc', 10, '2000-01-01 00:33:16', '2000-01-01 00:33:16', ...),
            ('abc', 10, '2000-01-01 00:33:17', '2000-01-01 00:33:17', ...),
            ('abc', 10, '2000-01-01 00:33:18', '2000-01-01 00:33:18', ...),
            ('abc', 10, '2000-01-01 00:33:19', '2000-01-01 00:33:19', ...)],
           names=['a', 'b', 'dti_1', 'dti_2', 'dti_3'], length=2000)"""
        assert result == expected

    def test_multiindex_long_element(self):
        # Non-regression test towards GH#52960
        data = MultiIndex.from_tuples([("c" * 62,)])

        expected = (
            "MultiIndex([('cccccccccccccccccccccccccccccccccccccccc"
            "cccccccccccccccccccccc',)],\n           )"
        )
        assert str(data) == expected
@ -0,0 +1,124 @@
import numpy as np

import pandas as pd
from pandas import (
    CategoricalIndex,
    Index,
    MultiIndex,
    Timestamp,
    date_range,
)
import pandas._testing as tm


class TestGetLevelValues:
    def test_get_level_values_box_datetime64(self):
        dates = date_range("1/1/2000", periods=4)
        levels = [dates, [0, 1]]
        codes = [[0, 0, 1, 1, 2, 2, 3, 3], [0, 1, 0, 1, 0, 1, 0, 1]]

        index = MultiIndex(levels=levels, codes=codes)

        assert isinstance(index.get_level_values(0)[0], Timestamp)


def test_get_level_values(idx):
    result = idx.get_level_values(0)
    expected = Index(["foo", "foo", "bar", "baz", "qux", "qux"], name="first")
    tm.assert_index_equal(result, expected)
    assert result.name == "first"

    result = idx.get_level_values("first")
    expected = idx.get_level_values(0)
    tm.assert_index_equal(result, expected)

    # GH 10460
    index = MultiIndex(
        levels=[CategoricalIndex(["A", "B"]), CategoricalIndex([1, 2, 3])],
        codes=[np.array([0, 0, 0, 1, 1, 1]), np.array([0, 1, 2, 0, 1, 2])],
    )

    exp = CategoricalIndex(["A", "A", "A", "B", "B", "B"])
    tm.assert_index_equal(index.get_level_values(0), exp)
    exp = CategoricalIndex([1, 2, 3, 1, 2, 3])
    tm.assert_index_equal(index.get_level_values(1), exp)


def test_get_level_values_all_na():
    # GH#17924 when level entirely consists of nan
    arrays = [[np.nan, np.nan, np.nan], ["a", np.nan, 1]]
    index = MultiIndex.from_arrays(arrays)
    result = index.get_level_values(0)
    expected = Index([np.nan, np.nan, np.nan], dtype=np.float64)
    tm.assert_index_equal(result, expected)

    result = index.get_level_values(1)
    expected = Index(["a", np.nan, 1], dtype=object)
    tm.assert_index_equal(result, expected)


def test_get_level_values_int_with_na():
    # GH#17924
    arrays = [["a", "b", "b"], [1, np.nan, 2]]
    index = MultiIndex.from_arrays(arrays)
    result = index.get_level_values(1)
    expected = Index([1, np.nan, 2])
    tm.assert_index_equal(result, expected)

    arrays = [["a", "b", "b"], [np.nan, np.nan, 2]]
    index = MultiIndex.from_arrays(arrays)
    result = index.get_level_values(1)
    expected = Index([np.nan, np.nan, 2])
    tm.assert_index_equal(result, expected)


def test_get_level_values_na():
    arrays = [[np.nan, np.nan, np.nan], ["a", np.nan, 1]]
    index = MultiIndex.from_arrays(arrays)
    result = index.get_level_values(0)
    expected = Index([np.nan, np.nan, np.nan])
    tm.assert_index_equal(result, expected)

    result = index.get_level_values(1)
    expected = Index(["a", np.nan, 1])
    tm.assert_index_equal(result, expected)

    arrays = [["a", "b", "b"], pd.DatetimeIndex([0, 1, pd.NaT])]
    index = MultiIndex.from_arrays(arrays)
    result = index.get_level_values(1)
    expected = pd.DatetimeIndex([0, 1, pd.NaT])
    tm.assert_index_equal(result, expected)

    arrays = [[], []]
    index = MultiIndex.from_arrays(arrays)
    result = index.get_level_values(0)
    expected = Index([], dtype=object)
    tm.assert_index_equal(result, expected)


def test_get_level_values_when_periods():
    # GH33131. See also discussion in GH32669.
    # This test can probably be removed when PeriodIndex._engine is removed.
    from pandas import (
        Period,
        PeriodIndex,
    )

    idx = MultiIndex.from_arrays(
        [PeriodIndex([Period("2019Q1"), Period("2019Q2")], name="b")]
    )
    idx2 = MultiIndex.from_arrays(
        [idx._get_level_values(level) for level in range(idx.nlevels)]
    )
    assert all(x.is_monotonic_increasing for x in idx2.levels)


def test_values_loses_freq_of_underlying_index():
    # GH#49054
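    # materializing .values below must not clear the freq of the
    # underlying DatetimeIndex level as a side effect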
    idx = pd.DatetimeIndex(date_range("20200101", periods=3, freq="BME"))
    expected = idx.copy(deep=True)
    idx2 = Index([1, 2, 3])
    midx = MultiIndex(levels=[idx, idx2], codes=[[0, 1, 2], [0, 1, 2]])
    midx.values
    assert idx.freq is not None
    tm.assert_index_equal(idx, expected)
@ -0,0 +1,384 @@
import numpy as np
import pytest

from pandas.compat import PY311

from pandas.core.dtypes.dtypes import DatetimeTZDtype

import pandas as pd
from pandas import (
    CategoricalIndex,
    MultiIndex,
)
import pandas._testing as tm


def assert_matching(actual, expected, check_dtype=False):
    # avoid specifying internal representation
    # as much as possible
    assert len(actual) == len(expected)
    for act, exp in zip(actual, expected):
        act = np.asarray(act)
        exp = np.asarray(exp)
        tm.assert_numpy_array_equal(act, exp, check_dtype=check_dtype)


def test_get_level_number_integer(idx):
    idx.names = [1, 0]
    assert idx._get_level_number(1) == 0
    assert idx._get_level_number(0) == 1
    msg = "Too many levels: Index has only 2 levels, not 3"
    with pytest.raises(IndexError, match=msg):
        idx._get_level_number(2)
    with pytest.raises(KeyError, match="Level fourth not found"):
        idx._get_level_number("fourth")


def test_get_dtypes(using_infer_string):
    # Test MultiIndex.dtypes (# Gh37062)
    idx_multitype = MultiIndex.from_product(
        [[1, 2, 3], ["a", "b", "c"], pd.date_range("20200101", periods=2, tz="UTC")],
        names=["int", "string", "dt"],
    )

    exp = "object" if not using_infer_string else pd.StringDtype(na_value=np.nan)
    expected = pd.Series(
        {
            "int": np.dtype("int64"),
            "string": exp,
            "dt": DatetimeTZDtype(tz="utc"),
        }
    )
    tm.assert_series_equal(expected, idx_multitype.dtypes)


def test_get_dtypes_no_level_name(using_infer_string):
    # Test MultiIndex.dtypes (# GH38580 )
    idx_multitype = MultiIndex.from_product(
        [
            [1, 2, 3],
            ["a", "b", "c"],
            pd.date_range("20200101", periods=2, tz="UTC"),
        ],
    )
    exp = "object" if not using_infer_string else pd.StringDtype(na_value=np.nan)
    expected = pd.Series(
        {
            "level_0": np.dtype("int64"),
            "level_1": exp,
            "level_2": DatetimeTZDtype(tz="utc"),
        }
    )
    tm.assert_series_equal(expected, idx_multitype.dtypes)


def test_get_dtypes_duplicate_level_names(using_infer_string):
    # Test MultiIndex.dtypes with non-unique level names (# GH45174)
    result = MultiIndex.from_product(
        [
            [1, 2, 3],
            ["a", "b", "c"],
            pd.date_range("20200101", periods=2, tz="UTC"),
        ],
        names=["A", "A", "A"],
    ).dtypes
    exp = "object" if not using_infer_string else pd.StringDtype(na_value=np.nan)
    expected = pd.Series(
        [np.dtype("int64"), exp, DatetimeTZDtype(tz="utc")],
        index=["A", "A", "A"],
    )
    tm.assert_series_equal(result, expected)


def test_get_level_number_out_of_bounds(multiindex_dataframe_random_data):
    frame = multiindex_dataframe_random_data

    with pytest.raises(IndexError, match="Too many levels"):
        frame.index._get_level_number(2)
    with pytest.raises(IndexError, match="not a valid level number"):
        frame.index._get_level_number(-3)


def test_set_name_methods(idx):
    # so long as these are synonyms, we don't need to test set_names
    index_names = ["first", "second"]
    assert idx.rename == idx.set_names
    new_names = [name + "SUFFIX" for name in index_names]
    ind = idx.set_names(new_names)
    assert idx.names == index_names
    assert ind.names == new_names
    msg = "Length of names must match number of levels in MultiIndex"
    with pytest.raises(ValueError, match=msg):
        ind.set_names(new_names + new_names)
    new_names2 = [name + "SUFFIX2" for name in new_names]
    res = ind.set_names(new_names2, inplace=True)
    assert res is None
    assert ind.names == new_names2

    # set names for specific level (# GH7792)
    ind = idx.set_names(new_names[0], level=0)
    assert idx.names == index_names
    assert ind.names == [new_names[0], index_names[1]]

    res = ind.set_names(new_names2[0], level=0, inplace=True)
    assert res is None
    assert ind.names == [new_names2[0], index_names[1]]

    # set names for multiple levels
    ind = idx.set_names(new_names, level=[0, 1])
    assert idx.names == index_names
    assert ind.names == new_names

    res = ind.set_names(new_names2, level=[0, 1], inplace=True)
    assert res is None
    assert ind.names == new_names2


def test_set_levels_codes_directly(idx):
    # setting levels/codes directly raises AttributeError

    levels = idx.levels
    new_levels = [[lev + "a" for lev in level] for level in levels]

    codes = idx.codes
    major_codes, minor_codes = codes
    major_codes = [(x + 1) % 3 for x in major_codes]
    minor_codes = [(x + 1) % 1 for x in minor_codes]
    new_codes = [major_codes, minor_codes]

    msg = "Can't set attribute"
    with pytest.raises(AttributeError, match=msg):
        idx.levels = new_levels

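    # Python 3.11 reworded the AttributeError raised for properties without
    # a setter, hence the version-dependent expected message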
    msg = (
        "property 'codes' of 'MultiIndex' object has no setter"
        if PY311
        else "can't set attribute"
    )
    with pytest.raises(AttributeError, match=msg):
        idx.codes = new_codes


def test_set_levels(idx):
    # side note - you probably wouldn't want to use levels and codes
    # directly like this - but it is possible.
    levels = idx.levels
    new_levels = [[lev + "a" for lev in level] for level in levels]

    # level changing [w/o mutation]
    ind2 = idx.set_levels(new_levels)
    assert_matching(ind2.levels, new_levels)
    assert_matching(idx.levels, levels)

    # level changing specific level [w/o mutation]
    ind2 = idx.set_levels(new_levels[0], level=0)
    assert_matching(ind2.levels, [new_levels[0], levels[1]])
    assert_matching(idx.levels, levels)

    ind2 = idx.set_levels(new_levels[1], level=1)
    assert_matching(ind2.levels, [levels[0], new_levels[1]])
    assert_matching(idx.levels, levels)

    # level changing multiple levels [w/o mutation]
    ind2 = idx.set_levels(new_levels, level=[0, 1])
    assert_matching(ind2.levels, new_levels)
    assert_matching(idx.levels, levels)

    # illegal level changing should not change levels
    # GH 13754
    original_index = idx.copy()
    with pytest.raises(ValueError, match="^On"):
        idx.set_levels(["c"], level=0)
    assert_matching(idx.levels, original_index.levels, check_dtype=True)

    with pytest.raises(ValueError, match="^On"):
        idx.set_codes([0, 1, 2, 3, 4, 5], level=0)
    assert_matching(idx.codes, original_index.codes, check_dtype=True)

    with pytest.raises(TypeError, match="^Levels"):
        idx.set_levels("c", level=0)
    assert_matching(idx.levels, original_index.levels, check_dtype=True)

    with pytest.raises(TypeError, match="^Codes"):
        idx.set_codes(1, level=0)
    assert_matching(idx.codes, original_index.codes, check_dtype=True)


def test_set_codes(idx):
    # side note - you probably wouldn't want to use levels and codes
    # directly like this - but it is possible.
    codes = idx.codes
    major_codes, minor_codes = codes
    major_codes = [(x + 1) % 3 for x in major_codes]
    minor_codes = [(x + 1) % 1 for x in minor_codes]
    new_codes = [major_codes, minor_codes]

    # changing codes w/o mutation
    ind2 = idx.set_codes(new_codes)
    assert_matching(ind2.codes, new_codes)
    assert_matching(idx.codes, codes)

    # codes changing specific level w/o mutation
    ind2 = idx.set_codes(new_codes[0], level=0)
    assert_matching(ind2.codes, [new_codes[0], codes[1]])
    assert_matching(idx.codes, codes)

    ind2 = idx.set_codes(new_codes[1], level=1)
    assert_matching(ind2.codes, [codes[0], new_codes[1]])
    assert_matching(idx.codes, codes)

    # codes changing multiple levels w/o mutation
    ind2 = idx.set_codes(new_codes, level=[0, 1])
    assert_matching(ind2.codes, new_codes)
    assert_matching(idx.codes, codes)

    # label changing for levels of different magnitude of categories
    ind = MultiIndex.from_tuples([(0, i) for i in range(130)])
    new_codes = range(129, -1, -1)
    expected = MultiIndex.from_tuples([(0, i) for i in new_codes])

    # [w/o mutation]
    result = ind.set_codes(codes=new_codes, level=1)
    assert result.equals(expected)


def test_set_levels_codes_names_bad_input(idx):
    levels, codes = idx.levels, idx.codes
    names = idx.names

    with pytest.raises(ValueError, match="Length of levels"):
        idx.set_levels([levels[0]])

    with pytest.raises(ValueError, match="Length of codes"):
        idx.set_codes([codes[0]])

    with pytest.raises(ValueError, match="Length of names"):
        idx.set_names([names[0]])

    # shouldn't scalar data error, instead should demand list-like
    with pytest.raises(TypeError, match="list of lists-like"):
        idx.set_levels(levels[0])

    # shouldn't scalar data error, instead should demand list-like
    with pytest.raises(TypeError, match="list of lists-like"):
        idx.set_codes(codes[0])

    # shouldn't scalar data error, instead should demand list-like
    with pytest.raises(TypeError, match="list-like"):
        idx.set_names(names[0])

    # should have equal lengths
    with pytest.raises(TypeError, match="list of lists-like"):
        idx.set_levels(levels[0], level=[0, 1])

    with pytest.raises(TypeError, match="list-like"):
        idx.set_levels(levels, level=0)

    # should have equal lengths
    with pytest.raises(TypeError, match="list of lists-like"):
        idx.set_codes(codes[0], level=[0, 1])

    with pytest.raises(TypeError, match="list-like"):
        idx.set_codes(codes, level=0)

    # should have equal lengths
    with pytest.raises(ValueError, match="Length of names"):
        idx.set_names(names[0], level=[0, 1])

    with pytest.raises(TypeError, match="Names must be a"):
        idx.set_names(names, level=0)


@pytest.mark.parametrize("inplace", [True, False])
def test_set_names_with_nlevel_1(inplace):
    # GH 21149
    # Ensure that .set_names for MultiIndex with
    # nlevels == 1 does not raise any errors
    expected = MultiIndex(levels=[[0, 1]], codes=[[0, 1]], names=["first"])
    m = MultiIndex.from_product([[0, 1]])
    result = m.set_names("first", level=0, inplace=inplace)

    if inplace:
        result = m

    tm.assert_index_equal(result, expected)


@pytest.mark.parametrize("ordered", [True, False])
def test_set_levels_categorical(ordered):
    # GH13854
    index = MultiIndex.from_arrays([list("xyzx"), [0, 1, 2, 3]])

    cidx = CategoricalIndex(list("bac"), ordered=ordered)
    result = index.set_levels(cidx, level=0)
    expected = MultiIndex(levels=[cidx, [0, 1, 2, 3]], codes=index.codes)
    tm.assert_index_equal(result, expected)

    result_lvl = result.get_level_values(0)
    expected_lvl = CategoricalIndex(
        list("bacb"), categories=cidx.categories, ordered=cidx.ordered
    )
    tm.assert_index_equal(result_lvl, expected_lvl)


def test_set_value_keeps_names():
    # motivating example from #3742
    lev1 = ["hans", "hans", "hans", "grethe", "grethe", "grethe"]
    lev2 = ["1", "2", "3"] * 2
    idx = MultiIndex.from_arrays([lev1, lev2], names=["Name", "Number"])
    df = pd.DataFrame(
        np.random.default_rng(2).standard_normal((6, 4)),
        columns=["one", "two", "three", "four"],
        index=idx,
    )
    df = df.sort_index()
    assert df._is_copy is None
    assert df.index.names == ("Name", "Number")
    df.at[("grethe", "4"), "one"] = 99.34
    assert df._is_copy is None
    assert df.index.names == ("Name", "Number")


def test_set_levels_with_iterable():
    # GH23273
    sizes = [1, 2, 3]
    colors = ["black"] * 3
    index = MultiIndex.from_arrays([sizes, colors], names=["size", "color"])

    result = index.set_levels(map(int, ["3", "2", "1"]), level="size")

    expected_sizes = [3, 2, 1]
    expected = MultiIndex.from_arrays([expected_sizes, colors], names=["size", "color"])
    tm.assert_index_equal(result, expected)


def test_set_empty_level():
    # GH#48636
    midx = MultiIndex.from_arrays([[]], names=["A"])
    result = midx.set_levels(pd.DatetimeIndex([]), level=0)
    expected = MultiIndex.from_arrays([pd.DatetimeIndex([])], names=["A"])
    tm.assert_index_equal(result, expected)


def test_set_levels_pos_args_removal():
    # https://github.com/pandas-dev/pandas/issues/41485
    idx = MultiIndex.from_tuples(
        [
            (1, "one"),
            (3, "one"),
        ],
        names=["foo", "bar"],
    )
    with pytest.raises(TypeError, match="positional arguments"):
        idx.set_levels(["a", "b", "c"], 0)

    with pytest.raises(TypeError, match="positional arguments"):
        idx.set_codes([[0, 1], [1, 0]], 0)


def test_set_levels_categorical_keep_dtype():
    # GH#52125
    midx = MultiIndex.from_arrays([[5, 6]])
    result = midx.set_levels(levels=pd.Categorical([1, 2]), level=0)
    expected = MultiIndex.from_arrays([pd.Categorical([1, 2])])
    tm.assert_index_equal(result, expected)
@ -0,0 +1,289 @@
import re

import numpy as np
import pytest

from pandas._libs import index as libindex

from pandas.core.dtypes.cast import construct_1d_object_array_from_listlike

import pandas as pd
from pandas import (
    Index,
    IntervalIndex,
    MultiIndex,
    RangeIndex,
)
import pandas._testing as tm


def test_labels_dtypes():
    # GH 8456
    i = MultiIndex.from_tuples([("A", 1), ("A", 2)])
    assert i.codes[0].dtype == "int8"
    assert i.codes[1].dtype == "int8"

    i = MultiIndex.from_product([["a"], range(40)])
    assert i.codes[1].dtype == "int8"
    i = MultiIndex.from_product([["a"], range(400)])
    assert i.codes[1].dtype == "int16"
    i = MultiIndex.from_product([["a"], range(40000)])
    assert i.codes[1].dtype == "int32"

    i = MultiIndex.from_product([["a"], range(1000)])
    assert (i.codes[0] >= 0).all()
    assert (i.codes[1] >= 0).all()
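

# Editor's sketch (illustrative, not upstream): the codes dtype appears to be
# the smallest signed integer type that can index the level, so crossing the
# int8 capacity (127 values) is roughly where the dtype widens to int16.
def test_labels_dtype_boundary_sketch():
    small = MultiIndex.from_product([["a"], range(100)])
    large = MultiIndex.from_product([["a"], range(300)])
    assert small.codes[1].dtype == "int8"
    assert large.codes[1].dtype == "int16"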


def test_values_boxed():
    tuples = [
        (1, pd.Timestamp("2000-01-01")),
        (2, pd.NaT),
        (3, pd.Timestamp("2000-01-03")),
        (1, pd.Timestamp("2000-01-04")),
        (2, pd.Timestamp("2000-01-02")),
        (3, pd.Timestamp("2000-01-03")),
    ]
    result = MultiIndex.from_tuples(tuples)
    expected = construct_1d_object_array_from_listlike(tuples)
    tm.assert_numpy_array_equal(result.values, expected)
    # Check that code branches for boxed values produce identical results
    tm.assert_numpy_array_equal(result.values[:4], result[:4].values)


def test_values_multiindex_datetimeindex():
    # Test to ensure we hit the boxing / nobox part of MI.values
    ints = np.arange(10**18, 10**18 + 5)
    naive = pd.DatetimeIndex(ints)

    aware = pd.DatetimeIndex(ints, tz="US/Central")

    idx = MultiIndex.from_arrays([naive, aware])
    result = idx.values

    outer = pd.DatetimeIndex([x[0] for x in result])
    tm.assert_index_equal(outer, naive)

    inner = pd.DatetimeIndex([x[1] for x in result])
    tm.assert_index_equal(inner, aware)

    # n_lev > n_lab
    result = idx[:2].values

    outer = pd.DatetimeIndex([x[0] for x in result])
    tm.assert_index_equal(outer, naive[:2])

    inner = pd.DatetimeIndex([x[1] for x in result])
    tm.assert_index_equal(inner, aware[:2])
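

# Editor's sketch (illustrative, not upstream): .values yields an object array
# of tuples, and datetime-like entries come back as boxed Timestamps with any
# timezone preserved.
def test_values_box_preserves_tz_sketch():
    aware = pd.DatetimeIndex(["2020-01-01"], tz="US/Central")
    mi = MultiIndex.from_arrays([[1], aware])
    val = mi.values[0]
    assert isinstance(val, tuple)
    assert val[1].tz is not None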


def test_values_multiindex_periodindex():
    # Test to ensure we hit the boxing / nobox part of MI.values
    ints = np.arange(2007, 2012)
    pidx = pd.PeriodIndex(ints, freq="D")

    idx = MultiIndex.from_arrays([ints, pidx])
    result = idx.values

    outer = Index([x[0] for x in result])
    tm.assert_index_equal(outer, Index(ints, dtype=np.int64))

    inner = pd.PeriodIndex([x[1] for x in result])
    tm.assert_index_equal(inner, pidx)

    # n_lev > n_lab
    result = idx[:2].values

    outer = Index([x[0] for x in result])
    tm.assert_index_equal(outer, Index(ints[:2], dtype=np.int64))

    inner = pd.PeriodIndex([x[1] for x in result])
    tm.assert_index_equal(inner, pidx[:2])


def test_consistency():
    # need to construct an overflow
    major_axis = list(range(70000))
    minor_axis = list(range(10))

    major_codes = np.arange(70000)
    minor_codes = np.repeat(range(10), 7000)

    # the fact that it works means it's consistent
    index = MultiIndex(
        levels=[major_axis, minor_axis], codes=[major_codes, minor_codes]
    )

    # inconsistent
    major_codes = np.array([0, 0, 1, 1, 1, 2, 2, 3, 3])
    minor_codes = np.array([0, 1, 0, 1, 1, 0, 1, 0, 1])
    index = MultiIndex(
        levels=[major_axis, minor_axis], codes=[major_codes, minor_codes]
    )

    assert index.is_unique is False
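

# Editor's sketch (illustrative, not upstream): whereas duplicated codes above
# are merely non-unique, codes that point past the end of a level should fail
# integrity verification outright at construction time (the exact message is
# an assumption; matching on its "On level 0" prefix).
def test_codes_out_of_bounds_sketch():
    with pytest.raises(ValueError, match="On level 0"):
        MultiIndex(levels=[["a"]], codes=[[0, 1]])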


@pytest.mark.slow
def test_hash_collisions(monkeypatch):
    # non-smoke test that we don't get hash collisions
    size_cutoff = 50
    with monkeypatch.context() as m:
        m.setattr(libindex, "_SIZE_CUTOFF", size_cutoff)
        index = MultiIndex.from_product(
            [np.arange(8), np.arange(8)], names=["one", "two"]
        )
        result = index.get_indexer(index.values)
        tm.assert_numpy_array_equal(result, np.arange(len(index), dtype="intp"))

        for i in [0, 1, len(index) - 2, len(index) - 1]:
            result = index.get_loc(index[i])
            assert result == i
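

# Editor's sketch (illustrative, not upstream): regardless of which engine the
# lowered _SIZE_CUTOFF selects above, get_indexer against the index's own
# values should always be the identity mapping.
def test_get_indexer_identity_sketch():
    mi = MultiIndex.from_product([range(3), range(3)])
    expected = np.arange(len(mi), dtype="intp")
    tm.assert_numpy_array_equal(mi.get_indexer(mi.values), expected)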


def test_dims():
    pass


def test_take_invalid_kwargs():
    vals = [["A", "B"], [pd.Timestamp("2011-01-01"), pd.Timestamp("2011-01-02")]]
    idx = MultiIndex.from_product(vals, names=["str", "dt"])
    indices = [1, 2]

    msg = r"take\(\) got an unexpected keyword argument 'foo'"
    with pytest.raises(TypeError, match=msg):
        idx.take(indices, foo=2)

    msg = "the 'out' parameter is not supported"
    with pytest.raises(ValueError, match=msg):
        idx.take(indices, out=indices)

    msg = "the 'mode' parameter is not supported"
    with pytest.raises(ValueError, match=msg):
        idx.take(indices, mode="clip")


def test_isna_behavior(idx):
    # should not segfault GH5123
    # NOTE: if MI representation changes, may make sense to allow
    # isna(MI)
    msg = "isna is not defined for MultiIndex"
    with pytest.raises(NotImplementedError, match=msg):
        pd.isna(idx)


def test_large_multiindex_error(monkeypatch):
    # GH12527
    size_cutoff = 50
    with monkeypatch.context() as m:
        m.setattr(libindex, "_SIZE_CUTOFF", size_cutoff)
        df_below_cutoff = pd.DataFrame(
            1,
            index=MultiIndex.from_product([[1, 2], range(size_cutoff - 1)]),
            columns=["dest"],
        )
        with pytest.raises(KeyError, match=r"^\(-1, 0\)$"):
            df_below_cutoff.loc[(-1, 0), "dest"]
        with pytest.raises(KeyError, match=r"^\(3, 0\)$"):
            df_below_cutoff.loc[(3, 0), "dest"]
        df_above_cutoff = pd.DataFrame(
            1,
            index=MultiIndex.from_product([[1, 2], range(size_cutoff + 1)]),
            columns=["dest"],
        )
        with pytest.raises(KeyError, match=r"^\(-1, 0\)$"):
            df_above_cutoff.loc[(-1, 0), "dest"]
        with pytest.raises(KeyError, match=r"^\(3, 0\)$"):
            df_above_cutoff.loc[(3, 0), "dest"]


def test_mi_hashtable_populated_attribute_error(monkeypatch):
    # GH 18165
    monkeypatch.setattr(libindex, "_SIZE_CUTOFF", 50)
    r = range(50)
    df = pd.DataFrame({"a": r, "b": r}, index=MultiIndex.from_arrays([r, r]))

    msg = "'Series' object has no attribute 'foo'"
    with pytest.raises(AttributeError, match=msg):
        df["a"].foo()


def test_can_hold_identifiers(idx):
    key = idx[0]
    assert idx._can_hold_identifiers_and_holds_name(key) is True


def test_metadata_immutable(idx):
    levels, codes = idx.levels, idx.codes
    # shouldn't be able to set at either the top level or base level
    mutable_regex = re.compile("does not support mutable operations")
    with pytest.raises(TypeError, match=mutable_regex):
        levels[0] = levels[0]
    with pytest.raises(TypeError, match=mutable_regex):
        levels[0][0] = levels[0][0]
    # ditto for labels
    with pytest.raises(TypeError, match=mutable_regex):
        codes[0] = codes[0]
    with pytest.raises(ValueError, match="assignment destination is read-only"):
        codes[0][0] = codes[0][0]
    # and for names
    names = idx.names
    with pytest.raises(TypeError, match=mutable_regex):
        names[0] = names[0]
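

# Editor's sketch (illustrative, not upstream): since the metadata containers
# are frozen, the supported way to change them is via the set_* methods, which
# return a new index and leave the original untouched.
def test_metadata_replacement_sketch():
    mi = MultiIndex.from_arrays([["a", "b"], [1, 2]])
    result = mi.set_names(["x", "y"])
    assert result.names == ("x", "y")
    assert mi.names == (None, None)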


def test_level_setting_resets_attributes():
    ind = MultiIndex.from_arrays([["A", "A", "B", "B", "B"], [1, 2, 1, 2, 3]])
    assert ind.is_monotonic_increasing
    ind = ind.set_levels([["A", "B"], [1, 3, 2]])
    # if this fails, probably didn't reset the cache correctly.
    assert not ind.is_monotonic_increasing


def test_rangeindex_fallback_coercion_bug():
    # GH 12893
    df1 = pd.DataFrame(np.arange(100).reshape((10, 10)))
    df2 = pd.DataFrame(np.arange(100).reshape((10, 10)))
    df = pd.concat(
        {"df1": df1.stack(future_stack=True), "df2": df2.stack(future_stack=True)},
        axis=1,
    )
    df.index.names = ["fizz", "buzz"]

    expected = pd.DataFrame(
        {"df2": np.arange(100), "df1": np.arange(100)},
        index=MultiIndex.from_product([range(10), range(10)], names=["fizz", "buzz"]),
    )
    tm.assert_frame_equal(df, expected, check_like=True)

    result = df.index.get_level_values("fizz")
    expected = Index(np.arange(10, dtype=np.int64), name="fizz").repeat(10)
    tm.assert_index_equal(result, expected)

    result = df.index.get_level_values("buzz")
    expected = Index(np.tile(np.arange(10, dtype=np.int64), 10), name="buzz")
    tm.assert_index_equal(result, expected)


def test_memory_usage(idx):
    result = idx.memory_usage()
    if len(idx):
        idx.get_loc(idx[0])
        result2 = idx.memory_usage()
        result3 = idx.memory_usage(deep=True)

        # RangeIndex, IntervalIndex
        # don't have engines
        if not isinstance(idx, (RangeIndex, IntervalIndex)):
            assert result2 > result

            if idx.inferred_type == "object":
                assert result3 > result2

    else:
        # we report 0 for no-length
        assert result == 0


def test_nlevels(idx):
    assert idx.nlevels == 2
@ -0,0 +1,103 @@
import numpy as np
import pytest

from pandas import MultiIndex
import pandas._testing as tm


def test_isin_nan():
    idx = MultiIndex.from_arrays([["foo", "bar"], [1.0, np.nan]])
    tm.assert_numpy_array_equal(idx.isin([("bar", np.nan)]), np.array([False, True]))
    tm.assert_numpy_array_equal(
        idx.isin([("bar", float("nan"))]), np.array([False, True])
    )


def test_isin_missing(nulls_fixture):
    # GH48905
    mi1 = MultiIndex.from_tuples([(1, nulls_fixture)])
    mi2 = MultiIndex.from_tuples([(1, 1), (1, 2)])
    result = mi2.isin(mi1)
    expected = np.array([False, False])
    tm.assert_numpy_array_equal(result, expected)


def test_isin():
    values = [("foo", 2), ("bar", 3), ("quux", 4)]

    idx = MultiIndex.from_arrays([["qux", "baz", "foo", "bar"], np.arange(4)])
    result = idx.isin(values)
    expected = np.array([False, False, True, True])
    tm.assert_numpy_array_equal(result, expected)

    # empty, return dtype bool
    idx = MultiIndex.from_arrays([[], []])
    result = idx.isin(values)
    assert len(result) == 0
    assert result.dtype == np.bool_
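

# Editor's sketch (illustrative, not upstream): without a level argument, isin
# matches whole tuples, so agreeing on only one component is not enough.
def test_isin_requires_full_tuple_sketch():
    mi = MultiIndex.from_arrays([["a", "b"], [1, 2]])
    tm.assert_numpy_array_equal(mi.isin([("a", 2)]), np.array([False, False]))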


def test_isin_level_kwarg():
    idx = MultiIndex.from_arrays([["qux", "baz", "foo", "bar"], np.arange(4)])

    vals_0 = ["foo", "bar", "quux"]
    vals_1 = [2, 3, 10]

    expected = np.array([False, False, True, True])
    tm.assert_numpy_array_equal(expected, idx.isin(vals_0, level=0))
    tm.assert_numpy_array_equal(expected, idx.isin(vals_0, level=-2))

    tm.assert_numpy_array_equal(expected, idx.isin(vals_1, level=1))
    tm.assert_numpy_array_equal(expected, idx.isin(vals_1, level=-1))

    msg = "Too many levels: Index has only 2 levels, not 6"
    with pytest.raises(IndexError, match=msg):
        idx.isin(vals_0, level=5)
    msg = "Too many levels: Index has only 2 levels, -5 is not a valid level number"
    with pytest.raises(IndexError, match=msg):
        idx.isin(vals_0, level=-5)

    with pytest.raises(KeyError, match=r"'Level 1\.0 not found'"):
        idx.isin(vals_0, level=1.0)
    with pytest.raises(KeyError, match=r"'Level -1\.0 not found'"):
        idx.isin(vals_1, level=-1.0)
    with pytest.raises(KeyError, match="'Level A not found'"):
        idx.isin(vals_1, level="A")

    idx.names = ["A", "B"]
    tm.assert_numpy_array_equal(expected, idx.isin(vals_0, level="A"))
    tm.assert_numpy_array_equal(expected, idx.isin(vals_1, level="B"))

    with pytest.raises(KeyError, match="'Level C not found'"):
        idx.isin(vals_1, level="C")


@pytest.mark.parametrize(
    "labels,expected,level",
    [
        ([("b", np.nan)], np.array([False, False, True]), None),
        ([np.nan, "a"], np.array([True, True, False]), 0),
        (["d", np.nan], np.array([False, True, True]), 1),
    ],
)
def test_isin_multi_index_with_missing_value(labels, expected, level):
    # GH 19132
    midx = MultiIndex.from_arrays([[np.nan, "a", "b"], ["c", "d", np.nan]])
    result = midx.isin(labels, level=level)
    tm.assert_numpy_array_equal(result, expected)


def test_isin_empty():
    # GH#51599
    midx = MultiIndex.from_arrays([[1, 2], [3, 4]])
    result = midx.isin([])
    expected = np.array([False, False])
    tm.assert_numpy_array_equal(result, expected)


def test_isin_generator():
    # GH#52568
    midx = MultiIndex.from_tuples([(1, 2)])
    result = midx.isin(x for x in [(1, 2)])
    expected = np.array([True])
    tm.assert_numpy_array_equal(result, expected)