# dash-api/lib/python3.11/site-packages/pandas/tests/indexes/string/test_indexing.py

import numpy as np
import pytest

import pandas as pd
from pandas import Index
import pandas._testing as tm


def _isnan(val):
    try:
        return val is not pd.NA and np.isnan(val)
    except TypeError:
        return False


def _equivalent_na(dtype, null):
    """
    Report whether ``null`` is the same kind of missing value as
    ``dtype.na_value``.

    Returns ``True`` when both are ``pd.NA`` or when both are NaN (as judged
    by ``_isnan``); ``False`` otherwise.
    """
    sentinel = dtype.na_value
    if sentinel is pd.NA:
        # A pd.NA sentinel only matches pd.NA itself.
        return null is pd.NA
    return bool(_isnan(sentinel) and _isnan(null))


class TestGetLoc:
    """Checks of ``Index.get_loc`` for indexes built with a string dtype."""

    def test_get_loc(self, any_string_dtype):
        """A label that occurs once is located at its integer position."""
        idx = Index(["a", "b", "c"], dtype=any_string_dtype)
        position = idx.get_loc("b")
        assert position == 1

    def test_get_loc_raises(self, any_string_dtype):
        """Looking up an absent string label raises ``KeyError``."""
        idx = Index(["a", "b", "c"], dtype=any_string_dtype)
        with pytest.raises(KeyError, match="d"):
            idx.get_loc("d")

    def test_get_loc_invalid_value(self, any_string_dtype):
        """Looking up a non-string key (an int) raises ``KeyError``."""
        idx = Index(["a", "b", "c"], dtype=any_string_dtype)
        with pytest.raises(KeyError, match="1"):
            idx.get_loc(1)

    def test_get_loc_non_unique(self, any_string_dtype):
        """A duplicated label yields a boolean mask over the index."""
        idx = Index(["a", "b", "a"], dtype=any_string_dtype)
        mask = idx.get_loc("a")
        tm.assert_numpy_array_equal(mask, np.array([True, False, True]))

    def test_get_loc_non_missing(self, any_string_dtype, nulls_fixture):
        """Looking up a null in an index that holds none raises ``KeyError``."""
        idx = Index(["a", "b", "c"], dtype=any_string_dtype)
        with pytest.raises(KeyError):
            idx.get_loc(nulls_fixture)

    def test_get_loc_missing(self, any_string_dtype, nulls_fixture):
        """A null stored in the index is found at its position."""
        idx = Index(["a", "b", nulls_fixture], dtype=any_string_dtype)
        position = idx.get_loc(nulls_fixture)
        assert position == 2


class TestGetIndexer:
    """Checks of ``Index.get_indexer`` for indexes built with a string dtype."""

    @pytest.mark.parametrize(
        "method,expected",
        [
            ("pad", [-1, 0, 1, 1]),
            ("backfill", [0, 0, 1, -1]),
        ],
    )
    def test_get_indexer_strings(self, any_string_dtype, method, expected):
        """``pad`` / ``backfill`` fill methods produce the expected positions."""
        idx = Index(["b", "c"], dtype=any_string_dtype)
        targets = ["a", "b", "c", "d"]

        result = idx.get_indexer(targets, method=method)

        tm.assert_numpy_array_equal(result, np.array(expected, dtype=np.intp))

    def test_get_indexer_strings_raises(self, any_string_dtype):
        """``nearest`` and ``tolerance`` lookups raise ``TypeError`` on strings."""
        idx = Index(["b", "c"], dtype=any_string_dtype)
        targets = ["a", "b", "c", "d"]

        # Either message is accepted.
        alternatives = (
            "operation 'sub' not supported for dtype 'str",
            r"unsupported operand type\(s\) for -: 'str' and 'str'",
        )
        msg = "|".join(alternatives)

        failing_kwargs = (
            {"method": "nearest"},
            {"method": "pad", "tolerance": 2},
            {"method": "pad", "tolerance": [2, 2, 2, 2]},
        )
        for kwargs in failing_kwargs:
            with pytest.raises(TypeError, match=msg):
                idx.get_indexer(targets, **kwargs)

    @pytest.mark.parametrize("null", [None, np.nan, float("nan"), pd.NA])
    def test_get_indexer_missing(self, any_string_dtype, null, using_infer_string):
        """A null target is matched unless it differs from the dtype's NA."""
        # NaT and Decimal("NaN") from nulls_fixture are not supported for string dtype
        idx = Index(["a", "b", null], dtype=any_string_dtype)
        result = idx.get_indexer(["a", null, "c"])

        # The null lookup is only expected to miss when string inference is
        # off, the dtype compares equal to "string", and ``null`` is not the
        # same kind of missing value as the dtype's own na_value.
        null_not_found = (
            not using_infer_string
            and any_string_dtype == "string"
            and not _equivalent_na(any_string_dtype, null)
        )
        positions = [0, -1, -1] if null_not_found else [0, 2, -1]

        tm.assert_numpy_array_equal(result, np.array(positions, dtype=np.intp))


class TestGetIndexerNonUnique:
    """Checks of ``Index.get_indexer_non_unique`` for string-dtype indexes."""

    @pytest.mark.parametrize("null", [None, np.nan, float("nan"), pd.NA])
    def test_get_indexer_non_unique_nas(
        self, any_string_dtype, null, using_infer_string
    ):
        """Null targets are located in both unique and non-unique indexes."""
        # The null lookup is only expected to miss when string inference is
        # off, the dtype compares equal to "string", and ``null`` is not the
        # same kind of missing value as the dtype's own na_value.
        null_not_found = (
            not using_infer_string
            and any_string_dtype == "string"
            and not _equivalent_na(any_string_dtype, null)
        )
        targets = ["a", null]

        # index with a single null
        idx = Index(["a", "b", null], dtype=any_string_dtype)
        indexer, missing = idx.get_indexer_non_unique(targets)

        if null_not_found:
            expected_indexer = np.array([0, -1], dtype=np.intp)
            expected_missing = np.array([1], dtype=np.intp)
        else:
            expected_indexer = np.array([0, 2], dtype=np.intp)
            expected_missing = np.array([], dtype=np.intp)
        tm.assert_numpy_array_equal(indexer, expected_indexer)
        tm.assert_numpy_array_equal(missing, expected_missing)

        # actually non-unique
        idx = Index(["a", null, "b", null], dtype=any_string_dtype)
        indexer, missing = idx.get_indexer_non_unique(targets)

        if null_not_found:
            # Same expectations as for the single-null index above.
            expected_indexer = np.array([0, -1], dtype=np.intp)
            expected_missing = np.array([1], dtype=np.intp)
        else:
            expected_indexer = np.array([0, 1, 3], dtype=np.intp)
            expected_missing = np.array([], dtype=np.intp)
        tm.assert_numpy_array_equal(indexer, expected_indexer)
        tm.assert_numpy_array_equal(missing, expected_missing)


class TestSliceLocs:
    """Checks of ``Index.slice_locs`` and slicing for string-dtype indexes."""

    @pytest.mark.parametrize(
        "in_slice,expected",
        [
            # error: Slice index must be an integer or None
            (pd.IndexSlice[::-1], "yxdcb"),
            (pd.IndexSlice["b":"y":-1], ""),  # type: ignore[misc]
            (pd.IndexSlice["b"::-1], "b"),  # type: ignore[misc]
            (pd.IndexSlice[:"b":-1], "yxdcb"),  # type: ignore[misc]
            (pd.IndexSlice[:"y":-1], "y"),  # type: ignore[misc]
            (pd.IndexSlice["y"::-1], "yxdcb"),  # type: ignore[misc]
            (pd.IndexSlice["y"::-4], "yb"),  # type: ignore[misc]
            # absent labels
            (pd.IndexSlice[:"a":-1], "yxdcb"),  # type: ignore[misc]
            (pd.IndexSlice[:"a":-2], "ydb"),  # type: ignore[misc]
            (pd.IndexSlice["z"::-1], "yxdcb"),  # type: ignore[misc]
            (pd.IndexSlice["z"::-3], "yc"),  # type: ignore[misc]
            (pd.IndexSlice["m"::-1], "dcb"),  # type: ignore[misc]
            (pd.IndexSlice[:"m":-1], "yx"),  # type: ignore[misc]
            (pd.IndexSlice["a":"a":-1], ""),  # type: ignore[misc]
            (pd.IndexSlice["z":"z":-1], ""),  # type: ignore[misc]
            (pd.IndexSlice["m":"m":-1], ""),  # type: ignore[misc]
        ],
    )
    def test_slice_locs_negative_step(self, in_slice, expected, any_string_dtype):
        """Label slices with a negative step select the expected characters."""
        idx = Index(list("bcdxy"), dtype=any_string_dtype)
        step = in_slice.step

        # Translate the label bounds to positions, then slice positionally.
        first, last = idx.slice_locs(in_slice.start, in_slice.stop, step)
        result = idx[first:last:step]

        expected_index = Index(list(expected), dtype=any_string_dtype)
        tm.assert_index_equal(result, expected_index)

    def test_slice_locs_negative_step_oob(self, any_string_dtype):
        """Positional slices with out-of-range bounds cover the whole index."""
        idx = Index(list("bcdxy"), dtype=any_string_dtype)

        ascending = idx[-10:5:1]
        tm.assert_index_equal(ascending, idx)

        descending = idx[4:-10:-1]
        reversed_index = Index(list("yxdcb"), dtype=any_string_dtype)
        tm.assert_index_equal(descending, reversed_index)

    def test_slice_locs_dup(self, any_string_dtype):
        """``slice_locs`` spans every duplicate of a boundary label."""
        forward = Index(["a", "a", "b", "c", "d", "d"], dtype=any_string_dtype)
        assert forward.slice_locs("a", "d") == (0, 6)
        assert forward.slice_locs(end="d") == (0, 6)
        assert forward.slice_locs("a", "c") == (0, 4)
        assert forward.slice_locs("b", "d") == (2, 6)

        # Same checks against the reversed index.
        backward = forward[::-1]
        assert backward.slice_locs("d", "a") == (0, 6)
        assert backward.slice_locs(end="a") == (0, 6)
        assert backward.slice_locs("d", "b") == (0, 4)
        assert backward.slice_locs("c", "a") == (2, 6)