diff --git a/tests/frame/test_frame.py b/tests/frame/test_frame.py index da83cb605..67e5b0445 100644 --- a/tests/frame/test_frame.py +++ b/tests/frame/test_frame.py @@ -72,7 +72,6 @@ if TYPE_CHECKING: from pandas.core.frame import _PandasNamedTuple - else: _PandasNamedTuple: TypeAlias = tuple diff --git a/tests/series/test_indexing.py b/tests/series/test_indexing.py new file mode 100644 index 000000000..ddef397ed --- /dev/null +++ b/tests/series/test_indexing.py @@ -0,0 +1,257 @@ +from __future__ import annotations + +from typing import Any + +import numpy as np +import pandas as pd +from typing_extensions import assert_type + +from tests import ( + PD_LTE_23, + check, + pytest_warns_bounded, +) + + +def test_types_select() -> None: + s = pd.Series(data={"row1": 1, "row2": 2}) + if PD_LTE_23: + # Not valid in 3.0 + with pytest_warns_bounded( + FutureWarning, + "Series.__getitem__ treating keys as positions is deprecated", + lower="2.0.99", + ): + s[0] + check(assert_type(s[1:], "pd.Series[int]"), pd.Series, np.integer) + + +def test_types_iloc_iat() -> None: + s = pd.Series(data={"row1": 1, "row2": 2}) + s2 = pd.Series(data=[1, 2]) + s.loc["row1"] + s.iat[0] + s2.loc[0] + s2.iat[0] + + +def test_types_loc_at() -> None: + s = pd.Series(data={"row1": 1, "row2": 2}) + s2 = pd.Series(data=[1, 2]) + s.loc["row1"] + s.at["row1"] + s2.loc[1] + s2.at[1] + + +def test_types_getitem() -> None: + s = pd.Series({"key": [0, 1, 2, 3]}) + check(assert_type(s["key"], Any), list) + s2 = pd.Series([0, 1, 2, 3]) + check(assert_type(s2[0], int), np.integer) + check(assert_type(s[:2], pd.Series), pd.Series) + + +def test_types_getitem_by_timestamp() -> None: + index = pd.date_range("2018-01-01", periods=2, freq="D") + series = pd.Series(range(2), index=index) + check(assert_type(series[index[-1]], int), np.integer) + + +def test_multiindex_loc() -> None: + s = pd.Series( + [1, 2, 3, 4], index=pd.MultiIndex.from_product([[1, 2], ["a", "b"]]), dtype=int + ) + check(assert_type(s.loc[1, :], "pd.Series[int]"), pd.Series, np.int_) + check(assert_type(s.loc[pd.Index([1]), :], "pd.Series[int]"), pd.Series, np.int_) + check(assert_type(s.loc[1, "a"], int), np.int_) + + +def test_multiindex_loc_str_tuple() -> None: + s = pd.Series( + [1, 2, 3, 4, 5, 6], + index=pd.MultiIndex.from_product([["A", "B"], ["c", "d", "e"]]), + dtype=int, + ) + check(assert_type(s.loc[("A", "c")], int), np.int_) + check( + assert_type(s.loc[[("A", "c"), ("B", "d")]], "pd.Series[int]"), + pd.Series, + np.int_, + ) + + +def test_types_boolean_indexing() -> None: + s = pd.Series([0, 1, 2]) + s[s > 1] + s[s] + + +def test_series_loc_setitem() -> None: + s = pd.Series([1, 2, 3, 4, 5]) + v = s.loc[[0, 2, 4]].values + s.loc[[0, 2, 4]] = v + + +def test_series_isin() -> None: + s = pd.Series([1, 2, 3, 4, 5]) + check(assert_type(s.isin([3, 4]), "pd.Series[bool]"), pd.Series, np.bool_) + check(assert_type(s.isin({3, 4}), "pd.Series[bool]"), pd.Series, np.bool_) + check( + assert_type(s.isin(pd.Series([3, 4])), "pd.Series[bool]"), pd.Series, np.bool_ + ) + check(assert_type(s.isin(pd.Index([3, 4])), "pd.Series[bool]"), pd.Series, np.bool_) + check(assert_type(s.isin(iter([3, "4"])), "pd.Series[bool]"), pd.Series, np.bool_) + + +def test_series_index_isin() -> None: + s = pd.Series([1, 2, 3, 4, 5], index=[1, 2, 2, 3, 3]) + t1 = s.loc[s.index.isin([1, 3])] + t2 = s.loc[~s.index.isin([1, 3])] + t3 = s[s.index.isin([1, 3])] + t4 = s[~s.index.isin([1, 3])] + check(assert_type(t1, "pd.Series[int]"), pd.Series, np.integer) + check(assert_type(t2, "pd.Series[int]"), pd.Series, np.integer) + check(assert_type(t3, "pd.Series[int]"), pd.Series, np.integer) + check(assert_type(t4, "pd.Series[int]"), pd.Series, np.integer) + + +def test_series_invert() -> None: + s1 = pd.Series([True, False, True]) + s2 = ~s1 + check(assert_type(s2, "pd.Series[bool]"), pd.Series, np.bool_) + s3 = pd.Series([1, 2, 3]) + check(assert_type(s3[s2], "pd.Series[int]"), pd.Series, np.integer) + check(assert_type(s3.loc[s2], "pd.Series[int]"), pd.Series, np.integer) + + +def test_series_multiindex_getitem() -> None: + s = pd.Series( + [1, 2, 3, 4], index=pd.MultiIndex.from_product([["a", "b"], ["x", "y"]]) + ) + _s1: pd.Series = s["a", :] + + +def test_iloc_getitem_ndarray() -> None: + # GH 85 + # GH 86 + indices_i8 = np.array([0, 1, 2, 3], dtype=np.int8) + indices_i16 = np.array([0, 1, 2, 3], dtype=np.int16) + indices_i32 = np.array([0, 1, 2, 3], dtype=np.int_) + indices_i64 = np.array([0, 1, 2, 3], dtype=np.int64) + + indices_u8 = np.array([0, 1, 2, 3], dtype=np.uint8) + indices_u16 = np.array([0, 1, 2, 3], dtype=np.uint16) + indices_u32 = np.array([0, 1, 2, 3], dtype=np.uint32) + indices_u64 = np.array([0, 1, 2, 3], dtype=np.uint64) + + values_s = pd.Series(np.arange(10), name="a") + + check(assert_type(values_s.iloc[indices_i8], pd.Series), pd.Series) + check(assert_type(values_s.iloc[indices_i16], pd.Series), pd.Series) + check(assert_type(values_s.iloc[indices_i32], pd.Series), pd.Series) + check(assert_type(values_s.iloc[indices_i64], pd.Series), pd.Series) + + check(assert_type(values_s.iloc[indices_u8], pd.Series), pd.Series) + check(assert_type(values_s.iloc[indices_u16], pd.Series), pd.Series) + check(assert_type(values_s.iloc[indices_u32], pd.Series), pd.Series) + check(assert_type(values_s.iloc[indices_u64], pd.Series), pd.Series) + + +def test_iloc_setitem_ndarray() -> None: + # GH 85 + # GH 86 + indices_i8 = np.array([0, 1, 2, 3], dtype=np.int8) + indices_i16 = np.array([0, 1, 2, 3], dtype=np.int16) + indices_i32 = np.array([0, 1, 2, 3], dtype=np.int_) + indices_i64 = np.array([0, 1, 2, 3], dtype=np.int64) + + indices_u8 = np.array([0, 1, 2, 3], dtype=np.uint8) + indices_u16 = np.array([0, 1, 2, 3], dtype=np.uint16) + indices_u32 = np.array([0, 1, 2, 3], dtype=np.uint32) + indices_u64 = np.array([0, 1, 2, 3], dtype=np.uint64) + + values_s = pd.Series(np.arange(10), name="a") + + values_s.iloc[indices_i8] = -1 + values_s.iloc[indices_i16] = -1 + values_s.iloc[indices_i32] = -1 + values_s.iloc[indices_i64] = -1 + + values_s.iloc[indices_u8] = -1 + values_s.iloc[indices_u16] = -1 + values_s.iloc[indices_u32] = -1 + values_s.iloc[indices_u64] = -1 + + +def test_loc_callable() -> None: + # GH 586 + s = pd.Series([1, 2]) + check(assert_type(s.loc[lambda x: x > 1], "pd.Series[int]"), pd.Series, np.integer) + + +def test_series_setitem_multiindex() -> None: + # GH 767 + df = ( + pd.DataFrame({"x": [1, 2, 3, 4]}) + .assign(y=lambda df: df["x"] * 10, z=lambda df: df["x"] * 100) + .set_index(["x", "y"]) + ) + ind = pd.Index([2, 3]) + s = df["z"] + + s.loc[pd.IndexSlice[ind, :]] = 30 + + +def test_series_setitem_na() -> None: + # GH 743 + df = pd.DataFrame( + {"x": [1, 2, 3], "y": pd.date_range("3/1/2023", "3/3/2023")}, + index=pd.Index(["a", "b", "c"]), + ).convert_dtypes() + + ind = pd.Index(["a", "c"]) + s = df["x"].copy() + + s.loc[ind] = pd.NA + s.iloc[[0, 2]] = pd.NA + + s2 = df["y"].copy() + s2.loc[ind] = pd.NaT + s2.iloc[[0, 2]] = pd.NaT + + +def test_slice_timestamp() -> None: + dti = pd.date_range("1/1/2025", "2/28/2025") + + s = pd.Series(list(range(len(dti))), index=dti) + + # For `s1`, see discussion in GH 397. Needs mypy fix. + # s1 = s.loc["2025-01-15":"2025-01-20"] + + # GH 397 + check( + assert_type( + s.loc[pd.Timestamp("2025-01-15") : pd.Timestamp("2025-01-20")], + "pd.Series[int]", + ), + pd.Series, + np.integer, + ) + + +def test_series_single_slice() -> None: + # GH 572 + s = pd.Series([1, 2, 3]) + check(assert_type(s.loc[:], "pd.Series[int]"), pd.Series, np.integer) + + s.loc[:] = 1 + s + + +def test_series_index_timestamp() -> None: + # GH 620 + dt1 = pd.to_datetime("2023-05-01") + dt2 = pd.to_datetime("2023-05-02") + s = pd.Series([1, 2], index=[dt1, dt2]) + check(assert_type(s[dt1], int), np.integer) + check(assert_type(s.loc[[dt1]], "pd.Series[int]"), pd.Series, np.integer) diff --git a/tests/series/test_series.py b/tests/series/test_series.py index 33eee7915..96d3e411c 100644 --- a/tests/series/test_series.py +++ b/tests/series/test_series.py @@ -200,66 +200,6 @@ def test_types_copy() -> None: check(assert_type(s.copy(), "pd.Series[int]"), pd.Series, np.integer) -def test_types_select() -> None: - s = pd.Series(data={"row1": 1, "row2": 2}) - if PD_LTE_23: - # Not valid in 3.0 - with pytest_warns_bounded( - FutureWarning, - "Series.__getitem__ treating keys as positions is deprecated", - lower="2.0.99", - ): - s[0] - check(assert_type(s[1:], "pd.Series[int]"), pd.Series, np.integer) - - -def test_types_iloc_iat() -> None: - s = pd.Series(data={"row1": 1, "row2": 2}) - s2 = pd.Series(data=[1, 2]) - s.loc["row1"] - s.iat[0] - s2.loc[0] - s2.iat[0] - - -def test_types_loc_at() -> None: - s = pd.Series(data={"row1": 1, "row2": 2}) - s2 = pd.Series(data=[1, 2]) - s.loc["row1"] - s.at["row1"] - s2.loc[1] - s2.at[1] - - -def test_multiindex_loc() -> None: - s = pd.Series( - [1, 2, 3, 4], index=pd.MultiIndex.from_product([[1, 2], ["a", "b"]]), dtype=int - ) - check(assert_type(s.loc[1, :], "pd.Series[int]"), pd.Series, np.int_) - check(assert_type(s.loc[pd.Index([1]), :], "pd.Series[int]"), pd.Series, np.int_) - check(assert_type(s.loc[1, "a"], int), np.int_) - - -def test_multiindex_loc_str_tuple() -> None: - s = pd.Series( - [1, 2, 3, 4, 5, 6], - index=pd.MultiIndex.from_product([["A", "B"], ["c", "d", "e"]]), - dtype=int, - ) - check(assert_type(s.loc[("A", "c")], int), np.int_) - check( - assert_type(s.loc[[("A", "c"), ("B", "d")]], "pd.Series[int]"), - pd.Series, - np.int_, - ) - - -def test_types_boolean_indexing() -> None: - s = pd.Series([0, 1, 2]) - s[s > 1] - s[s] - - def test_types_df_to_df_comparison() -> None: s = pd.Series(data={"col1": [1, 2]}) s2 = pd.Series(data={"col1": [3, 2]}) @@ -1460,20 +1400,6 @@ def test_types_set_flags() -> None: pd.Series([5, 2], index=["a", "a"]) -def test_types_getitem() -> None: - s = pd.Series({"key": [0, 1, 2, 3]}) - check(assert_type(s["key"], Any), list) - s2 = pd.Series([0, 1, 2, 3]) - check(assert_type(s2[0], int), np.integer) - check(assert_type(s[:2], pd.Series), pd.Series) - - -def test_types_getitem_by_timestamp() -> None: - index = pd.date_range("2018-01-01", periods=2, freq="D") - series = pd.Series(range(2), index=index) - check(assert_type(series[index[-1]], int), np.integer) - - def test_types_eq() -> None: s1 = pd.Series([1, 2, 3]) check(assert_type(s1 == 1, "pd.Series[bool]"), pd.Series, np.bool) @@ -1692,57 +1618,12 @@ def test_types_dot() -> None: check(assert_type(s1 @ n1, np_ndarray_num), np.ndarray) -def test_series_loc_setitem() -> None: - s = pd.Series([1, 2, 3, 4, 5]) - v = s.loc[[0, 2, 4]].values - s.loc[[0, 2, 4]] = v - - def test_series_min_max_sub_axis() -> None: df = pd.DataFrame({"x": [1, 2, 3, 4, 5], "y": [5, 4, 3, 2, 1]}) check(assert_type(df.min(axis=1), pd.Series), pd.Series) check(assert_type(df.max(axis=1), pd.Series), pd.Series) -def test_series_isin() -> None: - s = pd.Series([1, 2, 3, 4, 5]) - check(assert_type(s.isin([3, 4]), "pd.Series[bool]"), pd.Series, np.bool_) - check(assert_type(s.isin({3, 4}), "pd.Series[bool]"), pd.Series, np.bool_) - check( - assert_type(s.isin(pd.Series([3, 4])), "pd.Series[bool]"), pd.Series, np.bool_ - ) - check(assert_type(s.isin(pd.Index([3, 4])), "pd.Series[bool]"), pd.Series, np.bool_) - check(assert_type(s.isin(iter([3, "4"])), "pd.Series[bool]"), pd.Series, np.bool_) - - -def test_series_index_isin() -> None: - s = pd.Series([1, 2, 3, 4, 5], index=[1, 2, 2, 3, 3]) - t1 = s.loc[s.index.isin([1, 3])] - t2 = s.loc[~s.index.isin([1, 3])] - t3 = s[s.index.isin([1, 3])] - t4 = s[~s.index.isin([1, 3])] - check(assert_type(t1, "pd.Series[int]"), pd.Series, np.integer) - check(assert_type(t2, "pd.Series[int]"), pd.Series, np.integer) - check(assert_type(t3, "pd.Series[int]"), pd.Series, np.integer) - check(assert_type(t4, "pd.Series[int]"), pd.Series, np.integer) - - -def test_series_invert() -> None: - s1 = pd.Series([True, False, True]) - s2 = ~s1 - check(assert_type(s2, "pd.Series[bool]"), pd.Series, np.bool_) - s3 = pd.Series([1, 2, 3]) - check(assert_type(s3[s2], "pd.Series[int]"), pd.Series, np.integer) - check(assert_type(s3.loc[s2], "pd.Series[int]"), pd.Series, np.integer) - - -def test_series_multiindex_getitem() -> None: - s = pd.Series( - [1, 2, 3, 4], index=pd.MultiIndex.from_product([["a", "b"], ["x", "y"]]) - ) - _s1: pd.Series = s["a", :] - - def test_reset_index() -> None: s = pd.Series( [1, 2, 3, 4], @@ -1874,32 +1755,6 @@ def test_cat_ctor_values() -> None: ) -def test_iloc_getitem_ndarray() -> None: - # GH 85 - # GH 86 - indices_i8 = np.array([0, 1, 2, 3], dtype=np.int8) - indices_i16 = np.array([0, 1, 2, 3], dtype=np.int16) - indices_i32 = np.array([0, 1, 2, 3], dtype=np.int_) - indices_i64 = np.array([0, 1, 2, 3], dtype=np.int64) - - indices_u8 = np.array([0, 1, 2, 3], dtype=np.uint8) - indices_u16 = np.array([0, 1, 2, 3], dtype=np.uint16) - indices_u32 = np.array([0, 1, 2, 3], dtype=np.uint32) - indices_u64 = np.array([0, 1, 2, 3], dtype=np.uint64) - - values_s = pd.Series(np.arange(10), name="a") - - check(assert_type(values_s.iloc[indices_i8], pd.Series), pd.Series) - check(assert_type(values_s.iloc[indices_i16], pd.Series), pd.Series) - check(assert_type(values_s.iloc[indices_i32], pd.Series), pd.Series) - check(assert_type(values_s.iloc[indices_i64], pd.Series), pd.Series) - - check(assert_type(values_s.iloc[indices_u8], pd.Series), pd.Series) - check(assert_type(values_s.iloc[indices_u16], pd.Series), pd.Series) - check(assert_type(values_s.iloc[indices_u32], pd.Series), pd.Series) - check(assert_type(values_s.iloc[indices_u64], pd.Series), pd.Series) - - def test_take() -> None: s = pd.Series(np.arange(10), name="a") check(assert_type(s.take([0, 1]), pd.Series), pd.Series) @@ -1910,32 +1765,6 @@ def test_take() -> None: check(assert_type(s.take(np.array([0, 1])), pd.Series), pd.Series) -def test_iloc_setitem_ndarray() -> None: - # GH 85 - # GH 86 - indices_i8 = np.array([0, 1, 2, 3], dtype=np.int8) - indices_i16 = np.array([0, 1, 2, 3], dtype=np.int16) - indices_i32 = np.array([0, 1, 2, 3], dtype=np.int_) - indices_i64 = np.array([0, 1, 2, 3], dtype=np.int64) - - indices_u8 = np.array([0, 1, 2, 3], dtype=np.uint8) - indices_u16 = np.array([0, 1, 2, 3], dtype=np.uint16) - indices_u32 = np.array([0, 1, 2, 3], dtype=np.uint32) - indices_u64 = np.array([0, 1, 2, 3], dtype=np.uint64) - - values_s = pd.Series(np.arange(10), name="a") - - values_s.iloc[indices_i8] = -1 - values_s.iloc[indices_i16] = -1 - values_s.iloc[indices_i32] = -1 - values_s.iloc[indices_i64] = -1 - - values_s.iloc[indices_u8] = -1 - values_s.iloc[indices_u16] = -1 - values_s.iloc[indices_u32] = -1 - values_s.iloc[indices_u64] = -1 - - def test_types_iter() -> None: s = pd.Series([1, 2, 3], dtype=int) _iterable: Iterable[int] = s @@ -3213,12 +3042,6 @@ def test_apply_returns_none() -> None: check(assert_type(s.apply(lambda x: None), pd.Series), pd.Series) -def test_loc_callable() -> None: - # GH 586 - s = pd.Series([1, 2]) - check(assert_type(s.loc[lambda x: x > 1], "pd.Series[int]"), pd.Series, np.integer) - - def test_to_json_mode() -> None: s = pd.Series([1, 2, 3, 4]) result = s.to_json(orient="records", lines=True, mode="a") @@ -3332,37 +3155,6 @@ def test_rank() -> None: ) -def test_series_setitem_multiindex() -> None: - # GH 767 - df = ( - pd.DataFrame({"x": [1, 2, 3, 4]}) - .assign(y=lambda df: df["x"] * 10, z=lambda df: df["x"] * 100) - .set_index(["x", "y"]) - ) - ind = pd.Index([2, 3]) - s = df["z"] - - s.loc[pd.IndexSlice[ind, :]] = 30 - - -def test_series_setitem_na() -> None: - # GH 743 - df = pd.DataFrame( - {"x": [1, 2, 3], "y": pd.date_range("3/1/2023", "3/3/2023")}, - index=pd.Index(["a", "b", "c"]), - ).convert_dtypes() - - ind = pd.Index(["a", "c"]) - s = df["x"].copy() - - s.loc[ind] = pd.NA - s.iloc[[0, 2]] = pd.NA - - s2 = df["y"].copy() - s2.loc[ind] = pd.NaT - s2.iloc[[0, 2]] = pd.NaT - - def test_round() -> None: # GH 791 check(assert_type(round(pd.DataFrame([])), pd.DataFrame), pd.DataFrame) @@ -3773,25 +3565,6 @@ def test_series_unique_timedelta() -> None: check(assert_type(sr.unique(), TimedeltaArray), TimedeltaArray) -def test_slice_timestamp() -> None: - dti = pd.date_range("1/1/2025", "2/28/2025") - - s = pd.Series(list(range(len(dti))), index=dti) - - # For `s1`, see discussion in GH 397. Needs mypy fix. - # s1 = s.loc["2025-01-15":"2025-01-20"] - - # GH 397 - check( - assert_type( - s.loc[pd.Timestamp("2025-01-15") : pd.Timestamp("2025-01-20")], - "pd.Series[int]", - ), - pd.Series, - np.integer, - ) - - def test_apply_dateoffset() -> None: # GH 454 months = [1, 2, 3] @@ -3805,14 +3578,6 @@ def test_apply_dateoffset() -> None: ) -def test_series_single_slice() -> None: - # GH 572 - s = pd.Series([1, 2, 3]) - check(assert_type(s.loc[:], "pd.Series[int]"), pd.Series, np.integer) - - s.loc[:] = 1 + s - - def test_series_typed_dict() -> None: """Test that no error is raised when constructing a series from a typed dict.""" @@ -3834,15 +3599,6 @@ def test_series_empty_dtype() -> None: check(assert_type(pd.Series(""), "pd.Series[str]"), pd.Series) -def test_series_index_timestamp() -> None: - # GH 620 - dt1 = pd.to_datetime("2023-05-01") - dt2 = pd.to_datetime("2023-05-02") - s = pd.Series([1, 2], index=[dt1, dt2]) - check(assert_type(s[dt1], int), np.integer) - check(assert_type(s.loc[[dt1]], "pd.Series[int]"), pd.Series, np.integer) - - def test_series_bool_fails() -> None: # GH 663 s = pd.Series([1, 2, 3])