|
| 1 | +from __future__ import annotations |
| 2 | + |
| 3 | +from typing import Any |
| 4 | + |
| 5 | +import numpy as np |
| 6 | +import pandas as pd |
| 7 | +from typing_extensions import assert_type |
| 8 | + |
| 9 | +from tests import ( |
| 10 | + PD_LTE_23, |
| 11 | + check, |
| 12 | + pytest_warns_bounded, |
| 13 | +) |
| 14 | + |
| 15 | + |
def test_types_select() -> None:
    """Check integer-key and slice selection on a string-labelled Series.

    The integer-key lookup is only run when ``PD_LTE_23`` is true, where it
    is expected to emit a ``FutureWarning``; the slice is always checked.
    """
    labelled = pd.Series(data={"row1": 1, "row2": 2})
    if PD_LTE_23:
        # Not valid in 3.0
        with pytest_warns_bounded(
            FutureWarning,
            "Series.__getitem__ treating keys as positions is deprecated",
            lower="2.0.99",
        ):
            labelled[0]

    # Slicing must keep the Series[int] type.
    tail = labelled[1:]
    check(assert_type(tail, "pd.Series[int]"), pd.Series, np.integer)
| 27 | + |
| 28 | + |
def test_types_iloc_iat() -> None:
    """Exercise the positional accessors ``.iloc`` and ``.iat`` on a Series.

    Two Series are used, one with string labels and one with the default
    integer index, so positional access is exercised against both.
    """
    s = pd.Series(data={"row1": 1, "row2": 2})
    s2 = pd.Series(data=[1, 2])
    # Use ``.iloc`` here: label-based ``.loc`` is covered by
    # test_types_loc_at, and this test is about positional access.
    s.iloc[0]
    s.iat[0]
    s2.iloc[0]
    s2.iat[0]
| 36 | + |
| 37 | + |
def test_types_loc_at() -> None:
    """Exercise the label-based accessors ``.loc`` and ``.at`` on a Series."""
    by_name = pd.Series(data={"row1": 1, "row2": 2})
    by_number = pd.Series(data=[1, 2])

    # String labels.
    by_name.loc["row1"]
    by_name.at["row1"]

    # Integer labels taken from the default index.
    by_number.loc[1]
    by_number.at[1]
| 45 | + |
| 46 | + |
def test_types_getitem() -> None:
    """Check ``[]`` lookups by string key, by integer key and by slice."""
    of_lists = pd.Series({"key": [0, 1, 2, 3]})
    stored = of_lists["key"]
    check(assert_type(stored, Any), list)

    of_ints = pd.Series([0, 1, 2, 3])
    first = of_ints[0]
    check(assert_type(first, int), np.integer)

    # The slice is taken from the Series that holds the list value.
    head = of_lists[:2]
    check(assert_type(head, pd.Series), pd.Series)
| 53 | + |
| 54 | + |
def test_types_getitem_by_timestamp() -> None:
    """Check ``[]`` with a key taken from the Series' own date index."""
    dates = pd.date_range("2018-01-01", periods=2, freq="D")
    by_date = pd.Series(range(2), index=dates)
    last_stamp = dates[-1]
    check(assert_type(by_date[last_stamp], int), np.integer)
| 59 | + |
| 60 | + |
def test_multiindex_loc() -> None:
    """Check ``.loc`` on a Series with a two-level (int, str) MultiIndex.

    Covers a (scalar, slice) key, an (Index, slice) key and a fully
    specified (scalar, scalar) key.
    """
    two_level = pd.MultiIndex.from_product([[1, 2], ["a", "b"]])
    ser = pd.Series([1, 2, 3, 4], index=two_level, dtype=int)

    by_scalar = ser.loc[1, :]
    check(assert_type(by_scalar, "pd.Series[int]"), pd.Series, np.int_)

    by_index = ser.loc[pd.Index([1]), :]
    check(assert_type(by_index, "pd.Series[int]"), pd.Series, np.int_)

    single = ser.loc[1, "a"]
    check(assert_type(single, int), np.int_)
| 68 | + |
| 69 | + |
def test_multiindex_loc_str_tuple() -> None:
    """Check ``.loc`` with string tuples on a two-level string MultiIndex.

    A single tuple key is asserted to give an ``int``; a list of tuples is
    asserted to give a ``pd.Series[int]``.
    """
    str_levels = pd.MultiIndex.from_product([["A", "B"], ["c", "d", "e"]])
    ser = pd.Series([1, 2, 3, 4, 5, 6], index=str_levels, dtype=int)

    one_value = ser.loc[("A", "c")]
    check(assert_type(one_value, int), np.int_)

    several = ser.loc[[("A", "c"), ("B", "d")]]
    check(assert_type(several, "pd.Series[int]"), pd.Series, np.int_)
| 82 | + |
| 83 | + |
def test_types_boolean_indexing() -> None:
    """Index a Series with a comparison result and with the Series itself."""
    ser = pd.Series([0, 1, 2])
    above_one = ser > 1
    ser[above_one]
    ser[ser]
| 88 | + |
| 89 | + |
def test_series_loc_setitem() -> None:
    """Assign an array of values back through ``.loc`` with a list of labels."""
    ser = pd.Series([1, 2, 3, 4, 5])
    # Read the values at labels 0, 2 and 4, then write them back unchanged.
    picked = ser.loc[[0, 2, 4]].values
    ser.loc[[0, 2, 4]] = picked
| 94 | + |
| 95 | + |
def test_series_isin() -> None:
    """Check ``Series.isin`` with several kinds of ``values`` argument.

    A list, a set, a Series, an Index and an iterator are each passed in,
    and every result is asserted to be ``pd.Series[bool]``.
    """
    ser = pd.Series([1, 2, 3, 4, 5])

    from_list = ser.isin([3, 4])
    check(assert_type(from_list, "pd.Series[bool]"), pd.Series, np.bool_)

    from_set = ser.isin({3, 4})
    check(assert_type(from_set, "pd.Series[bool]"), pd.Series, np.bool_)

    from_series = ser.isin(pd.Series([3, 4]))
    check(assert_type(from_series, "pd.Series[bool]"), pd.Series, np.bool_)

    from_index = ser.isin(pd.Index([3, 4]))
    check(assert_type(from_index, "pd.Series[bool]"), pd.Series, np.bool_)

    # Iterator input, deliberately mixing an int with a str.
    from_iterator = ser.isin(iter([3, "4"]))
    check(assert_type(from_iterator, "pd.Series[bool]"), pd.Series, np.bool_)
| 105 | + |
| 106 | + |
def test_series_index_isin() -> None:
    """Select with ``Index.isin`` masks, plain and inverted.

    Both ``.loc[...]`` and ``[...]`` are exercised on a Series whose index
    contains duplicate labels.
    """
    ser = pd.Series([1, 2, 3, 4, 5], index=[1, 2, 2, 3, 3])

    loc_in = ser.loc[ser.index.isin([1, 3])]
    loc_out = ser.loc[~ser.index.isin([1, 3])]
    getitem_in = ser[ser.index.isin([1, 3])]
    getitem_out = ser[~ser.index.isin([1, 3])]

    check(assert_type(loc_in, "pd.Series[int]"), pd.Series, np.integer)
    check(assert_type(loc_out, "pd.Series[int]"), pd.Series, np.integer)
    check(assert_type(getitem_in, "pd.Series[int]"), pd.Series, np.integer)
    check(assert_type(getitem_out, "pd.Series[int]"), pd.Series, np.integer)
| 117 | + |
| 118 | + |
def test_series_invert() -> None:
    """Check ``~`` on a boolean Series and use the result as a mask."""
    flags = pd.Series([True, False, True])
    inverted = ~flags
    check(assert_type(inverted, "pd.Series[bool]"), pd.Series, np.bool_)

    # The inverted flags are then used to select from an integer Series.
    numbers = pd.Series([1, 2, 3])
    via_getitem = numbers[inverted]
    check(assert_type(via_getitem, "pd.Series[int]"), pd.Series, np.integer)
    via_loc = numbers.loc[inverted]
    check(assert_type(via_loc, "pd.Series[int]"), pd.Series, np.integer)
| 126 | + |
| 127 | + |
def test_series_multiindex_getitem() -> None:
    """Check that ``[]`` with a (label, slice) key can be bound as a Series."""
    str_levels = pd.MultiIndex.from_product([["a", "b"], ["x", "y"]])
    ser = pd.Series([1, 2, 3, 4], index=str_levels)
    # The annotation is the point of the test: the result must be
    # assignable to ``pd.Series``.
    _under_a: pd.Series = ser["a", :]
| 133 | + |
| 134 | + |
def test_iloc_getitem_ndarray() -> None:
    """Check ``Series.iloc[...]`` reads with integer ndarray indexers.

    One indexer is built for each signed and unsigned width (8, 16, 32 and
    64 bits), and every lookup is asserted to return a ``pd.Series``.
    """
    # GH 85
    # GH 86
    indices_i8 = np.array([0, 1, 2, 3], dtype=np.int8)
    indices_i16 = np.array([0, 1, 2, 3], dtype=np.int16)
    # Fixed-width np.int32 rather than np.int_: the latter's size is
    # platform-dependent, so it would not reliably cover the 32-bit case.
    indices_i32 = np.array([0, 1, 2, 3], dtype=np.int32)
    indices_i64 = np.array([0, 1, 2, 3], dtype=np.int64)

    indices_u8 = np.array([0, 1, 2, 3], dtype=np.uint8)
    indices_u16 = np.array([0, 1, 2, 3], dtype=np.uint16)
    indices_u32 = np.array([0, 1, 2, 3], dtype=np.uint32)
    indices_u64 = np.array([0, 1, 2, 3], dtype=np.uint64)

    values_s = pd.Series(np.arange(10), name="a")

    # Signed indexers.
    check(assert_type(values_s.iloc[indices_i8], pd.Series), pd.Series)
    check(assert_type(values_s.iloc[indices_i16], pd.Series), pd.Series)
    check(assert_type(values_s.iloc[indices_i32], pd.Series), pd.Series)
    check(assert_type(values_s.iloc[indices_i64], pd.Series), pd.Series)

    # Unsigned indexers.
    check(assert_type(values_s.iloc[indices_u8], pd.Series), pd.Series)
    check(assert_type(values_s.iloc[indices_u16], pd.Series), pd.Series)
    check(assert_type(values_s.iloc[indices_u32], pd.Series), pd.Series)
    check(assert_type(values_s.iloc[indices_u64], pd.Series), pd.Series)
| 159 | + |
| 160 | + |
def test_iloc_setitem_ndarray() -> None:
    """Check ``Series.iloc[...] = value`` with integer ndarray indexers.

    One indexer is built for each signed and unsigned width (8, 16, 32 and
    64 bits), and each is used to assign the scalar ``-1``.
    """
    # GH 85
    # GH 86
    indices_i8 = np.array([0, 1, 2, 3], dtype=np.int8)
    indices_i16 = np.array([0, 1, 2, 3], dtype=np.int16)
    # Fixed-width np.int32 rather than np.int_: the latter's size is
    # platform-dependent, so it would not reliably cover the 32-bit case.
    indices_i32 = np.array([0, 1, 2, 3], dtype=np.int32)
    indices_i64 = np.array([0, 1, 2, 3], dtype=np.int64)

    indices_u8 = np.array([0, 1, 2, 3], dtype=np.uint8)
    indices_u16 = np.array([0, 1, 2, 3], dtype=np.uint16)
    indices_u32 = np.array([0, 1, 2, 3], dtype=np.uint32)
    indices_u64 = np.array([0, 1, 2, 3], dtype=np.uint64)

    values_s = pd.Series(np.arange(10), name="a")

    # Signed indexers.
    values_s.iloc[indices_i8] = -1
    values_s.iloc[indices_i16] = -1
    values_s.iloc[indices_i32] = -1
    values_s.iloc[indices_i64] = -1

    # Unsigned indexers.
    values_s.iloc[indices_u8] = -1
    values_s.iloc[indices_u16] = -1
    values_s.iloc[indices_u32] = -1
    values_s.iloc[indices_u64] = -1
| 185 | + |
| 186 | + |
def test_loc_callable() -> None:
    """Check ``.loc`` with a callable key returns ``pd.Series[int]``."""
    # GH 586
    ser = pd.Series([1, 2])
    # The lambda stays inline inside ``.loc[...]``.
    selected = ser.loc[lambda x: x > 1]
    check(assert_type(selected, "pd.Series[int]"), pd.Series, np.integer)
| 191 | + |
| 192 | + |
def test_series_setitem_multiindex() -> None:
    """Assign through ``.loc`` with a ``pd.IndexSlice`` key on a MultiIndex."""
    # GH 767
    base = pd.DataFrame({"x": [1, 2, 3, 4]})
    # Derive "y" and "z" from "x", then move "x" and "y" into the index.
    widened = base.assign(
        y=lambda frame: frame["x"] * 10,
        z=lambda frame: frame["x"] * 100,
    )
    indexed = widened.set_index(["x", "y"])

    first_level_keys = pd.Index([2, 3])
    z_column = indexed["z"]

    z_column.loc[pd.IndexSlice[first_level_keys, :]] = 30
| 204 | + |
| 205 | + |
def test_series_setitem_na() -> None:
    """Assign ``pd.NA`` and ``pd.NaT`` through ``.loc`` and ``.iloc``.

    The frame is passed through ``convert_dtypes()`` first, and each column
    is copied before being modified.
    """
    # GH 743
    raw = pd.DataFrame(
        {"x": [1, 2, 3], "y": pd.date_range("3/1/2023", "3/3/2023")},
        index=pd.Index(["a", "b", "c"]),
    )
    converted = raw.convert_dtypes()

    outer_labels = pd.Index(["a", "c"])

    # Integer column: missing marker is pd.NA.
    numbers = converted["x"].copy()
    numbers.loc[outer_labels] = pd.NA
    numbers.iloc[[0, 2]] = pd.NA

    # Date column: missing marker is pd.NaT.
    stamps = converted["y"].copy()
    stamps.loc[outer_labels] = pd.NaT
    stamps.iloc[[0, 2]] = pd.NaT
| 222 | + |
| 223 | + |
def test_slice_timestamp() -> None:
    """Check ``.loc`` slicing between two ``pd.Timestamp`` bounds."""
    dates = pd.date_range("1/1/2025", "2/28/2025")
    ser = pd.Series(list(range(len(dates))), index=dates)

    # For `s1`, see discussion in GH 397. Needs mypy fix.
    # s1 = s.loc["2025-01-15":"2025-01-20"]

    # GH 397
    start = pd.Timestamp("2025-01-15")
    stop = pd.Timestamp("2025-01-20")
    window = ser.loc[start:stop]
    check(assert_type(window, "pd.Series[int]"), pd.Series, np.integer)
| 241 | + |
| 242 | + |
def test_series_single_slice() -> None:
    """Check reading and assigning through ``.loc[:]``."""
    # GH 572
    ser = pd.Series([1, 2, 3])
    everything = ser.loc[:]
    check(assert_type(everything, "pd.Series[int]"), pd.Series, np.integer)

    # Assign a Series expression back through the full slice.
    incremented = 1 + ser
    ser.loc[:] = incremented
| 249 | + |
| 250 | + |
def test_series_index_timestamp() -> None:
    """Check lookups keyed by ``pd.to_datetime`` results.

    A single key through ``[]`` is asserted to give an ``int``; a one-item
    list through ``.loc`` is asserted to give a ``pd.Series[int]``.
    """
    # GH 620
    first_day = pd.to_datetime("2023-05-01")
    second_day = pd.to_datetime("2023-05-02")
    ser = pd.Series([1, 2], index=[first_day, second_day])

    scalar = ser[first_day]
    check(assert_type(scalar, int), np.integer)

    subset = ser.loc[[first_day]]
    check(assert_type(subset, "pd.Series[int]"), pd.Series, np.integer)
0 commit comments