From e40496d8a33427e13cefa03c0b92a296461def1a Mon Sep 17 00:00:00 2001 From: Raj Aryan Date: Sat, 13 Dec 2025 16:49:52 +0530 Subject: [PATCH 1/6] BUG: Fix MultiIndex lookup with mixed datetime types GH#55969 --- pandas/core/indexes/multi.py | 26 ++++++++++++++--- pandas/tests/indexes/multi/test_gh55969.py | 34 ++++++++++++++++++++++ 2 files changed, 56 insertions(+), 4 deletions(-) create mode 100644 pandas/tests/indexes/multi/test_gh55969.py diff --git a/pandas/core/indexes/multi.py b/pandas/core/indexes/multi.py index 90f710b0de4de..1434a20ab176b 100644 --- a/pandas/core/indexes/multi.py +++ b/pandas/core/indexes/multi.py @@ -2957,8 +2957,7 @@ def sortlevel( # error: Item "Hashable" of "Union[Hashable, Sequence[Hashable]]" has # no attribute "__iter__" (not iterable) level = [ - self._get_level_number(lev) - for lev in level # type: ignore[union-attr] + self._get_level_number(lev) for lev in level # type: ignore[union-attr] ] sortorder = None @@ -3264,9 +3263,9 @@ def _get_loc_single_level_index(self, level_index: Index, key: Hashable) -> int: else: return level_index.get_loc(key) - def get_loc(self, key): + def get_loc(self, key, method=None): """ - Get location for a label or a tuple of labels. The location is returned \ + Get location for a label or a tuple of labels. The location is returned as an integer/slice or boolean mask. This method returns the integer location, slice object, or boolean mask @@ -3310,6 +3309,25 @@ def get_loc(self, key): >>> mi.get_loc(("b", "e")) 1 """ + # --- FIX GH#55969 START --- + # If the key contains np.datetime64 but the level is object-dtype (python objects), + # strict lookups (and binary search) can fail. Convert to python objects to match. + if isinstance(key, tuple): + new_key = list(key) + modified = False + for i, (k, level) in enumerate(zip(new_key, self.levels)): + if isinstance(k, np.datetime64) and level.dtype == object: + try: + new_key[i] = k.item() + modified = True + except (ValueError, TypeError): + pass + if modified: + key = tuple(new_key) + + if method is not None: + return Index.get_loc(self, key, method=method) + self._check_indexing_error(key) def _maybe_to_slice(loc): diff --git a/pandas/tests/indexes/multi/test_gh55969.py b/pandas/tests/indexes/multi/test_gh55969.py new file mode 100644 index 0000000000000..a792689308806 --- /dev/null +++ b/pandas/tests/indexes/multi/test_gh55969.py @@ -0,0 +1,34 @@ +import numpy as np +import pytest + +from pandas import ( + DataFrame, + MultiIndex, + Timestamp, +) +import pandas._testing as tm + + +def test_mixed_datetime_types_lookup(): + + import datetime as dt + + dates = [dt.date(2023, 11, 1), dt.date(2023, 11, 1), dt.date(2023, 11, 2)] + t1 = ["A", "B", "C"] + t2 = ["C", "D", "E"] + vals = [10, 20, 30] + + df = DataFrame({"dates": dates, "t1": t1, "t2": t2, "vals": vals}).set_index( + ["dates", "t1", "t2"] + ) + + date_np = np.datetime64("2023-11-01") + + result = df.loc[(date_np, "A")] + expected_val = 10 + assert len(result) == 1 + assert result["vals"].iloc[0] == expected_val + + msg = "'C'" + with pytest.raises(KeyError, match=msg): + df.loc[(date_np, "C")] From b4a38b038cf29437396c3a8cecb5b310a07c74c6 Mon Sep 17 00:00:00 2001 From: Raj Aryan Date: Sun, 14 Dec 2025 02:26:59 +0530 Subject: [PATCH 2/6] BUG: Fix MultiIndex lookup with mixed datetime types GH#55969 --- pandas/core/indexes/multi.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/indexes/multi.py b/pandas/core/indexes/multi.py index 1434a20ab176b..46712b1fed44d 100644 --- a/pandas/core/indexes/multi.py +++ b/pandas/core/indexes/multi.py @@ -3315,7 +3315,7 @@ def get_loc(self, key, method=None): if isinstance(key, tuple): new_key = list(key) modified = False - for i, (k, level) in enumerate(zip(new_key, self.levels)): + for i, (k, level) in enumerate(zip(new_key, self.levels, strict=False)): if isinstance(k, np.datetime64) and level.dtype == object: try: new_key[i] = k.item() From cc3fa4b159b3f9fe3aadb7e58e687e926d6cca31 Mon Sep 17 00:00:00 2001 From: Raj Aryan Date: Sun, 14 Dec 2025 02:35:49 +0530 Subject: [PATCH 3/6] BUG: Fix MultiIndex lookup with mixed datetime types GH#55969 --- pandas/core/indexes/multi.py | 19 ++++++++++++------- 1 file changed, 12 insertions(+), 7 deletions(-) diff --git a/pandas/core/indexes/multi.py b/pandas/core/indexes/multi.py index 46712b1fed44d..69182208f7efb 100644 --- a/pandas/core/indexes/multi.py +++ b/pandas/core/indexes/multi.py @@ -3265,25 +3265,30 @@ def _get_loc_single_level_index(self, level_index: Index, key: Hashable) -> int: def get_loc(self, key, method=None): """ - Get location for a label or a tuple of labels. The location is returned + Get location for a label or a tuple of labels. + The location is returned as an integer/slice or boolean mask. - This method returns the integer location, slice object, or boolean mask - corresponding to the specified key, which can be a single label or a tuple - of labels. The key represents a position in the MultiIndex, and the location + This method returns the integer location, slice + object, or boolean mask + corresponding to the specified key, which can be + a single label or a tuple + of labels. The key represents a position in the + MultiIndex, and the location indicates where the key is found within the index. Parameters ---------- key : label or tuple of labels (one for each level) - A label or tuple of labels that correspond to the levels of the MultiIndex. + A label or tuple of labels that correspond to the + levels of the MultiIndex. The key must match the structure of the MultiIndex. Returns ------- int, slice object or boolean mask - If the key is past the lexsort depth, the return may be a - boolean mask array, otherwise it is always a slice or int. + If the key is past the lexsort depth, the return may be a + boolean mask array, otherwise it is always a slice or int. See Also -------- From 12ecefb87c143afec3e56884daecf3696df39ce4 Mon Sep 17 00:00:00 2001 From: Raj Aryan Date: Sun, 14 Dec 2025 02:42:56 +0530 Subject: [PATCH 4/6] BUG: Fix linting errors in MultiIndex GH#55969 --- pandas/core/indexes/multi.py | 19 +++++++------------ 1 file changed, 7 insertions(+), 12 deletions(-) diff --git a/pandas/core/indexes/multi.py b/pandas/core/indexes/multi.py index 69182208f7efb..46712b1fed44d 100644 --- a/pandas/core/indexes/multi.py +++ b/pandas/core/indexes/multi.py @@ -3265,30 +3265,25 @@ def _get_loc_single_level_index(self, level_index: Index, key: Hashable) -> int: def get_loc(self, key, method=None): """ - Get location for a label or a tuple of labels. - The location is returned + Get location for a label or a tuple of labels. The location is returned as an integer/slice or boolean mask. - This method returns the integer location, slice - object, or boolean mask - corresponding to the specified key, which can be - a single label or a tuple - of labels. The key represents a position in the - MultiIndex, and the location + This method returns the integer location, slice object, or boolean mask + corresponding to the specified key, which can be a single label or a tuple + of labels. The key represents a position in the MultiIndex, and the location indicates where the key is found within the index. Parameters ---------- key : label or tuple of labels (one for each level) - A label or tuple of labels that correspond to the - levels of the MultiIndex. + A label or tuple of labels that correspond to the levels of the MultiIndex. The key must match the structure of the MultiIndex. Returns ------- int, slice object or boolean mask - If the key is past the lexsort depth, the return may be a - boolean mask array, otherwise it is always a slice or int. + If the key is past the lexsort depth, the return may be a + boolean mask array, otherwise it is always a slice or int. See Also -------- From cf3b5706e7416e1c06e99b8a8797bbcc9d24a44c Mon Sep 17 00:00:00 2001 From: Raj Aryan Date: Sun, 14 Dec 2025 02:49:50 +0530 Subject: [PATCH 5/6] BUG: Fix linting errors GH#55969 --- pandas/core/indexes/multi.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/pandas/core/indexes/multi.py b/pandas/core/indexes/multi.py index 46712b1fed44d..ac66db54c83d7 100644 --- a/pandas/core/indexes/multi.py +++ b/pandas/core/indexes/multi.py @@ -3309,12 +3309,13 @@ def get_loc(self, key, method=None): >>> mi.get_loc(("b", "e")) 1 """ - # --- FIX GH#55969 START --- - # If the key contains np.datetime64 but the level is object-dtype (python objects), - # strict lookups (and binary search) can fail. Convert to python objects to match. + # GH#55969: If key has np.datetime64 but level is object-dtype + # (python objects), strict lookups/binary search can fail. + # Convert to python objects to match. if isinstance(key, tuple): new_key = list(key) modified = False + # Use strict=False as key len might be < levels len for i, (k, level) in enumerate(zip(new_key, self.levels, strict=False)): if isinstance(k, np.datetime64) and level.dtype == object: try: From e870356e7f48872f7160169dcdae692ef0342fef Mon Sep 17 00:00:00 2001 From: Raj Aryan Date: Sun, 14 Dec 2025 03:17:45 +0530 Subject: [PATCH 6/6] Fix linting errors GH#55969 --- pandas/core/indexes/multi.py | 42 ++++++------------------------------ 1 file changed, 6 insertions(+), 36 deletions(-) diff --git a/pandas/core/indexes/multi.py b/pandas/core/indexes/multi.py index ac66db54c83d7..4bc418a9a33d6 100644 --- a/pandas/core/indexes/multi.py +++ b/pandas/core/indexes/multi.py @@ -3265,49 +3265,19 @@ def _get_loc_single_level_index(self, level_index: Index, key: Hashable) -> int: def get_loc(self, key, method=None): """ - Get location for a label or a tuple of labels. The location is returned - as an integer/slice or boolean mask. - - This method returns the integer location, slice object, or boolean mask - corresponding to the specified key, which can be a single label or a tuple - of labels. The key represents a position in the MultiIndex, and the location - indicates where the key is found within the index. + Get location for a label or a tuple of labels. Parameters ---------- key : label or tuple of labels (one for each level) - A label or tuple of labels that correspond to the levels of the MultiIndex. - The key must match the structure of the MultiIndex. + The key to locate. + method : str or None, optional + Method for getting the location (see Index.get_loc). Returns ------- - int, slice object or boolean mask - If the key is past the lexsort depth, the return may be a - boolean mask array, otherwise it is always a slice or int. - - See Also - -------- - Index.get_loc : The get_loc method for (single-level) index. - MultiIndex.slice_locs : Get slice location given start label(s) and - end label(s). - MultiIndex.get_locs : Get location for a label/slice/list/mask or a - sequence of such. - - Notes - ----- - The key cannot be a slice, list of same-level labels, a boolean mask, - or a sequence of such. If you want to use those, use - :meth:`MultiIndex.get_locs` instead. - - Examples - -------- - >>> mi = pd.MultiIndex.from_arrays([list("abb"), list("def")]) - - >>> mi.get_loc("b") - slice(1, 3, None) - - >>> mi.get_loc(("b", "e")) - 1 + int, slice, or boolean mask + Location(s) of the key. """ # GH#55969: If key has np.datetime64 but level is object-dtype # (python objects), strict lookups/binary search can fail.