From ab9ee88dd5f00864ca670359abf125562f56bfdb Mon Sep 17 00:00:00 2001 From: Shashwati Date: Fri, 13 Feb 2026 12:19:25 +0530 Subject: [PATCH] PYTHON: Fix pandas Categorical deprecation warnings in tests (GH-49255) Replace pd.Categorical() calls whose values include entries missing from the explicitly passed categories list with the recommended pattern: create the Categorical first, then use .set_categories() to restrict it to the desired categories. Fixes deprecation warnings: - test_category: cat_strings_with_na - test_category_implicit_from_pandas: two Categorical instances Fixes #49255 --- python/pyarrow/tests/test_pandas.py | 22 +++++++++++++--------- 1 file changed, 13 insertions(+), 9 deletions(-) diff --git a/python/pyarrow/tests/test_pandas.py b/python/pyarrow/tests/test_pandas.py index cecf10f2165..dde607d756f 100644 --- a/python/pyarrow/tests/test_pandas.py +++ b/python/pyarrow/tests/test_pandas.py @@ -3069,15 +3069,19 @@ def test_category(self): v2 = [4, 5, 6, 7, 8] v3 = [b'foo', None, b'bar', b'qux', np.nan] + cat_strings = pd.Categorical(v1 * repeats) + cat_strings_with_na = cat_strings.set_categories(['foo', 'bar']) + + cat_strings_ordered = pd.Categorical( + v1 * repeats, categories=['bar', 'qux', 'foo'], ordered=True + ) + arrays = { - 'cat_strings': pd.Categorical(v1 * repeats), - 'cat_strings_with_na': pd.Categorical(v1 * repeats, - categories=['foo', 'bar']), + 'cat_strings': cat_strings, + 'cat_strings_with_na': cat_strings_with_na, 'cat_ints': pd.Categorical(v2 * repeats), 'cat_binary': pd.Categorical(v3 * repeats), - 'cat_strings_ordered': pd.Categorical( - v1 * repeats, categories=['bar', 'qux', 'foo'], - ordered=True), + 'cat_strings_ordered': cat_strings_ordered, 'ints': v2 * repeats, 'ints2': v2 * repeats, 'strings': v1 * repeats, @@ -3096,10 +3100,10 @@ def _check(v): result = arr.to_pandas() tm.assert_series_equal(pd.Series(result), pd.Series(v)) + base = pd.Categorical(['a', 'b', 'c']) arrays = [ - pd.Categorical(['a', 'b', 'c'], categories=['a', 'b']), - pd.Categorical(['a', 
'b', 'c'], categories=['a', 'b'], - ordered=True) + base.set_categories(['a', 'b']), + base.set_categories(['a', 'b']).as_ordered(), ] for arr in arrays: _check(arr)