From 6f1fda5ef1cfe7ee40ccd1ddefc3861c2718d920 Mon Sep 17 00:00:00 2001 From: AlenkaF Date: Wed, 11 Feb 2026 20:59:32 +0100 Subject: [PATCH] Initial commit --- python/pyarrow/array.pxi | 7 +++++++ python/pyarrow/tests/test_pandas.py | 23 +++++++++++++++++++++++ 2 files changed, 30 insertions(+) diff --git a/python/pyarrow/array.pxi b/python/pyarrow/array.pxi index ec58ac727e5..6480d015de8 100644 --- a/python/pyarrow/array.pxi +++ b/python/pyarrow/array.pxi @@ -2308,6 +2308,13 @@ cdef _array_like_to_pandas(obj, options, types_mapper): dtype = "object" elif types_mapper: dtype = types_mapper(original_type) + elif _pandas_api.uses_string_dtype() and ( + original_type.id == _Type_STRING or + original_type.id == _Type_LARGE_STRING or + original_type.id == _Type_STRING_VIEW + ): + # for pandas 3.0+, use pandas' new default string dtype + dtype = _pandas_api.pd.StringDtype(na_value=np.nan) else: dtype = None diff --git a/python/pyarrow/tests/test_pandas.py b/python/pyarrow/tests/test_pandas.py index cecf10f2165..8afe93d4e00 100644 --- a/python/pyarrow/tests/test_pandas.py +++ b/python/pyarrow/tests/test_pandas.py @@ -4651,6 +4651,29 @@ def test_chunked_array_to_pandas_types_mapper(): assert result.dtype == np.dtype("int64") +@pytest.mark.parametrize( + "string_type", [pa.string(), pa.large_string(), pa.string_view()] +) +@pytest.mark.parametrize("data", [[], [None]]) +def test_array_to_pandas_string_dtype(string_type, data): + # GH-49002 + if Version(pd.__version__) < Version("3.0.0"): + pytest.skip("PyArrow backed string dtype missing") + + arr = pa.array(data, type=string_type) + result = arr.to_pandas() + assert result.dtype == pd.StringDtype(na_value=np.nan) + + arr = pa.chunked_array([data], type=string_type) + result = arr.to_pandas() + assert result.dtype == pd.StringDtype(na_value=np.nan) + + # Test types_mapper takes precedence + types_mapper = {string_type: None}.get + result = arr.to_pandas(types_mapper=types_mapper) + assert result.dtype == np.dtype("object") + + # ---------------------------------------------------------------------- # Legacy metadata compatibility tests