From 73c582cbcc0c17f05612ce88f94abcf42db623d1 Mon Sep 17 00:00:00 2001 From: Brock Date: Fri, 12 Dec 2025 08:41:36 -0800 Subject: [PATCH 1/2] API: to_datetime(ints, unit) give requested unit --- pandas/_libs/tslib.pyx | 27 ++++++++++--- pandas/core/tools/datetimes.py | 7 ++-- pandas/tests/io/json/test_pandas.py | 10 ++--- .../tests/resample/test_resampler_grouper.py | 2 +- pandas/tests/tools/test_to_datetime.py | 39 +++++++++++-------- 5 files changed, 54 insertions(+), 31 deletions(-) diff --git a/pandas/_libs/tslib.pyx b/pandas/_libs/tslib.pyx index 2a53e604423d8..a97af3cbb6186 100644 --- a/pandas/_libs/tslib.pyx +++ b/pandas/_libs/tslib.pyx @@ -26,6 +26,7 @@ import numpy as np cnp.import_array() from pandas._libs.tslibs.dtypes cimport ( + abbrev_to_npy_unit, get_supported_reso, npy_unit_to_abbrev, ) @@ -312,7 +313,7 @@ cpdef array_to_datetime( _TSObject tsobj tzinfo tz, tz_out = None cnp.flatiter it = cnp.PyArray_IterNew(values) - NPY_DATETIMEUNIT item_reso + NPY_DATETIMEUNIT item_reso, int_reso bint infer_reso = creso == NPY_DATETIMEUNIT.NPY_FR_GENERIC DatetimeParseState state = DatetimeParseState(creso) str abbrev @@ -325,11 +326,11 @@ cpdef array_to_datetime( else: abbrev = npy_unit_to_abbrev(creso) - if unit_for_numerics is not None: - # either creso or unit_for_numerics should be passed, not both - assert creso == NPY_FR_ns - else: + if unit_for_numerics is None: unit_for_numerics = abbrev + int_reso = NPY_FR_ns + else: + int_reso = get_supported_reso(abbrev_to_npy_unit(unit_for_numerics)) result = np.empty((values).shape, dtype=f"M8[{abbrev}]") iresult = result.view("i8").ravel() @@ -370,7 +371,20 @@ cpdef array_to_datetime( iresult[i] = get_datetime64_nanos(val, creso) state.found_other = True - elif is_integer_object(val) or is_float_object(val): + elif is_integer_object(val): + if val == NPY_NAT: + iresult[i] = NPY_NAT + else: + item_reso = int_reso + state.update_creso(item_reso) + if infer_reso: + creso = state.creso + + iresult[i] = cast_from_unit(val, unit_for_numerics, out_reso=creso) + + state.found_other = True + + elif is_float_object(val): # these must be ns unit by-definition if val != val or val == NPY_NAT: @@ -460,6 +474,7 @@ cpdef array_to_datetime( dayfirst=dayfirst, utc=utc, creso=state.creso, + unit_for_numerics=unit_for_numerics, ) elif state.creso == NPY_DATETIMEUNIT.NPY_FR_GENERIC: # i.e. we never encountered anything non-NaT, default to "s". 
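
NOTE (illustration, not part of the patch): the tslib.pyx change above is the
core of the series: integer inputs now resolve to the nearest supported
resolution of the requested unit instead of always being coerced to
nanoseconds. A minimal sketch of the intended behavior, mirroring the test
updates later in this patch; exact reprs may differ:

    >>> import pandas as pd
    >>> pd.to_datetime([1, 2], unit="s").dtype   # requested unit is kept
    dtype('<M8[s]')
    >>> pd.to_datetime([1, 2], unit="D").dtype   # "D" is unsupported; rounds up to "s"
    dtype('<M8[s]')
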
diff --git a/pandas/core/tools/datetimes.py b/pandas/core/tools/datetimes.py
index c5c0aa4d61187..5078829308ce1 100644
--- a/pandas/core/tools/datetimes.py
+++ b/pandas/core/tools/datetimes.py
@@ -26,11 +26,11 @@
     Timedelta,
     Timestamp,
     astype_overflowsafe,
+    get_supported_dtype,
     is_supported_dtype,
     timezones as libtimezones,
 )
 from pandas._libs.tslibs.conversion import cast_from_unit_vectorized
-from pandas._libs.tslibs.dtypes import NpyDatetimeUnit
 from pandas._libs.tslibs.parsing import (
     DateParseError,
     guess_datetime_format,
@@ -503,8 +503,9 @@ def _to_datetime_with_unit(arg, unit, name, utc: bool, errors: str) -> Index:
             # Note we can't do "f" here because that could induce unwanted
             # rounding GH#14156, GH#20445
             arr = arg.astype(f"datetime64[{unit}]", copy=False)
+            dtype = get_supported_dtype(arr.dtype)
             try:
-                arr = astype_overflowsafe(arr, np.dtype("M8[ns]"), copy=False)
+                arr = astype_overflowsafe(arr, dtype, copy=False)
             except OutOfBoundsDatetime:
                 if errors == "raise":
                     raise
@@ -534,7 +535,7 @@ def _to_datetime_with_unit(arg, unit, name, utc: bool, errors: str) -> Index:
             utc=utc,
             errors=errors,
             unit_for_numerics=unit,
-            creso=cast(int, NpyDatetimeUnit.NPY_FR_ns.value),
+            # creso is deliberately not passed; unit_for_numerics determines it
         )
 
     result = DatetimeIndex(arr, name=name)
diff --git a/pandas/tests/io/json/test_pandas.py b/pandas/tests/io/json/test_pandas.py
index 5a3ec254c96b0..92ff2357304d9 100644
--- a/pandas/tests/io/json/test_pandas.py
+++ b/pandas/tests/io/json/test_pandas.py
@@ -955,7 +955,7 @@ def test_date_format_frame_raises(self, datetime_frame):
         ],
     )
     def test_date_format_series(self, date, date_unit, datetime_series):
-        ts = Series(Timestamp(date).as_unit("ns"), index=datetime_series.index)
+        ts = Series(Timestamp(date), index=datetime_series.index)
         ts.iloc[1] = pd.NaT
         ts.iloc[5] = pd.NaT
         if date_unit:
@@ -1118,9 +1118,9 @@ def test_round_trip_exception(self, datapath):
     @pytest.mark.parametrize(
         "field,dtype",
         [
-            ["created_at", pd.DatetimeTZDtype(tz="UTC")],
-            ["closed_at", "datetime64[ns]"],
-            ["updated_at", pd.DatetimeTZDtype(tz="UTC")],
+            ["created_at", pd.DatetimeTZDtype(tz="UTC", unit="us")],
+            ["closed_at", "datetime64[us]"],
+            ["updated_at", pd.DatetimeTZDtype(tz="UTC", unit="us")],
         ],
     )
     def test_url(self, field, dtype, httpserver):
@@ -1756,7 +1756,7 @@ def test_read_timezone_information(self):
         result = read_json(
             StringIO('{"2019-01-01T11:00:00.000Z":88}'), typ="series", orient="index"
         )
-        exp_dti = DatetimeIndex(["2019-01-01 11:00:00"], dtype="M8[ns, UTC]")
+        exp_dti = DatetimeIndex(["2019-01-01 11:00:00"], dtype="M8[us, UTC]")
         expected = Series([88], index=exp_dti)
         tm.assert_series_equal(result, expected)
diff --git a/pandas/tests/resample/test_resampler_grouper.py b/pandas/tests/resample/test_resampler_grouper.py
index 1d319600e632d..862578decb782 100644
--- a/pandas/tests/resample/test_resampler_grouper.py
+++ b/pandas/tests/resample/test_resampler_grouper.py
@@ -502,7 +502,7 @@ def test_groupby_resample_empty_sum_string(
     result = gbrs.sum(min_count=min_count)
 
     index = pd.MultiIndex(
-        levels=[[1, 2, 3], [pd.to_datetime("2000-01-01", unit="ns")]],
+        levels=[[1, 2, 3], [pd.to_datetime("2000-01-01", unit="ns").as_unit("ns")]],
         codes=[[0, 1, 2], [0, 0, 0]],
         names=["A", None],
     )
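
NOTE (illustration, not part of the patch): the datetimes.py hunk above swaps
the hard-coded "M8[ns]" target for get_supported_dtype, which maps an
arbitrary numpy datetime unit to the nearest unit pandas supports (s, ms, us,
ns). A sketch, assuming this helper behaves as in pandas 2.x:

    >>> import numpy as np
    >>> from pandas._libs.tslibs import get_supported_dtype
    >>> get_supported_dtype(np.dtype("M8[D]"))   # coarser than "s" rounds up
    dtype('<M8[s]')
    >>> get_supported_dtype(np.dtype("M8[us]"))  # already supported, unchanged
    dtype('<M8[us]')
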
diff --git a/pandas/tests/tools/test_to_datetime.py b/pandas/tests/tools/test_to_datetime.py
index 466ac5582dc65..c786c01b58292 100644
--- a/pandas/tests/tools/test_to_datetime.py
+++ b/pandas/tests/tools/test_to_datetime.py
@@ -1782,7 +1782,8 @@ class TestToDatetimeUnit:
     def test_to_datetime_month_or_year_unit_int(self, cache, unit, item, request):
         # GH#50870 Note we have separate tests that pd.Timestamp gets these right
         ts = Timestamp(item, unit=unit)
-        expected = DatetimeIndex([ts], dtype="M8[ns]")
+        dtype = "M8[ns]" if isinstance(item, float) else "M8[s]"
+        expected = DatetimeIndex([ts], dtype=dtype)
 
         result = to_datetime([item], unit=unit, cache=cache)
         tm.assert_index_equal(result, expected)
@@ -1796,7 +1797,7 @@ def test_to_datetime_month_or_year_unit_int(self, cache, unit, item, request):
         # with a nan!
         result = to_datetime(np.array([item, np.nan]), unit=unit, cache=cache)
         assert result.isna()[1]
-        tm.assert_index_equal(result[:1], expected)
+        tm.assert_index_equal(result[:1], expected.astype("M8[ns]"))
 
     @pytest.mark.parametrize("unit", ["Y", "M"])
     def test_to_datetime_month_or_year_unit_non_round_float(self, cache, unit):
@@ -1820,12 +1821,12 @@ def test_to_datetime_month_or_year_unit_non_round_float(self, cache, unit):
         # In 3.0, the string "1.5" is parsed as it would be without unit,
         # which fails. With errors="coerce" this becomes NaT.
         res = to_datetime(["1.5"], unit=unit, errors="coerce")
-        expected = to_datetime([NaT]).as_unit("ns")
+        expected = to_datetime([NaT])
         tm.assert_index_equal(res, expected)
 
         # round floats are OK
         res = to_datetime([1.0], unit=unit)
-        expected = to_datetime([1], unit=unit)
+        expected = to_datetime([1], unit=unit).as_unit("ns")
         tm.assert_index_equal(res, expected)
 
     def test_unit(self, cache):
@@ -1853,7 +1854,7 @@ def test_unit_array_mixed_nans_large_int(self, cache):
         values = [1420043460000000000000000, iNaT, NaT, np.nan, "NaT"]
 
         result = to_datetime(values, errors="coerce", unit="s", cache=cache)
-        expected = DatetimeIndex(["NaT", "NaT", "NaT", "NaT", "NaT"], dtype="M8[ns]")
+        expected = DatetimeIndex(["NaT", "NaT", "NaT", "NaT", "NaT"], dtype="M8[s]")
         tm.assert_index_equal(result, expected)
 
         msg = "cannot convert input 1420043460000000000000000 with the unit 's'"
@@ -1950,12 +1951,13 @@ def test_to_datetime_unit(self, dtype):
         epoch = 1370745748
         ser = Series([epoch + t for t in range(20)]).astype(dtype)
         result = to_datetime(ser, unit="s")
+        unit = "s" if dtype is int else "ns"
         expected = Series(
             [
                 Timestamp("2013-06-09 02:42:28") + timedelta(seconds=t)
                 for t in range(20)
             ],
-            dtype="M8[ns]",
+            dtype=f"M8[{unit}]",
         )
         tm.assert_series_equal(result, expected)
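
NOTE (illustration, not part of the patch): the test updates above encode the
new rule that integer inputs keep the requested unit, while float inputs still
resolve to nanoseconds. Sketch, following test_to_datetime_unit:

    >>> import pandas as pd
    >>> pd.to_datetime([1370745748], unit="s").dtype    # int: requested unit
    dtype('<M8[s]')
    >>> pd.to_datetime([1370745748.0], unit="s").dtype  # float: ns, as before
    dtype('<M8[ns]')
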
+ unit = "ns" if null is np.nan else "s" expected = Series( [Timestamp("2013-06-09 02:42:28") + timedelta(seconds=t) for t in range(20)] + [NaT], - dtype="M8[ns]", + dtype=f"M8[{unit}]", ) tm.assert_series_equal(result, expected) @@ -1992,25 +1997,25 @@ def test_to_datetime_unit_na_values(self): result = to_datetime([1, 2, "NaT", NaT, np.nan], unit="D") expected = DatetimeIndex( [Timestamp("1970-01-02"), Timestamp("1970-01-03")] + ["NaT"] * 3, - dtype="M8[ns]", + dtype="M8[s]", ) tm.assert_index_equal(result, expected) - @pytest.mark.parametrize("bad_val", ["foo", 111111111]) + @pytest.mark.parametrize("bad_val", ["foo", 111111111111111]) def test_to_datetime_unit_invalid(self, bad_val): if bad_val == "foo": msg = f"Unknown datetime string format, unable to parse: {bad_val}" else: - msg = "cannot convert input 111111111 with the unit 'D'" + msg = "cannot convert input 111111111111111 with the unit 'D'" with pytest.raises(ValueError, match=msg): to_datetime([1, 2, bad_val], unit="D") - @pytest.mark.parametrize("bad_val", ["foo", 111111111]) + @pytest.mark.parametrize("bad_val", ["foo", 111111111111111]) def test_to_timestamp_unit_coerce(self, bad_val): # coerce we can process expected = DatetimeIndex( [Timestamp("1970-01-02"), Timestamp("1970-01-03")] + ["NaT"] * 1, - dtype="M8[ns]", + dtype="M8[s]", ) result = to_datetime([1, 2, bad_val], unit="D", errors="coerce") tm.assert_index_equal(result, expected) @@ -3223,7 +3228,7 @@ def test_unix(self): result = Series(to_datetime([0, 1, 2], unit="D", origin="unix")) expected = Series( [Timestamp("1970-01-01"), Timestamp("1970-01-02"), Timestamp("1970-01-03")], - dtype="M8[ns]", + dtype="M8[s]", ) tm.assert_series_equal(result, expected) @@ -3262,8 +3267,10 @@ def test_invalid_origin(self, unit): def test_epoch(self, units, epochs): epoch_1960 = Timestamp(1960, 1, 1) units_from_epochs = np.arange(5, dtype=np.int64) + exp_unit = "s" if units == "D" else units expected = Series( - [pd.Timedelta(x, unit=units) + epoch_1960 for x in units_from_epochs] + [pd.Timedelta(x, unit=units) + epoch_1960 for x in units_from_epochs], + dtype=f"M8[{exp_unit}]", ) result = Series(to_datetime(units_from_epochs, unit=units, origin=epochs)) @@ -3358,7 +3365,7 @@ def test_arg_tz_ns_unit(self, offset, utc, exp): # GH 25546 arg = "2019-01-01T00:00:00.000" + offset result = to_datetime([arg], unit="ns", utc=utc) - expected = to_datetime([exp]).as_unit("ns") + expected = to_datetime([exp]).as_unit("us") tm.assert_index_equal(result, expected) @@ -3458,7 +3465,7 @@ def test_empty_string_datetime_coerce__unit(): # GH13044 # coerce empty string to pd.NaT result = to_datetime([1, ""], unit="s", errors="coerce") - expected = DatetimeIndex(["1970-01-01 00:00:01", "NaT"], dtype="datetime64[ns]") + expected = DatetimeIndex(["1970-01-01 00:00:01", "NaT"], dtype="datetime64[s]") tm.assert_index_equal(expected, result) # verify that no exception is raised even when errors='raise' is set From 0eda6d913b0efea9e1066188e419362558ce1d72 Mon Sep 17 00:00:00 2001 From: Brock Date: Sat, 13 Dec 2025 10:34:14 -0800 Subject: [PATCH 2/2] fix json cases --- pandas/io/json/_json.py | 6 +++++- pandas/tests/io/json/test_pandas.py | 10 +++++----- 2 files changed, 10 insertions(+), 6 deletions(-) diff --git a/pandas/io/json/_json.py b/pandas/io/json/_json.py index 193189eb624ec..b4409de70dde2 100644 --- a/pandas/io/json/_json.py +++ b/pandas/io/json/_json.py @@ -1312,7 +1312,11 @@ def _try_convert_to_date(self, data: Series) -> Series: date_units = (self.date_unit,) if self.date_unit else 
From 0eda6d913b0efea9e1066188e419362558ce1d72 Mon Sep 17 00:00:00 2001
From: Brock
Date: Sat, 13 Dec 2025 10:34:14 -0800
Subject: [PATCH 2/2] fix json cases

---
 pandas/io/json/_json.py             |  6 +++++-
 pandas/tests/io/json/test_pandas.py | 10 +++++-----
 2 files changed, 10 insertions(+), 6 deletions(-)

diff --git a/pandas/io/json/_json.py b/pandas/io/json/_json.py
index 193189eb624ec..b4409de70dde2 100644
--- a/pandas/io/json/_json.py
+++ b/pandas/io/json/_json.py
@@ -1312,7 +1312,11 @@ def _try_convert_to_date(self, data: Series) -> Series:
         date_units = (self.date_unit,) if self.date_unit else self._STAMP_UNITS
         for date_unit in date_units:
             try:
-                return to_datetime(new_data, errors="raise", unit=date_unit)
+                # Without this as_unit cast, out-of-bounds values would not
+                # overflow and we would silently get much-too-large dates
+                return to_datetime(new_data, errors="raise", unit=date_unit).dt.as_unit(
+                    "ns"
+                )
             except (ValueError, OverflowError, TypeError):
                 continue
         return data
diff --git a/pandas/tests/io/json/test_pandas.py b/pandas/tests/io/json/test_pandas.py
index 92ff2357304d9..ba24836e2672f 100644
--- a/pandas/tests/io/json/test_pandas.py
+++ b/pandas/tests/io/json/test_pandas.py
@@ -964,7 +964,7 @@ def test_date_format_series(self, date, date_unit, datetime_series):
 
         json = ts.to_json(date_format="iso")
         result = read_json(StringIO(json), typ="series")
-        expected = ts.copy()
+        expected = ts.copy().dt.as_unit("ns")
         tm.assert_series_equal(result, expected)
 
     def test_date_format_series_raises(self, datetime_series):
@@ -1118,9 +1118,9 @@ def test_round_trip_exception(self, datapath):
     @pytest.mark.parametrize(
         "field,dtype",
         [
-            ["created_at", pd.DatetimeTZDtype(tz="UTC", unit="us")],
-            ["closed_at", "datetime64[us]"],
-            ["updated_at", pd.DatetimeTZDtype(tz="UTC", unit="us")],
+            ["created_at", pd.DatetimeTZDtype(tz="UTC")],
+            ["closed_at", "datetime64[ns]"],
+            ["updated_at", pd.DatetimeTZDtype(tz="UTC")],
         ],
     )
     def test_url(self, field, dtype, httpserver):
@@ -1756,7 +1756,7 @@ def test_read_timezone_information(self):
         result = read_json(
             StringIO('{"2019-01-01T11:00:00.000Z":88}'), typ="series", orient="index"
        )
-        exp_dti = DatetimeIndex(["2019-01-01 11:00:00"], dtype="M8[us, UTC]")
+        exp_dti = DatetimeIndex(["2019-01-01 11:00:00"], dtype="M8[ns, UTC]")
         expected = Series([88], index=exp_dti)
         tm.assert_series_equal(result, expected)
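
NOTE (illustration, not part of the patch): with the as_unit("ns") cast in
_json.py above, JSON date parsing keeps returning nanosecond-resolution data,
so read_json behavior is unchanged by PATCH 1 and the test expectations revert
to "ns". Sketch, following the updated test_read_timezone_information:

    >>> from io import StringIO
    >>> import pandas as pd
    >>> ser = pd.read_json(StringIO('{"2019-01-01T11:00:00.000Z":88}'),
    ...                    typ="series", orient="index")
    >>> ser.index.dtype
    datetime64[ns, UTC]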