Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
27 changes: 21 additions & 6 deletions pandas/_libs/tslib.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@ import numpy as np
cnp.import_array()

from pandas._libs.tslibs.dtypes cimport (
abbrev_to_npy_unit,
get_supported_reso,
npy_unit_to_abbrev,
)
Expand Down Expand Up @@ -312,7 +313,7 @@ cpdef array_to_datetime(
_TSObject tsobj
tzinfo tz, tz_out = None
cnp.flatiter it = cnp.PyArray_IterNew(values)
NPY_DATETIMEUNIT item_reso
NPY_DATETIMEUNIT item_reso, int_reso
bint infer_reso = creso == NPY_DATETIMEUNIT.NPY_FR_GENERIC
DatetimeParseState state = DatetimeParseState(creso)
str abbrev
Expand All @@ -325,11 +326,11 @@ cpdef array_to_datetime(
else:
abbrev = npy_unit_to_abbrev(creso)

if unit_for_numerics is not None:
# either creso or unit_for_numerics should be passed, not both
assert creso == NPY_FR_ns
else:
if unit_for_numerics is None:
unit_for_numerics = abbrev
int_reso = NPY_FR_ns
else:
int_reso = get_supported_reso(abbrev_to_npy_unit(unit_for_numerics))

result = np.empty((<object>values).shape, dtype=f"M8[{abbrev}]")
iresult = result.view("i8").ravel()
Expand Down Expand Up @@ -370,7 +371,20 @@ cpdef array_to_datetime(
iresult[i] = get_datetime64_nanos(val, creso)
state.found_other = True

elif is_integer_object(val) or is_float_object(val):
elif is_integer_object(val):
if val == NPY_NAT:
iresult[i] = NPY_NAT
else:
item_reso = int_reso
state.update_creso(item_reso)
if infer_reso:
creso = state.creso

iresult[i] = cast_from_unit(val, unit_for_numerics, out_reso=creso)

state.found_other = True

elif is_float_object(val):
# these must be ns unit by-definition

if val != val or val == NPY_NAT:
Expand Down Expand Up @@ -460,6 +474,7 @@ cpdef array_to_datetime(
dayfirst=dayfirst,
utc=utc,
creso=state.creso,
unit_for_numerics=unit_for_numerics,
)
elif state.creso == NPY_DATETIMEUNIT.NPY_FR_GENERIC:
# i.e. we never encountered anything non-NaT, default to "s". This
Expand Down
7 changes: 4 additions & 3 deletions pandas/core/tools/datetimes.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,11 +26,11 @@
Timedelta,
Timestamp,
astype_overflowsafe,
get_supported_dtype,
is_supported_dtype,
timezones as libtimezones,
)
from pandas._libs.tslibs.conversion import cast_from_unit_vectorized
from pandas._libs.tslibs.dtypes import NpyDatetimeUnit
from pandas._libs.tslibs.parsing import (
DateParseError,
guess_datetime_format,
Expand Down Expand Up @@ -503,8 +503,9 @@ def _to_datetime_with_unit(arg, unit, name, utc: bool, errors: str) -> Index:
# Note we can't do "f" here because that could induce unwanted
# rounding GH#14156, GH#20445
arr = arg.astype(f"datetime64[{unit}]", copy=False)
dtype = get_supported_dtype(arr.dtype)
try:
arr = astype_overflowsafe(arr, np.dtype("M8[ns]"), copy=False)
arr = astype_overflowsafe(arr, dtype, copy=False)
except OutOfBoundsDatetime:
if errors == "raise":
raise
Expand Down Expand Up @@ -534,7 +535,7 @@ def _to_datetime_with_unit(arg, unit, name, utc: bool, errors: str) -> Index:
utc=utc,
errors=errors,
unit_for_numerics=unit,
creso=cast(int, NpyDatetimeUnit.NPY_FR_ns.value),
# creso=cast(int, NpyDatetimeUnit.NPY_FR_ns.value),
)

result = DatetimeIndex(arr, name=name)
Expand Down
6 changes: 5 additions & 1 deletion pandas/io/json/_json.py
Original file line number Diff line number Diff line change
Expand Up @@ -1312,7 +1312,11 @@ def _try_convert_to_date(self, data: Series) -> Series:
date_units = (self.date_unit,) if self.date_unit else self._STAMP_UNITS
for date_unit in date_units:
try:
return to_datetime(new_data, errors="raise", unit=date_unit)
# Without this as_unit cast, out-of-bounds values would not
# overflow (and raise); we would silently get much-too-large dates
return to_datetime(new_data, errors="raise", unit=date_unit).dt.as_unit(
"ns"
)
except (ValueError, OverflowError, TypeError):
continue
return data
Expand Down
4 changes: 2 additions & 2 deletions pandas/tests/io/json/test_pandas.py
Original file line number Diff line number Diff line change
Expand Up @@ -955,7 +955,7 @@ def test_date_format_frame_raises(self, datetime_frame):
],
)
def test_date_format_series(self, date, date_unit, datetime_series):
ts = Series(Timestamp(date).as_unit("ns"), index=datetime_series.index)
ts = Series(Timestamp(date), index=datetime_series.index)
ts.iloc[1] = pd.NaT
ts.iloc[5] = pd.NaT
if date_unit:
Expand All @@ -964,7 +964,7 @@ def test_date_format_series(self, date, date_unit, datetime_series):
json = ts.to_json(date_format="iso")

result = read_json(StringIO(json), typ="series")
expected = ts.copy()
expected = ts.copy().dt.as_unit("ns")
tm.assert_series_equal(result, expected)

def test_date_format_series_raises(self, datetime_series):
Expand Down
2 changes: 1 addition & 1 deletion pandas/tests/resample/test_resampler_grouper.py
Original file line number Diff line number Diff line change
Expand Up @@ -502,7 +502,7 @@ def test_groupby_resample_empty_sum_string(
result = gbrs.sum(min_count=min_count)

index = pd.MultiIndex(
levels=[[1, 2, 3], [pd.to_datetime("2000-01-01", unit="ns")]],
levels=[[1, 2, 3], [pd.to_datetime("2000-01-01", unit="ns").as_unit("ns")]],
codes=[[0, 1, 2], [0, 0, 0]],
names=["A", None],
)
Expand Down
39 changes: 23 additions & 16 deletions pandas/tests/tools/test_to_datetime.py
Original file line number Diff line number Diff line change
Expand Up @@ -1782,7 +1782,8 @@ class TestToDatetimeUnit:
def test_to_datetime_month_or_year_unit_int(self, cache, unit, item, request):
# GH#50870 Note we have separate tests that pd.Timestamp gets these right
ts = Timestamp(item, unit=unit)
expected = DatetimeIndex([ts], dtype="M8[ns]")
dtype = "M8[ns]" if isinstance(item, float) else "M8[s]"
expected = DatetimeIndex([ts], dtype=dtype)

result = to_datetime([item], unit=unit, cache=cache)
tm.assert_index_equal(result, expected)
Expand All @@ -1796,7 +1797,7 @@ def test_to_datetime_month_or_year_unit_int(self, cache, unit, item, request):
# with a nan!
result = to_datetime(np.array([item, np.nan]), unit=unit, cache=cache)
assert result.isna()[1]
tm.assert_index_equal(result[:1], expected)
tm.assert_index_equal(result[:1], expected.astype("M8[ns]"))

@pytest.mark.parametrize("unit", ["Y", "M"])
def test_to_datetime_month_or_year_unit_non_round_float(self, cache, unit):
Expand All @@ -1820,12 +1821,12 @@ def test_to_datetime_month_or_year_unit_non_round_float(self, cache, unit):
# In 3.0, the string "1.5" is parsed as it would be without unit,
# which fails. With errors="coerce" this becomes NaT.
res = to_datetime(["1.5"], unit=unit, errors="coerce")
expected = to_datetime([NaT]).as_unit("ns")
expected = to_datetime([NaT])
tm.assert_index_equal(res, expected)

# round floats are OK
res = to_datetime([1.0], unit=unit)
expected = to_datetime([1], unit=unit)
expected = to_datetime([1], unit=unit).as_unit("ns")
tm.assert_index_equal(res, expected)

def test_unit(self, cache):
Expand Down Expand Up @@ -1853,7 +1854,7 @@ def test_unit_array_mixed_nans_large_int(self, cache):
values = [1420043460000000000000000, iNaT, NaT, np.nan, "NaT"]

result = to_datetime(values, errors="coerce", unit="s", cache=cache)
expected = DatetimeIndex(["NaT", "NaT", "NaT", "NaT", "NaT"], dtype="M8[ns]")
expected = DatetimeIndex(["NaT", "NaT", "NaT", "NaT", "NaT"], dtype="M8[s]")
tm.assert_index_equal(result, expected)

msg = "cannot convert input 1420043460000000000000000 with the unit 's'"
Expand Down Expand Up @@ -1950,12 +1951,13 @@ def test_to_datetime_unit(self, dtype):
epoch = 1370745748
ser = Series([epoch + t for t in range(20)]).astype(dtype)
result = to_datetime(ser, unit="s")
unit = "s" if dtype is int else "ns"
expected = Series(
[
Timestamp("2013-06-09 02:42:28") + timedelta(seconds=t)
for t in range(20)
],
dtype="M8[ns]",
dtype=f"M8[{unit}]",
)
tm.assert_series_equal(result, expected)

Expand All @@ -1964,10 +1966,13 @@ def test_to_datetime_unit_with_nulls(self, null):
epoch = 1370745748
ser = Series([epoch + t for t in range(20)] + [null])
result = to_datetime(ser, unit="s")
# With np.nan, the list gets cast to a float64 array, which always
# gets ns unit.
unit = "ns" if null is np.nan else "s"
expected = Series(
[Timestamp("2013-06-09 02:42:28") + timedelta(seconds=t) for t in range(20)]
+ [NaT],
dtype="M8[ns]",
dtype=f"M8[{unit}]",
)
tm.assert_series_equal(result, expected)

Expand All @@ -1992,25 +1997,25 @@ def test_to_datetime_unit_na_values(self):
result = to_datetime([1, 2, "NaT", NaT, np.nan], unit="D")
expected = DatetimeIndex(
[Timestamp("1970-01-02"), Timestamp("1970-01-03")] + ["NaT"] * 3,
dtype="M8[ns]",
dtype="M8[s]",
)
tm.assert_index_equal(result, expected)

@pytest.mark.parametrize("bad_val", ["foo", 111111111])
@pytest.mark.parametrize("bad_val", ["foo", 111111111111111])
def test_to_datetime_unit_invalid(self, bad_val):
if bad_val == "foo":
msg = f"Unknown datetime string format, unable to parse: {bad_val}"
else:
msg = "cannot convert input 111111111 with the unit 'D'"
msg = "cannot convert input 111111111111111 with the unit 'D'"
with pytest.raises(ValueError, match=msg):
to_datetime([1, 2, bad_val], unit="D")

@pytest.mark.parametrize("bad_val", ["foo", 111111111])
@pytest.mark.parametrize("bad_val", ["foo", 111111111111111])
def test_to_timestamp_unit_coerce(self, bad_val):
# coerce we can process
expected = DatetimeIndex(
[Timestamp("1970-01-02"), Timestamp("1970-01-03")] + ["NaT"] * 1,
dtype="M8[ns]",
dtype="M8[s]",
)
result = to_datetime([1, 2, bad_val], unit="D", errors="coerce")
tm.assert_index_equal(result, expected)
Expand Down Expand Up @@ -3223,7 +3228,7 @@ def test_unix(self):
result = Series(to_datetime([0, 1, 2], unit="D", origin="unix"))
expected = Series(
[Timestamp("1970-01-01"), Timestamp("1970-01-02"), Timestamp("1970-01-03")],
dtype="M8[ns]",
dtype="M8[s]",
)
tm.assert_series_equal(result, expected)

Expand Down Expand Up @@ -3262,8 +3267,10 @@ def test_invalid_origin(self, unit):
def test_epoch(self, units, epochs):
epoch_1960 = Timestamp(1960, 1, 1)
units_from_epochs = np.arange(5, dtype=np.int64)
exp_unit = "s" if units == "D" else units
expected = Series(
[pd.Timedelta(x, unit=units) + epoch_1960 for x in units_from_epochs]
[pd.Timedelta(x, unit=units) + epoch_1960 for x in units_from_epochs],
dtype=f"M8[{exp_unit}]",
)

result = Series(to_datetime(units_from_epochs, unit=units, origin=epochs))
Expand Down Expand Up @@ -3358,7 +3365,7 @@ def test_arg_tz_ns_unit(self, offset, utc, exp):
# GH 25546
arg = "2019-01-01T00:00:00.000" + offset
result = to_datetime([arg], unit="ns", utc=utc)
expected = to_datetime([exp]).as_unit("ns")
expected = to_datetime([exp]).as_unit("us")
tm.assert_index_equal(result, expected)


Expand Down Expand Up @@ -3458,7 +3465,7 @@ def test_empty_string_datetime_coerce__unit():
# GH13044
# coerce empty string to pd.NaT
result = to_datetime([1, ""], unit="s", errors="coerce")
expected = DatetimeIndex(["1970-01-01 00:00:01", "NaT"], dtype="datetime64[ns]")
expected = DatetimeIndex(["1970-01-01 00:00:01", "NaT"], dtype="datetime64[s]")
tm.assert_index_equal(expected, result)

# verify that no exception is raised even when errors='raise' is set
Expand Down
Loading