Skip to content

Commit 9227738

Browse files
committed
BUG: preserve datetime64 units during concat
1 parent bb2e215 commit 9227738

File tree

3 files changed

+75
-0
lines changed

3 files changed

+75
-0
lines changed

pandas/core/dtypes/cast.py

Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -447,6 +447,35 @@ def ensure_dtype_can_hold_na(dtype: DtypeObj) -> DtypeObj:
447447
type(np.nan): np.nan,
448448
}
449449

450+
def maybe_align_dt64_units(arrs: list) -> list:
    """
    Align datetime64 arrays to the same unit *without* forcing conversion
    to nanoseconds.

    The shared unit is obtained by promoting the dtypes of the NumPy
    datetime64 arrays found in ``arrs`` with ``np.promote_types``, i.e. the
    finest unit among them.  Arrays of any other dtype neither influence
    the chosen unit nor get cast.

    Parameters
    ----------
    arrs : list
        Array-likes exposing ``.dtype`` and ``.astype``.

    Returns
    -------
    list
        ``arrs`` itself when it holds no datetime64 array; otherwise a new
        list of the same length and order in which every datetime64 array
        has the shared unit.  Arrays that already match are passed through
        as the same object (no copy).
    """

    def is_np_dt64(dtype) -> bool:
        # Restrict to plain NumPy datetime64 dtypes: only those can be fed
        # to np.promote_types below.
        return isinstance(dtype, np.dtype) and dtype.kind == "M"

    dt64_dtypes = [arr.dtype for arr in arrs if is_np_dt64(arr.dtype)]
    if not dt64_dtypes:
        return arrs

    # Let NumPy pick the common unit.  This handles every datetime64 unit
    # (not only s/ms/us/ns) and keeps non-datetime arrays from dragging the
    # result to nanoseconds.  Promote pairwise so the number of arrays is
    # not limited by NumPy's cap on function arguments.
    target_dtype = dt64_dtypes[0]
    for dtype in dt64_dtypes[1:]:
        target_dtype = np.promote_types(target_dtype, dtype)

    return [
        arr.astype(target_dtype)
        if is_np_dt64(arr.dtype) and arr.dtype != target_dtype
        else arr
        for arr in arrs
    ]
450479

451480
def maybe_promote(dtype: np.dtype, fill_value=np.nan):
452481
"""

pandas/core/dtypes/concat.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -124,6 +124,10 @@ def concat_compat(
124124
else:
125125
to_concat_arrs = cast("Sequence[np.ndarray]", to_concat)
126126
result = np.concatenate(to_concat_arrs, axis=axis)
127+
# PR FIX: preserve datetime64 units during concat
128+
from pandas.core.dtypes.cast import maybe_align_dt64_units
129+
result = maybe_align_dt64_units(result, to_concat_arrs)
130+
127131

128132
if not any_ea and "b" in kinds and result.dtype.kind in "iuf":
129133
# GH#39817 cast to object instead of casting bools to numeric
Lines changed: 42 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,42 @@
1+
import numpy as np
2+
import pandas as pd
3+
4+
5+
def test_concat_series_datetime64_different_units():
6+
a = pd.Series(np.array(["2020-01-01T00:00:00"], dtype="datetime64[s]"))
7+
b = pd.Series(np.array(["2020-01-02T00:00:00"], dtype="datetime64[ms]"))
8+
9+
result = pd.concat([a, b], ignore_index=True)
10+
11+
# dtype should still be datetime64 (M)
12+
assert result.dtype.kind == "M"
13+
14+
assert result.iloc[0] == pd.Timestamp("2020-01-01")
15+
assert result.iloc[1] == pd.Timestamp("2020-01-02")
16+
17+
18+
def test_concat_dataframe_datetime64_different_units():
19+
df1 = pd.DataFrame({
20+
"ts": np.array(["2020-01-01T00:00:00"], dtype="datetime64[s]")
21+
})
22+
df2 = pd.DataFrame({
23+
"ts": np.array(["2020-01-02T00:00:00"], dtype="datetime64[ms]")
24+
})
25+
26+
result = pd.concat([df1, df2], ignore_index=True)
27+
28+
assert result["ts"].dtype.kind == "M"
29+
assert result["ts"].iloc[0] == pd.Timestamp("2020-01-01")
30+
assert result["ts"].iloc[1] == pd.Timestamp("2020-01-02")
31+
32+
33+
def test_concat_datetime64_preserves_unit_order():
34+
# Ensures that units are properly aligned, and no value shifts occur
35+
a = pd.Series(np.array(["2020-01-01T00:00:00"], dtype="datetime64[us]"))
36+
b = pd.Series(np.array(["2020-01-01T00:00:01"], dtype="datetime64[ns]"))
37+
38+
result = pd.concat([a, b], ignore_index=True)
39+
40+
assert result.dtype.kind == "M"
41+
assert result.iloc[0] == pd.Timestamp("2020-01-01 00:00:00")
42+
assert result.iloc[1] == pd.Timestamp("2020-01-01 00:00:01")

0 commit comments

Comments
 (0)