diff --git a/pandas/io/json/_json.py b/pandas/io/json/_json.py index 193189eb624ec..e109124883bc7 100644 --- a/pandas/io/json/_json.py +++ b/pandas/io/json/_json.py @@ -13,6 +13,7 @@ Literal, Self, TypeVar, + cast, final, overload, ) @@ -38,10 +39,12 @@ from pandas.core.dtypes.common import ( ensure_str, is_string_dtype, + is_timedelta64_dtype, pandas_dtype, ) from pandas.core.dtypes.dtypes import PeriodDtype +import pandas as pd from pandas import ( ArrowDtype, DataFrame, @@ -71,6 +74,7 @@ ) from pandas.io.parsers.readers import validate_integer +DateUnit = Literal["s", "ms", "us", "ns"] if TYPE_CHECKING: from collections.abc import ( Callable, @@ -222,6 +226,47 @@ def to_json( return None +def _format_timedelta_labels(index, date_format: str, date_unit: str | None): + """ + Format TimedeltaIndex labels for JSON serialization. + + Rules: + - Timedelta values → ISO 8601 (iso) or integer (epoch) + - NaT MUST stay missing so JSON encodes it as null + """ + + # Fast-path: empty index + if len(index) == 0: + return index + + values = index._values # ndarray[td64] + result: list[object] = [] + + if date_format == "iso": + for val in values: + if isna(val): + # critical: preserve missing → JSON null + result.append("null") + else: + td = pd.Timedelta(val) + result.append(td.isoformat()) + + else: # epoch + if date_unit is None: + unit: DateUnit = "ms" + else: + unit = cast(DateUnit, date_unit) + + for val in values: + if isna(val): + result.append("null") + else: + td = pd.Timedelta(val).as_unit(unit) + result.append(int(td._value)) + + return Index(result, dtype=object) + + class Writer(ABC): _default_orient: str @@ -287,6 +332,12 @@ def obj_to_write(self) -> NDFrame | Mapping[IndexLabel, Any]: def _format_axes(self) -> None: if not self.obj.index.is_unique and self.orient == "index": raise ValueError(f"Series index must be unique for orient='{self.orient}'") + # FIX:GH#63236 format TimedeltaIndex labels correctly before ujson_dumps + if is_timedelta64_dtype(self.obj.index.dtype): + self.obj = self.obj.copy(deep=False) + self.obj.index = _format_timedelta_labels( + self.obj.index, self.date_format, self.date_unit + ) class FrameWriter(Writer): @@ -317,6 +368,29 @@ def _format_axes(self) -> None: raise ValueError( f"DataFrame columns must be unique for orient='{self.orient}'." ) + # FIX:GH#63236 format Timedelta labels (Index and Columns) correctly + if ( + not isinstance(self.obj.index, MultiIndex) + and is_timedelta64_dtype(self.obj.index.dtype) + ) or ( + not isinstance(self.obj.columns, MultiIndex) + and is_timedelta64_dtype(self.obj.columns.dtype) + ): + self.obj = self.obj.copy(deep=False) + + if not isinstance(self.obj.index, MultiIndex) and is_timedelta64_dtype( + self.obj.index.dtype + ): + self.obj.index = _format_timedelta_labels( + self.obj.index, self.date_format, self.date_unit + ) + + if not isinstance(self.obj.columns, MultiIndex) and is_timedelta64_dtype( + self.obj.columns.dtype + ): + self.obj.columns = _format_timedelta_labels( + self.obj.columns, self.date_format, self.date_unit + ) class JSONTableWriter(FrameWriter): diff --git a/pandas/tests/io/json/test_pandas.py b/pandas/tests/io/json/test_pandas.py index 5a3ec254c96b0..0b59958825e7a 100644 --- a/pandas/tests/io/json/test_pandas.py +++ b/pandas/tests/io/json/test_pandas.py @@ -1794,9 +1794,9 @@ def test_read_json_with_very_long_file_path(self, compression): "date_format,key", [("epoch", 86400000), ("iso", "P1DT0H0M0S")] ) def test_timedelta_as_label(self, date_format, key, unit, request): - if unit != "ns": - mark = pytest.mark.xfail(reason="GH#63236 failure to round-trip") - request.applymarker(mark) + # if unit != "ns": + # mark = pytest.mark.xfail(reason="GH#63236 failure to round-trip") + # request.applymarker(mark) df = DataFrame([[1]], columns=[pd.Timedelta("1D").as_unit(unit)]) expected = f'{{"{key}":{{"0":1}}}}'