Source code for nested_dask.accessor
# Python 3.9 doesn't support "|" for types
from __future__ import annotations
import dask.dataframe as dd
import nested_pandas as npd
from dask.dataframe.extensions import register_series_accessor
from nested_pandas import NestedDtype
@register_series_accessor("nest")
[docs]
class DaskNestSeriesAccessor(npd.NestSeriesAccessor):
"""The nested-dask version of the nested-pandas NestSeriesAccessor.
Note that this has a very limited implementation relative to nested-pandas.
Parameters
----------
series: dd.series
A series to tie to the accessor
"""
def __init__(self, series):
self._check_series(series)
self._series = series
@staticmethod
def _check_series(series):
"""chcek the validity of the tied series dtype"""
dtype = series.dtype
if not isinstance(dtype, NestedDtype):
raise AttributeError(f"Can only use .nest accessor with a Series of NestedDtype, got {dtype}")
@property
[docs]
def fields(self) -> list[str]:
"""Names of the nested columns"""
return list(self._series.dtype.fields)
[docs]
def to_lists(self, fields: list[str] | None = None) -> dd.DataFrame:
"""Convert nested series into dataframe of list-array columns
Parameters
----------
fields : list[str] or None, optional
Names of the fields to include. Default is None, which means all fields.
Returns
-------
dd.DataFrame
Dataframe of list-arrays.
"""
return self._series.map_partitions(lambda x: x.nest.to_lists(fields=fields))
[docs]
def to_flat(self, fields: list[str] | None = None) -> dd.DataFrame:
"""Convert nested series into dataframe of flat arrays
Parameters
----------
fields : list[str] or None, optional
Names of the fields to include. Default is None, which means all fields.
Returns
-------
dd.DataFrame
Dataframe of flat arrays.
"""
return self._series.map_partitions(lambda x: x.nest.to_flat(fields=fields))