Extensions

Pandas extensions for latent-calendar and primary interface for the package.

Provides a cal accessor to DataFrame and Series instances for easy transformation and plotting after import of latent_calendar.

Functionality includes:

aggregation of events to wide format
convolutions of wide formats
making transformations and predictions with models
plotting of events, predictions, and comparisons as calendars

Each DataFrame will be either at event level or an aggregated wide format.

Methods that end in row or by_row will be for wide format DataFrames and will plot each row as a calendar.

Examples:

Plotting an event level Series as a calendar

import pandas as pd
import latent_calendar

# Hourly timestamps spanning the first two weeks of 2023.
dates = pd.date_range("2023-01-01", "2023-01-14", freq="h")

# Keep a reproducible random sample of ten timestamps as the events.
ser = pd.Series(dates).sample(10, random_state=42)

# The `cal` accessor is available because latent_calendar has been imported.
ser.cal.plot()

Transform event level DataFrame to wide format and plot

from latent_calendar.datasets import load_online_transactions

df = load_online_transactions()

# (n_customer, n_timeslots)
df_wide = df.cal.aggregate_events("Customer ID", timestamp_col="InvoiceDate")

# Plot a reproducible sample of twelve rows, four calendars per grid row.
df_sample = df_wide.sample(n=12, random_state=42)
df_sample.cal.plot_by_row(max_cols=4)

Customer Transactions

Train a model and plot predictions

from latent_calendar import LatentCalendar

# Fit on the raw wide-format counts.
model = LatentCalendar(n_components=5, random_state=42)
model.fit(df_wide.to_numpy())

# Profile plots for the first two rows of the wide DataFrame.
first_two = df_wide.head(2)
first_two.cal.plot_profile_by_row(model=model)

`DataFrameAccessor`

DataFrame accessor for latent_calendar accessed through cal attribute of DataFrames.

Source code in latent_calendar/extensions.py

@pd.api.extensions.register_dataframe_accessor("cal")
class DataFrameAccessor:
    """DataFrame accessor for latent_calendar accessed through `cal` attribute of DataFrames.

    The methods operate on the DataFrame the accessor was created from. Most
    of them are thin wrappers that forward that DataFrame, together with the
    given options, to the transformation, model, and plotting helpers.
    """

    def __init__(self, pandas_obj: pd.DataFrame):
        # The DataFrame that `.cal` was accessed on.
        self._obj = pandas_obj

    def divide_by_max(self) -> pd.DataFrame:
        """Divide each row by the max value of that row.

        Returns:
            DataFrame with row-wise operations applied

        """
        return self._obj.div(self._obj.max(axis=1), axis=0)

    def divide_by_sum(self) -> pd.DataFrame:
        """Divide each row by the sum of the row.

        Returns:
            DataFrame with row-wise operations applied

        """
        return self._obj.div(self._obj.sum(axis=1), axis=0)

    def divide_by_even_rate(self) -> pd.DataFrame:
        """Divide every value by the even rate, i.e. `1 / number of columns`.

        Dividing by `1 / n_columns` is the same as multiplying by
        `n_columns`, which is how the result is computed.

        Returns:
            DataFrame with every value multiplied by the number of columns

        """
        value = self._obj.shape[1]
        return self._obj.mul(value)

    def normalize(self, kind: Literal["max", "probs", "even_rate"]) -> pd.DataFrame:
        """Row-wise operations on DataFrame.

        Dispatches to `divide_by_max`, `divide_by_sum` or
        `divide_by_even_rate` and always emits a `DeprecationWarning`.

        Args:
            kind: The normalization to apply.

        Returns:
            DataFrame with row-wise operations applied

        Raises:
            ValueError: if `kind` is not one of 'max', 'probs', 'even_rate'.

        """
        import warnings

        def warn(message):
            # stacklevel=3 skips this helper and `normalize` itself so the
            # warning is attributed to the code that called `normalize`.
            warnings.warn(message, DeprecationWarning, stacklevel=3)

        warning_message = "This method will be deprecated in future versions"

        funcs = {
            "max": self.divide_by_max,
            "probs": self.divide_by_sum,
            "even_rate": self.divide_by_even_rate,
        }

        if kind not in funcs:
            warn(warning_message)
            raise ValueError(
                f"kind must be one of ['max', 'probs', 'even_rate'], got {kind}"
            )

        func = funcs[kind]

        # Point the user at the dedicated replacement method.
        warning_message = f"{warning_message} in favor of df.cal.{func.__name__}()"
        warn(warning_message)

        return func()

    def conditional_probabilities(
        self,
        *,
        level: int | str = 0,
    ) -> pd.DataFrame:
        """Calculate conditional probabilities for each row over the level.

        Each value is divided by the row-wise sum of the column group it
        belongs to, where groups are defined by `level` of the columns.

        Args:
            level: level of the columns MultiIndex.
                Default 0 or day_of_week

        Returns:
            DataFrame with conditional probabilities

        Raises:
            ValueError: if the columns are not a MultiIndex.

        """
        if not isinstance(self._obj.columns, pd.MultiIndex):
            raise ValueError(
                "DataFrame is expected to have a MultiIndex with the last column as the vocab."
            )

        # Transpose so the column level can be grouped, sum per group, and
        # transpose back to get one total per row and group.
        return self._obj.div(
            self._obj.T.groupby(level=level).sum().T, level=level, axis=1
        )

    def timestamp_features(
        self,
        column: str,
        discretize: bool = True,
        minutes: int = 60,
        create_vocab: bool = True,
    ) -> pd.DataFrame:
        """Create day of week and proportion into day columns for event level DataFrame

        Exposed as a method on DataFrame for convenience. Use `cal.aggregate_events` instead to create the wide format DataFrame.

        Args:
            column: The name of the timestamp column.
            discretize: Whether to discretize the hour column.
            minutes: The number of minutes to discretize by. Ignored if `discretize` is False.
            create_vocab: Whether to create the vocab column.

        Returns:
            DataFrame with features added

        """
        transformer = create_timestamp_feature_pipeline(
            timestamp_col=column,
            discretize=discretize,
            create_vocab=create_vocab,
            minutes=minutes,
        )

        return transformer.fit_transform(self._obj)

    def widen(
        self,
        column: str,
        as_int: bool = True,
        minutes: int = 60,
        multiindex: bool = True,
    ) -> pd.DataFrame:
        """Transform an aggregated DataFrame to wide calendar format.

        Wrapper around `LongToWide` transformer to transform to wide format.

        Args:
            column: column to widen
            as_int: whether to cast the column to int
            minutes: passed to `LongToWide`; presumably the number of minutes
                each time slot spans
            multiindex: whether to use a MultiIndex

        Returns:
            DataFrame in wide format

        Raises:
            ValueError: if the index is not a MultiIndex.

        """
        if not isinstance(self._obj.index, pd.MultiIndex):
            raise ValueError(
                "DataFrame is expected to have a MultiIndex with the last column as the vocab."
            )

        transformer = LongToWide(
            col=column, as_int=as_int, minutes=minutes, multiindex=multiindex
        )

        return transformer.fit_transform(self._obj)

    def aggregate_events(
        self,
        by: str | list[str],
        timestamp_col: str,
        minutes: int = 60,
        as_multiindex: bool = True,
    ) -> pd.DataFrame:
        """Transform event level DataFrame to wide format with groups as index.

        Wrapper around `create_raw_to_vocab_transformer` to transform to wide format.

        Args:
            by: column(s) to use as index. When a list is given, the first
                entry is used as the id column and the rest as additional groups.
            timestamp_col: column to use as timestamp
            minutes: The number of minutes to discretize by.
            as_multiindex: whether to use MultiIndex columns

        Returns:
            DataFrame in wide format

        Raises:
            ValueError: if `by` is an empty list.

        """
        if isinstance(by, list):
            if not by:
                # Fail with a clear message instead of an unpacking error.
                raise ValueError("by must contain at least one column name")
            id_col, *additional_groups = by
        else:
            id_col = by
            additional_groups = None

        transformer = create_raw_to_vocab_transformer(
            id_col=id_col,
            timestamp_col=timestamp_col,
            minutes=minutes,
            additional_groups=additional_groups,
            as_multiindex=as_multiindex,
        )
        return transformer.fit_transform(self._obj)

    def sum_over_vocab(self, aggregation: str = "dow") -> pd.DataFrame:
        """Sum the wide format to day of week or hour of day.

        Args:
            aggregation: one of ['dow', 'hour']

        Returns:
            DataFrame with summed values

        Examples:
            Sum to day of week

            ```python
            df_dow = df_wide.cal.sum_over_vocab(aggregation='dow')
            ```

        """
        return sum_over_vocab(self._obj, aggregation=aggregation)

    def sum_next_hours(self, hours: int) -> pd.DataFrame:
        """Sum the wide format over next hours.

        Args:
            hours: number of hours to sum over

        Returns:
            DataFrame with summed values

        """
        return sum_next_hours(self._obj, hours=hours)

    def sum_over_segments(self, df_segments: pd.DataFrame) -> pd.DataFrame:
        """Sum the wide format over user defined segments.

        Args:
            df_segments: DataFrame in wide format with segments as index

        Returns:
            DataFrame with columns as the segments and summed values

        """
        return sum_over_segments(self._obj, df_segments=df_segments)

    def transform(self, *, model: LatentCalendar) -> pd.DataFrame:
        """Transform DataFrame with model.

        Applies the dimensionality reduction to each row of the DataFrame.

        Args:
            model: model to use for transformation

        Returns:
            DataFrame with transformed values

        """
        return transform_on_dataframe(self._obj, model=model)

    def predict(self, *, model: LatentCalendar) -> pd.DataFrame:
        """Predict DataFrame with model.

        Args:
            model: model to use for prediction

        Returns:
            DataFrame with predicted values (wide format)

        """
        return predict_on_dataframe(self._obj, model=model)

    def plot(
        self,
        start_col: str,
        *,
        end_col: str | None = None,
        duration: int | None = None,
        alpha: float | None = None,
        cmap=None,
        day_labeler: DayLabeler = DayLabeler(),
        time_labeler: TimeLabeler = TimeLabeler(),
        grid_lines: GridLines = GridLines(),
        monday_start: bool = True,
        ax: plt.Axes | None = None,
    ) -> plt.Axes:
        """Plot DataFrame of timestamps as a calendar.

        Args:
            start_col: column with start timestamp
            end_col: column with end timestamp
            duration: length of event in minutes. Alternative to end_col
            alpha: alpha value for the color
            cmap: function that maps floats to string colors
            day_labeler: DayLabeler instance to use for day labels
            time_labeler: TimeLabeler instance to use for time labels
            grid_lines: GridLines instance passed on to the plotting function
            monday_start: whether to start the week on Monday or Sunday
            ax: optional matplotlib axis to plot on

        Returns:
            Modified matplotlib axis

        """
        config = StartEndConfig(start=start_col, end=end_col, minutes=duration)

        return plot_dataframe_as_calendar(
            self._obj,
            config=config,
            alpha=alpha,
            cmap=cmap,
            day_labeler=day_labeler,
            time_labeler=time_labeler,
            grid_lines=grid_lines,
            monday_start=monday_start,
            ax=ax,
        )

    def plot_across_column(
        self,
        start_col: str,
        grid_col: str,
        *,
        end_col: str | None = None,
        duration: int | None = None,
        day_labeler: DayLabeler = DayLabeler(),
        time_labeler: TimeLabeler = TimeLabeler(),
        grid_lines: GridLines = GridLines(),
        max_cols: int = 3,
        alpha: float | None = None,
    ) -> None:
        """Plot DataFrame of timestamps as a calendar as grid across column values.

        NA values are excluded

        Args:
            start_col: column with start timestamp
            grid_col: column of values to use as grid
            end_col: column with end timestamp
            duration: length of event in minutes. Alternative to end_col
            day_labeler: DayLabeler instance to use for day labels
            time_labeler: TimeLabeler instance to use for time labels
            grid_lines: GridLines instance passed on to the plotting function
            max_cols: max number of columns per row
            alpha: alpha value for the color

        Returns:
            None

        """
        config = StartEndConfig(start=start_col, end=end_col, minutes=duration)

        plot_dataframe_grid_across_column(
            self._obj,
            grid_col=grid_col,
            config=config,
            max_cols=max_cols,
            alpha=alpha,
            day_labeler=day_labeler,
            time_labeler=time_labeler,
            grid_lines=grid_lines,
        )

    def plot_by_row(
        self,
        *,
        max_cols: int = 3,
        title_func: TITLE_FUNC | None = None,
        cmaps: CMAP | ColorMap | CMAP_GENERATOR | None = None,
        day_labeler: DayLabeler = DayLabeler(),
        time_labeler: TimeLabeler = TimeLabeler(),
        grid_lines: GridLines = GridLines(),
        monday_start: bool = True,
    ) -> None:
        """Plot each row of the DataFrame as a calendar plot. Data must have been transformed to wide format first.

        Wrapper around `latent_calendar.plot.plot_calendar_by_row`.

        Args:
            max_cols: max number of columns per row of grid
            title_func: function to generate title for each row
            cmaps: optional generator of colormaps
            day_labeler: function to generate day labels
            time_labeler: function to generate time labels
            grid_lines: optional grid lines
            monday_start: whether to start the week on Monday or Sunday

        Returns:
            None

        """
        # The wrapped function's result is passed straight through.
        return plot_calendar_by_row(
            self._obj,
            max_cols=max_cols,
            title_func=title_func,
            day_labeler=day_labeler,
            time_labeler=time_labeler,
            cmaps=cmaps,
            grid_lines=grid_lines,
            monday_start=monday_start,
        )

    def plot_profile_by_row(
        self,
        *,
        model: LatentCalendar,
        index_func=lambda idx: idx,
        include_components: bool = True,
        day_labeler: DayLabeler = DayLabeler(),
        time_labeler: TimeLabeler = TimeLabeler(),
    ) -> np.ndarray:
        """Plot each row of the DataFrame as a profile plot. Data must have been transformed to wide format first.

        Args:
            model: model to use for prediction and transform
            index_func: function to generate title for each row
            include_components: whether to include components in the plot
            day_labeler: DayLabeler instance to use for day labels
            time_labeler: TimeLabeler instance to use for time labels

        Returns:
            grid of axes

        """
        # Inside the method body this name resolves to the module-level
        # plotting function, not to this method.
        return plot_profile_by_row(
            self._obj,
            model=model,
            index_func=index_func,
            include_components=include_components,
            day_labeler=day_labeler,
            time_labeler=time_labeler,
        )

    def plot_raw_and_predicted_by_row(
        self,
        *,
        model: LatentCalendar,
        index_func=lambda idx: idx,
        day_labeler: DayLabeler = DayLabeler(),
        time_labeler: TimeLabeler = TimeLabeler(),
    ) -> np.ndarray:
        """Plot raw and predicted values for a model. Data must have been transformed to wide format first.

        Args:
            model: model to use for prediction
            index_func: function to generate title for each row
            day_labeler: DayLabeler instance to use for day labels
            time_labeler: TimeLabeler instance to use for time labels

        Returns:
            grid of axes

        """
        # Same plot as `plot_profile_by_row`, with the components left out.
        return plot_profile_by_row(
            self._obj,
            model=model,
            index_func=index_func,
            include_components=False,
            day_labeler=day_labeler,
            time_labeler=time_labeler,
        )

    def plot_model_predictions_by_row(
        self,
        df_holdout: pd.DataFrame,
        *,
        model: LatentCalendar,
        index_func=lambda idx: idx,
        divergent: bool = True,
        day_labeler: DayLabeler = DayLabeler(),
        time_labeler: TimeLabeler = TimeLabeler(),
    ) -> np.ndarray:
        """Plot model predictions for each row of the DataFrame. Data must have been transformed to wide format first.

        Args:
            df_holdout: holdout DataFrame for comparison
            model: model to use for prediction
            index_func: function to generate title for each row
            divergent: whether to use divergent colormap
            day_labeler: DayLabeler instance to use for day labels
            time_labeler: TimeLabeler instance to use for time labels

        Returns:
            grid of axes

        """
        # Inside the method body this name resolves to the module-level
        # plotting function, not to this method.
        return plot_model_predictions_by_row(
            self._obj,
            df_holdout=df_holdout,
            model=model,
            index_func=index_func,
            divergent=divergent,
            day_labeler=day_labeler,
            time_labeler=time_labeler,
        )

`aggregate_events(by, timestamp_col, minutes=60, as_multiindex=True)`

Transform event level DataFrame to wide format with groups as index.

Wrapper around create_raw_to_vocab_transformer to transform to wide format.

Parameters:

Name	Type	Description	Default
`by`	`str \| list[str]`	column(s) to use as index	required
`timestamp_col`	`str`	column to use as timestamp	required
`minutes`	`int`	The number of minutes to discretize by.	`60`
`as_multiindex`	`bool`	whether to use MultiIndex columns	`True`

Returns:

Type	Description
`DataFrame`	DataFrame in wide format

Source code in latent_calendar/extensions.py

def aggregate_events(
    self,
    by: str | list[str],
    timestamp_col: str,
    minutes: int = 60,
    as_multiindex: bool = True,
) -> pd.DataFrame:
    """Transform event level DataFrame to wide format with groups as index.

    Wrapper around `create_raw_to_vocab_transformer` to transform to wide format.

    Args:
        by: column(s) to use as index
        timestamp_col: column to use as timestamp
        minutes: The number of minutes to discretize by.
        as_multiindex: whether to use MultiIndex columns

    Returns:
        DataFrame in wide format

    """
    if not isinstance(by, list):
        id_col = by
        additional_groups = None
    else:
        id_col, *additional_groups = by

    transformer = create_raw_to_vocab_transformer(
        id_col=id_col,
        timestamp_col=timestamp_col,
        minutes=minutes,
        additional_groups=additional_groups,
        as_multiindex=as_multiindex,
    )
    return transformer.fit_transform(self._obj)

`conditional_probabilities(*, level=0)`

Calculate conditional probabilities for each row over the level.

Parameters:

Name	Type	Description	Default
`level`	`int \| str`	level of the columns MultiIndex. Default 0 or day_of_week	`0`

Returns:

Type	Description
`DataFrame`	DataFrame with conditional probabilities

Source code in latent_calendar/extensions.py

def conditional_probabilities(
    self,
    *,
    level: int | str = 0,
) -> pd.DataFrame:
    """Calculate conditional probabilities for each row over the level.

    Args:
        level: level of the columns MultiIndex.
            Default 0 or day_of_week

    Returns:
        DataFrame with conditional probabilities

    """
    if not isinstance(self._obj.columns, pd.MultiIndex):
        raise ValueError(
            "DataFrame is expected to have a MultiIndex with the last column as the vocab."
        )

    return self._obj.div(
        self._obj.T.groupby(level=level).sum().T, level=level, axis=1
    )

`divide_by_even_rate()`

Divide every value by the even rate, i.e. 1 / (number of columns). This is equivalent to multiplying every value by the number of columns.

Returns:

Type	Description
`DataFrame`	DataFrame with row-wise operations applied

Source code in latent_calendar/extensions.py

def divide_by_even_rate(self) -> pd.DataFrame:
    """Divide every value by the even rate, i.e. `1 / number of columns`.

    Dividing by `1 / n_columns` is the same as multiplying by `n_columns`,
    which is how the result is computed.

    Returns:
        DataFrame with every value multiplied by the number of columns

    """
    _, n_columns = self._obj.shape
    return self._obj.mul(n_columns)

`divide_by_max()`

Divide each row by the max value.

Returns:

Type	Description
`DataFrame`	DataFrame with row-wise operations applied

Source code in latent_calendar/extensions.py

def divide_by_max(self) -> pd.DataFrame:
    """Scale each row by its own maximum value.

    Returns:
        DataFrame with row-wise operations applied

    """
    frame = self._obj
    # One maximum per row, aligned back onto the rows via axis=0.
    row_max = frame.max(axis=1)
    return frame.div(row_max, axis=0)

`divide_by_sum()`

Divide each row by the sum of the row.

Returns:

Type	Description
`DataFrame`	DataFrame with row-wise operations applied

Source code in latent_calendar/extensions.py

def divide_by_sum(self) -> pd.DataFrame:
    """Scale each row by its own total.

    Returns:
        DataFrame with row-wise operations applied

    """
    frame = self._obj
    # One total per row, aligned back onto the rows via axis=0.
    row_total = frame.sum(axis=1)
    return frame.div(row_total, axis=0)

`normalize(kind)`

Row-wise operations on DataFrame.

Parameters:

Name	Type	Description	Default
`kind`	`Literal['max', 'probs', 'even_rate']`	The normalization to apply.	required

Returns:

Type	Description
`DataFrame`	DataFrame with row-wise operations applied

Source code in latent_calendar/extensions.py

def normalize(self, kind: Literal["max", "probs", "even_rate"]) -> pd.DataFrame:
    """Row-wise operations on DataFrame.

    Dispatches to `divide_by_max`, `divide_by_sum` or `divide_by_even_rate`
    and always emits a `DeprecationWarning`.

    Args:
        kind: The normalization to apply.

    Returns:
        DataFrame with row-wise operations applied

    Raises:
        ValueError: if `kind` is not one of 'max', 'probs', 'even_rate'.

    """
    import warnings

    dispatch = {
        "max": self.divide_by_max,
        "probs": self.divide_by_sum,
        "even_rate": self.divide_by_even_rate,
    }
    base_message = "This method will be deprecated in future versions"

    if kind not in dispatch:
        # stacklevel=2 attributes the warning to the caller of `normalize`.
        warnings.warn(base_message, DeprecationWarning, stacklevel=2)
        raise ValueError(
            f"kind must be one of ['max', 'probs', 'even_rate'], got {kind}"
        )

    chosen = dispatch[kind]

    # Point the user at the dedicated replacement method.
    warnings.warn(
        f"{base_message} in favor of df.cal.{chosen.__name__}()",
        DeprecationWarning,
        stacklevel=2,
    )

    return chosen()

`plot(start_col, *, end_col=None, duration=None, alpha=None, cmap=None, day_labeler=DayLabeler(), time_labeler=TimeLabeler(), grid_lines=GridLines(), monday_start=True, ax=None)`

Plot DataFrame of timestamps as a calendar.

Parameters:

Name	Type	Description	Default
`start_col`	`str`	column with start timestamp	required
`end_col`	`str \| None`	column with end timestamp	`None`
`duration`	`int \| None`	length of event in minutes. Alternative to end_col	`None`
`alpha`	`float`	alpha value for the color	`None`
`cmap`		function that maps floats to string colors	`None`
`monday_start`	`bool`	whether to start the week on Monday or Sunday	`True`
`ax`	`Axes \| None`	optional matplotlib axis to plot on	`None`

Returns:

Type	Description
`Axes`	Modified matplotlib axis

Source code in latent_calendar/extensions.py

def plot(
    self,
    start_col: str,
    *,
    end_col: str | None = None,
    duration: int | None = None,
    alpha: float | None = None,
    cmap=None,
    day_labeler: DayLabeler = DayLabeler(),
    time_labeler: TimeLabeler = TimeLabeler(),
    grid_lines: GridLines = GridLines(),
    monday_start: bool = True,
    ax: plt.Axes | None = None,
) -> plt.Axes:
    """Plot DataFrame of timestamps as a calendar.

    Args:
        start_col: column with start timestamp
        end_col: column with end timestamp
        duration: length of event in minutes. Alternative to end_col
        alpha: alpha value for the color
        cmap: function that maps floats to string colors
        day_labeler: DayLabeler instance to use for day labels
        time_labeler: TimeLabeler instance to use for time labels
        grid_lines: GridLines instance passed on to the plotting function
        monday_start: whether to start the week on Monday or Sunday
        ax: optional matplotlib axis to plot on

    Returns:
        Modified matplotlib axis

    """
    config = StartEndConfig(start=start_col, end=end_col, minutes=duration)

    return plot_dataframe_as_calendar(
        self._obj,
        config=config,
        alpha=alpha,
        cmap=cmap,
        day_labeler=day_labeler,
        time_labeler=time_labeler,
        grid_lines=grid_lines,
        monday_start=monday_start,
        ax=ax,
    )

`plot_across_column(start_col, grid_col, *, end_col=None, duration=None, day_labeler=DayLabeler(), time_labeler=TimeLabeler(), grid_lines=GridLines(), max_cols=3, alpha=None)`

Plot DataFrame of timestamps as a calendar as grid across column values.

NA values are excluded

Parameters:

Name	Type	Description	Default
`start_col`	`str`	column with start timestamp	required
`grid_col`	`str`	column of values to use as grid	required
`end_col`	`str \| None`	column with end timestamp	`None`
`duration`	`int \| None`	length of event in minutes. Alternative to end_col	`None`
`max_cols`	`int`	max number of columns per row	`3`
`alpha`	`float`	alpha value for the color	`None`

Returns:

Type	Description
`None`	None

Source code in latent_calendar/extensions.py

def plot_across_column(
    self,
    start_col: str,
    grid_col: str,
    *,
    end_col: str | None = None,
    duration: int | None = None,
    day_labeler: DayLabeler = DayLabeler(),
    time_labeler: TimeLabeler = TimeLabeler(),
    grid_lines: GridLines = GridLines(),
    max_cols: int = 3,
    alpha: float | None = None,
) -> None:
    """Plot DataFrame of timestamps as a calendar as grid across column values.

    NA values are excluded

    Args:
        start_col: column with start timestamp
        grid_col: column of values to use as grid
        end_col: column with end timestamp
        duration: length of event in minutes. Alternative to end_col
        day_labeler: DayLabeler instance to use for day labels
        time_labeler: TimeLabeler instance to use for time labels
        grid_lines: GridLines instance passed on to the plotting function
        max_cols: max number of columns per row
        alpha: alpha value for the color

    Returns:
        None

    """
    config = StartEndConfig(start=start_col, end=end_col, minutes=duration)

    plot_dataframe_grid_across_column(
        self._obj,
        grid_col=grid_col,
        config=config,
        max_cols=max_cols,
        alpha=alpha,
        day_labeler=day_labeler,
        time_labeler=time_labeler,
        grid_lines=grid_lines,
    )

`plot_by_row(*, max_cols=3, title_func=None, cmaps=None, day_labeler=DayLabeler(), time_labeler=TimeLabeler(), grid_lines=GridLines(), monday_start=True)`

Plot each row of the DataFrame as a calendar plot. Data must have been transformed to wide format first.

Wrapper around latent_calendar.plot.plot_calendar_by_row.

Parameters:

Name	Type	Description	Default
`max_cols`	`int`	max number of columns per row of grid	`3`
`title_func`	`TITLE_FUNC \| None`	function to generate title for each row	`None`
`day_labeler`	`DayLabeler`	function to generate day labels	`DayLabeler()`
`time_labeler`	`TimeLabeler`	function to generate time labels	`TimeLabeler()`
`cmaps`	`CMAP \| ColorMap \| CMAP_GENERATOR \| None`	optional generator of colormaps	`None`
`grid_lines`	`GridLines`	optional grid lines	`GridLines()`
`monday_start`	`bool`	whether to start the week on Monday or Sunday	`True`

Returns:

Type	Description
`None`	None

Source code in latent_calendar/extensions.py

def plot_by_row(
    self,
    *,
    max_cols: int = 3,
    title_func: TITLE_FUNC | None = None,
    cmaps: CMAP | ColorMap | CMAP_GENERATOR | None = None,
    day_labeler: DayLabeler = DayLabeler(),
    time_labeler: TimeLabeler = TimeLabeler(),
    grid_lines: GridLines = GridLines(),
    monday_start: bool = True,
) -> None:
    """Draw one calendar plot per row of a wide format DataFrame.

    Data must have been transformed to wide format first. Wrapper around
    `latent_calendar.plot.plot_calendar_by_row`.

    Args:
        max_cols: max number of columns per row of grid
        title_func: function to generate title for each row
        cmaps: optional generator of colormaps
        day_labeler: function to generate day labels
        time_labeler: function to generate time labels
        grid_lines: optional grid lines
        monday_start: whether to start the week on Monday or Sunday

    Returns:
        None

    """
    # Every option is forwarded unchanged to the plotting function.
    plot_options = {
        "max_cols": max_cols,
        "title_func": title_func,
        "day_labeler": day_labeler,
        "time_labeler": time_labeler,
        "cmaps": cmaps,
        "grid_lines": grid_lines,
        "monday_start": monday_start,
    }
    return plot_calendar_by_row(self._obj, **plot_options)

`plot_model_predictions_by_row(df_holdout, *, model, index_func=lambda idx: idx, divergent=True, day_labeler=DayLabeler(), time_labeler=TimeLabeler())`

Plot model predictions for each row of the DataFrame. Data must have been transformed to wide format first.

Parameters:

Name	Type	Description	Default
`df_holdout`	`DataFrame`	holdout DataFrame for comparison	required
`model`	`LatentCalendar`	model to use for prediction	required
`index_func`		function to generate title for each row	`lambda idx: idx`
`divergent`	`bool`	whether to use divergent colormap	`True`
`day_labeler`	`DayLabeler`	DayLabeler instance to use for day labels	`DayLabeler()`
`time_labeler`	`TimeLabeler`	TimeLabeler instance to use for time labels	`TimeLabeler()`

Returns:

Type	Description
`ndarray`	grid of axes

Source code in latent_calendar/extensions.py

def plot_model_predictions_by_row(
    self,
    df_holdout: pd.DataFrame,
    *,
    model: LatentCalendar,
    index_func=lambda idx: idx,
    divergent: bool = True,
    day_labeler: DayLabeler = DayLabeler(),
    time_labeler: TimeLabeler = TimeLabeler(),
) -> np.ndarray:
    """Plot the model predictions for every row of a wide format DataFrame.

    Data must have been transformed to wide format first.

    Args:
        df_holdout: holdout DataFrame for comparison
        model: model to use for prediction
        index_func: function to generate title for each row
        divergent: whether to use divergent colormap
        day_labeler: DayLabeler instance to use for day labels
        time_labeler: TimeLabeler instance to use for time labels

    Returns:
        grid of axes

    """
    # Every option is forwarded unchanged to the plotting function.
    plot_options = {
        "df_holdout": df_holdout,
        "model": model,
        "index_func": index_func,
        "divergent": divergent,
        "day_labeler": day_labeler,
        "time_labeler": time_labeler,
    }
    return plot_model_predictions_by_row(self._obj, **plot_options)

`plot_profile_by_row(*, model, index_func=lambda idx: idx, include_components=True, day_labeler=DayLabeler(), time_labeler=TimeLabeler())`

Plot each row of the DataFrame as a profile plot. Data must have been transformed to wide format first.

Parameters:

Name	Type	Description	Default
`model`	`LatentCalendar`	model to use for prediction and transform	required
`index_func`		function to generate title for each row	`lambda idx: idx`
`include_components`	`bool`	whether to include components in the plot	`True`
`day_labeler`	`DayLabeler`	DayLabeler instance to use for day labels	`DayLabeler()`
`time_labeler`	`TimeLabeler`	TimeLabeler instance to use for time labels	`TimeLabeler()`

Returns:

Type	Description
`ndarray`	grid of axes

Source code in latent_calendar/extensions.py

def plot_profile_by_row(
    self,
    *,
    model: LatentCalendar,
    index_func=lambda idx: idx,
    include_components: bool = True,
    day_labeler: DayLabeler = DayLabeler(),
    time_labeler: TimeLabeler = TimeLabeler(),
) -> np.ndarray:
    """Draw a profile plot for every row of a wide format DataFrame.

    Data must have been transformed to wide format first.

    Args:
        model: model to use for prediction and transform
        index_func: function to generate title for each row
        include_components: whether to include components in the plot
        day_labeler: DayLabeler instance to use for day labels
        time_labeler: TimeLabeler instance to use for time labels

    Returns:
        grid of axes

    """
    # Every option is forwarded unchanged to the plotting function.
    plot_options = {
        "model": model,
        "index_func": index_func,
        "include_components": include_components,
        "day_labeler": day_labeler,
        "time_labeler": time_labeler,
    }
    return plot_profile_by_row(self._obj, **plot_options)

`plot_raw_and_predicted_by_row(*, model, index_func=lambda idx: idx, day_labeler=DayLabeler(), time_labeler=TimeLabeler())`

Plot raw and predicted values for a model. Data must have been transformed to wide format first.

Parameters:

Name	Type	Description	Default
`model`	`LatentCalendar`	model to use for prediction	required
`index_func`		function to generate title for each row	`lambda idx: idx`
`day_labeler`	`DayLabeler`	DayLabeler instance to use for day labels	`DayLabeler()`
`time_labeler`	`TimeLabeler`	TimeLabeler instance to use for time labels	`TimeLabeler()`

Returns:

Type	Description
`ndarray`	grid of axes

Source code in latent_calendar/extensions.py

def plot_raw_and_predicted_by_row(
    self,
    *,
    model: LatentCalendar,
    index_func=lambda idx: idx,
    day_labeler: DayLabeler = DayLabeler(),
    time_labeler: TimeLabeler = TimeLabeler(),
) -> np.ndarray:
    """Show the raw and the predicted values of a model for every row.

    The DataFrame must already be in wide format. This is the profile plot
    with the components switched off.

    Args:
        model: model used for prediction
        index_func: callable that produces the title for each row
        day_labeler: DayLabeler instance used for the day labels
        time_labeler: TimeLabeler instance used for the time labels

    Returns:
        grid of axes

    """
    # Same call as the profile plot, but components are always excluded.
    options = {
        "model": model,
        "index_func": index_func,
        "include_components": False,
        "day_labeler": day_labeler,
        "time_labeler": time_labeler,
    }
    return plot_profile_by_row(self._obj, **options)

`predict(*, model)`

Predict DataFrame with model.

Parameters:

Name	Type	Description	Default
`model`	`LatentCalendar`	model to use for prediction	required

Returns:

Type	Description
`DataFrame`	DataFrame with predicted values (wide format)

Source code in latent_calendar/extensions.py

def predict(self, *, model: LatentCalendar) -> pd.DataFrame:
    """Predict on the DataFrame with the given model.

    Args:
        model: model used for prediction

    Returns:
        DataFrame with predicted values (wide format)

    """
    frame = self._obj
    return predict_on_dataframe(frame, model=model)

`sum_next_hours(hours)`

Sum the wide format over next hours.

Parameters:

Name	Type	Description	Default
`hours`	`int`	number of hours to sum over	required

Returns:

Type	Description
`DataFrame`	DataFrame with summed values

Source code in latent_calendar/extensions.py

def sum_next_hours(self, hours: int) -> pd.DataFrame:
    """Sum the wide format DataFrame over the next hours.

    Args:
        hours: how many hours to sum over

    Returns:
        DataFrame with summed values

    """
    frame = self._obj
    return sum_next_hours(frame, hours=hours)

`sum_over_segments(df_segments)`

Sum the wide format over user defined segments.

Parameters:

Name	Type	Description	Default
`df_segments`	`DataFrame`	DataFrame in wide format with segments as index	required

Returns:

Type	Description
`DataFrame`	DataFrame with columns as the segments and summed values

Source code in latent_calendar/extensions.py

def sum_over_segments(self, df_segments: pd.DataFrame) -> pd.DataFrame:
    """Sum the wide format DataFrame over user defined segments.

    Args:
        df_segments: wide format DataFrame whose index holds the segments

    Returns:
        DataFrame with columns as the segments and summed values

    """
    frame = self._obj
    return sum_over_segments(frame, df_segments=df_segments)

`sum_over_vocab(aggregation='dow')`

Sum the wide format to day of week or hour of day.

Parameters:

Name	Type	Description	Default
`aggregation`	`str`	one of ['dow', 'hour']	`'dow'`

Returns:

Type	Description
`DataFrame`	DataFrame with summed values

Examples:

Sum to day of week

df_dow = df_wide.cal.sum_over_vocab(aggregation='dow')

Source code in latent_calendar/extensions.py

def sum_over_vocab(self, aggregation: str = "dow") -> pd.DataFrame:
    """Collapse the wide format to day of week or hour of day by summing.

    Args:
        aggregation: one of ['dow', 'hour']

    Returns:
        DataFrame with summed values

    Examples:
        Sum to day of week

        ```python
        df_dow = df_wide.cal.sum_over_vocab(aggregation='dow')
        ```

    """
    frame = self._obj
    return sum_over_vocab(frame, aggregation=aggregation)

`timestamp_features(column, discretize=True, minutes=60, create_vocab=True)`

Create day of week and proportion into day columns for event level DataFrame

Exposed as a method on DataFrame for convenience. Use cal.aggregate_events instead to create the wide format DataFrame.

Parameters:

Name	Type	Description	Default
`column`	`str`	The name of the timestamp column.	required
`discretize`	`bool`	Whether to discretize the hour column.	`True`
`minutes`	`int`	The number of minutes to discretize by. Ignored if `discretize` is False.	`60`
`create_vocab`	`bool`	Whether to create the vocab column.	`True`

Returns:

Type	Description
`DataFrame`	DataFrame with features added

Source code in latent_calendar/extensions.py

def timestamp_features(
    self,
    column: str,
    discretize: bool = True,
    minutes: int = 60,
    create_vocab: bool = True,
) -> pd.DataFrame:
    """Add day of week and proportion into day columns to an event level DataFrame.

    Offered on the DataFrame for convenience. To build the wide format
    DataFrame, use `cal.aggregate_events` instead.

    Args:
        column: The name of the timestamp column.
        discretize: Whether to discretize the hour column.
        minutes: The number of minutes to discretize by. Ignored if `discretize` is False.
        create_vocab: Whether to create the vocab column.

    Returns:
        DataFrame with features added

    """
    pipeline = create_timestamp_feature_pipeline(
        timestamp_col=column,
        discretize=discretize,
        minutes=minutes,
        create_vocab=create_vocab,
    )
    features = pipeline.fit_transform(self._obj)
    return features

`transform(*, model)`

Transform DataFrame with model.

Applies the dimensionality reduction to each row of the DataFrame.

Parameters:

Name	Type	Description	Default
`model`	`LatentCalendar`	model to use for transformation	required

Returns:

Type	Description
`DataFrame`	DataFrame with transformed values

Source code in latent_calendar/extensions.py

def transform(self, *, model: LatentCalendar) -> pd.DataFrame:
    """Transform the DataFrame with the given model.

    The dimensionality reduction is applied to each row of the DataFrame.

    Args:
        model: model used for the transformation

    Returns:
        DataFrame with transformed values

    """
    frame = self._obj
    return transform_on_dataframe(frame, model=model)

`widen(column, as_int=True, minutes=60, multiindex=True)`

Transform an aggregated DataFrame to wide calendar format.

Wrapper around LongToWide transformer to transform to wide format.

Parameters:

Name	Type	Description	Default
`column`	`str`	column to widen	required
`as_int`	`bool`	whether to cast the column to int	`True`
`minutes`	`int`	number of minutes to discretize by	`60`
`multiindex`	`bool`	whether to use a MultiIndex	`True`

Returns:

Type	Description
`DataFrame`	DataFrame in wide format

Source code in latent_calendar/extensions.py

def widen(
    self,
    column: str,
    as_int: bool = True,
    minutes: int = 60,
    multiindex: bool = True,
) -> pd.DataFrame:
    """Reshape an aggregated DataFrame into the wide calendar format.

    Thin wrapper that fits and applies a `LongToWide` transformer.

    Args:
        column: column to widen
        as_int: whether to cast the column to int
        minutes: number of minutes forwarded to `LongToWide`
            (presumably the discretization width used elsewhere in this module)
        multiindex: whether to use a MultiIndex

    Returns:
        DataFrame in wide format

    Raises:
        ValueError: if the DataFrame index is not a MultiIndex

    """
    frame = self._obj
    if isinstance(frame.index, pd.MultiIndex):
        to_wide = LongToWide(
            col=column, as_int=as_int, minutes=minutes, multiindex=multiindex
        )
        return to_wide.fit_transform(frame)

    raise ValueError(
        "DataFrame is expected to have a MultiIndex with the last column as the vocab."
    )

`SeriesAccessor`

Series accessor for latent_calendar accessed through cal attribute of Series.

Source code in latent_calendar/extensions.py

@pd.api.extensions.register_series_accessor("cal")
class SeriesAccessor:
    """Series accessor for latent_calendar accessed through `cal` attribute of Series."""

    def __init__(self, pandas_obj: pd.Series):
        self._obj = pandas_obj

    def aggregate_events(
        self,
        minutes: int = 60,
        as_multiindex: bool = True,
    ) -> pd.Series:
        """Transform event level Series to row of wide format.

        The Series is placed in a single-column DataFrame together with a
        constant grouping column, aggregated with the DataFrame accessor, and
        the single resulting row is returned.

        Args:
            minutes: The number of minutes to discretize by.
            as_multiindex: whether to use MultiIndex columns

        Returns:
            Series that would be row of wide format

        Examples:
            Discretize datetime Series to 30 minutes

            ```python
            import pandas as pd

            import matplotlib.pyplot as plt

            from latent_calendar.datasets import load_chicago_bikes

            df_trips = load_chicago_bikes()

            start_times = df_trips["started_at"]

            agg_start_times = start_times.cal.aggregate_events(minutes=30)
            agg_start_times.cal.plot_row()
            plt.show()
            ```

        """
        # A falsy name (e.g. an unnamed Series) falls back to "timestamp".
        name = self._obj.name or "timestamp"
        # The constant grouping column must not share the timestamp column's
        # name; otherwise `assign` would overwrite the timestamps themselves.
        group_col = "tmp" if name != "tmp" else "tmp_group"
        return (
            self._obj.rename(name)
            .to_frame()
            .assign(**{group_col: 1})
            .cal.aggregate_events(
                by=group_col,
                timestamp_col=name,
                minutes=minutes,
                as_multiindex=as_multiindex,
            )
            # Only one group exists, so the first row is the whole result.
            .iloc[0]
            .rename(name)
        )

    def timestamp_features(
        self, discretize: bool = True, minutes: int = 60, create_vocab: bool = True
    ) -> pd.DataFrame:
        """Create day of week and proportion into day columns.

        Exposed as a method on Series for convenience.

        Args:
            discretize: Whether to discretize the hour column.
            minutes: The number of minutes to discretize by. Ignored if `discretize` is False.
            create_vocab: Whether to create the vocab column.

        Returns:
            DataFrame with features

        Examples:
            Create the features for some dates

            ```python
            ser = pd.Series(pd.date_range("2023-01-01", "2023-01-14", freq="h"))

            ser.cal.timestamp_features()
            ```

            ```text
                        timestamp  day_of_week  hour
            0   2023-01-01 00:00:00            6   0.0
            1   2023-01-01 01:00:00            6   1.0
            2   2023-01-01 02:00:00            6   2.0
            3   2023-01-01 03:00:00            6   3.0
            4   2023-01-01 04:00:00            6   4.0
            ..                  ...          ...   ...
            308 2023-01-13 20:00:00            4  20.0
            309 2023-01-13 21:00:00            4  21.0
            310 2023-01-13 22:00:00            4  22.0
            311 2023-01-13 23:00:00            4  23.0
            312 2023-01-14 00:00:00            5   0.0

            [313 rows x 3 columns]
            ```

        """
        # A falsy name (e.g. an unnamed Series) falls back to "timestamp".
        name = self._obj.name or "timestamp"
        transformer = create_timestamp_feature_pipeline(
            timestamp_col=name,
            discretize=discretize,
            minutes=minutes,
            create_vocab=create_vocab,
        )

        return transformer.fit_transform(self._obj.rename(name).to_frame())

    def conditional_probabilities(
        self,
        *,
        level: int | str = 0,
    ) -> pd.Series:
        """Calculate conditional probabilities for the row over the level.

        Each value is divided by the sum of its group on the given level.

        Args:
            level: level of the Series' MultiIndex.
                Default 0 or day_of_week

        Returns:
            Series with conditional probabilities

        Raises:
            ValueError: if the Series index is not a MultiIndex

        """

        if not isinstance(self._obj.index, pd.MultiIndex):
            raise ValueError(
                "Series is expected to have a MultiIndex with the last column as the vocab."
            )

        return self._obj.div(self._obj.groupby(level=level).sum(), level=level)

    def plot(
        self,
        *,
        duration: int = 5,
        alpha: float | None = None,
        cmap=None,
        day_labeler: DayLabeler = DayLabeler(),
        time_labeler: TimeLabeler = TimeLabeler(),
        grid_lines: GridLines = GridLines(),
        monday_start: bool = True,
        ax: plt.Axes | None = None,
    ) -> plt.Axes:
        """Plot Series of timestamps as a calendar.

        Args:
            duration: duration of each event in minutes
            alpha: alpha value for the color
            cmap: function that maps floats to string colors
            day_labeler: DayLabeler instance
            time_labeler: TimeLabeler instance
            grid_lines: GridLines instance
            monday_start: whether to start the week on Monday or Sunday
            ax: matplotlib axis to plot on

        Returns:
            Modified matplotlib axis

        """
        # Use a fixed column name so the config can refer to the start column
        # regardless of what the Series itself is called.
        tmp_name = "tmp_name"
        config = StartEndConfig(start=tmp_name, end=None, minutes=duration)

        return plot_dataframe_as_calendar(
            self._obj.rename(tmp_name).to_frame(),
            config=config,
            alpha=alpha,
            cmap=cmap,
            monday_start=monday_start,
            day_labeler=day_labeler,
            time_labeler=time_labeler,
            grid_lines=grid_lines,
            ax=ax,
        )

    def plot_row(
        self,
        *,
        alpha: float | None = None,
        cmap=None,
        day_labeler: DayLabeler = DayLabeler(),
        time_labeler: TimeLabeler = TimeLabeler(),
        grid_lines: GridLines = GridLines(),
        monday_start: bool = True,
        ax: plt.Axes | None = None,
    ) -> plt.Axes:
        """Plot a Series that is a row of the wide format as a calendar.

        Args:
            alpha: alpha value for the color
            cmap: function that maps floats to string colors
            day_labeler: DayLabeler instance
            time_labeler: TimeLabeler instance
            grid_lines: GridLines instance
            monday_start: whether to start the week on Monday or Sunday
            ax: matplotlib axis to plot on

        Returns:
            Modified matplotlib axis

        """
        return plot_series_as_calendar(
            self._obj,
            alpha=alpha,
            cmap=cmap,
            ax=ax,
            monday_start=monday_start,
            day_labeler=day_labeler,
            time_labeler=time_labeler,
            grid_lines=grid_lines,
        )

`aggregate_events(minutes=60, as_multiindex=True)`

Transform event level Series to row of wide format.

Parameters:

Name	Type	Description	Default
`minutes`	`int`	The number of minutes to discretize by.	`60`
`as_multiindex`	`bool`	whether to use MultiIndex columns	`True`

Returns:

Type	Description
`Series`	Series that would be row of wide format

Examples:

Discretize datetime Series to 30 minutes

import pandas as pd

import matplotlib.pyplot as plt

from latent_calendar.datasets import load_chicago_bikes

df_trips = load_chicago_bikes()

start_times = df_trips["started_at"]

agg_start_times = start_times.cal.aggregate_events(minutes=30)
agg_start_times.cal.plot_row()
plt.show()

Source code in latent_calendar/extensions.py

def aggregate_events(
    self,
    minutes: int = 60,
    as_multiindex: bool = True,
) -> pd.Series:
    """Transform event level Series to row of wide format.

    The Series is placed in a single-column DataFrame together with a
    constant grouping column, aggregated with the DataFrame accessor, and
    the single resulting row is returned.

    Args:
        minutes: The number of minutes to discretize by.
        as_multiindex: whether to use MultiIndex columns

    Returns:
        Series that would be row of wide format

    Examples:
        Discretize datetime Series to 30 minutes

        ```python
        import pandas as pd

        import matplotlib.pyplot as plt

        from latent_calendar.datasets import load_chicago_bikes

        df_trips = load_chicago_bikes()

        start_times = df_trips["started_at"]

        agg_start_times = start_times.cal.aggregate_events(minutes=30)
        agg_start_times.cal.plot_row()
        plt.show()
        ```

    """
    # A falsy name (e.g. an unnamed Series) falls back to "timestamp".
    name = self._obj.name or "timestamp"
    # The constant grouping column must not share the timestamp column's
    # name; otherwise `assign` would overwrite the timestamps themselves.
    group_col = "tmp" if name != "tmp" else "tmp_group"
    return (
        self._obj.rename(name)
        .to_frame()
        .assign(**{group_col: 1})
        .cal.aggregate_events(
            by=group_col,
            timestamp_col=name,
            minutes=minutes,
            as_multiindex=as_multiindex,
        )
        # Only one group exists, so the first row is the whole result.
        .iloc[0]
        .rename(name)
    )

`conditional_probabilities(*, level=0)`

Calculate conditional probabilities for the row over the level.

Parameters:

Name	Type	Description	Default
`level`	`int \| str`	level of the column MultiIndex. Default 0 or day_of_week	`0`

Returns:

Type	Description
`Series`	Series with conditional probabilities

Source code in latent_calendar/extensions.py

def conditional_probabilities(
    self,
    *,
    level: int | str = 0,
) -> pd.Series:
    """Calculate conditional probabilities for each the row over the level.

    Args:
        level: level of the column MultiIndex.
            Default 0 or day_of_week

    Returns:
        Series with conditional probabilities

    """

    if not isinstance(self._obj.index, pd.MultiIndex):
        raise ValueError(
            "Series is expected to have a MultiIndex with the last column as the vocab."
        )

    return self._obj.div(self._obj.groupby(level=level).sum(), level=level)

`plot(*, duration=5, alpha=None, cmap=None, day_labeler=DayLabeler(), time_labeler=TimeLabeler(), grid_lines=GridLines(), monday_start=True, ax=None)`

Plot Series of timestamps as a calendar.

Parameters:

Name	Type	Description	Default
`duration`	`int`	duration of each event in minutes	`5`
`alpha`	`float`	alpha value for the color	`None`
`cmap`		function that maps floats to string colors	`None`
`day_labeler`	`DayLabeler`	DayLabeler instance	`DayLabeler()`
`time_labeler`	`TimeLabeler`	TimeLabeler instance	`TimeLabeler()`
`grid_lines`	`GridLines`	GridLines instance	`GridLines()`
`monday_start`	`bool`	whether to start the week on Monday or Sunday	`True`
`ax`	`Axes \| None`	matplotlib axis to plot on	`None`

Returns:

Type	Description
`Axes`	Modified matplotlib axis

Source code in latent_calendar/extensions.py

def plot(
    self,
    *,
    duration: int = 5,
    alpha: float | None = None,
    cmap=None,
    day_labeler: DayLabeler = DayLabeler(),
    time_labeler: TimeLabeler = TimeLabeler(),
    grid_lines: GridLines = GridLines(),
    monday_start: bool = True,
    ax: plt.Axes | None = None,
) -> plt.Axes:
    """Plot Series of timestamps as a calendar.

    Args:
        duration: duration of each event in minutes
        alpha: alpha value for the color
        cmap: function that maps floats to string colors
        day_labeler: DayLabeler instance
        time_labeler: TimeLabeler instance
        grid_lines: GridLines instance
        monday_start: whether to start the week on Monday or Sunday
        ax: matplotlib axis to plot on

    Returns:
        Modified matplotlib axis

    """
    # Use a fixed column name so the config can refer to the start column
    # regardless of what the Series itself is called.
    tmp_name = "tmp_name"
    config = StartEndConfig(start=tmp_name, end=None, minutes=duration)

    return plot_dataframe_as_calendar(
        self._obj.rename(tmp_name).to_frame(),
        config=config,
        alpha=alpha,
        cmap=cmap,
        monday_start=monday_start,
        day_labeler=day_labeler,
        time_labeler=time_labeler,
        grid_lines=grid_lines,
        ax=ax,
    )

`plot_row(*, alpha=None, cmap=None, day_labeler=DayLabeler(), time_labeler=TimeLabeler(), grid_lines=GridLines(), monday_start=True, ax=None)`

Plot a Series that is a row of the wide format as a calendar.

Parameters:

Name	Type	Description	Default
`alpha`	`float`	alpha value for the color	`None`
`cmap`		function that maps floats to string colors	`None`
`monday_start`	`bool`	whether to start the week on Monday or Sunday	`True`
`ax`	`Axes \| None`	matplotlib axis to plot on	`None`

Returns:

Type	Description
`Axes`	Modified matplotlib axis

Source code in latent_calendar/extensions.py

def plot_row(
    self,
    *,
    alpha: float | None = None,
    cmap=None,
    day_labeler: DayLabeler = DayLabeler(),
    time_labeler: TimeLabeler = TimeLabeler(),
    grid_lines: GridLines = GridLines(),
    monday_start: bool = True,
    ax: plt.Axes | None = None,
) -> plt.Axes:
    """Plot a Series that is a row of the wide format as a calendar.

    Args:
        alpha: alpha value for the color
        cmap: function that maps floats to string colors
        day_labeler: DayLabeler instance
        time_labeler: TimeLabeler instance
        grid_lines: GridLines instance
        monday_start: whether to start the week on Monday or Sunday
        ax: matplotlib axis to plot on

    Returns:
        Modified matplotlib axis

    """
    return plot_series_as_calendar(
        self._obj,
        alpha=alpha,
        cmap=cmap,
        ax=ax,
        monday_start=monday_start,
        day_labeler=day_labeler,
        time_labeler=time_labeler,
        grid_lines=grid_lines,
    )

`timestamp_features(discretize=True, minutes=60, create_vocab=True)`

Create day of week and proportion into day columns.

Exposed as a method on Series for convenience.

Parameters:

Name	Type	Description	Default
`discretize`	`bool`	Whether to discretize the hour column.	`True`
`minutes`	`int`	The number of minutes to discretize by. Ignored if `discretize` is False.	`60`
`create_vocab`	`bool`	Whether to create the vocab column.	`True`

Returns:

Type	Description
`DataFrame`	DataFrame with features

Examples:

Create the features for some dates

ser = pd.Series(pd.date_range("2023-01-01", "2023-01-14", freq="h"))

ser.cal.timestamp_features()

            timestamp  day_of_week  hour
0   2023-01-01 00:00:00            6   0.0
1   2023-01-01 01:00:00            6   1.0
2   2023-01-01 02:00:00            6   2.0
3   2023-01-01 03:00:00            6   3.0
4   2023-01-01 04:00:00            6   4.0
..                  ...          ...   ...
308 2023-01-13 20:00:00            4  20.0
309 2023-01-13 21:00:00            4  21.0
310 2023-01-13 22:00:00            4  22.0
311 2023-01-13 23:00:00            4  23.0
312 2023-01-14 00:00:00            5   0.0

[313 rows x 3 columns]

Source code in latent_calendar/extensions.py

def timestamp_features(
    self, discretize: bool = True, minutes: int = 60, create_vocab: bool = True
) -> pd.DataFrame:
    """Build day of week and proportion into day columns from the Series.

    Offered on the Series for convenience. A Series without a (truthy) name
    is given the column name "timestamp".

    Args:
        discretize: Whether to discretize the hour column.
        minutes: The number of minutes to discretize by. Ignored if `discretize` is False.
        create_vocab: Whether to create the vocab column.

    Returns:
        DataFrame with features

    Examples:
        Create the features for some dates

        ```python
        ser = pd.Series(pd.date_range("2023-01-01", "2023-01-14", freq="h"))

        ser.cal.timestamp_features()
        ```

        ```text
                    timestamp  day_of_week  hour
        0   2023-01-01 00:00:00            6   0.0
        1   2023-01-01 01:00:00            6   1.0
        2   2023-01-01 02:00:00            6   2.0
        3   2023-01-01 03:00:00            6   3.0
        4   2023-01-01 04:00:00            6   4.0
        ..                  ...          ...   ...
        308 2023-01-13 20:00:00            4  20.0
        309 2023-01-13 21:00:00            4  21.0
        310 2023-01-13 22:00:00            4  22.0
        311 2023-01-13 23:00:00            4  23.0
        312 2023-01-14 00:00:00            5   0.0

        [313 rows x 3 columns]
        ```

    """
    column = self._obj.name or "timestamp"
    pipeline = create_timestamp_feature_pipeline(
        timestamp_col=column,
        discretize=discretize,
        minutes=minutes,
        create_vocab=create_vocab,
    )
    # The pipeline works on DataFrames, so wrap the Series in a one-column frame.
    frame = self._obj.rename(column).to_frame()
    return pipeline.fit_transform(frame)

Extensions

DataFrameAccessor

aggregate_events(by, timestamp_col, minutes=60, as_multiindex=True)

conditional_probabilities(*, level=0)

divide_by_even_rate()

divide_by_max()

divide_by_sum()

normalize(kind)

plot(start_col, *, end_col=None, duration=None, alpha=None, cmap=None, day_labeler=DayLabeler(), time_labeler=TimeLabeler(), grid_lines=GridLines(), monday_start=True, ax=None)

plot_across_column(start_col, grid_col, *, end_col=None, duration=None, day_labeler=DayLabeler(), time_labeler=TimeLabeler(), grid_lines=GridLines(), max_cols=3, alpha=None)

plot_by_row(*, max_cols=3, title_func=None, cmaps=None, day_labeler=DayLabeler(), time_labeler=TimeLabeler(), grid_lines=GridLines(), monday_start=True)

plot_model_predictions_by_row(df_holdout, *, model, index_func=lambda idx: idx, divergent=True, day_labeler=DayLabeler(), time_labeler=TimeLabeler())

plot_profile_by_row(*, model, index_func=lambda idx: idx, include_components=True, day_labeler=DayLabeler(), time_labeler=TimeLabeler())

plot_raw_and_predicted_by_row(*, model, index_func=lambda idx: idx, day_labeler=DayLabeler(), time_labeler=TimeLabeler())

predict(*, model)

sum_next_hours(hours)

sum_over_segments(df_segments)

sum_over_vocab(aggregation='dow')

timestamp_features(column, discretize=True, minutes=60, create_vocab=True)

transform(*, model)

widen(column, as_int=True, minutes=60, multiindex=True)

SeriesAccessor

aggregate_events(minutes=60, as_multiindex=True)

conditional_probabilities(*, level=0)

plot(*, duration=5, alpha=None, cmap=None, day_labeler=DayLabeler(), time_labeler=TimeLabeler(), grid_lines=GridLines(), monday_start=True, ax=None)

plot_row(*, alpha=None, cmap=None, day_labeler=DayLabeler(), time_labeler=TimeLabeler(), grid_lines=GridLines(), monday_start=True, ax=None)

timestamp_features(discretize=True, minutes=60, create_vocab=True)

Comments

`DataFrameAccessor`

`aggregate_events(by, timestamp_col, minutes=60, as_multiindex=True)`

`conditional_probabilities(*, level=0)`

`divide_by_even_rate()`

`divide_by_max()`

`divide_by_sum()`

`normalize(kind)`

`plot(start_col, *, end_col=None, duration=None, alpha=None, cmap=None, day_labeler=DayLabeler(), time_labeler=TimeLabeler(), grid_lines=GridLines(), monday_start=True, ax=None)`

`plot_across_column(start_col, grid_col, *, end_col=None, duration=None, day_labeler=DayLabeler(), time_labeler=TimeLabeler(), grid_lines=GridLines(), max_cols=3, alpha=None)`

`plot_by_row(*, max_cols=3, title_func=None, cmaps=None, day_labeler=DayLabeler(), time_labeler=TimeLabeler(), grid_lines=GridLines(), monday_start=True)`

`plot_model_predictions_by_row(df_holdout, *, model, index_func=lambda idx: idx, divergent=True, day_labeler=DayLabeler(), time_labeler=TimeLabeler())`

`plot_profile_by_row(*, model, index_func=lambda idx: idx, include_components=True, day_labeler=DayLabeler(), time_labeler=TimeLabeler())`

`plot_raw_and_predicted_by_row(*, model, index_func=lambda idx: idx, day_labeler=DayLabeler(), time_labeler=TimeLabeler())`

`predict(*, model)`

`sum_next_hours(hours)`

`sum_over_segments(df_segments)`

`sum_over_vocab(aggregation='dow')`

`timestamp_features(column, discretize=True, minutes=60, create_vocab=True)`

`transform(*, model)`

`widen(column, as_int=True, minutes=60, multiindex=True)`

`SeriesAccessor`

`aggregate_events(minutes=60, as_multiindex=True)`

`conditional_probabilities(*, level=0)`

`plot(*, duration=5, alpha=None, cmap=None, day_labeler=DayLabeler(), time_labeler=TimeLabeler(), grid_lines=GridLines(), monday_start=True, ax=None)`

`plot_row(*, alpha=None, cmap=None, day_labeler=DayLabeler(), time_labeler=TimeLabeler(), grid_lines=GridLines(), monday_start=True, ax=None)`

`timestamp_features(discretize=True, minutes=60, create_vocab=True)`