Skip to content

Extensions

Pandas extensions for latent-calendar and primary interface for the package.

Provides a cal accessor to DataFrame and Series instances for easy transformation and plotting after import of latent_calendar.

Functionality includes:

  • aggregation of events to wide format
  • convolutions of wide formats
  • making transformations and predictions with models
  • plotting of events, predictions, and comparisons as calendars

Each DataFrame will be either at event level or an aggregated wide format.

Methods that end in row or by_row will be for wide format DataFrames and will plot each row as a calendar.

Examples:

Plotting an event level Series as a calendar

import pandas as pd
import latent_calendar

dates = pd.date_range("2023-01-01", "2023-01-14", freq="h")
ser = (
    pd.Series(dates)
    .sample(10, random_state=42)
)

ser.cal.plot()

Series Calendar

Transform event level DataFrame to wide format and plot

from latent_calendar.datasets import load_online_transactions

df = load_online_transactions()

# (n_customer, n_timeslots)
df_wide = (
    df
    .cal.aggregate_events("Customer ID", timestamp_col="InvoiceDate")
)

(
    df_wide
    .sample(n=12, random_state=42)
    .cal.plot_by_row(max_cols=4)
)

Customer Transactions

Train a model and plot predictions

from latent_calendar import LatentCalendar

model = LatentCalendar(n_components=5, random_state=42)
model.fit(df_wide.to_numpy())

(
    df_wide
    .head(2)
    .cal.plot_profile_by_row(model=model)
)

Profile By Row

DataFrameAccessor

DataFrame accessor for latent_calendar accessed through cal attribute of DataFrames.

Source code in latent_calendar/extensions.py
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
@pd.api.extensions.register_dataframe_accessor("cal")
class DataFrameAccessor:
    """DataFrame accessor for latent_calendar accessed through `cal` attribute of DataFrames.

    The wrapped DataFrame is kept on `self._obj` and handed to the helper
    functions / transformers that each method wraps.

    """

    def __init__(self, pandas_obj: pd.DataFrame):
        self._obj = pandas_obj

    def divide_by_max(self) -> pd.DataFrame:
        """Divide each row by the max value of that row.

        Returns:
            DataFrame with row-wise operations applied

        """
        return self._obj.div(self._obj.max(axis=1), axis=0)

    def divide_by_sum(self) -> pd.DataFrame:
        """Divide each row by the sum of the row.

        Returns:
            DataFrame with row-wise operations applied

        """
        return self._obj.div(self._obj.sum(axis=1), axis=0)

    def divide_by_even_rate(self) -> pd.DataFrame:
        """Divide each row by the even rate, i.e. `1 / number of columns`.

        Dividing by `1 / n_columns` is the same as multiplying by `n_columns`,
        which is what is computed here.

        Returns:
            DataFrame with row-wise operations applied

        """
        n_columns = self._obj.shape[1]
        return self._obj.mul(n_columns)

    def normalize(self, kind: Literal["max", "probs", "even_rate"]) -> pd.DataFrame:
        """Row-wise operations on DataFrame.

        Dispatches to `divide_by_max`, `divide_by_sum` or `divide_by_even_rate`
        and emits a `DeprecationWarning` on every call, including calls that
        fail validation.

        Args:
            kind: The normalization to apply.

        Returns:
            DataFrame with row-wise operations applied

        Raises:
            ValueError: if `kind` is not one of 'max', 'probs' or 'even_rate'.

        """
        import warnings

        def warn(message):
            # stacklevel=3 skips this helper and `normalize` itself so the
            # warning is attributed to the code calling `normalize`.
            warnings.warn(message, DeprecationWarning, stacklevel=3)

        warning_message = "This method will be deprecated in future versions"

        funcs = {
            "max": self.divide_by_max,
            "probs": self.divide_by_sum,
            "even_rate": self.divide_by_even_rate,
        }

        if kind not in funcs:
            warn(warning_message)
            raise ValueError(
                f"kind must be one of ['max', 'probs', 'even_rate'], got {kind}"
            )

        func = funcs[kind]

        # Point the caller at the replacement method by name.
        warning_message = f"{warning_message} in favor of df.cal.{func.__name__}()"
        warn(warning_message)

        return func()

    def conditional_probabilities(
        self,
        *,
        level: Union[int, str] = 0,
    ) -> pd.DataFrame:
        """Calculate conditional probabilities for each row over the level.

        Every value is divided by the row total of the column group it belongs
        to, where the groups are defined by `level` of the columns MultiIndex.

        Args:
            level: level of the columns MultiIndex.
                Default 0 or day_of_week

        Returns:
            DataFrame with conditional probabilities

        Raises:
            ValueError: if the DataFrame columns are not a MultiIndex.

        """
        if not isinstance(self._obj.columns, pd.MultiIndex):
            raise ValueError("DataFrame is expected to have MultiIndex columns.")

        # Row totals per group of the chosen column level.
        level_totals = self._obj.T.groupby(level=level).sum().T

        return self._obj.div(level_totals, level=level, axis=1)

    def timestamp_features(
        self,
        column: str,
        discretize: bool = True,
        minutes: int = 60,
        create_vocab: bool = True,
    ) -> pd.DataFrame:
        """Create day of week and proportion into day columns for event level DataFrame

        Exposed as a method on DataFrame for convenience. Use `cal.aggregate_events` instead to create the wide format DataFrame.

        Wrapper around `create_timestamp_feature_pipeline`.

        Args:
            column: The name of the timestamp column.
            discretize: Whether to discretize the hour column.
            minutes: The number of minutes to discretize by. Ignored if `discretize` is False.
            create_vocab: Whether to create the vocab column.

        Returns:
            DataFrame with features added

        """
        transformer = create_timestamp_feature_pipeline(
            timestamp_col=column,
            discretize=discretize,
            create_vocab=create_vocab,
            minutes=minutes,
        )

        return transformer.fit_transform(self._obj)

    def widen(
        self,
        column: str,
        as_int: bool = True,
        minutes: int = 60,
        multiindex: bool = True,
    ) -> pd.DataFrame:
        """Transform an aggregated DataFrame to wide calendar format.

        Wrapper around `LongToWide` transformer to transform to wide format.

        Args:
            column: column to widen
            as_int: whether to cast the column to int
            minutes: number of minutes, forwarded to `LongToWide`
                (presumably the discretization width used to build the vocab)
            multiindex: whether to use a MultiIndex

        Returns:
            DataFrame in wide format

        Raises:
            ValueError: if the DataFrame index is not a MultiIndex.

        """
        if not isinstance(self._obj.index, pd.MultiIndex):
            raise ValueError(
                "DataFrame is expected to have a MultiIndex with the last column as the vocab."
            )

        transformer = LongToWide(
            col=column, as_int=as_int, minutes=minutes, multiindex=multiindex
        )

        return transformer.fit_transform(self._obj)

    def aggregate_events(
        self,
        by: Union[str, List[str]],
        timestamp_col: str,
        minutes: int = 60,
        as_multiindex: bool = True,
    ) -> pd.DataFrame:
        """Transform event level DataFrame to wide format with groups as index.

        Wrapper around `create_raw_to_vocab_transformer` to transform to wide format.

        Args:
            by: column(s) to use as index. When a list is given, the first
                entry is used as the id column and the rest as additional groups.
            timestamp_col: column to use as timestamp
            minutes: The number of minutes to discretize by.
            as_multiindex: whether to use MultiIndex columns

        Returns:
            DataFrame in wide format

        """
        if not isinstance(by, list):
            id_col = by
            additional_groups = None
        else:
            # First entry is the id column; any remaining entries are extra groups.
            id_col, *additional_groups = by

        transformer = create_raw_to_vocab_transformer(
            id_col=id_col,
            timestamp_col=timestamp_col,
            minutes=minutes,
            additional_groups=additional_groups,
            as_multiindex=as_multiindex,
        )
        return transformer.fit_transform(self._obj)

    def sum_over_vocab(self, aggregation: str = "dow") -> pd.DataFrame:
        """Sum the wide format to day of week or hour of day.

        Args:
            aggregation: one of ['dow', 'hour']

        Returns:
            DataFrame with summed values

        Examples:
            Sum to day of week

            ```python
            df_dow = df_wide.cal.sum_over_vocab(aggregation='dow')
            ```

        """
        return sum_over_vocab(self._obj, aggregation=aggregation)

    def sum_next_hours(self, hours: int) -> pd.DataFrame:
        """Sum the wide format over next hours.

        Args:
            hours: number of hours to sum over

        Returns:
            DataFrame with summed values

        """
        return sum_next_hours(self._obj, hours=hours)

    def sum_over_segments(self, df_segments: pd.DataFrame) -> pd.DataFrame:
        """Sum the wide format over user defined segments.

        Args:
            df_segments: DataFrame in wide format with segments as index

        Returns:
            DataFrame with columns as the segments and summed values

        """
        return sum_over_segments(self._obj, df_segments=df_segments)

    def transform(self, *, model: LatentCalendar) -> pd.DataFrame:
        """Transform DataFrame with model.

        Applies the dimensionality reduction to each row of the DataFrame.

        Args:
            model: model to use for transformation

        Returns:
            DataFrame with transformed values

        """
        return transform_on_dataframe(self._obj, model=model)

    def predict(self, *, model: LatentCalendar) -> pd.DataFrame:
        """Predict DataFrame with model.

        Args:
            model: model to use for prediction

        Returns:
            DataFrame with predicted values (wide format)

        """
        return predict_on_dataframe(self._obj, model=model)

    def plot(
        self,
        start_col: str,
        *,
        end_col: Optional[str] = None,
        duration: Optional[int] = None,
        alpha: Optional[float] = None,
        cmap=None,
        day_labeler: DayLabeler = DayLabeler(),
        time_labeler: TimeLabeler = TimeLabeler(),
        grid_lines: GridLines = GridLines(),
        monday_start: bool = True,
        ax: Optional[plt.Axes] = None,
    ) -> plt.Axes:
        """Plot DataFrame of timestamps as a calendar.

        Args:
            start_col: column with start timestamp
            end_col: column with end timestamp
            duration: length of event in minutes. Alternative to end_col
            alpha: alpha value for the color
            cmap: function that maps floats to string colors
            day_labeler: DayLabeler instance to use for day labels
            time_labeler: TimeLabeler instance to use for time labels
            grid_lines: GridLines instance forwarded to the plotting function
            monday_start: whether to start the week on Monday or Sunday
            ax: optional matplotlib axis to plot on

        Returns:
            Modified matplotlib axis

        """
        config = StartEndConfig(start=start_col, end=end_col, minutes=duration)

        return plot_dataframe_as_calendar(
            self._obj,
            config=config,
            alpha=alpha,
            cmap=cmap,
            day_labeler=day_labeler,
            time_labeler=time_labeler,
            grid_lines=grid_lines,
            monday_start=monday_start,
            ax=ax,
        )

    def plot_across_column(
        self,
        start_col: str,
        grid_col: str,
        *,
        end_col: Optional[str] = None,
        duration: Optional[int] = None,
        day_labeler: DayLabeler = DayLabeler(),
        time_labeler: TimeLabeler = TimeLabeler(),
        grid_lines: GridLines = GridLines(),
        max_cols: int = 3,
        alpha: Optional[float] = None,
    ) -> None:
        """Plot DataFrame of timestamps as a calendar as grid across column values.

        NA values are excluded

        Args:
            start_col: column with start timestamp
            grid_col: column of values to use as grid
            end_col: column with end timestamp
            duration: length of event in minutes. Alternative to end_col
            day_labeler: DayLabeler instance to use for day labels
            time_labeler: TimeLabeler instance to use for time labels
            grid_lines: GridLines instance forwarded to the plotting function
            max_cols: max number of columns per row
            alpha: alpha value for the color

        Returns:
            None

        """
        config = StartEndConfig(start=start_col, end=end_col, minutes=duration)

        plot_dataframe_grid_across_column(
            self._obj,
            grid_col=grid_col,
            config=config,
            max_cols=max_cols,
            alpha=alpha,
            day_labeler=day_labeler,
            time_labeler=time_labeler,
            grid_lines=grid_lines,
        )

    def plot_by_row(
        self,
        *,
        max_cols: int = 3,
        title_func: Optional[TITLE_FUNC] = None,
        cmaps: Optional[Union[CMAP, ColorMap, CMAP_GENERATOR]] = None,
        day_labeler: DayLabeler = DayLabeler(),
        time_labeler: TimeLabeler = TimeLabeler(),
        grid_lines: GridLines = GridLines(),
        monday_start: bool = True,
    ) -> None:
        """Plot each row of the DataFrame as a calendar plot. Data must have been transformed to wide format first.

        Wrapper around `latent_calendar.plot.plot_calendar_by_row`.

        Args:
            max_cols: max number of columns per row of grid
            title_func: function to generate title for each row
            cmaps: optional generator of colormaps
            day_labeler: function to generate day labels
            time_labeler: function to generate time labels
            grid_lines: optional grid lines
            monday_start: whether to start the week on Monday or Sunday

        Returns:
            Whatever `plot_calendar_by_row` returns (annotated as None)

        """
        return plot_calendar_by_row(
            self._obj,
            max_cols=max_cols,
            title_func=title_func,
            day_labeler=day_labeler,
            time_labeler=time_labeler,
            cmaps=cmaps,
            grid_lines=grid_lines,
            monday_start=monday_start,
        )

    def plot_profile_by_row(
        self,
        *,
        model: LatentCalendar,
        index_func=lambda idx: idx,
        include_components: bool = True,
        day_labeler: DayLabeler = DayLabeler(),
        time_labeler: TimeLabeler = TimeLabeler(),
    ) -> np.ndarray:
        """Plot each row of the DataFrame as a profile plot. Data must have been transformed to wide format first.

        Args:
            model: model to use for prediction and transform
            index_func: function to generate title for each row
            include_components: whether to include components in the plot
            day_labeler: DayLabeler instance to use for day labels
            time_labeler: TimeLabeler instance to use for time labels

        Returns:
            grid of axes

        """
        return plot_profile_by_row(
            self._obj,
            model=model,
            index_func=index_func,
            include_components=include_components,
            day_labeler=day_labeler,
            time_labeler=time_labeler,
        )

    def plot_raw_and_predicted_by_row(
        self,
        *,
        model: LatentCalendar,
        index_func=lambda idx: idx,
        day_labeler: DayLabeler = DayLabeler(),
        time_labeler: TimeLabeler = TimeLabeler(),
    ) -> np.ndarray:
        """Plot raw and predicted values for a model. Data must have been transformed to wide format first.

        Same as `plot_profile_by_row` with `include_components=False`.

        Args:
            model: model to use for prediction
            index_func: function to generate title for each row
            day_labeler: DayLabeler instance to use for day labels
            time_labeler: TimeLabeler instance to use for time labels

        Returns:
            grid of axes

        """
        return plot_profile_by_row(
            self._obj,
            model=model,
            index_func=index_func,
            include_components=False,
            day_labeler=day_labeler,
            time_labeler=time_labeler,
        )

    def plot_model_predictions_by_row(
        self,
        df_holdout: pd.DataFrame,
        *,
        model: LatentCalendar,
        index_func=lambda idx: idx,
        divergent: bool = True,
        day_labeler: DayLabeler = DayLabeler(),
        time_labeler: TimeLabeler = TimeLabeler(),
    ) -> np.ndarray:
        """Plot model predictions for each row of the DataFrame. Data must have been transformed to wide format first.

        Args:
            df_holdout: holdout DataFrame for comparison
            model: model to use for prediction
            index_func: function to generate title for each row
            divergent: whether to use divergent colormap
            day_labeler: DayLabeler instance to use for day labels
            time_labeler: TimeLabeler instance to use for time labels

        Returns:
            grid of axes

        """
        return plot_model_predictions_by_row(
            self._obj,
            df_holdout=df_holdout,
            model=model,
            index_func=index_func,
            divergent=divergent,
            day_labeler=day_labeler,
            time_labeler=time_labeler,
        )

aggregate_events(by, timestamp_col, minutes=60, as_multiindex=True)

Transform event level DataFrame to wide format with groups as index.

Wrapper around create_raw_to_vocab_transformer to transform to wide format.

Parameters:

Name Type Description Default
by Union[str, List[str]]

column(s) to use as index

required
timestamp_col str

column to use as timestamp

required
minutes int

The number of minutes to discretize by.

60
as_multiindex bool

whether to use MultiIndex columns

True

Returns:

Type Description
DataFrame

DataFrame in wide format

Source code in latent_calendar/extensions.py
def aggregate_events(
    self,
    by: Union[str, List[str]],
    timestamp_col: str,
    minutes: int = 60,
    as_multiindex: bool = True,
) -> pd.DataFrame:
    """Aggregate an event level DataFrame into wide format, indexed by group.

    Thin wrapper that builds a transformer with
    `create_raw_to_vocab_transformer` and applies it to the wrapped DataFrame.

    Args:
        by: column(s) to use as index. For a list, the first entry becomes
            the id column and the remaining entries the additional groups.
        timestamp_col: column to use as timestamp
        minutes: The number of minutes to discretize by.
        as_multiindex: whether to use MultiIndex columns

    Returns:
        DataFrame in wide format

    """
    if isinstance(by, list):
        # Head of the list is the id column, the tail holds extra groups.
        id_col, *additional_groups = by
    else:
        id_col, additional_groups = by, None

    pipeline = create_raw_to_vocab_transformer(
        id_col=id_col,
        timestamp_col=timestamp_col,
        minutes=minutes,
        additional_groups=additional_groups,
        as_multiindex=as_multiindex,
    )
    wide = pipeline.fit_transform(self._obj)
    return wide

conditional_probabilities(*, level=0)

Calculate conditional probabilities for each row over the level.

Parameters:

Name Type Description Default
level Union[int, str]

level of the columns MultiIndex. Default 0 or day_of_week

0

Returns:

Type Description
DataFrame

DataFrame with conditional probabilities

Source code in latent_calendar/extensions.py
def conditional_probabilities(
    self,
    *,
    level: Union[int, str] = 0,
) -> pd.DataFrame:
    """Calculate conditional probabilities for each row over the level.

    Every value is divided by the row total of the column group it belongs
    to, where the groups are defined by `level` of the columns MultiIndex.

    Args:
        level: level of the columns MultiIndex.
            Default 0 or day_of_week

    Returns:
        DataFrame with conditional probabilities

    Raises:
        ValueError: if the DataFrame columns are not a MultiIndex.

    """
    frame = self._obj

    if not isinstance(frame.columns, pd.MultiIndex):
        # The check is on the columns, so the message names the columns.
        raise ValueError("DataFrame is expected to have MultiIndex columns.")

    # Row totals per group of the chosen column level.
    level_totals = frame.T.groupby(level=level).sum().T

    return frame.div(level_totals, level=level, axis=1)

divide_by_even_rate()

Divide each row by the even rate (1 / number of columns), which is equivalent to multiplying every value by the number of columns.

Returns:

Type Description
DataFrame

DataFrame with row-wise operations applied

Source code in latent_calendar/extensions.py
def divide_by_even_rate(self) -> pd.DataFrame:
    """Divide each row by the even rate, i.e. `1 / number of columns`.

    Dividing by `1 / n_columns` is the same as multiplying by `n_columns`,
    which is what is computed here.

    Returns:
        DataFrame with row-wise operations applied

    """
    n_columns = self._obj.shape[1]
    return self._obj.mul(n_columns)

divide_by_max()

Divide each row by the max value.

Returns:

Type Description
DataFrame

DataFrame with row-wise operations applied

Source code in latent_calendar/extensions.py
def divide_by_max(self) -> pd.DataFrame:
    """Scale every row by that row's largest value.

    Returns:
        DataFrame with row-wise operations applied

    """
    frame = self._obj
    row_max = frame.max(axis="columns")
    return frame.div(row_max, axis="index")

divide_by_sum()

Divide each row by the sum of the row.

Returns:

Type Description
DataFrame

DataFrame with row-wise operations applied

Source code in latent_calendar/extensions.py
def divide_by_sum(self) -> pd.DataFrame:
    """Scale every row by that row's total.

    Returns:
        DataFrame with row-wise operations applied

    """
    frame = self._obj
    row_total = frame.sum(axis="columns")
    return frame.div(row_total, axis="index")

normalize(kind)

Row-wise operations on DataFrame.

Parameters:

Name Type Description Default
kind Literal['max', 'probs', 'even_rate']

The normalization to apply.

required

Returns:

Type Description
DataFrame

DataFrame with row-wise operations applied

Source code in latent_calendar/extensions.py
def normalize(self, kind: Literal["max", "probs", "even_rate"]) -> pd.DataFrame:
    """Apply one of the row-wise normalizations, selected by name.

    A `DeprecationWarning` is emitted on every call, including calls that
    fail validation.

    Args:
        kind: The normalization to apply.

    Returns:
        DataFrame with row-wise operations applied

    Raises:
        ValueError: if `kind` is not one of 'max', 'probs' or 'even_rate'.

    """
    import warnings

    def emit(text):
        # stacklevel=3 skips this helper and `normalize` itself so the
        # warning is attributed to the code calling `normalize`.
        warnings.warn(text, DeprecationWarning, stacklevel=3)

    base_message = "This method will be deprecated in future versions"

    dispatch = {
        "max": self.divide_by_max,
        "probs": self.divide_by_sum,
        "even_rate": self.divide_by_even_rate,
    }

    if kind not in dispatch:
        emit(base_message)
        raise ValueError(
            f"kind must be one of ['max', 'probs', 'even_rate'], got {kind}"
        )

    chosen = dispatch[kind]

    # Name the replacement method in the warning.
    emit(f"{base_message} in favor of df.cal.{chosen.__name__}()")

    return chosen()

plot(start_col, *, end_col=None, duration=None, alpha=None, cmap=None, day_labeler=DayLabeler(), time_labeler=TimeLabeler(), grid_lines=GridLines(), monday_start=True, ax=None)

Plot DataFrame of timestamps as a calendar.

Parameters:

Name Type Description Default
start_col str

column with start timestamp

required
end_col Optional[str]

column with end timestamp

None
duration Optional[int]

length of event in minutes. Alternative to end_col

None
alpha float

alpha value for the color

None
cmap

function that maps floats to string colors

None
monday_start bool

whether to start the week on Monday or Sunday

True
ax Optional[Axes]

optional matplotlib axis to plot on

None

Returns:

Type Description
Axes

Modified matplotlib axis

Source code in latent_calendar/extensions.py
def plot(
    self,
    start_col: str,
    *,
    end_col: Optional[str] = None,
    duration: Optional[int] = None,
    alpha: float = None,
    cmap=None,
    day_labeler: DayLabeler = DayLabeler(),
    time_labeler: TimeLabeler = TimeLabeler(),
    grid_lines: GridLines = GridLines(),
    monday_start: bool = True,
    ax: Optional[plt.Axes] = None,
) -> plt.Axes:
    """Draw the timestamps of the DataFrame on a calendar.

    Builds a `StartEndConfig` from the column arguments and forwards
    everything to `plot_dataframe_as_calendar`.

    Args:
        start_col: column with start timestamp
        end_col: column with end timestamp
        duration: length of event in minutes. Alternative to end_col
        alpha: alpha value for the color
        cmap: function that maps floats to string colors
        day_labeler: DayLabeler instance to use for day labels
        time_labeler: TimeLabeler instance to use for time labels
        grid_lines: GridLines instance forwarded to the plotting function
        monday_start: whether to start the week on Monday or Sunday
        ax: optional matplotlib axis to plot on

    Returns:
        Modified matplotlib axis

    """
    event_config = StartEndConfig(start=start_col, end=end_col, minutes=duration)

    plot_options = dict(
        config=event_config,
        alpha=alpha,
        cmap=cmap,
        day_labeler=day_labeler,
        time_labeler=time_labeler,
        grid_lines=grid_lines,
        monday_start=monday_start,
        ax=ax,
    )
    return plot_dataframe_as_calendar(self._obj, **plot_options)

plot_across_column(start_col, grid_col, *, end_col=None, duration=None, day_labeler=DayLabeler(), time_labeler=TimeLabeler(), grid_lines=GridLines(), max_cols=3, alpha=None)

Plot DataFrame of timestamps as a calendar as grid across column values.

NA values are excluded

Parameters:

Name Type Description Default
start_col str

column with start timestamp

required
grid_col str

column of values to use as grid

required
end_col Optional[str]

column with end timestamp

None
duration Optional[int]

length of event in minutes. Alternative to end_col

None
max_cols int

max number of columns per row

3
alpha float

alpha value for the color

None

Returns:

Type Description
None

None

Source code in latent_calendar/extensions.py
def plot_across_column(
    self,
    start_col: str,
    grid_col: str,
    *,
    end_col: Optional[str] = None,
    duration: Optional[int] = None,
    day_labeler: DayLabeler = DayLabeler(),
    time_labeler: TimeLabeler = TimeLabeler(),
    grid_lines: GridLines = GridLines(),
    max_cols: int = 3,
    alpha: float = None,
) -> None:
    """Draw a grid of calendars, one per value of `grid_col`.

    Builds a `StartEndConfig` from the column arguments and forwards
    everything to `plot_dataframe_grid_across_column`.

    NA values are excluded

    Args:
        start_col: column with start timestamp
        grid_col: column of values to use as grid
        end_col: column with end timestamp
        duration: length of event in minutes. Alternative to end_col
        day_labeler: DayLabeler instance to use for day labels
        time_labeler: TimeLabeler instance to use for time labels
        grid_lines: GridLines instance forwarded to the plotting function
        max_cols: max number of columns per row
        alpha: alpha value for the color

    Returns:
        None

    """
    event_config = StartEndConfig(start=start_col, end=end_col, minutes=duration)

    grid_options = dict(
        grid_col=grid_col,
        config=event_config,
        max_cols=max_cols,
        alpha=alpha,
        day_labeler=day_labeler,
        time_labeler=time_labeler,
        grid_lines=grid_lines,
    )
    # The helper's result is intentionally discarded; this method returns None.
    plot_dataframe_grid_across_column(self._obj, **grid_options)

plot_by_row(*, max_cols=3, title_func=None, cmaps=None, day_labeler=DayLabeler(), time_labeler=TimeLabeler(), grid_lines=GridLines(), monday_start=True)

Plot each row of the DataFrame as a calendar plot. Data must have been transformed to wide format first.

Wrapper around latent_calendar.plot.plot_calendar_by_row.

Parameters:

Name Type Description Default
max_cols int

max number of columns per row of grid

3
title_func Optional[TITLE_FUNC]

function to generate title for each row

None
day_labeler DayLabeler

function to generate day labels

DayLabeler()
time_labeler TimeLabeler

function to generate time labels

TimeLabeler()
cmaps Optional[Union[CMAP, ColorMap, CMAP_GENERATOR]]

optional generator of colormaps

None
grid_lines GridLines

optional grid lines

GridLines()
monday_start bool

whether to start the week on Monday or Sunday

True

Returns:

Type Description
None

None

Source code in latent_calendar/extensions.py
def plot_by_row(
    self,
    *,
    max_cols: int = 3,
    title_func: Optional[TITLE_FUNC] = None,
    cmaps: Optional[Union[CMAP, ColorMap, CMAP_GENERATOR]] = None,
    day_labeler: DayLabeler = DayLabeler(),
    time_labeler: TimeLabeler = TimeLabeler(),
    grid_lines: GridLines = GridLines(),
    monday_start: bool = True,
) -> None:
    """Draw one calendar per row of a wide format DataFrame.

    Wrapper around `latent_calendar.plot.plot_calendar_by_row`. Data must
    have been transformed to wide format first.

    Args:
        max_cols: max number of columns per row of grid
        title_func: function to generate title for each row
        cmaps: optional generator of colormaps
        day_labeler: function to generate day labels
        time_labeler: function to generate time labels
        grid_lines: optional grid lines
        monday_start: whether to start the week on Monday or Sunday

    Returns:
        Whatever `plot_calendar_by_row` returns (annotated as None)

    """
    row_options = dict(
        max_cols=max_cols,
        title_func=title_func,
        day_labeler=day_labeler,
        time_labeler=time_labeler,
        cmaps=cmaps,
        grid_lines=grid_lines,
        monday_start=monday_start,
    )
    return plot_calendar_by_row(self._obj, **row_options)

plot_model_predictions_by_row(df_holdout, *, model, index_func=lambda idx: idx, divergent=True, day_labeler=DayLabeler(), time_labeler=TimeLabeler())

Plot model predictions for each row of the DataFrame. Data must have been transformed to wide format first.

Parameters:

Name Type Description Default
df_holdout DataFrame

holdout DataFrame for comparison

required
model LatentCalendar

model to use for prediction

required
index_func

function to generate title for each row

lambda idx: idx
divergent bool

whether to use divergent colormap

True
day_labeler DayLabeler

DayLabeler instance to use for day labels

DayLabeler()
time_labeler TimeLabeler

TimeLabeler instance to use for time labels

TimeLabeler()

Returns:

Type Description
ndarray

grid of axes

Source code in latent_calendar/extensions.py
def plot_model_predictions_by_row(
    self,
    df_holdout: pd.DataFrame,
    *,
    model: LatentCalendar,
    index_func=lambda idx: idx,
    divergent: bool = True,
    day_labeler: DayLabeler = DayLabeler(),
    time_labeler: TimeLabeler = TimeLabeler(),
) -> np.ndarray:
    """Draw model predictions for every row of a wide format DataFrame.

    Forwards the wrapped DataFrame and all arguments to the
    `plot_model_predictions_by_row` plotting function. Data must have been
    transformed to wide format first.

    Args:
        df_holdout: holdout DataFrame for comparison
        model: model to use for prediction
        index_func: function to generate title for each row
        divergent: whether to use divergent colormap
        day_labeler: DayLabeler instance to use for day labels
        time_labeler: TimeLabeler instance to use for time labels

    Returns:
        grid of axes

    """
    prediction_options = dict(
        df_holdout=df_holdout,
        model=model,
        index_func=index_func,
        divergent=divergent,
        day_labeler=day_labeler,
        time_labeler=time_labeler,
    )
    return plot_model_predictions_by_row(self._obj, **prediction_options)

plot_profile_by_row(*, model, index_func=lambda idx: idx, include_components=True, day_labeler=DayLabeler(), time_labeler=TimeLabeler())

Plot each row of the DataFrame as a profile plot. Data must have been transformed to wide format first.

Parameters:

Name Type Description Default
model LatentCalendar

model to use for prediction and transform

required
index_func

function to generate title for each row

lambda idx: idx
include_components bool

whether to include components in the plot

True
day_labeler DayLabeler

DayLabeler instance to use for day labels

DayLabeler()
time_labeler TimeLabeler

TimeLabeler instance to use for time labels

TimeLabeler()

Returns:

Type Description
ndarray

grid of axes

Source code in latent_calendar/extensions.py
def plot_profile_by_row(
    self,
    *,
    model: LatentCalendar,
    index_func=lambda idx: idx,
    include_components: bool = True,
    day_labeler: DayLabeler = DayLabeler(),
    time_labeler: TimeLabeler = TimeLabeler(),
) -> np.ndarray:
    """Draw a profile plot for every row of the wrapped DataFrame.

    The wrapped DataFrame must already be in wide format. All arguments are
    forwarded unchanged to the plotting function of the same name.

    Args:
        model: model to use for prediction and transform
        index_func: function to generate title for each row
        include_components: whether to include components in the plot
        day_labeler: DayLabeler instance to use for day labels
        time_labeler: TimeLabeler instance to use for time labels

    Returns:
        grid of axes

    """
    df_wide = self._obj
    profile_options = {
        "model": model,
        "index_func": index_func,
        "include_components": include_components,
        "day_labeler": day_labeler,
        "time_labeler": time_labeler,
    }
    return plot_profile_by_row(df_wide, **profile_options)

plot_raw_and_predicted_by_row(*, model, index_func=lambda idx: idx, day_labeler=DayLabeler(), time_labeler=TimeLabeler())

Plot raw and predicted values for a model. Data must have been transformed to wide format first.

Parameters:

Name Type Description Default
model LatentCalendar

model to use for prediction

required
index_func

function to generate title for each row

lambda idx: idx
day_labeler DayLabeler

DayLabeler instance to use for day labels

DayLabeler()
time_labeler TimeLabeler

TimeLabeler instance to use for time labels

TimeLabeler()

Returns:

Type Description
ndarray

grid of axes

Source code in latent_calendar/extensions.py
def plot_raw_and_predicted_by_row(
    self,
    *,
    model: LatentCalendar,
    index_func=lambda idx: idx,
    day_labeler: DayLabeler = DayLabeler(),
    time_labeler: TimeLabeler = TimeLabeler(),
) -> np.ndarray:
    """Plot the raw and the predicted values of a model for every row.

    The wrapped DataFrame must already be in wide format. Implemented as a
    call to `plot_profile_by_row` with the component plots switched off.

    Args:
        model: model to use for prediction
        index_func: function to generate title for each row
        day_labeler: DayLabeler instance to use for day labels
        time_labeler: TimeLabeler instance to use for time labels

    Returns:
        grid of axes

    """
    df_wide = self._obj
    labelers = {"day_labeler": day_labeler, "time_labeler": time_labeler}
    return plot_profile_by_row(
        df_wide,
        model=model,
        index_func=index_func,
        # Components are always left out here; that is the only difference
        # from calling plot_profile_by_row directly.
        include_components=False,
        **labelers,
    )

predict(*, model)

Predict DataFrame with model.

Parameters:

Name Type Description Default
model LatentCalendar

model to use for prediction

required

Returns:

Type Description
DataFrame

DataFrame with predicted values (wide format)

Source code in latent_calendar/extensions.py
def predict(self, *, model: LatentCalendar) -> pd.DataFrame:
    """Run the model's prediction on the wrapped DataFrame.

    Delegates to `predict_on_dataframe`.

    Args:
        model: model to use for prediction

    Returns:
        DataFrame with predicted values (wide format)

    """
    df_wide = self._obj
    df_predicted = predict_on_dataframe(df_wide, model=model)
    return df_predicted

sum_next_hours(hours)

Sum the wide format over next hours.

Parameters:

Name Type Description Default
hours int

number of hours to sum over

required

Returns:

Type Description
DataFrame

DataFrame with summed values

Source code in latent_calendar/extensions.py
def sum_next_hours(self, hours: int) -> pd.DataFrame:
    """Sum the wide-format DataFrame over the next `hours` hours.

    Forwards the wrapped DataFrame to the function of the same name.

    Args:
        hours: number of hours to sum over

    Returns:
        DataFrame with summed values

    """
    df_wide = self._obj
    df_summed = sum_next_hours(df_wide, hours=hours)
    return df_summed

sum_over_segments(df_segments)

Sum the wide format over user defined segments.

Parameters:

Name Type Description Default
df_segments DataFrame

DataFrame in wide format with segments as index

required

Returns:

Type Description
DataFrame

DataFrame with columns as the segments and summed values

Source code in latent_calendar/extensions.py
def sum_over_segments(self, df_segments: pd.DataFrame) -> pd.DataFrame:
    """Sum the wide-format DataFrame over user defined segments.

    Forwards the wrapped DataFrame to the function of the same name.

    Args:
        df_segments: DataFrame in wide format with segments as index

    Returns:
        DataFrame with columns as the segments and summed values

    """
    df_wide = self._obj
    df_by_segment = sum_over_segments(df_wide, df_segments=df_segments)
    return df_by_segment

sum_over_vocab(aggregation='dow')

Sum the wide format to day of week or hour of day.

Parameters:

Name Type Description Default
aggregation str

one of ['dow', 'hour']

'dow'

Returns:

Type Description
DataFrame

DataFrame with summed values

Examples:

Sum to day of week

df_dow = df_wide.cal.sum_over_vocab(aggregation='dow')
Source code in latent_calendar/extensions.py
def sum_over_vocab(self, aggregation: str = "dow") -> pd.DataFrame:
    """Collapse the wide-format DataFrame to day of week or hour of day.

    Forwards the wrapped DataFrame to the function of the same name.

    Args:
        aggregation: one of ['dow', 'hour']

    Returns:
        DataFrame with summed values

    Examples:
        Sum to day of week

        ```python
        df_dow = df_wide.cal.sum_over_vocab(aggregation='dow')
        ```

    """
    df_wide = self._obj
    df_summed = sum_over_vocab(df_wide, aggregation=aggregation)
    return df_summed

timestamp_features(column, discretize=True, minutes=60, create_vocab=True)

Create day of week and proportion into day columns for event level DataFrame

Exposed as a method on DataFrame for convenience. Use cal.aggregate_events instead to create the wide format DataFrame.

Parameters:

Name Type Description Default
column str

The name of the timestamp column.

required
discretize bool

Whether to discretize the hour column.

True
minutes int

The number of minutes to discretize by. Ignored if discretize is False.

60
create_vocab bool

Whether to create the vocab column.

True

Returns:

Type Description
DataFrame

DataFrame with features added

Source code in latent_calendar/extensions.py
def timestamp_features(
    self,
    column: str,
    discretize: bool = True,
    minutes: int = 60,
    create_vocab: bool = True,
) -> pd.DataFrame:
    """Add day of week and proportion into day columns to an event level DataFrame.

    Exposed as a method on DataFrame for convenience. Use `cal.aggregate_events` instead to create the wide format DataFrame.

    Args:
        column: The name of the timestamp column.
        discretize: Whether to discretize the hour column.
        minutes: The number of minutes to discretize by. Ignored if `discretize` is False.
        create_vocab: Whether to create the vocab column.

    Returns:
        DataFrame with features added

    """
    # Build the feature pipeline from the caller's settings, then fit and
    # apply it to the wrapped DataFrame in a single step.
    pipeline_settings = {
        "timestamp_col": column,
        "discretize": discretize,
        "create_vocab": create_vocab,
        "minutes": minutes,
    }
    feature_pipeline = create_timestamp_feature_pipeline(**pipeline_settings)
    return feature_pipeline.fit_transform(self._obj)

transform(*, model)

Transform DataFrame with model.

Applies the dimensionality reduction to each row of the DataFrame.

Parameters:

Name Type Description Default
model LatentCalendar

model to use for transformation

required

Returns:

Type Description
DataFrame

DataFrame with transformed values

Source code in latent_calendar/extensions.py
def transform(self, *, model: LatentCalendar) -> pd.DataFrame:
    """Apply the model's transform to the wrapped DataFrame.

    Applies the dimensionality reduction to each row of the DataFrame by
    delegating to `transform_on_dataframe`.

    Args:
        model: model to use for transformation

    Returns:
        DataFrame with transformed values

    """
    df_wide = self._obj
    df_transformed = transform_on_dataframe(df_wide, model=model)
    return df_transformed

widen(column, as_int=True, minutes=60, multiindex=True)

Transform an aggregated DataFrame to wide calendar format.

Wrapper around LongToWide transformer to transform to wide format.

Parameters:

Name Type Description Default
column str

column to widen

required
as_int bool

whether to cast the column to int

True
minutes int

number of minutes to discretize by

60
multiindex bool

whether to use a MultiIndex

True

Returns:

Type Description
DataFrame

DataFrame in wide format

Source code in latent_calendar/extensions.py
def widen(
    self,
    column: str,
    as_int: bool = True,
    minutes: int = 60,
    multiindex: bool = True,
) -> pd.DataFrame:
    """Reshape an aggregated DataFrame into the wide calendar format.

    Wrapper around the `LongToWide` transformer, which is fitted and applied
    to the wrapped DataFrame.

    Args:
        column: column to widen
        as_int: whether to cast the column to int
        minutes: number of minutes forwarded to `LongToWide`
            (presumably the width of each time slot; confirm against `LongToWide`)
        multiindex: whether to use a MultiIndex

    Returns:
        DataFrame in wide format

    Raises:
        ValueError: if the index of the DataFrame is not a MultiIndex

    """
    frame = self._obj
    has_multiindex = isinstance(frame.index, pd.MultiIndex)
    if not has_multiindex:
        # Reject the input before the transformer is even constructed.
        raise ValueError(
            "DataFrame is expected to have a MultiIndex with the last column as the vocab."
        )

    long_to_wide = LongToWide(
        col=column,
        as_int=as_int,
        minutes=minutes,
        multiindex=multiindex,
    )
    return long_to_wide.fit_transform(frame)

SeriesAccessor

Series accessor for latent_calendar accessed through cal attribute of Series.

Source code in latent_calendar/extensions.py
@pd.api.extensions.register_series_accessor("cal")
class SeriesAccessor:
    """Series accessor for latent_calendar accessed through `cal` attribute of Series."""

    def __init__(self, pandas_obj: pd.Series):
        # The Series the accessor was reached from; every method operates on it.
        self._obj = pandas_obj

    def aggregate_events(
        self,
        minutes: int = 60,
        as_multiindex: bool = True,
    ) -> pd.Series:
        """Transform event level Series to row of wide format.

        The Series is turned into a one-column DataFrame, a constant helper
        column is added to group by, and the DataFrame-level
        `cal.aggregate_events` is applied. The first row of that result is
        returned.

        Args:
            minutes: The number of minutes to discretize by.
            as_multiindex: whether to use MultiIndex columns

        Returns:
            Series that would be row of wide format

        Examples:
            Discretize datetime Series to 30 minutes

            ```python
            import pandas as pd

            import matplotlib.pyplot as plt

            from latent_calendar.datasets import load_chicago_bikes

            df_trips = load_chicago_bikes()

            start_times = df_trips["started_at"]

            agg_start_times = start_times.cal.aggregate_events(minutes=30)
            agg_start_times.cal.plot_row()
            plt.show()
            ```

        """
        # A Series without a name (or with a falsy name) gets "timestamp".
        name = self._obj.name or "timestamp"
        # The helper column must not share its name with the timestamp column:
        # `DataFrame.assign` overwrites existing columns, so a Series named
        # "tmp" would otherwise lose its timestamps.
        group_col = "tmp" if name != "tmp" else "tmp_group"
        return (
            self._obj.rename(name)
            .to_frame()
            .assign(**{group_col: 1})
            .cal.aggregate_events(
                by=group_col,
                timestamp_col=name,
                minutes=minutes,
                as_multiindex=as_multiindex,
            )
            .iloc[0]
            .rename(name)
        )

    def timestamp_features(
        self, discretize: bool = True, minutes: int = 60, create_vocab: bool = True
    ) -> pd.DataFrame:
        """Create day of week and proportion into day columns.

        Exposed as a method on Series for convenience.

        Args:
            discretize: Whether to discretize the hour column.
            minutes: The number of minutes to discretize by. Ignored if `discretize` is False.
            create_vocab: Whether to create the vocab column.

        Returns:
            DataFrame with features

        Examples:
            Create the features for some dates

            ```python
            ser = pd.Series(pd.date_range("2023-01-01", "2023-01-14", freq="h"))

            ser.cal.timestamp_features()
            ```

            ```text
                        timestamp  day_of_week  hour
            0   2023-01-01 00:00:00            6   0.0
            1   2023-01-01 01:00:00            6   1.0
            2   2023-01-01 02:00:00            6   2.0
            3   2023-01-01 03:00:00            6   3.0
            4   2023-01-01 04:00:00            6   4.0
            ..                  ...          ...   ...
            308 2023-01-13 20:00:00            4  20.0
            309 2023-01-13 21:00:00            4  21.0
            310 2023-01-13 22:00:00            4  22.0
            311 2023-01-13 23:00:00            4  23.0
            312 2023-01-14 00:00:00            5   0.0

            [313 rows x 3 columns]
            ```

        """
        # A Series without a name (or with a falsy name) gets "timestamp" so
        # the pipeline has a column to refer to.
        name = self._obj.name or "timestamp"
        transformer = create_timestamp_feature_pipeline(
            timestamp_col=name,
            discretize=discretize,
            minutes=minutes,
            create_vocab=create_vocab,
        )

        return transformer.fit_transform(self._obj.rename(name).to_frame())

    def conditional_probabilities(
        self,
        *,
        level: Union[int, str] = 0,
    ) -> pd.Series:
        """Calculate conditional probabilities for the row over the level.

        Every value is divided by the sum of the values that share its entry
        in the given index level.

        Args:
            level: level of the MultiIndex.
                Default 0 or day_of_week

        Returns:
            Series with conditional probabilities

        Raises:
            ValueError: if the index of the Series is not a MultiIndex

        """

        if not isinstance(self._obj.index, pd.MultiIndex):
            raise ValueError(
                "Series is expected to have a MultiIndex with the last column as the vocab."
            )

        # Per-level totals are broadcast back across the same level by `div`.
        return self._obj.div(self._obj.groupby(level=level).sum(), level=level)

    def plot(
        self,
        *,
        duration: int = 5,
        alpha: Optional[float] = None,
        cmap=None,
        day_labeler: DayLabeler = DayLabeler(),
        time_labeler: TimeLabeler = TimeLabeler(),
        grid_lines: GridLines = GridLines(),
        monday_start: bool = True,
        ax: Optional[plt.Axes] = None,
    ) -> plt.Axes:
        """Plot Series of timestamps as a calendar.

        Args:
            duration: duration of each event in minutes
            alpha: alpha value for the color
            cmap: function that maps floats to string colors
            day_labeler: DayLabeler instance
            time_labeler: TimeLabeler instance
            grid_lines: GridLines instance
            monday_start: whether to start the week on Monday or Sunday
            ax: matplotlib axis to plot on

        Returns:
            Modified matplotlib axis

        """
        # The Series is renamed to a fixed column name so the config can refer
        # to it regardless of the Series' own name.
        tmp_name = "tmp_name"
        config = StartEndConfig(start=tmp_name, end=None, minutes=duration)

        return plot_dataframe_as_calendar(
            self._obj.rename(tmp_name).to_frame(),
            config=config,
            alpha=alpha,
            cmap=cmap,
            monday_start=monday_start,
            day_labeler=day_labeler,
            time_labeler=time_labeler,
            grid_lines=grid_lines,
            ax=ax,
        )

    def plot_row(
        self,
        *,
        alpha: Optional[float] = None,
        cmap=None,
        day_labeler: DayLabeler = DayLabeler(),
        time_labeler: TimeLabeler = TimeLabeler(),
        grid_lines: GridLines = GridLines(),
        monday_start: bool = True,
        ax: Optional[plt.Axes] = None,
    ) -> plt.Axes:
        """Plot a Series that is a row of wide format as a calendar.

        Use `plot` for a Series of raw timestamps; this method expects an
        already aggregated row such as the result of `aggregate_events`.

        Args:
            alpha: alpha value for the color
            cmap: function that maps floats to string colors
            day_labeler: DayLabeler instance
            time_labeler: TimeLabeler instance
            grid_lines: GridLines instance
            monday_start: whether to start the week on Monday or Sunday
            ax: matplotlib axis to plot on

        Returns:
            Modified matplotlib axis

        """
        return plot_series_as_calendar(
            self._obj,
            alpha=alpha,
            cmap=cmap,
            ax=ax,
            monday_start=monday_start,
            day_labeler=day_labeler,
            time_labeler=time_labeler,
            grid_lines=grid_lines,
        )

aggregate_events(minutes=60, as_multiindex=True)

Transform event level Series to row of wide format.

Parameters:

Name Type Description Default
minutes int

The number of minutes to discretize by.

60
as_multiindex bool

whether to use MultiIndex columns

True

Returns:

Type Description
Series

Series that would be row of wide format

Examples:

Discretize datetime Series to 30 minutes

import pandas as pd

import matplotlib.pyplot as plt

from latent_calendar.datasets import load_chicago_bikes

df_trips = load_chicago_bikes()

start_times = df_trips["started_at"]

agg_start_times = start_times.cal.aggregate_events(minutes=30)
agg_start_times.cal.plot_row()
plt.show()
Source code in latent_calendar/extensions.py
def aggregate_events(
    self,
    minutes: int = 60,
    as_multiindex: bool = True,
) -> pd.Series:
    """Transform event level Series to row of wide format.

    The Series is turned into a one-column DataFrame, a constant helper
    column is added to group by, and the DataFrame-level
    `cal.aggregate_events` is applied. The first row of that result is
    returned.

    Args:
        minutes: The number of minutes to discretize by.
        as_multiindex: whether to use MultiIndex columns

    Returns:
        Series that would be row of wide format

    Examples:
        Discretize datetime Series to 30 minutes

        ```python
        import pandas as pd

        import matplotlib.pyplot as plt

        from latent_calendar.datasets import load_chicago_bikes

        df_trips = load_chicago_bikes()

        start_times = df_trips["started_at"]

        agg_start_times = start_times.cal.aggregate_events(minutes=30)
        agg_start_times.cal.plot_row()
        plt.show()
        ```

    """
    # A Series without a name (or with a falsy name) gets "timestamp".
    name = self._obj.name or "timestamp"
    # The helper column must not share its name with the timestamp column:
    # `DataFrame.assign` overwrites existing columns, so a Series named "tmp"
    # would otherwise lose its timestamps.
    group_col = "tmp" if name != "tmp" else "tmp_group"
    return (
        self._obj.rename(name)
        .to_frame()
        .assign(**{group_col: 1})
        .cal.aggregate_events(
            by=group_col,
            timestamp_col=name,
            minutes=minutes,
            as_multiindex=as_multiindex,
        )
        .iloc[0]
        .rename(name)
    )

conditional_probabilities(*, level=0)

Calculate conditional probabilities for each row over the level.

Parameters:

Name Type Description Default
level Union[int, str]

level of the column MultiIndex. Default 0 or day_of_week

0

Returns:

Type Description
Series

Series with conditional probabilities

Source code in latent_calendar/extensions.py
def conditional_probabilities(
    self,
    *,
    level: Union[int, str] = 0,
) -> pd.Series:
    """Compute conditional probabilities of the row within an index level.

    Every value is divided by the sum of the values that share its entry in
    the given index level.

    Args:
        level: level of the MultiIndex.
            Default 0 or day_of_week

    Returns:
        Series with conditional probabilities

    Raises:
        ValueError: if the index of the Series is not a MultiIndex

    """
    row = self._obj

    if isinstance(row.index, pd.MultiIndex):
        # Totals per level entry, broadcast back across that level by `div`.
        level_totals = row.groupby(level=level).sum()
        return row.div(level_totals, level=level)

    raise ValueError(
        "Series is expected to have a MultiIndex with the last column as the vocab."
    )

plot(*, duration=5, alpha=None, cmap=None, day_labeler=DayLabeler(), time_labeler=TimeLabeler(), grid_lines=GridLines(), monday_start=True, ax=None)

Plot Series of timestamps as a calendar.

Parameters:

Name Type Description Default
duration int

duration of each event in minutes

5
alpha float

alpha value for the color

None
cmap

function that maps floats to string colors

None
day_labeler DayLabeler

DayLabeler instance

DayLabeler()
time_labeler TimeLabeler

TimeLabeler instance

TimeLabeler()
grid_lines GridLines

GridLines instance

GridLines()
monday_start bool

whether to start the week on Monday or Sunday

True
ax Optional[Axes]

matplotlib axis to plot on

None

Returns:

Type Description
Axes

Modified matplotlib axis

Source code in latent_calendar/extensions.py
def plot(
    self,
    *,
    duration: int = 5,
    alpha: float = None,
    cmap=None,
    day_labeler: DayLabeler = DayLabeler(),
    time_labeler: TimeLabeler = TimeLabeler(),
    grid_lines: GridLines = GridLines(),
    monday_start: bool = True,
    ax: Optional[plt.Axes] = None,
) -> plt.Axes:
    """Draw a Series of timestamps on a calendar.

    The Series is converted to a one-column DataFrame and handed to
    `plot_dataframe_as_calendar` together with a `StartEndConfig` that uses
    that column as the start and `duration` as the minutes.

    Args:
        duration: duration of each event in minutes
        alpha: alpha value for the color
        cmap: function that maps floats to string colors
        day_labeler: DayLabeler instance
        time_labeler: TimeLabeler instance
        grid_lines: GridLines instance
        monday_start: whether to start the week on Monday or Sunday
        ax: matplotlib axis to plot on

    Returns:
        Modified matplotlib axis

    """
    # A fixed column name lets the config refer to the timestamps regardless
    # of what the Series itself is called.
    start_col = "tmp_name"
    start_end = StartEndConfig(start=start_col, end=None, minutes=duration)
    df_events = self._obj.rename(start_col).to_frame()

    return plot_dataframe_as_calendar(
        df_events,
        config=start_end,
        alpha=alpha,
        cmap=cmap,
        monday_start=monday_start,
        day_labeler=day_labeler,
        time_labeler=time_labeler,
        grid_lines=grid_lines,
        ax=ax,
    )

plot_row(*, alpha=None, cmap=None, day_labeler=DayLabeler(), time_labeler=TimeLabeler(), grid_lines=GridLines(), monday_start=True, ax=None)

Plot a Series that is a row of wide format as a calendar.

Parameters:

Name Type Description Default
alpha float

alpha value for the color

None
cmap

function that maps floats to string colors

None
monday_start bool

whether to start the week on Monday or Sunday

True
ax Optional[Axes]

matplotlib axis to plot on

None

Returns:

Type Description
Axes

Modified matplotlib axis

Source code in latent_calendar/extensions.py
def plot_row(
    self,
    *,
    alpha: Optional[float] = None,
    cmap=None,
    day_labeler: DayLabeler = DayLabeler(),
    time_labeler: TimeLabeler = TimeLabeler(),
    grid_lines: GridLines = GridLines(),
    monday_start: bool = True,
    ax: Optional[plt.Axes] = None,
) -> plt.Axes:
    """Plot a Series that is a row of wide format as a calendar.

    Use `plot` for a Series of raw timestamps; this method expects an already
    aggregated row such as the result of `aggregate_events`. The Series and
    all arguments are forwarded to `plot_series_as_calendar`.

    Args:
        alpha: alpha value for the color
        cmap: function that maps floats to string colors
        day_labeler: DayLabeler instance
        time_labeler: TimeLabeler instance
        grid_lines: GridLines instance
        monday_start: whether to start the week on Monday or Sunday
        ax: matplotlib axis to plot on

    Returns:
        Modified matplotlib axis

    """
    return plot_series_as_calendar(
        self._obj,
        alpha=alpha,
        cmap=cmap,
        ax=ax,
        monday_start=monday_start,
        day_labeler=day_labeler,
        time_labeler=time_labeler,
        grid_lines=grid_lines,
    )

timestamp_features(discretize=True, minutes=60, create_vocab=True)

Create day of week and proportion into day columns.

Exposed as a method on Series for convenience.

Parameters:

Name Type Description Default
discretize bool

Whether to discretize the hour column.

True
minutes int

The number of minutes to discretize by. Ignored if discretize is False.

60
create_vocab bool

Whether to create the vocab column.

True

Returns:

Type Description
DataFrame

DataFrame with features

Examples:

Create the features for some dates

ser = pd.Series(pd.date_range("2023-01-01", "2023-01-14", freq="h"))

ser.cal.timestamp_features()
            timestamp  day_of_week  hour
0   2023-01-01 00:00:00            6   0.0
1   2023-01-01 01:00:00            6   1.0
2   2023-01-01 02:00:00            6   2.0
3   2023-01-01 03:00:00            6   3.0
4   2023-01-01 04:00:00            6   4.0
..                  ...          ...   ...
308 2023-01-13 20:00:00            4  20.0
309 2023-01-13 21:00:00            4  21.0
310 2023-01-13 22:00:00            4  22.0
311 2023-01-13 23:00:00            4  23.0
312 2023-01-14 00:00:00            5   0.0

[313 rows x 3 columns]
Source code in latent_calendar/extensions.py
def timestamp_features(
    self,
    discretize: bool = True,
    minutes: int = 60,
    create_vocab: bool = True,
) -> pd.DataFrame:
    """Build day of week and proportion into day columns from the Series.

    Exposed as a method on Series for convenience.

    Args:
        discretize: Whether to discretize the hour column.
        minutes: The number of minutes to discretize by. Ignored if `discretize` is False.
        create_vocab: Whether to create the vocab column.

    Returns:
        DataFrame with features

    Examples:
        Create the features for some dates

        ```python
        ser = pd.Series(pd.date_range("2023-01-01", "2023-01-14", freq="h"))

        ser.cal.timestamp_features()
        ```

        ```text
                    timestamp  day_of_week  hour
        0   2023-01-01 00:00:00            6   0.0
        1   2023-01-01 01:00:00            6   1.0
        2   2023-01-01 02:00:00            6   2.0
        3   2023-01-01 03:00:00            6   3.0
        4   2023-01-01 04:00:00            6   4.0
        ..                  ...          ...   ...
        308 2023-01-13 20:00:00            4  20.0
        309 2023-01-13 21:00:00            4  21.0
        310 2023-01-13 22:00:00            4  22.0
        311 2023-01-13 23:00:00            4  23.0
        312 2023-01-14 00:00:00            5   0.0

        [313 rows x 3 columns]
        ```

    """
    # A Series without a name (or with a falsy name) gets "timestamp" so the
    # pipeline has a column to refer to.
    timestamp_col = self._obj.name or "timestamp"
    pipeline_settings = {
        "timestamp_col": timestamp_col,
        "discretize": discretize,
        "minutes": minutes,
        "create_vocab": create_vocab,
    }
    feature_pipeline = create_timestamp_feature_pipeline(**pipeline_settings)

    df_events = self._obj.rename(timestamp_col).to_frame()
    return feature_pipeline.fit_transform(df_events)

Comments