Skip to content

Iterate

Generalize the iteration to support different data formats. Namely,

  • 2d numpy array
  • 1d numpy array (long format)
  • pandas Series
  • pandas DataFrame with various columns

This powers the calendar plot and is passed into the plot_calendar function.

Examples:

Plot calendar based on 1d numpy array.

import numpy as np

from latent_calendar.plot import plot_calendar
from latent_calendar.plot.iterate import iterate_long_array

data = np.ones(7 * 24)
plot_calendar(
    iterate_long_array(data),
)

Plot calendar based on 2d numpy array.

import numpy as np

from latent_calendar.plot import plot_calendar
from latent_calendar.plot.iterate import iterate_matrix

data = np.ones((7, 24))
plot_calendar(
    iterate_matrix(data),
)

Plot calendar for every half hour instead of every hour. NOTE: No extra configuration is needed; the length of each time slot is inferred from the number of columns in the matrix.

import numpy as np

from latent_calendar.plot import plot_calendar
from latent_calendar.plot.iterate import iterate_matrix

data = np.ones((7, 24 * 2))
plot_calendar(
    iterate_matrix(data),
)

CalendarData dataclass

All the data that goes into calendar plot.

Source code in latent_calendar/plot/iterate.py
@dataclass
class CalendarData:
    """All the data that goes into calendar plot.

    Each instance describes one block of the calendar: which day it sits on,
    the span of the day it covers, and the value attached to it. The
    iterators in this module yield one instance per block.
    """

    # Day the block belongs to. iterate_matrix fills this with the 0-based
    # row index of the input matrix.
    day: int
    # Where the block begins within the day. iterate_matrix computes it as
    # slot index * (HOURS_IN_DAY / number of columns), so presumably hours.
    start: float
    # Where the block ends within the day, in the same unit as ``start``.
    end: float
    # Value associated with the block (the matrix cell in iterate_matrix).
    value: float

IterConfig dataclass

Bases: DataFrameConfig

Small wrapper to hold the column mapping in the DataFrame.

Source code in latent_calendar/plot/iterate.py
@dataclass
class IterConfig(DataFrameConfig):
    """Column-name mapping for a DataFrame with explicit day/start/end columns.

    Attributes:
        day: Name of the column holding the day.
        start: Name of the column holding the start of each block.
        end: Name of the column holding the end of each block.
        value: Name of the column holding the value of each block.

    """

    day: str = "day_of_week"
    start: str = "hour_start"
    end: str = "hour_end"
    value: str = "value"

    @property
    def columns(self) -> list[str]:
        """Names of the day, start and end columns (``value`` is not listed)."""
        return [self.day, self.start, self.end]

    def extract_columns(self, df: pd.DataFrame) -> FRAME_ITER:
        """Pull the day, start, end and value columns out of ``df``.

        Args:
            df: DataFrame holding the calendar data.

        Returns:
            Tuple of (day, start, end, value), in that order.

        """
        self._check_columns(df)

        days = df[self.day]
        starts = df[self.start]
        ends = df[self.end]
        # NOTE(review): presumably substitutes VALUE_DEFAULT when the value
        # column is absent -- confirm against DataFrameConfig._default_repeat.
        values = self._default_repeat(df, self.value, VALUE_DEFAULT)

        return days, starts, ends, values

VocabIterConfig dataclass

Bases: DataFrameConfig

Small wrapper to hold the column mapping in the DataFrame.

Source code in latent_calendar/plot/iterate.py
@dataclass
class VocabIterConfig(DataFrameConfig):
    """Small wrapper to hold the column mapping in the DataFrame.

    The calendar position is read from a single string column whose entries
    are space separated: the first token is parsed as the day and the second
    as the start.

    Attributes:
        vocab: Name of the column holding the space-separated strings.
        value: Name of the column holding the value of each block.

    """

    vocab: str = "vocab"
    value: str = "value"

    @property
    def columns(self) -> list[str]:
        """Name of the vocab column (``value`` is not listed)."""
        return [self.vocab]

    def extract_columns(self, df: pd.DataFrame) -> FRAME_ITER:
        """Derive the day, start, end and value columns from ``df``.

        Args:
            df: DataFrame holding the calendar data.

        Returns:
            Tuple of (day, start, end, value), in that order. ``end`` is
            always ``start + 1``.

        """
        self._check_columns(df)

        # Split the column once and reuse the tokens for both fields instead
        # of re-running the same string split for each of them.
        tokens = df[self.vocab].str.split(" ")
        day = tokens.apply(lambda parts: int(parts[0]))
        start = tokens.apply(lambda parts: int(parts[1]))

        return (
            day,
            start,
            # Every block spans exactly one unit after its start.
            start + 1,
            # NOTE(review): presumably substitutes VALUE_DEFAULT when the value
            # column is absent -- confirm against DataFrameConfig._default_repeat.
            self._default_repeat(df, self.value, VALUE_DEFAULT),
        )

iterate_dataframe(df, config)

Iterate the calendar data in DataFrame form based on config.

Parameters:

| Name | Type | Description | Default |
|------|------|-------------|---------|
| df | DataFrame | DataFrame with calendar data. | required |
| config | DataFrameConfig | Configuration to describe what columns to use. | required |

Source code in latent_calendar/plot/iterate.py
def iterate_dataframe(
    df: pd.DataFrame,
    config: DataFrameConfig,
) -> CALENDAR_ITERATION:
    """Walk a DataFrame of calendar data, one CalendarData per row.

    The config decides which columns are read; the extracted columns are
    consumed in lockstep and each group of values is passed positionally to
    CalendarData.

    Args:
        df: DataFrame with calendar data.
        config: Configuration to describe what columns to use.

    Yields:
        A CalendarData instance for each row of the extracted columns.

    """
    extracted = config.extract_columns(df)
    yield from (CalendarData(*row) for row in zip(*extracted))

iterate_matrix(calendar_data)

Iterates the calendar matrix of values.

Source code in latent_calendar/plot/iterate.py
def iterate_matrix(calendar_data: np.ndarray) -> CALENDAR_ITERATION:
    """Walk a (days, time slots) matrix, one CalendarData per cell.

    Rows are days and columns are equally sized time slots. The slot length
    is HOURS_IN_DAY divided by the number of columns, so a finer grid (for
    example two columns per hour) needs no extra configuration.

    Args:
        calendar_data: 2d array with DAYS_IN_WEEK rows.

    Yields:
        A CalendarData for each cell, row by row, holding the row index, the
        slot's start and end, and the cell value.

    Raises:
        ValueError: If the array is not 2d or does not have DAYS_IN_WEEK
            rows. Raised when iteration starts, since this is a generator.

    """
    if calendar_data.ndim != 2:
        raise ValueError(f"Data must be 2d not of shape {calendar_data.shape}")

    row_count, slots_per_day = calendar_data.shape
    if row_count != DAYS_IN_WEEK:
        raise ValueError(f"Data must have {DAYS_IN_WEEK} days not {row_count}")

    # Length of one column, in the same unit as HOURS_IN_DAY.
    slot_width = HOURS_IN_DAY / slots_per_day

    for day_index, row in enumerate(calendar_data):
        for slot_index, cell in enumerate(row):
            slot_start = slot_index * slot_width
            yield CalendarData(day_index, slot_start, slot_start + slot_width, cell)

Comments