Segments

Create hand-picked segments on the calendar.

Examples:

Create some segments for a calendar:

# Each box spans days 0 through 6 (day_end is exclusive, as in range()) and
# the hours from hour_start up to, but not including, hour_end.
mornings = create_box_segment(
    day_start=0, day_end=7, hour_start=6, hour_end=11, name="Mornings"
)
afternoons = create_box_segment(
    day_start=0, day_end=7, hour_start=11, hour_end=16, name="Afternoons"
)
evenings = create_box_segment(
    day_start=0, day_end=7, hour_start=16, hour_end=21, name="Evenings"
)

# Combine the individual Series into one wide DataFrame, one row per segment.
df_segments = stack_segments([
    mornings,
    afternoons,
    evenings,
])

# Plot via the `cal` accessor (presumably one calendar per segment row).
df_segments.cal.plot_by_row()

New Segments

`create_blank_segment_series()`

Helper for making segments programmatically.

Source code in latent_calendar/segments/hand_picked.py

def create_blank_segment_series() -> pd.Series:
    """Return an all-zero Series indexed by ``FULL_VOCAB``.

    This is the empty canvas the other segment builders fill in.
    """
    blank = pd.Series(0, index=FULL_VOCAB)
    return blank

`create_box_segment(day_start, day_end, hour_start, hour_end, name=None)`

Programmatically make segment of box described by inputs.

Source code in latent_calendar/segments/hand_picked.py

def create_box_segment(
    day_start: int,
    day_end: int,
    hour_start: int,
    hour_end: int,
    name: str | None = None,
) -> pd.Series:
    """Build a rectangular segment covering the same hours on several days.

    Args:
        day_start: first day of week included in the box
        day_end: day of week at which the box stops (excluded, as in ``range``)
        hour_start: hour passed as the start of the range on each day
        hour_end: hour passed as the end of the range on each day
        name: name for the returned Series; when missing or empty, a label of
            the form "<day_start>-<day_end> <hour_start>-<hour_end>" is used

    Returns:
        Series over the full vocabulary holding the summed per-day ranges

    """
    box = create_blank_segment_series()

    # Accumulate one day's hour range at a time onto the blank series.
    for day in range(day_start, day_end):
        box += create_series_for_range(
            start=DOWHour(dow=day, hour=hour_start),
            end=DOWHour(dow=day, hour=hour_end),
        )

    label = name if name else f"{day_start}-{day_end} {hour_start}-{hour_end}"
    return box.rename(label)

`create_dow_segments()`

Programmatically make the DOW segments.

Each row is just each day of the week.

Returns:

Type	Description
`DataFrame`	DataFrame in the df_segments wide format

Source code in latent_calendar/segments/hand_picked.py

def create_dow_segments() -> pd.DataFrame:
    """Build one segment per day of the week.

    Every segment runs from hour 0 to hour 24 of its day and is named with the
    zero-padded day position followed by the day name, e.g. "00-<day>".

    Returns:
        DataFrame in the df_segments wide format

    """
    day_segments = [
        create_hourly_segment(
            start=DOWHour(dow=position, hour=0),
            end=DOWHour(dow=position, hour=24),
            name=f"{position:02d}-{day_name}",
        )
        for position, day_name in enumerate(create_default_days())
    ]
    return stack_segments(day_segments)

`create_empty_template()`

Create a blank (all-NaN) template with one row per hour and one column per day, in order.

Source code in latent_calendar/segments/hand_picked.py

def create_empty_template() -> pd.DataFrame:
    """Return an all-NaN frame with one row per hour and one column per day.

    The row index is named "hour_start" and runs over ``range(HOURS_IN_DAY)``;
    the columns come from ``create_default_days()``.
    """
    hours = pd.Index(range(HOURS_IN_DAY), name="hour_start")
    days = create_default_days()
    return pd.DataFrame(np.nan, index=hours, columns=days)

`create_every_hour_segments()`

Programmatically create segments for every hour

Each row is just each time slot

Returns:

Type	Description
`DataFrame`	DataFrame in the df_segments wide format

Source code in latent_calendar/segments/hand_picked.py

def create_every_hour_segments() -> pd.DataFrame:
    """Build one single-hour segment for every (day of week, hour) slot.

    Slots are generated day by day, hour by hour, and each segment is named
    with ``format_dow_hour`` for its slot.

    Returns:
        DataFrame in the df_segments wide format

    """
    slots = itertools.product(range(DAYS_IN_WEEK), range(HOURS_IN_DAY))
    hourly = [
        create_hourly_segment(
            start=DOWHour(dow=day, hour=hr),
            # The end is the following hour, so each segment covers one slot.
            end=DOWHour(dow=day, hour=hr + 1),
            name=format_dow_hour(day, hr),
        )
        for day, hr in slots
    ]
    return stack_segments(hourly)

`create_hourly_segment(start, end, name)`

Highlight from start until end.

Source code in latent_calendar/segments/hand_picked.py

def create_hourly_segment(start: DOWHour, end: DOWHour, name: str) -> pd.Series:
    """Return the indicator Series for start-to-end, renamed to ``name``."""
    highlighted = create_series_for_range(start=start, end=end)
    return highlighted.rename(name)

`create_series_for_range(start, end)`

Create a series for a range of hours with ones for those in range.

Source code in latent_calendar/segments/hand_picked.py

def create_series_for_range(start: DOWHour, end: DOWHour) -> pd.Series:
    """Build a 0/1 indicator Series marking the hours from start up to end.

    When ``start`` is after ``end`` the endpoints are swapped, the span between
    them is marked, and the result is inverted, so the ones end up on
    everything outside that span.

    Args:
        start: first day-of-week/hour slot of the range
        end: day-of-week/hour slot at which the range stops (not marked itself)

    Returns:
        Integer Series over the full vocabulary with ones inside the range

    """
    indicator = create_blank_segment_series()

    wraps = start.is_after(end)
    if wraps:
        start, end = end, start

    # Label slices include both endpoints, so stop one hour before ``end``.
    last_hour = end.hour - 1
    if isinstance(indicator.index, pd.MultiIndex):
        first_label = pd.IndexSlice[start.dow, start.hour]
        last_label = pd.IndexSlice[end.dow, last_hour]
    else:
        first_label = format_dow_hour(start.dow, start.hour)
        last_label = format_dow_hour(end.dow, last_hour)

    indicator.loc[first_label:last_label] = 1

    if wraps:
        # Flip zeros and ones so the marked hours are those outside the span.
        indicator = 1 - indicator

    return indicator.astype(int)

`get_vocab_for_range(start, end)`

Get the vocab for a range of hours.

Source code in latent_calendar/segments/hand_picked.py

def get_vocab_for_range(start: DOWHour, end: DOWHour) -> list[str]:
    """List the index labels whose indicator value is 1 for the given range."""
    indicator = create_series_for_range(start=start, end=end)
    in_range = indicator == 1
    return indicator.loc[in_range].index.tolist()

`stack_segments(segments)`

Stack segments into a single dataframe.

Source code in latent_calendar/segments/hand_picked.py

def stack_segments(segments: list[SEGMENT]) -> pd.DataFrame:
    """Combine segments into one DataFrame.

    DataFrame inputs are transposed before being placed side by side with the
    other inputs; the combined frame is then transposed back.
    """
    as_columns = [
        segment.transpose() if isinstance(segment, pd.DataFrame) else segment
        for segment in segments
    ]
    side_by_side = pd.concat(as_columns, axis=1)
    return side_by_side.transpose()

Processing of calendar distribution.

`_mask_probs(X_segments, X_pred)`

Multiply out the mask.

Parameters:

Name	Type	Description	Default
`X_segments`		(n_segments, n_times)	required
`X_pred`		(nrows, n_times)	required

Returns:

Type	Description
`ndarray`	(n_segments, nrows, n_times) matrix of only the values that fall into the segments times

Source code in latent_calendar/segments/convolution.py

def _mask_probs(X_segments, X_pred) -> np.ndarray:
    """Apply every segment mask to every row of predictions.

    Args:
        X_segments: (n_segments, n_times)
        X_pred: (nrows, n_times)

    Returns:
        (n_segments, nrows, n_times) matrix of only the values that fall into the segments times

    """
    # The inserted middle axis lets each mask broadcast across all rows of X_pred.
    segment_masks = X_segments[:, np.newaxis, :]
    return segment_masks * X_pred

`_reverse_columns(df)`

Reverse the order of the columns.

Source code in latent_calendar/segments/convolution.py

def _reverse_columns(df: pd.DataFrame) -> pd.DataFrame:
    """Return ``df`` with its columns in the opposite order."""
    backwards = slice(None, None, -1)
    return df.iloc[:, backwards]

`sum_array_over_segments(X_pred, X_segment)`

Sum the probabilities that fall inside each segment mask.

Parameters:

Name	Type	Description	Default
`X_pred`	`ndarray`	(nrows, n_times)	required
`X_segment`	`ndarray`	(n_segments, n_times)	required

Returns:

Type	Description
`ndarray`	Matrix of (nrows, n_segments) defining the probabilities of each segment

Source code in latent_calendar/segments/convolution.py

def sum_array_over_segments(X_pred: np.ndarray, X_segment: np.ndarray) -> np.ndarray:
    """Total, for every row, the values that fall inside each segment mask.

    Args:
        X_pred: (nrows, n_times)
        X_segment: (n_segments, n_times)

    Returns:
        Matrix of (nrows, n_segments) defining the probabilities of each segments

    """
    masked = _mask_probs(X_segment, X_pred)
    # Collapse the time axis, leaving (n_segments, nrows), then flip to rows first.
    per_segment = masked.sum(axis=2)
    return per_segment.transpose()

`sum_next_hours(df, hours)`

Sum the next hours columns.

Useful for finding the probability of having a tour in the next 5 hours. 00 00 column would be 06 06 23

TODO: Consider if negative hours should be allowed. TODO: Handle when minutes are not 60.

Parameters:

Name	Type	Description	Default
`df`	`DataFrame`	DataFrame of probabilities or counts in wide format	required
`hours`	`int`	Number of hours to sum after the current hour	required

Returns:

Type	Description
`DataFrame`	DataFrame summed over the next hours

Source code in latent_calendar/segments/convolution.py

def sum_next_hours(df: pd.DataFrame, hours: int) -> pd.DataFrame:
    """Replace each column with the sum of itself and the next ``hours`` columns.

    The first ``hours`` columns are appended after the last one so that the
    trailing columns have following columns to sum over.

    Useful for finding probability of having tour in the next 5 hours
    00 00 column would be 06 06 23

    TODO: Consider if negative hours should be allowed
    TODO: Handle when minutes are not 60

    Args:
        df: DataFrame of probabilities or counts in wide format
        hours: Number of hours to sum after the current hour

    Returns:
        DataFrame summed over the next hours

    Raises:
        ValueError: if ``hours`` is negative

    """
    if hours < 0:
        msg = "hours cannot be negative"
        raise ValueError(msg)

    if hours == 0:
        return df

    # Append the leading columns so the window can run past the last column.
    wrapped = pd.concat([df, df.iloc[:, :hours]], axis=1)

    # rolling() looks backwards, so reverse the columns to sum forwards instead.
    window_sums = _reverse_columns(wrapped).T.rolling(hours + 1).sum().T

    # The first ``hours`` columns only saw a partial window; drop them.
    complete = window_sums.iloc[:, hours:]
    return _reverse_columns(complete)

`sum_over_segments(df, df_segments)`

Sum DataFrame over user defined segments.

Parameters:

Name	Type	Description	Default
`df`	`DataFrame`	DataFrame of probabilities or counts in wide format	required
`df_segments`	`DataFrame`	DataFrame of segments in wide format	required

Returns:

Type	Description
`DataFrame`	DataFrame of probabilities or counts summed over the segments

Source code in latent_calendar/segments/convolution.py

def sum_over_segments(df: pd.DataFrame, df_segments: pd.DataFrame) -> pd.DataFrame:
    """Aggregate a wide DataFrame over user defined segments.

    Args:
        df: DataFrame of probabilities or counts in wide format
        df_segments: DataFrame of segments in wide format

    Returns:
        DataFrame of probabilities or counts summed over the segments,
        with the rows of ``df`` as index and the segments as columns

    """
    totals = sum_array_over_segments(df.to_numpy(), df_segments.to_numpy())
    return pd.DataFrame(totals, index=df.index, columns=df_segments.index)

`sum_over_vocab(df, aggregation='dow')`

Sum the wide DataFrame columns to hours or dow.

Parameters:

Name	Type	Description	Default
`df`	`DataFrame`	DataFrame in wide format with vocab column names	required
`aggregation`	`str`	either dow or hour	`'dow'`

Returns:

Type	Description
`DataFrame`	DataFrame columns associated with the aggregation

Source code in latent_calendar/segments/convolution.py

def sum_over_vocab(df: pd.DataFrame, aggregation: str = "dow") -> pd.DataFrame:
    """Collapse the wide vocab columns down to days of week or hours.

    Args:
        df: DataFrame in wide format with vocab column names
        aggregation: either dow or hour

    Returns:
        DataFrame columns associated with the aggregation

    Raises:
        ValueError: if ``aggregation`` is not "dow" or "hour", or if the
            columns of ``df`` are not a MultiIndex

    """
    # Column level to group on: level 0 for "dow", level 1 for "hour".
    level_by_aggregation = {"dow": 0, "hour": 1}

    if aggregation not in level_by_aggregation:
        msg = "The aggregation must be hour or dow"
        raise ValueError(msg)

    if not isinstance(df.columns, pd.MultiIndex):
        raise ValueError("The columns must be a MultiIndex of day_of_week and hour.")

    # Group on the transposed frame so the columns can be summed by level.
    grouped = df.T.groupby(level=level_by_aggregation[aggregation])
    return grouped.sum().T

Segments

create_blank_segment_series()

create_box_segment(day_start, day_end, hour_start, hour_end, name=None)

create_dow_segments()

create_empty_template()

create_every_hour_segments()

create_hourly_segment(start, end, name)

create_series_for_range(start, end)

get_vocab_for_range(start, end)

stack_segments(segments)

_mask_probs(X_segments, X_pred)

_reverse_columns(df)

sum_array_over_segments(X_pred, X_segment)

sum_next_hours(df, hours)

sum_over_segments(df, df_segments)

sum_over_vocab(df, aggregation='dow')

Comments

`create_blank_segment_series()`

`create_box_segment(day_start, day_end, hour_start, hour_end, name=None)`

`create_dow_segments()`

`create_empty_template()`

`create_every_hour_segments()`

`create_hourly_segment(start, end, name)`

`create_series_for_range(start, end)`

`get_vocab_for_range(start, end)`

`stack_segments(segments)`

`_mask_probs(X_segments, X_pred)`

`_reverse_columns(df)`

`sum_array_over_segments(X_pred, X_segment)`

`sum_next_hours(df, hours)`

`sum_over_segments(df, df_segments)`

`sum_over_vocab(df, aggregation='dow')`