Skip to content

Segments

Create hand picked segments on the calendar.

Examples:

Create some segments for a calendar:

# Each box spans day indices 0 through 6 (day_end is exclusive) and the hours
# from hour_start up to, but not including, hour_end.
mornings = create_box_segment(
    day_start=0, day_end=7, hour_start=6, hour_end=11, name="Mornings"
)
afternoons = create_box_segment(
    day_start=0, day_end=7, hour_start=11, hour_end=16, name="Afternoons"
)
evenings = create_box_segment(
    day_start=0, day_end=7, hour_start=16, hour_end=21, name="Evenings"
)

# Combine the individual Series into one wide DataFrame, one row per segment.
df_segments = stack_segments([
    mornings,
    afternoons,
    evenings,
])

# NOTE(review): `cal` is an accessor defined outside this module; presumably
# this draws one calendar per segment row -- confirm against the plotting docs.
df_segments.cal.plot_by_row()

New Segments

create_blank_segment_series()

Helper for making segments programmatically.

Source code in latent_calendar/segments/hand_picked.py
def create_blank_segment_series() -> pd.Series:
    """Build an all-zero Series indexed by ``FULL_VOCAB``.

    Returns:
        Series of zeros that other helpers fill in to describe a segment.

    """
    blank = pd.Series(data=0, index=FULL_VOCAB)
    return blank

create_box_segment(day_start, day_end, hour_start, hour_end, name=None)

Programmatically make segment of box described by inputs.

Source code in latent_calendar/segments/hand_picked.py
def create_box_segment(
    day_start: int,
    day_end: int,
    hour_start: int,
    hour_end: int,
    name: str | None = None,
) -> pd.Series:
    """Build a rectangular segment covering the same hours on several days.

    Args:
        day_start: first day-of-week index included in the box
        day_end: day-of-week index at which the box stops (exclusive)
        hour_start: hour at which each day's highlighted range begins
        hour_end: hour at which each day's highlighted range ends
        name: name of the returned Series; when falsy a label is derived
            from the four bounds

    Returns:
        Series over the full vocab, accumulated from one range per day

    """
    box = create_blank_segment_series()

    # Add one per-day range at a time; range() leaves day_end itself out.
    for day in range(day_start, day_end):
        box += create_series_for_range(
            start=DOWHour(dow=day, hour=hour_start),
            end=DOWHour(dow=day, hour=hour_end),
        )

    if not name:
        name = f"{day_start}-{day_end} {hour_start}-{hour_end}"

    return box.rename(name)

create_dow_segments()

Programmatically make the DOW segments.

Each row is just each day of the week.

Returns:

Type Description
DataFrame

DataFrame in the df_segments wide format

Source code in latent_calendar/segments/hand_picked.py
def create_dow_segments() -> pd.DataFrame:
    """Build one segment per day of the week.

    Every segment highlights a single day from hour 0 to hour 24 and is
    named ``"<zero-padded day number>-<day>"``.

    Returns:
        DataFrame in the df_segments wide format, one row per day

    """

    def _whole_day(dow: int, label: str) -> pd.Series:
        # Segment spanning the entire day `dow`.
        padded = str(dow).zfill(2)
        return create_hourly_segment(
            start=DOWHour(dow=dow, hour=0),
            end=DOWHour(dow=dow, hour=24),
            name=f"{padded}-{label}",
        )

    per_day = [
        _whole_day(dow, label) for dow, label in enumerate(create_default_days())
    ]
    return stack_segments(per_day)

create_empty_template()

Create blank template in order

Source code in latent_calendar/segments/hand_picked.py
def create_empty_template() -> pd.DataFrame:
    """Build a NaN-filled hour-by-day template.

    Returns:
        DataFrame with one row per hour (index named ``hour_start``) and
        one column per entry of ``create_default_days()``

    """
    hour_index = pd.Index(range(HOURS_IN_DAY), name="hour_start")
    day_columns = create_default_days()

    template = pd.DataFrame(np.nan, index=hour_index, columns=day_columns)
    return template

create_every_hour_segments()

Programmatically create segments for every hour.

Each row is just each time slot

Returns:

Type Description
DataFrame

DataFrame in the df_segments wide format

Source code in latent_calendar/segments/hand_picked.py
def create_every_hour_segments() -> pd.DataFrame:
    """Build one single-hour segment for every time slot of the week.

    Slots are generated day by day, hour by hour, and each is named with
    ``format_dow_hour``.

    Returns:
        DataFrame in the df_segments wide format, one row per time slot

    """
    hourly_slots = [
        create_hourly_segment(
            start=DOWHour(dow=day, hour=hr),
            end=DOWHour(dow=day, hour=hr + 1),
            name=format_dow_hour(day, hr),
        )
        for day in range(DAYS_IN_WEEK)
        for hr in range(HOURS_IN_DAY)
    ]
    return stack_segments(hourly_slots)

create_hourly_segment(start, end, name)

Highlight from start until end.

Source code in latent_calendar/segments/hand_picked.py
def create_hourly_segment(start: DOWHour, end: DOWHour, name: str) -> pd.Series:
    """Build a named segment highlighting the hours from start until end.

    Args:
        start: first day-of-week/hour of the highlighted range
        end: day-of-week/hour at which the highlighted range stops
        name: name given to the returned Series

    Returns:
        Series produced by ``create_series_for_range``, renamed to ``name``

    """
    highlighted = create_series_for_range(start=start, end=end)
    return highlighted.rename(name)

create_series_for_range(start, end)

Create a series for a range of hours with ones for those in range.

Source code in latent_calendar/segments/hand_picked.py
def create_series_for_range(start: DOWHour, end: DOWHour) -> pd.Series:
    """Build a 0/1 Series flagging the time slots between start and end.

    When ``start`` falls after ``end`` the two are swapped, the swapped span
    is marked, and the result is inverted, so the slots outside that span
    are the ones flagged.

    Args:
        start: day-of-week/hour where the range begins
        end: day-of-week/hour where the range stops; the last slot marked
            is the hour before ``end.hour``

    Returns:
        Integer Series over the full vocab with ones for slots in range

    """
    wraps_around = start.is_after(end)
    first, last = (end, start) if wraps_around else (start, end)

    mask = create_blank_segment_series()

    # Label slicing with .loc is inclusive at both ends, hence the "- 1".
    if isinstance(mask.index, pd.MultiIndex):
        lower = pd.IndexSlice[first.dow, first.hour]
        upper = pd.IndexSlice[last.dow, last.hour - 1]
    else:
        lower = format_dow_hour(first.dow, first.hour)
        upper = format_dow_hour(last.dow, last.hour - 1)

    mask.loc[lower:upper] = 1

    if wraps_around:
        # Flip zeros and ones so the complement of the swapped span is marked.
        mask = 1 - mask

    return mask.astype(int)

get_vocab_for_range(start, end)

Get the vocab for a range of hours.

Source code in latent_calendar/segments/hand_picked.py
def get_vocab_for_range(start: DOWHour, end: DOWHour) -> list[str]:
    """List the vocab entries that fall inside a range of hours.

    Args:
        start: day-of-week/hour where the range begins
        end: day-of-week/hour where the range stops

    Returns:
        Index labels whose value in ``create_series_for_range`` equals 1

    """
    flags = create_series_for_range(start=start, end=end)
    selected = flags.loc[flags == 1]
    return selected.index.tolist()

stack_segments(segments)

Stack segments into a single dataframe.

Source code in latent_calendar/segments/hand_picked.py
def stack_segments(segments: list[SEGMENT]) -> pd.DataFrame:
    """Combine several segments into one wide DataFrame.

    Args:
        segments: Series (a single segment) and/or DataFrames (segments
            already stacked as rows)

    Returns:
        DataFrame with one row per segment

    """
    as_columns = []
    for segment in segments:
        if isinstance(segment, pd.DataFrame):
            # Rows of an already stacked frame become columns before joining.
            as_columns.append(segment.T)
        else:
            as_columns.append(segment)

    side_by_side = pd.concat(as_columns, axis=1)
    return side_by_side.T

Processing off calendar distribution.

sum_array_over_segments(X_pred, X_segment)

Get the probability of the mask for the probabilities.

Parameters:

Name Type Description Default
X_pred ndarray

(nrows, n_times)

required
X_segment ndarray

(n_segments, n_times)

required

Returns:

Type Description
ndarray

Matrix of (nrows, n_segments) defining the probabilities of each segment

Source code in latent_calendar/segments/convolution.py
def sum_array_over_segments(X_pred: np.ndarray, X_segment: np.ndarray) -> np.ndarray:
    """Total the predicted values falling inside each segment mask.

    Args:
        X_pred: (nrows, n_times)
        X_segment: (n_segments, n_times)

    Returns:
        Matrix of (nrows, n_segments) holding the total for each segment

    """
    masked = _mask_probs(X_segment, X_pred)
    # Collapse the last axis, then transpose the remaining two.
    per_segment = masked.sum(axis=2)
    return per_segment.T

sum_next_hours(df, hours)

Sum the next hours columns.

Useful for finding the probability of having a tour in the next 5 hours. 00 00 column would be 06 06 23

TODO: Consider if negative hours should be allowed. TODO: Handle when minutes are not 60.

Parameters:

Name Type Description Default
df DataFrame

DataFrame of probabilities or counts in wide format

required
hours int

Number of hours to sum after the current hour

required

Returns:

Type Description
DataFrame

DataFrame summed over the next hours

Source code in latent_calendar/segments/convolution.py
def sum_next_hours(df: pd.DataFrame, hours: int) -> pd.DataFrame:
    """Add to every column the values of the ``hours`` columns after it.

    Useful for finding the probability of having a tour in the next few
    hours. The first ``hours`` columns are appended to the end before
    summing, so the last columns wrap around to the start.

    TODO: Consider if negative hours should be allowed
    TODO: Handle when minutes are not 60

    Arguments:
        df: DataFrame of probabilities or counts in wide format
        hours: Number of hours to sum after the current hour

    Returns:
        DataFrame summed over the next hours; ``df`` itself when ``hours``
        is 0

    Raises:
        ValueError: if ``hours`` is negative

    """
    if hours == 0:
        return df

    if hours < 0:
        raise ValueError("hours cannot be negative")

    # Pad with the leading columns so the window can run past the last one.
    padded = pd.concat([df, df.iloc[:, :hours]], axis=1)

    # rolling() looks backwards, so the columns are flipped first
    # (presumably what _reverse_columns does -- confirm) to sum forwards.
    flipped = _reverse_columns(padded)
    window_sums = flipped.T.rolling(hours + 1).sum().T

    # The first `hours` columns never had a complete window; drop them.
    complete = window_sums.iloc[:, hours:]
    return _reverse_columns(complete)

sum_over_segments(df, df_segments)

Sum DataFrame over user defined segments.

Parameters:

Name Type Description Default
df DataFrame

DataFrame of probabilities or counts in wide format

required
df_segments DataFrame

DataFrame of segments in wide format

required

Returns:

Type Description
DataFrame

DataFrame of probabilities or counts summed over the segments

Source code in latent_calendar/segments/convolution.py
def sum_over_segments(df: pd.DataFrame, df_segments: pd.DataFrame) -> pd.DataFrame:
    """Total a wide DataFrame over user-defined segments.

    Args:
        df: DataFrame of probabilities or counts in wide format
        df_segments: DataFrame of segments in wide format

    Returns:
        DataFrame indexed like ``df`` with one column per row of
        ``df_segments``

    """
    values = df.to_numpy()
    masks = df_segments.to_numpy()
    totals = sum_array_over_segments(values, masks)

    return pd.DataFrame(totals, index=df.index, columns=df_segments.index)

sum_over_vocab(df, aggregation='dow')

Sum the wide DataFrame columns to hours or dow.

Parameters:

Name Type Description Default
df DataFrame

DataFrame in wide format with vocab column names

required
aggregation str

either dow or hour

'dow'

Returns:

Type Description
DataFrame

DataFrame columns associated with the aggregation

Source code in latent_calendar/segments/convolution.py
def sum_over_vocab(df: pd.DataFrame, aggregation: str = "dow") -> pd.DataFrame:
    """Collapse the wide vocab columns down to day of week or hour.

    Args:
        df: DataFrame in wide format with MultiIndex vocab columns
        aggregation: either dow or hour

    Returns:
        DataFrame with one column per value of the chosen column level

    Raises:
        ValueError: if ``aggregation`` is not dow or hour, or if the
            columns are not a MultiIndex

    """
    # Column level to group on: level 0 for dow, level 1 for hour.
    level_by_aggregation = {"dow": 0, "hour": 1}

    if aggregation not in level_by_aggregation:
        raise ValueError("The aggregation must be hour or dow")

    if not isinstance(df.columns, pd.MultiIndex):
        raise ValueError("The columns must be a MultiIndex of day_of_week and hour.")

    # Transpose so the column levels can be grouped, then restore the layout.
    grouped = df.T.groupby(level=level_by_aggregation[aggregation]).sum()
    return grouped.T

Comments