Skip to content

Model

Models for the joint distribution of weekly calendar data.

# Create a LatentCalendar with three components.
model = LatentCalendar(n_components=3, random_state=42)

# NOTE(review): df_wide is assumed to be a (rows, time slots) DataFrame — confirm.
X = df_wide.to_numpy()
model.fit(X)

# Latent representation of each row.
X_latent = model.transform(X)
# Marginal probability of each time slot for each row.
X_pred = model.predict(X)

ConjugateModel

Bases: BaseEstimator, TransformerMixin

Conjugate model for the calendar joint distribution.

This is a wrapper around the Dirichlet-multinomial conjugate model: a Dirichlet prior combined with multinomial calendar data.

This doesn't use dimensionality reduction, but it does use the conjugate model.

Parameters:

Name Type Description Default
a ndarray | None

(n_times,) prior for each time slot. If None, the prior is estimated from the data passed to fit: the fraction of rows with a positive value in each time slot.

None
Source code in latent_calendar/model/latent_calendar.py
class ConjugateModel(BaseEstimator, TransformerMixin):
    """Conjugate model for the calendar joint distribution.

    Wraps a Dirichlet prior over the time slots together with the
    multinomial-Dirichlet conjugate update. No dimensionality reduction
    is performed.

    Args:
        a: (n_times,) prior for each time slot. If None, the prior is
            estimated from the data passed to `fit` via `hourly_prior`.

    """

    def __init__(self, a: np.ndarray | None = None) -> None:
        self.a = a

    def fit(self, X, y=None) -> "ConjugateModel":
        """Fit the conjugate model.

        The constructor argument `a` is deliberately left untouched: a
        data-derived prior is kept in a local so that refitting on new data
        re-estimates it and `get_params` / `clone` keep reporting exactly
        what the caller passed in.

        Args:
            X: (nrows, n_times) data, only used when `a` is None.
            y: Ignored.

        Returns:
            The fitted estimator, with `prior_` set.

        """
        alpha = hourly_prior(X) if self.a is None else self.a

        self.prior_ = Dirichlet(alpha=alpha)
        return self

    def transform(self, X, y=None) -> np.ndarray:
        """Return the mean of the distribution built from `X` and the fitted prior.

        NOTE(review): presumably this is the posterior mean of the
        Dirichlet-multinomial update — confirm against `multinomial_dirichlet`.
        """
        return multinomial_dirichlet(x=X, prior=self.prior_).dist.mean()

    def predict(self, X, y=None) -> np.ndarray:
        """Return the same values as `transform`."""
        return self.transform(X, y=y)

fit(X, y=None)

Fit the conjugate model.

Source code in latent_calendar/model/latent_calendar.py
def fit(self, X, y=None) -> "ConjugateModel":
    """Fit the conjugate model.

    The constructor argument `a` is deliberately left untouched: a
    data-derived prior is kept in a local so that refitting on new data
    re-estimates it and `get_params` / `clone` keep reporting exactly what
    the caller passed in.

    Args:
        X: (nrows, n_times) data, only used when `a` is None.
        y: Ignored.

    Returns:
        The fitted estimator, with `prior_` set.

    """
    alpha = hourly_prior(X) if self.a is None else self.a

    self.prior_ = Dirichlet(alpha=alpha)
    return self

DummyModel

Bases: LatentCalendar

Return even probability of a latent.

This can be used as the worst possible baseline.

Source code in latent_calendar/model/latent_calendar.py
class DummyModel(LatentCalendar):
    """Return even probability of a latent.

    This can be used as the worst possible baseline.

    """

    def fit(self, X, y=None) -> "DummyModel":
        """Use one component with equal probability for every time slot.

        Only the number of columns of `X` is used; `y` is ignored.
        """
        # Even probability for everything
        self.n_components = 1
        TIME_SLOTS = X.shape[1]
        EVEN_PROBABILITY = 1 / TIME_SLOTS
        self.components_ = np.ones((self.n_components, TIME_SLOTS)) * EVEN_PROBABILITY

        return self

    def transform(self, X, y=None) -> np.ndarray:
        """Everyone has equal probability of being in each group."""
        nrows = len(X)

        return np.ones((nrows, self.n_components)) / self.n_components

    @classmethod
    def create(cls, n_times: int = 7 * 24) -> "DummyModel":
        """Return a dummy model ready for transforming and predicting.

        The uniform component is built directly instead of calling
        `fit(X=None)`, which cannot work because `fit` reads `X.shape`.

        Args:
            n_times: Number of time slots in the calendar.
                NOTE(review): the default assumes hourly slots over one
                week (7 * 24) — confirm it matches the project's grid.

        Raises:
            ValueError: If `n_times` is not positive.

        """
        if n_times < 1:
            raise ValueError("n_times must be a positive integer.")

        model = cls()
        model.n_components = 1
        model.components_ = np.full((1, n_times), 1 / n_times)

        return model

    @classmethod
    def from_prior(cls, prior: np.ndarray) -> "DummyModel":
        """Return a dummy model whose single component is `prior`."""
        model = cls()
        # (n_times,) -> (1, n_times)
        model.components_ = prior[np.newaxis, :]
        model.n_components = 1

        return model

create() classmethod

Return a dummy model ready for transforming and predicting.

Source code in latent_calendar/model/latent_calendar.py
@classmethod
def create(cls, n_times: int = 7 * 24) -> "DummyModel":
    """Return a dummy model ready for transforming and predicting.

    The uniform component is built directly instead of calling
    `fit(X=None)`, which cannot work because `fit` reads `X.shape`.

    Args:
        n_times: Number of time slots in the calendar.
            NOTE(review): the default assumes hourly slots over one week
            (7 * 24) — confirm it matches the project's grid.

    Raises:
        ValueError: If `n_times` is not positive.

    """
    if n_times < 1:
        raise ValueError("n_times must be a positive integer.")

    model = cls()
    model.n_components = 1
    # A single component spreading probability evenly over every slot.
    model.components_ = np.full((1, n_times), 1 / n_times)

    return model

fit(X, y=None)

All components have equal probability for every hour.

Source code in latent_calendar/model/latent_calendar.py
def fit(self, X, y=None) -> "DummyModel":
    """Set up a single component that is uniform over the time slots.

    Only the number of columns of `X` is used; `y` is ignored.
    """
    self.n_components = 1
    n_slots = X.shape[1]
    # One row per component, every slot weighted 1 / n_slots.
    self.components_ = np.full((self.n_components, n_slots), 1 / n_slots)

    return self

from_prior(prior) classmethod

Return a dummy model from a prior.

Source code in latent_calendar/model/latent_calendar.py
@classmethod
def from_prior(cls, prior: np.ndarray) -> "DummyModel":
    """Build a dummy model whose single component is the given prior."""
    instance = cls()
    instance.n_components = 1
    # Add a leading axis so the prior becomes the one and only component row.
    instance.components_ = prior[None, :]

    return instance

transform(X, y=None)

Everyone has equal probability of being in each group.

Source code in latent_calendar/model/latent_calendar.py
def transform(self, X, y=None) -> np.ndarray:
    """Assign every row the same weight for each component.

    Returns an array with one row per element of `X` and one column per
    component, each entry being 1 / n_components.
    """
    shape = (len(X), self.n_components)
    uniform = np.ones(shape=shape)

    return uniform / self.n_components

LatentCalendar

Bases: LatentDirichletAllocation

Model weekly calendar data as a mixture of multinomial distributions.

Adapted from sklearn's Latent Dirichlet Allocation model.

Provides a predict method that returns the marginal probability of each time slot for a given row and a transform method that returns the latent representation of each row.

Source code in latent_calendar/model/latent_calendar.py
class LatentCalendar(BaseLDA):
    """Model weekly calendar data as a mixture of multinomial distributions.

    Adapted from sklearn's [Latent Dirichlet Allocation](https://scikit-learn.org/stable/modules/generated/sklearn.decomposition.LatentDirichletAllocation.html) model.

    Provides a `predict` method that returns the marginal probability of each time slot for a given row and
    a `transform` method that returns the latent representation of each row.

    """

    @property
    def normalized_components_(self) -> np.ndarray:
        """Components that each sum to 1."""
        # Divide each component row by its own total; the new axis lets the
        # row sums broadcast across the time-slot columns.
        return self.components_ / self.components_.sum(axis=1)[:, np.newaxis]

    def joint_distribution(self, X_latent: np.ndarray) -> np.ndarray:
        """Marginalize out the components."""
        return joint_distribution(
            X_latent=X_latent, components=self.normalized_components_
        )

    def predict(self, X: np.ndarray, y=None) -> np.ndarray:
        r"""Return the marginal probabilities for a given row.

        Marginalize out the loads via law of total probability

        $$P[time=t | Row=r] = \sum_{l=0}^{c} P[time=t | L=l, Row=r] * P[L=l | Row=r]$$

        """
        # The docstring is raw so the LaTeX backslash in `\sum` is not read
        # as an (invalid) string escape sequence.
        # (n, n_components)
        X_latent = self.transform(X)

        return self.joint_distribution(X_latent=X_latent)

    @property
    def component_distribution_(self) -> np.ndarray:
        """Population frequency of each component."""
        # Share of the total component mass held by each component.
        return self.components_.sum(axis=1) / self.components_.sum()

component_distribution_: np.ndarray property

Population frequency of each component.

normalized_components_: np.ndarray property

Components that each sum to 1.

joint_distribution(X_latent)

Marginalize out the components.

Source code in latent_calendar/model/latent_calendar.py
def joint_distribution(self, X_latent: np.ndarray) -> np.ndarray:
    """Combine latent weights with the normalized components.

    Delegates to the module-level `joint_distribution` helper, passing
    this model's `normalized_components_`.
    """
    components = self.normalized_components_

    return joint_distribution(X_latent=X_latent, components=components)

predict(X, y=None)

Return the marginal probabilities for a given row.

Marginalize out the loads via law of total probability

\[P[time=t | Row=r] = \sum_{l=0}^{c} P[time=t | L=l, Row=r] * P[L=l | Row=r]\]
Source code in latent_calendar/model/latent_calendar.py
def predict(self, X: np.ndarray, y=None) -> np.ndarray:
    r"""Return the marginal probabilities for a given row.

    Marginalize out the loads via law of total probability

    $$P[time=t | Row=r] = \sum_{l=0}^{c} P[time=t | L=l, Row=r] * P[L=l | Row=r]$$

    """
    # The docstring is raw so the LaTeX backslash in `\sum` is not read as
    # an (invalid) string escape sequence.
    # (n, n_components)
    X_latent = self.transform(X)

    return self.joint_distribution(X_latent=X_latent)

MarginalModel

Bases: LatentCalendar

Source code in latent_calendar/model/latent_calendar.py
class MarginalModel(LatentCalendar):
    """Single-component model built from the column totals of the data."""

    def fit(self, X, y=None) -> "MarginalModel":
        """Store the per-time-slot totals of `X` as the only component."""
        self.n_components = 1
        column_totals = X.sum(axis=0)
        # (n_times,) -> (1, n_times)
        self.components_ = column_totals[None, :]

        return self

    def transform(self, X, y=None) -> np.ndarray:
        """Put every row entirely in the single component."""
        # (nrows, 1) column of ones
        return np.ones((len(X), 1), dtype=int)

fit(X, y=None)

Just sum over all the rows.

Source code in latent_calendar/model/latent_calendar.py
def fit(self, X, y=None) -> "MarginalModel":
    """Store the per-time-slot totals of `X` as the only component.

    `y` is ignored.
    """
    self.n_components = 1
    column_totals = X.sum(axis=0)
    # (n_times,) -> (1, n_times)
    self.components_ = column_totals[None, :]

    return self

transform(X, y=None)

There is only one component to be a part of.

Source code in latent_calendar/model/latent_calendar.py
def transform(self, X, y=None) -> np.ndarray:
    """Put every row entirely in the single component.

    Returns an (nrows, 1) integer array of ones.
    """
    return np.ones((len(X), 1), dtype=int)

constant_prior(X, value=1.0)

Return the prior for each hour of the day.

Every time slot gets the same prior, `value`; only the number of columns of X is used.

Parameters:

Name Type Description Default
X ndarray

(nrows, n_times)

required
Source code in latent_calendar/model/latent_calendar.py
def constant_prior(X: np.ndarray, value: float = 1.0) -> np.ndarray:
    """Return a flat prior with the same value for every time slot.

    Only the number of columns of `X` is used; its contents are ignored.

    Args:
        X: (nrows, n_times)
        value: Prior assigned to each time slot.

    Returns:
        (n_times,)

    """
    return np.repeat(value, repeats=X.shape[1])

hourly_prior(X)

Return the prior for each hour of the day.

This is the fraction of rows with a positive value in each time slot.

Parameters:

Name Type Description Default
X ndarray

(nrows, n_times)

required

Returns:

Type Description
ndarray

(n_times,)

Source code in latent_calendar/model/latent_calendar.py
def hourly_prior(X: np.ndarray) -> np.ndarray:
    """Return a data-driven prior for each time slot.

    For every column this is the fraction of rows whose value is
    greater than zero.

    Args:
        X: (nrows, n_times)

    Returns:
        (n_times,)

    """
    is_active = X > 0
    active_rows = is_active.sum(axis=0)

    return active_rows / len(X)

joint_distribution(X_latent, components)

Marginalize out the components.

Source code in latent_calendar/model/latent_calendar.py
def joint_distribution(X_latent: np.ndarray, components: np.ndarray) -> np.ndarray:
    """Weight the components by the latent representation of each row.

    Computes the matrix product of `X_latent` and `components`.
    """
    marginal = X_latent @ components

    return marginal

predict_on_dataframe(df, model)

Small wrapper to predict on DataFrame and keep same columns and index.

Source code in latent_calendar/model/utils.py
def predict_on_dataframe(df: pd.DataFrame, model: LatentCalendar) -> pd.DataFrame:
    """Predict on a DataFrame, reusing its columns and index for the result."""
    predictions = model.predict(df.to_numpy())

    return pd.DataFrame(predictions, columns=df.columns, index=df.index)

transform_on_dataframe(df, model)

Small wrapper to transform on DataFrame and keep index.

Source code in latent_calendar/model/utils.py
def transform_on_dataframe(df: pd.DataFrame, model: LatentCalendar) -> pd.DataFrame:
    """Transform a DataFrame, reusing its index for the result."""
    latent = model.transform(df.to_numpy())

    return pd.DataFrame(latent, index=df.index)

Comments