Skip to content

UFO Sightings

import matplotlib.pyplot as plt

from latent_calendar.datasets import load_ufo_sightings

# Load the UFO sightings data; `df` is reused by the snippets that follow.
df = load_ufo_sightings()

Each row of the dataset is a UFO sighting somewhere around the world.

            Date_time                  city state/province country  ...                                        description date_documented    latitude   longitude
0 1949-10-10 20:30:00            san marcos             tx      us  ...  This event took place in early fall around 194...       4/27/2004  29.8830556  -97.941111
1 1949-10-10 21:00:00          lackland afb             tx     NaN  ...  1949 Lackland AFB&#44 TX.  Lights racing acros...      12/16/2005    29.38421  -98.581082
2 1955-10-10 17:00:00  chester (uk/england)            NaN      gb  ...  Green/Orange circular disc over Chester&#44 En...       1/21/2008        53.2   -2.916667
3 1956-10-10 21:00:00                  edna             tx      us  ...  My older brother and twin sister were leaving ...       1/17/2004  28.9783333  -96.645833
4 1960-10-10 20:00:00               kaneohe             hi      us  ...  AS a Marine 1st Lt. flying an FJ4B fighter/att...       1/22/2004  21.4180556 -157.803611

[5 rows x 11 columns]
# Extract the calendar year from each sighting's timestamp into a new "year" column.
df["year"] = df["Date_time"].dt.year

# Aggregate the events through the `cal` accessor, passing the "year" column and
# the "Date_time" timestamp column. Presumably this yields one wide-format row
# per year -- confirm against the latent_calendar documentation.
df_wide = df.cal.aggregate_events("year", "Date_time")

# Keep only the last five rows of the wide table.
df_5_year = df_wide.tail(5)

The result has the data in wide format, where each row is a weekly calendar.

day_of_week  0                                                                                  1     ...  5     6
hour        0  1  2  3  4  5  6  7   8   9   10  11   12  13  14   15  16 17 18 19 20 21 22 23 0  1   ... 22 23 0  1  2  3  4  5  6  7  8  9   10  11  12  13  14   15 16 17 18 19 20 21 22 23
Country                                                                                               ...
Australia    0  0  0  0  0  0  0  0   0  27   6  19   20   7   2  105  27  0  0  0  0  0  0  0  0  0  ...  0  0  0  0  0  0  0  0  0  0  0  0  11  37  20   0  16    0  0  0  0  0  0  0  0  0
Austria      0  0  0  0  0  0  0  0   9  55  81   0   18   0   0   33   0  0  0  0  0  0  0  0  0  0  ...  0  0  0  0  0  0  0  0  0  0  0  0   0   0   0   0  37    0  0  0  0  0  0  0  0  0
Bahrain      0  0  0  0  0  0  0  0   0   0   1   6    0  13   0    0   0  0  0  0  0  0  0  0  0  0  ...  0  0  0  0  0  0  0  0  0  0  0  0   0   0   0   0   0    0  0  0  0  0  0  0  0  0
Belgium      0  0  0  0  0  0  0  0  15  15  42  26  109  54  38   17  26  0  0  0  0  0  0  0  0  0  ...  0  0  0  0  0  0  0  0  0  0  0  0  18  20  28  41   0  131  0  0  0  0  0  0  0  0
Bermuda      0  0  0  0  0  0  0  0   0   0   0   0    0   0   0    0   0  0  0  0  0  0  0  0  0  0  ...  0  0  0  0  0  0  0  0  0  0  0  0   0   0   0   0   0    0  0  0  0  0  0  0  0  0

[5 rows x 168 columns]

We can make further use of the cal attribute to plot an aggregate of the data, along with plots of the data by day of week and hour of day.

# Three side-by-side panels: the summed calendar, then one panel per aggregation.
fig, axes = plt.subplots(ncols=3)
calendar_ax, dow_ax, hour_ax = axes

# Sum the rows of df_5_year into a single calendar and draw it on the first panel.
df_5_year.sum().cal.plot_row(ax=calendar_ax)
calendar_ax.set(title="Weekly UFO sightings")

for aggregation, ax in (("dow", dow_ax), ("hour", hour_ax)):
    # Sum over the chosen vocabulary, normalize with "probs", and scale by 100
    # so the values can be read as percentages.
    totals = df_5_year.cal.sum_over_vocab(aggregation=aggregation)
    percentages = totals.cal.normalize("probs").mul(100)
    percentages.T.plot(ax=ax)

    # Pin the y-axis at zero and leave 10% headroom above the current upper limit.
    _, upper = ax.get_ylim()
    ax.set_ylim(0, upper * 1.1)

# Reuse the calendar panel's x tick positions on the day-of-week panel.
dow_ax.set_xticks(calendar_ax.get_xticks())

dow_ax.set(
    title="Chance of UFO sighting by day of week",
    xlabel="Day of week",
    ylabel="Chance of UFO sighting (%)",
)
hour_ax.set(
    title="Chance of UFO sighting by hour of day",
    xlabel="Hour of day",
    ylabel="",
)

fig.suptitle("UFO sightings over 5 years")
plt.show()

UFO Sightings

Comments