import datetime
import requests
import pandas as pd
from cowidev.utils.clean import clean_count, clean_date
from cowidev.vax.utils.base import CountryVaxBase
from cowidev.vax.utils.utils import build_vaccine_timeline, make_monotonic
class Sweden(CountryVaxBase):
    """Build the COVID-19 vaccination time series for Sweden.

    Two sources are combined: the HTML tables found at ``source_url_daily`` and
    the Excel workbook found at ``source_url_weekly``. Weekly rows are only kept
    for dates earlier than the first date present in the daily tables.
    """

    # Upper bound (seconds) on how long the request for the HTML page may block.
    _REQUEST_TIMEOUT = 60

    def __init__(self):
        """Set the source URLs and the location name."""
        self.source_url_daily = (
            "https://fohm.se/smittskydd-beredskap/utbrott/aktuella-utbrott/covid-19/statistik-och-analyser/"
            "statistik-over-registrerade-vaccinationer-covid-19/"
        )
        self.source_url_weekly = (
            "https://fohm.maps.arcgis.com/sharing/rest/content/items/fc749115877443d29c2a49ea9eca77e9/data"
        )
        self.location = "Sweden"
        self.columns_rename = None

    def read(self) -> pd.DataFrame:
        """Read both sources and stack them into one frame sorted by date.

        Returns:
            The daily rows plus those weekly rows dated strictly before the
            earliest daily date, sorted by ``date`` with a fresh index.
        """
        daily = self._read_daily_data()
        weekly = self._read_weekly_data()
        # Daily data takes precedence: weekly rows only fill the period before it starts.
        weekly = weekly[weekly["date"] < daily["date"].min()]
        return pd.concat([daily, weekly]).sort_values("date").reset_index(drop=True)

    def _fetch_daily_tables(self) -> list:
        """Download the page at ``source_url_daily`` and parse its HTML tables.

        Returns:
            The list of DataFrames produced by ``pd.read_html`` for the page.

        Raises:
            requests.HTTPError: If the server answers with an error status.
        """
        # NOTE(review): certificate verification is switched off (verify=False), exactly as
        # the original requests did; confirm this is still required for this host.
        response = requests.get(self.source_url_daily, verify=False, timeout=self._REQUEST_TIMEOUT)
        # Fail with an explicit HTTP error rather than trying to parse an error page.
        response.raise_for_status()
        return pd.read_html(response.content, encoding="utf-8")

    def _read_daily_data(self) -> pd.DataFrame:
        """Read daily data (latest) from HTML page.

        The tables are taken by position: table 0 holds doses, table 1 people,
        tables 2 and 3 the third and fourth dose counts.
        """
        dfs = self._fetch_daily_tables()
        df_doses = self._read_daily_data_doses(dfs[0])
        df_people = self._read_daily_data_people(dfs[1])
        df_boosters = self._read_daily_data_boosters(dfs[2], dfs[3])
        return self._merge_tables_daily(df_people, df_doses, df_boosters)

    def _read_daily_data_people(self, df: pd.DataFrame) -> pd.DataFrame:
        """Add ``people_vaccinated`` and ``people_fully_vaccinated`` to the people table."""
        # The column labels are reproduced exactly as they appear in the source
        # (including the missing space in "vaccinerademed").
        return df.assign(
            people_vaccinated=df["Antal vaccinerademed minst 1 dos (1)"].apply(clean_count),
            people_fully_vaccinated=df["Antal vaccinerademed minst 2 doser"].apply(clean_count),
        )

    def _read_daily_data_doses(self, df: pd.DataFrame) -> pd.DataFrame:
        """Add ``total_vaccinations`` to the doses table."""
        return df.assign(
            total_vaccinations=df["Antal vaccinationer"].apply(clean_count),
        )

    def _read_daily_data_boosters(self, df_1: pd.DataFrame, df_2: pd.DataFrame) -> pd.DataFrame:
        """Add ``total_boosters`` (third plus fourth doses) to the third-dose table.

        Args:
            df_1: Table holding the "Antal vaccinerade med 3 doser" column.
            df_2: Table holding the "Antal vaccinerade med 4 doser" column.
        """
        # NOTE(review): the two series are added by index position, not by "Datum";
        # this presumably relies on both tables listing the same dates in the same order.
        return df_1.assign(
            total_boosters=(
                df_1["Antal vaccinerade med 3 doser"].apply(clean_count)
                + df_2["Antal vaccinerade med 4 doser"].apply(clean_count)
            ),
        )

    def _merge_tables_daily(
        self, df_people: pd.DataFrame, df_doses: pd.DataFrame, df_boosters: pd.DataFrame
    ) -> pd.DataFrame:
        """Join the three daily tables on their date column and keep the metric columns.

        Returns:
            A frame with ``date``, ``total_vaccinations``, ``people_vaccinated``,
            ``people_fully_vaccinated`` and ``total_boosters``.
        """
        df = (
            df_people.merge(df_doses, on="Datum")
            .rename(
                columns={
                    "Datum": "date",
                }
            )
            .merge(df_boosters, left_on="date", right_on="Datum")
        )
        # Nullable integers, so missing values do not force the counts to float.
        int_columns = ["people_vaccinated", "people_fully_vaccinated", "total_boosters"]
        df[int_columns] = df[int_columns].astype("Int64")
        return df[["date", "total_vaccinations", "people_vaccinated", "people_fully_vaccinated", "total_boosters"]]

    def _read_weekly_data(self) -> pd.DataFrame:
        """Read weekly data.

        This data is loaded from an excel. It contains very clean (but sparse, i.e. weekly) data.

        Returns:
            One row per (year, week) with ``total_vaccinations``, ``people_vaccinated``,
            ``people_fully_vaccinated`` and a ``date`` column, sorted by ``date``.
        """
        # sheet_name=None loads every sheet into a dict keyed by sheet name.
        dfs = pd.read_excel(self.source_url_weekly, sheet_name=None)
        # Read data
        df_doses = self._read_weekly_data_doses(dfs)
        df_people = self._read_weekly_data_people(dfs)
        # Merge
        df = df_doses.merge(df_people, on=["År", "Vecka"])
        # Date: build "<year>-W<week>+0" and let clean_date parse it.
        # NOTE(review): in strptime terms %W is a Monday-based week number and %w=0 is
        # Sunday; this assumes clean_date passes the format to strptime and that the
        # source weeks are not ISO weeks (which would need %G/%V/%u) - confirm.
        ds = df["År"].astype(str) + "-W" + df["Vecka"].astype(str) + "+0"
        df["date"] = ds.apply(lambda x: clean_date(x, "%Y-W%W+%w"))
        # Prepare output
        return df.drop(columns=["Vecka", "År"]).sort_values("date")

    def _read_weekly_data_doses(self, dfs) -> pd.DataFrame:
        """Read weekly data for number of vaccinations administered.

        Also stores in ``self.latest_boosters`` the sum of the ``Antal_dos4`` and
        ``Antal_dos3`` columns of the two "Vaccinerade kommun dos N" sheets.

        Args:
            dfs: Mapping from sheet name to DataFrame, as loaded from the workbook.
        """
        df = dfs["Vaccinationer tidsserie"]
        # Filter rows and columns of interest
        df_doses = df.loc[df.Region == "| Sverige |", ["Vecka", "År", "Antal vaccinationer"]]
        df_doses = df_doses.rename(columns={"Antal vaccinationer": "total_vaccinations"})
        # This attribute is what pipe_add_boosters reads; it was previously stored under
        # the misspelled name "latest_boosdters", which left latest_boosters undefined.
        self.latest_boosters = (
            dfs["Vaccinerade kommun dos 4"]["Antal_dos4"].sum() + dfs["Vaccinerade kommun dos 3"]["Antal_dos3"].sum()
        )
        return df_doses

    def _read_weekly_data_people(self, dfs) -> pd.DataFrame:
        """Read weekly data for number of vaccinated people.

        Args:
            dfs: Mapping from sheet name to DataFrame, as loaded from the workbook.

        Raises:
            ValueError: If ``Vaccinationsstatus`` holds a value other than
                "Minst 1 dos" or "Minst 2 doser".
        """
        df = dfs["Vaccinerade tidsserie"]
        # Filter rows and columns of interest
        df_people = df.loc[df.Region == "| Sverige |", ["Vecka", "År", "Antal vaccinerade", "Vaccinationsstatus"]]
        # Guard against new status labels, which would otherwise become unexpected columns.
        cols_wrong = set(df_people.Vaccinationsstatus).difference({"Minst 1 dos", "Minst 2 doser"})
        if cols_wrong:
            raise ValueError(f"Unknown columns: {cols_wrong}")
        # Pivot & rename columns: one column per vaccination status.
        df_people = (
            df_people.pivot(index=["Vecka", "År"], columns="Vaccinationsstatus", values="Antal vaccinerade")
            .reset_index()
            .rename(
                columns={
                    "Minst 1 dos": "people_vaccinated",
                    "Minst 2 doser": "people_fully_vaccinated",
                }
            )
        )
        return df_people

    def pipeline(self, df: pd.DataFrame) -> pd.DataFrame:
        """Apply the processing steps to the frame returned by :meth:`read`."""
        # pipe_add_boosters is deliberately not part of the chain at the moment.
        # NOTE(review): keep=False removes every row whose date is duplicated, not
        # just the repeats; presumably intentional - confirm.
        return (
            df.pipe(self.pipe_vaccine)
            .pipe(self.pipe_columns)
            .pipe(self.pipe_out_columns)
            .pipe(make_monotonic)
            .drop_duplicates(subset=["date"], keep=False)
        )

    def pipe_vaccine(self, df: pd.DataFrame) -> pd.DataFrame:
        """Pass the frame to ``build_vaccine_timeline`` with each vaccine's start date."""
        # Source: https://www.ecdc.europa.eu/en/publications-data/data-covid-19-vaccination-eu-eea
        return build_vaccine_timeline(
            df,
            {
                "Pfizer/BioNTech": "2021-01-01",
                "Moderna": "2021-01-15",
                "Oxford/AstraZeneca": "2021-02-12",
                "Novavax": "2022-03-11",
            },
        )

    def pipe_columns(self, df: pd.DataFrame) -> pd.DataFrame:
        """Add the constant ``location`` and ``source_url`` columns."""
        return df.assign(location=self.location, source_url=self.source_url_daily)

    def pipe_out_columns(self, df: pd.DataFrame) -> pd.DataFrame:
        """Select the output columns, in their final order."""
        return df[
            [
                "date",
                "location",
                "source_url",
                "vaccine",
                "total_vaccinations",
                "people_vaccinated",
                "people_fully_vaccinated",
                "total_boosters",
            ]
        ]

    def pipe_add_boosters(self, df: pd.DataFrame) -> pd.DataFrame:
        """Write ``self.latest_boosters`` into the most recent row (up to yesterday).

        ``self.latest_boosters`` is set by :meth:`_read_weekly_data_doses`, so the
        weekly data must have been read before this step runs.
        """
        # The existing data only allows us to know the latest value of total_boosters. Ideally at
        # some point we'll get a full time series, but for now we assign this value to the last day
        # of the time series, so that it can at least be shown on bar charts and maps.
        yesterday = str(datetime.date.today() - datetime.timedelta(days=1))
        # Copy the filtered slice so the assignment below targets an independent frame
        # instead of a possible view of the caller's data.
        df = df[df.date <= yesterday].copy()
        df.loc[df.date == df.date.max(), "total_boosters"] = self.latest_boosters
        return df

    def _read_daily_data_age_split(self) -> pd.DataFrame:
        """[deprecated] Read daily data (latest) from HTML page with two tables.

        One table with adult numbers, the other one with teen numbers (12-15 yo).
        """
        dfs = self._fetch_daily_tables()
        df_adults = self._read_daily_data_people(dfs[1])
        df_teens = self._get_df_teens_daily(dfs[2])
        df_doses = self._read_daily_data_doses(dfs[0])
        return self._merge_tables_daily_split(df_adults, df_teens, df_doses)

    def _get_df_teens_daily(self, df: pd.DataFrame) -> pd.DataFrame:
        """[deprecated] Add ``people_vaccinated`` to the teens table (people vaccinated < 16 yo)."""
        return df.assign(
            people_vaccinated=df["Antal vaccinerade med minst 1 dos"].apply(clean_count),
        )

    def _merge_tables_daily_split(
        self, df_adults: pd.DataFrame, df_teens: pd.DataFrame, df_doses: pd.DataFrame
    ) -> pd.DataFrame:
        """[deprecated] Sum adult and teen people metrics, then join the doses table."""
        # Merge people metrics; overlapping column names get pandas' default merge suffixes,
        # which the regex filters below pick up before summing row-wise.
        df = df_adults.merge(df_teens, on="Datum", how="left")
        df = df.assign(
            people_vaccinated=df.filter(regex="people_vaccinated_*").sum(axis=1),
            people_fully_vaccinated=df.filter(regex="people_fully_vaccinated_*").sum(axis=1),
        )
        # Merge
        df = df.merge(df_doses, on="Datum").rename(
            columns={
                "Datum": "date",
            }
        )
        int_columns = ["people_vaccinated", "people_fully_vaccinated"]
        df[int_columns] = df[int_columns].astype("Int64")
        return df[["date", "total_vaccinations", "people_vaccinated", "people_fully_vaccinated"]]

    def merge_with_current_booster_data(self, output_path, df: pd.DataFrame) -> pd.DataFrame:
        """Fill missing ``total_boosters`` values from the CSV stored at ``output_path``.

        Args:
            output_path: Location of a CSV with ``date`` and ``total_boosters`` columns.
            df: Freshly built data containing ``date`` and ``total_boosters``.

        Returns:
            ``df`` where each missing ``total_boosters`` is replaced by the value
            the CSV holds for the same date, when there is one.
        """
        df_current = pd.read_csv(output_path, usecols=["date", "total_boosters"]).rename(
            columns={"total_boosters": "total_boosters_hist"}
        )
        df = df.merge(df_current, on="date", how="left")
        df = df.assign(total_boosters=df.total_boosters.fillna(df.total_boosters_hist)).drop(
            columns=["total_boosters_hist"]
        )
        return df

    def export(self):
        """Read, process, backfill boosters from the existing output, and export."""
        df = self.read().pipe(self.pipeline)
        # output_path and export_datafile are not defined in this class; presumably
        # they are provided by CountryVaxBase.
        df = self.merge_with_current_booster_data(self.output_path, df)
        self.export_datafile(df)
def main():
    """Run the Sweden export: instantiate the class and call its ``export`` method."""
    Sweden().export()