from datetime import datetime, timedelta
from cowidev.vax.utils.base import CountryVaxBase
import epiweeks
import pandas as pd
from cowidev.utils.utils import check_known_columns
from cowidev.vax.utils.checks import VACCINES_ONE_DOSE
from cowidev.vax.utils.utils import build_vaccine_timeline
class Slovakia(CountryVaxBase):
    """Vaccination data pipeline for Slovakia.

    Reads a semicolon-separated CSV with weekly dose counts, converts each
    (year, week) pair to a date, validates the vaccine labels, aggregates the
    administered doses per date and dose label, accumulates them over time and
    derives the output metrics.
    """

    location = "Slovakia"
    # CSV downloaded by `read`.
    source_url = (
        "https://github.com/Institut-Zdravotnych-Analyz/covid19-data/raw/main/Vaccination/"
        "OpenData_Slovakia_Vaccination_AgeGroup_District.csv"
    )
    source_url_ref = "https://github.com/Institut-Zdravotnych-Analyz/covid19-data"
    # Source vaccine label -> standardised vaccine name. Any label in the data
    # that is not a key here makes `pipe_vaccine_checks` raise.
    vaccine_mapping = {
        "ASTRAZENECA": "Oxford/AstraZeneca",
        "COMIRNATY": "Pfizer/BioNTech",
        "JANSSEN": "Johnson&Johnson",
        "MODERNA": "Moderna",
        "SPUTNIK": "Sputnik V",
        "NUVAXOVID": "Novavax",
    }
    # Set by `pipe_vaccine_checks` to {vaccine name: earliest date seen}.
    vax_timeline = None
    # Not referenced anywhere in this class; presumably used by the base class
    # or kept for reference -- TODO confirm.
    date_start = datetime(2021, 1, 4)

    def read(self) -> pd.DataFrame:
        """Download the source CSV and check its columns.

        Returns:
            The raw data frame as parsed by ``pd.read_csv`` with ``sep=";"``.
        """
        df = pd.read_csv(self.source_url, sep=";")
        # `check_known_columns` is defined elsewhere; presumably it fails when
        # the file's columns differ from this list -- verify against the helper.
        check_known_columns(
            df,
            [
                "iso_week",
                "iso_year",
                "week",
                "vaccine",
                "gender",
                "AgeGroup",
                "region",
                "district",
                "district_code",
                "dose",
                "doses_administered",
            ],
        )
        return df

    def pipe_date(self, df: pd.DataFrame) -> pd.DataFrame:
        """Add a ``date`` column and drop the week columns.

        The date is computed row by row with `_week_to_date`. The input frame
        is left untouched; a new frame is returned.
        """
        dates = df.apply(self._week_to_date, axis=1)
        return df.assign(date=dates).drop(columns=["iso_year", "iso_week", "week"])

    def _week_to_date(self, row):
        """Return the start date of the week given by ``row.iso_year`` / ``row.iso_week``."""
        # NOTE(review): the columns are named iso_*, but no week system is passed
        # to epiweeks here, so the library default applies. If that default is
        # not ISO, the start date differs from the ISO week start -- confirm
        # whether the ISO system should be requested explicitly.
        return epiweeks.Week(row.iso_year, row.iso_week).startdate()

    def pipe_vaccine_checks(self, df: pd.DataFrame) -> pd.DataFrame:
        """Validate vaccine labels, rename them and record the vaccine timeline.

        Side effect: sets ``self.vax_timeline`` to a dict mapping each
        (renamed) vaccine to the earliest ``date`` it appears with.

        Raises:
            ValueError: if the data has a vaccine label missing from
                ``vaccine_mapping``, or if a vaccine listed in
                ``VACCINES_ONE_DOSE`` has rows with dose ``"2"``.
        """
        unknown = set(df.vaccine).difference(self.vaccine_mapping)
        if unknown:
            raise ValueError(f"Unknown vaccine(s): {unknown}")
        df = df.assign(vaccine=df.vaccine.replace(self.vaccine_mapping))
        self.vax_timeline = df.groupby("vaccine").date.min().to_dict()
        # Single-dose vaccines must never be reported with a second dose.
        # The dose label is compared as the string "2".
        single_dose = [vax for vax in self.vaccine_mapping.values() if vax in VACCINES_ONE_DOSE]
        if "2" in set(df.loc[df.vaccine.isin(single_dose), "dose"]):
            raise ValueError("Some single-dose vaccines are registering second doses!")
        return df

    def pipe_reshape(self, df: pd.DataFrame) -> pd.DataFrame:
        """Sum doses per (date, dose) and pivot to one column per dose label."""
        # Collapse every other dimension (vaccine, gender, district, ...).
        df = df.groupby(["date", "dose"], as_index=False).doses_administered.sum()
        # One row per date, one column per dose label.
        df = df.pivot(index=["date"], columns="dose", values="doses_administered").reset_index()
        return df

    def pipe_cumsum(self, df: pd.DataFrame) -> pd.DataFrame:
        """Turn the per-date dose counts into cumulative totals.

        Expects the columns ``"1"``, ``"2"``, ``"fully"`` and ``"3"`` to exist.
        """
        df = df.sort_values("date")
        cols = ["1", "2", "fully", "3"]
        # Dates without a record for a dose label are NaN after the pivot:
        # carry the last cumulative value forward, and use 0 before the first
        # record. `.ffill()` replaces the deprecated `fillna(method="ffill")`.
        df[cols] = df[cols].cumsum().ffill().fillna(0)
        return df

    def pipe_metrics(self, df: pd.DataFrame) -> pd.DataFrame:
        """Derive the output metrics from the cumulative dose columns."""
        # NOTE(review): the "fully" column is not part of total_vaccinations --
        # confirm that doses labelled "fully" are meant to be left out.
        return df.assign(
            total_vaccinations=df["1"] + df["2"] + df["3"],
            people_vaccinated=df["1"],
            people_fully_vaccinated=df.fully,
            total_boosters=df["3"],
        )

    def pipe_out_columns(self, df: pd.DataFrame) -> pd.DataFrame:
        """Select the output columns, in output order."""
        # NOTE(review): no step defined in this class creates "location",
        # "source_url" or "vaccine" (the `pipe_vaccine` step is commented out in
        # `pipeline`), so they must come from `pipe_metadata` -- confirm that it
        # provides "vaccine", otherwise this selection raises KeyError.
        return df[
            [
                "location",
                "date",
                "vaccine",
                "source_url",
                "total_vaccinations",
                "people_vaccinated",
                "people_fully_vaccinated",
                "total_boosters",
            ]
        ]

    def pipeline(self, df: pd.DataFrame) -> pd.DataFrame:
        """Run all processing steps on the raw frame returned by `read`.

        `pipe_metadata` is not defined in this class; it is expected to be
        provided by the base class.
        """
        return (
            df.pipe(self.pipe_date)
            .pipe(self.pipe_vaccine_checks)
            .pipe(self.pipe_reshape)
            .pipe(self.pipe_cumsum)
            .pipe(self.pipe_metrics)
            # .pipe(self.pipe_vaccine)
            .pipe(self.pipe_metadata)
            .pipe(self.pipe_out_columns)
        )

    def export(self) -> None:
        """Read the source, process it and pass the result to ``export_datafile``.

        ``export_datafile`` is not defined in this class.
        """
        df = self.read().pipe(self.pipeline)
        self.export_datafile(df)
def main():
    """Entry point: build the Slovakia pipeline object and run its export."""
    scraper = Slovakia()
    scraper.export()