import pandas as pd
from cowidev.utils.utils import check_known_columns
from cowidev.vax.utils.base import CountryVaxBase
from cowidev.vax.utils.utils import build_vaccine_timeline
# Translation table from the product names found in the source's ``vakcina``
# column (keys) to the vaccine names used in the output (values).
# ``check_vaccine_names`` rejects any product name that is not a key here, so
# a new product appearing in the source must be added before the import can run.
vaccine_mapping = {
    "Comirnaty": "Pfizer/BioNTech",
    "Comirnaty 5-11": "Pfizer/BioNTech",
    "Comirnaty Original/Omicron BA.1": "Pfizer/BioNTech",
    "Comirnaty Original/Omicron BA.4/BA.5": "Pfizer/BioNTech",
    "COVID-19 Vaccine Janssen": "Johnson&Johnson",
    "Covishield": "Oxford/AstraZeneca",
    "Nuvaxovid": "Novavax",
    "Sinopharm": "Sinopharm/Beijing",
    "Sinovac": "Sinovac",
    "SPIKEVAX": "Moderna",
    "Spikevax bivalent Original/Omicron BA.1": "Moderna",
    "VAXZEVRIA": "Oxford/AstraZeneca",
    "COVAXIN": "Covaxin",
}
# Translated vaccine names for which ``infer_one_dose_vaccines`` copies the
# first-dose count into the second-dose column.
one_dose_vaccines = ["Johnson&Johnson"]
def read(source: str) -> pd.DataFrame:
    """Load the raw vaccination CSV and check its column set.

    Args:
        source: Path or URL handed to ``pandas.read_csv``.

    Returns:
        The parsed data frame, returned as read.
    """
    expected_columns = [
        "id",
        "datum",
        "vakcina",
        "kraj_nuts_kod",
        "kraj_nazev",
        "vekova_skupina",
        "prvnich_davek",
        "druhych_davek",
        "celkem_davek",
    ]
    df = pd.read_csv(source)
    # Project helper; how it reacts to a mismatch is not visible from this file.
    check_known_columns(df, expected_columns)
    return df
def check_vaccine_names(df: pd.DataFrame) -> pd.DataFrame:
    """Drop rows without a vaccine name and reject unmapped vaccine names.

    Args:
        df: Frame with a ``vakcina`` column.

    Returns:
        ``df`` without the rows whose ``vakcina`` is missing.

    Raises:
        ValueError: If ``vakcina`` holds a name that is not a key of
            ``vaccine_mapping``.
    """
    df = df.dropna(subset=["vakcina"])
    unknown_vaccines = set(df["vakcina"].unique()) - set(vaccine_mapping)
    if unknown_vaccines:
        raise ValueError("Found unknown vaccines: {}".format(unknown_vaccines))
    return df
def translate_vaccine_names(df: pd.DataFrame) -> pd.DataFrame:
    """Translate the product names in ``vakcina`` using ``vaccine_mapping``.

    Args:
        df: Frame with a ``vakcina`` column.

    Returns:
        A new frame whose ``vakcina`` values have been translated; names
        that are not keys of the mapping are left as they are.
    """
    # Restrict the replacement to the vaccine column: a frame-wide replace
    # scans every column and would also rewrite a matching string that
    # happens to appear in an unrelated column.
    return df.assign(vakcina=df["vakcina"].replace(vaccine_mapping))
def enrich_source(df: pd.DataFrame) -> pd.DataFrame:
    """Return a copy of ``df`` with a constant ``source_url`` column.

    Args:
        df: Any data frame.

    Returns:
        A new frame; an existing ``source_url`` column is overwritten.
    """
    return df.assign(source_url="https://onemocneni-aktualne.mzcr.cz/covid-19")
def enrich_location(df: pd.DataFrame) -> pd.DataFrame:
    """Return a copy of ``df`` with a constant ``location`` column ("Czechia").

    Args:
        df: Any data frame.

    Returns:
        A new frame; an existing ``location`` column is overwritten.
    """
    return df.assign(location="Czechia")
def base_pipeline(df: pd.DataFrame) -> pd.DataFrame:
    """Validate the vaccine names of the raw frame, then translate them.

    Args:
        df: Raw frame as returned by ``read``.

    Returns:
        The frame after ``check_vaccine_names`` and
        ``translate_vaccine_names`` have been applied, in that order.
    """
    checked = check_vaccine_names(df)
    return translate_vaccine_names(checked)
def breakdown_per_vaccine(df: pd.DataFrame) -> pd.DataFrame:
    """Build the cumulative number of doses per vaccine and date.

    Args:
        df: Frame with ``datum``, ``vakcina`` and ``celkem_davek`` columns.

    Returns:
        A frame with columns ``date``, ``vaccine``, ``total_vaccinations``
        (running total of ``celkem_davek`` within each vaccine) and the
        column added by ``enrich_location``.
    """
    # One row per (date, vaccine), ordered by date so that the running total
    # below accumulates chronologically within each vaccine.
    daily = (
        df.groupby(by=["datum", "vakcina"], as_index=False)[["celkem_davek"]]
        .sum()
        .sort_values("datum")
    )
    running_total = daily.groupby("vakcina")["celkem_davek"].cumsum()
    return (
        daily.assign(total_vaccinations=running_total)
        .drop(columns="celkem_davek")
        .rename(columns={"datum": "date", "vakcina": "vaccine"})
        .pipe(enrich_location)
    )
def aggregate_by_date_vaccine(df: pd.DataFrame) -> pd.DataFrame:
    """Sum the dose counts per date and vaccine.

    Args:
        df: Frame with ``datum``, ``vakcina``, ``prvnich_davek``,
            ``druhych_davek`` and ``celkem_davek`` columns.

    Returns:
        One row per (``datum``, ``vakcina``) with the summed columns
        ``1`` (from ``prvnich_davek``), ``2`` (from ``druhych_davek``),
        ``total_boosters`` and ``total_vaccinations``. The integer labels
        ``1`` and ``2`` are the column names later steps rely on.
    """
    summed_columns = ["prvnich_davek", "druhych_davek", "boosters", "celkem_davek"]
    # Boosters are whatever remains of the total once the first and second
    # dose counts are taken out.
    with_boosters = df.assign(
        boosters=df["celkem_davek"] - df["prvnich_davek"] - df["druhych_davek"]
    )
    totals = (
        with_boosters.groupby(by=["datum", "vakcina"])[summed_columns]
        .sum()
        .reset_index()
    )
    return totals.rename(
        columns={
            "prvnich_davek": 1,
            "druhych_davek": 2,
            "boosters": "total_boosters",
            "celkem_davek": "total_vaccinations",
        }
    )
def infer_one_dose_vaccines(df: pd.DataFrame) -> pd.DataFrame:
    """Copy the first-dose count into the second-dose column for one-dose vaccines.

    Args:
        df: Frame with a ``vakcina`` column and dose columns labelled ``1``
            and ``2``.

    Returns:
        A new frame in which, for rows whose ``vakcina`` is listed in
        ``one_dose_vaccines``, column ``2`` equals column ``1``. The input
        frame is left untouched.
    """
    # Work on a copy so the caller's frame is not modified as a side effect.
    result = df.copy()
    is_one_dose = result["vakcina"].isin(one_dose_vaccines)
    result.loc[is_one_dose, 2] = result.loc[is_one_dose, 1]
    return result
def remove_vaccines(df: pd.DataFrame, vaccine_schedule: dict, min_doses: int = 100) -> dict:
    """Drop vaccines with a negligible number of doses from the schedule.

    Args:
        df: Frame with ``vakcina`` and ``total_vaccinations`` columns.
        vaccine_schedule: Mapping keyed by vaccine name. It is modified in
            place and also returned.
        min_doses: Vaccines whose summed ``total_vaccinations`` is strictly
            below this value are removed. Defaults to 100.

    Returns:
        ``vaccine_schedule`` without the removed vaccines.
    """
    doses_per_vaccine = df.groupby("vakcina")["total_vaccinations"].sum()
    negligible = doses_per_vaccine[doses_per_vaccine < min_doses].index
    for vaccine in negligible:
        # Such a vaccine is treated as never having been approved in Czechia.
        vaccine_schedule.pop(vaccine, None)
    return vaccine_schedule
def aggregate_by_date(df: pd.DataFrame) -> pd.DataFrame:
    """Collapse the per-vaccine rows into one row per date.

    Args:
        df: Frame with ``datum``, ``vakcina``, dose columns ``1`` and ``2``,
            ``total_vaccinations`` and ``total_boosters``.

    Returns:
        The per-date sums (``people_vaccinated``, ``people_fully_vaccinated``,
        ``total_vaccinations``, ``total_boosters``) with ``datum`` renamed to
        ``date``, passed through ``build_vaccine_timeline`` together with the
        vaccine schedule.
    """
    # Earliest date on which each vaccine appears, minus the vaccines that
    # ``remove_vaccines`` filters out.
    first_dates = df.groupby("vakcina")["datum"].min().to_dict()
    vaccine_schedule = remove_vaccines(df, first_dates)

    daily = (
        df.groupby(by="datum")
        .agg(
            people_vaccinated=(1, "sum"),  # column 1 holds first doses
            people_fully_vaccinated=(2, "sum"),  # column 2 holds second doses
            total_vaccinations=("total_vaccinations", "sum"),
            total_boosters=("total_boosters", "sum"),
        )
        .reset_index()
        .rename(columns={"datum": "date"})
    )
    # Project helper; what it adds to the frame is not visible from this file.
    return daily.pipe(build_vaccine_timeline, vaccine_schedule)
def enrich_cumulated_sums(df: pd.DataFrame) -> pd.DataFrame:
    """Turn the daily counts into running totals ordered by date.

    Args:
        df: Frame with a ``date`` column and the daily columns
            ``total_vaccinations``, ``people_vaccinated``,
            ``people_fully_vaccinated`` and ``total_boosters``.

    Returns:
        A new frame sorted by ``date`` in which each of the four columns
        holds its cumulative sum as integers.
    """
    cumulative_columns = [
        "total_vaccinations",
        "people_vaccinated",
        "people_fully_vaccinated",
        "total_boosters",
    ]
    # Sort before accumulating. Taking the cumulative sum of the unsorted
    # frame and aligning it onto the sorted one by index yields running
    # totals in the caller's row order, which is only right when the input
    # already happens to be sorted by date.
    ordered = df.sort_values(by="date")
    return ordered.assign(
        **{col: ordered[col].cumsum().astype(int) for col in cumulative_columns}
    )
class Czechia(CountryVaxBase):
    """Vaccination data import for Czechia."""

    # Country name for this importer.
    location = "Czechia"
    # CSV endpoint that ``export`` reads; also reported as the source of the
    # manufacturer breakdown.
    source_url = "https://onemocneni-aktualne.mzcr.cz/api/v2/covid-19/ockovani.csv"

    def pipeline(self, df: pd.DataFrame) -> pd.DataFrame:
        """Build the main per-date table from the validated raw frame.

        Args:
            df: Frame produced by ``base_pipeline``.

        Returns:
            The frame after all pipeline steps have been applied in order.
        """
        return (
            df.pipe(aggregate_by_date_vaccine)
            .pipe(infer_one_dose_vaccines)
            .pipe(aggregate_by_date)
            # NOTE(review): ``format_date`` is neither defined nor imported in
            # the visible part of this module - confirm where it comes from.
            .pipe(format_date)
            .pipe(enrich_cumulated_sums)
            # NOTE(review): ``enrich_metadata`` is neither defined nor imported
            # in the visible part of this module - confirm where it comes from.
            .pipe(enrich_metadata)
        )

    def export(self):
        """Read the source, build both tables and hand them to ``export_datafile``."""
        base = read(self.source_url).pipe(base_pipeline)
        # Manufacturer data
        df_man = base.pipe(breakdown_per_vaccine)
        # Main data
        df = base.pipe(self.pipeline)
        # ``export_datafile`` is not defined in this class; presumably it is
        # inherited from ``CountryVaxBase``.
        self.export_datafile(
            df=df,
            df_manufacturer=df_man,
            meta_manufacturer={"source_name": "Ministry of Health", "source_url": self.source_url},
        )
def main():
    """Run the Czechia export."""
    Czechia().export()