# Source code for cowidev.vax.batch.netherlands

import pandas as pd

from cowidev.utils.clean.dates import week_to_date
from cowidev.vax.utils.utils import build_vaccine_timeline
from cowidev.vax.utils.checks import VACCINES_ONE_DOSE
from cowidev.vax.utils.base import CountryVaxBase


class Netherlands(CountryVaxBase):
    """Batch vaccination pipeline for the Netherlands.

    Downloads a CSV of administered doses from the ``mzelst/covid-19`` GitHub
    repository and turns it into cumulative, country-level vaccination metrics.

    Input columns read by the pipeline: ``year``, ``week``, ``vaccine``,
    ``dose_number`` and ``total_administered``.
    """

    # Raw CSV that is downloaded by `read`.
    source_url: str = (
        "https://github.com/mzelst/covid-19/raw/master/data-rivm/vaccines-ecdc/vaccines_administered_nl.csv"
    )
    # Human-facing reference written to the `source_url` output column.
    source_url_ref = "https://github.com/mzelst/covid-19"
    location: str = "Netherlands"
    # Filled by `pipe_get_vax_timeline`: {mapped vaccine name: earliest date seen}.
    vax_timeline: dict = None
    # Vaccine labels found in the source -> names used in the output.
    vaccines_mapping: dict = {
        "Oxford/AstraZeneca": "Oxford/AstraZeneca",
        "Pfizer/BioNTech": "Pfizer/BioNTech",
        "Moderna": "Moderna",
        "Johnson&Johnson": "Johnson&Johnson",
        "NVXD": "Novavax",
    }

    def read(self):
        """Load the source CSV from `source_url` into a DataFrame."""
        return pd.read_csv(self.source_url)

    def pipe_filter_rows(self, df: pd.DataFrame) -> pd.DataFrame:
        """Keep only the rows with a strictly positive `total_administered`."""
        return df[df.total_administered > 0]

    def pipe_date(self, df: pd.DataFrame) -> pd.DataFrame:
        """Add a `date` column built from `year`/`week` and drop those two columns.

        The conversion is delegated row by row to `week_to_date`.
        """
        dates = df.apply(lambda row: week_to_date(row.year, row.week), axis=1)
        return df.assign(date=dates).drop(columns=["week", "year"])

    def pipe_get_vax_timeline(self, df: pd.DataFrame) -> pd.DataFrame:
        """Record the first date each vaccine appears in `self.vax_timeline`.

        Rows whose vaccine is ``"UNK"`` are ignored for the timeline only; the
        frame itself is returned unchanged.

        Raises:
            ValueError: if a vaccine label (other than ``"UNK"``) is not a key of
                `vaccines_mapping`.
        """
        known = df[df.vaccine != "UNK"]
        unexpected = set(known.vaccine).difference(self.vaccines_mapping)
        if unexpected:
            raise ValueError(f"Some unknown vaccines were found {unexpected}")
        first_seen = known[["vaccine", "date"]].groupby("vaccine").min().to_dict()["date"]
        self.vax_timeline = {self.vaccines_mapping[name]: date for name, date in first_seen.items()}
        return df

    def pipe_metrics(self, df: pd.DataFrame) -> pd.DataFrame:
        """Spread `total_administered` over the four metric columns.

        Every row counts towards `total_vaccinations`. The remaining columns are
        only filled on matching rows (NaN elsewhere):

        * `people_vaccinated`: dose number 1.
        * `people_fully_vaccinated`: dose 2, or dose 1 for vaccines listed in
          `VACCINES_ONE_DOSE`.
        * `total_boosters`: any dose beyond the ones above (dose > 2, or dose > 1
          for vaccines listed in `VACCINES_ONE_DOSE`).

        The input frame is not modified; a new frame is returned.
        """
        doses = df.total_administered
        dose_number = df.dose_number
        single_dose = df.vaccine.isin(VACCINES_ONE_DOSE)
        # `~` is the supported way to negate a boolean mask (unary minus is not).
        multi_dose = ~single_dose

        # `assign` returns a new frame, so the caller's data is left untouched.
        df = df.assign(total_vaccinations=doses)

        # Right-hand sides are full-length Series; `.loc` aligns them on the index.
        df.loc[dose_number == 1, "people_vaccinated"] = doses

        completed = ((dose_number == 2) & multi_dose) | ((dose_number == 1) & single_dose)
        df.loc[completed, "people_fully_vaccinated"] = doses

        boosted = ((dose_number > 2) & multi_dose) | ((dose_number > 1) & single_dose)
        df.loc[boosted, "total_boosters"] = doses
        return df

    def pipe_metrics_aggregate(self, df: pd.DataFrame) -> pd.DataFrame:
        """Sum the metrics per `date`, treating missing values as 0.

        The per-row columns `dose_number`, `total_administered` and `vaccine` are
        dropped first. The result is sorted by `date`.
        """
        per_row_columns = ["dose_number", "total_administered", "vaccine"]
        return (
            df.drop(columns=per_row_columns)
            .fillna(0)
            .groupby("date", as_index=False)
            .sum()
            .sort_values("date")
        )

    def pipe_metrics_cumsum(self, df: pd.DataFrame) -> pd.DataFrame:
        """Replace the per-date metrics by their running totals (row order as given)."""
        metrics = ["people_vaccinated", "people_fully_vaccinated", "total_vaccinations", "total_boosters"]
        df[metrics] = df[metrics].cumsum()
        return df

    def pipe_metadata(self, df: pd.DataFrame) -> pd.DataFrame:
        """Add the constant `location` and `source_url` columns."""
        return df.assign(location=self.location, source_url=self.source_url_ref)

    def pipe_vaccine(self, df: pd.DataFrame) -> pd.DataFrame:
        """Apply `build_vaccine_timeline` with the timeline gathered earlier.

        NOTE(review): the helper is expected to add the `vaccine` column that
        `pipeline` selects afterwards -- confirm against its implementation.
        """
        return build_vaccine_timeline(df, self.vax_timeline)

    def pipeline(self, df: pd.DataFrame) -> pd.DataFrame:
        """Run every processing step in order and return the output columns.

        `pipe_get_vax_timeline` must run before `pipe_metrics_aggregate` (which
        drops the `vaccine` column) and before `pipe_vaccine` (which reads
        `self.vax_timeline`).
        """
        output_columns = [
            "location",
            "date",
            "vaccine",
            "source_url",
            "total_vaccinations",
            "people_vaccinated",
            "people_fully_vaccinated",
            "total_boosters",
        ]
        processed = (
            df.pipe(self.pipe_filter_rows)
            .pipe(self.pipe_date)
            .pipe(self.pipe_get_vax_timeline)
            .pipe(self.pipe_metrics)
            .pipe(self.pipe_metrics_aggregate)
            .pipe(self.pipe_metrics_cumsum)
            .pipe(self.pipe_metadata)
            .pipe(self.pipe_vaccine)
        )
        return processed[output_columns]

    def export(self):
        """Read the source, process it and hand the result to `export_datafile`."""
        df = self.read().pipe(self.pipeline)
        self.export_datafile(df)


def main():
    """Entry point: build the Netherlands pipeline and export its data."""
    Netherlands().export()