Source code for cowidev.vax.batch.malaysia

from cowidev.vax.utils.base import CountryVaxBase
import pandas as pd

from cowidev.utils.utils import check_known_columns
from cowidev.vax.utils.utils import build_vaccine_timeline


class Malaysia(CountryVaxBase):
    """Batch vaccination pipeline for Malaysia.

    Reads the per-vaccine, per-dose CSV referenced by ``source_url``,
    aggregates it into cumulative country-level metrics and exports the
    result through ``export_datafile``.
    """

    location = "Malaysia"
    source_url = "https://github.com/MoH-Malaysia/covid19-public/raw/main/vaccination/vax_malaysia.csv"
    source_url_ref = "https://github.com/MoH-Malaysia/covid19-public"
    # Dec 29, 2021 / Given the very low proportion of CanSino vaccines used in the country
    # we infer that "pending" doses are very likely to be 2-dose protocols, and therefore use
    # them as such in the calculations.
    _vax_2d = [
        "pfizer",
        "astra",
        "sinovac",
        "sinopharm",
        "pending",
    ]
    _vax_1d = [
        "cansino",
    ]

    def read(self) -> pd.DataFrame:
        """Load the source CSV and run the known-columns check on it.

        Returns:
            The table read from ``source_url``, unmodified.
        """
        df = pd.read_csv(self.source_url)
        # The list below is the set of columns this pipeline knows about; it is
        # handed to check_known_columns together with the freshly read frame.
        check_known_columns(
            df,
            [
                "date",
                "daily_partial",
                "daily_full",
                "daily",
                "daily_partial_child",
                "daily_full_child",
                "daily_booster",
                "daily_booster_adol",
                "daily_booster_child",
                "daily_booster2",
                "daily_booster2_adol",
                "daily_booster2_child",
                "cumul_partial",
                "cumul_full",
                "cumul",
                "cumul_partial_child",
                "cumul_full_child",
                "cumul_booster",
                "cumul_booster_adol",
                "cumul_booster_child",
                "cumul_booster2",
                "cumul_booster2_adol",
                "cumul_booster2_child",
                "pfizer1",
                "pfizer2",
                "pfizer3",
                "pfizer4",
                "sinovac1",
                "sinovac2",
                "sinovac3",
                "sinovac4",
                "astra1",
                "astra2",
                "astra3",
                "astra4",
                "sinopharm1",
                "sinopharm2",
                "sinopharm3",
                "sinopharm4",
                "cansino",
                "cansino3",
                "cansino4",
                "pending1",
                "pending2",
                "pending3",
                "pending4",
                "daily_partial_adol",
                "daily_full_adol",
                "cumul_full_adol",
                "cumul_partial_adol",
            ],
        )
        return df

    def pipe_check_columns(self, df: pd.DataFrame) -> pd.DataFrame:
        """Fail when the frame is wider than the pipeline expects.

        Args:
            df: Frame to inspect (in ``pipeline`` this is the filtered frame).

        Returns:
            ``df`` unchanged.

        Raises:
            ValueError: If ``df`` has more than 28 columns.
        """
        expected_cols = 28
        n_cols = df.shape[1]
        if n_cols > expected_cols:
            raise ValueError(
                f"More columns ({n_cols}) than expected ({expected_cols}) are present. Check for new vaccines?"
            )
        return df

    def pipe_filter_columns(self, df: pd.DataFrame) -> pd.DataFrame:
        """Keep the date column and the per-vaccine dose columns.

        The single-dose ``cansino`` column is renamed to ``cansino1`` so that
        every vaccine column ends in its dose number.

        Args:
            df: Frame as returned by ``read``.

        Returns:
            A frame restricted to the matching columns.
        """
        # NOTE: DataFrame.filter(regex=...) matches anywhere in the column
        # name, so any column merely containing one of these tokens is kept.
        keep_pattern = "|".join(self._vax_2d + self._vax_1d + ["date"])
        return df.filter(regex=keep_pattern).rename(columns={"cansino": "cansino1"})

    def pipe_calculate_metrics(self, df: pd.DataFrame) -> pd.DataFrame:
        """Turn per-vaccine dose columns into cumulative metrics per date.

        Args:
            df: Frame with a ``date`` column plus columns named
                ``<vaccine><dose_number>``.

        Returns:
            One row per date, sorted by date, with integer cumulative
            ``total_vaccinations``, ``people_vaccinated``,
            ``people_fully_vaccinated`` and ``total_boosters``.
        """
        metrics = ["total_vaccinations", "people_vaccinated", "people_fully_vaccinated", "total_boosters"]

        # Long format: one row per (date, vaccine column), then split the
        # trailing digits (dose number) from the vaccine name.
        long = df.melt(id_vars="date", var_name="vaccine", value_name="doses")
        long["dose_number"] = long["vaccine"].str.extract(r"(\d+)$", expand=False).astype(int)
        long["vaccine"] = long["vaccine"].str.replace(r"(\d+)$", "", regex=True)

        # Wide format: one column per dose number. Reindexing guarantees that
        # columns 1-4 exist even when a dose number is absent from the input,
        # in which case it counts as zero doses.
        wide = (
            long.pivot(index=["date", "vaccine"], columns="dose_number", values="doses")
            .reindex(columns=[1, 2, 3, 4])
            .fillna(0)
            .reset_index()
        )

        is_two_dose = wide["vaccine"].isin(self._vax_2d)
        is_one_dose = wide["vaccine"].isin(self._vax_1d)

        wide["total_vaccinations"] = wide[1] + wide[2] + wide[3] + wide[4]
        wide["people_vaccinated"] = wide[1]
        # Full vaccination is reached at dose 2 for two-dose vaccines and at
        # dose 1 for single-dose ones; every later dose counts as a booster.
        wide.loc[is_two_dose, "people_fully_vaccinated"] = wide[2]
        wide.loc[is_one_dose, "people_fully_vaccinated"] = wide[1]
        wide.loc[is_two_dose, "total_boosters"] = wide[3] + wide[4]
        wide.loc[is_one_dose, "total_boosters"] = wide[2] + wide[3] + wide[4]

        # Collapse vaccines into one row per date, then accumulate over time.
        totals = wide[["date", *metrics]].groupby("date", as_index=False).sum().sort_values("date")
        totals[metrics] = totals[metrics].cumsum().astype(int)
        return totals

    def pipe_metadata(self, df: pd.DataFrame) -> pd.DataFrame:
        """Attach the ``location`` and ``source_url`` columns.

        ``source_url`` is filled with ``source_url_ref`` (the repository
        page), not with the raw CSV address.
        """
        return df.assign(
            location=self.location,
            source_url=self.source_url_ref,
        )

    def pipe_columns_out(self, df: pd.DataFrame) -> pd.DataFrame:
        """Select and order the output columns.

        The ``vaccine`` column is not created by any method of this class;
        presumably it is added by ``build_vaccine_timeline`` -- confirm there.
        """
        return df[
            [
                "date",
                "people_vaccinated",
                "people_fully_vaccinated",
                "total_vaccinations",
                "total_boosters",
                "vaccine",
                "location",
                "source_url",
            ]
        ]

    def pipeline(self, df: pd.DataFrame) -> pd.DataFrame:
        """Run every processing step on the frame returned by ``read``.

        Steps, in order: filter columns, check column count, compute metrics,
        build the vaccine timeline, add metadata, select output columns.
        """
        # Vaccine name -> date passed to build_vaccine_timeline
        # (presumably the date each vaccine came into use -- verify).
        vaccine_timeline = {
            "Pfizer/BioNTech": "2021-02-24",
            "Sinovac": "2021-03-03",
            "Oxford/AstraZeneca": "2021-05-03",
            "CanSino": "2021-05-09",
            "Sinopharm/Beijing": "2021-09-18",
        }
        return (
            df.pipe(self.pipe_filter_columns)
            .pipe(self.pipe_check_columns)
            .pipe(self.pipe_calculate_metrics)
            .pipe(build_vaccine_timeline, vaccine_timeline)
            .pipe(self.pipe_metadata)
            .pipe(self.pipe_columns_out)
        )

    def export(self):
        """Read the source, run the pipeline and export the resulting frame."""
        df = self.read().pipe(self.pipeline)
        self.export_datafile(df)
def main():
    """Instantiate the Malaysia pipeline and run its export step."""
    country = Malaysia()
    country.export()