"""Vaccination data scraper for Ecuador (module ``cowidev.vax.batch.ecuador``)."""

import pandas as pd

from cowidev.utils.clean import clean_date_series
from cowidev.utils.utils import check_known_columns
from cowidev.vax.utils.utils import make_monotonic, build_vaccine_timeline
from cowidev.vax.utils.base import CountryVaxBase


class Ecuador(CountryVaxBase):
    """Vaccination data pipeline for Ecuador.

    Two CSV files are read from the ``andrab/ecuacovid`` GitHub repository:

    * ``main``: one row per date with dose counts, processed by
      :meth:`pipeline`.
    * ``manufacturer``: dose counts per zone, manufacturer and date,
      processed by :meth:`pipeline_manufacturer`.

    :meth:`export` runs both pipelines and hands the results to
    ``export_datafile`` (provided by the base class).
    """

    location = "Ecuador"
    source_url_ref = "https://github.com/andrab/ecuacovid"
    source_url = {
        "manufacturer": f"{source_url_ref}/raw/master/datos_crudos/vacunometro/fabricantes.csv",
        "main": f"{source_url_ref}/raw/master/datos_crudos/vacunas/vacunas.csv",
    }
    # Source column -> output column, manufacturer file.
    columns_rename_manuf = {
        "fabricante": "vaccine",
        "dosis_total": "total_vaccinations",
        "administered_at": "date",
    }
    # Source column -> output column, main file.
    columns_rename = {
        "fecha": "date",
        "dosis_total": "total_vaccinations",
        "primera_dosis": "people_vaccinated",
        "segunda_dosis": "people_fully_vaccinated",
        "dosis_unica": "single_shots",
        "refuerzo_1": "boosters_1",
        "refuerzo_2": "boosters_2",
    }
    # Only the keys are used (as the set of accepted vaccine names in
    # ``pipe_manuf_vaccine_checks``); no renaming is applied with this mapping.
    vaccine_mapping = {
        "Pfizer/BioNTech": "Pfizer/BioNTech",
        "Sinovac": "Sinovac",
        "Oxford/AstraZeneca": "Oxford/AstraZeneca",
        "CanSino": "CanSino",
    }
    # Passed to ``build_vaccine_timeline``.
    # NOTE(review): presumably the date each vaccine was first used - confirm.
    vax_timeline = {
        "Pfizer/BioNTech": "2020-12-01",
        "Sinovac": "2021-03-06",
        "Oxford/AstraZeneca": "2021-03-17",
        "CanSino": "2021-08-03",
    }

    def read_manuf(self) -> pd.DataFrame:
        """Download the per-manufacturer CSV and return it unmodified."""
        return pd.read_csv(self.source_url["manufacturer"])

    def pipe_manuf_rename_cols(self, df: pd.DataFrame) -> pd.DataFrame:
        """Rename manufacturer-file columns according to ``columns_rename_manuf``."""
        return df.rename(columns=self.columns_rename_manuf)

    def pipe_manuf_aggregate(self, df: pd.DataFrame) -> pd.DataFrame:
        """Sum the numeric columns over zones, per vaccine and date.

        ``numeric_only=True`` keeps text columns (such as ``zona``) out of the
        sum. Without it, pandas >= 2.0 would try to add the strings together,
        which is meaningless and fails when the column mixes strings and NaN.
        """
        return df.groupby(["vaccine", "date"], as_index=False).sum(numeric_only=True)

    def pipe_manuf_vaccine_checks(self, df: pd.DataFrame) -> pd.DataFrame:
        """Ensure every vaccine name is a key of ``vaccine_mapping``.

        Raises:
            ValueError: If ``df.vaccine`` contains a name that is not known.
        """
        vaccines_wrong = set(df.vaccine).difference(self.vaccine_mapping)
        if vaccines_wrong:
            raise ValueError(f"Unknown vaccine(s) {vaccines_wrong}")
        return df

    def pipe_manuf_date(self, df: pd.DataFrame) -> pd.DataFrame:
        """Normalise the ``date`` column, parsing it as day/month/year."""
        return df.assign(date=clean_date_series(df.date, format_input="%d/%m/%Y"))

    def pipeline_manufacturer(self, df: pd.DataFrame) -> pd.DataFrame:
        """Turn the raw manufacturer file into the manufacturer output table.

        The input columns are first checked against the expected list with
        ``check_known_columns``. The result has the columns ``location``,
        ``date``, ``vaccine`` and ``total_vaccinations``, sorted by vaccine
        and date.
        """
        check_known_columns(
            df,
            [
                "zona",
                "fabricante",
                "dosis_total",
                "primera_dosis",
                "segunda_dosis",
                "dosis_unica",
                "dosis_refuerzo",
                "administered_at",
            ],
        )
        return (
            df.pipe(self.pipe_manuf_rename_cols)
            .pipe(self.pipe_manuf_aggregate)
            .pipe(self.pipe_manuf_vaccine_checks)
            .pipe(self.pipe_manuf_date)
            .assign(location=self.location)
            .sort_values(["vaccine", "date"])[["location", "date", "vaccine", "total_vaccinations"]]
        )

    def read(self) -> pd.DataFrame:
        """Download the main CSV and return it unmodified."""
        df = pd.read_csv(self.source_url["main"])
        return df

    def pipe_column_rename(self, df: pd.DataFrame) -> pd.DataFrame:
        """Rename main-file columns according to ``columns_rename``."""
        return df.rename(columns=self.columns_rename)

    def pipe_metrics(self, df: pd.DataFrame) -> pd.DataFrame:
        """Derive the output metrics from the renamed source columns.

        Single-shot doses are added to both ``people_vaccinated`` and
        ``people_fully_vaccinated``; ``total_boosters`` is the sum of the two
        booster columns.
        """
        return df.assign(
            people_vaccinated=df.people_vaccinated + df.single_shots,
            people_fully_vaccinated=df.people_fully_vaccinated + df.single_shots,
            total_boosters=df.boosters_1 + df.boosters_2,
        )

    def pipe_checks(self, df: pd.DataFrame) -> pd.DataFrame:
        """Placeholder for sanity checks; currently returns ``df`` unchanged."""
        return df

    def pipe_date(self, df: pd.DataFrame) -> pd.DataFrame:
        """Normalise the ``date`` column, parsing it as day/month/year."""
        # Keyword form, matching ``pipe_manuf_date``.
        return df.assign(date=clean_date_series(df.date, format_input="%d/%m/%Y"))

    def pipe_vaccines(self, df: pd.DataFrame) -> pd.DataFrame:
        """Apply ``build_vaccine_timeline`` with this country's ``vax_timeline``."""
        return build_vaccine_timeline(df, self.vax_timeline)

    def pipe_exclude_dp(self, df: pd.DataFrame) -> pd.DataFrame:
        """Drop the rows dated 2021-09-01 through 2021-09-07 (inclusive).

        The comparison is made against string literals.
        NOTE(review): this assumes ``date`` holds ISO ``YYYY-MM-DD`` strings at
        this point, so that string order equals date order - confirm.
        """
        return df[(df.date < "2021-09-01") | (df.date > "2021-09-07")]

    def pipeline(self, df: pd.DataFrame) -> pd.DataFrame:
        """Turn the raw main file into the country output table.

        The input columns are first checked against the expected list with
        ``check_known_columns``. Rows are sorted by date and filtered with
        :meth:`pipe_exclude_dp` before ``make_monotonic`` is applied.
        """
        check_known_columns(
            df,
            ["fecha", "dosis_total", "primera_dosis", "segunda_dosis", "dosis_unica", "refuerzo_1", "refuerzo_2"],
        )
        return (
            df.pipe(self.pipe_column_rename)
            .pipe(self.pipe_metrics)
            .pipe(self.pipe_checks)
            .pipe(self.pipe_date)
            .pipe(self.pipe_vaccines)
            .assign(location=self.location, source_url=self.source_url_ref)[
                [
                    "location",
                    "date",
                    "vaccine",
                    "source_url",
                    "total_vaccinations",
                    "people_vaccinated",
                    "people_fully_vaccinated",
                    "total_boosters",
                ]
            ]
            .sort_values("date")
            .pipe(self.pipe_exclude_dp)
            .pipe(make_monotonic)
        )

    def export(self):
        """Read and process both source files, then export the results."""
        # Manufacturer
        df_man = self.read_manuf().pipe(self.pipeline_manufacturer)
        # Main
        df = self.read().pipe(self.pipeline)
        # Export
        self.export_datafile(
            df=df,
            df_manufacturer=df_man,
            meta_manufacturer={
                "source_name": f"Ministerio de Salud Pública del Ecuador (via {self.source_url_ref})",
                "source_url": self.source_url_ref,
            },
        )


def main():
    """Entry point: build the Ecuador scraper and run its export."""
    scraper = Ecuador()
    scraper.export()