Source code for cowidev.vax.batch.israel

import datetime
from cowidev.vax.utils.base import CountryVaxBase

import pandas as pd

from cowidev.utils.clean import clean_date_series
from cowidev.utils.utils import check_known_columns
from cowidev.utils.web import request_json


[docs]class Israel(CountryVaxBase): location: str = "Israel" source_url: str = "https://datadashboardapi.health.gov.il/api/queries/vaccinated" source_url_ref: str = "https://datadashboard.health.gov.il/COVID-19/general" source_url_age: str = ( "https://github.com/dancarmoz/israel_moh_covid_dashboard_data/raw/master/vaccinated_by_age.csv" ) source_url_age_old = "https://github.com/dancarmoz/israel_moh_covid_dashboard_data/raw/master/old_files/vaccinated_by_age_2022_01_25.csv"
[docs] def read(self) -> pd.DataFrame: data = request_json(self.source_url) df = pd.DataFrame.from_records(data) check_known_columns( df, [ "Day_Date", "vaccinated", "vaccinated_cum", "vaccinated_population_perc", "vaccinated_seconde_dose", "vaccinated_seconde_dose_cum", "vaccinated_seconde_dose_population_perc", "vaccinated_third_dose", "vaccinated_third_dose_cum", "vaccinated_third_dose_population_perc", "vaccinated_fourth_dose_population_perc", "vaccinated_fourth_dose", "vaccinated_validity_perc", "vaccinated_expired_perc", "not_vaccinated_perc", "vaccinated_fourth_dose_cum", ], ) return df
[docs] def read_age(self): df = pd.read_csv(self.source_url_age) df_old = pd.read_csv(self.source_url_age_old) return pd.concat([df, df_old], ignore_index=True)
[docs] def pipe_rename_columns(self, df: pd.DataFrame) -> pd.DataFrame: return df.rename( columns={ "Day_Date": "date", "vaccinated_cum": "people_vaccinated", "vaccinated_seconde_dose_cum": "people_fully_vaccinated", "vaccinated_third_dose_cum": "third_dose", "vaccinated_fourth_dose_cum": "fourth_dose", } )
[docs] def pipe_date(self, df: pd.DataFrame) -> pd.DataFrame: return df.assign(date=df.date.str.slice(0, 10))
[docs] def pipe_filter_date(self, df: pd.DataFrame) -> pd.DataFrame: return df[df.date < str(datetime.date.today())]
[docs] def pipe_select_min_date(self, df: pd.DataFrame) -> pd.DataFrame: return df.groupby(["people_vaccinated", "people_fully_vaccinated"], as_index=False).min()
[docs] def pipe_total_boosters(self, df: pd.DataFrame) -> pd.DataFrame: return df.assign(total_boosters=df.third_dose + df.fourth_dose)
[docs] def pipe_total_vaccinations(self, df: pd.DataFrame) -> pd.DataFrame: return df.assign(total_vaccinations=df.people_vaccinated + df.people_fully_vaccinated + df.total_boosters)
[docs] def pipe_location(self, df: pd.DataFrame) -> pd.DataFrame: return df.assign( location=self.location, )
[docs] def pipe_source(self, df: pd.DataFrame) -> pd.DataFrame: return df.assign(source_url=self.source_url_ref)
[docs] def pipe_vaccine(self, df: pd.DataFrame) -> pd.DataFrame: def _enrich_vaccine(date: str) -> str: if date >= "2021-01-07": return "Moderna, Pfizer/BioNTech" return "Pfizer/BioNTech" return df.assign(vaccine=df.date.apply(_enrich_vaccine))
[docs] def pipe_nulls_as_nans(self, df: pd.DataFrame) -> pd.DataFrame: return df.assign(people_fully_vaccinated=df.people_fully_vaccinated.replace(0, pd.NA))
[docs] def pipe_output_columns(self, df: pd.DataFrame) -> pd.DataFrame: df = df[ [ "date", "total_vaccinations", "people_vaccinated", "people_fully_vaccinated", "total_boosters", "location", "source_url", "vaccine", ] ] return df
[docs] def pipeline(self, df: pd.DataFrame) -> pd.DataFrame: return ( df.pipe(self.pipe_rename_columns) .pipe(self.pipe_date) .pipe(self.pipe_filter_date) .pipe(self.pipe_select_min_date) .pipe(self.pipe_total_boosters) .pipe(self.pipe_total_vaccinations) .pipe(self.pipe_location) .pipe(self.pipe_source) .pipe(self.pipe_vaccine) .pipe(self.pipe_nulls_as_nans) .pipe(self.pipe_output_columns) )
[docs] def pipeline_age(self, df): # Melt df = df.melt("Date") # Separate age group and variable var = df.variable.str.extract(r"(\d+)[\+\-](\d*)\s(.+)") # Assign new columns and clean date df = df.assign( age_group_min=var[0], age_group_max=var[1], variable=var[2], date=clean_date_series(df.Date, "%Y-%m-%dT%H:%M:%S.%fZ"), ) # Keep last entry for each date df = df.sort_values("date") df = df.drop_duplicates(subset=["date", "variable", "age_group_min", "age_group_max"], keep="last") df = df.drop(columns="Date") # Pivot and fix column names df = df.pivot(index=["date", "age_group_min", "age_group_max"], columns=["variable"], values=["value"]) df.columns = [col[1] for col in df.columns] df = df.reset_index() # Ignore agr group 10-19 df = df[(df.age_group_min != "10") | (df.age_group_max != "19")] # Final column creations df = df.assign(location=self.location).rename( columns={ "1st perc": "people_vaccinated_per_hundred", "2nd perc": "people_fully_vaccinated_per_hundred", "3rd perc": "people_with_booster_per_hundred", } ) # Select output columns df = df[ [ "location", "date", "age_group_min", "age_group_max", "people_vaccinated_per_hundred", "people_fully_vaccinated_per_hundred", "people_with_booster_per_hundred", ] ] return df
[docs] def export(self): # Main data df = self.read().pipe(self.pipeline) # Age data df_age = self.read_age().pipe(self.pipeline_age) self.export_datafile( df, df_age=df_age, meta_age={ "source_name": "Ministry of Health via github.com/dancarmoz/israel_moh_covid_dashboard_data", "source_url": self.source_url_age, }, )
[docs]def main(): Israel().export()