# Source code for cowidev.vax.batch.australia

import pandas as pd

from cowidev.utils import clean_date, clean_date_series
from cowidev.utils.utils import check_known_columns
from cowidev.utils.web.download import read_csv_from_url
from cowidev.vax.utils.base import CountryVaxBase
from cowidev.vax.utils.utils import build_vaccine_timeline


class Australia(CountryVaxBase):
    """Vaccination data pipeline for Australia, fed by CSV files from covidbaseau.com.

    Two datasets are produced by :meth:`export`:

    * the main time series (people vaccinated, fully vaccinated, boosters and
      their sum), built by :meth:`read` + :meth:`pipeline`;
    * an age-group breakdown of per-hundred metrics, built by
      :meth:`read_age` + :meth:`pipeline_age`.
    """

    # CSV endpoints: the main time series and the first/second dose age breakdowns.
    source_url = {
        "main": "https://covidbaseau.com/people-vaccinated.csv",
        "age_1d": "https://covidbaseau.com/historical/Vaccinations%20By%20Age%20Group%20and%20State%20First.csv",
        "age_2d": "https://covidbaseau.com/historical/Vaccinations%20By%20Age%20Group%20and%20State%20Second.csv",
    }
    # URL written to the output as the data source reference.
    source_url_ref = "https://covidbaseau.com/"
    source_file = "https://covidbaseau.com/people-vaccinated.csv"
    location = "Australia"
    # Source column name -> output column name.
    columns_rename = {
        "dose_1": "people_vaccinated",
        "dose_2": "people_fully_vaccinated",
        "dose_3": "total_boosters",
    }
    # Vaccine name -> date string handed to build_vaccine_timeline.
    vaccine_timeline = {
        "Pfizer/BioNTech": "2021-01-01",
        "Moderna": "2021-03-06",
        "Oxford/AstraZeneca": "2021-03-06",
        "Novavax": "2022-02-17",
    }

    def read(self) -> pd.DataFrame:
        """Download the main CSV and check that its columns are the expected ones."""
        df = read_csv_from_url(self.source_url["main"])
        check_known_columns(df, ["date", "dose_1", "dose_2", "dose_3"])
        return df

    def _read_age_metric(self, url_key: str, value_name: str) -> pd.DataFrame:
        """Download one age-breakdown CSV and melt it to long format.

        Args:
            url_key: Key in ``source_url`` of the CSV to download.
            value_name: Name given to the melted value column.

        Returns:
            Frame with columns ``Date``, ``age_group`` and ``value_name``.
        """
        # header=1 is forwarded to the reader (presumably pandas' header-row
        # index, i.e. the first line of the file is skipped - confirm).
        # Columns that are entirely empty are discarded before melting.
        wide = read_csv_from_url(self.source_url[url_key], header=1).dropna(axis=1, how="all")
        return wide.melt("Date", var_name="age_group", value_name=value_name)

    def read_age(self) -> pd.DataFrame:
        """Download first and second dose age breakdowns and join them.

        Returns:
            Long-format frame with one row per (``Date``, ``age_group``) found
            in the first-dose file; second-dose values are left-joined, so
            they are missing wherever the second file has no matching row.
        """
        first_dose = self._read_age_metric("age_1d", "people_vaccinated_per_hundred")
        second_dose = self._read_age_metric("age_2d", "people_fully_vaccinated_per_hundred")
        return first_dose.merge(second_dose, on=["Date", "age_group"], how="left")

    def pipe_total_vaccinations(self, df: pd.DataFrame) -> pd.DataFrame:
        """Add ``total_vaccinations`` as the sum of the three dose columns."""
        # NOTE(review): a missing value in any dose column makes the total
        # missing for that row - confirm this is the intended behaviour.
        return df.assign(total_vaccinations=df.dose_1 + df.dose_2 + df.dose_3)

    def pipe_rename_columns(self, df: pd.DataFrame) -> pd.DataFrame:
        """Rename the source dose columns according to ``columns_rename``."""
        return df.rename(columns=self.columns_rename)

    def pipe_date(self, df: pd.DataFrame) -> pd.DataFrame:
        """Normalise the ``date`` column with ``clean_date``."""
        # NOTE(review): minus_days=1 presumably shifts every date back by one
        # day - confirm against cowidev.utils.clean_date.
        return df.assign(date=df.date.apply(clean_date, fmt="%Y-%m-%d", minus_days=1))

    def pipe_vaccine(self, df: pd.DataFrame) -> pd.DataFrame:
        """Apply ``build_vaccine_timeline`` using ``vaccine_timeline``."""
        return df.pipe(build_vaccine_timeline, self.vaccine_timeline)

    def pipe_metadata(self, df: pd.DataFrame) -> pd.DataFrame:
        """Add the constant ``location`` and ``source_url`` columns."""
        return df.assign(location=self.location, source_url=self.source_url_ref)

    def pipeline(self, df: pd.DataFrame) -> pd.DataFrame:
        """Run all main-series steps on the frame returned by :meth:`read`."""
        return (
            df.pipe(self.pipe_total_vaccinations)
            .pipe(self.pipe_rename_columns)
            .pipe(self.pipe_date)
            .pipe(self.pipe_vaccine)
            .pipe(self.pipe_metadata)
            .pipe(self.make_monotonic)
            .sort_values("date")
        )

    def pipe_age_groups(self, df: pd.DataFrame) -> pd.DataFrame:
        """Split ``age_group`` labels into ``age_group_min`` / ``age_group_max``.

        The first one- or two-digit number in the label becomes the minimum; a
        number following a dash, if present, becomes the maximum (missing
        otherwise). Both new columns hold strings, not numbers.

        Returns:
            A new frame; the input frame is left untouched.
        """
        regex = r"(\d{1,2})+?(?:-(\d{1,2}))?"
        bounds = df.age_group.str.extract(regex)
        # assign() instead of in-place column assignment, so that this step
        # does not mutate the caller's frame (consistent with the other pipes).
        return df.assign(age_group_min=bounds[0], age_group_max=bounds[1])

    def pipe_age_numeric(self, df: pd.DataFrame) -> pd.DataFrame:
        """Convert the two per-hundred metric columns from text to float.

        Only the first run of digits and dots in each value is kept; anything
        after it is discarded. Values without such a run become missing.

        Returns:
            A new frame; the input frame is left untouched.
        """
        regex = r"([\d\.]+).*"
        metrics = ["people_vaccinated_per_hundred", "people_fully_vaccinated_per_hundred"]
        # Replacing the columns through assign() guarantees a float dtype.
        # Writing through ``df.loc[:, metric] = ...`` sets values in place on
        # pandas >= 2.0 and can leave the column with object dtype.
        converted = {metric: df[metric].str.extract(regex, expand=False).astype(float) for metric in metrics}
        return df.assign(**converted)

    def pipe_age_metadata(self, df: pd.DataFrame) -> pd.DataFrame:
        """Add a cleaned ``date`` column (from ``Date``) and the ``location``."""
        return df.assign(
            date=clean_date_series(df.Date),
            location=self.location,
        )

    def pipeline_age(self, df: pd.DataFrame) -> pd.DataFrame:
        """Run all age-breakdown steps on the frame returned by :meth:`read_age`."""
        output_columns = [
            "location",
            "date",
            "age_group_min",
            "age_group_max",
            "people_vaccinated_per_hundred",
            "people_fully_vaccinated_per_hundred",
        ]
        return (
            df.pipe(self.pipe_age_groups)
            .pipe(self.pipe_age_numeric)
            .pipe(self.pipe_age_metadata)
            # NOTE(review): duplicates are detected on the two metric values
            # only, so rows from different dates or age groups that happen to
            # share both values are dropped as well - confirm this is intended.
            .drop_duplicates(subset=["people_vaccinated_per_hundred", "people_fully_vaccinated_per_hundred"])
            # NOTE(review): age_group_min holds strings, so this secondary
            # sort key is ordered lexicographically ("16" before "5").
            .sort_values(["date", "age_group_min"])[output_columns]
        )

    def export(self) -> None:
        """Build the main and age datasets and pass them to ``export_datafile``."""
        # Main
        df = self.read().pipe(self.pipeline)
        # Age
        df_age = self.read_age().pipe(self.pipeline_age)
        self.export_datafile(
            df=df,
            df_age=df_age,
            meta_age={"source_name": "Ministry of Health via covidbaseau.com", "source_url": self.source_url_ref},
        )
def main() -> None:
    """Entry point: instantiate the Australia pipeline and export its data."""
    pipeline = Australia()
    pipeline.export()