Source code for cowidev.vax.batch.trinidad_and_tobago

from cowidev.vax.utils.base import CountryVaxBase
import pandas as pd

from cowidev.utils.clean import clean_date_series
from cowidev.utils.web import request_json
from cowidev.vax.utils.files import load_query, load_data
from cowidev.vax.utils.utils import build_vaccine_timeline, make_monotonic


class TrinidadTobago(CountryVaxBase):
    """Vaccination data pipeline for Trinidad and Tobago.

    Records are fetched from an ArcGIS FeatureServer query endpoint, turned
    into a DataFrame, checked for internal consistency, converted into the
    output metrics, merged with a legacy dataset and exported.
    """

    # Public dashboard URL.
    # NOTE(review): this attribute is not used anywhere in this class;
    # `pipe_source` publishes `source` (the API endpoint) instead — confirm
    # which URL is meant to end up in `source_url`.
    source_ref = "https://experience.arcgis.com/experience/59226cacd2b441c7a939dca13f832112/"
    # ArcGIS FeatureServer query endpoint the records are requested from.
    source = (
        "https://services3.arcgis.com/x3I4DqUw3b3MfTwQ/arcgis/rest/services/service_7a519502598f492a9094fd0ad503cf80/"
        "FeatureServer/0/query"
    )
    location: str = "Trinidad and Tobago"

    def read(self) -> pd.DataFrame:
        """Request the metrics from the API and return them as a DataFrame."""
        params = load_query("trinidad-and-tobago-metrics", to_str=False)
        data = request_json(self.source, params=params)
        return self._parse_data(data)

    def _parse_data(self, data: dict) -> pd.DataFrame:
        """Build a DataFrame with one row per element of ``data["features"]``.

        Args:
            data: Decoded JSON response; every feature must carry an
                ``"attributes"`` mapping holding the fields listed below.

        Returns:
            DataFrame whose columns are the keys of the mapping below.

        Raises:
            KeyError: If ``"features"``, ``"attributes"`` or one of the
                expected attribute fields is missing.
        """
        # Output column -> attribute name in the API response.
        fields = {
            "date": "report_date_str",
            # ppl vaxxed with 2-dose vax fd
            "people_vaccinated_2dosevax": "total_vaccinated",
            # ppl vaxxed with second dose of a 2-dose vax
            "people_fully_vaccinated_2dosevax": "sd_total_second_dose",
            # ppl fully vaxxed (second dose of a 2-dose vax or single shot)
            "people_fully_vaccinated": "total_second_dose",
            # doses of specific brands
            "d1_jj": "fd_j_and_j",
            "d1_pfizer": "fd_pfizer",
            "d1_sinopharm": "fd_sinopharm",
            # "d1_astrazeneca": "fd_astrazeneca",  (disabled in the original code)
            # booster doses
            "total_boosters": "additional_primary_dose",
        }
        records = [
            {column: feature["attributes"][field] for column, field in fields.items()}
            for feature in data["features"]
        ]
        return pd.DataFrame.from_records(records)

    def pipe_date(self, df: pd.DataFrame) -> pd.DataFrame:
        """Clean the ``date`` column (input format ``%d/%m/%Y``) and sort by it."""
        return df.assign(date=clean_date_series(df.date, "%d/%m/%Y")).sort_values("date")

    def pipe_vaccine_name(self, df: pd.DataFrame) -> pd.DataFrame:
        """Apply the vaccine timeline and drop rows lacking first-dose data.

        Oxford/AstraZeneca has a fixed start date; the other vaccines start
        on the earliest date at which their first-dose column is non-null.
        ``d1_jj`` is kept because ``pipe_checks`` and ``pipe_metrics`` read it.
        """
        vaccine_timeline = {
            "Oxford/AstraZeneca": "2021-02-15",
            "Johnson&Johnson": df.loc[df.d1_jj.notnull(), "date"].min(),
            "Pfizer/BioNTech": df.loc[df.d1_pfizer.notnull(), "date"].min(),
            "Sinopharm/Beijing": df.loc[df.d1_sinopharm.notnull(), "date"].min(),
        }
        return (
            df.pipe(build_vaccine_timeline, vaccine_timeline)
            .drop(columns=["d1_pfizer", "d1_sinopharm"])
            .dropna(subset=["people_vaccinated_2dosevax"])
        )

    def pipe_checks(self, df: pd.DataFrame) -> pd.DataFrame:
        """Validate that fully vaccinated ~= second doses + single-shot doses.

        Only rows where all three figures are present are checked.

        Returns:
            ``df`` unchanged.

        Raises:
            ValueError: If the relative difference exceeds 1% on any row.
        """
        complete = df.dropna(
            subset=["people_fully_vaccinated_2dosevax", "d1_jj", "people_fully_vaccinated"], how="any"
        )
        # No forward-fill needed: rows with missing values were just dropped.
        expected = complete["people_fully_vaccinated_2dosevax"] + complete["d1_jj"]
        diff = (expected - complete["people_fully_vaccinated"]).abs() / complete["people_fully_vaccinated"]
        msk = diff > 0.01
        if msk.any():
            raise ValueError(
                f"Fully vasccinated != single_dose + second_dose ({len(complete[msk])}):\n {complete[msk]}"
            )
        return df

    def pipe_metrics(self, df: pd.DataFrame) -> pd.DataFrame:
        """Derive the output metrics; the input frame is not modified.

        Gaps are forward-filled, and any values still missing are treated as
        zero when summing.

        - ``people_fully_vaccinated``: forward-filled.
        - ``people_vaccinated``: first doses of 2-dose vaccines + ``d1_jj``.
        - ``total_vaccinations``: ``people_vaccinated`` + second doses of
          2-dose vaccines + boosters.
        """
        people_vaccinated = df["people_vaccinated_2dosevax"].ffill().fillna(0) + df["d1_jj"].ffill().fillna(0)
        total_vaccinations = (
            people_vaccinated
            + df["people_fully_vaccinated_2dosevax"].ffill().fillna(0)
            + df["total_boosters"].ffill().fillna(0)
        )
        return df.assign(
            people_fully_vaccinated=df["people_fully_vaccinated"].ffill(),
            people_vaccinated=people_vaccinated,
            total_vaccinations=total_vaccinations,
        )

    def pipe_location(self, df: pd.DataFrame) -> pd.DataFrame:
        """Add the ``location`` column."""
        return df.assign(location=self.location)

    def pipe_source(self, df: pd.DataFrame) -> pd.DataFrame:
        """Add the ``source_url`` column, set to the API endpoint ``self.source``."""
        return df.assign(source_url=self.source)

    def pipe_legacy(self, df: pd.DataFrame) -> pd.DataFrame:
        """Append legacy rows for dates absent from ``df`` and sort by date."""
        df_legacy = load_data("trinidad-and-tobago-legacy")
        # Freshly fetched data takes precedence over legacy rows on shared dates.
        df_legacy = df_legacy[~df_legacy.date.isin(df.date)]
        return pd.concat([df, df_legacy]).sort_values("date")

    def pipe_filter_dp(self, df: pd.DataFrame) -> pd.DataFrame:
        """Drop the data points whose date is in the exclusion list."""
        dates_exclude = ["2022-01-10"]
        return df[~df.date.isin(dates_exclude)]

    def pipe_out_columns(self, df: pd.DataFrame) -> pd.DataFrame:
        """Keep only the output columns, in their final order."""
        return df[
            [
                "location",
                "date",
                "vaccine",
                "source_url",
                "total_vaccinations",
                "people_vaccinated",
                "people_fully_vaccinated",
                "total_boosters",
            ]
        ]

    def pipe_data_correction(self, df: pd.DataFrame) -> pd.DataFrame:
        """Drop the rows for dates listed as erroneous."""
        # Row with typo in number of boosters
        # `~` (not unary `-`) is the boolean-mask negation, as in the other filters.
        return df[~df.date.isin(["2022-02-07", "2022-04-18"])]

    def pipeline(self, df: pd.DataFrame) -> pd.DataFrame:
        """Run all processing steps, in order, on the raw DataFrame."""
        return (
            df.pipe(self.pipe_date)
            .pipe(self.pipe_vaccine_name)
            .pipe(self.pipe_checks)
            .pipe(self.pipe_metrics)
            .pipe(self.pipe_location)
            .pipe(self.pipe_source)
            .pipe(self.pipe_legacy)
            .pipe(self.pipe_filter_dp)
            .pipe(self.pipe_out_columns)
            .pipe(self.pipe_data_correction)
            # NOTE(review): `make_monotonic` is not defined in this class;
            # presumably inherited from CountryVaxBase — confirm.
            .pipe(self.make_monotonic, max_removed_rows=30)
        )

    def export(self):
        """Read the data, run the pipeline and export the resulting datafile."""
        df = self.read().pipe(self.pipeline)
        self.export_datafile(df)
def main():
    """Entry point: run the Trinidad and Tobago export."""
    scraper = TrinidadTobago()
    scraper.export()