from cowidev.vax.utils.utils import build_vaccine_timeline
import requests
import pandas as pd
from cowidev.utils import clean_date
from cowidev.utils.clean.dates import clean_date_series
from cowidev.vax.utils.base import CountryVaxBase
from cowidev.vax.utils.checks import VACCINES_ONE_DOSE
[docs]class Argentina(CountryVaxBase):
location = "Argentina"
source_url = "https://covidstats.com.ar/ws/vacunadosargentina?portipovacuna=1"
source_url_age = "https://covidstats.com.ar/ws/vacunadosargentina?porgrupoetario=1"
source_url_ref = "https://covidstats.com.ar/"
age_group_valid = {
"30-39",
"80-89",
"18-29",
"90-99",
"50-59",
"70-79",
"60-69",
">=100",
"40-49",
"<12",
"12-17",
}
vaccine_mapping = {
"Cansino Ad5 nCoV": "CanSino",
"Sputnik V COVID19 Instituto Gamaleya": "Sputnik V",
"Moderna ARNm": "Moderna",
"Moderna Pediátrica": "Moderna",
"Pfizer Pediátrica": "Pfizer/BioNTech",
"COVISHIELD ChAdOx1nCoV COVID 19": "Oxford/AstraZeneca",
"Pfizer BioNTech Comirnaty": "Pfizer/BioNTech",
"AstraZeneca ChAdOx1 S recombinante": "Oxford/AstraZeneca",
"Sinopharm Vacuna SARSCOV 2 inactivada": "Sinopharm/Beijing",
}
[docs] def read(self):
data = requests.get(self.source_url).json()
data = list(data.values())
if data[-1] == True:
data = data[:-1]
else:
raise ValueError("Source data format changed!")
data = self._parse_data(data)
return data
[docs] def read_age(self):
data = requests.get(self.source_url_age).json()
data = list(data.values())[:-1]
self._check_data_age(data)
data = self._parse_data_age(data)
return data
[docs] def _parse_data(self, data):
# Merge
dfs = [self._build_df(d) for d in data]
df = pd.concat(dfs, ignore_index=True).assign(location=self.location)
return df
[docs] def _build_df(self, data):
# Get dates
dt = clean_date(data["fecha_inicial"], "%Y-%m-%dT%H:%M:%S%z", as_datetime=False)
dates = pd.date_range(dt, periods=data["dias"], freq="D")
# Build df
# Notes on differences adicional vs refuerzo:
# https://github.com/owid/covid-19-data/issues/2532#issuecomment-1074137207
df = pd.DataFrame(
{
"date": clean_date_series(list(dates)),
"vaccine": data["denominacion"],
"dose_1": data["dosis1"],
"dose_2": data["dosis2"],
"dose_additional": data["adicional"],
"people_fully_vaccinated": data["esquemacompleto"],
"total_boosters": data["refuerzo"],
}
)
return df
[docs] def pipe_base_vaccines(self, df: pd.DataFrame):
vaccines_wrong = set(df.vaccine).difference(self.vaccine_mapping)
if vaccines_wrong:
raise ValueError(f"Unknown vaccines detected! {vaccines_wrong}")
df = df.assign(vaccine=df.vaccine.replace(self.vaccine_mapping))
df = df.groupby(["date", "vaccine"], as_index=False).sum()
return df
[docs] def pipe_base_cumsum(self, df: pd.DataFrame):
cols = ["dose_1", "dose_2", "dose_additional", "people_fully_vaccinated", "total_boosters"]
df[cols] = df.sort_values("date").groupby("vaccine")[cols].cumsum()
return df
[docs] def pipe_base_metrics(self, df):
# Split df by single/two dose protocols
msk = df.vaccine.isin(VACCINES_ONE_DOSE)
df_2d = df[~msk]
df_1d = df[msk]
# Estimate metrics
df_2d = df_2d.assign(
total_vaccinations=df.dose_1 + df.dose_2 + df.dose_additional + df.total_boosters,
people_vaccinated=df.dose_1,
total_boosters=df.dose_additional + df.total_boosters,
)
df_1d = df_1d.assign(
total_vaccinations=df.dose_1 + df.dose_2 + df.dose_additional + df.total_boosters,
people_vaccinated=df.dose_1,
total_boosters=df.dose_2 + df.dose_additional + df.total_boosters,
)
# Single dose check
if not (df_1d.people_fully_vaccinated == df_1d.people_vaccinated).all():
raise ValueError(
"Something wrong with single-dose vaccines! We should have that `people_vaccinated =="
" people_fully_vaccinated`"
)
df = pd.concat([df_1d, df_2d], ignore_index=True).sort_values(["date", "vaccine"])
# Only report when total_Vaccinations > 0
df = df[df.total_vaccinations > 0]
return df[
["date", "vaccine", "total_vaccinations", "people_vaccinated", "people_fully_vaccinated", "total_boosters"]
]
[docs] def pipe_aggregate_vaccines(self, df: pd.DataFrame):
return df.groupby("date", as_index=False).sum()
[docs] def pipe_vaccine(self, df: pd.DataFrame):
return build_vaccine_timeline(
df,
{
"Sputnik V": "2020-12-29",
"Sinopharm/Beijing": "2021-03-08",
"Oxford/AstraZeneca": "2021-03-08",
"Moderna": "2021-08-03",
"CanSino": "2021-09-09",
"Pfizer/BioNTech": "2021-09-17",
},
)
[docs] def _check_data_age(self, data):
ages = {d["denominacion"] for d in data}
age_wrong = ages.difference(self.age_group_valid | {"Otros (sin especificar)"})
if age_wrong:
raise ValueError(f"Unknown age group {age_wrong}")
[docs] def _parse_data_age(self, data):
# Merge
dfs = [self._build_df_age_group(d) for d in data if d["denominacion"] in self.age_group_valid]
df = pd.concat(dfs, ignore_index=True).assign(location=self.location)
df[["age_group_min", "age_group_max"]] = df[["age_group_min", "age_group_max"]].astype(str)
return df
[docs] def _build_df_age_group(self, data):
# Get dates
dt = clean_date(data["fecha_inicial"], "%Y-%m-%dT%H:%M:%S%z", as_datetime=False)
dates = pd.date_range(dt, periods=data["dias"], freq="D")
# Build df
df = pd.DataFrame(
{
"date": dates,
"people_vaccinated": data["dosis1"],
"people_fully_vaccinated": data["esquemacompleto"],
# "people_with_booster": [d + b for d, b in zip(data["refuerzo"], data["adicional"])],
"people_with_booster": data[
"refuerzo"
], # likely an under estimate (missing doses for immunocompromised)
}
).assign(
**{
"age_group_min": data["desdeedad"],
"age_group_max": data["hastaedad"] if data["hastaedad"] is not None else "",
"age_group": data["denominacion"],
}
)
return df
[docs] def pipe_age_cumsum(self, df):
# cumsum
cols = ["people_vaccinated", "people_fully_vaccinated", "people_with_booster"]
df[cols] = df.sort_values("date").groupby("age_group")[cols].cumsum()
return df
[docs] def pipe_age_date(self, df):
return df.assign(date=clean_date_series(df.date))
[docs] def pipeline_base(self, df: pd.DataFrame):
return df.pipe(self.pipe_base_vaccines).pipe(self.pipe_base_cumsum).pipe(self.pipe_base_metrics)
[docs] def pipeline(self, df: pd.DataFrame) -> pd.Series:
return df.pipe(self.pipe_aggregate_vaccines).pipe(self.pipe_metadata).pipe(self.pipe_vaccine)
[docs] def pipeline_manufacturer(self, df: pd.DataFrame) -> pd.Series:
df = df.assign(location="Argentina")[["location", "date", "vaccine", "total_vaccinations"]]
return df
[docs] def pipeline_age(self, df):
return df.pipe(self.pipe_age_cumsum).pipe(self.pipe_age_date).pipe(self.pipe_age_per_capita)
[docs] def export(self):
# Base data
df_base = self.read().pipe(self.pipeline_base)
# Main data
df = df_base.pipe(self.pipeline)
# Manufacturer data
df_man = df_base.pipe(self.pipeline_manufacturer)
# Age data
df_age = self.read_age().pipe(self.pipeline_age)
# Export
self.export_datafile(
df=df,
df_age=df_age,
df_manufacturer=df_man,
meta_age={
"source_name": f"Ministry of Health via {self.source_url}",
"source_url": self.source_url_ref,
},
meta_manufacturer={
"source_name": f"Ministry of Health via {self.source_url}",
"source_url": self.source_url_ref,
},
)
[docs]def main():
Argentina().export()