import pandas as pd
from cowidev.utils.clean import clean_count, extract_clean_date
from cowidev.utils.web.scraping import get_soup
from cowidev.vax.utils.incremental import increment, enrich_data
class AntiguaBarbuda:
    """Incremental vaccination scraper for Antigua and Barbuda.

    Reads the page at ``source_url``, locates the first-dose and second-dose
    panels, extracts the reported counts and update date, enriches the record
    with constant metadata and hands it to ``increment``.
    """

    source_url = "https://covid19.gov.ag"
    location = "Antigua and Barbuda"
    # Pattern for the "[Updated on <Month> <day>, 202x]" stamp; group 1 is the date text.
    regex = {"date": r"\[Updated on ([a-zA-Z]+ \d{1,2}, 202\d)\]"}

    def read(self) -> pd.Series:
        """Download the source page and parse it into a single-record Series."""
        soup = get_soup(self.source_url)
        return self.parse_data(soup)

    def parse_data(self, soup) -> pd.Series:
        """Build a Series with ``date``, ``people_vaccinated`` and
        ``people_fully_vaccinated`` from the parsed page.

        Raises:
            ValueError: if a dose panel or metric is missing, or if the two
                panels report different dates.
        """
        dose1_elem, dose2_elem = self._get_elements(soup)
        return pd.Series(
            {
                "date": self._parse_date(dose1_elem, dose2_elem),
                "people_vaccinated": self._parse_metric(dose1_elem),
                "people_fully_vaccinated": self._parse_metric(dose2_elem),
            }
        )

    def _get_elements(self, soup):
        """Return the parent elements of the 1st-dose and 2nd-dose ``<h1>`` headings.

        If a heading text occurs more than once, the last occurrence is used.

        Raises:
            ValueError: if either heading is absent from the page.
        """
        dose1_elem = None
        dose2_elem = None
        for heading in soup.find_all("h1"):
            text = heading.text.strip()
            if text == "Vaccinated Cases 1st Dose":
                dose1_elem = heading.parent
            elif text == "Vaccinated Cases 2nd Dose":
                dose2_elem = heading.parent
        # Fail with an explicit message instead of an UnboundLocalError when
        # the page layout no longer contains the expected headings.
        if dose1_elem is None or dose2_elem is None:
            raise ValueError("Could not find first and/or second dose sections in the page")
        return dose1_elem, dose2_elem

    def _parse_date(self, dose1_elem, dose2_elem):
        """Extract the update date from both panels and return it if they agree.

        The date text is taken from each panel's first ``<h2>``.

        Raises:
            ValueError: if the two panels carry different dates.
        """
        # NOTE(review): minus_days=1 is forwarded to extract_clean_date; presumably
        # it shifts the reported date back by one day - confirm against the helper.
        date1, date2 = (
            extract_clean_date(
                panel.find("h2").text,
                self.regex["date"],
                "%B %d, %Y",
                minus_days=1,
                lang="en",
            )
            for panel in (dose1_elem, dose2_elem)
        )
        if date1 == date2:
            return date1
        raise ValueError("Dates in first and second doses are not aligned")

    def _parse_metric(self, elem):
        """Return the cleaned count from the first ``<div>`` under ``elem`` whose
        text contains "Total Vaccinated".

        The number is read from the descendant with class ``case-Number``.

        Raises:
            ValueError: if no such ``<div>`` exists.
        """
        for div in elem.find_all("div"):
            if "Total Vaccinated" in div.text:
                return clean_count(div.find(class_="case-Number").text)
        # Previously this fell through and returned None, which only failed
        # later as a TypeError when the counts were added together.
        raise ValueError("Could not find 'Total Vaccinated' metric in the page")

    def pipe_people_vaccinated(self, ds: pd.Series) -> pd.Series:
        """Add ``total_vaccinations`` as first-dose plus second-dose counts.

        The method name is historical and kept for compatibility; the field it
        adds is ``total_vaccinations``.
        """
        total_vaccinations = ds.loc["people_vaccinated"] + ds.loc["people_fully_vaccinated"]
        return enrich_data(ds, "total_vaccinations", total_vaccinations)

    def pipe_location(self, ds: pd.Series) -> pd.Series:
        """Add the ``location`` field."""
        return enrich_data(ds, "location", self.location)

    def pipe_vaccine(self, ds: pd.Series) -> pd.Series:
        """Add the ``vaccine`` field (hard-coded list of vaccine names)."""
        return enrich_data(ds, "vaccine", "Oxford/AstraZeneca, Pfizer/BioNTech, Sputnik V")

    def pipe_source(self, ds: pd.Series) -> pd.Series:
        """Add the ``source_url`` field."""
        return enrich_data(ds, "source_url", self.source_url)

    def pipeline(self, df: pd.Series) -> pd.Series:
        """Apply all enrichment steps to the parsed record, in order."""
        return (
            df.pipe(self.pipe_people_vaccinated)
            .pipe(self.pipe_location)
            .pipe(self.pipe_vaccine)
            .pipe(self.pipe_source)
        )

    def export(self):
        """Read, enrich and pass the latest record to ``increment``."""
        ds = self.read().pipe(self.pipeline)
        increment(
            location=ds["location"],
            total_vaccinations=ds["total_vaccinations"],
            people_vaccinated=ds["people_vaccinated"],
            people_fully_vaccinated=ds["people_fully_vaccinated"],
            date=ds["date"],
            source_url=ds["source_url"],
            vaccine=ds["vaccine"],
        )
def main():
    """Entry point: run the Antigua and Barbuda export."""
    AntiguaBarbuda().export()