import pandas as pd
from cowidev.utils.clean import extract_clean_date, clean_count
from cowidev.utils.web.scraping import get_soup
from cowidev.vax.utils.incremental import enrich_data, increment
class Guernsey:
    """Scraper for COVID-19 vaccination figures published by the States of Guernsey.

    Reads the official stats page, extracts the cumulative dose count and the
    page's "last updated" date, enriches the record with location/vaccine/source
    metadata, and appends it via :func:`increment`.
    """

    source_url = "https://covid19.gov.gg/guidance/vaccine/stats"
    location = "Guernsey"
    # Captures the "last updated" date, e.g. "This page was last updated on 3 March 2022".
    _regex_date = r"This page was last updated on (\d{1,2} [A-Za-z]+ 202\d)"

    def read(self) -> pd.Series:
        """Download the stats page and return the raw parsed metrics as a Series."""
        soup = get_soup(self.source_url)
        return self.parse_data(soup)

    def parse_data(self, soup) -> pd.Series:
        """Extract the report date and total dose count from the page.

        Args:
            soup: BeautifulSoup document for ``source_url``.

        Returns:
            Series with keys ``date`` and ``total_vaccinations``.
        """
        # The first table on the page holds the headline dose counts.
        tables = soup.find_all("table")
        ds = pd.read_html(str(tables[0]))[0].squeeze()
        return pd.Series(
            {
                "date": extract_clean_date(
                    text=str(soup.text), regex=self._regex_date, date_format="%d %B %Y", lang="en"
                ),
                # Figures may carry a footnote asterisk (e.g. "12,345*"); strip it before parsing.
                "total_vaccinations": clean_count(
                    ds.loc[ds[0] == "Total doses", 1].values[0].replace("*", ""),
                ),
            }
        )

    def pipe_location(self, ds: pd.Series) -> pd.Series:
        """Attach the location name to the record."""
        return enrich_data(ds, "location", self.location)

    def pipe_vaccine(self, ds: pd.Series) -> pd.Series:
        """Attach the list of vaccines administered in Guernsey."""
        return enrich_data(ds, "vaccine", "Moderna, Oxford/AstraZeneca, Pfizer/BioNTech")

    def pipe_source(self, ds: pd.Series) -> pd.Series:
        """Attach the source URL to the record."""
        return enrich_data(ds, "source_url", self.source_url)

    def pipeline(self, ds: pd.Series) -> pd.Series:
        """Apply all enrichment steps to the raw metrics Series."""
        return ds.pipe(self.pipe_location).pipe(self.pipe_vaccine).pipe(self.pipe_source)

    def export(self):
        """Generalized: read, enrich, and append the latest datapoint."""
        data = self.read().pipe(self.pipeline)
        increment(
            location=data["location"],
            total_vaccinations=data["total_vaccinations"],
            date=data["date"],
            source_url=data["source_url"],
            vaccine=data["vaccine"],
        )
def main():
    """Entry point: run the Guernsey scraper once and export the latest datapoint."""
    scraper = Guernsey()
    scraper.export()