Source code for cowidev.vax.incremental.guernsey

import pandas as pd

from cowidev.utils.clean import extract_clean_date, clean_count
from cowidev.utils.web.scraping import get_soup
from cowidev.vax.utils.incremental import enrich_data, increment


class Guernsey:
    source_url = "https://covid19.gov.gg/guidance/vaccine/stats"
    location = "Guernsey"
    _regex_date = r"This page was last updated on (\d{1,2} [A-Za-z]+ 202\d)"
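As a quick illustration of what `_regex_date` matches, here is a standalone sketch using only the standard library (the sample sentence is invented; cowidev's `extract_clean_date` wraps similar matching plus date cleaning):

import re
from datetime import datetime

sample = "This page was last updated on 3 March 2021."
regex = r"This page was last updated on (\d{1,2} [A-Za-z]+ 202\d)"
match = re.search(regex, sample)
assert match is not None
# Parse the captured group with the same format string the module uses
parsed = datetime.strptime(match.group(1), "%d %B %Y")
print(parsed.strftime("%Y-%m-%d"))  # 2021-03-03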
    def read(self) -> pd.Series:
        soup = get_soup(self.source_url)
        return self.parse_data(soup)

    def parse_data(self, soup) -> pd.Series:
        # The stats page reports dose counts in its first HTML table
        tables = soup.find_all("table")
        ds = pd.read_html(str(tables[0]))[0].squeeze()
        # Build the output series: report date and cumulative dose count
        return pd.Series(
            {
                "date": extract_clean_date(
                    text=str(soup.text),
                    regex=self._regex_date,
                    date_format="%d %B %Y",
                    lang="en",
                ),
                "total_vaccinations": clean_count(
                    ds.loc[ds[0] == "Total doses", 1].values[0].replace("*", "")
                ),
            }
        )
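A self-contained sketch of the same table-scraping pattern, run against a minimal stand-in table (the markup below is invented and the live page's layout may differ; plain `str.replace` stands in for `clean_count`):

from io import StringIO

import pandas as pd

html = StringIO(
    "<table>"
    "<tr><td>First doses</td><td>30,000</td></tr>"
    "<tr><td>Total doses</td><td>55,123*</td></tr>"
    "</table>"
)
# A headerless table yields integer column labels 0 and 1,
# which is why the module indexes with ds[0] and column 1
ds = pd.read_html(html)[0].squeeze()
raw = ds.loc[ds[0] == "Total doses", 1].values[0]  # "55,123*"
print(int(raw.replace("*", "").replace(",", "")))  # 55123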
    def pipe_location(self, ds: pd.Series) -> pd.Series:
        return enrich_data(ds, "location", self.location)

    def pipe_vaccine(self, ds: pd.Series) -> pd.Series:
        return enrich_data(ds, "vaccine", "Moderna, Oxford/AstraZeneca, Pfizer/BioNTech")

    def pipe_source(self, ds: pd.Series) -> pd.Series:
        return enrich_data(ds, "source_url", self.source_url)

    def pipeline(self, ds: pd.Series) -> pd.Series:
        return ds.pipe(self.pipe_location).pipe(self.pipe_vaccine).pipe(self.pipe_source)
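The pipeline relies on `pd.Series.pipe` to chain the enrichment steps; a toy equivalent, with a stand-in for `enrich_data` (assumed here to return a copy of the series with one extra field set):

import pandas as pd

def enrich(ds: pd.Series, key: str, value: str) -> pd.Series:
    ds = ds.copy()  # avoid mutating the caller's series
    ds[key] = value
    return ds

ds = pd.Series({"date": "2021-03-03", "total_vaccinations": 55123})
# pipe(func, *args) calls func(ds, *args), so steps read left to right
ds = ds.pipe(enrich, "location", "Guernsey").pipe(enrich, "source_url", "https://covid19.gov.gg/guidance/vaccine/stats")
print(ds["location"], ds["source_url"])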
    def export(self):
        """Read the latest data point, enrich it, and export it."""
        data = self.read().pipe(self.pipeline)
        increment(
            location=data["location"],
            total_vaccinations=data["total_vaccinations"],
            date=data["date"],
            source_url=data["source_url"],
            vaccine=data["vaccine"],
        )
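`increment` is cowidev's writer for incremental collectors; its implementation is not shown on this page, but a rough, purely illustrative stand-in that appends one dated observation per run to a per-location CSV might look like this (the real helper also validates and de-duplicates, which is omitted here):

import os

import pandas as pd

def increment_sketch(location, total_vaccinations, date, source_url, vaccine, path="output"):
    # Hypothetical stand-in for cowidev's increment(): append (or create)
    # one row per run in <path>/<location>.csv
    row = pd.DataFrame(
        [{
            "location": location,
            "total_vaccinations": total_vaccinations,
            "date": date,
            "source_url": source_url,
            "vaccine": vaccine,
        }]
    )
    os.makedirs(path, exist_ok=True)
    filename = os.path.join(path, f"{location}.csv")
    if os.path.exists(filename):
        row = pd.concat([pd.read_csv(filename), row], ignore_index=True)
    row.to_csv(filename, index=False)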
def main():
    Guernsey().export()
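The captured page ends with `main`; for direct execution, a standard entry-point guard (not present in the original listing) would be:

if __name__ == "__main__":
    main()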