Source code for cowidev.vax.incremental.barbados

from multiprocessing.sharedctypes import Value
import re

from bs4 import BeautifulSoup
import pandas as pd

from cowidev.utils import get_soup, clean_count, clean_date
from cowidev.vax.utils.base import CountryVaxBase


class Barbados(CountryVaxBase):
    """Incremental vaccination scraper for Barbados.

    Looks up the "COVID-19 Update" article linked from the listing page at
    ``source_url``, extracts the vaccination counts from the article text with
    the patterns in ``regex``, and exports a single-row dataset.
    """

    location: str = "Barbados"
    # Listing page that links to the individual update articles.
    source_url: str = "https://gisbarbados.gov.bb/top-stories/"
    # URL of the article the figures were read from; set by `_parse_data`.
    source_url_ref: str = None
    regex: dict = {
        "title": r"COVID-19 Update",
        "people_vaccinated": r"at least one dose is ([\d,\s]+)",
        "people_fully_vaccinated": r"fully? (?:vaccinated|vaccinated persons) is ([\d,\s]+)",
        # Alternative patterns kept for reference (presumably matching other
        # wordings of the article -- TODO confirm before re-enabling):
        # "people_vaccinated": r"COVID\-19, ([\d,\s]+) persons \(.*\) have received at least one dose",
        # "people_fully_vaccinated": (
        #     r"To date, ([\d,\s]+) individuals \– (?:[\d,\s]+) males and (?:[\d,\s]+) females \(.*\) \– have been fully"
        #     r" vaccinated"
        # ),
        # "people_vaccinated": (
        #     r"(\d+) persons \((?:[\d.]+) per cent of the eligible population\) have received at least one dose"
        # ),
        # "people_fully_vaccinated": (
        #     r"(\d+) individuals – (?:[\d]+) males and (?:[\d]+) females \((?:[\d.]+) per cent of the total"
        #     r" population or (?:[\d.]+) per cent of the eligible population\) are fully vaccinated"
        # ),
    }

    def read(self) -> pd.DataFrame:
        """Read data from source.

        Returns:
            Single-row DataFrame with the date and the vaccination metrics.
        """
        soup = get_soup(self.source_url)
        return self._parse_data(soup)

    def _parse_data(self, soup: BeautifulSoup) -> pd.DataFrame:
        """Parse data from the listing-page soup.

        Finds the first anchor whose text matches ``regex["title"]``, stores its
        href in ``source_url_ref``, downloads that article and parses it.

        Raises:
            ValueError: If no matching anchor exists, or it carries no usable href.
        """
        # Get the article URL
        elem = soup.find("a", text=re.compile(self.regex["title"]))
        if elem is None:
            raise ValueError("No COVID-19 update new was found!")
        # `.get` instead of indexing: an anchor without an href must reach the
        # explicit check below rather than fail with a bare KeyError.
        link = elem.get("href")
        if not link:
            raise ValueError("Article not found, please update the script")
        self.source_url_ref = link
        soup = get_soup(link)
        # Get the metrics
        metrics = self._parse_metrics(soup)
        # Get the date
        date = self._parse_date(soup)
        return pd.DataFrame(
            {
                "date": [date],
                **metrics,
            }
        )

    def _search_count(self, key: str, text: str) -> int:
        """Return the count captured by ``regex[key]`` in ``text``.

        Raises:
            ValueError: If the pattern does not match, which most likely means
                the article wording changed and the pattern needs updating.
        """
        match = re.search(self.regex[key], text)
        if match is None:
            raise ValueError(f"Metric '{key}' not found, please update the script")
        # The capture group tolerates embedded spaces; drop them before cleaning.
        return clean_count(match.group(1).replace(" ", ""))

    def _parse_metrics(self, soup: BeautifulSoup) -> dict:
        """Parse metrics from the article soup.

        Returns:
            Dict mapping each metric name to a one-element list, ready to be
            used as DataFrame columns.

        Raises:
            ValueError: If either metric cannot be located in the article text.
        """
        text = soup.get_text()
        # Strip thousands separators so that figures read as plain digit runs.
        text = re.sub(r"(\d),(\d)", r"\1\2", text)
        people_vaccinated = self._search_count("people_vaccinated", text)
        people_fully_vaccinated = self._search_count("people_fully_vaccinated", text)
        # NOTE(review): total doses are derived as first doses + completed
        # courses; this presumably assumes two-dose schedules only -- confirm.
        total_vaccinations = people_vaccinated + people_fully_vaccinated
        return {
            "people_vaccinated": [people_vaccinated],
            "people_fully_vaccinated": [people_fully_vaccinated],
            "total_vaccinations": [total_vaccinations],
        }

    def _parse_date(self, soup: BeautifulSoup) -> str:
        """Parse the date from the article soup.

        Reads the text of the element with class ``published`` and converts it
        with ``clean_date`` using the ``"%b %d, %Y"`` format and ``minus_days=1``
        (presumably because the figures refer to the day before publication --
        TODO confirm).

        Raises:
            ValueError: If the element is missing or its text is empty.
        """
        elem = soup.find(class_="published")
        # Guard the lookup itself: `find` returns None when nothing matches.
        date_str = elem.text if elem is not None else None
        if not date_str:
            raise ValueError("Date not found, please update the script")
        return clean_date(date_str, "%b %d, %Y", minus_days=1)

    def pipe_vaccine(self, df: pd.DataFrame) -> pd.DataFrame:
        """Pipes vaccine names for main data."""
        return df.assign(vaccine="Oxford/AstraZeneca, Pfizer/BioNTech, Sinopharm/Beijing")

    def pipeline(self, df: pd.DataFrame) -> pd.DataFrame:
        """Pipeline for data processing: adds metadata, then the vaccine names."""
        return df.pipe(self.pipe_metadata).pipe(self.pipe_vaccine)

    def export(self):
        """Read, process and export the data via ``export_datafile``."""
        df = self.read().pipe(self.pipeline)
        self.export_datafile(df, attach=True)
def main():
    """Entry point: build the Barbados scraper and run its export."""
    scraper = Barbados()
    scraper.export()