Source code for cowidev.vax.incremental.united_arab_emirates

import re
from typing import Optional

import pandas as pd

from cowidev.utils.clean import clean_count, extract_clean_date
from cowidev.utils.web.scraping import get_driver
from cowidev.vax.utils.incremental import enrich_data, increment


class UnitedArabEmirates:
    """Scrape vaccination figures for the United Arab Emirates.

    The figures are read from the page at ``source_url`` with a browser
    driver. Only the total number of doses is parsed as an absolute count;
    the people-vaccinated metrics are parsed as percentages and converted to
    counts through a population estimate derived from the "per 100 people"
    figure on the same page.
    """

    def __init__(self) -> None:
        self.location = "United Arab Emirates"
        self.source_url = "https://fcsc.gov.ae/en-us/Pages/Covid19/UAE-Covid-19-Updates.aspx"

    def read(self) -> pd.Series:
        """Return the scraped data point as a ``pd.Series``."""
        return self._parse_data()

    def _parse_data(self) -> pd.Series:
        """Load the source page and parse all metrics plus the report date.

        Returns:
            Series with keys ``total_vaccinations``, ``people_vaccinated``,
            ``people_fully_vaccinated`` and ``date``. The two people metrics
            are ``None`` when they cannot be derived from the page.
        """
        with get_driver() as driver:
            driver.get(self.source_url)
            # NOTE(review): the ``find_element_by_*`` helpers used throughout
            # this class were removed in Selenium 4.3 -- confirm the pinned
            # Selenium version before upgrading.
            elem = driver.find_element_by_class_name("total_vaccination")
            total_vaccinations = self._parse_total_vaccinations(elem)
            population = self._estimate_population(elem, total_vaccinations)
            return pd.Series(
                {
                    "total_vaccinations": total_vaccinations,
                    "people_vaccinated": self._parse_people_vaccinated(elem, population),
                    "people_fully_vaccinated": self._parse_people_fully_vaccinated(elem, population),
                    "date": self._parse_date(driver),
                }
            )

    def _parse_total_vaccinations(self, elem) -> int:
        """Extract the absolute number of administered doses.

        Raises:
            AttributeError: if the "Total: ..." pattern is not found in the
                element text (``re.search`` returns ``None``).
        """
        text_total = elem.find_element_by_class_name("numbers").text
        regex_total = r"Total: ([\d\,]+)"
        return clean_count(re.search(regex_total, text_total).group(1))

    def _estimate_population(self, elem, total_vaccinations) -> Optional[float]:
        """Estimate the reference population from the doses-per-100 figure.

        Returns:
            ``total_vaccinations`` divided by the doses-per-person ratio, or
            ``None`` when the ratio is missing or zero. Returning ``None``
            here (instead of raising ``TypeError``/``ZeroDivisionError``)
            keeps the best-effort contract of ``_parse_relative_metric``.
        """
        regex = r"([\d\.]+) per 100 people"
        share_total = self._parse_relative_metric(elem, "percentage", regex)
        if not share_total:
            return None
        return total_vaccinations / share_total

    def _parse_people_vaccinated(self, elem, population) -> Optional[int]:
        """Return the estimated number of people with at least one dose.

        Returns ``None`` when either the percentage or the population
        estimate is unavailable.
        """
        regex = r"Percentage of population who received one dose \(of COVID-19 vaccine\)\s{1,2}([\d\.]+)%"
        share_vaccinated = self._parse_relative_metric(elem, "dose1pct", regex)
        return self._share_to_count(share_vaccinated, population)

    def _parse_people_fully_vaccinated(self, elem, population) -> Optional[int]:
        """Return the estimated number of fully vaccinated people.

        Returns ``None`` when either the percentage or the population
        estimate is unavailable.
        """
        regex = r"Percentage of population fully vaccinated \(against COVID-19\)\s{1,2}([\d\.]+)%"
        share_fully_vaccinated = self._parse_relative_metric(elem, "fullyVaccintedpct", regex)
        return self._share_to_count(share_fully_vaccinated, population)

    @staticmethod
    def _share_to_count(share, population) -> Optional[int]:
        """Convert a population share into a rounded head count.

        A missing/zero share or a missing population yields ``None``.
        """
        if not share or population is None:
            return None
        return round(share * population)

    def _parse_relative_metric(self, elem, class_name: str, regex: str) -> Optional[float]:
        """Parse a percentage-like figure and return it divided by 100.

        This is deliberately best-effort: any failure to locate the child
        element, match ``regex`` or convert the match to ``float`` yields
        ``None``.
        """
        try:
            text = elem.find_element_by_class_name(class_name).text
            return float(re.search(regex, text).group(1)) / 100
        except Exception:
            # ``Exception`` rather than a bare ``except`` so that
            # KeyboardInterrupt / SystemExit still propagate.
            return None

    def _parse_date(self, driver) -> str:
        """Extract the end date of the reported time period from the page."""
        text_date = driver.find_element_by_class_name("full_data_set").text
        regex_date = r"Time period: 29 January 2020 - (\d{2} [a-zA-Z]+ 202\d)"
        return extract_clean_date(text_date, regex_date, "%d %B %Y", lang="en")

    def pipe_calculate_boosters(self, ds: pd.Series) -> pd.Series:
        """Add ``total_boosters`` as the doses beyond first and second doses.

        The value is ``None`` unless both people metrics are available.
        """
        if ds.people_vaccinated and ds.people_fully_vaccinated:
            total_boosters = ds.total_vaccinations - ds.people_vaccinated - ds.people_fully_vaccinated
        else:
            total_boosters = None
        return enrich_data(ds, "total_boosters", total_boosters)

    def pipe_location(self, ds: pd.Series) -> pd.Series:
        """Add the ``location`` field."""
        return enrich_data(ds, "location", self.location)

    def pipe_vaccine(self, ds: pd.Series) -> pd.Series:
        """Add the ``vaccine`` field listing the vaccines in use."""
        return enrich_data(
            ds,
            "vaccine",
            "Oxford/AstraZeneca, Pfizer/BioNTech, Sinopharm/Beijing, Sinopharm/Wuhan, Sputnik V",
        )

    def pipe_source(self, ds: pd.Series) -> pd.Series:
        """Add the ``source_url`` field."""
        return enrich_data(ds, "source_url", self.source_url)

    def pipeline(self, ds: pd.Series) -> pd.Series:
        """Apply all enrichment steps to the scraped data point."""
        return (
            ds.pipe(self.pipe_calculate_boosters)
            .pipe(self.pipe_location)
            .pipe(self.pipe_vaccine)
            .pipe(self.pipe_source)
        )

    def export(self):
        """Scrape, enrich and hand the data point over to ``increment``."""
        data = self.read().pipe(self.pipeline)
        increment(
            location=data["location"],
            total_vaccinations=data["total_vaccinations"],
            people_vaccinated=data["people_vaccinated"],
            people_fully_vaccinated=data["people_fully_vaccinated"],
            total_boosters=data["total_boosters"],
            date=data["date"],
            source_url=data["source_url"],
            vaccine=data["vaccine"],
            make_series_monotonic=True,
        )
def main():
    """Run the United Arab Emirates scraper and export its data point."""
    scraper = UnitedArabEmirates()
    scraper.export()