Source code for cowidev.vax.incremental.united_arab_emirates

import re
from typing import Optional

import pandas as pd

from cowidev.utils.clean import clean_count, extract_clean_date
from cowidev.utils.web.scraping import get_driver
from cowidev.vax.utils.incremental import enrich_data, increment


class UnitedArabEmirates:
    """Incremental scraper for United Arab Emirates vaccination figures.

    The source page is loaded with a web driver. It exposes the total number
    of doses, the doses administered per 100 people, and the percentages of
    the population with one dose and fully vaccinated. Absolute head counts
    are not read directly: the population is back-computed from the total
    and the per-100 rate, and the percentages are then applied to it.
    """

    def __init__(self) -> None:
        self.location = "United Arab Emirates"
        self.source_url = "https://fcsc.gov.ae/en-us/Pages/Covid19/UAE-Covid-19-Updates.aspx"

    def read(self) -> pd.Series:
        """Scrape the source page and return the parsed metrics."""
        return self._parse_data()

    def _parse_data(self) -> pd.Series:
        """Load the source page and extract every metric from it.

        Returns:
            Series with keys ``total_vaccinations``, ``people_vaccinated``,
            ``people_fully_vaccinated`` and ``date``. The two head counts are
            ``None`` when they cannot be derived from the page.
        """
        with get_driver() as driver:
            driver.get(self.source_url)
            # NOTE(review): `find_element_by_class_name` is the Selenium 3 helper
            # API; confirm the driver version before upgrading Selenium.
            elem = driver.find_element_by_class_name("total_vaccination")
            total_vaccinations = self._parse_total_vaccinations(elem)
            population = self._estimate_population(elem, total_vaccinations)
            return pd.Series(
                {
                    "total_vaccinations": total_vaccinations,
                    "people_vaccinated": self._parse_people_vaccinated(elem, population),
                    "people_fully_vaccinated": self._parse_people_fully_vaccinated(elem, population),
                    "date": self._parse_date(driver),
                }
            )

    def _parse_total_vaccinations(self, elem) -> int:
        """Read the total dose count from the ``numbers`` child of ``elem``.

        The text is expected to contain ``Total: <digits and commas>``; the
        captured figure is passed through ``clean_count`` (presumably yielding
        an int -- confirm against that helper).

        Raises:
            AttributeError: If the text does not match the expected pattern.
        """
        text_total = elem.find_element_by_class_name("numbers").text
        regex_total = r"Total: ([\d\,]+)"
        total_vaccinations = clean_count(re.search(regex_total, text_total).group(1))
        return total_vaccinations

    def _estimate_population(self, elem, total_vaccinations) -> Optional[float]:
        """Back-compute the population from the doses-per-100-people figure.

        ``_parse_relative_metric`` divides the per-100 figure by 100, giving
        doses per person; dividing the total by that ratio gives the
        population implied by the page.

        Returns:
            The estimated population, or ``None`` when the rate is missing,
            unparsable or zero (previously these cases raised ``TypeError``
            or ``ZeroDivisionError``).
        """
        regex = r"([\d\.]+) per 100 people"
        share_total = self._parse_relative_metric(elem, "percentage", regex)
        if not share_total:
            return None
        return total_vaccinations / share_total

    def _parse_people_vaccinated(self, elem, population) -> Optional[int]:
        """Estimate the number of people with at least one dose.

        Returns:
            The one-dose share times ``population``, rounded, or ``None`` when
            either the share or the population is unavailable.
        """
        regex = r"Percentage of population who received one dose \(of COVID-19 vaccine\)\s{1,2}([\d\.]+)%"
        share_vaccinated = self._parse_relative_metric(elem, "dose1pct", regex)
        return self._share_to_count(share_vaccinated, population)

    def _parse_people_fully_vaccinated(self, elem, population) -> Optional[int]:
        """Estimate the number of fully vaccinated people.

        Returns:
            The fully-vaccinated share times ``population``, rounded, or
            ``None`` when either the share or the population is unavailable.
        """
        regex = r"Percentage of population fully vaccinated \(against COVID-19\)\s{1,2}([\d\.]+)%"
        share_fully_vaccinated = self._parse_relative_metric(elem, "fullyVaccintedpct", regex)
        return self._share_to_count(share_fully_vaccinated, population)

    @staticmethod
    def _share_to_count(share, population) -> Optional[int]:
        """Convert a population share into a rounded head count, if possible."""
        # A falsy share (None or 0.0) is reported as "no data", as before.
        if not share or population is None:
            return None
        return round(share * population)

    def _parse_relative_metric(self, elem, class_name: str, regex: str) -> Optional[float]:
        """Extract a per-100 figure from a child element as a fraction.

        Args:
            elem: Parent element that is searched for the child.
            class_name: CSS class of the child element holding the text.
            regex: Pattern whose first group captures the numeric figure.

        Returns:
            The captured number divided by 100, or ``None`` if the element is
            missing or its text cannot be parsed.
        """
        try:
            text = elem.find_element_by_class_name(class_name).text
            metric = float(re.search(regex, text).group(1)) / 100
            return metric
        except Exception:
            # Best-effort parse: any lookup or parsing failure means "no data".
            # KeyboardInterrupt / SystemExit are deliberately not swallowed.
            return None

    def _parse_date(self, driver) -> str:
        """Read the end date of the reported time period from the page.

        The text of the ``full_data_set`` element is handed to
        ``extract_clean_date`` together with the pattern and date format
        (presumably returning a normalised date string -- confirm against
        that helper).
        """
        text_date = driver.find_element_by_class_name("full_data_set").text
        regex_date = r"Time period: 29 January 2020 - (\d{2} [a-zA-Z]+ 202\d)"
        return extract_clean_date(text_date, regex_date, "%d %B %Y", lang="en")

    def pipe_calculate_boosters(self, ds: pd.Series) -> pd.Series:
        """Add ``total_boosters`` as the doses not counted as first or full doses.

        The value is ``None`` unless both head counts are available.
        """
        total_boosters = (
            ds.total_vaccinations - ds.people_vaccinated - ds.people_fully_vaccinated
            if ds.people_vaccinated and ds.people_fully_vaccinated
            else None
        )
        return enrich_data(ds, "total_boosters", total_boosters)

    def pipe_location(self, ds: pd.Series) -> pd.Series:
        """Add the ``location`` field."""
        return enrich_data(ds, "location", self.location)

    def pipe_vaccine(self, ds: pd.Series) -> pd.Series:
        """Add the ``vaccine`` field with the list of vaccines in use."""
        return enrich_data(
            ds,
            "vaccine",
            "Oxford/AstraZeneca, Pfizer/BioNTech, Sinopharm/Beijing, Sinopharm/Wuhan, Sputnik V",
        )

    def pipe_source(self, ds: pd.Series) -> pd.Series:
        """Add the ``source_url`` field."""
        return enrich_data(ds, "source_url", self.source_url)

    def pipeline(self, ds: pd.Series) -> pd.Series:
        """Apply all enrichment steps to the scraped Series, in order."""
        return (
            ds.pipe(self.pipe_calculate_boosters)
            .pipe(self.pipe_location)
            .pipe(self.pipe_vaccine)
            .pipe(self.pipe_source)
        )

    def export(self):
        """Scrape, enrich and hand the resulting data point to ``increment``."""
        data = self.read().pipe(self.pipeline)
        increment(
            location=data["location"],
            total_vaccinations=data["total_vaccinations"],
            people_vaccinated=data["people_vaccinated"],
            people_fully_vaccinated=data["people_fully_vaccinated"],
            total_boosters=data["total_boosters"],
            date=data["date"],
            source_url=data["source_url"],
            vaccine=data["vaccine"],
            make_series_monotonic=True,
        )


def main():
    """Entry point: build the scraper and run its export step."""
    scraper = UnitedArabEmirates()
    scraper.export()