Source code for cowidev.testing.incremental.pakistan

from bs4 import BeautifulSoup, element
import pandas as pd

from cowidev.utils.web import get_soup
from cowidev.utils.clean import clean_count, extract_clean_date
from cowidev.testing.utils.incremental import increment


class Pakistan:
    """Incremental testing-data scraper for Pakistan.

    Fetches the page at ``source_url``, extracts the test count and the
    "last update" date from it, and forwards the record to ``increment``.
    """

    location = "Pakistan"
    units = "tests performed"
    source_label = "Government of Pakistan"
    notes = ""
    source_url = "http://www.covid.gov.pk/"
    # "header" and "count" are page strings looked up with ``soup.find``;
    # "date" is the pattern handed to ``extract_clean_date``.
    regex = {
        "header": "Pakistan statistics ",
        "count": r"Total Tests",
        "date": r"(\d+ \w+, \d+)",
    }
    # Single message used whenever an expected page anchor is missing.
    _structure_error_msg = "Website Structure Changed, please update the script"

    def read(self) -> pd.Series:
        """Read data from source.

        Returns:
            A Series built from the record produced by ``_parse_data``
            (keys ``source_url``, ``date`` and ``count``).
        """
        soup = get_soup(self.source_url)
        data = self._parse_data(soup)
        return pd.Series(data)

    def _parse_data(self, soup: "BeautifulSoup") -> dict:
        """Get data from the source page.

        Args:
            soup: Parsed source page.

        Returns:
            Dict with the keys ``source_url``, ``date`` and ``count``.

        Raises:
            TypeError: If the count element or the date element cannot be
                located in ``soup``.
        """
        # Get relevant element
        elem = self._get_relevant_element(soup)
        if elem is None:
            raise TypeError(self._structure_error_msg)
        # Extract metrics from relevant element
        count = self._parse_metrics(elem)
        # Extract date from soup
        date = self._parse_date_from_soup(soup)

        record = {
            "source_url": self.source_url,
            "date": date,
            "count": count,
        }
        return record

    def _get_relevant_element(self, soup: "BeautifulSoup") -> "element.Tag | None":
        """Get the relevant element from soup.

        Returns:
            The grandparent of the node whose text equals ``regex["count"]``,
            or ``None`` when no such node exists.
        """
        label = soup.find(text=self.regex["count"])
        if label is None:
            # Returning None (instead of failing on ``.parent``) lets the
            # caller report the structure change with its own error.
            return None
        return label.parent.parent

    def _parse_metrics(self, elem: "element.Tag") -> int:
        """Get metrics from element.

        Reads the text of the first ``span`` under ``elem`` and passes it to
        ``clean_count``.
        """
        count = elem.span.text
        return clean_count(count)

    def _parse_date_from_soup(self, soup: "BeautifulSoup") -> str:
        """Get date from soup.

        Looks up the node whose text equals ``regex["header"]``, then the
        child of its parent with ``id="last-update"``, and hands that child's
        text to ``extract_clean_date``.

        Raises:
            TypeError: If the header node or the ``last-update`` element is
                missing.
        """
        header = soup.find(text=self.regex["header"])
        if header is None:
            raise TypeError(self._structure_error_msg)
        date_elem = header.parent.findChild(id="last-update")
        if date_elem is None:
            raise TypeError(self._structure_error_msg)
        return extract_clean_date(date_elem.text, self.regex["date"], "%d %b, %Y")

    def export(self):
        """Read the latest record and pass it to ``increment``."""
        data = self.read()
        increment(
            sheet_name=self.location,
            country=self.location,
            units=self.units,
            date=data["date"],
            source_url=data["source_url"],
            source_label=self.source_label,
            count=data["count"],
        )


def main():
    """Run the Pakistan scraper: build a ``Pakistan`` instance and export it."""
    Pakistan().export()