Source code for cowidev.testing.incremental.ukraine

import re
from datetime import date, datetime, timedelta

from bs4 import BeautifulSoup, element
import pandas as pd

from cowidev.utils.web import get_soup
from cowidev.utils.clean import clean_count, clean_date
from cowidev.testing.utils.incremental import increment


class Ukraine:
    """Incremental scraper for the "tests performed" figure published for Ukraine.

    The page at ``source_url`` is fetched, the element following the
    "total of tests" label is read as the count, and the sentence
    "Information as of <Month> <day>" is read as the date.
    """

    location = "Ukraine"
    units = "tests performed"
    source_label = "Cabinet of Ministers of Ukraine"
    notes = ""
    source_url = "https://covid19.gov.ua/en"
    # Patterns used to locate the count label and the publication date on the page.
    regex = {
        "count": r"total of tests",
        "date": r"Information as of (\w+) (\d{1,2})",
    }

    def read(self) -> pd.Series:
        """Read data from source.

        Returns:
            A series with the keys ``source_url``, ``date`` and ``count``.
        """
        soup = get_soup(self.source_url)
        data = self._parse_data(soup)
        return pd.Series(data)

    def _parse_data(self, soup: BeautifulSoup) -> dict:
        """Get data from the source page.

        Args:
            soup: Parsed source page.

        Returns:
            A dict with the keys ``source_url``, ``date`` and ``count``.

        Raises:
            TypeError: If the count element or the date sentence is missing.
        """
        # Get relevant element
        elem = self._get_relevant_element(soup)
        # Extract date from soup
        date_ = self._parse_date(soup)
        # Parse metrics from element
        count = self._parse_metrics(elem)
        record = {
            "source_url": self.source_url,
            "date": date_,
            "count": count,
        }
        return record

    def _get_relevant_element(self, soup: BeautifulSoup) -> element.Tag:
        """Get the element holding the test count.

        The count is the ``field-value`` sibling that follows the parent of
        the text node matching ``regex["count"]``.

        Raises:
            TypeError: If the label, its parent or the value element is missing.
        """
        # Walk the chain step by step: any link may be None when the page
        # layout changes, and every such case must end in the same explicit
        # error rather than an AttributeError on None.
        label = soup.find(text=re.compile(self.regex["count"]))
        parent = label.parent if label is not None else None
        elem = parent.find_next_sibling(class_="field-value") if parent is not None else None
        if elem is None:
            raise TypeError("Website Structure Changed, please update the script")
        return elem

    def _parse_metrics(self, elem: element.Tag) -> int:
        """Get the test count from the element.

        The element text is stripped and its spaces removed before being
        handed to ``clean_count``.
        """
        count = elem.text.strip().replace(" ", "")
        return clean_count(count)

    def _parse_date(self, soup: BeautifulSoup) -> str:
        """Get the date from the source page.

        The page only states month and day, so the year is inferred: the
        current year is used unless that would place the date more than one
        day in the future, in which case the previous year is used.

        Raises:
            TypeError: If the date sentence is not found on the page.
        """
        text = soup.find(text=re.compile(self.regex["date"]))
        if text is None:
            raise TypeError("Website Structure Changed, please update the script")
        month, day = re.search(self.regex["date"], text).group(1, 2)

        today = date.today()
        year = today.year
        try:
            candidate = datetime.strptime(f"{year} {month} {day}", "%Y %B %d").date()
        except ValueError:
            # Leave unparsable input to clean_date, which reports it as before.
            candidate = None
        # One day of slack absorbs the time-zone offset between this machine
        # and the source; anything further ahead means the page still shows a
        # date from the end of the previous year.
        if candidate is not None and candidate > today + timedelta(days=1):
            year -= 1
        return clean_date(f"{year} {month} {day}", "%Y %B %d")

    def export(self):
        """Export data to csv.

        Reads the latest record and passes it to ``increment`` together with
        the location, units and source label of this scraper.
        """
        data = self.read()
        increment(
            sheet_name=self.location,
            country=self.location,
            units=self.units,
            date=data["date"],
            source_url=data["source_url"],
            source_label=self.source_label,
            count=data["count"],
        )
def main():
    """Instantiate the Ukraine scraper and run its export step."""
    scraper = Ukraine()
    scraper.export()