Source code for cowidev.testing.incremental.palau

import re
import tempfile
from cowidev.utils.web.scraping import get_soup
import pandas as pd
from bs4 import BeautifulSoup


from pdfminer.high_level import extract_text
from cowidev.utils.web.download import download_file_from_url
from cowidev.utils.clean import clean_date, clean_count

from cowidev.testing.utils.incremental import increment
from cowidev.testing import CountryTestBase


[docs]class Palau(CountryTestBase): location: str = "Palau" units: str = "tests performed" source_label: str = "Ministry of Health and Human Services" source_url: str = "http://www.palauhealth.org/" source_url_ref: str = "" regex: dict = {"date": r"(\d{1,2} \w+ 20\d{2})", "count": r"((\d+),(\d+))COVID-19 Testsconducted \(since"}
[docs] def read(self) -> pd.Series: data = [] soup = get_soup(self.source_url) data = self._parse_data(soup) return pd.Series(data)
[docs] def _parse_data(self, soup: BeautifulSoup) -> tuple: """Gets pdf url""" sd = soup.find("a", id="HyperLink21")["href"] pdf_url = f"{self.source_url}/{sd}" """Parses pdf text""" with tempfile.NamedTemporaryFile() as tmp: download_file_from_url(pdf_url, tmp.name) with open(tmp.name, "rb") as f: text = extract_text(f) text = text.replace("\n", "") """Gets metrics from report text""" count, date = self._parse_metrics(text) record = { "count": count, "date": date, } return record, False
[docs] def _parse_metrics(self, text: str) -> tuple: """Get metrics from report text.""" count = clean_count(re.search(self.regex["count"], text).group(1)) date = clean_date(re.search(self.regex["date"], text).group(1), "%d %B %Y") return count, date
[docs] def export(self): data = self.read()[0] increment( count=data["count"], sheet_name=self.location, country=self.location, units=self.units, date=data["date"], source_url=self.source_url, source_label=self.source_label, )
[docs]def main(): Palau().export()