Source code for cowidev.vax.incremental.laos

import re

from bs4 import BeautifulSoup, element
import pandas as pd

from cowidev.utils import clean_count, clean_date, get_soup
from cowidev.vax.utils.incremental import increment, enrich_data


class Laos:
    """Incremental scraper for the Laos vaccination figures shown at ``source_url``.

    The page text is flattened, the first/second dose counters and the report
    date are extracted with the patterns in ``regex``, and the resulting record
    is enriched with constant metadata before being handed to ``increment``.
    """

    location = "Laos"
    source_url = "https://www.covid19.gov.la/index.php"
    # Patterns are applied to the flattened page text (newlines replaced by
    # spaces, commas removed), hence the plain ``\d+`` for the counters.
    regex = {
        "dose_1": r"ຮັບວັກຊິນເຂັມທີ 1 (\d+)",
        "dose_2": r"ຮັບວັກຊິນເຂັມທີ 2 (\d+)",
        "date": r"ຂໍ້ມູນ ເວລາ .*? (\d+\/\d+\/\d+)",
    }

    def read(self) -> pd.Series:
        """Download the source page and return the parsed record as a Series."""
        soup = get_soup(self.source_url)
        data = self._parse_data(soup)
        return pd.Series(data)

    def _parse_data(self, soup: "BeautifulSoup") -> dict:
        """Build the data record from the parsed page.

        Args:
            soup: Parsed HTML of the source page.

        Returns:
            Dict with the keys ``date``, ``people_vaccinated``,
            ``people_fully_vaccinated`` and ``total_vaccinations``.

        Raises:
            TypeError: If the expected section or any of the patterns is
                missing from the page.
        """
        elem = self._get_relevant_element(soup)
        text = self._get_text_from_element(elem)
        people_vaccinated, people_fully_vaccinated = self._parse_metrics(text)
        date = self._parse_date(text)
        return {
            "date": date,
            "people_vaccinated": people_vaccinated,
            "people_fully_vaccinated": people_fully_vaccinated,
            # The total is computed as the sum of the two parsed dose counters.
            "total_vaccinations": people_vaccinated + people_fully_vaccinated,
        }

    def _get_relevant_element(self, soup: "BeautifulSoup") -> "element.Tag":
        """Return the ``<section id="aa-blog-archive">`` element of the page.

        Raises:
            TypeError: If the section is not present in ``soup``.
        """
        elem = soup.find("section", {"id": "aa-blog-archive"})
        if elem is None:
            raise TypeError("Website Structure Changed, please update the script")
        return elem

    def _get_text_from_element(self, elem: "element.Tag") -> str:
        """Return the element text on one line, with thousands commas removed."""
        return elem.text.replace("\n", " ").replace(",", "")

    def _search_group(self, key: str, text: str) -> str:
        """Return the first capture group of ``self.regex[key]`` found in ``text``.

        Args:
            key: Name of the pattern in ``self.regex``.
            text: Flattened page text to search.

        Raises:
            TypeError: If the pattern does not match. The same exception type
                as in ``_get_relevant_element`` is used so that every
                "page layout changed" failure surfaces the same way instead of
                as an opaque ``AttributeError`` on ``None``.
        """
        match = re.search(self.regex[key], text)
        if match is None:
            raise TypeError(
                f"Website Structure Changed, please update the script (no match for '{key}')"
            )
        return match.group(1)

    def _parse_metrics(self, text: str) -> tuple:
        """Extract the first and second dose counters from the text.

        Returns:
            Tuple ``(dose_1, dose_2)``, each passed through ``clean_count``.

        Raises:
            TypeError: If either counter cannot be found in ``text``.
        """
        # Locate both counters before converting, so a missing one is reported
        # as a structure change rather than a conversion problem.
        dose_1 = self._search_group("dose_1", text)
        dose_2 = self._search_group("dose_2", text)
        return clean_count(dose_1), clean_count(dose_2)

    def _parse_date(self, text: str) -> str:
        """Extract the report date (``dd/mm/YYYY`` on the page) from the text.

        Returns:
            The date as returned by ``clean_date`` for the ``%d/%m/%Y`` format.

        Raises:
            TypeError: If no date can be found in ``text``.
        """
        date = self._search_group("date", text)
        return clean_date(date, "%d/%m/%Y")

    def pipe_source(self, ds: pd.Series) -> pd.Series:
        """Add the ``source_url`` field to the record."""
        return enrich_data(ds, "source_url", self.source_url)

    def pipe_location(self, ds: pd.Series) -> pd.Series:
        """Add the ``location`` field to the record."""
        return enrich_data(ds, "location", self.location)

    def pipe_vaccine(self, ds: pd.Series) -> pd.Series:
        """Add the ``vaccine`` field (comma-separated vaccine names) to the record."""
        return enrich_data(
            ds,
            "vaccine",
            "Johnson&Johnson, Oxford/AstraZeneca, Pfizer/BioNTech, Sinopharm/Beijing, Sinovac, Sputnik Light, Sputnik V",
        )

    def pipeline(self, ds: pd.Series) -> pd.Series:
        """Apply the source, location and vaccine enrichment steps in order."""
        return ds.pipe(self.pipe_source).pipe(self.pipe_location).pipe(self.pipe_vaccine)

    def export(self):
        """Read the page, enrich the record and pass it to ``increment``."""
        data = self.read().pipe(self.pipeline)
        increment(
            location=data["location"],
            date=data["date"],
            vaccine=data["vaccine"],
            source_url=data["source_url"],
            people_vaccinated=data["people_vaccinated"],
            people_fully_vaccinated=data["people_fully_vaccinated"],
            total_vaccinations=data["total_vaccinations"],
        )
def main():
    """Entry point: instantiate the Laos scraper and run its export."""
    scraper = Laos()
    scraper.export()