Source code for cowidev.vax.incremental.macao

import time

import tabula

from cowidev.utils.clean import clean_count, extract_clean_date
from cowidev.utils.web import get_driver
from cowidev.vax.utils.incremental import increment


class Macao:
    """Incremental scraper for Macao's COVID-19 vaccination figures.

    The metrics are read from the first table of the PDF linked as
    "Weekly Bulletin on COVID-19" on ``source_url``.
    """

    source_url = "https://www.ssm.gov.mo/apps1/covid19vaccine/en.aspx"
    location = "Macao"

    def read(self):
        """Load the source page and build the latest data point.

        Returns:
            dict: The mapping produced by ``_parse_data``.
        """
        with get_driver() as driver:
            driver.get(self.source_url)
            # Fixed pause before querying the page for the bulletin link.
            time.sleep(5)
            # ``find_element_by_partial_link_text`` was removed in Selenium 4.3.
            # The generic ``find_element`` with the "partial link text" strategy
            # is available in both Selenium 3 and 4 and needs no extra import.
            elem = driver.find_element("partial link text", "Weekly Bulletin on COVID-19")
            # Parse inside the ``with`` block: ``_parse_data`` still reads the
            # element's ``href`` property and text.
            return self._parse_data(elem)

    def _parse_pdf_table(self, url):
        """Extract the first table of the PDF at ``url``.

        Args:
            url: Location of the bulletin PDF, passed to ``tabula.read_pdf``.

        Returns:
            The validated table, indexed by its first column (see ``_check_table``).

        Raises:
            ValueError: If no table is extracted or the table layout is unexpected.
        """
        dfs = tabula.read_pdf(url)
        if not dfs:
            # Fail with the same exception type as the other layout checks
            # instead of a bare IndexError on ``dfs[0]``.
            raise ValueError("No table found in source PDF!")
        return self._check_table(dfs[0])

    def _check_table(self, df):
        """Validate the column layout of ``df`` and index it by the row-label column.

        Args:
            df: Table as extracted from the bulletin PDF.

        Returns:
            ``df`` with column "Unnamed: 0" set as its index.

        Raises:
            ValueError: If the number or the names of the columns differ from the
                expected layout.
        """
        cols = ["Unnamed: 0", "Unnamed: 1", "滅活疫苗", "Unnamed: 2", "其他種類疫苗", "混合種類", "Unnamed: 3"]
        if df.shape[1] != 7:
            raise ValueError("New columns added!")
        if list(df.columns) != cols:
            raise ValueError("Source data columns changed!")
        return df.set_index("Unnamed: 0")

    def _parse_date(self, element):
        """Get the date from the report link text.

        Args:
            element: Link element whose ``text`` contains "(Last updated: dd/mm/yyyy ...)".

        Returns:
            The result of ``extract_clean_date`` for the captured date, parsed
            with the format ``%d/%m/%Y``.
        """
        r = r".* \(Last updated: (\d\d\/\d\d\/20\d\d) .*\)"
        return extract_clean_date(element.text, r, "%d/%m/%Y")

    def _parse_data(self, element):
        """Build the data point from the bulletin link element.

        Args:
            element: Link element pointing to the bulletin PDF. Its ``href``
                property gives the PDF URL and its text gives the report date.

        Returns:
            dict: Keys ``total_vaccinations``, ``people_vaccinated``,
            ``people_fully_vaccinated``, ``total_boosters``, ``source_url``
            and ``date``.

        Raises:
            ValueError: If the PDF table does not have the expected layout.
            KeyError: If an expected row label is missing from the table.
        """
        # Obtain pdf url
        url = element.get_property("href")
        # Obtain date from element
        date = self._parse_date(element)
        # Extract table data
        df = self._parse_pdf_table(url)

        def count(row_label):
            # Every metric is read from the same column, "Unnamed: 3".
            return clean_count(df.loc[row_label, "Unnamed: 3"])

        total_vaccinations = count("Total de doses administradas")
        people_vaccinated = count("N o Pessoas inoculadas com pelo menos uma")
        people_only_2_doses = count("N.o de pessoas vacinadas com a 2a dose")
        people_only_3_doses = count("N.o de pessoas vacinadas com a 3a dose")
        people_only_4_doses = count("N.o de pessoas vacinadas com a 4a dose")

        # NOTE(review): the sums below assume the per-dose rows are mutually
        # exclusive groups; confirm against the bulletin. If so, people with a
        # 4th dose presumably account for two booster doses each, which
        # ``total_boosters`` does not reflect. Left unchanged pending confirmation.
        return {
            "total_vaccinations": total_vaccinations,
            "people_vaccinated": people_vaccinated,
            "people_fully_vaccinated": people_only_2_doses + people_only_3_doses + people_only_4_doses,
            "total_boosters": people_only_3_doses + people_only_4_doses,
            "source_url": url,
            "date": date,
        }

    def export(self):
        """Read the latest data point and pass it to ``increment``."""
        data = self.read()
        increment(
            location=self.location,
            total_vaccinations=data["total_vaccinations"],
            people_vaccinated=data["people_vaccinated"],
            people_fully_vaccinated=data["people_fully_vaccinated"],
            total_boosters=data["total_boosters"],
            date=data["date"],
            source_url=data["source_url"],
            vaccine="Pfizer/BioNTech, Sinopharm/Beijing",
        )
def main():
    """Run the Macao scraper and export its latest data point."""
    scraper = Macao()
    scraper.export()