from io import StringIO

import pandas as pd
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.by import By

from cowidev.utils.clean import clean_count, clean_date
from cowidev.vax.utils.base import CountryVaxBase
from cowidev.vax.utils.incremental import increment, enrich_data
class Kazakhstan(CountryVaxBase):
    """Scrape Kazakhstan's vaccination figures from ``source_url`` and export them.

    The page is loaded in a headless Chrome session; the counts and the
    reference date are read from the rendered DOM, enriched through the
    ``pipe_*`` steps, and handed to ``export_datafile``.
    """

    # Location label for this scraper.
    location = "Kazakhstan"
    # Page that is opened in the browser and parsed.
    source_url = "https://www.coronavirus2020.kz/"

    def read(self) -> pd.Series:
        """Load the source page and collect the latest figures.

        Returns:
            A Series with ``people_vaccinated``, ``people_fully_vaccinated``
            and ``date``; ``total_boosters`` is included only when the
            booster total could be located on the page.
        """
        op = Options()
        op.add_argument("--headless")
        with webdriver.Chrome(options=op) as driver:
            driver.get(self.source_url)
            people_vaccinated, people_fully_vaccinated = self._parse_vaccinations(driver)
            date = self._parse_date(driver)
            data = {
                "people_vaccinated": people_vaccinated,
                "people_fully_vaccinated": people_fully_vaccinated,
                "date": date,
            }
            try:
                total_boosters = self._parse_boosters(driver)
            except IndexError:
                # Best effort: no booster total on the page, so the field is
                # left out instead of failing the whole read.
                pass
            else:
                data["total_boosters"] = total_boosters
        return pd.Series(data)

    def _parse_vaccinations(self, driver: webdriver.Chrome) -> tuple:
        """Read the first- and second-dose counters from the page.

        Args:
            driver: Browser session with the source page already loaded.

        Returns:
            ``(people_vaccinated, people_fully_vaccinated)``, each being the
            text of the ``vaccinated_1`` / ``vaccinated_2`` element passed
            through ``clean_count``.
        """
        # find_element(By.ID, ...) replaces find_element_by_id, which was
        # removed in Selenium 4.3; this form also works on Selenium 3.
        people_vaccinated = clean_count(driver.find_element(By.ID, "vaccinated_1").text)
        people_fully_vaccinated = clean_count(driver.find_element(By.ID, "vaccinated_2").text)
        return people_vaccinated, people_fully_vaccinated

    def _parse_boosters(self, driver: webdriver.Chrome) -> int:
        """Read the overall booster count from the page.

        Args:
            driver: Browser session with the source page already loaded.

        Returns:
            The booster total as produced by ``clean_count`` (presumably an
            int -- confirm against ``clean_count``).

        Raises:
            IndexError: If no ``number_revac_info`` element has a parent whose
                text contains "Всего". ``read`` relies on this exception type.
        """
        elems = driver.find_elements(By.CLASS_NAME, "number_revac_info")
        # Keep only the figures whose parent element is labelled "Всего"
        # (Russian for "Total").
        totals = [e for e in elems if "Всего" in e.find_element(By.XPATH, "..").text]
        if not totals:
            raise IndexError("No booster total ('Всего') element found on the page")
        return clean_count(totals[0].text)

    def _parse_date(self, driver: webdriver.Chrome) -> str:
        """Read the reference date of the published figures.

        Args:
            driver: Browser session with the source page already loaded.

        Returns:
            The date as returned by ``clean_date`` for the raw page text.
        """
        elem = driver.find_element(By.CLASS_NAME, "tabl_vactination")
        # Wrap the markup in StringIO: passing literal HTML to read_html is
        # deprecated in recent pandas releases.
        html = StringIO(elem.get_attribute("innerHTML"))
        # The date note is taken from the bottom-right cell of the first table.
        date_str_raw = pd.read_html(html)[0].iloc[-1, -1]
        return clean_date(date_str_raw, "*данные на %d.%m.%Y")

    def pipe_vaccine(self, ds: pd.Series):
        """Add the ``vaccine`` field (vaccine names) via ``enrich_data``."""
        return enrich_data(ds, "vaccine", "QazVac, Sinopharm/Beijing, Sputnik V")

    def pipe_metrics(self, ds: pd.Series):
        """Add ``total_vaccinations`` when the booster count is available.

        The total is the sum of ``people_vaccinated``,
        ``people_fully_vaccinated`` and ``total_boosters``. When
        ``total_boosters`` is missing, ``ds`` is returned unchanged.
        """
        if "total_boosters" in ds:
            total_vaccinations = ds["people_vaccinated"] + ds["people_fully_vaccinated"] + ds["total_boosters"]
            return enrich_data(ds, "total_vaccinations", total_vaccinations)
        return ds

    def pipe_to_frame(self, ds: pd.Series):
        """Convert the Series into a one-row DataFrame (fields become columns)."""
        return ds.to_frame().T

    def pipeline(self, ds: pd.Series) -> pd.DataFrame:
        """Run all processing steps on the scraped Series.

        Args:
            ds: Series as returned by ``read``.

        Returns:
            The one-row DataFrame produced by ``pipe_to_frame``.
        """
        # pipe_metadata is not defined in this class; presumably inherited
        # from CountryVaxBase.
        df = ds.pipe(self.pipe_metadata).pipe(self.pipe_vaccine).pipe(self.pipe_metrics).pipe(self.pipe_to_frame)
        # df = add_latest_who_values(df, "Kazakhstan", ["total_vaccinations"])
        return df

    def export(self):
        """Scrape, process and hand the result to ``export_datafile``."""
        df = self.read().pipe(self.pipeline)
        self.export_datafile(df, attach=True)
def main():
    """Entry point: build the Kazakhstan scraper and run its export."""
    scraper = Kazakhstan()
    scraper.export()