import re
from bs4 import BeautifulSoup
import pandas as pd
from cowidev.utils.clean import clean_count, clean_date
from cowidev.utils.web.scraping import get_soup
from cowidev.vax.utils.incremental import enrich_data, increment
class Serbia:
    """Incremental scraper for the vaccination figures published for Serbia.

    The page at ``source_url`` is fetched with ``get_soup``; dose counts and
    the "updated on" date are extracted from it with the regular expressions
    stored in ``self.regex``. ``export`` forwards the resulting values to
    ``increment``.
    """

    def __init__(self):
        self.location = "Serbia"
        self.source_url = "https://vakcinacija.gov.rs/"
        self.regex = {
            # Four capture groups, in order: number of doses, first dose,
            # second dose, third dose. Counts use "." as a digit separator.
            "metrics": (
                r"Број доза: ([\d\.]+)\s?(?:–|-) прва доза ([\d\.]+), друга доза ([\d\.]+), трећа доза ([\d\.]+)"
            ),
            # Matches the "ажурирано <date>" ("updated <date>") text.
            "date": r"ажурирано .*",
        }

    def read(self) -> pd.Series:
        """Fetch the source page and return the parsed values as a Series.

        Returns:
            A series with the keys ``total_vaccinations``,
            ``people_vaccinated``, ``people_fully_vaccinated``,
            ``total_boosters``, ``source_url`` and ``date``.

        Raises:
            ValueError: If the metrics or the date cannot be located on the
                page (see ``_parse_metrics`` and ``_parse_date``).
        """
        soup = get_soup(self.source_url)
        (
            total_vaccinations,
            people_vaccinated,
            people_fully_vaccinated,
            total_boosters,
        ) = self._parse_metrics(soup)
        return pd.Series(
            {
                "total_vaccinations": total_vaccinations,
                "people_vaccinated": people_vaccinated,
                "people_fully_vaccinated": people_fully_vaccinated,
                "total_boosters": total_boosters,
                "source_url": self.source_url,
                "date": self._parse_date(soup),
            }
        )

    def _parse_metrics(self, soup: "BeautifulSoup"):
        """Extract the four dose counts from the page text.

        Args:
            soup: Parsed page; only its ``text`` attribute is read.

        Returns:
            A 4-tuple ``(total_vaccinations, people_vaccinated,
            people_fully_vaccinated, total_boosters)``, each the result of
            ``clean_count`` applied to the corresponding regex group.

        Raises:
            ValueError: If the metrics pattern is not found in the page text.
        """
        match = re.search(self.regex["metrics"], soup.text)
        if match is None:
            # Without this guard a layout change surfaces as an opaque
            # AttributeError on ``None.group``.
            raise ValueError("Format of source has changed")
        total_vaccinations = clean_count(match.group(1))
        people_vaccinated = clean_count(match.group(2))
        people_fully_vaccinated = clean_count(match.group(3))
        total_boosters = clean_count(match.group(4))
        return total_vaccinations, people_vaccinated, people_fully_vaccinated, total_boosters

    def _parse_date(self, soup: "BeautifulSoup") -> str:
        """Locate the single ``<p>`` element holding the update date and parse it.

        Args:
            soup: Parsed page; its ``find_all`` method is used to list ``<p>``
                elements.

        Returns:
            The value returned by ``clean_date`` for the element's stripped
            text, parsed with the format ``"ажурирано %d.%m.%Y"``.

        Raises:
            ValueError: If zero or more than one ``<p>`` element matches the
                date pattern.
        """
        # Compile once instead of on every loop iteration.
        pattern = re.compile(self.regex["date"])
        # NOTE(review): newer BeautifulSoup releases name this argument
        # ``string``; ``text`` is kept unchanged here - confirm against the
        # pinned bs4 version before switching.
        candidates = [elem for elem in soup.find_all("p") if elem.find(text=pattern)]
        if len(candidates) != 1:
            # Covers both "several matches" and "no match"; the latter would
            # otherwise raise IndexError on ``candidates[0]``.
            raise ValueError("Format of source has changed")
        return clean_date(candidates[0].text.strip(), "ажурирано %d.%m.%Y")

    def pipe_location(self, ds: pd.Series) -> pd.Series:
        """Return ``enrich_data(ds, "location", self.location)``."""
        return enrich_data(ds, "location", self.location)

    def pipe_vaccine(self, ds: pd.Series) -> pd.Series:
        """Return ``enrich_data`` applied to ``ds`` with the fixed ``vaccine`` list."""
        return enrich_data(
            ds,
            "vaccine",
            "Oxford/AstraZeneca, Pfizer/BioNTech, Sinopharm/Beijing, Sputnik V",
        )

    def pipeline(self, ds: pd.Series) -> pd.Series:
        """Apply ``pipe_vaccine`` and then ``pipe_location`` to ``ds``."""
        return ds.pipe(self.pipe_vaccine).pipe(self.pipe_location)

    def export(self):
        """Read the page, run the pipeline and pass the values to ``increment``."""
        data = self.read().pipe(self.pipeline)
        increment(
            location=data["location"],
            total_vaccinations=data["total_vaccinations"],
            people_vaccinated=data["people_vaccinated"],
            people_fully_vaccinated=data["people_fully_vaccinated"],
            total_boosters=data["total_boosters"],
            date=data["date"],
            source_url=data["source_url"],
            vaccine=data["vaccine"],
        )
def main():
    """Instantiate ``Serbia`` and run its ``export`` method."""
    Serbia().export()