import re
from datetime import datetime
from bs4 import BeautifulSoup
import pandas as pd
from cowidev.utils import clean_count, clean_date, get_soup
from cowidev.utils.clean.dates import localdate
from cowidev.vax.utils.incremental import increment, enrich_data
class Cuba:
    """Scraper for the Cuban vaccination update page at ``source_url``.

    The page text is searched with the regular expressions stored in
    ``self.regex`` to extract the report date and the vaccination counts,
    which are then handed to ``increment``.
    """

    def __init__(self):
        """Set the source URL, the location name and the extraction patterns."""
        self.source_url = "https://salud.msp.gob.cu/actualizacion-de-la-vacunacion-en-el-marco-de-los-estudios-de-los-candidatos-vacunales-cubanos-y-la-intervencion-sanitaria/"
        self.location = "Cuba"
        # "title" captures (1) the day-and-month text and (2) the total doses;
        # every other pattern captures a single count in group 1.
        self.regex = {
            "title": (
                r"Al cierre del (\d{1,2}(?:ro)? de [a-z]+)\s+se acumulan en el país ([\d\s]+) (?:de )?dosis"
                r" administradas"
            ),
            "people_vaccinated": r"al menos una dosis [^\.]+, ([\d ]+) personas",
            "people_fully_vaccinated": r"Tienen esquema de vacunación completo ([\d ]+) personas",
            "total_boosters": r"Cuentan con dosis de refuerzo un total de ([\d ]+) personas",
        }

    def read(self) -> pd.Series:
        """Fetch the source page and return the parsed data as a Series."""
        soup = get_soup(self.source_url)
        return self._parse_data(soup)

    def _parse_data(self, soup: "BeautifulSoup") -> pd.Series:
        """Build a Series holding the parsed date and all parsed metrics."""
        return pd.Series(
            data={
                "date": self._parse_date(soup),
                **self._parse_metrics(soup),
            }
        )

    @staticmethod
    def _strip_ordinal(date_str: str) -> str:
        """Drop the ordinal suffix from a leading day number ("1ro" -> "1").

        The title pattern accepts days written as "1ro", which the "%d"
        directive used for parsing cannot read.
        """
        return re.sub(r"^(\d{1,2})ro\b", r"\1", date_str)

    def _parse_date(self, soup):
        """Extract the report date from the page title sentence.

        The page gives only day and month, so the current year is tried first.
        If that yields a date later than today's date for America/Havana, the
        previous year is used instead.
        """
        match = re.search(self.regex["title"], soup.text)
        date_str = self._strip_ordinal(match.group(1))
        # Both attempts parse the same text, so they must share one format
        # (full month name, "%B").
        date_format = "%d de %B %Y"
        year = datetime.now().year
        date = clean_date(f"{date_str} {year}", date_format, lang="es")
        if date > localdate("America/Havana", force_today=True):
            date = clean_date(f"{date_str} {year - 1}", date_format, lang="es")
        return date

    def _parse_metrics(self, soup):
        """Extract the vaccination counts from the page text.

        Returns a dict with keys ``total_vaccinations``, ``people_vaccinated``,
        ``people_fully_vaccinated`` and ``total_boosters``.
        """
        text = soup.text  # read once; it is searched several times below
        match = re.search(self.regex["title"], text)
        data = {"total_vaccinations": clean_count(match.group(2))}
        for metric in ["people_vaccinated", "people_fully_vaccinated", "total_boosters"]:
            match = re.search(self.regex[metric], text)
            data[metric] = clean_count(match.group(1))
        return data

    def pipe_vaccine(self, ds: pd.Series) -> pd.Series:
        """Add the ``vaccine`` field to the Series via ``enrich_data``."""
        return enrich_data(ds, "vaccine", "Abdala, Soberana02, Soberana Plus")

    def pipeline(self, df: pd.Series) -> pd.Series:
        """Apply the processing steps (currently only ``pipe_vaccine``)."""
        return df.pipe(self.pipe_vaccine)

    def export(self):
        """Read the page, run the pipeline and pass the result to ``increment``."""
        data = self.read().pipe(self.pipeline)
        increment(
            location=self.location,
            total_vaccinations=data["total_vaccinations"],
            people_vaccinated=data["people_vaccinated"],
            people_fully_vaccinated=data["people_fully_vaccinated"],
            total_boosters=data["total_boosters"],
            date=data["date"],
            source_url=self.source_url,
            vaccine=data["vaccine"],
        )
def main():
    """Instantiate the Cuba scraper and run its export step."""
    scraper = Cuba()
    scraper.export()