Source code for cowidev.vax.incremental.el_salvador

import json

import pandas as pd
from bs4 import BeautifulSoup

from cowidev.utils.clean import clean_count, extract_clean_date
from cowidev.utils.web.scraping import get_soup
from cowidev.vax.utils.incremental import enrich_data, increment


[docs]class ElSalvador: location: str = "El Salvador" source_url: str = "https://covid19.gob.sv/"
[docs] def read(self) -> pd.Series: soup = get_soup(self.source_url) link = self.parse_infogram_link(soup) # print(link) soup = get_soup(link) infogram_data = self.parse_infogram_data(soup) return pd.Series( { "date": self.parse_infogram_date(infogram_data), "source_url": self.source_url, "total_vaccinations": self._parse_total_vaccinations(infogram_data), "people_vaccinated": self._parse_people_vaccinated(infogram_data), "people_fully_vaccinated": self._parse_people_fully_vaccinated(infogram_data), "total_boosters": self._parse_boosters(infogram_data), } )
[docs] def parse_infogram_data(self, soup: BeautifulSoup) -> dict: for script in soup.find_all("script"): if "infographicData" in str(script): json_data = script.string[:-1].replace("window.infographicData=", "") json_data = json.loads(json_data) break json_data = json_data["elements"]["content"]["content"]["entities"] return json_data
[docs] def _get_infogram_value(self, infogram_data: dict, field_id: str, join_text: bool = False): if join_text: return "".join(x["text"] for x in infogram_data[field_id]["props"]["content"]["blocks"]) return infogram_data[field_id]["props"]["content"]["blocks"][0]["text"]
[docs] def _parse_total_vaccinations(self, infogram_data: dict) -> int: return clean_count( self._get_infogram_value( infogram_data, "33f6ba96-fffd-4a95-a7ac-b65aa9f808c35b18e4d2-a9b9-4383-a4c4-7aea197c5322" ) )
[docs] def _parse_people_vaccinated(self, infogram_data: dict) -> int: ppl_vaxed = clean_count( self._get_infogram_value( infogram_data, "4275cc3f-7ae8-4af3-9c5a-ef94203d47d71c0589f5-607d-4eb2-86b0-b24303e5455a" ) ) ppl_vaxed_for = clean_count( self._get_infogram_value( infogram_data, "8a007cb6-7384-4af1-9f92-c41699d77aab1ed244af-7cfe-47be-972f-210df044e204" ) ) return ppl_vaxed + ppl_vaxed_for
[docs] def _parse_people_fully_vaccinated(self, infogram_data: dict) -> int: ppl_fully_vaxed = clean_count( self._get_infogram_value( infogram_data, "7b45d34f-b8d0-47d7-8c3a-35c89a4d4cdfbeb30c31-b4de-45fc-bdc5-78e39d37039b" ) ) ppl_fully_vaxed_for = clean_count( self._get_infogram_value( infogram_data, "9eee1f41-c398-4a15-81aa-2588250e53cbbc920ff4-fff6-4ef4-a174-af1915c4b1a7" ) ) return ppl_fully_vaxed + ppl_fully_vaxed_for
[docs] def _parse_boosters(self, infogram_data: dict) -> int: third_dose = clean_count( self._get_infogram_value( infogram_data, "296d4de1-bd72-44d9-bca2-2fdff2477c2c463a2de2-f70e-4b18-aee0-43f01852970f" ) ) third_dose_for = clean_count( self._get_infogram_value( infogram_data, "de709d4d-bac0-4a75-84bc-1d1d1104169b3df9b985-04fb-48fc-b6b0-7ce028ab9aa1" ) ) forth_dose = clean_count( self._get_infogram_value( infogram_data, "2fbd1738-f9c3-49ad-8855-c933c83abc185dc2112e-d914-4551-8da9-2466a4916ca2" ) ) forth_dose_for = clean_count( self._get_infogram_value( infogram_data, "20d53fe7-91f8-4778-a13f-c938f18dd8fe92d01672-d40e-4099-96ff-9d125c46f748" ) ) return third_dose + third_dose_for + forth_dose + forth_dose_for
[docs] def parse_infogram_date(self, infogram_data: dict) -> str: field_id = "d58d673d-f6f7-44d2-8825-8f83ea806a695bbc73d2-f5d6-493e-890b-b7499db493a2" x = self._get_infogram_value(infogram_data, field_id, join_text=True) dt = extract_clean_date(x, "RESUMEN DE VACUNACIÓN\s?(\d+-[A-Z]+-2\d)\s?", "%d-%b-%y", lang="es") return dt
[docs] def pipe_location(self, ds: pd.Series) -> pd.Series: return enrich_data(ds, "location", "El Salvador")
[docs] def pipe_vaccine(self, ds: pd.Series) -> pd.Series: return enrich_data(ds, "vaccine", "Oxford/AstraZeneca, Pfizer/BioNTech, Sinopharm/Beijing, Sinovac")
[docs] def pipeline(self, ds: pd.Series) -> pd.Series: return ds.pipe(self.pipe_location).pipe(self.pipe_vaccine)
[docs] def export(self): data = self.read().pipe(self.pipeline) increment( location=data["location"], total_vaccinations=data["total_vaccinations"], people_vaccinated=data["people_vaccinated"], people_fully_vaccinated=data["people_fully_vaccinated"], total_boosters=data["total_boosters"], date=data["date"], source_url=data["source_url"], vaccine=data["vaccine"], )
[docs]def main(): ElSalvador().export()