import json
from bs4 import BeautifulSoup
import pandas as pd
from cowidev.utils import clean_date, clean_count, get_soup
from cowidev.vax.utils.incremental import increment, enrich_data
from cowidev.vax.utils.utils import add_latest_who_values
from cowidev.vax.utils.base import CountryVaxBase
[docs]class Philippines(CountryVaxBase):
location: str = "Philippines"
source_url: str = "https://e.infogram.com/_/yFVE69R1WlSdqY3aCsBF"
source_url_ref: str = (
"https://news.abs-cbn.com/spotlight/multimedia/infographic/03/23/21/philippines-covid-19-vaccine-tracker"
)
metric_entities: dict = {
"total_vaccinations": "4b9e949e-2990-4349-aa85-5aff8501068a",
"people_vaccinated": "32ae0a31-293e-48ea-91cf-e4518496d6bdc9fe1875-6600-4e45-ae6d-a48d9b8a1eae",
"people_fully_vaccinated": "a4c3cd88-85f7-44ea-b48f-1c97618f1e48",
"total_boosters": "2c3bf26f-5d71-4793-b6de-4f6b0f1735626ba8b43e-d7c0-4f38-91ff-61d7d8770432",
# "total_boosters_2": "1d6e2083-6212-429f-8599-109454eaef84a586833a-32b3-43c9-ac61-4d3703c816e8",
}
date_entity: str = "01ff1d02-e027-4eee-9de1-5e19f7fdd5e8"
[docs] def read(self) -> pd.Series:
"""Reada data from source"""
soup = get_soup(self.source_url)
json_data = self._get_json_data(soup)
data = self._parse_data(json_data)
return pd.Series(data)
[docs] def _print_entitiy_ids(self):
# For debugging whenever IDs change
soup = get_soup(self.source_url)
json_data = self._get_json_data(soup)
entities = json_data["elements"]["content"]["content"]["entities"]
for k, v in entities.items():
vv = v["props"]
if "content" in vv:
print(k, vv["content"]["blocks"][0]["text"])
[docs] def _parse_data(self, json_data: dict) -> dict:
"""Parses data from JSON"""
data = {**self._parse_metrics(json_data), "date": self._parse_date(json_data)}
return data
[docs] def _get_json_data(self, soup: BeautifulSoup) -> dict:
"""Gets JSON from Soup"""
for script in soup.find_all("script"):
if "infographicData" in str(script):
json_data = str(script).replace("<script>window.infographicData=", "").replace(";</script>", "")
json_data = json.loads(json_data)
break
return json_data
[docs] def _parse_metrics(self, json_data: dict) -> dict:
"""Parses metrics from JSON"""
data = {}
for metric, entity in self.metric_entities.items():
value = json_data["elements"]["content"]["content"]["entities"][entity]["props"]["content"]["blocks"][0][
"text"
]
value = clean_count(value)
data[metric] = value
return data
[docs] def _parse_date(self, json_data: dict) -> str:
"""Parses date from JSON"""
value = json_data["elements"]["content"]["content"]["entities"][self.date_entity]["props"]["content"][
"blocks"
][0]["text"]
date = clean_date(value.lower(), "as of %B %d, %Y")
return date
[docs] def pipe_location(self, ds: pd.Series) -> pd.Series:
"""Pipes location"""
return enrich_data(ds, "location", self.location)
[docs] def pipe_vaccine(self, ds: pd.Series) -> pd.Series:
"""Pipes vaccine names"""
return enrich_data(
ds,
"vaccine",
"Johnson&Johnson, Moderna, Oxford/AstraZeneca, Pfizer/BioNTech, Sinopharm/Beijing, Sinovac, Sputnik Light,"
" Sputnik V",
)
[docs] def pipe_source(self, ds: pd.Series) -> pd.Series:
"""Pipes source url"""
return enrich_data(
ds,
"source_url",
self.source_url_ref,
)
[docs] def pipe_boosters(self, ds: pd.Series) -> pd.Series:
"""Pipes source url"""
return ds
# return enrich_data(
# ds,
# "total_boosters",
# ds.loc["total_boosters_1"] + ds.loc["total_boosters_2"],
# )
[docs] def pipeline(self, ds: pd.Series) -> pd.Series:
"""Pipeline for data"""
df = ds.pipe(self.pipe_location).pipe(self.pipe_vaccine).pipe(self.pipe_source).pipe(self.pipe_boosters)
df = add_latest_who_values(df, "Philippines", ["total_vaccinations", "people_vaccinated"])
return df
[docs] def export(self):
"""Exports data to CSV"""
df = self.read().pipe(self.pipeline)
self.export_datafile(df, attach=True)
[docs]def main():
Philippines().export()
if __name__ == "__main__":
main()