# Source code for cowidev.vax.incremental.iceland

import re
import json

import pandas as pd

from cowidev.utils import get_soup, clean_count, clean_date_series
from cowidev.vax.utils.incremental import increment
from cowidev.vax.utils.base import CountryVaxBase


# Integer value per vaccine name.
# NOTE(review): nothing in the code visible in this module reads this table;
# the values presumably are the number of doses in each vaccine's primary
# course -- confirm before relying on it. Its keys are short names and do not
# match the keys of VACCINE_MAPPING below.
VACCINE_PROTOCOLS = {
    "Pfizer": 2,
    "Moderna": 2,
    "AstraZeneca": 2,
    "Janssen": 1,
}

# Maps the vaccine names found as column headers in the manufacturer chart
# (keys) to the names written to the output (values).
# ``Iceland.pipeline_manufacturer`` requires the set of vaccines in the data to
# equal the set of keys, and ``Iceland.export`` joins the sorted values to
# build the ``vaccine`` field of the main record.
VACCINE_MAPPING = {
    "Pfizer/BioNTech": "Pfizer/BioNTech",
    "Moderna": "Moderna",
    "Oxford/AstraZeneca": "Oxford/AstraZeneca",
    "Janssen": "Johnson&Johnson",
}


class Iceland(CountryVaxBase):
    """Scraper for Iceland's vaccination figures published through Infogram.

    The Infogram page embeds a JSON document (``window.infographicData``) in a
    ``<script>`` tag. Headline metrics and a per-manufacturer table are read
    from chart entities inside that document, addressed by entity id.
    """

    location: str = "Iceland"
    source_url: str = "https://e.infogram.com/c3bc3569-c86d-48a7-9d4c-377928f102bf"
    source_url_ref: str = "https://www.covid.is/tolulegar-upplysingar-boluefni"
    # Infogram entity id holding the figure for each exported metric.
    metric_entities: dict = {
        "total_vaccinations": "7287c058-7921-4abc-a667-ce298827c969",
        "people_vaccinated": "8d14f33a-d482-4176-af55-71209314b07b",
        "people_fully_vaccinated": "16a69e30-01fd-4806-920c-436f8f29e9bf",
        "total_boosters": "209af2de-9927-4c51-a704-ddc85e28bab9",
        "additional_doses": "c1286d9e-254c-434a-9455-21b94969d163",
    }
    # Infogram entity id of the per-manufacturer chart.
    manufacturer_entity: str = "e329559c-c3cc-48e9-8b7b-1a5f87ea7ad3"

    def read(self):
        """Download the Infogram page and parse it.

        Returns:
            A ``(data, df_manuf)`` tuple: ``data`` is the dict built by
            ``_parse_data`` (one value per metric plus ``"date"``) and
            ``df_manuf`` is the raw per-manufacturer DataFrame built by
            ``_parse_data_manufacturer``.
        """
        soup = get_soup(self.source_url)
        json_data = self._get_json_data(soup)
        data = self._parse_data(json_data)
        df_manuf = self._parse_data_manufacturer(json_data)
        return data, df_manuf

    @staticmethod
    def _entity_chart_data(json_data, entity):
        """Return the first chart-data sheet of the Infogram entity ``entity``."""
        entities = json_data["elements"]["content"]["content"]["entities"]
        return entities[entity]["props"]["chartData"]["data"][0]

    def _parse_data_manufacturer(self, json_data):
        """Build the raw per-manufacturer table from the manufacturer chart.

        The first row of the chart sheet is the header; its first cell is
        replaced by ``"date"`` and the remaining cells become column names.
        All following rows become the DataFrame rows.
        """
        sheet = self._entity_chart_data(json_data, self.manufacturer_entity)
        header, rows = sheet[0], sheet[1:]
        df = pd.DataFrame(rows).reset_index(drop=True)
        df.columns = ["date"] + header[1:]
        return df

    def _parse_data(self, json_data):
        """Return the parsed metrics merged with the ``"date"`` of the update."""
        return {**self._parse_metrics(json_data), "date": self._parse_date(json_data)}

    def _get_json_data(self, soup):
        """Extract and decode the ``window.infographicData`` JSON document.

        Args:
            soup: Parsed page; only ``soup.find_all("script")`` is used.

        Returns:
            The decoded JSON document.

        Raises:
            ValueError: If no ``<script>`` tag assigns ``window.infographicData``.
        """
        assignment = re.compile(r"window\.infographicData\s*=\s*")
        for script in soup.find_all("script"):
            text = str(script)
            match = assignment.search(text)
            if match is None:
                continue
            # Keep what follows the assignment, then drop the closing tag and
            # the trailing statement terminator. Searching for the assignment
            # (instead of stripping a fixed "<script>..." prefix) also works
            # when the tag carries attributes.
            payload = text[match.end():].rsplit("</script>", 1)[0]
            payload = payload.strip().rstrip(";").rstrip()
            return json.loads(payload)
        raise ValueError(f"No script tag assigning window.infographicData found at {self.source_url}")

    def _parse_metrics(self, json_data):
        """Read one numeric value per entry of ``metric_entities``.

        The first cell of each entity's chart sheet is an HTML snippet; the
        number is the run of digits and dots that follows ``18px;">`` in it.

        Returns:
            Dict mapping each metric name to ``clean_count`` of its figure.

        Raises:
            ValueError: If a metric's cell does not contain the expected markup.
        """
        figure = re.compile(r'18px;">([\d\.]+)')
        data = {}
        for metric, entity in self.metric_entities.items():
            cell = self._entity_chart_data(json_data, entity)[0][0]
            match = figure.search(cell)
            if match is None:
                raise ValueError(f"Could not find a value for metric '{metric}' (entity {entity})")
            data[metric] = clean_count(match.group(1))
        return data

    def _parse_date(self, json_data):
        """Return the first 10 characters of the document's ``updatedAt`` field."""
        return json_data["updatedAt"][:10]

    def pipeline_manufacturer(self, df):
        """Reshape the raw manufacturer table to long format with running totals.

        Args:
            df: Table with a ``date`` column (``%d.%m.%Y``) and one column per
                vaccine, as produced by ``_parse_data_manufacturer``.

        Returns:
            Long DataFrame with columns ``date``, ``vaccine``,
            ``total_vaccinations`` and ``location``.

        Raises:
            AssertionError: If the vaccines in the data are not exactly the
                keys of ``VACCINE_MAPPING``.
        """
        df = df.melt("date", var_name="vaccine", value_name="total_vaccinations")
        df["date"] = clean_date_series(df["date"], "%d.%m.%Y")
        # Non-numeric cells count as 0 so they do not break the running total.
        df["total_vaccinations"] = pd.to_numeric(df["total_vaccinations"], errors="coerce").fillna(0)
        # Cumulate per vaccine in date order; the result is aligned back to
        # ``df`` by index, so the row order of ``df`` is left unchanged.
        df["total_vaccinations"] = (
            df.sort_values("date").groupby("vaccine", as_index=False)["total_vaccinations"].cumsum()
        )
        df["location"] = self.location
        # Raised explicitly (not via ``assert``) so the check also runs under
        # ``python -O``; the exception type is kept for existing callers.
        if set(df["vaccine"].unique()) != set(VACCINE_MAPPING.keys()):
            raise AssertionError(f"Vaccines present in data: {df['vaccine'].unique()}")
        # Only the vaccine column holds names to translate.
        df["vaccine"] = df["vaccine"].replace(VACCINE_MAPPING)
        return df

    def export(self):
        """Read the source and export the main record and the manufacturer data.

        The main record is passed to ``increment``; ``total_boosters`` is the
        sum of the ``total_boosters`` and ``additional_doses`` metrics. The
        manufacturer table goes through ``pipeline_manufacturer``, rows with a
        missing date are dropped, and the result is passed to
        ``export_datafile``.
        """
        data, df_manuf = self.read()
        # Main
        increment(
            location=self.location,
            total_vaccinations=data["total_vaccinations"],
            people_vaccinated=data["people_vaccinated"],
            people_fully_vaccinated=data["people_fully_vaccinated"],
            total_boosters=data["total_boosters"] + data["additional_doses"],
            date=data["date"],
            source_url=self.source_url_ref,
            vaccine=", ".join(sorted(VACCINE_MAPPING.values())),
        )
        # By manufacturer
        df_manuf = df_manuf.pipe(self.pipeline_manufacturer).dropna(subset=["date"])
        self.export_datafile(
            df_manufacturer=df_manuf,
            meta_manufacturer={
                "source_name": "Ministry of Health",
                "source_url": self.source_url,
            },
        )
def main():
    """Run the Iceland scraper by calling ``export`` on a new ``Iceland``."""
    scraper = Iceland()
    scraper.export()