# Source code for cowidev.vax.incremental.bangladesh

from multiprocessing.sharedctypes import Value
import pandas as pd

from cowidev.utils.clean import clean_count
from cowidev.utils.clean.dates import localdate
from cowidev.utils.web.scraping import get_soup
from cowidev.vax.utils.incremental import enrich_data, increment


class Bangladesh:
    """Scrape COVID-19 vaccination figures for Bangladesh from the DGHS web portal."""

    location: str = "Bangladesh"
    source_url: str = "http://103.247.238.92/webportal/pages/covid19-vaccination-update.php"
    # Portal tab label -> OWID canonical vaccine name. The "All Vaccine" tab is
    # not a vaccine and is whitelisted separately in _parse_vaccines.
    vaccines_rename = {
        "AstraZeneca": "Oxford/AstraZeneca",
        "Pfizer": "Pfizer/BioNTech",
        "Pfizer-PF (Comirnaty)": "Pfizer/BioNTech",
        "Sinopharm": "Sinopharm/Beijing",
        "Moderna": "Moderna",
        "Sinovac": "Sinovac",
        "Janssen (Johnson & Johnson)": "Johnson&Johnson",
    }

    def read(self) -> pd.Series:
        """Fetch the portal page and return a Series with dose metrics, date and vaccine list."""
        soup = get_soup(self.source_url, timeout=30)
        metrics = self._parse_metrics(soup)
        vaccines = self._parse_vaccines(soup)
        date = localdate("Asia/Dhaka")
        return pd.Series(
            data={
                **metrics,
                "date": date,
                "vaccine": vaccines,
            }
        )

    def _parse_single_doses(self) -> int:
        """Return the number of single-shot (Janssen) doses administered.

        Raises:
            ValueError: if the Janssen page reports a non-zero first-dose count,
                which would indicate the page layout or semantics changed.
        """
        url = "http://103.247.238.92/webportal/pages/covid19-vaccination-johnson.php"
        soup = get_soup(url, timeout=30)
        metrics = self._parse_metrics_raw(soup, raise_err=False)
        if metrics["people_vaccinated"] != 0:
            raise ValueError("First dose for one dose vaccines should be 0!")
        return metrics["people_fully_vaccinated"]

    def _parse_metrics_raw(self, soup, raise_err: bool = True) -> dict:
        """Extract dose counters from the page's ``ttip`` tooltip elements.

        Args:
            soup: parsed HTML of a vaccination-update page.
            raise_err: if True, missing third-dose data raises ValueError;
                if False, a dict without ``total_boosters`` is returned.

        Raises:
            ValueError: if first/second-dose counters are absent from the page,
                or if third-dose data is absent and ``raise_err`` is True.
        """
        dose1 = dose2 = dose3 = None
        for elem in soup.find_all(class_="ttip"):
            if not (p := elem.find("p")):
                continue
            label = p.text.strip()
            if label == "1st doses administered":
                dose1 = clean_count(elem.span.text)
            elif label == "2nd doses administered":
                dose2 = clean_count(elem.span.text)
            elif label == "3rd doses administered":
                dose3 = clean_count(elem.span.text)
        # Fail loudly with a clear message (instead of an accidental
        # UnboundLocalError) if the page layout changed.
        if dose1 is None or dose2 is None:
            raise ValueError("1st/2nd dose data missing!")
        if dose3 is not None:
            return {
                "total_vaccinations": dose1 + dose2 + dose3,
                "people_vaccinated": dose1,
                "people_fully_vaccinated": dose2,
                "total_boosters": dose3,
            }
        if raise_err:
            raise ValueError("Dose 3 data missing!")
        return {
            "total_vaccinations": dose1 + dose2,
            "people_vaccinated": dose1,
            "people_fully_vaccinated": dose2,
        }

    def _parse_metrics(self, soup) -> dict:
        """Combine the main-page metrics with single-shot (Janssen) doses.

        Janssen recipients appear only as "fully vaccinated" on their own page,
        so they are added to ``people_vaccinated`` here.
        """
        metrics = self._parse_metrics_raw(soup)
        single_doses = self._parse_single_doses()
        metrics["people_vaccinated"] = metrics["people_vaccinated"] + single_doses
        return metrics

    def _parse_vaccines(self, soup) -> str:
        """Validate the vaccine tabs on the page and return the canonical vaccine names.

        Raises:
            ValueError: if the page lists a vaccine not covered by ``vaccines_rename``.
        """
        elem = soup.find(class_="nav nav-pills")
        vaccines = {a.text.strip() for a in elem.find_all("a")}
        if vaccines_unk := vaccines.difference(set(self.vaccines_rename) | {"All Vaccine"}):
            raise ValueError(f"Unknown vaccines found {vaccines_unk}")
        return ", ".join(sorted(self.vaccines_rename.values()))

    def pipe_location(self, ds: pd.Series) -> pd.Series:
        """Attach the location name to the data series."""
        # Use the class attribute (same value) for consistency with pipe_source.
        return enrich_data(ds, "location", self.location)

    def pipe_source(self, ds: pd.Series) -> pd.Series:
        """Attach the source URL to the data series."""
        return enrich_data(ds, "source_url", self.source_url)

    def pipeline(self, ds: pd.Series) -> pd.Series:
        """Apply all enrichment steps to the raw data series."""
        return ds.pipe(self.pipe_location).pipe(self.pipe_source)

    def export(self):
        """Read, enrich and push one incremental vaccination data point."""
        data = self.read().pipe(self.pipeline)
        increment(
            location=data["location"],
            total_vaccinations=data["total_vaccinations"],
            people_vaccinated=data["people_vaccinated"],
            people_fully_vaccinated=data["people_fully_vaccinated"],
            total_boosters=data["total_boosters"],
            date=data["date"],
            source_url=data["source_url"],
            vaccine=data["vaccine"],
        )
def main():
    """Entry point: scrape and export the latest Bangladesh vaccination data."""
    scraper = Bangladesh()
    scraper.export()