Source code for cowidev.vax.batch.indonesia

import requests

import pandas as pd
from tableauscraper import TableauScraper as TS

from cowidev.vax.utils.utils import build_vaccine_timeline
from cowidev.vax.utils.base import CountryVaxBase
from cowidev.vax.utils.files import load_data
from cowidev.vax.utils.utils import make_monotonic


class Indonesia(CountryVaxBase):
    """Vaccination data pipeline for Indonesia.

    Daily dose counts are read from the covid19.go.id JSON API, the latest
    dose/booster totals are read from a public Tableau dashboard, and the
    result is merged with a legacy dataset and with the previously exported
    CSV before being written to ``self.output_path``.
    """

    location = "Indonesia"
    source_url_ref = "https://data.covid19.go.id/public/index.html"
    source_url = "https://data.covid19.go.id/public/api/pemeriksaan-vaksinasi.json"

    # Seconds to wait for the JSON API. Without a timeout, ``requests`` can
    # block indefinitely on an unresponsive server.
    _request_timeout = 60

    # Keys each daily record is expected to carry; any difference means the
    # upstream schema changed and the parsing in ``read`` must be reviewed.
    _expected_record_keys = frozenset(
        {
            "key_as_string",
            "key",
            "doc_count",
            "jumlah_vaksinasi_2",
            "jumlah_vaksinasi_1",
            "jumlah_jumlah_vaksinasi_1_kum",
            "jumlah_jumlah_vaksinasi_2_kum",
        }
    )

    def read(self) -> pd.DataFrame:
        """Download the daily series and return it as a DataFrame.

        Returns:
            DataFrame with columns ``date``, ``dose_1`` and ``dose_2``, one
            row per entry of ``data["vaksinasi"]["harian"]``. The dose values
            come from the ``*_kum`` fields (presumably cumulative counts;
            confirm against the API).

        Raises:
            requests.HTTPError: if the API answers with an error status.
            AssertionError: if the most recent record does not have exactly
                the expected set of keys.
        """
        response = requests.get(self.source_url, timeout=self._request_timeout)
        # Fail on HTTP errors instead of trying to parse an error page as JSON.
        response.raise_for_status()
        data = response.json()
        daily = data["vaksinasi"]["harian"]

        # Explicit raise rather than ``assert`` so the schema check still runs
        # under ``python -O``; the exception type and message are unchanged.
        latest_keys = daily[-1].keys()
        if set(latest_keys) != self._expected_record_keys:
            raise AssertionError(f"New columns found! Check {latest_keys}")

        records = [
            {
                "date": record["key_as_string"],
                "dose_1": record["jumlah_jumlah_vaksinasi_1_kum"]["value"],
                "dose_2": record["jumlah_jumlah_vaksinasi_2_kum"]["value"],
            }
            for record in daily
        ]
        return pd.DataFrame(records)

    def pipe_metadata(self, df: pd.DataFrame) -> pd.DataFrame:
        """Return a copy of ``df`` with ``location`` and ``source_url`` columns added."""
        return df.assign(location=self.location, source_url=self.source_url_ref)

    def pipe_vaccine(self, df: pd.DataFrame) -> pd.DataFrame:
        """Apply ``build_vaccine_timeline`` with Indonesia's vaccine/date mapping.

        The mapping associates each vaccine name with a date (presumably the
        date it started being used; see ``build_vaccine_timeline``). The
        pipeline expects the result to contain a ``vaccine`` column.
        """
        return df.pipe(
            build_vaccine_timeline,
            {
                "Sinovac": "2020-12-01",
                "Oxford/AstraZeneca": "2021-03-22",
                "Sinopharm/Beijing": "2021-05-18",
                "Moderna": "2021-07-17",
                "Pfizer/BioNTech": "2021-08-29",
                "Johnson&Johnson": "2021-09-11",
                "Novavax": "2021-11-27",
            },
        )

    def pipe_merge_legacy(self, df: pd.DataFrame) -> pd.DataFrame:
        """Append the legacy dataset, keeping only rows of ``df`` that are newer.

        Rows of ``df`` dated on or before the last legacy date are dropped, so
        the legacy data takes precedence over the overlapping period.
        """
        df_legacy = load_data(f"{self.location.lower()}-legacy")
        df = df[df.date > (df_legacy.date.max())]
        return pd.concat([df, df_legacy]).sort_values("date")

    def pipe_metrics(self, df: pd.DataFrame) -> pd.DataFrame:
        """Derive the vaccination metrics from the ``dose_1``/``dose_2`` columns.

        ``total_vaccinations`` is blanked from 2022-01-01 onwards because the
        source lacks booster data and the sum of doses would be incomplete.
        """
        df = df.assign(
            people_vaccinated=df["dose_1"],
            total_vaccinations=df["dose_1"] + df["dose_2"],
            # single-shot data is missing, but the proportion of
            # J&J is very small, so it's an acceptable approximation
            # (see https://github.com/owid/covid-19-data/issues/2323#issuecomment-1031114133)
            people_fully_vaccinated=df["dose_2"],
        )
        df.loc[df.date >= "2022-01-01", "total_vaccinations"] = pd.NA  # booster data missing
        return df

    @staticmethod
    def _read_tableau_total(ts: TS, url: str, column: str):
        """Load the Tableau view at ``url`` and return the first value of ``column``."""
        ts.loads(url)
        return ts.getWorkbook().worksheets[0].data[column].values[0]

    def pipe_add_latest_boosters(self, df: pd.DataFrame) -> pd.DataFrame:
        """Set booster and total-dose figures on the most recent date.

        The three totals are read from the Tableau dashboard and written only
        to the row(s) whose date equals the maximum date in ``df``. The input
        frame is not modified; an updated copy is returned.
        """
        ts = TS()
        first_doses = self._read_tableau_total(
            ts,
            "https://public.tableau.com/views/DashboardVaksinKemkes/TotalVaksinasiDosis1",
            "SUM(Divaksin 1)-alias",
        )
        second_doses = self._read_tableau_total(
            ts,
            "https://public.tableau.com/views/DashboardVaksinKemkes/TotalVaksinasiDosis2",
            "SUM(Divaksin 2)-alias",
        )
        boosters = self._read_tableau_total(
            ts,
            "https://public.tableau.com/views/DashboardVaksinKemkes/TotalVaksinasiDosis3",
            "SUM(Divaksin 3)-alias",
        )

        # Work on a copy so the caller's frame is left untouched.
        df = df.copy()
        is_latest = df.date == df.date.max()
        df.loc[is_latest, "total_boosters"] = boosters
        df.loc[is_latest, "total_vaccinations"] = first_doses + second_doses + boosters
        return df

    def pipe_merge_current(self, df: pd.DataFrame, df_current: pd.DataFrame) -> pd.DataFrame:
        """Fill missing totals with the values from the previously exported file.

        ``df_current`` is left-joined on ``date``; its ``*_current`` columns
        are used only where ``total_vaccinations``/``total_boosters`` are
        missing in ``df``. The ``*_current`` columns remain in the result.
        """
        df = df.merge(df_current, on="date", how="left")
        df = df.assign(
            total_vaccinations=df.total_vaccinations.fillna(df.total_vaccinations_current),
            total_boosters=df.total_boosters.fillna(df.total_boosters_current),
        )
        return df

    def pipeline(self, df: pd.DataFrame, df_current: pd.DataFrame) -> pd.DataFrame:
        """Run every processing step in order and select the output columns."""
        return (
            df.pipe(self.pipe_metadata)
            .pipe(self.pipe_metrics)
            .pipe(self.pipe_add_latest_boosters)
            .pipe(make_monotonic)
            .pipe(self.pipe_merge_legacy)
            .pipe(self.pipe_merge_current, df_current)
            .pipe(self.pipe_vaccine)[
                [
                    "location",
                    "date",
                    "vaccine",
                    "source_url",
                    "total_vaccinations",
                    "people_vaccinated",
                    "people_fully_vaccinated",
                    "total_boosters",
                ]
            ]
        )

    def read_current(self):
        """Load date, boosters and total doses from the existing output CSV.

        The two metric columns are renamed with a ``_current`` suffix so they
        do not clash with the freshly computed ones when merged.
        """
        return pd.read_csv(self.output_path, usecols=["date", "total_boosters", "total_vaccinations"]).rename(
            columns={"total_boosters": "total_boosters_current", "total_vaccinations": "total_vaccinations_current"}
        )

    def export(self):
        """Read the source data, process it and overwrite the output CSV."""
        # The current output must be read before it is overwritten below.
        df_current = self.read_current()
        df = self.read().pipe(self.pipeline, df_current)
        df.to_csv(self.output_path, index=False)
def main():
    """Entry point: build the Indonesia pipeline object and run its export."""
    country = Indonesia()
    country.export()