# Source code for cowidev.testing.batch.hong_kong

import pandas as pd

from cowidev.utils.clean import clean_date_series
from cowidev.testing import CountryTestBase


class HongKong(CountryTestBase):
    """Testing-data source for Hong Kong.

    Downloads the cumulative testing CSV found at ``source_url``, adds up the
    per-scheme test counts of every reporting period, accumulates them into a
    running total and derives a positive rate by combining that total with the
    reported case counts.
    """

    # Descriptive attributes; presumably consumed by CountryTestBase helpers
    # (e.g. ``pipe_metadata``), which are not visible in this file.
    location = "Hong Kong"
    units = "tests performed"
    source_label = "Department of Health"
    source_url = "http://www.chp.gov.hk/files/misc/statistics_on_covid_19_testing_cumulative.csv"
    source_url_ref = "http://www.chp.gov.hk/files/misc/statistics_on_covid_19_testing_cumulative.csv"
    # Bilingual CSV headers mapped to the short names used by the pipes below
    # (presumably applied by the inherited ``pipe_rename_columns``).
    rename_columns = {
        "日期由 From Date": "from",
        "日期至 To Date": "Date",
        "檢測數字 Number of tests": "t1",
        "特定群組檢測計劃下的檢測數目 Number of tests under Target Group Testing Scheme": "t2",
        "普及社區檢測計劃下的檢測數目 Number of tests under Universal Community Testing Programme": "t3",
        "臨時檢測中心的檢測數目 Number of tests in Temporary Testing Centres": "t4",
        "社區檢測中心的檢測數目 Number of tests in Community Testing Centres": "t5",
    }

    def read(self) -> pd.DataFrame:
        """Load the raw testing CSV from ``source_url``."""
        raw = pd.read_csv(self.source_url)
        return raw

    def _load_cases(self):
        """Load the reported-cases CSV and prepare it for merging.

        Returns:
            The selected case columns plus a combined "tested positive"
            column and a ``Date`` column built from "As of date".
        """
        url = "http://www.chp.gov.hk/files/misc/latest_situation_of_reported_cases_covid_19_eng.csv"
        as_of = "As of date"
        confirmed = "Number of confirmed cases"
        by_nucleic = "Number of cases tested positive for SARS-CoV-2 virus by nucleic acid tests"
        by_antigen = "Number of cases tested positive for SARS-CoV-2 virus by rapid antigen tests"
        positive = "Number of cases tested positive for SARS-CoV-2 virus"

        cases = pd.read_csv(url, usecols=[as_of, confirmed, by_nucleic, by_antigen])
        # Only the antigen count is defaulted to 0; a missing nucleic-acid
        # count leaves the combined value missing as well.
        cases[positive] = cases[by_nucleic] + cases[by_antigen].fillna(0)
        # Where the confirmed-cases figure is absent, fall back to the
        # combined tested-positive figure.
        cases[confirmed] = cases[confirmed].fillna(cases[positive])
        dates = clean_date_series(cases[as_of], "%d/%m/%Y")
        return cases.assign(Date=dates)

    def pipe_row_sum(self, df):
        """Add a ``change`` column holding the row-wise sum of ``t1``..``t5``."""
        test_columns = ["t1", "t2", "t3", "t4", "t5"]
        row_totals = df[test_columns].sum(axis=1)
        return df.assign(change=row_totals)

    def pipe_date(self, df):
        """Replace ``Date`` with the output of ``clean_date_series``.

        The second argument ("%d/%m/%Y") is presumably the input date format.
        """
        cleaned = clean_date_series(df["Date"], "%d/%m/%Y")
        return df.assign(Date=cleaned)

    def pipe_metrics(self, df):
        """Sum ``change`` per ``Date`` and add its running total.

        Returns:
            One row per ``Date``, ordered by ``Date``, with ``change`` and
            ``Cumulative total`` columns.
        """
        per_date = df.groupby("Date", as_index=False)["change"].sum()
        ordered = per_date.sort_values("Date")
        running_total = ordered["change"].cumsum()
        return ordered.assign(**{"Cumulative total": running_total})

    def pipe_pr(self, df: pd.DataFrame) -> pd.DataFrame:
        """Add a ``Positive rate`` column.

        The testing rows are merged with the case data on ``Date``; the rate
        is the row-to-row difference in confirmed cases divided by the
        row-to-row difference in cumulative tests, rounded to 5 decimals.
        """
        merged = df.merge(self._load_cases(), on="Date")
        merged = merged.sort_values("Date")
        new_cases = merged["Number of confirmed cases"].diff()
        new_tests = merged["Cumulative total"].diff()
        positive_rate = (new_cases / new_tests).round(5)
        return merged.assign(**{"Positive rate": positive_rate})

    def pipeline(self, df: pd.DataFrame) -> pd.DataFrame:
        """Run every processing stage over ``df`` in order and return the result."""
        stages = (
            self.pipe_rename_columns,
            self.pipe_row_sum,
            self.pipe_date,
            self.pipe_metrics,
            self.pipe_pr,
            self.pipe_metadata,
        )
        result = df
        for stage in stages:
            result = result.pipe(stage)
        return result

    def export(self):
        """Read the source data, process it and hand it to ``export_datafile``."""
        raw = self.read()
        processed = raw.pipe(self.pipeline)
        self.export_datafile(processed, float_format="%.5f")
def main():
    """Entry point: build the Hong Kong source and run its export."""
    source = HongKong()
    source.export()