Source code for cowidev.testing.batch.south_korea

from distutils.command.clean import clean
import pandas as pd

from cowidev.testing import CountryTestBase
from cowidev.utils import clean_date_series, clean_count


class SouthKorea(CountryTestBase):
    """Testing series for South Korea, read from two worksheets of one Google Sheet.

    The older worksheet holds cumulative totals and is used for dates before
    2020-12-18; the newer worksheet holds the figures used from then on. Both
    readers return a frame with the columns ``Date`` and
    ``Daily change in cumulative total``.
    """

    location = "South Korea"
    units = "people tested"
    source_url_ref = "https://sites.google.com/view/snuaric/data-service/covid-19/covid-19-data"
    source_label = "Korea Disease Control and prevention Agency"
    notes = "Data made available by Asia Regional Information Center at Seoul National University"

    @staticmethod
    def _to_numeric_counts(series, errors="raise"):
        """Return ``series`` as numbers, removing thousands separators from strings.

        Args:
            series: Column whose entries may be numbers, NaN, or strings such
                as ``"12,345"``.
            errors: Forwarded to ``pandas.to_numeric`` (``"raise"`` or
                ``"coerce"``).

        Returns:
            A numeric ``pandas.Series`` sharing the index of ``series``.
        """
        # Only strings carry separators; numbers and NaN pass through untouched,
        # so the helper works for object, mixed and already-numeric columns.
        stripped = series.map(lambda value: value.replace(",", "") if isinstance(value, str) else value)
        return pd.to_numeric(stripped, errors=errors)

    def _read_old(self):
        """Read the figures for dates before 2020-12-18.

        Source column: 'Number of suspicious report testing' (의심신고 검사자 수),
        published as the cumulative ``TOTAL_TEST``.

        Returns:
            DataFrame with ``Date`` and ``Daily change in cumulative total``.
        """
        raw = pd.read_csv(
            "https://docs.google.com/spreadsheets/d/10c9jNi8VnV0YYCfV_7AZrzBY5l18dOFHEJMIJsP4THI/export?format=csv&gid=334130338",
            usecols=["DATE", "TOTAL_TEST"],
        )
        # Keep only the rows whose total parses as a number. The explicit copy
        # makes the column assignments below act on an independent frame rather
        # than on a slice of ``raw``.
        parses = pd.to_numeric(raw["TOTAL_TEST"], errors="coerce").notnull()
        df = raw[parses].copy()
        df["Date"] = clean_date_series(df["DATE"], "%Y-%m-%d")
        # diff(periods=-1) subtracts the *following* row from each row.
        # NOTE(review): this is a daily increase only if the sheet lists the
        # newest date first - confirm against the source.
        df["Daily change in cumulative total"] = df["TOTAL_TEST"].astype("int32").diff(periods=-1)
        before_cutoff = df["Date"] < "2020-12-18"
        return df.loc[before_cutoff, ["Date", "Daily change in cumulative total"]]

    def _read_new(self):
        """Read the figures published in the newer worksheet.

        The series is assembled in three steps:

        * 2021-04-21 < data < 2021-10-25: 'Number of testing at temporary
          screening stations' (임시선별검사소 검사건수) is missing and is rebuilt
          as 'metropolitan area temporary screening centers'
          (수도권 임시선별검사소 검사건수) + 'Non-Metropolitan Temporary Screening
          Center' (비수도권 임시선별검사소).
        * 2020-12-17 < data < 2022-02-08: 'Number of testing at screening
          stations (aggregate)' (선별진료소(통합)) is, where missing, rebuilt as
          'Number of suspicious report testing' (의심신고 검사자 수) + the
          temporary screening station figure.
        * After 2022-02-06 the ``Total`` column is used instead.

        Returns:
            DataFrame with ``Date`` and ``Daily change in cumulative total``.
        """
        aggregate_col = "선별진료소(통합)"
        suspected_col = "의심신고 검사자 수"
        temporary_col = "임시선별검사소 검사건수"
        metro_col = "수도권 임시선별검사소 검사건수"
        non_metro_col = "비수도권 임시선별검사소"

        df = pd.read_csv(
            "https://docs.google.com/spreadsheets/d/10c9jNi8VnV0YYCfV_7AZrzBY5l18dOFHEJMIJsP4THI/export?format=csv&gid=512078862",
            usecols=["Date", "Total", aggregate_col, suspected_col, temporary_col, metro_col, non_metro_col],
        )
        df = df.assign(Date=clean_date_series(df["Date"], "%Y-%m-%d"))

        # Columns are addressed by name: read_csv ignores the order of
        # ``usecols``, so positional indexing would depend on the sheet layout.
        # Components are converted to numbers *before* being added, otherwise
        # "+" would concatenate comma-formatted strings instead of summing.
        suspected = self._to_numeric_counts(df[suspected_col], errors="coerce")
        temporary = self._to_numeric_counts(df[temporary_col], errors="coerce")
        metro = self._to_numeric_counts(df[metro_col], errors="coerce")
        non_metro = self._to_numeric_counts(df[non_metro_col], errors="coerce")

        # fillna returns a new Series; results are rebound explicitly instead
        # of relying on in-place mutation of a column selection.
        temporary = temporary.fillna(metro + non_metro)
        daily = df[aggregate_col].fillna(suspected + temporary)

        # Use 'Total' after 2022-02-06.
        daily = daily.mask(df["Date"] > "2022-02-06", df["Total"])

        # Final conversion stays strict, so an unparseable published value
        # raises instead of being dropped silently.
        df["Daily change in cumulative total"] = self._to_numeric_counts(daily)
        return df[["Date", "Daily change in cumulative total"]]

    def read(self):
        """Fetch both worksheets and stack them, newer rows first.

        Returns:
            DataFrame with a fresh integer index and no rows containing
            missing values.
        """
        old = self._read_old()
        new = self._read_new()
        return pd.concat([new, old], ignore_index=True).dropna()

    def pipeline(self, df: pd.DataFrame):
        """Apply ``self.pipe_metadata`` to ``df`` and return the result."""
        return df.pipe(self.pipe_metadata)

    def export(self):
        """Read the data, run the pipeline and pass the result to ``export_datafile``."""
        df = self.read().pipe(self.pipeline)
        self.export_datafile(df, reset_index=True)
def main():
    """Instantiate the South Korea scraper and run its export."""
    scraper = SouthKorea()
    scraper.export()