Source code for cowidev.testing.batch.south_africa

import pandas as pd

from cowidev.testing import CountryTestBase
from cowidev.utils import clean_date_series


class SouthAfrica(CountryTestBase):
    """Testing-data pipeline for South Africa.

    Reads the cumulative testing timeline CSV found at ``source_url``, cleans
    it, prepends one hard-coded early datapoint and exports the result.

    The helpers ``pipe_rename_columns``, ``pipe_metadata`` and
    ``export_datafile`` are not defined here; they are presumably inherited
    from ``CountryTestBase`` (confirm against the base class).
    """

    location: str = "South Africa"
    # Raw CSV holding the testing timeline.
    source_url: str = "https://raw.githubusercontent.com/dsfsi/covid19za/master/data/covid19za_timeline_testing.csv"
    units: str = "people tested"
    source_label: str = "National Institute for Communicable Diseases (NICD)"
    source_url_ref: str = "https://github.com/dsfsi/covid19za"
    notes: str = "Made available by the University of Pretoria on Github"
    # Maps the CSV column names to the output column names.
    rename_columns = {
        "YYYYMMDD": "Date",
        "cumulative_tests": "Cumulative total",
    }

    def read(self) -> pd.DataFrame:
        """Load the date and cumulative-tests columns from ``source_url``.

        Returns:
            DataFrame with columns ``YYYYMMDD`` (parsed as dates) and
            ``cumulative_tests``.
        """
        return pd.read_csv(
            self.source_url,
            usecols=["YYYYMMDD", "cumulative_tests"],
            parse_dates=["YYYYMMDD"],
        )

    def pipe_add_datapoint(self, df: pd.DataFrame) -> pd.DataFrame:
        """Return ``df`` with one extra hard-coded row appended at the end.

        Args:
            df: Frame to extend; it is not modified in place.

        Returns:
            New DataFrame with a fresh ``0..n`` index containing the rows of
            ``df`` followed by the hard-coded datapoint.
        """
        # Hard-coded first point for 7 February 2020, missing from GitHub
        datapoint = {
            "Date": "2020-02-07",
            "Country": self.location,
            "Units": self.units,
            "Cumulative total": 42,
            "Source label": self.source_label,
            "Source URL": "https://www.nicd.ac.za/novel-coronavirus-update",
            "Notes": pd.NA,
        }
        # DataFrame.append was removed in pandas 2.0; concatenating a
        # single-row frame is the supported equivalent.
        return pd.concat([df, pd.DataFrame([datapoint])], ignore_index=True)

    def pipeline(self, df: pd.DataFrame) -> pd.DataFrame:
        """Run every processing step on the raw frame, in order.

        Steps: rename columns, drop rows without a cumulative total, collapse
        duplicate dates, normalise the date column, attach metadata, and
        finally append the hard-coded datapoint.

        Args:
            df: Raw frame as returned by :meth:`read`.

        Returns:
            The processed DataFrame.
        """
        df = (
            df.pipe(self.pipe_rename_columns)
            .pipe(pipe_drop_nan)
            .pipe(pipe_metrics)
            .pipe(pipe_date)
            .pipe(self.pipe_metadata)
            .pipe(self.pipe_add_datapoint)
        )
        return df

    def export(self):
        """Read the source data, process it and hand it to ``export_datafile``."""
        df = self.read().pipe(self.pipeline)
        self.export_datafile(df)
def pipe_drop_nan(df: pd.DataFrame):
    """Discard rows whose ``Cumulative total`` value is missing.

    Args:
        df: Frame containing a ``Cumulative total`` column.

    Returns:
        A new DataFrame holding only the rows with a non-missing
        ``Cumulative total``; the original index labels are kept.
    """
    has_total = df["Cumulative total"].notna()
    return df[has_total]
def pipe_metrics(df: pd.DataFrame) -> pd.DataFrame:
    """Collapse duplicate dates, keeping the smallest cumulative total per date.

    Args:
        df: Frame with ``Date`` and ``Cumulative total`` columns.

    Returns:
        DataFrame with one row per distinct ``Date`` (as a regular column, not
        the index) and the minimum ``Cumulative total`` seen for that date.
    """
    # The string alias "min" is used instead of the builtin ``min``: passing
    # the builtin to groupby.agg is deprecated in recent pandas releases.
    return df.groupby("Date", as_index=False).agg(
        **{"Cumulative total": ("Cumulative total", "min")}
    )
def pipe_date(df: pd.DataFrame) -> pd.DataFrame:
    """Replace the ``Date`` column with its cleaned version.

    The column is passed through ``clean_date_series`` together with the
    format string ``"%Y-%m-%d"``.

    Args:
        df: Frame containing a ``Date`` column.

    Returns:
        A new DataFrame whose ``Date`` column holds the cleaned values.
    """
    cleaned_dates = clean_date_series(df.Date, "%Y-%m-%d")
    return df.assign(Date=cleaned_dates)
def main():
    """Entry point: build the South Africa pipeline and run its export."""
    scraper = SouthAfrica()
    scraper.export()