Source code for cowidev.testing.batch.peru

import pandas as pd
from cowidev.testing import CountryTestBase


[docs]class Peru(CountryTestBase): location: str = "Peru" units: str = "tests performed" source_label: str = "National Institute of Health" notes: str = "Ministerio de Salud via https://github.com/jmcastagnetto/covid-19-peru-data" source_url: str = ( "https://raw.githubusercontent.com/jmcastagnetto/covid-19-peru-data/main/datos/covid-19-peru-data.csv" ) source_url_ref: str = ( "https://datos.ins.gob.pe/dataset/dataset-de-pruebas-moleculares-del-instituto-nacional-de-salud-ins" ) rename_columns: dict = {"date": "Date", "confirmed": "positive", "total_tests": "Cumulative total"} # To avoid removing previous data obtained from source_url_ref date_start: str = "2020-04-08"
[docs] def read(self) -> pd.DataFrame: """Read data from source""" df = pd.read_csv(self.source_url) return df
[docs] def pipe_filter(self, df: pd.DataFrame) -> pd.DataFrame: """Filter data""" df = df[(df["region"].isna()) & (df["Date"] >= self.date_start)] # corrections df = df[df["Date"] != "2021-05-31"] df = df[df["Date"] != "2021-11-16"] return df
[docs] def pipe_pr(self, df: pd.DataFrame) -> pd.DataFrame: """Calculate positive rate""" df = df.sort_values("Date") cases_over_period = df["positive"].diff().rolling(7).sum() tests_over_period = df["Cumulative total"].diff().rolling(7).sum() return df.assign(**{"Positive rate": (cases_over_period / tests_over_period).round(3)}).fillna(0)
[docs] def pipeline(self, df: pd.DataFrame) -> pd.DataFrame: """Pipeline for data""" return df.pipe(self.pipe_rename_columns).pipe(self.pipe_filter).pipe(self.pipe_pr).pipe(self.pipe_metadata)
[docs] def export(self): """Export data to CSV""" df = self.read().pipe(self.pipeline) self.export_datafile(df, attach=True)
[docs]def main(): Peru().export()