Source code for cowidev.testing.batch.germany

from datetime import datetime
import io
import tempfile

import pandas as pd
import requests

from cowidev.testing import CountryTestBase


def read_xlsx_from_url(url, **kwargs):
    """Download an Excel workbook over HTTP and load it into a DataFrame.

    The workbook is parsed straight from memory, so nothing is written to
    disk and no temporary file is left behind.

    Args:
        url: Address of the workbook to download.
        **kwargs: Forwarded unchanged to ``pandas.read_excel``
            (for example ``sheet_name``).

    Returns:
        The parsed sheet with every completely empty row removed.

    Raises:
        requests.HTTPError: If the server answers with an error status.
        requests.Timeout: If the server stops responding.
    """
    # The request is sent with a browser-like User-Agent header.
    headers = {"User-Agent": "Mozilla/5.0 (X11; Linux i686)"}
    # A timeout keeps an unresponsive server from blocking the caller forever.
    response = requests.get(url, headers=headers, timeout=60)
    # Fail here on an HTTP error instead of feeding an error page to the
    # Excel parser, which would surface as an unrelated parsing exception.
    response.raise_for_status()
    df = pd.read_excel(io.BytesIO(response.content), **kwargs)
    return df.dropna(how="all")


class Germany(CountryTestBase):
    """Weekly testing figures for Germany, read from the workbook at ``source_url``."""

    location: str = "Germany"
    source_url: str = "https://www.rki.de/DE/Content/InfAZ/N/Neuartiges_Coronavirus/Daten/Testzahlen-gesamt.xlsx?__blob=publicationFile"

    def read(self):
        """Download the workbook and keep only the per-week rows.

        Returns:
            Rows of the ``1_Testzahlerfassung`` sheet whose ``Kalenderwoche``
            value starts with ``<week>/<year>`` (e.g. ``11/2020``); every
            other row is dropped.
        """
        df = read_xlsx_from_url(self.source_url, sheet_name="1_Testzahlerfassung")
        # na=False: empty or non-string cells make ``str.match`` return NaN,
        # and a mask containing NaN cannot be used for boolean indexing.
        mask = df.Kalenderwoche.str.match(r"\d{1,2}/\d{4}", na=False)
        return df[mask]

    def pipeline(self, df: pd.DataFrame) -> pd.DataFrame:
        """Turn the weekly rows into the exported table.

        Args:
            df: Frame with the columns ``Kalenderwoche`` (``week/year``
                strings), ``Anzahl Testungen`` and ``Positivenanteil (%)``.

        Returns:
            A frame sorted by ``Date`` with the columns ``Date``,
            ``Cumulative total``, ``Positive rate``, ``Source URL``,
            ``Source label``, ``Units``, ``Country`` and ``Notes``.
        """
        # "%V/%G" reads an ISO week and ISO year; the appended "+0" supplies
        # weekday 0 (Sunday), so each row is dated on the Sunday of its week.
        week_dates = df.Kalenderwoche.apply(
            lambda x: datetime.strptime(x + " +0", "%V/%G +%w").strftime("%Y-%m-%d")
        )
        # Sort chronologically BEFORE accumulating so the running total always
        # follows date order, whatever the row order in the sheet is.
        # ISO-formatted date strings sort in chronological order.
        df = df.assign(Date=week_dates).sort_values("Date", kind="stable")

        df = df.assign(
            **{
                "Cumulative total": df["Anzahl Testungen"].cumsum(),
                # The source column is a percentage; store it as a fraction.
                "Positive rate": (df["Positivenanteil (%)"] / 100).round(3),
                "Source URL": self.source_url,
                "Source label": "Robert Koch Institut",
                "Units": "tests performed",
                "Country": self.location,
                "Notes": pd.NA,
            }
        )

        return df[
            [
                "Date",
                "Cumulative total",
                "Positive rate",
                "Source URL",
                "Source label",
                "Units",
                "Country",
                "Notes",
            ]
        ]

    def export(self):
        """Read the source, run the pipeline and pass the result to ``export_datafile``."""
        df = self.read().pipe(self.pipeline)
        # ``export_datafile`` is not defined in this class; presumably
        # inherited from CountryTestBase.
        self.export_datafile(df)


def main():
    """Entry point: build the Germany scraper and run its export."""
    scraper = Germany()
    scraper.export()