Source code for cowidev.testing.batch.germany

from datetime import datetime
import tempfile

import pandas as pd
import requests

from cowidev.testing import CountryTestBase


[docs]def read_xlsx_from_url(url, **kwargs): headers = {"User-Agent": "Mozilla/5.0 (X11; Linux i686)"} response = requests.get(url, headers=headers) with tempfile.NamedTemporaryFile(mode="w+", delete=False) as tmp: with open(tmp.name, "wb") as f: f.write(response.content) df = pd.read_excel(tmp.name, **kwargs) df = df.dropna(how="all") return df
[docs]class Germany(CountryTestBase): location: str = "Germany" source_url: str = "https://www.rki.de/DE/Content/InfAZ/N/Neuartiges_Coronavirus/Daten/Testzahlen-gesamt.xlsx?__blob=publicationFile"
[docs] def read(self): df = read_xlsx_from_url(self.source_url, sheet_name="1_Testzahlerfassung") mask = df.Kalenderwoche.str.match(r"\d{1,2}/\d{4}") df = df[mask] return df
[docs] def pipeline(self, df: pd.DataFrame) -> pd.DataFrame: df = df.assign( **{ "Date": df.Kalenderwoche.apply( lambda x: datetime.strptime(x + " +0", "%V/%G +%w").strftime("%Y-%m-%d") ), "Cumulative total": df["Anzahl Testungen"].cumsum(), "Positive rate": (df["Positivenanteil (%)"] / 100).round(3), "Source URL": self.source_url, "Source label": "Robert Koch Institut", "Units": "tests performed", "Country": self.location, "Notes": pd.NA, } ).sort_values("Date") df = df[ [ "Date", "Cumulative total", "Positive rate", "Source URL", "Source label", "Units", "Country", "Notes", ] ] return df
[docs] def export(self): df = self.read().pipe(self.pipeline) self.export_datafile(df)
[docs]def main(): Germany().export()