import pandas as pd
from cowidev.utils import clean_date
from cowidev.utils.log import get_logger
from cowidev.utils.web.download import read_csv_from_url
from cowidev.testing.utils.orgs import ECDC_COUNTRIES
from cowidev.testing.utils.base import CountryTestBase
# Module-level logger; used below to report each successful country export.
logger = get_logger()
class ECDC(CountryTestBase):
    """Batch importer for the ECDC weekly COVID-19 testing dataset.

    Downloads the ECDC CSV, keeps the locations listed in ``ECDC_COUNTRIES``,
    turns the weekly test counts into a running total per location and exports
    one data file per location.

    ``pipe_rename_columns``, ``pipe_metadata`` and ``export_datafile`` are not
    defined here; they are expected to come from ``CountryTestBase``.
    """

    location: str = "ECDC"
    source_url_ref: str = "https://www.ecdc.europa.eu/en/publications-data/covid-19-testing"
    source_url: str = "https://opendata.ecdc.europa.eu/covid19/testing/csv/data.csv"
    source_label: str = "European Centre for Disease Prevention and Control (ECDC)"
    units: str = "tests performed"
    # Columns of the source file this importer lists as relevant.
    columns_use: list = [
        "year_week",
        "region_name",
        "tests_done",
    ]
    # Source column name -> name used by the rest of this pipeline.
    # "tests_done" is renamed to "Cumulative total" up front; the values only
    # become cumulative once `pipe_cumsum` has run.
    rename_columns = {
        "region_name": "location",
        "tests_done": "Cumulative total",
        "year_week": "Date",
    }

    def read(self) -> pd.DataFrame:
        """Download the source CSV from ``source_url`` (20 s timeout)."""
        return read_csv_from_url(self.source_url, timeout=20)

    def _yearweek_to_date(self, year_week: str) -> str:
        """Convert a ``yyyy-Www`` week label into a date.

        The label is suffixed with ``+4`` so that weekday 4 (Thursday under
        ``%w``) of the given week is the date handed to ``clean_date``.

        Args:
            year_week: Week label such as ``"2021-W05"``.

        Returns:
            Whatever ``clean_date`` returns for that day.
        """
        # NOTE(review): `%W` counts weeks from the first Monday of the year
        # (earlier days are week 0), which is not ISO-8601 week numbering. If
        # the source labels are ISO weeks and `clean_date` follows `strptime`
        # semantics, years not starting on a Monday are shifted by one week and
        # e.g. "2020-W53" and "2021-W01" map to the same date. The ISO
        # equivalent would be "%G-W%V+%u" -- confirm before changing, since it
        # would move already published dates.
        return clean_date(year_week + "+4", "%Y-W%W+%w")

    def pipe_rename_countries(self, df: pd.DataFrame) -> pd.DataFrame:
        """Rename locations using the ``ECDC_COUNTRIES`` mapping.

        Returns a new frame; the input frame is left unmodified.
        """
        return df.assign(location=df.location.replace(ECDC_COUNTRIES))

    def pipe_filter_entries(self, df: pd.DataFrame) -> pd.DataFrame:
        """Keep only the rows whose location is a value of ``ECDC_COUNTRIES``.

        Per the original notes this is meant to:
        - Discard subnational data.
        - Skip countries not sourced from ECDC by OWID (avoid loop).
        """
        allowed_locations = ECDC_COUNTRIES.values()
        return df[df.location.isin(allowed_locations)]

    def pipe_date(self, df: pd.DataFrame) -> pd.DataFrame:
        """Replace the ``year_week`` labels in ``Date`` with actual dates."""
        return df.assign(Date=df.Date.apply(self._yearweek_to_date))

    def pipe_cumsum(self, df: pd.DataFrame) -> pd.DataFrame:
        """Turn the weekly counts into a running total per location.

        The running sum follows the row order of ``df``, so rows are assumed
        to arrive in chronological order within each location.

        Returns:
            A frame with ``Cumulative total`` accumulated per location and at
            most one row per (location, Date) pair.
        """
        running_total = df.groupby(["location"])["Cumulative total"].cumsum()
        df = df.assign(**{"Cumulative total": running_total})
        # De-duplicate per location: keying on "Date" alone would keep a given
        # date for the first location only and drop every other country's row.
        return df.drop_duplicates(subset=["location", "Date"])

    def pipeline(self, df: pd.DataFrame):
        """Run all processing steps on the raw source frame, in order."""
        return (
            df.pipe(self.pipe_rename_columns)
            .pipe(self.pipe_rename_countries)
            .pipe(self.pipe_filter_entries)
            .pipe(self.pipe_date)
            .pipe(self.pipe_cumsum)
            .pipe(self.pipe_metadata)
        )

    def export_countries(self, df: pd.DataFrame):
        """Export one data file per location and log each successful export.

        Rows without a ``Cumulative total`` are dropped first; a location left
        with no rows is skipped and nothing is logged for it.
        """
        # `unique()` keeps first-appearance order, so the export and log order
        # is deterministic (iterating a set is not).
        for location in df.location.unique():
            df_c = df[df.location == location].dropna(
                subset=["Cumulative total"],
                how="all",
            )
            if df_c.empty:
                continue
            self.export_datafile(df_c, filename=location)
            logger.info(f"\tcowidev.testing.batch.ecdc.{location}: SUCCESS ✅")

    def export(self):
        """Read the source, run the pipeline and export every location."""
        df = self.read().pipe(self.pipeline)
        self.export_countries(df)
def main():
    """Entry point: run the full ECDC read, process and export cycle."""
    ECDC().export()