Source code for cowidev.hosp.sources.ecdc

import pandas as pd

from cowidev import PATHS
from cowidev.utils.clean import clean_date_series
from cowidev.utils.web.download import read_csv_from_url

METADATA_BASE = {
    "source_url": "https://opendata.ecdc.europa.eu/covid19/hospitalicuadmissionrates/csv/data.csv",
    "source_url_ref": "https://www.ecdc.europa.eu/en/publications-data/download-data-hospital-and-icu-admission-rates-and-current-occupancy-covid-19",
    "source_name": "European Centre for Disease Prevention and Control",
}


# UN population figures, used by pipe_undo_100k to convert "per 100k" rates
# into absolute counts.
POPULATION = pd.read_csv(
    PATHS.INTERNAL_INPUT_UN_POPULATION_FILE,
    usecols=["entity", "population"],
)
# Countries filtered out of the ECDC dataset in download_data().
EXCLUDED_COUNTRIES = [
    "Austria",
    "Belgium",
    "Czechia",
    "Denmark",
    "Finland",
    "France",
    "Germany",
    "Italy",
    "Netherlands",
    "Portugal",
    "Spain",
    "Sweden",
]


def download_data():
    # Download the ECDC hospital/ICU admission file, keep only the columns used
    # downstream, drop excluded countries and duplicates, and rename country to entity.
    df = read_csv_from_url(
        METADATA_BASE["source_url"],
        usecols=["country", "indicator", "date", "value", "year_week"],
        use_proxy=True,
    )
    df = df[~df.country.isin(EXCLUDED_COUNTRIES)]
    df = df.drop_duplicates()
    df = df.rename(columns={"country": "entity"})
    return df


def update_metadata(df):
    entities = df.entity.unique()
    METADATA = [{**METADATA_BASE, "entity": entity} for entity in entities]
    return METADATA


def pipe_undo_100k(df):
    # Turn "per 100k" rates into absolute counts using UN population figures,
    # then strip the " per 100k" suffix from the indicator names.
    df = pd.merge(df, POPULATION, on="entity", how="left")
    assert df[df.population.isna()].shape[0] == 0, "Country missing from population file"
    df.loc[df["indicator"].str.contains(" per 100k"), "value"] = df["value"].div(100000).mul(df["population"])
    df.loc[:, "indicator"] = df["indicator"].str.replace(" per 100k", "")
    return df
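

# Worked example (illustrative only; the numbers are made up): for a row whose
# indicator ends in " per 100k" with value 5.0 and an entity population of
# 10_000_000, pipe_undo_100k rewrites the value as
# 5.0 / 100_000 * 10_000_000 = 500.0 and drops " per 100k" from the indicator.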


def pipe_week_to_date(df):
    # Dates arrive either as integers (YYYYMMDD) or as ISO strings (YYYY-MM-DD).
    if df.date.dtypes == "int64":
        df["date"] = clean_date_series(df.date, "%Y%m%d")
    else:
        df["date"] = clean_date_series(df.date, "%Y-%m-%d")
    # Weekly indicators carry no usable date, so each weekly record is assigned
    # the latest daily date observed within the same year_week.
    daily_records = df[df["indicator"].str.contains("Daily")]
    date_week_mapping = daily_records[["year_week", "date"]].groupby("year_week", as_index=False).max()
    weekly_records = df[df["indicator"].str.contains("Weekly")].drop(columns="date")
    weekly_records = pd.merge(weekly_records, date_week_mapping, on="year_week")
    df = pd.concat([daily_records, weekly_records]).drop(columns="year_week")
    return df
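

# Illustrative example (the year_week format "2021-W05" is an assumption): if
# the daily records tagged "2021-W05" run up to 2021-02-07, every weekly record
# tagged "2021-W05" is assigned the date 2021-02-07 before year_week is dropped.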


def main():
    df = download_data()
    METADATA = update_metadata(df)
    df = df.pipe(pipe_undo_100k).pipe(pipe_week_to_date).drop(columns=["population"])
    return df, METADATA


if __name__ == "__main__":
    main()
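

# Usage sketch (illustrative, not part of the original module): main() returns
# the cleaned dataframe together with a list of per-entity metadata dictionaries,
# so a caller could do something like the following (variable names are
# hypothetical):
#
#     df, metadata = main()
#     print(df.head())       # columns: entity, indicator, date, value
#     print(metadata[0])     # METADATA_BASE plus an "entity" key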