Source code for cowidev.hosp.sources.ecdc

import pandas as pd

from cowidev import PATHS
from cowidev.utils.clean import clean_date_series
from cowidev.utils.web.download import read_csv_from_url

METADATA_BASE = {
    "source_url": "https://opendata.ecdc.europa.eu/covid19/hospitalicuadmissionrates/csv/data.csv",
    "source_url_ref": "https://www.ecdc.europa.eu/en/publications-data/download-data-hospital-and-icu-admission-rates-and-current-occupancy-covid-19",
    "source_name": "European Centre for Disease Prevention and Control",
}


POPULATION = pd.read_csv(
    PATHS.INTERNAL_INPUT_UN_POPULATION_FILE,
    usecols=["entity", "population"],
)
EXCLUDED_COUNTRIES = [
    "Austria",
    "Belgium",
    "Czechia",
    "Denmark",
    "Finland",
    "France",
    "Germany",
    "Italy",
    "Netherlands",
    "Portugal",
    "Spain",
    "Sweden",
]


[docs]def download_data():
    df = read_csv_from_url(
        METADATA_BASE["source_url"], usecols=["country", "indicator", "date", "value", "year_week"], use_proxy=True
    )
    df = df[-df.country.isin(EXCLUDED_COUNTRIES)]
    df = df.drop_duplicates()
    df = df.rename(columns={"country": "entity"})
    return df


[docs]def update_metadata(df):
    entities = df.entity.unique()
    METADATA = [{**METADATA_BASE, **{"entity": entity}} for entity in entities]
    return METADATA


[docs]def pipe_undo_100k(df):
    df = pd.merge(df, POPULATION, on="entity", how="left")
    assert df[df.population.isna()].shape[0] == 0, "Country missing from population file"
    df.loc[df["indicator"].str.contains(" per 100k"), "value"] = df["value"].div(100000).mul(df["population"])
    df.loc[:, "indicator"] = df["indicator"].str.replace(" per 100k", "")
    return df


[docs]def pipe_week_to_date(df):
    df["date"] = clean_date_series(df.date, "%Y-%m-%d")
    if df.date.dtypes == "int64":
        df["date"] = clean_date_series(df.date, "%Y%m%d")
    daily_records = df[df["indicator"].str.contains("Daily")]
    date_week_mapping = daily_records[["year_week", "date"]].groupby("year_week", as_index=False).max()
    weekly_records = df[df["indicator"].str.contains("Weekly")].drop(columns="date")
    weekly_records = pd.merge(weekly_records, date_week_mapping, on="year_week")
    df = pd.concat([daily_records, weekly_records]).drop(columns="year_week")
    return df


[docs]def main():
    df = download_data()
    METADATA = update_metadata(df)
    df = df.pipe(pipe_undo_100k).pipe(pipe_week_to_date).drop(columns=["population"])
    return df, METADATA


if __name__ == "__main__":
    main()