Source code for cowidev.megafile.steps.xm

import pandas as pd
import numpy as np
import datetime


def add_excess_mortality(df: pd.DataFrame, wmd_hmd_file: str, economist_file: str) -> pd.DataFrame:
    """Attach excess-mortality columns from two CSV sources to ``df``.

    Both sources are left-merged onto ``df`` using the ``location`` and ``date`` columns, so every
    row of ``df`` is kept. Afterwards, "last 12 months" variants are derived for four cumulative
    metrics through ``_add_last12m_to_metric``.

    Args:
        df: Frame to enrich; it is merged on its ``location`` and ``date`` columns.
        wmd_hmd_file: Path to the HMD & WMD excess-mortality CSV.
        economist_file: Path to The Economist excess-mortality CSV (it identifies places with a
            ``country`` column, which is renamed to ``location`` before merging).

    Returns:
        The enriched DataFrame.
    """
    merge_keys = ["location", "date"]

    # --- HMD & WMD --------------------------------------------------------------------------
    # Source column -> name used in the output frame.
    hmd_wmd_renames = {
        "p_proj_all_ages": "excess_mortality",
        "cum_p_proj_all_ages": "excess_mortality_cumulative",
        "cum_excess_proj_all_ages": "excess_mortality_cumulative_absolute",
        "cum_excess_per_million_proj_all_ages": "excess_mortality_cumulative_per_million",
        "excess_proj_all_ages": "excess_mortality_count_week",
        "excess_per_million_proj_all_ages": "excess_mortality_count_week_pm",
    }
    hmd_wmd = pd.read_csv(wmd_hmd_file, usecols=["location", "date", *hmd_wmd_renames])
    df = df.merge(hmd_wmd, how="left", on=merge_keys)
    # Renaming happens after the merge, on the merged frame.
    df = df.rename(columns=hmd_wmd_renames)

    # --- The Economist ----------------------------------------------------------------------
    economist_columns = [
        "country",
        "date",
        "cumulative_estimated_daily_excess_deaths",
        "cumulative_estimated_daily_excess_deaths_ci_95_top",
        "cumulative_estimated_daily_excess_deaths_ci_95_bot",
        "cumulative_estimated_daily_excess_deaths_per_100k",
        "cumulative_estimated_daily_excess_deaths_ci_95_top_per_100k",
        "cumulative_estimated_daily_excess_deaths_ci_95_bot_per_100k",
        "estimated_daily_excess_deaths",
        "estimated_daily_excess_deaths_ci_95_top",
        "estimated_daily_excess_deaths_ci_95_bot",
        "estimated_daily_excess_deaths_per_100k",
        "estimated_daily_excess_deaths_ci_95_top_per_100k",
        "estimated_daily_excess_deaths_ci_95_bot_per_100k",
    ]
    economist = pd.read_csv(economist_file, usecols=economist_columns)
    economist = economist.rename(columns={"country": "location"})
    df = df.merge(economist, how="left", on=merge_keys)

    # --- Last-12-months variants ------------------------------------------------------------
    # (cumulative metric, scaling factor, suffix of the population-scaled column)
    last12m_specs = (
        ("excess_mortality_cumulative_absolute", 1000000, "per_million"),
        ("cumulative_estimated_daily_excess_deaths", 100000, "per_100k"),
        ("cumulative_estimated_daily_excess_deaths_ci_95_top", 100000, "per_100k"),
        ("cumulative_estimated_daily_excess_deaths_ci_95_bot", 100000, "per_100k"),
    )
    for metric, scaling, scaling_slug in last12m_specs:
        df = _add_last12m_to_metric(df, metric, "location", scaling, scaling_slug)

    return df
def _add_last12m_to_metric(
    df: pd.DataFrame, column_metric: str, column_location: str, scaling: int, scaling_slug: str
) -> pd.DataFrame:
    """Add "last 12 months" variants of a cumulative metric.

    Two columns are added to the returned frame:

    * ``{column_metric}_last12m``: ``column_metric`` minus the location's baseline, where the
      baseline is the location's first non-null value of the metric dated after the cutoff
      (current time minus 365.2425 days; "first" follows the sort order of the ``date`` column).
    * ``{column_metric}_last12m_{scaling_slug}``: the value above multiplied by ``scaling`` and
      divided by the ``population`` column.

    Rows dated before the cutoff get NaN in both columns, as do rows of locations that have no
    non-null value after the cutoff.

    Args:
        df: Input frame. Must contain ``date``, ``population``, ``column_metric`` and
            ``column_location`` columns.
        column_metric: Name of the cumulative metric column.
        column_location: Name of the column identifying the location.
        scaling: Factor applied before dividing by population (e.g. ``1000000``).
        scaling_slug: Suffix for the population-scaled column (e.g. ``"per_million"``).

    Returns:
        A new DataFrame with the two extra columns. It is the result of a merge, so the index of
        ``df`` is not carried over.
    """
    column_metric_12m = f"{column_metric}_last12m"
    date_cutoff = datetime.datetime.now() - datetime.timedelta(days=365.2425)

    # Parse the dates once. The masks are kept as positional arrays because the merge below
    # returns a frame with a fresh index (same rows, same order as `df`).
    dates = pd.to_datetime(df["date"])
    in_window = (dates > date_cutoff).to_numpy()
    is_old = (dates < date_cutoff).to_numpy()

    # Baseline per location: earliest non-null metric value inside the 12-month window.
    baseline = (
        df.loc[in_window]
        .dropna(subset=[column_metric])
        .sort_values([column_location, "date"])
        .drop_duplicates(column_location)[[column_location, column_metric]]
        .rename(columns={column_metric: column_metric_12m})
    )

    # `baseline` has one row per location, so this left merge keeps the row count and order.
    df = df.merge(baseline, on=[column_location], how="left")

    # Blank out rows older than the cutoff. `Series.mask` upcasts integer results to float as
    # needed; assigning NaN in place into an integer Series is deprecated in pandas.
    values = (df[column_metric] - df[column_metric_12m]).mask(is_old)

    return df.assign(
        **{
            column_metric_12m: values,
            f"{column_metric_12m}_{scaling_slug}": values.mul(scaling).div(df["population"]),
        }
    )