Source code for cowidev.megafile.steps.macro

import os
import pandas as pd


def add_macro_variables(
    complete_dataset: pd.DataFrame, macro_variables: dict, data_dir: str
) -> pd.DataFrame:
    """Append 'macro' (non-directly COVID related) variables to the dataset.

    The data is denormalized, i.e. each yearly value (for example GDP per
    capita) is added to each row of the complete dataset. This is meant to
    facilitate the use of our dataset by non-experts.

    Args:
        complete_dataset: Dataset to extend. It is merged on its ``iso_code``
            column and is not modified in place.
        macro_variables: Mapping of variable (column) name to the name of the
            CSV file, relative to ``data_dir``, that holds it. Each file is
            read for exactly two columns: ``iso_code`` and the variable.
        data_dir: Directory containing the macro-variable CSV files.

    Returns:
        A new DataFrame with the same rows as ``complete_dataset`` plus one
        column per macro variable, each rounded to 3 decimals. Rows whose
        ``iso_code`` has no match in a file get a missing value.

    Raises:
        AssertionError: If the merges changed the number of rows (for example
            because a macro file lists the same ``iso_code`` more than once)
            or did not add exactly one column per macro variable.
    """
    original_rows, original_cols = complete_dataset.shape

    for var, file in macro_variables.items():
        var_df = pd.read_csv(os.path.join(data_dir, file), usecols=["iso_code", var])
        # Rows without an iso_code can never match; take an explicit copy so
        # the rounding below writes to an independent frame, not a slice.
        var_df = var_df.loc[var_df["iso_code"].notna()].copy()
        var_df[var] = var_df[var].round(3)
        complete_dataset = complete_dataset.merge(var_df, on="iso_code", how="left")

    # Sanity checks: a left merge must neither add nor drop rows, and each
    # macro variable must contribute exactly one new column.
    assert complete_dataset.shape[0] == original_rows, (
        f"Row count changed from {original_rows} to {complete_dataset.shape[0]} "
        "while merging macro variables"
    )
    assert complete_dataset.shape[1] == original_cols + len(macro_variables), (
        f"Expected {original_cols + len(macro_variables)} columns after merging "
        f"macro variables, got {complete_dataset.shape[1]}"
    )

    return complete_dataset