# Source code for cowidev.megafile.steps.macro
import os
import pandas as pd
def add_macro_variables(complete_dataset: pd.DataFrame, macro_variables: dict, data_dir: str) -> pd.DataFrame:
    """
    Append 'macro' (not directly COVID-related) variables to the dataset.

    The data is denormalized, i.e. each value (for example GDP per capita) is
    repeated on every row of the complete dataset that shares the same
    ``iso_code``. This is meant to facilitate the use of our dataset by
    non-experts.

    Args:
        complete_dataset: Dataset to enrich. Must contain an ``iso_code`` column.
        macro_variables: Mapping of variable (column) name to the name of the CSV
            file, relative to ``data_dir``, that provides it. Each file must
            contain an ``iso_code`` column and a column named after the variable.
        data_dir: Directory containing the macro variable CSV files.

    Returns:
        The dataset left-joined on ``iso_code`` with every macro variable, i.e.
        with one additional column per entry in ``macro_variables``. Values are
        rounded to 3 decimals.

    Raises:
        AssertionError: If the merges changed the number of rows (e.g. a macro
            file lists the same ``iso_code`` more than once) or did not add
            exactly one column per macro variable.
    """
    original_shape = complete_dataset.shape

    for var, file in macro_variables.items():
        var_df = pd.read_csv(os.path.join(data_dir, file), usecols=["iso_code", var])
        # Drop rows without an iso_code: pandas matches null merge keys against
        # each other, so they could otherwise attach to null-keyed dataset rows.
        # The explicit copy makes the column assignment below operate on an
        # independent frame rather than on a slice of the CSV data.
        var_df = var_df.loc[var_df["iso_code"].notna()].copy()
        var_df[var] = var_df[var].round(3)
        complete_dataset = complete_dataset.merge(var_df, on="iso_code", how="left")

    # Sanity checks: a left join on a unique key must keep the row count intact
    # and add exactly one column per macro variable.
    assert complete_dataset.shape[0] == original_shape[0], (
        f"Row count changed from {original_shape[0]} to {complete_dataset.shape[0]} "
        "while adding macro variables (duplicate iso_code in a macro file?)"
    )
    assert complete_dataset.shape[1] == original_shape[1] + len(macro_variables), (
        f"Expected {original_shape[1] + len(macro_variables)} columns after adding "
        f"macro variables, got {complete_dataset.shape[1]}"
    )
    return complete_dataset