Source code for cowidev.megafile.steps.cgrt

"merge"
import pandas as pd


[docs]def get_cgrt(bsg_latest: str, bsg_diff_latest: str, country_mapping: str): """ Downloads the latest OxCGRT dataset from BSG's GitHub repository Remaps BSG country names to OWID country names Returns: cgrt {dataframe} """ country_mapping = pd.read_csv(country_mapping) cgrt = clean_cgrt( url=bsg_latest, columns_rename={ "Date": "date", "StringencyIndex_Average": "stringency_index", }, country_mapping=country_mapping, ) cgrt_diff = clean_cgrt( url=bsg_diff_latest, columns_rename={ "Date": "date", "StringencyIndex_NonVaccinated": "stringency_index_nonvac", "StringencyIndex_Vaccinated": "stringency_index_vac", "StringencyIndex_WeightedAverage": "stringency_index_weighted_avg", }, country_mapping=country_mapping, ) cgrt = cgrt.merge(cgrt_diff, on=["location", "date"], how="outer") return cgrt
[docs]def clean_cgrt(url, columns_rename, country_mapping): # Read file df = pd.read_csv(url, low_memory=False) # Filter rows if "RegionCode" in df.columns: df = df[df.RegionCode.isnull()] columns = list(columns_rename.keys()) # Filter columns df = df[columns + ["CountryName"]] # Format date df.loc[:, "Date"] = pd.to_datetime(df["Date"], format="%Y%m%d").dt.date.astype(str) # Merge with country mapping df = country_mapping.merge(df, on="CountryName", how="right") # Check missing countries missing_from_mapping = df[df["Country"].isna()]["CountryName"].unique() if len(missing_from_mapping) > 0: raise Exception(f"Missing countries in OxCGRT mapping: {missing_from_mapping}") # Final column transformations df = df.rename( columns={ "Country": "location", **columns_rename, } ).drop(columns=["CountryName"]) return df