# Source code for cowidev.megafile.steps.core

import os

from cowidev import PATHS
from cowidev.megafile.steps.cgrt import get_cgrt
from cowidev.megafile.steps.hosp import get_hosp
from cowidev.megafile.steps.jhu import get_jhu
from cowidev.megafile.steps.reprod import get_reprod
from cowidev.megafile.steps.test import get_testing
from cowidev.megafile.steps.variants import get_variants
from cowidev.megafile.steps.vax import get_vax


# Base directories used by get_base_dataset() to locate its input files.
# Public data directory: vaccinations CSV and the JHU "full_data.csv" cases file.
DATA_DIR = PATHS.DATA_DIR
# Internal grapher directory: hospital & ICU CSV.
GRAPHER_DIR = PATHS.INTERNAL_GRAPHER_DIR
# Internal input directory: reproduction-rate country mapping and the "bsg" files.
INPUT_DIR = PATHS.INTERNAL_INPUT_DIR


def get_base_dataset(logger):
    """Build the base dataset by merging the individual source datasets.

    Loads the JHU, reproduction rate, hospitalization, testing, vaccination,
    OxCGRT and variants datasets, then merges them on ``date`` and
    ``location``.

    Args:
        logger: Object exposing an ``info`` method; it is called with a
            progress message before each dataset is loaded.

    Returns:
        The merged table (presumably a pandas ``DataFrame`` -- confirm against
        the loaders), sorted by ``location`` and then ``date``. JHU,
        reproduction rate, hospital, testing and vaccination data are
        outer-joined; OxCGRT and variants are then left-joined, so they only
        add columns to rows that already exist after the outer joins.
    """
    logger.info("Fetching JHU dataset…")
    jhu = get_jhu(jhu_dir=PATHS.DATA_JHU_DIR)

    logger.info("Fetching reproduction rate…")
    reprod = get_reprod(
        file_url="https://github.com/crondonm/TrackingR/raw/main/Estimates-Database/database_7.csv",
        country_mapping=os.path.join(INPUT_DIR, "reproduction", "reprod_country_standardized.csv"),
    )

    logger.info("Fetching hospital dataset…")
    hosp = get_hosp(data_file=os.path.join(GRAPHER_DIR, "COVID-2019 - Hospital & ICU.csv"))

    logger.info("Fetching testing dataset…")
    testing = get_testing()

    logger.info("Fetching vaccination dataset…")
    vax = get_vax(data_file=os.path.join(DATA_DIR, "vaccinations", "vaccinations.csv"))
    # Drop these sub-national rows from the vaccination data.
    # NOTE(review): presumably because they are already covered by a
    # "United Kingdom" aggregate -- confirm.
    excluded_locations = ["England", "Northern Ireland", "Scotland", "Wales"]
    # `~` is the documented way to invert a boolean mask (the original used
    # unary minus, which only works because pandas special-cases bool data).
    vax = vax[~vax.location.isin(excluded_locations)]

    logger.info("Fetching OxCGRT dataset…")
    cgrt = get_cgrt(
        bsg_latest=os.path.join(INPUT_DIR, "bsg", "latest.csv"),
        bsg_diff_latest=os.path.join(INPUT_DIR, "bsg", "latest-differentiated.csv"),
        country_mapping=os.path.join(INPUT_DIR, "bsg", "bsg_country_standardised.csv"),
    )

    logger.info("Fetching variants dataset…")
    variants = get_variants(
        variants_file="s3://covid-19/internal/variants/covid-variants.csv",
        cases_file=os.path.join(DATA_DIR, "jhu", "full_data.csv"),
    )

    # Big merge: every dataset is keyed by (date, location).
    merge_keys = ["date", "location"]
    return (
        jhu.merge(reprod, on=merge_keys, how="outer")
        .merge(hosp, on=merge_keys, how="outer")
        .merge(testing, on=merge_keys, how="outer")
        .merge(vax, on=merge_keys, how="outer")
        # Left joins: these two never introduce new (date, location) rows.
        .merge(cgrt, on=merge_keys, how="left")
        .merge(variants, on=merge_keys, how="left")
        .sort_values(["location", "date"])
    )