# Source code for cowidev.jhu.subnational

import pandas as pd
from cowidev.utils.s3 import obj_to_s3


def clean_global_subnational(metric):
    """Download a JHU global time series and reshape it to long format.

    Args:
        metric: Name fragment of the JHU file to fetch. ``"confirmed"``
            produces ``*_cases`` columns; any other value produces
            ``*_deaths`` columns.

    Returns:
        A DataFrame with one row per (location1, location2, date) holding
        ``total_<x>``, ``new_<x>`` and ``new_<x>_smoothed``, plus a
        ``location3`` column filled with ``pd.NA``. Rows without a
        ``Province/State`` value are discarded.
    """
    url = f"https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_time_series/time_series_covid19_{metric}_global.csv"

    if metric == "confirmed":
        label = "cases"
    else:
        label = "deaths"
    total_col = f"total_{label}"
    new_col = f"new_{label}"
    smoothed_col = f"new_{label}_smoothed"
    group_keys = ["location1", "location2"]

    raw = pd.read_csv(url, na_values="")
    raw = raw.drop(columns=["Lat", "Long"])
    # Only keep rows that carry a subnational (province/state) label.
    raw = raw.dropna(subset=["Province/State"])

    # Wide (one column per date) -> long (one row per date).
    df = raw.melt(
        id_vars=["Country/Region", "Province/State"],
        var_name="date",
        value_name=total_col,
    )
    df = df.rename(columns={"Country/Region": "location1", "Province/State": "location2"})

    df["date"] = pd.to_datetime(df["date"]).dt.date.astype(str)
    df = df.sort_values(group_keys + ["date"])

    # Daily change: cumulative total minus the previous row of the same location.
    previous_total = df.groupby(group_keys)[total_col].shift(1)
    df[new_col] = df[total_col] - previous_total

    # 7-row rolling mean per location; drop the group levels so the result
    # aligns with df's own index on assignment.
    rolling_mean = df.groupby(group_keys).rolling(7)[new_col].mean()
    df[smoothed_col] = rolling_mean.droplevel(level=[0, 1]).round(2)

    df["location3"] = pd.NA
    return df
def clean_us_subnational(metric):
    """Download a JHU US time series and reshape it to long format.

    Args:
        metric: Name fragment of the JHU file to fetch. ``"confirmed"``
            produces ``*_cases`` columns; any other value produces
            ``*_deaths`` columns.

    Returns:
        A DataFrame with one row per (location2, location3, date) holding
        ``total_<x>``, ``new_<x>`` and ``new_<x>_smoothed``, plus a
        ``location1`` column set to ``"United States"``.
    """
    url = f"https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_time_series/time_series_covid19_{metric}_US.csv"

    if metric == "confirmed":
        label = "cases"
    else:
        label = "deaths"
    total_col = f"total_{label}"
    new_col = f"new_{label}"
    smoothed_col = f"new_{label}_smoothed"
    group_keys = ["location2", "location3"]

    # Metadata columns that are not needed downstream. Missing ones are
    # tolerated (errors="ignore") since not every file has all of them.
    unused_columns = [
        "UID",
        "iso2",
        "iso3",
        "code3",
        "FIPS",
        "Country_Region",
        "Lat",
        "Long_",
        "Combined_Key",
        "Population",
    ]
    raw = pd.read_csv(url)
    raw = raw.drop(columns=unused_columns, errors="ignore")

    # Wide (one column per date) -> long (one row per date).
    df = raw.melt(
        id_vars=["Province_State", "Admin2"],
        var_name="date",
        value_name=total_col,
    )
    df = df.rename(columns={"Province_State": "location2", "Admin2": "location3"})

    df["date"] = pd.to_datetime(df["date"]).dt.date.astype(str)
    df = df.sort_values(group_keys + ["date"])

    # NOTE(review): rows whose location3 (Admin2) is missing are presumably
    # left out of these groupbys by pandas' default NaN-key handling, which
    # would leave their new_* values empty - confirm whether that is intended.
    previous_total = df.groupby(group_keys)[total_col].shift(1)
    df[new_col] = df[total_col] - previous_total

    # 7-row rolling mean per location; drop the group levels so the result
    # aligns with df's own index on assignment.
    rolling_mean = df.groupby(group_keys).rolling(7)[new_col].mean()
    df[smoothed_col] = rolling_mean.droplevel(level=[0, 1]).round(2)

    df["location1"] = "United States"
    return df
def create_subnational():
    """Build the combined subnational cases/deaths table and upload it to S3.

    Fetches the global and US series for both confirmed cases and deaths,
    outer-merges cases with deaths within each source, stacks the two
    sources, keeps only rows with ``total_cases > 0`` and uploads the result
    as a zipped CSV via ``obj_to_s3``.

    Returns:
        None. The only effect is the upload.
    """
    merge_keys = ["location1", "location2", "location3", "date"]
    output_columns = merge_keys + [
        "total_cases",
        "new_cases",
        "new_cases_smoothed",
        "total_deaths",
        "new_deaths",
        "new_deaths_smoothed",
    ]

    global_cases = clean_global_subnational("confirmed")
    global_deaths = clean_global_subnational("deaths")
    us_cases = clean_us_subnational("confirmed")
    us_deaths = clean_us_subnational("deaths")

    # Outer merges keep locations/dates that appear in only one of the two
    # series (cases or deaths).
    global_df = pd.merge(global_cases, global_deaths, on=merge_keys, how="outer")
    us_df = pd.merge(us_cases, us_deaths, on=merge_keys, how="outer")

    df = pd.concat([global_df, us_df]).sort_values(merge_keys)[output_columns]
    df = df[df.total_cases > 0]

    filename = "subnational_cases_deaths"
    compression = {"method": "zip", "archive_name": f"{filename}.csv"}
    # The path must be an f-string: without the prefix the object key would
    # contain the literal text "{filename}" instead of the file name.
    obj_to_s3(
        df,
        s3_path=f"s3://covid-19/public/jhu/{filename}.zip",
        compression=compression,
        public=True,
    )