Source code for cowidev.cmd.vax.track.countries

from datetime import datetime

import pandas as pd

from cowidev.cmd.vax.track.vaccines import vaccines_comparison_with_who
from cowidev import PATHS


[docs]def get_who_data(): # Load WHO url = "https://covid19.who.int/who-data/vaccination-data.csv" df_who = pd.read_csv(url, usecols=["ISO3", "COUNTRY", "DATA_SOURCE"]) df_who = df_who.rename(columns={"COUNTRY": "location_WHO"}) # Countries WHO relies on us df_who = df_who.assign(reporting_to_WHO=df_who.DATA_SOURCE == "OWID") return df_who
[docs]def country_updates_summary( path_vaccinations: str = None, path_locations: str = None, path_automation_state: str = None, as_dict: bool = False, sortby_counts: bool = False, sortby_updatefreq: bool = False, who: bool = False, vaccines: bool = False, metric_counts: bool = False, ): """Check last updated countries. It loads the content from locations.csv, vaccinations.csv and automation_state.csv to present results on the update frequency and timeline of all countries. By default, the countries are sorted from least to most recently updated. You can also sort them from least to most frequently updated ones by using argument `sortby_counts`. In Jupyter is recommended to ass the following lines to enable the DataFrame to be fully shown: ```python import pandas as pd pd.set_option('display.max_columns', None) pd.set_option('display.max_rows', None) pd.set_option('display.max_colwidth', None) ``` Args: path_vaccinations (str, optional): Path to vaccinations csv file. Default value works if repo structure is left unmodified. path_locations (str, optional): Path to locations csv file. Default value works if repo structure is left unmodified. path_automation_state (str, optional): Path to automation state csv file. Default value works if repo structure is left unmodified. as_dict (bool, optional): Set to True for the return value to be shaped as a dictionary. Otherwise returns a DataFrame. sortby_counts (bool, optional): Set to True to sort resuls from least to most updated countries. who (bool, optional): Display WHO columns metric_counts (bool, optional): Set to True to display how many rows with a non-null value for each metric appear. Returns: Union[pd.DataFrame, dict]: List or DataFrame, where each row (or element) contains five fields: - 'last_observation_date': Last update date. - 'location': Country name. - 'source_website': Source used to retrieve last added data. - 'automated': True if country process is automated. - 'counts': Number of times the country has been updated. """ # Get data paths if not path_vaccinations: path_vaccinations = PATHS.DATA_VAX_MAIN_FILE if not path_locations: path_locations = PATHS.DATA_VAX_META_FILE if not path_automation_state: path_automation_state = PATHS.INTERNAL_OUTPUT_VAX_AUTOM_FILE columns_output = [ "location", "last_observation_date", "first_observation_date", "counts", "update_frequency", "num_observation_days", "source_website", "automated", ] # Read data df_vax = pd.read_csv(path_vaccinations) df_loc = pd.read_csv(path_locations) df_state = pd.read_csv(path_automation_state) df_who = get_who_data() # Get counts metrics = ["total_vaccinations", "people_vaccinated", "people_fully_vaccinated", "total_boosters"] df_vax = df_vax.dropna( subset=metrics, how="all", ) df_vax_ = pd.DataFrame( { "counts": df_vax.groupby("location").date.count().sort_values(), "first_observation_date": df_vax.groupby("location").date.min(), } ) if metric_counts: metrics_rename = {m: f"{m}_counts" for m in metrics} df_vax_metrics = df_vax.groupby("location", as_index=False)[metrics].nunique().rename(columns=metrics_rename) df_vax_ = df_vax_.merge(df_vax_metrics, on="location") columns_output += list(metrics_rename.values()) df_vax = df_vax_.copy() # Merge data df = df_loc.merge(df_state, on="location") df = df.merge(df_vax, on="location") # Merge with WHO if who: # print(df_who.columns) df = df.merge(df_who, left_on="iso_code", right_on="ISO3", how="left") columns_output += ["reporting_to_WHO", "location_WHO"] # Additional fields num_observation_days = (datetime.now() - pd.to_datetime(df.first_observation_date)).dt.days + 1 num_updates_per_observation_day = df.counts / num_observation_days df = df.assign( num_observation_days=num_observation_days, update_frequency=num_updates_per_observation_day, ) # Sort data if sortby_updatefreq: sort_column = "update_frequency" elif sortby_counts: sort_column = "counts" else: sort_column = "last_observation_date" df = df.sort_values(by=sort_column)[columns_output] def _web_type(x): govs = [ ".gov/", "gov.", ".gob.", ".moh.", ".gub.", ".go.", ".gouv.", "govern", ".govt", ".coronavirus2020.kz/", "thl.fi", ".gv.", "corona.nun.gl", "exploregov.ky", "covid19response.lc/", "corona.fo/", "103.247.238.92/webportal/", "data.public.lu/", "vaccinocovid.iss.sm/", "koronavirus.hr", "koronavirusinfo.az", "covid.is", "government.", "covid19ireland-geohive.hub.arcgis", "sacoronavirus.co.za", "covidodgovor.me", "experience.arcgis.com/experience/59226cacd2b441c7a939dca13f832112/", "guineasalud.org/estadisticas/", "bakuna.cw/", "laatjevaccineren.sr/", "coronavirus.bg/bg/statistika", "admin.ch", "folkhalsomyndigheten.se/", "covid19.ssi.dk/", "fhi.no/", "impfdashboard.de/", "covid-19.nczisk.sk", "opendata.digilugu.ee", ".mzcr.cz/", "ghanahealthservice.org/", "ccss.sa.cr/", "epistat.wiv-isp.be", "covidmaroc.ma", "experience.arcgis.com/experience/cab84dcfe0464c2a8050a78f817924ca", "gtmvigilanciacovid.shinyapps", "belta.by", "fohm.se", "moh.", "vaccines.ncdc.ge", "opendata.swiss", ] if "facebook." in x.lower(): return "Facebook" elif "twitter." in x.lower(): return "Twitter" elif "github." in x.lower() or "githubusercontent" in x.lower(): return "GitHub" elif any(gov in x.lower() for gov in govs): return "Govern/Official" elif (".who.int" in x.lower()) or ("who.maps.arcgis.com" in x.lower()): return "WHO" elif ".pacificdata.org" in x.lower(): return "SPC" elif "ecdc.europa." in x.lower(): return "ECDC" elif "paho.org" in x.lower(): return "PAHO" elif "africacdc.org" in x.lower(): return "Africa CDC" else: return "Others" df = df.assign(**{"web_type": df.source_website.apply(_web_type)}) if vaccines: df_vax = vaccines_comparison_with_who() df = df.merge( df_vax[["location", "vaccines_used_owid", "vaccines_used_who", "missing_in_who", "missing_in_owid"]], on="location", how="left", ) # Return data if as_dict: return df.to_dict(orient="records") return df
[docs]def countries_missing( path_population: str = None, path_locations: str = None, ascending: bool = False, as_dict: bool = False, ): """Get countries currently not present in our dataset. Args: path_population (str, optional): Path to UN population csv file. Default value works if repo structure is left unmodified. path_locations (str, optional): Path to locations csv file. Default value works if repo structure is left unmodified. ascending (bool, optional): Set to True to sort results in ascending order. By default sorts in ascedning order. as_dict (bool, optional): Set to True for the return value to be shaped as a dictionary. Otherwise returns a DataFrame. """ if not path_population: path_population = PATHS.INTERNAL_INPUT_UN_POPULATION_FILE if not path_locations: path_locations = PATHS.DATA_VAX_META_FILE df_loc = pd.read_csv(path_locations, usecols=["location"]) df_pop = pd.read_csv(path_population) df_pop = df_pop[df_pop.iso_code.apply(lambda x: isinstance(x, str) and len(x) == 3)] df_mis = df_pop.loc[~df_pop["entity"].isin(df_loc["location"]), ["entity", "population"]] # Sort if not ascending: df_mis = df_mis.sort_values(by="population", ascending=False) # Return data if as_dict: return df_mis.to_dict(orient="records") return df_mis