Source code for cowidev.cmd.vax.track.vaccines

import pandas as pd
import requests
from bs4 import BeautifulSoup

from cowidev import PATHS
from cowidev.utils.web.scraping import get_soup, get_headers
from cowidev.vax.utils.orgs import WHO_VACCINES


# Translation table applied by `map_vaccine`: keys are vaccine names as they
# are scraped from trackvaccines.org, values are the names they are replaced
# with. Names that are not listed here are passed through unchanged.
# NOTE(review): the values presumably follow the naming used in the OWID
# locations file that `vaccines_tracked` reads -- confirm when adding entries.
VAX_MAPPING = {
    "Bharat Biotech": "Covaxin",
    "FBRI": "EpiVacCorona",
    "Janssen (Johnson & Johnson)": "Johnson&Johnson",
    "Moderna": "Moderna",
    "Oxford/AstraZeneca": "Oxford/AstraZeneca",
    "Pfizer/BioNTech": "Pfizer/BioNTech",
    "Sinopharm (Beijing)": "Sinopharm/Beijing",
    "Sinopharm (Wuhan)": "Sinopharm/Wuhan",
    "Sinovac": "Sinovac",
    "Gamaleya": "Sputnik V",
    # Several source names may collapse onto the same replacement name.
    "Serum Institute of India": "Oxford/AstraZeneca",
}


# Location name -> URL slug used under "<base_url>/country/<slug>/" for the
# locations whose slug cannot be derived by lower-casing the name and replacing
# spaces with hyphens. Slugs must NOT carry leading/trailing slashes: the URL
# builder adds the separators itself.
COUNTRY_MAP = {
    "United Kingdom": "united-kingdom-of-great-britain-and-northern-ireland",
    "United States": "united-states-of-america",
    "Venezuela": "venezuela-bolivarian-republic-of",
    "South Korea": "republic-of-korea",
    # The UK constituent countries all resolve to the single UK page.
    "Scotland": "united-kingdom-of-great-britain-and-northern-ireland",
    "Wales": "united-kingdom-of-great-britain-and-northern-ireland",
    "England": "united-kingdom-of-great-britain-and-northern-ireland",
    "Northern Ireland": "united-kingdom-of-great-britain-and-northern-ireland",
    "Faeroe Islands": "faroe-islands",
    "Cape Verde": "cabo-verde",
    "Brunei": "brunei-darussalam",
    "Bahamas": "the-bahamas",
}


class TrackVaccinesClient:
    """Client to interact with https://covid19.trackvaccines.org."""

    # Seconds to wait for the site when probing a URL. Without a timeout a
    # stalled connection would block the caller indefinitely.
    request_timeout = 30

    def __init__(self):
        self.base_url = "https://covid19.trackvaccines.org"

    def get_country_url(self, location: str):
        """Build the country page URL for a location and check that it exists.

        Args:
            location (str): Country name. Names listed in `COUNTRY_MAP` use the mapped slug; any other name
                            is lower-cased and its spaces are replaced by hyphens.

        Returns:
            str: URL of the country page.

        Raises:
            ValueError: If requesting the URL does not return a successful response.
        """
        location = COUNTRY_MAP.get(location, location.lower().replace(" ", "-"))
        # Guard against slugs that carry their own slashes, which would produce "//" in the URL.
        location = location.strip("/")
        url = f"{self.base_url}/country/{location}/"
        if not self._valid_url(url):
            raise ValueError(f"Couldn't find vaccines for {location}. Check {url}")
        return url

    @property
    def all_vaccines_url(self):
        """URL of the page listing all vaccines."""
        return f"{self.base_url}/vaccines/"

    def _valid_url(self, url: str):
        """Tell whether a GET request to `url` gets a successful response.

        Exceptions raised by `requests` (including timeouts) are not caught here.
        """
        resp = requests.get(url, headers=get_headers(), timeout=self.request_timeout)
        return bool(resp.ok)

    def vaccines_approved(self, location: str = None, original_names: bool = False) -> list:
        """Get list of approved vaccines in a country (or all if None specified).

        Args:
            location (str, optional): Country name. If None, retrieves all approved vaccines. Defaults to None.
            original_names (bool, optional): Set to True to keep vaccine names as found on the web, without
                                             mapping or de-duplication. Defaults to False.

        Returns:
            list: Approved vaccines. None if a ValueError is raised while resolving or parsing the country page
                  (e.g. no page could be found for `location`).
        """
        if not location:
            soup = get_soup(self.all_vaccines_url)
            return self._parse_vaccines_all(soup, original_names)
        try:
            url = self.get_country_url(location)
            soup = get_soup(url)
            return self._parse_vaccines_location(soup, original_names)
        except ValueError:
            return None

    def _parse_vaccines_location(self, soup: BeautifulSoup, original_names: bool = False):
        """Extract vaccine names from the card grid of a country page."""
        content = soup.find(class_="card-grid alignwide")
        vaccines_html = content.find_all(class_="card__title has-text-align-center")
        vaccines = [e.find("span").text for e in vaccines_html]
        return self._format_vaccines(vaccines, original_names)

    def _parse_vaccines_all(self, soup: BeautifulSoup, original_names: bool = False):
        """Extract vaccine names from the card grid of the all-vaccines page."""
        vaccines_html = soup.find("ul", class_="card-grid alignwide").find_all("h2")
        vaccines = [vax.find("span").text for vax in vaccines_html]
        return self._format_vaccines(vaccines, original_names)

    def _format_vaccines(self, vaccines: list, original_names: bool = False):
        """Return the scraped names as-is, or mapped with `map_vaccine` and de-duplicated."""
        if original_names:
            return vaccines
        # dict.fromkeys drops duplicates while keeping the order found on the page, so the
        # result is the same on every run (a plain set has no stable iteration order).
        return list(dict.fromkeys(map(map_vaccine, vaccines)))
def map_vaccine(vaccine):
    """Translate a vaccine name through `VAX_MAPPING`.

    Args:
        vaccine: Vaccine name to translate.

    Returns:
        The mapped name if `vaccine` is a key of `VAX_MAPPING`, otherwise `vaccine` unchanged.
    """
    return VAX_MAPPING.get(vaccine, vaccine)
def _split_vaccines(cell) -> set:
    """Turn one comma-separated `vaccines` cell into a set of stripped vaccine names."""
    # A missing cell is read by pandas as a non-string value (NaN); treat it as "no vaccines".
    if not isinstance(cell, str):
        return set()
    names = (name.strip() for name in cell.split(","))
    return {name for name in names if name}


def vaccines_tracked(path_locations: str = None, location: str = None, as_list: bool = False) -> pd.DataFrame:
    """Get tracked vaccines for tracked countries.

    Args:
        path_locations (str, optional): Path to locations csv file. Default value works if repo structure is left
                                        unmodified.
        location (str, optional): Country name (a list of names is also accepted). If given, only these
                                  locations are kept. Defaults to None.
        as_list (bool, optional): Set to True to return a (flattened) list.

    Returns:
        pd.DataFrame: Dataframe with columns `location` and `vaccines` (a set of names per row). If `as_list` is
                      True, a sorted list of the unique vaccine names across the selected rows is returned instead.
    """
    if not path_locations:
        path_locations = PATHS.DATA_VAX_META_FILE
    df = pd.read_csv(path_locations, usecols=["vaccines", "location"])
    df = df.assign(vaccines=df.vaccines.apply(_split_vaccines))
    if location:
        if isinstance(location, str):
            location = [location]
        df = df[df.location.isin(location)]
    if as_list:
        # Sorted so that the output does not depend on set iteration order.
        return sorted({name for names in df.vaccines for name in names})
    return df
def vaccines_approved(path_locations: str = None, verbose: bool = False) -> pd.DataFrame:
    """Get approved vaccines for tracked countries.

    Every location in the csv file is looked up with a `TrackVaccinesClient`. This may take between 2-3 minutes.

    Args:
        path_locations (str, optional): Path to locations csv file. Default value works if repo structure is left
                                        unmodified.
        verbose (bool, optional): Set to True to print a notice about the expected duration. Defaults to False.

    Returns:
        pd.DataFrame: Dataframe with location and vaccines approved. `vaccines` holds a set of names, or None
                      for locations where the client returned None.
    """

    def _as_set(names):
        # Locations the client could not resolve stay as None.
        if names is None:
            return None
        return set(names)

    if verbose:
        print("This may take from 2 to 3 minutes...")

    locations_file = path_locations if path_locations else PATHS.DATA_VAX_META_FILE
    df = pd.read_csv(locations_file, usecols=["location"])

    client = TrackVaccinesClient()
    approved_lists = df.location.apply(lambda name: client.vaccines_approved(name))
    approved_sets = approved_lists.apply(_as_set)
    return df.assign(vaccines=approved_sets)
def vaccines_missing(aggregated: bool = False, verbose: bool = False):
    """Get missing vaccines.

    - Columns "_unapproved" mean vaccines not approved but currently being administered.
    - Columns "_untracked" mean vaccines approved but not tracked.

    Note: Unapproved might mean that trackvaccines.org are not counting a vaccine that was actually approved.

    Args:
        aggregated (bool, optional): Set to True to get list of untracked/unapproved global vaccines. Defaults to
                                     False.
        verbose (bool, optional): Forwarded to `vaccines_approved` in the non-aggregated mode, which prints a
                                  notice about the expected duration. Defaults to False.

    Returns:
        Union[pd.DataFrame, dict]: Unapproved/untracked vaccines. A dict with keys `vaccines_untracked` and
                                   `vaccines_unapproved` if `aggregated`; otherwise a dataframe with one row per
                                   location whose tracked and approved vaccines differ, sorted by `num_untracked`
                                   in descending order.
    """
    if aggregated:
        # Get tracked vaccines
        vax_tracked = vaccines_tracked(as_list=True)
        client = TrackVaccinesClient()
        vax_approved = client.vaccines_approved()
        # Sets are only used for the membership tests; the output keeps the order of the source lists.
        tracked, approved = set(vax_tracked), set(vax_approved)
        return {
            "vaccines_untracked": [v for v in vax_approved if v not in tracked],
            "vaccines_unapproved": [v for v in vax_tracked if v not in approved],
        }

    vax_tracked = vaccines_tracked()
    # Honour the caller's `verbose` flag instead of always printing.
    vax_approved = vaccines_approved(verbose=verbose)
    # Build result dataframe
    df = vax_tracked.merge(vax_approved, on="location", suffixes=("_tracked", "_approved"))
    # Keep locations whose sets differ; dropna removes those with no approved data (None).
    df = df[df.vaccines_tracked != df.vaccines_approved].dropna()
    # Plain comprehensions (rather than DataFrame.apply with axis=1) stay well-defined when no
    # location differs and the frame is empty. Each difference is sorted for a stable output.
    pairs = list(zip(df["vaccines_tracked"], df["vaccines_approved"]))
    df = df.assign(
        unapproved=[sorted(v for v in tracked if v not in approved) for tracked, approved in pairs],
        untracked=[sorted(v for v in approved if v not in tracked) for tracked, approved in pairs],
    )
    df = df.assign(
        num_unapproved=df.unapproved.apply(len),
        num_untracked=df.untracked.apply(len),
    )
    df = df[["location", "unapproved", "num_unapproved", "untracked", "num_untracked"]]
    df = df.sort_values(by="num_untracked", ascending=False)
    return df
def vaccines_comparison_with_who():
    """Compare the vaccines reported as used by WHO and by OWID, per ISO code.

    Downloads the WHO vaccination metadata csv and the OWID locations csv, builds one set of vaccine names per
    ISO code for each source (WHO names are translated with `WHO_VACCINES`, and only WHO rows with a non-null
    `START_DATE` are considered), and joins both on the ISO code.

    Returns:
        pd.DataFrame: Indexed by `iso_code`, with columns `location`, `vaccines_used_owid`, `vaccines_used_who`,
                      `missing_in_who` (in OWID but not in WHO) and `missing_in_owid` (in WHO but not in OWID).
    """

    def _who_vaccines(group):
        # Only vaccines with a start date, translated from WHO naming.
        started = group[group.START_DATE.notnull()]
        return {WHO_VACCINES[name] for name in started.VACCINE_NAME}

    def _owid_vaccines(cell):
        return {name.strip() for name in cell.split(", ")}

    # Load WHO
    who_url = "https://covid19.who.int/who-data/vaccination-metadata.csv"
    df_who = pd.read_csv(who_url)
    vaccines_used_who = df_who.groupby("ISO3").apply(_who_vaccines)
    vaccines_used_who.name = "vaccines_used_who"

    # Load OWID
    owid_url = "https://github.com/owid/covid-19-data/raw/master/public/data/vaccinations/locations.csv"
    df_owid = pd.read_csv(owid_url)
    owid_sets = df_owid.vaccines.apply(_owid_vaccines)
    vaccines_used_owid = df_owid.assign(vaccines_used_owid=owid_sets)
    vaccines_used_owid = vaccines_used_owid[["iso_code", "location", "vaccines_used_owid"]]
    vaccines_used_owid = vaccines_used_owid.set_index("iso_code")

    # Merge
    vaccines_used = vaccines_used_owid.merge(vaccines_used_who, right_index=True, left_index=True)

    # Obtain differences
    missing_in_who = vaccines_used.apply(
        lambda row: row["vaccines_used_owid"].difference(row["vaccines_used_who"]), axis=1
    )
    missing_in_owid = vaccines_used.apply(
        lambda row: row["vaccines_used_who"].difference(row["vaccines_used_owid"]), axis=1
    )
    return vaccines_used.assign(missing_in_who=missing_in_who, missing_in_owid=missing_in_owid)