# Source code for cowidev.vax.utils.extra_source

import pandas as pd

from cowidev.vax.incremental.africacdc import AfricaCDC
from cowidev.vax.utils.orgs import ACDC_COUNTRIES_ALL
from cowidev.vax.utils.checks import METRICS


def add_latest_from_acdc(df: pd.DataFrame, metrics: list, priority: bool = False):
    """Append Africa CDC records to ``df`` and back-fill missing vaccine names.

    Steps, in order:

    1. Select the Africa CDC -> OWID country mappings whose OWID name is
       already present in ``df.location``.
    2. Read the Africa CDC data and run it through ``AfricaCDC.pipeline`` with
       that mapping (``exclude=False``).
    3. In the Africa CDC rows, set every metric from ``METRICS`` that is not
       listed in ``metrics`` to ``pd.NA``.
    4. Drop Africa CDC rows where all requested ``metrics`` are equal to 0.
    5. Concatenate with ``df`` and sort by ``date`` and ``location``.
    6. Fill missing ``vaccine`` values with the most recent non-null
       ``vaccine`` value reported for the same location.
    7. If ``priority`` is true, sort by ``source_url`` and keep only the first
       row for each ``(location, date)`` pair.

    Args:
        df: Input data. Must provide at least the columns ``location``,
            ``date`` and ``vaccine`` (and ``source_url`` when ``priority`` is
            true).
        metrics: Names of the metric columns to take from Africa CDC.
        priority: Whether to de-duplicate ``(location, date)`` pairs after
            merging the two sources.

    Returns:
        The combined DataFrame.
    """
    # Get mapping countries
    locations = set(df.location)
    countries = {acdc: owid for acdc, owid in ACDC_COUNTRIES_ALL.items() if owid in locations}

    # Get ACDC data
    # NOTE(review): the meaning of the positional ``True`` is defined by
    # AfricaCDC's constructor, which is not visible from here -- confirm.
    api = AfricaCDC(True)
    dfa = api.read()
    dfa = dfa.pipe(api.pipeline, countries, exclude=False)

    # Set ignored metrics to NA
    metrics_ignore = {m: pd.NA for m in METRICS if m not in metrics}
    dfa = dfa.assign(**metrics_ignore)

    # Do not use all-zero valued
    msk = (dfa[metrics] == 0).all(axis=1)
    dfa = dfa[~msk]

    # Concatenate
    df = pd.concat([df, dfa], ignore_index=True).sort_values(["date", "location"])

    # Propagate vaccines
    # Keep exactly ONE row per location (the most recent one with a vaccine
    # value). Without the ``subset`` the lookup table could hold several rows
    # per location, turning the merge below into a many-to-many join that
    # multiplies the rows of ``df``.
    latest = (
        df.dropna(subset=["vaccine"])
        # Stable sort so that ties on ``date`` are resolved deterministically.
        .sort_values("date", kind="mergesort")
        .drop_duplicates(subset="location", keep="last")
        .rename(columns={"vaccine": "vaccine_latest"})
    )
    df = df.merge(latest[["location", "vaccine_latest"]], on="location", how="outer")
    df = df.assign(vaccine=df.vaccine.fillna(df.vaccine_latest)).drop(columns=["vaccine_latest"])

    # Remove duplicates coming from WHO
    # NOTE(review): this keeps, per (location, date), the row whose
    # ``source_url`` sorts first; presumably that is the Africa CDC row --
    # confirm against the actual source URLs.
    if priority:
        df = df.sort_values(["source_url"]).drop_duplicates(subset=["location", "date"], keep="first")
    return df