Source code for cowidev.megafile.steps.vax

import numpy as np
import pandas as pd


[docs]def get_vax(data_file):
    vax = pd.read_csv(
        data_file,
        usecols=[
            "location",
            "date",
            "total_vaccinations",
            "total_vaccinations_per_hundred",
            "daily_vaccinations_raw",
            "daily_vaccinations",
            "daily_vaccinations_per_million",
            "people_vaccinated",
            "people_vaccinated_per_hundred",
            "people_fully_vaccinated",
            "people_fully_vaccinated_per_hundred",
            "total_boosters",
            "total_boosters_per_hundred",
            "daily_people_vaccinated",
            "daily_people_vaccinated_per_hundred",
        ],
    )
    vax = vax.rename(
        columns={
            "daily_vaccinations_raw": "new_vaccinations",
            "daily_vaccinations": "new_vaccinations_smoothed",
            "daily_vaccinations_per_million": "new_vaccinations_smoothed_per_million",
            "daily_people_vaccinated": "new_people_vaccinated_smoothed",
            "daily_people_vaccinated_per_hundred": "new_people_vaccinated_smoothed_per_hundred",
        }
    )
    rounded_cols = [
        "total_vaccinations_per_hundred",
        "people_vaccinated_per_hundred",
        "people_fully_vaccinated_per_hundred",
        "total_boosters_per_hundred",
    ]
    vax[rounded_cols] = vax[rounded_cols].round(3)
    return vax


[docs]def _add_rolling(df: pd.DataFrame) -> pd.DataFrame:
    last_known_date = df.loc[df.total_vaccinations.notnull(), "date"].max()
    for n_months in (6, 9, 12):
        n_days = round(365.2425 * n_months / 12)
        df[f"rolling_vaccinations_{n_months}m"] = (
            df.total_vaccinations.interpolate(method="linear").diff().rolling(n_days, min_periods=1).sum().round()
        )
        df.loc[df.date > last_known_date, f"rolling_vaccinations_{n_months}m"] = np.NaN
        df[f"rolling_vaccinations_{n_months}m_per_hundred"] = (
            df[f"rolling_vaccinations_{n_months}m"] * 100 / df.population
        ).round(2)
    return df


[docs]def add_rolling_vaccinations(df: pd.DataFrame) -> pd.DataFrame:
    return df.groupby("location").apply(_add_rolling).reset_index(drop=True)