cowidev.vax.utils

cowidev.vax.utils.base

class cowidev.vax.utils.base.CountryVaxBase[source]

Bases: object

_check_attributes(mapping)[source]
_check_metadata(metadata)[source]
_export_datafile_age(df, metadata, filename, attach)[source]

Export age data.

_export_datafile_main(df, filename, attach=False, reset_index=False, valid_cols_only=False, **kwargs)[source]

Export main data.

_export_datafile_manufacturer(df, metadata, filename, attach)[source]

Export manufacturer data.

_export_datafile_secondary(df, metadata, output_path, output_path_meta)[source]

Export secondary data.

_postprocessing(df, valid_cols_only)[source]

Minor post processing after all transformations.

Basically sort by date, ensure correct column order, correct type for metrics.

_postprocessing_age(df)[source]

Minor post processing after all transformations.

Basically sort by date, ensure correct column order, correct type for metrics.

_postprocessing_manufacturer(df)[source]

Minor post processing after all transformations.

Basically sort by date, ensure correct column order, correct type for metrics.

check_column_values(df: DataFrame, col_name: str, values_accepted: list) DataFrame[source]
export_datafile(df=None, df_age=None, df_manufacturer=None, meta_age=None, meta_manufacturer=None, filename=None, attach=False, attach_age=False, attach_manufacturer=False, reset_index=False, valid_cols_only=False, **kwargs)[source]

Export country data.

Parameters:
  • df (pd.DataFrame) – Main country data.

  • df_age (pd.DataFrame, optional) – Country data by age group. Defaults to None.

  • df_manufacturer (pd.DataFrame, optional) – Country data by manufacturer. Defaults to None.

  • meta_age (dict, optional) – Country metadata by age. Defaults to None.

  • meta_manufacturer (dict, optional) – Country metadata by manufacturer. Defaults to None.

  • filename (str, optional) – Name of output file. If None, defaults to country name.

  • attach (bool, optional) – Set to True to attach to already existing data. Defaults to False.

  • attach_age (bool, optional) – Set to True to attach to already existing age-group data. Defaults to False.

  • attach_manufacturer (bool, optional) – Set to True to attach to already existing manufacturer data. Defaults to False.

  • valid_cols_only (bool, optional) – Export only valid columns. Defaults to False.

  • reset_index (bool, optional) – Bring index back as a column. Defaults to False.

force_monotonic()[source]
from_ice()[source]

Loads the single CSV file location.csv from S3 as a DataFrame.

get_output_path(filename=None, age=False, manufacturer=False)[source]
last_update(**kwargs)[source]
load_datafile(**kwargs)[source]
location: str = None
make_monotonic(df, group_cols=None, max_removed_rows=10, strict=False)[source]
property output_path

Country output file.

property output_path_age

Country output file for age-group data.

property output_path_manufacturer

Country output file for manufacturer data.

pipe_age_per_capita(df: DataFrame) DataFrame[source]
pipe_check_vaccine(df: DataFrame, vaccines_accepted=None) DataFrame[source]
pipe_merge_with_current(df, filename=None)[source]
pipe_metadata(df: DataFrame) DataFrame[source]
pipe_rename_columns(df: DataFrame) DataFrame[source]
cowidev.vax.utils.base._build_population_age_group_df(location, df)[source]
cowidev.vax.utils.base._check_last_update(path, country)[source]
cowidev.vax.utils.base.merge_with_current_data(df: DataFrame, filepath: str) DataFrame[source]

cowidev.vax.utils.checks

class cowidev.vax.utils.checks.CountryChecker(df: DataFrame, allow_extra_cols: bool = True, monotonic_check_skip: list = [], anomalies: bool = True, anomaly_check_skip: list = [])[source]

Bases: object

_check_anomalies(df, metric, th=6)[source]
_check_metrics_anomalies(df)[source]
_check_metrics_inequalities(df: DataFrame)[source]
_check_metrics_monotonic(df: DataFrame)[source]
_get_location(df)[source]
_skip_check_ids(check_skip)[source]
check_column_names()[source]
check_date()[source]
check_location()[source]
check_metrics()[source]
check_source_url()[source]
check_vaccine()[source]
property metrics_present
run()[source]
cowidev.vax.utils.checks.country_df_sanity_checks(df: DataFrame, monotonic_check_skip: list = [], anomalies: bool = True, anomaly_check_skip: list = []) DataFrame[source]
cowidev.vax.utils.checks.validate_vaccines(df, vaccines_accepted, vaccines_raw=None)[source]

cowidev.vax.utils.extra_source

cowidev.vax.utils.extra_source.add_latest_from_acdc(df: DataFrame, metrics: list, priority: bool = False)[source]

cowidev.vax.utils.files

cowidev.vax.utils.files.export_metadata(df: DataFrame, source_name: str, source_url: str, output_path: str)[source]
cowidev.vax.utils.files.get_file_encoding(file_path)[source]
cowidev.vax.utils.files.load_data(data_filename: str, file_ext: str = 'csv')[source]

Load data from a file in the vax._static folder.

Parameters:
  • data_filename (str) – Name of the data file. If no extension is provided, {data_filename}.{file_ext} will be loaded.

  • file_ext (str, optional) – Extension of the file. Defaults to “csv”.

Raises:
  • FileNotFoundError – If no file is found

  • ValueError – If non-supported format is provided

Returns:

Loaded data.

Return type:

dict

cowidev.vax.utils.files.load_query(query_filename: str, file_ext: str = 'json', to_str: bool = True)[source]

Load a query from a file in the vax._static folder.

Parameters:
  • query_filename (str) – Name of the query file. If no extension is provided, {query_filename}.{file_ext} will be loaded

  • file_ext (str, optional) – Extension of the file. Defaults to “json”.

Raises:
  • FileNotFoundError – If no file is found

  • ValueError – If non-supported format is provided

Returns:

Loaded query as a string

Return type:

dict

cowidev.vax.utils.incremental

cowidev.vax.utils.incremental._build_df(location, total_vaccinations, date, vaccine, source_url, people_vaccinated=None, people_partly_vaccinated=None, people_fully_vaccinated=None, total_boosters=None)[source]
cowidev.vax.utils.incremental._check_fields(location, source_url, vaccine, date, total_vaccinations, people_vaccinated, people_partly_vaccinated, people_fully_vaccinated, total_boosters)[source]
cowidev.vax.utils.incremental._from_gh_to_scripts(location)[source]
cowidev.vax.utils.incremental._increment(filepath, location, total_vaccinations, date, vaccine, source_url, people_vaccinated=None, people_partly_vaccinated=None, people_fully_vaccinated=None, total_boosters=None)[source]
cowidev.vax.utils.incremental.enrich_data(ds: Series, row, value) Series[source]
cowidev.vax.utils.incremental.increment(location, total_vaccinations, date, vaccine, source_url, people_vaccinated=None, people_partly_vaccinated=None, people_fully_vaccinated=None, total_boosters=None, make_series_monotonic=False)[source]
cowidev.vax.utils.incremental.merge_with_current_data(df: DataFrame, filepath: str) DataFrame[source]

cowidev.vax.utils.utils

cowidev.vax.utils.utils.add_latest_who_values(df: DataFrame, who_location_name: str, metrics: list)[source]

Inserts the latest data available from the WHO vaccination dataset into the existing dataframe. The metrics argument lists the metrics to be used from the WHO dataset; any other metric that isn’t listed will be automatically set to pd.NA for this specific row.

cowidev.vax.utils.utils.build_vaccine_timeline(df: DataFrame, vaccine_timeline: dict) DataFrame[source]

Add vaccines to the data based on the date their administration started.

vaccine_timeline: dictionary of “vaccine” -> “start_date”

Example:

{
    "Pfizer/BioNTech": "2021-02-24",
    "Sinovac": "2021-03-03",
    "Oxford/AstraZeneca": "2021-05-03",
    "CanSino": "2021-05-09",
    "Sinopharm": "2021-09-18",
}

cowidev.vax.utils.utils.get_latest_file(path, extension)[source]
cowidev.vax.utils.utils.make_monotonic(df: DataFrame, max_removed_rows=10, new_version=False) DataFrame[source]